Mixiの自分の日記を取得する - マイペースなプログラミング日記

RubyでMixiの自分の日記を取得してみた。日記はgetDiariesメソッドで最新のものから30件分取得される。日記の内容にはHTMLタグが含まれているので、不要であれば自分でgsubを使って除去しておく。日付を取得し忘れたことに今、気づいた。

#! /usr/bin/ruby -Ku

require 'net/http'
require 'uri'
require 'kconv'

# Login credentials for mixi.
# The MIXI_EMAIL / MIXI_PASSWORD environment variables take precedence so real
# secrets do not have to be written into this file; the literals below are the
# fallbacks used when the variables are not set.
email = ENV.fetch('MIXI_EMAIL', "*****@******.com")
password = ENV.fetch('MIXI_PASSWORD', "*****")

# A mixi user as scraped from a "show_friend.pl" link: the id taken from the
# link's query string and the display name taken from the link text.
class User
    # Both values are read-only after construction.
    attr_reader :id, :name

    # id   - the user's id as captured from the page
    # name - the user's display name
    def initialize(id, name)
        @id, @name = id, name
    end
end

# One diary entry: its title and its body text as extracted from the page
# (the body may still contain HTML tags).
class MixiDiary
    # Both values are read-only after construction.
    attr_reader :title, :contents

    # title    - the entry's title
    # contents - the entry's body
    def initialize(title, contents)
        @title, @contents = title, contents
    end
end

# Small scraping client for mixi.jp.
#
# After #login, the other methods request a page with the stored session
# cookie, convert the body to UTF-8 with kconv's toutf8, and pull the wanted
# pieces out of the HTML with regular expressions.
class Mixi
    # Host every request is sent to.
    HOST = 'mixi.jp'

    # Logs in and stores the session cookies for the following requests.
    #
    # user     - login e-mail address
    # password - account password
    #
    # Returns the Cookie header value that later requests will send.
    # Raises RuntimeError if the response carries no Set-Cookie header.
    def login(user, password)
        # Form-encode the values so characters such as "&", "=", "+" or "%"
        # inside the credentials cannot corrupt the request body.
        form = URI.encode_www_form('email' => user, 'password' => password, 'next_url' => './home.pl')
        response = Net::HTTP.start(HOST) { |http| http.post('/login.pl', form) }

        set_cookies = response.get_fields('Set-Cookie')
        if set_cookies.nil? || set_cookies.empty?
            raise 'mixi login failed: the response contained no Set-Cookie header'
        end

        # Keep only the leading "name=value" pair of each Set-Cookie header.
        # Attributes (path, expires, ...) are not sent back to the server, and
        # reading the headers one by one avoids splitting on the comma that
        # appears inside "expires" dates.
        pairs = set_cookies.map { |field| field.split(';').first.to_s.strip }
        @cookie = pairs.reject { |pair| pair.empty? }.join('; ')
    end

    # Returns an Array of User built from every "show_friend.pl" link found on
    # /show_log.pl.
    def getFootPrints
        fetch('/show_log.pl').scan(/<a href="show_friend\.pl\?id=(\d+)">([^<]+)<\/a>/).map { |id, name|
            User.new(id, name)
        }
    end

    # Returns an Array with the link text of every "view_diary.pl" link found
    # on /new_friend_diary.pl.
    def getNewDiaryTitles
        link = /<a href="view_diary\.pl\?id=(\d+)&owner_id=(\d+)">([^<]+)<\/a>/
        fetch('/new_friend_diary.pl').scan(link).map { |_id, _owner_id, title| title }
    end

    # Fetches every page listed by #getDiaryPaths and returns an Array of
    # MixiDiary. A title or body that cannot be found is left as "".
    def getDiaries
        getDiaryPaths.map { |path|
            body = fetch('/' + path)

            # If a pattern matches more than once, the last match is used.
            title = (body.scan(/<dt>([^<]+)<span>/).last || ['']).first
            contents = (body.scan(/<div id="diary_body">(.+?)<\/div>/m).last || ['']).first

            MixiDiary.new(title, contents)
        }
    end

    # Returns an Array of relative "view_diary.pl?id=..&owner_id=.." paths
    # collected from /list_diary.pl.
    def getDiaryPaths
        entry = /<input[^>]+><a href="(view_diary\.pl\?id=\d+&owner_id=\d+)">[^<]+<\/a>/
        fetch('/list_diary.pl').scan(entry).flatten
    end

    private

    # Requests +path+ with the stored session cookie and returns the response
    # body converted to UTF-8. The block form of Net::HTTP.start closes the
    # connection once the request is done.
    def fetch(path)
        response = Net::HTTP.start(HOST) { |http| http.get(path, 'Cookie' => @cookie) }
        response.body.toutf8
    end
end

# Log in, then print every diary: title first, then the body with its HTML
# tags removed, then a blank line as separator.
mixi = Mixi.new
mixi.login(email, password)
mixi.getDiaries.each do |entry|
    puts entry.title
    # Strip the tags with gsub
    puts entry.contents.gsub(/<[^>]+>/, '')
    puts
end