RubyでMixiの自分の日記を取得してみた。日記はgetDiariesメソッドで最新のものから30件分取得される。日記の内容にはタグが含まれているので、不要であれば自分でgsubを使って除去しておく。なお、日付を取得し忘れていたことに今になって気づいた。
#! /usr/bin/ruby -Ku
# Logs in to mixi.jp, downloads the logged-in user's own diary entries and
# prints each title followed by its body with the HTML tags removed.

require 'net/http'
require 'uri'
require 'kconv'

email = "*****@******.com"
password = "*****"

# A user scraped from the show_log.pl page by Mixi#getFootPrints.
class User
  attr_reader :id, :name

  # @param id [String] digits captured from the "show_friend.pl?id=" link
  # @param name [String] text of that link
  def initialize(id, name)
    @id = id
    @name = name
  end
end

# One diary entry scraped from a view_diary.pl page by Mixi#getDiaries.
class MixiDiary
  attr_reader :title, :contents

  # @param title [String] text captured from the entry's <dt> element
  # @param contents [String] inner HTML of the "diary_body" div (tags included)
  def initialize(title, contents)
    @title = title
    @contents = contents
  end
end

# Minimal screen-scraping client for mixi.jp.
#
# Call #login first; the other methods send the cookie it stored.
# The public method names keep their original camelCase spelling so that
# existing callers continue to work.
#
# NOTE(review): requests (including the login form) go over plain HTTP on the
# default port because Net::HTTP.start is called without SSL options. Consider
# switching to HTTPS if the site supports it.
class Mixi
  HOST = 'mixi.jp'

  # Link patterns for the pages that are scraped. The dot in "*.pl" is escaped
  # so that it only matches a literal dot.
  FOOTPRINT_LINK = /<a href="show_friend\.pl\?id=(\d+)">([^<]+)<\/a>/
  NEW_DIARY_LINK = /<a href="view_diary\.pl\?id=\d+&owner_id=\d+">([^<]+)<\/a>/
  OWN_DIARY_LINK = /<input[^>]+><a href="(view_diary\.pl\?id=\d+&owner_id=\d+)">[^<]+<\/a>/
  DIARY_TITLE    = /<dt>([^<]+)<span>/
  DIARY_BODY     = /<div id="diary_body">(.+?)<\/div>/m

  # Posts the login form and remembers the session cookies.
  #
  # @param user [String] account e-mail address
  # @param password [String] account password
  # @return [String] the Cookie header value stored for later requests
  # @raise [RuntimeError] if the response carries no usable Set-Cookie header
  def login(user, password)
    # Encode the fields so that characters such as "&", "=", "+" or "%" in the
    # credentials cannot corrupt the form body.
    form = URI.encode_www_form([['email', user],
                                ['password', password],
                                ['next_url', './home.pl']])
    response = Net::HTTP.start(HOST) { |http| http.post('/login.pl', form) }

    # get_fields returns one element per Set-Cookie header, so commas inside
    # an "expires=" date are not mistaken for separators. Only the leading
    # "name=value" pair of each header belongs in a Cookie request header.
    pairs = (response.get_fields('Set-Cookie') || [])
            .map { |header| header[/\A[^;]*/].strip }
            .reject(&:empty?)
    if pairs.empty?
      raise "mixi login failed: no Set-Cookie header in response (HTTP #{response.code})"
    end

    @cookie = pairs.join('; ')
  end

  # Scrapes the users linked from /show_log.pl.
  #
  # @return [Array<User>] one User per matching link, in page order
  def getFootPrints
    fetch_body('/show_log.pl').scan(FOOTPRINT_LINK).map { |id, name| User.new(id, name) }
  end

  # Scrapes the diary link texts from /new_friend_diary.pl.
  #
  # @return [Array<String>] link texts in page order
  def getNewDiaryTitles
    fetch_body('/new_friend_diary.pl').scan(NEW_DIARY_LINK).flatten
  end

  # Downloads every entry listed by #getDiaryPaths.
  # (The accompanying note says the list holds the 30 most recent entries;
  # nothing here enforces that number.)
  #
  # @return [Array<MixiDiary>] entries in list order; title and contents are
  #   empty strings when the corresponding markup is not found
  def getDiaries
    paths = getDiaryPaths
    return [] if paths.empty?

    # One connection serves all entry pages instead of one socket per entry.
    Net::HTTP.start(HOST) do |http|
      paths.map do |path|
        page = fetch_body("/#{path}", http)
        MixiDiary.new(last_capture(page, DIARY_TITLE), last_capture(page, DIARY_BODY))
      end
    end
  end

  # Scrapes the relative entry URLs from /list_diary.pl.
  #
  # @return [Array<String>] paths such as "view_diary.pl?id=1&owner_id=2"
  def getDiaryPaths
    fetch_body('/list_diary.pl').scan(OWN_DIARY_LINK).flatten
  end

  private

  # GETs +path+ with the stored cookie and returns the body converted to UTF-8.
  # Uses +http+ when given; otherwise opens a connection and closes it again.
  def fetch_body(path, http = nil)
    headers = @cookie ? { 'Cookie' => @cookie } : {}
    response = if http
                 http.get(path, headers)
               else
                 Net::HTTP.start(HOST) { |conn| conn.get(path, headers) }
               end
    response.body.toutf8
  end

  # Returns the first capture group of the LAST match of +pattern+ in +text+,
  # or an empty string when nothing matches.
  def last_capture(text, pattern)
    hit = text.scan(pattern).last
    hit ? hit[0] : ''
  end
end

mixi = Mixi.new
mixi.login(email, password)
mixi.getDiaries.each do |diary|
  puts diary.title
  # Strip the HTML tags from the body with gsub.
  puts diary.contents.gsub(/<[^>]+>/, '')
  puts
end