dari88's diary

これから趣味にするプログラミング/PHP/javascript/kohana/CMS/web design/

ヤマレコ山行記録のダウンローダーをRubyで書いてみた

 登山の記録はヤマレコに投稿しています。既に100件以上投稿していますが、何らかの理由で記録が使えなくなったら問題です。そこでヤマレコ専用のダウンローダーを作ってみました。

 言語はお手軽に書けそうなRubyを選択。初めてのRubyプログラミングですが、やりたい事をネットのリファレンスとかで調べて書いたら動きました。非常に敷居が低い言語ですね、驚きました。

 

Rubyの導入

 Ruby Installer のページから Ruby 1.9.3-p448 をゲットしてインストールしました。あとはテキストエディターでコードを書いて実行するだけ。簡単ですね~。

 

ヤマレコダウンローダ

 ヤマレコはxoopsをベースにした、PHPとJavaScriptによる動的なサイトです。汎用のダウンローダーでは動作の再現が困難です。firebugを使って構造を丹念に調べてダウンロードする必要があります。今回はバージョン1.0ということで、完全な再現はできていませんが公開してみます。

・YamarecoDownloader V1.0

#
# YamarecoDownloader V1.0 (c)dari88@yamareco 2013/11/16
#

require "fileutils"
require "open-uri"


# Proxy settings. To go through a proxy, comment out the SECOND `options`
# assignment below (the one that sets :proxy => nil) so the first one stays in effect.
proxy_host   = "http://your.proxy.jp:8080"
proxy_user   = "username"
proxy_passwd = "password"
proxy_credentials = [proxy_host, proxy_user, proxy_passwd]
options = { :proxy_http_basic_authentication => proxy_credentials }
# Default: direct connection, no proxy.
options = { :proxy => nil }


# Opens a remote URL through open-uri and yields the response to the block,
# returning the block's value.
#
# Kernel#open stopped accepting URLs in Ruby 3.0, so URI.open is used when
# open-uri provides it; older Rubies fall back to Kernel#open.
def yamareco_open(url, options, &block)
  if URI.respond_to?(:open)
    URI.open(url, options, &block)
  else
    open(url, options, &block)
  end
end

# Downloads +url+ and writes the body, in binary mode, to dir_name/file_name.
# The directory is created when missing. The body is fetched before the local
# file is opened, so a failed download does not leave an empty file behind.
# Returns the local path (without a leading "./").
def yamareco_save(url, dir_name, file_name, options)
  path = dir_name + "/" + file_name
  FileUtils.mkdir_p(dir_name)
  body = yamareco_open(url, options) { |data| data.read }
  File.open(path, 'wb') { |output| output.write(body) }
  path
end

# Downloads +url+ into +dir_name+ (named +file_name+, or the URL's basename
# when omitted), prints a "*" progress mark and returns the "./"-prefixed
# local path that replaces the URL in the saved HTML.
def yamareco_mirror(url, dir_name, options, file_name = nil)
  path = yamareco_save(url, dir_name, file_name || File.basename(url), options)
  print "*"
  "./" + path
end

# Downloads a stylesheet into "script/" together with the png/gif images it
# references through url(...). Image paths are resolved relative to the
# stylesheet's URL and stored under "script/" with the same relative layout.
# An image that cannot be downloaded is reported with "x" and skipped.
# Returns the "./"-prefixed local path of the stylesheet.
def yamareco_mirror_css(url, options)
  css_dir = "script"
  css_path = css_dir + "/" + File.basename(url)
  FileUtils.mkdir_p(css_dir)
  css = yamareco_open(url, options) { |data| data.read }
  File.open(css_path, 'wb') { |output| output.write(css) }

  # Capture 1 is everything after "url(" up to and including the extension,
  # which may still carry an opening quote.
  css.scan(/url\((.+?\.(png|gif))/) do |ref, _ext|
    asset = ref.delete("\"'")
    asset_dir = "script/" + File.dirname(asset)
    FileUtils.mkdir_p(asset_dir)
    begin
      body = yamareco_open(File.dirname(url) + "/" + asset, options) { |data| data.read }
    rescue
      print "x"
      next
    end
    File.open(asset_dir + "/" + File.basename(asset), 'wb') { |output| output.write(body) }
  end

  print "*"
  "./" + css_path
end

# Downloads +url+ into +dirName+ under the URL's basename and prints a "*"
# progress mark. Used for fixed parts that are not linked from the HTML.
def partget(url, dirName, options)
  yamareco_save(url, dirName, File.basename(url), options)
  print "*"
end

# Downloads one yamareco record page and the resources it references, then
# writes a copy of the page whose links point at the local files.
#
# Output layout (relative to the current directory):
#   yama<yamaid>.html  the rewritten page
#   yama<yamaid>/      photos, thumbnails, gpx/xml, graph data, attachments
#   script/            js, css (and the images the css uses), swf
#   images/            gif / png / jpg page furniture
#
# yamaid  - record id as a String of digits.
# options - option Hash handed to open-uri (e.g. proxy settings).
#
# Download errors propagate to the caller, except for relative gif/png images
# and css images, which are reported and skipped.
def yamaget(yamaid, options)
  # fid is yamaid without its last four characters (for ids of five or more
  # characters); it is the folder level above the record in upload URLs.
  fid = yamaid[0..(yamaid.length-5)]
  record_dir = "yama" + yamaid
  site = "http://yamareco.info/modules/yamareco/"
  www = "http://www.yamareco.com/modules/yamareco/"
  upimg = site + "upimg/" + fid + "/" + yamaid + "/"

  # Fetch the record page.
  html = yamareco_open(www + "detail-" + yamaid + ".html", options) { |page| page.read }

  # Full-size photos: the page links them through the resize script; fetch
  # the original from the upload folder instead.
  html = html.gsub(/(http:\/\/yamareco.info\/modules\/yamareco\/include\/tmp_imgresize.*fname=)(\w*\.jpg|\w*\.JPG)/) do
    yamareco_mirror(upimg + $2, record_dir, options)
  end
  print "\n"

  # Thumbnails (t_*.jpg) of this record.
  html = html.gsub(/(http:\/\/yamareco.info\/modules\/yamareco\/upimg\/#{fid}\/#{yamaid}\/)(t_\w*\.jpg|t_\w*\.JPG)/) do
    yamareco_mirror(upimg + $2, record_dir, options)
  end
  print "\n"

  # Remaining upload-folder jpg links are only re-pointed at the record
  # folder; nothing is downloaded in this step.
  html = html.gsub(/(http:\/\/yamareco.info\/modules\/yamareco\/upimg\/\d+\/\d+\/)(\w*\.jpg|\w*\.JPG)/) do
    "./" + record_dir + "/" + $2
  end

  # JavaScript referenced by absolute URL.
  html = html.gsub(/(http:\/\/[^"']*?\.js)/) do
    yamareco_mirror($1, "script", options)
  end
  print "\n"

  # cheer.js is referenced by a relative path.
  html = html.gsub(/src="(include\/cheer.js)"/) do
    'src="' + yamareco_mirror(site + $1, "script", options) + '"'
  end
  print "\n"

  # Flash chart component.
  html = html.gsub(/(include\/ofc\/open-flash-chart.swf)/) do
    yamareco_mirror(site + $1, "script", options)
  end
  print "\n"

  # Stylesheets and the images they use.
  html = html.gsub(/http:\/\/[^"']*?\.css/) do |css_url|
    yamareco_mirror_css(css_url, options)
  end
  print "\n"

  # gif / png referenced by absolute URL.
  html = html.gsub(/(http:\/\/yamareco|http:\/\/www\.yamareco)[^"']*?\.(gif|png)/) do |image_url|
    yamareco_mirror(image_url, "images", options)
  end
  print "\n"

  # gif / png referenced by relative path. Some of these do not exist on the
  # server, so a failed download is reported and the link is rewritten anyway.
  html = html.gsub(/src=["']([^:\.]*?\.(gif|png))(["'])/) do
    image_url = site + $1
    quote = $3
    begin
      yamareco_save(image_url, "images", File.basename(image_url), options)
    rescue
      print "Not found: ", image_url, "\n"
    end
    print "*"
    "src=" + quote + "./images/" + File.basename(image_url) + quote
  end
  print "\n"

  # jpg served from www.yamareco*.
  html = html.gsub(/http:\/\/www.yamareco[^"']*\.(jpg|JPG)/) do |image_url|
    yamareco_mirror(image_url, "images", options)
  end
  print "\n"

  # gpx / xml given as a relative attribute value.
  html = html.gsub(/= *?["']([^"':]*?\.(gpx|xml))["']/) do
    '="' + yamareco_mirror(www + $1, record_dir, options) + '"'
  end
  print "\n"

  # Data file for the graph; it appears URL-encoded in the page.
  html = html.gsub(/include%2Fofc%2Fofc_gendata_line.php%3Fdid%3D(\d+?)%26xmode%3Ddistance/) do
    did = $1
    data_url = site + "include/ofc/ofc_gendata_line.php?did=" + did + "&xmode=distance"
    yamareco_mirror(data_url, record_dir, options, "gendata" + did + ".dat")
  end
  print "\n"

  # Attachments: saved as file<fid><ext>, where <ext> is taken from the link text.
  html = html.gsub(/(showfile\.php\?fid=(\d+?))(["'])>(.+?(\.[^.]+?))</) do
    link, file_id, quote, label, ext = $1, $2, $3, $4, $5
    local = yamareco_mirror(www + link, record_dir, options, "file" + file_id + ext)
    local + quote + ">" + label + "<"
  end
  print "\n"

  # Write the rewritten page.
  File.open("./yama" + yamaid + ".html", 'wb') { |output| output.write(html) }

  # Point the highslide scripts at the local graphics folder. They only exist
  # when the page referenced them, so missing files are skipped.
  config_js = "script/highslide.config.js"
  if File.exist?(config_js)
    # Tag the text as EUC-JP before applying the regex.
    js = File.open(config_js, "r") { |input| input.read.encode("EUC-JP","EUC-JP") }
    js = js.sub(/include\/highslide\/graphics\//,"script/graphics/")
    File.open(config_js, "w") { |output| output.write(js) }
  end

  gallery_js = "script/highslide-with-gallery_mod.js"
  if File.exist?(gallery_js)
    js = File.open(gallery_js, "r") { |input| input.read }
    js = js.sub(/highslide\/graphics\//,"script/graphics/")
    File.open(gallery_js, "w") { |output| output.write(js) }
  end

  # Parts that highslide needs but that are not linked from the HTML.
  graphics = site + "include/highslide/graphics/"
  partget(graphics + "outlines/custom.png", "script/graphics/outlines", options)
  partget(graphics + "zoomin.cur", "script/graphics", options)
  partget(graphics + "zoomout.cur", "script/graphics", options)

  print "\n"

end # end of yamaget



#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++indexの始まり

# Index download: asks for a yamareco user number, merges every page of that
# user's record list into one HTML file (index<uid>.html), downloads each
# linked record through yamaget, and mirrors the thumbnails, css and images
# the list uses.

print "Enter your yamareco user number.: "
# gets returns nil at end of input, and the line may contain no digits.
uid = STDIN.gets.to_s[/\d+/]
abort "No yamareco user number was entered." unless uid
print "UID: "+uid,"\n"

# Returns the body of a remote URL. Kernel#open stopped accepting URLs in
# Ruby 3.0, so URI.open is used when open-uri provides it.
fetch = lambda do |url|
  if URI.respond_to?(:open)
    URI.open(url, options) { |data| data.read }
  else
    open(url, options) { |data| data.read }
  end
end

# Downloads url into dir_name under the URL's basename; returns the local path.
save = lambda do |url, dir_name|
  path = dir_name + "/" + File.basename(url)
  FileUtils.mkdir_p(dir_name)
  body = fetch.call(url)
  File.open(path, 'wb') { |output| output.write(body) }
  path
end

base = "http://www.yamareco.com/modules/yamareco/"

# Fetch the first list page to learn how many pages there are.
index = fetch.call(base + "userinfo-" + uid + "-data.html")

# Default to one page when no "go to last page" link is found.
last_page = 1
if index =~ /href=userinfo.php\?pnum=(\d+?)&[^>]+?go to last page/
  last_page = $1.to_i
end

# Concatenate the first <table> of every page; the text before and after the
# table is taken from the last page that contained one.
top = nil
bottom = nil
table = ""
(1..last_page).each do |pnum|
  index = fetch.call(base + "userinfo.php?pnum=#{pnum}&act=data&req_uid=#{uid}")
  found = index.match(/<table>.*?<\/table>/m)
  next unless found
  top = found.pre_match
  table += found[0]
  bottom = found.post_match
end
# If no page contained a table, the last fetched page is kept as it is.
index = top + table + bottom if top


# Thumbnails: fetch through the resize script and point the page at the copy.
index = index.gsub(/(http:\/\/www\.yamareco\.com\/include\/imgresize\.php.*?)(t_\w*\.jpg|t_\w*\.JPG)/) do |match|
  # Undo HTML and percent escaping (URI.decode was removed in Ruby 3.0).
  url = URI::DEFAULT_PARSER.unescape(match.gsub("&amp;", "&"))
  path = save.call(url, "index" + uid)
  print "*"
  "./" + path
end
print "\n"

# Re-point record links at the local copies and download each record.
index = index.gsub(/http:\/\/www\.yamareco\.com\/modules\/yamareco\/detail-(\d+?)\.html/) do |match|
  "./yama" + $1 + ".html"
end
fetched = {}
index = index.gsub(/detail-(\d+?)\.html/) do |match|
  yamaid = $1
  # A record can be linked more than once; download it only the first time.
  unless fetched[yamaid]
    fetched[yamaid] = true
    yamaget(yamaid,options)
    print "GET: yama" + yamaid , "\n"
  end
  # The block's value replaces the link, so the local path must come last.
  "./yama" + yamaid + ".html"
end

# Stylesheets and the png/gif images they reference through url(...).
index = index.gsub(/http:\/\/[^"']*?\.css/) do |css_url|
  css_path = "script/" + File.basename(css_url)
  FileUtils.mkdir_p("script")
  css = fetch.call(css_url)
  File.open(css_path, 'wb') { |output| output.write(css) }

  # Capture 1 is everything after "url(" up to and including the extension,
  # which may still carry an opening quote.
  css.scan(/url\((.+?\.(png|gif))/) do |ref, _ext|
    asset = ref.delete("\"'")
    asset_dir = "script/" + File.dirname(asset)
    FileUtils.mkdir_p(asset_dir)
    begin
      body = fetch.call(File.dirname(css_url) + "/" + asset)
    rescue
      print "x"
      next
    end
    File.open(asset_dir + "/" + File.basename(asset), 'wb') { |output| output.write(body) }
  end

  print "*"
  "./" + css_path
end
print "\n"


# gif / png referenced by absolute URL.
index = index.gsub(/(http:\/\/yamareco|http:\/\/www\.yamareco)[^"']*?\.(gif|png)/) do |image_url|
  path = save.call(image_url, "images")
  print "*"
  "./" + path
end
print "\n"


# gif / png referenced by relative path. A failed download is reported and
# the link is rewritten anyway.
index = index.gsub(/src=["']([^:\.]*?\.(gif|png))(["'])/) do
  image_url = "http://yamareco.info/modules/yamareco/" + $1
  quote = $3 # the quote character; capture 2 is only the file extension
  begin
    save.call(image_url, "images")
  rescue
    print "Not found: ",image_url,"\n"
  end
  print "*"
  "src=" + quote + "./images/" + File.basename(image_url) + quote
end
print "\n"


# Write the merged, rewritten list.
File.open("./index" + uid + ".html", 'wb') { |output| output.write(index) }

print "YamarecoDownloader: Mission accomplished.","\n\n"


 

 500行に満たないコードですが、作るのに一週間ほど要しました。HighSlideによる写真のスライドショーが動かず、これの解析に相当時間を費やしました。現時点では標高グラフの描画が全くできていないので、これから解析してみたいと思います。

 

 

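*1:.+?\.(png|gif (これは脚注ではなく、コード中の正規表現の一部が、はてな記法の脚注「((…))」として誤って抽出されたものです。元のコードは css.scan(/url\((.+?\.(png|gif))/) です)

*2:.+?\.(png|gif (同上。元のコードは css.scan(/url\((.+?\.(png|gif))/) です)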