dari88's diary

これから趣味にするプログラミング/PHP/javascript/kohana/CMS/web design/

ヤマレコダウンローダー V1.5

 ヤマレコの山行記録を一括でダウンロードするプログラム YamarecoDownloader のバージョンアップはV1.5まで進みました。枯れてきた感じがしますので、Rubyのコードを公開しておきます。

主な改善点

 V1.0以降の主な改善点は以下の通りです。

・HTTPヘッダに"User-Agent"を追加した。
・サイトにファイルが無いとopen(url)でエラーが発生することへの対策。
・エラー時に空ファイルが書き込まれるので、open(file)とopen(url)の順番を入れ替え。
・コンソールに日本語表示。
・サーバーの負荷軽減対策。取得済みの山行記録は飛ばす、取得済みのパーツ画像は飛ばす。結果的に少し高速化。
・特定の山行記録だけを取得するコマンドを追加。
・src = " abc/def " などの記述におけるスペース" "有無等の配慮で、正規表現を見なおした。
・山行記録が1ページ分に満たない場合の対策。

ヤマレコダウンローダー V1.5

 コードです。

#! ruby -Ku
#
# YamarecoDownloader V1.5 (c)dari88@yamareco 2013/11/21
#

require "fileutils"
require "open-uri"


# Connection settings handed to every open-uri request.
# To go through a proxy, fill in the three proxy_* values and comment out the
# second `options` assignment below (put # at its head). While that second
# assignment is active it replaces the first one, so no proxy is used.
proxy_host = "http://your.proxy.jp:8080"
proxy_user = "username"
proxy_passwd = "password"
# Proxy variant: proxy with basic authentication plus a browser User-Agent header.
options = {:proxy_http_basic_authentication => [proxy_host,proxy_user,proxy_passwd],
  "User-Agent" => "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:25.0) Gecko/20100101 Firefox/25.0" }
# Direct variant: proxy disabled, same User-Agent header.
options = {:proxy => nil,"User-Agent" => "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:25.0) Gecko/20100101 Firefox/25.0"}

# Downloads +url+ into +filePath+ and prints a one-character progress mark.
#
# url      - address to fetch.
# dirName  - directory that has to exist before writing; created when missing.
# filePath - destination file.
# options  - option hash handed to open-uri (proxy setting, HTTP headers).
# ow       - 0 (default) keeps an already existing file; any other value
#            downloads again and overwrites it.
#
# Prints "." when the existing file is kept, "*" after a successful download
# and "x" when fetching or writing raised an error. Returns nil.
def fget(url,dirName,filePath,options,ow=0)
  if File.exist?(filePath) && ow == 0
    print "."
    return
  end

  FileUtils.mkdir_p(dirName) unless File.exist?(dirName)
  begin
    # Kernel#open no longer accepts URLs on Ruby 3.0+; URI.open replaces it
    # (Ruby 2.5+). Older interpreters fall back to the open-uri Kernel#open.
    reader = proc { |data| data.read }
    body = if URI.respond_to?(:open)
      URI.open(url, options, &reader)
    else
      open(url, options, &reader)
    end
    # The body is fetched completely before the file is opened, so a failed
    # download never leaves an empty file behind.
    File.open(filePath, 'wb') { |output| output.write(body) }
    print "*"
  rescue StandardError
    print "x"
  end
end

# Saves one Yamareco trip record for offline viewing.
#
# The record page is fetched, every asset it references (photos, thumbnails,
# scripts, Flash chart, style sheets and the images they use, icons, GPX/XML
# files, chart data, attachments) is downloaded through fget, and each
# reference in the page is rewritten to point at the local copy. The rewrite
# passes run in a fixed order: a later pass only sees what the earlier passes
# left untouched, so the order must not be changed.
#
# yamaid  - record number as a String of digits.
# options - option hash handed to open-uri and to fget.
#
# Output goes below the current directory: "yama<yamaid>/", "script/",
# "images/" and "./yama<yamaid>.html". Progress marks are printed by fget; a
# style sheet that cannot be fetched prints "x" and is skipped. Raises
# whatever open-uri raises when the record page itself cannot be fetched.
# Returns nil.
def yamaget(yamaid,options)
  test = 0 # set to 1 to print a label before each pass
  trace = lambda { |label| print label,"\n" if test == 1 }

  # Reads a URL into a String. Kernel#open no longer accepts URLs on
  # Ruby 3.0+, so URI.open is used when the interpreter provides it.
  fetch = lambda do |target|
    reader = proc { |io| io.read }
    if URI.respond_to?(:open)
      URI.open(target, options, &reader)
    else
      open(target, options, &reader)
    end
  end

  # Downloads +remote+ to dir/name through fget and returns that local path.
  store = lambda do |remote, dir, name, ow|
    local = dir + "/" + name
    fget(remote, dir, local, options, ow)
    local
  end

  record_dir = "yama" + yamaid
  module_url = "http://yamareco.info/modules/yamareco/"

  # Fetch the record page.
  trace.call("HTML start")
  # yamaid without its last four characters; it is used as a directory
  # component of the photo URLs below.
  fid = yamaid[0..(yamaid.length-5)]
  html = fetch.call("http://www.yamareco.com/modules/yamareco/detail-" + yamaid + ".html")

  # Resized-photo links: download the original photo and point at it.
  trace.call("原画 start")
  html = html.gsub(/(http:\/\/yamareco.info\/modules\/yamareco\/include\/tmp_imgresize.*?fname=)(\w*\.jpg|\w*\.JPG)/) do
    photo = module_url + "upimg/" + fid + "/" + yamaid + "/" + $2
    "./" + store.call(photo, record_dir, File.basename(photo), 0)
  end
  print "\n"

  # Thumbnails (t_*.jpg) of this record.
  trace.call("サムネイル start")
  html = html.gsub(/(http:\/\/yamareco.info\/modules\/yamareco\/upimg\/#{fid}\/#{yamaid}\/)(t_\w*\.jpg|t_\w*\.JPG)/) do
    thumb = module_url + "upimg/" + fid + "/" + yamaid + "/" + $2
    "./" + store.call(thumb, record_dir, File.basename(thumb), 0)
  end
  print "\n"

  # Remaining full-size photo links: rewrite the path only, nothing is downloaded here.
  html = html.gsub(/(http:\/\/yamareco.info\/modules\/yamareco\/upimg\/\d+\/\d+\/)(\w*\.jpg|\w*\.JPG)/) do
    "./" + record_dir + "/" + $2
  end

  # JavaScript files; always downloaded again (ow = 1).
  trace.call("JS start")
  html = html.gsub(/(http:\/\/www\.yamareco\.com\/|http:\/\/yamareco\.info\/)([^"']*\/([^"']*?\.js))/) do
    script = "http://yamareco.info/" + $2
    "./" + store.call(script, "script", File.basename(script), 1)
  end
  html = html.gsub(/src *= *" *(include\/cheer.js) *"/) do
    script = module_url + $1
    'src="./' + store.call(script, "script", File.basename(script), 1) + '"'
  end
  print "\n"

  # Flash chart player.
  trace.call("SWF start")
  html = html.gsub(/(include\/ofc\/open-flash-chart.swf)/) do
    flash = module_url + $1
    "./" + store.call(flash, "script", File.basename(flash), 0)
  end
  print "\n"

  # Style sheets and the png/gif images they reference.
  trace.call("CSS start")
  html = html.gsub(/http:\/\/yamareco\.info[^"']*?\/(.*?\.css)/) do |css_url|
    trace.call(css_url)
    css_path = "script/" + File.basename(css_url)
    FileUtils.mkdir_p("script") unless File.exist?("script")
    # Download first and write afterwards: a failed download must neither
    # leave an empty style sheet behind nor abort the whole record.
    css = begin
      fetch.call(css_url)
    rescue StandardError
      nil
    end
    if css
      File.open(css_path, 'wb') { |output| output.write(css) }
      css.scan(/url *\( *( *[^)]+?\.(png|gif))/) do |found|
        relative = found[0].gsub(/["' ]/,"")
        image = File.dirname(css_url) + "/" + relative
        store.call(image, "script/" + File.dirname(relative), File.basename(image), 0)
      end
    else
      print "x"
    end
    print "\n"
    "./" + css_path
  end

  # gif/png referenced by a relative src attribute.
  trace.call("GIF/PNG1 start")
  html = html.gsub(/src *= *["'] *([^"':]*?\.(gif|png)) *(["'])/) do
    quote = $3
    icon = module_url + $1
    "src=" + quote + "./" + store.call(icon, "images", File.basename(icon), 0) + quote
  end
  print "\n"

  # gif/png referenced by an absolute URL.
  trace.call("GIF/PNG2 start")
  html = html.gsub(/(http:\/\/yamareco\.info\/|http:\/\/www\.yamareco\.com\/)([^"']*?\.(gif|png))/) do
    icon = "http://yamareco.info/" + $2
    "./" + store.call(icon, "images", File.basename(icon), 0)
  end
  print "\n"

  # jpg files served from the www host.
  trace.call("JPG start")
  html = html.gsub(/http:\/\/www.yamareco[^"']*?\.(jpg|JPG)/) do |picture|
    "./" + store.call(picture, "images", File.basename(picture), 0)
  end
  print "\n"

  # gpx/xml files; always downloaded again.
  trace.call("GPX/XML start")
  html = html.gsub(/= *["'] *([^"':]*?\.(gpx|xml)) *["']/) do
    track = "http://www.yamareco.com/modules/yamareco/" + $1
    '="./' + store.call(track, record_dir, File.basename(track), 1) + '"'
  end
  print "\n"

  # Data file for the chart; always downloaded again.
  trace.call("グラフデータ start")
  html = html.gsub(/include%2Fofc%2Fofc_gendata_line.php%3Fdid%3D(\d+?)%26xmode%3Ddistance/) do
    did = $1
    data_url = module_url + "include/ofc/ofc_gendata_line.php?did=" + did + "&xmode=distance"
    "./" + store.call(data_url, record_dir, "gendata" + did + ".dat", 1)
  end
  print "\n"

  # Attachments: saved as file<fid><ext>, the visible link text stays as it was.
  trace.call("添付ファイル start")
  html = html.gsub(/(showfile\.php\?fid=(\d+?)) *(["']) *> *([^<]+?(\.\w+?)) *</) do
    link, file_id, quote, label, ext = $1, $2, $3, $4, $5
    local = store.call("http://www.yamareco.com/modules/yamareco/" + link, record_dir, "file" + file_id + ext, 1)
    "./" + local + quote + ">" + label + "<"
  end
  print "\n"

  # Write the rewritten page.
  File.open("./yama" + yamaid + ".html", 'wb') { |output| output.write(html) }

  # Point the downloaded highslide scripts at the local graphics directory.
  config_js = "script/highslide.config.js"
  if File.exist?(config_js)
    # The EUC-JP re-tagging is kept from the original; presumably the file is
    # EUC-JP and would otherwise trip the regexp match — TODO confirm.
    js = File.open(config_js,"r") { |input| input.read.encode("EUC-JP","EUC-JP") }
    js = js.sub(/include\/highslide\/graphics\//,"script/graphics/")
    File.open(config_js,"w") { |output| output.write(js) }
  end

  gallery_js = "script/highslide-with-gallery_mod.js"
  if File.exist?(gallery_js)
    js = File.open(gallery_js,"r") { |input| input.read }
    js = js.sub(/highslide\/graphics\//,"script/graphics/")
    File.open(gallery_js,"w") { |output| output.write(js) }
  end

  # Remaining highslide parts, fetched from fixed URLs.
  trace.call("その他パーツ start")
  graphics_url = module_url + "include/highslide/graphics/"
  [
    [graphics_url + "outlines/custom.png", "script/graphics/outlines"],
    [graphics_url + "zoomin.cur", "script/graphics"],
    [graphics_url + "zoomout.cur", "script/graphics"]
  ].each do |part_url, part_dir|
    store.call(part_url, part_dir, File.basename(part_url), 0)
  end

  print "\n"

end
# defの終わり

# Downloads the record index of one user and the trip records it links to.
#
# Asks for the user number on STDIN, fetches every page of the user's record
# list, joins the tables of all pages into one index page, downloads the
# thumbnails, style sheets and icons the index uses, calls yamaget for the
# linked records and finally writes "./index<uid>.html".
#
# mode    - "all" downloads every linked record again; any other value only
#           downloads records whose "yama<id>.html" does not exist yet.
# options - option hash handed to open-uri, fget and yamaget.
#
# Returns nil without doing anything when no user number was entered.
# Raises whatever open-uri or yamaget raise on a failed page download.
def allget(mode,options)
  test = 0 # set to 1 to print a label before each pass
  trace = lambda { |label| print label,"\n" if test == 1 }

  # Reads a URL into a String. Kernel#open no longer accepts URLs on
  # Ruby 3.0+, so URI.open is used when the interpreter provides it.
  fetch = lambda do |target|
    reader = proc { |io| io.read }
    if URI.respond_to?(:open)
      URI.open(target, options, &reader)
    else
      open(target, options, &reader)
    end
  end

  # Downloads +remote+ into +dir+ through fget and returns the local path.
  store = lambda do |remote, dir|
    local = dir + "/" + File.basename(remote)
    fget(remote, dir, local, options)
    local
  end

  # Percent-decoding, replacing URI.decode which Ruby 3.0 removed.
  unescape = lambda do |text|
    enc = text.encoding
    enc = Encoding::UTF_8 if enc == Encoding::US_ASCII
    text.gsub(/%[0-9A-Fa-f]{2}/) { |hex| [hex[1, 2]].pack("H2").force_encoding(enc) }
  end

  print "ユーザー番号を入力して下さい: "
  # gets returns nil at end of input and the answer may contain no digits.
  uid = STDIN.gets.to_s[/\d+/]
  if uid.nil?
    print "ユーザー番号が入力されませんでした","\n"
    return
  end
  print "UID: "+uid,"\n"

  # Fetch the first list page and find out how many pages there are.
  trace.call("ALL start")
  index = fetch.call("http://www.yamareco.com/modules/yamareco/userinfo-"+uid+"-data.html")
  lastpage = 1
  pager = index.match(/href=userinfo.php\?pnum=(\d+?)&[^>]+?go to last page/)
  lastpage = pager[1].to_i if pager

  # Collect the table of every page; the text around the table is taken from
  # the last page that has one.
  top = bottom = nil
  table = ""
  (1..lastpage).each do |pnum|
    index = fetch.call("http://www.yamareco.com/modules/yamareco/userinfo.php?pnum=#{pnum}&act=data&req_uid=#{uid}")
    found = index.match(/<table>.*?<\/table>/m)
    next unless found
    top = found.pre_match
    table += found[0]
    bottom = found.post_match
  end
  # When no page has a table the last fetched page is kept unchanged.
  index = top + table + bottom if top

  # Thumbnails.
  trace.call("サムネイル start")
  index = index.gsub(/(http:\/\/www\.yamareco\.com\/include\/imgresize\.php[^"']*?)(t_\w*\.jpg|t_\w*\.JPG)/) do |match|
    # Every "&amp;" of the HTML attribute becomes "&" before the URL is used.
    thumb = unescape.call(match.gsub(/&amp;/,"&"))
    "./" + store.call(thumb, "index"+uid)
  end
  print "\n"

  # Absolute record links are only rewritten; the relative ones that remain
  # also trigger the download of the record.
  index = index.gsub(/http:\/\/www\.yamareco\.com\/modules\/yamareco\/detail-(\d+?)\.html/) do
    "./yama" + $1 + ".html"
  end
  fetched = {} # records already downloaded during this run
  index = index.gsub(/detail-(\d+?)\.html/) do
    yamaid = $1
    page = "yama" + yamaid + ".html"
    # A record that is linked several times is downloaded only once.
    if !fetched[yamaid] && (mode == "all" || !File.exist?(page))
      print "GET: yama" + yamaid , "\n"
      yamaget(yamaid,options)
      fetched[yamaid] = true
    end
    "./" + page
  end

  # Style sheets and the png/gif images they reference.
  trace.call("CSS start")
  index = index.gsub(/http:\/\/yamareco\.info[^"']*?\/(.*?\.css)/) do |css_url|
    trace.call(css_url)
    css_path = "script/" + File.basename(css_url)
    FileUtils.mkdir_p("script") unless File.exist?("script")
    # Download first and write afterwards: a failed download must neither
    # leave an empty style sheet behind nor abort the whole run.
    css = begin
      fetch.call(css_url)
    rescue StandardError
      nil
    end
    if css
      File.open(css_path, 'wb') { |output| output.write(css) }
      css.scan(/url *\( *( *[^)]+?\.(png|gif))/) do |found|
        relative = found[0].gsub(/["' ]/,"")
        store.call(File.dirname(css_url) + "/" + relative, "script/" + File.dirname(relative))
      end
    else
      print "x"
    end
    print "\n"
    "./" + css_path
  end

  # gif/png referenced by a relative src attribute.
  trace.call("GIF/PNG1 start")
  index = index.gsub(/src *= *["'] *([^"':]*?\.(gif|png)) *(["'])/) do
    quote = $3
    icon = "http://yamareco.info/modules/yamareco/" + $1
    "src=" + quote + "./" + store.call(icon, "images") + quote
  end
  print "\n"

  # gif/png referenced by an absolute URL.
  trace.call("GIF/PNG2 start")
  index = index.gsub(/(http:\/\/yamareco\.info\/|http:\/\/www\.yamareco\.com\/)([^"']*?\.(gif|png))/) do
    icon = "http://yamareco.info/" + $2
    "./" + store.call(icon, "images")
  end
  print "\n"

  # Write the index page.
  File.open("./index" + uid + ".html", 'wb') { |output| output.write(index) }

end
# defの終わり

# Interactive entry point: show the banner, run the first command, then keep
# accepting single record numbers until an empty line (or end of input).
print "<YamarecoDownloader V1.5 (c)dari88@yamareco>","\n","\n"
print "Enterキーでダウンロードを開始します(標準は未取得の山行記録のみ)","\n"
print "・全ての山行記録を取得する場合は all と入力して下さい","\n"
print "・特定の山行記録だけを取得する場合は山行記録の番号を入力して下さい","\n"
print "Enter> "
# gets returns nil at end of input; to_s turns that into an empty command.
mode = STDIN.gets.to_s[/\w*/]
print "\n"

# Exactly one action per command: empty input or "all" downloads the user's
# records, a pure number downloads that record, anything else is ignored
# (so a typo such as "ball" or "all123" no longer starts a download).
case mode
when "", "all"
  allget(mode,options)
when /\A\d+\z/
  yamaget(mode,options)
end

loop do
  print "プログラムを終了する場合はEnterキーを押して下さい","\n"
  print "・再度特定の山行記録だけを取得する場合は山行記録の番号を入力して下さい","\n"
  print "Enter> "
  # Leading digits of the answer; empty for a bare Enter, for input that does
  # not start with a digit, and at end of input.
  yamaid = STDIN.gets.to_s[/\d*/]
  print "\n"
  break if yamaid == ""
  yamaget(yamaid,options)
end
print "終了します","\n"
# Short pause before the process ends; presumably so the message stays
# visible when the console window closes on exit — TODO confirm.
sleep(1)