ヤマレコダウンローダー V1.5
ヤマレコの山行記録を一括でダウンロードするプログラム YamarecoDownloader のバージョンアップはV1.5まで進みました。枯れてきた感じがしますので、Rubyのコードを公開しておきます。
主な改善点
V1.0以降の主な改善点は以下の通りです。
・HTTPヘッダに"User-Agent"を追加した。
・サイトにファイルが無い場合にopen(url)が例外を発生させる問題への対策。
・エラー時に空ファイルが書き込まれるので、open(file)とopen(url)の順番を入れ替え。
・コンソールに日本語表示。
・サーバーの負荷軽減対策。取得済みの山行記録は飛ばす、取得済みのパーツ画像は飛ばす。結果的に少し高速化。
・特定の山行記録だけを取得するコマンドを追加。
・src = " abc/def " のように、等号や引用符の前後にスペース" "が入る記述にも対応できるよう、正規表現を見直した。
・山行記録が1ページ分に満たない場合の対策。
ヤマレコダウンローダー V1.5
コードです。
#! ruby -Ku
#
# YamarecoDownloader V1.5 (c)dari88@yamareco 2013/11/21
#
# Interactive script that mirrors Yamareco trip reports to the current
# directory: it downloads a report page (and, in bulk mode, a user's report
# index), fetches the images/scripts/stylesheets/attachments the page refers
# to, and rewrites the references so the saved HTML points at local copies.
#
# Progress markers printed by the download helpers:
#   "."  file already present, download skipped
#   "*"  file downloaded
#   "x"  download failed (the error is swallowed so the run continues)
#
require "open-uri"
require "fileutils"

# To use a proxy, fill in the three values below and comment out the SECOND
# `options` assignment (the one that sets :proxy => nil).
proxy_host = "http://your.proxy.jp:8080"
proxy_user = "username"
proxy_passwd = "password"
options = {:proxy_http_basic_authentication => [proxy_host,proxy_user,proxy_passwd],
           "User-Agent" => "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:25.0) Gecko/20100101 Firefox/25.0" }
options = {:proxy => nil,"User-Agent" => "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:25.0) Gecko/20100101 Firefox/25.0"}

# Set to true to print a "... start" line before each processing stage.
YAMARECO_TRACE = false

# Prints a debug line when YAMARECO_TRACE is enabled.
def trace(message)
  print message, "\n" if YAMARECO_TRACE
end

# Opens +url+ through open-uri and yields/returns like Kernel#open did.
# URI.open is preferred when it exists because open-uri's Kernel#open
# patch is gone in Ruby 3.0+; older Rubies fall back to Kernel#open.
def http_open(url, options, &block)
  if URI.respond_to?(:open)
    URI.open(url, options, &block)
  else
    open(url, options, &block)
  end
end

# Downloads +url+ to +filePath+, creating +dirName+ first if necessary.
#
# url      - remote address to fetch
# dirName  - directory that must exist before writing
# filePath - local destination path
# options  - open-uri options hash (proxy, headers)
# ow       - 0 (default): skip when filePath already exists; any other
#            value: download and overwrite
#
# Prints "." / "*" / "x" as described in the file header. Download errors
# are rescued so that one missing file does not stop the run. The remote
# body is read BEFORE the local file is opened, so a failed download never
# leaves an empty file behind.
def fget(url,dirName,filePath,options,ow=0)
  if File.exist?(filePath) && (ow == 0)
    print "."
    return
  end

  FileUtils.mkdir_p(dirName) unless File.exist?(dirName)
  begin
    body = http_open(url, options) { |data| data.read }
    File.open(filePath, 'wb') { |output| output.write(body) }
    print "*"
  rescue
    print "x"
  end
end

# Downloads +url+ into +dirName+ under its own base name (never overwrites).
def partget(url,dirName,options)
  fileName = File.basename(url)
  filePath = dirName +"/"+ fileName
  fget(url,dirName,filePath,options)
end

# Downloads +url+ into +dirName+ and returns the local path
# ("dirName/fileName"). +fileName+ defaults to the URL's base name.
def fetch_into(url, dirName, options, ow=0, fileName=File.basename(url))
  filePath = dirName + "/" + fileName
  fget(url, dirName, filePath, options, ow)
  filePath
end

# Downloads the stylesheet at +url0+ to script/<basename>, then downloads
# every png/gif it references via url(...) into script/<relative dir>.
# Returns the local stylesheet path. The stylesheet is always re-fetched;
# a failed fetch prints "x" and writes nothing.
def cssget(url0, options)
  trace(url0)
  dirName0 = "script"
  filePath0 = dirName0 + "/" + File.basename(url0)
  FileUtils.mkdir_p(dirName0) unless File.exist?(dirName0)

  begin
    css = http_open(url0, options) { |data| data.read }
    File.open(filePath0, 'wb') { |output| output.write(css) }
    css.scan(/url *\( *( *[^)]+?\.(png|gif))/) do |ref, _ext|
      url2 = ref.gsub(/["' ]/,"")
      url = File.dirname(url0)+"/"+url2
      dirName = "script/"+File.dirname(url2)
      filePath = dirName +"/"+ File.basename(url)
      fget(url,dirName,filePath,options)
    end
  rescue
    print "x"
  end
  print "\n"
  filePath0
end

# Shared by yamaget and allget: fetches stylesheets and gif/png images
# referenced by +doc+ and returns +doc+ with those references rewritten to
# the local copies (script/ for CSS, images/ for gif/png).
def localize_common_assets(doc, options)
  # CSS files and the images they reference
  trace "CSS start"
  doc = doc.gsub(/http:\/\/yamareco\.info[^"']*?\/(.*?\.css)/) do |match0|
    "./" + cssget(match0, options)
  end

  # gif/png, pass 1: relative src="..." references
  trace "GIF/PNG1 start"
  doc = doc.gsub(/src *= *["'] *([^"':]*?\.(gif|png)) *(["'])/) do
    quote = $3
    url = "http://yamareco.info/modules/yamareco/"+$1
    "src=" + quote + "./" + fetch_into(url, "images", options) + quote
  end
  print "\n"

  # gif/png, pass 2: absolute URLs on either host
  trace "GIF/PNG2 start"
  doc = doc.gsub(/(http:\/\/yamareco\.info\/|http:\/\/www\.yamareco\.com\/)([^"']*?\.(gif|png))/) do
    "./" + fetch_into("http://yamareco.info/"+$2, "images", options)
  end
  print "\n"

  doc
end

# Replaces the first match of +pattern+ in the file at +filePath+ with
# "script/graphics/". Does nothing when the file is absent. When +encoding+
# is given the content is passed through String#encode(encoding, encoding)
# before substitution, exactly as the original script did for
# highslide.config.js (presumably because that file is not UTF-8 - verify).
def rewrite_js(filePath, pattern, encoding=nil)
  return unless File.exist?(filePath)
  js = File.open(filePath, "r") { |input| input.read }
  js = js.encode(encoding, encoding) if encoding
  js = js.sub(pattern, "script/graphics/")
  File.open(filePath, "w") { |output| output.write(js) }
end

# Downloads one trip report and everything it references.
#
# yamaid  - report number as a String of digits
# options - open-uri options hash
#
# Writes ./yama<yamaid>.html plus assets under yama<yamaid>/, script/ and
# images/. If the report page itself cannot be fetched, "x" is printed, the
# error message goes to STDERR, and nothing is written (so a later run will
# retry it) - a single bad report no longer aborts a bulk download.
def yamaget(yamaid,options)
  # Fetch the report HTML
  trace "HTML start"
  # fid is yamaid without its last four characters (for ids of 5+ chars).
  # NOTE(review): for shorter ids the slice does not drop four characters;
  # the intended value in that case is not evident from this file.
  fid = yamaid[0..(yamaid.length-5)]
  url = "http://www.yamareco.com/modules/yamareco/detail-" + yamaid + ".html"
  begin
    html = http_open(url, options) { |page| page.read }
  rescue StandardError => e
    print "x", "\n"
    STDERR.puts "yama#{yamaid}: #{e.message}"
    return
  end
  localDir = "yama"+yamaid

  # Photos: fetch the original image and point the resizer URL at it
  trace "原画 start"
  html = html.gsub(/(http:\/\/yamareco.info\/modules\/yamareco\/include\/tmp_imgresize.*?fname=)(\w*\.jpg|\w*\.JPG)/) do
    url = "http://yamareco.info/modules/yamareco/upimg/"+fid+"/"+yamaid+"/"+$2
    "./" + fetch_into(url, localDir, options)
  end
  print "\n"

  # Thumbnails (t_*.jpg)
  trace "サムネイル start"
  html = html.gsub(/(http:\/\/yamareco.info\/modules\/yamareco\/upimg\/#{fid}\/#{yamaid}\/)(t_\w*\.jpg|t_\w*\.JPG)/) do
    url = "http://yamareco.info/modules/yamareco/upimg/"+fid+"/"+yamaid+"/"+$2
    "./" + fetch_into(url, localDir, options)
  end
  print "\n"

  # Remaining full-size image links: rewrite the path only (no download)
  html = html.gsub(/(http:\/\/yamareco.info\/modules\/yamareco\/upimg\/\d+\/\d+\/)(\w*\.jpg|\w*\.JPG)/) do
    "./" + localDir +"/"+ $2
  end

  # JavaScript (always re-downloaded, because it is patched further below)
  trace "JS start"
  html = html.gsub(/(http:\/\/www\.yamareco\.com\/|http:\/\/yamareco\.info\/)([^"']*\/([^"']*?\.js))/) do
    "./" + fetch_into("http://yamareco.info/"+$2, "script", options, 1)
  end
  html = html.gsub(/src *= *" *(include\/cheer.js) *"/) do
    url = "http://yamareco.info/modules/yamareco/"+$1
    'src="./' + fetch_into(url, "script", options, 1) + '"'
  end
  print "\n"

  # Flash chart player
  trace "SWF start"
  html = html.gsub(/(include\/ofc\/open-flash-chart.swf)/) do
    url = "http://yamareco.info/modules/yamareco/"+$1
    "./" + fetch_into(url, "script", options)
  end
  print "\n"

  # CSS (+ its images) and gif/png images
  html = localize_common_assets(html, options)

  # Remaining jpg images
  trace "JPG start"
  html = html.gsub(/http:\/\/www.yamareco[^"']*?\.(jpg|JPG)/) do |match|
    "./" + fetch_into(match, "images", options)
  end
  print "\n"

  # gpx / xml track data (always re-downloaded)
  trace "GPX/XML start"
  html = html.gsub(/= *["'] *([^"':]*?\.(gpx|xml)) *["']/) do
    url = "http://www.yamareco.com/modules/yamareco/"+$1
    '="./' + fetch_into(url, localDir, options, 1) + '"'
  end
  print "\n"

  # Chart data, saved as gendata<did>.dat (always re-downloaded)
  trace "グラフデータ start"
  html = html.gsub(/include%2Fofc%2Fofc_gendata_line.php%3Fdid%3D(\d+?)%26xmode%3Ddistance/) do
    did = $1
    url = "http://yamareco.info/modules/yamareco/include/ofc/ofc_gendata_line.php?did="+did+"&xmode=distance"
    "./" + fetch_into(url, localDir, options, 1, "gendata"+did+".dat")
  end
  print "\n"

  # Attachments, saved as file<fid><ext>; the visible link text is kept
  trace "添付ファイル start"
  html = html.gsub(/(showfile\.php\?fid=(\d+?)) *(["']) *> *([^<]+?(\.\w+?)) *</) do
    link, fileId, quote, label, ext = $1, $2, $3, $4, $5
    url = "http://www.yamareco.com/modules/yamareco/"+link
    "./" + fetch_into(url, localDir, options, 1, "file"+fileId+ext) + quote + ">"+label+"<"
  end
  print "\n"

  # Write the rewritten HTML
  File.open("./" + "yama"+yamaid+".html", 'wb') { |output| output.write(html) }

  # Patch the highslide scripts so they look for graphics under script/
  rewrite_js("script/highslide.config.js", /include\/highslide\/graphics\//, "EUC-JP")
  rewrite_js("script/highslide-with-gallery_mod.js", /highslide\/graphics\//)

  # Other parts that the scripts load at runtime
  trace "その他パーツ start"
  graphics = "http://yamareco.info/modules/yamareco/include/highslide/graphics/"
  partget(graphics + "outlines/custom.png", "script/graphics/outlines", options)
  partget(graphics + "zoomin.cur", "script/graphics", options)
  partget(graphics + "zoomout.cur", "script/graphics", options)
  print "\n"
end

# Downloads a user's report index and the reports it links to.
#
# mode    - "all": download every linked report again; anything else:
#           download only reports whose yama<id>.html does not exist yet
# options - open-uri options hash
#
# Prompts for the user number on STDIN (re-prompting until digits are
# entered; returns without doing anything on end-of-input) and writes
# ./index<uid>.html. Each report id is processed at most once per call.
def allget(mode,options)
  uid = nil
  while uid.nil?
    print "ユーザー番号を入力して下さい: "
    line = STDIN.gets
    return if line.nil?
    uid = line[/\d+/]
  end
  print "UID: "+uid,"\n"
  url = "http://www.yamareco.com/modules/yamareco/userinfo-"+uid+"-data.html"

  # Fetch the first index page to find out how many pages there are
  trace "ALL start"
  index = http_open(url, options) { |page| page.read }
  lastpage = 1
  if index =~ /href=userinfo.php\?pnum=(\d+?)&[^>]+?go to last page/
    lastpage = $1.to_i
  end

  # Concatenate the <table> of every page between the head and tail of the
  # last page that contained one.
  top = nil
  bottom = nil
  table = ""
  (1..lastpage).each do |pnum|
    url = "http://www.yamareco.com/modules/yamareco/userinfo.php?pnum=#{pnum}&act=data&req_uid=#{uid}"
    index = http_open(url, options) { |page| page.read }
    found = index.match(/<table>.*?<\/table>/m)
    next if found.nil?
    top = found.pre_match
    table += found[0]
    bottom = found.post_match
  end
  # When no page had a <table>, keep the last fetched page unchanged.
  index = top + table + bottom unless top.nil?

  # Thumbnails
  trace "サムネイル start"
  index = index.gsub(/(http:\/\/www\.yamareco\.com\/include\/imgresize\.php[^"']*?)(t_\w*\.jpg|t_\w*\.JPG)/) do |match|
    # Undo HTML (&amp;) and percent escaping to obtain the real URL.
    url = URI::DEFAULT_PARSER.unescape(match.gsub(/&amp;/, "&"))
    "./" + fetch_into(url, "index"+uid, options)
  end
  print "\n"

  # Point report links at the local copies and download the reports
  index = index.gsub(/http:\/\/www\.yamareco\.com\/modules\/yamareco\/detail-(\d+?)\.html/) do
    "./yama" + $1 + ".html"
  end
  handled = {}
  index = index.gsub(/detail-(\d+?)\.html/) do
    yamaid = $1
    unless handled[yamaid]
      handled[yamaid] = true
      if (mode == "all") || !File.exist?("yama" + yamaid + ".html")
        print "GET: yama" + yamaid , "\n"
        yamaget(yamaid,options)
      end
    end
    "./yama" + yamaid + ".html"
  end

  # CSS (+ its images) and gif/png images
  index = localize_common_assets(index, options)

  # Write the rewritten index
  File.open("./" + "index"+uid+".html", 'wb') { |output| output.write(index) }
end

# ---- interactive driver ----
print "<YamarecoDownloader V1.5 (c)dari88@yamareco>","\n","\n"
print "Enterキーでダウンロードを開始します(標準は未取得の山行記録のみ)","\n"
print "・全ての山行記録を取得する場合は all と入力して下さい","\n"
print "・特定の山行記録だけを取得する場合は山行記録の番号を入力して下さい","\n"
print "Enter> "
line = STDIN.gets
# nil means end-of-input: skip straight to the farewell message.
mode = line.nil? ? nil : line[/\w*/]

unless mode.nil?
  print "\n"
  if (mode == "") || (mode =~ /all/)
    allget(mode,options)
  elsif mode =~ /\A\d+\z/
    yamaget(mode,options)
  end

  loop do
    print "プログラムを終了する場合はEnterキーを押して下さい","\n"
    print "・再度特定の山行記録だけを取得する場合は山行記録の番号を入力して下さい","\n"
    print "Enter> "
    line = STDIN.gets
    yamaid = line.nil? ? "" : line[/\d*/]
    print "\n"
    break if yamaid == ""
    yamaget(yamaid,options)
  end
end

print "終了します","\n"
sleep(1)