#!ruby
# Fetches Max_num result pages from Host_str, keeps the text found between a
# start-marker line and an end-marker line of the combined HTML, rewrites the
# table-row markup as CSV, converts it to Shift_JIS, and writes:
#   * Outfile                - header plus every row
#   * Outfile2 + "<n>.csv"   - header plus one run of consecutive rows whose
#                              leading field(s) (see the grouping regexp) match

require 'kconv'
require 'net/http'

# --- Settings ---------------------------------------------------------------

# Marker patterns; both are interpolated into a Regexp and tested per line.
# NOTE(review): in this copy Start_str is empty and End_str is a bare newline,
# which looks like HTML tag text was stripped when the script was pasted.
# With these values the markers match (almost) every line - restore the
# original tag text before relying on the extraction step.
Start_str = %q!!
End_str = %q!
!

# CSV header row. The column captions are presumably "prefecture name",
# "location" and "landmark" - confirm against the source page.
Head_str = %Q!"県名","所在地","目標"\n!

Host_str = 'www.ntt-east.co.jp'
Path_str = '/cgi-bin/ptd/icpublic/result02.pl'
Max_num = 5                  # number of result pages to request
Outfile = 'ICCardList_E.csv' # combined output file
Outfile2 = 'ICCardList_'     # prefix of the per-group output files

# --- Download ---------------------------------------------------------------

body_str = ''.dup

# The block form closes the connection even if a request raises.
Net::HTTP.start(Host_str) do |http|
  print "connected: #{Host_str}\n"

  # Fetch the pages one at a time. The first page uses the bare path; later
  # pages carry an explicit 1-based page number.
  Max_num.times do |x|
    str = x.zero? ? Path_str : "#{Path_str}?page=#{x + 1}&kw="
    print "get: #{str}\n"
    response = http.get2(str)
    print "finish.\n"
    body_str << response.body.to_s
  end
end
print "disconnected.\n"

# --- Cut the wanted part out of the whole HTML -------------------------------

print "get target text.\n"
start_re = /#{Start_str}/
end_re = /#{End_str}/
target_str = ''.dup
print_flag = false

# String#each was removed in Ruby 1.9; each_line is the line iterator.
body_str.each_line do |line|
  if print_flag
    if line =~ end_re
      print_flag = false
    else
      # chomp (not chop) so an unterminated last line keeps its final character.
      target_str << line.chomp
    end
  elsif line =~ start_re
    print_flag = true
  end
end
print "finish.\n"

# --- Convert to CSV -----------------------------------------------------------

print "text -> csv\n"
# NOTE(review): the pattern below is empty in this copy, so the call changes
# nothing; presumably an HTML tag (e.g. the row-opening tag) was stripped from
# the literal - TODO confirm and restore.
target_str.gsub!("", '')
target_str.gsub!('</tr>', "\n")
target_str.gsub!('</td>', %q!","!)
# Must be a Regexp: a String pattern is matched literally, so the original
# "<(|\/)td>" string could never match on Ruby 1.8 or later.
target_str.gsub!(%r{</?td>}, %q!"!)
print "finish.\n"

# --- Convert the character encoding -------------------------------------------

# Header and body are converted separately: the header is in the script's
# encoding while the body is raw bytes from the server, and joining them
# before conversion can raise Encoding::CompatibilityError on Ruby 1.9+.
sjis_head = Kconv.tosjis(Head_str)
target_str = sjis_head + Kconv.tosjis(target_str)

# --- Write the combined file ----------------------------------------------------

print "write file #{Outfile}\n"
File.open(Outfile, 'w') { |out| out.write target_str }
print "finish.\n"

# --- Write the split files ------------------------------------------------------

count = 1
word = nil
out2 = nil
begin
  # The first line is the header, which is re-emitted per file instead.
  target_str.each_line.drop(1).each do |line|
    # Greedy capture: everything before the second-to-last comma of the line
    # (nil when the line has fewer than two commas).
    key = line[/(.*),.*,.*/, 1]

    # Start a new file for the first row and whenever the key changes.
    if out2.nil? || key != word
      if out2
        out2.close
        print "finish.\n"
      end
      print "write file #{Outfile2}#{count}.csv\n"
      out2 = File.open("#{Outfile2}#{count}.csv", 'w')
      # Use the converted header so the whole file is in one encoding.
      out2.write sjis_head
      count += 1
      word = key
    end

    out2.write line
  end
  print "finish.\n" if out2
ensure
  # The original never closed the last split file.
  out2.close if out2 && !out2.closed?
end

exit