#!ruby
# Library files are named in lower case; the capitalised spellings only load
# on case-insensitive filesystems.
require 'kconv'
require 'net/http'
# Variable definitions
# Pattern (interpolated into a regexp by the extraction step) marking the line
# after which text is collected.
# NOTE(review): as written this is a single newline character, so it matches
# any line that ends with a newline; the intended marker text looks lost -
# confirm against the target page.
Start_str = %q!
!
# NOTE(review): the extraction step also interpolates End_str, but no
# End_str is defined in this section - confirm where it is meant to come from.
# Header row written at the top of the CSV output.
Head_str = %Q!"県名","所在地","目標"\n!
# Accumulates the raw HTML of every fetched page.
body_str = ''
# Accumulates the text cut out from between the start and end markers.
target_str = ''
# True while the extraction loop is inside the marked region.
print_flag = false
# Server and CGI path that are queried.
Host_str = 'www.ntt-east.co.jp'
Path_str = '/cgi-bin/ptd/icpublic/result02.pl'
# Number of result pages to request.
Max_num = 5
# Name of the combined CSV file, and the prefix of the numbered split files.
Outfile = 'ICCardList_E.csv'
Outfile2 = 'ICCardList_'
# Connect over HTTP.
# The block form of Net::HTTP#start closes the session when the block exits,
# so the connection is released even if one of the requests raises.
http = Net::HTTP.new(Host_str)
http.start do
  print "connected: #{Host_str}\n"
  # Fetch the pages one at a time and append each body to body_str.
  Max_num.times do |x|
    # The first request uses the bare path; later ones add a 1-based page
    # number and an empty kw parameter.
    str = x == 0 ? Path_str : Path_str + '?page=' + (x + 1).to_s + '&kw='
    print "get: #{str}\n"
    response = http.get2(str)
    print "finish.\n"
    body_str += response.body.to_s
  end
end
print "disconnected.\n"
# Cut the needed part out of the whole HTML.
# Lines between a line matching Start_str and a line matching End_str are
# concatenated (without their line terminators) into target_str; the marker
# lines themselves are not kept.
# NOTE(review): End_str is not defined in the visible part of this file -
# confirm it exists, otherwise the first line after a start match raises
# NameError.
print "get target text.\n"
# String#each is not available in current Ruby; each_line iterates by line.
body_str.each_line do |line|
  if print_flag
    if line =~ /#{End_str}/
      print_flag = false
    else
      # chomp strips only a line terminator, so a final line that lacks one
      # does not lose its last character.
      target_str += line.chomp
    end
  elsif line =~ /#{Start_str}/
    print_flag = true
  end
end
print "finish.\n"
# Convert to CSV format.
print "text -> csv\n"
# NOTE(review): with an empty pattern and an empty replacement this call
# leaves the text unchanged; the original pattern looks lost (presumably the
# row-opening tag) - confirm.
target_str.gsub!("", '')
# A closing row tag ends a CSV record.
target_str.gsub!('</tr>', "\n")
# A closing cell tag becomes the quote-comma-quote field separator.
target_str.gsub!('</td>', %q!","!)
# Remaining cell tags become a double quote. This has to be a Regexp: a String
# pattern is matched literally, so "<(|\/)td>" never matched any tag.
target_str.gsub!(%r{</?td>}, %q!"!)
# Put the header row in front of the converted records.
target_str = Head_str + target_str
print "finish.\n"
# Convert the character encoding to Shift_JIS.
target_str = Kconv.tosjis(target_str)
# Write the whole result to the combined file.
print "write file #{Outfile}\n"
# The block form closes the file even if the write raises.
File.open(Outfile, 'w') { |out| out.write target_str }
print "finish.\n"
# Write the records again, split across numbered files.
# A new file "#{Outfile2}<n>.csv" is started whenever the key of a line (the
# text before its last two commas) differs from the key of the previous line.
count = 0
word = ''
out2 = nil
header = nil
begin
  # String#each is not available in current Ruby; each_line iterates by line.
  target_str.each_line do |line|
    # The first line is the header row. Keep it so every split file gets the
    # same, already converted, header bytes as the combined file (Head_str
    # itself has not been through Kconv.tosjis).
    if count == 0
      header = line
      count += 1
      next
    end
    # nil when the line does not have the expected shape.
    key = line[/(.*),.*,.*/, 1]
    # out2.nil? covers a first record whose key equals the initial word,
    # which would otherwise write to a file that was never opened.
    if out2.nil? || key != word
      if out2
        out2.close
        print "finish.\n"
      end
      print "write file #{Outfile2}#{count}.csv\n"
      out2 = File.open("#{Outfile2}#{count}.csv", 'w')
      out2.write header
      count += 1
    end
    out2.write line
    word = key
  end
  # Close the last file too, and report it like the earlier ones.
  if out2
    out2.close
    print "finish.\n"
  end
ensure
  # Do not leave a file open if something above raised.
  out2.close if out2 && !out2.closed?
end
exit