#!/usr/bin/env perl use strict; use warnings; use WWW::Mechanize; use Web::Scraper; use utf8; use Encode; my $mech = 'WWW::Mechanize'->new('autocheck', 1); $mech->get('http://www.ablog.co.jp/login'); $mech->submit_form('fields', {'name', 'foo', 'password', 'bar'}); $mech->get('http://www.ablog.co.jp/contract'); $mech->submit_form(); my $scraper = scraper { use utf8; process '/html/body/div/div[3]/table/tbody/tr', 'list[]' => scraper { process '//td[3]', contract => 'TEXT'; process '//td[4]', status => 'TEXT'; process '//td[5]', expired => 'TEXT'; process '//td[6]', customer => 'TEXT'; process '//td[7]', group => 'TEXT'; process '//td[8]', permission => 'TEXT'; process '//td[9]', notes => 'TEXT'; } }; my $res = $scraper->scrape($mech->content); for my $list (@{ $res->{list} }) { for (values(%$list)) { s/\s+//sg; } } &write_csv("contract.csv", $res->{list}); sub write_csv { my ($name, $data) = @_; open my $csv, ">:encoding(utf8)", $name or die $!; for my $code (@$data) { printf $csv qq{"%s","%s","%s","%s","%s","%s"\n}, @{$code}{qw/contract status expired customer group permission/}; } close $csv; } __END__
追記(2010/03/24):
s/\s+//sg; のあたりはWeb::Scraperのfilterでやったほうが手っ取り早いかも?:
WWW::Mechanize と Web::Scraper を使って HTML からテキストを抽出して CSV を作成する Perl スクリプト - ablog
process '//td[3]', contract => ['TEXT', sub {s/\s+//sg;} ];
と id:otsune さんにコメントを頂いたので、教えて頂いた書き方で書きなおしてみた。
#!/usr/bin/env perl use strict; use warnings; use WWW::Mechanize; use Web::Scraper; use utf8; use Encode; my $mech = 'WWW::Mechanize'->new('autocheck', 1); $mech->get('http://www.ablog.co.jp/login'); $mech->submit_form('fields', {'name', 'foo', 'password', 'bar'}); $mech->get('http://www.ablog.co.jp/contract'); $mech->submit_form(); my $scraper = scraper { use utf8; process '/html/body/div/div[3]/table/tbody/tr', 'list[]' => scraper { process '//td[3]', contract => ['TEXT', sub {s/\s+//sg;} ]; process '//td[4]', status => ['TEXT', sub {s/\s+//sg;} ]; process '//td[5]', expired => ['TEXT', sub {s/\s+//sg;} ]; process '//td[6]', customer => ['TEXT', sub {s/\s+//sg;} ]; process '//td[7]', group => ['TEXT', sub {s/\s+//sg;} ]; process '//td[8]', permission => ['TEXT', sub {s/\s+//sg;} ]; process '//td[9]', notes => ['TEXT', sub {s/\s+//sg;} ]; } }; my $res = $scraper->scrape($mech->content); &write_csv("contract.csv", $res->{list}); sub write_csv { my ($name, $data) = @_; open my $csv, ">:encoding(utf8)", $name or die $!; for my $code (@$data) { printf $csv qq{"%s","%s","%s","%s","%s","%s"\n}, @{$code}{qw/contract status expired customer group permission/}; } close $csv; } __END__