ちょっと書いてみた Perl スクリプトをメモっておく。
#!/usr/bin/env perl
use strict;
use warnings;
use WWW::Mechanize;
use Web::Scraper;
use utf8;
use Encode;
my $mech = 'WWW::Mechanize'->new('autocheck', 1);
$mech->get('http://www.ablog.co.jp/login');
$mech->submit_form('fields', {'name', 'foo', 'password', 'bar'});
$mech->get('http://www.ablog.co.jp/contract');
$mech->submit_form();
my $scraper = scraper {
use utf8;
process '/html/body/div/div[3]/table/tbody/tr',
'list[]' => scraper {
process '//td[3]', contract => 'TEXT';
process '//td[4]', status => 'TEXT';
process '//td[5]', expired => 'TEXT';
process '//td[6]', customer => 'TEXT';
process '//td[7]', group => 'TEXT';
process '//td[8]', permission => 'TEXT';
process '//td[9]', notes => 'TEXT';
}
};
my $res = $scraper->scrape($mech->content);
for my $list (@{ $res->{list} }) {
for (values(%$list)) {
s/\s+//sg;
}
}
&write_csv("contract.csv", $res->{list});
sub write_csv {
my ($name, $data) = @_;
open my $csv, ">:encoding(utf8)", $name or die $!;
for my $code (@$data) {
printf $csv qq{"%s","%s","%s","%s","%s","%s"\n}, @{$code}{qw/contract status expired customer group permission/};
}
close $csv;
}
追記(2010/03/24):
s/\s+//sg; のあたりはWeb::Scraperのfilterでやったほうが手っ取り早いかも?:
process '//td[3]', contract => ['TEXT', sub {s/\s+//sg;} ];
WWW::Mechanize と Web::Scraper を使って HTML からテキストを抽出して CSV を作成する Perl スクリプト - ablog
と id:otsune さんにコメントを頂いたので、教えて頂いた書き方で書きなおしてみた。
#!/usr/bin/env perl
use strict;
use warnings;
use WWW::Mechanize;
use Web::Scraper;
use utf8;
use Encode;
my $mech = 'WWW::Mechanize'->new('autocheck', 1);
$mech->get('http://www.ablog.co.jp/login');
$mech->submit_form('fields', {'name', 'foo', 'password', 'bar'});
$mech->get('http://www.ablog.co.jp/contract');
$mech->submit_form();
my $scraper = scraper {
use utf8;
process '/html/body/div/div[3]/table/tbody/tr',
'list[]' => scraper {
process '//td[3]', contract => ['TEXT', sub {s/\s+//sg;} ];
process '//td[4]', status => ['TEXT', sub {s/\s+//sg;} ];
process '//td[5]', expired => ['TEXT', sub {s/\s+//sg;} ];
process '//td[6]', customer => ['TEXT', sub {s/\s+//sg;} ];
process '//td[7]', group => ['TEXT', sub {s/\s+//sg;} ];
process '//td[8]', permission => ['TEXT', sub {s/\s+//sg;} ];
process '//td[9]', notes => ['TEXT', sub {s/\s+//sg;} ];
}
};
my $res = $scraper->scrape($mech->content);
&write_csv("contract.csv", $res->{list});
sub write_csv {
my ($name, $data) = @_;
open my $csv, ">:encoding(utf8)", $name or die $!;
for my $code (@$data) {
printf $csv qq{"%s","%s","%s","%s","%s","%s"\n}, @{$code}{qw/contract status expired customer group permission/};
}
close $csv;
}