| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package WWW::MobileCarrierJP::Declare; | 
| 2 | 1 |  |  | 1 |  | 3 | use strict; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 25 |  | 
| 3 | 1 |  |  | 1 |  | 3 | use warnings; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 21 |  | 
| 4 | 1 |  |  | 1 |  | 2 | use utf8; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 4 |  | 
| 5 | 1 |  |  | 1 |  | 18 | use base qw/Exporter/; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 72 |  | 
| 6 | 1 |  |  | 1 |  | 4 | use Web::Scraper; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 6 |  | 
| 7 | 1 |  |  | 1 |  | 47 | use URI; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 7 |  | 
| 8 | 1 |  |  | 1 |  | 569 | use LWP::UserAgent 5.827; | 
|  | 1 |  |  |  |  | 25506 |  | 
|  | 1 |  |  |  |  | 7 |  | 
| 9 | 1 |  |  | 1 |  | 26 | use Carp (); | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 15 |  | 
| 10 | 1 |  |  | 1 |  | 480 | use Encode qw/decode/; | 
|  | 1 |  |  |  |  | 6417 |  | 
|  | 1 |  |  |  |  | 274 |  | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | our @EXPORT = qw(parse_one scraper process col as_tree result p debug get); | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | sub p { | 
| 15 | 0 |  |  | 0 | 0 | 0 | require Data::Dumper; | 
| 16 | 0 |  |  |  |  | 0 | print STDERR Data::Dumper::Dumper(@_); | 
| 17 |  |  |  |  |  |  | } | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | sub debug { | 
| 20 | 0 | 0 |  | 0 | 0 | 0 | print "$_[0]\n" if $ENV{WMCJP_DEBUG}; | 
| 21 |  |  |  |  |  |  | } | 
| 22 |  |  |  |  |  |  |  | 
| 23 |  |  |  |  |  |  | sub get { | 
| 24 | 0 |  |  | 0 | 0 | 0 | my $url = shift; | 
| 25 | 0 |  |  |  |  | 0 | my $ua = LWP::UserAgent->new(agent => __PACKAGE__); | 
| 26 | 0 |  |  |  |  | 0 | my $res = $ua->get($url); | 
| 27 | 0 | 0 |  |  |  | 0 | if ($res->is_success) { | 
| 28 | 0 |  |  |  |  | 0 | return decode($res->content_charset, $res->content); | 
| 29 |  |  |  |  |  |  | } else { | 
| 30 | 0 |  |  |  |  | 0 | Carp::croak($res->status_line); | 
| 31 |  |  |  |  |  |  | } | 
| 32 |  |  |  |  |  |  | } | 
| 33 |  |  |  |  |  |  |  | 
| 34 |  |  |  |  |  |  | sub import { | 
| 35 | 6 |  |  | 6 |  | 7 | my $class = shift; | 
| 36 |  |  |  |  |  |  |  | 
| 37 | 6 |  |  |  |  | 20 | strict->import; | 
| 38 | 6 |  |  |  |  | 39 | warnings->import; | 
| 39 | 6 |  |  |  |  | 15 | utf8->import; | 
| 40 |  |  |  |  |  |  |  | 
| 41 | 6 |  |  |  |  | 1093 | $class->export_to_level(1); | 
| 42 |  |  |  |  |  |  | } | 
| 43 |  |  |  |  |  |  |  | 
| 44 |  |  |  |  |  |  | sub col { | 
| 45 | 0 |  |  | 0 | 0 | 0 | my ($n, @args) = @_; | 
| 46 | 0 |  |  |  |  | 0 | process "td:nth-child($n)", @args; | 
| 47 |  |  |  |  |  |  | } | 
| 48 |  |  |  |  |  |  |  | 
| 49 |  |  |  |  |  |  | sub parse_one { | 
| 50 | 5 |  |  | 5 | 0 | 39 | my %args = @_; | 
| 51 |  |  |  |  |  |  |  | 
| 52 | 5 |  |  |  |  | 6 | my $pkg = caller(0); | 
| 53 | 1 |  |  | 1 |  | 5 | no strict 'refs'; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 248 |  | 
| 54 |  |  |  |  |  |  |  | 
| 55 | 5 | 50 |  |  |  | 59 | unless ($pkg->can('url')) { | 
| 56 | 5 |  |  | 0 |  | 12 | *{"$pkg\::url"} = sub { $args{urls} }; | 
|  | 5 |  |  |  |  | 14 |  | 
|  | 0 |  |  |  |  | 0 |  | 
| 57 |  |  |  |  |  |  | } | 
| 58 |  |  |  |  |  |  |  | 
| 59 | 5 |  |  |  |  | 21 | *{"$pkg\::scrape"} = sub { | 
| 60 | 0 |  |  | 0 |  |  | my @res = (); | 
| 61 | 0 | 0 |  |  |  |  | my $urls = $args{urls} or die "missing urls"; | 
| 62 | 0 |  |  |  |  |  | for my $url ( @$urls ) { | 
| 63 | 0 |  |  |  |  |  | my $content = get($url); | 
| 64 | 0 | 0 |  |  |  |  | if ($args{content_filter}) { | 
| 65 | 0 |  |  |  |  |  | $content = $args{content_filter}->($content); | 
| 66 |  |  |  |  |  |  | } | 
| 67 |  |  |  |  |  |  | my $result = scraper { | 
| 68 | 0 |  |  | 0 |  |  | process $args{xpath}, 'rows[]', $args{scraper}; | 
| 69 | 0 |  |  |  |  |  | }->scrape( $content )->{rows}; | 
| 70 | 0 |  |  |  |  |  | my @result = grep { $_ } @$result; | 
|  | 0 |  |  |  |  |  |  | 
| 71 |  |  |  |  |  |  |  | 
| 72 | 0 |  |  |  |  |  | push @res, @result; | 
| 73 |  |  |  |  |  |  | } | 
| 74 | 0 |  |  |  |  |  | return \@res; | 
| 75 | 5 |  |  |  |  | 16 | }; | 
| 76 |  |  |  |  |  |  | } | 
| 77 |  |  |  |  |  |  |  | 
| 78 |  |  |  |  |  |  | sub as_tree { | 
| 79 | 0 |  |  | 0 | 0 |  | my $old = shift; | 
| 80 |  |  |  |  |  |  |  | 
| 81 | 0 |  |  |  |  |  | my $tree = HTML::TreeBuilder::XPath->new; | 
| 82 | 0 |  |  |  |  |  | $tree->parse($old->as_HTML); | 
| 83 | 0 |  |  |  |  |  | $tree; | 
| 84 |  |  |  |  |  |  | } | 
| 85 |  |  |  |  |  |  |  | 
| 86 |  |  |  |  |  |  |  | 
| 87 |  |  |  |  |  |  | 1; |