File Coverage

blib/lib/NewsExtractor/SiteSpecificExtractor/www_rvn_com_tw.pm
Criterion Covered Total %
statement 15 17 88.2
branch n/a
condition n/a
subroutine 5 6 83.3
pod n/a
total 20 23 86.9


line stmt bran cond sub pod time code
1             package NewsExtractor::SiteSpecificExtractor::www_rvn_com_tw;
2 1     1   9 use utf8;
  1         2  
  1         12  
3              
4 1     1   39 use Moo;
  1         3  
  1         7  
5             extends 'NewsExtractor::SiteSpecificExtractor';
6 1     1   424 use Types::Standard qw(InstanceOf);
  1         2  
  1         14  
7              
8             has extractor => (
9             required => 0,
10             is => 'lazy',
11             isa => InstanceOf["NewsExtractor::CSSExtractor"],
12             builder => 1,
13             handles => [qw( headline dateline journalist content_text )],
14             );
15              
16 1     1   839 use NewsExtractor::CSSRuleSet;
  1         3  
  1         35  
17 1     1   7 use NewsExtractor::CSSExtractor;
  1         2  
  1         349  
18              
19             sub _build_extractor {
20 0     0     my ($self) = @_;
21 0           return NewsExtractor::CSSExtractor->new(
22             css_selector => NewsExtractor::CSSRuleSet->new(
23             headline => 'td[height=30][align=CENTER] b font',
24             dateline => 'tr > td[align=left] > b > font[style="font-size:11pt;"]',
25             journalist => 'tr > td[align=left] > b > font[style="font-size:11pt;"]',
26             content_text => 'td[colspan=2] > p > span[style="font-size:16px"]',
27             ),
28             tx => $self->tx,
29             )
30             }
31              
32             around 'dateline' => sub {
33             my $orig = shift;
34             my $ret = $orig->(@_);
35             $ret =~ s/^(.+\S)\s+(記者:.+)$/$1/;
36             return $ret;
37             };
38              
39             around 'journalist' => sub {
40             my $orig = shift;
41             my $ret = $orig->(@_);
42             $ret =~ s/^(.+\S)\s+(記者:.+)$/$2/;
43             return $ret;
44             };
45              
46             1;