File Coverage

blib/lib/NewsExtractor/SiteSpecificExtractor/UDN.pm
Criterion Covered Total %
statement 9 33 27.2
branch 0 10 0.0
condition n/a
subroutine 3 8 37.5
pod 0 4 0.0
total 12 55 21.8


line stmt bran cond sub pod time code
1             package NewsExtractor::SiteSpecificExtractor::UDN;
2 1     1   8 use utf8;
  1         3  
  1         9  
3 1     1   35 use Moo;
  1         2  
  1         8  
4             extends 'NewsExtractor::SiteSpecificExtractor';
5              
6 1     1   383 use Importer 'NewsExtractor::TextUtil' => qw( html2text normalize_whitespace reformat_dateline );
  1         2  
  1         9  
7              
8             sub headline {
9 0     0 0   my ($self) = @_;
10 0 0         my $el = $self->dom->at('#story_art_title, h1.story_art_title, h1.article-content__title') or return;
11 0           my $txt = $el->all_text;
12 0           return normalize_whitespace($txt);
13             }
14              
15             sub dateline {
16 0     0 0   my ($self) = @_;
17 0           my $el;
18 0 0         if ($el = $self->dom->at(".shareBar__info--author > span:nth-child(1), .authors time.article-content__time")) {
19 0           my $txt = $el->all_text;
20 0           return normalize_whitespace($txt);
21             }
22              
23             # opinion.udn.com
24 0 0         if ($el = $self->dom->at('.story_bady_info > time[datetime]')) {
25 0           return reformat_dateline($el->all_text, '+08:00');
26             }
27             }
28              
29             sub journalist {
30 0     0 0   my ($self) = @_;
31 0           my $el;
32 0 0         if ($el = $self->dom->at(".shareBar__info--author, .authors span.article-content__author")) {
33 0           my $txt = $el->all_text;
34 0           $txt =~ s/\s+/ /g; # Sometimes there are newlines
35 0           return normalize_whitespace($txt);
36             }
37              
38             # opinion.udn.com
39 0 0         if ($el = $self->dom->at('.story_bady_info')) {
40 0     0     return $el->find('a.author')->map(sub { normalize_whitespace( $_->text ) })->join(', ') . "";
  0            
41             }
42             }
43              
44             sub content_text {
45 0     0 0   my ($self) = @_;
46 0           my $el = $self->dom->at("section.article-content__editor");
47 0           $el->find("script, style")->map("remove");
48 0           return html2text("$el");
49             }
50              
51             1;