line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package NewsExtractor::SiteSpecificExtractor::www_bbc_com; |
2
|
1
|
|
|
1
|
|
8
|
use utf8; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
10
|
|
3
|
1
|
|
|
1
|
|
39
|
use Moo; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
9
|
|
4
|
|
|
|
|
|
|
extends 'NewsExtractor::GenericExtractor'; |
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
473
|
use Importer 'NewsExtractor::TextUtil' => 'normalize_whitespace', 'u'; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
10
|
|
7
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
40
|
use POSIX qw(strftime); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
12
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
# Returns the text of the byline-name element of the article, or undef when
# the page has no element matching the byline selector.
sub journalist {
    my ($self) = @_;

    # Only the first element matching the selector is consulted.
    my $byline_node = $self->dom->at('div.story-body > div.byline > span.byline__name');

    # Assign through a scalar so text() is always called in scalar context
    # and callers always receive exactly one value.
    my $name = $byline_node ? $byline_node->text : undef;

    return $name;
}
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# Returns the article's publication time as a string of the form
# "YYYY-MM-DDTHH:MM:SS+08:00", or undef when the page carries no usable
# timestamp.
#
# The timestamp is read from the "data-seconds" attribute of the first
# element matching 'div.date[data-seconds]'.
# NOTE(review): the attribute is presumably Unix epoch seconds -- confirm
# against live pages.
sub dateline {
    my $self = $_[0];
    my $dateline;

    if (my $el = $self->dom->at('div.date[data-seconds]')) {
        my $epoch = $el->attr("data-seconds");

        # Skip a missing or non-numeric value: gmtime() would otherwise
        # treat it as 0 and yield a bogus 1970 date. A fractional part is
        # tolerated and dropped, matching gmtime()'s own truncation.
        if (defined($epoch)
            && $epoch =~ m{\A \s* (?<seconds>[0-9]+) (?:\.[0-9]+)? \s* \z}x)
        {
            # The output is labelled +08:00, so the broken-down fields must
            # be UTC+8 wall-clock time. Shifting the epoch before gmtime()
            # achieves that without depending on the process time zone.
            my $offset_seconds = 8 * 60 * 60;
            my @wall_clock     = gmtime($+{seconds} + $offset_seconds);

            $dateline = u(strftime(q(%Y-%m-%dT%H:%M:%S+08:00), @wall_clock));
        }
    }

    return $dateline;
}
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
1; |