line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package NewsExtractor::Download; |
2
|
1
|
|
|
1
|
|
20
|
use v5.18; |
|
1
|
|
|
|
|
5
|
|
3
|
1
|
|
|
1
|
|
6
|
use Moo; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
20
|
|
4
|
1
|
|
|
1
|
|
462
|
use Types::Standard qw< InstanceOf >; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
23
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
# The HTTP transaction whose response is parsed by this object.
# Mandatory at construction time and read-only afterwards.
has tx => (
    is       => 'ro',
    isa      => InstanceOf['Mojo::Transaction::HTTP'],
    required => 1,
);
7
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
1475
|
use NewsExtractor::Article; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
53
|
|
9
|
1
|
|
|
1
|
|
656
|
use NewsExtractor::Extractor; |
|
1
|
|
|
|
|
6
|
|
|
1
|
|
|
|
|
50
|
|
10
|
1
|
|
|
1
|
|
11
|
use NewsExtractor::Error; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
30
|
|
11
|
1
|
|
|
1
|
|
6
|
use Importer 'NewsExtractor::TextUtil' => qw(u is_empty); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
10
|
|
12
|
|
|
|
|
|
|
|
13
|
1
|
|
|
1
|
|
38
|
use Try::Tiny; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
429
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
# parse
#
# Runs a NewsExtractor::Extractor over $self->tx and tries to assemble a
# NewsExtractor::Article from what it finds.
#
# Returns a two-element list ($err, $article):
#   * on success, $err is undef and $article is a NewsExtractor::Article;
#   * on failure, $err is a NewsExtractor::Error and $article is undef.
#
# Failure happens when "headline" or "article_body" is empty (as judged by
# is_empty), or when anything inside the extraction / construction throws.
# Exceptions never propagate to the caller; they are converted into $err.
sub parse {
    my ($self) = @_;
    my ($err, $o, %article);

    # Core module; loaded here so the blessed() check below does not depend
    # on some other module having pulled it in.
    require Scalar::Util;

    try {
        my $x = NewsExtractor::Extractor->new( tx => $self->tx );
        $article{headline}     = $x->headline;
        $article{article_body} = $x->content_text;

        # Optional fields: only forwarded to the Article when defined.
        for my $it (qw(dateline journalist)) {
            my $v = $x->$it;
            if (defined($v)) {
                $article{$it} = $v;
            }
        }

        # Mandatory fields: report the first one that is missing.
        for my $it (qw(headline article_body)) {
            if (is_empty($article{$it})) {
                $err = NewsExtractor::Error->new(
                    message => u("Failed to extract: $it")
                );
                last;
            }
        }

        # Do not build the Article once validation has failed: doing so
        # would either throw (and let the catch block overwrite the
        # specific error above) or hand back an article alongside an error.
        if (!$err) {
            $o = NewsExtractor::Article->new(%article);
        }
    } catch {
        my $e = $_;

        # Type-constraint failures carry a readable ->message; use that
        # instead of the exception object. blessed() keeps us from calling
        # ->isa on an unblessed reference, which would die inside the
        # handler.
        if (Scalar::Util::blessed($e) && $e->isa('Error::TypeTiny::Assertion')) {
            $e = $e->message;
        }

        $err = NewsExtractor::Error->new(
            message => u($e),
            debug   => { articleArgs => \%article },
        );
    };

    return ($err, $o);
}
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
1; |