line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
use Moo; |
2
|
1
|
|
|
1
|
|
6
|
extends 'NewsExtractor::TXExtractor'; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
5
|
|
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
use Mojo::Transaction::HTTP; |
5
|
1
|
|
|
1
|
|
250
|
use Types::Standard qw( InstanceOf HashRef ArrayRef ); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
7
|
|
6
|
1
|
|
|
1
|
|
26
|
use Mojo::JSON qw(from_json); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
6
|
|
7
|
1
|
|
|
1
|
|
617
|
use Importer 'NewsExtractor::TextUtil' => qw(u remove_control_characters); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
45
|
|
8
|
1
|
|
|
1
|
|
5
|
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
5
|
|
9
|
|
|
|
|
|
|
has tx => ( |
10
|
|
|
|
|
|
|
required => 1, is => 'ro', |
11
|
|
|
|
|
|
|
isa => InstanceOf['Mojo::Transaction::HTTP'] ); |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
has schema_ld => ( |
14
|
|
|
|
|
|
|
required => 0, |
15
|
|
|
|
|
|
|
is => 'lazy', |
16
|
|
|
|
|
|
|
isa => HashRef, |
17
|
|
|
|
|
|
|
builder => 1, |
18
|
|
|
|
|
|
|
); |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
my ($self) = @_; |
21
|
|
|
|
|
|
|
my $el = $self->dom->at('script[type="application/ld+json"]') or return {}; |
22
|
0
|
|
|
0
|
|
|
my $x = from_json( $el->text ); |
23
|
0
|
0
|
|
|
|
|
if (HashRef->check($x)) { |
24
|
0
|
|
|
|
|
|
return $x; |
25
|
0
|
0
|
|
|
|
|
} |
26
|
0
|
|
|
|
|
|
if (ArrayRef->check($x)) { |
27
|
|
|
|
|
|
|
return $x->[0]; |
28
|
0
|
0
|
|
|
|
|
} |
29
|
0
|
|
|
|
|
|
return {}; |
30
|
|
|
|
|
|
|
} |
31
|
0
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
my ($self) = @_; |
33
|
|
|
|
|
|
|
return remove_control_characters(u($self->schema_ld->{author}{name})); |
34
|
|
|
|
|
|
|
} |
35
|
0
|
|
|
0
|
0
|
|
|
36
|
0
|
|
|
|
|
|
my ($self) = @_; |
37
|
|
|
|
|
|
|
return remove_control_characters(u($self->schema_ld->{headline})); |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
|
40
|
0
|
|
|
0
|
0
|
|
my ($self) = @_; |
41
|
0
|
|
|
|
|
|
return remove_control_characters(u($self->schema_ld->{datePublished})); |
42
|
|
|
|
|
|
|
} |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
my ($self) = @_; |
45
|
0
|
|
|
0
|
0
|
|
my $text = $self->schema_ld->{articleBody} // $self->schema_ld->{description} // ''; |
46
|
0
|
|
|
|
|
|
return remove_control_characters(u($text)); |
47
|
|
|
|
|
|
|
} |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
1; |