File Coverage

blib/lib/NewsExtractor/JSONLDExtractor.pm
Criterion Covered Total %
statement 15 32 46.8
branch 0 6 0.0
condition 0 5 0.0
subroutine 5 10 50.0
pod 0 4 0.0
total 20 57 35.0


line stmt bran cond sub pod time code
1             package NewsExtractor::JSONLDExtractor;
2 1     1   9 use Moo;
  1         3  
  1         8  
3             extends 'NewsExtractor::TXExtractor';
4              
5 1     1   399 use Mojo::Transaction::HTTP;
  1         3  
  1         14  
6 1     1   38 use Types::Standard qw( InstanceOf HashRef ArrayRef );
  1         3  
  1         20  
7 1     1   882 use Mojo::JSON qw(from_json);
  1         3  
  1         90  
8 1     1   9 use Importer 'NewsExtractor::TextUtil' => qw(u remove_control_characters);
  1         3  
  1         10  
9              
10             has tx => (
11             required => 1, is => 'ro',
12             isa => InstanceOf['Mojo::Transaction::HTTP'] );
13              
14             has schema_ld => (
15             required => 0,
16             is => 'lazy',
17             isa => HashRef,
18             builder => 1,
19             );
20              
21             sub _build_schema_ld {
22 0     0     my ($self) = @_;
23 0 0         my $el = $self->dom->at('script[type="application/ld+json"]') or return {};
24 0           my $x = from_json( $el->text );
25 0 0         if (HashRef->check($x)) {
26 0           return $x;
27             }
28 0 0         if (ArrayRef->check($x)) {
29 0           return $x->[0];
30             }
31 0           return {};
32             }
33              
34             sub journalist {
35 0     0 0   my ($self) = @_;
36 0           return remove_control_characters(u($self->schema_ld->{author}{name}));
37             }
38              
39             sub headline {
40 0     0 0   my ($self) = @_;
41 0           return remove_control_characters(u($self->schema_ld->{headline}));
42             }
43              
44             sub dateline {
45 0     0 0   my ($self) = @_;
46 0           return remove_control_characters(u($self->schema_ld->{datePublished}));
47             }
48              
49             sub content_text {
50 0     0 0   my ($self) = @_;
51 0   0       my $text = $self->schema_ld->{articleBody} // $self->schema_ld->{description} // '';
      0        
52 0           return remove_control_characters(u($text));
53             }
54              
55             1;