| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package NewsExtractor::JSONLDExtractor; | 
| 2 | 1 |  |  | 1 |  | 9 | use Moo; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 9 |  | 
| 3 |  |  |  |  |  |  | extends 'NewsExtractor::TXExtractor'; | 
| 4 |  |  |  |  |  |  |  | 
| 5 | 1 |  |  | 1 |  | 502 | use Mojo::Transaction::HTTP; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 17 |  | 
| 6 | 1 |  |  | 1 |  | 55 | use Types::Standard qw( InstanceOf HashRef ArrayRef ); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 14 |  | 
| 7 | 1 |  |  | 1 |  | 848 | use Mojo::JSON qw(from_json); | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 96 |  | 
| 8 | 1 |  |  | 1 |  | 9 | use Importer 'NewsExtractor::TextUtil' => qw(u remove_control_characters); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 11 |  | 
| 9 |  |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  | has tx => ( | 
| 11 |  |  |  |  |  |  | required => 1, is => 'ro', | 
| 12 |  |  |  |  |  |  | isa => InstanceOf['Mojo::Transaction::HTTP'] ); | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | has schema_ld => ( | 
| 15 |  |  |  |  |  |  | required => 0, | 
| 16 |  |  |  |  |  |  | is => 'lazy', | 
| 17 |  |  |  |  |  |  | isa => HashRef, | 
| 18 |  |  |  |  |  |  | builder => 1, | 
| 19 |  |  |  |  |  |  | ); | 
| 20 |  |  |  |  |  |  |  | 
| 21 |  |  |  |  |  |  | sub _build_schema_ld { | 
| 22 | 0 |  |  | 0 |  |  | my ($self) = @_; | 
| 23 | 0 | 0 |  |  |  |  | my $el = $self->dom->at('script[type="application/ld+json"]') or return {}; | 
| 24 | 0 |  |  |  |  |  | my $x = from_json( $el->text ); | 
| 25 | 0 | 0 |  |  |  |  | if (HashRef->check($x)) { | 
| 26 | 0 |  |  |  |  |  | return $x; | 
| 27 |  |  |  |  |  |  | } | 
| 28 | 0 | 0 |  |  |  |  | if (ArrayRef->check($x)) { | 
| 29 | 0 |  |  |  |  |  | return $x->[0]; | 
| 30 |  |  |  |  |  |  | } | 
| 31 | 0 |  |  |  |  |  | return {}; | 
| 32 |  |  |  |  |  |  | } | 
| 33 |  |  |  |  |  |  |  | 
| 34 |  |  |  |  |  |  | sub journalist { | 
| 35 | 0 |  |  | 0 | 0 |  | my ($self) = @_; | 
| 36 | 0 |  |  |  |  |  | return remove_control_characters(u($self->schema_ld->{author}{name})); | 
| 37 |  |  |  |  |  |  | } | 
| 38 |  |  |  |  |  |  |  | 
| 39 |  |  |  |  |  |  | sub headline { | 
| 40 | 0 |  |  | 0 | 0 |  | my ($self) = @_; | 
| 41 | 0 |  |  |  |  |  | return remove_control_characters(u($self->schema_ld->{headline})); | 
| 42 |  |  |  |  |  |  | } | 
| 43 |  |  |  |  |  |  |  | 
| 44 |  |  |  |  |  |  | sub dateline { | 
| 45 | 0 |  |  | 0 | 0 |  | my ($self) = @_; | 
| 46 | 0 |  |  |  |  |  | return remove_control_characters(u($self->schema_ld->{datePublished})); | 
| 47 |  |  |  |  |  |  | } | 
| 48 |  |  |  |  |  |  |  | 
| 49 |  |  |  |  |  |  | sub content_text { | 
| 50 | 0 |  |  | 0 | 0 |  | my ($self) = @_; | 
| 51 | 0 |  | 0 |  |  |  | my $text = $self->schema_ld->{articleBody} // $self->schema_ld->{description} // ''; | 
|  |  |  | 0 |  |  |  |  | 
| 52 | 0 |  |  |  |  |  | return remove_control_characters(u($text)); | 
| 53 |  |  |  |  |  |  | } | 
| 54 |  |  |  |  |  |  |  | 
| 55 |  |  |  |  |  |  | 1; |