line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HTML::Feature::Engine::LDRFullFeed; |
2
|
1
|
|
|
1
|
|
595
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
33
|
|
3
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
23
|
|
4
|
1
|
|
|
1
|
|
1011
|
use HTML::TreeBuilder::XPath; |
|
1
|
|
|
|
|
121374
|
|
|
1
|
|
|
|
|
13
|
|
5
|
1
|
|
|
1
|
|
1087
|
use LWP::Simple; |
|
1
|
|
|
|
|
126077
|
|
|
1
|
|
|
|
|
11
|
|
6
|
1
|
|
|
1
|
|
662
|
use Storable qw(retrieve nstore); |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
124
|
|
7
|
1
|
|
|
1
|
|
1140
|
use JSON; |
|
1
|
|
|
|
|
15200
|
|
|
1
|
|
|
|
|
5
|
|
8
|
1
|
|
|
1
|
|
158
|
use Encode; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
103
|
|
9
|
1
|
|
|
1
|
|
16
|
use Carp; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
64
|
|
10
|
1
|
|
|
1
|
|
6
|
use base qw(HTML::Feature::Base); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
631
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
__PACKAGE__->mk_accessors($_) for qw(_LDRFullFeed); |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
sub run { |
15
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
16
|
0
|
|
|
|
|
|
my $html_ref = shift; |
17
|
0
|
|
|
|
|
|
my $url = shift; |
18
|
0
|
|
|
|
|
|
my $result = shift; |
19
|
0
|
|
|
|
|
|
my $tree = HTML::TreeBuilder::XPath->new; |
20
|
0
|
|
|
|
|
|
$tree->no_space_compacting(1); |
21
|
0
|
|
|
|
|
|
$tree->ignore_ignorable_whitespace(0); |
22
|
0
|
|
|
|
|
|
$tree->parse($$html_ref); |
23
|
0
|
|
|
|
|
|
$tree->eof; |
24
|
0
|
|
|
|
|
|
my $site_info = $self->_detect_siteinfo($url); |
25
|
|
|
|
|
|
|
|
26
|
0
|
0
|
|
|
|
|
if ($site_info) { |
27
|
0
|
|
|
|
|
|
my $xpath = $site_info->{data}->{xpath}; |
28
|
0
|
|
|
|
|
|
my $text; |
29
|
0
|
|
|
|
|
|
for my $node ( $tree->findnodes($xpath) ) { |
30
|
0
|
|
|
|
|
|
$text .= $node->as_text; |
31
|
|
|
|
|
|
|
} |
32
|
0
|
|
|
|
|
|
$result->text($text); |
33
|
0
|
0
|
|
|
|
|
if ( !$result->title ) { |
34
|
0
|
0
|
|
|
|
|
if ( my $title = $tree->look_down( _tag => "title" ) ) { |
35
|
0
|
|
|
|
|
|
$result->title( $title->as_text ); |
36
|
|
|
|
|
|
|
} |
37
|
|
|
|
|
|
|
} |
38
|
0
|
0
|
|
|
|
|
if ( !$result->desc ) { |
39
|
0
|
0
|
|
|
|
|
if ( my $desc = |
40
|
|
|
|
|
|
|
$tree->look_down( _tag => 'meta', name => 'description' ) ) |
41
|
|
|
|
|
|
|
{ |
42
|
0
|
|
|
|
|
|
$result->desc( $desc->attr("content") ); |
43
|
|
|
|
|
|
|
} |
44
|
|
|
|
|
|
|
} |
45
|
|
|
|
|
|
|
} |
46
|
|
|
|
|
|
|
|
47
|
0
|
0
|
|
|
|
|
if ( $result->text ) { |
48
|
0
|
|
|
|
|
|
$result->{matched_engine} = 'LDRFullFeed'; |
49
|
|
|
|
|
|
|
} |
50
|
|
|
|
|
|
|
|
51
|
0
|
|
|
|
|
|
$tree->delete; |
52
|
0
|
|
|
|
|
|
return $result; |
53
|
|
|
|
|
|
|
} |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
sub LDRFullFeed { |
56
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
57
|
0
|
|
|
|
|
|
my $c = $self->context; |
58
|
|
|
|
|
|
|
$self->_LDRFullFeed || sub { |
59
|
0
|
|
|
0
|
|
|
my $data; |
60
|
0
|
|
|
|
|
|
my $path = $INC{'HTML/Feature/Engine/LDRFullFeed.pm'}; |
61
|
0
|
|
|
|
|
|
$path =~ s/.pm//; |
62
|
0
|
|
|
|
|
|
$path .= '/item.st'; |
63
|
0
|
0
|
|
|
|
|
if ( $c->config->{LDRFullFeed}->{data_file_path} ) { |
64
|
0
|
|
|
|
|
|
my $path = $c->config->{LDRFullFeed}->{data_file_path}; |
65
|
0
|
0
|
|
|
|
|
if ( -e $path ) { |
66
|
0
|
|
|
|
|
|
$data = retrieve($path); |
67
|
|
|
|
|
|
|
} |
68
|
|
|
|
|
|
|
else { |
69
|
0
|
|
|
|
|
|
my $json = |
70
|
|
|
|
|
|
|
get('http://wedata.net/databases/LDRFullFeed/items.json'); |
71
|
0
|
|
|
|
|
|
my $data = from_json($json); |
72
|
0
|
|
|
|
|
|
nstore( $data, $path ); |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
} |
75
|
|
|
|
|
|
|
else { |
76
|
0
|
|
|
|
|
|
$data = retrieve($path); |
77
|
|
|
|
|
|
|
} |
78
|
0
|
|
|
|
|
|
my %priority = ( |
79
|
|
|
|
|
|
|
SBM => 1000, |
80
|
|
|
|
|
|
|
INDIVIDUAL => 100, |
81
|
|
|
|
|
|
|
IND => 100, |
82
|
|
|
|
|
|
|
SUBGENERAL => 10, |
83
|
|
|
|
|
|
|
SUB => 10, |
84
|
|
|
|
|
|
|
GENERAL => 1, |
85
|
|
|
|
|
|
|
GEN => 1 |
86
|
|
|
|
|
|
|
); |
87
|
0
|
|
|
|
|
|
my @sorted = sort { $a->{data}->{priority} <=> $b->{data}->{priority} } |
88
|
|
|
|
|
|
|
map { |
89
|
0
|
|
|
|
|
|
$_->{data}->{priority} ||= sub { |
90
|
0
|
|
|
|
|
|
my $type = $_->{data}->{type}; |
91
|
0
|
0
|
|
|
|
|
if ( $priority{$type} ) { |
92
|
0
|
|
|
|
|
|
$_->{data}->{type} = $priority{$type}; |
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
else { |
95
|
0
|
|
|
|
|
|
$_->{data}->{type} = 0; |
96
|
|
|
|
|
|
|
} |
97
|
0
|
|
|
|
|
|
return $_; |
98
|
|
|
|
|
|
|
} |
99
|
0
|
|
0
|
|
|
|
->(); |
100
|
|
|
|
|
|
|
} @$data; |
101
|
0
|
|
|
|
|
|
$self->_LDRFullFeed( \@sorted ); |
102
|
|
|
|
|
|
|
} |
103
|
0
|
0
|
|
|
|
|
->(); |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
sub _detect_siteinfo { |
107
|
0
|
|
|
0
|
|
|
my $self = shift; |
108
|
0
|
|
|
|
|
|
my $url = shift; |
109
|
0
|
0
|
|
|
|
|
unless($url){ |
110
|
0
|
|
|
|
|
|
carp("WARNING: if you use 'HTML::Feature::Engine::LDRFullFeed', URL will be necessary (as second arguments)"); |
111
|
0
|
|
|
|
|
|
return; |
112
|
|
|
|
|
|
|
} |
113
|
0
|
|
|
|
|
|
my $data = $self->LDRFullFeed; |
114
|
0
|
|
|
|
|
|
for my $item (@$data) { |
115
|
0
|
0
|
0
|
|
|
|
if ( ( $item->{data}->{url} ) && ( $url =~ /$item->{data}->{url}/ ) ) { |
116
|
0
|
|
|
|
|
|
return $item; |
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
} |
119
|
0
|
|
|
|
|
|
return; |
120
|
|
|
|
|
|
|
} |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
1; |
123
|
|
|
|
|
|
|
__END__ |