line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HTML::Feature::Engine::GoogleADSection; |
2
|
1
|
|
|
1
|
|
515
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
30
|
|
3
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
21
|
|
4
|
1
|
|
|
1
|
|
1498
|
use HTML::TreeBuilder::LibXML; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
use base qw(HTML::Feature::Base); |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
# Fill a result object with the title, description and text of an HTML page.
#
# Arguments (positional):
#   $self     - the engine object (not used inside this method)
#   $html_ref - reference to a scalar holding the HTML source
#   $url      - accepted for interface compatibility; not used here
#   $result   - result object; its title, desc and text accessors are
#               called and its {matched_engine} hash slot is written directly
#
# Returns the same $result object. Title and description are only filled in
# when the corresponding accessor currently returns a false value.
sub run {
    my ( $self, $html_ref, $url, $result ) = @_;

    my $tree = HTML::TreeBuilder::LibXML->new;
    $tree->parse( ${$html_ref} );
    $tree->eof;

    # Title: only look it up when the result does not have one yet.
    if ( !$result->title ) {
        my $title = $tree->findvalue('//title');
        $result->title($title) if $title;
    }

    # Description: taken from <meta name="description" content="...">.
    if ( !$result->desc ) {
        my $meta = $tree->look_down( _tag => 'meta', name => 'description' );
        my $string = $meta ? $meta->attr('content') : undef;

        # A meta tag without a content attribute yields undef; skip it
        # instead of running a substitution on an undefined value.
        if ( defined $string ) {

            # NOTE(review): under /x the whitespace in this pattern is
            # ignored, so it only matches the empty string and appears to
            # change nothing. The intended pattern may have been lost;
            # confirm against the upstream module before relying on it.
            $string =~ s{ }{}xms;
            $result->desc($string);
        }
    }

    # NOTE(review): as written this captures the entire document whenever it
    # is non-empty. Given the engine name, the capture is presumably meant to
    # be delimited by section markers; verify against the upstream module.
    my $regexp = '(.+)';

    if ( ${$html_ref} =~ m{$regexp}s ) {
        my $section_html = $1;

        # Use a separately named tree so that it does not shadow the document
        # tree above and can be released on its own.
        my $section_tree = HTML::TreeBuilder::LibXML->new;
        $section_tree->parse($section_html);
        $section_tree->eof;

        my $text = $section_tree->as_text;
        $result->text($text);
        $result->{matched_engine} = 'GoogleADSection';

        $section_tree->delete;
    }

    $tree->delete;
    return $result;
}
46
|
|
|
|
|
|
|
1; |
47
|
|
|
|
|
|
|
__END__ |