| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Feed::Find; |
|
2
|
3
|
|
|
3
|
|
232679
|
use strict; |
|
|
3
|
|
|
|
|
28
|
|
|
|
3
|
|
|
|
|
93
|
|
|
3
|
3
|
|
|
3
|
|
14
|
use warnings; |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
76
|
|
|
4
|
3
|
|
|
3
|
|
72
|
use 5.008_001; |
|
|
3
|
|
|
|
|
11
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
3
|
|
|
3
|
|
19
|
use base qw( Class::ErrorHandler ); |
|
|
3
|
|
|
|
|
5
|
|
|
|
3
|
|
|
|
|
1543
|
|
|
7
|
3
|
|
|
3
|
|
2247
|
use LWP::UserAgent; |
|
|
3
|
|
|
|
|
96226
|
|
|
|
3
|
|
|
|
|
108
|
|
|
8
|
3
|
|
|
3
|
|
2512
|
use HTML::Parser; |
|
|
3
|
|
|
|
|
17902
|
|
|
|
3
|
|
|
|
|
132
|
|
|
9
|
3
|
|
|
3
|
|
26
|
use URI; |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
73
|
|
|
10
|
3
|
|
|
3
|
|
13
|
use Carp; |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
203
|
|
|
11
|
|
|
|
|
|
|
|
|
12
|
3
|
|
|
3
|
|
19
|
use vars qw( $VERSION $ua ); |
|
|
3
|
|
|
|
|
5
|
|
|
|
3
|
|
|
|
|
193
|
|
|
13
|
|
|
|
|
|
|
$VERSION = '0.12'; |
|
14
|
|
|
|
|
|
|
|
|
15
|
3
|
|
|
|
|
2662
|
use constant FEED_MIME_TYPES => [ |
|
16
|
|
|
|
|
|
|
'application/x.atom+xml', |
|
17
|
|
|
|
|
|
|
'application/atom+xml', |
|
18
|
|
|
|
|
|
|
'application/xml', |
|
19
|
|
|
|
|
|
|
'text/xml', |
|
20
|
|
|
|
|
|
|
'application/rss+xml', |
|
21
|
|
|
|
|
|
|
'application/rdf+xml', |
|
22
|
3
|
|
|
3
|
|
18
|
]; |
|
|
3
|
|
|
|
|
5
|
|
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
my $FEED_EXT = qr/\.(?:rss|xml|rdf|atom)$/; |
|
25
|
|
|
|
|
|
|
my %IsFeed = map { $_ => 1 } @{ FEED_MIME_TYPES() }; |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
sub find { |
|
28
|
4
|
|
|
4
|
1
|
6861
|
my $class = shift; |
|
29
|
4
|
|
|
|
|
24
|
my($uri) = @_; |
|
30
|
4
|
100
|
|
|
|
24
|
$ua = LWP::UserAgent->new unless defined $ua; |
|
31
|
4
|
|
|
|
|
3295
|
$ua->env_proxy; |
|
32
|
4
|
|
|
|
|
39063
|
$ua->agent(join '/', $class, $class->VERSION); |
|
33
|
4
|
|
|
|
|
331
|
$ua->parse_head(0); ## We're already basically doing this ourselves. |
|
34
|
4
|
|
|
|
|
327
|
my $req = HTTP::Request->new(GET => $uri); |
|
35
|
4
|
|
|
|
|
14987
|
my $p = HTML::Parser->new(api_version => 3, |
|
36
|
|
|
|
|
|
|
start_h => [ \&_find_links, 'self,tagname,attr' ]); |
|
37
|
4
|
|
|
|
|
230
|
$p->{base_uri} = $uri; |
|
38
|
4
|
|
|
|
|
13
|
$p->{feeds} = []; |
|
39
|
|
|
|
|
|
|
my $res = $ua->request($req, sub { |
|
40
|
7
|
|
|
7
|
|
30205
|
my($chunk, $res, $proto) = @_; |
|
41
|
7
|
50
|
|
|
|
46
|
if ($IsFeed{$res->content_type}) { |
|
42
|
0
|
|
|
|
|
0
|
push @{ $p->{feeds} }, $uri; |
|
|
0
|
|
|
|
|
0
|
|
|
43
|
0
|
|
|
|
|
0
|
croak 'Done parsing'; |
|
44
|
|
|
|
|
|
|
} |
|
45
|
7
|
100
|
|
|
|
538
|
$p->parse($chunk) or croak 'Done parsing'; |
|
46
|
4
|
|
|
|
|
46
|
}); |
|
47
|
4
|
50
|
|
|
|
1720
|
return $class->error($res->status_line) unless $res->is_success; |
|
48
|
4
|
|
|
|
|
42
|
return @{ $p->{feeds} }; |
|
|
4
|
|
|
|
|
85
|
|
|
49
|
|
|
|
|
|
|
} |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
sub find_in_html { |
|
52
|
2
|
|
|
2
|
1
|
2499
|
my $class = shift; |
|
53
|
2
|
|
|
|
|
5
|
my($html, $base_uri) = @_; |
|
54
|
2
|
|
|
|
|
11
|
my $p = HTML::Parser->new(api_version => 3, |
|
55
|
|
|
|
|
|
|
start_h => [ \&_find_links, 'self,tagname,attr' ]); |
|
56
|
2
|
|
|
|
|
82
|
$p->{base_uri} = $base_uri; |
|
57
|
2
|
|
|
|
|
5
|
$p->{feeds} = []; |
|
58
|
2
|
|
|
|
|
21
|
$p->parse($$html); |
|
59
|
2
|
|
|
|
|
4
|
return @{ $p->{feeds} }; |
|
|
2
|
|
|
|
|
13
|
|
|
60
|
|
|
|
|
|
|
} |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
sub _find_links { |
|
63
|
294
|
|
|
294
|
|
533
|
my($p, $tag, $attr) = @_; |
|
64
|
|
|
|
|
|
|
|
|
65
|
294
|
|
|
|
|
441
|
my %head_tag = map { $_ => 1 } |
|
|
2058
|
|
|
|
|
3375
|
|
|
66
|
|
|
|
|
|
|
qw[ meta isindex title script style head html ]; |
|
67
|
|
|
|
|
|
|
|
|
68
|
294
|
|
|
|
|
517
|
my $base_uri = $p->{base_uri}; |
|
69
|
294
|
100
|
|
|
|
695
|
if ($tag eq 'link') { |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
70
|
114
|
50
|
|
|
|
208
|
return unless $attr->{rel}; |
|
71
|
114
|
|
|
|
|
279
|
my %rel = map { $_ => 1 } split /\s+/, lc($attr->{rel}); |
|
|
114
|
|
|
|
|
277
|
|
|
72
|
114
|
|
|
|
|
178
|
my $type = ''; |
|
73
|
114
|
100
|
|
|
|
200
|
if ($attr->{type}) { |
|
74
|
51
|
|
|
|
|
167
|
($type = lc $attr->{type}) =~ s/^\s*//; |
|
75
|
51
|
|
|
|
|
207
|
$type =~ s/\s*$//; |
|
76
|
|
|
|
|
|
|
} |
|
77
|
3
|
|
|
|
|
26
|
push @{ $p->{feeds} }, URI->new_abs($attr->{href}, $base_uri)->as_string |
|
78
|
|
|
|
|
|
|
if $IsFeed{$type} && |
|
79
|
114
|
50
|
33
|
|
|
282
|
($rel{alternate} || $rel{'service.feed'}); |
|
|
|
|
66
|
|
|
|
|
|
80
|
|
|
|
|
|
|
} elsif ($tag eq 'base') { |
|
81
|
0
|
0
|
|
|
|
0
|
$p->{base_uri} = $attr->{href} if $attr->{href}; |
|
82
|
|
|
|
|
|
|
} elsif ($head_tag{$tag}) { |
|
83
|
|
|
|
|
|
|
## Ignore other valid tags inside of . |
|
84
|
|
|
|
|
|
|
} elsif ($tag eq 'a') { |
|
85
|
12
|
50
|
|
|
|
28
|
my $href = $attr->{href} or return; |
|
86
|
12
|
|
|
|
|
38
|
my $uri = URI->new($href); |
|
87
|
12
|
100
|
|
|
|
3522
|
push @{ $p->{feeds} }, URI->new_abs($href, $base_uri)->as_string |
|
|
3
|
|
|
|
|
50
|
|
|
88
|
|
|
|
|
|
|
if $uri->path =~ /$FEED_EXT/io; |
|
89
|
|
|
|
|
|
|
} else { |
|
90
|
|
|
|
|
|
|
## Anything else indicates the start of the , |
|
91
|
|
|
|
|
|
|
## so we stop parsing. |
|
92
|
45
|
100
|
|
|
|
50
|
$p->eof if @{ $p->{feeds} }; |
|
|
45
|
|
|
|
|
121
|
|
|
93
|
|
|
|
|
|
|
} |
|
94
|
|
|
|
|
|
|
|
|
95
|
294
|
|
|
|
|
7622
|
return; |
|
96
|
|
|
|
|
|
|
} |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
1; |
|
99
|
|
|
|
|
|
|
__END__ |