line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package RSSycklr; |
2
|
2
|
|
|
2
|
|
56357
|
use Mouse; |
|
2
|
|
|
|
|
103933
|
|
|
2
|
|
|
|
|
13
|
|
3
|
2
|
|
|
2
|
|
712
|
no warnings "uninitialized"; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
92
|
|
4
|
2
|
|
|
2
|
|
10
|
use Carp ( qw/ carp confess croak / ); |
|
2
|
|
|
|
|
8
|
|
|
2
|
|
|
|
|
181
|
|
5
|
2
|
|
|
2
|
|
2187
|
use YAML (); |
|
2
|
|
|
|
|
26829
|
|
|
2
|
|
|
|
|
52
|
|
6
|
2
|
|
|
2
|
|
2640
|
use XML::Feed (); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
use HTML::Truncate (); |
8
|
|
|
|
|
|
|
use HTML::TokeParser::Simple (); |
9
|
|
|
|
|
|
|
use XML::LibXML (); |
10
|
|
|
|
|
|
|
use DateTime (); |
11
|
|
|
|
|
|
|
use Scalar::Util "blessed"; |
12
|
|
|
|
|
|
|
use URI (); |
13
|
|
|
|
|
|
|
use File::ShareDir (); |
14
|
|
|
|
|
|
|
use Hash::Merge::Simple "merge"; |
15
|
|
|
|
|
|
|
use File::Spec; |
16
|
|
|
|
|
|
|
use Encode; |
17
|
|
|
|
|
|
|
use Capture::Tiny "capture"; |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
our $VERSION = "0.15"; |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
has "keep_tags" => |
22
|
|
|
|
|
|
|
is => "rw", |
23
|
|
|
|
|
|
|
isa => "HashRef", |
24
|
|
|
|
|
|
|
default => sub { |
25
|
|
|
|
|
|
|
return { map {; $_ => 1 } qw( del ins i u b em |
26
|
|
|
|
|
|
|
strong abbr br img dfn |
27
|
|
|
|
|
|
|
acronym q sub sup cite |
28
|
|
|
|
|
|
|
code kbd samp strong var |
29
|
|
|
|
|
|
|
strike s tt a ) |
30
|
|
|
|
|
|
|
}; |
31
|
|
|
|
|
|
|
}, |
32
|
|
|
|
|
|
|
; |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
has "debug" => |
35
|
|
|
|
|
|
|
is => "rw", |
36
|
|
|
|
|
|
|
isa => "Bool", |
37
|
|
|
|
|
|
|
default => sub { 0 }, |
38
|
|
|
|
|
|
|
; |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
has "tt2" => |
41
|
|
|
|
|
|
|
is => "ro", |
42
|
|
|
|
|
|
|
lazy => 1, # not always used |
43
|
|
|
|
|
|
|
isa => "Template", |
44
|
|
|
|
|
|
|
default => sub { |
45
|
|
|
|
|
|
|
require Template; |
46
|
|
|
|
|
|
|
Template->new({ |
47
|
|
|
|
|
|
|
ENCODING => 'UTF-8', |
48
|
|
|
|
|
|
|
DEFAULT_ENCODING => 'UTF-8', |
49
|
|
|
|
|
|
|
}); |
50
|
|
|
|
|
|
|
}, |
51
|
|
|
|
|
|
|
handles => [qw( process )], |
52
|
|
|
|
|
|
|
; |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# No type so it can take any Template takes |
55
|
|
|
|
|
|
|
has "template" => |
56
|
|
|
|
|
|
|
is => "rw", |
57
|
|
|
|
|
|
|
lazy => 1, # not always used |
58
|
|
|
|
|
|
|
default => sub { \<<"TT_TEMPLATE"; |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
[%-FOR feed IN rssycklr.feeds() %] |
61
|
|
|
|
|
|
|
[%-NEXT UNLESS feed.count %] |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
<[% feed_title_tag || "h4" %]> |
64
|
|
|
|
|
|
|
[%-FILTER html; feed.title_override || feed.title; END %] |
65
|
|
|
|
|
|
|
[% feed_title_tag || "h4" %]> |
66
|
|
|
|
|
|
|
[%~IF feed.entries.0.lede %] |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
[%-FOR entry IN feed.entries %] |
69
|
|
|
|
|
|
|
[%-entry.title | html %] |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
[% entry.lede %] |
72
|
|
|
|
|
|
|
[% modified = entry.modified ? entry.modified : entry.feed.modified %] |
73
|
|
|
|
|
|
|
[% modified.ymd(".") %] [% modified.hour_12 %]:[% modified.min | format('%02d') %][% modified.am_or_pm %] |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
[%~END %] |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
[%~ELSE %] |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
[%-FOR entry IN feed.entries %] |
81
|
|
|
|
|
|
|
[%-entry.title | html %] |
82
|
|
|
|
|
|
|
[%~END %] |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
[%~END %] |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
[%~END %] |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
TT_TEMPLATE |
89
|
|
|
|
|
|
|
}, |
90
|
|
|
|
|
|
|
; |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
has "xml_parser" => |
93
|
|
|
|
|
|
|
is => "rw", |
94
|
|
|
|
|
|
|
isa => "XML::LibXML", |
95
|
|
|
|
|
|
|
default => sub { |
96
|
|
|
|
|
|
|
my $libxml = XML::LibXML->new(); |
97
|
|
|
|
|
|
|
$libxml->keep_blanks(1); |
98
|
|
|
|
|
|
|
$libxml->line_numbers(1); |
99
|
|
|
|
|
|
|
$libxml->complete_attributes(1); |
100
|
|
|
|
|
|
|
$libxml->clean_namespaces(1); |
101
|
|
|
|
|
|
|
$libxml->no_network(1); |
102
|
|
|
|
|
|
|
$libxml->recover_silently(1); |
103
|
|
|
|
|
|
|
return $libxml; |
104
|
|
|
|
|
|
|
}, |
105
|
|
|
|
|
|
|
handles => [qw( parse_html_string )], |
106
|
|
|
|
|
|
|
; |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
has "dtd" => |
109
|
|
|
|
|
|
|
is => "rw", |
110
|
|
|
|
|
|
|
isa => "XML::LibXML::Dtd", |
111
|
|
|
|
|
|
|
; |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
has "truncater" => |
114
|
|
|
|
|
|
|
is => "rw", |
115
|
|
|
|
|
|
|
isa => "Object", # "HTML::Truncate", |
116
|
|
|
|
|
|
|
default => sub { |
117
|
|
|
|
|
|
|
HTML::Truncate->new(repair => 1, |
118
|
|
|
|
|
|
|
on_space => 1, |
119
|
|
|
|
|
|
|
chars => 170); |
120
|
|
|
|
|
|
|
}, |
121
|
|
|
|
|
|
|
handles => [ qw( truncate ) ], |
122
|
|
|
|
|
|
|
; |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
has "feeds" => |
125
|
|
|
|
|
|
|
is => "ro", |
126
|
|
|
|
|
|
|
auto_deref => 1, |
127
|
|
|
|
|
|
|
isa => "ArrayRef", |
128
|
|
|
|
|
|
|
default => sub { [] }, |
129
|
|
|
|
|
|
|
; |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
before "feeds" => sub { |
132
|
|
|
|
|
|
|
my $self = shift; |
133
|
|
|
|
|
|
|
while ( my $feed = $self->next() ) |
134
|
|
|
|
|
|
|
{ |
135
|
|
|
|
|
|
|
push @{$self->{feeds}}, $feed; |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
}; |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
sub BUILD { |
140
|
|
|
|
|
|
|
my ( $self, $args ) = @_; |
141
|
|
|
|
|
|
|
$self->config(delete $args->{config}) if $args->{config}; |
142
|
|
|
|
|
|
|
$self->load_config(delete $args->{load_config}) if $args->{load_config}; |
143
|
|
|
|
|
|
|
} |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
sub config { |
146
|
|
|
|
|
|
|
my $self = shift; |
147
|
|
|
|
|
|
|
$self->{_config} ||= $self->_default_config(); |
148
|
|
|
|
|
|
|
my $hash = shift || return $self->{_config}; |
149
|
|
|
|
|
|
|
$self->{_config} = merge $self->{_config}, $hash; |
150
|
|
|
|
|
|
|
return $self->{_config}; |
151
|
|
|
|
|
|
|
} |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
sub load_config { |
154
|
|
|
|
|
|
|
my $self = shift; |
155
|
|
|
|
|
|
|
my $src = shift || return; |
156
|
|
|
|
|
|
|
my $info = ref($src) ? |
157
|
|
|
|
|
|
|
$src : $src !~ /\n/ ? |
158
|
|
|
|
|
|
|
YAML::LoadFile($src) : YAML::Load($src); |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
my $feeds = delete $info->{feeds} || []; |
161
|
|
|
|
|
|
|
$self->config($info); |
162
|
|
|
|
|
|
|
$self->add_feeds($feeds); |
163
|
|
|
|
|
|
|
return $self; |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
sub add_feeds { |
167
|
|
|
|
|
|
|
my $self = shift; |
168
|
|
|
|
|
|
|
my $feeds = shift; |
169
|
|
|
|
|
|
|
my $old = scalar @{$self->config->{feeds} || []}; |
170
|
|
|
|
|
|
|
my $new = scalar @{$feeds}; |
171
|
|
|
|
|
|
|
for my $info ( @{$feeds} ) |
172
|
|
|
|
|
|
|
{ |
173
|
|
|
|
|
|
|
confess "URI is missing from feed data for feed: ", YAML::Dump($info) |
174
|
|
|
|
|
|
|
unless $info->{uri}; |
175
|
|
|
|
|
|
|
push @{$self->config->{feeds}}, $info; |
176
|
|
|
|
|
|
|
} |
177
|
|
|
|
|
|
|
return ( $old + $new ) == @{$self->config->{feeds}}; |
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
sub as_string { |
181
|
|
|
|
|
|
|
my $self = shift; |
182
|
|
|
|
|
|
|
my $out = ""; |
183
|
|
|
|
|
|
|
$self->process($self->template, { rssycklr => $self }, \$out) |
184
|
|
|
|
|
|
|
or confess $self->tt2->error(); |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
if ( defined wantarray ) |
187
|
|
|
|
|
|
|
{ |
188
|
|
|
|
|
|
|
return $out; |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
else |
191
|
|
|
|
|
|
|
{ |
192
|
|
|
|
|
|
|
print $out; |
193
|
|
|
|
|
|
|
return 1; |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
} |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
sub next { |
198
|
|
|
|
|
|
|
my $self = shift; |
199
|
|
|
|
|
|
|
if ( $self->_maxed_out ) |
200
|
|
|
|
|
|
|
{ |
201
|
|
|
|
|
|
|
$self->config->{feeds} = []; |
202
|
|
|
|
|
|
|
return; |
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
my $info = shift @{ $self->config->{feeds} } || return; |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
my $uri = blessed($info->{uri}) eq "URI" ? |
207
|
|
|
|
|
|
|
$info->{uri} : URI->new($info->{uri}); |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
my $xml_feed; |
210
|
|
|
|
|
|
|
my $ok = eval { |
211
|
|
|
|
|
|
|
local $SIG{ALRM} = sub { die "Feed request timeout: $uri\n" }; |
212
|
|
|
|
|
|
|
alarm( $info->{timeout} || $self->config->{timeout} || 10 ); |
213
|
|
|
|
|
|
|
$xml_feed = XML::Feed->parse($uri) |
214
|
|
|
|
|
|
|
or croak("Could not parse $uri, ", XML::Feed->errstr); |
215
|
|
|
|
|
|
|
alarm(0); |
216
|
|
|
|
|
|
|
1; |
217
|
|
|
|
|
|
|
}; |
218
|
|
|
|
|
|
|
alarm(0); # Racing parsing fatals can happen in the XML::Feed space(?). |
219
|
|
|
|
|
|
|
unless ( $ok == 1 ) |
220
|
|
|
|
|
|
|
{ |
221
|
|
|
|
|
|
|
carp $@ || ( "Unknown error parsing " . $info->{uri} ) |
222
|
|
|
|
|
|
|
if $self->debug; |
223
|
|
|
|
|
|
|
return $self->next; |
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
my $hours_back = DateTime |
227
|
|
|
|
|
|
|
->now( time_zone => 'floating' ) |
228
|
|
|
|
|
|
|
->subtract( hours => $info->{hours_back} || $self->config->{hours_back} || 170 ); |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
if ( $xml_feed->modified ) |
231
|
|
|
|
|
|
|
{ |
232
|
|
|
|
|
|
|
return $self->next unless 1 == DateTime->compare( $xml_feed->modified, $hours_back ); |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
my $max_display = $info->{max_display} || $self->config->{max_display} || 10; |
236
|
|
|
|
|
|
|
my $excerpt_length = $info->{excerpt_length} || $self->config->{excerpt_length}; |
237
|
|
|
|
|
|
|
my $title_only = exists($info->{title_only}) ? |
238
|
|
|
|
|
|
|
$info->{title_only} # might be undef on purpose to override self->config setting |
239
|
|
|
|
|
|
|
: |
240
|
|
|
|
|
|
|
$self->config->{title_only}; |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
my @entry; |
243
|
|
|
|
|
|
|
ENTRY: |
244
|
|
|
|
|
|
|
for my $entry ( $xml_feed->entries ) |
245
|
|
|
|
|
|
|
{ |
246
|
|
|
|
|
|
|
next ENTRY unless $entry->issued; |
247
|
|
|
|
|
|
|
next ENTRY unless 1 == DateTime->compare( $entry->issued, $hours_back ); |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
my %entry; |
250
|
|
|
|
|
|
|
unless ( $title_only ) |
251
|
|
|
|
|
|
|
{ |
252
|
|
|
|
|
|
|
next ENTRY if $entry->content->body !~ /\S/; |
253
|
|
|
|
|
|
|
my $xhtml = $self->html_to_dom( $entry->content->body ) |
254
|
|
|
|
|
|
|
or die "Couldn't parse ", $entry->content->body; |
255
|
|
|
|
|
|
|
$self->_strip_attributes($xhtml); |
256
|
|
|
|
|
|
|
$self->_strip_tags($xhtml); |
257
|
|
|
|
|
|
|
$self->_handle_images($xhtml, $entry); |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
my ( $body ) = $xhtml->findnodes("body"); |
260
|
|
|
|
|
|
|
unless ( $xhtml->findnodes("head") ) |
261
|
|
|
|
|
|
|
{ |
262
|
|
|
|
|
|
|
my $head = $xhtml->createElement("head"); |
263
|
|
|
|
|
|
|
my $title = $xhtml->createElement("title"); |
264
|
|
|
|
|
|
|
my $text = $xhtml->createTextNode(__PACKAGE__ . "/" . $VERSION); |
265
|
|
|
|
|
|
|
$title->appendChild($text); |
266
|
|
|
|
|
|
|
$head->appendChild($title); |
267
|
|
|
|
|
|
|
$xhtml->insertBefore($head,$body); |
268
|
|
|
|
|
|
|
} |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
# Cache it. |
271
|
|
|
|
|
|
|
unless ( $self->dtd ) |
272
|
|
|
|
|
|
|
{ |
273
|
|
|
|
|
|
|
$self->config->{dtd} ||= "xhtml1-transitional.dtd"; |
274
|
|
|
|
|
|
|
my $dtd_file = File::ShareDir::dist_file(__PACKAGE__, |
275
|
|
|
|
|
|
|
$self->config->{dtd}); |
276
|
|
|
|
|
|
|
$/ = undef; |
277
|
|
|
|
|
|
|
open my $fh, "<", $dtd_file or croak "Couldn't open '$dtd_file' for reading: $!"; |
278
|
|
|
|
|
|
|
$self->{ $self->config->{dtd} } = <$fh>; |
279
|
|
|
|
|
|
|
close $fh or carp "Trouble closing '$dtd_file': $!"; |
280
|
|
|
|
|
|
|
$self->dtd( XML::LibXML::Dtd->parse_string($self->{ $self->config->{dtd} }) ); |
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
unless ( eval { $xhtml->validate($self->dtd); 1; } ) |
284
|
|
|
|
|
|
|
{ |
285
|
|
|
|
|
|
|
carp $@ || "Uknown error", |
286
|
|
|
|
|
|
|
" - parsing content of '", $entry->title, |
287
|
|
|
|
|
|
|
"' from ", $xml_feed->link; |
288
|
|
|
|
|
|
|
next ENTRY; |
289
|
|
|
|
|
|
|
} |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
my $content = ""; |
292
|
|
|
|
|
|
|
$content .= $_->serialize(1) for $body->childNodes(); |
293
|
|
|
|
|
|
|
my $more = join("", |
294
|
|
|
|
|
|
|
decode_utf8($self->config->{ellipsis}), |
295
|
|
|
|
|
|
|
'
296
|
|
|
|
|
|
|
$entry->link, |
297
|
|
|
|
|
|
|
'">', |
298
|
|
|
|
|
|
|
decode_utf8($self->config->{read_more}), |
299
|
|
|
|
|
|
|
'' |
300
|
|
|
|
|
|
|
); |
301
|
|
|
|
|
|
|
my $output = $self->truncate( $content, |
302
|
|
|
|
|
|
|
$excerpt_length, |
303
|
|
|
|
|
|
|
$more ); |
304
|
|
|
|
|
|
|
$output =~ s/\s\s+/ /g; |
305
|
|
|
|
|
|
|
$entry{lede} = $output; |
306
|
|
|
|
|
|
|
} |
307
|
|
|
|
|
|
|
$entry{xml_feed_entry} = $entry; |
308
|
|
|
|
|
|
|
$entry{feed} = $xml_feed; |
309
|
|
|
|
|
|
|
push @entry, \%entry; |
310
|
|
|
|
|
|
|
last ENTRY if @entry >= $max_display; |
311
|
|
|
|
|
|
|
} |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
return $self->next unless @entry; |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
my $feed = RSSycklr::Feed->new( %{$info}, |
316
|
|
|
|
|
|
|
ellipsis => $self->config->{ellipsis}, # not sure, weak ref to parent instead? |
317
|
|
|
|
|
|
|
xml_feed => $xml_feed, ); |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
$feed->{entries} = [ map { $_->{feed} = $feed; RSSycklr::Feed::Entry->new($_) } @entry ]; |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
$self->{_feeds_returned}++; |
322
|
|
|
|
|
|
|
return $feed; |
323
|
|
|
|
|
|
|
} |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
sub html_to_dom { |
326
|
|
|
|
|
|
|
my $self = shift; |
327
|
|
|
|
|
|
|
my $html = shift || return; |
328
|
|
|
|
|
|
|
my $renew = ""; |
329
|
|
|
|
|
|
|
my $p = HTML::TokeParser::Simple->new(\$html); |
330
|
|
|
|
|
|
|
no warnings "uninitialized"; |
331
|
|
|
|
|
|
|
while ( my $token = $p->get_token ) |
332
|
|
|
|
|
|
|
{ |
333
|
|
|
|
|
|
|
if ( $token->is_text |
334
|
|
|
|
|
|
|
or not $HTML::Tagset::isKnown{ $token->get_tag } ) |
335
|
|
|
|
|
|
|
{ |
336
|
|
|
|
|
|
|
my $txt = HTML::Entities::decode_entities($token->as_is); |
337
|
|
|
|
|
|
|
$txt =~ s/[^[:print:]]+/ /g; # kill unprintables for a space. |
338
|
|
|
|
|
|
|
$renew .= $txt; |
339
|
|
|
|
|
|
|
} |
340
|
|
|
|
|
|
|
elsif ( $token->get_tag =~ /\Abr\b/i ) |
341
|
|
|
|
|
|
|
{ |
342
|
|
|
|
|
|
|
$renew .= "\n"; |
343
|
|
|
|
|
|
|
} |
344
|
|
|
|
|
|
|
elsif ( $HTML::Tagset::canTighten{ $token->get_tag } ) |
345
|
|
|
|
|
|
|
{ |
346
|
|
|
|
|
|
|
# Replace block-like tags with \n if we have content |
347
|
|
|
|
|
|
|
# already and not more than twice consecutively. |
348
|
|
|
|
|
|
|
$renew .= $token->as_is; |
349
|
|
|
|
|
|
|
} |
350
|
|
|
|
|
|
|
else |
351
|
|
|
|
|
|
|
{ |
352
|
|
|
|
|
|
|
$renew .= $token->as_is; |
353
|
|
|
|
|
|
|
} |
354
|
|
|
|
|
|
|
} |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
# XML::LibXML is noisy even with recover_silently, so- |
357
|
|
|
|
|
|
|
my $dom; |
358
|
|
|
|
|
|
|
my ( $out, $err ) = capture { |
359
|
|
|
|
|
|
|
$dom = $self->parse_html_string(<<"HTML"); |
360
|
|
|
|
|
|
|
Untitled$renew |
361
|
|
|
|
|
|
|
HTML |
362
|
|
|
|
|
|
|
}; |
363
|
|
|
|
|
|
|
$dom; |
364
|
|
|
|
|
|
|
} |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
sub _maxed_out { |
367
|
|
|
|
|
|
|
my $self = shift; |
368
|
|
|
|
|
|
|
if ( $self->config->{max_feeds} |
369
|
|
|
|
|
|
|
and |
370
|
|
|
|
|
|
|
$self->config->{max_feeds} <= $self->{_feeds_returned} ) |
371
|
|
|
|
|
|
|
{ |
372
|
|
|
|
|
|
|
return 1; |
373
|
|
|
|
|
|
|
} |
374
|
|
|
|
|
|
|
return; |
375
|
|
|
|
|
|
|
} |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
sub _strip_attributes { |
378
|
|
|
|
|
|
|
my ( $self, $root ) = @_; |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
for my $node ( $root->findnodes("//*") ) |
381
|
|
|
|
|
|
|
{ |
382
|
|
|
|
|
|
|
for my $attr ( $node->attributes ) |
383
|
|
|
|
|
|
|
{ |
384
|
|
|
|
|
|
|
next if $node->nodeName eq 'a' and $attr->name eq 'href'; |
385
|
|
|
|
|
|
|
next if $node->nodeName eq 'img' and $attr->name eq 'src'; |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
next if $attr->name eq 'title' |
388
|
|
|
|
|
|
|
and $node->nodeName =~ /\A(?:acronym|abbr|dfn|a)\z/; |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
$node->removeAttribute($attr->name); |
391
|
|
|
|
|
|
|
} |
392
|
|
|
|
|
|
|
} |
393
|
|
|
|
|
|
|
} |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
sub _handle_images { |
396
|
|
|
|
|
|
|
my ( $self, $root, $entry ) = @_; |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
for my $node ( $root->findnodes("//img") ) |
399
|
|
|
|
|
|
|
{ |
400
|
|
|
|
|
|
|
if ( $node->getAttribute("src") !~ m,\Ahttp://, ) |
401
|
|
|
|
|
|
|
{ |
402
|
|
|
|
|
|
|
$node->parentNode->removeChild($node); |
403
|
|
|
|
|
|
|
return; |
404
|
|
|
|
|
|
|
} |
405
|
|
|
|
|
|
|
# Don't put a link on images that already have one. |
406
|
|
|
|
|
|
|
next if $node->parentNode->tagName eq "a"; |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
my $link = $node->getOwner->createElement("a"); |
409
|
|
|
|
|
|
|
$link->setAttribute("href", $entry->link); |
410
|
|
|
|
|
|
|
$link->setAttribute("title", $entry->title); |
411
|
|
|
|
|
|
|
$node->setAttribute("alt", $entry->title); |
412
|
|
|
|
|
|
|
$link->appendChild( $node->cloneNode ); |
413
|
|
|
|
|
|
|
$node->parentNode->replaceChild( $link, $node ); |
414
|
|
|
|
|
|
|
return 1; # Just do one for now. |
415
|
|
|
|
|
|
|
} |
416
|
|
|
|
|
|
|
} |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
sub _strip_tags { |
419
|
|
|
|
|
|
|
my ( $self, $root ) = @_; |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
my $doc = $root->getOwnerDocument; |
422
|
|
|
|
|
|
|
my $keep = $self->keep_tags; |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
# Special case, we must have this and don't want it mucking the interface. |
425
|
|
|
|
|
|
|
$keep->{body} = 1; |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
my @nodes = $root->findnodes("//*"); |
428
|
|
|
|
|
|
|
for my $node ( @nodes ) |
429
|
|
|
|
|
|
|
{ |
430
|
|
|
|
|
|
|
next unless $node; |
431
|
|
|
|
|
|
|
next if $keep->{$node->nodeName}; |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
my $frag = $doc->createDocumentFragment(); |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
for my $n ( $node->childNodes ) |
436
|
|
|
|
|
|
|
{ |
437
|
|
|
|
|
|
|
$frag->appendChild($n); |
438
|
|
|
|
|
|
|
} |
439
|
|
|
|
|
|
|
$node->replaceNode($frag); |
440
|
|
|
|
|
|
|
} |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
return 1 unless $keep->{br}; |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
my @outer = $root->findnodes("body/*"); |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
FORWARD: |
447
|
|
|
|
|
|
|
for my $br ( @outer ) { |
448
|
|
|
|
|
|
|
last FORWARD unless $br and $br->tagName eq "br"; |
449
|
|
|
|
|
|
|
$br->parentNode->removeChild($br); |
450
|
|
|
|
|
|
|
} |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
BACKWARD: |
453
|
|
|
|
|
|
|
for my $br ( reverse @outer ) { |
454
|
|
|
|
|
|
|
last BACKWARD unless $br and $br->tagName eq "br"; |
455
|
|
|
|
|
|
|
$br->parentNode->removeChild($br); |
456
|
|
|
|
|
|
|
} |
457
|
|
|
|
|
|
|
return 1; |
458
|
|
|
|
|
|
|
} |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
sub _default_config { |
461
|
|
|
|
|
|
|
return { |
462
|
|
|
|
|
|
|
excerpt_length => 150, |
463
|
|
|
|
|
|
|
ellipsis => "\x{2026}", # chr(8230), |
464
|
|
|
|
|
|
|
read_more => "[more]", |
465
|
|
|
|
|
|
|
title_only => undef, |
466
|
|
|
|
|
|
|
hours_back => 72, |
467
|
|
|
|
|
|
|
max_feeds => 10, |
468
|
|
|
|
|
|
|
# max_entries => 10, |
469
|
|
|
|
|
|
|
max_display => 3, |
470
|
|
|
|
|
|
|
timeout => 30, |
471
|
|
|
|
|
|
|
css_class => "rssycklr", |
472
|
|
|
|
|
|
|
# title_length => undef, |
473
|
|
|
|
|
|
|
# excerpt_style => dl|p|br|ul |
474
|
|
|
|
|
|
|
# title_style => ul|p|br # not implemented, ul/li happens now |
475
|
|
|
|
|
|
|
# max_images => 1 # this is hardcoded for now |
476
|
|
|
|
|
|
|
feed_title_tag => "h4", |
477
|
|
|
|
|
|
|
dtd => "xhtml1-transitional.dtd", |
478
|
|
|
|
|
|
|
}; |
479
|
|
|
|
|
|
|
} |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable(); |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
package RSSycklr::Feed; |
484
|
|
|
|
|
|
|
use Mouse; |
485
|
|
|
|
|
|
|
use HTML::Entities qw( decode_entities ); |
486
|
|
|
|
|
|
|
use Encode qw( decode_utf8 ); |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
has "xml_feed" => |
489
|
|
|
|
|
|
|
is => "ro", |
490
|
|
|
|
|
|
|
required => 1, |
491
|
|
|
|
|
|
|
isa => "Object", |
492
|
|
|
|
|
|
|
handles => [qw( tagline link copyright modified |
493
|
|
|
|
|
|
|
author generator language )], |
494
|
|
|
|
|
|
|
; |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
has "entries" => |
497
|
|
|
|
|
|
|
is => "ro", |
498
|
|
|
|
|
|
|
lazy => 1, |
499
|
|
|
|
|
|
|
default => sub { [] }, |
500
|
|
|
|
|
|
|
required => 1, |
501
|
|
|
|
|
|
|
auto_deref => 1, |
502
|
|
|
|
|
|
|
isa => "ArrayRef", |
503
|
|
|
|
|
|
|
; |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
has "title_override" => |
506
|
|
|
|
|
|
|
is => "ro", |
507
|
|
|
|
|
|
|
isa => "Str", |
508
|
|
|
|
|
|
|
default => sub { "" }, |
509
|
|
|
|
|
|
|
; |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
sub count { |
512
|
|
|
|
|
|
|
scalar @{+shift->entries}; |
513
|
|
|
|
|
|
|
} |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
sub title { |
516
|
|
|
|
|
|
|
my $self = shift; |
517
|
|
|
|
|
|
|
return $self->{_title} if $self->{_title}; |
518
|
|
|
|
|
|
|
# Try to guarantee it doesn't return entities. |
519
|
|
|
|
|
|
|
$self->{_title} = decode_entities(decode_entities($self->xml_feed->title)); |
520
|
|
|
|
|
|
|
$self->{_title} = decode_utf8( $self->{_title} ); |
521
|
|
|
|
|
|
|
} |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable(); |
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
package RSSycklr::Feed::Entry; |
526
|
|
|
|
|
|
|
use Mouse; |
527
|
|
|
|
|
|
|
use DateTime; |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
has "xml_feed_entry" => ( is => "ro", |
530
|
|
|
|
|
|
|
required => 1, |
531
|
|
|
|
|
|
|
isa => "Object", # ::Atom/RSS |
532
|
|
|
|
|
|
|
handles => [qw( title link content category id author issued modified )], |
533
|
|
|
|
|
|
|
); |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
has "lede" => ( is => "ro", |
536
|
|
|
|
|
|
|
isa => "Str", |
537
|
|
|
|
|
|
|
default => sub { "" }, |
538
|
|
|
|
|
|
|
); |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
has "feed" => ( is => "ro", |
541
|
|
|
|
|
|
|
weak_ref => 1, |
542
|
|
|
|
|
|
|
isa => "RSSycklr::Feed", |
543
|
|
|
|
|
|
|
); |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable(); |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
1; |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
__END__ |