File Coverage

blib/lib/AnyEvent/Feed.pm
Criterion Covered Total %
statement 13 15 86.6
branch n/a
condition n/a
subroutine 5 5 100.0
pod n/a
total 18 20 90.0


line stmt bran cond sub pod time code
1             package AnyEvent::Feed;
2 1     1   22747 use strict;
  1         3  
  1         48  
3 1     1   9 no warnings;
  1         2  
  1         44  
4              
5 1     1   7 use Carp qw/croak/;
  1         5  
  1         77  
6 1     1   1074 use Encode;
  1         16385  
  1         96  
7 1     1   351 use XML::Feed;
  0            
  0            
8             use MIME::Base64;
9             use AnyEvent::HTTP;
10             use Digest::SHA1 qw/sha1_base64/;
11             use Scalar::Util qw/weaken/;
12              
13             our $VERSION = '0.3';
14              
15             =head1 NAME
16              
17             AnyEvent::Feed - Receiving RSS/Atom Feed reader with XML::Feed
18              
19             =head1 VERSION
20              
21             Version 0.3
22              
23             =head1 SYNOPSIS
24              
25             use AnyEvent;
26             use AnyEvent::Feed;
27              
28             my $feed_reader =
29             AnyEvent::Feed->new (
30             url => 'http://example.com/atom.xml',
31             );
32              
33             $feed_reader->fetch (sub {
34             my ($feed_reader, $new_entries, $feed, $error) = @_;
35              
36             if (defined $error) {
37             warn "ERROR: $error\n";
38             return;
39             }
40              
41             # $feed is the XML::Feed object belonging to that fetch.
42              
43             for (@$new_entries) {
44             my ($hash, $entry) = @$_;
45             # $hash a unique hash describing the $entry
46             # $entry is the XML::Feed::Entry object of the new entries
47             # since the last fetch.
48             }
49             });
50              
51             # Or:
52              
53             my $feed_reader =
54             AnyEvent::Feed->new (
55             url => 'http://example.com/atom.xml',
56             interval => $seconds,
57              
58             on_fetch => sub {
59             my ($feed_reader, $new_entries, $feed, $error) = @_;
60              
61             if (defined $error) {
62             warn "ERROR: $error\n";
63             return;
64             }
65              
66             # see above
67             }
68             );
69              
70             =head1 DESCRIPTION
71              
72             This module implements some glue between L<AnyEvent::HTTP> and L<XML::Feed>.
73             It can fetch a RSS/Atom feed on a regular interval as well as on customized
74             times. It also keeps track of already fetched entries so that you will only get
75             the new entries.
76              
77             =head1 METHODS
78              
79             =over 4
80              
81             =item $feed_reader = AnyEvent::Feed->new (url => $url, %args)
82              
83             This is the constructor for a new feed reader for the RSS/Atom feed
84             reachable by the URL C<$url>. C<%args> may contain additional key/value pairs:
85              
86             =over 4
87              
88             =item interval => $seconds
89              
90             If this is set you also have to specify the C<on_fetch> callback (see below).
91             It will try to fetch the C<$url> every C<$seconds> seconds and call the
92             callback given by C<on_fetch> with the result.
93              
94             =item headers => $http_hdrs
95              
96             Additional HTTP headers for each GET request can be passed in the C<$http_hdrs>
97             hash reference, just like you would pass it to the C<headers> argument of
98             the C<http_get> request of L<AnyEvent::HTTP>.
99              
100             =item username => $http_user
101              
102             =item password => $http_pass
103              
104             These are the HTTP username and password that will be used for Basic HTTP
105             Authentication with the HTTP server when fetching the feed. This is mostly
106             sugar for you so you don't have to encode them yourself and pass them to the
107             C<headers> argument above.
108              
109             =item on_fetch => $cb->($feed_reader, $new_entries, $feed_obj, $error)
110              
111             This callback is called if the C<interval> parameter is given (see above)
112             with the same arguments as the callback given to the C<fetch> method (see below).
113              
114             =item entry_ages => $hash
115              
116             This will set the hash which keeps track of seen and old entries.
117             See also the documentation of the C<entry_ages> method below.
118             The default will be an empty hash reference.
119              
120             =item max_entry_age => $count
121              
122             This will set the maximum number of times an entry is kept in the C<entry_ages>
123             hash after it has not been seen in the feed anymore. The default value is 2
124             which means that an entry hash is removed from the C<entry_ages> hash after it
125             has not been seen in the feed for 2 fetches.
126              
127             =back
128              
129             =cut
130              
131             sub new {
132             my $this = shift;
133             my $class = ref($this) || $this;
134             my $self = { @_ };
135             bless $self, $class;
136              
137             $self->{entry_ages} ||= {};
138              
139             if (defined $self->{interval}) {
140             unless (defined $self->{on_fetch}) {
141             croak "no 'on_fetch' callback given!";
142             }
143              
144             my $wself = $self;
145             weaken $wself;
146              
147             $self->{timer_cb} = sub {
148             $wself->fetch (sub {
149             my ($self, $e, $f, $err) = @_;
150              
151             $self->{on_fetch}->($self, $e, $f, $err);
152              
153             $self->{timer} =
154             AnyEvent->timer (
155             after => $self->{interval}, cb => $self->{timer_cb});
156             })
157             };
158              
159             $self->{timer_cb}->();
160             }
161              
162             return $self
163             }
164              
165              
166             sub _entry_to_hash {
167             my ($entry) = @_;
168             my $x = sha1_base64
169             encode 'utf-8',
170             (my $a = join '/',
171             $entry->title,
172             ($entry->summary ? $entry->summary->body : ''),
173             ($entry->content ? $entry->content->body : ''),
174             $entry->id,
175             $entry->link);
176             $x
177             }
178              
179             sub _new_entries {
180             my ($self) = @_;
181              
182             $self->{entry_ages} ||= {};
183              
184             my (@ents) = $self->{feed}->entries;
185              
186             my @new;
187              
188             # 'age' the old entries
189             $self->{entry_ages}->{$_}++ for keys %{$self->{entry_ages}};
190              
191             for my $ent (@ents) {
192             my $hash = _entry_to_hash ($ent);
193              
194             unless (exists $self->{entry_ages}->{$hash}) {
195             push @new, [$hash, $ent];
196             }
197              
198             $self->{entry_ages}->{$hash} = 0; # reset age of old entry.
199             }
200              
201             for (keys %{$self->{entry_ages}}) {
202             delete $self->{entry_ages}->{$_}
203             if $self->{entry_ages}->{$_} > $self->{max_entry_ages};
204             }
205              
206             \@new
207             }
208              
209             =item $feed_reader->url
210              
211             Just returns the url that this feed reader is fetching from.
212              
213             =cut
214              
215             sub url { $_[0]->{url} }
216              
217             =item $feed_reader->entry_ages ($new_entry_ages)
218              
219             =item my $entry_ages = $feed_reader->entry_ages
220              
221             This will set the age hash which will keep track of already seen entries.
222             The keys of the hash will be the calculated hashes of the entries and the
223             values will be a counter of how often they have NOT been seen anymore (kind of
224             an age counter). After each fetch this hash is updated and seen entries get
225             a value of 0.
226              
227             =cut
228              
229             sub entry_ages {
230             defined $_[1]
231             ? $_[0]->{entry_ages} = $_[1]
232             : $_[0]->{entry_ages}
233             }
234              
235             =item $feed_reader->fetch ($cb->($feed_reader, $new_entries, $feed_obj, $error))
236              
237             This will initiate a HTTP GET on the URL passed to C<new> and call C<$cb> when
238             done.
239              
240             C<$feed_reader> is the feed reader object itself. C<$new_entries> is an
241             array reference containing the new entries. A new entry in that array is
242             another array containing a calculated hash over the contents of the new entry,
243             and the L<XML::Feed::Entry> object of that entry. C<$feed_obj> is the
244             L<XML::Feed> feed object used to parse the fetched feed and contains all
245             entries (and not just the 'new' ones).
246              
247             What a 'new' entry is, is decided by a map of hashes as described in the
248             C<entry_ages> method's documentation above.
249              
250             =cut
251              
252             sub _get_headers {
253             my ($self, %hdrs) = @_;
254              
255             my %hdrs = %{$self->{headers} || {}};
256              
257             if (defined $self->{last_mod}) {
258             $hdrs{'If-Modified-Since'} = $self->{last_mod};
259             }
260              
261             $hdrs{Authorization} =
262             "Basic " . encode_base64 (join ':', $self->{username}, $self->{password}, '')
263             if defined $self->{username};
264              
265             \%hdrs
266             }
267              
268             sub fetch {
269             my ($self, $cb) = @_;
270              
271             unless (defined $cb) {
272             croak "no callback given to fetch!";
273             }
274              
275             http_get $self->{url}, headers => $self->_get_headers, sub {
276             my ($data, $hdr) = @_;
277              
278             #d# warn "HEADERS ($self->{last_mod}): "
279             #d# . (join ",\n", map { "$_:\t$hdr->{$_}" } keys %$hdr)
280             #d# . "\n";
281              
282             if ($hdr->{Status} =~ /^2/) {
283             my $feed;
284             eval {
285             $self->{feed} = XML::Feed->parse (\$data);
286             };
287             if ($@) {
288             $cb->($self, undef, undef, "exception: $@");
289             } elsif (not defined $self->{feed}) {
290             $cb->($self, undef, undef, XML::Feed->errstr);
291             } else {
292             $cb->($self, $self->_new_entries, $self->{feed});
293              
294             $self->{last_mod} = $hdr->{'last-modified'};
295             }
296              
297             } elsif (defined ($self->{last_mod}) && $hdr->{Status} eq '304') {
298             # do nothing, everything was/is fine!
299             $cb->($self, [], $self->{feed});
300              
301             } else {
302             $cb->($self, undef, undef, "$hdr->{Status} $hdr->{Reason}");
303             }
304             };
305             }
306              
307             =back
308              
309             =head1 AUTHOR
310              
311             Robin Redeker, C<< <elmex@ta-sa.org> >>
312              
313             =head1 SEE ALSO
314              
315             L<XML::Feed>
316              
317             L<AnyEvent::HTTP>
318              
319             L<AnyEvent>
320              
321             =head1 BUGS
322              
323             =head2 Known Bugs
324              
325             There is actually a known bug with encodings of contents of Atom feeds.
326             L<XML::Atom> by default gives you UTF-8 encoded data. You have to set
327             this global variable to be able to use the L<XML::Feed::Entry> interface
328             without knowledge of the underlying feed type:
329              
330             $XML::Atom::ForceUnicode = 1;
331              
332             I've re-reported this bug against L<XML::Feed>, as I think it should
333             take care of this. L<XML::Atom> should probably just fix it's Unicode
334             interface, but it seems to be a bit deserted w.r.t. fixing the bugs in
335             the tracker.
336              
337             =head2 Contact
338              
339             Please report any bugs or feature requests to
340             C<bug-anyevent-feed at rt.cpan.org>, or through the web interface at
341             L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=AnyEvent-Feed>.
342             I will be notified and then you'll automatically be notified of progress on
343             your bug as I make changes.
344              
345             =head1 SUPPORT
346              
347             You can find documentation for this module with the perldoc command.
348              
349             perldoc AnyEvent::Feed
350              
351             You can also look for information at:
352              
353             =over 4
354              
355             =item * IRC: AnyEvent::Feed IRC Channel
356              
357             See the same channel as the L<AnyEvent::XMPP> module:
358              
359             IRC Network: http://freenode.net/
360             Server : chat.freenode.net
361             Channel : #ae_xmpp
362              
363             Feel free to join and ask questions!
364              
365             =item * AnnoCPAN: Annotated CPAN documentation
366              
367             L<http://annocpan.org/dist/AnyEvent-Feed>
368              
369             =item * CPAN Ratings
370              
371             L<http://cpanratings.perl.org/d/AnyEvent-Feed>
372              
373             =item * RT: CPAN's request tracker
374              
375             L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=AnyEvent-Feed>
376              
377             =item * Search CPAN
378              
379             L<http://search.cpan.org/dist/AnyEvent-Feed>
380              
381             =back
382              
383             =head1 COPYRIGHT & LICENSE
384              
385             Copyright 2009 Robin Redeker, all rights reserved.
386              
387             This program is free software; you can redistribute it and/or modify it
388             under the same terms as Perl itself.
389              
390             =cut
391              
392             1;