File Coverage

blib/lib/Feed/Pipe.pm
Criterion Covered Total %
statement 4 6 66.6
branch n/a
condition n/a
subroutine 2 2 100.0
pod n/a
total 6 8 75.0


line stmt bran cond sub pod time code
1             package Feed::Pipe;
2             # Housekeeping
3 5     5   238404 use Moose;
  5         2861537  
  5         47  
4 5     5   44079 use Feed::Pipe::Types qw(ArrayRef AtomEntry AtomFeed Datetime Str Uri);
  0            
  0            
5             use Log::Any;
6              
7             our $VERSION = '1.003';
8              
9             # Code
10             use DateTime;
11             use DateTime::Format::HTTP;
12             use XML::Feed;
13             use XML::Atom;
14             use XML::Atom::Feed;
15             $XML::Atom::DefaultVersion = 1.0;
16             $XML::Atom::ForceUnicode = 1;
17              
18             #--------------------------------------------------------------------
19             # ATTRIBUTES
20             #--------------------------------------------------------------------
21              
22             has id => (is => 'rw', isa => Str, lazy_build => 1);
23             sub _build_id {
24             require Data::UUID;
25             my $gen = Data::UUID->new;
26             return 'urn:'.$gen->to_string($gen->create());
27             }
28              
29             has title => (is => 'rw', isa => Str, default => "Combined Feed");
30              
31             has updated => (is => 'rw', isa => Datetime, lazy_build => 1, coerce => 1);
32             sub _build_updated { DateTime->now() }
33              
34             has _entries =>
35             ( is => 'rw'
36             , traits => ['Array']
37             , isa => ArrayRef[AtomEntry]
38             , default => sub {[]}
39             , handles =>
40             { count => 'count'
41             , entries => 'elements'
42             , _clear => 'clear'
43             , _delete => 'delete'
44             , _entry_at => 'accessor'
45             , _first => 'first'
46             , _get => 'get'
47             , _grep => 'grep'
48             , _insert => 'insert'
49             , _map => 'map'
50             , _pop => 'pop'
51             , _push => 'push'
52             , _shift => 'shift'
53             , _shuffle => 'shuffle'
54             , _sort_in_place => 'sort_in_place'
55             # man page lies: does NOT work identically to Perl's splice
56             # , _splice => 'splice'
57             , _unshift => 'unshift'
58             }
59             );
60              
61             has _logger => (is => 'ro', lazy_build => 1);
62             sub _build__logger {Log::Any->get_logger(category => __PACKAGE__);}
63              
64             #--------------------------------------------------------------------
65             # FILTER METHODS
66             #--------------------------------------------------------------------
67              
68             # FIXME: I really want this to add a <source> element to each entry so it can
69             # be traced back to its origin. And to be much more clever. And not to rely
70             # on XML::Feed.
71             sub cat {
72             my ($proto, @feed_urls) = @_;
73             my $self = ref($proto) ? $proto : $proto->new();
74             #$self->_logger->debugf('cat: %s', \@feed_urls);
75            
76             foreach my $f (@feed_urls) {
77             if (ref($f) eq 'Feed::Pipe') {
78             $self->_logger->debug("Adding a Feed::Pipe");
79             $self->_push($f->entries);
80            
81             } elsif ( ref($f) eq 'XML::Atom::Feed') {
82             $self->_add_atom($f);
83             $self->_logger->debug("Adding a XML::Atom::Feed");
84            
85             } elsif (ref($f) =~ /^XML::Feed/) {
86             $self->_logger->debug("Adding a XML::Feed");
87             $f = $self->_xf_to_atom($f);
88             $self->_add_atom($f);
89            
90             } else {
91             $self->_logger->debug("Using XML::Feed for parsing");
92             my $feed = XML::Feed->parse($f);
93             $feed = $self->_xf_to_atom($feed);
94             $self->_add_atom($feed);
95             }
96             }
97             return $self; # ALWAYS return $self for chaining!
98             }
99              
100              
101             sub sort {
102             my ($self, $sub) = @_;
103             $sub ||= sub { ($_[1]->updated||$_[1]->published) cmp ($_[0]->updated||$_[0]->published) };
104             $self->_sort_in_place($sub);
105             return $self; # ALWAYS return $self for chaining!
106             }
107              
108             sub reverse {
109             my ($self) = @_;
110             $self->_entries([reverse $self->entries]);
111             return $self; # ALWAYS return $self for chaining!
112             }
113              
114             sub head {
115             my ($self, $limit) = @_;
116             $limit ||= 10;
117             $self->_entries([splice(@{$self->_entries},0,$limit)]);
118             return $self; # ALWAYS return $self for chaining!
119             }
120              
121             sub tail {
122             my ($self, $limit) = @_;
123             $limit ||= 10;
124             $self->_entries([splice(@{$self->_entries},-$limit)]);
125             return $self; # ALWAYS return $self for chaining!
126             }
127              
128             sub grep {
129             my ($self, $sub) = @_;
130             $sub ||= sub { $_->content||$_->summary };
131             $self->_entries([$self->_grep($sub)]);
132             return $self; # ALWAYS return $self for chaining!
133             }
134              
135             sub map {
136             my ($self, $sub) = @_;
137             unless ($sub) {
138             my ($package, $file, $line) = caller();
139             $self->_logger->warning('Ignoring map() without a code reference at %s:%s',$file,$line);
140             warn sprintf('Ignoring map() without a code reference at %s:%s',$file,$line);
141             return $self;
142             }
143             $self->_entries([$self->_map($sub)]);
144             return $self; # ALWAYS return $self for chaining!
145             }
146              
147              
148             #--------------------------------------------------------------------
149             # OTHER METHODS
150             #--------------------------------------------------------------------
151             sub as_atom_obj {
152             my ($self) = @_;
153             my $feed = XML::Atom::Feed->new;
154             # FIXME: Add support for (at least) the following elements: author category
155             # contributor generator icon link logo rights subtitle
156             $feed->title($self->title);
157             $feed->id($self->id);
158             $feed->updated(DateTime::Format::HTTP->format_isoz($self->updated));
159             $feed->add_entry($_) for $self->entries;
160             return $feed;
161             }
162              
163             sub as_xml {
164             my ($self) = @_;
165             return $self->as_atom_obj->as_xml;
166             }
167              
168             #--------------------------------------------------------------------
169             # PRIVATE METHODS
170             #--------------------------------------------------------------------
171             # This code stolen from XML::Feed::convert and mangled slightly.
172             sub _xf_to_atom {
173             my ($self, $feed) = @_;
174             return $feed->{atom} if $feed->format eq 'Atom';
175            
176             my $new = XML::Feed->new('Atom');
177             for my $field (qw( title link self_link description language author copyright modified generator )) {
178             my $val = $feed->$field();
179             next unless defined $val;
180             $new->$field($val);
181             }
182             for my $entry ($feed->entries) {
183             $new->add_entry($entry->convert('Atom'));
184             }
185             return $new->{atom};
186             }
187              
188             sub _add_atom {
189             my ($self, $feed) = @_;
190             my @entries = $feed->entries; # This clones the entry nodes.
191              
192             # Clean out the entries so we can use $feed as source
193             for my $node ($feed->elem->childNodes) {
194             if ($node->nodeName eq 'entry') {
195             #$self->_logger->debug("Unbinding node ".$node->nodeName);
196             $node->unbindNode();
197             } else {
198             #$self->_logger->debug("Keeping node ".$node->nodeName);
199             }
200             }
201             $self->_push( map {$_->source($feed) unless $_->source; $_ } @entries);
202             }
203              
204              
205             no Moose;
206             __PACKAGE__->meta->make_immutable;
207             1;
208             __END__
209              
210             =head1 NAME
211              
212             Feed::Pipe - Pipe Atom/RSS feeds through UNIX-style high-level filters
213              
214             =head1 SYNOPSIS
215              
216             use Feed::Pipe;
217             my $pipe = Feed::Pipe
218             ->new(title => "Mah Bukkit")
219             ->cat( qw(1.xml 2.rss 3.atom) )
220             ->grep(sub{$_->title =~ /lolrus/i })
221             ->sort
222             ->head
223             ;
224             my $feed = $pipe->as_atom_obj; # returns XML::Atom::Feed
225             # Add feed details such as author and self link. Then...
226             print $feed->as_xml;
227              
228             =head1 DESCRIPTION
229              
230             This module is a Feed model that can mimic the functionality of standard UNIX pipe and filter style text processing tools. Instead of operating on lines from text files, it operates on entries from Atom (or RSS) feeds. The idea is to provide a high-level tool set for combining, filtering, and otherwise manipulating bunches of Atom data from various feeds.
231              
232             Yes, you could do this with Yahoo Pipes. Until they decide to take it down,
233             or start charging for it. And if your code is guaranteed to have Internet
234             access.
235              
236             Also, you could probably do it with L<Plagger>, if you're genius enough to figure
237             out how.
238              
239             =head1 CONSTRUCTOR
240              
241             To construct a feed pipe, call C<new(%options)>, where the keys of C<%options>
242             correspond to any of the method names described under ACCESSOR METHODS. If you
243             do not need to set any options, C<cat> may also be called on a class and will
244             return an instance.
245              
246             my $pipe = Feed::Pipe->new(title => 'Test Feed');
247              
248             =head1 FILTER METHODS
249              
250             =head2 C<cat(@feeds)>
251              
252             my $pipe = Feed::Pipe->new(title => 'Test')->cat(@feeds);
253             # This also works:
254             my $pipe = Feed::Pipe->cat(@feeds);
255              
256             Combine entries from each feed listed, in the order received, into a single feed.
257             RSS feeds will automatically be converted to Atom before their entries are
258             added. (NOTE: Some data may be lost in the conversion. See L<XML::Feed>.)
259              
260             If called as a class method, will implicitly call C<new> with no options
261             to return an instance before adding the passed C<@feeds>.
262              
263             Values passed to C<cat> may be an instance of Feed::Pipe, XML::Atom::Feed,
264             XML::Feed, or URI, a reference to a scalar variable containing the XML to
265             parse, or a filename that contains the XML to parse. URI objects will be
266             dereferenced and fetched, and the result parsed.
267              
268             Returns the feed pipe itself so that you can chain method calls.
269              
270             =head2 C<grep(sub{})>
271              
272             # Keeps all entries with the word "Keep" in the title
273             my $pipe = Feed::Pipe
274             ->cat($feed)
275             ->grep( sub { $_->title =~ /Keep/ } )
276             ;
277              
278             Filters the list of entries to those for which the passed function returns
279             true. If no function is passed, the default is to keep entries which have
280             C<content> (or a C<summary>). The function should test the entry object
281             aliased in C<$_> which will be a L<XML::Atom::Entry>.
282              
283             Returns the feed pipe itself so that you can chain method calls.
284              
285             =head2 C<head(Int $limit=10)>
286              
287             Output C<$limit> entries from the top of the feed, where C<$limit> defaults to
288             10. If your entries are sorted in standard reverse chronological order, this
289             will pull the C<$limit> most recent entries.
290              
291             Returns the feed pipe itself so that you can chain method calls.
292              
293             =head2 C<map(\&mapfunction)>
294              
295             # Converts upper CASE to lower case in each entry title.
296             my $pipe = Feed::Pipe
297             ->cat($feed)
298             ->map( sub { $_->title =~ s/CASE/case/; return $_; } )
299             ;
300              
301             Constructs a new list of entries composed of the return values from
302             C<mapfunction>. The mapfunc I<must> return one or more XML::Atom::Entry
303             objects, or an empty list. Within the C<mapfunction> C<$_> will be
304             aliased to the XML::Atom::Entry it is visiting.
305              
306             Returns the feed pipe itself so that you can chain method calls.
307              
308             =head2 C<reverse()>
309              
310             Returns the feed with entries sorted in the opposite of the input order. This
311             is just for completeness, you could easily do this with C<sort> instead.
312              
313             =head2 C<sort(sub{})>
314              
315             # Returns a feed with entries sorted by title
316             my $pipe = Feed::Pipe
317             ->cat($feed)
318             ->sort(sub{$_[0]->title cmp $_[1]->title})
319             ;
320              
321             Sort the feed's entries using the comparison function passed as the argument.
322             If no function is passed, sorts in standard reverse chronological order.
323             The sort function should be as described in Perl's L<sort>, but using
324             C<$_[0]> and C<$_[1]> in place of C<$a> and C<$b>, respectively. The two
325             arguments will be L<XML::Atom::Entry> objects.
326              
327             Returns the feed pipe itself so that you can chain method calls.
328              
329             =head2 C<tail(Int $limit=10)>
330              
331             Output C<$limit> entries from the end of the feed, where C<$limit> defaults to
332             10. If your entries are sorted in standard reverse chronological order, this
333             will pull the C<$limit> oldest entries.
334              
335             Returns the feed pipe itself so that you can chain method calls.
336              
337             =head1 ACCESSOR METHODS
338              
339             B<NOTE: These methods are not filters. They do not return the feed pipe and
340             must not be used in a filter chain (except maybe at the end).>
341              
342             =head2 title
343              
344             Human readable title of the feed. Defaults to "Combined Feed".
345              
346             =head2 id
347              
348             A string conforming to the definition of an Atom ID. Defaults to a newly
349             generated UUID.
350              
351             =head2 updated
352              
353             A DateTime object representing when the feed should claim to have been updated.
354             Defaults to "now".
355              
356             =head1 OTHER METHODS
357              
358             B<NOTE: These methods are not filters. They do not return the feed pipe and
359             must not be used in a filter chain (except maybe at the end).>
360              
361             =head2 C<as_atom_obj>
362              
363             Returns the L<XML::Atom::Feed> object represented by the feed pipe.
364              
365             =head2 C<as_xml>
366              
367             Serialize the feed object to an XML (Atom 1.0) string and return the string.
368             Equivalent to calling C<$pipe-E<gt>as_atom_obj-E<gt>as_xml>. NOTE: The current
369             implementation does not guarantee that the resultant output will be valid Atom.
370             In particular, you are likely to be missing required C<author> and C<link>
371             elements. For the moment, you should use C<as_atom_obj> and manipulate the
372             feed-level elements as needed if you require validatable output.
373              
374             =head2 C<count>
375              
376             Returns the number of entries in the feed.
377              
378             =head2 C<entries>
379              
380             Returns the list of L<XML::Atom::Entry> objects in the feed.
381              
382             =head1 CONTRIBUTE OR COMPLAIN
383              
384             Report bugs via RT, CPAN's request tracker
385             http://rt.cpan.org/NoAuth/Bugs.html?Dist=Feed-Pipe
386              
387             Clone the code from github:
388             git://github.com/veselosky/Feed-Pipe.git
389              
390             Watch Development:
391             http://github.com/veselosky/Feed-Pipe
392              
393             =head1 AUTHOR
394              
395             Vince Veselosky, C<< <vince at control-escape.com> >>
396              
397             =head1 COPYRIGHT & LICENSE
398              
399             Copyright 2009 Vince Veselosky.
400              
401             This program is distributed under the MIT (X11) License:
402             L<http://www.opensource.org/licenses/mit-license.php>
403              
404             Permission is hereby granted, free of charge, to any person
405             obtaining a copy of this software and associated documentation
406             files (the "Software"), to deal in the Software without
407             restriction, including without limitation the rights to use,
408             copy, modify, merge, publish, distribute, sublicense, and/or sell
409             copies of the Software, and to permit persons to whom the
410             Software is furnished to do so, subject to the following
411             conditions:
412              
413             The above copyright notice and this permission notice shall be
414             included in all copies or substantial portions of the Software.
415              
416             THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
417             EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
418             OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
419             NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
420             HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
421             WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
422             FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
423             OTHER DEALINGS IN THE SOFTWARE.
424              
425              
426             =cut