line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# $File: //member/autrijus/XML-RSS-Aggregate/lib/XML/RSS/Aggregate.pm $ $Author: autrijus $ |
2
|
|
|
|
|
|
|
# $Revision: #4 $ $Change: 2924 $ $DateTime: 2002/12/25 15:04:33 $ |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
package XML::RSS::Aggregate; |
5
|
|
|
|
|
|
|
$XML::RSS::Aggregate::VERSION = '0.02'; |
6
|
|
|
|
|
|
|
|
7
|
1
|
|
|
1
|
|
700
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
37
|
|
8
|
1
|
|
|
1
|
|
439
|
use XML::RSS; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
use base 'XML::RSS'; |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
use Date::Parse; |
12
|
|
|
|
|
|
|
use LWP::Simple 'get'; |
13
|
|
|
|
|
|
|
use HTML::Entities 'encode_entities'; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 NAME |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
XML::RSS::Aggregate - RSS Aggregator |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 SYNOPSIS |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
my $rss = XML::RSS::Aggregate->new( |
22
|
|
|
|
|
|
|
# parameters for XML::RSS->channel() |
23
|
|
|
|
|
|
|
title => 'Aggregated Examples', |
24
|
|
|
|
|
|
|
link => 'http://blog.elixus.org/', |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# parameters for XML::RSS::Aggregate->aggregate() |
27
|
|
|
|
|
|
|
sources => [ qw( |
28
|
|
|
|
|
|
|
http://one.example.com/index.rdf |
29
|
|
|
|
|
|
|
http://another.example.com/index.rdf |
30
|
|
|
|
|
|
|
http://etc.example.com/index.rdf |
31
|
|
|
|
|
|
|
) ], |
32
|
|
|
|
|
|
|
sort_by => sub { |
33
|
|
|
|
|
|
|
$_[0]->{dc}{subject} # default to sort by dc:date |
34
|
|
|
|
|
|
|
}, |
35
|
|
|
|
|
|
|
uniq_by => sub { |
36
|
|
|
|
|
|
|
$_[0]->{title} # default to uniq by link |
37
|
|
|
|
|
|
|
} |
38
|
|
|
|
|
|
|
); |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
$rss->aggregate( sources => [ ... ] ); # more items |
41
|
|
|
|
|
|
|
$rss->save("all.rdf"); |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=head1 DESCRIPTION |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
This module implements a subclass of B, adding a single |
46
|
|
|
|
|
|
|
C method that fetches other RSS feeds and add to the object |
47
|
|
|
|
|
|
|
itself. It handles the proper ordering and duplication removal for |
48
|
|
|
|
|
|
|
aggregated links. |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
Also, the constructor C is modified to take arguments to pass |
51
|
|
|
|
|
|
|
implicitly to C and C methods. |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
All the base methods are still applicable to this module; please see |
54
|
|
|
|
|
|
|
L for details. |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=head1 METHODS |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=over 4 |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=item aggregate (sources=>\@url, sort_by=>\&func, uniq_by=>\&func) |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
This method fetches all RSS feeds listed in C<@url> and pass their |
63
|
|
|
|
|
|
|
items to the object's C. |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
The optional C argument specifies the function to use for |
66
|
|
|
|
|
|
|
ordering RSS items; it defaults to sort them by their C<{dc}{date}> |
67
|
|
|
|
|
|
|
attribute (converted to absolute timestamps), with ties broken by |
68
|
|
|
|
|
|
|
their C<{link}> attribute. |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
The optional C argument specifies the function to use for |
71
|
|
|
|
|
|
|
removing duplicate RSS items; it defaults to remove items that has |
72
|
|
|
|
|
|
|
the same C<{link}> value. |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=back |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=cut |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
sub new { |
79
|
|
|
|
|
|
|
my ($class, %args) = @_; |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
my $version = delete($args{version}) || '1.0'; |
82
|
|
|
|
|
|
|
my $self = $class->SUPER::new( version => $version ); |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
my $sources = delete($args{sources}); |
85
|
|
|
|
|
|
|
my $sort_by = delete($args{sort_by}); |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
$self->channel(%args) if %args; |
88
|
|
|
|
|
|
|
$self->aggregate( |
89
|
|
|
|
|
|
|
sources => $sources, |
90
|
|
|
|
|
|
|
sort_by => $sort_by, |
91
|
|
|
|
|
|
|
) if $sources; |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
return $self; |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
sub aggregate { |
97
|
|
|
|
|
|
|
my ($self, %args) = @_; |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
my $sources = $args{sources} or return; |
100
|
|
|
|
|
|
|
my $sort_by = $args{sort_by} || sub { |
101
|
|
|
|
|
|
|
my $date = $_[0]->{dc}{date}; |
102
|
|
|
|
|
|
|
$date =~ s/:(\d\d)$/$1/ if $date; |
103
|
|
|
|
|
|
|
sprintf("%20s", str2time($date)).$_[0]->{link} |
104
|
|
|
|
|
|
|
}; |
105
|
|
|
|
|
|
|
my $uniq_by = $args{uniq_by} || sub { |
106
|
|
|
|
|
|
|
$_[0]->{link} |
107
|
|
|
|
|
|
|
}; |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
my $old_items = $self->{items} || []; |
110
|
|
|
|
|
|
|
$self->{items} = []; |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
my %saw; |
113
|
|
|
|
|
|
|
$self->add_item(%{$_->[0]}) for |
114
|
|
|
|
|
|
|
sort { $b->[1] cmp $a->[1] } |
115
|
|
|
|
|
|
|
grep { $_->[1] } |
116
|
|
|
|
|
|
|
map { [ $_ => scalar($sort_by->($_)) ] } |
117
|
|
|
|
|
|
|
grep { !$saw{$uniq_by->($_)}++ } @{$old_items}, |
118
|
|
|
|
|
|
|
map { encode_entities($_, '&<>') for grep {!ref($_)} values %{$_}; $_ } |
119
|
|
|
|
|
|
|
map { encode_entities($_, '&<>') for grep {!ref($_)} values %{$_->{dc}}; $_ } |
120
|
|
|
|
|
|
|
map { encode_entities($_, '&<>') for grep {!ref($_)} values %{$_->{syn}}; $_ } |
121
|
|
|
|
|
|
|
map { encode_entities($_, '&<>') for grep {!ref($_)} @{$_->{taxo}}; $_ } |
122
|
|
|
|
|
|
|
map { eval { (my $rss = XML::RSS->new)->parse(get($_)); @{$rss->{items}} } } |
123
|
|
|
|
|
|
|
grep { /^\w+:/ } @{$sources}; |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
return $self; |
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
1; |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=head1 SEE ALSO |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
L |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=head1 AUTHORS |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
Autrijus Tang Eautrijus@autrijus.orgE |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=head1 COPYRIGHT |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
Copyright 2002 by Autrijus Tang Eautrijus@autrijus.orgE. |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
143
|
|
|
|
|
|
|
under the same terms as Perl itself. |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
See L |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=cut |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
__END__ |