line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# ABSTRACT: match an RSS item which has been seen before |
2
|
|
|
|
|
|
|
|
3
|
3
|
|
|
3
|
|
587629
|
use strict; |
|
3
|
|
|
|
|
9
|
|
|
3
|
|
|
|
|
122
|
|
4
|
3
|
|
|
3
|
|
17
|
use warnings; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
224
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
package App::Rssfilter::Match::Duplicates; |
8
|
|
|
|
|
|
|
{ |
9
|
|
|
|
|
|
|
$App::Rssfilter::Match::Duplicates::VERSION = '0.07'; |
10
|
|
|
|
|
|
|
} |
11
|
3
|
|
|
3
|
|
3568
|
use Method::Signatures; |
|
3
|
|
|
|
|
514066
|
|
|
3
|
|
|
|
|
28
|
|
12
|
3
|
|
|
3
|
|
4254
|
use Try::Tiny; |
|
3
|
|
|
|
|
8
|
|
|
3
|
|
|
|
|
253
|
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
|
15
|
3
|
100
|
|
3
|
|
1078186
|
# match( $item )
#
# Reports whether an RSS item has been seen by an earlier call.
#
# $item is expected to respond to find( 'guid, link' ) with a collection that
# supports pluck( 'text' ) and each (presumably a Mojo::DOM node for an <item>
# element, as in the SYNOPSIS -- confirm against callers).
#
# Every non-empty GUID/link text is reduced to a key by removing everything
# from the first '?' to the end of the string. The item matches when at least
# one of its keys was recorded by a previous call. All of the item's keys are
# recorded before returning, so the next item carrying any of them will match.
#
# Returns a true value if the item was seen before, a false value otherwise.
func match ( $item ) {
    use feature 'state';

    # Keys recorded by earlier calls; a state variable, so it persists
    # between invocations of this function.
    state %seen;

    my @keys;
    for my $text ( $item->find( 'guid, link' )->pluck( 'text' )->each ) {
        # Empty texts are skipped *before* stripping, so a text consisting
        # only of a query string still contributes an (empty) key.
        next if $text eq '';

        # Work on a copy so the plucked value itself is left untouched.
        ( my $key = $text ) =~ s/ [?] .* \z //xms;
        push @keys, $key;
    }

    # Count hits first and record afterwards: an item whose GUID and link
    # share a key must not match against itself.
    my $hits = 0;
    for my $key ( @keys ) {
        $hits++ if defined $seen{ $key };
    }

    $seen{ $_ } = 1 for @keys;

    return $hits > 0;
}
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
1; |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
__END__ |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=pod |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=encoding UTF-8 |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=head1 NAME |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
App::Rssfilter::Match::Duplicates - match an RSS item which has been seen before |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=head1 VERSION |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
version 0.07 |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=head1 SYNOPSIS |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
use App::Rssfilter::Match::Duplicates; |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
use Mojo::DOM; |
50
|
|
|
|
|
|
|
my $first_rss = Mojo::DOM->new( <<"End_of_RSS" ); |
51
|
|
|
|
|
|
|
<?xml version="1.0" encoding="UTF-8"?> |
52
|
|
|
|
|
|
|
<rss> |
53
|
|
|
|
|
|
|
<channel> |
54
|
|
|
|
|
|
|
<item> |
55
|
|
|
|
|
|
|
<link>http://rss.slashdot.org/~r/Slashdot/slashdot/~6/gu7UEWn8onK/is-typing-tiring-your-toes</link> |
56
|
|
|
|
|
|
|
<description>type with toes for tighter tarsals</description> |
57
|
|
|
|
|
|
|
</item> |
58
|
|
|
|
|
|
|
<item> |
59
|
|
|
|
|
|
|
<link>http://rss.slashdot.org/~r/Slashdot/slashdot/~9/lloek9InU2p/new-planet-discovered-on-far-side-of-sun</link> |
60
|
|
|
|
|
|
|
<description>vulcan is here</description> |
61
|
|
|
|
|
|
|
</item> |
62
|
|
|
|
|
|
|
</channel> |
63
|
|
|
|
|
|
|
</rss> |
64
|
|
|
|
|
|
|
End_of_RSS |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
my $second_rss = Mojo::DOM->new( <<"End_of_RSS" ); |
67
|
|
|
|
|
|
|
<?xml version="1.0" encoding="UTF-8"?> |
68
|
|
|
|
|
|
|
<rss> |
69
|
|
|
|
|
|
|
<channel> |
70
|
|
|
|
|
|
|
<item> |
71
|
|
|
|
|
|
|
<link>http://rss.slashdot.org/~r/Slashdot/slashdot/~3/mnej39gJa9E/new-rocket-to-visit-mars-in-60-days</link> |
72
|
|
|
|
|
|
|
<description>setting a new speed record</description> |
73
|
|
|
|
|
|
|
</item> |
74
|
|
|
|
|
|
|
<item> |
75
|
|
|
|
|
|
|
<link>http://rss.slashdot.org/~r/Slashdot/slashdot/~9/lloek9InU2p/new-planet-discovered-on-far-side-of-sun</link> |
76
|
|
|
|
|
|
|
<description>vulcan is here</description> |
77
|
|
|
|
|
|
|
</item> |
78
|
|
|
|
|
|
|
</channel> |
79
|
|
|
|
|
|
|
</rss> |
80
|
|
|
|
|
|
|
End_of_RSS |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
print "$_\n" for $first_rss->find( 'item' )->grep( \&App::Rssfilter::Match::Duplicates::match ); |
83
|
|
|
|
|
|
|
print "$_\n" for $second_rss->find( 'item' )->grep( \&App::Rssfilter::Match::Duplicates::match ); |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# or with an App::Rssfilter::Rule |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
use App::Rssfilter::Rule; |
88
|
|
|
|
|
|
|
my $dupe_rule = App::Rssfilter::Rule->new( |
89
|
|
|
|
|
|
|
condition => 'Duplicates', |
90
|
|
|
|
|
|
|
action => sub { print shift->to_xml, "\n" }, |
91
|
|
|
|
|
|
|
); |
92
|
|
|
|
|
|
|
$dupe_rule->constrain( $first_rss ); |
93
|
|
|
|
|
|
|
$dupe_rule->constrain( $second_rss ); |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# either way, prints |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# <item> |
98
|
|
|
|
|
|
|
# <link>http://rss.slashdot.org/~r/Slashdot/slashdot/~9/lloek9InU2p/new-planet-discovered-on-far-side-of-sun</link> |
99
|
|
|
|
|
|
|
# <description>vulcan is here</description> |
100
|
|
|
|
|
|
|
# </item> |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
=head1 DESCRIPTION |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
This module will match RSS items if either the GUID or the link of the item has been seen previously. |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=head1 FUNCTIONS |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=head2 match |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
my $item_seen_before = App::Rssfilter::Match::Duplicates::match( $item ); |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
Returns true if C<$item> has a GUID or link which matches a previously-seen GUID or link. Query strings in links and GUIDs are ignored for the purposes of matching a previously-seen GUID or link. |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=head1 SEE ALSO |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=over 4 |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=item * |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
L<App::Rssfilter> |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=item * |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
L<App::Rssfilter::Rule> |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=back |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=head1 AUTHOR |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
Daniel Holz <dgholz@gmail.com> |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
This software is copyright (c) 2013 by Daniel Holz. |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
137
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=cut |