line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# ABSTRACT: match an RSS item which has been seen before |
2
|
|
|
|
|
|
|
|
3
|
3
|
|
|
3
|
|
247749
|
use strict; |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
74
|
|
4
|
3
|
|
|
3
|
|
15
|
use warnings; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
150
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
package App::Rssfilter::Match::Duplicates; |
8
|
|
|
|
|
|
|
$App::Rssfilter::Match::Duplicates::VERSION = '0.08'; # TRIAL |
9
|
3
|
|
|
3
|
|
2769
|
use Method::Signatures; |
|
3
|
|
|
|
|
234032
|
|
|
3
|
|
|
|
|
22
|
|
10
|
3
|
|
|
3
|
|
1702
|
use Try::Tiny; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
261
|
|
11
|
3
|
|
|
3
|
|
2723
|
use List::MoreUtils qw< apply >; |
|
3
|
|
|
|
|
36488
|
|
|
3
|
|
|
|
|
21
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
|
14
|
3
|
100
|
|
3
|
|
429313
|
func match ( $item ) { |
|
16
|
100
|
|
16
|
|
5976
|
|
|
15
|
|
|
|
|
27
|
|
|
15
|
|
|
|
|
48
|
|
15
|
3
|
|
|
3
|
|
291
|
use feature 'state'; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
731
|
|
16
|
14
|
|
|
|
|
15
|
state %prev; |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
my @matchables = |
19
|
22
|
|
|
22
|
|
61
|
apply { s/ [?] .* \z //xms } |
20
|
26
|
|
|
|
|
1329
|
grep { $_ ne '' } |
21
|
14
|
|
|
26
|
|
92
|
$item->find( 'guid, link' )->map( sub { $_->text } )->each; |
|
26
|
|
|
|
|
6115
|
|
22
|
|
|
|
|
|
|
|
23
|
14
|
|
|
|
|
231
|
my $res = grep { defined } @prev{ @matchables }; |
|
22
|
|
|
|
|
53
|
|
24
|
14
|
|
|
|
|
45
|
@prev{ @matchables } = ( 1 ) x @matchables; |
25
|
14
|
|
|
|
|
78
|
return 0 < $res; |
26
|
|
|
|
|
|
|
} |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
1; |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
__END__ |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=pod |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=encoding UTF-8 |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
=head1 NAME |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
App::Rssfilter::Match::Duplicates - match an RSS item which has been seen before |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head1 VERSION |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
version 0.08 |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=head1 SYNOPSIS |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
use App::Rssfilter::Match::Duplicates; |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
use Mojo::DOM; |
49
|
|
|
|
|
|
|
my $first_rss = Mojo::DOM->new( <<"End_of_RSS" ); |
50
|
|
|
|
|
|
|
<?xml version="1.0" encoding="UTF-8"?> |
51
|
|
|
|
|
|
|
<rss> |
52
|
|
|
|
|
|
|
<channel> |
53
|
|
|
|
|
|
|
<item> |
54
|
|
|
|
|
|
|
<link>http://rss.slashdot.org/~r/Slashdot/slashdot/~6/gu7UEWn8onK/is-typing-tiring-your-toes</link> |
55
|
|
|
|
|
|
|
<description>type with toes for tighter tarsals</description> |
56
|
|
|
|
|
|
|
</item> |
57
|
|
|
|
|
|
|
<item> |
58
|
|
|
|
|
|
|
<link>http://rss.slashdot.org/~r/Slashdot/slashdot/~9/lloek9InU2p/new-planet-discovered-on-far-side-of-sun</link> |
59
|
|
|
|
|
|
|
<description>vulcan is here</description> |
60
|
|
|
|
|
|
|
</item> |
61
|
|
|
|
|
|
|
</channel> |
62
|
|
|
|
|
|
|
</rss> |
63
|
|
|
|
|
|
|
End_of_RSS |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
my $second_rss = Mojo::DOM->new( <<"End_of_RSS" ); |
66
|
|
|
|
|
|
|
<?xml version="1.0" encoding="UTF-8"?> |
67
|
|
|
|
|
|
|
<rss> |
68
|
|
|
|
|
|
|
<channel> |
69
|
|
|
|
|
|
|
<item> |
70
|
|
|
|
|
|
|
<link>http://rss.slashdot.org/~r/Slashdot/slashdot/~3/mnej39gJa9E/new-rocket-to-visit-mars-in-60-days</link> |
71
|
|
|
|
|
|
|
<description>setting a new speed record</description> |
72
|
|
|
|
|
|
|
</item> |
73
|
|
|
|
|
|
|
<item> |
74
|
|
|
|
|
|
|
<link>http://rss.slashdot.org/~r/Slashdot/slashdot/~9/lloek9InU2p/new-planet-discovered-on-far-side-of-sun</link> |
75
|
|
|
|
|
|
|
<description>vulcan is here</description> |
76
|
|
|
|
|
|
|
</item> |
77
|
|
|
|
|
|
|
</channel> |
78
|
|
|
|
|
|
|
</rss> |
79
|
|
|
|
|
|
|
End_of_RSS |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
print "$_\n" for $first_rss->find( 'item' )->grep( \&App::Rssfilter::Match::Duplicates::match ); |
82
|
|
|
|
|
|
|
print "$_\n" for $second_rss->find( 'item' )->grep( \&App::Rssfilter::Match::Duplicates::match ); |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
# or with an App::Rssfilter::Rule |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
use App::Rssfilter::Rule; |
87
|
|
|
|
|
|
|
my $dupe_rule = App::Rssfilter::Rule->new( |
88
|
|
|
|
|
|
|
condition => 'Duplicates', |
89
|
|
|
|
|
|
|
action => sub { print shift->to_string, "\n" }, |
90
|
|
|
|
|
|
|
); |
91
|
|
|
|
|
|
|
$dupe_rule->constrain( $first_rss ); |
92
|
|
|
|
|
|
|
$dupe_rule->constrain( $second_rss ); |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
# either way, prints |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
# <item> |
97
|
|
|
|
|
|
|
# <link>http://rss.slashdot.org/~r/Slashdot/slashdot/~9/lloek9InU2p/new-planet-discovered-on-far-side-of-sun</link> |
98
|
|
|
|
|
|
|
# <description>vulcan is here</description> |
99
|
|
|
|
|
|
|
# </item> |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=head1 DESCRIPTION |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
This module will match RSS items if either the GUID or the link of the item has been seen previously. |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=head1 FUNCTIONS |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=head2 match |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
my $item_seen_before = App::Rssfilter::Match::Duplicates::match( $item ); |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
Returns true if C<$item> has a GUID or link which matches a previously-seen GUID or link. Query strings in links and GUIDs are stripped before comparison, so they are ignored for the purposes of matching a previously-seen GUID or link. |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=head1 SEE ALSO |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=over 4 |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=item * |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
L<App::Rssfilter> |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=item * |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
L<App::Rssfilter::Rule> |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=back |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=head1 AUTHOR |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
Daniel Holz <dgholz@gmail.com> |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
This software is copyright (c) 2015 by Daniel Holz. |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
136
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=cut |