File Coverage

blib/lib/WWW/Noss/FeedReader.pm
Criterion Covered Total %
statement 73 93 78.4
branch 23 40 57.5
condition 9 18 50.0
subroutine 10 11 90.9
pod 1 2 50.0
total 116 164 70.7


line stmt bran cond sub pod time code
1             package WWW::Noss::FeedReader;
2 5     5   2004 use 5.016;
  5         17  
3 5     5   22 use strict;
  5         31  
  5         102  
4 5     5   17 use warnings;
  5         7  
  5         440  
5             our $VERSION = '2.02';
6              
7 5     5   27 use Exporter qw(import);
  5         7  
  5         221  
8             our @EXPORT_OK = qw(read_feed discover_feeds);
9              
10 5     5   4450 use XML::LibXML;
  5         345497  
  5         30  
11              
12 5     5   5657 use WWW::Noss::FeedReader::Atom;
  5         21  
  5         211  
13 5     5   4195 use WWW::Noss::FeedReader::RSS;
  5         39  
  5         239  
14 5     5   32 use WWW::Noss::TextToHtml qw(strip_tags unescape_html);
  5         9  
  5         5340  
15              
16             # What is with difference between 'title' and 'displaytitle'?
17             # Prior to 1.09, there was only title, which served as both the title to use
18             # for internally identifying a post by using it in a post's nossuid, and
19             # also the title to show the user when querying posts. The issue is that if I
20             # ever wanted to change the way noss processed certain titles, this would
21             # interfere with generating posts' nossuids and cause them to be considered
22             # new posts. That is why I introduced displaytitle, so that it could serve
23             # as the human-readable version of a post's title which could be safely
24             # changed without causing issues for existing databases.
25             #
26             # So basically:
27             # title - Internal title used by noss for generating nossuids; should not
28             # change.
29             # displaytitle - Title that will be shown to users; can be changed.
30              
31             sub _title_from_desc {
32              
33 0     0   0 my ($desc) = @_;
34              
35 0 0       0 return '' if not defined $desc;
36              
37 0         0 $desc = unescape_html(strip_tags($desc));
38 0         0 $desc =~ s/\s+/ /g;
39 0         0 $desc =~ s/^\s+|\s+$//g;
40 0         0 my $long = length $desc > 40;
41 0         0 $desc = substr $desc, 0, 40;
42 0         0 $desc =~ s/ $//;
43 0 0       0 $desc .= '...' if $long;
44              
45 0         0 return $desc;
46              
47             }
48              
49             sub read_feed {
50              
51 16     16 1 82 my ($feed) = @_;
52              
53 16         32 my $channel;
54             my $entries;
55              
56 16         23 my $dom = eval { XML::LibXML->load_xml(location => $feed->path) };
  16         71  
57              
58 16 50       8775 if (not defined $dom) {
59 0         0 die sprintf
60             "Failed to parse %s as an XML document, %s might not be an RSS or Atom feed\n",
61             $feed->path,
62             $feed->name;
63             }
64              
65 16 100 33     365 if ($dom->documentElement->nodeName eq 'rss') {
    50          
66 3         38 ($channel, $entries) = WWW::Noss::FeedReader::RSS->read_feed(
67             $feed,
68             $dom
69             );
70             } elsif (
71             $dom->documentElement->nodeName eq 'feed' and
72             $dom->documentElement->getAttribute('xmlns') eq $WWW::Noss::FeedReader::Atom::NS
73             ) {
74 13         306 ($channel, $entries) = WWW::Noss::FeedReader::Atom->read_feed(
75             $feed,
76             $dom
77             );
78             } else {
79 0         0 die sprintf "%s is not an RSS or Atom feed\n", $feed->name;
80             }
81              
82 16 100       558 if (defined $channel->{ description }) {
83 15         444 $channel->{ description } =~ s/\s+/ /g;
84 15         118 $channel->{ description } =~ s/^ | $//g;
85             }
86              
87 16         88 for my $i (0 .. $#$entries) {
88 72 50       149 if (not defined $entries->[$i]{ displaytitle }) {
89 0 0       0 if (defined $entries->[$i]{ summary }) {
    0          
90 0         0 $entries->[$i]{ displaytitle } = _title_from_desc($entries->[$i]{ summary });
91             } elsif (defined $entries->[$i]{ link }) {
92 0         0 $entries->[$i]{ displaytitle } = $entries->[$i]{ link };
93             }
94             }
95 72 100       212 unless ($feed->title_ok($entries->[$i]{ displaytitle })) {
96 9         28 $entries->[$i] = undef;
97 9         20 next;
98             }
99 63 100       156 unless ($feed->content_ok($entries->[$i]{ summary })) {
100 10         30 $entries->[$i] = undef;
101 10         24 next;
102             }
103 53 100       147 unless ($feed->tags_ok($entries->[$i]{ category })) {
104 10         61 $entries->[$i] = undef;
105 10         43 next;
106             }
107             }
108              
109 16         39 @$entries = grep { defined } @$entries;
  72         139  
110              
111 16 50       39 unless (@$entries) {
112 0         0 die sprintf "%s does contain any posts\n", $feed->name;
113             }
114              
115 16 100 66     66 if (defined $feed->limit and $feed->limit < @$entries) {
116 1         4 @$entries = @$entries[-$feed->limit .. -1];
117             }
118              
119 16         47 for my $i (0 .. $#$entries) {
120 40         66 $entries->[$i]{ nossid } = $i + 1;
121 40   33     4121 $entries->[$i]{ author } //= $channel->{ author };
122             $entries->[$i]{ nossuid } =
123             join ";",
124 200   50     482 map { $_ // '' }
125 40         53 @{ $entries->[$i] }{ qw(uid feed title link published) };
  40         112  
126             }
127              
128 16         495 return ($channel, $entries);
129              
130             }
131              
132             sub discover_feeds {
133              
134 1     1 0 3234 my ($html_file) = @_;
135              
136 1         7 my $dom = XML::LibXML->load_html(
137             location => $html_file,
138             recover => 2,
139             suppress_errors => 1,
140             );
141              
142 1         6035 my ($head) = $dom->findnodes('/html/head');
143 1 50       53 if (not defined $head) {
144 0         0 return ();
145             }
146              
147 1         3 my @discovered;
148 1         5 for my $c ($head->childNodes) {
149 5 100       52 next unless $c->isa('XML::LibXML::Element');
150 2 50       13 next unless $c->nodeName eq 'link';
151 2   50     11 my $rel = $c->getAttribute('rel') // '';
152 2 50       34 if ($rel ne 'alternate') {
153 0         0 next;
154             }
155 2   50     9 my $type = $c->getAttribute('type') // '';
156 2 50 66     31 if ($type ne 'application/atom+xml' and $type ne 'application/rss+xml') {
157 0         0 next;
158             }
159 2         9 my $href = $c->getAttribute('href');
160 2 50       25 if (not defined $href) {
161 0         0 next;
162             }
163 2         7 push @discovered, $href;
164             }
165              
166 1         6 return @discovered;
167              
168             }
169              
170             1;
171              
172             =head1 NAME
173              
174             WWW::Noss::FeedReader - RSS/Atom feed reader
175              
176             =head1 USAGE
177              
178             use WWW::Noss::FeedReader qw(read_feed);
179              
180             my ($channel, $entries) = read_feed($feed);
181              
182             =head1 DESCRIPTION
183              
184             B is a module that provides the C
185             subroutine for reading RSS and Atom feeds. This is a private module, please
186             consult the L manual for user documentation.
187              
188             =head1 SUBROUTINES
189              
190             Subroutines are not exported automatically.
191              
192             =over 4
193              
194             =item (\%channel, \@entries) = read_feed($feed)
195              
196             Reads the given L object and returns the channel and
197             entry data. Returns both as C on failure.
198              
199             C<\%channel> should look something like this:
200              
201             {
202             nossname => ...,
203             nosslink => ...,
204             title => ...,
205             link => ...,
206             description => ...,
207             updated => ...,
208             author => ...,
209             category => [ ... ],
210             generator => ...,
211             image => ...,
212             rights => ...,
213             skiphours => [ ... ],
214             skipdays => [ ... ],
215             }
216              
217             C<\@entries> will be a list of hash refs that look something like this:
218              
219             {
220             nossid => ...,
221             status => ...,
222             feed => ...,
223             title => ...,
224             link => ...,
225             author => ...,
226             category => [ ... ],
227             summary => ...,
228             published => ...,
229             updated => ...,
230             uid => ...,
231             displaytitle => ...,
232             }
233              
234             =back
235              
236             =head1 AUTHOR
237              
238             Written by Samuel Young, Esamyoung12788@gmail.comE.
239              
240             This project's source can be found on its
241             L. Comments and pull
242             requests are welcome!
243              
244             =head1 COPYRIGHT
245              
246             Copyright (C) 2025-2026 Samuel Young
247              
248             This program is free software: you can redistribute it and/or modify
249             it under the terms of the GNU General Public License as published by
250             the Free Software Foundation, either version 3 of the License, or
251             (at your option) any later version.
252              
253             =head1 SEE ALSO
254              
255             L, L
256              
257             =cut
258              
259             # vim: expandtab shiftwidth=4