File Coverage

blib/lib/WWW/Noss/FeedReader.pm

Criterion	Covered	Total	%
statement	73	93	78.4
branch	23	40	57.5
condition	9	18	50.0
subroutine	10	11	90.9
pod	1	2	50.0
total	116	164	70.7

line	stmt	bran	cond	sub	pod	time	code
1							package WWW::Noss::FeedReader;
2	5			5		2004	use 5.016;
	5					17
3	5			5		22	use strict;
	5					31
	5					102
4	5			5		17	use warnings;
	5					7
	5					440
5							our $VERSION = '2.02';
6
7	5			5		27	use Exporter qw(import);
	5					7
	5					221
8							our @EXPORT_OK = qw(read_feed discover_feeds);
9
10	5			5		4450	use XML::LibXML;
	5					345497
	5					30
11
12	5			5		5657	use WWW::Noss::FeedReader::Atom;
	5					21
	5					211
13	5			5		4195	use WWW::Noss::FeedReader::RSS;
	5					39
	5					239
14	5			5		32	use WWW::Noss::TextToHtml qw(strip_tags unescape_html);
	5					9
	5					5340
15
16							# What is with difference between 'title' and 'displaytitle'?
17							# Prior to 1.09, there was only title, which served as both the title to use
18							# for internally identifying a post by using it in a post's nossuid, and
19							# also the title to show the user when querying posts. The issue is that if I
20							# ever wanted to change the way noss processed certain titles, this would
21							# interfere with generating posts' nossuids and cause them to be considered
22							# new posts. That is why I introduced displaytitle, so that it could serve
23							# as the human-readable version of a post's title which could be safely
24							# changed without causing issues for existing databases.
25							#
26							# So basically:
27							# title - Internal title used by noss for generating nossuids; should not
28							# change.
29							# displaytitle - Title that will be shown to users; can be changed.
30
31							sub _title_from_desc {
32
33	0			0		0	my ($desc) = @_;
34
35	0	0				0	return '' if not defined $desc;
36
37	0					0	$desc = unescape_html(strip_tags($desc));
38	0					0	$desc =~ s/\s+/ /g;
39	0					0	$desc =~ s/^\s+\|\s+$//g;
40	0					0	my $long = length $desc > 40;
41	0					0	$desc = substr $desc, 0, 40;
42	0					0	$desc =~ s/ $//;
43	0	0				0	$desc .= '...' if $long;
44
45	0					0	return $desc;
46
47							}
48
49							sub read_feed {
50
51	16			16	1	82	my ($feed) = @_;
52
53	16					32	my $channel;
54							my $entries;
55
56	16					23	my $dom = eval { XML::LibXML->load_xml(location => $feed->path) };
	16					71
57
58	16	50				8775	if (not defined $dom) {
59	0					0	die sprintf
60							"Failed to parse %s as an XML document, %s might not be an RSS or Atom feed\n",
61							$feed->path,
62							$feed->name;
63							}
64
65	16	100	33			365	if ($dom->documentElement->nodeName eq 'rss') {
		50
66	3					38	($channel, $entries) = WWW::Noss::FeedReader::RSS->read_feed(
67							$feed,
68							$dom
69							);
70							} elsif (
71							$dom->documentElement->nodeName eq 'feed' and
72							$dom->documentElement->getAttribute('xmlns') eq $WWW::Noss::FeedReader::Atom::NS
73							) {
74	13					306	($channel, $entries) = WWW::Noss::FeedReader::Atom->read_feed(
75							$feed,
76							$dom
77							);
78							} else {
79	0					0	die sprintf "%s is not an RSS or Atom feed\n", $feed->name;
80							}
81
82	16	100				558	if (defined $channel->{ description }) {
83	15					444	$channel->{ description } =~ s/\s+/ /g;
84	15					118	$channel->{ description } =~ s/^ \| $//g;
85							}
86
87	16					88	for my $i (0 .. $#$entries) {
88	72	50				149	if (not defined $entries->[$i]{ displaytitle }) {
89	0	0				0	if (defined $entries->[$i]{ summary }) {
		0
90	0					0	$entries->[$i]{ displaytitle } = _title_from_desc($entries->[$i]{ summary });
91							} elsif (defined $entries->[$i]{ link }) {
92	0					0	$entries->[$i]{ displaytitle } = $entries->[$i]{ link };
93							}
94							}
95	72	100				212	unless ($feed->title_ok($entries->[$i]{ displaytitle })) {
96	9					28	$entries->[$i] = undef;
97	9					20	next;
98							}
99	63	100				156	unless ($feed->content_ok($entries->[$i]{ summary })) {
100	10					30	$entries->[$i] = undef;
101	10					24	next;
102							}
103	53	100				147	unless ($feed->tags_ok($entries->[$i]{ category })) {
104	10					61	$entries->[$i] = undef;
105	10					43	next;
106							}
107							}
108
109	16					39	@$entries = grep { defined } @$entries;
	72					139
110
111	16	50				39	unless (@$entries) {
112	0					0	die sprintf "%s does contain any posts\n", $feed->name;
113							}
114
115	16	100	66			66	if (defined $feed->limit and $feed->limit < @$entries) {
116	1					4	@$entries = @$entries[-$feed->limit .. -1];
117							}
118
119	16					47	for my $i (0 .. $#$entries) {
120	40					66	$entries->[$i]{ nossid } = $i + 1;
121	40		33			4121	$entries->[$i]{ author } //= $channel->{ author };
122							$entries->[$i]{ nossuid } =
123							join ";",
124	200		50			482	map { $_ // '' }
125	40					53	@{ $entries->[$i] }{ qw(uid feed title link published) };
	40					112
126							}
127
128	16					495	return ($channel, $entries);
129
130							}
131
132							sub discover_feeds {
133
134	1			1	0	3234	my ($html_file) = @_;
135
136	1					7	my $dom = XML::LibXML->load_html(
137							location => $html_file,
138							recover => 2,
139							suppress_errors => 1,
140							);
141
142	1					6035	my ($head) = $dom->findnodes('/html/head');
143	1	50				53	if (not defined $head) {
144	0					0	return ();
145							}
146
147	1					3	my @discovered;
148	1					5	for my $c ($head->childNodes) {
149	5	100				52	next unless $c->isa('XML::LibXML::Element');
150	2	50				13	next unless $c->nodeName eq 'link';
151	2		50			11	my $rel = $c->getAttribute('rel') // '';
152	2	50				34	if ($rel ne 'alternate') {
153	0					0	next;
154							}
155	2		50			9	my $type = $c->getAttribute('type') // '';
156	2	50	66			31	if ($type ne 'application/atom+xml' and $type ne 'application/rss+xml') {
157	0					0	next;
158							}
159	2					9	my $href = $c->getAttribute('href');
160	2	50				25	if (not defined $href) {
161	0					0	next;
162							}
163	2					7	push @discovered, $href;
164							}
165
166	1					6	return @discovered;
167
168							}
169
170							1;
171
172							=head1 NAME
173
174							WWW::Noss::FeedReader - RSS/Atom feed reader
175
176							=head1 USAGE
177
178							use WWW::Noss::FeedReader qw(read_feed);
179
180							my ($channel, $entries) = read_feed($feed);
181
182							=head1 DESCRIPTION
183
184							B is a module that provides the C
185							subroutine for reading RSS and Atom feeds. This is a private module, please
186							consult the L manual for user documentation.
187
188							=head1 SUBROUTINES
189
190							Subroutines are not exported automatically.
191
192							=over 4
193
194							=item (\%channel, \@entries) = read_feed($feed)
195
196							Reads the given L object and returns the channel and
197							entry data. Returns both as C on failure.
198
199							C<\%channel> should look something like this:
200
201							{
202							nossname => ...,
203							nosslink => ...,
204							title => ...,
205							link => ...,
206							description => ...,
207							updated => ...,
208							author => ...,
209							category => [ ... ],
210							generator => ...,
211							image => ...,
212							rights => ...,
213							skiphours => [ ... ],
214							skipdays => [ ... ],
215							}
216
217							C<\@entries> will be a list of hash refs that look something like this:
218
219							{
220							nossid => ...,
221							status => ...,
222							feed => ...,
223							title => ...,
224							link => ...,
225							author => ...,
226							category => [ ... ],
227							summary => ...,
228							published => ...,
229							updated => ...,
230							uid => ...,
231							displaytitle => ...,
232							}
233
234							=back
235
236							=head1 AUTHOR
237
238							Written by Samuel Young, Esamyoung12788@gmail.comE.
239
240							This project's source can be found on its
241							L. Comments and pull
242							requests are welcome!
243
244							=head1 COPYRIGHT
245
246							Copyright (C) 2025-2026 Samuel Young
247
248							This program is free software: you can redistribute it and/or modify
249							it under the terms of the GNU General Public License as published by
250							the Free Software Foundation, either version 3 of the License, or
251							(at your option) any later version.
252
253							=head1 SEE ALSO
254
255							L, L
256
257							=cut
258
259							# vim: expandtab shiftwidth=4