File Coverage

script/moku-pona

Criterion	Covered	Total	%
statement	250	319	78.3
branch	81	160	50.6
condition	35	80	43.7
subroutine	26	33	78.7
pod			n/a
total	392	592	66.2

line	stmt	bran	cond	sub	time	code
1						#!/home/alex/perl5/perlbrew/perls/perl-5.32.0/bin/perl
2						# Copyright (C) 2018–2021 Alex Schroeder
3
4						# This program is free software: you can redistribute it and/or modify it under
5						# the terms of the GNU General Public License as published by the Free Software
6						# Foundation, either version 3 of the License, or (at your option) any later
7						# version.
8						#
9						# This program is distributed in the hope that it will be useful, but WITHOUT
10						# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11						# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12						#
13						# You should have received a copy of the GNU General Public License along with
14						# this program. If not, see .
15
16						=encoding utf8
17
18						=head1 Moku Pona
19
20						Moku Pona is a Gemini based feed reader. It can monitor URLs to feeds or regular
21						pages for changes and keeps and updated list of these in a Gemini list. Moku
22						Pona knows how to fetch Gopher URLs, Gemini URLs, and regular web URLs.
23
24						You manage your subscriptions using the command-line, with Moku Pona.
25
26						You serve the resulting file using a Gemini server.
27
28						You read it all using your Gemini client.
29
30						=head2 Limitations
31
32						When Moku Pona isn't watching a feed it can only detect changes on a page. Thus,
33						if there is an item that points to a phlog or blog, that's great. Sometimes
34						people put their phlog in a folder per year. If the Gopher menu lists each
35						folder and a date with the latest change, then that's great, you can use it.
36						Without it, you're in trouble: you need to subscribe to the item for the current
37						year in order to see changes, but when the next year comes around, you're
38						subscribed to the wrong item. Sometimes you're lucky and there will be a menu
39						somewhere with a timestamp for the last change. Use that instead. Good luck!
40
41						=head2 License
42
43						GNU Affero General Public License
44
45						=head2 Installation
46
47						Using C:
48
49						cpan App::mokupona
50
51						Manual install:
52
53						perl Makefile.PL
54						make
55						make install
56
57						=head2 Dependencies
58
59						There are some Perl dependencies you need to satisfy in order to run this
60						program:
61
62						=over
63
64						=item L, or C
65
66						=item L, or C
67
68						=item L, or C
69
70						=item L, or C
71
72						=item L, or c
73
74						=back
75
76						=cut
77
78	9			9	11043	use Modern::Perl '2018';
	9				38961
	9				94
79	9			9	7149	use File::Copy qw(copy);
	9				19686
	9				1094
80	9			9	5034	use Encode::Locale qw(decode_argv);
	9				98874
	9				633
81	9			9	68	use Encode qw(decode_utf8);
	9				34
	9				553
82	9			9	4031	use Mojo::IOLoop;
	9				2816292
	9				116
83	9			9	6908	use XML::LibXML;
	9				294034
	9				61
84	9			9	5799	use URI::Escape;
	9				14820
	9				923
85	9			9	84	use List::Util qw(none);
	9				19
	9				52047
86
87						decode_argv();
88
89						if (-t) {
90						binmode(STDIN, ":encoding(console_in)");
91						binmode(STDOUT, ":encoding(console_out)");
92						binmode(STDERR, ":encoding(console_out)");
93						}
94
95						=head2 The Data Directory
96
97						Moku Pona keeps the list of URLs you are subscribed to in directory. It's
98						probably C<~/.moku-pona> on your system.
99
100						=over
101
102						=item If you have the C environment variable set, then that's your data
103						directory.
104
105						=item If you have the C environment variable set, then your data
106						directory is F<$XDG_DATA_HOME/moku-pona>.
107
108						=item If you you have the C environment variable set, and you have a
109						F<$HOME/.local> directory, then your data directory is
110						F<$HOME/.local/moku-pona>.
111
112						=item If you have the C environment variable set, then your data directory
113						is F<$HOME/.moku-pona>.
114
115						=item If you have the C environment variable set (Windows), then your
116						data directory is F<$APPDATA/moku-pona>.
117
118						=item The last option is to have the C environment variable set.
119
120						=back
121
122						The data directory contains a copy of the latest resources. The names of these
123						cache files are simply the URL with all the slashes replaced by a hyphen.
124
125						=cut
126
127						our $data_dir = data_dir();
128						# say "Using $data_dir";
129
130						sub data_dir {
131	9	50		9	42	return $ENV{MOKU_PONA} if $ENV{MOKU_PONA};
132						# find an existing directory
133	9	50	33		40	return $ENV{XDG_DATA_HOME} . '/moku-pona' if $ENV{XDG_DATA_HOME} and -d $ENV{XDG_DATA_HOME} . '/moku-pona';
134	9	50	33		189	return $ENV{HOME} . '/.local/moku-pona' if $ENV{HOME} and -d $ENV{HOME} . '/.local/moku-pona';
135	9	50	33		182	return $ENV{HOME} . '/.moku-pona' if $ENV{HOME} and -d $ENV{HOME} . '/.moku-pona';
136	9	50	33		80	return $ENV{APPDATA} . '/moku-pona' if $ENV{APPDATA} and -d $ENV{APPDATA} . '/.moku-pona';
137	9	50	33		38	return $ENV{LOGDIR} . '/.moku-pona' if $ENV{LOGDIR} and -d $ENV{LOGDIR} . '/.moku-pona';
138						# or use a new one
139	9	50			30	return $ENV{XDG_DATA_HOME} . '/moku-pona' if $ENV{XDG_DATA_HOME};
140	9	50	33		108	return $ENV{HOME} . '/.local/moku-pona' if $ENV{HOME} and -d $ENV{HOME} . '/.local';
141	9	50			71	return $ENV{HOME} . '/.moku-pona' if $ENV{HOME};
142	0	0			0	return $ENV{APPDATA} . '/moku-pona' if $ENV{APPDATA};
143	0	0			0	return $ENV{LOGDIR} . '/.moku-pona' if $ENV{LOGDIR};
144	0				0	die "Please set the MOKU_PONA environment variable to a directory name\n";
145						}
146
147						=pod
148
149						The C file is a file containing a gemtext list of links, i.e. entries
150						such as these:
151
152						=> gemini://alexschroeder.ch Alex Schroeder
153
154						=cut
155
156						our $site_list = $data_dir . '/sites.txt';
157
158						=pod
159
160						The C file is a file containing a gemtext list of links based on
161						C, but with a timestamp of their last change, and with new updates
162						moved to the top. The ISO date is simply inserted after the URL:
163
164						=> gemini://alexschroeder.ch 2020-11-07 Alex Schroeder
165
166						=cut
167
168						our $updated_list = $data_dir . '/updates.txt';
169
170						=pod
171
172						In order to be at least somewhat backwards compatible with Moku Pona versions
173						1.1 and earlier, C may contain Gopher menu items. These are converted
174						to Gemini URLs during processing and thus the C file still contains
175						regular gemtext.
176
177						1Alex Schroeder ⭾ ⭾ alexschroeder.ch ⭾ 70
178
179						=cut
180
181						sub convert {
182	27			27	138	for (@_) {
183	49	100			236	next if /^=> /; # is already a gemini link
184	17				127	my ($type, $desc, $selector, $host, $port) = /^([^\t])([^\t])\t([^\t])\t([^\t])\t([^\t\r])/;
185	17	100	66		70	if ($host and $port) {
186	13		50		27	$port //= 0;
187	13				55	$_ = "=> gopher://$host:$port/$type$selector $desc";
188						}
189						}
190	27				155	return @_;
191						}
192
193						=pod
194
195						As was said above, however, the recommended format is the use of URLs. Moku Pona
196						supports Gemini, Gopher, and the web (gemini, gopher, gophers, http, and https
197						schemes).
198
199						=cut
200
201						sub query_gemini {
202	0			0	0	my $url = shift;
203	0				0	my $responses = shift;
204	0				0	my($scheme, $authority, $path, $query, $fragment) =
205						$url =~ m\|(?:([^:/?#]+):)?(?://([^/?#]))?([^?#])(?:\?([^#]))?(?:#(.))?\|;
206	0	0	0		0	die "⚠ The URL '$url' must use the gemini scheme\n" unless $scheme and $scheme eq 'gemini';
207	0	0			0	die "⚠ The URL '$url' must have an authority\n" unless $authority;
208	0				0	my ($host, $port) = split(/:/, $authority, 2);
209	0		0		0	$port //= 1965;
210						Mojo::IOLoop->client(
211						{port => $port, address => $host, tls => 1,
212						tls_options => { SSL_verify_mode => 0x00 }, timeout => 120}
213						=> sub {
214	0			0	0	my ($loop, $err, $stream) = @_;
215	0	0			0	if ($err) {
216	0				0	warn "Cannot connect to $url: $err\n";
217	0				0	return;
218						}
219	0				0	$stream->timeout(300); # 5 min
220	0				0	my $header;
221						$stream->on(read => sub {
222	0				0	my ($stream, $bytes) = @_;
223	0				0	$responses->{$url} .= $bytes;
224	0	0			0	$header = $responses->{$url} =~ s/^.*\r\n// unless $header});
	0				0
225	0				0	$stream->write("$url\r\n")})
226	0				0	}
227
228						sub query_gopher {
229	12			12	15	my $url = shift;
230	12				18	my $responses = shift;
231	12				17	my ($selector, $host, $port) = url_to_gopher($url);
232	12				30	my $tls = $url =~ /^gophers/;
233						Mojo::IOLoop->client(
234						{port => $port, address => $host, tls => $tls, timeout => 120 }
235						=> sub {
236	12			12	13901	my ($loop, $err, $stream) = @_;
237	12	50			32	if ($err) {
238	0				0	warn "Cannot connect to $url: $err\n";
239	0				0	return;
240						}
241	12				31	$stream->timeout(300); # 5 min
242						$stream->on(
243						read => sub {
244	12				11087	my ($stream, $bytes) = @_;
245	12				406	$responses->{$url} .= $bytes});
	12				54
246	12				105	$stream->write("$selector\r\n")})
247	12				155	}
248
249						sub url_to_gopher {
250	12			12	15	my $url = shift;
251	12		33		42	my $name = shift\|\|$url;
252	12				76	my($scheme, $authority, $path, $query, $fragment) =
253						$url =~ m\|(?:([^:/?#]+):)?(?://([^/?#]))?([^?#])(?:\?([^#]))?(?:#(.))?\|;
254	12		50		26	$scheme \|\|= "gopher";
255	12	50			39	return unless $scheme =~ /^gophers?$/;
256	12	50			54	$path = substr($path, 1) if substr($path, 0, 1) eq "/";
257	12	50			29	my $type = $path ? substr($path, 0, 1) : "1";
258	12	50			33	my $selector = $path ? substr($path, 1) : "";
259	12				45	my ($host, $port) = split(/:/, $authority, 2);
260	12		50		51	return ($selector, $host, $port\|\|70);
261						}
262
263						sub query_web {
264	0			0	0	my $url = shift;
265	0				0	my $responses = shift;
266	0				0	my($scheme, $authority, $path, $query, $fragment) =
267						$url =~ m\|(?:([^:/?#]+):)?(?://([^/?#]))?([^?#])(?:\?([^#]))?(?:#(.))?\|;
268	0				0	my $tls = $scheme eq "https";
269	0				0	my ($host, $port) = split(/:/, $authority, 2);
270	0		0		0	$path \|\|= "/";
271	0				0	my $selector = $path;
272	0	0			0	$selector .= "?" . $query if $query;
273						# ignore the fragment
274	0	0	0		0	$port \|\|= $tls ? 443 : 80;
275						Mojo::IOLoop->client(
276						{port => $port, address => $host, tls => $tls, timeout => 120 }
277						=> sub {
278	0			0	0	my ($loop, $err, $stream) = @_;
279	0				0	my $header;
280						$stream->on(read => sub {
281	0				0	my ($stream, $bytes) = @_;
282	0				0	$responses->{$url} .= $bytes;
283	0	0			0	$header = $responses->{$url} =~ s/^.*\r\n\r\n//s unless $header});
	0				0
284	0				0	$stream->write("GET $selector HTTP/1.0\r\n"
285						. "Host: $host:$port\r\n"
286						. "User-Agent: moku-pona\r\n"
287	0				0	. "\r\n")});
288						}
289
290						=head2 Migration from 1.1
291
292						The best way to migrate your setup is probably to use the C subcommand
293						explained later, and to recreate your list of subscriptions. Then your
294						C file will use gemtext format.
295
296						moku-pona list \| grep "moku-pona add" > commands
297						mv ~/.moku-pona/sites.txt ~/.moku-pona/sites.txt~
298						sh commands
299
300						=cut
301
302						sub load_site {
303	30			30	1245	my $file = $site_list;
304	30	100			496	return [] if not -f $file;
305	22	50			796	open(my $fh, "<:encoding(UTF-8)", $file) or die "Cannot read $file: $!\n";
306	22				3165	my @lines = <$fh>;
307	22				496	chomp(@lines);
308	22				52	@lines = grep(/^=> /, convert(@lines)); # from gopher
309	22				371	return \@lines;
310						}
311
312						sub load_file {
313	20			20	3035	my $file = shift;
314	20	100			335	return "" if not -f $file;
315	17	50			693	open(my $fh, "<:encoding(UTF-8)", $file)
316						or die "Cannot read $file: $!\n";
317	17				1162	local $/ = undef;
318	17				470	return <$fh>;
319						}
320
321						sub save_file {
322	24			24	2022	my $file = shift;
323	24				31	my $data = shift;
324	24	100			437	mkdir $data_dir unless -d $data_dir;
325	4	50		4	27	open(my $fh, ">:encoding(UTF-8)", $file)
	4				7
	4				29
	24				1462
326						or die "Cannot write $file: $!\n";
327	24				8596	print $fh $data;
328						}
329
330						=head2 List your subscriptions
331
332						moku-pona list [names...]
333
334						This lists all your current subscriptions in a format that is suitable for a
335						shell script. Optionally, only list a subset of the lines. All lines are matched
336						against the regular expressions you provide and are only listed if there is at
337						least one match, if you provided any.
338
339						Example:
340
341						moku-pona list alex
342
343						In this particular case, since I'm testing my own server, the result would be:
344
345						moku-pona add https://alexschroeder.ch/wiki?action=rss "rss"
346						moku-pona add gemini://alexschroeder.ch/ "gemini"
347						moku-pona add gopher://alexschroeder.ch/ "gopher"
348						moku-pona add gophers://alexschroeder.ch:7443/ "gophers"
349
350						=cut
351
352						sub do_list {
353	1			1	1508	my @args = @_;
354	1				19	my $site = load_site();
355	1	50			11	if (@args) {
356	0				0	print("Subscribed items in $site_list matching @args:\n");
357						} else {
358	1				6	print("Subscribed items in $site_list:\n");
359						}
360	1	50			3	print("none\n") unless @$site;
361	1				3	for my $line (@$site) {
362	2	50	33	0	6	next if @args and none { $line =~ /$_/ } @args;
	0				0
363						# skip item type
364	2				14	my ($uri, $name) = $line =~ /^=> (\S+)\s+(.*)/;
365	2				10	print(qq{moku-pona add $uri "$name"\n});
366						}
367						}
368
369						=head2 Add a subscription
370
371						moku-pona add url [description]
372
373						This adds a URL to the list of subscribed items. If the target is an Atom or RSS
374						feed, then that's also supported. You can provide an optional description for
375						this URL. If you don't provide a description, the URL will be used as the item's
376						description.
377
378						Example:
379
380						moku-pona add gemini://alexschroeder.ch kensanata
381
382						=cut
383
384						sub do_add {
385	8			8	1313	my $uri = shift;
386	8				20	my $name = shift;
387	8		33		23	$name \|\|= $uri;
388	8				21	my $line = "=> $uri $name";
389	8				16	my $site = load_site();
390	8				26	my $uri_re = quotemeta($uri);
391	8				17	my $name_re = quotemeta($name);
392	8	50			170	if (grep(/^=> $uri_re /, @$site)) {
		50
393	0				0	warn("$uri already exists in $site_list\n");
394						} elsif (grep(/^=> \S+ $name_re$/, @$site)) {
395	0				0	warn("$name already exists in $site_list\n");
396						} else {
397	8				19	push(@$site, $line);
398						}
399	8				30	save_file($site_list, join("\n", @$site, ""));
400						}
401
402						=head2 Remove a subscription
403
404						moku-pona remove description
405
406						This removes one or more URLs from the list of subscribed items.
407
408						Example:
409
410						moku-pona remove kensanata
411
412						=cut
413
414						sub do_remove {
415	2			2	1603	my @args = @_;
416	2				7	my $site = load_site();
417	2				6	my $count = 0;
418	2				4	my $i = 0;
419	2		66		15	while (@args and $i < @$site) {
420	4				8	my $line = $site->[$i];
421	4				26	my ($uri, $name) = $line =~ /^=> (\S+)\s+(.*)/;
422	4				7	my $found = 0;
423	4				6	my $j = 0;
424	4				10	while ($j < @args) {
425	4	100			11	if ($name eq $args[$j]) {
426	2				3	$count++;
427	2				3	$found = 1;
428	2				6	splice(@$site, $i, 1); # remove the site found
429	2				4	splice(@args, $j, 1); # remove the arg found
430						} else {
431	2				5	$j++;
432						}
433						}
434	4	100			16	$i++ unless $found;
435						}
436	2	50			6	if ($count) {
437	2	50			108	printf("Removed %d %s\n", $count,
438						$count == 1 ? "subscription" : "subscriptions");
439	2				18	save_file($site_list, join("\n", @$site, ""));
440						} else {
441	0				0	warn("No subscriptions matching @args found\n");
442	0				0	warn("Use moku-pona list to find the correct descriptions.\n");
443						}
444						}
445
446						=head2 Clean up the data directory
447
448						moku-pona cleanup [--confirm]
449
450						When Moku Pona updates, copies of the URL targets are saved in the data
451						directory. If you remove a subscription (see above), that leaves a cache file in
452						the data directory that is no longer used – and it leaves an entry in
453						C that is no longer wanted. The cleanup command fixes this. It
454						deletes all the cached pages that you are no longer subscribed to, and it
455						removes those entries from C as well.
456
457						Actually, just to be sure, if you run it without the C<--confirm> argument, it
458						simply prints which files it would trash. Rerun it with the C<--confirm>
459						argument to actually do it.
460
461						Example:
462
463						moku-pona cleanup
464
465						=cut
466
467						sub do_cleanup {
468	2		100	2	2901	my $confirm = shift\|\|'' eq '--confirm';
469	2				4	my $todo = 0;
470						# get a hash map telling us the cache files we expect based on our sites
471	2				4	my $site = load_site();
472						my %caches = map {
473	2				7	my ($uri, $name) = /^=> (\S+)\s+(.*)/;
	2				16
474	2				15	$uri =~ s/[\/:]/-/g;
475	2				14	"$data_dir/$uri" => 1;
476						} @$site;
477						# get a list of text files in the directory
478	2	50			85	opendir(my $dh, $data_dir) or die "Cannot read $data_dir: $!\n";
479	2				60	my @files = map { "$data_dir/$_" } grep { /^[^.]/ } readdir($dh);
	8				18
	12				34
480	2				24	closedir($dh);
481						# remove unnecessary cache files
482	2				6	for my $file (@files) {
483	8	100			18	next if $file eq $site_list;
484	6	100			12	next if $file eq $updated_list;
485	4	100			10	next if $caches{$file};
486	2	100			4	if ($confirm) {
487	1				40	unlink $file;
488						} else {
489	1				6	print "trash $file\n";
490	1				2	$todo++;
491						}
492						}
493						# check updates list
494	2	50			26	if (-f $updated_list) {
495	2	50			65	open(my $fh, "<:encoding(UTF-8)", $updated_list)
496						or die "Cannot read $updated_list: $!\n";
497	2				180	my @lines = <$fh>;
498	2				48	chomp(@lines);
499						# decide what to do about each line in updates, looking just at the names
500	2				30	my %sites = map { s/^=> (\S+)\s+(.*)/$2/; $_ => 1 } @$site;
	2				20
	2				10
501	2				5	my @deletes;
502						my @keeps;
503	2				4	for my $line (@lines) {
504	4	100	66		30	if ($line =~ /^=> \S+ \d\d\d\d-\d\d-\d\d (.+)/ and not $sites{$1}) {
505	2				5	push(@deletes, $line);
506	2				3	$todo++;
507						} else {
508	2				6	push(@keeps, $line);
509						}
510						}
511	2	100	66		14	print "Removing these entries from updates:\n"
512						. join("\n", @deletes, "") if @deletes and not $confirm;
513						# save
514	2	100			27	save_file($updated_list, join("\n", @keeps, "")) if $confirm;
515						}
516	2	100	66		18	if ($todo && !$confirm) {
517	1				3	print "\n";
518	1				5	print "Use moku-pona cleanup --confirm to do it.\n";
519						}
520						}
521
522						=head2 Update
523
524						moku-pona update [--quiet] [names...]
525
526						This updates all the subscribed items and generates a new local page for you to
527						visit: C.
528
529						Example:
530
531						moku-pona update
532
533						If you call it from a cron job, you might want to use the C<--quiet> argument to
534						prevent it from printing all the sites it's contacting (since cron will then
535						mail this to you and you might not care for it unless there's a problem). If
536						there's a problem, you'll still get a message.
537
538						This is how I call it from my C, for example
539
540						#m h dom mon dow command
541						11 7,14 * * * /home/alex/bin/moku-pona update --quiet
542
543						If you're testing things, you can also fetch just a limited number of items by
544						listing them.
545
546						Example:
547
548						moku-pona update "RPG Planet"
549
550						The C files may contain lines that are not links at the top. These
551						will remain untouched. The rest is links. New items are added at the beginning
552						of the links and older copies of such items are removed from the links.
553
554						=cut
555
556						sub add_update {
557	5			5	9	my $line = shift;
558	5				41	my ($uri, $name) = $line =~ /^=> (\S+)\s+(.*)/;
559						# add current date
560	5				53	my ($sec, $min, $hour, $mday, $mon, $year) = gmtime(); # UTC
561	5				34	my $date = sprintf('%4d-%02d-%02d', $year + 1900, $mon + 1, $mday);
562	5				18	$line = "=> $uri $date $name";
563						# load file
564	5				43	my @lines;
565	5	100			76	if (-f $updated_list) {
566	4	50			147	open(my $fh, "<:encoding(UTF-8)", $updated_list)
567						or die "Cannot read $updated_list: $!\n";
568	4				390	@lines = convert(<$fh>); # from gohper
569	4				69	chomp(@lines);
570						}
571						# start the new list with the non-list links
572	5				35	my @new = grep(!/^=>/, @lines);
573						# add the line to the new list
574	5				15	push(@new, $line);
575						# add the remaining links to the new list, except for the ones matching the name of the new line
576	5				8	$name = quotemeta($name);
577	5				139	push(@new, grep(!/\d\d\d\d-\d\d-\d\d $name$/, grep(/^=>/, @lines)));
578						# save
579	5				46	save_file($updated_list, join("\n", @new, ""));
580						}
581
582						sub do_update {
583	6			6	7417	my $quiet = grep { $_ eq '--quiet' } @_;
	0				0
584	6				16	my @sites = grep { $_ ne '--quiet' } @_;
	0				0
585	6				8	my $site = load_site();
586	6				22	my %responses;
587						my @uris;
588	6				0	my %names;
589	6				0	my %lines;
590	6				13	for my $line (@$site) {
591	12				1144	my ($uri, $name) = $line =~ /^=> (\S+)(?:[ \t]+(.*))?/;
592	12		33		27	$name \|\|= $uri;
593	12	50	33		34	next unless @sites == 0 or grep { $_ eq $name } @sites;
	0				0
594	12	50			979	say("Fetching $name...") unless $quiet;
595	12				57	push(@uris, $uri);
596	12				39	$names{$uri} = $name;
597	12				23	$lines{$uri} = $line;
598	12	50			62	if ($uri =~ /^gopher/) {
		0
		0
599	12				27	query_gopher($uri, \%responses);
600						} elsif ($uri =~ /^gemini/) {
601	0				0	query_gemini($uri, \%responses);
602						} elsif ($uri =~ /^http/) {
603	0				0	query_web($uri, \%responses);
604						} else {
605	0				0	warn "Don't know how to fetch $uri\n";
606						}
607						}
608
609	6	50			1510	Mojo::IOLoop->start unless Mojo::IOLoop->is_running;
610
611	6				2481	for my $uri (keys %responses) {
612	12				42	my $name = $names{$uri};
613						# decode the UTF-8 when we have the entire response
614	12				60	my $new = decode_utf8($responses{$uri});
615	12	50			317	if (not $new) {
616	0				0	warn("$name returned an empty document\n");
617	0				0	next;
618						}
619	12				16	my $filename = $uri;
620	12				108	$filename =~ s/[\/:]/-/g;
621	12				30	my $cache = "$data_dir/$filename";
622	12	100			45	if ($new =~ /^<(\?xml\|rss)/) {
623	2				17	$new = to_gemini($new);
624	2				63	my $encoded = uri_escape_utf8($filename);
625	2				76	$lines{$uri} = "=> $encoded $name"; # now referring to the local cache file
626						}
627	12				24	my $old = load_file($cache);
628	12	100			288	if ($new ne $old) {
629	5	50			514	say "$name updated" unless $quiet;
630	5				32	add_update($lines{$uri});
631	5				22	save_file($cache, $new);
632						} else {
633	7	50			773	say "$name unchanged" unless $quiet;
634						}
635						}
636						}
637
638						=head2 Subscribing to feeds
639
640						When the result of an update is an XML document, then it is parsed and the links
641						of its items (if RSS) or entries (if Atom) are extracted and saved in the cache
642						file in the data directory. The effect is this:
643
644						=over
645
646						=item If you subscribe to a regular page, then the link to it in C
647						moves to the top when it changes.
648
649						=item If you subscribe to a feed, then the link in C moves to the
650						top when it changes and it links to a file in the data directory that links to
651						the individual items in the feed.
652
653						=back
654
655						Example:
656
657						moku-pona add https://campaignwiki.org/rpg/feed.xml "RPG"
658						moku-pona update
659
660						This adds the RPG entry to C as follows:
661
662						=> https%3A--campaignwiki.org-rpg-feed.xml 2020-11-07 RPG
663
664						And if you check the file C, you'll see
665						that it's a regular Gemini list. You'll find 100 links like the following:
666
667						=> https://alexschroeder.ch/wiki/2020-11-05_Episode_34 Episode 34
668
669						Now use C (see below) to move the files to the correct
670						directory where your Gemini server expects them.
671
672						=cut
673
674						# Convert a RSS or Atom feed to Gemini links
675						sub to_gemini {
676	5			5	34	my $xml = shift;
677	5				9	my $dom = eval {
678	5				32	require XML::LibXML;
679	5				24	my $parser = XML::LibXML->new(recover => 2); # no errors, no warnings
680	5				364	$parser->load_xml(string => $xml);
681						};
682	5	50			2123	if ($@) {
683	0				0	warn "$@\n";
684	0				0	return '';
685						}
686	5				54	my $root = $dom->documentElement();
687	5				153	my $xpc = XML::LibXML::XPathContext->new;
688	5				35	$xpc->registerNs('atom', 'http://www.w3.org/2005/Atom');
689	5		66		18	my $nodes = $xpc->findnodes('//atom:entry', $root) \|\| $root->findnodes('//item');
690	5				1217	my @lines;
691	5				23	for my $node ($nodes->get_nodelist) {
692	7		66		95	my $titles = $xpc->findnodes('atom:title', $node) \|\| $node->getChildrenByTagName('title');
693	7				1215	my $first = $titles->shift;
694	7	50			112	my $title = $first ? $first->textContent : "untitled";
695	7				154	$title =~ s/\s+$//; # trim right
696	7				22	$title =~ s/^\s+//; # trim left
697	7		100		26	my $links = $xpc->findnodes('atom:link', $node) \|\| $node->getChildrenByTagName('link');
698	7	100			1048	next unless $links;
699	6				136	my $link = $links->shift; # take the first
700	6		66		33	my $href = $link->getAttribute('href') \|\| $link->textContent;
701	6				125	push(@lines, "=> $href $title");
702						}
703	5				126	return join("\n", @lines, "");
704						}
705
706						=head2 Publishing your subscription
707
708						moku-pona publish
709
710						This takes the important files from your data directory and copies them to a
711						target directory. You could just use symbolic links for C and
712						C, of course. But if you've subscribed to actual feeds as described
713						above, then the cache files need to get copied as well!
714
715						Example:
716
717						mkdir ~/subs
718						moku-pona publish ~/subs
719
720						=head2 Serving your subscriptions via Gemini
721
722						This depends entirely on your Gemini server. If you like it really simple, you
723						can use L. Here's how to create the certificate and key
724						files, copy them to the C<~/subs> directory created above, and run C
725						for a quick test.
726
727						make cert
728						cp *.pem ~/subs
729						cd ~/subs
730						lupa-pona
731
732						=cut
733
734						sub do_publish {
735	1			1	538	my $target = shift;
736	1	50			16	die "Target $target is not a directory\n" unless -d $target;
737	1	50			22	die "Source $site_list does not exist\n" unless -f $site_list;
738	1	50			11	die "Source $updated_list does not exist\n" unless -f $updated_list;
739	1				2	my $path;
740						# copy site list
741	1				6	copy($site_list, "$target/sites.txt");
742						# copy updates but with local links for the feed files
743	1	50		2	412	open(my $in, "<:encoding(UTF-8)", $updated_list)
	2				13
	2				3
	2				46
744						or die "Cannot read $updated_list: $!\n";
745	1	50			1276	open(my $out, ">:encoding(UTF-8)", "$target/updates.txt")
746						or die "Cannot write $target/updates.txt: $!\n";
747	1				102	for my $line (<$in>) {
748	1				41	chomp($line);
749	1				3	($line) = convert($line);
750	1				8	my ($uri, $name) = $line =~ /^=> file:\/\/\/(\S+)\s+(.*)/;
751						# if the target is a local file, then that's because it is the result of a
752						# to_gemini call in do_update, so we need to copy it as well
753	1				4	$uri =~ s/[\/:]/-/g;
754	1	50			17	if (-f "$data_dir/$uri") {
755	1				9	copy("$data_dir/$uri", "$target/$uri");
756						}
757	1				403	print $out "$line\n";
758						}
759						}
760
761						sub do_help {
762	0			0	0	my $parser = Pod::Text->new();
763	0				0	$parser->parse_file($0);
764						}
765
766						sub main {
767	0		0	0	0	my $command = shift(@ARGV) \|\| "help";
768	0	0			0	if ($command eq "add") { do_add(@ARGV) }
	0	0			0
		0
		0
		0
		0
		0
769	0				0	elsif ($command eq "remove") { do_remove(@ARGV) }
770	0				0	elsif ($command eq "list") { do_list(@ARGV) }
771	0				0	elsif ($command eq "cleanup") { do_cleanup(@ARGV) }
772	0				0	elsif ($command eq "update") { do_update(@ARGV) }
773	0				0	elsif ($command eq "convert") { do_convert() }
774	0				0	elsif ($command eq "publish") { do_publish(@ARGV) }
775	0				0	else { do_help() }
776						}
777
778						main() if $0 =~ /\bmoku-pona$/;
779
780						1;