File Coverage

script/moku-pona
Criterion Covered Total %
statement 250 319 78.3
branch 81 160 50.6
condition 35 80 43.7
subroutine 26 33 78.7
pod n/a
total 392 592 66.2


line stmt bran cond sub pod time code
1             #!/home/alex/perl5/perlbrew/perls/perl-5.32.0/bin/perl
2             # Copyright (C) 2018–2021 Alex Schroeder
3              
4             # This program is free software: you can redistribute it and/or modify it under
5             # the terms of the GNU General Public License as published by the Free Software
6             # Foundation, either version 3 of the License, or (at your option) any later
7             # version.
8             #
9             # This program is distributed in the hope that it will be useful, but WITHOUT
10             # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11             # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12             #
13             # You should have received a copy of the GNU General Public License along with
14             # this program. If not, see .
15              
16             =encoding utf8
17              
18             =head1 Moku Pona
19              
20             Moku Pona is a Gemini based feed reader. It can monitor URLs to feeds or regular
21             pages for changes and keeps and updated list of these in a Gemini list. Moku
22             Pona knows how to fetch Gopher URLs, Gemini URLs, and regular web URLs.
23              
24             You manage your subscriptions using the command-line, with Moku Pona.
25              
26             You serve the resulting file using a Gemini server.
27              
28             You read it all using your Gemini client.
29              
30             =head2 Limitations
31              
32             When Moku Pona isn't watching a feed it can only detect changes on a page. Thus,
33             if there is an item that points to a phlog or blog, that's great. Sometimes
34             people put their phlog in a folder per year. If the Gopher menu lists each
35             folder and a date with the latest change, then that's great, you can use it.
36             Without it, you're in trouble: you need to subscribe to the item for the current
37             year in order to see changes, but when the next year comes around, you're
38             subscribed to the wrong item. Sometimes you're lucky and there will be a menu
39             somewhere with a timestamp for the last change. Use that instead. Good luck!
40              
41             =head2 License
42              
43             GNU Affero General Public License
44              
45             =head2 Installation
46              
47             Using C:
48              
49             cpan App::mokupona
50              
51             Manual install:
52              
53             perl Makefile.PL
54             make
55             make install
56              
57             =head2 Dependencies
58              
59             There are some Perl dependencies you need to satisfy in order to run this
60             program:
61              
62             =over
63              
64             =item L, or C
65              
66             =item L, or C
67              
68             =item L, or C
69              
70             =item L, or C
71              
72             =item L, or c
73              
74             =back
75              
76             =cut
77              
78 9     9   11043 use Modern::Perl '2018';
  9         38961  
  9         94  
79 9     9   7149 use File::Copy qw(copy);
  9         19686  
  9         1094  
80 9     9   5034 use Encode::Locale qw(decode_argv);
  9         98874  
  9         633  
81 9     9   68 use Encode qw(decode_utf8);
  9         34  
  9         553  
82 9     9   4031 use Mojo::IOLoop;
  9         2816292  
  9         116  
83 9     9   6908 use XML::LibXML;
  9         294034  
  9         61  
84 9     9   5799 use URI::Escape;
  9         14820  
  9         923  
85 9     9   84 use List::Util qw(none);
  9         19  
  9         52047  
86              
87             decode_argv();
88              
89             if (-t) {
90             binmode(STDIN, ":encoding(console_in)");
91             binmode(STDOUT, ":encoding(console_out)");
92             binmode(STDERR, ":encoding(console_out)");
93             }
94              
95             =head2 The Data Directory
96              
97             Moku Pona keeps the list of URLs you are subscribed to in directory. It's
98             probably C<~/.moku-pona> on your system.
99              
100             =over
101              
102             =item If you have the C environment variable set, then that's your data
103             directory.
104              
105             =item If you have the C environment variable set, then your data
106             directory is F<$XDG_DATA_HOME/moku-pona>.
107              
108             =item If you you have the C environment variable set, and you have a
109             F<$HOME/.local> directory, then your data directory is
110             F<$HOME/.local/moku-pona>.
111              
112             =item If you have the C environment variable set, then your data directory
113             is F<$HOME/.moku-pona>.
114              
115             =item If you have the C environment variable set (Windows), then your
116             data directory is F<$APPDATA/moku-pona>.
117              
118             =item The last option is to have the C environment variable set.
119              
120             =back
121              
122             The data directory contains a copy of the latest resources. The names of these
123             cache files are simply the URL with all the slashes replaced by a hyphen.
124              
125             =cut
126              
127             our $data_dir = data_dir();
128             # say "Using $data_dir";
129              
130             sub data_dir {
131 9 50   9   42 return $ENV{MOKU_PONA} if $ENV{MOKU_PONA};
132             # find an existing directory
133 9 50 33     40 return $ENV{XDG_DATA_HOME} . '/moku-pona' if $ENV{XDG_DATA_HOME} and -d $ENV{XDG_DATA_HOME} . '/moku-pona';
134 9 50 33     189 return $ENV{HOME} . '/.local/moku-pona' if $ENV{HOME} and -d $ENV{HOME} . '/.local/moku-pona';
135 9 50 33     182 return $ENV{HOME} . '/.moku-pona' if $ENV{HOME} and -d $ENV{HOME} . '/.moku-pona';
136 9 50 33     80 return $ENV{APPDATA} . '/moku-pona' if $ENV{APPDATA} and -d $ENV{APPDATA} . '/.moku-pona';
137 9 50 33     38 return $ENV{LOGDIR} . '/.moku-pona' if $ENV{LOGDIR} and -d $ENV{LOGDIR} . '/.moku-pona';
138             # or use a new one
139 9 50       30 return $ENV{XDG_DATA_HOME} . '/moku-pona' if $ENV{XDG_DATA_HOME};
140 9 50 33     108 return $ENV{HOME} . '/.local/moku-pona' if $ENV{HOME} and -d $ENV{HOME} . '/.local';
141 9 50       71 return $ENV{HOME} . '/.moku-pona' if $ENV{HOME};
142 0 0       0 return $ENV{APPDATA} . '/moku-pona' if $ENV{APPDATA};
143 0 0       0 return $ENV{LOGDIR} . '/.moku-pona' if $ENV{LOGDIR};
144 0         0 die "Please set the MOKU_PONA environment variable to a directory name\n";
145             }
146              
147             =pod
148              
149             The C file is a file containing a gemtext list of links, i.e. entries
150             such as these:
151              
152             => gemini://alexschroeder.ch Alex Schroeder
153              
154             =cut
155              
156             our $site_list = $data_dir . '/sites.txt';
157              
158             =pod
159              
160             The C file is a file containing a gemtext list of links based on
161             C, but with a timestamp of their last change, and with new updates
162             moved to the top. The ISO date is simply inserted after the URL:
163              
164             => gemini://alexschroeder.ch 2020-11-07 Alex Schroeder
165              
166             =cut
167              
168             our $updated_list = $data_dir . '/updates.txt';
169              
170             =pod
171              
172             In order to be at least somewhat backwards compatible with Moku Pona versions
173             1.1 and earlier, C may contain Gopher menu items. These are converted
174             to Gemini URLs during processing and thus the C file still contains
175             regular gemtext.
176              
177             1Alex Schroeder ⭾ ⭾ alexschroeder.ch ⭾ 70
178              
179             =cut
180              
181             sub convert {
182 27     27   138 for (@_) {
183 49 100       236 next if /^=> /; # is already a gemini link
184 17         127 my ($type, $desc, $selector, $host, $port) = /^([^\t])([^\t]*)\t([^\t]*)\t([^\t]*)\t([^\t\r]*)/;
185 17 100 66     70 if ($host and $port) {
186 13   50     27 $port //= 0;
187 13         55 $_ = "=> gopher://$host:$port/$type$selector $desc";
188             }
189             }
190 27         155 return @_;
191             }
192              
193             =pod
194              
195             As was said above, however, the recommended format is the use of URLs. Moku Pona
196             supports Gemini, Gopher, and the web (gemini, gopher, gophers, http, and https
197             schemes).
198              
199             =cut
200              
201             sub query_gemini {
202 0     0   0 my $url = shift;
203 0         0 my $responses = shift;
204 0         0 my($scheme, $authority, $path, $query, $fragment) =
205             $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
206 0 0 0     0 die "⚠ The URL '$url' must use the gemini scheme\n" unless $scheme and $scheme eq 'gemini';
207 0 0       0 die "⚠ The URL '$url' must have an authority\n" unless $authority;
208 0         0 my ($host, $port) = split(/:/, $authority, 2);
209 0   0     0 $port //= 1965;
210             Mojo::IOLoop->client(
211             {port => $port, address => $host, tls => 1,
212             tls_options => { SSL_verify_mode => 0x00 }, timeout => 120}
213             => sub {
214 0     0   0 my ($loop, $err, $stream) = @_;
215 0 0       0 if ($err) {
216 0         0 warn "Cannot connect to $url: $err\n";
217 0         0 return;
218             }
219 0         0 $stream->timeout(300); # 5 min
220 0         0 my $header;
221             $stream->on(read => sub {
222 0         0 my ($stream, $bytes) = @_;
223 0         0 $responses->{$url} .= $bytes;
224 0 0       0 $header = $responses->{$url} =~ s/^.*\r\n// unless $header});
  0         0  
225 0         0 $stream->write("$url\r\n")})
226 0         0 }
227              
228             sub query_gopher {
229 12     12   15 my $url = shift;
230 12         18 my $responses = shift;
231 12         17 my ($selector, $host, $port) = url_to_gopher($url);
232 12         30 my $tls = $url =~ /^gophers/;
233             Mojo::IOLoop->client(
234             {port => $port, address => $host, tls => $tls, timeout => 120 }
235             => sub {
236 12     12   13901 my ($loop, $err, $stream) = @_;
237 12 50       32 if ($err) {
238 0         0 warn "Cannot connect to $url: $err\n";
239 0         0 return;
240             }
241 12         31 $stream->timeout(300); # 5 min
242             $stream->on(
243             read => sub {
244 12         11087 my ($stream, $bytes) = @_;
245 12         406 $responses->{$url} .= $bytes});
  12         54  
246 12         105 $stream->write("$selector\r\n")})
247 12         155 }
248              
249             sub url_to_gopher {
250 12     12   15 my $url = shift;
251 12   33     42 my $name = shift||$url;
252 12         76 my($scheme, $authority, $path, $query, $fragment) =
253             $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
254 12   50     26 $scheme ||= "gopher";
255 12 50       39 return unless $scheme =~ /^gophers?$/;
256 12 50       54 $path = substr($path, 1) if substr($path, 0, 1) eq "/";
257 12 50       29 my $type = $path ? substr($path, 0, 1) : "1";
258 12 50       33 my $selector = $path ? substr($path, 1) : "";
259 12         45 my ($host, $port) = split(/:/, $authority, 2);
260 12   50     51 return ($selector, $host, $port||70);
261             }
262              
263             sub query_web {
264 0     0   0 my $url = shift;
265 0         0 my $responses = shift;
266 0         0 my($scheme, $authority, $path, $query, $fragment) =
267             $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
268 0         0 my $tls = $scheme eq "https";
269 0         0 my ($host, $port) = split(/:/, $authority, 2);
270 0   0     0 $path ||= "/";
271 0         0 my $selector = $path;
272 0 0       0 $selector .= "?" . $query if $query;
273             # ignore the fragment
274 0 0 0     0 $port ||= $tls ? 443 : 80;
275             Mojo::IOLoop->client(
276             {port => $port, address => $host, tls => $tls, timeout => 120 }
277             => sub {
278 0     0   0 my ($loop, $err, $stream) = @_;
279 0         0 my $header;
280             $stream->on(read => sub {
281 0         0 my ($stream, $bytes) = @_;
282 0         0 $responses->{$url} .= $bytes;
283 0 0       0 $header = $responses->{$url} =~ s/^.*\r\n\r\n//s unless $header});
  0         0  
284 0         0 $stream->write("GET $selector HTTP/1.0\r\n"
285             . "Host: $host:$port\r\n"
286             . "User-Agent: moku-pona\r\n"
287 0         0 . "\r\n")});
288             }
289              
290             =head2 Migration from 1.1
291              
292             The best way to migrate your setup is probably to use the C subcommand
293             explained later, and to recreate your list of subscriptions. Then your
294             C file will use gemtext format.
295              
296             moku-pona list | grep "moku-pona add" > commands
297             mv ~/.moku-pona/sites.txt ~/.moku-pona/sites.txt~
298             sh commands
299              
300             =cut
301              
302             sub load_site {
303 30     30   1245 my $file = $site_list;
304 30 100       496 return [] if not -f $file;
305 22 50       796 open(my $fh, "<:encoding(UTF-8)", $file) or die "Cannot read $file: $!\n";
306 22         3165 my @lines = <$fh>;
307 22         496 chomp(@lines);
308 22         52 @lines = grep(/^=> /, convert(@lines)); # from gopher
309 22         371 return \@lines;
310             }
311              
312             sub load_file {
313 20     20   3035 my $file = shift;
314 20 100       335 return "" if not -f $file;
315 17 50       693 open(my $fh, "<:encoding(UTF-8)", $file)
316             or die "Cannot read $file: $!\n";
317 17         1162 local $/ = undef;
318 17         470 return <$fh>;
319             }
320              
321             sub save_file {
322 24     24   2022 my $file = shift;
323 24         31 my $data = shift;
324 24 100       437 mkdir $data_dir unless -d $data_dir;
325 4 50   4   27 open(my $fh, ">:encoding(UTF-8)", $file)
  4         7  
  4         29  
  24         1462  
326             or die "Cannot write $file: $!\n";
327 24         8596 print $fh $data;
328             }
329              
330             =head2 List your subscriptions
331              
332             moku-pona list [names...]
333              
334             This lists all your current subscriptions in a format that is suitable for a
335             shell script. Optionally, only list a subset of the lines. All lines are matched
336             against the regular expressions you provide and are only listed if there is at
337             least one match, if you provided any.
338              
339             Example:
340              
341             moku-pona list alex
342              
343             In this particular case, since I'm testing my own server, the result would be:
344              
345             moku-pona add https://alexschroeder.ch/wiki?action=rss "rss"
346             moku-pona add gemini://alexschroeder.ch/ "gemini"
347             moku-pona add gopher://alexschroeder.ch/ "gopher"
348             moku-pona add gophers://alexschroeder.ch:7443/ "gophers"
349              
350             =cut
351              
352             sub do_list {
353 1     1   1508 my @args = @_;
354 1         19 my $site = load_site();
355 1 50       11 if (@args) {
356 0         0 print("Subscribed items in $site_list matching @args:\n");
357             } else {
358 1         6 print("Subscribed items in $site_list:\n");
359             }
360 1 50       3 print("none\n") unless @$site;
361 1         3 for my $line (@$site) {
362 2 50 33 0   6 next if @args and none { $line =~ /$_/ } @args;
  0         0  
363             # skip item type
364 2         14 my ($uri, $name) = $line =~ /^=> (\S+)\s+(.*)/;
365 2         10 print(qq{moku-pona add $uri "$name"\n});
366             }
367             }
368              
369             =head2 Add a subscription
370              
371             moku-pona add url [description]
372              
373             This adds a URL to the list of subscribed items. If the target is an Atom or RSS
374             feed, then that's also supported. You can provide an optional description for
375             this URL. If you don't provide a description, the URL will be used as the item's
376             description.
377              
378             Example:
379              
380             moku-pona add gemini://alexschroeder.ch kensanata
381              
382             =cut
383              
384             sub do_add {
385 8     8   1313 my $uri = shift;
386 8         20 my $name = shift;
387 8   33     23 $name ||= $uri;
388 8         21 my $line = "=> $uri $name";
389 8         16 my $site = load_site();
390 8         26 my $uri_re = quotemeta($uri);
391 8         17 my $name_re = quotemeta($name);
392 8 50       170 if (grep(/^=> $uri_re /, @$site)) {
    50          
393 0         0 warn("$uri already exists in $site_list\n");
394             } elsif (grep(/^=> \S+ $name_re$/, @$site)) {
395 0         0 warn("$name already exists in $site_list\n");
396             } else {
397 8         19 push(@$site, $line);
398             }
399 8         30 save_file($site_list, join("\n", @$site, ""));
400             }
401              
402             =head2 Remove a subscription
403              
404             moku-pona remove description
405              
406             This removes one or more URLs from the list of subscribed items.
407              
408             Example:
409              
410             moku-pona remove kensanata
411              
412             =cut
413              
414             sub do_remove {
415 2     2   1603 my @args = @_;
416 2         7 my $site = load_site();
417 2         6 my $count = 0;
418 2         4 my $i = 0;
419 2   66     15 while (@args and $i < @$site) {
420 4         8 my $line = $site->[$i];
421 4         26 my ($uri, $name) = $line =~ /^=> (\S+)\s+(.*)/;
422 4         7 my $found = 0;
423 4         6 my $j = 0;
424 4         10 while ($j < @args) {
425 4 100       11 if ($name eq $args[$j]) {
426 2         3 $count++;
427 2         3 $found = 1;
428 2         6 splice(@$site, $i, 1); # remove the site found
429 2         4 splice(@args, $j, 1); # remove the arg found
430             } else {
431 2         5 $j++;
432             }
433             }
434 4 100       16 $i++ unless $found;
435             }
436 2 50       6 if ($count) {
437 2 50       108 printf("Removed %d %s\n", $count,
438             $count == 1 ? "subscription" : "subscriptions");
439 2         18 save_file($site_list, join("\n", @$site, ""));
440             } else {
441 0         0 warn("No subscriptions matching @args found\n");
442 0         0 warn("Use moku-pona list to find the correct descriptions.\n");
443             }
444             }
445              
446             =head2 Clean up the data directory
447              
448             moku-pona cleanup [--confirm]
449              
450             When Moku Pona updates, copies of the URL targets are saved in the data
451             directory. If you remove a subscription (see above), that leaves a cache file in
452             the data directory that is no longer used – and it leaves an entry in
453             C that is no longer wanted. The cleanup command fixes this. It
454             deletes all the cached pages that you are no longer subscribed to, and it
455             removes those entries from C as well.
456              
457             Actually, just to be sure, if you run it without the C<--confirm> argument, it
458             simply prints which files it would trash. Rerun it with the C<--confirm>
459             argument to actually do it.
460              
461             Example:
462              
463             moku-pona cleanup
464              
465             =cut
466              
467             sub do_cleanup {
468 2   100 2   2901 my $confirm = shift||'' eq '--confirm';
469 2         4 my $todo = 0;
470             # get a hash map telling us the cache files we expect based on our sites
471 2         4 my $site = load_site();
472             my %caches = map {
473 2         7 my ($uri, $name) = /^=> (\S+)\s+(.*)/;
  2         16  
474 2         15 $uri =~ s/[\/:]/-/g;
475 2         14 "$data_dir/$uri" => 1;
476             } @$site;
477             # get a list of text files in the directory
478 2 50       85 opendir(my $dh, $data_dir) or die "Cannot read $data_dir: $!\n";
479 2         60 my @files = map { "$data_dir/$_" } grep { /^[^.]/ } readdir($dh);
  8         18  
  12         34  
480 2         24 closedir($dh);
481             # remove unnecessary cache files
482 2         6 for my $file (@files) {
483 8 100       18 next if $file eq $site_list;
484 6 100       12 next if $file eq $updated_list;
485 4 100       10 next if $caches{$file};
486 2 100       4 if ($confirm) {
487 1         40 unlink $file;
488             } else {
489 1         6 print "trash $file\n";
490 1         2 $todo++;
491             }
492             }
493             # check updates list
494 2 50       26 if (-f $updated_list) {
495 2 50       65 open(my $fh, "<:encoding(UTF-8)", $updated_list)
496             or die "Cannot read $updated_list: $!\n";
497 2         180 my @lines = <$fh>;
498 2         48 chomp(@lines);
499             # decide what to do about each line in updates, looking just at the names
500 2         30 my %sites = map { s/^=> (\S+)\s+(.*)/$2/; $_ => 1 } @$site;
  2         20  
  2         10  
501 2         5 my @deletes;
502             my @keeps;
503 2         4 for my $line (@lines) {
504 4 100 66     30 if ($line =~ /^=> \S+ \d\d\d\d-\d\d-\d\d (.+)/ and not $sites{$1}) {
505 2         5 push(@deletes, $line);
506 2         3 $todo++;
507             } else {
508 2         6 push(@keeps, $line);
509             }
510             }
511 2 100 66     14 print "Removing these entries from updates:\n"
512             . join("\n", @deletes, "") if @deletes and not $confirm;
513             # save
514 2 100       27 save_file($updated_list, join("\n", @keeps, "")) if $confirm;
515             }
516 2 100 66     18 if ($todo && !$confirm) {
517 1         3 print "\n";
518 1         5 print "Use moku-pona cleanup --confirm to do it.\n";
519             }
520             }
521              
522             =head2 Update
523              
524             moku-pona update [--quiet] [names...]
525              
526             This updates all the subscribed items and generates a new local page for you to
527             visit: C.
528              
529             Example:
530              
531             moku-pona update
532              
533             If you call it from a cron job, you might want to use the C<--quiet> argument to
534             prevent it from printing all the sites it's contacting (since cron will then
535             mail this to you and you might not care for it unless there's a problem). If
536             there's a problem, you'll still get a message.
537              
538             This is how I call it from my C, for example
539              
540             #m h dom mon dow command
541             11 7,14 * * * /home/alex/bin/moku-pona update --quiet
542              
543             If you're testing things, you can also fetch just a limited number of items by
544             listing them.
545              
546             Example:
547              
548             moku-pona update "RPG Planet"
549              
550             The C files may contain lines that are not links at the top. These
551             will remain untouched. The rest is links. New items are added at the beginning
552             of the links and older copies of such items are removed from the links.
553              
554             =cut
555              
556             sub add_update {
557 5     5   9 my $line = shift;
558 5         41 my ($uri, $name) = $line =~ /^=> (\S+)\s+(.*)/;
559             # add current date
560 5         53 my ($sec, $min, $hour, $mday, $mon, $year) = gmtime(); # UTC
561 5         34 my $date = sprintf('%4d-%02d-%02d', $year + 1900, $mon + 1, $mday);
562 5         18 $line = "=> $uri $date $name";
563             # load file
564 5         43 my @lines;
565 5 100       76 if (-f $updated_list) {
566 4 50       147 open(my $fh, "<:encoding(UTF-8)", $updated_list)
567             or die "Cannot read $updated_list: $!\n";
568 4         390 @lines = convert(<$fh>); # from gohper
569 4         69 chomp(@lines);
570             }
571             # start the new list with the non-list links
572 5         35 my @new = grep(!/^=>/, @lines);
573             # add the line to the new list
574 5         15 push(@new, $line);
575             # add the remaining links to the new list, except for the ones matching the name of the new line
576 5         8 $name = quotemeta($name);
577 5         139 push(@new, grep(!/\d\d\d\d-\d\d-\d\d $name$/, grep(/^=>/, @lines)));
578             # save
579 5         46 save_file($updated_list, join("\n", @new, ""));
580             }
581              
582             sub do_update {
583 6     6   7417 my $quiet = grep { $_ eq '--quiet' } @_;
  0         0  
584 6         16 my @sites = grep { $_ ne '--quiet' } @_;
  0         0  
585 6         8 my $site = load_site();
586 6         22 my %responses;
587             my @uris;
588 6         0 my %names;
589 6         0 my %lines;
590 6         13 for my $line (@$site) {
591 12         1144 my ($uri, $name) = $line =~ /^=> (\S+)(?:[ \t]+(.*))?/;
592 12   33     27 $name ||= $uri;
593 12 50 33     34 next unless @sites == 0 or grep { $_ eq $name } @sites;
  0         0  
594 12 50       979 say("Fetching $name...") unless $quiet;
595 12         57 push(@uris, $uri);
596 12         39 $names{$uri} = $name;
597 12         23 $lines{$uri} = $line;
598 12 50       62 if ($uri =~ /^gopher/) {
    0          
    0          
599 12         27 query_gopher($uri, \%responses);
600             } elsif ($uri =~ /^gemini/) {
601 0         0 query_gemini($uri, \%responses);
602             } elsif ($uri =~ /^http/) {
603 0         0 query_web($uri, \%responses);
604             } else {
605 0         0 warn "Don't know how to fetch $uri\n";
606             }
607             }
608              
609 6 50       1510 Mojo::IOLoop->start unless Mojo::IOLoop->is_running;
610              
611 6         2481 for my $uri (keys %responses) {
612 12         42 my $name = $names{$uri};
613             # decode the UTF-8 when we have the entire response
614 12         60 my $new = decode_utf8($responses{$uri});
615 12 50       317 if (not $new) {
616 0         0 warn("$name returned an empty document\n");
617 0         0 next;
618             }
619 12         16 my $filename = $uri;
620 12         108 $filename =~ s/[\/:]/-/g;
621 12         30 my $cache = "$data_dir/$filename";
622 12 100       45 if ($new =~ /^<(\?xml|rss)/) {
623 2         17 $new = to_gemini($new);
624 2         63 my $encoded = uri_escape_utf8($filename);
625 2         76 $lines{$uri} = "=> $encoded $name"; # now referring to the local cache file
626             }
627 12         24 my $old = load_file($cache);
628 12 100       288 if ($new ne $old) {
629 5 50       514 say "$name updated" unless $quiet;
630 5         32 add_update($lines{$uri});
631 5         22 save_file($cache, $new);
632             } else {
633 7 50       773 say "$name unchanged" unless $quiet;
634             }
635             }
636             }
637              
638             =head2 Subscribing to feeds
639              
640             When the result of an update is an XML document, then it is parsed and the links
641             of its items (if RSS) or entries (if Atom) are extracted and saved in the cache
642             file in the data directory. The effect is this:
643              
644             =over
645              
646             =item If you subscribe to a regular page, then the link to it in C
647             moves to the top when it changes.
648              
649             =item If you subscribe to a feed, then the link in C moves to the
650             top when it changes and it links to a file in the data directory that links to
651             the individual items in the feed.
652              
653             =back
654              
655             Example:
656              
657             moku-pona add https://campaignwiki.org/rpg/feed.xml "RPG"
658             moku-pona update
659              
660             This adds the RPG entry to C as follows:
661              
662             => https%3A--campaignwiki.org-rpg-feed.xml 2020-11-07 RPG
663              
664             And if you check the file C, you'll see
665             that it's a regular Gemini list. You'll find 100 links like the following:
666              
667             => https://alexschroeder.ch/wiki/2020-11-05_Episode_34 Episode 34
668              
669             Now use C (see below) to move the files to the correct
670             directory where your Gemini server expects them.
671              
672             =cut
673              
674             # Convert a RSS or Atom feed to Gemini links
675             sub to_gemini {
676 5     5   34 my $xml = shift;
677 5         9 my $dom = eval {
678 5         32 require XML::LibXML;
679 5         24 my $parser = XML::LibXML->new(recover => 2); # no errors, no warnings
680 5         364 $parser->load_xml(string => $xml);
681             };
682 5 50       2123 if ($@) {
683 0         0 warn "$@\n";
684 0         0 return '';
685             }
686 5         54 my $root = $dom->documentElement();
687 5         153 my $xpc = XML::LibXML::XPathContext->new;
688 5         35 $xpc->registerNs('atom', 'http://www.w3.org/2005/Atom');
689 5   66     18 my $nodes = $xpc->findnodes('//atom:entry', $root) || $root->findnodes('//item');
690 5         1217 my @lines;
691 5         23 for my $node ($nodes->get_nodelist) {
692 7   66     95 my $titles = $xpc->findnodes('atom:title', $node) || $node->getChildrenByTagName('title');
693 7         1215 my $first = $titles->shift;
694 7 50       112 my $title = $first ? $first->textContent : "untitled";
695 7         154 $title =~ s/\s+$//; # trim right
696 7         22 $title =~ s/^\s+//; # trim left
697 7   100     26 my $links = $xpc->findnodes('atom:link', $node) || $node->getChildrenByTagName('link');
698 7 100       1048 next unless $links;
699 6         136 my $link = $links->shift; # take the first
700 6   66     33 my $href = $link->getAttribute('href') || $link->textContent;
701 6         125 push(@lines, "=> $href $title");
702             }
703 5         126 return join("\n", @lines, "");
704             }
705              
706             =head2 Publishing your subscription
707              
708             moku-pona publish
709              
710             This takes the important files from your data directory and copies them to a
711             target directory. You could just use symbolic links for C and
712             C, of course. But if you've subscribed to actual feeds as described
713             above, then the cache files need to get copied as well!
714              
715             Example:
716              
717             mkdir ~/subs
718             moku-pona publish ~/subs
719              
720             =head2 Serving your subscriptions via Gemini
721              
722             This depends entirely on your Gemini server. If you like it really simple, you
723             can use L. Here's how to create the certificate and key
724             files, copy them to the C<~/subs> directory created above, and run C
725             for a quick test.
726              
727             make cert
728             cp *.pem ~/subs
729             cd ~/subs
730             lupa-pona
731              
732             =cut
733              
734             sub do_publish {
735 1     1   538 my $target = shift;
736 1 50       16 die "Target $target is not a directory\n" unless -d $target;
737 1 50       22 die "Source $site_list does not exist\n" unless -f $site_list;
738 1 50       11 die "Source $updated_list does not exist\n" unless -f $updated_list;
739 1         2 my $path;
740             # copy site list
741 1         6 copy($site_list, "$target/sites.txt");
742             # copy updates but with local links for the feed files
743 1 50   2   412 open(my $in, "<:encoding(UTF-8)", $updated_list)
  2         13  
  2         3  
  2         46  
744             or die "Cannot read $updated_list: $!\n";
745 1 50       1276 open(my $out, ">:encoding(UTF-8)", "$target/updates.txt")
746             or die "Cannot write $target/updates.txt: $!\n";
747 1         102 for my $line (<$in>) {
748 1         41 chomp($line);
749 1         3 ($line) = convert($line);
750 1         8 my ($uri, $name) = $line =~ /^=> file:\/\/\/(\S+)\s+(.*)/;
751             # if the target is a local file, then that's because it is the result of a
752             # to_gemini call in do_update, so we need to copy it as well
753 1         4 $uri =~ s/[\/:]/-/g;
754 1 50       17 if (-f "$data_dir/$uri") {
755 1         9 copy("$data_dir/$uri", "$target/$uri");
756             }
757 1         403 print $out "$line\n";
758             }
759             }
760              
761             sub do_help {
762 0     0   0 my $parser = Pod::Text->new();
763 0         0 $parser->parse_file($0);
764             }
765              
766             sub main {
767 0   0 0   0 my $command = shift(@ARGV) || "help";
768 0 0       0 if ($command eq "add") { do_add(@ARGV) }
  0 0       0  
    0          
    0          
    0          
    0          
    0          
769 0         0 elsif ($command eq "remove") { do_remove(@ARGV) }
770 0         0 elsif ($command eq "list") { do_list(@ARGV) }
771 0         0 elsif ($command eq "cleanup") { do_cleanup(@ARGV) }
772 0         0 elsif ($command eq "update") { do_update(@ARGV) }
773 0         0 elsif ($command eq "convert") { do_convert() }
774 0         0 elsif ($command eq "publish") { do_publish(@ARGV) }
775 0         0 else { do_help() }
776             }
777              
778             main() if $0 =~ /\bmoku-pona$/;
779              
780             1;