File Coverage

blib/lib/App/Wax.pm
Criterion Covered Total %
statement 165 325 50.7
branch 44 116 37.9
condition 8 27 29.6
subroutine 48 66 72.7
pod n/a
total 265 534 49.6


line stmt bran cond sub pod time code
1             package App::Wax;
2              
3 86     86   320048 use 5.008008;
  86         476  
4              
5 86     86   42267 use Digest::SHA qw(sha1_hex);
  86         241685  
  86         6480  
6 86     86   38698 use File::Slurper qw(read_text write_text);
  86         1564804  
  86         4764  
7 86     86   582 use File::Spec;
  86         139  
  86         1628  
8 86     86   53749 use File::Temp;
  86         1624861  
  86         5885  
9 86     86   44588 use IPC::System::Simple qw(EXIT_ANY $EXITVAL systemx);
  86         1015746  
  86         10269  
10 86     86   57842 use LWP::UserAgent;
  86         3325449  
  86         2946  
11 86     86   41216 use Method::Signatures::Simple;
  86         1537740  
  86         549  
12 86     86   73162 use MIME::Types;
  86         328965  
  86         4639  
13 86     86   38439 use Mouse;
  86         2331620  
  86         309  
14 86     86   65454 use Parallel::parallel_map qw(parallel_map);
  86         1681254  
  86         5006  
15 86     86   42933 use Pod::Usage qw(pod2usage);
  86         3092478  
  86         8209  
16 86     86   853 use Try::Tiny qw(try catch);
  86         256  
  86         4340  
17 86     86   38106 use URI::Split qw(uri_split);
  86         53805  
  86         4685  
18              
19             # XXX this declaration must be on a single line
20             # https://metacpan.org/pod/version#How-to-declare()-a-dotted-decimal-version
21 86     86   548 use version; our $VERSION = version->declare('v3.1.2');
  86         238  
  86         581  
22              
23             # defaults
24             use constant {
25 86         13941 CACHE => 0,
26             ENV_PROXY => 1,
27             EXTENSION => qr/.(\.(?:(tar\.(?:bz|bz2|gz|lzo|Z))|(?:[ch]\+\+)|(?:\w+)))$/i,
28             INDEX => '%s.index.txt',
29             MIRROR => 0,
30             NAME => 'wax',
31             SEPARATOR => '--',
32             TEMPLATE => 'XXXXXXXX',
33             TIMEOUT => 60,
34             USER_AGENT => 'Mozilla/5.0 (Windows NT 10.0; rv:68.0) Gecko/20100101 Firefox/68.0',
35             VERBOSE => 0,
36 86     86   12064 };
  86         321  
37              
38             # RFC 2616: "If the media type remains unknown, the recipient SHOULD treat
39             # it as type 'application/octet-stream'."
40 86     86   648 use constant DEFAULT_CONTENT_TYPE => 'application/octet-stream';
  86         88  
  86         5627  
41              
42             # resources with these mime-types may have their extension inferred from the
43             # path part of their URI
44 86         5919 use constant INFER_EXTENSION => {
45             'text/plain' => 1,
46             'application/octet-stream' => 1,
47             'binary/octet-stream' => 1,
48 86     86   528 };
  86         205  
49              
50             # errors
51             use constant {
52 86         26906 OK => 0,
53             E_DOWNLOAD => -1,
54             E_INVALID_OPTION => -2,
55             E_INVALID_OPTIONS => -3,
56             E_INVALID_DIRECTORY => -4,
57             E_NO_ARGUMENTS => -5,
58             E_NO_COMMAND => -6,
59 86     86   730 };
  86         169  
60              
61             has app_name => (
62             is => 'rw',
63             isa => 'Str',
64             default => NAME,
65             );
66              
67             has app_version => (
68             is => 'ro',
69             isa => 'version',
70             default => sub { $VERSION },
71             );
72              
73             has cache => (
74             is => 'rw',
75             isa => 'Bool',
76             default => CACHE,
77             trigger => \&_check_keep,
78             );
79              
80             has directory => (
81             is => 'rw',
82             isa => 'Str',
83             predicate => 'has_directory',
84             required => 0,
85             trigger => \&_check_directory,
86             );
87              
88             has keep => (
89             is => 'ro',
90             isa => 'Bool',
91             default => 0,
92             writer => '_set_keep',
93             );
94              
95             has _lwp_user_agent => (
96             is => 'rw',
97             isa => 'LWP::UserAgent',
98             lazy => 1,
99             builder => '_build_lwp_user_agent',
100             );
101              
102             # this should really be a class attribute, but there's no MouseX::ClassAttribute
103             # (on CPAN)
104             has mime_types => (
105             is => 'ro',
106             isa => 'MIME::Types',
107             lazy => 1,
108             default => sub { MIME::Types->new() },
109             );
110              
111             has mirror => (
112             is => 'rw',
113             isa => 'Bool',
114             default => MIRROR,
115             trigger => \&_check_keep,
116             );
117              
118             has separator => (
119             is => 'rw',
120             isa => 'Str',
121             default => SEPARATOR,
122             clearer => 'clear_separator',
123             predicate => 'has_separator',
124             );
125              
126             # TODO make this private and read only, and rename it to something more
127             # descriptive e.g. tempfile_template
128             has template => (
129             is => 'rw',
130             isa => 'Str',
131             default => method () { sprintf('%s_%s', $self->app_name, TEMPLATE) },
132             lazy => 1,
133             );
134              
135             has timeout => (
136             is => 'rw',
137             isa => 'Int',
138             default => TIMEOUT,
139             trigger => method ($timeout) { $self->_lwp_user_agent->timeout($timeout) },
140             );
141              
142             has user_agent => (
143 86     86   172402 is => 'rw',
  0     0   0  
  0         0  
  0         0  
144 0         0 isa => 'Str',
145             default => USER_AGENT,
146 0 0       0 trigger => method ($user_agent) { $self->_lwp_user_agent->agent($user_agent) },
147 0 0       0 );
148 0         0  
149 0         0 has verbose => (
150             is => 'rw',
151             isa => 'Bool',
152             default => VERBOSE,
153             trigger => method ($verbose) { $| = 1 }, # unbuffer output
154             );
155 86     86   42392  
  1     1   2  
  1         1  
156 1         8 # log the path. if the directory doesn't exist, create it if its parent directory
157             # exists; otherwise, raise an error
158             method _check_directory ($dir) {
159             $self->debug("directory: $dir");
160              
161             unless (-d $dir) {
162             unless (mkdir $dir) {
163             $self->log(ERROR => "Can't create directory (%s): %s", $dir, $!);
164             exit E_INVALID_DIRECTORY;
165 86     86   37889 }
  232     232   532  
  232         331  
166 232 50 66     1381 }
167 0         0 }
168 0         0  
169             # lazy constructor for the default LWP::UserAgent instance
170 232         1311 method _build_lwp_user_agent {
171             LWP::UserAgent->new(
172             env_proxy => ENV_PROXY,
173             timeout => $self->timeout,
174             agent => $self->user_agent
175 86     86   40802 )
  0     0   0  
  0         0  
  0         0  
176 0         0 }
177 0         0  
178 0         0 # set `keep` to true if --cache or --mirror are set,
179 0 0       0 # but raise an error if both are set
180             method _check_keep {
181             if ($self->cache && $self->mirror) {
182             $self->log(ERROR => "--cache and --mirror can't be used together");
183             exit E_INVALID_OPTIONS;
184 86     86   63240 } else {
  0     0   0  
  0         0  
  0         0  
185 0         0 $self->_set_keep(1);
186 0         0 }
187 0         0 }
188              
189 0 0       0 # remove temporary files
190             method _unlink ($unlink) {
191 0         0 for my $filename (@$unlink) {
192             chmod 0600, $filename; # borrowed from File::Temp (may be needed on Windows)
193 0 0       0 $self->debug('removing: %s', $filename);
194 0         0 unlink($filename) || $self->log(WARN => "Can't unlink %s: %s", $filename, $!);
195             }
196 0         0 }
197 0         0  
198             # return the URL's content-type or an empty string if the request fails
199             method content_type ($_url) {
200             my ($url, $url_index) = @$_url;
201 0         0 my $response = $self->_lwp_user_agent->head($url);
202             my $content_type = '';
203              
204             if ($response->is_success) {
205             # the initial (pre-semicolon) part of the mime-type, trimmed and lowercased.
206 86     86   47326 $content_type = $response->headers->content_type;
  0     0   0  
  0         0  
  0         0  
207 0         0  
208 0         0 if ($content_type) {
209 0         0 $self->debug('content-type (%d): %s', $url_index, $content_type);
210             } else {
211 0 0 0     0 $content_type = DEFAULT_CONTENT_TYPE;
    0          
212 0         0 $self->debug('content-type (%d): %s (default)', $url_index, $content_type);
213             }
214 0         0 }
215              
216 0 0       0 return $content_type;
    0          
217 0         0 }
218              
219 0         0 # save the URL to a local filename; returns an error message if an error occurred,
220             # or a falsey value otherwise
221             method download ($_url, $filename) {
222 0         0 my ($url, $url_index) = @$_url;
223             my $ua = $self->_lwp_user_agent;
224 0 0       0 my ($downloaded, $error, $response);
225 0         0  
226             if ($self->cache && (-e $filename)) {
227             $downloaded = 0;
228             } elsif ($self->mirror) {
229 0 0       0 $response = $ua->mirror($url, $filename);
230 0 0       0  
231             if ($response->is_success) {
232 0         0 $downloaded = 1;
233 0         0 } elsif ($response->code == 304) {
234             $downloaded = 0;
235             }
236 0         0 } else {
237             $response = $ua->get($url, ':content_file' => $filename);
238              
239             if ($response->is_success) {
240 86     86   82158 $downloaded = 1;
  0     0   0  
  0         0  
241             }
242             }
243 0         0  
244 0         0 if (defined $downloaded) {
245 0         0 $self->debug('download (%d): %s', $url_index, ($downloaded ? 'yes' : 'no'));
246             } else {
247 0         0 my $status = $response->status_line;
248             $error = "can't download URL #$url_index ($url) to filename ($filename): $status";
249             }
250              
251 86     86   42293 return $error;
  0     0   0  
  0         0  
  0         0  
252 0         0 }
253 0 0       0  
254 0         0 # helper for `dump_command`: escape/quote a shell argument on POSIX shells
255             func _escape ($arg) {
256             # https://stackoverflow.com/a/1250279
257             # https://github.com/boazy/any-shell-escape/issues/1#issuecomment-36226734
258             $arg =~ s!('{1,})!'"$1"'!g;
259             $arg = "'$arg'";
260 86     86   41001 $arg =~ s{^''|''$}{}g;
  0     0   0  
  0         0  
  0         0  
261 0         0  
262 0         0 return $arg;
263 0         0 }
264              
265 0 0       0 # log a message to stderr with the app's name and message's log level
266             method log ($level, $template, @args) {
267 0         0 my $name = $self->app_name;
268 0         0 my $message = @args ? sprintf($template, @args) : $template;
269             warn "$name: $level: $message", $/;
270 0 0       0 }
271              
272 0 0       0 # return a best-effort guess at the URL's file extension based on its content
273             # type, e.g. ".md" or ".tar.gz", or an empty string if one can't be determined.
274 0 0 0     0 # XXX note: makes a network request to determine the content type
      0        
275 0         0 method extension ($_url) {
276             my ($url, $url_index) = @$_url;
277             my $extension = '';
278             my $split = $self->is_url($url);
279 0 0       0  
280 0         0 return $extension unless ($split);
281 0         0  
282             my ($scheme, $domain, $path, $query, $fragment) = @$split;
283 0 0       0 my $content_type = $self->content_type($_url);
284 0         0  
285             return $extension unless ($content_type); # won't be defined if the URL is invalid
286              
287             if (INFER_EXTENSION->{$content_type}) {
288 0         0 # try to get a more specific extension from the path
289             if (not(defined $query) && $path && ($path =~ EXTENSION)) {
290 0         0 $extension = $+;
291             }
292             }
293              
294             unless ($extension) {
295 86     86   54377 my $mime_type = $self->mime_types->type($content_type);
  950     950   1373  
  950         1460  
  950         998  
296 950 100       4053 my @extensions = $mime_type->extensions;
297 387         2188  
298             if (@extensions) {
299 387 50 33     7802 $extension = '.' . $extensions[0];
      33        
300 387         1429 }
301             }
302              
303             $self->debug('extension (%d): %s', $url_index, $extension);
304              
305             return $extension;
306 86     86   47057 }
  1044     1044   1278  
  1044         2396  
  1044         1261  
307 1044 50       2561  
308 0         0 # return a truthy value (an arrayref containing the URL's components)
309 0 0       0 # if the supplied value can be parsed as a URL, or a falsey value otherwise
310 0         0 method is_url ($url) {
311             if ($url =~ m{^[a-zA-Z][\w+]*://}) { # basic sanity check
312             my ($scheme, $domain, $path, $query, $fragment) = uri_split($url);
313              
314             if ($scheme && ($domain || $path)) { # no domain for file:// URLs
315             return [ $scheme, $domain, $path, $query, $fragment ];
316             }
317             }
318             }
319 86     86   39353  
  227     227   1179  
  227         760  
  227         247  
320 227         611 # log a message to stderr if logging is enabled
321             method debug ($template, @args) {
322 227         883 if ($self->verbose) {
323             my $name = $self->app_name;
324 227 100       870 my $message = @args ? sprintf($template, @args) : $template;
325 23         63 warn "$name: $message", $/;
326             }
327             }
328 227 50       475  
329 0         0 # perform housekeeping after a download: replace the placeholder with the file
330 0         0 # path; push the path onto the delete list if it's a temporary file; and log any
331             # errors
332 227         861 #
333             # XXX give this a more descriptive name e.g. _handle_download or _after_download
334             method _handle ($resolved, $command, $unlink) {
335             my ($command_index, $filename, $error) = @$resolved;
336              
337             $command->[$command_index] = $filename;
338              
339             unless ($self->keep) {
340             push @$unlink, $filename;
341 86     86   43489 }
  570     570   2718  
  570         947  
  570         685  
342 570 50       1023  
  2793         8902  
343             if ($error) {
344             $self->log(ERROR => $error);
345             return E_DOWNLOAD;
346             } else {
347             return OK;
348             }
349 86     86   42111 }
  0     0      
  0            
  0            
350 0            
351             # this is purely for diagnostic purposes i.e. there's no guarantee
352 0 0         # that the dumped command can be used as a command line. a better
353 0           # (but still imperfect/incomplete) implementation would require at
354             # least two extra modules: Win32::ShellQuote and String::ShellQuote:
355 0           # https://rt.cpan.org/Public/Bug/Display.html?id=37348
356             method dump_command ($args) {
357             return join(' ', map { /[^0-9A-Za-z+,.\/:=\@_-]/ ? _escape($_) : $_ } @$args);
358 0   0       }
359 0            
360             # takes a URL and returns a $filename => $error pair where
361 0 0         # the filename is the path to the saved file and the error
362             # is the first error message encountered while trying to download
363             # and save it
364             method resolve ($_url) {
365             my ($error, $filename, @resolved);
366              
367             if ($self->keep) {
368 86     86   43670 ($filename, $error) = $self->resolve_keep($_url);
  0     0   0  
  0         0  
  0         0  
369 0         0 } else {
370 0 0       0 ($filename, $error) = $self->resolve_temp($_url);
371 0         0 }
372 0         0  
373 0         0 $error ||= $self->download($_url, $filename);
374             @resolved = ($filename, $error);
375              
376             return wantarray ? @resolved : \@resolved;
377 0 0       0 }
378 0         0  
379             # takes a URL and returns a $filename => $error pair for cacheable files.
380             # in order to calculate the filename, we need to determine the URL's extension,
381 0     0   0 # which requires a network request for the content type. to avoid hitting the
382             # network for subsequent requests, we cache the extension in an index file.
383 0     0   0 method resolve_keep ($_url) {
384 0         0 my ($url, $url_index) = @$_url;
385             my $directory = $self->has_directory ? $self->directory : File::Spec->tmpdir;
386 0         0 my $id = sprintf('%s_%s', $self->app_name, sha1_hex($url));
387 0         0 my $index_file = File::Spec->catfile($directory, sprintf(INDEX, $id));
388             my ($error, $extension);
389              
390 0     0   0 # -s: if /tmp is full, the index file may get written as an empty file, so
391             # make sure it's non-empty
392 0     0   0 if (-s $index_file) {
393 0         0 $self->debug('index (%d): %s (exists)', $url_index, $index_file);
394              
395             try {
396 0         0 $extension = read_text($index_file);
397             } catch {
398 0         0 $error = "unable to load index #$url_index ($index_file): $_";
399             };
400             } else {
401             $self->debug('index (%d): %s (create)', $url_index, $index_file);
402             $extension = $self->extension($_url);
403 86     86   64990  
  0     0   0  
  0         0  
  0         0  
404 0         0 try {
405 0         0 write_text($index_file, $extension);
406             } catch {
407 0 0       0 $error = "unable to save index #$url_index ($index_file): $_";
408 0         0 };
409             }
410 0         0  
411             my $filename = File::Spec->catfile($directory, "$id$extension");
412              
413 0 0       0 return ($filename, $error);
414 0         0 }
415              
416             # takes a URL and returns a $filename => $error pair for
417 0         0 # temporary files (i.e. files which will be automatically unlinked)
418             method resolve_temp ($_url) {
419             my $extension = $self->extension($_url);
420 0     0   0 my %options = (TEMPLATE => $self->template, UNLINK => 0);
421 0         0  
422             if ($self->has_directory) {
423 0     0   0 $options{DIR} = $self->directory;
424 0         0 } else {
425             $options{TMPDIR} = 1;
426 0         0 }
427              
428             if ($extension) {
429             $options{SUFFIX} = $extension;
430             }
431              
432             my ($filename, $error);
433              
434             try {
435 86     86   49253 srand($$); # see the File::Temp docs
  285     285   420  
  285         388  
  285         402  
436 285         810 $filename = File::Temp->new(%options)->filename;
437             } catch {
438 285 50       770 $error = $_;
439 0         0 };
440              
441             return ($filename, $error);
442             }
443              
444             # parse the supplied arrayref of options and return a triple of:
445             #
446             # command: an arrayref containing the command to execute
447 285         409 # resolve: an arrayref of [index, URL] pairs, where index refers to the URL's
448 285         551 # (0-based) index in the commmand array
449 285         386 # test: true if --test was seen; false otherwise
450 285         405 method _parse ($argv) {
451             my @argv = @$argv;
452 285         600  
453 1779         2513 unless (@argv) {
454             pod2usage(
455             -exitval => E_NO_ARGUMENTS,
456 12 50   12   19 -input => $0,
457 12         55 -msg => 'no arguments supplied',
458             -verbose => 0,
459 0         0 );
460             }
461              
462             my $wax_options = 1;
463             my $seen_url = 0;
464             my $test = 0;
465             my (@command, @resolve);
466 1779         5453  
467             while (@argv) {
468 1779 100 100     7024 my $arg = shift @argv;
    100          
    100          
469 816 100       8245  
    50          
    50          
    50          
    50          
    100          
    100          
    100          
    100          
    50          
    50          
    50          
    50          
470 116         1421 my $val = sub {
471             if (@argv) {
472 0         0 return shift(@argv);
473             } else {
474             pod2usage(
475 0         0 -exitval => E_INVALID_OPTION,
476 0         0 -input => $0,
477             -msg => "missing value for $arg option",
478 0         0 -verbose => 1,
479             );
480 0         0 }
481             };
482 116         936  
483             if ($wax_options) {
484 12         18 if ($arg =~ /^(?:-c|--cache)$/) {
485             $self->cache(1);
486 2         9 } elsif ($arg =~ /^(?:-d|--dir|--directory)$/) {
487             $self->directory($val->());
488 285         1643 } elsif ($arg eq '-D') {
489             # "${XDG_CACHE_HOME:-$HOME/.cache}/wax"
490 0         0 require File::BaseDir;
491             $self->directory(File::BaseDir::cache_home($self->app_name));
492 0         0 } elsif ($arg =~ /^(?:-v|--verbose)$/) {
493             $self->verbose(1);
494 0         0 } elsif ($arg =~ /^(?:-[?h]|--help)$/) {
495 0         0 pod2usage(-input => $0, -verbose => 2, -exitval => 0);
496             } elsif ($arg =~ /^(?:-m|--mirror)$/) {
497 0         0 $self->mirror(1);
498             } elsif ($arg =~ /^(?:-s|--separator)$/) {
499             $self->separator($val->());
500             } elsif ($arg =~ /^(?:-S|--no-separator)$/) {
501             $self->clear_separator();
502             } elsif ($arg eq '--test') {
503             $test = 1;
504 285         595 } elsif ($arg =~ /^(?:-t|--timeout)$/) {
505 285         880 $self->timeout($val->());
506             } elsif ($arg =~ /^(?:-u|--user-agent)$/) {
507             $self->user_agent($val->());
508 13         22 } elsif ($arg =~ /^(?:-V|--version)$/) {
509 13         31 printf "%s (%s %s)$/", $self->app_version, __PACKAGE__, $VERSION;
510             exit 0;
511 387 100       822 } elsif ($arg =~ /^-/) { # unknown option
512 270         1007 pod2usage(
513 270         794 -exitval => E_INVALID_OPTION,
514 270         866 -input => $0,
515             -msg => "invalid option: $arg",
516             -verbose => 1,
517 387         773 );
518 387         691 } else { # non-option: exit the wax-options processing stage
519             push @command, $arg;
520 387         855 $wax_options = 0;
521             }
522 387         1199 } elsif ($self->has_separator && ($arg eq $self->separator)) {
523 387         2222 push @command, @argv;
524             last;
525 563         1887 } elsif ($self->is_url($arg)) {
526             unless ($seen_url) {
527             $self->debug('user-agent: %s', $self->user_agent);
528             $self->debug('timeout: %d', $self->timeout);
529 285         917 $seen_url = 1;
530             }
531              
532             my $url_index = @resolve + 1; # 1-based
533 86     86   114215 my $_url = [ $arg, $url_index ];
  285     285   1606  
  285         505  
  285         419  
534 285         608  
535 285         372 $self->debug('url (%d): %s', $url_index, $arg);
536 285         770  
537             push @command, $arg;
538 285 50       684 push @resolve, [ $#command, $_url ];
539 0         0 } else {
540 0 0       0 push @command, $arg;
541             }
542             }
543 285 100       1066  
    100          
544 153         1427 return \@command, \@resolve, $test;
  153         555  
545 153         639 }
546              
547 153         2621 # process the options and execute the command with substituted filenames
548             method run ($argv) {
549 117         384 my $error = 0;
550             my $unlink = [];
551             my ($command, $resolve, $test) = $self->_parse($argv);
552 10     10   275064  
553 10         747 unless (@$command) {
554 117         885 $self->log(ERROR => 'no command supplied');
555             return $test ? $command : E_NO_COMMAND;
556 37         1219733 }
557 74   33     2585  
558             if (@$resolve == 1) {
559             my ($command_index, $_url) = @{ $resolve->[0] };
560             my @resolved = $self->resolve($_url);
561 205 50       815  
    50          
562 0         0 $error = $self->_handle([ $command_index, @resolved ], $command, $unlink);
563 0         0 } elsif (@$resolve) {
564 0         0 $self->debug('jobs: %d', scalar(@$resolve));
565              
566 205         2233 my @resolved = parallel_map {
567             my ($command_index, $_url) = @$_;
568 0           [ $command_index, $self->resolve($_url) ]
569             } @$resolve;
570              
571             for my $resolved (@resolved) {
572             $error ||= $self->_handle($resolved, $command, $unlink);
573 86     86   27536 }
  86         122  
  86         16449  
574 0     0     }
  0            
575 0            
576             if ($error) {
577 0     0     $self->debug('exit code: %d', $error);
578 0           $self->_unlink($unlink);
579 0           return $error;
580             } elsif ($test) {
581 0           return $command;
582 0           } else {
583             $self->debug('command: %s', $self->dump_command($command));
584 0            
585             try {
586             # XXX hack to remove the " in /path/to/App/Wax.pm line "
587             # noise. we just want the error message
588             no warnings qw(redefine);
589             local *IPC::System::Simple::croak = sub { die @_, $/ };
590             systemx(EXIT_ANY, @$command);
591             } catch {
592             chomp;
593             $self->log(ERROR => $_);
594             };
595              
596             $self->debug('exit code: %d', $EXITVAL);
597             $self->_unlink($unlink);
598              
599             return $EXITVAL;
600             }
601             }
602              
603             __PACKAGE__->meta->make_immutable();
604              
605             1;