File Coverage

blib/lib/App/Wax.pm
Criterion Covered Total %
statement 172 320 53.7
branch 23 80 28.7
condition 8 24 33.3
subroutine 57 81 70.3
pod n/a
total 260 505 51.4


line stmt bran cond sub pod time code
1             package App::Wax;
2              
3 88     88   340631 use 5.008008;
  88         815  
4              
5 88     88   48489 use Digest::SHA qw(sha1_hex);
  88         265504  
  88         6969  
6 88     88   41106 use File::Slurper qw(read_text write_text);
  88         1699290  
  88         4805  
7 88     88   668 use File::Spec;
  88         109  
  88         1836  
8 88     88   58944 use File::Temp;
  88         1737870  
  88         6353  
9 88     88   61334 use Getopt::Long qw(GetOptionsFromArray :config posix_default require_order bundling no_auto_abbrev no_ignore_case);
  88         864578  
  88         397  
10 88     88   69106 use IPC::System::Simple qw(EXIT_ANY $EXITVAL systemx);
  88         1072799  
  88         9670  
11 88     88   56716 use LWP::UserAgent;
  88         3568729  
  88         3233  
12 88     88   40986 use Method::Signatures::Simple;
  88         1628454  
  88         662  
13 88     88   73834 use MIME::Types;
  88         351750  
  88         4333  
14 88     88   38078 use Mouse;
  88         2511157  
  88         422  
15 88     88   73710 use Parallel::parallel_map qw(parallel_map);
  88         1783470  
  88         5597  
16 88     88   45682 use Pod::Usage qw(pod2usage);
  88         3253767  
  88         7736  
17 88     88   1961 use Try::Tiny qw(try catch);
  88         211  
  88         75126  
18 88     88   82259 use URI::Split qw(uri_split);
  88         47435  
  88         4800  
19              
20             # NOTE this is the version of the *command* rather than the *module*, i.e.
21             # breaking API changes may occur here which aren't reflected in the SemVer since
22             # they don't break the behavior of the command
23             #
24             # XXX this declaration must be on a single line
25             # https://metacpan.org/pod/version#How-to-declare()-a-dotted-decimal-version
26 88     88   632 use version; our $VERSION = version->declare('v2.4.0');
  88         176  
  88         787  
27              
28             # defaults
29             use constant {
30             CACHE => 0,
31             DEFAULT_USER_AGENT => 'Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0',
32             ENV_PROXY => 1,
33             ENV_USER_AGENT => $ENV{WAX_USER_AGENT},
34 88         14354 EXTENSION => qr/.(\.(?:(tar\.(?:bz|bz2|gz|lzo|Z))|(?:[ch]\+\+)|(?:\w+)))$/i,
35             INDEX => '%s.index.txt',
36             MIRROR => 0,
37             NAME => 'wax',
38             SEPARATOR => '--',
39             TEMPLATE => 'XXXXXXXX',
40             TIMEOUT => 60,
41             VERBOSE => 0,
42 88     88   14068 };
  88         178  
43              
44 88     88   596 use constant USER_AGENT => ENV_USER_AGENT || DEFAULT_USER_AGENT;
  88         133  
  88         4495  
45              
46             # RFC 2616: "If the media type remains unknown, the recipient SHOULD treat
47             # it as type 'application/octet-stream'."
48 88     88   528 use constant DEFAULT_CONTENT_TYPE => 'application/octet-stream';
  88         92  
  88         4997  
49              
50             # resources with these mime-types may have their extension inferred from the
51             # path part of their URI
52 88         5858 use constant INFER_EXTENSION => {
53             'text/plain' => 1,
54             'application/octet-stream' => 1,
55             'binary/octet-stream' => 1,
56 88     88   553 };
  88         115  
57              
58             # errors
59             use constant {
60 88         21435 OK => 0,
61             E_DOWNLOAD => -1,
62             E_INVALID_DIRECTORY => -2,
63             E_INVALID_OPTIONS => -3,
64             E_NO_COMMAND => -4,
65 88     88   508 };
  88         937  
66              
67             has app_name => (
68             is => 'rw',
69             isa => 'Str',
70             default => NAME,
71             );
72              
73             has cache => (
74             is => 'rw',
75             isa => 'Bool',
76             default => CACHE,
77             trigger => \&_check_keep,
78             );
79              
80             has directory => (
81             is => 'rw',
82             isa => 'Str',
83             predicate => 'has_directory',
84             required => 0,
85             trigger => \&_check_directory,
86             );
87              
88             has keep => (
89             is => 'ro',
90             isa => 'Bool',
91             default => 0,
92             writer => '_set_keep',
93             );
94              
95             has _lwp_user_agent => (
96             is => 'rw',
97             isa => 'LWP::UserAgent',
98             lazy => 1,
99             builder => '_build_lwp_user_agent',
100             );
101              
102             # this should really be a class attribute, but there's no MouseX::ClassAttribute
103             # (on CPAN)
104             has mime_types => (
105             is => 'ro',
106             isa => 'MIME::Types',
107             lazy => 1,
108             default => sub { MIME::Types->new() },
109             );
110              
111             has mirror => (
112             is => 'rw',
113             isa => 'Bool',
114             default => MIRROR,
115             trigger => \&_check_keep,
116             );
117              
118             has separator => (
119             is => 'rw',
120             isa => 'Str',
121             default => SEPARATOR,
122             clearer => 'clear_separator',
123             predicate => 'has_separator',
124             );
125              
126             # TODO make this private and read only, and rename it to something more
127             # descriptive, e.g. tempfile_template
128             has template => (
129             is => 'rw',
130             isa => 'Str',
131             default => method () { sprintf('%s_%s', $self->app_name, TEMPLATE) },
132             lazy => 1,
133             );
134              
135             has timeout => (
136             is => 'rw',
137             isa => 'Int',
138             default => TIMEOUT,
139             trigger => method ($timeout) { $self->_lwp_user_agent->timeout($timeout) },
140             );
141              
142             has user_agent => (
143 88     88   183344 is => 'rw',
  0     0      
  0            
  0            
144 0           isa => 'Str',
145             default => USER_AGENT,
146 0 0         trigger => method ($user_agent) { $self->_lwp_user_agent->agent($user_agent) },
147 0 0         );
148 0            
149 0           has verbose => (
150             is => 'rw',
151             isa => 'Bool',
152             default => VERBOSE,
153             trigger => method ($verbose) { $| = 1 }, # unbuffer output
154             );
155 88     88   45783  
  1     1   2  
  1         1  
156 1         13 # log the path. if the directory doesn't exist, create it if its parent directory
157             # exists; otherwise, raise an error
158             method _check_directory ($dir) {
159             $self->debug("directory: $dir");
160              
161             unless (-d $dir) {
162             unless (mkdir $dir) {
163             $self->log(ERROR => "Can't create directory (%s): %s", $dir, $!);
164             exit E_INVALID_DIRECTORY;
165 88     88   38971 }
  238     238   496  
  238         374  
166 238 50 66     1485 }
167 0         0 }
168 0         0  
169             # lazy constructor for the default LWP::UserAgent instance
170 238         871 method _build_lwp_user_agent {
171             LWP::UserAgent->new(
172             env_proxy => ENV_PROXY,
173             timeout => $self->timeout,
174             agent => $self->user_agent
175 88     88   43982 )
  0     0   0  
  0         0  
  0         0  
176 0         0 }
177 0         0  
178 0         0 # set `keep` to true if --cache or --mirror are set,
179 0 0       0 # but raise an error if both are set
180             method _check_keep {
181             if ($self->cache && $self->mirror) {
182             $self->log(ERROR => "--cache and --mirror can't be used together");
183             exit E_INVALID_OPTIONS;
184 88     88   44246 } else {
  0     0   0  
  0         0  
  0         0  
185 0         0 $self->_set_keep(1);
186 0         0 }
187 0         0 }
188              
189 0 0       0 # remove temporary files
190             method _unlink ($unlink) {
191 0         0 for my $filename (@$unlink) {
192             chmod 0600, $filename; # borrowed from File::Temp (may be needed on Windows)
193 0 0       0 $self->debug('removing: %s', $filename);
194 0         0 unlink($filename) || $self->log(WARN => "Can't unlink %s: %s", $filename, $!);
195             }
196 0         0 }
197 0         0  
198             # return the URL's content-type or an empty string if the request fails
199             method content_type ($_url) {
200             my ($url, $url_index) = @$_url;
201 0         0 my $response = $self->_lwp_user_agent->head($url);
202             my $content_type = '';
203              
204             if ($response->is_success) {
205             # the initial (pre-semicolon) part of the mime-type, trimmed and lowercased.
206 88     88   47408 $content_type = $response->headers->content_type;
  0     0   0  
  0         0  
  0         0  
207 0         0  
208 0         0 if ($content_type) {
209 0         0 $self->debug('content-type (%d): %s', $url_index, $content_type);
210             } else {
211 0 0 0     0 $content_type = DEFAULT_CONTENT_TYPE;
    0          
212 0         0 $self->debug('content-type (%d): %s (default)', $url_index, $content_type);
213             }
214 0         0 }
215              
216 0 0       0 return $content_type;
    0          
217 0         0 }
218              
219 0         0 # save the URL to a local filename; returns an error message if an error occurred,
220             # or a falsey value otherwise
221             method download ($_url, $filename) {
222 0         0 my ($url, $url_index) = @$_url;
223             my $ua = $self->_lwp_user_agent;
224 0 0       0 my ($downloaded, $error, $response);
225 0         0  
226             if ($self->cache && (-e $filename)) {
227             $downloaded = 0;
228             } elsif ($self->mirror) {
229 0 0       0 $response = $ua->mirror($url, $filename);
230 0 0       0  
231             if ($response->is_success) {
232 0         0 $downloaded = 1;
233 0         0 } elsif ($response->code == 304) {
234             $downloaded = 0;
235             }
236 0         0 } else {
237             $response = $ua->get($url, ':content_file' => $filename);
238              
239             if ($response->is_success) {
240 88     88   55855 $downloaded = 1;
  0     0   0  
  0         0  
241             }
242             }
243 0         0  
244 0         0 if (defined $downloaded) {
245 0         0 $self->debug('download (%d): %s', $url_index, ($downloaded ? 'yes' : 'no'));
246             } else {
247 0         0 my $status = $response->status_line;
248             $error = "can't download URL #$url_index ($url) to filename ($filename): $status";
249             }
250 88     88   49783  
  6     6   10  
  6         7  
251             return $error;
252 6         930 }
253 6         2536  
254             # helper for `dump_command`: escape/quote a shell argument on POSIX shells
255             func _escape ($arg) {
256             # https://stackoverflow.com/a/1250279
257 88     88   42670 # https://github.com/boazy/any-shell-escape/issues/1#issuecomment-36226734
  0     0   0  
  0         0  
258 0         0 $arg =~ s!('{1,})!'"$1"'!g;
259 0         0 $arg = "'$arg'";
260             $arg =~ s{^''|''$}{}g;
261              
262             return $arg;
263 88     88   39313 }
  0     0   0  
  0         0  
  0         0  
264 0         0  
265 0 0       0 method _use_default_directory () {
266 0         0 # "${XDG_CACHE_HOME:-$HOME/.cache}/wax"
267             require File::BaseDir;
268             $self->directory(File::BaseDir::cache_home($self->app_name));
269             }
270              
271             # print the version and exit
272 88     88   44316 method _dump_version () {
  0     0   0  
  0         0  
  0         0  
273 0         0 print $VERSION, $/;
274 0         0 exit 0;
275 0         0 }
276              
277 0 0       0 # log a message to stderr with the app's name and message's log level
278             method log ($level, $template, @args) {
279 0         0 my $name = $self->app_name;
280 0         0 my $message = @args ? sprintf($template, @args) : $template;
281             warn "$name: $level: $message", $/;
282 0 0       0 }
283              
284 0 0       0 # return a best-effort guess at the URL's file extension based on its content
285             # type, e.g. ".md" or ".tar.gz", or an empty string if one can't be determined.
286 0 0 0     0 # XXX note: makes a network request to determine the content type
      0        
287 0         0 method extension ($_url) {
288             my ($url, $url_index) = @$_url;
289             my $extension = '';
290             my $split = $self->is_url($url);
291 0 0       0  
292 0         0 return $extension unless ($split);
293 0         0  
294             my ($scheme, $domain, $path, $query, $fragment) = @$split;
295 0 0       0 my $content_type = $self->content_type($_url);
296 0         0  
297             return $extension unless ($content_type); # won't be defined if the URL is invalid
298              
299             if (INFER_EXTENSION->{$content_type}) {
300 0         0 # try to get a more specific extension from the path
301             if (not(defined $query) && $path && ($path =~ EXTENSION)) {
302 0         0 $extension = $+;
303             }
304             }
305              
306             unless ($extension) {
307 88     88   55162 my $mime_type = $self->mime_types->type($content_type);
  1253     1253   1642  
  1253         1894  
  1253         1393  
308 1253 100       5090 my @extensions = $mime_type->extensions;
309 393         2083  
310             if (@extensions) {
311 393 50 33     6078 $extension = '.' . $extensions[0];
      33        
312 393         1477 }
313             }
314              
315             $self->debug('extension (%d): %s', $url_index, $extension);
316              
317             return $extension;
318 88     88   48624 }
  1062     1062   1474  
  1062         2232  
  1062         1217  
319 1062 50       3130  
320 0         0 # return a truthy value (an arrayref containing the URL's components)
321 0 0       0 # if the supplied value can be parsed as a URL, or a falsey value otherwise
322 0         0 method is_url ($url) {
323             if ($url =~ m{^[a-zA-Z][\w+]*://}) { # basic sanity check
324             my ($scheme, $domain, $path, $query, $fragment) = uri_split($url);
325              
326             if ($scheme && ($domain || $path)) { # no domain for file:// URLs
327             return [$scheme, $domain, $path, $query, $fragment];
328             }
329             }
330             }
331 88     88   43582  
  233     233   588  
  233         514  
  233         639  
332 233         740 # log a message to stderr if logging is enabled
333             method debug ($template, @args) {
334 233         993 if ($self->verbose) {
335             my $name = $self->app_name;
336 233 100       830 my $message = @args ? sprintf($template, @args) : $template;
337 23         74 warn "$name: $message", $/;
338             }
339             }
340 233 50       569  
341 0         0 # perform housekeeping after a download: replace the placeholder with the file
342 0         0 # path; push the path onto the delete list if it's a temporary file; and log any
343             # errors
344 233         744 #
345             # XXX give this a more descriptive name, e.g. _handle_download or _after_download
346             method _handle ($resolved, $command, $unlink) {
347             my ($command_index, $filename, $error) = @$resolved;
348              
349             $command->[$command_index] = $filename;
350              
351             unless ($self->keep) {
352             push @$unlink, $filename;
353 88     88   46091 }
  582     582   384062  
  582         851  
  582         709  
354 582 50       1100  
  2839         9243  
355             if ($error) {
356             $self->log(ERROR => $error);
357             return E_DOWNLOAD;
358             } else {
359             return OK;
360             }
361 88     88   45314 }
362              
363             # this is purely for diagnostic purposes, i.e. there's no guarantee
364             # that the dumped command can be used as a command line. a better
365             # (but still imperfect/incomplete) implementation would require at
366             # least two extra modules: Win32::ShellQuote and String::ShellQuote:
367             # https://rt.cpan.org/Public/Bug/Display.html?id=37348
368             method dump_command ($args) {
369             return join(' ', map { /[^0-9A-Za-z+,.\/:=\@_-]/ ? _escape($_) : $_ } @$args);
370             }
371              
372             # takes a URL and returns a $filename => $error pair where
373             # the filename is the path to the saved file and the error
374             # is the first error message encountered while trying to download
375             # and save it
376             method resolve ($_url) {
377             my ($error, $filename, @resolved);
378              
379             if ($self->keep) {
380 88     88   46718 ($filename, $error) = $self->resolve_keep($_url);
  0     0   0  
  0         0  
  0         0  
381 0         0 } else {
382 0 0       0 ($filename, $error) = $self->resolve_temp($_url);
383 0         0 }
384 0         0  
385 0         0 $error ||= $self->download($_url, $filename);
386             @resolved = ($filename, $error);
387              
388             return wantarray ? @resolved : \@resolved;
389 0 0       0 }
390 0         0  
391             # takes a URL and returns a $filename => $error pair for cacheable files.
392             # in order to calculate the filename, we need to determine the URL's extension,
393 0     0   0 # which requires a network request for the content type. to avoid hitting the
394             # network for subsequent requests, we cache the extension in an index file.
395 0     0   0 method resolve_keep ($_url) {
396 0         0 my ($url, $url_index) = @$_url;
397             my $directory = $self->has_directory ? $self->directory : File::Spec->tmpdir;
398 0         0 my $id = sprintf('%s_%s', $self->app_name, sha1_hex($url));
399 0         0 my $index_file = File::Spec->catfile($directory, sprintf(INDEX, $id));
400             my ($error, $extension);
401              
402 0     0   0 # -s: if /tmp is full, the index file may get written as an empty file, so
403             # make sure it's non-empty
404 0     0   0 if (-s $index_file) {
405 0         0 $self->debug('index (%d): %s (exists)', $url_index, $index_file);
406              
407             try {
408 0         0 $extension = read_text($index_file);
409             } catch {
410 0         0 $error = "unable to load index #$url_index ($index_file): $_";
411             };
412             } else {
413             $self->debug('index (%d): %s (create)', $url_index, $index_file);
414             $extension = $self->extension($_url);
415 88     88   69287  
  0     0   0  
  0         0  
  0         0  
416 0         0 try {
417 0         0 write_text($index_file, $extension);
418             } catch {
419 0 0       0 $error = "unable to save index #$url_index ($index_file): $_";
420 0         0 };
421             }
422 0         0  
423             my $filename = File::Spec->catfile($directory, "$id$extension");
424              
425 0 0       0 return ($filename, $error);
426 0         0 }
427              
428             # takes a URL and returns a $filename => $error pair for
429 0         0 # temporary files (i.e. files which will be automatically unlinked)
430             method resolve_temp ($_url) {
431             my $extension = $self->extension($_url);
432 0     0   0 my %options = (TEMPLATE => $self->template, UNLINK => 0);
433 0         0  
434             if ($self->has_directory) {
435 0     0   0 $options{DIR} = $self->directory;
436 0         0 } else {
437             $options{TMPDIR} = 1;
438 0         0 }
439              
440             if ($extension) {
441             $options{SUFFIX} = $extension;
442             }
443              
444             my ($filename, $error);
445              
446             try {
447 88     88   52261 srand($$); # see the File::Temp docs
  291     291   453  
  291         487  
  291         396  
448 291         1177 $filename = File::Temp->new(%options)->filename;
449 291         504 } catch {
450             $error = $_;
451             };
452 122     122   129322  
453 0     0   0 return ($filename, $error);
454 6     6   1317 }
455 0     0   0  
456 116     116   99723 # parse the supplied arrayref of options and return a triple of:
457 12     12   11205 #
458 2     2   1791 # command: an arrayref containing the command to execute and its arguments
459 0     0   0 # resolve: an arrayref of [index, URL] pairs, where index refers to the URL's
460             # (0-based) index in the commmand array
461 0     0   0 # test: true if --test was seen; false otherwise
462 0     0   0 method _parse ($argv) {
463 0     0   0 my @argv = @$argv; # don't mutate the original
464 291         7548 my $test = 0;
465              
466 291 50       51704 my $parsed = GetOptionsFromArray(\@argv,
467 0         0 'c|cache' => sub { $self->cache(1) },
468             'd|dir|directory=s' => sub { $self->directory($_[1]) },
469             'D|default-directory' => sub { $self->_use_default_directory },
470             'h|?|help' => sub { pod2usage(-input => $0, -verbose => 2, -exitval => 0) },
471             'm|mirror' => sub { $self->mirror(1) },
472             's|separator=s' => sub { $self->separator($_[1]) },
473             'S|no-separator' => sub { $self->clear_separator() },
474 291         512 't|timeout=i' => sub { $self->timeout($_[1]) },
475 291         435 'test' => \$test,
476             'u|user-agent=s' => sub { $self->user_agent($_[1]) },
477 291         754 'v|verbose' => sub { $self->verbose(1) },
478 1266         1851 'V|version' => sub { $self->_dump_version },
479             );
480 1266 100 100     6648  
    100          
481 13         28 unless ($parsed) {
482 13         19 pod2usage(
483             -exitval => E_INVALID_OPTIONS,
484 393 100       842 -input => $0,
485 276         1809 -verbose => 0,
486 276         1223 );
487 276         894 }
488 276         1703  
489             my (@command, @resolve);
490             my $seen_url = 0;
491 393         834  
492 393         1015 while (@argv) {
493             my $arg = shift(@argv);
494 393         903  
495             if ($self->has_separator && ($arg eq $self->separator)) {
496 393         621 push @command, @argv;
497 393         1764 last;
498             } elsif ($self->is_url($arg)) {
499 860         2109 unless ($seen_url) {
500             my $source = ENV_USER_AGENT ? ' (env)' : '';
501             $self->debug('user-agent%s: %s', $source, $self->user_agent);
502             $self->debug('timeout: %d', $self->timeout);
503 291 50       703 $seen_url = 1;
504 0         0 }
505              
506             my $url_index = @resolve + 1; # 1-based
507             my $_url = [$arg, $url_index];
508              
509             $self->debug('url (%d): %s', $url_index, $arg);
510              
511             push @command, $arg;
512 291         994 push @resolve, [$#command, $_url];
513             } else {
514             push @command, $arg;
515             }
516 88     88   88141 }
  291     291   4636  
  291         543  
  291         384  
517 291         449  
518 291         424 unless (@command) {
519 291         825 pod2usage(
520             -exitval => E_NO_COMMAND,
521 291 100       923 -input => $0,
    100          
522 159         277 -msg => 'no command supplied',
  159         370  
523 159         601 -verbose => 0,
524             )
525 159         3589 }
526              
527 117         2085 return \@command, \@resolve, $test;
528             }
529              
530 10     10   287505 # process the options and execute the command with substituted filenames
531 10         737 method run ($argv) {
532 117         1238 my $error = 0;
533             my $unlink = [];
534 37         1238296 my ($command, $resolve, $test) = $self->_parse($argv);
535 74   33     2839  
536             if (@$resolve == 1) {
537             my ($command_index, $_url) = @{ $resolve->[0] };
538             my @resolved = $self->resolve($_url);
539 211 50       753  
    50          
540 0         0 $error = $self->_handle([$command_index, @resolved], $command, $unlink);
541 0         0 } elsif (@$resolve) {
542 0         0 $self->debug('jobs: %d', scalar(@$resolve));
543              
544 211         1635 my @resolved = parallel_map {
545             my ($command_index, $_url) = @$_;
546 0           [$command_index, $self->resolve($_url)]
547             } @$resolve;
548              
549             for my $resolved (@resolved) {
550             $error ||= $self->_handle($resolved, $command, $unlink);
551 88     88   33734 }
  88         143  
  88         15827  
552 0     0     }
  0            
553 0            
554             if ($error) {
555 0     0     $self->debug('exit code: %d', $error);
556 0           $self->_unlink($unlink);
557 0           return $error;
558             } elsif ($test) {
559 0           return $command;
560 0           } else {
561             $self->debug('command: %s', $self->dump_command($command));
562 0            
563             try {
564             # XXX hack to remove the " in /path/to/App/Wax.pm line "
565             # noise. we just want the error message
566             no warnings qw(redefine);
567             local *IPC::System::Simple::croak = sub { die @_, $/ };
568             systemx(EXIT_ANY, @$command);
569             } catch {
570             chomp;
571             $self->log(ERROR => $_);
572             };
573              
574             $self->debug('exit code: %d', $EXITVAL);
575             $self->_unlink($unlink);
576              
577             return $EXITVAL;
578             }
579             }
580              
581             __PACKAGE__->meta->make_immutable();
582              
583             1;