File Coverage

blib/lib/App/Wax.pm
Criterion Covered Total %
statement 172 320 53.7
branch 23 80 28.7
condition 8 24 33.3
subroutine 57 81 70.3
pod n/a
total 260 505 51.4


line stmt bran cond sub pod time code
1             package App::Wax;
2              
3 88     88   376654 use 5.008008;
  88         789  
4              
5 88     88   44590 use Digest::SHA qw(sha1_hex);
  88         288828  
  88         8111  
6 88     88   42342 use File::Slurper qw(read_text write_text);
  88         1815852  
  88         5395  
7 88     88   686 use File::Spec;
  88         209  
  88         2041  
8 88     88   63710 use File::Temp;
  88         1867465  
  88         7584  
9 88     88   64453 use Getopt::Long qw(GetOptionsFromArray :config posix_default require_order bundling no_auto_abbrev no_ignore_case);
  88         961981  
  88         407  
10 88     88   73404 use IPC::System::Simple qw(EXIT_ANY $EXITVAL systemx);
  88         1177411  
  88         11905  
11 88     88   64601 use LWP::UserAgent;
  88         3932457  
  88         4035  
12 88     88   50982 use Method::Signatures::Simple;
  88         1796850  
  88         748  
13 88     88   83512 use MIME::Types;
  88         388103  
  88         4808  
14 88     88   40816 use Mouse;
  88         2757235  
  88         406  
15 88     88   78425 use Parallel::parallel_map qw(parallel_map);
  88         1914801  
  88         5793  
16 88     88   47541 use Pod::Usage qw(pod2usage);
  88         3583874  
  88         9370  
17 88     88   30488 use Try::Tiny qw(try catch);
  88         1531  
  88         54776  
18 88     88   94657 use URI::Split qw(uri_split);
  88         50784  
  88         5280  
19              
20             # NOTE this is the version of the *command* rather than the *module*, i.e.
21             # breaking API changes may occur here which aren't reflected in the SemVer since
22             # they don't break the behavior of the command
23             #
24             # XXX this declaration must be on a single line
25             # https://metacpan.org/pod/version#How-to-declare()-a-dotted-decimal-version
26 88     88   668 use version; our $VERSION = version->declare('v2.4.1');
  88         208  
  88         844  
27              
28             # defaults
29             use constant {
30             CACHE => 0,
31             DEFAULT_USER_AGENT => 'Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0',
32             ENV_PROXY => 1,
33             ENV_USER_AGENT => $ENV{WAX_USER_AGENT},
34 88         17836 EXTENSION => qr/.(\.(?:(tar\.(?:bz|bz2|gz|lzo|Z))|(?:[ch]\+\+)|(?:\w+)))$/i,
35             INDEX => '%s.index.txt',
36             MIRROR => 0,
37             NAME => 'wax',
38             SEPARATOR => '--',
39             TEMPLATE => 'XXXXXXXX',
40             TIMEOUT => 60,
41             VERBOSE => 0,
42 88     88   16565 };
  88         280  
43              
44 88     88   1037 use constant USER_AGENT => ENV_USER_AGENT || DEFAULT_USER_AGENT;
  88         177  
  88         6383  
45              
46             # RFC 2616: "If the media type remains unknown, the recipient SHOULD treat
47             # it as type 'application/octet-stream'."
48 88     88   835 use constant DEFAULT_CONTENT_TYPE => 'application/octet-stream';
  88         176  
  88         5761  
49              
50             # resources with these mime-types may have their extension inferred from the
51             # path part of their URI
52 88         6376 use constant INFER_EXTENSION => {
53             'text/plain' => 1,
54             'application/octet-stream' => 1,
55             'binary/octet-stream' => 1,
56 88     88   614 };
  88         175  
57              
58             # errors
59             use constant {
60 88         22201 OK => 0,
61             E_DOWNLOAD => -1,
62             E_INVALID_DIRECTORY => -2,
63             E_INVALID_OPTIONS => -3,
64             E_NO_COMMAND => -4,
65 88     88   559 };
  88         1286  
66              
67             has app_name => (
68             is => 'rw',
69             isa => 'Str',
70             default => NAME,
71             );
72              
73             has cache => (
74             is => 'rw',
75             isa => 'Bool',
76             default => CACHE,
77             trigger => \&_check_keep,
78             );
79              
80             has directory => (
81             is => 'rw',
82             isa => 'Str',
83             predicate => 'has_directory',
84             required => 0,
85             trigger => \&_check_directory,
86             );
87              
88             has keep => (
89             is => 'ro',
90             isa => 'Bool',
91             default => 0,
92             writer => '_set_keep',
93             );
94              
95             has _lwp_user_agent => (
96             is => 'rw',
97             isa => 'LWP::UserAgent',
98             lazy => 1,
99             builder => '_build_lwp_user_agent',
100             );
101              
102             # this should really be a class attribute, but there's no MouseX::ClassAttribute
103             # (on CPAN)
104             has mime_types => (
105             is => 'ro',
106             isa => 'MIME::Types',
107             lazy => 1,
108             default => sub { MIME::Types->new() },
109             );
110              
111             has mirror => (
112             is => 'rw',
113             isa => 'Bool',
114             default => MIRROR,
115             trigger => \&_check_keep,
116             );
117              
118             has separator => (
119             is => 'rw',
120             isa => 'Str',
121             default => SEPARATOR,
122             clearer => 'clear_separator',
123             predicate => 'has_separator',
124             );
125              
126             # TODO make this private and read only, and rename it to something more
127             # descriptive, e.g. tempfile_template
128             has template => (
129             is => 'rw',
130             isa => 'Str',
131             default => method () { sprintf('%s_%s', $self->app_name, TEMPLATE) },
132             lazy => 1,
133             );
134              
135             has timeout => (
136             is => 'rw',
137             isa => 'Int',
138             default => TIMEOUT,
139             trigger => method ($timeout) { $self->_lwp_user_agent->timeout($timeout) },
140             );
141              
142             has user_agent => (
143 88     88   201512 is => 'rw',
  0     0      
  0            
  0            
144 0           isa => 'Str',
145             default => USER_AGENT,
146 0 0         trigger => method ($user_agent) { $self->_lwp_user_agent->agent($user_agent) },
147 0 0         );
148 0            
149 0           has verbose => (
150             is => 'rw',
151             isa => 'Bool',
152             default => VERBOSE,
153             trigger => method ($verbose) { $| = 1 }, # unbuffer output
154             );
155 88     88   49340  
  1     1   3  
  1         2  
156 1         15 # log the path. if the directory doesn't exist, create it if its parent directory
157             # exists; otherwise, raise an error
158             method _check_directory ($dir) {
159             $self->debug("directory: $dir");
160              
161             unless (-d $dir) {
162             unless (mkdir $dir) {
163             $self->log(ERROR => "Can't create directory (%s): %s", $dir, $!);
164             exit E_INVALID_DIRECTORY;
165 88     88   42035 }
  238     238   563  
  238         340  
166 238 50 66     1640 }
167 0         0 }
168 0         0  
169             # lazy constructor for the default LWP::UserAgent instance
170 238         1090 method _build_lwp_user_agent {
171             LWP::UserAgent->new(
172             env_proxy => ENV_PROXY,
173             timeout => $self->timeout,
174             agent => $self->user_agent
175 88     88   46978 )
  0     0   0  
  0         0  
  0         0  
176 0         0 }
177 0         0  
178 0         0 # set `keep` to true if --cache or --mirror are set,
179 0 0       0 # but raise an error if both are set
180             method _check_keep {
181             if ($self->cache && $self->mirror) {
182             $self->log(ERROR => "--cache and --mirror can't be used together");
183             exit E_INVALID_OPTIONS;
184 88     88   47868 } else {
  0     0   0  
  0         0  
  0         0  
185 0         0 $self->_set_keep(1);
186 0         0 }
187 0         0 }
188              
189 0 0       0 # remove temporary files
190             method _unlink ($unlink) {
191 0         0 for my $filename (@$unlink) {
192             chmod 0600, $filename; # borrowed from File::Temp (may be needed on Windows)
193 0 0       0 $self->debug('removing: %s', $filename);
194 0         0 unlink($filename) || $self->log(WARN => "Can't unlink %s: %s", $filename, $!);
195             }
196 0         0 }
197 0         0  
198             # return the URL's content-type or an empty string if the request fails
199             method content_type ($_url) {
200             my ($url, $url_index) = @$_url;
201 0         0 my $response = $self->_lwp_user_agent->head($url);
202             my $content_type = '';
203              
204             if ($response->is_success) {
205             # the initial (pre-semicolon) part of the mime-type, trimmed and lowercased.
206 88     88   52306 $content_type = $response->headers->content_type;
  0     0   0  
  0         0  
  0         0  
207 0         0  
208 0         0 if ($content_type) {
209 0         0 $self->debug('content-type (%d): %s', $url_index, $content_type);
210             } else {
211 0 0 0     0 $content_type = DEFAULT_CONTENT_TYPE;
    0          
212 0         0 $self->debug('content-type (%d): %s (default)', $url_index, $content_type);
213             }
214 0         0 }
215              
216 0 0       0 return $content_type;
    0          
217 0         0 }
218              
219 0         0 # save the URL to a local filename; returns an error message if an error occurred,
220             # or a falsey value otherwise
221             method download ($_url, $filename) {
222 0         0 my ($url, $url_index) = @$_url;
223             my $ua = $self->_lwp_user_agent;
224 0 0       0 my ($downloaded, $error, $response);
225 0         0  
226             if ($self->cache && (-e $filename)) {
227             $downloaded = 0;
228             } elsif ($self->mirror) {
229 0 0       0 $response = $ua->mirror($url, $filename);
230 0 0       0  
231             if ($response->is_success) {
232 0         0 $downloaded = 1;
233 0         0 } elsif ($response->code == 304) {
234             $downloaded = 0;
235             }
236 0         0 } else {
237             $response = $ua->get($url, ':content_file' => $filename);
238              
239             if ($response->is_success) {
240 88     88   60484 $downloaded = 1;
  0     0   0  
  0         0  
241             }
242             }
243 0         0  
244 0         0 if (defined $downloaded) {
245 0         0 $self->debug('download (%d): %s', $url_index, ($downloaded ? 'yes' : 'no'));
246             } else {
247 0         0 my $status = $response->status_line;
248             $error = "can't download URL #$url_index ($url) to filename ($filename): $status";
249             }
250 88     88   49839  
  6     6   13  
  6         10  
251             return $error;
252 6         1000 }
253 6         2773  
254             # helper for `dump_command`: escape/quote a shell argument on POSIX shells
255             func _escape ($arg) {
256             # https://stackoverflow.com/a/1250279
257 88     88   44002 # https://github.com/boazy/any-shell-escape/issues/1#issuecomment-36226734
  0     0   0  
  0         0  
258 0         0 $arg =~ s!('{1,})!'"$1"'!g;
259 0         0 $arg = "'$arg'";
260             $arg =~ s{^''|''$}{}g;
261              
262             return $arg;
263 88     88   43022 }
  0     0   0  
  0         0  
  0         0  
264 0         0  
265 0 0       0 method _use_default_directory () {
266 0         0 # "${XDG_CACHE_HOME:-$HOME/.cache}/wax"
267             require File::BaseDir;
268             $self->directory(File::BaseDir::cache_home($self->app_name));
269             }
270              
271             # print the version and exit
272 88     88   47964 method _dump_version () {
  0     0   0  
  0         0  
  0         0  
273 0         0 print $VERSION, $/;
274 0         0 exit 0;
275 0         0 }
276              
277 0 0       0 # log a message to stderr with the app's name and message's log level
278             method log ($level, $template, @args) {
279 0         0 my $name = $self->app_name;
280 0         0 my $message = @args ? sprintf($template, @args) : $template;
281             warn "$name: $level: $message", $/;
282 0 0       0 }
283              
284 0 0       0 # return a best-effort guess at the URL's file extension based on its content
285             # type, e.g. ".md" or ".tar.gz", or an empty string if one can't be determined.
286 0 0 0     0 # XXX note: makes a network request to determine the content type
      0        
287 0         0 method extension ($_url) {
288             my ($url, $url_index) = @$_url;
289             my $extension = '';
290             my $split = $self->is_url($url);
291 0 0       0  
292 0         0 return $extension unless ($split);
293 0         0  
294             my ($scheme, $domain, $path, $query, $fragment) = @$split;
295 0 0       0 my $content_type = $self->content_type($_url);
296 0         0  
297             return $extension unless ($content_type); # won't be defined if the URL is invalid
298              
299             if (INFER_EXTENSION->{$content_type}) {
300 0         0 # try to get a more specific extension from the path
301             if (not(defined $query) && $path && ($path =~ EXTENSION)) {
302 0         0 $extension = $+;
303             }
304             }
305              
306             unless ($extension) {
307 88     88   61983 my $mime_type = $self->mime_types->type($content_type);
  1253     1253   1880  
  1253         2118  
  1253         1603  
308 1253 100       5991 my @extensions = $mime_type->extensions;
309 393         2125  
310             if (@extensions) {
311 393 50 33     5642 $extension = '.' . $extensions[0];
      33        
312 393         1733 }
313             }
314              
315             $self->debug('extension (%d): %s', $url_index, $extension);
316              
317             return $extension;
318 88     88   54172 }
  1062     1062   1607  
  1062         2440  
  1062         1434  
319 1062 50       3339  
320 0         0 # return a truthy value (an arrayref containing the URL's components)
321 0 0       0 # if the supplied value can be parsed as a URL, or a falsey value otherwise
322 0         0 method is_url ($url) {
323             if ($url =~ m{^[a-zA-Z][\w+]*://}) { # basic sanity check
324             my ($scheme, $domain, $path, $query, $fragment) = uri_split($url);
325              
326             if ($scheme && ($domain || $path)) { # no domain for file:// URLs
327             return [$scheme, $domain, $path, $query, $fragment];
328             }
329             }
330             }
331 88     88   47314  
  233     233   473  
  233         554  
  233         309  
332 233         692 # log a message to stderr if logging is enabled
333             method debug ($template, @args) {
334 233         744 if ($self->verbose) {
335             my $name = $self->app_name;
336 233 100       846 my $message = @args ? sprintf($template, @args) : $template;
337 23         52 warn "$name: $message", $/;
338             }
339             }
340 233 50       620  
341 0         0 # perform housekeeping after a download: replace the placeholder with the file
342 0         0 # path; push the path onto the delete list if it's a temporary file; and log any
343             # errors
344 233         776 #
345             # XXX give this a more descriptive name, e.g. _handle_download or _after_download
346             method _handle ($resolved, $command, $unlink) {
347             my ($command_index, $filename, $error) = @$resolved;
348              
349             $command->[$command_index] = $filename;
350              
351             unless ($self->keep) {
352             push @$unlink, $filename;
353 88     88   52199 }
  582     582   406466  
  582         1070  
  582         811  
354 582 50       1197  
  2839         9594  
355             if ($error) {
356             $self->log(ERROR => $error);
357             return E_DOWNLOAD;
358             } else {
359             return OK;
360             }
361 88     88   49401 }
362              
363             # this is purely for diagnostic purposes, i.e. there's no guarantee
364             # that the dumped command can be used as a command line. a better
365             # (but still imperfect/incomplete) implementation would require at
366             # least two extra modules: Win32::ShellQuote and String::ShellQuote:
367             # https://rt.cpan.org/Public/Bug/Display.html?id=37348
368             method dump_command ($args) {
369             return join(' ', map { /[^0-9A-Za-z+,.\/:=\@_-]/ ? _escape($_) : $_ } @$args);
370             }
371              
372             # takes a URL and returns a $filename => $error pair where
373             # the filename is the path to the saved file and the error
374             # is the first error message encountered while trying to download
375             # and save it
376             method resolve ($_url) {
377             my ($error, $filename, @resolved);
378              
379             if ($self->keep) {
380 88     88   53402 ($filename, $error) = $self->resolve_keep($_url);
  0     0   0  
  0         0  
  0         0  
381 0         0 } else {
382 0 0       0 ($filename, $error) = $self->resolve_temp($_url);
383 0         0 }
384 0         0  
385 0         0 $error ||= $self->download($_url, $filename);
386             @resolved = ($filename, $error);
387              
388             return wantarray ? @resolved : \@resolved;
389 0 0       0 }
390 0         0  
391             # takes a URL and returns a $filename => $error pair for cacheable files.
392             # in order to calculate the filename, we need to determine the URL's extension,
393 0     0   0 # which requires a network request for the content type. to avoid hitting the
394             # network for subsequent requests, we cache the extension in an index file.
395 0     0   0 method resolve_keep ($_url) {
396 0         0 my ($url, $url_index) = @$_url;
397             my $directory = $self->has_directory ? $self->directory : File::Spec->tmpdir;
398 0         0 my $id = sprintf('%s_%s', $self->app_name, sha1_hex($url));
399 0         0 my $index_file = File::Spec->catfile($directory, sprintf(INDEX, $id));
400             my ($error, $extension);
401              
402 0     0   0 # -s: if /tmp is full, the index file may get written as an empty file, so
403             # make sure it's non-empty
404 0     0   0 if (-s $index_file) {
405 0         0 $self->debug('index (%d): %s (exists)', $url_index, $index_file);
406              
407             try {
408 0         0 $extension = read_text($index_file);
409             } catch {
410 0         0 $error = "unable to load index #$url_index ($index_file): $_";
411             };
412             } else {
413             $self->debug('index (%d): %s (create)', $url_index, $index_file);
414             $extension = $self->extension($_url);
415 88     88   73973  
  0     0   0  
  0         0  
  0         0  
416 0         0 try {
417 0         0 write_text($index_file, $extension);
418             } catch {
419 0 0       0 $error = "unable to save index #$url_index ($index_file): $_";
420 0         0 };
421             }
422 0         0  
423             my $filename = File::Spec->catfile($directory, "$id$extension");
424              
425 0 0       0 return ($filename, $error);
426 0         0 }
427              
428             # takes a URL and returns a $filename => $error pair for
429 0         0 # temporary files (i.e. files which will be automatically unlinked)
430             method resolve_temp ($_url) {
431             my $extension = $self->extension($_url);
432 0     0   0 my %options = (TEMPLATE => $self->template, UNLINK => 0);
433 0         0  
434             if ($self->has_directory) {
435 0     0   0 $options{DIR} = $self->directory;
436 0         0 } else {
437             $options{TMPDIR} = 1;
438 0         0 }
439              
440             if ($extension) {
441             $options{SUFFIX} = $extension;
442             }
443              
444             my ($filename, $error);
445              
446 88     88   57368 try {
  291     291   859  
  291         584  
  291         438  
447 291         1038 srand($$); # see the File::Temp docs
448             $filename = File::Temp->new(%options)->filename;
449             } catch {
450 122     122   115536 $error = $_;
451 0     0   0 };
452 6     6   1349  
453 0     0   0 return ($filename, $error);
454 116     116   107843 }
455 12     12   9683  
456 2     2   1634 # parse the supplied arrayref of options and return a pair of:
457 0     0   0 #
458 0     0   0 # command: an arrayref containing the command to execute and its arguments
459 0     0   0 # resolve: an arrayref of [index, URL] pairs, where index refers to the URL's
460 0     0   0 # (0-based) index in the commmand array
461 291         8371 method _parse ($argv) {
462             my @argv = @$argv; # don't mutate the original
463 291 50       47771  
464 0         0 my $parsed = GetOptionsFromArray(\@argv,
465             'c|cache' => sub { $self->cache(1) },
466             'd|dir|directory=s' => sub { $self->directory($_[1]) },
467             'D|default-directory' => sub { $self->_use_default_directory },
468             'h|?|help' => sub { pod2usage(-input => $0, -verbose => 2, -exitval => 0) },
469             'm|mirror' => sub { $self->mirror(1) },
470             's|separator=s' => sub { $self->separator($_[1]) },
471 291         1296 'S|no-separator' => sub { $self->clear_separator() },
472 291         540 't|timeout=i' => sub { $self->timeout($_[1]) },
473             'u|user-agent=s' => sub { $self->user_agent($_[1]) },
474 291         841 'v|verbose' => sub { $self->verbose(1) },
475 1266         2064 'V|version' => sub { $self->_dump_version },
476             );
477 1266 100 100     6522  
    100          
478 13         30 unless ($parsed) {
479 13         19 pod2usage(
480             -exitval => E_INVALID_OPTIONS,
481 393 100       977 -input => $0,
482 276         2142 -verbose => 0,
483 276         1478 );
484 276         997 }
485 276         2066  
486             my (@command, @resolve);
487             my $seen_url = 0;
488 393         866  
489 393         820 while (@argv) {
490             my $arg = shift(@argv);
491 393         975  
492             if ($self->has_separator && ($arg eq $self->separator)) {
493 393         816 push @command, @argv;
494 393         1708 last;
495             } elsif ($self->is_url($arg)) {
496 860         2610 unless ($seen_url) {
497             my $source = ENV_USER_AGENT ? ' (env)' : '';
498             $self->debug('user-agent%s: %s', $source, $self->user_agent);
499             $self->debug('timeout: %d', $self->timeout);
500 291 50       750 $seen_url = 1;
501 0         0 }
502              
503             my $url_index = @resolve + 1; # 1-based
504             my $_url = [$arg, $url_index];
505              
506             $self->debug('url (%d): %s', $url_index, $arg);
507              
508             push @command, $arg;
509 291         1051 push @resolve, [$#command, $_url];
510             } else {
511             push @command, $arg;
512             }
513 88     88   101404 }
  291     291   4218  
  291         920  
  291         432  
514 291         586  
515 291         472 unless (@command) {
516 291         603 pod2usage(
517 291         954 -exitval => E_NO_COMMAND,
518             -input => $0,
519 291 100       1168 -msg => 'no command supplied',
    100          
520 159         213 -verbose => 0,
  159         437  
521 159         628 )
522             }
523 159         3792  
524             return \@command, \@resolve;
525 117         880 }
526              
527             # process the options and execute the command with substituted filenames
528 10     10   294938 method run ($argv, %options) {
529 10         714 my $test = $options{test};
530 117         1254 my $error = 0;
531             my $unlink = [];
532 37         1183406 my ($command, $resolve) = $self->_parse($argv);
533 74   33     2523  
534             if (@$resolve == 1) {
535             my ($command_index, $_url) = @{ $resolve->[0] };
536             my @resolved = $self->resolve($_url);
537 211 50       826  
    50          
538 0         0 $error = $self->_handle([$command_index, @resolved], $command, $unlink);
539 0         0 } elsif (@$resolve) {
540 0         0 $self->debug('jobs: %d', scalar(@$resolve));
541              
542 211         2495 my @resolved = parallel_map {
543             my ($command_index, $_url) = @$_;
544 0           [$command_index, $self->resolve($_url)]
545             } @$resolve;
546              
547             for my $resolved (@resolved) {
548             $error ||= $self->_handle($resolved, $command, $unlink);
549 88     88   31669 }
  88         199  
  88         16985  
550 0     0     }
  0            
551 0            
552             if ($error) {
553 0     0     $self->debug('exit code: %d', $error);
554 0           $self->_unlink($unlink);
555 0           return $error;
556             } elsif ($test) {
557 0           return $command;
558 0           } else {
559             $self->debug('command: %s', $self->dump_command($command));
560 0            
561             try {
562             # XXX hack to remove the " in /path/to/App/Wax.pm line "
563             # noise. we just want the error message
564             no warnings qw(redefine);
565             local *IPC::System::Simple::croak = sub { die @_, $/ };
566             systemx(EXIT_ANY, @$command);
567             } catch {
568             chomp;
569             $self->log(ERROR => $_);
570             };
571              
572             $self->debug('exit code: %d', $EXITVAL);
573             $self->_unlink($unlink);
574              
575             return $EXITVAL;
576             }
577             }
578              
579             __PACKAGE__->meta->make_immutable();
580              
581             1;