File Coverage

lib/File/Fetch.pm
Criterion Covered Total %
statement 309 535 57.7
branch 87 254 34.2
condition 25 78 32.0
subroutine 41 48 85.4
pod 4 4 100.0
total 466 919 50.7


line stmt bran cond sub pod time code
1             package File::Fetch;
2              
3 2     2   611320 use strict;
  2         4  
  2         110  
4 2     2   12 use warnings;
  2         3  
  2         191  
5 2     2   1036 use FileHandle;
  2         22200  
  2         12  
6 2     2   2820 use File::Temp;
  2         22774  
  2         228  
7 2     2   1036 use File::Copy;
  2         6903  
  2         159  
8 2     2   15 use File::Spec;
  2         3  
  2         43  
9 2     2   7 use File::Spec::Unix;
  2         3  
  2         55  
10 2     2   9 use File::Basename qw[dirname];
  2         6  
  2         107  
11              
12 2     2   10 use Cwd qw[cwd];
  2         4  
  2         99  
13 2     2   14 use Carp qw[carp];
  2         33  
  2         125  
14 2     2   1565 use IPC::Cmd qw[can_run run QUOTE];
  2         111196  
  2         199  
15 2     2   21 use File::Path qw[mkpath];
  2         3  
  2         102  
16 2     2   8 use File::Temp qw[tempdir];
  2         3  
  2         128  
17 2     2   10 use Params::Check qw[check];
  2         4  
  2         108  
18 2     2   16 use Module::Load::Conditional qw[can_load];
  2         9  
  2         85  
19 2     2   9 use Locale::Maketext::Simple Style => 'gettext';
  2         3  
  2         11  
20              
21 2         883 use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT
22             $BLACKLIST $METHOD_FAIL $VERSION $METHODS
23             $FTP_PASSIVE $TIMEOUT $DEBUG $WARN $FORCEIPV4
24 2     2   609 ];
  2         3  
25              
26             $VERSION = '1.08';
27             $VERSION = eval $VERSION; # avoid warnings with development releases
28             $PREFER_BIN = 0; # XXX TODO implement
29             $FROM_EMAIL = 'File-Fetch@example.com';
30             $USER_AGENT = "File::Fetch/$VERSION";
31             $BLACKLIST = [qw|ftp|];
32             push @$BLACKLIST, qw|lftp| if $^O eq 'dragonfly' || $^O eq 'hpux';
33             $METHOD_FAIL = { };
34             $FTP_PASSIVE = 1;
35             $TIMEOUT = 0;
36             $DEBUG = 0;
37             $WARN = 1;
38             $FORCEIPV4 = 0;
39              
40             ### methods available to fetch the file depending on the scheme
41             $METHODS = {
42             http => [ qw|lwp httptiny wget curl lftp fetch httplite lynx iosock| ],
43             https => [ qw|lwp httptiny wget curl| ],
44             ftp => [ qw|lwp netftp wget curl lftp fetch ncftp ftp| ],
45             file => [ qw|lwp lftp file| ],
46             rsync => [ qw|rsync| ],
47             git => [ qw|git| ],
48             };
49              
50             ### silly warnings ###
51             local $Params::Check::VERBOSE = 1;
52             local $Params::Check::VERBOSE = 1;
53             local $Module::Load::Conditional::VERBOSE = 0;
54             local $Module::Load::Conditional::VERBOSE = 0;
55              
56             ### see what OS we are on, important for file:// uris ###
57 2     2   13 use constant ON_WIN => ($^O eq 'MSWin32');
  2         4  
  2         248  
58 2     2   15 use constant ON_VMS => ($^O eq 'VMS');
  2         4  
  2         179  
59 2     2   11 use constant ON_UNIX => (!ON_WIN);
  2         5  
  2         158  
60 2     2   11 use constant HAS_VOL => (ON_WIN);
  2         3  
  2         106  
61 2     2   8 use constant HAS_SHARE => (ON_WIN);
  2         2  
  2         242  
62 2     2   11 use constant HAS_FETCH => ( $^O =~ m!^(freebsd|netbsd|dragonfly|midnightbsd)$! );
  2         3  
  2         341  
63              
64             =pod
65              
66             =head1 NAME
67              
68             File::Fetch - A generic file fetching mechanism
69              
70             =head1 SYNOPSIS
71              
72             use File::Fetch;
73              
74             ### build a File::Fetch object ###
75             my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt');
76              
77             ### fetch the uri to cwd() ###
78             my $where = $ff->fetch() or die $ff->error;
79              
80             ### fetch the uri to /tmp ###
81             my $where = $ff->fetch( to => '/tmp' );
82              
83             ### parsed bits from the uri ###
84             $ff->uri;
85             $ff->scheme;
86             $ff->host;
87             $ff->path;
88             $ff->file;
89              
90             =head1 DESCRIPTION
91              
92             File::Fetch is a generic file fetching mechanism.
93              
94             It allows you to fetch any file pointed to by a C, C,
95             C, C or C uri by a number of different means.
96              
97             See the C section further down for details.
98              
99             =head1 ACCESSORS
100              
101             A C object has the following accessors
102              
103             =over 4
104              
105             =item $ff->uri
106              
107             The uri you passed to the constructor
108              
109             =item $ff->scheme
110              
111             The scheme from the uri (like 'file', 'http', etc)
112              
113             =item $ff->host
114              
115             The hostname in the uri. Will be empty if host was originally
116             'localhost' for a 'file://' url.
117              
118             =item $ff->vol
119              
120             On operating systems with the concept of a volume the second element
121             of a file:// is considered to the be volume specification for the file.
122             Thus on Win32 this routine returns the volume, on other operating
123             systems this returns nothing.
124              
125             On Windows this value may be empty if the uri is to a network share, in
126             which case the 'share' property will be defined. Additionally, volume
127             specifications that use '|' as ':' will be converted on read to use ':'.
128              
129             On VMS, which has a volume concept, this field will be empty because VMS
130             file specifications are converted to absolute UNIX format and the volume
131             information is transparently included.
132              
133             =item $ff->share
134              
135             On systems with the concept of a network share (currently only Windows) returns
136             the sharename from a file://// url. On other operating systems returns empty.
137              
138             =item $ff->path
139              
140             The path from the uri, will be at least a single '/'.
141              
142             =item $ff->file
143              
144             The name of the remote file. For the local file name, the
145             result of $ff->output_file will be used.
146              
147             =item $ff->file_default
148              
149             The name of the default local file, that $ff->output_file falls back to if
150             it would otherwise return no filename. For example when fetching a URI like
151             http://www.abc.net.au/ the contents retrieved may be from a remote file called
152             'index.html'. The default value of this attribute is literally 'file_default'.
153              
154             =cut
155              
156              
157             ##########################
158             ### Object & Accessors ###
159             ##########################
160              
161             {
162             ### template for autogenerated accessors ###
163             my $Tmpl = {
164             scheme => { default => 'http' },
165             host => { default => 'localhost' },
166             path => { default => '/' },
167             file => { required => 1 },
168             uri => { required => 1 },
169             userinfo => { default => '' },
170             vol => { default => '' }, # windows for file:// uris
171             share => { default => '' }, # windows for file:// uris
172             file_default => { default => 'file_default' },
173             tempdir_root => { required => 1 }, # Should be lazy-set at ->new()
174             _error_msg => { no_override => 1 },
175             _error_msg_long => { no_override => 1 },
176             };
177              
178             for my $method ( keys %$Tmpl ) {
179 2     2   12 no strict 'refs';
  2         3  
  2         14773  
180             *$method = sub {
181 617     617   56680 my $self = shift;
182 617 100       2081 $self->{$method} = $_[0] if @_;
183 617         9860 return $self->{$method};
184             }
185             }
186              
187             sub _create {
188 49     49   251 my $class = shift;
189 49         1246 my %hash = @_;
190              
191 49 50       1202 my $args = check( $Tmpl, \%hash ) or return;
192              
193 49         22325 bless $args, $class;
194              
195 49 50 66     806 if( lc($args->scheme) ne 'file' and not $args->host ) {
196 0         0 return $class->_error(loc(
197             "Hostname required when fetching from '%1'",$args->scheme));
198             }
199              
200 49         150 for (qw[path]) {
201 49 50       319 unless( $args->$_() ) { # 5.5.x needs the ()
202 0         0 return $class->_error(loc("No '%1' specified",$_));
203             }
204             }
205              
206 49         430 return $args;
207             }
208             }
209              
210             =item $ff->output_file
211              
212             The name of the output file. This is the same as $ff->file,
213             but any query parameters are stripped off. For example:
214              
215             http://example.com/index.html?x=y
216              
217             would make the output file be C rather than
218             C.
219              
220             =back
221              
222             =cut
223              
224             sub output_file {
225 106     106 1 64069 my $self = shift;
226 106         992 my $file = $self->file;
227              
228 106         1314 $file =~ s/\?.*$//g;
229              
230 106   66     699 $file ||= $self->file_default;
231              
232 106         2495 return $file;
233             }
234              
235             ### XXX do this or just point to URI::Escape?
236             # =head2 $esc_uri = $ff->escaped_uri
237             #
238             # =cut
239             #
240             # ### most of this is stolen straight from URI::escape
241             # { ### Build a char->hex map
242             # my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255;
243             #
244             # sub escaped_uri {
245             # my $self = shift;
246             # my $uri = $self->uri;
247             #
248             # ### Default unsafe characters. RFC 2732 ^(uric - reserved)
249             # $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/
250             # $escapes{$1} || $self->_fail_hi($1)/ge;
251             #
252             # return $uri;
253             # }
254             #
255             # sub _fail_hi {
256             # my $self = shift;
257             # my $char = shift;
258             #
259             # $self->_error(loc(
260             # "Can't escape '%1', try using the '%2' module instead",
261             # sprintf("\\x{%04X}", ord($char)), 'URI::Escape'
262             # ));
263             # }
264             #
265             # sub output_file {
266             #
267             # }
268             #
269             #
270             # }
271              
272             =head1 METHODS
273              
274             =head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' );
275              
276             Parses the uri and creates a corresponding File::Fetch::Item object,
277             that is ready to be Ced and returns it.
278              
279             Returns false on failure.
280              
281             =cut
282              
283             sub new {
284 49     49 1 201422 my $class = shift;
285 49         435 my %hash = @_;
286              
287 49         323 my ($uri, $file_default, $tempdir_root);
288 49         588 my $tmpl = {
289             uri => { required => 1, store => \$uri },
290             file_default => { required => 0, store => \$file_default },
291             tempdir_root => { required => 0, store => \$tempdir_root },
292             };
293              
294 49 50       445 check( $tmpl, \%hash ) or return;
295              
296             ### parse the uri to usable parts ###
297 49 50       8135 my $href = $class->_parse_uri( $uri ) or return;
298              
299 49 50       221 $href->{file_default} = $file_default if $file_default;
300 49 50       164 $href->{tempdir_root} = File::Spec->rel2abs( $tempdir_root ) if $tempdir_root;
301 49 50       518281 $href->{tempdir_root} = File::Spec->rel2abs( Cwd::cwd ) if not $href->{tempdir_root};
302              
303             ### make it into a FFI object ###
304 49 50       2053 my $ff = $class->_create( %$href ) or return;
305              
306              
307             ### return the object ###
308 49         1443 return $ff;
309             }
310              
311             ### parses an uri to a hash structure:
312             ###
313             ### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )
314             ###
315             ### becomes:
316             ###
317             ### $href = {
318             ### scheme => 'ftp',
319             ### host => 'ftp.cpan.org',
320             ### path => '/pub/mirror',
321             ### file => 'index.html'
322             ### };
323             ###
324             ### In the case of file:// urls there maybe be additional fields
325             ###
326             ### For systems with volume specifications such as Win32 there will be
327             ### a volume specifier provided in the 'vol' field.
328             ###
329             ### 'vol' => 'volumename'
330             ###
331             ### For windows file shares there may be a 'share' key specified
332             ###
333             ### 'share' => 'sharename'
334             ###
335             ### Note that the rules of what a file:// url means vary by the operating system
336             ### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious
337             ### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and
338             ### not '/foo/bar.txt'
339             ###
340             ### Similarly if the host interpreting the url is VMS then
341             ### file:///disk$user/my/notes/note12345.txt' means
342             ### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as
343             ### if it is unix where it means /disk$user/my/notes/note12345.txt'.
344             ### Except for some cases in the File::Spec methods, Perl on VMS will generally
345             ### handle UNIX format file specifications.
346             ###
347             ### This means it is impossible to serve certain file:// urls on certain systems.
348             ###
349             ### Thus are the problems with a protocol-less specification. :-(
350             ###
351              
352             sub _parse_uri {
353 57     57   3039495 my $self = shift;
354 57 50       376 my $uri = shift or return;
355              
356 57         359 my $href = { uri => $uri };
357              
358             ### find the scheme ###
359 57         841 $uri =~ s|^(\w+)://||;
360 57         439 $href->{scheme} = $1;
361              
362             ### See rfc 1738 section 3.10
363             ### https://datatracker.ietf.org/doc/html/rfc1738#section-3.10
364             ### And wikipedia for more on windows file:// urls
365             ### http://en.wikipedia.org/wiki/File://
366 57 100 66     508 if( $href->{scheme} eq 'file' ) {
    100          
367              
368 9         72 my @parts = split '/',$uri;
369              
370             ### file://hostname/...
371             ### file://hostname/...
372             ### normalize file://localhost with file:///
373 9   100     70 $href->{host} = $parts[0] || '';
374              
375             ### index in @parts where the path components begin;
376 9         25 my $index = 1;
377              
378             ### file:////hostname/sharename/blah.txt
379 9         13 if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) {
380              
381             $href->{host} = $parts[2] || ''; # avoid warnings
382             $href->{share} = $parts[3] || ''; # avoid warnings
383              
384             $index = 4 # index after the share
385              
386             ### file:///D|/blah.txt
387             ### file:///D:/blah.txt
388 0         0 } elsif (HAS_VOL) {
389              
390             ### this code comes from dmq's patch, but:
391             ### XXX if volume is empty, wouldn't that be an error? --kane
392             ### if so, our file://localhost test needs to be fixed as wel
393             $href->{vol} = $parts[1] || '';
394              
395             ### correct D| style colume descriptors
396             $href->{vol} =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN;
397              
398             $index = 2; # index after the volume
399             }
400              
401             ### rebuild the path from the leftover parts;
402 9         66 $href->{path} = join '/', '', splice( @parts, $index, $#parts );
403              
404             } elsif ( $href->{scheme} eq 'http' || $href->{scheme} eq 'https' ) {
405             ### using anything but qw() in hash slices may produce warnings
406             ### in older perls :-(
407 42         569 @{$href}{ qw(userinfo host path) } = $uri =~ m|(?:([^\@:]*:[^\:\@]*)@)?([^/]*)(/.*)?$|s;
  42         375  
408 42 100       237 $href->{path} = '/' unless defined $href->{path};
409             } else {
410 6         57 @{$href}{ qw(userinfo host path) } = $uri =~ m|(?:([^\@:]*:[^\:\@]*)@)?([^/]*)(/.*)$|s;
  6         25  
411             }
412              
413             ### split the path into file + dir ###
414 57         106 { my @parts = File::Spec::Unix->splitpath( delete $href->{path} );
  57         1921  
415 57         1059 $href->{path} = $parts[1];
416 57         395 $href->{file} = $parts[2];
417             }
418              
419             ### host will be empty if the target was 'localhost' and the
420             ### scheme was 'file'
421             $href->{host} = '' if ($href->{host} eq 'localhost') and
422 57 100 100     301 ($href->{scheme} eq 'file');
423              
424 57         1080 return $href;
425             }
426              
427             =head2 $where = $ff->fetch( [to => /my/output/dir/ | \$scalar] )
428              
429             Fetches the file you requested and returns the full path to the file.
430              
431             By default it writes to C, but you can override that by specifying
432             the C argument:
433              
434             ### file fetch to /tmp, full path to the file in $where
435             $where = $ff->fetch( to => '/tmp' );
436              
437             ### file slurped into $scalar, full path to the file in $where
438             ### file is downloaded to a temp directory and cleaned up at exit time
439             $where = $ff->fetch( to => \$scalar );
440              
441             Returns the full path to the downloaded file on success, and false
442             on failure.
443              
444             =cut
445              
446             sub fetch {
447 78 50   78 1 2441 my $self = shift or return;
448 78         741 my %hash = @_;
449              
450 78         450 my $target;
451 78         725811 my $tmpl = {
452             to => { default => cwd(), store => \$target },
453             };
454              
455 78 50       3156 check( $tmpl, \%hash ) or return;
456              
457 78         15463 my ($to, $fh);
458             ### you want us to slurp the contents
459 78 100 66     1433 if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
460 39         779 $to = tempdir( 'FileFetch.XXXXXX', DIR => $self->tempdir_root, CLEANUP => 1 );
461              
462             ### plain old fetch
463             } else {
464 39         489 $to = $target;
465              
466             ### On VMS force to VMS format so File::Spec will work.
467 39         77 $to = VMS::Filespec::vmspath($to) if ON_VMS;
468              
469             ### create the path if it doesn't exist yet ###
470 39 100       1499 unless( -d $to ) {
471 1         8 eval { mkpath( $to ) };
  1         448  
472              
473 1 50       20 return $self->_error(loc("Could not create path '%1'",$to)) if $@;
474             }
475             }
476              
477             ### set passive ftp if required ###
478 78         39155 local $ENV{FTP_PASSIVE} = $FTP_PASSIVE;
479              
480             ### we dont use catfile on win32 because if we are using a cygwin tool
481             ### under cmd.exe they wont understand windows style separators.
482 78         1337 my $out_to = ON_WIN ? $to.'/'.$self->output_file
483             : File::Spec->catfile( $to, $self->output_file );
484              
485 78         269 for my $method ( @{ $METHODS->{$self->scheme} } ) {
  78         610  
486 78         696 my $sub = '_'.$method.'_fetch';
487              
488 78 50       1258 unless( __PACKAGE__->can($sub) ) {
489 0         0 $self->_error(loc("Cannot call method for '%1' -- WEIRD!",
490             $method));
491 0         0 next;
492             }
493              
494             ### method is blacklisted ###
495 78 50       499 next if grep { lc $_ eq $method } @$BLACKLIST;
  78         736  
496              
497             ### method is known to fail ###
498             next if ref $METHOD_FAIL->{$method}
499             ? $METHOD_FAIL->{$method}{$self->scheme}
500 78 50       665 : $METHOD_FAIL->{$method};
    100          
501              
502             ### there's serious issues with IPC::Run and quoting of command
503             ### line arguments. using quotes in the wrong place breaks things,
504             ### and in the case of say,
505             ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document
506             ### "index.html" "http://www.cpan.org/index.html?q=1&y=2"
507             ### it doesn't matter how you quote, it always fails.
508 41         648 local $IPC::Cmd::USE_IPC_RUN = 0;
509              
510 41 100       392 if( my $file = $self->$sub(
511             to => $out_to
512             )){
513              
514 28 50 33     1342 unless( -e $file && -s _ ) {
515 0         0 $self->_error(loc("'%1' said it fetched '%2', ".
516             "but it was not created",$method,$file));
517              
518             ### mark the failure ###
519 0         0 $METHOD_FAIL->{$method} = 1;
520              
521 0         0 next;
522              
523             } else {
524              
525             ### slurp mode?
526 28 100 66     281 if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
527              
528             ### open the file
529 14 50       892 open my $fh, "<$file" or do {
530 0         0 $self->_error(
531             loc("Could not open '%1': %2", $file, $!));
532 0         0 return;
533             };
534              
535             ### slurp
536 14         41 $$target = do { local $/; <$fh> };
  14         117  
  14         1007  
537              
538             }
539              
540 28         1912 my $abs = File::Spec->rel2abs( $file );
541 28         9178 return $abs;
542              
543             }
544             }
545             }
546              
547              
548             ### if we got here, we looped over all methods, but we weren't able
549             ### to fetch it.
550 50         1304 return;
551             }
552              
553             ########################
554             ### _*_fetch methods ###
555             ########################
556              
557             ### LWP fetching ###
558             sub _lwp_fetch {
559 10     10   26 my $self = shift;
560 10         104 my %hash = @_;
561              
562 10         90 my ($to);
563 10         61 my $tmpl = {
564             to => { required => 1, store => \$to }
565             };
566 10 50       48 check( $tmpl, \%hash ) or return;
567              
568             ### modules required to download with lwp ###
569 10         1329 my $use_list = {
570             LWP => '0.0',
571             'LWP::UserAgent' => '0.0',
572             'HTTP::Request' => '0.0',
573             'HTTP::Status' => '0.0',
574             URI => '0.0',
575              
576             };
577              
578             ### Fix CVE-2016-1238 ###
579 10         72 local $Module::Load::Conditional::FORCE_SAFE_INC = 1;
580 10 50       189 unless( can_load( modules => $use_list ) ) {
581 0         0 $METHOD_FAIL->{'lwp'} = 1;
582 0         0 return;
583             }
584              
585 10 50       169608 if ($self->scheme eq 'https') {
586 0         0 my $https_use_list = {
587             'LWP::Protocol::https' => '0.0',
588             };
589              
590 0 0       0 unless ( can_load(modules => $https_use_list) ) {
591 0         0 $METHOD_FAIL->{'lwp'} = { 'https' => 1 };
592 0         0 return;
593             }
594             }
595              
596             ### setup the uri object
597 10         77 my $uri = URI->new( File::Spec::Unix->catfile(
598             $self->path, $self->file
599             ) );
600              
601             ### special rules apply for file:// uris ###
602 10         9528 $uri->scheme( $self->scheme );
603 10 100       9771 $uri->host( $self->scheme eq 'file' ? '' : $self->host );
604              
605 10 50       2804 if ($self->userinfo) {
    100          
606 0         0 $uri->userinfo($self->userinfo);
607             } elsif ($self->scheme ne 'file') {
608 8         104 $uri->userinfo("anonymous:$FROM_EMAIL");
609             }
610              
611             ### set up the useragent object
612 10         1682 my $ua = LWP::UserAgent->new();
613 10 50       9195 $ua->timeout( $TIMEOUT ) if $TIMEOUT;
614 10         320 $ua->agent( $USER_AGENT );
615 10         1060 $ua->from( $FROM_EMAIL );
616 10         862 $ua->env_proxy;
617              
618 10 50       10552 my $res = $ua->mirror($uri, $to) or return;
619              
620             ### uptodate or fetched ok ###
621 10 50 33     5810226 if ( $res->code == 304 or $res->code == 200 ) {
622 10         1006 return $to;
623              
624             } else {
625 0         0 return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]",
626             $res->code, HTTP::Status::status_message($res->code),
627             $res->status_line));
628             }
629              
630             }
631              
632             ### HTTP::Tiny fetching ###
633             sub _httptiny_fetch {
634 8     8   51 my $self = shift;
635 8         113 my %hash = @_;
636              
637 8         68 my ($to);
638 8         108 my $tmpl = {
639             to => { required => 1, store => \$to }
640             };
641 8 50       79 check( $tmpl, \%hash ) or return;
642              
643 8         840 my $use_list = {
644             'HTTP::Tiny' => '0.008',
645              
646             };
647              
648             ### Fix CVE-2016-1238 ###
649 8         61 local $Module::Load::Conditional::FORCE_SAFE_INC = 1;
650 8 50       169 unless( can_load(modules => $use_list) ) {
651 0         0 $METHOD_FAIL->{'httptiny'} = 1;
652 0         0 return;
653             }
654 8 50 33     72124 if ( $self->scheme eq 'https' && !HTTP::Tiny->can_ssl ) {
655 0         0 $METHOD_FAIL->{'httptiny'} = 1;
656 0         0 return;
657             }
658              
659 8         61 my $uri = $self->uri;
660              
661 8 50       270 my $http = HTTP::Tiny->new( ( $TIMEOUT ? ( timeout => $TIMEOUT ) : () ) );
662              
663 8         1661 my $rc = $http->mirror( $uri, $to );
664              
665 8 50       2528392 unless ( $rc->{success} ) {
666              
667             return $self->_error(loc( "Fetch failed! HTTP response: %1 [%2]",
668 0         0 $rc->{status}, $rc->{reason} ) );
669              
670             }
671              
672 8         1220 return $to;
673              
674             }
675              
676             ### HTTP::Lite fetching ###
677             sub _httplite_fetch {
678 1     1   10 my $self = shift;
679 1         13 my %hash = @_;
680              
681 1         5 my ($to);
682 1         10 my $tmpl = {
683             to => { required => 1, store => \$to }
684             };
685 1 50       17 check( $tmpl, \%hash ) or return;
686              
687             ### modules required to download with lwp ###
688 1         119 my $use_list = {
689             'HTTP::Lite' => '2.2',
690             'MIME::Base64' => '0',
691             };
692              
693             ### Fix CVE-2016-1238 ###
694 1         13 local $Module::Load::Conditional::FORCE_SAFE_INC = 1;
695 1 50       26 unless( can_load(modules => $use_list) ) {
696 1         575 $METHOD_FAIL->{'httplite'} = 1;
697 1         10 return;
698             }
699              
700 0         0 my $uri = $self->uri;
701 0         0 my $retries = 0;
702              
703 0         0 RETRIES: while ( $retries++ < 5 ) {
704              
705 0         0 my $http = HTTP::Lite->new();
706             # Naughty naughty but there isn't any accessor/setter
707 0 0       0 $http->{timeout} = $TIMEOUT if $TIMEOUT;
708 0         0 $http->http11_mode(1);
709              
710 0 0       0 if ($self->userinfo) {
711 0         0 my $encoded = MIME::Base64::encode($self->userinfo, '');
712 0         0 $http->add_req_header("Authorization", "Basic $encoded");
713             }
714              
715 0         0 my $fh = FileHandle->new;
716              
717 0 0       0 unless ( $fh->open($to,'>') ) {
718 0         0 return $self->_error(loc(
719             "Could not open '%1' for writing: %2",$to,$!));
720             }
721              
722 0         0 $fh->autoflush(1);
723              
724 0         0 binmode $fh;
725              
726 0     0   0 my $rc = $http->request( $uri, sub { my ($self,$dref,$cbargs) = @_; local $\; print {$cbargs} $$dref }, $fh );
  0         0  
  0         0  
  0         0  
  0         0  
727              
728 0         0 close $fh;
729              
730 0 0 0     0 if ( $rc == 301 || $rc == 302 ) {
    0          
731 0         0 my $loc;
732 0         0 HEADERS: for ($http->headers_array) {
733 0 0       0 /Location: (\S+)/ and $loc = $1, last HEADERS;
734             }
735             #$loc or last; # Think we should squeal here.
736 0 0       0 if ($loc =~ m!^/!) {
737 0         0 $uri =~ s{^(\w+?://[^/]+)/.*$}{$1};
738 0         0 $uri .= $loc;
739             }
740             else {
741 0         0 $uri = $loc;
742             }
743 0         0 next RETRIES;
744             }
745             elsif ( $rc == 200 ) {
746 0         0 return $to;
747             }
748             else {
749 0         0 return $self->_error(loc("Fetch failed! HTTP response: %1 [%2]",
750             $rc, $http->status_message));
751             }
752              
753             } # Loop for 5 retries.
754              
755 0         0 return $self->_error("Fetch failed! Gave up after 5 tries");
756              
757             }
758              
759             ### Simple IO::Socket::INET fetching ###
760             sub _iosock_fetch {
761 8     8   40 my $self = shift;
762 8         62 my %hash = @_;
763              
764 8         34 my ($to);
765 8         52 my $tmpl = {
766             to => { required => 1, store => \$to }
767             };
768 8 50       45 check( $tmpl, \%hash ) or return;
769              
770 8         749 my $use_list = {
771             'IO::Socket::INET' => '0.0',
772             'IO::Select' => '0.0',
773             };
774              
775             ### Fix CVE-2016-1238 ###
776 8         69 local $Module::Load::Conditional::FORCE_SAFE_INC = 1;
777 8 50       123 unless( can_load(modules => $use_list) ) {
778 0         0 $METHOD_FAIL->{'iosock'} = 1;
779 0         0 return;
780             }
781              
782 8 50       9327 my $sock = IO::Socket::INET->new(
783             PeerHost => $self->host,
784             ( $self->host =~ /:/ ? () : ( PeerPort => 80 ) ),
785             );
786              
787 8 50       102305 unless ( $sock ) {
788 0         0 return $self->_error(loc("Could not open socket to '%1', '%2'",$self->host,$!));
789             }
790              
791 8         170 my $fh = FileHandle->new;
792              
793             # Check open()
794              
795 8 50       563 unless ( $fh->open($to,'>') ) {
796 0         0 return $self->_error(loc(
797             "Could not open '%1' for writing: %2",$to,$!));
798             }
799              
800 8         2285 $fh->autoflush(1);
801 8         490 binmode $fh;
802              
803 8         87 my $path = File::Spec::Unix->catfile( $self->path, $self->file );
804 8         67 my $req = "GET $path HTTP/1.0\x0d\x0aHost: " . $self->host . "\x0d\x0a\x0d\x0a";
805 8         133 $sock->send( $req );
806              
807 8         1048 my $select = IO::Select->new( $sock );
808              
809 8         1103 my $resp = '';
810 8         32 my $normal = 0;
811 8   50     61 while ( $select->can_read( $TIMEOUT || 60 ) ) {
812 16         5305719 my $ret = $sock->sysread( $resp, 4096, length($resp) );
813 16 100 66     740 if ( !defined $ret or $ret == 0 ) {
814 8         60 $select->remove( $sock );
815 8         638 $normal++;
816             }
817             }
818 8         1379 close $sock;
819              
820 8 50       55 unless ( $normal ) {
821 0   0     0 return $self->_error(loc("Socket timed out after '%1' seconds", ( $TIMEOUT || 60 )));
822             }
823              
824             # Check the "response"
825             # Strip preceding blank lines apparently they are allowed (RFC 2616 4.1)
826 8         73 $resp =~ s/^(\x0d?\x0a)+//;
827             # Check it is an HTTP response
828 8 50       89 unless ( $resp =~ m!^HTTP/(\d+)\.(\d+)!i ) {
829 0         0 return $self->_error(loc("Did not get a HTTP response from '%1'",$self->host));
830             }
831              
832             # Check for OK
833 8         115 my ($code) = $resp =~ m!^HTTP/\d+\.\d+\s+(\d+)!i;
834 8 50       179 unless ( $code eq '200' ) {
835 8         68 return $self->_error(loc("Got a '%1' from '%2' expected '200'",$code,$self->host));
836             }
837              
838             {
839 0         0 local $\;
  0         0  
840 0         0 print $fh +($resp =~ m/\x0d\x0a\x0d\x0a(.*)$/s )[0];
841             }
842 0         0 close $fh;
843 0         0 return $to;
844             }
845              
846             ### Net::FTP fetching
847             sub _netftp_fetch {
848 0     0   0 my $self = shift;
849 0         0 my %hash = @_;
850              
851 0         0 my ($to);
852 0         0 my $tmpl = {
853             to => { required => 1, store => \$to }
854             };
855 0 0       0 check( $tmpl, \%hash ) or return;
856              
857             ### required modules ###
858 0         0 my $use_list = { 'Net::FTP' => 0 };
859              
860             ### Fix CVE-2016-1238 ###
861 0         0 local $Module::Load::Conditional::FORCE_SAFE_INC = 1;
862 0 0       0 unless( can_load( modules => $use_list ) ) {
863 0         0 $METHOD_FAIL->{'netftp'} = 1;
864 0         0 return;
865             }
866              
867             ### make connection ###
868 0         0 my $ftp;
869 0         0 my @options = ($self->host);
870 0 0       0 push(@options, Timeout => $TIMEOUT) if $TIMEOUT;
871 0 0       0 unless( $ftp = Net::FTP->new( @options ) ) {
872 0         0 return $self->_error(loc("Ftp creation failed: %1",$@));
873             }
874              
875             ### login ###
876 0 0       0 unless( $ftp->login( anonymous => $FROM_EMAIL ) ) {
877 0         0 return $self->_error(loc("Could not login to '%1'",$self->host));
878             }
879              
880             ### set binary mode, just in case ###
881 0         0 $ftp->binary;
882              
883             ### create the remote path
884             ### remember remote paths are unix paths! [#11483]
885 0         0 my $remote = File::Spec::Unix->catfile( $self->path, $self->file );
886              
887             ### fetch the file ###
888 0         0 my $target;
889 0 0       0 unless( $target = $ftp->get( $remote, $to ) ) {
890 0         0 return $self->_error(loc("Could not fetch '%1' from '%2'",
891             $remote, $self->host));
892             }
893              
894             ### log out ###
895 0         0 $ftp->quit;
896              
897 0         0 return $target;
898              
899             }
900              
901             ### /bin/wget fetch ###
902             sub _wget_fetch {
903 1     1   13 my $self = shift;
904 1         9 my %hash = @_;
905              
906 1         32 my ($to);
907 1         14 my $tmpl = {
908             to => { required => 1, store => \$to }
909             };
910 1 50       16 check( $tmpl, \%hash ) or return;
911              
912 1         135 my $wget;
913             ### see if we have a wget binary ###
914 1 50       20 unless( $wget = can_run('wget') ) {
915 1         603 $METHOD_FAIL->{'wget'} = 1;
916 1         15 return;
917             }
918              
919             ### no verboseness, thanks ###
920 0         0 my $cmd = [ $wget, '--quiet' ];
921              
922             ### if a timeout is set, add it ###
923 0 0       0 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
924              
925             ### run passive if specified ###
926 0 0 0     0 push @$cmd, '--passive-ftp' if $self->scheme eq 'ftp' && $FTP_PASSIVE;
927              
928             ### set the output document, add the uri ###
929 0         0 push @$cmd, '--output-document', $to, $self->uri;
930              
931             ### with IPC::Cmd > 0.41, this is fixed in teh library,
932             ### and there's no need for special casing any more.
933             ### DO NOT quote things for IPC::Run, it breaks stuff.
934             # $IPC::Cmd::USE_IPC_RUN
935             # ? ($to, $self->uri)
936             # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
937              
938             ### shell out ###
939 0         0 my $captured;
940 0 0       0 unless(run( command => $cmd,
941             buffer => \$captured,
942             verbose => $DEBUG
943             )) {
944             ### wget creates the output document always, even if the fetch
945             ### fails.. so unlink it in that case
946 0         0 1 while unlink $to;
947              
948 0   0     0 return $self->_error(loc( "Command failed: %1", $captured || '' ));
949             }
950              
951 0         0 return $to;
952             }
953              
954             ### /bin/lftp fetch ###
955             sub _lftp_fetch {
956 1     1   11 my $self = shift;
957 1         15 my %hash = @_;
958              
959 1         7 my ($to);
960 1         12 my $tmpl = {
961             to => { required => 1, store => \$to }
962             };
963 1 50       10 check( $tmpl, \%hash ) or return;
964              
965             ### see if we have a lftp binary ###
966 1         101 my $lftp;
967 1 50       17 unless( $lftp = can_run('lftp') ) {
968 1         157637 $METHOD_FAIL->{'lftp'} = 1;
969 1         16 return;
970             }
971              
972             ### no verboseness, thanks ###
973 0         0 my $cmd = [ $lftp, '-f' ];
974              
975 0         0 my $fh = File::Temp->new;
976              
977 0         0 my $str;
978              
979             ### if a timeout is set, add it ###
980 0 0       0 $str .= "set net:timeout $TIMEOUT;\n" if $TIMEOUT;
981              
982             ### lftp can get stuck in a loop of retries without this
983 0         0 $str .= "set net:reconnect-interval-base 5;\nset net:max-retries 2;\n";
984              
985             ### run passive if specified ###
986 0 0       0 $str .= "set ftp:passive-mode 1;\n" if $FTP_PASSIVE;
987              
988             ### set the output document, add the uri ###
989             ### quote the URI, because lftp supports certain shell
990             ### expansions, most notably & for backgrounding.
991             ### ' quote does nto work, must be "
992 0         0 $str .= q[get ']. $self->uri .q[' -o ]. $to . $/;
993              
994 0 0       0 if( $DEBUG ) {
995 0         0 my $pp_str = join ' ', split $/, $str;
996 0         0 print "# lftp command: $pp_str\n";
997             }
998              
999             ### write straight to the file.
1000 0         0 $fh->autoflush(1);
1001 0         0 print $fh $str;
1002              
1003             ### the command needs to be 1 string to be executed
1004 0         0 push @$cmd, $fh->filename;
1005              
1006             ### with IPC::Cmd > 0.41, this is fixed in teh library,
1007             ### and there's no need for special casing any more.
1008             ### DO NOT quote things for IPC::Run, it breaks stuff.
1009             # $IPC::Cmd::USE_IPC_RUN
1010             # ? ($to, $self->uri)
1011             # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1012              
1013              
1014             ### shell out ###
1015 0         0 my $captured;
1016 0 0       0 unless(run( command => $cmd,
1017             buffer => \$captured,
1018             verbose => $DEBUG
1019             )) {
1020             ### wget creates the output document always, even if the fetch
1021             ### fails.. so unlink it in that case
1022 0         0 1 while unlink $to;
1023              
1024 0   0     0 return $self->_error(loc( "Command failed: %1", $captured || '' ));
1025             }
1026              
1027 0         0 return $to;
1028             }
1029              
1030              
1031              
1032             ### /bin/ftp fetch ###
1033             sub _ftp_fetch {
1034 0     0   0 my $self = shift;
1035 0         0 my %hash = @_;
1036              
1037 0         0 my ($to);
1038 0         0 my $tmpl = {
1039             to => { required => 1, store => \$to }
1040             };
1041 0 0       0 check( $tmpl, \%hash ) or return;
1042              
1043             ### see if we have a ftp binary ###
1044 0         0 my $ftp;
1045 0 0       0 unless( $ftp = can_run('ftp') ) {
1046 0         0 $METHOD_FAIL->{'ftp'} = 1;
1047 0         0 return;
1048             }
1049              
1050 0         0 my $fh = FileHandle->new;
1051              
1052 0         0 local $SIG{CHLD} = 'IGNORE';
1053              
1054 0 0       0 unless ($fh->open("$ftp -n", '|-')) {
1055 0         0 return $self->_error(loc("%1 creation failed: %2", $ftp, $!));
1056             }
1057              
1058 0         0 my @dialog = (
1059             "lcd " . dirname($to),
1060             "open " . $self->host,
1061             "user anonymous $FROM_EMAIL",
1062             "cd /",
1063             "cd " . $self->path,
1064             "binary",
1065             "get " . $self->file . " " . $self->output_file,
1066             "quit",
1067             );
1068              
1069 0         0 foreach (@dialog) { $fh->print($_, "\n") }
  0         0  
1070 0 0       0 $fh->close or return;
1071              
1072 0         0 return $to;
1073             }
1074              
1075             ### lynx is stupid - it decompresses any .gz file it finds to be text
1076             ### use /bin/lynx to fetch files
1077             sub _lynx_fetch {
1078 1     1   13 my $self = shift;
1079 1         15 my %hash = @_;
1080              
1081 1         7 my ($to);
1082 1         11 my $tmpl = {
1083             to => { required => 1, store => \$to }
1084             };
1085 1 50       12 check( $tmpl, \%hash ) or return;
1086              
1087             ### see if we have a lynx binary ###
1088 1         107 my $lynx;
1089 1 50       136 unless ( $lynx = can_run('lynx') ){
1090 1         932 $METHOD_FAIL->{'lynx'} = 1;
1091 1         13 return;
1092             }
1093              
1094 0 0       0 unless( IPC::Cmd->can_capture_buffer ) {
1095 0         0 $METHOD_FAIL->{'lynx'} = 1;
1096              
1097 0         0 return $self->_error(loc(
1098             "Can not capture buffers. Can not use '%1' to fetch files",
1099             'lynx' ));
1100             }
1101              
1102             ### check if the HTTP resource exists ###
1103 0 0       0 if ($self->uri =~ /^https?:\/\//i) {
1104 0         0 my $cmd = [
1105             $lynx,
1106             '-head',
1107             '-source',
1108             "-auth=anonymous:$FROM_EMAIL",
1109             ];
1110              
1111 0 0       0 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
1112              
1113 0         0 push @$cmd, $self->uri;
1114              
1115             ### shell out ###
1116 0         0 my $head;
1117 0 0       0 unless(run( command => $cmd,
1118             buffer => \$head,
1119             verbose => $DEBUG )
1120             ) {
1121 0   0     0 return $self->_error(loc("Command failed: %1", $head || ''));
1122             }
1123              
1124 0 0       0 unless($head =~ /^HTTP\/\d+\.\d+ 200\b/) {
1125 0   0     0 return $self->_error(loc("Command failed: %1", $head || ''));
1126             }
1127             }
1128              
1129             ### write to the output file ourselves, since lynx ass_u_mes to much
1130 0 0       0 my $local = FileHandle->new( $to, 'w' )
1131             or return $self->_error(loc(
1132             "Could not open '%1' for writing: %2",$to,$!));
1133              
1134             ### dump to stdout ###
1135 0         0 my $cmd = [
1136             $lynx,
1137             '-source',
1138             "-auth=anonymous:$FROM_EMAIL",
1139             ];
1140              
1141 0 0       0 push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
1142              
1143             ### DO NOT quote things for IPC::Run, it breaks stuff.
1144 0         0 push @$cmd, $self->uri;
1145              
1146             ### with IPC::Cmd > 0.41, this is fixed in teh library,
1147             ### and there's no need for special casing any more.
1148             ### DO NOT quote things for IPC::Run, it breaks stuff.
1149             # $IPC::Cmd::USE_IPC_RUN
1150             # ? $self->uri
1151             # : QUOTE. $self->uri .QUOTE;
1152              
1153              
1154             ### shell out ###
1155 0         0 my $captured;
1156 0 0       0 unless(run( command => $cmd,
1157             buffer => \$captured,
1158             verbose => $DEBUG )
1159             ) {
1160 0   0     0 return $self->_error(loc("Command failed: %1", $captured || ''));
1161             }
1162              
1163             ### print to local file ###
1164             ### XXX on a 404 with a special error page, $captured will actually
1165             ### hold the contents of that page, and make it *appear* like the
1166             ### request was a success, when really it wasn't :(
1167             ### there doesn't seem to be an option for lynx to change the exit
1168             ### code based on a 4XX status or so.
1169             ### the closest we can come is using --error_file and parsing that,
1170             ### which is very unreliable ;(
1171 0         0 $local->print( $captured );
1172 0 0       0 $local->close or return;
1173              
1174 0         0 return $to;
1175             }
1176              
1177             ### use /bin/ncftp to fetch files
1178             sub _ncftp_fetch {
1179 0     0   0 my $self = shift;
1180 0         0 my %hash = @_;
1181              
1182 0         0 my ($to);
1183 0         0 my $tmpl = {
1184             to => { required => 1, store => \$to }
1185             };
1186 0 0       0 check( $tmpl, \%hash ) or return;
1187              
1188             ### we can only set passive mode in interactive sessions, so bail out
1189             ### if $FTP_PASSIVE is set
1190 0 0       0 return if $FTP_PASSIVE;
1191              
1192             ### see if we have a ncftp binary ###
1193 0         0 my $ncftp;
1194 0 0       0 unless( $ncftp = can_run('ncftp') ) {
1195 0         0 $METHOD_FAIL->{'ncftp'} = 1;
1196 0         0 return;
1197             }
1198              
1199 0 0       0 my $cmd = [
1200             $ncftp,
1201             '-V', # do not be verbose
1202             '-p', $FROM_EMAIL, # email as password
1203             $self->host, # hostname
1204             dirname($to), # local dir for the file
1205             # remote path to the file
1206             ### DO NOT quote things for IPC::Run, it breaks stuff.
1207             $IPC::Cmd::USE_IPC_RUN
1208             ? File::Spec::Unix->catdir( $self->path, $self->file )
1209             : QUOTE. File::Spec::Unix->catdir(
1210             $self->path, $self->file ) .QUOTE
1211              
1212             ];
1213              
1214             ### shell out ###
1215 0         0 my $captured;
1216 0 0       0 unless(run( command => $cmd,
1217             buffer => \$captured,
1218             verbose => $DEBUG )
1219             ) {
1220 0   0     0 return $self->_error(loc("Command failed: %1", $captured || ''));
1221             }
1222              
1223 0         0 return $to;
1224              
1225             }
1226              
1227             ### use /bin/curl to fetch files
1228             sub _curl_fetch {
1229 8     8   42 my $self = shift;
1230 8         87 my %hash = @_;
1231              
1232 8         39 my ($to);
1233 8         69 my $tmpl = {
1234             to => { required => 1, store => \$to }
1235             };
1236 8 50       61 check( $tmpl, \%hash ) or return;
1237 8         727 my $curl;
1238 8 50       127 unless ( $curl = can_run('curl') ) {
1239 0         0 $METHOD_FAIL->{'curl'} = 1;
1240 0         0 return;
1241             }
1242              
1243             ### these long opts are self explanatory - I like that -jmb
1244 8         4152 my $cmd = [ $curl, '-q' ];
1245              
1246 8 0 33     64 push(@$cmd, '-4') if $^O eq 'netbsd' && $FORCEIPV4; # only seen this on NetBSD so far
1247              
1248 8 50       75 push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT;
1249              
1250 8 50       78 push(@$cmd, '--silent') unless $DEBUG;
1251              
1252             ### curl does the right thing with passive, regardless ###
1253 8 50       64 if ($self->scheme eq 'ftp') {
1254 0         0 push(@$cmd, '--user', "anonymous:$FROM_EMAIL");
1255             }
1256              
1257             ### curl doesn't follow 302 (temporarily moved) etc automatically
1258             ### so we add --location to enable that.
1259 8         59 push @$cmd, '--fail', '--location', '--output', $to, $self->uri;
1260              
1261             ### with IPC::Cmd > 0.41, this is fixed in teh library,
1262             ### and there's no need for special casing any more.
1263             ### DO NOT quote things for IPC::Run, it breaks stuff.
1264             # $IPC::Cmd::USE_IPC_RUN
1265             # ? ($to, $self->uri)
1266             # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1267              
1268              
1269 8         22 my $captured;
1270 8 50       140 unless(run( command => $cmd,
1271             buffer => \$captured,
1272             verbose => $DEBUG )
1273             ) {
1274              
1275 0   0     0 return $self->_error(loc("Command failed: %1", $captured || ''));
1276             }
1277              
1278 8         2113448 return $to;
1279              
1280             }
1281              
1282             ### /usr/bin/fetch fetch! ###
1283             sub _fetch_fetch {
1284 1     1   13 my $self = shift;
1285 1         13 my %hash = @_;
1286              
1287 1         7 my ($to);
1288 1         13 my $tmpl = {
1289             to => { required => 1, store => \$to }
1290             };
1291 1 50       13 check( $tmpl, \%hash ) or return;
1292              
1293             ### see if we have a fetch binary ###
1294 1         108 my $fetch;
1295 1 50 33     29 unless( HAS_FETCH and $fetch = can_run('fetch') ) {
1296 1         14 $METHOD_FAIL->{'fetch'} = 1;
1297 1         14 return;
1298             }
1299              
1300             ### no verboseness, thanks ###
1301 0         0 my $cmd = [ $fetch, '-q' ];
1302              
1303             ### if a timeout is set, add it ###
1304 0 0       0 push(@$cmd, '-T', $TIMEOUT) if $TIMEOUT;
1305              
1306             ### run passive if specified ###
1307             #push @$cmd, '-p' if $FTP_PASSIVE;
1308 0 0       0 local $ENV{'FTP_PASSIVE_MODE'} = 1 if $FTP_PASSIVE;
1309              
1310             ### set the output document, add the uri ###
1311 0         0 push @$cmd, '-o', $to, $self->uri;
1312              
1313             ### with IPC::Cmd > 0.41, this is fixed in teh library,
1314             ### and there's no need for special casing any more.
1315             ### DO NOT quote things for IPC::Run, it breaks stuff.
1316             # $IPC::Cmd::USE_IPC_RUN
1317             # ? ($to, $self->uri)
1318             # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1319              
1320             ### shell out ###
1321 0         0 my $captured;
1322 0 0       0 unless(run( command => $cmd,
1323             buffer => \$captured,
1324             verbose => $DEBUG
1325             )) {
1326             ### wget creates the output document always, even if the fetch
1327             ### fails.. so unlink it in that case
1328 0         0 1 while unlink $to;
1329              
1330 0   0     0 return $self->_error(loc( "Command failed: %1", $captured || '' ));
1331             }
1332              
1333 0         0 return $to;
1334             }
1335              
1336             ### use File::Copy for fetching file:// urls ###
1337             ###
1338             ### See section 3.10 of RFC 1738 (https://datatracker.ietf.org/doc/html/rfc1738#section-3.10)
1339             ### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://)
1340             ###
1341              
1342             sub _file_fetch {
1343 2     2   16 my $self = shift;
1344 2         20 my %hash = @_;
1345              
1346 2         8 my ($to);
1347 2         21 my $tmpl = {
1348             to => { required => 1, store => \$to }
1349             };
1350 2 50       24 check( $tmpl, \%hash ) or return;
1351              
1352              
1353              
1354             ### prefix a / on unix systems with a file uri, since it would
1355             ### look somewhat like this:
1356             ### file:///home/kane/file
1357             ### whereas windows file uris for 'c:\some\dir\file' might look like:
1358             ### file:///C:/some/dir/file
1359             ### file:///C|/some/dir/file
1360             ### or for a network share '\\host\share\some\dir\file':
1361             ### file:////host/share/some/dir/file
1362             ###
1363             ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like:
1364             ### file://vms.host.edu/disk$user/my/notes/note12345.txt
1365             ###
1366              
1367 2         203 my $path = $self->path;
1368 2         20 my $vol = $self->vol;
1369 2         18 my $share = $self->share;
1370              
1371 2         6 my $remote;
1372 2 50 33     51 if (!$share and $self->host) {
1373 0         0 return $self->_error(loc(
1374             "Currently %1 cannot handle hosts in %2 urls",
1375             'File::Fetch', 'file://'
1376             ));
1377             }
1378              
1379 2 50       43 if( $vol ) {
    50          
1380 0         0 $path = File::Spec->catdir( split /\//, $path );
1381 0         0 $remote = File::Spec->catpath( $vol, $path, $self->file);
1382              
1383             } elsif( $share ) {
1384             ### win32 specific, and a share name, so we wont bother with File::Spec
1385 0         0 $path =~ s|/+|\\|g;
1386 0         0 $remote = "\\\\".$self->host."\\$share\\$path";
1387              
1388             } else {
1389             ### File::Spec on VMS can not currently handle UNIX syntax.
1390 2         13 my $file_class = ON_VMS
1391             ? 'File::Spec::Unix'
1392             : 'File::Spec';
1393              
1394 2         57 $remote = $file_class->catfile( $path, $self->file );
1395             }
1396              
1397             ### File::Copy is littered with 'die' statements :( ###
1398 2         17 my $rv = eval { File::Copy::copy( $remote, $to ) };
  2         85  
1399              
1400             ### something went wrong ###
1401 2 50 33     1388 if( !$rv or $@ ) {
1402 0         0 return $self->_error(loc("Could not copy '%1' to '%2': %3 %4",
1403             $remote, $to, $!, $@));
1404             }
1405              
1406 2         26 return $to;
1407             }
1408              
1409             ### use /usr/bin/rsync to fetch files
1410             sub _rsync_fetch {
1411 0     0   0 my $self = shift;
1412 0         0 my %hash = @_;
1413              
1414 0         0 my ($to);
1415 0         0 my $tmpl = {
1416             to => { required => 1, store => \$to }
1417             };
1418 0 0       0 check( $tmpl, \%hash ) or return;
1419 0         0 my $rsync;
1420 0 0       0 unless ( $rsync = can_run('rsync') ) {
1421 0         0 $METHOD_FAIL->{'rsync'} = 1;
1422 0         0 return;
1423             }
1424              
1425 0         0 my $cmd = [ $rsync ];
1426              
1427             ### XXX: rsync has no I/O timeouts at all, by default
1428 0 0       0 push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
1429              
1430 0 0       0 push(@$cmd, '--quiet') unless $DEBUG;
1431              
1432             ### DO NOT quote things for IPC::Run, it breaks stuff.
1433 0         0 push @$cmd, $self->uri, $to;
1434              
1435             ### with IPC::Cmd > 0.41, this is fixed in teh library,
1436             ### and there's no need for special casing any more.
1437             ### DO NOT quote things for IPC::Run, it breaks stuff.
1438             # $IPC::Cmd::USE_IPC_RUN
1439             # ? ($to, $self->uri)
1440             # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1441              
1442 0         0 my $captured;
1443 0 0       0 unless(run( command => $cmd,
1444             buffer => \$captured,
1445             verbose => $DEBUG )
1446             ) {
1447              
1448 0   0     0 return $self->_error(loc("Command %1 failed: %2",
      0        
1449             "@$cmd" || '', $captured || ''));
1450             }
1451              
1452 0         0 return $to;
1453              
1454             }
1455              
1456             ### use git to fetch files
1457             sub _git_fetch {
1458 0     0   0 my $self = shift;
1459 0         0 my %hash = @_;
1460              
1461 0         0 my ($to);
1462 0         0 my $tmpl = {
1463             to => { required => 1, store => \$to }
1464             };
1465 0 0       0 check( $tmpl, \%hash ) or return;
1466 0         0 my $git;
1467 0 0       0 unless ( $git = can_run('git') ) {
1468 0         0 $METHOD_FAIL->{'git'} = 1;
1469 0         0 return;
1470             }
1471              
1472 0         0 my $cmd = [ $git, 'clone' ];
1473              
1474             #push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
1475              
1476 0 0       0 push(@$cmd, '--quiet') unless $DEBUG;
1477              
1478             ### DO NOT quote things for IPC::Run, it breaks stuff.
1479 0         0 push @$cmd, $self->uri, $to;
1480              
1481             ### with IPC::Cmd > 0.41, this is fixed in teh library,
1482             ### and there's no need for special casing any more.
1483             ### DO NOT quote things for IPC::Run, it breaks stuff.
1484             # $IPC::Cmd::USE_IPC_RUN
1485             # ? ($to, $self->uri)
1486             # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1487              
1488 0         0 my $captured;
1489 0 0       0 unless(run( command => $cmd,
1490             buffer => \$captured,
1491             verbose => $DEBUG )
1492             ) {
1493              
1494 0   0     0 return $self->_error(loc("Command %1 failed: %2",
      0        
1495             "@$cmd" || '', $captured || ''));
1496             }
1497              
1498 0         0 return $to;
1499              
1500             }
1501              
1502             #################################
1503             #
1504             # Error code
1505             #
1506             #################################
1507              
1508             =pod
1509              
1510             =head2 $ff->error([BOOL])
1511              
1512             Returns the last encountered error as string.
1513             Pass it a true value to get the C output instead.
1514              
1515             =cut
1516              
1517             ### error handling the way Archive::Extract does it
1518             sub _error {
1519 8     8   450 my $self = shift;
1520 8         23 my $error = shift;
1521              
1522 8         40 $self->_error_msg( $error );
1523 8         5380 $self->_error_msg_long( Carp::longmess($error) );
1524              
1525 8 50       50 if( $WARN ) {
1526 8 50       44 carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
1527             }
1528              
1529 8         436 return;
1530             }
1531              
1532             sub error {
1533 0     0 1   my $self = shift;
1534 0 0         return shift() ? $self->_error_msg_long : $self->_error_msg;
1535             }
1536              
1537              
1538             1;
1539              
1540             =pod
1541              
1542             =head1 HOW IT WORKS
1543              
1544             File::Fetch is able to fetch a variety of uris, by using several
1545             external programs and modules.
1546              
1547             Below is a mapping of what utilities will be used in what order
1548             for what schemes, if available:
1549              
1550             file => LWP, lftp, file
1551             http => LWP, HTTP::Tiny, wget, curl, lftp, fetch, HTTP::Lite, lynx, iosock
1552             ftp => LWP, Net::FTP, wget, curl, lftp, fetch, ncftp, ftp
1553             rsync => rsync
1554             git => git
1555              
1556             If you'd like to disable the use of one or more of these utilities
1557             and/or modules, see the C<$BLACKLIST> variable further down.
1558              
1559             If a utility or module isn't available, it will be marked in a cache
1560             (see the C<$METHOD_FAIL> variable further down), so it will not be
1561             tried again. The C method will only fail when all options are
1562             exhausted, and it was not able to retrieve the file.
1563              
1564             The C utility is available on FreeBSD. NetBSD and Dragonfly BSD
1565             may also have it from C. We only check for C on those
1566             three platforms.
1567              
1568             C is a very limited L based mechanism for
1569             retrieving C schemed urls. It doesn't follow redirects for instance.
1570              
1571             C only supports C style urls.
1572              
1573             A special note about fetching files from an ftp uri:
1574              
1575             By default, all ftp connections are done in passive mode. To change
1576             that, see the C<$FTP_PASSIVE> variable further down.
1577              
1578             Furthermore, ftp uris only support anonymous connections, so no
1579             named user/password pair can be passed along.
1580              
1581             C is blacklisted by default; see the C<$BLACKLIST> variable
1582             further down.
1583              
1584             =head1 GLOBAL VARIABLES
1585              
1586             The behaviour of File::Fetch can be altered by changing the following
1587             global variables:
1588              
1589             =head2 $File::Fetch::FROM_EMAIL
1590              
1591             This is the email address that will be sent as your anonymous ftp
1592             password.
1593              
1594             Default is C.
1595              
1596             =head2 $File::Fetch::USER_AGENT
1597              
1598             This is the useragent as C will report it.
1599              
1600             Default is C.
1601              
1602             =head2 $File::Fetch::FTP_PASSIVE
1603              
1604             This variable controls whether the environment variable C
1605             and any passive switches to commandline tools will be set to true.
1606              
1607             Default value is 1.
1608              
1609             Note: When $FTP_PASSIVE is true, C will not be used to fetch
1610             files, since passive mode can only be set interactively for this binary
1611              
1612             =head2 $File::Fetch::TIMEOUT
1613              
1614             When set, controls the network timeout (counted in seconds).
1615              
1616             Default value is 0.
1617              
1618             =head2 $File::Fetch::WARN
1619              
1620             This variable controls whether errors encountered internally by
1621             C should be C'd or not.
1622              
1623             Set to false to silence warnings. Inspect the output of the C
1624             method manually to see what went wrong.
1625              
1626             Defaults to C.
1627              
1628             =head2 $File::Fetch::DEBUG
1629              
1630             This enables debugging output when calling commandline utilities to
1631             fetch files.
1632             This also enables C errors, instead of the regular
1633             C errors.
1634              
1635             Good for tracking down why things don't work with your particular
1636             setup.
1637              
1638             Default is 0.
1639              
1640             =head2 $File::Fetch::BLACKLIST
1641              
1642             This is an array ref holding blacklisted modules/utilities for fetching
1643             files with.
1644              
1645             To disallow the use of, for example, C and C, you could
1646             set $File::Fetch::BLACKLIST to:
1647              
1648             $File::Fetch::BLACKLIST = [qw|lwp netftp|]
1649              
1650             The default blacklist is [qw|ftp|], as C is rather unreliable.
1651              
1652             See the note on C below.
1653              
1654             =head2 $File::Fetch::METHOD_FAIL
1655              
1656             This is a hashref registering what modules/utilities were known to fail
1657             for fetching files (mostly because they weren't installed).
1658              
1659             You can reset this cache by assigning an empty hashref to it, or
1660             individually remove keys.
1661              
1662             See the note on C below.
1663              
1664             =head1 MAPPING
1665              
1666              
1667             Here's a quick mapping for the utilities/modules, and their names for
1668             the $BLACKLIST, $METHOD_FAIL and other internal functions.
1669              
1670             LWP => lwp
1671             HTTP::Lite => httplite
1672             HTTP::Tiny => httptiny
1673             Net::FTP => netftp
1674             wget => wget
1675             lynx => lynx
1676             ncftp => ncftp
1677             ftp => ftp
1678             curl => curl
1679             rsync => rsync
1680             lftp => lftp
1681             fetch => fetch
1682             IO::Socket => iosock
1683              
1684             =head1 FREQUENTLY ASKED QUESTIONS
1685              
1686             =head2 So how do I use a proxy with File::Fetch?
1687              
1688             C currently only supports proxies with LWP::UserAgent.
1689             You will need to set your environment variables accordingly. For
1690             example, to use an ftp proxy:
1691              
1692             $ENV{ftp_proxy} = 'foo.com';
1693              
1694             Refer to the LWP::UserAgent manpage for more details.
1695              
1696             =head2 I used 'lynx' to fetch a file, but its contents is all wrong!
1697              
1698             C can only fetch remote files by dumping its contents to C,
1699             which we in turn capture. If that content is a 'custom' error file
1700             (like, say, a C<404 handler>), you will get that contents instead.
1701              
1702             Sadly, C doesn't support any options to return a different exit
1703             code on non-C<200 OK> status, giving us no way to tell the difference
1704             between a 'successful' fetch and a custom error page.
1705              
1706             Therefor, we recommend to only use C as a last resort. This is
1707             why it is at the back of our list of methods to try as well.
1708              
1709             =head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do?
1710              
1711             C is relatively smart about things. When trying to write
1712             a file to disk, it removes the C (see the
1713             C method for details) from the file name before creating
1714             it. In most cases this suffices.
1715              
1716             If you have any other characters you need to escape, please install
1717             the C module from CPAN, and pre-encode your URI before
1718             passing it to C. You can read about the details of URIs
1719             and URI encoding here:
1720              
1721             L
1722              
1723             =head1 TODO
1724              
1725             =over 4
1726              
1727             =item Implement $PREFER_BIN
1728              
1729             To indicate to rather use commandline tools than modules
1730              
1731             =back
1732              
1733             =head1 BUG REPORTS
1734              
1735             Please report bugs or other issues to Ebug-file-fetch@rt.cpan.org.
1736              
1737             =head1 AUTHOR
1738              
1739             This module by Jos Boumans Ekane@cpan.orgE.
1740              
1741             =head1 COPYRIGHT
1742              
1743             This library is free software; you may redistribute and/or modify it
1744             under the same terms as Perl itself.
1745              
1746              
1747             =cut
1748              
1749             # Local variables:
1750             # c-indentation-style: bsd
1751             # c-basic-offset: 4
1752             # indent-tabs-mode: nil
1753             # End:
1754             # vim: expandtab shiftwidth=4:
1755              
1756              
1757              
1758