File Coverage

lib/File/Fetch.pm
Criterion Covered Total %
statement 110 509 21.6
branch 15 236 6.4
condition 7 66 10.6
subroutine 27 47 57.4
total 159 858 18.5


line stmt bran cond sub time code
1           package File::Fetch;
2            
3 2     2 301892 use strict;
  2       6  
  2       87  
4 2     2 639 use FileHandle;
  2       22334  
  2       19  
5 2     2 2679 use File::Temp;
  2       24134  
  2       269  
6 2     2 810 use File::Copy;
  2       8252  
  2       153  
7 2     2 13 use File::Spec;
  2       5  
  2       45  
8 2     2 13 use File::Spec::Unix;
  2       4  
  2       65  
9 2     2 12 use File::Basename qw[dirname];
  2       7  
  2       137  
10            
11 2     2 12 use Cwd qw[cwd];
  2       4  
  2       111  
12 2     2 11 use Carp qw[carp];
  2       5  
  2       138  
13 2     2 1185 use IPC::Cmd qw[can_run run QUOTE];
  2       136748  
  2       232  
14 2     2 22 use File::Path qw[mkpath];
  2       7  
  2       126  
15 2     2 13 use File::Temp qw[tempdir];
  2       6  
  2       200  
16 2     2 12 use Params::Check qw[check];
  2       7  
  2       118  
17 2     2 15 use Module::Load::Conditional qw[can_load];
  2       8  
  2       124  
18 2     2 13 use Locale::Maketext::Simple Style => 'gettext';
  2       6  
  2       15  
19            
20 2       766 use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT
21           $BLACKLIST $METHOD_FAIL $VERSION $METHODS
22           $FTP_PASSIVE $TIMEOUT $DEBUG $WARN
23 2     2 357 ];
  2       5  
24            
25           $VERSION = '0.42';
26           $VERSION = eval $VERSION; # avoid warnings with development releases
27           $PREFER_BIN = 0; # XXX TODO implement
28           $FROM_EMAIL = 'File-Fetch@example.com';
29           $USER_AGENT = "File::Fetch/$VERSION";
30           $BLACKLIST = [qw|ftp|];
31           $METHOD_FAIL = { };
32           $FTP_PASSIVE = 1;
33           $TIMEOUT = 0;
34           $DEBUG = 0;
35           $WARN = 1;
36            
37           ### methods available to fetch the file depending on the scheme
38           $METHODS = {
39           http => [ qw|lwp httptiny wget curl lftp fetch httplite lynx iosock| ],
40           ftp => [ qw|lwp netftp wget curl lftp fetch ncftp ftp| ],
41           file => [ qw|lwp lftp file| ],
42           rsync => [ qw|rsync| ],
43           git => [ qw|git| ],
44           };
45            
46           ### silly warnings ###
47           local $Params::Check::VERBOSE = 1;
48           local $Params::Check::VERBOSE = 1;
49           local $Module::Load::Conditional::VERBOSE = 0;
50           local $Module::Load::Conditional::VERBOSE = 0;
51            
52           ### see what OS we are on, important for file:// uris ###
53 2     2 14 use constant ON_WIN => ($^O eq 'MSWin32');
  2       6  
  2       189  
54 2     2 12 use constant ON_VMS => ($^O eq 'VMS');
  2       5  
  2       159  
55 2     2 13 use constant ON_UNIX => (!ON_WIN);
  2       5  
  2       142  
56 2     2 12 use constant HAS_VOL => (ON_WIN);
  2       6  
  2       148  
57 2     2 13 use constant HAS_SHARE => (ON_WIN);
  2       5  
  2       234  
58 2     2 13 use constant HAS_FETCH => ( $^O =~ m!^(freebsd|netbsd|dragonfly)$! );
  2       6  
  2       370  
59            
60           =pod
61            
62           =head1 NAME
63            
64           File::Fetch - A generic file fetching mechanism
65            
66           =head1 SYNOPSIS
67            
68           use File::Fetch;
69            
70           ### build a File::Fetch object ###
71           my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt');
72            
73           ### fetch the uri to cwd() ###
74           my $where = $ff->fetch() or die $ff->error;
75            
76           ### fetch the uri to /tmp ###
77           my $where = $ff->fetch( to => '/tmp' );
78            
79           ### parsed bits from the uri ###
80           $ff->uri;
81           $ff->scheme;
82           $ff->host;
83           $ff->path;
84           $ff->file;
85            
86           =head1 DESCRIPTION
87            
88           File::Fetch is a generic file fetching mechanism.
89            
90           It allows you to fetch any file pointed to by a C, C,
91           C, C or C uri by a number of different means.
92            
93           See the C section further down for details.
94            
95           =head1 ACCESSORS
96            
97           A C object has the following accessors
98            
99           =over 4
100            
101           =item $ff->uri
102            
103           The uri you passed to the constructor
104            
105           =item $ff->scheme
106            
107           The scheme from the uri (like 'file', 'http', etc)
108            
109           =item $ff->host
110            
111           The hostname in the uri. Will be empty if host was originally
112           'localhost' for a 'file://' url.
113            
114           =item $ff->vol
115            
116           On operating systems with the concept of a volume the second element
117           of a file:// is considered to the be volume specification for the file.
118           Thus on Win32 this routine returns the volume, on other operating
119           systems this returns nothing.
120            
121           On Windows this value may be empty if the uri is to a network share, in
122           which case the 'share' property will be defined. Additionally, volume
123           specifications that use '|' as ':' will be converted on read to use ':'.
124            
125           On VMS, which has a volume concept, this field will be empty because VMS
126           file specifications are converted to absolute UNIX format and the volume
127           information is transparently included.
128            
129           =item $ff->share
130            
131           On systems with the concept of a network share (currently only Windows) returns
132           the sharename from a file://// url. On other operating systems returns empty.
133            
134           =item $ff->path
135            
136           The path from the uri, will be at least a single '/'.
137            
138           =item $ff->file
139            
140           The name of the remote file. For the local file name, the
141           result of $ff->output_file will be used.
142            
143           =item $ff->file_default
144            
145           The name of the default local file, that $ff->output_file falls back to if
146           it would otherwise return no filename. For example when fetching a URI like
147           http://www.abc.net.au/ the contents retrieved may be from a remote file called
148           'index.html'. The default value of this attribute is literally 'file_default'.
149            
150           =cut
151            
152            
153           ##########################
154           ### Object & Accessors ###
155           ##########################
156            
157           {
158           ### template for autogenerated accessors ###
159           my $Tmpl = {
160           scheme => { default => 'http' },
161           host => { default => 'localhost' },
162           path => { default => '/' },
163           file => { required => 1 },
164           uri => { required => 1 },
165           vol => { default => '' }, # windows for file:// uris
166           share => { default => '' }, # windows for file:// uris
167           file_default => { default => 'file_default' },
168           tempdir_root => { required => 1 }, # Should be lazy-set at ->new()
169           _error_msg => { no_override => 1 },
170           _error_msg_long => { no_override => 1 },
171           };
172            
173           for my $method ( keys %$Tmpl ) {
174 2     2 14 no strict 'refs';
  2       6  
  2       17424  
175           *$method = sub {
176 56     56 33627 my $self = shift;
177 56 50     390 $self->{$method} = $_[0] if @_;
178 56       930 return $self->{$method};
179           }
180           }
181            
182           sub _create {
183 9     9 103 my $class = shift;
184 9       208 my %hash = @_;
185            
186 9 50     313 my $args = check( $Tmpl, \%hash ) or return;
187            
188 9       5447 bless $args, $class;
189            
190 9 50 66   135 if( lc($args->scheme) ne 'file' and not $args->host ) {
191 0       0 return $class->_error(loc(
192           "Hostname required when fetching from '%1'",$args->scheme));
193           }
194            
195 9       235 for (qw[path]) {
196 9 50     96 unless( $args->$_() ) { # 5.5.x needs the ()
197 0       0 return $class->_error(loc("No '%1' specified",$_));
198           }
199           }
200            
201 9       113 return $args;
202           }
203           }
204            
205           =item $ff->output_file
206            
207           The name of the output file. This is the same as $ff->file,
208           but any query parameters are stripped off. For example:
209            
210           http://example.com/index.html?x=y
211            
212           would make the output file be C rather than
213           C.
214            
215           =back
216            
217           =cut
218            
219           sub output_file {
220 0     0 0 my $self = shift;
221 0       0 my $file = $self->file;
222            
223 0       0 $file =~ s/\?.*$//g;
224            
225 0   0   0 $file ||= $self->file_default;
226            
227 0       0 return $file;
228           }
229            
230           ### XXX do this or just point to URI::Escape?
231           # =head2 $esc_uri = $ff->escaped_uri
232           #
233           # =cut
234           #
235           # ### most of this is stolen straight from URI::escape
236           # { ### Build a char->hex map
237           # my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255;
238           #
239           # sub escaped_uri {
240           # my $self = shift;
241           # my $uri = $self->uri;
242           #
243           # ### Default unsafe characters. RFC 2732 ^(uric - reserved)
244           # $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/
245           # $escapes{$1} || $self->_fail_hi($1)/ge;
246           #
247           # return $uri;
248           # }
249           #
250           # sub _fail_hi {
251           # my $self = shift;
252           # my $char = shift;
253           #
254           # $self->_error(loc(
255           # "Can't escape '%1', try using the '%2' module instead",
256           # sprintf("\\x{%04X}", ord($char)), 'URI::Escape'
257           # ));
258           # }
259           #
260           # sub output_file {
261           #
262           # }
263           #
264           #
265           # }
266            
267           =head1 METHODS
268            
269           =head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' );
270            
271           Parses the uri and creates a corresponding File::Fetch::Item object,
272           that is ready to be Ced and returns it.
273            
274           Returns false on failure.
275            
276           =cut
277            
278           sub new {
279 9     9 9580 my $class = shift;
280 9       55 my %hash = @_;
281            
282 9       23 my ($uri, $file_default, $tempdir_root);
283 9       92 my $tmpl = {
284           uri => { required => 1, store => \$uri },
285           file_default => { required => 0, store => \$file_default },
286           tempdir_root => { required => 0, store => \$tempdir_root },
287           };
288            
289 9 50     93 check( $tmpl, \%hash ) or return;
290            
291           ### parse the uri to usable parts ###
292 9 50     1413 my $href = $class->_parse_uri( $uri ) or return;
293            
294 9 50     41 $href->{file_default} = $file_default if $file_default;
295 9 50     32 $href->{tempdir_root} = File::Spec->rel2abs( $tempdir_root ) if $tempdir_root;
296 9 50     50760 $href->{tempdir_root} = File::Spec->rel2abs( Cwd::cwd ) if not $href->{tempdir_root};
297            
298           ### make it into a FFI object ###
299 9 50     510 my $ff = $class->_create( %$href ) or return;
300            
301            
302           ### return the object ###
303 9       298 return $ff;
304           }
305            
306           ### parses an uri to a hash structure:
307           ###
308           ### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' )
309           ###
310           ### becomes:
311           ###
312           ### $href = {
313           ### scheme => 'ftp',
314           ### host => 'ftp.cpan.org',
315           ### path => '/pub/mirror',
316           ### file => 'index.html'
317           ### };
318           ###
319           ### In the case of file:// urls there maybe be additional fields
320           ###
321           ### For systems with volume specifications such as Win32 there will be
322           ### a volume specifier provided in the 'vol' field.
323           ###
324           ### 'vol' => 'volumename'
325           ###
326           ### For windows file shares there may be a 'share' key specified
327           ###
328           ### 'share' => 'sharename'
329           ###
330           ### Note that the rules of what a file:// url means vary by the operating system
331           ### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious
332           ### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and
333           ### not '/foo/bar.txt'
334           ###
335           ### Similarly if the host interpreting the url is VMS then
336           ### file:///disk$user/my/notes/note12345.txt' means
337           ### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as
338           ### if it is unix where it means /disk$user/my/notes/note12345.txt'.
339           ### Except for some cases in the File::Spec methods, Perl on VMS will generally
340           ### handle UNIX format file specifications.
341           ###
342           ### This means it is impossible to serve certain file:// urls on certain systems.
343           ###
344           ### Thus are the problems with a protocol-less specification. :-(
345           ###
346            
347           sub _parse_uri {
348 16     16 16827 my $self = shift;
349 16 50     70 my $uri = shift or return;
350            
351 16       60 my $href = { uri => $uri };
352            
353           ### find the scheme ###
354 16       157 $uri =~ s|^(\w+)://||;
355 16       92 $href->{scheme} = $1;
356            
357           ### See rfc 1738 section 3.10
358           ### http://www.faqs.org/rfcs/rfc1738.html
359           ### And wikipedia for more on windows file:// urls
360           ### http://en.wikipedia.org/wiki/File://
361 16 100     80 if( $href->{scheme} eq 'file' ) {
362            
363 6       52 my @parts = split '/',$uri;
364            
365           ### file://hostname/...
366           ### file://hostname/...
367           ### normalize file://localhost with file:///
368 6   100   43 $href->{host} = $parts[0] || '';
369            
370           ### index in @parts where the path components begin;
371 6       12 my $index = 1;
372            
373           ### file:////hostname/sharename/blah.txt
374 6       13 if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) {
375            
376           $href->{host} = $parts[2] || ''; # avoid warnings
377           $href->{share} = $parts[3] || ''; # avoid warnings
378            
379           $index = 4 # index after the share
380            
381           ### file:///D|/blah.txt
382           ### file:///D:/blah.txt
383 0       0 } elsif (HAS_VOL) {
384            
385           ### this code comes from dmq's patch, but:
386           ### XXX if volume is empty, wouldn't that be an error? --kane
387           ### if so, our file://localhost test needs to be fixed as wel
388           $href->{vol} = $parts[1] || '';
389            
390           ### correct D| style colume descriptors
391           $href->{vol} =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN;
392            
393           $index = 2; # index after the volume
394           }
395            
396           ### rebuild the path from the leftover parts;
397 6       41 $href->{path} = join '/', '', splice( @parts, $index, $#parts );
398            
399           } else {
400           ### using anything but qw() in hash slices may produce warnings
401           ### in older perls :-(
402 10       109 @{$href}{ qw(host path) } = $uri =~ m|([^/]*)(/.*)$|s;
  10       54  
403           }
404            
405           ### split the path into file + dir ###
406 16       41 { my @parts = File::Spec::Unix->splitpath( delete $href->{path} );
  16       497  
407 16       64 $href->{path} = $parts[1];
408 16       97 $href->{file} = $parts[2];
409           }
410            
411           ### host will be empty if the target was 'localhost' and the
412           ### scheme was 'file'
413 16 100 100   119 $href->{host} = '' if ($href->{host} eq 'localhost') and
414           ($href->{scheme} eq 'file');
415            
416 16       78 return $href;
417           }
418            
419           =head2 $where = $ff->fetch( [to => /my/output/dir/ | \$scalar] )
420            
421           Fetches the file you requested and returns the full path to the file.
422            
423           By default it writes to C, but you can override that by specifying
424           the C argument:
425            
426           ### file fetch to /tmp, full path to the file in $where
427           $where = $ff->fetch( to => '/tmp' );
428            
429           ### file slurped into $scalar, full path to the file in $where
430           ### file is downloaded to a temp directory and cleaned up at exit time
431           $where = $ff->fetch( to => \$scalar );
432            
433           Returns the full path to the downloaded file on success, and false
434           on failure.
435            
436           =cut
437            
438           sub fetch {
439 0 0   0   my $self = shift or return;
440 0         my %hash = @_;
441            
442 0         my $target;
443 0         my $tmpl = {
444           to => { default => cwd(), store => \$target },
445           };
446            
447 0 0       check( $tmpl, \%hash ) or return;
448            
449 0         my ($to, $fh);
450           ### you want us to slurp the contents
451 0 0 0     if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
452 0         $to = tempdir( 'FileFetch.XXXXXX', DIR => $self->tempdir_root, CLEANUP => 1 );
453            
454           ### plain old fetch
455           } else {
456 0         $to = $target;
457            
458           ### On VMS force to VMS format so File::Spec will work.
459 0         $to = VMS::Filespec::vmspath($to) if ON_VMS;
460            
461           ### create the path if it doesn't exist yet ###
462 0 0       unless( -d $to ) {
463 0         eval { mkpath( $to ) };
  0          
464            
465 0 0       return $self->_error(loc("Could not create path '%1'",$to)) if $@;
466           }
467           }
468            
469           ### set passive ftp if required ###
470 0         local $ENV{FTP_PASSIVE} = $FTP_PASSIVE;
471            
472           ### we dont use catfile on win32 because if we are using a cygwin tool
473           ### under cmd.exe they wont understand windows style separators.
474 0         my $out_to = ON_WIN ? $to.'/'.$self->output_file
475           : File::Spec->catfile( $to, $self->output_file );
476            
477 0         for my $method ( @{ $METHODS->{$self->scheme} } ) {
  0          
478 0         my $sub = '_'.$method.'_fetch';
479            
480 0 0       unless( __PACKAGE__->can($sub) ) {
481 0         $self->_error(loc("Cannot call method for '%1' -- WEIRD!",
482           $method));
483 0         next;
484           }
485            
486           ### method is blacklisted ###
487 0 0       next if grep { lc $_ eq $method } @$BLACKLIST;
  0          
488            
489           ### method is known to fail ###
490 0 0       next if $METHOD_FAIL->{$method};
491            
492           ### there's serious issues with IPC::Run and quoting of command
493           ### line arguments. using quotes in the wrong place breaks things,
494           ### and in the case of say,
495           ### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document
496           ### "index.html" "http://www.cpan.org/index.html?q=1&y=2"
497           ### it doesn't matter how you quote, it always fails.
498 0         local $IPC::Cmd::USE_IPC_RUN = 0;
499            
500 0 0       if( my $file = $self->$sub(
501           to => $out_to
502           )){
503            
504 0 0 0     unless( -e $file && -s _ ) {
505 0         $self->_error(loc("'%1' said it fetched '%2', ".
506           "but it was not created",$method,$file));
507            
508           ### mark the failure ###
509 0         $METHOD_FAIL->{$method} = 1;
510            
511 0         next;
512            
513           } else {
514            
515           ### slurp mode?
516 0 0 0     if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) {
517            
518           ### open the file
519 0 0       open my $fh, "<$file" or do {
520 0         $self->_error(
521           loc("Could not open '%1': %2", $file, $!));
522 0         return;
523           };
524            
525           ### slurp
526 0         $$target = do { local $/; <$fh> };
  0          
  0          
527            
528           }
529            
530 0         my $abs = File::Spec->rel2abs( $file );
531 0         return $abs;
532            
533           }
534           }
535           }
536            
537            
538           ### if we got here, we looped over all methods, but we weren't able
539           ### to fetch it.
540 0         return;
541           }
542            
543           ########################
544           ### _*_fetch methods ###
545           ########################
546            
547           ### LWP fetching ###
548           sub _lwp_fetch {
549 0     0   my $self = shift;
550 0         my %hash = @_;
551            
552 0         my ($to);
553 0         my $tmpl = {
554           to => { required => 1, store => \$to }
555           };
556 0 0       check( $tmpl, \%hash ) or return;
557            
558           ### modules required to download with lwp ###
559 0         my $use_list = {
560           LWP => '0.0',
561           'LWP::UserAgent' => '0.0',
562           'HTTP::Request' => '0.0',
563           'HTTP::Status' => '0.0',
564           URI => '0.0',
565            
566           };
567            
568 0 0       unless( can_load( modules => $use_list ) ) {
569 0         $METHOD_FAIL->{'lwp'} = 1;
570 0         return;
571           }
572            
573           ### setup the uri object
574 0         my $uri = URI->new( File::Spec::Unix->catfile(
575           $self->path, $self->file
576           ) );
577            
578           ### special rules apply for file:// uris ###
579 0         $uri->scheme( $self->scheme );
580 0 0       $uri->host( $self->scheme eq 'file' ? '' : $self->host );
581 0 0       $uri->userinfo("anonymous:$FROM_EMAIL") if $self->scheme ne 'file';
582            
583           ### set up the useragent object
584 0         my $ua = LWP::UserAgent->new();
585 0 0       $ua->timeout( $TIMEOUT ) if $TIMEOUT;
586 0         $ua->agent( $USER_AGENT );
587 0         $ua->from( $FROM_EMAIL );
588 0         $ua->env_proxy;
589            
590 0 0       my $res = $ua->mirror($uri, $to) or return;
591            
592           ### uptodate or fetched ok ###
593 0 0 0     if ( $res->code == 304 or $res->code == 200 ) {
594 0         return $to;
595            
596           } else {
597 0         return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]",
598           $res->code, HTTP::Status::status_message($res->code),
599           $res->status_line));
600           }
601            
602           }
603            
604           ### HTTP::Tiny fetching ###
605           sub _httptiny_fetch {
606 0     0   my $self = shift;
607 0         my %hash = @_;
608            
609 0         my ($to);
610 0         my $tmpl = {
611           to => { required => 1, store => \$to }
612           };
613 0 0       check( $tmpl, \%hash ) or return;
614            
615 0         my $use_list = {
616           'HTTP::Tiny' => '0.008',
617            
618           };
619            
620 0 0       unless( can_load(modules => $use_list) ) {
621 0         $METHOD_FAIL->{'httptiny'} = 1;
622 0         return;
623           }
624            
625 0         my $uri = $self->uri;
626            
627 0 0       my $http = HTTP::Tiny->new( ( $TIMEOUT ? ( timeout => $TIMEOUT ) : () ) );
628            
629 0         my $rc = $http->mirror( $uri, $to );
630            
631 0 0       unless ( $rc->{success} ) {
632            
633 0         return $self->_error(loc( "Fetch failed! HTTP response: %1 [%2]",
634           $rc->{status}, $rc->{reason} ) );
635            
636           }
637            
638 0         return $to;
639            
640           }
641            
642           ### HTTP::Lite fetching ###
643           sub _httplite_fetch {
644 0     0   my $self = shift;
645 0         my %hash = @_;
646            
647 0         my ($to);
648 0         my $tmpl = {
649           to => { required => 1, store => \$to }
650           };
651 0 0       check( $tmpl, \%hash ) or return;
652            
653           ### modules required to download with lwp ###
654 0         my $use_list = {
655           'HTTP::Lite' => '2.2',
656            
657           };
658            
659 0 0       unless( can_load(modules => $use_list) ) {
660 0         $METHOD_FAIL->{'httplite'} = 1;
661 0         return;
662           }
663            
664 0         my $uri = $self->uri;
665 0         my $retries = 0;
666            
667 0         RETRIES: while ( $retries++ < 5 ) {
668            
669 0         my $http = HTTP::Lite->new();
670           # Naughty naughty but there isn't any accessor/setter
671 0 0       $http->{timeout} = $TIMEOUT if $TIMEOUT;
672 0         $http->http11_mode(1);
673            
674 0         my $fh = FileHandle->new;
675            
676 0 0       unless ( $fh->open($to,'>') ) {
677 0         return $self->_error(loc(
678           "Could not open '%1' for writing: %2",$to,$!));
679           }
680            
681 0         $fh->autoflush(1);
682            
683 0         binmode $fh;
684            
685 0     0   my $rc = $http->request( $uri, sub { my ($self,$dref,$cbargs) = @_; local $\; print {$cbargs} $$dref }, $fh );
  0          
  0          
  0          
  0          
686            
687 0         close $fh;
688            
689 0 0 0     if ( $rc == 301 || $rc == 302 ) {
    0        
690 0         my $loc;
691 0         HEADERS: for ($http->headers_array) {
692 0 0       /Location: (\S+)/ and $loc = $1, last HEADERS;
693           }
694           #$loc or last; # Think we should squeal here.
695 0 0       if ($loc =~ m!^/!) {
696 0         $uri =~ s{^(\w+?://[^/]+)/.*$}{$1};
697 0         $uri .= $loc;
698           }
699           else {
700 0         $uri = $loc;
701           }
702 0         next RETRIES;
703           }
704           elsif ( $rc == 200 ) {
705 0         return $to;
706           }
707           else {
708 0         return $self->_error(loc("Fetch failed! HTTP response: %1 [%2]",
709           $rc, $http->status_message));
710           }
711            
712           } # Loop for 5 retries.
713            
714 0         return $self->_error("Fetch failed! Gave up after 5 tries");
715            
716           }
717            
718           ### Simple IO::Socket::INET fetching ###
719           sub _iosock_fetch {
720 0     0   my $self = shift;
721 0         my %hash = @_;
722            
723 0         my ($to);
724 0         my $tmpl = {
725           to => { required => 1, store => \$to }
726           };
727 0 0       check( $tmpl, \%hash ) or return;
728            
729 0         my $use_list = {
730           'IO::Socket::INET' => '0.0',
731           'IO::Select' => '0.0',
732           };
733            
734 0 0       unless( can_load(modules => $use_list) ) {
735 0         $METHOD_FAIL->{'iosock'} = 1;
736 0         return;
737           }
738            
739 0 0       my $sock = IO::Socket::INET->new(
740           PeerHost => $self->host,
741           ( $self->host =~ /:/ ? () : ( PeerPort => 80 ) ),
742           );
743            
744 0 0       unless ( $sock ) {
745 0         return $self->_error(loc("Could not open socket to '%1', '%2'",$self->host,$!));
746           }
747            
748 0         my $fh = FileHandle->new;
749            
750           # Check open()
751            
752 0 0       unless ( $fh->open($to,'>') ) {
753 0         return $self->_error(loc(
754           "Could not open '%1' for writing: %2",$to,$!));
755           }
756            
757 0         $fh->autoflush(1);
758 0         binmode $fh;
759            
760 0         my $path = File::Spec::Unix->catfile( $self->path, $self->file );
761 0         my $req = "GET $path HTTP/1.0\x0d\x0aHost: " . $self->host . "\x0d\x0a\x0d\x0a";
762 0         $sock->send( $req );
763            
764 0         my $select = IO::Select->new( $sock );
765            
766 0         my $resp = '';
767 0         my $normal = 0;
768 0   0     while ( $select->can_read( $TIMEOUT || 60 ) ) {
769 0         my $ret = $sock->sysread( $resp, 4096, length($resp) );
770 0 0 0     if ( !defined $ret or $ret == 0 ) {
771 0         $select->remove( $sock );
772 0         $normal++;
773           }
774           }
775 0         close $sock;
776            
777 0 0       unless ( $normal ) {
778 0   0     return $self->_error(loc("Socket timed out after '%1' seconds", ( $TIMEOUT || 60 )));
779           }
780            
781           # Check the "response"
782           # Strip preceding blank lines apparently they are allowed (RFC 2616 4.1)
783 0         $resp =~ s/^(\x0d?\x0a)+//;
784           # Check it is an HTTP response
785 0 0       unless ( $resp =~ m!^HTTP/(\d+)\.(\d+)!i ) {
786 0         return $self->_error(loc("Did not get a HTTP response from '%1'",$self->host));
787           }
788            
789           # Check for OK
790 0         my ($code) = $resp =~ m!^HTTP/\d+\.\d+\s+(\d+)!i;
791 0 0       unless ( $code eq '200' ) {
792 0         return $self->_error(loc("Got a '%1' from '%2' expected '200'",$code,$self->host));
793           }
794            
795           {
796 0         local $\;
  0          
797 0         print $fh +($resp =~ m/\x0d\x0a\x0d\x0a(.*)$/s )[0];
798           }
799 0         close $fh;
800 0         return $to;
801           }
802            
803           ### Net::FTP fetching
804           sub _netftp_fetch {
805 0     0   my $self = shift;
806 0         my %hash = @_;
807            
808 0         my ($to);
809 0         my $tmpl = {
810           to => { required => 1, store => \$to }
811           };
812 0 0       check( $tmpl, \%hash ) or return;
813            
814           ### required modules ###
815 0         my $use_list = { 'Net::FTP' => 0 };
816            
817 0 0       unless( can_load( modules => $use_list ) ) {
818 0         $METHOD_FAIL->{'netftp'} = 1;
819 0         return;
820           }
821            
822           ### make connection ###
823 0         my $ftp;
824 0         my @options = ($self->host);
825 0 0       push(@options, Timeout => $TIMEOUT) if $TIMEOUT;
826 0 0       unless( $ftp = Net::FTP->new( @options ) ) {
827 0         return $self->_error(loc("Ftp creation failed: %1",$@));
828           }
829            
830           ### login ###
831 0 0       unless( $ftp->login( anonymous => $FROM_EMAIL ) ) {
832 0         return $self->_error(loc("Could not login to '%1'",$self->host));
833           }
834            
835           ### set binary mode, just in case ###
836 0         $ftp->binary;
837            
838           ### create the remote path
839           ### remember remote paths are unix paths! [#11483]
840 0         my $remote = File::Spec::Unix->catfile( $self->path, $self->file );
841            
842           ### fetch the file ###
843 0         my $target;
844 0 0       unless( $target = $ftp->get( $remote, $to ) ) {
845 0         return $self->_error(loc("Could not fetch '%1' from '%2'",
846           $remote, $self->host));
847           }
848            
849           ### log out ###
850 0         $ftp->quit;
851            
852 0         return $target;
853            
854           }
855            
856           ### /bin/wget fetch ###
857           sub _wget_fetch {
858 0     0   my $self = shift;
859 0         my %hash = @_;
860            
861 0         my ($to);
862 0         my $tmpl = {
863           to => { required => 1, store => \$to }
864           };
865 0 0       check( $tmpl, \%hash ) or return;
866            
867 0         my $wget;
868           ### see if we have a wget binary ###
869 0 0       unless( $wget = can_run('wget') ) {
870 0         $METHOD_FAIL->{'wget'} = 1;
871 0         return;
872           }
873            
874           ### no verboseness, thanks ###
875 0         my $cmd = [ $wget, '--quiet' ];
876            
877           ### if a timeout is set, add it ###
878 0 0       push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
879            
880           ### run passive if specified ###
881 0 0       push @$cmd, '--passive-ftp' if $FTP_PASSIVE;
882            
883           ### set the output document, add the uri ###
884 0         push @$cmd, '--output-document', $to, $self->uri;
885            
886           ### with IPC::Cmd > 0.41, this is fixed in teh library,
887           ### and there's no need for special casing any more.
888           ### DO NOT quote things for IPC::Run, it breaks stuff.
889           # $IPC::Cmd::USE_IPC_RUN
890           # ? ($to, $self->uri)
891           # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
892            
893           ### shell out ###
894 0         my $captured;
895 0 0       unless(run( command => $cmd,
896           buffer => \$captured,
897           verbose => $DEBUG
898           )) {
899           ### wget creates the output document always, even if the fetch
900           ### fails.. so unlink it in that case
901 0         1 while unlink $to;
902            
903 0   0     return $self->_error(loc( "Command failed: %1", $captured || '' ));
904           }
905            
906 0         return $to;
907           }
908            
909           ### /bin/lftp fetch ###
910           sub _lftp_fetch {
911 0     0   my $self = shift;
912 0         my %hash = @_;
913            
914 0         my ($to);
915 0         my $tmpl = {
916           to => { required => 1, store => \$to }
917           };
918 0 0       check( $tmpl, \%hash ) or return;
919            
920           ### see if we have a lftp binary ###
921 0         my $lftp;
922 0 0       unless( $lftp = can_run('lftp') ) {
923 0         $METHOD_FAIL->{'lftp'} = 1;
924 0         return;
925           }
926            
927           ### no verboseness, thanks ###
928 0         my $cmd = [ $lftp, '-f' ];
929            
930 0         my $fh = File::Temp->new;
931            
932 0         my $str;
933            
934           ### if a timeout is set, add it ###
935 0 0       $str .= "set net:timeout $TIMEOUT;\n" if $TIMEOUT;
936            
937           ### run passive if specified ###
938 0 0       $str .= "set ftp:passive-mode 1;\n" if $FTP_PASSIVE;
939            
940           ### set the output document, add the uri ###
941           ### quote the URI, because lftp supports certain shell
942           ### expansions, most notably & for backgrounding.
943           ### ' quote does nto work, must be "
944 0         $str .= q[get ']. $self->uri .q[' -o ]. $to . $/;
945            
946 0 0       if( $DEBUG ) {
947 0         my $pp_str = join ' ', split $/, $str;
948 0         print "# lftp command: $pp_str\n";
949           }
950            
951           ### write straight to the file.
952 0         $fh->autoflush(1);
953 0         print $fh $str;
954            
955           ### the command needs to be 1 string to be executed
956 0         push @$cmd, $fh->filename;
957            
958           ### with IPC::Cmd > 0.41, this is fixed in teh library,
959           ### and there's no need for special casing any more.
960           ### DO NOT quote things for IPC::Run, it breaks stuff.
961           # $IPC::Cmd::USE_IPC_RUN
962           # ? ($to, $self->uri)
963           # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
964            
965            
966           ### shell out ###
967 0         my $captured;
968 0 0       unless(run( command => $cmd,
969           buffer => \$captured,
970           verbose => $DEBUG
971           )) {
972           ### wget creates the output document always, even if the fetch
973           ### fails.. so unlink it in that case
974 0         1 while unlink $to;
975            
976 0   0     return $self->_error(loc( "Command failed: %1", $captured || '' ));
977           }
978            
979 0         return $to;
980           }
981            
982            
983            
984           ### /bin/ftp fetch ###
985           sub _ftp_fetch {
986 0     0   my $self = shift;
987 0         my %hash = @_;
988            
989 0         my ($to);
990 0         my $tmpl = {
991           to => { required => 1, store => \$to }
992           };
993 0 0       check( $tmpl, \%hash ) or return;
994            
995           ### see if we have a ftp binary ###
996 0         my $ftp;
997 0 0       unless( $ftp = can_run('ftp') ) {
998 0         $METHOD_FAIL->{'ftp'} = 1;
999 0         return;
1000           }
1001            
1002 0         my $fh = FileHandle->new;
1003            
1004 0         local $SIG{CHLD} = 'IGNORE';
1005            
1006 0 0       unless ($fh->open("$ftp -n", '|-')) {
1007 0         return $self->_error(loc("%1 creation failed: %2", $ftp, $!));
1008           }
1009            
1010 0         my @dialog = (
1011           "lcd " . dirname($to),
1012           "open " . $self->host,
1013           "user anonymous $FROM_EMAIL",
1014           "cd /",
1015           "cd " . $self->path,
1016           "binary",
1017           "get " . $self->file . " " . $self->output_file,
1018           "quit",
1019           );
1020            
1021 0         foreach (@dialog) { $fh->print($_, "\n") }
  0          
1022 0 0       $fh->close or return;
1023            
1024 0         return $to;
1025           }
1026            
1027           ### lynx is stupid - it decompresses any .gz file it finds to be text
1028           ### use /bin/lynx to fetch files
1029           sub _lynx_fetch {
1030 0     0   my $self = shift;
1031 0         my %hash = @_;
1032            
1033 0         my ($to);
1034 0         my $tmpl = {
1035           to => { required => 1, store => \$to }
1036           };
1037 0 0       check( $tmpl, \%hash ) or return;
1038            
1039           ### see if we have a lynx binary ###
1040 0         my $lynx;
1041 0 0       unless ( $lynx = can_run('lynx') ){
1042 0         $METHOD_FAIL->{'lynx'} = 1;
1043 0         return;
1044           }
1045            
1046 0 0       unless( IPC::Cmd->can_capture_buffer ) {
1047 0         $METHOD_FAIL->{'lynx'} = 1;
1048            
1049 0         return $self->_error(loc(
1050           "Can not capture buffers. Can not use '%1' to fetch files",
1051           'lynx' ));
1052           }
1053            
1054           ### check if the HTTP resource exists ###
1055 0 0       if ($self->uri =~ /^https?:\/\//i) {
1056 0         my $cmd = [
1057           $lynx,
1058           '-head',
1059           '-source',
1060           "-auth=anonymous:$FROM_EMAIL",
1061           ];
1062            
1063 0 0       push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
1064            
1065 0         push @$cmd, $self->uri;
1066            
1067           ### shell out ###
1068 0         my $head;
1069 0 0       unless(run( command => $cmd,
1070           buffer => \$head,
1071           verbose => $DEBUG )
1072           ) {
1073 0   0     return $self->_error(loc("Command failed: %1", $head || ''));
1074           }
1075            
1076 0 0       unless($head =~ /^HTTP\/\d+\.\d+ 200\b/) {
1077 0   0     return $self->_error(loc("Command failed: %1", $head || ''));
1078           }
1079           }
1080            
1081           ### write to the output file ourselves, since lynx ass_u_mes to much
1082 0 0       my $local = FileHandle->new( $to, 'w' )
1083           or return $self->_error(loc(
1084           "Could not open '%1' for writing: %2",$to,$!));
1085            
1086           ### dump to stdout ###
1087 0         my $cmd = [
1088           $lynx,
1089           '-source',
1090           "-auth=anonymous:$FROM_EMAIL",
1091           ];
1092            
1093 0 0       push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT;
1094            
1095           ### DO NOT quote things for IPC::Run, it breaks stuff.
1096 0         push @$cmd, $self->uri;
1097            
1098           ### with IPC::Cmd > 0.41, this is fixed in teh library,
1099           ### and there's no need for special casing any more.
1100           ### DO NOT quote things for IPC::Run, it breaks stuff.
1101           # $IPC::Cmd::USE_IPC_RUN
1102           # ? $self->uri
1103           # : QUOTE. $self->uri .QUOTE;
1104            
1105            
1106           ### shell out ###
1107 0         my $captured;
1108 0 0       unless(run( command => $cmd,
1109           buffer => \$captured,
1110           verbose => $DEBUG )
1111           ) {
1112 0   0     return $self->_error(loc("Command failed: %1", $captured || ''));
1113           }
1114            
1115           ### print to local file ###
1116           ### XXX on a 404 with a special error page, $captured will actually
1117           ### hold the contents of that page, and make it *appear* like the
1118           ### request was a success, when really it wasn't :(
1119           ### there doesn't seem to be an option for lynx to change the exit
1120           ### code based on a 4XX status or so.
1121           ### the closest we can come is using --error_file and parsing that,
1122           ### which is very unreliable ;(
1123 0         $local->print( $captured );
1124 0 0       $local->close or return;
1125            
1126 0         return $to;
1127           }
1128            
1129           ### use /bin/ncftp to fetch files
1130           sub _ncftp_fetch {
1131 0     0   my $self = shift;
1132 0         my %hash = @_;
1133            
1134 0         my ($to);
1135 0         my $tmpl = {
1136           to => { required => 1, store => \$to }
1137           };
1138 0 0       check( $tmpl, \%hash ) or return;
1139            
1140           ### we can only set passive mode in interactive sessions, so bail out
1141           ### if $FTP_PASSIVE is set
1142 0 0       return if $FTP_PASSIVE;
1143            
1144           ### see if we have a ncftp binary ###
1145 0         my $ncftp;
1146 0 0       unless( $ncftp = can_run('ncftp') ) {
1147 0         $METHOD_FAIL->{'ncftp'} = 1;
1148 0         return;
1149           }
1150            
1151 0 0       my $cmd = [
1152           $ncftp,
1153           '-V', # do not be verbose
1154           '-p', $FROM_EMAIL, # email as password
1155           $self->host, # hostname
1156           dirname($to), # local dir for the file
1157           # remote path to the file
1158           ### DO NOT quote things for IPC::Run, it breaks stuff.
1159           $IPC::Cmd::USE_IPC_RUN
1160           ? File::Spec::Unix->catdir( $self->path, $self->file )
1161           : QUOTE. File::Spec::Unix->catdir(
1162           $self->path, $self->file ) .QUOTE
1163            
1164           ];
1165            
1166           ### shell out ###
1167 0         my $captured;
1168 0 0       unless(run( command => $cmd,
1169           buffer => \$captured,
1170           verbose => $DEBUG )
1171           ) {
1172 0   0     return $self->_error(loc("Command failed: %1", $captured || ''));
1173           }
1174            
1175 0         return $to;
1176            
1177           }
1178            
1179           ### use /bin/curl to fetch files
1180           sub _curl_fetch {
1181 0     0   my $self = shift;
1182 0         my %hash = @_;
1183            
1184 0         my ($to);
1185 0         my $tmpl = {
1186           to => { required => 1, store => \$to }
1187           };
1188 0 0       check( $tmpl, \%hash ) or return;
1189 0         my $curl;
1190 0 0       unless ( $curl = can_run('curl') ) {
1191 0         $METHOD_FAIL->{'curl'} = 1;
1192 0         return;
1193           }
1194            
1195           ### these long opts are self explanatory - I like that -jmb
1196 0         my $cmd = [ $curl, '-q' ];
1197            
1198 0 0       push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT;
1199            
1200 0 0       push(@$cmd, '--silent') unless $DEBUG;
1201            
1202           ### curl does the right thing with passive, regardless ###
1203 0 0       if ($self->scheme eq 'ftp') {
1204 0         push(@$cmd, '--user', "anonymous:$FROM_EMAIL");
1205           }
1206            
1207           ### curl doesn't follow 302 (temporarily moved) etc automatically
1208           ### so we add --location to enable that.
1209 0         push @$cmd, '--fail', '--location', '--output', $to, $self->uri;
1210            
1211           ### with IPC::Cmd > 0.41, this is fixed in teh library,
1212           ### and there's no need for special casing any more.
1213           ### DO NOT quote things for IPC::Run, it breaks stuff.
1214           # $IPC::Cmd::USE_IPC_RUN
1215           # ? ($to, $self->uri)
1216           # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1217            
1218            
1219 0         my $captured;
1220 0 0       unless(run( command => $cmd,
1221           buffer => \$captured,
1222           verbose => $DEBUG )
1223           ) {
1224            
1225 0   0     return $self->_error(loc("Command failed: %1", $captured || ''));
1226           }
1227            
1228 0         return $to;
1229            
1230           }
1231            
1232           ### /usr/bin/fetch fetch! ###
1233           sub _fetch_fetch {
1234 0     0   my $self = shift;
1235 0         my %hash = @_;
1236            
1237 0         my ($to);
1238 0         my $tmpl = {
1239           to => { required => 1, store => \$to }
1240           };
1241 0 0       check( $tmpl, \%hash ) or return;
1242            
1243           ### see if we have a fetch binary ###
1244 0         my $fetch;
1245 0 0 0     unless( HAS_FETCH and $fetch = can_run('fetch') ) {
1246 0         $METHOD_FAIL->{'fetch'} = 1;
1247 0         return;
1248           }
1249            
1250           ### no verboseness, thanks ###
1251 0         my $cmd = [ $fetch, '-q' ];
1252            
1253           ### if a timeout is set, add it ###
1254 0 0       push(@$cmd, '-T', $TIMEOUT) if $TIMEOUT;
1255            
1256           ### run passive if specified ###
1257           #push @$cmd, '-p' if $FTP_PASSIVE;
1258 0 0       local $ENV{'FTP_PASSIVE_MODE'} = 1 if $FTP_PASSIVE;
1259            
1260           ### set the output document, add the uri ###
1261 0         push @$cmd, '-o', $to, $self->uri;
1262            
1263           ### with IPC::Cmd > 0.41, this is fixed in teh library,
1264           ### and there's no need for special casing any more.
1265           ### DO NOT quote things for IPC::Run, it breaks stuff.
1266           # $IPC::Cmd::USE_IPC_RUN
1267           # ? ($to, $self->uri)
1268           # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1269            
1270           ### shell out ###
1271 0         my $captured;
1272 0 0       unless(run( command => $cmd,
1273           buffer => \$captured,
1274           verbose => $DEBUG
1275           )) {
1276           ### wget creates the output document always, even if the fetch
1277           ### fails.. so unlink it in that case
1278 0         1 while unlink $to;
1279            
1280 0   0     return $self->_error(loc( "Command failed: %1", $captured || '' ));
1281           }
1282            
1283 0         return $to;
1284           }
1285            
1286           ### use File::Copy for fetching file:// urls ###
1287           ###
1288           ### See section 3.10 of RFC 1738 (http://www.faqs.org/rfcs/rfc1738.html)
1289           ### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://)
1290           ###
1291            
1292           sub _file_fetch {
1293 0     0   my $self = shift;
1294 0         my %hash = @_;
1295            
1296 0         my ($to);
1297 0         my $tmpl = {
1298           to => { required => 1, store => \$to }
1299           };
1300 0 0       check( $tmpl, \%hash ) or return;
1301            
1302            
1303            
1304           ### prefix a / on unix systems with a file uri, since it would
1305           ### look somewhat like this:
1306           ### file:///home/kane/file
1307           ### whereas windows file uris for 'c:\some\dir\file' might look like:
1308           ### file:///C:/some/dir/file
1309           ### file:///C|/some/dir/file
1310           ### or for a network share '\\host\share\some\dir\file':
1311           ### file:////host/share/some/dir/file
1312           ###
1313           ### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like:
1314           ### file://vms.host.edu/disk$user/my/notes/note12345.txt
1315           ###
1316            
1317 0         my $path = $self->path;
1318 0         my $vol = $self->vol;
1319 0         my $share = $self->share;
1320            
1321 0         my $remote;
1322 0 0 0     if (!$share and $self->host) {
1323 0         return $self->_error(loc(
1324           "Currently %1 cannot handle hosts in %2 urls",
1325           'File::Fetch', 'file://'
1326           ));
1327           }
1328            
1329 0 0       if( $vol ) {
    0        
1330 0         $path = File::Spec->catdir( split /\//, $path );
1331 0         $remote = File::Spec->catpath( $vol, $path, $self->file);
1332            
1333           } elsif( $share ) {
1334           ### win32 specific, and a share name, so we wont bother with File::Spec
1335 0         $path =~ s|/+|\\|g;
1336 0         $remote = "\\\\".$self->host."\\$share\\$path";
1337            
1338           } else {
1339           ### File::Spec on VMS can not currently handle UNIX syntax.
1340 0         my $file_class = ON_VMS
1341           ? 'File::Spec::Unix'
1342           : 'File::Spec';
1343            
1344 0         $remote = $file_class->catfile( $path, $self->file );
1345           }
1346            
1347           ### File::Copy is littered with 'die' statements :( ###
1348 0         my $rv = eval { File::Copy::copy( $remote, $to ) };
  0          
1349            
1350           ### something went wrong ###
1351 0 0 0     if( !$rv or $@ ) {
1352 0         return $self->_error(loc("Could not copy '%1' to '%2': %3 %4",
1353           $remote, $to, $!, $@));
1354           }
1355            
1356 0         return $to;
1357           }
1358            
1359           ### use /usr/bin/rsync to fetch files
1360           sub _rsync_fetch {
1361 0     0   my $self = shift;
1362 0         my %hash = @_;
1363            
1364 0         my ($to);
1365 0         my $tmpl = {
1366           to => { required => 1, store => \$to }
1367           };
1368 0 0       check( $tmpl, \%hash ) or return;
1369 0         my $rsync;
1370 0 0       unless ( $rsync = can_run('rsync') ) {
1371 0         $METHOD_FAIL->{'rsync'} = 1;
1372 0         return;
1373           }
1374            
1375 0         my $cmd = [ $rsync ];
1376            
1377           ### XXX: rsync has no I/O timeouts at all, by default
1378 0 0       push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
1379            
1380 0 0       push(@$cmd, '--quiet') unless $DEBUG;
1381            
1382           ### DO NOT quote things for IPC::Run, it breaks stuff.
1383 0         push @$cmd, $self->uri, $to;
1384            
1385           ### with IPC::Cmd > 0.41, this is fixed in teh library,
1386           ### and there's no need for special casing any more.
1387           ### DO NOT quote things for IPC::Run, it breaks stuff.
1388           # $IPC::Cmd::USE_IPC_RUN
1389           # ? ($to, $self->uri)
1390           # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1391            
1392 0         my $captured;
1393 0 0       unless(run( command => $cmd,
1394           buffer => \$captured,
1395           verbose => $DEBUG )
1396           ) {
1397            
1398 0   0     return $self->_error(loc("Command %1 failed: %2",
      0      
1399           "@$cmd" || '', $captured || ''));
1400           }
1401            
1402 0         return $to;
1403            
1404           }
1405            
1406           ### use git to fetch files
1407           sub _git_fetch {
1408 0     0   my $self = shift;
1409 0         my %hash = @_;
1410            
1411 0         my ($to);
1412 0         my $tmpl = {
1413           to => { required => 1, store => \$to }
1414           };
1415 0 0       check( $tmpl, \%hash ) or return;
1416 0         my $git;
1417 0 0       unless ( $git = can_run('git') ) {
1418 0         $METHOD_FAIL->{'git'} = 1;
1419 0         return;
1420           }
1421            
1422 0         my $cmd = [ $git, 'clone' ];
1423            
1424           #push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT;
1425            
1426 0 0       push(@$cmd, '--quiet') unless $DEBUG;
1427            
1428           ### DO NOT quote things for IPC::Run, it breaks stuff.
1429 0         push @$cmd, $self->uri, $to;
1430            
1431           ### with IPC::Cmd > 0.41, this is fixed in teh library,
1432           ### and there's no need for special casing any more.
1433           ### DO NOT quote things for IPC::Run, it breaks stuff.
1434           # $IPC::Cmd::USE_IPC_RUN
1435           # ? ($to, $self->uri)
1436           # : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE);
1437            
1438 0         my $captured;
1439 0 0       unless(run( command => $cmd,
1440           buffer => \$captured,
1441           verbose => $DEBUG )
1442           ) {
1443            
1444 0   0     return $self->_error(loc("Command %1 failed: %2",
      0      
1445           "@$cmd" || '', $captured || ''));
1446           }
1447            
1448 0         return $to;
1449            
1450           }
1451            
1452           #################################
1453           #
1454           # Error code
1455           #
1456           #################################
1457            
1458           =pod
1459            
1460           =head2 $ff->error([BOOL])
1461            
1462           Returns the last encountered error as string.
1463           Pass it a true value to get the C output instead.
1464            
1465           =cut
1466            
1467           ### error handling the way Archive::Extract does it
1468           sub _error {
1469 0     0   my $self = shift;
1470 0         my $error = shift;
1471            
1472 0         $self->_error_msg( $error );
1473 0         $self->_error_msg_long( Carp::longmess($error) );
1474            
1475 0 0       if( $WARN ) {
1476 0 0       carp $DEBUG ? $self->_error_msg_long : $self->_error_msg;
1477           }
1478            
1479 0         return;
1480           }
1481            
1482           sub error {
1483 0     0   my $self = shift;
1484 0 0       return shift() ? $self->_error_msg_long : $self->_error_msg;
1485           }
1486            
1487            
1488           1;
1489            
1490           =pod
1491            
1492           =head1 HOW IT WORKS
1493            
1494           File::Fetch is able to fetch a variety of uris, by using several
1495           external programs and modules.
1496            
1497           Below is a mapping of what utilities will be used in what order
1498           for what schemes, if available:
1499            
1500           file => LWP, lftp, file
1501           http => LWP, HTTP::Lite, wget, curl, lftp, fetch, lynx, iosock
1502           ftp => LWP, Net::FTP, wget, curl, lftp, fetch, ncftp, ftp
1503           rsync => rsync
1504           git => git
1505            
1506           If you'd like to disable the use of one or more of these utilities
1507           and/or modules, see the C<$BLACKLIST> variable further down.
1508            
1509           If a utility or module isn't available, it will be marked in a cache
1510           (see the C<$METHOD_FAIL> variable further down), so it will not be
1511           tried again. The C method will only fail when all options are
1512           exhausted, and it was not able to retrieve the file.
1513            
1514           The C utility is available on FreeBSD. NetBSD and Dragonfly BSD
1515           may also have it from C. We only check for C on those
1516           three platforms.
1517            
1518           C is a very limited L based mechanism for
1519           retrieving C schemed urls. It doesn't follow redirects for instance.
1520            
1521           C only supports C style urls.
1522            
1523           A special note about fetching files from an ftp uri:
1524            
1525           By default, all ftp connections are done in passive mode. To change
1526           that, see the C<$FTP_PASSIVE> variable further down.
1527            
1528           Furthermore, ftp uris only support anonymous connections, so no
1529           named user/password pair can be passed along.
1530            
1531           C is blacklisted by default; see the C<$BLACKLIST> variable
1532           further down.
1533            
1534           =head1 GLOBAL VARIABLES
1535            
1536           The behaviour of File::Fetch can be altered by changing the following
1537           global variables:
1538            
1539           =head2 $File::Fetch::FROM_EMAIL
1540            
1541           This is the email address that will be sent as your anonymous ftp
1542           password.
1543            
1544           Default is C.
1545            
1546           =head2 $File::Fetch::USER_AGENT
1547            
1548           This is the useragent as C will report it.
1549            
1550           Default is C.
1551            
1552           =head2 $File::Fetch::FTP_PASSIVE
1553            
1554           This variable controls whether the environment variable C
1555           and any passive switches to commandline tools will be set to true.
1556            
1557           Default value is 1.
1558            
1559           Note: When $FTP_PASSIVE is true, C will not be used to fetch
1560           files, since passive mode can only be set interactively for this binary
1561            
1562           =head2 $File::Fetch::TIMEOUT
1563            
1564           When set, controls the network timeout (counted in seconds).
1565            
1566           Default value is 0.
1567            
1568           =head2 $File::Fetch::WARN
1569            
1570           This variable controls whether errors encountered internally by
1571           C should be C'd or not.
1572            
1573           Set to false to silence warnings. Inspect the output of the C
1574           method manually to see what went wrong.
1575            
1576           Defaults to C.
1577            
1578           =head2 $File::Fetch::DEBUG
1579            
1580           This enables debugging output when calling commandline utilities to
1581           fetch files.
1582           This also enables C errors, instead of the regular
1583           C errors.
1584            
1585           Good for tracking down why things don't work with your particular
1586           setup.
1587            
1588           Default is 0.
1589            
1590           =head2 $File::Fetch::BLACKLIST
1591            
1592           This is an array ref holding blacklisted modules/utilities for fetching
1593           files with.
1594            
1595           To disallow the use of, for example, C and C, you could
1596           set $File::Fetch::BLACKLIST to:
1597            
1598           $File::Fetch::BLACKLIST = [qw|lwp netftp|]
1599            
1600           The default blacklist is [qw|ftp|], as C is rather unreliable.
1601            
1602           See the note on C below.
1603            
1604           =head2 $File::Fetch::METHOD_FAIL
1605            
1606           This is a hashref registering what modules/utilities were known to fail
1607           for fetching files (mostly because they weren't installed).
1608            
1609           You can reset this cache by assigning an empty hashref to it, or
1610           individually remove keys.
1611            
1612           See the note on C below.
1613            
1614           =head1 MAPPING
1615            
1616            
1617           Here's a quick mapping for the utilities/modules, and their names for
1618           the $BLACKLIST, $METHOD_FAIL and other internal functions.
1619            
1620           LWP => lwp
1621           HTTP::Lite => httplite
1622           HTTP::Tiny => httptiny
1623           Net::FTP => netftp
1624           wget => wget
1625           lynx => lynx
1626           ncftp => ncftp
1627           ftp => ftp
1628           curl => curl
1629           rsync => rsync
1630           lftp => lftp
1631           fetch => fetch
1632           IO::Socket => iosock
1633            
1634           =head1 FREQUENTLY ASKED QUESTIONS
1635            
1636           =head2 So how do I use a proxy with File::Fetch?
1637            
1638           C currently only supports proxies with LWP::UserAgent.
1639           You will need to set your environment variables accordingly. For
1640           example, to use an ftp proxy:
1641            
1642           $ENV{ftp_proxy} = 'foo.com';
1643            
1644           Refer to the LWP::UserAgent manpage for more details.
1645            
1646           =head2 I used 'lynx' to fetch a file, but its contents is all wrong!
1647            
1648           C can only fetch remote files by dumping its contents to C,
1649           which we in turn capture. If that content is a 'custom' error file
1650           (like, say, a C<404 handler>), you will get that contents instead.
1651            
1652           Sadly, C doesn't support any options to return a different exit
1653           code on non-C<200 OK> status, giving us no way to tell the difference
1654           between a 'successful' fetch and a custom error page.
1655            
1656           Therefor, we recommend to only use C as a last resort. This is
1657           why it is at the back of our list of methods to try as well.
1658            
1659           =head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do?
1660            
1661           C is relatively smart about things. When trying to write
1662           a file to disk, it removes the C (see the
1663           C method for details) from the file name before creating
1664           it. In most cases this suffices.
1665            
1666           If you have any other characters you need to escape, please install
1667           the C module from CPAN, and pre-encode your URI before
1668           passing it to C. You can read about the details of URIs
1669           and URI encoding here:
1670            
1671           http://www.faqs.org/rfcs/rfc2396.html
1672            
1673           =head1 TODO
1674            
1675           =over 4
1676            
1677           =item Implement $PREFER_BIN
1678            
1679           To indicate to rather use commandline tools than modules
1680            
1681           =back
1682            
1683           =head1 BUG REPORTS
1684            
1685           Please report bugs or other issues to Ebug-file-fetch@rt.cpan.org.
1686            
1687           =head1 AUTHOR
1688            
1689           This module by Jos Boumans Ekane@cpan.orgE.
1690            
1691           =head1 COPYRIGHT
1692            
1693           This library is free software; you may redistribute and/or modify it
1694           under the same terms as Perl itself.
1695            
1696            
1697           =cut
1698            
1699           # Local variables:
1700           # c-indentation-style: bsd
1701           # c-basic-offset: 4
1702           # indent-tabs-mode: nil
1703           # End:
1704           # vim: expandtab shiftwidth=4:
1705            
1706            
1707            
1708