File Coverage

lib/CPANPLUS/Internals/Fetch.pm
Criterion Covered Total %
statement 119 134 88.8
branch 33 48 68.7
condition 9 17 52.9
subroutine 15 15 100.0
pod n/a
total 176 214 82.2


line stmt bran cond sub pod time code
1             package CPANPLUS::Internals::Fetch;
2              
3 20     20   164 use strict;
  20         49  
  20         958  
4              
5 20     20   120 use CPANPLUS::Error;
  20         55  
  20         1677  
6 20     20   161 use CPANPLUS::Internals::Constants;
  20         70  
  20         10281  
7              
8 20     20   15687 use File::Fetch;
  20         213330  
  20         1064  
9 20     20   166 use File::Spec;
  20         45  
  20         560  
10 20     20   106 use Cwd qw[cwd];
  20         48  
  20         1252  
11 20     20   117 use IPC::Cmd qw[run];
  20         41  
  20         871  
12 20     20   113 use Params::Check qw[check];
  20         42  
  20         933  
13 20     20   116 use Module::Load::Conditional qw[can_load];
  20         36  
  20         990  
14 20     20   121 use Locale::Maketext::Simple Class => 'CPANPLUS', Style => 'gettext';
  20         42  
  20         122  
15 20     20   5409 use vars qw[$VERSION];
  20         43  
  20         34464  
16             $VERSION = "0.9916";
17              
18             $Params::Check::VERBOSE = 1;
19              
20             =pod
21              
22             =head1 NAME
23              
24             CPANPLUS::Internals::Fetch - internals for fetching files
25              
26             =head1 SYNOPSIS
27              
28             my $output = $cb->_fetch(
29             module => $modobj,
30             fetchdir => '/path/to/save/to',
31             verbose => BOOL,
32             force => BOOL,
33             );
34              
35             $cb->_add_fail_host( host => 'foo.com' );
36             $cb->_host_ok( host => 'foo.com' );
37              
38              
39             =head1 DESCRIPTION
40              
41             CPANPLUS::Internals::Fetch fetches files from either ftp, http, file
42             or rsync mirrors.
43              
44             This is the rough flow:
45              
46             $cb->_fetch
47             Delegate to File::Fetch;
48              
49              
50             =head1 METHODS
51              
52             =cut
53              
54             =head1 $path = _fetch( module => $modobj, [fetchdir => '/path/to/save/to', fetch_from => 'scheme://path/to/fetch/from', verbose => BOOL, force => BOOL, prefer_bin => BOOL, ttl => $seconds] )
55              
56             C<_fetch> will fetch files based on the information in a module
57             object. You always need a module object. If you want a fake module
58             object for a one-off fetch, look at C.
59              
60             C is the place to save the file to. Usually this
61             information comes from your configuration, but you can override it
62             expressly if needed.
63              
64             C lets you specify an URI to get this file from. If you
65             do not specify one, your list of configured hosts will be probed to
66             download the file from.
67              
68             C forces a new download, even if the file already exists.
69              
70             C simply indicates whether or not to print extra messages.
71              
72             C indicates whether you prefer the use of commandline
73             programs over perl modules. Defaults to your corresponding config
74             setting.
75              
76             C (in seconds) indicates how long a cached copy is valid for. If
77             the fetch time of the local copy is within the ttl, the cached copy is
78             returned. Otherwise, the file is refetched.
79              
80             C<_fetch> figures out, based on the host list, what scheme to use and
81             from there, delegates to C do the actual fetching.
82              
83             Returns the path of the output file on success, false on failure.
84              
85             Note that you can set a C on certain methods in the config.
86             Simply add the identifying name of the method (ie, C) to:
87             $conf->_set_fetch( blacklist => ['lwp'] );
88              
89             And the C function will be skipped by C.
90              
91             =cut
92              
93             sub _fetch {
94 85     85   1042 my $self = shift;
95 85         399 my $conf = $self->configure_object;
96 85         551 my %hash = @_;
97              
98 85         251 local $Params::Check::NO_DUPLICATES = 0;
99              
100 85         216 my ($modobj, $verbose, $force, $fetch_from, $ttl);
101 85         1322 my $tmpl = {
102             module => { required => 1, allow => IS_MODOBJ, store => \$modobj },
103             fetchdir => { default => $conf->get_conf('fetchdir') },
104             fetch_from => { default => '', store => \$fetch_from },
105             force => { default => $conf->get_conf('force'),
106             store => \$force },
107             verbose => { default => $conf->get_conf('verbose'),
108             store => \$verbose },
109             prefer_bin => { default => $conf->get_conf('prefer_bin') },
110             ttl => { default => 0, store => \$ttl },
111             };
112              
113              
114 85 50       641 my $args = check( $tmpl, \%hash ) or return;
115              
116             ### check if we already downloaded the thing ###
117 85 50 66     5396 if( (my $where = $modobj->status->fetch()) and not $force and not $ttl ) {
      33        
118              
119 0         0 msg(loc("Already fetched '%1' to '%2', " .
120             "won't fetch again without force",
121             $modobj->module, $where ), $verbose );
122 0         0 return $where;
123             }
124              
125 85         9184 my ($remote_file, $local_file, $local_path);
126              
127             ### build the local path to download to ###
128             {
129 85         191 $local_path = $args->{fetchdir} ||
130 85   66     1145 File::Spec->catdir(
131             $conf->get_conf('base'),
132             $modobj->path,
133             );
134              
135             ### create the path if it doesn't exist ###
136 85 100       3785 unless( -d $local_path ) {
137 13 50       292 unless( $self->_mkdir( dir => $local_path ) ) {
138 0         0 msg( loc("Could not create path '%1'", $local_path), $verbose);
139 0         0 return;
140             }
141             }
142              
143 85         726 $local_file = File::Spec->rel2abs(
144             File::Spec->catfile(
145             $local_path,
146             $modobj->package,
147             )
148             );
149              
150             ### do we already have the file? if so, can we use the cached version,
151             ### or do we need to refetch?
152 85 100       2736 if( -e $local_file ) {
153              
154 28         127 my $unlink = 0;
155 28         95 my $use_cached = 0;
156              
157             ### if force is in effect, we have to refetch
158 28 100 66     682 if( $force ) {
    100          
    50          
159 9         31 $unlink++
160              
161             ### if you provided a ttl, and it was exceeded, we'll refetch,
162             } elsif( $ttl and ([stat $local_file]->[9] + $ttl > time) ) {
163 15         196 msg(loc("Using cached file '%1' on disk; ".
164             "ttl (%2s) is not exceeded",
165             $local_file, $ttl), $verbose );
166              
167 15         271 $use_cached++;
168              
169             ### if you provided a ttl, and the above conditional didn't match,
170             ### we exceeded the ttl, so we refetch
171             } elsif ( $ttl ) {
172 0         0 $unlink++;
173              
174             ### otherwise we can use the cached version
175             } else {
176 4         22 $use_cached++;
177             }
178              
179 28 100       197 if( $unlink ) {
180             ### some fetches will fail if the files exist already, so let's
181             ### delete them first
182 9         1866 1 while unlink $local_file;
183              
184 9 50       159 msg(loc("Could not delete %1, some methods may " .
185             "fail to force a download", $local_file), $verbose)
186             if -e $local_file;
187              
188             } else {
189              
190             ### store where we fetched it ###
191 19         158 $modobj->status->fetch( $local_file );
192              
193 19         3325 return $local_file;
194             }
195             }
196             }
197              
198              
199             ### we got a custom URI
200 66 100       286 if ( $fetch_from ) {
201 2         23 my $abs = $self->__file_fetch( from => $fetch_from,
202             to => $local_path,
203             verbose => $verbose );
204              
205 2 50       35 unless( $abs ) {
206 0         0 error(loc("Unable to download '%1'", $fetch_from));
207 0         0 return;
208             }
209              
210             ### store where we fetched it ###
211 2         130 $modobj->status->fetch( $abs );
212              
213 2         613 return $abs;
214              
215             ### we will get it from one of our mirrors
216             } else {
217             ### build the remote path to download from ###
218 64         131 { $remote_file = File::Spec::Unix->catfile(
  64         540  
219             $modobj->path,
220             $modobj->package,
221             );
222 64 50       305 unless( $remote_file ) {
223 0         0 error( loc('No remote file given for download') );
224 0         0 return;
225             }
226             }
227              
228             ### see if we even have a host or a method to use to download with ###
229 64         181 my $found_host;
230             my @maybe_bad_host;
231              
232             HOST: {
233             ### F*CKING PIECE OF F*CKING p4 SHIT makes
234             ### '$File :: Fetch::SOME_VAR'
235             ### into a meta variable and starts substituting the file name...
236             ### GRAAAAAAAAAAAAAAAAAAAAAAH!
237             ### use ' to combat it!
238              
239             ### set up some flags for File::Fetch ###
240 64         123 local $File::Fetch::BLACKLIST = $conf->_get_fetch('blacklist');
  64         707  
241 64         565 local $File::Fetch::TIMEOUT = $conf->get_conf('timeout');
242 64         431 local $File::Fetch::DEBUG = $conf->get_conf('debug');
243 64         418 local $File::Fetch::FTP_PASSIVE = $conf->get_conf('passive');
244 64         423 local $File::Fetch::FROM_EMAIL = $conf->get_conf('email');
245 64         477 local $File::Fetch::PREFER_BIN = $conf->get_conf('prefer_bin');
246 64         265 local $File::Fetch::WARN = $verbose;
247              
248              
249             ### loop over all hosts we have ###
250 64         149 for my $host ( @{$conf->get_conf('hosts')} ) {
  64         397  
251 65         181 $found_host++;
252              
253 65         157 my $where;
254              
255             ### file:// uris are special and need parsing
256 65 50       351 if( $host->{'scheme'} eq 'file' ) {
257              
258             ### the full path in the native format of the OS
259             my $host_spec =
260             File::Spec->file_name_is_absolute( $host->{'path'} )
261             ? $host->{'path'}
262 65 100       1108 : File::Spec->rel2abs( $host->{'path'} );
263              
264             ### there might be volumes involved on vms/win32
265 65         190 if( ON_WIN32 or ON_VMS ) {
266              
267             ### now extract the volume in order to be Win32 and
268             ### VMS friendly.
269             ### 'no_file' indicates that there's no file part
270             ### of this path, so we only get 2 bits returned.
271             my ($vol, $host_path) = File::Spec->splitpath(
272             $host_spec, 'no_file'
273             );
274              
275             ### and split up the directories
276             my @host_dirs = File::Spec->splitdir( $host_path );
277              
278             ### if we got a volume we pretend its a directory for
279             ### the sake of the file:// url
280             if( defined $vol and $vol ) {
281              
282             ### D:\foo\bar needs to be encoded as D|\foo\bar
283             ### For details, see the following link:
284             ### http://en.wikipedia.org/wiki/File://
285             ### The RFC doesn't seem to address Windows volume
286             ### descriptors but it does address VMS volume
287             ### descriptors, however wikipedia covers a bit of
288             ### history regarding win32
289             $vol =~ s/:$/|/ if ON_WIN32;
290              
291             $vol =~ s/:// if ON_VMS;
292              
293             ### XXX i'm not sure what cases this is addressing.
294             ### this comes straight from dmq's file:// patches
295             ### for win32. --kane
296             ### According to dmq, the best summary is:
297             ### "if file:// urls don't look right on VMS reuse
298             ### the win32 logic and see if that fixes things"
299              
300             ### first element not empty? Might happen on VMS.
301             ### prepend the volume in that case.
302             if( $host_dirs[0] ) {
303             unshift @host_dirs, $vol;
304              
305             ### element empty? reuse it to store the volume
306             ### encoded as a directory name. (Win32/VMS)
307             } else {
308             $host_dirs[0] = $vol;
309             }
310             }
311              
312             ### now it's in UNIX format, which is the same format
313             ### as used for URIs
314             $host_spec = File::Spec::Unix->catdir( @host_dirs );
315             }
316              
317             ### now create the file:// uri from the components
318             $where = CREATE_FILE_URI->(
319             File::Spec::Unix->catfile(
320 65   50     2069 $host->{'host'} || '',
321             $host_spec,
322             $remote_file,
323             )
324             );
325              
326             ### its components will be in unix format, for a http://,
327             ### ftp:// or any other style of URI
328             } else {
329             my $mirror_path = File::Spec::Unix->catfile(
330 0         0 $host->{'path'}, $remote_file
331             );
332              
333             my %args = ( scheme => $host->{scheme},
334             host => $host->{host},
335 0         0 path => $mirror_path,
336             );
337              
338 0         0 $where = $self->_host_to_uri( %args );
339             }
340              
341 65         530 my $abs = $self->__file_fetch( from => $where,
342             to => $local_path,
343             verbose => $verbose );
344              
345             ### we got a path back?
346 65 100       366 if( $abs ) {
347             ### store where we fetched it ###
348 64         1075 $modobj->status->fetch( $abs );
349              
350             ### this host is good, the previous ones are apparently
351             ### not, so mark them as such.
352 64         15642 $self->_add_fail_host( host => $_ ) for @maybe_bad_host;
353              
354 64         4795 return $abs;
355             }
356              
357             ### so we tried to get the file but didn't actually fetch it --
358             ### there's a chance this host is bad. mark it as such and
359             ### actually flag it back if we manage to get the file
360             ### somewhere else
361 1         11 push @maybe_bad_host, $host;
362             }
363             }
364              
365             $found_host
366 0 0       0 ? error(loc("Fetch failed: host list exhausted " .
367             "-- are you connected today?"))
368             : error(loc("No hosts found to download from " .
369             "-- check your config"));
370             }
371              
372 0         0 return;
373             }
374              
375             sub __file_fetch {
376 67     67   176 my $self = shift;
377 67         347 my $conf = $self->configure_object;
378 67         636 my %hash = @_;
379              
380 67         231 my ($where, $local_path, $verbose);
381 67         950 my $tmpl = {
382             from => { required => 1, store => \$where },
383             to => { required => 1, store => \$local_path },
384             verbose => { default => $conf->get_conf('verbose'),
385             store => \$verbose },
386             };
387              
388 67 50       417 check( $tmpl, \%hash ) or return;
389              
390 67         11301 msg(loc("Trying to get '%1'", $where ), $verbose );
391              
392             ### build the object ###
393 67         2116 my $ff = File::Fetch->new( uri => $where );
394              
395             ### sanity check ###
396 67 50       801757 error(loc("Bad uri '%1'",$where)), return unless $ff;
397              
398 67 100       1152 if( my $file = $ff->fetch( to => $local_path ) ) {
399 66 50 33     3446843 unless( -e $file && -s _ ) {
400 0         0 msg(loc("'%1' said it fetched '%2', but it was not created",
401             'File::Fetch', $file), $verbose);
402              
403             } else {
404 66         1091 my $abs = File::Spec->rel2abs( $file );
405              
406             ### so TTLs will work
407 66         1879 $self->_update_timestamp( file => $abs );
408              
409 66         3429 return $abs;
410             }
411              
412             } else {
413 1         17132 error(loc("Fetching of '%1' failed: %2", $where, $ff->error));
414             }
415              
416 1         48 return;
417             }
418              
419             =pod
420              
421             =head2 _add_fail_host( host => $host_hashref )
422              
423             Mark a particular host as bad. This makes C
424             skip it in fetches until this cache is flushed.
425              
426             =head2 _host_ok( host => $host_hashref )
427              
428             Query the cache to see if this host is ok, or if it has been flagged
429             as bad.
430              
431             Returns true if the host is ok, false otherwise.
432              
433             =cut
434              
435             { ### caching functions ###
436              
437             sub _add_fail_host {
438 3     3   1725 my $self = shift;
439 3         28 my %hash = @_;
440              
441 3         8 my $host;
442 3         23 my $tmpl = {
443             host => { required => 1, default => {},
444             strict_type => 1, store => \$host },
445             };
446              
447 3 50       17 check( $tmpl, \%hash ) or return;
448              
449 3         345 return $self->_hosts->{$host} = 1;
450             }
451              
452             sub _host_ok {
453 6     6   1915 my $self = shift;
454 6         29 my %hash = @_;
455              
456 6         13 my $host;
457 6         34 my $tmpl = {
458             host => { required => 1, store => \$host },
459             };
460              
461 6 50       34 check( $tmpl, \%hash ) or return;
462              
463 6 100       710 return $self->_hosts->{$host} ? 0 : 1;
464             }
465             }
466              
467              
468             1;
469              
470             # Local variables:
471             # c-indentation-style: bsd
472             # c-basic-offset: 4
473             # indent-tabs-mode: nil
474             # End:
475             # vim: expandtab shiftwidth=4: