line |
stmt |
bran |
cond |
sub |
time |
code |
1
|
|
|
|
|
|
package File::Fetch; |
2
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
301892
|
use strict; |
|
2
|
|
|
|
6
|
|
|
2
|
|
|
|
87
|
|
4
|
2
|
|
|
2
|
639
|
use FileHandle; |
|
2
|
|
|
|
22334
|
|
|
2
|
|
|
|
19
|
|
5
|
2
|
|
|
2
|
2679
|
use File::Temp; |
|
2
|
|
|
|
24134
|
|
|
2
|
|
|
|
269
|
|
6
|
2
|
|
|
2
|
810
|
use File::Copy; |
|
2
|
|
|
|
8252
|
|
|
2
|
|
|
|
153
|
|
7
|
2
|
|
|
2
|
13
|
use File::Spec; |
|
2
|
|
|
|
5
|
|
|
2
|
|
|
|
45
|
|
8
|
2
|
|
|
2
|
13
|
use File::Spec::Unix; |
|
2
|
|
|
|
4
|
|
|
2
|
|
|
|
65
|
|
9
|
2
|
|
|
2
|
12
|
use File::Basename qw[dirname]; |
|
2
|
|
|
|
7
|
|
|
2
|
|
|
|
137
|
|
10
|
|
|
|
|
|
|
11
|
2
|
|
|
2
|
12
|
use Cwd qw[cwd]; |
|
2
|
|
|
|
4
|
|
|
2
|
|
|
|
111
|
|
12
|
2
|
|
|
2
|
11
|
use Carp qw[carp]; |
|
2
|
|
|
|
5
|
|
|
2
|
|
|
|
138
|
|
13
|
2
|
|
|
2
|
1185
|
use IPC::Cmd qw[can_run run QUOTE]; |
|
2
|
|
|
|
136748
|
|
|
2
|
|
|
|
232
|
|
14
|
2
|
|
|
2
|
22
|
use File::Path qw[mkpath]; |
|
2
|
|
|
|
7
|
|
|
2
|
|
|
|
126
|
|
15
|
2
|
|
|
2
|
13
|
use File::Temp qw[tempdir]; |
|
2
|
|
|
|
6
|
|
|
2
|
|
|
|
200
|
|
16
|
2
|
|
|
2
|
12
|
use Params::Check qw[check]; |
|
2
|
|
|
|
7
|
|
|
2
|
|
|
|
118
|
|
17
|
2
|
|
|
2
|
15
|
use Module::Load::Conditional qw[can_load]; |
|
2
|
|
|
|
8
|
|
|
2
|
|
|
|
124
|
|
18
|
2
|
|
|
2
|
13
|
use Locale::Maketext::Simple Style => 'gettext'; |
|
2
|
|
|
|
6
|
|
|
2
|
|
|
|
15
|
|
19
|
|
|
|
|
|
|
20
|
2
|
|
|
|
766
|
use vars qw[ $VERBOSE $PREFER_BIN $FROM_EMAIL $USER_AGENT |
21
|
|
|
|
|
|
$BLACKLIST $METHOD_FAIL $VERSION $METHODS |
22
|
|
|
|
|
|
$FTP_PASSIVE $TIMEOUT $DEBUG $WARN |
23
|
2
|
|
|
2
|
357
|
]; |
|
2
|
|
|
|
5
|
|
24
|
|
|
|
|
|
|
25
|
|
|
|
|
|
$VERSION = '0.42'; |
26
|
|
|
|
|
|
$VERSION = eval $VERSION; # avoid warnings with development releases |
27
|
|
|
|
|
|
$PREFER_BIN = 0; # XXX TODO implement |
28
|
|
|
|
|
|
$FROM_EMAIL = 'File-Fetch@example.com'; |
29
|
|
|
|
|
|
$USER_AGENT = "File::Fetch/$VERSION"; |
30
|
|
|
|
|
|
$BLACKLIST = [qw|ftp|]; |
31
|
|
|
|
|
|
$METHOD_FAIL = { }; |
32
|
|
|
|
|
|
$FTP_PASSIVE = 1; |
33
|
|
|
|
|
|
$TIMEOUT = 0; |
34
|
|
|
|
|
|
$DEBUG = 0; |
35
|
|
|
|
|
|
$WARN = 1; |
36
|
|
|
|
|
|
|
37
|
|
|
|
|
|
### methods available to fetch the file depending on the scheme |
38
|
|
|
|
|
|
$METHODS = { |
39
|
|
|
|
|
|
http => [ qw|lwp httptiny wget curl lftp fetch httplite lynx iosock| ], |
40
|
|
|
|
|
|
ftp => [ qw|lwp netftp wget curl lftp fetch ncftp ftp| ], |
41
|
|
|
|
|
|
file => [ qw|lwp lftp file| ], |
42
|
|
|
|
|
|
rsync => [ qw|rsync| ], |
43
|
|
|
|
|
|
git => [ qw|git| ], |
44
|
|
|
|
|
|
}; |
45
|
|
|
|
|
|
|
46
|
|
|
|
|
|
### silly warnings ### |
47
|
|
|
|
|
|
local $Params::Check::VERBOSE = 1; |
48
|
|
|
|
|
|
local $Params::Check::VERBOSE = 1; |
49
|
|
|
|
|
|
local $Module::Load::Conditional::VERBOSE = 0; |
50
|
|
|
|
|
|
local $Module::Load::Conditional::VERBOSE = 0; |
51
|
|
|
|
|
|
|
52
|
|
|
|
|
|
### see what OS we are on, important for file:// uris ### |
53
|
2
|
|
|
2
|
14
|
use constant ON_WIN => ($^O eq 'MSWin32'); |
|
2
|
|
|
|
6
|
|
|
2
|
|
|
|
189
|
|
54
|
2
|
|
|
2
|
12
|
use constant ON_VMS => ($^O eq 'VMS'); |
|
2
|
|
|
|
5
|
|
|
2
|
|
|
|
159
|
|
55
|
2
|
|
|
2
|
13
|
use constant ON_UNIX => (!ON_WIN); |
|
2
|
|
|
|
5
|
|
|
2
|
|
|
|
142
|
|
56
|
2
|
|
|
2
|
12
|
use constant HAS_VOL => (ON_WIN); |
|
2
|
|
|
|
6
|
|
|
2
|
|
|
|
148
|
|
57
|
2
|
|
|
2
|
13
|
use constant HAS_SHARE => (ON_WIN); |
|
2
|
|
|
|
5
|
|
|
2
|
|
|
|
234
|
|
58
|
2
|
|
|
2
|
13
|
use constant HAS_FETCH => ( $^O =~ m!^(freebsd|netbsd|dragonfly)$! ); |
|
2
|
|
|
|
6
|
|
|
2
|
|
|
|
370
|
|
59
|
|
|
|
|
|
|
60
|
|
|
|
|
|
=pod |
61
|
|
|
|
|
|
|
62
|
|
|
|
|
|
=head1 NAME |
63
|
|
|
|
|
|
|
64
|
|
|
|
|
|
File::Fetch - A generic file fetching mechanism |
65
|
|
|
|
|
|
|
66
|
|
|
|
|
|
=head1 SYNOPSIS |
67
|
|
|
|
|
|
|
68
|
|
|
|
|
|
use File::Fetch; |
69
|
|
|
|
|
|
|
70
|
|
|
|
|
|
### build a File::Fetch object ### |
71
|
|
|
|
|
|
my $ff = File::Fetch->new(uri => 'http://some.where.com/dir/a.txt'); |
72
|
|
|
|
|
|
|
73
|
|
|
|
|
|
### fetch the uri to cwd() ### |
74
|
|
|
|
|
|
my $where = $ff->fetch() or die $ff->error; |
75
|
|
|
|
|
|
|
76
|
|
|
|
|
|
### fetch the uri to /tmp ### |
77
|
|
|
|
|
|
my $where = $ff->fetch( to => '/tmp' ); |
78
|
|
|
|
|
|
|
79
|
|
|
|
|
|
### parsed bits from the uri ### |
80
|
|
|
|
|
|
$ff->uri; |
81
|
|
|
|
|
|
$ff->scheme; |
82
|
|
|
|
|
|
$ff->host; |
83
|
|
|
|
|
|
$ff->path; |
84
|
|
|
|
|
|
$ff->file; |
85
|
|
|
|
|
|
|
86
|
|
|
|
|
|
=head1 DESCRIPTION |
87
|
|
|
|
|
|
|
88
|
|
|
|
|
|
File::Fetch is a generic file fetching mechanism. |
89
|
|
|
|
|
|
|
90
|
|
|
|
|
|
It allows you to fetch any file pointed to by a C, C, |
91
|
|
|
|
|
|
C, C or C uri by a number of different means. |
92
|
|
|
|
|
|
|
93
|
|
|
|
|
|
See the C section further down for details. |
94
|
|
|
|
|
|
|
95
|
|
|
|
|
|
=head1 ACCESSORS |
96
|
|
|
|
|
|
|
97
|
|
|
|
|
|
A C object has the following accessors |
98
|
|
|
|
|
|
|
99
|
|
|
|
|
|
=over 4 |
100
|
|
|
|
|
|
|
101
|
|
|
|
|
|
=item $ff->uri |
102
|
|
|
|
|
|
|
103
|
|
|
|
|
|
The uri you passed to the constructor |
104
|
|
|
|
|
|
|
105
|
|
|
|
|
|
=item $ff->scheme |
106
|
|
|
|
|
|
|
107
|
|
|
|
|
|
The scheme from the uri (like 'file', 'http', etc) |
108
|
|
|
|
|
|
|
109
|
|
|
|
|
|
=item $ff->host |
110
|
|
|
|
|
|
|
111
|
|
|
|
|
|
The hostname in the uri. Will be empty if host was originally |
112
|
|
|
|
|
|
'localhost' for a 'file://' url. |
113
|
|
|
|
|
|
|
114
|
|
|
|
|
|
=item $ff->vol |
115
|
|
|
|
|
|
|
116
|
|
|
|
|
|
On operating systems with the concept of a volume the second element |
117
|
|
|
|
|
|
of a file:// is considered to the be volume specification for the file. |
118
|
|
|
|
|
|
Thus on Win32 this routine returns the volume, on other operating |
119
|
|
|
|
|
|
systems this returns nothing. |
120
|
|
|
|
|
|
|
121
|
|
|
|
|
|
On Windows this value may be empty if the uri is to a network share, in |
122
|
|
|
|
|
|
which case the 'share' property will be defined. Additionally, volume |
123
|
|
|
|
|
|
specifications that use '|' as ':' will be converted on read to use ':'. |
124
|
|
|
|
|
|
|
125
|
|
|
|
|
|
On VMS, which has a volume concept, this field will be empty because VMS |
126
|
|
|
|
|
|
file specifications are converted to absolute UNIX format and the volume |
127
|
|
|
|
|
|
information is transparently included. |
128
|
|
|
|
|
|
|
129
|
|
|
|
|
|
=item $ff->share |
130
|
|
|
|
|
|
|
131
|
|
|
|
|
|
On systems with the concept of a network share (currently only Windows) returns |
132
|
|
|
|
|
|
the sharename from a file://// url. On other operating systems returns empty. |
133
|
|
|
|
|
|
|
134
|
|
|
|
|
|
=item $ff->path |
135
|
|
|
|
|
|
|
136
|
|
|
|
|
|
The path from the uri, will be at least a single '/'. |
137
|
|
|
|
|
|
|
138
|
|
|
|
|
|
=item $ff->file |
139
|
|
|
|
|
|
|
140
|
|
|
|
|
|
The name of the remote file. For the local file name, the |
141
|
|
|
|
|
|
result of $ff->output_file will be used. |
142
|
|
|
|
|
|
|
143
|
|
|
|
|
|
=item $ff->file_default |
144
|
|
|
|
|
|
|
145
|
|
|
|
|
|
The name of the default local file, that $ff->output_file falls back to if |
146
|
|
|
|
|
|
it would otherwise return no filename. For example when fetching a URI like |
147
|
|
|
|
|
|
http://www.abc.net.au/ the contents retrieved may be from a remote file called |
148
|
|
|
|
|
|
'index.html'. The default value of this attribute is literally 'file_default'. |
149
|
|
|
|
|
|
|
150
|
|
|
|
|
|
=cut |
151
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
153
|
|
|
|
|
|
########################## |
154
|
|
|
|
|
|
### Object & Accessors ### |
155
|
|
|
|
|
|
########################## |
156
|
|
|
|
|
|
|
157
|
|
|
|
|
|
{ |
158
|
|
|
|
|
|
### template for autogenerated accessors ### |
159
|
|
|
|
|
|
my $Tmpl = { |
160
|
|
|
|
|
|
scheme => { default => 'http' }, |
161
|
|
|
|
|
|
host => { default => 'localhost' }, |
162
|
|
|
|
|
|
path => { default => '/' }, |
163
|
|
|
|
|
|
file => { required => 1 }, |
164
|
|
|
|
|
|
uri => { required => 1 }, |
165
|
|
|
|
|
|
vol => { default => '' }, # windows for file:// uris |
166
|
|
|
|
|
|
share => { default => '' }, # windows for file:// uris |
167
|
|
|
|
|
|
file_default => { default => 'file_default' }, |
168
|
|
|
|
|
|
tempdir_root => { required => 1 }, # Should be lazy-set at ->new() |
169
|
|
|
|
|
|
_error_msg => { no_override => 1 }, |
170
|
|
|
|
|
|
_error_msg_long => { no_override => 1 }, |
171
|
|
|
|
|
|
}; |
172
|
|
|
|
|
|
|
173
|
|
|
|
|
|
for my $method ( keys %$Tmpl ) { |
174
|
2
|
|
|
2
|
14
|
no strict 'refs'; |
|
2
|
|
|
|
6
|
|
|
2
|
|
|
|
17424
|
|
175
|
|
|
|
|
|
*$method = sub { |
176
|
56
|
|
|
56
|
33627
|
my $self = shift; |
177
|
56
|
50
|
|
|
390
|
$self->{$method} = $_[0] if @_; |
178
|
56
|
|
|
|
930
|
return $self->{$method}; |
179
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
} |
181
|
|
|
|
|
|
|
182
|
|
|
|
|
|
sub _create { |
183
|
9
|
|
|
9
|
103
|
my $class = shift; |
184
|
9
|
|
|
|
208
|
my %hash = @_; |
185
|
|
|
|
|
|
|
186
|
9
|
50
|
|
|
313
|
my $args = check( $Tmpl, \%hash ) or return; |
187
|
|
|
|
|
|
|
188
|
9
|
|
|
|
5447
|
bless $args, $class; |
189
|
|
|
|
|
|
|
190
|
9
|
50
|
66
|
|
135
|
if( lc($args->scheme) ne 'file' and not $args->host ) { |
191
|
0
|
|
|
|
0
|
return $class->_error(loc( |
192
|
|
|
|
|
|
"Hostname required when fetching from '%1'",$args->scheme)); |
193
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
195
|
9
|
|
|
|
235
|
for (qw[path]) { |
196
|
9
|
50
|
|
|
96
|
unless( $args->$_() ) { # 5.5.x needs the () |
197
|
0
|
|
|
|
0
|
return $class->_error(loc("No '%1' specified",$_)); |
198
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
201
|
9
|
|
|
|
113
|
return $args; |
202
|
|
|
|
|
|
} |
203
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
205
|
|
|
|
|
|
=item $ff->output_file |
206
|
|
|
|
|
|
|
207
|
|
|
|
|
|
The name of the output file. This is the same as $ff->file, |
208
|
|
|
|
|
|
but any query parameters are stripped off. For example: |
209
|
|
|
|
|
|
|
210
|
|
|
|
|
|
http://example.com/index.html?x=y |
211
|
|
|
|
|
|
|
212
|
|
|
|
|
|
would make the output file be C rather than |
213
|
|
|
|
|
|
C. |
214
|
|
|
|
|
|
|
215
|
|
|
|
|
|
=back |
216
|
|
|
|
|
|
|
217
|
|
|
|
|
|
=cut |
218
|
|
|
|
|
|
|
219
|
|
|
|
|
|
sub output_file { |
220
|
0
|
|
|
0
|
0
|
my $self = shift; |
221
|
0
|
|
|
|
0
|
my $file = $self->file; |
222
|
|
|
|
|
|
|
223
|
0
|
|
|
|
0
|
$file =~ s/\?.*$//g; |
224
|
|
|
|
|
|
|
225
|
0
|
|
0
|
|
0
|
$file ||= $self->file_default; |
226
|
|
|
|
|
|
|
227
|
0
|
|
|
|
0
|
return $file; |
228
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
230
|
|
|
|
|
|
### XXX do this or just point to URI::Escape? |
231
|
|
|
|
|
|
# =head2 $esc_uri = $ff->escaped_uri |
232
|
|
|
|
|
|
# |
233
|
|
|
|
|
|
# =cut |
234
|
|
|
|
|
|
# |
235
|
|
|
|
|
|
# ### most of this is stolen straight from URI::escape |
236
|
|
|
|
|
|
# { ### Build a char->hex map |
237
|
|
|
|
|
|
# my %escapes = map { chr($_) => sprintf("%%%02X", $_) } 0..255; |
238
|
|
|
|
|
|
# |
239
|
|
|
|
|
|
# sub escaped_uri { |
240
|
|
|
|
|
|
# my $self = shift; |
241
|
|
|
|
|
|
# my $uri = $self->uri; |
242
|
|
|
|
|
|
# |
243
|
|
|
|
|
|
# ### Default unsafe characters. RFC 2732 ^(uric - reserved) |
244
|
|
|
|
|
|
# $uri =~ s/([^A-Za-z0-9\-_.!~*'()])/ |
245
|
|
|
|
|
|
# $escapes{$1} || $self->_fail_hi($1)/ge; |
246
|
|
|
|
|
|
# |
247
|
|
|
|
|
|
# return $uri; |
248
|
|
|
|
|
|
# } |
249
|
|
|
|
|
|
# |
250
|
|
|
|
|
|
# sub _fail_hi { |
251
|
|
|
|
|
|
# my $self = shift; |
252
|
|
|
|
|
|
# my $char = shift; |
253
|
|
|
|
|
|
# |
254
|
|
|
|
|
|
# $self->_error(loc( |
255
|
|
|
|
|
|
# "Can't escape '%1', try using the '%2' module instead", |
256
|
|
|
|
|
|
# sprintf("\\x{%04X}", ord($char)), 'URI::Escape' |
257
|
|
|
|
|
|
# )); |
258
|
|
|
|
|
|
# } |
259
|
|
|
|
|
|
# |
260
|
|
|
|
|
|
# sub output_file { |
261
|
|
|
|
|
|
# |
262
|
|
|
|
|
|
# } |
263
|
|
|
|
|
|
# |
264
|
|
|
|
|
|
# |
265
|
|
|
|
|
|
# } |
266
|
|
|
|
|
|
|
267
|
|
|
|
|
|
=head1 METHODS |
268
|
|
|
|
|
|
|
269
|
|
|
|
|
|
=head2 $ff = File::Fetch->new( uri => 'http://some.where.com/dir/file.txt' ); |
270
|
|
|
|
|
|
|
271
|
|
|
|
|
|
Parses the uri and creates a corresponding File::Fetch::Item object, |
272
|
|
|
|
|
|
that is ready to be Ced and returns it. |
273
|
|
|
|
|
|
|
274
|
|
|
|
|
|
Returns false on failure. |
275
|
|
|
|
|
|
|
276
|
|
|
|
|
|
=cut |
277
|
|
|
|
|
|
|
278
|
|
|
|
|
|
sub new { |
279
|
9
|
|
|
9
|
9580
|
my $class = shift; |
280
|
9
|
|
|
|
55
|
my %hash = @_; |
281
|
|
|
|
|
|
|
282
|
9
|
|
|
|
23
|
my ($uri, $file_default, $tempdir_root); |
283
|
9
|
|
|
|
92
|
my $tmpl = { |
284
|
|
|
|
|
|
uri => { required => 1, store => \$uri }, |
285
|
|
|
|
|
|
file_default => { required => 0, store => \$file_default }, |
286
|
|
|
|
|
|
tempdir_root => { required => 0, store => \$tempdir_root }, |
287
|
|
|
|
|
|
}; |
288
|
|
|
|
|
|
|
289
|
9
|
50
|
|
|
93
|
check( $tmpl, \%hash ) or return; |
290
|
|
|
|
|
|
|
291
|
|
|
|
|
|
### parse the uri to usable parts ### |
292
|
9
|
50
|
|
|
1413
|
my $href = $class->_parse_uri( $uri ) or return; |
293
|
|
|
|
|
|
|
294
|
9
|
50
|
|
|
41
|
$href->{file_default} = $file_default if $file_default; |
295
|
9
|
50
|
|
|
32
|
$href->{tempdir_root} = File::Spec->rel2abs( $tempdir_root ) if $tempdir_root; |
296
|
9
|
50
|
|
|
50760
|
$href->{tempdir_root} = File::Spec->rel2abs( Cwd::cwd ) if not $href->{tempdir_root}; |
297
|
|
|
|
|
|
|
298
|
|
|
|
|
|
### make it into a FFI object ### |
299
|
9
|
50
|
|
|
510
|
my $ff = $class->_create( %$href ) or return; |
300
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
302
|
|
|
|
|
|
### return the object ### |
303
|
9
|
|
|
|
298
|
return $ff; |
304
|
|
|
|
|
|
} |
305
|
|
|
|
|
|
|
306
|
|
|
|
|
|
### parses an uri to a hash structure: |
307
|
|
|
|
|
|
### |
308
|
|
|
|
|
|
### $class->_parse_uri( 'ftp://ftp.cpan.org/pub/mirror/index.txt' ) |
309
|
|
|
|
|
|
### |
310
|
|
|
|
|
|
### becomes: |
311
|
|
|
|
|
|
### |
312
|
|
|
|
|
|
### $href = { |
313
|
|
|
|
|
|
### scheme => 'ftp', |
314
|
|
|
|
|
|
### host => 'ftp.cpan.org', |
315
|
|
|
|
|
|
### path => '/pub/mirror', |
316
|
|
|
|
|
|
### file => 'index.html' |
317
|
|
|
|
|
|
### }; |
318
|
|
|
|
|
|
### |
319
|
|
|
|
|
|
### In the case of file:// urls there maybe be additional fields |
320
|
|
|
|
|
|
### |
321
|
|
|
|
|
|
### For systems with volume specifications such as Win32 there will be |
322
|
|
|
|
|
|
### a volume specifier provided in the 'vol' field. |
323
|
|
|
|
|
|
### |
324
|
|
|
|
|
|
### 'vol' => 'volumename' |
325
|
|
|
|
|
|
### |
326
|
|
|
|
|
|
### For windows file shares there may be a 'share' key specified |
327
|
|
|
|
|
|
### |
328
|
|
|
|
|
|
### 'share' => 'sharename' |
329
|
|
|
|
|
|
### |
330
|
|
|
|
|
|
### Note that the rules of what a file:// url means vary by the operating system |
331
|
|
|
|
|
|
### of the host being addressed. Thus file:///d|/foo/bar.txt means the obvious |
332
|
|
|
|
|
|
### 'D:\foo\bar.txt' on windows, but on unix it means '/d|/foo/bar.txt' and |
333
|
|
|
|
|
|
### not '/foo/bar.txt' |
334
|
|
|
|
|
|
### |
335
|
|
|
|
|
|
### Similarly if the host interpreting the url is VMS then |
336
|
|
|
|
|
|
### file:///disk$user/my/notes/note12345.txt' means |
337
|
|
|
|
|
|
### 'DISK$USER:[MY.NOTES]NOTE123456.TXT' but will be returned the same as |
338
|
|
|
|
|
|
### if it is unix where it means /disk$user/my/notes/note12345.txt'. |
339
|
|
|
|
|
|
### Except for some cases in the File::Spec methods, Perl on VMS will generally |
340
|
|
|
|
|
|
### handle UNIX format file specifications. |
341
|
|
|
|
|
|
### |
342
|
|
|
|
|
|
### This means it is impossible to serve certain file:// urls on certain systems. |
343
|
|
|
|
|
|
### |
344
|
|
|
|
|
|
### Thus are the problems with a protocol-less specification. :-( |
345
|
|
|
|
|
|
### |
346
|
|
|
|
|
|
|
347
|
|
|
|
|
|
sub _parse_uri { |
348
|
16
|
|
|
16
|
16827
|
my $self = shift; |
349
|
16
|
50
|
|
|
70
|
my $uri = shift or return; |
350
|
|
|
|
|
|
|
351
|
16
|
|
|
|
60
|
my $href = { uri => $uri }; |
352
|
|
|
|
|
|
|
353
|
|
|
|
|
|
### find the scheme ### |
354
|
16
|
|
|
|
157
|
$uri =~ s|^(\w+)://||; |
355
|
16
|
|
|
|
92
|
$href->{scheme} = $1; |
356
|
|
|
|
|
|
|
357
|
|
|
|
|
|
### See rfc 1738 section 3.10 |
358
|
|
|
|
|
|
### http://www.faqs.org/rfcs/rfc1738.html |
359
|
|
|
|
|
|
### And wikipedia for more on windows file:// urls |
360
|
|
|
|
|
|
### http://en.wikipedia.org/wiki/File:// |
361
|
16
|
100
|
|
|
80
|
if( $href->{scheme} eq 'file' ) { |
362
|
|
|
|
|
|
|
363
|
6
|
|
|
|
52
|
my @parts = split '/',$uri; |
364
|
|
|
|
|
|
|
365
|
|
|
|
|
|
### file://hostname/... |
366
|
|
|
|
|
|
### file://hostname/... |
367
|
|
|
|
|
|
### normalize file://localhost with file:/// |
368
|
6
|
|
100
|
|
43
|
$href->{host} = $parts[0] || ''; |
369
|
|
|
|
|
|
|
370
|
|
|
|
|
|
### index in @parts where the path components begin; |
371
|
6
|
|
|
|
12
|
my $index = 1; |
372
|
|
|
|
|
|
|
373
|
|
|
|
|
|
### file:////hostname/sharename/blah.txt |
374
|
6
|
|
|
|
13
|
if ( HAS_SHARE and not length $parts[0] and not length $parts[1] ) { |
375
|
|
|
|
|
|
|
376
|
|
|
|
|
|
$href->{host} = $parts[2] || ''; # avoid warnings |
377
|
|
|
|
|
|
$href->{share} = $parts[3] || ''; # avoid warnings |
378
|
|
|
|
|
|
|
379
|
|
|
|
|
|
$index = 4 # index after the share |
380
|
|
|
|
|
|
|
381
|
|
|
|
|
|
### file:///D|/blah.txt |
382
|
|
|
|
|
|
### file:///D:/blah.txt |
383
|
0
|
|
|
|
0
|
} elsif (HAS_VOL) { |
384
|
|
|
|
|
|
|
385
|
|
|
|
|
|
### this code comes from dmq's patch, but: |
386
|
|
|
|
|
|
### XXX if volume is empty, wouldn't that be an error? --kane |
387
|
|
|
|
|
|
### if so, our file://localhost test needs to be fixed as wel |
388
|
|
|
|
|
|
$href->{vol} = $parts[1] || ''; |
389
|
|
|
|
|
|
|
390
|
|
|
|
|
|
### correct D| style colume descriptors |
391
|
|
|
|
|
|
$href->{vol} =~ s/\A([A-Z])\|\z/$1:/i if ON_WIN; |
392
|
|
|
|
|
|
|
393
|
|
|
|
|
|
$index = 2; # index after the volume |
394
|
|
|
|
|
|
} |
395
|
|
|
|
|
|
|
396
|
|
|
|
|
|
### rebuild the path from the leftover parts; |
397
|
6
|
|
|
|
41
|
$href->{path} = join '/', '', splice( @parts, $index, $#parts ); |
398
|
|
|
|
|
|
|
399
|
|
|
|
|
|
} else { |
400
|
|
|
|
|
|
### using anything but qw() in hash slices may produce warnings |
401
|
|
|
|
|
|
### in older perls :-( |
402
|
10
|
|
|
|
109
|
@{$href}{ qw(host path) } = $uri =~ m|([^/]*)(/.*)$|s; |
|
10
|
|
|
|
54
|
|
403
|
|
|
|
|
|
} |
404
|
|
|
|
|
|
|
405
|
|
|
|
|
|
### split the path into file + dir ### |
406
|
16
|
|
|
|
41
|
{ my @parts = File::Spec::Unix->splitpath( delete $href->{path} ); |
|
16
|
|
|
|
497
|
|
407
|
16
|
|
|
|
64
|
$href->{path} = $parts[1]; |
408
|
16
|
|
|
|
97
|
$href->{file} = $parts[2]; |
409
|
|
|
|
|
|
} |
410
|
|
|
|
|
|
|
411
|
|
|
|
|
|
### host will be empty if the target was 'localhost' and the |
412
|
|
|
|
|
|
### scheme was 'file' |
413
|
16
|
100
|
100
|
|
119
|
$href->{host} = '' if ($href->{host} eq 'localhost') and |
414
|
|
|
|
|
|
($href->{scheme} eq 'file'); |
415
|
|
|
|
|
|
|
416
|
16
|
|
|
|
78
|
return $href; |
417
|
|
|
|
|
|
} |
418
|
|
|
|
|
|
|
419
|
|
|
|
|
|
=head2 $where = $ff->fetch( [to => /my/output/dir/ | \$scalar] ) |
420
|
|
|
|
|
|
|
421
|
|
|
|
|
|
Fetches the file you requested and returns the full path to the file. |
422
|
|
|
|
|
|
|
423
|
|
|
|
|
|
By default it writes to C, but you can override that by specifying |
424
|
|
|
|
|
|
the C argument: |
425
|
|
|
|
|
|
|
426
|
|
|
|
|
|
### file fetch to /tmp, full path to the file in $where |
427
|
|
|
|
|
|
$where = $ff->fetch( to => '/tmp' ); |
428
|
|
|
|
|
|
|
429
|
|
|
|
|
|
### file slurped into $scalar, full path to the file in $where |
430
|
|
|
|
|
|
### file is downloaded to a temp directory and cleaned up at exit time |
431
|
|
|
|
|
|
$where = $ff->fetch( to => \$scalar ); |
432
|
|
|
|
|
|
|
433
|
|
|
|
|
|
Returns the full path to the downloaded file on success, and false |
434
|
|
|
|
|
|
on failure. |
435
|
|
|
|
|
|
|
436
|
|
|
|
|
|
=cut |
437
|
|
|
|
|
|
|
438
|
|
|
|
|
|
sub fetch { |
439
|
0
|
0
|
|
0
|
|
my $self = shift or return; |
440
|
0
|
|
|
|
|
my %hash = @_; |
441
|
|
|
|
|
|
|
442
|
0
|
|
|
|
|
my $target; |
443
|
0
|
|
|
|
|
my $tmpl = { |
444
|
|
|
|
|
|
to => { default => cwd(), store => \$target }, |
445
|
|
|
|
|
|
}; |
446
|
|
|
|
|
|
|
447
|
0
|
0
|
|
|
|
check( $tmpl, \%hash ) or return; |
448
|
|
|
|
|
|
|
449
|
0
|
|
|
|
|
my ($to, $fh); |
450
|
|
|
|
|
|
### you want us to slurp the contents |
451
|
0
|
0
|
0
|
|
|
if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) { |
452
|
0
|
|
|
|
|
$to = tempdir( 'FileFetch.XXXXXX', DIR => $self->tempdir_root, CLEANUP => 1 ); |
453
|
|
|
|
|
|
|
454
|
|
|
|
|
|
### plain old fetch |
455
|
|
|
|
|
|
} else { |
456
|
0
|
|
|
|
|
$to = $target; |
457
|
|
|
|
|
|
|
458
|
|
|
|
|
|
### On VMS force to VMS format so File::Spec will work. |
459
|
0
|
|
|
|
|
$to = VMS::Filespec::vmspath($to) if ON_VMS; |
460
|
|
|
|
|
|
|
461
|
|
|
|
|
|
### create the path if it doesn't exist yet ### |
462
|
0
|
0
|
|
|
|
unless( -d $to ) { |
463
|
0
|
|
|
|
|
eval { mkpath( $to ) }; |
|
0
|
|
|
|
|
|
464
|
|
|
|
|
|
|
465
|
0
|
0
|
|
|
|
return $self->_error(loc("Could not create path '%1'",$to)) if $@; |
466
|
|
|
|
|
|
} |
467
|
|
|
|
|
|
} |
468
|
|
|
|
|
|
|
469
|
|
|
|
|
|
### set passive ftp if required ### |
470
|
0
|
|
|
|
|
local $ENV{FTP_PASSIVE} = $FTP_PASSIVE; |
471
|
|
|
|
|
|
|
472
|
|
|
|
|
|
### we dont use catfile on win32 because if we are using a cygwin tool |
473
|
|
|
|
|
|
### under cmd.exe they wont understand windows style separators. |
474
|
0
|
|
|
|
|
my $out_to = ON_WIN ? $to.'/'.$self->output_file |
475
|
|
|
|
|
|
: File::Spec->catfile( $to, $self->output_file ); |
476
|
|
|
|
|
|
|
477
|
0
|
|
|
|
|
for my $method ( @{ $METHODS->{$self->scheme} } ) { |
|
0
|
|
|
|
|
|
478
|
0
|
|
|
|
|
my $sub = '_'.$method.'_fetch'; |
479
|
|
|
|
|
|
|
480
|
0
|
0
|
|
|
|
unless( __PACKAGE__->can($sub) ) { |
481
|
0
|
|
|
|
|
$self->_error(loc("Cannot call method for '%1' -- WEIRD!", |
482
|
|
|
|
|
|
$method)); |
483
|
0
|
|
|
|
|
next; |
484
|
|
|
|
|
|
} |
485
|
|
|
|
|
|
|
486
|
|
|
|
|
|
### method is blacklisted ### |
487
|
0
|
0
|
|
|
|
next if grep { lc $_ eq $method } @$BLACKLIST; |
|
0
|
|
|
|
|
|
488
|
|
|
|
|
|
|
489
|
|
|
|
|
|
### method is known to fail ### |
490
|
0
|
0
|
|
|
|
next if $METHOD_FAIL->{$method}; |
491
|
|
|
|
|
|
|
492
|
|
|
|
|
|
### there's serious issues with IPC::Run and quoting of command |
493
|
|
|
|
|
|
### line arguments. using quotes in the wrong place breaks things, |
494
|
|
|
|
|
|
### and in the case of say, |
495
|
|
|
|
|
|
### C:\cygwin\bin\wget.EXE --quiet --passive-ftp --output-document |
496
|
|
|
|
|
|
### "index.html" "http://www.cpan.org/index.html?q=1&y=2" |
497
|
|
|
|
|
|
### it doesn't matter how you quote, it always fails. |
498
|
0
|
|
|
|
|
local $IPC::Cmd::USE_IPC_RUN = 0; |
499
|
|
|
|
|
|
|
500
|
0
|
0
|
|
|
|
if( my $file = $self->$sub( |
501
|
|
|
|
|
|
to => $out_to |
502
|
|
|
|
|
|
)){ |
503
|
|
|
|
|
|
|
504
|
0
|
0
|
0
|
|
|
unless( -e $file && -s _ ) { |
505
|
0
|
|
|
|
|
$self->_error(loc("'%1' said it fetched '%2', ". |
506
|
|
|
|
|
|
"but it was not created",$method,$file)); |
507
|
|
|
|
|
|
|
508
|
|
|
|
|
|
### mark the failure ### |
509
|
0
|
|
|
|
|
$METHOD_FAIL->{$method} = 1; |
510
|
|
|
|
|
|
|
511
|
0
|
|
|
|
|
next; |
512
|
|
|
|
|
|
|
513
|
|
|
|
|
|
} else { |
514
|
|
|
|
|
|
|
515
|
|
|
|
|
|
### slurp mode? |
516
|
0
|
0
|
0
|
|
|
if( ref $target and UNIVERSAL::isa( $target, 'SCALAR' ) ) { |
517
|
|
|
|
|
|
|
518
|
|
|
|
|
|
### open the file |
519
|
0
|
0
|
|
|
|
open my $fh, "<$file" or do { |
520
|
0
|
|
|
|
|
$self->_error( |
521
|
|
|
|
|
|
loc("Could not open '%1': %2", $file, $!)); |
522
|
0
|
|
|
|
|
return; |
523
|
|
|
|
|
|
}; |
524
|
|
|
|
|
|
|
525
|
|
|
|
|
|
### slurp |
526
|
0
|
|
|
|
|
$$target = do { local $/; <$fh> }; |
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
527
|
|
|
|
|
|
|
528
|
|
|
|
|
|
} |
529
|
|
|
|
|
|
|
530
|
0
|
|
|
|
|
my $abs = File::Spec->rel2abs( $file ); |
531
|
0
|
|
|
|
|
return $abs; |
532
|
|
|
|
|
|
|
533
|
|
|
|
|
|
} |
534
|
|
|
|
|
|
} |
535
|
|
|
|
|
|
} |
536
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
538
|
|
|
|
|
|
### if we got here, we looped over all methods, but we weren't able |
539
|
|
|
|
|
|
### to fetch it. |
540
|
0
|
|
|
|
|
return; |
541
|
|
|
|
|
|
} |
542
|
|
|
|
|
|
|
543
|
|
|
|
|
|
######################## |
544
|
|
|
|
|
|
### _*_fetch methods ### |
545
|
|
|
|
|
|
######################## |
546
|
|
|
|
|
|
|
547
|
|
|
|
|
|
### LWP fetching ### |
548
|
|
|
|
|
|
sub _lwp_fetch { |
549
|
0
|
|
|
0
|
|
my $self = shift; |
550
|
0
|
|
|
|
|
my %hash = @_; |
551
|
|
|
|
|
|
|
552
|
0
|
|
|
|
|
my ($to); |
553
|
0
|
|
|
|
|
my $tmpl = { |
554
|
|
|
|
|
|
to => { required => 1, store => \$to } |
555
|
|
|
|
|
|
}; |
556
|
0
|
0
|
|
|
|
check( $tmpl, \%hash ) or return; |
557
|
|
|
|
|
|
|
558
|
|
|
|
|
|
### modules required to download with lwp ### |
559
|
0
|
|
|
|
|
my $use_list = { |
560
|
|
|
|
|
|
LWP => '0.0', |
561
|
|
|
|
|
|
'LWP::UserAgent' => '0.0', |
562
|
|
|
|
|
|
'HTTP::Request' => '0.0', |
563
|
|
|
|
|
|
'HTTP::Status' => '0.0', |
564
|
|
|
|
|
|
URI => '0.0', |
565
|
|
|
|
|
|
|
566
|
|
|
|
|
|
}; |
567
|
|
|
|
|
|
|
568
|
0
|
0
|
|
|
|
unless( can_load( modules => $use_list ) ) { |
569
|
0
|
|
|
|
|
$METHOD_FAIL->{'lwp'} = 1; |
570
|
0
|
|
|
|
|
return; |
571
|
|
|
|
|
|
} |
572
|
|
|
|
|
|
|
573
|
|
|
|
|
|
### setup the uri object |
574
|
0
|
|
|
|
|
my $uri = URI->new( File::Spec::Unix->catfile( |
575
|
|
|
|
|
|
$self->path, $self->file |
576
|
|
|
|
|
|
) ); |
577
|
|
|
|
|
|
|
578
|
|
|
|
|
|
### special rules apply for file:// uris ### |
579
|
0
|
|
|
|
|
$uri->scheme( $self->scheme ); |
580
|
0
|
0
|
|
|
|
$uri->host( $self->scheme eq 'file' ? '' : $self->host ); |
581
|
0
|
0
|
|
|
|
$uri->userinfo("anonymous:$FROM_EMAIL") if $self->scheme ne 'file'; |
582
|
|
|
|
|
|
|
583
|
|
|
|
|
|
### set up the useragent object |
584
|
0
|
|
|
|
|
my $ua = LWP::UserAgent->new(); |
585
|
0
|
0
|
|
|
|
$ua->timeout( $TIMEOUT ) if $TIMEOUT; |
586
|
0
|
|
|
|
|
$ua->agent( $USER_AGENT ); |
587
|
0
|
|
|
|
|
$ua->from( $FROM_EMAIL ); |
588
|
0
|
|
|
|
|
$ua->env_proxy; |
589
|
|
|
|
|
|
|
590
|
0
|
0
|
|
|
|
my $res = $ua->mirror($uri, $to) or return; |
591
|
|
|
|
|
|
|
592
|
|
|
|
|
|
### uptodate or fetched ok ### |
593
|
0
|
0
|
0
|
|
|
if ( $res->code == 304 or $res->code == 200 ) { |
594
|
0
|
|
|
|
|
return $to; |
595
|
|
|
|
|
|
|
596
|
|
|
|
|
|
} else { |
597
|
0
|
|
|
|
|
return $self->_error(loc("Fetch failed! HTTP response: %1 %2 [%3]", |
598
|
|
|
|
|
|
$res->code, HTTP::Status::status_message($res->code), |
599
|
|
|
|
|
|
$res->status_line)); |
600
|
|
|
|
|
|
} |
601
|
|
|
|
|
|
|
602
|
|
|
|
|
|
} |
603
|
|
|
|
|
|
|
604
|
|
|
|
|
|
### HTTP::Tiny fetching ### |
605
|
|
|
|
|
|
sub _httptiny_fetch { |
606
|
0
|
|
|
0
|
|
my $self = shift; |
607
|
0
|
|
|
|
|
my %hash = @_; |
608
|
|
|
|
|
|
|
609
|
0
|
|
|
|
|
my ($to); |
610
|
0
|
|
|
|
|
my $tmpl = { |
611
|
|
|
|
|
|
to => { required => 1, store => \$to } |
612
|
|
|
|
|
|
}; |
613
|
0
|
0
|
|
|
|
check( $tmpl, \%hash ) or return; |
614
|
|
|
|
|
|
|
615
|
0
|
|
|
|
|
my $use_list = { |
616
|
|
|
|
|
|
'HTTP::Tiny' => '0.008', |
617
|
|
|
|
|
|
|
618
|
|
|
|
|
|
}; |
619
|
|
|
|
|
|
|
620
|
0
|
0
|
|
|
|
unless( can_load(modules => $use_list) ) { |
621
|
0
|
|
|
|
|
$METHOD_FAIL->{'httptiny'} = 1; |
622
|
0
|
|
|
|
|
return; |
623
|
|
|
|
|
|
} |
624
|
|
|
|
|
|
|
625
|
0
|
|
|
|
|
my $uri = $self->uri; |
626
|
|
|
|
|
|
|
627
|
0
|
0
|
|
|
|
my $http = HTTP::Tiny->new( ( $TIMEOUT ? ( timeout => $TIMEOUT ) : () ) ); |
628
|
|
|
|
|
|
|
629
|
0
|
|
|
|
|
my $rc = $http->mirror( $uri, $to ); |
630
|
|
|
|
|
|
|
631
|
0
|
0
|
|
|
|
unless ( $rc->{success} ) { |
632
|
|
|
|
|
|
|
633
|
0
|
|
|
|
|
return $self->_error(loc( "Fetch failed! HTTP response: %1 [%2]", |
634
|
|
|
|
|
|
$rc->{status}, $rc->{reason} ) ); |
635
|
|
|
|
|
|
|
636
|
|
|
|
|
|
} |
637
|
|
|
|
|
|
|
638
|
0
|
|
|
|
|
return $to; |
639
|
|
|
|
|
|
|
640
|
|
|
|
|
|
} |
641
|
|
|
|
|
|
|
642
|
|
|
|
|
|
### HTTP::Lite fetching ### |
643
|
|
|
|
|
|
sub _httplite_fetch { |
644
|
0
|
|
|
0
|
|
my $self = shift; |
645
|
0
|
|
|
|
|
my %hash = @_; |
646
|
|
|
|
|
|
|
647
|
0
|
|
|
|
|
my ($to); |
648
|
0
|
|
|
|
|
my $tmpl = { |
649
|
|
|
|
|
|
to => { required => 1, store => \$to } |
650
|
|
|
|
|
|
}; |
651
|
0
|
0
|
|
|
|
check( $tmpl, \%hash ) or return; |
652
|
|
|
|
|
|
|
653
|
|
|
|
|
|
### modules required to download with lwp ### |
654
|
0
|
|
|
|
|
my $use_list = { |
655
|
|
|
|
|
|
'HTTP::Lite' => '2.2', |
656
|
|
|
|
|
|
|
657
|
|
|
|
|
|
}; |
658
|
|
|
|
|
|
|
659
|
0
|
0
|
|
|
|
unless( can_load(modules => $use_list) ) { |
660
|
0
|
|
|
|
|
$METHOD_FAIL->{'httplite'} = 1; |
661
|
0
|
|
|
|
|
return; |
662
|
|
|
|
|
|
} |
663
|
|
|
|
|
|
|
664
|
0
|
|
|
|
|
my $uri = $self->uri; |
665
|
0
|
|
|
|
|
my $retries = 0; |
666
|
|
|
|
|
|
|
667
|
0
|
|
|
|
|
RETRIES: while ( $retries++ < 5 ) { |
668
|
|
|
|
|
|
|
669
|
0
|
|
|
|
|
my $http = HTTP::Lite->new(); |
670
|
|
|
|
|
|
# Naughty naughty but there isn't any accessor/setter |
671
|
0
|
0
|
|
|
|
$http->{timeout} = $TIMEOUT if $TIMEOUT; |
672
|
0
|
|
|
|
|
$http->http11_mode(1); |
673
|
|
|
|
|
|
|
674
|
0
|
|
|
|
|
my $fh = FileHandle->new; |
675
|
|
|
|
|
|
|
676
|
0
|
0
|
|
|
|
unless ( $fh->open($to,'>') ) { |
677
|
0
|
|
|
|
|
return $self->_error(loc( |
678
|
|
|
|
|
|
"Could not open '%1' for writing: %2",$to,$!)); |
679
|
|
|
|
|
|
} |
680
|
|
|
|
|
|
|
681
|
0
|
|
|
|
|
$fh->autoflush(1); |
682
|
|
|
|
|
|
|
683
|
0
|
|
|
|
|
binmode $fh; |
684
|
|
|
|
|
|
|
685
|
0
|
|
|
0
|
|
my $rc = $http->request( $uri, sub { my ($self,$dref,$cbargs) = @_; local $\; print {$cbargs} $$dref }, $fh ); |
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
686
|
|
|
|
|
|
|
687
|
0
|
|
|
|
|
close $fh; |
688
|
|
|
|
|
|
|
689
|
0
|
0
|
0
|
|
|
if ( $rc == 301 || $rc == 302 ) { |
|
|
0
|
|
|
|
|
690
|
0
|
|
|
|
|
my $loc; |
691
|
0
|
|
|
|
|
HEADERS: for ($http->headers_array) { |
692
|
0
|
0
|
|
|
|
/Location: (\S+)/ and $loc = $1, last HEADERS; |
693
|
|
|
|
|
|
} |
694
|
|
|
|
|
|
#$loc or last; # Think we should squeal here. |
695
|
0
|
0
|
|
|
|
if ($loc =~ m!^/!) { |
696
|
0
|
|
|
|
|
$uri =~ s{^(\w+?://[^/]+)/.*$}{$1}; |
697
|
0
|
|
|
|
|
$uri .= $loc; |
698
|
|
|
|
|
|
} |
699
|
|
|
|
|
|
else { |
700
|
0
|
|
|
|
|
$uri = $loc; |
701
|
|
|
|
|
|
} |
702
|
0
|
|
|
|
|
next RETRIES; |
703
|
|
|
|
|
|
} |
704
|
|
|
|
|
|
elsif ( $rc == 200 ) { |
705
|
0
|
|
|
|
|
return $to; |
706
|
|
|
|
|
|
} |
707
|
|
|
|
|
|
else { |
708
|
0
|
|
|
|
|
return $self->_error(loc("Fetch failed! HTTP response: %1 [%2]", |
709
|
|
|
|
|
|
$rc, $http->status_message)); |
710
|
|
|
|
|
|
} |
711
|
|
|
|
|
|
|
712
|
|
|
|
|
|
} # Loop for 5 retries. |
713
|
|
|
|
|
|
|
714
|
0
|
|
|
|
|
return $self->_error("Fetch failed! Gave up after 5 tries"); |
715
|
|
|
|
|
|
|
716
|
|
|
|
|
|
} |
717
|
|
|
|
|
|
|
718
|
|
|
|
|
|
### Simple IO::Socket::INET fetching ### |
719
|
|
|
|
|
|
sub _iosock_fetch { |
720
|
0
|
|
|
0
|
|
my $self = shift; |
721
|
0
|
|
|
|
|
my %hash = @_; |
722
|
|
|
|
|
|
|
723
|
0
|
|
|
|
|
my ($to); |
724
|
0
|
|
|
|
|
my $tmpl = { |
725
|
|
|
|
|
|
to => { required => 1, store => \$to } |
726
|
|
|
|
|
|
}; |
727
|
0
|
0
|
|
|
|
check( $tmpl, \%hash ) or return; |
728
|
|
|
|
|
|
|
729
|
0
|
|
|
|
|
my $use_list = { |
730
|
|
|
|
|
|
'IO::Socket::INET' => '0.0', |
731
|
|
|
|
|
|
'IO::Select' => '0.0', |
732
|
|
|
|
|
|
}; |
733
|
|
|
|
|
|
|
734
|
0
|
0
|
|
|
|
unless( can_load(modules => $use_list) ) { |
735
|
0
|
|
|
|
|
$METHOD_FAIL->{'iosock'} = 1; |
736
|
0
|
|
|
|
|
return; |
737
|
|
|
|
|
|
} |
738
|
|
|
|
|
|
|
739
|
0
|
0
|
|
|
|
my $sock = IO::Socket::INET->new( |
740
|
|
|
|
|
|
PeerHost => $self->host, |
741
|
|
|
|
|
|
( $self->host =~ /:/ ? () : ( PeerPort => 80 ) ), |
742
|
|
|
|
|
|
); |
743
|
|
|
|
|
|
|
744
|
0
|
0
|
|
|
|
unless ( $sock ) { |
745
|
0
|
|
|
|
|
return $self->_error(loc("Could not open socket to '%1', '%2'",$self->host,$!)); |
746
|
|
|
|
|
|
} |
747
|
|
|
|
|
|
|
748
|
0
|
|
|
|
|
my $fh = FileHandle->new; |
749
|
|
|
|
|
|
|
750
|
|
|
|
|
|
# Check open() |
751
|
|
|
|
|
|
|
752
|
0
|
0
|
|
|
|
unless ( $fh->open($to,'>') ) { |
753
|
0
|
|
|
|
|
return $self->_error(loc( |
754
|
|
|
|
|
|
"Could not open '%1' for writing: %2",$to,$!)); |
755
|
|
|
|
|
|
} |
756
|
|
|
|
|
|
|
757
|
0
|
|
|
|
|
$fh->autoflush(1); |
758
|
0
|
|
|
|
|
binmode $fh; |
759
|
|
|
|
|
|
|
760
|
0
|
|
|
|
|
my $path = File::Spec::Unix->catfile( $self->path, $self->file ); |
761
|
0
|
|
|
|
|
my $req = "GET $path HTTP/1.0\x0d\x0aHost: " . $self->host . "\x0d\x0a\x0d\x0a"; |
762
|
0
|
|
|
|
|
$sock->send( $req ); |
763
|
|
|
|
|
|
|
764
|
0
|
|
|
|
|
my $select = IO::Select->new( $sock ); |
765
|
|
|
|
|
|
|
766
|
0
|
|
|
|
|
my $resp = ''; |
767
|
0
|
|
|
|
|
my $normal = 0; |
768
|
0
|
|
0
|
|
|
while ( $select->can_read( $TIMEOUT || 60 ) ) { |
769
|
0
|
|
|
|
|
my $ret = $sock->sysread( $resp, 4096, length($resp) ); |
770
|
0
|
0
|
0
|
|
|
if ( !defined $ret or $ret == 0 ) { |
771
|
0
|
|
|
|
|
$select->remove( $sock ); |
772
|
0
|
|
|
|
|
$normal++; |
773
|
|
|
|
|
|
} |
774
|
|
|
|
|
|
} |
775
|
0
|
|
|
|
|
close $sock; |
776
|
|
|
|
|
|
|
777
|
0
|
0
|
|
|
|
unless ( $normal ) { |
778
|
0
|
|
0
|
|
|
return $self->_error(loc("Socket timed out after '%1' seconds", ( $TIMEOUT || 60 ))); |
779
|
|
|
|
|
|
} |
780
|
|
|
|
|
|
|
781
|
|
|
|
|
|
# Check the "response" |
782
|
|
|
|
|
|
# Strip preceding blank lines apparently they are allowed (RFC 2616 4.1) |
783
|
0
|
|
|
|
|
$resp =~ s/^(\x0d?\x0a)+//; |
784
|
|
|
|
|
|
# Check it is an HTTP response |
785
|
0
|
0
|
|
|
|
unless ( $resp =~ m!^HTTP/(\d+)\.(\d+)!i ) { |
786
|
0
|
|
|
|
|
return $self->_error(loc("Did not get a HTTP response from '%1'",$self->host)); |
787
|
|
|
|
|
|
} |
788
|
|
|
|
|
|
|
789
|
|
|
|
|
|
# Check for OK |
790
|
0
|
|
|
|
|
my ($code) = $resp =~ m!^HTTP/\d+\.\d+\s+(\d+)!i; |
791
|
0
|
0
|
|
|
|
unless ( $code eq '200' ) { |
792
|
0
|
|
|
|
|
return $self->_error(loc("Got a '%1' from '%2' expected '200'",$code,$self->host)); |
793
|
|
|
|
|
|
} |
794
|
|
|
|
|
|
|
795
|
|
|
|
|
|
{ |
796
|
0
|
|
|
|
|
local $\; |
|
0
|
|
|
|
|
|
797
|
0
|
|
|
|
|
print $fh +($resp =~ m/\x0d\x0a\x0d\x0a(.*)$/s )[0]; |
798
|
|
|
|
|
|
} |
799
|
0
|
|
|
|
|
close $fh; |
800
|
0
|
|
|
|
|
return $to; |
801
|
|
|
|
|
|
} |
802
|
|
|
|
|
|
|
803
|
|
|
|
|
|
### Net::FTP fetching |
804
|
|
|
|
|
|
sub _netftp_fetch { |
805
|
0
|
|
|
0
|
|
my $self = shift; |
806
|
0
|
|
|
|
|
my %hash = @_; |
807
|
|
|
|
|
|
|
808
|
0
|
|
|
|
|
my ($to); |
809
|
0
|
|
|
|
|
my $tmpl = { |
810
|
|
|
|
|
|
to => { required => 1, store => \$to } |
811
|
|
|
|
|
|
}; |
812
|
0
|
0
|
|
|
|
check( $tmpl, \%hash ) or return; |
813
|
|
|
|
|
|
|
814
|
|
|
|
|
|
### required modules ### |
815
|
0
|
|
|
|
|
my $use_list = { 'Net::FTP' => 0 }; |
816
|
|
|
|
|
|
|
817
|
0
|
0
|
|
|
|
unless( can_load( modules => $use_list ) ) { |
818
|
0
|
|
|
|
|
$METHOD_FAIL->{'netftp'} = 1; |
819
|
0
|
|
|
|
|
return; |
820
|
|
|
|
|
|
} |
821
|
|
|
|
|
|
|
822
|
|
|
|
|
|
### make connection ### |
823
|
0
|
|
|
|
|
my $ftp; |
824
|
0
|
|
|
|
|
my @options = ($self->host); |
825
|
0
|
0
|
|
|
|
push(@options, Timeout => $TIMEOUT) if $TIMEOUT; |
826
|
0
|
0
|
|
|
|
unless( $ftp = Net::FTP->new( @options ) ) { |
827
|
0
|
|
|
|
|
return $self->_error(loc("Ftp creation failed: %1",$@)); |
828
|
|
|
|
|
|
} |
829
|
|
|
|
|
|
|
830
|
|
|
|
|
|
### login ### |
831
|
0
|
0
|
|
|
|
unless( $ftp->login( anonymous => $FROM_EMAIL ) ) { |
832
|
0
|
|
|
|
|
return $self->_error(loc("Could not login to '%1'",$self->host)); |
833
|
|
|
|
|
|
} |
834
|
|
|
|
|
|
|
835
|
|
|
|
|
|
### set binary mode, just in case ### |
836
|
0
|
|
|
|
|
$ftp->binary; |
837
|
|
|
|
|
|
|
838
|
|
|
|
|
|
### create the remote path |
839
|
|
|
|
|
|
### remember remote paths are unix paths! [#11483] |
840
|
0
|
|
|
|
|
my $remote = File::Spec::Unix->catfile( $self->path, $self->file ); |
841
|
|
|
|
|
|
|
842
|
|
|
|
|
|
### fetch the file ### |
843
|
0
|
|
|
|
|
my $target; |
844
|
0
|
0
|
|
|
|
unless( $target = $ftp->get( $remote, $to ) ) { |
845
|
0
|
|
|
|
|
return $self->_error(loc("Could not fetch '%1' from '%2'", |
846
|
|
|
|
|
|
$remote, $self->host)); |
847
|
|
|
|
|
|
} |
848
|
|
|
|
|
|
|
849
|
|
|
|
|
|
### log out ### |
850
|
0
|
|
|
|
|
$ftp->quit; |
851
|
|
|
|
|
|
|
852
|
0
|
|
|
|
|
return $target; |
853
|
|
|
|
|
|
|
854
|
|
|
|
|
|
} |
855
|
|
|
|
|
|
|
856
|
|
|
|
|
|
### /bin/wget fetch ### |
857
|
|
|
|
|
|
sub _wget_fetch { |
858
|
0
|
|
|
0
|
|
my $self = shift; |
859
|
0
|
|
|
|
|
my %hash = @_; |
860
|
|
|
|
|
|
|
861
|
0
|
|
|
|
|
my ($to); |
862
|
0
|
|
|
|
|
my $tmpl = { |
863
|
|
|
|
|
|
to => { required => 1, store => \$to } |
864
|
|
|
|
|
|
}; |
865
|
0
|
0
|
|
|
|
check( $tmpl, \%hash ) or return; |
866
|
|
|
|
|
|
|
867
|
0
|
|
|
|
|
my $wget; |
868
|
|
|
|
|
|
### see if we have a wget binary ### |
869
|
0
|
0
|
|
|
|
unless( $wget = can_run('wget') ) { |
870
|
0
|
|
|
|
|
$METHOD_FAIL->{'wget'} = 1; |
871
|
0
|
|
|
|
|
return; |
872
|
|
|
|
|
|
} |
873
|
|
|
|
|
|
|
874
|
|
|
|
|
|
### no verboseness, thanks ### |
875
|
0
|
|
|
|
|
my $cmd = [ $wget, '--quiet' ]; |
876
|
|
|
|
|
|
|
877
|
|
|
|
|
|
### if a timeout is set, add it ### |
878
|
0
|
0
|
|
|
|
push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT; |
879
|
|
|
|
|
|
|
880
|
|
|
|
|
|
### run passive if specified ### |
881
|
0
|
0
|
|
|
|
push @$cmd, '--passive-ftp' if $FTP_PASSIVE; |
882
|
|
|
|
|
|
|
883
|
|
|
|
|
|
### set the output document, add the uri ### |
884
|
0
|
|
|
|
|
push @$cmd, '--output-document', $to, $self->uri; |
885
|
|
|
|
|
|
|
886
|
|
|
|
|
|
### with IPC::Cmd > 0.41, this is fixed in teh library, |
887
|
|
|
|
|
|
### and there's no need for special casing any more. |
888
|
|
|
|
|
|
### DO NOT quote things for IPC::Run, it breaks stuff. |
889
|
|
|
|
|
|
# $IPC::Cmd::USE_IPC_RUN |
890
|
|
|
|
|
|
# ? ($to, $self->uri) |
891
|
|
|
|
|
|
# : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); |
892
|
|
|
|
|
|
|
893
|
|
|
|
|
|
### shell out ### |
894
|
0
|
|
|
|
|
my $captured; |
895
|
0
|
0
|
|
|
|
unless(run( command => $cmd, |
896
|
|
|
|
|
|
buffer => \$captured, |
897
|
|
|
|
|
|
verbose => $DEBUG |
898
|
|
|
|
|
|
)) { |
899
|
|
|
|
|
|
### wget creates the output document always, even if the fetch |
900
|
|
|
|
|
|
### fails.. so unlink it in that case |
901
|
0
|
|
|
|
|
1 while unlink $to; |
902
|
|
|
|
|
|
|
903
|
0
|
|
0
|
|
|
return $self->_error(loc( "Command failed: %1", $captured || '' )); |
904
|
|
|
|
|
|
} |
905
|
|
|
|
|
|
|
906
|
0
|
|
|
|
|
return $to; |
907
|
|
|
|
|
|
} |
908
|
|
|
|
|
|
|
909
|
|
|
|
|
|
### /bin/lftp fetch ### |
910
|
|
|
|
|
|
sub _lftp_fetch { |
911
|
0
|
|
|
0
|
|
my $self = shift; |
912
|
0
|
|
|
|
|
my %hash = @_; |
913
|
|
|
|
|
|
|
914
|
0
|
|
|
|
|
my ($to); |
915
|
0
|
|
|
|
|
my $tmpl = { |
916
|
|
|
|
|
|
to => { required => 1, store => \$to } |
917
|
|
|
|
|
|
}; |
918
|
0
|
0
|
|
|
|
check( $tmpl, \%hash ) or return; |
919
|
|
|
|
|
|
|
920
|
|
|
|
|
|
### see if we have a lftp binary ### |
921
|
0
|
|
|
|
|
my $lftp; |
922
|
0
|
0
|
|
|
|
unless( $lftp = can_run('lftp') ) { |
923
|
0
|
|
|
|
|
$METHOD_FAIL->{'lftp'} = 1; |
924
|
0
|
|
|
|
|
return; |
925
|
|
|
|
|
|
} |
926
|
|
|
|
|
|
|
927
|
|
|
|
|
|
### no verboseness, thanks ### |
928
|
0
|
|
|
|
|
my $cmd = [ $lftp, '-f' ]; |
929
|
|
|
|
|
|
|
930
|
0
|
|
|
|
|
my $fh = File::Temp->new; |
931
|
|
|
|
|
|
|
932
|
0
|
|
|
|
|
my $str; |
933
|
|
|
|
|
|
|
934
|
|
|
|
|
|
### if a timeout is set, add it ### |
935
|
0
|
0
|
|
|
|
$str .= "set net:timeout $TIMEOUT;\n" if $TIMEOUT; |
936
|
|
|
|
|
|
|
937
|
|
|
|
|
|
### run passive if specified ### |
938
|
0
|
0
|
|
|
|
$str .= "set ftp:passive-mode 1;\n" if $FTP_PASSIVE; |
939
|
|
|
|
|
|
|
940
|
|
|
|
|
|
### set the output document, add the uri ### |
941
|
|
|
|
|
|
### quote the URI, because lftp supports certain shell |
942
|
|
|
|
|
|
### expansions, most notably & for backgrounding. |
943
|
|
|
|
|
|
### ' quote does nto work, must be " |
944
|
0
|
|
|
|
|
$str .= q[get ']. $self->uri .q[' -o ]. $to . $/; |
945
|
|
|
|
|
|
|
946
|
0
|
0
|
|
|
|
if( $DEBUG ) { |
947
|
0
|
|
|
|
|
my $pp_str = join ' ', split $/, $str; |
948
|
0
|
|
|
|
|
print "# lftp command: $pp_str\n"; |
949
|
|
|
|
|
|
} |
950
|
|
|
|
|
|
|
951
|
|
|
|
|
|
### write straight to the file. |
952
|
0
|
|
|
|
|
$fh->autoflush(1); |
953
|
0
|
|
|
|
|
print $fh $str; |
954
|
|
|
|
|
|
|
955
|
|
|
|
|
|
### the command needs to be 1 string to be executed |
956
|
0
|
|
|
|
|
push @$cmd, $fh->filename; |
957
|
|
|
|
|
|
|
958
|
|
|
|
|
|
### with IPC::Cmd > 0.41, this is fixed in teh library, |
959
|
|
|
|
|
|
### and there's no need for special casing any more. |
960
|
|
|
|
|
|
### DO NOT quote things for IPC::Run, it breaks stuff. |
961
|
|
|
|
|
|
# $IPC::Cmd::USE_IPC_RUN |
962
|
|
|
|
|
|
# ? ($to, $self->uri) |
963
|
|
|
|
|
|
# : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); |
964
|
|
|
|
|
|
|
965
|
|
|
|
|
|
|
966
|
|
|
|
|
|
### shell out ### |
967
|
0
|
|
|
|
|
my $captured; |
968
|
0
|
0
|
|
|
|
unless(run( command => $cmd, |
969
|
|
|
|
|
|
buffer => \$captured, |
970
|
|
|
|
|
|
verbose => $DEBUG |
971
|
|
|
|
|
|
)) { |
972
|
|
|
|
|
|
### wget creates the output document always, even if the fetch |
973
|
|
|
|
|
|
### fails.. so unlink it in that case |
974
|
0
|
|
|
|
|
1 while unlink $to; |
975
|
|
|
|
|
|
|
976
|
0
|
|
0
|
|
|
return $self->_error(loc( "Command failed: %1", $captured || '' )); |
977
|
|
|
|
|
|
} |
978
|
|
|
|
|
|
|
979
|
0
|
|
|
|
|
return $to; |
980
|
|
|
|
|
|
} |
981
|
|
|
|
|
|
|
982
|
|
|
|
|
|
|
983
|
|
|
|
|
|
|
984
|
|
|
|
|
|
### /bin/ftp fetch ### |
985
|
|
|
|
|
|
sub _ftp_fetch { |
986
|
0
|
|
|
0
|
|
my $self = shift; |
987
|
0
|
|
|
|
|
my %hash = @_; |
988
|
|
|
|
|
|
|
989
|
0
|
|
|
|
|
my ($to); |
990
|
0
|
|
|
|
|
my $tmpl = { |
991
|
|
|
|
|
|
to => { required => 1, store => \$to } |
992
|
|
|
|
|
|
}; |
993
|
0
|
0
|
|
|
|
check( $tmpl, \%hash ) or return; |
994
|
|
|
|
|
|
|
995
|
|
|
|
|
|
### see if we have a ftp binary ### |
996
|
0
|
|
|
|
|
my $ftp; |
997
|
0
|
0
|
|
|
|
unless( $ftp = can_run('ftp') ) { |
998
|
0
|
|
|
|
|
$METHOD_FAIL->{'ftp'} = 1; |
999
|
0
|
|
|
|
|
return; |
1000
|
|
|
|
|
|
} |
1001
|
|
|
|
|
|
|
1002
|
0
|
|
|
|
|
my $fh = FileHandle->new; |
1003
|
|
|
|
|
|
|
1004
|
0
|
|
|
|
|
local $SIG{CHLD} = 'IGNORE'; |
1005
|
|
|
|
|
|
|
1006
|
0
|
0
|
|
|
|
unless ($fh->open("$ftp -n", '|-')) { |
1007
|
0
|
|
|
|
|
return $self->_error(loc("%1 creation failed: %2", $ftp, $!)); |
1008
|
|
|
|
|
|
} |
1009
|
|
|
|
|
|
|
1010
|
0
|
|
|
|
|
my @dialog = ( |
1011
|
|
|
|
|
|
"lcd " . dirname($to), |
1012
|
|
|
|
|
|
"open " . $self->host, |
1013
|
|
|
|
|
|
"user anonymous $FROM_EMAIL", |
1014
|
|
|
|
|
|
"cd /", |
1015
|
|
|
|
|
|
"cd " . $self->path, |
1016
|
|
|
|
|
|
"binary", |
1017
|
|
|
|
|
|
"get " . $self->file . " " . $self->output_file, |
1018
|
|
|
|
|
|
"quit", |
1019
|
|
|
|
|
|
); |
1020
|
|
|
|
|
|
|
1021
|
0
|
|
|
|
|
foreach (@dialog) { $fh->print($_, "\n") } |
|
0
|
|
|
|
|
|
1022
|
0
|
0
|
|
|
|
$fh->close or return; |
1023
|
|
|
|
|
|
|
1024
|
0
|
|
|
|
|
return $to; |
1025
|
|
|
|
|
|
} |
1026
|
|
|
|
|
|
|
1027
|
|
|
|
|
|
### lynx is stupid - it decompresses any .gz file it finds to be text |
1028
|
|
|
|
|
|
### use /bin/lynx to fetch files |
1029
|
|
|
|
|
|
sub _lynx_fetch { |
1030
|
0
|
|
|
0
|
|
my $self = shift; |
1031
|
0
|
|
|
|
|
my %hash = @_; |
1032
|
|
|
|
|
|
|
1033
|
0
|
|
|
|
|
my ($to); |
1034
|
0
|
|
|
|
|
my $tmpl = { |
1035
|
|
|
|
|
|
to => { required => 1, store => \$to } |
1036
|
|
|
|
|
|
}; |
1037
|
0
|
0
|
|
|
|
check( $tmpl, \%hash ) or return; |
1038
|
|
|
|
|
|
|
1039
|
|
|
|
|
|
### see if we have a lynx binary ### |
1040
|
0
|
|
|
|
|
my $lynx; |
1041
|
0
|
0
|
|
|
|
unless ( $lynx = can_run('lynx') ){ |
1042
|
0
|
|
|
|
|
$METHOD_FAIL->{'lynx'} = 1; |
1043
|
0
|
|
|
|
|
return; |
1044
|
|
|
|
|
|
} |
1045
|
|
|
|
|
|
|
1046
|
0
|
0
|
|
|
|
unless( IPC::Cmd->can_capture_buffer ) { |
1047
|
0
|
|
|
|
|
$METHOD_FAIL->{'lynx'} = 1; |
1048
|
|
|
|
|
|
|
1049
|
0
|
|
|
|
|
return $self->_error(loc( |
1050
|
|
|
|
|
|
"Can not capture buffers. Can not use '%1' to fetch files", |
1051
|
|
|
|
|
|
'lynx' )); |
1052
|
|
|
|
|
|
} |
1053
|
|
|
|
|
|
|
1054
|
|
|
|
|
|
### check if the HTTP resource exists ### |
1055
|
0
|
0
|
|
|
|
if ($self->uri =~ /^https?:\/\//i) { |
1056
|
0
|
|
|
|
|
my $cmd = [ |
1057
|
|
|
|
|
|
$lynx, |
1058
|
|
|
|
|
|
'-head', |
1059
|
|
|
|
|
|
'-source', |
1060
|
|
|
|
|
|
"-auth=anonymous:$FROM_EMAIL", |
1061
|
|
|
|
|
|
]; |
1062
|
|
|
|
|
|
|
1063
|
0
|
0
|
|
|
|
push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT; |
1064
|
|
|
|
|
|
|
1065
|
0
|
|
|
|
|
push @$cmd, $self->uri; |
1066
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
### shell out ### |
1068
|
0
|
|
|
|
|
my $head; |
1069
|
0
|
0
|
|
|
|
unless(run( command => $cmd, |
1070
|
|
|
|
|
|
buffer => \$head, |
1071
|
|
|
|
|
|
verbose => $DEBUG ) |
1072
|
|
|
|
|
|
) { |
1073
|
0
|
|
0
|
|
|
return $self->_error(loc("Command failed: %1", $head || '')); |
1074
|
|
|
|
|
|
} |
1075
|
|
|
|
|
|
|
1076
|
0
|
0
|
|
|
|
unless($head =~ /^HTTP\/\d+\.\d+ 200\b/) { |
1077
|
0
|
|
0
|
|
|
return $self->_error(loc("Command failed: %1", $head || '')); |
1078
|
|
|
|
|
|
} |
1079
|
|
|
|
|
|
} |
1080
|
|
|
|
|
|
|
1081
|
|
|
|
|
|
### write to the output file ourselves, since lynx ass_u_mes to much |
1082
|
0
|
0
|
|
|
|
my $local = FileHandle->new( $to, 'w' ) |
1083
|
|
|
|
|
|
or return $self->_error(loc( |
1084
|
|
|
|
|
|
"Could not open '%1' for writing: %2",$to,$!)); |
1085
|
|
|
|
|
|
|
1086
|
|
|
|
|
|
### dump to stdout ### |
1087
|
0
|
|
|
|
|
my $cmd = [ |
1088
|
|
|
|
|
|
$lynx, |
1089
|
|
|
|
|
|
'-source', |
1090
|
|
|
|
|
|
"-auth=anonymous:$FROM_EMAIL", |
1091
|
|
|
|
|
|
]; |
1092
|
|
|
|
|
|
|
1093
|
0
|
0
|
|
|
|
push @$cmd, "-connect_timeout=$TIMEOUT" if $TIMEOUT; |
1094
|
|
|
|
|
|
|
1095
|
|
|
|
|
|
### DO NOT quote things for IPC::Run, it breaks stuff. |
1096
|
0
|
|
|
|
|
push @$cmd, $self->uri; |
1097
|
|
|
|
|
|
|
1098
|
|
|
|
|
|
### with IPC::Cmd > 0.41, this is fixed in teh library, |
1099
|
|
|
|
|
|
### and there's no need for special casing any more. |
1100
|
|
|
|
|
|
### DO NOT quote things for IPC::Run, it breaks stuff. |
1101
|
|
|
|
|
|
# $IPC::Cmd::USE_IPC_RUN |
1102
|
|
|
|
|
|
# ? $self->uri |
1103
|
|
|
|
|
|
# : QUOTE. $self->uri .QUOTE; |
1104
|
|
|
|
|
|
|
1105
|
|
|
|
|
|
|
1106
|
|
|
|
|
|
### shell out ### |
1107
|
0
|
|
|
|
|
my $captured; |
1108
|
0
|
0
|
|
|
|
unless(run( command => $cmd, |
1109
|
|
|
|
|
|
buffer => \$captured, |
1110
|
|
|
|
|
|
verbose => $DEBUG ) |
1111
|
|
|
|
|
|
) { |
1112
|
0
|
|
0
|
|
|
return $self->_error(loc("Command failed: %1", $captured || '')); |
1113
|
|
|
|
|
|
} |
1114
|
|
|
|
|
|
|
1115
|
|
|
|
|
|
### print to local file ### |
1116
|
|
|
|
|
|
### XXX on a 404 with a special error page, $captured will actually |
1117
|
|
|
|
|
|
### hold the contents of that page, and make it *appear* like the |
1118
|
|
|
|
|
|
### request was a success, when really it wasn't :( |
1119
|
|
|
|
|
|
### there doesn't seem to be an option for lynx to change the exit |
1120
|
|
|
|
|
|
### code based on a 4XX status or so. |
1121
|
|
|
|
|
|
### the closest we can come is using --error_file and parsing that, |
1122
|
|
|
|
|
|
### which is very unreliable ;( |
1123
|
0
|
|
|
|
|
$local->print( $captured ); |
1124
|
0
|
0
|
|
|
|
$local->close or return; |
1125
|
|
|
|
|
|
|
1126
|
0
|
|
|
|
|
return $to; |
1127
|
|
|
|
|
|
} |
1128
|
|
|
|
|
|
|
1129
|
|
|
|
|
|
### use /bin/ncftp to fetch files |
1130
|
|
|
|
|
|
sub _ncftp_fetch { |
1131
|
0
|
|
|
0
|
|
my $self = shift; |
1132
|
0
|
|
|
|
|
my %hash = @_; |
1133
|
|
|
|
|
|
|
1134
|
0
|
|
|
|
|
my ($to); |
1135
|
0
|
|
|
|
|
my $tmpl = { |
1136
|
|
|
|
|
|
to => { required => 1, store => \$to } |
1137
|
|
|
|
|
|
}; |
1138
|
0
|
0
|
|
|
|
check( $tmpl, \%hash ) or return; |
1139
|
|
|
|
|
|
|
1140
|
|
|
|
|
|
### we can only set passive mode in interactive sessions, so bail out |
1141
|
|
|
|
|
|
### if $FTP_PASSIVE is set |
1142
|
0
|
0
|
|
|
|
return if $FTP_PASSIVE; |
1143
|
|
|
|
|
|
|
1144
|
|
|
|
|
|
### see if we have a ncftp binary ### |
1145
|
0
|
|
|
|
|
my $ncftp; |
1146
|
0
|
0
|
|
|
|
unless( $ncftp = can_run('ncftp') ) { |
1147
|
0
|
|
|
|
|
$METHOD_FAIL->{'ncftp'} = 1; |
1148
|
0
|
|
|
|
|
return; |
1149
|
|
|
|
|
|
} |
1150
|
|
|
|
|
|
|
1151
|
0
|
0
|
|
|
|
my $cmd = [ |
1152
|
|
|
|
|
|
$ncftp, |
1153
|
|
|
|
|
|
'-V', # do not be verbose |
1154
|
|
|
|
|
|
'-p', $FROM_EMAIL, # email as password |
1155
|
|
|
|
|
|
$self->host, # hostname |
1156
|
|
|
|
|
|
dirname($to), # local dir for the file |
1157
|
|
|
|
|
|
# remote path to the file |
1158
|
|
|
|
|
|
### DO NOT quote things for IPC::Run, it breaks stuff. |
1159
|
|
|
|
|
|
$IPC::Cmd::USE_IPC_RUN |
1160
|
|
|
|
|
|
? File::Spec::Unix->catdir( $self->path, $self->file ) |
1161
|
|
|
|
|
|
: QUOTE. File::Spec::Unix->catdir( |
1162
|
|
|
|
|
|
$self->path, $self->file ) .QUOTE |
1163
|
|
|
|
|
|
|
1164
|
|
|
|
|
|
]; |
1165
|
|
|
|
|
|
|
1166
|
|
|
|
|
|
### shell out ### |
1167
|
0
|
|
|
|
|
my $captured; |
1168
|
0
|
0
|
|
|
|
unless(run( command => $cmd, |
1169
|
|
|
|
|
|
buffer => \$captured, |
1170
|
|
|
|
|
|
verbose => $DEBUG ) |
1171
|
|
|
|
|
|
) { |
1172
|
0
|
|
0
|
|
|
return $self->_error(loc("Command failed: %1", $captured || '')); |
1173
|
|
|
|
|
|
} |
1174
|
|
|
|
|
|
|
1175
|
0
|
|
|
|
|
return $to; |
1176
|
|
|
|
|
|
|
1177
|
|
|
|
|
|
} |
1178
|
|
|
|
|
|
|
1179
|
|
|
|
|
|
### use /bin/curl to fetch files |
1180
|
|
|
|
|
|
sub _curl_fetch { |
1181
|
0
|
|
|
0
|
|
my $self = shift; |
1182
|
0
|
|
|
|
|
my %hash = @_; |
1183
|
|
|
|
|
|
|
1184
|
0
|
|
|
|
|
my ($to); |
1185
|
0
|
|
|
|
|
my $tmpl = { |
1186
|
|
|
|
|
|
to => { required => 1, store => \$to } |
1187
|
|
|
|
|
|
}; |
1188
|
0
|
0
|
|
|
|
check( $tmpl, \%hash ) or return; |
1189
|
0
|
|
|
|
|
my $curl; |
1190
|
0
|
0
|
|
|
|
unless ( $curl = can_run('curl') ) { |
1191
|
0
|
|
|
|
|
$METHOD_FAIL->{'curl'} = 1; |
1192
|
0
|
|
|
|
|
return; |
1193
|
|
|
|
|
|
} |
1194
|
|
|
|
|
|
|
1195
|
|
|
|
|
|
### these long opts are self explanatory - I like that -jmb |
1196
|
0
|
|
|
|
|
my $cmd = [ $curl, '-q' ]; |
1197
|
|
|
|
|
|
|
1198
|
0
|
0
|
|
|
|
push(@$cmd, '--connect-timeout', $TIMEOUT) if $TIMEOUT; |
1199
|
|
|
|
|
|
|
1200
|
0
|
0
|
|
|
|
push(@$cmd, '--silent') unless $DEBUG; |
1201
|
|
|
|
|
|
|
1202
|
|
|
|
|
|
### curl does the right thing with passive, regardless ### |
1203
|
0
|
0
|
|
|
|
if ($self->scheme eq 'ftp') { |
1204
|
0
|
|
|
|
|
push(@$cmd, '--user', "anonymous:$FROM_EMAIL"); |
1205
|
|
|
|
|
|
} |
1206
|
|
|
|
|
|
|
1207
|
|
|
|
|
|
### curl doesn't follow 302 (temporarily moved) etc automatically |
1208
|
|
|
|
|
|
### so we add --location to enable that. |
1209
|
0
|
|
|
|
|
push @$cmd, '--fail', '--location', '--output', $to, $self->uri; |
1210
|
|
|
|
|
|
|
1211
|
|
|
|
|
|
### with IPC::Cmd > 0.41, this is fixed in teh library, |
1212
|
|
|
|
|
|
### and there's no need for special casing any more. |
1213
|
|
|
|
|
|
### DO NOT quote things for IPC::Run, it breaks stuff. |
1214
|
|
|
|
|
|
# $IPC::Cmd::USE_IPC_RUN |
1215
|
|
|
|
|
|
# ? ($to, $self->uri) |
1216
|
|
|
|
|
|
# : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); |
1217
|
|
|
|
|
|
|
1218
|
|
|
|
|
|
|
1219
|
0
|
|
|
|
|
my $captured; |
1220
|
0
|
0
|
|
|
|
unless(run( command => $cmd, |
1221
|
|
|
|
|
|
buffer => \$captured, |
1222
|
|
|
|
|
|
verbose => $DEBUG ) |
1223
|
|
|
|
|
|
) { |
1224
|
|
|
|
|
|
|
1225
|
0
|
|
0
|
|
|
return $self->_error(loc("Command failed: %1", $captured || '')); |
1226
|
|
|
|
|
|
} |
1227
|
|
|
|
|
|
|
1228
|
0
|
|
|
|
|
return $to; |
1229
|
|
|
|
|
|
|
1230
|
|
|
|
|
|
} |
1231
|
|
|
|
|
|
|
1232
|
|
|
|
|
|
### /usr/bin/fetch fetch! ### |
1233
|
|
|
|
|
|
sub _fetch_fetch { |
1234
|
0
|
|
|
0
|
|
my $self = shift; |
1235
|
0
|
|
|
|
|
my %hash = @_; |
1236
|
|
|
|
|
|
|
1237
|
0
|
|
|
|
|
my ($to); |
1238
|
0
|
|
|
|
|
my $tmpl = { |
1239
|
|
|
|
|
|
to => { required => 1, store => \$to } |
1240
|
|
|
|
|
|
}; |
1241
|
0
|
0
|
|
|
|
check( $tmpl, \%hash ) or return; |
1242
|
|
|
|
|
|
|
1243
|
|
|
|
|
|
### see if we have a fetch binary ### |
1244
|
0
|
|
|
|
|
my $fetch; |
1245
|
0
|
0
|
0
|
|
|
unless( HAS_FETCH and $fetch = can_run('fetch') ) { |
1246
|
0
|
|
|
|
|
$METHOD_FAIL->{'fetch'} = 1; |
1247
|
0
|
|
|
|
|
return; |
1248
|
|
|
|
|
|
} |
1249
|
|
|
|
|
|
|
1250
|
|
|
|
|
|
### no verboseness, thanks ### |
1251
|
0
|
|
|
|
|
my $cmd = [ $fetch, '-q' ]; |
1252
|
|
|
|
|
|
|
1253
|
|
|
|
|
|
### if a timeout is set, add it ### |
1254
|
0
|
0
|
|
|
|
push(@$cmd, '-T', $TIMEOUT) if $TIMEOUT; |
1255
|
|
|
|
|
|
|
1256
|
|
|
|
|
|
### run passive if specified ### |
1257
|
|
|
|
|
|
#push @$cmd, '-p' if $FTP_PASSIVE; |
1258
|
0
|
0
|
|
|
|
local $ENV{'FTP_PASSIVE_MODE'} = 1 if $FTP_PASSIVE; |
1259
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
### set the output document, add the uri ### |
1261
|
0
|
|
|
|
|
push @$cmd, '-o', $to, $self->uri; |
1262
|
|
|
|
|
|
|
1263
|
|
|
|
|
|
### with IPC::Cmd > 0.41, this is fixed in teh library, |
1264
|
|
|
|
|
|
### and there's no need for special casing any more. |
1265
|
|
|
|
|
|
### DO NOT quote things for IPC::Run, it breaks stuff. |
1266
|
|
|
|
|
|
# $IPC::Cmd::USE_IPC_RUN |
1267
|
|
|
|
|
|
# ? ($to, $self->uri) |
1268
|
|
|
|
|
|
# : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); |
1269
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
### shell out ### |
1271
|
0
|
|
|
|
|
my $captured; |
1272
|
0
|
0
|
|
|
|
unless(run( command => $cmd, |
1273
|
|
|
|
|
|
buffer => \$captured, |
1274
|
|
|
|
|
|
verbose => $DEBUG |
1275
|
|
|
|
|
|
)) { |
1276
|
|
|
|
|
|
### wget creates the output document always, even if the fetch |
1277
|
|
|
|
|
|
### fails.. so unlink it in that case |
1278
|
0
|
|
|
|
|
1 while unlink $to; |
1279
|
|
|
|
|
|
|
1280
|
0
|
|
0
|
|
|
return $self->_error(loc( "Command failed: %1", $captured || '' )); |
1281
|
|
|
|
|
|
} |
1282
|
|
|
|
|
|
|
1283
|
0
|
|
|
|
|
return $to; |
1284
|
|
|
|
|
|
} |
1285
|
|
|
|
|
|
|
1286
|
|
|
|
|
|
### use File::Copy for fetching file:// urls ### |
1287
|
|
|
|
|
|
### |
1288
|
|
|
|
|
|
### See section 3.10 of RFC 1738 (http://www.faqs.org/rfcs/rfc1738.html) |
1289
|
|
|
|
|
|
### Also see wikipedia on file:// (http://en.wikipedia.org/wiki/File://) |
1290
|
|
|
|
|
|
### |
1291
|
|
|
|
|
|
|
1292
|
|
|
|
|
|
sub _file_fetch { |
1293
|
0
|
|
|
0
|
|
my $self = shift; |
1294
|
0
|
|
|
|
|
my %hash = @_; |
1295
|
|
|
|
|
|
|
1296
|
0
|
|
|
|
|
my ($to); |
1297
|
0
|
|
|
|
|
my $tmpl = { |
1298
|
|
|
|
|
|
to => { required => 1, store => \$to } |
1299
|
|
|
|
|
|
}; |
1300
|
0
|
0
|
|
|
|
check( $tmpl, \%hash ) or return; |
1301
|
|
|
|
|
|
|
1302
|
|
|
|
|
|
|
1303
|
|
|
|
|
|
|
1304
|
|
|
|
|
|
### prefix a / on unix systems with a file uri, since it would |
1305
|
|
|
|
|
|
### look somewhat like this: |
1306
|
|
|
|
|
|
### file:///home/kane/file |
1307
|
|
|
|
|
|
### whereas windows file uris for 'c:\some\dir\file' might look like: |
1308
|
|
|
|
|
|
### file:///C:/some/dir/file |
1309
|
|
|
|
|
|
### file:///C|/some/dir/file |
1310
|
|
|
|
|
|
### or for a network share '\\host\share\some\dir\file': |
1311
|
|
|
|
|
|
### file:////host/share/some/dir/file |
1312
|
|
|
|
|
|
### |
1313
|
|
|
|
|
|
### VMS file uri's for 'DISK$USER:[MY.NOTES]NOTE123456.TXT' might look like: |
1314
|
|
|
|
|
|
### file://vms.host.edu/disk$user/my/notes/note12345.txt |
1315
|
|
|
|
|
|
### |
1316
|
|
|
|
|
|
|
1317
|
0
|
|
|
|
|
my $path = $self->path; |
1318
|
0
|
|
|
|
|
my $vol = $self->vol; |
1319
|
0
|
|
|
|
|
my $share = $self->share; |
1320
|
|
|
|
|
|
|
1321
|
0
|
|
|
|
|
my $remote; |
1322
|
0
|
0
|
0
|
|
|
if (!$share and $self->host) { |
1323
|
0
|
|
|
|
|
return $self->_error(loc( |
1324
|
|
|
|
|
|
"Currently %1 cannot handle hosts in %2 urls", |
1325
|
|
|
|
|
|
'File::Fetch', 'file://' |
1326
|
|
|
|
|
|
)); |
1327
|
|
|
|
|
|
} |
1328
|
|
|
|
|
|
|
1329
|
0
|
0
|
|
|
|
if( $vol ) { |
|
|
0
|
|
|
|
|
1330
|
0
|
|
|
|
|
$path = File::Spec->catdir( split /\//, $path ); |
1331
|
0
|
|
|
|
|
$remote = File::Spec->catpath( $vol, $path, $self->file); |
1332
|
|
|
|
|
|
|
1333
|
|
|
|
|
|
} elsif( $share ) { |
1334
|
|
|
|
|
|
### win32 specific, and a share name, so we wont bother with File::Spec |
1335
|
0
|
|
|
|
|
$path =~ s|/+|\\|g; |
1336
|
0
|
|
|
|
|
$remote = "\\\\".$self->host."\\$share\\$path"; |
1337
|
|
|
|
|
|
|
1338
|
|
|
|
|
|
} else { |
1339
|
|
|
|
|
|
### File::Spec on VMS can not currently handle UNIX syntax. |
1340
|
0
|
|
|
|
|
my $file_class = ON_VMS |
1341
|
|
|
|
|
|
? 'File::Spec::Unix' |
1342
|
|
|
|
|
|
: 'File::Spec'; |
1343
|
|
|
|
|
|
|
1344
|
0
|
|
|
|
|
$remote = $file_class->catfile( $path, $self->file ); |
1345
|
|
|
|
|
|
} |
1346
|
|
|
|
|
|
|
1347
|
|
|
|
|
|
### File::Copy is littered with 'die' statements :( ### |
1348
|
0
|
|
|
|
|
my $rv = eval { File::Copy::copy( $remote, $to ) }; |
|
0
|
|
|
|
|
|
1349
|
|
|
|
|
|
|
1350
|
|
|
|
|
|
### something went wrong ### |
1351
|
0
|
0
|
0
|
|
|
if( !$rv or $@ ) { |
1352
|
0
|
|
|
|
|
return $self->_error(loc("Could not copy '%1' to '%2': %3 %4", |
1353
|
|
|
|
|
|
$remote, $to, $!, $@)); |
1354
|
|
|
|
|
|
} |
1355
|
|
|
|
|
|
|
1356
|
0
|
|
|
|
|
return $to; |
1357
|
|
|
|
|
|
} |
1358
|
|
|
|
|
|
|
1359
|
|
|
|
|
|
### use /usr/bin/rsync to fetch files |
1360
|
|
|
|
|
|
sub _rsync_fetch { |
1361
|
0
|
|
|
0
|
|
my $self = shift; |
1362
|
0
|
|
|
|
|
my %hash = @_; |
1363
|
|
|
|
|
|
|
1364
|
0
|
|
|
|
|
my ($to); |
1365
|
0
|
|
|
|
|
my $tmpl = { |
1366
|
|
|
|
|
|
to => { required => 1, store => \$to } |
1367
|
|
|
|
|
|
}; |
1368
|
0
|
0
|
|
|
|
check( $tmpl, \%hash ) or return; |
1369
|
0
|
|
|
|
|
my $rsync; |
1370
|
0
|
0
|
|
|
|
unless ( $rsync = can_run('rsync') ) { |
1371
|
0
|
|
|
|
|
$METHOD_FAIL->{'rsync'} = 1; |
1372
|
0
|
|
|
|
|
return; |
1373
|
|
|
|
|
|
} |
1374
|
|
|
|
|
|
|
1375
|
0
|
|
|
|
|
my $cmd = [ $rsync ]; |
1376
|
|
|
|
|
|
|
1377
|
|
|
|
|
|
### XXX: rsync has no I/O timeouts at all, by default |
1378
|
0
|
0
|
|
|
|
push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT; |
1379
|
|
|
|
|
|
|
1380
|
0
|
0
|
|
|
|
push(@$cmd, '--quiet') unless $DEBUG; |
1381
|
|
|
|
|
|
|
1382
|
|
|
|
|
|
### DO NOT quote things for IPC::Run, it breaks stuff. |
1383
|
0
|
|
|
|
|
push @$cmd, $self->uri, $to; |
1384
|
|
|
|
|
|
|
1385
|
|
|
|
|
|
### with IPC::Cmd > 0.41, this is fixed in teh library, |
1386
|
|
|
|
|
|
### and there's no need for special casing any more. |
1387
|
|
|
|
|
|
### DO NOT quote things for IPC::Run, it breaks stuff. |
1388
|
|
|
|
|
|
# $IPC::Cmd::USE_IPC_RUN |
1389
|
|
|
|
|
|
# ? ($to, $self->uri) |
1390
|
|
|
|
|
|
# : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); |
1391
|
|
|
|
|
|
|
1392
|
0
|
|
|
|
|
my $captured; |
1393
|
0
|
0
|
|
|
|
unless(run( command => $cmd, |
1394
|
|
|
|
|
|
buffer => \$captured, |
1395
|
|
|
|
|
|
verbose => $DEBUG ) |
1396
|
|
|
|
|
|
) { |
1397
|
|
|
|
|
|
|
1398
|
0
|
|
0
|
|
|
return $self->_error(loc("Command %1 failed: %2", |
|
|
|
0
|
|
|
|
1399
|
|
|
|
|
|
"@$cmd" || '', $captured || '')); |
1400
|
|
|
|
|
|
} |
1401
|
|
|
|
|
|
|
1402
|
0
|
|
|
|
|
return $to; |
1403
|
|
|
|
|
|
|
1404
|
|
|
|
|
|
} |
1405
|
|
|
|
|
|
|
1406
|
|
|
|
|
|
### use git to fetch files |
1407
|
|
|
|
|
|
sub _git_fetch { |
1408
|
0
|
|
|
0
|
|
my $self = shift; |
1409
|
0
|
|
|
|
|
my %hash = @_; |
1410
|
|
|
|
|
|
|
1411
|
0
|
|
|
|
|
my ($to); |
1412
|
0
|
|
|
|
|
my $tmpl = { |
1413
|
|
|
|
|
|
to => { required => 1, store => \$to } |
1414
|
|
|
|
|
|
}; |
1415
|
0
|
0
|
|
|
|
check( $tmpl, \%hash ) or return; |
1416
|
0
|
|
|
|
|
my $git; |
1417
|
0
|
0
|
|
|
|
unless ( $git = can_run('git') ) { |
1418
|
0
|
|
|
|
|
$METHOD_FAIL->{'git'} = 1; |
1419
|
0
|
|
|
|
|
return; |
1420
|
|
|
|
|
|
} |
1421
|
|
|
|
|
|
|
1422
|
0
|
|
|
|
|
my $cmd = [ $git, 'clone' ]; |
1423
|
|
|
|
|
|
|
1424
|
|
|
|
|
|
#push(@$cmd, '--timeout=' . $TIMEOUT) if $TIMEOUT; |
1425
|
|
|
|
|
|
|
1426
|
0
|
0
|
|
|
|
push(@$cmd, '--quiet') unless $DEBUG; |
1427
|
|
|
|
|
|
|
1428
|
|
|
|
|
|
### DO NOT quote things for IPC::Run, it breaks stuff. |
1429
|
0
|
|
|
|
|
push @$cmd, $self->uri, $to; |
1430
|
|
|
|
|
|
|
1431
|
|
|
|
|
|
### with IPC::Cmd > 0.41, this is fixed in teh library, |
1432
|
|
|
|
|
|
### and there's no need for special casing any more. |
1433
|
|
|
|
|
|
### DO NOT quote things for IPC::Run, it breaks stuff. |
1434
|
|
|
|
|
|
# $IPC::Cmd::USE_IPC_RUN |
1435
|
|
|
|
|
|
# ? ($to, $self->uri) |
1436
|
|
|
|
|
|
# : (QUOTE. $to .QUOTE, QUOTE. $self->uri .QUOTE); |
1437
|
|
|
|
|
|
|
1438
|
0
|
|
|
|
|
my $captured; |
1439
|
0
|
0
|
|
|
|
unless(run( command => $cmd, |
1440
|
|
|
|
|
|
buffer => \$captured, |
1441
|
|
|
|
|
|
verbose => $DEBUG ) |
1442
|
|
|
|
|
|
) { |
1443
|
|
|
|
|
|
|
1444
|
0
|
|
0
|
|
|
return $self->_error(loc("Command %1 failed: %2", |
|
|
|
0
|
|
|
|
1445
|
|
|
|
|
|
"@$cmd" || '', $captured || '')); |
1446
|
|
|
|
|
|
} |
1447
|
|
|
|
|
|
|
1448
|
0
|
|
|
|
|
return $to; |
1449
|
|
|
|
|
|
|
1450
|
|
|
|
|
|
} |
1451
|
|
|
|
|
|
|
1452
|
|
|
|
|
|
################################# |
1453
|
|
|
|
|
|
# |
1454
|
|
|
|
|
|
# Error code |
1455
|
|
|
|
|
|
# |
1456
|
|
|
|
|
|
################################# |
1457
|
|
|
|
|
|
|
1458
|
|
|
|
|
|
=pod |
1459
|
|
|
|
|
|
|
1460
|
|
|
|
|
|
=head2 $ff->error([BOOL]) |
1461
|
|
|
|
|
|
|
1462
|
|
|
|
|
|
Returns the last encountered error as string. |
1463
|
|
|
|
|
|
Pass it a true value to get the C output instead. |
1464
|
|
|
|
|
|
|
1465
|
|
|
|
|
|
=cut |
1466
|
|
|
|
|
|
|
1467
|
|
|
|
|
|
### error handling the way Archive::Extract does it |
1468
|
|
|
|
|
|
sub _error { |
1469
|
0
|
|
|
0
|
|
my $self = shift; |
1470
|
0
|
|
|
|
|
my $error = shift; |
1471
|
|
|
|
|
|
|
1472
|
0
|
|
|
|
|
$self->_error_msg( $error ); |
1473
|
0
|
|
|
|
|
$self->_error_msg_long( Carp::longmess($error) ); |
1474
|
|
|
|
|
|
|
1475
|
0
|
0
|
|
|
|
if( $WARN ) { |
1476
|
0
|
0
|
|
|
|
carp $DEBUG ? $self->_error_msg_long : $self->_error_msg; |
1477
|
|
|
|
|
|
} |
1478
|
|
|
|
|
|
|
1479
|
0
|
|
|
|
|
return; |
1480
|
|
|
|
|
|
} |
1481
|
|
|
|
|
|
|
1482
|
|
|
|
|
|
sub error { |
1483
|
0
|
|
|
0
|
|
my $self = shift; |
1484
|
0
|
0
|
|
|
|
return shift() ? $self->_error_msg_long : $self->_error_msg; |
1485
|
|
|
|
|
|
} |
1486
|
|
|
|
|
|
|
1487
|
|
|
|
|
|
|
1488
|
|
|
|
|
|
1; |
1489
|
|
|
|
|
|
|
1490
|
|
|
|
|
|
=pod |
1491
|
|
|
|
|
|
|
1492
|
|
|
|
|
|
=head1 HOW IT WORKS |
1493
|
|
|
|
|
|
|
1494
|
|
|
|
|
|
File::Fetch is able to fetch a variety of uris, by using several |
1495
|
|
|
|
|
|
external programs and modules. |
1496
|
|
|
|
|
|
|
1497
|
|
|
|
|
|
Below is a mapping of what utilities will be used in what order |
1498
|
|
|
|
|
|
for what schemes, if available: |
1499
|
|
|
|
|
|
|
1500
|
|
|
|
|
|
file => LWP, lftp, file |
1501
|
|
|
|
|
|
http => LWP, HTTP::Lite, wget, curl, lftp, fetch, lynx, iosock |
1502
|
|
|
|
|
|
ftp => LWP, Net::FTP, wget, curl, lftp, fetch, ncftp, ftp |
1503
|
|
|
|
|
|
rsync => rsync |
1504
|
|
|
|
|
|
git => git |
1505
|
|
|
|
|
|
|
1506
|
|
|
|
|
|
If you'd like to disable the use of one or more of these utilities |
1507
|
|
|
|
|
|
and/or modules, see the C<$BLACKLIST> variable further down. |
1508
|
|
|
|
|
|
|
1509
|
|
|
|
|
|
If a utility or module isn't available, it will be marked in a cache |
1510
|
|
|
|
|
|
(see the C<$METHOD_FAIL> variable further down), so it will not be |
1511
|
|
|
|
|
|
tried again. The C method will only fail when all options are |
1512
|
|
|
|
|
|
exhausted, and it was not able to retrieve the file. |
1513
|
|
|
|
|
|
|
1514
|
|
|
|
|
|
The C utility is available on FreeBSD. NetBSD and Dragonfly BSD |
1515
|
|
|
|
|
|
may also have it from C. We only check for C on those |
1516
|
|
|
|
|
|
three platforms. |
1517
|
|
|
|
|
|
|
1518
|
|
|
|
|
|
C is a very limited L based mechanism for |
1519
|
|
|
|
|
|
retrieving C schemed urls. It doesn't follow redirects for instance. |
1520
|
|
|
|
|
|
|
1521
|
|
|
|
|
|
C only supports C style urls. |
1522
|
|
|
|
|
|
|
1523
|
|
|
|
|
|
A special note about fetching files from an ftp uri: |
1524
|
|
|
|
|
|
|
1525
|
|
|
|
|
|
By default, all ftp connections are done in passive mode. To change |
1526
|
|
|
|
|
|
that, see the C<$FTP_PASSIVE> variable further down. |
1527
|
|
|
|
|
|
|
1528
|
|
|
|
|
|
Furthermore, ftp uris only support anonymous connections, so no |
1529
|
|
|
|
|
|
named user/password pair can be passed along. |
1530
|
|
|
|
|
|
|
1531
|
|
|
|
|
|
C is blacklisted by default; see the C<$BLACKLIST> variable |
1532
|
|
|
|
|
|
further down. |
1533
|
|
|
|
|
|
|
1534
|
|
|
|
|
|
=head1 GLOBAL VARIABLES |
1535
|
|
|
|
|
|
|
1536
|
|
|
|
|
|
The behaviour of File::Fetch can be altered by changing the following |
1537
|
|
|
|
|
|
global variables: |
1538
|
|
|
|
|
|
|
1539
|
|
|
|
|
|
=head2 $File::Fetch::FROM_EMAIL |
1540
|
|
|
|
|
|
|
1541
|
|
|
|
|
|
This is the email address that will be sent as your anonymous ftp |
1542
|
|
|
|
|
|
password. |
1543
|
|
|
|
|
|
|
1544
|
|
|
|
|
|
Default is C. |
1545
|
|
|
|
|
|
|
1546
|
|
|
|
|
|
=head2 $File::Fetch::USER_AGENT |
1547
|
|
|
|
|
|
|
1548
|
|
|
|
|
|
This is the useragent as C will report it. |
1549
|
|
|
|
|
|
|
1550
|
|
|
|
|
|
Default is C. |
1551
|
|
|
|
|
|
|
1552
|
|
|
|
|
|
=head2 $File::Fetch::FTP_PASSIVE |
1553
|
|
|
|
|
|
|
1554
|
|
|
|
|
|
This variable controls whether the environment variable C |
1555
|
|
|
|
|
|
and any passive switches to commandline tools will be set to true. |
1556
|
|
|
|
|
|
|
1557
|
|
|
|
|
|
Default value is 1. |
1558
|
|
|
|
|
|
|
1559
|
|
|
|
|
|
Note: When $FTP_PASSIVE is true, C will not be used to fetch |
1560
|
|
|
|
|
|
files, since passive mode can only be set interactively for this binary |
1561
|
|
|
|
|
|
|
1562
|
|
|
|
|
|
=head2 $File::Fetch::TIMEOUT |
1563
|
|
|
|
|
|
|
1564
|
|
|
|
|
|
When set, controls the network timeout (counted in seconds). |
1565
|
|
|
|
|
|
|
1566
|
|
|
|
|
|
Default value is 0. |
1567
|
|
|
|
|
|
|
1568
|
|
|
|
|
|
=head2 $File::Fetch::WARN |
1569
|
|
|
|
|
|
|
1570
|
|
|
|
|
|
This variable controls whether errors encountered internally by |
1571
|
|
|
|
|
|
C should be C'd or not. |
1572
|
|
|
|
|
|
|
1573
|
|
|
|
|
|
Set to false to silence warnings. Inspect the output of the C |
1574
|
|
|
|
|
|
method manually to see what went wrong. |
1575
|
|
|
|
|
|
|
1576
|
|
|
|
|
|
Defaults to C. |
1577
|
|
|
|
|
|
|
1578
|
|
|
|
|
|
=head2 $File::Fetch::DEBUG |
1579
|
|
|
|
|
|
|
1580
|
|
|
|
|
|
This enables debugging output when calling commandline utilities to |
1581
|
|
|
|
|
|
fetch files. |
1582
|
|
|
|
|
|
This also enables C errors, instead of the regular |
1583
|
|
|
|
|
|
C errors. |
1584
|
|
|
|
|
|
|
1585
|
|
|
|
|
|
Good for tracking down why things don't work with your particular |
1586
|
|
|
|
|
|
setup. |
1587
|
|
|
|
|
|
|
1588
|
|
|
|
|
|
Default is 0. |
1589
|
|
|
|
|
|
|
1590
|
|
|
|
|
|
=head2 $File::Fetch::BLACKLIST |
1591
|
|
|
|
|
|
|
1592
|
|
|
|
|
|
This is an array ref holding blacklisted modules/utilities for fetching |
1593
|
|
|
|
|
|
files with. |
1594
|
|
|
|
|
|
|
1595
|
|
|
|
|
|
To disallow the use of, for example, C and C, you could |
1596
|
|
|
|
|
|
set $File::Fetch::BLACKLIST to: |
1597
|
|
|
|
|
|
|
1598
|
|
|
|
|
|
$File::Fetch::BLACKLIST = [qw|lwp netftp|] |
1599
|
|
|
|
|
|
|
1600
|
|
|
|
|
|
The default blacklist is [qw|ftp|], as C is rather unreliable. |
1601
|
|
|
|
|
|
|
1602
|
|
|
|
|
|
See the note on C below. |
1603
|
|
|
|
|
|
|
1604
|
|
|
|
|
|
=head2 $File::Fetch::METHOD_FAIL |
1605
|
|
|
|
|
|
|
1606
|
|
|
|
|
|
This is a hashref registering what modules/utilities were known to fail |
1607
|
|
|
|
|
|
for fetching files (mostly because they weren't installed). |
1608
|
|
|
|
|
|
|
1609
|
|
|
|
|
|
You can reset this cache by assigning an empty hashref to it, or |
1610
|
|
|
|
|
|
individually remove keys. |
1611
|
|
|
|
|
|
|
1612
|
|
|
|
|
|
See the note on C below. |
1613
|
|
|
|
|
|
|
1614
|
|
|
|
|
|
=head1 MAPPING |
1615
|
|
|
|
|
|
|
1616
|
|
|
|
|
|
|
1617
|
|
|
|
|
|
Here's a quick mapping for the utilities/modules, and their names for |
1618
|
|
|
|
|
|
the $BLACKLIST, $METHOD_FAIL and other internal functions. |
1619
|
|
|
|
|
|
|
1620
|
|
|
|
|
|
LWP => lwp |
1621
|
|
|
|
|
|
HTTP::Lite => httplite |
1622
|
|
|
|
|
|
HTTP::Tiny => httptiny |
1623
|
|
|
|
|
|
Net::FTP => netftp |
1624
|
|
|
|
|
|
wget => wget |
1625
|
|
|
|
|
|
lynx => lynx |
1626
|
|
|
|
|
|
ncftp => ncftp |
1627
|
|
|
|
|
|
ftp => ftp |
1628
|
|
|
|
|
|
curl => curl |
1629
|
|
|
|
|
|
rsync => rsync |
1630
|
|
|
|
|
|
lftp => lftp |
1631
|
|
|
|
|
|
fetch => fetch |
1632
|
|
|
|
|
|
IO::Socket => iosock |
1633
|
|
|
|
|
|
|
1634
|
|
|
|
|
|
=head1 FREQUENTLY ASKED QUESTIONS |
1635
|
|
|
|
|
|
|
1636
|
|
|
|
|
|
=head2 So how do I use a proxy with File::Fetch? |
1637
|
|
|
|
|
|
|
1638
|
|
|
|
|
|
C currently only supports proxies with LWP::UserAgent. |
1639
|
|
|
|
|
|
You will need to set your environment variables accordingly. For |
1640
|
|
|
|
|
|
example, to use an ftp proxy: |
1641
|
|
|
|
|
|
|
1642
|
|
|
|
|
|
$ENV{ftp_proxy} = 'foo.com'; |
1643
|
|
|
|
|
|
|
1644
|
|
|
|
|
|
Refer to the LWP::UserAgent manpage for more details. |
1645
|
|
|
|
|
|
|
1646
|
|
|
|
|
|
=head2 I used 'lynx' to fetch a file, but its contents is all wrong! |
1647
|
|
|
|
|
|
|
1648
|
|
|
|
|
|
C can only fetch remote files by dumping its contents to C, |
1649
|
|
|
|
|
|
which we in turn capture. If that content is a 'custom' error file |
1650
|
|
|
|
|
|
(like, say, a C<404 handler>), you will get that contents instead. |
1651
|
|
|
|
|
|
|
1652
|
|
|
|
|
|
Sadly, C doesn't support any options to return a different exit |
1653
|
|
|
|
|
|
code on non-C<200 OK> status, giving us no way to tell the difference |
1654
|
|
|
|
|
|
between a 'successful' fetch and a custom error page. |
1655
|
|
|
|
|
|
|
1656
|
|
|
|
|
|
Therefor, we recommend to only use C as a last resort. This is |
1657
|
|
|
|
|
|
why it is at the back of our list of methods to try as well. |
1658
|
|
|
|
|
|
|
1659
|
|
|
|
|
|
=head2 Files I'm trying to fetch have reserved characters or non-ASCII characters in them. What do I do? |
1660
|
|
|
|
|
|
|
1661
|
|
|
|
|
|
C is relatively smart about things. When trying to write |
1662
|
|
|
|
|
|
a file to disk, it removes the C (see the |
1663
|
|
|
|
|
|
C method for details) from the file name before creating |
1664
|
|
|
|
|
|
it. In most cases this suffices. |
1665
|
|
|
|
|
|
|
1666
|
|
|
|
|
|
If you have any other characters you need to escape, please install |
1667
|
|
|
|
|
|
the C module from CPAN, and pre-encode your URI before |
1668
|
|
|
|
|
|
passing it to C. You can read about the details of URIs |
1669
|
|
|
|
|
|
and URI encoding here: |
1670
|
|
|
|
|
|
|
1671
|
|
|
|
|
|
http://www.faqs.org/rfcs/rfc2396.html |
1672
|
|
|
|
|
|
|
1673
|
|
|
|
|
|
=head1 TODO |
1674
|
|
|
|
|
|
|
1675
|
|
|
|
|
|
=over 4 |
1676
|
|
|
|
|
|
|
1677
|
|
|
|
|
|
=item Implement $PREFER_BIN |
1678
|
|
|
|
|
|
|
1679
|
|
|
|
|
|
To indicate to rather use commandline tools than modules |
1680
|
|
|
|
|
|
|
1681
|
|
|
|
|
|
=back |
1682
|
|
|
|
|
|
|
1683
|
|
|
|
|
|
=head1 BUG REPORTS |
1684
|
|
|
|
|
|
|
1685
|
|
|
|
|
|
Please report bugs or other issues to Ebug-file-fetch@rt.cpan.org. |
1686
|
|
|
|
|
|
|
1687
|
|
|
|
|
|
=head1 AUTHOR |
1688
|
|
|
|
|
|
|
1689
|
|
|
|
|
|
This module by Jos Boumans Ekane@cpan.orgE. |
1690
|
|
|
|
|
|
|
1691
|
|
|
|
|
|
=head1 COPYRIGHT |
1692
|
|
|
|
|
|
|
1693
|
|
|
|
|
|
This library is free software; you may redistribute and/or modify it |
1694
|
|
|
|
|
|
under the same terms as Perl itself. |
1695
|
|
|
|
|
|
|
1696
|
|
|
|
|
|
|
1697
|
|
|
|
|
|
=cut |
1698
|
|
|
|
|
|
|
1699
|
|
|
|
|
|
# Local variables: |
1700
|
|
|
|
|
|
# c-indentation-style: bsd |
1701
|
|
|
|
|
|
# c-basic-offset: 4 |
1702
|
|
|
|
|
|
# indent-tabs-mode: nil |
1703
|
|
|
|
|
|
# End: |
1704
|
|
|
|
|
|
# vim: expandtab shiftwidth=4: |
1705
|
|
|
|
|
|
|
1706
|
|
|
|
|
|
|
1707
|
|
|
|
|
|
|
1708
|
|
|
|
|
|
|