File Coverage

blib/lib/Net/FTP/Tiny.pm
Criterion Covered Total %
statement 34 179 18.9
branch 14 100 14.0
condition 0 24 0.0
subroutine 11 22 50.0
pod 1 1 100.0
total 60 326 18.4


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             Net::FTP::Tiny - minimal FTP client
4              
5             =head1 SYNOPSIS
6              
7             use Net::FTP::Tiny qw(ftp_get);
8              
9             $data = ftp_get("ftp://ftp.iana.org/tz/data/iso3166.tab");
10              
11             =head1 DESCRIPTION
12              
13             This module provides an easy interface to retrieve files using the FTP
14             protocol. The location of a file to retrieve is specified using a URL.
15             IPv6 is supported, if the optional module L is installed.
16             Only retrieval is supported, not storing or anything more exotic.
17              
18             =cut
19              
20             package Net::FTP::Tiny;
21              
22 3     3   244503 { use 5.006; }
  3         19  
23 3     3   24 use warnings;
  3         8  
  3         130  
24 3     3   23 use strict;
  3         9  
  3         122  
25              
26 3     3   25 use Carp qw(croak);
  3         14  
  3         252  
27              
28             our $VERSION = "0.002";
29              
30             # Set up superclass manually, rather than via "parent", to avoid non-core
31             # dependency.
32 3     3   26 use Exporter ();
  3         7  
  3         4752  
33             our @ISA = qw(Exporter);
34             our @EXPORT_OK = qw(ftp_get);
35              
36             =head1 FUNCTIONS
37              
38             =over
39              
40             =item ftp_get(URL)
41              
42             I must be a URL using the C scheme. The file that it refers to
43             is retrieved from the FTP server, and its content is returned in the form
44             of a string of octets. If any error occurs then the function Cs.
45             Possible errors include the URL being malformed, inability to contact
46             the FTP server, and the FTP server reporting that the file doesn't exist.
47              
48             =cut
49              
50             {
51             local $SIG{__DIE__};
52 3     3   2927 eval("$]" >= 5.008 ? q{
  3         134  
  3         111  
53             use utf8 ();
54             *_downgrade = \&utf8::downgrade;
55             } : q{
56             sub _downgrade($) {
57             # Logic copied from Scalar::String. See there
58             # for explanation; the code depends on accidents
59             # of the Perl 5.6 implementation.
60             return if unpack("C", "\xaa".$_[0]) == 170;
61             {
62             use bytes;
63             $_[0] =~ /\A[\x00-\x7f\x80-\xbf\xc2\xc3]*\z/
64             or die "Wide character";
65             }
66             use utf8;
67             ($_[0]) = ($_[0] =~ /\A([\x00-\xff]*)\z/);
68             }
69             });
70             die $@ unless $@ eq "";
71             }
72              
73 33     33   5407 sub _croak($) { croak "FTP error: $_[0]" }
74              
75             #
76             # FTP URL interpretation is governed by RFC 3986 (generic URI syntax) and
77             # RFC 1738 (an older URL standard, containing the FTP-specific parts).
78             # There is no formal specification for the syntax of FTP URLs in the
79             # context of RFC 3986's base syntax, so this code merges the two in
80             # what seems like a reasonable manner. Generally, RFC 3986 is used to
81             # determine which characters are permitted in each component, and RFC
82             # 1738 determines higher-level structure.
83             #
84              
85             my $safechar_rx = qr/[0-9A-Za-z\-\.\_\~\!\$\&\'\(\)\*\+\,\;\=]/;
86             my $hexpair_rx = qr/\%[0-9A-Fa-f]{2}/;
87              
88             my $d8_rx = qr/25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9]/;
89             my $ipv4_address_rx = qr/$d8_rx\.$d8_rx\.$d8_rx\.$d8_rx/o;
90              
91             my $h16_rx = qr/[0-9A-Fa-f]{1,4}/;
92             my $ls32_rx = qr/$h16_rx\:$h16_rx|$ipv4_address_rx/o;
93             my $ipv6_address_rx = qr/
94             (?:) (?:$h16_rx\:){6} $ls32_rx
95             | \:\: (?:$h16_rx\:){5} $ls32_rx
96             | (?: $h16_rx )? \:\: (?:$h16_rx\:){4} $ls32_rx
97             | (?: (?:$h16_rx\:){0,1} $h16_rx )? \:\: (?:$h16_rx\:){3} $ls32_rx
98             | (?: (?:$h16_rx\:){0,2} $h16_rx )? \:\: (?:$h16_rx\:){2} $ls32_rx
99             | (?: (?:$h16_rx\:){0,3} $h16_rx )? \:\: (?:$h16_rx\:) $ls32_rx
100             | (?: (?:$h16_rx\:){0,4} $h16_rx )? \:\: $ls32_rx
101             | (?: (?:$h16_rx\:){0,5} $h16_rx )? \:\: $h16_rx
102             | (?: (?:$h16_rx\:){0,6} $h16_rx )? \:\:
103             /xo;
104              
105             my $ip_future_rx = qr/[vV][0-9A-Fa-f]+\.(?:$safechar_rx|\:)+/o;
106             my $ip_literal_rx = qr/\[(?:$ipv6_address_rx|$ip_future_rx)\]/o;
107             my $hostname_rx = qr/
108             (?:[0-9A-Za-z](?:[\-0-9A-Za-z]*[0-9A-Za-z])?\.)*
109             [A-Za-z](?:[\-0-9A-Za-z]*[0-9A-Za-z])?
110             /x;
111             my $host_rx = qr/$ip_literal_rx|$ipv4_address_rx|$hostname_rx/o;
112              
113             my $userdata_rx = qr/(?:$safechar_rx|$hexpair_rx)*/o;
114             my $filename_rx = qr/(?:(?!\;)$safechar_rx|[\:\@]|$hexpair_rx)*/o;
115              
116             sub _uri_decode($) {
117 94     94   235 my($str) = @_;
118 94         233 $str =~ s/\%([0-9A-Fa-f]{2})/chr(hex($1))/eg;
  8         52  
119 94         547 return $str;
120             }
121              
122             sub _parse_ftp_url($) {
123 185     185   160566 my($url) = @_;
124 185         3269 my($user, $pass, $host, $port, $path, $type) = ($url =~ m/\A
125             [fF][tT][pP]\:\/\/
126             (?:((?>$userdata_rx))(?:\:((?>$userdata_rx)))?\@)?
127             ((?>$host_rx))(?:\:([0-9]+)?)?
128             (?:((?>(?>\/$filename_rx)+))(?:\;type\=([aAiIdD]))?)?
129             \z/xo);
130 185 100       738 defined $host or _croak "<$url> is not an ftp URL";
131 152 100       694 my @path = defined($path) ? ($path =~ m#/($filename_rx)#og) : ();
132 152         295 my $filename = pop(@path);
133             return {
134             (defined($user) ? (username => _uri_decode($user)) : ()),
135             (defined($pass) ? (password => _uri_decode($pass)) : ()),
136             host => $host,
137             port => defined($port) ? 0+$port : 21,
138             (defined($path) ? (
139 152 100       877 dirs => [ map { _uri_decode($_) } @path ],
  15 100       37  
    100          
    100          
    100          
140             filename => _uri_decode($filename),
141             ) : ()),
142             (defined($type) ? (type => lc($type)) : ()),
143             };
144             }
145              
146             my $blksize = 0x8000;
147             my $timeout = 50;
148              
149             my $socket_class;
150             sub _socket_class() {
151             return $socket_class ||=
152             eval { local $SIG{__DIE__};
153             require IO::Socket::IP;
154             IO::Socket::IP->VERSION(0.08);
155             "IO::Socket::IP";
156 0   0 0   0 } || do {
      0        
157             require IO::Socket::INET;
158             IO::Socket::INET->VERSION(1.24);
159             "IO::Socket::INET";
160             };
161             }
162              
163             sub _socket_new($@) {
164 0     0   0 my $what = shift(@_);
165 0   0     0 return _socket_class()->new(@_) || do {
166             my $err = $@;
167             chomp $err;
168             $err =~ s/\AIO::Socket::[A-Z0-9]+: //;
169             $err ne "" or $err = "$socket_class didn't say why";
170             _croak "failed to $what: $err";
171             };
172             }
173              
174             sub _open_tcp($$) {
175 0     0   0 my($host, $port) = @_;
176 0 0       0 if($host =~ /\A\[v/) {
177 0         0 _croak "IP addresses from the future not supported";
178             }
179 0 0       0 if($host =~ /\A\[/) {
180 0 0       0 _croak "IPv6 support requires IO::Socket::IP"
181             unless _socket_class() eq "IO::Socket::IP";
182             }
183 0 0       0 my $bare_host = $host =~ /\A\[(.*)\]\z/s ? $1 : $host;
184 0 0 0     0 $port >= 1 && $port <= 65535
185             or _croak "failed to connect to $host TCP port $port: ".
186             "invalid port number";
187 0         0 return _socket_new("connect to $host TCP port $port",
188             PeerHost => $bare_host,
189             PeerPort => $port,
190             Proto => "tcp",
191             Timeout => $timeout,
192             );
193             }
194              
195             my $loaded_domains;
196             my %domain_val_tag;
197             sub _decode_domain($) {
198 0     0   0 my($domval) = @_;
199 0 0       0 unless($loaded_domains) {
200 0         0 require Socket;
201 0         0 Socket->VERSION(1.72);
202 0         0 foreach my $tag (qw(INET INET6)) {
203 3     3   35 no strict "refs";
  3         9  
  3         7743  
204 0 0       0 my $sub = *{"Socket::AF_$tag"}{CODE} or next;
  0         0  
205 0         0 my $val = eval { local $SIG{__DIE__}; $sub->() };
  0         0  
  0         0  
206 0 0       0 defined $val and $domain_val_tag{$val} = $tag;
207             }
208 0         0 $loaded_domains = 1;
209             }
210 0         0 my $tag = $domain_val_tag{$domval};
211 0 0       0 defined $tag or _croak "unrecognised socket domain";
212 0         0 return $tag;
213             }
214              
215             sub _check_timeout($$$) {
216 0     0   0 my($sock, $writing, $what) = @_;
217 0         0 vec(my $b = "", $sock->fileno, 1) = 1;
218 0 0       0 my $s = select($writing ? undef : $b, $writing ? $b : undef, $b,
    0          
219             $timeout);
220 0 0       0 $s >= 1 or _croak "failed to $what: @{[$s ? $! : q(timed out)]}";
  0 0       0  
221             }
222              
223             sub _send_cmd($$) {
224 0     0   0 my($ctlconn, $cmd) = @_;
225             # This encoding is specified by RFC 2640. It ensures that
226             # a parameter string can be distinguished from the \r\n that
227             # terminates the command.
228 0         0 $cmd =~ s/\r/\r\0/g;
229 0         0 $cmd .= "\r\n";
230 0         0 my $len = length($cmd);
231 0         0 local $SIG{PIPE} = "IGNORE";
232 0         0 for(my $pos = 0; $pos != $len; ) {
233 0         0 _check_timeout($ctlconn, 1, "send command");
234 0         0 my $n = $ctlconn->syswrite($cmd, $len-$pos, $pos);
235 0 0       0 defined $n or _croak "failed to send command: $!";
236 0         0 $pos += $n;
237             }
238             }
239              
240             sub _recv_reply($$) {
241 0     0   0 my($ctlconn, $rbufp) = @_;
242 0         0 my $content;
243 0         0 while(1) {
244 0 0       0 $$rbufp !~ /\A(?:[0-9]{0,2}[^0-9]|[0-9]{3}[^\-\ ])|\r[^\0\n]/
245             or _croak "malformed reply from server";
246 0 0       0 if($$rbufp =~ s/\A([0-9]{3} (?>(?>(?>[^\r]+)|\r\0)*))\r\n//) {
    0          
247 0         0 $content = $1;
248 0         0 last;
249             } elsif($$rbufp =~ s/\A
250             ([0-9]{3})-((?>(?>(?>[^\r]+)|\r\0)*)\r\n
251             (?>(?>(?>[^\r]+)|\r\0)*\r\n)*?)
252             \1\ ((?:(?>[^\r]+)|\r\0)*)\r\n
253             //x) {
254 0         0 $content = "$1 $2$3";
255 0         0 last;
256             }
257 0         0 _check_timeout($ctlconn, 0, "receive reply");
258 0         0 my $n = $ctlconn->sysread($$rbufp, $blksize, length($$rbufp));
259 0 0       0 defined $n or _croak "failed to receive reply: $!";
260 0 0       0 $n != 0 or _croak "failed to receive reply: unexpected EOF";
261             }
262             # We don't need to preserve exact character content of reply,
263             # so sanitise the reply for use in error messages. Some servers
264             # send the reply code on every line, in the SMTP style.
265 0         0 my($code) = ($content =~ /\A([0-9]{3})/);
266 0         0 $content =~ s/\r\n\Q$code\E-/\r\n/g;
267 0         0 $content =~ s/\r\n/%NL/g;
268 0         0 $content =~ s/\r\0/\r/g;
269 0         0 $content =~ s/([^ -~])/sprintf("%%%02X", ord($1))/eg;
  0         0  
270 0         0 return $content;
271             }
272              
273             sub _negotiate_dataconn($$) {
274 0     0   0 my($ctlconn, $rbufp) = @_;
275 0 0       0 my $pasv = _decode_domain($ctlconn->sockdomain) eq "INET" ?
276             "PASV" : "EPSV";
277 0         0 _send_cmd($ctlconn, $pasv);
278 0         0 my $r = _recv_reply($ctlconn, $rbufp);
279 0 0 0     0 if($pasv eq "PASV" &&
    0 0        
    0          
280             $r =~ /\A227 .*?($d8_rx(?:,$d8_rx){5})(?![0-9])/so) {
281 0         0 my @p = split(/,/, $1);
282 0         0 my $host = join(".", @p[0..3]);
283 0         0 my $port = ((0+$p[4]) << 8) | (0+$p[5]);
284 0         0 my $conn = _open_tcp($host, $port);
285 0     0   0 return sub { $conn };
  0         0  
286             } elsif($pasv eq "EPSV" &&
287             $r =~ /\A229 .*?\(([!-~])\1\1([0-9]+)\1\)/s) {
288 0         0 my $port = $2;
289 0         0 my $conn = _open_tcp($ctlconn->peerhost, $port);
290 0     0   0 return sub { $conn };
  0         0  
291             } elsif($r !~ /\A50[02]/) {
292 0         0 _croak $r;
293             }
294 0         0 my $lsock = _socket_new("listen on TCP port",
295             LocalAddr => $ctlconn->sockhost,
296             Proto => "tcp",
297             Listen => 128,
298             Timeout => $timeout,
299             );
300 0         0 my $domain = _decode_domain($lsock->sockdomain);
301 0         0 my $myaddr = $lsock->sockhost;
302 0         0 my $myport = $lsock->sockport;
303 0         0 my $port_cmd;
304 0 0       0 if($domain eq "INET") {
    0          
305 0         0 my @p = (split(/\./, $myaddr), $myport >> 8, $myport & 0xff);
306 0         0 $port_cmd = "PORT @{[join(q(,), @p)]}";
  0         0  
307             } elsif($domain eq "INET6") {
308 0         0 $port_cmd = "EPRT |2|$myaddr|$myport|";
309 0         0 } else { _croak "unrecognised socket domain" }
310 0         0 _send_cmd($ctlconn, $port_cmd);
311 0         0 $r = _recv_reply($ctlconn, $rbufp);
312 0 0       0 $r =~ /\A200/ or _croak $r;
313 0         0 my $require_peerhost = $ctlconn->peerhost;
314 0         0 my $require_peerport = $ctlconn->peerport - 1;
315             return sub {
316 0     0   0 _check_timeout($lsock, 0, "accept TCP connection");
317 0         0 my $conn = $lsock->accept;
318 0 0       0 defined $conn or _croak "failed to accept TCP connection: $!";
319 0         0 $lsock = undef;
320 0 0 0     0 unless($conn->peerhost eq $require_peerhost &&
321             $conn->peerport == $require_peerport) {
322 0         0 _croak "data connection made by wrong peer";
323             }
324 0         0 return $conn;
325 0         0 };
326             }
327              
328             sub ftp_get($) {
329 1     1 1 67 my($url) = @_;
330 1         13 _downgrade($url);
331 0           my %params = %{_parse_ftp_url($url)};
  0            
332 0 0         unless(exists $params{username}) {
333 0           $params{username} = "anonymous";
334 0           $params{password} = "-anonymous\@";
335             }
336 0 0         defined $params{filename} or _croak "no path supplied";
337 0 0         exists $params{type} or $params{type} = "i";
338 0 0         $params{type} eq "d" and _croak "directory listing not supported";
339 0           my $ctlconn = _open_tcp($params{host}, $params{port});
340 0           my $rbuf = "";
341 0           my $r = _recv_reply($ctlconn, \$rbuf);
342 0 0         $r =~ /\A120/ and $r = _recv_reply($ctlconn, \$rbuf);
343 0 0         $r =~ /\A220/ or _croak $r;
344 0           _send_cmd($ctlconn, "USER $params{username}");
345 0           $r = _recv_reply($ctlconn, \$rbuf);
346 0 0 0       if($r =~ /\A331/ && exists($params{password})) {
347 0           _send_cmd($ctlconn, "PASS $params{password}");
348 0           $r = _recv_reply($ctlconn, \$rbuf);
349             }
350 0 0         $r =~ /\A230/ or _croak $r;
351 0           foreach my $dir (@{$params{dirs}}) {
  0            
352 0           _send_cmd($ctlconn, "CWD $dir");
353 0           $r = _recv_reply($ctlconn, \$rbuf);
354 0 0         $r =~ /\A250/ or _croak $r;
355             }
356 0 0         if($params{type} eq "i") {
357 0           _send_cmd($ctlconn, "TYPE I");
358 0           $r = _recv_reply($ctlconn, \$rbuf);
359 0 0         $r =~ /\A200/ or _croak $r;
360             }
361 0           my $dataconn_thunk = _negotiate_dataconn($ctlconn, \$rbuf);
362 0           _send_cmd($ctlconn, "RETR $params{filename}");
363 0           $r = _recv_reply($ctlconn, \$rbuf);
364 0 0         $r =~ /\A1(?:25|50)/ or _croak $r;
365 0           my $dataconn = $dataconn_thunk->();
366 0           $dataconn_thunk = undef;
367 0           my $data = "";
368 0           while(1) {
369 0           _check_timeout($dataconn, 0, "receive data");
370 0           my $n = $dataconn->sysread($data, $blksize, length($data));
371 0 0         defined $n or _croak "failed to receive data: $!";
372 0 0         $n == 0 and last;
373             }
374 0           $dataconn = undef;
375 0           $r = _recv_reply($ctlconn, \$rbuf);
376 0 0         $r =~ /\A2(?:26|50)/ or _croak $r;
377 0           return $data;
378             }
379              
380             =back
381              
382             =head1 SEE ALSO
383              
384             L,
385             L,
386             L
387              
388             =head1 AUTHOR
389              
390             Andrew Main (Zefram)
391              
392             =head1 COPYRIGHT
393              
394             Copyright (C) 2012, 2017 Andrew Main (Zefram)
395              
396             =head1 LICENSE
397              
398             This module is free software; you can redistribute it and/or modify it
399             under the same terms as Perl itself.
400              
401             =cut
402              
403             1;