File Coverage

blib/lib/Treex/PML/IO.pm
Criterion Covered Total %
statement 67 431 15.5
branch 0 276 0.0
condition 8 180 4.4
subroutine 19 60 31.6
pod 25 26 96.1
total 119 973 12.2


line stmt bran cond sub pod time code
1             # -*- cperl -*-
2              
3             =head1 NAME
4              
5             Treex::PML::IO - I/O support functions used by Treex::PML
6              
7             =head1 DESCRIPTION
8              
9             This module implements various I/O and filesystem related functions
10             used by L.
11              
12             The current implementation supports the following protocols for
13             reading:
14              
15             http, https, ftp, gopher, news - reading (POSIX and Windows)
16              
17             ssh, fish, sftp - reading/writing on POSIX systems via secure shell copy
18             or the kioclient from KDE.
19              
20             The module attempts to handle GNU Zip-compressed files (suffix .gz)
21             transparently.
22              
23             =head1 FUNCTIONS
24              
25             =cut
26              
27             package Treex::PML::IO;
28 1     1   3 use Exporter;
  1         1  
  1         36  
29 1     1   581 use File::Temp 0.14 qw();
  1         13427  
  1         22  
30 1     1   402 use IO::File;
  1         631  
  1         93  
31 1     1   456 use IO::Pipe;
  1         772  
  1         22  
32 1     1   4 use strict;
  1         1  
  1         12  
33 1     1   3 use URI;
  1         1  
  1         13  
34 1     1   388 use URI::file;
  1         3386  
  1         22  
35 1     1   4 use URI::Escape;
  1         2  
  1         41  
36 1     1   4 use Scalar::Util qw(blessed);
  1         1  
  1         30  
37 1     1   4 use UNIVERSAL::DOES;
  1         1  
  1         21  
38 1     1   3 use Carp;
  1         1  
  1         31  
39 1     1   549 use LWP::UserAgent;
  1         22634  
  1         23  
40 1     1   5 use File::Spec;
  1         2  
  1         16  
41 1     1   4 use Fcntl qw(SEEK_SET);
  1         1  
  1         48  
42              
43 1     1   3 use Cwd qw(getcwd);
  1         1  
  1         43  
44              
45 1         146 use vars qw(@ISA $VERSION @EXPORT @EXPORT_OK
46             %UNLINK_ON_CLOSE
47             $Debug
48             $kioclient $kioclient_opts
49             $ssh $ssh_opts
50             $curl $curl_opts
51             $gzip $gzip_opts
52             $zcat $zcat_opts
53             $reject_proto
54             $lwp_user_agent
55 1     1   3 );
  1         1  
56              
57             sub DOES {
58 0     0 0   my ($self,$role)=@_;
59 0 0 0       if ($role eq 'IO' or $role eq __PACKAGE__) { # backward compatibility
60 0           return 1;
61             } else {
62 0           return $self->SUPER::DOES($role);
63             }
64             }
65              
66             {
67             package Treex::PML::IO::UserAgent;
68 1     1   4 use base qw(LWP::UserAgent);
  1         1  
  1         169  
69             }
70              
71             #$Debug=0;
72             my %input_protocol_handler;
73              
74             BEGIN {
75             *_find_exe = eval {
76             require File::Which;
77             \&File::Which::which
78 1   50 1   2 } || sub {};
79              
80 1         2 $VERSION = '2.21'; # version template
81 1         8 @ISA=qw(Exporter);
82 1         3 @EXPORT_OK = qw($kioclient $kioclient_opts
83             $ssh $ssh_opts
84             $curl $curl_opts
85             $gzip $gzip_opts
86             $zcat $zcat_opts
87             &set_encoding
88             &open_backend &open_uri &close_backend &close_uri
89             &get_protocol "e_filename
90             &rename_uri);
91              
92 1   33     5 $zcat ||= _find_exe('zcat');
93 1   33     217 $gzip ||= _find_exe('gzip');
94 1   33     163 $kioclient ||= _find_exe('kioclient');
95 1   33     136 $ssh ||= _find_exe('ssh');
96 1   33     118 $curl ||= _find_exe('curl');
97 1   50     132 $ssh_opts ||= '-C';
98 1   50     4 $reject_proto ||= '^(pop3?s?|imaps?)\$';
99 1         9 $lwp_user_agent = Treex::PML::IO::UserAgent->new(keep_alive=>1);
100 1         3165 $lwp_user_agent->agent("Treex::PML_IO/$VERSION");
101             };
102              
103              
104             =over 4
105              
106             =item C
107              
108             Returns directory part of a given path (including volume).
109              
110             =cut
111              
112             sub DirPart {
113 0     0 1   return File::Spec->catpath(
114             (File::Spec->splitpath($_[0]))[0,1],''
115             );
116             }
117              
118             =item C
119              
120             If called without an argument, returns the directory of the perl
121             module or macro-file that invoked this macro.
122              
123             If a relative path is given as an argument, a respective absolute path
124             is computed based on the caller's directory and returned.
125              
126             =cut
127              
128             sub CallerDir {
129             return
130 0 0   0 1   @_>0
131             ? File::Spec->rel2abs($_[0], DirPart( (caller)[1] ))
132             : DirPart( (caller)[1] );
133             }
134              
135             =item C
136              
137             Register a callback to fetch URIs of a given protocol. C<$scheme> is
138             the URI scheme of the protocol (i.e. the first part of an URI
139             preceding the comma, e.g. 'ftp' or 'https'). <$callback> is either a
140             CODE reference or an ARRAY reference whose first element is a CODE
141             reference and the other elements are additional arguments to be passed
142             to the callback prior to the standard arguments.
143              
144             When the library attempts to fetch a resource from an URI matching
145             given scheme, the callback is invoked with the (optional) user
146             parameters followed by the URI.
147              
148             The callback function must either return a new URI (typically a
149             file:// URI pointing to a temporary file) and a boolean flag
150             indicating whether the library should attempt to delete the
151             returned file after it finished reading.
152              
153             If the callback returns the same or another URI with the C<$scheme>,
154             the callback is not reinvoked, but passed on to further processing
155             (i.e. by Treex::PML I/O backends).
156              
157             =cut
158              
159             sub register_input_protocol_handler {
160 0     0 1   my ($proto,$handler)=@_;
161 0 0 0       if (ref($handler) eq 'CODE' or ref($handler) eq 'ARRAY') {
162 0 0         if (exists($input_protocol_handler{$proto})) {
163 0           carp(__PACKAGE__."::register_input_protocol_handler: WARNING: redefining protocol handler for '$proto'");
164             }
165 0           $input_protocol_handler{$proto}=$handler;
166             } else {
167 0           croak("Wrong arguments. Usage: Treex::PML::IO::register_input_protocol_handler(protocol=>callback)");
168             }
169             }
170              
171             =item unregister_input_protocol_handler($scheme)
172              
173             Unregister a handler for a given URI scheme.
174              
175             =cut
176              
177             sub unregister_input_protocol_handler {
178 0     0 1   my ($proto)=@_;
179 0           return delete $input_protocol_handler{$proto};
180             }
181              
182             =item get_input_protocol_handler($scheme)
183              
184             Returns the user-defined handler registered for a given URI scheme; if
185             none, undef is returned.
186              
187             =cut
188              
189             sub get_input_protocol_handler {
190 0     0 1   my ($proto)=@_;
191 0           return $input_protocol_handler{$proto};
192             }
193              
194             =item set_encoding($filehandle, $encoding)
195              
196             Safely resets Perl I/O-layer on a given filehandle to decode or encode
197             from/to a given encoding. This is equivalent to:
198              
199             binmode($filehandle,":raw:perlio:encoding($encoding)");
200              
201             except that errors are turned into warnings.
202              
203             =cut
204              
205             sub set_encoding {
206 0     0 1   my ($fh,$encoding) = @_;
207 1     1   732 no integer;
  1         8  
  1         4  
208 0 0 0       if (defined($fh) and defined($encoding) and ($]>=5.008)) {
      0        
209 0           eval {
210 0           binmode($fh,":raw:perlio:encoding($encoding)");
211             };
212 0 0         warn $@ if $@;
213             }
214 0           return $fh;
215             }
216              
217             =item get_protocol($filename_or_URI)
218              
219             If the argument is a filename, returns 'file'; if the argument is an
220             URI, returns the URI's scheme. Note: unless the argument is an URI
221             object, a heuristic is used to determine the scheme. To avoid
222             reporting Windows drive names as URI schemes, only URI schemes
223             consisting of at least two characters are supported, i.e. C:foo is
224             considered a file name wheres CC:foo would be an URI with the scheme
225             'CC'.
226              
227             =cut
228              
229             # to avoid collision with Win32 drive-names, we only support protocols
230             # with at least two letters
231             sub get_protocol {
232 0     0 1   my ($uri) = @_;
233 0 0 0       if (blessed($uri) and $uri->isa('URI')) {
234 0   0       return $uri->scheme || 'file';
235             }
236 0 0         if ($uri =~ m{^\s*([[:alnum:]][[:alnum:]]+):}) {
237 0           return $1;
238             } else {
239 0           return 'file';
240             }
241             }
242              
243             =item quote_filename($string)
244              
245             Returns given string in shell-quotes with special characters (\, $, ")
246             escaped.
247              
248             =cut
249              
250             sub quote_filename {
251 0     0 1   my ($uri)=@_;
252 0           $uri =~ s{\\}{\\\\}g;
253 0           $uri =~ s{\$}{\\\$}g;
254 0           $uri =~ s{"}{\\"}g;
255 0           return '"'.$uri.'"';
256             }
257              
258             =item get_filename($URI_or_filename)
259              
260             Upgrades given string to an URI and if the resulting URI is in the
261             'file' scheme (e.g. file:///bar/baz), returns the file-name portion of
262             the URI (e.g. /bar/baz). Otherwise returns nothing.
263              
264             =cut
265              
266             sub get_filename {
267 0     0 1   my ($uri)=@_;
268 0           $uri=make_URI($uri); # cast to URI or make a copy
269 0 0         $uri->scheme('file') if !$uri->scheme;
270 0 0         if ($uri->scheme eq 'file') {
271 0           return $uri->file;
272             }
273 0           return;
274             }
275              
276             =item make_abs_URI($URL_or_filename)
277              
278             Upgrades a given string (URL or filename) into an URI object with
279             absolute path (relative URIs are resolved using the current working
280             directory obtained via Cwd::getcwd())
281              
282             =cut
283              
284             sub make_abs_URI {
285 0     0 1   my ($url)=@_;
286 0           my $uri = make_URI($url);
287 0           my $cwd = getcwd();
288 0 0         $cwd = VMS::Filespec::unixpath($cwd) if $^O eq 'VMS';
289 0           $cwd = URI::file->new($cwd);
290 0 0         $cwd .= "/" unless substr($cwd, -1, 1) eq "/";
291 0           return $uri->abs($cwd);
292             }
293              
294             =item make_URI($URL_or_filename)
295              
296             Upgrades a given string (URL or filename) into an URI object.
297              
298             =cut
299              
300             sub make_URI {
301 0     0 1   my ($url)=@_;
302 0           my $uri = URI->new($url);
303 0 0 0       return $uri if blessed($url) and $url->isa('URI'); # return a copy if was URI already
304 0 0 0       if (($uri eq $url or URI::Escape::uri_unescape($uri) eq $url)
      0        
305             and $url =~ m(^\s*[[:alnum:]]+://)) { # looks like it is URL already
306 0           return $uri;
307             } else {
308 0           return URI::file->new($url);
309             }
310             }
311              
312             =item make_relative_URI($URL,$baseURI)
313              
314             Returns a relative URI based in a given base URI. The arguments
315             are automatically upgraded using make_URI() if necessary.
316              
317             =cut
318              
319             sub make_relative_URI {
320 0     0 1   my ($href,$base)=@_;
321             # if (Treex::PML::_is_url($href)) {
322 0 0 0       $href = URI->new(make_URI($href)) unless blessed($href) and $href->isa('URI');
323 0           $base = make_URI($base);
324             ### $href = $href->abs($base)->rel($base);
325 0           $href = $href->rel($base);
326             }
327              
328             =item strip_protocol($URI)
329              
330             Returns the scheme-specific part of the URI (everything between the
331             scheme and the fragment). If the scheme of the URI was 'file', returns
332             the URI as a file name.
333              
334             =cut
335              
336             sub strip_protocol {
337 0     0 1   my ($uri)=@_;
338 0           $uri=make_URI($uri); # make a copy
339 0 0         $uri->scheme('file') if !$uri->scheme;
340 0 0         if ($uri->scheme eq 'file') {
341 0           return $uri->file;
342             }
343 0           return $uri->opaque;
344             }
345              
346             # =item is_gzip($filename)
347              
348             # Auxiliary:
349             # Returns true if the filename ends with the suffix .gz or .gz~.
350              
351             # =cut
352              
353             sub _is_gzip {
354 0 0   0     ($_[0] =~/.gz~?$/) ? 1 : 0;
355             }
356              
357             =item is_same_filename($URI_1,$URI_2)
358              
359             Checks if $URI_1 and $URI_2 point to the same resource. For filenames
360             and URIs in the 'file' scheme checks that the referred files (if
361             exist) are the same using is_same_file(); for other schemes simply
362             checks for string equality on canonical versions of the URIs (see
363             URI->canonical).
364              
365             =cut
366              
367             sub is_same_filename {
368 0     0 1   my ($f1,$f2)=@_;
369 0 0         return 1 if $f1 eq $f2;
370 0 0 0       my $u1 = (blessed($f1) and $f1->isa('URI')) ? $f1 : make_URI($f1);
371 0 0 0       my $u2 = (blessed($f2) and $f2->isa('URI')) ? $f2 : make_URI($f2);
372 0 0         return 1 if $u1 eq $u2;
373 0 0         return 1 if $u1->canonical eq $u2->canonical;
374 0 0 0       if (!ref($f1) and !ref($f2) and $^O ne 'MSWin32' and -f $f1 and -f $f2) {
      0        
      0        
      0        
375 0           return is_same_file($f1,$f2);
376             }
377 0           return 0;
378             }
379              
380             =item is_same_file($filename_1,$filename_2)
381              
382             Uses device and i-node numbers (reported by stat()) to check if the
383             two filenames point to the same file on the filesystem. Returns 1 if
384             yes, 0 otherwise.
385              
386             =cut
387              
388             sub is_same_file {
389 0     0 1   my ($f1,$f2) = @_;
390 0 0         return 1 if $f1 eq $f2;
391 0           my ($d1,$i1)=stat($f1);
392 0           my ($d2,$i2)=stat($f2);
393 0 0 0       return ($d1==$d2 and $i1!=0 and $i1==$i2) ? 1 : 0;
394             }
395              
396             =item open_pipe($filename,$mode,$command)
397              
398             Returns a filehandle of a newly open pipe in a given mode.
399              
400             In write mode ($mode = 'w'), opens a writing pipe to a given
401             command redirecting the standard output of the command to a given
402             file. Moreover, if the last suffix of the $filename is '.gz' or
403             '.gz~', the output of the command is gzipped before saving to
404             $filename.
405              
406             In read mode ($mode = 'r'), opens a reading pipe to a given
407             command redirecting the content of the given file to the standard
408             input of the command. Moreover, if the last suffix of the $filename is
409             '.gz' or '.gz~', the output of the command is un-gzipped before it is passed
410             to the command.
411              
412             =cut
413              
414             sub open_pipe {
415 0     0 1   my ($file,$rw,$pipe) = @_;
416 0           my $fh;
417 0 0         if (_is_gzip($file)) {
418 0 0 0       if (-x $gzip && -x $zcat) {
419 0 0         if ($rw eq 'w') {
420 0   0       open $fh, "| $pipe | $gzip $gzip_opts > ".quote_filename($file) || undef $fh;
421             } else {
422 0   0       open $fh, "$zcat $zcat_opts < ".quote_filename($file)." | $pipe |" || undef $fh;
423             }
424             } else {
425 0           warn "Need a functional gzip and zcat to open this file\n";
426             }
427             } else {
428 0 0         if ($rw eq 'w') {
429 0   0       open $fh, "| $pipe > ".quote_filename($file) || undef $fh;
430             } else {
431 0   0       open $fh, "$pipe < ".quote_filename($file)." |" || undef $fh;
432             }
433             }
434 0           return $fh;
435             }
436              
437             # _open_file_zcat:
438             #
439             # Note: This function represents the original strategy used on POSIX
440             # systems. It turns out, however, that the calls to zcat/gzip cause
441             # serious penalty on btred when loading large amount of files and also
442             # cause the process' priority to lessen. It also turns out that we
443             # cannot use IO::Zlib filehandles directly with some backends, such as
444             # StorableBackend.
445             #
446             # I'm leaving the function here, but it is not used anymore.
447              
448             sub _open_file_zcat {
449 0     0     my ($file,$rw) = @_;
450 0           my $fh;
451 0 0         if (_is_gzip($file)) {
452 0 0         if (-x $gzip) {
453 0           $fh = new IO::Pipe();
454 0 0         if ($rw eq 'w') {
455 0 0         $fh->writer("$gzip $gzip_opts > ".quote_filename($file)) || undef $fh;
456             } else {
457 0 0         $fh->reader("$zcat $zcat_opts < ".quote_filename($file)) || undef $fh;
458             }
459             }
460 0 0         unless ($fh) {
461 0 0         eval {
462 0           require IO::Zlib;
463 0           $fh = new IO::Zlib;
464             } || return;
465 0 0         $fh->open($file,$rw."b") || undef $fh;
466             }
467             } else {
468 0           $fh = new IO::File();
469 0 0         $fh->open($file,$rw) || undef $fh;
470             }
471 0           return $fh;
472             }
473              
474             =item open_file($filename,$mode)
475              
476             Opens a given file for reading ($mode = 'r') or writing ($mode =
477             'w'). If the last suffix of the filename is '.gz' or '.gz~', the data
478             are transparently un-gzipped (when reading) or gzipped (when writing).
479              
480             =cut
481              
482             sub open_file {
483 0     0 1   my ($file,$rw) = @_;
484 0           my $fh;
485 0 0         if (_is_gzip($file)) {
486 0           eval {
487 0           $fh = File::Temp->new(UNLINK => 1);
488             };
489 0 0         die if $@;
490 0 0         return unless $fh;
491 0 0         if ($rw eq 'w') {
492 0 0         print STDERR __PACKAGE__.": Storing ZIPTOFILE: $rw\n" if $Debug;
493 0           ${*$fh}{'ZIPTOFILE'}=$file;
  0            
494             } else {
495 0           my $tmp;
496 0 0 0       eval {
497 0           require IO::Zlib;
498 0           $tmp = new IO::Zlib();
499             } && $tmp || return;
500 0 0         $tmp->open($file,"rb") || return;
501 0           my $buffer;
502 0           my $length = 1024*1024;
503 0           while (read($tmp,$buffer,$length)) {
504 0           $fh->print($buffer);
505             }
506 0           $tmp->close();
507 0           seek($fh,0,SEEK_SET);
508             }
509 0           return $fh;
510             } else {
511 0           $fh = new IO::File();
512 0 0         $fh->open($file,$rw) || return;
513             }
514 0           return $fh;
515             }
516              
517             sub _callback {
518 0     0     my $callback = shift;
519 0 0         if (ref($callback) eq 'CODE') {
    0          
520 0           return $callback->(@_);
521             } elsif (ref($callback) eq 'ARRAY') {
522 0           my ($cb,@args)=@{$callback};
  0            
523 0           $cb->(@args,@_);
524             }
525             }
526              
527             sub _fetch_file {
528 0     0     my ($uri) = @_;
529 0           my $proto = get_protocol($uri);
530 0 0 0       if ($proto eq 'file') {
    0          
    0          
531 0           my $file = get_filename($uri);
532 0 0         print STDERR __PACKAGE__.": _fetch_file: $file\n" if $Debug;
533 0 0         die("File does not exist: $file\n") unless -e $file;
534 0 0         die("File is not readable: $file\n") unless -r $file;
535 0 0         die("File is empty: $file\n") if -z $file;
536 0           return ($file,0);
537             } elsif ($proto eq 'ntred' or $proto =~ /$reject_proto/) {
538 0           return ($uri,0);
539             } elsif (exists($input_protocol_handler{$proto})) {
540 0           my ($new_uri,$unlink) = _callback($input_protocol_handler{$proto},$uri);
541 0           my $new_proto = get_protocol($new_uri);
542 0 0         if ($new_proto ne $proto) {
543 0           return _fetch_file($new_uri);
544             } else {
545 0           return ($new_uri,$unlink);
546             }
547             } else {
548 0 0         if ($^O eq 'MSWin32') {
549 0           return _fetch_file_win32($uri,$proto);
550             } else {
551 0           return _fetch_file_posix($uri,$proto);
552             }
553             }
554             }
555              
556              
557             =item fetch_file($uri)
558              
559             Fetches a resource from a given URI and returns a path to a local file
560             with the content of the resource and a boolean unlink flag. If the
561             unlink flag is true, the caller is responsible for removing the local
562             file when finished using it. Otherwise, the caller should not remove
563             the file (usually when it points to the original resource). The
564             caller may assume that the resource is already un-gzipped if the URI
565             had the '.gz' or '.gz~' suffix.
566              
567             =cut
568              
569             sub fetch_file {
570 0     0 1   my ($uri) = @_;
571 0           my ($file,$unlink) = &_fetch_file;
572 0 0 0       if (get_protocol($file) eq 'file' and _is_gzip($uri)) {
573 0           my ($fh,$ungzfile) = File::Temp::tempfile("tredgzioXXXXXX",
574             DIR => File::Spec->tmpdir(),
575             UNLINK => 0,
576             );
577 0 0         die "Cannot create temporary file: $!" unless $fh;
578 0           my $tmp;
579 0 0 0       eval {
580 0           require IO::Zlib;
581 0           $tmp = new IO::Zlib();
582             } && $tmp || die "Cannot load IO::Zlib: $@";
583 0 0         $tmp->open($file,"rb") || die "Cannot read $uri ($file)";
584 0           my $buffer;
585 0           my $length = 1024*1024;
586 0           while (read($tmp,$buffer,$length)) {
587 0           $fh->print($buffer);
588             }
589 0           $tmp->close();
590 0           $fh->close;
591 0 0         unlink $file if $unlink;
592 0           return ($ungzfile,1);
593             } else {
594 0           return ($file,$unlink);
595             }
596             }
597              
598              
599             sub _fetch_cmd {
600 0     0     my ($cmd, $filename)=@_;
601 0 0         print STDERR __PACKAGE__.": _fetch_cmd: $cmd\n" if $Debug;
602 0 0         if (system($cmd." > ".$filename)==0) {
603 0           return ($filename,1);
604             } else {
605 0           warn "$cmd > $filename failed (code $?): $!\n";
606 0           return $filename,0;
607             }
608             }
609              
610             sub _fetch_with_lwp {
611 0     0     my ($uri,$fh,$filename)=@_;
612 0           my $status = $lwp_user_agent->get($uri, ':content_file' => $filename);
613 0 0 0       if ($status and $status->is_error and $status->code == 401) {
      0        
614             # unauthorized
615             # Got authorization error 401, maybe the nonce is stale, let's try again...
616 0           $status = $lwp_user_agent->get($uri, ':content_file' => $filename);
617             }
618 0 0         if ($status->is_success()) {
619 0           close $fh;
620 0           return ($filename,1);
621             } else {
622 0           unlink $fh;
623 0           close $fh;
624 0           die "Error occured while fetching URL $uri $@\n".
625             $status->status_line()."\n";
626             }
627             }
628              
629             sub _fetch_file_win32 {
630 0     0     my ($uri,$proto)=@_;
631 0 0         my ($fh,$filename) = File::Temp::tempfile("tredioXXXXXX",
632             DIR => File::Spec->tmpdir(),
633             SUFFIX => (_is_gzip($uri) ? ".gz" : ""),
634             UNLINK => 0,
635             );
636 0 0         print STDERR __PACKAGE__.": fetching URI $uri as proto $proto to $filename\n" if $Debug;
637 0 0         if ($proto=~m(^https?|ftp|gopher|news)) {
638 0           return _fetch_with_lwp($uri,$fh,$filename);
639             }
640 0           return($uri,0);
641             }
642              
643             sub _fetch_file_posix {
644 0     0     my ($uri,$proto)=@_;
645 0 0         print STDERR __PACKAGE__.": fetching file using protocol $proto ($uri)\n" if $Debug;
646 0 0         my ($fh,$tempfile) = File::Temp::tempfile("tredioXXXXXX",
647             DIR => File::Spec->tmpdir(),
648             SUFFIX => (_is_gzip($uri) ? ".gz" : ""),
649             UNLINK => 0,
650             );
651 0 0         print STDERR __PACKAGE__.": tempfile: $tempfile\n" if $Debug;
652 0 0         if ($proto=~m(^https?|ftp|gopher|news)) {
653 0           return _fetch_with_lwp($uri,$fh,$tempfile);
654             }
655 0           close($fh);
656 0 0 0       if ($ssh and -x $ssh and $proto =~ /^(ssh|fish|sftp)$/) {
      0        
657 0 0         print STDERR __PACKAGE__.": using plain ssh\n" if $Debug;
658 0 0         if ($uri =~ m{^\s*(?:ssh|sftp|fish):(?://)?([^-/][^/]*)(/.*)$}) {
659 0           my ($host,$file) = ($1,$2);
660 0 0         print STDERR __PACKAGE__.": tempfile: $tempfile\n" if $Debug;
661             return
662 0           _fetch_cmd($ssh." ".$ssh_opts." ".quote_filename($host).
663             " /bin/cat ".quote_filename(quote_filename($file)),$tempfile);
664             } else {
665 0           die "failed to parse URI for ssh $uri\n";
666             }
667             }
668 0 0 0       if ($kioclient and -x $kioclient) {
669 0 0         print STDERR __PACKAGE__.": using kioclient\n" if $Debug;
670             # translate ssh protocol to fish protocol
671 0 0         if ($proto eq 'ssh') {
672 0           ($uri =~ s{^\s*ssh:(?://)?([/:]*)[:/]}{fish://$1/});
673             }
674 0           return _fetch_cmd($kioclient." ".$kioclient_opts.
675             " cat ".quote_filename($uri),$tempfile);
676             }
677 0 0 0       if ($curl and -x $curl and $proto =~ /^(?:https?|ftps?|gopher)$/) {
      0        
678 0           return _fetch_cmd($curl." ".$curl_opts." ".quote_filename($uri),$tempfile);
679             }
680 0           warn "No handlers for protocol $proto\n";
681 0           return ($uri,0);
682             }
683              
684             sub _open_upload_pipe {
685 0     0     my ($need_gzip,$user_pipe,$upload_pipe)=@_;
686 0           my $fh;
687 0 0 0       $user_pipe="| ".$user_pipe if defined($user_pipe) and $user_pipe !~ /^\|/;
688 0           $user_pipe.=" ";
689 0           my $cmd;
690 0 0         if ($need_gzip) {
691 0 0         if (-x $gzip) {
692 0           $cmd = $user_pipe."| $gzip $gzip_opts | $upload_pipe ";
693             } else {
694 0           die "Need a functional gzip and zcat to open this file\n";
695             }
696             } else {
697 0           $cmd = $user_pipe."| $upload_pipe ";
698             }
699 0 0         print STDERR __PACKAGE__.": upload: $cmd\n" if $Debug;
700 0   0       open $fh, $cmd || undef $fh;
701 0           return $fh;
702             }
703              
704             sub _get_upload_fh_win32 {
705 0     0     my ($uri,$proto,$userpipe)=@_;
706 0           die "Can't save files using protocol $proto on Windows\n";
707             }
708              
709             sub _get_upload_fh_posix {
710 0     0     my ($uri,$proto,$userpipe)=@_;
711 0 0         print STDERR __PACKAGE__.": uploading file using protocol $proto ($uri)\n" if $Debug;
712 0 0 0       return if $proto eq 'http' or $proto eq 'https';
713 0 0 0       if ($ssh and -x $ssh and $proto =~ /^(ssh|fish|sftp)$/) {
      0        
714 0 0         print STDERR __PACKAGE__.": using plain ssh\n" if $Debug;
715 0 0         if ($uri =~ m{^\s*(?:ssh|sftp|fish):(?://)?([^-/][^/]*)(/.*)$}) {
716 0           my ($host,$file) = ($1,$2);
717 0           return _open_upload_pipe(_is_gzip($uri), $userpipe, "$ssh $ssh_opts ".
718             quote_filename($host)." /bin/cat \\> ".
719             quote_filename(quote_filename($file)));
720             } else {
721 0           die "failed to parse URI for ssh $uri\n";
722             }
723             }
724 0 0 0       if ($kioclient and -x $kioclient) {
725 0 0         print STDERR __PACKAGE__.": using kioclient\n" if $Debug;
726             # translate ssh protocol to fish protocol
727 0 0         if ($proto eq 'ssh') {
728 0           $uri =~ s{^\s*ssh:(?://)?([/:]*)[:/]}{fish://$1/};
729             }
730 0           return _open_upload_pipe(_is_gzip($uri),$userpipe,
731             "$kioclient $kioclient_opts put ".quote_filename($uri));
732             }
733 0 0 0       if ($curl and -x $curl and $proto =~ /^(?:ftps?)$/) {
      0        
734 0           return _open_upload_pipe("$curl --upload-file - $curl_opts ".quote_filename($uri));
735             }
736 0           die "No handlers for protocol $proto\n";
737             }
738              
739             =item get_store_fh ($uri, $command?)
740              
741             If $command is provided, returns a writable filehandle for a pipe to a given
742             command whose output is redirected to an uploader to the given $URI
743             (for file $URIs this simply redirects the output of the command to the
744             given file (gzipping the data first if the $URI ends with the '.gz' or
745             '.gz~' suffix).
746              
747             If $command is not given, simly retuns a writable file handle to a
748             given file (possibly performing gzip if the file name ends with the
749             '.gz' or '.gz~' suffix).
750              
751             =cut
752              
753             sub get_store_fh {
754 0     0 1   my ($uri,$user_pipe) = @_;
755 0           my $proto = get_protocol($uri);
756 0 0 0       if ($proto eq 'file') {
    0          
757 0           $uri = get_filename($uri);
758 0 0         if ($user_pipe) {
759 0           return open_pipe($uri,'w',$user_pipe);
760             } else {
761 0           return open_file($uri,'w');
762             }
763             } elsif ($proto eq 'ntred' or $proto =~ /$reject_proto/) {
764 0           return $uri;
765             } else {
766 0 0         if ($^O eq 'MSWin32') {
767 0           return _get_upload_fh_win32($uri,$proto,$user_pipe);
768             } else {
769 0           return _get_upload_fh_posix($uri,$proto,$user_pipe);
770             }
771             }
772             }
773              
774             =item unlink_uri($URI)
775              
776             Delete the resource point to by a given URI (if supported by the
777             corresponding protocol handler).
778              
779             =cut
780              
781             sub unlink_uri {
782 0 0   0 1   ($^O eq 'MSWin32') ? &_unlink_uri_win32 : &_unlink_uri_posix;
783             }
784              
785             sub _unlink_uri_win32 {
786 0     0     my ($uri) = @_;
787 0           my $proto = get_protocol($uri);
788 0 0         if ($proto eq 'file') {
789 0           unlink get_filename($uri);
790             } else {
791 0           die "Can't unlink file $uri\n";
792             }
793             }
794              
795             sub _unlink_uri_posix {
796 0     0     my ($uri)=@_;
797 0           my $proto = get_protocol($uri);
798 0 0         if ($proto eq 'file') {
799 0           return unlink get_filename($uri);
800             }
801 0 0         print STDERR __PACKAGE__.": unlinking file $uri using protocol $proto\n" if $Debug;
802 0 0 0       if ($ssh and -x $ssh and $proto =~ /^(ssh|fish|sftp)$/) {
      0        
803 0 0         print STDERR __PACKAGE__.": using plain ssh\n" if $Debug;
804 0 0         if ($uri =~ m{^\s*(?:ssh|sftp|fish):(?://)?([^-/][^/]*)(/.*)$}) {
805 0           my ($host,$file) = ($1,$2);
806 0 0         return (system("$ssh $ssh_opts ".quote_filename($host)." /bin/rm ".
807             quote_filename(quote_filename($file)))==0) ? 1 : 0;
808             } else {
809 0           die "failed to parse URI for ssh $uri\n";
810             }
811             }
812 0 0 0       if ($kioclient and -x $kioclient) {
813             # translate ssh protocol to fish protocol
814 0 0         if ($proto eq 'ssh') {
815 0           $uri =~ s{^\s*ssh:(?://)?([/:]*)[:/]}{fish://$1/};
816             }
817 0 0         return (system("$kioclient $kioclient_opts rm ".quote_filename($uri))==0 ? 1 : 0);
818             }
819 0           die "No handlers for protocol $proto\n";
820             }
821              
822             =item rename_uri($URI_1,$URI_2)
823              
824             Rename the resource point to by $URI_1 to $URI_2 (if supported by the
825             corresponding protocol handlers). The URIs must point to the same
826             physical storage.
827              
828             =cut
829              
830             sub rename_uri {
831 0 0   0 1   print STDERR __PACKAGE__.": rename @_\n" if $Debug;
832 0 0         ($^O eq 'MSWin32') ? &_rename_uri_win32 : &_rename_uri_posix;
833             }
834              
835              
836             sub _rename_uri_win32 {
837 0     0     my ($uri1,$uri2) = @_;
838 0           my $proto1 = get_protocol($uri1);
839 0           my $proto2 = get_protocol($uri2);
840 0 0 0       if ($proto1 eq 'file' and $proto2 eq 'file') {
841 0           my $uri1 = get_filename($uri1);
842 0 0         return unless -f $uri1;
843 0           rename $uri1, get_filename($uri2);
844             } else {
845 0           die "Can't rename file $uri1 to $uri2\n";
846             }
847             }
848              
849             sub _rename_uri_posix {
850 0     0     my ($uri1,$uri2) = @_;
851 0           my $proto = get_protocol($uri1);
852 0           my $proto2 = get_protocol($uri2);
853 0 0         if ($proto ne $proto2) {
854 0           die "Can't rename file $uri1 to $uri2\n";
855             }
856 0 0         if ($proto eq 'file') {
857 0           my $uri1 = get_filename($uri1);
858 0 0         return unless -f $uri1;
859 0           return rename $uri1, get_filename($uri2);
860             }
861 0 0         print STDERR __PACKAGE__.": rename file $uri1 to $uri2 using protocol $proto\n" if $Debug;
862 0 0 0       if ($ssh and -x $ssh and $proto =~ /^(ssh|fish|sftp)$/) {
      0        
863 0 0         print STDERR __PACKAGE__.": using plain ssh\n" if $Debug;
864 0 0         if ($uri1 =~ m{^\s*(?:ssh|sftp|fish):(?://)?([^-/][^/]*)(/.*)$}) {
865 0           my ($host,$file) = ($1,$2);
866 0 0 0       if ($uri2 =~ m{^\s*(?:ssh|sftp|fish):(?://)?([^-/][^/]*)(/.*)$} and $1 eq $host) {
867 0           my $file2 = $2;
868 0 0         return (system("$ssh $ssh_opts ".quote_filename($host)." /bin/mv ".
869             quote_filename(quote_filename($file))." ".
870             quote_filename(quote_filename($file2)))==0) ? 1 : 0;
871             } else {
872 0           die "failed to parse URI for ssh $uri2\n";
873             }
874             } else {
875 0           die "failed to parse URI for ssh $uri1\n";
876             }
877             }
878 0 0 0       if ($kioclient and -x $kioclient) {
879             # translate ssh protocol to fish protocol
880 0 0         if ($proto eq 'ssh') {
881 0           $uri1 =~ s{^\s*ssh:(?://)?([/:]*)[:/]}{fish://$1/};
882 0           $uri2 =~ s{^\s*ssh:(?://)?([/:]*)[:/]}{fish://$1/};
883             }
884 0 0         return (system("$kioclient $kioclient_opts mv ".quote_filename($uri1).
885             " ".quote_filename($uri2))==0 ? 1 : 0);
886             }
887 0           die "No handlers for protocol $proto\n";
888             }
889              
890              
891              
892             =item open_backend (filename,mode,encoding?)
893              
894             Open given file for reading or writing (depending on mode which may be
895             one of "r" or "w"); Return the corresponding object based on
896             File::Handle class. Only files the filename of which ends with '.gz'
897             are considered to be gz-commpressed. All other files are opened using
898             IO::File.
899              
900             Optionally, in perl ver. >= 5.8, you may also specify file character
901             encoding.
902              
903             =cut
904              
905              
906             sub open_backend {
907 0     0 1   my ($filename, $rw,$encoding)=@_;
908 0           $filename =~ s/^\s*|\s*$//g;
909 0 0         if ($rw eq 'r') {
    0          
910 0   0       return set_encoding(open_file($filename,$rw)||undef,$encoding);
911             } elsif ($rw eq 'w') {
912 0   0       return set_encoding(get_store_fh($filename)||undef,$encoding);
913             } else {
914 0           croak "2nd argument to open_backend must be 'r' or 'w'!";
915             }
916 0           return;
917             }
918              
919             =pod
920              
921             =item close_backend (filehandle)
922              
923             Close given filehandle opened by previous call to C
924              
925             =cut
926              
927             sub close_backend {
928 0     0 1   my ($fh)=@_;
929             # Win32 hack:
930 0 0         if (ref($fh) eq 'File::Temp') {
931 0           my $filename = ${*$fh}{'ZIPTOFILE'};
  0            
932 0 0         if ($filename ne "") {
933 0 0         print STDERR __PACKAGE__.": Doing the real save to $filename\n" if $Debug;
934 0           seek($fh,0,SEEK_SET);
935 0           require IO::Zlib;
936 0           my $tmp = new IO::Zlib();
937 0 0         $tmp->open($filename,"wb") || die "Cannot write to $filename: $!\n";
938             # probably bug in Perl 5.8.9? - using just :raw here is not enough
939 0           binmode $fh, ':raw:perlio:bytes';
940 0           local $/;
941 0           $tmp->print(<$fh>);
942 0           $tmp->close;
943             }
944             }
945 0           my $ret;
946 0 0 0       if ((blessed($fh) and $fh->isa('IO::Zlib'))) {
947 0           $ret = 1;
948             } else {
949 0   0       $ret = ref($fh) && $fh->close();
950             }
951 0           my $unlink = delete $UNLINK_ON_CLOSE{ $fh };
952 0 0         if ($unlink) {
953 0           unlink $unlink;
954             }
955 0           return $ret;
956             }
957              
958              
959             =item open_uri (URI,encoding?)
960              
961             Open given URL for reading, returning an object based on File::Handle
962             class. Since for some types of URLs this function first copies the
963             data into a temporary file, use close_uri($fh) on the resulting
964             filehandle to close it and clean up the temporary file.
965              
966             Optionally, in perl ver. >= 5.8, you may also specify file character
967             encoding.
968              
969             =cut
970              
971             sub open_uri {
972 0     0 1   my ($uri,$encoding) = @_;
973 0           my ($local_file, $is_temporary) = fetch_file( $uri );
974 0   0       my $fh = open_backend($local_file,'r') || return;
975 0 0 0       if ($is_temporary and $local_file ne $uri ) {
976 0 0         if (!unlink($local_file)) {
977 0           $UNLINK_ON_CLOSE{ $fh } = $local_file;
978             }
979             }
980 0           return set_encoding($fh,$encoding);
981             }
982              
983             *close_uri = \&close_backend;
984              
985             =item close_uri (filehandle)
986              
987             Close given filehandle opened by previous call to C.
988              
989             =cut
990              
991              
992             =item copy_uri ($URI_1,$URI_2)
993              
994             Copy the resource pointed to by the URI $URI_1 to $URI_2. The type of
995             $URI_2 must be writable.
996              
997             =cut
998              
999             sub copy_uri {
1000 0     0 1   my ($src_uri,$target_uri)=@_;
1001 0 0         my $in = open_uri($src_uri)
1002             or die "Cannot open source $src_uri: $!\n";
1003 0 0         my $out = open_backend($target_uri,'w')
1004             or die "Cannot open target $target_uri: $!\n";
1005 0           my $L=1024*100;
1006 0           my $buffer;
1007 0           while(read($in,$buffer,$L)>0) {
1008 0           print $out ($buffer);
1009             }
1010 0           close_backend($in);
1011 0           close_backend($out);
1012             }
1013              
1014             =back
1015              
1016             =head1 COPYRIGHT AND LICENSE
1017              
1018             Copyright (C) 2006-2010 by Petr Pajas
1019              
1020             This library is free software; you can redistribute it and/or modify
1021             it under the same terms as Perl itself, either Perl version 5.8.2 or,
1022             at your option, any later version of Perl 5 you may have available.
1023              
1024             =cut