File Coverage

lib/Archive/Extract.pm
Criterion Covered Total %
statement 376 525 71.6
branch 146 282 51.7
condition 37 64 57.8
subroutine 71 78 91.0
pod 18 23 78.2
total 648 972 66.6


line stmt bran cond sub pod time code
1             package Archive::Extract;
2 1     1   1424 use if $] > 5.017, 'deprecate';
  1         29  
  1         6  
3              
4 1     1   1136 use strict;
  1         3  
  1         22  
5              
6 1     1   5 use Cwd qw[cwd chdir];
  1         1  
  1         50  
7 1     1   5 use Carp qw[carp];
  1         1  
  1         45  
8 1     1   668 use IPC::Cmd qw[run can_run];
  1         31870  
  1         66  
9 1     1   8 use FileHandle;
  1         1  
  1         6  
10 1     1   285 use File::Path qw[mkpath];
  1         2  
  1         52  
11 1     1   4 use File::Spec;
  1         2  
  1         20  
12 1     1   4 use File::Basename qw[dirname basename];
  1         2  
  1         62  
13 1     1   5 use Params::Check qw[check];
  1         2  
  1         47  
14 1     1   5 use Module::Load::Conditional qw[can_load check_install];
  1         2  
  1         81  
15 1     1   16 use Locale::Maketext::Simple Style => 'gettext';
  1         2  
  1         5  
16              
17             ### solaris has silly /bin/tar output ###
18 1 50   1   240 use constant ON_SOLARIS => $^O eq 'solaris' ? 1 : 0;
  1         2  
  1         122  
19 1 50   1   6 use constant ON_NETBSD => $^O =~ m!^(netbsd|minix)$! ? 1 : 0;
  1         2  
  1         111  
20 1 50   1   7 use constant ON_OPENBSD => $^O =~ m!^(openbsd|bitrig)$! ? 1 : 0;
  1         1  
  1         105  
21 1 50   1   6 use constant ON_FREEBSD => $^O =~ m!^(free|midnight|dragonfly)(bsd)?$! ? 1 : 0;
  1         25  
  1         63  
22 1 50   1   6 use constant ON_LINUX => $^O eq 'linux' ? 1 : 0;
  1         1  
  1         82  
23 1 50   1   5 use constant FILE_EXISTS => sub { -e $_[0] ? 1 : 0 };
  1         2  
  1         66  
  68         9635  
24              
25             ### VMS may require quoting upper case command options
26 1 50   1   6 use constant ON_VMS => $^O eq 'VMS' ? 1 : 0;
  1         1  
  1         63  
27              
28             ### Windows needs special treatment of Tar options
29 1 50   1   5 use constant ON_WIN32 => $^O eq 'MSWin32' ? 1 : 0;
  1         2  
  1         57  
30              
31             ### we can't use this extraction method, because of missing
32             ### modules/binaries:
33 1     1   6 use constant METHOD_NA => [];
  1         1  
  1         64  
34              
35             ### If these are changed, update @TYPES and the new() POD
36 1     1   6 use constant TGZ => 'tgz';
  1         1  
  1         50  
37 1     1   5 use constant TAR => 'tar';
  1         2  
  1         39  
38 1     1   4 use constant GZ => 'gz';
  1         2  
  1         47  
39 1     1   6 use constant ZIP => 'zip';
  1         1  
  1         53  
40 1     1   5 use constant BZ2 => 'bz2';
  1         2  
  1         39  
41 1     1   5 use constant TBZ => 'tbz';
  1         1  
  1         48  
42 1     1   6 use constant Z => 'Z';
  1         1  
  1         51  
43 1     1   5 use constant LZMA => 'lzma';
  1         1  
  1         77  
44 1     1   7 use constant XZ => 'xz';
  1         1  
  1         52  
45 1     1   6 use constant TXZ => 'txz';
  1         1  
  1         45  
46              
47 1         537 use vars qw[$VERSION $PREFER_BIN $PROGRAMS $WARN $DEBUG
48             $_ALLOW_BIN $_ALLOW_PURE_PERL $_ALLOW_TAR_ITER
49 1     1   5 ];
  1         1  
50              
51             $VERSION = '0.86';
52             $PREFER_BIN = 0;
53             $WARN = 1;
54             $DEBUG = 0;
55             $_ALLOW_PURE_PERL = 1; # allow pure perl extractors
56             $_ALLOW_BIN = 1; # allow binary extractors
57             $_ALLOW_TAR_ITER = 1; # try to use Archive::Tar->iter if available
58              
59             # same as all constants
60             my @Types = ( TGZ, TAR, GZ, ZIP, BZ2, TBZ, Z, LZMA, XZ, TXZ );
61              
62             local $Params::Check::VERBOSE = $Params::Check::VERBOSE = 1;
63             local $Module::Load::Conditional::FORCE_SAFE_INC = 1;
64              
65             =pod
66              
67             =head1 NAME
68              
69             Archive::Extract - A generic archive extracting mechanism
70              
71             =head1 SYNOPSIS
72              
73             use Archive::Extract;
74              
75             ### build an Archive::Extract object ###
76             my $ae = Archive::Extract->new( archive => 'foo.tgz' );
77              
78             ### extract to cwd() ###
79             my $ok = $ae->extract;
80              
81             ### extract to /tmp ###
82             my $ok = $ae->extract( to => '/tmp' );
83              
84             ### what if something went wrong?
85             my $ok = $ae->extract or die $ae->error;
86              
87             ### files from the archive ###
88             my $files = $ae->files;
89              
90             ### dir that was extracted to ###
91             my $outdir = $ae->extract_path;
92              
93              
94             ### quick check methods ###
95             $ae->is_tar # is it a .tar file?
96             $ae->is_tgz # is it a .tar.gz or .tgz file?
97             $ae->is_gz; # is it a .gz file?
98             $ae->is_zip; # is it a .zip file?
99             $ae->is_bz2; # is it a .bz2 file?
100             $ae->is_tbz; # is it a .tar.bz2 or .tbz file?
101             $ae->is_lzma; # is it a .lzma file?
102             $ae->is_xz; # is it a .xz file?
103             $ae->is_txz; # is it a .tar.xz or .txz file?
104              
105             ### absolute path to the archive you provided ###
106             $ae->archive;
107              
108             ### commandline tools, if found ###
109             $ae->bin_tar # path to /bin/tar, if found
110             $ae->bin_gzip # path to /bin/gzip, if found
111             $ae->bin_unzip # path to /bin/unzip, if found
112             $ae->bin_bunzip2 # path to /bin/bunzip2 if found
113             $ae->bin_unlzma # path to /bin/unlzma if found
114             $ae->bin_unxz # path to /bin/unxz if found
115              
116             =head1 DESCRIPTION
117              
118             Archive::Extract is a generic archive extraction mechanism.
119              
120             It allows you to extract any archive file of the type .tar, .tar.gz,
121             .gz, .Z, tar.bz2, .tbz, .bz2, .zip, .xz,, .txz, .tar.xz or .lzma
122             without having to worry how it
123             does so, or use different interfaces for each type by using either
124             perl modules, or commandline tools on your system.
125              
126             See the C section further down for details.
127              
128             =cut
129              
130              
131             ### see what /bin/programs are available ###
132             $PROGRAMS = {};
133             CMD: for my $pgm (qw[tar unzip gzip bunzip2 uncompress unlzma unxz]) {
134             if ( $pgm eq 'unzip' and ON_FREEBSD and my $unzip = can_run('info-unzip') ) {
135             $PROGRAMS->{$pgm} = $unzip;
136             next CMD;
137             }
138             if ( $pgm eq 'unzip' and ( ON_FREEBSD || ON_LINUX ) ) {
139             local $IPC::Cmd::INSTANCES = 1;
140             ($PROGRAMS->{$pgm}) = grep { _is_infozip_esque($_) } can_run($pgm);
141             next CMD;
142             }
143             if ( $pgm eq 'unzip' and ON_NETBSD ) {
144             local $IPC::Cmd::INSTANCES = 1;
145             ($PROGRAMS->{$pgm}) = grep { m!/usr/pkg/! } can_run($pgm);
146             next CMD;
147             }
148             if ( $pgm eq 'tar' and ( ON_OPENBSD || ON_SOLARIS || ON_NETBSD ) ) {
149             # try gtar first
150             next CMD if $PROGRAMS->{$pgm} = can_run('gtar');
151             }
152             $PROGRAMS->{$pgm} = can_run($pgm);
153             }
154              
155             ### mapping from types to extractor methods ###
156             my $Mapping = { # binary program # pure perl module
157             is_tgz => { bin => '_untar_bin', pp => '_untar_at' },
158             is_tar => { bin => '_untar_bin', pp => '_untar_at' },
159             is_gz => { bin => '_gunzip_bin', pp => '_gunzip_cz' },
160             is_zip => { bin => '_unzip_bin', pp => '_unzip_az' },
161             is_tbz => { bin => '_untar_bin', pp => '_untar_at' },
162             is_bz2 => { bin => '_bunzip2_bin', pp => '_bunzip2_bz2'},
163             is_Z => { bin => '_uncompress_bin', pp => '_gunzip_cz' },
164             is_lzma => { bin => '_unlzma_bin', pp => '_unlzma_cz' },
165             is_xz => { bin => '_unxz_bin', pp => '_unxz_cz' },
166             is_txz => { bin => '_untar_bin', pp => '_untar_at' },
167             };
168              
169             { ### use subs so we re-generate array refs etc for the no-override flags
170             ### if we don't, then we reuse the same arrayref, meaning objects store
171             ### previous errors
172             my $tmpl = {
173             archive => sub { { required => 1, allow => FILE_EXISTS } },
174             type => sub { { default => '', allow => [ @Types ] } },
175             _error_msg => sub { { no_override => 1, default => [] } },
176             _error_msg_long => sub { { no_override => 1, default => [] } },
177             };
178              
179             ### build accessors ###
180             for my $method( keys %$tmpl,
181             qw[_extractor _gunzip_to files extract_path],
182             ) {
183 1     1   7 no strict 'refs';
  1         2  
  1         2940  
184             *$method = sub {
185 4237     4237   493282 my $self = shift;
186 4237 100       16020 $self->{$method} = $_[0] if @_;
187 4237         60832 return $self->{$method};
188             }
189             }
190              
191             =head1 METHODS
192              
193             =head2 $ae = Archive::Extract->new(archive => '/path/to/archive',[type => TYPE])
194              
195             Creates a new C object based on the archive file you
196             passed it. Automatically determines the type of archive based on the
197             extension, but you can override that by explicitly providing the
198             C argument.
199              
200             Valid values for C are:
201              
202             =over 4
203              
204             =item tar
205              
206             Standard tar files, as produced by, for example, C.
207             Corresponds to a C<.tar> suffix.
208              
209             =item tgz
210              
211             Gzip compressed tar files, as produced by, for example C.
212             Corresponds to a C<.tgz> or C<.tar.gz> suffix.
213              
214             =item gz
215              
216             Gzip compressed file, as produced by, for example C.
217             Corresponds to a C<.gz> suffix.
218              
219             =item Z
220              
221             Lempel-Ziv compressed file, as produced by, for example C.
222             Corresponds to a C<.Z> suffix.
223              
224             =item zip
225              
226             Zip compressed file, as produced by, for example C.
227             Corresponds to a C<.zip>, C<.jar> or C<.par> suffix.
228              
229             =item bz2
230              
231             Bzip2 compressed file, as produced by, for example, C.
232             Corresponds to a C<.bz2> suffix.
233              
234             =item tbz
235              
236             Bzip2 compressed tar file, as produced by, for example C.
237             Corresponds to a C<.tbz> or C<.tar.bz2> suffix.
238              
239             =item lzma
240              
241             Lzma compressed file, as produced by C.
242             Corresponds to a C<.lzma> suffix.
243              
244             =item xz
245              
246             Xz compressed file, as produced by C.
247             Corresponds to a C<.xz> suffix.
248              
249             =item txz
250              
251             Xz compressed tar file, as produced by, for example C.
252             Corresponds to a C<.txz> or C<.tar.xz> suffix.
253              
254             =back
255              
256             Returns a C object on success, or false on failure.
257              
258             =cut
259              
260             ### constructor ###
261             sub new {
262 68     68 1 126513 my $class = shift;
263 68         388 my %hash = @_;
264              
265             ### see above why we use subs here and generate the template;
266             ### it's basically to not re-use arrayrefs
267 68         439 my %utmpl = map { $_ => $tmpl->{$_}->() } keys %$tmpl;
  272         784  
268              
269 68 50       553 my $parsed = check( \%utmpl, \%hash ) or return;
270              
271             ### make sure we have an absolute path ###
272 68         3404 my $ar = $parsed->{archive} = File::Spec->rel2abs( $parsed->{archive} );
273              
274             ### figure out the type, if it wasn't already specified ###
275 68 100       240 unless ( $parsed->{type} ) {
276             $parsed->{type} =
277 58 100       4693 $ar =~ /.+?\.(?:tar\.gz|tgz)$/i ? TGZ :
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
278             $ar =~ /.+?\.gz$/i ? GZ :
279             $ar =~ /.+?\.tar$/i ? TAR :
280             $ar =~ /.+?\.(zip|jar|ear|war|par)$/i ? ZIP :
281             $ar =~ /.+?\.(?:tbz2?|tar\.bz2?)$/i ? TBZ :
282             $ar =~ /.+?\.bz2$/i ? BZ2 :
283             $ar =~ /.+?\.Z$/ ? Z :
284             $ar =~ /.+?\.lzma$/ ? LZMA :
285             $ar =~ /.+?\.(?:txz|tar\.xz)$/i ? TXZ :
286             $ar =~ /.+?\.xz$/ ? XZ :
287             '';
288              
289             }
290              
291 68         182 bless $parsed, $class;
292              
293             ### don't know what type of file it is
294             ### XXX this *has* to be an object call, not a package call
295             return $parsed->_error(loc("Cannot determine file type for '%1'",
296 68 100       182 $parsed->{archive} )) unless $parsed->{type};
297 67         423 return $parsed;
298             }
299             }
300              
301             =head2 $ae->extract( [to => '/output/path'] )
302              
303             Extracts the archive represented by the C object to
304             the path of your choice as specified by the C argument. Defaults to
305             C.
306              
307             Since C<.gz> files never hold a directory, but only a single file; if
308             the C argument is an existing directory, the file is extracted
309             there, with its C<.gz> suffix stripped.
310             If the C argument is not an existing directory, the C argument
311             is understood to be a filename, if the archive type is C.
312             In the case that you did not specify a C argument, the output
313             file will be the name of the archive file, stripped from its C<.gz>
314             suffix, in the current working directory.
315              
316             C will try a pure perl solution first, and then fall back to
317             commandline tools if they are available. See the C
318             section below on how to alter this behaviour.
319              
320             It will return true on success, and false on failure.
321              
322             On success, it will also set the follow attributes in the object:
323              
324             =over 4
325              
326             =item $ae->extract_path
327              
328             This is the directory that the files where extracted to.
329              
330             =item $ae->files
331              
332             This is an array ref with the paths of all the files in the archive,
333             relative to the C argument you specified.
334             To get the full path to an extracted file, you would use:
335              
336             File::Spec->catfile( $to, $ae->files->[0] );
337              
338             Note that all files from a tar archive will be in unix format, as per
339             the tar specification.
340              
341             =back
342              
343             =cut
344              
345             sub extract {
346 97     97 1 28429 my $self = shift;
347 97         316 my %hash = @_;
348              
349             ### reset error messages
350 97         383 $self->_error_msg( [] );
351 97         374 $self->_error_msg_long( [] );
352              
353 97         184 my $to;
354 97         418 my $tmpl = {
355             to => { default => '.', store => \$to }
356             };
357              
358 97 100       345 check( $tmpl, \%hash ) or return;
359              
360             ### so 'to' could be a file or a dir, depending on whether it's a .gz
361             ### file, or basically anything else.
362             ### so, check that, then act accordingly.
363             ### set an accessor specifically so _gunzip can know what file to extract
364             ### to.
365 96         7891 my $dir;
366             { ### a foo.gz file
367 96 100 100     198 if( $self->is_gz or $self->is_bz2 or $self->is_Z or $self->is_lzma or $self->is_xz ) {
  96   100     312  
      100        
      100        
368              
369 16         91 my $cp = $self->archive; $cp =~ s/\.(?:gz|bz2?|Z|lzma|xz)$//i;
  16         248  
370              
371             ### to is a dir?
372 16 50       340 if ( -d $to ) {
373 0         0 $dir = $to;
374 0         0 $self->_gunzip_to( basename($cp) );
375              
376             ### then it's a filename
377             } else {
378 16         1718 $dir = dirname($to);
379 16         630 $self->_gunzip_to( basename($to) );
380             }
381              
382             ### not a foo.gz file
383             } else {
384 80         199 $dir = $to;
385             }
386             }
387              
388             ### make the dir if it doesn't exist ###
389 96 100       1947 unless( -d $dir ) {
390 38         103 eval { mkpath( $dir ) };
  38         6349  
391              
392 38 50       195 return $self->_error(loc("Could not create path '%1': %2", $dir, $@))
393             if $@;
394             }
395              
396             ### get the current dir, to restore later ###
397 96         318895 my $cwd = cwd();
398              
399 96         2142 my $ok = 1;
400             EXTRACT: {
401              
402             ### chdir to the target dir ###
403 96 50       220 unless( chdir $dir ) {
  96         11362  
404 0         0 $self->_error(loc("Could not chdir to '%1': %2", $dir, $!));
405 0         0 $ok = 0; last EXTRACT;
  0         0  
406             }
407              
408             ### set files to an empty array ref, so there's always an array
409             ### ref IN the accessor, to avoid errors like:
410             ### Can't use an undefined value as an ARRAY reference at
411             ### ../lib/Archive/Extract.pm line 742. (rt #19815)
412 96         2755 $self->files( [] );
413              
414             ### find out the dispatch methods needed for this type of
415             ### archive. Do a $self->is_XXX to figure out the type, then
416             ### get the hashref with bin + pure perl dispatchers.
417 96         3395 my ($map) = map { $Mapping->{$_} } grep { $self->$_ } keys %$Mapping;
  96         910  
  960         6226  
418              
419             ### add pure perl extractor if allowed & add bin extractor if allowed
420 96         365 my @methods;
421 96 100       682 push @methods, $map->{'pp'} if $_ALLOW_PURE_PERL;
422 96 100       698 push @methods, $map->{'bin'} if $_ALLOW_BIN;
423              
424             ### reverse it if we prefer bin extractors
425 96 50       764 @methods = reverse @methods if $PREFER_BIN;
426              
427 96         271 my($na, $fail);
428 96         672 for my $method (@methods) {
429 96         2007 $self->debug( "# Extracting with ->$method\n" );
430              
431 96         1657 my $rv = $self->$method;
432              
433             ### a positive extraction
434 96 50 33     7786 if( $rv and $rv ne METHOD_NA ) {
    0 0        
435 96         1522 $self->debug( "# Extraction succeeded\n" );
436 96         1170 $self->_extractor($method);
437 96         755 last;
438              
439             ### method is not available
440             } elsif ( $rv and $rv eq METHOD_NA ) {
441 0         0 $self->debug( "# Extraction method not available\n" );
442 0         0 $na++;
443             } else {
444 0         0 $self->debug( "# Extraction method failed\n" );
445 0         0 $fail++;
446             }
447             }
448              
449             ### warn something went wrong if we didn't get an extractor
450 96 50       344 unless( $self->_extractor ) {
451 0 0       0 my $diag = $fail ? loc("Extract failed due to errors") :
    0          
452             $na ? loc("Extract failed; no extractors available") :
453             '';
454              
455 0         0 $self->_error($diag);
456 0         0 $ok = 0;
457             }
458             }
459              
460             ### and chdir back ###
461 96 50       7426 unless( chdir $cwd ) {
462 0         0 $self->_error(loc("Could not chdir back to start dir '%1': %2'",
463             $cwd, $!));
464             }
465              
466 96         3282 return $ok;
467             }
468              
469             =pod
470              
471             =head1 ACCESSORS
472              
473             =head2 $ae->error([BOOL])
474              
475             Returns the last encountered error as string.
476             Pass it a true value to get the C output instead.
477              
478             =head2 $ae->extract_path
479              
480             This is the directory the archive got extracted to.
481             See C for details.
482              
483             =head2 $ae->files
484              
485             This is an array ref holding all the paths from the archive.
486             See C for details.
487              
488             =head2 $ae->archive
489              
490             This is the full path to the archive file represented by this
491             C object.
492              
493             =head2 $ae->type
494              
495             This is the type of archive represented by this C
496             object. See accessors below for an easier way to use this.
497             See the C method for details.
498              
499             =head2 $ae->types
500              
501             Returns a list of all known C for C's
502             C method.
503              
504             =cut
505              
506 1     1 1 1104 sub types { return @Types }
507              
508             =head2 $ae->is_tgz
509              
510             Returns true if the file is of type C<.tar.gz>.
511             See the C method for details.
512              
513             =head2 $ae->is_tar
514              
515             Returns true if the file is of type C<.tar>.
516             See the C method for details.
517              
518             =head2 $ae->is_gz
519              
520             Returns true if the file is of type C<.gz>.
521             See the C method for details.
522              
523             =head2 $ae->is_Z
524              
525             Returns true if the file is of type C<.Z>.
526             See the C method for details.
527              
528             =head2 $ae->is_zip
529              
530             Returns true if the file is of type C<.zip>.
531             See the C method for details.
532              
533             =head2 $ae->is_lzma
534              
535             Returns true if the file is of type C<.lzma>.
536             See the C method for details.
537              
538             =head2 $ae->is_xz
539              
540             Returns true if the file is of type C<.xz>.
541             See the C method for details.
542              
543             =cut
544              
545             ### quick check methods ###
546 376     376 1 13700 sub is_tgz { return $_[0]->type eq TGZ }
547 160     160 1 7212 sub is_tar { return $_[0]->type eq TAR }
548 274     274 1 1962 sub is_gz { return $_[0]->type eq GZ }
549 118     118 1 10163 sub is_zip { return $_[0]->type eq ZIP }
550 288     288 0 8462 sub is_tbz { return $_[0]->type eq TBZ }
551 268     268 0 2477 sub is_bz2 { return $_[0]->type eq BZ2 }
552 262     262 1 1464 sub is_Z { return $_[0]->type eq Z }
553 256     256 1 2297 sub is_lzma { return $_[0]->type eq LZMA }
554 252     252 1 1998 sub is_xz { return $_[0]->type eq XZ }
555 272     272 0 27854 sub is_txz { return $_[0]->type eq TXZ }
556              
557             =pod
558              
559             =head2 $ae->bin_tar
560              
561             Returns the full path to your tar binary, if found.
562              
563             =head2 $ae->bin_gzip
564              
565             Returns the full path to your gzip binary, if found
566              
567             =head2 $ae->bin_unzip
568              
569             Returns the full path to your unzip binary, if found
570              
571             =head2 $ae->bin_unlzma
572              
573             Returns the full path to your unlzma binary, if found
574              
575             =head2 $ae->bin_unxz
576              
577             Returns the full path to your unxz binary, if found
578              
579             =cut
580              
581             ### paths to commandline tools ###
582 52 50   52 1 779 sub bin_gzip { return $PROGRAMS->{'gzip'} if $PROGRAMS->{'gzip'} }
583 0 0   0 1 0 sub bin_unzip { return $PROGRAMS->{'unzip'} if $PROGRAMS->{'unzip'} }
584 144 50   144 1 1476 sub bin_tar { return $PROGRAMS->{'tar'} if $PROGRAMS->{'tar'} }
585 34 50   34 0 541 sub bin_bunzip2 { return $PROGRAMS->{'bunzip2'} if $PROGRAMS->{'bunzip2'} }
586             sub bin_uncompress { return $PROGRAMS->{'uncompress'}
587 4 50   4 0 60 if $PROGRAMS->{'uncompress'} }
588 4 50   4 1 49 sub bin_unlzma { return $PROGRAMS->{'unlzma'} if $PROGRAMS->{'unlzma'} }
589 52 50   52 1 394 sub bin_unxz { return $PROGRAMS->{'unxz'} if $PROGRAMS->{'unxz'} }
590              
591             =head2 $bool = $ae->have_old_bunzip2
592              
593             Older versions of C, from before the C release,
594             require all archive names to end in C<.bz2> or it will not extract
595             them. This method checks if you have a recent version of C
596             that allows any extension, or an older one that doesn't.
597              
598             =cut
599              
600             sub have_old_bunzip2 {
601 3     3 1 4483 my $self = shift;
602              
603             ### no bunzip2? no old bunzip2 either :)
604 3 50       25 return unless $self->bin_bunzip2;
605              
606             ### if we can't run this, we can't be sure if it's too old or not
607             ### XXX stupid stupid stupid bunzip2 doesn't understand --version
608             ### is not a request to extract data:
609             ### $ bunzip2 --version
610             ### bzip2, a block-sorting file compressor. Version 1.0.2, 30-Dec-2001.
611             ### [...]
612             ### bunzip2: I won't read compressed data from a terminal.
613             ### bunzip2: For help, type: `bunzip2 --help'.
614             ### $ echo $?
615             ### 1
616             ### HATEFUL!
617              
618             ### double hateful: bunzip2 --version also hangs if input is a pipe
619             ### See #32370: Archive::Extract will hang if stdin is a pipe [+PATCH]
620             ### So, we have to provide *another* argument which is a fake filename,
621             ### just so it wont try to read from stdin to print its version..
622             ### *sigh*
623             ### Even if the file exists, it won't clobber or change it.
624 3         14 my $buffer;
625 3         15 scalar run(
626             command => [$self->bin_bunzip2, '--version', 'NoSuchFile'],
627             verbose => 0,
628             buffer => \$buffer
629             );
630              
631             ### no output
632 3 100       32387 return unless $buffer;
633              
634 2         44 my ($version) = $buffer =~ /version \s+ (\d+)/ix;
635              
636 2 50       25 return 1 if $version < 1;
637 2         54 return;
638             }
639              
640             #################################
641             #
642             # Untar code
643             #
644             #################################
645              
646             ### annoying issue with (gnu) tar on win32, as illustrated by this
647             ### bug: https://rt.cpan.org/Ticket/Display.html?id=40138
648             ### which shows that (gnu) tar will interpret a file name with a :
649             ### in it as a remote file name, so C:\tmp\foo.txt is interpreted
650             ### as a remote shell, and the extract fails.
651             { my @ExtraTarFlags;
652             if( ON_WIN32 and my $cmd = __PACKAGE__->bin_tar ) {
653             $cmd = $1 if $cmd =~ m{^(.+)}s; # Tainted perl #
654             ### if this is gnu tar we are running, we need to use --force-local
655             push @ExtraTarFlags, '--force-local' if `$cmd --version` =~ /gnu tar/i;
656             }
657              
658              
659             ### use /bin/tar to extract ###
660             sub _untar_bin {
661 48     48   123 my $self = shift;
662              
663             ### check for /bin/tar ###
664             ### check for /bin/gzip if we need it ###
665             ### if any of the binaries are not available, return NA
666 48 50 66     66 { my $diag = !$self->bin_tar ?
  48 50 66     179  
    50 66        
    50          
667             loc("No '%1' program found", '/bin/tar') :
668             $self->is_tgz && !$self->bin_gzip ?
669             loc("No '%1' program found", '/bin/gzip') :
670             $self->is_tbz && !$self->bin_bunzip2 ?
671             loc("No '%1' program found", '/bin/bunzip2') :
672             $self->is_txz && !$self->bin_unxz ?
673             loc("No '%1' program found", '/bin/unxz') :
674             '';
675              
676 48 50       242 if( $diag ) {
677 0         0 $self->_error( $diag );
678 0         0 return METHOD_NA;
679             }
680             }
681              
682             ### XXX figure out how to make IPC::Run do this in one call --
683             ### currently i don't know how to get output of a command after a pipe
684             ### trapped in a scalar. Mailed barries about this 5th of june 2004.
685              
686             ### see what command we should run, based on whether
687             ### it's a .tgz or .tar
688              
689             ### GNU tar can't handled VMS filespecs, but VMSTAR can handle Unix filespecs.
690 48         172 my $archive = $self->archive;
691 48         74 $archive = VMS::Filespec::unixify($archive) if ON_VMS;
692              
693             ### XXX solaris tar and bsdtar are having different outputs
694             ### depending whether you run with -x or -t
695             ### compensate for this insanity by running -t first, then -x
696 48 100       123 { my $cmd =
    100          
    100          
697             $self->is_tgz ? [$self->bin_gzip, '-c', '-d', '-f', $archive, '|',
698             $self->bin_tar, '-tf', '-'] :
699             $self->is_tbz ? [$self->bin_bunzip2, '-cd', $archive, '|',
700             $self->bin_tar, '-tf', '-'] :
701             $self->is_txz ? [$self->bin_unxz, '-cd', $archive, '|',
702             $self->bin_tar, '-tf', '-'] :
703             [$self->bin_tar, @ExtraTarFlags, '-tf', $archive];
704              
705             ### run the command
706             ### newer versions of 'tar' (1.21 and up) now print record size
707             ### to STDERR as well if v OR t is given (used to be both). This
708             ### is a 'feature' according to the changelog, so we must now only
709             ### inspect STDOUT, otherwise, failures like these occur:
710             ### http://www.cpantesters.org/cpan/report/3230366
711 48         340 my $buffer = '';
712 48         1536 my @out = run( command => $cmd,
713             buffer => \$buffer,
714             verbose => $DEBUG );
715              
716             ### command was unsuccessful
717 48 50       393582 unless( $out[0] ) {
718 0         0 return $self->_error(loc(
719             "Error listing contents of archive '%1': %2",
720             $archive, $buffer ));
721             }
722              
723             ### no buffers available?
724 48 100 66     1452 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
725 24         1150 $self->_error( $self->_no_buffer_files( $archive ) );
726              
727             } else {
728             ### if we're on solaris we /might/ be using /bin/tar, which has
729             ### a weird output format... we might also be using
730             ### /usr/local/bin/tar, which is gnu tar, which is perfectly
731             ### fine... so we have to do some guessing here =/
732 38         75 my @files = map { chomp; s!\x0D!!g if ON_WIN32;
  38         49  
733 38         179 !ON_SOLARIS ? $_
734             : (m|^ x \s+ # 'xtract' -- sigh
735             (.+?), # the actual file name
736             \s+ [\d,.]+ \s bytes,
737             \s+ [\d,.]+ \s tape \s blocks
738             |x ? $1 : $_);
739              
740             ### only STDOUT, see above. Sometimes, extra whitespace
741             ### is present, so make sure we only pick lines with
742             ### a length
743 24         7405 } grep { length } map { split $/, $_ } join '', @{$out[3]};
  38         114  
  24         1292  
  24         97  
744              
745             ### store the files that are in the archive ###
746 24         565 $self->files(\@files);
747             }
748             }
749              
750             ### now actually extract it ###
751 48 100       68 { my $cmd =
  48 100       109  
  48 100       713  
752             $self->is_tgz ? [$self->bin_gzip, '-c', '-d', '-f', $archive, '|',
753             $self->bin_tar, '-xf', '-'] :
754             $self->is_tbz ? [$self->bin_bunzip2, '-cd', $archive, '|',
755             $self->bin_tar, '-xf', '-'] :
756             $self->is_txz ? [$self->bin_unxz, '-cd', $archive, '|',
757             $self->bin_tar, '-xf', '-'] :
758             [$self->bin_tar, @ExtraTarFlags, '-xf', $archive];
759              
760 48         231 my $buffer = '';
761 48 50       601 unless( scalar run( command => $cmd,
762             buffer => \$buffer,
763             verbose => $DEBUG )
764             ) {
765 0         0 return $self->_error(loc("Error extracting archive '%1': %2",
766             $archive, $buffer ));
767             }
768              
769             ### we might not have them, due to lack of buffers
770 48 50       366159 if( $self->files ) {
771             ### now that we've extracted, figure out where we extracted to
772 48         199 my $dir = $self->__get_extract_dir( $self->files );
773              
774             ### store the extraction dir ###
775 48         699 $self->extract_path( $dir );
776             }
777             }
778              
779             ### we got here, no error happened
780 48         519 return 1;
781             }
782             }
783              
784              
785             ### use archive::tar to extract ###
786             sub _untar_at {
787 32     32   346 my $self = shift;
788              
789             ### Loading Archive::Tar is going to set it to 1, so make it local
790             ### within this block, starting with its initial value. Whatever
791             ### Achive::Tar does will be undone when we return.
792             ###
793             ### Also, later, set $Archive::Tar::WARN to $Archive::Extract::WARN
794             ### so users don't have to even think about this variable. If they
795             ### do, they still get their set value outside of this call.
796 32         107 local $Archive::Tar::WARN = $Archive::Tar::WARN;
797              
798             ### we definitely need Archive::Tar, so load that first
799 32         61 { my $use_list = { 'Archive::Tar' => '0.0' };
  32         578  
800              
801 32 50       1163 unless( can_load( modules => $use_list ) ) {
802              
803 0         0 $self->_error(loc("You do not have '%1' installed - " .
804             "Please install it as soon as possible.",
805             'Archive::Tar'));
806              
807 0         0 return METHOD_NA;
808             }
809             }
810              
811             ### we might pass it a filehandle if it's a .tbz file..
812 32         73290 my $fh_to_read = $self->archive;
813              
814             ### we will need Compress::Zlib too, if it's a tgz... and IO::Zlib
815             ### if A::T's version is 0.99 or higher
816 32 100       275 if( $self->is_tgz ) {
    100          
    50          
817 16         290 my $use_list = { 'Compress::Zlib' => '0.0' };
818             {
819 16         93 local $@;
  16         52  
820             $use_list->{ 'IO::Zlib' } = '0.0'
821 16 50       59 if eval { Archive::Tar->VERSION('0.99'); 1 };
  16         840  
  16         507  
822             }
823              
824 16 50       141 unless( can_load( modules => $use_list ) ) {
825 0         0 my $which = join '/', sort keys %$use_list;
826              
827 0         0 $self->_error(loc(
828             "You do not have '%1' installed - Please ".
829             "install it as soon as possible.", $which)
830             );
831              
832 0         0 return METHOD_NA;
833             }
834              
835             } elsif ( $self->is_tbz ) {
836 8         425 my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
837 8 50       61 unless( can_load( modules => $use_list ) ) {
838 0         0 $self->_error(loc(
839             "You do not have '%1' installed - Please " .
840             "install it as soon as possible.",
841             'IO::Uncompress::Bunzip2')
842             );
843              
844 0         0 return METHOD_NA;
845             }
846              
847 8 50       14553 my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
848             return $self->_error(loc("Unable to open '%1': %2",
849             $self->archive,
850             $IO::Uncompress::Bunzip2::Bunzip2Error));
851              
852 8         18266 $fh_to_read = $bz;
853             } elsif ( $self->is_txz ) {
854 0         0 my $use_list = { 'IO::Uncompress::UnXz' => '0.0' };
855 0 0       0 unless( can_load( modules => $use_list ) ) {
856 0         0 $self->_error(loc(
857             "You do not have '%1' installed - Please " .
858             "install it as soon as possible.",
859             'IO::Uncompress::UnXz')
860             );
861              
862 0         0 return METHOD_NA;
863             }
864              
865 0 0       0 my $xz = IO::Uncompress::UnXz->new( $self->archive ) or
866             return $self->_error(loc("Unable to open '%1': %2",
867             $self->archive,
868             $IO::Uncompress::UnXz::UnXzError));
869              
870 0         0 $fh_to_read = $xz;
871             }
872              
873 32         12093 my @files;
874             {
875             ### $Archive::Tar::WARN is 1 by default in Archive::Tar, but we've
876             ### localized $Archive::Tar::WARN already.
877 32         56 $Archive::Tar::WARN = $Archive::Extract::WARN;
  32         61  
878              
879             ### only tell it it's compressed if it's a .tgz, as we give it a file
880             ### handle if it's a .tbz
881 32 100       109 my @read = ( $fh_to_read, ( $self->is_tgz ? 1 : 0 ) );
882              
883             ### for version of Archive::Tar > 1.04
884 32         435 local $Archive::Tar::CHOWN = 0;
885              
886             ### use the iterator if we can. it's a feature of A::T 1.40 and up
887 32 100 66     1121 if ( $_ALLOW_TAR_ITER && Archive::Tar->can( 'iter' ) ) {
888              
889 16         178 my $next;
890 16 50       247 unless ( $next = Archive::Tar->iter( @read ) ) {
891 0         0 return $self->_error(loc(
892             "Unable to read '%1': %2", $self->archive,
893             $Archive::Tar::error));
894             }
895              
896 16         41532 while ( my $file = $next->() ) {
897 26         65321 push @files, $file->full_path;
898              
899 26 50       1108 $file->extract or return $self->_error(loc(
900             "Unable to read '%1': %2",
901             $self->archive,
902             $Archive::Tar::error));
903             }
904              
905             ### older version, read the archive into memory
906             } else {
907              
908 16         481 my $tar = Archive::Tar->new();
909              
910 16 50       668 unless( $tar->read( @read ) ) {
911 0         0 return $self->_error(loc("Unable to read '%1': %2",
912             $self->archive, $Archive::Tar::error));
913             }
914              
915             ### workaround to prevent Archive::Tar from setting uid, which
916             ### is a potential security hole. -autrijus
917             ### have to do it here, since A::T needs to be /loaded/ first ###
918 1     1   9 { no strict 'refs'; local $^W;
  1         1  
  1         3417  
  16         63  
919              
920             ### older versions of archive::tar <= 0.23
921 16     0   846 *Archive::Tar::chown = sub {};
922             }
923              
924 16         117910 { local $^W; # quell 'splice() offset past end of array' warnings
  16         63  
  16         114  
925             # on older versions of A::T
926              
927             ### older archive::tar always returns $self, return value
928             ### slightly fux0r3d because of it.
929 16 50       156 $tar->extract or return $self->_error(loc(
930             "Unable to extract '%1': %2",
931             $self->archive, $Archive::Tar::error ));
932             }
933              
934 16         78538 @files = $tar->list_files;
935             }
936             }
937              
938 32         138602 my $dir = $self->__get_extract_dir( \@files );
939              
940             ### store the files that are in the archive ###
941 32         754 $self->files(\@files);
942              
943             ### store the extraction dir ###
944 32         498 $self->extract_path( $dir );
945              
946             ### check if the dir actually appeared ###
947 32 50       95 return 1 if -d $self->extract_path;
948              
949             ### no dir, we failed ###
950 0         0 return $self->_error(loc("Unable to extract '%1': %2",
951             $self->archive, $Archive::Tar::error ));
952             }
953              
954             #################################
955             #
956             # Gunzip code
957             #
958             #################################
959              
960             sub _gunzip_bin {
961 2     2   23 my $self = shift;
962              
963             ### check for /bin/gzip -- we need it ###
964 2 50       18 unless( $self->bin_gzip ) {
965 0         0 $self->_error(loc("No '%1' program found", '/bin/gzip'));
966 0         0 return METHOD_NA;
967             }
968              
969 2 50       27 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
970             return $self->_error(loc("Could not open '%1' for writing: %2",
971             $self->_gunzip_to, $! ));
972              
973 2         514 my $cmd = [ $self->bin_gzip, '-c', '-d', '-f', $self->archive ];
974              
975 2         5 my $buffer;
976 2 50       59 unless( scalar run( command => $cmd,
977             verbose => $DEBUG,
978             buffer => \$buffer )
979             ) {
980 0         0 return $self->_error(loc("Unable to gunzip '%1': %2",
981             $self->archive, $buffer));
982             }
983              
984             ### no buffers available?
985 2 50 66     8563 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
986 1         69 $self->_error( $self->_no_buffer_content( $self->archive ) );
987             }
988              
989 2 50       306 $self->_print($fh, $buffer) if defined $buffer;
990              
991 2         37 close $fh;
992              
993             ### set what files where extract, and where they went ###
994 2         33 $self->files( [$self->_gunzip_to] );
995 2         5818 $self->extract_path( File::Spec->rel2abs(cwd()) );
996              
997 2         117 return 1;
998             }
999              
1000             sub _gunzip_cz {
1001 4     4   32 my $self = shift;
1002              
1003 4         68 my $use_list = { 'Compress::Zlib' => '0.0' };
1004 4 50       118 unless( can_load( modules => $use_list ) ) {
1005 0         0 $self->_error(loc("You do not have '%1' installed - Please " .
1006             "install it as soon as possible.", 'Compress::Zlib'));
1007 0         0 return METHOD_NA;
1008             }
1009              
1010 4 50       77305 my $gz = Compress::Zlib::gzopen( $self->archive, "rb" ) or
1011             return $self->_error(loc("Unable to open '%1': %2",
1012             $self->archive, $Compress::Zlib::gzerrno));
1013              
1014 4 50       11499 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1015             return $self->_error(loc("Could not open '%1' for writing: %2",
1016             $self->_gunzip_to, $! ));
1017              
1018 4         503 my $buffer;
1019 4         28 $self->_print($fh, $buffer) while $gz->gzread($buffer) > 0;
1020 4         1306 $fh->close;
1021              
1022             ### set what files where extract, and where they went ###
1023 4         164 $self->files( [$self->_gunzip_to] );
1024 4         13299 $self->extract_path( File::Spec->rel2abs(cwd()) );
1025              
1026 4         346 return 1;
1027             }
1028              
1029             #################################
1030             #
1031             # Uncompress code
1032             #
1033             #################################
1034              
1035             sub _uncompress_bin {
1036 2     2   21 my $self = shift;
1037              
1038             ### check for /bin/gzip -- we need it ###
1039 2 50       24 unless( $self->bin_uncompress ) {
1040 0         0 $self->_error(loc("No '%1' program found", '/bin/uncompress'));
1041 0         0 return METHOD_NA;
1042             }
1043              
1044 2 50       31 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1045             return $self->_error(loc("Could not open '%1' for writing: %2",
1046             $self->_gunzip_to, $! ));
1047              
1048 2         579 my $cmd = [ $self->bin_uncompress, '-c', $self->archive ];
1049              
1050 2         15 my $buffer;
1051 2 50       52 unless( scalar run( command => $cmd,
1052             verbose => $DEBUG,
1053             buffer => \$buffer )
1054             ) {
1055 0         0 return $self->_error(loc("Unable to uncompress '%1': %2",
1056             $self->archive, $buffer));
1057             }
1058              
1059             ### no buffers available?
1060 2 50 66     11432 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1061 1         86 $self->_error( $self->_no_buffer_content( $self->archive ) );
1062             }
1063              
1064 2 100       422 $self->_print($fh, $buffer) if defined $buffer;
1065              
1066 2         81 close $fh;
1067              
1068             ### set what files where extract, and where they went ###
1069 2         49 $self->files( [$self->_gunzip_to] );
1070 2         6155 $self->extract_path( File::Spec->rel2abs(cwd()) );
1071              
1072 2         127 return 1;
1073             }
1074              
1075              
1076             #################################
1077             #
1078             # Unzip code
1079             #
1080             #################################
1081              
1082              
1083             sub _unzip_bin {
1084 0     0   0 my $self = shift;
1085              
1086             ### check for /bin/gzip if we need it ###
1087 0 0       0 unless( $self->bin_unzip ) {
1088 0         0 $self->_error(loc("No '%1' program found", '/bin/unzip'));
1089 0         0 return METHOD_NA;
1090             }
1091              
1092             ### first, get the files.. it must be 2 different commands with 'unzip' :(
1093             { ### on VMS, capital letter options have to be quoted. This is
1094             ### reported by John Malmberg on P5P Tue 21 Aug 2007 05:05:11
1095             ### Subject: [patch@31735]Archive Extract fix on VMS.
1096 0         0 my $opt = ON_VMS ? '"-Z"' : '-Z';
1097 0         0 my $cmd = [ $self->bin_unzip, $opt, '-1', $self->archive ];
1098              
1099 0         0 my $buffer = '';
1100 0 0       0 unless( scalar run( command => $cmd,
1101             verbose => $DEBUG,
1102             buffer => \$buffer )
1103             ) {
1104 0         0 return $self->_error(loc("Unable to unzip '%1': %2",
1105             $self->archive, $buffer));
1106             }
1107              
1108             ### no buffers available?
1109 0 0 0     0 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1110 0         0 $self->_error( $self->_no_buffer_files( $self->archive ) );
1111              
1112             } else {
1113             ### Annoyingly, pesky MSWin32 can either have 'native' tools
1114             ### which have \r\n line endings or Cygwin-based tools which
1115             ### have \n line endings. Jan Dubois suggested using this fix
1116 0         0 my $split = ON_WIN32 ? qr/\r?\n/ : "\n";
1117 0         0 $self->files( [split $split, $buffer] );
1118             }
1119             }
1120              
1121             ### now, extract the archive ###
1122 0         0 { my $cmd = [ $self->bin_unzip, '-qq', '-o', $self->archive ];
  0         0  
  0         0  
1123              
1124 0         0 my $buffer;
1125 0 0       0 unless( scalar run( command => $cmd,
1126             verbose => $DEBUG,
1127             buffer => \$buffer )
1128             ) {
1129 0         0 return $self->_error(loc("Unable to unzip '%1': %2",
1130             $self->archive, $buffer));
1131             }
1132              
1133 0 0       0 if( scalar @{$self->files} ) {
  0         0  
1134 0         0 my $files = $self->files;
1135 0         0 my $dir = $self->__get_extract_dir( $files );
1136              
1137 0         0 $self->extract_path( $dir );
1138             }
1139             }
1140              
1141 0         0 return 1;
1142             }
1143              
1144             sub _unzip_az {
1145 0     0   0 my $self = shift;
1146              
1147 0         0 my $use_list = { 'Archive::Zip' => '0.0' };
1148 0 0       0 unless( can_load( modules => $use_list ) ) {
1149 0         0 $self->_error(loc("You do not have '%1' installed - Please " .
1150             "install it as soon as possible.", 'Archive::Zip'));
1151 0         0 return METHOD_NA;
1152             }
1153              
1154 0         0 my $zip = Archive::Zip->new();
1155              
1156 0 0       0 unless( $zip->read( $self->archive ) == &Archive::Zip::AZ_OK ) {
1157 0         0 return $self->_error(loc("Unable to read '%1'", $self->archive));
1158             }
1159              
1160 0         0 my @files;
1161              
1162              
1163             ### Address: #43278: Explicitly tell Archive::Zip where to put the files:
1164             ### "In my BackPAN indexing, Archive::Zip was extracting things
1165             ### in my script's directory instead of the current working directory.
1166             ### I traced this back through Archive::Zip::_asLocalName which
1167             ### eventually calls File::Spec::Win32::rel2abs which on Windows might
1168             ### call Cwd::getdcwd. getdcwd returns the wrong directory in my
1169             ### case, even though I think I'm on the same drive.
1170             ###
1171             ### To fix this, I pass the optional second argument to
1172             ### extractMember using the cwd from Archive::Extract." --bdfoy
1173              
1174             ## store cwd() before looping; calls to cwd() can be expensive, and
1175             ### it won't change during the loop
1176 0         0 my $extract_dir = cwd();
1177              
1178             ### have to extract every member individually ###
1179 0         0 for my $member ($zip->members) {
1180 0         0 push @files, $member->{fileName};
1181              
1182             ### file to extract to, to avoid the above problem
1183 0         0 my $to = File::Spec->catfile( $extract_dir, $member->{fileName} );
1184              
1185 0 0       0 unless( $zip->extractMember($member, $to) == &Archive::Zip::AZ_OK ) {
1186             return $self->_error(loc("Extraction of '%1' from '%2' failed",
1187 0         0 $member->{fileName}, $self->archive ));
1188             }
1189             }
1190              
1191 0         0 my $dir = $self->__get_extract_dir( \@files );
1192              
1193             ### set what files where extract, and where they went ###
1194 0         0 $self->files( \@files );
1195 0         0 $self->extract_path( File::Spec->rel2abs($dir) );
1196              
1197 0         0 return 1;
1198             }
1199              
1200             sub __get_extract_dir {
1201 82     82   2106 my $self = shift;
1202 82   50     982 my $files = shift || [];
1203              
1204 82 100       744 return unless scalar @$files;
1205              
1206 58         168 my($dir1, $dir2);
1207 58         510 for my $aref ( [ \$dir1, 0 ], [ \$dir2, -1 ] ) {
1208 116         590 my($dir,$pos) = @$aref;
1209              
1210             ### add a catdir(), so that any trailing slashes get
1211             ### take care of (removed)
1212             ### also, a catdir() normalises './dir/foo' to 'dir/foo';
1213             ### which was the problem in bug #23999
1214 116 100       10366 my $res = -d $files->[$pos]
1215             ? File::Spec->catdir( $files->[$pos], '' )
1216             : File::Spec->catdir( dirname( $files->[$pos] ) );
1217              
1218 116         897 $$dir = $res;
1219             }
1220              
1221             ### if the first and last dir don't match, make sure the
1222             ### dirname is not set wrongly
1223 58         163 my $dir;
1224              
1225             ### dirs are the same, so we know for sure what the extract dir is
1226 58 50       222 if( $dir1 eq $dir2 ) {
1227 58         436 $dir = $dir1;
1228              
1229             ### dirs are different.. do they share the base dir?
1230             ### if so, use that, if not, fall back to '.'
1231             } else {
1232 0         0 my $base1 = [ File::Spec->splitdir( $dir1 ) ]->[0];
1233 0         0 my $base2 = [ File::Spec->splitdir( $dir2 ) ]->[0];
1234              
1235 0 0       0 $dir = File::Spec->rel2abs( $base1 eq $base2 ? $base1 : '.' );
1236             }
1237              
1238 58         3754 return File::Spec->rel2abs( $dir );
1239             }
1240              
1241             #################################
1242             #
1243             # Bunzip2 code
1244             #
1245             #################################
1246              
1247             sub _bunzip2_bin {
1248 2     2   23 my $self = shift;
1249              
1250             ### check for /bin/gzip -- we need it ###
1251 2 50       25 unless( $self->bin_bunzip2 ) {
1252 0         0 $self->_error(loc("No '%1' program found", '/bin/bunzip2'));
1253 0         0 return METHOD_NA;
1254             }
1255              
1256 2 50       30 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1257             return $self->_error(loc("Could not open '%1' for writing: %2",
1258             $self->_gunzip_to, $! ));
1259              
1260             ### guard against broken bunzip2. See ->have_old_bunzip2()
1261             ### for details
1262 2 50 33     530 if( $self->have_old_bunzip2 and $self->archive !~ /\.bz2$/i ) {
1263 0         0 return $self->_error(loc("Your bunzip2 version is too old and ".
1264             "can only extract files ending in '%1'",
1265             '.bz2'));
1266             }
1267              
1268 2         43 my $cmd = [ $self->bin_bunzip2, '-cd', $self->archive ];
1269              
1270 2         10 my $buffer;
1271 2 50       34 unless( scalar run( command => $cmd,
1272             verbose => $DEBUG,
1273             buffer => \$buffer )
1274             ) {
1275 0         0 return $self->_error(loc("Unable to bunzip2 '%1': %2",
1276             $self->archive, $buffer));
1277             }
1278              
1279             ### no buffers available?
1280 2 50 66     9150 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1281 1         74 $self->_error( $self->_no_buffer_content( $self->archive ) );
1282             }
1283              
1284 2 50       315 $self->_print($fh, $buffer) if defined $buffer;
1285              
1286 2         39 close $fh;
1287              
1288             ### set what files where extract, and where they went ###
1289 2         45 $self->files( [$self->_gunzip_to] );
1290 2         6011 $self->extract_path( File::Spec->rel2abs(cwd()) );
1291              
1292 2         115 return 1;
1293             }
1294              
1295             ### using cz2, the compact versions... this we use mainly in archive::tar
1296             ### extractor..
1297             # sub _bunzip2_cz1 {
1298             # my $self = shift;
1299             #
1300             # my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1301             # unless( can_load( modules => $use_list ) ) {
1302             # return $self->_error(loc("You do not have '%1' installed - Please " .
1303             # "install it as soon as possible.",
1304             # 'IO::Uncompress::Bunzip2'));
1305             # }
1306             #
1307             # my $bz = IO::Uncompress::Bunzip2->new( $self->archive ) or
1308             # return $self->_error(loc("Unable to open '%1': %2",
1309             # $self->archive,
1310             # $IO::Uncompress::Bunzip2::Bunzip2Error));
1311             #
1312             # my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1313             # return $self->_error(loc("Could not open '%1' for writing: %2",
1314             # $self->_gunzip_to, $! ));
1315             #
1316             # my $buffer;
1317             # $fh->print($buffer) while $bz->read($buffer) > 0;
1318             # $fh->close;
1319             #
1320             # ### set what files where extract, and where they went ###
1321             # $self->files( [$self->_gunzip_to] );
1322             # $self->extract_path( File::Spec->rel2abs(cwd()) );
1323             #
1324             # return 1;
1325             # }
1326              
1327             sub _bunzip2_bz2 {
1328 2     2   30 my $self = shift;
1329              
1330 2         59 my $use_list = { 'IO::Uncompress::Bunzip2' => '0.0' };
1331 2 50       67 unless( can_load( modules => $use_list ) ) {
1332 0         0 $self->_error(loc("You do not have '%1' installed - Please " .
1333             "install it as soon as possible.",
1334             'IO::Uncompress::Bunzip2'));
1335 0         0 return METHOD_NA;
1336             }
1337              
1338 2 50       697 IO::Uncompress::Bunzip2::bunzip2($self->archive => $self->_gunzip_to)
1339             or return $self->_error(loc("Unable to uncompress '%1': %2",
1340             $self->archive,
1341             $IO::Uncompress::Bunzip2::Bunzip2Error));
1342              
1343             ### set what files where extract, and where they went ###
1344 2         7223 $self->files( [$self->_gunzip_to] );
1345 2         7319 $self->extract_path( File::Spec->rel2abs(cwd()) );
1346              
1347 2         55 return 1;
1348             }
1349              
1350             #################################
1351             #
1352             # UnXz code
1353             #
1354             #################################
1355              
1356             sub _unxz_bin {
1357 2     2   23 my $self = shift;
1358              
1359             ### check for /bin/unxz -- we need it ###
1360 2 50       21 unless( $self->bin_unxz ) {
1361 0         0 $self->_error(loc("No '%1' program found", '/bin/unxz'));
1362 0         0 return METHOD_NA;
1363             }
1364              
1365 2 50       45 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1366             return $self->_error(loc("Could not open '%1' for writing: %2",
1367             $self->_gunzip_to, $! ));
1368              
1369 2         594 my $cmd = [ $self->bin_unxz, '-c', '-d', '-f', $self->archive ];
1370              
1371 2         7 my $buffer;
1372 2 50       59 unless( scalar run( command => $cmd,
1373             verbose => $DEBUG,
1374             buffer => \$buffer )
1375             ) {
1376 0         0 return $self->_error(loc("Unable to unxz '%1': %2",
1377             $self->archive, $buffer));
1378             }
1379              
1380             ### no buffers available?
1381 2 50 66     10864 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1382 1         85 $self->_error( $self->_no_buffer_content( $self->archive ) );
1383             }
1384              
1385 2 50       321 $self->_print($fh, $buffer) if defined $buffer;
1386              
1387 2         36 close $fh;
1388              
1389             ### set what files where extract, and where they went ###
1390 2         51 $self->files( [$self->_gunzip_to] );
1391 2         7153 $self->extract_path( File::Spec->rel2abs(cwd()) );
1392              
1393 2         114 return 1;
1394             }
1395              
1396             sub _unxz_cz {
1397 0     0   0 my $self = shift;
1398              
1399 0         0 my $use_list = { 'IO::Uncompress::UnXz' => '0.0' };
1400 0 0       0 unless( can_load( modules => $use_list ) ) {
1401 0         0 $self->_error(loc("You do not have '%1' installed - Please " .
1402             "install it as soon as possible.",
1403             'IO::Uncompress::UnXz'));
1404 0         0 return METHOD_NA;
1405             }
1406              
1407 0 0       0 IO::Uncompress::UnXz::unxz($self->archive => $self->_gunzip_to)
1408             or return $self->_error(loc("Unable to uncompress '%1': %2",
1409             $self->archive,
1410             $IO::Uncompress::UnXz::UnXzError));
1411              
1412             ### set what files where extract, and where they went ###
1413 0         0 $self->files( [$self->_gunzip_to] );
1414 0         0 $self->extract_path( File::Spec->rel2abs(cwd()) );
1415              
1416 0         0 return 1;
1417             }
1418              
1419              
1420             #################################
1421             #
1422             # unlzma code
1423             #
1424             #################################
1425              
1426             sub _unlzma_bin {
1427 2     2   18 my $self = shift;
1428              
1429             ### check for /bin/unlzma -- we need it ###
1430 2 50       24 unless( $self->bin_unlzma ) {
1431 0         0 $self->_error(loc("No '%1' program found", '/bin/unlzma'));
1432 0         0 return METHOD_NA;
1433             }
1434              
1435 2 50       19 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1436             return $self->_error(loc("Could not open '%1' for writing: %2",
1437             $self->_gunzip_to, $! ));
1438              
1439 2         656 my $cmd = [ $self->bin_unlzma, '-c', $self->archive ];
1440              
1441 2         5 my $buffer;
1442 2 50       53 unless( scalar run( command => $cmd,
1443             verbose => $DEBUG,
1444             buffer => \$buffer )
1445             ) {
1446 0         0 return $self->_error(loc("Unable to unlzma '%1': %2",
1447             $self->archive, $buffer));
1448             }
1449              
1450             ### no buffers available?
1451 2 50 66     9695 if( !IPC::Cmd->can_capture_buffer and !$buffer ) {
1452 1         84 $self->_error( $self->_no_buffer_content( $self->archive ) );
1453             }
1454              
1455 2 50       299 $self->_print($fh, $buffer) if defined $buffer;
1456              
1457 2         31 close $fh;
1458              
1459             ### set what files where extract, and where they went ###
1460 2         44 $self->files( [$self->_gunzip_to] );
1461 2         5701 $self->extract_path( File::Spec->rel2abs(cwd()) );
1462              
1463 2         123 return 1;
1464             }
1465              
1466             sub _unlzma_cz {
1467 0     0   0 my $self = shift;
1468              
1469 0         0 my $use_list1 = { 'IO::Uncompress::UnLzma' => '0.0' };
1470 0         0 my $use_list2 = { 'Compress::unLZMA' => '0.0' };
1471              
1472 0 0       0 if (can_load( modules => $use_list1 ) ) {
    0          
1473 0 0       0 IO::Uncompress::UnLzma::unlzma($self->archive => $self->_gunzip_to)
1474             or return $self->_error(loc("Unable to uncompress '%1': %2",
1475             $self->archive,
1476             $IO::Uncompress::UnLzma::UnLzmaError));
1477             }
1478             elsif (can_load( modules => $use_list2 ) ) {
1479              
1480 0 0       0 my $fh = FileHandle->new('>'. $self->_gunzip_to) or
1481             return $self->_error(loc("Could not open '%1' for writing: %2",
1482             $self->_gunzip_to, $! ));
1483              
1484 0         0 my $buffer;
1485 0         0 $buffer = Compress::unLZMA::uncompressfile( $self->archive );
1486 0 0       0 unless ( defined $buffer ) {
1487 0         0 return $self->_error(loc("Could not unlzma '%1': %2",
1488             $self->archive, $@));
1489             }
1490              
1491 0 0       0 $self->_print($fh, $buffer) if defined $buffer;
1492              
1493 0         0 close $fh;
1494             }
1495             else {
1496 0         0 $self->_error(loc("You do not have '%1' or '%2' installed - Please " .
1497             "install it as soon as possible.", 'Compress::unLZMA', 'IO::Uncompress::UnLzma'));
1498 0         0 return METHOD_NA;
1499             }
1500              
1501             ### set what files where extract, and where they went ###
1502 0         0 $self->files( [$self->_gunzip_to] );
1503 0         0 $self->extract_path( File::Spec->rel2abs(cwd()) );
1504              
1505 0         0 return 1;
1506             }
1507              
1508             #####################################
1509             #
1510             # unzip heuristics for FreeBSD-alikes
1511             #
1512             #####################################
1513              
1514             sub _is_infozip_esque {
1515 0     0   0 my $unzip = shift;
1516              
1517 0         0 my @strings;
1518 0         0 my $buf = '';
1519              
1520             {
1521 0 0       0 open my $file, '<', $unzip or die "$!\n";
  0         0  
1522 0         0 binmode $file;
1523 0         0 local $/ = \1;
1524 0         0 local $_;
1525 0         0 while(<$file>) {
1526 0 0       0 if ( m![[:print:]]! ) {
1527 0         0 $buf .= $_;
1528 0         0 next;
1529             }
1530 0 0 0     0 if ( $buf and m![^[:print:]]! ) {
1531 0 0       0 push @strings, $buf if length $buf >= 4;
1532 0         0 $buf = '';
1533 0         0 next;
1534             }
1535             }
1536             }
1537 0 0       0 push @strings, $buf if $buf;
1538 0         0 foreach my $part ( @strings ) {
1539 0 0 0     0 if ( $part =~ m!ZIPINFO! or $part =~ m!usage:.+?Z1! ) {
1540 0         0 return $unzip;
1541             }
1542             }
1543 0         0 return;
1544             }
1545              
1546             #################################
1547             #
1548             # Error code
1549             #
1550             #################################
1551              
1552             # For printing binaries that avoids interfering globals
1553             sub _print {
1554 3     3   1078 my $self = shift;
1555 3         29 my $fh = shift;
1556              
1557 3         101 local( $\, $", $, ) = ( undef, ' ', '' );
1558 3         67 return print $fh @_;
1559             }
1560              
1561             sub _error {
1562 35     35   2242 my $self = shift;
1563 35         63 my $error = shift;
1564 35         10592 my $lerror = Carp::longmess($error);
1565              
1566 35         7787 push @{$self->_error_msg}, $error;
  35         410  
1567 35         69 push @{$self->_error_msg_long}, $lerror;
  35         108  
1568              
1569             ### set $Archive::Extract::WARN to 0 to disable printing
1570             ### of errors
1571 35 100       112 if( $WARN ) {
1572 1 50       174 carp $DEBUG ? $lerror : $error;
1573             }
1574              
1575 35         226 return;
1576             }
1577              
1578             sub error {
1579 110     110 1 7100 my $self = shift;
1580              
1581             ### make sure we have a fallback aref
1582 110   100     877 my $aref = do {
1583             shift()
1584             ? $self->_error_msg_long
1585             : $self->_error_msg
1586             } || [];
1587              
1588 110         783 return join $/, @$aref;
1589             }
1590              
1591             =head2 debug( MESSAGE )
1592              
1593             This method outputs MESSAGE to the default filehandle if C<$DEBUG> is
1594             true. It's a small method, but it's here if you'd like to subclass it
1595             so you can so something else with any debugging output.
1596              
1597             =cut
1598              
1599             ### this is really a stub for subclassing
1600             sub debug {
1601 192 50   192 1 1171 return unless $DEBUG;
1602              
1603 0         0 print $_[1];
1604             }
1605              
1606             sub _no_buffer_files {
1607 24     24   102 my $self = shift;
1608 24 50       97 my $file = shift or return;
1609 24         543 return loc("No buffer captured, unable to tell ".
1610             "extracted files or extraction dir for '%1'", $file);
1611             }
1612              
1613             sub _no_buffer_content {
1614 5     5   25 my $self = shift;
1615 5 50       46 my $file = shift or return;
1616 5         124 return loc("No buffer captured, unable to get content for '%1'", $file);
1617             }
1618             1;
1619              
1620             =pod
1621              
1622             =head1 HOW IT WORKS
1623              
1624             C tries first to determine what type of archive you
1625             are passing it, by inspecting its suffix. It does not do this by using
1626             Mime magic, or something related. See C below.
1627              
1628             Once it has determined the file type, it knows which extraction methods
1629             it can use on the archive. It will try a perl solution first, then fall
1630             back to a commandline tool if that fails. If that also fails, it will
1631             return false, indicating it was unable to extract the archive.
1632             See the section on C to see how to alter this order.
1633              
1634             =head1 CAVEATS
1635              
1636             =head2 File Extensions
1637              
1638             C trusts on the extension of the archive to determine
1639             what type it is, and what extractor methods therefore can be used. If
1640             your archives do not have any of the extensions as described in the
1641             C method, you will have to specify the type explicitly, or
1642             C will not be able to extract the archive for you.
1643              
1644             =head2 Supporting Very Large Files
1645              
1646             C can use either pure perl modules or command line
1647             programs under the hood. Some of the pure perl modules (like
1648             C and Compress::unLZMA) take the entire contents of the archive into memory,
1649             which may not be feasible on your system. Consider setting the global
1650             variable C<$Archive::Extract::PREFER_BIN> to C<1>, which will prefer
1651             the use of command line programs and won't consume so much memory.
1652              
1653             See the C section below for details.
1654              
1655             =head2 Bunzip2 support of arbitrary extensions.
1656              
1657             Older versions of C do not support arbitrary file
1658             extensions and insist on a C<.bz2> suffix. Although we do our best
1659             to guard against this, if you experience a bunzip2 error, it may
1660             be related to this. For details, please see the C
1661             method.
1662              
1663             =head1 GLOBAL VARIABLES
1664              
1665             =head2 $Archive::Extract::DEBUG
1666              
1667             Set this variable to C to have all calls to command line tools
1668             be printed out, including all their output.
1669             This also enables C errors, instead of the regular
1670             C errors.
1671              
1672             Good for tracking down why things don't work with your particular
1673             setup.
1674              
1675             Defaults to C.
1676              
1677             =head2 $Archive::Extract::WARN
1678              
1679             This variable controls whether errors encountered internally by
1680             C should be C'd or not.
1681              
1682             Set to false to silence warnings. Inspect the output of the C
1683             method manually to see what went wrong.
1684              
1685             Defaults to C.
1686              
1687             =head2 $Archive::Extract::PREFER_BIN
1688              
1689             This variables controls whether C should prefer the
1690             use of perl modules, or commandline tools to extract archives.
1691              
1692             Set to C to have C prefer commandline tools.
1693              
1694             Defaults to C.
1695              
1696             =head1 TODO / CAVEATS
1697              
1698             =over 4
1699              
1700             =item Mime magic support
1701              
1702             Maybe this module should use something like C to determine
1703             the type, rather than blindly trust the suffix.
1704              
1705             =item Thread safety
1706              
1707             Currently, C does a C to the extraction dir before
1708             extraction, and a C back again after. This is not necessarily
1709             thread safe. See C bug C<#45671> for details.
1710              
1711             =back
1712              
1713             =head1 BUG REPORTS
1714              
1715             Please report bugs or other issues to Ebug-archive-extract@rt.cpan.orgE.
1716              
1717             =head1 AUTHOR
1718              
1719             This module by Jos Boumans Ekane@cpan.orgE.
1720              
1721             =head1 COPYRIGHT
1722              
1723             This library is free software; you may redistribute and/or modify it
1724             under the same terms as Perl itself.
1725              
1726             =cut
1727              
1728             # Local variables:
1729             # c-indentation-style: bsd
1730             # c-basic-offset: 4
1731             # indent-tabs-mode: nil
1732             # End:
1733             # vim: expandtab shiftwidth=4:
1734