File Coverage

lib/Archive/Tar/File.pm
Criterion Covered Total %
statement 185 196 94.3
branch 49 80 61.2
condition 20 31 64.5
subroutine 39 39 100.0
pod 22 23 95.6
total 315 369 85.3


line stmt bran cond sub pod time code
1             package Archive::Tar::File;
2 9     9   72866 use strict;
  9         28  
  9         318  
3              
4 9     9   55 use Carp ();
  9         45  
  9         192  
5 9     9   619 use IO::File;
  9         9159  
  9         1282  
6 9     9   158 use File::Spec::Unix ();
  9         28  
  9         242  
7 9     9   54 use File::Spec ();
  9         31  
  9         187  
8 9     9   109 use File::Basename ();
  9         16  
  9         246  
9              
10 9     9   3335 use Archive::Tar::Constant;
  9         29  
  9         2302  
11              
12 9     9   85 use vars qw[@ISA $VERSION];
  9         26  
  9         1310  
13             #@ISA = qw[Archive::Tar];
14             $VERSION = '2.40';
15              
16             ### set value to 1 to oct() it during the unpack ###
17              
18             my $tmpl = [
19             name => 0, # string A100
20             mode => 1, # octal A8
21             uid => 1, # octal A8
22             gid => 1, # octal A8
23             size => 0, # octal # cdrake - not *always* octal.. A12
24             mtime => 1, # octal A12
25             chksum => 1, # octal A8
26             type => 0, # character A1
27             linkname => 0, # string A100
28             magic => 0, # string A6
29             version => 0, # 2 bytes A2
30             uname => 0, # string A32
31             gname => 0, # string A32
32             devmajor => 1, # octal A8
33             devminor => 1, # octal A8
34             prefix => 0, # A155 x 12
35              
36             ### end UNPACK items ###
37             raw => 0, # the raw data chunk
38             data => 0, # the data associated with the file --
39             # This might be very memory intensive
40             ];
41              
42             ### install get/set accessors for this object.
43             for ( my $i=0; $i
44             my $key = $tmpl->[$i];
45 9     9   81 no strict 'refs';
  9         26  
  9         24057  
46             *{__PACKAGE__."::$key"} = sub {
47 12437     12437   45541 my $self = shift;
48 12437 100       19247 $self->{$key} = $_[0] if @_;
49              
50             ### just in case the key is not there or undef or something ###
51 12437         12789 { local $^W = 0;
  12437         23485  
52 12437         73606 return $self->{$key};
53             }
54             }
55             }
56              
57             =head1 NAME
58              
59             Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar
60              
61             =head1 SYNOPSIS
62              
63             my @items = $tar->get_files;
64              
65             print $_->name, ' ', $_->size, "\n" for @items;
66              
67             print $object->get_content;
68             $object->replace_content('new content');
69              
70             $object->rename( 'new/full/path/to/file.c' );
71              
72             =head1 DESCRIPTION
73              
74             Archive::Tar::Files provides a neat little object layer for in-memory
75             extracted files. It's mostly used internally in Archive::Tar to tidy
76             up the code, but there's no reason users shouldn't use this API as
77             well.
78              
79             =head2 Accessors
80              
81             A lot of the methods in this package are accessors to the various
82             fields in the tar header:
83              
84             =over 4
85              
86             =item name
87              
88             The file's name
89              
90             =item mode
91              
92             The file's mode
93              
94             =item uid
95              
96             The user id owning the file
97              
98             =item gid
99              
100             The group id owning the file
101              
102             =item size
103              
104             File size in bytes
105              
106             =item mtime
107              
108             Modification time. Adjusted to mac-time on MacOS if required
109              
110             =item chksum
111              
112             Checksum field for the tar header
113              
114             =item type
115              
116             File type -- numeric, but comparable to exported constants -- see
117             Archive::Tar's documentation
118              
119             =item linkname
120              
121             If the file is a symlink, the file it's pointing to
122              
123             =item magic
124              
125             Tar magic string -- not useful for most users
126              
127             =item version
128              
129             Tar version string -- not useful for most users
130              
131             =item uname
132              
133             The user name that owns the file
134              
135             =item gname
136              
137             The group name that owns the file
138              
139             =item devmajor
140              
141             Device major number in case of a special file
142              
143             =item devminor
144              
145             Device minor number in case of a special file
146              
147             =item prefix
148              
149             Any directory to prefix to the extraction path, if any
150              
151             =item raw
152              
153             Raw tar header -- not useful for most users
154              
155             =back
156              
157             =head1 Methods
158              
159             =head2 Archive::Tar::File->new( file => $path )
160              
161             Returns a new Archive::Tar::File object from an existing file.
162              
163             Returns undef on failure.
164              
165             =head2 Archive::Tar::File->new( data => $path, $data, $opt )
166              
167             Returns a new Archive::Tar::File object from data.
168              
169             C<$path> defines the file name (which need not exist), C<$data> the
170             file contents, and C<$opt> is a reference to a hash of attributes
171             which may be used to override the default attributes (fields in the
172             tar header), which are described above in the Accessors section.
173              
174             Returns undef on failure.
175              
176             =head2 Archive::Tar::File->new( chunk => $chunk )
177              
178             Returns a new Archive::Tar::File object from a raw 512-byte tar
179             archive chunk.
180              
181             Returns undef on failure.
182              
183             =cut
184              
185             sub new {
186 320     320 1 7770 my $class = shift;
187 320         495 my $what = shift;
188              
189 320 50       1287 my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) :
    100          
    100          
190             ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) :
191             ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) :
192             undef;
193              
194 320         1058 return $obj;
195             }
196              
197             ### copies the data, creates a clone ###
198             sub clone {
199 117     117 0 206 my $self = shift;
200 117         2069 return bless { %$self }, ref $self;
201             }
202              
203             sub _new_from_chunk {
204 268     268   417 my $class = shift;
205 268 50       564 my $chunk = shift or return; # 512 bytes of tar header
206 268         462 my %hash = @_;
207              
208             ### filter any arguments on defined-ness of values.
209             ### this allows overriding from what the tar-header is saying
210             ### about this tar-entry. Particularly useful for @LongLink files
211 268         494 my %args = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash;
  61         176  
  61         198  
212              
213             ### makes it start at 0 actually... :) ###
214 268         357 my $i = -1;
215             my %entry = map {
216 268         1982 my ($s,$v)=($tmpl->[++$i],$tmpl->[++$i]); # cdrake
  4288         7099  
217 4288 100       12722 ($_)=($_=~/^([^\0]*)/) unless($s eq 'size'); # cdrake
218 4288 100       11418 $s=> $v ? oct $_ : $_ # cdrake
219             # $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_ # removed by cdrake - mucks up binary sizes >8gb
220             } unpack( UNPACK, $chunk ); # cdrake
221             # } map { /^([^\0]*)/ } unpack( UNPACK, $chunk ); # old - replaced now by cdrake
222              
223              
224 268 50       1262 if(substr($entry{'size'}, 0, 1) eq "\x80") { # binary size extension for files >8gigs (> octal 77777777777777) # cdrake
225 0         0 my @sz=unpack("aCSNN",$entry{'size'}); $entry{'size'}=$sz[4]+(2**32)*$sz[3]+$sz[2]*(2**64); # Use the low 80 bits (should use the upper 15 as well, but as at year 2011, that seems unlikely to ever be needed - the numbers are just too big...) # cdrake
  0         0  
226             } else { # cdrake
227 268         980 ($entry{'size'})=($entry{'size'}=~/^([^\0]*)/); $entry{'size'}=oct $entry{'size'}; # cdrake
  268         560  
228             } # cdrake
229              
230              
231 268         2575 my $obj = bless { %entry, %args }, $class;
232              
233             ### magic is a filetype string.. it should have something like 'ustar' or
234             ### something similar... if the chunk is garbage, skip it
235 268 50       914 return unless $obj->magic !~ /\W/;
236              
237             ### store the original chunk ###
238 268         732 $obj->raw( $chunk );
239              
240 268 50 33     564 $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) );
241 268 50 66     690 $obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m|/$|) );
242              
243              
244 268         1052 return $obj;
245              
246             }
247              
248             sub _new_from_file {
249 16     16   48 my $class = shift;
250 16         31 my $path = shift;
251              
252             ### path has to at least exist
253 16 50       51 return unless defined $path;
254              
255 16         81 my $type = __PACKAGE__->_filetype($path);
256 16         96 my $data = '';
257              
258             READ: {
259 16 100       40 unless ($type == DIR ) {
  16         107  
260 15         126 my $fh = IO::File->new;
261              
262 15 50       747 unless( $fh->open($path) ) {
263             ### dangling symlinks are fine, stop reading but continue
264             ### creating the object
265 0 0       0 last READ if $type == SYMLINK;
266              
267             ### otherwise, return from this function --
268             ### anything that's *not* a symlink should be
269             ### resolvable
270 0         0 return;
271             }
272              
273             ### binmode needed to read files properly on win32 ###
274 15         873 binmode $fh;
275 15         30 $data = do { local $/; <$fh> };
  15         82  
  15         1080  
276 15         290 close $fh;
277             }
278             }
279              
280 16         204 my @items = qw[mode uid gid size mtime];
281 16         299 my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9];
  80         368  
282              
283 16         68 if (ON_VMS) {
284             ### VMS has two UID modes, traditional and POSIX. Normally POSIX is
285             ### not used. We currently do not have an easy way to see if we are in
286             ### POSIX mode. In traditional mode, the UID is actually the VMS UIC.
287             ### The VMS UIC has the upper 16 bits is the GID, which in many cases
288             ### the VMS UIC will be larger than 209715, the largest that TAR can
289             ### handle. So for now, assume it is traditional if the UID is larger
290             ### than 0x10000.
291              
292             if ($hash{uid} > 0x10000) {
293             $hash{uid} = $hash{uid} & 0xFFFF;
294             }
295              
296             ### The file length from stat() is the physical length of the file
297             ### However the amount of data read in may be more for some file types.
298             ### Fixed length files are read past the logical EOF to end of the block
299             ### containing. Other file types get expanded on read because record
300             ### delimiters are added.
301              
302             my $data_len = length $data;
303             $hash{size} = $data_len if $hash{size} < $data_len;
304              
305             }
306             ### you *must* set size == 0 on symlinks, or the next entry will be
307             ### though of as the contents of the symlink, which is wrong.
308             ### this fixes bug #7937
309 16 100 66     186 $hash{size} = 0 if ($type == DIR or $type == SYMLINK);
310 16         52 $hash{mtime} -= TIME_OFFSET;
311              
312             ### strip the high bits off the mode, which we don't need to store
313 16         202 $hash{mode} = STRIP_MODE->( $hash{mode} );
314              
315              
316             ### probably requires some file path munging here ... ###
317             ### name and prefix are set later
318             my $obj = {
319             %hash,
320             name => '',
321             chksum => CHECK_SUM,
322             type => $type,
323             linkname => ($type == SYMLINK and CAN_READLINK)
324             ? readlink $path
325             : '',
326             magic => MAGIC,
327             version => TAR_VERSION,
328             uname => UNAME->( $hash{uid} ),
329 16 50 50     253 gname => GNAME->( $hash{gid} ),
330             devmajor => 0, # not handled
331             devminor => 0, # not handled
332             prefix => '',
333             data => $data,
334             };
335              
336 16         117 bless $obj, $class;
337              
338             ### fix up the prefix and file from the path
339 16         60 my($prefix,$file) = $obj->_prefix_and_file( $path );
340 16         84 $obj->prefix( $prefix );
341 16         49 $obj->name( $file );
342              
343 16         70 return $obj;
344             }
345              
346             sub _new_from_data {
347 36     36   71 my $class = shift;
348 36 50       79 my $path = shift; return unless defined $path;
  36         116  
349 36 100       85 my $data = shift; return unless defined $data;
  36         77  
350 35         52 my $opt = shift;
351              
352 35         229 my $obj = {
353             data => $data,
354             name => '',
355             mode => MODE,
356             uid => UID,
357             gid => GID,
358             size => length $data,
359             mtime => time - TIME_OFFSET,
360             chksum => CHECK_SUM,
361             type => FILE,
362             linkname => '',
363             magic => MAGIC,
364             version => TAR_VERSION,
365             uname => UNAME->( UID ),
366             gname => GNAME->( GID ),
367             devminor => 0,
368             devmajor => 0,
369             prefix => '',
370             };
371              
372             ### overwrite with user options, if provided ###
373 35 100 66     308 if( $opt and ref $opt eq 'HASH' ) {
374 22         113 for my $key ( keys %$opt ) {
375              
376             ### don't write bogus options ###
377 31 50       98 next unless exists $obj->{$key};
378 31         69 $obj->{$key} = $opt->{$key};
379             }
380             }
381              
382 35         88 bless $obj, $class;
383              
384             ### fix up the prefix and file from the path
385 35         105 my($prefix,$file) = $obj->_prefix_and_file( $path );
386 35         121 $obj->prefix( $prefix );
387 35         92 $obj->name( $file );
388              
389 35         65 return $obj;
390             }
391              
392             sub _prefix_and_file {
393 146     146   253 my $self = shift;
394 146         228 my $path = shift;
395              
396 146         381 my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir );
397 146         1129 my @dirs = File::Spec->splitdir( File::Spec->canonpath($dirs) );
398              
399             ### if it's a directory, then $file might be empty
400 146 100 66     353 $file = pop @dirs if $self->is_dir and not length $file;
401              
402             ### splitting ../ gives you the relative path in native syntax
403             ### Remove the root (000000) directory
404             ### The volume from splitpath will also be in native syntax
405 146         233 if (ON_VMS) {
406             map { $_ = '..' if $_ eq '-'; $_ = '' if $_ eq '000000' } @dirs;
407             if (length($vol)) {
408             $vol = VMS::Filespec::unixify($vol);
409             unshift @dirs, $vol;
410             }
411             }
412              
413 146         884 my $prefix = File::Spec::Unix->catdir(@dirs);
414 146         682 return( $prefix, $file );
415             }
416              
417             sub _filetype {
418 16     16   33 my $self = shift;
419 16         31 my $file = shift;
420              
421 16 50       45 return unless defined $file;
422              
423 16 50       212 return SYMLINK if (-l $file); # Symlink
424              
425 16 100       122 return FILE if (-f _); # Plain file
426              
427 1 50       7 return DIR if (-d _); # Directory
428              
429 0 0       0 return FIFO if (-p _); # Named pipe
430              
431 0 0       0 return SOCKET if (-S _); # Socket
432              
433 0 0       0 return BLOCKDEV if (-b _); # Block special
434              
435 0 0       0 return CHARDEV if (-c _); # Character special
436              
437             ### shouldn't happen, this is when making archives, not reading ###
438 0 0       0 return LONGLINK if ( $file eq LONGLINK_NAME );
439              
440 0         0 return UNKNOWN; # Something else (like what?)
441              
442             }
443              
444             ### this method 'downgrades' a file to plain file -- this is used for
445             ### symlinks when FOLLOW_SYMLINKS is true.
446             sub _downgrade_to_plainfile {
447 1     1   3 my $entry = shift;
448 1         6 $entry->type( FILE );
449 1         5 $entry->mode( MODE );
450 1         3 $entry->linkname('');
451              
452 1         6 return 1;
453             }
454              
455             =head2 $bool = $file->extract( [ $alternative_name ] )
456              
457             Extract this object, optionally to an alternative name.
458              
459             See C<< Archive::Tar->extract_file >> for details.
460              
461             Returns true on success and false on failure.
462              
463             =cut
464              
465             sub extract {
466 2     2 1 16 my $self = shift;
467              
468 2         15 local $Carp::CarpLevel += 1;
469              
470             ### avoid circular use, so only require;
471 2         31 require Archive::Tar;
472 2         12 return Archive::Tar->_extract_file( $self, @_ );
473             }
474              
475             =head2 $path = $file->full_path
476              
477             Returns the full path from the tar header; this is basically a
478             concatenation of the C and C fields.
479              
480             =cut
481              
482             sub full_path {
483 820     820 1 145647 my $self = shift;
484              
485             ### if prefix field is empty
486 820 100 66     2015 return $self->name unless defined $self->prefix and length $self->prefix;
487              
488             ### or otherwise, catfile'd
489 118         292 return File::Spec::Unix->catfile( $self->prefix, $self->name );
490             }
491              
492              
493             =head2 $bool = $file->validate
494              
495             Done by Archive::Tar internally when reading the tar file:
496             validate the header against the checksum to ensure integer tar file.
497              
498             Returns true on success, false on failure
499              
500             =cut
501              
502             sub validate {
503 203     203 1 741 my $self = shift;
504              
505 203         357 my $raw = $self->raw;
506              
507             ### don't know why this one is different from the one we /write/ ###
508 203         556 substr ($raw, 148, 8) = " ";
509              
510             ### bug #43513: [PATCH] Accept wrong checksums from SunOS and HP-UX tar
511             ### like GNU tar does. See here for details:
512             ### http://www.gnu.org/software/tar/manual/tar.html#SEC139
513             ### so we do both a signed AND unsigned validate. if one succeeds, that's
514             ### good enough
515 203 50 66     1074 return ( (unpack ("%16C*", $raw) == $self->chksum)
516             or (unpack ("%16c*", $raw) == $self->chksum)) ? 1 : 0;
517             }
518              
519             =head2 $bool = $file->has_content
520              
521             Returns a boolean to indicate whether the current object has content.
522             Some special files like directories and so on never will have any
523             content. This method is mainly to make sure you don't get warnings
524             for using uninitialized values when looking at an object's content.
525              
526             =cut
527              
528             sub has_content {
529 105     105 1 200 my $self = shift;
530 105 100 100     197 return defined $self->data() && length $self->data() ? 1 : 0;
531             }
532              
533             =head2 $content = $file->get_content
534              
535             Returns the current content for the in-memory file
536              
537             =cut
538              
539             sub get_content {
540 84     84 1 5887 my $self = shift;
541 84         235 $self->data( );
542             }
543              
544             =head2 $cref = $file->get_content_by_ref
545              
546             Returns the current content for the in-memory file as a scalar
547             reference. Normal users won't need this, but it will save memory if
548             you are dealing with very large data files in your tar archive, since
549             it will pass the contents by reference, rather than make a copy of it
550             first.
551              
552             =cut
553              
554             sub get_content_by_ref {
555 207     207 1 272 my $self = shift;
556              
557 207         550 return \$self->{data};
558             }
559              
560             =head2 $bool = $file->replace_content( $content )
561              
562             Replace the current content of the file with the new content. This
563             only affects the in-memory archive, not the on-disk version until
564             you write it.
565              
566             Returns true on success, false on failure.
567              
568             =cut
569              
570             sub replace_content {
571 11     11 1 2984 my $self = shift;
572 11   100     45 my $data = shift || '';
573              
574 11         36 $self->data( $data );
575 11         42 $self->size( length $data );
576 11         38 return 1;
577             }
578              
579             =head2 $bool = $file->rename( $new_name )
580              
581             Rename the current file to $new_name.
582              
583             Note that you must specify a Unix path for $new_name, since per tar
584             standard, all files in the archive must be Unix paths.
585              
586             Returns true on success and false on failure.
587              
588             =cut
589              
590             sub rename {
591 12     12 1 24 my $self = shift;
592 12         27 my $path = shift;
593              
594 12 50       36 return unless defined $path;
595              
596 12         65 my ($prefix,$file) = $self->_prefix_and_file( $path );
597              
598 12         53 $self->name( $file );
599 12         33 $self->prefix( $prefix );
600              
601 12         56 return 1;
602             }
603              
604             =head2 $bool = $file->chmod $mode)
605              
606             Change mode of $file to $mode. The mode can be a string or a number
607             which is interpreted as octal whether or not a leading 0 is given.
608              
609             Returns true on success and false on failure.
610              
611             =cut
612              
613             sub chmod {
614 1     1 1 2 my $self = shift;
615 1 50 33     2 my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/;
  1         8  
616 1         3 $self->{mode} = oct($mode);
617 1         3 return 1;
618             }
619              
620             =head2 $bool = $file->chown( $user [, $group])
621              
622             Change owner of $file to $user. If a $group is given that is changed
623             as well. You can also pass a single parameter with a colon separating the
624             use and group as in 'root:wheel'.
625              
626             Returns true on success and false on failure.
627              
628             =cut
629              
630             sub chown {
631 12     12 1 29 my $self = shift;
632 12         21 my $uname = shift;
633 12 50       30 return unless defined $uname;
634 12         19 my $gname;
635 12 50       82 if (-1 != index($uname, ':')) {
636 0         0 ($uname, $gname) = split(/:/, $uname);
637             } else {
638 12 100       35 $gname = shift if @_ > 0;
639             }
640              
641 12         39 $self->uname( $uname );
642 12 100       44 $self->gname( $gname ) if $gname;
643 12         39 return 1;
644             }
645              
646             =head1 Convenience methods
647              
648             To quickly check the type of a C object, you can
649             use the following methods:
650              
651             =over 4
652              
653             =item $file->is_file
654              
655             Returns true if the file is of type C
656              
657             =item $file->is_dir
658              
659             Returns true if the file is of type C
660              
661             =item $file->is_hardlink
662              
663             Returns true if the file is of type C
664              
665             =item $file->is_symlink
666              
667             Returns true if the file is of type C
668              
669             =item $file->is_chardev
670              
671             Returns true if the file is of type C
672              
673             =item $file->is_blockdev
674              
675             Returns true if the file is of type C
676              
677             =item $file->is_fifo
678              
679             Returns true if the file is of type C
680              
681             =item $file->is_socket
682              
683             Returns true if the file is of type C
684              
685             =item $file->is_longlink
686              
687             Returns true if the file is of type C.
688             Should not happen after a successful C.
689              
690             =item $file->is_label
691              
692             Returns true if the file is of type C
693             Should not happen after a successful C.
694              
695             =item $file->is_unknown
696              
697             Returns true if the file type is C
698              
699             =back
700              
701             =cut
702              
703             #stupid perl5.5.3 needs to warn if it's not numeric
704 1011     1011 1 16368 sub is_file { local $^W; FILE == $_[0]->type }
  1011         1881  
705 585     585 1 3348 sub is_dir { local $^W; DIR == $_[0]->type }
  585         2284  
706 4     4 1 1130 sub is_hardlink { local $^W; HARDLINK == $_[0]->type }
  4         16  
707 210     210 1 1519 sub is_symlink { local $^W; SYMLINK == $_[0]->type }
  210         365  
708 4     4 1 1112 sub is_chardev { local $^W; CHARDEV == $_[0]->type }
  4         14  
709 4     4 1 1187 sub is_blockdev { local $^W; BLOCKDEV == $_[0]->type }
  4         32  
710 4     4 1 1111 sub is_fifo { local $^W; FIFO == $_[0]->type }
  4         12  
711 4     4 1 1201 sub is_socket { local $^W; SOCKET == $_[0]->type }
  4         18  
712 123     123 1 1500 sub is_unknown { local $^W; UNKNOWN == $_[0]->type }
  123         336  
713 567     567 1 2166 sub is_longlink { local $^W; LONGLINK eq $_[0]->type }
  567         993  
714 281     281 1 1616 sub is_label { local $^W; LABEL eq $_[0]->type }
  281         486  
715              
716             1;