File Coverage

lib/Archive/Tar/File.pm
Criterion Covered Total %
statement 187 198 94.4
branch 51 82 62.2
condition 20 31 64.5
subroutine 39 39 100.0
pod 22 23 95.6
total 319 373 85.5


line stmt bran cond sub pod time code
1             package Archive::Tar::File;
2 10     10   66456 use strict;
  10         32  
  10         294  
3              
4 10     10   60 use Carp ();
  10         20  
  10         199  
5 10     10   484 use IO::File;
  10         8333  
  10         1312  
6 10     10   74 use File::Spec::Unix ();
  10         20  
  10         170  
7 10     10   45 use File::Spec ();
  10         18  
  10         217  
8 10     10   128 use File::Basename ();
  10         26  
  10         183  
9              
10 10     10   3406 use Archive::Tar::Constant;
  10         27  
  10         2012  
11              
12 10     10   73 use vars qw[@ISA $VERSION];
  10         17  
  10         1304  
13             #@ISA = qw[Archive::Tar];
14             $VERSION = '3.00';
15              
16             ### set value to 1 to oct() it during the unpack ###
17              
18             my $tmpl = [
19             name => 0, # string A100
20             mode => 1, # octal A8
21             uid => 1, # octal A8
22             gid => 1, # octal A8
23             size => 0, # octal # cdrake - not *always* octal.. A12
24             mtime => 1, # octal A12
25             chksum => 1, # octal A8
26             type => 0, # character A1
27             linkname => 0, # string A100
28             magic => 0, # string A6
29             version => 0, # 2 bytes A2
30             uname => 0, # string A32
31             gname => 0, # string A32
32             devmajor => 1, # octal A8
33             devminor => 1, # octal A8
34             prefix => 0, # A155 x 12
35              
36             ### end UNPACK items ###
37             raw => 0, # the raw data chunk
38             data => 0, # the data associated with the file --
39             # This might be very memory intensive
40             ];
41              
42             ### install get/set accessors for this object.
43             for ( my $i=0; $i
44             my $key = $tmpl->[$i];
45 10     10   67 no strict 'refs';
  10         28  
  10         25061  
46             *{__PACKAGE__."::$key"} = sub {
47 34423     34423   69302 my $self = shift;
48 34423 100       52813 $self->{$key} = $_[0] if @_;
49              
50             ### just in case the key is not there or undef or something ###
51 34423         36616 { local $^W = 0;
  34423         76018  
52 34423         153261 return $self->{$key};
53             }
54             }
55             }
56              
57             =head1 NAME
58              
59             Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar
60              
61             =head1 SYNOPSIS
62              
63             my @items = $tar->get_files;
64              
65             print $_->name, ' ', $_->size, "\n" for @items;
66              
67             print $object->get_content;
68             $object->replace_content('new content');
69              
70             $object->rename( 'new/full/path/to/file.c' );
71              
72             =head1 DESCRIPTION
73              
74             Archive::Tar::File provides a neat little object layer for in-memory
75             extracted files. It's mostly used internally in Archive::Tar to tidy
76             up the code, but there's no reason users shouldn't use this API as
77             well.
78              
79             =head2 Accessors
80              
81             A lot of the methods in this package are accessors to the various
82             fields in the tar header:
83              
84             =over 4
85              
86             =item name
87              
88             The file's name
89              
90             =item mode
91              
92             The file's mode
93              
94             =item uid
95              
96             The user id owning the file
97              
98             =item gid
99              
100             The group id owning the file
101              
102             =item size
103              
104             File size in bytes
105              
106             =item mtime
107              
108             Modification time. Adjusted to mac-time on MacOS if required
109              
110             =item chksum
111              
112             Checksum field for the tar header
113              
114             =item type
115              
116             File type -- numeric, but comparable to exported constants -- see
117             Archive::Tar's documentation
118              
119             =item linkname
120              
121             If the file is a symlink, the file it's pointing to
122              
123             =item magic
124              
125             Tar magic string -- not useful for most users
126              
127             =item version
128              
129             Tar version string -- not useful for most users
130              
131             =item uname
132              
133             The user name that owns the file
134              
135             =item gname
136              
137             The group name that owns the file
138              
139             =item devmajor
140              
141             Device major number in case of a special file
142              
143             =item devminor
144              
145             Device minor number in case of a special file
146              
147             =item prefix
148              
149             Any directory to prefix to the extraction path, if any
150              
151             =item raw
152              
153             Raw tar header -- not useful for most users
154              
155             =back
156              
157             =head1 Methods
158              
159             =head2 Archive::Tar::File->new( file => $path )
160              
161             Returns a new Archive::Tar::File object from an existing file.
162              
163             Returns undef on failure.
164              
165             =head2 Archive::Tar::File->new( data => $path, $data, $opt )
166              
167             Returns a new Archive::Tar::File object from data.
168              
169             C<$path> defines the file name (which need not exist), C<$data> the
170             file contents, and C<$opt> is a reference to a hash of attributes
171             which may be used to override the default attributes (fields in the
172             tar header), which are described above in the Accessors section.
173              
174             Returns undef on failure.
175              
176             =head2 Archive::Tar::File->new( chunk => $chunk )
177              
178             Returns a new Archive::Tar::File object from a raw 512-byte tar
179             archive chunk.
180              
181             Returns undef on failure.
182              
183             =cut
184              
185             sub new {
186 344     344 1 7313 my $class = shift;
187 344         535 my $what = shift;
188              
189 344 50       1228 my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) :
    100          
    100          
190             ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) :
191             ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) :
192             undef;
193              
194 344         1054 return $obj;
195             }
196              
197             ### copies the data, creates a clone ###
198             sub clone {
199 123     123 0 219 my $self = shift;
200 123         1986 return bless { %$self }, ref $self;
201             }
202              
203             sub _new_from_chunk {
204 292     292   410 my $class = shift;
205 292 50       897 my $chunk = shift or return; # 512 bytes of tar header
206 292         504 my %hash = @_;
207              
208             ### filter any arguments on defined-ness of values.
209             ### this allows overriding from what the tar-header is saying
210             ### about this tar-entry. Particularly useful for @LongLink files
211 292         480 my %args = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash;
  63         169  
  63         207  
212              
213             ### makes it start at 0 actually... :) ###
214 292         415 my $i = -1;
215             my %entry = map {
216 292         2016 my ($s,$v)=($tmpl->[++$i],$tmpl->[++$i]); # cdrake
  4672         7324  
217 4672 100       13114 ($_)=($_=~/^([^\0]*)/) unless($s eq 'size'); # cdrake
218 4672 100       11877 $s=> $v ? oct $_ : $_ # cdrake
219             # $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_ # removed by cdrake - mucks up binary sizes >8gb
220             } unpack( UNPACK, $chunk ); # cdrake
221             # } map { /^([^\0]*)/ } unpack( UNPACK, $chunk ); # old - replaced now by cdrake
222              
223              
224 292 50       1232 if(substr($entry{'size'}, 0, 1) eq "\x80") { # binary size extension for files >8gigs (> octal 77777777777777) # cdrake
225 0         0 my @sz=unpack("aCSNN",$entry{'size'}); $entry{'size'}=$sz[4]+(2**32)*$sz[3]+$sz[2]*(2**64); # Use the low 80 bits (should use the upper 15 as well, but as at year 2011, that seems unlikely to ever be needed - the numbers are just too big...) # cdrake
  0         0  
226             } else { # cdrake
227 292         953 ($entry{'size'})=($entry{'size'}=~/^([^\0]*)/); $entry{'size'}=oct $entry{'size'}; # cdrake
  292         624  
228             } # cdrake
229              
230              
231 292         2634 my $obj = bless { %entry, %args }, $class;
232              
233             ### magic is a filetype string.. it should have something like 'ustar' or
234             ### something similar... if the chunk is garbage, skip it
235 292 50       884 return unless $obj->magic !~ /\W/;
236              
237             ### store the original chunk ###
238 292         702 $obj->raw( $chunk );
239              
240 292 50 33     574 $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) );
241 292 50 66     675 $obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m|/$|) );
242              
243              
244 292         1162 return $obj;
245              
246             }
247              
248             sub _new_from_file {
249 16     16   39 my $class = shift;
250 16         36 my $path = shift;
251              
252             ### path has to at least exist
253 16 50       87 return unless defined $path;
254              
255 16         57 my $type = __PACKAGE__->_filetype($path);
256 16         89 my $data = '';
257              
258             READ: {
259 16 100       35 unless ($type == DIR ) {
  16         65  
260 15         101 my $fh = IO::File->new;
261              
262 15 50       607 unless( $fh->open($path) ) {
263             ### dangling symlinks are fine, stop reading but continue
264             ### creating the object
265 0 0       0 last READ if $type == SYMLINK;
266              
267             ### otherwise, return from this function --
268             ### anything that's *not* a symlink should be
269             ### resolvable
270 0         0 return;
271             }
272              
273             ### binmode needed to read files properly on win32 ###
274 15         756 binmode $fh;
275 15         24 $data = do { local $/; <$fh> };
  15         63  
  15         1034  
276 15         234 close $fh;
277             }
278             }
279              
280 16         206 my @items = qw[mode uid gid size mtime];
281 16         268 my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9];
  80         328  
282              
283 16         48 if (ON_VMS) {
284             ### VMS has two UID modes, traditional and POSIX. Normally POSIX is
285             ### not used. We currently do not have an easy way to see if we are in
286             ### POSIX mode. In traditional mode, the UID is actually the VMS UIC.
287             ### The VMS UIC has the upper 16 bits is the GID, which in many cases
288             ### the VMS UIC will be larger than 209715, the largest that TAR can
289             ### handle. So for now, assume it is traditional if the UID is larger
290             ### than 0x10000.
291              
292             if ($hash{uid} > 0x10000) {
293             $hash{uid} = $hash{uid} & 0xFFFF;
294             }
295              
296             ### The file length from stat() is the physical length of the file
297             ### However the amount of data read in may be more for some file types.
298             ### Fixed length files are read past the logical EOF to end of the block
299             ### containing. Other file types get expanded on read because record
300             ### delimiters are added.
301              
302             my $data_len = length $data;
303             $hash{size} = $data_len if $hash{size} < $data_len;
304              
305             }
306             ### you *must* set size == 0 on symlinks, or the next entry will be
307             ### though of as the contents of the symlink, which is wrong.
308             ### this fixes bug #7937
309 16 100 66     165 $hash{size} = 0 if ($type == DIR or $type == SYMLINK);
310 16         45 $hash{mtime} -= TIME_OFFSET;
311              
312             ### strip the high bits off the mode, which we don't need to store
313 16         163 $hash{mode} = STRIP_MODE->( $hash{mode} );
314              
315              
316             ### probably requires some file path munging here ... ###
317             ### name and prefix are set later
318             my $obj = {
319             %hash,
320             name => '',
321             chksum => CHECK_SUM,
322             type => $type,
323             linkname => ($type == SYMLINK and CAN_READLINK)
324             ? readlink $path
325             : '',
326             magic => MAGIC,
327             version => TAR_VERSION,
328             uname => UNAME->( $hash{uid} ),
329 16 50 50     202 gname => GNAME->( $hash{gid} ),
330             devmajor => 0, # not handled
331             devminor => 0, # not handled
332             prefix => '',
333             data => $data,
334             };
335              
336 16         128 bless $obj, $class;
337              
338             ### fix up the prefix and file from the path
339 16         92 my($prefix,$file) = $obj->_prefix_and_file( $path );
340 16         58 $obj->prefix( $prefix );
341 16         57 $obj->name( $file );
342              
343 16         62 return $obj;
344             }
345              
346             sub _new_from_data {
347 36     36   58 my $class = shift;
348 36 50       68 my $path = shift; return unless defined $path;
  36         120  
349 36 100       62 my $data = shift; return unless defined $data;
  36         81  
350 35         54 my $opt = shift;
351              
352 35         170 my $obj = {
353             data => $data,
354             name => '',
355             mode => MODE,
356             uid => UID,
357             gid => GID,
358             size => length $data,
359             mtime => time - TIME_OFFSET,
360             chksum => CHECK_SUM,
361             type => FILE,
362             linkname => '',
363             magic => MAGIC,
364             version => TAR_VERSION,
365             uname => UNAME->( UID ),
366             gname => GNAME->( GID ),
367             devminor => 0,
368             devmajor => 0,
369             prefix => '',
370             };
371              
372             ### overwrite with user options, if provided ###
373 35 100 66     269 if( $opt and ref $opt eq 'HASH' ) {
374 22         121 for my $key ( keys %$opt ) {
375              
376             ### don't write bogus options ###
377 31 50       82 next unless exists $obj->{$key};
378 31         80 $obj->{$key} = $opt->{$key};
379             }
380             }
381              
382 35         78 bless $obj, $class;
383              
384             ### fix up the prefix and file from the path
385 35         97 my($prefix,$file) = $obj->_prefix_and_file( $path );
386 35         98 $obj->prefix( $prefix );
387 35         87 $obj->name( $file );
388              
389 35         67 return $obj;
390             }
391              
392             sub _prefix_and_file {
393 149     149   236 my $self = shift;
394 149         221 my $path = shift;
395              
396 149         324 my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir );
397 149         985 my @dirs = File::Spec->splitdir( File::Spec->canonpath($dirs) );
398              
399             ### if it's a directory, then $file might be empty
400 149 100 66     359 $file = pop @dirs if $self->is_dir and not length $file;
401              
402             ### splitting ../ gives you the relative path in native syntax
403             ### Remove the root (000000) directory
404             ### The volume from splitpath will also be in native syntax
405 149         220 if (ON_VMS) {
406             map { $_ = '..' if $_ eq '-'; $_ = '' if $_ eq '000000' } @dirs;
407             if (length($vol)) {
408             $vol = VMS::Filespec::unixify($vol);
409             unshift @dirs, $vol;
410             }
411             }
412              
413 149         805 my $prefix = File::Spec::Unix->catdir(@dirs);
414 149         630 return( $prefix, $file );
415             }
416              
417             sub _filetype {
418 16     16   29 my $self = shift;
419 16         24 my $file = shift;
420              
421 16 50       58 return unless defined $file;
422              
423 16 50       206 return SYMLINK if (-l $file); # Symlink
424              
425 16 100       111 return FILE if (-f _); # Plain file
426              
427 1 50       5 return DIR if (-d _); # Directory
428              
429 0 0       0 return FIFO if (-p _); # Named pipe
430              
431 0 0       0 return SOCKET if (-S _); # Socket
432              
433 0 0       0 return BLOCKDEV if (-b _); # Block special
434              
435 0 0       0 return CHARDEV if (-c _); # Character special
436              
437             ### shouldn't happen, this is when making archives, not reading ###
438 0 0       0 return LONGLINK if ( $file eq LONGLINK_NAME );
439              
440 0         0 return UNKNOWN; # Something else (like what?)
441              
442             }
443              
444             ### this method 'downgrades' a file to plain file -- this is used for
445             ### symlinks when FOLLOW_SYMLINKS is true.
446             sub _downgrade_to_plainfile {
447 1     1   4 my $entry = shift;
448 1         5 $entry->type( FILE );
449 1         4 $entry->mode( MODE );
450 1         4 $entry->linkname('');
451              
452 1         5 return 1;
453             }
454              
455             =head2 $bool = $file->extract( [ $alternative_name ] )
456              
457             Extract this object, optionally to an alternative name.
458              
459             See C<< Archive::Tar->extract_file >> for details.
460              
461             Returns true on success and false on failure.
462              
463             =cut
464              
465             sub extract {
466 2     2 1 19 my $self = shift;
467              
468 2         12 local $Carp::CarpLevel += 1;
469              
470             ### avoid circular use, so only require;
471 2         31 require Archive::Tar;
472 2         14 return Archive::Tar->_extract_file( $self, @_ );
473             }
474              
475             =head2 $path = $file->full_path
476              
477             Returns the full path from the tar header; this is basically a
478             concatenation of the C and C fields.
479              
480             =cut
481              
482             sub full_path {
483 1553     1553 1 278757 my $self = shift;
484              
485             ### if prefix field is empty
486 1553 100 66     3711 return $self->name unless defined $self->prefix and length $self->prefix;
487              
488             ### or otherwise, catfile'd
489 232         518 my $path = File::Spec::Unix->catfile( $self->prefix, $self->name );
490 232 100       536 $path .= "/" if $self->name =~ m{/$}; # Re-add trailing slash if necessary, as catfile() strips them off.
491 232         1345 return $path;
492             }
493              
494              
495             =head2 $bool = $file->validate
496              
497             Done by Archive::Tar internally when reading the tar file:
498             validate the header against the checksum to ensure integer tar file.
499              
500             Returns true on success, false on failure
501              
502             =cut
503              
504             sub validate {
505 215     215 1 617 my $self = shift;
506              
507 215         337 my $raw = $self->raw;
508              
509             ### don't know why this one is different from the one we /write/ ###
510 215         440 substr ($raw, 148, 8) = " ";
511              
512             ### bug #43513: [PATCH] Accept wrong checksums from SunOS and HP-UX tar
513             ### like GNU tar does. See here for details:
514             ### http://www.gnu.org/software/tar/manual/tar.html#SEC139
515             ### so we do both a signed AND unsigned validate. if one succeeds, that's
516             ### good enough
517 215 50 66     1081 return ( (unpack ("%16C*", $raw) == $self->chksum)
518             or (unpack ("%16c*", $raw) == $self->chksum)) ? 1 : 0;
519             }
520              
521             =head2 $bool = $file->has_content
522              
523             Returns a boolean to indicate whether the current object has content.
524             Some special files like directories and so on never will have any
525             content. This method is mainly to make sure you don't get warnings
526             for using uninitialized values when looking at an object's content.
527              
528             =cut
529              
530             sub has_content {
531 108     108 1 193 my $self = shift;
532 108 100 100     351 return defined $self->data() && length $self->data() ? 1 : 0;
533             }
534              
535             =head2 $content = $file->get_content
536              
537             Returns the current content for the in-memory file
538              
539             =cut
540              
541             sub get_content {
542 84     84 1 5500 my $self = shift;
543 84         161 $self->data( );
544             }
545              
546             =head2 $cref = $file->get_content_by_ref
547              
548             Returns the current content for the in-memory file as a scalar
549             reference. Normal users won't need this, but it will save memory if
550             you are dealing with very large data files in your tar archive, since
551             it will pass the contents by reference, rather than make a copy of it
552             first.
553              
554             =cut
555              
556             sub get_content_by_ref {
557 479     479 1 1097 my $self = shift;
558              
559 479         1293 return \$self->{data};
560             }
561              
562             =head2 $bool = $file->replace_content( $content )
563              
564             Replace the current content of the file with the new content. This
565             only affects the in-memory archive, not the on-disk version until
566             you write it.
567              
568             Returns true on success, false on failure.
569              
570             =cut
571              
572             sub replace_content {
573 11     11 1 2672 my $self = shift;
574 11   100     41 my $data = shift || '';
575              
576 11         33 $self->data( $data );
577 11         37 $self->size( length $data );
578 11         46 return 1;
579             }
580              
581             =head2 $bool = $file->rename( $new_name )
582              
583             Rename the current file to $new_name.
584              
585             Note that you must specify a Unix path for $new_name, since per tar
586             standard, all files in the archive must be Unix paths.
587              
588             Returns true on success and false on failure.
589              
590             =cut
591              
592             sub rename {
593 12     12 1 28 my $self = shift;
594 12         23 my $path = shift;
595              
596 12 50       33 return unless defined $path;
597              
598 12         36 my ($prefix,$file) = $self->_prefix_and_file( $path );
599              
600 12         53 $self->name( $file );
601 12         31 $self->prefix( $prefix );
602              
603 12         59 return 1;
604             }
605              
606             =head2 $bool = $file->chmod( $mode )
607              
608             Change mode of $file to $mode. The mode can be a string or a number
609             which is interpreted as octal whether or not a leading 0 is given.
610              
611             Returns true on success and false on failure.
612              
613             =cut
614              
615             sub chmod {
616 1     1 1 3 my $self = shift;
617 1 50 33     1 my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/;
  1         9  
618 1         3 $self->{mode} = oct($mode);
619 1         2 return 1;
620             }
621              
622             =head2 $bool = $file->chown( $user [, $group])
623              
624             Change owner of $file to $user. If a $group is given that is changed
625             as well. You can also pass a single parameter with a colon separating the
626             use and group as in 'root:wheel'.
627              
628             Returns true on success and false on failure.
629              
630             =cut
631              
632             sub chown {
633 12     12 1 25 my $self = shift;
634 12         20 my $uname = shift;
635 12 50       32 return unless defined $uname;
636 12         18 my $gname;
637 12 50       37 if (-1 != index($uname, ':')) {
638 0         0 ($uname, $gname) = split(/:/, $uname);
639             } else {
640 12 100       34 $gname = shift if @_ > 0;
641             }
642              
643 12         38 $self->uname( $uname );
644 12 100       35 $self->gname( $gname ) if $gname;
645 12         40 return 1;
646             }
647              
648             =head1 Convenience methods
649              
650             To quickly check the type of a C object, you can
651             use the following methods:
652              
653             =over 4
654              
655             =item $file->is_file
656              
657             Returns true if the file is of type C
658              
659             =item $file->is_dir
660              
661             Returns true if the file is of type C
662              
663             =item $file->is_hardlink
664              
665             Returns true if the file is of type C
666              
667             =item $file->is_symlink
668              
669             Returns true if the file is of type C
670              
671             =item $file->is_chardev
672              
673             Returns true if the file is of type C
674              
675             =item $file->is_blockdev
676              
677             Returns true if the file is of type C
678              
679             =item $file->is_fifo
680              
681             Returns true if the file is of type C
682              
683             =item $file->is_socket
684              
685             Returns true if the file is of type C
686              
687             =item $file->is_longlink
688              
689             Returns true if the file is of type C.
690             Should not happen after a successful C.
691              
692             =item $file->is_label
693              
694             Returns true if the file is of type C
695             Should not happen after a successful C.
696              
697             =item $file->is_unknown
698              
699             Returns true if the file type is C
700              
701             =back
702              
703             =cut
704              
705             #stupid perl5.5.3 needs to warn if it's not numeric
706 1256     1256 1 16096 sub is_file { local $^W; FILE == $_[0]->type }
  1256         2545  
707 1064     1064 1 5052 sub is_dir { local $^W; DIR == $_[0]->type }
  1064         3502  
708 4     4 1 1084 sub is_hardlink { local $^W; HARDLINK == $_[0]->type }
  4         18  
709 220     220 1 1544 sub is_symlink { local $^W; SYMLINK == $_[0]->type }
  220         362  
710 4     4 1 1148 sub is_chardev { local $^W; CHARDEV == $_[0]->type }
  4         12  
711 4     4 1 1165 sub is_blockdev { local $^W; BLOCKDEV == $_[0]->type }
  4         37  
712 4     4 1 1103 sub is_fifo { local $^W; FIFO == $_[0]->type }
  4         14  
713 4     4 1 1155 sub is_socket { local $^W; SOCKET == $_[0]->type }
  4         16  
714 308     308 1 1832 sub is_unknown { local $^W; UNKNOWN == $_[0]->type }
  308         607  
715 615     615 1 2117 sub is_longlink { local $^W; LONGLINK eq $_[0]->type }
  615         988  
716 305     305 1 1624 sub is_label { local $^W; LABEL eq $_[0]->type }
  305         500  
717              
718             1;