File Coverage

lib/Archive/Tar/File.pm
Criterion Covered Total %
statement 187 198 94.4
branch 51 82 62.2
condition 20 31 64.5
subroutine 39 39 100.0
pod 22 23 95.6
total 319 373 85.5


line stmt bran cond sub pod time code
1             package Archive::Tar::File;
2 10     10   67609 use strict;
  10         25  
  10         306  
3              
4 10     10   56 use Carp ();
  10         29  
  10         199  
5 10     10   513 use IO::File;
  10         8521  
  10         1414  
6 10     10   66 use File::Spec::Unix ();
  10         20  
  10         187  
7 10     10   44 use File::Spec ();
  10         19  
  10         180  
8 10     10   111 use File::Basename ();
  10         30  
  10         184  
9              
10 10     10   3758 use Archive::Tar::Constant;
  10         38  
  10         2037  
11              
12 10     10   89 use vars qw[@ISA $VERSION];
  10         24  
  10         1313  
13             #@ISA = qw[Archive::Tar];
14             $VERSION = '3.02';
15              
16             ### set value to 1 to oct() it during the unpack ###
17              
18             my $tmpl = [
19             name => 0, # string A100
20             mode => 1, # octal A8
21             uid => 1, # octal A8
22             gid => 1, # octal A8
23             size => 0, # octal # cdrake - not *always* octal.. A12
24             mtime => 1, # octal A12
25             chksum => 1, # octal A8
26             type => 0, # character A1
27             linkname => 0, # string A100
28             magic => 0, # string A6
29             version => 0, # 2 bytes A2
30             uname => 0, # string A32
31             gname => 0, # string A32
32             devmajor => 1, # octal A8
33             devminor => 1, # octal A8
34             prefix => 0, # A155 x 12
35              
36             ### end UNPACK items ###
37             raw => 0, # the raw data chunk
38             data => 0, # the data associated with the file --
39             # This might be very memory intensive
40             ];
41              
42             ### install get/set accessors for this object.
43             for ( my $i=0; $i
44             my $key = $tmpl->[$i];
45 10     10   67 no strict 'refs';
  10         20  
  10         27498  
46             *{__PACKAGE__."::$key"} = sub {
47 33663     33663   69121 my $self = shift;
48 33663 100       55824 $self->{$key} = $_[0] if @_;
49              
50             ### just in case the key is not there or undef or something ###
51 33663         34910 { local $^W = 0;
  33663         72952  
52 33663         147581 return $self->{$key};
53             }
54             }
55             }
56              
57             =head1 NAME
58              
59             Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar
60              
61             =head1 SYNOPSIS
62              
63             my @items = $tar->get_files;
64              
65             print $_->name, ' ', $_->size, "\n" for @items;
66              
67             print $object->get_content;
68             $object->replace_content('new content');
69              
70             $object->rename( 'new/full/path/to/file.c' );
71              
72             =head1 DESCRIPTION
73              
74             Archive::Tar::File provides a neat little object layer for in-memory
75             extracted files. It's mostly used internally in Archive::Tar to tidy
76             up the code, but there's no reason users shouldn't use this API as
77             well.
78              
79             =head2 Accessors
80              
81             A lot of the methods in this package are accessors to the various
82             fields in the tar header:
83              
84             =over 4
85              
86             =item name
87              
88             The file's name
89              
90             =item mode
91              
92             The file's mode
93              
94             =item uid
95              
96             The user id owning the file
97              
98             =item gid
99              
100             The group id owning the file
101              
102             =item size
103              
104             File size in bytes
105              
106             =item mtime
107              
108             Modification time. Adjusted to mac-time on MacOS if required
109              
110             =item chksum
111              
112             Checksum field for the tar header
113              
114             =item type
115              
116             File type -- numeric, but comparable to exported constants -- see
117             Archive::Tar's documentation
118              
119             =item linkname
120              
121             If the file is a symlink, the file it's pointing to
122              
123             =item magic
124              
125             Tar magic string -- not useful for most users
126              
127             =item version
128              
129             Tar version string -- not useful for most users
130              
131             =item uname
132              
133             The user name that owns the file
134              
135             =item gname
136              
137             The group name that owns the file
138              
139             =item devmajor
140              
141             Device major number in case of a special file
142              
143             =item devminor
144              
145             Device minor number in case of a special file
146              
147             =item prefix
148              
149             Any directory to prefix to the extraction path, if any
150              
151             =item raw
152              
153             Raw tar header -- not useful for most users
154              
155             =back
156              
157             =head1 Methods
158              
159             =head2 Archive::Tar::File->new( file => $path )
160              
161             Returns a new Archive::Tar::File object from an existing file.
162              
163             Returns undef on failure.
164              
165             =head2 Archive::Tar::File->new( data => $path, $data, $opt )
166              
167             Returns a new Archive::Tar::File object from data.
168              
169             C<$path> defines the file name (which need not exist), C<$data> the
170             file contents, and C<$opt> is a reference to a hash of attributes
171             which may be used to override the default attributes (fields in the
172             tar header), which are described above in the Accessors section.
173              
174             Returns undef on failure.
175              
176             =head2 Archive::Tar::File->new( chunk => $chunk )
177              
178             Returns a new Archive::Tar::File object from a raw 512-byte tar
179             archive chunk.
180              
181             Returns undef on failure.
182              
183             =cut
184              
185             sub new {
186 344     344 1 7285 my $class = shift;
187 344         496 my $what = shift;
188              
189 344 50       1224 my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) :
    100          
    100          
190             ($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) :
191             ($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) :
192             undef;
193              
194 344         1089 return $obj;
195             }
196              
197             ### copies the data, creates a clone ###
198             sub clone {
199 123     123 0 216 my $self = shift;
200 123         2005 return bless { %$self }, ref $self;
201             }
202              
203             sub _new_from_chunk {
204 292     292   479 my $class = shift;
205 292 50       525 my $chunk = shift or return; # 512 bytes of tar header
206 292         463 my %hash = @_;
207              
208             ### filter any arguments on defined-ness of values.
209             ### this allows overriding from what the tar-header is saying
210             ### about this tar-entry. Particularly useful for @LongLink files
211 292         477 my %args = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash;
  63         173  
  63         193  
212              
213             ### makes it start at 0 actually... :) ###
214 292         348 my $i = -1;
215             my %entry = map {
216 292         2034 my ($s,$v)=($tmpl->[++$i],$tmpl->[++$i]); # cdrake
  4672         7291  
217 4672 100       13096 ($_)=($_=~/^([^\0]*)/) unless($s eq 'size'); # cdrake
218 4672 100       12117 $s=> $v ? oct $_ : $_ # cdrake
219             # $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_ # removed by cdrake - mucks up binary sizes >8gb
220             } unpack( UNPACK, $chunk ); # cdrake
221             # } map { /^([^\0]*)/ } unpack( UNPACK, $chunk ); # old - replaced now by cdrake
222              
223              
224 292 50       1219 if(substr($entry{'size'}, 0, 1) eq "\x80") { # binary size extension for files >8gigs (> octal 77777777777777) # cdrake
225 0         0 my @sz=unpack("aCSNN",$entry{'size'}); $entry{'size'}=$sz[4]+(2**32)*$sz[3]+$sz[2]*(2**64); # Use the low 80 bits (should use the upper 15 as well, but as at year 2011, that seems unlikely to ever be needed - the numbers are just too big...) # cdrake
  0         0  
226             } else { # cdrake
227 292         919 ($entry{'size'})=($entry{'size'}=~/^([^\0]*)/); $entry{'size'}=oct $entry{'size'}; # cdrake
  292         586  
228             } # cdrake
229              
230              
231 292         2671 my $obj = bless { %entry, %args }, $class;
232              
233             ### magic is a filetype string.. it should have something like 'ustar' or
234             ### something similar... if the chunk is garbage, skip it
235 292 50       873 return unless $obj->magic !~ /\W/;
236              
237             ### store the original chunk ###
238 292         751 $obj->raw( $chunk );
239              
240 292 50 33     550 $obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) );
241 292 50 66     633 $obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m|/$|) );
242              
243              
244 292         1241 return $obj;
245              
246             }
247              
248             sub _new_from_file {
249 16     16   38 my $class = shift;
250 16         26 my $path = shift;
251              
252             ### path has to at least exist
253 16 50       46 return unless defined $path;
254              
255 16         84 my $type = __PACKAGE__->_filetype($path);
256 16         78 my $data = '';
257              
258             READ: {
259 16 100       50 unless ($type == DIR ) {
  16         81  
260 15         91 my $fh = IO::File->new;
261              
262 15 50       579 unless( $fh->open($path) ) {
263             ### dangling symlinks are fine, stop reading but continue
264             ### creating the object
265 0 0       0 last READ if $type == SYMLINK;
266              
267             ### otherwise, return from this function --
268             ### anything that's *not* a symlink should be
269             ### resolvable
270 0         0 return;
271             }
272              
273             ### binmode needed to read files properly on win32 ###
274 15         709 binmode $fh;
275 15         35 $data = do { local $/; <$fh> };
  15         60  
  15         991  
276 15         241 close $fh;
277             }
278             }
279              
280 16         173 my @items = qw[mode uid gid size mtime];
281 16         258 my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9];
  80         296  
282              
283 16         44 if (ON_VMS) {
284             ### VMS has two UID modes, traditional and POSIX. Normally POSIX is
285             ### not used. We currently do not have an easy way to see if we are in
286             ### POSIX mode. In traditional mode, the UID is actually the VMS UIC.
287             ### The VMS UIC has the upper 16 bits is the GID, which in many cases
288             ### the VMS UIC will be larger than 209715, the largest that TAR can
289             ### handle. So for now, assume it is traditional if the UID is larger
290             ### than 0x10000.
291              
292             if ($hash{uid} > 0x10000) {
293             $hash{uid} = $hash{uid} & 0xFFFF;
294             }
295              
296             ### The file length from stat() is the physical length of the file
297             ### However the amount of data read in may be more for some file types.
298             ### Fixed length files are read past the logical EOF to end of the block
299             ### containing. Other file types get expanded on read because record
300             ### delimiters are added.
301              
302             my $data_len = length $data;
303             $hash{size} = $data_len if $hash{size} < $data_len;
304              
305             }
306             ### you *must* set size == 0 on symlinks, or the next entry will be
307             ### though of as the contents of the symlink, which is wrong.
308             ### this fixes bug #7937
309 16 100 66     141 $hash{size} = 0 if ($type == DIR or $type == SYMLINK);
310 16         41 $hash{mtime} -= TIME_OFFSET;
311              
312             ### strip the high bits off the mode, which we don't need to store
313 16         144 $hash{mode} = STRIP_MODE->( $hash{mode} );
314              
315              
316             ### probably requires some file path munging here ... ###
317             ### name and prefix are set later
318             my $obj = {
319             %hash,
320             name => '',
321             chksum => CHECK_SUM,
322             type => $type,
323             linkname => ($type == SYMLINK and CAN_READLINK)
324             ? readlink $path
325             : '',
326             magic => MAGIC,
327             version => TAR_VERSION,
328             uname => UNAME->( $hash{uid} ),
329 16 50 50     170 gname => GNAME->( $hash{gid} ),
330             devmajor => 0, # not handled
331             devminor => 0, # not handled
332             prefix => '',
333             data => $data,
334             };
335              
336 16         92 bless $obj, $class;
337              
338             ### fix up the prefix and file from the path
339 16         65 my($prefix,$file) = $obj->_prefix_and_file( $path );
340 16         70 $obj->prefix( $prefix );
341 16         43 $obj->name( $file );
342              
343 16         57 return $obj;
344             }
345              
346             sub _new_from_data {
347 36     36   64 my $class = shift;
348 36 50       62 my $path = shift; return unless defined $path;
  36         107  
349 36 100       62 my $data = shift; return unless defined $data;
  36         72  
350 35         44 my $opt = shift;
351              
352 35         155 my $obj = {
353             data => $data,
354             name => '',
355             mode => MODE,
356             uid => UID,
357             gid => GID,
358             size => length $data,
359             mtime => time - TIME_OFFSET,
360             chksum => CHECK_SUM,
361             type => FILE,
362             linkname => '',
363             magic => MAGIC,
364             version => TAR_VERSION,
365             uname => UNAME->( UID ),
366             gname => GNAME->( GID ),
367             devminor => 0,
368             devmajor => 0,
369             prefix => '',
370             };
371              
372             ### overwrite with user options, if provided ###
373 35 100 66     280 if( $opt and ref $opt eq 'HASH' ) {
374 22         99 for my $key ( keys %$opt ) {
375              
376             ### don't write bogus options ###
377 31 50       92 next unless exists $obj->{$key};
378 31         66 $obj->{$key} = $opt->{$key};
379             }
380             }
381              
382 35         73 bless $obj, $class;
383              
384             ### fix up the prefix and file from the path
385 35         89 my($prefix,$file) = $obj->_prefix_and_file( $path );
386 35         131 $obj->prefix( $prefix );
387 35         86 $obj->name( $file );
388              
389 35         76 return $obj;
390             }
391              
392             sub _prefix_and_file {
393 149     149   266 my $self = shift;
394 149         234 my $path = shift;
395              
396 149         299 my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir );
397 149         956 my @dirs = File::Spec->splitdir( File::Spec->canonpath($dirs) );
398              
399             ### if it's a directory, then $file might be empty
400 149 100 66     313 $file = pop @dirs if $self->is_dir and not length $file;
401              
402             ### splitting ../ gives you the relative path in native syntax
403             ### Remove the root (000000) directory
404             ### The volume from splitpath will also be in native syntax
405 149         230 if (ON_VMS) {
406             map { $_ = '..' if $_ eq '-'; $_ = '' if $_ eq '000000' } @dirs;
407             if (length($vol)) {
408             $vol = VMS::Filespec::unixify($vol);
409             unshift @dirs, $vol;
410             }
411             }
412              
413 149         831 my $prefix = File::Spec::Unix->catdir(@dirs);
414 149         635 return( $prefix, $file );
415             }
416              
417             sub _filetype {
418 16     16   32 my $self = shift;
419 16         24 my $file = shift;
420              
421 16 50       48 return unless defined $file;
422              
423 16 50       179 return SYMLINK if (-l $file); # Symlink
424              
425 16 100       93 return FILE if (-f _); # Plain file
426              
427 1 50       6 return DIR if (-d _); # Directory
428              
429 0 0       0 return FIFO if (-p _); # Named pipe
430              
431 0 0       0 return SOCKET if (-S _); # Socket
432              
433 0 0       0 return BLOCKDEV if (-b _); # Block special
434              
435 0 0       0 return CHARDEV if (-c _); # Character special
436              
437             ### shouldn't happen, this is when making archives, not reading ###
438 0 0       0 return LONGLINK if ( $file eq LONGLINK_NAME );
439              
440 0         0 return UNKNOWN; # Something else (like what?)
441              
442             }
443              
444             ### this method 'downgrades' a file to plain file -- this is used for
445             ### symlinks when FOLLOW_SYMLINKS is true.
446             sub _downgrade_to_plainfile {
447 1     1   3 my $entry = shift;
448 1         5 $entry->type( FILE );
449 1         5 $entry->mode( MODE );
450 1         4 $entry->linkname('');
451              
452 1         4 return 1;
453             }
454              
455             =head2 $bool = $file->extract( [ $alternative_name ] )
456              
457             Extract this object, optionally to an alternative name.
458              
459             See C<< Archive::Tar->extract_file >> for details.
460              
461             Returns true on success and false on failure.
462              
463             =cut
464              
465             sub extract {
466 2     2 1 13 my $self = shift;
467              
468 2         10 local $Carp::CarpLevel += 1;
469              
470             ### avoid circular use, so only require;
471 2         46 require Archive::Tar;
472 2         17 return Archive::Tar->_extract_file( $self, @_ );
473             }
474              
475             =head2 $path = $file->full_path
476              
477             Returns the full path from the tar header; this is basically a
478             concatenation of the C and C fields.
479              
480             =cut
481              
482             sub full_path {
483 1435     1435 1 255187 my $self = shift;
484              
485             ### if prefix field is empty
486 1435 100 66     3430 return $self->name unless defined $self->prefix and length $self->prefix;
487              
488             ### or otherwise, catfile'd
489 214         467 my $path = File::Spec::Unix->catfile( $self->prefix, $self->name );
490 214 100       480 $path .= "/" if $self->name =~ m{/$}; # Re-add trailing slash if necessary, as catfile() strips them off.
491 214         1088 return $path;
492             }
493              
494              
495             =head2 $bool = $file->validate
496              
497             Done by Archive::Tar internally when reading the tar file:
498             validate the header against the checksum to ensure integer tar file.
499              
500             Returns true on success, false on failure
501              
502             =cut
503              
504             sub validate {
505 215     215 1 600 my $self = shift;
506              
507 215         331 my $raw = $self->raw;
508              
509             ### don't know why this one is different from the one we /write/ ###
510 215         405 substr ($raw, 148, 8) = " ";
511              
512             ### bug #43513: [PATCH] Accept wrong checksums from SunOS and HP-UX tar
513             ### like GNU tar does. See here for details:
514             ### http://www.gnu.org/software/tar/manual/tar.html#SEC139
515             ### so we do both a signed AND unsigned validate. if one succeeds, that's
516             ### good enough
517 215 50 66     1076 return ( (unpack ("%16C*", $raw) == $self->chksum)
518             or (unpack ("%16c*", $raw) == $self->chksum)) ? 1 : 0;
519             }
520              
521             =head2 $bool = $file->has_content
522              
523             Returns a boolean to indicate whether the current object has content.
524             Some special files like directories and so on never will have any
525             content. This method is mainly to make sure you don't get warnings
526             for using uninitialized values when looking at an object's content.
527              
528             =cut
529              
530             sub has_content {
531 108     108 1 185 my $self = shift;
532 108 100 100     274 return defined $self->data() && length $self->data() ? 1 : 0;
533             }
534              
535             =head2 $content = $file->get_content
536              
537             Returns the current content for the in-memory file
538              
539             =cut
540              
541             sub get_content {
542 84     84 1 5540 my $self = shift;
543 84         190 $self->data( );
544             }
545              
546             =head2 $cref = $file->get_content_by_ref
547              
548             Returns the current content for the in-memory file as a scalar
549             reference. Normal users won't need this, but it will save memory if
550             you are dealing with very large data files in your tar archive, since
551             it will pass the contents by reference, rather than make a copy of it
552             first.
553              
554             =cut
555              
556             sub get_content_by_ref {
557 452     452 1 1037 my $self = shift;
558              
559 452         1150 return \$self->{data};
560             }
561              
562             =head2 $bool = $file->replace_content( $content )
563              
564             Replace the current content of the file with the new content. This
565             only affects the in-memory archive, not the on-disk version until
566             you write it.
567              
568             Returns true on success, false on failure.
569              
570             =cut
571              
572             sub replace_content {
573 11     11 1 2681 my $self = shift;
574 11   100     39 my $data = shift || '';
575              
576 11         31 $self->data( $data );
577 11         35 $self->size( length $data );
578 11         43 return 1;
579             }
580              
581             =head2 $bool = $file->rename( $new_name )
582              
583             Rename the current file to $new_name.
584              
585             Note that you must specify a Unix path for $new_name, since per tar
586             standard, all files in the archive must be Unix paths.
587              
588             Returns true on success and false on failure.
589              
590             =cut
591              
592             sub rename {
593 12     12 1 22 my $self = shift;
594 12         30 my $path = shift;
595              
596 12 50       28 return unless defined $path;
597              
598 12         32 my ($prefix,$file) = $self->_prefix_and_file( $path );
599              
600 12         33 $self->name( $file );
601 12         34 $self->prefix( $prefix );
602              
603 12         43 return 1;
604             }
605              
606             =head2 $bool = $file->chmod( $mode )
607              
608             Change mode of $file to $mode. The mode can be a string or a number
609             which is interpreted as octal whether or not a leading 0 is given.
610              
611             Returns true on success and false on failure.
612              
613             =cut
614              
615             sub chmod {
616 1     1 1 2 my $self = shift;
617 1 50 33     1 my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/;
  1         8  
618 1         3 $self->{mode} = oct($mode);
619 1         2 return 1;
620             }
621              
622             =head2 $bool = $file->chown( $user [, $group])
623              
624             Change owner of $file to $user. If a $group is given that is changed
625             as well. You can also pass a single parameter with a colon separating the
626             use and group as in 'root:wheel'.
627              
628             Returns true on success and false on failure.
629              
630             =cut
631              
632             sub chown {
633 12     12 1 25 my $self = shift;
634 12         19 my $uname = shift;
635 12 50       31 return unless defined $uname;
636 12         19 my $gname;
637 12 50       31 if (-1 != index($uname, ':')) {
638 0         0 ($uname, $gname) = split(/:/, $uname);
639             } else {
640 12 100       39 $gname = shift if @_ > 0;
641             }
642              
643 12         31 $self->uname( $uname );
644 12 100       36 $self->gname( $gname ) if $gname;
645 12         39 return 1;
646             }
647              
648             =head1 Convenience methods
649              
650             To quickly check the type of a C object, you can
651             use the following methods:
652              
653             =over 4
654              
655             =item $file->is_file
656              
657             Returns true if the file is of type C
658              
659             =item $file->is_dir
660              
661             Returns true if the file is of type C
662              
663             =item $file->is_hardlink
664              
665             Returns true if the file is of type C
666              
667             =item $file->is_symlink
668              
669             Returns true if the file is of type C
670              
671             =item $file->is_chardev
672              
673             Returns true if the file is of type C
674              
675             =item $file->is_blockdev
676              
677             Returns true if the file is of type C
678              
679             =item $file->is_fifo
680              
681             Returns true if the file is of type C
682              
683             =item $file->is_socket
684              
685             Returns true if the file is of type C
686              
687             =item $file->is_longlink
688              
689             Returns true if the file is of type C.
690             Should not happen after a successful C.
691              
692             =item $file->is_label
693              
694             Returns true if the file is of type C
695             Should not happen after a successful C.
696              
697             =item $file->is_unknown
698              
699             Returns true if the file type is C
700              
701             =back
702              
703             =cut
704              
705             #stupid perl5.5.3 needs to warn if it's not numeric
706 1226     1226 1 15907 sub is_file { local $^W; FILE == $_[0]->type }
  1226         2051  
707 988     988 1 4715 sub is_dir { local $^W; DIR == $_[0]->type }
  988         3914  
708 4     4 1 1095 sub is_hardlink { local $^W; HARDLINK == $_[0]->type }
  4         12  
709 220     220 1 1542 sub is_symlink { local $^W; SYMLINK == $_[0]->type }
  220         357  
710 4     4 1 1075 sub is_chardev { local $^W; CHARDEV == $_[0]->type }
  4         28  
711 4     4 1 1083 sub is_blockdev { local $^W; BLOCKDEV == $_[0]->type }
  4         30  
712 4     4 1 1142 sub is_fifo { local $^W; FIFO == $_[0]->type }
  4         12  
713 4     4 1 1125 sub is_socket { local $^W; SOCKET == $_[0]->type }
  4         11  
714 278     278 1 1712 sub is_unknown { local $^W; UNKNOWN == $_[0]->type }
  278         525  
715 615     615 1 2194 sub is_longlink { local $^W; LONGLINK eq $_[0]->type }
  615         935  
716 305     305 1 1726 sub is_label { local $^W; LABEL eq $_[0]->type }
  305         519  
717              
718             1;