line
stmt
bran
cond
sub
pod
time
code
1
package Archive::Tar::File;
2
18
18
119589
use strict;
18
39
18
693
3
4
18
18
117
use Carp ();
18
34
18
403
5
18
18
467
use IO::File;
18
6801
18
2907
6
18
18
112
use File::Spec::Unix ();
18
3088
18
484
7
18
18
90
use File::Spec ();
18
33
18
385
8
18
18
85
use File::Basename ();
18
25
18
409
9
10
18
18
8443
use Archive::Tar::Constant;
18
61
18
4356
11
12
18
18
148
use vars qw[@ISA $VERSION];
18
34
18
3079
13
#@ISA = qw[Archive::Tar];
14
$VERSION = '3.06';
15
16
### set value to 1 to oct() it during the unpack ###
17
18
my $tmpl = [
19
name => 0, # string A100
20
mode => 1, # octal A8
21
uid => 1, # octal A8
22
gid => 1, # octal A8
23
size => 0, # octal # cdrake - not *always* octal.. A12
24
mtime => 1, # octal A12
25
chksum => 1, # octal A8
26
type => 0, # character A1
27
linkname => 0, # string A100
28
magic => 0, # string A6
29
version => 0, # 2 bytes A2
30
uname => 0, # string A32
31
gname => 0, # string A32
32
devmajor => 1, # octal A8
33
devminor => 1, # octal A8
34
prefix => 0, # A155 x 12
35
36
### end UNPACK items ###
37
raw => 0, # the raw data chunk
38
data => 0, # the data associated with the file --
39
# This might be very memory intensive
40
];
41
42
### install get/set accessors for this object.
43
for ( my $i=0; $i
44
my $key = $tmpl->[$i];
45
18
18
136
no strict 'refs';
18
55
18
60684
46
*{__PACKAGE__."::$key"} = sub {
47
32830
32830
73171
my $self = shift;
48
32830
100
55969
$self->{$key} = $_[0] if @_;
49
50
### just in case the key is not there or undef or something ###
51
32830
36626
{ local $^W = 0;
32830
69809
52
32830
163524
return $self->{$key};
53
}
54
}
55
}
56
57
=head1 NAME
58
59
Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar
60
61
=head1 SYNOPSIS
62
63
my @items = $tar->get_files;
64
65
print $_->name, ' ', $_->size, "\n" for @items;
66
67
print $object->get_content;
68
$object->replace_content('new content');
69
70
$object->rename( 'new/full/path/to/file.c' );
71
72
=head1 DESCRIPTION
73
74
Archive::Tar::File provides a neat little object layer for in-memory
75
extracted files. It's mostly used internally in Archive::Tar to tidy
76
up the code, but there's no reason users shouldn't use this API as
77
well.
78
79
=head2 Accessors
80
81
A lot of the methods in this package are accessors to the various
82
fields in the tar header:
83
84
=over 4
85
86
=item name
87
88
The file's name
89
90
=item mode
91
92
The file's mode
93
94
=item uid
95
96
The user id owning the file
97
98
=item gid
99
100
The group id owning the file
101
102
=item size
103
104
File size in bytes
105
106
=item mtime
107
108
Modification time. Adjusted to mac-time on MacOS if required
109
110
=item chksum
111
112
Checksum field for the tar header
113
114
=item type
115
116
File type -- numeric, but comparable to exported constants -- see
117
Archive::Tar's documentation
118
119
=item linkname
120
121
If the file is a symlink, the file it's pointing to
122
123
=item magic
124
125
Tar magic string -- not useful for most users
126
127
=item version
128
129
Tar version string -- not useful for most users
130
131
=item uname
132
133
The user name that owns the file
134
135
=item gname
136
137
The group name that owns the file
138
139
=item devmajor
140
141
Device major number in case of a special file
142
143
=item devminor
144
145
Device minor number in case of a special file
146
147
=item prefix
148
149
Any directory to prefix to the extraction path, if any
150
151
=item raw
152
153
Raw tar header -- not useful for most users
154
155
=back
156
157
=head1 Methods
158
159
=head2 Archive::Tar::File->new( file => $path )
160
161
Returns a new Archive::Tar::File object from an existing file.
162
163
Returns undef on failure.
164
165
=head2 Archive::Tar::File->new( data => $path, $data, $opt )
166
167
Returns a new Archive::Tar::File object from data.
168
169
C<$path> defines the file name (which need not exist), C<$data> the
170
file contents, and C<$opt> is a reference to a hash of attributes
171
which may be used to override the default attributes (fields in the
172
tar header), which are described above in the Accessors section.
173
174
Returns undef on failure.
175
176
=head2 Archive::Tar::File->new( chunk => $chunk )
177
178
Returns a new Archive::Tar::File object from a raw 512-byte tar
179
archive chunk.
180
181
Returns undef on failure.
182
183
=cut
184
185
sub new {
186
293
293
1
173270
my $class = shift;
187
293
529
my $what = shift;
188
189
293
50
1352
my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) :
100
100
190
($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) :
191
($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) :
192
undef;
193
194
293
1170
return $obj;
195
}
196
197
### copies the data, creates a clone ###
198
sub clone {
199
86
86
0
183
my $self = shift;
200
86
2017
return bless { %$self }, ref $self;
201
}
202
203
sub _new_from_chunk {
204
242
242
410
my $class = shift;
205
242
50
572
my $chunk = shift or return; # 512 bytes of tar header
206
242
1373
my %hash = @_;
207
208
### filter any arguments on defined-ness of values.
209
### this allows overriding from what the tar-header is saying
210
### about this tar-entry. Particularly useful for @LongLink files
211
242
571
my %args = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash;
51
164
51
149
212
213
### makes it start at 0 actually... :) ###
214
242
365
my $i = -1;
215
my %entry = map {
216
242
1683
my ($s,$v)=($tmpl->[++$i],$tmpl->[++$i]); # cdrake
3872
7061
217
3872
100
10741
($_)=($_=~/^([^\0]*)/) unless($s eq 'size'); # cdrake
218
3872
100
10423
$s=> $v ? oct $_ : $_ # cdrake
219
# $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_ # removed by cdrake - mucks up binary sizes >8gb
220
} unpack( UNPACK, $chunk ); # cdrake
221
# } map { /^([^\0]*)/ } unpack( UNPACK, $chunk ); # old - replaced now by cdrake
222
223
224
242
50
1280
if(substr($entry{'size'}, 0, 1) eq "\x80") { # binary size extension for files >8gigs (> octal 77777777777777) # cdrake
225
0
0
my @sz=unpack("aCSNN",$entry{'size'}); $entry{'size'}=$sz[4]+(2**32)*$sz[3]+$sz[2]*(2**64); # Use the low 80 bits (should use the upper 15 as well, but as at year 2011, that seems unlikely to ever be needed - the numbers are just too big...) # cdrake
0
0
226
} else { # cdrake
227
242
902
($entry{'size'})=($entry{'size'}=~/^([^\0]*)/); $entry{'size'}=oct $entry{'size'}; # cdrake
242
623
228
} # cdrake
229
230
231
242
2438
my $obj = bless { %entry, %args }, $class;
232
233
### magic is a filetype string.. it should have something like 'ustar' or
234
### something similar... if the chunk is garbage, skip it
235
242
50
3848
return unless $obj->magic !~ /\W/;
236
237
### store the original chunk ###
238
242
721
$obj->raw( $chunk );
239
240
242
50
33
551
$obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) );
241
242
50
66
621
$obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m|/$|) );
242
243
244
242
1389
return $obj;
245
246
}
247
248
sub _new_from_file {
249
22
22
100
my $class = shift;
250
22
55
my $path = shift;
251
252
### path has to at least exist
253
22
50
82
return unless defined $path;
254
255
22
106
my $type = __PACKAGE__->_filetype($path);
256
22
80
my $data = '';
257
258
READ: {
259
22
100
38
unless ($type == DIR ) {
22
105
260
21
215
my $fh = IO::File->new;
261
262
21
50
1046
unless( $fh->open($path, 'r') ) {
263
### dangling symlinks are fine, stop reading but continue
264
### creating the object
265
0
0
0
last READ if $type == SYMLINK;
266
267
### otherwise, return from this function --
268
### anything that's *not* a symlink should be
269
### resolvable
270
0
0
return;
271
}
272
273
### binmode needed to read files properly on win32 ###
274
21
2158
binmode $fh;
275
21
48
$data = do { local $/; <$fh> };
21
103
21
4509
276
21
368
close $fh;
277
}
278
}
279
280
22
220
my @items = qw[mode uid gid size mtime];
281
22
383
my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9];
110
443
282
283
22
62
if (ON_VMS) {
284
### VMS has two UID modes, traditional and POSIX. Normally POSIX is
285
### not used. We currently do not have an easy way to see if we are in
286
### POSIX mode. In traditional mode, the UID is actually the VMS UIC.
287
### The VMS UIC has the upper 16 bits is the GID, which in many cases
288
### the VMS UIC will be larger than 209715, the largest that TAR can
289
### handle. So for now, assume it is traditional if the UID is larger
290
### than 0x10000.
291
292
if ($hash{uid} > 0x10000) {
293
$hash{uid} = $hash{uid} & 0xFFFF;
294
}
295
296
### The file length from stat() is the physical length of the file
297
### However the amount of data read in may be more for some file types.
298
### Fixed length files are read past the logical EOF to end of the block
299
### containing. Other file types get expanded on read because record
300
### delimiters are added.
301
302
my $data_len = length $data;
303
$hash{size} = $data_len if $hash{size} < $data_len;
304
305
}
306
### you *must* set size == 0 on symlinks, or the next entry will be
307
### though of as the contents of the symlink, which is wrong.
308
### this fixes bug #7937
309
22
100
66
299
$hash{size} = 0 if ($type == DIR or $type == SYMLINK);
310
22
91
$hash{mtime} -= TIME_OFFSET;
311
312
### strip the high bits off the mode, which we don't need to store
313
22
216
$hash{mode} = STRIP_MODE->( $hash{mode} );
314
315
316
### probably requires some file path munging here ... ###
317
### name and prefix are set later
318
my $obj = {
319
%hash,
320
name => '',
321
chksum => CHECK_SUM,
322
type => $type,
323
linkname => ($type == SYMLINK and CAN_READLINK)
324
? readlink $path
325
: '',
326
magic => MAGIC,
327
version => TAR_VERSION,
328
uname => UNAME->( $hash{uid} ),
329
22
50
50
254
gname => GNAME->( $hash{gid} ),
330
devmajor => 0, # not handled
331
devminor => 0, # not handled
332
prefix => '',
333
data => $data,
334
};
335
336
22
227
bless $obj, $class;
337
338
### fix up the prefix and file from the path
339
22
110
my($prefix,$file) = $obj->_prefix_and_file( $path );
340
22
102
$obj->prefix( $prefix );
341
22
83
$obj->name( $file );
342
343
22
86
return $obj;
344
}
345
346
sub _new_from_data {
347
29
29
48
my $class = shift;
348
29
50
50
my $path = shift; return unless defined $path;
29
72
349
29
100
43
my $data = shift; return unless defined $data;
29
61
350
28
40
my $opt = shift;
351
352
28
158
my $obj = {
353
data => $data,
354
name => '',
355
mode => MODE,
356
uid => UID,
357
gid => GID,
358
size => length $data,
359
mtime => time - TIME_OFFSET,
360
chksum => CHECK_SUM,
361
type => FILE,
362
linkname => '',
363
magic => MAGIC,
364
version => TAR_VERSION,
365
uname => UNAME->( UID ),
366
gname => GNAME->( GID ),
367
devminor => 0,
368
devmajor => 0,
369
prefix => '',
370
};
371
372
### overwrite with user options, if provided ###
373
28
100
66
202
if( $opt and ref $opt eq 'HASH' ) {
374
16
76
for my $key ( keys %$opt ) {
375
376
### don't write bogus options ###
377
25
50
52
next unless exists $obj->{$key};
378
25
65
$obj->{$key} = $opt->{$key};
379
}
380
}
381
382
28
68
bless $obj, $class;
383
384
### fix up the prefix and file from the path
385
28
77
my($prefix,$file) = $obj->_prefix_and_file( $path );
386
28
94
$obj->prefix( $prefix );
387
28
65
$obj->name( $file );
388
389
28
55
return $obj;
390
}
391
392
sub _prefix_and_file {
393
132
132
256
my $self = shift;
394
132
265
my $path = shift;
395
396
132
359
my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir );
397
132
1347
my @dirs = File::Spec->splitdir( File::Spec->canonpath($dirs) );
398
399
### if it's a directory, then $file might be empty
400
132
100
66
305
$file = pop @dirs if $self->is_dir and not length $file;
401
402
### splitting ../ gives you the relative path in native syntax
403
### Remove the root (000000) directory
404
### The volume from splitpath will also be in native syntax
405
132
269
if (ON_VMS) {
406
map { $_ = '..' if $_ eq '-'; $_ = '' if $_ eq '000000' } @dirs;
407
if (length($vol)) {
408
$vol = VMS::Filespec::unixify($vol);
409
unshift @dirs, $vol;
410
}
411
}
412
413
132
764
my $prefix = File::Spec::Unix->catdir(@dirs);
414
132
718
return( $prefix, $file );
415
}
416
417
sub _filetype {
418
22
22
44
my $self = shift;
419
22
66
my $file = shift;
420
421
22
50
89
return unless defined $file;
422
423
22
50
446
return SYMLINK if (-l $file); # Symlink
424
425
22
100
153
return FILE if (-f _); # Plain file
426
427
1
50
5
return DIR if (-d _); # Directory
428
429
0
0
0
return FIFO if (-p _); # Named pipe
430
431
0
0
0
return SOCKET if (-S _); # Socket
432
433
0
0
0
return BLOCKDEV if (-b _); # Block special
434
435
0
0
0
return CHARDEV if (-c _); # Character special
436
437
### shouldn't happen, this is when making archives, not reading ###
438
0
0
0
return LONGLINK if ( $file eq LONGLINK_NAME );
439
440
0
0
return UNKNOWN; # Something else (like what?)
441
442
}
443
444
### this method 'downgrades' a file to plain file -- this is used for
445
### symlinks when FOLLOW_SYMLINKS is true.
446
sub _downgrade_to_plainfile {
447
1
1
2
my $entry = shift;
448
1
2
$entry->type( FILE );
449
1
4
$entry->mode( MODE );
450
1
3
$entry->linkname('');
451
452
1
2
return 1;
453
}
454
455
=head2 $bool = $file->extract( [ $alternative_name ] )
456
457
Extract this object, optionally to an alternative name.
458
459
See C<< Archive::Tar->extract_file >> for details.
460
461
Returns true on success and false on failure.
462
463
=cut
464
465
sub extract {
466
3
3
1
14
my $self = shift;
467
468
3
12
local $Carp::CarpLevel += 1;
469
470
### avoid circular use, so only require;
471
3
38
require Archive::Tar;
472
3
18
return Archive::Tar->_extract_file( $self, @_ );
473
}
474
475
=head2 $path = $file->full_path
476
477
Returns the full path from the tar header; this is basically a
478
concatenation of the C and C fields.
479
480
=cut
481
482
sub full_path {
483
1373
1373
1
354866
my $self = shift;
484
485
### if prefix field is empty
486
1373
100
66
3645
return $self->name unless defined $self->prefix and length $self->prefix;
487
488
### or otherwise, catfile'd
489
206
533
my $path = File::Spec::Unix->catfile( $self->prefix, $self->name );
490
206
50
468
$path .= "/" if $self->name =~ m{/$}; # Re-add trailing slash if necessary, as catfile() strips them off.
491
206
1313
return $path;
492
}
493
494
495
=head2 $bool = $file->validate
496
497
Done by Archive::Tar internally when reading the tar file:
498
validate the header against the checksum to ensure integer tar file.
499
500
Returns true on success, false on failure
501
502
=cut
503
504
sub validate {
505
185
185
1
670
my $self = shift;
506
507
185
433
my $raw = $self->raw;
508
509
### don't know why this one is different from the one we /write/ ###
510
185
406
substr ($raw, 148, 8) = " ";
511
512
### bug #43513: [PATCH] Accept wrong checksums from SunOS and HP-UX tar
513
### like GNU tar does. See here for details:
514
### http://www.gnu.org/software/tar/manual/tar.html#SEC139
515
### so we do both a signed AND unsigned validate. if one succeeds, that's
516
### good enough
517
185
50
66
1012
return ( (unpack ("%16C*", $raw) == $self->chksum)
518
or (unpack ("%16c*", $raw) == $self->chksum)) ? 1 : 0;
519
}
520
521
=head2 $bool = $file->has_content
522
523
Returns a boolean to indicate whether the current object has content.
524
Some special files like directories and so on never will have any
525
content. This method is mainly to make sure you don't get warnings
526
for using uninitialized values when looking at an object's content.
527
528
=cut
529
530
sub has_content {
531
87
87
1
182
my $self = shift;
532
87
100
100
236
return defined $self->data() && length $self->data() ? 1 : 0;
533
}
534
535
=head2 $content = $file->get_content
536
537
Returns the current content for the in-memory file
538
539
=cut
540
541
sub get_content {
542
87
87
1
5743
my $self = shift;
543
87
227
$self->data( );
544
}
545
546
=head2 $cref = $file->get_content_by_ref
547
548
Returns the current content for the in-memory file as a scalar
549
reference. Normal users won't need this, but it will save memory if
550
you are dealing with very large data files in your tar archive, since
551
it will pass the contents by reference, rather than make a copy of it
552
first.
553
554
=cut
555
556
sub get_content_by_ref {
557
423
423
1
662
my $self = shift;
558
559
423
4339
return \$self->{data};
560
}
561
562
=head2 $bool = $file->replace_content( $content )
563
564
Replace the current content of the file with the new content. This
565
only affects the in-memory archive, not the on-disk version until
566
you write it.
567
568
Returns true on success, false on failure.
569
570
=cut
571
572
sub replace_content {
573
11
11
1
2653
my $self = shift;
574
11
100
35
my $data = shift || '';
575
576
11
24
$self->data( $data );
577
11
28
$self->size( length $data );
578
11
52
return 1;
579
}
580
581
=head2 $bool = $file->rename( $new_name )
582
583
Rename the current file to $new_name.
584
585
Note that you must specify a Unix path for $new_name, since per tar
586
standard, all files in the archive must be Unix paths.
587
588
Returns true on success and false on failure.
589
590
=cut
591
592
sub rename {
593
12
12
1
21
my $self = shift;
594
12
17
my $path = shift;
595
596
12
50
29
return unless defined $path;
597
598
12
27
my ($prefix,$file) = $self->_prefix_and_file( $path );
599
600
12
29
$self->name( $file );
601
12
25
$self->prefix( $prefix );
602
603
12
38
return 1;
604
}
605
606
=head2 $bool = $file->chmod( $mode )
607
608
Change mode of $file to $mode. The mode can be a string or a number
609
which is interpreted as octal whether or not a leading 0 is given.
610
611
Returns true on success and false on failure.
612
613
=cut
614
615
sub chmod {
616
1
1
1
3
my $self = shift;
617
1
50
33
2
my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/;
1
10
618
1
4
$self->{mode} = oct($mode);
619
1
3
return 1;
620
}
621
622
=head2 $bool = $file->chown( $user [, $group])
623
624
Change owner of $file to $user. If a $group is given that is changed
625
as well. You can also pass a single parameter with a colon separating the
626
use and group as in 'root:wheel'.
627
628
Returns true on success and false on failure.
629
630
=cut
631
632
sub chown {
633
12
12
1
18
my $self = shift;
634
12
17
my $uname = shift;
635
12
50
40
return unless defined $uname;
636
12
16
my $gname;
637
12
50
24
if (-1 != index($uname, ':')) {
638
0
0
($uname, $gname) = split(/:/, $uname);
639
} else {
640
12
100
28
$gname = shift if @_ > 0;
641
}
642
643
12
29
$self->uname( $uname );
644
12
100
28
$self->gname( $gname ) if $gname;
645
12
100
return 1;
646
}
647
648
=head1 Convenience methods
649
650
To quickly check the type of a C object, you can
651
use the following methods:
652
653
=over 4
654
655
=item $file->is_file
656
657
Returns true if the file is of type C
658
659
=item $file->is_dir
660
661
Returns true if the file is of type C
662
663
=item $file->is_hardlink
664
665
Returns true if the file is of type C
666
667
=item $file->is_symlink
668
669
Returns true if the file is of type C
670
671
=item $file->is_chardev
672
673
Returns true if the file is of type C
674
675
=item $file->is_blockdev
676
677
Returns true if the file is of type C
678
679
=item $file->is_fifo
680
681
Returns true if the file is of type C
682
683
=item $file->is_socket
684
685
Returns true if the file is of type C
686
687
=item $file->is_longlink
688
689
Returns true if the file is of type C.
690
Should not happen after a successful C.
691
692
=item $file->is_label
693
694
Returns true if the file is of type C.
695
Should not happen after a successful C.
696
697
=item $file->is_unknown
698
699
Returns true if the file type is C
700
701
=back
702
703
=cut
704
705
#stupid perl5.5.3 needs to warn if it's not numeric
706
1100
1100
1
18403
sub is_file { local $^W; FILE == $_[0]->type }
1100
1987
707
959
959
1
6149
sub is_dir { local $^W; DIR == $_[0]->type }
959
4663
708
784
784
1
3590
sub is_hardlink { local $^W; HARDLINK == $_[0]->type }
784
1809
709
176
176
1
1569
sub is_symlink { local $^W; SYMLINK == $_[0]->type }
176
374
710
4
4
1
1045
sub is_chardev { local $^W; CHARDEV == $_[0]->type }
4
8
711
4
4
1
1043
sub is_blockdev { local $^W; BLOCKDEV == $_[0]->type }
4
11
712
4
4
1
951
sub is_fifo { local $^W; FIFO == $_[0]->type }
4
10
713
4
4
1
911
sub is_socket { local $^W; SOCKET == $_[0]->type }
4
9
714
279
279
1
1437
sub is_unknown { local $^W; UNKNOWN == $_[0]->type }
279
638
715
518
518
1
1815
sub is_longlink { local $^W; LONGLINK eq $_[0]->type }
518
1029
716
255
255
1
1299
sub is_label { local $^W; LABEL eq $_[0]->type }
255
529
717
718
1;