line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# (C) 2010-2014, jnw@cpan.org, all rights reserved. |
3
|
|
|
|
|
|
|
# Distribute under the same license as Perl itself. |
4
|
|
|
|
|
|
|
# |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# sudo zypper -v in perl-Compress-Raw-Zlib |
7
|
|
|
|
|
|
|
# -> 'nothing to do' |
8
|
|
|
|
|
|
|
# sudo zypper -v in 'perl-Compress-Raw-Zlib >= 2.027' |
9
|
|
|
|
|
|
|
# -> 'perl' providing 'perl-Compress-Raw-Zlib >= 2.027' is already installed. |
10
|
|
|
|
|
|
|
# sudo zypper -v in --force perl-Compress-Raw-Zlib |
11
|
|
|
|
|
|
|
# -> works, |
12
|
|
|
|
|
|
|
# sudo zypper -v in --from 12 perl-Compress-Raw-Zlib |
13
|
|
|
|
|
|
|
# -> works, if d.l.p is repo #12. |
14
|
|
|
|
|
|
|
# |
15
|
|
|
|
|
|
|
# TODO: |
16
|
|
|
|
|
|
|
# * evaluate File::Extract - Extract Text From Arbitrary File Types |
17
|
|
|
|
|
|
|
# (HTML, PDF, Plain, RTF, Excel) |
18
|
|
|
|
|
|
|
# |
19
|
|
|
|
|
|
|
# * make taint checks really check things, instead of $1 if m{^(.*)$}; |
20
|
|
|
|
|
|
|
# |
21
|
|
|
|
|
|
|
# * Implement disk space monitoring. |
22
|
|
|
|
|
|
|
# |
23
|
|
|
|
|
|
|
# * formats: |
24
|
|
|
|
|
|
|
# - use lzmadec/xzdec as fallback to lzcat. |
25
|
|
|
|
|
|
|
# - glest has bzipped tar files named glest-1.0.10-data.tar.bz2.tar; |
26
|
|
|
|
|
|
|
# - Not all suffixes are appended by . e.g. openh323-v1_15_2-src-tar.bz2 is different. |
27
|
|
|
|
|
|
|
# - gzip -dc can unpack old compress .Z, add its mime-type |
28
|
|
|
|
|
|
|
# - java-1_5_0-sun hides zip-files in shell scripts with suffix .bin |
29
|
|
|
|
|
|
|
# - cpio fails on \.delta\.rpm |
30
|
|
|
|
|
|
|
# - rpm files should extract all header info in readable format. |
31
|
|
|
|
|
|
|
# - do we rely on rpm2cpio to handle them all: |
32
|
|
|
|
|
|
|
# rpm -qp --nodigest --nosignature --qf "%{PAYLOADCOMPRESSOR}" $f |
33
|
|
|
|
|
|
|
# - m{\.(otf|ttf|ps|eps)$}i |
34
|
|
|
|
|
|
|
# - application/x-frame # xorg-modular/doc/xorg-docs/specs/XPRINT/xp_libraryTOC.doc |
35
|
|
|
|
|
|
|
# |
36
|
|
|
|
|
|
|
# * blacklisting? |
37
|
|
|
|
|
|
|
# # th_en_US.dat is an 11MB thesaurus in OOo |
38
|
|
|
|
|
|
|
# skip if $from =~ m{(/(ustar|pax)\-big\-\d+g\.tar\.bz2|/th_en_US\.dat|/testtar\.tar|\.html\.(ru|ja|ko\.euc-kr|fr|es|cz))$} |
39
|
|
|
|
|
|
|
# |
40
|
|
|
|
|
|
|
# * use LWP::Simple::getstore() if $archive =~ m{^\w+://} |
41
|
|
|
|
|
|
|
# * application/x-debian-package is a 'application/x-archive' -> (ar xv /dev/stdin) < $qufrom"; |
42
|
|
|
|
|
|
|
# * application/x-iso9660 -> "isoinfo -d -i %(src)s" |
43
|
|
|
|
|
|
|
# * PDF improvement: okular says: 'this document contains embedded files.' How can we grab those? |
44
|
|
|
|
|
|
|
|
45
|
1
|
|
|
1
|
|
30233
|
use warnings; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
35
|
|
46
|
1
|
|
|
1
|
|
6
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
66
|
|
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
package File::Unpack; |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
BEGIN |
51
|
|
|
|
|
|
|
{ |
52
|
|
|
|
|
|
|
# Requires: shared-mime-info |
53
|
1
|
|
|
1
|
|
65
|
eval 'use File::LibMagic;'; # only needed in mime(); mime() dies, if missing |
|
1
|
|
|
1
|
|
474
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
54
|
1
|
|
|
1
|
|
54
|
eval 'use File::MimeInfo::Magic;'; # only needed in mime(); okay, if missing. |
|
1
|
|
|
|
|
6685
|
|
|
1
|
|
|
|
|
10787
|
|
|
1
|
|
|
|
|
65
|
|
55
|
|
|
|
|
|
|
# unless builtin! |
56
|
1
|
|
|
1
|
|
68
|
eval 'use Compress::Raw::Lzma;'; # only needed in mime(); for finding lzma. |
|
1
|
|
|
|
|
562
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
57
|
1
|
|
|
1
|
|
61
|
eval 'use Compress::Raw::Bzip2;'; # only needed in mime(); for finding second level types |
|
1
|
|
|
|
|
1152
|
|
|
1
|
|
|
|
|
1869
|
|
|
1
|
|
|
|
|
168
|
|
58
|
1
|
|
|
1
|
|
62
|
eval 'use Compress::Raw::Zlib;'; # only needed in mime(); for finding second level types |
|
1
|
|
|
|
|
1215
|
|
|
1
|
|
|
|
|
6458
|
|
|
1
|
|
|
|
|
312
|
|
59
|
1
|
|
|
1
|
|
69
|
eval 'use BSD::Resource;'; # setrlimit |
|
1
|
|
|
|
|
1006
|
|
|
1
|
|
|
|
|
17702
|
|
|
1
|
|
|
|
|
7
|
|
60
|
1
|
|
|
1
|
|
381
|
eval 'use Filesys::Statvfs;'; # statvfs(); |
|
1
|
|
|
|
|
955
|
|
|
1
|
|
|
|
|
731
|
|
|
1
|
|
|
|
|
68
|
|
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
|
63
|
1
|
|
|
1
|
|
7
|
use Carp; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
65
|
|
64
|
1
|
|
|
1
|
|
7
|
use File::Path; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
50
|
|
65
|
1
|
|
|
1
|
|
1287
|
use File::Temp (); # tempdir() in _run_mime_helper. |
|
1
|
|
|
|
|
28265
|
|
|
1
|
|
|
|
|
26
|
|
66
|
1
|
|
|
1
|
|
1041
|
use File::Copy (); |
|
1
|
|
|
|
|
12507
|
|
|
1
|
|
|
|
|
30
|
|
67
|
1
|
|
|
1
|
|
1779
|
use File::Compare (); |
|
1
|
|
|
|
|
1795
|
|
|
1
|
|
|
|
|
22
|
|
68
|
1
|
|
|
1
|
|
1573
|
use JSON; |
|
1
|
|
|
|
|
27428
|
|
|
1
|
|
|
|
|
7
|
|
69
|
1
|
|
|
1
|
|
1280
|
use String::ShellQuote; # used in _prep_configdir |
|
1
|
|
|
|
|
1043
|
|
|
1
|
|
|
|
|
127
|
|
70
|
1
|
|
|
1
|
|
503
|
use IPC::Run; # implements File::Unpack::run() |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
use Text::Sprintf::Named; # used to parse @builtin_mime_helpers |
72
|
|
|
|
|
|
|
use Cwd 'getcwd'; # run(), moves us there and back. |
73
|
|
|
|
|
|
|
use Data::Dumper; |
74
|
|
|
|
|
|
|
use POSIX (); |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=head1 NAME |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
File::Unpack - A strong bz2/gz/zip/tar/cpio/rpm/deb/cab/lzma/7z/rar/... archive unpacker, based on mime-types |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=head1 VERSION |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
Version 0.69 |
83
|
|
|
|
|
|
|
=cut |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# We'll have 1.x versions only after minfree() has a baseline implementation. |
86
|
|
|
|
|
|
|
# Please run perl Makefile.PL after changing the version here. |
87
|
|
|
|
|
|
|
our $VERSION = '0.69'; |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
POSIX::setlocale(&POSIX::LC_ALL, 'C'); |
90
|
|
|
|
|
|
|
$ENV{PATH} = '/usr/bin:/bin'; |
91
|
|
|
|
|
|
|
$ENV{SHELL} = '/bin/sh'; |
92
|
|
|
|
|
|
|
delete $ENV{ENV}; |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
# what we name the temporary directories, while helpers are working. |
95
|
|
|
|
|
|
|
my $TMPDIR_TEMPL = '_fu_XXXXX'; |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# no longer used by the tick-tick ticker to show where we are. |
98
|
|
|
|
|
|
|
# my $lsof = '/usr/bin/lsof'; |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
# Compress::Raw::Bunzip2 needs several 100k of input data, we special case this. |
101
|
|
|
|
|
|
|
# File::LibMagic wants to read ca. 70k of input data, before it says application/vnd.ms-excel |
102
|
|
|
|
|
|
|
# Anything else works with 1024. |
103
|
|
|
|
|
|
|
my $UNCOMP_BUFSZ = 1024; |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
# unpack will give up, after unpacking that many levels. It is more likely we |
106
|
|
|
|
|
|
|
# got into a loop by then, than really have that many levels. |
107
|
|
|
|
|
|
|
my $RECURSION_LIMIT = 200; |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# Suggested place, where admins should install the helpers bundled with this module. |
110
|
|
|
|
|
|
|
sub _default_helper_dir { $ENV{FILE_UNPACK_HELPER_DIR}||'/usr/share/File-Unpack/helper' } |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
# we use '=' in the mime_name, this expands to '/(x\-|ANY\+)?' |
113
|
|
|
|
|
|
|
## |
114
|
|
|
|
|
|
|
## Caution: always use (?: ... ) below for grouping, so that no extra capturing clauses are created. |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
my @builtin_mime_helpers = ( |
117
|
|
|
|
|
|
|
# mimetype pattern # suffix_re # command with redirects, as defined with IPC::Run::run |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
# Requires: xz bzip2 gzip unzip lzip |
120
|
|
|
|
|
|
|
[ 'application=x-lzip', qr{(?:lz)}, [qw(/usr/bin/lzip -dc %(src)s)], qw(> %(destfile)s) ], |
121
|
|
|
|
|
|
|
[ 'application=xz', qr{(?:xz|lz(ma)?)}, [qw(/usr/bin/lzcat)], qw(< %(src)s > %(destfile)s) ], |
122
|
|
|
|
|
|
|
[ 'application=xz', qr{(?:xz|lz(ma)?)}, [qw(/usr/bin/xz -dc %(src)s)], qw(> %(destfile)s) ], |
123
|
|
|
|
|
|
|
[ 'application=lzma', qr{(?:xz|lz(ma)?)}, [qw(/usr/bin/lzcat)], qw(< %(src)s > %(destfile)s) ], |
124
|
|
|
|
|
|
|
[ 'application=lzma', qr{(?:xz|lz(ma)?)}, [qw(/usr/bin/xz -dc %(src)s)], qw(> %(destfile)s) ], |
125
|
|
|
|
|
|
|
[ 'application=bzip2', qr{bz2}, [qw(/usr/bin/bunzip2 -dc -f %(src)s)], qw(> %(destfile)s) ], |
126
|
|
|
|
|
|
|
[ 'application=gzip', qr{(?:gz|Z)}, [qw(/usr/bin/gzip -dc -f %(src)s)], qw(> %(destfile)s) ], |
127
|
|
|
|
|
|
|
[ 'application=compress', qr{(?:gz|Z)}, [qw(/usr/bin/gzip -dc -f %(src)s)], qw(> %(destfile)s) ], |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# Requires: sharutils |
130
|
|
|
|
|
|
|
[ 'text=uuencode', qr{uu}, [qw(/usr/bin/uudecode -o %(destfile)s %(src)s)] ], |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
# Requires: upx |
133
|
|
|
|
|
|
|
[ 'application=upx', qr{(?:upx\.exe|upx)}, [qw(/usr/bin/upx -q -q -q -d -o%(destfile)s %(lsrc)s) ] ], |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
# xml.summary.Mono.Security.Authenticode is twice inside of monodoc-1.0.4.tar.gz/Mono.zip/ -> use -o |
136
|
|
|
|
|
|
|
[ 'application=zip', qr{(?:zip|jar|sar)}, [qw(/usr/bin/unzip -P no_pw -q -o %(src)s)] ], |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
# Requires: unrar |
139
|
|
|
|
|
|
|
[ 'application=rar', qr{rar}, [qw(/usr/bin/unrar x -o- -p- -inul -kb -y %(src)s)] ], |
140
|
|
|
|
|
|
|
# Requires: lha |
141
|
|
|
|
|
|
|
[ 'application=x-lha', qr{lha}, [qw(/usr/bin/lha x -q %(src)s)] ], |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
# Requires: binutils |
144
|
|
|
|
|
|
|
[ 'application=archive', qr{(?:a|ar|deb)}, [qw(/usr/bin/ar x %(src)s)] ], |
145
|
|
|
|
|
|
|
[ 'application=x-deb', qr{deb}, [qw(/usr/bin/ar x %(src)s)] ], |
146
|
|
|
|
|
|
|
[ 'application=x-debian-package', qr{deb}, [qw(/usr/bin/ar x %(src)s)] ], |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
# Requires: cabextract |
149
|
|
|
|
|
|
|
[ 'application/vnd.ms-cab-compressed', qr{cab}, [qw(/usr/bin/cabextract -q %(src)s)] ], |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
# Requires: p7zip |
152
|
|
|
|
|
|
|
[ 'application/x-7z-compressed', qr{7z}, [qw(/usr/bin/7z x -pPass -y %(src)s)] ], |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# Requires: tar rpm cpio |
155
|
|
|
|
|
|
|
[ 'application=tar', qr{(?:tar|gem)}, [\&_locate_tar, qw(-xf %(src)s)] ], |
156
|
|
|
|
|
|
|
[ 'application=tar+bzip2', qr{(?:tar\.bz2|tbz)}, [\&_locate_tar, qw(-jxf %(src)s)] ], |
157
|
|
|
|
|
|
|
[ 'application=tar+gzip', qr{t(?:ar\.gz|gz)}, [\&_locate_tar, qw(-zxf %(src)s)] ], |
158
|
|
|
|
|
|
|
# [ 'application=tar+gzip', qr{t(?:ar\.gz|gz)}, [qw(/home/testy/src/C/slowcat)], qw(< %(src)s |), [\&_locate_tar, qw(-zxf -)] ], |
159
|
|
|
|
|
|
|
[ 'application=tar+lzma', qr{tar\.(?:xz|lzma|lz)}, [qw(/usr/bin/lzcat)], qw(< %(src)s |), [\&_locate_tar, qw(-xf -)] ], |
160
|
|
|
|
|
|
|
[ 'application=tar+lzma', qr{tar\.(?:xz|lzma|lz)}, [qw(/usr/bin/xz -dc -f %(src)s)], '|', [\&_locate_tar, qw(-xf -)] ], |
161
|
|
|
|
|
|
|
[ 'application=rpm', qr{(?:src\.r|s|r)pm}, [qw(/usr/bin/rpm2cpio %(src)s)], '|', [\&_locate_cpio_i] ], |
162
|
|
|
|
|
|
|
[ 'application=cpio', qr{cpio}, [\&_locate_cpio_i], qw(< %(src)s) ], |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
# Requires: poppler-tools |
165
|
|
|
|
|
|
|
[ 'application=pdf', qr{pdf}, [qw(/usr/bin/pdftotext %(src)s %(destfile)s.txt)], '&', [qw(/usr/bin/pdfimages -j %(src)s pdfimages)] ], |
166
|
|
|
|
|
|
|
); |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
## CAUTION keep _my_shell_quote in sync with all _locate_* functions. |
169
|
|
|
|
|
|
|
sub _locate_tar |
170
|
|
|
|
|
|
|
{ |
171
|
|
|
|
|
|
|
my $self = shift; |
172
|
|
|
|
|
|
|
return @{$self->{_locate_tar}} if defined $self->{_locate_tar}; |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
# cannot use tar -C %(destdir)s, we rely on being chdir'ed inside already :-) |
175
|
|
|
|
|
|
|
# E: /bin/tar: /tmp/xxx/_VASn/_/_/_/_/_/_/_/_/_/_/_/_/_/_/_/_/_/_/_/_: Cannot chdir: Permission denied |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
my @tar = (-f '/bin/tar' ? '/bin/tar' : '/usr/bin/tar' ); |
178
|
|
|
|
|
|
|
## osc co loves to create directories with : in them. |
179
|
|
|
|
|
|
|
## Tell tar to accept such directories as directores. |
180
|
|
|
|
|
|
|
push @tar, "--force-local" |
181
|
|
|
|
|
|
|
unless $self->run([@tar, "--force-local", "--help"], { out_err => '/dev/null' }); |
182
|
|
|
|
|
|
|
push @tar, "--no-unquote" |
183
|
|
|
|
|
|
|
unless $self->run([@tar, "--no-unquote", "--help"], { out_err => '/dev/null'}); |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
$self->{_locate_tar} = \@tar; |
186
|
|
|
|
|
|
|
return @tar; |
187
|
|
|
|
|
|
|
} |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
sub _locate_cpio_i |
190
|
|
|
|
|
|
|
{ |
191
|
|
|
|
|
|
|
my $self = shift; |
192
|
|
|
|
|
|
|
return @{$self->{_locate_cpio_i}} if defined $self->{_locate_cpio_i}; |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
my @cpio_i = ('/usr/bin/cpio', '-idm'); |
195
|
|
|
|
|
|
|
$cpio_i[1] .= 'u' |
196
|
|
|
|
|
|
|
unless run(['/usr/bin/cpio', '-idmu', '--usage'], {out_err => '/dev/null'}); |
197
|
|
|
|
|
|
|
push @cpio_i, '--sparse' |
198
|
|
|
|
|
|
|
unless run([@cpio_i, '--sparse', '--usage'], {out_err => '/dev/null'}); |
199
|
|
|
|
|
|
|
push @cpio_i, '--no-absolute-filenames' |
200
|
|
|
|
|
|
|
unless run([@cpio_i, '--no-absolute-filenames', '--usage'], {out_err => '/dev/null'}); |
201
|
|
|
|
|
|
|
push @cpio_i, '--force-local' |
202
|
|
|
|
|
|
|
unless run([@cpio_i, '--force-local', '--usage'], {out_err => '/dev/null'}); |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
@{$self->{_locate_cpio_i}} = \@cpio_i; |
205
|
|
|
|
|
|
|
return @cpio_i; |
206
|
|
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
=head1 SYNOPSIS |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
This perl module comes with an executable script: |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
/usr/bin/file_unpack -h |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
/usr/bin/file_unpack [-1] [-m] ARCHIVE_FILE ... |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
File::Unpack is an unpacker for archives and files |
218
|
|
|
|
|
|
|
(bz2/gz/zip/tar/cpio/iso/rpm/deb/cab/lzma/7z/rar ... pdf/odf) based on |
219
|
|
|
|
|
|
|
MIME types. We call it strong, because it is not fooled by file suffixes, or |
220
|
|
|
|
|
|
|
multiply wrapped packages. It recursively descends into each archive found |
221
|
|
|
|
|
|
|
until it finally exposes all unpackable payload contents. |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
A logfile can be written, precisely describing MIME types and unpack actions. |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
use File::Unpack; |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
my $log; |
228
|
|
|
|
|
|
|
my $u = File::Unpack->new(logfile => \$log); |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
my $m = $u->mime('/etc/init.d/rc'); |
231
|
|
|
|
|
|
|
print "$m->[0]; charset=$m->[1]\n"; |
232
|
|
|
|
|
|
|
# text/x-shellscript; charset=us-ascii |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
map { print "$_->{name}\n" } @{$u->mime_helper()}; |
235
|
|
|
|
|
|
|
# application/%rpm |
236
|
|
|
|
|
|
|
# application/%tar+gzip |
237
|
|
|
|
|
|
|
# application/%tar+bzip2 |
238
|
|
|
|
|
|
|
# ... |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
$u->unpack("inputfile.tar.bz2"); |
241
|
|
|
|
|
|
|
while ($log =~ m{^\s*"(.*?)":}g) # it's JSON. |
242
|
|
|
|
|
|
|
{ |
243
|
|
|
|
|
|
|
print "$1\n"; # report all files unpacked |
244
|
|
|
|
|
|
|
} |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
... |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
Most of the known archive file formats are supported. Shell-script-style |
249
|
|
|
|
|
|
|
plugins can be added to support additinal formats. |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
Helper shell-scripts can be added to support additional mime-types. Example: |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
F<< $ echo "ar x $1" > /usr/share/File-Unpack/helper/application=x-debian-package >> |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
F<< $ chmod a+x /usr/share/File-Unpack/helper/application=x-debian-package >> |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
This example creates a trivial external equivalent of the builtin MIME helper for *.deb packages. |
258
|
|
|
|
|
|
|
For details see the documentation of the C method. |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
C examines the contents of an archive file or directory using an extensive |
261
|
|
|
|
|
|
|
mime-type analysis. The contents is unpacked recursively to the given destination |
262
|
|
|
|
|
|
|
directory; a listing of the unpacked files is reported through the built in |
263
|
|
|
|
|
|
|
logging facility during unpacking. Most common archive file formats are handled |
264
|
|
|
|
|
|
|
directly; more can easily be added as mime-type helper plugins. |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
=head1 SUBROUTINES/METHODS |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=head2 new |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
my $u = new(destdir => '.', logfile => \*STDOUT, maxfilesize => '2G', verbose => 1, |
271
|
|
|
|
|
|
|
world_readable => 0, one_shot => 0, no_op => 0, archive_name_as_dir => 0, |
272
|
|
|
|
|
|
|
follow_file_symlinks => 0, |
273
|
|
|
|
|
|
|
log_params => {}, log_type => 'JSON'); |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
Creates an unpacker instance. The parameter C must be a writable location; all output |
276
|
|
|
|
|
|
|
files and directories are placed inside this destdir. Subdirectories will be |
277
|
|
|
|
|
|
|
created in an attempt to reflect the structure of the input. Destdir defaults |
278
|
|
|
|
|
|
|
to the current directory; relative paths are resolved immediatly, so that |
279
|
|
|
|
|
|
|
chdir() after calling new is harmless. |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
The parameter C can be a reference to a scalar, a filename, or a filedescriptor. |
282
|
|
|
|
|
|
|
The logfile starts with a JSON formatted prolog, where all lines start |
283
|
|
|
|
|
|
|
with printable characters. |
284
|
|
|
|
|
|
|
For each file unpacked, a one line record is appended, starting with a single |
285
|
|
|
|
|
|
|
whitespace ' ', and terminated by "\n". The format is a JSON-encoded C<< "key": |
286
|
|
|
|
|
|
|
{value},\n >> pair, where key is the filename, and value is a hash including 'mime', |
287
|
|
|
|
|
|
|
'size', and other information. |
288
|
|
|
|
|
|
|
The logfile is terminated by an epilog, where each line starts with a printable character. |
289
|
|
|
|
|
|
|
As part of the epilog, a dummy file named "\" with an empty hash is added to the list. |
290
|
|
|
|
|
|
|
It should be ignored while parsing. |
291
|
|
|
|
|
|
|
Per default, the logfile is sent to STDOUT. |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
The parameter C is a safeguard against compressed sparse files and |
294
|
|
|
|
|
|
|
test-files for archivers. Such files could easily fill up any available disk |
295
|
|
|
|
|
|
|
space when unpacked. Files hitting this limit will be silently truncated. |
296
|
|
|
|
|
|
|
Check the logfile records or epilog to see if this has happened. BSD::Resource |
297
|
|
|
|
|
|
|
is used manipulate RLIMIT_FSIZE. |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
The parameter C can optionally be set to non-zero, to limit unpacking |
300
|
|
|
|
|
|
|
to one step of unpacking. Unpacking of well known compressed archives like |
301
|
|
|
|
|
|
|
e.g. '.tar.bz2' is considered one step only. If uncompressing an archive is |
302
|
|
|
|
|
|
|
considered an extra step before unpacking the archive depends on the configured |
303
|
|
|
|
|
|
|
mime helpers. |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
The parameter C causes unpack() to only print one shell command to |
306
|
|
|
|
|
|
|
STDOUT and exit. This implies one_shot=1. |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
The parameter C causes unpack() change all directories to 0755, |
309
|
|
|
|
|
|
|
and all files to 444. Otherwise 0700 and 0400 (user readable) is asserted. |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
The parameter C causes some or all symlinks to files |
312
|
|
|
|
|
|
|
to be included. |
313
|
|
|
|
|
|
|
A value of 1 follows symlinks that exist in the input directory and point to a file. |
314
|
|
|
|
|
|
|
This has no effect if the input is an archive file. A value of 2 also follows symlinks |
315
|
|
|
|
|
|
|
that were extracted from archives. CAUTION: This may cause unpack() to visit |
316
|
|
|
|
|
|
|
files or archives elsewhere in the local filesystem. |
317
|
|
|
|
|
|
|
Directory symlinks are always excluded. |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
The parameter C causes the unpacker to store all unpacked |
320
|
|
|
|
|
|
|
files inside a directory with the same name as their archive. |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
The default depends on how many files are unpacked from the archive: If exactly one |
323
|
|
|
|
|
|
|
file (or one toplevel directory) is unpacked, then no extra directory is used. |
324
|
|
|
|
|
|
|
E.g. F would unpack to F or |
325
|
|
|
|
|
|
|
F would unpack to F and no files outside this directory. |
326
|
|
|
|
|
|
|
If multiple files (or directories) are unpacked, and the suffix of the archive can |
327
|
|
|
|
|
|
|
be removed with the C of its C, then the |
328
|
|
|
|
|
|
|
shortened name is used as a directory. E.g. F would unpack to |
329
|
|
|
|
|
|
|
F. Otherwise F<._> is appended to the archive name. E.g. F would unpack to |
330
|
|
|
|
|
|
|
F. |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
In any case, the suffix F<._> or F<._B> is used to avoid conflicts with |
333
|
|
|
|
|
|
|
already existing names where B is a numeric value. |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
=head2 exclude |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
exclude(add => ['.svn', '*.orig' ], del => '.svn', force => 1, follow_file_symlinks => 0) |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
Defines the exclude-list for unpacking. This list is advisory for the MIME helpers. |
340
|
|
|
|
|
|
|
The exclude-list items are shell glob patterns, where '*' or '?' never match '/'. |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
You can use force to have any of these removed after unpacking. |
343
|
|
|
|
|
|
|
Use (vcs => 1) to exclude a long list of known version control system directories, use (vcs => 0) to remove them. |
344
|
|
|
|
|
|
|
The default is C<< exclude(empty => 1) >>, which is the same as C<< exclude(empty_file => 1, empty_dir => 1) >> -- |
345
|
|
|
|
|
|
|
having the obvious meaning. |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
(re => 1) returns the active exclude-list as a regexp pattern. |
348
|
|
|
|
|
|
|
Otherwise C always returns the list as an array ref. |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
Some symbolic links are included if {follow_file_symlinks} is nonzero. For details see C<>. |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
If exclude patterns were effective, or if symlinks, fifos, sockets, ... were encountered during unpack(), |
353
|
|
|
|
|
|
|
the logfile contains an additional 'skipped' keyword with statistics. |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
=cut |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
sub _glob_list_re |
358
|
|
|
|
|
|
|
{ |
359
|
|
|
|
|
|
|
my @re; |
360
|
|
|
|
|
|
|
return unless @_; |
361
|
|
|
|
|
|
|
for my $text (@_) |
362
|
|
|
|
|
|
|
{ |
363
|
|
|
|
|
|
|
# Taken from pdb2perl:glob2re() and adapted, to not match slashes in wildcards. |
364
|
|
|
|
|
|
|
# This should be kept compatible with tar --exclude . |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
$text =~ s{([\.\(\)\[\]\{\}])}{\\$1}g; ## protect magic re characters. |
367
|
|
|
|
|
|
|
$text =~ s{\*}{[^/]*}g; ## * -> [^/]* |
368
|
|
|
|
|
|
|
$text =~ s{\?}{[^/]}g; ## ? -> [^/] |
369
|
|
|
|
|
|
|
push @re, $text; |
370
|
|
|
|
|
|
|
} |
371
|
|
|
|
|
|
|
return '(/|^)(' . join('|', @re) . ')(/|$)'; |
372
|
|
|
|
|
|
|
} |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
sub _not_excluded |
375
|
|
|
|
|
|
|
{ |
376
|
|
|
|
|
|
|
my $self = shift; |
377
|
|
|
|
|
|
|
my ($dir, $file) = @_; |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
return 1 unless my $re = $self->{exclude}{re}; |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
$dir ||= ''; |
382
|
|
|
|
|
|
|
$dir .= '/' unless $dir =~ m{/$}; |
383
|
|
|
|
|
|
|
$file = $dir . $file; |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
return 0 if $file =~ m{$re}; |
386
|
|
|
|
|
|
|
return 1; |
387
|
|
|
|
|
|
|
} |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
sub exclude |
390
|
|
|
|
|
|
|
{ |
391
|
|
|
|
|
|
|
my $self = shift; |
392
|
|
|
|
|
|
|
my %opt = $#_ ? @_ : (add => $_[0]); |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
# ADD to this list from: https://build.opensuse.org/project/show?project=devel%3Atools%3Ascm |
395
|
|
|
|
|
|
|
my @vcs = qw(SCCS RCS CVS .svn .git .hg .osc); |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
$opt{add} = [ $opt{add} ] unless ref $opt{add}; |
398
|
|
|
|
|
|
|
$opt{del} = [ $opt{del} ] unless ref $opt{del}; |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
push @{$opt{add}}, @vcs if defined $opt{vcs} and $opt{vcs}; |
401
|
|
|
|
|
|
|
push @{$opt{del}}, @vcs if defined $opt{vcs} and !$opt{vcs}; |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
for my $a (@{$opt{add}}) |
405
|
|
|
|
|
|
|
{ |
406
|
|
|
|
|
|
|
$self->{exclude}{list}{$a}++ if defined $a; |
407
|
|
|
|
|
|
|
} |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
for my $a (@{$opt{del}}) |
410
|
|
|
|
|
|
|
{ |
411
|
|
|
|
|
|
|
delete $self->{exclude}{list}{$a} if defined $a; |
412
|
|
|
|
|
|
|
} |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
my @list = sort keys %{$self->{exclude}{list}}; |
415
|
|
|
|
|
|
|
$self->{exclude}{re} = _glob_list_re(@list); |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
$opt{empty_dir} = $opt{empty_file} = $opt{empty} if defined $opt{empty}; |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
for my $o (qw(empty_file empty_dir force)) |
420
|
|
|
|
|
|
|
{ |
421
|
|
|
|
|
|
|
$self->{exclude}{$o} = $opt{$o} if defined $opt{$o}; |
422
|
|
|
|
|
|
|
} |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
$self->{follow_file_symlinks} = $opt{follow_file_symlinks} |
425
|
|
|
|
|
|
|
if defined $opt{follow_file_symlinks}; |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
return $opt{re} ? $self->{exclude}{re} : \@list; |
428
|
|
|
|
|
|
|
} |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
=begin private |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
=item log, logf, loggable_pathname |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
The C method is used by C to send text to the logfile. |
435
|
|
|
|
|
|
|
The C method takes a filename and a hash, and logs a JSON formatted line. |
436
|
|
|
|
|
|
|
The trailing newline character of a line is delayed; it is printed by the next call to |
437
|
|
|
|
|
|
|
C or C. In case of C, a comma is emitted before the newline |
438
|
|
|
|
|
|
|
from the second call onward. |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
The C shortens a path to be relative to either |
441
|
|
|
|
|
|
|
$self->{destdir} or $self->{input} unless $self->{log_fullpath} is true. |
442
|
|
|
|
|
|
|
If a hash is provided as a second parameter and the path was found to be relative |
443
|
|
|
|
|
|
|
to $self->{input}, then an entry { 'srcdir' => 'input' } is added to this hash. |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
=end private |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
=cut |
448
|
|
|
|
|
|
|
sub log |
449
|
|
|
|
|
|
|
{ |
450
|
|
|
|
|
|
|
my ($self, $text) = @_; |
451
|
|
|
|
|
|
|
if (my $fp = $self->{lfp}) |
452
|
|
|
|
|
|
|
{ |
453
|
|
|
|
|
|
|
my $oldpos = eval { $fp->tell; }; # old perl at SLES11 has no IO::Handle::tell() |
454
|
|
|
|
|
|
|
$fp->write($text) or die "log($self->{logfile}): write failed: $!\n"; |
455
|
|
|
|
|
|
|
my $r = eval { $fp->tell - $oldpos; }; |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
## We do not expect any multibyte utf8 issues in here. It is plain 7-bit JSON. |
458
|
|
|
|
|
|
|
## E.g. /dev/null is not seekable. Be forgiving. |
459
|
|
|
|
|
|
|
die "$oldpos,$r=log($self->{logfile}): write failed: $text\n" if $r and $r != length($text); |
460
|
|
|
|
|
|
|
$self->{lfp_printed}++; |
461
|
|
|
|
|
|
|
} |
462
|
|
|
|
|
|
|
} |
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
sub loggable_pathname |
465
|
|
|
|
|
|
|
{ |
466
|
|
|
|
|
|
|
my ($self, $file, $hash) = @_; |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
unless ($self->{log_fullpath}) |
469
|
|
|
|
|
|
|
{ |
470
|
|
|
|
|
|
|
# very frequently, files are inside the destdir |
471
|
|
|
|
|
|
|
unless ($file =~ s{^\Q$self->{destdir}\E/}{}) |
472
|
|
|
|
|
|
|
{ |
473
|
|
|
|
|
|
|
# less frequently, archives are logged inside the input dir |
474
|
|
|
|
|
|
|
if ($self->{input}) |
475
|
|
|
|
|
|
|
{ |
476
|
|
|
|
|
|
|
if ($file =~ s{^\Q$self->{input}\E/}{\./input/./}) |
477
|
|
|
|
|
|
|
{ |
478
|
|
|
|
|
|
|
$hash->{srcdir} = 'input' if ref $hash eq 'HASH'; |
479
|
|
|
|
|
|
|
} |
480
|
|
|
|
|
|
|
} |
481
|
|
|
|
|
|
|
} |
482
|
|
|
|
|
|
|
} |
483
|
|
|
|
|
|
|
return $file; |
484
|
|
|
|
|
|
|
} |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
sub logf |
487
|
|
|
|
|
|
|
{ |
488
|
|
|
|
|
|
|
my ($self,$file,$hash,$suff) = @_; |
489
|
|
|
|
|
|
|
$suff = "" unless defined $suff; |
490
|
|
|
|
|
|
|
my $json = $self->{json} ||= JSON->new()->ascii(1); |
491
|
|
|
|
|
|
|
$file = $self->loggable_pathname($file, $hash); |
492
|
|
|
|
|
|
|
if (my $fp = $self->{lfp}) |
493
|
|
|
|
|
|
|
{ |
494
|
|
|
|
|
|
|
if ($self->{log_type} eq 'plain') |
495
|
|
|
|
|
|
|
{ |
496
|
|
|
|
|
|
|
my $str = $file . ' ('; |
497
|
|
|
|
|
|
|
$str .= $hash->{mime} if defined $hash->{mime}; |
498
|
|
|
|
|
|
|
$str .= ')'; |
499
|
|
|
|
|
|
|
$str = "# $str -> " . $hash->{unpacked} if $hash->{unpacked}; |
500
|
|
|
|
|
|
|
$str .= "\n"; |
501
|
|
|
|
|
|
|
$self->log($str); |
502
|
|
|
|
|
|
|
} |
503
|
|
|
|
|
|
|
else |
504
|
|
|
|
|
|
|
{ |
505
|
|
|
|
|
|
|
$self->log(qq[{ "oops": "logf used before prolog??",\n"unpacked_files":{\n]) |
506
|
|
|
|
|
|
|
unless $self->{lfp_printed}; # sysseek($fp, 0, 1); # }} there is no systell() ... |
507
|
|
|
|
|
|
|
my $str = $json->encode({$file => $hash}); |
508
|
|
|
|
|
|
|
$str =~ s{^\{}{}s; |
509
|
|
|
|
|
|
|
$str =~ s{\}$}{}s; |
510
|
|
|
|
|
|
|
my $pre = " "; |
511
|
|
|
|
|
|
|
$pre = ",\n " if $self->{logf_continuation}++; |
512
|
|
|
|
|
|
|
die "logf failed to encode newline char: $str\n" if $str =~ m{(?:\n|\r)}; |
513
|
|
|
|
|
|
|
$self->log("$pre$str$suff"); |
514
|
|
|
|
|
|
|
} |
515
|
|
|
|
|
|
|
} |
516
|
|
|
|
|
|
|
} |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
$SIG{'XFSZ'} = sub |
519
|
|
|
|
|
|
|
{ |
520
|
|
|
|
|
|
|
print STDERR "soft RLIMIT_FSIZE exceeded. SIGXFSZ recieved. Exiting\n"; |
521
|
|
|
|
|
|
|
exit; |
522
|
|
|
|
|
|
|
}; |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
# if this returns 0, we test again and call it again, possibly. |
525
|
|
|
|
|
|
|
# if this returns nonzero, we just continue. |
526
|
|
|
|
|
|
|
sub _default_fs_warn |
527
|
|
|
|
|
|
|
{ |
528
|
|
|
|
|
|
|
carp "Filesystem (@_) is almost full.\n $0 paused for 30 sec.\n"; |
529
|
|
|
|
|
|
|
sleep(30); |
530
|
|
|
|
|
|
|
return 0; |
531
|
|
|
|
|
|
|
} |
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
## returns 1, if enough space free. |
534
|
|
|
|
|
|
|
## returns 0, if warn-method was called, and returned nonzero |
535
|
|
|
|
|
|
|
## returns -1, if no warn method |
536
|
|
|
|
|
|
|
## or does not return at all, and rechecks the status |
537
|
|
|
|
|
|
|
## with at least on second delay, if warn-method returns 0. |
538
|
|
|
|
|
|
|
sub _fs_check |
539
|
|
|
|
|
|
|
{ |
540
|
|
|
|
|
|
|
my ($self, $needed_b, $needed_i, $needed_p) = @_; |
541
|
|
|
|
|
|
|
$needed_b = '1M' unless defined $needed_b; # bytes |
542
|
|
|
|
|
|
|
$needed_i = 100 unless defined $needed_i; # inodes |
543
|
|
|
|
|
|
|
$needed_p = 1.0 unless defined $needed_p; # percent |
544
|
|
|
|
|
|
|
$needed_b = _bytes_unit($needed_b); |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
my $DIR; |
547
|
|
|
|
|
|
|
open $DIR, "<", $self->{destdir} or |
548
|
|
|
|
|
|
|
opendir $DIR, $self->{destdir} or return; |
549
|
|
|
|
|
|
|
## fileno() does not work with opendir() handles. |
550
|
|
|
|
|
|
|
my $fd = fileno($DIR); return unless defined $fd; |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
for (;;) |
553
|
|
|
|
|
|
|
{ |
554
|
|
|
|
|
|
|
my $st = eval { [ fstatvfs($fd) ] }; |
555
|
|
|
|
|
|
|
my $total_b = $st->[1] * $st->[2]; # f_frsize * f_blocks |
556
|
|
|
|
|
|
|
my $free_b = $st->[0] * $st->[4]; # f_bsize * f_bavail |
557
|
|
|
|
|
|
|
my $free_i = $st->[7]; # f_favail |
558
|
|
|
|
|
|
|
my $perc = 100.0 * ($total_b - $free_b) / ($total_b||1); |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
return 1 if $free_b >= $needed_b && |
561
|
|
|
|
|
|
|
$free_i >= $needed_i && |
562
|
|
|
|
|
|
|
(100-$perc > $needed_p); |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
return -1 unless $self->{fs_warn}; |
565
|
|
|
|
|
|
|
my $w = $self->{fs_warn}->($self->{destdir}, $perc, $free_b, $free_i); |
566
|
|
|
|
|
|
|
return 0 if $w; |
567
|
|
|
|
|
|
|
sleep 1; |
568
|
|
|
|
|
|
|
} |
569
|
|
|
|
|
|
|
} |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
sub new |
572
|
|
|
|
|
|
|
{ |
573
|
|
|
|
|
|
|
my $self = shift; |
574
|
|
|
|
|
|
|
my $class = ref($self) || $self; |
575
|
|
|
|
|
|
|
my %obj = (ref $_[0] eq 'HASH') ? %{$_[0]} : @_; |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
$obj{verbose} = 1 unless defined $obj{verbose}; |
578
|
|
|
|
|
|
|
$obj{destdir} ||= '.'; |
579
|
|
|
|
|
|
|
$obj{logfile} ||= \*STDOUT; |
580
|
|
|
|
|
|
|
$obj{log_type} ||= 'json'; # or 'plain' |
581
|
|
|
|
|
|
|
$obj{log_type} = lc $obj{log_type}; |
582
|
|
|
|
|
|
|
$obj{maxfilesize} = $ENV{'FILE_UNPACK_MAXFILESIZE'}||'2.5G' unless defined $obj{maxfilesize}; |
583
|
|
|
|
|
|
|
$obj{maxfilesize} = _bytes_unit($obj{maxfilesize}); |
584
|
|
|
|
|
|
|
$ENV{'FILE_UNPACK_MAXFILESIZE'} = $obj{maxfilesize}; # so that children see the same. |
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
mkpath($obj{destdir}); # abs_path is unreliable if destdir does not exist |
587
|
|
|
|
|
|
|
$obj{destdir} = Cwd::fast_abs_path($obj{destdir}); |
588
|
|
|
|
|
|
|
$obj{destdir} =~ s{(.)/+$}{$1}; # assert no trailing '/'. |
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
# used in unpack() to jail mime_helpers deep inside destdir: |
591
|
|
|
|
|
|
|
$obj{dot_dot_safeguard} = 20 unless defined $obj{dot_dot_safeguard}; |
592
|
|
|
|
|
|
|
$obj{jail_chmod0} ||= 0; |
593
|
|
|
|
|
|
|
# used in unpack, print only: |
594
|
|
|
|
|
|
|
$obj{no_op} ||= 0; |
595
|
|
|
|
|
|
|
# used in unpack, blocks recursion after archive unpacking: |
596
|
|
|
|
|
|
|
$obj{one_shot} ||= $obj{no_op}; |
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
# With $self->{within_archives} we know the difference between symlinks found in |
599
|
|
|
|
|
|
|
# the given repository or symlinks that were unpacked from an archive. |
600
|
|
|
|
|
|
|
# Those from an archive are followed only with follow_file_symlinks == 2. |
601
|
|
|
|
|
|
|
$obj{follow_file_symlinks} ||= 0; |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
warn "WARNING: We are running as root: Malicious archives may clobber your filesystem.\n" if $obj{verbose} and !$>; |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
if (ref $obj{logfile} eq 'SCALAR' or !(ref $obj{logfile})) |
606
|
|
|
|
|
|
|
{ |
607
|
|
|
|
|
|
|
open $obj{lfp}, ">", $obj{logfile} or croak "open logfile $obj{logfile} failed: $!\n"; |
608
|
|
|
|
|
|
|
} |
609
|
|
|
|
|
|
|
else |
610
|
|
|
|
|
|
|
{ |
611
|
|
|
|
|
|
|
$obj{lfp} = $obj{logfile}; |
612
|
|
|
|
|
|
|
} |
613
|
|
|
|
|
|
|
# make $obj{lfp} unbuffered, so that other processes can read line by line... |
614
|
|
|
|
|
|
|
$obj{lfp}->autoflush(1); |
615
|
|
|
|
|
|
|
$obj{lfp_printed} = 0; |
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
$obj{readable_file_modes} = [ 0400 ]; |
618
|
|
|
|
|
|
|
$obj{readable_dir_modes} = [ 0700, 0500 ]; |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
if ($obj{world_readable}) |
621
|
|
|
|
|
|
|
{ |
622
|
|
|
|
|
|
|
unshift @{$obj{readable_file_modes}}, 0444; |
623
|
|
|
|
|
|
|
unshift @{$obj{readable_dir_modes}}, 0755; |
624
|
|
|
|
|
|
|
} |
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
if ($obj{maxfilesize}) |
627
|
|
|
|
|
|
|
{ |
628
|
|
|
|
|
|
|
eval |
629
|
|
|
|
|
|
|
{ |
630
|
|
|
|
|
|
|
no strict; |
631
|
|
|
|
|
|
|
# helper/application=x-shellscript calls File::Unpack->new(), with defaults... |
632
|
|
|
|
|
|
|
my @have = BSD::Resource::getrlimit(RLIMIT_FSIZE); |
633
|
|
|
|
|
|
|
if ($have[0] == RLIM_INFINITY or $have[0] > $obj{maxfilesize}) |
634
|
|
|
|
|
|
|
{ |
635
|
|
|
|
|
|
|
# if RLIM_INFINITY is seen as an attempt to increase limits, we would fail. Ignore this. |
636
|
|
|
|
|
|
|
BSD::Resource::setrlimit(RLIMIT_FSIZE, $obj{maxfilesize}, RLIM_INFINITY) or |
637
|
|
|
|
|
|
|
BSD::Resource::setrlimit(RLIMIT_FSIZE, $obj{maxfilesize}, $obj{maxfilesize}) or |
638
|
|
|
|
|
|
|
warn "RLIMIT_FSIZE($obj{maxfilesize}), limit=($have[0],$have[1]) failed\n"; |
639
|
|
|
|
|
|
|
} |
640
|
|
|
|
|
|
|
}; |
641
|
|
|
|
|
|
|
if ($@) |
642
|
|
|
|
|
|
|
{ |
643
|
|
|
|
|
|
|
carp "WARNING maxfilesize=$obj{maxfilesize} ignored:\n $@ $!\n Maybe package perl-BSD-Resource is not installed??\n\n"; |
644
|
|
|
|
|
|
|
} |
645
|
|
|
|
|
|
|
} |
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
$obj{minfree}{factor} = 10 unless defined $obj{minfree}{factor}; |
648
|
|
|
|
|
|
|
$obj{minfree}{bytes} = '1M' unless defined $obj{minfree}{bytes}; |
649
|
|
|
|
|
|
|
$obj{minfree}{percent} = '1%' unless defined $obj{minfree}{percent}; |
650
|
|
|
|
|
|
|
minfree(\%obj, warning => $obj{fs_warn}||\&_default_fs_warn); |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
$obj{exclude}{empty_dir} = 1 unless defined $obj{exclude}{empty_dir}; |
653
|
|
|
|
|
|
|
$obj{exclude}{empty_file} = 1 unless defined $obj{exclude}{empty_file}; |
654
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
$self = bless \%obj, $class; |
656
|
|
|
|
|
|
|
|
657
|
|
|
|
|
|
|
for my $h (@builtin_mime_helpers) |
658
|
|
|
|
|
|
|
{ |
659
|
|
|
|
|
|
|
$self->mime_helper(@$h); |
660
|
|
|
|
|
|
|
} |
661
|
|
|
|
|
|
|
$obj{helper_dir} = _default_helper_dir unless exists $obj{helper_dir}; |
662
|
|
|
|
|
|
|
$self->mime_helper_dir($obj{helper_dir}) if defined $obj{helper_dir} and -d $obj{helper_dir}; |
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
unless ($ENV{PERL5LIB}) |
665
|
|
|
|
|
|
|
{ |
666
|
|
|
|
|
|
|
# in case we are using non-standard perl lib dirs, put them into the environment, |
667
|
|
|
|
|
|
|
# so that any helper scripts see them too. They might need them, if written in perl. |
668
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
use Config; |
670
|
|
|
|
|
|
|
my $pat = qr{^(?:\Q$Config{vendorlib}\E|\Q$Config{sitelib}\E|\Q$Config{privlib}\E)\b}; |
671
|
|
|
|
|
|
|
my @add; # all dirs, that come before the standard dirs. |
672
|
|
|
|
|
|
|
for my $i (@INC) |
673
|
|
|
|
|
|
|
{ |
674
|
|
|
|
|
|
|
last if $i =~ m{$pat}; |
675
|
|
|
|
|
|
|
push @add, $i; |
676
|
|
|
|
|
|
|
} |
677
|
|
|
|
|
|
|
$ENV{PERL5LIB} = join ':', @add if @add; |
678
|
|
|
|
|
|
|
} |
679
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
return $self; |
681
|
|
|
|
|
|
|
} |
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
sub DESTROY |
684
|
|
|
|
|
|
|
{ |
685
|
|
|
|
|
|
|
my $self = shift; |
686
|
|
|
|
|
|
|
# when unpack() processes an input, it should delete {lfp} afterwards. |
687
|
|
|
|
|
|
|
# Added some 'or' cases, as $self->{input} might be empty, although we had processed an input. |
688
|
|
|
|
|
|
|
# |
689
|
|
|
|
|
|
|
# We rather catch an error, than produce incomplete output. |
690
|
|
|
|
|
|
|
# This happens with ksh/ast-base.2012-08-01.tar.bz2 after unpack('.../ast-base.2012-08-01/src/cmd/pax/data/a'): not much file or directory |
691
|
|
|
|
|
|
|
# |
692
|
|
|
|
|
|
|
if (($self->{input} or |
693
|
|
|
|
|
|
|
($self->{lfp_printed}||0) or |
694
|
|
|
|
|
|
|
($self->{recursion_level}||0)) and $self->{lfp}) |
695
|
|
|
|
|
|
|
{ |
696
|
|
|
|
|
|
|
if ($self->{log_type} eq 'plain') |
697
|
|
|
|
|
|
|
{ |
698
|
|
|
|
|
|
|
# pass |
699
|
|
|
|
|
|
|
} |
700
|
|
|
|
|
|
|
else |
701
|
|
|
|
|
|
|
{ |
702
|
|
|
|
|
|
|
$self->log(sprintf(qq[{"pid":"%d", "unpacked":{], $$)) unless $self->{lfp_printed}; |
703
|
|
|
|
|
|
|
} |
704
|
|
|
|
|
|
|
my $r = $self->{recursion_level}||0; |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
# this should never happen. |
707
|
|
|
|
|
|
|
# always delete $self->{lfp} manually, when done. |
708
|
|
|
|
|
|
|
## {{ |
709
|
|
|
|
|
|
|
my $msg = "unexpected destructor seen"; |
710
|
|
|
|
|
|
|
$msg = join('; ', @{$self->{error}}) if $self->{error}; |
711
|
|
|
|
|
|
|
if ($self->{log_type} eq 'plain') |
712
|
|
|
|
|
|
|
{ |
713
|
|
|
|
|
|
|
$self->log("# error: (l=$self->{lfp_printed},r=$r): $msg\n"); |
714
|
|
|
|
|
|
|
} |
715
|
|
|
|
|
|
|
else |
716
|
|
|
|
|
|
|
{ |
717
|
|
|
|
|
|
|
$self->log(qq[\n}, "error":"(l=$self->{lfp_printed},r=$r): $msg"}\n]); |
718
|
|
|
|
|
|
|
} |
719
|
|
|
|
|
|
|
close $self->{lfp} if $self->{lfp} ne $self->{logfile}; |
720
|
|
|
|
|
|
|
delete $self->{lfp}; |
721
|
|
|
|
|
|
|
delete $self->{lfp_printed}; |
722
|
|
|
|
|
|
|
} |
723
|
|
|
|
|
|
|
if ($self->{configdir}) |
724
|
|
|
|
|
|
|
{ |
725
|
|
|
|
|
|
|
rmtree($self->{configdir}); |
726
|
|
|
|
|
|
|
delete $self->{configdir}; |
727
|
|
|
|
|
|
|
} |
728
|
|
|
|
|
|
|
} |
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
=head2 unpack |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
$u->unpack($archive, [$destdir]) |
733
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
Determines the contents of an archive and recursivly extracts its files. |
735
|
|
|
|
|
|
|
An archive may be the pathname of a file or directory. The extracted contents will be |
736
|
|
|
|
|
|
|
stored in F, where dest_name is the filename |
737
|
|
|
|
|
|
|
component of archive without any leading pathname components, and possibly |
738
|
|
|
|
|
|
|
stripped or added suffix. (Subdir defaults to ''.) If archive is a directory, |
739
|
|
|
|
|
|
|
then dest_name will also be a directory. If archive is a file, the type of |
740
|
|
|
|
|
|
|
dest_name depends on the type of packing: If the archive expands to multiple |
741
|
|
|
|
|
|
|
files, dest_name will be a directory, otherwise it will be a file. If a file of |
742
|
|
|
|
|
|
|
the same name already exists in the destination subdir, an additional subdir |
743
|
|
|
|
|
|
|
component is created to avoid any conflicts. |
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
For each extracted file, a record is written to the logfile. |
746
|
|
|
|
|
|
|
When unpacking is finished, the logfile contains one valid JSON structure. |
747
|
|
|
|
|
|
|
Unpack achieves this by writing suitable prolog and epilog lines to the logfile. |
748
|
|
|
|
|
|
|
The logfile can also be parsed line by line. All file records is one line and start |
749
|
|
|
|
|
|
|
with a ' ' whitespace, and end in a ',' comma. Everything else is prolog or epilog. |
750
|
|
|
|
|
|
|
|
751
|
|
|
|
|
|
|
The actual unpacking is dispatched to MIME type specific helpers, |
752
|
|
|
|
|
|
|
selected using C. A MIME helper can either be built-in code, or an |
753
|
|
|
|
|
|
|
external shell-script found in a directory registered with |
754
|
|
|
|
|
|
|
C. The standard place for external helpers is |
755
|
|
|
|
|
|
|
F; it can be changed by the environment variable |
756
|
|
|
|
|
|
|
F or the C parameter C. |
757
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
The naming of helper scripts is described under C. |
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
A MIME helper must have executable permission and is called with 6 parameters: |
761
|
|
|
|
|
|
|
source_path, destfile, destination_path, mimetype, description, and config_dir. |
762
|
|
|
|
|
|
|
Note, that destination_path is a freshly created empty working directory, even |
763
|
|
|
|
|
|
|
if the unpacker is expected to unpack only a single file. The unpacker is |
764
|
|
|
|
|
|
|
called after chdir into destination_path, so you usually do not need to |
765
|
|
|
|
|
|
|
evaluate the third parameter. |
766
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
The directory C contains unpack configuration in .sh, .js and possibly |
768
|
|
|
|
|
|
|
other formats. A MIME helper may use this information, but need not. |
769
|
|
|
|
|
|
|
All data passed into C is reflected there, as well as the active exclude-list. |
770
|
|
|
|
|
|
|
Using the config information can help a MIME helper to skip unwanted |
771
|
|
|
|
|
|
|
work or otherwise optimize unpacking. |
772
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
C monitors the available filesystem space in destdir. If there is less space |
774
|
|
|
|
|
|
|
than configured with C, a warning can be printed and unpacking is |
775
|
|
|
|
|
|
|
optionally paused. It also monitors the MIME helpers progress reading the archive |
776
|
|
|
|
|
|
|
at source_path and reports percentages to STDERR (if verbose is 1 or more). |
777
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
After the MIME helper is finished, C examines the files it created. |
779
|
|
|
|
|
|
|
If it created no files in F, an error is reported, and the |
780
|
|
|
|
|
|
|
F may be passed to other unpackers, or finally be added to the log as is. |
781
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
If the MIME helper wants to express that F is already unpacked as far as possible |
783
|
|
|
|
|
|
|
and should be added to the log without any error messages, it creates a symbolic link |
784
|
|
|
|
|
|
|
F pointing to F. |
785
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
The system considers replacing the |
788
|
|
|
|
|
|
|
directory with a file, if all of the following conditions are met: |
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
=over |
791
|
|
|
|
|
|
|
|
792
|
|
|
|
|
|
|
=item * |
793
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
There is exactly one file in the directory. |
795
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
=item * |
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
The file name is identical with the directory name, |
799
|
|
|
|
|
|
|
except for one changed or removed |
800
|
|
|
|
|
|
|
suffix-word. (*.tar.gz -> *.tar; or *.tgz -> *.tar) |
801
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
=item * |
803
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
The file must not already exist in the parent directory. |
805
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
=back |
807
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
C prepares 20 empty subdirectory levels and chdirs the unpacker |
809
|
|
|
|
|
|
|
in there. This number can be adjusted using C<< new(dot_dot_safeguard => 20) >>. |
810
|
|
|
|
|
|
|
A directory 20 levels up from the current working dir has mode 0 while |
811
|
|
|
|
|
|
|
the MIME helper runs. C can optionally chmod(0) the parent of the subdirectory |
812
|
|
|
|
|
|
|
after it chdirs the unpacker inside. Use C<< new(jail_chmod0 => 1) >> for this, default |
813
|
|
|
|
|
|
|
is off. If enabled, a MIME helper trying to place files outside of the specified |
814
|
|
|
|
|
|
|
destination_path may receive 'permission denied' conditions. |
815
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
These are special hacks to keep badly constructed |
817
|
|
|
|
|
|
|
tar-balls, cpio-, or zip-archives at bay. |
818
|
|
|
|
|
|
|
|
819
|
|
|
|
|
|
|
Please note, that this can help against archives containing relative paths |
820
|
|
|
|
|
|
|
(like starting with '../../../foo'), but will be ineffective with absolute paths |
821
|
|
|
|
|
|
|
(starting with '/foo'). |
822
|
|
|
|
|
|
|
It is the responsibility of MIME helpers to not create absolute paths; |
823
|
|
|
|
|
|
|
C should not be run as the root user, to minimize the risk of |
824
|
|
|
|
|
|
|
compromising the root filesystem. |
825
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
A missing MIME helper is skipped, and subsequent helpers may take effect. A |
827
|
|
|
|
|
|
|
MIME helper is expected to return an exit status of 0 upon success. If it runs |
828
|
|
|
|
|
|
|
into a problem, it should print lines |
829
|
|
|
|
|
|
|
starting with the affected filenames to stderr. |
830
|
|
|
|
|
|
|
Such errors are recorded in the log with the unpacked archive, and as far as |
831
|
|
|
|
|
|
|
files were created, also with these files. |
832
|
|
|
|
|
|
|
|
833
|
|
|
|
|
|
|
Symbolic links are ignored while unpacking. |
834
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
Currently you can call C only once. |
836
|
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
=cut |
838
|
|
|
|
|
|
|
|
839
|
|
|
|
|
|
|
sub unpack |
840
|
|
|
|
|
|
|
{ |
841
|
|
|
|
|
|
|
## as long as $archive is outside $self->{destdir}, we construct our destdir by |
842
|
|
|
|
|
|
|
## replacing $self->{input_dir} with $self->{destdir}. |
843
|
|
|
|
|
|
|
## This $self->{input_dir} must be created and kept constant at the earliest |
844
|
|
|
|
|
|
|
## possible call. |
845
|
|
|
|
|
|
|
## When the $archive is inside $self->{destdir}, we do not use $self->{input_dir}, |
846
|
|
|
|
|
|
|
## we then use the current $in_dir as destdir. |
847
|
|
|
|
|
|
|
## |
848
|
|
|
|
|
|
|
## Whenever an archive path outside $self->{destdir} is found, |
849
|
|
|
|
|
|
|
## it is first passed through Cwd::fast_abs_path before any other processing occurs. |
850
|
|
|
|
|
|
|
## |
851
|
|
|
|
|
|
|
my ($self, $archive, $destdir) = @_; |
852
|
|
|
|
|
|
|
$destdir = $self->{destdir} unless defined $destdir; |
853
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
$destdir = $1 if $destdir =~ m{^(.*)$}s; # brute force untaint |
855
|
|
|
|
|
|
|
|
856
|
|
|
|
|
|
|
if (($self->{recursion_level}||0) > $RECURSION_LIMIT) |
857
|
|
|
|
|
|
|
{ |
858
|
|
|
|
|
|
|
push @{$self->{error}}, "unpack('$archive','$destdir'): recursion limit $RECURSION_LIMIT"; |
859
|
|
|
|
|
|
|
## this is only an emergency stop. |
860
|
|
|
|
|
|
|
return 1; |
861
|
|
|
|
|
|
|
} |
862
|
|
|
|
|
|
|
|
863
|
|
|
|
|
|
|
if ($archive !~ m{^/} or $archive !~ m{^\Q$self->{destdir}\E/}) |
864
|
|
|
|
|
|
|
{ |
865
|
|
|
|
|
|
|
# Cwd::fast_abs_path($archive) not only makes nice absolute paths, but it also expands |
866
|
|
|
|
|
|
|
# file symlinks. This is a bad idea for two reasons: |
867
|
|
|
|
|
|
|
# * when we allow {follow_file_symlinks} the link destination gets into the log file, |
868
|
|
|
|
|
|
|
# rather than the (expected) link itself. |
869
|
|
|
|
|
|
|
# * Also, this could easily trigger "path escaped" below . |
870
|
|
|
|
|
|
|
###### |
871
|
|
|
|
|
|
|
if ($self->{follow_file_symlinks} && $archive =~ m{^(.*)/(.*?)$}) |
872
|
|
|
|
|
|
|
{ |
873
|
|
|
|
|
|
|
# we solve both issues by doing this: |
874
|
|
|
|
|
|
|
# chop off the filename; expand the path; re-add the filename. |
875
|
|
|
|
|
|
|
my ($a_path, $a_file) = ($1,$2); |
876
|
|
|
|
|
|
|
$a_path = Cwd::fast_abs_path($a_path) if -e $a_path; |
877
|
|
|
|
|
|
|
$archive = $a_path . '/' . $a_file; |
878
|
|
|
|
|
|
|
} |
879
|
|
|
|
|
|
|
else |
880
|
|
|
|
|
|
|
{ |
881
|
|
|
|
|
|
|
$archive = Cwd::fast_abs_path($archive) if -e $archive; |
882
|
|
|
|
|
|
|
} |
883
|
|
|
|
|
|
|
} |
884
|
|
|
|
|
|
|
|
885
|
|
|
|
|
|
|
my $start_time = time; |
886
|
|
|
|
|
|
|
if ($self->{recursion_level}++ == 0) |
887
|
|
|
|
|
|
|
{ |
888
|
|
|
|
|
|
|
print STDERR "unpack: starting...\n" if $self->{verbose} > 1; |
889
|
|
|
|
|
|
|
## State that needs to be reset when (re)starting goes in here. |
890
|
|
|
|
|
|
|
# |
891
|
|
|
|
|
|
|
# CAUTION: recursion_level decrements again, as we return from unpack() |
892
|
|
|
|
|
|
|
# how do we assert, that this code only runs at the start, |
893
|
|
|
|
|
|
|
# and not once again at the end? |
894
|
|
|
|
|
|
|
$self->{inside_archives} = 0; |
895
|
|
|
|
|
|
|
$self->{json} ||= JSON->new()->ascii(1); # used often, create it unconditionally here and once. |
896
|
|
|
|
|
|
|
$self->{iput} = $archive; |
897
|
|
|
|
|
|
|
$self->{progress_tstamp} = $start_time; |
898
|
|
|
|
|
|
|
($self->{input_dir}, $self->{input_file}) = ($1, $2) if $archive =~ m{^(.*)/([^/]*)$}; |
899
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
if ($self->{log_type} eq 'plain') |
901
|
|
|
|
|
|
|
{ |
902
|
|
|
|
|
|
|
# pass |
903
|
|
|
|
|
|
|
} |
904
|
|
|
|
|
|
|
else |
905
|
|
|
|
|
|
|
{ |
906
|
|
|
|
|
|
|
# logfile prolog |
907
|
|
|
|
|
|
|
my $prolog = {destdir=>$self->{destdir}, fu=>$VERSION, pid=>$$, input => $archive, start => scalar localtime}; |
908
|
|
|
|
|
|
|
$prolog->{params} = $self->{log_params} if keys %{$self->{log_params}}; |
909
|
|
|
|
|
|
|
my $s = $self->{json}->encode($prolog); |
910
|
|
|
|
|
|
|
$s =~ s@}$@, "unpacked":{\n@; |
911
|
|
|
|
|
|
|
$self->log($s); |
912
|
|
|
|
|
|
|
} |
913
|
|
|
|
|
|
|
} |
914
|
|
|
|
|
|
|
|
915
|
|
|
|
|
|
|
unless (-e $archive) |
916
|
|
|
|
|
|
|
{ |
917
|
|
|
|
|
|
|
# contstucted $archive wrongly |
918
|
|
|
|
|
|
|
# e.g. we have 'pax/data/a/' instead of 'pax/data/_fu_3CEuA/a/' |
919
|
|
|
|
|
|
|
push @{$self->{error}}, "unpack('$archive'): not much file or directory; "; |
920
|
|
|
|
|
|
|
return 1; |
921
|
|
|
|
|
|
|
} |
922
|
|
|
|
|
|
|
|
923
|
|
|
|
|
|
|
unless ($self->{input_dir}) |
924
|
|
|
|
|
|
|
{ |
925
|
|
|
|
|
|
|
push @{$self->{error}}, "unpack('$archive'); internal error: no {input_dir}"; |
926
|
|
|
|
|
|
|
return 1; |
927
|
|
|
|
|
|
|
} |
928
|
|
|
|
|
|
|
|
929
|
|
|
|
|
|
|
my ($in_dir, $in_file) = ('/', ''); |
930
|
|
|
|
|
|
|
($in_dir, $in_file) = ($1, $2) if $archive =~ m{^(.*/)([^/]*)$}; |
931
|
|
|
|
|
|
|
|
932
|
|
|
|
|
|
|
my $inside_destdir = 1; |
933
|
|
|
|
|
|
|
my $subdir = $in_dir; # remainder after stripping $orig_archive_prefix / $self->{destdir} |
934
|
|
|
|
|
|
|
unless ($subdir =~ s{^\Q$self->{destdir}\E/+}{}) |
935
|
|
|
|
|
|
|
{ |
936
|
|
|
|
|
|
|
$inside_destdir = 0; |
937
|
|
|
|
|
|
|
die "$archive path escaped. Neither inside original $self->{input_dir} nor inside destdir='$self->{destdir}'\n" |
938
|
|
|
|
|
|
|
unless $subdir =~ s{^\Q$self->{input_dir}\E/+}{}; |
939
|
|
|
|
|
|
|
} |
940
|
|
|
|
|
|
|
|
941
|
|
|
|
|
|
|
print STDERR "unpack: r=$self->{recursion_level} in_dir=$in_dir, in_file=$in_file, destdir=$destdir\n" if $self->{verbose} > 1; |
942
|
|
|
|
|
|
|
|
943
|
|
|
|
|
|
|
my @missing_unpacker; |
944
|
|
|
|
|
|
|
|
945
|
|
|
|
|
|
|
if ($self->{progress_tstamp} + 10 < $start_time) |
946
|
|
|
|
|
|
|
{ |
947
|
|
|
|
|
|
|
printf "T: %d files ...\n", $self->{file_count}||0; |
948
|
|
|
|
|
|
|
$self->{progress_tstamp} = $start_time; |
949
|
|
|
|
|
|
|
} |
950
|
|
|
|
|
|
|
|
951
|
|
|
|
|
|
|
if (-d $archive) |
952
|
|
|
|
|
|
|
{ |
953
|
|
|
|
|
|
|
$self->_chmod_add($archive, @{$self->{readable_dir_modes}}); |
954
|
|
|
|
|
|
|
if (opendir DIR, $archive) |
955
|
|
|
|
|
|
|
{ |
956
|
|
|
|
|
|
|
my @f = sort grep { $_ ne '.' && $_ ne '..' } readdir DIR; |
957
|
|
|
|
|
|
|
closedir DIR; |
958
|
|
|
|
|
|
|
print STDERR "dir = @f\n" if $self->{verbose} > 1; |
959
|
|
|
|
|
|
|
for my $f (@f) |
960
|
|
|
|
|
|
|
{ |
961
|
|
|
|
|
|
|
if ($self->{exclude}{re} && $f =~ m{$self->{exclude}{re}}) |
962
|
|
|
|
|
|
|
{ |
963
|
|
|
|
|
|
|
$self->{skipped}{exclude}++; |
964
|
|
|
|
|
|
|
} |
965
|
|
|
|
|
|
|
my $new_in = "$archive/$f"; |
966
|
|
|
|
|
|
|
## if $archive is $inside_destdir, then $archive is normally indentical to $destdir. |
967
|
|
|
|
|
|
|
## ($inside_destdir means inside $self->{destdir}, actually) |
968
|
|
|
|
|
|
|
my $new_destdir = $destdir; $new_destdir .= "/$f" if -d $new_in; |
969
|
|
|
|
|
|
|
my $symlink_to_skip = -l $new_in; |
970
|
|
|
|
|
|
|
my $dangeous_symlink = $self->{inside_archives} ? 1 : 0; |
971
|
|
|
|
|
|
|
if ($symlink_to_skip and ($self->{follow_file_symlinks} > $dangeous_symlink)) |
972
|
|
|
|
|
|
|
{ |
973
|
|
|
|
|
|
|
$symlink_to_skip = 0 if -f $new_in; |
974
|
|
|
|
|
|
|
# directory and dead symlinks we always skip. |
975
|
|
|
|
|
|
|
# directory symlinks could cause us to recurse out of the current tree. |
976
|
|
|
|
|
|
|
} |
977
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
if ($symlink_to_skip) |
979
|
|
|
|
|
|
|
{ |
980
|
|
|
|
|
|
|
# test -l first, as -f could be also true here... |
981
|
|
|
|
|
|
|
print STDERR "symlink $new_in: skipped\n" if $self->{verbose} > 1; |
982
|
|
|
|
|
|
|
$self->{skipped}{symlink}++; |
983
|
|
|
|
|
|
|
} |
984
|
|
|
|
|
|
|
elsif (-f $new_in or -d _) |
985
|
|
|
|
|
|
|
{ |
986
|
|
|
|
|
|
|
$self->unpack($new_in, $new_destdir); |
987
|
|
|
|
|
|
|
} |
988
|
|
|
|
|
|
|
else |
989
|
|
|
|
|
|
|
{ |
990
|
|
|
|
|
|
|
print STDERR "special file $new_in: skipped\n" if $self->{verbose} > 1; |
991
|
|
|
|
|
|
|
$self->{skipped}{device_node}++; |
992
|
|
|
|
|
|
|
} |
993
|
|
|
|
|
|
|
$self->{progress_tstamp} = time; |
994
|
|
|
|
|
|
|
} |
995
|
|
|
|
|
|
|
} |
996
|
|
|
|
|
|
|
else |
997
|
|
|
|
|
|
|
{ |
998
|
|
|
|
|
|
|
push @{$self->{error}}, "unpack dir ($archive) failed: $!"; |
999
|
|
|
|
|
|
|
} |
1000
|
|
|
|
|
|
|
} |
1001
|
|
|
|
|
|
|
elsif (-f $archive) |
1002
|
|
|
|
|
|
|
{ |
1003
|
|
|
|
|
|
|
if ($self->_not_excluded($subdir, $in_file) and |
1004
|
|
|
|
|
|
|
!defined($self->{done}{$archive})) |
1005
|
|
|
|
|
|
|
{ |
1006
|
|
|
|
|
|
|
$self->_chmod_add($archive, @{$self->{readable_file_modes}}); |
1007
|
|
|
|
|
|
|
|
1008
|
|
|
|
|
|
|
my $m = $self->mime($archive); |
1009
|
|
|
|
|
|
|
my ($h, $more) = $self->find_mime_helper($m); |
1010
|
|
|
|
|
|
|
my $data = { mime => $m->[0] }; |
1011
|
|
|
|
|
|
|
if ($more) |
1012
|
|
|
|
|
|
|
{ |
1013
|
|
|
|
|
|
|
$data->{found} = $more; |
1014
|
|
|
|
|
|
|
push @missing_unpacker, @{$more->{missing}} if $more->{missing}; |
1015
|
|
|
|
|
|
|
} |
1016
|
|
|
|
|
|
|
|
1017
|
|
|
|
|
|
|
if ($m->[0] eq 'text/plain' or !$h) |
1018
|
|
|
|
|
|
|
{ |
1019
|
|
|
|
|
|
|
# not really an archive. |
1020
|
|
|
|
|
|
|
unless ($archive =~ m{^\Q$self->{destdir}\E/}) |
1021
|
|
|
|
|
|
|
{ |
1022
|
|
|
|
|
|
|
mkpath($destdir) unless $self->{no_op}; |
1023
|
|
|
|
|
|
|
my $destdir_in_file; |
1024
|
|
|
|
|
|
|
$destdir_in_file = $1 if "$destdir/$in_file" =~ m{^(.*)$}s; # brute force untaint |
1025
|
|
|
|
|
|
|
|
1026
|
|
|
|
|
|
|
if (-e "$destdir_in_file") |
1027
|
|
|
|
|
|
|
{ |
1028
|
|
|
|
|
|
|
print STDERR "unpack copy in: $destdir_in_file already exists, " if $self->{verbose}; |
1029
|
|
|
|
|
|
|
$destdir = File::Temp::tempdir($TMPDIR_TEMPL, DIR => $destdir); |
1030
|
|
|
|
|
|
|
$destdir_in_file = $1 if "$destdir/$in_file" =~ m{^(.*)$}s; # brute force untaint |
1031
|
|
|
|
|
|
|
print STDERR "using $destdir_in_file instead.\n" if $self->{verbose}; |
1032
|
|
|
|
|
|
|
} |
1033
|
|
|
|
|
|
|
$data->{error} = "copy($archive): $!" unless File::Copy::copy($archive, $destdir_in_file); |
1034
|
|
|
|
|
|
|
$self->logf($destdir_in_file => $data); |
1035
|
|
|
|
|
|
|
} |
1036
|
|
|
|
|
|
|
else |
1037
|
|
|
|
|
|
|
{ |
1038
|
|
|
|
|
|
|
$self->logf($archive => $data); |
1039
|
|
|
|
|
|
|
} |
1040
|
|
|
|
|
|
|
$self->{file_count}++; |
1041
|
|
|
|
|
|
|
} |
1042
|
|
|
|
|
|
|
else |
1043
|
|
|
|
|
|
|
{ |
1044
|
|
|
|
|
|
|
# really an archive. |
1045
|
|
|
|
|
|
|
if ($self->{archive_name_as_dir}) |
1046
|
|
|
|
|
|
|
{ |
1047
|
|
|
|
|
|
|
print STDERR "archive_name_as_dir: expanding destdir $destdir\n" if $self->{verbose}; |
1048
|
|
|
|
|
|
|
$destdir = _unused_pathname($destdir, $in_file); |
1049
|
|
|
|
|
|
|
print STDERR "archive_name_as_dir: to $destdir\n" if $self->{verbose}; |
1050
|
|
|
|
|
|
|
} |
1051
|
|
|
|
|
|
|
mkpath($destdir) unless $self->{no_op}; |
1052
|
|
|
|
|
|
|
$self->{configdir} = $self->_prep_configdir() unless exists $self->{configdir}; |
1053
|
|
|
|
|
|
|
|
1054
|
|
|
|
|
|
|
## new_name is a suggestion for the mime_helper only. |
1055
|
|
|
|
|
|
|
my $new_name = $in_file; |
1056
|
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
# Either shorten the name from e.g. foo.txt.bz2 to foo.txt or append |
1058
|
|
|
|
|
|
|
# something: foo.pdf to foo.pdf._; |
1059
|
|
|
|
|
|
|
# Normally a suffix is appended by '.', but we also see '-' or '_' in real life. |
1060
|
|
|
|
|
|
|
unless ($h->{suffix_re} and $new_name =~ s{[\._-]$h->{suffix_re}(?:\._\d*)?$}{}i) |
1061
|
|
|
|
|
|
|
{ |
1062
|
|
|
|
|
|
|
# avoid unary notation of recursion couning. There may be a 256 char limit per |
1063
|
|
|
|
|
|
|
# directory entry. Start counting in decimal, if two or more. |
1064
|
|
|
|
|
|
|
# Hmm, the /e modifier is not mentioned in perlre, but it works. Is it deprecated?? |
1065
|
|
|
|
|
|
|
$new_name .= "._"; |
1066
|
|
|
|
|
|
|
$new_name =~ s{\._\._$}{\._2}; |
1067
|
|
|
|
|
|
|
$new_name =~ s{\._(\d+)\._$}{ "._".($1+1) }e; |
1068
|
|
|
|
|
|
|
} |
1069
|
|
|
|
|
|
|
|
1070
|
|
|
|
|
|
|
## if consumer of logf wants to do progress indication himself, |
1071
|
|
|
|
|
|
|
## then tell him what we do before we start. (Our timer tick code may be an alternative...) |
1072
|
|
|
|
|
|
|
# |
1073
|
|
|
|
|
|
|
# if ($archive =~ m{^\Q$self->{destdir}\E}) |
1074
|
|
|
|
|
|
|
# { |
1075
|
|
|
|
|
|
|
# $self->logf($archive => { unpacking => $h->{fmt_p} }); |
1076
|
|
|
|
|
|
|
# } |
1077
|
|
|
|
|
|
|
|
1078
|
|
|
|
|
|
|
my ($unpacked, $diag) = |
1079
|
|
|
|
|
|
|
$self->_run_mime_helper($h, $archive, $new_name, $destdir, |
1080
|
|
|
|
|
|
|
$m->[0], $m->[2], $self->{configdir}); |
1081
|
|
|
|
|
|
|
|
1082
|
|
|
|
|
|
|
# die Dumper "_run_mime_helper: $archive, $new_name, $destdir", readlink($unpacked), $unpacked; |
1083
|
|
|
|
|
|
|
|
1084
|
|
|
|
|
|
|
unless (ref $unpacked or -e $unpacked) |
1085
|
|
|
|
|
|
|
{ |
1086
|
|
|
|
|
|
|
warn("archive=$archive, new_name=$new_name\n"); |
1087
|
|
|
|
|
|
|
die("assert -e '$unpacked'") |
1088
|
|
|
|
|
|
|
} |
1089
|
|
|
|
|
|
|
|
1090
|
|
|
|
|
|
|
return 0 if $self->{no_op}; |
1091
|
|
|
|
|
|
|
if (ref $unpacked) |
1092
|
|
|
|
|
|
|
{ |
1093
|
|
|
|
|
|
|
# a ref here means, something went wrong. |
1094
|
|
|
|
|
|
|
$data->{failed} = $h->{fmt_p}; |
1095
|
|
|
|
|
|
|
$data->{error} = $unpacked->{error}; |
1096
|
|
|
|
|
|
|
$data->{stderr} = $unpacked->{stderr} if defined $unpacked->{stderr}; |
1097
|
|
|
|
|
|
|
$self->logf($archive => $data); |
1098
|
|
|
|
|
|
|
$self->{file_count}++; |
1099
|
|
|
|
|
|
|
$self->{helper_errors}++; |
1100
|
|
|
|
|
|
|
} |
1101
|
|
|
|
|
|
|
elsif (readlink($unpacked)||'' eq $archive) |
1102
|
|
|
|
|
|
|
{ |
1103
|
|
|
|
|
|
|
# a symlink backwards means, there is nothing to unpack here. take it as is. |
1104
|
|
|
|
|
|
|
unlink $unpacked; |
1105
|
|
|
|
|
|
|
rmdir $destdir if $self->{archive_name_as_dir}; # now an empty dir. |
1106
|
|
|
|
|
|
|
$data->{passed} = $h->{name}; |
1107
|
|
|
|
|
|
|
$data->{input} = $self->loggable_pathname($archive); |
1108
|
|
|
|
|
|
|
$data->{cmd} = $h->{fmt_p}; |
1109
|
|
|
|
|
|
|
{ |
1110
|
|
|
|
|
|
|
local $Data::Dumper::Terse = 1; |
1111
|
|
|
|
|
|
|
local $Data::Dumper::Indent = 0; |
1112
|
|
|
|
|
|
|
$data->{diag} = Dumper $diag if $diag; |
1113
|
|
|
|
|
|
|
} |
1114
|
|
|
|
|
|
|
|
1115
|
|
|
|
|
|
|
if ($archive =~ m{^\Q$self->{destdir}\E}) |
1116
|
|
|
|
|
|
|
{ |
1117
|
|
|
|
|
|
|
# if inside, we just flag it done and log it. |
1118
|
|
|
|
|
|
|
$self->{done}{$archive} = $archive; |
1119
|
|
|
|
|
|
|
$self->logf($archive => $data); |
1120
|
|
|
|
|
|
|
} |
1121
|
|
|
|
|
|
|
else |
1122
|
|
|
|
|
|
|
{ |
1123
|
|
|
|
|
|
|
# if the archive itself was outside destdir, |
1124
|
|
|
|
|
|
|
# we copy it in, flag it done there, and log it here. |
1125
|
|
|
|
|
|
|
if (File::Copy::copy($archive, $unpacked)) |
1126
|
|
|
|
|
|
|
{ |
1127
|
|
|
|
|
|
|
$self->{done}{$archive} = $unpacked; |
1128
|
|
|
|
|
|
|
$self->logf($unpacked => $data); |
1129
|
|
|
|
|
|
|
} |
1130
|
|
|
|
|
|
|
else |
1131
|
|
|
|
|
|
|
{ |
1132
|
|
|
|
|
|
|
$data->{error} = "copy($archive, $unpacked): $!"; |
1133
|
|
|
|
|
|
|
$self->logf($archive => $data); |
1134
|
|
|
|
|
|
|
} |
1135
|
|
|
|
|
|
|
} |
1136
|
|
|
|
|
|
|
$self->{file_count}++; |
1137
|
|
|
|
|
|
|
} |
1138
|
|
|
|
|
|
|
else |
1139
|
|
|
|
|
|
|
{ |
1140
|
|
|
|
|
|
|
# normal case: mime helper placed all |
1141
|
|
|
|
|
|
|
# in a directory (or file) called $unpacked |
1142
|
|
|
|
|
|
|
|
1143
|
|
|
|
|
|
|
|
1144
|
|
|
|
|
|
|
if ($archive =~ m{^\Q$self->{destdir}\E}) |
1145
|
|
|
|
|
|
|
{ |
1146
|
|
|
|
|
|
|
# to delete it, we should know if it was created during unpack. |
1147
|
|
|
|
|
|
|
} |
1148
|
|
|
|
|
|
|
$self->{done}{$archive} = $unpacked; |
1149
|
|
|
|
|
|
|
$data->{cmd} = $h->{fmt_p}; |
1150
|
|
|
|
|
|
|
$data->{unpacked} = $self->loggable_pathname($unpacked); |
1151
|
|
|
|
|
|
|
$self->logf($archive => $data); |
1152
|
|
|
|
|
|
|
$self->{file_count}++; |
1153
|
|
|
|
|
|
|
$self->{inside_archives}++; |
1154
|
|
|
|
|
|
|
|
1155
|
|
|
|
|
|
|
my $newdestdir = $unpacked; |
1156
|
|
|
|
|
|
|
$newdestdir =~ s{/+[^/]+}{} unless -d $newdestdir; # make sure it is a directory |
1157
|
|
|
|
|
|
|
$newdestdir = $destdir unless $newdestdir =~ m{^\Q$self->{destdir}\E/}; # make sure it does not escape |
1158
|
|
|
|
|
|
|
if ($self->{one_shot}) |
1159
|
|
|
|
|
|
|
{ |
1160
|
|
|
|
|
|
|
local $self->{mime_orcish}; |
1161
|
|
|
|
|
|
|
local $self->{mime_helper}; |
1162
|
|
|
|
|
|
|
|
1163
|
|
|
|
|
|
|
$self->unpack($unpacked, $newdestdir); |
1164
|
|
|
|
|
|
|
} |
1165
|
|
|
|
|
|
|
else |
1166
|
|
|
|
|
|
|
{ |
1167
|
|
|
|
|
|
|
$self->unpack($unpacked, $newdestdir); |
1168
|
|
|
|
|
|
|
} |
1169
|
|
|
|
|
|
|
$self->{progress_tstamp} = time; |
1170
|
|
|
|
|
|
|
$self->{inside_archives}--; |
1171
|
|
|
|
|
|
|
} |
1172
|
|
|
|
|
|
|
} |
1173
|
|
|
|
|
|
|
} |
1174
|
|
|
|
|
|
|
} |
1175
|
|
|
|
|
|
|
else |
1176
|
|
|
|
|
|
|
{ |
1177
|
|
|
|
|
|
|
$self->logf($archive => { "skipped" => "special file"}); |
1178
|
|
|
|
|
|
|
$self->{file_count}++; |
1179
|
|
|
|
|
|
|
} |
1180
|
|
|
|
|
|
|
|
1181
|
|
|
|
|
|
|
if (--$self->{recursion_level} == 0) |
1182
|
|
|
|
|
|
|
{ |
1183
|
|
|
|
|
|
|
if ($self->{log_type} eq 'plain') |
1184
|
|
|
|
|
|
|
{ |
1185
|
|
|
|
|
|
|
for my $m (@missing_unpacker) |
1186
|
|
|
|
|
|
|
{ |
1187
|
|
|
|
|
|
|
$self->log("# missing unpacker: $m\n"); |
1188
|
|
|
|
|
|
|
} |
1189
|
|
|
|
|
|
|
for my $s (sort keys %{$self->{skipped}}) |
1190
|
|
|
|
|
|
|
{ |
1191
|
|
|
|
|
|
|
$self->log("# skipped: $s: $self->{skipped}{$s}\n"); |
1192
|
|
|
|
|
|
|
} |
1193
|
|
|
|
|
|
|
$self->log("# error: ".join('; ', @{$self->{error}})."\n") if $self->{error}; |
1194
|
|
|
|
|
|
|
} |
1195
|
|
|
|
|
|
|
else |
1196
|
|
|
|
|
|
|
{ |
1197
|
|
|
|
|
|
|
my $epilog = {end => scalar localtime, sec => time-$start_time }; |
1198
|
|
|
|
|
|
|
$epilog->{skipped} = $self->{skipped} if $self->{skipped}; |
1199
|
|
|
|
|
|
|
$epilog->{error}{msg} = $self->{error} if $self->{error}; # just in case some errors were non-fatal. |
1200
|
|
|
|
|
|
|
$epilog->{error}{helper} = $self->{helper_errors} if $self->{helper_errors}; # counting |
1201
|
|
|
|
|
|
|
$epilog->{missing_unpacker} = \@missing_unpacker if @missing_unpacker; |
1202
|
|
|
|
|
|
|
my $s = $self->{json}->encode($epilog); |
1203
|
|
|
|
|
|
|
|
1204
|
|
|
|
|
|
|
$s =~ s@^{@\n},@; |
1205
|
|
|
|
|
|
|
$self->log($s . "\n"); |
1206
|
|
|
|
|
|
|
} |
1207
|
|
|
|
|
|
|
|
1208
|
|
|
|
|
|
|
if ($self->{lfp} ne $self->{logfile}) |
1209
|
|
|
|
|
|
|
{ |
1210
|
|
|
|
|
|
|
close $self->{lfp} or carp "logfile write ($self->{logfile}) failed: $!\n"; |
1211
|
|
|
|
|
|
|
} |
1212
|
|
|
|
|
|
|
delete $self->{lfp}; |
1213
|
|
|
|
|
|
|
delete $self->{lfp_printed}; |
1214
|
|
|
|
|
|
|
} |
1215
|
|
|
|
|
|
|
|
1216
|
|
|
|
|
|
|
# FIXME: should return nonzero if we had any unrecoverable errors. |
1217
|
|
|
|
|
|
|
return $self->{error} ? 1 : 0; |
1218
|
|
|
|
|
|
|
} |
1219
|
|
|
|
|
|
|
|
1220
|
|
|
|
|
|
|
# Try a few modes to add to the current permission bits. |
1221
|
|
|
|
|
|
|
# The first mode that succeeds ends the list. |
1222
|
|
|
|
|
|
|
sub _chmod_add |
1223
|
|
|
|
|
|
|
{ |
1224
|
|
|
|
|
|
|
my ($self, $file, @modes) = @_; |
1225
|
|
|
|
|
|
|
$file = $1 if $file =~ m{^(.*)$}m; |
1226
|
|
|
|
|
|
|
my $perm = (stat $file)[2] & 07777; |
1227
|
|
|
|
|
|
|
for my $m (@modes) |
1228
|
|
|
|
|
|
|
{ |
1229
|
|
|
|
|
|
|
last if chmod($perm|$m, $file); # may or may not succeed. Harmless here. |
1230
|
|
|
|
|
|
|
} |
1231
|
|
|
|
|
|
|
} |
1232
|
|
|
|
|
|
|
|
1233
|
|
|
|
|
|
|
=head2 run |
1234
|
|
|
|
|
|
|
|
1235
|
|
|
|
|
|
|
$u->run([argv0, ...], @redir, ... { init => sub ..., in, out, err, watch, every, prog, ... }) |
1236
|
|
|
|
|
|
|
|
1237
|
|
|
|
|
|
|
A general purpose fork-exec wrapper, based on IPC::Run. STDIN is closed, unless you specify |
1238
|
|
|
|
|
|
|
an C<< in => >> as described in IPC::Run. STDERR and STDOUT are both printed to |
1239
|
|
|
|
|
|
|
STDOUT, prefixed with 'E: ' and 'O: ' respectively, unless you specify C<< out => >>, |
1240
|
|
|
|
|
|
|
C<< err => >>, or C<< out_err => >> ... for both. |
1241
|
|
|
|
|
|
|
|
1242
|
|
|
|
|
|
|
Using redirection operators in @redir takes precedence over the above in/out/err |
1243
|
|
|
|
|
|
|
redirections. See also L. If you use the options in/out/err, you should |
1244
|
|
|
|
|
|
|
restrict your redirection operators to the forms '<', '0<', '1>', '2>', or '>&' due |
1245
|
|
|
|
|
|
|
to limitations in the precedence logic. Piping via '|' is properly recognized, |
1246
|
|
|
|
|
|
|
but background execution '&' may confuse the precedence logic. |
1247
|
|
|
|
|
|
|
|
1248
|
|
|
|
|
|
|
This C method is completly independent of the rest of File::Unpack. It works both |
1249
|
|
|
|
|
|
|
as a static function and as a method call. |
1250
|
|
|
|
|
|
|
It is used internally by C, but is exported to be of use elsewhere. |
1251
|
|
|
|
|
|
|
|
1252
|
|
|
|
|
|
|
Init is run after construction of redirects. Calling chdir() in init thus has no |
1253
|
|
|
|
|
|
|
effect on redirects with relative paths. |
1254
|
|
|
|
|
|
|
|
1255
|
|
|
|
|
|
|
Return value in scalar context is the first nonzero result code, if any. In list context |
1256
|
|
|
|
|
|
|
all return values are returned. |
1257
|
|
|
|
|
|
|
=cut |
1258
|
|
|
|
|
|
|
|
1259
|
|
|
|
|
|
|
sub run |
1260
|
|
|
|
|
|
|
{ |
1261
|
|
|
|
|
|
|
shift if ref $_[0] ne 'ARRAY'; # toss $self object handle. |
1262
|
|
|
|
|
|
|
my (@cmd) = @_; |
1263
|
|
|
|
|
|
|
my $opt; |
1264
|
|
|
|
|
|
|
$opt = pop @cmd if ref $cmd[-1] eq 'HASH'; |
1265
|
|
|
|
|
|
|
|
1266
|
|
|
|
|
|
|
my $cmdname = $cmd[0][0]; $cmdname =~ s{^.*/}{}; |
1267
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
# run the command with |
1269
|
|
|
|
|
|
|
# - STDIN closed, unless you specify an { in => ... } |
1270
|
|
|
|
|
|
|
# - STDERR and STDOUT printed prefixed with 'E: ', 'O: ' to STDOUT, |
1271
|
|
|
|
|
|
|
# unless you specify out =>, err =>, or out_err => ... for both. |
1272
|
|
|
|
|
|
|
$opt->{in} ||= \undef; |
1273
|
|
|
|
|
|
|
$opt->{out} ||= $opt->{out_err}; |
1274
|
|
|
|
|
|
|
$opt->{err} ||= $opt->{out_err}; |
1275
|
|
|
|
|
|
|
$opt->{out} ||= sub { print "O: ($cmdname) @_\n"; }; |
1276
|
|
|
|
|
|
|
$opt->{err} ||= sub { print "E: ($cmdname) @_\n"; }; |
1277
|
|
|
|
|
|
|
|
1278
|
|
|
|
|
|
|
my $has_i_redir = 0; |
1279
|
|
|
|
|
|
|
my $has_o_redir = 0; |
1280
|
|
|
|
|
|
|
my $has_e_redir = 0; |
1281
|
|
|
|
|
|
|
|
1282
|
|
|
|
|
|
|
## The ugly truth is, there might be multiple commands with pipes. |
1283
|
|
|
|
|
|
|
## We need to provide all of them with the proper redirects. |
1284
|
|
|
|
|
|
|
## A command that pipes somewhere else, has_o_redir outbound through the pipe. |
1285
|
|
|
|
|
|
|
## A command that is piped into, has_i_redir inbound from the pipe. |
1286
|
|
|
|
|
|
|
my @run = (); |
1287
|
|
|
|
|
|
|
|
1288
|
|
|
|
|
|
|
|
1289
|
|
|
|
|
|
|
for my $c (@cmd) |
1290
|
|
|
|
|
|
|
{ |
1291
|
|
|
|
|
|
|
if (ref $c) |
1292
|
|
|
|
|
|
|
{ |
1293
|
|
|
|
|
|
|
push @run, $c; |
1294
|
|
|
|
|
|
|
|
1295
|
|
|
|
|
|
|
# put init early, so that it is run, before any IO redirects access relative paths. |
1296
|
|
|
|
|
|
|
push @run, init => $opt->{init} if $opt->{init}; |
1297
|
|
|
|
|
|
|
next; # don't look into argvs, but |
1298
|
|
|
|
|
|
|
} |
1299
|
|
|
|
|
|
|
# look only into redirection operators |
1300
|
|
|
|
|
|
|
$has_i_redir++ if $c =~ m{^0?<}; |
1301
|
|
|
|
|
|
|
$has_o_redir++ if $c =~ m{^1?>}; |
1302
|
|
|
|
|
|
|
$has_e_redir++ if $c =~ m{^(?:2>|>&$)}; |
1303
|
|
|
|
|
|
|
if ($c eq '|') |
1304
|
|
|
|
|
|
|
{ |
1305
|
|
|
|
|
|
|
push @run, '0<', $opt->{in} unless $has_i_redir; |
1306
|
|
|
|
|
|
|
$has_i_redir = 'piped'; |
1307
|
|
|
|
|
|
|
push @run, "2>", $opt->{err} unless $has_e_redir; |
1308
|
|
|
|
|
|
|
$has_e_redir = $has_o_redir = 0; |
1309
|
|
|
|
|
|
|
} |
1310
|
|
|
|
|
|
|
push @run, $c; # $1 if $c =~ m{^(.*)$}s; # brute force untaint |
1311
|
|
|
|
|
|
|
} |
1312
|
|
|
|
|
|
|
|
1313
|
|
|
|
|
|
|
push @run, '0<', $opt->{in} unless $has_i_redir; |
1314
|
|
|
|
|
|
|
push @run, "1>", $opt->{out} unless $has_o_redir; |
1315
|
|
|
|
|
|
|
push @run, "2>", $opt->{err} unless $has_e_redir; |
1316
|
|
|
|
|
|
|
|
1317
|
|
|
|
|
|
|
# die Dumper \@run if $cmd[0][0] eq '/usr/bin/rpm2cpio'; |
1318
|
|
|
|
|
|
|
|
1319
|
|
|
|
|
|
|
my $t; |
1320
|
|
|
|
|
|
|
$t = IPC::Run::timer($opt->{every}-0.6) if $opt->{every}; |
1321
|
|
|
|
|
|
|
push @run, $t if $t; |
1322
|
|
|
|
|
|
|
|
1323
|
|
|
|
|
|
|
$run[0][0] = $1 if $run[0][0] =~ m{^(.*)$}s; |
1324
|
|
|
|
|
|
|
push @run, debug => $opt->{debug} if $opt->{debug}; |
1325
|
|
|
|
|
|
|
my $h = eval { IPC::Run::start @run; }; |
1326
|
|
|
|
|
|
|
return wantarray ? (undef, $@) : undef unless $h; |
1327
|
|
|
|
|
|
|
|
1328
|
|
|
|
|
|
|
while ($h->pumpable) |
1329
|
|
|
|
|
|
|
{ |
1330
|
|
|
|
|
|
|
# eval {} guards against 'process ended prematurely' errors. |
1331
|
|
|
|
|
|
|
# This happens on very fast commands, despite pumpable(). |
1332
|
|
|
|
|
|
|
eval { $h->pump }; |
1333
|
|
|
|
|
|
|
if ($t && $t->is_expired) |
1334
|
|
|
|
|
|
|
{ |
1335
|
|
|
|
|
|
|
$t->{has_fired}++; |
1336
|
|
|
|
|
|
|
$opt->{prog}->($h, $opt); |
1337
|
|
|
|
|
|
|
$t->start($opt->{every}); |
1338
|
|
|
|
|
|
|
} |
1339
|
|
|
|
|
|
|
} |
1340
|
|
|
|
|
|
|
$h->finish; |
1341
|
|
|
|
|
|
|
$opt->{finished} = 1; |
1342
|
|
|
|
|
|
|
|
1343
|
|
|
|
|
|
|
## call it once more, to get the 100% printout, or somthing else... |
1344
|
|
|
|
|
|
|
$opt->{prog}->($h, $opt) if $t->{has_fired}; |
1345
|
|
|
|
|
|
|
|
1346
|
|
|
|
|
|
|
return wantarray ? $h->full_results : $h->result; |
1347
|
|
|
|
|
|
|
} |
1348
|
|
|
|
|
|
|
|
1349
|
|
|
|
|
|
|
=head2 fmt_run_shellcmd |
1350
|
|
|
|
|
|
|
|
1351
|
|
|
|
|
|
|
File::Unpack::fmt_run_shellcmd( $m->{argvv} ) |
1352
|
|
|
|
|
|
|
|
1353
|
|
|
|
|
|
|
Static function to pretty print the return value $m of method find_mime_helper(); |
1354
|
|
|
|
|
|
|
It formats a command array used with run() as a properly escaped shell command string. |
1355
|
|
|
|
|
|
|
|
1356
|
|
|
|
|
|
|
=cut |
1357
|
|
|
|
|
|
|
|
1358
|
|
|
|
|
|
|
sub _my_shell_quote |
1359
|
|
|
|
|
|
|
{ |
1360
|
|
|
|
|
|
|
my @a = @_; |
1361
|
|
|
|
|
|
|
my $sub; |
1362
|
|
|
|
|
|
|
if (@a and defined $a[0]) |
1363
|
|
|
|
|
|
|
{ |
1364
|
|
|
|
|
|
|
$sub = '\\&_locate_tar' if $a[0] eq \&_locate_tar; |
1365
|
|
|
|
|
|
|
$sub = '\\&_locate_cpio_i' if $a[0] eq \&_locate_cpio_i; |
1366
|
|
|
|
|
|
|
} |
1367
|
|
|
|
|
|
|
|
1368
|
|
|
|
|
|
|
if ($sub) |
1369
|
|
|
|
|
|
|
{ |
1370
|
|
|
|
|
|
|
shift @a; |
1371
|
|
|
|
|
|
|
return "$sub " . shell_quote(@a); |
1372
|
|
|
|
|
|
|
} |
1373
|
|
|
|
|
|
|
return shell_quote(@a); |
1374
|
|
|
|
|
|
|
} |
1375
|
|
|
|
|
|
|
|
1376
|
|
|
|
|
|
|
sub fmt_run_shellcmd |
1377
|
|
|
|
|
|
|
{ |
1378
|
|
|
|
|
|
|
my @a = @_; |
1379
|
|
|
|
|
|
|
@a = @{$a[0]{argvv}} if ref $a[0] eq 'HASH'; |
1380
|
|
|
|
|
|
|
my @r = (); |
1381
|
|
|
|
|
|
|
for my $a (@a) |
1382
|
|
|
|
|
|
|
{ |
1383
|
|
|
|
|
|
|
push @r, ref($a) ? '('._my_shell_quote(@$a).')' : _my_shell_quote($a); |
1384
|
|
|
|
|
|
|
} |
1385
|
|
|
|
|
|
|
my $r = join ' ', @r; |
1386
|
|
|
|
|
|
|
$r =~ s{^\((.*)\)$}{$1} unless $#a; # parenthesis around a single cmd are unneeded. |
1387
|
|
|
|
|
|
|
return $r; |
1388
|
|
|
|
|
|
|
} |
1389
|
|
|
|
|
|
|
|
1390
|
|
|
|
|
|
|
## not a method, officially. |
1391
|
|
|
|
|
|
|
# |
1392
|
|
|
|
|
|
|
## Chdir in and out of a jail is done here, as IPC::Run::run({init}->()) |
1393
|
|
|
|
|
|
|
## has bad timing for our purposes. |
1394
|
|
|
|
|
|
|
# |
1395
|
|
|
|
|
|
|
## fastjar extracts happily to ../../.. |
1396
|
|
|
|
|
|
|
## this happens in cups-1.2.1/scripting/java/cups.jar |
1397
|
|
|
|
|
|
|
# |
1398
|
|
|
|
|
|
|
## FIXME: |
1399
|
|
|
|
|
|
|
# "/tmp/xxxx/cups-1.2.4-11.5.1.el5/cups-1.2.4/scripting/java/cups.jar": |
1400
|
|
|
|
|
|
|
# {"cmd":"/usr/bin/unzip -P no_pw -q -o '%(src)s'", |
1401
|
|
|
|
|
|
|
# "unpacked":"/tmp/xxxx/cups-1.2.4-11.5.1.el5/cups-1.2.4/_Knw_"} |
1402
|
|
|
|
|
|
|
# Two issues: |
1403
|
|
|
|
|
|
|
# a) _run_mime_helper in /tmp/xxxx/cups-1.2.4-11.5.1.el5/cups-1.2.4 |
1404
|
|
|
|
|
|
|
# should be /tmp/xxxx/cups-1.2.4-11.5.1.el5/cups-1.2.4/scripting/java |
1405
|
|
|
|
|
|
|
# b) _Knw_ should never appear in the end result ... |
1406
|
|
|
|
|
|
|
# |
1407
|
|
|
|
|
|
|
|
1408
|
|
|
|
|
|
|
sub _run_mime_helper |
1409
|
|
|
|
|
|
|
{ |
1410
|
|
|
|
|
|
|
my ($self, $h, @argv) = @_; |
1411
|
|
|
|
|
|
|
|
1412
|
|
|
|
|
|
|
for my $i (0..$#argv) |
1413
|
|
|
|
|
|
|
{ |
1414
|
|
|
|
|
|
|
$argv[$i] = $1 if $argv[$i] =~ m{^(.*)$}s; # brute force untaint |
1415
|
|
|
|
|
|
|
} |
1416
|
|
|
|
|
|
|
|
1417
|
|
|
|
|
|
|
my $destdir = $argv[2]; |
1418
|
|
|
|
|
|
|
my $dot_dot_safeguard = $self->{dot_dot_safeguard}||0; |
1419
|
|
|
|
|
|
|
$dot_dot_safeguard = 2 if $dot_dot_safeguard < 2; |
1420
|
|
|
|
|
|
|
|
1421
|
|
|
|
|
|
|
my $jail_base = '/dev/null'; |
1422
|
|
|
|
|
|
|
my $jail = $jail_base; |
1423
|
|
|
|
|
|
|
unless ($self->{no_op}) |
1424
|
|
|
|
|
|
|
{ |
1425
|
|
|
|
|
|
|
mkpath($destdir); |
1426
|
|
|
|
|
|
|
$jail_base = File::Temp::tempdir($TMPDIR_TEMPL, DIR => $destdir); |
1427
|
|
|
|
|
|
|
$jail = $jail_base . ("/_" x $dot_dot_safeguard); |
1428
|
|
|
|
|
|
|
mkpath($jail); |
1429
|
|
|
|
|
|
|
} |
1430
|
|
|
|
|
|
|
|
1431
|
|
|
|
|
|
|
my $args = |
1432
|
|
|
|
|
|
|
{ |
1433
|
|
|
|
|
|
|
src => $argv[0], # abs_path() - but not symlink resolved, so that the unpacker sees 'our' name |
1434
|
|
|
|
|
|
|
destfile => $argv[1], # filename() - a suggested name, simply based on src, in case the unpacker needs it. |
1435
|
|
|
|
|
|
|
destdir => $jail, # abs_path() - for now... |
1436
|
|
|
|
|
|
|
mime => $argv[3], |
1437
|
|
|
|
|
|
|
descr => $argv[4], # mime_descr |
1438
|
|
|
|
|
|
|
configdir => $argv[5] # abs_path() |
1439
|
|
|
|
|
|
|
}; |
1440
|
|
|
|
|
|
|
$args->{lsrc} = Cwd::realpath($args->{src}); # symlinks resolved; use this with a stupid unpacker like 'upx' |
1441
|
|
|
|
|
|
|
die "src must be an abs_path." unless $args->{src} =~ m{^/}; |
1442
|
|
|
|
|
|
|
|
1443
|
|
|
|
|
|
|
my @cmd; |
1444
|
|
|
|
|
|
|
for my $a (@{$h->{argvv}}) |
1445
|
|
|
|
|
|
|
{ |
1446
|
|
|
|
|
|
|
if (ref $a) |
1447
|
|
|
|
|
|
|
{ |
1448
|
|
|
|
|
|
|
my @c = (); |
1449
|
|
|
|
|
|
|
for my $b (@$a) |
1450
|
|
|
|
|
|
|
{ |
1451
|
|
|
|
|
|
|
push @c, _subst_args($b, $args); |
1452
|
|
|
|
|
|
|
} |
1453
|
|
|
|
|
|
|
push @cmd, [@c]; |
1454
|
|
|
|
|
|
|
} |
1455
|
|
|
|
|
|
|
else |
1456
|
|
|
|
|
|
|
{ |
1457
|
|
|
|
|
|
|
push @cmd, _subst_args($a, $args); |
1458
|
|
|
|
|
|
|
} |
1459
|
|
|
|
|
|
|
} |
1460
|
|
|
|
|
|
|
|
1461
|
|
|
|
|
|
|
if ($self->{no_op}) |
1462
|
|
|
|
|
|
|
{ |
1463
|
|
|
|
|
|
|
print fmt_run_shellcmd(@cmd) . "\n"; |
1464
|
|
|
|
|
|
|
return undef; |
1465
|
|
|
|
|
|
|
} |
1466
|
|
|
|
|
|
|
|
1467
|
|
|
|
|
|
|
print STDERR "_run_mime_helper in $destdir: " . fmt_run_shellcmd(@cmd) . "\n" if $self->{verbose} > 1; |
1468
|
|
|
|
|
|
|
|
1469
|
|
|
|
|
|
|
my $cwd = getcwd() or carp "cannot fetch initial working directory, getcwd: $!"; |
1470
|
|
|
|
|
|
|
$cwd = $1 if $cwd =~ m{^(.*)$}s; # brute force untaint. Whereever you go, there you are. |
1471
|
|
|
|
|
|
|
chdir $jail or die "chdir '$jail'"; |
1472
|
|
|
|
|
|
|
chmod 0, $jail_base if $self->{jail_chmod0}; |
1473
|
|
|
|
|
|
|
# Now have fully initialzed in the parent before forking. |
1474
|
|
|
|
|
|
|
# This is needed, as all redirect operators are executed in the parent before forking. |
1475
|
|
|
|
|
|
|
# init => sub { ... } is no longer needed. sigh, I really wanted to the init sub for the chdir. |
1476
|
|
|
|
|
|
|
# But hey, mkpath() and rmtree() change the cwd so often, and restore it, so why shouldn't we? |
1477
|
|
|
|
|
|
|
|
1478
|
|
|
|
|
|
|
|
1479
|
|
|
|
|
|
|
my $run_error = undef; # we capture the first error line for the logfile. |
1480
|
|
|
|
|
|
|
my @r = $self->run(@cmd, |
1481
|
|
|
|
|
|
|
{ |
1482
|
|
|
|
|
|
|
debug => ($self->{verbose} > 2) ? $self->{verbose} - 2 : 0, |
1483
|
|
|
|
|
|
|
watch => $args->{src}, every => 5, fu_obj => $self, mime_helper => $h, |
1484
|
|
|
|
|
|
|
err => sub { print "E: @_\n" if $self->{verbose}; $run_error = "@_" unless length $run_error }, |
1485
|
|
|
|
|
|
|
prog => sub |
1486
|
|
|
|
|
|
|
{ |
1487
|
|
|
|
|
|
|
$_[1]{tick}++; |
1488
|
|
|
|
|
|
|
my $name = $_[1]{watch}; $name =~ s{.*/}{}; |
1489
|
|
|
|
|
|
|
if ($_[1]{finished}) |
1490
|
|
|
|
|
|
|
{ |
1491
|
|
|
|
|
|
|
printf "T: %s (%s, done)\n", $name, _unit_bytes(-s $_[1]{watch},1) |
1492
|
|
|
|
|
|
|
if $self->{verbose}; |
1493
|
|
|
|
|
|
|
} |
1494
|
|
|
|
|
|
|
elsif (my $p = _children_fuser($_[1]{watch}, POSIX::getpid())) |
1495
|
|
|
|
|
|
|
{ |
1496
|
|
|
|
|
|
|
_fuser_offset($p); |
1497
|
|
|
|
|
|
|
# we may get muliple process with multiple filedescriptors. |
1498
|
|
|
|
|
|
|
# select the one that moves fastest. |
1499
|
|
|
|
|
|
|
my $largest_diff = -1; |
1500
|
|
|
|
|
|
|
for my $pid (keys %$p) |
1501
|
|
|
|
|
|
|
{ |
1502
|
|
|
|
|
|
|
for my $fd (keys %{$p->{$pid}{fd}}) |
1503
|
|
|
|
|
|
|
{ |
1504
|
|
|
|
|
|
|
my $diff = ($p->{$pid}{fd}{$fd}{pos}||0) - ($_[1]{fuser}{$pid}{fd}{$fd}{pos}||0); |
1505
|
|
|
|
|
|
|
if ($diff > $largest_diff) |
1506
|
|
|
|
|
|
|
{ |
1507
|
|
|
|
|
|
|
$largest_diff = $diff; |
1508
|
|
|
|
|
|
|
$p->{fastest_fd} = $p->{$pid}{fd}{$fd}; |
1509
|
|
|
|
|
|
|
} |
1510
|
|
|
|
|
|
|
} |
1511
|
|
|
|
|
|
|
} |
1512
|
|
|
|
|
|
|
# Stick with the one we had before, if none moves. |
1513
|
|
|
|
|
|
|
$p->{fastest_fd} = $_[1]{fuser}{fastest_fd} if $largest_diff <= 0; |
1514
|
|
|
|
|
|
|
$_[1]{fuser} = $p; |
1515
|
|
|
|
|
|
|
my $off = $p->{fastest_fd}{pos}||0; |
1516
|
|
|
|
|
|
|
my $tot = $p->{fastest_fd}{size}||(-s $_[1]{watch})||1; |
1517
|
|
|
|
|
|
|
printf "T: %s (%s, %.1f%%)\n", $name, _unit_bytes($off,1), ($off*100)/$tot |
1518
|
|
|
|
|
|
|
if $self->{verbose}; |
1519
|
|
|
|
|
|
|
} |
1520
|
|
|
|
|
|
|
else |
1521
|
|
|
|
|
|
|
{ |
1522
|
|
|
|
|
|
|
print "T: $name tick_tick $_[1]{tick}\n" |
1523
|
|
|
|
|
|
|
if $self->{verbose}; |
1524
|
|
|
|
|
|
|
} |
1525
|
|
|
|
|
|
|
}, |
1526
|
|
|
|
|
|
|
}); |
1527
|
|
|
|
|
|
|
|
1528
|
|
|
|
|
|
|
# system("ls -la $jail_base/..; find $jail_base"); |
1529
|
|
|
|
|
|
|
# print STDERR Dumper \@r; |
1530
|
|
|
|
|
|
|
|
1531
|
|
|
|
|
|
|
chmod 0700, $jail_base if $self->{jail_chmod0}; |
1532
|
|
|
|
|
|
|
chdir $cwd or die "cannot chdir back to cwd: chdir($cwd): $!"; |
1533
|
|
|
|
|
|
|
my @nonzero = grep { $_ } @r; |
1534
|
|
|
|
|
|
|
|
1535
|
|
|
|
|
|
|
# TODO: handle failure |
1536
|
|
|
|
|
|
|
# - remove all, |
1537
|
|
|
|
|
|
|
# - retry with a fallback helper , if any. |
1538
|
|
|
|
|
|
|
printf STDERR "Non-Zero return value: $nonzero[0]: %s\n", fmt_run_shellcmd(@cmd) |
1539
|
|
|
|
|
|
|
if $nonzero[0] and $self->{verbose}; |
1540
|
|
|
|
|
|
|
|
1541
|
|
|
|
|
|
|
# FIXME: fallback helper not implemented |
1542
|
|
|
|
|
|
|
# t/data/pdftxt-a.txt is really plain/text altthough it begins with "PDF-1.4..." and |
1543
|
|
|
|
|
|
|
# thus fools the mime-type tests. |
1544
|
|
|
|
|
|
|
# should run other helpers, and finally 'strings -' as a trivial fallback. |
1545
|
|
|
|
|
|
|
if ($nonzero[0]) |
1546
|
|
|
|
|
|
|
{ |
1547
|
|
|
|
|
|
|
rmtree($jail_base); # empty or has unusable contents now. |
1548
|
|
|
|
|
|
|
## FIXME: we should at least copy in the original file as is... |
1549
|
|
|
|
|
|
|
return { error => "nonzero retval:\n " . Dumper(\@r), stderr => $run_error }; |
1550
|
|
|
|
|
|
|
} |
1551
|
|
|
|
|
|
|
|
1552
|
|
|
|
|
|
|
# loop through all _: if it only contains one item , replace it with this item, |
1553
|
|
|
|
|
|
|
# be it a file or dir. This uses $jail_tmp, an unused pathname. |
1554
|
|
|
|
|
|
|
my $jail_tmp = File::Temp::tempdir($TMPDIR_TEMPL, DIR => $destdir); |
1555
|
|
|
|
|
|
|
rmdir $jail_tmp; |
1556
|
|
|
|
|
|
|
|
1557
|
|
|
|
|
|
|
# if only one file in $jail, move it up, and return |
1558
|
|
|
|
|
|
|
# the filename instead of the dirname here. |
1559
|
|
|
|
|
|
|
# (We don't search for $args->{destfile}, it is the unpackers choice to use it or not.) |
1560
|
|
|
|
|
|
|
my $wanted_name; |
1561
|
|
|
|
|
|
|
for (my $i = 0; $i <= $dot_dot_safeguard; $i++) |
1562
|
|
|
|
|
|
|
{ |
1563
|
|
|
|
|
|
|
opendir DIR, $jail_base or last; |
1564
|
|
|
|
|
|
|
my @found = grep { $_ ne '.' and $_ ne '..' } readdir DIR; |
1565
|
|
|
|
|
|
|
closedir DIR; |
1566
|
|
|
|
|
|
|
my $found0; |
1567
|
|
|
|
|
|
|
$found0 = $1 if defined($found[0]) and $found[0] =~ m{^(.*)$}s; # brute force untaint |
1568
|
|
|
|
|
|
|
print STDERR "dot_dot_safeguard=$dot_dot_safeguard, i=$i, found=$found0\n" if $self->{verbose} > 2; |
1569
|
|
|
|
|
|
|
unless (@found) |
1570
|
|
|
|
|
|
|
{ |
1571
|
|
|
|
|
|
|
rmdir $jail_base; |
1572
|
|
|
|
|
|
|
my $name; |
1573
|
|
|
|
|
|
|
$name = $1 if $args->{src} =~ m{/([^/]+)$}; |
1574
|
|
|
|
|
|
|
print STDERR "oops(i=$i): nothing unpacked?? Adding $name as is.\n" if $self->{verbose}; |
1575
|
|
|
|
|
|
|
return { error => "nothing unpacked" }; |
1576
|
|
|
|
|
|
|
} |
1577
|
|
|
|
|
|
|
last if scalar @found != 1; |
1578
|
|
|
|
|
|
|
$wanted_name = $found0 if $i == $dot_dot_safeguard; |
1579
|
|
|
|
|
|
|
last unless -d $jail_base . "/" . $found0; |
1580
|
|
|
|
|
|
|
# assert writable dirs. needed for ksh/ast-base.2012-08-01.tar.bz2/src/cmd/pax/data/ro.dat |
1581
|
|
|
|
|
|
|
chmod(0755, $jail_base . "/" . $found0); |
1582
|
|
|
|
|
|
|
rename $jail_base, $jail_tmp or die("4:$i rename($jail_base, $jail_tmp) failed: $!"); |
1583
|
|
|
|
|
|
|
rename $jail_tmp . "/" . $found0, $jail_base or die("5:$i rename($jail_tmp .'/'. $found0, $jail_base) failed: $!"); |
1584
|
|
|
|
|
|
|
rmdir $jail_tmp or last; |
1585
|
|
|
|
|
|
|
} |
1586
|
|
|
|
|
|
|
|
1587
|
|
|
|
|
|
|
## this message is broken. |
1588
|
|
|
|
|
|
|
# print STDERR "Hmmm, unpacker did not use destname: $args->{destfile}\n" if $self->{verbose} and !defined $wanted_name; |
1589
|
|
|
|
|
|
|
|
1590
|
|
|
|
|
|
|
# say nothing, if $args->{destfile} is equal to or a prefix of $wanted_name. |
1591
|
|
|
|
|
|
|
print STDERR "Hmmm, unpacker saw destname: $args->{destfile}, but used destname: $wanted_name\n" |
1592
|
|
|
|
|
|
|
if $self->{verbose} > 1 and defined($wanted_name) and $wanted_name !~ m{^\Q$args->{destfile}}; |
1593
|
|
|
|
|
|
|
|
1594
|
|
|
|
|
|
|
$wanted_name = $args->{destfile} unless defined $wanted_name; |
1595
|
|
|
|
|
|
|
my $wanted_path; |
1596
|
|
|
|
|
|
|
$wanted_path = _unused_pathname($destdir, $wanted_name) if defined $wanted_name; |
1597
|
|
|
|
|
|
|
my $unpacked = $jail_base; |
1598
|
|
|
|
|
|
|
if (defined($wanted_name) and !-e $wanted_path) |
1599
|
|
|
|
|
|
|
{ |
1600
|
|
|
|
|
|
|
if (-d $jail_base) |
1601
|
|
|
|
|
|
|
{ |
1602
|
|
|
|
|
|
|
## find out, if the unpacker created exactly one file or one directory, |
1603
|
|
|
|
|
|
|
## in this case we can move one level further. |
1604
|
|
|
|
|
|
|
opendir DIR, $jail_base; |
1605
|
|
|
|
|
|
|
my @found = grep { $_ ne '.' and $_ ne '..' } readdir DIR; |
1606
|
|
|
|
|
|
|
closedir DIR; |
1607
|
|
|
|
|
|
|
my $found0; |
1608
|
|
|
|
|
|
|
$found0 = $1 if defined($found[0]) and $found[0] =~ m{^(.*)$}s; # brute force untaint |
1609
|
|
|
|
|
|
|
|
1610
|
|
|
|
|
|
|
if ($#found == 0 and $found0 eq $wanted_name) |
1611
|
|
|
|
|
|
|
{ |
1612
|
|
|
|
|
|
|
rename "$jail_base/$found0", $wanted_path or die "1 rename($jail_base/$found0, $wanted_path) failed: $!"; |
1613
|
|
|
|
|
|
|
rmdir $jail_base; |
1614
|
|
|
|
|
|
|
} |
1615
|
|
|
|
|
|
|
else |
1616
|
|
|
|
|
|
|
{ |
1617
|
|
|
|
|
|
|
rename $jail_base, $wanted_path or die "2 rename($jail_base, $wanted_path) failed: $!"; |
1618
|
|
|
|
|
|
|
} |
1619
|
|
|
|
|
|
|
} |
1620
|
|
|
|
|
|
|
else |
1621
|
|
|
|
|
|
|
{ |
1622
|
|
|
|
|
|
|
rename $jail_base, $wanted_path or die "3 rename($jail_base, $wanted_path) failed: $!"; |
1623
|
|
|
|
|
|
|
} |
1624
|
|
|
|
|
|
|
$unpacked = $wanted_path; |
1625
|
|
|
|
|
|
|
} |
1626
|
|
|
|
|
|
|
|
1627
|
|
|
|
|
|
|
# catch some patholigical cases. |
1628
|
|
|
|
|
|
|
if (-f $unpacked and !-l $unpacked) |
1629
|
|
|
|
|
|
|
{ |
1630
|
|
|
|
|
|
|
if (!-s $unpacked) |
1631
|
|
|
|
|
|
|
{ |
1632
|
|
|
|
|
|
|
print STDERR "Ooops, only one empty file -> symlink back\n" if $self->{verbose}; |
1633
|
|
|
|
|
|
|
unlink $unpacked; |
1634
|
|
|
|
|
|
|
symlink $args->{src}, $unpacked; |
1635
|
|
|
|
|
|
|
} |
1636
|
|
|
|
|
|
|
elsif (-s $unpacked eq (my $s = -s $args->{src})) |
1637
|
|
|
|
|
|
|
{ |
1638
|
|
|
|
|
|
|
print STDERR "Hmm, same size ($s bytes) after unpacking???\n" if $self->{verbose}; |
1639
|
|
|
|
|
|
|
## xz -dc -f behaves like cat, if called on an unknown file. |
1640
|
|
|
|
|
|
|
## Compare the files. If they are identical, stop this: |
1641
|
|
|
|
|
|
|
if (File::Compare::cmp($args->{src}, $unpacked) == 0) |
1642
|
|
|
|
|
|
|
{ |
1643
|
|
|
|
|
|
|
print STDERR "Oops, identical -> symlink back\n" if $self->{verbose}; |
1644
|
|
|
|
|
|
|
unlink $unpacked; |
1645
|
|
|
|
|
|
|
symlink $args->{src}, $unpacked; |
1646
|
|
|
|
|
|
|
} |
1647
|
|
|
|
|
|
|
} |
1648
|
|
|
|
|
|
|
} |
1649
|
|
|
|
|
|
|
|
1650
|
|
|
|
|
|
|
my $diag = undef; |
1651
|
|
|
|
|
|
|
$diag->{stderr} = $run_error if defined $run_error; |
1652
|
|
|
|
|
|
|
return ($unpacked, $diag); |
1653
|
|
|
|
|
|
|
} |
1654
|
|
|
|
|
|
|
|
1655
|
|
|
|
|
|
|
sub _unused_pathname |
1656
|
|
|
|
|
|
|
{ |
1657
|
|
|
|
|
|
|
my ($destdir, $wanted_name) = @_; |
1658
|
|
|
|
|
|
|
my $wanted_path = $destdir . "/" . $wanted_name; |
1659
|
|
|
|
|
|
|
|
1660
|
|
|
|
|
|
|
if (-e $wanted_path) |
1661
|
|
|
|
|
|
|
{ |
1662
|
|
|
|
|
|
|
## try to come up with a very similar name, just different suffix. |
1663
|
|
|
|
|
|
|
## be compatible with path name shortening in unpack() |
1664
|
|
|
|
|
|
|
my $test_path = $wanted_path . '._'; |
1665
|
|
|
|
|
|
|
for my $i ('', 1..999) |
1666
|
|
|
|
|
|
|
{ |
1667
|
|
|
|
|
|
|
# All our mime detectors work on file contents, rather than on suffixes. |
1668
|
|
|
|
|
|
|
# Thus messing with the suffix should be okay here. |
1669
|
|
|
|
|
|
|
unless (-e $test_path.$i) |
1670
|
|
|
|
|
|
|
{ |
1671
|
|
|
|
|
|
|
$wanted_path = $test_path.$i; |
1672
|
|
|
|
|
|
|
last; |
1673
|
|
|
|
|
|
|
} |
1674
|
|
|
|
|
|
|
} |
1675
|
|
|
|
|
|
|
} |
1676
|
|
|
|
|
|
|
die "_unused_pathname failed: last attempt $wanted_path\n" if -e $wanted_path; |
1677
|
|
|
|
|
|
|
return $wanted_path; |
1678
|
|
|
|
|
|
|
} |
1679
|
|
|
|
|
|
|
|
1680
|
|
|
|
|
|
|
|
1681
|
|
|
|
|
|
|
sub _children_fuser |
1682
|
|
|
|
|
|
|
{ |
1683
|
|
|
|
|
|
|
my ($file, $ppid) = @_; |
1684
|
|
|
|
|
|
|
$ppid ||= 1; |
1685
|
|
|
|
|
|
|
$file = Cwd::abs_path($file); |
1686
|
|
|
|
|
|
|
|
1687
|
|
|
|
|
|
|
opendir DIR, "/proc" or die "opendir /proc failed: $!\n"; |
1688
|
|
|
|
|
|
|
my %p = map { $_ => {} } grep { /^\d+$/ } readdir DIR; |
1689
|
|
|
|
|
|
|
closedir DIR; |
1690
|
|
|
|
|
|
|
|
1691
|
|
|
|
|
|
|
# get all procs, and their parent pids |
1692
|
|
|
|
|
|
|
for my $p (keys %p) |
1693
|
|
|
|
|
|
|
{ |
1694
|
|
|
|
|
|
|
if (open IN, "<", "/proc/$p/stat") |
1695
|
|
|
|
|
|
|
{ |
1696
|
|
|
|
|
|
|
# don't care if open fails. the process may have exited. |
1697
|
|
|
|
|
|
|
my $text = join '', ; |
1698
|
|
|
|
|
|
|
close IN; |
1699
|
|
|
|
|
|
|
if ($text =~ m{\((.*)\)\s+(\w)\s+(\d+)}s) |
1700
|
|
|
|
|
|
|
{ |
1701
|
|
|
|
|
|
|
$p{$p}{cmd} = $1; |
1702
|
|
|
|
|
|
|
$p{$p}{state} = $2; |
1703
|
|
|
|
|
|
|
$p{$p}{ppid} = $3; |
1704
|
|
|
|
|
|
|
} |
1705
|
|
|
|
|
|
|
} |
1706
|
|
|
|
|
|
|
} |
1707
|
|
|
|
|
|
|
|
1708
|
|
|
|
|
|
|
# Weed out those who are not in our family |
1709
|
|
|
|
|
|
|
if ($ppid > 1) |
1710
|
|
|
|
|
|
|
{ |
1711
|
|
|
|
|
|
|
for my $p (keys %p) |
1712
|
|
|
|
|
|
|
{ |
1713
|
|
|
|
|
|
|
my $family = 0; |
1714
|
|
|
|
|
|
|
my $pid = $p; |
1715
|
|
|
|
|
|
|
while ($pid) |
1716
|
|
|
|
|
|
|
{ |
1717
|
|
|
|
|
|
|
# Those that have ppid==1 may also belong to our family. |
1718
|
|
|
|
|
|
|
# We never know. |
1719
|
|
|
|
|
|
|
if ($pid == $ppid or $pid == 1) |
1720
|
|
|
|
|
|
|
{ |
1721
|
|
|
|
|
|
|
$family = 1; |
1722
|
|
|
|
|
|
|
last; |
1723
|
|
|
|
|
|
|
} |
1724
|
|
|
|
|
|
|
last unless $p{$pid}; |
1725
|
|
|
|
|
|
|
$pid = $p{$pid}{ppid}; |
1726
|
|
|
|
|
|
|
} |
1727
|
|
|
|
|
|
|
delete $p{$p} unless $family; |
1728
|
|
|
|
|
|
|
} |
1729
|
|
|
|
|
|
|
} |
1730
|
|
|
|
|
|
|
|
1731
|
|
|
|
|
|
|
my %o; # matching open files are recorded here |
1732
|
|
|
|
|
|
|
|
1733
|
|
|
|
|
|
|
# see what files they have open |
1734
|
|
|
|
|
|
|
for my $p (keys %p) |
1735
|
|
|
|
|
|
|
{ |
1736
|
|
|
|
|
|
|
if (opendir DIR, "/proc/$p/fd") |
1737
|
|
|
|
|
|
|
{ |
1738
|
|
|
|
|
|
|
my @l = grep { /^\d+$/ } readdir DIR; |
1739
|
|
|
|
|
|
|
closedir DIR; |
1740
|
|
|
|
|
|
|
for my $l (@l) |
1741
|
|
|
|
|
|
|
{ |
1742
|
|
|
|
|
|
|
my $r = readlink("/proc/$p/fd/$l"); |
1743
|
|
|
|
|
|
|
next unless defined $r; |
1744
|
|
|
|
|
|
|
# warn "$p, $l, $r\n"; |
1745
|
|
|
|
|
|
|
if ($r eq $file) |
1746
|
|
|
|
|
|
|
{ |
1747
|
|
|
|
|
|
|
$o{$p}{cmd} ||= $p{$p}{cmd}; |
1748
|
|
|
|
|
|
|
$o{$p}{fd}{$l} = { file => $file }; |
1749
|
|
|
|
|
|
|
} |
1750
|
|
|
|
|
|
|
} |
1751
|
|
|
|
|
|
|
} |
1752
|
|
|
|
|
|
|
} |
1753
|
|
|
|
|
|
|
return \%o; |
1754
|
|
|
|
|
|
|
} |
1755
|
|
|
|
|
|
|
|
1756
|
|
|
|
|
|
|
# see if we can read the file offset of a file descriptor, and the size of its file. |
1757
|
|
|
|
|
|
|
sub _fuser_offset |
1758
|
|
|
|
|
|
|
{ |
1759
|
|
|
|
|
|
|
my ($p) = @_; |
1760
|
|
|
|
|
|
|
for my $pid (keys %$p) |
1761
|
|
|
|
|
|
|
{ |
1762
|
|
|
|
|
|
|
for my $fd (keys %{$p->{$pid}{fd}}) |
1763
|
|
|
|
|
|
|
{ |
1764
|
|
|
|
|
|
|
if (open IN, "/proc/$pid/fdinfo/$fd") |
1765
|
|
|
|
|
|
|
{ |
1766
|
|
|
|
|
|
|
while (defined (my $line = )) |
1767
|
|
|
|
|
|
|
{ |
1768
|
|
|
|
|
|
|
chomp $line; |
1769
|
|
|
|
|
|
|
$p->{$pid}{fd}{$fd}{$1} = $2 if $line =~ m{^(\w+):\s+(.*)\b}; |
1770
|
|
|
|
|
|
|
} |
1771
|
|
|
|
|
|
|
} |
1772
|
|
|
|
|
|
|
close IN; |
1773
|
|
|
|
|
|
|
$p->{$pid}{fd}{$fd}{size} = -s $p->{$pid}{fd}{$fd}{file}; |
1774
|
|
|
|
|
|
|
} |
1775
|
|
|
|
|
|
|
} |
1776
|
|
|
|
|
|
|
} |
1777
|
|
|
|
|
|
|
|
1778
|
|
|
|
|
|
|
|
1779
|
|
|
|
|
|
|
sub _prep_configdir |
1780
|
|
|
|
|
|
|
{ |
1781
|
|
|
|
|
|
|
my ($self) = @_; |
1782
|
|
|
|
|
|
|
my $dir = "/tmp/file_unpack_$$/"; |
1783
|
|
|
|
|
|
|
mkpath($dir); |
1784
|
|
|
|
|
|
|
my $j = $self->{json}->allow_nonref(); |
1785
|
|
|
|
|
|
|
|
1786
|
|
|
|
|
|
|
open my $SH, ">", "$dir/config.sh"; |
1787
|
|
|
|
|
|
|
open my $JS, ">", "$dir/config.js"; |
1788
|
|
|
|
|
|
|
|
1789
|
|
|
|
|
|
|
print $JS "{\n"; |
1790
|
|
|
|
|
|
|
|
1791
|
|
|
|
|
|
|
for my $group ('', 'minfree', 'exclude') |
1792
|
|
|
|
|
|
|
{ |
1793
|
|
|
|
|
|
|
my $h_ref = ($group eq '') ? $self : $self->{$group}; |
1794
|
|
|
|
|
|
|
for my $k (sort keys %$h_ref) |
1795
|
|
|
|
|
|
|
{ |
1796
|
|
|
|
|
|
|
my $val = $h_ref->{$k}; |
1797
|
|
|
|
|
|
|
next if $k eq 'recursion_level'; |
1798
|
|
|
|
|
|
|
next if ref $val; # we only take scalars. |
1799
|
|
|
|
|
|
|
my $name = ($group eq '') ? $k : "${group}_$k"; |
1800
|
|
|
|
|
|
|
printf $SH "%s=%s\n", shell_quote(uc "fu_$name"), shell_quote($val); |
1801
|
|
|
|
|
|
|
printf $JS "%s:%s,\n", $j->encode($name), $j->encode($val); |
1802
|
|
|
|
|
|
|
} |
1803
|
|
|
|
|
|
|
} |
1804
|
|
|
|
|
|
|
|
1805
|
|
|
|
|
|
|
print $SH "FU_VERSION=$VERSION\n"; |
1806
|
|
|
|
|
|
|
print $JS qq["fu_version":"$VERSION"\n}\n]; |
1807
|
|
|
|
|
|
|
|
1808
|
|
|
|
|
|
|
close $SH; |
1809
|
|
|
|
|
|
|
close $JS; |
1810
|
|
|
|
|
|
|
return $dir; |
1811
|
|
|
|
|
|
|
} |
1812
|
|
|
|
|
|
|
|
1813
|
|
|
|
|
|
|
|
1814
|
|
|
|
|
|
|
=head2 mime_helper_dir mime_helper |
1815
|
|
|
|
|
|
|
|
1816
|
|
|
|
|
|
|
$u->mime_helper_dir($dir, ...) |
1817
|
|
|
|
|
|
|
$u->mime_helper($mime_name, $suffix_regexp, \@argv, @redir, ...) |
1818
|
|
|
|
|
|
|
|
1819
|
|
|
|
|
|
|
Registers one or more directories where external MIME helper programs are found. |
1820
|
|
|
|
|
|
|
Helpers plugins are shellscripts that server as specialized MIME type handlers for unpacking. |
1821
|
|
|
|
|
|
|
A list of helpers comes builtin interfacing most well-known archivers. This list can be appended to using the mime_helper_dir() or mime_helper() methods. |
1822
|
|
|
|
|
|
|
Multiple directories can be registered, They are searched in reverse order, i.e. |
1823
|
|
|
|
|
|
|
last added takes precedence. Any external MIME helper takes precedence over built-in code. |
1824
|
|
|
|
|
|
|
|
1825
|
|
|
|
|
|
|
The suffix_regexp is used to derive the destination name from the source name. |
1826
|
|
|
|
|
|
|
It is not used for selecting helpers. |
1827
|
|
|
|
|
|
|
|
1828
|
|
|
|
|
|
|
When collecting external helper scripts via C, there is no C. Instead, |
1829
|
|
|
|
|
|
|
external helper scripts can explicitly create a toplevel directory with the desired name. |
1830
|
|
|
|
|
|
|
|
1831
|
|
|
|
|
|
|
Helpers are mapped to MIME types by their mime_name. The name can be constructed |
1832
|
|
|
|
|
|
|
from the MIME type by replacing the '/' with a '=' character, and by using the |
1833
|
|
|
|
|
|
|
word 'ANY' as a wildcard component. The '=' character is interpreted as an |
1834
|
|
|
|
|
|
|
implicit '=ANY+' if needed. |
1835
|
|
|
|
|
|
|
|
1836
|
|
|
|
|
|
|
Examples: |
1837
|
|
|
|
|
|
|
|
1838
|
|
|
|
|
|
|
Mimetype helper names tried from top to bottom |
1839
|
|
|
|
|
|
|
----------------------------------------------------------------- |
1840
|
|
|
|
|
|
|
image/png image=png |
1841
|
|
|
|
|
|
|
image=ANY |
1842
|
|
|
|
|
|
|
image |
1843
|
|
|
|
|
|
|
ANY=png |
1844
|
|
|
|
|
|
|
ANY=ANY |
1845
|
|
|
|
|
|
|
ANY |
1846
|
|
|
|
|
|
|
|
1847
|
|
|
|
|
|
|
application/vnd.oasis+zip application=vnd.oasis+zip |
1848
|
|
|
|
|
|
|
application=ANY+zip |
1849
|
|
|
|
|
|
|
application=ANYzip |
1850
|
|
|
|
|
|
|
application=zip |
1851
|
|
|
|
|
|
|
application=ANY |
1852
|
|
|
|
|
|
|
... |
1853
|
|
|
|
|
|
|
|
1854
|
|
|
|
|
|
|
A trailing '=ANY' is implicit, as shown by these examples. |
1855
|
|
|
|
|
|
|
The rules for precedence are this: |
1856
|
|
|
|
|
|
|
|
1857
|
|
|
|
|
|
|
=over |
1858
|
|
|
|
|
|
|
|
1859
|
|
|
|
|
|
|
=item * |
1860
|
|
|
|
|
|
|
|
1861
|
|
|
|
|
|
|
Search in the latest directory is exhaused first, then the previously added directory is considered in turn, |
1862
|
|
|
|
|
|
|
up to all directories have been traversed, or until a matching helper is found. |
1863
|
|
|
|
|
|
|
|
1864
|
|
|
|
|
|
|
=item * |
1865
|
|
|
|
|
|
|
|
1866
|
|
|
|
|
|
|
A matching name with wildcards has lower precedence than a matching name without. |
1867
|
|
|
|
|
|
|
|
1868
|
|
|
|
|
|
|
=item * |
1869
|
|
|
|
|
|
|
|
1870
|
|
|
|
|
|
|
A wildcard before the '=' sign lowers precedence more than one after it. |
1871
|
|
|
|
|
|
|
|
1872
|
|
|
|
|
|
|
=back |
1873
|
|
|
|
|
|
|
|
1874
|
|
|
|
|
|
|
The mapping takes place when C is called. Adding helper scripts to a directory |
1875
|
|
|
|
|
|
|
afterwards has no effect. C does not do any implicit expansions. Call it |
1876
|
|
|
|
|
|
|
multiple times with the same helper command and different names if needed. |
1877
|
|
|
|
|
|
|
The default argument list is "%(src)s %(destfile)s %(destdir)s %(mime)s %(descr)s %(configdir)s" -- |
1878
|
|
|
|
|
|
|
this is applied, if no args are given and no redirections are given. See also C for more semantics and how a helper should behave. |
1879
|
|
|
|
|
|
|
|
1880
|
|
|
|
|
|
|
Both methods return an ARRAY-ref of HASHes describing all known (old and newly added) mime helpers. |
1881
|
|
|
|
|
|
|
|
1882
|
|
|
|
|
|
|
=cut |
1883
|
|
|
|
|
|
|
my @def_mime_helper_fmt = qw(%(src)s %(destfile)s %(destdir)s %(mime)s %(descr)s %(configdir)s); |
1884
|
|
|
|
|
|
|
|
1885
|
|
|
|
|
|
|
sub _subst_args |
1886
|
|
|
|
|
|
|
{ |
1887
|
|
|
|
|
|
|
my $f = Text::Sprintf::Named->new({fmt => $_[0]}); |
1888
|
|
|
|
|
|
|
return $f->format({args => $_[1]}); |
1889
|
|
|
|
|
|
|
} |
1890
|
|
|
|
|
|
|
|
1891
|
|
|
|
|
|
|
sub mime_helper |
1892
|
|
|
|
|
|
|
{ |
1893
|
|
|
|
|
|
|
my ($self, $name, $suffix_re, @args) = @_; |
1894
|
|
|
|
|
|
|
@args = ($name) unless @args; |
1895
|
|
|
|
|
|
|
@args = ([@args]) unless ref $args[0]; |
1896
|
|
|
|
|
|
|
push @{$args[0]}, @def_mime_helper_fmt unless $#{$args[0]} or defined $args[1]; |
1897
|
|
|
|
|
|
|
|
1898
|
|
|
|
|
|
|
# cut away the path prefix from name. And use / instead of = in the mime name. |
1899
|
|
|
|
|
|
|
$name =~ s{(.*/)?(.*?)=(.*?)$}{$2=$3}; |
1900
|
|
|
|
|
|
|
|
1901
|
|
|
|
|
|
|
unless ($name =~ m{[/=]}) |
1902
|
|
|
|
|
|
|
{ |
1903
|
|
|
|
|
|
|
print STDERR "mime_helper '$name' needs a '=' or '/'.\n" if $self->{verbose}; |
1904
|
|
|
|
|
|
|
return $self->{mime_helper}; |
1905
|
|
|
|
|
|
|
} |
1906
|
|
|
|
|
|
|
|
1907
|
|
|
|
|
|
|
my $pat = "^\Q$name\E\$"; |
1908
|
|
|
|
|
|
|
$pat =~ s{\\=}{/(?:x-|ANY\\+)?}; |
1909
|
|
|
|
|
|
|
$pat =~ s{\\%}{ANY}g; |
1910
|
|
|
|
|
|
|
$pat =~ s{^\^ANY}{}; |
1911
|
|
|
|
|
|
|
$pat =~ s{ANY\$$}{}; |
1912
|
|
|
|
|
|
|
$pat =~ s{ANY}{\\b\[\^\/\]+\\b}g; |
1913
|
|
|
|
|
|
|
unshift @{$self->{mime_helper}}, |
1914
|
|
|
|
|
|
|
{ |
1915
|
|
|
|
|
|
|
name => $name, pat => $pat, suffix_re => $suffix_re, |
1916
|
|
|
|
|
|
|
fmt_p => fmt_run_shellcmd(@args), argvv => \@args |
1917
|
|
|
|
|
|
|
}; |
1918
|
|
|
|
|
|
|
|
1919
|
|
|
|
|
|
|
delete $self->{mime_orcish}; # to be rebuilt in find_mime_helper() |
1920
|
|
|
|
|
|
|
|
1921
|
|
|
|
|
|
|
return $self->{mime_helper}; |
1922
|
|
|
|
|
|
|
} |
1923
|
|
|
|
|
|
|
|
1924
|
|
|
|
|
|
|
=head2 list |
1925
|
|
|
|
|
|
|
|
1926
|
|
|
|
|
|
|
Returns an ARRAY of preformatted patterns and MIME helpers. |
1927
|
|
|
|
|
|
|
|
1928
|
|
|
|
|
|
|
Example: |
1929
|
|
|
|
|
|
|
|
1930
|
|
|
|
|
|
|
printf @$_ for $u->list(); |
1931
|
|
|
|
|
|
|
|
1932
|
|
|
|
|
|
|
=cut |
1933
|
|
|
|
|
|
|
|
1934
|
|
|
|
|
|
|
sub list |
1935
|
|
|
|
|
|
|
{ |
1936
|
|
|
|
|
|
|
my ($self) = @_; |
1937
|
|
|
|
|
|
|
|
1938
|
|
|
|
|
|
|
my $width = 10; |
1939
|
|
|
|
|
|
|
for my $m (@{$self->{mime_helper}}) |
1940
|
|
|
|
|
|
|
{ |
1941
|
|
|
|
|
|
|
$width = length($m->{pat}) if length($m->{pat}) > $width; |
1942
|
|
|
|
|
|
|
} |
1943
|
|
|
|
|
|
|
|
1944
|
|
|
|
|
|
|
my @r; |
1945
|
|
|
|
|
|
|
for my $m (@{$self->{mime_helper}}) |
1946
|
|
|
|
|
|
|
{ |
1947
|
|
|
|
|
|
|
push @r, [ "%-${width}s %s\n", $m->{pat}, $m->{fmt_p} ]; |
1948
|
|
|
|
|
|
|
} |
1949
|
|
|
|
|
|
|
return @r; |
1950
|
|
|
|
|
|
|
} |
1951
|
|
|
|
|
|
|
|
1952
|
|
|
|
|
|
|
sub mime_helper_dir |
1953
|
|
|
|
|
|
|
{ |
1954
|
|
|
|
|
|
|
my ($self, @dirs) = @_; |
1955
|
|
|
|
|
|
|
|
1956
|
|
|
|
|
|
|
for my $d (@dirs) |
1957
|
|
|
|
|
|
|
{ |
1958
|
|
|
|
|
|
|
my %h; |
1959
|
|
|
|
|
|
|
if (opendir DIR, $d) |
1960
|
|
|
|
|
|
|
{ |
1961
|
|
|
|
|
|
|
%h = map { $_ => { a => "$d/$_" } } grep { -f "$d/$_" } readdir DIR; |
1962
|
|
|
|
|
|
|
closedir DIR; |
1963
|
|
|
|
|
|
|
} |
1964
|
|
|
|
|
|
|
else |
1965
|
|
|
|
|
|
|
{ |
1966
|
|
|
|
|
|
|
carp "Cannot opendir $d: $!, skipped\n"; |
1967
|
|
|
|
|
|
|
} |
1968
|
|
|
|
|
|
|
|
1969
|
|
|
|
|
|
|
# add =ANY suffix, if missing |
1970
|
|
|
|
|
|
|
for my $h (keys %h) |
1971
|
|
|
|
|
|
|
{ |
1972
|
|
|
|
|
|
|
if ($h !~ m{[/=]}) |
1973
|
|
|
|
|
|
|
{ |
1974
|
|
|
|
|
|
|
my $h2 = $h . "=ANY"; |
1975
|
|
|
|
|
|
|
$h{$h2} = { %{$h{$h}} } unless defined $h{$h2}; |
1976
|
|
|
|
|
|
|
} |
1977
|
|
|
|
|
|
|
} |
1978
|
|
|
|
|
|
|
|
1979
|
|
|
|
|
|
|
# not needed, this is implicit in mime_helper()/$pat |
1980
|
|
|
|
|
|
|
# |
1981
|
|
|
|
|
|
|
# # add expansion of = to =ANY+, if missing |
1982
|
|
|
|
|
|
|
# for my $h (keys %h) |
1983
|
|
|
|
|
|
|
# { |
1984
|
|
|
|
|
|
|
# next if $h =~ m{=ANY+}; |
1985
|
|
|
|
|
|
|
# my $h2 = $h; $h2 =~ s{=}{=ANY+}; |
1986
|
|
|
|
|
|
|
# $h{$h2} = $h{$h} unless defined $h{$h2}; |
1987
|
|
|
|
|
|
|
# } |
1988
|
|
|
|
|
|
|
|
1989
|
|
|
|
|
|
|
# calculate priorities |
1990
|
|
|
|
|
|
|
for my $h (keys %h) |
1991
|
|
|
|
|
|
|
{ |
1992
|
|
|
|
|
|
|
my $n = 1000000; |
1993
|
|
|
|
|
|
|
my $p = 1000; |
1994
|
|
|
|
|
|
|
while ($h =~ m{(ANY|=)}g) |
1995
|
|
|
|
|
|
|
{ |
1996
|
|
|
|
|
|
|
if ($1 eq '=') |
1997
|
|
|
|
|
|
|
{ |
1998
|
|
|
|
|
|
|
$n = 1000; |
1999
|
|
|
|
|
|
|
} |
2000
|
|
|
|
|
|
|
else |
2001
|
|
|
|
|
|
|
{ |
2002
|
|
|
|
|
|
|
$p += $n; |
2003
|
|
|
|
|
|
|
} |
2004
|
|
|
|
|
|
|
} |
2005
|
|
|
|
|
|
|
# longer length has prio over shorter length. Hmm, this is ineffective, isnt it? |
2006
|
|
|
|
|
|
|
$h{$h}{p} = $p - length($h); |
2007
|
|
|
|
|
|
|
} |
2008
|
|
|
|
|
|
|
|
2009
|
|
|
|
|
|
|
# Now push them, sorted by prio. |
2010
|
|
|
|
|
|
|
# Smaller prio_number is better. Later addition is prefered. |
2011
|
|
|
|
|
|
|
for my $h (sort { $h{$b}{p} <=> $h{$a}{p} } keys %h) |
2012
|
|
|
|
|
|
|
{ |
2013
|
|
|
|
|
|
|
# do not ruin the original name by resolving symlinks and such. |
2014
|
|
|
|
|
|
|
$self->mime_helper($h, undef, [Cwd::fast_abs_path($h{$h}{a})]); |
2015
|
|
|
|
|
|
|
} |
2016
|
|
|
|
|
|
|
} |
2017
|
|
|
|
|
|
|
return $self->{mime_helper}; |
2018
|
|
|
|
|
|
|
} |
2019
|
|
|
|
|
|
|
|
2020
|
|
|
|
|
|
|
=head2 find_mime_helper |
2021
|
|
|
|
|
|
|
|
2022
|
|
|
|
|
|
|
$u->find_mime_helper($mimetype) |
2023
|
|
|
|
|
|
|
|
2024
|
|
|
|
|
|
|
Returns a MIME helper suitable for unpacking the given $mimetype. |
2025
|
|
|
|
|
|
|
If called in list context, a second return value indicates which |
2026
|
|
|
|
|
|
|
mime helpers would be suitable, but could not be found in the system. |
2027
|
|
|
|
|
|
|
|
2028
|
|
|
|
|
|
|
=cut |
2029
|
|
|
|
|
|
|
|
2030
|
|
|
|
|
|
|
sub find_mime_helper |
2031
|
|
|
|
|
|
|
{ |
2032
|
|
|
|
|
|
|
my ($self, $mimetype) = @_; |
2033
|
|
|
|
|
|
|
$mimetype = $mimetype->[0] if ref $mimetype eq 'ARRAY'; |
2034
|
|
|
|
|
|
|
|
2035
|
|
|
|
|
|
|
return $self->{mime_orcish}{$mimetype} |
2036
|
|
|
|
|
|
|
if defined $self->{mime_orcish}{$mimetype} and |
2037
|
|
|
|
|
|
|
-f $self->{mime_orcish}{$mimetype}{argvv}[0][0]; |
2038
|
|
|
|
|
|
|
|
2039
|
|
|
|
|
|
|
my $r = undef; |
2040
|
|
|
|
|
|
|
for my $h (@{$self->{mime_helper}}) |
2041
|
|
|
|
|
|
|
{ |
2042
|
|
|
|
|
|
|
if ($mimetype =~ m{$h->{pat}}) |
2043
|
|
|
|
|
|
|
{ |
2044
|
|
|
|
|
|
|
$self->_finalize_argvv($h); |
2045
|
|
|
|
|
|
|
unless (-f $h->{argvv}[0][0]) |
2046
|
|
|
|
|
|
|
{ |
2047
|
|
|
|
|
|
|
push @{$r->{missing}}, $h->{argvv}[0][0]; |
2048
|
|
|
|
|
|
|
next; |
2049
|
|
|
|
|
|
|
} |
2050
|
|
|
|
|
|
|
$self->{mime_orcish}{$mimetype} = $h; |
2051
|
|
|
|
|
|
|
return wantarray ? ($h, $r) : $h; |
2052
|
|
|
|
|
|
|
} |
2053
|
|
|
|
|
|
|
} |
2054
|
|
|
|
|
|
|
return wantarray ? (undef, $r) : undef; |
2055
|
|
|
|
|
|
|
} |
2056
|
|
|
|
|
|
|
|
2057
|
|
|
|
|
|
|
# |
2058
|
|
|
|
|
|
|
# _finalize_argvv() executes a sub in 3 places: |
2059
|
|
|
|
|
|
|
# The argvv ptr itself can be a sub: |
2060
|
|
|
|
|
|
|
# this should return an array, where the |
2061
|
|
|
|
|
|
|
# first element is the command (as an array-ref) and subsequent elements are |
2062
|
|
|
|
|
|
|
# redirects. See run() for details. |
2063
|
|
|
|
|
|
|
# One of the argvv elements is a sub: |
2064
|
|
|
|
|
|
|
# this should return the command as an array-ref, if it is argvv[0], |
2065
|
|
|
|
|
|
|
# or return one or more redirects. |
2066
|
|
|
|
|
|
|
# One element of argvv[0] is a sub: |
2067
|
|
|
|
|
|
|
# this should return one or more command names, options, arguments, |
2068
|
|
|
|
|
|
|
# |
2069
|
|
|
|
|
|
|
# Tricky part of the implementation is the in-place array expansion while iterating. |
2070
|
|
|
|
|
|
|
# |
2071
|
|
|
|
|
|
|
sub _finalize_argvv |
2072
|
|
|
|
|
|
|
{ |
2073
|
|
|
|
|
|
|
my ($self, $h) = @_; |
2074
|
|
|
|
|
|
|
|
2075
|
|
|
|
|
|
|
my $update_fmt_p = 0; |
2076
|
|
|
|
|
|
|
if (ref $h->{argvv} eq 'CODE') |
2077
|
|
|
|
|
|
|
{ |
2078
|
|
|
|
|
|
|
$h->{argvv} = [ $h->{argvv}->($self) ]; |
2079
|
|
|
|
|
|
|
$update_fmt_p++; |
2080
|
|
|
|
|
|
|
} |
2081
|
|
|
|
|
|
|
|
2082
|
|
|
|
|
|
|
# If any part of LIST is an array, "foreach" will get very confused if you add or |
2083
|
|
|
|
|
|
|
# remove elements within the loop body, for example with "splice". So don't do |
2084
|
|
|
|
|
|
|
# that. |
2085
|
|
|
|
|
|
|
# Sigh, we want do do exactly that, a sub may replace itself by any number of elements. Use booring C-style loop. |
2086
|
|
|
|
|
|
|
my $last = $#{$h->{argvv}}; |
2087
|
|
|
|
|
|
|
for (my $idx = 0; $idx <= $last; $idx++) |
2088
|
|
|
|
|
|
|
{ |
2089
|
|
|
|
|
|
|
if (ref $h->{argvv}[$idx] eq 'CODE') |
2090
|
|
|
|
|
|
|
{ |
2091
|
|
|
|
|
|
|
my @r = $h->{argvv}[$idx]($self); |
2092
|
|
|
|
|
|
|
splice @{$h->{argvv}}, $idx, 1, @r; |
2093
|
|
|
|
|
|
|
$idx += $#r; |
2094
|
|
|
|
|
|
|
$last +=$#r; |
2095
|
|
|
|
|
|
|
$update_fmt_p++; |
2096
|
|
|
|
|
|
|
} |
2097
|
|
|
|
|
|
|
} |
2098
|
|
|
|
|
|
|
$last = $#{$h->{argvv}}; |
2099
|
|
|
|
|
|
|
for (my $idx = 0; $idx <= $last; $idx++) |
2100
|
|
|
|
|
|
|
{ |
2101
|
|
|
|
|
|
|
next unless ref $h->{argvv}[$idx] eq 'ARRAY'; |
2102
|
|
|
|
|
|
|
my $last1 = $#{$h->{argvv}[$idx]}; |
2103
|
|
|
|
|
|
|
for (my $idx1 = 0; $idx1 <= $last1; $idx1++) |
2104
|
|
|
|
|
|
|
{ |
2105
|
|
|
|
|
|
|
if (ref $h->{argvv}[$idx][$idx1] eq 'CODE') |
2106
|
|
|
|
|
|
|
{ |
2107
|
|
|
|
|
|
|
my @r = $h->{argvv}[$idx][$idx1]->($self); |
2108
|
|
|
|
|
|
|
splice @{$h->{argvv}[$idx]}, $idx1, 1, @r; |
2109
|
|
|
|
|
|
|
$idx1 += $#r; |
2110
|
|
|
|
|
|
|
$last1 +=$#r; |
2111
|
|
|
|
|
|
|
$update_fmt_p++; |
2112
|
|
|
|
|
|
|
} |
2113
|
|
|
|
|
|
|
} |
2114
|
|
|
|
|
|
|
} |
2115
|
|
|
|
|
|
|
|
2116
|
|
|
|
|
|
|
$h->{fmt_p} = fmt_run_shellcmd($h) if $update_fmt_p; |
2117
|
|
|
|
|
|
|
} |
2118
|
|
|
|
|
|
|
|
2119
|
|
|
|
|
|
|
=head2 minfree |
2120
|
|
|
|
|
|
|
|
2121
|
|
|
|
|
|
|
$u->minfree(factor => 10, bytes => '100M', percent => '3%', warning => sub { .. }) |
2122
|
|
|
|
|
|
|
|
2123
|
|
|
|
|
|
|
THESE TESTS ARE TO BE IMPLEMENTED. |
2124
|
|
|
|
|
|
|
|
2125
|
|
|
|
|
|
|
Guard the filesystem (destdir) against becoming full during C. |
2126
|
|
|
|
|
|
|
Before unpacking each source archive, the free space is measured and compared against three conditions: |
2127
|
|
|
|
|
|
|
|
2128
|
|
|
|
|
|
|
=over |
2129
|
|
|
|
|
|
|
|
2130
|
|
|
|
|
|
|
=item * |
2131
|
|
|
|
|
|
|
|
2132
|
|
|
|
|
|
|
The archive size multiplied with the given factor must fit into the filesystem. |
2133
|
|
|
|
|
|
|
|
2134
|
|
|
|
|
|
|
=item * |
2135
|
|
|
|
|
|
|
|
2136
|
|
|
|
|
|
|
The given number of bytes (in optional K, M, G, or T units) must be free. |
2137
|
|
|
|
|
|
|
|
2138
|
|
|
|
|
|
|
=item * |
2139
|
|
|
|
|
|
|
|
2140
|
|
|
|
|
|
|
The filesystem must have at least the given free percentage. The '%' character is optional. |
2141
|
|
|
|
|
|
|
|
2142
|
|
|
|
|
|
|
=back |
2143
|
|
|
|
|
|
|
|
2144
|
|
|
|
|
|
|
The warning method is called if any of the above conditions fail. Its signature is: |
2145
|
|
|
|
|
|
|
&warning->($pathname, $full_percentage, $free_bytes, $free_inodes); |
2146
|
|
|
|
|
|
|
It is expected to print an appropriate warning message, and delay a few seconds. |
2147
|
|
|
|
|
|
|
It should return 0 to cause a retry. It should return nonzero to continue unpacking. |
2148
|
|
|
|
|
|
|
The default warning method prints a message to STDERR, waits 30 seconds, and returns 0. |
2149
|
|
|
|
|
|
|
|
2150
|
|
|
|
|
|
|
The filesystem may still become full and unpacking may fail, if e.g. factor was chosen lower than |
2151
|
|
|
|
|
|
|
the average compression ratio of the archives. |
2152
|
|
|
|
|
|
|
|
2153
|
|
|
|
|
|
|
=cut |
2154
|
|
|
|
|
|
|
|
2155
|
|
|
|
|
|
|
sub _bytes_unit |
2156
|
|
|
|
|
|
|
{ |
2157
|
|
|
|
|
|
|
my ($text) = @_; |
2158
|
|
|
|
|
|
|
return int($1*1024) if $text =~ m{([\d\.]+)k}i; |
2159
|
|
|
|
|
|
|
return int($1*1024*1024) if $text =~ m{([\d\.]+)m}i; |
2160
|
|
|
|
|
|
|
return int($1*1024*1024*1024) if $text =~ m{([\d\.]+)g}i; |
2161
|
|
|
|
|
|
|
return int($1*1024*1024*1024*1024) if $text =~ m{([\d\.]+)t}i; |
2162
|
|
|
|
|
|
|
return int($text); |
2163
|
|
|
|
|
|
|
} |
2164
|
|
|
|
|
|
|
|
2165
|
|
|
|
|
|
|
sub _unit_bytes |
2166
|
|
|
|
|
|
|
{ |
2167
|
|
|
|
|
|
|
my ($number, $dec_places) = @_; |
2168
|
|
|
|
|
|
|
$dec_places = 2 unless defined $dec_places; |
2169
|
|
|
|
|
|
|
my $div = 1; |
2170
|
|
|
|
|
|
|
my $unit = ''; |
2171
|
|
|
|
|
|
|
my $neg = ''; |
2172
|
|
|
|
|
|
|
if ($number < 0) |
2173
|
|
|
|
|
|
|
{ |
2174
|
|
|
|
|
|
|
$neg = '-'; $number = -$number; |
2175
|
|
|
|
|
|
|
} |
2176
|
|
|
|
|
|
|
if ($number > $div * 1024) |
2177
|
|
|
|
|
|
|
{ |
2178
|
|
|
|
|
|
|
$div *= 1024; $unit = 'k'; |
2179
|
|
|
|
|
|
|
if ($number > $div * 1024) |
2180
|
|
|
|
|
|
|
{ |
2181
|
|
|
|
|
|
|
$div *= 1024; $unit = 'm'; |
2182
|
|
|
|
|
|
|
if ($number > $div * 1024) |
2183
|
|
|
|
|
|
|
{ |
2184
|
|
|
|
|
|
|
$div *= 1024; $unit = 'g'; |
2185
|
|
|
|
|
|
|
if ($number > $div * 1024) |
2186
|
|
|
|
|
|
|
{ |
2187
|
|
|
|
|
|
|
$div *= 1024; $unit = 't'; |
2188
|
|
|
|
|
|
|
} |
2189
|
|
|
|
|
|
|
} |
2190
|
|
|
|
|
|
|
} |
2191
|
|
|
|
|
|
|
} |
2192
|
|
|
|
|
|
|
return sprintf "%s%.*f%s", $neg, $dec_places, ($number / $div), $unit; |
2193
|
|
|
|
|
|
|
} |
2194
|
|
|
|
|
|
|
|
2195
|
|
|
|
|
|
|
# see fs.pm/check_fs_health() |
2196
|
|
|
|
|
|
|
|
2197
|
|
|
|
|
|
|
sub minfree |
2198
|
|
|
|
|
|
|
{ |
2199
|
|
|
|
|
|
|
my $self = shift; |
2200
|
|
|
|
|
|
|
my %opt = @_; |
2201
|
|
|
|
|
|
|
|
2202
|
|
|
|
|
|
|
for my $i (qw(factor bytes percent)) |
2203
|
|
|
|
|
|
|
{ |
2204
|
|
|
|
|
|
|
$self->{minfree}{$i} = $opt{$i} if defined $opt{$i}; |
2205
|
|
|
|
|
|
|
$self->{minfree}{$i} ||= 0; |
2206
|
|
|
|
|
|
|
} |
2207
|
|
|
|
|
|
|
$self->{minfree}{bytes} = _bytes_unit($self->{minfree}{bytes}); |
2208
|
|
|
|
|
|
|
$self->{minfree}{percent} =~ s{%$}{}; |
2209
|
|
|
|
|
|
|
$self->{fs_warn} = $opt{warning} if ref $opt{warning}; |
2210
|
|
|
|
|
|
|
} |
2211
|
|
|
|
|
|
|
|
2212
|
|
|
|
|
|
|
=head2 mime |
2213
|
|
|
|
|
|
|
|
2214
|
|
|
|
|
|
|
$u->mime($filename) |
2215
|
|
|
|
|
|
|
|
2216
|
|
|
|
|
|
|
$u->mime(file => $filename) |
2217
|
|
|
|
|
|
|
|
2218
|
|
|
|
|
|
|
$u->mime(buf => "#!/bin ...", file => "what-was-read") |
2219
|
|
|
|
|
|
|
|
2220
|
|
|
|
|
|
|
$u->mime(fd => \*STDIN, file => "what-was-opened") |
2221
|
|
|
|
|
|
|
|
2222
|
|
|
|
|
|
|
Determines the MIME type (and optionally additional information) of a file. |
2223
|
|
|
|
|
|
|
The file can be specified by filename, by a provided buffer or an opened file descriptor. |
2224
|
|
|
|
|
|
|
For the latter two cases, specifying a filename is optional, and used only for diagnostics. |
2225
|
|
|
|
|
|
|
|
2226
|
|
|
|
|
|
|
C uses libmagic by Christos Zoulas exposed via File::LibMagic and also uses |
2227
|
|
|
|
|
|
|
the shared-mime-info database from freedesktop.org exposed via |
2228
|
|
|
|
|
|
|
File::MimeInfo::Magic, if available. Either one is sufficient, but having both |
2229
|
|
|
|
|
|
|
is better. LibMagic sometimes says 'text/x-pascal', although we have a F<.desktop> |
2230
|
|
|
|
|
|
|
file, or says 'text/plain', but has contradicting details in its description. |
2231
|
|
|
|
|
|
|
|
2232
|
|
|
|
|
|
|
C is consulted where the libmagic output is dubious. E.g. when |
2233
|
|
|
|
|
|
|
the desciption says something interesting like 'Debian binary package (format 2.0)' but the |
2234
|
|
|
|
|
|
|
mimetype says 'application/octet-stream'. The combination of both libraries gives us |
2235
|
|
|
|
|
|
|
excellent reliability in the critical field of MIME type recognition. |
2236
|
|
|
|
|
|
|
|
2237
|
|
|
|
|
|
|
This implementation also features multi-level MIME type recognition for efficient unpacking. |
2238
|
|
|
|
|
|
|
When e.g. unpacking a large bzipped tar archive, this saves us from creating a |
2239
|
|
|
|
|
|
|
huge temporary tar-file which C would extract in a second step. The multi-level recognition |
2240
|
|
|
|
|
|
|
returns 'application/x-tar+bzip2' in this case, and allows for a MIME helper |
2241
|
|
|
|
|
|
|
to e.g. pipe the bzip2 contents into tar (which is exactly what 'tar jxvf' |
2242
|
|
|
|
|
|
|
does, making a very simple and efficient MIME helper). |
2243
|
|
|
|
|
|
|
|
2244
|
|
|
|
|
|
|
C returns a 3 or 4 element arrayref with mimetype, charset, description, diff; |
2245
|
|
|
|
|
|
|
where diff is only present when the libfile and shared-mime-info methods disagree. |
2246
|
|
|
|
|
|
|
|
2247
|
|
|
|
|
|
|
In case of 'text/plain', an additional rule based on file name suffix is used to allow |
2248
|
|
|
|
|
|
|
recognition of well known plain text pack formats. |
2249
|
|
|
|
|
|
|
We return 'text/x-suffix-XX+plain', where XX is one of the recognized suffixes |
2250
|
|
|
|
|
|
|
(in all lower case and without the dot). E.g. a plain mmencoded file has no |
2251
|
|
|
|
|
|
|
header and looks like 'plain/text' to all the known magic libraries. We |
2252
|
|
|
|
|
|
|
recognize the suffixes .mm, .b64, and .base64 for this (case insignificant). |
2253
|
|
|
|
|
|
|
A similar rule exitst for 'application/octect-stream'. It may trigger e.g. for |
2254
|
|
|
|
|
|
|
LZMA compressed files which fail to provide a magic number. |
2255
|
|
|
|
|
|
|
|
2256
|
|
|
|
|
|
|
Examples: |
2257
|
|
|
|
|
|
|
|
2258
|
|
|
|
|
|
|
[ 'text/x-perl', 'us-ascii', 'a /usr/bin/perl -w script text'] |
2259
|
|
|
|
|
|
|
|
2260
|
|
|
|
|
|
|
[ 'text/x-mpegurl', 'utf-8', 'M3U playlist text', |
2261
|
|
|
|
|
|
|
[ 'text/plain', 'application/x-mpegurl']] |
2262
|
|
|
|
|
|
|
|
2263
|
|
|
|
|
|
|
[ 'application/x-tar+bzip2, 'binary', |
2264
|
|
|
|
|
|
|
"bzip2 compressed data, block size = 900k\nPOSIX tar archive (GNU)", ...] |
2265
|
|
|
|
|
|
|
|
2266
|
|
|
|
|
|
|
=cut |
2267
|
|
|
|
|
|
|
|
2268
|
|
|
|
|
|
|
sub mime |
2269
|
|
|
|
|
|
|
{ |
2270
|
|
|
|
|
|
|
my ($self, @in) = @_; |
2271
|
|
|
|
|
|
|
|
2272
|
|
|
|
|
|
|
my %in; |
2273
|
|
|
|
|
|
|
%in = %{$in[0]} if !$#in and ref $in[0] eq 'HASH'; |
2274
|
|
|
|
|
|
|
unshift @in, 'file' if !$#in and !ref $in[0]; |
2275
|
|
|
|
|
|
|
%in = @in if $#in > 0; |
2276
|
|
|
|
|
|
|
|
2277
|
|
|
|
|
|
|
my $flm = $self->{flm} ||= File::LibMagic->new(); |
2278
|
|
|
|
|
|
|
|
2279
|
|
|
|
|
|
|
unless (defined $in{buf}) |
2280
|
|
|
|
|
|
|
{ |
2281
|
|
|
|
|
|
|
my $fd = $in{fd}; |
2282
|
|
|
|
|
|
|
unless ($fd) |
2283
|
|
|
|
|
|
|
{ |
2284
|
|
|
|
|
|
|
open $fd, "<", $in{file} or |
2285
|
|
|
|
|
|
|
return [ 'x-system/x-error', undef, "cannot open '$in{file}': $!" ]; |
2286
|
|
|
|
|
|
|
} |
2287
|
|
|
|
|
|
|
|
2288
|
|
|
|
|
|
|
my $f = $in{file}||'-'; |
2289
|
|
|
|
|
|
|
$in{buf} = ''; |
2290
|
|
|
|
|
|
|
my $pos = tell $fd; |
2291
|
|
|
|
|
|
|
##bzip2 below needs a long buffer, or it returns 0. |
2292
|
|
|
|
|
|
|
my $len = read $fd, $in{buf}, $UNCOMP_BUFSZ; |
2293
|
|
|
|
|
|
|
return [ 'x-system/x-error', undef, "read '$f' failed: $!" ] unless defined $len; |
2294
|
|
|
|
|
|
|
return [ 'x-system/x-error', undef, "read '$f' failed: $len: $!" ] if $len < 0; |
2295
|
|
|
|
|
|
|
return [ 'text/x-empty', undef, 'empty' ] if $len == 0; |
2296
|
|
|
|
|
|
|
seek $fd, $pos, 0; |
2297
|
|
|
|
|
|
|
|
2298
|
|
|
|
|
|
|
close $fd unless $in{fd}; |
2299
|
|
|
|
|
|
|
} |
2300
|
|
|
|
|
|
|
|
2301
|
|
|
|
|
|
|
|
2302
|
|
|
|
|
|
|
## flm can say 'cannot open \'IP\' (No such file or directory)' |
2303
|
|
|
|
|
|
|
## flm can say 'CDF V2 Document, corrupt: Can\'t read SAT' (application/vnd.ms-excel) |
2304
|
|
|
|
|
|
|
my $mime1 = $flm->checktype_contents($in{buf}); |
2305
|
|
|
|
|
|
|
if ($mime1 =~ m{, corrupt: } or $mime1 =~ m{^application/octet-stream\b}) |
2306
|
|
|
|
|
|
|
{ |
2307
|
|
|
|
|
|
|
# application/x-iso9660-image is reported as application/octet-stream if the buffer is short. |
2308
|
|
|
|
|
|
|
# iso images usually start with 0x8000 bytes of all '\0'. |
2309
|
|
|
|
|
|
|
print STDERR "mime: readahead buffer $UNCOMP_BUFSZ too short\n" if $self->{verbose} > 2; |
2310
|
|
|
|
|
|
|
if (defined $in{file} and -f $in{file}) |
2311
|
|
|
|
|
|
|
{ |
2312
|
|
|
|
|
|
|
print STDERR "mime: reopening $in{file}\n" if $self->{verbose} > 1; |
2313
|
|
|
|
|
|
|
$mime1 = $flm->checktype_filename($in{file}); |
2314
|
|
|
|
|
|
|
} |
2315
|
|
|
|
|
|
|
} |
2316
|
|
|
|
|
|
|
print STDERR "flm->checktype_contents: $mime1\n" if $self->{verbose} > 1; |
2317
|
|
|
|
|
|
|
$in{file} = '-' unless defined $in{file}; |
2318
|
|
|
|
|
|
|
|
2319
|
|
|
|
|
|
|
return [ 'x-system/x-error', undef, $mime1 ] if $mime1 =~ m{^cannot open}; |
2320
|
|
|
|
|
|
|
|
2321
|
|
|
|
|
|
|
# in SLES11 we get 'text/plain charset=utf-8' without semicolon. |
2322
|
|
|
|
|
|
|
my $enc; ($mime1, $enc) = ($1,$2) if $mime1 =~ m{^(.*?);\s*(.*)$} or |
2323
|
|
|
|
|
|
|
$mime1 =~ m{^(.*?)\s+(.*)$}; |
2324
|
|
|
|
|
|
|
$enc =~ s{^charset=}{} if defined $enc; |
2325
|
|
|
|
|
|
|
my @r = ($mime1, $enc, $flm->describe_contents($in{buf}) ); |
2326
|
|
|
|
|
|
|
my $mime2; |
2327
|
|
|
|
|
|
|
|
2328
|
|
|
|
|
|
|
|
2329
|
|
|
|
|
|
|
if ($mime1 =~ m{^application/xml}) |
2330
|
|
|
|
|
|
|
{ |
2331
|
|
|
|
|
|
|
# This is horrible from a greedy text cruncher perspective: |
2332
|
|
|
|
|
|
|
# although xml is a plain text syntax, it is reported by flm to be |
2333
|
|
|
|
|
|
|
# outside text/* |
2334
|
|
|
|
|
|
|
$r[0] = "text/x-application-xml"; |
2335
|
|
|
|
|
|
|
} |
2336
|
|
|
|
|
|
|
|
2337
|
|
|
|
|
|
|
if ($mime1 =~ m{^text/x-(?:pascal|fortran)$}) |
2338
|
|
|
|
|
|
|
{ |
2339
|
|
|
|
|
|
|
# xterm.desktop |
2340
|
|
|
|
|
|
|
# ['text/x-pascal; charset=utf-8','UTF-8 Unicode Pascal program text'] |
2341
|
|
|
|
|
|
|
# 'application/x-desktop' |
2342
|
|
|
|
|
|
|
# |
2343
|
|
|
|
|
|
|
# Times-Roman.afm |
2344
|
|
|
|
|
|
|
# ['text/x-fortran; charset=us-ascii','ASCII font metrics'] |
2345
|
|
|
|
|
|
|
# 'application/x-font-afm' |
2346
|
|
|
|
|
|
|
# |
2347
|
|
|
|
|
|
|
# debian/rules |
2348
|
|
|
|
|
|
|
# ['text/x-pascal; charset=us-ascii','a /usr/bin/make -f script text'] |
2349
|
|
|
|
|
|
|
# 'text/x-makefile' |
2350
|
|
|
|
|
|
|
if ($mime2 ||= eval { open my $fd,'<',\$in{buf}; File::MimeInfo::Magic::magic($fd); }) |
2351
|
|
|
|
|
|
|
{ |
2352
|
|
|
|
|
|
|
$r[0] = "text/$1" if $mime2 =~ m{/(\S+)}; |
2353
|
|
|
|
|
|
|
} |
2354
|
|
|
|
|
|
|
} |
2355
|
|
|
|
|
|
|
elsif (($mime1 eq 'text/plain' and $r[2] =~ m{(?:PostScript|font)}i) |
2356
|
|
|
|
|
|
|
or ($mime1 eq 'application/postscript')) |
2357
|
|
|
|
|
|
|
{ |
2358
|
|
|
|
|
|
|
# 11.3 says: |
2359
|
|
|
|
|
|
|
# IPA.pfa |
2360
|
|
|
|
|
|
|
# ['text/plain; charset=us-ascii','PostScript Type 1 font text (OmegaSerifIPA 001.000)'] |
2361
|
|
|
|
|
|
|
# sles11 says: |
2362
|
|
|
|
|
|
|
# IPA.pfa |
2363
|
|
|
|
|
|
|
# ['application/postscript', undef, 'PostScript document text'] |
2364
|
|
|
|
|
|
|
# |
2365
|
|
|
|
|
|
|
# mime2 = 'application/x-font-type1' |
2366
|
|
|
|
|
|
|
# $mime2 = eval { File::MimeInfo::Magic::mimetype($in{file}); }; |
2367
|
|
|
|
|
|
|
$mime2 ||= eval { open my $fd,'<',\$in{buf}; File::MimeInfo::Magic::magic($fd); }; |
2368
|
|
|
|
|
|
|
if ($mime2 and $mime2 =~ m{^(.*)/(.*)$}) |
2369
|
|
|
|
|
|
|
{ |
2370
|
|
|
|
|
|
|
my ($a,$b) = ($1,$2); |
2371
|
|
|
|
|
|
|
$a = 'text' if $r[2] =~ m{\btext\b}i; |
2372
|
|
|
|
|
|
|
$r[0] = "$a/$b"; |
2373
|
|
|
|
|
|
|
} |
2374
|
|
|
|
|
|
|
} |
2375
|
|
|
|
|
|
|
|
2376
|
|
|
|
|
|
|
if ($r[0] eq 'text/plain' or |
2377
|
|
|
|
|
|
|
$r[0] eq 'application/octet-stream') |
2378
|
|
|
|
|
|
|
{ |
2379
|
|
|
|
|
|
|
# hmm, are we sure? No, if the description contradicts: |
2380
|
|
|
|
|
|
|
# |
2381
|
|
|
|
|
|
|
$r[0] = "text/x-uuencode" if $r[2] eq 'uuencoded or xxencoded text'; |
2382
|
|
|
|
|
|
|
|
2383
|
|
|
|
|
|
|
# bin/floor |
2384
|
|
|
|
|
|
|
# ['text/x-pascal; charset=us-ascii','a /usr/bin/tclsh script text'] |
2385
|
|
|
|
|
|
|
# 'text/plain' |
2386
|
|
|
|
|
|
|
$r[0] = "text/x-$2" if $r[2] =~ m{^a (\S*/)?([^/\s]+) .*script text$}i; |
2387
|
|
|
|
|
|
|
if ($r[2] =~ m{\bimage\b}) |
2388
|
|
|
|
|
|
|
{ |
2389
|
|
|
|
|
|
|
# ./opengl/test.tga |
2390
|
|
|
|
|
|
|
# ['application/octet-stream; charset=binary','Targa image data - RGB 128 x 128'] |
2391
|
|
|
|
|
|
|
# 'image/x-tga' |
2392
|
|
|
|
|
|
|
$mime2 ||= eval { open my $fd,'<',\$in{buf}; File::MimeInfo::Magic::magic($fd); }; |
2393
|
|
|
|
|
|
|
$r[0] = $mime2 if $mime2 and $mime2 =~ m{^image/}; |
2394
|
|
|
|
|
|
|
} |
2395
|
|
|
|
|
|
|
} |
2396
|
|
|
|
|
|
|
|
2397
|
|
|
|
|
|
|
if ($r[0] eq 'application/octet-stream') |
2398
|
|
|
|
|
|
|
{ |
2399
|
|
|
|
|
|
|
# it can't get much worse, can it? |
2400
|
|
|
|
|
|
|
## |
2401
|
|
|
|
|
|
|
# dotdot.tar.lzma |
2402
|
|
|
|
|
|
|
# {'File::MimeInfo::Magic' => 'application/x-lzma-compressed-tar'} -- no, that was suffix based! |
2403
|
|
|
|
|
|
|
# {'File::LibMagic' => ['application/octet-stream; charset=binary','data']} |
2404
|
|
|
|
|
|
|
$mime2 ||= eval { open my $fd,'<',\$in{buf}; File::MimeInfo::Magic::magic($fd); }; |
2405
|
|
|
|
|
|
|
# |
2406
|
|
|
|
|
|
|
# File::LibMagic misreads monotone-0.99.1/monotone.info-1 as app/bin |
2407
|
|
|
|
|
|
|
# File::MimeInfo::Magic::magic() returns undef for that one. |
2408
|
|
|
|
|
|
|
# But perl itself does not agree: |
2409
|
|
|
|
|
|
|
$mime2 ||= 'application/x-text-mixed' if -T $in{file}; |
2410
|
|
|
|
|
|
|
|
2411
|
|
|
|
|
|
|
$r[0] = $mime2 if $mime2; |
2412
|
|
|
|
|
|
|
} |
2413
|
|
|
|
|
|
|
|
2414
|
|
|
|
|
|
|
if ($r[0] eq 'application/octet-stream') |
2415
|
|
|
|
|
|
|
{ |
2416
|
|
|
|
|
|
|
if ($r[2] =~ m{\bcpio\s+archive\b}i) |
2417
|
|
|
|
|
|
|
{ |
2418
|
|
|
|
|
|
|
# Mac pax files are gzipped cpio: 'ASCII cpio archive (pre-SVR4 or odc)' |
2419
|
|
|
|
|
|
|
$r[0] = 'application/x-cpio'; |
2420
|
|
|
|
|
|
|
} |
2421
|
|
|
|
|
|
|
} |
2422
|
|
|
|
|
|
|
|
2423
|
|
|
|
|
|
|
my $uncomp_buf = ''; |
2424
|
|
|
|
|
|
|
|
2425
|
|
|
|
|
|
|
if ($r[0] eq 'application/octet-stream') |
2426
|
|
|
|
|
|
|
{ |
2427
|
|
|
|
|
|
|
## lzma is an extremly bad format. It has no magic. |
2428
|
|
|
|
|
|
|
# |
2429
|
|
|
|
|
|
|
# WARNING from Compress::unLZMA |
2430
|
|
|
|
|
|
|
# "This version only implements in-memory decompression (patches are welcomed). |
2431
|
|
|
|
|
|
|
# There is no way to recognize a valid LZMA encoded file with the SDK. |
2432
|
|
|
|
|
|
|
# So, in some cases, you can crash your script if you try to uncompress a |
2433
|
|
|
|
|
|
|
# non valid LZMA encoded file." |
2434
|
|
|
|
|
|
|
# Does this also apply to us? |
2435
|
|
|
|
|
|
|
# |
2436
|
|
|
|
|
|
|
# -- hmm, maybe we better leave it at calling lzcat. |
2437
|
|
|
|
|
|
|
# Trade in "always a bit expensive" versus "sometimes crashing"... |
2438
|
|
|
|
|
|
|
# |
2439
|
|
|
|
|
|
|
# my $lztest = `sh -c "/usr/bin/lzcat < $in{file} | head -c 1k > /dev/null" 2>&1`; |
2440
|
|
|
|
|
|
|
# # -> /usr/bin/lzcat: (stdin): File format not recognized |
2441
|
|
|
|
|
|
|
# if ($lztest !~ m{(not recognized|error)}i) |
2442
|
|
|
|
|
|
|
# { |
2443
|
|
|
|
|
|
|
# $r[0] = 'application/x-lzma'; |
2444
|
|
|
|
|
|
|
# } |
2445
|
|
|
|
|
|
|
|
2446
|
|
|
|
|
|
|
if (10 < length $in{buf}) |
2447
|
|
|
|
|
|
|
{ |
2448
|
|
|
|
|
|
|
no strict 'subs'; # Compress::Raw::Lzma::AloneDecoder, LZMA_OK, LZMA_STREAM_END |
2449
|
|
|
|
|
|
|
|
2450
|
|
|
|
|
|
|
my $saved_input = $in{buf}; |
2451
|
|
|
|
|
|
|
my ($lz, $stat) = eval { Compress::Raw::Lzma::AloneDecoder->new(-Bufsize => $UNCOMP_BUFSZ, -LimitOutput => 1); }; |
2452
|
|
|
|
|
|
|
if ($lz) |
2453
|
|
|
|
|
|
|
{ |
2454
|
|
|
|
|
|
|
$stat = $lz->code($in{buf}, $uncomp_buf); |
2455
|
|
|
|
|
|
|
if (($stat == LZMA_OK or $stat == LZMA_STREAM_END) |
2456
|
|
|
|
|
|
|
and |
2457
|
|
|
|
|
|
|
(length($uncomp_buf) > length($saved_input))) |
2458
|
|
|
|
|
|
|
{ |
2459
|
|
|
|
|
|
|
$r[0] = "application/x-lzma"; |
2460
|
|
|
|
|
|
|
$r[2] = "LZMA compressed data, no magic"; |
2461
|
|
|
|
|
|
|
} |
2462
|
|
|
|
|
|
|
# This decompressor consumes the input. |
2463
|
|
|
|
|
|
|
$in{buf} = $saved_input; |
2464
|
|
|
|
|
|
|
} |
2465
|
|
|
|
|
|
|
} |
2466
|
|
|
|
|
|
|
} |
2467
|
|
|
|
|
|
|
# printf STDERR "in-buf = %d bytes\n", length($in{buf}); |
2468
|
|
|
|
|
|
|
|
2469
|
|
|
|
|
|
|
if ($r[0] =~ m{^application/(?:x-)?gzip$}) |
2470
|
|
|
|
|
|
|
{ |
2471
|
|
|
|
|
|
|
my ($gz, $stat) = eval { new Compress::Raw::Zlib::Inflate( -WindowBits => WANT_GZIP() ); }; |
2472
|
|
|
|
|
|
|
if ($gz) |
2473
|
|
|
|
|
|
|
{ |
2474
|
|
|
|
|
|
|
my $stat = $gz->inflate($in{buf}, $uncomp_buf); |
2475
|
|
|
|
|
|
|
# printf STDERR "stat=%s, uncomp=%d bytes \n", $stat, length($uncomp_buf); |
2476
|
|
|
|
|
|
|
} |
2477
|
|
|
|
|
|
|
} |
2478
|
|
|
|
|
|
|
|
2479
|
|
|
|
|
|
|
## bzip2 is not nice for stacked mime checking. |
2480
|
|
|
|
|
|
|
## It needs a huge input buffer that we do not normally provide. |
2481
|
|
|
|
|
|
|
## We only support it at the top of a stack, where we acquire enough additional |
2482
|
|
|
|
|
|
|
## input until bzip2 is happy. |
2483
|
|
|
|
|
|
|
if ($r[0] =~ m{^application/(?:x-)?bzip2$} && !$in{recursion}) |
2484
|
|
|
|
|
|
|
{ |
2485
|
|
|
|
|
|
|
my $limitOutput = 1; |
2486
|
|
|
|
|
|
|
my ($bz, $stat) = eval { new Compress::Raw::Bunzip2 0, 0, 0, 0, $limitOutput; }; |
2487
|
|
|
|
|
|
|
if ($bz) |
2488
|
|
|
|
|
|
|
{ |
2489
|
|
|
|
|
|
|
## this only works if this is a first level call. |
2490
|
|
|
|
|
|
|
open my $IN, "<", $in{file} unless $in{file} eq '-'; |
2491
|
|
|
|
|
|
|
seek $IN, length($in{buf}), 0; |
2492
|
|
|
|
|
|
|
while (!length $uncomp_buf) |
2493
|
|
|
|
|
|
|
{ |
2494
|
|
|
|
|
|
|
my $stat = $bz->bzinflate($in{buf}, $uncomp_buf); |
2495
|
|
|
|
|
|
|
# $bz->bzflush($uncomp_buf); # wishful thinking.... |
2496
|
|
|
|
|
|
|
last if length($in{buf}); # did not consume, strange. |
2497
|
|
|
|
|
|
|
last if length $stat; # something wrong, or file ends. |
2498
|
|
|
|
|
|
|
last unless read $IN, $in{buf}, 10*1024, length($in{buf}); # try to get more data |
2499
|
|
|
|
|
|
|
} |
2500
|
|
|
|
|
|
|
my $slurped = tell $IN; # likely to get ca. 800k yacc! |
2501
|
|
|
|
|
|
|
close $IN; |
2502
|
|
|
|
|
|
|
# use Data::Dumper; warn Dumper $stat, length($in{buf}), length($uncomp_buf), "slurped=$slurped"; |
2503
|
|
|
|
|
|
|
} |
2504
|
|
|
|
|
|
|
} |
2505
|
|
|
|
|
|
|
|
2506
|
|
|
|
|
|
|
## try to get at the second level mime type, for some well known linear compressors. |
2507
|
|
|
|
|
|
|
while (length $uncomp_buf && $r[0] =~ m{^application/(x-)?([+\w]+)$}) |
2508
|
|
|
|
|
|
|
{ |
2509
|
|
|
|
|
|
|
my $compname = $2; |
2510
|
|
|
|
|
|
|
my $next_uncomp_buf = ''; |
2511
|
|
|
|
|
|
|
|
2512
|
|
|
|
|
|
|
# use Data::Dumper; printf STDERR "calling mime with buf=%d bytes, compname=$compname\n", length($uncomp_buf); |
2513
|
|
|
|
|
|
|
|
2514
|
|
|
|
|
|
|
######### |
2515
|
|
|
|
|
|
|
## FIXME: adding +$compname to the filename prevents reopening in mime, if needed. |
2516
|
|
|
|
|
|
|
## Why did I do this in the first place? |
2517
|
|
|
|
|
|
|
# my $m2 = $self->mime(buf => $uncomp_buf, file => "$in{file}+$compname", uncomp => \$next_uncomp_buf, recursion => 1); |
2518
|
|
|
|
|
|
|
######### |
2519
|
|
|
|
|
|
|
|
2520
|
|
|
|
|
|
|
my $m2 = $self->mime(buf => $uncomp_buf, file => $in{file}, uncomp => \$next_uncomp_buf, recursion => 1); |
2521
|
|
|
|
|
|
|
my ($a,$xminus,$b) = ($m2->[0] =~ m{^(.*)/(x-)?(.*)$}); |
2522
|
|
|
|
|
|
|
if ($a eq 'application') |
2523
|
|
|
|
|
|
|
{ |
2524
|
|
|
|
|
|
|
$r[0] = "application/x-$b+$compname" |
2525
|
|
|
|
|
|
|
} |
2526
|
|
|
|
|
|
|
else |
2527
|
|
|
|
|
|
|
{ |
2528
|
|
|
|
|
|
|
$r[0] = "application/x-$a-$b+$compname" |
2529
|
|
|
|
|
|
|
} |
2530
|
|
|
|
|
|
|
$r[2] .= "\n" . $m2->[2]; |
2531
|
|
|
|
|
|
|
$uncomp_buf = $next_uncomp_buf; |
2532
|
|
|
|
|
|
|
# print Dumper "new: ", \@r, $m2, $compname, length($uncomp_buf); |
2533
|
|
|
|
|
|
|
} |
2534
|
|
|
|
|
|
|
|
2535
|
|
|
|
|
|
|
# use Data::Dumper; |
2536
|
|
|
|
|
|
|
# die Dumper \@r, "--------------------"; |
2537
|
|
|
|
|
|
|
|
2538
|
|
|
|
|
|
|
if ($r[0] eq 'application/unknown+zip' and $r[2] =~ m{\btext\b}i) |
2539
|
|
|
|
|
|
|
{ |
2540
|
|
|
|
|
|
|
# empty.odt |
2541
|
|
|
|
|
|
|
# ['application/unknown+zip; charset=binary','Zip archive data, at least v2.0 to extract, mime type application/vnd OpenDocument Text'] |
2542
|
|
|
|
|
|
|
# application/vnd.oasis.opendocument.text |
2543
|
|
|
|
|
|
|
if ($mime2 ||= eval { open my $fd,'<',\$in{buf}; File::MimeInfo::Magic::magic($fd); }) |
2544
|
|
|
|
|
|
|
{ |
2545
|
|
|
|
|
|
|
$mime2 .= '+zip' unless $mime2 =~ m{\+zip}i; |
2546
|
|
|
|
|
|
|
$r[0] = $mime2 if $mime2 =~ m{^application/}; |
2547
|
|
|
|
|
|
|
} |
2548
|
|
|
|
|
|
|
} |
2549
|
|
|
|
|
|
|
$r[0] .= '+zip' if $r[0] =~ m{^application/vnd\.oasis\.opendocument\.text$}; |
2550
|
|
|
|
|
|
|
|
2551
|
|
|
|
|
|
|
if ($r[0] eq 'text/plain' and $in{file} =~ m{\.(mm|b64|base64)$}i) |
2552
|
|
|
|
|
|
|
{ |
2553
|
|
|
|
|
|
|
my $suf = lc $1; |
2554
|
|
|
|
|
|
|
$r[0] = "text/x-suffix-$suf+plain"; |
2555
|
|
|
|
|
|
|
} |
2556
|
|
|
|
|
|
|
|
2557
|
|
|
|
|
|
|
if ($r[0] eq 'application/octet-stream' and $in{file} =~ m{\.(lzma|zx|lz)$}i) |
2558
|
|
|
|
|
|
|
{ |
2559
|
|
|
|
|
|
|
my $suf = lc $1; |
2560
|
|
|
|
|
|
|
$r[0] = "application/x-suffix-$suf+octet-stream"; |
2561
|
|
|
|
|
|
|
} |
2562
|
|
|
|
|
|
|
|
2563
|
|
|
|
|
|
|
if ($r[0] =~ m{^application/x-(ms-dos-|)executable$}) |
2564
|
|
|
|
|
|
|
{ |
2565
|
|
|
|
|
|
|
if (-x '/usr/bin/upx') |
2566
|
|
|
|
|
|
|
{ |
2567
|
|
|
|
|
|
|
# upx refuses to read symlinks. Work around this. |
2568
|
|
|
|
|
|
|
my $in_file = $in{file}; |
2569
|
|
|
|
|
|
|
$in_file = readlink($in{file}) if -l $in{file}; |
2570
|
|
|
|
|
|
|
$r[0] .= '+upx' unless run(['/usr/bin/upx', '-q', '-q', '-t', $in_file]); |
2571
|
|
|
|
|
|
|
} |
2572
|
|
|
|
|
|
|
} |
2573
|
|
|
|
|
|
|
|
2574
|
|
|
|
|
|
|
${$in{uncomp}} = $uncomp_buf if ref $in{uncomp} eq 'SCALAR'; |
2575
|
|
|
|
|
|
|
$r[3] = [ $mime1, $mime2 ] if $mime1 ne $r[0] or ($mime2 and $mime2 ne $mime1); |
2576
|
|
|
|
|
|
|
|
2577
|
|
|
|
|
|
|
return \@r; |
2578
|
|
|
|
|
|
|
} |
2579
|
|
|
|
|
|
|
|
2580
|
|
|
|
|
|
|
=head1 AUTHOR |
2581
|
|
|
|
|
|
|
|
2582
|
|
|
|
|
|
|
Juergen Weigert, C<< >> |
2583
|
|
|
|
|
|
|
|
2584
|
|
|
|
|
|
|
=head1 BUGS |
2585
|
|
|
|
|
|
|
|
2586
|
|
|
|
|
|
|
The implementation of C is an ugly hack. We suffer from the existence of |
2587
|
|
|
|
|
|
|
multiple file magic databases, and multiple conflicting implementations. With |
2588
|
|
|
|
|
|
|
Perl we have at least 5 modules for this; here we use two. |
2589
|
|
|
|
|
|
|
|
2590
|
|
|
|
|
|
|
The builtin list of MIME helpers is incomplete. Please submit your handler code. |
2591
|
|
|
|
|
|
|
|
2592
|
|
|
|
|
|
|
Please report any bugs or feature requests to C, or through |
2593
|
|
|
|
|
|
|
the web interface at L. I will be notified, and then you'll |
2594
|
|
|
|
|
|
|
automatically be notified of progress on your bug as I make changes. |
2595
|
|
|
|
|
|
|
|
2596
|
|
|
|
|
|
|
|
2597
|
|
|
|
|
|
|
=head1 RELATED MODULES |
2598
|
|
|
|
|
|
|
|
2599
|
|
|
|
|
|
|
While designing File::Unpack, a range of other perl modules were examined. Many modules provide valuable service to File::Unpack and became dependencies or are recommended. |
2600
|
|
|
|
|
|
|
Others exposed drawbacks during closer examination and may find some of their |
2601
|
|
|
|
|
|
|
wheels re-invented here. |
2602
|
|
|
|
|
|
|
|
2603
|
|
|
|
|
|
|
=head2 Used Modules |
2604
|
|
|
|
|
|
|
|
2605
|
|
|
|
|
|
|
=over |
2606
|
|
|
|
|
|
|
|
2607
|
|
|
|
|
|
|
=item File::LibMagic |
2608
|
|
|
|
|
|
|
|
2609
|
|
|
|
|
|
|
This is the prefered mimetype engine. It disregards the suffix, recognizes more |
2610
|
|
|
|
|
|
|
types than any of the alternatives, and uses exactly the same engine as |
2611
|
|
|
|
|
|
|
/usr/bin/file in openSUSE systems. It also returns charset and description |
2612
|
|
|
|
|
|
|
information. We crossreference the description with the mimetype to detect |
2613
|
|
|
|
|
|
|
weaknesses, and consult File::MimeInfo::Magic and some own logic, for e.g. |
2614
|
|
|
|
|
|
|
detecting LZMA compression which fails to provide any recognizable magic. |
2615
|
|
|
|
|
|
|
Required if you use C; otherwise not a hard requirement. |
2616
|
|
|
|
|
|
|
|
2617
|
|
|
|
|
|
|
=item File::MimeInfo::Magic |
2618
|
|
|
|
|
|
|
|
2619
|
|
|
|
|
|
|
Uses both magic information and file suffixes to determine the mimetype. Its |
2620
|
|
|
|
|
|
|
magic() function is used in a few cases, where File::LibMagic fails. E.g. as |
2621
|
|
|
|
|
|
|
of June 2010, libmagic does not recognize 'image/x-targa'. |
2622
|
|
|
|
|
|
|
File::MimeInfo::Magic may be slower, but it features the shared-mime-info |
2623
|
|
|
|
|
|
|
database from freedesktop.org . Recommended if you use C. |
2624
|
|
|
|
|
|
|
|
2625
|
|
|
|
|
|
|
=item String::ShellQuote |
2626
|
|
|
|
|
|
|
|
2627
|
|
|
|
|
|
|
Used to call external MIME helpers. Required. |
2628
|
|
|
|
|
|
|
|
2629
|
|
|
|
|
|
|
=item BSD::Resource |
2630
|
|
|
|
|
|
|
|
2631
|
|
|
|
|
|
|
Used to reliably restrict the maximum file size. Recommended. |
2632
|
|
|
|
|
|
|
|
2633
|
|
|
|
|
|
|
=item File::Path |
2634
|
|
|
|
|
|
|
|
2635
|
|
|
|
|
|
|
mkpath(). Required. |
2636
|
|
|
|
|
|
|
|
2637
|
|
|
|
|
|
|
=item Cwd |
2638
|
|
|
|
|
|
|
|
2639
|
|
|
|
|
|
|
fast_abs_path(). Required. |
2640
|
|
|
|
|
|
|
|
2641
|
|
|
|
|
|
|
=item JSON |
2642
|
|
|
|
|
|
|
|
2643
|
|
|
|
|
|
|
Used for formatting the logfile. Required. |
2644
|
|
|
|
|
|
|
|
2645
|
|
|
|
|
|
|
=back |
2646
|
|
|
|
|
|
|
|
2647
|
|
|
|
|
|
|
=head2 Modules Not Used |
2648
|
|
|
|
|
|
|
|
2649
|
|
|
|
|
|
|
=over |
2650
|
|
|
|
|
|
|
|
2651
|
|
|
|
|
|
|
=item Archive::Extract |
2652
|
|
|
|
|
|
|
|
2653
|
|
|
|
|
|
|
Archive::Extract tries first to determine what type of archive you are passing |
2654
|
|
|
|
|
|
|
it, by inspecting its suffix. 'Maybe this module should use something like |
2655
|
|
|
|
|
|
|
"File::Type" to determine the type, rather than blindly trust the suffix'. |
2656
|
|
|
|
|
|
|
[quoted from perldoc] |
2657
|
|
|
|
|
|
|
|
2658
|
|
|
|
|
|
|
Set $Archive::Extract::PREFER_BIN to 1, which will prefer the use of command |
2659
|
|
|
|
|
|
|
line programs and won't consume so much memory. Default: use "Archive::Tar". |
2660
|
|
|
|
|
|
|
|
2661
|
|
|
|
|
|
|
=item Archive::Zip |
2662
|
|
|
|
|
|
|
|
2663
|
|
|
|
|
|
|
If you are just going to be extracting zips (and/or other archives) you are |
2664
|
|
|
|
|
|
|
recommended to look at using Archive::Extract . [quoted from perldoc] |
2665
|
|
|
|
|
|
|
It is pure perl, so it's a lot slower then your '/usr/bin/zip'. |
2666
|
|
|
|
|
|
|
|
2667
|
|
|
|
|
|
|
=item Archive::Tar |
2668
|
|
|
|
|
|
|
|
2669
|
|
|
|
|
|
|
It is pure Perl, so it's a lot slower then your "/bin/tar". |
2670
|
|
|
|
|
|
|
It is heavy on memory, all will be read into memory. [quoted from perldoc] |
2671
|
|
|
|
|
|
|
|
2672
|
|
|
|
|
|
|
=item File::MMagic, File::MMagic::XS, File::Type |
2673
|
|
|
|
|
|
|
|
2674
|
|
|
|
|
|
|
Compared to File::LibMagic and File::MimeInfo::Magic, these three are inferior. |
2675
|
|
|
|
|
|
|
They often say 'text/plain' or 'application/octet-stream' where the latter two report |
2676
|
|
|
|
|
|
|
useful mimetypes. |
2677
|
|
|
|
|
|
|
|
2678
|
|
|
|
|
|
|
=back |
2679
|
|
|
|
|
|
|
|
2680
|
|
|
|
|
|
|
=head1 SUPPORT |
2681
|
|
|
|
|
|
|
|
2682
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
2683
|
|
|
|
|
|
|
|
2684
|
|
|
|
|
|
|
perldoc File::Unpack |
2685
|
|
|
|
|
|
|
|
2686
|
|
|
|
|
|
|
|
2687
|
|
|
|
|
|
|
You can also look for information at: |
2688
|
|
|
|
|
|
|
|
2689
|
|
|
|
|
|
|
=over 4 |
2690
|
|
|
|
|
|
|
|
2691
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker |
2692
|
|
|
|
|
|
|
|
2693
|
|
|
|
|
|
|
L |
2694
|
|
|
|
|
|
|
|
2695
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
2696
|
|
|
|
|
|
|
|
2697
|
|
|
|
|
|
|
L |
2698
|
|
|
|
|
|
|
|
2699
|
|
|
|
|
|
|
=item * CPAN Ratings |
2700
|
|
|
|
|
|
|
|
2701
|
|
|
|
|
|
|
L |
2702
|
|
|
|
|
|
|
|
2703
|
|
|
|
|
|
|
=item * Search CPAN |
2704
|
|
|
|
|
|
|
|
2705
|
|
|
|
|
|
|
L |
2706
|
|
|
|
|
|
|
|
2707
|
|
|
|
|
|
|
=back |
2708
|
|
|
|
|
|
|
|
2709
|
|
|
|
|
|
|
=head1 SOURCE REPOSITORY |
2710
|
|
|
|
|
|
|
|
2711
|
|
|
|
|
|
|
L |
2712
|
|
|
|
|
|
|
|
2713
|
|
|
|
|
|
|
L |
2714
|
|
|
|
|
|
|
|
2715
|
|
|
|
|
|
|
git clone L |
2716
|
|
|
|
|
|
|
|
2717
|
|
|
|
|
|
|
|
2718
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
2719
|
|
|
|
|
|
|
|
2720
|
|
|
|
|
|
|
MIME type recognition relies heavily on libmagic by Christos Zoulas. I had long |
2721
|
|
|
|
|
|
|
hesitated implementing File::Unpack, but set to work, when I dicovered |
2722
|
|
|
|
|
|
|
that File::LibMagic brings your library to perl. Thanks Christos. And thanks |
2723
|
|
|
|
|
|
|
for tcsh too. |
2724
|
|
|
|
|
|
|
|
2725
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
2726
|
|
|
|
|
|
|
|
2727
|
|
|
|
|
|
|
Copyright 2010,2011,2012,2013 Juergen Weigert. |
2728
|
|
|
|
|
|
|
|
2729
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
2730
|
|
|
|
|
|
|
under the terms of either: the GNU General Public License as published |
2731
|
|
|
|
|
|
|
by the Free Software Foundation; or the Artistic License. |
2732
|
|
|
|
|
|
|
|
2733
|
|
|
|
|
|
|
See http://dev.perl.org/licenses/ for more information. |
2734
|
|
|
|
|
|
|
|
2735
|
|
|
|
|
|
|
|
2736
|
|
|
|
|
|
|
=cut |
2737
|
|
|
|
|
|
|
|
2738
|
|
|
|
|
|
|
1; # End of File::Unpack |