| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package File::MimeInfo::Magic; |
|
2
|
|
|
|
|
|
|
|
|
3
|
3
|
|
|
3
|
|
61384
|
use strict; |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
77
|
|
|
4
|
3
|
|
|
3
|
|
21
|
use Carp; |
|
|
3
|
|
|
|
|
4
|
|
|
|
3
|
|
|
|
|
161
|
|
|
5
|
3
|
|
|
3
|
|
14
|
use Fcntl 'SEEK_SET'; |
|
|
3
|
|
|
|
|
5
|
|
|
|
3
|
|
|
|
|
114
|
|
|
6
|
3
|
|
|
3
|
|
765
|
use File::BaseDir qw/data_files/; |
|
|
3
|
|
|
|
|
2356
|
|
|
|
3
|
|
|
|
|
239
|
|
|
7
|
|
|
|
|
|
|
require File::MimeInfo; |
|
8
|
|
|
|
|
|
|
require Exporter; |
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
BEGIN { |
|
11
|
3
|
|
|
3
|
|
20
|
no strict "refs"; |
|
|
3
|
|
|
|
|
4
|
|
|
|
3
|
|
|
|
|
233
|
|
|
12
|
3
|
|
|
3
|
|
11
|
for (qw/extensions describe globs inodetype default/) { |
|
13
|
15
|
|
|
|
|
24
|
*{$_} = \&{"File::MimeInfo::$_"}; |
|
|
15
|
|
|
|
|
4850
|
|
|
|
15
|
|
|
|
|
47
|
|
|
14
|
|
|
|
|
|
|
} |
|
15
|
|
|
|
|
|
|
} |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
our @ISA = qw(Exporter File::MimeInfo); |
|
18
|
|
|
|
|
|
|
our @EXPORT = qw(mimetype); |
|
19
|
|
|
|
|
|
|
our @EXPORT_OK = qw(extensions describe globs inodetype magic); |
|
20
|
|
|
|
|
|
|
our $VERSION = '0.32'; |
|
21
|
|
|
|
|
|
|
our $DEBUG; |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
our $_hashed = 0; |
|
24
|
|
|
|
|
|
|
our $max_buffer = 32; |
|
25
|
|
|
|
|
|
|
our (@magic_80, @magic); |
|
26
|
|
|
|
|
|
|
# @magic_80 and @magic are used to store the parse tree of magic data |
|
27
|
|
|
|
|
|
|
# @magic_80 contains magic rules with priority 80 and higher, @magic the rest |
|
28
|
|
|
|
|
|
|
# $max_buffer contains the maximum number of chars to be buffered from a non-seekable |
|
29
|
|
|
|
|
|
|
# filehandle in order to do magic mimetyping |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
sub mimetype { |
|
32
|
8
|
|
|
8
|
1
|
2408
|
my $file = pop; |
|
33
|
8
|
50
|
|
|
|
20
|
croak 'subroutine "mimetype" needs a filename as argument' unless defined $file; |
|
34
|
|
|
|
|
|
|
|
|
35
|
8
|
50
|
0
|
|
|
27
|
return magic($file) || default($file) if ref $file; |
|
36
|
8
|
50
|
33
|
|
|
164
|
return &File::MimeInfo::mimetype($file) unless -s $file and -r _; |
|
37
|
|
|
|
|
|
|
|
|
38
|
8
|
|
|
|
|
21
|
my ($mimet, $fh); |
|
39
|
8
|
50
|
|
|
|
26
|
return $mimet if $mimet = inodetype($file); |
|
40
|
|
|
|
|
|
|
|
|
41
|
8
|
|
|
|
|
22
|
($mimet, $fh) = _magic($file, \@magic_80); # high priority rules |
|
42
|
8
|
100
|
|
|
|
35
|
return $mimet if $mimet; |
|
43
|
|
|
|
|
|
|
|
|
44
|
5
|
50
|
|
|
|
19
|
return $mimet if $mimet = globs($file); |
|
45
|
|
|
|
|
|
|
|
|
46
|
5
|
|
|
|
|
14
|
($mimet, $fh) = _magic($fh, \@magic); # lower priority rules |
|
47
|
5
|
50
|
|
|
|
64
|
close $fh if ref $fh; |
|
48
|
|
|
|
|
|
|
|
|
49
|
5
|
100
|
|
|
|
31
|
return $mimet if $mimet; |
|
50
|
2
|
|
|
|
|
10
|
return default($file); |
|
51
|
|
|
|
|
|
|
} |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
sub magic { |
|
54
|
8
|
|
|
8
|
1
|
12
|
my $file = pop; |
|
55
|
8
|
50
|
|
|
|
16
|
croak 'subroutine "magic" needs a filename as argument' unless defined $file; |
|
56
|
8
|
50
|
33
|
|
|
127
|
return undef unless ref($file) || -s $file; |
|
57
|
8
|
50
|
|
|
|
21
|
print STDERR "> Checking all magic rules\n" if $DEBUG; |
|
58
|
|
|
|
|
|
|
|
|
59
|
8
|
|
|
|
|
22
|
my ($mimet, $fh) = _magic($file, \@magic_80, \@magic); |
|
60
|
8
|
50
|
|
|
|
40
|
close $fh unless ref $file; |
|
61
|
|
|
|
|
|
|
|
|
62
|
8
|
|
|
|
|
49
|
return $mimet; |
|
63
|
|
|
|
|
|
|
} |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
sub _magic { |
|
66
|
21
|
|
|
21
|
|
39
|
my ($file, @rules) = @_; |
|
67
|
21
|
100
|
|
|
|
38
|
_rehash() unless $_hashed; |
|
68
|
|
|
|
|
|
|
|
|
69
|
21
|
|
|
|
|
27
|
my $fh; |
|
70
|
21
|
100
|
|
|
|
38
|
unless (ref $file) { |
|
71
|
16
|
50
|
|
|
|
491
|
open $fh, '<', $file or return undef; |
|
72
|
16
|
|
|
|
|
57
|
binmode $fh; |
|
73
|
|
|
|
|
|
|
} |
|
74
|
5
|
|
|
|
|
7
|
else { $fh = $file } |
|
75
|
|
|
|
|
|
|
|
|
76
|
21
|
|
|
|
|
76
|
for my $type (map @$_, @rules) { |
|
77
|
62
|
|
|
|
|
135
|
for (2..$#$type) { |
|
78
|
212
|
100
|
|
|
|
316
|
next unless _check_rule($$type[$_], $fh, 0); |
|
79
|
12
|
100
|
|
|
|
113
|
close $fh unless ref $file; |
|
80
|
12
|
|
|
|
|
55
|
return ($$type[1], $fh); |
|
81
|
|
|
|
|
|
|
} |
|
82
|
|
|
|
|
|
|
} |
|
83
|
9
|
|
|
|
|
26
|
return (undef, $fh); |
|
84
|
|
|
|
|
|
|
} |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
sub _check_rule { |
|
87
|
216
|
|
|
216
|
|
308
|
my ($ref, $fh, $lev) = @_; |
|
88
|
216
|
|
|
|
|
245
|
my $line; |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# Read |
|
91
|
216
|
50
|
|
|
|
318
|
if (ref $fh eq 'GLOB') { |
|
92
|
216
|
|
|
|
|
1654
|
seek($fh, $$ref[0], SEEK_SET); # seek offset |
|
93
|
216
|
|
|
|
|
1647
|
read($fh, $line, $$ref[1]); # read max length |
|
94
|
|
|
|
|
|
|
} |
|
95
|
|
|
|
|
|
|
else { # allowing for IO::Something |
|
96
|
0
|
|
|
|
|
0
|
$fh->seek($$ref[0], SEEK_SET); # seek offset |
|
97
|
0
|
|
|
|
|
0
|
$fh->read($line, $$ref[1]); # read max length |
|
98
|
|
|
|
|
|
|
} |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
# Match regex |
|
101
|
216
|
100
|
|
|
|
535
|
$line = unpack 'b*', $line if $$ref[2]; # unpack to bits if using mask |
|
102
|
216
|
100
|
|
|
|
1074
|
return undef unless $line =~ $$ref[3]; # match regex |
|
103
|
16
|
50
|
|
|
|
30
|
print STDERR '>', '>'x$lev, ' Value "', _escape_bytes($2), |
|
104
|
|
|
|
|
|
|
'" at offset ', $$ref[1]+length($1), |
|
105
|
|
|
|
|
|
|
" matches at $$ref[4]\n" |
|
106
|
|
|
|
|
|
|
if $DEBUG; |
|
107
|
16
|
100
|
|
|
|
55
|
return 1 unless $#$ref > 4; |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# Check nested rules and recurs |
|
110
|
4
|
|
|
|
|
8
|
for (5..$#$ref) { |
|
111
|
4
|
50
|
|
|
|
13
|
return 1 if _check_rule($$ref[$_], $fh, $lev+1); |
|
112
|
|
|
|
|
|
|
} |
|
113
|
0
|
0
|
0
|
|
|
0
|
print STDERR "> Failed nested rules\n" if $DEBUG && ! $lev; |
|
114
|
0
|
|
|
|
|
0
|
return 0; |
|
115
|
|
|
|
|
|
|
} |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
sub rehash { |
|
118
|
0
|
|
|
0
|
1
|
0
|
&File::MimeInfo::rehash(); |
|
119
|
0
|
|
|
|
|
0
|
&_rehash(); |
|
120
|
|
|
|
|
|
|
#use Data::Dumper; |
|
121
|
|
|
|
|
|
|
#print Dumper \@magic_80, \@magic; |
|
122
|
|
|
|
|
|
|
} |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
sub _rehash { |
|
125
|
1
|
|
|
1
|
|
2
|
local $_; # limit scope of $_ ... :S |
|
126
|
1
|
|
|
|
|
4
|
($max_buffer, @magic_80, @magic) = (32); # clear data |
|
127
|
|
|
|
|
|
|
my @magicfiles = @File::MimeInfo::DIRS |
|
128
|
1
|
50
|
|
|
|
8
|
? ( grep {-e $_ && -r $_} |
|
|
1
|
50
|
|
|
|
26
|
|
|
129
|
|
|
|
|
|
|
map "$_/magic", @File::MimeInfo::DIRS ) |
|
130
|
|
|
|
|
|
|
: ( reverse data_files('mime/magic') ) ; |
|
131
|
1
|
|
|
|
|
2
|
my @done; |
|
132
|
1
|
|
|
|
|
3
|
for my $file (@magicfiles) { |
|
133
|
1
|
50
|
|
|
|
3
|
next if grep {$file eq $_} @done; |
|
|
0
|
|
|
|
|
0
|
|
|
134
|
1
|
|
|
|
|
4
|
_hash_magic($file); |
|
135
|
1
|
|
|
|
|
4
|
push @done, $file; |
|
136
|
|
|
|
|
|
|
} |
|
137
|
1
|
|
|
|
|
7
|
@magic = sort {$$b[0] <=> $$a[0]} @magic; |
|
|
10
|
|
|
|
|
17
|
|
|
138
|
1
|
|
|
|
|
4
|
while ($magic[0][0] >= 80) { |
|
139
|
2
|
|
|
|
|
5
|
push @magic_80, shift @magic; |
|
140
|
|
|
|
|
|
|
} |
|
141
|
1
|
|
|
|
|
3
|
$_hashed = 1; |
|
142
|
|
|
|
|
|
|
} |
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
sub _hash_magic { |
|
145
|
1
|
|
|
1
|
|
2
|
my $file = shift; |
|
146
|
|
|
|
|
|
|
|
|
147
|
1
|
|
33
|
|
|
33
|
open MAGIC, '<', $file |
|
148
|
|
|
|
|
|
|
|| croak "Could not open file '$file' for reading"; |
|
149
|
1
|
|
|
|
|
4
|
binmode MAGIC; |
|
150
|
1
|
50
|
|
|
|
37
|
eq "MIME-Magic\x00\n" |
|
151
|
|
|
|
|
|
|
or carp "Magic file '$file' doesn't seem to be a magic file"; |
|
152
|
1
|
|
|
|
|
3
|
my $line = 1; |
|
153
|
1
|
|
|
|
|
5
|
while () { |
|
154
|
37
|
|
|
|
|
50
|
$line++; |
|
155
|
|
|
|
|
|
|
|
|
156
|
37
|
100
|
|
|
|
87
|
if (/^\[(\d+):(.*?)\]\n$/) { |
|
157
|
6
|
|
|
|
|
22
|
push @magic, [$1,$2]; |
|
158
|
6
|
|
|
|
|
17
|
next; |
|
159
|
|
|
|
|
|
|
} |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
s/^(\d*)>(\d+)=(.{2})//s |
|
162
|
31
|
50
|
0
|
|
|
142
|
|| warn "$file line $line skipped\n" && next; |
|
163
|
31
|
|
|
|
|
107
|
my ($i, $o, $l) = ($1, $2, unpack 'n', $3); |
|
164
|
|
|
|
|
|
|
# indent, offset, value length |
|
165
|
31
|
|
|
|
|
60
|
while (length($_) <= $l) { |
|
166
|
0
|
|
|
|
|
0
|
$_ .= ; |
|
167
|
0
|
|
|
|
|
0
|
$line++; |
|
168
|
|
|
|
|
|
|
} |
|
169
|
|
|
|
|
|
|
|
|
170
|
31
|
|
|
|
|
54
|
my $v = substr $_, 0, $l, ''; # value |
|
171
|
|
|
|
|
|
|
|
|
172
|
31
|
50
|
0
|
|
|
454
|
/^(?:&(.{$l}))?(?:~(\d+))?(?:\+(\d+))?\n$/s |
|
173
|
|
|
|
|
|
|
|| warn "$file line $line skipped\n" && next; |
|
174
|
31
|
|
100
|
|
|
205
|
my ($m, $w, $r) = ($1, $2 || 1, $3 || 1); |
|
|
|
|
100
|
|
|
|
|
|
175
|
|
|
|
|
|
|
# mask, word size, range |
|
176
|
31
|
|
|
|
|
41
|
my $mdef = defined $m; |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
# possible big endian to little endian conversion |
|
179
|
|
|
|
|
|
|
# as a bonus perl also takes care of weird endian cases |
|
180
|
31
|
100
|
|
|
|
52
|
if ( $w != 1 ) { |
|
181
|
5
|
|
|
|
|
9
|
my ( $utpl, $ptpl ); |
|
182
|
5
|
50
|
|
|
|
10
|
if ( 2 == $w ) { |
|
|
|
0
|
|
|
|
|
|
|
183
|
5
|
|
|
|
|
12
|
$v = pack 'S', unpack 'n', $v; |
|
184
|
5
|
50
|
|
|
|
10
|
$m = pack 'S', unpack 'n', $m if $mdef; |
|
185
|
|
|
|
|
|
|
} |
|
186
|
|
|
|
|
|
|
elsif ( 4 == $w ) { |
|
187
|
0
|
|
|
|
|
0
|
$v = pack 'L', unpack 'N', $v; |
|
188
|
0
|
0
|
|
|
|
0
|
$m = pack 'L', unpack 'N', $m if $mdef; |
|
189
|
|
|
|
|
|
|
} |
|
190
|
|
|
|
|
|
|
else { |
|
191
|
0
|
|
|
|
|
0
|
warn "Unsupported word size: $w octets ". |
|
192
|
|
|
|
|
|
|
" at $file line $line\n" |
|
193
|
|
|
|
|
|
|
} |
|
194
|
|
|
|
|
|
|
} |
|
195
|
|
|
|
|
|
|
|
|
196
|
31
|
|
|
|
|
49
|
my $end = $o + $l + $r - 1; |
|
197
|
31
|
100
|
|
|
|
49
|
$max_buffer = $end if $max_buffer < $end; |
|
198
|
31
|
100
|
|
|
|
48
|
my $ref = $i ? _find_branch($i) : $magic[-1]; |
|
199
|
31
|
|
|
|
|
33
|
$r--; # 1-based => 0-based range for regex |
|
200
|
31
|
100
|
|
|
|
43
|
$r *= 8 if $mdef; # bytes => bits for matching a mask |
|
201
|
31
|
100
|
|
|
|
101
|
my $reg = '^' |
|
|
|
100
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
. ( $r ? "(.{0,$r}?)" : '()' ) |
|
203
|
|
|
|
|
|
|
. ( $mdef ? '('. _mask_regex($v, $m) .')' |
|
204
|
|
|
|
|
|
|
: '('. quotemeta($v) .')' ) ; |
|
205
|
31
|
|
|
|
|
322
|
push @$ref, [ |
|
206
|
|
|
|
|
|
|
$o, $end, # offset, offset+length+range |
|
207
|
|
|
|
|
|
|
$mdef, # boolean for mask |
|
208
|
|
|
|
|
|
|
qr/$reg/sm, # the regex to match |
|
209
|
|
|
|
|
|
|
undef # debug data |
|
210
|
|
|
|
|
|
|
]; |
|
211
|
31
|
50
|
|
|
|
202
|
$$ref[-1][-1] = "$file line $line" if $DEBUG; |
|
212
|
|
|
|
|
|
|
} |
|
213
|
1
|
|
|
|
|
28
|
close MAGIC; |
|
214
|
|
|
|
|
|
|
} |
|
215
|
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
sub _find_branch { # finds last branch of tree of rules |
|
217
|
4
|
|
|
4
|
|
6
|
my $i = shift; |
|
218
|
4
|
|
|
|
|
5
|
my $ref = $magic[-1]; |
|
219
|
4
|
|
|
|
|
10
|
for (1..$i) { $ref = $$ref[-1] } |
|
|
6
|
|
|
|
|
10
|
|
|
220
|
4
|
|
|
|
|
6
|
return $ref; |
|
221
|
|
|
|
|
|
|
} |
|
222
|
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
sub _mask_regex { # build regex based on mask |
|
224
|
1
|
|
|
1
|
|
3
|
my ($v, $m) = @_; |
|
225
|
1
|
|
|
|
|
17
|
my @v = split '', unpack "b*", $v; |
|
226
|
1
|
|
|
|
|
14
|
my @m = split '', unpack "b*", $m; |
|
227
|
1
|
|
|
|
|
3
|
my $re = ''; |
|
228
|
1
|
|
|
|
|
3
|
for (0 .. $#m) { |
|
229
|
64
|
100
|
|
|
|
84
|
$re .= $m[$_] ? $v[$_] : '.' ; |
|
230
|
|
|
|
|
|
|
# If $mask = 1 than ($input && $mask) will be same as $input |
|
231
|
|
|
|
|
|
|
# If $mask = 0 than ($input && $mask) is always 0 |
|
232
|
|
|
|
|
|
|
# But $mask = 0 only makes sense if $value = 0 |
|
233
|
|
|
|
|
|
|
# So if $mask = 0 we ignore that bit of $input |
|
234
|
|
|
|
|
|
|
} |
|
235
|
1
|
|
|
|
|
7
|
return $re; |
|
236
|
|
|
|
|
|
|
} |
|
237
|
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
sub _escape_bytes { # used for debug output |
|
239
|
0
|
|
|
0
|
|
|
my $string = shift; |
|
240
|
0
|
0
|
|
|
|
|
if ($string =~ /[\x00-\x1F\x7F]/) { |
|
241
|
|
|
|
|
|
|
$string = join '', map { |
|
242
|
0
|
|
|
|
|
|
my $o = ord($_); |
|
|
0
|
|
|
|
|
|
|
|
243
|
0
|
0
|
|
|
|
|
($o < 32) ? '^' . chr($o + 64) : |
|
|
|
0
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
($o == 127) ? '^?' : $_ ; |
|
245
|
|
|
|
|
|
|
} split '', $string; |
|
246
|
|
|
|
|
|
|
} |
|
247
|
0
|
|
|
|
|
|
return $string; |
|
248
|
|
|
|
|
|
|
} |
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
1; |
|
251
|
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
__END__ |