line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package File::MimeInfo::Magic; |
2
|
|
|
|
|
|
|
|
3
|
3
|
|
|
3
|
|
61384
|
use strict; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
77
|
|
4
|
3
|
|
|
3
|
|
21
|
use Carp; |
|
3
|
|
|
|
|
4
|
|
|
3
|
|
|
|
|
161
|
|
5
|
3
|
|
|
3
|
|
14
|
use Fcntl 'SEEK_SET'; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
114
|
|
6
|
3
|
|
|
3
|
|
765
|
use File::BaseDir qw/data_files/; |
|
3
|
|
|
|
|
2356
|
|
|
3
|
|
|
|
|
239
|
|
7
|
|
|
|
|
|
|
require File::MimeInfo; |
8
|
|
|
|
|
|
|
require Exporter; |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
BEGIN { |
11
|
3
|
|
|
3
|
|
20
|
no strict "refs"; |
|
3
|
|
|
|
|
4
|
|
|
3
|
|
|
|
|
233
|
|
12
|
3
|
|
|
3
|
|
11
|
for (qw/extensions describe globs inodetype default/) { |
13
|
15
|
|
|
|
|
24
|
*{$_} = \&{"File::MimeInfo::$_"}; |
|
15
|
|
|
|
|
4850
|
|
|
15
|
|
|
|
|
47
|
|
14
|
|
|
|
|
|
|
} |
15
|
|
|
|
|
|
|
} |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
our @ISA = qw(Exporter File::MimeInfo); |
18
|
|
|
|
|
|
|
our @EXPORT = qw(mimetype); |
19
|
|
|
|
|
|
|
our @EXPORT_OK = qw(extensions describe globs inodetype magic); |
20
|
|
|
|
|
|
|
our $VERSION = '0.32'; |
21
|
|
|
|
|
|
|
our $DEBUG; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
our $_hashed = 0; |
24
|
|
|
|
|
|
|
our $max_buffer = 32; |
25
|
|
|
|
|
|
|
our (@magic_80, @magic); |
26
|
|
|
|
|
|
|
# @magic_80 and @magic are used to store the parse tree of magic data |
27
|
|
|
|
|
|
|
# @magic_80 contains magic rules with priority 80 and higher, @magic the rest |
28
|
|
|
|
|
|
|
# $max_buffer contains the maximum number of chars to be buffered from a non-seekable |
29
|
|
|
|
|
|
|
# filehandle in order to do magic mimetyping |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
sub mimetype { |
32
|
8
|
|
|
8
|
1
|
2408
|
my $file = pop; |
33
|
8
|
50
|
|
|
|
20
|
croak 'subroutine "mimetype" needs a filename as argument' unless defined $file; |
34
|
|
|
|
|
|
|
|
35
|
8
|
50
|
0
|
|
|
27
|
return magic($file) || default($file) if ref $file; |
36
|
8
|
50
|
33
|
|
|
164
|
return &File::MimeInfo::mimetype($file) unless -s $file and -r _; |
37
|
|
|
|
|
|
|
|
38
|
8
|
|
|
|
|
21
|
my ($mimet, $fh); |
39
|
8
|
50
|
|
|
|
26
|
return $mimet if $mimet = inodetype($file); |
40
|
|
|
|
|
|
|
|
41
|
8
|
|
|
|
|
22
|
($mimet, $fh) = _magic($file, \@magic_80); # high priority rules |
42
|
8
|
100
|
|
|
|
35
|
return $mimet if $mimet; |
43
|
|
|
|
|
|
|
|
44
|
5
|
50
|
|
|
|
19
|
return $mimet if $mimet = globs($file); |
45
|
|
|
|
|
|
|
|
46
|
5
|
|
|
|
|
14
|
($mimet, $fh) = _magic($fh, \@magic); # lower priority rules |
47
|
5
|
50
|
|
|
|
64
|
close $fh if ref $fh; |
48
|
|
|
|
|
|
|
|
49
|
5
|
100
|
|
|
|
31
|
return $mimet if $mimet; |
50
|
2
|
|
|
|
|
10
|
return default($file); |
51
|
|
|
|
|
|
|
} |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
sub magic { |
54
|
8
|
|
|
8
|
1
|
12
|
my $file = pop; |
55
|
8
|
50
|
|
|
|
16
|
croak 'subroutine "magic" needs a filename as argument' unless defined $file; |
56
|
8
|
50
|
33
|
|
|
127
|
return undef unless ref($file) || -s $file; |
57
|
8
|
50
|
|
|
|
21
|
print STDERR "> Checking all magic rules\n" if $DEBUG; |
58
|
|
|
|
|
|
|
|
59
|
8
|
|
|
|
|
22
|
my ($mimet, $fh) = _magic($file, \@magic_80, \@magic); |
60
|
8
|
50
|
|
|
|
40
|
close $fh unless ref $file; |
61
|
|
|
|
|
|
|
|
62
|
8
|
|
|
|
|
49
|
return $mimet; |
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
sub _magic { |
66
|
21
|
|
|
21
|
|
39
|
my ($file, @rules) = @_; |
67
|
21
|
100
|
|
|
|
38
|
_rehash() unless $_hashed; |
68
|
|
|
|
|
|
|
|
69
|
21
|
|
|
|
|
27
|
my $fh; |
70
|
21
|
100
|
|
|
|
38
|
unless (ref $file) { |
71
|
16
|
50
|
|
|
|
491
|
open $fh, '<', $file or return undef; |
72
|
16
|
|
|
|
|
57
|
binmode $fh; |
73
|
|
|
|
|
|
|
} |
74
|
5
|
|
|
|
|
7
|
else { $fh = $file } |
75
|
|
|
|
|
|
|
|
76
|
21
|
|
|
|
|
76
|
for my $type (map @$_, @rules) { |
77
|
62
|
|
|
|
|
135
|
for (2..$#$type) { |
78
|
212
|
100
|
|
|
|
316
|
next unless _check_rule($$type[$_], $fh, 0); |
79
|
12
|
100
|
|
|
|
113
|
close $fh unless ref $file; |
80
|
12
|
|
|
|
|
55
|
return ($$type[1], $fh); |
81
|
|
|
|
|
|
|
} |
82
|
|
|
|
|
|
|
} |
83
|
9
|
|
|
|
|
26
|
return (undef, $fh); |
84
|
|
|
|
|
|
|
} |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
sub _check_rule { |
87
|
216
|
|
|
216
|
|
308
|
my ($ref, $fh, $lev) = @_; |
88
|
216
|
|
|
|
|
245
|
my $line; |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# Read |
91
|
216
|
50
|
|
|
|
318
|
if (ref $fh eq 'GLOB') { |
92
|
216
|
|
|
|
|
1654
|
seek($fh, $$ref[0], SEEK_SET); # seek offset |
93
|
216
|
|
|
|
|
1647
|
read($fh, $line, $$ref[1]); # read max length |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
else { # allowing for IO::Something |
96
|
0
|
|
|
|
|
0
|
$fh->seek($$ref[0], SEEK_SET); # seek offset |
97
|
0
|
|
|
|
|
0
|
$fh->read($line, $$ref[1]); # read max length |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
# Match regex |
101
|
216
|
100
|
|
|
|
535
|
$line = unpack 'b*', $line if $$ref[2]; # unpack to bits if using mask |
102
|
216
|
100
|
|
|
|
1074
|
return undef unless $line =~ $$ref[3]; # match regex |
103
|
16
|
50
|
|
|
|
30
|
print STDERR '>', '>'x$lev, ' Value "', _escape_bytes($2), |
104
|
|
|
|
|
|
|
'" at offset ', $$ref[1]+length($1), |
105
|
|
|
|
|
|
|
" matches at $$ref[4]\n" |
106
|
|
|
|
|
|
|
if $DEBUG; |
107
|
16
|
100
|
|
|
|
55
|
return 1 unless $#$ref > 4; |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# Check nested rules and recurs |
110
|
4
|
|
|
|
|
8
|
for (5..$#$ref) { |
111
|
4
|
50
|
|
|
|
13
|
return 1 if _check_rule($$ref[$_], $fh, $lev+1); |
112
|
|
|
|
|
|
|
} |
113
|
0
|
0
|
0
|
|
|
0
|
print STDERR "> Failed nested rules\n" if $DEBUG && ! $lev; |
114
|
0
|
|
|
|
|
0
|
return 0; |
115
|
|
|
|
|
|
|
} |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
sub rehash { |
118
|
0
|
|
|
0
|
1
|
0
|
&File::MimeInfo::rehash(); |
119
|
0
|
|
|
|
|
0
|
&_rehash(); |
120
|
|
|
|
|
|
|
#use Data::Dumper; |
121
|
|
|
|
|
|
|
#print Dumper \@magic_80, \@magic; |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
sub _rehash { |
125
|
1
|
|
|
1
|
|
2
|
local $_; # limit scope of $_ ... :S |
126
|
1
|
|
|
|
|
4
|
($max_buffer, @magic_80, @magic) = (32); # clear data |
127
|
|
|
|
|
|
|
my @magicfiles = @File::MimeInfo::DIRS |
128
|
1
|
50
|
|
|
|
8
|
? ( grep {-e $_ && -r $_} |
|
1
|
50
|
|
|
|
26
|
|
129
|
|
|
|
|
|
|
map "$_/magic", @File::MimeInfo::DIRS ) |
130
|
|
|
|
|
|
|
: ( reverse data_files('mime/magic') ) ; |
131
|
1
|
|
|
|
|
2
|
my @done; |
132
|
1
|
|
|
|
|
3
|
for my $file (@magicfiles) { |
133
|
1
|
50
|
|
|
|
3
|
next if grep {$file eq $_} @done; |
|
0
|
|
|
|
|
0
|
|
134
|
1
|
|
|
|
|
4
|
_hash_magic($file); |
135
|
1
|
|
|
|
|
4
|
push @done, $file; |
136
|
|
|
|
|
|
|
} |
137
|
1
|
|
|
|
|
7
|
@magic = sort {$$b[0] <=> $$a[0]} @magic; |
|
10
|
|
|
|
|
17
|
|
138
|
1
|
|
|
|
|
4
|
while ($magic[0][0] >= 80) { |
139
|
2
|
|
|
|
|
5
|
push @magic_80, shift @magic; |
140
|
|
|
|
|
|
|
} |
141
|
1
|
|
|
|
|
3
|
$_hashed = 1; |
142
|
|
|
|
|
|
|
} |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
sub _hash_magic { |
145
|
1
|
|
|
1
|
|
2
|
my $file = shift; |
146
|
|
|
|
|
|
|
|
147
|
1
|
|
33
|
|
|
33
|
open MAGIC, '<', $file |
148
|
|
|
|
|
|
|
|| croak "Could not open file '$file' for reading"; |
149
|
1
|
|
|
|
|
4
|
binmode MAGIC; |
150
|
1
|
50
|
|
|
|
37
|
eq "MIME-Magic\x00\n" |
151
|
|
|
|
|
|
|
or carp "Magic file '$file' doesn't seem to be a magic file"; |
152
|
1
|
|
|
|
|
3
|
my $line = 1; |
153
|
1
|
|
|
|
|
5
|
while () { |
154
|
37
|
|
|
|
|
50
|
$line++; |
155
|
|
|
|
|
|
|
|
156
|
37
|
100
|
|
|
|
87
|
if (/^\[(\d+):(.*?)\]\n$/) { |
157
|
6
|
|
|
|
|
22
|
push @magic, [$1,$2]; |
158
|
6
|
|
|
|
|
17
|
next; |
159
|
|
|
|
|
|
|
} |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
s/^(\d*)>(\d+)=(.{2})//s |
162
|
31
|
50
|
0
|
|
|
142
|
|| warn "$file line $line skipped\n" && next; |
163
|
31
|
|
|
|
|
107
|
my ($i, $o, $l) = ($1, $2, unpack 'n', $3); |
164
|
|
|
|
|
|
|
# indent, offset, value length |
165
|
31
|
|
|
|
|
60
|
while (length($_) <= $l) { |
166
|
0
|
|
|
|
|
0
|
$_ .= ; |
167
|
0
|
|
|
|
|
0
|
$line++; |
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
|
170
|
31
|
|
|
|
|
54
|
my $v = substr $_, 0, $l, ''; # value |
171
|
|
|
|
|
|
|
|
172
|
31
|
50
|
0
|
|
|
454
|
/^(?:&(.{$l}))?(?:~(\d+))?(?:\+(\d+))?\n$/s |
173
|
|
|
|
|
|
|
|| warn "$file line $line skipped\n" && next; |
174
|
31
|
|
100
|
|
|
205
|
my ($m, $w, $r) = ($1, $2 || 1, $3 || 1); |
|
|
|
100
|
|
|
|
|
175
|
|
|
|
|
|
|
# mask, word size, range |
176
|
31
|
|
|
|
|
41
|
my $mdef = defined $m; |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
# possible big endian to little endian conversion |
179
|
|
|
|
|
|
|
# as a bonus perl also takes care of weird endian cases |
180
|
31
|
100
|
|
|
|
52
|
if ( $w != 1 ) { |
181
|
5
|
|
|
|
|
9
|
my ( $utpl, $ptpl ); |
182
|
5
|
50
|
|
|
|
10
|
if ( 2 == $w ) { |
|
|
0
|
|
|
|
|
|
183
|
5
|
|
|
|
|
12
|
$v = pack 'S', unpack 'n', $v; |
184
|
5
|
50
|
|
|
|
10
|
$m = pack 'S', unpack 'n', $m if $mdef; |
185
|
|
|
|
|
|
|
} |
186
|
|
|
|
|
|
|
elsif ( 4 == $w ) { |
187
|
0
|
|
|
|
|
0
|
$v = pack 'L', unpack 'N', $v; |
188
|
0
|
0
|
|
|
|
0
|
$m = pack 'L', unpack 'N', $m if $mdef; |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
else { |
191
|
0
|
|
|
|
|
0
|
warn "Unsupported word size: $w octets ". |
192
|
|
|
|
|
|
|
" at $file line $line\n" |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
|
196
|
31
|
|
|
|
|
49
|
my $end = $o + $l + $r - 1; |
197
|
31
|
100
|
|
|
|
49
|
$max_buffer = $end if $max_buffer < $end; |
198
|
31
|
100
|
|
|
|
48
|
my $ref = $i ? _find_branch($i) : $magic[-1]; |
199
|
31
|
|
|
|
|
33
|
$r--; # 1-based => 0-based range for regex |
200
|
31
|
100
|
|
|
|
43
|
$r *= 8 if $mdef; # bytes => bits for matching a mask |
201
|
31
|
100
|
|
|
|
101
|
my $reg = '^' |
|
|
100
|
|
|
|
|
|
202
|
|
|
|
|
|
|
. ( $r ? "(.{0,$r}?)" : '()' ) |
203
|
|
|
|
|
|
|
. ( $mdef ? '('. _mask_regex($v, $m) .')' |
204
|
|
|
|
|
|
|
: '('. quotemeta($v) .')' ) ; |
205
|
31
|
|
|
|
|
322
|
push @$ref, [ |
206
|
|
|
|
|
|
|
$o, $end, # offset, offset+length+range |
207
|
|
|
|
|
|
|
$mdef, # boolean for mask |
208
|
|
|
|
|
|
|
qr/$reg/sm, # the regex to match |
209
|
|
|
|
|
|
|
undef # debug data |
210
|
|
|
|
|
|
|
]; |
211
|
31
|
50
|
|
|
|
202
|
$$ref[-1][-1] = "$file line $line" if $DEBUG; |
212
|
|
|
|
|
|
|
} |
213
|
1
|
|
|
|
|
28
|
close MAGIC; |
214
|
|
|
|
|
|
|
} |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
sub _find_branch { # finds last branch of tree of rules |
217
|
4
|
|
|
4
|
|
6
|
my $i = shift; |
218
|
4
|
|
|
|
|
5
|
my $ref = $magic[-1]; |
219
|
4
|
|
|
|
|
10
|
for (1..$i) { $ref = $$ref[-1] } |
|
6
|
|
|
|
|
10
|
|
220
|
4
|
|
|
|
|
6
|
return $ref; |
221
|
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
sub _mask_regex { # build regex based on mask |
224
|
1
|
|
|
1
|
|
3
|
my ($v, $m) = @_; |
225
|
1
|
|
|
|
|
17
|
my @v = split '', unpack "b*", $v; |
226
|
1
|
|
|
|
|
14
|
my @m = split '', unpack "b*", $m; |
227
|
1
|
|
|
|
|
3
|
my $re = ''; |
228
|
1
|
|
|
|
|
3
|
for (0 .. $#m) { |
229
|
64
|
100
|
|
|
|
84
|
$re .= $m[$_] ? $v[$_] : '.' ; |
230
|
|
|
|
|
|
|
# If $mask = 1 than ($input && $mask) will be same as $input |
231
|
|
|
|
|
|
|
# If $mask = 0 than ($input && $mask) is always 0 |
232
|
|
|
|
|
|
|
# But $mask = 0 only makes sense if $value = 0 |
233
|
|
|
|
|
|
|
# So if $mask = 0 we ignore that bit of $input |
234
|
|
|
|
|
|
|
} |
235
|
1
|
|
|
|
|
7
|
return $re; |
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
sub _escape_bytes { # used for debug output |
239
|
0
|
|
|
0
|
|
|
my $string = shift; |
240
|
0
|
0
|
|
|
|
|
if ($string =~ /[\x00-\x1F\x7F]/) { |
241
|
|
|
|
|
|
|
$string = join '', map { |
242
|
0
|
|
|
|
|
|
my $o = ord($_); |
|
0
|
|
|
|
|
|
|
243
|
0
|
0
|
|
|
|
|
($o < 32) ? '^' . chr($o + 64) : |
|
|
0
|
|
|
|
|
|
244
|
|
|
|
|
|
|
($o == 127) ? '^?' : $_ ; |
245
|
|
|
|
|
|
|
} split '', $string; |
246
|
|
|
|
|
|
|
} |
247
|
0
|
|
|
|
|
|
return $string; |
248
|
|
|
|
|
|
|
} |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
1; |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
__END__ |