| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package File::MimeInfo::Magic; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 3 |  |  | 3 |  | 70411 | use strict; | 
|  | 3 |  |  |  |  | 6 |  | 
|  | 3 |  |  |  |  | 89 |  | 
| 4 | 3 |  |  | 3 |  | 36 | use Carp; | 
|  | 3 |  |  |  |  | 7 |  | 
|  | 3 |  |  |  |  | 180 |  | 
| 5 | 3 |  |  | 3 |  | 19 | use Fcntl 'SEEK_SET'; | 
|  | 3 |  |  |  |  | 4 |  | 
|  | 3 |  |  |  |  | 134 |  | 
| 6 | 3 |  |  | 3 |  | 827 | use File::BaseDir qw/data_files/; | 
|  | 3 |  |  |  |  | 2430 |  | 
|  | 3 |  |  |  |  | 204 |  | 
| 7 |  |  |  |  |  |  | require File::MimeInfo; | 
| 8 |  |  |  |  |  |  | require Exporter; | 
| 9 |  |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  | BEGIN { | 
| 11 | 3 |  |  | 3 |  | 21 | no strict "refs"; | 
|  | 3 |  |  |  |  | 6 |  | 
|  | 3 |  |  |  |  | 192 |  | 
| 12 | 3 |  |  | 3 |  | 12 | for (qw/extensions describe globs inodetype default/) { | 
| 13 | 15 |  |  |  |  | 21 | *{$_} = \&{"File::MimeInfo::$_"}; | 
|  | 15 |  |  |  |  | 5589 |  | 
|  | 15 |  |  |  |  | 52 |  | 
| 14 |  |  |  |  |  |  | } | 
| 15 |  |  |  |  |  |  | } | 
| 16 |  |  |  |  |  |  |  | 
| 17 |  |  |  |  |  |  | our @ISA = qw(Exporter File::MimeInfo); | 
| 18 |  |  |  |  |  |  | our @EXPORT = qw(mimetype); | 
| 19 |  |  |  |  |  |  | our @EXPORT_OK = qw(extensions describe globs inodetype magic); | 
| 20 |  |  |  |  |  |  | our $VERSION = '0.31'; | 
| 21 |  |  |  |  |  |  | our $DEBUG; | 
| 22 |  |  |  |  |  |  |  | 
| 23 |  |  |  |  |  |  | our $_hashed = 0; | 
| 24 |  |  |  |  |  |  | our $max_buffer = 32; | 
| 25 |  |  |  |  |  |  | our (@magic_80, @magic); | 
| 26 |  |  |  |  |  |  | # @magic_80 and @magic are used to store the parse tree of magic data | 
| 27 |  |  |  |  |  |  | # @magic_80 contains magic rules with priority 80 and higher, @magic the rest | 
| 28 |  |  |  |  |  |  | # $max_buffer contains the maximum number of chars to be buffered from a non-seekable | 
| 29 |  |  |  |  |  |  | # filehandle in order to do magic mimetyping | 
| 30 |  |  |  |  |  |  |  | 
| 31 |  |  |  |  |  |  | sub mimetype { | 
| 32 | 8 |  |  | 8 | 1 | 2769 | my $file = pop; | 
| 33 | 8 | 50 |  |  |  | 22 | croak 'subroutine "mimetype" needs a filename as argument' unless defined $file; | 
| 34 |  |  |  |  |  |  |  | 
| 35 | 8 | 50 | 0 |  |  | 20 | return magic($file) || default($file) if ref $file; | 
| 36 | 8 | 50 | 33 |  |  | 184 | return &File::MimeInfo::mimetype($file) unless -s $file and -r _; | 
| 37 |  |  |  |  |  |  |  | 
| 38 | 8 |  |  |  |  | 24 | my ($mimet, $fh); | 
| 39 | 8 | 50 |  |  |  | 27 | return $mimet if $mimet = inodetype($file); | 
| 40 |  |  |  |  |  |  |  | 
| 41 | 8 |  |  |  |  | 25 | ($mimet, $fh) = _magic($file, \@magic_80); # high priority rules | 
| 42 | 8 | 100 |  |  |  | 43 | return $mimet if $mimet; | 
| 43 |  |  |  |  |  |  |  | 
| 44 | 5 | 50 |  |  |  | 18 | return $mimet if $mimet = globs($file); | 
| 45 |  |  |  |  |  |  |  | 
| 46 | 5 |  |  |  |  | 12 | ($mimet, $fh) = _magic($fh, \@magic); # lower priority rules | 
| 47 | 5 | 50 |  |  |  | 78 | close $fh if ref $fh; | 
| 48 |  |  |  |  |  |  |  | 
| 49 | 5 | 100 |  |  |  | 34 | return $mimet if $mimet; | 
| 50 | 2 |  |  |  |  | 11 | return default($file); | 
| 51 |  |  |  |  |  |  | } | 
| 52 |  |  |  |  |  |  |  | 
| 53 |  |  |  |  |  |  | sub magic { | 
| 54 | 8 |  |  | 8 | 1 | 15 | my $file = pop; | 
| 55 | 8 | 50 |  |  |  | 17 | croak 'subroutine "magic" needs a filename as argument' unless defined $file; | 
| 56 | 8 | 50 | 33 |  |  | 153 | return undef unless ref($file) || -s $file; | 
| 57 | 8 | 50 |  |  |  | 28 | print STDERR "> Checking all magic rules\n" if $DEBUG; | 
| 58 |  |  |  |  |  |  |  | 
| 59 | 8 |  |  |  |  | 26 | my ($mimet, $fh) = _magic($file, \@magic_80, \@magic); | 
| 60 | 8 | 50 |  |  |  | 48 | close $fh unless ref $file; | 
| 61 |  |  |  |  |  |  |  | 
| 62 | 8 |  |  |  |  | 58 | return $mimet; | 
| 63 |  |  |  |  |  |  | } | 
| 64 |  |  |  |  |  |  |  | 
| 65 |  |  |  |  |  |  | sub _magic { | 
| 66 | 21 |  |  | 21 |  | 51 | my ($file, @rules) = @_; | 
| 67 | 21 | 100 |  |  |  | 44 | _rehash() unless $_hashed; | 
| 68 |  |  |  |  |  |  |  | 
| 69 | 21 |  |  |  |  | 30 | my $fh; | 
| 70 | 21 | 100 |  |  |  | 43 | unless (ref $file) { | 
| 71 | 16 | 50 |  |  |  | 615 | open $fh, '<', $file or return undef; | 
| 72 | 16 |  |  |  |  | 75 | binmode $fh; | 
| 73 |  |  |  |  |  |  | } | 
| 74 | 5 |  |  |  |  | 8 | else { $fh = $file } | 
| 75 |  |  |  |  |  |  |  | 
| 76 | 21 |  |  |  |  | 93 | for my $type (map @$_, @rules) { | 
| 77 | 62 |  |  |  |  | 150 | for (2..$#$type) { | 
| 78 | 212 | 100 |  |  |  | 390 | next unless _check_rule($$type[$_], $fh, 0); | 
| 79 | 12 | 100 |  |  |  | 143 | close $fh unless ref $file; | 
| 80 | 12 |  |  |  |  | 62 | return ($$type[1], $fh); | 
| 81 |  |  |  |  |  |  | } | 
| 82 |  |  |  |  |  |  | } | 
| 83 | 9 |  |  |  |  | 30 | return (undef, $fh); | 
| 84 |  |  |  |  |  |  | } | 
| 85 |  |  |  |  |  |  |  | 
| 86 |  |  |  |  |  |  | sub _check_rule { | 
| 87 | 216 |  |  | 216 |  | 367 | my ($ref, $fh, $lev) = @_; | 
| 88 | 216 |  |  |  |  | 252 | my $line; | 
| 89 |  |  |  |  |  |  |  | 
| 90 |  |  |  |  |  |  | # Read | 
| 91 | 216 | 50 |  |  |  | 386 | if (ref $fh eq 'GLOB') { | 
| 92 | 216 |  |  |  |  | 1974 | seek($fh, $$ref[0], SEEK_SET); # seek offset | 
| 93 | 216 |  |  |  |  | 1923 | read($fh, $line, $$ref[1]);    # read max length | 
| 94 |  |  |  |  |  |  | } | 
| 95 |  |  |  |  |  |  | else { # allowing for IO::Something | 
| 96 | 0 |  |  |  |  | 0 | $fh->seek($$ref[0], SEEK_SET); # seek offset | 
| 97 | 0 |  |  |  |  | 0 | $fh->read($line, $$ref[1]);    # read max length | 
| 98 |  |  |  |  |  |  | } | 
| 99 |  |  |  |  |  |  |  | 
| 100 |  |  |  |  |  |  | # Match regex | 
| 101 | 216 | 100 |  |  |  | 638 | $line = unpack 'b*', $line if $$ref[2]; # unpack to bits if using mask | 
| 102 | 216 | 100 |  |  |  | 1260 | return undef unless $line =~ $$ref[3];  # match regex | 
| 103 | 16 | 50 |  |  |  | 42 | print STDERR '>', '>'x$lev, ' Value "', _escape_bytes($2), | 
| 104 |  |  |  |  |  |  | '" at offset ', $$ref[1]+length($1), | 
| 105 |  |  |  |  |  |  | " matches at $$ref[4]\n" | 
| 106 |  |  |  |  |  |  | if $DEBUG; | 
| 107 | 16 | 100 |  |  |  | 54 | return 1 unless $#$ref > 4; | 
| 108 |  |  |  |  |  |  |  | 
| 109 |  |  |  |  |  |  | # Check nested rules and recurs | 
| 110 | 4 |  |  |  |  | 10 | for (5..$#$ref) { | 
| 111 | 4 | 50 |  |  |  | 17 | return 1 if _check_rule($$ref[$_], $fh, $lev+1); | 
| 112 |  |  |  |  |  |  | } | 
| 113 | 0 | 0 | 0 |  |  | 0 | print STDERR "> Failed nested rules\n" if $DEBUG && ! $lev; | 
| 114 | 0 |  |  |  |  | 0 | return 0; | 
| 115 |  |  |  |  |  |  | } | 
| 116 |  |  |  |  |  |  |  | 
| 117 |  |  |  |  |  |  | sub rehash { | 
| 118 | 0 |  |  | 0 | 1 | 0 | &File::MimeInfo::rehash(); | 
| 119 | 0 |  |  |  |  | 0 | &_rehash(); | 
| 120 |  |  |  |  |  |  | #use Data::Dumper; | 
| 121 |  |  |  |  |  |  | #print Dumper \@magic_80, \@magic; | 
| 122 |  |  |  |  |  |  | } | 
| 123 |  |  |  |  |  |  |  | 
| 124 |  |  |  |  |  |  | sub _rehash { | 
| 125 | 1 |  |  | 1 |  | 2 | local $_; # limit scope of $_ ... :S | 
| 126 | 1 |  |  |  |  | 4 | ($max_buffer, @magic_80, @magic) = (32); # clear data | 
| 127 |  |  |  |  |  |  | my @magicfiles = @File::MimeInfo::DIRS | 
| 128 | 1 | 50 |  |  |  | 8 | ? ( grep {-e $_ && -r $_} | 
|  | 1 | 50 |  |  |  | 30 |  | 
| 129 |  |  |  |  |  |  | map "$_/magic", @File::MimeInfo::DIRS ) | 
| 130 |  |  |  |  |  |  | : ( reverse data_files('mime/magic') ) ; | 
| 131 | 1 |  |  |  |  | 3 | my @done; | 
| 132 | 1 |  |  |  |  | 3 | for my $file (@magicfiles) { | 
| 133 | 1 | 50 |  |  |  | 3 | next if grep {$file eq $_} @done; | 
|  | 0 |  |  |  |  | 0 |  | 
| 134 | 1 |  |  |  |  | 4 | _hash_magic($file); | 
| 135 | 1 |  |  |  |  | 4 | push @done, $file; | 
| 136 |  |  |  |  |  |  | } | 
| 137 | 1 |  |  |  |  | 7 | @magic = sort {$$b[0] <=> $$a[0]} @magic; | 
|  | 10 |  |  |  |  | 20 |  | 
| 138 | 1 |  |  |  |  | 4 | while ($magic[0][0] >= 80) { | 
| 139 | 2 |  |  |  |  | 6 | push @magic_80, shift @magic; | 
| 140 |  |  |  |  |  |  | } | 
| 141 | 1 |  |  |  |  | 4 | $_hashed = 1; | 
| 142 |  |  |  |  |  |  | } | 
| 143 |  |  |  |  |  |  |  | 
| 144 |  |  |  |  |  |  | sub _hash_magic { | 
| 145 | 1 |  |  | 1 |  | 2 | my $file = shift; | 
| 146 |  |  |  |  |  |  |  | 
| 147 | 1 |  | 33 |  |  | 34 | open MAGIC, '<', $file | 
| 148 |  |  |  |  |  |  | || croak "Could not open file '$file' for reading"; | 
| 149 | 1 |  |  |  |  | 4 | binmode MAGIC; | 
| 150 | 1 | 50 |  |  |  | 42 | eq "MIME-Magic\x00\n" | 
| 151 |  |  |  |  |  |  | or carp "Magic file '$file' doesn't seem to be a magic file"; | 
| 152 | 1 |  |  |  |  | 3 | my $line = 1; | 
| 153 | 1 |  |  |  |  | 5 | while () { | 
| 154 | 37 |  |  |  |  | 102 | $line++; | 
| 155 |  |  |  |  |  |  |  | 
| 156 | 37 | 100 |  |  |  | 105 | if (/^\[(\d+):(.*?)\]\n$/) { | 
| 157 | 6 |  |  |  |  | 25 | push @magic, [$1,$2]; | 
| 158 | 6 |  |  |  |  | 20 | next; | 
| 159 |  |  |  |  |  |  | } | 
| 160 |  |  |  |  |  |  |  | 
| 161 |  |  |  |  |  |  | s/^(\d*)>(\d+)=(.{2})//s | 
| 162 | 31 | 50 | 0 |  |  | 160 | || warn "$file line $line skipped\n" && next; | 
| 163 | 31 |  |  |  |  | 130 | my ($i, $o, $l) = ($1, $2, unpack 'n', $3); | 
| 164 |  |  |  |  |  |  | # indent, offset, value length | 
| 165 | 31 |  |  |  |  | 74 | while (length($_) <= $l) { | 
| 166 | 0 |  |  |  |  | 0 | $_ .= ; | 
| 167 | 0 |  |  |  |  | 0 | $line++; | 
| 168 |  |  |  |  |  |  | } | 
| 169 |  |  |  |  |  |  |  | 
| 170 | 31 |  |  |  |  | 68 | my $v = substr $_, 0, $l, ''; # value | 
| 171 |  |  |  |  |  |  |  | 
| 172 | 31 | 50 | 0 |  |  | 544 | /^(?:&(.{$l}))?(?:~(\d+))?(?:\+(\d+))?\n$/s | 
| 173 |  |  |  |  |  |  | || warn "$file line $line skipped\n" && next; | 
| 174 | 31 |  | 100 |  |  | 181 | my ($m, $w, $r) = ($1, $2 || 1, $3 || 1); | 
|  |  |  | 100 |  |  |  |  | 
| 175 |  |  |  |  |  |  | # mask, word size, range | 
| 176 | 31 |  |  |  |  | 49 | my $mdef = defined $m; | 
| 177 |  |  |  |  |  |  |  | 
| 178 |  |  |  |  |  |  | # possible big endian to little endian conversion | 
| 179 |  |  |  |  |  |  | # as a bonus perl also takes care of weird endian cases | 
| 180 | 31 | 100 |  |  |  | 59 | if ( $w != 1 ) { | 
| 181 | 5 |  |  |  |  | 8 | my ( $utpl, $ptpl ); | 
| 182 | 5 | 50 |  |  |  | 10 | if ( 2 == $w ) { | 
|  |  | 0 |  |  |  |  |  | 
| 183 | 5 |  |  |  |  | 17 | $v = pack 'S', unpack 'n', $v; | 
| 184 | 5 | 50 |  |  |  | 12 | $m = pack 'S', unpack 'n', $m if $mdef; | 
| 185 |  |  |  |  |  |  | } | 
| 186 |  |  |  |  |  |  | elsif ( 4 == $w ) { | 
| 187 | 0 |  |  |  |  | 0 | $v = pack 'L', unpack 'N', $v; | 
| 188 | 0 | 0 |  |  |  | 0 | $m = pack 'L', unpack 'N', $m if $mdef; | 
| 189 |  |  |  |  |  |  | } | 
| 190 |  |  |  |  |  |  | else { | 
| 191 | 0 |  |  |  |  | 0 | warn "Unsupported word size: $w octets ". | 
| 192 |  |  |  |  |  |  | " at $file line $line\n" | 
| 193 |  |  |  |  |  |  | } | 
| 194 |  |  |  |  |  |  | } | 
| 195 |  |  |  |  |  |  |  | 
| 196 | 31 |  |  |  |  | 100 | my $end = $o + $l + $r - 1; | 
| 197 | 31 | 100 |  |  |  | 59 | $max_buffer = $end if $max_buffer < $end; | 
| 198 | 31 | 100 |  |  |  | 77 | my $ref = $i ? _find_branch($i) : $magic[-1]; | 
| 199 | 31 |  |  |  |  | 42 | $r--;             # 1-based => 0-based range for regex | 
| 200 | 31 | 100 |  |  |  | 55 | $r *= 8 if $mdef; # bytes => bits for matching a mask | 
| 201 | 31 | 100 |  |  |  | 111 | my $reg = '^' | 
|  |  | 100 |  |  |  |  |  | 
| 202 |  |  |  |  |  |  | . ( $r    ? "(.{0,$r}?)" : '()'           ) | 
| 203 |  |  |  |  |  |  | . ( $mdef ? '('. _mask_regex($v, $m) .')' | 
| 204 |  |  |  |  |  |  | : '('. quotemeta($v)       .')' ) ; | 
| 205 | 31 |  |  |  |  | 361 | push @$ref, [ | 
| 206 |  |  |  |  |  |  | $o, $end,    # offset, offset+length+range | 
| 207 |  |  |  |  |  |  | $mdef,       # boolean for mask | 
| 208 |  |  |  |  |  |  | qr/$reg/sm,  # the regex to match | 
| 209 |  |  |  |  |  |  | undef        # debug data | 
| 210 |  |  |  |  |  |  | ]; | 
| 211 | 31 | 50 |  |  |  | 248 | $$ref[-1][-1] = "$file line $line" if $DEBUG; | 
| 212 |  |  |  |  |  |  | } | 
| 213 | 1 |  |  |  |  | 15 | close MAGIC; | 
| 214 |  |  |  |  |  |  | } | 
| 215 |  |  |  |  |  |  |  | 
| 216 |  |  |  |  |  |  | sub _find_branch { # finds last branch of tree of rules | 
| 217 | 4 |  |  | 4 |  | 9 | my $i = shift; | 
| 218 | 4 |  |  |  |  | 5 | my $ref = $magic[-1]; | 
| 219 | 4 |  |  |  |  | 13 | for (1..$i) { $ref = $$ref[-1] } | 
|  | 6 |  |  |  |  | 11 |  | 
| 220 | 4 |  |  |  |  | 8 | return $ref; | 
| 221 |  |  |  |  |  |  | } | 
| 222 |  |  |  |  |  |  |  | 
| 223 |  |  |  |  |  |  | sub _mask_regex { # build regex based on mask | 
| 224 | 1 |  |  | 1 |  | 4 | my ($v, $m) = @_; | 
| 225 | 1 |  |  |  |  | 19 | my @v = split '', unpack "b*", $v; | 
| 226 | 1 |  |  |  |  | 17 | my @m = split '', unpack "b*", $m; | 
| 227 | 1 |  |  |  |  | 3 | my $re = ''; | 
| 228 | 1 |  |  |  |  | 4 | for (0 .. $#m) { | 
| 229 | 64 | 100 |  |  |  | 101 | $re .= $m[$_] ? $v[$_] : '.' ; | 
| 230 |  |  |  |  |  |  | # If $mask = 1 than ($input && $mask) will be same as $input | 
| 231 |  |  |  |  |  |  | # If $mask = 0 than ($input && $mask) is always 0 | 
| 232 |  |  |  |  |  |  | # But $mask = 0 only makes sense if $value = 0 | 
| 233 |  |  |  |  |  |  | # So if $mask = 0 we ignore that bit of $input | 
| 234 |  |  |  |  |  |  | } | 
| 235 | 1 |  |  |  |  | 10 | return $re; | 
| 236 |  |  |  |  |  |  | } | 
| 237 |  |  |  |  |  |  |  | 
| 238 |  |  |  |  |  |  | sub _escape_bytes { # used for debug output | 
| 239 | 0 |  |  | 0 |  |  | my $string = shift; | 
| 240 | 0 | 0 |  |  |  |  | if ($string =~ /[\x00-\x1F\x7F]/) { | 
| 241 |  |  |  |  |  |  | $string = join '', map { | 
| 242 | 0 |  |  |  |  |  | my $o = ord($_); | 
|  | 0 |  |  |  |  |  |  | 
| 243 | 0 | 0 |  |  |  |  | ($o < 32)   ? '^' . chr($o + 64) : | 
|  |  | 0 |  |  |  |  |  | 
| 244 |  |  |  |  |  |  | ($o == 127) ? '^?'               : $_ ; | 
| 245 |  |  |  |  |  |  | } split '', $string; | 
| 246 |  |  |  |  |  |  | } | 
| 247 | 0 |  |  |  |  |  | return $string; | 
| 248 |  |  |  |  |  |  | } | 
| 249 |  |  |  |  |  |  |  | 
| 250 |  |  |  |  |  |  | 1; | 
| 251 |  |  |  |  |  |  |  | 
| 252 |  |  |  |  |  |  | __END__ |