| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package MIME::Detect::Type; | 
| 2 | 4 |  |  | 4 |  | 31 | use strict; | 
|  | 4 |  |  |  |  | 9 |  | 
|  | 4 |  |  |  |  | 137 |  | 
| 3 | 4 |  |  | 4 |  | 23 | use Moo 2; | 
|  | 4 |  |  |  |  | 108 |  | 
|  | 4 |  |  |  |  | 36 |  | 
| 4 | 4 |  |  | 4 |  | 1504 | use Filter::signatures; | 
|  | 4 |  |  |  |  | 8 |  | 
|  | 4 |  |  |  |  | 38 |  | 
| 5 | 4 |  |  | 4 |  | 125 | use feature 'signatures'; | 
|  | 4 |  |  |  |  | 8 |  | 
|  | 4 |  |  |  |  | 385 |  | 
| 6 | 4 |  |  | 4 |  | 35 | no warnings 'experimental::signatures'; | 
|  | 4 |  |  |  |  | 21 |  | 
|  | 4 |  |  |  |  | 4984 |  | 
| 7 |  |  |  |  |  |  |  | 
| 8 |  |  |  |  |  |  | our $VERSION = '0.10'; | 
| 9 |  |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  | =head1 NAME | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | MIME::Detect::Type - the type of a file | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | my $type = $mime->mime_type('/usr/bin/perl'); | 
| 17 |  |  |  |  |  |  | print $type->mime_type; | 
| 18 |  |  |  |  |  |  | print $type->comment; | 
| 19 |  |  |  |  |  |  |  | 
| 20 |  |  |  |  |  |  | =head1 METHODS | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  | =cut | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | =head2 C<< $type->aliases >> | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | Reference to the aliases of this type | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | =cut | 
| 29 |  |  |  |  |  |  |  | 
| 30 |  |  |  |  |  |  | has 'aliases' => ( | 
| 31 |  |  |  |  |  |  | is => 'ro', | 
| 32 |  |  |  |  |  |  | default => sub {[]}, | 
| 33 |  |  |  |  |  |  | ); | 
| 34 |  |  |  |  |  |  |  | 
| 35 |  |  |  |  |  |  | =head2 C<< $type->comment >> | 
| 36 |  |  |  |  |  |  |  | 
| 37 |  |  |  |  |  |  | Array reference of the type description in various languages | 
| 38 |  |  |  |  |  |  | (currently unused) | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  | =cut | 
| 41 |  |  |  |  |  |  |  | 
| 42 |  |  |  |  |  |  | has 'comment' => ( | 
| 43 |  |  |  |  |  |  | is => 'ro', | 
| 44 |  |  |  |  |  |  | ); | 
| 45 |  |  |  |  |  |  |  | 
| 46 |  |  |  |  |  |  | =head2 C<< $type->mime_type >> | 
| 47 |  |  |  |  |  |  |  | 
| 48 |  |  |  |  |  |  | print "Content-Type: " . $type->mime_type . "\r\n"; | 
| 49 |  |  |  |  |  |  |  | 
| 50 |  |  |  |  |  |  | String of the content type | 
| 51 |  |  |  |  |  |  |  | 
| 52 |  |  |  |  |  |  | =cut | 
| 53 |  |  |  |  |  |  |  | 
| 54 |  |  |  |  |  |  | has 'mime_type' => ( | 
| 55 |  |  |  |  |  |  | is => 'ro', | 
| 56 |  |  |  |  |  |  | ); | 
| 57 |  |  |  |  |  |  |  | 
| 58 |  |  |  |  |  |  | =head2 C<< $type->globs >> | 
| 59 |  |  |  |  |  |  |  | 
| 60 |  |  |  |  |  |  | print $_ for @{ $type->globs }; | 
| 61 |  |  |  |  |  |  |  | 
| 62 |  |  |  |  |  |  | Arrayref of the wildcard globs of this type | 
| 63 |  |  |  |  |  |  |  | 
| 64 |  |  |  |  |  |  | =cut | 
| 65 |  |  |  |  |  |  |  | 
| 66 |  |  |  |  |  |  | has 'globs' => ( | 
| 67 |  |  |  |  |  |  | is => 'ro', | 
| 68 |  |  |  |  |  |  | default => sub {[]}, | 
| 69 |  |  |  |  |  |  | ); | 
| 70 |  |  |  |  |  |  |  | 
| 71 | 2322 |  |  | 2322 |  | 4096 | sub _get_extension( $e=undef ) { | 
|  | 2322 |  |  |  |  | 2965 |  | 
|  | 2322 |  |  |  |  | 2687 |  | 
| 72 | 2322 | 100 |  |  |  | 4502 | if( defined $e ) { $e =~ s!^\*\.!! }; | 
|  | 6 |  |  |  |  | 26 |  | 
| 73 | 2322 |  |  |  |  | 6871 | $e | 
| 74 |  |  |  |  |  |  | } | 
| 75 |  |  |  |  |  |  |  | 
| 76 | 3078 |  |  | 3078 |  | 3689 | sub _globmatch( $target, $glob ) { | 
|  | 3078 |  |  |  |  | 4344 |  | 
|  | 3078 |  |  |  |  | 4566 |  | 
|  | 3078 |  |  |  |  | 3507 |  | 
| 77 | 3078 |  |  |  |  | 13673 | $glob =~ s!([.+\\])!\\$1!g; | 
| 78 | 3078 |  |  |  |  | 9337 | $glob =~ s!\*!.*!g; | 
| 79 | 3078 |  |  |  |  | 36692 | $target =~ /\A$glob\z/; | 
| 80 |  |  |  |  |  |  | } | 
| 81 |  |  |  |  |  |  |  | 
| 82 |  |  |  |  |  |  | =head2 C<< $type->extension >> | 
| 83 |  |  |  |  |  |  |  | 
| 84 |  |  |  |  |  |  | print $type->extension; # pl | 
| 85 |  |  |  |  |  |  |  | 
| 86 |  |  |  |  |  |  | Returns the default extension for this mime type, without a separating | 
| 87 |  |  |  |  |  |  | dot or the glob. | 
| 88 |  |  |  |  |  |  |  | 
| 89 |  |  |  |  |  |  | =cut | 
| 90 |  |  |  |  |  |  |  | 
| 91 | 0 |  |  | 0 | 1 | 0 | sub extension($self) { | 
|  | 0 |  |  |  |  | 0 |  | 
|  | 0 |  |  |  |  | 0 |  | 
| 92 | 0 |  |  |  |  | 0 | _get_extension( $self->globs->[0] ); | 
| 93 |  |  |  |  |  |  | } | 
| 94 |  |  |  |  |  |  |  | 
| 95 |  |  |  |  |  |  | =head2 C<< $type->valid_extension( $fn ) >> | 
| 96 |  |  |  |  |  |  |  | 
| 97 |  |  |  |  |  |  | print "$fn has the wrong extension" | 
| 98 |  |  |  |  |  |  | unless $type->valid_extension( $fn ); | 
| 99 |  |  |  |  |  |  |  | 
| 100 |  |  |  |  |  |  | Returns whether C<$fn> matches one of the extensions | 
| 101 |  |  |  |  |  |  | as specified in C. If there is a match, the extension is returned | 
| 102 |  |  |  |  |  |  | without dot. | 
| 103 |  |  |  |  |  |  |  | 
| 104 |  |  |  |  |  |  | =cut | 
| 105 |  |  |  |  |  |  |  | 
| 106 | 2322 |  |  | 2322 | 1 | 3148 | sub valid_extension( $self, $fn ) { | 
|  | 2322 |  |  |  |  | 2990 |  | 
|  | 2322 |  |  |  |  | 3073 |  | 
|  | 2322 |  |  |  |  | 2802 |  | 
| 107 |  |  |  |  |  |  | _get_extension((grep { | 
| 108 | 3078 |  |  |  |  | 5086 | _globmatch( $fn, $_ ) | 
| 109 | 2322 |  |  |  |  | 2918 | } @{ $self->globs })[0]) | 
|  | 2322 |  |  |  |  | 7051 |  | 
| 110 |  |  |  |  |  |  | } | 
| 111 |  |  |  |  |  |  |  | 
| 112 |  |  |  |  |  |  | =head2 C<< $type->priority >> | 
| 113 |  |  |  |  |  |  |  | 
| 114 |  |  |  |  |  |  | print $type->priority; | 
| 115 |  |  |  |  |  |  |  | 
| 116 |  |  |  |  |  |  | Priority of this type. Types with higher priority | 
| 117 |  |  |  |  |  |  | get tried first when trying to recognize a file type. | 
| 118 |  |  |  |  |  |  |  | 
| 119 |  |  |  |  |  |  | The default priority is 50. | 
| 120 |  |  |  |  |  |  |  | 
| 121 |  |  |  |  |  |  | =cut | 
| 122 |  |  |  |  |  |  |  | 
| 123 |  |  |  |  |  |  | has 'priority' => ( | 
| 124 |  |  |  |  |  |  | is => 'ro', | 
| 125 |  |  |  |  |  |  | default => 50, | 
| 126 |  |  |  |  |  |  | ); | 
| 127 |  |  |  |  |  |  |  | 
| 128 |  |  |  |  |  |  | has 'rules' => ( | 
| 129 |  |  |  |  |  |  | is => 'ro', | 
| 130 |  |  |  |  |  |  | default => sub { [] }, | 
| 131 |  |  |  |  |  |  | ); | 
| 132 |  |  |  |  |  |  |  | 
| 133 |  |  |  |  |  |  | =head2 C<< $type->superclass >> | 
| 134 |  |  |  |  |  |  |  | 
| 135 |  |  |  |  |  |  | my $sc = $type->superclass; | 
| 136 |  |  |  |  |  |  | print $sc->mime_type; | 
| 137 |  |  |  |  |  |  |  | 
| 138 |  |  |  |  |  |  | The notional superclass of this file type. Note that superclasses | 
| 139 |  |  |  |  |  |  | don't necessarily match the same magic numbers. | 
| 140 |  |  |  |  |  |  |  | 
| 141 |  |  |  |  |  |  | =cut | 
| 142 |  |  |  |  |  |  |  | 
| 143 |  |  |  |  |  |  | has 'superclass' => ( | 
| 144 |  |  |  |  |  |  | is => 'rw', | 
| 145 |  |  |  |  |  |  | default => undef, | 
| 146 |  |  |  |  |  |  | ); | 
| 147 |  |  |  |  |  |  |  | 
| 148 | 243 |  |  | 243 | 0 | 303 | sub parse_num( $num ) { | 
|  | 243 |  |  |  |  | 347 |  | 
|  | 243 |  |  |  |  | 286 |  | 
| 149 | 243 | 100 |  |  |  | 1442 | $num =~ /^0x/ and return hex $num; | 
| 150 | 30 |  |  |  |  | 163 | return 0+$num | 
| 151 |  |  |  |  |  |  | } | 
| 152 |  |  |  |  |  |  |  | 
| 153 | 2321 |  |  | 2321 | 0 | 55760 | sub BUILD($self, $args) { | 
|  | 2321 |  |  |  |  | 3299 |  | 
|  | 2321 |  |  |  |  | 2914 |  | 
|  | 2321 |  |  |  |  | 2612 |  | 
| 154 |  |  |  |  |  |  | # Preparse the rules here: | 
| 155 | 2321 |  |  |  |  | 3028 | for my $rule (@{ $args->{rules} }) { | 
|  | 2321 |  |  |  |  | 8957 |  | 
| 156 | 2249 |  |  |  |  | 3451 | my $value = $rule->{value}; | 
| 157 |  |  |  |  |  |  |  | 
| 158 |  |  |  |  |  |  | # This should go into the part reading the XML, not into the part | 
| 159 |  |  |  |  |  |  | # evaluating the rules | 
| 160 | 2249 | 100 | 66 |  |  | 9098 | if( ref $rule eq 'HASH' and $rule->{type} eq 'string' ) { | 
|  |  | 100 | 66 |  |  |  |  | 
|  |  | 100 | 66 |  |  |  |  | 
|  |  | 100 | 66 |  |  |  |  | 
|  |  | 100 | 66 |  |  |  |  | 
|  |  | 100 | 66 |  |  |  |  | 
|  |  | 100 | 66 |  |  |  |  | 
|  |  | 50 | 33 |  |  |  |  | 
| 161 | 2006 |  |  |  |  | 6010 | my %replace = ( | 
| 162 |  |  |  |  |  |  | 'n' => "\n", | 
| 163 |  |  |  |  |  |  | 'r' => "\r", | 
| 164 |  |  |  |  |  |  | 't' => "\t", | 
| 165 |  |  |  |  |  |  | "\\" => "\\", | 
| 166 |  |  |  |  |  |  | ); | 
| 167 | 2006 | 50 |  |  |  | 6250 | $value =~ s{\\([nrt\\]|([0-7][0-7][0-7])|x([0-9a-fA-F][0-9a-fA-F]))} | 
|  | 1571 | 100 |  |  |  | 8725 |  | 
|  |  | 100 |  |  |  |  |  | 
| 168 |  |  |  |  |  |  | { $replace{$1} ? $replace{$1} | 
| 169 |  |  |  |  |  |  | : $2 ? chr(oct($2)) | 
| 170 |  |  |  |  |  |  | : $3 ? chr(hex($3)) | 
| 171 |  |  |  |  |  |  | : $1 | 
| 172 |  |  |  |  |  |  | }xge; | 
| 173 |  |  |  |  |  |  |  | 
| 174 | 63 |  |  |  |  | 146 | } elsif( ref $rule eq 'HASH' and $rule->{type} eq 'little32' ) { | 
| 175 |  |  |  |  |  |  | $value = pack 'V', parse_num($rule->{value}); | 
| 176 |  |  |  |  |  |  |  | 
| 177 | 24 |  |  |  |  | 66 | } elsif( ref $rule eq 'HASH' and $rule->{type} eq 'little16' ) { | 
| 178 |  |  |  |  |  |  | $value = pack 'v', parse_num($rule->{value}); | 
| 179 |  |  |  |  |  |  |  | 
| 180 | 96 |  |  |  |  | 218 | } elsif( ref $rule eq 'HASH' and $rule->{type} eq 'big32' ) { | 
| 181 |  |  |  |  |  |  | $value = pack 'N', parse_num($rule->{value}); | 
| 182 |  |  |  |  |  |  |  | 
| 183 | 27 |  |  |  |  | 74 | } elsif( ref $rule eq 'HASH' and $rule->{type} eq 'big16' ) { | 
| 184 |  |  |  |  |  |  | $value = pack 'n', parse_num($rule->{value}); | 
| 185 |  |  |  |  |  |  |  | 
| 186 | 12 |  |  |  |  | 29 | } elsif( ref $rule eq 'HASH' and $rule->{type} eq 'host16' ) { | 
| 187 |  |  |  |  |  |  | $value = pack 'S', parse_num($rule->{value}); | 
| 188 |  |  |  |  |  |  |  | 
| 189 | 9 |  |  |  |  | 34 | } elsif( ref $rule eq 'HASH' and $rule->{type} eq 'host32' ) { | 
| 190 |  |  |  |  |  |  | $value = pack 'L', parse_num($rule->{value}); | 
| 191 |  |  |  |  |  |  |  | 
| 192 | 12 |  |  |  |  | 31 | } elsif( ref $rule eq 'HASH' and $rule->{type} eq 'byte' ) { | 
| 193 |  |  |  |  |  |  | $value = pack 'c', parse_num($rule->{value}); | 
| 194 |  |  |  |  |  |  |  | 
| 195 | 0 |  |  |  |  | 0 | } else { | 
| 196 |  |  |  |  |  |  | die "Unknown rule type '$rule->{type}'"; | 
| 197 |  |  |  |  |  |  | }; | 
| 198 | 2249 |  |  |  |  | 3558 |  | 
| 199 | 2249 |  |  |  |  | 8433 | $rule->{type} = 'string'; | 
| 200 |  |  |  |  |  |  | $rule->{value} = $value; | 
| 201 |  |  |  |  |  |  | } | 
| 202 |  |  |  |  |  |  | } | 
| 203 | 0 |  |  | 0 | 0 | 0 |  | 
|  | 0 |  |  |  |  | 0 |  | 
|  | 0 |  |  |  |  | 0 |  | 
|  | 0 |  |  |  |  | 0 |  | 
| 204 | 0 |  |  |  |  | 0 | sub compile($self,$fragment) { | 
| 205 |  |  |  |  |  |  | die "No direct-to-Perl compilation implemented yet."; | 
| 206 |  |  |  |  |  |  | } | 
| 207 |  |  |  |  |  |  |  | 
| 208 |  |  |  |  |  |  | =head2 C<< $type->matches $buffer >> | 
| 209 |  |  |  |  |  |  |  | 
| 210 |  |  |  |  |  |  | my $buf = "PK\003\004"; # first four bytes of file | 
| 211 |  |  |  |  |  |  | if( $type->matches( $buf ) { | 
| 212 |  |  |  |  |  |  | print "Looks like a " . $type->mime_type . " file"; | 
| 213 |  |  |  |  |  |  | }; | 
| 214 |  |  |  |  |  |  |  | 
| 215 |  |  |  |  |  |  | =cut | 
| 216 | 1552 |  |  | 1552 | 1 | 7067 |  | 
|  | 1552 |  |  |  |  | 1742 |  | 
|  | 1552 |  |  |  |  | 1607 |  | 
|  | 1552 |  |  |  |  | 2680 |  | 
|  | 1552 |  |  |  |  | 1666 |  | 
| 217 | 1552 |  |  |  |  | 2417 | sub matches($self, $buffer, $rules = $self->rules) { | 
| 218 |  |  |  |  |  |  | my @rules = @$rules; | 
| 219 |  |  |  |  |  |  |  | 
| 220 |  |  |  |  |  |  | # Superclasses are for information only | 
| 221 |  |  |  |  |  |  | #if( $self->superclass and $self->superclass->mime_type !~ m!^text/!) { | 
| 222 |  |  |  |  |  |  | #    return if ! $self->superclass->matches($buffer); | 
| 223 |  |  |  |  |  |  | #}; | 
| 224 | 1552 | 100 |  |  |  | 2461 |  | 
| 225 |  |  |  |  |  |  | if( !ref $buffer) { | 
| 226 | 4 |  |  |  |  | 9 | # Upgrade to an in-memory filehandle | 
| 227 | 4 | 50 |  | 1 |  | 100 | my $_buffer = $buffer; | 
|  | 1 |  |  |  |  | 11 |  | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 7 |  | 
| 228 |  |  |  |  |  |  | open my $fh, '<', \$_buffer | 
| 229 | 4 |  |  |  |  | 966 | or die "Couldn't open in-memory handle!"; | 
| 230 | 4 |  |  |  |  | 108 | binmode $fh; | 
| 231 |  |  |  |  |  |  | $buffer = MIME::Detect::Buffer->new(fh => $fh); | 
| 232 |  |  |  |  |  |  | }; | 
| 233 |  |  |  |  |  |  |  | 
| 234 | 1552 | 100 |  |  |  | 4531 | # Hardcoded rule for plain text detection... | 
| 235 | 2 |  |  |  |  | 9 | if( $self->mime_type eq 'text/plain') { | 
| 236 | 2 |  |  |  |  | 18 | my $buf = $buffer->request(0,256); | 
| 237 |  |  |  |  |  |  | return $buf !~ /[\x00-\x08\x0b\x0c\x0e-\x1f]/; | 
| 238 |  |  |  |  |  |  | }; | 
| 239 | 1550 |  |  |  |  | 1681 |  | 
| 240 | 1550 |  |  |  |  | 2030 | my $matches; | 
| 241 |  |  |  |  |  |  | for my $rule (@rules) { | 
| 242 | 1499 |  |  |  |  | 2562 |  | 
| 243 |  |  |  |  |  |  | my $value = $rule->{value}; | 
| 244 | 1499 |  |  |  |  | 3155 |  | 
| 245 |  |  |  |  |  |  | my $buf = $buffer->request($rule->{offset}, length $value); | 
| 246 |  |  |  |  |  |  | #use Data::Dumper; | 
| 247 | 4 |  |  | 4 |  | 35 | #$Data::Dumper::Useqq = 1; | 
|  | 4 |  |  |  |  | 7 |  | 
|  | 4 |  |  |  |  | 827 |  | 
| 248 | 1499 | 100 |  |  |  | 3199 | no warnings ('uninitialized', 'substr'); | 
| 249 |  |  |  |  |  |  | if( $rule->{offset} =~ m!^(\d+):(\d+)$! ) { | 
| 250 |  |  |  |  |  |  | #warn sprintf "%s: index match %d:%d for %s", $self->mime_type, $1,$2, Dumper $value; | 
| 251 | 227 |  | 66 |  |  | 1139 | #warn Dumper substr( $buf, 0, ($2-$1)+length($value)); | 
| 252 |  |  |  |  |  |  | $matches = $matches || 1+index( substr( $buf, 0, ($2-$1)+length($value)), $value ); | 
| 253 |  |  |  |  |  |  | } else { | 
| 254 |  |  |  |  |  |  | #warn sprintf "%s: substring match %d for %s", $self->mime_type, $rule->{offset}, Dumper $value; | 
| 255 | 1272 |  | 66 |  |  | 3286 | #warn Dumper substr( $buf, $rule->{offset}, length($value)); | 
| 256 |  |  |  |  |  |  | $matches = $matches || substr( $buf, 0, length($value)) eq $value; | 
| 257 | 1499 | 100 | 33 |  |  | 2708 | }; | 
| 258 |  |  |  |  |  |  | $matches = $matches && $self->matches( $buffer, $rule->{and} ) if $rule->{and}; | 
| 259 | 1499 | 100 |  |  |  | 2825 |  | 
| 260 |  |  |  |  |  |  | last if $matches; | 
| 261 | 1550 |  |  |  |  | 4067 | }; | 
| 262 |  |  |  |  |  |  | !!$matches | 
| 263 |  |  |  |  |  |  | } | 
| 264 |  |  |  |  |  |  |  | 
| 265 |  |  |  |  |  |  | 1; | 
| 266 |  |  |  |  |  |  |  | 
| 267 |  |  |  |  |  |  | =head1 REPOSITORY | 
| 268 |  |  |  |  |  |  |  | 
| 269 |  |  |  |  |  |  | The public repository of this module is | 
| 270 |  |  |  |  |  |  | L. | 
| 271 |  |  |  |  |  |  |  | 
| 272 |  |  |  |  |  |  | =head1 SUPPORT | 
| 273 |  |  |  |  |  |  |  | 
| 274 |  |  |  |  |  |  | The public support forum of this module is | 
| 275 |  |  |  |  |  |  | L. | 
| 276 |  |  |  |  |  |  |  | 
| 277 |  |  |  |  |  |  | =head1 BUG TRACKER | 
| 278 |  |  |  |  |  |  |  | 
| 279 |  |  |  |  |  |  | Please report bugs in this module via the RT CPAN bug queue at | 
| 280 |  |  |  |  |  |  | L | 
| 281 |  |  |  |  |  |  | or via mail to L. | 
| 282 |  |  |  |  |  |  |  | 
| 283 |  |  |  |  |  |  | =head1 AUTHOR | 
| 284 |  |  |  |  |  |  |  | 
| 285 |  |  |  |  |  |  | Max Maischein C | 
| 286 |  |  |  |  |  |  |  | 
| 287 |  |  |  |  |  |  | =head1 COPYRIGHT (c) | 
| 288 |  |  |  |  |  |  |  | 
| 289 |  |  |  |  |  |  | Copyright 2015-2018 by Max Maischein C. | 
| 290 |  |  |  |  |  |  |  | 
| 291 |  |  |  |  |  |  | =head1 LICENSE | 
| 292 |  |  |  |  |  |  |  | 
| 293 |  |  |  |  |  |  | This module is released under the same terms as Perl itself. | 
| 294 |  |  |  |  |  |  |  | 
| 295 |  |  |  |  |  |  | =cut |