File Coverage

blib/lib/File/MimeInfo.pm
Criterion Covered Total %
statement 195 216 90.2
branch 119 190 62.6
condition 11 18 61.1
subroutine 24 24 100.0
pod 13 13 100.0
total 362 461 78.5


line stmt bran cond sub pod time code
1             package File::MimeInfo;
2              
3 9     9   1521772 use strict;
  9         23  
  9         422  
4 9     9   59 use warnings;
  9         16  
  9         542  
5 9     9   60 use Carp;
  9         16  
  9         703  
6 9     9   49 use Fcntl 'SEEK_SET';
  9         23  
  9         548  
7 9     9   51 use File::Spec;
  9         15  
  9         367  
8 9     9   2145 use File::BaseDir qw/data_files/;
  9         6487  
  9         11021  
9             require Exporter;
10              
11             our @ISA = qw(Exporter);
12             our @EXPORT = qw(mimetype);
13             our @EXPORT_OK = qw(extensions describe globs inodetype mimetype_canon mimetype_isa);
14             our $VERSION = '0.37';
15             our $DEBUG;
16              
17             our ($_hashed, $_hashed_aliases, $_hashed_subclasses, $_has_mimeinfo_database);
18             our (@globs, %literal, %extension, %mime2ext, %aliases, %subclasses);
19             our ($LANG, @DIRS);
20             # @globs = [ [ 'glob', qr//, $mime_string ], ... ]
21             # %literal contains literal matches
22             # %extension contains extensions (globs matching /^\*(\.\w)+$/ )
23             # %mime2ext is used for looking up extension by mime type
24             # %aliases contains the aliases table
25             # %subclasses contains the subclasses table
26             # $LANG can be used to set a default language for the comments
27             # @DIRS can be used to specify custom database directories
28              
29 1     1 1 966 sub new { bless \$VERSION, shift } # what else is there to bless ?
30              
31             sub mimetype {
32 21     21 1 9426 my $file = pop;
33 21 50       64 croak 'subroutine "mimetype" needs a filename as argument' unless defined $file;
34             return
35 21   100     43 inodetype($file) ||
36             globs($file) ||
37             default($file);
38             }
39              
40             sub inodetype {
41 29     29 1 42 my $file = pop;
42 29 50       75 print STDERR "> Checking inode type\n" if $DEBUG;
43 29 100       771 lstat $file or return undef;
44 16 100       91 return undef if -f _;
45 2 0       24 my $t = (-l $file) ? 'inode/symlink' : # Win32 does not like '_' here
    0          
    0          
    0          
    50          
    100          
46             (-d _) ? 'inode/directory' :
47             (-p _) ? 'inode/fifo' :
48             (-c _) ? 'inode/chardevice' :
49             (-b _) ? 'inode/blockdevice' :
50             (-S _) ? 'inode/socket' : '' ;
51 2 100       8 if ($t eq 'inode/directory') { # compare devices to detect mount-points
52 1         4 my $dev = (stat _)[0]; # device of the node under investigation
53 1         43 $file = File::Spec->rel2abs($file); # get full path
54 1         12 my @dirs = File::Spec->splitdir($file);
55 1         11 $file = File::Spec->catfile(@dirs); # removes trailing '/' or equivalent
56 1 50       20 return $t if -l $file; # parent can be on other dev for links
57 1         16 pop @dirs;
58 1         7 my $dir = File::Spec->catdir(@dirs); # parent dir
59 1 50       15 $t = 'inode/mount-point' unless (stat $dir)[0] == $dev; # compare devices
60 1         10 return $t;
61             }
62 1 50       14 else { return $t ? $t : undef }
63             }
64              
65             sub globs {
66 25     25 1 50 my $file = pop;
67 25 50       50 croak 'subroutine "globs" needs a filename as argument' unless defined $file;
68 25 100       50 rehash() unless $_hashed;
69 25         400 (undef, undef, $file) = File::Spec->splitpath($file); # remove path
70 25 50       70 print STDERR "> Checking globs for basename '$file'\n" if $DEBUG;
71              
72 25 100       70 return $literal{$file} if exists $literal{$file};
73              
74 24 100       84 if ($file =~ /\.(\w+(\.\w+)*)$/) {
75 11         35 my @ext = split /\./, $1;
76 11         33 while (@ext) {
77 16         35 my $ext = join('.', @ext);
78 16 50       28 print STDERR "> Checking for extension '.$ext'\n" if $DEBUG;
79 16 50       30 carp "WARNING: wantarray behaviour of globs() will change in the future.\n" if wantarray;
80             return wantarray
81             ? ($extension{$ext}, $ext)
82             : $extension{$ext}
83 16 50       115 if exists $extension{$ext};
    100          
84 6         15 shift @ext;
85             }
86             }
87              
88 14         31 for (@globs) {
89 14 100       122 next unless $file =~ $_->[1];
90 2 50       7 print STDERR "> This file name matches \"$_->[0]\"\n" if $DEBUG;
91 2         19 return $_->[2];
92             }
93              
94 12 100       43 return globs(lc $file) if $file =~ /[A-Z]/; # recurs
95 11         43 return undef;
96             }
97              
98             sub default {
99 8     8 1 14 my $file = pop;
100 8 50       41 croak 'subroutine "default" needs a filename as argument' unless defined $file;
101              
102 8         12 my $line;
103 8 0       28 unless (ref $file) {
    50          
104 8 100       122 return undef unless -f $file;
105 7 50       19 print STDERR "> File exists, trying default method\n" if $DEBUG;
106 7 100       70 return 'text/plain' if -z $file;
107              
108 6 50       178 open FILE, '<', $file or return undef;
109 6 50       36 binmode FILE, ':utf8' unless $] < 5.008;
110 6         153 read FILE, $line, 32;
111 6         61 close FILE;
112             }
113 0         0 elsif (ref $file eq 'Path::Tiny') {
114 0 0       0 return undef unless $file->exists;
115 0 0       0 print STDERR "> File is Path::Tiny object and exists, "
116             . "trying default method\n" if $DEBUG;
117 0 0       0 open my $fh, '<', $file or return undef;
118 0 0       0 binmode $fh, ':utf8' unless $] < 5.008;
119 0         0 read $fh, $line, 32;
120 0         0 close $fh;
121             }
122             else {
123 0 0       0 print STDERR "> Trying default method on object\n" if $DEBUG;
124              
125 0         0 $file->seek(0, SEEK_SET);
126 0         0 $file->read($line, 32);
127             }
128              
129             {
130 9     9   99 no warnings; # warnings can be thrown when input not ascii
  9         32  
  9         678  
  6         10  
131 6 100 66     47 if ($] < 5.008 or ! utf8::valid($line)) {
132 9     9   1052 use bytes; # avoid invalid utf8 chars
  9         1122  
  9         67  
133 2         14 $line =~ s/\s//g; # \m, \n and \t are also control chars
134 2 50       11 return 'text/plain' unless $line =~ /[\x00-\x1F\x7F]/;
135             }
136             else {
137             # use perl to do something intelligent for ascii & utf8
138 4 100       46 return 'text/plain' unless $line =~ /[^[:print:]\s]/;
139             }
140             }
141 3 50       8 print STDERR "> First 10 bytes of the file contain control chars\n" if $DEBUG;
142 3         23 return 'application/octet-stream';
143             }
144              
145             sub rehash {
146 5     5 1 547 (@globs, %literal, %extension, %mime2ext) = (); # clear all data
147 5         8 local $_; # limit scope of $_ ... :S
148             my @globfiles = @DIRS
149 5 50       31 ? ( grep {-e $_ && -r $_} map "$_/globs", @DIRS )
  1 100       23  
150             : ( reverse data_files('mime/globs') );
151 5 100       509 if (@globfiles) {
152 3         4 $_has_mimeinfo_database = 1;
153             } else {
154 2         492 carp "WARNING: You don't seem to have a mime-info database. " .
155             "The shared-mime-info package is available from http://freedesktop.org/";
156             }
157 5         17 my @done;
158 5         12 for my $file (@globfiles) {
159 5 100       11 next if grep {$file eq $_} @done;
  2         5  
160 3         25 _hash_globs($file);
161 3         7 push @done, $file;
162             }
163 5         16 $_hashed = 1;
164             }
165              
166             sub _hash_globs {
167 3     3   4 my $file = shift;
168 3 50       74 open GLOB, '<', $file or croak "Could not open file '$file' for reading" ;
169 3 50       15 binmode GLOB, ':utf8' unless $] < 5.008;
170 3         7 my ($string, $glob);
171 3         0 my %seen_extension;
172 3         69 while () {
173 33 100 66     150 next if /^\s*#/ or ! /\S/; # skip comments and empty lines
174 30         50 chomp;
175 30         85 ($string, $glob) = split /:/, $_, 2;
176 30 100       128 unless ($glob =~ /[\?\*\[]/) { $literal{$glob} = $string }
  3 100       14  
177 0         0 elsif ($glob =~ /^\*\.(\w+(\.\w+)*)$/) {
178 24 100       97 next if $seen_extension{$1}++;
179 21 50 33     48 if (exists $extension{$1} && $extension{$1} ne $string) {
180 0         0 my $old = $extension{$1};
181 0 0       0 if (defined $mime2ext{$old}) {
182 0         0 @{$mime2ext{$old}} = grep { $_ ne $1 } @{$mime2ext{$old}};
  0         0  
  0         0  
  0         0  
183 0 0       0 delete $mime2ext{$old} unless @{$mime2ext{$old}};
  0         0  
184             }
185             }
186 21         33 $extension{$1} = $string;
187 21 100       55 $mime2ext{$string} = [] if !defined($mime2ext{$string});
188 21         55 push @{$mime2ext{$string}}, $1
189 21 50       20 unless grep { $_ eq $1 } @{$mime2ext{$string}};
  3         8  
  21         40  
190 3         6 } else { unshift @globs, [$glob, _glob_to_regexp($glob), $string] }
191             }
192 3 50       40 close GLOB or croak "Could not open file '$file' for reading" ;
193             }
194              
195             sub _glob_to_regexp {
196 7     7   9734 my $glob = shift;
197 7         24 $glob =~ s/\./\\./g;
198 7         49 $glob =~ s/([?*])/.$1/g;
199 7         15 $glob =~ s/([^\w\/\\\.\?\*\[\]])/\\$1/g;
200 7         167 qr/^$glob$/;
201             }
202              
203             sub has_mimeinfo_database {
204 2 50   2 1 475027 rehash() if (!$_hashed);
205 2         14 return $_has_mimeinfo_database;
206             }
207              
208             sub extensions {
209 2     2 1 603 my $mimet = mimetype_canon(pop @_);
210 2 100       7 rehash() unless $_hashed;
211 2 50       3 my $ref = $mime2ext{$mimet} if exists $mime2ext{$mimet};
212 2 50       6 return $ref ? @{$ref} : undef if wantarray;
  1 100       6  
213 1 50       2 return $ref ? @{$ref}[0] : '';
  1         7  
214             }
215              
216             sub describe {
217 2 50   2 1 924 shift if ref $_[0];
218 2         6 my ($mt, $lang) = @_;
219 2 50       7 croak 'subroutine "describe" needs a mimetype as argument' unless $mt;
220 2         8 $mt = mimetype_canon($mt);
221 2 50       18 $lang = $LANG unless defined $lang;
222 2 100       7 my $att = $lang ? qq{xml:lang="$lang"} : '';
223 2         3 my $desc;
224             my @descfiles = @DIRS
225 2 0       16 ? ( grep {-e $_ && -r $_} map "$_/$mt.xml", @DIRS )
  0 50       0  
226             : ( reverse data_files('mime', split '/', "$mt.xml") ) ;
227 2         222 for my $file (@descfiles) {
228 2         5 $desc = ''; # if a file was found, return at least empty string
229 2 50       65 open XML, '<', $file or croak "Could not open file '$file' for reading";
230 2 50       21 binmode XML, ':utf8' unless $] < 5.008;
231 2         67 while () {
232 20 100       241 next unless m!(.*?)!;
233 2         8 $desc = $1;
234 2         5 last;
235             }
236 2 50       70 close XML or croak "Could not open file '$file' for reading";
237 2 50       8 last if $desc;
238             }
239 2         16 return $desc;
240             }
241              
242             sub mimetype_canon {
243 16     16 1 753 my $mimet = pop;
244 16 50       44 croak 'mimetype_canon needs argument' unless defined $mimet;
245 16 100       41 rehash_aliases() unless $_hashed_aliases;
246 16 100       72 return exists($aliases{$mimet}) ? $aliases{$mimet} : $mimet;
247             }
248              
249             sub rehash_aliases {
250 2     2 1 7 %aliases = _read_map_files('aliases');
251 2         5 $_hashed_aliases++;
252             }
253              
254             sub _read_map_files {
255 3     3   6 my ($name, $list) = @_;
256             my @files = @DIRS
257 3 0       19 ? ( grep {-e $_ && -r $_} map "$_/$name", @DIRS )
  0 50       0  
258             : ( reverse data_files("mime/$name") );
259 3         458 my (@done, %map);
260 3         11 for my $file (@files) {
261 6 100       15 next if grep {$_ eq $file} @done;
  3         13  
262 3 50       100 open MAP, '<', $file or croak "Could not open file '$file' for reading";
263 3 50       19 binmode MAP, ':utf8' unless $] < 5.008;
264 3         117 while (my $line = ) {
265 6 50       31 next unless $line =~ m/\S/; # skip empty lines
266 6 50       27 next if $line =~ m/^\s*#/; # skip comment lines
267 6         13 chomp $line;
268 6         20 my ($k, $v) = split m/\s+/, $line, 2;
269 6 100       16 if ($list) {
270 2 100       10 $map{$k} = [] unless $map{$k};
271 2         3 push @{$map{$k}}, $v;
  2         18  
272             }
273 4         27 else { $map{$k} = $v }
274             }
275 3         60 close MAP;
276 3         12 push @done, $file;
277             }
278 3         24 return %map;
279             }
280              
281             sub mimetype_isa {
282 5   33 5 1 23 my $parent = pop || croak 'mimetype_isa needs argument';
283 5         10 my $mimet = pop;
284 5 100 66     27 if (ref $mimet or ! defined $mimet) {
285 2         5 $mimet = mimetype_canon($parent);
286 2         5 undef $parent;
287             }
288             else {
289 3         7 $mimet = mimetype_canon($mimet);
290 3         6 $parent = mimetype_canon($parent);
291             }
292 5 100       17 rehash_subclasses() unless $_hashed_subclasses;
293              
294 5         8 my @subc;
295 5 100       14 push @subc, 'inode/directory' if $mimet eq 'inode/mount-point';
296 5 100       13 push @subc, @{$subclasses{$mimet}} if exists $subclasses{$mimet};
  2         8  
297 5 100       19 push @subc, 'text/plain' if $mimet =~ m#^text/#;
298 5 100       16 push @subc, 'application/octet-stream' unless $mimet =~ m#^inode/#;
299              
300 5 100       28 return $parent ? scalar(grep {$_ eq $parent} @subc) : @subc;
  6         27  
301             }
302              
303             sub rehash_subclasses {
304 1     1 1 4 %subclasses = _read_map_files('subclasses', 'LIST');
305 1         4 $_hashed_subclasses++;
306             }
307              
308             1;
309              
310             __END__