File Coverage

blib/lib/Apache2/SSI/File/Type.pm
Criterion Covered Total %
statement 165 539 30.6
branch 58 336 17.2
condition 14 88 15.9
subroutine 20 36 55.5
pod 11 20 55.0
total 268 1019 26.3


line stmt bran cond sub pod time code
1             ##----------------------------------------------------------------------------
2             ## Apache2 Server Side Include Parser - ~/lib/Apache2/SSI/File/Type.pm
3             ## Version v0.1.0
4             ## Copyright(c) 2021 DEGUEST Pte. Ltd.
5             ## Author: Jacques Deguest <jack@deguest.jp>
6             ## Created 2021/03/27
7             ## Modified 2021/03/29
8             ## All rights reserved
9             ##
10             ## This program is free software; you can redistribute it and/or modify it
11             ## under the same terms as Perl itself.
12             ##----------------------------------------------------------------------------
13             package Apache2::SSI::File::Type;
14             BEGIN
15             {
16 18     18   82993 use strict;
  18         39  
  18         570  
17 18     18   85 use warnings;
  18         29  
  18         479  
18 18     18   132 use warnings::register;
  18         33  
  18         1791  
19 18     18   488 use parent qw( Module::Generic );
  18         281  
  18         84  
20 18     18   9514162 use Digest::MD5;
  18         28  
  18         819  
21 18     18   94 use File::Basename ();
  18         29  
  18         230  
22 18     18   79 use File::Spec ();
  18         26  
  18         268  
23 18     18   81 use IO::File;
  18         26  
  18         3230  
24 18     18   114 use Nice::Try;
  18         26  
  18         147  
25 18     18   31766469 use Scalar::Util ();
  18         50  
  18         475  
26 18     18   2787 use URI::file;
  18         25649  
  18         3629  
27 18     18   63 our $VERSION = 'v0.1.0';
28             ## Translation of type in magic file to unpack template and byte count
29 18         397 our $TEMPLATES =
30             {
31             'byte' => [ 'c', 1 ],
32             'ubyte' => [ 'C', 1 ],
33             'char' => [ 'c', 1 ],
34             'uchar' => [ 'C', 1 ],
35             'short' => [ 's', 2 ],
36             'ushort' => [ 'S', 2 ],
37             'long' => [ 'l', 4 ],
38             'ulong' => [ 'L', 4 ],
39             'date' => [ 'l', 4 ],
40             'ubeshort' => [ 'n', 2 ],
41             'beshort' => [ [ 'n', 'S', 's' ], 2 ],
42             'ubelong' => [ 'N', 4 ],
43             'belong' => [ [ 'N', 'I', 'i' ], 4 ],
44             'bedate' => [ 'N', 4 ],
45             'uleshort' => [ 'v', 2 ],
46             'leshort' => [ [ 'v', 'S', 's' ], 2 ],
47             'ulelong' => [ 'V', 4 ],
48             'lelong' => [ [ 'V', 'I', 'i' ], 4 ],
49             'ledate' => [ 'V', 4 ],
50             'string' => undef(),
51             };
52            
53             ## For letter escapes in magic file
54 18         105 our $ESC =
55             {
56             'n' => "\n",
57             'r' => "\r",
58             'b' => "\b",
59             't' => "\t",
60             'f' => "\f"
61             };
62             ## Cache
63 18         30 our $MAGIC_DATA = [];
64             ## Keep a record of the source data file, if any, so we can re-use this cached data instead of re-reading from it
65 18         90418 our $MAGIC_DATA_SOURCE = '';
66             };
67              
68             sub init
69             {
70 2     2 1 214 my $self = shift( @_ );
71 2         4 my $file;
72 2 50       10 $file = shift( @_ ) if( @_ % 2 );
73 2         14 my $opts = $self->_get_args_as_hash( @_ );
74 2 50       24 $opts->{magic} = $file if( length( $file ) );
75 2         113 $self->{follow_links} = 1;
76 2         7 $self->{check_magic} = 0;
77             ## If there is an error or file is empty, it returns undef instead of application/octet-stream
78 2         4 $self->{error_returns_undef} = 0;
79             ## Default to returns text/plain. If not, it will return an empty string and leave the caller to set the default mime-type.
80 2         5 $self->{default_type} = 'text/plain';
81 2         6 $self->{_init_strict_use_sub} = 1;
82 2         13 $self->SUPER::init( @_ );
83 2         178 $self->{magic} = {};
84 2         8 $self->{magic_data} = [];
85             local $load_json_data = sub
86             {
87 2   50 2   9 my $json_file = shift( @_ ) || return;
88 2   50     24 my $io = IO::File->new( "<$json_file" ) ||
89             return( $self->error( "Unable to open our own json magic file \"$json_file\": $!" ) );
90 2         216 local $/;
91 2         203 my $buf = scalar( <$io> );
92 2         28 $io->close;
93 2         40 try
94 2         2 {
95 2         67 my $j = JSON->new->relaxed->allow_nonref;
96 2         1893 $MAGIC_DATA = $self->{magic_data} = $j->decode( $buf );
97 2         25 return( 1 );
98             }
99 2 50       17 catch( $e )
  0 50       0  
  2 50       8  
  2 0       4  
  2 50       6  
  2         3  
  2         4  
  2         4  
  2         10  
  0         0  
  2         6  
  0         0  
  2         10  
  2         5  
  2         6  
  2         10  
  0         0  
  0         0  
  0         0  
  0         0  
100 0         0 {
101 0         0 return( $self->error( "An error occured while trying to json decode ", length( $buf ), " bytes of json data: $e" ) );
102 0 0 33     0 }
  0 0 33     0  
  0 50       0  
  0 50       0  
  0         0  
  0         0  
  2         23  
  2         28  
103 2         17 };
104            
105 2 50 50     20 if( $opts->{magic} )
    50          
106             {
107 0         0 $file = $opts->{magic};
108 0         0 my $file_abs = URI::file->new_abs( $file )->file( $^O );
109 0         0 $self->message( 3, "Magic file \"$file\" ($file_abs) provided. slurping it." );
110 0 0 0     0 if( $file_abs eq $MAGIC_DATA_SOURCE && scalar( @$MAGIC_DATA ) )
111             {
112 0         0 $self->message( 3, "Data for magic file \"$file\" ($file_abs) is already loaded, re-using it." );
113 0         0 $self->{magic_data} = $MAGIC_DATA;
114             }
115             else
116             {
117 0         0 my $checksum = Digest::MD5::md5_hex( $file_abs );
118 0         0 my $base = File::Basename::basename( $file );
119 0         0 my $path = File::Spec->catpath( File::Spec->tmpdir, $base . "_${checksum}.json" );
120 0 0 0     0 if( -e( $path ) && -s( $path ) )
121             {
122 0         0 $self->message( 3, "Found previous magic json data file \"$path\", loading it instead." );
123 0 0       0 $load_json_data->( $path ) || return;
124             }
125             else
126             {
127 0 0       0 return( $self->error( "Magic file provided \"$file\" does not exist." ) ) if( !-e( $file ) );
128 0   0     0 my $io = IO::File->new( "<$file" ) ||
129             return( $self->error( "Unable to open magic file provided \"$file\": $!" ) );
130 0         0 $io->binmode;
131 0         0 $self->parse_magic_file( $io );
132 0         0 $MAGIC_DATA = $self->{magic_data};
133 0         0 $io->close;
134 0         0 $self->message( 3, "Saving magic data to json cache file \"$path\"." );
135 0   0     0 my $json = $self->as_json || return;
136 0   0     0 my $fh = IO::File->new( ">$path" ) ||
137             return( $self->error( "Unable to write to magic cache json data file \"$path\": $!" ) );
138 0         0 $fh->binmode;
139 0         0 $fh->print( $json );
140 0         0 $fh->close;
141             }
142 0         0 $MAGIC_DATA_SOURCE = $file_abs;
143             }
144             }
145             elsif( $MAGIC_DATA && scalar( @$MAGIC_DATA ) )
146             {
147 0         0 $self->{magic_data} = $MAGIC_DATA;
148             }
149             else
150             {
151 2         4 $file = __FILE__;
152 2         13 $file =~ s/\.pm/\.json/;
153 2         19 $self->message( 3, "No magic file specified, reading our magic json data from \"$file\"" );
154 2 50       86 return( $self->error( "Apache2::SSI magic file \"$file\" does not exist." ) ) if( !-e( $file ) );
155 2 50       10 $load_json_data->( $file ) || return;
156             }
157            
158             ## From the BSD names.h, some tokens for hard-coded checks of different texts.
159             ## This isn't rocket science. It's prone to failure so these checks are only a last resort.
160             $self->{SPECIALS} =
161             {
162 2         32 'message/rfc822' =>
163             [
164             '^Received:',
165             '^>From ',
166             '^From ',
167             '^To: ',
168             '^Return-Path: ',
169             '^Cc: ',
170             '^X-Mailer: '
171             ],
172             'message/news' =>
173             [
174             '^Newsgroups: ',
175             '^Path: ',
176             '^X-Newsreader: '
177             ],
178             'text/html' =>
179             [
180             '<html[^>]*>',
181             '<HTML[^>]*>',
182             '<head[^>]*>',
183             '<HEAD[^>]*>',
184             '<body[^>]*>',
185             '<BODY[^>]*>',
186             '<title[^>]*>',
187             '<TITLE[^>]*>',
188             '<h1[^>]*>',
189             '<H1[^>]*>',
190             ],
191             'text/x-roff' =>
192             [
193             "^\\.SH",
194             "^\\.PP",
195             "^\\.TH",
196             "^\\.BR",
197             "^\\.SS",
198             "^\\.TP",
199             "^\\.IR",
200             ],
201             };
202              
203             $self->{FILE_EXTS} =
204             {
205 2         37 qr/\.gz$/ => 'application/x-gzip',
206             qr/\.bz2$/ => 'application/x-bzip2',
207             qr/\.Z$/ => 'application/x-compress',
208             qr/\.txt$/ => 'text/plain',
209             qr/\.html$/ => 'text/html',
210             qr/\.htm$/ => 'text/html',
211             };
212 2         41 return( $self );
213             }
214              
215             sub as_json
216             {
217 0     0 1 0 my $self = shift( @_ );
218 0         0 my $data = $self->{magic_data};
219 0         0 my $j = JSON->new->relaxed->allow_nonref;
220 0         0 my $json = $j->pretty->encode( $data );
221 0         0 return( $json );
222             }
223              
224             sub check
225             {
226 0     0 1 0 my $self = shift( @_ );
227 0         0 my $file = shift( @_ );
228 0         0 my $prev = $self->check_magic;
229 0         0 $self->check_magic( 1 );
230 0   0     0 my $io = IO::File->new( "<$file" ) || return( $self->error( "Unable to open magic file \"$file\": $!" ) );
231 0         0 $io->binmode;
232 0         0 $self->{magic}->{io} = $io;
233 0         0 my $data = [];
234 0         0 while( !$io->eof() )
235             {
236 0         0 $self->read_magic_entry( $data );
237             }
238 0         0 $io->close();
239 0         0 $self->dump( $data );
240 0         0 $self->check_magic( $prev );
241 0         0 return( $self );
242             }
243              
244 231     231 1 553 sub check_magic { return( shift->_set_get_boolean( 'check_magic', @_ ) ); }
245              
246             sub data
247             {
248 0     0 1 0 my $self = shift( @_ );
249 0         0 my $data = shift( @_ );
250 0         0 my $type = '';
251            
252 0 0       0 if( length( $data ) <= 0 )
253             {
254 0 0       0 return( $self->{default_type} ? 'application/octet-stream' : '' );
255             }
256            
257 0         0 $type = $self->with_magic( $data );
258            
259             ## 4) Check if it's text or binary.
260             ## If it's text, then do a bunch of searching for special tokens
261 0 0       0 if( !defined( $type ) )
262             {
263 0         0 $type = $self->with_data( $data );
264             }
265 0 0       0 if( !defined( $type ) )
266             {
267 0 0       0 $type = $self->{default_type} ? 'text/plain' : '';
268             }
269 0         0 return( $type );
270             }
271              
272 1     1 1 27 sub default_type { return( shift->_set_get_scalar( 'default_type', @_ ) ); }
273              
274             ## Recursively write the magic file to stderr.
275             ## Numbers are written in decimal.
276             sub dump
277             {
278 0     0 1 0 my $self = shift( @_ );
279 0   0     0 my $data = shift( @_ ) || $self->{magic_data};
280 0         0 my $depth = shift( @_ );
281 0 0       0 $data = [] unless( defined( $data ) );
282 0 0       0 $depth = 0 unless( defined( $depth ) );
283 0         0 our $err = IO::File->new;
284 0         0 $err->autoflush( 1 );
285 0 0       0 $err->fdopen( fileno( STDERR ), 'w' ) || return( $self->error( "Cannot write to STDERR: $!" ) );
286 0         0 $err->binmode;
287              
288 0         0 $self->messagef( 3, "There are %d entries in \$data", scalar( @$data ) );
289 0         0 foreach my $entry ( @$data )
290             {
291             ## Delayed evaluation.
292 0 0       0 $entry = $self->parse_magic_line( @$entry ) if( scalar( @$entry ) == 3 );
293 0 0       0 next if( !defined( $entry ) );
294 0         0 my( $offtype, $offset, $numbytes, $type, $mask, $op, $testval, $template, $message, $subtests ) = @$entry;
295 0         0 $err->print( '>' x $depth );
296 0 0       0 if( $offtype == 1 )
    0          
297             {
298 0         0 $offset->[2] =~ tr/c/b/;
299 0         0 $err->printf( "(%s.%s%s)", $offset->[0], $offset->[2], $offset->[3] );
300             }
301             elsif( $offtype == 2 )
302             {
303 0         0 $err->print( "&", $offset );
304             }
305             else
306             {
307             ## offtype == 0
308 0         0 $err->print( $offset );
309             }
310 0         0 $err->print( "\t", $type );
311 0 0       0 if( $mask )
312             {
313 0         0 $err->print( "&", $mask );
314             }
315 0         0 $err->print( "\t", $op, $testval, "\t", $message, "\n" );
316            
317 0 0       0 if( $subtests )
318             {
319 0         0 $self->dump( $subtests, $depth + 1 );
320             }
321             }
322             }
323              
324 0     0 1 0 sub error_returns_undef { return( shift->_set_get_boolean( 'error_returns_undef', @_ ) ); }
325              
326             sub file
327             {
328 3     3 1 3971 my $self = shift( @_ );
329             ## Iterate over each file explicitly so we can seek
330             my $file = shift( @_ ) || do
331 3   33     16 {
332             if( $self->{error_returns_undef} )
333             {
334             return( $self->error( "Missing file arguement. Usage: \$magic->file( \$some_file_name )" ) );
335             }
336             else
337             {
338             $desc .= "no file provided.";
339             return( "x-system/x-error; $desc" );
340             }
341             };
342             ## The description line. append info to this string
343 3         6 my $desc = '';
344 3         7 my $type = '';
345            
346             ## No need to let everybody know what is our server file system
347 3         201 my $base_file = File::Basename::basename( $file );
348             ## 0) Check existence
349 3 50       142 if( !-e( $file ) )
    50          
350             {
351 0 0       0 if( $self->{error_returns_undef} )
352             {
353 0         0 return( $self->error( "File $file does not exist." ) );
354             }
355             else
356             {
357 0         0 $desc .= "file '$file' does not exist.";
358 0         0 return( "x-system/x-error; $desc" );
359             }
360             }
361             ## 1) Check permission
362             elsif( !-r( $file ) )
363             {
364 0 0       0 if( $self->{error_returns_undef} )
365             {
366 0         0 return( $self->error( "Unable to read file '$file'; lacking permission" ) );
367             }
368             else
369             {
370 0         0 $desc .= "unable to read '$base_file': Permission denied.";
371 0         0 return( "x-system/x-error; $desc" );
372             }
373             }
374            
375             ## 2) Check for various special files first
376 3 50       19 if( $self->follow_links )
377             {
378 3         203 CORE::stat( $file );
379             }
380             else
381             {
382 0         0 CORE::lstat( $file );
383             }
384             ## Avoid doing many useless redondant system stat, use '_'. See perlfunc man page
385 3 50 33     31 if( !-f( _ ) || -z( _ ) )
386             {
387 0 0 0     0 if( !$self->follow_links && -l( _ ) )
    0          
    0          
    0          
    0          
    0          
    0          
388             {
389             #$desc .= " symbolic link to ". readlink( $file );
390 0         0 return( 'application/x-link' );
391             }
392 0         0 elsif( -d( _ ) ) { return( 'application/x-directory' ); }
393             ## Named pipe
394 0         0 elsif( -p( _ ) ) { return( 'application/x-pipe' ); }
395 0         0 elsif( -S( _ ) ) { return( 'application/x-socket' ); }
396             ## Block special file
397 0         0 elsif( -b( _ ) ) { return( 'application/x-block' ); }
398             ## Character special file
399 0         0 elsif( -c( _ ) ) { return( 'application/x-character' ); }
400 0         0 elsif( -z( _ ) ) { return( 'application/x-empty' ); }
401             else
402             {
403 0 0       0 return( $self->{default_type} ? $self->{default_type} : 'application/x-unknown' );
404             }
405             }
406            
407             ## Current file handle. or undef if check_magic (-c option) is true.
408 3         25 $self->message( 3, "Opening file \"$file\" to have a peek." );
409 3         67 my $io;
410             $io = IO::File->new( "<$file" ) || do
411 3   33     28 {
412             if( $self->{error_returns_undef} )
413             {
414             return( $self->error( "Unable to open file '$file': $!" ) );
415             }
416             else
417             {
418             return( "x-system/x-error; $base_file: $!" );
419             }
420             };
421 3         298 $io->binmode;
422            
423             ## 3) Check for script
424             ## if( ( -x( $file ) || ( $^O =~ /^(dos|mswin32|NetWare|symbian|win32)$/i && $file =~ /\.(?:pl|cgi)$/ ) ) &&
425             # if( ( -x( $file ) || $file =~ /\.(?:cgi|pl|t)$/ ) &&
426             # -T( _ ) )
427 3         28 my $default;
428 3 100 66     135 if( -x( $file ) && -T( _ ) )
429             {
430             ## Note, some magic files include elaborate attempts to match #! header lines
431             ## and return pretty responses but this slows down matching and is unnecessary.
432 2         86 my $line1 = $io->getline;
433 2 50       86 if( $line1 =~ /^\#![[:blank:]\h]*(\S+)/ )
434             {
435             ## Returns the binary name, without file path
436 2         53 my $bin_name = File::Basename::basename( $1 );
437             #$desc .= " executable $bin_name script text";
438             ## $io->close;
439             ## return( "text/x-${bin_name}" );
440 2         8 $default = "text/x-${bin_name}";
441             }
442             }
443 3         19 $self->message( 3, "Using file data to find content-type for file '$file'." );
444             ## $self->messagef( 3, "There are %d entries in \$self->{magic_data}", scalar( @{$self->{magic_data}} ) );
445 3         69 my $out = $self->handle( $io, $desc, { default => $default } );
446 3         26 $io->close;
447 3         127 return( $out );
448             }
449              
450 3     3 1 30 sub follow_links { return( shift->_set_get_boolean( 'follow_links', @_ ) ); }
451              
452             sub handle
453             {
454 3     3 1 9 my $self = shift( @_ );
455 3         6 my $io = shift( @_ );
456 3         8 my $desc = shift( @_ );
457 3         13 my $opts = $self->_get_args_as_hash( @_ );
458 3 100       79 $opts->{default} = $self->default_type if( !length( $opts->{default} ) );
459 3         23 my $type = '';
460            
461             ## $self->message( 5, "Is file handle '$io' active ? ", ( Scalar::Util::blessed( $io ) && $io->opened ) ? 'Yes' : 'No' );
462             ## 3) Iterate over each magic entry.
463 3         7 my $match_found = 0;
464             ## $self->messagef( 3, "\$self->{magic_data} contains %d entries.", scalar( @{$self->{magic_data}} ) );
465 3         7 for( my $m = 0; $m <= $#{ $self->{magic_data} }; $m++ )
  264         420  
466             {
467             ## Check if the m-th magic entry matches and if it does, then $desc will contain
468             ## an updated description
469             ## $self->message( 5, "Checking entry $m: (", scalar( @{$self->{magic_data}->[$m]} ), " elements)" ) if( scalar( @{$self->{magic_data}->[$m]} ) );
470 264         240 my $test;
471 264 100       531 if( ( $test = $self->_magic_match( $self->{magic_data}->[$m], \$desc, $io ) ) )
    100          
472             {
473             ## $self->message( 4, "Found entry at position '$m'\n" );
474 3 50 33     24 if( defined( $desc ) && $desc ne '' )
475             {
476 3         7 $match_found = 1;
477 3         8 $type = $desc;
478 3         8 last;
479             }
480             }
481             elsif( !defined( $test ) )
482             {
483 33 50 33     2709 warnings::warn( "Error occurred while checking for match: ", $self->error ) if( warnings::enabled() && $self->debug );
484             }
485            
486             ## Read another entry from the magic file if we've exhausted all the entries
487             ## already buffered. read_magic_entry will add to the end of the array
488             ## if there are more.
489 261 0 33     326 if( $m == $#{ $self->{magic_data} } &&
  261   0     573  
490             $self->{magic}->{io} &&
491             !$self->{magic}->{io}->eof )
492             {
493 0         0 $self->read_magic_entry();
494             #$self->message( 4, "\$self->{magic_data} is now %d items big.\n", scalar( @{$self->{magic_data}} ) );
495             }
496             }
497            
498             ## 4) Check if it's text or binary.
499             ## if It's text, then do a bunch of searching for special tokens
500 3 50       11 if( !$match_found )
501             {
502 0         0 my $data = '';
503 0         0 $io->seek( 0, 0 );
504 0         0 $io->read( $data, 0x8564 );
505 0         0 $type = $self->with_data( $data );
506             }
507 3 50       12 if( !defined( $type ) )
508             {
509 0 0       0 $type = $opts->{default} ? $opts->{default} : '';
510             }
511 3         10 return( $type );
512             }
513              
514             sub parse_magic_file
515             {
516 0     0 0 0 my $self = shift( @_ );
517 0         0 my $io = shift( @_ );
518             ##----{ Initialize values
519 0         0 $self->{magic}->{io} = $io;
520 0         0 $self->{magic}->{buffer} = undef();
521 0         0 $self->{magic}->{count} = 0;
522 0         0 while( !$io->eof() )
523             {
524 0         0 $self->read_magic_entry();
525             }
526 0         0 seek( $io, 0, 0 );
527             }
528              
529             ## parse_magic_line( $line, $line_num, $subtests )
530             ##
531             ## Parses the match info out of $line. Returns a reference to an array.
532             ##
533             ## Format is:
534             ##
535             ## [ offset, bytes, type, mask, operator, testval, template, sprintf, subtests ]
536             ## 0 1 2 3 4 5 6 7 8
537             ##
538             ## subtests is an array like @$data.
539             sub parse_magic_line
540             {
541 0     0 0 0 my $self = shift( @_ );
542 0         0 my( $line, $line_num, $subtests ) = @_;
543 0         0 my( $offtype, $offset, $numbytes, $type, $mask, $operator, $testval, $template, $message );
544            
545             ## This would be easier if escaped whitespace wasn't allowed.
546            
547             ## Grab the offset and type. offset can either be a decimal, oct, or hex offset or
548             ## an indirect offset specified in parenthesis like (x[.[bsl]][+-][y]), or a relative
549             ## offset specified by &. offtype : 0 = absolute, 1 = indirect, 2 = relative
550 0 0       0 if( $line =~ s/^>*([&\(]?[a-flsx\.\+\-\d]+\)?)[[:blank:]\h]+(\S+)[[:blank:]\h]+// )
551             {
552 0         0 ( $offset, $type ) = ( $1, $2 );
553 0 0       0 if( $offset =~ /^\(/ )
    0          
554             {
555             ## Indirect offset.
556 0         0 $offtype = 1;
557             ## Store as a reference [ offset1 type template offset2 ]
558 0         0 my( $o1, $type, $o2 );
559 0 0       0 if( ( $o1, $type, $o2 ) = ( $offset =~ /\((\d+)(\.[bsl])?([\+\-]?\d+)?\)/ ) )
560             {
561 0 0       0 $o1 = oct( $o1 ) if( $o1 =~ /^0/o );
562 0 0       0 $o2 = oct( $o2 ) if( $o2 =~ /^0/o );
563            
564 0         0 $type =~ s/\.//;
565             ## Default to long
566 0 0       0 $type = 'l' if( $type eq '' );
567             ## Type will be template for unpack
568 0         0 $type =~ tr/b/c/;
569             ## Number of bytes
570 0         0 my $sz = $type;
571 0         0 $sz =~ tr/csl/124/;
572            
573 0         0 $offset = [ $o1, $sz, $type, int( $o2 ) ];
574             }
575             else
576             {
577 0         0 return( $self->error( "Bad indirect offset at line $line_num. '$offset'" ) );
578             }
579             }
580             elsif( $offset =~ /^&/o )
581             {
582             ## Relative offset
583 0         0 $offtype = 2;
584            
585 0         0 $offset = substr( $offset, 1 );
586 0 0       0 $offset = oct( $offset ) if( $offset =~ /^0/o );
587             }
588             else
589             {
590             ## Mormal absolute offset
591 0         0 $offtype = 0;
592            
593             ## Convert if needed
594 0 0       0 $offset = oct( $offset ) if( $offset =~ /^0/o );
595             }
596             }
597             else
598             {
599 0         0 return( $self->error( "Bad Offset/Type at line $line_num. '$line'" ) );
600             }
601            
602             ## Check for & operator on type
603 0 0       0 if( $type =~ s/&(.*)// )
604             {
605 0         0 $mask = $1;
606             ## Convert if needed
607 0 0       0 $mask = oct( $mask ) if( $mask =~ /^0/o );
608             }
609            
610             ## Check if type is valid
611 0 0       0 if( !exists( $TEMPLATES->{ $type } ) )
612             {
613 0         0 return( $self->error( "Invalid type '$type' at line $line_num" ) );
614             }
615            
616             ## Take everything after the first non-escaped space
617 0 0       0 if( $line =~ s/([^\\])\s+(.*)/$1/ )
618             {
619 0         0 $message = $2;
620             }
621             else
622             {
623 0         0 return( $self->error( "Missing or invalid test condition/message at line $line_num" ) );
624             }
625            
626             ## Remove the return if it is still there
627 0         0 $line =~ s/\n$//o;
628              
629             ## Get the operator. If 'x', must be alone. Default is '='.
630 0 0       0 if( $line =~ s/^([><&^=!])//o )
    0          
631             {
632 0         0 $operator = $1;
633             }
634             elsif( $line eq 'x' )
635             {
636 0         0 $operator = 'x';
637             }
638             else
639             {
640 0         0 $operator = '=';
641             }
642            
643 0 0       0 if( $type eq 'string' )
644             {
645 0         0 $testval = $line;
646            
647             ## Do octal/hex conversion
648 0         0 $testval =~ s/\\([x0-7][0-7]?[0-7]?)/chr( oct( $1 ) )/eg;
  0         0  
649            
650             ## Do single char escapes
651 0 0       0 $testval =~ s/\\(.)/$ESC->{ $1 }||$1/eg;
  0         0  
652            
653             ## Put the number of bytes to read in numbytes.
654             ## '0' means read to \0 or \n.
655 0 0       0 if( $operator =~ /[>x]/o )
    0          
    0          
656             {
657 0         0 $numbytes = 0;
658             }
659             elsif( $operator =~ /[=<]/o )
660             {
661 0         0 $numbytes = length( $testval );
662             }
663             elsif( $operator eq '!' )
664             {
665             ## Annoying special case. ! operator only applies to numerics so put it back.
666 0         0 $testval = $operator . $testval;
667 0         0 $numbytes = length( $testval );
668 0         0 $operator = '=';
669             }
670             else
671             {
672             ## There's a bug in my magic file where there's a line that says
673             ## "0 string ^!<arc..." and the BSD file program treats the argument
674             ## like a numeric. To minimize hassles, complain about bad ops only if -c is set.
675 0         0 return( $self->error( "Invalid operator '$operator' for type 'string' at line $line_num." ) );
676             }
677             }
678             else
679             {
680             ## Numeric
681 0 0       0 if( $operator ne 'x' )
682             {
683             ## This conversion is very forgiving. Tt's faster and it doesn't complain
684             ## about bugs in popular magic files, but it will silently turn a string into zero.
685 0 0       0 if( $line =~ /^0/o )
686             {
687 0         0 $testval = oct( $line );
688             }
689             else
690             {
691 0         0 $testval = int( $line );
692             }
693             }
694            
695 0         0 ( $template, $numbytes ) = @{$TEMPLATES->{ $type }};
  0         0  
696            
697             ## Unset coercion of $unsigned unless we're doing order comparison
698 0 0       0 if( ref( $template ) )
699             {
700 0 0 0     0 $template = $template->[0] unless( $operator eq '>' || $operator eq '<' );
701             }
702             }
703 0         0 return( [ $offtype, $offset, $numbytes, $type, $mask, $operator, $testval, $template, $message, $subtests ] );
704             }
705              
706             ## read_magic_entry( $magic_data, $depth )
707             ##
708             ## Reads the next entry from the magic file and stores it as a ref to an array at the
709             ## end of @$magic_data.
710             ##
711             ## $magic = { filehandle, last buffered line, line count }
712             ##
713             ## This is called recursively with increasing $depth to read in sub-clauses
714             ##
715             ## Returns the depth of the current buffered line.
716             sub read_magic_entry
717             {
718 0     0 0 0 my $self = shift( @_ );
719 0   0     0 my $data = shift( @_ ) || $self->{magic_data};
720 0         0 my $depth = shift( @_ );
721 0         0 my $magic = $self->{magic};
722            
723 0         0 my $io = $magic->{io};
724             ## A ref to an array containing a magic line's components
725 0         0 my $entry = [];
726 0         0 my $line = '';
727            
728             ## Buffered last line
729 0         0 $line = $magic->{buffer};
730 0         0 while( 1 )
731             {
732 0 0       0 $line = '' if( !defined( $line ) );
733 0 0 0     0 if( $line =~ /^\#/ || $line =~ /^[[:blank:]\h]*$/ )
734             {
735             #$self->message( 4, "Line is a comment or is empty." );
736 0 0       0 last if( $io->eof );
737 0         0 $line = <$io>;
738 0         0 $magic->{count}++;
739 0         0 next;
740             }
741            
742 0         0 my $this_depth = ( $line =~ /^(>+)/ )[0];
743 0 0       0 $this_depth = '' if( !defined( $this_depth ) );
744 0 0       0 $depth = 0 if( !defined( $depth ) );
745            
746 0         0 $self->message( 4, "\$this_depth ($this_depth), \$depth ($depth)" );
747 0 0       0 if( length( $this_depth ) > $depth )
    0          
    0          
748             {
749 0         0 $magic->{buffer} = $line;
750            
751             ## Call ourselves recursively. will return the depth of the entry following
752             ## the nested group.
753 0 0 0     0 if( $self->read_magic_entry( $entry->[2], $depth + 1 ) < $depth ||
754             $io->eof )
755             {
756 0         0 $self->message( 4, "\$this_depth is greater than \$depth. Returning nothing" );
757 0         0 return;
758             }
759 0         0 $line = $magic->{buffer};
760             }
761             elsif( length( $this_depth ) < $depth )
762             {
763 0         0 $magic->{buffer} = $line;
764 0         0 $self->message( 4, "\$this_depth is less than \$depth. Returning length( \$this_depth )" );
765 0         0 return( length( $this_depth ) );
766             }
767             elsif( @$entry )
768             {
769 0         0 $self->message( 4, "\@\$entry is defined. Returning length( \$this_depth )" );
770             ## Already have an entry. This is not a continuation. Save this line for the
771             ## next call and exit.
772 0         0 $magic->{buffer} = $line;
773 0         0 return( length( $this_depth ) );
774             }
775             else
776             {
777 0         0 $self->message( 4, "Other: Setting \$entry and adding it to \@\$data. Ending loop (possibly). Fetching line" );
778             ## We're here if the number of '>' is the same as the current depth and we
779             ## haven't read a magic line yet.
780              
781             ## Create temp entry later, if we ever get around to evaluating this condition,
782             ## we'll replace @$entry with the results from parse_magic_line.
783 0         0 $entry = [ $line , $magic->{count}, [] ];
784              
785             ## Add to list
786 0         0 push( @$data, $entry );
787              
788             ## Read the next line
789 0 0       0 $self->message( 4, "We reached end of file $io->eof()\n" ) if( $io->eof() );
790 0 0       0 last if( $io->eof() );
791 0         0 $line = <$io>;
792 0         0 my $tmp = $line;
793 0         0 $tmp =~ s/\n$//gs;
794 0         0 $self->message( 4, "(2) Fetched line '$tmp'\n" );
795 0         0 $magic->{count}++;
796             }
797             ## print( STDERR "$line" );
798             }
799             }
800              
801             sub with_magic
802             {
803 0     0 0 0 my $self = shift( @_ );
804 0         0 my $data = shift( @_ );
805 0         0 my $desc = '';
806 0         0 my $type = '';
807            
808 0 0       0 return( 'application/octet-stream' ) if( length( $data ) <= 0 );
809            
810             ## 3) Iterate over each magic entry.
811 0         0 for( my $m = 0; $m <= $#{ $self->{magic_data} }; $m++ )
  0         0  
812             {
813             ## Check if the m-th magic entry matches and if it does, then $desc will contain
814             ## an updated description
815 0 0       0 if( $self->_magic_match_str( $self->{magic_data}->[ $m ], \$desc, $data ) )
816             {
817 0 0 0     0 if( defined( $desc ) && $desc ne '' )
818             {
819 0         0 $type = $desc;
820 0         0 last;
821             }
822             }
823            
824             ## Read another entry from the magic file if we've exhausted all the entries
825             ## already buffered. read_magic_entry will add to the end of the array if
826             ## there are more.
827 0 0 0     0 if( $m == $#{ $self->{magic_data} } && !$self->{magic}->{io}->eof() )
  0         0  
828             {
829 0         0 $self->read_magic_entry();
830             }
831             }
832 0         0 return( $type );
833             }
834              
835             sub with_data
836             {
837 0     0 0 0 my $self = shift( @_ );
838 0         0 my $data = shift( @_ );
839 0         0 my $type = undef();
840            
841 0 0       0 return if( length( $data ) <= 0 );
842            
843             ## Truncate data
844 0         0 $data = substr( $data, 0, 0x8564 );
845            
846 0 0       0 if( _is_binary( $data ) )
847             {
848 0         0 $type = 'application/octet-stream';
849             }
850             else
851             {
852             ## In BSD's version, there's an effort to search from more specific to less,
853             ## but I don't do that.
854 0         0 my( $token, %val );
855 0         0 foreach my $type ( keys( %{$self->{SPECIALS}} ) )
  0         0  
856             {
857 0         0 my $token = '(' . ( join( '|', sort{ length( $a ) <=> length( $b ) } @{$self->{SPECIALS}->{ $type } } ) ) . ')';
  0         0  
  0         0  
858 0         0 my $tdata = $data;
859 0 0       0 if( $tdata =~ /$token/mg )
860             {
861 0         0 $val{ $type } = pos( $tdata );
862             }
863             }
864             ## Search latest match
865 0 0       0 if( scalar( keys( %val ) ) )
866             {
867 0         0 my @skeys = sort{ $val{ $a } <=> $val{ $b } } keys( %val );
  0         0  
868 0         0 $type = $skeys[0];
869             }
870            
871             ## ALLDONE:
872             ## $type = 'text/plain' if( !defined( $type ) );
873             }
874             ## $type = 'text/plain' if( !defined( $type ) );
875 0         0 return( $type );
876             }
877              
878             sub with_filename
879             {
880 0     0 0 0 my $self = shift( @_ );
881 0         0 my $fname = shift( @_ );
882 0         0 my $type = '';
883            
884 0         0 my $file = $fname;
885 0         0 $fname =~ s/^.*\///;
886 0         0 for my $regex ( keys( %{$self->{FILE_EXTS}} ) )
  0         0  
887             {
888 0 0       0 if( $fname =~ /$regex/i )
889             {
890 0 0 0     0 if( ( defined( $type ) && $type !~ /;/ ) ||
      0        
891             !defined( $type ) )
892             {
893             ## has no x-type param
894 0         0 $type = $self->{FILE_EXTS}->{ $regex };
895             }
896             }
897             }
898 0         0 return( $type );
899             }
900              
901             sub _is_binary
902             {
903 0     0   0 my( $data ) = @_;
904 0         0 my $len = length( $data );
905             ## Exclude TAB, ESC, nl, cr
906 0         0 my $count = ( $data =~ tr/[\x00-\x08\x0b-\x0c\x0e-\x1a\x1c-\x1f]// );
907             ## No contents
908 0 0       0 return( 1 ) if( $len <= 0 );
909             ## Binary
910 0 0       0 return( 1 ) if( ( $count / $len ) > 0.1 );
911 0         0 return( 0 );
912             }
913              
914             ## Compare the magic item with the filehandle.
915             ## If success, print info and return true, otherwise return undef.
916             ##
917             ## This is called recursively if an item has subitems.
918             sub _magic_match
919             {
920 264     264   295 my $self = shift( @_ );
921             ## $io is the file handle of the file being inspected
922 264         359 my( $item, $p_desc, $io ) = @_;
923            
924             ## Delayed evaluation. If this is our first time considering this item, then parse out
925             ## its structure. @$item is just the raw string, line number, and subtests until we
926             ## need the real info. This saves time otherwise wasted parsing unused subtests.
927 264 50       397 $item = $self->parse_magic_line( @$item ) if( @$item == 3 );
928            
929             ## $item could be undef if we ran into troubles while reading the entry.
930 264 50       373 return unless( defined( $item ) );
931            
932             ## $io is not defined if -c. That way we always return false for every item which
933             ## allows reading/checking the entire magic file.
934 264 50       318 return( $self->error( "File handle is not defined." ) ) unless( defined( $io ) );
935             ## return unless( defined( fileno( $io ) ) );
936             # $self->message( 3, "Is file handle '$io' active ? (", Scalar::Util::openhandle( $io ) ? 'yes' : 'no', ")." );
937             # return unless( Scalar::Util::openhandle( $io ) );
938             # $self->message( 3, "Is file handle '$io' active ? (", ( defined( $io ) && $io->opened ) ? 'yes' : 'no', ")." );
939            
940 264         627 my( $offtype, $offset, $numbytes, $type, $mask, $op, $testval, $template, $message, $subtests ) = @$item;
941             ## $self->message( 5, "Checking item for description $$p_desc: ", sub{ $self->SUPER::dump( $item ) }) if( scalar( @$item ) );
942 264         285 $self->{trick}++;
943 264 50 33     438 if( $self->{trick} > 186 && $self->{trick} < 192 )
944             {
945             ## $self->message( 4, "$item\n" );
946 0         0 my $c = -1;
947             ## $self->message( 4, join( "\n", map{ sprintf( "%s: %s", $_, $item->[ ++$c ] ) } qw( offtype offset numbytes type mask op testval template message subtests ) ), "\n--------\n" );
948             }
949             ## Bytes from file
950 264         264 my $data = '';
951              
952             ## Set to true if match
953 264         244 my $match = 0;
954            
955             ## offset = [ off1, sz, template, off2 ] for indirect offset
956 264 50       439 if( $offtype == 1 )
    50          
957             {
958 0         0 my( $off1, $sz, $template, $off2 ) = @$offset;
959 0 0       0 $io->seek( $off1, 0 ) || return( $self->error( "Unable to seek to offset $off1 in file" ) );
960             # return( $self->error( "Unable to read $sz bytes of data from file. Buffer is only ", length( $data ), " bytes." ) ) if( $io->read( $data, $sz ) != $sz );
961 0 0       0 return if( $io->read( $data, $sz ) != $sz );
962 0         0 $off2 += unpack( $template, $data );
963 0 0       0 $io->seek( $off2, 0 ) || return( $self->error( "Unable to seek to offset $off2 in file." ) );
964             }
965             elsif( $offtype == 2 )
966             {
967             ## Relative offsets from previous seek
968 0 0       0 $io->seek( $offset, 1 ) || return( $self->error( "Unable to seek to offset $offset in file" ) );
969             }
970             else
971             {
972             ## Absolute offset
973 264 50       581 $io->seek( $offset, 0 ) || return( $self->error( "Unable to seek to offset $offset in file" ) );
974             }
975            
976 264 100       4341 if( $type eq 'string' )
977             {
978             ## Read the length of the match string unless the comparison is
979             ## '>' ($numbytes == 0), in which case read to the next null or "\n".
980             ## (that's what BSD's file does)
981 237 50       322 if( $numbytes > 0 )
982             {
983             # return( $self->error( "Unable to read $numbytes bytes of data from file. Buffer is only ", length( $data ), " bytes." ) ) if( $io->read( $data, $numbytes ) != $numbytes );
984 237 100       565 return if( $io->read( $data, $numbytes ) != $numbytes );
985             ## $self->message( 5, "Data now contains '$data'." );
986             }
987             else
988             {
989 0         0 my $ch = $io->getc();
990 0   0     0 while( defined( $ch ) && $ch ne "\0" && $ch ne "\n" )
      0        
991             {
992 0         0 $data .= $ch;
993 0         0 $ch = $io->getc();
994             }
995             }
996             ## $self->message( 4, "Checking data '$data' against test value '$testval'\n" );
997            
998             ## Now do the comparison
999 204 50       2556 if( $op eq '=' )
    0          
    0          
1000             {
1001 204         308 $match = ( $data eq $testval );
1002             }
1003             elsif( $op eq '<' )
1004             {
1005 0         0 $match = ( $data lt $testval );
1006             }
1007             elsif( $op eq '>' )
1008             {
1009 0         0 $match = ( $data gt $testval );
1010             }
1011             ## Else bogus op, but don't die, just skip
1012 204 50       388 if( $self->check_magic )
1013             {
1014 0         0 print( STDERR "STRING: $data $op $testval => $match\n" );
1015             }
1016             }
1017             else
1018             {
1019             ## Numeric
1020             ## Read up to 4 bytes
1021             # return( $self->error( "Unable to read $numbytes bytes of data from file. Buffer is only ", length( $data ), " bytes." ) ) if( $io->read( $data, $numbytes ) != $numbytes );
1022 27 50       81 return if( $io->read( $data, $numbytes ) != $numbytes );
1023            
1024             ## If template is a ref to an array of 3 letters, then this is an endian number
1025             ## which must be first unpacked into an unsigned and then coerced into a signed.
1026             ## Is there a better way?
1027 27 50       348 if( ref( $template ) )
1028             {
1029 0         0 $data = unpack( $template->[2], pack( $template->[1], unpack( $template->[0], $data ) ) );
1030             }
1031             else
1032             {
1033 27         77 $data = unpack( $template, $data );
1034             }
1035            
1036             ## If mask
1037 27 100       51 if( defined( $mask ) )
1038             {
1039 6         13 $data &= $mask;
1040             }
1041            
1042             ## Now do the check
1043 27 100       65 if( $op eq '=' )
    50          
    50          
    50          
    0          
    0          
    0          
1044             {
1045 24         33 $match = ( $data == $testval );
1046             }
1047             elsif( $op eq 'x' )
1048             {
1049 0         0 $match = 1;
1050             }
1051             elsif( $op eq '!' )
1052             {
1053 0         0 $match = ( $data != $testval );
1054             }
1055             elsif( $op eq '&' )
1056             {
1057 3         9 $match = ( ( $data & $testval ) == $testval );
1058             }
1059             elsif( $op eq '^' )
1060             {
1061 0         0 $match = ( ( ~$data & $testval ) == $testval );
1062             }
1063             elsif( $op eq '<' )
1064             {
1065 0         0 $match = ( $data < $testval );
1066             }
1067             elsif( $op eq '>' )
1068             {
1069 0         0 $match = ( $data > $testval );
1070             }
1071             ## Else bogus entry that we're ignoring
1072 27 50       56 if( $self->check_magic )
1073             {
1074 0         0 print( STDERR "NUMERIC: $data $op $testval => $match\n" );
1075             }
1076             }
1077            
1078 231 100       7210 if( $match )
1079             {
1080             ## It's pretty common to find "\b" in the message, but sprintf doesn't insert a
1081             ## backspace. If it's at the beginning (typical) then don't include separator space.
1082 3 50       16 if( $message =~ s/^\\b// )
1083             {
1084 0 0       0 $$p_desc .= ( index( $message, '%s' ) != -1 ? sprintf( $message, $data ) : $message );
1085             }
1086             else
1087             {
1088             ## $$p_desc .= ' ' . sprintf( $message, $data ) if( $message );
1089 3 50       22 $$p_desc .= ( index( $message, '%s' ) != -1 ? sprintf( $message, $data ) : $message ) if( $message );
    50          
1090             }
1091            
1092 3         11 foreach my $subtest ( @$subtests )
1093             {
1094 0         0 $self->_magic_match( $subtest, $p_desc, $io );
1095             }
1096 3         42 return( 1 );
1097             }
1098             }
1099              
1100             sub _magic_match_str
1101             {
1102 0     0     my $self = shift( @_ );
1103 0           my( $item, $p_desc, $str ) = @_;
1104 0           my $origstr = $str;
1105            
1106             ## Delayed evaluation. If this is our first time considering this item, then parse out
1107             ## its structure. @$item is just the raw string, line number, and subtests until we
1108             ## need the real info. This saves time otherwise wasted parsing unused subtests.
1109 0 0         $item = $self->parse_magic_line( @$item ) if( @$item == 3 );
1110            
1111             ## $item could be undef if we ran into troubles while reading the entry.
1112 0 0         return unless( defined( $item ) );
1113            
1114             ## $fh is not be defined if -c. That way we always return false for every item which
1115             ## allows reading/checking the entire magic file.
1116 0 0         return unless( defined( $str ) );
1117 0 0         return if( $str eq '' );
1118            
1119 0           my( $offtype, $offset, $numbytes, $type, $mask, $op, $testval, $template, $message, $subtests ) = @$item;
1120 0 0         return unless( defined( $op ) );
1121            
1122             ## Bytes from file
1123 0           my $data = '';
1124            
1125             ## Set to true if match
1126 0           my $match = 0;
1127            
1128             ## offset = [ off1, sz, template, off2 ] for indirect offset
1129 0 0         if( $offtype == 1 )
    0          
1130             {
1131 0           my( $off1, $sz, $template, $off2 ) = @$offset;
1132 0 0         return if( length( $str ) < $off1 );
1133 0           $data = pack( "a$sz", $str );
1134 0           $off2 += unpack( $template, $data );
1135 0 0         return if( length( $str ) < $off2 );
1136             }
1137             elsif( $offtype == 2 )
1138             {
1139             ## Unable to handle relative offsets from previous seek
1140 0           return;
1141             }
1142             else
1143             {
1144             ## Absolute offset
1145 0 0         return if( $offset > length( $str ) );
1146 0           $str = substr( $str, $offset );
1147             }
1148            
1149 0 0         if( $type eq 'string' )
1150             {
1151             ## Read the length of the match string unless the comparison is
1152             ## '>' ($numbytes == 0), in which case read to the next null or "\n".
1153             ## (that's what BSD's file does)
1154 0 0         if( $numbytes > 0 )
1155             {
1156 0           $data = pack( "a$numbytes", $str );
1157             }
1158             else
1159             {
1160 0           $str =~ /^(.*)\0|$/;
1161 0           $data = $1;
1162             }
1163              
1164             ## Now do the comparison
1165 0 0         if( $op eq '=' )
    0          
    0          
1166             {
1167 0           $match = ( $data eq $testval );
1168             }
1169             elsif( $op eq '<' )
1170             {
1171 0           $match = ( $data lt $testval );
1172             }
1173             elsif( $op eq '>' )
1174             {
1175 0           $match = ( $data gt $testval );
1176             }
1177             ## Else bogus op, but don't die, just skip
1178            
1179 0 0         if( $self->check_magic )
1180             {
1181 0           print( STDERR "STRING: $data $op $testval => $match\n" );
1182             }
1183             }
1184             else
1185             {
1186             ## Numeric
1187             ## Read up to 4 bytes
1188 0           $data = substr( $str, 0, 4 );
1189            
1190             ## If template is a ref to an array of 3 letters, then this is an endian number
1191             ## which must be first unpacked into an unsigned and then coerced into a signed.
1192             ## Is there a better way?
1193 0 0         if( ref( $template ) )
1194             {
1195 0           $data = unpack( $template->[2], pack( $template->[1], unpack( $template->[0], $data ) ) );
1196             }
1197             else
1198             {
1199 0           $data = unpack( $template, $data );
1200             }
1201            
1202             ## If mask
1203 0 0         if( defined( $mask ) )
1204             {
1205 0           $data &= $mask;
1206             }
1207            
1208             ## Now do the check
1209 0 0         if( $op eq '=' )
    0          
    0          
    0          
    0          
    0          
    0          
1210             {
1211 0           $match = ( $data == $testval );
1212             }
1213             elsif( $op eq 'x' )
1214             {
1215 0           $match = 1;
1216             }
1217             elsif( $op eq '!' )
1218             {
1219 0           $match = ( $data != $testval );
1220             }
1221             elsif( $op eq '&' )
1222             {
1223 0           $match = ( ( $data & $testval ) == $testval );
1224             }
1225             elsif( $op eq '^' )
1226             {
1227 0           $match = ( ( ~$data & $testval ) == $testval );
1228             }
1229             elsif( $op eq '<' )
1230             {
1231 0           $match = ( $data < $testval );
1232             }
1233             elsif( $op eq '>' )
1234             {
1235 0           $match = ( $data > $testval );
1236             }
1237             ## else bogus entry that we're ignoring
1238 0 0         if( $self->check_magic )
1239             {
1240 0           print( STDERR "NUMERIC: $data $op $testval => $match\n" );
1241             }
1242             }
1243            
1244 0 0         if( $match )
1245             {
1246             ## It's pretty common to find "\b" in the message, but sprintf doesn't insert a
1247             ## backspace. If it's at the beginning (typical) then don't include separator space.
1248 0 0         if( $message =~ s/^\\b// )
1249             {
1250 0           $$p_desc .= sprintf( $message, $data );
1251             }
1252             else
1253             {
1254             ## $$p_desc .= ' ' . sprintf( $message, $data ) if( $message );
1255 0 0         $$p_desc .= sprintf( $message, $data ) if( $message );
1256             }
1257 0           foreach my $subtest ( @$subtests )
1258             {
1259             ## Finish evaluation when matched.
1260 0           $self->_magic_match_str( $subtest, $p_desc, $origstr );
1261             }
1262 0           return( 1 );
1263             }
1264             }
1265              
1266             ## Obsolete routines
1267             sub add_specials
1268             {
1269 0     0 0   my $self = shift( @_ );
1270 0           my $type = shift( @_ );
1271 0           $self->{SPECIALS}->{ $type } = [ @_ ];
1272 0           return( $self );
1273             }
1274              
1275             sub add_file_exts
1276             {
1277 0     0 0   my $self = shift( @_ );
1278 0           my $filepat = shift( @_ );
1279 0           my $type = shift( @_ );
1280 0           $self->{FILE_EXTS}->{ $filepat } = $type;
1281 0           return( $self );
1282             }
1283              
1284             sub add_magic_entry
1285             {
1286 0     0 0   my $self = shift( @_ );
1287 0           my $entry = shift( @_ );
1288 0           unshift( @{$self->{magic_data}}, [ $entry, -1, [] ] );
  0            
1289 0           return( $self );
1290             }
1291              
1292             1;
1293              
1294             __END__
1295              
1296             =head1 NAME
1297              
1298             Apache2::SSI::File::Type - Guess file MIME Type using Magic
1299              
1300             =head1 SYNOPSIS
1301              
1302             use Apache2::SSI::File::Type;
1303            
1304             # use internal magic data; no outside dependencies
1305             my $m = Apache2::SSI::File::Type->new;
1306             # use external magic file
1307             # my $m = Apache2::SSI::File::Type->new( '/etc/apache2/magic' );
1308             my $mime_type = $m->file( "/somewhere/unknown/file" );
1309             # or, on windows
1310             my $mime_type = $m->file( "C:\Documents\myfile.cgi" );
1311             # using a file handle works too
1312             my $io = IO::File->new( "</somewhere/unknown/file2" );
1313             my $mime_type = $m->handle( $io );
1314            
1315             $io->read( $data, 0x8564 );
1316             my $mime_type = $m->data( $data );
1317              
1318             =head1 DESCRIPTION
1319              
1320             This module emulates the functionnality of L<file(1)> unix utility cross platform, and returns the file MIME type.
1321              
1322             It can guess it from a file name, data or file handle using methods described below.
1323              
1324             It does not depend upon an external application to function.
1325              
1326             =head1 CONSTRUCTOR
1327              
1328             =over 4
1329              
1330             =item B<new>( [ "/some/where/file.cgi" ] )
1331              
1332             Creates a new L<Apache2::SSI::File::Type> object and returns it.
1333             If a file is provided, L<Apache2::SSI::File::Type> will use it instead of its default internal data.
1334              
1335             If it can not open it or read it, it will set an error object and return undef. See L<Module::Generic/error> for more information.
1336              
1337             The result of the parsing of the given file is cached as a json file in the system's temporary folder, wherever that is. The location is provided by L<File::Spec/tmpdir>
1338              
1339             The internal magic data is provided internally from a json data file located in the same place as this module.
1340              
1341             =back
1342              
1343             =head1 METHODS
1344              
1345             =head2 as_json
1346              
1347             This returns the internal magic data as a properly formatted json string using L<JSON>.
1348              
1349             This is used to create cache of magic files.
1350              
1351             =head2 check( "/etc/apache2/magic" )
1352              
1353             Checks the magic file provided and dumps it on the STDERR.
1354              
1355             This is equivalent to option C<-c> of L<file(1)>.
1356              
1357             =head2 check_magic
1358              
1359             Set or gets the boolean value used to decide whether the magic data are checked.
1360              
1361             =head2 data( $some_data )
1362              
1363             Guess the mime type based upon the data provided with C<$some_data> and returns it.
1364              
1365             If C<$some_data> is zero length big, it will return C<application/x-empty>.
1366              
1367             Otherwise, it defaults to the value set with L</default_type>, which, by default, is I<text/plain> if L</default_type> is set to a true value or an empty value otherwise.
1368              
1369             =head2 default_type
1370              
1371             Set the default mime type to be returned as default, if any at all. If this is empty, it will default to C<text/plain> by default.
1372              
1373             If it iset to a true value, it will return that value or text/plain if it is set to empty string otherwise.
1374              
1375             =head2 dump
1376              
1377             Provided with an optional data as an array reference, or if nothing is provided, the internal magic data and this will print it out as a properly formatted magic file suitable to be re-used.
1378              
1379             For example on your command line interface:
1380            
1381             # my_script.pl file:
1382             #/usr/bin/perl
1383             BEGIN
1384             {
1385             use strict;
1386             use warnings;
1387             use Apache2::SSI::File::Type;
1388             };
1389            
1390             my $m = Apache2::SSI::File::Type->new;
1391             $m->dump;
1392             exit;
1393            
1394             # on the command line:
1395             ./my_script.pl 2>my_magic
1396              
1397             =head2 error_returns_undef
1398              
1399             Sets or gets the boolean value to decide whether this module will return a default value (see L</default_type>) or C<undef> when there is an error.
1400              
1401             By default this is set to false, and the module will return a default value upon error.
1402              
1403             =head2 file( '/some/file/path.txt' )
1404              
1405             Provided with a file and this will guess its mim type.
1406              
1407             If an error occurs, and if L</error_returns_undef> is set to true, it will return C<x-system/x-error; description>
1408             where description is the description of the error, otherwise it will set an error object with the error string and return C<undef>. See L<Module::Generic/error> for more information about the error object.
1409              
1410             If the file to check is not a regular file or is empty, it will call L<perlfunc/stat> and it will try hard to find its mime type.
1411              
1412             Otherwise, it defaults to the value set with L</default_type>.
1413              
1414             =head2 follow_links
1415              
1416             Provided with a boolean value, this sets whether links should be followed.
1417              
1418             Default to true.
1419              
1420             =head2 handle
1421              
1422             Provided with an opened file handle and this method will try to guess the mime type and returns it.
1423              
1424             It defaults to whatever value is set with L</default_type>.
1425              
1426             =head1 AUTHOR
1427              
1428             Jacques Deguest E<lt>F<jack@deguest.jp>E<gt>
1429              
1430             =head1 CREDITS
1431              
1432             Credits Nokubi Takatsugu.
1433              
1434             =head1 SEE ALSO
1435              
1436             L<file(1)>
1437              
1438             L<Apache2::SSI>, L<Apache2::SSI::File>, L<Apache2::SSI::Finfo>, L<Apache2::SSI::URI>
1439              
1440             =head1 COPYRIGHT & LICENSE
1441              
1442             Copyright (c) 2021 DEGUEST Pte. Ltd.
1443              
1444             You can use, copy, modify and redistribute this package and associated
1445             files under the same terms as Perl itself.
1446              
1447             =cut