File Coverage

blib/lib/File/MMagic.pm
Criterion Covered Total %
statement 286 479 59.7
branch 144 324 44.4
condition 30 87 34.4
subroutine 20 27 74.0
pod 10 22 45.4
total 490 939 52.1


line stmt bran cond sub pod time code
1             # File::MMagic
2             #
3             # $Id: MMagic.pm 290 2013-07-02 06:21:39Z knok $
4             #
5             # This program is originated from file.kulp that is a production of The
6             # Unix Reconstruction Projct.
7             #
8             # Copyright 1999,2000,2001,2002 NOKUBI Takatsugu .
9             #
10             # This product includes software developed by the Apache Group
11             # for use in the Apache HTTP server project (http://www.apache.org/).
12             #
13             # License for the program is followed the original software. The license is
14             # below.
15             #
16             # This program is copyright by dkulp 1999.
17             #
18             # This program is free and open software. You may use, copy, modify, distribute
19             # and sell this program (and any modified variants) in any way you wish,
20             # provided you do not restrict others to do the same, except for the following
21             # consideration.
22             #
23             #I read some of Ian F. Darwin's BSD C implementation, to
24             #try to determine how some of this was done since the specification
25             #is a little vague. I don't believe that this perl version could
26             #be construed as an "altered version", but I did grab the tokens for
27             #identifying the hard-coded file types in names.h and copied some of
28             #the man page.
29             #
30             #Here's his notice:
31             #
32             # * Copyright (c) Ian F. Darwin, 1987.
33             # * Written by Ian F. Darwin.
34             # *
35             # * This software is not subject to any license of the American Telephone
36             # * and Telegraph Company or of the Regents of the University of California.
37             # *
38             # * Permission is granted to anyone to use this software for any purpose on
39             # * any computer system, and to alter it and redistribute it freely, subject
40             # * to the following restrictions:
41             # *
42             # * 1. The author is not responsible for the consequences of use of this
43             # * software, no matter how awful, even if they arise from flaws in it.
44             # *
45             # * 2. The origin of this software must not be misrepresented, either by
46             # * explicit claim or by omission. Since few users ever read sources,
47             # * credits must appear in the documentation.
48             # *
49             # * 3. Altered versions must be plainly marked as such, and must not be
50             # * misrepresented as being the original software. Since few users
51             # * ever read sources, credits must appear in the documentation.
52             # *
53             # * 4. This notice may not be removed or altered.
54             #
55             # The following is the Apache License. This program contains the magic file
56             # that derived from the Apache HTTP Server.
57             #
58             # * Copyright (c) 1995-1999 The Apache Group. All rights reserved.
59             # *
60             # * Redistribution and use in source and binary forms, with or without
61             # * modification, are permitted provided that the following conditions
62             # * are met:
63             # *
64             # * 1. Redistributions of source code must retain the above copyright
65             # * notice, this list of conditions and the following disclaimer.
66             # *
67             # * 2. Redistributions in binary form must reproduce the above copyright
68             # * notice, this list of conditions and the following disclaimer in
69             # * the documentation and/or other materials provided with the
70             # * distribution.
71             # *
72             # * 3. All advertising materials mentioning features or use of this
73             # * software must display the following acknowledgment:
74             # * "This product includes software developed by the Apache Group
75             # * for use in the Apache HTTP server project (http://www.apache.org/)."
76             # *
77             # * 4. The names "Apache Server" and "Apache Group" must not be used to
78             # * endorse or promote products derived from this software without
79             # * prior written permission. For written permission, please contact
80             # * apache@apache.org.
81             # *
82             # * 5. Products derived from this software may not be called "Apache"
83             # * nor may "Apache" appear in their names without prior written
84             # * permission of the Apache Group.
85             # *
86             # * 6. Redistributions of any form whatsoever must retain the following
87             # * acknowledgment:
88             # * "This product includes software developed by the Apache Group
89             # * for use in the Apache HTTP server project (http://www.apache.org/)."
90             # *
91             # * THIS SOFTWARE IS PROVIDED BY THE APACHE GROUP ``AS IS'' AND ANY
92             # * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
93             # * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
94             # * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE APACHE GROUP OR
95             # * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
96             # * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
97             # * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
98             # * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
99             # * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
100             # * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
101             # * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
102             # * OF THE POSSIBILITY OF SUCH DAMAGE.
103              
104             package File::MMagic;
105              
106             =head1 NAME
107              
108             File::MMagic - Guess file type
109              
110             =head1 SYNOPSIS
111              
112             use File::MMagic;
113             use FileHandle;
114              
115             $mm = new File::MMagic; # use internal magic file
116             # $mm = File::MMagic->new('/etc/magic'); # use external magic file
117             # $mm = File::MMagic->new('/usr/share/etc/magic'); # if you use Debian
118             $res = $mm->checktype_filename("/somewhere/unknown/file");
119              
120             $fh = new FileHandle "< /somewhere/unknown/file2";
121             $res = $mm->checktype_filehandle($fh);
122              
123             $fh->read($data, 0x8564);
124             $res = $mm->checktype_contents($data);
125              
126             =head1 ABSTRACT
127              
128             This perl library uses perl5 objects to guess file type from filename
129             and/or filehandle.
130              
131             =head1 DESCRIPTION
132              
133             checktype_filename(), checktype_filehandle() and checktype_contents
134             returns string contains file type with MIME mediatype format.
135              
136             =head1 METHODS
137              
138             =over 4
139              
140             =item File::MMagic->new()
141              
142             =item File::MMagic->new( $filename )
143              
144             Initializes the module. If no filename is given, the magic numbers
145             stored in File::MMagic are used.
146              
147             =item $mm->addSpecials
148              
149             If a filetype cannot be determined by magic numbers, extra checks are
150             done based on extra regular expressions which can be defined here. The
151             first argument should be the filetype, the remaining arguments should
152             be one or more regular expressions.
153              
154             By default, checks are done for message/news, message/rfc822,
155             text/html, text/x-roff.
156              
157             =item $mm->removeSpecials
158              
159             Removes special regular expressions. Specify one or more filetypes. If
160             no filetypes are specified, all special regexps are removed.
161              
162             Returns a hash containing the removed entries.
163              
164             =item $mm->addFileExts
165              
166             If a filetype cannot be determined by magic numbers, extra checks can
167             be done based on the file extension (actually, a regexp). Two
168             arguments should be geiven: the filename pattern and the corresponding
169             filetype.
170              
171             By default, checks are done for application/x-compress,
172             application/x-bzip2, application/x-gzip, text/html, text/plain
173              
174             =item $mm->removeFileExts
175              
176             Remove filename pattern checks. Specify one or more patterns. If no
177             pattern is specified, all are removed.
178              
179             Returns a hash containing the removed entries.
180              
181             =item $mm->addMagicEntry
182              
183             Add a new magic entry in the object. The format is same as magic(5) file.
184              
185             Ex.
186             # Add a entry
187             $mm->addMagicEntry("0\tstring\tabc\ttext/abc");
188             # Add a entry with a sub entry
189             $mm->addMagicEntry("0\tstring\tdef\t");
190             $mm->addMagicEntry(">10\tstring\tghi\ttext/ghi");
191              
192             =item $mm->readMagicHandle
193              
194             =item $mm->checktype_filename
195              
196             =item $mm->checktype_magic
197              
198             =item $mm->checktype_contents
199              
200             =back
201              
202             =head1 COPYRIGHT
203              
204             This program is originated from file.kulp that is a production of The
205             Unix Reconstruction Projct.
206            
207             Copyright (c) 1999 NOKUBI Takatsugu .
208              
209             There is no warranty for the program.
210              
211             This product includes software developed by the Apache Group
212             for use in the Apache HTTP server project (http://www.apache.org/).
213              
214             License for the program is followed the original software. The license is
215             below.
216              
217             This program is free and open software. You may use, copy, modify, distribute
218             and sell this program (and any modified variants) in any way you wish,
219             provided you do not restrict others to do the same, except for the following
220             consideration.
221              
222             I read some of Ian F. Darwin's BSD C implementation, to
223             try to determine how some of this was done since the specification
224             is a little vague. I don't believe that this perl version could
225             be construed as an "altered version", but I did grab the tokens for
226             identifying the hard-coded file types in names.h and copied some of
227             the man page.
228              
229             Here's his notice:
230              
231             * Copyright (c) Ian F. Darwin, 1987.
232             * Written by Ian F. Darwin.
233             *
234             * This software is not subject to any license of the American Telephone
235             * and Telegraph Company or of the Regents of the University of California.
236             *
237             * Permission is granted to anyone to use this software for any purpose on
238             * any computer system, and to alter it and redistribute it freely, subject
239             * to the following restrictions:
240             *
241             * 1. The author is not responsible for the consequences of use of this
242             * software, no matter how awful, even if they arise from flaws in it.
243             *
244             * 2. The origin of this software must not be misrepresented, either by
245             * explicit claim or by omission. Since few users ever read sources,
246             * credits must appear in the documentation.
247             *
248             * 3. Altered versions must be plainly marked as such, and must not be
249             * misrepresented as being the original software. Since few users
250             * ever read sources, credits must appear in the documentation.
251             *
252             * 4. This notice may not be removed or altered.
253              
254             The following is the Apache License. This program contains the magic file
255             that derived from the Apache HTTP Server.
256              
257             * Copyright (c) 1995-1999 The Apache Group. All rights reserved.
258             *
259             * Redistribution and use in source and binary forms, with or without
260             * modification, are permitted provided that the following conditions
261             * are met:
262             *
263             * 1. Redistributions of source code must retain the above copyright
264             * notice, this list of conditions and the following disclaimer.
265             *
266             * 2. Redistributions in binary form must reproduce the above copyright
267             * notice, this list of conditions and the following disclaimer in
268             * the documentation and/or other materials provided with the
269             * distribution.
270             *
271             * 3. All advertising materials mentioning features or use of this
272             * software must display the following acknowledgment:
273             * "This product includes software developed by the Apache Group
274             * for use in the Apache HTTP server project (http://www.apache.org/)."
275             *
276             * 4. The names "Apache Server" and "Apache Group" must not be used to
277             * endorse or promote products derived from this software without
278             * prior written permission. For written permission, please contact
279             * apache@apache.org.
280             *
281             * 5. Products derived from this software may not be called "Apache"
282             * nor may "Apache" appear in their names without prior written
283             * permission of the Apache Group.
284             *
285             * 6. Redistributions of any form whatsoever must retain the following
286             * acknowledgment:
287             * "This product includes software developed by the Apache Group
288             * for use in the Apache HTTP server project (http://www.apache.org/)."
289             *
290             * THIS SOFTWARE IS PROVIDED BY THE APACHE GROUP ``AS IS'' AND ANY
291             * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
292             * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
293             * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE APACHE GROUP OR
294             * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
295             * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
296             * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
297             * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
298             * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
299             * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
300             * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
301             * OF THE POSSIBILITY OF SUCH DAMAGE.
302              
303             =cut
304              
305 7     7   88720 use FileHandle;
  7         163728  
  7         46  
306 7     7   8764 use strict;
  7         13  
  7         320  
307              
308 7         2097 use vars qw(
309             %TEMPLATES %ESC $VERSION
310             $magicFile $checkMagic $followLinks $fileList
311             $allowEightbit
312 7     7   37 );
  7         16  
313              
314             BEGIN {
315             # translation of type in magic file to unpack template and byte count
316 7     7   211 %TEMPLATES = (byte => [ 'c', 1 ],
317             ubyte => [ 'C', 1 ],
318             char => [ 'c', 1 ],
319             uchar => [ 'C', 1 ],
320             short => [ 's', 2 ],
321             ushort => [ 'S', 2 ],
322             long => [ 'l', 4 ],
323             ulong => [ 'L', 4 ],
324             date => [ 'l', 4 ],
325             ubeshort => [ 'n', 2 ],
326             beshort => [ [ 'n', 'S', 's' ], 2 ],
327             ubelong => [ 'N', 4 ],
328             belong => [ [ 'N', 'I', 'i' ], 4 ],
329             bedate => [ 'N', 4 ],
330             uleshort => [ 'v', 2 ],
331             leshort => [ [ 'v', 'S', 's' ], 2 ],
332             ulelong => [ 'V', 4 ],
333             lelong => [ [ 'V', 'I', 'i' ], 4 ],
334             ledate => [ 'V', 4 ],
335             string => undef);
336              
337             # for letter escapes in magic file
338 7         63 %ESC = ( n => "\n",
339             r => "\r",
340             b => "\b",
341             t => "\t",
342             f => "\f");
343              
344 7         14 $VERSION = "1.30";
345 7         875 $allowEightbit = 1;
346             }
347              
348             sub new {
349 9     9 1 1863 my $self = {};
350 9         25 my $proto = shift;
351 9   33     71 my $class = ref($proto) || $proto;
352 9         32 $self->{MF} = [];
353 9         29 $self->{magic} = [];
354 9 100       39 if (! @_) {
355 8         24 my $fh = *File::MMagic::DATA{IO};
356 8         169 binmode($fh);
357 8 100       61 bless $fh, 'FileHandle' if ref $fh ne 'FileHandle';
358 8         17 my $dataLoc;
359             # code block to localise the no strict;, contribute by Simon Matthews
360             {
361 7     7   52 no strict 'refs';
  7         20  
  7         55093  
  8         21  
362 8         16 my $instance = \${ "$class\::_instance" };
  8         62  
363 8 100       85 $$instance = $fh->tell() unless $$instance;
364 8         122 $dataLoc = $$instance;
365             }
366              
367 8         49 $fh->seek($dataLoc, 0);
368 8         333 &readMagicHandle($self, $fh);
369             } else {
370 1         2 my $filename = shift;
371 1         12 my $fh = new FileHandle;
372 1 50       46 if ($fh->open("< $filename")) {
373 1         73 binmode($fh);
374 1         6 &readMagicHandle($self, $fh);
375             } else {
376 0         0 warn __PACKAGE__ . " couldn't load specified file $filename";
377             }
378             }
379              
380             # from the BSD names.h, some tokens for hard-coded checks of
381             # different texts. This isn't rocket science. It's prone to
382             # failure so these checks are only a last resort.
383              
384             # removSpecials() can be used to remove those afterwards.
385 9         1836 $self->{SPECIALS} = {
386             "message/rfc822" => [ "^Received:",
387             "^>From ",
388             "^From ",
389             "^To: ",
390             "^Return-Path: ",
391             "^Cc: ",
392             "^X-Mailer: "],
393             "message/news" => [ "^Newsgroups: ",
394             "^Path: ",
395             "^X-Newsreader: "],
396             "text/html" => [ "]*>",
397             "]*>",
398             "]*>",
399             "]*>",
400             "]*>",
401             "]*>",
402             "]*>",
403             "]*>",
404             "]*>",
405             "]*>",
406             ],
407             "text/x-roff" => [
408             '^\\.\\\\"',
409             "^\\.SH ",
410             "^\\.PP ",
411             "^\\.TH ",
412             "^\\.BR ",
413             "^\\.SS ",
414             "^\\.TP ",
415             "^\\.IR ",
416             ],
417             };
418              
419 9         680 $self->{FILEEXTS} = {
420             '\.gz$' => 'application/x-gzip',
421             '\.bz2$' => 'application/x-bzip2',
422             '\.Z$' => 'application/x-compress',
423             '\.txt$' => 'text/plain',
424             '\.html$' => 'text/html',
425             '\.htm$' => 'text/html',
426             };
427             # content hook
428 9         581 $self->{chook} = {};
429 9         58 return bless $self, $class;
430             }
431              
432             sub addSpecials {
433 0     0 1 0 my $self = shift;
434 0         0 my $mtype = shift;
435 0         0 $self->{SPECIALS}->{"$mtype"} = [@_];
436 0         0 return $self;
437             }
438              
439             sub removeSpecials {
440 0     0 1 0 my $self = shift;
441             # Remove all keys if no arguments given
442 0   0     0 my @mtypes = (@_ or keys %{$self->{SPECIALS}});
443 0         0 my %returnmtypes;
444 0         0 foreach my $mtype (@mtypes) {
445 0         0 $returnmtypes{"$mtype"} = delete $self->{SPECIALS}->{"$mtype"};
446             }
447 0         0 return %returnmtypes;
448             }
449              
450             sub addFileExts {
451 0     0 1 0 my $self = shift;
452 0         0 my $filepat = shift;
453 0         0 my $mtype = shift;
454 0         0 $self->{FILEEXTS}->{"$filepat"} = $mtype;
455 0         0 return $self;
456             }
457              
458             sub removeFileExts {
459 0     0 1 0 my $self = shift;
460             # Remove all keys if no arguments given
461 0   0     0 my @filepats = (@_ or keys %{$self->{FILEEXTS}});
462 0         0 my %returnfilepats;
463 0         0 foreach my $filepat (@filepats) {
464 0         0 $returnfilepats{"$filepat"} = delete $self->{FILEEXTS}->{"$filepat"};
465             }
466 0         0 return %returnfilepats;
467             }
468              
469             sub addMagicEntry {
470 1     1 1 8 my $self = shift;
471 1         2 my $entry = shift;
472 1 50       6 if ($entry =~ /^>/) {
473 0         0 $entry =~ s/^>//;
474 0         0 my $depth = 1;
475 0         0 my $entref = ${${$self->{magic}}[0]}[2];
  0         0  
  0         0  
476 0         0 while ($entry =~ /^>/) {
477 0         0 $entry =~ s/^>//;
478 0         0 $depth ++;
479 0         0 $entref = ${${$entref}[0]}[2];
  0         0  
  0         0  
480             }
481 0         0 $entry = '>' x $depth . $entry;
482 0         0 unshift @{$entref}, [$entry, -1, []];
  0         0  
483 0         0 return $self;
484             }
485 1         2 unshift @{$self->{magic}}, [$entry, -1, []];
  1         9  
486 1         3 return $self;
487             }
488              
489             sub readMagicHandle {
490 9     9 1 21 my $self = shift;
491 9         19 my $fh = shift;
492 9         36 $self->{MF}->[0] = $fh;
493 9         25 $self->{MF}->[1] = undef;
494 9         25 $self->{MF}->[2] = 0;
495 9         43 readMagicEntry($self->{magic}, $self->{MF});
496             }
497              
498             sub addContainerHook {
499 1     1 0 10 my $self = shift;
500 1         2 my $mtype = shift;
501 1         2 my $funcref = shift;
502 1         6 $self->{chook}->{$mtype} = $funcref;
503             }
504              
505             # Not implimented.
506             #
507             #sub readMagicFile {
508             # my $self = shift;
509             # my $mfile = shift;
510             #}
511              
512             sub checktype_filename {
513 6     6 1 42 my $self = shift;
514              
515             # iterate over each file explicitly so we can seek
516 6         12 my $file = shift;
517              
518             # the description line. append info to this string
519 6         35 my $desc;
520             my $mtype;
521              
522             # 0) check permission
523 6 50       188 if (! -r $file) {
524 0         0 $desc .= " can't read `$file': Permission denied.";
525 0         0 return "x-system/x-error; $desc";
526             }
527              
528             # 1) check for various special files first
529 6 50       629 if ($^O eq 'MSWin32') {
530 0         0 stat($file);
531             } else {
532 6 50       21 if ($followLinks) { stat($file); } else { lstat($file); }
  0         0  
  6         125  
533             }
534 6 50 33     52 if (! -f _ or -z _) {
535 0 0 0     0 if ( $^O ne 'MSWin32' && !$followLinks && -l _ ) {
    0 0        
    0          
    0          
    0          
    0          
    0          
536 0         0 $desc .= " symbolic link to ".readlink($file);
537             }
538 0         0 elsif ( -d _ ) { $desc .= " directory"; }
539 0         0 elsif ( -p _ ) { $desc .= " named pipe"; }
540 0         0 elsif ( -S _ ) { $desc .= " socket"; }
541 0         0 elsif ( -b _ ) { $desc .= " block special file"; }
542 0         0 elsif ( -c _ ) { $desc .= " character special file"; }
543 0         0 elsif ( -z _ ) { $desc .= " empty"; }
544 0         0 else { $desc .= " special"; }
545              
546 0         0 return "x-system/x-unix; $desc";
547             }
548              
549             # current file handle. or undef if checkMagic (-c option) is true.
550 6         12 my $fh;
551              
552             # $fh = new FileHandle "< $file" or die "$F: $file: $!\n" ;
553 6 50       61 $fh = new FileHandle "< $file" or return "x-system/x-error; $file: $!\n" ;
554              
555 6         1380 binmode($fh); # for MSWin32
556              
557             # 2) check for script
558 6 50 33     113 if (-x $file && -T _) {
559              
560             # Note, some magic files include elaborate attempts
561             # to match #! header lines and return pretty responses
562             # but this slows down matching and is unnecessary.
563 0         0 my $line1 = <$fh>;
564 0 0       0 if ($line1 =~ /^\#!\s*(\S+)/) {
565 0         0 $desc .= " executable $1 script text";
566             }
567 0         0 else { $desc .= " commands text"; }
568              
569 0         0 $fh->close();
570              
571 0         0 return "x-system/x-unix; $desc";
572              
573             }
574              
575 6         24 my $out = checktype_filehandle($self, $fh, $desc);
576 6         14 undef $fh;
577              
578 6         160 return $out;
579             }
580              
581             sub checktype_filehandle {
582 6     6 0 8 my $self = shift;
583 6         11 my ($fh, $desc) = @_;
584 6         8 my $mtype;
585              
586 6         11 binmode($fh); # for MSWin32 architecture.
587              
588             # 3) iterate over each magic entry.
589 6         122 my $matchFound = 0;
590 6         9 my $m;
591 6         12 for ($m = 0; $m <= $#{$self->{magic}}; $m++) {
  164         496  
592              
593             # check if the m-th magic entry matches
594             # if it does, then $desc will contain an updated description
595 162 100       375 if (magicMatch($self->{magic}->[$m],\$desc,$fh)) {
596 4 50 33     27 if (defined $desc && $desc ne '') {
597 4         6 $matchFound = 1;
598 4         6 $mtype = $desc;
599 4         10 last;
600             }
601             }
602              
603             # read another entry from the magic file if we've exhausted
604             # all the entries already buffered. readMagicEntry will
605             # add to the end of the array if there are more.
606 158 100 66     210 if ($m == $#{$self->{magic}} && !$self->{MF}->[0]->eof()) {
  158         763  
607 156         1317 readMagicEntry($self->{magic}, $self->{MF});
608             }
609             }
610              
611             # 4) check if it's text or binary.
612             # if it's text, then do a bunch of searching for special tokens
613 6 100       20 if (!$matchFound) {
614 2         3 my $data;
615 2         8 $fh->seek(0,0);
616 2         24 $fh->read($data, 0x8564);
617 2         40 $mtype = checktype_data($self, $data);
618             }
619              
620 6 100       30 $mtype = 'text/plain' if (! defined $mtype);
621              
622 6         18 return $mtype;
623             }
624              
625             sub checktype_contents {
626 1     1 1 9 my $self = shift;
627 1         2 my $data = shift;
628 1         2 my $mtype;
629              
630 1 50       5 return 'application/octet-stream' if (length($data) <= 0);
631              
632 1         4 $mtype = checktype_container($self, $data);
633 1 50       5 return $mtype unless $mtype eq "";
634              
635 1         4 $mtype = checktype_magic($self, $data);
636              
637             # 4) check if it's text or binary.
638             # if it's text, then do a bunch of searching for special tokens
639 1 50       6 if (!defined $mtype) {
640 1         9 $mtype = checktype_data($self, $data);
641             }
642              
643 1 50       5 $mtype = 'text/plain' if (! defined $mtype);
644              
645 1         6 return $mtype;
646             }
647              
648             sub checktype_container {
649 2     2 0 7 my $self = shift;
650 2         4 my $data = shift;
651 2         10 my $href = $self->{chook};
652 2         10 foreach my $mtype (keys %$href) {
653 1         1 my $ret = &{$href->{$mtype}}($self, $data);
  1         4  
654 1 50       11 return $ret if $ret ne "";
655             }
656 1         3 return "";
657             }
658              
659             sub checktype_magic {
660 1     1 1 2 my $self = shift;
661 1         4 my $data = shift;
662 1         2 my $desc;
663             my $mtype;
664              
665 1 50       4 return 'application/octet-stream' if (length($data) <= 0);
666              
667             # 3) iterate over each magic entry.
668 1         1 my $m;
669 1         3 for ($m = 0; $m <= $#{$self->{magic}}; $m++) {
  131         383  
670              
671             # check if the m-th magic entry matches
672             # if it does, then $desc will contain an updated description
673 130 50       290 if (magicMatchStr($self->{magic}->[$m],\$desc,$data)) {
674 0 0 0     0 if (defined $desc && $desc ne '') {
675 0         0 $mtype = $desc;
676 0         0 last;
677             }
678             }
679              
680             # read another entry from the magic file if we've exhausted
681             # all the entries already buffered. readMagicEntry will
682             # add to the end of the array if there are more.
683 130 100 66     180 if ($m == $#{$self->{magic}} && !$self->{MF}->[0]->eof()) {
  130         617  
684 129         1117 readMagicEntry($self->{magic}, $self->{MF});
685             }
686             }
687              
688 1         4 return $mtype;
689             }
690              
691             sub checktype_data {
692 3     3 0 6 my $self = shift;
693 3         8 my $data = shift;
694 3         5 my $mtype;
695              
696 3 50       11 return undef if (length($data) <= 0);
697              
698             # truncate data
699 3         8 $data = substr($data, 0, 0x8564);
700              
701             # at first, check SPECIALS
702             {
703             # in BSD's version, there's an effort to search from
704             # more specific to less, but I don't do that.
705 3         6 my %val;
  3         12  
706 3         4 foreach my $type (keys %{$self->{SPECIALS}}) {
  3         19  
707 12         15 my $matched_pos = undef;
708 12         15 foreach my $token (@{$self->{SPECIALS}->{$type}}){
  12         32  
709 84         147 pos($data) = 0;
710 84 100       1037 if ($data =~ /$token/mg) {
711 3         4 my $tmp = pos($data);
712 3 100 66     16 if ((! defined $matched_pos) || ($matched_pos > $tmp)) {
713 1         2 $matched_pos = $tmp;
714             }
715             }
716             }
717 12 100       38 $val{$type} = $matched_pos if $matched_pos;
718             }
719             # search latest match
720 3 100       16 if (%val) {
721 1         4 my @skeys = sort { $val{$a} <=> $val{$b} } keys %val;
  0         0  
722 1         4 $mtype = $skeys[0];
723             }
724            
725             # $mtype = 'text/plain' if (! defined $mtype);
726             }
727 3 50 66     19 if (! defined $mtype && check_binary($data)) {
728 0         0 $mtype = "application/octet-stream";
729             }
730            
731             # $mtype = 'text/plain' if (! defined $mtype);
732 3         18 return $mtype;
733             }
734              
735             sub checktype_byfilename {
736 0     0 0 0 my $self = shift;
737 0         0 my $fname = shift;
738 0         0 my $type;
739              
740 0         0 $fname =~ s/^.*\///;
741 0         0 for my $regex (keys %{$self->{FILEEXTS}}) {
  0         0  
742 0 0       0 if ($fname =~ /$regex/i) {
743 0 0 0     0 if ((defined $type && $type !~ /;/) || (! defined $type)) {
      0        
744 0         0 $type = $self->{FILEEXTS}->{$regex}; # has no x-type param
745             }
746             }
747             }
748 0 0       0 $type = 'application/octet-stream' unless defined $type;
749 0         0 return $type;
750             }
751              
752             sub check_binary {
753 2     2 0 5 my ($data) = @_;
754 2         5 my $len = length($data);
755 2 50       8 if ($allowEightbit) {
756 2         7 my $count = ($data =~ tr/\x00-\x08\x0b-\x0c\x0e-\x1a\x1c-\x1f//); # exclude TAB, ESC, nl, cr
757 2 50       50 return 1 if ($len <= 0); # no contents
758 2 50       16 return 1 if (($count/$len) > 0.1); # binary
759             } else {
760 0         0 my $count = ($data =~ tr/\x00-\x08\x0b-\x0c\x0e-\x1a\x1c-\x1f\x80-\xff//); # exclude TAB, ESC, nl, cr
761 0 0       0 return 1 if ($len <= 0); # no contents
762 0 0       0 return 1 if (($count/$len) > 0.3); # binary
763             }
764 2         25 return 0;
765             }
766              
767             sub check_magic {
768 0     0 0 0 my $self = shift @_;
769             # read the whole file if we haven't already
770 0         0 while (!$self->{MF}->[0]->eof()) {
771 0         0 readMagicEntry($self->{magic}, $self->{MF});
772             }
773 0         0 dumpMagic($self->{magic});
774             }
775              
776             ####### SUBROUTINES ###########
777              
778             # compare the magic item with the filehandle.
779             # if success, print info and return true. otherwise return undef.
780             #
781             # this is called recursively if an item has subitems.
782             sub magicMatch {
783 162     162 0 202 my ($item, $p_desc, $fh) = @_;
784              
785             # delayed evaluation. if this is our first time considering
786             # this item, then parse out its structure. @$item is just the
787             # raw string, line number, and subtests until we need the real info.
788             # this saves time otherwise wasted parsing unused subtests.
789 162 50       308 if (@$item == 3){
790 162         295 my $tmp = readMagicLine(@$item);
791 162 50       340 return unless defined($tmp);
792 162         1062 @$item = @$tmp;
793             }
794              
795             # $item could be undef if we ran into troubles while reading
796             # the entry.
797 162 50       391 return unless defined($item);
798              
799             # $fh is not be defined if -c. that way we always return
800             # false for every item which allows reading/checking the entire
801             # magic file.
802 162 50       278 return unless defined($fh);
803            
804 162         410 my ($offtype, $offset, $numbytes, $type, $mask, $op, $testval,
805             $template, $message, $subtests) = @$item;
806              
807             # bytes from file
808 162         153 my $data;
809              
810             # set to true if match
811 162         162 my $match = 0;
812              
813             # offset = [ off1, sz, template, off2 ] for indirect offset
814 162 50       336 if ($offtype == 1) {
    50          
815 0         0 my ($off1, $sz, $template, $off2) = @$offset;
816 0 0       0 $fh->seek($off1,0) or return;
817 0 0       0 if ($fh->read($data,$sz) != $sz) { return };
  0         0  
818 0         0 $off2 += unpack($template,$data);
819 0 0       0 $fh->seek($off2,0) or return;
820             }
821             elsif ($offtype == 2) {
822             # relative offsets from previous seek
823 0 0       0 $fh->seek($offset,1) or return;
824             }
825             else {
826             # absolute offset
827 162 50       468 $fh->seek($offset,0) or return;
828             }
829              
830 162 100 66     2547 if ($type =~ /^string/ || $type =~ /^regex/) {
    50          
831             # read the length of the match string unless the
832             # comparison is '>' ($numbytes == 0), in which case
833             # read to the next null or "\n". (that's what BSD's file does)
834 140 50       224 if ($numbytes > 0) {
835 140 100       385 if ($fh->read($data,$numbytes) != $numbytes) { return; }
  8         91  
836             }
837             else {
838 0         0 my $ch = $fh->getc();
839 0   0     0 while (defined($ch) && $ch ne "\0" && $ch ne "\n") {
      0        
840 0         0 $data .= $ch;
841 0         0 $ch = $fh->getc();
842             }
843             }
844              
845             # now do the comparison
846 132 50       79949 if ($op eq '=') {
    0          
    0          
    0          
847 132         227 $match = ($data eq $testval);
848             }
849             elsif ($op eq '<') {
850 0         0 $match = ($data lt $testval);
851             }
852             elsif ($op eq '>') {
853 0         0 $match = ($data gt $testval);
854             }
855             elsif ($op eq 'match') {
856 0 0 0     0 $data = ($data || '') ? $data : '';
857 0         0 $match = $data =~ /$testval/;
858             }
859             # else bogus op, but don't die, just skip
860              
861 132 50       267 if ($checkMagic) {
862 0         0 print STDERR "STRING: $data $op $testval => $match\n";
863             }
864              
865             }
866             elsif ($type =~ /^search\//)
867             {
868             }
869             else {
870             #numeric
871              
872             # read up to 4 bytes
873 22 50       54 if ($fh->read($data,$numbytes) != $numbytes) { return; }
  0         0  
874              
875             # If template is a ref to an array of 3 letters,
876             # then this is an endian
877             # number which must be first unpacked into an unsigned and then
878             # coerced into a signed. Is there a better way?
879 22 50       212 if (ref($template)) {
880 0         0 $data = unpack($$template[2],
881             pack($$template[1],
882             unpack($$template[0],$data)));
883             }
884             else {
885 22         44 $data = unpack($template,$data);
886             }
887              
888             # if mask
889 22 100       45 if (defined($mask)) {
890 6         8 $data &= $mask;
891             }
892              
893             # Now do the check
894 22 100       41 if ($op eq '=') {
    50          
    50          
    50          
    0          
    0          
    0          
895 21         23 $match = ($data == $testval);
896             }
897             elsif ($op eq 'x') {
898 0         0 $match = 1;
899             }
900             elsif ($op eq '!') {
901 0         0 $match = ($data != $testval);
902             }
903             elsif ($op eq '&') {
904 1         2 $match = (($data & $testval) == $testval);
905             }
906             elsif ($op eq '^') {
907 0         0 $match = ((~$data & $testval) == $testval);
908             }
909             elsif ($op eq '<') {
910 0         0 $match = ($data < $testval);
911             }
912             elsif ($op eq '>') {
913 0         0 $match = ($data > $testval);
914             }
915             # else bogus entry that we're ignoring
916              
917 22 50       41 if ($checkMagic) {
918 0         0 print STDERR "NUMERIC: $data $op $testval => $match\n";
919             }
920              
921             }
922              
923 154 100       614 if ($match) {
924             # it's pretty common to find "\b" in the message, but
925             # sprintf doesn't insert a backspace. if it's at the
926             # beginning (typical) then don't include separator space.
927 4 50       18 if ($message =~ s/^\\b//) {
928 0         0 $$p_desc .= sprintf($message,$data);
929             }
930             else {
931             # $$p_desc .= ' ' . sprintf($message,$data) if $message;
932 4 50       26 $$p_desc .= sprintf($message,$data) if $message;
933             }
934              
935 4         6 my $subtest;
936 4         11 foreach $subtest (@$subtests) {
937 0         0 magicMatch($subtest,$p_desc,$fh);
938             }
939              
940 4         17 return 1;
941             }
942            
943             }
944              
945             sub magicMatchStr {
946 130     130 0 186 my ($item, $p_desc, $str) = @_;
947 130         164 my $origstr = $str;
948              
949             # delayed evaluation. if this is our first time considering
950             # this item, then parse out its structure. @$item is just the
951             # raw string, line number, and subtests until we need the real info.
952             # this saves time otherwise wasted parsing unused subtests.
953 130 50       237 if (@$item == 3){
954 130         224 my $tmp = readMagicLine(@$item);
955              
956             # $item could be undef if we ran into troubles while reading
957             # the entry.
958 130 50       264 return unless defined($tmp);
959              
960 130         856 @$item = @$tmp;
961             }
962              
963             # $fh is not be defined if -c. that way we always return
964             # false for every item which allows reading/checking the entire
965             # magic file.
966 130 50       299 return unless defined($str);
967 130 50       239 return if ($str eq '');
968            
969 130         281 my ($offtype, $offset, $numbytes, $type, $mask, $op, $testval,
970             $template, $message, $subtests) = @$item;
971 130 50       222 return unless defined $op;
972              
973             # bytes from file
974 130         137 my $data;
975              
976             # set to true if match
977 130         146 my $match = 0;
978              
979             # offset = [ off1, sz, template, off2 ] for indirect offset
980 130 50       299 if ($offtype == 1) {
    50          
981 0         0 my ($off1, $sz, $template, $off2) = @$offset;
982 0 0       0 return if (length($str) < $off1);
983 0         0 $data = pack("a$sz", $str);
984 0         0 $off2 += unpack($template,$data);
985 0 0       0 return if (length($str) < $off2);
986             }
987             elsif ($offtype == 2) {
988             # can't handle relative offsets from previous seek
989 0         0 return;
990             }
991             else {
992             # absolute offset
993 130 100       276 return if ($offset > length($str));
994 118         190 $str = substr($str, $offset);
995             }
996              
997 118 100 66     407 if ($type =~ /^string/ || $type =~ /^regex/) {
    50          
998             # read the length of the match string unless the
999             # comparison is '>' ($numbytes == 0), in which case
1000             # read to the next null or "\n". (that's what BSD's file does)
1001 97 50       146 if ($numbytes > 0) {
1002 97         244 $data = pack("a$numbytes", $str);
1003             }
1004             else {
1005 0         0 $str =~ /^(.*)\0|$/;
1006 0         0 $data = $1;
1007             }
1008              
1009             # now do the comparison
1010 97 50       158 if ($op eq '=') {
    0          
    0          
    0          
1011 97         126 $match = ($data eq $testval);
1012             }
1013             elsif ($op eq '<') {
1014 0         0 $match = ($data lt $testval);
1015             }
1016             elsif ($op eq '>') {
1017 0         0 $match = ($data gt $testval);
1018             }
1019             elsif ($op eq 'match') {
1020 0   0     0 $match = eval {($data || '') =~ /$testval/};
  0         0  
1021             }
1022              
1023             # else bogus op, but don't die, just skip
1024              
1025 97 50       185 if ($checkMagic) {
1026 0         0 print STDERR "STRING: $data $op $testval => $match\n";
1027             }
1028              
1029             }
1030             elsif ($type =~ /^search\//)
1031             {
1032             }
1033             else {
1034             #numeric
1035              
1036             # read up to 4 bytes
1037 21 50       40 return if (length($str) < 4);
1038 21         52 $data = substr($str, 0, 4);
1039              
1040             # If template is a ref to an array of 3 letters,
1041             # then this is an endian
1042             # number which must be first unpacked into an unsigned and then
1043             # coerced into a signed. Is there a better way?
1044 21 50       37 if (ref($template)) {
1045 0         0 $data = unpack($$template[2],
1046             pack($$template[1],
1047             unpack($$template[0],$data)));
1048             }
1049             else {
1050 21         56 $data = unpack($template,$data);
1051             }
1052              
1053             # if mask
1054 21 100       39 if (defined($mask)) {
1055 6         9 $data &= $mask;
1056             }
1057              
1058             # Now do the check
1059 21 100       41 if ($op eq '=') {
    50          
    50          
    50          
    0          
    0          
    0          
1060 20         30 $match = ($data == $testval);
1061             }
1062             elsif ($op eq 'x') {
1063 0         0 $match = 1;
1064             }
1065             elsif ($op eq '!') {
1066 0         0 $match = ($data != $testval);
1067             }
1068             elsif ($op eq '&') {
1069 1         3 $match = (($data & $testval) == $testval);
1070             }
1071             elsif ($op eq '^') {
1072 0         0 $match = ((~$data & $testval) == $testval);
1073             }
1074             elsif ($op eq '<') {
1075 0         0 $match = ($data < $testval);
1076             }
1077             elsif ($op eq '>') {
1078 0         0 $match = ($data > $testval);
1079             }
1080             # else bogus entry that we're ignoring
1081              
1082 21 50       39 if ($checkMagic) {
1083 0         0 print STDERR "NUMERIC: $data $op $testval => $match\n";
1084             }
1085              
1086             }
1087              
1088 118 50       451 if ($match) {
1089             # it's pretty common to find "\b" in the message, but
1090             # sprintf doesn't insert a backspace. if it's at the
1091             # beginning (typical) then don't include separator space.
1092 0 0       0 if ($message =~ s/^\\b//) {
1093 0         0 $$p_desc .= sprintf($message,$data);
1094             }
1095             else {
1096             # $$p_desc .= ' ' . sprintf($message,$data) if $message;
1097 0 0       0 $$p_desc .= sprintf($message,$data) if $message;
1098             }
1099              
1100 0         0 my $subtest;
1101 0         0 foreach $subtest (@$subtests) {
1102             # finish evaluation when matched.
1103 0         0 magicMatchStr($subtest,$p_desc,$origstr);
1104             }
1105              
1106 0         0 return 1;
1107             }
1108            
1109             }
1110              
1111             # readMagicEntry($pa_magic, $MF, $depth)
1112             #
1113             # reads the next entry from the magic file and stores it as
1114             # a ref to an array at the end of @$pa_magic.
1115             #
1116             # $MF = [ filehandle, last buffered line, line count ]
1117             #
1118             # This is called recursively with increasing $depth to read in sub-clauses
1119             #
1120             # returns the depth of the current buffered line.
1121             #
1122             sub readMagicEntry {
1123 354     354 0 496 my ($pa_magic, $MF, $depth) = @_;
1124              
1125             # for some reason I need a local var because <$$MF[0]> doesn't work.(?)
1126 354         452 my $magicFH = $$MF[0];
1127              
1128             # a ref to an array containing a magic line's components
1129 354         344 my ($entry, $line);
1130              
1131 354         472 $line = $$MF[1]; # buffered last line
1132 354         426 while (1) {
1133 1662 100       3297 $line = '' if (! defined $line);
1134 1662 100 100     7384 if ($line =~ /^\#/ || $line =~ /^\s*$/) {
1135 897 100       2152 last if $magicFH->eof();
1136 895         81485 $line = <$magicFH>;
1137 895         1641 $$MF[2]++;
1138 895         1190 next;
1139             }
1140            
1141 765         1407 my ($thisDepth) = ($line =~ /^(>+)/);
1142 765 100       1661 $thisDepth = '' if (! defined $thisDepth);
1143 765 100       7152 $depth = 0 if (! defined $depth);
1144              
1145 765 100 66     3255 if (length($thisDepth) > $depth) {
    100          
    100          
1146 60         84 $$MF[1] = $line;
1147              
1148             # call ourselves recursively. will return the depth
1149             # of the entry following the nested group.
1150 60 50 100     137 if ((readMagicEntry($entry->[2], $MF, $depth+1) || 0) < $depth ||
      33        
1151             $$MF[0]->eof())
1152             {
1153 0         0 return;
1154             }
1155 60         481 $line = $$MF[1];
1156             }
1157             elsif (length($thisDepth) < $depth) {
1158 28         48 $$MF[1] = $line;
1159 28         300 return length($thisDepth);
1160             }
1161             elsif ('ARRAY' eq ref $entry && @$entry) {
1162             # already have an entry. this is not a continuation.
1163             # save this line for the next call and exit.
1164 323         465 $$MF[1] = $line;
1165 323         1158 return length($thisDepth);
1166             }
1167             else {
1168             # we're here if the number of '>' is the same as the
1169             # current depth and we haven't read a magic line yet.
1170              
1171             # create temp entry
1172             # later -- if we ever get around to evaluating this condition --
1173             # we'll replace @$entry with the results from readMagicLine.
1174 354         927 $entry = [ $line , $$MF[2], [] ];
1175              
1176             # add to list
1177 354         604 push(@$pa_magic,$entry);
1178              
1179             # read the next line
1180 354 100       871 last if $magicFH->eof();
1181 353         3210 $line = <$magicFH>;
1182 353         654 $$MF[2]++;
1183             }
1184             }
1185             }
1186              
1187             # readMagicLine($line, $line_num, $subtests)
1188             #
1189             # parses the match info out of $line. Returns a reference to an array.
1190             #
1191             # Format is:
1192             #
1193             # [ offset, bytes, type, mask, operator, testval, template, sprintf, subtests ]
1194             # 0 1 2 3 4 5 6 7 8
1195             #
1196             # subtests is an array like @$pa_magic.
1197             #
1198             sub readMagicLine {
1199 292     292 0 404 my ($line, $line_num, $subtests) = @_;
1200              
1201 292         334 my ($offtype, $offset, $numbytes, $type, $mask,
1202             $operator, $testval, $template, $message);
1203            
1204             # this would be easier if escaped whitespace wasn't allowed.
1205              
1206             # grab the offset and type. offset can either be a decimal, oct,
1207             # or hex offset or an indirect offset specified in parenthesis
1208             # like (x[.[bsl]][+-][y]), or a relative offset specified by &.
1209             # offtype : 0 = absolute, 1 = indirect, 2 = relative
1210 292 50       1615 if ($line =~ s/^>*([&\(]?[a-fA-Flsx\.\+\-\d]+\)?)\s+(\S+)\s+//) {
1211 292         810 ($offset,$type) = ($1,$2);
1212              
1213 292 50       705 if ($offset =~ /^\(/) {
    50          
1214             # indirect offset.
1215 0         0 $offtype = 1;
1216              
1217             # store as a reference [ offset1 type template offset2 ]
1218              
1219 0         0 my ($o1,$type,$o2);
1220 0 0       0 if (($o1,$type,$o2) = ($offset =~ /\((\d+)(\.[bsl])?([\+\-]?\d+)?\)/))
1221             {
1222 0 0       0 $o1 = oct($o1) if $o1 =~ /^0/o;
1223 0 0       0 $o2 = oct($o2) if $o2 =~ /^0/o;
1224              
1225 0         0 $type =~ s/\.//;
1226 0 0       0 if ($type eq '') { $type = 'l'; } # default to long
  0         0  
1227 0         0 $type =~ tr/b/c/; # type will be template for unpack
1228              
1229 0         0 my $sz = $type; # number of bytes
1230 0         0 $sz =~ tr/csl/124/;
1231              
1232 0         0 $offset = [ $o1,$sz,$type,int($o2) ];
1233             } else {
1234 0         0 warn "Bad indirect offset at line $line_num. '$offset'\n";
1235 0         0 return;
1236             }
1237             }
1238             elsif ($offset =~ /^&/o) {
1239             # relative offset
1240 0         0 $offtype = 2;
1241              
1242 0         0 $offset = substr($offset,1);
1243 0 0       0 $offset = oct($offset) if $offset =~ /^0/o;
1244             }
1245             else {
1246             # normal absolute offset
1247 292         324 $offtype = 0;
1248              
1249             # convert if needed
1250 292 100       1077 $offset = oct($offset) if $offset =~ /^0/o;
1251             }
1252             }
1253             else {
1254 0         0 warn "Bad Offset/Type at line $line_num. '$line'\n";
1255 0         0 return;
1256             }
1257            
1258             # check for & operator on type
1259 292 100       676 if ($type =~ s/&(.*)//) {
1260 12         20 $mask = $1;
1261              
1262             # convert if needed
1263 12 50       39 $mask = oct($mask) if $mask =~ /^0/o;
1264             }
1265            
1266             # check if type is valid
1267 292 0 66     667 if (!exists($TEMPLATES{$type}) && $type !~ /^string/ && $type !~ /^regex/ && $type !~ /^search\//) {
      33        
      0        
1268 0         0 warn "Invalid type '$type' at line $line_num\n";
1269 0         0 return;
1270             }
1271            
1272             # take everything after the first non-escaped space
1273 292 50       1700 if ($line =~ s/([^\\])\s+(.*)/$1/) {
1274 292         488 $message = $2;
1275             }
1276             else {
1277 0         0 warn "Missing or invalid test condition or message at line $line_num\n";
1278 0         0 return;
1279             }
1280            
1281             # remove the return if it's still there
1282 292         833 $line =~ s/\n$//o;
1283              
1284             # get the operator. if 'x', must be alone. default is '='.
1285 292 50       13439 if ($type !~ /regex/)
1286             {
1287 292 100       804 if ($line =~ s/^([><&^=!])//o) {
    50          
1288 2         5 $operator = $1;
1289             }
1290             elsif ($line eq 'x') {
1291 0         0 $operator = 'x';
1292             }
1293 290         402 else { $operator = '='; }
1294             } else {
1295 0         0 $operator = 'match';
1296             }
1297            
1298              
1299 292 100 66     1000 if ($type =~ /^string/ || $type =~ /^regex/)
    50          
1300             {
1301 248         328 $testval = $line;
1302              
1303             # do octal/hex conversion
1304             # manmin
1305 248         303 $testval =~ s/\\x([0-9a-fA-F][0-9a-fA-F])/pack("H2", $1)/eg;
  0         0  
1306 248         462 $testval =~ s/\\([0-7][0-7]?[0-7]?)/chr(oct($1))/eg;
  132         383  
1307             # end manmin
1308              
1309             # do single char escapes
1310 248 50       502 $testval =~ s/\\(.)/$ESC{$1}||$1/eg;
  136         699  
1311              
1312             # put the number of bytes to read in numbytes.
1313             # '0' means read to \0 or \n.
1314 248 50       868 if ($operator =~ /[>x]/o) {
    50          
    0          
    0          
1315 0         0 $numbytes = 0;
1316             }
1317             elsif ($operator =~ /[=<]/o) {
1318 248         307 $numbytes = length($testval);
1319             }
1320             elsif ($operator eq '!') {
1321             # annoying special case. ! operator only applies to numerics so
1322             # put it back.
1323 0         0 $testval = $operator . $testval;
1324 0         0 $numbytes = length($testval);
1325 0         0 $operator = '=';
1326             }
1327             elsif ($operator eq 'match') {
1328 0         0 eval {"" =~ /$testval/}; # Check the regex
  0         0  
1329 0 0       0 warn "Invalid regex at line $line_num - $@\n" if ($@);
1330 0         0 $numbytes = 0;
1331             }
1332             else {
1333             # there's a bug in my magic file where there's
1334             # a line that says "0 string ^!
1335             # file program treats the argument like a numeric. To minimize
1336             # hassles, complain about bad ops only if -c is set.
1337 0 0       0 warn "Invalid operator '$operator' for type 'string' at line $line_num.\n"
1338             if $checkMagic;
1339 0         0 return;
1340             }
1341             }
1342             elsif ($type =~ /^search\//)
1343             {
1344             }
1345             else {
1346             # numeric
1347 44 50       94 if ($operator ne 'x') {
1348             # this conversion is very forgiving. it's faster and
1349             # it doesn't complain about bugs in popular magic files,
1350             # but it will silently turn a string into zero.
1351 44 100       109 if ($line =~ /^0/o) {
1352 40         58 $testval = oct($line);
1353             } else {
1354 4         9 $testval = int($line);
1355             }
1356             }
1357              
1358 44         49 ($template,$numbytes) = @{$TEMPLATES{$type}};
  44         110  
1359              
1360             # unset coercion of $unsigned unless we're doing order comparison
1361 44 100       101 if (ref($template)) {
1362 32 50 33     150 $template = $$template[0]
1363             unless $operator eq '>' || $operator eq '<';
1364             }
1365             }
1366            
1367 292         1199 return [ $offtype, $offset, $numbytes, $type, $mask,
1368             $operator, $testval, $template, $message, $subtests ];
1369             }
1370              
1371             # recursively write the magic file to stderr. Numbers are written
1372             # in decimal.
1373             sub dumpMagic {
1374 0     0 0   my ($magic,$depth) = @_;
1375 0 0         $magic = [] unless defined $magic;
1376 0 0         $depth = 0 unless defined $depth;
1377              
1378 0           my $entry;
1379 0           foreach $entry (@$magic) {
1380             # delayed evaluation.
1381 0 0         if (@$entry == 3){
1382 0           my $tmp = readMagicLine(@$entry);
1383 0 0         next if (! $tmp);
1384 0           @$entry = @$tmp;
1385             }
1386              
1387 0 0         next if !defined($entry);
1388              
1389 0           my ($offtype, $offset, $numbytes, $type, $mask, $op, $testval,
1390             $template, $message, $subtests) = @$entry;
1391              
1392 0           print STDERR '>'x$depth;
1393 0 0         if ($offtype == 1) {
    0          
1394 0           $offset->[2] =~ tr/c/b/;
1395 0           print STDERR "($offset->[0].$offset->[2]$offset->[3])";
1396             }
1397             elsif ($offtype == 2) {
1398 0           print STDERR "&",$offset;
1399             }
1400             else {
1401             # offtype == 0
1402 0           print STDERR $offset;
1403             }
1404 0           print STDERR "\t",$type;
1405 0 0         if ($mask) { print STDERR "&",$mask; }
  0            
1406 0           print STDERR "\t",$op,$testval,"\t",$message,"\n";
1407              
1408 0 0         if ($subtests) {
1409 0           dumpMagic($subtests,$depth+1);
1410             }
1411             }
1412             }
1413              
1414             1;
1415             __DATA__