| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package arXiv::FileGuess; | 
| 2 |  |  |  |  |  |  |  | 
| 3 |  |  |  |  |  |  | =head1 NAME | 
| 4 |  |  |  |  |  |  |  | 
| 5 |  |  |  |  |  |  | arXiv::FileGuess - Central file type determination for arXiv | 
| 6 |  |  |  |  |  |  |  | 
| 7 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 8 |  |  |  |  |  |  |  | 
| 9 |  |  |  |  |  |  | Central file type identification for arXiv.org. | 
| 10 |  |  |  |  |  |  |  | 
| 11 |  |  |  |  |  |  | Much of this should probably be replaced with the file program with externally | 
| 12 |  |  |  |  |  |  | maintained magic information. However, we need here to support a number of cases | 
| 13 |  |  |  |  |  |  | special to arXiv and to make some rather fine-grained determinations of TeX | 
| 14 |  |  |  |  |  |  | related file formats, along with determination of the order in which they | 
| 15 |  |  |  |  |  |  | should be processed. | 
| 16 |  |  |  |  |  |  |  | 
| 17 |  |  |  |  |  |  | =cut | 
| 18 |  |  |  |  |  |  |  | 
| 19 | 1 |  |  | 1 |  | 19175 | use strict; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 35 |  | 
| 20 |  |  |  |  |  |  |  | 
| 21 | 1 |  |  | 1 |  | 5 | use base qw(Exporter); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 118 |  | 
| 22 |  |  |  |  |  |  | our @EXPORT_OK = qw( guess_file_type is_tex_type type_name ); | 
| 23 |  |  |  |  |  |  |  | 
| 24 | 1 |  |  | 1 |  | 4 | use vars qw(%NAME %TEX_types); | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 370 |  | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | =head2 INTERNAL VARIABLES | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | =head3 %TEX_types | 
| 29 |  |  |  |  |  |  |  | 
| 30 |  |  |  |  |  |  | A hash of type names which represent TeX types (i.e. should be processed with | 
| 31 |  |  |  |  |  |  | AutoTeX). Accessed via function is_tex_type(). | 
| 32 |  |  |  |  |  |  |  | 
| 33 |  |  |  |  |  |  | =cut | 
| 34 |  |  |  |  |  |  |  | 
| 35 |  |  |  |  |  |  | =head3 %NAME | 
| 36 |  |  |  |  |  |  |  | 
| 37 |  |  |  |  |  |  | A display name indexed by type. Accessed via function type_name(). | 
| 38 |  |  |  |  |  |  |  | 
| 39 |  |  |  |  |  |  | =cut | 
| 40 |  |  |  |  |  |  |  | 
| 41 |  |  |  |  |  |  | BEGIN { | 
| 42 |  |  |  |  |  |  | # Make sure %TEX_types and %NAME full before use | 
| 43 |  |  |  |  |  |  |  | 
| 44 | 1 |  |  | 1 |  | 5 | @TEX_types{qw( | 
| 45 |  |  |  |  |  |  | TYPE_LATEX | 
| 46 |  |  |  |  |  |  | TYPE_TEX | 
| 47 |  |  |  |  |  |  | TYPE_TEX_priority | 
| 48 |  |  |  |  |  |  | TYPE_TEX_AMS | 
| 49 |  |  |  |  |  |  | TYPE_TEX_MAC | 
| 50 |  |  |  |  |  |  | TYPE_LATEX2e | 
| 51 |  |  |  |  |  |  | TYPE_TEX_priority2 | 
| 52 |  |  |  |  |  |  | TYPE_TEXINFO | 
| 53 |  |  |  |  |  |  | TYPE_PDFLATEX | 
| 54 |  |  |  |  |  |  | TYPE_PDFTEX | 
| 55 |  |  |  |  |  |  | )} = (); | 
| 56 |  |  |  |  |  |  |  | 
| 57 | 1 |  |  |  |  | 2 | $NAME{'TYPE_ABORT'} = 'Immediate stop'; | 
| 58 | 1 |  |  |  |  | 2 | $NAME{'TYPE_FAILED'} = 'unknown'; | 
| 59 | 1 |  |  |  |  | 2 | $NAME{'TYPE_ALWAYS_IGNORE'} = 'Always ignore'; | 
| 60 | 1 |  |  |  |  | 1 | $NAME{'TYPE_INPUT'} = 'Input for (La)TeX'; | 
| 61 | 1 |  |  |  |  | 1 | $NAME{'TYPE_BIBTEX'} = 'BiBTeX'; | 
| 62 | 1 |  |  |  |  | 2 | $NAME{'TYPE_POSTSCRIPT'} = 'Postscript'; | 
| 63 | 1 |  |  |  |  | 1 | $NAME{'TYPE_DOS_EPS'} = 'DOS EPS Binary File'; | 
| 64 | 1 |  |  |  |  | 2 | $NAME{'TYPE_PS_FONT'} = 'Postscript Type 1 Font'; | 
| 65 | 1 |  |  |  |  | 1 | $NAME{'TYPE_PS_PC'} = '^D%! Postscript'; | 
| 66 | 1 |  |  |  |  | 1 | $NAME{'TYPE_IMAGE'} = 'Image (gif/jpg etc)'; | 
| 67 | 1 |  |  |  |  | 2 | $NAME{'TYPE_ANIM'} = 'Animation (mpeg etc)'; | 
| 68 | 1 |  |  |  |  | 1 | $NAME{'TYPE_HTML'} = 'HTML'; | 
| 69 | 1 |  |  |  |  | 2 | $NAME{'TYPE_PDF'} = 'PDF'; | 
| 70 | 1 |  |  |  |  | 2 | $NAME{'TYPE_DVI'} = 'DVI'; | 
| 71 | 1 |  |  |  |  | 1 | $NAME{'TYPE_NOTEBOOK'} = 'Mathematica Notebook'; | 
| 72 | 1 |  |  |  |  | 2 | $NAME{'TYPE_ODF'} = 'OpenDocument Format'; | 
| 73 | 1 |  |  |  |  | 1 | $NAME{'TYPE_DOCX'} = 'Microsoft DOCX'; | 
| 74 | 1 |  |  |  |  | 16 | $NAME{'TYPE_TEX'} = 'TEX'; | 
| 75 | 1 |  |  |  |  | 1 | $NAME{'TYPE_PDFTEX'} = 'PDFTEX'; | 
| 76 | 1 |  |  |  |  | 2 | $NAME{'TYPE_TEX_priority2'} = 'TeX (with \\end or \\bye - not starting a line)'; | 
| 77 | 1 |  |  |  |  | 2 | $NAME{'TYPE_TEX_AMS'} = 'AMSTeX'; | 
| 78 | 1 |  |  |  |  | 1 | $NAME{'TYPE_TEX_priority'} = 'TeX (with \\end or \\bye)'; | 
| 79 | 1 |  |  |  |  | 1 | $NAME{'TYPE_TEX_MAC'} = 'TeX +macros (harv,lanl..)'; | 
| 80 | 1 |  |  |  |  | 2 | $NAME{'TYPE_LATEX'} = 'LaTeX'; | 
| 81 | 1 |  |  |  |  | 1 | $NAME{'TYPE_LATEX2e'} = 'LATEX2e'; | 
| 82 | 1 |  |  |  |  | 1 | $NAME{'TYPE_PDFLATEX'} = 'PDFLATEX'; | 
| 83 | 1 |  |  |  |  | 1 | $NAME{'TYPE_TEXINFO'} = 'Texinfo'; | 
| 84 | 1 |  |  |  |  | 2 | $NAME{'TYPE_MF'} = 'Metafont'; | 
| 85 | 1 |  |  |  |  | 2 | $NAME{'TYPE_UUENCODED'} = 'UUencoded'; | 
| 86 | 1 |  |  |  |  | 1 | $NAME{'TYPE_ENCRYPTED'} = 'Encrypted'; | 
| 87 | 1 |  |  |  |  | 2 | $NAME{'TYPE_PC'} = 'PC-ctrl-Ms'; | 
| 88 | 1 |  |  |  |  | 2 | $NAME{'TYPE_MAC'} = 'MAC-ctrl-Ms'; | 
| 89 | 1 |  |  |  |  | 2 | $NAME{'TYPE_CSH'} = 'CSH'; | 
| 90 | 1 |  |  |  |  | 1 | $NAME{'TYPE_SH'} = 'SH'; | 
| 91 | 1 |  |  |  |  | 2 | $NAME{'TYPE_JAR'} = 'JAR archive'; | 
| 92 | 1 |  |  |  |  | 2 | $NAME{'TYPE_RAR'} = 'RAR archive'; | 
| 93 | 1 |  |  |  |  | 1 | $NAME{'TYPE_COMPRESSED'} = 'UNIX-compressed'; | 
| 94 | 1 |  |  |  |  | 2 | $NAME{'TYPE_ZIP'} = 'ZIP-compressed'; | 
| 95 | 1 |  |  |  |  | 1 | $NAME{'TYPE_GZIPPED'} = 'GZIP-compressed'; | 
| 96 | 1 |  |  |  |  | 7 | $NAME{'TYPE_BZIP2'} = 'BZIP2-compressed'; | 
| 97 | 1 |  |  |  |  | 1 | $NAME{'TYPE_MULTI_PART_MIME'} = 'MULTI_PART_MIME'; | 
| 98 | 1 |  |  |  |  | 2 | $NAME{'TYPE_TAR'} = 'TAR archive'; | 
| 99 | 1 |  |  |  |  | 2 | $NAME{'TYPE_IGNORE'} = ' user defined IGNORE'; | 
| 100 | 1 |  |  |  |  | 1 | $NAME{'TYPE_README'} = 'override'; | 
| 101 | 1 |  |  |  |  | 2 | $NAME{'TYPE_TEXAUX'} = 'TeX auxiliary'; | 
| 102 | 1 |  |  |  |  | 1 | $NAME{'TYPE_ABS'} = 'abstract'; | 
| 103 | 1 |  |  |  |  | 1949 | $NAME{'TYPE_INCLUDE'} = ' keep'; | 
| 104 |  |  |  |  |  |  | } | 
| 105 |  |  |  |  |  |  |  | 
| 106 |  |  |  |  |  |  |  | 
| 107 |  |  |  |  |  |  | =head2 SUBROUTINES | 
| 108 |  |  |  |  |  |  |  | 
| 109 |  |  |  |  |  |  | =head3 guess_file_type($filename) | 
| 110 |  |  |  |  |  |  |  | 
| 111 |  |  |  |  |  |  | Guess the file type $filename. Returns ($type, $tex_format, | 
| 112 |  |  |  |  |  |  | $error), all of which are strings. The $type may be supplied to | 
| 113 |  |  |  |  |  |  | is_tex_type($type) or type_name($type) for additional information | 
| 114 |  |  |  |  |  |  | related to the file type. | 
| 115 |  |  |  |  |  |  |  | 
| 116 |  |  |  |  |  |  | For most files $tex_format and $error will be undefined. $tex_format | 
| 117 |  |  |  |  |  |  | will be defined for some TeX file formats where there is additional | 
| 118 |  |  |  |  |  |  | information about the type of TeX. | 
| 119 |  |  |  |  |  |  |  | 
| 120 |  |  |  |  |  |  | =cut | 
| 121 |  |  |  |  |  |  |  | 
| 122 |  |  |  |  |  |  | sub guess_file_type { | 
| 123 | 31 |  |  | 31 | 1 | 672 | my ($filename) = @_; | 
| 124 | 31 |  |  |  |  | 43 | local $_ = $filename; | 
| 125 |  |  |  |  |  |  |  | 
| 126 | 31 | 100 |  |  |  | 76 | return 'TYPE_README' if /(^|\/)00README\.XXX$/; | 
| 127 |  |  |  |  |  |  |  | 
| 128 |  |  |  |  |  |  | # Ignore tmp files created by (unpatched) dvihps, in top dir | 
| 129 | 30 | 50 |  |  |  | 52 | return 'TYPE_ALWAYS_IGNORE' if /(^|\/)(head|body)\.tmp$/; | 
| 130 |  |  |  |  |  |  |  | 
| 131 |  |  |  |  |  |  | # missfont.log files created in top dir should abort processing | 
| 132 |  |  |  |  |  |  | # (missfont.log files in subdirs can be ignored) | 
| 133 | 30 | 50 |  |  |  | 57 | return 'TYPE_ABORT' if /(^|\/)missfont.log$/; | 
| 134 |  |  |  |  |  |  |  | 
| 135 | 30 | 50 |  |  |  | 128 | return 'TYPE_TEXAUX' if /\.(sty|cls|mf|\d*pk|bbl|bst|tfm|ax|def|log|hrfldf|cfg|clo|inx|end|fgx|tbx|rtx|rty|toc)$/i; | 
| 136 | 30 | 50 |  |  |  | 55 | return 'TYPE_ABS' if /\.abs$/; | 
| 137 | 30 | 50 |  |  |  | 80 | return 'TYPE_IGNORE' if /\.fig$/; # ignore xfig files | 
| 138 | 30 | 50 |  |  |  | 57 | return 'TYPE_NOTEBOOK' if /\.nb$/i; | 
| 139 | 30 | 50 |  |  |  | 56 | return 'TYPE_INPUT' if /\.inp$/i; | 
| 140 |  |  |  |  |  |  |  | 
| 141 | 30 | 50 |  |  |  | 57 | return 'TYPE_HTML' if /\.html?$/i; | 
| 142 | 30 | 50 |  |  |  | 699 | return 'TYPE_ENCRYPTED' if /\.cry$/; | 
| 143 |  |  |  |  |  |  |  | 
| 144 |  |  |  |  |  |  | # Ignore zero size files | 
| 145 | 30 | 50 |  |  |  | 420 | return 'TYPE_IGNORE' if (-z $filename); | 
| 146 |  |  |  |  |  |  |  | 
| 147 |  |  |  |  |  |  | # Open file and read first few bytes to do magic sequence identification | 
| 148 |  |  |  |  |  |  | # note that file will be auto-closed when $FILE_TO_GUESS goes out of scope | 
| 149 | 30 | 100 |  |  |  | 862 | open(my $FILE_TO_GUESS, '<', $_) || | 
| 150 |  |  |  |  |  |  | return ('TYPE_FAILED', undef, "failed to open '$filename' to guess its format: $!. Continuing.\n"); | 
| 151 |  |  |  |  |  |  |  | 
| 152 | 28 |  | 50 |  |  | 344 | my $b1 = ord(getc($FILE_TO_GUESS) || 0); | 
| 153 | 28 |  | 50 |  |  | 53 | my $b2 = ord(getc($FILE_TO_GUESS) || 0); | 
| 154 | 28 |  | 50 |  |  | 50 | my $b3 = ord(getc($FILE_TO_GUESS) || 0); | 
| 155 | 28 |  | 50 |  |  | 42 | my $b4 = ord(getc($FILE_TO_GUESS) || 0); | 
| 156 | 28 |  | 50 |  |  | 50 | my $b5 = ord(getc($FILE_TO_GUESS) || 0); | 
| 157 | 28 |  | 50 |  |  | 47 | my $b6 = ord(getc($FILE_TO_GUESS) || 0); | 
| 158 | 28 |  | 50 |  |  | 49 | my $b7 = ord(getc($FILE_TO_GUESS) || 0); | 
| 159 | 28 |  | 50 |  |  | 45 | my $b8 = ord(getc($FILE_TO_GUESS) || 0); | 
| 160 |  |  |  |  |  |  |  | 
| 161 | 28 | 50 | 33 |  |  | 67 | return 'TYPE_COMPRESSED' if $b1 == 037 && $b2 == 0235; | 
| 162 | 28 | 50 | 33 |  |  | 52 | return 'TYPE_GZIPPED'    if $b1 == 037 && $b2 == 0213; | 
| 163 | 28 | 50 | 66 |  |  | 123 | return 'TYPE_BZIP2'      if $b1 == 0x42 && $b2 == 0x5A && $b3 == 0x68 && $b4 > 0x2F; | 
|  |  |  | 66 |  |  |  |  | 
|  |  |  | 33 |  |  |  |  | 
| 164 |  |  |  |  |  |  |  | 
| 165 |  |  |  |  |  |  | # POSIX tarfiles: look for the string 'ustar' at posn 257 | 
| 166 |  |  |  |  |  |  | # (There used to be additional code to detect non-POSIX tar files | 
| 167 |  |  |  |  |  |  | # which is not detected with above, no longer necessary) | 
| 168 | 25 |  |  |  |  | 20 | my $tar_test; | 
| 169 | 25 |  |  |  |  | 161 | seek($FILE_TO_GUESS, 257, 0); | 
| 170 | 25 | 50 | 66 |  |  | 220 | if ( read($FILE_TO_GUESS, $tar_test, 5) && $tar_test eq 'ustar' ) { | 
| 171 | 0 |  |  |  |  | 0 | return 'TYPE_TAR'; | 
| 172 |  |  |  |  |  |  | } | 
| 173 |  |  |  |  |  |  |  | 
| 174 |  |  |  |  |  |  | # DVI | 
| 175 | 25 | 100 | 66 |  |  | 76 | return 'TYPE_DVI'        if $b1 == oct(367) && $b2 == oct(2); | 
| 176 |  |  |  |  |  |  | # GIF | 
| 177 | 24 | 50 |  |  |  | 79 | return 'TYPE_IMAGE'      if (sprintf('%c%c%c%c',$b1,$b2,$b3,$b4) eq 'GIF8'); | 
| 178 |  |  |  |  |  |  | # PNG | 
| 179 | 24 | 50 |  |  |  | 88 | return 'TYPE_IMAGE'      if sprintf('%c%c%c%c%c%c%c%c',$b1,$b2,$b3,$b4,$b5,$b6,$b7,$b8) eq "\211PNG\r\n\032\n"; | 
| 180 |  |  |  |  |  |  |  | 
| 181 |  |  |  |  |  |  | # TIF | 
| 182 | 24 | 0 | 33 |  |  | 47 | return 'TYPE_IMAGE'      if $b1 == 0115 && $b2 == 0115 && $filename =~ /\.tif/i; | 
|  |  |  | 33 |  |  |  |  | 
| 183 |  |  |  |  |  |  | # JPEG | 
| 184 | 24 | 0 | 33 |  |  | 42 | return 'TYPE_IMAGE'      if ($b1==0377 && $b2==0330 && $b3==0377 && ($b4==0340 || $b4==0356)); | 
|  |  |  | 33 |  |  |  |  | 
|  |  |  | 0 |  |  |  |  | 
|  |  |  | 0 |  |  |  |  | 
| 185 |  |  |  |  |  |  |  | 
| 186 |  |  |  |  |  |  | # MPEG | 
| 187 | 24 | 50 | 66 |  |  | 49 | return 'TYPE_ANIM'       if $b1 == 0 && $b2 == 0 && $b3 == 01 && $b4 == 0263; | 
|  |  |  | 66 |  |  |  |  | 
|  |  |  | 33 |  |  |  |  | 
| 188 |  |  |  |  |  |  |  | 
| 189 | 24 | 100 | 66 |  |  | 110 | if (("PK\003\004" eq  sprintf('%c%c%c%c', $b1,$b2,$b3,$b4)) or | 
| 190 |  |  |  |  |  |  | ("PK00PK\003\004" eq sprintf('%c%c%c%c%c%c%c%c', $b1,$b2,$b3,$b4,$b5,$b6,$b7,$b8))) { | 
| 191 | 7 | 100 |  |  |  | 40 | return 'TYPE_JAR' if ($filename =~ /\.jar$/i); | 
| 192 | 6 | 100 |  |  |  | 27 | return 'TYPE_ODF' if ($filename =~ /\.odt$/i); | 
| 193 | 5 | 100 |  |  |  | 44 | return 'TYPE_DOCX' if ($filename =~ /\.docx$/i); | 
| 194 | 3 |  |  |  |  | 55 | return 'TYPE_ZIP' | 
| 195 |  |  |  |  |  |  | } | 
| 196 | 17 | 100 |  |  |  | 63 | return 'TYPE_RAR' if ('Rar!' eq sprintf('%c%c%c%c', $b1,$b2,$b3,$b4)); | 
| 197 |  |  |  |  |  |  |  | 
| 198 |  |  |  |  |  |  | #:0  belong          0xC5D0D3C6      DOS EPS Binary File | 
| 199 |  |  |  |  |  |  | #->4 long            >0              Postscript starts at byte %d | 
| 200 | 16 | 50 | 66 |  |  | 80 | return 'TYPE_DOS_EPS' if $b1 == oct(305) && $b2 == oct(320) && $b3 == oct(323) && $b4 == oct(306); | 
|  |  |  | 66 |  |  |  |  | 
|  |  |  | 33 |  |  |  |  | 
| 201 |  |  |  |  |  |  |  | 
| 202 | 14 |  |  |  |  | 14 | my $one_kb; | 
| 203 | 14 |  |  |  |  | 78 | seek($FILE_TO_GUESS, 0, 0); | 
| 204 | 14 |  |  |  |  | 85 | read($FILE_TO_GUESS, $one_kb, 1024); | 
| 205 | 14 | 50 |  |  |  | 42 | return 'TYPE_PDF' if index($one_kb, '%PDF-') >= 0; | 
| 206 | 14 | 50 |  |  |  | 1667 | return 'TYPE_MAC' if $one_kb =~ /#!\/bin\/csh -f\r#|(\r|^)begin \d{1,4}\s+\S.*\r[^\n]/; | 
| 207 |  |  |  |  |  |  |  | 
| 208 | 14 |  |  |  |  | 13 | my ($maybe_tex, $maybe_tex_priority, $maybe_tex_priority2); | 
| 209 | 14 |  |  |  |  | 72 | seek($FILE_TO_GUESS, 0, 0); | 
| 210 |  |  |  |  |  |  |  | 
| 211 | 14 |  |  |  |  | 53 | local $/ = "\n"; | 
| 212 | 14 |  |  |  |  | 16 | my $accum=''; | 
| 213 | 14 |  |  |  |  | 170 | while (<$FILE_TO_GUESS>) { | 
| 214 | 40 | 50 | 33 |  |  | 92 | if (/\%auto-ignore/ && $. <= 10) { | 
| 215 | 0 |  |  |  |  | 0 | return 'TYPE_IGNORE'; | 
| 216 |  |  |  |  |  |  | } | 
| 217 | 40 | 50 | 33 |  |  | 157 | if ($. <= 10 && /\\input texinfo/) { | 
| 218 | 0 |  |  |  |  | 0 | return 'TYPE_TEXINFO'; | 
| 219 |  |  |  |  |  |  | } | 
| 220 | 40 | 50 | 33 |  |  | 1398 | if ($. <= 40 && /(^|\r)Content-type: /i ) { | 
| 221 | 0 |  |  |  |  | 0 | return 'TYPE_MULTI_PART_MIME'; | 
| 222 |  |  |  |  |  |  | } | 
| 223 |  |  |  |  |  |  |  | 
| 224 |  |  |  |  |  |  | # Match strings starting at either 1st or 7th byte. Use $accum | 
| 225 |  |  |  |  |  |  | # to build string of file to this point as the preceding 6 chars | 
| 226 |  |  |  |  |  |  | # may include \n | 
| 227 | 40 |  |  |  |  | 70 | $accum.=$_; | 
| 228 | 40 | 100 | 100 |  |  | 150 | if ($. <= 7 && $accum=~/^(......)?%\!(PS-AdobeFont-1\.|FontType1|PS-Adobe-3\.0\ Resource-Font)/s) { | 
| 229 | 4 |  |  |  |  | 76 | return 'TYPE_PS_FONT'; | 
| 230 |  |  |  |  |  |  | } | 
| 231 |  |  |  |  |  |  |  | 
| 232 |  |  |  |  |  |  | # This must come after the test for TYPE_PS_FONT | 
| 233 | 36 | 50 | 66 |  |  | 91 | if ($. == 1 && /^%\!/) { | 
| 234 | 0 |  |  |  |  | 0 | return 'TYPE_POSTSCRIPT'; | 
| 235 |  |  |  |  |  |  | } | 
| 236 |  |  |  |  |  |  |  | 
| 237 | 36 | 50 | 33 |  |  | 258 | if (($. == 1 && (/^\%*\004%\!/ || /.*%\!PS-Adobe/)) | 
|  |  |  | 66 |  |  |  |  | 
|  |  |  | 33 |  |  |  |  | 
|  |  |  | 33 |  |  |  |  | 
|  |  |  | 33 |  |  |  |  | 
| 238 |  |  |  |  |  |  | || ($. <= 10 && /^%\!PS/ && !$maybe_tex)) { | 
| 239 | 0 |  |  |  |  | 0 | return 'TYPE_PS_PC'; | 
| 240 |  |  |  |  |  |  | } | 
| 241 |  |  |  |  |  |  |  | 
| 242 | 36 | 50 | 33 |  |  | 139 | if ($. <= 12 && /^\r?%\&([^\s\n]+)/) { | 
| 243 | 0 | 0 | 0 |  |  | 0 | if ($1 eq 'latex209' || $1 eq 'biglatex' || | 
|  |  |  | 0 |  |  |  |  | 
|  |  |  | 0 |  |  |  |  | 
| 244 |  |  |  |  |  |  | $1 eq 'latex' || $1 eq 'LaTeX') { | 
| 245 | 0 |  |  |  |  | 0 | return ('TYPE_LATEX', $1); | 
| 246 |  |  |  |  |  |  | } else { | 
| 247 | 0 |  |  |  |  | 0 | return ('TYPE_TEX_MAC', $1); | 
| 248 |  |  |  |  |  |  | } | 
| 249 |  |  |  |  |  |  | } | 
| 250 | 36 | 50 | 33 |  |  | 171 | if ($. <= 10 && /\s]/i) { | 
| 251 | 0 |  |  |  |  | 0 | return 'TYPE_HTML'; | 
| 252 |  |  |  |  |  |  | } | 
| 253 | 36 | 50 | 33 |  |  | 133 | if ($. <= 10 && /\%auto-include/) { | 
| 254 | 0 |  |  |  |  | 0 | return 'TYPE_INCLUDE'; | 
| 255 |  |  |  |  |  |  | } | 
| 256 |  |  |  |  |  |  | # All subsequent checks have lines with '%' in them chopped. | 
| 257 |  |  |  |  |  |  | #  if we need to look for a % then do it earlier! | 
| 258 | 36 |  |  |  |  | 104 | s/\%[^\r]*//; | 
| 259 | 36 | 100 |  |  |  | 73 | if (/(^|\r)\s*\\documentstyle/) { | 
| 260 | 1 |  |  |  |  | 19 | return 'TYPE_LATEX'; | 
| 261 |  |  |  |  |  |  | } | 
| 262 | 35 | 100 |  |  |  | 72 | if (/(^|\r)\s*\\documentclass/) { | 
| 263 | 8 |  |  |  |  | 10 | return _type_of_latex2e(\*{$FILE_TO_GUESS}); | 
|  | 8 |  |  |  |  | 20 |  | 
| 264 |  |  |  |  |  |  | } | 
| 265 | 27 | 50 |  |  |  | 57 | if (/(^|\r)\s*(\\font|\\magnification|\\input|\\def|\\special|\\baselineskip|\\begin)/) { | 
| 266 | 0 |  |  |  |  | 0 | $maybe_tex = 1; | 
| 267 | 0 | 0 |  |  |  | 0 | return 'TYPE_TEX_priority' if /\\input\s+amstex/; | 
| 268 |  |  |  |  |  |  | } | 
| 269 | 27 | 50 |  |  |  | 54 | if (/(^|\r)\s*\\(end|bye)(\s|$)/) { | 
| 270 | 0 |  |  |  |  | 0 | $maybe_tex_priority = 1; | 
| 271 |  |  |  |  |  |  | } | 
| 272 | 27 | 50 |  |  |  | 64 | if (/\\(end|bye)(\s|$)/) { | 
| 273 | 0 |  |  |  |  | 0 | $maybe_tex_priority2 = 1; | 
| 274 |  |  |  |  |  |  | } | 
| 275 | 27 | 50 | 33 |  |  | 102 | if (/\\input *(harv|lanl)mac/ || /\\input\s+phyzzx/) { | 
| 276 | 0 |  |  |  |  | 0 | return 'TYPE_TEX_MAC'; | 
| 277 |  |  |  |  |  |  | } | 
| 278 | 27 | 50 |  |  |  | 52 | if (/beginchar\(/) { | 
| 279 | 0 |  |  |  |  | 0 | return 'TYPE_MF'; | 
| 280 |  |  |  |  |  |  | } | 
| 281 | 27 | 50 |  |  |  | 52 | if (/(^|\r)\@(book|article|inbook|unpublished)\{/i) { | 
| 282 | 0 |  |  |  |  | 0 | return 'TYPE_BIBTEX'; | 
| 283 |  |  |  |  |  |  | } | 
| 284 | 27 | 50 |  |  |  | 37 | if (/^begin \d{1,4}\s+[^\s]+\r?$/) { | 
| 285 | 0 | 0 |  |  |  | 0 | return 'TYPE_TEX_priority' if $maybe_tex_priority; | 
| 286 | 0 | 0 |  |  |  | 0 | return 'TYPE_TEX' if $maybe_tex; | 
| 287 | 0 | 0 |  |  |  | 0 | return 'TYPE_PC' if /\r$/; | 
| 288 | 0 |  |  |  |  | 0 | return 'TYPE_UUENCODED'; | 
| 289 |  |  |  |  |  |  | } | 
| 290 | 27 | 50 |  |  |  | 98 | if (m/paper deliberately replaced by what little/) { | 
| 291 | 0 |  |  |  |  | 0 | return 'TYPE_ALWAYS_IGNORE'; # Was 'TYPE_FAILED' | 
| 292 |  |  |  |  |  |  | } | 
| 293 |  |  |  |  |  |  | } | 
| 294 | 1 | 50 |  |  |  | 12 | close $FILE_TO_GUESS || warn "couldn't close file: $!"; | 
| 295 |  |  |  |  |  |  |  | 
| 296 | 1 | 50 |  |  |  | 4 | return 'TYPE_TEX_priority' if $maybe_tex_priority; | 
| 297 | 1 | 50 |  |  |  | 3 | return 'TYPE_TEX_priority2' if $maybe_tex_priority2; | 
| 298 | 1 | 50 |  |  |  | 4 | return 'TYPE_TEX' if $maybe_tex; | 
| 299 |  |  |  |  |  |  |  | 
| 300 | 1 |  |  |  |  | 9 | return 'TYPE_FAILED'; | 
| 301 |  |  |  |  |  |  | } | 
| 302 |  |  |  |  |  |  |  | 
| 303 |  |  |  |  |  |  |  | 
| 304 |  |  |  |  |  |  | # _type_of_latex2e($filehandle) | 
| 305 |  |  |  |  |  |  | # | 
| 306 |  |  |  |  |  |  | # Takes an open file handle and searches for some heuristic regexps to | 
| 307 |  |  |  |  |  |  | # determine whether the contents is of type pdflatex or regular latex2e. | 
| 308 |  |  |  |  |  |  | # | 
| 309 |  |  |  |  |  |  | sub _type_of_latex2e { | 
| 310 | 8 |  |  | 8 |  | 9 | my ($filehandle) = @_; | 
| 311 | 8 |  |  |  |  | 11 | my $lines_to_search=$.+5; #will search for pdfoutput from beginning to 5 lines beyond | 
| 312 | 8 | 50 |  |  |  | 60 | if (seek($filehandle,0,0)) { | 
| 313 | 8 |  |  |  |  | 26 | local $. = 0; #reset line counter if rewind worked | 
| 314 |  |  |  |  |  |  | } | 
| 315 |  |  |  |  |  |  |  | 
| 316 | 8 |  |  |  |  | 56 | while (<$filehandle>) { | 
| 317 | 295 | 100 | 100 |  |  | 1392 | if (/^[^%]*\\includegraphics[^%]*\.(?:pdf|png|gif|jpg)\s?\}/i || | 
|  |  |  | 66 |  |  |  |  | 
| 318 |  |  |  |  |  |  | ($. < $lines_to_search && /^[^%]*\\pdfoutput(?:\s+)?=(?:\s+)?1/)) { | 
| 319 | 5 |  |  |  |  | 39 | return 'TYPE_PDFLATEX'; | 
| 320 |  |  |  |  |  |  | } | 
| 321 |  |  |  |  |  |  | } | 
| 322 | 3 |  |  |  |  | 25 | return 'TYPE_LATEX2e'; | 
| 323 |  |  |  |  |  |  | } | 
| 324 |  |  |  |  |  |  |  | 
| 325 |  |  |  |  |  |  |  | 
| 326 |  |  |  |  |  |  | =head3 is_tex_type($type) | 
| 327 |  |  |  |  |  |  |  | 
| 328 |  |  |  |  |  |  | Returns true (1) if the type name supplied is a TeX type, false ('') | 
| 329 |  |  |  |  |  |  | otherwise. | 
| 330 |  |  |  |  |  |  |  | 
| 331 |  |  |  |  |  |  | =cut | 
| 332 |  |  |  |  |  |  |  | 
| 333 |  |  |  |  |  |  | sub is_tex_type { | 
| 334 | 4 |  |  | 4 | 1 | 7 | my ($type) = @_; | 
| 335 | 4 |  |  |  |  | 18 | return exists $TEX_types{$type}; | 
| 336 |  |  |  |  |  |  | } | 
| 337 |  |  |  |  |  |  |  | 
| 338 |  |  |  |  |  |  |  | 
| 339 |  |  |  |  |  |  | =head3 type_name($type) | 
| 340 |  |  |  |  |  |  |  | 
| 341 |  |  |  |  |  |  | Returns display string for the type $type. Or 'unknown' if | 
| 342 |  |  |  |  |  |  | $type is not recognized. | 
| 343 |  |  |  |  |  |  |  | 
| 344 |  |  |  |  |  |  | =cut | 
| 345 |  |  |  |  |  |  |  | 
| 346 |  |  |  |  |  |  | sub type_name { | 
| 347 | 4 |  |  | 4 | 1 | 1819 | my ($type)=@_; | 
| 348 | 4 |  | 100 |  |  | 40 | return( $NAME{$type} || 'unknown' ); | 
| 349 |  |  |  |  |  |  | } | 
| 350 |  |  |  |  |  |  |  | 
| 351 |  |  |  |  |  |  | 1; | 
| 352 |  |  |  |  |  |  |  | 
| 353 |  |  |  |  |  |  | __END__ |