File Coverage

blib/lib/Image/ExifTool/PostScript.pm
Criterion Covered Total %
statement 197 318 61.9
branch 126 242 52.0
condition 21 66 31.8
subroutine 12 14 85.7
pod 0 10 0.0
total 356 650 54.7


line stmt bran cond sub pod time code
1             #------------------------------------------------------------------------------
2             # File: PostScript.pm
3             #
4             # Description: Read PostScript meta information
5             #
6             # Revisions: 07/08/2005 - P. Harvey Created
7             #
8             # References: 1) http://partners.adobe.com/public/developer/en/ps/5002.EPSF_Spec.pdf
9             # 2) http://partners.adobe.com/public/developer/en/ps/5001.DSC_Spec.pdf
10             # 3) http://partners.adobe.com/public/developer/en/illustrator/sdk/AI7FileFormat.pdf
11             #------------------------------------------------------------------------------
12              
13             package Image::ExifTool::PostScript;
14              
15 29     29   7054 use strict;
  29         77  
  29         1655  
16 29     29   323 use vars qw($VERSION $AUTOLOAD);
  29         61  
  29         2092  
17 29     29   237 use Image::ExifTool qw(:DataAccess :Utils);
  29         66  
  29         182239  
18              
19             $VERSION = '1.46';
20              
21             sub WritePS($$);
22             sub ProcessPS($$;$);
23              
24             # PostScript tag table
25             %Image::ExifTool::PostScript::Main = (
26             PROCESS_PROC => \&ProcessPS,
27             WRITE_PROC => \&WritePS,
28             PREFERRED => 1, # always add these tags when writing
29             GROUPS => { 2 => 'Image' },
30             # Note: Make all of these tags priority 0 since the first one found at
31             # the start of the file should take priority (in case multiples exist)
32             Author => { Priority => 0, Groups => { 2 => 'Author' }, Writable => 'string' },
33             BoundingBox => { Priority => 0 },
34             Copyright => { Priority => 0, Writable => 'string' }, #2
35             CreationDate => {
36             Name => 'CreateDate',
37             Priority => 0,
38             Groups => { 2 => 'Time' },
39             Writable => 'string',
40             PrintConv => '$self->ConvertDateTime($val)',
41             PrintConvInv => '$self->InverseDateTime($val)',
42             },
43             Creator => { Priority => 0, Writable => 'string' },
44             ImageData => { Priority => 0 },
45             For => { Priority => 0, Writable => 'string', Notes => 'for whom the document was prepared'},
46             Keywords => { Priority => 0, Writable => 'string' },
47             ModDate => {
48             Name => 'ModifyDate',
49             Priority => 0,
50             Groups => { 2 => 'Time' },
51             Writable => 'string',
52             PrintConv => '$self->ConvertDateTime($val)',
53             PrintConvInv => '$self->InverseDateTime($val)',
54             },
55             Pages => { Priority => 0 },
56             Routing => { Priority => 0, Writable => 'string' }, #2
57             Subject => { Priority => 0, Writable => 'string' },
58             Title => { Priority => 0, Writable => 'string' },
59             Version => { Priority => 0, Writable => 'string' }, #2
60             # these subdirectories for documentation only
61             BeginPhotoshop => {
62             Name => 'PhotoshopData',
63             SubDirectory => {
64             TagTable => 'Image::ExifTool::Photoshop::Main',
65             },
66             },
67             BeginICCProfile => {
68             Name => 'ICC_Profile',
69             SubDirectory => {
70             TagTable => 'Image::ExifTool::ICC_Profile::Main',
71             },
72             },
73             begin_xml_packet => {
74             Name => 'XMP',
75             SubDirectory => {
76             TagTable => 'Image::ExifTool::XMP::Main',
77             },
78             },
79             TIFFPreview => {
80             Groups => { 2 => 'Preview' },
81             Binary => 1,
82             Notes => q{
83             not a real tag ID, but used to represent the TIFF preview extracted from DOS
84             EPS images
85             },
86             },
87             BeginDocument => {
88             Name => 'EmbeddedFile',
89             SubDirectory => {
90             TagTable => 'Image::ExifTool::PostScript::Main',
91             },
92             Notes => 'extracted with L option',
93             },
94             EmbeddedFileName => {
95             Notes => q{
96             not a real tag ID, but the file name from a BeginDocument statement.
97             Extracted with document metadata when L option is used
98             },
99             },
100             # AI metadata (most with a single leading '%')
101             AI9_ColorModel => {
102             Name => 'AIColorModel',
103             PrintConv => {
104             1 => 'RGB',
105             2 => 'CMYK',
106             },
107             },
108             AI3_ColorUsage => { Name => 'AIColorUsage' },
109             AI5_RulerUnits => {
110             Name => 'AIRulerUnits',
111             PrintConv => {
112             0 => 'Inches',
113             1 => 'Millimeters',
114             2 => 'Points',
115             3 => 'Picas',
116             4 => 'Centimeters',
117             6 => 'Pixels',
118             },
119             },
120             AI5_TargetResolution => { Name => 'AITargetResolution' },
121             AI5_NumLayers => { Name => 'AINumLayers' },
122             AI5_FileFormat => { Name => 'AIFileFormat' },
123             AI8_CreatorVersion => { Name => 'AICreatorVersion' }, # (double leading '%')
124             AI12_BuildNumber => { Name => 'AIBuildNumber' },
125             );
126              
127             # composite tags
128             %Image::ExifTool::PostScript::Composite = (
129             GROUPS => { 2 => 'Image' },
130             # BoundingBox is in points, not pixels,
131             # but use it anyway if ImageData is not available
132             ImageWidth => {
133             Desire => {
134             0 => 'Main:PostScript:ImageData',
135             1 => 'PostScript:BoundingBox',
136             },
137             ValueConv => 'Image::ExifTool::PostScript::ImageSize(\@val, 0)',
138             },
139             ImageHeight => {
140             Desire => {
141             0 => 'Main:PostScript:ImageData',
142             1 => 'PostScript:BoundingBox',
143             },
144             ValueConv => 'Image::ExifTool::PostScript::ImageSize(\@val, 1)',
145             },
146             );
147              
148             # add our composite tags
149             Image::ExifTool::AddCompositeTags('Image::ExifTool::PostScript');
150              
151             #------------------------------------------------------------------------------
152             # AutoLoad our writer routines when necessary
153             #
154             sub AUTOLOAD
155             {
156 19     19   164 return Image::ExifTool::DoAutoLoad($AUTOLOAD, @_);
157             }
158              
159             #------------------------------------------------------------------------------
160             # Get image width or height
161             # Inputs: 0) value list ref (ImageData, BoundingBox), 1) true to get height
162             sub ImageSize($$)
163             {
164 5     5 0 13 my ($vals, $getHeight) = @_;
165 5         9 my ($w, $h);
166 5 50 33     47 if ($$vals[0] and $$vals[0] =~ /^(\d+) (\d+)/) {
    0 0        
167 5         44 ($w, $h) = ($1, $2);
168             } elsif ($$vals[1] and $$vals[1] =~ /^(\d+) (\d+) (\d+) (\d+)/) {
169 0         0 ($w, $h) = ($3 - $1, $4 - $2);
170             }
171 5 100       78 return $getHeight ? $h : $w;
172             }
173              
174             #------------------------------------------------------------------------------
175             # Set PostScript format error warning
176             # Inputs: 0) ExifTool object reference, 1) error string
177             # Returns: 1
178             sub PSErr($$)
179             {
180 0     0 0 0 my ($et, $str) = @_;
181             # set file type if not done already
182 0         0 my $ext = $$et{FILE_EXT};
183 0 0 0     0 $et->SetFileType(($ext and $ext eq 'AI') ? 'AI' : 'PS');
184 0         0 $et->Warn("PostScript format error ($str)");
185 0         0 return 1;
186             }
187              
188             #------------------------------------------------------------------------------
189             # Return input record separator to use for the specified file
190             # Inputs: 0) RAF reference
191             # Returns: Input record separator or undef on error
192             sub GetInputRecordSeparator($)
193             {
194 8     8 0 17 my $raf = shift;
195 8         30 my $pos = $raf->Tell(); # save current position
196 8         18 my ($data, $sep);
197 8 50       31 $raf->Read($data,256) or return undef;
198 8         24 my ($a, $d) = (999,999);
199 8 100       56 $a = pos($data), pos($data) = 0 if $data =~ /\x0a/g;
200 8 100       42 $d = pos($data) if $data =~ /\x0d/g;
201 8         30 my $diff = $a - $d;
202 8 50       69 if ($diff == 1) {
    50          
    100          
    100          
203 0         0 $sep = "\x0d\x0a";
204             } elsif ($diff == -1) {
205 0         0 $sep = "\x0a\x0d";
206             } elsif ($diff > 0) {
207 3         9 $sep = "\x0d";
208             } elsif ($diff < 0) {
209 4         12 $sep = "\x0a";
210             } # else error
211 8         47 $raf->Seek($pos, 0); # restore original position
212 8         74 return $sep;
213             }
214              
215             #------------------------------------------------------------------------------
216             # Split into lines ending in any CR, LF or CR+LF combination
217             # (this is annoying, and could be avoided if EPS files didn't mix linefeeds!)
218             # Inputs: 0) data pointer, 1) reference to lines array
219             # Notes: Fills @$lines with lines from splitting $$dataPt
220             sub SplitLine($$)
221             {
222 1     1 0 2 my ($dataPt, $lines) = @_;
223 1         3 for (;;) {
224 108         97 my $endl;
225             # find the position of the first LF (\x0a)
226 108 100       171 $endl = pos($$dataPt), pos($$dataPt) = 0 if $$dataPt =~ /\x0a/g;
227 108 50       172 if ($$dataPt =~ /\x0d/g) { # find the first CR (\x0d)
    0          
228 108 100       111 if (defined $endl) {
229             # (remember, CR+LF is a DOS newline...)
230 107 50       126 $endl = pos($$dataPt) if pos($$dataPt) < $endl - 1;
231             } else {
232 1         2 $endl = pos($$dataPt);
233             }
234             } elsif (not defined $endl) {
235 0         0 push @$lines, $$dataPt;
236 0         0 last;
237             }
238 108 100       124 if (length $$dataPt == $endl) {
239 1         3 push @$lines, $$dataPt;
240 1         3 last;
241             } else {
242             # continue to split into separate lines
243 107         141 push @$lines, substr($$dataPt, 0, $endl);
244 107         167 $$dataPt = substr($$dataPt, $endl);
245             }
246             }
247             }
248              
249             #------------------------------------------------------------------------------
250             # check to be sure we haven't read past end of PS data in DOS-style file
251             # Inputs: 0) RAF ref (with PSEnd member), 1) data ref
252             # - modifies data and sets RAF to EOF if end of PS is reached
253             sub CheckPSEnd($$)
254             {
255 0     0 0 0 my ($raf, $dataPt) = @_;
256 0         0 my $pos = $raf->Tell();
257 0 0       0 if ($pos >= $$raf{PSEnd}) {
258 0         0 $raf->Seek(0, 2); # seek to end of file so we can't read any more
259 0 0       0 $$dataPt = substr($$dataPt, 0, length($$dataPt) - $pos + $$raf{PSEnd}) if $pos > $$raf{PSEnd};
260             }
261             }
262              
263             #------------------------------------------------------------------------------
264             # Read next line from EPS file
265             # Inputs: 0) RAF ref (with PSEnd member if Postscript ends before end of file)
266             # 1) array of lines from file
267             # Returns: true on success
268             sub GetNextLine($$)
269             {
270 114     114 0 246 my ($raf, $lines) = @_;
271 114         226 my ($data, $changedNL);
272 114 100       350 my $altnl = ($/ eq "\x0d") ? "\x0a" : "\x0d";
273 114         169 for (;;) {
274 114 50       390 $raf->ReadLine($data) or last;
275 114 50       265 $$raf{PSEnd} and CheckPSEnd($raf, \$data);
276             # split line if it contains other newline sequences
277 114 100       505 if ($data =~ /$altnl/) {
278 1 50 33     7 if (length($data) > 500000 and Image::ExifTool::IsPC()) {
279             # patch for Windows memory problem
280 0 0       0 unless ($changedNL) {
281 0         0 $changedNL = $/;
282 0         0 $/ = $altnl;
283 0         0 $altnl = $changedNL;
284 0         0 $raf->Seek(-length($data), 1);
285 0         0 next;
286             }
287             } else {
288             # split into separate lines
289             # push @$lines, split /$altnl/, $data, -1;
290             # if (@$lines == 2 and $$lines[1] eq $/) {
291             # # handle case of DOS newline data inside file using Unix newlines
292             # $$lines[0] .= pop @$lines;
293             # }
294             # split into separate lines if necessary
295 1         5 SplitLine(\$data, $lines);
296             }
297             } else {
298 113         378 push @$lines, $data;
299             }
300 114 50       332 $/ = $changedNL if $changedNL;
301 114         447 return 1;
302             }
303 0         0 return 0;
304             }
305              
306             #------------------------------------------------------------------------------
307             # Decode comment from PostScript file
308             # Inputs: 0) comment string, 1) RAF ref, 2) reference to lines array
309             # 3) optional data reference for extra lines read from file
310             # Returns: Decoded comment string (may be an array reference)
311             # - handles multi-line comments and escape sequences
312             sub DecodeComment($$$;$)
313             {
314 31     31 0 73 my ($val, $raf, $lines, $dataPt) = @_;
315 31         286 $val =~ s/\x0d*\x0a*$//; # remove trailing CR, LF or CR/LF
316             # check for continuation comments
317 31         52 for (;;) {
318 31 50 33     111 @$lines or GetNextLine($raf, $lines) or last;
319 31 50       90 last unless $$lines[0] =~ /^%%\+/; # is the next line a continuation?
320 0 0       0 $$dataPt .= $$lines[0] if $dataPt; # add to data if necessary
321 0         0 $$lines[0] =~ s/\x0d*\x0a*$//; # remove trailing CR, LF or CR/LF
322 0         0 $val .= substr(shift(@$lines), 3); # add to value (without leading "%%+")
323             }
324 31         46 my @vals;
325             # handle bracketed string values
326 31 100       108 if ($val =~ s/^\((.*)\)$/$1/) { # remove brackets if necessary
327             # split into an array of strings if necessary
328 3         5 my $nesting = 1;
329 3         42 while ($val =~ /(\(|\))/g) {
330 0         0 my $bra = $1;
331 0         0 my $pos = pos($val) - 2;
332 0         0 my $backslashes = 0;
333 0   0     0 while ($pos and substr($val, $pos, 1) eq '\\') {
334 0         0 --$pos;
335 0         0 ++$backslashes;
336             }
337 0 0       0 next if $backslashes & 0x01; # escaped if odd number
338 0 0       0 if ($bra eq '(') {
339 0         0 ++$nesting;
340             } else {
341 0         0 --$nesting;
342 0 0       0 unless ($nesting) {
343 0         0 push @vals, substr($val, 0, pos($val)-1);
344 0         0 $val = substr($val, pos($val));
345 0 0       0 ++$nesting if $val =~ s/\s*\(//;
346             }
347             }
348             }
349 3         8 push @vals, $val;
350 3         7 foreach $val (@vals) {
351             # decode escape sequences in bracketed strings
352             # (similar to code in PDF.pm, but without line continuation)
353 3         11 while ($val =~ /\\(.)/sg) {
354 0         0 my $n = pos($val) - 2;
355 0         0 my $c = $1;
356 0         0 my $r;
357 0 0       0 if ($c =~ /[0-7]/) {
358             # get up to 2 more octal digits
359 0 0       0 $c .= $1 if $val =~ /\G([0-7]{1,2})/g;
360             # convert octal escape code
361 0         0 $r = chr(oct($c) & 0xff);
362             } else {
363             # convert escaped characters
364 0         0 ($r = $c) =~ tr/nrtbf/\n\r\t\b\f/;
365             }
366 0         0 substr($val, $n, length($c)+1) = $r;
367             # continue search after this character
368 0         0 pos($val) = $n + length($r);
369             }
370             }
371 3 50       9 $val = @vals > 1 ? \@vals : $vals[0];
372             }
373 31         92 return $val;
374             }
375              
376             #------------------------------------------------------------------------------
377             # Unescape PostScript string
378             # Inputs: 0) string
379             # Returns: unescaped string
380             sub UnescapePostScript($)
381             {
382 10     10 0 25 my $str = shift;
383             # decode escape sequences in literal strings
384 10         43 while ($str =~ /\\(.)/sg) {
385 12         25 my $n = pos($str) - 2;
386 12         23 my $c = $1;
387 12         19 my $r;
388 12 100       37 if ($c =~ /[0-7]/) {
    50          
    50          
389             # get up to 2 more octal digits
390 11 50       51 $c .= $1 if $str =~ /\G([0-7]{1,2})/g;
391             # convert octal escape code
392 11         28 $r = chr(oct($c) & 0xff);
393             } elsif ($c eq "\x0d") {
394             # the string is continued if the line ends with '\'
395             # (also remove "\x0d\x0a")
396 0 0       0 $c .= $1 if $str =~ /\G(\x0a)/g;
397 0         0 $r = '';
398             } elsif ($c eq "\x0a") {
399 0         0 $r = '';
400             } else {
401             # convert escaped characters
402 1         4 ($r = $c) =~ tr/nrtbf/\n\r\t\b\f/;
403             }
404 12         29 substr($str, $n, length($c)+1) = $r;
405             # continue search after this character
406 12         57 pos($str) = $n + length($r);
407             }
408 10         27 return $str;
409             }
410              
411             #------------------------------------------------------------------------------
412             # Extract information from EPS, PS or AI file
413             # Inputs: 0) ExifTool object reference, 1) dirInfo reference, 2) optional tag table ref
414             # Returns: 1 if this was a valid PostScript file
415             sub ProcessPS($$;$)
416             {
417 4     4 0 13 my ($et, $dirInfo, $tagTablePtr) = @_;
418 4         12 my $raf = $$dirInfo{RAF};
419 4         20 my $embedded = $et->Options('ExtractEmbedded');
420 4         10 my ($data, $dos, $endDoc, $fontTable, $comment);
421              
422             # allow read from data
423 4 50       18 unless ($raf) {
424 0         0 $raf = File::RandomAccess->new($$dirInfo{DataPt});
425 0         0 $et->VerboseDir('PostScript');
426             }
427             #
428             # determine if this is a postscript file
429             #
430 4 50       35 $raf->Read($data, 4) == 4 or return 0;
431             # accept either ASCII or DOS binary postscript file format
432 4 50       31 return 0 unless $data =~ /^(%!PS|%!Ad|%!Fo|\xc5\xd0\xd3\xc6)/;
433 4 50       29 if ($data =~ /^%!Ad/) {
    50          
434             # I've seen PS files start with "%!Adobe-PS"...
435 0 0 0     0 return 0 unless $raf->Read($data, 6) == 6 and $data eq "obe-PS";
436             } elsif ($data =~ /^\xc5\xd0\xd3\xc6/) {
437             # process DOS binary file header
438             # - save DOS header then seek ahead and check PS header
439 0 0       0 $raf->Read($dos, 26) == 26 or return 0;
440 0         0 SetByteOrder('II');
441 0         0 my $psStart = Get32u(\$dos, 0);
442 0 0 0     0 unless ($raf->Seek($psStart, 0) and
      0        
443             $raf->Read($data, 4) == 4 and $data eq '%!PS')
444             {
445 0         0 return PSErr($et, 'invalid header');
446             }
447 0         0 $$raf{PSEnd} = $psStart + Get32u(\$dos, 4); # set end of PostScript data in RAF
448             } else {
449             # check for PostScript font file (PFA or PFB)
450 4         9 my $d2;
451 4 50       16 $data .= $d2 if $raf->Read($d2,12);
452 4 100       41 if ($data =~ /^%!(PS-(AdobeFont-|Bitstream )|FontType1-)/) {
453 2         19 $et->SetFileType('PFA'); # PostScript ASCII font file
454 2         10 $fontTable = GetTagTable('Image::ExifTool::Font::PSInfo');
455             # PostScript font files may contain an unformatted comments which may
456             # contain useful information, so accumulate these for the Comment tag
457 2         6 $comment = 1;
458             }
459 4         20 $raf->Seek(-length($data), 1);
460             }
461             #
462             # set the newline type based on the first newline found in the file
463             #
464 4         25 local $/ = GetInputRecordSeparator($raf);
465 4 50       18 $/ or return PSErr($et, 'invalid PS data');
466              
467             # set file type (PostScript or EPS)
468 4 50       33 $raf->ReadLine($data) or $data = '';
469 4         13 my $type;
470 4 100       20 if ($data =~ /EPSF/) {
471 2         6 $type = 'EPS';
472             } else {
473             # read next line to see if this is an Illustrator file
474 2         5 my $line2;
475 2         9 my $pos = $raf->Tell();
476 2 50 33     7 if ($raf->ReadLine($line2) and $line2 =~ /^%%Creator: Adobe Illustrator/) {
477 0         0 $type = 'AI';
478             } else {
479 2         5 $type = 'PS';
480             }
481 2         10 $raf->Seek($pos, 0);
482             }
483 4         37 $et->SetFileType($type);
484 4 50 33     44 return 1 if $$et{OPTIONS}{FastScan} and $$et{OPTIONS}{FastScan} == 3;
485             #
486             # extract TIFF information from DOS header
487             #
488 4 50       22 $tagTablePtr or $tagTablePtr = GetTagTable('Image::ExifTool::PostScript::Main');
489 4 50       16 if ($dos) {
490 0         0 my $base = Get32u(\$dos, 16);
491 0 0       0 if ($base) {
492 0         0 my $pos = $raf->Tell();
493             # extract the TIFF preview
494 0         0 my $len = Get32u(\$dos, 20);
495 0         0 my $val = $et->ExtractBinary($base, $len, 'TIFFPreview');
496 0 0 0     0 if (defined $val and $val =~ /^(MM\0\x2a|II\x2a\0|Binary)/) {
497 0         0 $et->HandleTag($tagTablePtr, 'TIFFPreview', $val);
498             } else {
499 0         0 $et->Warn('Bad TIFF preview image');
500             }
501             # extract information from TIFF in DOS header
502             # (set Parent to '' to avoid setting FileType tag again)
503 0         0 my %dirInfo = (
504             Parent => '',
505             RAF => $raf,
506             Base => $base,
507             );
508 0 0       0 $et->ProcessTIFF(\%dirInfo) or $et->Warn('Bad embedded TIFF');
509             # position file pointer to extract PS information
510 0         0 $raf->Seek($pos, 0);
511             }
512             }
513             #
514             # parse the postscript
515             #
516 4         19 my ($buff, $mode, $beginToken, $endToken, $docNum, $subDocNum, $changedNL);
517 4         0 my (@lines, $altnl);
518 4 100       19 if ($/ eq "\x0d") {
519 2         5 $altnl = "\x0a";
520             } else {
521 2         33 $/ = "\x0a"; # end on any LF (even if DOS CR+LF)
522 2         6 $altnl = "\x0d";
523             }
524 4         8 for (;;) {
525 725 100       1153 if (@lines) {
526 243         337 $data = shift @lines;
527             } else {
528 482 100       1169 $raf->ReadLine($data) or last;
529             # check for alternate newlines as efficiently as possible
530 478 100       1605 if ($data =~ /$altnl/) {
531 2 50 33     9 if (length($data) > 500000 and Image::ExifTool::IsPC()) {
532             # Windows can't split very long lines due to poor memory handling,
533             # so re-read the file with the other newline character instead
534             # (slower but uses less memory)
535 0 0       0 unless ($changedNL) {
536 0         0 $changedNL = 1;
537 0         0 my $t = $/;
538 0         0 $/ = $altnl;
539 0         0 $altnl = $t;
540 0         0 $raf->Seek(-length($data), 1);
541 0         0 next;
542             }
543             } else {
544             # split into separate lines
545 2         104 @lines = split /$altnl/, $data, -1;
546 2         7 $data = shift @lines;
547 2 50 33     10 if (@lines == 1 and $lines[0] eq $/) {
548             # handle case of DOS newline data inside file using Unix newlines
549 0         0 $data .= $lines[0];
550 0         0 undef @lines;
551             }
552             }
553             }
554             }
555 721         1015 undef $changedNL;
556 721 100 100     3272 if ($mode) {
    100 66        
    100 66        
    50          
    100          
    50          
    100          
557 306 50       1023 if (not $endToken) {
    100          
    100          
558 0         0 $buff .= $data;
559 0 0       0 next unless $data =~ m{<\?xpacket end=.(w|r).\?>(\n|\r|$)};
560             } elsif ($data !~ /^$endToken/i) {
561 298 100       560 if ($mode eq 'XMP') {
    100          
562 216         292 $buff .= $data;
563             } elsif ($mode eq 'Document') {
564             # ignore embedded documents, but keep track of nesting level
565 29 100       118 $docNum .= '-1' if $data =~ /^$beginToken/;
566             } else {
567             # data is ASCII-hex encoded
568 53         70 $data =~ tr/0-9A-Fa-f//dc; # remove all but hex characters
569 53         108 $buff .= pack('H*', $data); # translate from hex
570             }
571 298         387 next;
572             } elsif ($mode eq 'Document') {
573 4         27 $docNum =~ s/-?\d+$//; # decrement document nesting level
574             # done with Document mode if we are back at the top level
575 4 100       27 undef $mode unless $docNum;
576 4         9 next;
577             }
578             } elsif ($endDoc and $data =~ /^$endDoc/i) {
579 4         13 $docNum =~ s/-?(\d+)$//; # decrement nesting level
580 4         10 $subDocNum = $1; # remember our last sub-document number
581 4         7 $$et{DOC_NUM} = $docNum;
582 4 100       9 undef $endDoc unless $docNum; # done with document if top level
583 4         4 next;
584             } elsif ($data =~ /^(%{1,2})(Begin)(_xml_packet|Photoshop|ICCProfile|Document|Binary)/i) {
585             # the beginning of a data block
586 13         96 my %modeLookup = (
587             _xml_packet => 'XMP',
588             photoshop => 'Photoshop',
589             iccprofile => 'ICC_Profile',
590             document => 'Document',
591             binary => undef, # (we will try to skip this)
592             );
593 13         55 $mode = $modeLookup{lc $3};
594 13 100       31 unless ($mode) {
595 2 50 33     24 if (not @lines and $data =~ /^%{1,2}BeginBinary:\s*(\d+)/i) {
596 2 50       30 $raf->Seek($1, 1) or last; # skip binary data
597             }
598 2         9 next;
599             }
600 11         18 $buff = '';
601 11         78 $beginToken = $1 . $2 . $3;
602 11 100       45 $endToken = $1 . ($2 eq 'begin' ? 'end' : 'End') . $3;
603 11 100       29 if ($mode eq 'Document') {
604             # this is either the 1st sub-document or Nth document
605 7 100       15 if ($docNum) {
606             # increase nesting level
607 1         3 $docNum .= '-' . (++$subDocNum);
608             } else {
609             # this is the Nth document
610 6         16 $docNum = $$et{DOC_COUNT} + 1;
611             }
612 7         39 $subDocNum = 0; # new level, so reset subDocNum
613 7 100       26 next unless $embedded; # skip over this document
614             # set document number for family 4-7 group names
615 4         8 $$et{DOC_NUM} = $docNum;
616 4         9 $$et{LIST_TAGS} = { }; # don't build lists across different documents
617 4         7 $$et{PROCESSED} = { }; # re-initialize processed directory lookup too
618 4         6 $endDoc = $endToken; # parse to EndDocument token
619             # reset mode to allow parsing into sub-directories
620 4         5 undef $endToken;
621 4         5 undef $mode;
622             # save document name if available
623 4 50       65 if ($data =~ /^$beginToken:\s+([^\n\r]+)/i) {
624 4         9 my $docName = $1;
625             # remove brackets if necessary
626 4 50       15 $docName = $1 if $docName =~ /^\((.*)\)$/;
627 4         16 $et->HandleTag($tagTablePtr, 'EmbeddedFileName', $docName);
628             }
629             }
630 8         28 next;
631             } elsif ($data =~ /^<\?xpacket begin=.{7,13}W5M0MpCehiHzreSzNTczkc9d/) {
632             # pick up any stray XMP data
633 0         0 $mode = 'XMP';
634 0         0 $buff = $data;
635 0         0 undef $endToken; # no end token (just look for xpacket end)
636             # XMP could be contained in a single line (if newlines are different)
637 0 0       0 next unless $data =~ m{<\?xpacket end=.(w|r).\?>(\n|\r|$)};
638             } elsif ($data =~ /^%%?(\w+): ?(.*)/s and $$tagTablePtr{$1}) {
639 29         125 my ($tag, $val) = ($1, $2);
640             # only allow 'ImageData' and AI tags to have single leading '%'
641 29 50 66     122 next unless $data =~ /^%(%|AI\d+_)/ or $tag eq 'ImageData';
642             # decode comment string (reading continuation lines if necessary)
643 29         86 $val = DecodeComment($val, $raf, \@lines);
644 29         141 $et->HandleTag($tagTablePtr, $tag, $val);
645 29         53 next;
646             } elsif ($embedded and $data =~ /^%AI12_CompressedData/) {
647             # the rest of the file is compressed
648 0 0       0 unless (eval { require Compress::Zlib }) {
  0         0  
649 0         0 $et->Warn('Install Compress::Zlib to extract compressed embedded data');
650 0         0 last;
651             }
652             # seek back to find the start of the compressed data in the file
653 0         0 my $tlen = length($data) + @lines;
654 0         0 $tlen += length $_ foreach @lines;
655 0         0 my $backTo = $raf->Tell() - $tlen - 64;
656 0 0       0 $backTo = 0 if $backTo < 0;
657 0 0 0     0 last unless $raf->Seek($backTo, 0) and $raf->Read($data, 2048);
658 0 0       0 last unless $data =~ s/.*?%AI12_CompressedData//;
659 0         0 my $inflate = Compress::Zlib::inflateInit();
660 0 0       0 $inflate or $et->Warn('Error initializing inflate'), last;
661             # generate a PS-like file in memory from the compressed data
662 0         0 my $verbose = $et->Options('Verbose');
663 0 0       0 if ($verbose > 1) {
664 0         0 $et->VerboseDir('AI12_CompressedData (first 4kB)');
665 0         0 $et->VerboseDump(\$data);
666             }
667             # remove header if it exists (Windows AI files only)
668 0         0 $data =~ s/^.{0,256}EndData[\x0d\x0a]+//s;
669 0         0 my $val;
670 0         0 for (;;) {
671 0         0 my ($v2, $stat) = $inflate->inflate($data);
672 0 0       0 $stat == Compress::Zlib::Z_STREAM_END() and $val .= $v2, last;
673 0 0       0 $stat != Compress::Zlib::Z_OK() and undef($val), last;
674 0 0       0 if (defined $val) {
    0          
675 0         0 $val .= $v2;
676             } elsif ($v2 =~ /^%!PS/) {
677 0         0 $val = $v2;
678             } else {
679             # add postscript header (for file recognition) if it doesn't exist
680 0         0 $val = "%!PS-Adobe-3.0$/" . $v2;
681             }
682 0 0       0 $raf->Read($data, 65536) or last;
683             }
684 0 0       0 defined $val or $et->Warn('Error inflating AI compressed data'), last;
685 0 0       0 if ($verbose > 1) {
686 0         0 $et->VerboseDir('Uncompressed AI12 Data');
687 0         0 $et->VerboseDump(\$val);
688             }
689             # extract information from embedded images in the uncompressed data
690             $val = # add PS header in case it needs one
691 0         0 ProcessPS($et, { DataPt => \$val });
692 0         0 last;
693             } elsif ($fontTable) {
694 58 100       159 if (defined $comment) {
695             # extract initial comments from PostScript Font files
696 10 100       81 if ($data =~ /^%\s+(.*?)[\x0d\x0a]/) {
    100          
697 4 50       16 $comment .= "\n" if $comment;
698 4         16 $comment .= $1;
699 4         10 next;
700             } elsif ($data !~ /^%/) {
701             # stop extracting comments at the first non-comment line
702 2 50       16 $et->FoundTag('Comment', $comment) if length $comment;
703 2         4 undef $comment;
704             }
705             }
706 54 100 100     487 if ($data =~ m{^\s*/(\w+)\s*(.*)} and $$fontTable{$1}) {
    100          
707 24         135 my ($tag, $val) = ($1, $2);
708 24 100       122 if ($val =~ /^\((.*)\)/) {
    50          
709 10         31 $val = UnescapePostScript($1);
710             } elsif ($val =~ m{/?(\S+)}) {
711 14         33 $val = $1;
712             }
713 24         101 $et->HandleTag($fontTable, $tag, $val);
714             } elsif ($data =~ /^currentdict end/) {
715             # only extract tags from initial FontInfo dict
716 2         5 undef $fontTable;
717             }
718 54         111 next;
719             } else {
720 311         465 next;
721             }
722             # extract information from buffered data
723 4         32 my %dirInfo = (
724             DataPt => \$buff,
725             DataLen => length $buff,
726             DirStart => 0,
727             DirLen => length $buff,
728             Parent => 'PostScript',
729             );
730 4         21 my $subTablePtr = GetTagTable("Image::ExifTool::${mode}::Main");
731 4 50       25 unless ($et->ProcessDirectory(\%dirInfo, $subTablePtr)) {
732 0         0 $et->Warn("Error processing $mode information in PostScript file");
733             }
734 4         12 undef $buff;
735 4         20 undef $mode;
736             }
737 4 50 33     23 $mode = 'Document' if $endDoc and not $mode;
738 4 50       16 $mode and PSErr($et, "unterminated $mode data");
739 4         44 return 1;
740             }
741              
742             #------------------------------------------------------------------------------
743             # Extract information from EPS file
744             # Inputs: 0) ExifTool object reference, 1) dirInfo reference
745             # Returns: 1 if this was a valid PostScript file
746             sub ProcessEPS($$)
747             {
748 2     2 0 10 return ProcessPS($_[0],$_[1]);
749             }
750              
751             1; # end
752              
753              
754             __END__