File Coverage

blib/lib/Image/ExifTool/PostScript.pm

Criterion	Covered	Total	%
statement	197	318	61.9
branch	126	242	52.0
condition	21	66	31.8
subroutine	12	14	85.7
pod	0	10	0.0
total	356	650	54.7

line	stmt	bran	cond	sub	pod	time	code
1							#------------------------------------------------------------------------------
2							# File: PostScript.pm
3							#
4							# Description: Read PostScript meta information
5							#
6							# Revisions: 07/08/2005 - P. Harvey Created
7							#
8							# References: 1) http://partners.adobe.com/public/developer/en/ps/5002.EPSF_Spec.pdf
9							# 2) http://partners.adobe.com/public/developer/en/ps/5001.DSC_Spec.pdf
10							# 3) http://partners.adobe.com/public/developer/en/illustrator/sdk/AI7FileFormat.pdf
11							#------------------------------------------------------------------------------
12
13							package Image::ExifTool::PostScript;
14
15	29			29		7054	use strict;
	29					77
	29					1655
16	29			29		323	use vars qw($VERSION $AUTOLOAD);
	29					61
	29					2092
17	29			29		237	use Image::ExifTool qw(:DataAccess :Utils);
	29					66
	29					182239
18
19							$VERSION = '1.46';
20
21							sub WritePS($$);
22							sub ProcessPS($$;$);
23
24							# PostScript tag table
25							%Image::ExifTool::PostScript::Main = (
26							PROCESS_PROC => \&ProcessPS,
27							WRITE_PROC => \&WritePS,
28							PREFERRED => 1, # always add these tags when writing
29							GROUPS => { 2 => 'Image' },
30							# Note: Make all of these tags priority 0 since the first one found at
31							# the start of the file should take priority (in case multiples exist)
32							Author => { Priority => 0, Groups => { 2 => 'Author' }, Writable => 'string' },
33							BoundingBox => { Priority => 0 },
34							Copyright => { Priority => 0, Writable => 'string' }, #2
35							CreationDate => {
36							Name => 'CreateDate',
37							Priority => 0,
38							Groups => { 2 => 'Time' },
39							Writable => 'string',
40							PrintConv => '$self->ConvertDateTime($val)',
41							PrintConvInv => '$self->InverseDateTime($val)',
42							},
43							Creator => { Priority => 0, Writable => 'string' },
44							ImageData => { Priority => 0 },
45							For => { Priority => 0, Writable => 'string', Notes => 'for whom the document was prepared'},
46							Keywords => { Priority => 0, Writable => 'string' },
47							ModDate => {
48							Name => 'ModifyDate',
49							Priority => 0,
50							Groups => { 2 => 'Time' },
51							Writable => 'string',
52							PrintConv => '$self->ConvertDateTime($val)',
53							PrintConvInv => '$self->InverseDateTime($val)',
54							},
55							Pages => { Priority => 0 },
56							Routing => { Priority => 0, Writable => 'string' }, #2
57							Subject => { Priority => 0, Writable => 'string' },
58							Title => { Priority => 0, Writable => 'string' },
59							Version => { Priority => 0, Writable => 'string' }, #2
60							# these subdirectories for documentation only
61							BeginPhotoshop => {
62							Name => 'PhotoshopData',
63							SubDirectory => {
64							TagTable => 'Image::ExifTool::Photoshop::Main',
65							},
66							},
67							BeginICCProfile => {
68							Name => 'ICC_Profile',
69							SubDirectory => {
70							TagTable => 'Image::ExifTool::ICC_Profile::Main',
71							},
72							},
73							begin_xml_packet => {
74							Name => 'XMP',
75							SubDirectory => {
76							TagTable => 'Image::ExifTool::XMP::Main',
77							},
78							},
79							TIFFPreview => {
80							Groups => { 2 => 'Preview' },
81							Binary => 1,
82							Notes => q{
83							not a real tag ID, but used to represent the TIFF preview extracted from DOS
84							EPS images
85							},
86							},
87							BeginDocument => {
88							Name => 'EmbeddedFile',
89							SubDirectory => {
90							TagTable => 'Image::ExifTool::PostScript::Main',
91							},
92							Notes => 'extracted with L option',
93							},
94							EmbeddedFileName => {
95							Notes => q{
96							not a real tag ID, but the file name from a BeginDocument statement.
97							Extracted with document metadata when L option is used
98							},
99							},
100							# AI metadata (most with a single leading '%')
101							AI9_ColorModel => {
102							Name => 'AIColorModel',
103							PrintConv => {
104							1 => 'RGB',
105							2 => 'CMYK',
106							},
107							},
108							AI3_ColorUsage => { Name => 'AIColorUsage' },
109							AI5_RulerUnits => {
110							Name => 'AIRulerUnits',
111							PrintConv => {
112							0 => 'Inches',
113							1 => 'Millimeters',
114							2 => 'Points',
115							3 => 'Picas',
116							4 => 'Centimeters',
117							6 => 'Pixels',
118							},
119							},
120							AI5_TargetResolution => { Name => 'AITargetResolution' },
121							AI5_NumLayers => { Name => 'AINumLayers' },
122							AI5_FileFormat => { Name => 'AIFileFormat' },
123							AI8_CreatorVersion => { Name => 'AICreatorVersion' }, # (double leading '%')
124							AI12_BuildNumber => { Name => 'AIBuildNumber' },
125							);
126
127							# composite tags
128							%Image::ExifTool::PostScript::Composite = (
129							GROUPS => { 2 => 'Image' },
130							# BoundingBox is in points, not pixels,
131							# but use it anyway if ImageData is not available
132							ImageWidth => {
133							Desire => {
134							0 => 'Main:PostScript:ImageData',
135							1 => 'PostScript:BoundingBox',
136							},
137							ValueConv => 'Image::ExifTool::PostScript::ImageSize(\@val, 0)',
138							},
139							ImageHeight => {
140							Desire => {
141							0 => 'Main:PostScript:ImageData',
142							1 => 'PostScript:BoundingBox',
143							},
144							ValueConv => 'Image::ExifTool::PostScript::ImageSize(\@val, 1)',
145							},
146							);
147
148							# add our composite tags
149							Image::ExifTool::AddCompositeTags('Image::ExifTool::PostScript');
150
151							#------------------------------------------------------------------------------
152							# AutoLoad our writer routines when necessary
153							#
154							sub AUTOLOAD
155							{
156	19			19		164	return Image::ExifTool::DoAutoLoad($AUTOLOAD, @_);
157							}
158
159							#------------------------------------------------------------------------------
160							# Get image width or height
161							# Inputs: 0) value list ref (ImageData, BoundingBox), 1) true to get height
162							sub ImageSize($$)
163							{
164	5			5	0	13	my ($vals, $getHeight) = @_;
165	5					9	my ($w, $h);
166	5	50	33			47	if ($$vals[0] and $$vals[0] =~ /^(\d+) (\d+)/) {
		0	0
167	5					44	($w, $h) = ($1, $2);
168							} elsif ($$vals[1] and $$vals[1] =~ /^(\d+) (\d+) (\d+) (\d+)/) {
169	0					0	($w, $h) = ($3 - $1, $4 - $2);
170							}
171	5	100				78	return $getHeight ? $h : $w;
172							}
173
174							#------------------------------------------------------------------------------
175							# Set PostScript format error warning
176							# Inputs: 0) ExifTool object reference, 1) error string
177							# Returns: 1
178							sub PSErr($$)
179							{
180	0			0	0	0	my ($et, $str) = @_;
181							# set file type if not done already
182	0					0	my $ext = $$et{FILE_EXT};
183	0	0	0			0	$et->SetFileType(($ext and $ext eq 'AI') ? 'AI' : 'PS');
184	0					0	$et->Warn("PostScript format error ($str)");
185	0					0	return 1;
186							}
187
188							#------------------------------------------------------------------------------
189							# Return input record separator to use for the specified file
190							# Inputs: 0) RAF reference
191							# Returns: Input record separator or undef on error
192							sub GetInputRecordSeparator($)
193							{
194	8			8	0	17	my $raf = shift;
195	8					30	my $pos = $raf->Tell(); # save current position
196	8					18	my ($data, $sep);
197	8	50				31	$raf->Read($data,256) or return undef;
198	8					24	my ($a, $d) = (999,999);
199	8	100				56	$a = pos($data), pos($data) = 0 if $data =~ /\x0a/g;
200	8	100				42	$d = pos($data) if $data =~ /\x0d/g;
201	8					30	my $diff = $a - $d;
202	8	50				69	if ($diff == 1) {
		50
		100
		100
203	0					0	$sep = "\x0d\x0a";
204							} elsif ($diff == -1) {
205	0					0	$sep = "\x0a\x0d";
206							} elsif ($diff > 0) {
207	3					9	$sep = "\x0d";
208							} elsif ($diff < 0) {
209	4					12	$sep = "\x0a";
210							} # else error
211	8					47	$raf->Seek($pos, 0); # restore original position
212	8					74	return $sep;
213							}
214
215							#------------------------------------------------------------------------------
216							# Split into lines ending in any CR, LF or CR+LF combination
217							# (this is annoying, and could be avoided if EPS files didn't mix linefeeds!)
218							# Inputs: 0) data pointer, 1) reference to lines array
219							# Notes: Fills @$lines with lines from splitting $$dataPt
220							sub SplitLine($$)
221							{
222	1			1	0	2	my ($dataPt, $lines) = @_;
223	1					3	for (;;) {
224	108					97	my $endl;
225							# find the position of the first LF (\x0a)
226	108	100				171	$endl = pos($$dataPt), pos($$dataPt) = 0 if $$dataPt =~ /\x0a/g;
227	108	50				172	if ($$dataPt =~ /\x0d/g) { # find the first CR (\x0d)
		0
228	108	100				111	if (defined $endl) {
229							# (remember, CR+LF is a DOS newline...)
230	107	50				126	$endl = pos($$dataPt) if pos($$dataPt) < $endl - 1;
231							} else {
232	1					2	$endl = pos($$dataPt);
233							}
234							} elsif (not defined $endl) {
235	0					0	push @$lines, $$dataPt;
236	0					0	last;
237							}
238	108	100				124	if (length $$dataPt == $endl) {
239	1					3	push @$lines, $$dataPt;
240	1					3	last;
241							} else {
242							# continue to split into separate lines
243	107					141	push @$lines, substr($$dataPt, 0, $endl);
244	107					167	$$dataPt = substr($$dataPt, $endl);
245							}
246							}
247							}
248
249							#------------------------------------------------------------------------------
250							# check to be sure we haven't read past end of PS data in DOS-style file
251							# Inputs: 0) RAF ref (with PSEnd member), 1) data ref
252							# - modifies data and sets RAF to EOF if end of PS is reached
253							sub CheckPSEnd($$)
254							{
255	0			0	0	0	my ($raf, $dataPt) = @_;
256	0					0	my $pos = $raf->Tell();
257	0	0				0	if ($pos >= $$raf{PSEnd}) {
258	0					0	$raf->Seek(0, 2); # seek to end of file so we can't read any more
259	0	0				0	$$dataPt = substr($$dataPt, 0, length($$dataPt) - $pos + $$raf{PSEnd}) if $pos > $$raf{PSEnd};
260							}
261							}
262
263							#------------------------------------------------------------------------------
264							# Read next line from EPS file
265							# Inputs: 0) RAF ref (with PSEnd member if Postscript ends before end of file)
266							# 1) array of lines from file
267							# Returns: true on success
268							sub GetNextLine($$)
269							{
270	114			114	0	246	my ($raf, $lines) = @_;
271	114					226	my ($data, $changedNL);
272	114	100				350	my $altnl = ($/ eq "\x0d") ? "\x0a" : "\x0d";
273	114					169	for (;;) {
274	114	50				390	$raf->ReadLine($data) or last;
275	114	50				265	$$raf{PSEnd} and CheckPSEnd($raf, \$data);
276							# split line if it contains other newline sequences
277	114	100				505	if ($data =~ /$altnl/) {
278	1	50	33			7	if (length($data) > 500000 and Image::ExifTool::IsPC()) {
279							# patch for Windows memory problem
280	0	0				0	unless ($changedNL) {
281	0					0	$changedNL = $/;
282	0					0	$/ = $altnl;
283	0					0	$altnl = $changedNL;
284	0					0	$raf->Seek(-length($data), 1);
285	0					0	next;
286							}
287							} else {
288							# split into separate lines
289							# push @$lines, split /$altnl/, $data, -1;
290							# if (@$lines == 2 and $$lines[1] eq $/) {
291							# # handle case of DOS newline data inside file using Unix newlines
292							# $$lines[0] .= pop @$lines;
293							# }
294							# split into separate lines if necessary
295	1					5	SplitLine(\$data, $lines);
296							}
297							} else {
298	113					378	push @$lines, $data;
299							}
300	114	50				332	$/ = $changedNL if $changedNL;
301	114					447	return 1;
302							}
303	0					0	return 0;
304							}
305
306							#------------------------------------------------------------------------------
307							# Decode comment from PostScript file
308							# Inputs: 0) comment string, 1) RAF ref, 2) reference to lines array
309							# 3) optional data reference for extra lines read from file
310							# Returns: Decoded comment string (may be an array reference)
311							# - handles multi-line comments and escape sequences
312							sub DecodeComment($$$;$)
313							{
314	31			31	0	73	my ($val, $raf, $lines, $dataPt) = @_;
315	31					286	$val =~ s/\x0d\x0a$//; # remove trailing CR, LF or CR/LF
316							# check for continuation comments
317	31					52	for (;;) {
318	31	50	33			111	@$lines or GetNextLine($raf, $lines) or last;
319	31	50				90	last unless $$lines[0] =~ /^%%\+/; # is the next line a continuation?
320	0	0				0	$$dataPt .= $$lines[0] if $dataPt; # add to data if necessary
321	0					0	$$lines[0] =~ s/\x0d\x0a$//; # remove trailing CR, LF or CR/LF
322	0					0	$val .= substr(shift(@$lines), 3); # add to value (without leading "%%+")
323							}
324	31					46	my @vals;
325							# handle bracketed string values
326	31	100				108	if ($val =~ s/^$(.*)$$/$1/) { # remove brackets if necessary
327							# split into an array of strings if necessary
328	3					5	my $nesting = 1;
329	3					42	while ($val =~ /($\|$)/g) {
330	0					0	my $bra = $1;
331	0					0	my $pos = pos($val) - 2;
332	0					0	my $backslashes = 0;
333	0		0			0	while ($pos and substr($val, $pos, 1) eq '\\') {
334	0					0	--$pos;
335	0					0	++$backslashes;
336							}
337	0	0				0	next if $backslashes & 0x01; # escaped if odd number
338	0	0				0	if ($bra eq '(') {
339	0					0	++$nesting;
340							} else {
341	0					0	--$nesting;
342	0	0				0	unless ($nesting) {
343	0					0	push @vals, substr($val, 0, pos($val)-1);
344	0					0	$val = substr($val, pos($val));
345	0	0				0	++$nesting if $val =~ s/\s*\(//;
346							}
347							}
348							}
349	3					8	push @vals, $val;
350	3					7	foreach $val (@vals) {
351							# decode escape sequences in bracketed strings
352							# (similar to code in PDF.pm, but without line continuation)
353	3					11	while ($val =~ /\\(.)/sg) {
354	0					0	my $n = pos($val) - 2;
355	0					0	my $c = $1;
356	0					0	my $r;
357	0	0				0	if ($c =~ /[0-7]/) {
358							# get up to 2 more octal digits
359	0	0				0	$c .= $1 if $val =~ /\G([0-7]{1,2})/g;
360							# convert octal escape code
361	0					0	$r = chr(oct($c) & 0xff);
362							} else {
363							# convert escaped characters
364	0					0	($r = $c) =~ tr/nrtbf/\n\r\t\b\f/;
365							}
366	0					0	substr($val, $n, length($c)+1) = $r;
367							# continue search after this character
368	0					0	pos($val) = $n + length($r);
369							}
370							}
371	3	50				9	$val = @vals > 1 ? \@vals : $vals[0];
372							}
373	31					92	return $val;
374							}
375
376							#------------------------------------------------------------------------------
377							# Unescape PostScript string
378							# Inputs: 0) string
379							# Returns: unescaped string
380							sub UnescapePostScript($)
381							{
382	10			10	0	25	my $str = shift;
383							# decode escape sequences in literal strings
384	10					43	while ($str =~ /\\(.)/sg) {
385	12					25	my $n = pos($str) - 2;
386	12					23	my $c = $1;
387	12					19	my $r;
388	12	100				37	if ($c =~ /[0-7]/) {
		50
		50
389							# get up to 2 more octal digits
390	11	50				51	$c .= $1 if $str =~ /\G([0-7]{1,2})/g;
391							# convert octal escape code
392	11					28	$r = chr(oct($c) & 0xff);
393							} elsif ($c eq "\x0d") {
394							# the string is continued if the line ends with '\'
395							# (also remove "\x0d\x0a")
396	0	0				0	$c .= $1 if $str =~ /\G(\x0a)/g;
397	0					0	$r = '';
398							} elsif ($c eq "\x0a") {
399	0					0	$r = '';
400							} else {
401							# convert escaped characters
402	1					4	($r = $c) =~ tr/nrtbf/\n\r\t\b\f/;
403							}
404	12					29	substr($str, $n, length($c)+1) = $r;
405							# continue search after this character
406	12					57	pos($str) = $n + length($r);
407							}
408	10					27	return $str;
409							}
410
411							#------------------------------------------------------------------------------
412							# Extract information from EPS, PS or AI file
413							# Inputs: 0) ExifTool object reference, 1) dirInfo reference, 2) optional tag table ref
414							# Returns: 1 if this was a valid PostScript file
415							sub ProcessPS($$;$)
416							{
417	4			4	0	13	my ($et, $dirInfo, $tagTablePtr) = @_;
418	4					12	my $raf = $$dirInfo{RAF};
419	4					20	my $embedded = $et->Options('ExtractEmbedded');
420	4					10	my ($data, $dos, $endDoc, $fontTable, $comment);
421
422							# allow read from data
423	4	50				18	unless ($raf) {
424	0					0	$raf = File::RandomAccess->new($$dirInfo{DataPt});
425	0					0	$et->VerboseDir('PostScript');
426							}
427							#
428							# determine if this is a postscript file
429							#
430	4	50				35	$raf->Read($data, 4) == 4 or return 0;
431							# accept either ASCII or DOS binary postscript file format
432	4	50				31	return 0 unless $data =~ /^(%!PS\|%!Ad\|%!Fo\|\xc5\xd0\xd3\xc6)/;
433	4	50				29	if ($data =~ /^%!Ad/) {
		50
434							# I've seen PS files start with "%!Adobe-PS"...
435	0	0	0			0	return 0 unless $raf->Read($data, 6) == 6 and $data eq "obe-PS";
436							} elsif ($data =~ /^\xc5\xd0\xd3\xc6/) {
437							# process DOS binary file header
438							# - save DOS header then seek ahead and check PS header
439	0	0				0	$raf->Read($dos, 26) == 26 or return 0;
440	0					0	SetByteOrder('II');
441	0					0	my $psStart = Get32u(\$dos, 0);
442	0	0	0			0	unless ($raf->Seek($psStart, 0) and
			0
443							$raf->Read($data, 4) == 4 and $data eq '%!PS')
444							{
445	0					0	return PSErr($et, 'invalid header');
446							}
447	0					0	$$raf{PSEnd} = $psStart + Get32u(\$dos, 4); # set end of PostScript data in RAF
448							} else {
449							# check for PostScript font file (PFA or PFB)
450	4					9	my $d2;
451	4	50				16	$data .= $d2 if $raf->Read($d2,12);
452	4	100				41	if ($data =~ /^%!(PS-(AdobeFont-\|Bitstream )\|FontType1-)/) {
453	2					19	$et->SetFileType('PFA'); # PostScript ASCII font file
454	2					10	$fontTable = GetTagTable('Image::ExifTool::Font::PSInfo');
455							# PostScript font files may contain an unformatted comments which may
456							# contain useful information, so accumulate these for the Comment tag
457	2					6	$comment = 1;
458							}
459	4					20	$raf->Seek(-length($data), 1);
460							}
461							#
462							# set the newline type based on the first newline found in the file
463							#
464	4					25	local $/ = GetInputRecordSeparator($raf);
465	4	50				18	$/ or return PSErr($et, 'invalid PS data');
466
467							# set file type (PostScript or EPS)
468	4	50				33	$raf->ReadLine($data) or $data = '';
469	4					13	my $type;
470	4	100				20	if ($data =~ /EPSF/) {
471	2					6	$type = 'EPS';
472							} else {
473							# read next line to see if this is an Illustrator file
474	2					5	my $line2;
475	2					9	my $pos = $raf->Tell();
476	2	50	33			7	if ($raf->ReadLine($line2) and $line2 =~ /^%%Creator: Adobe Illustrator/) {
477	0					0	$type = 'AI';
478							} else {
479	2					5	$type = 'PS';
480							}
481	2					10	$raf->Seek($pos, 0);
482							}
483	4					37	$et->SetFileType($type);
484	4	50	33			44	return 1 if $$et{OPTIONS}{FastScan} and $$et{OPTIONS}{FastScan} == 3;
485							#
486							# extract TIFF information from DOS header
487							#
488	4	50				22	$tagTablePtr or $tagTablePtr = GetTagTable('Image::ExifTool::PostScript::Main');
489	4	50				16	if ($dos) {
490	0					0	my $base = Get32u(\$dos, 16);
491	0	0				0	if ($base) {
492	0					0	my $pos = $raf->Tell();
493							# extract the TIFF preview
494	0					0	my $len = Get32u(\$dos, 20);
495	0					0	my $val = $et->ExtractBinary($base, $len, 'TIFFPreview');
496	0	0	0			0	if (defined $val and $val =~ /^(MM\0\x2a\|II\x2a\0\|Binary)/) {
497	0					0	$et->HandleTag($tagTablePtr, 'TIFFPreview', $val);
498							} else {
499	0					0	$et->Warn('Bad TIFF preview image');
500							}
501							# extract information from TIFF in DOS header
502							# (set Parent to '' to avoid setting FileType tag again)
503	0					0	my %dirInfo = (
504							Parent => '',
505							RAF => $raf,
506							Base => $base,
507							);
508	0	0				0	$et->ProcessTIFF(\%dirInfo) or $et->Warn('Bad embedded TIFF');
509							# position file pointer to extract PS information
510	0					0	$raf->Seek($pos, 0);
511							}
512							}
513							#
514							# parse the postscript
515							#
516	4					19	my ($buff, $mode, $beginToken, $endToken, $docNum, $subDocNum, $changedNL);
517	4					0	my (@lines, $altnl);
518	4	100				19	if ($/ eq "\x0d") {
519	2					5	$altnl = "\x0a";
520							} else {
521	2					33	$/ = "\x0a"; # end on any LF (even if DOS CR+LF)
522	2					6	$altnl = "\x0d";
523							}
524	4					8	for (;;) {
525	725	100				1153	if (@lines) {
526	243					337	$data = shift @lines;
527							} else {
528	482	100				1169	$raf->ReadLine($data) or last;
529							# check for alternate newlines as efficiently as possible
530	478	100				1605	if ($data =~ /$altnl/) {
531	2	50	33			9	if (length($data) > 500000 and Image::ExifTool::IsPC()) {
532							# Windows can't split very long lines due to poor memory handling,
533							# so re-read the file with the other newline character instead
534							# (slower but uses less memory)
535	0	0				0	unless ($changedNL) {
536	0					0	$changedNL = 1;
537	0					0	my $t = $/;
538	0					0	$/ = $altnl;
539	0					0	$altnl = $t;
540	0					0	$raf->Seek(-length($data), 1);
541	0					0	next;
542							}
543							} else {
544							# split into separate lines
545	2					104	@lines = split /$altnl/, $data, -1;
546	2					7	$data = shift @lines;
547	2	50	33			10	if (@lines == 1 and $lines[0] eq $/) {
548							# handle case of DOS newline data inside file using Unix newlines
549	0					0	$data .= $lines[0];
550	0					0	undef @lines;
551							}
552							}
553							}
554							}
555	721					1015	undef $changedNL;
556	721	100	100			3272	if ($mode) {
		100	66
		100	66
		50
		100
		50
		100
557	306	50				1023	if (not $endToken) {
		100
		100
558	0					0	$buff .= $data;
559	0	0				0	next unless $data =~ m{<\?xpacket end=.(w\|r).\?>(\n\|\r\|$)};
560							} elsif ($data !~ /^$endToken/i) {
561	298	100				560	if ($mode eq 'XMP') {
		100
562	216					292	$buff .= $data;
563							} elsif ($mode eq 'Document') {
564							# ignore embedded documents, but keep track of nesting level
565	29	100				118	$docNum .= '-1' if $data =~ /^$beginToken/;
566							} else {
567							# data is ASCII-hex encoded
568	53					70	$data =~ tr/0-9A-Fa-f//dc; # remove all but hex characters
569	53					108	$buff .= pack('H*', $data); # translate from hex
570							}
571	298					387	next;
572							} elsif ($mode eq 'Document') {
573	4					27	$docNum =~ s/-?\d+$//; # decrement document nesting level
574							# done with Document mode if we are back at the top level
575	4	100				27	undef $mode unless $docNum;
576	4					9	next;
577							}
578							} elsif ($endDoc and $data =~ /^$endDoc/i) {
579	4					13	$docNum =~ s/-?(\d+)$//; # decrement nesting level
580	4					10	$subDocNum = $1; # remember our last sub-document number
581	4					7	$$et{DOC_NUM} = $docNum;
582	4	100				9	undef $endDoc unless $docNum; # done with document if top level
583	4					4	next;
584							} elsif ($data =~ /^(%{1,2})(Begin)(_xml_packet\|Photoshop\|ICCProfile\|Document\|Binary)/i) {
585							# the beginning of a data block
586	13					96	my %modeLookup = (
587							_xml_packet => 'XMP',
588							photoshop => 'Photoshop',
589							iccprofile => 'ICC_Profile',
590							document => 'Document',
591							binary => undef, # (we will try to skip this)
592							);
593	13					55	$mode = $modeLookup{lc $3};
594	13	100				31	unless ($mode) {
595	2	50	33			24	if (not @lines and $data =~ /^%{1,2}BeginBinary:\s*(\d+)/i) {
596	2	50				30	$raf->Seek($1, 1) or last; # skip binary data
597							}
598	2					9	next;
599							}
600	11					18	$buff = '';
601	11					78	$beginToken = $1 . $2 . $3;
602	11	100				45	$endToken = $1 . ($2 eq 'begin' ? 'end' : 'End') . $3;
603	11	100				29	if ($mode eq 'Document') {
604							# this is either the 1st sub-document or Nth document
605	7	100				15	if ($docNum) {
606							# increase nesting level
607	1					3	$docNum .= '-' . (++$subDocNum);
608							} else {
609							# this is the Nth document
610	6					16	$docNum = $$et{DOC_COUNT} + 1;
611							}
612	7					39	$subDocNum = 0; # new level, so reset subDocNum
613	7	100				26	next unless $embedded; # skip over this document
614							# set document number for family 4-7 group names
615	4					8	$$et{DOC_NUM} = $docNum;
616	4					9	$$et{LIST_TAGS} = { }; # don't build lists across different documents
617	4					7	$$et{PROCESSED} = { }; # re-initialize processed directory lookup too
618	4					6	$endDoc = $endToken; # parse to EndDocument token
619							# reset mode to allow parsing into sub-directories
620	4					5	undef $endToken;
621	4					5	undef $mode;
622							# save document name if available
623	4	50				65	if ($data =~ /^$beginToken:\s+([^\n\r]+)/i) {
624	4					9	my $docName = $1;
625							# remove brackets if necessary
626	4	50				15	$docName = $1 if $docName =~ /^$(.*)$$/;
627	4					16	$et->HandleTag($tagTablePtr, 'EmbeddedFileName', $docName);
628							}
629							}
630	8					28	next;
631							} elsif ($data =~ /^<\?xpacket begin=.{7,13}W5M0MpCehiHzreSzNTczkc9d/) {
632							# pick up any stray XMP data
633	0					0	$mode = 'XMP';
634	0					0	$buff = $data;
635	0					0	undef $endToken; # no end token (just look for xpacket end)
636							# XMP could be contained in a single line (if newlines are different)
637	0	0				0	next unless $data =~ m{<\?xpacket end=.(w\|r).\?>(\n\|\r\|$)};
638							} elsif ($data =~ /^%%?(\w+): ?(.*)/s and $$tagTablePtr{$1}) {
639	29					125	my ($tag, $val) = ($1, $2);
640							# only allow 'ImageData' and AI tags to have single leading '%'
641	29	50	66			122	next unless $data =~ /^%(%\|AI\d+_)/ or $tag eq 'ImageData';
642							# decode comment string (reading continuation lines if necessary)
643	29					86	$val = DecodeComment($val, $raf, \@lines);
644	29					141	$et->HandleTag($tagTablePtr, $tag, $val);
645	29					53	next;
646							} elsif ($embedded and $data =~ /^%AI12_CompressedData/) {
647							# the rest of the file is compressed
648	0	0				0	unless (eval { require Compress::Zlib }) {
	0					0
649	0					0	$et->Warn('Install Compress::Zlib to extract compressed embedded data');
650	0					0	last;
651							}
652							# seek back to find the start of the compressed data in the file
653	0					0	my $tlen = length($data) + @lines;
654	0					0	$tlen += length $_ foreach @lines;
655	0					0	my $backTo = $raf->Tell() - $tlen - 64;
656	0	0				0	$backTo = 0 if $backTo < 0;
657	0	0	0			0	last unless $raf->Seek($backTo, 0) and $raf->Read($data, 2048);
658	0	0				0	last unless $data =~ s/.*?%AI12_CompressedData//;
659	0					0	my $inflate = Compress::Zlib::inflateInit();
660	0	0				0	$inflate or $et->Warn('Error initializing inflate'), last;
661							# generate a PS-like file in memory from the compressed data
662	0					0	my $verbose = $et->Options('Verbose');
663	0	0				0	if ($verbose > 1) {
664	0					0	$et->VerboseDir('AI12_CompressedData (first 4kB)');
665	0					0	$et->VerboseDump(\$data);
666							}
667							# remove header if it exists (Windows AI files only)
668	0					0	$data =~ s/^.{0,256}EndData[\x0d\x0a]+//s;
669	0					0	my $val;
670	0					0	for (;;) {
671	0					0	my ($v2, $stat) = $inflate->inflate($data);
672	0	0				0	$stat == Compress::Zlib::Z_STREAM_END() and $val .= $v2, last;
673	0	0				0	$stat != Compress::Zlib::Z_OK() and undef($val), last;
674	0	0				0	if (defined $val) {
		0
675	0					0	$val .= $v2;
676							} elsif ($v2 =~ /^%!PS/) {
677	0					0	$val = $v2;
678							} else {
679							# add postscript header (for file recognition) if it doesn't exist
680	0					0	$val = "%!PS-Adobe-3.0$/" . $v2;
681							}
682	0	0				0	$raf->Read($data, 65536) or last;
683							}
684	0	0				0	defined $val or $et->Warn('Error inflating AI compressed data'), last;
685	0	0				0	if ($verbose > 1) {
686	0					0	$et->VerboseDir('Uncompressed AI12 Data');
687	0					0	$et->VerboseDump(\$val);
688							}
689							# extract information from embedded images in the uncompressed data
690							$val = # add PS header in case it needs one
691	0					0	ProcessPS($et, { DataPt => \$val });
692	0					0	last;
693							} elsif ($fontTable) {
694	58	100				159	if (defined $comment) {
695							# extract initial comments from PostScript Font files
696	10	100				81	if ($data =~ /^%\s+(.*?)[\x0d\x0a]/) {
		100
697	4	50				16	$comment .= "\n" if $comment;
698	4					16	$comment .= $1;
699	4					10	next;
700							} elsif ($data !~ /^%/) {
701							# stop extracting comments at the first non-comment line
702	2	50				16	$et->FoundTag('Comment', $comment) if length $comment;
703	2					4	undef $comment;
704							}
705							}
706	54	100	100			487	if ($data =~ m{^\s/(\w+)\s(.*)} and $$fontTable{$1}) {
		100
707	24					135	my ($tag, $val) = ($1, $2);
708	24	100				122	if ($val =~ /^$(.*)$/) {
		50
709	10					31	$val = UnescapePostScript($1);
710							} elsif ($val =~ m{/?(\S+)}) {
711	14					33	$val = $1;
712							}
713	24					101	$et->HandleTag($fontTable, $tag, $val);
714							} elsif ($data =~ /^currentdict end/) {
715							# only extract tags from initial FontInfo dict
716	2					5	undef $fontTable;
717							}
718	54					111	next;
719							} else {
720	311					465	next;
721							}
722							# extract information from buffered data
723	4					32	my %dirInfo = (
724							DataPt => \$buff,
725							DataLen => length $buff,
726							DirStart => 0,
727							DirLen => length $buff,
728							Parent => 'PostScript',
729							);
730	4					21	my $subTablePtr = GetTagTable("Image::ExifTool::${mode}::Main");
731	4	50				25	unless ($et->ProcessDirectory(\%dirInfo, $subTablePtr)) {
732	0					0	$et->Warn("Error processing $mode information in PostScript file");
733							}
734	4					12	undef $buff;
735	4					20	undef $mode;
736							}
737	4	50	33			23	$mode = 'Document' if $endDoc and not $mode;
738	4	50				16	$mode and PSErr($et, "unterminated $mode data");
739	4					44	return 1;
740							}
741
742							#------------------------------------------------------------------------------
743							# Extract information from EPS file
744							# Inputs: 0) ExifTool object reference, 1) dirInfo reference
745							# Returns: 1 if this was a valid PostScript file
746							sub ProcessEPS($$)
747							{
748	2			2	0	10	return ProcessPS($_[0],$_[1]);
749							}
750
751							1; # end
752
753
754							__END__