File Coverage

blib/lib/Image/ExifTool/PostScript.pm

Criterion	Covered	Total	%
statement	197	319	61.7
branch	126	242	52.0
condition	21	66	31.8
subroutine	12	15	80.0
pod	0	11	0.0
total	356	653	54.5

line	stmt	bran	cond	sub	pod	time	code
1							#------------------------------------------------------------------------------
2							# File: PostScript.pm
3							#
4							# Description: Read PostScript meta information
5							#
6							# Revisions: 07/08/2005 - P. Harvey Created
7							#
8							# References: 1) http://partners.adobe.com/public/developer/en/ps/5002.EPSF_Spec.pdf
9							# 2) http://partners.adobe.com/public/developer/en/ps/5001.DSC_Spec.pdf
10							# 3) http://partners.adobe.com/public/developer/en/illustrator/sdk/AI7FileFormat.pdf
11							#------------------------------------------------------------------------------
12
13							package Image::ExifTool::PostScript;
14
15	29			29		3552	use strict;
	29					65
	29					992
16	29			29		146	use vars qw($VERSION $AUTOLOAD);
	29					57
	29					1282
17	29			29		163	use Image::ExifTool qw(:DataAccess :Utils);
	29					64
	29					113514
18
19							$VERSION = '1.44';
20
21							sub WritePS($$);
22							sub ProcessPS($$;$);
23
24							# PostScript tag table
25							%Image::ExifTool::PostScript::Main = (
26							PROCESS_PROC => \&ProcessPS,
27							WRITE_PROC => \&WritePS,
28							PREFERRED => 1, # always add these tags when writing
29							GROUPS => { 2 => 'Image' },
30							# Note: Make all of these tags priority 0 since the first one found at
31							# the start of the file should take priority (in case multiples exist)
32							Author => { Priority => 0, Groups => { 2 => 'Author' }, Writable => 'string' },
33							BoundingBox => { Priority => 0 },
34							Copyright => { Priority => 0, Writable => 'string' }, #2
35							CreationDate => {
36							Name => 'CreateDate',
37							Priority => 0,
38							Groups => { 2 => 'Time' },
39							Writable => 'string',
40							PrintConv => '$self->ConvertDateTime($val)',
41							PrintConvInv => '$self->InverseDateTime($val)',
42							},
43							Creator => { Priority => 0, Writable => 'string' },
44							ImageData => { Priority => 0 },
45							For => { Priority => 0, Writable => 'string', Notes => 'for whom the document was prepared'},
46							Keywords => { Priority => 0, Writable => 'string' },
47							ModDate => {
48							Name => 'ModifyDate',
49							Priority => 0,
50							Groups => { 2 => 'Time' },
51							Writable => 'string',
52							PrintConv => '$self->ConvertDateTime($val)',
53							PrintConvInv => '$self->InverseDateTime($val)',
54							},
55							Pages => { Priority => 0 },
56							Routing => { Priority => 0, Writable => 'string' }, #2
57							Subject => { Priority => 0, Writable => 'string' },
58							Title => { Priority => 0, Writable => 'string' },
59							Version => { Priority => 0, Writable => 'string' }, #2
60							# these subdirectories for documentation only
61							BeginPhotoshop => {
62							Name => 'PhotoshopData',
63							SubDirectory => {
64							TagTable => 'Image::ExifTool::Photoshop::Main',
65							},
66							},
67							BeginICCProfile => {
68							Name => 'ICC_Profile',
69							SubDirectory => {
70							TagTable => 'Image::ExifTool::ICC_Profile::Main',
71							},
72							},
73							begin_xml_packet => {
74							Name => 'XMP',
75							SubDirectory => {
76							TagTable => 'Image::ExifTool::XMP::Main',
77							},
78							},
79							TIFFPreview => {
80							Groups => { 2 => 'Preview' },
81							Binary => 1,
82							Notes => q{
83							not a real tag ID, but used to represent the TIFF preview extracted from DOS
84							EPS images
85							},
86							},
87							BeginDocument => {
88							Name => 'EmbeddedFile',
89							SubDirectory => {
90							TagTable => 'Image::ExifTool::PostScript::Main',
91							},
92							Notes => 'extracted with L option',
93							},
94							EmbeddedFileName => {
95							Notes => q{
96							not a real tag ID, but the file name from a BeginDocument statement.
97							Extracted with document metadata when L option is used
98							},
99							},
100							# AI metadata (most with a single leading '%')
101							AI9_ColorModel => {
102							Name => 'AIColorModel',
103							PrintConv => {
104							1 => 'RGB',
105							2 => 'CMYK',
106							},
107							},
108							AI3_ColorUsage => { Name => 'AIColorUsage' },
109							AI5_RulerUnits => {
110							Name => 'AIRulerUnits',
111							PrintConv => {
112							0 => 'Inches',
113							1 => 'Millimeters',
114							2 => 'Points',
115							3 => 'Picas',
116							4 => 'Centimeters',
117							6 => 'Pixels',
118							},
119							},
120							AI5_TargetResolution => { Name => 'AITargetResolution' },
121							AI5_NumLayers => { Name => 'AINumLayers' },
122							AI5_FileFormat => { Name => 'AIFileFormat' },
123							AI8_CreatorVersion => { Name => 'AICreatorVersion' }, # (double leading '%')
124							AI12_BuildNumber => { Name => 'AIBuildNumber' },
125							);
126
127							# composite tags
128							%Image::ExifTool::PostScript::Composite = (
129							GROUPS => { 2 => 'Image' },
130							# BoundingBox is in points, not pixels,
131							# but use it anyway if ImageData is not available
132							ImageWidth => {
133							Desire => {
134							0 => 'Main:PostScript:ImageData',
135							1 => 'PostScript:BoundingBox',
136							},
137							ValueConv => 'Image::ExifTool::PostScript::ImageSize(\@val, 0)',
138							},
139							ImageHeight => {
140							Desire => {
141							0 => 'Main:PostScript:ImageData',
142							1 => 'PostScript:BoundingBox',
143							},
144							ValueConv => 'Image::ExifTool::PostScript::ImageSize(\@val, 1)',
145							},
146							);
147
148							# add our composite tags
149							Image::ExifTool::AddCompositeTags('Image::ExifTool::PostScript');
150
151							#------------------------------------------------------------------------------
152							# AutoLoad our writer routines when necessary
153							#
154							sub AUTOLOAD
155							{
156	19			19		151	return Image::ExifTool::DoAutoLoad($AUTOLOAD, @_);
157							}
158
159							#------------------------------------------------------------------------------
160							# Is this a PC system
161							# Returns: true for PC systems
162							my %isPC = (MSWin32 => 1, os2 => 1, dos => 1, NetWare => 1, symbian => 1, cygwin => 1);
163							sub IsPC()
164							{
165	0			0	0	0	return $isPC{$^O};
166							}
167
168							#------------------------------------------------------------------------------
169							# Get image width or height
170							# Inputs: 0) value list ref (ImageData, BoundingBox), 1) true to get height
171							sub ImageSize($$)
172							{
173	4			4	0	11	my ($vals, $getHeight) = @_;
174	4					7	my ($w, $h);
175	4	50	33			84	if ($$vals[0] and $$vals[0] =~ /^(\d+) (\d+)/) {
		0	0
176	4					16	($w, $h) = ($1, $2);
177							} elsif ($$vals[1] and $$vals[1] =~ /^(\d+) (\d+) (\d+) (\d+)/) {
178	0					0	($w, $h) = ($3 - $1, $4 - $2);
179							}
180	4	100				32	return $getHeight ? $h : $w;
181							}
182
183							#------------------------------------------------------------------------------
184							# Set PostScript format error warning
185							# Inputs: 0) ExifTool object reference, 1) error string
186							# Returns: 1
187							sub PSErr($$)
188							{
189	0			0	0	0	my ($et, $str) = @_;
190							# set file type if not done already
191	0					0	my $ext = $$et{FILE_EXT};
192	0	0	0			0	$et->SetFileType(($ext and $ext eq 'AI') ? 'AI' : 'PS');
193	0					0	$et->Warn("PostScript format error ($str)");
194	0					0	return 1;
195							}
196
197							#------------------------------------------------------------------------------
198							# Return input record separator to use for the specified file
199							# Inputs: 0) RAF reference
200							# Returns: Input record separator or undef on error
201							sub GetInputRecordSeparator($)
202							{
203	8			8	0	26	my $raf = shift;
204	8					32	my $pos = $raf->Tell(); # save current position
205	8					20	my ($data, $sep);
206	8	50				25	$raf->Read($data,256) or return undef;
207	8					24	my ($a, $d) = (999,999);
208	8	100				53	$a = pos($data), pos($data) = 0 if $data =~ /\x0a/g;
209	8	100				41	$d = pos($data) if $data =~ /\x0d/g;
210	8					22	my $diff = $a - $d;
211	8	50				47	if ($diff == 1) {
		50
		100
		100
212	0					0	$sep = "\x0d\x0a";
213							} elsif ($diff == -1) {
214	0					0	$sep = "\x0a\x0d";
215							} elsif ($diff > 0) {
216	3					8	$sep = "\x0d";
217							} elsif ($diff < 0) {
218	4					8	$sep = "\x0a";
219							} # else error
220	8					38	$raf->Seek($pos, 0); # restore original position
221	8					59	return $sep;
222							}
223
224							#------------------------------------------------------------------------------
225							# Split into lines ending in any CR, LF or CR+LF combination
226							# (this is annoying, and could be avoided if EPS files didn't mix linefeeds!)
227							# Inputs: 0) data pointer, 1) reference to lines array
228							# Notes: Fills @$lines with lines from splitting $$dataPt
229							sub SplitLine($$)
230							{
231	1			1	0	3	my ($dataPt, $lines) = @_;
232	1					2	for (;;) {
233	108					125	my $endl;
234							# find the position of the first LF (\x0a)
235	108	100				231	$endl = pos($$dataPt), pos($$dataPt) = 0 if $$dataPt =~ /\x0a/g;
236	108	50				203	if ($$dataPt =~ /\x0d/g) { # find the first CR (\x0d)
		0
237	108	100				140	if (defined $endl) {
238							# (remember, CR+LF is a DOS newline...)
239	107	50				157	$endl = pos($$dataPt) if pos($$dataPt) < $endl - 1;
240							} else {
241	1					2	$endl = pos($$dataPt);
242							}
243							} elsif (not defined $endl) {
244	0					0	push @$lines, $$dataPt;
245	0					0	last;
246							}
247	108	100				128	if (length $$dataPt == $endl) {
248	1					3	push @$lines, $$dataPt;
249	1					3	last;
250							} else {
251							# continue to split into separate lines
252	107					191	push @$lines, substr($$dataPt, 0, $endl);
253	107					225	$$dataPt = substr($$dataPt, $endl);
254							}
255							}
256							}
257
258							#------------------------------------------------------------------------------
259							# check to be sure we haven't read past end of PS data in DOS-style file
260							# Inputs: 0) RAF ref (with PSEnd member), 1) data ref
261							# - modifies data and sets RAF to EOF if end of PS is reached
262							sub CheckPSEnd($$)
263							{
264	0			0	0	0	my ($raf, $dataPt) = @_;
265	0					0	my $pos = $raf->Tell();
266	0	0				0	if ($pos >= $$raf{PSEnd}) {
267	0					0	$raf->Seek(0, 2); # seek to end of file so we can't read any more
268	0	0				0	$$dataPt = substr($$dataPt, 0, length($$dataPt) - $pos + $$raf{PSEnd}) if $pos > $$raf{PSEnd};
269							}
270							}
271
272							#------------------------------------------------------------------------------
273							# Read next line from EPS file
274							# Inputs: 0) RAF ref (with PSEnd member if Postscript ends before end of file)
275							# 1) array of lines from file
276							# Returns: true on success
277							sub GetNextLine($$)
278							{
279	114			114	0	175	my ($raf, $lines) = @_;
280	114					125	my ($data, $changedNL);
281	114	100				210	my $altnl = ($/ eq "\x0d") ? "\x0a" : "\x0d";
282	114					121	for (;;) {
283	114	50				236	$raf->ReadLine($data) or last;
284	114	50				198	$$raf{PSEnd} and CheckPSEnd($raf, \$data);
285							# split line if it contains other newline sequences
286	114	100				253	if ($data =~ /$altnl/) {
287	1	50	33			8	if (length($data) > 500000 and IsPC()) {
288							# patch for Windows memory problem
289	0	0				0	unless ($changedNL) {
290	0					0	$changedNL = $/;
291	0					0	$/ = $altnl;
292	0					0	$altnl = $changedNL;
293	0					0	$raf->Seek(-length($data), 1);
294	0					0	next;
295							}
296							} else {
297							# split into separate lines
298							# push @$lines, split /$altnl/, $data, -1;
299							# if (@$lines == 2 and $$lines[1] eq $/) {
300							# # handle case of DOS newline data inside file using Unix newlines
301							# $$lines[0] .= pop @$lines;
302							# }
303							# split into separate lines if necessary
304	1					5	SplitLine(\$data, $lines);
305							}
306							} else {
307	113					204	push @$lines, $data;
308							}
309	114	50				168	$/ = $changedNL if $changedNL;
310	114					323	return 1;
311							}
312	0					0	return 0;
313							}
314
315							#------------------------------------------------------------------------------
316							# Decode comment from PostScript file
317							# Inputs: 0) comment string, 1) RAF ref, 2) reference to lines array
318							# 3) optional data reference for extra lines read from file
319							# Returns: Decoded comment string (may be an array reference)
320							# - handles multi-line comments and escape sequences
321							sub DecodeComment($$$;$)
322							{
323	31			31	0	85	my ($val, $raf, $lines, $dataPt) = @_;
324	31					160	$val =~ s/\x0d\x0a$//; # remove trailing CR, LF or CR/LF
325							# check for continuation comments
326	31					53	for (;;) {
327	31	50	33			87	@$lines or GetNextLine($raf, $lines) or last;
328	31	50				81	last unless $$lines[0] =~ /^%%\+/; # is the next line a continuation?
329	0	0				0	$$dataPt .= $$lines[0] if $dataPt; # add to data if necessary
330	0					0	$$lines[0] =~ s/\x0d\x0a$//; # remove trailing CR, LF or CR/LF
331	0					0	$val .= substr(shift(@$lines), 3); # add to value (without leading "%%+")
332							}
333	31					38	my @vals;
334							# handle bracketed string values
335	31	100				71	if ($val =~ s/^$(.*)$$/$1/) { # remove brackets if necessary
336							# split into an array of strings if necessary
337	3					6	my $nesting = 1;
338	3					12	while ($val =~ /($\|$)/g) {
339	0					0	my $bra = $1;
340	0					0	my $pos = pos($val) - 2;
341	0					0	my $backslashes = 0;
342	0		0			0	while ($pos and substr($val, $pos, 1) eq '\\') {
343	0					0	--$pos;
344	0					0	++$backslashes;
345							}
346	0	0				0	next if $backslashes & 0x01; # escaped if odd number
347	0	0				0	if ($bra eq '(') {
348	0					0	++$nesting;
349							} else {
350	0					0	--$nesting;
351	0	0				0	unless ($nesting) {
352	0					0	push @vals, substr($val, 0, pos($val)-1);
353	0					0	$val = substr($val, pos($val));
354	0	0				0	++$nesting if $val =~ s/\s*\(//;
355							}
356							}
357							}
358	3					9	push @vals, $val;
359	3					5	foreach $val (@vals) {
360							# decode escape sequences in bracketed strings
361							# (similar to code in PDF.pm, but without line continuation)
362	3					8	while ($val =~ /\\(.)/sg) {
363	0					0	my $n = pos($val) - 2;
364	0					0	my $c = $1;
365	0					0	my $r;
366	0	0				0	if ($c =~ /[0-7]/) {
367							# get up to 2 more octal digits
368	0	0				0	$c .= $1 if $val =~ /\G([0-7]{1,2})/g;
369							# convert octal escape code
370	0					0	$r = chr(oct($c) & 0xff);
371							} else {
372							# convert escaped characters
373	0					0	($r = $c) =~ tr/nrtbf/\n\r\t\b\f/;
374							}
375	0					0	substr($val, $n, length($c)+1) = $r;
376							# continue search after this character
377	0					0	pos($val) = $n + length($r);
378							}
379							}
380	3	50				8	$val = @vals > 1 ? \@vals : $vals[0];
381							}
382	31					66	return $val;
383							}
384
385							#------------------------------------------------------------------------------
386							# Unescape PostScript string
387							# Inputs: 0) string
388							# Returns: unescaped string
389							sub UnescapePostScript($)
390							{
391	10			10	0	17	my $str = shift;
392							# decode escape sequences in literal strings
393	10					26	while ($str =~ /\\(.)/sg) {
394	12					19	my $n = pos($str) - 2;
395	12					16	my $c = $1;
396	12					14	my $r;
397	12	100				30	if ($c =~ /[0-7]/) {
		50
		50
398							# get up to 2 more octal digits
399	11	50				30	$c .= $1 if $str =~ /\G([0-7]{1,2})/g;
400							# convert octal escape code
401	11					17	$r = chr(oct($c) & 0xff);
402							} elsif ($c eq "\x0d") {
403							# the string is continued if the line ends with '\'
404							# (also remove "\x0d\x0a")
405	0	0				0	$c .= $1 if $str =~ /\G(\x0a)/g;
406	0					0	$r = '';
407							} elsif ($c eq "\x0a") {
408	0					0	$r = '';
409							} else {
410							# convert escaped characters
411	1					4	($r = $c) =~ tr/nrtbf/\n\r\t\b\f/;
412							}
413	12					24	substr($str, $n, length($c)+1) = $r;
414							# continue search after this character
415	12					37	pos($str) = $n + length($r);
416							}
417	10					22	return $str;
418							}
419
420							#------------------------------------------------------------------------------
421							# Extract information from EPS, PS or AI file
422							# Inputs: 0) ExifTool object reference, 1) dirInfo reference, 2) optional tag table ref
423							# Returns: 1 if this was a valid PostScript file
424							sub ProcessPS($$;$)
425							{
426	4			4	0	11	my ($et, $dirInfo, $tagTablePtr) = @_;
427	4					8	my $raf = $$dirInfo{RAF};
428	4					13	my $embedded = $et->Options('ExtractEmbedded');
429	4					9	my ($data, $dos, $endDoc, $fontTable, $comment);
430
431							# allow read from data
432	4	50				10	unless ($raf) {
433	0					0	$raf = new File::RandomAccess($$dirInfo{DataPt});
434	0					0	$et->VerboseDir('PostScript');
435							}
436							#
437							# determine if this is a postscript file
438							#
439	4	50				18	$raf->Read($data, 4) == 4 or return 0;
440							# accept either ASCII or DOS binary postscript file format
441	4	50				24	return 0 unless $data =~ /^(%!PS\|%!Ad\|%!Fo\|\xc5\xd0\xd3\xc6)/;
442	4	50				19	if ($data =~ /^%!Ad/) {
		50
443							# I've seen PS files start with "%!Adobe-PS"...
444	0	0	0			0	return 0 unless $raf->Read($data, 6) == 6 and $data eq "obe-PS";
445							} elsif ($data =~ /^\xc5\xd0\xd3\xc6/) {
446							# process DOS binary file header
447							# - save DOS header then seek ahead and check PS header
448	0	0				0	$raf->Read($dos, 26) == 26 or return 0;
449	0					0	SetByteOrder('II');
450	0					0	my $psStart = Get32u(\$dos, 0);
451	0	0	0			0	unless ($raf->Seek($psStart, 0) and
			0
452							$raf->Read($data, 4) == 4 and $data eq '%!PS')
453							{
454	0					0	return PSErr($et, 'invalid header');
455							}
456	0					0	$$raf{PSEnd} = $psStart + Get32u(\$dos, 4); # set end of PostScript data in RAF
457							} else {
458							# check for PostScript font file (PFA or PFB)
459	4					5	my $d2;
460	4	50				12	$data .= $d2 if $raf->Read($d2,12);
461	4	100				29	if ($data =~ /^%!(PS-(AdobeFont-\|Bitstream )\|FontType1-)/) {
462	2					9	$et->SetFileType('PFA'); # PostScript ASCII font file
463	2					5	$fontTable = GetTagTable('Image::ExifTool::Font::PSInfo');
464							# PostScript font files may contain an unformatted comments which may
465							# contain useful information, so accumulate these for the Comment tag
466	2					3	$comment = 1;
467							}
468	4					16	$raf->Seek(-length($data), 1);
469							}
470							#
471							# set the newline type based on the first newline found in the file
472							#
473	4					19	local $/ = GetInputRecordSeparator($raf);
474	4	50				14	$/ or return PSErr($et, 'invalid PS data');
475
476							# set file type (PostScript or EPS)
477	4	50				14	$raf->ReadLine($data) or $data = '';
478	4					8	my $type;
479	4	100				17	if ($data =~ /EPSF/) {
480	2					3	$type = 'EPS';
481							} else {
482							# read next line to see if this is an Illustrator file
483	2					3	my $line2;
484	2					7	my $pos = $raf->Tell();
485	2	50	33			6	if ($raf->ReadLine($line2) and $line2 =~ /^%%Creator: Adobe Illustrator/) {
486	0					0	$type = 'AI';
487							} else {
488	2					4	$type = 'PS';
489							}
490	2					7	$raf->Seek($pos, 0);
491							}
492	4					19	$et->SetFileType($type);
493	4	50	33			16	return 1 if $$et{OPTIONS}{FastScan} and $$et{OPTIONS}{FastScan} == 3;
494							#
495							# extract TIFF information from DOS header
496							#
497	4	50				15	$tagTablePtr or $tagTablePtr = GetTagTable('Image::ExifTool::PostScript::Main');
498	4	50				15	if ($dos) {
499	0					0	my $base = Get32u(\$dos, 16);
500	0	0				0	if ($base) {
501	0					0	my $pos = $raf->Tell();
502							# extract the TIFF preview
503	0					0	my $len = Get32u(\$dos, 20);
504	0					0	my $val = $et->ExtractBinary($base, $len, 'TIFFPreview');
505	0	0	0			0	if (defined $val and $val =~ /^(MM\0\x2a\|II\x2a\0\|Binary)/) {
506	0					0	$et->HandleTag($tagTablePtr, 'TIFFPreview', $val);
507							} else {
508	0					0	$et->Warn('Bad TIFF preview image');
509							}
510							# extract information from TIFF in DOS header
511							# (set Parent to '' to avoid setting FileType tag again)
512	0					0	my %dirInfo = (
513							Parent => '',
514							RAF => $raf,
515							Base => $base,
516							);
517	0	0				0	$et->ProcessTIFF(\%dirInfo) or $et->Warn('Bad embedded TIFF');
518							# position file pointer to extract PS information
519	0					0	$raf->Seek($pos, 0);
520							}
521							}
522							#
523							# parse the postscript
524							#
525	4					16	my ($buff, $mode, $beginToken, $endToken, $docNum, $subDocNum, $changedNL);
526	4					0	my (@lines, $altnl);
527	4	100				13	if ($/ eq "\x0d") {
528	2					2	$altnl = "\x0a";
529							} else {
530	2					6	$/ = "\x0a"; # end on any LF (even if DOS CR+LF)
531	2					4	$altnl = "\x0d";
532							}
533	4					14	for (;;) {
534	725	100				936	if (@lines) {
535	243					261	$data = shift @lines;
536							} else {
537	482	100				1141	$raf->ReadLine($data) or last;
538							# check for alternate newlines as efficiently as possible
539	478	100				1029	if ($data =~ /$altnl/) {
540	2	50	33			10	if (length($data) > 500000 and IsPC()) {
541							# Windows can't split very long lines due to poor memory handling,
542							# so re-read the file with the other newline character instead
543							# (slower but uses less memory)
544	0	0				0	unless ($changedNL) {
545	0					0	$changedNL = 1;
546	0					0	my $t = $/;
547	0					0	$/ = $altnl;
548	0					0	$altnl = $t;
549	0					0	$raf->Seek(-length($data), 1);
550	0					0	next;
551							}
552							} else {
553							# split into separate lines
554	2					59	@lines = split /$altnl/, $data, -1;
555	2					5	$data = shift @lines;
556	2	50	33			7	if (@lines == 1 and $lines[0] eq $/) {
557							# handle case of DOS newline data inside file using Unix newlines
558	0					0	$data .= $lines[0];
559	0					0	undef @lines;
560							}
561							}
562							}
563							}
564	721					813	undef $changedNL;
565	721	100	100			2283	if ($mode) {
		100	66
		100	66
		50
		100
		50
		100
566	306	50				672	if (not $endToken) {
		100
		100
567	0					0	$buff .= $data;
568	0	0				0	next unless $data =~ m{<\?xpacket end=.(w\|r).\?>(\n\|\r\|$)};
569							} elsif ($data !~ /^$endToken/i) {
570	298	100				457	if ($mode eq 'XMP') {
		100
571	216					271	$buff .= $data;
572							} elsif ($mode eq 'Document') {
573							# ignore embedded documents, but keep track of nesting level
574	29	100				68	$docNum .= '-1' if $data =~ /^$beginToken/;
575							} else {
576							# data is ASCII-hex encoded
577	53					65	$data =~ tr/0-9A-Fa-f//dc; # remove all but hex characters
578	53					122	$buff .= pack('H*', $data); # translate from hex
579							}
580	298					390	next;
581							} elsif ($mode eq 'Document') {
582	4					14	$docNum =~ s/-?\d+$//; # decrement document nesting level
583							# done with Document mode if we are back at the top level
584	4	100				10	undef $mode unless $docNum;
585	4					5	next;
586							}
587							} elsif ($endDoc and $data =~ /^$endDoc/i) {
588	4					14	$docNum =~ s/-?(\d+)$//; # decrement nesting level
589	4					9	$subDocNum = $1; # remember our last sub-document number
590	4					7	$$et{DOC_NUM} = $docNum;
591	4	100				19	undef $endDoc unless $docNum; # done with document if top level
592	4					6	next;
593							} elsif ($data =~ /^(%{1,2})(Begin)(_xml_packet\|Photoshop\|ICCProfile\|Document\|Binary)/i) {
594							# the beginning of a data block
595	13					63	my %modeLookup = (
596							_xml_packet => 'XMP',
597							photoshop => 'Photoshop',
598							iccprofile => 'ICC_Profile',
599							document => 'Document',
600							binary => undef, # (we will try to skip this)
601							);
602	13					35	$mode = $modeLookup{lc $3};
603	13	100				26	unless ($mode) {
604	2	50	33			15	if (not @lines and $data =~ /^%{1,2}BeginBinary:\s*(\d+)/i) {
605	2	50				8	$raf->Seek($1, 1) or last; # skip binary data
606							}
607	2					6	next;
608							}
609	11					16	$buff = '';
610	11					29	$beginToken = $1 . $2 . $3;
611	11	100				34	$endToken = $1 . ($2 eq 'begin' ? 'end' : 'End') . $3;
612	11	100				23	if ($mode eq 'Document') {
613							# this is either the 1st sub-document or Nth document
614	7	100				12	if ($docNum) {
615							# increase nesting level
616	1					3	$docNum .= '-' . (++$subDocNum);
617							} else {
618							# this is the Nth document
619	6					11	$docNum = $$et{DOC_COUNT} + 1;
620							}
621	7					8	$subDocNum = 0; # new level, so reset subDocNum
622	7	100				17	next unless $embedded; # skip over this document
623							# set document number for family 4-7 group names
624	4					7	$$et{DOC_NUM} = $docNum;
625	4					8	$$et{LIST_TAGS} = { }; # don't build lists across different documents
626	4					8	$$et{PROCESSED} = { }; # re-initialize processed directory lookup too
627	4					6	$endDoc = $endToken; # parse to EndDocument token
628							# reset mode to allow parsing into sub-directories
629	4					5	undef $endToken;
630	4					6	undef $mode;
631							# save document name if available
632	4	50				42	if ($data =~ /^$beginToken:\s+([^\n\r]+)/i) {
633	4					10	my $docName = $1;
634							# remove brackets if necessary
635	4	50				14	$docName = $1 if $docName =~ /^$(.*)$$/;
636	4					14	$et->HandleTag($tagTablePtr, 'EmbeddedFileName', $docName);
637							}
638							}
639	8					21	next;
640							} elsif ($data =~ /^<\?xpacket begin=.{7,13}W5M0MpCehiHzreSzNTczkc9d/) {
641							# pick up any stray XMP data
642	0					0	$mode = 'XMP';
643	0					0	$buff = $data;
644	0					0	undef $endToken; # no end token (just look for xpacket end)
645							# XMP could be contained in a single line (if newlines are different)
646	0	0				0	next unless $data =~ m{<\?xpacket end=.(w\|r).\?>(\n\|\r\|$)};
647							} elsif ($data =~ /^%%?(\w+): ?(.*)/s and $$tagTablePtr{$1}) {
648	29					85	my ($tag, $val) = ($1, $2);
649							# only allow 'ImageData' and AI tags to have single leading '%'
650	29	50	66			105	next unless $data =~ /^%(%\|AI\d+_)/ or $tag eq 'ImageData';
651							# decode comment string (reading continuation lines if necessary)
652	29					64	$val = DecodeComment($val, $raf, \@lines);
653	29					90	$et->HandleTag($tagTablePtr, $tag, $val);
654	29					42	next;
655							} elsif ($embedded and $data =~ /^%AI12_CompressedData/) {
656							# the rest of the file is compressed
657	0	0				0	unless (eval { require Compress::Zlib }) {
	0					0
658	0					0	$et->Warn('Install Compress::Zlib to extract compressed embedded data');
659	0					0	last;
660							}
661							# seek back to find the start of the compressed data in the file
662	0					0	my $tlen = length($data) + @lines;
663	0					0	$tlen += length $_ foreach @lines;
664	0					0	my $backTo = $raf->Tell() - $tlen - 64;
665	0	0				0	$backTo = 0 if $backTo < 0;
666	0	0	0			0	last unless $raf->Seek($backTo, 0) and $raf->Read($data, 2048);
667	0	0				0	last unless $data =~ s/.*?%AI12_CompressedData//;
668	0					0	my $inflate = Compress::Zlib::inflateInit();
669	0	0				0	$inflate or $et->Warn('Error initializing inflate'), last;
670							# generate a PS-like file in memory from the compressed data
671	0					0	my $verbose = $et->Options('Verbose');
672	0	0				0	if ($verbose > 1) {
673	0					0	$et->VerboseDir('AI12_CompressedData (first 4kB)');
674	0					0	$et->VerboseDump(\$data);
675							}
676							# remove header if it exists (Windows AI files only)
677	0					0	$data =~ s/^.{0,256}EndData[\x0d\x0a]+//s;
678	0					0	my $val;
679	0					0	for (;;) {
680	0					0	my ($v2, $stat) = $inflate->inflate($data);
681	0	0				0	$stat == Compress::Zlib::Z_STREAM_END() and $val .= $v2, last;
682	0	0				0	$stat != Compress::Zlib::Z_OK() and undef($val), last;
683	0	0				0	if (defined $val) {
		0
684	0					0	$val .= $v2;
685							} elsif ($v2 =~ /^%!PS/) {
686	0					0	$val = $v2;
687							} else {
688							# add postscript header (for file recognition) if it doesn't exist
689	0					0	$val = "%!PS-Adobe-3.0$/" . $v2;
690							}
691	0	0				0	$raf->Read($data, 65536) or last;
692							}
693	0	0				0	defined $val or $et->Warn('Error inflating AI compressed data'), last;
694	0	0				0	if ($verbose > 1) {
695	0					0	$et->VerboseDir('Uncompressed AI12 Data');
696	0					0	$et->VerboseDump(\$val);
697							}
698							# extract information from embedded images in the uncompressed data
699							$val = # add PS header in case it needs one
700	0					0	ProcessPS($et, { DataPt => \$val });
701	0					0	last;
702							} elsif ($fontTable) {
703	58	100				86	if (defined $comment) {
704							# extract initial comments from PostScript Font files
705	10	100				43	if ($data =~ /^%\s+(.*?)[\x0d\x0a]/) {
		100
706	4	50				11	$comment .= "\n" if $comment;
707	4					10	$comment .= $1;
708	4					6	next;
709							} elsif ($data !~ /^%/) {
710							# stop extracting comments at the first non-comment line
711	2	50				9	$et->FoundTag('Comment', $comment) if length $comment;
712	2					3	undef $comment;
713							}
714							}
715	54	100	100			242	if ($data =~ m{^\s/(\w+)\s(.*)} and $$fontTable{$1}) {
		100
716	24					83	my ($tag, $val) = ($1, $2);
717	24	100				68	if ($val =~ /^$(.*)$/) {
		50
718	10					26	$val = UnescapePostScript($1);
719							} elsif ($val =~ m{/?(\S+)}) {
720	14					23	$val = $1;
721							}
722	24					55	$et->HandleTag($fontTable, $tag, $val);
723							} elsif ($data =~ /^currentdict end/) {
724							# only extract tags from initial FontInfo dict
725	2					3	undef $fontTable;
726							}
727	54					69	next;
728							} else {
729	311					361	next;
730							}
731							# extract information from buffered data
732	4					21	my %dirInfo = (
733							DataPt => \$buff,
734							DataLen => length $buff,
735							DirStart => 0,
736							DirLen => length $buff,
737							Parent => 'PostScript',
738							);
739	4					17	my $subTablePtr = GetTagTable("Image::ExifTool::${mode}::Main");
740	4	50				16	unless ($et->ProcessDirectory(\%dirInfo, $subTablePtr)) {
741	0					0	$et->Warn("Error processing $mode information in PostScript file");
742							}
743	4					9	undef $buff;
744	4					8	undef $mode;
745							}
746	4	50	33			20	$mode = 'Document' if $endDoc and not $mode;
747	4	50				9	$mode and PSErr($et, "unterminated $mode data");
748	4					23	return 1;
749							}
750
751							#------------------------------------------------------------------------------
752							# Extract information from EPS file
753							# Inputs: 0) ExifTool object reference, 1) dirInfo reference
754							# Returns: 1 if this was a valid PostScript file
755							sub ProcessEPS($$)
756							{
757	2			2	0	8	return ProcessPS($_[0],$_[1]);
758							}
759
760							1; # end
761
762
763							__END__