File Coverage

blib/lib/Image/ExifTool/WritePDF.pl
Criterion Covered Total %
statement 278 413 67.3
branch 171 326 52.4
condition 56 101 55.4
subroutine 7 9 77.7
pod 0 7 0.0
total 512 856 59.8


line stmt bran cond sub pod time code
1             #------------------------------------------------------------------------------
2             # File: WritePDF.pl
3             #
4             # Description: Write PDF meta information
5             #
6             # Revisions: 12/08/2007 - P. Harvey Created
7             #
8             # References: 1) http://partners.adobe.com/public/developer/pdf/index_reference.html
9             #
10             # Notes: The special "PDF-update" group can be deleted to revert exiftool updates
11             #------------------------------------------------------------------------------
12             package Image::ExifTool::PDF;
13              
14 19     19   136 use strict;
  19         48  
  19         970  
15 19     19   115 use vars qw($lastFetched);
  19         39  
  19         122988  
16              
17             sub WriteObject($$);
18             sub EncodeString($);
19             sub CryptObject($);
20              
21             # comments to mark beginning and end of ExifTool incremental update
22             my $beginComment = '%BeginExifToolUpdate';
23             my $endComment = '%EndExifToolUpdate ';
24              
25             my $keyExt; # crypt key extension
26             my $pdfVer; # version of PDF file we are writing (highest Version in Root dictionaries)
27              
28             # internal tags used in dictionary objects
29             my %myDictTags = (
30             _tags => 1, _stream => 1, _decrypted => 1, _needCrypt => 1,
31             _filtered => 1, _entry_size => 1, _table => 1,
32             );
33              
34             # map for directories that we can add
35             my %pdfMap = (
36             XMP => 'PDF',
37             );
38              
39             #------------------------------------------------------------------------------
40             # Validate raw PDF values for writing (string date integer real boolean name)
41             # Inputs: 0) ExifTool object ref, 1) tagInfo hash ref, 2) raw value ref
42             # Returns: error string or undef (and possibly changes value) on success
43             sub CheckPDF($$$)
44             {
45 101     101 0 326 my ($et, $tagInfo, $valPtr) = @_;
46 101   66     677 my $format = $$tagInfo{Writable} || $tagInfo->{Table}->{WRITABLE};
47 101 50       515 if (not $format) {
    100          
    50          
    0          
    0          
    0          
    0          
48 0         0 return 'No writable format';
49             } elsif ($format eq 'string') {
50             # (encode later because list-type string tags need to be encoded as a unit)
51             } elsif ($format eq 'date') {
52             # be flexible about this for now
53 31 50       258 return 'Bad date format' unless $$valPtr =~ /^\d{4}/;
54             } elsif ($format eq 'integer') {
55 0 0       0 return 'Not an integer' unless Image::ExifTool::IsInt($$valPtr);
56             } elsif ($format eq 'real') {
57 0 0       0 return 'Not a real number' unless $$valPtr =~ /^[+-]?(?=\d|\.\d)\d*(\.\d*)?$/;
58             } elsif ($format eq 'boolean') {
59 0 0 0     0 $$valPtr = ($$valPtr and $$valPtr !~ /^f/i) ? 'true' : 'false';
60             } elsif ($format eq 'name') {
61 0 0       0 return 'Invalid PDF name' if $$valPtr =~ /\0/;
62             } else {
63 0         0 return "Invalid PDF format '${format}'";
64             }
65 101         481 return undef; # value is OK
66             }
67              
68             #------------------------------------------------------------------------------
69             # Format value for writing to PDF file
70             # Inputs: 0) ExifTool ref, 1) value, 2) format string (string,date,integer,real,boolean,name)
71             # Returns: formatted value or undef on error
72             # Notes: Called at write time, so $pdfVer may be checked
73             sub WritePDFValue($$$)
74             {
75 13     13 0 41 my ($et, $val, $format) = @_;
76 13 50       51 if (not $format) {
    100          
    50          
    0          
    0          
77 0         0 return undef;
78             } elsif ($format eq 'string') {
79             # encode as UCS2 if it contains any special characters
80 11 50       45 $val = "\xfe\xff" . $et->Encode($val,'UCS2','MM') if $val =~ /[\x80-\xff]/;
81 11         49 EncodeString(\$val);
82             } elsif ($format eq 'date') {
83             # convert date to "D:YYYYmmddHHMMSS+-HH'MM'" format
84 2         15 $val =~ s/(:\d{2})\.\d*/$1/; # remove sub-seconds
85 2         25 $val =~ s/([-+]\d{2}):(\d{2})/${1}'${2}'/; # change timezone delimiters if necessary
86 2         8 $val =~ tr/ ://d; # remove spaces and colons
87 2         6 $val = "D:$val"; # add leading "D:"
88 2         7 EncodeString(\$val);
89             } elsif ($format =~ /^(integer|real|boolean)$/) {
90             # no reformatting necessary
91             } elsif ($format eq 'name') {
92 0 0       0 return undef if $val =~ /\0/;
93 0 0       0 if ($pdfVer >= 1.2) {
94 0         0 $val =~ s/([\t\n\f\r ()<>[\]{}\/%#])/sprintf('#%.2x',ord $1)/sge;
  0         0  
95             } else {
96 0 0       0 return undef if $val =~ /[\t\n\f\r ()<>[\]{}\/%]/;
97             }
98 0         0 $val = "/$val"; # add leading '/'
99             } else {
100 0         0 return undef;
101             }
102 13         41 return $val;
103             }
104              
105             #------------------------------------------------------------------------------
106             # Encode PDF string
107             # Inputs: 0) reference to PDF string
108             # Returns: (updates string with encoded data)
109             sub EncodeString($)
110             {
111 13     13 0 28 my $strPt = shift;
112 13 50       34 if (ref $$strPt eq 'ARRAY') {
113 0         0 my $str;
114 0         0 foreach $str (@{$$strPt}) {
  0         0  
115 0         0 EncodeString(\$str);
116             }
117 0         0 return;
118             }
119 13         87 Crypt($strPt, $keyExt, 1); # encrypt if necessary
120             # encode as hex if we have any control characters (except tab)
121 13 50       43 if ($$strPt=~/[\x00-\x08\x0a-\x1f\x7f\xff]/) {
122             # encode as hex
123 0         0 my $str='';
124 0         0 my $len = length $$strPt;
125 0         0 my $i = 0;
126 0         0 for (;;) {
127 0 0       0 my $n = $len - $i or last;
128 0 0       0 $n = 40 if $n > 40; # break into reasonable-length lines
129 0 0       0 $str .= $/ if $i;
130 0         0 $str .= unpack('H*', substr($$strPt, $i, $n));
131 0         0 $i += $n;
132             }
133 0         0 $$strPt = "<$str>";
134             } else {
135 13         35 $$strPt =~ s/([()\\])/\\$1/g; # must escape round brackets and backslashes
136 13         36 $$strPt = "($$strPt)";
137             }
138             }
139              
140             #------------------------------------------------------------------------------
141             # Encrypt an object
142             # Inputs: 0) PDF object (encrypts in place)
143             # Notes: Encrypts according to "_needCrypt" dictionary entry,
144             # then deletes "_needCrypt" when done
145             sub CryptObject($)
146             {
147 0     0 0 0 my $obj = $_[0];
148 0 0       0 if (not ref $obj) {
    0          
    0          
149             # only literal strings and hex strings are encrypted
150 0 0       0 if ($obj =~ /^[(<]/) {
151 0         0 undef $lastFetched; # (reset this just in case)
152 0         0 my $val = ReadPDFValue($obj);
153 0         0 EncodeString(\$val);
154 0         0 $_[0] = $val;
155             }
156             } elsif (ref $obj eq 'HASH') {
157 0         0 my $tag;
158 0         0 my $needCrypt = $$obj{_needCrypt};
159 0         0 foreach $tag (keys %$obj) {
160 0 0       0 next if $myDictTags{$tag};
161             # re-encrypt necessary objects only (others are still encrypted)
162             # (this is really annoying, but is necessary because objects stored
163             # in encrypted streams are decrypted when extracting, but strings stored
164             # as direct objects are decrypted later since they must be decoded
165             # before being decrypted)
166 0 0       0 if ($needCrypt) {
167 0 0       0 next unless defined $$needCrypt{$tag} ? $$needCrypt{$tag} : $$needCrypt{'*'};
    0          
168             }
169 0         0 CryptObject($$obj{$tag});
170             }
171 0         0 delete $$obj{_needCrypt}; # avoid re-re-crypting
172             } elsif (ref $obj eq 'ARRAY') {
173 0         0 my $val;
174 0         0 foreach $val (@$obj) {
175 0         0 CryptObject($val);
176             }
177             }
178             }
179              
180             #------------------------------------------------------------------------------
181             # Get free entries from xref stream dictionary that we wrote previously
182             # Inputs: 0) xref dictionary reference
183             # Returns: free entry hash (keys are object numbers, values are xref entry list refs)
184             sub GetFreeEntries($)
185             {
186 0     0 0 0 my $dict = shift;
187 0         0 my %xrefFree;
188             # we write xref stream entries in 'CNn' or 'CNNn' format (with 8-byte 'NN' offset),
189 0         0 my $w = $$dict{W};
190 0 0       0 if (ref $w eq 'ARRAY') {
191 0         0 my $bytes = "@$w";
192 0         0 my $fmt;
193 0 0       0 if ($bytes eq '1 4 2') {
    0          
194 0         0 $fmt = 'CNn';
195             } elsif ($bytes eq '1 8 2') {
196 0         0 $fmt = 'CNNn';
197             } else {
198 0         0 return \%xrefFree;
199             }
200 0         0 my $size = $$dict{_entry_size}; # this will be 7 for 'CNn' or 11 for 'CNNn'
201 0         0 my $index = $$dict{Index};
202 0         0 my $len = length $$dict{_stream};
203             # scan the table for free objects
204 0         0 my $num = scalar(@$index) / 2;
205 0         0 my $pos = 0;
206 0         0 my ($i, $j);
207 0         0 for ($i=0; $i<$num; ++$i) {
208 0         0 my $start = $$index[$i*2];
209 0         0 my $count = $$index[$i*2+1];
210 0         0 for ($j=0; $j<$count; ++$j) {
211 0 0       0 last if $pos + $size > $len;
212 0         0 my @t = unpack("x$pos $fmt", $$dict{_stream});
213 0 0       0 if (@t == 4) {
214 0         0 $t[1] = $t[1] * 4294967296 + $t[2];
215 0         0 $t[2] = $t[3];
216 0         0 @t = 3;
217             }
218             # add entry if object was free
219 0 0       0 $xrefFree{$start+$j} = [ $t[1], $t[2], 'f' ] if $t[0] == 0;
220 0         0 $pos += $size; # step to next entry
221             }
222             }
223             }
224 0         0 return \%xrefFree;
225             }
226              
227             #------------------------------------------------------------------------------
228             # Write PDF object
229             # Inputs: 0) output file or scalar ref, 1) PDF object
230             # Returns: true on success
231             # Notes: inserts white space before object, but none afterward
232             sub WriteObject($$)
233             {
234 208     208 0 403 my ($outfile, $obj) = @_;
235 208 100       703 if (ref $obj eq 'SCALAR') {
    100          
    100          
236 43 50       109 Write($outfile, ' ', $$obj) or return 0;
237             } elsif (ref $obj eq 'ARRAY') {
238             # write array
239 12 50       64 Write($outfile, @$obj > 10 ? $/ : ' ', '[') or return 0;
    50          
240 12         29 my $item;
241 12         31 foreach $item (@$obj) {
242 22 50       49 WriteObject($outfile, $item) or return 0;
243             }
244 12 50       71 Write($outfile, ' ]') or return 0;
245             } elsif (ref $obj eq 'HASH') {
246             # write dictionary
247 47         72 my $tag;
248 47 50       109 Write($outfile, $/, '<<') or return 0;
249             # prepare object as required if it has a stream
250 47 100       155 if ($$obj{_stream}) {
251             # encrypt stream if necessary (must be done before determining Length)
252 9 50       66 CryptStream($obj, $keyExt) if $$obj{_decrypted};
253             # write "Length" entry in dictionary
254 9         32 $$obj{Length} = length $$obj{_stream};
255 9         21 push @{$$obj{_tags}}, 'Length';
  9         36  
256             # delete Filter-related entries since we don't yet write filtered streams
257 9         25 delete $$obj{Filter};
258 9         15 delete $$obj{DecodeParms};
259 9         18 delete $$obj{DL};
260             }
261             # don't write my internal entries
262 47         343 my %wrote = %myDictTags;
263             # write tags in original order, adding new ones later alphabetically
264 47         93 foreach $tag (@{$$obj{_tags}}, sort keys %$obj) {
  47         263  
265             # ignore already-written or missing entries
266 339 100 100     1034 next if $wrote{$tag} or not defined $$obj{$tag};
267 139 50       379 Write($outfile, $/, "/$tag") or return 0;
268 139 50       339 WriteObject($outfile, $$obj{$tag}) or return 0;
269 139         331 $wrote{$tag} = 1;
270             }
271 47 50       125 Write($outfile, $/, '>>') or return 0;
272 47 100       196 if ($$obj{_stream}) {
273             # write object stream
274             # (a single 0x0d may not follow 'stream', so use 0x0d+0x0a here to be sure)
275 9 50       25 Write($outfile, $/, "stream\x0d\x0a") or return 0;
276 9 50       31 Write($outfile, $$obj{_stream}, $/, 'endstream') or return 0;
277             }
278             } else {
279             # write string, number, name or object reference
280 106         215 Write($outfile, ' ', $obj);
281             }
282 208         547 return 1;
283             }
284              
285             #------------------------------------------------------------------------------
286             # Write PDF File
287             # Inputs: 0) ExifTool object reference, 1) dirInfo reference
288             # Returns: 1 on success, 0 if not valid PDF file, -1 on write error
289             # Notes: dictionary structure: Main --+--> Info
290             # +--> Root --> Metadata
291             sub WritePDF($$)
292             {
293 19     19 0 55 my ($et, $dirInfo) = @_;
294 19         59 my $raf = $$dirInfo{RAF};
295 19         46 my $outfile = $$dirInfo{OutFile};
296 19         67 my ($buff, %capture, %newXRef, %newObj, $objRef);
297 19         0 my ($out, $id, $gen, $obj);
298              
299             # make sure this is a PDF file
300 19         89 my $pos = $raf->Tell();
301 19 50       89 $raf->Read($buff, 1024) >= 8 or return 0;
302 19 50       227 $buff =~ /^(\s*)%PDF-(\d+\.\d+)/ or return 0;
303 19         97 $$et{PDFBase} = length $1;
304 19         81 $raf->Seek($pos, 0);
305              
306             # create a new ExifTool object and use it to read PDF and XMP information
307 19         157 my $newTool = Image::ExifTool->new;
308 19         92 $newTool->Options(List => 1);
309 19         107 $newTool->Options(Password => $et->Options('Password'));
310 19         96 $newTool->Options(NoPDFList => $et->Options('NoPDFList'));
311 19         98 $$newTool{PDF_CAPTURE} = \%capture;
312 19         88 my $info = $newTool->ImageInfo($raf, 'XMP', 'PDF:*', 'Error', 'Warning');
313             # not a valid PDF file unless we got a version number
314 19         107 $pdfVer = $$newTool{PDFVersion};
315 19 50       63 $pdfVer or $et->Error('Missing PDF:PDFVersion'), return 0;
316             # check version number
317 19 50       64 if ($pdfVer > 2.0) {
318 0 0       0 $et->Error("Writing PDF $pdfVer is untested", 1) and return 0;
319             }
320             # fail if we had any serious errors while extracting information
321 19 50 33     164 if ($capture{Error} or $$info{Error}) {
322 0   0     0 $et->Error($capture{Error} || $$info{Error});
323 0         0 return 1;
324             }
325             # make sure we have everything we need to rewrite this file
326 19         67 foreach $obj (qw(Main Root xref)) {
327 57 50       186 next if $capture{$obj};
328             # any warning we received may give a clue about why this object is missing
329 0 0       0 $et->Error($$info{Warning}) if $$info{Warning};
330 0         0 $et->Error("Can't find $obj object");
331 0         0 return 1;
332             }
333 19         262 $et->InitWriteDirs(\%pdfMap, 'XMP');
334              
335             # copy file up to start of previous exiftool update or end of file
336             # (comment, startxref & EOF with 11-digit offsets and 2-byte newlines is 63 bytes)
337 19 50 33     139 $raf->Seek(-64,2) and $raf->Read($buff,64) and $raf->Seek(0,0) or return -1;
      33        
338 19         47 my $rtn = 1;
339 19         41 my $prevUpdate;
340             # (now $endComment is before "startxref", but pre-7.41 we wrote it after the EOF)
341 19 100       400 if ($buff =~ /$endComment(\d+)\s+(startxref\s+\d+\s+%%EOF\s+)?$/s) {
    50          
342 16         87 $prevUpdate = $1;
343             # rewrite the file up to the original EOF
344 16 50       135 Image::ExifTool::CopyBlock($raf, $outfile, $prevUpdate + $$et{PDFBase}) or $rtn = -1;
345             # verify that we are now at the start of an ExifTool update
346 16 50 33     1162 unless ($raf->Read($buff, length $beginComment) and $buff eq $beginComment) {
347 0         0 $et->Error('Previous ExifTool update is corrupted');
348 0         0 return $rtn;
349             }
350 16 50       72 $raf->Seek($prevUpdate+$$et{PDFBase}, 0) or $rtn = -1;
351 16 100       74 if ($$et{DEL_GROUP}{'PDF-update'}) {
352 2         14 $et->VPrint(0, " Reverted previous ExifTool updates\n");
353 2         5 ++$$et{CHANGED};
354 2         39 return $rtn;
355             }
356             } elsif ($$et{DEL_GROUP}{'PDF-update'}) {
357 0         0 $et->Error('File contains no previous ExifTool update');
358 0         0 return $rtn;
359             } else {
360             # rewrite the whole file
361 3         17 while ($raf->Read($buff, 65536)) {
362 3 50       22 Write($outfile, $buff) or $rtn = -1;
363             }
364             }
365 17 50       113 $out = $et->Options('TextOut') if $et->Options('Verbose');
366             #
367             # create our new PDF objects to write
368             #
369 17         49 my $xref = $capture{xref};
370 17         39 my $mainDict = $capture{Main};
371 17         51 my $metaRef = $capture{Root}->{Metadata};
372 17         39 my $nextObject;
373              
374             # start by finding reference for info object in case it was deleted
375             # in a previous edit so we can re-use it here if adding PDF Info
376             my $prevInfoRef;
377 17 100       70 if ($prevUpdate) {
378 14 50       55 unless ($capture{Prev}) {
379 0         0 $et->Error("Can't locate trailer dictionary prior to last edit");
380 0         0 return $rtn;
381             }
382 14         38 $prevInfoRef = $capture{Prev}->{Info};
383             # start from previous size so the xref table doesn't continue
384             # to grow if we repeatedly add and delete the Metadata object
385 14         43 $nextObject = $capture{Prev}->{Size};
386             # don't re-use Meta reference if object was added in a previous update
387 14 100 66     151 undef $metaRef if $metaRef and $$metaRef=~/^(\d+)/ and $1 >= $nextObject;
      100        
388             } else {
389 3         9 $prevInfoRef = $$mainDict{Info};
390 3         13 $nextObject = $$mainDict{Size};
391             }
392              
393             # delete entire PDF group if specified
394 17         42 my $infoChanged = 0;
395 17 100 100     78 if ($$et{DEL_GROUP}{PDF} and $capture{Info}) {
396 4         50 delete $capture{Info};
397 4         34 $info = { XMP => $$info{XMP} }; # remove extracted PDF tags
398 4 50       15 print $out " Deleting PDF Info dictionary\n" if $out;
399 4         41 ++$infoChanged;
400             }
401              
402             # create new Info dictionary if necessary
403 17 100       95 $capture{Info} = { _tags => [ ] } unless $capture{Info};
404 17         35 my $infoDict = $capture{Info};
405              
406             # must pre-determine Info reference to be used in encryption
407 17   100     88 my $infoRef = $prevInfoRef || \ "$nextObject 0 R";
408 17 50       59 unless (ref $infoRef eq 'SCALAR') {
409 0         0 $et->Error("Info dictionary is not an indirect object");
410 0         0 return $rtn;
411             }
412 17         44 $keyExt = $$infoRef;
413              
414             # must encrypt all values in dictionary if they came from an encrypted stream
415 17 50       58 CryptObject($infoDict) if $$infoDict{_needCrypt};
416              
417             # must set line separator before calling WritePDFValue()
418 17         129 local $/ = $capture{newline};
419              
420             # rewrite PDF Info tags
421 17         117 my $newTags = $et->GetNewTagInfoHash(\%Image::ExifTool::PDF::Info);
422 17         33 my $tagID;
423 17         81 foreach $tagID (sort keys %$newTags) {
424 11         34 my $tagInfo = $$newTags{$tagID};
425 11 50 33     40 if ($pdfVer >= 2.0 and not $$tagInfo{PDF2}) {
426 0 0       0 next if $et->Warn("Writing PDF:$$tagInfo{Name} is deprecated for PDF 2.0 documents",2);
427             }
428 11         55 my $nvHash = $et->GetNewValueHash($tagInfo);
429 11         21 my (@vals, $deleted);
430 11         61 my $tag = $$tagInfo{Name};
431 11         31 my $val = $$info{$tag};
432 11         25 my $tagKey = $tag;
433 11 100       35 unless (defined $val) {
434             # must check for tag key with copy number
435 9         243 ($tagKey) = grep /^$tag/, keys %$info;
436 9 100       41 $val = $$info{$tagKey} if $tagKey;
437             }
438 11 100       41 if (defined $val) {
    50          
439 7         19 my @oldVals;
440 7 100       23 if (ref $val eq 'ARRAY') {
441 3         17 @oldVals = @$val;
442 3         8 $val = shift @oldVals;
443             }
444 7         14 for (;;) {
445 11 100       46 if ($et->IsOverwriting($nvHash, $val) > 0) {
446 5         11 $deleted = 1;
447 5         40 $et->VerboseValue("- PDF:$tag", $val);
448 5         13 ++$infoChanged;
449             } else {
450 6         19 push @vals, $val;
451             }
452 11 100       30 last unless @oldVals;
453 4         11 $val = shift @oldVals;
454             }
455             # don't write this out if we deleted all values
456 7 100       31 delete $$infoDict{$tagID} unless @vals;
457             } elsif ($$nvHash{EditOnly}) {
458 0         0 next;
459             }
460             # decide whether we want to write this tag
461             # (native PDF information is always preferred, so don't check IsCreating)
462 11 50 100     69 next unless $deleted or $$tagInfo{List} or not exists $$infoDict{$tagID};
      66        
463              
464             # add new values to existing ones
465 11         68 my @newVals = $et->GetNewValue($nvHash);
466 11 100       31 if (@newVals) {
467 9         25 push @vals, @newVals;
468 9         20 ++$infoChanged;
469 9 50       25 if ($out) {
470 0         0 foreach $val (@newVals) {
471 0         0 $et->VerboseValue("+ PDF:$tag", $val);
472             }
473             }
474             }
475 11 50       31 unless (@vals) {
476             # remove this entry from the Info dictionary if no values remain
477 0         0 delete $$infoDict{$tagID};
478 0         0 next;
479             }
480             # format value(s) for writing to PDF file
481 11   66     99 my $writable = $$tagInfo{Writable} || $Image::ExifTool::PDF::Info{WRITABLE};
482 11 100       48 if (not $$tagInfo{List}) {
    100          
483 5         19 $val = WritePDFValue($et, shift(@vals), $writable);
484             } elsif ($$tagInfo{List} eq 'array') {
485 3         10 foreach $val (@vals) {
486 5         22 $val = WritePDFValue($et, $val, $writable);
487 5 50       24 defined $val or undef(@vals), last;
488             }
489 3 50       16 $val = @vals ? \@vals : undef;
490             } else {
491 3         20 $val = WritePDFValue($et, join($et->Options('ListSep'), @vals), $writable);
492             }
493 11 50       28 if (defined $val) {
494 11         35 $$infoDict{$tagID} = $val;
495 11         43 ++$infoChanged;
496             } else {
497 0         0 $et->Warn("Error converting $$tagInfo{Name} value");
498             }
499             }
500 17 100       85 if ($infoChanged) {
    100          
501 10         33 $$et{CHANGED} += $infoChanged;
502             } elsif ($prevUpdate) {
503             # must still write Info dictionary if it was previously updated
504 6         39 my $oldPos = LocateObject($xref, $$infoRef);
505 6 100 66     44 $infoChanged = 1 if $oldPos and $oldPos > $prevUpdate;
506             }
507              
508             # create new Info dictionary if necessary
509 17 100       91 if ($infoChanged) {
510             # increment object count if we used a new object here
511 14 100       26 if (scalar(keys %{$capture{Info}}) > 1) {
  14         79  
512 10         35 $newObj{$$infoRef} = $capture{Info};# save to write later
513 10         29 $$mainDict{Info} = $infoRef; # add reference to trailer dictionary
514 10 100       38 ++$nextObject unless $prevInfoRef;
515             } else {
516             # remove Info from Main (trailer) dictionary
517 4         18 delete $$mainDict{Info};
518             # write free entry in xref table if Info existed prior to all edits
519 4 100       22 $newObj{$$infoRef} = '' if $prevInfoRef;
520             }
521             }
522              
523             # rewrite XMP
524             my %xmpInfo = (
525             DataPt => $$info{XMP},
526 17         109 Parent => 'PDF',
527             );
528 17         112 my $xmpTable = Image::ExifTool::GetTagTable('Image::ExifTool::XMP::Main');
529 17         44 my $oldChanged = $$et{CHANGED};
530 17         114 my $newXMP = $et->WriteDirectory(\%xmpInfo, $xmpTable);
531 17 100       91 $newXMP = $$info{XMP} ? ${$$info{XMP}} : '' unless defined $newXMP;
  4 100       14  
532              
533             # WriteDirectory() will increment CHANGED erroneously if non-existent
534             # XMP is deleted as a block -- so check for this
535 17 100 100     74 unless ($newXMP or $$info{XMP}) {
536 3         10 $$et{CHANGED} = $oldChanged;
537 3         18 $et->VPrint(0, " (XMP not changed -- still empty)\n");
538             }
539 17         40 my ($metaChanged, $rootChanged);
540              
541 17 100 66     133 if ($$et{CHANGED} != $oldChanged and defined $newXMP) {
    100 33        
542 10         29 $metaChanged = 1;
543             } elsif ($prevUpdate and $capture{Root}->{Metadata}) {
544             # must still write Metadata dictionary if it was previously updated
545 4         8 my $oldPos = LocateObject($xref, ${$capture{Root}->{Metadata}});
  4         29  
546 4 50 33     26 $metaChanged = 1 if $oldPos and $oldPos > $prevUpdate;
547             }
548 17 100       59 if ($metaChanged) {
549 14 100       60 if ($newXMP) {
    50          
550 9 100       33 unless (ref $metaRef) {
551             # allocate new PDF object
552 5         19 $metaRef = \ "$nextObject 0 R";
553 5         13 ++$nextObject;
554 5         20 $capture{Root}->{Metadata} = $metaRef;
555 5         11 $rootChanged = 1; # set flag to replace Root dictionary
556             }
557             # create the new metadata dictionary to write later
558 9         113 $newObj{$$metaRef} = {
559             Type => '/Metadata',
560             Subtype => '/XML',
561             # Length => length $newXMP, (set by WriteObject)
562             _tags => [ qw(Type Subtype) ],
563             _stream => $newXMP,
564             _decrypted => 1, # (this will be ignored if EncryptMetadata is false)
565             };
566             } elsif ($capture{Root}->{Metadata}) {
567             # free existing metadata object
568 5         10 $newObj{${$capture{Root}->{Metadata}}} = '';
  5         22  
569 5         16 delete $capture{Root}->{Metadata};
570 5         11 $rootChanged = 1; # set flag to replace Root dictionary
571             }
572             }
573             # add new Root dictionary if necessary
574 17         52 my $rootRef = $$mainDict{Root};
575 17 50       56 unless ($rootRef) {
576 0         0 $et->Error("Can't find Root dictionary");
577 0         0 return $rtn;
578             }
579 17 100 100     76 if (not $rootChanged and $prevUpdate) {
580             # must still write Root dictionary if it was previously updated
581 6         33 my $oldPos = LocateObject($xref, $$rootRef);
582 6 100 66     35 $rootChanged = 1 if $oldPos and $oldPos > $prevUpdate;
583             }
584 17 100       84 $newObj{$$rootRef} = $capture{Root} if $rootChanged;
585             #
586             # write incremental update if anything was changed
587             #
588 17 100       77 if ($$et{CHANGED}) {
    50          
589             # remember position of original EOF
590 16         82 my $oldEOF = Tell($outfile) - $$et{PDFBase};
591 16 50       89 Write($outfile, $beginComment) or $rtn = -1;
592              
593             # write new objects
594 16         90 foreach $objRef (sort keys %newObj) {
595 38 50       319 $objRef =~ /^(\d+) (\d+)/ or $rtn = -1, last;
596 38         167 ($id, $gen) = ($1, $2);
597 38 100       113 if (not $newObj{$objRef}) {
598 7 50       30 ++$gen if $gen < 65535;
599             # write free entry in xref table
600 7         25 $newXRef{$id} = [ 0, $gen, 'f' ];
601 7         19 next;
602             }
603             # create new entry for xref table
604 31         93 $newXRef{$id} = [ Tell($outfile) - $$et{PDFBase} + length($/), $gen, 'n' ];
605 31         82 $keyExt = "$id $gen obj"; # (must set for stream encryption)
606 31 50       75 Write($outfile, $/, $keyExt) or $rtn = -1;
607 31 50       135 WriteObject($outfile, $newObj{$objRef}) or $rtn = -1;
608 31 50       85 Write($outfile, $/, 'endobj') or $rtn = -1;
609             }
610              
611             # Prev points to old xref table
612 16 100       62 $$mainDict{Prev} = $capture{startxref} unless $prevUpdate;
613              
614             # add xref entry for head of free-object list
615 16         67 $newXRef{0} = [ 0, 65535, 'f' ];
616              
617             # must insert free xref entries from previous exiftool update if applicable
618 16 100       47 if ($prevUpdate) {
619 13         28 my $mainFree;
620             # extract free entries from our previous Main xref stream
621 13 50 33     74 if ($$mainDict{Type} and $$mainDict{Type} eq '/XRef') {
622 0         0 $mainFree = GetFreeEntries($xref->{dicts}->[0]);
623             } else {
624             # free entries from Main xref table already captured for us
625 13         38 $mainFree = $capture{mainFree};
626             }
627 13         79 foreach $id (sort { $a <=> $b } keys %$mainFree) {
  6         30  
628 19 100       73 $newXRef{$id} = $$mainFree{$id} unless $newXRef{$id};
629             }
630             }
631              
632             # connect linked list of free object in our xref table
633 16         42 my $prevFree = 0;
634 16         87 foreach $id (sort { $b <=> $a } keys %newXRef) { # (reverse sort)
  60         176  
635 59 100       148 next unless $newXRef{$id}->[2] eq 'f'; # skip if not free
636             # no need to add free entry for objects added by us
637             # in previous edits then freed again
638 28 100       70 if ($id >= $nextObject) {
639 3         11 delete $newXRef{$id}; # Note: deleting newXRef entry!
640 3         5 next;
641             }
642 25         54 $newXRef{$id}->[0] = $prevFree;
643 25         57 $prevFree = $id;
644             }
645              
646             # prepare our main dictionary for writing
647 16         79 $$mainDict{Size} = $nextObject; # update number of objects
648             # must change the ID if it exists
649 16 100 66     74 if (ref $$mainDict{ID} eq 'ARRAY' and @{$$mainDict{ID}} > 1) {
  6         34  
650             # increment first byte since this is an easy change to make
651 6         19 $id = $mainDict->{ID}->[1];
652 6 50 0     40 if ($id =~ /^<([0-9a-f]{2})/i) {
    0 0        
      0        
653 6         59 my $byte = unpack('H2',chr((hex($1) + 1) & 0xff));
654 6         19 substr($id, 1, 2) = $byte;
655             } elsif ($id =~ /^\((.)/s and $1 ne '\\' and $1 ne ')' and $1 ne '(') {
656 0         0 my $ch = chr((ord($1) + 1) & 0xff);
657             # avoid generating characters that could cause problems
658 0 0       0 $ch = 'a' if $ch =~ /[()\\\x00-\x08\x0a-\x1f\x7f\xff]/;
659 0         0 substr($id, 1, 1) = $ch;
660             }
661 6         17 $mainDict->{ID}->[1] = $id;
662             }
663              
664             # remember position of xref table in file (we will write this next)
665 16         60 my $startxref = Tell($outfile) - $$et{PDFBase} + length($/);
666              
667             # must write xref as a stream in xref-stream-only files
668 16 50 33     59 if ($$mainDict{Type} and $$mainDict{Type} eq '/XRef') {
669              
670             # create entry for the xref stream object itself
671 0         0 $newXRef{$nextObject++} = [ Tell($outfile) - $$et{PDFBase} + length($/), 0, 'n' ];
672 0         0 $$mainDict{Size} = $nextObject;
673             # create xref stream and Index entry
674 0         0 my $bits = 4;
675 0         0 Restart: for (;;) {
676 0         0 $$mainDict{W} = [ 1, $bits, 2 ]; # int8u, int32u/int64u, int16u ('CNn' or 'CNNn')
677 0         0 $$mainDict{Index} = [ ];
678 0         0 $$mainDict{_stream} = '';
679 0         0 my @ids = sort { $a <=> $b } keys %newXRef;
  0         0  
680 0         0 while (@ids) {
681 0         0 my $startID = $ids[0];
682 0         0 for (;;) {
683 0         0 $id = shift @ids;
684 0         0 my ($pos, $gen, $type) = @{$newXRef{$id}};
  0         0  
685 0 0       0 if ($pos > 0xffffffff) {
686 0 0       0 if ($bits == 4) {
687             # switch to 64-bit integer offsets
688 0         0 $bits = 8;
689 0         0 next Restart;
690             }
691             }
692 0 0       0 if ($bits == 4) {
693 0 0       0 $$mainDict{_stream} .= pack('CNn', $type eq 'f' ? 0 : 1, $pos, $gen);
694             } else {
695 0         0 my $hi = int($pos / 4294967296);
696 0         0 my $lo = $pos - $hi * 4294967296;
697 0 0       0 $$mainDict{_stream} .= pack('CNNn', $type eq 'f' ? 0 : 1, $hi, $lo, $gen);
698             }
699 0 0 0     0 last if not @ids or $ids[0] != $id + 1;
700             }
701             # add Index entries for this section of the xref stream
702 0         0 push @{$$mainDict{Index}}, $startID, $id - $startID + 1;
  0         0  
703             }
704 0         0 last;
705             }
706             # write the xref stream object
707 0         0 $keyExt = "$id 0 obj"; # (set anyway, but xref stream should NOT be encrypted)
708 0 0       0 Write($outfile, $/, $keyExt) or $rtn = -1;
709 0 0       0 WriteObject($outfile, $mainDict) or $rtn = -1;
710 0 0       0 Write($outfile, $/, 'endobj') or $rtn = -1;
711              
712             } else {
713              
714             # write new xref table
715 16 50       55 Write($outfile, $/, 'xref', $/) or $rtn = -1;
716             # lines must be exactly 20 bytes, so pad newline if necessary
717 16 50       90 my $endl = (length($/) == 1 ? ' ' : '') . $/;
718 16         61 my @ids = sort { $a <=> $b } keys %newXRef;
  62         186  
719 16         51 while (@ids) {
720 34         64 my $startID = $ids[0];
721 34         58 $buff = '';
722 34         56 for (;;) {
723 56         94 $id = shift @ids;
724 56         82 $buff .= sprintf("%.10d %.5d %s%s", @{$newXRef{$id}}, $endl);
  56         211  
725 56 100 100     204 last if not @ids or $ids[0] != $id + 1;
726             }
727             # write this (contiguous-numbered object) section of the xref table
728 34 50       111 Write($outfile, $startID, ' ', $id - $startID + 1, $/, $buff) or $rtn = -1;
729             }
730              
731             # write main (trailer) dictionary
732 16 50       52 Write($outfile, 'trailer') or $rtn = -1;
733 16 50       49 WriteObject($outfile, $mainDict) or $rtn = -1;
734             }
735             # write trailing comment (marker to allow edits to be reverted)
736 16 50       50 Write($outfile, $/, $endComment, $oldEOF, $/) or $rtn = -1;
737              
738             # write pointer to main xref table and EOF marker
739 16 50       58 Write($outfile, 'startxref', $/, $startxref, $/, '%%EOF', $/) or $rtn = -1;
740              
741             } elsif ($prevUpdate) {
742              
743             # nothing new changed, so copy over previous incremental update
744 1 50       9 $raf->Seek($prevUpdate+$$et{PDFBase}, 0) or $rtn = -1;
745 1         7 while ($raf->Read($buff, 65536)) {
746 1 50       9 Write($outfile, $buff) or $rtn = -1;
747             }
748             }
749 17 100 66     178 if ($rtn > 0 and $$et{CHANGED} and ($$et{DEL_GROUP}{PDF} or $$et{DEL_GROUP}{XMP})) {
      100        
      100        
750 6         40 $et->Warn('ExifTool PDF edits are reversible. Deleted tags may be recovered!', 1);
751             }
752 17         305 undef $newTool;
753 17         206 undef %capture;
754 17         597 return $rtn;
755             }
756              
757              
758             1; # end
759              
760             __END__