File Coverage

lib/PDF/Data.pm
Criterion Covered Total %
statement 47 680 6.9
branch 0 422 0.0
condition 0 336 0.0
subroutine 16 66 24.2
pod 46 50 92.0
total 109 1554 7.0


line stmt bran cond sub pod time code
1             package PDF::Data;
2              
3             # Require Perl v5.16; enable warnings and UTF-8.
4 1     1   297390 use v5.16;
  1         4  
5 1     1   6 use warnings;
  1         2  
  1         69  
6 1     1   6 use utf8;
  1         2  
  1         7  
7              
8             # Declare module version. (Also in pod documentation below.)
9 1     1   629 use version; our $VERSION = version->declare('v1.2.0');
  1         2593  
  1         8  
10              
11             # Initialize modules.
12 1     1   180 use mro;
  1         3  
  1         8  
13 1     1   732 use namespace::autoclean;
  1         25822  
  1         5  
14 1     1   96 use Carp qw[carp croak confess];;
  1         6  
  1         82  
15 1     1   739 use Clone;
  1         730  
  1         76  
16 1     1   891 use Compress::Raw::Zlib qw[:status :flush];
  1         8281  
  1         329  
17 1     1   849 use Data::Dump qw[dd dump];
  1         8274  
  1         143  
18 1     1   778 use List::MoreUtils qw[minmax];
  1         22626  
  1         9  
19 1     1   1727 use List::Util qw[max];
  1         3  
  1         99  
20 1     1   815 use Math::Trig qw[pi];
  1         26427  
  1         137  
21 1     1   899 use POSIX qw[mktime strftime];
  1         9585  
  1         8  
22 1     1   2199 use Scalar::Util qw[blessed reftype];
  1         2  
  1         74  
23              
24             # Use byte strings instead of Unicode character strings.
25 1     1   7 use bytes;
  1         2  
  1         8  
26              
27             # Basic parsing regular expressions.
28             our $n = qr/(?:\n|\r\n?)/; # Match a newline. (LF, CRLF or CR)
29             our $ss = '\x00\x09\x0a\x0c\x0d\x20'; # List of PDF whitespace characters.
30             our $s = "[$ss]"; # Match a single PDF whitespace character.
31             our $ws = qr/(?:(?:(?>%[^\r\n]*)?$s+)+)/; # Match whitespace, including PDF comments.
32              
33             # Declare prototypes.
34             sub is_hash ($);
35             sub is_array ($);
36             sub is_stream ($);
37              
38             # Utility functions.
39 0 0   0 0   sub is_hash ($) { ref $_[0] && reftype($_[0]) eq "HASH"; }
40 0 0   0 0   sub is_array ($) { ref $_[0] && reftype($_[0]) eq "ARRAY"; }
41 0 0   0 0   sub is_stream ($) { &is_hash && exists $_[0]{-data}; }
42              
43             # Create a new PDF::Data object, representing a minimal PDF file.
44             sub new {
45 0     0 1   my ($self, %args) = @_;
46              
47             # Get the class name.
48 0   0       my $class = blessed $self || $self;
49              
50             # Create a new instance using the constructor arguments.
51 0           my $pdf = bless \%args, $class;
52              
53             # Set creation timestamp.
54 0           $pdf->{Info}{CreationDate} = $pdf->timestamp;
55              
56             # Create an empty document catalog and page tree.
57 0           $pdf->{Root}{Pages} = { Kids => [], Count => 0 };
58              
59             # Validate the PDF structure and return the new instance.
60 0           return $pdf->validate;
61             }
62              
63             # Deep copy entire PDF::Data object.
64             sub clone {
65 0     0 1   my ($self) = @_;
66 0           return Clone::clone($self);
67             }
68              
69             # Create a new page with the specified size.
70             sub new_page {
71 0     0 1   my ($self, $x, $y) = @_;
72              
73             # Paper sizes.
74 0           my %sizes = (
75             LETTER => [ 8.5, 11 ],
76             LEGAL => [ 8.5, 14 ],
77             A0 => [ 33.125, 46.8125 ],
78             A1 => [ 23.375, 33.125 ],
79             A2 => [ 16.5, 23.375 ],
80             A3 => [ 11.75, 16.5 ],
81             A4 => [ 8.25, 11.75 ],
82             A5 => [ 5.875, 8.25 ],
83             A6 => [ 4.125, 5.875 ],
84             A7 => [ 2.9375, 4.125 ],
85             A8 => [ 2.0625, 2.9375 ],
86             );
87              
88             # Default page size to US Letter (8.5" x 11").
89 0 0 0       unless ($x and $y and $x > 0 and $y > 0) {
      0        
      0        
90 0   0       $x ||= "LETTER";
91 0 0         croak "Error: Unknown paper size \"$x\"!\n" unless $sizes{$x};
92 0           ($x, $y) = @{$sizes{$x}};
  0            
93             }
94              
95             # Make sure page size was specified.
96 0 0 0       croak join(": ", $self->{-file} || (), "Error: Paper size not specified!\n") unless $x and $y and $x > 0 and $y > 0;
      0        
      0        
      0        
97              
98             # Scale inches to default user space units (72 DPI).
99 0 0         $x *= 72 if $x < 72;
100 0 0         $y *= 72 if $y < 72;
101              
102             # Create and return a new page object.
103             return {
104 0           Type => "/Page",
105             MediaBox => [0, 0, $x, $y],
106             Contents => { -data => "" },
107             Resources => {
108             ProcSet => ["/PDF", "/Text"],
109             },
110             };
111             }
112              
113             # Deep copy the specified page object.
114             sub copy_page {
115 0     0 1   my ($self, $page) = @_;
116              
117             # Temporarily hide parent reference.
118 0           delete local $page->{Parent};
119              
120             # Clone the page object.
121 0           my $copied_page = Clone::clone($page);
122              
123             # return cloned page object.
124 0           return $copied_page;
125             }
126              
127             # Append the specified page to the PDF.
128             sub append_page {
129 0     0 1   my ($self, $page) = @_;
130              
131             # Increment page count for page tree root node.
132 0           $self->{Root}{Pages}{Count}++;
133              
134             # Add page object to page tree root node for simplicity.
135 0           push @{$self->{Root}{Pages}{Kids}}, $page;
  0            
136 0           $page->{Parent} = $self->{Root}{Pages};
137              
138             # Return the page object.
139 0           return $page;
140             }
141              
142             # Read and parse PDF file.
143             sub read_pdf {
144 0     0 1   my ($self, $file, %args) = @_;
145              
146             # Read entire file at once.
147 0           local $/;
148              
149             # Contents of entire PDF file.
150 0           my $data;
151              
152             # Check for standard input.
153 0 0 0       if (($file // "-") eq "-") {
154             # Read all data from standard input.
155 0           $file = "";
156 0 0         binmode STDIN or croak "$file: $!\n";
157 0           $data = ;
158 0 0         close STDIN or croak "$file: $!\n";
159             } else {
160             # Read the entire file.
161 0 0         open my $IN, '<', $file or croak "$file: $!\n";
162 0 0         binmode $IN or croak "$file: $!\n";
163 0           $data = <$IN>;
164 0 0         close $IN or croak "$file: $!\n";
165             }
166              
167             # Parse PDF file data and return new instance.
168 0           return $self->parse_pdf($data, -file => $file, %args);
169             }
170              
171             # Parse PDF file data.
172             sub parse_pdf {
173 0     0 1   my ($self, $data, %args) = @_;
174              
175             # Get the class name.
176 0   0       my $class = blessed $self || $self;
177              
178             # Create a new instance using the provided arguments.
179 0           $self = bless \%args, $class;
180              
181             # Validate minimal PDF file structure starting with %PDF and ending with %%EOF.
182             my ($pdf_version, $pdf_data) = $data =~ /%PDF-(\d+\.\d+)$s*?$n(.*)%%EOF/s
183 0 0 0       or croak join(": ", $self->{-file} || (), "File does not contain a valid PDF document!\n");
184              
185             # Discard startxref value which should be present in any valid PDF, but don't require it.
186 0           $pdf_data =~ s/\bstartxref$ws?(\d+)$ws\z//s;
187              
188             # Check PDF version.
189 0 0 0       warn join(": ", $self->{-file} || (), "Warning: PDF version $pdf_version not supported!\n")
190             unless $pdf_version =~ /^1\.[0-7]$/;
191              
192             # Parsed indirect objects.
193 0           my $objects = {};
194              
195             # Parse PDF objects.
196 0           my @objects = $self->parse_objects($objects, $pdf_data, 0);
197              
198             # PDF trailer dictionary.
199 0           my $trailer;
200              
201             # Find trailer dictionary.
202 0           for (my $i = 0; $i < @objects; $i++) {
203 0 0         if ($objects[$i][0] eq "trailer") {
204             $i < $#objects and $objects[$i + 1][1]{type} eq "dict"
205 0 0 0       or croak join(": ", $self->{-file} || (), "Byte offset $objects[$i][1]{offset}: Invalid trailer dictionary!\n");
      0        
206 0           $trailer = $objects[$i + 1][0];
207 0           last;
208             }
209             }
210              
211             # Make sure trailer dictionary was found.
212 0 0 0       croak join(": ", $self->{-file} || (), "PDF trailer dictionary not found!\n") unless defined $trailer;
213              
214             # Resolve indirect object references.
215 0           $self->resolve_references($objects, $trailer);
216              
217             # Create a new instance from the parsed data.
218 0           my $pdf = bless $trailer, $class;
219              
220             # Add any provided arguments.
221 0           foreach my $key (sort keys %args) {
222 0           $pdf->{$key} = $args{$key};
223             }
224              
225             # Validate the PDF structure (unless the -novalidate flag is set) and return the new instance.
226 0 0         return $self->{-novalidate} ? $pdf : $pdf->validate;
227             }
228              
229             # Generate and write a new PDF file.
230             sub write_pdf {
231 0     0 1   my ($self, $file, $time) = @_;
232              
233             # Default missing timestamp to current time, but keep a zero time as a flag.
234 0   0       $time //= time;
235              
236             # Generate PDF file data.
237 0           my $pdf_data = $self->pdf_file_data($time);
238              
239             # Check if standard output is wanted.
240 0 0 0       if (($file // "-") eq "-") {
241             # Write PDF file data to standard output.
242 0           $file = "";
243 0 0         binmode STDOUT or croak "$file: $!\n";
244 0 0         print STDOUT $pdf_data or croak "$file: $!\n";
245             } else {
246             # Write PDF file data to specified output file.
247 0 0         open my $OUT, ">", $file or croak "$file: $!\n";
248 0 0         binmode $OUT or croak "$file: $!\n";
249 0 0         print $OUT $pdf_data or croak "$file: $!\n";
250 0 0         close $OUT or croak "$file: $!\n";
251              
252             # Set modification time to the specified or current timestamp, unless zero.
253 0 0         utime $time, $time, $file if $time;
254              
255             # Print success message.
256 0           print STDERR "Wrote new PDF file \"$file\".\n\n";
257             }
258             }
259              
260             # Generate PDF file data suitable for writing to an output PDF file.
261             sub pdf_file_data {
262 0     0 1   my ($self, $time) = @_;
263              
264             # Default missing timestamp to current time, but keep a zero time as a flag.
265 0   0       $time //= time;
266              
267             # Set PDF modification timestamp, unless zero.
268 0 0         $self->{Info}{ModDate} = $self->timestamp($time) if $time;
269              
270             # Set PDF producer.
271 0           $self->{Info}{Producer} = sprintf "(%s)", join " ", __PACKAGE__, $VERSION;
272              
273             # Validate the PDF structure.
274 0           $self->validate;
275              
276             # Array of indirect objects, with lookup hash as first element.
277 0           my $objects = [{}];
278              
279             # Objects seen while generating the PDF file data.
280 0           my $seen = {};
281              
282             # Start with PDF header.
283 0           my $pdf_file_data = "%PDF-1.4\n%\xBF\xF7\xA2\xFE\n\n";
284              
285             # Write all indirect objects.
286 0           my $xrefs = $self->write_indirect_objects(\$pdf_file_data, $objects, $seen);
287              
288             # Add cross-reference table.
289 0           my $startxref = length($pdf_file_data);
290 0           $pdf_file_data .= sprintf "xref\n0 %d\n", scalar @{$xrefs};
  0            
291 0           $pdf_file_data .= join("", @{$xrefs});
  0            
292              
293             # Save correct size in trailer dictionary.
294 0           $self->{Size} = scalar @{$xrefs};
  0            
295              
296             # Write trailer dictionary.
297 0           $pdf_file_data .= "trailer ";
298 0           $self->write_object(\$pdf_file_data, $objects, $seen, $self, 0);
299              
300             # Write startxref value.
301 0           $pdf_file_data =~ s/\n?\z/\n/;
302 0           $pdf_file_data .= "startxref\n$startxref\n";
303              
304             # End of PDF file data.
305 0           $pdf_file_data .= "%%EOF\n";
306              
307             # Return PDF file data.
308 0           return $pdf_file_data;
309             }
310              
311             # Dump internal structure of PDF file.
312             sub dump_pdf {
313 0     0 1   my ($self, $file, $mode) = @_;
314              
315             # Default to standard output.
316 0 0 0       $file = "-" if not defined $file or $file eq "";
317              
318             # Default to dumping full PDF internal structure.
319 0   0       $mode //= "";
320              
321             # Use "" instead of "-" to describe standard output.
322 0   0       my $filename = ($file // "") =~ s/^-?$//r;
323              
324             # Open output file.
325 0 0         open my $OUT, ">$file" or croak "$filename: $!\n";
326              
327             # Data structures already seen.
328 0           my $seen = {};
329              
330             # Dump PDF structures.
331 0 0         printf $OUT "\$pdf = %s;\n", $self->dump_object($self, '$pdf', $seen, 0, $mode) or croak "$filename: $!\n";
332              
333             # Close output file.
334 0 0         close $OUT or croak "$filename: $!\n";
335              
336             # Print success message.
337 0 0         if ($mode eq "outline") {
338 0 0         print STDERR "Dumped outline of PDF internal structure to file \"$file\".\n\n" unless $file eq "-";
339             } else {
340 0 0         print STDERR "Dumped PDF internal structure to file \"$file\".\n\n" unless $file eq "-";
341             }
342             }
343              
344             # Dump outline of internal structure of PDF file.
345             sub dump_outline {
346 0     0 1   my ($self, $file) = @_;
347              
348             # Call dump_pdf() with outline parameter.
349 0   0       return $self->dump_pdf($file // "-", "outline");
350             }
351              
352             # Merge content streams.
353             sub merge_content_streams {
354 0     0 1   my ($self, $streams) = @_;
355              
356             # Make sure content is an array.
357 0 0         return $streams unless is_array $streams;
358              
359             # Remove extra trailing space from streams.
360 0           foreach my $stream (@{$streams}) {
  0            
361 0 0         die unless is_stream $stream;
362 0   0       $stream->{-data} //= "";
363 0           $stream->{-data} =~ s/(?<=$s) \z//;
364             }
365              
366             # Concatenate stream data and calculate new length.
367 0           my $merged = { -data => join("", map { $_->{-data}; } @{$streams}) };
  0            
  0            
368 0           $merged->{Length} = length($merged->{-data});
369              
370             # Return merged content stream.
371 0           return $merged;
372             }
373              
374             # Find bounding box for a content stream.
375             sub find_bbox {
376 0     0 1   my ($self, $content_stream, $new) = @_;
377              
378             # Get data from stream, if necessary.
379 0 0 0       $content_stream = $content_stream->{-data} // "" if is_stream $content_stream;
380              
381             # Split content stream into lines.
382 0           my @lines = grep { $_ ne ""; } split /\n/, $content_stream;
  0            
383              
384             # Bounding box.
385 0           my ($left, $bottom, $right, $top);
386              
387             # Regex to match a number.
388 0           my $n = qr/-?\d+(?:\.\d+)?/;
389              
390             # Determine bounding box from content stream.
391 0           foreach (@lines) {
392             # Skip neutral lines.
393 0 0         next if m{^(?:/Figure <>BDC|/PlacedGraphic /MC\d BDC|EMC|/GS\d gs|BX /Sh\d sh EX Q|[Qqh]|W n|$n $n $n $n $n $n cm)$s*$};
394              
395             # Capture coordinates from drawing operations to calculate bounding box.
396 0 0         if (my ($x1, $y1, $x2, $y2, $x3, $y3) = /^($n) ($n) (?:[ml]|($n) ($n) (?:[vy]|($n) ($n) c))$/) {
    0          
397 0           ($left, $right) = minmax grep { defined $_; } $left, $right, $x1, $x2, $x3;
  0            
398 0           ($bottom, $top) = minmax grep { defined $_; } $bottom, $top, $y1, $y2, $y3;
  0            
399             } elsif (my ($x, $y, $width, $height) = /^($n) ($n) ($n) ($n) re$/) {
400 0           ($left, $right) = minmax grep { defined $_; } $left, $right, $x, $x + $width;
  0            
401 0           ($bottom, $top) = minmax grep { defined $_; } $bottom, $top, $y, $y + $height;
  0            
402             } else {
403 0           croak "Parse error: Content line \"$_\" not recognized!\n";
404             }
405             }
406              
407             # Print bounding box and rectangle.
408 0           my $width = $right - $left;
409 0           my $height = $top - $bottom;
410 0           print STDERR "Bounding Box: $left $bottom $right $top\nRectangle: $left $bottom $width $height\n\n";
411              
412             # Return unless generating a new bounding box.
413 0 0         return unless $new;
414              
415             # Update content stream.
416 0           for ($content_stream) {
417             # Update coordinates in drawing operations.
418 0           s/^($n) ($n) ([ml])$/join " ", $self->round($1 - $left, $2 - $bottom), $3/egm;
  0            
419 0           s/^($n) ($n) ($n) ($n) ([vy])$/join " ", $self->round($1 - $left, $2 - $bottom, $3 - $left, $4 - $bottom), $5/egm;
  0            
420 0           s/^($n) ($n) ($n) ($n) ($n) ($n) (c)$/join " ", $self->round($1 - $left, $2 - $bottom, $3 - $left, $4 - $bottom, $5 - $left, $6 - $bottom), $7/egm;
  0            
421 0           s/^($n $n $n $n) ($n) ($n) (cm)$/join " ", $1, $self->round($2 - $left, $3 - $bottom), $4/egm;
  0            
422             }
423              
424             # Return content stream.
425 0           return $content_stream;
426             }
427              
428             # Make a new bounding box for a content stream.
429             sub new_bbox {
430 0     0 1   my ($self, $content_stream) = @_;
431              
432             # Call find_bbox() with "new" parameter.
433 0           $self->find_bbox($content_stream, 1);
434             }
435              
436             # Generate timestamp in PDF internal format.
437             sub timestamp {
438 0     0 1   my ($self, $time) = @_;
439              
440 0   0       $time //= time;
441 0           my @time = localtime $time;
442 0           my $tz = $time[8] * 60 - mktime(gmtime 0) / 60;
443 0           return sprintf "(D:%s%+03d'%02d')", strftime("%Y%m%d%H%M%S", @time), $tz / 60, abs($tz) % 60;
444             }
445              
446             # Round numeric values to 12 significant digits to avoid floating-point rounding error and remove trailing zeroes.
447             sub round {
448 0     0 1   my ($self, @numbers) = @_;
449              
450 0   0       @numbers = map { sprintf("%.12f", sprintf("%.12g", $_ || 0)) =~ s/\.?0+$//r; } @numbers;
  0            
451 0 0         return wantarray ? @numbers : $numbers[0];
452             }
453              
454             # Concatenate a transformation matrix with an original matrix, returning a new matrix.
455             sub concat_matrix {
456 0     0 1   my ($self, $transform, $orig) = @_;
457              
458 0           return [$self->round(
459             $transform->[0] * $orig->[0] + $transform->[1] * $orig->[2],
460             $transform->[0] * $orig->[1] + $transform->[1] * $orig->[3],
461             $transform->[2] * $orig->[0] + $transform->[3] * $orig->[2],
462             $transform->[2] * $orig->[1] + $transform->[3] * $orig->[3],
463             $transform->[4] * $orig->[0] + $transform->[5] * $orig->[2] + $orig->[4],
464             $transform->[4] * $orig->[1] + $transform->[5] * $orig->[3] + $orig->[5],
465             )];
466             }
467              
468             # Calculate the inverse of a matrix, if possible.
469             sub invert_matrix {
470 0     0 1   my ($self, $matrix) = @_;
471              
472             # Calculate the determinant of the matrix.
473 0           my $det = $self->round($matrix->[0] * $matrix->[3] - $matrix->[1] * $matrix->[2]);
474              
475             # If the determinant is zero, then the matrix is not invertible.
476 0 0         return if $det == 0;
477              
478             # Return the inverse matrix.
479 0           return [$self->round(
480             $matrix->[3] / $det,
481             -$matrix->[1] / $det,
482             -$matrix->[2] / $det,
483             $matrix->[0] / $det,
484             ($matrix->[2] * $matrix->[5] - $matrix->[3] * $matrix->[4]) / $det,
485             ($matrix->[1] * $matrix->[4] - $matrix->[0] * $matrix->[5]) / $det,
486             )];
487             }
488              
489             # Create a transformation matrix to translate the origin of the coordinate system to the specified coordinates.
490             sub translate {
491 0     0 1   my ($self, $x, $y) = @_;
492              
493             # Return a translate matrix.
494 0           return [$self->round(1, 0, 0, 1, $x, $y)];
495             }
496              
497             # Create a transformation matrix to scale the coordinate space by the specified horizontal and vertical scaling factors.
498             sub scale {
499 0     0 1   my ($self, $x, $y) = @_;
500              
501             # Return a scale matrix.
502 0           return [$self->round($x, 0, 0, $y, 0, 0)];
503             }
504              
505             # Create a transformation matrix to rotate the coordinate space counterclockwise by the specified angle (in degrees).
506             sub rotate {
507 0     0 1   my ($self, $angle) = @_;
508              
509             # Calculate the sine and cosine of the angle.
510 0           my $sin = sin($angle * pi / 180);
511 0           my $cos = cos($angle * pi / 180);
512              
513             # Return a rotate matrix.
514 0           return [$self->round($cos, $sin, -$sin, $cos, 0, 0)];
515             }
516              
517             # Validate PDF structure.
518             sub validate {
519 0     0 1   my ($self) = @_;
520              
521             # Catch validation errors.
522 0           eval {
523             # Make sure document catalog exists and has the correct type.
524 0           $self->validate_key("Root", "Type", "/Catalog", "document catalog");
525              
526             # Make sure page tree root node exists, has the correct type, and has no parent.
527 0           $self->validate_key("Root/Pages", "Type", "/Pages", "page tree root");
528 0           $self->validate_key("Root/Pages", "Parent", undef, "page tree root");
529              
530             # Validate page tree.
531 0           $self->validate_page_tree("Root/Pages", $self->{Root}{Pages});
532             };
533              
534             # Check for validation errors.
535 0 0         if ($@) {
536             # Make validation errors fatal if -validate flag is set.
537 0 0         if ($self->{-validate}) {
538 0           croak $@;
539             } else {
540 0           carp $@;
541             }
542             }
543              
544             # Return this instance.
545 0           return $self;
546             }
547              
548             # Validate page tree.
549             sub validate_page_tree {
550 0     0 1   my ($self, $path, $page_tree_node) = @_;
551              
552             # Count of leaf nodes (page objects) under this page tree node.
553 0           my $count = 0;
554              
555             # Validate children.
556 0 0 0       is_array(my $kids = $page_tree_node->{Kids}) or croak join(": ", $self->{-file} || (), "Error: $path\->{Kids} must be an array!\n");
557 0           for (my $i = 0; $i < @{$kids}; $i++) {
  0            
558 0 0 0       is_hash(my $kid = $kids->[$i]) or croak join(": ", $self->{-file} || (), "Error: $path\[$i] must be be a hash!\n");
559 0 0 0       $kid->{Type} or croak join(": ", $self->{-file} || (), "Error: $path\[$i]->{Type} is a required field!\n");
560 0 0         if ($kid->{Type} eq "/Pages") {
    0          
561 0           $count += $self->validate_page_tree("$path\[$i]", $kid);
562             } elsif ($kid->{Type} eq "/Page") {
563 0           $self->validate_page("$path\[$i]", $kid);
564 0           $count++;
565             } else {
566 0   0       croak join(": ", $self->{-file} || (), "Error: $path\[$i]->{Type} must be /Pages or /Page!\n");
567             }
568             }
569              
570             # Validate resources, if any.
571 0 0         $self->validate_resources("$path\->{Resources}", $page_tree_node->{Resources}) if is_hash($page_tree_node->{Resources});
572              
573             # Fix leaf node count if wrong.
574 0 0 0       if (($page_tree_node->{Count} || 0) != $count) {
575 0   0       warn join(": ", $self->{-file} || (), "Warning: Fixing: $path\->{Count} = $count\n");
576 0           $page_tree_node->{Count} = $count;
577             }
578              
579             # Return leaf node count.
580 0           return $count;
581             }
582              
583             # Validate page object.
584             sub validate_page {
585 0     0 1   my ($self, $path, $page) = @_;
586              
587 0 0         if (my $contents = $page->{Contents}) {
588 0 0         $contents = $self->merge_content_streams($contents) if is_array($contents);
589 0 0 0       is_stream($contents) or croak join(": ", $self->{-file} || (), "Error: $path\->{Contents} must be an array or stream!\n");
590 0   0       $contents->{-data} //= "";
591 0           $self->validate_content_stream("$path\->{Contents}", $contents);
592             }
593              
594             # Validate resources, if any.
595 0 0         $self->validate_resources("$path\->{Resources}", $page->{Resources}) if is_hash($page->{Resources});
596             }
597              
598             # Validate resources.
599             sub validate_resources {
600 0     0 1   my ($self, $path, $resources) = @_;
601              
602             # Validate XObjects, if any.
603 0 0         $self->validate_xobjects("$path\{XObject}", $resources->{XObject}) if is_hash($resources->{XObject});
604             }
605              
606             # Validate form XObjects.
607             sub validate_xobjects {
608 0     0 1   my ($self, $path, $xobjects) = @_;
609              
610             # Validate each form XObject.
611 0           foreach my $name (sort keys %{$xobjects}) {
  0            
612 0           $self->validate_xobject("$path\{$name}", $xobjects->{$name});
613             }
614             }
615              
616             # Validate a single XObject.
617             sub validate_xobject {
618 0     0 1   my ($self, $path, $xobject) = @_;
619              
620             # Make sure the XObject is a stream.
621 0 0 0       is_stream($xobject) or croak join(": ", $self->{-file} || (), "Error: $path must be a content stream!\n");
622 0   0       $xobject->{-data} //= "";
623              
624             # Validate the content stream, if this is a form XObject.
625 0 0         $self->validate_content_stream($path, $xobject) if $xobject->{Subtype} eq "/Form";
626              
627             # Validate resources, if any.
628 0 0         $self->validate_resources("$path\{Resources}", $xobject->{Resources}) if is_hash($xobject->{Resources});
629             }
630              
631             # Validate content stream.
632             sub validate_content_stream {
633 0     0 1   my ($self, $path, $stream) = @_;
634              
635             # Make sure the content stream can be parsed.
636 0   0       my @objects = eval { $self->parse_objects({}, $stream->{-data} // "", 0); };
  0            
637 0 0 0       croak join(": ", $self->{-file} || (), "Error: $path: $@") if $@;
638              
639             # Minify content stream if requested.
640 0 0         $self->minify_content_stream($stream, \@objects) if $self->{-minify};
641             }
642              
643             # Minify content stream.
644             sub minify_content_stream {
645 0     0 1   my ($self, $stream, $objects) = @_;
646              
647             # Parse object stream if necessary.
648 0   0       $objects ||= [ $self->parse_objects({}, $stream->{-data} // "", 0) ];
      0        
649              
650             # Generate new content stream from objects.
651 0           $stream->{-data} = $self->generate_content_stream($objects);
652              
653             # Recalculate stream length.
654 0           $stream->{Length} = length $stream->{-data};
655              
656             # Sanity check.
657             die "Content stream serialization failed"
658 0           if dump([map {$_->[0]} @{$objects}]) ne
  0            
659 0 0         dump([map {$_->[0]} $self->parse_objects({}, $stream->{-data}, 0)]);
  0            
660             }
661              
662             # Generate new content stream from objects.
663             sub generate_content_stream {
664 0     0 1   my ($self, $objects) = @_;
665              
666             # Generated content stream.
667 0           my $stream = "";
668              
669             # Loop across parsed objects.
670 0           foreach my $object (@{$objects}) {
  0            
671             # Check parsed object type.
672 0 0         if ($object->[1]{type} eq "dict") {
    0          
    0          
673             # Serialize dictionary.
674 0           $self->serialize_dictionary(\$stream, $object->[0]);
675             } elsif ($object->[1]{type} eq "array") {
676             # Serialize array.
677 0           $self->serialize_array(\$stream, $object->[0]);
678             } elsif ($object->[1]{type} eq "image") {
679             # Serialize inline image data.
680 0           $self->serialize_image(\$stream, $object->[0]);
681             } else {
682             # Serialize string or other token.
683 0           $self->serialize_object(\$stream, $object->[0]);
684             }
685             }
686              
687             # Return generated content stream.
688 0           return $stream;
689             }
690              
691             # Serialize a hash as a dictionary object.
692             sub serialize_dictionary {
693 0     0 1   my ($self, $stream, $hash) = @_;
694              
695             # Serialize the hash key-value pairs.
696 0           my @pairs = %{$hash};
  0            
697 0           ${$stream} .= "<<";
  0            
698 0           for (my $i = 0; $i < @pairs; $i++) {
699 0 0         if ($i % 2) {
700 0 0         if (is_hash($pairs[$i])) {
    0          
701 0           $self->serialize_dictionary($stream, $pairs[$i]);
702             } elsif (is_array($pairs[$i])) {
703 0           $self->serialize_array($stream, $pairs[$i]);
704             } else {
705 0           $self->serialize_object($stream, $pairs[$i]);
706             }
707             } else {
708 0           ${$stream} .= "/$pairs[$i]";
  0            
709             }
710             }
711 0           ${$stream} .= ">>";
  0            
712             }
713              
714             # Serialize an array.
715             sub serialize_array {
716 0     0 1   my ($self, $stream, $array) = @_;
717              
718             # Serialize the array values.
719 0           ${$stream} .= "[";
  0            
720 0           foreach my $obj (@{$array}) {
  0            
721 0 0         if (is_hash($obj)) {
    0          
722 0           $self->serialize_dictionary($stream, $obj);
723             } elsif (is_array($obj)) {
724 0           $self->serialize_array($stream, $obj);
725             } else {
726 0           $self->serialize_object($stream, $obj);
727             }
728             }
729 0           ${$stream} .= "]";
  0            
730             }
731              
732             # Append the serialization of inline image data to the generated content stream.
733             sub serialize_image {
734 0     0 0   my ($self, $stream, $image) = @_;
735              
736             # Append inline image data between ID (Image Data) and EI (End Image) operators.
737 0           ${$stream} .= "\nID\n$image\nEI\n";
  0            
738             }
739              
740             # Append the serialization of an object to the generated content stream.
741             sub serialize_object {
742 0     0 1   my ($self, $stream, $object) = @_;
743              
744             # Strip leading/trailing whitespace from object if minifying.
745 0 0         if ($self->{-minify}) {
746 0           $object =~ s/^$s+//;
747 0           $object =~ s/$s+$//;
748             }
749              
750             # Wrap the line if line length would exceed 255 characters.
751 0 0         ${$stream} .= "\n" if length(${$stream}) - (rindex(${$stream}, "\n") + 1) + length($object) >= 255;
  0            
  0            
  0            
752              
753             # Add a space if necessary.
754 0 0 0       ${$stream} .= " " unless ${$stream} =~ /(^|[$ss)>\[\]{}])$/ or $object =~ /^[$ss()<>\[\]{}\/%]/;
  0            
  0            
755              
756             # Add the serialized object.
757 0           ${$stream} .= $object;
  0            
758             }
759              
760             # Validate the specified hash key value.
761             sub validate_key {
762 0     0 1   my ($self, $hash, $key, $value, $label) = @_;
763              
764             # Create the hash if necessary.
765 0 0         $hash = $_[1] = {} unless $hash;
766              
767             # Get the hash node from the PDF structure by path, if necessary.
768 0 0         $hash = $self->get_hash_node($hash) unless is_hash $hash;
769              
770             # Make sure the hash key has the correct value.
771 0 0 0       if (defined $value and (not defined $hash->{$key} or $hash->{$key} ne $value)) {
    0 0        
      0        
772 0 0 0       warn join(": ", $self->{-file} || (), "Warning: Fixing $label: {$key} $hash->{$key} -> $value\n") if $hash->{$key};
773 0           $hash->{$key} = $value;
774             } elsif (not defined $value and exists $hash->{$key}) {
775 0 0 0       warn join(": ", $self->{-file} || (), "Warning: Deleting $label: {$key} $hash->{$key}\n") if $hash->{$key};
776 0           delete $hash->{$key};
777             }
778              
779             # Return this instance.
780 0           return $self;
781             }
782              
783             # Get a hash node from the PDF structure by path.
784             sub get_hash_node {
785 0     0 1   my ($self, $path) = @_;
786              
787             # Split the path.
788 0           my @path = split /\//, $path;
789              
790             # Find the hash node with the specified path, creating nodes if necessary.
791 0           my $hash = $self;
792 0           foreach my $key (@path) {
793 0   0       $hash->{$key} ||= {};
794 0           $hash = $hash->{$key};
795             }
796              
797             # Return the hash node.
798 0           return $hash;
799             }
800              
801             # Parse PDF objects into Perl representations.
802             sub parse_objects {
803 0     0 1   my ($self, $objects, $data, $offset) = @_;
804              
805             # Parsed PDF objects.
806 0           my @objects;
807              
808             # Calculate EOF offset.
809 0           my $eof = $offset + length $data;
810              
811             # Copy data for parsing.
812 0           local $_ = $data;
813              
814             # Parse PDF objects in input string.
815 0           while ($_ ne "") {
816             # Update the file offset.
817 0           $offset = $eof - length $_;
818              
819             # Parse the next PDF object.
820 0 0         if (s/\A$ws//) { # Strip leading whitespace/comments.
    0          
    0          
    0          
    0          
    0          
821 0           next;
822             } elsif (s/\A(<<((?:[^<>]+|<[^<>]+>|(?1))*)$ws?>>)//) { # Dictionary: <<...>> (including nested dictionaries)
823 0           my @pairs = $self->parse_objects($objects, $2, $offset);
824 0           for (my $i = 0; $i < @pairs; $i++) {
825             $pairs[$i] = $i % 2 ? $pairs[$i][0] : $pairs[$i][1]{name}
826 0 0 0       // croak join(": ", $self->{-file} || (), "Byte offset $offset: Dictionary key is not a name!\n");
      0        
827             }
828 0           push @objects, [ { @pairs }, { type => "dict" } ];
829             } elsif (s/\A(\[((?:(?>[^\[\]]+)|(?1))*)\])//) { # Array: [...] (including nested arrays)
830 0           my $array = [ map $_->[0], $self->parse_objects($objects, $2, $offset) ];
831 0           push @objects, [ $array, { type => "array" }];
832             } elsif (s/\A(\((?:(?>[^\\()]+)|\\.|(?1))*\))//) { # String literal: (...) (including nested parens)
833 0           push @objects, [ $1, { type => "string" } ];
834             } elsif (s/\A(<[0-9A-Fa-f$ss]*>)//) { # Hexadecimal string literal: <...>
835 0           push @objects, [ lc($1) =~ s/$s+//gr, { type => "hex" } ];
836             } elsif (s/\A(\/?[^$ss()<>\[\]{}\/%]+)//) { # /Name, number or other token
837             # Check for tokens of special interest.
838 0           my $token = $1;
839 0 0 0       if ($token eq "obj" or $token eq "R") { # Indirect object/reference: 999 0 obj or 999 0 R
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
840 0           my ($id, $gen) = splice @objects, -2;
841 0 0         my $type = $token eq "R" ? "reference" : "definition";
842             "$id->[1]{type} $gen->[1]{type}" eq "int int"
843 0 0 0       or croak join(": ", $self->{-file} || (), "Byte offset $offset: $id->[0] $gen->[0] $token: Invalid indirect object $type!\n");
844 0   0       my $new_id = join("-", $id->[0], $gen->[0] || ());
845             push @objects, [
846             ($token eq "R" ? \$new_id : $new_id),
847             { type => $token, offset => $id->[1]{offset} }
848 0 0         ];
849             } elsif ($token eq "ID") { # Inline image data: ID ... EI
850 0 0 0       s/\A$s(.*?)(?:\r\n|$s)?EI$s//s or croak join(": ", $self->{-file} || (), "Byte offset $offset: Invalid inline image data!\n");
851 0           my $image = $1;
852              
853             # TODO: Apply encoding filters?
854              
855 0           push @objects, [ $image, { type => "image" } ];
856             } elsif ($token eq "stream") { # Stream content: stream ... endstream
857 0           my ($id, $stream) = @objects[-2,-1];
858 0 0 0       $stream->[1]{type} eq "dict" or croak join(": ", $self->{-file} || (), "Byte offset $offset: Stream dictionary missing!\n");
859 0 0 0       $id->[1]{type} eq "obj" or croak join(": ", $self->{-file} || (), "Byte offset $offset: Invalid indirect object definition!\n");
860 0           $_ = $_->[0] for $id, $stream;
861             defined(my $length = $stream->{Length})
862 0 0 0       or warn join(": ", $self->{-file} || (), "Byte offset $offset: Object #$id: Stream length not found in metadata!\n");
863 0           s/\A\r?\n//;
864              
865             # Check for unsupported stream types.
866 0   0       my $type = $stream->{Type} // "";
867 0 0         if ($type eq "/ObjStm") {
    0          
    0          
868 0   0       croak join(": ", $self->{-file} || (), "Byte offset $offset: PDF 1.5 object streams are not supported!\n");
869             } elsif ($type eq "/XRef") {
870 0   0       croak join(": ", $self->{-file} || (), "Byte offset $offset: PDF 1.5 cross-reference streams are not supported!\n");
871             } elsif ($type !~ /^(?:\/(?:CMap|Metadata|XObject))?$/) {
872 0   0       carp join(": ", $self->{-file} || (), "Byte offset $offset: Unrecognized stream type \"$type\"!\n");
873             }
874              
875             # If the declared stream length is missing or invalid, determine the shortest possible length to make the stream valid.
876 0 0 0       unless (defined($length) && !ref($length) && substr($_, $length) =~ /\A($s*endstream$ws)/) {
      0        
877 0 0         if (/\A((?>(?:[^e]+|(?!endstream$s)e)*))endstream$s/) {
878 0           $length = length($1);
879             } else {
880 0   0       croak join(": ", $self->{-file} || (), "Byte offset $offset: Invalid stream definition!\n");
881             }
882             }
883              
884 0           $stream->{-data} = substr($_, 0, $length);
885 0           $stream->{-id} = $id;
886 0           $stream->{Length} = $length;
887              
888 0           $_ = substr($_, $length);
889 0           s/\A$s*endstream$ws//;
890              
891 0 0         $self->filter_stream($stream) if $stream->{Filter};
892             } elsif ($token eq "endobj") { # Indirect object definition: 999 0 obj ... endobj
893 0           my ($id, $object) = splice @objects, -2;
894 0 0 0       $id->[1]{type} eq "obj" or croak join(": ", $self->{-file} || (), "Byte offset $offset: Invalid indirect object definition!\n");
895 0           $object->[1]{id} = $id->[0];
896 0           $objects->{$id->[0]} = $object;
897 0   0       $objects->{offset}{$object->[1]{offset} // $offset} = $object;
898 0           push @objects, $object;
899             } elsif ($token eq "xref") { # Cross-reference table
900             s/\A$ws\d+$ws\d+$n(?>\d{10}\ \d{5}\ [fn](?:\ [\r\n]|\r\n))+//
901 0 0 0       or croak join(": ", $self->{-file} || (), "Byte offset $offset: Invalid cross-reference table!\n");
902             } elsif ($token =~ /^[+-]?\d+$/) { # Integer: [+-]999
903 0           push @objects, [ $token, { type => "int" } ];
904             } elsif ($token =~ /^[+-]?(?:\d+\.\d*|\.\d+)$/) { # Real number: [+-]999.999
905 0           push @objects, [ $token, { type => "real" } ];
906             } elsif ($token =~ /^\/(.*)$/) { # Name: /Name
907 0           push @objects, [ $token, { type => "name", name => $1 } ];
908             } elsif ($token =~ /^(?:true|false)$/) { # Boolean: true or false
909 0           push @objects, [ $token, { type => "bool", bool => $token eq "true" } ];
910             } else { # Other token
911 0           push @objects, [ $token, { type => "token" } ];
912             }
913             } else {
914 0           s/\A([^\r\n]*).*\z/$1/s;
915 0   0       croak join(": ", $self->{-file} || (), "Byte offset $offset: Parse error on input: \"$_\"\n");
916             }
917              
918             # Update offset/length of last object.
919 0   0       $objects[-1][1]{offset} //= $offset;
920 0           $objects[-1][1]{length} = $eof - length($_) - $objects[-1][1]{offset};
921             }
922              
923             # Return parsed PDF objects.
924 0           return @objects;
925             }
926              
927             # Parse PDF objects from standalone PDF data.
928             sub parse_data {
929 0     0 1   my ($self, $data) = @_;
930              
931             # Parse PDF objects from data.
932 0   0       my @objects = $self->parse_objects({}, $data // "", 0);
933              
934             # Discard parser metadata.
935 0           @objects = map { $_->[0]; } @objects;
  0            
936              
937             # Return parsed objects.
938 0 0         return wantarray ? @objects : $objects[0];
939             }
940              
941             # Filter stream data.
942             sub filter_stream {
943 0     0 1   my ($self, $stream) = @_;
944              
945             # Get stream filters, if any.
946 0 0         my @filters = $stream->{Filter} ? is_array $stream->{Filter} ? @{$stream->{Filter}} : ($stream->{Filter}) : ();
  0 0          
947              
948             # Decompress stream data if necessary.
949 0 0         if ($filters[0] eq "/FlateDecode") {
950             # Remember that this stream was compressed.
951 0           $stream->{-compress} = 1;
952              
953             # Decompress the stream.
954 0           my $zlib = new Compress::Raw::Zlib::Inflate;
955 0           my $output;
956 0           my $status = $zlib->inflate($stream->{-data}, $output);
957 0 0 0       if ($status == Z_OK or $status == Z_STREAM_END) {
958 0           $stream->{-data} = $output;
959 0           $stream->{Length} = length $output;
960             } else {
961 0   0       croak join(": ", $self->{-file} || (), "Object #$stream->{-id}: Stream inflation failed! ($zlib->msg)\n");
962             }
963              
964             # Stream is no longer compressed; remove /FlateDecode filter.
965 0           shift @filters;
966              
967             # Preserve remaining filters, if any.
968 0 0         if (@filters > 1) {
    0          
969 0           $stream->{Filter} = \@filters;
970             } elsif (@filters) {
971 0           $stream->{Filter} = shift @filters;
972             } else {
973 0           delete $stream->{Filter};
974             }
975             }
976             }
977              
978             # Compress stream data.
979             sub compress_stream {
980 0     0 1   my ($self, $stream) = @_;
981              
982             # Get stream filters, if any.
983 0 0         my @filters = $stream->{Filter} ? is_array $stream->{Filter} ? @{$stream->{Filter}} : ($stream->{Filter}) : ();
  0 0          
984              
985             # Return a new stream so the in-memory copy remains uncompressed to work with.
986 0           my $new_stream = { %{$stream} };
  0            
987 0           $new_stream->{-data} = "";
988 0           my ($zlib, $status) = Compress::Raw::Zlib::Deflate->new(-Level => 9, -Bufsize => 65536, AppendOutput => 1);
989 0 0 0       $zlib->deflate($stream->{-data}, $new_stream->{-data}) == Z_OK or croak join(": ", $self->{-file} || (), "Object #$stream->{-id}: Stream deflation failed! ($zlib->msg)\n");
990 0 0 0       $zlib->flush($new_stream->{-data}, Z_FINISH) == Z_OK or croak join(": ", $self->{-file} || (), "Object #$stream->{-id}: Stream deflation failed! ($zlib->msg)\n");
991 0           $new_stream->{Length} = length $new_stream->{-data};
992 0 0         $new_stream->{Filter} = @filters ? ["/FlateDecode", @filters] : "/FlateDecode";
993 0           return $new_stream;
994             }
995              
996             # Resolve indirect object references.
997             sub resolve_references {
998 0     0 1   my ($self, $objects, $object) = @_;
999              
1000             # Replace indirect object references with a reference to the actual object.
1001 0 0 0       if (ref $object and reftype($object) eq "SCALAR") {
1002 0           my $id = ${$object};
  0            
1003 0 0         if ($objects->{$id}) {
1004 0           ($object, my $metadata) = @{$objects->{$id}};
  0            
1005 0 0         return $object if $metadata->{resolved}++;
1006             } else {
1007 0           ($id, my $gen) = split /-/, $id;
1008 0   0       $gen ||= "0";
1009 0   0       warn join(": ", $self->{-file} || (), "Warning: $id $gen R: Referenced indirect object not found!\n");
1010             }
1011             }
1012              
1013             # Check object type.
1014 0 0         if (is_hash $object) {
    0          
1015             # Resolve references in hash values.
1016 0 0         foreach my $key (sort { fc($a) cmp fc($b) || $a cmp $b; } keys %{$object}) {
  0            
  0            
1017 0 0         $object->{$key} = $self->resolve_references($objects, $object->{$key}) if ref $object->{$key};
1018             }
1019              
1020             # For streams, validate the length metadata.
1021 0 0         if (is_stream $object) {
1022 0   0       $object->{-data} //= "";
1023 0 0 0       substr($object->{-data}, $object->{Length}) =~ s/\A$s+\z// if $object->{Length} and length($object->{-data}) > $object->{Length};
1024 0           my $len = length $object->{-data};
1025 0   0       $object->{Length} ||= $len;
1026             $len == $object->{Length}
1027 0 0 0       or warn join(": ", $self->{-file} || (), "Warning: Object #$object->{-id}: Stream length does not match metadata! ($len != $object->{Length})\n");
1028             }
1029             } elsif (is_array $object) {
1030             # Resolve references in array values.
1031 0           foreach my $i (0 .. $#{$object}) {
  0            
1032 0 0         $object->[$i] = $self->resolve_references($objects, $object->[$i]) if ref $object->[$i];
1033             }
1034             }
1035              
1036             # Return object with resolved references.
1037 0           return $object;
1038             }
1039              
1040             # Write all indirect objects to PDF file data.
1041             sub write_indirect_objects {
1042 0     0 1   my ($self, $pdf_file_data, $objects, $seen) = @_;
1043              
1044             # Enumerate all indirect objects.
1045 0           $self->enumerate_indirect_objects($objects);
1046              
1047             # Cross-reference file offsets.
1048 0           my $xrefs = ["0000000000 65535 f \n"];
1049              
1050             # Loop across indirect objects.
1051 0           for (my $i = 1; $i <= $#{$objects}; $i++) {
  0            
1052             # Save file offset for cross-reference table.
1053 0           push @{$xrefs}, sprintf "%010d 00000 n \n", length(${$pdf_file_data});
  0            
  0            
1054              
1055             # Write the indirect object header.
1056 0           ${$pdf_file_data} .= "$i 0 obj\n";
  0            
1057              
1058             # Write the object itself.
1059 0           $self->write_object($pdf_file_data, $objects, $seen, $objects->[$i], 0);
1060              
1061             # Write the indirect object trailer.
1062 0           ${$pdf_file_data} =~ s/\n?\z/\n/;
  0            
1063 0           ${$pdf_file_data} .= "endobj\n\n";
  0            
1064             }
1065              
1066             # Return cross-reference file offsets.
1067 0           return $xrefs;
1068             }
1069              
1070             # Enumerate all indirect objects.
1071             sub enumerate_indirect_objects {
1072 0     0 1   my ($self, $objects) = @_;
1073              
1074             # Add top-level PDF indirect objects.
1075             $self->add_indirect_objects($objects,
1076             $self->{Root} ? $self->{Root} : (), # Document catalog
1077             $self->{Info} ? $self->{Info} : (), # Document information dictionary (if any)
1078             $self->{Root}{Dests} ? $self->{Root}{Dests} : (), # Named destinations (if any)
1079             $self->{Root}{Metadata} ? $self->{Root}{Metadata} : (), # Document metadata (if any)
1080             $self->{Root}{Outlines} ? $self->{Root}{Outlines} : (), # Document outline hierarchy (if any)
1081             $self->{Root}{Pages} ? $self->{Root}{Pages} : (), # Document page tree
1082             $self->{Root}{Threads} ? $self->{Root}{Threads} : (), # Articles (if any)
1083 0 0         $self->{Root}{StructTreeRoot} ? $self->{Root}{StructTreeRoot} : (), # Document structure tree (if any)
    0          
    0          
    0          
    0          
    0          
    0          
    0          
1084             );
1085              
1086             # Add optional content groups, if any.
1087 0 0         $self->add_indirect_objects($objects, @{$self->{Root}{OCProperties}{OCGs}}) if $self->{Root}{OCProperties};
  0            
1088              
1089             # Enumerate shared objects.
1090 0           $self->enumerate_shared_objects($objects, {}, {}, $self->{Root});
1091              
1092             # Add referenced indirect objects.
1093 0           for (my $i = 1; $i <= $#{$objects}; $i++) {
  0            
1094             # Get object.
1095 0           my $object = $objects->[$i];
1096              
1097             # Check object type.
1098 0 0         if (is_hash $object) {
1099             # Objects to add.
1100 0           my @objects;
1101              
1102             # Hashes to scan.
1103 0           my @hashes = $object;
1104              
1105             # Iteratively recurse through hash tree.
1106 0           while (@hashes) {
1107             # Get the next hash.
1108 0           $object = shift @hashes;
1109              
1110             # Check each hash key.
1111 0 0         foreach my $key (sort { fc($a) cmp fc($b) || $a cmp $b; } keys %{$object}) {
  0            
  0            
1112 0 0 0       if (($object->{Type} // "") eq "/ExtGState" and $key eq "Font" and is_array $object->{Font} and is_hash $object->{Font}[0]) {
    0 0        
    0 0        
    0 0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
      0        
1113 0           push @objects, $object->{Font}[0];
1114             } elsif ($key =~ /^(?:Data|First|ID|Last|Next|Obj|Parent|ParentTree|Popup|Prev|Root|StmOwn|Threads|Widths)$/
1115             or $key =~ /^(?:AN|Annotation|B|C|CI|DocMDP|F|FontDescriptor|I|IX|K|Lock|N|P|Pg|RI|SE|SV|V)$/ and ref $object->{$key} and is_hash $object->{$key}
1116             or is_hash $object->{$key} and ($object->{$key}{-data} or $object->{$key}{Kids} or ($object->{$key}{Type} // "") =~ /^\/(?:Filespec|Font)$/)
1117             or ($object->{S} // "") eq "/Thread" and $key eq "D"
1118             or ($object->{S} // "") eq "/Hide" and $key eq "T"
1119             ) {
1120 0           push @objects, $object->{$key};
1121             } elsif ($key =~ /^(?:Annots|B|C|CO|Fields|K|Kids|O|Pages|TrapRegions)$/ and is_array $object->{$key}) {
1122 0           push @objects, grep { is_hash $_; } @{$object->{$key}};
  0            
  0            
1123             } elsif (is_hash $object->{$key}) {
1124 0           push @hashes, $object->{$key};
1125             }
1126             }
1127             }
1128              
1129             # Add the objects found, if any.
1130 0 0         $self->add_indirect_objects($objects, @objects) if @objects;
1131             }
1132             }
1133             }
1134              
1135             # Enumerate shared objects.
1136             sub enumerate_shared_objects {
1137 0     0 1   my ($self, $objects, $seen, $ancestors, $object) = @_;
1138              
1139             # Add shared indirect objects.
1140 0 0         if ($seen->{$object}++) {
1141 0 0         $self->add_indirect_objects($objects, $object) unless $objects->[0]{$object};
1142 0           return;
1143             }
1144              
1145             # Return if this object is an ancestor of itself.
1146 0 0         return if $ancestors->{$object};
1147              
1148             # Add this object to the lookup hash of ancestors.
1149 0           $ancestors->{$object}++;
1150              
1151             # Recurse to check entire object tree.
1152 0 0         if (is_hash $object) {
    0          
1153 0 0         foreach my $key (sort { fc($a) cmp fc($b) || $a cmp $b; } keys %{$object}) {
  0            
  0            
1154 0 0         $self->enumerate_shared_objects($objects, $seen, $ancestors, $object->{$key}) if ref $object->{$key};
1155             }
1156             } elsif (is_array $object) {
1157 0           foreach my $obj (@{$object}) {
  0            
1158 0 0         $self->enumerate_shared_objects($objects, $seen, $ancestors, $obj) if ref $obj;
1159             }
1160             }
1161              
1162             # Remove this object from the lookup hash of ancestors.
1163 0           delete $ancestors->{$object};
1164             }
1165              
1166             # Add indirect objects.
1167             sub add_indirect_objects {
1168 0     0 1   my ($self, $objects, @objects) = @_;
1169              
1170             # Loop across specified objects.
1171 0           foreach my $object (@objects) {
1172             # Make sure content streams are defined.
1173 0 0 0       $object->{-data} //= "" if is_stream $object;
1174              
1175             # Check if object exists and is not in the lookup hash yet.
1176 0 0 0       if (defined $object and not $objects->[0]{$object}) {
1177             # Add the new indirect object to the array.
1178 0           push @{$objects}, $object;
  0            
1179              
1180             # Save the object ID in the lookup hash, keyed by the object.
1181 0           $objects->[0]{$object} = $#{$objects};
  0            
1182             }
1183             }
1184             }
1185              
1186             # Write a direct object to the string of PDF file data.
1187             sub write_object {
1188 0     0 1   my ($self, $pdf_file_data, $objects, $seen, $object, $indent) = @_;
1189              
1190             # Make sure the same object isn't written twice.
1191 0 0 0       if (ref $object and $seen->{$object}++) {
1192 0   0       croak join(": ", $self->{-file} || (), "Object $object written more than once!\n");
1193             }
1194              
1195             # Check object type.
1196 0 0 0       if (is_hash $object) {
    0          
    0          
    0          
1197             # For streams, compress the stream or update the length metadata.
1198 0 0         if (is_stream $object) {
1199 0   0       $object->{-data} //= "";
1200 0 0 0       if (($self->{-compress} or $object->{-compress}) and not ($self->{-decompress} or $object->{-decompress})) {
      0        
      0        
1201 0           $object = $self->compress_stream($object);
1202             } else {
1203 0           $object->{Length} = length $object->{-data};
1204             }
1205             }
1206              
1207             # Dictionary object.
1208 0           $self->serialize_object($pdf_file_data, "<<\n");
1209 0 0         foreach my $key (sort { fc($a) cmp fc($b) || $a cmp $b; } keys %{$object}) {
  0            
  0            
1210 0 0         next if $key =~ /^-/;
1211 0           my $obj = $object->{$key};
1212 0 0         $self->add_indirect_objects($objects, $obj) if is_stream $obj;
1213 0           $self->serialize_object($pdf_file_data, join("", " " x ($indent + 2), "/$key "));
1214 0 0         if (not ref $obj) {
    0          
1215 0           $self->serialize_object($pdf_file_data, "$obj\n");
1216             } elsif ($objects->[0]{$obj}) {
1217 0           $self->serialize_object($pdf_file_data, "$objects->[0]{$obj} 0 R\n");
1218             } else {
1219 0 0         $self->write_object($pdf_file_data, $objects, $seen, $object->{$key}, ref $object ? $indent + 2 : 0);
1220             }
1221             }
1222 0           $self->serialize_object($pdf_file_data, join("", " " x $indent, ">>\n"));
1223              
1224             # For streams, write the stream data.
1225 0 0         if (is_stream $object) {
1226 0   0       $object->{-data} //= "";
1227 0 0 0       croak join(": ", $self->{-file} || (), "Stream written as direct object!\n") if $indent;
1228 0 0         my $newline = substr($object->{-data}, -1) eq "\n" ? "" : "\n";
1229 0           ${$pdf_file_data} =~ s/\n?\z/\n/;
  0            
1230 0           ${$pdf_file_data} .= "stream\n$object->{-data}${newline}endstream\n";
  0            
1231             }
1232 0           } elsif (is_array $object and not grep { ref $_; } @{$object}) {
  0            
1233             # Array of simple objects.
1234 0 0         if ($self->{-minify}) {
1235 0           $self->serialize_array($pdf_file_data, $object);
1236             } else {
1237 0           ${$pdf_file_data} .= "[ @{$object} ]\n";
  0            
  0            
1238             }
1239             } elsif (is_array $object) {
1240             # Array object.
1241 0           $self->serialize_object($pdf_file_data, "[\n");
1242 0           my $spaces = " " x ($indent + 2);
1243 0           foreach my $obj (@{$object}) {
  0            
1244 0 0         $self->add_indirect_objects($objects, $obj) if is_stream $obj;
1245 0 0         ${$pdf_file_data} .= $spaces unless $self->{-minify};
  0            
1246 0 0         if (not ref $obj) {
    0          
1247 0           $self->serialize_object($pdf_file_data, $obj);
1248 0           $spaces = " ";
1249             } elsif ($objects->[0]{$obj}) {
1250 0           $self->serialize_object($pdf_file_data, "$objects->[0]{$obj} 0 R\n");
1251 0           $spaces = " " x ($indent + 2);
1252             } else {
1253 0           $self->write_object($pdf_file_data, $objects, $seen, $obj, $indent + 2);
1254 0           $spaces = " " x ($indent + 2);
1255             }
1256             }
1257 0 0 0       ${$pdf_file_data} .= "\n" if $spaces eq " " and not $self->{-minify};
  0            
1258 0           $self->serialize_object($pdf_file_data, join("", " " x $indent, "]\n"));
1259             } elsif (reftype($object) eq "SCALAR") {
1260             # Unresolved indirect reference.
1261 0           my ($id, $gen) = split /-/, ${$object};
  0            
1262 0   0       $gen ||= "0";
1263 0           $self->serialize_object($pdf_file_data, join("", " " x $indent, "($id $gen R)\n"));
1264             } else {
1265             # Simple object.
1266 0           $self->serialize_object($pdf_file_data, join("", " " x $indent, "$object\n"));
1267             }
1268             }
1269              
1270             # Dump PDF object.
1271             sub dump_object {
1272 0     0 1   my ($self, $object, $label, $seen, $indent, $mode) = @_;
1273              
1274             # Dump output.
1275 0           my $output = "";
1276              
1277             # Hash key sort priority.
1278 0           my %priority = (
1279             Type => -2,
1280             Version => -1,
1281             Root => 1,
1282             Pages => 2,
1283             PageLabels => 3,
1284             Names => 4,
1285             Dests => 5,
1286             Outlines => 6,
1287             Threads => 7,
1288             StructTreeRoot => 8,
1289             );
1290              
1291             # Check mode and object type.
1292 0 0 0       if ($mode eq "outline") {
    0 0        
    0          
    0          
    0          
    0          
1293 0 0 0       if (ref $object and $seen->{$object}) {
    0 0        
    0          
    0          
    0          
1294             # Previously-seen object; dump the label.
1295 0           $output = "$seen->{$object}";
1296             } elsif (is_hash $object) {
1297             # Hash object.
1298 0           $seen->{$object} = $label;
1299 0 0         if (is_stream $object) {
1300 0           $output = "(STREAM)";
1301             } else {
1302 0           $label =~ s/(?<=\w)$/->/;
1303 0 0 0       my @keys = sort { ($priority{$a} // 0) <=> ($priority{$b} // 0) || fc($a) cmp fc($b) || $a cmp $b; } keys %{$object};
  0   0        
  0   0        
1304 0           my $key_len = max map length $_, @keys;
1305 0           foreach my $key (@keys) {
1306 0           my $obj = $object->{$key};
1307 0 0         next unless ref $obj;
1308 0           $output .= sprintf "%s%-${key_len}s => ", " " x ($indent + 2), $key;
1309 0 0         $output .= $self->dump_object($object->{$key}, "$label\{$key\}", $seen, ref $object ? $indent + 2 : 0, $mode) . ",\n";
1310             }
1311 0 0         if ($output) {
1312 0           $output = join("", "{ # $label\n", $output, (" " x $indent), "}");
1313             } else {
1314 0           $output = "{...}";
1315             }
1316 0           $output =~ s/\{ \# \$pdf->\n/\{\n/;
1317             }
1318 0           } elsif (is_array $object and not grep { ref $_; } @{$object}) {
  0            
1319             # Array of simple objects.
1320 0           $output = "[...]";
1321             } elsif (is_array $object) {
1322             # Array object.
1323 0           for (my $i = 0; $i < @{$object}; $i++) {
  0            
1324 0 0         $output .= sprintf "%s%s,\n", " " x ($indent + 2), $self->dump_object($object->[$i], "$label\[$i\]", $seen, $indent + 2, $mode) if ref $object->[$i];
1325             }
1326 0 0         if ($output =~ /\A$s+(.*?),\n\z/) {
    0          
1327 0           $output = "[... $1]";
1328             } elsif ($output =~ /\n/) {
1329 0           $output = join("", "[ # $label\n", $output, (" " x $indent), "]");
1330             } else {
1331 0           $output = "[$output]";
1332             }
1333             } elsif (reftype($object) eq "SCALAR") {
1334             # Unresolved indirect reference.
1335 0           my ($id, $gen) = split /-/, ${$object};
  0            
1336 0   0       $gen ||= "0";
1337 0           $output .= "\"$id $gen R\"";
1338             }
1339             } elsif (ref $object and $seen->{$object}) {
1340             # Previously-seen object; dump the label.
1341 0           $output = $seen->{$object};
1342             } elsif (is_hash $object) {
1343             # Hash object.
1344 0           $seen->{$object} = $label;
1345 0           $output = "{ # $label\n";
1346 0           $label =~ s/(?<=\w)$/->/;
1347 0 0 0       my @keys = sort { ($priority{$a} // 0) <=> ($priority{$b} // 0) || fc($a) cmp fc($b) || $a cmp $b; } keys %{$object};
  0   0        
  0   0        
1348 0           my $key_len = max map length $_, @keys;
1349 0           foreach my $key (@keys) {
1350 0           my $obj = $object->{$key};
1351 0           $output .= sprintf "%s%-${key_len}s => ", " " x ($indent + 2), $key;
1352 0 0         if ($key eq -data) {
    0          
1353 0           chomp $obj;
1354 0 0         $output .= $obj =~ /\A(?:<\?xpacket|[\n\t -~]*\z)/ ? "<<'EOF',\n$obj\nEOF\n" : dump($obj) . "\n";
1355             } elsif (not ref $obj) {
1356 0           $output .= dump($obj) . ",\n";
1357             } else {
1358 0 0         $output .= $self->dump_object($object->{$key}, "$label\{$key\}", $seen, ref $object ? $indent + 2 : 0, $mode) . ",\n";
1359             }
1360             }
1361 0           $output .= (" " x $indent) . "}";
1362 0           $output =~ s/\{ \# \$pdf\n/\{\n/;
1363 0           } elsif (is_array $object and not grep { ref $_; } @{$object}) {
  0            
1364             # Array of simple objects.
1365 0 0         $output = sprintf "[%s]", join(", ", map { /^\d+\.\d+$/ ? $_ : dump($_); } @{$object});
  0            
  0            
1366             } elsif (is_array $object) {
1367             # Array object.
1368 0           $output .= "[ # $label\n";
1369 0           my $spaces = " " x ($indent + 2);
1370 0           for (my $i = 0; $i < @{$object}; $i++) {
  0            
1371 0           my $obj = $object->[$i];
1372 0 0         if (ref $obj) {
1373 0           $output .= sprintf "%s%s,\n", $spaces, $self->dump_object($obj, "$label\[$i\]", $seen, $indent + 2, $mode);
1374 0           $spaces = " " x ($indent + 2);
1375             } else {
1376 0           $output .= $spaces . dump($obj) . ",";
1377 0           $spaces = " ";
1378             }
1379             }
1380 0 0         $output .= ",\n" if $spaces eq " ";
1381 0           $output .= (" " x $indent) . "]";
1382             } elsif (reftype($object) eq "SCALAR") {
1383             # Unresolved indirect reference.
1384 0           my ($id, $gen) = split /-/, ${$object};
  0            
1385 0   0       $gen ||= "0";
1386 0           $output .= "\"$id $gen R\"";
1387             } else {
1388             # Simple object.
1389 0           $output = sprintf "%s%s\n", " " x $indent, dump($object);
1390             }
1391              
1392             # Return generated output.
1393 0           return $output;
1394             }
1395              
1396             1;
1397              
1398             __END__