File Coverage

blib/lib/PDF/Builder/Basic/PDF/File.pm
Criterion Covered Total %
statement 579 840 68.9
branch 211 426 49.5
condition 65 156 41.6
subroutine 44 47 93.6
pod 25 26 96.1
total 924 1495 61.8


line stmt bran cond sub pod time code
1             #=======================================================================
2             #
3             # THIS IS A REUSED PERL MODULE, FOR PROPER LICENCING TERMS SEE BELOW:
4             #
5             # Copyright Martin Hosken <Martin_Hosken@sil.org>
6             #
7             # Modified for PDF::API2 by Alfred Reibenschuh <alfredreibenschuh@gmx.net>
8             #
9             # No warranty or expression of effectiveness, least of all regarding
10             # anyone's safety, is implied in this software or documentation.
11             #
12             # This specific module is licensed under the Perl Artistic License.
13             # Effective 28 January 2021, the original author and copyright holder,
14             # Martin Hosken, has given permission to use and redistribute this module
15             # under the MIT license.
16             #
17             #=======================================================================
18             package PDF::Builder::Basic::PDF::File;
19              
20 40     40   154375 use strict;
  40         110  
  40         1852  
21 40     40   232 use warnings;
  40         111  
  40         5330  
22              
23             our $VERSION = '3.028'; # VERSION
24             our $LAST_UPDATE = '3.028'; # manually update whenever code is changed
25              
26             =head1 NAME
27              
28             PDF::Builder::Basic::PDF::File - Holds the trailers and cross-reference tables for a PDF file
29              
30             =head1 SYNOPSIS
31              
32             $p = PDF::Builder::Basic::PDF::File->open("filename.pdf", 1);
33             $p->new_obj($obj_ref);
34             $p->free_obj($obj_ref);
35             $p->append_file();
36             $p->close_file();
37             $p->release(); # IMPORTANT!
38              
39             =head1 DESCRIPTION
40              
41             This class keeps track of the directory aspects of a PDF file. There are two
42             parts to the directory: the main directory object, which is the parent to all
43             other objects, and a chain of cross-reference tables and corresponding trailer
44             dictionaries, starting with the main directory object.
45              
46             =head1 INSTANCE VARIABLES
47              
48             Within this class hierarchy, rather than making everything visible via methods,
49             which would be a lot of work, there are various instance variables which are
50             accessible via associative array referencing. To distinguish instance variables
51             from content variables (which may come from the PDF content itself), each such
52             variable name will start with a space.
53              
54             Variable names which do not start with a space directly reflect elements in a
55             PDF dictionary. In the case of a C<PDF::Builder::Basic::PDF::File>, the
56             elements reflect those in the trailer dictionary.
57              
58             Since some variables are not designed for class users to access, variables are
59             marked in the documentation with B<(R)> to indicate that such an entry should
60             only be used as B<read-only> information. B<(P)> indicates that the information
61             is B<private>, and not designed for user use at all, but is included in the
62             documentation for completeness and to ensure that nobody else tries to use it.
63              
64             =over
65              
66             =item newroot
67              
68             This variable allows the user to create a new root entry to occur in the trailer
69             dictionary which is output when the file is written or appended. If you wish to
70             override the root element in the dictionary you have, use this entry to indicate
71             that without losing the current Root entry. Notice that newroot should point to
72             a PDF level object and not just to a dictionary, which does not have object
73             status.
74              
75             =item INFILE (R)
76              
77             Contains the filehandle used to read this information into this PDF directory.
78             It is an IO object.
79              
80             =item fname (R)
81              
82             This is the filename which is reflected by INFILE, or the original IO object
83             passed in.
84              
85             =item update (R)
86              
87             This indicates that the read file has been opened for update and that at some
88             point, C<< $p->appendfile() >> can be called to update the file with the
89             changes that have been made to the memory representation.
90              
91             =item maxobj (R)
92              
93             Contains the first usable object number above any that have already appeared
94             in the file so far.
95              
96             =item outlist (P)
97              
98             This is a list of Objind which are to be output when the next C<appendfile()>
99             or C<outfile()> occurs.
100              
101             =item firstfree (P)
102              
103             Contains the first free object in the free object list. Free objects are removed
104             from the front of the list and added to the end.
105              
106             =item lastfree (P)
107              
108             Contains the last free object in the free list. It may be the same as the
109             C<firstfree> if there is only one free object.
110              
111             =item objcache (P)
112              
113             All objects are held in the cache to ensure that a system only has one
114             occurrence of each object. In effect, the objind class acts as a container type
115             class to hold the PDF object structure, and it would be unfortunate if there
116             were two identical place-holders floating around a system.
117              
118             =item epos (P)
119              
120             The end location of the read-file.
121              
122             =back
123              
124             Each trailer dictionary contains a number of private instance variables which
125             hold the chain together.
126              
127             =over
128              
129             =item loc (P)
130              
131             Contains the location of the start of the cross-reference table preceding the
132             trailer.
133              
134             =item xref (P)
135              
136             Contains an anonymous array of each cross-reference table entry.
137              
138             =item prev (P)
139              
140             A reference to the previous table. Note this differs from the Prev entry which
141             is in PDF, which contains the location of the previous cross-reference table.
142              
143             =back
144              
145             =cut
146              
147 40     40   293 use Scalar::Util qw(blessed weaken);
  40         154  
  40         3454  
148              
149 40     40   284 use vars qw($cr $irreg_char $reg_char $ws_char $delim_char %types);
  40         95  
  40         9180  
150              
151             $ws_char = '[ \t\r\n\f\0]';
152             $delim_char = '[][<>{}()/%]';
153             $reg_char = '[^][<>{}()/% \t\r\n\f\0]';
154             $irreg_char = '[][<>{}()/% \t\r\n\f\0]';
155             # \015 = x0D = CR or \r, \012 = x0A = LF or \n
156             # TBD a line-end character is space CR ' \r', space LF ' \n', or CR LF '\r\n'
157             # have seen working PDFs with just a CR and space CR
158             $cr = '\s*(?:\015|\012|(?:\015\012))';
159              
160             my $re_comment = qr/(?:\%[^\r\n]*)/;
161             my $re_whitespace = qr/(?:[ \t\r\n\f\0]|$re_comment)/;
162              
163             %types = (
164             'Page' => 'PDF::Builder::Basic::PDF::Page',
165             'Pages' => 'PDF::Builder::Basic::PDF::Pages',
166             );
167              
168             my $readDebug = 0;
169              
170 40     40   313 use Carp;
  40         101  
  40         3459  
171 40     40   1004 use IO::File;
  40         11871  
  40         7016  
172              
173             # Now for the basic PDF types
174 40     40   1057 use PDF::Builder::Basic::PDF::Utils;
  40         87  
  40         4065  
175              
176 40     40   280 use PDF::Builder::Basic::PDF::Array;
  40         104  
  40         1334  
177 40     40   221 use PDF::Builder::Basic::PDF::Bool;
  40         88  
  40         1123  
178 40     40   241 use PDF::Builder::Basic::PDF::Dict;
  40         86  
  40         1182  
179 40     40   202 use PDF::Builder::Basic::PDF::Name;
  40         93  
  40         1795  
180 40     40   288 use PDF::Builder::Basic::PDF::Number;
  40         86  
  40         1051  
181 40     40   220 use PDF::Builder::Basic::PDF::Objind;
  40         93  
  40         1273  
182 40     40   239 use PDF::Builder::Basic::PDF::String;
  40         88  
  40         1011  
183 40     40   25597 use PDF::Builder::Basic::PDF::Page;
  40         141  
  40         1769  
184 40     40   306 use PDF::Builder::Basic::PDF::Pages;
  40         87  
  40         1019  
185 40     40   220 use PDF::Builder::Basic::PDF::Null;
  40         90  
  40         1240  
186 40     40   220 use POSIX qw(ceil floor);
  40         90  
  40         432  
187              
188             =head1 METHODS
189              
190             =head2 new
191              
192             PDF::Builder::Basic::PDF::File->new()
193              
194             =over
195              
196             Creates a new, empty file object which can act as the host to other PDF objects.
197             Since there is no file associated with this object, it is assumed that the
198             object is created in readiness for creating a new PDF file.
199              
200             =back
201              
202             =cut
203              
204             sub new {
205 234     234 1 821 my ($class, $root) = @_;
206 234         1215 my $self = $class->_new();
207              
208 234 50       885 unless ($root) {
209 234         1173 $root = PDFDict();
210 234         937 $root->{'Type'} = PDFName('Catalog');
211             }
212 234         1329 $self->new_obj($root);
213              
214 234         825 $self->{'Root'} = $root;
215              
216 234         1133 return $self;
217             }
218              
219             =head2 open
220              
221             $p = PDF::Builder::Basic::PDF::File->open($filename, $update, %options)
222              
223             =over
224              
225             Opens the file and reads all the trailers and cross reference tables to build
226             a complete directory of objects.
227              
228             C<$filename> may be a string or an IO object.
229              
230             C<$update> specifies whether this file is being opened for updating and editing
231             (I<TRUE> value), or simply to be read (I<FALSE> or undefined value).
232              
233             C<%options> may include
234              
235             =over
236              
237             =item diags => 1
238              
239             If C<diags> is set to 1, various warning messages will be given if a
240             suspicious PDF structure is found, and some fixup may be attempted. There is
241             no guarantee that any fixup will change the PDF to legitimate, or that there
242             won't be other problems found further down the line. If this flag is I<not>
243             given, and a structural problem is found, it is fairly likely that errors (and
244             even a program B<crash>) may happen further along. If you experience crashes
245             when reading in a PDF file, try running with C<diags> and see what is reported.
246              
247             There are many PDF files out "in the wild" which, while failing to conform to
248             Adobe's standards, appear to be tolerated by PDF Readers. Thus, Builder will
249             not fail on them, but merely comment on their existence.
250              
251             =back
252              
253             =back
254              
255             =cut
256              
257             sub open {
258 18     18 1 84 my ($class, $filename, $update, %options) = @_;
259             # copy dashed option names to preferred undashed names
260 18 50 33     95 if (defined $options{'-diags'} && !defined $options{'diags'}) { $options{'diags'} = delete($options{'-diags'}); }
  0         0  
261 18         47 my ($fh, $buffer);
262 18 50       95 $options{'diags'} = 0 if not defined $options{'diags'}; # default
263              
264 18         46 my $comment = ''; # any comment jammed into the PDF header
265 18         123 my $self = $class->_new();
266 18 50       119 if (ref $filename) {
267 18         96 $self->{' INFILE'} = $filename;
268 18 50       75 if ($update) {
269 18         73 $self->{' update'} = 1;
270 18         64 $self->{' OUTFILE'} = $filename;
271             }
272 18         47 $fh = $filename;
273             } else {
274 0 0       0 die "PDF file '$filename' to open does not exist!" unless -f $filename;
275 0 0       0 die "PDF file '$filename' to open is not readable!" unless -r $filename;
276             # requesting to update (write) to file? needs to be r/w
277 0 0       0 if ($update) {
278 0 0       0 die "PDF file '$filename' to update is not writable!" unless -w $filename;
279             }
280 0 0       0 $fh = IO::File->new(($update ? '+' : '') . "<$filename");
281 0 0       0 if (!$fh) {
282 0         0 die "File '$filename' unable to open! $!";
283             }
284 0         0 $self->{' INFILE'} = $fh;
285 0 0       0 if ($update) {
286 0         0 $self->{' update'} = 1;
287 0         0 $self->{' OUTFILE'} = $fh;
288 0         0 $self->{' fname'} = $filename;
289             }
290             }
291 18         149 binmode $fh, ':raw';
292 18         272 $fh->seek(0, 0); # go to start of file
293 18         288 $fh->read($buffer, 255);
294 18 50       1093 unless ($buffer =~ m/^\%PDF\-(\d+\.\d+)(.*?)$cr/mo) {
295 0         0 die "$filename does not contain a valid PDF version number";
296             }
297 18         115 $self->{' version'} = $1;
298             # can't run verCheckInput() yet, as full ' version' not set
299 18 50 33     185 if (defined $2 && length($2) > 0) {
300 0         0 $comment = $2; # save for output as comment
301             # since we just echo the original header + comment, unless that causes
302             # problems in some Readers, we can just leave it be (no call to strip
303             # out inline comment and create a separate comment further along).
304             }
305              
306             # there should always be 'startxref' within 16*64 bytes of end
307 18         137 $fh->seek(0, 2); # go to end of file
308 18         206 my $end = $fh->tell();
309 18         217 $self->{' epos'} = $end;
310 18         89 foreach my $offset (1 .. 64) {
311 36         230 $fh->seek($end - 16 * $offset, 0);
312 36         303 $fh->read($buffer, 16 * $offset);
313 36 100       1783 last if $buffer =~ m/startxref($cr|\s*)\d+($cr|\s*)\%\%eof.*?/i;
314             }
315 18 50       1125 unless ($buffer =~ m/startxref[^\d]+([0-9]+)($cr|\s*)\%\%eof.*?/i) {
316 0 0       0 if ($options{'diags'} == 1) {
317 0         0 warn "Malformed PDF file $filename";
318             }
319             }
320 18         283 my $xpos = $1; # offset given after 'startxref'
321             # should point to either xref table ('xref'), or object with xref stream
322 18         84 $self->{' xref_position'} = $xpos;
323              
324 18         135 my $tdict = $self->readxrtr($xpos, %options);
325 18         131 foreach my $key (keys %$tdict) {
326 133         318 $self->{$key} = $tdict->{$key};
327             }
328              
329 18         228 return $self;
330             } # end of open()
331              
332             =head2 version
333              
334             $new_version = $p->pdf_version($version, %opts) # Set
335              
336             $ver = $p->pdf_version() # Get
337              
338             =over
339              
340             Gets/sets the PDF version (e.g., 1.5). Setting sets both the header and
341             trailer versions. Getting returns the higher of header and trailer versions.
342              
343             For compatibility with earlier releases, if no decimal point is given, assume
344             "1." precedes the number given.
345              
346             A warning message is given if you attempt to I<decrease> the PDF version, as you
347             might have already read in a higher level file, or used a higher level feature.
348             This message is suppressed if the 'silent' option is given with any value.
349              
350             =back
351              
352             =cut
353              
354             sub pdf_version {
355 31     31 0 91 my $self = shift();
356              
357             # current version is the higher of trailer and header versions
358 31         110 my $header_version = $self->header_version();
359 31         298 my $trailer_version = $self->trailer_version();
360 31 100 100     172 my $old_version = (defined $trailer_version &&
361             $trailer_version > $header_version)?
362             $trailer_version: $header_version;
363              
364 31 100       109 if (@_) { # Set, possibly with options
365 3         7 my $version = shift();
366 3         8 my %opts = @_;
367             # copy dashed option names to preferred undashed names
368 3 50 33     17 if (defined $opts{'-silent'} && !defined $opts{'silent'}) { $opts{'silent'} = delete($opts{'-silent'}); }
  0         0  
369            
370             # 1.x and 2.x versions allowed
371 3 50       25 if ($version =~ m/^\d+$/) { $version = "1.$version"; } # no x.? assume it's 1.something
  0         0  
372             # check if well formed 1.x and 2.x
373 3 50       34 if ($version !~ /^[12]\.[0-9]+$/) {
374 0 0       0 croak "Invalid version '$version' ignored" unless defined $opts{'silent'};
375 0         0 return $old_version;
376             }
377              
378 3 50       11 if ($old_version > $version) {
379 0 0       0 croak "Warning: call to header_version() to LOWER the output PDF version number!" unless defined $opts{'silent'};
380             }
381            
382             # have already squawked about any problems with $version
383 3         12 $self->header_version($version, 'silent'=>1);
384             #if ($version >= 1.4) { # min 1.4 level
385 3         9 $self->trailer_version($version, 'silent'=>1);
386             #}
387             #else {
388             # delete $self->{'Root'}->{'Version'};
389             # $self->out_obj($self->{'Root'});
390             #}
391 3         10 return $version;
392             }
393              
394             # Get
395 28         105 return $old_version;
396             }
397              
398             =head2 header_version
399              
400             $new_version = $p->header_version($version, %opts) # Set
401              
402             $version = $p->header_version() # Get
403              
404             =over
405              
406             Gets/sets the PDF version stored in the file header.
407              
408             For compatibility with earlier releases, if no decimal point is given, assume
409             "1." precedes the number given.
410              
411             A warning message is given if you attempt to I<decrease> the PDF version, as you
412             might have already read in a higher level file, or used a higher level feature.
413             This message is suppressed if the 'silent' option is given with any value.
414              
415             =back
416              
417             =cut
418              
419             sub header_version {
420 39     39 1 116 my $self = shift();
421              
422             # current (header) version
423 39         101 my $old_version = $self->{' version'};
424              
425 39 100       138 if (@_) { # Set, permits versions 1.x and 2.x
426 6         11 my $version = shift();
427 6         16 my %opts = @_;
428             # copy dashed option names to preferred undashed names
429 6 50 33     17 if (defined $opts{'-silent'} && !defined $opts{'silent'}) { $opts{'silent'} = delete($opts{'-silent'}); }
  0         0  
430            
431             # 1.x and 2.x versions allowed
432 6 50       33 if ($version =~ m/^\d+$/) { $version = "1.$version"; } # no x.? assume it's 1.something
  0         0  
433             # check if well formed 1.x and 2.x
434 6 50       26 if ($version !~ /^[12]\.[0-9]+$/) {
435 0 0       0 croak "Invalid header_version '$version' ignored" unless defined $opts{'silent'};
436 0         0 return $old_version;
437             }
438              
439 6 50       41 if ($old_version > $version) {
440 0 0       0 croak "Warning: call to header_version() to LOWER the output PDF version number!" unless defined $opts{'silent'};
441             }
442            
443 6         13 $self->{' version'} = $version;
444 6         17 return $version;
445             }
446              
447             # Get
448 33         100 return $old_version;
449             }
450              
451             =head2 trailer_version
452              
453             $new_version = $p->trailer_version($version, %opts) # Set
454              
455             $version = $p->trailer_version() # Get
456              
457             =over
458              
459             Gets/sets the PDF version stored in the document catalog.
460              
461             Note that the minimum PDF level for a trailer version is 1.4. It is not
462             permitted to set a PDF level of 1.3 or lower. An existing PDF (read in) of
463             1.3 or below returns undefined.
464              
465             For compatibility with earlier releases, if no decimal point is given, assume
466             "1." precedes the number given.
467              
468             A warning message is given if you attempt to I<decrease> the PDF version, as you
469             might have already read in a higher level file, or used a higher level feature.
470             This message is suppressed if the 'silent' option is given with any value.
471              
472             =back
473              
474             =cut
475              
476             sub trailer_version {
477 37     37 1 84 my $self = shift();
478              
479 37         212 my $old_version = undef;
480 37 100       172 if ($self->{'Root'}->{'Version'}) {
481 12         49 $self->{'Root'}->{'Version'}->realise();
482 12         32 $old_version = $self->{'Root'}->{'Version'}->val();
483             }
484              
485 37 100       99 if (@_) { # Set, allows versions 1.x and 2.x
486 5         11 my $version = shift();
487 5         16 my %opts = @_;
488             # copy dashed option names to preferred undashed names
489 5 50 33     14 if (defined $opts{'-silent'} && !defined $opts{'silent'}) { $opts{'silent'} = delete($opts{'-silent'}); }
  0         0  
490            
491             # 1.x and 2.x versions allowed
492 5 50       20 if ($version =~ m/^\d+$/) { $version = "1.$version"; } # no x.? assume it's 1.something
  0         0  
493             # check if well formed 1.x and 2.x
494 5 50       20 if ($version !~ /^[12]\.[0-9]+$/) {
495 0 0       0 croak "Invalid trailer_version '$version' ignored" unless defined $opts{'silent'};
496 0         0 return $old_version;
497             }
498              
499 5 50 66     29 if (defined $old_version && $old_version > $version) {
500 0 0       0 croak "Warning: call to trailer_version() to LOWER the output PDF version number!" unless defined $opts{'silent'};
501             }
502            
503 5         21 $self->{'Root'}->{'Version'} = PDFName($version);
504 5         23 $self->out_obj($self->{'Root'});
505 5         12 return $version;
506             }
507              
508             # Get
509 32         76 return $old_version;
510             }
511              
512             =head2 require_version
513              
514             $prev_version = $p->require_version($version)
515              
516             =over
517              
518             Ensures that the PDF version is at least C<$version>.
519             Silently sets the version to the higher level.
520              
521             =back
522              
523             =cut
524              
525             sub require_version {
526 3     3 1 1416 my ($self, $min_version) = @_;
527 3         11 my $current_version = $self->pdf_version();
528 3 100       15 $self->pdf_version($min_version) if $current_version < $min_version;
529 3         9 return $current_version;
530             }
531              
532             =head2 release
533              
534             $p->release()
535              
536             =over
537              
538             Releases ALL of the memory used by the PDF document and all of its
539             component objects. After calling this method, do B<NOT> expect to
540             have anything left in the C<PDF::Builder::Basic::PDF::File> object
541             (so if you need to save, be sure to do it before calling this method).
542              
543             B<NOTE>, that it is important that you call this method on any
544             C<PDF::Builder::Basic::PDF::File> object when you wish to destroy it and
545             free up its memory. Internally, PDF files have an enormous number of
546             cross-references, and this causes circular references within the
547             internal data structures. Calling C<release()> causes a brute-force
548             cleanup of the data structures, freeing up all of the memory. Once
549             you've called this method, though, don't expect to be able to do
550             anything else with the C<PDF::Builder::Basic::PDF::File> object; it'll
551             have B<no> internal state whatsoever.
552              
553             =back
554              
555             =cut
556              
557             # Maintainer's Question: Couldn't this be handled by a DESTROY method
558             # instead of requiring an explicit call to release()?
559             sub release {
560 178     178 1 564 my $self = shift();
561              
562 178 50       642 return $self unless ref($self);
563 178         1110 my @tofree = values %$self;
564              
565 178         1163 foreach my $key (keys %$self) {
566 2891         4495 $self->{$key} = undef;
567 2891         4997 delete $self->{$key};
568             }
569              
570             # PDFs with highly-interconnected page trees or outlines can hit Perl's
571             # recursion limit pretty easily, so disable the warning for this specific
572             # loop.
573 40     40   87455 no warnings 'recursion'; ## no critic
  40         136  
  40         457680  
574              
575 178         1025 while (my $item = shift @tofree) {
576 8745 100 100     31024 if (blessed($item) and $item->can('release')) {
    100          
    100          
577 2527         6023 $item->release(1);
578             } elsif (ref($item) eq 'ARRAY') {
579 1848         6245 push @tofree, @$item;
580             } elsif (ref($item) eq 'HASH') {
581 1057         3033 push @tofree, values %$item;
582 1057         3208 foreach my $key (keys %$item) {
583 4556         7839 $item->{$key} = undef;
584 4556         10927 delete $item->{$key};
585             }
586             } else {
587 3313         10765 $item = undef;
588             }
589             }
590              
591 178         1056 return;
592             } # end of release()
593              
594             =head2 append_file
595              
596             $p->append_file()
597              
598             =over
599              
600             Appends the objects for output to the read file and then appends the
601             appropriate table.
602              
603             =back
604              
605             =cut
606              
607             sub append_file {
608 8     8 1 21 my $self = shift();
609 8 50       43 return unless $self->{' update'};
610              
611 8         26 my $fh = $self->{' INFILE'};
612              
613             # hack to upgrade pdf-version number to support requested features in
614             # higher versions than the pdf was originally created. WARNING: new version
615             # must be exactly SAME length as the old (e.g., 1.6 replacing 1.4), or
616             # problems are likely with overwriting header. perhaps some day we will
617             # need to check the old version being ovewritten, and adjust something to
618             # avoid corrupting the file.
619 8   50     39 my $version = $self->{' version'} || 1.4;
620 8         78 $fh->seek(0, 0);
621             # assume that any existing EOL after version will be reused
622 8         147 $fh->print("%PDF-$version");
623              
624 8         160 my $tdict = PDFDict();
625 8         41 $tdict->{'Prev'} = PDFNum($self->{' loc'});
626 8         30 $tdict->{'Info'} = $self->{'Info'};
627 8 50       34 if (defined $self->{' newroot'}) {
628 0         0 $tdict->{'Root'} = $self->{' newroot'};
629             } else {
630 8         28 $tdict->{'Root'} = $self->{'Root'};
631             }
632 8         28 $tdict->{'Size'} = $self->{'Size'};
633              
634 8         68 foreach my $key (grep { $_ !~ m/^\s/ } keys %$self) {
  151         426  
635 25 50       85 $tdict->{$key} = $self->{$key} unless defined $tdict->{$key};
636             }
637              
638 8         64 $fh->seek($self->{' epos'}, 0);
639 8         95 $self->out_trailer($tdict, $self->{' update'});
640 8         41 close $self->{' OUTFILE'};
641              
642 8         182 return;
643             } # end of append_file()
644              
645             =head2 out_file
646              
647             $p->out_file($fname)
648              
649             =over
650              
651             Writes a PDF file to a file of the given filename, based on the current list of
652             objects to be output. It creates the trailer dictionary based on information
653             in C<$self>.
654              
655             $fname may be a string or an IO object.
656              
657             =back
658              
659             =cut
660              
661             sub out_file {
662 170     170 1 548 my ($self, $fname) = @_;
663              
664 170         827 $self = $self->create_file($fname);
665 170         775 $self = $self->close_file();
666              
667 170         679 return $self;
668             }
669              
670             =head2 create_file
671              
672             $p->create_file($fname)
673              
674             =over
675              
676             Creates a new output file (no check is made of an existing open file) of
677             the given filename or IO object. Note: make sure that C<< $p->{' version'} >>
678             is set correctly before calling this function.
679              
680             =back
681              
682             =cut
683              
684             sub create_file {
685 170     170 1 462 my ($self, $filename) = @_;
686 170         357 my $fh;
687              
688 170         777 $self->{' fname'} = $filename;
689 170 50       1316 if (ref $filename) {
690 170         490 $fh = $filename;
691             } else {
692 0   0     0 $fh = IO::File->new(">$filename") || die "Unable to open $filename for writing";
693 0         0 binmode($fh,':raw');
694             }
695              
696 170         727 $self->{' OUTFILE'} = $fh;
697 170   50     3604 $fh->print('%PDF-' . ($self->{' version'} // '1.4') . "\n");
698 170         2047 $fh->print("%\xC6\xCD\xCD\xB5\n"); # and some binary stuff in a comment.
699              
700             # PDF spec requires 4 or more "binary" bytes (128 or higher value) in a
701             # comment immediately following the PDF-x.y header, to alert reader that
702             # there is binary data. Actual values are apparently arbitrary. This DOES
703             # mean that other comments can NOT be inserted between the header and the
704             # binary comment! PDF::Builder always outputs this comment, so is always
705             # claiming binary data (no harm done?).
706              
707 170         1205 return $self;
708             }
709              
710             =head2 close_file
711              
712             $p->close_file()
713              
714             =over
715              
716             Closes up the open file for output, by outputting the trailer, etc.
717              
718             =back
719              
720             =cut
721              
722             sub close_file {
723 170     170 1 480 my $self = shift();
724              
725 170         680 my $tdict = PDFDict();
726 170 50       1077 $tdict->{'Info'} = $self->{'Info'} if defined $self->{'Info'};
727 170 50 33     951 $tdict->{'Root'} = (defined $self->{' newroot'} and $self->{' newroot'} ne '') ? $self->{' newroot'} : $self->{'Root'};
728              
729             # remove all freed objects from the outlist, AND the outlist_cache if not updating
730             # NO! Don't do that thing! In fact, let out_trailer do the opposite!
731              
732 170   33     946 $tdict->{'Size'} = $self->{'Size'} || PDFNum(1);
733 170 50       635 $tdict->{'Prev'} = PDFNum($self->{' loc'}) if $self->{' loc'};
734 170 50       640 if ($self->{' update'}) {
735 0         0 foreach my $key (grep { $_ !~ m/^[\s\-]/ } keys %$self) {
  0         0  
736 0 0       0 $tdict->{$key} = $self->{$key} unless defined $tdict->{$key};
737             }
738              
739 0         0 my $fh = $self->{' INFILE'};
740 0         0 $fh->seek($self->{' epos'}, 0);
741             }
742              
743 170         1490 $self->out_trailer($tdict, $self->{' update'});
744 170         1083 close($self->{' OUTFILE'});
745 170 50 33     1161 if ($^O eq 'MacOS' and not ref($self->{' fname'})) {
746 0         0 MacPerl::SetFileInfo('CARO', 'TEXT', $self->{' fname'});
747             }
748              
749 170         1203 return $self;
750             } # end of close_file()
751              
752             =head2 readval
753              
754             ($value, $str) = $p->readval($str, %opts)
755              
756             =over
757              
758             Reads a PDF value from the current position in the file. If C<$str> is too
759             short, read some more from the current location in the file until the whole
760             object is read. This is a recursive call which may slurp in a whole big stream
761             (unprocessed).
762              
763             Returns the recursive data structure read and also the current C<$str> that has
764             been read from the file.
765              
766             =back
767              
768             =cut
769              
770             sub readval {
771 1479     1479 1 276833 my ($self, $str, %opts) = @_;
772 1479         3012 my $fh = $self->{' INFILE'};
773 1479         2406 my ($result, $value);
774              
775 1479 100       3641 my $update = defined($opts{'update'}) ? $opts{'update'} : 1;
776 1479 100       4306 $str = update($fh, $str) if $update;
777              
778 1479         6567 $str =~ s/^$ws_char+//; # Ignore initial white space
779 1479         5037 $str =~ s/^\%[^\015\012]*$ws_char+//; # Ignore comments
780              
781 1479 100       27320 if ($str =~ m/^<</s) {
    100          
    100          
    100          
    100          
    50          
    100          
    50          
    50          
    0          
782             # Dictionary
783 162         476 $str = substr ($str, 2);
784 162 100       541 $str = update($fh, $str) if $update;
785 162         667 $result = PDFDict();
786              
787 162         505 while ($str !~ m/^>>/) {
788 457         2226 $str =~ s/^$ws_char+//; # Ignore initial white space
789 457         2032 $str =~ s/^\%[^\015\012]*$ws_char+//; # Ignore comments
790              
791 457 50       3105 if ($str =~ s|^/($reg_char+)||) {
    0          
    0          
792 457         1598 my $key = PDF::Builder::Basic::PDF::Name::name_to_string($1, $self);
793 457         2136 ($value, $str) = $self->readval($str, %opts);
794             # per Vadim Repin (RT 131147) CHG 1. His conclusion is that
795             # it is highly unlikely, but remotely possible, that there
796             # could be legitimate use of Null objects that should NOT be
797             # prevented from bubbling up. If such a case is discovered, we
798             # might have to try Klaus Ethgen's more limited (in scope)
799             # patch in ./Pages.pm. See full discussion in RT 131147 for
800             # details on what's going on and how this fixes it.
801             #$result->{$key} = $value; # original code
802 457 50       2244 $result->{$key} = $value
803             unless ref($value) eq 'PDF::Builder::Basic::PDF::Null';
804             } elsif ($str =~ s|^/$ws_char+||) {
805             # fixes a broken key problem of acrobat. -- fredo
806 0         0 ($value, $str) = $self->readval($str, %opts);
807 0         0 $result->{'null'} = $value;
808             } elsif ($str =~ s|^//|/|) {
809             # fixes again a broken key problem of illustrator/enfocus. -- fredo
810 0         0 ($value, $str) = $self->readval($str, %opts);
811 0         0 $result->{'null'} = $value;
812             } else {
813 0         0 die "Invalid dictionary key";
814             }
815 457 100       1421 $str = update($fh, $str) if $update; # thanks gareth.jones@stud.man.ac.uk
816             }
817              
818 162         490 $str =~ s/^>>//;
819 162 100       493 $str = update($fh, $str) if $update;
820             # streams can't be followed by a lone carriage-return.
821             # fredo: yes they can !!! -- use the MacOS, Luke.
822             # TBD isn't this covered by $cr as space CR?
823 162 100 66     590 if (($str =~ s/^stream(?:(?:\015\012)|\012|\015)//) and ($result->{'Length'}->val() != 0)) { # stream
824 11         36 my $length = $result->{'Length'}->val() + 0; # ensure seen as decimal
825 11         36 $result->{' streamsrc'} = $fh;
826 11         56 $result->{' streamloc'} = $fh->tell() - length($str);
827              
828 11 50       117 unless ($opts{'nostreams'}) {
829 11 50       37 if ($length > length($str)) {
830 0         0 $value = $str;
831 0         0 $length -= length($str);
832 0         0 read $fh, $str, $length + 11; # slurp the whole stream!
833             } else {
834 11         40 $value = '';
835             }
836 11         44 $value .= substr($str, 0, $length);
837 11         43 $result->{' stream'} = $value;
838 11         59 $result->{' nofilt'} = 1;
839 11 50       46 $str = update($fh, $str, 1) if $update; # tell update we are in-stream and only need an endstream
840 11         56 $str = substr($str, index($str, 'endstream') + 9);
841             }
842             }
843              
844 162 100 100     800 if (defined $result->{'Type'} and defined $types{$result->{'Type'}->val()}) {
845 38         178 bless $result, $types{$result->{'Type'}->val()};
846             }
847             # gdj: FIXME: if any of the ws chars were crs, then the whole
848             # string might not have been read.
849              
850             } elsif ($str =~ m/^([0-9]+)(?:$ws_char|$re_comment)+([0-9]+)(?:$ws_char|$re_comment)+R/s) {
851             # Indirect Object
852 166         585 my $num = $1 + 0; # ensure seen as decimal value
853 166         366 $value = $2 + 0;
854 166         2432 $str =~ s/^([0-9]+)(?:$ws_char|$re_comment)+([0-9]+)(?:$ws_char|$re_comment)+R//s;
855 166 100       531 unless ($result = $self->test_obj($num, $value)) {
856 131         725 $result = PDF::Builder::Basic::PDF::Objind->new();
857 131         505 $result->{' objnum'} = $num;
858 131         287 $result->{' objgen'} = $value;
859 131         420 $self->add_obj($result, $num, $value);
860             }
861 166         389 $result->{' parent'} = $self;
862 166         449 weaken $result->{' parent'};
863             #$result->{' realised'} = 0;
864             # removed to address changes being lost when an indirect object
865             # is realised twice
866             # gdj: FIXME: if any of the ws chars were crs, then the whole
867             # string might not have been read.
868              
869             } elsif ($str =~ m/^([0-9]+)(?:$ws_char|$re_comment)+([0-9]+)(?:$ws_char|$re_comment)+obj/s) {
870             # Object
871 104         214 my $obj;
872 104         391 my $num = $1 + 0; # ensure seen as decimal value
873 104         225 $value = $2 + 0;
874 104         2002 $str =~ s/^([0-9]+)(?:$ws_char|$re_comment)+([0-9]+)(?:$ws_char|$re_comment)+obj//s;
875 104         556 ($obj, $str) = $self->readval($str, %opts);
876 104 100       334 if ($result = $self->test_obj($num, $value)) {
877 90         390 $result->merge($obj);
878             } else {
879 14         52 $result = $obj;
880 14         39 $self->add_obj($result, $num, $value);
881 14         28 $result->{' realised'} = 1;
882             }
883 104 100       331 $str = update($fh, $str) if $update; # thanks to kundrat@kundrat.sk
884 104         643 $str =~ s/^endobj//;
885              
886             } elsif ($str =~ m|^/($reg_char*)|s) {
887             # Name
888 577         1820 $value = $1;
889 577         3458 $str =~ s|^/($reg_char*)||s;
890 577         2900 $result = PDF::Builder::Basic::PDF::Name->from_pdf($value, $self);
891              
892             } elsif ($str =~ m/^\(/) {
893             # Literal String
894             # We now need to find an unbalanced, unescaped right-paren.
895             # This can't be done with a regex.
896 2         6 my $value = '(';
897 2         6 $str = substr($str, 1);
898              
899 2         4 my $nested_level = 1;
900 2         6 while (1) {
901             # Ignore everything up to the first escaped or parenthesis character
902 2 50       12 if ($str =~ /^([^\\()]+)(.*)/s) {
903 2         7 $value .= $1;
904 2         7 $str = $2;
905             }
906              
907             # Ignore escaped parentheses
908 2 50       15 if ($str =~ /^(\\[()])/) {
    50          
    50          
    0          
909 0         0 $value .= $1;
910 0         0 $str = substr($str, 2);
911              
912             } elsif ($str =~ /^\(/) {
913             # Left parenthesis: increase nesting
914 0         0 $value .= '(';
915 0         0 $str = substr($str, 1);
916 0         0 $nested_level++;
917              
918             } elsif ($str =~ /^\)/) {
919             # Right parenthesis: decrease nesting
920 2         4 $value .= ')';
921 2         6 $str = substr($str, 1);
922 2         5 $nested_level--;
923 2 50       7 last unless $nested_level;
924              
925             } elsif ($str =~ /^(\\[^()])/) {
926             # Other escaped character
927 0         0 $value .= $1;
928 0         0 $str = substr($str, 2);
929              
930             } else {
931             # If there wasn't an escaped or parenthesis character,
932             # read some more.
933              
934             # We don't use update because we don't want to remove
935             # whitespace or comments.
936 0 0       0 $fh->read($str, 255, length($str)) or die 'Unterminated string.';
937             }
938             } # end while(TRUE) loop
939              
940 2         9 $result = PDF::Builder::Basic::PDF::String->from_pdf($value);
941             # end Literal String check
942              
943             } elsif ($str =~ m/^</) {
944             # Hex String
945 0         0 $str =~ s/^<//;
946 0         0 $fh->read($str, 255, length($str)) while (0 > index($str, '>'));
947 0         0 ($value, $str) = ($str =~ /^(.*?)>(.*)/s);
948 0         0 $result = PDF::Builder::Basic::PDF::String->from_pdf('<' . $value . '>');
949              
950             } elsif ($str =~ m/^\[/) {
951             # Array
952 98         421 $str =~ s/^\[//;
953 98 50       354 $str = update($fh, $str) if $update;
954 98         410 $result = PDFArray();
955 98         301 while ($str !~ m/^\]/) {
956 782         6927 $str =~ s/^$ws_char+//; # Ignore initial white space
957 782         2969 $str =~ s/^\%[^\015\012]*$ws_char+//; # Ignore comments
958              
959 782         2920 ($value, $str) = $self->readval($str, %opts);
960 782         3443 $result->add_elements($value);
961 782 50       2236 $str = update($fh, $str) if $update; # str might just be exhausted!
962             }
963 98         341 $str =~ s/^\]//;
964              
965             } elsif ($str =~ m/^(true|false)($irreg_char|$)/) {
966             # Boolean
967 0         0 $value = $1;
968 0         0 $str =~ s/^(?:true|false)//;
969 0         0 $result = PDF::Builder::Basic::PDF::Bool->from_pdf($value);
970              
971             } elsif ($str =~ m/^([+-.0-9]+)($irreg_char|$)/) {
972             # Number
973 370         946 $value = $1;
974 370         1221 $str =~ s/^([+-.0-9]+)//;
975              
976             # If $str only consists of whitespace (or is empty), call update to
977             # see if this is the beginning of an indirect object or reference
978 370 100 100     5920 if ($update and ($str =~ /^$re_whitespace*$/s or $str =~ /^$re_whitespace+[0-9]+$re_whitespace*$/s)) {
      100        
979 6         56 $str =~ s/^$re_whitespace+/ /s;
980 6         47 $str =~ s/$re_whitespace+$/ /s;
981 6         10 $str = update($fh, $str);
982 6 100       101 if ($str =~ m/^$re_whitespace*([0-9]+)$re_whitespace+(?:R|obj)/s) {
983 4         12 return $self->readval("$value $str", %opts);
984             }
985             }
986              
987 366         1493 $result = PDF::Builder::Basic::PDF::Number->from_pdf($value);
988              
989             } elsif ($str =~ m/^null($irreg_char|$)/) {
990             # Null
991 0         0 $str =~ s/^null//;
992 0         0 $result = PDF::Builder::Basic::PDF::Null->new();
993              
994             } else {
995 0         0 die "Can't parse `$str' near " . ($fh->tell()) . " length " . length($str) . ".";
996             }
997              
998 1475         8798 $str =~ s/^$ws_char+//s;
999 1475         5374 return ($result, $str);
1000             } # end of readval()
1001              
1002             =head2 read_obj
1003              
1004             $ref = $p->read_obj($objind, %opts)
1005              
1006             =over
1007              
1008             Given an indirect object reference, locate it and read the object returning
1009             the read-in object.
1010              
1011             =back
1012              
1013             =cut
1014              
1015             sub read_obj {
1016 86     86 1 225 my ($self, $objind, %opts) = @_;
1017              
1018 86   50     303 my $res = $self->read_objnum($objind->{' objnum'}, $objind->{' objgen'}, %opts) || return;
1019 86 50       347 $objind->merge($res) unless $objind eq $res;
1020              
1021 86         387 return $objind;
1022             }
1023              
1024             =head2 read_objnum
1025              
1026             $ref = $p->read_objnum($num, $gen, %opts)
1027              
1028             =over
1029              
1030             Returns a fully read object of given number and generation in this file
1031              
1032             =back
1033              
1034             =cut
1035              
1036             sub read_objnum {
1037 94     94 1 3305 my ($self, $num, $gen, %opts) = @_;
1038              
1039 94 50       256 croak 'Undefined object number in call to read_objnum($num, $gen)' unless defined $num;
1040 94 50       312 croak 'Undefined object generation in call to read_objnum($num, $gen)' unless defined $gen;
1041 94 50       574 croak "Invalid object number '$num' in call to read_objnum" unless $num =~ /^[0-9]+$/;
1042 94 50       347 croak "Invalid object generation '$gen' in call to read_objnum" unless $gen =~ /^[0-9]+$/;
1043              
1044 94   50     344 my $object_location = $self->locate_obj($num, $gen) || return;
1045 94         210 my $object;
1046              
1047             # Compressed object
1048 94 100       225 if (ref($object_location)) {
1049 4         8 my ($object_stream_num, $object_stream_pos) = @{$object_location};
  4         11  
1050              
1051 4         42 my $object_stream = $self->read_objnum($object_stream_num, 0, %opts);
1052 4 50       17 die 'Cannot find the compressed object stream' unless $object_stream;
1053              
1054 4 50       29 $object_stream->read_stream() if $object_stream->{' nofilt'};
1055              
1056             # An object stream starts with pairs of integers containing object numbers and
1057             # stream offsets relative to the First key
1058 4         10 my $fh;
1059             my $pairs;
1060 4 50       15 unless ($object_stream->{' streamfile'}) {
1061 4         17 $pairs = substr($object_stream->{' stream'}, 0, $object_stream->{'First'}->val());
1062             } else {
1063 0         0 CORE::open($fh, '<', $object_stream->{' streamfile'});
1064 0         0 read($fh, $pairs, $object_stream->{'First'}->val());
1065             }
1066 4         20 my @map = split /\s+/, $pairs;
1067              
1068             # Find the offset of the object in the stream
1069 4         10 my $index = $object_stream_pos * 2;
1070 4 50       16 die "Objind $num does not exist at index $index" unless $map[$index] == $num;
1071 4         12 my $start = $map[$index + 1];
1072              
1073             # Unless this is the last object in the stream, its length is
1074             # determined by the offset of the next object.
1075 4         11 my $last_object_in_stream = $map[-2];
1076 4         10 my $length;
1077 4 100       12 if ($last_object_in_stream == $num) {
1078 2 50       9 if ($object_stream->{' stream'}) {
1079 2         9 $length = length($object_stream->{' stream'}) - $object_stream->{'First'}->val() - $start;
1080             } else {
1081 0         0 $length = (-s $object_stream->{' streamfile'}) - $object_stream->{'First'}->val() - $start;
1082             }
1083             } else {
1084 2         10 my $next_start = $map[$index + 3];
1085 2         6 $length = $next_start - $start;
1086             }
1087              
1088             # Read the object from the stream
1089 4         14 my $stream = "$num 0 obj ";
1090 4 50       11 unless ($object_stream->{' streamfile'}) {
1091 4         31 $stream .= substr($object_stream->{' stream'}, $object_stream->{'First'}->val() + $start, $length);
1092             } else {
1093 0         0 seek($fh, $object_stream->{'First'}->val() + $start, 0);
1094 0         0 read($fh, $stream, $length, length($stream));
1095 0         0 close $fh;
1096             }
1097              
1098 4         30 ($object) = $self->readval($stream, %opts, update => 0);
1099 4         30 return $object;
1100             }
1101              
1102 90         505 my $current_location = $self->{' INFILE'}->tell();
1103 90         993 $self->{' INFILE'}->seek($object_location, 0);
1104 90         1051 ($object) = $self->readval('', %opts);
1105 90         559 $self->{' INFILE'}->seek($current_location, 0);
1106              
1107 90         784 return $object;
1108             } # end of read_objnum()
1109              
1110             =head2 new_obj
1111              
1112             $objind = $p->new_obj($obj)
1113              
1114             =over
1115              
1116             Creates a new, free object reference based on free space in the cross reference
1117             chain. If nothing is free, then think up a new number. If C<$obj>, then turns
1118             that object into this new object rather than returning a new object.
1119              
1120             =back
1121              
1122             =cut
1123              
1124             sub new_obj {
1125 1399     1399 1 3137 my ($self, $base) = @_;
1126 1399         2381 my $res;
1127              
1128 1399 50 66     4654 if (defined $self->{' free'} and scalar @{$self->{' free'}} > 0) {
  14         59  
1129 0         0 $res = shift(@{$self->{' free'}});
  0         0  
1130 0 0       0 if (defined $base) {
1131 0         0 my ($num, $gen) = @{$self->{' objects'}{$res->uid()}};
  0         0  
1132 0         0 $self->remove_obj($res);
1133 0         0 $self->add_obj($base, $num, $gen);
1134 0         0 return $self->out_obj($base);
1135             } else {
1136 0         0 $self->{' objects'}{$res->uid()}[2] = 0;
1137 0         0 return $res;
1138             }
1139             }
1140              
1141 1399         2502 my $tdict = $self;
1142 1399         2170 my $i;
1143 1399         3401 while (defined $tdict) {
1144 1400 50       5430 $i = $tdict->{' xref'}{defined($i) ? $i : ''}[0];
1145 1400   33     4322 while (defined $i and $i != 0) {
1146 0         0 my ($ni, $ng) = @{$tdict->{' xref'}{$i}};
  0         0  
1147 0 0       0 unless (defined $self->locate_obj($i, $ng)) {
1148 0 0       0 if (defined $base) {
1149 0         0 $self->add_obj($base, $i, $ng);
1150 0         0 return $base;
1151             } else {
1152 0   0     0 $res = $self->test_obj($i, $ng) || $self->add_obj(PDF::Builder::Basic::PDF::Objind->new(), $i, $ng);
1153 0         0 $self->out_obj($res);
1154 0         0 return $res;
1155             }
1156             }
1157 0         0 $i = $ni;
1158             }
1159 1400         3460 $tdict = $tdict->{' prev'};
1160             }
1161              
1162 1399         3666 $i = $self->{' maxobj'}++;
1163 1399 50       3023 if (defined $base) {
1164 1399         5390 $self->add_obj($base, $i, 0);
1165 1399         4206 $self->out_obj($base);
1166 1399         3580 return $base;
1167             } else {
1168 0         0 $res = $self->add_obj(PDF::Builder::Basic::PDF::Objind->new(), $i, 0);
1169 0         0 $self->out_obj($res);
1170 0         0 return $res;
1171             }
1172             }
1173              
1174             =head2 out_obj
1175              
1176             $p->out_obj($obj)
1177              
1178             =over
1179              
1180             Indicates that the given object reference should appear in the output xref
1181             table whether with data or freed.
1182              
1183             =back
1184              
1185             =cut
1186              
1187             sub out_obj {
1188 3053     3053 1 6387 my ($self, $obj) = @_;
1189              
1190             # This is why we've been keeping the outlist CACHE around; to speed
1191             # up this method by orders of magnitude (it saves up from having to
1192             # grep the full outlist each time through as we'll just do a lookup
1193             # in the hash) (which is super-fast).
1194 3053 100       10702 unless (exists $self->{' outlist_cache'}{$obj}) {
1195 1416         2295 push @{$self->{' outlist'}}, $obj;
  1416         3706  
1196             # weaken $self->{' outlist'}->[-1];
1197 1416         4785 $self->{' outlist_cache'}{$obj} = 1;
1198             }
1199              
1200 3053         6269 return $obj;
1201             }
1202              
1203             =head2 free_obj
1204              
1205             $p->free_obj($obj)
1206              
1207             =over
1208              
1209             Marks an object reference for output as being freed.
1210              
1211             =back
1212              
1213             =cut
1214              
1215             sub free_obj {
1216 0     0 1 0 my ($self, $obj) = @_;
1217              
1218 0         0 push @{$self->{' free'}}, $obj;
  0         0  
1219 0         0 $self->{' objects'}{$obj->uid()}[2] = 1;
1220 0         0 $self->out_obj($obj);
1221              
1222 0         0 return;
1223             }
1224              
1225             =head2 remove_obj
1226              
1227             $p->remove_obj($objind)
1228              
1229             =over
1230              
1231             Removes the object from all places where we might remember it.
1232              
1233             =back
1234              
1235             =cut
1236              
1237             sub remove_obj {
1238 0     0 1 0 my ($self, $objind) = @_;
1239              
1240             # who says it has to be fast
1241 0         0 delete $self->{' objects'}{$objind->uid()};
1242 0         0 delete $self->{' outlist_cache'}{$objind};
1243 0         0 delete $self->{' printed_cache'}{$objind};
1244 0         0 @{$self->{' outlist'}} = grep { $_ ne $objind } @{$self->{' outlist'}};
  0         0  
  0         0  
  0         0  
1245 0         0 @{$self->{' printed'}} = grep { $_ ne $objind } @{$self->{' printed'}};
  0         0  
  0         0  
  0         0  
1246             $self->{' objcache'}{$objind->{' objnum'}, $objind->{' objgen'}} = undef
1247 0 0       0 if $self->{' objcache'}{$objind->{' objnum'}, $objind->{' objgen'}} eq $objind;
1248              
1249 0         0 return $self;
1250             }
1251              
1252             =head2 ship_out
1253              
1254             $p->ship_out(@objects)
1255              
1256             $p->ship_out()
1257              
1258             =over
1259              
1260             Ships the given objects (or all objects for output if C<@objects> is empty) to
1261             the currently open output file (assuming there is one). Freed objects are not
1262             shipped, and once an object is shipped it is switched such that this file
1263             becomes its source and it will not be shipped again unless out_obj is called
1264             again. Notice that a shipped out object can be re-output or even freed, but
1265             that it will not cause the data already output to be changed.
1266              
1267             =back
1268              
1269             =cut
1270              
1271             sub ship_out {
1272 183     183 1 637 my ($self, @objects) = @_;
1273              
1274 183 50       739 return unless defined $self->{' OUTFILE'};
1275 183         504 my $fh = $self->{' OUTFILE'};
1276 183         672 seek($fh, 0, 2); # go to the end of the file
1277              
1278 183 50       803 @objects = @{$self->{' outlist'}} unless scalar @objects > 0;
  183         735  
1279 183         543 foreach my $objind (@objects) {
1280 1066 50       4017 next unless $objind->is_obj($self);
1281 1066         2074 my $j = -1;
1282 1066         2083 for (my $i = 0; $i < scalar @{$self->{' outlist'}}; $i++) {
  1066         3118  
1283 1066 50       3730 if ($self->{' outlist'}[$i] eq $objind) {
1284 1066         1786 $j = $i;
1285 1066         2548 last;
1286             }
1287             }
1288 1066 50       2547 next if $j < 0;
1289 1066         1800 splice(@{$self->{' outlist'}}, $j, 1);
  1066         2894  
1290 1066         3368 delete $self->{' outlist_cache'}{$objind};
1291 1066 50       1831 next if grep { $_ eq $objind } @{$self->{' free'}};
  0         0  
  1066         3205  
1292              
1293 1066 50       2590 map { $fh->print("\% $_ \n") } split(/$cr/, $objind->{' comments'}) if $objind->{' comments'};
  0         0  
1294 1066         4114 $self->{' locs'}{$objind->uid()} = $fh->tell();
1295 1066         2200 my ($objnum, $objgen) = @{$self->{' objects'}{$objind->uid()}}[0..1];
  1066         2734  
1296 1066         3981 $fh->printf("%d %d obj\n", $objnum, $objgen);
1297 1066         13625 $objind->outobjdeep($fh, $self);
1298 1066         3322 $fh->print("\nendobj\n");
1299              
1300             # Note that we've output this obj, not forgetting to update
1301             # the cache of what's printed.
1302 1066 50       8429 unless (exists $self->{' printed_cache'}{$objind}) {
1303 1066         1784 push @{$self->{' printed'}}, $objind;
  1066         3080  
1304 1066         4774 $self->{' printed_cache'}{$objind}++;
1305             }
1306             }
1307              
1308 183         706 return $self;
1309             } # end of ship_out()
1310              
1311             =head2 copy
1312              
1313             $p->copy($outpdf, \&filter)
1314              
1315             =over
1316              
1317             Iterates over every object in the file reading the object, calling C<filter>
1318             with the object, and outputting the result. If C<filter> is not defined,
1319             just copies input to output.
1320              
1321             =back
1322              
1323             =cut
1324              
1325             sub copy {
1326 0     0 1 0 my ($self, $outpdf, $filter) = @_;
1327 0         0 my ($obj, $minl, $mini, $ming);
1328              
1329 0         0 foreach my $key (grep { not m/^[\s\-]/ } keys %$self) {
  0         0  
1330 0 0       0 $outpdf->{$key} = $self->{$key} unless defined $outpdf->{$key};
1331             }
1332              
1333 0         0 my $tdict = $self;
1334 0         0 while (defined $tdict) {
1335 0         0 foreach my $i (sort {$a <=> $b} keys %{$tdict->{' xref'}}) {
  0         0  
  0         0  
1336 0         0 my ($nl, $ng, $nt) = @{$tdict->{' xref'}{$i}};
  0         0  
1337 0 0       0 next unless $nt eq 'n';
1338              
1339 0 0 0     0 if ($nl < $minl or $mini == 0) {
1340 0         0 $mini = $i;
1341 0         0 $ming = $ng;
1342 0         0 $minl = $nl;
1343             }
1344 0 0       0 unless ($obj = $self->test_obj($i, $ng)) {
1345 0         0 $obj = PDF::Builder::Basic::PDF::Objind->new();
1346 0         0 $obj->{' objnum'} = $i;
1347 0         0 $obj->{' objgen'} = $ng;
1348 0         0 $self->add_obj($obj, $i, $ng);
1349 0         0 $obj->{' parent'} = $self;
1350 0         0 weaken $obj->{' parent'};
1351 0         0 $obj->{' realised'} = 0;
1352             }
1353 0         0 $obj->realise();
1354 0 0       0 my $res = defined $filter ? &{$filter}($obj) : $obj;
  0         0  
1355 0 0 0     0 $outpdf->new_obj($res) unless (!$res || $res->is_obj($outpdf));
1356             }
1357 0         0 $tdict = $tdict->{' prev'};
1358             }
1359              
1360             # test for linearized and remove it from output
1361 0         0 $obj = $self->test_obj($mini, $ming);
1362 0 0 0     0 if ($obj->isa('PDF::Builder::Basic::PDF::Dict') && $obj->{'Linearized'}) {
1363 0         0 $outpdf->free_obj($obj);
1364             }
1365              
1366 0         0 return $self;
1367             } # end of copy()
1368              
1369             =head1 PRIVATE METHODS & FUNCTIONS
1370              
1371             The following methods and functions are considered B<private> to this class.
1372             This does not mean you cannot use them if you have a need, just that they
1373             aren't really designed for users of this class.
1374              
1375             =head2 locate_obj
1376              
1377             $offset = $p->locate_obj($num, $gen)
1378              
1379             =over
1380              
1381             Returns a file offset to the object asked for by following the chain of cross
1382             reference tables until it finds the one you want.
1383              
1384             =back
1385              
1386             =cut
1387              
1388             sub locate_obj {
1389 94     94 1 322 my ($self, $num, $gen) = @_;
1390              
1391 94         209 my $tdict = $self;
1392 94         196 my $seen = {};
1393 94         267 while (defined $tdict) {
1394 103         385 $seen->{$tdict->{' loc'}} = 1;
1395 103 100       469 if (ref $tdict->{' xref'}{$num}) {
1396 94         202 my $ref = $tdict->{' xref'}{$num};
1397 94 100       240 return $ref unless scalar(@$ref) == 3;
1398              
1399 90 50       229 if ($ref->[1] == $gen) {
1400 90 50       646 return $ref->[0] if $ref->[2] eq 'n';
1401 0         0 return; # if $ref->[2] eq 'f';
1402             }
1403             }
1404 9         23 $tdict = $tdict->{' prev'};
1405 9 50       77 if ($seen->{$tdict->{' loc'}}) {
1406 0         0 die "Malformed PDF: trailer contains a loop or repeated object ID";
1407             }
1408             }
1409              
1410 0         0 return;
1411             }
1412              
1413             =head2 update
1414              
1415             update($fh, $str, $instream)
1416              
1417             =over
1418              
1419             Keeps reading C<$fh> for more data to ensure that C<$str> has at least a line
1420             full for C<readval> to work on. At this point we also take the opportunity to
1421             ignore comments.
1422              
1423             =back
1424              
1425             =cut
1426              
1427             sub update {
1428 3262     3262 1 6545 my ($fh, $str, $instream) = @_;
1429              
1430 3262 50       6681 print STDERR 'fpos=' . tell($fh) . ' strlen=' . length($str) . "\n" if $readDebug;
1431 3262 100       6473 if ($instream) {
1432             # we are inside a (possible binary) stream
1433             # so we fetch data till we see an 'endstream'
1434             # -- fredo/2004-09-03
1435 11   33     69 while ($str !~ m/endstream/ and not $fh->eof()) {
1436 0 0       0 print STDERR 'fpos=' . tell($fh) . ' strlen=' . length($str) . "\n" if $readDebug;
1437 0         0 $fh->read($str, 314, length($str));
1438             }
1439             } else {
1440 3251         17618 $str =~ s/^$ws_char*//;
1441 3251   100     220092 while ($str !~ m/$cr/ and not $fh->eof()) {
1442 128 50       1217 print STDERR 'fpos=' . tell($fh) . ' strlen=' . length($str) . "\n" if $readDebug;
1443 128         524 $fh->read($str, 314, length($str));
1444 128         6594 $str =~ s/^$ws_char*//so;
1445             }
1446 3251         32182 while ($str =~ m/^\%/) { # restructured by fredo/2003-03-23
1447 1 50       2 print STDERR 'fpos=' . tell($fh) . ' strlen=' . length($str) . "\n" if $readDebug;
1448 1   33     42 $fh->read($str, 314, length($str)) while ($str !~ m/$cr/ and not $fh->eof());
1449 1         32 $str =~ s/^\%[^\015\012]*$ws_char*//so; # fixed for reportlab -- fredo
1450             }
1451             }
1452              
1453 3262         9871 return $str;
1454             } # end of update()
1455              
1456             =head2 test_obj
1457              
1458             $objind = $p->test_obj($num, $gen)
1459              
1460             =over
1461              
1462             Tests the cache to see whether an object reference (which may or may not have
1463             been getobj()ed) has been cached. Returns it if it has.
1464              
1465             =back
1466              
1467             =cut
1468              
1469             sub test_obj {
1470 270     270 1 658 my ($self, $num, $gen) = @_;
1471              
1472 270         1454 return $self->{' objcache'}{$num, $gen};
1473             }
1474              
1475             =head2 add_obj
1476              
1477             $p->add_obj($objind)
1478              
1479             =over
1480              
1481             Adds the given object to the internal object cache.
1482              
1483             =back
1484              
1485             =cut
1486              
1487             sub add_obj {
1488 1544     1544 1 3684 my ($self, $obj, $num, $gen) = @_;
1489              
1490 1544         7348 $self->{' objcache'}{$num, $gen} = $obj;
1491 1544         8152 $self->{' objects'}{$obj->uid()} = [$num, $gen];
1492             # weaken $self->{' objcache'}{$num, $gen};
1493 1544         3502 return $obj;
1494             }
1495              
1496             =head2 readxrtr
1497              
1498             $tdict = $p->readxrtr($xpos, %options)
1499              
1500             =over
1501              
1502             Recursive function which reads each of the cross-reference and trailer tables
1503             in turn until there are no more.
1504              
1505             Returns a dictionary corresponding to the trailer chain. Each trailer also
1506             includes the corresponding cross-reference table.
1507              
1508             The structure of the xref private element in a trailer dictionary is of an
1509             anonymous hash of cross reference elements by object number. Each element
1510             consists of an array of 3 elements corresponding to the three elements read
1511             in [location, generation number, free or used]. See the PDF specification
1512             for details.
1513              
1514             See C<open> for options allowed.
1515              
1516             =back
1517              
1518             =cut
1519              
1520             sub _unpack_xref_stream {
1521 78     78   187 my ($self, $width, $data) = @_;
1522              
1523             # handle some oddball cases
1524 78 50       244 if ($width == 3) {
    50          
    50          
    50          
1525 0         0 $data = "\x00$data";
1526 0         0 $width = 4;
1527             } elsif ($width == 5) {
1528 0         0 $data = "\x00\x00\x00$data";
1529 0         0 $width = 8;
1530             } elsif ($width == 6) {
1531 0         0 $data = "\x00\x00$data";
1532 0         0 $width = 8;
1533             } elsif ($width == 7) {
1534 0         0 $data = "\x00$data";
1535 0         0 $width = 8;
1536             }
1537             # in all cases, "Network" (Big-Endian) byte order assumed
1538 78 100       185 return unpack('C', $data) if $width == 1;
1539 52 50       167 return unpack('n', $data) if $width == 2;
1540 0 0       0 return unpack('N', $data) if $width == 4;
1541 0 0       0 if ($width == 8) {
1542             # Some ways other packages handle this, without Perl-64, according
1543             # to Vadim Repin. Possibly they end up converting the value to
1544             # "double" behind the scenes if on a 32-bit platform?
1545             # PDF::Tiny return hex unpack('H16', $data);
1546             # CAM::PDF my @b = unpack('C*', $data);
1547             # my $i=0; ($i <<= 8) += shift @b while @b; return $i;
1548            
1549 0 0       0 if (substr($data, 0, 4) eq "\x00\x00\x00\x00") {
1550             # can treat as 32 bit unsigned int
1551 0         0 return unpack('N', substr($data, 4, 4));
1552             } else {
1553             # requires 64-bit platform (chip and Perl), else fatal error
1554             # it may blow up and produce a smoking crater if 32-bit Perl!
1555             # also note that Q needs Big-Endian flag (>) specified, else
1556             # it will use the native chip order (Big- or Little- Endian)
1557 0         0 return unpack('Q>', $data);
1558             }
1559             }
1560              
1561 0         0 die "Unsupported field width: $width. 1-8 supported.";
1562             }
1563              
1564             sub readxrtr {
1565 21     21 1 124 my ($self, $xpos, %options) = @_;
1566             # $xpos SHOULD be pointing to "xref" keyword
1567             # UNLESS an xref stream is in use (v 1.5+)
1568             # copy dashed option names to preferred undashed names
1569 21 50 33     137 if (defined $options{'-diags'} && !defined $options{'diags'}) { $options{'diags'} = delete($options{'-diags'}); }
  0         0  
1570              
1571 21         53 my ($tdict, $buf, $xmin, $xnum, $xdiff);
1572              
1573 21         78 my $fh = $self->{' INFILE'};
1574 21         124 $fh->seek($xpos, 0);
1575 21         265 $fh->read($buf, 22); # 22 should overlap into first subsection
1576 21         442 $buf = update($fh, $buf); # fix for broken JAWS xref calculation.
1577              
1578 21         61 my $xlist = {};
1579              
1580             ## it seems that some products calculate wrong prev entries (short)
1581             ## so we seek ahead to find one -- fredo; save for now
1582             #while ($buf !~ m/^xref$cr/i && !eof($fh)) {
1583             # $buf =~ s/^(\s+|\S+|.)//i;
1584             # $buf = update($fh, $buf);
1585             #}
1586              
1587 21 100       501 if ($buf =~ s/^xref$cr//i) { # xref table, remove xrefEOL from buffer
    50          
1588             # Plain XRef tables.
1589             #
1590             # look to match startobj# count# EOL of first (or only) subsection
1591             # supposed to be single ASCII space between numbers, but this is
1592             # more lenient for some writers, allowing 1 or more whitespace
1593 18         47 my $subsection_count = 0;
1594 18         104 my $entry_format_error = 0;
1595 18         44 my $xrefListEmpty = 0;
1596              
1597 18         962 while ($buf =~ m/^$ws_char*([0-9]+)$ws_char+([0-9]+)$ws_char*$cr(.*?)$/s) {
1598 23         69 my $old_buf = $buf;
1599 23         70 $xmin = $1; # starting object number of this subsection
1600 23         51 $xnum = $2; # number of entries in this subsection
1601 23         58 $buf = $3; # remainder of buffer
1602 23         48 $subsection_count++;
1603             # go back and warn if other than single space separating numbers
1604 23 50       474 unless ($old_buf =~ /^[0-9]+ [0-9]+$cr/) { #orig 'warn'
1605 0 0       0 if ($options{'diags'} == 1) {
1606             # See PDF 1.7 section 7.5.4: Cross-Reference Table
1607 0         0 warn "Malformed xref: subsection header needs a single\n" .
1608             "ASCII space between the numbers and no extra spaces.\n";
1609             }
1610             }
1611 23         80 $xdiff = length($buf); # how much remaining in buffer
1612              
1613             # in case xnum == 0 is permitted (or used and tolerated by readers),
1614             # skip over entry reads and go to next subsection
1615 23 50       108 if ($xnum < 1) {
1616 0 0       0 if ($options{'diags'} == 1) {
1617 0         0 warn "Xref subsection has 0 entries. Skipped.\n";
1618             }
1619 0         0 $xrefListEmpty = 1;
1620 0         0 next;
1621             }
1622              
1623             # read chunk of entire subsection list
1624 23         75 my $entry_size = 20;
1625             # test read first entry, see if $cr in expected place, adjust size
1626 23         190 $fh->read($buf, $entry_size * 1 - $xdiff + 15, $xdiff);
1627 23 50       774 if ($buf =~ m/^(.*?)$cr/) {
1628 23         207 $entry_size = length($1) + 2;
1629             }
1630 23 50 33     96 if ($entry_size != 20 && $options{'diags'} == 1) {
1631 0         0 warn "Xref entries supposed to be 20 bytes long, are $entry_size.\n";
1632             }
1633 23         52 $xdiff = length($buf);
1634              
1635             # read remaining entries
1636 23         179 $fh->read($buf, $entry_size * $xnum - $xdiff + 15, $xdiff);
1637             # each entry is two integers and flag. spec says single ASCII space
1638             # between each field and certain length for each (10, 5, 1), so
1639             # this appears to be more lenient than spec
1640             # is object 0 supposed to be in subsection 1, or is any place OK?
1641 23   66     1562 while ($xnum-- > 0 and
1642             $buf =~ m/^$ws_char*(\d+)$ws_char+(\d+)$ws_char+([nf])$ws_char*$cr/) {
1643             # check if format doesn't match spec
1644 132 50 33     1233 if ($buf =~ m/^\d{10} \d{5} [nf]$cr/ ||
1645             $entry_format_error) {
1646             # format OK or have already reported format problem
1647             } else {
1648 0 0       0 if ($options{'diags'} == 1) {
1649 0         0 warn "Xref entry readable, but doesn't meet PDF spec.\n";
1650             }
1651 0         0 $entry_format_error++;
1652             }
1653              
1654 132         1638 $buf =~ s/^$ws_char*(\d+)$ws_char+(\d+)$ws_char+([nf])$ws_char*$cr//;
1655             # $1 = object's starting offset in file (n) or
1656             # next object in free list (f) [0 if last]
1657             # $2 = generation number (n) or 65535 for object 0 (f) or
1658             # next generation number (f)
1659             # $3 = flag (n = object in use, f = free)
1660             # buf reduced by entry just processed
1661 132 50       373 if (exists $xlist->{$xmin}) {
1662 0 0       0 if ($options{'diags'} == 1) {
1663 0         0 warn "Duplicate object number $xmin in xref table ignored.\n";
1664             }
1665             } else {
1666 132         606 $xlist->{$xmin} = [$1, $2, $3];
1667 132 50 66     425 if ($xmin == 0 && $subsection_count > 1 && $options{'diags'} == 1) {
      33        
1668 0         0 warn "Xref object 0 entry not in first subsection.\n";
1669             }
1670             }
1671 132         1148 $xmin++;
1672             } # traverse one subsection for objects xmin through xmin+xnum-1
1673             # go back for next subsection (if any)
1674             } # loop through xref subsections
1675             # fall through to here when run out of xref subsections
1676             # xlist should have two or more object entries, may not be contiguous
1677              
1678             # should have an object 0
1679             # at this point, no idea if object 0 was in first subsection (legal?)
1680             # could attempt a fixup if no object 0 found. many fixups are quite
1681             # risky and could end up corrupting the free list.
1682             # there's no guarantee that a proper free list will result, but any
1683             # error should hopefully be caught further on
1684 18 0 33     87 if (!exists $xlist->{'0'} && !$xrefListEmpty) {
1685             # for now, 1 subsection starting with 1, and only object 1 in
1686             # free list, try to fix up
1687 0 0 0     0 if ($subsection_count == 1 && exists $xlist->{'1'}) {
1688             # apparently a common enough error in PDF writers
1689              
1690 0 0 0     0 if ($xlist->{'1'}[0] == 0 && # only member of free list
      0        
1691             $xlist->{'1'}[1] == 65535 &&
1692             $xlist->{'1'}[2] eq 'f') {
1693             # object 1 appears to be the free list head, so shift
1694             # down all objects
1695 0 0       0 if ($options{'diags'} == 1) {
1696 0         0 warn "xref appears to be mislabeled starting with 1. Shift down all elements.\n";
1697             }
1698 0         0 my $next = 1;
1699 0         0 while (exists $xlist->{$next}) {
1700 0         0 $xlist->{$next - 1} = $xlist->{$next};
1701 0         0 $next++;
1702             }
1703 0         0 delete $xlist->{--$next};
1704              
1705             } else {
1706             # if object 1 does not appear to be a free list head,
1707             # insert a new object 0
1708 0 0       0 if ($options{'diags'} == 1) {
1709 0         0 warn "Xref appears to be missing object 0. Insert a new one.\n";
1710             }
1711 0         0 $xlist->{'0'} = [0, 65535, 'f'];
1712             }
1713             } else {
1714 0 0       0 if ($options{'diags'} == 1) {
1715 0         0 warn "Malformed cross reference list in PDF file $self->{' fname'} -- no object 0 (free list head)\n";
1716             }
1717 0         0 $xlist->{'0'} = [0, 65535, 'f'];
1718             }
1719             } # no object 0 entry
1720              
1721             # build/validate the free list (and no active objects have f flag)
1722 18         144 my @free_list;
1723 18         172 foreach (sort {$a <=> $b} keys %{ $xlist }) {
  256         469  
  18         198  
1724             # if 'f' flag, is in free list
1725 132 100       405 if ($xlist->{$_}[2] eq 'f') {
    50          
1726 18 50 33     116 if ($xlist->{$_}[1] <= 0 && $options{'diags'} == 1) {
1727 0         0 warn "Xref free list entry $_ with bad next generation number.\n";
1728             } else {
1729 18         64 push @free_list, $_; # should be in numeric order (0 first)
1730             }
1731             } elsif ($xlist->{$_}[2] eq 'n') {
1732 114 50 33     327 if ($xlist->{$_}[0] <= 0 && $options{'diags'} == 1) {
1733 0         0 warn "Xref active object $_ entry with bad length ".($xlist->{$_}[1])."\n";
1734             }
1735 114 50 33     365 if ($xlist->{$_}[1] < 0 && $options{'diags'} == 1) {
1736 0         0 warn "Xref active object $_ entry with bad generation number ".($xlist->{$_}[1])."\n";
1737             }
1738             } else {
1739 0 0       0 if ($options{'diags'} == 1) {
1740 0         0 warn "Xref entry has flag that is not 'f' or 'n'.\n";
1741             }
1742             }
1743             } # go through xlist and build free_list and check entries
1744             # traverse free list and check that "next object" is also in free list
1745 18         60 my $next_free = 0; # object 0 should always be in free list
1746 18 50 33     96 if ($xlist->{'0'}[1] != 65535 && $options{'diags'} == 1) {
1747 0         0 warn "Object 0 next generation is not 65535.\n";
1748             }
1749             do {
1750 18 50       103 if ($xlist->{$next_free}[2] ne 'f') {
1751 0 0       0 if ($options{'diags'} == 1) {
1752 0         0 warn "Corrupted free object list: next=$next_free is not a free object.\n";
1753             }
1754 0         0 $next_free = 0; # force end of free list
1755             } else {
1756 18         53 $next_free = $xlist->{$next_free}[0];
1757             }
1758             # remove this entry from free list array
1759 18         222 splice(@free_list, index(@free_list, $next_free), 1);
1760 18   33     52 } while ($next_free && exists $xlist->{$next_free});
1761 18 50 33     64 if (scalar @free_list && $options{'diags'} == 1) {
1762 0         0 warn "Corrupted xref list: object(s) @free_list marked as free, but are not in free chain.\n";
1763             }
1764              
1765             # done with cross reference table, so go on to trailer
1766 18 50 33     162 if ($buf !~ /^\s*trailer\b/i && $options{'diags'} == 1) { #orig 'die'
1767 0         0 warn "Malformed trailer in PDF file $self->{' fname'} at " . ($fh->tell() - length($buf));
1768             }
1769              
1770 18         86 $buf =~ s/^\s*trailer\b//i;
1771              
1772 18         118 ($tdict, $buf) = $self->readval($buf);
1773              
1774             } elsif ($buf =~ m/^(\d+)\s+(\d+)\s+obj/i) { # object for xref stream
1775 3         20 my ($xref_obj, $xref_gen) = ($1, $2);
1776 3         42 $PDF::Builder::global_pdf->verCheckOutput(1.5, "importing cross-reference stream");
1777             # XRef streams
1778 3         16 ($tdict, $buf) = $self->readval($buf);
1779              
1780 3 50       15 unless ($tdict->{' stream'}) {
1781 0 0       0 if ($options{'diags'} == 1) {
1782 0         0 warn "Malformed XRefStm at $xref_obj $xref_gen obj in PDF file $self->{' fname'}";
1783             }
1784             }
1785 3         20 $tdict->read_stream(1);
1786              
1787 3         11 my $stream = $tdict->{' stream'};
1788 3         7 my @widths = map { $_->val() } @{$tdict->{'W'}->val()};
  9         25  
  3         19  
1789              
1790 3         7 my $start = 0;
1791 3         8 my $last;
1792              
1793             my @index;
1794 3 100       11 if (defined $tdict->{'Index'}) {
1795 1         2 @index = map { $_->val() } @{$tdict->{'Index'}->val()};
  2         5  
  1         4  
1796             } else {
1797 2         9 @index = (0, $tdict->{'Size'}->val());
1798             }
1799              
1800 3         11 while (scalar @index) {
1801 3         8 $start = shift(@index);
1802 3         10 $last = $start + shift(@index) - 1;
1803              
1804 3         14 for my $i ($start...$last) {
1805             # Replaced "for $xmin" because it creates a loop-specific local
1806             # variable, and we need $xmin to be correct for maxobj below.
1807 26         42 $xmin = $i;
1808              
1809 26         50 my @cols;
1810              
1811 26         44 for my $w (@widths) {
1812 78         114 my $data;
1813 78 50       306 $data = $self->_unpack_xref_stream($w, substr($stream, 0, $w, '')) if $w;
1814              
1815 78         166 push @cols, $data;
1816             }
1817              
1818 26 100       60 $cols[0] = 1 unless defined $cols[0];
1819 26 50 33     72 if ($cols[0] > 2 && $options{'diags'} == 1) {
1820 0         0 warn "Invalid XRefStm entry type ($cols[0]) at $xref_obj $xref_gen obj";
1821             }
1822              
1823 26 50       75 next if exists $xlist->{$xmin};
1824              
1825 26 50       94 my @objind = ($cols[1], defined($cols[2]) ? $cols[2] : ($xmin ? 0 : 65535));
    100          
1826 26 100       76 push @objind, ($cols[0] == 0? 'f': 'n') if $cols[0] < 2;
    100          
1827              
1828 26         126 $xlist->{$xmin} = \@objind;
1829             }
1830             }
1831              
1832             } else { #orig 'die'
1833 0 0       0 if ($options{'diags'} == 1) {
1834 0         0 warn "Malformed xref in PDF file $self->{' fname'}";
1835             }
1836             }
1837              
1838             # did we get to here without managing to set $xmin?
1839 21   50     123 $xmin ||= 0;
1840              
1841 21         81 $tdict->{' loc'} = $xpos;
1842 21         75 $tdict->{' xref'} = $xlist;
1843 21 100       110 $self->{' maxobj'} = $xmin + 1 if $xmin + 1 > $self->{' maxobj'};
1844             $tdict->{' prev'} = $self->readxrtr($tdict->{'Prev'}->val(), %options)
1845 21 100 66     140 if (defined $tdict->{'Prev'} and $tdict->{'Prev'}->val() != 0);
1846 21 100       84 delete $tdict->{' prev'} unless defined $tdict->{' prev'};
1847              
1848 21         95 return $tdict;
1849             } # end of readxrtr()
1850              
1851             =head2 out_trailer
1852              
1853             $p->out_trailer($tdict, $update)
1854              
1855             $p->out_trailer($tdict)
1856              
1857             =over
1858              
1859             Outputs the body and trailer for a PDF file by outputting all the objects in
1860             the ' outlist' and then outputting a xref table for those objects and any
1861             freed ones. It then outputs the trailing dictionary and the trailer code.
1862              
1863             =back
1864              
1865             =cut
1866              
1867             sub out_trailer {
1868 178     178 1 846 my ($self, $tdict, $update) = @_;
1869              
1870 178         567 my $fh = $self->{' OUTFILE'};
1871              
1872 178         376 while (@{$self->{' outlist'}}) {
  361         1602  
1873 183         837 $self->ship_out();
1874             }
1875              
1876 178 100       704 if (defined $self->{'Size'}) {
1877 8         71 $tdict->{'Size'} = PDFNum($self->{' maxobj'} -1 );
1878             } else {
1879 170         986 $tdict->{'Size'} = PDFNum($self->{' maxobj'} );
1880             }
1881              
1882 178         782 my $tloc = $fh->tell();
1883             ## $fh->print("xref\n");
1884             # instead of directly outputting (fh->print) xreflist, we accumulate in @out
1885 178         1230 my @out;
1886 178 100       432 my @xreflist = sort { $self->{' objects'}{$a->uid()}[0] <=> $self->{' objects'}{$b->uid()}[0] } (@{$self->{' printed'} || []}, @{$self->{' free'} || []});
  1596 100       4118  
  178         735  
  178         1347  
1887              
1888 178         588 my ($i, $j, $k);
1889 178 100       666 unless ($update) {
1890 170         336 $i = 1;
1891 170         740 for ($j = 0; $j < @xreflist; $j++) {
1892 1044         1520 my @inserts;
1893 1044         1947 $k = $xreflist[$j];
1894 1044         2636 while ($i < $self->{' objects'}{$k->uid()}[0]) {
1895 0         0 my ($n) = PDF::Builder::Basic::PDF::Objind->new();
1896 0         0 $self->add_obj($n, $i, 0);
1897 0         0 $self->free_obj($n);
1898 0         0 push(@inserts, $n);
1899 0         0 $i++;
1900             }
1901 1044         1856 splice(@xreflist, $j, 0, @inserts);
1902 1044         1625 $j += @inserts;
1903 1044         2327 $i++;
1904             }
1905             }
1906              
1907 178 100       449 my @freelist = sort { $self->{' objects'}{$a->uid()}[0] <=> $self->{' objects'}{$b->uid()}[0] } @{$self->{' free'} || []};
  0         0  
  178         983  
1908              
1909 178         381 $j = 0; my $first = -1; $k = 0;
  178         425  
  178         496  
1910 178         739 for ($i = 0; $i <= $#xreflist + 1; $i++) {
1911 1244 100 100     3900 if ($i > $#xreflist || $self->{' objects'}{$xreflist[$i]->uid()}[0] != $j + 1) {
1912             ## $fh->print(($first == -1 ? "0 " : "$self->{' objects'}{$xreflist[$first]->uid()}[0] ") . ($i - $first) . "\n");
1913 190 100       1195 push @out, ($first == -1 ? "0 " : "$self->{' objects'}{$xreflist[$first]->uid()}[0] ") . ($i - $first) . "\n";
1914 190 100       662 if ($first == -1) {
1915             ## $fh->printf("%010d 65535 f \n", defined $freelist[$k] ? $self->{' objects'}{$freelist[$k]->uid()}[0] : 0);
1916 178 50       1162 push @out, sprintf("%010d 65535 f \n", defined $freelist[$k] ? $self->{' objects'}{$freelist[$k]->uid()}[0] : 0);
1917 178         437 $first = 0;
1918             }
1919 190         777 for ($j = $first; $j < $i; $j++) {
1920 1066         1831 my $xref = $xreflist[$j];
1921 1066 50 33     2946 if (defined $freelist[$k] && defined $xref && "$freelist[$k]" eq "$xref") {
      33        
1922 0         0 $k++;
1923             ## $fh->print(pack("A10AA5A4",
1924             push(@out, pack("A10AA5A4",
1925             sprintf("%010d", (defined $freelist[$k] ?
1926             $self->{' objects'}{$freelist[$k]->uid()}[0] : 0)), " ",
1927 0 0       0 sprintf("%05d", $self->{' objects'}{$xref->uid()}[1] + 1),
1928             " f \n"));
1929             } else {
1930             ## $fh->print(pack("A10AA5A4", sprintf("%010d", $self->{' locs'}{$xref->uid()}), " ",
1931             push(@out, pack("A10AA5A4", sprintf("%010d", $self->{' locs'}{$xref->uid()}), " ",
1932 1066         2916 sprintf("%05d", $self->{' objects'}{$xref->uid()}[1]),
1933             " n \n"));
1934             }
1935             }
1936 190         449 $first = $i;
1937 190 100       910 $j = $self->{' objects'}{$xreflist[$i]->uid()}[0] if ($i < scalar @xreflist);
1938              
1939             } else {
1940 1054         2480 $j++;
1941             }
1942             } # end for loop through xreflists
1943             ## $fh->print("trailer\n");
1944             ## $tdict->outobjdeep($fh, $self);
1945             ## $fh->print("\nstartxref\n$tloc\n%%EOF\n");
1946             ## start new code for 117184 fix by Vadim. @out has array of xref content
1947 178 50 33     886 if (exists $tdict->{'Type'} and $tdict->{'Type'}->val() eq 'XRef') {
1948              
1949 0         0 my (@index, @stream);
1950 0         0 for (@out) { # @out is the accumulated cross reference list
1951 0         0 my @a = split;
1952 0 0       0 @a == 2 ? push @index, @a : push @stream, \@a;
1953             }
1954 0         0 my $i = $self->{' maxobj'}++;
1955 0         0 $self->add_obj($tdict, $i, 0);
1956 0         0 $self->out_obj($tdict);
1957              
1958 0         0 push @index, $i, 1;
1959 0         0 push @stream, [ $tloc, 0, 'n' ];
1960              
1961 0 0       0 my $len = $tloc > 0xFFFF ? 4 : 2; # don't expect files > 4 Gb
1962 0 0       0 my $tpl = $tloc > 0xFFFF ? 'CNC' : 'CnC'; # don't expect gennum > 255, it's absurd.
1963             # Adobe doesn't use them anymore anyway
1964 0         0 my $sstream = '';
1965 0         0 my @prev = ( 0 ) x ( $len + 2 ); # init prev to all 0's
1966 0         0 for (@stream) {
1967             # OK to zero out gennum of 65535 for a cross reference stream,
1968             # rather than just truncating to 255 -- Vadim
1969 0 0 0     0 $_->[ 1 ] = 0 if $_->[ 1 ] == 65535 and
1970             $_->[ 2 ] eq 'f';
1971             # make sure is 0..255, since will pack with 'C' code -- Phil
1972 0 0       0 if ($_->[1] > 0xFF) {
1973 0         0 print "generation number ".($_->[1])." in entry '$_->[0] $_->[1] $_->[2]' exceeds 256, reduced to ".($_->[1] & 0x00FF)."\n";
1974             }
1975 0         0 $_->[ 1 ] &= 0x00FF;
1976 0 0       0 my @line = unpack 'C*', pack $tpl, $_->[ 2 ] eq 'n'? 1 : 0, @{ $_ }[ 0 .. 1 ];
  0         0  
1977              
1978             $sstream .= pack 'C*', 2, # prepend filtering method, "PNG Up"
1979 0         0 map {($line[ $_ ] - $prev[ $_ ] + 256) % 256} 0 .. $#line;
  0         0  
1980 0         0 @prev = @line;
1981             }
1982             # build a dictionary for the cross reference stream
1983 0         0 $tdict->{'Size'} = PDFNum($i + 1);
1984 0         0 $tdict->{'Index'} = PDFArray(map { PDFNum($_) } @index);
  0         0  
1985 0         0 $tdict->{'W'} = PDFArray(map { PDFNum($_) } 1, $len, 1);
  0         0  
1986 0         0 $tdict->{'Filter'} = PDFName('FlateDecode');
1987              
1988             # it's compressed
1989 0         0 $tdict->{'DecodeParms'} = PDFDict();
1990 0         0 $tdict->{'DecodeParms'}->val()->{'Predictor'} = PDFNum(12);
1991 0         0 $tdict->{'DecodeParms'}->val()->{'Columns'} = PDFNum($len + 2);
1992              
1993 0         0 $sstream = PDF::Builder::Basic::PDF::Filter::FlateDecode->new()->outfilt($sstream, 1);
1994 0         0 $tdict->{' stream'} = $sstream;
1995 0         0 $tdict->{' nofilt'} = 1;
1996 0         0 delete $tdict->{'Length'};
1997 0         0 $self->ship_out();
1998             } else {
1999             # delete may be moved later by Vadim closer to where XRefStm created
2000 178         586 delete $tdict->{'XRefStm'};
2001             # almost the original code
2002 178         1049 $fh->print("xref\n", @out, "trailer\n");
2003 178         2401 $tdict->outobjdeep($fh, $self);
2004 178         558 $fh->print("\n");
2005             }
2006 178         1817 $fh->print("startxref\n$tloc\n%%EOF\n");
2007             ## end of new code
2008              
2009 178         4346 return;
2010             } # end of out_trailer()
2011              
2012             =head2 _new
2013              
2014             PDF::Builder::Basic::PDF::File->_new()
2015              
2016             =over
2017              
2018             Creates a very empty PDF file object (used by new() and open())
2019              
2020             =back
2021              
2022             =cut
2023              
2024             sub _new {
2025 252     252   742 my $class = shift();
2026 252         596 my $self = {};
2027              
2028 252         622 bless $self, $class;
2029 252         1181 $self->{' outlist'} = [];
2030 252         847 $self->{' outlist_cache'} = {}; # A cache of what's in the 'outlist'
2031 252         819 $self->{' maxobj'} = 1;
2032 252         793 $self->{' objcache'} = {};
2033 252         851 $self->{' objects'} = {};
2034              
2035 252         731 return $self;
2036             }
2037              
2038             1;
2039              
2040             =head1 AUTHOR
2041              
2042             Martin Hosken Martin_Hosken@sil.org
2043              
2044             Copyright Martin Hosken 1999
2045              
2046             No warranty or expression of effectiveness, least of all regarding anyone's
2047             safety, is implied in this software or documentation.