File Coverage

blib/lib/CAM/PDF.pm

Criterion	Covered	Total	%
statement	1363	2499	54.5
branch	421	1028	40.9
condition	91	315	28.8
subroutine	98	141	69.5
pod	106	106	100.0
total	2079	4089	50.8

line	stmt	bran	cond	sub	pod	time	code
1							package CAM::PDF;
2
3	3			3		52405	use 5.006;
	3					12
	3					125
4	3			3		16	use warnings;
	3					6
	3					118
5	3			3		29	use strict;
	3					6
	3					127
6	3			3		15	use Carp qw(croak cluck);
	3					5
	3					239
7	3			3		2943	use English qw(-no_match_vars);
	3					11208
	3					18
8	3			3		3485	use CAM::PDF::Node;
	3					17
	3					79
9	3			3		2022	use CAM::PDF::Decrypt;
	3					9
	3					85307
10
11							our $VERSION = '1.60';
12
13							## no critic(Bangs::ProhibitCommentedOutCode)
14							## no critic(ControlStructures::ProhibitDeepNests)
15
16							=for stopwords eval'ed CR-NL PDFLib defiltered prefill indices inline de-embedding 4th linearized viewable decrypted
17
18							=head1 NAME
19
20							CAM::PDF - PDF manipulation library
21
22							=head1 LICENSE
23
24							Copyright 2002-2006 Clotho Advanced Media, Inc., L
25
26							Copyright 2007-2008 Chris Dolan
27
28							This library is free software; you can redistribute it and/or modify it
29							under the same terms as Perl itself.
30
31							=head1 SYNOPSIS
32
33							use CAM::PDF;
34
35							my $pdf = CAM::PDF->new('test1.pdf');
36
37							my $page1 = $pdf->getPageContent(1);
38							[ ... mess with page ... ]
39							$pdf->setPageContent(1, $page1);
40							[ ... create some new content ... ]
41							$pdf->appendPageContent(1, $newcontent);
42
43							my $anotherpdf = CAM::PDF->new('test2.pdf');
44							$pdf->appendPDF($anotherpdf);
45
46							my @prefs = $pdf->getPrefs();
47							$prefs[$CAM::PDF::PREF_OPASS] = 'mypassword';
48							$prefs[$CAM::PDF::PREF_UPASS] = 'mypassword';
49							$pdf->setPrefs(@prefs);
50
51							$pdf->cleanoutput('out1.pdf');
52							print $pdf->toPDF();
53
54							Many example programs are included in this distribution to do useful
55							tasks. See the C subdirectory.
56
57							=head1 DESCRIPTION
58
59							This package reads and writes any document that conforms to the PDF
60							specification generously provided by Adobe at
61							L
62							(link last checked Oct 2005).
63
64							The file format through PDF 1.5 is well-supported, with the exception
65							of the "linearized" or "optimized" output format, which this module
66							can read but not write. Many specific aspects of the document model
67							are not manipulable with this package (like fonts), but if the input
68							document is correctly written, then this module will preserve the
69							model integrity.
70
71							The PDF writing feature saves as PDF 1.4-compatible. That means that
72							we cannot write compressed object streams. The consequence is that
73							reading and then writing a PDF 1.5+ document may enlarge the resulting
74							file by a fair margin.
75
76							This library grants you some power over the PDF security model. Note
77							that applications editing PDF documents via this library MUST respect
78							the security preferences of the document. Any violation of this
79							respect is contrary to Adobe's intellectual property position, as
80							stated in the reference manual at the above URL.
81
82							Technical detail regarding corrupt PDFs: This library adheres strictly
83							to the PDF specification. Adobe's Acrobat Reader is more lenient,
84							allowing some corrupted PDFs to be viewable. Therefore, it is
85							possible that some PDFs may be readable by Acrobat that are illegible
86							to this library. In particular, files which have had line endings
87							converted to or from DOS/Windows style (i.e. CR-NL) may be rendered
88							unusable even though Acrobat does not complain. Future library
89							versions may relax the parser, but not yet.
90
91							=cut
92
93							our $PREF_OPASS = 0;
94							our $PREF_UPASS = 1;
95							our $PREF_PRINT = 2;
96							our $PREF_MODIFY = 3;
97							our $PREF_COPY = 4;
98							our $PREF_ADD = 5;
99
100							our $MAX_STRING = 65; # length of output string
101
102							my %filterabbrevs = (
103							AHx => 'ASCIIHexDecode',
104							A85 => 'ASCII85Decode',
105							CCF => 'CCITTFaxDecode',
106							DCT => 'DCTDecode',
107							Fl => 'FlateDecode',
108							LZW => 'LZWDecode',
109							RL => 'RunLengthDecode',
110							);
111
112							my %inlineabbrevs = (
113							%filterabbrevs,
114							BPC => 'BitsPerComponent',
115							CS => 'ColorSpace',
116							D => 'Decode',
117							DP => 'DecodeParms',
118							F => 'Filter',
119							H => 'Height',
120							IM => 'ImageMask',
121							I => 'Interpolate',
122							W => 'Width',
123							CMYK => 'DeviceCMYK',
124							G => 'DeviceGray',
125							RGB => 'DeviceRGB',
126							I => 'Indexed',
127							);
128
129							=head1 API
130
131							=head2 Functions intended to be used externally
132
133							$self = CAM::PDF->new(content \| filename \| '-')
134							$self->toPDF()
135							$self->needsSave()
136							$self->save()
137							$self->cleansave()
138							$self->output(filename \| '-')
139							$self->cleanoutput(filename \| '-')
140							$self->previousRevision()
141							$self->allRevisions()
142							$self->preserveOrder()
143							$self->appendObject(olddoc, oldnum, [follow=(1\|0)])
144							$self->replaceObject(newnum, olddoc, oldnum, [follow=(1\|0)])
145							(olddoc can be undef in the above for adding new objects)
146							$self->numPages()
147							$self->getPageText(pagenum)
148							$self->getPageDimensions(pagenum)
149							$self->getPageContent(pagenum)
150							$self->setPageContent(pagenum, content)
151							$self->appendPageContent(pagenum, content)
152							$self->deletePage(pagenum)
153							$self->deletePages(pagenum, pagenum, ...)
154							$self->extractPages(pagenum, pagenum, ...)
155							$self->appendPDF(CAM::PDF object)
156							$self->prependPDF(CAM::PDF object)
157							$self->wrapString(string, width, fontsize, page, fontlabel)
158							$self->getFontNames(pagenum)
159							$self->addFont(page, fontname, fontlabel, [fontmetrics])
160							$self->deEmbedFont(page, fontname, [newfontname])
161							$self->deEmbedFontByBaseName(page, basename, [newfont])
162							$self->getPrefs()
163							$self->setPrefs()
164							$self->canPrint()
165							$self->canModify()
166							$self->canCopy()
167							$self->canAdd()
168							$self->getFormFieldList()
169							$self->fillFormFields(fieldname, value, [fieldname, value, ...])
170							or $self->fillFormFields(%values)
171							$self->clearFormFieldTriggers(fieldname, fieldname, ...)
172
173							Note: 'clean' as in cleansave() and cleanobject() means write a fresh
174							PDF document. The alternative (e.g. save()) reuses the existing doc
175							and just appends to it. Also note that 'clean' functions sort the
176							objects numerically. If you prefer that the new PDF docs more closely
177							resemble the old ones, call preserveOrder() before cleansave() or
178							cleanobject().
179
180							=head2 Slightly less external, but useful, functions
181
182							$self->toString()
183							$self->getPage(pagenum)
184							$self->getFont(pagenum, fontname)
185							$self->getFonts(pagenum)
186							$self->getStringWidth(fontdict, string)
187							$self->getFormField(fieldname)
188							$self->getFormFieldDict(object)
189							$self->isLinearized()
190							$self->decodeObject(objectnum)
191							$self->decodeAll(any-node)
192							$self->decodeOne(dict-node)
193							$self->encodeObject(objectnum, filter)
194							$self->encodeOne(any-node, filter)
195							$self->changeString(obj-node, hashref)
196
197							=head2 Deeper utilities
198
199							$self->pageAddName(pagenum, name, objectnum)
200							$self->getPageObjnum(pagenum)
201							$self->getPropertyNames(pagenum)
202							$self->getProperty(pagenum, propname)
203							$self->getValue(any-node)
204							$self->dereference(objectnum) or $self->dereference(name,pagenum)
205							$self->deleteObject(objectnum)
206							$self->copyObject(obj-node)
207							$self->cacheObjects()
208							$self->setObjNum(obj-node, num)
209							$self->getRefList(obj-node)
210							$self->changeRefKeys(obj-node, hashref)
211
212							=head2 More rarely needed utilities
213
214							$self->getObjValue(objectnum)
215
216							=head2 Routines that should not be called
217
218							$self->_startdoc()
219							$self->delinearlize()
220							$self->build*()
221							$self->parse*()
222							$self->write*()
223							$self->*CB()
224							$self->traverse()
225							$self->fixDecode()
226							$self->abbrevInlineImage()
227							$self->unabbrevInlineImage()
228							$self->cleanse()
229							$self->clean()
230							$self->createID()
231
232
233							=head1 FUNCTIONS
234
235							=head2 Object creation/manipulation
236
237							=over
238
239							=item $doc->new($package, $content)
240
241							=item $doc->new($package, $content, $ownerpass, $userpass)
242
243							=item $doc->new($package, $content, $ownerpass, $userpass, $prompt)
244
245							=item $doc->new($package, $content, $ownerpass, $userpass, $options)
246
247							Instantiate a new CAM::PDF object. C<$content> can be a document in a
248							string, a filename, or '-'. The latter indicates that the document
249							should be read from standard input. If the document is password
250							protected, the passwords should be passed as additional arguments. If
251							they are not known, a boolean C<$prompt> argument allows the programmer to
252							suggest that the constructor prompt the user for a password. This is
253							rudimentary prompting: passwords are in the clear on the console.
254
255							This constructor takes an optional final argument which is a hash
256							reference. This hash can contain any of the following optional
257							parameters:
258
259							=over
260
261							=item prompt_for_password => $boolean
262
263							This is the same as the C<$prompt> argument described above.
264
265							=item fault_tolerant => $boolean
266
267							This flag causes the instance to be more lenient when reading the
268							input PDF. Currently, this only affects PDFs which cannot be
269							successfully decrypted.
270
271							=back
272
273							=cut
274
275							sub new ## no critic(Subroutines::ProhibitExcessComplexity, Unpack)
276							{
277	27			27	1	83238	my $pkg = shift;
278	27					66	my $content = shift; # or a filename
279							# Optional args:
280	27					58	my $opassword = shift;
281	27					59	my $upassword = shift;
282	27					48	my $options;
283							# Backward compatible support for prompt flag as final argument
284	27	100				105	if (ref $_[0])
285							{
286	4					9	$options = shift;
287	4	50				20	if ((ref $options) ne 'HASH')
288							{
289	0					0	croak 'Options must be a hash reference';
290							}
291							}
292							else
293							{
294	23					98	$options = {
295							prompt_for_password => shift,
296							};
297							}
298
299
300	27					65	my $pdfversion = '1.2';
301	27	100				153	if ($content =~ m/ \A%PDF-([\d.]+) /xms)
302							{
303	17					67	my $ver = $1;
304	17	100	66			1229	if ($ver && $ver > $pdfversion)
305							{
306	13					36	$pdfversion = $ver;
307							}
308							}
309							else
310							{
311	10	50				45	if (1024 > length $content)
312							{
313	10					24	my $file = $content;
314	10	50				30	if ($file eq q{-})
315							{
316	0					0	$content = q{};
317	0					0	my $offset = 0;
318	0					0	my $step = 4096;
319	0					0	binmode STDIN; ##no critic (Syscalls)
320	0					0	while ($step == read STDIN, $content, $step, $offset)
321							{
322	0					0	$offset += $step;
323							}
324							}
325							else
326							{
327	10	100				666	if (open my $fh, '<', $file)
328							{
329	9					39	binmode $fh; ##no critic (Syscalls)
330	9					128	my $size = -s $file;
331	9	50				7005	if ($size != read $fh, $content, $size) {
332	0					0	$CAM::PDF::errstr = "Failed to read $file bytes\n";
333	0					0	return;
334							}
335	9	50				1561	if (!close $fh) {
336	0					0	$CAM::PDF::errstr = "Failed to close reading $file\n";
337	0					0	return;
338							}
339							}
340							else
341							{
342	1					14	$CAM::PDF::errstr = "Failed to open $file: $ERRNO\n";
343	1					10	return;
344							}
345							}
346							}
347	9	50				257	if ($content =~ m/ \A%PDF-([\d.]+) /xms)
348							{
349	9					47	my $ver = $1;
350	9	100	66			3744	if ($ver && $ver > $pdfversion)
351							{
352	7					22	$pdfversion = $ver;
353							}
354							}
355							else
356							{
357	0					0	$CAM::PDF::errstr = "Content does not begin with \"%PDF-\"\n";
358	0					0	return;
359							}
360							}
361							#warn "got pdfversion $pdfversion\n";
362
363	26					458	my $self = {
364							options => $options,
365
366							pdfversion => $pdfversion,
367							maxstr => $CAM::PDF::MAX_STRING, # length of output string
368							content => $content,
369							contentlength => length $content,
370							xref => {},
371							maxobj => 0,
372							changes => {},
373							versions => {},
374
375							# Caches:
376							objcache => {},
377							pagecache => {},
378							formcache => {},
379							Names => {},
380							NameObjects => {},
381							fontmetrics => {},
382							};
383	26					109	bless $self, $pkg;
384	26	50				109	if (!$self->_startdoc())
385							{
386	0					0	return;
387							}
388	26	100				124	if ($self->{trailer}->{ID})
389							{
390	24					82	my $id = $self->getValue($self->{trailer}->{ID});
391	24	50				81	if (ref $id)
392							{
393	24					46	my $accum = q{};
394	24					36	for my $objnode (@{$id})
	24					60
395							{
396	48					105	$accum .= $self->getValue($objnode);
397							}
398	24					70	$id = $accum;
399							}
400	24					61	$self->{ID} = $id;
401							}
402
403	26					222	$self->{crypt} = CAM::PDF::Decrypt->new($self, $opassword, $upassword,
404							$self->{options}->{prompt_for_password});
405	26	100	66			157	if (!$self->{crypt} && !$self->{options}->{fault_tolerant})
406							{
407	8					596	return;
408							}
409
410	18					83	return $self;
411							}
412
413							=item $doc->toPDF()
414
415							Serializes the data structure as a PDF document stream and returns as
416							in a scalar.
417
418							=cut
419
420							sub toPDF
421							{
422	8			8	1	366768	my $self = shift;
423
424	8	100				41	if ($self->needsSave())
425							{
426	4					22	$self->cleansave();
427							}
428	8					854	return $self->{content};
429							}
430
431							=item $doc->toString()
432
433							Returns a serialized representation of the data structure.
434							Implemented via Data::Dumper.
435
436							=cut
437
438							sub toString ## no critic (Unpack)
439							{
440	0			0	1	0	my $self = shift;
441	0	0				0	my @skip = @_ == 0 ? qw(content) : @_;
442
443	0					0	my %hold;
444	0					0	for my $key (@skip)
445							{
446	0					0	$hold{$key} = delete $self->{$key};
447							}
448
449	0					0	require Data::Dumper;
450	0					0	my $result = Data::Dumper->Dump([$self], [qw(doc)]);
451
452	0					0	for my $key (keys %hold)
453							{
454	0					0	$self->{$key} = $hold{$key};
455							}
456	0					0	return $result;
457							}
458
459							################################################################################
460
461							=back
462
463							=head2 Document reading
464
465							(all of these functions are intended for internal only)
466
467							=over
468
469							=cut
470
471
472							# PRIVATE METHOD
473							# read the document index and some metadata.
474
475							sub _startdoc
476							{
477	26			26		50	my $self = shift;
478
479							### Parse the document metadata
480
481							# Start by parsing out the location of the last xref block
482
483							# Implementation note: The PDF spec says "The last line of the file
484							# contains only the end-of-file marker, %%EOF." but it also says
485							# "Acrobat viewers require only that the %%EOF marker appear
486							# somewhere within the last 1024 bytes of the file."
487							# So, we follow the latter more lenient rule.
488
489	26					84	my $doc_length = length $self->{content};
490	26					39	my $startxref;
491	26	50				79	if ($doc_length > 1024)
492							{
493							# The initial ".*" is for the unlikely case that there are two "startxref" statements in the last 1024 bytes
494	26					423	($startxref) = (substr $self->{content}, $doc_length - 1024, 1024) =~ m/ .* startxref\s(\d+)\s%%EOF.*?\z /xms;
495							}
496							else
497							{
498	0					0	($startxref) = $self->{content} =~ m/ .* startxref\s(\d+)\s%%EOF.*?\z /xms;
499							}
500
501	26	50				311	if (!$startxref)
502							{
503	0					0	$CAM::PDF::errstr = "Cannot find the index in the PDF content\n";
504	0					0	return;
505							}
506
507							# Parse the hierarchy of xref blocks
508	26					104	$self->{startxref} = $startxref;
509	26					48	my @objstreamrefs;
510	26					139	$self->{trailer} = $self->_buildxref($self->{startxref}, $self->{xref}, $self->{versions}, \@objstreamrefs);
511	26	50				121	if (!defined $self->{trailer})
512							{
513	0					0	return;
514							}
515	26					97	$self->_buildendxref();
516	26					62	for my $objstreamref (@objstreamrefs)
517							{
518	58	50				121	if (!$self->_index_objstream($objstreamref))
519							{
520	0					0	return;
521							}
522							}
523
524							### Cache some page content descriptors
525
526							# Get the document root catalog
527	26	50				125	if (!exists $self->{trailer}->{Root})
528							{
529	0					0	$CAM::PDF::errstr = "No root node present in PDF trailer.\n";
530	0					0	return;
531							}
532	26					120	my $root = $self->getRootDict();
533	26	50	33			189	if (!$root \|\| (ref $root) ne 'HASH')
534							{
535	0					0	$CAM::PDF::errstr = "The PDF root node is not a dictionary.\n";
536	0					0	return;
537							}
538
539							# Get the root of the page tree
540	26	50				88	if (!exists $root->{Pages})
541							{
542	0					0	$CAM::PDF::errstr = "The PDF root node doesn't have a reference to the page tree.\n";
543	0					0	return;
544							}
545	26					84	my $pages = $self->getPagesDict();
546	26	50	33			181	if (!$pages \|\| (ref $pages) ne 'HASH')
547							{
548	0					0	$CAM::PDF::errstr = "The PDF page tree root is not a dictionary.\n";
549	0					0	return;
550							}
551
552							# Get the number of pages in the document
553	26					91	$self->{PageCount} = $self->getValue($pages->{Count});
554	26	50	33			174	if (!$self->{PageCount} \|\| $self->{PageCount} < 1)
555							{
556	0					0	$CAM::PDF::errstr = "Bad number of pages in PDF document\n";
557	0					0	return;
558							}
559
560	26					183	return 1;
561							}
562
563							# PRIVATE FUNCTION
564							# read document index
565
566							sub _buildxref
567							{
568	28			28		48	my $self = shift;
569	28					47	my $startxref = shift;
570	28					44	my $index = shift;
571	28					47	my $versions = shift;
572	28					39	my $objstreamrefs = shift;
573
574	28					46	my $trailer;
575	28	100				118	if ('xref' eq substr $self->{content}, $startxref, 4)
576							{
577	24					111	$trailer = $self->_buildxref_pdf14($startxref, $index, $versions);
578	24	50	33			176	if ($trailer && exists $trailer->{XRefStm})
579							{
580	0	0				0	if (!$self->_buildxref_pdf15($trailer->{XRefStm}->{value}, $index, $versions, $objstreamrefs))
581							{
582	0					0	return;
583							}
584							}
585							}
586							else
587							{
588	4					20	$trailer = $self->_buildxref_pdf15($startxref, $index, $versions, $objstreamrefs);
589							}
590
591	28	100	66			172	if ($trailer && exists $trailer->{Prev})
592							{
593	2	50				18	if (!$self->_buildxref($trailer->{Prev}->{value}, $index, $versions, $objstreamrefs))
594							{
595	0					0	return;
596							}
597							}
598
599	28					160	return $trailer;
600							}
601
602							# Just for debugging
603							sub __dump_binary_stream {
604	0			0		0	my $stream = shift;
605
606	0					0	my @b = unpack 'C*', $stream;
607	0					0	print ' 0 1 2 3 4 5 6 7 8 9 a b c d e f';
608	0					0	for my $i (0 .. $#b) {
609	0	0				0	if (0 == $i % 15) {
		0
610	0					0	printf "\n%04x: ", $i;
611							} elsif (0 == $i % 3) {
612	0					0	print q{ };
613							}
614	0					0	printf '%02x', $b[$i];
615							}
616	0					0	print "\n";
617
618	0					0	return;
619							}
620
621							sub _buildxref_pdf15
622							{
623	4			4		7	my $self = shift;
624	4					11	my $startxref = shift;
625	4					7	my $index = shift;
626	4					7	my $versions = shift;
627	4					7	my $objstreamrefs = shift;
628
629	4					18	my ($trailer, $stream) = $self->_buildxref_pdf15_getstream($startxref);
630	4	50				17	if (!$trailer) {
631	0					0	return;
632							}
633
634							#__dump_binary_stream($stream);
635
636	4					5058	my @byte_pattern = map {$_->{value}} @{$trailer->{W}->{value}};
	12					395
	4					19
637	4					190	my $entry_size = $byte_pattern[0] + $byte_pattern[1] + $byte_pattern[2];
638							#print STDOUT "pack: [@byte_pattern] => total size $entry_size\n";
639
640	4					9	my @objstreamrefs;
641							{
642	4					8	my @pairs = (0, $trailer->{Size}->{value});
	4					19
643	4	100				190	if (exists $trailer->{Index})
644							{
645	2					6	@pairs = map {$_->{value}} @{$trailer->{Index}->{value}};
	4					21
	2					12
646							}
647							#print STDOUT "Pairs: (Index,Size)=@pairs; size of stream=",length($stream)," ?= $entry_size x ",(length($stream)/$entry_size),"\n";
648
649	4					9	my $i = 0;
650	4					14	while (@pairs)
651							{
652	4					10	my $start = shift @pairs;
653	4					13	my $len = shift @pairs;
654	4					8	my $end = $start + $len;
655	4					12	for my $objnum ($start .. $end - 1)
656							{
657	92					288	my @byte = unpack 'C', substr $stream, $i++ $entry_size, $entry_size;
658	92					230	my %values = (type => 1, major => 0, minor => 0);
659	92					221	my @w = @byte_pattern;
660	92					110	my $pos = 0;
661	92					111	my $w = 0;
662	92					122	for my $key (qw(type major minor)) {
663	276					310	$w += shift @w;
664	276	50				546	if ($w > $pos) {
665	276					285	my $val = 0;
666	276					509	for (; $pos < $w; ++$pos) { ## no critic (ProhibitCStyleForLoops)
667	368					830	$val = ($val << 8) + $byte[$pos];
668							}
669	276					544	$values{$key} = $val;
670							}
671							}
672
673	92	50				279	next if (exists $index->{$objnum}); # keep only latest revision
674
675							#my %strs = (
676							# 0 => {str=>'free', major=>'objnext', minor=>'gennum'},
677							# 1 => {str=>'raw', major=>'byte', minor=>'gennum'},
678							# 2 => {str=>'zip', major=>'stream', minor=>'index'},
679							#);
680							#my $type_def = $strs{$values{type}};
681							#my $type_str = $type_def ? $type_def->{str} : 'unk';
682							#my $major_str = $type_def ? $type_def->{major} : 'unk';
683							#my $minor_str = $type_def ? $type_def->{minor} : 'unk';
684							#print STDOUT "xref $objnum = $values{type}=$type_str $major_str=$values{major}(",
685							# sprintf('%04x',$values{major}),") $minor_str=$values{minor}(",
686							# sprintf('%02x',$values{minor}),")\n";
687
688							# Ignore type 0
689	92	100				249	if ($values{type} == 1)
		100
690							{
691	32					81	$index->{$objnum} = $values{major};
692	32					113	$versions->{$objnum} = $values{minor};
693							}
694							elsif ($values{type} == 2)
695							{
696	58					70	push @{$objstreamrefs}, {objnum => $objnum, streamnum => $values{major}, indx => $values{minor}};
	58					270
697	58					177	$index->{$objnum} = {}; # will be overwritten later
698	58					189	$versions->{$objnum} = 0;
699							}
700							}
701	4	100				26	if ($end - 1 > $self->{maxobj})
702							{
703	2					11	$self->{maxobj} = $end - 1;
704							}
705							}
706							}
707	4					20	return $trailer;
708							}
709
710							sub _buildxref_pdf15_getstream
711							{
712	4			4		9	my $self = shift;
713	4					7	my $startxref = shift;
714
715							# Don't slurp in the whole file
716	4					8	my $chunk_size = 1024;
717	4					31	my @content = (substr $self->{content}, $startxref, $chunk_size);
718							# warning: this doesn't account for the case where "endobj" crosses a 1024-byte boundary
719							# instead, we hit the end of file and find it after concatenation -- a hack but it works
720	4		33			51	while ($content[-1] && $content[-1] !~ m/endobj/xms) {
721	0					0	my $offset = $startxref + $chunk_size * @content;
722	0	0				0	if ($offset >= length $self->{content}) {
723							# end of file
724	0					0	last;
725							}
726	0					0	push @content, substr $self->{content}, $offset, $chunk_size;
727							}
728	4					23	my $content = join q{}, @content;
729
730	4					20	my $xrefstream = $self->parseObj(\$content);
731	4	50				20	if (!$xrefstream)
732							{
733	0					0	$CAM::PDF::errstr = 'Failed to locate the xref stream';
734	0					0	return;
735							}
736	4					15	my $trailer = $xrefstream->{value}->{value}; # dict hash
737	4	50				17	if (!$trailer)
738							{
739	0					0	$CAM::PDF::errstr = 'Invalid xref stream: no trailer';
740	0					0	return;
741							}
742	4	50				24	if ('HASH' ne ref $trailer)
743							{
744	0					0	$CAM::PDF::errstr = 'Invalid xref stream: trailer is not a dictionary';
745	0					0	return;
746							}
747							#print STDOUT "Trailer: @{[sort keys %{$trailer}]}, $trailer->{Type}->{value}\n";
748	4	50	33			39	if (!exists $trailer->{Type} \|\| 'XRef' ne $trailer->{Type}->{value})
749							{
750	0					0	$CAM::PDF::errstr = 'Invalid xref stream: type is not XRef';
751	0					0	return;
752							}
753
754	4					28	my $stream = $self->decodeOne($xrefstream->{value});
755	4	50				18	if (!$stream)
756							{
757	0					0	$CAM::PDF::errstr = 'Invalid xref stream: could not decode the stream';
758	0					0	return;
759							}
760
761	4					32	return ($trailer, $stream);
762							}
763
764							sub _index_objstream
765							{
766	58			58		68	my $self = shift;
767	58					82	my $objstreamref = shift;
768
769	58					143	my $objstream = $self->dereference($objstreamref->{streamnum});
770	58	50				133	if (!$objstream)
771							{
772	0					0	$CAM::PDF::errstr = 'Failed to read object stream ' . $objstreamref->{streamnum};
773	0					0	return;
774							}
775	58	100				139	if ($objstream->{_indexed}++)
776							{
777	46					139	return 1;
778							}
779	12					41	my $stream = $self->decodeOne($objstream->{value});
780	12	50				34	if (!$stream)
781							{
782	0					0	$CAM::PDF::errstr = 'Invalid xref stream: could not decode objstream ' . $objstreamref->{streamnum};
783	0					0	return;
784							}
785	12					30	my $dict = $objstream->{value}->{value};
786	12					29	my $n = $dict->{N}->{value};
787	12					25	my $first = $dict->{First}->{value};
788	12					40	my $lookup = substr $stream, 0, $first;
789	12					13	my @objs;
790	12					36	my $streamholder = {stream => $stream};
791	12					34	for my $i (0 .. $n-1)
792							{
793	58	50				225	if ($lookup =~ m/\G\s*(\d+)\s+(\d+)/cgxms)
794							{
795	58					125	my ($objnum, $offset) = ($1, $2);
796	58					165	my $pos = {objstream => $streamholder, start => $first + $offset};
797	58					82	push @objs, $pos;
798	58	50				158	if (exists $self->{xref}->{$objnum}) {
799							# keep only latest revision
800	58	50				158	next if !ref $self->{xref}->{$objnum};
801	58	50				156	next if $self->{xref}->{$objnum}->{objstream};
802							}
803							#print "objnum $objnum at pos $offset of objstream $objstreamref->{streamnum}\n";
804	58					89	$self->{xref}->{$objnum} = $pos;
805	58					153	$self->{versions}->{$objnum} = 0;
806							}
807							else
808							{
809	0					0	$CAM::PDF::errstr = 'Failed to read the objstream index for ' . $objstreamref->{streamnum};
810	0					0	return;
811							}
812							}
813	12					31	for my $i (0 .. $#objs-1)
814							{
815	46					85	$objs[$i]->{end} = $objs[$i+1]->{start};
816							}
817	12					30	$objs[-1]->{end} = length $stream;
818
819	12					66	return 1;
820							}
821
822							sub _buildxref_pdf14
823							{
824	24			24		42	my $self = shift;
825	24					40	my $startxref = shift;
826	24					44	my $index = shift;
827	24					38	my $versions = shift;
828
829	24					117	my $trailerpos = index $self->{content}, 'trailer', $startxref;
830
831							# Workaround for Perl 5.6.1 bug
832	24	50	33			146	if ($trailerpos > 0 && $trailerpos < $startxref)
833							{
834	0					0	my $xrefstr = substr $self->{content}, $startxref;
835	0					0	$trailerpos = $startxref + index $xrefstr, 'trailer';
836							}
837
838	24					118	my $end = substr $self->{content}, $startxref, $trailerpos-$startxref;
839
840	24	50				214	if ($end !~ s/ \A xref\s+ //xms)
841							{
842	0					0	my $len = length $end;
843	0					0	$CAM::PDF::errstr = "Could not find PDF cross-ref table at location $startxref/$trailerpos/$len\n" . $self->trimstr($end);
844	0					0	return;
845							}
846	24					49	my $part = 0;
847	24					183	while ($end =~ s/ \A (\d+)\s+(\d+)\s+ //xms)
848							{
849	24					59	my $s = $1;
850	24					59	my $n = $2;
851
852	24					40	$part++;
853	24					68	for my $i (0 .. $n-1)
854							{
855	2066					2466	my $objnum = $s+$i;
856	2066	50				4643	next if (exists $index->{$objnum});
857
858	2066					3245	my $row = substr $end, $i*20, 20;
859	2066					8205	my ($indexnum, $version, $type) = $row =~ m/ \A (\d{10}) [ ] (\d{5}) [ ] (\w) /xms;
860	2066	50				4228	if (!$indexnum)
861							{
862	0					0	$CAM::PDF::errstr = "Could not decipher xref row:\n" . $self->trimstr($row);
863	0					0	return;
864							}
865	2066	100				3787	if ($type eq 'n')
866							{
867	818	50				1660	if ($indexnum != 0) # if the index says it's at byte zero, pretend it's an 'f' instead of an 'n'
868							{
869	818					10428	$index->{$objnum} = $indexnum;
870	818					1560	$versions->{$objnum} = $version;
871							}
872							}
873	2066	100				4376	if ($objnum > $self->{maxobj})
874							{
875	2042					3873	$self->{maxobj} = $objnum;
876							}
877							}
878
879	24					133	$end = substr $end, 20*$n;
880							}
881
882	24					85	my $sxrefpos = index $self->{content}, 'startxref', $trailerpos;
883	24	50	33			146	if ($sxrefpos > 0 && $sxrefpos < $trailerpos) # workaround for 5.6.1 bug
884							{
885	0					0	my $tail = substr $self->{content}, $trailerpos;
886	0					0	$sxrefpos = $trailerpos + index $tail, 'startxref';
887							}
888	24					77	$end = substr $self->{content}, $trailerpos, $sxrefpos-$trailerpos;
889
890	24	50				165	if ($end !~ s/ \A trailer\s* //xms)
891							{
892	0					0	$CAM::PDF::errstr = "Did not find expected trailer block after xref\n" . $self->trimstr($end);
893	0					0	return;
894							}
895	24					123	my $trailer = $self->parseDict(\$end)->{value};
896	24					106	return $trailer;
897							}
898
899							# PRIVATE FUNCTION
900							# _buildendxref -- compute the end of each object
901							# note that this is not always the actual end of the object, but
902							# we guarantee that the object will end at or before this point.
903
904							sub _buildendxref
905							{
906	26			26		43	my $self = shift;
907
908	26					55	my $x = $self->{xref}; # shorthand
909							# make a list of objnums sorted by file position. Ignore objects inside objstreams.
910	26					57	my @keys = sort {$x->{$a} <=> $x->{$b}} grep {!ref $x->{$_}} keys %{$x};
	3411					4123
	908					1536
	26					265
911
912	26					107	my $r = {};
913	26					90	for my $i (0 .. $#keys-1)
914							{
915							# set the end of each object to be the beginning of the next object
916	824					1803	$r->{$keys[$i]} = $x->{$keys[$i+1]};
917							}
918							# The end of the last object is the end of the file
919	26					92	$r->{$keys[-1]} = $self->{contentlength};
920
921	26					58	$self->{endxref} = $r;
922	26					109	return;
923							}
924
925							# PRIVATE FUNTION
926							# _buildNameTable -- descend into the page tree and extract all XObject
927							# and Font name references.
928
929							sub _buildNameTable
930							{
931	12			12		30	my $self = shift;
932	12					25	my $pagenum = shift;
933
934	12	50	33			86	if (!$pagenum \|\| $pagenum eq 'All') # Build the ENTIRE name table
935							{
936	0					0	$self->cacheObjects();
937	0					0	for my $p (1 .. $self->{PageCount})
938							{
939	0					0	$self->_buildNameTable($p);
940							}
941	0					0	my %n;
942	0					0	for my $objnode (values %{$self->{objcache}})
	0					0
943							{
944	0	0				0	if ($objnode->{value}->{type} eq 'dictionary')
945							{
946	0					0	my $dict = $objnode->{value}->{value};
947	0	0				0	if ($dict->{Name})
948							{
949	0					0	$n{$dict->{Name}->{value}} = CAM::PDF::Node->new('reference', $objnode->{objnum});
950							}
951							}
952							}
953	0					0	$self->{Names}->{All} = {%n};
954	0					0	return;
955							}
956
957	12	100				60	return if (exists $self->{Names}->{$pagenum});
958
959	6					9	my %n;
960	6					19	my $page = $self->getPage($pagenum);
961	6					22	while ($page)
962							{
963	12					32	my $objnum = $self->getPageObjnum($pagenum);
964	12	100				34	if (exists $page->{Resources})
965							{
966	6					20	my $r = $self->getValue($page->{Resources});
967	6					17	for my $key ('XObject', 'Font')
968							{
969	12	100				1167	if (exists $r->{$key})
970							{
971	8					24	my $x = $self->getValue($r->{$key});
972	8	50				25	if ((ref $x) eq 'HASH')
973							{
974	8					9	%n = (%{$x}, %n);
	8					63
975							}
976							}
977							}
978							}
979
980							# Inherit from parent
981	12					27	$page = $page->{Parent};
982	12	100				37	if ($page)
983							{
984	6					12	$page = $self->getValue($page);
985							}
986							}
987
988	6					32	$self->{Names}->{$pagenum} = {%n};
989	6					22	return;
990							}
991
992							=item $doc->getRootDict()
993
994							Returns the Root dictionary for the PDF.
995
996							=cut
997
998							sub getRootDict
999							{
1000	156			156	1	206	my $self = shift;
1001
1002	156					542	return $self->getValue($self->{trailer}->{Root});
1003							}
1004
1005							=item $doc->getPagesDict()
1006
1007							Returns the root Pages dictionary for the PDF.
1008
1009							=cut
1010
1011							sub getPagesDict
1012							{
1013	70			70	1	147	my $self = shift;
1014
1015	70					203	return $self->getValue($self->getRootDict()->{Pages});
1016							}
1017
1018							=item $doc->parseObj($string)
1019
1020							Use parseAny() instead of this, if possible.
1021
1022							Given a fragment of PDF page content, parse it and return an object
1023							Node. This can be called as a class method in most circumstances, but
1024							is intended as an instance method.
1025
1026							=cut
1027
1028							sub parseObj
1029							{
1030	297			297	1	432	my $self = shift;
1031	297					342	my $c = shift;
1032
1033	297	50				373	if (${$c} !~ m/ \G\s(\d+)\s+(\d+)\s+obj\s /cgxms) ##no critic(ProhibitUnusedCapture)
	297					1935
1034							{
1035	0					0	die "Expected object open tag\n" . $self->trimstr(${$c});
	0					0
1036							}
1037							# need to implement like this with explicit capture vars for 5.6.1
1038							# compatibility
1039	297					880	my ($objnum, $gennum) = ($1, $2); ##no critic(ProhibitCaptureWithoutTest)
1040	297					401	$objnum = int $objnum;
1041	297					406	$gennum = int $gennum;
1042
1043	297					348	my $objnode;
1044	297	50				444	if (${$c} =~ m/ \G(.?)endobj\s /cgxms)
	297					5695
1045							{
1046	297					3474	my $string = $1;
1047	297					729	$objnode = $self->parseAny(\$string, $objnum, $gennum);
1048	297	100				1268	if ($string =~ m/ \Gstream /xms)
1049							{
1050	66	50				242	if ($objnode->{type} ne 'dictionary')
1051							{
1052	0					0	die "Found an object stream without a preceding dictionary\n" . $self->trimstr(${$c});
	0					0
1053							}
1054	66					237	$objnode->{value}->{StreamData} = $self->parseStream(\$string, $objnum, $gennum, $objnode->{value});
1055							}
1056							}
1057							else
1058							{
1059	0					0	die "Expected endobj\n" . $self->trimstr(${$c});
	0					0
1060							}
1061	297					1075	return CAM::PDF::Node->new('object', $objnode, $objnum, $gennum);
1062							}
1063
1064
1065							=item $doc->parseInlineImage($string)
1066
1067							=item $doc->parseInlineImage($string, $objnum)
1068
1069							=item $doc->parseInlineImage($string, $objnum, $gennum)
1070
1071							Given a fragment of PDF page content, parse it and return an object
1072							Node. This can be called as a class method in some cases, but
1073							is intended as an instance method.
1074
1075							=cut
1076
1077							sub parseInlineImage
1078							{
1079	3			3	1	6	my $self = shift;
1080	3					4	my $c = shift;
1081	3					6	my $objnum = shift;
1082	3					3	my $gennum = shift;
1083
1084	3	50				4	if (${$c} !~ m/ \GBI\b /xms)
	3					433
1085							{
1086	0					0	die "Expected inline image open tag\n" . $self->trimstr(${$c});
	0					0
1087							}
1088	3					16	my $dict = $self->parseDict($c, $objnum, $gennum, 'BI\\b\\s*', 'ID\\b');
1089	3					15	$self->unabbrevInlineImage($dict);
1090	3					14	$dict->{value}->{Type} = CAM::PDF::Node->new('label', 'XObject', $objnum, $gennum);
1091	3					13	$dict->{value}->{Subtype} = CAM::PDF::Node->new('label', 'Image', $objnum, $gennum);
1092	3					28	$dict->{value}->{StreamData} = $self->parseStream($c, $objnum, $gennum, $dict->{value},
1093							qr/ \s* /xms, qr/ \s*EI(?!\S) /xms);
1094	3					11	${$c} =~ m/ \G\s+ /cgxms;
	3					10
1095
1096	3					12	return CAM::PDF::Node->new('object', $dict, $objnum, $gennum);
1097							}
1098
1099
1100							=item $doc->writeInlineImage($objectnode)
1101
1102							This is the inverse of parseInlineImage(), intended for use only in
1103							the CAM::PDF::Content class.
1104
1105							=cut
1106
1107							sub writeInlineImage
1108							{
1109	1			1	1	3	my $self = shift;
1110	1					2	my $objnode = shift;
1111
1112							# Make a copy since we are going to trash the image
1113	1					5	my $dictobj = $self->copyObject($objnode)->{value};
1114
1115	1					4	my $dict = $dictobj->{value};
1116	1					4	delete $dict->{Type};
1117	1					26	delete $dict->{Subtype};
1118	1					3	my $stream = $dict->{StreamData}->{value};
1119	1					3	delete $dict->{StreamData};
1120	1					6	$self->abbrevInlineImage($dictobj);
1121
1122	1					5	my $str = $self->writeAny($dictobj);
1123	1					6	$str =~ s/ \A << /BI /xms;
1124	1					6	$str =~ s/ >> \z / ID/xms;
1125	1					4	$str .= "\n" . $stream . "\nEI";
1126	1					14	return $str;
1127							}
1128
1129							=item $doc->parseStream($string, $objnum, $gennum, $dictnode)
1130
1131							This should only be used by parseObj(), or other specialized cases.
1132
1133							Given a fragment of PDF page content, parse it and return a stream
1134							Node. This can be called as a class method in most circumstances, but
1135							is intended as an instance method.
1136
1137							The dictionary Node argument is typically the body of the object Node
1138							that precedes this stream.
1139
1140							=cut
1141
1142							sub parseStream
1143							{
1144	69			69	1	119	my $self = shift;
1145	69					130	my $c = shift;
1146	69					142	my $objnum = shift;
1147	69					93	my $gennum = shift;
1148	69					93	my $dict = shift;
1149
1150	69		66			487	my $begin = shift \|\| qr/ stream[ \t]*\r?\n /xms;
1151	69		66			527	my $end = shift \|\| qr/ \sendstream\s /xms;
1152
1153	69	50				101	if (${$c} !~ m/ \G$begin /cgxms)
	69					739
1154							{
1155	0					0	die "Expected stream open tag\n" . $self->trimstr(${$c});
	0					0
1156							}
1157
1158	69					119	my $stream;
1159
1160	69		66			210	my $l = $dict->{Length} \|\| $dict->{L};
1161	69	100				150	if (!defined $l)
1162							{
1163	2	50				8	if ($begin =~ m/ \Gstream /xms)
1164							{
1165	0					0	die "Missing stream length\n" . $self->trimstr(${$c});
	0					0
1166							}
1167	2	50				3	if (${$c} =~ m/ \G$begin(.*?)$end /cgxms)
	2					61
1168							{
1169	2					7	$stream = $1;
1170	2					10	my $len = length $stream;
1171	2					8	$dict->{Length} = CAM::PDF::Node->new('number', $len, $objnum, $gennum);
1172							}
1173							else
1174							{
1175	0					0	die "Missing stream begin/end\n" . $self->trimstr(${$c});
	0					0
1176							}
1177							}
1178							else
1179							{
1180	67					167	my $length = $self->getValue($l);
1181	67					108	my $pos = pos ${$c};
	67					136
1182	67					88	$stream = substr ${$c}, $pos, $length;
	67					1227
1183	67					99	pos(${$c}) += $length; ## no critic(CodeLayout::ProhibitParensWithBuiltins)
	67					245
1184	67	50				115	if (${$c} !~ m/ \G$end /cgxms)
	67					796
1185							{
1186	0					0	die "Expected endstream\n" . $self->trimstr(${$c});
	0					0
1187							}
1188							}
1189
1190	69	100				207	if (ref $self)
1191							{
1192							# in the rare case of CAM::PDF::Content::_parseInlineImage, this
1193							# may be called as a class method, thus making the above test
1194							# necessary
1195
1196	66	100				189	if ($self->{crypt})
1197							{
1198	50					286	$stream = $self->{crypt}->decrypt($self, $stream, $objnum, $gennum);
1199							}
1200							}
1201
1202	69					269	return CAM::PDF::Node->new('stream', $stream, $objnum, $gennum);
1203							}
1204
1205							=item $doc->parseDict($string)
1206
1207							=item $doc->parseDict($string, $objnum)
1208
1209							=item $doc->parseDict($string, $objnum, $gennum)
1210
1211							Use parseAny() instead of this, if possible.
1212
1213							Given a fragment of PDF page content, parse it and return an dictionary
1214							Node. This can be called as a class method in most circumstances, but
1215							is intended as an instance method.
1216
1217							=cut
1218
1219							sub parseDict
1220							{
1221	350			350	1	491	my $pkg_or_doc = shift;
1222	350					381	my $c = shift;
1223	350					506	my $objnum = shift;
1224	350					426	my $gennum = shift;
1225
1226	350		100			1224	my $begin = shift \|\| '<<\\s*';
1227	350		100			1199	my $end = shift \|\| '>>\\s*';
1228
1229	350					545	my $dict = {};
1230	350	50				387	if (${$c} =~ m/ \G$begin /cgxms)
	350					2266
1231							{
1232	350					413	while (${$c} !~ m/ \G$end /cgxms)
	1847					7819
1233							{
1234							#warn "looking for label:\n" . $pkg_or_doc->trimstr(${$c});
1235	1497					3824	my $keyref = $pkg_or_doc->parseLabel($c, $objnum, $gennum);
1236	1497					2537	my $key = $keyref->{value};
1237							#warn "looking for value:\n" . $pkg_or_doc->trimstr(${$c});
1238	1497					3177	my $value = $pkg_or_doc->parseAny($c, $objnum, $gennum);
1239	1497					6198	$dict->{$key} = $value;
1240							}
1241							}
1242
1243	350					1199	return CAM::PDF::Node->new('dictionary', $dict, $objnum, $gennum);
1244							}
1245
1246							=item $doc->parseArray($string)
1247
1248							=item $doc->parseArray($string, $objnum)
1249
1250							=item $doc->parseArray($string, $objnum, $gennum)
1251
1252							Use parseAny() instead of this, if possible.
1253
1254							Given a fragment of PDF page content, parse it and return an array
1255							Node. This can be called as a class or instance method.
1256
1257							=cut
1258
1259							sub parseArray
1260							{
1261	173			173	1	244	my $pkg_or_doc = shift;
1262	173					229	my $c = shift;
1263	173					201	my $objnum = shift;
1264	173					205	my $gennum = shift;
1265
1266	173					304	my $array = [];
1267	173	50				202	if (${$c} =~ m/ \G\[\s* /cgxms)
	173					605
1268							{
1269	173					212	while (${$c} !~ m/ \G\]\s* /cgxms)
	3283					8561
1270							{
1271							#warn "looking for array value:\n" . $pkg_or_doc->trimstr(${$c});
1272	3110					3082	push @{$array}, $pkg_or_doc->parseAny($c, $objnum, $gennum);
	3110					6641
1273							}
1274							}
1275
1276	173					777	return CAM::PDF::Node->new('array', $array, $objnum, $gennum);
1277							}
1278
1279							=item $doc->parseLabel($string)
1280
1281							=item $doc->parseLabel($string, $objnum)
1282
1283							=item $doc->parseLabel($string, $objnum, $gennum)
1284
1285							Use parseAny() instead of this, if possible.
1286
1287							Given a fragment of PDF page content, parse it and return a label
1288							Node. This can be called as a class or instance method.
1289
1290							=cut
1291
1292							sub parseLabel
1293							{
1294	2605			2605	1	3190	my $pkg_or_doc = shift;
1295	2605					3503	my $c = shift;
1296	2605					2702	my $objnum = shift;
1297	2605					3001	my $gennum = shift;
1298
1299	2605					2444	my $label;
1300	2605	50				2446	if (${$c} =~ m{ \G/([^\s<>/\[\]()]+)\s* }cgxms)
	2605					9079
1301							{
1302	2605					4469	$label = $1;
1303							}
1304							else
1305							{
1306	0					0	die "Expected identifier label:\n" . $pkg_or_doc->trimstr(${$c});
	0					0
1307							}
1308	2605					8395	return CAM::PDF::Node->new('label', $label, $objnum, $gennum);
1309							}
1310
1311							=item $doc->parseRef($string)
1312
1313							=item $doc->parseRef($string, $objnum)
1314
1315							=item $doc->parseRef($string, $objnum, $gennum)
1316
1317							Use parseAny() instead of this, if possible.
1318
1319							Given a fragment of PDF page content, parse it and return a reference
1320							Node. This can be called as a class or instance method.
1321
1322							=cut
1323
1324							sub parseRef
1325							{
1326	412			412	1	555	my $pkg_or_doc = shift;
1327	412					427	my $c = shift;
1328	412					430	my $objnum = shift;
1329	412					499	my $gennum = shift;
1330
1331	412					393	my $newobjnum;
1332	412	50				405	if (${$c} =~ m/ \G(\d+)\s+\d+\s+R\s* /cgxms)
	412					1628
1333							{
1334	412					777	$newobjnum = int $1;
1335							}
1336							else
1337							{
1338	0					0	die "Expected object reference\n" . $pkg_or_doc->trimstr(${$c});
	0					0
1339							}
1340	412					1236	return CAM::PDF::Node->new('reference', $newobjnum, $objnum, $gennum);
1341							}
1342
1343							=item $doc->parseNum($string)
1344
1345							=item $doc->parseNum($string, $objnum)
1346
1347							=item $doc->parseNum($string, $objnum, $gennum)
1348
1349							Use parseAny() instead of this, if possible.
1350
1351							Given a fragment of PDF page content, parse it and return a number
1352							Node. This can be called as a class or instance method.
1353
1354							=cut
1355
1356							sub parseNum
1357							{
1358	17935			17935	1	20939	my $pkg_or_doc = shift;
1359	17935					18237	my $c = shift;
1360	17935					20777	my $objnum = shift;
1361	17935					19793	my $gennum = shift;
1362
1363	17935					16909	my $value;
1364	17935	50				16699	if (${$c} =~ m/ \G([\d.+-]+)\s* /cgxms)
	17935					58178
1365							{
1366	17935					31483	$value = $1;
1367							}
1368							else
1369							{
1370	0					0	die "Expected numerical constant\n" . $pkg_or_doc->trimstr(${$c});
	0					0
1371							}
1372	17935					58758	return CAM::PDF::Node->new('number', $value, $objnum, $gennum);
1373							}
1374
1375							=item $doc->parseString($string)
1376
1377							=item $doc->parseString($string, $objnum)
1378
1379							=item $doc->parseString($string, $objnum, $gennum)
1380
1381							Use parseAny() instead of this, if possible.
1382
1383							Given a fragment of PDF page content, parse it and return a string
1384							Node. This can be called as a class or instance method.
1385
1386							=cut
1387
1388							sub parseString
1389							{
1390	1709			1709	1	13150	my $pkg_or_doc = shift;
1391	1709					1895	my $c = shift;
1392	1709					2200	my $objnum = shift;
1393	1709					2160	my $gennum = shift;
1394
1395	1709					2283	my $value = q{};
1396	1709	50				1717	if (${$c} =~ m/ \G [(] /cgxms)
	1709					5091
1397							{
1398							# TODO: use Text::Balanced or Regexp::Common from CPAN??
1399
1400	1709					2432	my $depth = 1;
1401	1709					3500	while ($depth > 0)
1402							{
1403	1744	50				1694	if (${$c} =~ m/ \G ([^()]*) ([()]) /cgxms)
	1744					5229
1404							{
1405	1744					3376	my $string = $1;
1406	1744					4605	my $delim = $2;
1407	1744					2500	$value .= $string;
1408
1409							# Make sure this is not an escaped paren, OR an real paren
1410							# preceded by an escaped backslash!
1411	1744	100	100			11922	if ($string =~ m/ (\\+) \z/xms && 1 == (length $1) % 2)
		100
		100
1412							{
1413	33					102	$value .= $delim;
1414							}
1415							elsif ($delim eq '(')
1416							{
1417	1					3	$value .= $delim;
1418	1					3	$depth++;
1419							}
1420							elsif(--$depth > 0)
1421							{
1422	1					3	$value .= $delim;
1423							}
1424							}
1425							else
1426							{
1427	0					0	die "Expected string closing\n" . $pkg_or_doc->trimstr(${$c});
	0					0
1428							}
1429							}
1430	1709					1725	${$c} =~ m/ \G\s* /cgxms;
	1709					4093
1431							}
1432							else
1433							{
1434	0					0	die "Expected string opener\n" . $pkg_or_doc->trimstr(${$c});
	0					0
1435							}
1436
1437							# Unescape slash-escaped characters. Treat \\ specially.
1438	1709					6232	my @parts = split /\\\\\|\\134/xms, $value, -1;
1439	1709					3729	for (@parts)
1440							{
1441							# concatenate continued lines
1442	1745					2481	s/ \\\r?\n //gxms;
1443	1745					1968	s/ \\\r //gxms;
1444
1445							# special characters
1446	1745					1809	s/ \\n /\n/gxms;
1447	1745					1878	s/ \\r /\r/gxms;
1448	1745					1776	s/ \\t /\t/gxms;
1449	1745					2103	s/ \\f /\f/gxms;
1450	1745					1742	s/ \\b /\x{8}/gxms;
1451
1452							# octal numbers
1453	1745					2173	s/ \\(\d{1,3}) /chr oct $1/gexms;
	92					585
1454
1455							# Ignore all other slashes (i.e. following characters are treated literally)
1456	1745					4309	s/ \\ //gxms;
1457							}
1458	1709					3657	$value = join q{\\}, @parts;
1459
1460	1709	100				3622	if (ref $pkg_or_doc)
1461							{
1462	99					139	my $self = $pkg_or_doc;
1463	99	100				318	if ($self->{crypt})
1464							{
1465	31					144	$value = $self->{crypt}->decrypt($self, $value, $objnum, $gennum);
1466							}
1467							}
1468	1709					6098	return CAM::PDF::Node->new('string', $value, $objnum, $gennum);
1469							}
1470
1471							=item $doc->parseHexString($string)
1472
1473							=item $doc->parseHexString($string, $objnum)
1474
1475							=item $doc->parseHexString($string, $objnum, $gennum)
1476
1477							Use parseAny() instead of this, if possible.
1478
1479							Given a fragment of PDF page content, parse it and return a hex string
1480							Node. This can be called as a class or instance method.
1481
1482							=cut
1483
1484							sub parseHexString
1485							{
1486	70			70	1	1750	my $pkg_or_doc = shift;
1487	70					95	my $c = shift;
1488	70					107	my $objnum = shift;
1489	70					117	my $gennum = shift;
1490
1491	70					113	my $str = q{};
1492	70	50				93	if (${$c} =~ m/ \G<([\da-fA-F\s])>\s /cgxms)
	70					3539
1493							{
1494	70					359	$str = $1;
1495	70					1097	$str =~ s/\s+//gxms;
1496	70					106	my $len = length $str;
1497	70	100				216	if ($len % 2 == 1)
1498							{
1499	1					3	$str .= '0';
1500							}
1501	70					792	$str = pack 'H*', $str;
1502							}
1503							else
1504							{
1505	0					0	die "Expected hex string\n" . $pkg_or_doc->trimstr(${$c});
	0					0
1506							}
1507
1508	70	100				191	if (ref $pkg_or_doc)
1509							{
1510	64					101	my $self = $pkg_or_doc;
1511	64	100				208	if ($self->{crypt})
1512							{
1513	6					65	$str = $self->{crypt}->decrypt($self, $str, $objnum, $gennum);
1514							}
1515							}
1516	70					256	return CAM::PDF::Node->new('hexstring', $str, $objnum, $gennum);
1517							}
1518
1519							=item $doc->parseBoolean($string)
1520
1521							=item $doc->parseBoolean($string, $objnum)
1522
1523							=item $doc->parseBoolean($string, $objnum, $gennum)
1524
1525							Use parseAny() instead of this, if possible.
1526
1527							Given a fragment of PDF page content, parse it and return a boolean
1528							Node. This can be called as a class or instance method.
1529
1530							=cut
1531
1532							sub parseBoolean
1533							{
1534	21			21	1	544	my $pkg_or_doc = shift;
1535	21					29	my $c = shift;
1536	21					32	my $objnum = shift;
1537	21					35	my $gennum = shift;
1538
1539	21					34	my $val = q{};
1540	21	50				28	if (${$c} =~ m/ \G(true\|false)\s* /cgxmsi)
	21					111
1541							{
1542	21					58	$val = lc $1;
1543							}
1544							else
1545							{
1546	0					0	die "Expected boolean true or false keyword\n" . $pkg_or_doc->trimstr(${$c});
	0					0
1547							}
1548
1549	21					76	return CAM::PDF::Node->new('boolean', $val, $objnum, $gennum);
1550							}
1551
1552							=item $doc->parseNull($string)
1553
1554							=item $doc->parseNull($string, $objnum)
1555
1556							=item $doc->parseNull($string, $objnum, $gennum)
1557
1558							Use parseAny() instead of this, if possible.
1559
1560							Given a fragment of PDF page content, parse it and return a null
1561							Node. This can be called as a class or instance method.
1562
1563							=cut
1564
1565							sub parseNull
1566							{
1567	1			1	1	2	my $pkg_or_doc = shift;
1568	1					3	my $c = shift;
1569	1					1	my $objnum = shift;
1570	1					3	my $gennum = shift;
1571
1572	1					2	my $val = q{};
1573	1	50				1	if (${$c} =~ m/ \Gnull\s* /cgxmsi)
	1					6
1574							{
1575	1					3	$val = undef;
1576							}
1577							else
1578							{
1579	0					0	die "Expected null keyword\n" . $pkg_or_doc->trimstr(${$c});
	0					0
1580							}
1581
1582	1					5	return CAM::PDF::Node->new('null', $val, $objnum, $gennum);
1583							}
1584
1585							=item $doc->parseAny($string)
1586
1587							=item $doc->parseAny($string, $objnum)
1588
1589							=item $doc->parseAny($string, $objnum, $gennum)
1590
1591							Given a fragment of PDF page content, parse it and return a Node of
1592							the appropriate type. This can be called as a class or instance
1593							method.
1594
1595							=cut
1596
1597							sub parseAny
1598							{
1599	21728			21728	1	29256	my $p = shift; # pkg or doc
1600	21728					22782	my $c = shift;
1601	21728					32949	my $objnum = shift;
1602	21728					22111	my $gennum = shift;
1603
1604	21728					225209	return ${$c} =~ m/ \G \d+\s+\d+\s+R\b /xms ? $p->parseRef( $c, $objnum, $gennum)
	21316					44340
1605	20208					35795	: ${$c} =~ m{ \G / }xms ? $p->parseLabel( $c, $objnum, $gennum)
1606	19885					35148	: ${$c} =~ m/ \G << /xms ? $p->parseDict( $c, $objnum, $gennum)
1607	19712					37903	: ${$c} =~ m/ \G \[ /xms ? $p->parseArray( $c, $objnum, $gennum)
1608	18020					30039	: ${$c} =~ m/ \G [(] /xms ? $p->parseString( $c, $objnum, $gennum)
1609	17953					71748	: ${$c} =~ m/ \G < /xms ? $p->parseHexString($c, $objnum, $gennum)
1610	18					115	: ${$c} =~ m/ \G [\d.+-]+ /xms ? $p->parseNum( $c, $objnum, $gennum)
1611	0					0	: ${$c} =~ m/ \G (true\|false) /ixms ? $p->parseBoolean( $c, $objnum, $gennum)
1612	0					0	: ${$c} =~ m/ \G null /ixms ? $p->parseNull( $c, $objnum, $gennum)
1613	21728	0				23995	: die "Unrecognized type in parseAny:\n" . $p->trimstr(${$c});
		50
		100
		100
		100
		100
		100
		100
		100
1614							}
1615
1616							################################################################################
1617
1618							=back
1619
1620							=head2 Data Accessors
1621
1622							=over
1623
1624							=item $doc->getValue($object)
1625
1626							I
1627
1628							Dereference a data object, return a value. Given an node object
1629							of any kind, returns raw scalar object: hashref, arrayref, string,
1630							number. This function follows all references, and descends into all
1631							objects.
1632
1633							=cut
1634
1635							sub getValue
1636							{
1637	3839			3839	1	5615	my $self = shift;
1638	3839					4450	my $objnode = shift;
1639
1640	3839	100				17902	return if (! ref $objnode);
1641
1642	3835		100			16835	while ($objnode->{type} eq 'reference' \|\| $objnode->{type} eq 'object')
1643							{
1644	3276	100				8433	if ($objnode->{type} eq 'reference')
		50
1645							{
1646	1638					2616	my $objnum = $objnode->{value};
1647	1638					3827	$objnode = $self->dereference($objnum);
1648							}
1649							elsif ($objnode->{type} eq 'object')
1650							{
1651	1638					2651	$objnode = $objnode->{value};
1652							}
1653	3276	50				20455	return if (! ref $objnode);
1654							}
1655
1656	3835					9961	return $objnode->{value};
1657							}
1658
1659							=item $doc->getObjValue($objectnum)
1660
1661							I
1662
1663							Dereference a data object, and return a value. Behaves just like the
1664							getValue() function, but used when all you know is the object number.
1665
1666							=cut
1667
1668							sub getObjValue
1669							{
1670	28			28	1	64	my $self = shift;
1671	28					70	my $objnum = shift;
1672
1673	28					186	return $self->getValue(CAM::PDF::Node->new('reference', $objnum));
1674							}
1675
1676
1677							=item $doc->dereference($objectnum)
1678
1679							=item $doc->dereference($name, $pagenum)
1680
1681							I
1682
1683							Dereference a data object, return a PDF object as a node. This
1684							function makes heavy use of the internal object cache. Most (if not
1685							all) object requests should go through this function.
1686
1687							C<$name> should look something like '/R12'.
1688
1689							=cut
1690
1691							sub dereference
1692							{
1693	4103			4103	1	5915	my $self = shift;
1694	4103					5323	my $key = shift;
1695	4103					4226	my $pagenum = shift; # only used if $key is a named resource
1696
1697	4103	50				10673	if ($key =~ s/ \A\/ //xms) # strip off the leading slash while testing
1698							{
1699							# This is a request for a named object
1700	0					0	$self->_buildNameTable($pagenum);
1701	0					0	$key = $self->{Names}->{$pagenum}->{$key};
1702	0	0				0	return if (!defined $key);
1703							# $key should now point to a 'reference' object
1704	0	0				0	if ((ref $key) ne 'CAM::PDF::Node')
1705							{
1706	0					0	die "Assertion failed: key is a reference object in dereference\n";
1707							}
1708	0					0	$key = $key->{value};
1709							}
1710
1711	4103					4808	$key = int $key;
1712	4103	100				19630	if (!exists $self->{objcache}->{$key})
1713							{
1714							#print "Filling cache for obj \#$key...\n";
1715
1716	293					651	my $pos = $self->{xref}->{$key};
1717
1718	293	50				548	if (!$pos)
1719							{
1720	0					0	warn "Bad request for object $key at position 0 in the file\n";
1721	0					0	return;
1722							}
1723
1724	293					298	my $content_fragment;
1725	293	100				535	if (ref $pos)
1726							{
1727	44					374	$content_fragment = substr $pos->{objstream}->{stream}, $pos->{start}, $pos->{end};
1728	44					151	$content_fragment = "$key 0 obj\n$content_fragment\nendobj\n";
1729							}
1730							else
1731							{
1732							# This is fastest and safest
1733	249	50				661	if (!exists $self->{endxref})
1734							{
1735	0					0	$self->_buildendxref();
1736							}
1737	249					565	my $endpos = $self->{endxref}->{$key};
1738	249	50	33			1066	if (!defined $endpos \|\| $endpos < $pos)
1739							{
1740							# really slow, but a totally safe fallback
1741	0					0	$endpos = $self->{contentlength};
1742							}
1743
1744	249					6332	$content_fragment = substr $self->{content}, $pos, $endpos - $pos;
1745							}
1746	293					901	$self->{objcache}->{$key} = $self->parseObj(\$content_fragment, $key);
1747							}
1748
1749	4103					14175	return $self->{objcache}->{$key};
1750							}
1751
1752
1753							=item $doc->getPropertyNames($pagenum)
1754
1755							=item $doc->getProperty($pagenum, $propertyname)
1756
1757							Each PDF page contains a list of resources that it uses (images,
1758							fonts, etc). getPropertyNames() returns an array of the names of
1759							those resources. getProperty() returns a node representing a
1760							named property (most likely a reference node).
1761
1762							=cut
1763
1764							sub getPropertyNames
1765							{
1766	0			0	1	0	my $self = shift;
1767	0					0	my $pagenum = shift;
1768
1769	0					0	$self->_buildNameTable($pagenum);
1770	0					0	my $props = $self->{Names}->{$pagenum};
1771	0	0				0	return if (!defined $props);
1772	0					0	return keys %{$props};
	0					0
1773							}
1774							sub getProperty
1775							{
1776	0			0	1	0	my $self = shift;
1777	0					0	my $pagenum = shift;
1778	0					0	my $name = shift;
1779
1780	0					0	$self->_buildNameTable($pagenum);
1781	0					0	my $props = $self->{Names}->{$pagenum};
1782	0	0				0	return if (!defined $props);
1783	0	0				0	return if (!defined $name);
1784	0					0	return $props->{$name};
1785							}
1786
1787							=item $doc->getFont($pagenum, $fontname)
1788
1789							I
1790
1791							Returns a dictionary for a given font identified by its label,
1792							referenced by page.
1793
1794							=cut
1795
1796							sub getFont
1797							{
1798	0			0	1	0	my $self = shift;
1799	0					0	my $pagenum = shift;
1800	0					0	my $fontname = shift;
1801
1802	0					0	$fontname =~ s/ \A\/? /\//xms; # add leading slash if needed
1803	0					0	my $objnode = $self->dereference($fontname, $pagenum);
1804	0	0				0	return if (!$objnode);
1805
1806	0					0	my $dict = $self->getValue($objnode);
1807	0	0	0			0	if ($dict && $dict->{Type} && $dict->{Type}->{value} eq 'Font')
			0
1808							{
1809	0					0	return $dict;
1810							}
1811							else
1812							{
1813	0					0	return;
1814							}
1815							}
1816
1817							=item $doc->getFontNames($pagenum)
1818
1819							I
1820
1821							Returns a list of fonts for a given page.
1822
1823							=cut
1824
1825							sub getFontNames
1826							{
1827	0			0	1	0	my $self = shift;
1828	0					0	my $pagenum = shift;
1829
1830	0					0	$self->_buildNameTable($pagenum);
1831	0					0	my $list = $self->{Names}->{$pagenum};
1832	0					0	my @names;
1833	0	0				0	if ($list)
1834							{
1835	0					0	for my $key (keys %{$list})
	0					0
1836							{
1837	0					0	my $dict = $self->getValue($list->{$key});
1838	0	0	0			0	if ($dict && $dict->{Type} && $dict->{Type}->{value} eq 'Font')
			0
1839							{
1840	0					0	push @names, $key;
1841							}
1842							}
1843							}
1844	0					0	return @names;
1845							}
1846
1847
1848							=item $doc->getFonts($pagenum)
1849
1850							I
1851
1852							Returns an array of font objects for a given page.
1853
1854							=cut
1855
1856							sub getFonts
1857							{
1858	0			0	1	0	my $self = shift;
1859	0					0	my $pagenum = shift;
1860
1861	0					0	$self->_buildNameTable($pagenum);
1862	0					0	my $list = $self->{Names}->{$pagenum};
1863	0					0	my @fonts;
1864	0	0				0	if ($list)
1865							{
1866	0					0	for my $key (keys %{$list})
	0					0
1867							{
1868	0					0	my $dict = $self->getValue($list->{$key});
1869	0	0	0			0	if ($dict && $dict->{Type} && $dict->{Type}->{value} eq 'Font')
			0
1870							{
1871	0					0	push @fonts, $dict;
1872							}
1873							}
1874							}
1875	0					0	return @fonts;
1876							}
1877
1878							=item $doc->getFontByBaseName($pagenum, $fontname)
1879
1880							I
1881
1882							Returns a dictionary for a given font, referenced by page and the name
1883							of the base font.
1884
1885							=cut
1886
1887							sub getFontByBaseName
1888							{
1889	0			0	1	0	my $self = shift;
1890	0					0	my $pagenum = shift;
1891	0					0	my $fontname = shift;
1892
1893	0					0	$self->_buildNameTable($pagenum);
1894	0					0	my $list = $self->{Names}->{$pagenum};
1895	0					0	for my $key (keys %{$list})
	0					0
1896							{
1897	0					0	my $num = $list->{$key}->{value};
1898	0					0	my $objnode = $self->dereference($num);
1899	0					0	my $dict = $self->getValue($objnode);
1900	0	0	0			0	if ($dict &&
			0
			0
			0
1901							$dict->{Type} && $dict->{Type}->{value} eq 'Font' &&
1902							$dict->{BaseFont} && $dict->{BaseFont}->{value} eq $fontname)
1903							{
1904	0					0	return $dict;
1905							}
1906							}
1907	0					0	return;
1908							}
1909
1910							=item $doc->getFontMetrics($properties $fontname)
1911
1912							I
1913
1914							Returns a data structure representing the font metrics for the named
1915							font. The property list is the results of something like the
1916							following:
1917
1918							$self->_buildNameTable($pagenum);
1919							my $properties = $self->{Names}->{$pagenum};
1920
1921							Alternatively, if you know the page number, it might be easier to do:
1922
1923							my $font = $self->dereference($fontlabel, $pagenum);
1924							my $fontmetrics = $font->{value}->{value};
1925
1926							where the C<$fontlabel> is something like '/Helv'. The getFontMetrics()
1927							method is useful in the cases where you've forgotten which page number
1928							you are working on (e.g. in CAM::PDF::GS), or if your property list
1929							isn't part of any page (e.g. working with form field annotation
1930							objects).
1931
1932							=cut
1933
1934							sub getFontMetrics
1935							{
1936	164			164	1	318	my $self = shift;
1937	164					262	my $props = shift;
1938	164					303	my $fontname = shift;
1939
1940	164					247	my $fontmetrics;
1941
1942							#print STDERR "looking for font $fontname\n";
1943
1944							# Sometimes we are passed just the object list, sometimes the whole
1945							# properties data structure
1946	164	50				575	if ($props->{Font})
1947							{
1948	0					0	$props = $self->getValue($props->{Font});
1949							}
1950
1951	164	50				693	if ($props->{$fontname})
1952							{
1953	164					631	my $fontdict = $self->getValue($props->{$fontname});
1954	164	50	33			1822	if ($fontdict && $fontdict->{Type} && $fontdict->{Type}->{value} eq 'Font')
			33
1955							{
1956	164					304	$fontmetrics = $fontdict;
1957							#print STDERR "Got font\n";
1958							}
1959							else
1960							{
1961							#print STDERR "Almost got font\n";
1962							}
1963							}
1964							else
1965							{
1966							#print STDERR "No font with that name in the dict\n";
1967							}
1968							#print STDERR "Failed to get font\n" if (!$fontmetrics);
1969	164					605	return $fontmetrics;
1970							}
1971
1972							=item $doc->addFont($pagenum, $fontname, $fontlabel)
1973
1974							=item $doc->addFont($pagenum, $fontname, $fontlabel, $fontmetrics)
1975
1976							Adds a reference to the specified font to the page.
1977
1978							If a font metrics hash is supplied (it is required for a font other
1979							than the 14 core fonts), then it is cloned and inserted into the new
1980							font structure. Note that if those font metrics contain references
1981							(e.g. to the C), the referred objects are not copied --
1982							you must do that part yourself.
1983
1984							For Type1 fonts, the font metrics must minimally contain the following
1985							fields: C, C, C, C,
1986							C.
1987
1988							=cut
1989
1990							sub addFont
1991							{
1992	0			0	1	0	my $self = shift;
1993	0					0	my $pagenum = shift;
1994	0					0	my $name = shift;
1995	0					0	my $label = shift;
1996	0					0	my $fontmetrics = shift; # optional
1997
1998							# Check if this font already exists
1999	0					0	my $page = $self->getPage($pagenum);
2000	0	0				0	if (exists $page->{Resources})
2001							{
2002	0					0	my $r = $self->getValue($page->{Resources});
2003	0	0				0	if (exists $r->{Font})
2004							{
2005	0					0	my $f = $self->getValue($r->{Font});
2006	0	0				0	if (exists $f->{$label})
2007							{
2008							# Font already exists. Skip.
2009	0					0	return $self;
2010							}
2011							}
2012							}
2013
2014							# Build the font
2015	0					0	my $dict = CAM::PDF::Node->new('dictionary',
2016							{
2017							Type => CAM::PDF::Node->new('label', 'Font'),
2018							Name => CAM::PDF::Node->new('label', $label),
2019							BaseFont => CAM::PDF::Node->new('label', $name),
2020							},
2021							);
2022	0	0				0	if ($fontmetrics)
2023							{
2024	0					0	my $copy = $self->copyObject($fontmetrics);
2025	0					0	for my $key (keys %{$copy})
	0					0
2026							{
2027	0	0				0	if (!$dict->{value}->{$key})
2028							{
2029	0					0	$dict->{value}->{$key} = $copy->{$key};
2030							}
2031							}
2032							}
2033							else
2034							{
2035	0					0	$dict->{value}->{Subtype} = CAM::PDF::Node->new('label', 'Type1');
2036							}
2037
2038							# Add the font to the document
2039	0					0	my $fontobjnum = $self->appendObject(undef, CAM::PDF::Node->new('object', $dict), 0);
2040
2041							# Add the font to the page
2042	0					0	my ($objnum,$gennum) = $self->getPageObjnum($pagenum);
2043	0	0				0	if (!exists $page->{Resources})
2044							{
2045	0					0	$page->{Resources} = CAM::PDF::Node->new('dictionary', {}, $objnum, $gennum);
2046							}
2047	0					0	my $r = $self->getValue($page->{Resources});
2048	0	0				0	if (!exists $r->{Font})
2049							{
2050	0					0	$r->{Font} = CAM::PDF::Node->new('dictionary', {}, $objnum, $gennum);
2051							}
2052	0					0	my $f = $self->getValue($r->{Font});
2053	0					0	$f->{$label} = CAM::PDF::Node->new('reference', $fontobjnum, $objnum, $gennum);
2054
2055	0					0	delete $self->{Names}->{$pagenum}; # decache
2056	0					0	$self->{changes}->{$objnum} = 1;
2057	0					0	return $self;
2058							}
2059
2060							=item $doc->deEmbedFont($pagenum, $fontname)
2061
2062							=item $doc->deEmbedFont($pagenum, $fontname, $basefont)
2063
2064							Removes embedded font data, leaving font reference intact. Returns
2065							true if the font exists and 1) font is not embedded or 2) embedded
2066							data was successfully discarded. Returns false if the font does not
2067							exist, or the embedded data could not be discarded.
2068
2069							The optional C<$basefont> parameter allows you to change the font. This
2070							is useful when some applications embed a standard font (see below) and
2071							give it a funny name, like C. In this example, it's
2072							important to change the basename back to the standard C when
2073							de-embedding.
2074
2075							De-embedding the font does NOT remove it from the PDF document, it
2076							just removes references to it. To get a size reduction by throwing
2077							away unused font data, you should use the following code sometime
2078							after this method.
2079
2080							$self->cleanse();
2081
2082							For reference, the standard fonts are C, C, and
2083							C (and their bold, italic and bold-italic forms) plus C and
2084							C. (Adobe PDF Reference v1.4, p.319)
2085
2086							=cut
2087
2088							sub deEmbedFont
2089							{
2090	0			0	1	0	my $self = shift;
2091	0					0	my $pagenum = shift;
2092	0					0	my $fontname = shift;
2093	0					0	my $basefont = shift;
2094
2095	0					0	my $success;
2096	0					0	my $font = $self->getFont($pagenum, $fontname);
2097	0	0				0	if ($font)
2098							{
2099	0					0	$self->_deEmbedFontObj($font, $basefont);
2100	0					0	$success = $self;
2101							}
2102							else
2103							{
2104	0					0	$success = undef;
2105							}
2106	0					0	return $success;
2107							}
2108
2109							=item $doc->deEmbedFontByBaseName($pagenum, $fontname)
2110
2111							=item $doc->deEmbedFontByBaseName($pagenum, $fontname, $basefont)
2112
2113							Just like deEmbedFont(), except that the font name parameter refers to
2114							the name of the current base font instead of the PDF label for the
2115							font.
2116
2117							=cut
2118
2119							sub deEmbedFontByBaseName
2120							{
2121	0			0	1	0	my $self = shift;
2122	0					0	my $pagenum = shift;
2123	0					0	my $fontname = shift;
2124	0					0	my $basefont = shift;
2125
2126	0					0	my $success;
2127	0					0	my $font = $self->getFontByBaseName($pagenum, $fontname);
2128	0	0				0	if ($font)
2129							{
2130	0					0	$self->_deEmbedFontObj($font, $basefont);
2131	0					0	$success = $self;
2132							}
2133							else
2134							{
2135	0					0	$success = undef;
2136							}
2137	0					0	return $success;
2138							}
2139
2140							sub _deEmbedFontObj
2141							{
2142	0			0		0	my $self = shift;
2143	0					0	my $font = shift;
2144	0					0	my $basefont = shift;
2145
2146	0	0				0	if ($basefont)
2147							{
2148	0					0	$font->{BaseFont} = CAM::PDF::Node->new('label', $basefont);
2149							}
2150	0					0	delete $font->{FontDescriptor};
2151	0					0	delete $font->{Widths};
2152	0					0	delete $font->{FirstChar};
2153	0					0	delete $font->{LastChar};
2154	0					0	$self->{changes}->{$font->{Type}->{objnum}} = 1;
2155	0					0	return;
2156							}
2157
2158							=item $doc->wrapString($string, $width, $fontsize, $fontmetrics)
2159
2160							=item $doc->wrapString($string, $width, $fontsize, $pagenum, $fontlabel)
2161
2162							Returns an array of strings wrapped to the specified width.
2163
2164							=cut
2165
2166							sub wrapString ## no critic (Unpack)
2167							{
2168	0			0	1	0	my $self = shift;
2169	0					0	my $string = shift;
2170	0					0	my $width = shift;
2171	0					0	my $size = shift;
2172
2173	0					0	my $fm;
2174	0	0	0			0	if (defined $_[0] && ref $_[0])
2175							{
2176	0					0	$fm = shift;
2177							}
2178							else
2179							{
2180	0					0	my $pagenum = shift;
2181	0					0	my $fontlabel = shift;
2182	0					0	$fm = $self->getFont($pagenum, $fontlabel);
2183							}
2184
2185	0					0	$string =~ s/ \r\n /\n/gxms;
2186							# no split limit, so trailing null strings are omitted
2187	0					0	my @strings = split /[\r\n]/xms, $string;
2188	0					0	my @out;
2189	0					0	$width /= $size;
2190							#print STDERR 'wrapping '.join('\|',@strings)."\n";
2191	0					0	for my $s (@strings)
2192							{
2193	0					0	$s =~ s/ \s+\z //xms;
2194	0					0	my $w = $self->getStringWidth($fm, $s);
2195	0	0				0	if ($w <= $width)
2196							{
2197	0					0	push @out, $s;
2198							}
2199							else
2200							{
2201	0					0	my $cur;
2202	0	0				0	if ($s =~ s/ \A(\s*) //xms)
2203							{
2204	0					0	$cur = $1;
2205							}
2206	0	0				0	my $curw = $cur eq q{} ? 0 : $self->getStringWidth($fm, $cur);
2207	0					0	while ($s)
2208							{
2209	0					0	my ($sp,$wd);
2210	0	0				0	if ($s =~ s/ \A(\s)(\S) //xms)
2211							{
2212	0					0	($sp,$wd) = ($1,$2);
2213							}
2214	0	0				0	my $wwd = $wd eq q{} ? 0 : $self->getStringWidth($fm, $wd);
2215	0	0				0	if ($curw == 0)
2216							{
2217	0					0	$cur = $wd;
2218	0					0	$curw = $wwd;
2219							}
2220							else
2221							{
2222	0	0				0	my $wsp = $sp eq q{} ? 0 : $self->getStringWidth($fm, $sp);
2223	0	0				0	if ($curw + $wsp + $wwd <= $width)
2224							{
2225	0					0	$cur .= $sp . $wd;
2226	0					0	$curw += $wsp + $wwd;
2227							}
2228							else
2229							{
2230	0					0	push @out, $cur;
2231	0					0	$cur = $wd;
2232	0					0	$curw = $wwd;
2233							}
2234							}
2235							}
2236	0	0				0	if (0 < length $cur)
2237							{
2238	0					0	push @out, $cur;
2239							}
2240							}
2241							}
2242							#print STDERR 'wrapped to '.join('\|',@out)."\n";
2243	0					0	return @out;
2244							}
2245
2246							=item $doc->getStringWidth($fontmetrics, $string)
2247
2248							I
2249
2250							Returns the width of the string, using the font metrics if possible.
2251
2252							=cut
2253
2254							sub getStringWidth
2255							{
2256	795			795	1	1289	my $self = shift;
2257	795					1009	my $fontmetrics = shift;
2258	795					1113	my $string = shift;
2259
2260	795	50	33			3877	if (! defined $string \|\| $string eq q{})
2261							{
2262	0					0	return 0;
2263							}
2264
2265	795					1042	my $width = 0;
2266	795	50				1522	if ($fontmetrics)
2267							{
2268	795	100				2335	if ($fontmetrics->{Widths})
		50
2269							{
2270	792					2492	my $firstc = $self->getValue($fontmetrics->{FirstChar});
2271	792					2245	my $lastc = $self->getValue($fontmetrics->{LastChar});
2272	792					2391	my $widths = $self->getValue($fontmetrics->{Widths});
2273	792					1092	my $missing_width; # populate this on demand
2274							CHAR:
2275	792					2764	for my $char (unpack 'C*', $string)
2276							{
2277	3637	50	33			16794	if ($char >= $firstc && $char <= $lastc)
2278							{
2279	3637					7583	$width += $widths->[$char - $firstc]->{value};
2280	3637					6272	next CHAR;
2281							}
2282
2283	0	0				0	if (!defined $missing_width)
2284							{
2285	0	0				0	my $fd = exists $fontmetrics->{FontDescriptor} ?
2286							$self->getValue($fontmetrics->{FontDescriptor}) : undef;
2287	0	0	0			0	$missing_width = $fd && exists $fd->{MissingWidth} ?
2288							$self->getValue($fd->{MissingWidth}) : 0;
2289							}
2290
2291	0					0	$width += $missing_width;
2292							}
2293	792					2050	$width /= 1000.0; # units conversion
2294							}
2295							elsif ($fontmetrics->{BaseFont})
2296							{
2297	3					14	my $fontname = $self->getValue($fontmetrics->{BaseFont});
2298	3	100				13	if (!exists $self->{fontmetrics}->{$fontname})
2299							{
2300	1					2911	require Text::PDF::SFont;
2301	1					25166	require Text::PDF::File;
2302	1					13305	my $pdf = Text::PDF::File->new();
2303	1					198	$self->{fontmetrics}->{$fontname} =
2304							Text::PDF::SFont->new($pdf, $fontname, 'NULL');
2305							}
2306	3	50				183	if ($self->{fontmetrics}->{$fontname})
2307							{
2308	3					19	$width = $self->{fontmetrics}->{$fontname}->width($string);
2309							}
2310							}
2311							else
2312							{
2313	0					0	warn 'Failed to understand this font';
2314							}
2315							}
2316
2317	795	50				3219	if ($width == 0)
2318							{
2319							# HACK!!!
2320							#warn "Using klugy width!\n";
2321	0					0	$width = 0.2 * length $string;
2322							}
2323
2324	795					2522	return $width;
2325							}
2326
2327							=item $doc->numPages()
2328
2329							Returns the number of pages in the PDF document.
2330
2331							=cut
2332
2333							sub numPages
2334							{
2335	96			96	1	5544	my $self = shift;
2336	96					504	return $self->{PageCount};
2337							}
2338
2339							=item $doc->getPage($pagenum)
2340
2341							I
2342
2343							Returns a dictionary for a given numbered page.
2344
2345							=cut
2346
2347							sub getPage
2348							{
2349	96			96	1	158	my $self = shift;
2350	96					122	my $pagenum = shift;
2351
2352	96	50	33			704	if ($pagenum < 1 \|\| $pagenum > $self->{PageCount})
2353							{
2354	0					0	warn "Invalid page number requested: $pagenum\n";
2355	0					0	return;
2356							}
2357
2358	96	100				339	if (!exists $self->{pagecache}->{$pagenum})
2359							{
2360	32					213	my $node = $self->getPagesDict();
2361	32					59	my $nodestart = 1;
2362	32					128	while ($self->getValue($node->{Type}) eq 'Pages')
2363							{
2364	92					402	my $kids = $self->getValue($node->{Kids});
2365	92	50				238	if ((ref $kids) ne 'ARRAY')
2366							{
2367	0					0	die "Error: \@kids is not an array\n";
2368							}
2369	92					124	my $child = 0;
2370	92	100				107	if (@{$kids} == 1)
	92					207
2371							{
2372							# Do the simple case first:
2373	52					68	$child = 0;
2374							# nodestart is unchanged
2375							}
2376							else
2377							{
2378							# search through all kids EXCEPT don't bother looking at
2379							# the last one because that is surely the right one if all
2380							# the others are wrong.
2381
2382	40					62	while ($child < $#{$kids})
	70					208
2383							{
2384							# the first leaf of the kid is the page we want. It
2385							# doesn't matter if the kid is a leaf or a node.
2386	40	100				103	last if ($pagenum == $nodestart);
2387
2388							# Retrieve the dictionary of this child
2389	38					100	my $sub = $self->getValue($kids->[$child]);
2390	38	100				188	if ($sub->{Type}->{value} ne 'Pages')
2391							{
2392							# Its a leaf, and not the right one. Move on.
2393	16					33	$nodestart++;
2394							}
2395							else
2396							{
2397	22					58	my $count = $self->getValue($sub->{Count});
2398
2399							# The page we want is in this kid. Descend.
2400	22	100				76	last if ($nodestart + $count - 1 >= $pagenum);
2401
2402							# Not in this kid. Move on.
2403	14					22	$nodestart += $count;
2404							}
2405	30					47	$child++;
2406							}
2407							}
2408
2409	92					251	$node = $self->getValue($kids->[$child]);
2410	92	50				391	if (! ref $node)
2411							{
2412	0					0	require Data::Dumper;
2413	0					0	cluck Data::Dumper::Dumper($node);
2414							}
2415							}
2416
2417							# Ok, now we've got the right page. Store it.
2418	32					109	$self->{pagecache}->{$pagenum} = $node;
2419							}
2420
2421	96					252	return $self->{pagecache}->{$pagenum};
2422							}
2423
2424							=item $doc->getPageObjnum($pagenum)
2425
2426							I
2427
2428							Return the number of the PDF object in which the specified page occurs.
2429
2430							=cut
2431
2432							sub getPageObjnum
2433							{
2434	40			40	1	70	my $self = shift;
2435	40					70	my $pagenum = shift;
2436
2437	40					3098	my $page = $self->getPage($pagenum);
2438	40	50				106	return if (!$page);
2439	40					54	my ($anyobj) = values %{$page};
	40					138
2440	40	50				99	if (!$anyobj)
2441							{
2442	0					0	die "Internal error: page has no attributes!!!\n";
2443							}
2444	40	100				83	if (wantarray)
2445							{
2446	24					126	return ($anyobj->{objnum}, $anyobj->{gennum});
2447							}
2448							else
2449							{
2450	16					51	return $anyobj->{objnum};
2451							}
2452							}
2453
2454							=item $doc->getPageText($pagenum)
2455
2456							Extracts the text from a PDF page as a string.
2457
2458							=cut
2459
2460							sub getPageText
2461							{
2462	6			6	1	45	my $self = shift;
2463	6					9	my $pagenum = shift;
2464	6					11	my $verbose = shift;
2465
2466	6					23	my $pagetree = $self->getPageContentTree($pagenum, $verbose);
2467	6	50				20	if (!$pagetree)
2468							{
2469	0					0	return;
2470							}
2471
2472	6					1358	require CAM::PDF::PageText;
2473	6					58	return CAM::PDF::PageText->render($pagetree, $verbose);
2474							}
2475
2476							=item $doc->getPageContentTree($pagenum)
2477
2478							Retrieves a parsed page content data structure, or undef if there is a
2479							syntax error or if the page does not exist.
2480
2481							=cut
2482
2483							sub getPageContentTree
2484							{
2485	12			12	1	26	my $self = shift;
2486	12					20	my $pagenum = shift;
2487	12					20	my $verbose = shift;
2488
2489	12					49	my $content = $self->getPageContent($pagenum);
2490	12	50				57	return if (!defined $content);
2491
2492	12					56	$self->_buildNameTable($pagenum);
2493
2494	12					36	my $page = $self->getPage($pagenum);
2495	12					39	my $box = [0, 0, 612, 792];
2496	12	50				45	if ($page->{MediaBox})
2497							{
2498	12					34	my $mediabox = $self->getValue($page->{MediaBox});
2499	12					36	$box->[0] = $self->getValue($mediabox->[0]);
2500	12					35	$box->[1] = $self->getValue($mediabox->[1]);
2501	12					38	$box->[2] = $self->getValue($mediabox->[2]);
2502	12					30	$box->[3] = $self->getValue($mediabox->[3]);
2503							}
2504
2505	12					1445	require CAM::PDF::Content;
2506	12					189	my $tree = CAM::PDF::Content->new($content, {
2507							doc => $self,
2508							properties => $self->{Names}->{$pagenum},
2509							mediabox => $box,
2510							}, $verbose);
2511
2512	12					66	return $tree;
2513							}
2514
2515							=item $doc->getPageContent($pagenum)
2516
2517							Return a string with the layout contents of one page.
2518
2519							=cut
2520
2521							sub getPageContent
2522							{
2523	30			30	1	25552	my $self = shift;
2524	30					55	my $pagenum = shift;
2525
2526	30					208	my $page = $self->getPage($pagenum);
2527	30	50	33			162	if (!$page \|\| !exists $page->{Contents})
2528							{
2529	0					0	return q{};
2530							}
2531
2532	30					96	my $contents = $self->getValue($page->{Contents});
2533
2534	30	50				132	if (! ref $contents)
		50
		0
2535							{
2536	0					0	return $contents;
2537							}
2538							elsif ((ref $contents) eq 'HASH')
2539							{
2540							# doesn't matter if it's not encoded...
2541	30					170	return $self->decodeOne(CAM::PDF::Node->new('dictionary', $contents));
2542							}
2543							elsif ((ref $contents) eq 'ARRAY')
2544							{
2545	0					0	my $stream = q{};
2546	0					0	for my $arrobj (@{$contents})
	0					0
2547							{
2548	0					0	my $streamdata = $self->getValue($arrobj);
2549	0	0				0	if (! ref $streamdata)
		0
2550							{
2551	0					0	$stream .= $streamdata;
2552							}
2553							elsif ((ref $streamdata) eq 'HASH')
2554							{
2555	0					0	$stream .= $self->decodeOne(CAM::PDF::Node->new('dictionary',$streamdata)); # doesn't matter if it's not encoded...
2556							}
2557							else
2558							{
2559	0					0	die "Unexpected content type for page contents\n";
2560							}
2561							}
2562	0					0	return $stream;
2563							}
2564							else
2565							{
2566	0					0	die "Unexpected content type for page contents\n";
2567							}
2568	0					0	return; # should never get here
2569							}
2570
2571							=item $doc->getPageDimensions($pagenum)
2572
2573							Returns an array of C, C, C and C numbers that
2574							define the dimensions of the specified page in points (1/72 inches).
2575							Technically, this is the C dimensions, which explains why
2576							it's possible for C and C to be non-zero, but that's a rare
2577							case.
2578
2579							For example, given a simple 8.5 by 11 inch page, this method will return
2580							C<(0,0,612,792)>.
2581
2582							This method will die() if the specified page number does not exist.
2583
2584							=cut
2585
2586							sub getPageDimensions
2587							{
2588	0			0	1	0	my $self = shift;
2589	0					0	my $pagenum = shift;
2590	0					0	my $pagedict = shift; # only used during recursion
2591
2592	0	0				0	if (!$pagedict)
2593							{
2594	0					0	$pagedict = $self->getPage($pagenum);
2595	0	0				0	if (!$pagedict)
2596							{
2597	0					0	die 'No such page '.$pagenum;
2598							}
2599							}
2600
2601	0	0				0	if (exists $pagedict->{MediaBox})
		0
2602							{
2603	0					0	my $box = $self->getValue($pagedict->{MediaBox});
2604	0					0	return ($self->getValue($box->[0]),
2605							$self->getValue($box->[1]),
2606							$self->getValue($box->[2]),
2607							$self->getValue($box->[3]));
2608							}
2609							elsif (exists $pagedict->{Parent})
2610							{
2611	0					0	return $self->getPageDimensions($pagenum, $self->getValue($pagedict->{Parent}));
2612							}
2613							else
2614							{
2615	0					0	die 'Failed to find the page dimensions';
2616							}
2617	0					0	return; # never gets here
2618							}
2619
2620							=item $doc->getName($object)
2621
2622							I
2623
2624							Given a PDF object reference, return it's name, if it has one. This
2625							is useful for indirect references to images in particular.
2626
2627							=cut
2628
2629							sub getName
2630							{
2631	0			0	1	0	my $self = shift;
2632	0					0	my $objnode = shift;
2633
2634	0	0				0	if ($objnode->{value}->{type} eq 'dictionary')
2635							{
2636	0					0	my $dict = $objnode->{value}->{value};
2637	0	0				0	if (exists $dict->{Name})
2638							{
2639	0					0	return $self->getValue($dict->{Name});
2640							}
2641							}
2642	0					0	return q{};
2643							}
2644
2645							=item $doc->getPrefs()
2646
2647							Return an array of security information for the document:
2648
2649							owner password
2650							user password
2651							print boolean
2652							modify boolean
2653							copy boolean
2654							add boolean
2655
2656							See the PDF reference for the intended use of the latter four booleans.
2657
2658							This module publishes the array indices of these values for your
2659							convenience:
2660
2661							$CAM::PDF::PREF_OPASS
2662							$CAM::PDF::PREF_UPASS
2663							$CAM::PDF::PREF_PRINT
2664							$CAM::PDF::PREF_MODIFY
2665							$CAM::PDF::PREF_COPY
2666							$CAM::PDF::PREF_ADD
2667
2668							So, you can retrieve the value of the Copy boolean via:
2669
2670							my ($canCopy) = ($self->getPrefs())[$CAM::PDF::PREF_COPY];
2671
2672							=cut
2673
2674							sub getPrefs
2675							{
2676	48			48	1	3246	my $self = shift;
2677
2678	48					119	my @p = (1,1,1,1);
2679	48	100				195	if (exists $self->{crypt}->{P})
2680							{
2681	28					134	@p = $self->{crypt}->decode_permissions($self->{crypt}->{P});
2682							}
2683	48					434	return($self->{crypt}->{opass}, $self->{crypt}->{upass}, @p);
2684							}
2685
2686							=item $doc->canPrint()
2687
2688							Return a boolean indicating whether the Print permission is enabled
2689							on the PDF.
2690
2691							=cut
2692
2693							sub canPrint
2694							{
2695	8			8	1	18	my $self = shift;
2696	8					31	return ($self->getPrefs())[$PREF_PRINT];
2697							}
2698
2699							=item $doc->canModify()
2700
2701							Return a boolean indicating whether the Modify permission is enabled
2702							on the PDF.
2703
2704							=cut
2705
2706							sub canModify
2707							{
2708	8			8	1	21	my $self = shift;
2709	8					35	return ($self->getPrefs())[$PREF_MODIFY];
2710							}
2711
2712							=item $doc->canCopy()
2713
2714							Return a boolean indicating whether the Copy permission is enabled
2715							on the PDF.
2716
2717							=cut
2718
2719							sub canCopy
2720							{
2721	8			8	1	18	my $self = shift;
2722	8					27	return ($self->getPrefs())[$PREF_COPY];
2723							}
2724
2725							=item $doc->canAdd()
2726
2727							Return a boolean indicating whether the Add permission is enabled
2728							on the PDF.
2729
2730							=cut
2731
2732							sub canAdd
2733							{
2734	8			8	1	19	my $self = shift;
2735	8					30	return ($self->getPrefs())[$PREF_ADD];
2736							}
2737
2738							=item $doc->getFormFieldList()
2739
2740							Return an array of the names of all of the PDF form fields. The names
2741							are the full hierarchical names constructed as explained in the PDF
2742							reference manual. These names are useful for the fillFormFields()
2743							function.
2744
2745							=cut
2746
2747							sub getFormFieldList
2748							{
2749	0			0	1	0	my $self = shift;
2750	0					0	my $parentname = shift; # very optional
2751
2752	0	0				0	my $prefix = (defined $parentname ? $parentname . q{.} : q{});
2753
2754	0					0	my $kidlist;
2755	0	0	0			0	if (defined $parentname && $parentname ne q{})
2756							{
2757	0					0	my $parent = $self->getFormField($parentname);
2758	0	0				0	return if (!$parent);
2759	0					0	my $dict = $self->getValue($parent);
2760	0	0				0	return if (!exists $dict->{Kids});
2761	0					0	$kidlist = $self->getValue($dict->{Kids});
2762							}
2763							else
2764							{
2765	0					0	my $root = $self->getRootDict()->{AcroForm};
2766	0	0				0	return if (!$root);
2767	0					0	my $parent = $self->getValue($root);
2768	0	0				0	return if (!exists $parent->{Fields});
2769	0					0	$kidlist = $self->getValue($parent->{Fields});
2770							}
2771
2772	0					0	my @list;
2773	0					0	for my $kid (@{$kidlist})
	0					0
2774							{
2775	0	0	0			0	if ((! ref $kid) \|\| (ref $kid) ne 'CAM::PDF::Node' \|\| $kid->{type} ne 'reference')
			0
2776							{
2777	0					0	die "Expected a reference as the form child of '$parentname'\n";
2778							}
2779	0					0	my $objnode = $self->dereference($kid->{value});
2780	0					0	my $dict = $self->getValue($objnode);
2781	0					0	my $name = '(no name)'; # assume the worst
2782	0	0				0	if (exists $dict->{T})
2783							{
2784	0					0	$name = $self->getValue($dict->{T});
2785							}
2786	0					0	$name = $prefix . $name;
2787	0					0	push @list, $name;
2788	0	0				0	if (exists $dict->{TU})
2789							{
2790	0					0	push @list, $prefix . $self->getValue($dict->{TU}) . ' (alternate name)';
2791							}
2792	0					0	$self->{formcache}->{$name} = $objnode;
2793	0					0	my @kidnames = $self->getFormFieldList($name);
2794	0	0				0	if (@kidnames > 0)
2795							{
2796							#push @list, 'descend...';
2797	0					0	push @list, @kidnames;
2798							#push @list, 'ascend...';
2799							}
2800							}
2801	0					0	return @list;
2802							}
2803
2804							=item $doc->getFormField($name)
2805
2806							I
2807
2808							Return the object containing the form field definition for the
2809							specified field name. C<$name> can be either the full name or the
2810							"short/alternate" name.
2811
2812							=cut
2813
2814							sub getFormField
2815							{
2816	0			0	1	0	my $self = shift;
2817	0					0	my $fieldname = shift;
2818
2819	0	0				0	return if (!defined $fieldname);
2820
2821	0	0				0	if (! exists $self->{formcache}->{$fieldname})
2822							{
2823	0					0	my $kidlist;
2824							my $parent;
2825	0	0				0	if ($fieldname =~ m/ [.] /xms)
2826							{
2827	0					0	my $parentname;
2828	0	0				0	if ($fieldname =~ s/ \A(.*)[.]([.]+)\z /$2/xms)
2829							{
2830	0					0	$parentname = $1;
2831							}
2832	0	0				0	return if (!$parentname);
2833	0					0	$parent = $self->getFormField($parentname);
2834	0	0				0	return if (!$parent);
2835	0					0	my $dict = $self->getValue($parent);
2836	0	0				0	return if (!exists $dict->{Kids});
2837	0					0	$kidlist = $self->getValue($dict->{Kids});
2838							}
2839							else
2840							{
2841	0					0	my $root = $self->getRootDict()->{AcroForm};
2842	0	0				0	return if (!$root);
2843	0					0	$parent = $self->dereference($root->{value});
2844	0	0				0	return if (!$parent);
2845	0					0	my $dict = $self->getValue($parent);
2846	0	0				0	return if (!exists $dict->{Fields});
2847	0					0	$kidlist = $self->getValue($dict->{Fields});
2848							}
2849
2850	0					0	$self->{formcache}->{$fieldname} = undef; # assume the worst...
2851	0					0	for my $kid (@{$kidlist})
	0					0
2852							{
2853	0					0	my $objnode = $self->dereference($kid->{value});
2854	0					0	$objnode->{formparent} = $parent;
2855	0					0	my $dict = $self->getValue($objnode);
2856	0	0				0	if (exists $dict->{T})
2857							{
2858	0					0	$self->{formcache}->{$self->getValue($dict->{T})} = $objnode;
2859							}
2860	0	0				0	if (exists $dict->{TU})
2861							{
2862	0					0	$self->{formcache}->{$self->getValue($dict->{TU})} = $objnode;
2863							}
2864							}
2865							}
2866
2867	0					0	return $self->{formcache}->{$fieldname};
2868							}
2869
2870							=item $doc->getFormFieldDict($formfieldobject)
2871
2872							I
2873
2874							Return a hash reference representing the accumulated property list for
2875							a form field, including all of it's inherited properties. This should
2876							be treated as a read-only hash! It ONLY retrieves the properties it
2877							knows about.
2878
2879							=cut
2880
2881							sub getFormFieldDict
2882							{
2883	0			0	1	0	my $self = shift;
2884	0					0	my $field = shift;
2885
2886	0	0				0	return if (!defined $field);
2887
2888	0					0	my $dict = {};
2889	0	0				0	if ($field->{formparent})
2890							{
2891	0					0	$dict = $self->getFormFieldDict($field->{formparent});
2892							}
2893	0					0	my $olddict = $self->getValue($field);
2894
2895	0	0				0	if ($olddict->{DR})
2896							{
2897	0		0			0	$dict->{DR} \|\|= CAM::PDF::Node->new('dictionary', {});
2898	0					0	my $dr = $self->getValue($dict->{DR});
2899	0					0	my $olddr = $self->getValue($olddict->{DR});
2900	0					0	for my $key (keys %{$olddr})
	0					0
2901							{
2902	0	0				0	if ($dr->{$key})
2903							{
2904	0	0				0	if ($key eq 'Font')
2905							{
2906	0					0	my $fonts = $self->getValue($olddr->{$key});
2907	0					0	for my $font (keys %{$fonts})
	0					0
2908							{
2909	0					0	$dr->{$key}->{$font} = $self->copyObject($fonts->{$font});
2910							}
2911							}
2912							else
2913							{
2914	0					0	warn "Unknown resource key '$key' in form field dictionary";
2915							}
2916							}
2917							else
2918							{
2919	0					0	$dr->{$key} = $self->copyObject($olddr->{$key});
2920							}
2921							}
2922							}
2923
2924							# Some properties are simple: inherit means override
2925	0					0	for my $prop (qw(Q DA Ff V FT))
2926							{
2927	0	0				0	if ($olddict->{$prop})
2928							{
2929	0					0	$dict->{$prop} = $self->copyObject($olddict->{$prop});
2930							}
2931							}
2932
2933	0					0	return $dict;
2934							}
2935
2936							################################################################################
2937
2938							=back
2939
2940							=head2 Data/Object Manipulation
2941
2942							=over
2943
2944							=item $doc->setPrefs($ownerpass, $userpass, $print?, $modify?, $copy?, $add?)
2945
2946							Alter the document's security information. Note that modifying these
2947							parameters must be done respecting the intellectual property of the
2948							original document. See Adobe's statement in the introduction of the
2949							reference manual.
2950
2951							B Most PDF readers (Acrobat, Preview.app) only offer
2952							one password field for opening documents. So, if the C<$ownerpass>
2953							and C<$userpass> are different, those applications cannot read the
2954							documents. (Perhaps this is a bug in CAM::PDF?)
2955
2956							Note: any omitted booleans default to false. So, these two are
2957							equivalent:
2958
2959							$doc->setPrefs('password', 'password');
2960							$doc->setPrefs('password', 'password', 0, 0, 0, 0);
2961
2962							=cut
2963
2964							sub setPrefs
2965							{
2966	12			12	1	42	my ($self, @prefs) = @_;
2967
2968	12					86	my $p = $self->{crypt}->encode_permissions(@prefs[2..5]);
2969	12					63	$self->{crypt}->set_passwords($self, @prefs[0..1], $p);
2970	12					52	return;
2971							}
2972
2973							=item $doc->setName($object, $name)
2974
2975							I
2976
2977							Change the name of a PDF object structure.
2978
2979							=cut
2980
2981							sub setName
2982							{
2983	0			0	1	0	my $self = shift;
2984	0					0	my $objnode = shift;
2985	0					0	my $name = shift;
2986
2987	0	0	0			0	if ($name && $objnode->{value}->{type} eq 'dictionary')
2988							{
2989	0					0	$objnode->{value}->{value}->{Name} = CAM::PDF::Node->new('label', $name, $objnode->{objnum}, $objnode->{gennum});
2990	0	0				0	if ($objnode->{objnum})
2991							{
2992	0					0	$self->{changes}->{$objnode->{objnum}} = 1;
2993							}
2994	0					0	return $self;
2995							}
2996	0					0	return;
2997							}
2998
2999							=item $doc->removeName($object)
3000
3001							I
3002
3003							Delete the name of a PDF object structure.
3004
3005							=cut
3006
3007							sub removeName
3008							{
3009	0			0	1	0	my $self = shift;
3010	0					0	my $objnode = shift;
3011
3012	0	0	0			0	if ($objnode->{value}->{type} eq 'dictionary' && exists $objnode->{value}->{value}->{Name})
3013							{
3014	0					0	delete $objnode->{value}->{value}->{Name};
3015	0	0				0	if ($objnode->{objnum})
3016							{
3017	0					0	$self->{changes}->{$objnode->{objnum}} = 1;
3018							}
3019	0					0	return $self;
3020							}
3021	0					0	return;
3022							}
3023
3024
3025							=item $doc->pageAddName($pagenum, $name, $objectnum)
3026
3027							I
3028
3029							Append a named object to the metadata for a given page.
3030
3031							=cut
3032
3033							sub pageAddName
3034							{
3035	0			0	1	0	my $self = shift;
3036	0					0	my $pagenum = shift;
3037	0					0	my $name = shift;
3038	0					0	my $key = shift;
3039
3040	0					0	$self->_buildNameTable($pagenum);
3041	0					0	my $page = $self->getPage($pagenum);
3042	0					0	my ($objnum, $gennum) = $self->getPageObjnum($pagenum);
3043
3044	0	0				0	if (!exists $self->{NameObjects}->{$pagenum})
3045							{
3046	0	0				0	if ($objnum)
3047							{
3048	0					0	$self->{changes}->{$objnum} = 1;
3049							}
3050	0	0				0	if (!exists $page->{Resources})
3051							{
3052	0					0	$page->{Resources} = CAM::PDF::Node->new('dictionary', {}, $objnum, $gennum);
3053							}
3054	0					0	my $r = $self->getValue($page->{Resources});
3055	0	0				0	if (!exists $r->{XObject})
3056							{
3057	0					0	$r->{XObject} = CAM::PDF::Node->new('dictionary', {}, $objnum, $gennum);
3058							}
3059	0					0	$self->{NameObjects}->{$pagenum} = $self->getValue($r->{XObject});
3060							}
3061
3062	0					0	$self->{NameObjects}->{$pagenum}->{$name} = CAM::PDF::Node->new('reference', $key, $objnum, $gennum);
3063	0	0				0	if ($objnum)
3064							{
3065	0					0	$self->{changes}->{$objnum} = 1;
3066							}
3067	0					0	return;
3068							}
3069
3070							=item $doc->setPageContent($pagenum, $content)
3071
3072							=item $doc->setPageContent($pagenum, $tree->toString)
3073
3074							Replace the content of the specified page with a new version. This
3075							function is often used after the getPageContent() function and some
3076							manipulation of the returned string from that function.
3077
3078							If your content is a parsed tree (i.e. the result of
3079							getPageContentTree) then you should serialize it via toString first.
3080
3081							=cut
3082
3083							sub setPageContent
3084							{
3085	4			4	1	10	my $self = shift;
3086	4					7	my $pagenum = shift;
3087	4					7	my $content = shift;
3088
3089							# Note that this could be implemented as
3090							# delete current content
3091							# appendPageContent
3092							# but that would lose the optimization below of reusing the content
3093							# object, where possible
3094
3095	4					14	my $page = $self->getPage($pagenum);
3096
3097	4					23	my $stream = $self->createStreamObject($content, 'FlateDecode');
3098	4	50	33			26	if ($page->{Contents} && $page->{Contents}->{type} eq 'reference')
3099							{
3100	0					0	my $key = $page->{Contents}->{value};
3101	0					0	$self->replaceObject($key, undef, $stream, 0);
3102							}
3103							else
3104							{
3105	4					18	my ($objnum, $gennum) = $self->getPageObjnum($pagenum);
3106	4					20	my $key = $self->appendObject(undef, $stream, 0);
3107	4					20	$page->{Contents} = CAM::PDF::Node->new('reference', $key, $objnum, $gennum);
3108	4					14	$self->{changes}->{$objnum} = 1;
3109							}
3110	4					12	return;
3111							}
3112
3113							=item $doc->appendPageContent($pagenum, $content)
3114
3115							Add more content to the specified page. Note that this function does
3116							NOT do any page metadata work for you (like creating font objects for
3117							any newly defined fonts).
3118
3119							=cut
3120
3121							sub appendPageContent
3122							{
3123	0			0	1	0	my $self = shift;
3124	0					0	my $pagenum = shift;
3125	0					0	my $content = shift;
3126
3127	0					0	my $page = $self->getPage($pagenum);
3128
3129	0					0	my ($objnum, $gennum) = $self->getPageObjnum($pagenum);
3130	0					0	my $stream = $self->createStreamObject($content, 'FlateDecode');
3131	0					0	my $key = $self->appendObject(undef, $stream, 0);
3132	0					0	my $streamref = CAM::PDF::Node->new('reference', $key, $objnum, $gennum);
3133
3134	0	0				0	if (!$page->{Contents})
		0
		0
3135							{
3136	0					0	$page->{Contents} = $streamref;
3137							}
3138							elsif ($page->{Contents}->{type} eq 'array')
3139							{
3140	0					0	push @{$page->{Contents}->{value}}, $streamref;
	0					0
3141							}
3142							elsif ($page->{Contents}->{type} eq 'reference')
3143							{
3144	0					0	$page->{Contents} = CAM::PDF::Node->new('array', [ $page->{Contents}, $streamref ], $objnum, $gennum);
3145							}
3146							else
3147							{
3148	0					0	die "Unsupported Content type \"$page->{Contents}->{type}\" on page $pagenum\n";
3149							}
3150	0					0	$self->{changes}->{$objnum} = 1;
3151	0					0	return;
3152							}
3153
3154							=item $doc->extractPages($pages...)
3155
3156							Remove all pages from the PDF except the specified ones. Like
3157							deletePages(), the pages can be multiple arguments, comma separated
3158							lists, ranges (open or closed).
3159
3160							=cut
3161
3162							sub extractPages ## no critic (Unpack)
3163							{
3164	4			4	1	13	my $self = shift;
3165	4	50				21	return $self if (@_ == 0); # no-work shortcut
3166	4					19	my @pages = $self->rangeToArray(1,$self->numPages(),@_);
3167
3168	4	50				18	if (@pages == 0)
3169							{
3170	0					0	croak 'Tried to delete all the pages';
3171							}
3172
3173	4					12	my %pages = map {$_ => 1} @pages; # eliminate duplicates
	8					26
3174
3175							# make a list that is the complement of the @pages list
3176	4					18	my @delete = grep {!$pages{$_}} 1..$self->numPages();
	24					62
3177
3178	4	50				20	return $self if (@delete == 0); # no-work shortcut
3179	4					24	return $self->_deletePages(@delete);
3180							}
3181
3182							=item $doc->deletePages($pages...)
3183
3184							Remove the specified pages from the PDF. The pages can be multiple
3185							arguments, comma separated lists, ranges (open or closed).
3186
3187							=cut
3188
3189							sub deletePages ## no critic (Unpack)
3190							{
3191	12			12	1	15264	my $self = shift;
3192	12	100				65	return $self if (@_ == 0); # no-work shortcut
3193	4					18	my @pages = $self->rangeToArray(1,$self->numPages(),@_);
3194
3195	4	50				19	return $self if (@pages == 0); # no-work shortcut
3196
3197	4					13	my %pages = map {$_ => 1} @pages; # eliminate duplicates
	4					19
3198
3199	4	50				14	if ($self->numPages() == scalar keys %pages)
3200							{
3201	0					0	croak 'Tried to delete all the pages';
3202							}
3203
3204	4					18	return $self->_deletePages(keys %pages);
3205							}
3206
3207							sub _deletePages
3208							{
3209	8			8		24	my ($self, @pages) = @_;
3210
3211							# Pages should be reverse sorted since we need to delete from the
3212							# end to make the page numbers come out right.
3213	8					18	my @objnums;
3214	8					34	for my $page (reverse sort {$a <=> $b} @pages)
	20					31
3215							{
3216	20					62	my $objnum = $self->_deletePage($page);
3217	20	50				58	if (!$objnum)
3218							{
3219	0					0	$self->_deleteRefsToPages(@objnums); # emergency cleanup to prevent corruption
3220	0					0	return;
3221							}
3222	20					55	push @objnums, $objnum;
3223							}
3224	8					39	$self->_deleteRefsToPages(@objnums);
3225	8					30	$self->cleanse();
3226	8					162	return $self;
3227							}
3228
3229							=item $doc->deletePage($pagenum)
3230
3231							Remove the specified page from the PDF. If the PDF has only one page,
3232							this method will fail.
3233
3234							=cut
3235
3236							sub deletePage
3237							{
3238	0			0	1	0	my $self = shift;
3239	0					0	my $pagenum = shift;
3240
3241	0					0	my $objnum = $self->_deletePage($pagenum);
3242	0	0				0	if ($objnum)
3243							{
3244	0					0	$self->_deleteRefsToPages($objnum);
3245	0					0	$self->cleanse();
3246							}
3247	0	0				0	return $objnum ? $self : ();
3248							}
3249
3250							# Internal method, called by deletePage() or deletePages()
3251							# Returns the objnum of the deleted page
3252
3253							sub _deletePage
3254							{
3255	20			20		33	my $self = shift;
3256	20					30	my $pagenum = shift;
3257
3258	20	50				48	if ($self->numPages() <= 1) # don't delete the last page
3259							{
3260	0					0	croak 'Tried to delete the only page';
3261							}
3262	20					78	my ($objnum, $gennum) = $self->getPageObjnum($pagenum);
3263	20	50				56	if (!defined $objnum)
3264							{
3265	0					0	croak 'Tried to delete a non-existent page';
3266							}
3267
3268	20					72	$self->_deletePage_backPointers($objnum);
3269	20					58	$self->_deletePage_removeFromPageTree($pagenum);
3270
3271							# Removing the page is easy:
3272	20					53	$self->deleteObject($objnum);
3273
3274							# Caches are now bad for all pages from this one
3275	20					67	$self->decachePages($pagenum .. $self->numPages());
3276
3277	20					38	$self->{PageCount}--;
3278
3279	20					50	return $objnum;
3280							}
3281
3282
3283							sub _deletePage_backPointers
3284							{
3285	20			20		26	my $self = shift;
3286	20					29	my $objnum = shift;
3287
3288							# Delete pointer from annotation back to the page
3289	20					52	my $page = $self->dereference($objnum);
3290	20					43	my $pagedict = $page->{value}->{value};
3291	20	100				57	if ($pagedict->{Annots})
3292							{
3293	3					11	my $annots = $self->getValue($pagedict->{Annots});
3294	3	50				17	if ($annots)
3295							{
3296	3					7	for my $annotref (@{$annots})
	3					11
3297							{
3298	3					10	my $annot = $self->getValue($annotref);
3299	3	50				18	if ($annot)
3300							{
3301	3					34	delete $annot->{P};
3302							}
3303							}
3304							}
3305							}
3306	20					34	return;
3307							}
3308
3309							sub _deletePage_removeFromPageTree
3310							{
3311	20			20		30	my $self = shift;
3312	20					30	my $pagenum = shift;
3313
3314							# Removing references to the page is hard:
3315							# (much of this code is lifted from getPage)
3316	20					33	my $parentdict;
3317	20					63	my $node = $self->dereference($self->getRootDict()->{Pages}->{value});
3318	20					49	my $nodedict = $node->{value}->{value};
3319	20					26	my $nodestart = 1;
3320	20		66			131	while ($node && $nodedict->{Type}->{value} eq 'Pages')
3321							{
3322	62					65	my $count;
3323	62	50				147	if ($nodedict->{Count}->{type} eq 'reference')
3324							{
3325	0					0	my $countobj = $self->dereference($nodedict->{Count}->{value});
3326	0					0	$count = $countobj->{value}->{value}--;
3327	0					0	$self->{changes}->{$countobj->{objnum}} = 1;
3328							}
3329							else
3330							{
3331	62					142	$count = $nodedict->{Count}->{value}--;
3332							}
3333	62					156	$self->{changes}->{$node->{objnum}} = 1;
3334
3335	62	100				170	if ($count == 1)
3336							{
3337							# only one left, so this is it
3338	12	50				34	if (!$parentdict)
3339							{
3340	0					0	croak 'Tried to delete the only page';
3341							}
3342	12					31	my $parentkids = $self->getValue($parentdict->{Kids});
3343	12					20	@{$parentkids} = grep {$_->{value} != $node->{objnum}} @{$parentkids};
	12					60
	24					70
	12					23
3344	12					46	$self->{changes}->{$parentdict->{Kids}->{objnum}} = 1;
3345	12					41	$self->deleteObject($node->{objnum});
3346	12					18	last;
3347							}
3348
3349	50					124	my $kids = $self->getValue($nodedict->{Kids});
3350	50	100				62	if (@{$kids} == 1)
	50					106
3351							{
3352							# Count was not 1, so this must not be a leaf node
3353							# hop down into node's child
3354
3355	8					27	my $sub = $self->dereference($kids->[0]->{value});
3356	8					20	my $subdict = $sub->{value}->{value};
3357	8					11	$parentdict = $nodedict;
3358	8					12	$node = $sub;
3359	8					47	$nodedict = $subdict;
3360							}
3361							else
3362							{
3363							# search through all kids
3364	42					51	for my $child (0 .. $#{$kids})
	42					108
3365							{
3366	64					198	my $sub = $self->dereference($kids->[$child]->{value});
3367	64					137	my $subdict = $sub->{value}->{value};
3368
3369	64	100				173	if ($subdict->{Type}->{value} ne 'Pages')
3370							{
3371	16	100				34	if ($pagenum == $nodestart)
3372							{
3373							# Got it!
3374	8					11	splice @{$kids}, $child, 1;
	8					23
3375	8					39	$node = undef; # flag that we are done
3376	8					43	last;
3377							}
3378							else
3379							{
3380							# Its a leaf, and not the right one. Move on.
3381	8					17	$nodestart++;
3382							}
3383							}
3384							else
3385							{
3386							# Type=='Pages' node
3387	48					99	my $child_count = $self->getValue($subdict->{Count});
3388	48	100				127	if ($nodestart + $child_count - 1 >= $pagenum)
3389							{
3390							# The page we want is in this kid. Descend.
3391	34					46	$parentdict = $nodedict;
3392	34					37	$node = $sub;
3393	34					44	$nodedict = $subdict;
3394	34					206	last;
3395							}
3396							else
3397							{
3398							# Not in this kid. Move on.
3399	14					26	$nodestart += $child_count;
3400							}
3401							}
3402	22	50				32	if ($child == $#{$kids})
	22					75
3403							{
3404	0					0	die "Internal error: did not find the page to delete -- corrupted page index\n";
3405							}
3406							}
3407							}
3408							}
3409	20					234	return;
3410							}
3411
3412							sub _deleteRefsToPages
3413							{
3414	8			8		26	my ($self, @objnums) = @_;
3415	8					18	my %objnums = map {$_ => 1} @objnums;
	20					65
3416
3417	8					26	my $root = $self->getRootDict();
3418	8	50				42	if ($root->{Names})
3419							{
3420	0					0	my $names = $self->getValue($root->{Names});
3421	0	0				0	if ($names->{Dests})
3422							{
3423	0					0	my $dests = $self->getValue($names->{Dests});
3424	0	0				0	if ($self->_deleteDests($dests, \%objnums))
3425							{
3426	0					0	delete $names->{Dests};
3427							}
3428							}
3429
3430	0	0				0	if (0 == scalar keys %{$names})
	0					0
3431							{
3432	0					0	my $names_objnum = $root->{Names}->{value};
3433	0					0	$self->deleteObject($names_objnum);
3434	0					0	delete $root->{Names};
3435							}
3436							}
3437
3438	8	100				32	if ($root->{Outlines})
3439							{
3440	2					10	my $outlines = $self->getValue($root->{Outlines});
3441	2					11	$self->_deleteOutlines($outlines, \%objnums);
3442							}
3443	8					22	return;
3444							}
3445
3446							sub _deleteOutlines
3447							{
3448	2			2		5	my $self = shift;
3449	2					4	my $outlines = shift;
3450	2					6	my $objnums = shift;
3451
3452	2					6	my @deletes;
3453	2					5	my @stack = ($outlines);
3454
3455	2					10	while (@stack > 0)
3456							{
3457	2					6	my $node = shift @stack;
3458
3459							# Check for a Destination (aka internal hyperlink)
3460							# A is indirect ref, Dest is direct ref; only one can be present
3461	2					4	my $dest;
3462	2	50				17	if ($node->{A})
		50
3463							{
3464	0					0	$dest = $self->getValue($node->{A});
3465	0					0	$dest = $self->getValue($dest->{D});
3466							}
3467							elsif ($node->{Dest})
3468							{
3469	0					0	$dest = $self->getValue($node->{Dest});
3470							}
3471	2	0	33			12	if ($dest && (ref $dest) && (ref $dest) eq 'ARRAY')
			33
3472							{
3473	0					0	my $ref = $dest->[0];
3474	0	0	0			0	if ($ref && $ref->{type} eq 'reference' && $objnums->{$ref->{value}})
			0
3475							{
3476	0					0	$self->deleteObject($ref->{objnum});
3477							# Easier to just delete both, even though only one may exist
3478	0					0	delete $node->{A};
3479	0					0	delete $node->{Dest};
3480							}
3481							}
3482
3483	2	50				9	if ($node->{Next})
3484							{
3485	0					0	push @stack, $self->getValue($node->{Next});
3486							}
3487	2	50				14	if ($node->{First})
3488							{
3489	0					0	push @stack, $self->getValue($node->{First});
3490							}
3491							}
3492	2					7	return;
3493							}
3494
3495							sub _deleteDests ## no critic(Subroutines::ProhibitExcessComplexity)
3496							{
3497	0			0		0	my $self = shift;
3498	0					0	my $dests = shift;
3499	0					0	my $objnums = shift;
3500
3501							## Accumulate the nodes to delete
3502	0					0	my @deletes;
3503	0					0	my @stack = ([$dests]);
3504
3505	0					0	while (@stack > 0)
3506							{
3507	0					0	my $chain = pop @stack;
3508	0					0	my $node = $chain->[0];
3509	0	0				0	if ($node->{Names})
		0
		0
3510							{
3511	0					0	my $pairs = $self->getValue($node->{Names});
3512	0					0	for (my $i=1; $i<@{$pairs}; $i+=2) ## no critic(ControlStructures::ProhibitCStyleForLoops)
	0					0
3513							{
3514	0					0	push @stack, [$self->getValue($pairs->[$i]), @{$chain}];
	0					0
3515							}
3516							}
3517							elsif ($node->{Kids})
3518							{
3519	0					0	my $list = $self->getValue($node->{Kids});
3520	0					0	push @stack, map {[$self->getValue($_), @{$chain}]} @{$list};
	0					0
	0					0
	0					0
3521							}
3522							elsif ($node->{D})
3523							{
3524	0					0	my $props = $self->getValue($node->{D});
3525	0					0	my $ref = $props->[0];
3526	0	0	0			0	if ($ref && $ref->{type} eq 'reference' && $objnums->{$ref->{value}})
			0
3527							{
3528	0					0	push @deletes, $chain;
3529							}
3530							}
3531							}
3532
3533							## Delete the nodes, and their parents if applicable
3534	0					0	for my $chain (@deletes)
3535							{
3536	0					0	my $objnode = shift @{$chain};
	0					0
3537	0					0	my $objnum = [values %{$objnode}]->[0]->{objnum};
	0					0
3538	0	0				0	if (!$objnum)
3539							{
3540	0					0	die 'Destination object lacks an object number (number '.@{$chain}.' in the chain)';
	0					0
3541							}
3542	0					0	$self->deleteObject($objnum);
3543
3544							# Ascend chain... $objnum gets overwritten
3545
3546	0					0	CHAIN:
3547	0					0	for my $node (@{$chain})
3548							{
3549	0	0				0	last if (exists $node->{deleted}); # internal flag
3550
3551	0		0			0	my $node_objnum = [values %{$node}]->[0]->{objnum} \|\| die;
3552
3553	0	0				0	if ($node->{Names})
		0
3554							{
3555	0					0	my $pairs = $self->getValue($node->{Names});
3556	0					0	my $limits = $self->getValue($node->{Limits});
3557	0					0	my $redo_limits = 0;
3558
3559							# Find and remove child reference
3560							# iterate over keys of key-value array
3561	0					0	for (my $i=@{$pairs}-2; $i>=0; $i-=2) ## no critic(ControlStructures::ProhibitCStyleForLoops)
	0					0
3562							{
3563	0	0				0	if ($pairs->[$i+1]->{value} == $objnum)
3564							{
3565	0		0			0	my $name = $pairs->[$i]->{value} \|\| die 'No name in Name tree';
3566	0					0	splice @{$pairs}, $i, 2;
	0					0
3567	0	0	0			0	if ($limits->[0]->{value} eq $name \|\| $limits->[1]->{value} eq $name)
3568							{
3569	0					0	$redo_limits = 1;
3570							}
3571							}
3572							}
3573
3574	0	0				0	if (@{$pairs} > 0)
	0					0
3575							{
3576	0	0				0	if ($redo_limits)
3577							{
3578	0					0	my @names;
3579	0					0	for (my $i=0; $i<@{$pairs}; $i+=2) ## no critic(ControlStructures::ProhibitCStyleForLoops)
	0					0
3580							{
3581	0					0	push @names, $pairs->[$i]->{value};
3582							}
3583	0					0	@names = sort @names;
3584	0					0	$limits->[0]->{value} = $names[0];
3585	0					0	$limits->[1]->{value} = $names[-1];
3586							}
3587	0					0	last CHAIN;
3588							}
3589							}
3590
3591							elsif ($node->{Kids})
3592							{
3593	0					0	my $list = $self->getValue($node->{Kids});
3594
3595							# Find and remove child reference
3596	0					0	for my $i (reverse 0 .. $#{$list})
	0					0
3597							{
3598	0	0				0	if ($list->[$i]->{value} == $objnum)
3599							{
3600	0					0	splice @{$list}, $i, 1;
	0					0
3601							}
3602							}
3603
3604	0	0				0	if (@{$list} > 0)
	0					0
3605							{
3606	0	0				0	if ($node->{Limits})
3607							{
3608	0					0	my $limits = $self->getValue($node->{Limits});
3609	0	0	0			0	if (!$limits \|\| @{$limits} != 2)
	0					0
3610							{
3611	0					0	die 'Internal error: trouble parsing the Limits array in a name tree';
3612							}
3613	0					0	my @names;
3614	0					0	for my $i (0..$#{$list})
	0					0
3615							{
3616	0					0	my $child = $self->getValue($list->[$i]);
3617	0					0	my $child_limits = $self->getValue($child->{Limits});
3618	0					0	push @names, map {$_->{value}} @{$child_limits};
	0					0
	0					0
3619							}
3620	0					0	@names = sort @names;
3621	0					0	$limits->[0]->{value} = $names[0];
3622	0					0	$limits->[1]->{value} = $names[-1];
3623							}
3624	0					0	last CHAIN;
3625							}
3626							}
3627
3628							else
3629							{
3630	0					0	die 'Internal error: found a parent node with neither Names nor Kids. This should be impossible.';
3631							}
3632
3633							# If we got here, the node is empty, so delete it and move onward
3634	0					0	$self->deleteObject($node_objnum);
3635	0					0	$node->{deleted} = undef; # internal flag
3636
3637							# Prepare for next iteration
3638	0					0	$objnum = $node_objnum;
3639							}
3640							}
3641
3642	0					0	return exists $dests->{deleted};
3643							}
3644
3645							=item $doc->decachePages($pagenum, $pagenum, ...)
3646
3647							Clears cached copies of the specified page data structures. This is
3648							useful if an operation has been performed that changes a page.
3649
3650							=cut
3651
3652							sub decachePages
3653							{
3654	24			24	1	58	my ($self, @pages) = @_;
3655
3656	24					60	for (@pages)
3657							{
3658	44					636	delete $self->{pagecache}->{$_};
3659	44					112	delete $self->{Names}->{$_};
3660	44					115	delete $self->{NameObjects}->{$_};
3661							}
3662	24					52	delete $self->{Names}->{All};
3663	24					48	return $self;
3664							}
3665
3666
3667							=item $doc->addPageResources($pagenum, $resourcehash)
3668
3669							Add the resources from the given object to the page resource
3670							dictionary. If the page does not have a resource dictionary, create
3671							one. This function avoids duplicating resources where feasible.
3672
3673							=cut
3674
3675							sub addPageResources
3676							{
3677	0			0	1	0	my $self = shift;
3678	0					0	my $pagenum = shift;
3679	0					0	my $newrsrcs = shift;
3680
3681	0	0				0	return if (!$newrsrcs);
3682	0					0	my $page = $self->getPage($pagenum);
3683	0	0				0	return if (!$page);
3684
3685	0					0	my ($anyobj) = values %{$page};
	0					0
3686	0					0	my $objnum = $anyobj->{objnum};
3687	0					0	my $gennum = $anyobj->{gennum};
3688
3689	0					0	my $pagersrcs;
3690	0	0				0	if ($page->{Resources})
3691							{
3692	0					0	$pagersrcs = $self->getValue($page->{Resources});
3693							}
3694							else
3695							{
3696	0					0	$pagersrcs = {};
3697	0					0	$page->{Resources} = CAM::PDF::Node->new('dictionary', $pagersrcs, $objnum, $gennum);
3698	0					0	$self->{changes}->{$objnum} = 1;
3699							}
3700	0					0	for my $type (keys %{$newrsrcs})
	0					0
3701							{
3702	0					0	my $new_r = $self->getValue($newrsrcs->{$type});
3703	0					0	my $page_r;
3704	0	0				0	if ($pagersrcs->{$type})
3705							{
3706	0					0	$page_r = $self->getValue($pagersrcs->{$type});
3707							}
3708	0	0				0	if ($type eq 'Font')
		0
		0
3709							{
3710	0	0				0	if (!$page_r)
3711							{
3712	0					0	$page_r = {};
3713	0					0	$pagersrcs->{$type} = CAM::PDF::Node->new('dictionary', $page_r, $objnum, $gennum);
3714	0					0	$self->{changes}->{$objnum} = 1;
3715							}
3716	0					0	for my $font (keys %{$new_r})
	0					0
3717							{
3718	0	0				0	next if (exists $page_r->{$font});
3719	0					0	my $val = $new_r->{$font};
3720	0	0				0	if ($val->{type} ne 'reference')
3721							{
3722	0					0	die 'Internal error: font entry is not a reference';
3723							}
3724	0					0	$page_r->{$font} = CAM::PDF::Node->new('reference', $val->{value}, $objnum, $gennum);
3725	0					0	$self->{changes}->{$objnum} = 1;
3726							}
3727							}
3728							elsif ($type eq 'ProcSet')
3729							{
3730	0	0				0	if (!$page_r)
3731							{
3732	0					0	$page_r = [];
3733	0					0	$pagersrcs->{$type} = CAM::PDF::Node->new('array', $page_r, $objnum, $gennum);
3734	0					0	$self->{changes}->{$objnum} = 1;
3735							}
3736	0					0	for my $proc (@{$new_r})
	0					0
3737							{
3738	0	0				0	if ($proc->{type} ne 'label')
3739							{
3740	0					0	die 'Internal error: procset entry is not a label';
3741							}
3742							{
3743							## no critic(BuiltinFunctions::ProhibitBooleanGrep) -- TODO: use any() instead
3744	0	0				0	next if (grep {$_->{value} eq $proc->{value}} @{$page_r});
	0					0
	0					0
	0					0
3745							}
3746	0					0	push @{$page_r}, CAM::PDF::Node->new('label', $proc->{value}, $objnum, $gennum);
	0					0
3747	0					0	$self->{changes}->{$objnum} = 1;
3748							}
3749							}
3750							elsif ($type eq 'Encoding')
3751							{
3752							# TODO: is this a hack or is it right?
3753							# EXPLICITLY skip /Encoding from form DR entry
3754							}
3755							else
3756							{
3757	0					0	warn "Internal error: unsupported resource type '$type'";
3758							}
3759							}
3760	0					0	return;
3761							}
3762
3763							=item $doc->appendPDF($pdf)
3764
3765							Append pages from another PDF document to this one. No optimization
3766							is done -- the pieces are just appended and the internal table of
3767							contents is updated.
3768
3769							Note that this can break documents with annotations. See the
3770							F script for a workaround.
3771
3772							=cut
3773
3774							sub appendPDF
3775							{
3776	12			12	1	90	my $self = shift;
3777	12					29	my $otherdoc = shift;
3778	12					22	my $prepend = shift; # boolean, default false
3779
3780	12					49	my $pageroot = $self->getPagesDict();
3781	12					26	my ($anyobj) = values %{$pageroot};
	12					40
3782	12					43	my $objnum = $anyobj->{objnum};
3783	12					29	my $gennum = $anyobj->{gennum};
3784
3785	12					36	my $root = $self->getRootDict();
3786	12					36	my $otherroot = $otherdoc->getRootDict();
3787	12					59	my $otherpageobj = $otherdoc->dereference($otherroot->{Pages}->{value});
3788	12					57	my ($key, %refkeys) = $self->appendObject($otherdoc, $otherpageobj->{objnum}, 1);
3789	12					87	my $subpage = $self->getObjValue($key);
3790
3791	12					48	my $newdict = {};
3792	12					57	my $newpage = CAM::PDF::Node->new('object',
3793							CAM::PDF::Node->new('dictionary', $newdict));
3794	12					49	$newdict->{Type} = CAM::PDF::Node->new('label', 'Pages');
3795	12	50				86	$newdict->{Kids} = CAM::PDF::Node->new('array',
		50
3796							[
3797							CAM::PDF::Node->new('reference', $prepend ? $key : $objnum),
3798							CAM::PDF::Node->new('reference', $prepend ? $objnum : $key),
3799							]);
3800	12					55	$self->{PageCount} += $otherdoc->{PageCount};
3801	12					49	$newdict->{Count} = CAM::PDF::Node->new('number', $self->{PageCount});
3802	12					48	my $newpagekey = $self->appendObject(undef, $newpage, 0);
3803	12					54	$root->{Pages}->{value} = $newpagekey;
3804
3805	12					83	$pageroot->{Parent} = CAM::PDF::Node->new('reference', $newpagekey, $key, $subpage->{gennum});
3806	12					66	$subpage->{Parent} = CAM::PDF::Node->new('reference', $newpagekey, $key, $subpage->{gennum});
3807
3808	12	100				66	if ($otherroot->{AcroForm})
3809							{
3810	3					16	my $forms = $otherdoc->getValue($otherdoc->getValue($otherroot->{AcroForm})->{Fields});
3811	3					9	my @newforms;
3812	3					8	for my $reference (@{$forms})
	3					11
3813							{
3814	3	50				18	if ($reference->{type} ne 'reference')
3815							{
3816	0					0	die 'Internal error: expected a reference';
3817							}
3818	3					8	my $newkey = $refkeys{$reference->{value}};
3819	3	50				15	if ($newkey)
3820							{
3821	3					24	push @newforms, CAM::PDF::Node->new('reference', $newkey);
3822							}
3823							}
3824	3	50				17	if ($root->{AcroForm})
3825							{
3826	3					13	my $mainforms = $self->getValue($self->getValue($root->{AcroForm})->{Fields});
3827	3					9	for my $reference (@newforms)
3828							{
3829	3					12	$reference->{objnum} = $mainforms->[0]->{objnum};
3830	3					16	$reference->{gennum} = $mainforms->[0]->{gennum};
3831							}
3832	3					9	push @{$mainforms}, @newforms;
	3					11
3833							}
3834							else
3835							{
3836	0					0	die 'adding new forms is not implemented';
3837							}
3838							}
3839
3840	12	50				39	if ($prepend)
3841							{
3842							# clear caches
3843	0					0	$self->{pagecache} = {};
3844	0					0	$self->{Names} = {};
3845	0					0	$self->{NameObjects} = {};
3846							}
3847
3848	12					3962	return $key;
3849							}
3850
3851							=item $doc->prependPDF($pdf)
3852
3853							Just like appendPDF() except the new document is inserted on page 1
3854							instead of at the end.
3855
3856							=cut
3857
3858							sub prependPDF
3859							{
3860	0			0	1	0	my ($self, @args) = @_;
3861	0					0	return $self->appendPDF(@args, 1);
3862							}
3863
3864							=item $doc->duplicatePage($pagenum)
3865
3866							=item $doc->duplicatePage($pagenum, $leaveblank)
3867
3868							Inserts an identical copy of the specified page into the document.
3869							The new page's number will be C<$pagenum + 1>.
3870
3871							If C<$leaveblank> is true, the new page does not get any content.
3872							Thus, the document is broken until you subsequently call
3873							setPageContent().
3874
3875							=cut
3876
3877							sub duplicatePage
3878							{
3879	4			4	1	10	my $self = shift;
3880	4					9	my $pagenum = shift;
3881	4		50			32	my $leave_blank = shift \|\| 0;
3882
3883	4					21	my $page = $self->getPage($pagenum);
3884	4					21	my $objnum = $self->getPageObjnum($pagenum);
3885	4					25	my $newobjnum = $self->appendObject($self, $objnum, 0);
3886	4					23	my $newdict = $self->getObjValue($newobjnum);
3887	4					28	delete $newdict->{Contents};
3888	4					17	my $parent = $self->getValue($page->{Parent});
3889	4					12	push @{$self->getValue($parent->{Kids})}, CAM::PDF::Node->new('reference', $newobjnum);
	4					15
3890
3891	4					21	while ($parent)
3892							{
3893	16					810	$self->{changes}->{$parent->{Count}->{objnum}} = 1;
3894	16	50				56	if ($parent->{Count}->{type} eq 'reference')
3895							{
3896	0					0	my $countobj = $self->dereference($parent->{Count}->{value});
3897	0					0	$countobj->{value}->{value}++;
3898	0					0	$self->{changes}->{$countobj->{objnum}} = 1;
3899							}
3900							else
3901							{
3902	16					38	$parent->{Count}->{value}++;
3903							}
3904	16					57	$parent = $self->getValue($parent->{Parent});
3905							}
3906	4					10	$self->{PageCount}++;
3907
3908	4	50				18	if (!$leave_blank)
3909							{
3910	4					28	$self->setPageContent($pagenum+1, $self->getPageContent($pagenum));
3911							}
3912
3913							# Caches are now bad for all pages from this one
3914	4					30	$self->decachePages($pagenum + 1 .. $self->numPages());
3915
3916	4					34	return $self;
3917							}
3918
3919							=item $doc->createStreamObject($content)
3920
3921							=item $doc->createStreamObject($content, $filter ...)
3922
3923							I
3924
3925							Create a new Stream object. This object is NOT added to the document.
3926							Use the appendObject() function to do that after calling this
3927							function.
3928
3929							=cut
3930
3931							sub createStreamObject
3932							{
3933	4			4	1	10	my $self = shift;
3934	4					9	my $content = shift;
3935
3936	4					23	my $dict = CAM::PDF::Node->new('dictionary',
3937							{
3938							Length => CAM::PDF::Node->new('number', length $content),
3939							StreamData => CAM::PDF::Node->new('stream', $content),
3940							},
3941							);
3942
3943	4					20	my $objnode = CAM::PDF::Node->new('object', $dict);
3944
3945	4					21	while (my $filter = shift)
3946							{
3947							#warn "$filter encoding\n";
3948	4					20	$self->encodeOne($objnode->{value}, $filter);
3949							}
3950
3951	4					11	return $objnode;
3952							}
3953
3954							=item $doc->uninlineImages()
3955
3956							=item $doc->uninlineImages($pagenum)
3957
3958							Search the content of the specified page (or all pages if the
3959							page number is omitted) for embedded images. If there are any, replace
3960							them with indirect objects. This procedure uses heuristics to detect
3961							in-line images, and is subject to confusion in extremely rare cases of text
3962							that uses C and C a lot.
3963
3964							=cut
3965
3966							sub uninlineImages
3967							{
3968	0			0	1	0	my $self = shift;
3969	0					0	my $pagenum = shift;
3970
3971	0					0	my $changes = 0;
3972	0	0				0	if (!$pagenum)
3973							{
3974	0					0	my $pages = $self->numPages();
3975	0					0	for my $p (1 .. $pages)
3976							{
3977	0					0	$changes += $self->uninlineImages($p);
3978							}
3979							}
3980							else
3981							{
3982	0					0	my $c = $self->getPageContent($pagenum);
3983	0					0	my $pos = 0;
3984	0					0	while (($pos = index $c, 'BI', $pos) != 1)
3985							{
3986							# manual \bBI check
3987							# if beginning of string or token
3988	0	0	0			0	if ($pos == 0 \|\| (substr $c, $pos-1, 1) =~ m/ \W /xms)
3989							{
3990	0					0	my $part = substr $c, $pos;
3991	0	0				0	if ($part =~ m/ \A BI\b(.*?)\bID\b /xms)
3992							{
3993	0					0	my $im = $1;
3994
3995							## Long series of tests to make sure this is really an
3996							## image and not just coincidental text
3997
3998							# Fix easy cases of "BI text) BI ... ID"
3999	0					0	$im =~ s/ \A .*\bBI\b //xms;
4000							# There should never be an EI inside of a BI ... ID
4001	0	0				0	next if ($im =~ m/ \bEI\b /xms);
4002
4003							# Easy tests: is this the beginning or end of a string?
4004							# (these aren't really good tests...)
4005	0	0				0	next if ($im =~ m/ \A [)] /xms);
4006	0	0				0	next if ($im =~ m/ [(] \z /xms);
4007
4008							# this is the most complex heuristic:
4009							# make sure that there is an open paren before every close
4010							# if not, then the "BI" or the "ID" was part of a string
4011	0					0	my $test = $im; # make a copy we can scribble on
4012	0					0	my $failed = 0;
4013							# get rid of escaped parens for the test
4014	0					0	$test =~ s/ \\[()] //gxms;
4015							# Look for closing parens
4016	0					0	while ($test =~ s/ \A(.*?)[)] //xms)
4017							{
4018							# If there is NOT an opening paren before the
4019							# closing paren we detected above, then the start of
4020							# our string is INSIDE a paren pair, thus a failure.
4021	0					0	my $bit = $1;
4022	0	0				0	if ($bit !~ m/ [(] /xms)
4023							{
4024	0					0	$failed = 1;
4025	0					0	last;
4026							}
4027							}
4028	0	0				0	next if ($failed);
4029
4030							# End of heuristics. This is likely a real embedded image.
4031							# Now do the replacement.
4032
4033	0					0	my $oldlen = length $part;
4034	0					0	my $image = $self->parseInlineImage(\$part, undef);
4035	0					0	my $newlen = length $part;
4036	0					0	my $imagelen = $oldlen - $newlen;
4037
4038							# Construct a new image name like "I3". Start with
4039							# "I1" and continue until we get an unused "I"
4040							# (first, get the list of already-used labels)
4041	0					0	$self->_buildNameTable($pagenum);
4042	0					0	my $i = 1;
4043	0					0	my $name = 'Im1';
4044	0					0	while (exists $self->{Names}->{$pagenum}->{$name})
4045							{
4046	0					0	$name = 'Im' . ++$i;
4047							}
4048
4049	0					0	$self->setName($image, $name);
4050	0					0	my $key = $self->appendObject(undef, $image, 0);
4051	0					0	$self->pageAddName($pagenum, $name, $key);
4052
4053	0					0	$c = (substr $c, 0, $pos) . "/$name Do" . (substr $c, $pos+$imagelen);
4054	0					0	$changes++;
4055							}
4056							}
4057							}
4058	0	0				0	if ($changes > 0)
4059							{
4060	0					0	$self->setPageContent($pagenum, $c);
4061							}
4062							}
4063	0					0	return $changes;
4064							}
4065
4066							=item $doc->appendObject($doc, $objectnum, $recurse?)
4067
4068							=item $doc->appendObject($undef, $object, $recurse?)
4069
4070							Duplicate an object from another PDF document and add it to this
4071							document, optionally descending into the object and copying any other
4072							objects it references.
4073
4074							Like replaceObject(), the second form allows you to append a
4075							newly-created block to the PDF.
4076
4077							=cut
4078
4079							sub appendObject
4080							{
4081	309			309	1	466	my $self = shift;
4082	309					388	my $otherdoc = shift;
4083	309					504	my $otherkey = shift;
4084	309					520	my $follow = shift;
4085
4086	309					678	my $objnum = ++$self->{maxobj};
4087
4088							# Make sure our new object has a number higher than anything in
4089							# either document, otherwise the changeRefKeys might change
4090							# something twice! We had a problem of 15 -> 134 -> 333 in 1.52
4091							# (private email with Charlie Katz)
4092	309	100	100			1758	if ($otherdoc && $otherdoc->{maxobj} >= $objnum) {
4093	4					13	$objnum = $self->{maxobj} = $otherdoc->{maxobj} + 1;
4094							}
4095
4096	309	50				989	if (exists $self->{versions}->{$objnum}) {
4097	0					0	$self->{versions}->{$objnum}++;
4098							} else {
4099	309					1065	$self->{versions}->{$objnum} = 0;
4100							}
4101
4102	309					1110	my %refkeys = $self->replaceObject($objnum, $otherdoc, $otherkey, $follow);
4103	309	100				866	if (wantarray)
4104							{
4105	12					291	return ($objnum, %refkeys);
4106							}
4107							else
4108							{
4109	297					1142	return $objnum;
4110							}
4111							}
4112
4113							=item $doc->replaceObject($objectnum, $doc, $objectnum, $recurse?)
4114
4115							=item $doc->replaceObject($objectnum, $undef, $object)
4116
4117							Duplicate an object from another PDF document and insert it into this
4118							document, replacing an existing object. Optionally descend into the
4119							original object and copy any other objects it references.
4120
4121							If the other document is undefined, then the object to copy is taken
4122							to be an anonymous object that is not part of any other document.
4123							This is useful when you've just created that anonymous object.
4124
4125							=cut
4126
4127							sub replaceObject
4128							{
4129	317			317	1	444	my $self = shift;
4130	317					480	my $key = shift;
4131	317					431	my $otherdoc = shift;
4132	317					450	my $otherkey = shift;
4133	317					368	my $follow = shift;
4134
4135							# careful! 'undef' means something different from '0' here!
4136	317	50				729	if (!defined $follow)
4137							{
4138	0					0	$follow = 1;
4139							}
4140
4141	317					398	my $objnode;
4142							my $otherobj;
4143	317	100				645	if ($otherdoc)
4144							{
4145	289					749	$otherobj = $otherdoc->dereference($otherkey);
4146	289					857	$objnode = $self->copyObject($otherobj);
4147							}
4148							else
4149							{
4150	28					50	$objnode = $otherkey;
4151	28	50				117	if ($follow)
4152							{
4153	0					0	warn "Error: you cannot \"follow\" an object if it has no document.\n" .
4154							"Resetting follow = false and continuing....\n";
4155	0					0	$follow = 0;
4156							}
4157							}
4158
4159	317					2347	$self->setObjNum($objnode, $key, 0);
4160
4161							# Preserve the name of the object
4162	317	50				1310	if ($self->{xref}->{$key}) # make sure it isn't a brand new object
4163							{
4164	0					0	my $oldname = $self->getName($self->dereference($key));
4165	0	0				0	if ($oldname)
4166							{
4167	0					0	$self->setName($objnode, $oldname);
4168							}
4169							else
4170							{
4171	0					0	$self->removeName($objnode);
4172							}
4173							}
4174
4175	317					1260	$self->{objcache}->{$key} = $objnode;
4176	317					1015	$self->{changes}->{$key} = 1;
4177
4178	317					1047	my %newrefkeys = ($otherkey, $key);
4179	317	100				865	if ($follow)
4180							{
4181	12					58	for my $oldrefkey ($otherdoc->getRefList($otherobj))
4182							{
4183	285	100				1016	next if ($oldrefkey == $otherkey);
4184	273					812	my $newkey = $self->appendObject($otherdoc, $oldrefkey, 0);
4185	273					924	$newrefkeys{$oldrefkey} = $newkey;
4186							}
4187	12					264	my $already_changed = {}; # hash used by traverse() to avoid repeats
4188	12					71	$self->changeRefKeys($objnode, \%newrefkeys, $already_changed);
4189	12					72	for my $newkey (values %newrefkeys)
4190							{
4191	285					748	$self->changeRefKeys($self->dereference($newkey), \%newrefkeys, $already_changed);
4192							}
4193							}
4194	317					2071	return (%newrefkeys);
4195							}
4196
4197							=item $doc->deleteObject($objectnum)
4198
4199							Remove an object from the document. This function does NOT take care
4200							of dependencies on this object.
4201
4202							=cut
4203
4204							sub deleteObject
4205							{
4206	554			554	1	624	my $self = shift;
4207	554					584	my $objnum = shift;
4208
4209							# DON'T clear versuion number! We need to keep this to increment later
4210							#delete $self->{versions}->{$objnum};
4211	554					9516	delete $self->{objcache}->{$objnum};
4212	554					972	delete $self->{xref}->{$objnum};
4213	554					730	delete $self->{endxref}->{$objnum};
4214	554					684	delete $self->{changes}->{$objnum};
4215	554					983	return;
4216							}
4217
4218							=item $doc->cleanse()
4219
4220							Remove unused objects. I this function breaks some PDF
4221							documents because it removes objects that are strictly part of the
4222							page model hierarchy, but which are required anyway (like some font
4223							definition objects).
4224
4225							=cut
4226
4227							sub cleanse
4228							{
4229	8			8	1	23	my $self = shift;
4230
4231	8					26	delete $self->{trailer}->{XRefStm}; # can't write this
4232	8					24	delete $self->getRootDict()->{PieceInfo}; # can't handle this one, too complicated
4233
4234	8					53	my $base = CAM::PDF::Node->new('dictionary', $self->{trailer});
4235	8					112	my @list = sort {$a<=>$b} $self->getRefList($base);
	599					733
4236							#print join(',', @list), "\n";
4237
4238	8					134	for my $i (1 .. $self->{maxobj})
4239							{
4240	856	100	66			3195	if (@list && $list[0] == $i)
4241							{
4242	335					495	shift @list;
4243							}
4244							else
4245							{
4246							#warn "delete object $i\n";
4247	521					978	$self->deleteObject($i);
4248							}
4249							}
4250	8					38	return;
4251							}
4252
4253							=item $doc->createID()
4254
4255							I
4256
4257							Generate a new document ID. Contrary the Adobe recommendation, this
4258							is a random number.
4259
4260							=cut
4261
4262							sub createID
4263							{
4264	28			28	1	67	my $self = shift;
4265
4266							# Warning: this is non-repeatable, and depends on Linux!
4267
4268	28					47	my $addbytes;
4269	28	100				105	if ($self->{ID})
4270							{
4271							# do not change the first half of an existing ID
4272	27					115	$self->{ID} = substr $self->{ID}, 0, 16;
4273	27					59	$addbytes = 16;
4274							}
4275							else
4276							{
4277	1					5	$self->{ID} = q{};
4278	1					2	$addbytes = 32;
4279							}
4280
4281							# Append $addbytes random bytes
4282							# First try the system random number generator
4283	28	50				1007	if (-f '/dev/urandom')
4284							{
4285	0	0				0	if (open my $fh, '<', '/dev/urandom')
4286							{
4287	0					0	my $bytes_read = read $fh, $self->{ID}, $addbytes, 32-$addbytes;
4288	0					0	close $fh; ##no critic(Syscalls)
4289	0					0	$addbytes -= $bytes_read;
4290							}
4291							}
4292							# If that failed, use Perl's random number generator
4293	28					99	for (1..$addbytes)
4294							{
4295	464					2621	$self->{ID} .= pack 'C', int rand 256;
4296							}
4297
4298	28	50				135	if ($self->{trailer})
4299							{
4300	28					410	$self->{trailer}->{ID} = CAM::PDF::Node->new('array',
4301							[
4302							CAM::PDF::Node->new('hexstring', substr $self->{ID}, 0, 16),
4303							CAM::PDF::Node->new('hexstring', substr $self->{ID}, 16, 16),
4304							],
4305							);
4306							}
4307
4308	28					513	return 1;
4309							}
4310
4311							=item $doc->fillFormFields($name => $value, ...)
4312
4313							=item $doc->fillFormFields($opts_hash, $name => $value, ...)
4314
4315							Set the default values of PDF form fields. The name should be the
4316							full hierarchical name of the field as output by the
4317							getFormFieldList() function. The argument list can be a hash if you
4318							like. A simple way to use this function is something like this:
4319
4320							my %fields = (fname => 'John', lname => 'Smith', state => 'WI');
4321							$field{zip} = 53703;
4322							$self->fillFormFields(%fields);
4323
4324							If the first argument is a hash reference, it is interpreted as
4325							options for how to render the filled data:
4326
4327							=over
4328
4329							=item background_color =E 'none' \| $gray \| [$r, $g, $b]
4330
4331							Specify the background color for the text field.
4332
4333							=item max_autoscale_fontsize =E $size
4334
4335							=item min_autoscale_fontsize =E $size
4336
4337							If the form field is set to auto-size the text to fit, then you may
4338							use these options to constrain the limits of that
4339							autoscaling. Otherwise, for example, a very long string will become
4340							arbitrarily small to fit in the box.
4341
4342							=back
4343
4344							=cut
4345
4346							sub fillFormFields ## no critic(Subroutines::ProhibitExcessComplexity, Unpack)
4347							{
4348	0			0	1	0	my $self = shift;
4349	0	0				0	my $opts = ref $_[0] ? shift : {};
4350	0					0	my @list = (@_);
4351
4352	0					0	my %opts = (
4353							background_color => 1,
4354	0					0	%{$opts},
4355							);
4356
4357	0					0	my $filled = 0;
4358	0					0	while (@list > 0)
4359							{
4360	0					0	my $key = shift @list;
4361	0					0	my $value = shift @list;
4362	0	0				0	if (!defined $value)
4363							{
4364	0					0	$value = q{};
4365							}
4366
4367	0	0				0	next if (!$key);
4368	0	0				0	next if (ref $key);
4369	0					0	my $objnode = $self->getFormField($key);
4370	0	0				0	next if (!$objnode);
4371
4372	0					0	my $objnum = $objnode->{objnum};
4373	0					0	my $gennum = $objnode->{gennum};
4374
4375							# This read-only dict includes inherited properties
4376	0					0	my $propdict = $self->getFormFieldDict($objnode);
4377
4378							# This read-write dict does not include inherited properties
4379	0					0	my $dict = $self->getValue($objnode);
4380	0					0	$dict->{V} = CAM::PDF::Node->new('string', $value, $objnum, $gennum);
4381							#$dict->{DV} = CAM::PDF::Node->new('string', $value, $objnum, $gennum);
4382
4383	0	0	0			0	if ($propdict->{FT} && $self->getValue($propdict->{FT}) eq 'Tx') # Is it a text field?
4384							{
4385							# Set up display of form value
4386	0	0				0	if (!$dict->{AP})
4387							{
4388	0					0	$dict->{AP} = CAM::PDF::Node->new('dictionary', {}, $objnum, $gennum);
4389							}
4390	0	0				0	if (!$dict->{AP}->{value}->{N})
4391							{
4392	0					0	my $newobj = CAM::PDF::Node->new('object',
4393							CAM::PDF::Node->new('dictionary',{}),
4394							);
4395	0					0	my $num = $self->appendObject(undef, $newobj, 0);
4396	0					0	$dict->{AP}->{value}->{N} = CAM::PDF::Node->new('reference', $num, $objnum, $gennum);
4397							}
4398	0					0	my $formobj = $self->dereference($dict->{AP}->{value}->{N}->{value});
4399	0					0	my $formonum = $formobj->{objnum};
4400	0					0	my $formgnum = $formobj->{gennum};
4401	0					0	my $formdict = $self->getValue($formobj);
4402	0	0				0	if (!$formdict->{Subtype})
4403							{
4404	0					0	$formdict->{Subtype} = CAM::PDF::Node->new('label', 'Form', $formonum, $formgnum);
4405							}
4406	0					0	my @rect = (0,0,0,0);
4407	0	0				0	if ($dict->{Rect})
4408							{
4409							## no critic(Bangs::ProhibitNumberedNames)
4410	0					0	my $r = $self->getValue($dict->{Rect});
4411	0					0	my ($x1, $y1, $x2, $y2) = @{$r};
	0					0
4412	0					0	@rect = (
4413							$self->getValue($x1),
4414							$self->getValue($y1),
4415							$self->getValue($x2),
4416							$self->getValue($y2),
4417							);
4418							}
4419	0					0	my $dx = $rect[2]-$rect[0];
4420	0					0	my $dy = $rect[3]-$rect[1];
4421	0	0				0	if (!$formdict->{BBox})
4422							{
4423	0					0	$formdict->{BBox} = CAM::PDF::Node->new('array',
4424							[
4425							CAM::PDF::Node->new('number', 0, $formonum, $formgnum),
4426							CAM::PDF::Node->new('number', 0, $formonum, $formgnum),
4427							CAM::PDF::Node->new('number', $dx, $formonum, $formgnum),
4428							CAM::PDF::Node->new('number', $dy, $formonum, $formgnum),
4429							],
4430							$formonum,
4431							$formgnum);
4432							}
4433	0					0	my $text = $value;
4434	0					0	$text =~ s/ \r\n? /\n/gxms;
4435	0					0	$text =~ s/ \n+\z //xms;
4436
4437	0					0	my @rsrcs;
4438	0					0	my $fontmetrics = 0;
4439	0					0	my $fontname = q{};
4440	0					0	my $fontsize = 0;
4441	0					0	my $da = q{};
4442	0					0	my $tl = q{};
4443	0					0	my $border = 2;
4444	0					0	my $tx = $border;
4445	0					0	my $ty = $border + 2;
4446	0					0	my $stringwidth;
4447	0	0				0	if ($propdict->{DA}) {
4448	0					0	$da = $self->getValue($propdict->{DA});
4449
4450							# Try to pull out all of the resources used in the text object
4451	0					0	@rsrcs = ($da =~ m{ /([^\s<>/\[\]()]+) }gxms);
4452
4453							# Try to pull out the font size, if any. If more than
4454							# one, pick the last one. Font commands look like:
4455							# "/ Tf"
4456	0	0				0	if ($da =~ m{ \s/(\w+)\s+(\d+)\s+Tf.? \z }xms)
4457							{
4458	0					0	$fontname = $1;
4459	0					0	$fontsize = $2;
4460	0	0				0	if ($fontname)
4461							{
4462	0	0				0	if ($propdict->{DR})
4463							{
4464	0					0	my $dr = $self->getValue($propdict->{DR});
4465	0					0	$fontmetrics = $self->getFontMetrics($dr, $fontname);
4466							}
4467							#print STDERR "Didn't get font\n" if (!$fontmetrics);
4468							}
4469							}
4470							}
4471
4472	0					0	my %flags = (
4473							Justify => 'left',
4474							);
4475	0	0				0	if ($propdict->{Ff})
4476							{
4477							# Just decode the ones we actually care about
4478							# PDF ref, 3rd ed pp 532,543
4479	0					0	my $ff = $self->getValue($propdict->{Ff});
4480	0					0	my @flags = split m//xms, unpack 'b*', pack 'V', $ff;
4481	0					0	$flags{ReadOnly} = $flags[0];
4482	0					0	$flags{Required} = $flags[1];
4483	0					0	$flags{NoExport} = $flags[2];
4484	0					0	$flags{Multiline} = $flags[12];
4485	0					0	$flags{Password} = $flags[13];
4486	0					0	$flags{FileSelect} = $flags[20];
4487	0					0	$flags{DoNotSpellCheck} = $flags[22];
4488	0					0	$flags{DoNotScroll} = $flags[23];
4489							}
4490	0	0				0	if ($propdict->{Q})
4491							{
4492	0		0			0	my $q = $self->getValue($propdict->{Q}) \|\| 0;
4493	0	0				0	$flags{Justify} = $q==2 ? 'right' : ($q==1 ? 'center' : 'left');
		0
4494							}
4495
4496							# The order of the following sections is important!
4497	0	0				0	if ($flags{Password})
4498							{
4499	0					0	$text =~ s/ [^\n] /*/gxms; # Asterisks for password characters
4500							}
4501
4502	0	0	0			0	if ($fontmetrics && ! $fontsize)
4503							{
4504							# Fix autoscale fonts
4505	0					0	$stringwidth = 0;
4506	0					0	my $lines = 0;
4507	0					0	for my $line (split /\n/xms, $text) # trailing null strings omitted
4508							{
4509	0					0	$lines++;
4510	0					0	my $w = $self->getStringWidth($fontmetrics, $line);
4511	0	0	0			0	if ($w && $w > $stringwidth)
4512							{
4513	0					0	$stringwidth = $w;
4514							}
4515							}
4516	0		0			0	$lines \|\|= 1;
4517							# Initial guess
4518	0					0	$fontsize = ($dy - 2 * $border) / ($lines * 1.5);
4519	0					0	my $fontwidth = $fontsize * $stringwidth;
4520	0					0	my $maxwidth = $dx - 2 * $border;
4521	0	0				0	if ($fontwidth > $maxwidth)
4522							{
4523	0					0	$fontsize *= $maxwidth / $fontwidth;
4524							}
4525
4526							# allow for user override
4527	0	0	0			0	if (exists $opts->{max_autoscale_fontsize} && $fontsize > $opts->{max_autoscale_fontsize}) {
4528	0					0	$fontsize = $opts->{max_autoscale_fontsize};
4529							}
4530	0	0	0			0	if (exists $opts->{min_autoscale_fontsize} && $fontsize < $opts->{min_autoscale_fontsize}) {
4531	0					0	$fontsize = $opts->{min_autoscale_fontsize};
4532							}
4533
4534	0					0	$da =~ s/ \/$fontname\s+0\s+Tf\b /\/$fontname $fontsize Tf/gxms;
4535							}
4536	0	0				0	if ($fontsize)
4537							{
4538							# This formula is TOTALLY empirical. It's probably wrong.
4539	0					0	$ty = $border + 2 + (9 - $fontsize) * 0.4;
4540							}
4541
4542
4543							# escape characters
4544	0					0	$text = $self->writeString($text);
4545
4546	0	0				0	if ($flags{Multiline})
4547							{
4548							# TODO: wrap the field with wrapString()??
4549							# Shawn Dawson of Silent Solutions pointed out that this does not auto-wrap the input text
4550
4551	0					0	my $linebreaks = $text =~ s/ \\n /\) Tj T* \(/gxms;
4552
4553							# Total guess work:
4554							# line height is either 150% of fontsize or thrice
4555							# the corner offset
4556	0	0				0	$tl = $fontsize ? $fontsize * 1.5 : $ty * 3;
4557
4558							# Bottom aligned
4559							#$ty += $linebreaks * $tl;
4560							# Top aligned
4561	0					0	$ty = $dy - $border - $tl;
4562
4563	0	0				0	if ($flags{Justify} ne 'left')
4564							{
4565	0					0	warn 'Justified text not supported for multiline fields';
4566							}
4567
4568	0					0	$tl .= ' TL';
4569							}
4570							else
4571							{
4572	0	0	0			0	if ($flags{Justify} ne 'left' && $fontmetrics)
4573							{
4574	0		0			0	my $width = $stringwidth \|\| $self->getStringWidth($fontmetrics, $text);
4575	0					0	my $diff = $dx - $width*$fontsize;
4576
4577	0	0				0	if ($flags{Justify} eq 'center')
		0
4578							{
4579	0					0	$text = ($diff/2)." 0 Td $text";
4580							}
4581							elsif ($flags{Justify} eq 'right')
4582							{
4583	0					0	$text = "$diff 0 Td $text";
4584							}
4585							}
4586							}
4587
4588							# Move text from lower left corner of form field
4589	0					0	my $tm = "1 0 0 1 $tx $ty Tm ";
4590
4591							# if not 'none', draw a background as a filled rectangle of solid color
4592	0	0				0	my $background_color
		0
4593							= $opts{background_color} eq 'none' ? q{}
4594	0					0	: ref $opts{background_color} ? "@{$opts{background_color}} rgb"
4595							: "$opts{background_color} g";
4596	0	0				0	my $background = $background_color ? "$background_color 0 0 $dx $dy re f" : q{};
4597
4598	0					0	$text = "$tl $da $tm $text Tj";
4599	0					0	$text = "$background /Tx BMC q 1 1 ".($dx-$border).q{ }.($dy-$border)." re W n BT $text ET Q EMC";
4600	0					0	my $len = length $text;
4601	0					0	$formdict->{Length} = CAM::PDF::Node->new('number', $len, $formonum, $formgnum);
4602	0					0	$formdict->{StreamData} = CAM::PDF::Node->new('stream', $text, $formonum, $formgnum);
4603
4604	0	0				0	if (@rsrcs > 0) {
4605	0	0				0	if (!$formdict->{Resources})
4606							{
4607	0					0	$formdict->{Resources} = CAM::PDF::Node->new('dictionary', {}, $formonum, $formgnum);
4608							}
4609	0					0	my $rdict = $self->getValue($formdict->{Resources});
4610	0	0				0	if (!$rdict->{ProcSet})
4611							{
4612	0					0	$rdict->{ProcSet} = CAM::PDF::Node->new('array',
4613							[
4614							CAM::PDF::Node->new('label', 'PDF', $formonum, $formgnum),
4615							CAM::PDF::Node->new('label', 'Text', $formonum, $formgnum),
4616							],
4617							$formonum,
4618							$formgnum);
4619							}
4620	0	0				0	if (!$rdict->{Font})
4621							{
4622	0					0	$rdict->{Font} = CAM::PDF::Node->new('dictionary', {}, $formonum, $formgnum);
4623							}
4624	0					0	my $fdict = $self->getValue($rdict->{Font});
4625
4626							# Search out font resources. This is a total kluge.
4627							# TODO: the right way to do this is to look for the DR
4628							# attribute in the form element or it's ancestors.
4629	0					0	for my $font (@rsrcs)
4630							{
4631	0					0	my $fobj = $self->dereference("/$font", 'All');
4632	0	0				0	if (!$fobj)
4633							{
4634	0					0	die "Could not find resource /$font while preparing form field $key\n";
4635							}
4636	0					0	$fdict->{$font} = CAM::PDF::Node->new('reference', $fobj->{objnum}, $formonum, $formgnum);
4637							}
4638							}
4639							}
4640	0					0	$filled++;
4641							}
4642	0					0	return $filled;
4643							}
4644
4645
4646							=item $doc->clearFormFieldTriggers($name, $name, ...)
4647
4648							Disable any triggers set on data entry for the specified form field
4649							names. This is useful in the case where, for example, the data entry
4650							Javascript forbids punctuation and you want to prefill with a
4651							hyphenated word. If you don't clear the trigger, the prefill may not
4652							happen.
4653
4654							=cut
4655
4656							sub clearFormFieldTriggers
4657							{
4658	0			0	1	0	my ($self, @fieldnames) = @_;
4659
4660	0					0	for my $fieldname (@fieldnames)
4661							{
4662	0					0	my $objnode = $self->getFormField($fieldname);
4663	0	0				0	if ($objnode)
4664							{
4665	0	0				0	if (exists $objnode->{value}->{value}->{AA})
4666							{
4667	0					0	delete $objnode->{value}->{value}->{AA};
4668	0					0	my $objnum = $objnode->{objnum};
4669	0	0				0	if ($objnum)
4670							{
4671	0					0	$self->{changes}->{$objnum} = 1;
4672							}
4673							}
4674							}
4675							}
4676	0					0	return;
4677							}
4678
4679							=item $doc->clearAnnotations()
4680
4681							Remove all annotations from the document. If form fields are
4682							encountered, their text is added to the appropriate page.
4683
4684							=cut
4685
4686							sub clearAnnotations
4687							{
4688	0			0	1	0	my $self = shift;
4689
4690	0					0	my $formrsrcs;
4691	0					0	my $root = $self->getRootDict();
4692	0	0				0	if ($root->{AcroForm})
4693							{
4694	0					0	my $acroform = $self->getValue($root->{AcroForm});
4695							# Get the form resources
4696	0	0				0	if ($acroform->{DR})
4697							{
4698	0					0	$formrsrcs = $self->getValue($acroform->{DR});
4699							}
4700
4701							# Kill off the forms
4702	0					0	$self->deleteObject($root->{AcroForm}->{value});
4703	0					0	delete $root->{AcroForm};
4704							}
4705
4706							# Iterate through the pages, deleting annotations
4707
4708	0					0	my $pages = $self->numPages();
4709	0					0	for my $p (1..$pages)
4710							{
4711	0					0	my $page = $self->getPage($p);
4712	0	0				0	if ($page->{Annots}) {
4713	0					0	$self->addPageResources($p, $formrsrcs);
4714	0					0	my $annotsarray = $self->getValue($page->{Annots});
4715	0					0	delete $page->{Annots};
4716	0					0	for my $annotref (@{$annotsarray})
	0					0
4717							{
4718	0					0	my $annot = $self->getValue($annotref);
4719	0	0				0	if ((ref $annot) ne 'HASH')
4720							{
4721	0					0	die 'Internal error: annotation is not a dictionary';
4722							}
4723							# Copy all text field values into the page, if present
4724	0	0	0			0	if ($annot->{Subtype} &&
			0
			0
			0
4725							$annot->{Subtype}->{value} eq 'Widget' &&
4726							$annot->{FT} &&
4727							$annot->{FT}->{value} eq 'Tx' &&
4728							$annot->{AP})
4729							{
4730	0					0	my $ap = $self->getValue($annot->{AP});
4731	0					0	my $rect = $self->getValue($annot->{Rect});
4732	0					0	my $x = $self->getValue($rect->[0]);
4733	0					0	my $y = $self->getValue($rect->[1]);
4734	0	0				0	if ($ap->{N})
4735							{
4736	0					0	my $n = $self->dereference($ap->{N}->{value})->{value};
4737	0					0	my $content = $self->decodeOne($n, 0);
4738	0	0				0	if (!$content)
4739							{
4740	0					0	die 'Internal error: expected a content stream from the form copy';
4741							}
4742	0					0	$content =~ s/ \bre(\s+)f\b /re$1n/gxms;
4743	0					0	$content = "q 1 0 0 1 $x $y cm\n$content Q\n";
4744	0					0	$self->appendPageContent($p, $content);
4745	0					0	$self->addPageResources($p, $self->getValue($n->{value}->{Resources}));
4746							}
4747							}
4748	0					0	$self->deleteObject($annotref->{value});
4749							}
4750							}
4751							}
4752
4753							# kill off the annotation dependencies
4754	0					0	$self->cleanse();
4755	0					0	return;
4756							}
4757
4758							=item $doc->previousRevision()
4759
4760							If this PDF was previously saved in append mode (that is, if
4761							C was not invoked on it), return a new instance representing
4762							that previous version. Otherwise return void. If this is an
4763							encrypted PDF, this method assumes that previous revisions were
4764							encrypted with the same password, which may be an incorrect
4765							assumption.
4766
4767							=cut
4768
4769							sub previousRevision {
4770	0			0	1	0	my $self = shift;
4771
4772	0					0	my $content = \$self->{content};
4773	0	0				0	return if !${$content}; # already wiped...
	0					0
4774
4775							# Figure out line end character
4776	0					0	my ($lineend) = ${$content} =~ m/ (.)%%EOF.*?\z /xms;
	0					0
4777	0	0				0	return if !$lineend; # Corrupt PDF: Cannot find the end-of-file marker
4778
4779	0					0	my $eof = $lineend.'%%EOF';
4780	0					0	my $i = rindex ${$content}, $eof;
	0					0
4781	0					0	my $j = rindex ${$content}, $eof, $i-1;
	0					0
4782	0	0				0	return if $j < 0; # just one revision
4783
4784	0					0	my $prev_content = (substr ${$content}, 0, $j) . $eof . $lineend;
	0					0
4785							# assume the passwords were the same in the previous rev
4786	0					0	my ($opass, $upass, @perms) = $self->getPrefs;
4787
4788	0					0	return __PACKAGE__->new($prev_content, $opass, $upass);
4789							}
4790
4791							=item $doc->allRevisions()
4792
4793							Accumulate CAM::PDF instances returned by C until
4794							there are no more previous revisions. Returns a list of instances
4795							from newest to oldest including this instance as the newest.
4796
4797							=cut
4798
4799							sub allRevisions {
4800	0			0	1	0	my ($self) = @_;
4801	0					0	my @revs;
4802	0					0	for (my $pdf = $self; $pdf; $pdf = $pdf->previous_revision) { ## no critic(ProhibitCStyleForLoops)
4803	0					0	push @revs, $pdf;
4804							}
4805	0					0	return @revs;
4806							}
4807
4808							################################################################################
4809
4810							=back
4811
4812							=head2 Document Writing
4813
4814							=over
4815
4816							=item $doc->preserveOrder()
4817
4818							Try to recreate the original document as much as possible. This may
4819							help in recreating documents which use undocumented tricks of saving
4820							font information in adjacent objects.
4821
4822							=cut
4823
4824							sub preserveOrder
4825							{
4826							# Call this to record the order of the objects in the original file
4827							# If called, then any new file will try to preserve the original order
4828	0			0	1	0	my $self = shift;
4829
4830	0					0	my $x = $self->{xref}; # shorthand
4831	0					0	$self->{order} = [sort {$x->{$a} <=> $x->{$b}} grep {!ref $x->{$_}} keys %{$x}];
	0					0
	0					0
	0					0
4832	0					0	return;
4833							}
4834
4835							=item $doc->isLinearized()
4836
4837							Returns a boolean indicating whether this PDF is linearized (aka
4838							"optimized").
4839
4840							=cut
4841
4842							sub isLinearized
4843							{
4844	8			8	1	1845	my $self = shift;
4845
4846	8					15	my $first;
4847	8	50				37	if (exists $self->{order})
4848							{
4849	0					0	$first = $self->{order}->[0];
4850							}
4851							else
4852							{
4853	8					20	my $x = $self->{xref}; # shorthand
4854	8					16	($first) = sort {$x->{$a} <=> $x->{$b}} grep {!ref $x->{$_}} keys %{$x};
	2797					3682
	558					1112
	8					159
4855							}
4856
4857	8					76	my $linearized; # false
4858	8					38	my $objnode = $self->dereference($first);
4859	8	100	66			78	if ($objnode && $objnode->{value}->{type} eq 'dictionary')
4860							{
4861	6	100				28	if (exists $objnode->{value}->{value}->{Linearized})
4862							{
4863	1					3	$linearized = $self; # true
4864							}
4865							}
4866	8					35	return $linearized;
4867							}
4868
4869							=item $doc->delinearize()
4870
4871							I
4872
4873							Undo the tweaks used to make the document 'optimized'. This function
4874							is automatically called on every save or output since this library
4875							does not yet support linearized documents.
4876
4877							=cut
4878
4879							sub delinearize
4880							{
4881	44			44	1	83	my $self = shift;
4882
4883	44	100				206	return if ($self->{delinearized});
4884
4885							# Turn off Linearization, if set
4886	4					8	my $first;
4887	4	50				18	if (exists $self->{order})
4888							{
4889	0					0	$first = $self->{order}->[0];
4890							}
4891							else
4892							{
4893	4					9	my $x = $self->{xref}; # shorthand
4894	4					8	($first) = sort {$x->{$a} <=> $x->{$b}} grep {!ref $x->{$_}} keys %{$x};
	383					534
	131					339
	4					47
4895							}
4896
4897	4					32	my $objnode = $self->dereference($first);
4898	4	50				25	if ($objnode->{value}->{type} eq 'dictionary')
4899							{
4900	4	100				21	if (exists $objnode->{value}->{value}->{Linearized})
4901							{
4902	1					8	$self->deleteObject($first);
4903							}
4904							}
4905
4906	4					11	$self->{delinearized} = 1;
4907	4					37	return;
4908							}
4909
4910							=item $doc->clean()
4911
4912							Cache all parts of the document and throw away it's old structure.
4913							This is useful for writing PDFs anew, instead of simply appending
4914							changes to the existing documents. This is called by cleansave() and
4915							cleanoutput().
4916
4917							=cut
4918
4919							sub clean
4920							{
4921	28			28	1	73	my $self = shift;
4922
4923							# Make sure to extract everything before we wipe the old version
4924	28					140	$self->cacheObjects();
4925
4926	28					115	$self->delinearize();
4927
4928							# Update the ID number to make this document distinct from the original.
4929							# If there is already an ID, only the second half is changed
4930	28					135	$self->createID();
4931
4932							# Mark everything changed
4933	28					1312	%{$self->{changes}} = (
	28					534
4934	591					889	%{$self->{changes}},
4935	28					63	map { $_ => 1 } keys %{$self->{xref}},
	28					506
4936							);
4937
4938							# Mark everything new
4939	28					2307	%{$self->{versions}} = (
	28					1328
4940	591					777	%{$self->{versions}},
4941	28					602	map { $_ => 0 } keys %{$self->{xref}},
	28					5358
4942							);
4943
4944	28					703	$self->{xref} = {};
4945	28					282	delete $self->{endxref};
4946	28					95	$self->{startxref} = 0;
4947	28					72	$self->{content} = q{};
4948	28					57	$self->{contentlength} = 0;
4949
4950	28					56	my $trailer = $self->{trailer};
4951	28					61	delete $trailer->{Prev};
4952	28					67	delete $trailer->{XRefStm};
4953	28	100	66			152	if (exists $trailer->{Type} && $trailer->{Type}->{value} eq 'XRef') {
4954	1					6	delete $trailer->{Type};
4955	1					6	delete $trailer->{Size};
4956	1					12	delete $trailer->{Index};
4957	1					14	delete $trailer->{W};
4958	1					6	delete $trailer->{Length};
4959	1					2	delete $trailer->{L};
4960	1					6	delete $trailer->{StreamData};
4961	1					6	delete $trailer->{Filter};
4962	1					2	delete $trailer->{F};
4963	1					12	delete $trailer->{DecodeParms};
4964	1					2	delete $trailer->{DP};
4965							}
4966	28					76	return;
4967							}
4968
4969							=item $doc->needsSave()
4970
4971							Returns a boolean indicating whether the save() method needs to be
4972							called. Like save(), this has nothing to do with whether the document
4973							has been saved to disk, but whether the in-memory representation of
4974							the document has been serialized.
4975
4976							=cut
4977
4978							sub needsSave
4979							{
4980	24			24	1	60	my $self = shift;
4981
4982	24					45	return 0 != keys %{$self->{changes}};
	24					156
4983							}
4984
4985							=item $doc->save()
4986
4987							Serialize the document into a single string. All changed document
4988							elements are normalized, and a new index and an updated trailer are
4989							created.
4990
4991							This function operates solely in memory. It DOES NOT write the
4992							document to a file. See the output() function for that.
4993
4994							=cut
4995
4996							sub save
4997							{
4998	16			16	1	40	my $self = shift;
4999
5000	16	50				52	if (!$self->needsSave())
5001							{
5002	0					0	return $self;
5003							}
5004
5005	16					69	$self->delinearize();
5006
5007	16					37	delete $self->{endxref};
5008
5009	16	50				74	if (!$self->{content})
5010							{
5011	16					62	$self->{content} = '%PDF-' . $self->{pdfversion} . "\n%\217\n";
5012							}
5013
5014	16					31	my %allobjs = (%{$self->{changes}}, %{$self->{xref}});
	16					178
	16					469
5015	16					624	my @objects = sort {$a<=>$b} keys %allobjs;
	4464					4540
5016	16	50				114	if ($self->{order})
5017							{
5018
5019							# Sort in the order in $self->{order} array, with the rest later
5020							# in objnum order
5021	0					0	my %o;
5022	0					0	my $n = @{$self->{order}};
	0					0
5023	0					0	for my $i (0 .. $n-1)
5024							{
5025	0					0	$o{$self->{order}->[$i]} = $i;
5026							}
5027	0		0			0	@objects = map {$_->[1]} sort {$a->[0] <=> $b->[0]} map {[$o{$_} \|\| $_+$n, $_]} @objects;
	0					0
	0					0
	0					0
5028							}
5029	16					40	delete $self->{order};
5030
5031	16					36	my %newxref;
5032	16					219	my $offset = length $self->{content};
5033	16					49	for my $key (@objects)
5034							{
5035	900	50				3208	next if (!$self->{changes}->{$key});
5036	900					1673	$newxref{$key} = $offset;
5037
5038							#print "Writing object $key\n";
5039	900					2396	my $obj = $self->writeObject($key);
5040	900					6939	$self->{content} .= $obj;
5041	900					1267	$offset += length $obj;
5042
5043	900					2780	$self->{xref}->{$key} = $newxref{$key};
5044	900					2623	delete $self->{changes}->{$key};
5045							}
5046
5047	16	50				12016	if ($self->{content} !~ m/ [\r\n] \z /xms)
5048							{
5049	0					0	$self->{content} .= "\n";
5050							}
5051
5052	16					122	my $startxref = length $self->{content};
5053
5054							# Append the new xref
5055	16					6510	$self->{content} .= "xref\n";
5056	16					97	my %blocks = (
5057							0 => "0000000000 65535 f \n",
5058							);
5059	16					438	for my $key (keys %newxref)
5060							{
5061	900					3639	$blocks{$key} = sprintf "%010d %05d n \n", $newxref{$key}, $self->{versions}->{$key};
5062							}
5063
5064							# If there is only one version of the document, there must be no
5065							# holes in the xref. Test for versions by checking if there's already an xref.
5066							# If clean() has been called, it will be absent
5067	16	50				145	if (!$self->{startxref})
5068							{
5069							# Fill in holes
5070	16					31	my $prevfreeblock = 0;
5071	16					192	for my $key (reverse 0 .. $self->{maxobj}-1)
5072							{
5073	1728	100				4038	if (!exists $blocks{$key})
5074							{
5075							# Add an entry to the free list
5076							# On $key == 0, this blows away the above definition of
5077							# the head of the free block list, but that's no big deal.
5078	828	50				2769	$blocks{$key} = sprintf "%010d %05d f \n",
5079							$prevfreeblock, ($key == 0 ? 65_535 : 1);
5080	828					1158	$prevfreeblock = $key;
5081							}
5082							}
5083							}
5084
5085	16					208	my $currblock = q{};
5086	16					38	my $currnum = 0;
5087	16					37	my $currstart = 0;
5088	16					390	my @blockkeys = sort {$a<=>$b} keys %blocks;
	9974					9302
5089	16					158	for my $i (0 .. $#blockkeys)
5090							{
5091	1744					2090	my $key = $blockkeys[$i];
5092	1744					2201	$currblock .= $blocks{$key};
5093	1744					1675	$currnum++;
5094	1744	100	66			7842	if ($i == $#blockkeys \|\| $key+1 < $blockkeys[$i+1])
5095							{
5096	16					121	$self->{content} .= "$currstart $currnum\n$currblock";
5097	16	50				88	if ($i < $#blockkeys)
5098							{
5099	0					0	$currblock = q{};
5100	0					0	$currnum = 0;
5101	0					0	$currstart = $blockkeys[$i+1];
5102							}
5103							}
5104							}
5105
5106							# Append the new trailer
5107	16					176	$self->{trailer}->{Size} = CAM::PDF::Node->new('number', $self->{maxobj} + 1);
5108	16	50				104	if ($self->{startxref})
5109							{
5110	0					0	$self->{trailer}->{Prev} = CAM::PDF::Node->new('number', $self->{startxref});
5111							}
5112	16					78	$self->{content} .= "trailer\n" . $self->writeAny(CAM::PDF::Node->new('dictionary', $self->{trailer})) . "\n";
5113
5114							# Append the new startxref
5115	16					80	$self->{content} .= "startxref\n$startxref\n";
5116	16					40	$self->{startxref} = $startxref;
5117
5118							# Append EOF
5119	16					44	$self->{content} .= "%%EOF\n";
5120
5121	16					45	$self->{contentlength} = length $self->{content};
5122
5123	16					6918	return $self;
5124							}
5125
5126							=item $doc->cleansave()
5127
5128							Call the clean() function, then call the save() function.
5129
5130							=cut
5131
5132							sub cleansave
5133							{
5134	16			16	1	944946	my $self = shift;
5135
5136	16					82	$self->clean();
5137	16					79	return $self->save();
5138							}
5139
5140							=item $doc->output($filename)
5141
5142							=item $doc->output()
5143
5144							Save the document to a file. The save() function is called first to
5145							serialize the data structure. If no filename is specified, or if the
5146							filename is '-', the document is written to standard output.
5147
5148							Note: it is the responsibility of the application to ensure that the
5149							PDF document has either the Modify or Add permission. You can do this
5150							like the following:
5151
5152							if ($self->canModify()) {
5153							$self->output($outfile);
5154							} else {
5155							die "The PDF file denies permission to make modifications\n";
5156							}
5157
5158							=cut
5159
5160							sub output
5161							{
5162	0			0	1	0	my $self = shift;
5163	0					0	my $file = shift;
5164	0	0				0	if (!defined $file)
5165							{
5166	0					0	$file = q{-};
5167							}
5168
5169	0					0	$self->save();
5170
5171	0	0				0	if ($file eq q{-})
5172							{
5173	0					0	binmode STDOUT; ##no critic(RequireCheckedSysCalls)
5174	0					0	print $self->{content};
5175							}
5176							else
5177							{
5178	0	0				0	open my $fh, '>', $file or die "Failed to write file $file\n";
5179	0	0				0	binmode $fh or die "Failed to set binmode for file $file\n";
5180	0					0	print {$fh} $self->{content};
	0					0
5181	0	0				0	close $fh or die "Failed to write file $file\n";
5182							}
5183	0					0	return $self;
5184							}
5185
5186							=item $doc->cleanoutput($file)
5187
5188							=item $doc->cleanoutput()
5189
5190							Call the clean() function, then call the output() function to write a
5191							fresh copy of the document to a file.
5192
5193							=cut
5194
5195							sub cleanoutput
5196							{
5197	0			0	1	0	my $self = shift;
5198	0					0	my $file = shift;
5199
5200	0					0	$self->clean();
5201	0					0	return $self->output($file);
5202							}
5203
5204							=item $doc->writeObject($objnum)
5205
5206							Return the serialization of the specified object.
5207
5208							=cut
5209
5210							sub writeObject
5211							{
5212	900			900	1	1338	my $self = shift;
5213	900					1048	my $objnum = shift;
5214
5215	900					2793	return "$objnum 0 " . $self->writeAny($self->dereference($objnum));
5216							}
5217
5218							=item $doc->writeString($string)
5219
5220							Return the serialization of the specified string. Works on normal or
5221							hex strings. If encryption is desired, the string should be encrypted
5222							before being passed here.
5223
5224							=cut
5225
5226							sub writeString
5227							{
5228	177			177	1	33913	my $pkg_or_doc = shift;
5229	177					330	my $string = shift;
5230
5231							# Divide the string into manageable pieces, which will be
5232							# re-concatenated with "\" continuation characters at the end of
5233							# their lines
5234
5235							# -- This code used to do concatenation by juxtaposing multiple
5236							# -- "()" compenents, but this breaks many PDF
5237							# -- implementations (incl Acrobat5 and XPDF)
5238
5239							# Break the string into pieces of length $maxstr. Note that an
5240							# artifact of this usage of split returns empty strings between
5241							# the fragments, so grep them out
5242
5243	177	50				459	my $maxstr = (ref $pkg_or_doc) ? $pkg_or_doc->{maxstr} : $CAM::PDF::MAX_STRING;
5244	177					949	my @strs = grep {$_ ne q{}} split /(.{$maxstr}})/xms, $string;
	177					658
5245	177					395	for (@strs)
5246							{
5247	177					336	s/ \\ /\\\\/gxms; # escape escapes -- this line must come first!
5248	177					407	s/ ([()]) /\\$1/gxms; # escape parens
5249	177					253	s/ \n /\\n/gxms;
5250	177					254	s/ \r /\\r/gxms;
5251	177					238	s/ \t /\\t/gxms;
5252	177					498	s/ \f /\\f/gxms;
5253							# TODO: handle backspace char
5254							#s/ ??? /\\b/gxms;
5255							}
5256	177					931	return '(' . (join "\\\n", @strs) . ')';
5257							}
5258
5259							=item $doc->writeAny($node)
5260
5261							Returns the serialization of the specified node. This handles all
5262							Node types, including object Nodes.
5263
5264							=cut
5265
5266							sub writeAny
5267							{
5268	18676			18676	1	29674	my $self = shift;
5269	18676					21271	my $objnode = shift;
5270
5271	18676	50				41798	if (! ref $objnode)
5272							{
5273	0					0	die 'Not a ref';
5274							}
5275
5276	18676					34487	my $key = $objnode->{type};
5277	18676					30307	my $val = $objnode->{value};
5278	18676					29238	my $objnum = $objnode->{objnum};
5279	18676					31198	my $gennum = $objnode->{gennum};
5280
5281	18676	50	0			120118	return $key eq 'string' ? $self->writeString($self->{crypt}->encrypt($self, $val, $objnum, $gennum))
		100
		100
		100
		50
		100
		100
		100
		100
		100
5282							: $key eq 'hexstring' ? '<' . (unpack 'H*', $self->{crypt}->encrypt($self, $val, $objnum, $gennum)) . '>'
5283							: $key eq 'number' ? "$val"
5284							: $key eq 'reference' ? "$val 0 R" # TODO: lookup the gennum and use it instead of 0 (?)
5285							: $key eq 'boolean' ? $val
5286							: $key eq 'null' ? 'null'
5287							: $key eq 'label' ? "/$val"
5288							: $key eq 'array' ? $self->_writeArray($objnode)
5289							: $key eq 'dictionary' ? $self->_writeDictionary($objnode)
5290							: $key eq 'object' ? $self->_writeObject($objnode)
5291
5292							: die "Unknown key '$key' in writeAny (objnum ".($objnum\|\|'').")\n";
5293							}
5294
5295							sub _writeArray
5296							{
5297	432			432		511	my $self = shift;
5298	432					513	my $objnode = shift;
5299
5300	432					687	my $val = $objnode->{value};
5301	432	50				463	if (@{$val} == 0)
	432					1298
5302							{
5303	0					0	return '[ ]';
5304							}
5305	432					639	my $str = q{};
5306	432					547	my @strs;
5307	432					490	for (@{$val})
	432					1051
5308							{
5309	13064					34367	my $newstr = $self->writeAny($_);
5310	13064	100				34801	if ($str ne q{})
5311							{
5312	12632	100				27597	if ($self->{maxstr} < length $str . $newstr)
5313							{
5314	941					1790	push @strs, $str;
5315	941					1280	$str = q{};
5316							}
5317							else
5318							{
5319	11691					14179	$str .= q{ };
5320							}
5321							}
5322	13064					20363	$str .= $newstr;
5323							}
5324	432	100				1267	if (@strs > 0)
5325							{
5326	93					599	$str = join "\n", @strs, $str;
5327							}
5328	432					2140	return '[ ' . $str . ' ]';
5329							}
5330
5331							sub _writeDictionary
5332							{
5333	852			852		1237	my $self = shift;
5334	852					1133	my $objnode = shift;
5335
5336	852					1238	my $val = $objnode->{value};
5337	852					1278	my $str = q{};
5338	852					1080	my @strs;
5339	852	100				2345	if (exists $val->{Type})
5340							{
5341	419	50				1801	$str .= ($str ? q{ } : q{}) . '/Type ' . $self->writeAny($val->{Type});
5342							}
5343	852	100				2113	if (exists $val->{Subtype})
5344							{
5345	184	50				738	$str .= ($str ? q{ } : q{}) . '/Subtype ' . $self->writeAny($val->{Subtype});
5346							}
5347	852					1255	for my $dictkey (sort keys %{$val})
	852					4733
5348							{
5349	4121	100				8618	next if ($dictkey eq 'Type');
5350	3702	100				6802	next if ($dictkey eq 'Subtype');
5351	3518	100				6788	next if ($dictkey eq 'StreamDataDone');
5352	3317	100				6646	if ($dictkey eq 'StreamData')
5353							{
5354	201	50				554	if (exists $val->{StreamDataDone})
5355							{
5356	201					377	delete $val->{StreamDataDone};
5357	201					445	next;
5358							}
5359							# This is a stream way down deep in the data... Probably due to a solidifyObject
5360
5361							# First, try to handle the easy case:
5362	0	0	0			0	if (2 == scalar keys %{$val} && (exists $val->{Length} \|\| exists $val->{L}))
	0		0			0
5363							{
5364	0					0	my $binary = $val->{$dictkey}->{value};
5365	0					0	my $len = length $binary;
5366	0					0	my $unpacked = unpack 'H' . $len*2, $binary;
5367	0					0	return $self->writeAny(CAM::PDF::Node->new('hexstring', $unpacked, $objnode->{objnum}, $objnode->{gennum}));
5368							}
5369
5370							# TODO: Handle more complex streams ...
5371	0					0	die "This stream is too complex for me to write... Giving up\n";
5372
5373	0					0	next; ## no critic(ControlStructures::ProhibitUnreachableCode)
5374							}
5375
5376	3116					10098	my $newstr = "/$dictkey " . $self->writeAny($val->{$dictkey});
5377	3116	100				26266	if ($str ne q{})
5378							{
5379	2716	100				7356	if ($self->{maxstr} < length $str . $newstr)
5380							{
5381	767					1532	push @strs, $str;
5382	767					1149	$str = q{};
5383							}
5384							else
5385							{
5386	1949					2668	$str .= q{ };
5387							}
5388							}
5389	3116					6067	$str .= $newstr;
5390							}
5391	852	100				2700	if (@strs > 0)
5392							{
5393	385					1167	$str = join "\n", @strs, $str;
5394							}
5395	852					4323	return '<< ' . $str . ' >>';
5396							}
5397
5398							sub _writeObject
5399							{
5400	900			900		1299	my $self = shift;
5401	900					1148	my $objnode = shift;
5402
5403	900					1411	my $val = $objnode->{value};
5404	900	50				2323	if (! ref $val)
5405							{
5406	0					0	die "Obj data is not a ref! ($val)";
5407							}
5408	900					1047	my $stream;
5409	900	100	100			5846	if ($val->{type} eq 'dictionary' && exists $val->{value}->{StreamData})
5410							{
5411	201					942	$stream = $val->{value}->{StreamData}->{value};
5412	201					378	my $length = length $stream;
5413
5414	201		33			1065	my $l = $val->{value}->{Length} \|\| $val->{value}->{L};
5415	201					642	my $oldlength = $self->getValue($l);
5416	201	50				729	if ($length != $oldlength)
5417							{
5418	0					0	$val->{value}->{Length} = CAM::PDF::Node->new('number', $length, $objnode->{objnum}, $objnode->{gennum});
5419	0					0	delete $val->{value}->{L};
5420							}
5421	201					626	$val->{value}->{StreamDataDone} = 1;
5422							}
5423	900					2060	my $str = $self->writeAny($val);
5424	900	100				2219	if (defined $stream)
5425							{
5426	201					1124	$stream = $self->{crypt}->encrypt($self, $stream, $objnode->{objnum}, $objnode->{gennum});
5427	201					2443823	$str .= "\nstream\n" . $stream . 'endstream';
5428							}
5429	900					11877	return "obj\n$str\nendobj\n";
5430							}
5431
5432							######################################################################
5433
5434							=back
5435
5436							=head2 Document Traversing
5437
5438							=over
5439
5440							=item $doc->traverse($dereference, $node, $callbackfunc, $callbackdata)
5441
5442							Recursive traversal of a PDF data structure.
5443
5444							In many cases, it's useful to apply one action to every node in an
5445							object tree. The routines below all use this traverse() function.
5446							One of the most important parameters is the first: the C<$dereference>
5447							boolean. If true, the traversal follows reference Nodes. If false,
5448							it does not descend into reference Nodes.
5449
5450							Optionally, you can pass in a hashref as a final argument to reduce
5451							redundant traversing across multiple calls. Just pass in an empty
5452							hashref the first time and pass in the same hashref each time. See
5453							C for an example.
5454
5455							=cut
5456
5457							sub traverse
5458							{
5459	638			638	1	1118	my $self = shift;
5460	638					705	my $deref = shift;
5461	638					12950	my $startnode = shift;
5462	638					725	my $func = shift;
5463	638					659	my $funcdata = shift;
5464	638		100			2419	my $traversed = shift \|\| {};
5465
5466	638					1335	my @stack = ($startnode);
5467
5468	638					879	my $i = 0;
5469	638					2037	while ($i < @stack)
5470							{
5471	30413					51096	my $objnode = $stack[$i++];
5472	30413					50338	$self->$func($objnode, $funcdata);
5473
5474	30413					43814	my $type = $objnode->{type};
5475	30413					44184	my $val = $objnode->{value};
5476
5477	30413	100				60581	if ($type eq 'object')
5478							{
5479							# Shrink stack periodically
5480	1355					3350	splice @stack, 0, $i;
5481	1355					1656	$i = 0;
5482							# Mark object done
5483	1355	50				3497	if ($objnode->{objnum})
5484							{
5485	1355					3573	$traversed->{$objnode->{objnum}} = 1;
5486							}
5487							}
5488
5489	1147					9186	push @stack, $type eq 'dictionary' ? values %{$val}
	591					6412
5490	30413	100	100			158416	: $type eq 'array' ? @{$val}
		100
		100
		100
5491							: $type eq 'object' ? $val
5492							: $type eq 'reference'
5493							&& $deref
5494							&& !exists $traversed->{$val} ? $self->dereference($val)
5495							: ();
5496							}
5497	638					2672	return;
5498							}
5499
5500							# decodeObject and decodeAll differ from each other like this:
5501							#
5502							# decodeObject JUST decodes a single stream directly below the object
5503							# specified by the objnum
5504							#
5505							# decodeAll descends through a whole object tree (following
5506							# references) decoding everything it can find
5507
5508							=item $doc->decodeObject($objectnum)
5509
5510							I
5511
5512							Remove any filters (like compression, etc) from a data stream
5513							indicated by the object number.
5514
5515							=cut
5516
5517							sub decodeObject
5518							{
5519	0			0	1	0	my $self = shift;
5520	0					0	my $objnum = shift;
5521
5522	0					0	my $objnode = $self->dereference($objnum);
5523
5524	0					0	$self->decodeOne($objnode->{value}, 1);
5525	0					0	return;
5526							}
5527
5528							=item $doc->decodeAll($object)
5529
5530							I
5531
5532							Remove any filters from any data stream in this object or any object
5533							referenced by it.
5534
5535							=cut
5536
5537							sub decodeAll
5538							{
5539	0			0	1	0	my $self = shift;
5540	0					0	my $objnode = shift;
5541
5542	0					0	$self->traverse(1, $objnode, \&decodeOne, 1);
5543	0					0	return;
5544							}
5545
5546							=item $doc->decodeOne($object)
5547
5548							=item $doc->decodeOne($object, $save?)
5549
5550							I
5551
5552							Remove any filters from an object. The boolean flag C<$save> (defaults to
5553							false) indicates whether this removal should be permanent or just
5554							this once. If true, the function returns success or failure. If
5555							false, the function returns the defiltered content.
5556
5557							=cut
5558
5559							sub decodeOne
5560							{
5561	46			46	1	65	my $self = shift;
5562	46					65	my $objnode = shift;
5563	46		50			173	my $save = shift \|\| 0;
5564
5565	46					63	my $changed = 0;
5566	46					110	my $streamdata = q{};
5567
5568	46	50				133	if ($objnode->{type} ne 'dictionary')
5569							{
5570	0	0				0	return $save ? $changed : $streamdata;
5571							}
5572
5573	46					75	my $dict = $objnode->{value};
5574
5575	46					118	$streamdata = $dict->{StreamData}->{value};
5576							#warn 'decoding thing ' . ($dict->{StreamData}->{objnum} \|\| '(unknown)') . "\n";
5577
5578							# Don't work on {F} since that's too common a word
5579							#my $filtobj = $dict->{Filter} \|\| $dict->{F};
5580	46					72	my $filtobj = $dict->{Filter};
5581
5582	46	100				111	if (defined $filtobj)
5583							{
5584	41	50				157	my @filters = $filtobj->{type} eq 'array' ? @{$filtobj->{value}} : ($filtobj);
	0					0
5585	41		66			186	my $parmobj = $dict->{DecodeParms} \|\| $dict->{DP};
5586	41					130	my @parms;
5587	41	100				103	if ($parmobj)
5588							{
5589	4	50				20	@parms = $parmobj->{type} eq 'array' ? @{$parmobj->{value}} : ($parmobj);
	0					0
5590							}
5591
5592	41					89	for my $filter (@filters)
5593							{
5594	41	50				130	if ($filter->{type} ne 'label')
5595							{
5596	0					0	warn "All filter names must be labels\n";
5597	0					0	require Data::Dumper;
5598	0					0	warn Data::Dumper->Dump([$filter], ['Filter']);
5599	0					0	next;
5600							}
5601	41					88	my $filtername = $filter->{value};
5602
5603							# Make sure this is not an encrypt dict
5604	41	50				102	next if ($filtername eq 'Standard');
5605
5606	41					50	my $filt;
5607	41					62	eval {
5608	41					1956	require Text::PDF::Filter;
5609	41		33			103769	my $pkg = 'Text::PDF::' . ($filterabbrevs{$filtername} \|\| $filtername);
5610	41					252	$filt = $pkg->new;
5611	41					31414	1;
5612							};
5613	41	50				139	if (!$filt)
5614							{
5615	0					0	warn "Failed to open filter $filtername (Text::PDF::$filtername)\n";
5616	0					0	last;
5617							}
5618
5619	41					71	my $oldlength = length $streamdata;
5620
5621							{
5622							# Hack to turn off warnings in Filter library
5623	3			3		46	no warnings; ## no critic(TestingAndDebugging::ProhibitNoWarnings)
	3					7
	3					10743
	41					83
5624	41					219	$streamdata = $filt->infilt($streamdata, 1);
5625							}
5626
5627	41					3370	$self->fixDecode(\$streamdata, $filtername, shift @parms);
5628	41					91	my $length = length $streamdata;
5629
5630							#warn "decoded length: $oldlength -> $length\n";
5631
5632	41	50				873	if ($save)
5633							{
5634	0					0	my $objnum = $dict->{StreamData}->{objnum};
5635	0					0	my $gennum = $dict->{StreamData}->{gennum};
5636	0	0				0	if ($objnum)
5637							{
5638	0					0	$self->{changes}->{$objnum} = 1;
5639							}
5640	0					0	$changed = 1;
5641	0					0	$dict->{StreamData}->{value} = $streamdata;
5642	0	0				0	if ($length != $oldlength)
5643							{
5644	0					0	$dict->{Length} = CAM::PDF::Node->new('number', $length, $objnum, $gennum);
5645	0					0	delete $dict->{L};
5646							}
5647
5648							# These changes should happen later, but I prefer to do it
5649							# redundantly near the changes hash
5650	0					0	delete $dict->{Filter};
5651	0					0	delete $dict->{F};
5652	0					0	delete $dict->{DecodeParms};
5653	0					0	delete $dict->{DP};
5654							}
5655							}
5656							}
5657							#else { use Data::Dumper; print Dumper($dict); }
5658
5659	46	50				281	return $save ? $changed : $streamdata;
5660							}
5661
5662							=item $doc->fixDecode($streamdata, $filter, $params)
5663
5664							This is a utility method to do any tweaking after removing the filter
5665							from a data stream.
5666
5667							=cut
5668
5669							sub fixDecode
5670							{
5671	41			41	1	88	my $self = shift;
5672	41					68	my $streamdata = shift;
5673	41					69	my $filter = shift;
5674	41					72	my $parms = shift;
5675
5676	41	100				94	if (!$parms)
5677							{
5678	37					101	return;
5679							}
5680	4					14	my $d = $self->getValue($parms);
5681	4	50	33			37	if (!$d \|\| (ref $d) ne 'HASH')
5682							{
5683	0					0	die "DecodeParms must be a dictionary.\n";
5684							}
5685	4	0	33			24	if ($filter eq 'FlateDecode' \|\| $filter eq 'Fl' \|\|
			33
			0
5686							$filter eq 'LZWDecode' \|\| $filter eq 'LZW')
5687							{
5688	4	50				23	my $p = exists $d->{Predictor} ? $self->getValue($d->{Predictor}) : 1;
5689	4	50				26	if ($p == 2)
		50
5690							{
5691	0					0	$self->_fixDecodeTIFF($streamdata, $d);
5692							}
5693							elsif ($p >= 10)
5694							{
5695	4					21	$self->_fixDecodePNG($streamdata, $d);
5696							}
5697							# else no fix needed
5698							}
5699	4					9	return;
5700							}
5701
5702							sub _fixDecodeTIFF
5703							{
5704	0			0		0	my $self = shift;
5705	0					0	my $streamdata = shift;
5706	0					0	my $d = shift;
5707
5708	0					0	die 'The TIFF image predictor is not supported';
5709							}
5710
5711							sub _fixDecodePNG
5712							{
5713	4			4		8	my $self = shift;
5714	4					9	my $streamdata = shift;
5715	4					10	my $d = shift;
5716
5717							# PNG differencing algorithms http://www.w3.org/TR/PNG-Filters.html
5718	4	50				19	my $colors = exists $d->{Colors} ? $self->getValue($d->{Colors}) : 1;
5719	4	50				22	my $columns = exists $d->{Columns} ? $self->getValue($d->{Columns}) : 1;
5720	4	50				19	my $bpc = exists $d->{BitsPerComponent} ? $self->getValue($d->{BitsPerComponent}) : 8;
5721	4	50				18	if (0 != $bpc % 8) {
5722	0					0	die 'Color samples that are not multiples of 8 bits are not supported';
5723							}
5724	4					16	my $width = 1 + $colors * $columns * ($bpc >> 3); # size of a row in bytes, including the 1-byte predictor
5725	4					6	my $len = length ${$streamdata};
	4					11
5726	4	50				19	if (0 != $len % $width) {
5727	0					0	die 'The stream data is not evenly divisible into rows';
5728							}
5729	4					10	my $rows = $len / $width;
5730	4					9	my $newdata = q{};
5731	4					109	my $prev_row = [(0) x ($width - 1)];
5732	4					14	for my $irow (0 .. $rows - 1)
5733							{
5734	92					109	my ($row_pred, @row) = unpack 'C' . $width, substr ${$streamdata}, $irow * $width, $width;
	92					273
5735	92	50				235	if ($row_pred == 1) { ##no critic (IfElse)
		50
		0
		0
5736	0					0	for my $i (1 .. $width-2) {
5737	0					0	$row[$i] = ($row[$i-1] + $row[$i]) & 0xff;
5738							}
5739							} elsif ($row_pred == 2) {
5740	92					147	for my $i (0 .. $width-2) {
5741	368					584	$row[$i] = ($prev_row->[$i] + $row[$i]) & 0xff;
5742							}
5743							} elsif ($row_pred == 3) {
5744	0					0	$row[0] = (($prev_row->[0] >> 1) + $row[0]) & 0xff;
5745	0					0	for my $i (1 .. $width-2) {
5746	0					0	$row[$i] = ((($row[$i-1] + $prev_row->[$i]) >> 1) + $row[$i]) & 0xff;
5747							}
5748							} elsif ($row_pred == 4) {
5749							# Paeth reduces to up for first column
5750	0					0	$row[0] = ($prev_row->[0] + $row[0]) & 0xff;
5751	0					0	for my $i (1 .. $width-2) {
5752	0					0	my $a = $row[$i-1];
5753	0					0	my $b = $prev_row->[$i];
5754	0					0	my $c = $prev_row->[$i-1];
5755	0					0	my $p = $a + $b - $c;
5756	0					0	my $pa = abs $p - $a;
5757	0					0	my $pb = abs $p - $b;
5758	0					0	my $pc = abs $p - $c;
5759	0	0	0			0	my $paeth = $pa <= $pb && $pa <= $pc ? $a : $pb <= $pc ? $b : $c;
		0
5760	0					0	$row[$i] = ($paeth + $row[$i]) & 0xff;
5761							}
5762							}
5763	92					215	$newdata .= pack 'C*', @row;
5764	92					237	$prev_row = \@row;
5765							}
5766	4					9	${$streamdata} = $newdata;
	4					10
5767	4					21	return;
5768							}
5769
5770							=item $doc->encodeObject($objectnum, $filter)
5771
5772							Apply the specified filter to the object.
5773
5774							=cut
5775
5776							sub encodeObject
5777							{
5778	0			0	1	0	my $self = shift;
5779	0					0	my $objnum = shift;
5780	0					0	my $filtername = shift;
5781
5782	0					0	my $objnode = $self->dereference($objnum);
5783
5784	0					0	$self->encodeOne($objnode->{value}, $filtername);
5785	0					0	return;
5786							}
5787
5788							=item $doc->encodeOne($object, $filter)
5789
5790							Apply the specified filter to the object.
5791
5792							=cut
5793
5794							sub encodeOne ## no critic(Subroutines::ProhibitExcessComplexity)
5795							{
5796	4			4	1	9	my $self = shift;
5797	4					10	my $objnode = shift;
5798	4					10	my $filtername = shift;
5799
5800	4					8	my $changed = 0;
5801
5802	4	50				19	if ($objnode->{type} eq 'dictionary')
5803							{
5804	4					8	my $dict = $objnode->{value};
5805	4					12	my $objnum = $objnode->{objnum};
5806	4					8	my $gennum = $objnode->{gennum};
5807
5808	4	50				17	if (! exists $dict->{StreamData})
5809							{
5810							#warn "Object does not contain a Stream to encode\n";
5811	0					0	return 0;
5812							}
5813
5814	4	50	33			30	if ($filtername eq 'LZWDecode' \|\| $filtername eq 'LZW')
5815							{
5816	0					0	$filtername = 'FlateDecode';
5817	0					0	warn "LZWDecode filter not supported for encoding. Using $filtername instead\n";
5818							}
5819	4					9	my $filt = eval {
5820	4					35	require Text::PDF::Filter;
5821	4					12	my $pkg = "Text::PDF::$filtername";
5822	4					25	$pkg->new;
5823							};
5824	4	50				1631	if (!$filt)
5825							{
5826	0					0	warn "Failed to open filter $filtername (Text::PDF::$filtername)\n";
5827	0					0	return 0;
5828							}
5829
5830	4		33			21	my $l = $dict->{Length} \|\| $dict->{L};
5831	4					15	my $oldlength = $self->getValue($l);
5832	4					25	$dict->{StreamData}->{value} = $filt->outfilt($dict->{StreamData}->{value}, 1);
5833	4					1920	my $length = length $dict->{StreamData}->{value};
5834
5835	4	50	33			35	if (! defined $oldlength \|\| $length != $oldlength)
5836							{
5837	4	50	33			55	if (defined $l && $l->{type} eq 'reference')
		50	33
5838							{
5839	0					0	my $lenobj = $self->dereference($l->{value})->{value};
5840	0	0				0	if ($lenobj->{type} ne 'number')
5841							{
5842	0					0	die "Expected length to be a reference to an object containing a number while encoding\n";
5843							}
5844	0					0	$lenobj->{value} = $length;
5845							}
5846							elsif (!defined $l \|\| $l->{type} eq 'number')
5847							{
5848	4					21	$dict->{Length} = CAM::PDF::Node->new('number', $length, $objnum, $gennum);
5849	4					11	delete $dict->{L};
5850							}
5851							else
5852							{
5853	0					0	die "Unexpected type \"$l->{type}\" for Length while encoding.\n" .
5854							"(expected \"number\" or \"reference\")\n";
5855							}
5856							}
5857
5858							# Record the filter
5859	4					18	my $newfilt = CAM::PDF::Node->new('label', $filtername, $objnum, $gennum);
5860	4		33			33	my $f = $dict->{Filter} \|\| $dict->{F};
5861	4	50				14	if (!defined $f)
		0
		0
5862							{
5863	4					13	$dict->{Filter} = $newfilt;
5864	4					10	delete $dict->{F};
5865							}
5866							elsif ($f->{type} eq 'label')
5867							{
5868	0					0	$dict->{Filter} = CAM::PDF::Node->new('array', [
5869							$newfilt,
5870							$f,
5871							],
5872							$objnum, $gennum);
5873	0					0	delete $dict->{F};
5874							}
5875							elsif ($f->{type} eq 'array')
5876							{
5877	0					0	unshift @{$f->{value}}, $newfilt;
	0					0
5878							}
5879							else
5880							{
5881	0					0	die "Confused: Filter type is \"$f->{type}\", not the\n" .
5882							"expected \"array\" or \"label\"\n";
5883							}
5884
5885	4	50	33			34	if ($dict->{DecodeParms} \|\| $dict->{DP})
5886							{
5887	0					0	die "Insertion of DecodeParms not yet supported...\n";
5888							}
5889
5890	4	50				16	if ($objnum)
5891							{
5892	0					0	$self->{changes}->{$objnum} = 1;
5893							}
5894	4					80	$changed = 1;
5895							}
5896	4					21	return $changed;
5897							}
5898
5899
5900							=item $doc->setObjNum($object, $objectnum, $gennum)
5901
5902							Descend into an object and change all of the INTERNAL object number
5903							flags to a new number. This is just for consistency of internal
5904							accounting.
5905
5906							=cut
5907
5908							sub setObjNum
5909							{
5910	317			317	1	508	my $self = shift;
5911	317					497	my $objnode = shift;
5912	317					462	my $objnum = shift;
5913	317					400	my $gennum = shift;
5914
5915	317					1558	$self->traverse(0, $objnode, \&_setObjNumCB, [$objnum, $gennum]);
5916	317					743	return;
5917							}
5918
5919							# PRIVATE FUNCTION
5920
5921							sub _setObjNumCB
5922							{
5923	6334			6334		7549	my $self = shift;
5924	6334					10784	my $objnode = shift;
5925	6334					6726	my $nums = shift;
5926
5927	6334					19581	$objnode->{objnum} = $nums->[0];
5928	6334					8695	$objnode->{gennum} = $nums->[1];
5929	6334					8974	return;
5930							}
5931
5932							=item $doc->getRefList($object)
5933
5934							I
5935
5936							Return an array all of objects referred to in this object.
5937
5938							=cut
5939
5940							sub getRefList
5941							{
5942	20			20	1	49	my $self = shift;
5943	20					35	my $objnode = shift;
5944
5945	20					41	my $list = {};
5946	20					98	$self->traverse(1, $objnode, \&_getRefListCB, $list);
5947
5948	20					51	return (sort keys %{$list});
	20					853
5949							}
5950
5951							# PRIVATE FUNCTION
5952
5953							sub _getRefListCB
5954							{
5955	17875			17875		40564	my $self = shift;
5956	17875					18359	my $objnode = shift;
5957	17875					24007	my $list = shift;
5958
5959	17875	100				46457	if ($objnode->{type} eq 'reference')
5960							{
5961	856					2315	$list->{$objnode->{value}} = 1;
5962							}
5963	17875					24774	return;
5964							}
5965
5966							=item $doc->changeRefKeys($object, $hashref)
5967
5968							I
5969
5970							Renumber all references in an object.
5971
5972							=cut
5973
5974							sub changeRefKeys
5975							{
5976	297			297	1	413	my $self = shift;
5977	297					347	my $objnode = shift;
5978	297					352	my $newrefkeys = shift;
5979	297					337	my $traversed = shift; # optional
5980
5981	297		50			1151	my $follow = shift \|\| 0; # almost always false
5982
5983	297					925	$self->traverse($follow, $objnode, \&_changeRefKeysCB, $newrefkeys, $traversed);
5984	297					772	return;
5985							}
5986
5987							# PRIVATE FUNCTION
5988
5989							sub _changeRefKeysCB
5990							{
5991	6150			6150		6938	my $self = shift;
5992	6150					6420	my $objnode = shift;
5993	6150					6974	my $newrefkeys = shift;
5994
5995	6150	100				17165	if ($objnode->{type} eq 'reference')
5996							{
5997	357	100				1362	if (exists $newrefkeys->{$objnode->{value}})
5998							{
5999	339					736	$objnode->{value} = $newrefkeys->{$objnode->{value}};
6000							}
6001							}
6002	6150					9026	return;
6003							}
6004
6005							=item $doc->abbrevInlineImage($object)
6006
6007							Contract all image keywords to inline abbreviations.
6008
6009							=cut
6010
6011							sub abbrevInlineImage
6012							{
6013	1			1	1	3	my $self = shift;
6014	1					2	my $objnode = shift;
6015
6016	1					30	$self->traverse(0, $objnode, \&_abbrevInlineImageCB, {reverse %inlineabbrevs});
6017	1					6	return;
6018							}
6019
6020							=item $doc->unabbrevInlineImage($object)
6021
6022							Expand all inline image abbreviations.
6023
6024							=cut
6025
6026							sub unabbrevInlineImage
6027							{
6028	3			3	1	6	my $self = shift;
6029	3					4	my $objnode = shift;
6030
6031	3					23	$self->traverse(0, $objnode, \&_abbrevInlineImageCB, \%inlineabbrevs);
6032	3					6	return;
6033							}
6034
6035							# PRIVATE FUNCTION
6036
6037							sub _abbrevInlineImageCB
6038							{
6039	54			54		59	my $self = shift;
6040	54					240	my $objnode = shift;
6041	54					55	my $convert = shift;
6042
6043	54	100				148	if ($objnode->{type} eq 'label')
		100
6044							{
6045	12					30	my $new = $convert->{$objnode->{value}};
6046	12	50				28	if (defined $new)
6047							{
6048	12					21	$objnode->{value} = $new;
6049							}
6050							}
6051							elsif ($objnode->{type} eq 'dictionary')
6052							{
6053	8					13	my $dict = $objnode->{value};
6054	8					8	for my $key (keys %{$dict})
	8					31
6055							{
6056	34					64	my $new = $convert->{$key};
6057	34	100	66			194	if (defined $new && $new ne $key)
6058							{
6059	24					49	$dict->{$new} = $dict->{$key};
6060	24					48	delete $dict->{$key};
6061							}
6062							}
6063							}
6064	54					83	return;
6065							}
6066
6067							=item $doc->changeString($object, $hashref)
6068
6069							Alter all instances of a given string. The hashref is a dictionary of
6070							from-string and to-string. If the from-string looks like C
6071							then it is interpreted as a Perl regular expression and is eval'ed.
6072							Otherwise the search-and-replace is literal.
6073
6074							=cut
6075
6076							sub changeString
6077							{
6078	0			0	1	0	my $self = shift;
6079	0					0	my $objnode = shift;
6080	0					0	my $changelist = shift;
6081
6082	0					0	$self->traverse(0, $objnode, \&_changeStringCB, $changelist);
6083	0					0	return;
6084							}
6085
6086							# PRIVATE FUNCTION
6087
6088							sub _changeStringCB
6089							{
6090	0			0		0	my $self = shift;
6091	0					0	my $objnode = shift;
6092	0					0	my $changelist = shift;
6093
6094	0	0				0	if ($objnode->{type} eq 'string')
6095							{
6096	0					0	for my $key (keys %{$changelist})
	0					0
6097							{
6098	0	0				0	if ($key =~ m/ \A regex[(](.*)[)] \z /xms)
6099							{
6100	0					0	my $regex = $1;
6101	0					0	my $res;
6102	0					0	my $eval_result = eval {
6103	0					0	$res = ($objnode->{value} =~ s/ $regex /$changelist->{$key}/gxms);
6104	0					0	1;
6105							};
6106	0	0				0	if (!$eval_result)
6107							{
6108	0					0	die "Failed regex search/replace: $EVAL_ERROR\n";
6109							}
6110	0	0	0			0	if ($res && $objnode->{objnum})
6111							{
6112	0					0	$self->{changes}->{$objnode->{objnum}} = 1;
6113							}
6114							}
6115							else
6116							{
6117	0	0	0			0	if ($objnode->{value} =~ s/ $key /$changelist->{$key}/gxms && $objnode->{objnum})
6118							{
6119	0					0	$self->{changes}->{$objnode->{objnum}} = 1;
6120							}
6121							}
6122							}
6123							}
6124	0					0	return;
6125							}
6126
6127							######################################################################
6128
6129							=back
6130
6131							=head2 Utility functions
6132
6133							=over
6134
6135							=item $doc->rangeToArray($min, $max, $list...)
6136
6137							Converts string lists of numbers to an array. For example,
6138
6139							CAM::PDF->rangeToArray(1, 15, '1,3-5,12,9', '14-', '8 - 6, -2');
6140
6141							becomes
6142
6143							(1,3,4,5,12,9,14,15,8,7,6,1,2)
6144
6145							=cut
6146
6147							sub rangeToArray
6148							{
6149	18			18	1	547	my ($pkg_or_doc, $min, $max, @range_parts) = @_;
6150	18					43	my @in_array = grep {defined $_} @range_parts;
	27					81
6151
6152	18					44	for (@in_array) # modify in place
6153							{
6154	27					88	s/ [^\d\-,] //gxms; # clean
6155							}
6156							# split on numbers and ranges
6157	18					36	@in_array = map {m/ ([\d\-]+) /gxms} @in_array;
	27					160
6158
6159	18					40	my @out_array;
6160	18	100				55	if (@in_array == 0)
6161							{
6162	1					5	@out_array = $min .. $max;
6163							}
6164							else
6165							{
6166	17					34	for (@in_array)
6167							{
6168	34	100	100			234	if (m/ (\d)-(\d) /xms)
		100
6169							{
6170	13					28	my $aa = $1;
6171	13					23	my $bb = $2;
6172	13	100				32	if ($aa eq q{})
6173							{
6174	3					4	$aa = $min-1;
6175							}
6176	13	100				29	if ($bb eq q{})
6177							{
6178	3					6	$bb = $max+1;
6179							}
6180
6181							# Check if these are possible
6182	13	100	100			53	next if ($aa < $min && $bb < $min);
6183	11	100	66			44	next if ($aa > $max && $bb > $max);
6184
6185	9	100				19	if ($aa < $min)
6186							{
6187	2					3	$aa = $min;
6188							}
6189	9	50				21	if ($bb < $min)
6190							{
6191	0					0	$bb = $min;
6192							}
6193	9	50				19	if ($aa > $max)
6194							{
6195	0					0	$aa = $max;
6196							}
6197	9	100				20	if ($bb > $max)
6198							{
6199	2					3	$bb = $max;
6200							}
6201
6202	9	100				16	if ($aa > $bb)
6203							{
6204	2					8	push @out_array, reverse $bb .. $aa;
6205							}
6206							else
6207							{
6208	7					24	push @out_array, $aa .. $bb;
6209							}
6210							}
6211							elsif ($_ >= $min && $_ <= $max)
6212							{
6213	18					49	push @out_array, $_;
6214							}
6215							}
6216							}
6217	18					122	return @out_array;
6218							}
6219
6220							=item $doc->trimstr($string)
6221
6222							Used solely for debugging. Trims a string to a max of 40 characters,
6223							handling nulls and non-Unix line endings.
6224
6225							=cut
6226
6227							sub trimstr ## no critic (Unpack)
6228							{
6229	0			0	1	0	my $pkg_or_doc = shift;
6230	0					0	my $s = $_[0];
6231
6232	0					0	my $pos = pos $_[0];
6233	0		0			0	$pos \|\|= 0;
6234
6235	0	0	0			0	if (!defined $s \|\| $s eq q{})
		0
6236							{
6237	0					0	$s = '(empty)';
6238							}
6239							elsif (length $s > 40)
6240							{
6241	0					0	$s = (substr $s, $pos, 40) . '...';
6242							}
6243	0					0	$s =~ s/ \r /^M/gxms;
6244	0					0	return $pos . q{ } . $s . "\n";
6245							}
6246
6247							=item $doc->copyObject($node)
6248
6249							Clones a node via Data::Dumper and eval().
6250
6251							=cut
6252
6253							sub copyObject
6254							{
6255	290			290	1	437	my $self = shift;
6256	290					373	my $objnode = shift;
6257
6258							# replace $objnode with a copy of itself
6259	290					8820	require Data::Dumper;
6260	290					15226	my $d = Data::Dumper->new([$objnode],['objnode']);
6261	290					9541	$d->Purity(1)->Indent(0);
6262	290					4769	$objnode = eval $d->Dump(); ## no critic(ProhibitStringyEval)
6263	290					34501	return $objnode;
6264							}
6265
6266							=item $doc->cacheObjects()
6267
6268							Parses all object Nodes and stores them in the cache. This is useful
6269							for cases where you intend to do some global manipulation and want all
6270							of the data conveniently in RAM.
6271
6272							=cut
6273
6274							sub cacheObjects
6275							{
6276	28			28	1	56	my $self = shift;
6277
6278	28					57	for my $key (keys %{$self->{xref}})
	28					279
6279							{
6280	592	100				1430	if (!exists $self->{objcache}->{$key})
6281							{
6282	68					174	$self->{objcache}->{$key} = $self->dereference($key);
6283							}
6284							}
6285	28					144	return;
6286							}
6287
6288							=item $doc->asciify($string)
6289
6290							Helper class/instance method to massage a string, cleaning up some
6291							non-ASCII problems. This is a very incomplete list. Specifically:
6292
6293							=over
6294
6295							=item f-i ligatures
6296
6297							=item (R) symbol
6298
6299							=back
6300
6301							=cut
6302
6303							sub asciify
6304							{
6305	0			0	1		my $pkg_or_doc = shift;
6306	0						my $R_string = shift; # scalar reference
6307
6308							## Heuristics: fix up some odd text characters:
6309							# f-i ligature
6310	0						${$R_string} =~ s/ \223 /fi/gxms;
	0
6311							# Registered symbol
6312	0						${$R_string} =~ s/ \xae /(R)/gxms;
	0
6313	0						return $pkg_or_doc;
6314							}
6315
6316							1;
6317							__END__