line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package PDF::Tiny; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
1891
|
use 5.01; |
|
1
|
|
|
|
|
4
|
|
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
$VERSION = '0.07'; # Update the POD, too! |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
# Fields |
8
|
|
|
|
|
|
|
sub vers () { 0 } |
9
|
|
|
|
|
|
|
sub fh () { 1 } |
10
|
|
|
|
|
|
|
sub trai () { 2 } # trailer |
11
|
|
|
|
|
|
|
sub id () { 3 } # original doc ID |
12
|
|
|
|
|
|
|
sub stxr () { 4 } # startxref, used for /Prev when appending |
13
|
|
|
|
|
|
|
sub file () { 5 } # file name |
14
|
|
|
|
|
|
|
sub size () { 6 } # object count + 1 |
15
|
|
|
|
|
|
|
sub free () { 7 } # array of free object ids |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# Hash fields; must be consecutive |
18
|
|
|
|
|
|
|
sub xrft () { 8 } # xref table |
19
|
|
|
|
|
|
|
sub mods () { 9 } # modified objects |
20
|
|
|
|
|
|
|
sub objs () {10 } |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
sub impo () {12 } # imported objects |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
sub DEBUG () { 0 } |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
sub croak { |
27
|
0
|
|
|
0
|
0
|
0
|
die "$_[0] at " . join(' line ', (caller(DEBUG ? 0 : 1+$_[1]))[1,2]) |
28
|
|
|
|
|
|
|
. ".\n"; |
29
|
|
|
|
|
|
|
} |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
$null = ['null']; |
32
|
|
|
|
|
|
|
|
33
|
1
|
|
|
1
|
|
6
|
use warnings; no warnings qw 'numeric uninitialized'; |
|
1
|
|
|
1
|
|
1
|
|
|
1
|
|
|
|
|
43
|
|
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
8594
|
|
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# REGEXPS FOR PARSING |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
$S = '[\0\t\cj\cl\cm ]'; # PDF whitespace chars |
38
|
|
|
|
|
|
|
$_S = '[\0\t\cl ]'; #PDF whitespace chars except line breaks |
39
|
|
|
|
|
|
|
$N = '(?:\cm\cj?|\cj)'; # PDF line break chars |
40
|
|
|
|
|
|
|
$D = '[\(\)<>\[\]\{\}\/]'; # PDF delimiter characters (except %); |
41
|
|
|
|
|
|
|
$R = '[^\0\t\cj\cl\cm \(\)<>\[\]\{\}\/%]'; # PDF regular characters |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
# CONSTRUCTOR |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
sub new { |
47
|
16
|
|
|
16
|
0
|
2137
|
my $class = shift; |
48
|
16
|
|
|
|
|
23
|
my ($file, %opts); |
49
|
16
|
100
|
|
|
|
41
|
if (@_ == 1) { |
50
|
14
|
|
|
|
|
23
|
$file = shift; |
51
|
|
|
|
|
|
|
} |
52
|
|
|
|
|
|
|
else { |
53
|
2
|
|
|
|
|
6
|
%opts = @_; |
54
|
2
|
|
|
|
|
4
|
$file = $opts{filename}; |
55
|
|
|
|
|
|
|
} |
56
|
16
|
|
|
|
|
24
|
my $self = []; |
57
|
16
|
|
|
|
|
37
|
$self->[file] = $file; |
58
|
16
|
|
|
|
|
83
|
$self->[$_] = {} for xrft..objs; # This is why they must be consecutive. |
59
|
16
|
|
|
|
|
22
|
$self->[free] = []; |
60
|
16
|
|
|
|
|
25
|
bless $self, $class; |
61
|
16
|
100
|
|
|
|
35
|
if (defined $file) { |
62
|
14
|
50
|
|
|
|
383
|
open my $fh, "<", $file or croak "Cannot open $file: $!"; |
63
|
14
|
|
|
|
|
37
|
binmode $self->[fh] = $fh; |
64
|
14
|
50
|
|
|
|
214
|
defined read $fh, my $read, 1024 or croak "Cannot read $file: $!"; |
65
|
14
|
50
|
|
|
|
95
|
if ($read !~ /%PDF-([0-9.]+)/) { |
66
|
0
|
|
|
|
|
0
|
croak "The file $file is not a PDF"; |
67
|
|
|
|
|
|
|
} |
68
|
14
|
|
|
|
|
38
|
$self->[vers] = $1; |
69
|
14
|
|
|
|
|
32
|
_parse_xref($self); |
70
|
14
|
|
|
|
|
25
|
$self->[size] = $self->[trai][1]{Size}[1]; |
71
|
14
|
100
|
|
|
|
39
|
if ($self->[trai][1]{ID}) { |
72
|
10
|
|
|
|
|
27
|
$self->[id] = $self->[trai][1]{ID}[1][0][1]; |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
} |
75
|
|
|
|
|
|
|
else { |
76
|
2
|
|
100
|
|
|
11
|
$self->[vers] = $opts{version} || 1.4; |
77
|
2
|
|
|
|
|
82
|
$self->[trai] = make_dict(my $trailer_hash = {}); |
78
|
2
|
50
|
|
|
|
7
|
if (!$opts{empty}) { |
79
|
2
|
|
|
|
|
43
|
$$trailer_hash{Root} = make_ref("1 0"); |
80
|
2
|
|
|
|
|
46
|
@{$self->[objs]}{"1 0","2 0"} = |
|
2
|
|
|
|
|
8
|
|
81
|
|
|
|
|
|
|
( make_dict({ |
82
|
|
|
|
|
|
|
Type => make_name("Catalog"), Pages => make_ref("2 0") |
83
|
|
|
|
|
|
|
}), |
84
|
|
|
|
|
|
|
make_dict({ |
85
|
|
|
|
|
|
|
Type => make_name("Pages"), |
86
|
|
|
|
|
|
|
Kids => make_array([]), |
87
|
|
|
|
|
|
|
Count => make_num(0) |
88
|
|
|
|
|
|
|
}) |
89
|
|
|
|
|
|
|
); |
90
|
2
|
|
|
|
|
6
|
$self->[size] = 3; |
91
|
|
|
|
|
|
|
} |
92
|
0
|
|
|
|
|
0
|
else { $self->[size] = 1; } |
93
|
|
|
|
|
|
|
} |
94
|
16
|
|
|
|
|
87
|
$self; |
95
|
|
|
|
|
|
|
} |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
sub _parse_xref { |
98
|
14
|
|
|
14
|
|
23
|
my($self) = shift; |
99
|
14
|
|
|
|
|
20
|
my $fh = $self->[fh]; |
100
|
14
|
100
|
|
|
|
57
|
seek $fh, -1024,2 or seek $fh, 0,0; |
101
|
14
|
50
|
|
|
|
102
|
read $fh, my $read, 1024 |
102
|
|
|
|
|
|
|
or croak "Cannot read $self->[file]: $!", 1; |
103
|
14
|
|
|
|
|
127
|
$read =~ /startxref$N(\d+)$N%%EOF$N?$/o; |
104
|
|
|
|
|
|
|
|
105
|
14
|
|
|
|
|
37
|
$self->[stxr] = my $startxref = $1; |
106
|
14
|
|
|
|
|
18
|
my $xref = $self->[xrft]; |
107
|
|
|
|
|
|
|
|
108
|
14
|
|
|
|
|
19
|
my $trailer; |
109
|
14
|
|
|
|
|
31
|
while(defined $startxref){ |
110
|
|
|
|
|
|
|
# read from the position indicated by $startxref, up to the word |
111
|
|
|
|
|
|
|
# "startxref" |
112
|
|
|
|
|
|
|
|
113
|
15
|
50
|
|
|
|
60
|
seek $fh, $startxref, 0 |
114
|
|
|
|
|
|
|
or croak "Cannot seek in $self->[file]: $!",1; |
115
|
15
|
50
|
|
|
|
143
|
read $fh, my $read, 1024, length $read |
116
|
|
|
|
|
|
|
or croak "Cannot read $self->[file]: $!", 1; |
117
|
15
|
100
|
|
|
|
344
|
if ($read =~ /^$S*[0-9]/o) { # cross-reference stream |
118
|
2
|
|
|
|
|
8
|
my $obj = _read_obj($self, $startxref); |
119
|
2
|
|
|
|
|
10
|
my $stream = $self->decode_stream($obj); |
120
|
2
|
|
|
|
|
4
|
$trailer = $$obj[1]; |
121
|
2
|
|
|
|
|
3
|
my $hash = $$trailer[1]; |
122
|
2
|
|
|
|
|
3
|
my @widths = map $$_[1], @{$$hash{W}[1]}; |
|
2
|
|
|
|
|
13
|
|
123
|
2
|
|
|
|
|
4
|
my $width = $widths[0] + $widths[1] + $widths[2]; |
124
|
2
|
|
|
|
|
9
|
my $unpack = join '', map "H".$_*2, @widths; |
125
|
|
|
|
|
|
|
my @indices = $$hash{Index} |
126
|
1
|
|
|
|
|
5
|
? map $$_[1], @{$$hash{Index}[1]} |
127
|
2
|
100
|
|
|
|
8
|
: (0, $$hash{Size}[1]); |
128
|
2
|
|
|
|
|
4
|
my ($ix, $last) = splice @indices, 0, 2; |
129
|
2
|
|
|
|
|
4
|
$last += $ix - 1; |
130
|
2
|
|
|
|
|
6
|
while (length $stream) { |
131
|
20
|
|
|
|
|
92
|
my($type,$where,$gen) |
132
|
|
|
|
|
|
|
= map hex, |
133
|
|
|
|
|
|
|
unpack $unpack, |
134
|
|
|
|
|
|
|
substr $stream, 0, |
135
|
|
|
|
|
|
|
$width, ''; |
136
|
20
|
50
|
|
|
|
39
|
$widths[0] or $type = 1; |
137
|
|
|
|
|
|
|
|
138
|
20
|
100
|
|
|
|
35
|
if ($type == 1) { |
|
|
100
|
|
|
|
|
|
139
|
12
|
|
|
|
|
20
|
my $obj_ref = "$ix $gen"; |
140
|
|
|
|
|
|
|
!exists $$xref{$obj_ref} |
141
|
12
|
50
|
|
|
|
34
|
and $$xref{$obj_ref} = $where; |
142
|
|
|
|
|
|
|
} |
143
|
|
|
|
|
|
|
elsif ($type == 2) { |
144
|
7
|
|
|
|
|
9
|
my $obj_ref = "$ix 0"; |
145
|
|
|
|
|
|
|
!exists $$xref{$obj_ref} |
146
|
7
|
50
|
|
|
|
28
|
and $$xref{$obj_ref} = |
147
|
|
|
|
|
|
|
["$where 0", $gen]; |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
else { # free |
150
|
1
|
50
|
33
|
|
|
6
|
push @{$self->[free]}, "$ix $gen" |
|
0
|
|
|
|
|
0
|
|
151
|
|
|
|
|
|
|
if $ix && $gen != 65535 |
152
|
|
|
|
|
|
|
} |
153
|
20
|
50
|
|
|
|
80
|
if ($ix++ > $last) { |
154
|
0
|
|
|
|
|
0
|
($ix, $last) = splice @indices,0,2; |
155
|
0
|
|
|
|
|
0
|
$last += $ix - 1; |
156
|
|
|
|
|
|
|
} |
157
|
|
|
|
|
|
|
} |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
else { |
160
|
13
|
|
|
|
|
56
|
while($read !~ /startxref/){ |
161
|
0
|
0
|
|
|
|
0
|
read $fh, $read, 1024, length $read |
162
|
|
|
|
|
|
|
or croak "Cannot read $self->[file]: $!", 1; |
163
|
|
|
|
|
|
|
} |
164
|
13
|
|
|
|
|
63
|
$read =~ /xref(.*?)trailer(.*)/s; |
165
|
13
|
|
|
|
|
40
|
my $xreftext =$1; |
166
|
|
|
|
|
|
|
|
167
|
13
|
|
|
|
|
90
|
$trailer = parse_string("$2",qr/^startxref\z/); |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
# remove initial line, and read the numbers, |
170
|
|
|
|
|
|
|
# repeating as necessary |
171
|
|
|
|
|
|
|
|
172
|
13
|
|
|
|
|
146
|
while ($xreftext=~ s/^$N?(\d+) (\d+).*?$N//o) { |
173
|
13
|
|
|
|
|
67
|
for ($1..$1+$2-1) { |
174
|
|
|
|
|
|
|
#$xreftext =~ s/(.{20})//s; # get 20 bytes |
175
|
248
|
|
|
|
|
255
|
my $_1 = substr($xreftext,0,20,''); |
176
|
248
|
|
|
|
|
342
|
my $obj_ref = "$_ " . substr($_1,11,5)*1; |
177
|
248
|
100
|
|
|
|
302
|
if (substr ($_1, 17,1) eq 'n') { |
178
|
|
|
|
|
|
|
!exists $$xref{$obj_ref} |
179
|
218
|
50
|
|
|
|
622
|
and $$xref{$obj_ref} = |
180
|
|
|
|
|
|
|
substr($_1,0,10); |
181
|
|
|
|
|
|
|
# (See PDF Reference [5th ed.], p. 70.) |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
else { # free |
184
|
30
|
100
|
|
|
|
77
|
push @{$self->[free]}, $obj_ref |
|
17
|
|
|
|
|
27
|
|
185
|
|
|
|
|
|
|
unless substr($_1,11,5) == 65535 |
186
|
|
|
|
|
|
|
} |
187
|
|
|
|
|
|
|
} |
188
|
|
|
|
|
|
|
} |
189
|
|
|
|
|
|
|
} |
190
|
15
|
100
|
|
|
|
40
|
unless ($self->[trai]) { |
191
|
14
|
|
|
|
|
19
|
$self->[trai] = $trailer; |
192
|
|
|
|
|
|
|
exists $$trailer[1]{Encrypt} |
193
|
14
|
50
|
|
|
|
30
|
and croak "$self->[file] is encrypted", 1; |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
|
196
|
15
|
|
|
|
|
20
|
$trailer = $$trailer[1]; |
197
|
15
|
100
|
|
|
|
75
|
$startxref = defined $$trailer{Prev} ? $$trailer{Prev}[1] : undef; |
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
# HIGH-LEVEL METHODS |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
sub page_count { |
205
|
1
|
|
|
1
|
0
|
10
|
$_[0]->get_obj("/Root", "/Pages", "/Count")->[1] |
206
|
|
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
sub _walk_pages { |
209
|
11
|
|
|
11
|
|
13
|
my $self = shift; |
210
|
11
|
|
50
|
|
|
35
|
my $pages = shift || $self->get_obj("/Root", "/Pages") |
211
|
|
|
|
|
|
|
|| return wantarray ? () : 0; |
212
|
11
|
|
|
|
|
11
|
my @pages; # output |
213
|
11
|
|
|
|
|
21
|
my $kids = $self->get_obj($pages, "/Kids"); |
214
|
11
|
50
|
|
|
|
24
|
if ($self->get_obj($pages, "/Count")->[1] == @{$$kids[1]}) { |
|
11
|
|
|
|
|
28
|
|
215
|
11
|
|
|
|
|
12
|
return @{$$kids[1]} |
|
11
|
|
|
|
|
35
|
|
216
|
|
|
|
|
|
|
} |
217
|
0
|
|
|
|
|
0
|
my $kid; |
218
|
0
|
|
|
|
|
0
|
for (0 .. $#{$$kids[1]}){ |
|
0
|
|
|
|
|
0
|
|
219
|
0
|
|
|
|
|
0
|
$kid = $$kids[1][$_]; |
220
|
0
|
0
|
|
|
|
0
|
push @pages, ${$self->get_obj($kid, '/Type')}[1] eq 'Pages' |
|
0
|
|
|
|
|
0
|
|
221
|
|
|
|
|
|
|
? _walk_pages($self, $kid) |
222
|
|
|
|
|
|
|
: $kid; |
223
|
|
|
|
|
|
|
} |
224
|
0
|
|
|
|
|
0
|
return @pages; |
225
|
|
|
|
|
|
|
} |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
sub delete_page { |
228
|
0
|
|
|
0
|
0
|
0
|
my ($self, $num,) = @'_; |
229
|
0
|
|
|
|
|
0
|
my $root = $self->get_obj("/Root"); |
230
|
0
|
|
|
|
|
0
|
my $pages_id = $$root[1]{Pages}[1]; |
231
|
0
|
|
|
|
|
0
|
my $pages = $self->get_obj($pages_id); |
232
|
0
|
|
|
|
|
0
|
my $pages_array = $self->get_obj($pages, '/Kids'); |
233
|
0
|
|
|
|
|
0
|
my $count = $self->get_obj($pages, "/Count"); |
234
|
0
|
0
|
|
|
|
0
|
if (@{$pages_array->[1]} != $count->[1]) { |
|
0
|
|
|
|
|
0
|
|
235
|
|
|
|
|
|
|
# Flatten the pages array. Other structures just require too much code. |
236
|
0
|
|
|
|
|
0
|
_flatten_pages($self, $pages_id, $pages, $pages_array); |
237
|
|
|
|
|
|
|
} |
238
|
0
|
|
|
|
|
0
|
splice @{$pages_array->[1]}, $num, 1; |
|
0
|
|
|
|
|
0
|
|
239
|
0
|
|
|
|
|
0
|
$count->[1]--; |
240
|
|
|
|
|
|
|
_: |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
sub import_page { |
244
|
4
|
|
|
4
|
0
|
27
|
my ($self, $source_pdf, $num, $whither) = @'_; |
245
|
4
|
|
|
|
|
11
|
my @pages = _walk_pages($source_pdf); |
246
|
4
|
|
33
|
|
|
23
|
my $page_to_import = |
247
|
|
|
|
|
|
|
$source_pdf->get_obj(($pages[$num] || croak "No such page: $num")->[1]); |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
# We cannot simply use import_obj. import_obj will follow the /Parent |
250
|
|
|
|
|
|
|
# link and import the entire page tree from the source PDF. |
251
|
|
|
|
|
|
|
# Furthermore, if the values of /Resources, /MediaBox and /CropBox are |
252
|
|
|
|
|
|
|
# inherited from the parent node that we are not importing, they need to |
253
|
|
|
|
|
|
|
# be transferred to the page object itself. |
254
|
4
|
|
|
|
|
11
|
my $temp_copy = [@$page_to_import]; |
255
|
4
|
|
|
|
|
5
|
$temp_copy->[1] = {%{ $temp_copy->[1] }}; |
|
4
|
|
|
|
|
20
|
|
256
|
4
|
|
|
|
|
10
|
my $node = $temp_copy; |
257
|
4
|
|
33
|
|
|
45
|
while (!$temp_copy->[1]{Resources} || !$temp_copy->[1]{MediaBox} |
|
|
|
33
|
|
|
|
|
258
|
|
|
|
|
|
|
|| !$temp_copy->[1]{CropBox} and $node->[1]{Parent}) { |
259
|
4
|
|
|
|
|
14
|
$node = $source_pdf->get_obj($node, '/Parent'); |
260
|
|
|
|
|
|
|
$node->[1]{$_} and !$temp_copy->[1]{$_} |
261
|
|
|
|
|
|
|
and $temp_copy->[1]{$_} = $node->[1]{$_} |
262
|
4
|
|
66
|
|
|
60
|
for qw< Resources MediaBox CropBox >; |
|
|
|
50
|
|
|
|
|
263
|
|
|
|
|
|
|
} |
264
|
4
|
|
|
|
|
8
|
delete $temp_copy->[1]{Parent}; |
265
|
4
|
|
|
|
|
16
|
my $page_id = |
266
|
|
|
|
|
|
|
$self->add_obj(my $real_copy=$self->import_obj($source_pdf, $temp_copy)); |
267
|
|
|
|
|
|
|
|
268
|
4
|
|
|
|
|
13
|
my $root = $self->get_obj("/Root"); |
269
|
4
|
|
|
|
|
9
|
my $pages_id = $$root[1]{Pages}[1]; |
270
|
4
|
|
|
|
|
10
|
$real_copy->[1]{Parent} = ['ref',$pages_id]; |
271
|
4
|
|
|
|
|
9
|
my $pages = $self->get_obj($pages_id); |
272
|
4
|
|
|
|
|
8
|
my $pages_array = $self->get_obj($pages, '/Kids'); |
273
|
4
|
|
|
|
|
9
|
my $count = $self->get_obj($pages, "/Count"); |
274
|
4
|
50
|
|
|
|
6
|
if (@{$pages_array->[1]} != $count->[1]) { |
|
4
|
|
|
|
|
13
|
|
275
|
|
|
|
|
|
|
# Flatten the pages array. Other structures just require too much code. |
276
|
0
|
|
|
|
|
0
|
_flatten_pages($self, $pages_id, $pages, $pages_array); |
277
|
|
|
|
|
|
|
} |
278
|
4
|
|
66
|
|
|
5
|
splice @{$pages_array->[1]}, $whither//@{$pages_array->[1]}, 0, |
|
4
|
|
|
|
|
19
|
|
|
4
|
|
|
|
|
17
|
|
279
|
|
|
|
|
|
|
['ref',$page_id]; |
280
|
4
|
|
|
|
|
6
|
$count->[1]++; |
281
|
|
|
|
|
|
|
_: |
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
sub _flatten_pages { |
284
|
0
|
|
|
0
|
|
0
|
my ($self, $pages_id, $pages, $pages_array) = @ '_; |
285
|
0
|
|
|
|
|
0
|
my @pages = _walk_pages($self, $pages); |
286
|
0
|
|
|
|
|
0
|
for (@pages) { |
287
|
0
|
|
|
|
|
0
|
my $page = $self->get_obj($_); |
288
|
0
|
0
|
|
|
|
0
|
next if $page->[1]{Parent}[1] eq $pages_id; |
289
|
0
|
|
|
|
|
0
|
my $node = $page; |
290
|
0
|
|
0
|
|
|
0
|
while (!$page->[1]{Resources} || !$page->[1]{MediaBox} |
|
|
|
0
|
|
|
|
|
291
|
|
|
|
|
|
|
|| !$page->[1]{CropBox} and $node->[1]{Parent}[1] ne $pages_id) { |
292
|
0
|
|
|
|
|
0
|
$node = $self->get_obj($node, '/Parent'); |
293
|
|
|
|
|
|
|
$node->[1]{$_} and $page->[1]{$_} = $node->[1]{$_} |
294
|
0
|
|
0
|
|
|
0
|
for qw< Resources MediaBox CropBox >; |
295
|
|
|
|
|
|
|
} |
296
|
0
|
|
|
|
|
0
|
$page->[1]{Parent}[1] = $pages_id; |
297
|
|
|
|
|
|
|
} |
298
|
0
|
|
|
|
|
0
|
$pages_array->[1] = \@pages; |
299
|
|
|
|
|
|
|
} |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
sub append { |
302
|
3
|
|
|
3
|
0
|
16
|
my $self = shift; |
303
|
3
|
50
|
|
|
|
6
|
if (!defined $self->[file]) { |
304
|
0
|
|
|
|
|
0
|
croak "No file to write to!" |
305
|
|
|
|
|
|
|
} |
306
|
3
|
50
|
|
|
|
4
|
if (!%{$self->[mods]}) { |
|
3
|
|
|
|
|
14
|
|
307
|
0
|
|
|
|
|
0
|
return; |
308
|
|
|
|
|
|
|
} |
309
|
3
|
50
|
|
|
|
6
|
if ($self->[trai][1]{Type}) { |
310
|
0
|
|
|
|
|
0
|
croak "Cannot append to files with cross-reference streams"; |
311
|
|
|
|
|
|
|
} |
312
|
3
|
50
|
|
|
|
73
|
open my $fh, ">>", $self->[file] |
313
|
|
|
|
|
|
|
or croak "Cannot open $self->[file] for writing: $!"; |
314
|
3
|
|
|
|
|
4
|
binmode $fh; |
315
|
3
|
|
|
|
|
8
|
local ($\,$,); |
316
|
3
|
|
|
|
|
27
|
print $fh "\n"; # The existing %%EOF might not have \n after it |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
# Update the doc ID now. If it already exists, it might be an indirect |
319
|
|
|
|
|
|
|
# object, in which case changes to it must included in the objects that we |
320
|
|
|
|
|
|
|
# append to the file before we reach the trailer. |
321
|
3
|
|
|
|
|
7
|
my $id_array = $self->vivify_obj('array',"/ID"); |
322
|
3
|
50
|
33
|
|
|
4
|
if (@{$$id_array[1]} == 2 |
|
3
|
|
33
|
|
|
13
|
|
323
|
|
|
|
|
|
|
and $self->vivify_obj('str', $id_array, 0)->[1] ne $self->[id] |
324
|
|
|
|
|
|
|
|| $self->vivify_obj('str', $id_array, 1)->[1] ne $self->[id]) { |
325
|
|
|
|
|
|
|
# User has assigned his own id. Leave it alone. |
326
|
|
|
|
|
|
|
} |
327
|
|
|
|
|
|
|
else { |
328
|
3
|
|
33
|
|
|
4
|
$self->vivify_obj('str', $id_array, 0)->[1] |
329
|
|
|
|
|
|
|
||= time."" ^ "".rand ^ "".(0+$self); |
330
|
3
|
|
|
|
|
5
|
$self->vivify_obj('str', $id_array, 1)->[1] |
331
|
|
|
|
|
|
|
^= time."" ^ "".rand ^ "".(0+$self); |
332
|
3
|
|
|
|
|
5
|
@{$$id_array[1]} = @{$$id_array[1]}[0,1]; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
7
|
|
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
|
335
|
3
|
|
|
|
|
4
|
my %offsets; |
336
|
3
|
|
|
|
|
4
|
my @ids = grep $self->[objs]{$_}, sort {$a<=>$b} keys %{$self->[mods]}; |
|
2
|
|
|
|
|
14
|
|
|
3
|
|
|
|
|
20
|
|
337
|
3
|
|
|
|
|
6
|
for (@ids) { |
338
|
4
|
|
|
|
|
5
|
my $obj = $self->[objs]{$_}; |
339
|
4
|
|
|
|
|
58
|
$offsets{$_} = tell $fh; |
340
|
|
|
|
|
|
|
|
341
|
4
|
100
|
|
|
|
8
|
if ($$obj[0] eq 'stream') { |
342
|
1
|
|
|
|
|
3
|
print $fh join_tokens( |
343
|
|
|
|
|
|
|
$_,'obj', |
344
|
|
|
|
|
|
|
_serialize($obj) |
345
|
|
|
|
|
|
|
), $$obj[2], "\nendstream endobj\n" |
346
|
|
|
|
|
|
|
} |
347
|
|
|
|
|
|
|
else { |
348
|
3
|
|
|
|
|
7
|
print $fh join_tokens( |
349
|
|
|
|
|
|
|
$_,'obj', |
350
|
|
|
|
|
|
|
_serialize($obj), |
351
|
|
|
|
|
|
|
"endobj" |
352
|
|
|
|
|
|
|
), "\n"; |
353
|
|
|
|
|
|
|
} |
354
|
|
|
|
|
|
|
} |
355
|
3
|
|
|
|
|
36
|
my $startxref = tell $fh; |
356
|
3
|
|
|
|
|
6
|
print $fh "xref\n"; |
357
|
|
|
|
|
|
|
# Divide the ids into chunks of consecutive numbers |
358
|
3
|
|
|
|
|
10
|
my @chunks = ['0 65535']; |
359
|
3
|
|
|
|
|
8
|
$offsets{'0 65535'} = $self->[free][0]; |
360
|
3
|
|
|
|
|
5
|
for (@ids) { |
361
|
4
|
100
|
|
|
|
17
|
if ($chunks[-1][-1] + 1 != $_) { |
362
|
3
|
|
|
|
|
3
|
push @chunks, []; |
363
|
|
|
|
|
|
|
} |
364
|
4
|
|
|
|
|
3
|
push @{$chunks[-1]}, $_ |
|
4
|
|
|
|
|
12
|
|
365
|
|
|
|
|
|
|
} |
366
|
3
|
|
|
|
|
4
|
for (@chunks) { |
367
|
6
|
|
|
|
|
20
|
printf $fh "%d %s\n", $$_[0], scalar @$_; |
368
|
|
|
|
|
|
|
printf $fh "%010d %05d %s \n", |
369
|
|
|
|
|
|
|
$offsets{$_}, /\ (\d+)/, $_ == 0 ? "f" : "n" |
370
|
6
|
100
|
|
|
|
48
|
for @$_; |
371
|
|
|
|
|
|
|
} |
372
|
3
|
|
|
|
|
4
|
my $trailerhash = $self->[trai]->[1]; |
373
|
3
|
|
|
|
|
7
|
$trailerhash->{Prev} = ['num', $self->[stxr]]; |
374
|
3
|
|
|
|
|
4
|
$trailerhash->{Size} = ['num', $self->[size]]; |
375
|
3
|
|
|
|
|
8
|
print $fh join_tokens(trailer=>serialize($self->[trai])), |
376
|
|
|
|
|
|
|
"\nstartxref\n$startxref\n%%EOF\n"; |
377
|
3
|
50
|
|
|
|
74
|
close $fh or croak "Cannot close $self->[file]: $!"; |
378
|
|
|
|
|
|
|
} |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
sub print { |
381
|
4
|
|
|
4
|
0
|
1374
|
my $self = shift; |
382
|
4
|
|
|
|
|
15
|
my %args = @_; |
383
|
4
|
|
66
|
|
|
23
|
$args{fh} // $args{filename} // croak "No file to write to!"; |
|
|
|
50
|
|
|
|
|
384
|
4
|
|
|
|
|
9
|
my $fh; |
385
|
4
|
100
|
|
|
|
11
|
if ($args{filename}) { |
386
|
|
|
|
|
|
|
open $fh, ">", $args{filename} |
387
|
2
|
50
|
|
|
|
194
|
or croak "Cannot open $args{filename} for writing: $!"; |
388
|
|
|
|
|
|
|
} |
389
|
2
|
|
|
|
|
4
|
else { $fh = $args{fh} } |
390
|
4
|
|
|
|
|
10
|
binmode $fh; |
391
|
4
|
|
|
|
|
17
|
local ($\,$,); |
392
|
4
|
|
|
|
|
17
|
my $pos = length(my $buf = "%PDF-$self->[vers]\n%\xff\xff\xff\xff\n"); |
393
|
4
|
|
|
|
|
15
|
print $fh $buf; |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
# Generate the doc ID now. If it already exists, it might be an indirect |
396
|
|
|
|
|
|
|
# object, in which case changes to it must included in the objects that we |
397
|
|
|
|
|
|
|
# append to the file before we reach the trailer. |
398
|
4
|
|
|
|
|
14
|
my $id_array = $self->vivify_obj('array',"/ID"); |
399
|
4
|
100
|
66
|
|
|
5
|
if (@{$$id_array[1]} == 2 |
|
4
|
|
|
|
|
23
|
|
400
|
|
|
|
|
|
|
and $self->vivify_obj('str', $id_array, 0)->[1] ne $self->[id]) { |
401
|
|
|
|
|
|
|
# User has assigned his own id. Leave it alone. |
402
|
|
|
|
|
|
|
} |
403
|
|
|
|
|
|
|
else { |
404
|
1
|
|
|
|
|
20
|
@{$$id_array[1]} = (['str', time."" ^ "".rand ^ "".(0+$self)])x2; |
|
1
|
|
|
|
|
4
|
|
405
|
|
|
|
|
|
|
} |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
# We assume that if this points to a cross-reference stream’s dictionary |
408
|
|
|
|
|
|
|
# then we will not be emitting that cross-reference stream. |
409
|
4
|
|
|
|
|
9
|
delete @{ $self->[trai][1] }{qw< XRefStm Length Filter DecodeParms F |
|
4
|
|
|
|
|
24
|
|
410
|
|
|
|
|
|
|
FFilter FDecodeParms DL Type Size Index |
411
|
|
|
|
|
|
|
Prev W >}; |
412
|
|
|
|
|
|
|
|
413
|
4
|
|
|
|
|
14
|
my @trailer = _serialize($self->[trai]); |
414
|
4
|
|
|
|
|
11
|
my %seen; |
415
|
|
|
|
|
|
|
my @ids; |
416
|
4
|
|
|
|
|
12
|
for (2..$#trailer) { |
417
|
48
|
100
|
|
|
|
97
|
next unless $trailer[$_] eq 'R'; |
418
|
7
|
|
|
|
|
38
|
my $id = sprintf '%d %d',@trailer[$_-2,$_-1]; |
419
|
7
|
50
|
|
|
|
22
|
next if $seen{$id}++; |
420
|
7
|
|
|
|
|
14
|
push @ids, $id; |
421
|
|
|
|
|
|
|
} |
422
|
4
|
|
|
|
|
5
|
my %offsets; |
423
|
4
|
|
|
|
|
9
|
while (@ids) { |
424
|
50
|
|
|
|
|
65
|
my $id = shift @ids; |
425
|
50
|
|
|
|
|
83
|
my $del = !$self->[objs]{$id}; |
426
|
50
|
50
|
|
|
|
94
|
my $obj = $self->get_obj($id) or next; |
427
|
|
|
|
|
|
|
my @tokens = (my $flat = $obj->[0] eq 'flat') |
428
|
|
|
|
|
|
|
? tokenize($obj->[1],qr/^(?:endobj|stream)\z/) |
429
|
50
|
50
|
|
|
|
166
|
: $obj->[0] eq 'tokens' ? @{$obj->[1]} : _serialize($obj); |
|
0
|
50
|
|
|
|
0
|
|
430
|
50
|
|
|
|
|
128
|
for (2..$#tokens) { |
431
|
501
|
100
|
|
|
|
797
|
next unless $tokens[$_] eq 'R'; |
432
|
54
|
|
|
|
|
186
|
my $id = sprintf '%d %d',@tokens[$_-2,$_-1]; |
433
|
54
|
100
|
|
|
|
131
|
next if $seen{$id}++; |
434
|
43
|
|
|
|
|
71
|
push @ids, $id; |
435
|
|
|
|
|
|
|
} |
436
|
50
|
|
|
|
|
117
|
$offsets{$id} = $pos; |
437
|
50
|
100
|
|
|
|
98
|
if ($$obj[0] eq 'stream') { |
438
|
8
|
|
|
|
|
19
|
$pos += length($buf = join_tokens( |
439
|
|
|
|
|
|
|
$id,'obj', |
440
|
|
|
|
|
|
|
@tokens |
441
|
|
|
|
|
|
|
) . $$obj[2] . "\nendstream endobj\n" |
442
|
|
|
|
|
|
|
); |
443
|
8
|
|
|
|
|
57
|
print $fh $buf; |
444
|
|
|
|
|
|
|
} |
445
|
|
|
|
|
|
|
else { |
446
|
42
|
|
|
|
|
94
|
$pos += length ($buf = join_tokens( |
447
|
|
|
|
|
|
|
$id,'obj', |
448
|
|
|
|
|
|
|
@tokens, |
449
|
|
|
|
|
|
|
"endobj" |
450
|
|
|
|
|
|
|
) . "\n" |
451
|
|
|
|
|
|
|
); |
452
|
42
|
|
|
|
|
197
|
print $fh $buf; |
453
|
|
|
|
|
|
|
} |
454
|
50
|
100
|
|
|
|
308
|
delete $self->[objs]{$id} if $del; # Avoid reading the whole file into |
455
|
|
|
|
|
|
|
} # memory at once. |
456
|
4
|
|
|
|
|
39
|
for (sort {$a<=>$b} keys %offsets) { |
|
149
|
|
|
|
|
153
|
|
457
|
50
|
|
|
|
|
65
|
$ids[$_] = $_; |
458
|
|
|
|
|
|
|
} |
459
|
4
|
|
|
|
|
14
|
my @free = $ids[0] = '0 65535'; |
460
|
4
|
|
|
|
|
13
|
for (1..$#ids-1) { |
461
|
58
|
100
|
|
|
|
102
|
next if $ids[$_]; |
462
|
12
|
|
|
|
|
29
|
push @free, $ids[$_] = "$_ 0"; |
463
|
|
|
|
|
|
|
} |
464
|
4
|
|
|
|
|
7
|
my %next_free; |
465
|
4
|
|
|
|
|
14
|
for (0..$#free) { |
466
|
16
|
|
|
|
|
52
|
$next_free{$free[$_]} = 0+$free[$_+1]; |
467
|
|
|
|
|
|
|
} |
468
|
4
|
|
|
|
|
11
|
my $startxref = $pos; |
469
|
4
|
|
|
|
|
26
|
printf $fh "xref\n0 %d\n", scalar @ids; |
470
|
4
|
|
|
|
|
12
|
for (@ids) { |
471
|
|
|
|
|
|
|
printf $fh "%010d %05d %s \n", |
472
|
|
|
|
|
|
|
exists $next_free{$_} |
473
|
|
|
|
|
|
|
? ($next_free{$_}, /\ (\d+)/, "f") |
474
|
66
|
100
|
|
|
|
408
|
: ($offsets {$_}, /\ (\d+)/, "n") |
475
|
|
|
|
|
|
|
} |
476
|
4
|
|
|
|
|
11
|
my $trailerhash = $self->[trai]->[1]; |
477
|
4
|
|
|
|
|
6
|
delete $trailerhash->{Prev}; |
478
|
4
|
|
|
|
|
18
|
$trailerhash->{Size} = ['flat', 1+$ids[-1]]; |
479
|
4
|
|
|
|
|
15
|
print $fh join_tokens(trailer=>serialize($self->[trai])), |
480
|
|
|
|
|
|
|
"\nstartxref\n$startxref\n%%EOF\n"; |
481
|
4
|
100
|
|
|
|
62
|
if ($args{filename}) { |
482
|
2
|
50
|
|
|
|
105
|
close $fh or croak "Cannot close $args{filename}: $!"; |
483
|
|
|
|
|
|
|
} |
484
|
|
|
|
|
|
|
} |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
# LOW-LEVEL METHODS |
487
|
|
|
|
|
|
|
|
488
|
1
|
|
|
1
|
0
|
12
|
sub version :lvalue { $_[0][vers] } |
489
|
0
|
|
|
0
|
0
|
0
|
sub xref { $_[0][xrft] } |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
sub modified { |
492
|
3
|
|
|
3
|
0
|
18
|
my $self = shift; |
493
|
3
|
50
|
|
|
|
7
|
@_ or return $self->[mods]; |
494
|
3
|
100
|
66
|
|
|
16
|
if (@_ == 1 && $_[0] !~ m.^/.) { |
495
|
1
|
50
|
|
|
|
5
|
croak "$_[0] is not an object id" unless $_[0] =~ /^[0-9]+ [0-9]+\z/; |
496
|
1
|
|
|
|
|
2
|
$self->[mods]{$_[0]}++; |
497
|
|
|
|
|
|
|
return |
498
|
1
|
|
|
|
|
9
|
} |
499
|
2
|
|
|
|
|
6
|
my (undef, $last_ref) = _get_obj($self, 0, @_); |
500
|
2
|
50
|
|
|
|
6
|
$last_ref and $self->[mods]{$last_ref}++; |
501
|
2
|
|
|
|
|
3
|
$self->[mods]; |
502
|
|
|
|
|
|
|
} |
503
|
|
|
|
|
|
|
|
504
|
0
|
|
|
0
|
0
|
0
|
sub objects { $_[0][objs] } |
505
|
1
|
|
|
1
|
0
|
10
|
sub trailer { $_[0][trai] } |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
sub read_obj { |
508
|
71
|
|
|
71
|
0
|
69
|
my $self = shift; |
509
|
71
|
|
|
|
|
80
|
my $id = shift; |
510
|
71
|
|
100
|
|
|
65
|
{ return $self->[objs]{$id} || next } |
|
71
|
|
|
|
|
168
|
|
511
|
66
|
50
|
|
|
|
283
|
croak "$id is not a valid id" unless $id =~ /^[0-9]+ [0-9]+\z/; |
512
|
66
|
50
|
|
|
|
137
|
if (!$self->[fh]) { |
513
|
0
|
|
|
|
|
0
|
croak "No file open"; |
514
|
|
|
|
|
|
|
} |
515
|
66
|
|
50
|
|
|
167
|
my $loc = $self->[xrft]{$id} || return $null; |
516
|
66
|
50
|
|
|
|
119
|
if (ref $loc) { # handle object streams here |
517
|
0
|
|
|
|
|
0
|
my ($strmid, $ix) = @$loc; |
518
|
|
|
|
|
|
|
# Since we have to decompress the stream and calculate the offsets, let’s |
519
|
|
|
|
|
|
|
# go ahead and extract all the objects into the objects hash, in flat |
520
|
|
|
|
|
|
|
# format. We may have reached this code because somebody manually |
521
|
|
|
|
|
|
|
# deleted an objects entry in order to re-read it, so only extract |
522
|
|
|
|
|
|
|
# objects that are not already in the hash. |
523
|
0
|
|
|
|
|
0
|
my $obj = $self->get_obj($strmid); |
524
|
0
|
|
|
|
|
0
|
my $stream = \$self->decode_stream($obj); |
525
|
0
|
|
|
|
|
0
|
my $count = $self->get_obj($$obj[1], "/N")->[1]; |
526
|
0
|
|
|
|
|
0
|
my $first = $self->get_obj($$obj[1], "/First")->[1]; |
527
|
0
|
|
|
|
|
0
|
my @numbers = |
528
|
|
|
|
|
|
|
split /(?:$S++|%[^\cm\cj]*[\cm\cj])+/, substr $$stream, 0, $first, ''; |
529
|
0
|
|
|
|
|
0
|
while (@numbers) { |
530
|
0
|
|
|
|
|
0
|
my ($id, $off) = splice @numbers, 0, 2; |
531
|
0
|
|
|
|
|
0
|
$id .= " 0"; |
532
|
0
|
0
|
0
|
|
|
0
|
$self->[objs]{$id} ||= |
533
|
|
|
|
|
|
|
['flat', |
534
|
|
|
|
|
|
|
substr $$stream, $off, @numbers ? $numbers[1]-$off : length $$stream] |
535
|
|
|
|
|
|
|
} |
536
|
0
|
|
|
|
|
0
|
return $self->[objs]{$id} |
537
|
|
|
|
|
|
|
} |
538
|
|
|
|
|
|
|
# otherwise use the seek-and-read approach |
539
|
66
|
|
|
|
|
128
|
_read_obj($self, $loc, $id); |
540
|
|
|
|
|
|
|
} |
541
|
|
|
|
|
|
|
sub _read_obj { |
542
|
68
|
|
|
68
|
|
90
|
my ($self, $loc, $id) = @_; |
543
|
68
|
|
|
|
|
265
|
seek $self->[fh], $loc, 0; |
544
|
68
|
50
|
|
|
|
773
|
read $self->[fh], my $buf, 1024 or croak "Cannot read $self->[file]: $!"; |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
my @tokens = tokenize($buf, qr/^(?:endobj|stream)\z/, |
547
|
|
|
|
|
|
|
sub { |
548
|
146
|
50
|
|
146
|
|
4291
|
defined read $self->[fh], $buf, 1024, length $buf |
549
|
|
|
|
|
|
|
or croak "Cannot read $self->[file]: $!" |
550
|
68
|
|
|
|
|
465
|
}); |
551
|
68
|
|
|
|
|
512
|
my $read_id = 0+shift(@tokens) . ' ' . (0+shift@tokens); |
552
|
68
|
50
|
66
|
|
|
290
|
if ($id and $read_id ne $id) { |
553
|
0
|
|
|
|
|
0
|
croak "$self->[file]: Found $read_id at offset $loc instead of $id"; |
554
|
|
|
|
|
|
|
} |
555
|
68
|
|
|
|
|
59
|
shift @tokens; # remove “obj” |
556
|
68
|
|
|
|
|
78
|
my $obj; |
557
|
68
|
100
|
|
|
|
134
|
if ($tokens[-1] eq 'stream') { |
558
|
10
|
|
|
|
|
27
|
my $pos = tell $self->[fh]; |
559
|
10
|
|
|
|
|
29
|
$obj = _interpret_token(\@tokens); |
560
|
10
|
|
|
|
|
78
|
$buf =~ s/^\cm?\cj//; |
561
|
|
|
|
|
|
|
# Create the new obj now, to avoid having to copy a huge buffer on pre- |
562
|
|
|
|
|
|
|
# COW perls. |
563
|
10
|
|
|
|
|
29
|
my $new_obj = ['stream', $obj, $buf]; |
564
|
|
|
|
|
|
|
# Have to use get_obj here, not $obj[1]{Length}[1], as /Length could be |
565
|
|
|
|
|
|
|
# an indirect reference. |
566
|
10
|
|
|
|
|
33
|
my $stream_length = $self->get_obj($obj, '/Length')->[1]; |
567
|
10
|
100
|
|
|
|
43
|
if (length $buf < $stream_length) { |
568
|
4
|
|
|
|
|
22
|
seek $self->[fh], $pos, 0; |
569
|
4
|
50
|
|
|
|
79
|
read $self->[fh], $new_obj->[2], $stream_length-length $buf, length $buf |
570
|
|
|
|
|
|
|
or croak "Cannot read $self->[file]: $!"; |
571
|
|
|
|
|
|
|
} |
572
|
|
|
|
|
|
|
else { |
573
|
6
|
|
|
|
|
26
|
substr $new_obj->[2], $stream_length, = ''; |
574
|
|
|
|
|
|
|
} |
575
|
10
|
|
|
|
|
24
|
$obj = $new_obj; |
576
|
|
|
|
|
|
|
} |
577
|
|
|
|
|
|
|
else { |
578
|
58
|
|
|
|
|
52
|
pop @tokens; # remove ‘endobj’ |
579
|
58
|
|
|
|
|
118
|
$obj = ['tokens', \@tokens]; |
580
|
|
|
|
|
|
|
} |
581
|
68
|
|
|
|
|
398
|
$self->[objs]{$read_id} = $obj |
582
|
|
|
|
|
|
|
} |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
sub get_obj { |
585
|
147
|
|
|
147
|
0
|
276
|
splice @_, 1, 0, 0; |
586
|
147
|
|
|
|
|
199
|
(&_get_obj)[0] |
587
|
|
|
|
|
|
|
} |
588
|
|
|
|
|
|
|
sub _get_obj { |
589
|
174
|
|
|
174
|
|
146
|
my $self = shift; |
590
|
174
|
|
|
|
|
152
|
my $vivify = shift; |
591
|
174
|
|
|
|
|
165
|
my $obj = shift; |
592
|
|
|
|
|
|
|
# $obj may be any of: |
593
|
|
|
|
|
|
|
# • "4 0" |
594
|
|
|
|
|
|
|
# • "/Root" |
595
|
|
|
|
|
|
|
# • ['dict', { ... }] |
596
|
|
|
|
|
|
|
# • ['array', { ... }] |
597
|
|
|
|
|
|
|
# • ['ref', "4 0 R"] |
598
|
|
|
|
|
|
|
# • ['anything else', ...] |
599
|
174
|
|
|
|
|
156
|
my $lastref; |
600
|
|
|
|
|
|
|
{ |
601
|
174
|
100
|
|
|
|
142
|
if (ref $obj) { |
|
248
|
100
|
|
|
|
499
|
|
602
|
116
|
100
|
|
|
|
248
|
if ($$obj[0] eq 'ref') { |
603
|
36
|
|
|
|
|
41
|
$obj = $$obj[1]; redo |
604
|
36
|
|
|
|
|
38
|
} |
605
|
|
|
|
|
|
|
} |
606
|
|
|
|
|
|
|
elsif ($obj =~ m.^/.) { |
607
|
38
|
|
|
|
|
81
|
my $subobj = $self->[trai][1]{substr $obj, 1}; |
608
|
38
|
100
|
|
|
|
66
|
if (!$subobj) { |
609
|
2
|
50
|
|
|
|
6
|
if ($vivify) { |
610
|
2
|
50
|
|
|
|
8
|
$obj = $self->[trai][1]{substr $obj, 1} =_viv($vivify, @_ ? $_[0]: ()) |
611
|
|
|
|
|
|
|
} |
612
|
|
|
|
|
|
|
else { |
613
|
|
|
|
|
|
|
return |
614
|
0
|
|
|
|
|
0
|
} |
615
|
|
|
|
|
|
|
} |
616
|
36
|
|
|
|
|
43
|
else { $obj = $subobj } |
617
|
38
|
|
|
|
|
44
|
redo; # $obj may be ['ref', '1894 0'] |
618
|
|
|
|
|
|
|
} |
619
|
|
|
|
|
|
|
else { |
620
|
94
|
|
|
|
|
88
|
$lastref = $obj; |
621
|
94
|
|
66
|
|
|
253
|
$obj = $self->[objs]{$obj} || $self->read_obj($obj); |
622
|
|
|
|
|
|
|
} |
623
|
|
|
|
|
|
|
} |
624
|
174
|
50
|
|
|
|
303
|
$obj or return; |
625
|
174
|
|
|
|
|
294
|
while (@_) { |
626
|
99
|
100
|
|
|
|
166
|
if ($$obj[0] eq 'stream') { $obj = $$obj[1] } # for get_obj($stream,$key) |
|
4
|
|
|
|
|
6
|
|
627
|
99
|
|
|
|
|
159
|
_unflatten($obj); |
628
|
99
|
|
|
|
|
101
|
my $key = shift; |
629
|
99
|
100
|
|
|
|
297
|
my $subobj = $key =~ m.^/. ? $$obj[1]{substr $key, 1} : $$obj[1][$key]; |
630
|
99
|
100
|
|
|
|
126
|
if (!$subobj) { |
631
|
1
|
50
|
|
|
|
4
|
if ($vivify) { |
632
|
1
|
50
|
|
|
|
7
|
$obj = $key =~ m.^/. ? $$obj[1]{substr $key, 1} : $$obj[1][$key] = |
|
|
50
|
|
|
|
|
|
633
|
|
|
|
|
|
|
_viv($vivify, @_ ? $_[0]: ()) |
634
|
|
|
|
|
|
|
} |
635
|
|
|
|
|
|
|
else { |
636
|
|
|
|
|
|
|
return |
637
|
0
|
|
|
|
|
0
|
} |
638
|
|
|
|
|
|
|
} |
639
|
98
|
|
|
|
|
91
|
else { $obj = $subobj } |
640
|
99
|
100
|
66
|
|
|
425
|
if ($obj && $$obj[0] eq 'ref') { |
641
|
31
|
|
|
|
|
38
|
$lastref = $$obj[1]; |
642
|
31
|
|
66
|
|
|
111
|
$obj = $self->[objs]{$$obj[1]} || $self->read_obj($$obj[1]); |
643
|
|
|
|
|
|
|
} |
644
|
|
|
|
|
|
|
} |
645
|
174
|
|
|
|
|
248
|
_unflatten($obj); |
646
|
174
|
50
|
|
|
|
292
|
$obj->[0] eq 'null' and return; |
647
|
174
|
|
|
|
|
567
|
$obj, $lastref; |
648
|
|
|
|
|
|
|
} |
649
|
|
|
|
|
|
|
sub _unflatten { |
650
|
277
|
|
|
277
|
|
230
|
my $obj = shift; |
651
|
277
|
50
|
|
|
|
655
|
if ($$obj[0] eq 'flat') { |
|
|
100
|
|
|
|
|
|
652
|
0
|
|
|
|
|
0
|
@$obj = @{ _interpret_token([tokenize($$obj[1])]) }; |
|
0
|
|
|
|
|
0
|
|
653
|
|
|
|
|
|
|
} |
654
|
|
|
|
|
|
|
elsif($$obj[0] eq 'tokens') { |
655
|
67
|
|
|
|
|
56
|
@$obj = @{ _interpret_token($$obj[1]) }; |
|
67
|
|
|
|
|
95
|
|
656
|
|
|
|
|
|
|
} |
657
|
|
|
|
|
|
|
} |
658
|
|
|
|
|
|
|
sub _viv { |
659
|
3
|
|
|
3
|
|
5
|
my ($type, $key) = @_; |
660
|
3
|
0
|
|
|
|
47
|
[defined $key |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
661
|
|
|
|
|
|
|
? $key =~ m.^/. ? ('dict',{}) : ('array',[]) |
662
|
|
|
|
|
|
|
: ($type, $type eq 'dict' ? {} |
663
|
|
|
|
|
|
|
: $type =~ /^(?:array|tokens)\z/ ? [] |
664
|
|
|
|
|
|
|
: $type eq 'num' ? 0 |
665
|
|
|
|
|
|
|
: $type eq 'null' ? () |
666
|
|
|
|
|
|
|
: $type eq 'stream' ? (['dict',{}], '') : '') |
667
|
|
|
|
|
|
|
]; |
668
|
|
|
|
|
|
|
} |
669
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
sub vivify_obj { |
671
|
25
|
|
|
25
|
0
|
123
|
my $self = $_[0]; |
672
|
25
|
50
|
|
|
|
91
|
if ($_[1] !~ /^[a-z]+\z/) { |
673
|
0
|
|
|
|
|
0
|
croak "First arg to vivify_obj must be a type"; |
674
|
|
|
|
|
|
|
} |
675
|
25
|
|
|
|
|
37
|
my($obj, $lastref) = &_get_obj; |
676
|
25
|
100
|
|
|
|
51
|
$lastref and $$self[mods]{$lastref}++; |
677
|
25
|
|
|
|
|
129
|
$obj; |
678
|
|
|
|
|
|
|
} |
679
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
sub get_page { |
681
|
7
|
|
|
7
|
0
|
11
|
my $self = shift; |
682
|
7
|
|
|
|
|
11
|
my @pages = _walk_pages($self); |
683
|
7
|
|
|
|
|
16
|
$self->get_obj($pages[$_[0]]) |
684
|
|
|
|
|
|
|
} |
685
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
# The import cache looks like this: |
687
|
|
|
|
|
|
|
# # src dest src dest |
688
|
|
|
|
|
|
|
# { $other_pdf => { '2 0' => '1 0', '12 0' => '13 0', ... }, |
689
|
|
|
|
|
|
|
# $another_pdf => { '1 0' => '3 0', '12 0' => '13 0', ... }, |
690
|
|
|
|
|
|
|
# ... |
691
|
|
|
|
|
|
|
# } |
692
|
|
|
|
|
|
|
# where src is the PDF imported from and dest is the PDF that owns the |
693
|
|
|
|
|
|
|
# cache. |
694
|
|
|
|
|
|
|
sub import_obj { |
695
|
4
|
|
|
4
|
0
|
7
|
my ($self, $spdf, $obj) = @'_; |
696
|
|
|
|
|
|
|
my $cach = |
697
|
|
|
|
|
|
|
($self->[impo] ||= |
698
|
2
|
|
|
|
|
783
|
do { require Hash'Util'FieldHash; &Hash'Util'FieldHash'fieldhash({}) }) |
|
2
|
|
|
|
|
1044
|
|
699
|
4
|
|
66
|
|
|
23
|
->{$spdf} ||= {}; |
|
|
|
100
|
|
|
|
|
700
|
4
|
|
|
|
|
55
|
my $ret; |
701
|
4
|
50
|
|
|
|
14
|
if (!ref $obj) { |
702
|
0
|
0
|
|
|
|
0
|
croak "$obj is not an object id" unless $obj =~ /^[0-9]+ [0-9]+\z/; |
703
|
0
|
0
|
|
|
|
0
|
if ($cach->{$obj}) { |
704
|
0
|
|
|
|
|
0
|
return $cach->{$obj} |
705
|
|
|
|
|
|
|
} |
706
|
|
|
|
|
|
|
# Assign a new number now. In the loop below, we assume that all |
707
|
|
|
|
|
|
|
# objects have had new numbers assigned already, and that the objects |
708
|
|
|
|
|
|
|
# just need cloning. |
709
|
|
|
|
|
|
|
# Temporarily assign an empty array. |
710
|
0
|
|
|
|
|
0
|
$ret = $cach->{$obj} = $self->add_obj([]); |
711
|
|
|
|
|
|
|
} |
712
|
4
|
|
|
|
|
10
|
my $return_id = !ref $obj; |
713
|
4
|
|
|
|
|
10
|
my @objs = $obj; |
714
|
4
|
|
|
|
|
10
|
while (@objs) { |
715
|
20
|
|
|
|
|
27
|
my $obj = shift @objs; |
716
|
20
|
|
|
|
|
18
|
my $id; |
717
|
20
|
100
|
|
|
|
39
|
if (!ref $obj) { |
718
|
16
|
|
|
|
|
14
|
$id = $obj; |
719
|
16
|
|
|
|
|
33
|
$obj = $spdf->read_obj($obj); |
720
|
|
|
|
|
|
|
} |
721
|
|
|
|
|
|
|
my @tokens = ($obj->[0] eq 'flat') |
722
|
|
|
|
|
|
|
? tokenize($obj->[1],qr/^stream\z/) |
723
|
20
|
100
|
|
|
|
77
|
: $obj->[0] eq 'tokens' ? @{$obj->[1]} : _serialize($obj); |
|
6
|
50
|
|
|
|
33
|
|
724
|
20
|
|
|
|
|
59
|
for (2..$#tokens) { |
725
|
222
|
100
|
|
|
|
398
|
next unless $tokens[$_] eq 'R'; |
726
|
20
|
|
|
|
|
90
|
my $id = sprintf '%d %d',@tokens[$_-2,$_-1]; |
727
|
20
|
100
|
|
|
|
47
|
if (!$cach->{$id}) { |
728
|
|
|
|
|
|
|
# Temporarily assign an empty array. |
729
|
16
|
|
|
|
|
38
|
$cach->{$id} = $self->add_obj([]); |
730
|
|
|
|
|
|
|
# Add to the list of ids to process. |
731
|
16
|
|
|
|
|
24
|
push @objs, $id; |
732
|
|
|
|
|
|
|
} |
733
|
20
|
|
|
|
|
78
|
@tokens[$_-2,$_-1] = split / /, $cach->{$id}; |
734
|
|
|
|
|
|
|
} |
735
|
|
|
|
|
|
|
# Clone the object. |
736
|
|
|
|
|
|
|
# If an object id is in @objs at this point, it can only be because it |
737
|
|
|
|
|
|
|
# has had a new id assigned already. |
738
|
|
|
|
|
|
|
my $clone = |
739
|
|
|
|
|
|
|
$id && ($cach->{$id} || die "Internal error: $obj got uncached") |
740
|
20
|
100
|
66
|
|
|
93
|
? $self->[objs]{$cach->{$id}} # cached empty array |
741
|
|
|
|
|
|
|
: []; # cloning the top-level object with no cache |
742
|
20
|
|
66
|
|
|
54
|
$ret ||= $clone; |
743
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
## We are not supporting flat streams yet (if ever). |
745
|
|
|
|
|
|
|
#if ($$obj[0] eq 'flat' && $tokens[-1] eq "stream\n") { |
746
|
|
|
|
|
|
|
# pop @tokens; |
747
|
|
|
|
|
|
|
# @$clone = ('stream', ['tokens', \@tokens, ...??? |
748
|
|
|
|
|
|
|
#} |
749
|
|
|
|
|
|
|
|
750
|
20
|
100
|
|
|
|
39
|
if ($$obj[0] eq 'stream') { |
751
|
|
|
|
|
|
|
# tokenize() above will have ended up putting a "stream\n" token on the |
752
|
|
|
|
|
|
|
# end, which we do not want in the dictionary. |
753
|
5
|
|
|
|
|
7
|
pop @tokens; |
754
|
5
|
|
|
|
|
35
|
@$clone = ('stream', ['tokens', \@tokens], $$obj[2]); |
755
|
|
|
|
|
|
|
} |
756
|
|
|
|
|
|
|
else { |
757
|
15
|
|
|
|
|
59
|
@$clone = ('tokens', \@tokens); |
758
|
|
|
|
|
|
|
} |
759
|
|
|
|
|
|
|
} |
760
|
4
|
50
|
|
|
|
18
|
_unflatten($ret) if ref $ret; |
761
|
4
|
|
|
|
|
18
|
$ret; |
762
|
|
|
|
|
|
|
} |
763
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
sub add_obj { |
765
|
23
|
|
|
23
|
0
|
33
|
my $self = shift; |
766
|
23
|
|
33
|
|
|
23
|
my $id = shift @{$self->[free]} || $self->[size]++ . ' 0'; |
767
|
23
|
|
|
|
|
46
|
$self->[objs]{$id} = shift; |
768
|
23
|
|
|
|
|
36
|
$self->[mods]{$id}++; |
769
|
23
|
|
|
|
|
80
|
$id; |
770
|
|
|
|
|
|
|
} |
771
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
sub decode_stream :lvalue{ |
773
|
2
|
|
|
2
|
0
|
4
|
my $self = shift; |
774
|
2
|
|
|
|
|
5
|
my $stream = $self->get_obj(@_); |
775
|
2
|
|
|
|
|
5
|
my @filters = $self->get_obj($stream, "/Filter"); |
776
|
2
|
50
|
|
|
|
7
|
if ($filters[0][0] eq 'array') { |
777
|
0
|
|
|
|
|
0
|
@filters = map $self->get_obj($filters[0],$_)->[1],0..$#{$filters[0][1]}; |
|
0
|
|
|
|
|
0
|
|
778
|
|
|
|
|
|
|
} |
779
|
2
|
|
|
|
|
5
|
else { @filters = $filters[0][1] } |
780
|
2
|
|
33
|
|
|
7
|
my @params = $self->get_obj($stream, "/DecodeParms") |
781
|
|
|
|
|
|
|
|| $self->get_obj($stream, "/DP"); # unofficial but Acrobat sup- |
782
|
2
|
50
|
|
|
|
5
|
if (@params) { # ports it |
783
|
2
|
50
|
|
|
|
6
|
if ($params[0][0] eq 'array') { |
784
|
|
|
|
|
|
|
@params = map scalar $self->get_obj($params[0], $_), |
785
|
0
|
|
|
|
|
0
|
0..$#{$params[0][1]}; |
|
0
|
|
|
|
|
0
|
|
786
|
|
|
|
|
|
|
} |
787
|
|
|
|
|
|
|
} |
788
|
2
|
|
|
|
|
5
|
$stream = \$stream->[2]; |
789
|
2
|
|
|
|
|
4
|
for (@filters) { |
790
|
2
|
|
|
|
|
5
|
$stream = _unfilter($self, $stream, $_, shift @params); |
791
|
|
|
|
|
|
|
} |
792
|
2
|
|
|
|
|
7
|
$$stream |
793
|
|
|
|
|
|
|
} |
794
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
sub _unfilter { |
796
|
2
|
|
|
2
|
|
4
|
my ($self, $stream, $filter, $params) = @_; |
797
|
2
|
50
|
|
|
|
6
|
$filter eq 'FlateDecode' |
798
|
|
|
|
|
|
|
or croak "The $filter filter is not supported", 1; |
799
|
2
|
|
|
|
|
4
|
my ($predictor, $bpc, $cols, $colours) = (1, 8, 1, 1); |
800
|
2
|
50
|
|
|
|
4
|
if ($params) { |
801
|
|
|
|
|
|
|
$params->[1]{Predictor} |
802
|
2
|
50
|
|
|
|
9
|
and $predictor = $self->get_obj($params, "/Predictor")->[1]; |
803
|
2
|
50
|
33
|
|
|
13
|
$predictor == 1 || $predictor >= 10 |
804
|
|
|
|
|
|
|
|| croak "Predictor functions other than PNG are not supported", 1; |
805
|
|
|
|
|
|
|
$params->[1]{BitsPerComponent} |
806
|
2
|
50
|
|
|
|
7
|
and $bpc = $self->get_obj($params, "/BitsPerComponent")->[1]; |
807
|
2
|
50
|
|
|
|
8
|
$$params[1]{Columns} and $cols=$self->get_obj($params, "/Columns")->[1]; |
808
|
2
|
50
|
|
|
|
6
|
$$params[1]{Colours} and $colours=$self->get_obj($params,"/Colors")->[1]; |
809
|
2
|
50
|
|
|
|
6
|
$bpc % 8 and croak "BitsPerComponent values that are not multiples of" |
810
|
|
|
|
|
|
|
. " 8 are not supported", 1; |
811
|
2
|
|
|
|
|
3
|
$bpc >>= 3; # bytes per component |
812
|
2
|
|
|
|
|
2
|
$bpc *= $colours; |
813
|
|
|
|
|
|
|
} |
814
|
2
|
|
|
|
|
578
|
require Compress::Zlib; |
815
|
2
|
50
|
|
|
|
44501
|
my $x = Compress'Zlib'inflateInit() |
816
|
|
|
|
|
|
|
or croak "Could not create an inflation stream (whatever that is)", 1; |
817
|
2
|
|
|
|
|
211
|
my ($flate_output, $flate_stat) = inflate $x my $copy = $$stream; |
818
|
2
|
50
|
|
|
|
52
|
croak "Inflation failed for some reason", 1 |
819
|
|
|
|
|
|
|
unless $flate_stat == &Compress'Zlib'Z_STREAM_END; |
820
|
2
|
50
|
|
|
|
24
|
if ($predictor >= 10) { # rats |
821
|
2
|
|
|
|
|
3
|
my $output = ''; |
822
|
2
|
|
|
|
|
4
|
my $rowsize = 1 + $bpc * $cols; |
823
|
2
|
|
|
|
|
4
|
my $prev = "\0"x($rowsize-1); |
824
|
2
|
|
|
|
|
7
|
for my $row (1..length($flate_output) / $rowsize) { |
825
|
20
|
|
|
|
|
34
|
my $filter = vec $flate_output, ($row-1) * $rowsize, 8; |
826
|
20
|
|
|
|
|
33
|
my $samples = substr $flate_output, ($row-1) * $rowsize + 1, $rowsize-1; |
827
|
20
|
50
|
|
|
|
30
|
if ($filter == 2) { # Up (first ’cos it’s the most common) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
828
|
20
|
|
|
|
|
22
|
for (0..$rowsize-2) { |
829
|
80
|
|
|
|
|
138
|
vec ($samples, $_, 8) += vec $prev, $_, 8; |
830
|
|
|
|
|
|
|
} |
831
|
|
|
|
|
|
|
} |
832
|
|
|
|
|
|
|
elsif (!$filter) { # Nothing |
833
|
|
|
|
|
|
|
} |
834
|
|
|
|
|
|
|
elsif ($filter == 1) { # Sub (left) |
835
|
0
|
|
|
|
|
0
|
for (0..$rowsize-2) { |
836
|
0
|
|
|
|
|
0
|
vec ($samples, $_, 8) += vec $samples, $_ - $bpc, 8; |
837
|
|
|
|
|
|
|
} |
838
|
|
|
|
|
|
|
} |
839
|
|
|
|
|
|
|
elsif ($filter == 3) { # Avg |
840
|
0
|
|
|
|
|
0
|
for (0..$rowsize-2) { |
841
|
0
|
|
|
|
|
0
|
vec ($samples, $_, 8) += |
842
|
|
|
|
|
|
|
(vec($prev, $_, 8) + vec $samples, $_ - $bpc, 8) / 2; |
843
|
|
|
|
|
|
|
} |
844
|
|
|
|
|
|
|
} |
845
|
|
|
|
|
|
|
elsif ($filter == 4) { # Paeth |
846
|
0
|
|
|
|
|
0
|
for (0..$rowsize-2) { |
847
|
0
|
|
|
|
|
0
|
my ($a,$b,$c) = (vec($samples, $_ - $bpc, 8), |
848
|
|
|
|
|
|
|
vec($prev , $_ , 8), |
849
|
|
|
|
|
|
|
vec $prev , $_ - $bpc, 8 ); |
850
|
0
|
|
|
|
|
0
|
my $p = $a + $b - $c; |
851
|
0
|
|
|
|
|
0
|
my ($pa, $pb, $pc) = (abs($p - $a), abs($p - $b), abs($p - $c)); |
852
|
0
|
0
|
0
|
|
|
0
|
vec $samples, $_, 8 =>= |
|
|
0
|
|
|
|
|
|
853
|
|
|
|
|
|
|
$pa <= $pb && $pa <= $pc ? $a : $pb <= $pc ? $b : $c |
854
|
|
|
|
|
|
|
} |
855
|
|
|
|
|
|
|
} |
856
|
0
|
|
|
|
|
0
|
else { croak "Invalid PNG filter value: $filter", 1 } |
857
|
20
|
|
|
|
|
32
|
$output .= $prev = $samples; |
858
|
|
|
|
|
|
|
} |
859
|
2
|
|
|
|
|
15
|
\$output; |
860
|
|
|
|
|
|
|
} |
861
|
|
|
|
|
|
|
else { |
862
|
0
|
|
|
|
|
0
|
\$flate_output; |
863
|
|
|
|
|
|
|
} |
864
|
|
|
|
|
|
|
} |
865
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
# FUNCTIONS |
868
|
|
|
|
|
|
|
|
869
|
|
|
|
|
|
|
*tokenise = *tokenize; |
870
|
|
|
|
|
|
|
sub tokenize { # This function tokenizes. |
871
|
|
|
|
|
|
|
# accepts three arguments: the text to parse, the token to stop |
872
|
|
|
|
|
|
|
# on (such as 'endobj') and a function to supply more text |
873
|
|
|
|
|
|
|
# the 2nd and 3rd args are optional |
874
|
|
|
|
|
|
|
|
875
|
82
|
|
|
82
|
0
|
143
|
for (shift) { |
876
|
82
|
|
|
|
|
102
|
my $endtoken=shift; |
877
|
82
|
|
|
|
|
72
|
my $more = shift; |
878
|
82
|
|
|
|
|
69
|
my @tokens; |
879
|
|
|
|
|
|
|
my $prev_length; |
880
|
82
|
|
|
|
|
97
|
TOKEN: while (1) { |
881
|
1440
|
100
|
100
|
|
|
9390
|
if ($more and length() < 500) { |
|
|
50
|
33
|
|
|
|
|
882
|
137
|
|
|
|
|
193
|
&$more(); |
883
|
|
|
|
|
|
|
} |
884
|
|
|
|
|
|
|
elsif(!length or length == $prev_length) { |
885
|
0
|
|
|
|
|
0
|
last TOKEN; |
886
|
|
|
|
|
|
|
} |
887
|
1440
|
|
|
|
|
1565
|
$prev_length = length; |
888
|
1440
|
|
|
|
|
5046
|
s/^(?:$S++|%[^\cm\cj]*$N)+//o; |
889
|
1440
|
100
|
|
|
|
5499
|
if (s _^(($R+)|<<|>>|[\[\]\{\}]|/$R*)__o) { |
890
|
1395
|
|
|
|
|
2256
|
push @tokens, $1; |
891
|
1395
|
100
|
66
|
|
|
7416
|
last TOKEN if defined $endtoken && length $2 |
|
|
|
100
|
|
|
|
|
892
|
|
|
|
|
|
|
&& $1 =~ $endtoken; |
893
|
|
|
|
|
|
|
next TOKEN |
894
|
1314
|
|
|
|
|
1850
|
} |
895
|
45
|
100
|
|
|
|
107
|
if (s.^\(..) { # remove paren. |
896
|
15
|
|
|
|
|
156
|
&$more() |
897
|
|
|
|
|
|
|
until s/( |
898
|
|
|
|
|
|
|
(?:\\.|[^()\\])++# escaped char or non-\() |
899
|
|
|
|
|
|
|
| |
900
|
|
|
|
|
|
|
\((?1)\) # parenthesized stuff |
901
|
|
|
|
|
|
|
)*\) # final closing paren |
902
|
|
|
|
|
|
|
//xs; |
903
|
14
|
|
|
|
|
52
|
push @tokens, "($1)"; |
904
|
|
|
|
|
|
|
next |
905
|
14
|
|
|
|
|
22
|
} |
906
|
30
|
100
|
|
|
|
114
|
s.^(<[^>]*>).. and push @tokens, $1; |
907
|
30
|
|
|
|
|
78
|
&$more() while /^<[^>]*\z/; |
908
|
|
|
|
|
|
|
} |
909
|
81
|
|
|
|
|
924
|
return @tokens; |
910
|
|
|
|
|
|
|
} |
911
|
|
|
|
|
|
|
} |
912
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
sub join_tokens { |
914
|
|
|
|
|
|
|
# PDF lines are not supposed to be longer than 255 (outside of content |
915
|
|
|
|
|
|
|
# streams). I don’t know whether that includes the line ending. I assume |
916
|
|
|
|
|
|
|
# it does. |
917
|
80
|
|
|
80
|
0
|
108
|
my $ret = ''; |
918
|
80
|
|
|
|
|
69
|
my $line = ''; |
919
|
80
|
|
|
|
|
107
|
for (@_) { |
920
|
|
|
|
|
|
|
# We assume that only strings can get too long to fit on a line. After |
921
|
|
|
|
|
|
|
# all, they are the only token that can be split between lines. |
922
|
1030
|
100
|
100
|
|
|
2022
|
if (length() + length $line > 254 && /^$S*([(<])/o) { |
923
|
3
|
|
|
|
|
11
|
my $hex = $1 eq '<'; |
924
|
|
|
|
|
|
|
# Put a line break before the string. |
925
|
3
|
|
|
|
|
8
|
$ret .= "$line\n"; |
926
|
3
|
|
|
|
|
6
|
$line = ''; |
927
|
|
|
|
|
|
|
# To keep this code simple, just ignore the fact that strings can have |
928
|
|
|
|
|
|
|
# embedded line breaks. Just split it up into pieces that are small |
929
|
|
|
|
|
|
|
# enough to fit on a line. |
930
|
3
|
|
|
|
|
13
|
while (length > 254) { |
931
|
|
|
|
|
|
|
# Don’t split it between an escaper and an escapee. |
932
|
65
|
|
|
|
|
82
|
my $piecepiece = substr $_, 0, 253; |
933
|
65
|
50
|
|
|
|
372
|
chop $piecepiece unless $piecepiece =~ /^[^\\]*(?:\\.[^\\]*)*\z/s; |
934
|
65
|
100
|
|
|
|
125
|
$ret .= $hex ? "$piecepiece\n" : "$piecepiece\\\n"; |
935
|
65
|
|
|
|
|
149
|
substr $_, 0, length $piecepiece, = ''; |
936
|
|
|
|
|
|
|
} |
937
|
3
|
|
|
|
|
12
|
$ret .= "$_\n"; |
938
|
|
|
|
|
|
|
} |
939
|
|
|
|
|
|
|
else { |
940
|
|
|
|
|
|
|
# Wherever whitespace is mandatory, use a line break, to avoid that more |
941
|
|
|
|
|
|
|
# complicated string-splitting logic above. (Speeeeeeeed!) (I hope.) |
942
|
|
|
|
|
|
|
# PDF::Extract won’t be able to read it. That’s the least of |
943
|
|
|
|
|
|
|
# its problems. |
944
|
1027
|
100
|
|
|
|
1414
|
for (ref eq 'SCALAR' ? $$_ : $_) { |
945
|
1027
|
100
|
100
|
|
|
5453
|
if (length($line) and $line !~ /$D\z/o && !/^$D/o |
|
|
|
66
|
|
|
|
|
946
|
|
|
|
|
|
|
||length($line) + length > 254) { |
947
|
425
|
|
|
|
|
422
|
$ret .= "$line\n"; |
948
|
425
|
|
|
|
|
386
|
$line = ''; |
949
|
|
|
|
|
|
|
} |
950
|
1027
|
|
|
|
|
1597
|
$line .= $_; |
951
|
|
|
|
|
|
|
} |
952
|
|
|
|
|
|
|
} |
953
|
|
|
|
|
|
|
} |
954
|
80
|
|
|
|
|
439
|
"$ret$line"; |
955
|
|
|
|
|
|
|
} |
956
|
|
|
|
|
|
|
|
957
|
|
|
|
|
|
|
sub parse_string { |
958
|
14
|
|
|
14
|
0
|
42
|
parse_tokens( tokenize @_[0,1] ) |
959
|
|
|
|
|
|
|
} |
960
|
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
sub parse_tokens { |
962
|
13
|
|
|
13
|
0
|
26
|
my @newtokens; |
963
|
13
|
50
|
|
|
|
45
|
wantarray or return _interpret_token(\@_); |
964
|
0
|
|
|
|
|
0
|
while (scalar ( @_)){ |
965
|
0
|
|
|
|
|
0
|
push @newtokens, _interpret_token(\@_); |
966
|
|
|
|
|
|
|
} |
967
|
0
|
|
|
|
|
0
|
return @newtokens; |
968
|
|
|
|
|
|
|
} |
969
|
|
|
|
|
|
|
|
970
|
|
|
|
|
|
|
sub _interpret_token { # pass an array ref |
971
|
|
|
|
|
|
|
# interpret_token removes the first token or set of tokens from an |
972
|
|
|
|
|
|
|
# array and returns the token in 'parsed object' format. |
973
|
|
|
|
|
|
|
|
974
|
536
|
|
|
536
|
|
436
|
my $tokens = shift; |
975
|
536
|
|
|
|
|
669
|
for (shift @$tokens) { |
976
|
|
|
|
|
|
|
|
977
|
|
|
|
|
|
|
# references: |
978
|
|
|
|
|
|
|
|
979
|
536
|
100
|
100
|
|
|
4012
|
if ($_ =~ /^\d+$/ and |
|
|
100
|
100
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
980
|
|
|
|
|
|
|
@$tokens >= 2 && $$tokens[0] =~ /^\d+$/ |
981
|
|
|
|
|
|
|
&& $$tokens[1] eq 'R') { |
982
|
128
|
|
|
|
|
278
|
my $to_return = ['ref', |
983
|
|
|
|
|
|
|
"$_ " . (shift @$tokens)]; |
984
|
128
|
|
|
|
|
127
|
shift @$tokens; # shift off the 'R' |
985
|
128
|
|
|
|
|
350
|
return $to_return; |
986
|
|
|
|
|
|
|
} |
987
|
|
|
|
|
|
|
|
988
|
|
|
|
|
|
|
# names |
989
|
|
|
|
|
|
|
|
990
|
|
|
|
|
|
|
elsif (s.^/..) { # if it begins with "/" |
991
|
|
|
|
|
|
|
# replace #XX sequences with real chars: |
992
|
71
|
|
|
|
|
85
|
s/#([a-f\d]{2})/chr hex $1/gie; |
|
0
|
|
|
|
|
0
|
|
993
|
71
|
|
|
|
|
268
|
return ['name', $_]; |
994
|
|
|
|
|
|
|
} |
995
|
|
|
|
|
|
|
|
996
|
|
|
|
|
|
|
# dictionaries: |
997
|
|
|
|
|
|
|
|
998
|
|
|
|
|
|
|
elsif ($_ eq '<<') { |
999
|
80
|
|
|
|
|
73
|
my %tmp_hash; |
1000
|
80
|
|
|
|
|
151
|
while(scalar @$tokens){ |
1001
|
372
|
|
|
|
|
370
|
my $name = shift @$tokens; |
1002
|
372
|
100
|
|
|
|
527
|
if ($name eq '>>') { |
1003
|
80
|
|
|
|
|
308
|
return ['dict', \%tmp_hash]; |
1004
|
|
|
|
|
|
|
}else { |
1005
|
292
|
|
|
|
|
635
|
$name =~ s.^/..; |
1006
|
|
|
|
|
|
|
# replace #XX sequences with real chars: |
1007
|
292
|
|
|
|
|
309
|
$name =~ s/#([a-f\d]{2})/chr hex $1/gie; |
|
0
|
|
|
|
|
0
|
|
1008
|
292
|
|
|
|
|
375
|
$tmp_hash{$name} = |
1009
|
|
|
|
|
|
|
_interpret_token($tokens); |
1010
|
|
|
|
|
|
|
delete $tmp_hash{$name} |
1011
|
292
|
50
|
|
|
|
890
|
if $tmp_hash{$name}[0] eq 'null' |
1012
|
|
|
|
|
|
|
} |
1013
|
|
|
|
|
|
|
} |
1014
|
|
|
|
|
|
|
} |
1015
|
|
|
|
|
|
|
|
1016
|
|
|
|
|
|
|
# arrays: |
1017
|
|
|
|
|
|
|
|
1018
|
|
|
|
|
|
|
elsif ($_ eq '[') { |
1019
|
52
|
|
|
|
|
70
|
my @tmp_array; |
1020
|
52
|
|
|
|
|
94
|
while(scalar @$tokens){ |
1021
|
206
|
100
|
|
|
|
285
|
if ($$tokens[0] eq ']') { |
1022
|
52
|
|
|
|
|
45
|
shift @$tokens; #shift off the "]" |
1023
|
52
|
|
|
|
|
170
|
return ['array', \@tmp_array]; |
1024
|
|
|
|
|
|
|
}else { |
1025
|
154
|
|
|
|
|
197
|
push @tmp_array, _interpret_token($tokens); |
1026
|
|
|
|
|
|
|
} |
1027
|
|
|
|
|
|
|
} |
1028
|
|
|
|
|
|
|
} |
1029
|
|
|
|
|
|
|
|
1030
|
|
|
|
|
|
|
# strings |
1031
|
|
|
|
|
|
|
|
1032
|
|
|
|
|
|
|
elsif (s/^\(//){ #if it begins with a '(' |
1033
|
|
|
|
|
|
|
#i.e., if it's a string |
1034
|
14
|
|
|
|
|
34
|
s/\)$//; # remove final ")" |
1035
|
|
|
|
|
|
|
# and remove wack escapes: |
1036
|
14
|
|
|
|
|
125
|
s,($N)|\\($N|\d{1\,3}|.), my $match = $2; |
|
34
|
|
|
|
|
58
|
|
1037
|
34
|
|
|
|
|
35
|
my $unescaped = $1; |
1038
|
34
|
0
|
|
|
|
1462
|
$unescaped ? "\cj" : # EOL |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
1039
|
|
|
|
|
|
|
$match =~ /$N/o ? '' : # \EOL |
1040
|
|
|
|
|
|
|
$match=~/\d/?chr oct$match : # octal |
1041
|
|
|
|
|
|
|
$match eq 'n' ? "\cj" : # CR |
1042
|
|
|
|
|
|
|
$match eq 'r' ? "\cm" : # LF |
1043
|
|
|
|
|
|
|
$match eq 't' ? "\t" : # tab |
1044
|
|
|
|
|
|
|
$match eq 'b' ? "\010" : # backspace |
1045
|
|
|
|
|
|
|
$match eq 'f' ? "\x0c" : # form feed |
1046
|
|
|
|
|
|
|
$match eq '(' ? "(" : # ( |
1047
|
|
|
|
|
|
|
$match eq ')' ? ')' : # ) |
1048
|
|
|
|
|
|
|
$match eq '\\' ? '\\' : # | |
1049
|
|
|
|
|
|
|
length $match ? $match : # ignore backslash as per Adobe's instructions |
1050
|
|
|
|
|
|
|
$fullmatch # anything else |
1051
|
|
|
|
|
|
|
,goes; |
1052
|
14
|
|
|
|
|
57
|
return ['str', $_]; |
1053
|
|
|
|
|
|
|
} |
1054
|
|
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
# numbers: |
1056
|
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
elsif (/^[+\-]?\d+$/ or |
1058
|
|
|
|
|
|
|
/^[+\-]?[\d\.]+$/ && y/.// == 1) { |
1059
|
174
|
|
|
|
|
565
|
return ['num',$_]; |
1060
|
|
|
|
|
|
|
} |
1061
|
|
|
|
|
|
|
|
1062
|
|
|
|
|
|
|
# hex strings |
1063
|
|
|
|
|
|
|
|
1064
|
|
|
|
|
|
|
elsif (s/^/){ #if it begins with a '<' |
1065
|
17
|
|
|
|
|
37
|
s/>$//; # remove final ">" |
1066
|
17
|
|
|
|
|
52
|
s/$S//g; #remove whitespace |
1067
|
17
|
|
|
|
|
111
|
return ['str', pack "H*", $_]; |
1068
|
|
|
|
|
|
|
} |
1069
|
|
|
|
|
|
|
|
1070
|
|
|
|
|
|
|
# booleans: |
1071
|
|
|
|
|
|
|
|
1072
|
|
|
|
|
|
|
elsif ($_ eq 'true') { |
1073
|
0
|
|
|
|
|
0
|
return ['bool', 1]; |
1074
|
|
|
|
|
|
|
} |
1075
|
|
|
|
|
|
|
elsif($_ eq 'false'){ |
1076
|
0
|
|
|
|
|
0
|
return ['bool','']; |
1077
|
|
|
|
|
|
|
} |
1078
|
|
|
|
|
|
|
|
1079
|
|
|
|
|
|
|
# null: |
1080
|
|
|
|
|
|
|
|
1081
|
|
|
|
|
|
|
elsif ($_ eq 'null') { |
1082
|
0
|
|
|
|
|
0
|
return ['null', ]; |
1083
|
|
|
|
|
|
|
} |
1084
|
|
|
|
|
|
|
|
1085
|
|
|
|
|
|
|
|
1086
|
|
|
|
|
|
|
# in case something went wrong: |
1087
|
|
|
|
|
|
|
|
1088
|
0
|
|
|
|
|
0
|
else { return ['?',$_]; } |
1089
|
|
|
|
|
|
|
} |
1090
|
|
|
|
|
|
|
} |
1091
|
|
|
|
|
|
|
|
1092
|
|
|
|
|
|
|
*serialise = *serialize; |
1093
|
|
|
|
|
|
|
sub serialize { |
1094
|
19
|
|
|
19
|
0
|
37
|
join_tokens(&_serialize) |
1095
|
|
|
|
|
|
|
} |
1096
|
|
|
|
|
|
|
sub _serialize; |
1097
|
|
|
|
|
|
|
sub _serialize { |
1098
|
462
|
|
|
462
|
|
536
|
for (shift) { |
1099
|
|
|
|
|
|
|
# numbers |
1100
|
462
|
100
|
|
|
|
786
|
if($$_[0]eq'num'){ for ($$_[1]) { |
|
135
|
|
|
|
|
147
|
|
1101
|
135
|
100
|
66
|
|
|
470
|
!$_||$_==-0 and return 0; |
1102
|
99
|
50
|
|
|
|
487
|
/^[+-]?(?:[0-9]+(?:\.[0-9]*)?|\.[0-9]*)\z/ and return $_; |
1103
|
0
|
|
|
|
|
0
|
my $ret = 0+$_; |
1104
|
0
|
0
|
|
|
|
0
|
return $ret unless $ret =~ /e([+-][0-9]+)/; |
1105
|
0
|
|
|
|
|
0
|
$ret = sprintf"%.$1f",$ret; |
1106
|
0
|
|
|
|
|
0
|
$ret =~ s/\.?0+\z//; |
1107
|
0
|
|
|
|
|
0
|
return $ret; |
1108
|
|
|
|
|
|
|
}} |
1109
|
|
|
|
|
|
|
|
1110
|
|
|
|
|
|
|
# names |
1111
|
327
|
100
|
|
|
|
484
|
if($$_[0]eq'name'){ |
1112
|
60
|
|
|
|
|
101
|
for (my $copy = $$_[1]) { |
1113
|
60
|
|
|
|
|
187
|
s/($D|$S|#)/sprintf'#%02x',ord$1/ego; |
|
0
|
|
|
|
|
0
|
|
1114
|
60
|
|
|
|
|
176
|
return "/$_"; |
1115
|
|
|
|
|
|
|
} |
1116
|
|
|
|
|
|
|
} |
1117
|
|
|
|
|
|
|
|
1118
|
|
|
|
|
|
|
# dictionaries |
1119
|
267
|
100
|
|
|
|
414
|
if ($$_[0] eq 'dict') { |
1120
|
64
|
|
|
|
|
54
|
my (@ret,$key,$key_copy); |
1121
|
64
|
|
|
|
|
51
|
for $key (sort keys %{$$_[1]}) { |
|
64
|
|
|
|
|
305
|
|
1122
|
233
|
|
|
|
|
677
|
($key_copy=$key) |
1123
|
0
|
|
|
|
|
0
|
=~s/($D|$S|#)/sprintf'#%02x',ord$1/ego; |
1124
|
233
|
|
|
|
|
457
|
push @ret,"/$key_copy", _serialize $$_[1]{$key}; |
1125
|
|
|
|
|
|
|
} |
1126
|
64
|
|
|
|
|
451
|
return"<<",@ret,">>"; |
1127
|
|
|
|
|
|
|
} |
1128
|
|
|
|
|
|
|
|
1129
|
|
|
|
|
|
|
# indirect references |
1130
|
203
|
100
|
|
|
|
599
|
$$_[0] eq 'ref' and return split(/ /,$$_[1]), "R"; |
1131
|
|
|
|
|
|
|
|
1132
|
|
|
|
|
|
|
# arrays |
1133
|
102
|
100
|
|
|
|
195
|
if ($$_[0]eq'array'){ |
1134
|
43
|
|
|
|
|
41
|
my (@ret); |
1135
|
43
|
|
|
|
|
38
|
for(@{$$_[1]}){ |
|
43
|
|
|
|
|
87
|
|
1136
|
124
|
|
|
|
|
148
|
push @ret, _serialize$_; |
1137
|
|
|
|
|
|
|
} |
1138
|
43
|
|
|
|
|
199
|
return "[",@ret,"]"; |
1139
|
|
|
|
|
|
|
} |
1140
|
|
|
|
|
|
|
|
1141
|
|
|
|
|
|
|
# screams |
1142
|
59
|
100
|
|
|
|
98
|
if ($$_[0]eq'stream'){ |
1143
|
14
|
|
|
|
|
28
|
return _serialize($$_[1]), "stream\n" |
1144
|
|
|
|
|
|
|
} |
1145
|
|
|
|
|
|
|
|
1146
|
|
|
|
|
|
|
# strings |
1147
|
45
|
100
|
|
|
|
79
|
if($$_[0]eq 'str'){ |
1148
|
|
|
|
|
|
|
# copy it so we don't modify the object being flattened |
1149
|
32
|
|
|
|
|
57
|
for (my $ret = $$_[1]) { |
1150
|
32
|
50
|
|
|
|
113
|
s/([\\()\r])/$1 eq "\r" ? '\r' : "\\$1"/ge; |
|
3
|
|
|
|
|
15
|
|
1151
|
32
|
|
|
|
|
119
|
return"($_)"; |
1152
|
|
|
|
|
|
|
} |
1153
|
|
|
|
|
|
|
} |
1154
|
|
|
|
|
|
|
|
1155
|
13
|
100
|
|
|
|
32
|
$$_[0]eq'tokens'&&return@{$$_[1]}; |
|
6
|
|
|
|
|
32
|
|
1156
|
|
|
|
|
|
|
|
1157
|
|
|
|
|
|
|
# booleans |
1158
|
7
|
50
|
|
|
|
18
|
$$_[0]eq'bool'&&return+(false=>'true')[$$_[1]]; |
1159
|
|
|
|
|
|
|
|
1160
|
7
|
50
|
|
|
|
36
|
$$_[0]eq'flat'&&return\$$_[1]; |
1161
|
|
|
|
|
|
|
|
1162
|
0
|
0
|
|
|
|
0
|
$$_[0]eq'null'&&return'null'; |
1163
|
|
|
|
|
|
|
|
1164
|
|
|
|
|
|
|
# If we get this far, then there's probably an empty array element or hash value which is not supposed to be there, so we shouldn't return anything. |
1165
|
|
|
|
|
|
|
} |
1166
|
|
|
|
|
|
|
} |
1167
|
|
|
|
|
|
|
|
1168
|
|
|
|
|
|
|
for (qw< bool num str name array dict ref>) { |
1169
|
2
|
|
|
2
|
0
|
44
|
eval "sub make_$_ { ['$_', \$_[0] ] }" |
|
0
|
|
|
0
|
0
|
0
|
|
|
7
|
|
|
7
|
0
|
100
|
|
|
4
|
|
|
4
|
0
|
86
|
|
|
2
|
|
|
2
|
0
|
43
|
|
|
5
|
|
|
5
|
0
|
58
|
|
|
2
|
|
|
2
|
0
|
27
|
|
1170
|
|
|
|
|
|
|
} |
1171
|
|
|
|
|
|
|
|
1172
|
|
|
|
|
|
|
|
1173
|
|
|
|
|
|
|
|
1174
|
|
|
|
|
|
|
|
1175
|
|
|
|
|
|
|
!()__END__()! |
1176
|
|
|
|
|
|
|
|