line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
2
|
|
|
|
|
|
|
# File: PDF.pm |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Description: Read PDF meta information |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Revisions: 07/11/2005 - P. Harvey Created |
7
|
|
|
|
|
|
|
# 07/25/2005 - P. Harvey Add support for encrypted documents |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
# References: 1) http://www.adobe.com/devnet/pdf/pdf_reference.html |
10
|
|
|
|
|
|
|
# 2) http://search.cpan.org/dist/Crypt-RC4/ |
11
|
|
|
|
|
|
|
# 3) http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf |
12
|
|
|
|
|
|
|
# 4) http://www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/adobe_supplement_iso32000.pdf |
13
|
|
|
|
|
|
|
# 5) http://tools.ietf.org/search/rfc3454 |
14
|
|
|
|
|
|
|
# 6) http://www.armware.dk/RFC/rfc/rfc4013.html |
15
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
package Image::ExifTool::PDF; |
18
|
|
|
|
|
|
|
|
19
|
25
|
|
|
25
|
|
3999
|
use strict; |
|
25
|
|
|
|
|
56
|
|
|
25
|
|
|
|
|
832
|
|
20
|
25
|
|
|
25
|
|
121
|
use vars qw($VERSION $AUTOLOAD $lastFetched); |
|
25
|
|
|
|
|
58
|
|
|
25
|
|
|
|
|
1293
|
|
21
|
25
|
|
|
25
|
|
135
|
use Image::ExifTool qw(:DataAccess :Utils); |
|
25
|
|
|
|
|
56
|
|
|
25
|
|
|
|
|
282217
|
|
22
|
|
|
|
|
|
|
require Exporter; |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
$VERSION = '1.55'; |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
sub FetchObject($$$$); |
27
|
|
|
|
|
|
|
sub ExtractObject($$;$$); |
28
|
|
|
|
|
|
|
sub ReadToNested($;$); |
29
|
|
|
|
|
|
|
sub ProcessDict($$$$;$$); |
30
|
|
|
|
|
|
|
sub ProcessAcroForm($$$$;$$); |
31
|
|
|
|
|
|
|
sub ExpandArray($); |
32
|
|
|
|
|
|
|
sub ReadPDFValue($); |
33
|
|
|
|
|
|
|
sub CheckPDF($$$); |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# $lastFetched - last fetched object reference (used for decryption) |
36
|
|
|
|
|
|
|
# (undefined if fetched object was already decrypted, eg. object from stream) |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
my $cryptInfo; # encryption object reference (plus additional information) |
39
|
|
|
|
|
|
|
my $cryptString; # flag that strings are encrypted |
40
|
|
|
|
|
|
|
my $cryptStream; # flag that streams are encrypted |
41
|
|
|
|
|
|
|
my $lastOffset; # last fetched object offset |
42
|
|
|
|
|
|
|
my %streamObjs; # hash of stream objects |
43
|
|
|
|
|
|
|
my %fetched; # dicts fetched in verbose mode (to avoid cyclical recursion) |
44
|
|
|
|
|
|
|
my $pdfVer; # version of PDF file being processed |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# filters supported in DecodeStream() |
47
|
|
|
|
|
|
|
my %supportedFilter = ( |
48
|
|
|
|
|
|
|
'/FlateDecode' => 1, |
49
|
|
|
|
|
|
|
'/Crypt' => 1, |
50
|
|
|
|
|
|
|
'/Identity' => 1, # (not filtered) |
51
|
|
|
|
|
|
|
'/DCTDecode' => 1, # (JPEG image - not filtered) |
52
|
|
|
|
|
|
|
'/JPXDecode' => 1, # (Jpeg2000 image - not filtered) |
53
|
|
|
|
|
|
|
'/LZWDecode' => 1, # (usually a bitmapped image) |
54
|
|
|
|
|
|
|
'/ASCIIHexDecode' => 1, |
55
|
|
|
|
|
|
|
'/ASCII85Decode' => 1, |
56
|
|
|
|
|
|
|
# other standard filters that we currently don't support |
57
|
|
|
|
|
|
|
#'/JBIG2Decode' => 0, # (JBIG2 image format not supported) |
58
|
|
|
|
|
|
|
#'/CCITTFaxDecode' => 0, |
59
|
|
|
|
|
|
|
#'/RunLengthDecode' => 0, |
60
|
|
|
|
|
|
|
); |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
# tags in main PDF directories |
63
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Main = ( |
64
|
|
|
|
|
|
|
GROUPS => { 2 => 'Document' }, |
65
|
|
|
|
|
|
|
VARS => { CAPTURE => ['Main','Prev'] }, |
66
|
|
|
|
|
|
|
Info => { |
67
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Info' }, |
68
|
|
|
|
|
|
|
# Adobe Acrobat 10.1.5 will create a duplicate Info dictionary with |
69
|
|
|
|
|
|
|
# a different object number when metadata is edited. This flag |
70
|
|
|
|
|
|
|
# is part of a patch to ignore this duplicate information (unless |
71
|
|
|
|
|
|
|
# the IgnoreMinorErrors option is used) |
72
|
|
|
|
|
|
|
IgnoreDuplicates => 1, |
73
|
|
|
|
|
|
|
}, |
74
|
|
|
|
|
|
|
Root => { |
75
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Root' }, |
76
|
|
|
|
|
|
|
}, |
77
|
|
|
|
|
|
|
Encrypt => { |
78
|
|
|
|
|
|
|
NoProcess => 1, # don't process normally (processed in advance) |
79
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Encrypt' }, |
80
|
|
|
|
|
|
|
}, |
81
|
|
|
|
|
|
|
_linearized => { |
82
|
|
|
|
|
|
|
Name => 'Linearized', |
83
|
|
|
|
|
|
|
Notes => 'flag set if document is linearized for fast web display; not a real Tag ID', |
84
|
|
|
|
|
|
|
PrintConv => { 'true' => 'Yes', 'false' => 'No' }, |
85
|
|
|
|
|
|
|
}, |
86
|
|
|
|
|
|
|
); |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
# tags in PDF Info dictionary |
89
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Info = ( |
90
|
|
|
|
|
|
|
GROUPS => { 2 => 'Document' }, |
91
|
|
|
|
|
|
|
VARS => { CAPTURE => ['Info'] }, |
92
|
|
|
|
|
|
|
EXTRACT_UNKNOWN => 1, # extract all unknown tags in this directory |
93
|
|
|
|
|
|
|
WRITE_PROC => \&Image::ExifTool::DummyWriteProc, |
94
|
|
|
|
|
|
|
CHECK_PROC => \&CheckPDF, |
95
|
|
|
|
|
|
|
WRITABLE => 'string', |
96
|
|
|
|
|
|
|
# set PRIORITY to 0 so most recent Info dictionary takes precedence |
97
|
|
|
|
|
|
|
# (Acrobat Pro bug? doesn't use same object/generation number for |
98
|
|
|
|
|
|
|
# new Info dictionary when doing incremental update) |
99
|
|
|
|
|
|
|
PRIORITY => 0, |
100
|
|
|
|
|
|
|
NOTES => q{ |
101
|
|
|
|
|
|
|
As well as the tags listed below, the PDF specification allows for |
102
|
|
|
|
|
|
|
user-defined tags to exist in the Info dictionary. These tags, which should |
103
|
|
|
|
|
|
|
have corresponding XMP-pdfx entries in the XMP of the PDF XML Metadata |
104
|
|
|
|
|
|
|
object, are also extracted by ExifTool. |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
B specifies the value format, and may be C, C, |
107
|
|
|
|
|
|
|
C, C, C or C for PDF tags. |
108
|
|
|
|
|
|
|
}, |
109
|
|
|
|
|
|
|
Title => { }, |
110
|
|
|
|
|
|
|
Author => { Groups => { 2 => 'Author' } }, |
111
|
|
|
|
|
|
|
Subject => { }, |
112
|
|
|
|
|
|
|
Keywords => { List => 'string' }, # this is a string list |
113
|
|
|
|
|
|
|
Creator => { }, |
114
|
|
|
|
|
|
|
Producer => { }, |
115
|
|
|
|
|
|
|
CreationDate => { |
116
|
|
|
|
|
|
|
Name => 'CreateDate', |
117
|
|
|
|
|
|
|
Writable => 'date', |
118
|
|
|
|
|
|
|
Groups => { 2 => 'Time' }, |
119
|
|
|
|
|
|
|
Shift => 'Time', |
120
|
|
|
|
|
|
|
PrintConv => '$self->ConvertDateTime($val)', |
121
|
|
|
|
|
|
|
PrintConvInv => '$self->InverseDateTime($val)', |
122
|
|
|
|
|
|
|
}, |
123
|
|
|
|
|
|
|
ModDate => { |
124
|
|
|
|
|
|
|
Name => 'ModifyDate', |
125
|
|
|
|
|
|
|
Writable => 'date', |
126
|
|
|
|
|
|
|
Groups => { 2 => 'Time' }, |
127
|
|
|
|
|
|
|
Shift => 'Time', |
128
|
|
|
|
|
|
|
PrintConv => '$self->ConvertDateTime($val)', |
129
|
|
|
|
|
|
|
PrintConvInv => '$self->InverseDateTime($val)', |
130
|
|
|
|
|
|
|
}, |
131
|
|
|
|
|
|
|
Trapped => { |
132
|
|
|
|
|
|
|
Protected => 1, |
133
|
|
|
|
|
|
|
# remove leading '/' from '/True' or '/False' |
134
|
|
|
|
|
|
|
ValueConv => '$val=~s{^/}{}; $val', |
135
|
|
|
|
|
|
|
ValueConvInv => '"/$val"', |
136
|
|
|
|
|
|
|
}, |
137
|
|
|
|
|
|
|
'AAPL:Keywords' => { #PH |
138
|
|
|
|
|
|
|
Name => 'AppleKeywords', |
139
|
|
|
|
|
|
|
List => 'array', # this is an array of values |
140
|
|
|
|
|
|
|
Notes => q{ |
141
|
|
|
|
|
|
|
keywords written by Apple utilities, although they seem to use PDF:Keywords |
142
|
|
|
|
|
|
|
when reading |
143
|
|
|
|
|
|
|
}, |
144
|
|
|
|
|
|
|
}, |
145
|
|
|
|
|
|
|
); |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# tags in the PDF Root document catalog |
148
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Root = ( |
149
|
|
|
|
|
|
|
GROUPS => { 2 => 'Document' }, |
150
|
|
|
|
|
|
|
# note: can't capture previous versions of Root since they are not parsed |
151
|
|
|
|
|
|
|
VARS => { CAPTURE => ['Root'] }, |
152
|
|
|
|
|
|
|
NOTES => 'This is the PDF document catalog.', |
153
|
|
|
|
|
|
|
MarkInfo => { |
154
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::MarkInfo' }, |
155
|
|
|
|
|
|
|
}, |
156
|
|
|
|
|
|
|
Metadata => { |
157
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Metadata' }, |
158
|
|
|
|
|
|
|
}, |
159
|
|
|
|
|
|
|
Pages => { |
160
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Pages' }, |
161
|
|
|
|
|
|
|
}, |
162
|
|
|
|
|
|
|
Perms => { |
163
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Perms' }, |
164
|
|
|
|
|
|
|
}, |
165
|
|
|
|
|
|
|
AcroForm => { |
166
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::AcroForm' }, |
167
|
|
|
|
|
|
|
}, |
168
|
|
|
|
|
|
|
Lang => 'Language', |
169
|
|
|
|
|
|
|
PageLayout => { }, |
170
|
|
|
|
|
|
|
PageMode => { }, |
171
|
|
|
|
|
|
|
Version => 'PDFVersion', |
172
|
|
|
|
|
|
|
); |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
# tags extracted from the PDF Encrypt dictionary |
175
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Encrypt = ( |
176
|
|
|
|
|
|
|
GROUPS => { 2 => 'Document' }, |
177
|
|
|
|
|
|
|
NOTES => 'Tags extracted from the document Encrypt dictionary.', |
178
|
|
|
|
|
|
|
Filter => { |
179
|
|
|
|
|
|
|
Name => 'Encryption', |
180
|
|
|
|
|
|
|
Notes => q{ |
181
|
|
|
|
|
|
|
extracted value is actually a combination of the Filter, SubFilter, V, R and |
182
|
|
|
|
|
|
|
Length information from the Encrypt dictionary |
183
|
|
|
|
|
|
|
}, |
184
|
|
|
|
|
|
|
}, |
185
|
|
|
|
|
|
|
P => { |
186
|
|
|
|
|
|
|
Name => 'UserAccess', |
187
|
|
|
|
|
|
|
ValueConv => '$val & 0x0f3c', # ignore reserved bits |
188
|
|
|
|
|
|
|
PrintConvColumns => 2, |
189
|
|
|
|
|
|
|
PrintConv => { BITMASK => { |
190
|
|
|
|
|
|
|
2 => 'Print', |
191
|
|
|
|
|
|
|
3 => 'Modify', |
192
|
|
|
|
|
|
|
4 => 'Copy', |
193
|
|
|
|
|
|
|
5 => 'Annotate', |
194
|
|
|
|
|
|
|
8 => 'Fill forms', |
195
|
|
|
|
|
|
|
9 => 'Extract', |
196
|
|
|
|
|
|
|
10 => 'Assemble', |
197
|
|
|
|
|
|
|
11 => 'Print high-res', |
198
|
|
|
|
|
|
|
}}, |
199
|
|
|
|
|
|
|
}, |
200
|
|
|
|
|
|
|
); |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
# tags in PDF Pages dictionary |
203
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Pages = ( |
204
|
|
|
|
|
|
|
GROUPS => { 2 => 'Document' }, |
205
|
|
|
|
|
|
|
Count => 'PageCount', |
206
|
|
|
|
|
|
|
Kids => { |
207
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Kids' }, |
208
|
|
|
|
|
|
|
}, |
209
|
|
|
|
|
|
|
); |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
# tags in PDF Perms dictionary |
212
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Perms = ( |
213
|
|
|
|
|
|
|
NOTES => 'Additional document permissions imposed by digital signatures.', |
214
|
|
|
|
|
|
|
DocMDP => { |
215
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Signature' }, |
216
|
|
|
|
|
|
|
}, |
217
|
|
|
|
|
|
|
FieldMDP => { |
218
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Signature' }, |
219
|
|
|
|
|
|
|
}, |
220
|
|
|
|
|
|
|
UR3 => { |
221
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Signature' }, |
222
|
|
|
|
|
|
|
}, |
223
|
|
|
|
|
|
|
); |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
# tags in PDF Perms dictionary |
226
|
|
|
|
|
|
|
%Image::ExifTool::PDF::AcroForm = ( |
227
|
|
|
|
|
|
|
PROCESS_PROC => \&ProcessAcroForm, |
228
|
|
|
|
|
|
|
_has_xfa => { |
229
|
|
|
|
|
|
|
Name => 'HasXFA', |
230
|
|
|
|
|
|
|
Notes => q{ |
231
|
|
|
|
|
|
|
this tag is defined if a document contains form fields, and is true if it |
232
|
|
|
|
|
|
|
uses XML Forms Architecture; not a real Tag ID |
233
|
|
|
|
|
|
|
}, |
234
|
|
|
|
|
|
|
PrintConv => { 'true' => 'Yes', 'false' => 'No' }, |
235
|
|
|
|
|
|
|
}, |
236
|
|
|
|
|
|
|
); |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
# tags in PDF Kids dictionary |
239
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Kids = ( |
240
|
|
|
|
|
|
|
Metadata => { |
241
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Metadata' }, |
242
|
|
|
|
|
|
|
}, |
243
|
|
|
|
|
|
|
PieceInfo => { |
244
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::PieceInfo' }, |
245
|
|
|
|
|
|
|
}, |
246
|
|
|
|
|
|
|
Resources => { |
247
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Resources' }, |
248
|
|
|
|
|
|
|
}, |
249
|
|
|
|
|
|
|
Kids => { |
250
|
|
|
|
|
|
|
Condition => '$self->Options("ExtractEmbedded")', |
251
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Kids' }, |
252
|
|
|
|
|
|
|
}, |
253
|
|
|
|
|
|
|
); |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
# tags in PDF Resources dictionary |
256
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Resources = ( |
257
|
|
|
|
|
|
|
ColorSpace => { |
258
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::ColorSpace' }, |
259
|
|
|
|
|
|
|
}, |
260
|
|
|
|
|
|
|
XObject => { |
261
|
|
|
|
|
|
|
Condition => '$self->Options("ExtractEmbedded")', |
262
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::XObject' }, |
263
|
|
|
|
|
|
|
}, |
264
|
|
|
|
|
|
|
Properties => { |
265
|
|
|
|
|
|
|
Condition => '$self->Options("ExtractEmbedded")', |
266
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Properties' }, |
267
|
|
|
|
|
|
|
}, |
268
|
|
|
|
|
|
|
); |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
# tags in PDF ColorSpace dictionary |
271
|
|
|
|
|
|
|
%Image::ExifTool::PDF::ColorSpace = ( |
272
|
|
|
|
|
|
|
DefaultRGB => { |
273
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::DefaultRGB' }, |
274
|
|
|
|
|
|
|
ConvertToDict => 1, # (not seen yet, but just in case) |
275
|
|
|
|
|
|
|
}, |
276
|
|
|
|
|
|
|
DefaultCMYK => { |
277
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::DefaultRGB' }, |
278
|
|
|
|
|
|
|
# hack: this is stored as an array instead of a dictionary in my |
279
|
|
|
|
|
|
|
# sample, so convert to a dictionary to extract the ICCBased element |
280
|
|
|
|
|
|
|
ConvertToDict => 1, |
281
|
|
|
|
|
|
|
}, |
282
|
|
|
|
|
|
|
Cs1 => { |
283
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::DefaultRGB' }, |
284
|
|
|
|
|
|
|
ConvertToDict => 1, # (just in case) |
285
|
|
|
|
|
|
|
}, |
286
|
|
|
|
|
|
|
CS0 => { |
287
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::DefaultRGB' }, |
288
|
|
|
|
|
|
|
ConvertToDict => 1, # (just in case) |
289
|
|
|
|
|
|
|
}, |
290
|
|
|
|
|
|
|
); |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
# tags in PDF DefaultRGB dictionary |
293
|
|
|
|
|
|
|
%Image::ExifTool::PDF::DefaultRGB = ( |
294
|
|
|
|
|
|
|
ICCBased => { |
295
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::ICCBased' }, |
296
|
|
|
|
|
|
|
}, |
297
|
|
|
|
|
|
|
); |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
# tags in PDF ICCBased, Cs1 and CS0 dictionaries |
300
|
|
|
|
|
|
|
%Image::ExifTool::PDF::ICCBased = ( |
301
|
|
|
|
|
|
|
_stream => { |
302
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::ICC_Profile::Main' }, |
303
|
|
|
|
|
|
|
}, |
304
|
|
|
|
|
|
|
); |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
# tags in PDF XObject dictionary (parsed only if ExtractEmbedded is enabled) |
307
|
|
|
|
|
|
|
%Image::ExifTool::PDF::XObject = ( |
308
|
|
|
|
|
|
|
EXTRACT_UNKNOWN => 0, # extract known but numbered tags (Im1, Im2, etc) |
309
|
|
|
|
|
|
|
Im => { |
310
|
|
|
|
|
|
|
Notes => q{ |
311
|
|
|
|
|
|
|
the L option enables information to be extracted from these |
312
|
|
|
|
|
|
|
embedded images |
313
|
|
|
|
|
|
|
}, |
314
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Im' }, |
315
|
|
|
|
|
|
|
}, |
316
|
|
|
|
|
|
|
); |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
# tags in PDF Im# dictionary |
319
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Im = ( |
320
|
|
|
|
|
|
|
NOTES => q{ |
321
|
|
|
|
|
|
|
Information extracted from embedded images with the L option. |
322
|
|
|
|
|
|
|
The EmbeddedImage and its metadata are extracted only for JPEG and Jpeg2000 |
323
|
|
|
|
|
|
|
image formats. |
324
|
|
|
|
|
|
|
}, |
325
|
|
|
|
|
|
|
Width => 'EmbeddedImageWidth', |
326
|
|
|
|
|
|
|
Height => 'EmbeddedImageHeight', |
327
|
|
|
|
|
|
|
Filter => { Name => 'EmbeddedImageFilter', List => 1 }, |
328
|
|
|
|
|
|
|
ColorSpace => { |
329
|
|
|
|
|
|
|
Name => 'EmbeddedImageColorSpace', |
330
|
|
|
|
|
|
|
List => 1, |
331
|
|
|
|
|
|
|
RawConv => 'ref $val ? undef : $val', # (ignore color space data) |
332
|
|
|
|
|
|
|
}, |
333
|
|
|
|
|
|
|
Image_stream => { |
334
|
|
|
|
|
|
|
Name => 'EmbeddedImage', |
335
|
|
|
|
|
|
|
Groups => { 2 => 'Preview' }, |
336
|
|
|
|
|
|
|
Binary => 1, |
337
|
|
|
|
|
|
|
}, |
338
|
|
|
|
|
|
|
); |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
# tags in PDF Properties dictionary |
341
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Properties = ( |
342
|
|
|
|
|
|
|
EXTRACT_UNKNOWN => 0, # extract known but numbered tags (MC0, MC1, etc) |
343
|
|
|
|
|
|
|
MC => { |
344
|
|
|
|
|
|
|
Notes => q{ |
345
|
|
|
|
|
|
|
the L option enables information to be extracted from these |
346
|
|
|
|
|
|
|
embedded metadata dictionaries |
347
|
|
|
|
|
|
|
}, |
348
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::MC' }, |
349
|
|
|
|
|
|
|
} |
350
|
|
|
|
|
|
|
); |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
# tags in PDF MC# dictionary |
353
|
|
|
|
|
|
|
%Image::ExifTool::PDF::MC = ( |
354
|
|
|
|
|
|
|
Metadata => { |
355
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Metadata' }, |
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
); |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
# tags in PDF PieceInfo dictionary |
360
|
|
|
|
|
|
|
%Image::ExifTool::PDF::PieceInfo = ( |
361
|
|
|
|
|
|
|
AdobePhotoshop => { |
362
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::AdobePhotoshop' }, |
363
|
|
|
|
|
|
|
}, |
364
|
|
|
|
|
|
|
Illustrator => { |
365
|
|
|
|
|
|
|
# assume this is an illustrator file if it contains this directory |
366
|
|
|
|
|
|
|
# and doesn't have a ".PDF" extension |
367
|
|
|
|
|
|
|
Condition => q{ |
368
|
|
|
|
|
|
|
$self->OverrideFileType("AI") unless $$self{FILE_EXT} and $$self{FILE_EXT} eq 'PDF'; |
369
|
|
|
|
|
|
|
return 1; |
370
|
|
|
|
|
|
|
}, |
371
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Illustrator' }, |
372
|
|
|
|
|
|
|
}, |
373
|
|
|
|
|
|
|
); |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
# tags in PDF AdobePhotoshop dictionary |
376
|
|
|
|
|
|
|
%Image::ExifTool::PDF::AdobePhotoshop = ( |
377
|
|
|
|
|
|
|
Private => { |
378
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Private' }, |
379
|
|
|
|
|
|
|
}, |
380
|
|
|
|
|
|
|
); |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
# tags in PDF Illustrator dictionary |
383
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Illustrator = ( |
384
|
|
|
|
|
|
|
Private => { |
385
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::AIPrivate' }, |
386
|
|
|
|
|
|
|
}, |
387
|
|
|
|
|
|
|
); |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
# tags in PDF Private dictionary |
390
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Private = ( |
391
|
|
|
|
|
|
|
ImageResources => { |
392
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::ImageResources' }, |
393
|
|
|
|
|
|
|
}, |
394
|
|
|
|
|
|
|
); |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
# tags in PDF AI Private dictionary |
397
|
|
|
|
|
|
|
%Image::ExifTool::PDF::AIPrivate = ( |
398
|
|
|
|
|
|
|
GROUPS => { 2 => 'Document' }, |
399
|
|
|
|
|
|
|
EXTRACT_UNKNOWN => 0, # extract known but numbered tags |
400
|
|
|
|
|
|
|
AIMetaData => { |
401
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::AIMetaData' }, |
402
|
|
|
|
|
|
|
}, |
403
|
|
|
|
|
|
|
AIPrivateData => { |
404
|
|
|
|
|
|
|
Notes => q{ |
405
|
|
|
|
|
|
|
the L option enables information to be extracted from embedded |
406
|
|
|
|
|
|
|
PostScript documents in the AIPrivateData# and AIPDFPrivateData# streams |
407
|
|
|
|
|
|
|
}, |
408
|
|
|
|
|
|
|
JoinStreams => 1, # join streams from numbered tags and process as one |
409
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PostScript::Main' }, |
410
|
|
|
|
|
|
|
}, |
411
|
|
|
|
|
|
|
AIPDFPrivateData => { |
412
|
|
|
|
|
|
|
JoinStreams => 1, # join streams from numbered tags and process as one |
413
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PostScript::Main' }, |
414
|
|
|
|
|
|
|
}, |
415
|
|
|
|
|
|
|
RoundTripVersion => { }, |
416
|
|
|
|
|
|
|
ContainerVersion => { }, |
417
|
|
|
|
|
|
|
CreatorVersion => { }, |
418
|
|
|
|
|
|
|
); |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
# tags in PDF AIMetaData dictionary |
421
|
|
|
|
|
|
|
%Image::ExifTool::PDF::AIMetaData = ( |
422
|
|
|
|
|
|
|
_stream => { |
423
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PostScript::Main' }, |
424
|
|
|
|
|
|
|
}, |
425
|
|
|
|
|
|
|
); |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
# tags in PDF ImageResources dictionary |
428
|
|
|
|
|
|
|
%Image::ExifTool::PDF::ImageResources = ( |
429
|
|
|
|
|
|
|
_stream => { |
430
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::Photoshop::Main' }, |
431
|
|
|
|
|
|
|
}, |
432
|
|
|
|
|
|
|
); |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
# tags in PDF MarkInfo dictionary |
435
|
|
|
|
|
|
|
%Image::ExifTool::PDF::MarkInfo = ( |
436
|
|
|
|
|
|
|
GROUPS => { 2 => 'Document' }, |
437
|
|
|
|
|
|
|
Marked => { |
438
|
|
|
|
|
|
|
Name => 'TaggedPDF', |
439
|
|
|
|
|
|
|
Notes => "not a Tagged PDF if this tag is missing", |
440
|
|
|
|
|
|
|
PrintConv => { 'true' => 'Yes', 'false' => 'No' }, |
441
|
|
|
|
|
|
|
}, |
442
|
|
|
|
|
|
|
); |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
# tags in PDF Metadata dictionary |
445
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Metadata = ( |
446
|
|
|
|
|
|
|
GROUPS => { 2 => 'Document' }, |
447
|
|
|
|
|
|
|
XML_stream => { # this is the stream for a Subtype /XML dictionary (not a real tag) |
448
|
|
|
|
|
|
|
Name => 'XMP', |
449
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::XMP::Main' }, |
450
|
|
|
|
|
|
|
}, |
451
|
|
|
|
|
|
|
); |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
# tags in PDF signature directories (DocMDP, FieldMDP or UR3) |
454
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Signature = ( |
455
|
|
|
|
|
|
|
GROUPS => { 2 => 'Document' }, |
456
|
|
|
|
|
|
|
ContactInfo => 'SignerContactInfo', |
457
|
|
|
|
|
|
|
Location => 'SigningLocation', |
458
|
|
|
|
|
|
|
M => { |
459
|
|
|
|
|
|
|
Name => 'SigningDate', |
460
|
|
|
|
|
|
|
Format => 'date', |
461
|
|
|
|
|
|
|
Groups => { 2 => 'Time' }, |
462
|
|
|
|
|
|
|
PrintConv => '$self->ConvertDateTime($val)', |
463
|
|
|
|
|
|
|
}, |
464
|
|
|
|
|
|
|
Name => 'SigningAuthority', |
465
|
|
|
|
|
|
|
Reason => 'SigningReason', |
466
|
|
|
|
|
|
|
Reference => { |
467
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Reference' }, |
468
|
|
|
|
|
|
|
}, |
469
|
|
|
|
|
|
|
Prop_AuthTime => { |
470
|
|
|
|
|
|
|
Name => 'AuthenticationTime', |
471
|
|
|
|
|
|
|
PrintConv => 'ConvertTimeSpan($val) . " ago"', |
472
|
|
|
|
|
|
|
}, |
473
|
|
|
|
|
|
|
Prop_AuthType => 'AuthenticationType', |
474
|
|
|
|
|
|
|
); |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
# tags in PDF Reference dictionary |
477
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Reference = ( |
478
|
|
|
|
|
|
|
TransformParams => { |
479
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::TransformParams' }, |
480
|
|
|
|
|
|
|
}, |
481
|
|
|
|
|
|
|
); |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
# tags in PDF TransformParams dictionary |
484
|
|
|
|
|
|
|
%Image::ExifTool::PDF::TransformParams = ( |
485
|
|
|
|
|
|
|
GROUPS => { 2 => 'Document' }, |
486
|
|
|
|
|
|
|
Annots => { |
487
|
|
|
|
|
|
|
Name => 'AnnotationUsageRights', |
488
|
|
|
|
|
|
|
Notes => q{ |
489
|
|
|
|
|
|
|
possible values are Create, Delete, Modify, Copy, Import and Export; |
490
|
|
|
|
|
|
|
additional values for UR3 signatures are Online and SummaryView |
491
|
|
|
|
|
|
|
}, |
492
|
|
|
|
|
|
|
List => 1, |
493
|
|
|
|
|
|
|
}, |
494
|
|
|
|
|
|
|
Document => { |
495
|
|
|
|
|
|
|
Name => 'DocumentUsageRights', |
496
|
|
|
|
|
|
|
Notes => 'only possible value is FullSave', |
497
|
|
|
|
|
|
|
List => 1, |
498
|
|
|
|
|
|
|
}, |
499
|
|
|
|
|
|
|
Form => { |
500
|
|
|
|
|
|
|
Name => 'FormUsageRights', |
501
|
|
|
|
|
|
|
Notes => q{ |
502
|
|
|
|
|
|
|
possible values are FillIn, Import, Export, SubmitStandalone and |
503
|
|
|
|
|
|
|
SpawnTemplate; additional values for UR3 signatures are BarcodePlaintext and |
504
|
|
|
|
|
|
|
Online |
505
|
|
|
|
|
|
|
}, |
506
|
|
|
|
|
|
|
List => 1, |
507
|
|
|
|
|
|
|
}, |
508
|
|
|
|
|
|
|
FormEX => { |
509
|
|
|
|
|
|
|
Name => 'FormExtraUsageRights', |
510
|
|
|
|
|
|
|
Notes => 'UR signatures only; only possible value is BarcodePlaintext', |
511
|
|
|
|
|
|
|
List => 1, |
512
|
|
|
|
|
|
|
}, |
513
|
|
|
|
|
|
|
Signature => { |
514
|
|
|
|
|
|
|
Name => 'SignatureUsageRights', |
515
|
|
|
|
|
|
|
Notes => 'only possible value is Modify', |
516
|
|
|
|
|
|
|
List => 1, |
517
|
|
|
|
|
|
|
}, |
518
|
|
|
|
|
|
|
EF => { |
519
|
|
|
|
|
|
|
Name => 'EmbeddedFileUsageRights', |
520
|
|
|
|
|
|
|
Notes => 'possible values are Create, Delete, Modify and Import', |
521
|
|
|
|
|
|
|
List => 1, |
522
|
|
|
|
|
|
|
}, |
523
|
|
|
|
|
|
|
Msg => 'UsageRightsMessage', |
524
|
|
|
|
|
|
|
P => { |
525
|
|
|
|
|
|
|
Name => 'ModificationPermissions', |
526
|
|
|
|
|
|
|
Notes => q{ |
527
|
|
|
|
|
|
|
1-3 for DocMDP signatures, default 2; true/false for UR3 signatures, default |
528
|
|
|
|
|
|
|
false |
529
|
|
|
|
|
|
|
}, |
530
|
|
|
|
|
|
|
PrintConv => { |
531
|
|
|
|
|
|
|
1 => 'No changes permitted', |
532
|
|
|
|
|
|
|
2 => 'Fill forms, Create page templates, Sign', |
533
|
|
|
|
|
|
|
3 => 'Fill forms, Create page templates, Sign, Create/Delete/Edit annotations', |
534
|
|
|
|
|
|
|
'true' => 'Restrict all applications to reader permissions', |
535
|
|
|
|
|
|
|
'false' => 'Do not restrict applications to reader permissions', |
536
|
|
|
|
|
|
|
}, |
537
|
|
|
|
|
|
|
}, |
538
|
|
|
|
|
|
|
Action => { |
539
|
|
|
|
|
|
|
Name => 'FieldPermissions', |
540
|
|
|
|
|
|
|
Notes => 'FieldMDP signatures only', |
541
|
|
|
|
|
|
|
PrintConv => { |
542
|
|
|
|
|
|
|
'All' => 'Disallow changes to all form fields', |
543
|
|
|
|
|
|
|
'Include' => 'Disallow changes to specified form fields', |
544
|
|
|
|
|
|
|
'Exclude' => 'Allow changes to specified form fields', |
545
|
|
|
|
|
|
|
}, |
546
|
|
|
|
|
|
|
}, |
547
|
|
|
|
|
|
|
Fields => { |
548
|
|
|
|
|
|
|
Notes => 'FieldMDP signatures only', |
549
|
|
|
|
|
|
|
Name => 'FormFields', |
550
|
|
|
|
|
|
|
List => 1, |
551
|
|
|
|
|
|
|
}, |
552
|
|
|
|
|
|
|
); |
553
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
# unknown tags for use in verbose option |
555
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Unknown = ( |
556
|
|
|
|
|
|
|
GROUPS => { 2 => 'Unknown' }, |
557
|
|
|
|
|
|
|
); |
558
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
560
|
|
|
|
|
|
|
# AutoLoad our writer routines when necessary |
561
|
|
|
|
|
|
|
# |
562
|
|
|
|
|
|
|
sub AUTOLOAD |
563
|
|
|
|
|
|
|
{ |
564
|
19
|
|
|
19
|
|
100
|
return Image::ExifTool::DoAutoLoad($AUTOLOAD, @_); |
565
|
|
|
|
|
|
|
} |
566
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
568
|
|
|
|
|
|
|
# Convert from PDF to EXIF-style date/time |
569
|
|
|
|
|
|
|
# Inputs: 0) PDF date/time string (D:YYYYmmddHHMMSS+HH'MM') |
570
|
|
|
|
|
|
|
# Returns: EXIF date string (YYYY:mm:dd HH:MM:SS+HH:MM) |
571
|
|
|
|
|
|
|
sub ConvertPDFDate($) |
572
|
|
|
|
|
|
|
{ |
573
|
10
|
|
|
10
|
0
|
12
|
my $date = shift; |
574
|
|
|
|
|
|
|
# remove optional 'D:' prefix |
575
|
10
|
|
|
|
|
32
|
$date =~ s/^D://; |
576
|
|
|
|
|
|
|
# fill in default values if necessary |
577
|
|
|
|
|
|
|
# YYYYmmddHHMMSS |
578
|
10
|
|
|
|
|
27
|
my $default = '00000101000000'; |
579
|
10
|
50
|
|
|
|
25
|
if (length $date < length $default) { |
580
|
0
|
|
|
|
|
0
|
$date .= substr($default, length $date); |
581
|
|
|
|
|
|
|
} |
582
|
10
|
50
|
|
|
|
40
|
$date =~ /^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})(.*)/ or return $date; |
583
|
10
|
|
|
|
|
49
|
$date = "$1:$2:$3 $4:$5:$6"; |
584
|
10
|
50
|
|
|
|
26
|
if ($7) { |
585
|
10
|
|
|
|
|
16
|
my $tz = $7; |
586
|
10
|
50
|
|
|
|
43
|
if ($tz =~ /^\s*Z/i) { |
|
|
50
|
|
|
|
|
|
587
|
|
|
|
|
|
|
# ignore any "HH'mm'" after the Z (OS X 10.6 does this) |
588
|
0
|
|
|
|
|
0
|
$date .= 'Z'; |
589
|
|
|
|
|
|
|
# tolerate some improper formatting in timezone specification |
590
|
|
|
|
|
|
|
} elsif ($tz =~ /^\s*([-+])\s*(\d+)[': ]+(\d*)/) { |
591
|
10
|
|
50
|
|
|
36
|
$date .= $1 . $2 . ':' . ($3 || '00'); |
592
|
|
|
|
|
|
|
} |
593
|
|
|
|
|
|
|
} |
594
|
10
|
|
|
|
|
42
|
return $date; |
595
|
|
|
|
|
|
|
} |
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
598
|
|
|
|
|
|
|
# Locate any object in the XRef tables (including compressed objects) |
599
|
|
|
|
|
|
|
# Inputs: 0) XRef reference, 1) object reference string (or free object number) |
600
|
|
|
|
|
|
|
# Returns: offset to object in file or compressed object reference string, |
601
|
|
|
|
|
|
|
# 0 if object is free, or undefined on error |
602
|
|
|
|
|
|
|
sub LocateAnyObject($$) |
603
|
|
|
|
|
|
|
{ |
604
|
238
|
|
|
238
|
0
|
348
|
my ($xref, $ref) = @_; |
605
|
238
|
50
|
|
|
|
387
|
return undef unless $xref; |
606
|
238
|
100
|
|
|
|
587
|
return $$xref{$ref} if exists $$xref{$ref}; |
607
|
|
|
|
|
|
|
# get the object number |
608
|
7
|
50
|
|
|
|
38
|
return undef unless $ref =~ /^(\d+)/; |
609
|
7
|
|
|
|
|
16
|
my $objNum = $1; |
610
|
|
|
|
|
|
|
# return 0 if the object number has been reused (old object is free) |
611
|
7
|
100
|
|
|
|
24
|
return 0 if defined $$xref{$objNum}; |
612
|
|
|
|
|
|
|
# |
613
|
|
|
|
|
|
|
# scan our XRef stream dictionaries for this object |
614
|
|
|
|
|
|
|
# |
615
|
1
|
50
|
|
|
|
5
|
return undef unless $$xref{dicts}; |
616
|
0
|
|
|
|
|
0
|
my $dict; |
617
|
0
|
|
|
|
|
0
|
foreach $dict (@{$$xref{dicts}}) { |
|
0
|
|
|
|
|
0
|
|
618
|
|
|
|
|
|
|
# quick check to see if the object is in the range for this xref stream |
619
|
0
|
0
|
|
|
|
0
|
next if $objNum >= $$dict{Size}; |
620
|
0
|
|
|
|
|
0
|
my $index = $$dict{Index}; |
621
|
0
|
0
|
|
|
|
0
|
next if $objNum < $$index[0]; |
622
|
|
|
|
|
|
|
# scan the tables for the specified object |
623
|
0
|
|
|
|
|
0
|
my $size = $$dict{_entry_size}; |
624
|
0
|
|
|
|
|
0
|
my $num = scalar(@$index) / 2; |
625
|
0
|
|
|
|
|
0
|
my $tot = 0; |
626
|
0
|
|
|
|
|
0
|
my $i; |
627
|
0
|
|
|
|
|
0
|
for ($i=0; $i<$num; ++$i) { |
628
|
0
|
|
|
|
|
0
|
my $start = $$index[$i*2]; |
629
|
0
|
|
|
|
|
0
|
my $count = $$index[$i*2+1]; |
630
|
|
|
|
|
|
|
# table is in ascending order, so quit if we have passed the object |
631
|
0
|
0
|
|
|
|
0
|
last if $objNum < $start; |
632
|
0
|
0
|
|
|
|
0
|
if ($objNum < $start + $count) { |
633
|
0
|
|
|
|
|
0
|
my $offset = $size * ($objNum - $start + $tot); |
634
|
0
|
0
|
|
|
|
0
|
last if $offset + $size > length $$dict{_stream}; |
635
|
0
|
|
|
|
|
0
|
my @c = unpack("x$offset C$size", $$dict{_stream}); |
636
|
|
|
|
|
|
|
# extract values from this table entry |
637
|
|
|
|
|
|
|
# (can be 1, 2, 3, 4, etc.. bytes per value) |
638
|
0
|
|
|
|
|
0
|
my (@t, $j, $k); |
639
|
0
|
|
|
|
|
0
|
my $w = $$dict{W}; |
640
|
0
|
|
|
|
|
0
|
for ($j=0; $j<3; ++$j) { |
641
|
|
|
|
|
|
|
# use default value if W entry is 0 (as per spec) |
642
|
|
|
|
|
|
|
# - 0th element defaults to 1, others default to 0 |
643
|
0
|
0
|
|
|
|
0
|
$$w[$j] or $t[$j] = ($j ? 0 : 1), next; |
|
|
0
|
|
|
|
|
|
644
|
0
|
|
|
|
|
0
|
$t[$j] = shift(@c); |
645
|
0
|
|
|
|
|
0
|
for ($k=1; $k < $$w[$j]; ++$k) { |
646
|
0
|
|
|
|
|
0
|
$t[$j] = 256 * $t[$j] + shift(@c); |
647
|
|
|
|
|
|
|
} |
648
|
|
|
|
|
|
|
} |
649
|
|
|
|
|
|
|
# by default, use "o g R" as the xref key |
650
|
|
|
|
|
|
|
# (o = object number, g = generation number) |
651
|
0
|
|
|
|
|
0
|
my $ref2 = "$objNum $t[2] R"; |
652
|
0
|
0
|
|
|
|
0
|
if ($t[0] == 1) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
653
|
|
|
|
|
|
|
# normal object reference: |
654
|
|
|
|
|
|
|
# $t[1]=offset of object from start, $t[2]=generation number |
655
|
0
|
|
|
|
|
0
|
$$xref{$ref2} = $t[1]; |
656
|
|
|
|
|
|
|
} elsif ($t[0] == 2) { |
657
|
|
|
|
|
|
|
# compressed object reference: |
658
|
|
|
|
|
|
|
# $t[1]=stream object number, $t[2]=index of object in stream |
659
|
0
|
|
|
|
|
0
|
$ref2 = "$objNum 0 R"; |
660
|
0
|
|
|
|
|
0
|
$$xref{$ref2} = "I$t[2] $t[1] 0 R"; |
661
|
|
|
|
|
|
|
} elsif ($t[0] == 0) { |
662
|
|
|
|
|
|
|
# free object: |
663
|
|
|
|
|
|
|
# $t[1]=next free object in linked list, $t[2]=generation number |
664
|
0
|
|
|
|
|
0
|
$$xref{$ref2} = 0; |
665
|
|
|
|
|
|
|
} else { |
666
|
|
|
|
|
|
|
# treat as a null object |
667
|
0
|
|
|
|
|
0
|
$$xref{$ref2} = undef; |
668
|
|
|
|
|
|
|
} |
669
|
0
|
|
|
|
|
0
|
$$xref{$objNum} = $t[1]; # remember offsets by object number too |
670
|
0
|
0
|
|
|
|
0
|
return $$xref{$ref} if $ref eq $ref2; |
671
|
0
|
|
|
|
|
0
|
return 0; # object is free or was reused |
672
|
|
|
|
|
|
|
} |
673
|
0
|
|
|
|
|
0
|
$tot += $count; |
674
|
|
|
|
|
|
|
} |
675
|
|
|
|
|
|
|
} |
676
|
0
|
|
|
|
|
0
|
return undef; |
677
|
|
|
|
|
|
|
} |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
680
|
|
|
|
|
|
|
# Locate a regular object in the XRef tables (does not include compressed objects) |
681
|
|
|
|
|
|
|
# Inputs: 0) XRef reference, 1) object reference string (or free object number) |
682
|
|
|
|
|
|
|
# Returns: offset to object in file, 0 if object is free, |
683
|
|
|
|
|
|
|
# or undef on error or if object was compressed |
684
|
|
|
|
|
|
|
sub LocateObject($$) |
685
|
|
|
|
|
|
|
{ |
686
|
41
|
|
|
41
|
0
|
82
|
my ($xref, $ref) = @_; |
687
|
41
|
|
|
|
|
69
|
my $offset = LocateAnyObject($xref, $ref); |
688
|
41
|
50
|
66
|
|
|
151
|
return undef if $offset and $offset =~ /^I/; |
689
|
41
|
|
|
|
|
111
|
return $offset; |
690
|
|
|
|
|
|
|
} |
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
693
|
|
|
|
|
|
|
# Check that the correct object is located at the specified file offset |
694
|
|
|
|
|
|
|
# Inputs: 0) ExifTool ref, 1) object name, 2) object reference string, 3) file offset |
695
|
|
|
|
|
|
|
# Returns: first non-blank line at start of object, or undef on error |
696
|
|
|
|
|
|
|
sub CheckObject($$$$) |
697
|
|
|
|
|
|
|
{ |
698
|
217
|
|
|
217
|
0
|
375
|
my ($et, $tag, $ref, $offset) = @_; |
699
|
217
|
|
|
|
|
282
|
my ($data, $obj, $dat, $pat); |
700
|
|
|
|
|
|
|
|
701
|
217
|
|
|
|
|
318
|
my $raf = $$et{RAF}; |
702
|
217
|
50
|
|
|
|
623
|
$raf->Seek($offset+$$et{PDFBase}, 0) or $et->Warn("Bad $tag offset"), return undef; |
703
|
|
|
|
|
|
|
# verify that we are reading the expected object |
704
|
217
|
|
|
|
|
875
|
($obj = $ref) =~ s/R/obj/; |
705
|
217
|
|
|
|
|
376
|
for (;;) { |
706
|
217
|
50
|
|
|
|
524
|
$raf->ReadLine($data) or $et->Warn("Error reading $tag data"), return undef; |
707
|
217
|
50
|
|
|
|
3237
|
last if $data =~ s/^$obj//; |
708
|
0
|
0
|
|
|
|
0
|
next if $data =~ /^\s+$/; # keep reading if this was a blank line |
709
|
|
|
|
|
|
|
# handle cases where other whitespace characters are used in the object ID string |
710
|
0
|
|
|
|
|
0
|
while ($data =~ /^\d+(\s+\d+)?\s*$/) { |
711
|
0
|
|
|
|
|
0
|
$raf->ReadLine($dat); |
712
|
0
|
|
|
|
|
0
|
$data .= $dat; |
713
|
|
|
|
|
|
|
} |
714
|
0
|
|
|
|
|
0
|
($pat = $obj) =~ s/ /\\s+/g; |
715
|
0
|
0
|
|
|
|
0
|
unless ($data =~ s/$pat//) { |
716
|
0
|
|
|
|
|
0
|
$tag = ucfirst $tag; |
717
|
0
|
|
|
|
|
0
|
$et->Warn("$tag object ($obj) not found at offset $offset"); |
718
|
0
|
|
|
|
|
0
|
return undef; |
719
|
|
|
|
|
|
|
} |
720
|
0
|
|
|
|
|
0
|
last; |
721
|
|
|
|
|
|
|
} |
722
|
|
|
|
|
|
|
# read the first line of data from the object (ignoring blank lines and comments) |
723
|
217
|
|
|
|
|
360
|
for (;;) { |
724
|
434
|
100
|
66
|
|
|
1565
|
last if $data =~ /\S/ and $data !~ /^\s*%/; |
725
|
217
|
50
|
|
|
|
575
|
$raf->ReadLine($data) or $et->Warn("Error reading $tag data"), return undef; |
726
|
|
|
|
|
|
|
} |
727
|
217
|
|
|
|
|
452
|
return $data; |
728
|
|
|
|
|
|
|
} |
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
731
|
|
|
|
|
|
|
# Fetch indirect object from file (from inside a stream if required) |
732
|
|
|
|
|
|
|
# Inputs: 0) ExifTool object reference, 1) object reference string, |
733
|
|
|
|
|
|
|
# 2) xref lookup, 3) object name (for warning messages) |
734
|
|
|
|
|
|
|
# Returns: object data or undefined on error |
735
|
|
|
|
|
|
|
# Notes: sets $lastFetched to the object reference, or undef if the object |
736
|
|
|
|
|
|
|
# was extracted from an encrypted stream |
737
|
|
|
|
|
|
|
sub FetchObject($$$$) |
738
|
|
|
|
|
|
|
{ |
739
|
197
|
|
|
197
|
0
|
323
|
my ($et, $ref, $xref, $tag) = @_; |
740
|
197
|
|
|
|
|
268
|
$lastFetched = $ref; # save this for decoding if necessary |
741
|
197
|
|
|
|
|
304
|
my $offset = LocateAnyObject($xref, $ref); |
742
|
197
|
|
|
|
|
257
|
$lastOffset = $offset; |
743
|
197
|
100
|
|
|
|
348
|
unless ($offset) { |
744
|
5
|
50
|
|
|
|
11
|
$et->Warn("Bad $tag reference") unless defined $offset; |
745
|
5
|
|
|
|
|
10
|
return undef; |
746
|
|
|
|
|
|
|
} |
747
|
192
|
|
|
|
|
236
|
my ($data, $obj); |
748
|
192
|
50
|
|
|
|
448
|
if ($offset =~ s/^I(\d+) //) { |
749
|
0
|
|
|
|
|
0
|
my $index = $1; # object index in stream |
750
|
0
|
|
|
|
|
0
|
my ($objNum) = split ' ', $ref; # save original object number |
751
|
0
|
|
|
|
|
0
|
$ref = $offset; # now a reference to the containing stream object |
752
|
0
|
|
|
|
|
0
|
$obj = $streamObjs{$ref}; |
753
|
0
|
0
|
|
|
|
0
|
unless ($obj) { |
754
|
|
|
|
|
|
|
# don't try to load the same object stream twice |
755
|
0
|
0
|
|
|
|
0
|
return undef if defined $obj; |
756
|
0
|
|
|
|
|
0
|
$streamObjs{$ref} = ''; |
757
|
|
|
|
|
|
|
# load the parent object stream |
758
|
0
|
|
|
|
|
0
|
$obj = FetchObject($et, $ref, $xref, $tag); |
759
|
|
|
|
|
|
|
# make sure it contains everything we need |
760
|
0
|
0
|
0
|
|
|
0
|
return undef unless defined $obj and ref($obj) eq 'HASH'; |
761
|
0
|
0
|
0
|
|
|
0
|
return undef unless $$obj{First} and $$obj{N}; |
762
|
0
|
0
|
|
|
|
0
|
return undef unless DecodeStream($et, $obj); |
763
|
|
|
|
|
|
|
# add a special '_table' entry to this dictionary which contains |
764
|
|
|
|
|
|
|
# the list of object number/offset pairs from the stream header |
765
|
0
|
|
|
|
|
0
|
my $num = $$obj{N} * 2; |
766
|
0
|
|
|
|
|
0
|
my @table = split ' ', $$obj{_stream}, $num; |
767
|
0
|
0
|
|
|
|
0
|
return undef unless @table == $num; |
768
|
|
|
|
|
|
|
# remove everything before first object in stream |
769
|
0
|
|
|
|
|
0
|
$$obj{_stream} = substr($$obj{_stream}, $$obj{First}); |
770
|
0
|
|
|
|
|
0
|
$table[$num-1] =~ s/^(\d+).*/$1/s; # trim excess from last number |
771
|
0
|
|
|
|
|
0
|
$$obj{_table} = \@table; |
772
|
|
|
|
|
|
|
# save the object stream so we don't have to re-load it later |
773
|
0
|
|
|
|
|
0
|
$streamObjs{$ref} = $obj; |
774
|
|
|
|
|
|
|
} |
775
|
|
|
|
|
|
|
# verify that we have the specified object |
776
|
0
|
|
|
|
|
0
|
my $i = 2 * $index; |
777
|
0
|
|
|
|
|
0
|
my $table = $$obj{_table}; |
778
|
0
|
0
|
0
|
|
|
0
|
unless ($index < $$obj{N} and $$table[$i] == $objNum) { |
779
|
0
|
|
|
|
|
0
|
$et->Warn("Bad index for stream object $tag"); |
780
|
0
|
|
|
|
|
0
|
return undef; |
781
|
|
|
|
|
|
|
} |
782
|
|
|
|
|
|
|
# extract the object at the specified index in the stream |
783
|
|
|
|
|
|
|
# (offsets in table are in sequential order, so we can subtract from |
784
|
|
|
|
|
|
|
# the next offset to get the object length) |
785
|
0
|
|
|
|
|
0
|
$offset = $$table[$i + 1]; |
786
|
0
|
|
0
|
|
|
0
|
my $len = ($$table[$i + 3] || length($$obj{_stream})) - $offset; |
787
|
0
|
|
|
|
|
0
|
$data = substr($$obj{_stream}, $offset, $len); |
788
|
|
|
|
|
|
|
# avoid re-decrypting data in already decrypted streams |
789
|
0
|
0
|
|
|
|
0
|
undef $lastFetched if $cryptStream; |
790
|
0
|
|
|
|
|
0
|
return ExtractObject($et, \$data); |
791
|
|
|
|
|
|
|
} |
792
|
|
|
|
|
|
|
# load the start of the object |
793
|
192
|
|
|
|
|
329
|
$data = CheckObject($et, $tag, $ref, $offset); |
794
|
192
|
50
|
|
|
|
331
|
return undef unless defined $data; |
795
|
|
|
|
|
|
|
|
796
|
192
|
|
|
|
|
515
|
return ExtractObject($et, \$data, $$et{RAF}, $xref); |
797
|
|
|
|
|
|
|
} |
798
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
800
|
|
|
|
|
|
|
# Convert PDF value to something readable |
801
|
|
|
|
|
|
|
# Inputs: 0) PDF object data |
802
|
|
|
|
|
|
|
# Returns: converted object |
803
|
|
|
|
|
|
|
sub ReadPDFValue($) |
804
|
|
|
|
|
|
|
{ |
805
|
148
|
|
|
148
|
0
|
229
|
my $str = shift; |
806
|
|
|
|
|
|
|
# decode all strings in an array |
807
|
148
|
100
|
|
|
|
237
|
if (ref $str eq 'ARRAY') { |
808
|
|
|
|
|
|
|
# create new list to not alter the original data when rewriting |
809
|
12
|
|
|
|
|
19
|
my ($val, @vals); |
810
|
12
|
|
|
|
|
21
|
foreach $val (@$str) { |
811
|
20
|
|
|
|
|
40
|
push @vals, ReadPDFValue($val); |
812
|
|
|
|
|
|
|
} |
813
|
12
|
|
|
|
|
28
|
return \@vals; |
814
|
|
|
|
|
|
|
} |
815
|
136
|
50
|
|
|
|
232
|
length $str or return $str; |
816
|
136
|
|
|
|
|
210
|
my $delim = substr($str, 0, 1); |
817
|
136
|
100
|
|
|
|
300
|
if ($delim eq '(') { # literal string |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
818
|
58
|
50
|
|
|
|
268
|
$str = $1 if $str =~ /^.*?\((.*)\)/s; # remove brackets |
819
|
|
|
|
|
|
|
# decode escape sequences in literal strings |
820
|
58
|
|
|
|
|
145
|
while ($str =~ /\\(.)/sg) { |
821
|
0
|
|
|
|
|
0
|
my $n = pos($str) - 2; |
822
|
0
|
|
|
|
|
0
|
my $c = $1; |
823
|
0
|
|
|
|
|
0
|
my $r; |
824
|
0
|
0
|
|
|
|
0
|
if ($c =~ /[0-7]/) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
825
|
|
|
|
|
|
|
# get up to 2 more octal digits |
826
|
0
|
0
|
|
|
|
0
|
$c .= $1 if $str =~ /\G([0-7]{1,2})/g; |
827
|
|
|
|
|
|
|
# convert octal escape code |
828
|
0
|
|
|
|
|
0
|
$r = chr(oct($c) & 0xff); |
829
|
|
|
|
|
|
|
} elsif ($c eq "\x0d") { |
830
|
|
|
|
|
|
|
# the string is continued if the line ends with '\' |
831
|
|
|
|
|
|
|
# (also remove "\x0d\x0a") |
832
|
0
|
0
|
|
|
|
0
|
$c .= $1 if $str =~ /\G(\x0a)/g; |
833
|
0
|
|
|
|
|
0
|
$r = ''; |
834
|
|
|
|
|
|
|
} elsif ($c eq "\x0a") { |
835
|
0
|
|
|
|
|
0
|
$r = ''; |
836
|
|
|
|
|
|
|
} else { |
837
|
|
|
|
|
|
|
# convert escaped characters |
838
|
0
|
|
|
|
|
0
|
($r = $c) =~ tr/nrtbf/\n\r\t\b\f/; |
839
|
|
|
|
|
|
|
} |
840
|
0
|
|
|
|
|
0
|
substr($str, $n, length($c)+1) = $r; |
841
|
|
|
|
|
|
|
# continue search after this character |
842
|
0
|
|
|
|
|
0
|
pos($str) = $n + length($r); |
843
|
|
|
|
|
|
|
} |
844
|
58
|
50
|
|
|
|
91
|
Crypt(\$str, $lastFetched) if $cryptString; |
845
|
|
|
|
|
|
|
} elsif ($delim eq '<') { # hex string |
846
|
|
|
|
|
|
|
# decode hex data |
847
|
41
|
|
|
|
|
83
|
$str =~ tr/0-9A-Fa-f//dc; |
848
|
41
|
50
|
|
|
|
84
|
$str .= '0' if length($str) & 0x01; # (by the spec) |
849
|
41
|
|
|
|
|
131
|
$str = pack('H*', $str); |
850
|
41
|
100
|
|
|
|
82
|
Crypt(\$str, $lastFetched) if $cryptString; |
851
|
|
|
|
|
|
|
} elsif ($delim eq '/') { # name |
852
|
0
|
|
|
|
|
0
|
$str = substr($str, 1); |
853
|
|
|
|
|
|
|
# convert escape codes (PDF 1.2 or later) |
854
|
0
|
0
|
|
|
|
0
|
$str =~ s/#([0-9a-f]{2})/chr(hex($1))/sgei if $pdfVer >= 1.2; |
|
0
|
|
|
|
|
0
|
|
855
|
|
|
|
|
|
|
} |
856
|
136
|
|
|
|
|
248
|
return $str; |
857
|
|
|
|
|
|
|
} |
858
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
860
|
|
|
|
|
|
|
# Extract PDF object from combination of buffered data and file |
861
|
|
|
|
|
|
|
# Inputs: 0) ExifTool object reference, 1) data reference, |
862
|
|
|
|
|
|
|
# 2) optional raf reference, 3) optional xref table |
863
|
|
|
|
|
|
|
# Returns: converted PDF object or undef on error |
864
|
|
|
|
|
|
|
# a) dictionary object --> hash reference |
865
|
|
|
|
|
|
|
# b) array object --> array reference |
866
|
|
|
|
|
|
|
# c) indirect reference --> scalar reference |
867
|
|
|
|
|
|
|
# d) string, name, integer, boolean, null --> scalar value |
868
|
|
|
|
|
|
|
# - updates $$dataPt on return to contain unused data |
869
|
|
|
|
|
|
|
# - creates two bogus entries ('_stream' and '_tags') in dictionaries to represent |
870
|
|
|
|
|
|
|
# the stream data and a list of the tags (not including '_stream' and '_tags') |
871
|
|
|
|
|
|
|
# in their original order |
872
|
|
|
|
|
|
|
sub ExtractObject($$;$$) |
873
|
|
|
|
|
|
|
{ |
874
|
754
|
|
|
754
|
0
|
1280
|
my ($et, $dataPt, $raf, $xref) = @_; |
875
|
754
|
|
|
|
|
911
|
my (@tags, $data, $objData); |
876
|
754
|
|
|
|
|
1022
|
my $dict = { }; |
877
|
754
|
|
|
|
|
836
|
my $delim; |
878
|
|
|
|
|
|
|
|
879
|
754
|
|
|
|
|
807
|
for (;;) { |
880
|
824
|
100
|
|
|
|
2330
|
if ($$dataPt =~ /^\s*(<{1,2}|\[|\()/s) { |
|
|
50
|
|
|
|
|
|
881
|
754
|
|
|
|
|
1252
|
$delim = $1; |
882
|
754
|
|
|
|
|
1271
|
$$dataPt =~ s/^\s+//; # remove leading white space |
883
|
754
|
|
|
|
|
1303
|
$objData = ReadToNested($dataPt, $raf); |
884
|
754
|
50
|
|
|
|
1213
|
return undef unless defined $objData; |
885
|
754
|
|
|
|
|
968
|
last; |
886
|
|
|
|
|
|
|
} elsif ($$dataPt =~ s{^\s*(\S[^[(/<>\s]*)\s*}{}s) { |
887
|
|
|
|
|
|
|
# |
888
|
|
|
|
|
|
|
# extract boolean, numerical, string, name, null object or indirect reference |
889
|
|
|
|
|
|
|
# |
890
|
0
|
|
|
|
|
0
|
$objData = $1; |
891
|
|
|
|
|
|
|
# look for an indirect reference |
892
|
0
|
0
|
0
|
|
|
0
|
if ($objData =~ /^\d+$/ and $$dataPt =~ s/^(\d+)\s+R//s) { |
893
|
0
|
|
|
|
|
0
|
$objData .= "$1 R"; |
894
|
0
|
|
|
|
|
0
|
$objData = \$objData; # return scalar reference |
895
|
|
|
|
|
|
|
} |
896
|
0
|
|
|
|
|
0
|
return $objData; # return simple scalar or scalar reference |
897
|
|
|
|
|
|
|
} |
898
|
70
|
50
|
33
|
|
|
254
|
$raf and $raf->ReadLine($data) or return undef; |
899
|
70
|
|
|
|
|
137
|
$$dataPt .= $data; |
900
|
|
|
|
|
|
|
} |
901
|
|
|
|
|
|
|
# |
902
|
|
|
|
|
|
|
# return literal string or hex string without parsing |
903
|
|
|
|
|
|
|
# |
904
|
754
|
100
|
100
|
|
|
2146
|
if ($delim eq '(' or $delim eq '<') { |
|
|
100
|
|
|
|
|
|
905
|
160
|
|
|
|
|
386
|
return $objData; |
906
|
|
|
|
|
|
|
# |
907
|
|
|
|
|
|
|
# extract array |
908
|
|
|
|
|
|
|
# |
909
|
|
|
|
|
|
|
} elsif ($delim eq '[') { |
910
|
167
|
50
|
|
|
|
561
|
$objData =~ /^.*?\[(.*)\]/s or return undef; |
911
|
167
|
|
|
|
|
290
|
my $data = $1; # brackets removed |
912
|
167
|
|
|
|
|
184
|
my @list; |
913
|
167
|
|
|
|
|
178
|
for (;;) { |
914
|
594
|
100
|
|
|
|
1447
|
last unless $data =~ m{\s*(\S[^[(/<>\s]*)}sg; |
915
|
427
|
|
|
|
|
711
|
my $val = $1; |
916
|
427
|
100
|
|
|
|
1062
|
if ($val =~ /^(<{1,2}|\[|\()/) { |
|
|
100
|
|
|
|
|
|
917
|
78
|
|
|
|
|
115
|
my $pos = pos($data) - length($val); |
918
|
|
|
|
|
|
|
# nested dict, array, literal string or hex string |
919
|
78
|
|
|
|
|
123
|
my $buff = substr($data, $pos); |
920
|
78
|
|
|
|
|
139
|
$val = ReadToNested(\$buff); |
921
|
78
|
50
|
|
|
|
142
|
last unless defined $val; |
922
|
78
|
|
|
|
|
135
|
pos($data) = $pos + length($val); |
923
|
78
|
|
|
|
|
150
|
$val = ExtractObject($et, \$val); |
924
|
|
|
|
|
|
|
} elsif ($val =~ /^\d/) { |
925
|
245
|
|
|
|
|
294
|
my $pos = pos($data); |
926
|
245
|
100
|
|
|
|
443
|
if ($data =~ /\G\s+(\d+)\s+R/g) { |
927
|
37
|
|
|
|
|
104
|
$val = \ "$val $1 R"; # make a reference |
928
|
|
|
|
|
|
|
} else { |
929
|
208
|
|
|
|
|
368
|
pos($data) = $pos; |
930
|
|
|
|
|
|
|
} |
931
|
|
|
|
|
|
|
} |
932
|
427
|
|
|
|
|
733
|
push @list, $val; |
933
|
|
|
|
|
|
|
} |
934
|
167
|
|
|
|
|
502
|
return \@list; |
935
|
|
|
|
|
|
|
} |
936
|
|
|
|
|
|
|
# |
937
|
|
|
|
|
|
|
# extract dictionary |
938
|
|
|
|
|
|
|
# |
939
|
|
|
|
|
|
|
# Note: entries are not necessarily separated by whitespace (doh!) |
940
|
|
|
|
|
|
|
# eg) "/Tag/Name", "/Tag(string)", "/Tag[array]", etc are legal! |
941
|
|
|
|
|
|
|
# Also, they may be separated by a comment (eg. "/Tag%comment\nValue"), |
942
|
|
|
|
|
|
|
# but comments have already been removed |
943
|
427
|
|
|
|
|
1841
|
while ($objData =~ m{(\s*)/([^/[\]()<>{}\s]+)\s*(\S[^[(/<>\s]*)}sg) { |
944
|
1229
|
|
|
|
|
1984
|
my $tag = $2; |
945
|
1229
|
|
|
|
|
1719
|
my $val = $3; |
946
|
1229
|
100
|
|
|
|
3407
|
if ($val =~ /^(<{1,2}|\[|\()/) { |
|
|
100
|
|
|
|
|
|
947
|
|
|
|
|
|
|
# nested dict, array, literal string or hex string |
948
|
396
|
|
|
|
|
885
|
$objData = substr($objData, pos($objData)-length($val)); |
949
|
396
|
|
|
|
|
681
|
$val = ReadToNested(\$objData, $raf); |
950
|
396
|
50
|
|
|
|
675
|
last unless defined $val; |
951
|
396
|
|
|
|
|
689
|
$val = ExtractObject($et, \$val); |
952
|
396
|
|
|
|
|
651
|
pos($objData) = 0; |
953
|
|
|
|
|
|
|
} elsif ($val =~ /^\d/) { |
954
|
618
|
|
|
|
|
768
|
my $pos = pos($objData); |
955
|
618
|
100
|
|
|
|
1423
|
if ($objData =~ /\G\s+(\d+)\s+R/sg) { |
956
|
416
|
|
|
|
|
1058
|
$val = \ "$val $1 R"; # make a reference |
957
|
|
|
|
|
|
|
} else { |
958
|
202
|
|
|
|
|
344
|
pos($objData) = $pos; |
959
|
|
|
|
|
|
|
} |
960
|
|
|
|
|
|
|
} |
961
|
1229
|
50
|
|
|
|
2023
|
if ($$dict{$tag}) { |
962
|
|
|
|
|
|
|
# duplicate dictionary entries are not allowed |
963
|
0
|
|
|
|
|
0
|
$et->Warn("Duplicate '${tag}' entry in dictionary (ignored)"); |
964
|
|
|
|
|
|
|
} else { |
965
|
|
|
|
|
|
|
# save the entry |
966
|
1229
|
|
|
|
|
1802
|
push @tags, $tag; |
967
|
1229
|
|
|
|
|
4446
|
$$dict{$tag} = $val; |
968
|
|
|
|
|
|
|
} |
969
|
|
|
|
|
|
|
} |
970
|
427
|
50
|
|
|
|
742
|
return undef unless @tags; |
971
|
427
|
|
|
|
|
749
|
$$dict{_tags} = \@tags; |
972
|
427
|
100
|
|
|
|
850
|
return $dict unless $raf; # direct objects can not have streams |
973
|
|
|
|
|
|
|
# |
974
|
|
|
|
|
|
|
# extract the stream object |
975
|
|
|
|
|
|
|
# |
976
|
|
|
|
|
|
|
# dictionary must specify stream Length |
977
|
262
|
100
|
|
|
|
826
|
my $length = $$dict{Length} or return $dict; |
978
|
43
|
100
|
|
|
|
81
|
if (ref $length) { |
979
|
25
|
|
|
|
|
40
|
$length = $$length; |
980
|
25
|
|
|
|
|
58
|
my $oldpos = $raf->Tell(); |
981
|
|
|
|
|
|
|
# get the location of the object specifying the length |
982
|
|
|
|
|
|
|
# (compressed objects are not allowed) |
983
|
25
|
50
|
|
|
|
65
|
my $offset = LocateObject($xref, $length) or return $dict; |
984
|
25
|
50
|
|
|
|
47
|
$offset or $et->Warn('Bad stream Length object'), return $dict; |
985
|
25
|
|
|
|
|
45
|
$data = CheckObject($et, 'stream Length', $length, $offset); |
986
|
25
|
50
|
|
|
|
74
|
defined $data or return $dict; |
987
|
25
|
50
|
|
|
|
88
|
$data =~ /^\s*(\d+)/ or $et->Warn('Stream Length not found'), return $dict; |
988
|
25
|
|
|
|
|
53
|
$length = $1; |
989
|
25
|
|
|
|
|
63
|
$raf->Seek($oldpos, 0); # restore position to start of stream |
990
|
|
|
|
|
|
|
} |
991
|
|
|
|
|
|
|
# extract the trailing stream data |
992
|
43
|
|
|
|
|
52
|
for (;;) { |
993
|
|
|
|
|
|
|
# find the stream token |
994
|
86
|
100
|
|
|
|
276
|
if ($$dataPt =~ /(\S+)/) { |
995
|
43
|
50
|
|
|
|
113
|
last unless $1 eq 'stream'; |
996
|
|
|
|
|
|
|
# read an extra line because it may contain our \x0a |
997
|
43
|
50
|
|
|
|
91
|
$$dataPt .= $data if $raf->ReadLine($data); |
998
|
|
|
|
|
|
|
# remove our stream header |
999
|
43
|
|
|
|
|
268
|
$$dataPt =~ s/^\s*stream(\x0a|\x0d\x0a)//s; |
1000
|
43
|
|
|
|
|
121
|
my $more = $length - length($$dataPt); |
1001
|
43
|
100
|
|
|
|
105
|
if ($more > 0) { |
|
|
50
|
|
|
|
|
|
1002
|
28
|
50
|
|
|
|
74
|
unless ($raf->Read($data, $more) == $more) { |
1003
|
0
|
|
|
|
|
0
|
$et->Warn('Error reading stream data'); |
1004
|
0
|
|
|
|
|
0
|
$$dataPt = ''; |
1005
|
0
|
|
|
|
|
0
|
return $dict; |
1006
|
|
|
|
|
|
|
} |
1007
|
28
|
|
|
|
|
132
|
$$dict{_stream} = $$dataPt . $data; |
1008
|
28
|
|
|
|
|
56
|
$$dataPt = ''; |
1009
|
|
|
|
|
|
|
} elsif ($more < 0) { |
1010
|
15
|
|
|
|
|
90
|
$$dict{_stream} = substr($$dataPt, 0, $length); |
1011
|
15
|
|
|
|
|
36
|
$$dataPt = substr($$dataPt, $length); |
1012
|
|
|
|
|
|
|
} else { |
1013
|
0
|
|
|
|
|
0
|
$$dict{_stream} = $$dataPt; |
1014
|
0
|
|
|
|
|
0
|
$$dataPt = ''; |
1015
|
|
|
|
|
|
|
} |
1016
|
43
|
|
|
|
|
75
|
last; |
1017
|
|
|
|
|
|
|
} |
1018
|
43
|
50
|
|
|
|
106
|
$raf->ReadLine($data) or last; |
1019
|
43
|
|
|
|
|
120
|
$$dataPt .= $data; |
1020
|
|
|
|
|
|
|
} |
1021
|
43
|
|
|
|
|
155
|
return $dict; |
1022
|
|
|
|
|
|
|
} |
1023
|
|
|
|
|
|
|
|
1024
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
1025
|
|
|
|
|
|
|
# Read to nested delimiter |
1026
|
|
|
|
|
|
|
# Inputs: 0) data reference, 1) optional raf reference |
1027
|
|
|
|
|
|
|
# Returns: data up to and including matching delimiter (or undef on error) |
1028
|
|
|
|
|
|
|
# - updates data reference with trailing data |
1029
|
|
|
|
|
|
|
# - unescapes characters in literal strings |
1030
|
|
|
|
|
|
|
my %closingDelim = ( # lookup for matching delimiter |
1031
|
|
|
|
|
|
|
'(' => ')', |
1032
|
|
|
|
|
|
|
'[' => ']', |
1033
|
|
|
|
|
|
|
'<' => '>', |
1034
|
|
|
|
|
|
|
'<<' => '>>', |
1035
|
|
|
|
|
|
|
); |
1036
|
|
|
|
|
|
|
sub ReadToNested($;$) |
1037
|
|
|
|
|
|
|
{ |
1038
|
1228
|
|
|
1228
|
0
|
1728
|
my ($dataPt, $raf) = @_; |
1039
|
1228
|
|
|
|
|
1734
|
my @delim = (''); # closing delimiter list, most deeply nested first |
1040
|
1228
|
|
|
|
|
2172
|
pos($$dataPt) = 0; # begin at start of data |
1041
|
1228
|
|
|
|
|
1707
|
for (;;) { |
1042
|
5744
|
100
|
|
|
|
30017
|
unless ($$dataPt =~ /(\\*)(\(|\)|<{1,2}|>{1,2}|\[|\]|%)/g) { |
1043
|
|
|
|
|
|
|
# must read some more data |
1044
|
1148
|
|
|
|
|
1239
|
my $buff; |
1045
|
1148
|
50
|
33
|
|
|
2616
|
last unless $raf and $raf->ReadLine($buff); |
1046
|
1148
|
|
|
|
|
2135
|
$$dataPt .= $buff; |
1047
|
1148
|
|
|
|
|
1932
|
pos($$dataPt) = length($$dataPt) - length($buff); |
1048
|
1148
|
|
|
|
|
1675
|
next; |
1049
|
|
|
|
|
|
|
} |
1050
|
|
|
|
|
|
|
# are we in a literal string? |
1051
|
4596
|
100
|
|
|
|
9080
|
if ($delim[0] eq ')') { |
|
|
50
|
|
|
|
|
|
1052
|
|
|
|
|
|
|
# ignore escaped delimiters (preceded by odd number of \'s) |
1053
|
434
|
50
|
|
|
|
763
|
next if length($1) & 0x01; |
1054
|
|
|
|
|
|
|
# ignore all delimiters but unescaped braces |
1055
|
434
|
50
|
33
|
|
|
1176
|
next unless $2 eq '(' or $2 eq ')'; |
1056
|
|
|
|
|
|
|
} elsif ($2 eq '%') { |
1057
|
|
|
|
|
|
|
# ignore the comment |
1058
|
0
|
|
|
|
|
0
|
my $pos = pos($$dataPt) - 1; |
1059
|
|
|
|
|
|
|
# remove everything from '%' up to but not including newline |
1060
|
0
|
|
|
|
|
0
|
$$dataPt =~ /.*/g; |
1061
|
0
|
|
|
|
|
0
|
my $end = pos($$dataPt); |
1062
|
0
|
|
|
|
|
0
|
$$dataPt = substr($$dataPt, 0, $pos) . substr($$dataPt, $end); |
1063
|
0
|
|
|
|
|
0
|
pos($$dataPt) = $pos; |
1064
|
0
|
|
|
|
|
0
|
next; |
1065
|
|
|
|
|
|
|
} |
1066
|
4596
|
100
|
|
|
|
7367
|
if ($closingDelim{$2}) { |
1067
|
|
|
|
|
|
|
# push the corresponding closing delimiter |
1068
|
2298
|
|
|
|
|
3894
|
unshift @delim, $closingDelim{$2}; |
1069
|
2298
|
|
|
|
|
2732
|
next; |
1070
|
|
|
|
|
|
|
} |
1071
|
2298
|
50
|
|
|
|
3359
|
unless ($2 eq $delim[0]) { |
1072
|
|
|
|
|
|
|
# handle the case where we find a ">>>" and interpret it |
1073
|
|
|
|
|
|
|
# as ">> >" instead of "> >>" |
1074
|
0
|
0
|
0
|
|
|
0
|
next unless $2 eq '>>' and $delim[0] eq '>'; |
1075
|
0
|
|
|
|
|
0
|
pos($$dataPt) = pos($$dataPt) - 1; |
1076
|
|
|
|
|
|
|
} |
1077
|
2298
|
|
|
|
|
2446
|
shift @delim; # remove from nesting list |
1078
|
2298
|
100
|
|
|
|
3566
|
next if $delim[0]; # keep going if we have more nested delimiters |
1079
|
1228
|
|
|
|
|
1449
|
my $pos = pos($$dataPt); |
1080
|
1228
|
|
|
|
|
1924
|
my $buff = substr($$dataPt, 0, $pos); |
1081
|
1228
|
|
|
|
|
1930
|
$$dataPt = substr($$dataPt, $pos); |
1082
|
1228
|
|
|
|
|
2278
|
return $buff; # success! |
1083
|
|
|
|
|
|
|
} |
1084
|
0
|
|
|
|
|
0
|
return undef; # didn't find matching delimiter |
1085
|
|
|
|
|
|
|
} |
1086
|
|
|
|
|
|
|
|
1087
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
1088
|
|
|
|
|
|
|
# Decode LZW-encoded data (ref 1) |
1089
|
|
|
|
|
|
|
# Inputs: 0) data reference |
1090
|
|
|
|
|
|
|
# Returns: true on success and data is decoded, or false and data is untouched |
1091
|
|
|
|
|
|
|
sub DecodeLZW($) |
1092
|
|
|
|
|
|
|
{ |
1093
|
0
|
|
|
0
|
0
|
0
|
my $dataPt = shift; |
1094
|
0
|
0
|
|
|
|
0
|
return 0 if length $$dataPt < 4; |
1095
|
0
|
|
|
|
|
0
|
my @lzw = (map(chr, 0..255), undef, undef); # LZW code table |
1096
|
0
|
|
|
|
|
0
|
my $mask = 0x01ff; # mask for least-significant 9 bits |
1097
|
0
|
|
|
|
|
0
|
my @dat = unpack 'n*', $$dataPt . "\0"; |
1098
|
0
|
|
|
|
|
0
|
my $word = ($dat[0] << 16) | $dat[1]; |
1099
|
0
|
|
|
|
|
0
|
my ($bit, $pos, $bits, $out) = (0, 2, 9, ''); |
1100
|
0
|
|
|
|
|
0
|
my $lastVal; |
1101
|
0
|
|
|
|
|
0
|
for (;;) { |
1102
|
|
|
|
|
|
|
# bits are packed MSB first in PDF LZW (the PDF spec doesn't mention this) |
1103
|
0
|
|
|
|
|
0
|
my $shift = 32 - ($bit + $bits); |
1104
|
0
|
0
|
|
|
|
0
|
if ($shift < 0) { |
1105
|
0
|
0
|
|
|
|
0
|
return 0 if $pos >= @dat; # missing EOD marker |
1106
|
0
|
|
|
|
|
0
|
$word = (($word & 0xffff) << 16) | $dat[$pos++]; # read next word |
1107
|
0
|
|
|
|
|
0
|
$bit -= 16; |
1108
|
0
|
|
|
|
|
0
|
$shift += 16; |
1109
|
|
|
|
|
|
|
}; |
1110
|
0
|
|
|
|
|
0
|
my $code = ($word >> $shift) & $mask; |
1111
|
0
|
|
|
|
|
0
|
$bit += $bits; |
1112
|
0
|
|
|
|
|
0
|
my $val = $lzw[$code]; |
1113
|
0
|
0
|
|
|
|
0
|
if (defined $val) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1114
|
|
|
|
|
|
|
# store new code as previous sequence plus 1st char of new sequence |
1115
|
0
|
0
|
|
|
|
0
|
push @lzw, $lastVal . substr($val, 0, 1) if defined $lastVal; |
1116
|
|
|
|
|
|
|
} elsif ($code == @lzw) { # new code |
1117
|
0
|
0
|
|
|
|
0
|
return 0 unless defined $lastVal; |
1118
|
|
|
|
|
|
|
# we are using the code that we are about to generate, so the last |
1119
|
|
|
|
|
|
|
# character in the new sequence must be the same as the first |
1120
|
|
|
|
|
|
|
# character in the previous sequence (makes sense if you think about it) |
1121
|
0
|
|
|
|
|
0
|
$val = $lastVal . substr($lastVal, 0, 1); |
1122
|
0
|
|
|
|
|
0
|
push @lzw, $val; |
1123
|
|
|
|
|
|
|
} elsif ($code == 256) { # clear table |
1124
|
0
|
|
|
|
|
0
|
splice @lzw, 258; |
1125
|
0
|
|
|
|
|
0
|
$bits = 9; |
1126
|
0
|
|
|
|
|
0
|
$mask = 0x1ff; |
1127
|
0
|
|
|
|
|
0
|
undef $lastVal; |
1128
|
0
|
|
|
|
|
0
|
next; |
1129
|
|
|
|
|
|
|
} elsif ($code == 257) { # EOD marker |
1130
|
0
|
|
|
|
|
0
|
last; # all done! |
1131
|
|
|
|
|
|
|
} else { |
1132
|
0
|
|
|
|
|
0
|
return 0; |
1133
|
|
|
|
|
|
|
} |
1134
|
0
|
|
|
|
|
0
|
$out .= $val; # add this byte sequence to the output |
1135
|
|
|
|
|
|
|
# we added a new entry to the LZW table, so we must increase |
1136
|
|
|
|
|
|
|
# the bit width if necessary, up to a maximum of 12 |
1137
|
0
|
0
|
0
|
|
|
0
|
@lzw >= $mask and $bits < 12 and ++$bits, $mask |= $mask << 1; |
1138
|
0
|
|
|
|
|
0
|
$lastVal = $val; |
1139
|
|
|
|
|
|
|
} |
1140
|
0
|
|
|
|
|
0
|
$$dataPt = $out; # return decompressed data |
1141
|
0
|
|
|
|
|
0
|
return 1; |
1142
|
|
|
|
|
|
|
} |
1143
|
|
|
|
|
|
|
|
1144
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
1145
|
|
|
|
|
|
|
# Decode filtered stream |
1146
|
|
|
|
|
|
|
# Inputs: 0) ExifTool object reference, 1) dictionary reference |
1147
|
|
|
|
|
|
|
# Returns: true if stream has been decoded OK |
1148
|
|
|
|
|
|
|
sub DecodeStream($$) |
1149
|
|
|
|
|
|
|
{ |
1150
|
43
|
|
|
43
|
0
|
53
|
local $_; |
1151
|
43
|
|
|
|
|
68
|
my ($et, $dict) = @_; |
1152
|
|
|
|
|
|
|
|
1153
|
43
|
50
|
|
|
|
77
|
return 0 unless $$dict{_stream}; # no stream to decode |
1154
|
|
|
|
|
|
|
|
1155
|
|
|
|
|
|
|
# get list of filters |
1156
|
43
|
|
|
|
|
70
|
my (@filters, @decodeParms, $filter); |
1157
|
43
|
50
|
|
|
|
112
|
if (ref $$dict{Filter} eq 'ARRAY') { |
|
|
50
|
|
|
|
|
|
1158
|
0
|
|
|
|
|
0
|
@filters = @{$$dict{Filter}}; |
|
0
|
|
|
|
|
0
|
|
1159
|
|
|
|
|
|
|
} elsif (defined $$dict{Filter}) { |
1160
|
0
|
|
|
|
|
0
|
@filters = ($$dict{Filter}); |
1161
|
|
|
|
|
|
|
} |
1162
|
|
|
|
|
|
|
# be sure we can process all the filters before we take the time to do the decryption |
1163
|
43
|
|
|
|
|
72
|
foreach $filter (@filters) { |
1164
|
0
|
0
|
|
|
|
0
|
next if $supportedFilter{$filter}; |
1165
|
0
|
|
|
|
|
0
|
$et->WarnOnce("Unsupported Filter $filter"); |
1166
|
0
|
|
|
|
|
0
|
return 0; |
1167
|
|
|
|
|
|
|
} |
1168
|
|
|
|
|
|
|
# apply decryption first if required (and if the default encryption |
1169
|
|
|
|
|
|
|
# has not been overridden by a Crypt filter. Note: the Crypt filter |
1170
|
|
|
|
|
|
|
# must be first in the Filter array: ref 3, page 38) |
1171
|
43
|
50
|
33
|
|
|
134
|
unless (defined $$dict{_decrypted} or ($filters[0] and $filters[0] eq '/Crypt')) { |
|
|
|
33
|
|
|
|
|
1172
|
43
|
|
|
|
|
79
|
CryptStream($dict, $lastFetched); |
1173
|
|
|
|
|
|
|
} |
1174
|
43
|
50
|
|
|
|
123
|
return 1 unless $$dict{Filter}; # Filter entry is mandatory |
1175
|
0
|
0
|
|
|
|
0
|
return 0 if defined $$dict{_filtered}; # avoid double-filtering |
1176
|
0
|
|
|
|
|
0
|
$$dict{_filtered} = 1; # set flag to prevent double-filtering |
1177
|
|
|
|
|
|
|
|
1178
|
|
|
|
|
|
|
# get array of DecodeParms dictionaries |
1179
|
0
|
0
|
|
|
|
0
|
if (ref $$dict{DecodeParms} eq 'ARRAY') { |
1180
|
0
|
|
|
|
|
0
|
@decodeParms = @{$$dict{DecodeParms}}; |
|
0
|
|
|
|
|
0
|
|
1181
|
|
|
|
|
|
|
} else { |
1182
|
0
|
|
|
|
|
0
|
@decodeParms = ($$dict{DecodeParms}); |
1183
|
|
|
|
|
|
|
} |
1184
|
|
|
|
|
|
|
|
1185
|
0
|
|
|
|
|
0
|
foreach $filter (@filters) { |
1186
|
0
|
|
|
|
|
0
|
my $decodeParms = shift @decodeParms; |
1187
|
|
|
|
|
|
|
|
1188
|
0
|
0
|
|
|
|
0
|
if ($filter eq '/FlateDecode') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1189
|
|
|
|
|
|
|
# make sure we support the predictor (if used) before decoding |
1190
|
0
|
|
|
|
|
0
|
my $pre; |
1191
|
0
|
0
|
|
|
|
0
|
if (ref $decodeParms eq 'HASH') { |
1192
|
0
|
|
|
|
|
0
|
$pre = $$decodeParms{Predictor}; |
1193
|
0
|
0
|
0
|
|
|
0
|
if ($pre and $pre ne '1' and $pre ne '12') { |
|
|
|
0
|
|
|
|
|
1194
|
0
|
|
|
|
|
0
|
$et->WarnOnce("FlateDecode Predictor $pre currently not supported"); |
1195
|
0
|
|
|
|
|
0
|
return 0; |
1196
|
|
|
|
|
|
|
} |
1197
|
|
|
|
|
|
|
} |
1198
|
0
|
0
|
|
|
|
0
|
if (eval { require Compress::Zlib }) { |
|
0
|
|
|
|
|
0
|
|
1199
|
0
|
|
|
|
|
0
|
my $inflate = Compress::Zlib::inflateInit(); |
1200
|
0
|
|
|
|
|
0
|
my ($buff, $stat); |
1201
|
0
|
0
|
|
|
|
0
|
$inflate and ($buff, $stat) = $inflate->inflate($$dict{_stream}); |
1202
|
0
|
0
|
0
|
|
|
0
|
if ($inflate and $stat == Compress::Zlib::Z_STREAM_END()) { |
1203
|
0
|
|
|
|
|
0
|
$$dict{_stream} = $buff; |
1204
|
|
|
|
|
|
|
} else { |
1205
|
0
|
|
|
|
|
0
|
$et->Warn('Error inflating stream'); |
1206
|
0
|
|
|
|
|
0
|
return 0; |
1207
|
|
|
|
|
|
|
} |
1208
|
|
|
|
|
|
|
} else { |
1209
|
0
|
|
|
|
|
0
|
$et->WarnOnce('Install Compress::Zlib to process filtered streams'); |
1210
|
0
|
|
|
|
|
0
|
return 0; |
1211
|
|
|
|
|
|
|
} |
1212
|
0
|
0
|
0
|
|
|
0
|
next unless $pre and $pre eq '12'; # 12 = 'up' prediction |
1213
|
|
|
|
|
|
|
|
1214
|
|
|
|
|
|
|
# apply anti-predictor |
1215
|
0
|
|
|
|
|
0
|
my $cols = $$decodeParms{Columns}; |
1216
|
0
|
0
|
|
|
|
0
|
unless ($cols) { |
1217
|
|
|
|
|
|
|
# currently only support 'up' prediction |
1218
|
0
|
|
|
|
|
0
|
$et->WarnOnce('No Columns for decoding stream'); |
1219
|
0
|
|
|
|
|
0
|
return 0; |
1220
|
|
|
|
|
|
|
} |
1221
|
0
|
|
|
|
|
0
|
my @bytes = unpack('C*', $$dict{_stream}); |
1222
|
0
|
|
|
|
|
0
|
my @pre = (0) x $cols; # initialize predictor array |
1223
|
0
|
|
|
|
|
0
|
my $buff = ''; |
1224
|
0
|
|
|
|
|
0
|
while (@bytes > $cols) { |
1225
|
0
|
0
|
|
|
|
0
|
unless (($_ = shift @bytes) == 2) { |
1226
|
0
|
|
|
|
|
0
|
$et->WarnOnce("Unsupported PNG filter $_"); # (yes, PNG) |
1227
|
0
|
|
|
|
|
0
|
return 0; |
1228
|
|
|
|
|
|
|
} |
1229
|
0
|
|
|
|
|
0
|
foreach (@pre) { |
1230
|
0
|
|
|
|
|
0
|
$_ = ($_ + shift(@bytes)) & 0xff; |
1231
|
|
|
|
|
|
|
} |
1232
|
0
|
|
|
|
|
0
|
$buff .= pack('C*', @pre); |
1233
|
|
|
|
|
|
|
} |
1234
|
0
|
|
|
|
|
0
|
$$dict{_stream} = $buff; |
1235
|
|
|
|
|
|
|
|
1236
|
|
|
|
|
|
|
} elsif ($filter eq '/Crypt') { |
1237
|
|
|
|
|
|
|
|
1238
|
|
|
|
|
|
|
# (we shouldn't have to check the _decrypted flag since we |
1239
|
|
|
|
|
|
|
# already checked the _filtered flag, but what the heck...) |
1240
|
0
|
0
|
|
|
|
0
|
next if defined $$dict{_decrypted}; |
1241
|
|
|
|
|
|
|
# assume Identity filter (the default) if DecodeParms are missing |
1242
|
0
|
0
|
|
|
|
0
|
next unless ref $decodeParms eq 'HASH'; |
1243
|
0
|
|
|
|
|
0
|
my $name = $$decodeParms{Name}; |
1244
|
0
|
0
|
0
|
|
|
0
|
next unless defined $name or $name eq 'Identity'; |
1245
|
0
|
0
|
|
|
|
0
|
if ($name ne 'StdCF') { |
1246
|
0
|
|
|
|
|
0
|
$et->WarnOnce("Unsupported Crypt Filter $name"); |
1247
|
0
|
|
|
|
|
0
|
return 0; |
1248
|
|
|
|
|
|
|
} |
1249
|
0
|
0
|
|
|
|
0
|
unless ($cryptInfo) { |
1250
|
0
|
|
|
|
|
0
|
$et->WarnOnce('Missing Encrypt StdCF entry'); |
1251
|
0
|
|
|
|
|
0
|
return 0; |
1252
|
|
|
|
|
|
|
} |
1253
|
|
|
|
|
|
|
# decrypt the stream manually because we want to: |
1254
|
|
|
|
|
|
|
# 1) ignore $cryptStream (StmF) setting |
1255
|
|
|
|
|
|
|
# 2) ignore EncryptMetadata setting (I can't find mention of how to |
1256
|
|
|
|
|
|
|
# reconcile this in the spec., but this would make sense) |
1257
|
|
|
|
|
|
|
# 3) avoid adding the crypt key extension (ref 3, page 58, Algorithm 1b) |
1258
|
|
|
|
|
|
|
# 4) set _decrypted flag so we will recrypt according to StmF when |
1259
|
|
|
|
|
|
|
# writing (since we don't yet write Filter'd streams) |
1260
|
0
|
|
|
|
|
0
|
Crypt(\$$dict{_stream}, 'none'); |
1261
|
0
|
0
|
|
|
|
0
|
$$dict{_decrypted} = ($cryptStream ? 1 : 0); |
1262
|
|
|
|
|
|
|
|
1263
|
|
|
|
|
|
|
} elsif ($filter eq '/LZWDecode') { |
1264
|
|
|
|
|
|
|
|
1265
|
|
|
|
|
|
|
# make sure we don't have any unsupported decoding parameters |
1266
|
0
|
0
|
|
|
|
0
|
if (ref $decodeParms eq 'HASH') { |
1267
|
0
|
0
|
|
|
|
0
|
if ($$decodeParms{Predictor}) { |
|
|
0
|
|
|
|
|
|
1268
|
0
|
|
|
|
|
0
|
$et->WarnOnce("LZWDecode Predictor $$decodeParms{Predictor} currently not supported"); |
1269
|
0
|
|
|
|
|
0
|
return 0; |
1270
|
|
|
|
|
|
|
} elsif ($$decodeParms{EarlyChange}) { |
1271
|
0
|
|
|
|
|
0
|
$et->WarnOnce("LZWDecode EarlyChange currently not supported"); |
1272
|
0
|
|
|
|
|
0
|
return 0; |
1273
|
|
|
|
|
|
|
} |
1274
|
|
|
|
|
|
|
} |
1275
|
0
|
0
|
|
|
|
0
|
unless (DecodeLZW(\$$dict{_stream})) { |
1276
|
0
|
|
|
|
|
0
|
$et->WarnOnce('LZW decompress error'); |
1277
|
0
|
|
|
|
|
0
|
return 0; |
1278
|
|
|
|
|
|
|
} |
1279
|
|
|
|
|
|
|
|
1280
|
|
|
|
|
|
|
} elsif ($filter eq '/ASCIIHexDecode') { |
1281
|
|
|
|
|
|
|
|
1282
|
0
|
|
|
|
|
0
|
$$dict{_stream} =~ s/>.*//; # truncate at '>' (end of data mark) |
1283
|
0
|
|
|
|
|
0
|
$$dict{_stream} =~ tr/0-9a-zA-Z//d; # remove illegal characters |
1284
|
0
|
|
|
|
|
0
|
$$dict{_stream} = pack 'H*', $$dict{_stream}; |
1285
|
|
|
|
|
|
|
|
1286
|
|
|
|
|
|
|
} elsif ($filter eq '/ASCII85Decode') { |
1287
|
|
|
|
|
|
|
|
1288
|
0
|
|
|
|
|
0
|
my ($err, @out, $i); |
1289
|
0
|
|
|
|
|
0
|
my ($n, $val) = (0, 0); |
1290
|
0
|
|
|
|
|
0
|
foreach (split //, $$dict{_stream}) { |
1291
|
0
|
0
|
0
|
|
|
0
|
if ($_ ge '!' and $_ le 'u') {; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1292
|
0
|
|
|
|
|
0
|
$val = 85 * $val + ord($_) - 33; |
1293
|
0
|
0
|
|
|
|
0
|
next unless ++$n == 5; |
1294
|
|
|
|
|
|
|
} elsif ($_ eq '~') { |
1295
|
0
|
0
|
|
|
|
0
|
$n == 1 and $err = 1; # error to have a single char in the last group of 5 |
1296
|
0
|
|
|
|
|
0
|
for ($i=$n; $i<5; ++$i) { $val *= 85; } |
|
0
|
|
|
|
|
0
|
|
1297
|
|
|
|
|
|
|
} elsif ($_ eq 'z') { |
1298
|
0
|
0
|
|
|
|
0
|
$n and $err = 2, last; # error if 'z' isn't the first char |
1299
|
0
|
|
|
|
|
0
|
$n = 5; |
1300
|
|
|
|
|
|
|
} else { |
1301
|
0
|
0
|
|
|
|
0
|
next if /^\s$/; # ignore white space |
1302
|
0
|
|
|
|
|
0
|
$err = 3, last; # any other character is an error |
1303
|
|
|
|
|
|
|
} |
1304
|
0
|
|
|
|
|
0
|
$val = unpack('V', pack('N', $val)); # reverse byte order |
1305
|
0
|
|
|
|
|
0
|
while (--$n > 0) { |
1306
|
0
|
|
|
|
|
0
|
push @out, $val & 0xff; |
1307
|
0
|
|
|
|
|
0
|
$val >>= 8; |
1308
|
|
|
|
|
|
|
} |
1309
|
0
|
0
|
|
|
|
0
|
last if $_ eq '~'; |
1310
|
|
|
|
|
|
|
# (both $n and $val are zero again now) |
1311
|
|
|
|
|
|
|
} |
1312
|
0
|
0
|
|
|
|
0
|
$err and $et->WarnOnce("ASCII85Decode error $err"); |
1313
|
0
|
|
|
|
|
0
|
$$dict{_stream} = pack('C*', @out); |
1314
|
|
|
|
|
|
|
} |
1315
|
|
|
|
|
|
|
} |
1316
|
0
|
|
|
|
|
0
|
return 1; |
1317
|
|
|
|
|
|
|
} |
1318
|
|
|
|
|
|
|
|
1319
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
1320
|
|
|
|
|
|
|
# Initialize state for RC4 en/decryption (ref 2) |
1321
|
|
|
|
|
|
|
# Inputs: 0) RC4 key string |
1322
|
|
|
|
|
|
|
# Returns: RC4 key hash reference |
1323
|
|
|
|
|
|
|
sub RC4Init($) |
1324
|
|
|
|
|
|
|
{ |
1325
|
22
|
|
|
22
|
0
|
49
|
my @key = unpack('C*', shift); |
1326
|
22
|
|
|
|
|
172
|
my @state = (0 .. 255); |
1327
|
22
|
|
|
|
|
31
|
my ($i, $j) = (0, 0); |
1328
|
22
|
|
|
|
|
38
|
while ($i < 256) { |
1329
|
5632
|
|
|
|
|
5874
|
my $st = $state[$i]; |
1330
|
5632
|
|
|
|
|
6635
|
$j = ($j + $st + $key[$i % scalar(@key)]) & 0xff; |
1331
|
5632
|
|
|
|
|
6082
|
$state[$i++] = $state[$j]; |
1332
|
5632
|
|
|
|
|
7473
|
$state[$j] = $st; |
1333
|
|
|
|
|
|
|
} |
1334
|
22
|
|
|
|
|
80
|
return { State => \@state, XY => [ 0, 0 ] }; |
1335
|
|
|
|
|
|
|
} |
1336
|
|
|
|
|
|
|
|
1337
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
1338
|
|
|
|
|
|
|
# Apply RC4 en/decryption (ref 2) |
1339
|
|
|
|
|
|
|
# Inputs: 0) data reference, 1) RC4 key hash reference or RC4 key string |
1340
|
|
|
|
|
|
|
# - can call this method directly with a key string, or with with the key |
1341
|
|
|
|
|
|
|
# reference returned by RC4Init |
1342
|
|
|
|
|
|
|
# - RC4 is a symmetric algorithm, so encryption is the same as decryption |
1343
|
|
|
|
|
|
|
sub RC4Crypt($$) |
1344
|
|
|
|
|
|
|
{ |
1345
|
22
|
|
|
22
|
0
|
39
|
my ($dataPt, $key) = @_; |
1346
|
22
|
50
|
|
|
|
46
|
$key = RC4Init($key) unless ref $key eq 'HASH'; |
1347
|
22
|
|
|
|
|
34
|
my $state = $$key{State}; |
1348
|
22
|
|
|
|
|
26
|
my ($x, $y) = @{$$key{XY}}; |
|
22
|
|
|
|
|
33
|
|
1349
|
|
|
|
|
|
|
|
1350
|
22
|
|
|
|
|
59
|
my @data = unpack('C*', $$dataPt); |
1351
|
22
|
|
|
|
|
35
|
foreach (@data) { |
1352
|
356
|
|
|
|
|
373
|
$x = ($x + 1) & 0xff; |
1353
|
356
|
|
|
|
|
365
|
my $stx = $$state[$x]; |
1354
|
356
|
|
|
|
|
367
|
$y = ($stx + $y) & 0xff; |
1355
|
356
|
|
|
|
|
383
|
my $sty = $$state[$x] = $$state[$y]; |
1356
|
356
|
|
|
|
|
357
|
$$state[$y] = $stx; |
1357
|
356
|
|
|
|
|
440
|
$_ ^= $$state[($stx + $sty) & 0xff]; |
1358
|
|
|
|
|
|
|
} |
1359
|
22
|
|
|
|
|
44
|
$$key{XY} = [ $x, $y ]; |
1360
|
22
|
|
|
|
|
155
|
$$dataPt = pack('C*', @data); |
1361
|
|
|
|
|
|
|
} |
1362
|
|
|
|
|
|
|
|
1363
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
1364
|
|
|
|
|
|
|
# Update AES cipher with a bit of data |
1365
|
|
|
|
|
|
|
# Inputs: 0) data |
1366
|
|
|
|
|
|
|
# Returns: encrypted data |
1367
|
|
|
|
|
|
|
my $cipherMore; |
1368
|
|
|
|
|
|
|
sub CipherUpdate($) |
1369
|
|
|
|
|
|
|
{ |
1370
|
0
|
|
|
0
|
0
|
0
|
my $dat = shift; |
1371
|
0
|
|
|
|
|
0
|
my $pos = 0; |
1372
|
0
|
0
|
|
|
|
0
|
$dat = $cipherMore . $dat if length $dat; |
1373
|
0
|
|
|
|
|
0
|
while ($pos + 16 <= length($dat)) { |
1374
|
0
|
|
|
|
|
0
|
substr($dat,$pos,16) = Image::ExifTool::AES::Cipher(substr($dat,$pos,16)); |
1375
|
0
|
|
|
|
|
0
|
$pos += 16; |
1376
|
|
|
|
|
|
|
} |
1377
|
0
|
0
|
|
|
|
0
|
if ($pos < length $dat) { |
1378
|
0
|
|
|
|
|
0
|
$cipherMore = substr($dat,$pos); |
1379
|
0
|
|
|
|
|
0
|
$dat = substr($dat,0,$pos); |
1380
|
|
|
|
|
|
|
} else { |
1381
|
0
|
|
|
|
|
0
|
$cipherMore = ''; |
1382
|
|
|
|
|
|
|
} |
1383
|
0
|
|
|
|
|
0
|
return $dat; |
1384
|
|
|
|
|
|
|
} |
1385
|
|
|
|
|
|
|
|
1386
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
1387
|
|
|
|
|
|
|
# Get encrypted hash |
1388
|
|
|
|
|
|
|
# Inputs: 0) Password, 1) salt, 2) vector, 3) encryption revision |
1389
|
|
|
|
|
|
|
# Returns: hash |
1390
|
|
|
|
|
|
|
sub GetHash($$$$) |
1391
|
|
|
|
|
|
|
{ |
1392
|
6
|
|
|
6
|
0
|
20
|
my ($password, $salt, $vector, $rev) = @_; |
1393
|
|
|
|
|
|
|
|
1394
|
|
|
|
|
|
|
# return Rev 5 hash |
1395
|
6
|
50
|
|
|
|
46
|
return Digest::SHA::sha256($password, $salt, $vector) if $rev == 5; |
1396
|
|
|
|
|
|
|
|
1397
|
|
|
|
|
|
|
# compute Rev 6 hardened hash |
1398
|
|
|
|
|
|
|
# (ref http://code.google.com/p/origami-pdf/source/browse/lib/origami/encryption.rb) |
1399
|
0
|
|
|
|
|
0
|
my $blockSize = 32; |
1400
|
0
|
|
|
|
|
0
|
my $input = Digest::SHA::sha256($password, $salt, $vector) . ("\0" x 32); |
1401
|
0
|
|
|
|
|
0
|
my $key = substr($input, 0, 16); |
1402
|
0
|
|
|
|
|
0
|
my $iv = substr($input, 16, 16); |
1403
|
0
|
|
|
|
|
0
|
my $h; |
1404
|
0
|
|
|
|
|
0
|
my $x = ''; |
1405
|
0
|
|
|
|
|
0
|
my $i = 0; |
1406
|
0
|
|
0
|
|
|
0
|
while ($i < 64 or $i < ord(substr($x,-1,1))+32) { |
1407
|
|
|
|
|
|
|
|
1408
|
0
|
|
|
|
|
0
|
my $block = substr($input, 0, $blockSize); |
1409
|
0
|
|
|
|
|
0
|
$x = ''; |
1410
|
0
|
|
|
|
|
0
|
Image::ExifTool::AES::Crypt(\$x, $key, $iv, 1); |
1411
|
0
|
|
|
|
|
0
|
$cipherMore = ''; |
1412
|
|
|
|
|
|
|
|
1413
|
0
|
|
|
|
|
0
|
my ($j, $digest); |
1414
|
0
|
|
|
|
|
0
|
for ($j=0; $j<64; ++$j) { |
1415
|
0
|
|
|
|
|
0
|
$x = ''; |
1416
|
0
|
0
|
|
|
|
0
|
$x .= CipherUpdate($password) if length $password; |
1417
|
0
|
|
|
|
|
0
|
$x .= CipherUpdate($block); |
1418
|
0
|
0
|
|
|
|
0
|
$x .= CipherUpdate($vector) if length $vector; |
1419
|
0
|
0
|
|
|
|
0
|
if ($j == 0) { |
1420
|
0
|
|
|
|
|
0
|
my @a = unpack('C16', $x); |
1421
|
0
|
|
|
|
|
0
|
my $sum = 0; |
1422
|
0
|
|
|
|
|
0
|
$sum += $_ foreach @a; |
1423
|
|
|
|
|
|
|
# set SHA block size (32, 48 or 64 bytes = SHA-256, 384 or 512) |
1424
|
0
|
|
|
|
|
0
|
$blockSize = 32 + ($sum % 3) * 16; |
1425
|
0
|
|
|
|
|
0
|
$digest = Digest::SHA->new($blockSize * 8); |
1426
|
|
|
|
|
|
|
} |
1427
|
0
|
|
|
|
|
0
|
$digest->add($x); |
1428
|
|
|
|
|
|
|
} |
1429
|
|
|
|
|
|
|
|
1430
|
0
|
|
|
|
|
0
|
$h = $digest->digest(); |
1431
|
0
|
|
|
|
|
0
|
$key = substr($h, 0, 16); |
1432
|
0
|
|
|
|
|
0
|
substr($input,0,16) = $h; |
1433
|
0
|
|
|
|
|
0
|
$iv = substr($h, 16, 16); |
1434
|
0
|
|
|
|
|
0
|
++$i; |
1435
|
|
|
|
|
|
|
} |
1436
|
0
|
|
|
|
|
0
|
return substr($h, 0, 32); |
1437
|
|
|
|
|
|
|
} |
1438
|
|
|
|
|
|
|
|
1439
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
1440
|
|
|
|
|
|
|
# Initialize decryption |
1441
|
|
|
|
|
|
|
# Inputs: 0) ExifTool object reference, 1) Encrypt dictionary reference, |
1442
|
|
|
|
|
|
|
# 2) ID from file trailer dictionary |
1443
|
|
|
|
|
|
|
# Returns: error string or undef on success (and sets $cryptInfo) |
1444
|
|
|
|
|
|
|
sub DecryptInit($$$) |
1445
|
|
|
|
|
|
|
{ |
1446
|
4
|
|
|
4
|
0
|
22
|
local $_; |
1447
|
4
|
|
|
|
|
10
|
my ($et, $encrypt, $id) = @_; |
1448
|
|
|
|
|
|
|
|
1449
|
4
|
|
|
|
|
5
|
undef $cryptInfo; |
1450
|
4
|
50
|
33
|
|
|
19
|
unless ($encrypt and ref $encrypt eq 'HASH') { |
1451
|
0
|
|
|
|
|
0
|
return 'Error loading Encrypt object'; |
1452
|
|
|
|
|
|
|
} |
1453
|
4
|
|
|
|
|
7
|
my $filt = $$encrypt{Filter}; |
1454
|
4
|
50
|
33
|
|
|
28
|
unless ($filt and $filt =~ s/^\///) { |
1455
|
0
|
|
|
|
|
0
|
return 'Encrypt dictionary has no Filter!'; |
1456
|
|
|
|
|
|
|
} |
1457
|
|
|
|
|
|
|
# extract some interesting tags |
1458
|
4
|
|
50
|
|
|
12
|
my $ver = $$encrypt{V} || 0; |
1459
|
4
|
|
100
|
|
|
14
|
my $rev = $$encrypt{R} || 0; |
1460
|
4
|
|
|
|
|
10
|
my $enc = "$filt V$ver"; |
1461
|
4
|
50
|
|
|
|
14
|
$enc .= ".$rev" if $filt eq 'Standard'; |
1462
|
4
|
50
|
33
|
|
|
12
|
$enc .= " ($1)" if $$encrypt{SubFilter} and $$encrypt{SubFilter} =~ /^\/(.*)/; |
1463
|
4
|
50
|
100
|
|
|
18
|
$enc .= ' (' . ($$encrypt{Length} || 40) . '-bit)' if $filt eq 'Standard'; |
1464
|
4
|
|
|
|
|
11
|
my $tagTablePtr = GetTagTable('Image::ExifTool::PDF::Encrypt'); |
1465
|
4
|
|
|
|
|
16
|
$et->HandleTag($tagTablePtr, 'Filter', $enc); |
1466
|
4
|
50
|
|
|
|
16
|
if ($filt ne 'Standard') { |
|
|
50
|
|
|
|
|
|
1467
|
0
|
|
|
|
|
0
|
return "Encryption filter $filt currently not supported"; |
1468
|
|
|
|
|
|
|
} elsif (not defined $$encrypt{R}) { |
1469
|
0
|
|
|
|
|
0
|
return 'Standard security handler missing revision'; |
1470
|
|
|
|
|
|
|
} |
1471
|
4
|
50
|
33
|
|
|
26
|
unless ($$encrypt{O} and $$encrypt{P} and $$encrypt{U}) { |
|
|
|
33
|
|
|
|
|
1472
|
0
|
|
|
|
|
0
|
return 'Incomplete Encrypt specification'; |
1473
|
|
|
|
|
|
|
} |
1474
|
4
|
50
|
|
|
|
19
|
if ("$ver.$rev" >= 5.6) { |
1475
|
|
|
|
|
|
|
# apologize for poor performance (AES is a pure Perl implementation) |
1476
|
0
|
|
|
|
|
0
|
$et->Warn('Decryption is very slow for encryption V5.6 or higher', 3); |
1477
|
|
|
|
|
|
|
} |
1478
|
4
|
|
|
|
|
15
|
$et->HandleTag($tagTablePtr, 'P', $$encrypt{P}); |
1479
|
|
|
|
|
|
|
|
1480
|
4
|
|
|
|
|
7
|
my %parm; # optional parameters extracted from Encrypt dictionary |
1481
|
|
|
|
|
|
|
|
1482
|
4
|
100
|
66
|
|
|
25
|
if ($ver == 1 or $ver == 2) { |
|
|
50
|
66
|
|
|
|
|
1483
|
1
|
|
|
|
|
3
|
$cryptString = $cryptStream = 1; |
1484
|
|
|
|
|
|
|
} elsif ($ver == 4 or $ver == 5) { |
1485
|
|
|
|
|
|
|
# initialize our $cryptString and $cryptStream flags |
1486
|
3
|
|
|
|
|
4
|
foreach ('StrF', 'StmF') { |
1487
|
6
|
100
|
|
|
|
13
|
my $flagPt = $_ eq 'StrF' ? \$cryptString : \$cryptStream; |
1488
|
6
|
|
|
|
|
12
|
$$flagPt = $$encrypt{$_}; |
1489
|
6
|
50
|
33
|
|
|
18
|
undef $$flagPt if $$flagPt and $$flagPt eq '/Identity'; |
1490
|
6
|
50
|
33
|
|
|
20
|
return "Unsupported $_ encryption $$flagPt" if $$flagPt and $$flagPt ne '/StdCF'; |
1491
|
|
|
|
|
|
|
} |
1492
|
3
|
50
|
33
|
|
|
8
|
if ($cryptString or $cryptStream) { |
1493
|
|
|
|
|
|
|
return 'Missing or invalid Encrypt StdCF entry' unless ref $$encrypt{CF} eq 'HASH' and |
1494
|
3
|
50
|
33
|
|
|
18
|
ref $$encrypt{CF}{StdCF} eq 'HASH' and $$encrypt{CF}{StdCF}{CFM}; |
|
|
|
33
|
|
|
|
|
1495
|
3
|
|
|
|
|
7
|
my $cryptMeth = $$encrypt{CF}{StdCF}{CFM}; |
1496
|
3
|
50
|
|
|
|
15
|
unless ($cryptMeth =~ /^\/(V2|AESV2|AESV3)$/) { |
1497
|
0
|
|
|
|
|
0
|
return "Unsupported encryption method $cryptMeth"; |
1498
|
|
|
|
|
|
|
} |
1499
|
|
|
|
|
|
|
# set "_aesv2" or "_aesv3" flag in %$encrypt hash if AES encryption was used |
1500
|
3
|
50
|
|
|
|
18
|
$$encrypt{'_' . lc($1)} = 1 if $cryptMeth =~ /^\/(AESV2|AESV3)$/; |
1501
|
|
|
|
|
|
|
} |
1502
|
3
|
100
|
|
|
|
9
|
if ($ver == 5) { |
1503
|
|
|
|
|
|
|
# validate OE and UE entries |
1504
|
2
|
|
|
|
|
5
|
foreach ('OE', 'UE') { |
1505
|
4
|
50
|
|
|
|
9
|
return "Missing Encrypt $_ entry" unless $$encrypt{$_}; |
1506
|
4
|
|
|
|
|
9
|
$parm{$_} = ReadPDFValue($$encrypt{$_}); |
1507
|
4
|
50
|
|
|
|
11
|
return "Invalid Encrypt $_ entry" unless length $parm{$_} == 32; |
1508
|
|
|
|
|
|
|
} |
1509
|
2
|
|
|
|
|
10
|
require Image::ExifTool::AES; # will need this later |
1510
|
|
|
|
|
|
|
} |
1511
|
|
|
|
|
|
|
} else { |
1512
|
0
|
|
|
|
|
0
|
return "Encryption version $ver currently not supported"; |
1513
|
|
|
|
|
|
|
} |
1514
|
4
|
50
|
|
|
|
8
|
$id or return "Can't decrypt (no document ID)"; |
1515
|
|
|
|
|
|
|
|
1516
|
|
|
|
|
|
|
# make sure we have the necessary libraries available |
1517
|
4
|
100
|
|
|
|
12
|
if ($ver < 5) { |
1518
|
2
|
50
|
|
|
|
4
|
unless (eval { require Digest::MD5 }) { |
|
2
|
|
|
|
|
14
|
|
1519
|
0
|
|
|
|
|
0
|
return "Install Digest::MD5 to process encrypted PDF"; |
1520
|
|
|
|
|
|
|
} |
1521
|
|
|
|
|
|
|
} else { |
1522
|
2
|
50
|
|
|
|
4
|
unless (eval { require Digest::SHA }) { |
|
2
|
|
|
|
|
10
|
|
1523
|
0
|
|
|
|
|
0
|
return "Install Digest::SHA to process AES-256 encrypted PDF"; |
1524
|
|
|
|
|
|
|
} |
1525
|
|
|
|
|
|
|
} |
1526
|
|
|
|
|
|
|
|
1527
|
|
|
|
|
|
|
# calculate file-level en/decryption key |
1528
|
4
|
|
|
|
|
8
|
my $pad = "\x28\xBF\x4E\x5E\x4E\x75\x8A\x41\x64\x00\x4E\x56\xFF\xFA\x01\x08". |
1529
|
|
|
|
|
|
|
"\x2E\x2E\x00\xB6\xD0\x68\x3E\x80\x2F\x0C\xA9\xFE\x64\x53\x69\x7A"; |
1530
|
4
|
|
|
|
|
10
|
my $o = ReadPDFValue($$encrypt{O}); |
1531
|
4
|
|
|
|
|
10
|
my $u = ReadPDFValue($$encrypt{U}); |
1532
|
|
|
|
|
|
|
|
1533
|
|
|
|
|
|
|
# set flag indicating whether metadata is encrypted |
1534
|
|
|
|
|
|
|
# (in version 4 and higher, metadata streams may not be encrypted) |
1535
|
4
|
100
|
100
|
|
|
25
|
if ($ver < 4 or not $$encrypt{EncryptMetadata} or $$encrypt{EncryptMetadata} !~ /false/i) { |
|
|
|
66
|
|
|
|
|
1536
|
3
|
|
|
|
|
7
|
$$encrypt{_meta} = 1; |
1537
|
|
|
|
|
|
|
} |
1538
|
|
|
|
|
|
|
# try no password first, then try provided password if available |
1539
|
4
|
|
|
|
|
6
|
my ($try, $key); |
1540
|
4
|
|
|
|
|
7
|
for ($try=0; ; ++$try) { |
1541
|
5
|
|
|
|
|
7
|
my $password; |
1542
|
5
|
100
|
|
|
|
12
|
if ($try == 0) { |
|
|
50
|
|
|
|
|
|
1543
|
4
|
|
|
|
|
8
|
$password = ''; |
1544
|
|
|
|
|
|
|
} elsif ($try == 1) { |
1545
|
1
|
|
|
|
|
4
|
$password = $et->Options('Password'); |
1546
|
1
|
50
|
|
|
|
5
|
return 'Document is password protected (use Password option)' unless defined $password; |
1547
|
|
|
|
|
|
|
# make sure there is no UTF-8 flag on the password |
1548
|
1
|
50
|
33
|
|
|
5
|
if ($] >= 5.006 and (eval { require Encode; Encode::is_utf8($password) } or $@)) { |
|
|
|
33
|
|
|
|
|
1549
|
|
|
|
|
|
|
# repack by hand if Encode isn't available |
1550
|
0
|
0
|
|
|
|
0
|
$password = $@ ? pack('C*',unpack($] < 5.010000 ? 'U0C*' : 'C0C*',$password)) : Encode::encode('utf8',$password); |
|
|
0
|
|
|
|
|
|
1551
|
|
|
|
|
|
|
} |
1552
|
|
|
|
|
|
|
} else { |
1553
|
0
|
|
|
|
|
0
|
return 'Incorrect password'; |
1554
|
|
|
|
|
|
|
} |
1555
|
5
|
100
|
|
|
|
8
|
if ($ver < 5) { |
1556
|
2
|
50
|
|
|
|
6
|
if (length $password) { |
1557
|
|
|
|
|
|
|
# password must be encoding in PDFDocEncoding (ref iso32000) |
1558
|
0
|
|
|
|
|
0
|
$password = $et->Encode($password, 'PDFDoc'); |
1559
|
|
|
|
|
|
|
# truncate or pad the password to exactly 32 bytes |
1560
|
0
|
0
|
|
|
|
0
|
if (length($password) > 32) { |
|
|
0
|
|
|
|
|
|
1561
|
0
|
|
|
|
|
0
|
$password = substr($password, 0, 32); |
1562
|
|
|
|
|
|
|
} elsif (length($password) < 32) { |
1563
|
0
|
|
|
|
|
0
|
$password .= substr($pad, 0, 32-length($password)); |
1564
|
|
|
|
|
|
|
} |
1565
|
|
|
|
|
|
|
} else { |
1566
|
2
|
|
|
|
|
4
|
$password = $pad; |
1567
|
|
|
|
|
|
|
} |
1568
|
2
|
|
|
|
|
10
|
$key = $password . $o . pack('V', $$encrypt{P}) . $id; |
1569
|
2
|
|
|
|
|
3
|
my $rep = 1; |
1570
|
2
|
100
|
66
|
|
|
12
|
if ($rev == 3 or $rev == 4) { |
1571
|
|
|
|
|
|
|
# must add this if metadata not encrypted |
1572
|
1
|
50
|
|
|
|
4
|
$key .= "\xff\xff\xff\xff" unless $$encrypt{_meta}; |
1573
|
1
|
|
|
|
|
3
|
$rep += 50; # repeat MD5 50 more times if revision is 3 or greater |
1574
|
|
|
|
|
|
|
} |
1575
|
2
|
|
|
|
|
3
|
my ($len, $i, $dat); |
1576
|
2
|
100
|
|
|
|
5
|
if ($ver == 1) { |
1577
|
1
|
|
|
|
|
2
|
$len = 5; |
1578
|
|
|
|
|
|
|
} else { |
1579
|
1
|
|
50
|
|
|
13
|
$len = $$encrypt{Length} || 40; |
1580
|
1
|
50
|
|
|
|
4
|
$len >= 40 or return 'Bad Encrypt Length'; |
1581
|
1
|
|
|
|
|
3
|
$len = int($len / 8); |
1582
|
|
|
|
|
|
|
} |
1583
|
2
|
|
|
|
|
7
|
for ($i=0; $i<$rep; ++$i) { |
1584
|
52
|
|
|
|
|
119
|
$key = substr(Digest::MD5::md5($key), 0, $len); |
1585
|
|
|
|
|
|
|
} |
1586
|
|
|
|
|
|
|
# decrypt U to see if a user password is required |
1587
|
2
|
100
|
|
|
|
6
|
if ($rev >= 3) { |
1588
|
1
|
|
|
|
|
4
|
$dat = Digest::MD5::md5($pad . $id); |
1589
|
1
|
|
|
|
|
4
|
RC4Crypt(\$dat, $key); |
1590
|
1
|
|
|
|
|
4
|
for ($i=1; $i<=19; ++$i) { |
1591
|
19
|
|
|
|
|
35
|
my @key = unpack('C*', $key); |
1592
|
19
|
|
|
|
|
27
|
foreach (@key) { $_ ^= $i; } |
|
304
|
|
|
|
|
316
|
|
1593
|
19
|
|
|
|
|
42
|
RC4Crypt(\$dat, pack('C*', @key)); |
1594
|
|
|
|
|
|
|
} |
1595
|
1
|
|
|
|
|
3
|
$dat .= substr($u, 16); |
1596
|
|
|
|
|
|
|
} else { |
1597
|
1
|
|
|
|
|
2
|
$dat = $pad; |
1598
|
1
|
|
|
|
|
3
|
RC4Crypt(\$dat, $key); |
1599
|
|
|
|
|
|
|
} |
1600
|
2
|
50
|
|
|
|
19
|
last if $dat eq $u; # all done if this was the correct key |
1601
|
|
|
|
|
|
|
} else { |
1602
|
3
|
50
|
33
|
|
|
13
|
return 'Invalid O or U Encrypt entries' if length($o) < 48 or length($u) < 48; |
1603
|
3
|
100
|
|
|
|
6
|
if (length $password) { |
1604
|
|
|
|
|
|
|
# Note: this should be good for passwords containing reasonable characters, |
1605
|
|
|
|
|
|
|
# but to be bullet-proof we need to apply the SASLprep (IETF RFC 4013) profile |
1606
|
|
|
|
|
|
|
# of stringprep (IETF RFC 3454) to the password before encoding in UTF-8 |
1607
|
1
|
|
|
|
|
5
|
$password = $et->Encode($password, 'UTF8'); |
1608
|
1
|
50
|
|
|
|
5
|
$password = substr($password, 0, 127) if length($password) > 127; |
1609
|
|
|
|
|
|
|
} |
1610
|
|
|
|
|
|
|
# test for the owner password |
1611
|
3
|
|
|
|
|
10
|
my $sha = GetHash($password, substr($o,32,8), substr($u,0,48), $rev); |
1612
|
3
|
100
|
|
|
|
11
|
if ($sha eq substr($o, 0, 32)) { |
1613
|
2
|
|
|
|
|
7
|
$key = GetHash($password, substr($o,40,8), substr($u,0,48), $rev); |
1614
|
2
|
|
|
|
|
5
|
my $dat = ("\0" x 16) . $parm{OE}; |
1615
|
|
|
|
|
|
|
# decrypt with no padding |
1616
|
2
|
|
|
|
|
8
|
my $err = Image::ExifTool::AES::Crypt(\$dat, $key, 0, 1); |
1617
|
2
|
50
|
|
|
|
7
|
return $err if $err; |
1618
|
2
|
|
|
|
|
4
|
$key = $dat; # use this as the file decryption key |
1619
|
2
|
|
|
|
|
4
|
last; |
1620
|
|
|
|
|
|
|
} |
1621
|
|
|
|
|
|
|
# test for the user password |
1622
|
1
|
|
|
|
|
4
|
$sha = GetHash($password, substr($u,32,8), '', $rev); |
1623
|
1
|
50
|
|
|
|
5
|
if ($sha eq substr($u, 0, 32)) { |
1624
|
0
|
|
|
|
|
0
|
$key = GetHash($password, substr($u,40,8), '', $rev); |
1625
|
0
|
|
|
|
|
0
|
my $dat = ("\0" x 16) . $parm{UE}; |
1626
|
0
|
|
|
|
|
0
|
my $err = Image::ExifTool::AES::Crypt(\$dat, $key, 0, 1); |
1627
|
0
|
0
|
|
|
|
0
|
return $err if $err; |
1628
|
0
|
|
|
|
|
0
|
$key = $dat; # use this as the file decryption key |
1629
|
0
|
|
|
|
|
0
|
last; |
1630
|
|
|
|
|
|
|
} |
1631
|
|
|
|
|
|
|
} |
1632
|
|
|
|
|
|
|
} |
1633
|
4
|
|
|
|
|
11
|
$$encrypt{_key} = $key; # save the file-level encryption key |
1634
|
4
|
|
|
|
|
6
|
$cryptInfo = $encrypt; # save reference to the file-level Encrypt object |
1635
|
4
|
|
|
|
|
16
|
return undef; # success! |
1636
|
|
|
|
|
|
|
} |
1637
|
|
|
|
|
|
|
|
1638
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
1639
|
|
|
|
|
|
|
# Decrypt/Encrypt data |
1640
|
|
|
|
|
|
|
# Inputs: 0) data ref |
1641
|
|
|
|
|
|
|
# 1) PDF object reference to use as crypt key extension (may be 'none' to |
1642
|
|
|
|
|
|
|
# avoid extending the encryption key, as for streams with Crypt Filter) |
1643
|
|
|
|
|
|
|
# 2) encrypt flag (false for decryption) |
1644
|
|
|
|
|
|
|
sub Crypt($$;$) |
1645
|
|
|
|
|
|
|
{ |
1646
|
29
|
100
|
|
29
|
0
|
84
|
return unless $cryptInfo; |
1647
|
4
|
|
|
|
|
9
|
my ($dataPt, $keyExt, $encrypt) = @_; |
1648
|
|
|
|
|
|
|
# do not decrypt if the key extension object is undefined |
1649
|
|
|
|
|
|
|
# (this doubles as a flag to disable decryption/encryption) |
1650
|
4
|
50
|
|
|
|
10
|
return unless defined $keyExt; |
1651
|
4
|
|
|
|
|
7
|
my $key = $$cryptInfo{_key}; |
1652
|
|
|
|
|
|
|
# apply the necessary crypt key extension |
1653
|
4
|
100
|
|
|
|
11
|
unless ($$cryptInfo{_aesv3}) { |
1654
|
2
|
50
|
|
|
|
7
|
unless ($keyExt eq 'none') { |
1655
|
|
|
|
|
|
|
# extend crypt key using object and generation number |
1656
|
2
|
50
|
|
|
|
15
|
unless ($keyExt =~ /^(I\d+ )?(\d+) (\d+)/) { |
1657
|
0
|
|
|
|
|
0
|
$$cryptInfo{_error} = 'Invalid object reference for encryption'; |
1658
|
0
|
|
|
|
|
0
|
return; |
1659
|
|
|
|
|
|
|
} |
1660
|
2
|
|
|
|
|
25
|
$key .= substr(pack('V', $2), 0, 3) . substr(pack('V', $3), 0, 2); |
1661
|
|
|
|
|
|
|
} |
1662
|
|
|
|
|
|
|
# add AES-128 salt if necessary (this little gem is conveniently |
1663
|
|
|
|
|
|
|
# omitted from the Adobe PDF 1.6 documentation, causing me to |
1664
|
|
|
|
|
|
|
# waste 12 hours trying to figure out why this wasn't working -- |
1665
|
|
|
|
|
|
|
# it appears in ISO32000 though, so I should have been using that) |
1666
|
2
|
100
|
|
|
|
7
|
$key .= 'sAlT' if $$cryptInfo{_aesv2}; |
1667
|
2
|
|
|
|
|
4
|
my $len = length($key); |
1668
|
2
|
|
|
|
|
8
|
$key = Digest::MD5::md5($key); # get 16-byte MD5 digest |
1669
|
2
|
100
|
|
|
|
8
|
$key = substr($key, 0, $len) if $len < 16; # trim if necessary |
1670
|
|
|
|
|
|
|
} |
1671
|
|
|
|
|
|
|
# perform the decryption/encryption |
1672
|
4
|
100
|
100
|
|
|
17
|
if ($$cryptInfo{_aesv2} or $$cryptInfo{_aesv3}) { |
1673
|
3
|
|
|
|
|
15
|
require Image::ExifTool::AES; |
1674
|
3
|
|
|
|
|
8
|
my $err = Image::ExifTool::AES::Crypt($dataPt, $key, $encrypt); |
1675
|
3
|
50
|
|
|
|
11
|
$err and $$cryptInfo{_error} = $err; |
1676
|
|
|
|
|
|
|
} else { |
1677
|
1
|
|
|
|
|
3
|
RC4Crypt($dataPt, $key); |
1678
|
|
|
|
|
|
|
} |
1679
|
|
|
|
|
|
|
} |
1680
|
|
|
|
|
|
|
|
1681
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
1682
|
|
|
|
|
|
|
# Decrypt/Encrypt stream data |
1683
|
|
|
|
|
|
|
# Inputs: 0) dictionary ref, 1) PDF object reference to use as crypt key extension |
1684
|
|
|
|
|
|
|
sub CryptStream($$) |
1685
|
|
|
|
|
|
|
{ |
1686
|
52
|
50
|
|
52
|
0
|
104
|
return unless $cryptStream; |
1687
|
0
|
|
|
|
|
0
|
my ($dict, $keyExt) = @_; |
1688
|
0
|
|
0
|
|
|
0
|
my $type = $$dict{Type} || ''; |
1689
|
|
|
|
|
|
|
# XRef streams are not encrypted (ref 3, page 50), |
1690
|
|
|
|
|
|
|
# and Metadata may or may not be encrypted |
1691
|
0
|
0
|
0
|
|
|
0
|
if ($cryptInfo and $type ne '/XRef' and |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
1692
|
|
|
|
|
|
|
($$cryptInfo{_meta} or $type ne '/Metadata')) |
1693
|
|
|
|
|
|
|
{ |
1694
|
0
|
|
|
|
|
0
|
Crypt(\$$dict{_stream}, $keyExt, $$dict{_decrypted}); |
1695
|
|
|
|
|
|
|
# toggle _decrypted flag |
1696
|
0
|
0
|
|
|
|
0
|
$$dict{_decrypted} = ($$dict{_decrypted} ? undef : 1); |
1697
|
|
|
|
|
|
|
} else { |
1698
|
0
|
|
|
|
|
0
|
$$dict{_decrypted} = 0; # stream should never be encrypted |
1699
|
|
|
|
|
|
|
} |
1700
|
|
|
|
|
|
|
} |
1701
|
|
|
|
|
|
|
|
1702
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
1703
|
|
|
|
|
|
|
# Generate a new PDF tag (based on its ID) and add it to a tag table |
1704
|
|
|
|
|
|
|
# Inputs: 0) tag table ref, 1) tag ID |
1705
|
|
|
|
|
|
|
# Returns: tag info ref |
1706
|
|
|
|
|
|
|
sub NewPDFTag($$) |
1707
|
|
|
|
|
|
|
{ |
1708
|
0
|
|
|
0
|
0
|
0
|
my ($tagTablePtr, $tag) = @_; |
1709
|
0
|
|
|
|
|
0
|
my $name = $tag; |
1710
|
|
|
|
|
|
|
# translate URL-like escape sequences |
1711
|
0
|
|
|
|
|
0
|
$name =~ s/#([0-9a-f]{2})/chr(hex($1))/ige; |
|
0
|
|
|
|
|
0
|
|
1712
|
0
|
|
|
|
|
0
|
$name =~ s/[^-\w]+/_/g; # translate invalid characters to an underline |
1713
|
0
|
|
|
|
|
0
|
$name =~ s/(^|_)([a-z])/\U$2/g; # start words with upper case |
1714
|
0
|
|
|
|
|
0
|
my $tagInfo = { Name => $name }; |
1715
|
0
|
|
|
|
|
0
|
AddTagToTable($tagTablePtr, $tag, $tagInfo); |
1716
|
0
|
|
|
|
|
0
|
return $tagInfo; |
1717
|
|
|
|
|
|
|
} |
1718
|
|
|
|
|
|
|
|
1719
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
1720
|
|
|
|
|
|
|
# Process AcroForm dictionary to set HasXMLFormsArchitecture flag |
1721
|
|
|
|
|
|
|
# Inputs: Same as ProcessDict |
1722
|
|
|
|
|
|
|
sub ProcessAcroForm($$$$;$$) |
1723
|
|
|
|
|
|
|
{ |
1724
|
0
|
|
|
0
|
0
|
0
|
my ($et, $tagTablePtr, $dict, $xref, $nesting, $type) = @_; |
1725
|
0
|
0
|
|
|
|
0
|
$et->HandleTag($tagTablePtr, '_has_xfa', $$dict{XFA} ? 'true' : 'false'); |
1726
|
0
|
|
|
|
|
0
|
return ProcessDict($et, $tagTablePtr, $dict, $xref, $nesting, $type); |
1727
|
|
|
|
|
|
|
} |
1728
|
|
|
|
|
|
|
|
1729
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
1730
|
|
|
|
|
|
|
# Expand array into a string |
1731
|
|
|
|
|
|
|
# Inputs: 0) array ref |
1732
|
|
|
|
|
|
|
# Return: string |
1733
|
|
|
|
|
|
|
sub ExpandArray($) |
1734
|
|
|
|
|
|
|
{ |
1735
|
0
|
|
|
0
|
0
|
0
|
my $val = shift; |
1736
|
0
|
|
|
|
|
0
|
my @list = @$val; |
1737
|
0
|
|
|
|
|
0
|
foreach (@list) { |
1738
|
0
|
0
|
|
|
|
0
|
ref $_ eq 'SCALAR' and $_ = "ref($$_)", next; |
1739
|
0
|
0
|
|
|
|
0
|
ref $_ eq 'ARRAY' and $_ = ExpandArray($_), next; |
1740
|
0
|
0
|
|
|
|
0
|
defined $_ or $_ = '', next; |
1741
|
|
|
|
|
|
|
} |
1742
|
0
|
|
|
|
|
0
|
return '[' . join(',',@list) . ']'; |
1743
|
|
|
|
|
|
|
} |
1744
|
|
|
|
|
|
|
|
1745
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
1746
|
|
|
|
|
|
|
# Process PDF dictionary extract tag values |
1747
|
|
|
|
|
|
|
# Inputs: 0) ExifTool object reference, 1) tag table reference |
1748
|
|
|
|
|
|
|
# 2) dictionary reference, 3) cross-reference table reference, |
1749
|
|
|
|
|
|
|
# 4) nesting depth, 5) dictionary capture type |
1750
|
|
|
|
|
|
|
sub ProcessDict($$$$;$$) |
1751
|
|
|
|
|
|
|
{ |
1752
|
350
|
|
|
350
|
0
|
665
|
my ($et, $tagTablePtr, $dict, $xref, $nesting, $type) = @_; |
1753
|
350
|
|
|
|
|
850
|
my $verbose = $et->Options('Verbose'); |
1754
|
350
|
|
|
|
|
582
|
my $unknown = $$tagTablePtr{EXTRACT_UNKNOWN}; |
1755
|
350
|
|
33
|
|
|
839
|
my $embedded = (defined $unknown and not $unknown and $et->Options('ExtractEmbedded')); |
1756
|
350
|
|
|
|
|
398
|
my @tags = @{$$dict{_tags}}; |
|
350
|
|
|
|
|
943
|
|
1757
|
350
|
|
|
|
|
486
|
my ($next, %join); |
1758
|
350
|
|
|
|
|
398
|
my $index = 0; |
1759
|
|
|
|
|
|
|
|
1760
|
350
|
|
100
|
|
|
699
|
$nesting = ($nesting || 0) + 1; |
1761
|
350
|
50
|
|
|
|
563
|
if ($nesting > 50) { |
1762
|
0
|
|
|
|
|
0
|
$et->WarnOnce('Nesting too deep (directory ignored)'); |
1763
|
0
|
|
|
|
|
0
|
return; |
1764
|
|
|
|
|
|
|
} |
1765
|
|
|
|
|
|
|
# save entire dictionary for rewriting if specified |
1766
|
350
|
50
|
100
|
|
|
919
|
if ($$et{PDF_CAPTURE} and $$tagTablePtr{VARS} and |
|
|
|
66
|
|
|
|
|
1767
|
|
|
|
|
|
|
$tagTablePtr->{VARS}->{CAPTURE}) |
1768
|
|
|
|
|
|
|
{ |
1769
|
66
|
|
|
|
|
82
|
my $name; |
1770
|
66
|
|
|
|
|
83
|
foreach $name (@{$tagTablePtr->{VARS}->{CAPTURE}}) { |
|
66
|
|
|
|
|
165
|
|
1771
|
82
|
100
|
|
|
|
156
|
next if $$et{PDF_CAPTURE}{$name}; |
1772
|
|
|
|
|
|
|
# make sure we load the right type if indicated |
1773
|
66
|
50
|
66
|
|
|
160
|
next if $type and $type ne $name; |
1774
|
66
|
|
|
|
|
118
|
$$et{PDF_CAPTURE}{$name} = $dict; |
1775
|
66
|
|
|
|
|
87
|
last; |
1776
|
|
|
|
|
|
|
} |
1777
|
|
|
|
|
|
|
} |
1778
|
|
|
|
|
|
|
# |
1779
|
|
|
|
|
|
|
# extract information from all tags in the dictionary |
1780
|
|
|
|
|
|
|
# |
1781
|
350
|
|
|
|
|
419
|
for (;;) { |
1782
|
1473
|
|
|
|
|
1693
|
my ($tag, $isSubDoc); |
1783
|
1473
|
100
|
33
|
|
|
2442
|
if (@tags) { |
|
|
50
|
|
|
|
|
|
1784
|
1123
|
|
|
|
|
1501
|
$tag = shift @tags; |
1785
|
|
|
|
|
|
|
} elsif (defined $next and not $next) { |
1786
|
0
|
|
|
|
|
0
|
$tag = 'Next'; |
1787
|
0
|
|
|
|
|
0
|
$next = 1; |
1788
|
|
|
|
|
|
|
} else { |
1789
|
350
|
|
|
|
|
426
|
last; |
1790
|
|
|
|
|
|
|
} |
1791
|
1123
|
|
|
|
|
1650
|
my $val = $$dict{$tag}; |
1792
|
1123
|
|
|
|
|
2285
|
my $tagInfo = $et->GetTagInfo($tagTablePtr, $tag); |
1793
|
1123
|
100
|
33
|
|
|
2355
|
if ($tagInfo) { |
|
|
50
|
33
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
1794
|
419
|
50
|
|
|
|
659
|
undef $tagInfo if $$tagInfo{NoProcess}; |
1795
|
|
|
|
|
|
|
} elsif ($embedded and $tag =~ /^(.*?)(\d+)$/ and |
1796
|
|
|
|
|
|
|
$$tagTablePtr{$1} and (ref $val ne 'SCALAR' or not $fetched{$$val})) |
1797
|
|
|
|
|
|
|
{ |
1798
|
0
|
|
|
|
|
0
|
my ($name, $num) = ($1, $2); |
1799
|
0
|
|
|
|
|
0
|
$tagInfo = $et->GetTagInfo($tagTablePtr, $name); |
1800
|
0
|
0
|
0
|
|
|
0
|
if (ref $tagInfo eq 'HASH' and $$tagInfo{JoinStreams}) { |
1801
|
0
|
|
|
|
|
0
|
$fetched{$$val} = 1; |
1802
|
0
|
|
|
|
|
0
|
my $obj = FetchObject($et, $$val, $xref, $tag); |
1803
|
0
|
0
|
|
|
|
0
|
$join{$name} = [] unless $join{$name}; |
1804
|
0
|
0
|
0
|
|
|
0
|
next unless ref $obj eq 'HASH' and $$obj{_stream}; |
1805
|
|
|
|
|
|
|
# save all the stream data to join later |
1806
|
0
|
|
|
|
|
0
|
DecodeStream($et, $obj); |
1807
|
0
|
|
|
|
|
0
|
$join{$name}->[$num] = $$obj{_stream}; |
1808
|
0
|
|
|
|
|
0
|
undef $tagInfo; # don't process |
1809
|
|
|
|
|
|
|
} else { |
1810
|
0
|
|
|
|
|
0
|
$isSubDoc = 1; # treat as a sub-document |
1811
|
|
|
|
|
|
|
} |
1812
|
|
|
|
|
|
|
} |
1813
|
1123
|
50
|
|
|
|
1600
|
if ($verbose) { |
1814
|
0
|
|
|
|
|
0
|
my ($val2, $extra); |
1815
|
0
|
0
|
|
|
|
0
|
if (ref $val eq 'SCALAR') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1816
|
0
|
|
|
|
|
0
|
$extra = ", indirect object ($$val)"; |
1817
|
0
|
0
|
0
|
|
|
0
|
if ($fetched{$$val}) { |
|
|
0
|
|
|
|
|
|
1818
|
0
|
|
|
|
|
0
|
$val2 = "ref($$val)"; |
1819
|
|
|
|
|
|
|
} elsif ($tag eq 'Next' and not $next) { |
1820
|
|
|
|
|
|
|
# handle 'Next' links after all others |
1821
|
0
|
|
|
|
|
0
|
$next = 0; |
1822
|
0
|
|
|
|
|
0
|
next; |
1823
|
|
|
|
|
|
|
} else { |
1824
|
0
|
|
|
|
|
0
|
$fetched{$$val} = 1; |
1825
|
0
|
|
|
|
|
0
|
$val = FetchObject($et, $$val, $xref, $tag); |
1826
|
0
|
0
|
|
|
|
0
|
unless (defined $val) { |
1827
|
0
|
|
|
|
|
0
|
my $str; |
1828
|
0
|
0
|
|
|
|
0
|
if (defined $lastOffset) { |
1829
|
0
|
|
|
|
|
0
|
$val2 = ''; |
1830
|
0
|
|
|
|
|
0
|
$str = 'Object was freed'; |
1831
|
|
|
|
|
|
|
} else { |
1832
|
0
|
|
|
|
|
0
|
$val2 = ''; |
1833
|
0
|
|
|
|
|
0
|
$str = 'Error reading object'; |
1834
|
|
|
|
|
|
|
} |
1835
|
0
|
|
|
|
|
0
|
$et->VPrint(0, "$$et{INDENT}${str}:\n"); |
1836
|
|
|
|
|
|
|
} |
1837
|
|
|
|
|
|
|
} |
1838
|
|
|
|
|
|
|
} elsif (ref $val eq 'HASH') { |
1839
|
0
|
|
|
|
|
0
|
$extra = ', direct dictionary'; |
1840
|
|
|
|
|
|
|
} elsif (ref $val eq 'ARRAY') { |
1841
|
0
|
|
|
|
|
0
|
$extra = ', direct array of ' . scalar(@$val) . ' objects'; |
1842
|
|
|
|
|
|
|
} else { |
1843
|
0
|
|
|
|
|
0
|
$extra = ', direct object'; |
1844
|
|
|
|
|
|
|
} |
1845
|
0
|
|
|
|
|
0
|
my $isSubdir; |
1846
|
0
|
0
|
|
|
|
0
|
if (ref $val eq 'HASH') { |
|
|
0
|
|
|
|
|
|
1847
|
0
|
|
|
|
|
0
|
$isSubdir = 1; |
1848
|
|
|
|
|
|
|
} elsif (ref $val eq 'ARRAY') { |
1849
|
|
|
|
|
|
|
# recurse into objects in arrays only if they are lists of |
1850
|
|
|
|
|
|
|
# dictionaries or indirect objects which could be dictionaries |
1851
|
0
|
0
|
|
|
|
0
|
$isSubdir = 1 if @$val; |
1852
|
0
|
|
|
|
|
0
|
foreach (@$val) { |
1853
|
0
|
0
|
0
|
|
|
0
|
next if ref $_ eq 'HASH' or ref $_ eq 'SCALAR'; |
1854
|
0
|
|
|
|
|
0
|
undef $isSubdir; |
1855
|
0
|
|
|
|
|
0
|
last; |
1856
|
|
|
|
|
|
|
} |
1857
|
|
|
|
|
|
|
} |
1858
|
0
|
0
|
|
|
|
0
|
if ($isSubdir) { |
1859
|
|
|
|
|
|
|
# create bogus subdirectory to recurse into this dict |
1860
|
0
|
0
|
|
|
|
0
|
$tagInfo or $tagInfo = { |
1861
|
|
|
|
|
|
|
Name => $tag, |
1862
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Unknown' }, |
1863
|
|
|
|
|
|
|
}; |
1864
|
|
|
|
|
|
|
} else { |
1865
|
0
|
0
|
|
|
|
0
|
$val2 = ExpandArray($val) if ref $val eq 'ARRAY'; |
1866
|
|
|
|
|
|
|
# generate tag info if we will use it later |
1867
|
0
|
0
|
0
|
|
|
0
|
if (not $tagInfo and defined $val and $unknown) { |
|
|
|
0
|
|
|
|
|
1868
|
0
|
|
|
|
|
0
|
$tagInfo = NewPDFTag($tagTablePtr, $tag); |
1869
|
|
|
|
|
|
|
} |
1870
|
|
|
|
|
|
|
} |
1871
|
0
|
|
0
|
|
|
0
|
$et->VerboseInfo($tag, $tagInfo, |
1872
|
|
|
|
|
|
|
Value => $val2 || $val, |
1873
|
|
|
|
|
|
|
Extra => $extra, |
1874
|
|
|
|
|
|
|
Index => $index++, |
1875
|
|
|
|
|
|
|
); |
1876
|
0
|
0
|
|
|
|
0
|
next unless defined $val; |
1877
|
|
|
|
|
|
|
} |
1878
|
1123
|
100
|
|
|
|
1610
|
unless ($tagInfo) { |
1879
|
|
|
|
|
|
|
# add any tag found in Info dictionary to table |
1880
|
704
|
50
|
|
|
|
1170
|
next unless $unknown; |
1881
|
0
|
|
|
|
|
0
|
$tagInfo = NewPDFTag($tagTablePtr, $tag); |
1882
|
|
|
|
|
|
|
} |
1883
|
|
|
|
|
|
|
# increment document number if necessary |
1884
|
419
|
|
|
|
|
558
|
my ($oldDocNum, $oldNumTags); |
1885
|
419
|
50
|
|
|
|
722
|
if ($isSubDoc) { |
1886
|
0
|
|
|
|
|
0
|
$oldDocNum = $$et{DOC_NUM}; |
1887
|
0
|
|
|
|
|
0
|
$oldNumTags = $$et{NUM_FOUND}; |
1888
|
0
|
|
|
|
|
0
|
$$et{DOC_NUM} = ++$$et{DOC_COUNT}; |
1889
|
|
|
|
|
|
|
} |
1890
|
419
|
100
|
|
|
|
720
|
if ($$tagInfo{SubDirectory}) { |
1891
|
|
|
|
|
|
|
# process the subdirectory |
1892
|
332
|
|
|
|
|
391
|
my @subDicts; |
1893
|
332
|
100
|
|
|
|
636
|
if (ref $val eq 'ARRAY') { |
1894
|
|
|
|
|
|
|
# hack to convert array to dictionary if necessary |
1895
|
37
|
50
|
33
|
|
|
97
|
if ($$tagInfo{ConvertToDict} and @$val == 2 and not ref $$val[0]) { |
|
|
|
33
|
|
|
|
|
1896
|
0
|
|
|
|
|
0
|
my $tg = $$val[0]; |
1897
|
0
|
|
|
|
|
0
|
$tg =~ s(^/)(); # remove name |
1898
|
0
|
|
|
|
|
0
|
my %dict = ( _tags => [ $tg ], $tg => $$val[1] ); |
1899
|
0
|
|
|
|
|
0
|
@subDicts = ( \%dict ); |
1900
|
|
|
|
|
|
|
} else { |
1901
|
37
|
|
|
|
|
40
|
@subDicts = @{$val}; |
|
37
|
|
|
|
|
71
|
|
1902
|
|
|
|
|
|
|
} |
1903
|
|
|
|
|
|
|
} else { |
1904
|
295
|
|
|
|
|
431
|
@subDicts = ( $val ); |
1905
|
|
|
|
|
|
|
} |
1906
|
|
|
|
|
|
|
# loop through all values of this tag |
1907
|
332
|
|
|
|
|
457
|
for (;;) { |
1908
|
664
|
100
|
|
|
|
1171
|
my $subDict = shift @subDicts or last; |
1909
|
|
|
|
|
|
|
# save last fetched object in case we fetch another one here |
1910
|
332
|
|
|
|
|
452
|
my $prevFetched = $lastFetched; |
1911
|
332
|
100
|
|
|
|
660
|
if (ref $subDict eq 'SCALAR') { |
1912
|
|
|
|
|
|
|
# only fetch once (other copies are obsolete) |
1913
|
244
|
100
|
|
|
|
480
|
next if $fetched{$$subDict}; |
1914
|
197
|
100
|
|
|
|
322
|
if ($$tagInfo{IgnoreDuplicates}) { |
1915
|
28
|
|
|
|
|
53
|
my $flag = "ProcessedPDF_$tag"; |
1916
|
28
|
50
|
|
|
|
57
|
if ($$et{$flag}) { |
1917
|
0
|
0
|
|
|
|
0
|
next if $et->WarnOnce("Ignored duplicate $tag dictionary", 2); |
1918
|
|
|
|
|
|
|
} else { |
1919
|
28
|
|
|
|
|
52
|
$$et{$flag} = 1; |
1920
|
|
|
|
|
|
|
} |
1921
|
|
|
|
|
|
|
} |
1922
|
|
|
|
|
|
|
# load dictionary via an indirect reference |
1923
|
197
|
|
|
|
|
302
|
$fetched{$$subDict} = 1; |
1924
|
197
|
|
|
|
|
360
|
my $obj = FetchObject($et, $$subDict, $xref, $tag); |
1925
|
197
|
100
|
|
|
|
410
|
unless (defined $obj) { |
1926
|
5
|
50
|
|
|
|
12
|
unless (defined $lastOffset) { |
1927
|
0
|
|
|
|
|
0
|
$et->Warn("Error reading $tag object ($$subDict)"); |
1928
|
|
|
|
|
|
|
} |
1929
|
5
|
|
|
|
|
11
|
next; |
1930
|
|
|
|
|
|
|
} |
1931
|
192
|
|
|
|
|
337
|
$subDict = $obj; |
1932
|
|
|
|
|
|
|
} |
1933
|
280
|
50
|
|
|
|
572
|
if (ref $subDict eq 'ARRAY') { |
1934
|
|
|
|
|
|
|
# convert array of key/value pairs to a hash |
1935
|
0
|
0
|
|
|
|
0
|
next if @$subDict < 2; |
1936
|
0
|
|
|
|
|
0
|
my %hash = ( _tags => [] ); |
1937
|
0
|
|
|
|
|
0
|
while (@$subDict >= 2) { |
1938
|
0
|
|
|
|
|
0
|
my $key = shift @$subDict; |
1939
|
0
|
|
|
|
|
0
|
$key =~ s/^\///; |
1940
|
0
|
|
|
|
|
0
|
push @{$hash{_tags}}, $key; |
|
0
|
|
|
|
|
0
|
|
1941
|
0
|
|
|
|
|
0
|
$hash{$key} = shift @$subDict; |
1942
|
|
|
|
|
|
|
} |
1943
|
0
|
|
|
|
|
0
|
$subDict = \%hash; |
1944
|
|
|
|
|
|
|
} else { |
1945
|
280
|
50
|
|
|
|
473
|
next unless ref $subDict eq 'HASH'; |
1946
|
|
|
|
|
|
|
} |
1947
|
|
|
|
|
|
|
# set flag to re-crypt all strings when rewriting if the dictionary |
1948
|
|
|
|
|
|
|
# came from an encrypted stream |
1949
|
280
|
50
|
|
|
|
443
|
$$subDict{_needCrypt}{'*'} = 1 unless $lastFetched; |
1950
|
280
|
|
|
|
|
848
|
my $subTablePtr = GetTagTable($tagInfo->{SubDirectory}->{TagTable}); |
1951
|
280
|
50
|
|
|
|
445
|
if (not $verbose) { |
|
|
0
|
|
|
|
|
|
1952
|
280
|
|
50
|
|
|
904
|
my $proc = $$subTablePtr{PROCESS_PROC} || \&ProcessDict; |
1953
|
280
|
|
|
|
|
652
|
&$proc($et, $subTablePtr, $subDict, $xref, $nesting); |
1954
|
|
|
|
|
|
|
} elsif ($next) { |
1955
|
|
|
|
|
|
|
# handle 'Next' links at this level to avoid deep recursion |
1956
|
0
|
|
|
|
|
0
|
undef $next; |
1957
|
0
|
|
|
|
|
0
|
$index = 0; |
1958
|
0
|
|
|
|
|
0
|
$tagTablePtr = $subTablePtr; |
1959
|
0
|
|
|
|
|
0
|
$dict = $subDict; |
1960
|
0
|
|
|
|
|
0
|
@tags = @{$$subDict{_tags}}; |
|
0
|
|
|
|
|
0
|
|
1961
|
0
|
|
|
|
|
0
|
$et->VerboseDir($tag, scalar(@tags)); |
1962
|
|
|
|
|
|
|
} else { |
1963
|
0
|
|
|
|
|
0
|
my $oldIndent = $$et{INDENT}; |
1964
|
0
|
|
|
|
|
0
|
my $oldDir = $$et{DIR_NAME}; |
1965
|
0
|
|
|
|
|
0
|
$$et{INDENT} .= '| '; |
1966
|
0
|
|
|
|
|
0
|
$$et{DIR_NAME} = $tag; |
1967
|
0
|
|
|
|
|
0
|
$et->VerboseDir($tag, scalar(@{$$subDict{_tags}})); |
|
0
|
|
|
|
|
0
|
|
1968
|
0
|
|
|
|
|
0
|
ProcessDict($et, $subTablePtr, $subDict, $xref, $nesting); |
1969
|
0
|
|
|
|
|
0
|
$$et{INDENT} = $oldIndent; |
1970
|
0
|
|
|
|
|
0
|
$$et{DIR_NAME} = $oldDir; |
1971
|
|
|
|
|
|
|
} |
1972
|
280
|
|
|
|
|
805
|
$lastFetched = $prevFetched; |
1973
|
|
|
|
|
|
|
} |
1974
|
|
|
|
|
|
|
} else { |
1975
|
|
|
|
|
|
|
# fetch object if necessary |
1976
|
|
|
|
|
|
|
# (OS X 10.6 writes indirect objects in the Info dictionary!) |
1977
|
87
|
50
|
|
|
|
144
|
if (ref $val eq 'SCALAR') { |
1978
|
0
|
|
|
|
|
0
|
my $prevFetched = $lastFetched; |
1979
|
|
|
|
|
|
|
# (note: fetching the same object multiple times is OK here) |
1980
|
0
|
|
|
|
|
0
|
$val = FetchObject($et, $$val, $xref, $tag); |
1981
|
0
|
0
|
|
|
|
0
|
if (defined $val) { |
1982
|
0
|
|
|
|
|
0
|
$val = ReadPDFValue($val); |
1983
|
|
|
|
|
|
|
# set flag to re-encrypt if necessary if rewritten |
1984
|
0
|
0
|
|
|
|
0
|
$$dict{_needCrypt}{$tag} = ($lastFetched ? 0 : 1) if $cryptString; |
|
|
0
|
|
|
|
|
|
1985
|
0
|
|
|
|
|
0
|
$lastFetched = $prevFetched; # restore last fetched object reference |
1986
|
|
|
|
|
|
|
} |
1987
|
|
|
|
|
|
|
} else { |
1988
|
87
|
|
|
|
|
144
|
$val = ReadPDFValue($val); |
1989
|
|
|
|
|
|
|
} |
1990
|
87
|
100
|
|
|
|
231
|
if (ref $val) { |
|
|
50
|
|
|
|
|
|
1991
|
12
|
50
|
|
|
|
31
|
if (ref $val eq 'ARRAY') { |
1992
|
12
|
50
|
|
|
|
34
|
delete $$et{LIST_TAGS}{$tagInfo} if $$tagInfo{List}; |
1993
|
12
|
|
|
|
|
16
|
my $v; |
1994
|
12
|
|
|
|
|
24
|
foreach $v (@$val) { |
1995
|
20
|
|
|
|
|
46
|
$et->FoundTag($tagInfo, $v); |
1996
|
|
|
|
|
|
|
} |
1997
|
|
|
|
|
|
|
} |
1998
|
|
|
|
|
|
|
} elsif (defined $val) { |
1999
|
|
|
|
|
|
|
# convert from UTF-16 (big endian) to UTF-8 or Latin if necessary |
2000
|
|
|
|
|
|
|
# unless this is binary data (hex-encoded strings would not have been converted) |
2001
|
75
|
|
100
|
|
|
313
|
my $format = $$tagInfo{Format} || $$tagInfo{Writable} || 'string'; |
2002
|
75
|
100
|
|
|
|
162
|
$val = ConvertPDFDate($val) if $format eq 'date'; |
2003
|
75
|
50
|
33
|
|
|
289
|
if (not $$tagInfo{Binary} and $val =~ /[\x18-\x1f\x80-\xff]/) { |
2004
|
|
|
|
|
|
|
# text string is already in Unicode if it starts with "\xfe\xff", |
2005
|
|
|
|
|
|
|
# otherwise we must first convert from PDFDocEncoding |
2006
|
0
|
0
|
|
|
|
0
|
$val = $et->Decode($val, ($val=~s/^\xfe\xff// ? 'UCS2' : 'PDFDoc'), 'MM'); |
2007
|
|
|
|
|
|
|
} |
2008
|
75
|
100
|
66
|
|
|
187
|
if ($$tagInfo{List} and not $$et{OPTIONS}{NoPDFList}) { |
2009
|
|
|
|
|
|
|
# separate tokens in comma or whitespace delimited lists |
2010
|
12
|
50
|
|
|
|
70
|
my @values = ($val =~ /,/) ? split /,+\s*/, $val : split ' ', $val; |
2011
|
12
|
|
|
|
|
30
|
foreach $val (@values) { |
2012
|
28
|
|
|
|
|
58
|
$et->FoundTag($tagInfo, $val); |
2013
|
|
|
|
|
|
|
} |
2014
|
|
|
|
|
|
|
} else { |
2015
|
|
|
|
|
|
|
# a simple tag value |
2016
|
63
|
|
|
|
|
157
|
$et->FoundTag($tagInfo, $val); |
2017
|
|
|
|
|
|
|
} |
2018
|
|
|
|
|
|
|
} |
2019
|
|
|
|
|
|
|
} |
2020
|
419
|
50
|
|
|
|
731
|
if ($isSubDoc) { |
2021
|
|
|
|
|
|
|
# restore original document number |
2022
|
0
|
|
|
|
|
0
|
$$et{DOC_NUM} = $oldDocNum; |
2023
|
0
|
0
|
|
|
|
0
|
--$$et{DOC_COUNT} if $oldNumTags == $$et{NUM_FOUND}; |
2024
|
|
|
|
|
|
|
} |
2025
|
|
|
|
|
|
|
} |
2026
|
|
|
|
|
|
|
# |
2027
|
|
|
|
|
|
|
# extract information from joined streams if necessary |
2028
|
|
|
|
|
|
|
# |
2029
|
|
|
|
|
|
|
|
2030
|
350
|
50
|
|
|
|
573
|
if (%join) { |
2031
|
0
|
|
|
|
|
0
|
my ($tag, $i); |
2032
|
0
|
|
|
|
|
0
|
foreach $tag (sort keys %join) { |
2033
|
0
|
|
|
|
|
0
|
my $list = $join{$tag}; |
2034
|
0
|
0
|
0
|
|
|
0
|
last unless defined $$list[1] and $$list[1] =~ /^%.*?([\x0d\x0a]*)/; |
2035
|
0
|
|
|
|
|
0
|
my $buff = "%!PS-Adobe-3.0$1"; # add PS header with same line break |
2036
|
0
|
|
|
|
|
0
|
for ($i=1; defined $$list[$i]; ++$i) { |
2037
|
0
|
|
|
|
|
0
|
$buff .= $$list[$i]; |
2038
|
0
|
|
|
|
|
0
|
undef $$list[$i]; # free memory |
2039
|
|
|
|
|
|
|
} |
2040
|
|
|
|
|
|
|
# increment document number for tags extracted from embedded EPS |
2041
|
0
|
|
|
|
|
0
|
my $oldDocNum = $$et{DOC_NUM}; |
2042
|
0
|
|
|
|
|
0
|
my $oldNumTags = $$et{NUM_FOUND}; |
2043
|
0
|
|
|
|
|
0
|
$$et{DOC_NUM} = ++$$et{DOC_COUNT}; |
2044
|
|
|
|
|
|
|
# extract PostScript information |
2045
|
0
|
|
|
|
|
0
|
$et->HandleTag($tagTablePtr, $tag, $buff); |
2046
|
0
|
|
|
|
|
0
|
$$et{DOC_NUM} = $oldDocNum; |
2047
|
|
|
|
|
|
|
# revert document counter if we didn't add any new tags |
2048
|
0
|
0
|
|
|
|
0
|
--$$et{DOC_COUNT} if $oldNumTags == $$et{NUM_FOUND}; |
2049
|
0
|
|
|
|
|
0
|
delete $$et{DOC_NUM}; |
2050
|
|
|
|
|
|
|
} |
2051
|
|
|
|
|
|
|
} |
2052
|
|
|
|
|
|
|
# |
2053
|
|
|
|
|
|
|
# extract information from stream object if it exists (eg. Metadata stream) |
2054
|
|
|
|
|
|
|
# |
2055
|
350
|
|
|
|
|
414
|
for (;;) { # (cheap goto) |
2056
|
350
|
100
|
|
|
|
914
|
last unless $$dict{_stream}; |
2057
|
43
|
|
|
|
|
84
|
my $tag = '_stream'; |
2058
|
|
|
|
|
|
|
# add Subtype (if it exists) to stream name and remove leading '/' |
2059
|
43
|
100
|
|
|
|
166
|
($tag = $$dict{Subtype} . $tag) =~ s/^\/// if $$dict{Subtype}; |
2060
|
43
|
50
|
|
|
|
107
|
last unless $$tagTablePtr{$tag}; |
2061
|
43
|
50
|
|
|
|
87
|
my $tagInfo = $et->GetTagInfo($tagTablePtr, $tag) or last; |
2062
|
43
|
50
|
|
|
|
111
|
unless ($$tagInfo{SubDirectory}) { |
2063
|
|
|
|
|
|
|
# don't build filter lists across different images |
2064
|
0
|
|
|
|
|
0
|
delete $$et{LIST_TAGS}{$$tagTablePtr{Filter}}; |
2065
|
|
|
|
|
|
|
# we arrive here only when extracting embedded images |
2066
|
|
|
|
|
|
|
# - only extract known image types and ignore others |
2067
|
0
|
|
0
|
|
|
0
|
my $filter = $$dict{Filter} || ''; |
2068
|
0
|
0
|
|
|
|
0
|
$filter = @$filter[-1] if ref $filter eq 'ARRAY'; # (get last Filter type) |
2069
|
0
|
|
|
|
|
0
|
my $result; |
2070
|
0
|
0
|
0
|
|
|
0
|
if ($filter eq '/DCTDecode' or $filter eq '/JPXDecode') { |
2071
|
0
|
0
|
|
|
|
0
|
DecodeStream($et, $dict) or last; |
2072
|
|
|
|
|
|
|
# save the image itself |
2073
|
0
|
|
|
|
|
0
|
$et->FoundTag($tagInfo, \$$dict{_stream}); |
2074
|
|
|
|
|
|
|
# extract information from embedded image |
2075
|
0
|
|
|
|
|
0
|
$result = $et->ExtractInfo(\$$dict{_stream}, { ReEntry => 1 }); |
2076
|
|
|
|
|
|
|
} |
2077
|
0
|
0
|
|
|
|
0
|
unless ($result) { |
2078
|
0
|
0
|
|
|
|
0
|
$et->FoundTag('FileType', defined $result ? '(unknown)' : '(unsupported)'); |
2079
|
|
|
|
|
|
|
} |
2080
|
0
|
|
|
|
|
0
|
last; |
2081
|
|
|
|
|
|
|
} |
2082
|
|
|
|
|
|
|
# decode stream if necessary |
2083
|
43
|
50
|
|
|
|
107
|
DecodeStream($et, $dict) or last; |
2084
|
43
|
50
|
|
|
|
81
|
if ($verbose > 2) { |
2085
|
0
|
|
|
|
|
0
|
$et->VPrint(2,"$$et{INDENT}$$et{DIR_NAME} stream data\n"); |
2086
|
0
|
|
|
|
|
0
|
$et->VerboseDump(\$$dict{_stream}); |
2087
|
|
|
|
|
|
|
} |
2088
|
|
|
|
|
|
|
# extract information from stream |
2089
|
|
|
|
|
|
|
my %dirInfo = ( |
2090
|
|
|
|
|
|
|
DataPt => \$$dict{_stream}, |
2091
|
|
|
|
|
|
|
DataLen => length $$dict{_stream}, |
2092
|
|
|
|
|
|
|
DirStart => 0, |
2093
|
|
|
|
|
|
|
DirLen => length $$dict{_stream}, |
2094
|
43
|
|
|
|
|
182
|
Parent => 'PDF', |
2095
|
|
|
|
|
|
|
); |
2096
|
43
|
|
|
|
|
129
|
my $subTablePtr = GetTagTable($tagInfo->{SubDirectory}->{TagTable}); |
2097
|
43
|
50
|
|
|
|
115
|
unless ($et->ProcessDirectory(\%dirInfo, $subTablePtr)) { |
2098
|
0
|
|
|
|
|
0
|
$et->Warn("Error processing $$tagInfo{Name} information"); |
2099
|
|
|
|
|
|
|
} |
2100
|
43
|
|
|
|
|
159
|
last; |
2101
|
|
|
|
|
|
|
} |
2102
|
|
|
|
|
|
|
} |
2103
|
|
|
|
|
|
|
|
2104
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
2105
|
|
|
|
|
|
|
# Extract information from PDF file |
2106
|
|
|
|
|
|
|
# Inputs: 0) ExifTool object reference, 1) dirInfo reference |
2107
|
|
|
|
|
|
|
# Returns: 0 if not a PDF file, 1 on success, otherwise a negative error number |
2108
|
|
|
|
|
|
|
sub ReadPDF($$) |
2109
|
|
|
|
|
|
|
{ |
2110
|
37
|
|
|
37
|
0
|
54
|
my ($et, $dirInfo) = @_; |
2111
|
37
|
|
|
|
|
58
|
my $raf = $$dirInfo{RAF}; |
2112
|
37
|
|
|
|
|
103
|
my $verbose = $et->Options('Verbose'); |
2113
|
37
|
|
|
|
|
54
|
my ($buff, $encrypt, $id); |
2114
|
|
|
|
|
|
|
# |
2115
|
|
|
|
|
|
|
# validate PDF file |
2116
|
|
|
|
|
|
|
# |
2117
|
|
|
|
|
|
|
# (linearization dictionary must be in the first 1024 bytes of the file) |
2118
|
37
|
50
|
|
|
|
108
|
$raf->Read($buff, 1024) >= 8 or return 0; |
2119
|
37
|
50
|
|
|
|
226
|
$buff =~ /^(\s*)%PDF-(\d+\.\d+)/ or return 0; |
2120
|
37
|
50
|
|
|
|
143
|
$$et{PDFBase} = length $1 and $et->Warn('PDF header is not at start of file',1); |
2121
|
37
|
|
|
|
|
66
|
$pdfVer = $2; |
2122
|
37
|
|
|
|
|
152
|
$et->SetFileType(); # set the FileType tag |
2123
|
37
|
50
|
|
|
|
143
|
$et->Warn("The PDF $pdfVer specification is held hostage by the ISO") if $pdfVer >= 2.0; |
2124
|
|
|
|
|
|
|
# store PDFVersion tag |
2125
|
37
|
|
|
|
|
75
|
my $tagTablePtr = GetTagTable('Image::ExifTool::PDF::Root'); |
2126
|
37
|
|
|
|
|
109
|
$et->HandleTag($tagTablePtr, 'Version', $pdfVer); |
2127
|
37
|
|
|
|
|
74
|
$tagTablePtr = GetTagTable('Image::ExifTool::PDF::Main'); |
2128
|
|
|
|
|
|
|
# |
2129
|
|
|
|
|
|
|
# check for a linearized PDF (only if reading) |
2130
|
|
|
|
|
|
|
# |
2131
|
37
|
|
|
|
|
725
|
my $capture = $$et{PDF_CAPTURE}; |
2132
|
37
|
100
|
|
|
|
75
|
unless ($capture) { |
2133
|
18
|
|
|
|
|
27
|
my $lin = 'false'; |
2134
|
18
|
50
|
|
|
|
74
|
if ($buff =~ /<
|
2135
|
18
|
|
|
|
|
61
|
$buff = substr($buff, pos($buff) - 2); |
2136
|
18
|
|
|
|
|
39
|
my $dict = ExtractObject($et, \$buff); |
2137
|
18
|
0
|
33
|
|
|
106
|
if (ref $dict eq 'HASH' and $$dict{Linearized} and $$dict{L}) { |
|
|
|
33
|
|
|
|
|
2138
|
0
|
0
|
|
|
|
0
|
if (not $$et{VALUE}{FileSize}) { |
|
|
0
|
|
|
|
|
|
2139
|
0
|
|
|
|
|
0
|
undef $lin; # can't determine if it is linearized |
2140
|
|
|
|
|
|
|
} elsif ($$dict{L} == $$et{VALUE}{FileSize} - $$et{PDFBase}) { |
2141
|
0
|
|
|
|
|
0
|
$lin = 'true'; |
2142
|
|
|
|
|
|
|
} |
2143
|
|
|
|
|
|
|
} |
2144
|
|
|
|
|
|
|
} |
2145
|
18
|
50
|
|
|
|
69
|
$et->HandleTag($tagTablePtr, '_linearized', $lin) if $lin; |
2146
|
|
|
|
|
|
|
} |
2147
|
|
|
|
|
|
|
# |
2148
|
|
|
|
|
|
|
# read the xref tables referenced from startxref at the end of the file |
2149
|
|
|
|
|
|
|
# |
2150
|
37
|
|
|
|
|
71
|
my @xrefOffsets; |
2151
|
37
|
50
|
|
|
|
94
|
$raf->Seek(0, 2) or return -2; |
2152
|
|
|
|
|
|
|
# the %%EOF must occur within the last 1024 bytes of the file (PDF spec, appendix H) |
2153
|
37
|
|
|
|
|
125
|
my $len = $raf->Tell(); |
2154
|
37
|
50
|
|
|
|
106
|
$len = 1024 if $len > 1024; |
2155
|
37
|
50
|
|
|
|
83
|
$raf->Seek(-$len, 2) or return -2; |
2156
|
37
|
50
|
|
|
|
107
|
$raf->Read($buff, $len) == $len or return -3; |
2157
|
|
|
|
|
|
|
# find the LAST xref table in the file (may be multiple %%EOF marks, |
2158
|
|
|
|
|
|
|
# and comments between "startxref" and "%%EOF") |
2159
|
37
|
50
|
|
|
|
343
|
$buff =~ /^.*startxref(\s+)(\d+)(\s+)(%[^\x0d\x0a]*\s+)*%%EOF/s or return -4; |
2160
|
37
|
|
|
|
|
121
|
my $ws = $1 . $3; |
2161
|
37
|
|
|
|
|
69
|
my $xr = $2; |
2162
|
37
|
|
|
|
|
93
|
push @xrefOffsets, $xr, 'Main'; |
2163
|
|
|
|
|
|
|
# set input record separator |
2164
|
37
|
50
|
|
|
|
261
|
local $/ = $ws =~ /(\x0d\x0a|\x0d|\x0a)/ ? $1 : "\x0a"; |
2165
|
37
|
|
|
|
|
71
|
my (%xref, @mainDicts, %loaded, $mainFree); |
2166
|
37
|
|
|
|
|
75
|
my ($xrefSize, $mainDictSize) = (0, 0); |
2167
|
|
|
|
|
|
|
# initialize variables to capture when rewriting |
2168
|
37
|
100
|
|
|
|
71
|
if ($capture) { |
2169
|
19
|
|
|
|
|
50
|
$capture->{startxref} = $xr; |
2170
|
19
|
|
|
|
|
38
|
$capture->{xref} = \%xref; |
2171
|
19
|
|
|
|
|
43
|
$capture->{newline} = $/; |
2172
|
19
|
|
|
|
|
40
|
$capture->{mainFree} = $mainFree = { }; |
2173
|
|
|
|
|
|
|
} |
2174
|
|
|
|
|
|
|
XRef: |
2175
|
37
|
|
|
|
|
73
|
while (@xrefOffsets) { |
2176
|
70
|
|
|
|
|
111
|
my $offset = shift @xrefOffsets; |
2177
|
70
|
|
|
|
|
95
|
my $type = shift @xrefOffsets; |
2178
|
70
|
50
|
|
|
|
147
|
next if $loaded{$offset}; # avoid infinite recursion |
2179
|
70
|
50
|
|
|
|
262
|
unless ($raf->Seek($offset+$$et{PDFBase}, 0)) { |
2180
|
0
|
0
|
|
|
|
0
|
%loaded or return -5; |
2181
|
0
|
|
|
|
|
0
|
$et->Warn('Bad offset for secondary xref table'); |
2182
|
0
|
|
|
|
|
0
|
next; |
2183
|
|
|
|
|
|
|
} |
2184
|
|
|
|
|
|
|
# Note: care must be taken because ReadLine may read more than we want if |
2185
|
|
|
|
|
|
|
# the newline sequence for this table is different than the rest of the file |
2186
|
70
|
|
|
|
|
120
|
for (;;) { |
2187
|
70
|
50
|
|
|
|
185
|
unless ($raf->ReadLine($buff)) { |
2188
|
0
|
0
|
|
|
|
0
|
%loaded or return -6; |
2189
|
0
|
|
|
|
|
0
|
$et->Warn('Bad offset for secondary xref table'); |
2190
|
0
|
|
|
|
|
0
|
next XRef; |
2191
|
|
|
|
|
|
|
} |
2192
|
70
|
50
|
|
|
|
273
|
last if $buff =~/\S/; # skip blank lines |
2193
|
|
|
|
|
|
|
} |
2194
|
70
|
|
|
|
|
102
|
my $loadXRefStream; |
2195
|
70
|
50
|
|
|
|
369
|
if ($buff =~ s/^\s*xref\s+//s) { |
|
|
0
|
|
|
|
|
|
2196
|
|
|
|
|
|
|
# load xref table |
2197
|
70
|
|
|
|
|
100
|
for (;;) { |
2198
|
|
|
|
|
|
|
# read another line if necessary (skipping blank lines) |
2199
|
177
|
|
50
|
|
|
491
|
$raf->ReadLine($buff) or return -6 until $buff =~ /\S/; |
2200
|
177
|
100
|
|
|
|
560
|
last if $buff =~ s/^\s*trailer([\s<[(])/$1/s; |
2201
|
107
|
50
|
|
|
|
379
|
$buff =~ s/^\s*(\d+)\s+(\d+)\s+//s or return -4; |
2202
|
107
|
|
|
|
|
285
|
my ($start, $num) = ($1, $2); |
2203
|
107
|
50
|
|
|
|
253
|
$raf->Seek(-length($buff), 1) or return -4; |
2204
|
107
|
|
|
|
|
171
|
my $i; |
2205
|
107
|
|
|
|
|
243
|
for ($i=0; $i<$num; ++$i) { |
2206
|
622
|
50
|
|
|
|
1191
|
$raf->Read($buff, 20) == 20 or return -6; |
2207
|
622
|
50
|
|
|
|
1960
|
$buff =~ /^\s*(\d{10}) (\d{5}) (f|n)/s or return -4; |
2208
|
622
|
|
|
|
|
822
|
my $num = $start + $i; |
2209
|
622
|
100
|
|
|
|
917
|
$xrefSize = $num if $num > $xrefSize; |
2210
|
|
|
|
|
|
|
# locate object to generate entry from stream if necessary |
2211
|
|
|
|
|
|
|
# (must do this before we test $xref{$num}) |
2212
|
622
|
50
|
|
|
|
899
|
LocateAnyObject(\%xref, $num) if $xref{dicts}; |
2213
|
|
|
|
|
|
|
# save offset for newest copy of all objects |
2214
|
|
|
|
|
|
|
# (or next object number for free objects) |
2215
|
622
|
100
|
|
|
|
1040
|
unless (defined $xref{$num}) { |
2216
|
526
|
|
|
|
|
1187
|
my ($offset, $gen) = (int($1), int($2)); |
2217
|
526
|
|
|
|
|
808
|
$xref{$num} = $offset; |
2218
|
526
|
100
|
|
|
|
910
|
if ($3 eq 'f') { |
2219
|
|
|
|
|
|
|
# save free objects in last xref table for rewriting |
2220
|
52
|
100
|
|
|
|
124
|
$$mainFree{$num} = [ $offset, $gen, 'f' ] if $mainFree; |
2221
|
52
|
|
|
|
|
115
|
next; |
2222
|
|
|
|
|
|
|
} |
2223
|
|
|
|
|
|
|
# also save offset keyed by object reference string |
2224
|
474
|
|
|
|
|
1262
|
$xref{"$num $gen R"} = $offset; |
2225
|
|
|
|
|
|
|
} |
2226
|
|
|
|
|
|
|
} |
2227
|
|
|
|
|
|
|
# (I have a sample from Adobe which has an empty xref table) |
2228
|
|
|
|
|
|
|
# %xref or return -4; # xref table may not be empty |
2229
|
107
|
|
|
|
|
214
|
$buff = ''; |
2230
|
|
|
|
|
|
|
} |
2231
|
70
|
|
|
|
|
127
|
undef $mainFree; # only do this for the last xref table |
2232
|
|
|
|
|
|
|
} elsif ($buff =~ s/^\s*(\d+)\s+(\d+)\s+obj//s) { |
2233
|
|
|
|
|
|
|
# this is a PDF-1.5 cross-reference stream dictionary |
2234
|
0
|
|
|
|
|
0
|
$loadXRefStream = 1; |
2235
|
|
|
|
|
|
|
} else { |
2236
|
0
|
0
|
|
|
|
0
|
%loaded or return -4; |
2237
|
0
|
|
|
|
|
0
|
$et->Warn('Invalid secondary xref table'); |
2238
|
0
|
|
|
|
|
0
|
next; |
2239
|
|
|
|
|
|
|
} |
2240
|
70
|
|
|
|
|
181
|
my $mainDict = ExtractObject($et, \$buff, $raf, \%xref); |
2241
|
70
|
50
|
|
|
|
172
|
unless (ref $mainDict eq 'HASH') { |
2242
|
0
|
0
|
|
|
|
0
|
%loaded or return -8; |
2243
|
0
|
|
|
|
|
0
|
$et->Warn('Error loading secondary dictionary'); |
2244
|
0
|
|
|
|
|
0
|
next; |
2245
|
|
|
|
|
|
|
} |
2246
|
|
|
|
|
|
|
# keep track of total trailer dictionary Size |
2247
|
70
|
100
|
66
|
|
|
284
|
$mainDictSize = $$mainDict{Size} if $$mainDict{Size} and $$mainDict{Size} > $mainDictSize; |
2248
|
70
|
50
|
|
|
|
124
|
if ($loadXRefStream) { |
2249
|
|
|
|
|
|
|
# decode and save our XRef stream from PDF-1.5 file |
2250
|
|
|
|
|
|
|
# (but parse it later as required to save time) |
2251
|
|
|
|
|
|
|
# Note: this technique can potentially result in an old object |
2252
|
|
|
|
|
|
|
# being used if the file was incrementally updated and an older |
2253
|
|
|
|
|
|
|
# object from an xref table was replaced by a newer object in an |
2254
|
|
|
|
|
|
|
# xref stream. But doing so isn't a good idea (if allowed at all) |
2255
|
|
|
|
|
|
|
# because a PDF 1.4 consumer would also make this same mistake. |
2256
|
0
|
0
|
0
|
|
|
0
|
if ($$mainDict{Type} eq '/XRef' and $$mainDict{W} and |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
2257
|
0
|
|
|
|
|
0
|
@{$$mainDict{W}} > 2 and $$mainDict{Size} and |
2258
|
|
|
|
|
|
|
DecodeStream($et, $mainDict)) |
2259
|
|
|
|
|
|
|
{ |
2260
|
|
|
|
|
|
|
# create Index entry if it doesn't exist |
2261
|
0
|
0
|
|
|
|
0
|
$$mainDict{Index} or $$mainDict{Index} = [ 0, $$mainDict{Size} ]; |
2262
|
|
|
|
|
|
|
# create '_entry_size' entry for internal use |
2263
|
0
|
|
|
|
|
0
|
my $w = $$mainDict{W}; |
2264
|
0
|
|
|
|
|
0
|
my $size = 0; |
2265
|
0
|
|
|
|
|
0
|
foreach (@$w) { $size += $_; } |
|
0
|
|
|
|
|
0
|
|
2266
|
0
|
|
|
|
|
0
|
$$mainDict{_entry_size} = $size; |
2267
|
|
|
|
|
|
|
# save this stream dictionary to use later if required |
2268
|
0
|
0
|
|
|
|
0
|
$xref{dicts} = [] unless $xref{dicts}; |
2269
|
0
|
|
|
|
|
0
|
push @{$xref{dicts}}, $mainDict; |
|
0
|
|
|
|
|
0
|
|
2270
|
|
|
|
|
|
|
} else { |
2271
|
0
|
0
|
|
|
|
0
|
%loaded or return -9; |
2272
|
0
|
|
|
|
|
0
|
$et->Warn('Invalid xref stream in secondary dictionary'); |
2273
|
|
|
|
|
|
|
} |
2274
|
|
|
|
|
|
|
} |
2275
|
70
|
|
|
|
|
136
|
$loaded{$offset} = 1; |
2276
|
|
|
|
|
|
|
# load XRef stream in hybrid file if it exists |
2277
|
70
|
50
|
|
|
|
146
|
push @xrefOffsets, $$mainDict{XRefStm}, 'XRefStm' if $$mainDict{XRefStm}; |
2278
|
70
|
50
|
|
|
|
114
|
$encrypt = $$mainDict{Encrypt} if $$mainDict{Encrypt}; |
2279
|
70
|
50
|
33
|
|
|
128
|
undef $encrypt if $encrypt and $encrypt eq 'null'; # (have seen "null") |
2280
|
70
|
100
|
66
|
|
|
179
|
if ($$mainDict{ID} and ref $$mainDict{ID} eq 'ARRAY') { |
2281
|
29
|
|
|
|
|
62
|
$id = ReadPDFValue($mainDict->{ID}->[0]); |
2282
|
|
|
|
|
|
|
} |
2283
|
70
|
|
|
|
|
143
|
push @mainDicts, $mainDict, $type; |
2284
|
|
|
|
|
|
|
# load previous xref table if it exists |
2285
|
70
|
100
|
|
|
|
222
|
push @xrefOffsets, $$mainDict{Prev}, 'Prev' if $$mainDict{Prev}; |
2286
|
|
|
|
|
|
|
} |
2287
|
37
|
50
|
|
|
|
75
|
if ($xrefSize > $mainDictSize) { |
2288
|
0
|
|
|
|
|
0
|
my $str = "Objects in xref table ($xrefSize) exceed trailer dictionary Size ($mainDictSize)"; |
2289
|
0
|
0
|
|
|
|
0
|
$capture ? $et->Error($str) : $et->Warn($str); |
2290
|
|
|
|
|
|
|
} |
2291
|
|
|
|
|
|
|
# |
2292
|
|
|
|
|
|
|
# extract encryption information if necessary |
2293
|
|
|
|
|
|
|
# |
2294
|
37
|
50
|
|
|
|
68
|
if ($encrypt) { |
2295
|
0
|
0
|
|
|
|
0
|
if (ref $encrypt eq 'SCALAR') { |
2296
|
0
|
|
|
|
|
0
|
$encrypt = FetchObject($et, $$encrypt, \%xref, 'Encrypt'); |
2297
|
|
|
|
|
|
|
} |
2298
|
|
|
|
|
|
|
# generate Encryption tag information |
2299
|
0
|
|
|
|
|
0
|
my $err = DecryptInit($et, $encrypt, $id); |
2300
|
0
|
0
|
|
|
|
0
|
if ($err) { |
2301
|
0
|
|
|
|
|
0
|
$et->Warn($err); |
2302
|
0
|
0
|
|
|
|
0
|
$$capture{Error} = $err if $capture; |
2303
|
0
|
|
|
|
|
0
|
return -1; |
2304
|
|
|
|
|
|
|
} |
2305
|
|
|
|
|
|
|
} |
2306
|
|
|
|
|
|
|
# |
2307
|
|
|
|
|
|
|
# extract the information beginning with each of the main dictionaries |
2308
|
|
|
|
|
|
|
# |
2309
|
37
|
|
|
|
|
79
|
my $i = 0; |
2310
|
37
|
|
|
|
|
75
|
my $num = (scalar @mainDicts) / 2; |
2311
|
37
|
|
|
|
|
65
|
while (@mainDicts) { |
2312
|
70
|
|
|
|
|
94
|
my $dict = shift @mainDicts; |
2313
|
70
|
|
|
|
|
94
|
my $type = shift @mainDicts; |
2314
|
70
|
50
|
|
|
|
149
|
if ($verbose) { |
2315
|
0
|
|
|
|
|
0
|
++$i; |
2316
|
0
|
|
|
|
|
0
|
my $n = scalar(@{$$dict{_tags}}); |
|
0
|
|
|
|
|
0
|
|
2317
|
0
|
|
|
|
|
0
|
$et->VPrint(0, "PDF dictionary ($i of $num) with $n entries:\n"); |
2318
|
|
|
|
|
|
|
} |
2319
|
70
|
|
|
|
|
140
|
ProcessDict($et, $tagTablePtr, $dict, \%xref, 0, $type); |
2320
|
|
|
|
|
|
|
} |
2321
|
|
|
|
|
|
|
# handle any decryption errors |
2322
|
37
|
50
|
|
|
|
83
|
if ($encrypt) { |
2323
|
0
|
|
|
|
|
0
|
my $err = $$encrypt{_error}; |
2324
|
0
|
0
|
|
|
|
0
|
if ($err) { |
2325
|
0
|
|
|
|
|
0
|
$et->Warn($err); |
2326
|
0
|
0
|
|
|
|
0
|
$$capture{Error} = $err if $capture; |
2327
|
0
|
|
|
|
|
0
|
return -1; |
2328
|
|
|
|
|
|
|
} |
2329
|
|
|
|
|
|
|
} |
2330
|
37
|
|
|
|
|
229
|
return 1; |
2331
|
|
|
|
|
|
|
} |
2332
|
|
|
|
|
|
|
|
2333
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
2334
|
|
|
|
|
|
|
# ReadPDF() warning strings for each error return value |
2335
|
|
|
|
|
|
|
my %pdfWarning = ( |
2336
|
|
|
|
|
|
|
# -1 is reserved as error return value with no associated warning |
2337
|
|
|
|
|
|
|
-2 => 'Error seeking in file', |
2338
|
|
|
|
|
|
|
-3 => 'Error reading file', |
2339
|
|
|
|
|
|
|
-4 => 'Invalid xref table', |
2340
|
|
|
|
|
|
|
-5 => 'Invalid xref offset', |
2341
|
|
|
|
|
|
|
-6 => 'Error reading xref table', |
2342
|
|
|
|
|
|
|
-7 => 'Error reading trailer', |
2343
|
|
|
|
|
|
|
-8 => 'Error reading main dictionary', |
2344
|
|
|
|
|
|
|
-9 => 'Invalid xref stream in main dictionary', |
2345
|
|
|
|
|
|
|
); |
2346
|
|
|
|
|
|
|
|
2347
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
2348
|
|
|
|
|
|
|
# Extract information from PDF file |
2349
|
|
|
|
|
|
|
# Inputs: 0) ExifTool object reference, 1) dirInfo reference |
2350
|
|
|
|
|
|
|
# Returns: 1 if this was a valid PDF file |
2351
|
|
|
|
|
|
|
sub ProcessPDF($$) |
2352
|
|
|
|
|
|
|
{ |
2353
|
37
|
|
|
37
|
0
|
70
|
my ($et, $dirInfo) = @_; |
2354
|
|
|
|
|
|
|
|
2355
|
37
|
|
|
|
|
57
|
undef $cryptInfo; # (must not delete after returning so writer can use it) |
2356
|
37
|
|
|
|
|
53
|
undef $cryptStream; |
2357
|
37
|
|
|
|
|
45
|
undef $cryptString; |
2358
|
37
|
|
|
|
|
80
|
my $result = ReadPDF($et, $dirInfo); |
2359
|
37
|
50
|
|
|
|
89
|
if ($result < 0) { |
2360
|
0
|
0
|
|
|
|
0
|
$et->Warn($pdfWarning{$result}) if $pdfWarning{$result}; |
2361
|
0
|
|
|
|
|
0
|
$result = 1; |
2362
|
|
|
|
|
|
|
} |
2363
|
|
|
|
|
|
|
# clean up and return |
2364
|
37
|
|
|
|
|
59
|
undef %streamObjs; |
2365
|
37
|
|
|
|
|
66
|
undef %fetched; |
2366
|
37
|
|
|
|
|
80
|
return $result; |
2367
|
|
|
|
|
|
|
} |
2368
|
|
|
|
|
|
|
|
2369
|
|
|
|
|
|
|
1; # end |
2370
|
|
|
|
|
|
|
|
2371
|
|
|
|
|
|
|
|
2372
|
|
|
|
|
|
|
__END__ |