| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
2
|
|
|
|
|
|
|
# File: PDF.pm |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# Description: Read PDF meta information |
|
5
|
|
|
|
|
|
|
# |
|
6
|
|
|
|
|
|
|
# Revisions: 07/11/2005 - P. Harvey Created |
|
7
|
|
|
|
|
|
|
# 07/25/2005 - P. Harvey Add support for encrypted documents |
|
8
|
|
|
|
|
|
|
# |
|
9
|
|
|
|
|
|
|
# References: 1) http://www.adobe.com/devnet/pdf/pdf_reference.html |
|
10
|
|
|
|
|
|
|
# 2) http://search.cpan.org/dist/Crypt-RC4/ |
|
11
|
|
|
|
|
|
|
# 3) http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf |
|
12
|
|
|
|
|
|
|
# 4) http://www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/adobe_supplement_iso32000.pdf |
|
13
|
|
|
|
|
|
|
# 5) http://tools.ietf.org/search/rfc3454 |
|
14
|
|
|
|
|
|
|
# 6) http://www.armware.dk/RFC/rfc/rfc4013.html |
|
15
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
package Image::ExifTool::PDF; |
|
18
|
|
|
|
|
|
|
|
|
19
|
25
|
|
|
25
|
|
3999
|
use strict; |
|
|
25
|
|
|
|
|
56
|
|
|
|
25
|
|
|
|
|
832
|
|
|
20
|
25
|
|
|
25
|
|
121
|
use vars qw($VERSION $AUTOLOAD $lastFetched); |
|
|
25
|
|
|
|
|
58
|
|
|
|
25
|
|
|
|
|
1293
|
|
|
21
|
25
|
|
|
25
|
|
135
|
use Image::ExifTool qw(:DataAccess :Utils); |
|
|
25
|
|
|
|
|
56
|
|
|
|
25
|
|
|
|
|
282217
|
|
|
22
|
|
|
|
|
|
|
require Exporter; |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
$VERSION = '1.55'; |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
sub FetchObject($$$$); |
|
27
|
|
|
|
|
|
|
sub ExtractObject($$;$$); |
|
28
|
|
|
|
|
|
|
sub ReadToNested($;$); |
|
29
|
|
|
|
|
|
|
sub ProcessDict($$$$;$$); |
|
30
|
|
|
|
|
|
|
sub ProcessAcroForm($$$$;$$); |
|
31
|
|
|
|
|
|
|
sub ExpandArray($); |
|
32
|
|
|
|
|
|
|
sub ReadPDFValue($); |
|
33
|
|
|
|
|
|
|
sub CheckPDF($$$); |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# $lastFetched - last fetched object reference (used for decryption) |
|
36
|
|
|
|
|
|
|
# (undefined if fetched object was already decrypted, eg. object from stream) |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
my $cryptInfo; # encryption object reference (plus additional information) |
|
39
|
|
|
|
|
|
|
my $cryptString; # flag that strings are encrypted |
|
40
|
|
|
|
|
|
|
my $cryptStream; # flag that streams are encrypted |
|
41
|
|
|
|
|
|
|
my $lastOffset; # last fetched object offset |
|
42
|
|
|
|
|
|
|
my %streamObjs; # hash of stream objects |
|
43
|
|
|
|
|
|
|
my %fetched; # dicts fetched in verbose mode (to avoid cyclical recursion) |
|
44
|
|
|
|
|
|
|
my $pdfVer; # version of PDF file being processed |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# filters supported in DecodeStream() |
|
47
|
|
|
|
|
|
|
my %supportedFilter = ( |
|
48
|
|
|
|
|
|
|
'/FlateDecode' => 1, |
|
49
|
|
|
|
|
|
|
'/Crypt' => 1, |
|
50
|
|
|
|
|
|
|
'/Identity' => 1, # (not filtered) |
|
51
|
|
|
|
|
|
|
'/DCTDecode' => 1, # (JPEG image - not filtered) |
|
52
|
|
|
|
|
|
|
'/JPXDecode' => 1, # (Jpeg2000 image - not filtered) |
|
53
|
|
|
|
|
|
|
'/LZWDecode' => 1, # (usually a bitmapped image) |
|
54
|
|
|
|
|
|
|
'/ASCIIHexDecode' => 1, |
|
55
|
|
|
|
|
|
|
'/ASCII85Decode' => 1, |
|
56
|
|
|
|
|
|
|
# other standard filters that we currently don't support |
|
57
|
|
|
|
|
|
|
#'/JBIG2Decode' => 0, # (JBIG2 image format not supported) |
|
58
|
|
|
|
|
|
|
#'/CCITTFaxDecode' => 0, |
|
59
|
|
|
|
|
|
|
#'/RunLengthDecode' => 0, |
|
60
|
|
|
|
|
|
|
); |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
# tags in main PDF directories |
|
63
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Main = ( |
|
64
|
|
|
|
|
|
|
GROUPS => { 2 => 'Document' }, |
|
65
|
|
|
|
|
|
|
VARS => { CAPTURE => ['Main','Prev'] }, |
|
66
|
|
|
|
|
|
|
Info => { |
|
67
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Info' }, |
|
68
|
|
|
|
|
|
|
# Adobe Acrobat 10.1.5 will create a duplicate Info dictionary with |
|
69
|
|
|
|
|
|
|
# a different object number when metadata is edited. This flag |
|
70
|
|
|
|
|
|
|
# is part of a patch to ignore this duplicate information (unless |
|
71
|
|
|
|
|
|
|
# the IgnoreMinorErrors option is used) |
|
72
|
|
|
|
|
|
|
IgnoreDuplicates => 1, |
|
73
|
|
|
|
|
|
|
}, |
|
74
|
|
|
|
|
|
|
Root => { |
|
75
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Root' }, |
|
76
|
|
|
|
|
|
|
}, |
|
77
|
|
|
|
|
|
|
Encrypt => { |
|
78
|
|
|
|
|
|
|
NoProcess => 1, # don't process normally (processed in advance) |
|
79
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Encrypt' }, |
|
80
|
|
|
|
|
|
|
}, |
|
81
|
|
|
|
|
|
|
_linearized => { |
|
82
|
|
|
|
|
|
|
Name => 'Linearized', |
|
83
|
|
|
|
|
|
|
Notes => 'flag set if document is linearized for fast web display; not a real Tag ID', |
|
84
|
|
|
|
|
|
|
PrintConv => { 'true' => 'Yes', 'false' => 'No' }, |
|
85
|
|
|
|
|
|
|
}, |
|
86
|
|
|
|
|
|
|
); |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
# tags in PDF Info dictionary |
|
89
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Info = ( |
|
90
|
|
|
|
|
|
|
GROUPS => { 2 => 'Document' }, |
|
91
|
|
|
|
|
|
|
VARS => { CAPTURE => ['Info'] }, |
|
92
|
|
|
|
|
|
|
EXTRACT_UNKNOWN => 1, # extract all unknown tags in this directory |
|
93
|
|
|
|
|
|
|
WRITE_PROC => \&Image::ExifTool::DummyWriteProc, |
|
94
|
|
|
|
|
|
|
CHECK_PROC => \&CheckPDF, |
|
95
|
|
|
|
|
|
|
WRITABLE => 'string', |
|
96
|
|
|
|
|
|
|
# set PRIORITY to 0 so most recent Info dictionary takes precedence |
|
97
|
|
|
|
|
|
|
# (Acrobat Pro bug? doesn't use same object/generation number for |
|
98
|
|
|
|
|
|
|
# new Info dictionary when doing incremental update) |
|
99
|
|
|
|
|
|
|
PRIORITY => 0, |
|
100
|
|
|
|
|
|
|
NOTES => q{ |
|
101
|
|
|
|
|
|
|
As well as the tags listed below, the PDF specification allows for |
|
102
|
|
|
|
|
|
|
user-defined tags to exist in the Info dictionary. These tags, which should |
|
103
|
|
|
|
|
|
|
have corresponding XMP-pdfx entries in the XMP of the PDF XML Metadata |
|
104
|
|
|
|
|
|
|
object, are also extracted by ExifTool. |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
B specifies the value format, and may be C, C, |
|
107
|
|
|
|
|
|
|
C, C, C or C for PDF tags. |
|
108
|
|
|
|
|
|
|
}, |
|
109
|
|
|
|
|
|
|
Title => { }, |
|
110
|
|
|
|
|
|
|
Author => { Groups => { 2 => 'Author' } }, |
|
111
|
|
|
|
|
|
|
Subject => { }, |
|
112
|
|
|
|
|
|
|
Keywords => { List => 'string' }, # this is a string list |
|
113
|
|
|
|
|
|
|
Creator => { }, |
|
114
|
|
|
|
|
|
|
Producer => { }, |
|
115
|
|
|
|
|
|
|
CreationDate => { |
|
116
|
|
|
|
|
|
|
Name => 'CreateDate', |
|
117
|
|
|
|
|
|
|
Writable => 'date', |
|
118
|
|
|
|
|
|
|
Groups => { 2 => 'Time' }, |
|
119
|
|
|
|
|
|
|
Shift => 'Time', |
|
120
|
|
|
|
|
|
|
PrintConv => '$self->ConvertDateTime($val)', |
|
121
|
|
|
|
|
|
|
PrintConvInv => '$self->InverseDateTime($val)', |
|
122
|
|
|
|
|
|
|
}, |
|
123
|
|
|
|
|
|
|
ModDate => { |
|
124
|
|
|
|
|
|
|
Name => 'ModifyDate', |
|
125
|
|
|
|
|
|
|
Writable => 'date', |
|
126
|
|
|
|
|
|
|
Groups => { 2 => 'Time' }, |
|
127
|
|
|
|
|
|
|
Shift => 'Time', |
|
128
|
|
|
|
|
|
|
PrintConv => '$self->ConvertDateTime($val)', |
|
129
|
|
|
|
|
|
|
PrintConvInv => '$self->InverseDateTime($val)', |
|
130
|
|
|
|
|
|
|
}, |
|
131
|
|
|
|
|
|
|
Trapped => { |
|
132
|
|
|
|
|
|
|
Protected => 1, |
|
133
|
|
|
|
|
|
|
# remove leading '/' from '/True' or '/False' |
|
134
|
|
|
|
|
|
|
ValueConv => '$val=~s{^/}{}; $val', |
|
135
|
|
|
|
|
|
|
ValueConvInv => '"/$val"', |
|
136
|
|
|
|
|
|
|
}, |
|
137
|
|
|
|
|
|
|
'AAPL:Keywords' => { #PH |
|
138
|
|
|
|
|
|
|
Name => 'AppleKeywords', |
|
139
|
|
|
|
|
|
|
List => 'array', # this is an array of values |
|
140
|
|
|
|
|
|
|
Notes => q{ |
|
141
|
|
|
|
|
|
|
keywords written by Apple utilities, although they seem to use PDF:Keywords |
|
142
|
|
|
|
|
|
|
when reading |
|
143
|
|
|
|
|
|
|
}, |
|
144
|
|
|
|
|
|
|
}, |
|
145
|
|
|
|
|
|
|
); |
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# tags in the PDF Root document catalog |
|
148
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Root = ( |
|
149
|
|
|
|
|
|
|
GROUPS => { 2 => 'Document' }, |
|
150
|
|
|
|
|
|
|
# note: can't capture previous versions of Root since they are not parsed |
|
151
|
|
|
|
|
|
|
VARS => { CAPTURE => ['Root'] }, |
|
152
|
|
|
|
|
|
|
NOTES => 'This is the PDF document catalog.', |
|
153
|
|
|
|
|
|
|
MarkInfo => { |
|
154
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::MarkInfo' }, |
|
155
|
|
|
|
|
|
|
}, |
|
156
|
|
|
|
|
|
|
Metadata => { |
|
157
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Metadata' }, |
|
158
|
|
|
|
|
|
|
}, |
|
159
|
|
|
|
|
|
|
Pages => { |
|
160
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Pages' }, |
|
161
|
|
|
|
|
|
|
}, |
|
162
|
|
|
|
|
|
|
Perms => { |
|
163
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Perms' }, |
|
164
|
|
|
|
|
|
|
}, |
|
165
|
|
|
|
|
|
|
AcroForm => { |
|
166
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::AcroForm' }, |
|
167
|
|
|
|
|
|
|
}, |
|
168
|
|
|
|
|
|
|
Lang => 'Language', |
|
169
|
|
|
|
|
|
|
PageLayout => { }, |
|
170
|
|
|
|
|
|
|
PageMode => { }, |
|
171
|
|
|
|
|
|
|
Version => 'PDFVersion', |
|
172
|
|
|
|
|
|
|
); |
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
# tags extracted from the PDF Encrypt dictionary |
|
175
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Encrypt = ( |
|
176
|
|
|
|
|
|
|
GROUPS => { 2 => 'Document' }, |
|
177
|
|
|
|
|
|
|
NOTES => 'Tags extracted from the document Encrypt dictionary.', |
|
178
|
|
|
|
|
|
|
Filter => { |
|
179
|
|
|
|
|
|
|
Name => 'Encryption', |
|
180
|
|
|
|
|
|
|
Notes => q{ |
|
181
|
|
|
|
|
|
|
extracted value is actually a combination of the Filter, SubFilter, V, R and |
|
182
|
|
|
|
|
|
|
Length information from the Encrypt dictionary |
|
183
|
|
|
|
|
|
|
}, |
|
184
|
|
|
|
|
|
|
}, |
|
185
|
|
|
|
|
|
|
P => { |
|
186
|
|
|
|
|
|
|
Name => 'UserAccess', |
|
187
|
|
|
|
|
|
|
ValueConv => '$val & 0x0f3c', # ignore reserved bits |
|
188
|
|
|
|
|
|
|
PrintConvColumns => 2, |
|
189
|
|
|
|
|
|
|
PrintConv => { BITMASK => { |
|
190
|
|
|
|
|
|
|
2 => 'Print', |
|
191
|
|
|
|
|
|
|
3 => 'Modify', |
|
192
|
|
|
|
|
|
|
4 => 'Copy', |
|
193
|
|
|
|
|
|
|
5 => 'Annotate', |
|
194
|
|
|
|
|
|
|
8 => 'Fill forms', |
|
195
|
|
|
|
|
|
|
9 => 'Extract', |
|
196
|
|
|
|
|
|
|
10 => 'Assemble', |
|
197
|
|
|
|
|
|
|
11 => 'Print high-res', |
|
198
|
|
|
|
|
|
|
}}, |
|
199
|
|
|
|
|
|
|
}, |
|
200
|
|
|
|
|
|
|
); |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
# tags in PDF Pages dictionary |
|
203
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Pages = ( |
|
204
|
|
|
|
|
|
|
GROUPS => { 2 => 'Document' }, |
|
205
|
|
|
|
|
|
|
Count => 'PageCount', |
|
206
|
|
|
|
|
|
|
Kids => { |
|
207
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Kids' }, |
|
208
|
|
|
|
|
|
|
}, |
|
209
|
|
|
|
|
|
|
); |
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
# tags in PDF Perms dictionary |
|
212
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Perms = ( |
|
213
|
|
|
|
|
|
|
NOTES => 'Additional document permissions imposed by digital signatures.', |
|
214
|
|
|
|
|
|
|
DocMDP => { |
|
215
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Signature' }, |
|
216
|
|
|
|
|
|
|
}, |
|
217
|
|
|
|
|
|
|
FieldMDP => { |
|
218
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Signature' }, |
|
219
|
|
|
|
|
|
|
}, |
|
220
|
|
|
|
|
|
|
UR3 => { |
|
221
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Signature' }, |
|
222
|
|
|
|
|
|
|
}, |
|
223
|
|
|
|
|
|
|
); |
|
224
|
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
# tags in PDF Perms dictionary |
|
226
|
|
|
|
|
|
|
%Image::ExifTool::PDF::AcroForm = ( |
|
227
|
|
|
|
|
|
|
PROCESS_PROC => \&ProcessAcroForm, |
|
228
|
|
|
|
|
|
|
_has_xfa => { |
|
229
|
|
|
|
|
|
|
Name => 'HasXFA', |
|
230
|
|
|
|
|
|
|
Notes => q{ |
|
231
|
|
|
|
|
|
|
this tag is defined if a document contains form fields, and is true if it |
|
232
|
|
|
|
|
|
|
uses XML Forms Architecture; not a real Tag ID |
|
233
|
|
|
|
|
|
|
}, |
|
234
|
|
|
|
|
|
|
PrintConv => { 'true' => 'Yes', 'false' => 'No' }, |
|
235
|
|
|
|
|
|
|
}, |
|
236
|
|
|
|
|
|
|
); |
|
237
|
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
# tags in PDF Kids dictionary |
|
239
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Kids = ( |
|
240
|
|
|
|
|
|
|
Metadata => { |
|
241
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Metadata' }, |
|
242
|
|
|
|
|
|
|
}, |
|
243
|
|
|
|
|
|
|
PieceInfo => { |
|
244
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::PieceInfo' }, |
|
245
|
|
|
|
|
|
|
}, |
|
246
|
|
|
|
|
|
|
Resources => { |
|
247
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Resources' }, |
|
248
|
|
|
|
|
|
|
}, |
|
249
|
|
|
|
|
|
|
Kids => { |
|
250
|
|
|
|
|
|
|
Condition => '$self->Options("ExtractEmbedded")', |
|
251
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Kids' }, |
|
252
|
|
|
|
|
|
|
}, |
|
253
|
|
|
|
|
|
|
); |
|
254
|
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
# tags in PDF Resources dictionary |
|
256
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Resources = ( |
|
257
|
|
|
|
|
|
|
ColorSpace => { |
|
258
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::ColorSpace' }, |
|
259
|
|
|
|
|
|
|
}, |
|
260
|
|
|
|
|
|
|
XObject => { |
|
261
|
|
|
|
|
|
|
Condition => '$self->Options("ExtractEmbedded")', |
|
262
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::XObject' }, |
|
263
|
|
|
|
|
|
|
}, |
|
264
|
|
|
|
|
|
|
Properties => { |
|
265
|
|
|
|
|
|
|
Condition => '$self->Options("ExtractEmbedded")', |
|
266
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Properties' }, |
|
267
|
|
|
|
|
|
|
}, |
|
268
|
|
|
|
|
|
|
); |
|
269
|
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
# tags in PDF ColorSpace dictionary |
|
271
|
|
|
|
|
|
|
%Image::ExifTool::PDF::ColorSpace = ( |
|
272
|
|
|
|
|
|
|
DefaultRGB => { |
|
273
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::DefaultRGB' }, |
|
274
|
|
|
|
|
|
|
ConvertToDict => 1, # (not seen yet, but just in case) |
|
275
|
|
|
|
|
|
|
}, |
|
276
|
|
|
|
|
|
|
DefaultCMYK => { |
|
277
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::DefaultRGB' }, |
|
278
|
|
|
|
|
|
|
# hack: this is stored as an array instead of a dictionary in my |
|
279
|
|
|
|
|
|
|
# sample, so convert to a dictionary to extract the ICCBased element |
|
280
|
|
|
|
|
|
|
ConvertToDict => 1, |
|
281
|
|
|
|
|
|
|
}, |
|
282
|
|
|
|
|
|
|
Cs1 => { |
|
283
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::DefaultRGB' }, |
|
284
|
|
|
|
|
|
|
ConvertToDict => 1, # (just in case) |
|
285
|
|
|
|
|
|
|
}, |
|
286
|
|
|
|
|
|
|
CS0 => { |
|
287
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::DefaultRGB' }, |
|
288
|
|
|
|
|
|
|
ConvertToDict => 1, # (just in case) |
|
289
|
|
|
|
|
|
|
}, |
|
290
|
|
|
|
|
|
|
); |
|
291
|
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
# tags in PDF DefaultRGB dictionary |
|
293
|
|
|
|
|
|
|
%Image::ExifTool::PDF::DefaultRGB = ( |
|
294
|
|
|
|
|
|
|
ICCBased => { |
|
295
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::ICCBased' }, |
|
296
|
|
|
|
|
|
|
}, |
|
297
|
|
|
|
|
|
|
); |
|
298
|
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
# tags in PDF ICCBased, Cs1 and CS0 dictionaries |
|
300
|
|
|
|
|
|
|
%Image::ExifTool::PDF::ICCBased = ( |
|
301
|
|
|
|
|
|
|
_stream => { |
|
302
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::ICC_Profile::Main' }, |
|
303
|
|
|
|
|
|
|
}, |
|
304
|
|
|
|
|
|
|
); |
|
305
|
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
# tags in PDF XObject dictionary (parsed only if ExtractEmbedded is enabled) |
|
307
|
|
|
|
|
|
|
%Image::ExifTool::PDF::XObject = ( |
|
308
|
|
|
|
|
|
|
EXTRACT_UNKNOWN => 0, # extract known but numbered tags (Im1, Im2, etc) |
|
309
|
|
|
|
|
|
|
Im => { |
|
310
|
|
|
|
|
|
|
Notes => q{ |
|
311
|
|
|
|
|
|
|
the L option enables information to be extracted from these |
|
312
|
|
|
|
|
|
|
embedded images |
|
313
|
|
|
|
|
|
|
}, |
|
314
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Im' }, |
|
315
|
|
|
|
|
|
|
}, |
|
316
|
|
|
|
|
|
|
); |
|
317
|
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
# tags in PDF Im# dictionary |
|
319
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Im = ( |
|
320
|
|
|
|
|
|
|
NOTES => q{ |
|
321
|
|
|
|
|
|
|
Information extracted from embedded images with the L option. |
|
322
|
|
|
|
|
|
|
The EmbeddedImage and its metadata are extracted only for JPEG and Jpeg2000 |
|
323
|
|
|
|
|
|
|
image formats. |
|
324
|
|
|
|
|
|
|
}, |
|
325
|
|
|
|
|
|
|
Width => 'EmbeddedImageWidth', |
|
326
|
|
|
|
|
|
|
Height => 'EmbeddedImageHeight', |
|
327
|
|
|
|
|
|
|
Filter => { Name => 'EmbeddedImageFilter', List => 1 }, |
|
328
|
|
|
|
|
|
|
ColorSpace => { |
|
329
|
|
|
|
|
|
|
Name => 'EmbeddedImageColorSpace', |
|
330
|
|
|
|
|
|
|
List => 1, |
|
331
|
|
|
|
|
|
|
RawConv => 'ref $val ? undef : $val', # (ignore color space data) |
|
332
|
|
|
|
|
|
|
}, |
|
333
|
|
|
|
|
|
|
Image_stream => { |
|
334
|
|
|
|
|
|
|
Name => 'EmbeddedImage', |
|
335
|
|
|
|
|
|
|
Groups => { 2 => 'Preview' }, |
|
336
|
|
|
|
|
|
|
Binary => 1, |
|
337
|
|
|
|
|
|
|
}, |
|
338
|
|
|
|
|
|
|
); |
|
339
|
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
# tags in PDF Properties dictionary |
|
341
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Properties = ( |
|
342
|
|
|
|
|
|
|
EXTRACT_UNKNOWN => 0, # extract known but numbered tags (MC0, MC1, etc) |
|
343
|
|
|
|
|
|
|
MC => { |
|
344
|
|
|
|
|
|
|
Notes => q{ |
|
345
|
|
|
|
|
|
|
the L option enables information to be extracted from these |
|
346
|
|
|
|
|
|
|
embedded metadata dictionaries |
|
347
|
|
|
|
|
|
|
}, |
|
348
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::MC' }, |
|
349
|
|
|
|
|
|
|
} |
|
350
|
|
|
|
|
|
|
); |
|
351
|
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
# tags in PDF MC# dictionary |
|
353
|
|
|
|
|
|
|
%Image::ExifTool::PDF::MC = ( |
|
354
|
|
|
|
|
|
|
Metadata => { |
|
355
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Metadata' }, |
|
356
|
|
|
|
|
|
|
} |
|
357
|
|
|
|
|
|
|
); |
|
358
|
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
# tags in PDF PieceInfo dictionary |
|
360
|
|
|
|
|
|
|
%Image::ExifTool::PDF::PieceInfo = ( |
|
361
|
|
|
|
|
|
|
AdobePhotoshop => { |
|
362
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::AdobePhotoshop' }, |
|
363
|
|
|
|
|
|
|
}, |
|
364
|
|
|
|
|
|
|
Illustrator => { |
|
365
|
|
|
|
|
|
|
# assume this is an illustrator file if it contains this directory |
|
366
|
|
|
|
|
|
|
# and doesn't have a ".PDF" extension |
|
367
|
|
|
|
|
|
|
Condition => q{ |
|
368
|
|
|
|
|
|
|
$self->OverrideFileType("AI") unless $$self{FILE_EXT} and $$self{FILE_EXT} eq 'PDF'; |
|
369
|
|
|
|
|
|
|
return 1; |
|
370
|
|
|
|
|
|
|
}, |
|
371
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Illustrator' }, |
|
372
|
|
|
|
|
|
|
}, |
|
373
|
|
|
|
|
|
|
); |
|
374
|
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
# tags in PDF AdobePhotoshop dictionary |
|
376
|
|
|
|
|
|
|
%Image::ExifTool::PDF::AdobePhotoshop = ( |
|
377
|
|
|
|
|
|
|
Private => { |
|
378
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Private' }, |
|
379
|
|
|
|
|
|
|
}, |
|
380
|
|
|
|
|
|
|
); |
|
381
|
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
# tags in PDF Illustrator dictionary |
|
383
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Illustrator = ( |
|
384
|
|
|
|
|
|
|
Private => { |
|
385
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::AIPrivate' }, |
|
386
|
|
|
|
|
|
|
}, |
|
387
|
|
|
|
|
|
|
); |
|
388
|
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
# tags in PDF Private dictionary |
|
390
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Private = ( |
|
391
|
|
|
|
|
|
|
ImageResources => { |
|
392
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::ImageResources' }, |
|
393
|
|
|
|
|
|
|
}, |
|
394
|
|
|
|
|
|
|
); |
|
395
|
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
# tags in PDF AI Private dictionary |
|
397
|
|
|
|
|
|
|
%Image::ExifTool::PDF::AIPrivate = ( |
|
398
|
|
|
|
|
|
|
GROUPS => { 2 => 'Document' }, |
|
399
|
|
|
|
|
|
|
EXTRACT_UNKNOWN => 0, # extract known but numbered tags |
|
400
|
|
|
|
|
|
|
AIMetaData => { |
|
401
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::AIMetaData' }, |
|
402
|
|
|
|
|
|
|
}, |
|
403
|
|
|
|
|
|
|
AIPrivateData => { |
|
404
|
|
|
|
|
|
|
Notes => q{ |
|
405
|
|
|
|
|
|
|
the L option enables information to be extracted from embedded |
|
406
|
|
|
|
|
|
|
PostScript documents in the AIPrivateData# and AIPDFPrivateData# streams |
|
407
|
|
|
|
|
|
|
}, |
|
408
|
|
|
|
|
|
|
JoinStreams => 1, # join streams from numbered tags and process as one |
|
409
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PostScript::Main' }, |
|
410
|
|
|
|
|
|
|
}, |
|
411
|
|
|
|
|
|
|
AIPDFPrivateData => { |
|
412
|
|
|
|
|
|
|
JoinStreams => 1, # join streams from numbered tags and process as one |
|
413
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PostScript::Main' }, |
|
414
|
|
|
|
|
|
|
}, |
|
415
|
|
|
|
|
|
|
RoundTripVersion => { }, |
|
416
|
|
|
|
|
|
|
ContainerVersion => { }, |
|
417
|
|
|
|
|
|
|
CreatorVersion => { }, |
|
418
|
|
|
|
|
|
|
); |
|
419
|
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
# tags in PDF AIMetaData dictionary |
|
421
|
|
|
|
|
|
|
%Image::ExifTool::PDF::AIMetaData = ( |
|
422
|
|
|
|
|
|
|
_stream => { |
|
423
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PostScript::Main' }, |
|
424
|
|
|
|
|
|
|
}, |
|
425
|
|
|
|
|
|
|
); |
|
426
|
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
# tags in PDF ImageResources dictionary |
|
428
|
|
|
|
|
|
|
%Image::ExifTool::PDF::ImageResources = ( |
|
429
|
|
|
|
|
|
|
_stream => { |
|
430
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::Photoshop::Main' }, |
|
431
|
|
|
|
|
|
|
}, |
|
432
|
|
|
|
|
|
|
); |
|
433
|
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
# tags in PDF MarkInfo dictionary |
|
435
|
|
|
|
|
|
|
%Image::ExifTool::PDF::MarkInfo = ( |
|
436
|
|
|
|
|
|
|
GROUPS => { 2 => 'Document' }, |
|
437
|
|
|
|
|
|
|
Marked => { |
|
438
|
|
|
|
|
|
|
Name => 'TaggedPDF', |
|
439
|
|
|
|
|
|
|
Notes => "not a Tagged PDF if this tag is missing", |
|
440
|
|
|
|
|
|
|
PrintConv => { 'true' => 'Yes', 'false' => 'No' }, |
|
441
|
|
|
|
|
|
|
}, |
|
442
|
|
|
|
|
|
|
); |
|
443
|
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
# tags in PDF Metadata dictionary |
|
445
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Metadata = ( |
|
446
|
|
|
|
|
|
|
GROUPS => { 2 => 'Document' }, |
|
447
|
|
|
|
|
|
|
XML_stream => { # this is the stream for a Subtype /XML dictionary (not a real tag) |
|
448
|
|
|
|
|
|
|
Name => 'XMP', |
|
449
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::XMP::Main' }, |
|
450
|
|
|
|
|
|
|
}, |
|
451
|
|
|
|
|
|
|
); |
|
452
|
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
# tags in PDF signature directories (DocMDP, FieldMDP or UR3) |
|
454
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Signature = ( |
|
455
|
|
|
|
|
|
|
GROUPS => { 2 => 'Document' }, |
|
456
|
|
|
|
|
|
|
ContactInfo => 'SignerContactInfo', |
|
457
|
|
|
|
|
|
|
Location => 'SigningLocation', |
|
458
|
|
|
|
|
|
|
M => { |
|
459
|
|
|
|
|
|
|
Name => 'SigningDate', |
|
460
|
|
|
|
|
|
|
Format => 'date', |
|
461
|
|
|
|
|
|
|
Groups => { 2 => 'Time' }, |
|
462
|
|
|
|
|
|
|
PrintConv => '$self->ConvertDateTime($val)', |
|
463
|
|
|
|
|
|
|
}, |
|
464
|
|
|
|
|
|
|
Name => 'SigningAuthority', |
|
465
|
|
|
|
|
|
|
Reason => 'SigningReason', |
|
466
|
|
|
|
|
|
|
Reference => { |
|
467
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Reference' }, |
|
468
|
|
|
|
|
|
|
}, |
|
469
|
|
|
|
|
|
|
Prop_AuthTime => { |
|
470
|
|
|
|
|
|
|
Name => 'AuthenticationTime', |
|
471
|
|
|
|
|
|
|
PrintConv => 'ConvertTimeSpan($val) . " ago"', |
|
472
|
|
|
|
|
|
|
}, |
|
473
|
|
|
|
|
|
|
Prop_AuthType => 'AuthenticationType', |
|
474
|
|
|
|
|
|
|
); |
|
475
|
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
# tags in PDF Reference dictionary |
|
477
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Reference = ( |
|
478
|
|
|
|
|
|
|
TransformParams => { |
|
479
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::TransformParams' }, |
|
480
|
|
|
|
|
|
|
}, |
|
481
|
|
|
|
|
|
|
); |
|
482
|
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
# tags in PDF TransformParams dictionary |
|
484
|
|
|
|
|
|
|
%Image::ExifTool::PDF::TransformParams = ( |
|
485
|
|
|
|
|
|
|
GROUPS => { 2 => 'Document' }, |
|
486
|
|
|
|
|
|
|
Annots => { |
|
487
|
|
|
|
|
|
|
Name => 'AnnotationUsageRights', |
|
488
|
|
|
|
|
|
|
Notes => q{ |
|
489
|
|
|
|
|
|
|
possible values are Create, Delete, Modify, Copy, Import and Export; |
|
490
|
|
|
|
|
|
|
additional values for UR3 signatures are Online and SummaryView |
|
491
|
|
|
|
|
|
|
}, |
|
492
|
|
|
|
|
|
|
List => 1, |
|
493
|
|
|
|
|
|
|
}, |
|
494
|
|
|
|
|
|
|
Document => { |
|
495
|
|
|
|
|
|
|
Name => 'DocumentUsageRights', |
|
496
|
|
|
|
|
|
|
Notes => 'only possible value is FullSave', |
|
497
|
|
|
|
|
|
|
List => 1, |
|
498
|
|
|
|
|
|
|
}, |
|
499
|
|
|
|
|
|
|
Form => { |
|
500
|
|
|
|
|
|
|
Name => 'FormUsageRights', |
|
501
|
|
|
|
|
|
|
Notes => q{ |
|
502
|
|
|
|
|
|
|
possible values are FillIn, Import, Export, SubmitStandalone and |
|
503
|
|
|
|
|
|
|
SpawnTemplate; additional values for UR3 signatures are BarcodePlaintext and |
|
504
|
|
|
|
|
|
|
Online |
|
505
|
|
|
|
|
|
|
}, |
|
506
|
|
|
|
|
|
|
List => 1, |
|
507
|
|
|
|
|
|
|
}, |
|
508
|
|
|
|
|
|
|
FormEX => { |
|
509
|
|
|
|
|
|
|
Name => 'FormExtraUsageRights', |
|
510
|
|
|
|
|
|
|
Notes => 'UR signatures only; only possible value is BarcodePlaintext', |
|
511
|
|
|
|
|
|
|
List => 1, |
|
512
|
|
|
|
|
|
|
}, |
|
513
|
|
|
|
|
|
|
Signature => { |
|
514
|
|
|
|
|
|
|
Name => 'SignatureUsageRights', |
|
515
|
|
|
|
|
|
|
Notes => 'only possible value is Modify', |
|
516
|
|
|
|
|
|
|
List => 1, |
|
517
|
|
|
|
|
|
|
}, |
|
518
|
|
|
|
|
|
|
EF => { |
|
519
|
|
|
|
|
|
|
Name => 'EmbeddedFileUsageRights', |
|
520
|
|
|
|
|
|
|
Notes => 'possible values are Create, Delete, Modify and Import', |
|
521
|
|
|
|
|
|
|
List => 1, |
|
522
|
|
|
|
|
|
|
}, |
|
523
|
|
|
|
|
|
|
Msg => 'UsageRightsMessage', |
|
524
|
|
|
|
|
|
|
P => { |
|
525
|
|
|
|
|
|
|
Name => 'ModificationPermissions', |
|
526
|
|
|
|
|
|
|
Notes => q{ |
|
527
|
|
|
|
|
|
|
1-3 for DocMDP signatures, default 2; true/false for UR3 signatures, default |
|
528
|
|
|
|
|
|
|
false |
|
529
|
|
|
|
|
|
|
}, |
|
530
|
|
|
|
|
|
|
PrintConv => { |
|
531
|
|
|
|
|
|
|
1 => 'No changes permitted', |
|
532
|
|
|
|
|
|
|
2 => 'Fill forms, Create page templates, Sign', |
|
533
|
|
|
|
|
|
|
3 => 'Fill forms, Create page templates, Sign, Create/Delete/Edit annotations', |
|
534
|
|
|
|
|
|
|
'true' => 'Restrict all applications to reader permissions', |
|
535
|
|
|
|
|
|
|
'false' => 'Do not restrict applications to reader permissions', |
|
536
|
|
|
|
|
|
|
}, |
|
537
|
|
|
|
|
|
|
}, |
|
538
|
|
|
|
|
|
|
Action => { |
|
539
|
|
|
|
|
|
|
Name => 'FieldPermissions', |
|
540
|
|
|
|
|
|
|
Notes => 'FieldMDP signatures only', |
|
541
|
|
|
|
|
|
|
PrintConv => { |
|
542
|
|
|
|
|
|
|
'All' => 'Disallow changes to all form fields', |
|
543
|
|
|
|
|
|
|
'Include' => 'Disallow changes to specified form fields', |
|
544
|
|
|
|
|
|
|
'Exclude' => 'Allow changes to specified form fields', |
|
545
|
|
|
|
|
|
|
}, |
|
546
|
|
|
|
|
|
|
}, |
|
547
|
|
|
|
|
|
|
Fields => { |
|
548
|
|
|
|
|
|
|
Notes => 'FieldMDP signatures only', |
|
549
|
|
|
|
|
|
|
Name => 'FormFields', |
|
550
|
|
|
|
|
|
|
List => 1, |
|
551
|
|
|
|
|
|
|
}, |
|
552
|
|
|
|
|
|
|
); |
|
553
|
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
# unknown tags for use in verbose option |
|
555
|
|
|
|
|
|
|
%Image::ExifTool::PDF::Unknown = ( |
|
556
|
|
|
|
|
|
|
GROUPS => { 2 => 'Unknown' }, |
|
557
|
|
|
|
|
|
|
); |
|
558
|
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
560
|
|
|
|
|
|
|
# AutoLoad our writer routines when necessary |
|
561
|
|
|
|
|
|
|
# |
|
562
|
|
|
|
|
|
|
sub AUTOLOAD |
|
563
|
|
|
|
|
|
|
{ |
|
564
|
19
|
|
|
19
|
|
100
|
return Image::ExifTool::DoAutoLoad($AUTOLOAD, @_); |
|
565
|
|
|
|
|
|
|
} |
|
566
|
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
568
|
|
|
|
|
|
|
# Convert from PDF to EXIF-style date/time |
|
569
|
|
|
|
|
|
|
# Inputs: 0) PDF date/time string (D:YYYYmmddHHMMSS+HH'MM') |
|
570
|
|
|
|
|
|
|
# Returns: EXIF date string (YYYY:mm:dd HH:MM:SS+HH:MM) |
|
571
|
|
|
|
|
|
|
sub ConvertPDFDate($) |
|
572
|
|
|
|
|
|
|
{ |
|
573
|
10
|
|
|
10
|
0
|
12
|
my $date = shift; |
|
574
|
|
|
|
|
|
|
# remove optional 'D:' prefix |
|
575
|
10
|
|
|
|
|
32
|
$date =~ s/^D://; |
|
576
|
|
|
|
|
|
|
# fill in default values if necessary |
|
577
|
|
|
|
|
|
|
# YYYYmmddHHMMSS |
|
578
|
10
|
|
|
|
|
27
|
my $default = '00000101000000'; |
|
579
|
10
|
50
|
|
|
|
25
|
if (length $date < length $default) { |
|
580
|
0
|
|
|
|
|
0
|
$date .= substr($default, length $date); |
|
581
|
|
|
|
|
|
|
} |
|
582
|
10
|
50
|
|
|
|
40
|
$date =~ /^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})(.*)/ or return $date; |
|
583
|
10
|
|
|
|
|
49
|
$date = "$1:$2:$3 $4:$5:$6"; |
|
584
|
10
|
50
|
|
|
|
26
|
if ($7) { |
|
585
|
10
|
|
|
|
|
16
|
my $tz = $7; |
|
586
|
10
|
50
|
|
|
|
43
|
if ($tz =~ /^\s*Z/i) { |
|
|
|
50
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
# ignore any "HH'mm'" after the Z (OS X 10.6 does this) |
|
588
|
0
|
|
|
|
|
0
|
$date .= 'Z'; |
|
589
|
|
|
|
|
|
|
# tolerate some improper formatting in timezone specification |
|
590
|
|
|
|
|
|
|
} elsif ($tz =~ /^\s*([-+])\s*(\d+)[': ]+(\d*)/) { |
|
591
|
10
|
|
50
|
|
|
36
|
$date .= $1 . $2 . ':' . ($3 || '00'); |
|
592
|
|
|
|
|
|
|
} |
|
593
|
|
|
|
|
|
|
} |
|
594
|
10
|
|
|
|
|
42
|
return $date; |
|
595
|
|
|
|
|
|
|
} |
|
596
|
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
598
|
|
|
|
|
|
|
# Locate any object in the XRef tables (including compressed objects) |
|
599
|
|
|
|
|
|
|
# Inputs: 0) XRef reference, 1) object reference string (or free object number) |
|
600
|
|
|
|
|
|
|
# Returns: offset to object in file or compressed object reference string, |
|
601
|
|
|
|
|
|
|
# 0 if object is free, or undefined on error |
|
602
|
|
|
|
|
|
|
sub LocateAnyObject($$) |
|
603
|
|
|
|
|
|
|
{ |
|
604
|
238
|
|
|
238
|
0
|
348
|
my ($xref, $ref) = @_; |
|
605
|
238
|
50
|
|
|
|
387
|
return undef unless $xref; |
|
606
|
238
|
100
|
|
|
|
587
|
return $$xref{$ref} if exists $$xref{$ref}; |
|
607
|
|
|
|
|
|
|
# get the object number |
|
608
|
7
|
50
|
|
|
|
38
|
return undef unless $ref =~ /^(\d+)/; |
|
609
|
7
|
|
|
|
|
16
|
my $objNum = $1; |
|
610
|
|
|
|
|
|
|
# return 0 if the object number has been reused (old object is free) |
|
611
|
7
|
100
|
|
|
|
24
|
return 0 if defined $$xref{$objNum}; |
|
612
|
|
|
|
|
|
|
# |
|
613
|
|
|
|
|
|
|
# scan our XRef stream dictionaries for this object |
|
614
|
|
|
|
|
|
|
# |
|
615
|
1
|
50
|
|
|
|
5
|
return undef unless $$xref{dicts}; |
|
616
|
0
|
|
|
|
|
0
|
my $dict; |
|
617
|
0
|
|
|
|
|
0
|
foreach $dict (@{$$xref{dicts}}) { |
|
|
0
|
|
|
|
|
0
|
|
|
618
|
|
|
|
|
|
|
# quick check to see if the object is in the range for this xref stream |
|
619
|
0
|
0
|
|
|
|
0
|
next if $objNum >= $$dict{Size}; |
|
620
|
0
|
|
|
|
|
0
|
my $index = $$dict{Index}; |
|
621
|
0
|
0
|
|
|
|
0
|
next if $objNum < $$index[0]; |
|
622
|
|
|
|
|
|
|
# scan the tables for the specified object |
|
623
|
0
|
|
|
|
|
0
|
my $size = $$dict{_entry_size}; |
|
624
|
0
|
|
|
|
|
0
|
my $num = scalar(@$index) / 2; |
|
625
|
0
|
|
|
|
|
0
|
my $tot = 0; |
|
626
|
0
|
|
|
|
|
0
|
my $i; |
|
627
|
0
|
|
|
|
|
0
|
for ($i=0; $i<$num; ++$i) { |
|
628
|
0
|
|
|
|
|
0
|
my $start = $$index[$i*2]; |
|
629
|
0
|
|
|
|
|
0
|
my $count = $$index[$i*2+1]; |
|
630
|
|
|
|
|
|
|
# table is in ascending order, so quit if we have passed the object |
|
631
|
0
|
0
|
|
|
|
0
|
last if $objNum < $start; |
|
632
|
0
|
0
|
|
|
|
0
|
if ($objNum < $start + $count) { |
|
633
|
0
|
|
|
|
|
0
|
my $offset = $size * ($objNum - $start + $tot); |
|
634
|
0
|
0
|
|
|
|
0
|
last if $offset + $size > length $$dict{_stream}; |
|
635
|
0
|
|
|
|
|
0
|
my @c = unpack("x$offset C$size", $$dict{_stream}); |
|
636
|
|
|
|
|
|
|
# extract values from this table entry |
|
637
|
|
|
|
|
|
|
# (can be 1, 2, 3, 4, etc.. bytes per value) |
|
638
|
0
|
|
|
|
|
0
|
my (@t, $j, $k); |
|
639
|
0
|
|
|
|
|
0
|
my $w = $$dict{W}; |
|
640
|
0
|
|
|
|
|
0
|
for ($j=0; $j<3; ++$j) { |
|
641
|
|
|
|
|
|
|
# use default value if W entry is 0 (as per spec) |
|
642
|
|
|
|
|
|
|
# - 0th element defaults to 1, others default to 0 |
|
643
|
0
|
0
|
|
|
|
0
|
$$w[$j] or $t[$j] = ($j ? 0 : 1), next; |
|
|
|
0
|
|
|
|
|
|
|
644
|
0
|
|
|
|
|
0
|
$t[$j] = shift(@c); |
|
645
|
0
|
|
|
|
|
0
|
for ($k=1; $k < $$w[$j]; ++$k) { |
|
646
|
0
|
|
|
|
|
0
|
$t[$j] = 256 * $t[$j] + shift(@c); |
|
647
|
|
|
|
|
|
|
} |
|
648
|
|
|
|
|
|
|
} |
|
649
|
|
|
|
|
|
|
# by default, use "o g R" as the xref key |
|
650
|
|
|
|
|
|
|
# (o = object number, g = generation number) |
|
651
|
0
|
|
|
|
|
0
|
my $ref2 = "$objNum $t[2] R"; |
|
652
|
0
|
0
|
|
|
|
0
|
if ($t[0] == 1) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
# normal object reference: |
|
654
|
|
|
|
|
|
|
# $t[1]=offset of object from start, $t[2]=generation number |
|
655
|
0
|
|
|
|
|
0
|
$$xref{$ref2} = $t[1]; |
|
656
|
|
|
|
|
|
|
} elsif ($t[0] == 2) { |
|
657
|
|
|
|
|
|
|
# compressed object reference: |
|
658
|
|
|
|
|
|
|
# $t[1]=stream object number, $t[2]=index of object in stream |
|
659
|
0
|
|
|
|
|
0
|
$ref2 = "$objNum 0 R"; |
|
660
|
0
|
|
|
|
|
0
|
$$xref{$ref2} = "I$t[2] $t[1] 0 R"; |
|
661
|
|
|
|
|
|
|
} elsif ($t[0] == 0) { |
|
662
|
|
|
|
|
|
|
# free object: |
|
663
|
|
|
|
|
|
|
# $t[1]=next free object in linked list, $t[2]=generation number |
|
664
|
0
|
|
|
|
|
0
|
$$xref{$ref2} = 0; |
|
665
|
|
|
|
|
|
|
} else { |
|
666
|
|
|
|
|
|
|
# treat as a null object |
|
667
|
0
|
|
|
|
|
0
|
$$xref{$ref2} = undef; |
|
668
|
|
|
|
|
|
|
} |
|
669
|
0
|
|
|
|
|
0
|
$$xref{$objNum} = $t[1]; # remember offsets by object number too |
|
670
|
0
|
0
|
|
|
|
0
|
return $$xref{$ref} if $ref eq $ref2; |
|
671
|
0
|
|
|
|
|
0
|
return 0; # object is free or was reused |
|
672
|
|
|
|
|
|
|
} |
|
673
|
0
|
|
|
|
|
0
|
$tot += $count; |
|
674
|
|
|
|
|
|
|
} |
|
675
|
|
|
|
|
|
|
} |
|
676
|
0
|
|
|
|
|
0
|
return undef; |
|
677
|
|
|
|
|
|
|
} |
|
678
|
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
680
|
|
|
|
|
|
|
# Locate a regular object in the XRef tables (does not include compressed objects) |
|
681
|
|
|
|
|
|
|
# Inputs: 0) XRef reference, 1) object reference string (or free object number) |
|
682
|
|
|
|
|
|
|
# Returns: offset to object in file, 0 if object is free, |
|
683
|
|
|
|
|
|
|
# or undef on error or if object was compressed |
|
684
|
|
|
|
|
|
|
sub LocateObject($$) |
|
685
|
|
|
|
|
|
|
{ |
|
686
|
41
|
|
|
41
|
0
|
82
|
my ($xref, $ref) = @_; |
|
687
|
41
|
|
|
|
|
69
|
my $offset = LocateAnyObject($xref, $ref); |
|
688
|
41
|
50
|
66
|
|
|
151
|
return undef if $offset and $offset =~ /^I/; |
|
689
|
41
|
|
|
|
|
111
|
return $offset; |
|
690
|
|
|
|
|
|
|
} |
|
691
|
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
693
|
|
|
|
|
|
|
# Check that the correct object is located at the specified file offset |
|
694
|
|
|
|
|
|
|
# Inputs: 0) ExifTool ref, 1) object name, 2) object reference string, 3) file offset |
|
695
|
|
|
|
|
|
|
# Returns: first non-blank line at start of object, or undef on error |
|
696
|
|
|
|
|
|
|
sub CheckObject($$$$) |
|
697
|
|
|
|
|
|
|
{ |
|
698
|
217
|
|
|
217
|
0
|
375
|
my ($et, $tag, $ref, $offset) = @_; |
|
699
|
217
|
|
|
|
|
282
|
my ($data, $obj, $dat, $pat); |
|
700
|
|
|
|
|
|
|
|
|
701
|
217
|
|
|
|
|
318
|
my $raf = $$et{RAF}; |
|
702
|
217
|
50
|
|
|
|
623
|
$raf->Seek($offset+$$et{PDFBase}, 0) or $et->Warn("Bad $tag offset"), return undef; |
|
703
|
|
|
|
|
|
|
# verify that we are reading the expected object |
|
704
|
217
|
|
|
|
|
875
|
($obj = $ref) =~ s/R/obj/; |
|
705
|
217
|
|
|
|
|
376
|
for (;;) { |
|
706
|
217
|
50
|
|
|
|
524
|
$raf->ReadLine($data) or $et->Warn("Error reading $tag data"), return undef; |
|
707
|
217
|
50
|
|
|
|
3237
|
last if $data =~ s/^$obj//; |
|
708
|
0
|
0
|
|
|
|
0
|
next if $data =~ /^\s+$/; # keep reading if this was a blank line |
|
709
|
|
|
|
|
|
|
# handle cases where other whitespace characters are used in the object ID string |
|
710
|
0
|
|
|
|
|
0
|
while ($data =~ /^\d+(\s+\d+)?\s*$/) { |
|
711
|
0
|
|
|
|
|
0
|
$raf->ReadLine($dat); |
|
712
|
0
|
|
|
|
|
0
|
$data .= $dat; |
|
713
|
|
|
|
|
|
|
} |
|
714
|
0
|
|
|
|
|
0
|
($pat = $obj) =~ s/ /\\s+/g; |
|
715
|
0
|
0
|
|
|
|
0
|
unless ($data =~ s/$pat//) { |
|
716
|
0
|
|
|
|
|
0
|
$tag = ucfirst $tag; |
|
717
|
0
|
|
|
|
|
0
|
$et->Warn("$tag object ($obj) not found at offset $offset"); |
|
718
|
0
|
|
|
|
|
0
|
return undef; |
|
719
|
|
|
|
|
|
|
} |
|
720
|
0
|
|
|
|
|
0
|
last; |
|
721
|
|
|
|
|
|
|
} |
|
722
|
|
|
|
|
|
|
# read the first line of data from the object (ignoring blank lines and comments) |
|
723
|
217
|
|
|
|
|
360
|
for (;;) { |
|
724
|
434
|
100
|
66
|
|
|
1565
|
last if $data =~ /\S/ and $data !~ /^\s*%/; |
|
725
|
217
|
50
|
|
|
|
575
|
$raf->ReadLine($data) or $et->Warn("Error reading $tag data"), return undef; |
|
726
|
|
|
|
|
|
|
} |
|
727
|
217
|
|
|
|
|
452
|
return $data; |
|
728
|
|
|
|
|
|
|
} |
|
729
|
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
731
|
|
|
|
|
|
|
# Fetch indirect object from file (from inside a stream if required) |
|
732
|
|
|
|
|
|
|
# Inputs: 0) ExifTool object reference, 1) object reference string, |
|
733
|
|
|
|
|
|
|
# 2) xref lookup, 3) object name (for warning messages) |
|
734
|
|
|
|
|
|
|
# Returns: object data or undefined on error |
|
735
|
|
|
|
|
|
|
# Notes: sets $lastFetched to the object reference, or undef if the object |
|
736
|
|
|
|
|
|
|
# was extracted from an encrypted stream |
|
737
|
|
|
|
|
|
|
sub FetchObject($$$$) |
|
738
|
|
|
|
|
|
|
{ |
|
739
|
197
|
|
|
197
|
0
|
323
|
my ($et, $ref, $xref, $tag) = @_; |
|
740
|
197
|
|
|
|
|
268
|
$lastFetched = $ref; # save this for decoding if necessary |
|
741
|
197
|
|
|
|
|
304
|
my $offset = LocateAnyObject($xref, $ref); |
|
742
|
197
|
|
|
|
|
257
|
$lastOffset = $offset; |
|
743
|
197
|
100
|
|
|
|
348
|
unless ($offset) { |
|
744
|
5
|
50
|
|
|
|
11
|
$et->Warn("Bad $tag reference") unless defined $offset; |
|
745
|
5
|
|
|
|
|
10
|
return undef; |
|
746
|
|
|
|
|
|
|
} |
|
747
|
192
|
|
|
|
|
236
|
my ($data, $obj); |
|
748
|
192
|
50
|
|
|
|
448
|
if ($offset =~ s/^I(\d+) //) { |
|
749
|
0
|
|
|
|
|
0
|
my $index = $1; # object index in stream |
|
750
|
0
|
|
|
|
|
0
|
my ($objNum) = split ' ', $ref; # save original object number |
|
751
|
0
|
|
|
|
|
0
|
$ref = $offset; # now a reference to the containing stream object |
|
752
|
0
|
|
|
|
|
0
|
$obj = $streamObjs{$ref}; |
|
753
|
0
|
0
|
|
|
|
0
|
unless ($obj) { |
|
754
|
|
|
|
|
|
|
# don't try to load the same object stream twice |
|
755
|
0
|
0
|
|
|
|
0
|
return undef if defined $obj; |
|
756
|
0
|
|
|
|
|
0
|
$streamObjs{$ref} = ''; |
|
757
|
|
|
|
|
|
|
# load the parent object stream |
|
758
|
0
|
|
|
|
|
0
|
$obj = FetchObject($et, $ref, $xref, $tag); |
|
759
|
|
|
|
|
|
|
# make sure it contains everything we need |
|
760
|
0
|
0
|
0
|
|
|
0
|
return undef unless defined $obj and ref($obj) eq 'HASH'; |
|
761
|
0
|
0
|
0
|
|
|
0
|
return undef unless $$obj{First} and $$obj{N}; |
|
762
|
0
|
0
|
|
|
|
0
|
return undef unless DecodeStream($et, $obj); |
|
763
|
|
|
|
|
|
|
# add a special '_table' entry to this dictionary which contains |
|
764
|
|
|
|
|
|
|
# the list of object number/offset pairs from the stream header |
|
765
|
0
|
|
|
|
|
0
|
my $num = $$obj{N} * 2; |
|
766
|
0
|
|
|
|
|
0
|
my @table = split ' ', $$obj{_stream}, $num; |
|
767
|
0
|
0
|
|
|
|
0
|
return undef unless @table == $num; |
|
768
|
|
|
|
|
|
|
# remove everything before first object in stream |
|
769
|
0
|
|
|
|
|
0
|
$$obj{_stream} = substr($$obj{_stream}, $$obj{First}); |
|
770
|
0
|
|
|
|
|
0
|
$table[$num-1] =~ s/^(\d+).*/$1/s; # trim excess from last number |
|
771
|
0
|
|
|
|
|
0
|
$$obj{_table} = \@table; |
|
772
|
|
|
|
|
|
|
# save the object stream so we don't have to re-load it later |
|
773
|
0
|
|
|
|
|
0
|
$streamObjs{$ref} = $obj; |
|
774
|
|
|
|
|
|
|
} |
|
775
|
|
|
|
|
|
|
# verify that we have the specified object |
|
776
|
0
|
|
|
|
|
0
|
my $i = 2 * $index; |
|
777
|
0
|
|
|
|
|
0
|
my $table = $$obj{_table}; |
|
778
|
0
|
0
|
0
|
|
|
0
|
unless ($index < $$obj{N} and $$table[$i] == $objNum) { |
|
779
|
0
|
|
|
|
|
0
|
$et->Warn("Bad index for stream object $tag"); |
|
780
|
0
|
|
|
|
|
0
|
return undef; |
|
781
|
|
|
|
|
|
|
} |
|
782
|
|
|
|
|
|
|
# extract the object at the specified index in the stream |
|
783
|
|
|
|
|
|
|
# (offsets in table are in sequential order, so we can subtract from |
|
784
|
|
|
|
|
|
|
# the next offset to get the object length) |
|
785
|
0
|
|
|
|
|
0
|
$offset = $$table[$i + 1]; |
|
786
|
0
|
|
0
|
|
|
0
|
my $len = ($$table[$i + 3] || length($$obj{_stream})) - $offset; |
|
787
|
0
|
|
|
|
|
0
|
$data = substr($$obj{_stream}, $offset, $len); |
|
788
|
|
|
|
|
|
|
# avoid re-decrypting data in already decrypted streams |
|
789
|
0
|
0
|
|
|
|
0
|
undef $lastFetched if $cryptStream; |
|
790
|
0
|
|
|
|
|
0
|
return ExtractObject($et, \$data); |
|
791
|
|
|
|
|
|
|
} |
|
792
|
|
|
|
|
|
|
# load the start of the object |
|
793
|
192
|
|
|
|
|
329
|
$data = CheckObject($et, $tag, $ref, $offset); |
|
794
|
192
|
50
|
|
|
|
331
|
return undef unless defined $data; |
|
795
|
|
|
|
|
|
|
|
|
796
|
192
|
|
|
|
|
515
|
return ExtractObject($et, \$data, $$et{RAF}, $xref); |
|
797
|
|
|
|
|
|
|
} |
|
798
|
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
800
|
|
|
|
|
|
|
# Convert PDF value to something readable |
|
801
|
|
|
|
|
|
|
# Inputs: 0) PDF object data |
|
802
|
|
|
|
|
|
|
# Returns: converted object |
|
803
|
|
|
|
|
|
|
sub ReadPDFValue($) |
|
804
|
|
|
|
|
|
|
{ |
|
805
|
148
|
|
|
148
|
0
|
229
|
my $str = shift; |
|
806
|
|
|
|
|
|
|
# decode all strings in an array |
|
807
|
148
|
100
|
|
|
|
237
|
if (ref $str eq 'ARRAY') { |
|
808
|
|
|
|
|
|
|
# create new list to not alter the original data when rewriting |
|
809
|
12
|
|
|
|
|
19
|
my ($val, @vals); |
|
810
|
12
|
|
|
|
|
21
|
foreach $val (@$str) { |
|
811
|
20
|
|
|
|
|
40
|
push @vals, ReadPDFValue($val); |
|
812
|
|
|
|
|
|
|
} |
|
813
|
12
|
|
|
|
|
28
|
return \@vals; |
|
814
|
|
|
|
|
|
|
} |
|
815
|
136
|
50
|
|
|
|
232
|
length $str or return $str; |
|
816
|
136
|
|
|
|
|
210
|
my $delim = substr($str, 0, 1); |
|
817
|
136
|
100
|
|
|
|
300
|
if ($delim eq '(') { # literal string |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
818
|
58
|
50
|
|
|
|
268
|
$str = $1 if $str =~ /^.*?\((.*)\)/s; # remove brackets |
|
819
|
|
|
|
|
|
|
# decode escape sequences in literal strings |
|
820
|
58
|
|
|
|
|
145
|
while ($str =~ /\\(.)/sg) { |
|
821
|
0
|
|
|
|
|
0
|
my $n = pos($str) - 2; |
|
822
|
0
|
|
|
|
|
0
|
my $c = $1; |
|
823
|
0
|
|
|
|
|
0
|
my $r; |
|
824
|
0
|
0
|
|
|
|
0
|
if ($c =~ /[0-7]/) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
# get up to 2 more octal digits |
|
826
|
0
|
0
|
|
|
|
0
|
$c .= $1 if $str =~ /\G([0-7]{1,2})/g; |
|
827
|
|
|
|
|
|
|
# convert octal escape code |
|
828
|
0
|
|
|
|
|
0
|
$r = chr(oct($c) & 0xff); |
|
829
|
|
|
|
|
|
|
} elsif ($c eq "\x0d") { |
|
830
|
|
|
|
|
|
|
# the string is continued if the line ends with '\' |
|
831
|
|
|
|
|
|
|
# (also remove "\x0d\x0a") |
|
832
|
0
|
0
|
|
|
|
0
|
$c .= $1 if $str =~ /\G(\x0a)/g; |
|
833
|
0
|
|
|
|
|
0
|
$r = ''; |
|
834
|
|
|
|
|
|
|
} elsif ($c eq "\x0a") { |
|
835
|
0
|
|
|
|
|
0
|
$r = ''; |
|
836
|
|
|
|
|
|
|
} else { |
|
837
|
|
|
|
|
|
|
# convert escaped characters |
|
838
|
0
|
|
|
|
|
0
|
($r = $c) =~ tr/nrtbf/\n\r\t\b\f/; |
|
839
|
|
|
|
|
|
|
} |
|
840
|
0
|
|
|
|
|
0
|
substr($str, $n, length($c)+1) = $r; |
|
841
|
|
|
|
|
|
|
# continue search after this character |
|
842
|
0
|
|
|
|
|
0
|
pos($str) = $n + length($r); |
|
843
|
|
|
|
|
|
|
} |
|
844
|
58
|
50
|
|
|
|
91
|
Crypt(\$str, $lastFetched) if $cryptString; |
|
845
|
|
|
|
|
|
|
} elsif ($delim eq '<') { # hex string |
|
846
|
|
|
|
|
|
|
# decode hex data |
|
847
|
41
|
|
|
|
|
83
|
$str =~ tr/0-9A-Fa-f//dc; |
|
848
|
41
|
50
|
|
|
|
84
|
$str .= '0' if length($str) & 0x01; # (by the spec) |
|
849
|
41
|
|
|
|
|
131
|
$str = pack('H*', $str); |
|
850
|
41
|
100
|
|
|
|
82
|
Crypt(\$str, $lastFetched) if $cryptString; |
|
851
|
|
|
|
|
|
|
} elsif ($delim eq '/') { # name |
|
852
|
0
|
|
|
|
|
0
|
$str = substr($str, 1); |
|
853
|
|
|
|
|
|
|
# convert escape codes (PDF 1.2 or later) |
|
854
|
0
|
0
|
|
|
|
0
|
$str =~ s/#([0-9a-f]{2})/chr(hex($1))/sgei if $pdfVer >= 1.2; |
|
|
0
|
|
|
|
|
0
|
|
|
855
|
|
|
|
|
|
|
} |
|
856
|
136
|
|
|
|
|
248
|
return $str; |
|
857
|
|
|
|
|
|
|
} |
|
858
|
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
860
|
|
|
|
|
|
|
# Extract PDF object from combination of buffered data and file |
|
861
|
|
|
|
|
|
|
# Inputs: 0) ExifTool object reference, 1) data reference, |
|
862
|
|
|
|
|
|
|
# 2) optional raf reference, 3) optional xref table |
|
863
|
|
|
|
|
|
|
# Returns: converted PDF object or undef on error |
|
864
|
|
|
|
|
|
|
# a) dictionary object --> hash reference |
|
865
|
|
|
|
|
|
|
# b) array object --> array reference |
|
866
|
|
|
|
|
|
|
# c) indirect reference --> scalar reference |
|
867
|
|
|
|
|
|
|
# d) string, name, integer, boolean, null --> scalar value |
|
868
|
|
|
|
|
|
|
# - updates $$dataPt on return to contain unused data |
|
869
|
|
|
|
|
|
|
# - creates two bogus entries ('_stream' and '_tags') in dictionaries to represent |
|
870
|
|
|
|
|
|
|
# the stream data and a list of the tags (not including '_stream' and '_tags') |
|
871
|
|
|
|
|
|
|
# in their original order |
|
872
|
|
|
|
|
|
|
sub ExtractObject($$;$$) |
|
873
|
|
|
|
|
|
|
{ |
|
874
|
754
|
|
|
754
|
0
|
1280
|
my ($et, $dataPt, $raf, $xref) = @_; |
|
875
|
754
|
|
|
|
|
911
|
my (@tags, $data, $objData); |
|
876
|
754
|
|
|
|
|
1022
|
my $dict = { }; |
|
877
|
754
|
|
|
|
|
836
|
my $delim; |
|
878
|
|
|
|
|
|
|
|
|
879
|
754
|
|
|
|
|
807
|
for (;;) { |
|
880
|
824
|
100
|
|
|
|
2330
|
if ($$dataPt =~ /^\s*(<{1,2}|\[|\()/s) { |
|
|
|
50
|
|
|
|
|
|
|
881
|
754
|
|
|
|
|
1252
|
$delim = $1; |
|
882
|
754
|
|
|
|
|
1271
|
$$dataPt =~ s/^\s+//; # remove leading white space |
|
883
|
754
|
|
|
|
|
1303
|
$objData = ReadToNested($dataPt, $raf); |
|
884
|
754
|
50
|
|
|
|
1213
|
return undef unless defined $objData; |
|
885
|
754
|
|
|
|
|
968
|
last; |
|
886
|
|
|
|
|
|
|
} elsif ($$dataPt =~ s{^\s*(\S[^[(/<>\s]*)\s*}{}s) { |
|
887
|
|
|
|
|
|
|
# |
|
888
|
|
|
|
|
|
|
# extract boolean, numerical, string, name, null object or indirect reference |
|
889
|
|
|
|
|
|
|
# |
|
890
|
0
|
|
|
|
|
0
|
$objData = $1; |
|
891
|
|
|
|
|
|
|
# look for an indirect reference |
|
892
|
0
|
0
|
0
|
|
|
0
|
if ($objData =~ /^\d+$/ and $$dataPt =~ s/^(\d+)\s+R//s) { |
|
893
|
0
|
|
|
|
|
0
|
$objData .= "$1 R"; |
|
894
|
0
|
|
|
|
|
0
|
$objData = \$objData; # return scalar reference |
|
895
|
|
|
|
|
|
|
} |
|
896
|
0
|
|
|
|
|
0
|
return $objData; # return simple scalar or scalar reference |
|
897
|
|
|
|
|
|
|
} |
|
898
|
70
|
50
|
33
|
|
|
254
|
$raf and $raf->ReadLine($data) or return undef; |
|
899
|
70
|
|
|
|
|
137
|
$$dataPt .= $data; |
|
900
|
|
|
|
|
|
|
} |
|
901
|
|
|
|
|
|
|
# |
|
902
|
|
|
|
|
|
|
# return literal string or hex string without parsing |
|
903
|
|
|
|
|
|
|
# |
|
904
|
754
|
100
|
100
|
|
|
2146
|
if ($delim eq '(' or $delim eq '<') { |
|
|
|
100
|
|
|
|
|
|
|
905
|
160
|
|
|
|
|
386
|
return $objData; |
|
906
|
|
|
|
|
|
|
# |
|
907
|
|
|
|
|
|
|
# extract array |
|
908
|
|
|
|
|
|
|
# |
|
909
|
|
|
|
|
|
|
} elsif ($delim eq '[') { |
|
910
|
167
|
50
|
|
|
|
561
|
$objData =~ /^.*?\[(.*)\]/s or return undef; |
|
911
|
167
|
|
|
|
|
290
|
my $data = $1; # brackets removed |
|
912
|
167
|
|
|
|
|
184
|
my @list; |
|
913
|
167
|
|
|
|
|
178
|
for (;;) { |
|
914
|
594
|
100
|
|
|
|
1447
|
last unless $data =~ m{\s*(\S[^[(/<>\s]*)}sg; |
|
915
|
427
|
|
|
|
|
711
|
my $val = $1; |
|
916
|
427
|
100
|
|
|
|
1062
|
if ($val =~ /^(<{1,2}|\[|\()/) { |
|
|
|
100
|
|
|
|
|
|
|
917
|
78
|
|
|
|
|
115
|
my $pos = pos($data) - length($val); |
|
918
|
|
|
|
|
|
|
# nested dict, array, literal string or hex string |
|
919
|
78
|
|
|
|
|
123
|
my $buff = substr($data, $pos); |
|
920
|
78
|
|
|
|
|
139
|
$val = ReadToNested(\$buff); |
|
921
|
78
|
50
|
|
|
|
142
|
last unless defined $val; |
|
922
|
78
|
|
|
|
|
135
|
pos($data) = $pos + length($val); |
|
923
|
78
|
|
|
|
|
150
|
$val = ExtractObject($et, \$val); |
|
924
|
|
|
|
|
|
|
} elsif ($val =~ /^\d/) { |
|
925
|
245
|
|
|
|
|
294
|
my $pos = pos($data); |
|
926
|
245
|
100
|
|
|
|
443
|
if ($data =~ /\G\s+(\d+)\s+R/g) { |
|
927
|
37
|
|
|
|
|
104
|
$val = \ "$val $1 R"; # make a reference |
|
928
|
|
|
|
|
|
|
} else { |
|
929
|
208
|
|
|
|
|
368
|
pos($data) = $pos; |
|
930
|
|
|
|
|
|
|
} |
|
931
|
|
|
|
|
|
|
} |
|
932
|
427
|
|
|
|
|
733
|
push @list, $val; |
|
933
|
|
|
|
|
|
|
} |
|
934
|
167
|
|
|
|
|
502
|
return \@list; |
|
935
|
|
|
|
|
|
|
} |
|
936
|
|
|
|
|
|
|
# |
|
937
|
|
|
|
|
|
|
# extract dictionary |
|
938
|
|
|
|
|
|
|
# |
|
939
|
|
|
|
|
|
|
# Note: entries are not necessarily separated by whitespace (doh!) |
|
940
|
|
|
|
|
|
|
# eg) "/Tag/Name", "/Tag(string)", "/Tag[array]", etc are legal! |
|
941
|
|
|
|
|
|
|
# Also, they may be separated by a comment (eg. "/Tag%comment\nValue"), |
|
942
|
|
|
|
|
|
|
# but comments have already been removed |
|
943
|
427
|
|
|
|
|
1841
|
while ($objData =~ m{(\s*)/([^/[\]()<>{}\s]+)\s*(\S[^[(/<>\s]*)}sg) { |
|
944
|
1229
|
|
|
|
|
1984
|
my $tag = $2; |
|
945
|
1229
|
|
|
|
|
1719
|
my $val = $3; |
|
946
|
1229
|
100
|
|
|
|
3407
|
if ($val =~ /^(<{1,2}|\[|\()/) { |
|
|
|
100
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
# nested dict, array, literal string or hex string |
|
948
|
396
|
|
|
|
|
885
|
$objData = substr($objData, pos($objData)-length($val)); |
|
949
|
396
|
|
|
|
|
681
|
$val = ReadToNested(\$objData, $raf); |
|
950
|
396
|
50
|
|
|
|
675
|
last unless defined $val; |
|
951
|
396
|
|
|
|
|
689
|
$val = ExtractObject($et, \$val); |
|
952
|
396
|
|
|
|
|
651
|
pos($objData) = 0; |
|
953
|
|
|
|
|
|
|
} elsif ($val =~ /^\d/) { |
|
954
|
618
|
|
|
|
|
768
|
my $pos = pos($objData); |
|
955
|
618
|
100
|
|
|
|
1423
|
if ($objData =~ /\G\s+(\d+)\s+R/sg) { |
|
956
|
416
|
|
|
|
|
1058
|
$val = \ "$val $1 R"; # make a reference |
|
957
|
|
|
|
|
|
|
} else { |
|
958
|
202
|
|
|
|
|
344
|
pos($objData) = $pos; |
|
959
|
|
|
|
|
|
|
} |
|
960
|
|
|
|
|
|
|
} |
|
961
|
1229
|
50
|
|
|
|
2023
|
if ($$dict{$tag}) { |
|
962
|
|
|
|
|
|
|
# duplicate dictionary entries are not allowed |
|
963
|
0
|
|
|
|
|
0
|
$et->Warn("Duplicate '${tag}' entry in dictionary (ignored)"); |
|
964
|
|
|
|
|
|
|
} else { |
|
965
|
|
|
|
|
|
|
# save the entry |
|
966
|
1229
|
|
|
|
|
1802
|
push @tags, $tag; |
|
967
|
1229
|
|
|
|
|
4446
|
$$dict{$tag} = $val; |
|
968
|
|
|
|
|
|
|
} |
|
969
|
|
|
|
|
|
|
} |
|
970
|
427
|
50
|
|
|
|
742
|
return undef unless @tags; |
|
971
|
427
|
|
|
|
|
749
|
$$dict{_tags} = \@tags; |
|
972
|
427
|
100
|
|
|
|
850
|
return $dict unless $raf; # direct objects can not have streams |
|
973
|
|
|
|
|
|
|
# |
|
974
|
|
|
|
|
|
|
# extract the stream object |
|
975
|
|
|
|
|
|
|
# |
|
976
|
|
|
|
|
|
|
# dictionary must specify stream Length |
|
977
|
262
|
100
|
|
|
|
826
|
my $length = $$dict{Length} or return $dict; |
|
978
|
43
|
100
|
|
|
|
81
|
if (ref $length) { |
|
979
|
25
|
|
|
|
|
40
|
$length = $$length; |
|
980
|
25
|
|
|
|
|
58
|
my $oldpos = $raf->Tell(); |
|
981
|
|
|
|
|
|
|
# get the location of the object specifying the length |
|
982
|
|
|
|
|
|
|
# (compressed objects are not allowed) |
|
983
|
25
|
50
|
|
|
|
65
|
my $offset = LocateObject($xref, $length) or return $dict; |
|
984
|
25
|
50
|
|
|
|
47
|
$offset or $et->Warn('Bad stream Length object'), return $dict; |
|
985
|
25
|
|
|
|
|
45
|
$data = CheckObject($et, 'stream Length', $length, $offset); |
|
986
|
25
|
50
|
|
|
|
74
|
defined $data or return $dict; |
|
987
|
25
|
50
|
|
|
|
88
|
$data =~ /^\s*(\d+)/ or $et->Warn('Stream Length not found'), return $dict; |
|
988
|
25
|
|
|
|
|
53
|
$length = $1; |
|
989
|
25
|
|
|
|
|
63
|
$raf->Seek($oldpos, 0); # restore position to start of stream |
|
990
|
|
|
|
|
|
|
} |
|
991
|
|
|
|
|
|
|
# extract the trailing stream data |
|
992
|
43
|
|
|
|
|
52
|
for (;;) { |
|
993
|
|
|
|
|
|
|
# find the stream token |
|
994
|
86
|
100
|
|
|
|
276
|
if ($$dataPt =~ /(\S+)/) { |
|
995
|
43
|
50
|
|
|
|
113
|
last unless $1 eq 'stream'; |
|
996
|
|
|
|
|
|
|
# read an extra line because it may contain our \x0a |
|
997
|
43
|
50
|
|
|
|
91
|
$$dataPt .= $data if $raf->ReadLine($data); |
|
998
|
|
|
|
|
|
|
# remove our stream header |
|
999
|
43
|
|
|
|
|
268
|
$$dataPt =~ s/^\s*stream(\x0a|\x0d\x0a)//s; |
|
1000
|
43
|
|
|
|
|
121
|
my $more = $length - length($$dataPt); |
|
1001
|
43
|
100
|
|
|
|
105
|
if ($more > 0) { |
|
|
|
50
|
|
|
|
|
|
|
1002
|
28
|
50
|
|
|
|
74
|
unless ($raf->Read($data, $more) == $more) { |
|
1003
|
0
|
|
|
|
|
0
|
$et->Warn('Error reading stream data'); |
|
1004
|
0
|
|
|
|
|
0
|
$$dataPt = ''; |
|
1005
|
0
|
|
|
|
|
0
|
return $dict; |
|
1006
|
|
|
|
|
|
|
} |
|
1007
|
28
|
|
|
|
|
132
|
$$dict{_stream} = $$dataPt . $data; |
|
1008
|
28
|
|
|
|
|
56
|
$$dataPt = ''; |
|
1009
|
|
|
|
|
|
|
} elsif ($more < 0) { |
|
1010
|
15
|
|
|
|
|
90
|
$$dict{_stream} = substr($$dataPt, 0, $length); |
|
1011
|
15
|
|
|
|
|
36
|
$$dataPt = substr($$dataPt, $length); |
|
1012
|
|
|
|
|
|
|
} else { |
|
1013
|
0
|
|
|
|
|
0
|
$$dict{_stream} = $$dataPt; |
|
1014
|
0
|
|
|
|
|
0
|
$$dataPt = ''; |
|
1015
|
|
|
|
|
|
|
} |
|
1016
|
43
|
|
|
|
|
75
|
last; |
|
1017
|
|
|
|
|
|
|
} |
|
1018
|
43
|
50
|
|
|
|
106
|
$raf->ReadLine($data) or last; |
|
1019
|
43
|
|
|
|
|
120
|
$$dataPt .= $data; |
|
1020
|
|
|
|
|
|
|
} |
|
1021
|
43
|
|
|
|
|
155
|
return $dict; |
|
1022
|
|
|
|
|
|
|
} |
|
1023
|
|
|
|
|
|
|
|
|
1024
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
1025
|
|
|
|
|
|
|
# Read to nested delimiter |
|
1026
|
|
|
|
|
|
|
# Inputs: 0) data reference, 1) optional raf reference |
|
1027
|
|
|
|
|
|
|
# Returns: data up to and including matching delimiter (or undef on error) |
|
1028
|
|
|
|
|
|
|
# - updates data reference with trailing data |
|
1029
|
|
|
|
|
|
|
# - unescapes characters in literal strings |
|
1030
|
|
|
|
|
|
|
my %closingDelim = ( # lookup for matching delimiter |
|
1031
|
|
|
|
|
|
|
'(' => ')', |
|
1032
|
|
|
|
|
|
|
'[' => ']', |
|
1033
|
|
|
|
|
|
|
'<' => '>', |
|
1034
|
|
|
|
|
|
|
'<<' => '>>', |
|
1035
|
|
|
|
|
|
|
); |
|
1036
|
|
|
|
|
|
|
sub ReadToNested($;$) |
|
1037
|
|
|
|
|
|
|
{ |
|
1038
|
1228
|
|
|
1228
|
0
|
1728
|
my ($dataPt, $raf) = @_; |
|
1039
|
1228
|
|
|
|
|
1734
|
my @delim = (''); # closing delimiter list, most deeply nested first |
|
1040
|
1228
|
|
|
|
|
2172
|
pos($$dataPt) = 0; # begin at start of data |
|
1041
|
1228
|
|
|
|
|
1707
|
for (;;) { |
|
1042
|
5744
|
100
|
|
|
|
30017
|
unless ($$dataPt =~ /(\\*)(\(|\)|<{1,2}|>{1,2}|\[|\]|%)/g) { |
|
1043
|
|
|
|
|
|
|
# must read some more data |
|
1044
|
1148
|
|
|
|
|
1239
|
my $buff; |
|
1045
|
1148
|
50
|
33
|
|
|
2616
|
last unless $raf and $raf->ReadLine($buff); |
|
1046
|
1148
|
|
|
|
|
2135
|
$$dataPt .= $buff; |
|
1047
|
1148
|
|
|
|
|
1932
|
pos($$dataPt) = length($$dataPt) - length($buff); |
|
1048
|
1148
|
|
|
|
|
1675
|
next; |
|
1049
|
|
|
|
|
|
|
} |
|
1050
|
|
|
|
|
|
|
# are we in a literal string? |
|
1051
|
4596
|
100
|
|
|
|
9080
|
if ($delim[0] eq ')') { |
|
|
|
50
|
|
|
|
|
|
|
1052
|
|
|
|
|
|
|
# ignore escaped delimiters (preceded by odd number of \'s) |
|
1053
|
434
|
50
|
|
|
|
763
|
next if length($1) & 0x01; |
|
1054
|
|
|
|
|
|
|
# ignore all delimiters but unescaped braces |
|
1055
|
434
|
50
|
33
|
|
|
1176
|
next unless $2 eq '(' or $2 eq ')'; |
|
1056
|
|
|
|
|
|
|
} elsif ($2 eq '%') { |
|
1057
|
|
|
|
|
|
|
# ignore the comment |
|
1058
|
0
|
|
|
|
|
0
|
my $pos = pos($$dataPt) - 1; |
|
1059
|
|
|
|
|
|
|
# remove everything from '%' up to but not including newline |
|
1060
|
0
|
|
|
|
|
0
|
$$dataPt =~ /.*/g; |
|
1061
|
0
|
|
|
|
|
0
|
my $end = pos($$dataPt); |
|
1062
|
0
|
|
|
|
|
0
|
$$dataPt = substr($$dataPt, 0, $pos) . substr($$dataPt, $end); |
|
1063
|
0
|
|
|
|
|
0
|
pos($$dataPt) = $pos; |
|
1064
|
0
|
|
|
|
|
0
|
next; |
|
1065
|
|
|
|
|
|
|
} |
|
1066
|
4596
|
100
|
|
|
|
7367
|
if ($closingDelim{$2}) { |
|
1067
|
|
|
|
|
|
|
# push the corresponding closing delimiter |
|
1068
|
2298
|
|
|
|
|
3894
|
unshift @delim, $closingDelim{$2}; |
|
1069
|
2298
|
|
|
|
|
2732
|
next; |
|
1070
|
|
|
|
|
|
|
} |
|
1071
|
2298
|
50
|
|
|
|
3359
|
unless ($2 eq $delim[0]) { |
|
1072
|
|
|
|
|
|
|
# handle the case where we find a ">>>" and interpret it |
|
1073
|
|
|
|
|
|
|
# as ">> >" instead of "> >>" |
|
1074
|
0
|
0
|
0
|
|
|
0
|
next unless $2 eq '>>' and $delim[0] eq '>'; |
|
1075
|
0
|
|
|
|
|
0
|
pos($$dataPt) = pos($$dataPt) - 1; |
|
1076
|
|
|
|
|
|
|
} |
|
1077
|
2298
|
|
|
|
|
2446
|
shift @delim; # remove from nesting list |
|
1078
|
2298
|
100
|
|
|
|
3566
|
next if $delim[0]; # keep going if we have more nested delimiters |
|
1079
|
1228
|
|
|
|
|
1449
|
my $pos = pos($$dataPt); |
|
1080
|
1228
|
|
|
|
|
1924
|
my $buff = substr($$dataPt, 0, $pos); |
|
1081
|
1228
|
|
|
|
|
1930
|
$$dataPt = substr($$dataPt, $pos); |
|
1082
|
1228
|
|
|
|
|
2278
|
return $buff; # success! |
|
1083
|
|
|
|
|
|
|
} |
|
1084
|
0
|
|
|
|
|
0
|
return undef; # didn't find matching delimiter |
|
1085
|
|
|
|
|
|
|
} |
|
1086
|
|
|
|
|
|
|
|
|
1087
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
1088
|
|
|
|
|
|
|
# Decode LZW-encoded data (ref 1) |
|
1089
|
|
|
|
|
|
|
# Inputs: 0) data reference |
|
1090
|
|
|
|
|
|
|
# Returns: true on success and data is decoded, or false and data is untouched |
|
1091
|
|
|
|
|
|
|
sub DecodeLZW($) |
|
1092
|
|
|
|
|
|
|
{ |
|
1093
|
0
|
|
|
0
|
0
|
0
|
my $dataPt = shift; |
|
1094
|
0
|
0
|
|
|
|
0
|
return 0 if length $$dataPt < 4; |
|
1095
|
0
|
|
|
|
|
0
|
my @lzw = (map(chr, 0..255), undef, undef); # LZW code table |
|
1096
|
0
|
|
|
|
|
0
|
my $mask = 0x01ff; # mask for least-significant 9 bits |
|
1097
|
0
|
|
|
|
|
0
|
my @dat = unpack 'n*', $$dataPt . "\0"; |
|
1098
|
0
|
|
|
|
|
0
|
my $word = ($dat[0] << 16) | $dat[1]; |
|
1099
|
0
|
|
|
|
|
0
|
my ($bit, $pos, $bits, $out) = (0, 2, 9, ''); |
|
1100
|
0
|
|
|
|
|
0
|
my $lastVal; |
|
1101
|
0
|
|
|
|
|
0
|
for (;;) { |
|
1102
|
|
|
|
|
|
|
# bits are packed MSB first in PDF LZW (the PDF spec doesn't mention this) |
|
1103
|
0
|
|
|
|
|
0
|
my $shift = 32 - ($bit + $bits); |
|
1104
|
0
|
0
|
|
|
|
0
|
if ($shift < 0) { |
|
1105
|
0
|
0
|
|
|
|
0
|
return 0 if $pos >= @dat; # missing EOD marker |
|
1106
|
0
|
|
|
|
|
0
|
$word = (($word & 0xffff) << 16) | $dat[$pos++]; # read next word |
|
1107
|
0
|
|
|
|
|
0
|
$bit -= 16; |
|
1108
|
0
|
|
|
|
|
0
|
$shift += 16; |
|
1109
|
|
|
|
|
|
|
}; |
|
1110
|
0
|
|
|
|
|
0
|
my $code = ($word >> $shift) & $mask; |
|
1111
|
0
|
|
|
|
|
0
|
$bit += $bits; |
|
1112
|
0
|
|
|
|
|
0
|
my $val = $lzw[$code]; |
|
1113
|
0
|
0
|
|
|
|
0
|
if (defined $val) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
1114
|
|
|
|
|
|
|
# store new code as previous sequence plus 1st char of new sequence |
|
1115
|
0
|
0
|
|
|
|
0
|
push @lzw, $lastVal . substr($val, 0, 1) if defined $lastVal; |
|
1116
|
|
|
|
|
|
|
} elsif ($code == @lzw) { # new code |
|
1117
|
0
|
0
|
|
|
|
0
|
return 0 unless defined $lastVal; |
|
1118
|
|
|
|
|
|
|
# we are using the code that we are about to generate, so the last |
|
1119
|
|
|
|
|
|
|
# character in the new sequence must be the same as the first |
|
1120
|
|
|
|
|
|
|
# character in the previous sequence (makes sense if you think about it) |
|
1121
|
0
|
|
|
|
|
0
|
$val = $lastVal . substr($lastVal, 0, 1); |
|
1122
|
0
|
|
|
|
|
0
|
push @lzw, $val; |
|
1123
|
|
|
|
|
|
|
} elsif ($code == 256) { # clear table |
|
1124
|
0
|
|
|
|
|
0
|
splice @lzw, 258; |
|
1125
|
0
|
|
|
|
|
0
|
$bits = 9; |
|
1126
|
0
|
|
|
|
|
0
|
$mask = 0x1ff; |
|
1127
|
0
|
|
|
|
|
0
|
undef $lastVal; |
|
1128
|
0
|
|
|
|
|
0
|
next; |
|
1129
|
|
|
|
|
|
|
} elsif ($code == 257) { # EOD marker |
|
1130
|
0
|
|
|
|
|
0
|
last; # all done! |
|
1131
|
|
|
|
|
|
|
} else { |
|
1132
|
0
|
|
|
|
|
0
|
return 0; |
|
1133
|
|
|
|
|
|
|
} |
|
1134
|
0
|
|
|
|
|
0
|
$out .= $val; # add this byte sequence to the output |
|
1135
|
|
|
|
|
|
|
# we added a new entry to the LZW table, so we must increase |
|
1136
|
|
|
|
|
|
|
# the bit width if necessary, up to a maximum of 12 |
|
1137
|
0
|
0
|
0
|
|
|
0
|
@lzw >= $mask and $bits < 12 and ++$bits, $mask |= $mask << 1; |
|
1138
|
0
|
|
|
|
|
0
|
$lastVal = $val; |
|
1139
|
|
|
|
|
|
|
} |
|
1140
|
0
|
|
|
|
|
0
|
$$dataPt = $out; # return decompressed data |
|
1141
|
0
|
|
|
|
|
0
|
return 1; |
|
1142
|
|
|
|
|
|
|
} |
|
1143
|
|
|
|
|
|
|
|
|
1144
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
1145
|
|
|
|
|
|
|
# Decode filtered stream |
|
1146
|
|
|
|
|
|
|
# Inputs: 0) ExifTool object reference, 1) dictionary reference |
|
1147
|
|
|
|
|
|
|
# Returns: true if stream has been decoded OK |
|
1148
|
|
|
|
|
|
|
sub DecodeStream($$) |
|
1149
|
|
|
|
|
|
|
{ |
|
1150
|
43
|
|
|
43
|
0
|
53
|
local $_; |
|
1151
|
43
|
|
|
|
|
68
|
my ($et, $dict) = @_; |
|
1152
|
|
|
|
|
|
|
|
|
1153
|
43
|
50
|
|
|
|
77
|
return 0 unless $$dict{_stream}; # no stream to decode |
|
1154
|
|
|
|
|
|
|
|
|
1155
|
|
|
|
|
|
|
# get list of filters |
|
1156
|
43
|
|
|
|
|
70
|
my (@filters, @decodeParms, $filter); |
|
1157
|
43
|
50
|
|
|
|
112
|
if (ref $$dict{Filter} eq 'ARRAY') { |
|
|
|
50
|
|
|
|
|
|
|
1158
|
0
|
|
|
|
|
0
|
@filters = @{$$dict{Filter}}; |
|
|
0
|
|
|
|
|
0
|
|
|
1159
|
|
|
|
|
|
|
} elsif (defined $$dict{Filter}) { |
|
1160
|
0
|
|
|
|
|
0
|
@filters = ($$dict{Filter}); |
|
1161
|
|
|
|
|
|
|
} |
|
1162
|
|
|
|
|
|
|
# be sure we can process all the filters before we take the time to do the decryption |
|
1163
|
43
|
|
|
|
|
72
|
foreach $filter (@filters) { |
|
1164
|
0
|
0
|
|
|
|
0
|
next if $supportedFilter{$filter}; |
|
1165
|
0
|
|
|
|
|
0
|
$et->WarnOnce("Unsupported Filter $filter"); |
|
1166
|
0
|
|
|
|
|
0
|
return 0; |
|
1167
|
|
|
|
|
|
|
} |
|
1168
|
|
|
|
|
|
|
# apply decryption first if required (and if the default encryption |
|
1169
|
|
|
|
|
|
|
# has not been overridden by a Crypt filter. Note: the Crypt filter |
|
1170
|
|
|
|
|
|
|
# must be first in the Filter array: ref 3, page 38) |
|
1171
|
43
|
50
|
33
|
|
|
134
|
unless (defined $$dict{_decrypted} or ($filters[0] and $filters[0] eq '/Crypt')) { |
|
|
|
|
33
|
|
|
|
|
|
1172
|
43
|
|
|
|
|
79
|
CryptStream($dict, $lastFetched); |
|
1173
|
|
|
|
|
|
|
} |
|
1174
|
43
|
50
|
|
|
|
123
|
return 1 unless $$dict{Filter}; # Filter entry is mandatory |
|
1175
|
0
|
0
|
|
|
|
0
|
return 0 if defined $$dict{_filtered}; # avoid double-filtering |
|
1176
|
0
|
|
|
|
|
0
|
$$dict{_filtered} = 1; # set flag to prevent double-filtering |
|
1177
|
|
|
|
|
|
|
|
|
1178
|
|
|
|
|
|
|
# get array of DecodeParms dictionaries |
|
1179
|
0
|
0
|
|
|
|
0
|
if (ref $$dict{DecodeParms} eq 'ARRAY') { |
|
1180
|
0
|
|
|
|
|
0
|
@decodeParms = @{$$dict{DecodeParms}}; |
|
|
0
|
|
|
|
|
0
|
|
|
1181
|
|
|
|
|
|
|
} else { |
|
1182
|
0
|
|
|
|
|
0
|
@decodeParms = ($$dict{DecodeParms}); |
|
1183
|
|
|
|
|
|
|
} |
|
1184
|
|
|
|
|
|
|
|
|
1185
|
0
|
|
|
|
|
0
|
foreach $filter (@filters) { |
|
1186
|
0
|
|
|
|
|
0
|
my $decodeParms = shift @decodeParms; |
|
1187
|
|
|
|
|
|
|
|
|
1188
|
0
|
0
|
|
|
|
0
|
if ($filter eq '/FlateDecode') { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
1189
|
|
|
|
|
|
|
# make sure we support the predictor (if used) before decoding |
|
1190
|
0
|
|
|
|
|
0
|
my $pre; |
|
1191
|
0
|
0
|
|
|
|
0
|
if (ref $decodeParms eq 'HASH') { |
|
1192
|
0
|
|
|
|
|
0
|
$pre = $$decodeParms{Predictor}; |
|
1193
|
0
|
0
|
0
|
|
|
0
|
if ($pre and $pre ne '1' and $pre ne '12') { |
|
|
|
|
0
|
|
|
|
|
|
1194
|
0
|
|
|
|
|
0
|
$et->WarnOnce("FlateDecode Predictor $pre currently not supported"); |
|
1195
|
0
|
|
|
|
|
0
|
return 0; |
|
1196
|
|
|
|
|
|
|
} |
|
1197
|
|
|
|
|
|
|
} |
|
1198
|
0
|
0
|
|
|
|
0
|
if (eval { require Compress::Zlib }) { |
|
|
0
|
|
|
|
|
0
|
|
|
1199
|
0
|
|
|
|
|
0
|
my $inflate = Compress::Zlib::inflateInit(); |
|
1200
|
0
|
|
|
|
|
0
|
my ($buff, $stat); |
|
1201
|
0
|
0
|
|
|
|
0
|
$inflate and ($buff, $stat) = $inflate->inflate($$dict{_stream}); |
|
1202
|
0
|
0
|
0
|
|
|
0
|
if ($inflate and $stat == Compress::Zlib::Z_STREAM_END()) { |
|
1203
|
0
|
|
|
|
|
0
|
$$dict{_stream} = $buff; |
|
1204
|
|
|
|
|
|
|
} else { |
|
1205
|
0
|
|
|
|
|
0
|
$et->Warn('Error inflating stream'); |
|
1206
|
0
|
|
|
|
|
0
|
return 0; |
|
1207
|
|
|
|
|
|
|
} |
|
1208
|
|
|
|
|
|
|
} else { |
|
1209
|
0
|
|
|
|
|
0
|
$et->WarnOnce('Install Compress::Zlib to process filtered streams'); |
|
1210
|
0
|
|
|
|
|
0
|
return 0; |
|
1211
|
|
|
|
|
|
|
} |
|
1212
|
0
|
0
|
0
|
|
|
0
|
next unless $pre and $pre eq '12'; # 12 = 'up' prediction |
|
1213
|
|
|
|
|
|
|
|
|
1214
|
|
|
|
|
|
|
# apply anti-predictor |
|
1215
|
0
|
|
|
|
|
0
|
my $cols = $$decodeParms{Columns}; |
|
1216
|
0
|
0
|
|
|
|
0
|
unless ($cols) { |
|
1217
|
|
|
|
|
|
|
# currently only support 'up' prediction |
|
1218
|
0
|
|
|
|
|
0
|
$et->WarnOnce('No Columns for decoding stream'); |
|
1219
|
0
|
|
|
|
|
0
|
return 0; |
|
1220
|
|
|
|
|
|
|
} |
|
1221
|
0
|
|
|
|
|
0
|
my @bytes = unpack('C*', $$dict{_stream}); |
|
1222
|
0
|
|
|
|
|
0
|
my @pre = (0) x $cols; # initialize predictor array |
|
1223
|
0
|
|
|
|
|
0
|
my $buff = ''; |
|
1224
|
0
|
|
|
|
|
0
|
while (@bytes > $cols) { |
|
1225
|
0
|
0
|
|
|
|
0
|
unless (($_ = shift @bytes) == 2) { |
|
1226
|
0
|
|
|
|
|
0
|
$et->WarnOnce("Unsupported PNG filter $_"); # (yes, PNG) |
|
1227
|
0
|
|
|
|
|
0
|
return 0; |
|
1228
|
|
|
|
|
|
|
} |
|
1229
|
0
|
|
|
|
|
0
|
foreach (@pre) { |
|
1230
|
0
|
|
|
|
|
0
|
$_ = ($_ + shift(@bytes)) & 0xff; |
|
1231
|
|
|
|
|
|
|
} |
|
1232
|
0
|
|
|
|
|
0
|
$buff .= pack('C*', @pre); |
|
1233
|
|
|
|
|
|
|
} |
|
1234
|
0
|
|
|
|
|
0
|
$$dict{_stream} = $buff; |
|
1235
|
|
|
|
|
|
|
|
|
1236
|
|
|
|
|
|
|
} elsif ($filter eq '/Crypt') { |
|
1237
|
|
|
|
|
|
|
|
|
1238
|
|
|
|
|
|
|
# (we shouldn't have to check the _decrypted flag since we |
|
1239
|
|
|
|
|
|
|
# already checked the _filtered flag, but what the heck...) |
|
1240
|
0
|
0
|
|
|
|
0
|
next if defined $$dict{_decrypted}; |
|
1241
|
|
|
|
|
|
|
# assume Identity filter (the default) if DecodeParms are missing |
|
1242
|
0
|
0
|
|
|
|
0
|
next unless ref $decodeParms eq 'HASH'; |
|
1243
|
0
|
|
|
|
|
0
|
my $name = $$decodeParms{Name}; |
|
1244
|
0
|
0
|
0
|
|
|
0
|
next unless defined $name or $name eq 'Identity'; |
|
1245
|
0
|
0
|
|
|
|
0
|
if ($name ne 'StdCF') { |
|
1246
|
0
|
|
|
|
|
0
|
$et->WarnOnce("Unsupported Crypt Filter $name"); |
|
1247
|
0
|
|
|
|
|
0
|
return 0; |
|
1248
|
|
|
|
|
|
|
} |
|
1249
|
0
|
0
|
|
|
|
0
|
unless ($cryptInfo) { |
|
1250
|
0
|
|
|
|
|
0
|
$et->WarnOnce('Missing Encrypt StdCF entry'); |
|
1251
|
0
|
|
|
|
|
0
|
return 0; |
|
1252
|
|
|
|
|
|
|
} |
|
1253
|
|
|
|
|
|
|
# decrypt the stream manually because we want to: |
|
1254
|
|
|
|
|
|
|
# 1) ignore $cryptStream (StmF) setting |
|
1255
|
|
|
|
|
|
|
# 2) ignore EncryptMetadata setting (I can't find mention of how to |
|
1256
|
|
|
|
|
|
|
# reconcile this in the spec., but this would make sense) |
|
1257
|
|
|
|
|
|
|
# 3) avoid adding the crypt key extension (ref 3, page 58, Algorithm 1b) |
|
1258
|
|
|
|
|
|
|
# 4) set _decrypted flag so we will recrypt according to StmF when |
|
1259
|
|
|
|
|
|
|
# writing (since we don't yet write Filter'd streams) |
|
1260
|
0
|
|
|
|
|
0
|
Crypt(\$$dict{_stream}, 'none'); |
|
1261
|
0
|
0
|
|
|
|
0
|
$$dict{_decrypted} = ($cryptStream ? 1 : 0); |
|
1262
|
|
|
|
|
|
|
|
|
1263
|
|
|
|
|
|
|
} elsif ($filter eq '/LZWDecode') { |
|
1264
|
|
|
|
|
|
|
|
|
1265
|
|
|
|
|
|
|
# make sure we don't have any unsupported decoding parameters |
|
1266
|
0
|
0
|
|
|
|
0
|
if (ref $decodeParms eq 'HASH') { |
|
1267
|
0
|
0
|
|
|
|
0
|
if ($$decodeParms{Predictor}) { |
|
|
|
0
|
|
|
|
|
|
|
1268
|
0
|
|
|
|
|
0
|
$et->WarnOnce("LZWDecode Predictor $$decodeParms{Predictor} currently not supported"); |
|
1269
|
0
|
|
|
|
|
0
|
return 0; |
|
1270
|
|
|
|
|
|
|
} elsif ($$decodeParms{EarlyChange}) { |
|
1271
|
0
|
|
|
|
|
0
|
$et->WarnOnce("LZWDecode EarlyChange currently not supported"); |
|
1272
|
0
|
|
|
|
|
0
|
return 0; |
|
1273
|
|
|
|
|
|
|
} |
|
1274
|
|
|
|
|
|
|
} |
|
1275
|
0
|
0
|
|
|
|
0
|
unless (DecodeLZW(\$$dict{_stream})) { |
|
1276
|
0
|
|
|
|
|
0
|
$et->WarnOnce('LZW decompress error'); |
|
1277
|
0
|
|
|
|
|
0
|
return 0; |
|
1278
|
|
|
|
|
|
|
} |
|
1279
|
|
|
|
|
|
|
|
|
1280
|
|
|
|
|
|
|
} elsif ($filter eq '/ASCIIHexDecode') { |
|
1281
|
|
|
|
|
|
|
|
|
1282
|
0
|
|
|
|
|
0
|
$$dict{_stream} =~ s/>.*//; # truncate at '>' (end of data mark) |
|
1283
|
0
|
|
|
|
|
0
|
$$dict{_stream} =~ tr/0-9a-zA-Z//d; # remove illegal characters |
|
1284
|
0
|
|
|
|
|
0
|
$$dict{_stream} = pack 'H*', $$dict{_stream}; |
|
1285
|
|
|
|
|
|
|
|
|
1286
|
|
|
|
|
|
|
} elsif ($filter eq '/ASCII85Decode') { |
|
1287
|
|
|
|
|
|
|
|
|
1288
|
0
|
|
|
|
|
0
|
my ($err, @out, $i); |
|
1289
|
0
|
|
|
|
|
0
|
my ($n, $val) = (0, 0); |
|
1290
|
0
|
|
|
|
|
0
|
foreach (split //, $$dict{_stream}) { |
|
1291
|
0
|
0
|
0
|
|
|
0
|
if ($_ ge '!' and $_ le 'u') {; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
1292
|
0
|
|
|
|
|
0
|
$val = 85 * $val + ord($_) - 33; |
|
1293
|
0
|
0
|
|
|
|
0
|
next unless ++$n == 5; |
|
1294
|
|
|
|
|
|
|
} elsif ($_ eq '~') { |
|
1295
|
0
|
0
|
|
|
|
0
|
$n == 1 and $err = 1; # error to have a single char in the last group of 5 |
|
1296
|
0
|
|
|
|
|
0
|
for ($i=$n; $i<5; ++$i) { $val *= 85; } |
|
|
0
|
|
|
|
|
0
|
|
|
1297
|
|
|
|
|
|
|
} elsif ($_ eq 'z') { |
|
1298
|
0
|
0
|
|
|
|
0
|
$n and $err = 2, last; # error if 'z' isn't the first char |
|
1299
|
0
|
|
|
|
|
0
|
$n = 5; |
|
1300
|
|
|
|
|
|
|
} else { |
|
1301
|
0
|
0
|
|
|
|
0
|
next if /^\s$/; # ignore white space |
|
1302
|
0
|
|
|
|
|
0
|
$err = 3, last; # any other character is an error |
|
1303
|
|
|
|
|
|
|
} |
|
1304
|
0
|
|
|
|
|
0
|
$val = unpack('V', pack('N', $val)); # reverse byte order |
|
1305
|
0
|
|
|
|
|
0
|
while (--$n > 0) { |
|
1306
|
0
|
|
|
|
|
0
|
push @out, $val & 0xff; |
|
1307
|
0
|
|
|
|
|
0
|
$val >>= 8; |
|
1308
|
|
|
|
|
|
|
} |
|
1309
|
0
|
0
|
|
|
|
0
|
last if $_ eq '~'; |
|
1310
|
|
|
|
|
|
|
# (both $n and $val are zero again now) |
|
1311
|
|
|
|
|
|
|
} |
|
1312
|
0
|
0
|
|
|
|
0
|
$err and $et->WarnOnce("ASCII85Decode error $err"); |
|
1313
|
0
|
|
|
|
|
0
|
$$dict{_stream} = pack('C*', @out); |
|
1314
|
|
|
|
|
|
|
} |
|
1315
|
|
|
|
|
|
|
} |
|
1316
|
0
|
|
|
|
|
0
|
return 1; |
|
1317
|
|
|
|
|
|
|
} |
|
1318
|
|
|
|
|
|
|
|
|
1319
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
1320
|
|
|
|
|
|
|
# Initialize state for RC4 en/decryption (ref 2) |
|
1321
|
|
|
|
|
|
|
# Inputs: 0) RC4 key string |
|
1322
|
|
|
|
|
|
|
# Returns: RC4 key hash reference |
|
1323
|
|
|
|
|
|
|
sub RC4Init($) |
|
1324
|
|
|
|
|
|
|
{ |
|
1325
|
22
|
|
|
22
|
0
|
49
|
my @key = unpack('C*', shift); |
|
1326
|
22
|
|
|
|
|
172
|
my @state = (0 .. 255); |
|
1327
|
22
|
|
|
|
|
31
|
my ($i, $j) = (0, 0); |
|
1328
|
22
|
|
|
|
|
38
|
while ($i < 256) { |
|
1329
|
5632
|
|
|
|
|
5874
|
my $st = $state[$i]; |
|
1330
|
5632
|
|
|
|
|
6635
|
$j = ($j + $st + $key[$i % scalar(@key)]) & 0xff; |
|
1331
|
5632
|
|
|
|
|
6082
|
$state[$i++] = $state[$j]; |
|
1332
|
5632
|
|
|
|
|
7473
|
$state[$j] = $st; |
|
1333
|
|
|
|
|
|
|
} |
|
1334
|
22
|
|
|
|
|
80
|
return { State => \@state, XY => [ 0, 0 ] }; |
|
1335
|
|
|
|
|
|
|
} |
|
1336
|
|
|
|
|
|
|
|
|
1337
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
1338
|
|
|
|
|
|
|
# Apply RC4 en/decryption (ref 2) |
|
1339
|
|
|
|
|
|
|
# Inputs: 0) data reference, 1) RC4 key hash reference or RC4 key string |
|
1340
|
|
|
|
|
|
|
# - can call this method directly with a key string, or with with the key |
|
1341
|
|
|
|
|
|
|
# reference returned by RC4Init |
|
1342
|
|
|
|
|
|
|
# - RC4 is a symmetric algorithm, so encryption is the same as decryption |
|
1343
|
|
|
|
|
|
|
sub RC4Crypt($$) |
|
1344
|
|
|
|
|
|
|
{ |
|
1345
|
22
|
|
|
22
|
0
|
39
|
my ($dataPt, $key) = @_; |
|
1346
|
22
|
50
|
|
|
|
46
|
$key = RC4Init($key) unless ref $key eq 'HASH'; |
|
1347
|
22
|
|
|
|
|
34
|
my $state = $$key{State}; |
|
1348
|
22
|
|
|
|
|
26
|
my ($x, $y) = @{$$key{XY}}; |
|
|
22
|
|
|
|
|
33
|
|
|
1349
|
|
|
|
|
|
|
|
|
1350
|
22
|
|
|
|
|
59
|
my @data = unpack('C*', $$dataPt); |
|
1351
|
22
|
|
|
|
|
35
|
foreach (@data) { |
|
1352
|
356
|
|
|
|
|
373
|
$x = ($x + 1) & 0xff; |
|
1353
|
356
|
|
|
|
|
365
|
my $stx = $$state[$x]; |
|
1354
|
356
|
|
|
|
|
367
|
$y = ($stx + $y) & 0xff; |
|
1355
|
356
|
|
|
|
|
383
|
my $sty = $$state[$x] = $$state[$y]; |
|
1356
|
356
|
|
|
|
|
357
|
$$state[$y] = $stx; |
|
1357
|
356
|
|
|
|
|
440
|
$_ ^= $$state[($stx + $sty) & 0xff]; |
|
1358
|
|
|
|
|
|
|
} |
|
1359
|
22
|
|
|
|
|
44
|
$$key{XY} = [ $x, $y ]; |
|
1360
|
22
|
|
|
|
|
155
|
$$dataPt = pack('C*', @data); |
|
1361
|
|
|
|
|
|
|
} |
|
1362
|
|
|
|
|
|
|
|
|
1363
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
1364
|
|
|
|
|
|
|
# Update AES cipher with a bit of data |
|
1365
|
|
|
|
|
|
|
# Inputs: 0) data |
|
1366
|
|
|
|
|
|
|
# Returns: encrypted data |
|
1367
|
|
|
|
|
|
|
my $cipherMore; |
|
1368
|
|
|
|
|
|
|
sub CipherUpdate($) |
|
1369
|
|
|
|
|
|
|
{ |
|
1370
|
0
|
|
|
0
|
0
|
0
|
my $dat = shift; |
|
1371
|
0
|
|
|
|
|
0
|
my $pos = 0; |
|
1372
|
0
|
0
|
|
|
|
0
|
$dat = $cipherMore . $dat if length $dat; |
|
1373
|
0
|
|
|
|
|
0
|
while ($pos + 16 <= length($dat)) { |
|
1374
|
0
|
|
|
|
|
0
|
substr($dat,$pos,16) = Image::ExifTool::AES::Cipher(substr($dat,$pos,16)); |
|
1375
|
0
|
|
|
|
|
0
|
$pos += 16; |
|
1376
|
|
|
|
|
|
|
} |
|
1377
|
0
|
0
|
|
|
|
0
|
if ($pos < length $dat) { |
|
1378
|
0
|
|
|
|
|
0
|
$cipherMore = substr($dat,$pos); |
|
1379
|
0
|
|
|
|
|
0
|
$dat = substr($dat,0,$pos); |
|
1380
|
|
|
|
|
|
|
} else { |
|
1381
|
0
|
|
|
|
|
0
|
$cipherMore = ''; |
|
1382
|
|
|
|
|
|
|
} |
|
1383
|
0
|
|
|
|
|
0
|
return $dat; |
|
1384
|
|
|
|
|
|
|
} |
|
1385
|
|
|
|
|
|
|
|
|
1386
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
1387
|
|
|
|
|
|
|
# Get encrypted hash |
|
1388
|
|
|
|
|
|
|
# Inputs: 0) Password, 1) salt, 2) vector, 3) encryption revision |
|
1389
|
|
|
|
|
|
|
# Returns: hash |
|
1390
|
|
|
|
|
|
|
sub GetHash($$$$) |
|
1391
|
|
|
|
|
|
|
{ |
|
1392
|
6
|
|
|
6
|
0
|
20
|
my ($password, $salt, $vector, $rev) = @_; |
|
1393
|
|
|
|
|
|
|
|
|
1394
|
|
|
|
|
|
|
# return Rev 5 hash |
|
1395
|
6
|
50
|
|
|
|
46
|
return Digest::SHA::sha256($password, $salt, $vector) if $rev == 5; |
|
1396
|
|
|
|
|
|
|
|
|
1397
|
|
|
|
|
|
|
# compute Rev 6 hardened hash |
|
1398
|
|
|
|
|
|
|
# (ref http://code.google.com/p/origami-pdf/source/browse/lib/origami/encryption.rb) |
|
1399
|
0
|
|
|
|
|
0
|
my $blockSize = 32; |
|
1400
|
0
|
|
|
|
|
0
|
my $input = Digest::SHA::sha256($password, $salt, $vector) . ("\0" x 32); |
|
1401
|
0
|
|
|
|
|
0
|
my $key = substr($input, 0, 16); |
|
1402
|
0
|
|
|
|
|
0
|
my $iv = substr($input, 16, 16); |
|
1403
|
0
|
|
|
|
|
0
|
my $h; |
|
1404
|
0
|
|
|
|
|
0
|
my $x = ''; |
|
1405
|
0
|
|
|
|
|
0
|
my $i = 0; |
|
1406
|
0
|
|
0
|
|
|
0
|
while ($i < 64 or $i < ord(substr($x,-1,1))+32) { |
|
1407
|
|
|
|
|
|
|
|
|
1408
|
0
|
|
|
|
|
0
|
my $block = substr($input, 0, $blockSize); |
|
1409
|
0
|
|
|
|
|
0
|
$x = ''; |
|
1410
|
0
|
|
|
|
|
0
|
Image::ExifTool::AES::Crypt(\$x, $key, $iv, 1); |
|
1411
|
0
|
|
|
|
|
0
|
$cipherMore = ''; |
|
1412
|
|
|
|
|
|
|
|
|
1413
|
0
|
|
|
|
|
0
|
my ($j, $digest); |
|
1414
|
0
|
|
|
|
|
0
|
for ($j=0; $j<64; ++$j) { |
|
1415
|
0
|
|
|
|
|
0
|
$x = ''; |
|
1416
|
0
|
0
|
|
|
|
0
|
$x .= CipherUpdate($password) if length $password; |
|
1417
|
0
|
|
|
|
|
0
|
$x .= CipherUpdate($block); |
|
1418
|
0
|
0
|
|
|
|
0
|
$x .= CipherUpdate($vector) if length $vector; |
|
1419
|
0
|
0
|
|
|
|
0
|
if ($j == 0) { |
|
1420
|
0
|
|
|
|
|
0
|
my @a = unpack('C16', $x); |
|
1421
|
0
|
|
|
|
|
0
|
my $sum = 0; |
|
1422
|
0
|
|
|
|
|
0
|
$sum += $_ foreach @a; |
|
1423
|
|
|
|
|
|
|
# set SHA block size (32, 48 or 64 bytes = SHA-256, 384 or 512) |
|
1424
|
0
|
|
|
|
|
0
|
$blockSize = 32 + ($sum % 3) * 16; |
|
1425
|
0
|
|
|
|
|
0
|
$digest = Digest::SHA->new($blockSize * 8); |
|
1426
|
|
|
|
|
|
|
} |
|
1427
|
0
|
|
|
|
|
0
|
$digest->add($x); |
|
1428
|
|
|
|
|
|
|
} |
|
1429
|
|
|
|
|
|
|
|
|
1430
|
0
|
|
|
|
|
0
|
$h = $digest->digest(); |
|
1431
|
0
|
|
|
|
|
0
|
$key = substr($h, 0, 16); |
|
1432
|
0
|
|
|
|
|
0
|
substr($input,0,16) = $h; |
|
1433
|
0
|
|
|
|
|
0
|
$iv = substr($h, 16, 16); |
|
1434
|
0
|
|
|
|
|
0
|
++$i; |
|
1435
|
|
|
|
|
|
|
} |
|
1436
|
0
|
|
|
|
|
0
|
return substr($h, 0, 32); |
|
1437
|
|
|
|
|
|
|
} |
|
1438
|
|
|
|
|
|
|
|
|
1439
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
1440
|
|
|
|
|
|
|
# Initialize decryption |
|
1441
|
|
|
|
|
|
|
# Inputs: 0) ExifTool object reference, 1) Encrypt dictionary reference, |
|
1442
|
|
|
|
|
|
|
# 2) ID from file trailer dictionary |
|
1443
|
|
|
|
|
|
|
# Returns: error string or undef on success (and sets $cryptInfo) |
|
1444
|
|
|
|
|
|
|
sub DecryptInit($$$) |
|
1445
|
|
|
|
|
|
|
{ |
|
1446
|
4
|
|
|
4
|
0
|
22
|
local $_; |
|
1447
|
4
|
|
|
|
|
10
|
my ($et, $encrypt, $id) = @_; |
|
1448
|
|
|
|
|
|
|
|
|
1449
|
4
|
|
|
|
|
5
|
undef $cryptInfo; |
|
1450
|
4
|
50
|
33
|
|
|
19
|
unless ($encrypt and ref $encrypt eq 'HASH') { |
|
1451
|
0
|
|
|
|
|
0
|
return 'Error loading Encrypt object'; |
|
1452
|
|
|
|
|
|
|
} |
|
1453
|
4
|
|
|
|
|
7
|
my $filt = $$encrypt{Filter}; |
|
1454
|
4
|
50
|
33
|
|
|
28
|
unless ($filt and $filt =~ s/^\///) { |
|
1455
|
0
|
|
|
|
|
0
|
return 'Encrypt dictionary has no Filter!'; |
|
1456
|
|
|
|
|
|
|
} |
|
1457
|
|
|
|
|
|
|
# extract some interesting tags |
|
1458
|
4
|
|
50
|
|
|
12
|
my $ver = $$encrypt{V} || 0; |
|
1459
|
4
|
|
100
|
|
|
14
|
my $rev = $$encrypt{R} || 0; |
|
1460
|
4
|
|
|
|
|
10
|
my $enc = "$filt V$ver"; |
|
1461
|
4
|
50
|
|
|
|
14
|
$enc .= ".$rev" if $filt eq 'Standard'; |
|
1462
|
4
|
50
|
33
|
|
|
12
|
$enc .= " ($1)" if $$encrypt{SubFilter} and $$encrypt{SubFilter} =~ /^\/(.*)/; |
|
1463
|
4
|
50
|
100
|
|
|
18
|
$enc .= ' (' . ($$encrypt{Length} || 40) . '-bit)' if $filt eq 'Standard'; |
|
1464
|
4
|
|
|
|
|
11
|
my $tagTablePtr = GetTagTable('Image::ExifTool::PDF::Encrypt'); |
|
1465
|
4
|
|
|
|
|
16
|
$et->HandleTag($tagTablePtr, 'Filter', $enc); |
|
1466
|
4
|
50
|
|
|
|
16
|
if ($filt ne 'Standard') { |
|
|
|
50
|
|
|
|
|
|
|
1467
|
0
|
|
|
|
|
0
|
return "Encryption filter $filt currently not supported"; |
|
1468
|
|
|
|
|
|
|
} elsif (not defined $$encrypt{R}) { |
|
1469
|
0
|
|
|
|
|
0
|
return 'Standard security handler missing revision'; |
|
1470
|
|
|
|
|
|
|
} |
|
1471
|
4
|
50
|
33
|
|
|
26
|
unless ($$encrypt{O} and $$encrypt{P} and $$encrypt{U}) { |
|
|
|
|
33
|
|
|
|
|
|
1472
|
0
|
|
|
|
|
0
|
return 'Incomplete Encrypt specification'; |
|
1473
|
|
|
|
|
|
|
} |
|
1474
|
4
|
50
|
|
|
|
19
|
if ("$ver.$rev" >= 5.6) { |
|
1475
|
|
|
|
|
|
|
# apologize for poor performance (AES is a pure Perl implementation) |
|
1476
|
0
|
|
|
|
|
0
|
$et->Warn('Decryption is very slow for encryption V5.6 or higher', 3); |
|
1477
|
|
|
|
|
|
|
} |
|
1478
|
4
|
|
|
|
|
15
|
$et->HandleTag($tagTablePtr, 'P', $$encrypt{P}); |
|
1479
|
|
|
|
|
|
|
|
|
1480
|
4
|
|
|
|
|
7
|
my %parm; # optional parameters extracted from Encrypt dictionary |
|
1481
|
|
|
|
|
|
|
|
|
1482
|
4
|
100
|
66
|
|
|
25
|
if ($ver == 1 or $ver == 2) { |
|
|
|
50
|
66
|
|
|
|
|
|
1483
|
1
|
|
|
|
|
3
|
$cryptString = $cryptStream = 1; |
|
1484
|
|
|
|
|
|
|
} elsif ($ver == 4 or $ver == 5) { |
|
1485
|
|
|
|
|
|
|
# initialize our $cryptString and $cryptStream flags |
|
1486
|
3
|
|
|
|
|
4
|
foreach ('StrF', 'StmF') { |
|
1487
|
6
|
100
|
|
|
|
13
|
my $flagPt = $_ eq 'StrF' ? \$cryptString : \$cryptStream; |
|
1488
|
6
|
|
|
|
|
12
|
$$flagPt = $$encrypt{$_}; |
|
1489
|
6
|
50
|
33
|
|
|
18
|
undef $$flagPt if $$flagPt and $$flagPt eq '/Identity'; |
|
1490
|
6
|
50
|
33
|
|
|
20
|
return "Unsupported $_ encryption $$flagPt" if $$flagPt and $$flagPt ne '/StdCF'; |
|
1491
|
|
|
|
|
|
|
} |
|
1492
|
3
|
50
|
33
|
|
|
8
|
if ($cryptString or $cryptStream) { |
|
1493
|
|
|
|
|
|
|
return 'Missing or invalid Encrypt StdCF entry' unless ref $$encrypt{CF} eq 'HASH' and |
|
1494
|
3
|
50
|
33
|
|
|
18
|
ref $$encrypt{CF}{StdCF} eq 'HASH' and $$encrypt{CF}{StdCF}{CFM}; |
|
|
|
|
33
|
|
|
|
|
|
1495
|
3
|
|
|
|
|
7
|
my $cryptMeth = $$encrypt{CF}{StdCF}{CFM}; |
|
1496
|
3
|
50
|
|
|
|
15
|
unless ($cryptMeth =~ /^\/(V2|AESV2|AESV3)$/) { |
|
1497
|
0
|
|
|
|
|
0
|
return "Unsupported encryption method $cryptMeth"; |
|
1498
|
|
|
|
|
|
|
} |
|
1499
|
|
|
|
|
|
|
# set "_aesv2" or "_aesv3" flag in %$encrypt hash if AES encryption was used |
|
1500
|
3
|
50
|
|
|
|
18
|
$$encrypt{'_' . lc($1)} = 1 if $cryptMeth =~ /^\/(AESV2|AESV3)$/; |
|
1501
|
|
|
|
|
|
|
} |
|
1502
|
3
|
100
|
|
|
|
9
|
if ($ver == 5) { |
|
1503
|
|
|
|
|
|
|
# validate OE and UE entries |
|
1504
|
2
|
|
|
|
|
5
|
foreach ('OE', 'UE') { |
|
1505
|
4
|
50
|
|
|
|
9
|
return "Missing Encrypt $_ entry" unless $$encrypt{$_}; |
|
1506
|
4
|
|
|
|
|
9
|
$parm{$_} = ReadPDFValue($$encrypt{$_}); |
|
1507
|
4
|
50
|
|
|
|
11
|
return "Invalid Encrypt $_ entry" unless length $parm{$_} == 32; |
|
1508
|
|
|
|
|
|
|
} |
|
1509
|
2
|
|
|
|
|
10
|
require Image::ExifTool::AES; # will need this later |
|
1510
|
|
|
|
|
|
|
} |
|
1511
|
|
|
|
|
|
|
} else { |
|
1512
|
0
|
|
|
|
|
0
|
return "Encryption version $ver currently not supported"; |
|
1513
|
|
|
|
|
|
|
} |
|
1514
|
4
|
50
|
|
|
|
8
|
$id or return "Can't decrypt (no document ID)"; |
|
1515
|
|
|
|
|
|
|
|
|
1516
|
|
|
|
|
|
|
# make sure we have the necessary libraries available |
|
1517
|
4
|
100
|
|
|
|
12
|
if ($ver < 5) { |
|
1518
|
2
|
50
|
|
|
|
4
|
unless (eval { require Digest::MD5 }) { |
|
|
2
|
|
|
|
|
14
|
|
|
1519
|
0
|
|
|
|
|
0
|
return "Install Digest::MD5 to process encrypted PDF"; |
|
1520
|
|
|
|
|
|
|
} |
|
1521
|
|
|
|
|
|
|
} else { |
|
1522
|
2
|
50
|
|
|
|
4
|
unless (eval { require Digest::SHA }) { |
|
|
2
|
|
|
|
|
10
|
|
|
1523
|
0
|
|
|
|
|
0
|
return "Install Digest::SHA to process AES-256 encrypted PDF"; |
|
1524
|
|
|
|
|
|
|
} |
|
1525
|
|
|
|
|
|
|
} |
|
1526
|
|
|
|
|
|
|
|
|
1527
|
|
|
|
|
|
|
# calculate file-level en/decryption key |
|
1528
|
4
|
|
|
|
|
8
|
my $pad = "\x28\xBF\x4E\x5E\x4E\x75\x8A\x41\x64\x00\x4E\x56\xFF\xFA\x01\x08". |
|
1529
|
|
|
|
|
|
|
"\x2E\x2E\x00\xB6\xD0\x68\x3E\x80\x2F\x0C\xA9\xFE\x64\x53\x69\x7A"; |
|
1530
|
4
|
|
|
|
|
10
|
my $o = ReadPDFValue($$encrypt{O}); |
|
1531
|
4
|
|
|
|
|
10
|
my $u = ReadPDFValue($$encrypt{U}); |
|
1532
|
|
|
|
|
|
|
|
|
1533
|
|
|
|
|
|
|
# set flag indicating whether metadata is encrypted |
|
1534
|
|
|
|
|
|
|
# (in version 4 and higher, metadata streams may not be encrypted) |
|
1535
|
4
|
100
|
100
|
|
|
25
|
if ($ver < 4 or not $$encrypt{EncryptMetadata} or $$encrypt{EncryptMetadata} !~ /false/i) { |
|
|
|
|
66
|
|
|
|
|
|
1536
|
3
|
|
|
|
|
7
|
$$encrypt{_meta} = 1; |
|
1537
|
|
|
|
|
|
|
} |
|
1538
|
|
|
|
|
|
|
# try no password first, then try provided password if available |
|
1539
|
4
|
|
|
|
|
6
|
my ($try, $key); |
|
1540
|
4
|
|
|
|
|
7
|
for ($try=0; ; ++$try) { |
|
1541
|
5
|
|
|
|
|
7
|
my $password; |
|
1542
|
5
|
100
|
|
|
|
12
|
if ($try == 0) { |
|
|
|
50
|
|
|
|
|
|
|
1543
|
4
|
|
|
|
|
8
|
$password = ''; |
|
1544
|
|
|
|
|
|
|
} elsif ($try == 1) { |
|
1545
|
1
|
|
|
|
|
4
|
$password = $et->Options('Password'); |
|
1546
|
1
|
50
|
|
|
|
5
|
return 'Document is password protected (use Password option)' unless defined $password; |
|
1547
|
|
|
|
|
|
|
# make sure there is no UTF-8 flag on the password |
|
1548
|
1
|
50
|
33
|
|
|
5
|
if ($] >= 5.006 and (eval { require Encode; Encode::is_utf8($password) } or $@)) { |
|
|
|
|
33
|
|
|
|
|
|
1549
|
|
|
|
|
|
|
# repack by hand if Encode isn't available |
|
1550
|
0
|
0
|
|
|
|
0
|
$password = $@ ? pack('C*',unpack($] < 5.010000 ? 'U0C*' : 'C0C*',$password)) : Encode::encode('utf8',$password); |
|
|
|
0
|
|
|
|
|
|
|
1551
|
|
|
|
|
|
|
} |
|
1552
|
|
|
|
|
|
|
} else { |
|
1553
|
0
|
|
|
|
|
0
|
return 'Incorrect password'; |
|
1554
|
|
|
|
|
|
|
} |
|
1555
|
5
|
100
|
|
|
|
8
|
if ($ver < 5) { |
|
1556
|
2
|
50
|
|
|
|
6
|
if (length $password) { |
|
1557
|
|
|
|
|
|
|
# password must be encoding in PDFDocEncoding (ref iso32000) |
|
1558
|
0
|
|
|
|
|
0
|
$password = $et->Encode($password, 'PDFDoc'); |
|
1559
|
|
|
|
|
|
|
# truncate or pad the password to exactly 32 bytes |
|
1560
|
0
|
0
|
|
|
|
0
|
if (length($password) > 32) { |
|
|
|
0
|
|
|
|
|
|
|
1561
|
0
|
|
|
|
|
0
|
$password = substr($password, 0, 32); |
|
1562
|
|
|
|
|
|
|
} elsif (length($password) < 32) { |
|
1563
|
0
|
|
|
|
|
0
|
$password .= substr($pad, 0, 32-length($password)); |
|
1564
|
|
|
|
|
|
|
} |
|
1565
|
|
|
|
|
|
|
} else { |
|
1566
|
2
|
|
|
|
|
4
|
$password = $pad; |
|
1567
|
|
|
|
|
|
|
} |
|
1568
|
2
|
|
|
|
|
10
|
$key = $password . $o . pack('V', $$encrypt{P}) . $id; |
|
1569
|
2
|
|
|
|
|
3
|
my $rep = 1; |
|
1570
|
2
|
100
|
66
|
|
|
12
|
if ($rev == 3 or $rev == 4) { |
|
1571
|
|
|
|
|
|
|
# must add this if metadata not encrypted |
|
1572
|
1
|
50
|
|
|
|
4
|
$key .= "\xff\xff\xff\xff" unless $$encrypt{_meta}; |
|
1573
|
1
|
|
|
|
|
3
|
$rep += 50; # repeat MD5 50 more times if revision is 3 or greater |
|
1574
|
|
|
|
|
|
|
} |
|
1575
|
2
|
|
|
|
|
3
|
my ($len, $i, $dat); |
|
1576
|
2
|
100
|
|
|
|
5
|
if ($ver == 1) { |
|
1577
|
1
|
|
|
|
|
2
|
$len = 5; |
|
1578
|
|
|
|
|
|
|
} else { |
|
1579
|
1
|
|
50
|
|
|
13
|
$len = $$encrypt{Length} || 40; |
|
1580
|
1
|
50
|
|
|
|
4
|
$len >= 40 or return 'Bad Encrypt Length'; |
|
1581
|
1
|
|
|
|
|
3
|
$len = int($len / 8); |
|
1582
|
|
|
|
|
|
|
} |
|
1583
|
2
|
|
|
|
|
7
|
for ($i=0; $i<$rep; ++$i) { |
|
1584
|
52
|
|
|
|
|
119
|
$key = substr(Digest::MD5::md5($key), 0, $len); |
|
1585
|
|
|
|
|
|
|
} |
|
1586
|
|
|
|
|
|
|
# decrypt U to see if a user password is required |
|
1587
|
2
|
100
|
|
|
|
6
|
if ($rev >= 3) { |
|
1588
|
1
|
|
|
|
|
4
|
$dat = Digest::MD5::md5($pad . $id); |
|
1589
|
1
|
|
|
|
|
4
|
RC4Crypt(\$dat, $key); |
|
1590
|
1
|
|
|
|
|
4
|
for ($i=1; $i<=19; ++$i) { |
|
1591
|
19
|
|
|
|
|
35
|
my @key = unpack('C*', $key); |
|
1592
|
19
|
|
|
|
|
27
|
foreach (@key) { $_ ^= $i; } |
|
|
304
|
|
|
|
|
316
|
|
|
1593
|
19
|
|
|
|
|
42
|
RC4Crypt(\$dat, pack('C*', @key)); |
|
1594
|
|
|
|
|
|
|
} |
|
1595
|
1
|
|
|
|
|
3
|
$dat .= substr($u, 16); |
|
1596
|
|
|
|
|
|
|
} else { |
|
1597
|
1
|
|
|
|
|
2
|
$dat = $pad; |
|
1598
|
1
|
|
|
|
|
3
|
RC4Crypt(\$dat, $key); |
|
1599
|
|
|
|
|
|
|
} |
|
1600
|
2
|
50
|
|
|
|
19
|
last if $dat eq $u; # all done if this was the correct key |
|
1601
|
|
|
|
|
|
|
} else { |
|
1602
|
3
|
50
|
33
|
|
|
13
|
return 'Invalid O or U Encrypt entries' if length($o) < 48 or length($u) < 48; |
|
1603
|
3
|
100
|
|
|
|
6
|
if (length $password) { |
|
1604
|
|
|
|
|
|
|
# Note: this should be good for passwords containing reasonable characters, |
|
1605
|
|
|
|
|
|
|
# but to be bullet-proof we need to apply the SASLprep (IETF RFC 4013) profile |
|
1606
|
|
|
|
|
|
|
# of stringprep (IETF RFC 3454) to the password before encoding in UTF-8 |
|
1607
|
1
|
|
|
|
|
5
|
$password = $et->Encode($password, 'UTF8'); |
|
1608
|
1
|
50
|
|
|
|
5
|
$password = substr($password, 0, 127) if length($password) > 127; |
|
1609
|
|
|
|
|
|
|
} |
|
1610
|
|
|
|
|
|
|
# test for the owner password |
|
1611
|
3
|
|
|
|
|
10
|
my $sha = GetHash($password, substr($o,32,8), substr($u,0,48), $rev); |
|
1612
|
3
|
100
|
|
|
|
11
|
if ($sha eq substr($o, 0, 32)) { |
|
1613
|
2
|
|
|
|
|
7
|
$key = GetHash($password, substr($o,40,8), substr($u,0,48), $rev); |
|
1614
|
2
|
|
|
|
|
5
|
my $dat = ("\0" x 16) . $parm{OE}; |
|
1615
|
|
|
|
|
|
|
# decrypt with no padding |
|
1616
|
2
|
|
|
|
|
8
|
my $err = Image::ExifTool::AES::Crypt(\$dat, $key, 0, 1); |
|
1617
|
2
|
50
|
|
|
|
7
|
return $err if $err; |
|
1618
|
2
|
|
|
|
|
4
|
$key = $dat; # use this as the file decryption key |
|
1619
|
2
|
|
|
|
|
4
|
last; |
|
1620
|
|
|
|
|
|
|
} |
|
1621
|
|
|
|
|
|
|
# test for the user password |
|
1622
|
1
|
|
|
|
|
4
|
$sha = GetHash($password, substr($u,32,8), '', $rev); |
|
1623
|
1
|
50
|
|
|
|
5
|
if ($sha eq substr($u, 0, 32)) { |
|
1624
|
0
|
|
|
|
|
0
|
$key = GetHash($password, substr($u,40,8), '', $rev); |
|
1625
|
0
|
|
|
|
|
0
|
my $dat = ("\0" x 16) . $parm{UE}; |
|
1626
|
0
|
|
|
|
|
0
|
my $err = Image::ExifTool::AES::Crypt(\$dat, $key, 0, 1); |
|
1627
|
0
|
0
|
|
|
|
0
|
return $err if $err; |
|
1628
|
0
|
|
|
|
|
0
|
$key = $dat; # use this as the file decryption key |
|
1629
|
0
|
|
|
|
|
0
|
last; |
|
1630
|
|
|
|
|
|
|
} |
|
1631
|
|
|
|
|
|
|
} |
|
1632
|
|
|
|
|
|
|
} |
|
1633
|
4
|
|
|
|
|
11
|
$$encrypt{_key} = $key; # save the file-level encryption key |
|
1634
|
4
|
|
|
|
|
6
|
$cryptInfo = $encrypt; # save reference to the file-level Encrypt object |
|
1635
|
4
|
|
|
|
|
16
|
return undef; # success! |
|
1636
|
|
|
|
|
|
|
} |
|
1637
|
|
|
|
|
|
|
|
|
1638
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
1639
|
|
|
|
|
|
|
# Decrypt/Encrypt data |
|
1640
|
|
|
|
|
|
|
# Inputs: 0) data ref |
|
1641
|
|
|
|
|
|
|
# 1) PDF object reference to use as crypt key extension (may be 'none' to |
|
1642
|
|
|
|
|
|
|
# avoid extending the encryption key, as for streams with Crypt Filter) |
|
1643
|
|
|
|
|
|
|
# 2) encrypt flag (false for decryption) |
|
1644
|
|
|
|
|
|
|
sub Crypt($$;$) |
|
1645
|
|
|
|
|
|
|
{ |
|
1646
|
29
|
100
|
|
29
|
0
|
84
|
return unless $cryptInfo; |
|
1647
|
4
|
|
|
|
|
9
|
my ($dataPt, $keyExt, $encrypt) = @_; |
|
1648
|
|
|
|
|
|
|
# do not decrypt if the key extension object is undefined |
|
1649
|
|
|
|
|
|
|
# (this doubles as a flag to disable decryption/encryption) |
|
1650
|
4
|
50
|
|
|
|
10
|
return unless defined $keyExt; |
|
1651
|
4
|
|
|
|
|
7
|
my $key = $$cryptInfo{_key}; |
|
1652
|
|
|
|
|
|
|
# apply the necessary crypt key extension |
|
1653
|
4
|
100
|
|
|
|
11
|
unless ($$cryptInfo{_aesv3}) { |
|
1654
|
2
|
50
|
|
|
|
7
|
unless ($keyExt eq 'none') { |
|
1655
|
|
|
|
|
|
|
# extend crypt key using object and generation number |
|
1656
|
2
|
50
|
|
|
|
15
|
unless ($keyExt =~ /^(I\d+ )?(\d+) (\d+)/) { |
|
1657
|
0
|
|
|
|
|
0
|
$$cryptInfo{_error} = 'Invalid object reference for encryption'; |
|
1658
|
0
|
|
|
|
|
0
|
return; |
|
1659
|
|
|
|
|
|
|
} |
|
1660
|
2
|
|
|
|
|
25
|
$key .= substr(pack('V', $2), 0, 3) . substr(pack('V', $3), 0, 2); |
|
1661
|
|
|
|
|
|
|
} |
|
1662
|
|
|
|
|
|
|
# add AES-128 salt if necessary (this little gem is conveniently |
|
1663
|
|
|
|
|
|
|
# omitted from the Adobe PDF 1.6 documentation, causing me to |
|
1664
|
|
|
|
|
|
|
# waste 12 hours trying to figure out why this wasn't working -- |
|
1665
|
|
|
|
|
|
|
# it appears in ISO32000 though, so I should have been using that) |
|
1666
|
2
|
100
|
|
|
|
7
|
$key .= 'sAlT' if $$cryptInfo{_aesv2}; |
|
1667
|
2
|
|
|
|
|
4
|
my $len = length($key); |
|
1668
|
2
|
|
|
|
|
8
|
$key = Digest::MD5::md5($key); # get 16-byte MD5 digest |
|
1669
|
2
|
100
|
|
|
|
8
|
$key = substr($key, 0, $len) if $len < 16; # trim if necessary |
|
1670
|
|
|
|
|
|
|
} |
|
1671
|
|
|
|
|
|
|
# perform the decryption/encryption |
|
1672
|
4
|
100
|
100
|
|
|
17
|
if ($$cryptInfo{_aesv2} or $$cryptInfo{_aesv3}) { |
|
1673
|
3
|
|
|
|
|
15
|
require Image::ExifTool::AES; |
|
1674
|
3
|
|
|
|
|
8
|
my $err = Image::ExifTool::AES::Crypt($dataPt, $key, $encrypt); |
|
1675
|
3
|
50
|
|
|
|
11
|
$err and $$cryptInfo{_error} = $err; |
|
1676
|
|
|
|
|
|
|
} else { |
|
1677
|
1
|
|
|
|
|
3
|
RC4Crypt($dataPt, $key); |
|
1678
|
|
|
|
|
|
|
} |
|
1679
|
|
|
|
|
|
|
} |
|
1680
|
|
|
|
|
|
|
|
|
1681
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
1682
|
|
|
|
|
|
|
# Decrypt/Encrypt stream data |
|
1683
|
|
|
|
|
|
|
# Inputs: 0) dictionary ref, 1) PDF object reference to use as crypt key extension |
|
1684
|
|
|
|
|
|
|
sub CryptStream($$) |
|
1685
|
|
|
|
|
|
|
{ |
|
1686
|
52
|
50
|
|
52
|
0
|
104
|
return unless $cryptStream; |
|
1687
|
0
|
|
|
|
|
0
|
my ($dict, $keyExt) = @_; |
|
1688
|
0
|
|
0
|
|
|
0
|
my $type = $$dict{Type} || ''; |
|
1689
|
|
|
|
|
|
|
# XRef streams are not encrypted (ref 3, page 50), |
|
1690
|
|
|
|
|
|
|
# and Metadata may or may not be encrypted |
|
1691
|
0
|
0
|
0
|
|
|
0
|
if ($cryptInfo and $type ne '/XRef' and |
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1692
|
|
|
|
|
|
|
($$cryptInfo{_meta} or $type ne '/Metadata')) |
|
1693
|
|
|
|
|
|
|
{ |
|
1694
|
0
|
|
|
|
|
0
|
Crypt(\$$dict{_stream}, $keyExt, $$dict{_decrypted}); |
|
1695
|
|
|
|
|
|
|
# toggle _decrypted flag |
|
1696
|
0
|
0
|
|
|
|
0
|
$$dict{_decrypted} = ($$dict{_decrypted} ? undef : 1); |
|
1697
|
|
|
|
|
|
|
} else { |
|
1698
|
0
|
|
|
|
|
0
|
$$dict{_decrypted} = 0; # stream should never be encrypted |
|
1699
|
|
|
|
|
|
|
} |
|
1700
|
|
|
|
|
|
|
} |
|
1701
|
|
|
|
|
|
|
|
|
1702
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
1703
|
|
|
|
|
|
|
# Generate a new PDF tag (based on its ID) and add it to a tag table |
|
1704
|
|
|
|
|
|
|
# Inputs: 0) tag table ref, 1) tag ID |
|
1705
|
|
|
|
|
|
|
# Returns: tag info ref |
|
1706
|
|
|
|
|
|
|
sub NewPDFTag($$) |
|
1707
|
|
|
|
|
|
|
{ |
|
1708
|
0
|
|
|
0
|
0
|
0
|
my ($tagTablePtr, $tag) = @_; |
|
1709
|
0
|
|
|
|
|
0
|
my $name = $tag; |
|
1710
|
|
|
|
|
|
|
# translate URL-like escape sequences |
|
1711
|
0
|
|
|
|
|
0
|
$name =~ s/#([0-9a-f]{2})/chr(hex($1))/ige; |
|
|
0
|
|
|
|
|
0
|
|
|
1712
|
0
|
|
|
|
|
0
|
$name =~ s/[^-\w]+/_/g; # translate invalid characters to an underline |
|
1713
|
0
|
|
|
|
|
0
|
$name =~ s/(^|_)([a-z])/\U$2/g; # start words with upper case |
|
1714
|
0
|
|
|
|
|
0
|
my $tagInfo = { Name => $name }; |
|
1715
|
0
|
|
|
|
|
0
|
AddTagToTable($tagTablePtr, $tag, $tagInfo); |
|
1716
|
0
|
|
|
|
|
0
|
return $tagInfo; |
|
1717
|
|
|
|
|
|
|
} |
|
1718
|
|
|
|
|
|
|
|
|
1719
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
1720
|
|
|
|
|
|
|
# Process AcroForm dictionary to set HasXMLFormsArchitecture flag |
|
1721
|
|
|
|
|
|
|
# Inputs: Same as ProcessDict |
|
1722
|
|
|
|
|
|
|
sub ProcessAcroForm($$$$;$$) |
|
1723
|
|
|
|
|
|
|
{ |
|
1724
|
0
|
|
|
0
|
0
|
0
|
my ($et, $tagTablePtr, $dict, $xref, $nesting, $type) = @_; |
|
1725
|
0
|
0
|
|
|
|
0
|
$et->HandleTag($tagTablePtr, '_has_xfa', $$dict{XFA} ? 'true' : 'false'); |
|
1726
|
0
|
|
|
|
|
0
|
return ProcessDict($et, $tagTablePtr, $dict, $xref, $nesting, $type); |
|
1727
|
|
|
|
|
|
|
} |
|
1728
|
|
|
|
|
|
|
|
|
1729
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
1730
|
|
|
|
|
|
|
# Expand array into a string |
|
1731
|
|
|
|
|
|
|
# Inputs: 0) array ref |
|
1732
|
|
|
|
|
|
|
# Return: string |
|
1733
|
|
|
|
|
|
|
sub ExpandArray($) |
|
1734
|
|
|
|
|
|
|
{ |
|
1735
|
0
|
|
|
0
|
0
|
0
|
my $val = shift; |
|
1736
|
0
|
|
|
|
|
0
|
my @list = @$val; |
|
1737
|
0
|
|
|
|
|
0
|
foreach (@list) { |
|
1738
|
0
|
0
|
|
|
|
0
|
ref $_ eq 'SCALAR' and $_ = "ref($$_)", next; |
|
1739
|
0
|
0
|
|
|
|
0
|
ref $_ eq 'ARRAY' and $_ = ExpandArray($_), next; |
|
1740
|
0
|
0
|
|
|
|
0
|
defined $_ or $_ = '', next; |
|
1741
|
|
|
|
|
|
|
} |
|
1742
|
0
|
|
|
|
|
0
|
return '[' . join(',',@list) . ']'; |
|
1743
|
|
|
|
|
|
|
} |
|
1744
|
|
|
|
|
|
|
|
|
1745
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
1746
|
|
|
|
|
|
|
# Process PDF dictionary extract tag values |
|
1747
|
|
|
|
|
|
|
# Inputs: 0) ExifTool object reference, 1) tag table reference |
|
1748
|
|
|
|
|
|
|
# 2) dictionary reference, 3) cross-reference table reference, |
|
1749
|
|
|
|
|
|
|
# 4) nesting depth, 5) dictionary capture type |
|
1750
|
|
|
|
|
|
|
sub ProcessDict($$$$;$$) |
|
1751
|
|
|
|
|
|
|
{ |
|
1752
|
350
|
|
|
350
|
0
|
665
|
my ($et, $tagTablePtr, $dict, $xref, $nesting, $type) = @_; |
|
1753
|
350
|
|
|
|
|
850
|
my $verbose = $et->Options('Verbose'); |
|
1754
|
350
|
|
|
|
|
582
|
my $unknown = $$tagTablePtr{EXTRACT_UNKNOWN}; |
|
1755
|
350
|
|
33
|
|
|
839
|
my $embedded = (defined $unknown and not $unknown and $et->Options('ExtractEmbedded')); |
|
1756
|
350
|
|
|
|
|
398
|
my @tags = @{$$dict{_tags}}; |
|
|
350
|
|
|
|
|
943
|
|
|
1757
|
350
|
|
|
|
|
486
|
my ($next, %join); |
|
1758
|
350
|
|
|
|
|
398
|
my $index = 0; |
|
1759
|
|
|
|
|
|
|
|
|
1760
|
350
|
|
100
|
|
|
699
|
$nesting = ($nesting || 0) + 1; |
|
1761
|
350
|
50
|
|
|
|
563
|
if ($nesting > 50) { |
|
1762
|
0
|
|
|
|
|
0
|
$et->WarnOnce('Nesting too deep (directory ignored)'); |
|
1763
|
0
|
|
|
|
|
0
|
return; |
|
1764
|
|
|
|
|
|
|
} |
|
1765
|
|
|
|
|
|
|
# save entire dictionary for rewriting if specified |
|
1766
|
350
|
50
|
100
|
|
|
919
|
if ($$et{PDF_CAPTURE} and $$tagTablePtr{VARS} and |
|
|
|
|
66
|
|
|
|
|
|
1767
|
|
|
|
|
|
|
$tagTablePtr->{VARS}->{CAPTURE}) |
|
1768
|
|
|
|
|
|
|
{ |
|
1769
|
66
|
|
|
|
|
82
|
my $name; |
|
1770
|
66
|
|
|
|
|
83
|
foreach $name (@{$tagTablePtr->{VARS}->{CAPTURE}}) { |
|
|
66
|
|
|
|
|
165
|
|
|
1771
|
82
|
100
|
|
|
|
156
|
next if $$et{PDF_CAPTURE}{$name}; |
|
1772
|
|
|
|
|
|
|
# make sure we load the right type if indicated |
|
1773
|
66
|
50
|
66
|
|
|
160
|
next if $type and $type ne $name; |
|
1774
|
66
|
|
|
|
|
118
|
$$et{PDF_CAPTURE}{$name} = $dict; |
|
1775
|
66
|
|
|
|
|
87
|
last; |
|
1776
|
|
|
|
|
|
|
} |
|
1777
|
|
|
|
|
|
|
} |
|
1778
|
|
|
|
|
|
|
# |
|
1779
|
|
|
|
|
|
|
# extract information from all tags in the dictionary |
|
1780
|
|
|
|
|
|
|
# |
|
1781
|
350
|
|
|
|
|
419
|
for (;;) { |
|
1782
|
1473
|
|
|
|
|
1693
|
my ($tag, $isSubDoc); |
|
1783
|
1473
|
100
|
33
|
|
|
2442
|
if (@tags) { |
|
|
|
50
|
|
|
|
|
|
|
1784
|
1123
|
|
|
|
|
1501
|
$tag = shift @tags; |
|
1785
|
|
|
|
|
|
|
} elsif (defined $next and not $next) { |
|
1786
|
0
|
|
|
|
|
0
|
$tag = 'Next'; |
|
1787
|
0
|
|
|
|
|
0
|
$next = 1; |
|
1788
|
|
|
|
|
|
|
} else { |
|
1789
|
350
|
|
|
|
|
426
|
last; |
|
1790
|
|
|
|
|
|
|
} |
|
1791
|
1123
|
|
|
|
|
1650
|
my $val = $$dict{$tag}; |
|
1792
|
1123
|
|
|
|
|
2285
|
my $tagInfo = $et->GetTagInfo($tagTablePtr, $tag); |
|
1793
|
1123
|
100
|
33
|
|
|
2355
|
if ($tagInfo) { |
|
|
|
50
|
33
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1794
|
419
|
50
|
|
|
|
659
|
undef $tagInfo if $$tagInfo{NoProcess}; |
|
1795
|
|
|
|
|
|
|
} elsif ($embedded and $tag =~ /^(.*?)(\d+)$/ and |
|
1796
|
|
|
|
|
|
|
$$tagTablePtr{$1} and (ref $val ne 'SCALAR' or not $fetched{$$val})) |
|
1797
|
|
|
|
|
|
|
{ |
|
1798
|
0
|
|
|
|
|
0
|
my ($name, $num) = ($1, $2); |
|
1799
|
0
|
|
|
|
|
0
|
$tagInfo = $et->GetTagInfo($tagTablePtr, $name); |
|
1800
|
0
|
0
|
0
|
|
|
0
|
if (ref $tagInfo eq 'HASH' and $$tagInfo{JoinStreams}) { |
|
1801
|
0
|
|
|
|
|
0
|
$fetched{$$val} = 1; |
|
1802
|
0
|
|
|
|
|
0
|
my $obj = FetchObject($et, $$val, $xref, $tag); |
|
1803
|
0
|
0
|
|
|
|
0
|
$join{$name} = [] unless $join{$name}; |
|
1804
|
0
|
0
|
0
|
|
|
0
|
next unless ref $obj eq 'HASH' and $$obj{_stream}; |
|
1805
|
|
|
|
|
|
|
# save all the stream data to join later |
|
1806
|
0
|
|
|
|
|
0
|
DecodeStream($et, $obj); |
|
1807
|
0
|
|
|
|
|
0
|
$join{$name}->[$num] = $$obj{_stream}; |
|
1808
|
0
|
|
|
|
|
0
|
undef $tagInfo; # don't process |
|
1809
|
|
|
|
|
|
|
} else { |
|
1810
|
0
|
|
|
|
|
0
|
$isSubDoc = 1; # treat as a sub-document |
|
1811
|
|
|
|
|
|
|
} |
|
1812
|
|
|
|
|
|
|
} |
|
1813
|
1123
|
50
|
|
|
|
1600
|
if ($verbose) { |
|
1814
|
0
|
|
|
|
|
0
|
my ($val2, $extra); |
|
1815
|
0
|
0
|
|
|
|
0
|
if (ref $val eq 'SCALAR') { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
1816
|
0
|
|
|
|
|
0
|
$extra = ", indirect object ($$val)"; |
|
1817
|
0
|
0
|
0
|
|
|
0
|
if ($fetched{$$val}) { |
|
|
|
0
|
|
|
|
|
|
|
1818
|
0
|
|
|
|
|
0
|
$val2 = "ref($$val)"; |
|
1819
|
|
|
|
|
|
|
} elsif ($tag eq 'Next' and not $next) { |
|
1820
|
|
|
|
|
|
|
# handle 'Next' links after all others |
|
1821
|
0
|
|
|
|
|
0
|
$next = 0; |
|
1822
|
0
|
|
|
|
|
0
|
next; |
|
1823
|
|
|
|
|
|
|
} else { |
|
1824
|
0
|
|
|
|
|
0
|
$fetched{$$val} = 1; |
|
1825
|
0
|
|
|
|
|
0
|
$val = FetchObject($et, $$val, $xref, $tag); |
|
1826
|
0
|
0
|
|
|
|
0
|
unless (defined $val) { |
|
1827
|
0
|
|
|
|
|
0
|
my $str; |
|
1828
|
0
|
0
|
|
|
|
0
|
if (defined $lastOffset) { |
|
1829
|
0
|
|
|
|
|
0
|
$val2 = ''; |
|
1830
|
0
|
|
|
|
|
0
|
$str = 'Object was freed'; |
|
1831
|
|
|
|
|
|
|
} else { |
|
1832
|
0
|
|
|
|
|
0
|
$val2 = ''; |
|
1833
|
0
|
|
|
|
|
0
|
$str = 'Error reading object'; |
|
1834
|
|
|
|
|
|
|
} |
|
1835
|
0
|
|
|
|
|
0
|
$et->VPrint(0, "$$et{INDENT}${str}:\n"); |
|
1836
|
|
|
|
|
|
|
} |
|
1837
|
|
|
|
|
|
|
} |
|
1838
|
|
|
|
|
|
|
} elsif (ref $val eq 'HASH') { |
|
1839
|
0
|
|
|
|
|
0
|
$extra = ', direct dictionary'; |
|
1840
|
|
|
|
|
|
|
} elsif (ref $val eq 'ARRAY') { |
|
1841
|
0
|
|
|
|
|
0
|
$extra = ', direct array of ' . scalar(@$val) . ' objects'; |
|
1842
|
|
|
|
|
|
|
} else { |
|
1843
|
0
|
|
|
|
|
0
|
$extra = ', direct object'; |
|
1844
|
|
|
|
|
|
|
} |
|
1845
|
0
|
|
|
|
|
0
|
my $isSubdir; |
|
1846
|
0
|
0
|
|
|
|
0
|
if (ref $val eq 'HASH') { |
|
|
|
0
|
|
|
|
|
|
|
1847
|
0
|
|
|
|
|
0
|
$isSubdir = 1; |
|
1848
|
|
|
|
|
|
|
} elsif (ref $val eq 'ARRAY') { |
|
1849
|
|
|
|
|
|
|
# recurse into objects in arrays only if they are lists of |
|
1850
|
|
|
|
|
|
|
# dictionaries or indirect objects which could be dictionaries |
|
1851
|
0
|
0
|
|
|
|
0
|
$isSubdir = 1 if @$val; |
|
1852
|
0
|
|
|
|
|
0
|
foreach (@$val) { |
|
1853
|
0
|
0
|
0
|
|
|
0
|
next if ref $_ eq 'HASH' or ref $_ eq 'SCALAR'; |
|
1854
|
0
|
|
|
|
|
0
|
undef $isSubdir; |
|
1855
|
0
|
|
|
|
|
0
|
last; |
|
1856
|
|
|
|
|
|
|
} |
|
1857
|
|
|
|
|
|
|
} |
|
1858
|
0
|
0
|
|
|
|
0
|
if ($isSubdir) { |
|
1859
|
|
|
|
|
|
|
# create bogus subdirectory to recurse into this dict |
|
1860
|
0
|
0
|
|
|
|
0
|
$tagInfo or $tagInfo = { |
|
1861
|
|
|
|
|
|
|
Name => $tag, |
|
1862
|
|
|
|
|
|
|
SubDirectory => { TagTable => 'Image::ExifTool::PDF::Unknown' }, |
|
1863
|
|
|
|
|
|
|
}; |
|
1864
|
|
|
|
|
|
|
} else { |
|
1865
|
0
|
0
|
|
|
|
0
|
$val2 = ExpandArray($val) if ref $val eq 'ARRAY'; |
|
1866
|
|
|
|
|
|
|
# generate tag info if we will use it later |
|
1867
|
0
|
0
|
0
|
|
|
0
|
if (not $tagInfo and defined $val and $unknown) { |
|
|
|
|
0
|
|
|
|
|
|
1868
|
0
|
|
|
|
|
0
|
$tagInfo = NewPDFTag($tagTablePtr, $tag); |
|
1869
|
|
|
|
|
|
|
} |
|
1870
|
|
|
|
|
|
|
} |
|
1871
|
0
|
|
0
|
|
|
0
|
$et->VerboseInfo($tag, $tagInfo, |
|
1872
|
|
|
|
|
|
|
Value => $val2 || $val, |
|
1873
|
|
|
|
|
|
|
Extra => $extra, |
|
1874
|
|
|
|
|
|
|
Index => $index++, |
|
1875
|
|
|
|
|
|
|
); |
|
1876
|
0
|
0
|
|
|
|
0
|
next unless defined $val; |
|
1877
|
|
|
|
|
|
|
} |
|
1878
|
1123
|
100
|
|
|
|
1610
|
unless ($tagInfo) { |
|
1879
|
|
|
|
|
|
|
# add any tag found in Info dictionary to table |
|
1880
|
704
|
50
|
|
|
|
1170
|
next unless $unknown; |
|
1881
|
0
|
|
|
|
|
0
|
$tagInfo = NewPDFTag($tagTablePtr, $tag); |
|
1882
|
|
|
|
|
|
|
} |
|
1883
|
|
|
|
|
|
|
# increment document number if necessary |
|
1884
|
419
|
|
|
|
|
558
|
my ($oldDocNum, $oldNumTags); |
|
1885
|
419
|
50
|
|
|
|
722
|
if ($isSubDoc) { |
|
1886
|
0
|
|
|
|
|
0
|
$oldDocNum = $$et{DOC_NUM}; |
|
1887
|
0
|
|
|
|
|
0
|
$oldNumTags = $$et{NUM_FOUND}; |
|
1888
|
0
|
|
|
|
|
0
|
$$et{DOC_NUM} = ++$$et{DOC_COUNT}; |
|
1889
|
|
|
|
|
|
|
} |
|
1890
|
419
|
100
|
|
|
|
720
|
if ($$tagInfo{SubDirectory}) { |
|
1891
|
|
|
|
|
|
|
# process the subdirectory |
|
1892
|
332
|
|
|
|
|
391
|
my @subDicts; |
|
1893
|
332
|
100
|
|
|
|
636
|
if (ref $val eq 'ARRAY') { |
|
1894
|
|
|
|
|
|
|
# hack to convert array to dictionary if necessary |
|
1895
|
37
|
50
|
33
|
|
|
97
|
if ($$tagInfo{ConvertToDict} and @$val == 2 and not ref $$val[0]) { |
|
|
|
|
33
|
|
|
|
|
|
1896
|
0
|
|
|
|
|
0
|
my $tg = $$val[0]; |
|
1897
|
0
|
|
|
|
|
0
|
$tg =~ s(^/)(); # remove name |
|
1898
|
0
|
|
|
|
|
0
|
my %dict = ( _tags => [ $tg ], $tg => $$val[1] ); |
|
1899
|
0
|
|
|
|
|
0
|
@subDicts = ( \%dict ); |
|
1900
|
|
|
|
|
|
|
} else { |
|
1901
|
37
|
|
|
|
|
40
|
@subDicts = @{$val}; |
|
|
37
|
|
|
|
|
71
|
|
|
1902
|
|
|
|
|
|
|
} |
|
1903
|
|
|
|
|
|
|
} else { |
|
1904
|
295
|
|
|
|
|
431
|
@subDicts = ( $val ); |
|
1905
|
|
|
|
|
|
|
} |
|
1906
|
|
|
|
|
|
|
# loop through all values of this tag |
|
1907
|
332
|
|
|
|
|
457
|
for (;;) { |
|
1908
|
664
|
100
|
|
|
|
1171
|
my $subDict = shift @subDicts or last; |
|
1909
|
|
|
|
|
|
|
# save last fetched object in case we fetch another one here |
|
1910
|
332
|
|
|
|
|
452
|
my $prevFetched = $lastFetched; |
|
1911
|
332
|
100
|
|
|
|
660
|
if (ref $subDict eq 'SCALAR') { |
|
1912
|
|
|
|
|
|
|
# only fetch once (other copies are obsolete) |
|
1913
|
244
|
100
|
|
|
|
480
|
next if $fetched{$$subDict}; |
|
1914
|
197
|
100
|
|
|
|
322
|
if ($$tagInfo{IgnoreDuplicates}) { |
|
1915
|
28
|
|
|
|
|
53
|
my $flag = "ProcessedPDF_$tag"; |
|
1916
|
28
|
50
|
|
|
|
57
|
if ($$et{$flag}) { |
|
1917
|
0
|
0
|
|
|
|
0
|
next if $et->WarnOnce("Ignored duplicate $tag dictionary", 2); |
|
1918
|
|
|
|
|
|
|
} else { |
|
1919
|
28
|
|
|
|
|
52
|
$$et{$flag} = 1; |
|
1920
|
|
|
|
|
|
|
} |
|
1921
|
|
|
|
|
|
|
} |
|
1922
|
|
|
|
|
|
|
# load dictionary via an indirect reference |
|
1923
|
197
|
|
|
|
|
302
|
$fetched{$$subDict} = 1; |
|
1924
|
197
|
|
|
|
|
360
|
my $obj = FetchObject($et, $$subDict, $xref, $tag); |
|
1925
|
197
|
100
|
|
|
|
410
|
unless (defined $obj) { |
|
1926
|
5
|
50
|
|
|
|
12
|
unless (defined $lastOffset) { |
|
1927
|
0
|
|
|
|
|
0
|
$et->Warn("Error reading $tag object ($$subDict)"); |
|
1928
|
|
|
|
|
|
|
} |
|
1929
|
5
|
|
|
|
|
11
|
next; |
|
1930
|
|
|
|
|
|
|
} |
|
1931
|
192
|
|
|
|
|
337
|
$subDict = $obj; |
|
1932
|
|
|
|
|
|
|
} |
|
1933
|
280
|
50
|
|
|
|
572
|
if (ref $subDict eq 'ARRAY') { |
|
1934
|
|
|
|
|
|
|
# convert array of key/value pairs to a hash |
|
1935
|
0
|
0
|
|
|
|
0
|
next if @$subDict < 2; |
|
1936
|
0
|
|
|
|
|
0
|
my %hash = ( _tags => [] ); |
|
1937
|
0
|
|
|
|
|
0
|
while (@$subDict >= 2) { |
|
1938
|
0
|
|
|
|
|
0
|
my $key = shift @$subDict; |
|
1939
|
0
|
|
|
|
|
0
|
$key =~ s/^\///; |
|
1940
|
0
|
|
|
|
|
0
|
push @{$hash{_tags}}, $key; |
|
|
0
|
|
|
|
|
0
|
|
|
1941
|
0
|
|
|
|
|
0
|
$hash{$key} = shift @$subDict; |
|
1942
|
|
|
|
|
|
|
} |
|
1943
|
0
|
|
|
|
|
0
|
$subDict = \%hash; |
|
1944
|
|
|
|
|
|
|
} else { |
|
1945
|
280
|
50
|
|
|
|
473
|
next unless ref $subDict eq 'HASH'; |
|
1946
|
|
|
|
|
|
|
} |
|
1947
|
|
|
|
|
|
|
# set flag to re-crypt all strings when rewriting if the dictionary |
|
1948
|
|
|
|
|
|
|
# came from an encrypted stream |
|
1949
|
280
|
50
|
|
|
|
443
|
$$subDict{_needCrypt}{'*'} = 1 unless $lastFetched; |
|
1950
|
280
|
|
|
|
|
848
|
my $subTablePtr = GetTagTable($tagInfo->{SubDirectory}->{TagTable}); |
|
1951
|
280
|
50
|
|
|
|
445
|
if (not $verbose) { |
|
|
|
0
|
|
|
|
|
|
|
1952
|
280
|
|
50
|
|
|
904
|
my $proc = $$subTablePtr{PROCESS_PROC} || \&ProcessDict; |
|
1953
|
280
|
|
|
|
|
652
|
&$proc($et, $subTablePtr, $subDict, $xref, $nesting); |
|
1954
|
|
|
|
|
|
|
} elsif ($next) { |
|
1955
|
|
|
|
|
|
|
# handle 'Next' links at this level to avoid deep recursion |
|
1956
|
0
|
|
|
|
|
0
|
undef $next; |
|
1957
|
0
|
|
|
|
|
0
|
$index = 0; |
|
1958
|
0
|
|
|
|
|
0
|
$tagTablePtr = $subTablePtr; |
|
1959
|
0
|
|
|
|
|
0
|
$dict = $subDict; |
|
1960
|
0
|
|
|
|
|
0
|
@tags = @{$$subDict{_tags}}; |
|
|
0
|
|
|
|
|
0
|
|
|
1961
|
0
|
|
|
|
|
0
|
$et->VerboseDir($tag, scalar(@tags)); |
|
1962
|
|
|
|
|
|
|
} else { |
|
1963
|
0
|
|
|
|
|
0
|
my $oldIndent = $$et{INDENT}; |
|
1964
|
0
|
|
|
|
|
0
|
my $oldDir = $$et{DIR_NAME}; |
|
1965
|
0
|
|
|
|
|
0
|
$$et{INDENT} .= '| '; |
|
1966
|
0
|
|
|
|
|
0
|
$$et{DIR_NAME} = $tag; |
|
1967
|
0
|
|
|
|
|
0
|
$et->VerboseDir($tag, scalar(@{$$subDict{_tags}})); |
|
|
0
|
|
|
|
|
0
|
|
|
1968
|
0
|
|
|
|
|
0
|
ProcessDict($et, $subTablePtr, $subDict, $xref, $nesting); |
|
1969
|
0
|
|
|
|
|
0
|
$$et{INDENT} = $oldIndent; |
|
1970
|
0
|
|
|
|
|
0
|
$$et{DIR_NAME} = $oldDir; |
|
1971
|
|
|
|
|
|
|
} |
|
1972
|
280
|
|
|
|
|
805
|
$lastFetched = $prevFetched; |
|
1973
|
|
|
|
|
|
|
} |
|
1974
|
|
|
|
|
|
|
} else { |
|
1975
|
|
|
|
|
|
|
# fetch object if necessary |
|
1976
|
|
|
|
|
|
|
# (OS X 10.6 writes indirect objects in the Info dictionary!) |
|
1977
|
87
|
50
|
|
|
|
144
|
if (ref $val eq 'SCALAR') { |
|
1978
|
0
|
|
|
|
|
0
|
my $prevFetched = $lastFetched; |
|
1979
|
|
|
|
|
|
|
# (note: fetching the same object multiple times is OK here) |
|
1980
|
0
|
|
|
|
|
0
|
$val = FetchObject($et, $$val, $xref, $tag); |
|
1981
|
0
|
0
|
|
|
|
0
|
if (defined $val) { |
|
1982
|
0
|
|
|
|
|
0
|
$val = ReadPDFValue($val); |
|
1983
|
|
|
|
|
|
|
# set flag to re-encrypt if necessary if rewritten |
|
1984
|
0
|
0
|
|
|
|
0
|
$$dict{_needCrypt}{$tag} = ($lastFetched ? 0 : 1) if $cryptString; |
|
|
|
0
|
|
|
|
|
|
|
1985
|
0
|
|
|
|
|
0
|
$lastFetched = $prevFetched; # restore last fetched object reference |
|
1986
|
|
|
|
|
|
|
} |
|
1987
|
|
|
|
|
|
|
} else { |
|
1988
|
87
|
|
|
|
|
144
|
$val = ReadPDFValue($val); |
|
1989
|
|
|
|
|
|
|
} |
|
1990
|
87
|
100
|
|
|
|
231
|
if (ref $val) { |
|
|
|
50
|
|
|
|
|
|
|
1991
|
12
|
50
|
|
|
|
31
|
if (ref $val eq 'ARRAY') { |
|
1992
|
12
|
50
|
|
|
|
34
|
delete $$et{LIST_TAGS}{$tagInfo} if $$tagInfo{List}; |
|
1993
|
12
|
|
|
|
|
16
|
my $v; |
|
1994
|
12
|
|
|
|
|
24
|
foreach $v (@$val) { |
|
1995
|
20
|
|
|
|
|
46
|
$et->FoundTag($tagInfo, $v); |
|
1996
|
|
|
|
|
|
|
} |
|
1997
|
|
|
|
|
|
|
} |
|
1998
|
|
|
|
|
|
|
} elsif (defined $val) { |
|
1999
|
|
|
|
|
|
|
# convert from UTF-16 (big endian) to UTF-8 or Latin if necessary |
|
2000
|
|
|
|
|
|
|
# unless this is binary data (hex-encoded strings would not have been converted) |
|
2001
|
75
|
|
100
|
|
|
313
|
my $format = $$tagInfo{Format} || $$tagInfo{Writable} || 'string'; |
|
2002
|
75
|
100
|
|
|
|
162
|
$val = ConvertPDFDate($val) if $format eq 'date'; |
|
2003
|
75
|
50
|
33
|
|
|
289
|
if (not $$tagInfo{Binary} and $val =~ /[\x18-\x1f\x80-\xff]/) { |
|
2004
|
|
|
|
|
|
|
# text string is already in Unicode if it starts with "\xfe\xff", |
|
2005
|
|
|
|
|
|
|
# otherwise we must first convert from PDFDocEncoding |
|
2006
|
0
|
0
|
|
|
|
0
|
$val = $et->Decode($val, ($val=~s/^\xfe\xff// ? 'UCS2' : 'PDFDoc'), 'MM'); |
|
2007
|
|
|
|
|
|
|
} |
|
2008
|
75
|
100
|
66
|
|
|
187
|
if ($$tagInfo{List} and not $$et{OPTIONS}{NoPDFList}) { |
|
2009
|
|
|
|
|
|
|
# separate tokens in comma or whitespace delimited lists |
|
2010
|
12
|
50
|
|
|
|
70
|
my @values = ($val =~ /,/) ? split /,+\s*/, $val : split ' ', $val; |
|
2011
|
12
|
|
|
|
|
30
|
foreach $val (@values) { |
|
2012
|
28
|
|
|
|
|
58
|
$et->FoundTag($tagInfo, $val); |
|
2013
|
|
|
|
|
|
|
} |
|
2014
|
|
|
|
|
|
|
} else { |
|
2015
|
|
|
|
|
|
|
# a simple tag value |
|
2016
|
63
|
|
|
|
|
157
|
$et->FoundTag($tagInfo, $val); |
|
2017
|
|
|
|
|
|
|
} |
|
2018
|
|
|
|
|
|
|
} |
|
2019
|
|
|
|
|
|
|
} |
|
2020
|
419
|
50
|
|
|
|
731
|
if ($isSubDoc) { |
|
2021
|
|
|
|
|
|
|
# restore original document number |
|
2022
|
0
|
|
|
|
|
0
|
$$et{DOC_NUM} = $oldDocNum; |
|
2023
|
0
|
0
|
|
|
|
0
|
--$$et{DOC_COUNT} if $oldNumTags == $$et{NUM_FOUND}; |
|
2024
|
|
|
|
|
|
|
} |
|
2025
|
|
|
|
|
|
|
} |
|
2026
|
|
|
|
|
|
|
# |
|
2027
|
|
|
|
|
|
|
# extract information from joined streams if necessary |
|
2028
|
|
|
|
|
|
|
# |
|
2029
|
|
|
|
|
|
|
|
|
2030
|
350
|
50
|
|
|
|
573
|
if (%join) { |
|
2031
|
0
|
|
|
|
|
0
|
my ($tag, $i); |
|
2032
|
0
|
|
|
|
|
0
|
foreach $tag (sort keys %join) { |
|
2033
|
0
|
|
|
|
|
0
|
my $list = $join{$tag}; |
|
2034
|
0
|
0
|
0
|
|
|
0
|
last unless defined $$list[1] and $$list[1] =~ /^%.*?([\x0d\x0a]*)/; |
|
2035
|
0
|
|
|
|
|
0
|
my $buff = "%!PS-Adobe-3.0$1"; # add PS header with same line break |
|
2036
|
0
|
|
|
|
|
0
|
for ($i=1; defined $$list[$i]; ++$i) { |
|
2037
|
0
|
|
|
|
|
0
|
$buff .= $$list[$i]; |
|
2038
|
0
|
|
|
|
|
0
|
undef $$list[$i]; # free memory |
|
2039
|
|
|
|
|
|
|
} |
|
2040
|
|
|
|
|
|
|
# increment document number for tags extracted from embedded EPS |
|
2041
|
0
|
|
|
|
|
0
|
my $oldDocNum = $$et{DOC_NUM}; |
|
2042
|
0
|
|
|
|
|
0
|
my $oldNumTags = $$et{NUM_FOUND}; |
|
2043
|
0
|
|
|
|
|
0
|
$$et{DOC_NUM} = ++$$et{DOC_COUNT}; |
|
2044
|
|
|
|
|
|
|
# extract PostScript information |
|
2045
|
0
|
|
|
|
|
0
|
$et->HandleTag($tagTablePtr, $tag, $buff); |
|
2046
|
0
|
|
|
|
|
0
|
$$et{DOC_NUM} = $oldDocNum; |
|
2047
|
|
|
|
|
|
|
# revert document counter if we didn't add any new tags |
|
2048
|
0
|
0
|
|
|
|
0
|
--$$et{DOC_COUNT} if $oldNumTags == $$et{NUM_FOUND}; |
|
2049
|
0
|
|
|
|
|
0
|
delete $$et{DOC_NUM}; |
|
2050
|
|
|
|
|
|
|
} |
|
2051
|
|
|
|
|
|
|
} |
|
2052
|
|
|
|
|
|
|
# |
|
2053
|
|
|
|
|
|
|
# extract information from stream object if it exists (eg. Metadata stream) |
|
2054
|
|
|
|
|
|
|
# |
|
2055
|
350
|
|
|
|
|
414
|
for (;;) { # (cheap goto) |
|
2056
|
350
|
100
|
|
|
|
914
|
last unless $$dict{_stream}; |
|
2057
|
43
|
|
|
|
|
84
|
my $tag = '_stream'; |
|
2058
|
|
|
|
|
|
|
# add Subtype (if it exists) to stream name and remove leading '/' |
|
2059
|
43
|
100
|
|
|
|
166
|
($tag = $$dict{Subtype} . $tag) =~ s/^\/// if $$dict{Subtype}; |
|
2060
|
43
|
50
|
|
|
|
107
|
last unless $$tagTablePtr{$tag}; |
|
2061
|
43
|
50
|
|
|
|
87
|
my $tagInfo = $et->GetTagInfo($tagTablePtr, $tag) or last; |
|
2062
|
43
|
50
|
|
|
|
111
|
unless ($$tagInfo{SubDirectory}) { |
|
2063
|
|
|
|
|
|
|
# don't build filter lists across different images |
|
2064
|
0
|
|
|
|
|
0
|
delete $$et{LIST_TAGS}{$$tagTablePtr{Filter}}; |
|
2065
|
|
|
|
|
|
|
# we arrive here only when extracting embedded images |
|
2066
|
|
|
|
|
|
|
# - only extract known image types and ignore others |
|
2067
|
0
|
|
0
|
|
|
0
|
my $filter = $$dict{Filter} || ''; |
|
2068
|
0
|
0
|
|
|
|
0
|
$filter = @$filter[-1] if ref $filter eq 'ARRAY'; # (get last Filter type) |
|
2069
|
0
|
|
|
|
|
0
|
my $result; |
|
2070
|
0
|
0
|
0
|
|
|
0
|
if ($filter eq '/DCTDecode' or $filter eq '/JPXDecode') { |
|
2071
|
0
|
0
|
|
|
|
0
|
DecodeStream($et, $dict) or last; |
|
2072
|
|
|
|
|
|
|
# save the image itself |
|
2073
|
0
|
|
|
|
|
0
|
$et->FoundTag($tagInfo, \$$dict{_stream}); |
|
2074
|
|
|
|
|
|
|
# extract information from embedded image |
|
2075
|
0
|
|
|
|
|
0
|
$result = $et->ExtractInfo(\$$dict{_stream}, { ReEntry => 1 }); |
|
2076
|
|
|
|
|
|
|
} |
|
2077
|
0
|
0
|
|
|
|
0
|
unless ($result) { |
|
2078
|
0
|
0
|
|
|
|
0
|
$et->FoundTag('FileType', defined $result ? '(unknown)' : '(unsupported)'); |
|
2079
|
|
|
|
|
|
|
} |
|
2080
|
0
|
|
|
|
|
0
|
last; |
|
2081
|
|
|
|
|
|
|
} |
|
2082
|
|
|
|
|
|
|
# decode stream if necessary |
|
2083
|
43
|
50
|
|
|
|
107
|
DecodeStream($et, $dict) or last; |
|
2084
|
43
|
50
|
|
|
|
81
|
if ($verbose > 2) { |
|
2085
|
0
|
|
|
|
|
0
|
$et->VPrint(2,"$$et{INDENT}$$et{DIR_NAME} stream data\n"); |
|
2086
|
0
|
|
|
|
|
0
|
$et->VerboseDump(\$$dict{_stream}); |
|
2087
|
|
|
|
|
|
|
} |
|
2088
|
|
|
|
|
|
|
# extract information from stream |
|
2089
|
|
|
|
|
|
|
my %dirInfo = ( |
|
2090
|
|
|
|
|
|
|
DataPt => \$$dict{_stream}, |
|
2091
|
|
|
|
|
|
|
DataLen => length $$dict{_stream}, |
|
2092
|
|
|
|
|
|
|
DirStart => 0, |
|
2093
|
|
|
|
|
|
|
DirLen => length $$dict{_stream}, |
|
2094
|
43
|
|
|
|
|
182
|
Parent => 'PDF', |
|
2095
|
|
|
|
|
|
|
); |
|
2096
|
43
|
|
|
|
|
129
|
my $subTablePtr = GetTagTable($tagInfo->{SubDirectory}->{TagTable}); |
|
2097
|
43
|
50
|
|
|
|
115
|
unless ($et->ProcessDirectory(\%dirInfo, $subTablePtr)) { |
|
2098
|
0
|
|
|
|
|
0
|
$et->Warn("Error processing $$tagInfo{Name} information"); |
|
2099
|
|
|
|
|
|
|
} |
|
2100
|
43
|
|
|
|
|
159
|
last; |
|
2101
|
|
|
|
|
|
|
} |
|
2102
|
|
|
|
|
|
|
} |
|
2103
|
|
|
|
|
|
|
|
|
2104
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
2105
|
|
|
|
|
|
|
# Extract information from PDF file |
|
2106
|
|
|
|
|
|
|
# Inputs: 0) ExifTool object reference, 1) dirInfo reference |
|
2107
|
|
|
|
|
|
|
# Returns: 0 if not a PDF file, 1 on success, otherwise a negative error number |
|
2108
|
|
|
|
|
|
|
sub ReadPDF($$) |
|
2109
|
|
|
|
|
|
|
{ |
|
2110
|
37
|
|
|
37
|
0
|
54
|
my ($et, $dirInfo) = @_; |
|
2111
|
37
|
|
|
|
|
58
|
my $raf = $$dirInfo{RAF}; |
|
2112
|
37
|
|
|
|
|
103
|
my $verbose = $et->Options('Verbose'); |
|
2113
|
37
|
|
|
|
|
54
|
my ($buff, $encrypt, $id); |
|
2114
|
|
|
|
|
|
|
# |
|
2115
|
|
|
|
|
|
|
# validate PDF file |
|
2116
|
|
|
|
|
|
|
# |
|
2117
|
|
|
|
|
|
|
# (linearization dictionary must be in the first 1024 bytes of the file) |
|
2118
|
37
|
50
|
|
|
|
108
|
$raf->Read($buff, 1024) >= 8 or return 0; |
|
2119
|
37
|
50
|
|
|
|
226
|
$buff =~ /^(\s*)%PDF-(\d+\.\d+)/ or return 0; |
|
2120
|
37
|
50
|
|
|
|
143
|
$$et{PDFBase} = length $1 and $et->Warn('PDF header is not at start of file',1); |
|
2121
|
37
|
|
|
|
|
66
|
$pdfVer = $2; |
|
2122
|
37
|
|
|
|
|
152
|
$et->SetFileType(); # set the FileType tag |
|
2123
|
37
|
50
|
|
|
|
143
|
$et->Warn("The PDF $pdfVer specification is held hostage by the ISO") if $pdfVer >= 2.0; |
|
2124
|
|
|
|
|
|
|
# store PDFVersion tag |
|
2125
|
37
|
|
|
|
|
75
|
my $tagTablePtr = GetTagTable('Image::ExifTool::PDF::Root'); |
|
2126
|
37
|
|
|
|
|
109
|
$et->HandleTag($tagTablePtr, 'Version', $pdfVer); |
|
2127
|
37
|
|
|
|
|
74
|
$tagTablePtr = GetTagTable('Image::ExifTool::PDF::Main'); |
|
2128
|
|
|
|
|
|
|
# |
|
2129
|
|
|
|
|
|
|
# check for a linearized PDF (only if reading) |
|
2130
|
|
|
|
|
|
|
# |
|
2131
|
37
|
|
|
|
|
725
|
my $capture = $$et{PDF_CAPTURE}; |
|
2132
|
37
|
100
|
|
|
|
75
|
unless ($capture) { |
|
2133
|
18
|
|
|
|
|
27
|
my $lin = 'false'; |
|
2134
|
18
|
50
|
|
|
|
74
|
if ($buff =~ /<
|
|
2135
|
18
|
|
|
|
|
61
|
$buff = substr($buff, pos($buff) - 2); |
|
2136
|
18
|
|
|
|
|
39
|
my $dict = ExtractObject($et, \$buff); |
|
2137
|
18
|
0
|
33
|
|
|
106
|
if (ref $dict eq 'HASH' and $$dict{Linearized} and $$dict{L}) { |
|
|
|
|
33
|
|
|
|
|
|
2138
|
0
|
0
|
|
|
|
0
|
if (not $$et{VALUE}{FileSize}) { |
|
|
|
0
|
|
|
|
|
|
|
2139
|
0
|
|
|
|
|
0
|
undef $lin; # can't determine if it is linearized |
|
2140
|
|
|
|
|
|
|
} elsif ($$dict{L} == $$et{VALUE}{FileSize} - $$et{PDFBase}) { |
|
2141
|
0
|
|
|
|
|
0
|
$lin = 'true'; |
|
2142
|
|
|
|
|
|
|
} |
|
2143
|
|
|
|
|
|
|
} |
|
2144
|
|
|
|
|
|
|
} |
|
2145
|
18
|
50
|
|
|
|
69
|
$et->HandleTag($tagTablePtr, '_linearized', $lin) if $lin; |
|
2146
|
|
|
|
|
|
|
} |
|
2147
|
|
|
|
|
|
|
# |
|
2148
|
|
|
|
|
|
|
# read the xref tables referenced from startxref at the end of the file |
|
2149
|
|
|
|
|
|
|
# |
|
2150
|
37
|
|
|
|
|
71
|
my @xrefOffsets; |
|
2151
|
37
|
50
|
|
|
|
94
|
$raf->Seek(0, 2) or return -2; |
|
2152
|
|
|
|
|
|
|
# the %%EOF must occur within the last 1024 bytes of the file (PDF spec, appendix H) |
|
2153
|
37
|
|
|
|
|
125
|
my $len = $raf->Tell(); |
|
2154
|
37
|
50
|
|
|
|
106
|
$len = 1024 if $len > 1024; |
|
2155
|
37
|
50
|
|
|
|
83
|
$raf->Seek(-$len, 2) or return -2; |
|
2156
|
37
|
50
|
|
|
|
107
|
$raf->Read($buff, $len) == $len or return -3; |
|
2157
|
|
|
|
|
|
|
# find the LAST xref table in the file (may be multiple %%EOF marks, |
|
2158
|
|
|
|
|
|
|
# and comments between "startxref" and "%%EOF") |
|
2159
|
37
|
50
|
|
|
|
343
|
$buff =~ /^.*startxref(\s+)(\d+)(\s+)(%[^\x0d\x0a]*\s+)*%%EOF/s or return -4; |
|
2160
|
37
|
|
|
|
|
121
|
my $ws = $1 . $3; |
|
2161
|
37
|
|
|
|
|
69
|
my $xr = $2; |
|
2162
|
37
|
|
|
|
|
93
|
push @xrefOffsets, $xr, 'Main'; |
|
2163
|
|
|
|
|
|
|
# set input record separator |
|
2164
|
37
|
50
|
|
|
|
261
|
local $/ = $ws =~ /(\x0d\x0a|\x0d|\x0a)/ ? $1 : "\x0a"; |
|
2165
|
37
|
|
|
|
|
71
|
my (%xref, @mainDicts, %loaded, $mainFree); |
|
2166
|
37
|
|
|
|
|
75
|
my ($xrefSize, $mainDictSize) = (0, 0); |
|
2167
|
|
|
|
|
|
|
# initialize variables to capture when rewriting |
|
2168
|
37
|
100
|
|
|
|
71
|
if ($capture) { |
|
2169
|
19
|
|
|
|
|
50
|
$capture->{startxref} = $xr; |
|
2170
|
19
|
|
|
|
|
38
|
$capture->{xref} = \%xref; |
|
2171
|
19
|
|
|
|
|
43
|
$capture->{newline} = $/; |
|
2172
|
19
|
|
|
|
|
40
|
$capture->{mainFree} = $mainFree = { }; |
|
2173
|
|
|
|
|
|
|
} |
|
2174
|
|
|
|
|
|
|
XRef: |
|
2175
|
37
|
|
|
|
|
73
|
while (@xrefOffsets) { |
|
2176
|
70
|
|
|
|
|
111
|
my $offset = shift @xrefOffsets; |
|
2177
|
70
|
|
|
|
|
95
|
my $type = shift @xrefOffsets; |
|
2178
|
70
|
50
|
|
|
|
147
|
next if $loaded{$offset}; # avoid infinite recursion |
|
2179
|
70
|
50
|
|
|
|
262
|
unless ($raf->Seek($offset+$$et{PDFBase}, 0)) { |
|
2180
|
0
|
0
|
|
|
|
0
|
%loaded or return -5; |
|
2181
|
0
|
|
|
|
|
0
|
$et->Warn('Bad offset for secondary xref table'); |
|
2182
|
0
|
|
|
|
|
0
|
next; |
|
2183
|
|
|
|
|
|
|
} |
|
2184
|
|
|
|
|
|
|
# Note: care must be taken because ReadLine may read more than we want if |
|
2185
|
|
|
|
|
|
|
# the newline sequence for this table is different than the rest of the file |
|
2186
|
70
|
|
|
|
|
120
|
for (;;) { |
|
2187
|
70
|
50
|
|
|
|
185
|
unless ($raf->ReadLine($buff)) { |
|
2188
|
0
|
0
|
|
|
|
0
|
%loaded or return -6; |
|
2189
|
0
|
|
|
|
|
0
|
$et->Warn('Bad offset for secondary xref table'); |
|
2190
|
0
|
|
|
|
|
0
|
next XRef; |
|
2191
|
|
|
|
|
|
|
} |
|
2192
|
70
|
50
|
|
|
|
273
|
last if $buff =~/\S/; # skip blank lines |
|
2193
|
|
|
|
|
|
|
} |
|
2194
|
70
|
|
|
|
|
102
|
my $loadXRefStream; |
|
2195
|
70
|
50
|
|
|
|
369
|
if ($buff =~ s/^\s*xref\s+//s) { |
|
|
|
0
|
|
|
|
|
|
|
2196
|
|
|
|
|
|
|
# load xref table |
|
2197
|
70
|
|
|
|
|
100
|
for (;;) { |
|
2198
|
|
|
|
|
|
|
# read another line if necessary (skipping blank lines) |
|
2199
|
177
|
|
50
|
|
|
491
|
$raf->ReadLine($buff) or return -6 until $buff =~ /\S/; |
|
2200
|
177
|
100
|
|
|
|
560
|
last if $buff =~ s/^\s*trailer([\s<[(])/$1/s; |
|
2201
|
107
|
50
|
|
|
|
379
|
$buff =~ s/^\s*(\d+)\s+(\d+)\s+//s or return -4; |
|
2202
|
107
|
|
|
|
|
285
|
my ($start, $num) = ($1, $2); |
|
2203
|
107
|
50
|
|
|
|
253
|
$raf->Seek(-length($buff), 1) or return -4; |
|
2204
|
107
|
|
|
|
|
171
|
my $i; |
|
2205
|
107
|
|
|
|
|
243
|
for ($i=0; $i<$num; ++$i) { |
|
2206
|
622
|
50
|
|
|
|
1191
|
$raf->Read($buff, 20) == 20 or return -6; |
|
2207
|
622
|
50
|
|
|
|
1960
|
$buff =~ /^\s*(\d{10}) (\d{5}) (f|n)/s or return -4; |
|
2208
|
622
|
|
|
|
|
822
|
my $num = $start + $i; |
|
2209
|
622
|
100
|
|
|
|
917
|
$xrefSize = $num if $num > $xrefSize; |
|
2210
|
|
|
|
|
|
|
# locate object to generate entry from stream if necessary |
|
2211
|
|
|
|
|
|
|
# (must do this before we test $xref{$num}) |
|
2212
|
622
|
50
|
|
|
|
899
|
LocateAnyObject(\%xref, $num) if $xref{dicts}; |
|
2213
|
|
|
|
|
|
|
# save offset for newest copy of all objects |
|
2214
|
|
|
|
|
|
|
# (or next object number for free objects) |
|
2215
|
622
|
100
|
|
|
|
1040
|
unless (defined $xref{$num}) { |
|
2216
|
526
|
|
|
|
|
1187
|
my ($offset, $gen) = (int($1), int($2)); |
|
2217
|
526
|
|
|
|
|
808
|
$xref{$num} = $offset; |
|
2218
|
526
|
100
|
|
|
|
910
|
if ($3 eq 'f') { |
|
2219
|
|
|
|
|
|
|
# save free objects in last xref table for rewriting |
|
2220
|
52
|
100
|
|
|
|
124
|
$$mainFree{$num} = [ $offset, $gen, 'f' ] if $mainFree; |
|
2221
|
52
|
|
|
|
|
115
|
next; |
|
2222
|
|
|
|
|
|
|
} |
|
2223
|
|
|
|
|
|
|
# also save offset keyed by object reference string |
|
2224
|
474
|
|
|
|
|
1262
|
$xref{"$num $gen R"} = $offset; |
|
2225
|
|
|
|
|
|
|
} |
|
2226
|
|
|
|
|
|
|
} |
|
2227
|
|
|
|
|
|
|
# (I have a sample from Adobe which has an empty xref table) |
|
2228
|
|
|
|
|
|
|
# %xref or return -4; # xref table may not be empty |
|
2229
|
107
|
|
|
|
|
214
|
$buff = ''; |
|
2230
|
|
|
|
|
|
|
} |
|
2231
|
70
|
|
|
|
|
127
|
undef $mainFree; # only do this for the last xref table |
|
2232
|
|
|
|
|
|
|
} elsif ($buff =~ s/^\s*(\d+)\s+(\d+)\s+obj//s) { |
|
2233
|
|
|
|
|
|
|
# this is a PDF-1.5 cross-reference stream dictionary |
|
2234
|
0
|
|
|
|
|
0
|
$loadXRefStream = 1; |
|
2235
|
|
|
|
|
|
|
} else { |
|
2236
|
0
|
0
|
|
|
|
0
|
%loaded or return -4; |
|
2237
|
0
|
|
|
|
|
0
|
$et->Warn('Invalid secondary xref table'); |
|
2238
|
0
|
|
|
|
|
0
|
next; |
|
2239
|
|
|
|
|
|
|
} |
|
2240
|
70
|
|
|
|
|
181
|
my $mainDict = ExtractObject($et, \$buff, $raf, \%xref); |
|
2241
|
70
|
50
|
|
|
|
172
|
unless (ref $mainDict eq 'HASH') { |
|
2242
|
0
|
0
|
|
|
|
0
|
%loaded or return -8; |
|
2243
|
0
|
|
|
|
|
0
|
$et->Warn('Error loading secondary dictionary'); |
|
2244
|
0
|
|
|
|
|
0
|
next; |
|
2245
|
|
|
|
|
|
|
} |
|
2246
|
|
|
|
|
|
|
# keep track of total trailer dictionary Size |
|
2247
|
70
|
100
|
66
|
|
|
284
|
$mainDictSize = $$mainDict{Size} if $$mainDict{Size} and $$mainDict{Size} > $mainDictSize; |
|
2248
|
70
|
50
|
|
|
|
124
|
if ($loadXRefStream) { |
|
2249
|
|
|
|
|
|
|
# decode and save our XRef stream from PDF-1.5 file |
|
2250
|
|
|
|
|
|
|
# (but parse it later as required to save time) |
|
2251
|
|
|
|
|
|
|
# Note: this technique can potentially result in an old object |
|
2252
|
|
|
|
|
|
|
# being used if the file was incrementally updated and an older |
|
2253
|
|
|
|
|
|
|
# object from an xref table was replaced by a newer object in an |
|
2254
|
|
|
|
|
|
|
# xref stream. But doing so isn't a good idea (if allowed at all) |
|
2255
|
|
|
|
|
|
|
# because a PDF 1.4 consumer would also make this same mistake. |
|
2256
|
0
|
0
|
0
|
|
|
0
|
if ($$mainDict{Type} eq '/XRef' and $$mainDict{W} and |
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2257
|
0
|
|
|
|
|
0
|
@{$$mainDict{W}} > 2 and $$mainDict{Size} and |
|
2258
|
|
|
|
|
|
|
DecodeStream($et, $mainDict)) |
|
2259
|
|
|
|
|
|
|
{ |
|
2260
|
|
|
|
|
|
|
# create Index entry if it doesn't exist |
|
2261
|
0
|
0
|
|
|
|
0
|
$$mainDict{Index} or $$mainDict{Index} = [ 0, $$mainDict{Size} ]; |
|
2262
|
|
|
|
|
|
|
# create '_entry_size' entry for internal use |
|
2263
|
0
|
|
|
|
|
0
|
my $w = $$mainDict{W}; |
|
2264
|
0
|
|
|
|
|
0
|
my $size = 0; |
|
2265
|
0
|
|
|
|
|
0
|
foreach (@$w) { $size += $_; } |
|
|
0
|
|
|
|
|
0
|
|
|
2266
|
0
|
|
|
|
|
0
|
$$mainDict{_entry_size} = $size; |
|
2267
|
|
|
|
|
|
|
# save this stream dictionary to use later if required |
|
2268
|
0
|
0
|
|
|
|
0
|
$xref{dicts} = [] unless $xref{dicts}; |
|
2269
|
0
|
|
|
|
|
0
|
push @{$xref{dicts}}, $mainDict; |
|
|
0
|
|
|
|
|
0
|
|
|
2270
|
|
|
|
|
|
|
} else { |
|
2271
|
0
|
0
|
|
|
|
0
|
%loaded or return -9; |
|
2272
|
0
|
|
|
|
|
0
|
$et->Warn('Invalid xref stream in secondary dictionary'); |
|
2273
|
|
|
|
|
|
|
} |
|
2274
|
|
|
|
|
|
|
} |
|
2275
|
70
|
|
|
|
|
136
|
$loaded{$offset} = 1; |
|
2276
|
|
|
|
|
|
|
# load XRef stream in hybrid file if it exists |
|
2277
|
70
|
50
|
|
|
|
146
|
push @xrefOffsets, $$mainDict{XRefStm}, 'XRefStm' if $$mainDict{XRefStm}; |
|
2278
|
70
|
50
|
|
|
|
114
|
$encrypt = $$mainDict{Encrypt} if $$mainDict{Encrypt}; |
|
2279
|
70
|
50
|
33
|
|
|
128
|
undef $encrypt if $encrypt and $encrypt eq 'null'; # (have seen "null") |
|
2280
|
70
|
100
|
66
|
|
|
179
|
if ($$mainDict{ID} and ref $$mainDict{ID} eq 'ARRAY') { |
|
2281
|
29
|
|
|
|
|
62
|
$id = ReadPDFValue($mainDict->{ID}->[0]); |
|
2282
|
|
|
|
|
|
|
} |
|
2283
|
70
|
|
|
|
|
143
|
push @mainDicts, $mainDict, $type; |
|
2284
|
|
|
|
|
|
|
# load previous xref table if it exists |
|
2285
|
70
|
100
|
|
|
|
222
|
push @xrefOffsets, $$mainDict{Prev}, 'Prev' if $$mainDict{Prev}; |
|
2286
|
|
|
|
|
|
|
} |
|
2287
|
37
|
50
|
|
|
|
75
|
if ($xrefSize > $mainDictSize) { |
|
2288
|
0
|
|
|
|
|
0
|
my $str = "Objects in xref table ($xrefSize) exceed trailer dictionary Size ($mainDictSize)"; |
|
2289
|
0
|
0
|
|
|
|
0
|
$capture ? $et->Error($str) : $et->Warn($str); |
|
2290
|
|
|
|
|
|
|
} |
|
2291
|
|
|
|
|
|
|
# |
|
2292
|
|
|
|
|
|
|
# extract encryption information if necessary |
|
2293
|
|
|
|
|
|
|
# |
|
2294
|
37
|
50
|
|
|
|
68
|
if ($encrypt) { |
|
2295
|
0
|
0
|
|
|
|
0
|
if (ref $encrypt eq 'SCALAR') { |
|
2296
|
0
|
|
|
|
|
0
|
$encrypt = FetchObject($et, $$encrypt, \%xref, 'Encrypt'); |
|
2297
|
|
|
|
|
|
|
} |
|
2298
|
|
|
|
|
|
|
# generate Encryption tag information |
|
2299
|
0
|
|
|
|
|
0
|
my $err = DecryptInit($et, $encrypt, $id); |
|
2300
|
0
|
0
|
|
|
|
0
|
if ($err) { |
|
2301
|
0
|
|
|
|
|
0
|
$et->Warn($err); |
|
2302
|
0
|
0
|
|
|
|
0
|
$$capture{Error} = $err if $capture; |
|
2303
|
0
|
|
|
|
|
0
|
return -1; |
|
2304
|
|
|
|
|
|
|
} |
|
2305
|
|
|
|
|
|
|
} |
|
2306
|
|
|
|
|
|
|
# |
|
2307
|
|
|
|
|
|
|
# extract the information beginning with each of the main dictionaries |
|
2308
|
|
|
|
|
|
|
# |
|
2309
|
37
|
|
|
|
|
79
|
my $i = 0; |
|
2310
|
37
|
|
|
|
|
75
|
my $num = (scalar @mainDicts) / 2; |
|
2311
|
37
|
|
|
|
|
65
|
while (@mainDicts) { |
|
2312
|
70
|
|
|
|
|
94
|
my $dict = shift @mainDicts; |
|
2313
|
70
|
|
|
|
|
94
|
my $type = shift @mainDicts; |
|
2314
|
70
|
50
|
|
|
|
149
|
if ($verbose) { |
|
2315
|
0
|
|
|
|
|
0
|
++$i; |
|
2316
|
0
|
|
|
|
|
0
|
my $n = scalar(@{$$dict{_tags}}); |
|
|
0
|
|
|
|
|
0
|
|
|
2317
|
0
|
|
|
|
|
0
|
$et->VPrint(0, "PDF dictionary ($i of $num) with $n entries:\n"); |
|
2318
|
|
|
|
|
|
|
} |
|
2319
|
70
|
|
|
|
|
140
|
ProcessDict($et, $tagTablePtr, $dict, \%xref, 0, $type); |
|
2320
|
|
|
|
|
|
|
} |
|
2321
|
|
|
|
|
|
|
# handle any decryption errors |
|
2322
|
37
|
50
|
|
|
|
83
|
if ($encrypt) { |
|
2323
|
0
|
|
|
|
|
0
|
my $err = $$encrypt{_error}; |
|
2324
|
0
|
0
|
|
|
|
0
|
if ($err) { |
|
2325
|
0
|
|
|
|
|
0
|
$et->Warn($err); |
|
2326
|
0
|
0
|
|
|
|
0
|
$$capture{Error} = $err if $capture; |
|
2327
|
0
|
|
|
|
|
0
|
return -1; |
|
2328
|
|
|
|
|
|
|
} |
|
2329
|
|
|
|
|
|
|
} |
|
2330
|
37
|
|
|
|
|
229
|
return 1; |
|
2331
|
|
|
|
|
|
|
} |
|
2332
|
|
|
|
|
|
|
|
|
2333
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
2334
|
|
|
|
|
|
|
# ReadPDF() warning strings for each error return value |
|
2335
|
|
|
|
|
|
|
my %pdfWarning = ( |
|
2336
|
|
|
|
|
|
|
# -1 is reserved as error return value with no associated warning |
|
2337
|
|
|
|
|
|
|
-2 => 'Error seeking in file', |
|
2338
|
|
|
|
|
|
|
-3 => 'Error reading file', |
|
2339
|
|
|
|
|
|
|
-4 => 'Invalid xref table', |
|
2340
|
|
|
|
|
|
|
-5 => 'Invalid xref offset', |
|
2341
|
|
|
|
|
|
|
-6 => 'Error reading xref table', |
|
2342
|
|
|
|
|
|
|
-7 => 'Error reading trailer', |
|
2343
|
|
|
|
|
|
|
-8 => 'Error reading main dictionary', |
|
2344
|
|
|
|
|
|
|
-9 => 'Invalid xref stream in main dictionary', |
|
2345
|
|
|
|
|
|
|
); |
|
2346
|
|
|
|
|
|
|
|
|
2347
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
2348
|
|
|
|
|
|
|
# Extract information from PDF file |
|
2349
|
|
|
|
|
|
|
# Inputs: 0) ExifTool object reference, 1) dirInfo reference |
|
2350
|
|
|
|
|
|
|
# Returns: 1 if this was a valid PDF file |
|
2351
|
|
|
|
|
|
|
sub ProcessPDF($$) |
|
2352
|
|
|
|
|
|
|
{ |
|
2353
|
37
|
|
|
37
|
0
|
70
|
my ($et, $dirInfo) = @_; |
|
2354
|
|
|
|
|
|
|
|
|
2355
|
37
|
|
|
|
|
57
|
undef $cryptInfo; # (must not delete after returning so writer can use it) |
|
2356
|
37
|
|
|
|
|
53
|
undef $cryptStream; |
|
2357
|
37
|
|
|
|
|
45
|
undef $cryptString; |
|
2358
|
37
|
|
|
|
|
80
|
my $result = ReadPDF($et, $dirInfo); |
|
2359
|
37
|
50
|
|
|
|
89
|
if ($result < 0) { |
|
2360
|
0
|
0
|
|
|
|
0
|
$et->Warn($pdfWarning{$result}) if $pdfWarning{$result}; |
|
2361
|
0
|
|
|
|
|
0
|
$result = 1; |
|
2362
|
|
|
|
|
|
|
} |
|
2363
|
|
|
|
|
|
|
# clean up and return |
|
2364
|
37
|
|
|
|
|
59
|
undef %streamObjs; |
|
2365
|
37
|
|
|
|
|
66
|
undef %fetched; |
|
2366
|
37
|
|
|
|
|
80
|
return $result; |
|
2367
|
|
|
|
|
|
|
} |
|
2368
|
|
|
|
|
|
|
|
|
2369
|
|
|
|
|
|
|
1; # end |
|
2370
|
|
|
|
|
|
|
|
|
2371
|
|
|
|
|
|
|
|
|
2372
|
|
|
|
|
|
|
__END__ |