| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
2
|
|
|
|
|
|
|
# File: ZIP.pm |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# Description: Read ZIP archive meta information |
|
5
|
|
|
|
|
|
|
# |
|
6
|
|
|
|
|
|
|
# Revisions: 10/28/2007 - P. Harvey Created |
|
7
|
|
|
|
|
|
|
# |
|
8
|
|
|
|
|
|
|
# References: 1) http://www.pkware.com/documents/casestudies/APPNOTE.TXT |
|
9
|
|
|
|
|
|
|
# 2) http://www.cpanforum.com/threads/9046 |
|
10
|
|
|
|
|
|
|
# 3) http://www.gzip.org/zlib/rfc-gzip.html |
|
11
|
|
|
|
|
|
|
# 4) http://DataCompression.info/ArchiveFormats/RAR202.txt |
|
12
|
|
|
|
|
|
|
# 5) https://jira.atlassian.com/browse/CONF-21706 |
|
13
|
|
|
|
|
|
|
# 6) http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/indesign/cs55-docs/IDML/idml-specification.pdf |
|
14
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
package Image::ExifTool::ZIP; |
|
17
|
|
|
|
|
|
|
|
|
18
|
1
|
|
|
1
|
|
4375
|
use strict; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
33
|
|
|
19
|
1
|
|
|
1
|
|
5
|
use vars qw($VERSION $warnString); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
46
|
|
|
20
|
1
|
|
|
1
|
|
5
|
use Image::ExifTool qw(:DataAccess :Utils); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
3522
|
|
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
$VERSION = '1.27'; |
|
23
|
|
|
|
|
|
|
|
|
24
|
0
|
|
|
0
|
0
|
0
|
# Warning handler (installed as a __WARN__ handler while reading ZIP files):
# remembers the text of the most recent warning in $warnString
# Inputs: 0) warning message
sub WarnProc($)
{
    my ($message) = @_;
    $warnString = $message;
}
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# Lookup from the contents of an archive's "mimetype" member to the file type.
# Most entries are OASIS Open Document types; IDML and EPUB are not Open
# Document formats but are recognized through the same lookup.
my %openDocType = (
    # Open Document types (database, chart, formula, graphics and image from ref 5)
    ( map { ("application/vnd.oasis.opendocument.$$_[0]" => $$_[1]) }
        [ database     => 'ODB' ],
        [ chart        => 'ODC' ],
        [ formula      => 'ODF' ],
        [ graphics     => 'ODG' ],
        [ image        => 'ODI' ],
        [ presentation => 'ODP' ],
        [ spreadsheet  => 'ODS' ],
        [ text         => 'ODT' ],
    ),
    # (not open doc)
    'application/vnd.adobe.indesign-idml-package' => 'IDML', # (ref 6)
    'application/epub+zip'                        => 'EPUB', # (ref PH)
);

# Names of archive members whose presence identifies a specific iWork file type
my %iWorkFile = (
    'Index/Slide.iwa'           => 'KEY',
    'Index/Tables/DataList.iwa' => 'NUMBERS',
);

# Recognized iWork file extensions (each maps to the file type of the same name)
my %iWorkType = map { ($_ => $_) } qw(NUMBERS PAGES KEY KTH NMBTEMPLATE);
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# ZIP metadata blocks
# (numerical tag IDs are positions counted in units of the int16u default
# FORMAT; HandleMember also supplies values for these IDs directly, together
# with the non-numerical '_com' entry)
%Image::ExifTool::ZIP::Main = (
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    GROUPS => { 2 => 'Other' },
    FORMAT => 'int16u',
    NOTES => q{
        The following tags are extracted from ZIP archives.  ExifTool also extracts
        additional meta information from compressed documents inside some ZIP-based
        files such Office Open XML (DOCX, PPTX and XLSX), Open Document (ODB, ODC,
        ODF, ODG, ODI, ODP, ODS and ODT), iWork (KEY, PAGES, NUMBERS), Capture One
        Enhanced Image Package (EIP), Adobe InDesign Markup Language (IDML),
        Electronic Publication (EPUB), and Sketch design files (SKETCH).  The
        ExifTool family 3 groups may be used to organize ZIP tags by embedded
        document number (ie. the exiftool C<-g3> option).
    },
    2 => 'ZipRequiredVersion',
    3 => {
        Name => 'ZipBitFlag',
        # print non-zero flags in hex, but leave a zero value as-is
        PrintConv => '$val ? sprintf("0x%.4x",$val) : $val',
    },
    4 => {
        Name => 'ZipCompression',
        PrintConv => {
            0  => 'None',
            1  => 'Shrunk',
            # methods 2 through 5 are the "Reduced" methods, factors 1 through 4
            ( map { ($_ + 1 => "Reduced with compression factor $_") } 1 .. 4 ),
            6  => 'Imploded',
            7  => 'Tokenized',
            8  => 'Deflated',
            9  => 'Enhanced Deflate using Deflate64(tm)',
            10 => 'Imploded (old IBM TERSE)',
            12 => 'BZIP2',
            14 => 'LZMA (EFS)',
            18 => 'IBM TERSE (new)',
            19 => 'IBM LZ77 z Architecture (PFS)',
            96 => 'JPEG recompressed', #2
            97 => 'WavPack compressed', #2
            98 => 'PPMd version I, Rev 1',
        },
    },
    5 => {
        Name => 'ZipModifyDate',
        Groups => { 2 => 'Time' },
        Format => 'int32u',
        # unpack the bit fields of the 32-bit date/time value
        # (seconds are stored divided by two, and years are relative to 1980)
        ValueConv => sub {
            my $packed = shift;
            my @ymdhms = (
                ($packed >> 25) + 1980,     # year
                ($packed >> 21) & 0x0f,     # month
                ($packed >> 16) & 0x1f,     # day
                ($packed >> 11) & 0x1f,     # hour
                ($packed >> 5)  & 0x3f,     # minute
                ($packed & 0x1f) * 2,       # second
            );
            return sprintf('%.4d:%.2d:%.2d %.2d:%.2d:%.2d', @ymdhms);
        },
        PrintConv => '$self->ConvertDateTime($val)',
    },
    7  => { Name => 'ZipCRC', Format => 'int32u', PrintConv => 'sprintf("0x%.8x",$val)' },
    9  => { Name => 'ZipCompressedSize',   Format => 'int32u' },
    11 => { Name => 'ZipUncompressedSize', Format => 'int32u' },
    13 => {
        Name => 'ZipFileNameLength',
        Hidden => 1,
        # this tag is never stored -- the length is only remembered so that
        # the Format of ZipFileName below can refer to it
        RawConv => '$$self{ZipFileNameLength} = $val; undef',
    },
    # 14 => 'ZipExtraFieldLength',
    15 => {
        Name => 'ZipFileName',
        Format => 'string[$$self{ZipFileNameLength}]',
    },
    _com => 'ZipFileComment',
);
|
130
|
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
# GNU ZIP tags (ref 3)
# (IDs 2 through 9 are the byte positions that ProcessGZIP reads from the
# header; IDs 10 and 11 are used by ProcessGZIP for the optional strings)
%Image::ExifTool::ZIP::GZIP = (
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    GROUPS => { 2 => 'Other' },
    NOTES => q{
        These tags are extracted from GZIP (GNU ZIP) archives, but currently only
        for the first file in the archive.
    },
    2 => {
        Name => 'Compression',
        PrintConv => { 8 => 'Deflated' },
    },
    3 => {
        Name => 'Flags',
        # (keys of a BITMASK conversion are bit numbers)
        PrintConv => {
            BITMASK => {
                0 => 'Text',
                1 => 'CRC16',
                2 => 'ExtraFields',
                3 => 'FileName',
                4 => 'Comment',
            },
        },
    },
    4 => {
        Name => 'ModifyDate',
        Groups => { 2 => 'Time' },
        Format => 'int32u',
        ValueConv => 'ConvertUnixTime($val,1)',
        PrintConv => '$self->ConvertDateTime($val)',
    },
    8 => {
        Name => 'ExtraFlags',
        PrintConv => {
            0 => '(none)',
            2 => 'Maximum Compression',
            4 => 'Fastest Algorithm',
        },
    },
    9 => {
        Name => 'OperatingSystem',
        PrintConv => {
            0   => 'FAT filesystem (MS-DOS, OS/2, NT/Win32)',
            1   => 'Amiga',
            2   => 'VMS (or OpenVMS)',
            3   => 'Unix',
            4   => 'VM/CMS',
            5   => 'Atari TOS',
            6   => 'HPFS filesystem (OS/2, NT)',
            7   => 'Macintosh',
            8   => 'Z-System',
            9   => 'CP/M',
            10  => 'TOPS-20',
            11  => 'NTFS filesystem (NT)',
            12  => 'QDOS',
            13  => 'Acorn RISCOS',
            255 => 'unknown',
        },
    },
    10 => 'ArchivedFileName',
    11 => 'Comment',
);
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
# RAR tags (ref 4)
# (IDs are byte positions in the buffer that ProcessRAR builds for each file
# block: the 4-byte block size that it prepends, followed by the block data)
%Image::ExifTool::ZIP::RAR = (
    PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
    GROUPS => { 2 => 'Other' },
    NOTES => 'These tags are extracted from RAR archive files.',
    0 => { Name => 'CompressedSize',   Format => 'int32u' },
    4 => { Name => 'UncompressedSize', Format => 'int32u' },
    8 => {
        Name => 'OperatingSystem',
        PrintConv => {
            0 => 'MS-DOS',
            1 => 'OS/2',
            2 => 'Win32',
            3 => 'Unix',
        },
    },
    13 => {
        Name => 'ModifyDate',
        Groups => { 2 => 'Time' },
        Format => 'int32u',
        # unpack the bit fields of the 32-bit date/time value
        # (seconds are stored divided by two, and years are relative to 1980)
        ValueConv => sub {
            my $packed = shift;
            my @ymdhms = (
                ($packed >> 25) + 1980,     # year
                ($packed >> 21) & 0x0f,     # month
                ($packed >> 16) & 0x1f,     # day
                ($packed >> 11) & 0x1f,     # hour
                ($packed >> 5)  & 0x3f,     # minute
                ($packed & 0x1f) * 2,       # second
            );
            return sprintf('%.4d:%.2d:%.2d %.2d:%.2d:%.2d', @ymdhms);
        },
        PrintConv => '$self->ConvertDateTime($val)',
    },
    18 => {
        Name => 'PackingMethod',
        PrintHex => 1,
        PrintConv => {
            0x30 => 'Stored',
            0x31 => 'Fastest',
            0x32 => 'Fast',
            0x33 => 'Normal',
            0x34 => 'Good Compression',
            0x35 => 'Best Compression',
        },
    },
    19 => {
        Name => 'FileNameLength',
        Format => 'int16u',
        Hidden => 1,
        # never stored as a tag -- just remembered for the Format of
        # ArchivedFileName below
        RawConv => '$$self{FileNameLength} = $val; undef',
    },
    25 => {
        Name => 'ArchivedFileName',
        Format => 'string[$$self{FileNameLength}]',
    },
);
|
256
|
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Extract information from a RAR file (ref 4)
# Inputs: 0) ExifTool object reference, 1) dirInfo reference
# Returns: 1 on success, 0 if this wasn't a valid RAR file
# Notes: After the 7-byte signature the file is walked block by block.  Tags
#        are extracted from (at most) the first 4 KB of each file block, with a
#        new DOC_NUM for each one, and a Comment is extracted from a comment
#        block when its method byte indicates "Stored".
sub ProcessRAR($$)
{
    my ($et, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my $data;

    # validate the file signature
    $raf->Read($data, 7) or return 0;
    $data eq "Rar!\x1a\x07\0" or return 0;

    $et->SetFileType();
    SetByteOrder('II');
    my $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::RAR');
    my $docNum = 0;

    while (1) {
        # block header: 2 skipped bytes, then type (int8u), flags and size (int16u)
        last unless $raf->Read($data, 7) == 7;
        my ($type, $flags, $size) = unpack('xxCvv', $data);
        $size -= 7;     # (the header bytes have already been read)
        if ($flags & 0x8000) {
            # an additional 4-byte size follows the header when this flag is set
            last unless $raf->Read($data, 4) == 4;
            $size += unpack('V', $data) - 4;
        }
        last if $size < 0;
        $size or next;  # ignore blocks with no data
        # don't try to read very large blocks unless LargeFileSupport is enabled
        unless ($size < 0x80000000 or $et->Options('LargeFileSupport')) {
            $et->Warn('Large block encountered. Aborting.');
            last;
        }
        # process the block
        if ($type == 0x74) {
            # file block: read maximum 4 KB
            my $n = $size <= 4096 ? $size : 4096;
            last unless $raf->Read($data, $n) == $n;
            # put the block size in front of the data so that it may be
            # extracted (as CompressedSize) along with the other tags
            $data = pack('V', $size) . $data;
            $$et{DOC_NUM} = ++$docNum;
            $et->ProcessDirectory({ DataPt => \$data }, $tagTablePtr);
            $size -= $n;    # (the remainder of the block is skipped below)
        } elsif ($type == 0x75 and $size > 6) {
            # comment block
            last unless $raf->Read($data, $size) == $size;
            # save comment, only if "Stored" (this is untested)
            $et->FoundTag('Comment', substr($data, 6)) if Get8u(\$data, 3) == 0x30;
            next;
        }
        # seek to the start of the next block
        if ($size) {
            $raf->Seek($size, 1) or last;
        }
    }
    $$et{DOC_NUM} = 0;
    unless ($docNum <= 1 or $et->Options('Duplicates')) {
        $et->Warn("Use the Duplicates option to extract tags for all $docNum files", 1);
    }

    return 1;
}
|
318
|
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Extract information from a GNU ZIP file (ref 3)
# Inputs: 0) ExifTool object reference, 1) dirInfo reference
# Returns: 1 on success, 0 if this wasn't a valid GZIP file
# Notes: Only the header at the start of the file is decoded.  The complete
#        10-byte fixed header must be present for the file to be recognized,
#        because the fields decoded below extend to the last of these bytes.
sub ProcessGZIP($$)
{
    my ($et, $dirInfo) = @_;
    my $raf = $$dirInfo{RAF};
    my ($flags, $buff);

    # insist on the full fixed-length header (with a short read, the values
    # extracted below would lie beyond the end of the buffer)
    return 0 unless $raf->Read($buff, 10) == 10 and $buff =~ /^\x1f\x8b\x08/;

    $et->SetFileType();
    SetByteOrder('II');

    # extract tags from the fixed part of the header
    # (the tag ID's are the byte offsets of the values in the header)
    my $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::GZIP');
    $et->HandleTag($tagTablePtr, 2, Get8u(\$buff, 2));
    $et->HandleTag($tagTablePtr, 3, $flags = Get8u(\$buff, 3));
    $et->HandleTag($tagTablePtr, 4, Get32u(\$buff, 4));
    $et->HandleTag($tagTablePtr, 8, Get8u(\$buff, 8));
    $et->HandleTag($tagTablePtr, 9, Get8u(\$buff, 9));

    # extract file name and comment if they exist
    # (flag bits 0x08 and 0x10 respectively)
    if ($flags & 0x18) {
        if ($flags & 0x04) {
            # skip extra field (a 2-byte length followed by the data)
            $raf->Read($buff, 2) == 2 or return 1;
            my $len = Get16u(\$buff, 0);
            $raf->Read($buff, $len) == $len or return 1;
        }
        # the strings are null terminated, so just read up to 4 KB
        # and look for the terminators
        $raf->Read($buff, 4096) or return 1;
        my $pos = 0;
        my $tagID;
        # loop for ArchivedFileName (10) and Comment (11) tags
        foreach $tagID (10, 11) {
            my $mask = $tagID == 10 ? 0x08 : 0x10;
            next unless $flags & $mask;
            # (the /g match in scalar context resumes after the previous
            # terminator on the second time through the loop)
            my $end = $buff =~ /\0/g ? pos($buff) - 1 : length($buff);
            # (the doc specifies the string should be ISO 8859-1,
            # but in OS X it seems to be UTF-8, so don't translate
            # it because I could just as easily screw it up)
            my $str = substr($buff, $pos, $end - $pos);
            $et->HandleTag($tagTablePtr, $tagID, $str);
            last if $end >= length $buff;   # (no terminator -- nothing more to read)
            $pos = $end + 1;
        }
    }
    return 1;
}
|
368
|
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
#------------------------------------------------------------------------------
# Call HandleTags for attributes of an Archive::Zip member
# Inputs: 0) ExifTool object ref, 1) member ref, 2) optional tag table ref
# Notes: The ZIP Main table is used if no tag table is specified.  The file
#        comment is handled only if it is defined and not empty.
sub HandleMember($$;$)
{
    my ($et, $member, $tagTablePtr) = @_;
    $tagTablePtr ||= GetTagTable('Image::ExifTool::ZIP::Main');

    # tag ID's and the member methods which provide their values,
    # in the order that the tags are handled
    my @attributes = (
        [  2 => 'versionNeededToExtract' ],
        [  3 => 'bitFlag' ],
        [  4 => 'compressionMethod' ],
        [  5 => 'lastModFileDateTime' ],
        [  7 => 'crc32' ],
        [  9 => 'compressedSize' ],
        [ 11 => 'uncompressedSize' ],
        [ 15 => 'fileName' ],
    );
    foreach my $attribute (@attributes) {
        my ($tagID, $method) = @$attribute;
        $et->HandleTag($tagTablePtr, $tagID, $member->$method());
    }
    my $com = $member->fileComment();
    defined $com and length $com and $et->HandleTag($tagTablePtr, '_com', $com);
}
|
387
|
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
389
|
|
|
|
|
|
|
# Extract information from a ZIP file |
|
390
|
|
|
|
|
|
|
# Inputs: 0) ExifTool object reference, 1) dirInfo reference |
|
391
|
|
|
|
|
|
|
# Returns: 1 on success, 0 if this wasn't a valid ZIP file |
|
392
|
|
|
|
|
|
|
sub ProcessZIP($$) |
|
393
|
|
|
|
|
|
|
{ |
|
394
|
5
|
|
|
5
|
0
|
15
|
my ($et, $dirInfo) = @_; |
|
395
|
5
|
|
|
|
|
13
|
my $raf = $$dirInfo{RAF}; |
|
396
|
5
|
|
|
|
|
10
|
my ($buff, $buf2, $zip); |
|
397
|
|
|
|
|
|
|
|
|
398
|
5
|
50
|
33
|
|
|
15
|
return 0 unless $raf->Read($buff, 30) == 30 and $buff =~ /^PK\x03\x04/; |
|
399
|
|
|
|
|
|
|
|
|
400
|
5
|
|
|
|
|
19
|
my $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::Main'); |
|
401
|
5
|
|
|
|
|
17
|
my $docNum = 0; |
|
402
|
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
# use Archive::Zip if available |
|
404
|
5
|
|
|
|
|
12
|
for (;;) { |
|
405
|
5
|
50
|
33
|
|
|
13
|
unless (eval { require Archive::Zip } and eval { require IO::File }) { |
|
|
5
|
|
|
|
|
876
|
|
|
|
5
|
|
|
|
|
82252
|
|
|
406
|
0
|
0
|
0
|
|
|
0
|
if ($$et{FILE_EXT} and $$et{FILE_EXT} ne 'ZIP') { |
|
407
|
0
|
|
|
|
|
0
|
$et->Warn("Install Archive::Zip to decode compressed ZIP information"); |
|
408
|
|
|
|
|
|
|
} |
|
409
|
0
|
|
|
|
|
0
|
last; |
|
410
|
|
|
|
|
|
|
} |
|
411
|
|
|
|
|
|
|
# Archive::Zip requires a seekable IO::File object |
|
412
|
5
|
|
|
|
|
13
|
my $fh; |
|
413
|
5
|
50
|
|
|
|
21
|
if ($raf->{TESTED} >= 0) { |
|
|
|
0
|
|
|
|
|
|
|
414
|
5
|
50
|
|
|
|
11
|
unless (eval { require IO::File }) { |
|
|
5
|
|
|
|
|
27
|
|
|
415
|
|
|
|
|
|
|
# (this shouldn't happen because IO::File is a prerequisite of Archive::Zip) |
|
416
|
0
|
|
|
|
|
0
|
$et->Warn("Install IO::File to decode compressed ZIP information"); |
|
417
|
0
|
|
|
|
|
0
|
last; |
|
418
|
|
|
|
|
|
|
} |
|
419
|
5
|
|
|
|
|
24
|
$raf->Seek(0,0); |
|
420
|
5
|
|
|
|
|
14
|
$fh = $raf->{FILE_PT}; |
|
421
|
5
|
|
|
|
|
24
|
bless $fh, 'IO::File'; # Archive::Zip expects an IO::File object |
|
422
|
0
|
|
|
|
|
0
|
} elsif (eval { require IO::String }) { |
|
423
|
|
|
|
|
|
|
# read the whole file into memory (what else can I do?) |
|
424
|
0
|
|
|
|
|
0
|
$raf->Slurp(); |
|
425
|
0
|
|
|
|
|
0
|
$fh = new IO::String ${$raf->{BUFF_PT}}; |
|
|
0
|
|
|
|
|
0
|
|
|
426
|
|
|
|
|
|
|
} else { |
|
427
|
0
|
0
|
|
|
|
0
|
my $type = $raf->{FILE_PT} ? 'pipe or socket' : 'scalar reference'; |
|
428
|
0
|
|
|
|
|
0
|
$et->Warn("Install IO::String to decode compressed ZIP information from a $type"); |
|
429
|
0
|
|
|
|
|
0
|
last; |
|
430
|
|
|
|
|
|
|
} |
|
431
|
5
|
|
|
|
|
30
|
$et->VPrint(1, " --- using Archive::Zip ---\n"); |
|
432
|
5
|
|
|
|
|
26
|
$zip = new Archive::Zip; |
|
433
|
|
|
|
|
|
|
# catch all warnings! (Archive::Zip is bad for this) |
|
434
|
5
|
|
|
|
|
197
|
local $SIG{'__WARN__'} = \&WarnProc; |
|
435
|
5
|
|
|
|
|
24
|
my $status = $zip->readFromFileHandle($fh); |
|
436
|
5
|
0
|
33
|
|
|
17716
|
if ($status eq '4' and $raf->{TESTED} >= 0 and eval { require IO::String } and |
|
|
0
|
|
33
|
|
|
0
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
437
|
|
|
|
|
|
|
$raf->Seek(0,2) and $raf->Tell() < 100000000) |
|
438
|
|
|
|
|
|
|
{ |
|
439
|
|
|
|
|
|
|
# try again, reading it ourself this time in an attempt to avoid |
|
440
|
|
|
|
|
|
|
# a failed test with Perl 5.6.2 GNU/Linux 2.6.32-5-686 i686-linux-64int-ld |
|
441
|
0
|
|
|
|
|
0
|
$raf->Seek(0,0); |
|
442
|
0
|
|
|
|
|
0
|
$raf->Slurp(); |
|
443
|
0
|
|
|
|
|
0
|
$fh = new IO::String ${$raf->{BUFF_PT}}; |
|
|
0
|
|
|
|
|
0
|
|
|
444
|
0
|
|
|
|
|
0
|
$zip = new Archive::Zip; |
|
445
|
0
|
|
|
|
|
0
|
$status = $zip->readFromFileHandle($fh); |
|
446
|
|
|
|
|
|
|
} |
|
447
|
5
|
50
|
|
|
|
18
|
if ($status) { |
|
448
|
0
|
|
|
|
|
0
|
undef $zip; |
|
449
|
0
|
|
|
|
|
0
|
my %err = ( 1=>'Stream end error', 3=>'Format error', 4=>'IO error' ); |
|
450
|
0
|
|
0
|
|
|
0
|
my $err = $err{$status} || "Error $status"; |
|
451
|
0
|
|
|
|
|
0
|
$et->Warn("$err reading ZIP file"); |
|
452
|
0
|
|
|
|
|
0
|
last; |
|
453
|
|
|
|
|
|
|
} |
|
454
|
|
|
|
|
|
|
# extract zip file comment |
|
455
|
5
|
|
|
|
|
24
|
my $comment = $zip->zipfileComment(); |
|
456
|
5
|
50
|
33
|
|
|
63
|
$et->FoundTag(Comment => $comment) if defined $comment and length $comment; |
|
457
|
|
|
|
|
|
|
|
|
458
|
5
|
|
|
|
|
14
|
$$dirInfo{ZIP} = $zip; |
|
459
|
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
# check for an Office Open file (DOCX, etc) |
|
461
|
|
|
|
|
|
|
# --> read '[Content_Types].xml' to determine the file type |
|
462
|
5
|
|
|
|
|
9
|
my ($mime, @members); |
|
463
|
5
|
|
|
|
|
18
|
my $cType = $zip->memberNamed('[Content_Types].xml'); |
|
464
|
5
|
100
|
|
|
|
291
|
if ($cType) { |
|
465
|
1
|
|
|
|
|
5
|
($buff, $status) = $zip->contents($cType); |
|
466
|
1
|
0
|
33
|
|
|
1497
|
if (not $status and ( |
|
|
|
|
33
|
|
|
|
|
|
467
|
|
|
|
|
|
|
# first look for the main document with the expected name |
|
468
|
|
|
|
|
|
|
$buff =~ m{\sPartName\s*=\s*['"](?:/ppt/presentation.xml|/word/document.xml|/xl/workbook.xml)['"][^>]*\sContentType\s*=\s*(['"])([^"']+)\.main(\+xml)?\1} or |
|
469
|
|
|
|
|
|
|
# then look for the main part |
|
470
|
|
|
|
|
|
|
$buff =~ /]*\sPartName[^<]+\sContentType\s*=\s*(['"])([^"']+)\.main(\+xml)?\1/ or |
|
471
|
|
|
|
|
|
|
# and if all else fails, use the default main |
|
472
|
|
|
|
|
|
|
$buff =~ /ContentType\s*=\s*(['"])([^"']+)\.main(\+xml)?\1/)) |
|
473
|
|
|
|
|
|
|
{ |
|
474
|
1
|
|
|
|
|
5
|
$mime = $2; |
|
475
|
|
|
|
|
|
|
} |
|
476
|
|
|
|
|
|
|
} |
|
477
|
|
|
|
|
|
|
# check for docProps if we couldn't find a MIME type |
|
478
|
5
|
100
|
|
|
|
23
|
$mime or @members = $zip->membersMatching('^docProps/.*\.(xml|XML)$'); |
|
479
|
5
|
100
|
66
|
|
|
409
|
if ($mime or @members) { |
|
480
|
1
|
|
|
|
|
3
|
$$dirInfo{MIME} = $mime; |
|
481
|
1
|
|
|
|
|
615
|
require Image::ExifTool::OOXML; |
|
482
|
1
|
|
|
|
|
8
|
Image::ExifTool::OOXML::ProcessDOCX($et, $dirInfo); |
|
483
|
1
|
|
|
|
|
4
|
delete $$dirInfo{MIME}; |
|
484
|
1
|
|
|
|
|
6
|
last; |
|
485
|
|
|
|
|
|
|
} |
|
486
|
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
# check for an EIP file |
|
488
|
4
|
|
|
|
|
18
|
@members = $zip->membersMatching('^CaptureOne/.*\.(cos|COS)$'); |
|
489
|
4
|
100
|
|
|
|
373
|
if (@members) { |
|
490
|
1
|
|
|
|
|
655
|
require Image::ExifTool::CaptureOne; |
|
491
|
1
|
|
|
|
|
8
|
Image::ExifTool::CaptureOne::ProcessEIP($et, $dirInfo); |
|
492
|
1
|
|
|
|
|
4
|
last; |
|
493
|
|
|
|
|
|
|
} |
|
494
|
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
# check for an iWork file |
|
496
|
3
|
|
|
|
|
10
|
@members = $zip->membersMatching('(?i)^(index\.(xml|apxl)|QuickLook/Thumbnail\.jpg|[^/]+\.(pages|numbers|key)/Index.(zip|xml|apxl))$'); |
|
497
|
3
|
100
|
|
|
|
552
|
if (@members) { |
|
498
|
1
|
|
|
|
|
608
|
require Image::ExifTool::iWork; |
|
499
|
1
|
|
|
|
|
6
|
Image::ExifTool::iWork::Process_iWork($et, $dirInfo); |
|
500
|
1
|
|
|
|
|
4
|
last; |
|
501
|
|
|
|
|
|
|
} |
|
502
|
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
# check for an Open Document, IDML or EPUB file |
|
504
|
2
|
|
|
|
|
8
|
my $mType = $zip->memberNamed('mimetype'); |
|
505
|
2
|
100
|
|
|
|
45
|
if ($mType) { |
|
506
|
1
|
|
|
|
|
5
|
($mime, $status) = $zip->contents($mType); |
|
507
|
1
|
50
|
33
|
|
|
716
|
if (not $status and $mime =~ /([\x21-\xfe]+)/s) { |
|
508
|
|
|
|
|
|
|
# clean up MIME type just in case (note that MIME is case insensitive) |
|
509
|
1
|
|
|
|
|
5
|
$mime = lc $1; |
|
510
|
1
|
|
50
|
|
|
10
|
$et->SetFileType($openDocType{$mime} || 'ZIP', $mime); |
|
511
|
1
|
50
|
|
|
|
6
|
$et->Warn("Unrecognized MIMEType $mime") unless $openDocType{$mime}; |
|
512
|
|
|
|
|
|
|
# extract Open Document metadata from "meta.xml" |
|
513
|
1
|
|
|
|
|
9
|
my $meta = $zip->memberNamed('meta.xml'); |
|
514
|
|
|
|
|
|
|
# IDML files have metadata in a different place (ref 6) |
|
515
|
1
|
50
|
|
|
|
125
|
$meta or $meta = $zip->memberNamed('META-INF/metadata.xml'); |
|
516
|
1
|
50
|
|
|
|
4
|
if ($meta) { |
|
517
|
1
|
|
|
|
|
4
|
($buff, $status) = $zip->contents($meta); |
|
518
|
1
|
50
|
|
|
|
637
|
unless ($status) { |
|
519
|
1
|
|
|
|
|
8
|
my %dirInfo = ( |
|
520
|
|
|
|
|
|
|
DirName => 'XML', |
|
521
|
|
|
|
|
|
|
DataPt => \$buff, |
|
522
|
|
|
|
|
|
|
DirLen => length $buff, |
|
523
|
|
|
|
|
|
|
DataLen => length $buff, |
|
524
|
|
|
|
|
|
|
); |
|
525
|
|
|
|
|
|
|
# (avoid structure warnings when copying from XML) |
|
526
|
1
|
|
|
|
|
4
|
my $oldWarn = $$et{NO_STRUCT_WARN}; |
|
527
|
1
|
|
|
|
|
3
|
$$et{NO_STRUCT_WARN} = 1; |
|
528
|
1
|
|
|
|
|
5
|
$et->ProcessDirectory(\%dirInfo, GetTagTable('Image::ExifTool::XMP::Main')); |
|
529
|
1
|
|
|
|
|
3
|
$$et{NO_STRUCT_WARN} = $oldWarn; |
|
530
|
|
|
|
|
|
|
} |
|
531
|
|
|
|
|
|
|
} |
|
532
|
|
|
|
|
|
|
# process rootfile of EPUB container if applicable |
|
533
|
1
|
|
|
|
|
3
|
for (;;) { |
|
534
|
1
|
50
|
33
|
|
|
9
|
last if $meta and $mime ne 'application/epub+zip'; |
|
535
|
0
|
|
|
|
|
0
|
my $container = $zip->memberNamed('META-INF/container.xml'); |
|
536
|
0
|
|
|
|
|
0
|
($buff, $status) = $zip->contents($container); |
|
537
|
0
|
0
|
|
|
|
0
|
last if $status; |
|
538
|
0
|
0
|
|
|
|
0
|
$buff =~ /]*?\bfull-path=(['"])(.*?)\1/s or last; |
|
539
|
|
|
|
|
|
|
# load the rootfile data (OPF extension; contains XML metadata) |
|
540
|
0
|
0
|
|
|
|
0
|
my $meta2 = $zip->memberNamed($2) or last; |
|
541
|
0
|
|
|
|
|
0
|
$meta = $meta2; |
|
542
|
0
|
|
|
|
|
0
|
($buff, $status) = $zip->contents($meta); |
|
543
|
0
|
0
|
|
|
|
0
|
last if $status; |
|
544
|
|
|
|
|
|
|
# use opf:event to generate more meaningful tag names for dc:date |
|
545
|
0
|
|
|
|
|
0
|
while ($buff =~ s{([^<]+)}{$2}s) { |
|
546
|
0
|
|
|
|
|
0
|
my $dcTable = GetTagTable('Image::ExifTool::XMP::dc'); |
|
547
|
0
|
|
|
|
|
0
|
my $tag = "${1}Date"; |
|
548
|
|
|
|
|
|
|
AddTagToTable($dcTable, $tag, { |
|
549
|
|
|
|
|
|
|
Name => ucfirst $tag, |
|
550
|
|
|
|
|
|
|
Groups => { 2 => 'Time' }, |
|
551
|
|
|
|
|
|
|
List => 'Seq', |
|
552
|
|
|
|
|
|
|
%Image::ExifTool::XMP::dateTimeInfo |
|
553
|
0
|
0
|
|
|
|
0
|
}) unless $$dcTable{$tag}; |
|
554
|
|
|
|
|
|
|
} |
|
555
|
0
|
|
|
|
|
0
|
my %dirInfo = ( |
|
556
|
|
|
|
|
|
|
DataPt => \$buff, |
|
557
|
|
|
|
|
|
|
DirLen => length $buff, |
|
558
|
|
|
|
|
|
|
DataLen => length $buff, |
|
559
|
|
|
|
|
|
|
IgnoreProp => { 'package' => 1, metadata => 1 }, |
|
560
|
|
|
|
|
|
|
); |
|
561
|
|
|
|
|
|
|
# (avoid structure warnings when copying from XML) |
|
562
|
0
|
|
|
|
|
0
|
my $oldWarn = $$et{NO_STRUCT_WARN}; |
|
563
|
0
|
|
|
|
|
0
|
$$et{NO_STRUCT_WARN} = 1; |
|
564
|
0
|
|
|
|
|
0
|
$et->ProcessDirectory(\%dirInfo, GetTagTable('Image::ExifTool::XMP::XML')); |
|
565
|
0
|
|
|
|
|
0
|
$$et{NO_STRUCT_WARN} = $oldWarn; |
|
566
|
0
|
|
|
|
|
0
|
last; |
|
567
|
|
|
|
|
|
|
} |
|
568
|
1
|
50
|
33
|
|
|
5
|
if ($openDocType{$mime} or $meta) { |
|
569
|
|
|
|
|
|
|
# extract preview image(s) from "Thumbnails" directory if they exist |
|
570
|
1
|
|
|
|
|
1
|
my $type; |
|
571
|
1
|
|
|
|
|
5
|
my %tag = ( jpg => 'PreviewImage', png => 'PreviewPNG' ); |
|
572
|
1
|
|
|
|
|
4
|
foreach $type ('jpg', 'png') { |
|
573
|
2
|
|
|
|
|
9
|
my $thumb = $zip->memberNamed("Thumbnails/thumbnail.$type"); |
|
574
|
2
|
100
|
|
|
|
270
|
next unless $thumb; |
|
575
|
1
|
|
|
|
|
7
|
($buff, $status) = $zip->contents($thumb); |
|
576
|
1
|
50
|
|
|
|
1357
|
$et->FoundTag($tag{$type}, $buff) unless $status; |
|
577
|
|
|
|
|
|
|
} |
|
578
|
1
|
|
|
|
|
8
|
last; # all done since we recognized the MIME type or found metadata |
|
579
|
|
|
|
|
|
|
} |
|
580
|
|
|
|
|
|
|
# continue on to list ZIP contents... |
|
581
|
|
|
|
|
|
|
} |
|
582
|
|
|
|
|
|
|
} |
|
583
|
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
# otherwise just extract general ZIP information |
|
585
|
1
|
|
|
|
|
6
|
$et->SetFileType(); |
|
586
|
1
|
|
|
|
|
4
|
@members = $zip->members(); |
|
587
|
1
|
|
|
|
|
7
|
my ($member, $iWorkType); |
|
588
|
|
|
|
|
|
|
# special files to extract |
|
589
|
1
|
|
|
|
|
12
|
my %extract = ( |
|
590
|
|
|
|
|
|
|
'meta.json' => 1, |
|
591
|
|
|
|
|
|
|
'previews/preview.png' => 'PreviewPNG', |
|
592
|
|
|
|
|
|
|
'preview.jpg' => 'PreviewImage', # (iWork 2013 files) |
|
593
|
|
|
|
|
|
|
'preview-web.jpg' => 'OtherImage', # (iWork 2013 files) |
|
594
|
|
|
|
|
|
|
'preview-micro.jpg' => 'ThumbnailImage', # (iWork 2013 files) |
|
595
|
|
|
|
|
|
|
'QuickLook/Thumbnail.jpg' => 'ThumbnailImage', # (iWork 2009 files) |
|
596
|
|
|
|
|
|
|
'QuickLook/Preview.pdf' => 'PreviewPDF', # (iWork 2009 files) |
|
597
|
|
|
|
|
|
|
); |
|
598
|
1
|
|
|
|
|
3
|
foreach $member (@members) { |
|
599
|
1
|
|
|
|
|
3
|
$$et{DOC_NUM} = ++$docNum; |
|
600
|
1
|
|
|
|
|
5
|
HandleMember($et, $member, $tagTablePtr); |
|
601
|
1
|
|
|
|
|
10
|
my $file = $member->fileName(); |
|
602
|
|
|
|
|
|
|
# extract things from Sketch files |
|
603
|
1
|
50
|
33
|
|
|
15
|
if ($extract{$file}) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
604
|
0
|
|
|
|
|
0
|
($buff, $status) = $zip->contents($member); |
|
605
|
0
|
0
|
|
|
|
0
|
$status and $et->Warn("Error extracting $file"), next; |
|
606
|
0
|
0
|
|
|
|
0
|
if ($file eq 'meta.json') { |
|
607
|
0
|
|
|
|
|
0
|
$et->ExtractInfo(\$buff, { ReEntry => 1 }); |
|
608
|
0
|
0
|
0
|
|
|
0
|
if ($$et{VALUE}{App} and $$et{VALUE}{App} =~ /sketch/i) { |
|
609
|
0
|
|
|
|
|
0
|
$et->OverrideFileType('SKETCH'); |
|
610
|
|
|
|
|
|
|
} |
|
611
|
|
|
|
|
|
|
} else { |
|
612
|
0
|
|
|
|
|
0
|
$et->FoundTag($extract{$file} => $buff); |
|
613
|
|
|
|
|
|
|
} |
|
614
|
|
|
|
|
|
|
} elsif ($file eq 'Index/Document.iwa' and not $iWorkType) { |
|
615
|
0
|
|
0
|
|
|
0
|
my $type = $iWorkType{$$et{FILE_EXT} || ''}; |
|
616
|
0
|
|
0
|
|
|
0
|
$iWorkType = $type || 'PAGES'; |
|
617
|
|
|
|
|
|
|
} elsif ($iWorkFile{$file}) { |
|
618
|
0
|
|
|
|
|
0
|
$iWorkType = $iWorkFile{$file}; |
|
619
|
|
|
|
|
|
|
} |
|
620
|
|
|
|
|
|
|
} |
|
621
|
1
|
50
|
|
|
|
4
|
$et->OverrideFileType($iWorkType) if $iWorkType; |
|
622
|
1
|
|
|
|
|
6
|
last; |
|
623
|
|
|
|
|
|
|
} |
|
624
|
|
|
|
|
|
|
# all done if we processed this using Archive::Zip |
|
625
|
5
|
50
|
|
|
|
22
|
if ($zip) { |
|
626
|
5
|
|
|
|
|
15
|
delete $$dirInfo{ZIP}; |
|
627
|
5
|
|
|
|
|
20
|
delete $$et{DOC_NUM}; |
|
628
|
5
|
50
|
33
|
|
|
19
|
if ($docNum > 1 and not $et->Options('Duplicates')) { |
|
629
|
0
|
|
|
|
|
0
|
$et->Warn("Use the Duplicates option to extract tags for all $docNum files", 1); |
|
630
|
|
|
|
|
|
|
} |
|
631
|
5
|
|
|
|
|
150
|
return 1; |
|
632
|
|
|
|
|
|
|
} |
|
633
|
|
|
|
|
|
|
# |
|
634
|
|
|
|
|
|
|
# process the ZIP file by hand (funny, but this seems easier than using Archive::Zip) |
|
635
|
|
|
|
|
|
|
# |
|
636
|
0
|
|
|
|
|
|
$et->VPrint(1, " -- processing as binary data --\n"); |
|
637
|
0
|
|
|
|
|
|
$raf->Seek(30, 0); |
|
638
|
0
|
|
|
|
|
|
$et->SetFileType(); |
|
639
|
0
|
|
|
|
|
|
SetByteOrder('II'); |
|
640
|
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
# A. Local file header: |
|
642
|
|
|
|
|
|
|
# local file header signature 0) 4 bytes (0x04034b50) |
|
643
|
|
|
|
|
|
|
# version needed to extract 4) 2 bytes |
|
644
|
|
|
|
|
|
|
# general purpose bit flag 6) 2 bytes |
|
645
|
|
|
|
|
|
|
# compression method 8) 2 bytes |
|
646
|
|
|
|
|
|
|
# last mod file time 10) 2 bytes |
|
647
|
|
|
|
|
|
|
# last mod file date 12) 2 bytes |
|
648
|
|
|
|
|
|
|
# crc-32 14) 4 bytes |
|
649
|
|
|
|
|
|
|
# compressed size 18) 4 bytes |
|
650
|
|
|
|
|
|
|
# uncompressed size 22) 4 bytes |
|
651
|
|
|
|
|
|
|
# file name length 26) 2 bytes |
|
652
|
|
|
|
|
|
|
# extra field length 28) 2 bytes |
|
653
|
0
|
|
|
|
|
|
for (;;) { |
|
654
|
0
|
|
|
|
|
|
my $len = Get16u(\$buff, 26) + Get16u(\$buff, 28); |
|
655
|
0
|
0
|
|
|
|
|
$raf->Read($buf2, $len) == $len or last; |
|
656
|
|
|
|
|
|
|
|
|
657
|
0
|
|
|
|
|
|
$$et{DOC_NUM} = ++$docNum; |
|
658
|
0
|
|
|
|
|
|
$buff .= $buf2; |
|
659
|
0
|
|
|
|
|
|
my %dirInfo = ( |
|
660
|
|
|
|
|
|
|
DataPt => \$buff, |
|
661
|
|
|
|
|
|
|
DataPos => $raf->Tell() - 30 - $len, |
|
662
|
|
|
|
|
|
|
DataLen => 30 + $len, |
|
663
|
|
|
|
|
|
|
DirStart => 0, |
|
664
|
|
|
|
|
|
|
DirLen => 30 + $len, |
|
665
|
|
|
|
|
|
|
MixedTags => 1, # (to ignore FileComment tag) |
|
666
|
|
|
|
|
|
|
); |
|
667
|
0
|
|
|
|
|
|
$et->ProcessDirectory(\%dirInfo, $tagTablePtr); |
|
668
|
0
|
|
|
|
|
|
my $flags = Get16u(\$buff, 6); |
|
669
|
0
|
0
|
|
|
|
|
if ($flags & 0x08) { |
|
670
|
|
|
|
|
|
|
# we don't yet support skipping stream mode data |
|
671
|
|
|
|
|
|
|
# (when this happens, the CRC, compressed size and uncompressed |
|
672
|
|
|
|
|
|
|
# sizes are set to 0 in the header. Instead, they are stored |
|
673
|
|
|
|
|
|
|
# after the compressed data with an optional header of 0x08074b50) |
|
674
|
0
|
|
|
|
|
|
$et->Warn('Stream mode data encountered, file list may be incomplete'); |
|
675
|
0
|
|
|
|
|
|
last; |
|
676
|
|
|
|
|
|
|
} |
|
677
|
0
|
|
|
|
|
|
$len = Get32u(\$buff, 18); # file data length |
|
678
|
0
|
0
|
|
|
|
|
$raf->Seek($len, 1) or last; # skip file data |
|
679
|
0
|
0
|
0
|
|
|
|
$raf->Read($buff, 30) == 30 and $buff =~ /^PK\x03\x04/ or last; |
|
680
|
|
|
|
|
|
|
} |
|
681
|
0
|
|
|
|
|
|
delete $$et{DOC_NUM}; |
|
682
|
0
|
0
|
0
|
|
|
|
if ($docNum > 1 and not $et->Options('Duplicates')) { |
|
683
|
0
|
|
|
|
|
|
$et->Warn("Use the Duplicates option to extract tags for all $docNum files", 1); |
|
684
|
|
|
|
|
|
|
} |
|
685
|
0
|
|
|
|
|
|
return 1; |
|
686
|
|
|
|
|
|
|
} |
|
687
|
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
1; # end |
|
689
|
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
__END__ |