line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
2
|
|
|
|
|
|
|
# File: ZIP.pm |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Description: Read ZIP archive meta information |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Revisions: 10/28/2007 - P. Harvey Created |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# References: 1) http://www.pkware.com/documents/casestudies/APPNOTE.TXT |
9
|
|
|
|
|
|
|
# 2) http://www.cpanforum.com/threads/9046 |
10
|
|
|
|
|
|
|
# 3) http://www.gzip.org/zlib/rfc-gzip.html |
11
|
|
|
|
|
|
|
# 4) http://DataCompression.info/ArchiveFormats/RAR202.txt |
12
|
|
|
|
|
|
|
# 5) https://jira.atlassian.com/browse/CONF-21706 |
13
|
|
|
|
|
|
|
# 6) http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/indesign/cs55-docs/IDML/idml-specification.pdf |
14
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
package Image::ExifTool::ZIP; |
17
|
|
|
|
|
|
|
|
18
|
1
|
|
|
1
|
|
4027
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
39
|
|
19
|
1
|
|
|
1
|
|
5
|
use vars qw($VERSION $warnString); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
55
|
|
20
|
1
|
|
|
1
|
|
7
|
use Image::ExifTool qw(:DataAccess :Utils); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
3129
|
|
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
$VERSION = '1.27'; |
23
|
|
|
|
|
|
|
|
24
|
0
|
|
|
0
|
0
|
0
|
sub WarnProc($) { $warnString = $_[0]; } |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# file types for recognized Open Document "mimetype" values |
27
|
|
|
|
|
|
|
my %openDocType = ( |
28
|
|
|
|
|
|
|
'application/vnd.oasis.opendocument.database' => 'ODB', #5 |
29
|
|
|
|
|
|
|
'application/vnd.oasis.opendocument.chart' => 'ODC', #5 |
30
|
|
|
|
|
|
|
'application/vnd.oasis.opendocument.formula' => 'ODF', #5 |
31
|
|
|
|
|
|
|
'application/vnd.oasis.opendocument.graphics' => 'ODG', #5 |
32
|
|
|
|
|
|
|
'application/vnd.oasis.opendocument.image' => 'ODI', #5 |
33
|
|
|
|
|
|
|
'application/vnd.oasis.opendocument.presentation' => 'ODP', |
34
|
|
|
|
|
|
|
'application/vnd.oasis.opendocument.spreadsheet' => 'ODS', |
35
|
|
|
|
|
|
|
'application/vnd.oasis.opendocument.text' => 'ODT', |
36
|
|
|
|
|
|
|
'application/vnd.adobe.indesign-idml-package' => 'IDML', #6 (not open doc) |
37
|
|
|
|
|
|
|
'application/epub+zip' => 'EPUB', #PH (not open doc) |
38
|
|
|
|
|
|
|
); |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
# iWork file types based on names of files found in the zip archive |
41
|
|
|
|
|
|
|
my %iWorkFile = ( |
42
|
|
|
|
|
|
|
'Index/Slide.iwa' => 'KEY', |
43
|
|
|
|
|
|
|
'Index/Tables/DataList.iwa' => 'NUMBERS', |
44
|
|
|
|
|
|
|
); |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
my %iWorkType = ( |
47
|
|
|
|
|
|
|
NUMBERS => 'NUMBERS', |
48
|
|
|
|
|
|
|
PAGES => 'PAGES', |
49
|
|
|
|
|
|
|
KEY => 'KEY', |
50
|
|
|
|
|
|
|
KTH => 'KTH', |
51
|
|
|
|
|
|
|
NMBTEMPLATE => 'NMBTEMPLATE', |
52
|
|
|
|
|
|
|
); |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# ZIP metadata blocks |
55
|
|
|
|
|
|
|
%Image::ExifTool::ZIP::Main = ( |
56
|
|
|
|
|
|
|
PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData, |
57
|
|
|
|
|
|
|
GROUPS => { 2 => 'Other' }, |
58
|
|
|
|
|
|
|
FORMAT => 'int16u', |
59
|
|
|
|
|
|
|
NOTES => q{ |
60
|
|
|
|
|
|
|
The following tags are extracted from ZIP archives. ExifTool also extracts |
61
|
|
|
|
|
|
|
additional meta information from compressed documents inside some ZIP-based |
62
|
|
|
|
|
|
|
files such Office Open XML (DOCX, PPTX and XLSX), Open Document (ODB, ODC, |
63
|
|
|
|
|
|
|
ODF, ODG, ODI, ODP, ODS and ODT), iWork (KEY, PAGES, NUMBERS), Capture One |
64
|
|
|
|
|
|
|
Enhanced Image Package (EIP), Adobe InDesign Markup Language (IDML), |
65
|
|
|
|
|
|
|
Electronic Publication (EPUB), and Sketch design files (SKETCH). The |
66
|
|
|
|
|
|
|
ExifTool family 3 groups may be used to organize ZIP tags by embedded |
67
|
|
|
|
|
|
|
document number (ie. the exiftool C<-g3> option). |
68
|
|
|
|
|
|
|
}, |
69
|
|
|
|
|
|
|
2 => 'ZipRequiredVersion', |
70
|
|
|
|
|
|
|
3 => { |
71
|
|
|
|
|
|
|
Name => 'ZipBitFlag', |
72
|
|
|
|
|
|
|
PrintConv => '$val ? sprintf("0x%.4x",$val) : $val', |
73
|
|
|
|
|
|
|
}, |
74
|
|
|
|
|
|
|
4 => { |
75
|
|
|
|
|
|
|
Name => 'ZipCompression', |
76
|
|
|
|
|
|
|
PrintConv => { |
77
|
|
|
|
|
|
|
0 => 'None', |
78
|
|
|
|
|
|
|
1 => 'Shrunk', |
79
|
|
|
|
|
|
|
2 => 'Reduced with compression factor 1', |
80
|
|
|
|
|
|
|
3 => 'Reduced with compression factor 2', |
81
|
|
|
|
|
|
|
4 => 'Reduced with compression factor 3', |
82
|
|
|
|
|
|
|
5 => 'Reduced with compression factor 4', |
83
|
|
|
|
|
|
|
6 => 'Imploded', |
84
|
|
|
|
|
|
|
7 => 'Tokenized', |
85
|
|
|
|
|
|
|
8 => 'Deflated', |
86
|
|
|
|
|
|
|
9 => 'Enhanced Deflate using Deflate64(tm)', |
87
|
|
|
|
|
|
|
10 => 'Imploded (old IBM TERSE)', |
88
|
|
|
|
|
|
|
12 => 'BZIP2', |
89
|
|
|
|
|
|
|
14 => 'LZMA (EFS)', |
90
|
|
|
|
|
|
|
18 => 'IBM TERSE (new)', |
91
|
|
|
|
|
|
|
19 => 'IBM LZ77 z Architecture (PFS)', |
92
|
|
|
|
|
|
|
96 => 'JPEG recompressed', #2 |
93
|
|
|
|
|
|
|
97 => 'WavPack compressed', #2 |
94
|
|
|
|
|
|
|
98 => 'PPMd version I, Rev 1', |
95
|
|
|
|
|
|
|
}, |
96
|
|
|
|
|
|
|
}, |
97
|
|
|
|
|
|
|
5 => { |
98
|
|
|
|
|
|
|
Name => 'ZipModifyDate', |
99
|
|
|
|
|
|
|
Format => 'int32u', |
100
|
|
|
|
|
|
|
Groups => { 2 => 'Time' }, |
101
|
|
|
|
|
|
|
ValueConv => sub { |
102
|
|
|
|
|
|
|
my $val = shift; |
103
|
|
|
|
|
|
|
return sprintf('%.4d:%.2d:%.2d %.2d:%.2d:%.2d', |
104
|
|
|
|
|
|
|
($val >> 25) + 1980, # year |
105
|
|
|
|
|
|
|
($val >> 21) & 0x0f, # month |
106
|
|
|
|
|
|
|
($val >> 16) & 0x1f, # day |
107
|
|
|
|
|
|
|
($val >> 11) & 0x1f, # hour |
108
|
|
|
|
|
|
|
($val >> 5) & 0x3f, # minute |
109
|
|
|
|
|
|
|
($val & 0x1f) * 2 # second |
110
|
|
|
|
|
|
|
); |
111
|
|
|
|
|
|
|
}, |
112
|
|
|
|
|
|
|
PrintConv => '$self->ConvertDateTime($val)', |
113
|
|
|
|
|
|
|
}, |
114
|
|
|
|
|
|
|
7 => { Name => 'ZipCRC', Format => 'int32u', PrintConv => 'sprintf("0x%.8x",$val)' }, |
115
|
|
|
|
|
|
|
9 => { Name => 'ZipCompressedSize', Format => 'int32u' }, |
116
|
|
|
|
|
|
|
11 => { Name => 'ZipUncompressedSize', Format => 'int32u' }, |
117
|
|
|
|
|
|
|
13 => { |
118
|
|
|
|
|
|
|
Name => 'ZipFileNameLength', |
119
|
|
|
|
|
|
|
# don't store a tag -- just extract the value for use with ZipFileName |
120
|
|
|
|
|
|
|
Hidden => 1, |
121
|
|
|
|
|
|
|
RawConv => '$$self{ZipFileNameLength} = $val; undef', |
122
|
|
|
|
|
|
|
}, |
123
|
|
|
|
|
|
|
# 14 => 'ZipExtraFieldLength', |
124
|
|
|
|
|
|
|
15 => { |
125
|
|
|
|
|
|
|
Name => 'ZipFileName', |
126
|
|
|
|
|
|
|
Format => 'string[$$self{ZipFileNameLength}]', |
127
|
|
|
|
|
|
|
}, |
128
|
|
|
|
|
|
|
_com => 'ZipFileComment', |
129
|
|
|
|
|
|
|
); |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
# GNU ZIP tags (ref 3) |
132
|
|
|
|
|
|
|
%Image::ExifTool::ZIP::GZIP = ( |
133
|
|
|
|
|
|
|
PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData, |
134
|
|
|
|
|
|
|
GROUPS => { 2 => 'Other' }, |
135
|
|
|
|
|
|
|
NOTES => q{ |
136
|
|
|
|
|
|
|
These tags are extracted from GZIP (GNU ZIP) archives, but currently only |
137
|
|
|
|
|
|
|
for the first file in the archive. |
138
|
|
|
|
|
|
|
}, |
139
|
|
|
|
|
|
|
2 => { |
140
|
|
|
|
|
|
|
Name => 'Compression', |
141
|
|
|
|
|
|
|
PrintConv => { |
142
|
|
|
|
|
|
|
8 => 'Deflated', |
143
|
|
|
|
|
|
|
}, |
144
|
|
|
|
|
|
|
}, |
145
|
|
|
|
|
|
|
3 => { |
146
|
|
|
|
|
|
|
Name => 'Flags', |
147
|
|
|
|
|
|
|
PrintConv => { BITMASK => { |
148
|
|
|
|
|
|
|
0 => 'Text', |
149
|
|
|
|
|
|
|
1 => 'CRC16', |
150
|
|
|
|
|
|
|
2 => 'ExtraFields', |
151
|
|
|
|
|
|
|
3 => 'FileName', |
152
|
|
|
|
|
|
|
4 => 'Comment', |
153
|
|
|
|
|
|
|
}}, |
154
|
|
|
|
|
|
|
}, |
155
|
|
|
|
|
|
|
4 => { |
156
|
|
|
|
|
|
|
Name => 'ModifyDate', |
157
|
|
|
|
|
|
|
Format => 'int32u', |
158
|
|
|
|
|
|
|
Groups => { 2 => 'Time' }, |
159
|
|
|
|
|
|
|
ValueConv => 'ConvertUnixTime($val,1)', |
160
|
|
|
|
|
|
|
PrintConv => '$self->ConvertDateTime($val)', |
161
|
|
|
|
|
|
|
}, |
162
|
|
|
|
|
|
|
8 => { |
163
|
|
|
|
|
|
|
Name => 'ExtraFlags', |
164
|
|
|
|
|
|
|
PrintConv => { |
165
|
|
|
|
|
|
|
0 => '(none)', |
166
|
|
|
|
|
|
|
2 => 'Maximum Compression', |
167
|
|
|
|
|
|
|
4 => 'Fastest Algorithm', |
168
|
|
|
|
|
|
|
}, |
169
|
|
|
|
|
|
|
}, |
170
|
|
|
|
|
|
|
9 => { |
171
|
|
|
|
|
|
|
Name => 'OperatingSystem', |
172
|
|
|
|
|
|
|
PrintConv => { |
173
|
|
|
|
|
|
|
0 => 'FAT filesystem (MS-DOS, OS/2, NT/Win32)', |
174
|
|
|
|
|
|
|
1 => 'Amiga', |
175
|
|
|
|
|
|
|
2 => 'VMS (or OpenVMS)', |
176
|
|
|
|
|
|
|
3 => 'Unix', |
177
|
|
|
|
|
|
|
4 => 'VM/CMS', |
178
|
|
|
|
|
|
|
5 => 'Atari TOS', |
179
|
|
|
|
|
|
|
6 => 'HPFS filesystem (OS/2, NT)', |
180
|
|
|
|
|
|
|
7 => 'Macintosh', |
181
|
|
|
|
|
|
|
8 => 'Z-System', |
182
|
|
|
|
|
|
|
9 => 'CP/M', |
183
|
|
|
|
|
|
|
10 => 'TOPS-20', |
184
|
|
|
|
|
|
|
11 => 'NTFS filesystem (NT)', |
185
|
|
|
|
|
|
|
12 => 'QDOS', |
186
|
|
|
|
|
|
|
13 => 'Acorn RISCOS', |
187
|
|
|
|
|
|
|
255 => 'unknown', |
188
|
|
|
|
|
|
|
}, |
189
|
|
|
|
|
|
|
}, |
190
|
|
|
|
|
|
|
10 => 'ArchivedFileName', |
191
|
|
|
|
|
|
|
11 => 'Comment', |
192
|
|
|
|
|
|
|
); |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
# RAR tags (ref 4) |
195
|
|
|
|
|
|
|
%Image::ExifTool::ZIP::RAR = ( |
196
|
|
|
|
|
|
|
PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData, |
197
|
|
|
|
|
|
|
GROUPS => { 2 => 'Other' }, |
198
|
|
|
|
|
|
|
NOTES => 'These tags are extracted from RAR archive files.', |
199
|
|
|
|
|
|
|
0 => { |
200
|
|
|
|
|
|
|
Name => 'CompressedSize', |
201
|
|
|
|
|
|
|
Format => 'int32u', |
202
|
|
|
|
|
|
|
}, |
203
|
|
|
|
|
|
|
4 => { |
204
|
|
|
|
|
|
|
Name => 'UncompressedSize', |
205
|
|
|
|
|
|
|
Format => 'int32u', |
206
|
|
|
|
|
|
|
}, |
207
|
|
|
|
|
|
|
8 => { |
208
|
|
|
|
|
|
|
Name => 'OperatingSystem', |
209
|
|
|
|
|
|
|
PrintConv => { |
210
|
|
|
|
|
|
|
0 => 'MS-DOS', |
211
|
|
|
|
|
|
|
1 => 'OS/2', |
212
|
|
|
|
|
|
|
2 => 'Win32', |
213
|
|
|
|
|
|
|
3 => 'Unix', |
214
|
|
|
|
|
|
|
}, |
215
|
|
|
|
|
|
|
}, |
216
|
|
|
|
|
|
|
13 => { |
217
|
|
|
|
|
|
|
Name => 'ModifyDate', |
218
|
|
|
|
|
|
|
Format => 'int32u', |
219
|
|
|
|
|
|
|
Groups => { 2 => 'Time' }, |
220
|
|
|
|
|
|
|
ValueConv => sub { |
221
|
|
|
|
|
|
|
my $val = shift; |
222
|
|
|
|
|
|
|
return sprintf('%.4d:%.2d:%.2d %.2d:%.2d:%.2d', |
223
|
|
|
|
|
|
|
($val >> 25) + 1980, # year |
224
|
|
|
|
|
|
|
($val >> 21) & 0x0f, # month |
225
|
|
|
|
|
|
|
($val >> 16) & 0x1f, # day |
226
|
|
|
|
|
|
|
($val >> 11) & 0x1f, # hour |
227
|
|
|
|
|
|
|
($val >> 5) & 0x3f, # minute |
228
|
|
|
|
|
|
|
($val & 0x1f) * 2 # second |
229
|
|
|
|
|
|
|
); |
230
|
|
|
|
|
|
|
}, |
231
|
|
|
|
|
|
|
PrintConv => '$self->ConvertDateTime($val)', |
232
|
|
|
|
|
|
|
}, |
233
|
|
|
|
|
|
|
18 => { |
234
|
|
|
|
|
|
|
Name => 'PackingMethod', |
235
|
|
|
|
|
|
|
PrintHex => 1, |
236
|
|
|
|
|
|
|
PrintConv => { |
237
|
|
|
|
|
|
|
0x30 => 'Stored', |
238
|
|
|
|
|
|
|
0x31 => 'Fastest', |
239
|
|
|
|
|
|
|
0x32 => 'Fast', |
240
|
|
|
|
|
|
|
0x33 => 'Normal', |
241
|
|
|
|
|
|
|
0x34 => 'Good Compression', |
242
|
|
|
|
|
|
|
0x35 => 'Best Compression', |
243
|
|
|
|
|
|
|
}, |
244
|
|
|
|
|
|
|
}, |
245
|
|
|
|
|
|
|
19 => { |
246
|
|
|
|
|
|
|
Name => 'FileNameLength', |
247
|
|
|
|
|
|
|
Format => 'int16u', |
248
|
|
|
|
|
|
|
Hidden => 1, |
249
|
|
|
|
|
|
|
RawConv => '$$self{FileNameLength} = $val; undef', |
250
|
|
|
|
|
|
|
}, |
251
|
|
|
|
|
|
|
25 => { |
252
|
|
|
|
|
|
|
Name => 'ArchivedFileName', |
253
|
|
|
|
|
|
|
Format => 'string[$$self{FileNameLength}]', |
254
|
|
|
|
|
|
|
}, |
255
|
|
|
|
|
|
|
); |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
258
|
|
|
|
|
|
|
# Extract information from a RAR file (ref 4) |
259
|
|
|
|
|
|
|
# Inputs: 0) ExifTool object reference, 1) dirInfo reference |
260
|
|
|
|
|
|
|
# Returns: 1 on success, 0 if this wasn't a valid RAR file |
261
|
|
|
|
|
|
|
sub ProcessRAR($$) |
262
|
|
|
|
|
|
|
{ |
263
|
0
|
|
|
0
|
0
|
0
|
my ($et, $dirInfo) = @_; |
264
|
0
|
|
|
|
|
0
|
my $raf = $$dirInfo{RAF}; |
265
|
0
|
|
|
|
|
0
|
my ($flags, $buff); |
266
|
|
|
|
|
|
|
|
267
|
0
|
0
|
0
|
|
|
0
|
return 0 unless $raf->Read($buff, 7) and $buff eq "Rar!\x1a\x07\0"; |
268
|
|
|
|
|
|
|
|
269
|
0
|
|
|
|
|
0
|
$et->SetFileType(); |
270
|
0
|
|
|
|
|
0
|
SetByteOrder('II'); |
271
|
0
|
|
|
|
|
0
|
my $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::RAR'); |
272
|
0
|
|
|
|
|
0
|
my $docNum = 0; |
273
|
|
|
|
|
|
|
|
274
|
0
|
|
|
|
|
0
|
for (;;) { |
275
|
|
|
|
|
|
|
# read block header |
276
|
0
|
0
|
|
|
|
0
|
$raf->Read($buff, 7) == 7 or last; |
277
|
0
|
|
|
|
|
0
|
my ($type, $flags, $size) = unpack('xxCvv', $buff); |
278
|
0
|
|
|
|
|
0
|
$size -= 7; |
279
|
0
|
0
|
|
|
|
0
|
if ($flags & 0x8000) { |
280
|
0
|
0
|
|
|
|
0
|
$raf->Read($buff, 4) == 4 or last; |
281
|
0
|
|
|
|
|
0
|
$size += unpack('V',$buff) - 4; |
282
|
|
|
|
|
|
|
} |
283
|
0
|
0
|
|
|
|
0
|
last if $size < 0; |
284
|
0
|
0
|
|
|
|
0
|
next unless $size; # ignore blocks with no data |
285
|
|
|
|
|
|
|
# don't try to read very large blocks unless LargeFileSupport is enabled |
286
|
0
|
0
|
0
|
|
|
0
|
if ($size >= 0x80000000 and not $et->Options('LargeFileSupport')) { |
287
|
0
|
|
|
|
|
0
|
$et->Warn('Large block encountered. Aborting.'); |
288
|
0
|
|
|
|
|
0
|
last; |
289
|
|
|
|
|
|
|
} |
290
|
|
|
|
|
|
|
# process the block |
291
|
0
|
0
|
0
|
|
|
0
|
if ($type == 0x74) { # file block |
|
|
0
|
|
|
|
|
|
292
|
|
|
|
|
|
|
# read maximum 4 KB from a file block |
293
|
0
|
0
|
|
|
|
0
|
my $n = $size > 4096 ? 4096 : $size; |
294
|
0
|
0
|
|
|
|
0
|
$raf->Read($buff, $n) == $n or last; |
295
|
|
|
|
|
|
|
# add compressed size to start of data so we can extract it with the other tags |
296
|
0
|
|
|
|
|
0
|
$buff = pack('V',$size) . $buff; |
297
|
0
|
|
|
|
|
0
|
$$et{DOC_NUM} = ++$docNum; |
298
|
0
|
|
|
|
|
0
|
$et->ProcessDirectory({ DataPt => \$buff }, $tagTablePtr); |
299
|
0
|
|
|
|
|
0
|
$size -= $n; |
300
|
|
|
|
|
|
|
} elsif ($type == 0x75 and $size > 6) { # comment block |
301
|
0
|
0
|
|
|
|
0
|
$raf->Read($buff, $size) == $size or last; |
302
|
|
|
|
|
|
|
# save comment, only if "Stored" (this is untested) |
303
|
0
|
0
|
|
|
|
0
|
if (Get8u(\$buff, 3) == 0x30) { |
304
|
0
|
|
|
|
|
0
|
$et->FoundTag('Comment', substr($buff, 6)); |
305
|
|
|
|
|
|
|
} |
306
|
0
|
|
|
|
|
0
|
next; |
307
|
|
|
|
|
|
|
} |
308
|
|
|
|
|
|
|
# seek to the start of the next block |
309
|
0
|
0
|
0
|
|
|
0
|
$raf->Seek($size, 1) or last if $size; |
310
|
|
|
|
|
|
|
} |
311
|
0
|
|
|
|
|
0
|
$$et{DOC_NUM} = 0; |
312
|
0
|
0
|
0
|
|
|
0
|
if ($docNum > 1 and not $et->Options('Duplicates')) { |
313
|
0
|
|
|
|
|
0
|
$et->Warn("Use the Duplicates option to extract tags for all $docNum files", 1); |
314
|
|
|
|
|
|
|
} |
315
|
|
|
|
|
|
|
|
316
|
0
|
|
|
|
|
0
|
return 1; |
317
|
|
|
|
|
|
|
} |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
320
|
|
|
|
|
|
|
# Extract information from a GNU ZIP file (ref 3) |
321
|
|
|
|
|
|
|
# Inputs: 0) ExifTool object reference, 1) dirInfo reference |
322
|
|
|
|
|
|
|
# Returns: 1 on success, 0 if this wasn't a valid GZIP file |
323
|
|
|
|
|
|
|
sub ProcessGZIP($$) |
324
|
|
|
|
|
|
|
{ |
325
|
1
|
|
|
1
|
0
|
5
|
my ($et, $dirInfo) = @_; |
326
|
1
|
|
|
|
|
3
|
my $raf = $$dirInfo{RAF}; |
327
|
1
|
|
|
|
|
2
|
my ($flags, $buff); |
328
|
|
|
|
|
|
|
|
329
|
1
|
50
|
33
|
|
|
4
|
return 0 unless $raf->Read($buff, 10) and $buff =~ /^\x1f\x8b\x08/; |
330
|
|
|
|
|
|
|
|
331
|
1
|
|
|
|
|
4
|
$et->SetFileType(); |
332
|
1
|
|
|
|
|
5
|
SetByteOrder('II'); |
333
|
|
|
|
|
|
|
|
334
|
1
|
|
|
|
|
3
|
my $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::GZIP'); |
335
|
1
|
|
|
|
|
5
|
$et->HandleTag($tagTablePtr, 2, Get8u(\$buff, 2)); |
336
|
1
|
|
|
|
|
4
|
$et->HandleTag($tagTablePtr, 3, $flags = Get8u(\$buff, 3)); |
337
|
1
|
|
|
|
|
6
|
$et->HandleTag($tagTablePtr, 4, Get32u(\$buff, 4)); |
338
|
1
|
|
|
|
|
16
|
$et->HandleTag($tagTablePtr, 8, Get8u(\$buff, 8)); |
339
|
1
|
|
|
|
|
11
|
$et->HandleTag($tagTablePtr, 9, Get8u(\$buff, 9)); |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
# extract file name and comment if they exist |
342
|
1
|
50
|
|
|
|
4
|
if ($flags & 0x18) { |
343
|
1
|
50
|
|
|
|
3
|
if ($flags & 0x04) { |
344
|
|
|
|
|
|
|
# skip extra field |
345
|
0
|
0
|
|
|
|
0
|
$raf->Read($buff, 2) == 2 or return 1; |
346
|
0
|
|
|
|
|
0
|
my $len = Get16u(\$buff, 0); |
347
|
0
|
0
|
|
|
|
0
|
$raf->Read($buff, $len) == $len or return 1; |
348
|
|
|
|
|
|
|
} |
349
|
1
|
50
|
|
|
|
3
|
$raf->Read($buff, 4096) or return 1; |
350
|
1
|
|
|
|
|
3
|
my $pos = 0; |
351
|
1
|
|
|
|
|
1
|
my $tagID; |
352
|
|
|
|
|
|
|
# loop for ArchivedFileName (10) and Comment (11) tags |
353
|
1
|
|
|
|
|
3
|
foreach $tagID (10, 11) { |
354
|
2
|
100
|
|
|
|
5
|
my $mask = $tagID == 10 ? 0x08 : 0x10; |
355
|
2
|
50
|
|
|
|
5
|
next unless $flags & $mask; |
356
|
2
|
50
|
|
|
|
14
|
my $end = $buff =~ /\0/g ? pos($buff) - 1 : length($buff); |
357
|
|
|
|
|
|
|
# (the doc specifies the string should be ISO 8859-1, |
358
|
|
|
|
|
|
|
# but in OS X it seems to be UTF-8, so don't translate |
359
|
|
|
|
|
|
|
# it because I could just as easily screw it up) |
360
|
2
|
|
|
|
|
6
|
my $str = substr($buff, $pos, $end - $pos); |
361
|
2
|
|
|
|
|
7
|
$et->HandleTag($tagTablePtr, $tagID, $str); |
362
|
2
|
50
|
|
|
|
5
|
last if $end >= length $buff; |
363
|
2
|
|
|
|
|
5
|
$pos = $end + 1; |
364
|
|
|
|
|
|
|
} |
365
|
|
|
|
|
|
|
} |
366
|
1
|
|
|
|
|
2
|
return 1; |
367
|
|
|
|
|
|
|
} |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
370
|
|
|
|
|
|
|
# Call HandleTags for attributes of an Archive::Zip member |
371
|
|
|
|
|
|
|
# Inputs: 0) ExifTool object ref, 1) member ref, 2) optional tag table ref |
372
|
|
|
|
|
|
|
sub HandleMember($$;$) |
373
|
|
|
|
|
|
|
{ |
374
|
28
|
|
|
28
|
0
|
49
|
my ($et, $member, $tagTablePtr) = @_; |
375
|
28
|
100
|
|
|
|
66
|
$tagTablePtr or $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::Main'); |
376
|
28
|
|
|
|
|
70
|
$et->HandleTag($tagTablePtr, 2, $member->versionNeededToExtract()); |
377
|
28
|
|
|
|
|
73
|
$et->HandleTag($tagTablePtr, 3, $member->bitFlag()); |
378
|
28
|
|
|
|
|
71
|
$et->HandleTag($tagTablePtr, 4, $member->compressionMethod()); |
379
|
28
|
|
|
|
|
84
|
$et->HandleTag($tagTablePtr, 5, $member->lastModFileDateTime()); |
380
|
28
|
|
|
|
|
74
|
$et->HandleTag($tagTablePtr, 7, $member->crc32()); |
381
|
28
|
|
|
|
|
73
|
$et->HandleTag($tagTablePtr, 9, $member->compressedSize()); |
382
|
28
|
|
|
|
|
71
|
$et->HandleTag($tagTablePtr, 11, $member->uncompressedSize()); |
383
|
28
|
|
|
|
|
67
|
$et->HandleTag($tagTablePtr, 15, $member->fileName()); |
384
|
28
|
|
|
|
|
77
|
my $com = $member->fileComment(); |
385
|
28
|
50
|
33
|
|
|
220
|
$et->HandleTag($tagTablePtr, '_com', $com) if defined $com and length $com; |
386
|
|
|
|
|
|
|
} |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
389
|
|
|
|
|
|
|
# Extract information from a ZIP file |
390
|
|
|
|
|
|
|
# Inputs: 0) ExifTool object reference, 1) dirInfo reference |
391
|
|
|
|
|
|
|
# Returns: 1 on success, 0 if this wasn't a valid ZIP file |
392
|
|
|
|
|
|
|
sub ProcessZIP($$) |
393
|
|
|
|
|
|
|
{ |
394
|
5
|
|
|
5
|
0
|
15
|
my ($et, $dirInfo) = @_; |
395
|
5
|
|
|
|
|
11
|
my $raf = $$dirInfo{RAF}; |
396
|
5
|
|
|
|
|
10
|
my ($buff, $buf2, $zip); |
397
|
|
|
|
|
|
|
|
398
|
5
|
50
|
33
|
|
|
13
|
return 0 unless $raf->Read($buff, 30) == 30 and $buff =~ /^PK\x03\x04/; |
399
|
|
|
|
|
|
|
|
400
|
5
|
|
|
|
|
14
|
my $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::Main'); |
401
|
5
|
|
|
|
|
10
|
my $docNum = 0; |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
# use Archive::Zip if available |
404
|
5
|
|
|
|
|
6
|
for (;;) { |
405
|
5
|
50
|
33
|
|
|
9
|
unless (eval { require Archive::Zip } and eval { require IO::File }) { |
|
5
|
|
|
|
|
683
|
|
|
5
|
|
|
|
|
66773
|
|
406
|
0
|
0
|
0
|
|
|
0
|
if ($$et{FILE_EXT} and $$et{FILE_EXT} ne 'ZIP') { |
407
|
0
|
|
|
|
|
0
|
$et->Warn("Install Archive::Zip to decode compressed ZIP information"); |
408
|
|
|
|
|
|
|
} |
409
|
0
|
|
|
|
|
0
|
last; |
410
|
|
|
|
|
|
|
} |
411
|
|
|
|
|
|
|
# Archive::Zip requires a seekable IO::File object |
412
|
5
|
|
|
|
|
9
|
my $fh; |
413
|
5
|
50
|
|
|
|
15
|
if ($raf->{TESTED} >= 0) { |
|
|
0
|
|
|
|
|
|
414
|
5
|
50
|
|
|
|
9
|
unless (eval { require IO::File }) { |
|
5
|
|
|
|
|
19
|
|
415
|
|
|
|
|
|
|
# (this shouldn't happen because IO::File is a prerequisite of Archive::Zip) |
416
|
0
|
|
|
|
|
0
|
$et->Warn("Install IO::File to decode compressed ZIP information"); |
417
|
0
|
|
|
|
|
0
|
last; |
418
|
|
|
|
|
|
|
} |
419
|
5
|
|
|
|
|
22
|
$raf->Seek(0,0); |
420
|
5
|
|
|
|
|
11
|
$fh = $raf->{FILE_PT}; |
421
|
5
|
|
|
|
|
19
|
bless $fh, 'IO::File'; # Archive::Zip expects an IO::File object |
422
|
0
|
|
|
|
|
0
|
} elsif (eval { require IO::String }) { |
423
|
|
|
|
|
|
|
# read the whole file into memory (what else can I do?) |
424
|
0
|
|
|
|
|
0
|
$raf->Slurp(); |
425
|
0
|
|
|
|
|
0
|
$fh = new IO::String ${$raf->{BUFF_PT}}; |
|
0
|
|
|
|
|
0
|
|
426
|
|
|
|
|
|
|
} else { |
427
|
0
|
0
|
|
|
|
0
|
my $type = $raf->{FILE_PT} ? 'pipe or socket' : 'scalar reference'; |
428
|
0
|
|
|
|
|
0
|
$et->Warn("Install IO::String to decode compressed ZIP information from a $type"); |
429
|
0
|
|
|
|
|
0
|
last; |
430
|
|
|
|
|
|
|
} |
431
|
5
|
|
|
|
|
23
|
$et->VPrint(1, " --- using Archive::Zip ---\n"); |
432
|
5
|
|
|
|
|
22
|
$zip = new Archive::Zip; |
433
|
|
|
|
|
|
|
# catch all warnings! (Archive::Zip is bad for this) |
434
|
5
|
|
|
|
|
162
|
local $SIG{'__WARN__'} = \&WarnProc; |
435
|
5
|
|
|
|
|
19
|
my $status = $zip->readFromFileHandle($fh); |
436
|
5
|
0
|
33
|
|
|
14310
|
if ($status eq '4' and $raf->{TESTED} >= 0 and eval { require IO::String } and |
|
0
|
|
33
|
|
|
0
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
437
|
|
|
|
|
|
|
$raf->Seek(0,2) and $raf->Tell() < 100000000) |
438
|
|
|
|
|
|
|
{ |
439
|
|
|
|
|
|
|
# try again, reading it ourself this time in an attempt to avoid |
440
|
|
|
|
|
|
|
# a failed test with Perl 5.6.2 GNU/Linux 2.6.32-5-686 i686-linux-64int-ld |
441
|
0
|
|
|
|
|
0
|
$raf->Seek(0,0); |
442
|
0
|
|
|
|
|
0
|
$raf->Slurp(); |
443
|
0
|
|
|
|
|
0
|
$fh = new IO::String ${$raf->{BUFF_PT}}; |
|
0
|
|
|
|
|
0
|
|
444
|
0
|
|
|
|
|
0
|
$zip = new Archive::Zip; |
445
|
0
|
|
|
|
|
0
|
$status = $zip->readFromFileHandle($fh); |
446
|
|
|
|
|
|
|
} |
447
|
5
|
50
|
|
|
|
14
|
if ($status) { |
448
|
0
|
|
|
|
|
0
|
undef $zip; |
449
|
0
|
|
|
|
|
0
|
my %err = ( 1=>'Stream end error', 3=>'Format error', 4=>'IO error' ); |
450
|
0
|
|
0
|
|
|
0
|
my $err = $err{$status} || "Error $status"; |
451
|
0
|
|
|
|
|
0
|
$et->Warn("$err reading ZIP file"); |
452
|
0
|
|
|
|
|
0
|
last; |
453
|
|
|
|
|
|
|
} |
454
|
|
|
|
|
|
|
# extract zip file comment |
455
|
5
|
|
|
|
|
16
|
my $comment = $zip->zipfileComment(); |
456
|
5
|
50
|
33
|
|
|
54
|
$et->FoundTag(Comment => $comment) if defined $comment and length $comment; |
457
|
|
|
|
|
|
|
|
458
|
5
|
|
|
|
|
13
|
$$dirInfo{ZIP} = $zip; |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
# check for an Office Open file (DOCX, etc) |
461
|
|
|
|
|
|
|
# --> read '[Content_Types].xml' to determine the file type |
462
|
5
|
|
|
|
|
8
|
my ($mime, @members); |
463
|
5
|
|
|
|
|
17
|
my $cType = $zip->memberNamed('[Content_Types].xml'); |
464
|
5
|
100
|
|
|
|
244
|
if ($cType) { |
465
|
1
|
|
|
|
|
4
|
($buff, $status) = $zip->contents($cType); |
466
|
1
|
0
|
33
|
|
|
1154
|
if (not $status and ( |
|
|
|
33
|
|
|
|
|
467
|
|
|
|
|
|
|
# first look for the main document with the expected name |
468
|
|
|
|
|
|
|
$buff =~ m{\sPartName\s*=\s*['"](?:/ppt/presentation.xml|/word/document.xml|/xl/workbook.xml)['"][^>]*\sContentType\s*=\s*(['"])([^"']+)\.main(\+xml)?\1} or |
469
|
|
|
|
|
|
|
# then look for the main part |
470
|
|
|
|
|
|
|
$buff =~ /]*\sPartName[^<]+\sContentType\s*=\s*(['"])([^"']+)\.main(\+xml)?\1/ or |
471
|
|
|
|
|
|
|
# and if all else fails, use the default main |
472
|
|
|
|
|
|
|
$buff =~ /ContentType\s*=\s*(['"])([^"']+)\.main(\+xml)?\1/)) |
473
|
|
|
|
|
|
|
{ |
474
|
1
|
|
|
|
|
4
|
$mime = $2; |
475
|
|
|
|
|
|
|
} |
476
|
|
|
|
|
|
|
} |
477
|
|
|
|
|
|
|
# check for docProps if we couldn't find a MIME type |
478
|
5
|
100
|
|
|
|
24
|
$mime or @members = $zip->membersMatching('^docProps/.*\.(xml|XML)$'); |
479
|
5
|
100
|
66
|
|
|
328
|
if ($mime or @members) { |
480
|
1
|
|
|
|
|
3
|
$$dirInfo{MIME} = $mime; |
481
|
1
|
|
|
|
|
459
|
require Image::ExifTool::OOXML; |
482
|
1
|
|
|
|
|
7
|
Image::ExifTool::OOXML::ProcessDOCX($et, $dirInfo); |
483
|
1
|
|
|
|
|
3
|
delete $$dirInfo{MIME}; |
484
|
1
|
|
|
|
|
5
|
last; |
485
|
|
|
|
|
|
|
} |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
# check for an EIP file |
488
|
4
|
|
|
|
|
14
|
@members = $zip->membersMatching('^CaptureOne/.*\.(cos|COS)$'); |
489
|
4
|
100
|
|
|
|
266
|
if (@members) { |
490
|
1
|
|
|
|
|
458
|
require Image::ExifTool::CaptureOne; |
491
|
1
|
|
|
|
|
6
|
Image::ExifTool::CaptureOne::ProcessEIP($et, $dirInfo); |
492
|
1
|
|
|
|
|
6
|
last; |
493
|
|
|
|
|
|
|
} |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
# check for an iWork file |
496
|
3
|
|
|
|
|
9
|
@members = $zip->membersMatching('(?i)^(index\.(xml|apxl)|QuickLook/Thumbnail\.jpg|[^/]+\.(pages|numbers|key)/Index.(zip|xml|apxl))$'); |
497
|
3
|
100
|
|
|
|
406
|
if (@members) { |
498
|
1
|
|
|
|
|
463
|
require Image::ExifTool::iWork; |
499
|
1
|
|
|
|
|
5
|
Image::ExifTool::iWork::Process_iWork($et, $dirInfo); |
500
|
1
|
|
|
|
|
4
|
last; |
501
|
|
|
|
|
|
|
} |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
# check for an Open Document, IDML or EPUB file |
504
|
2
|
|
|
|
|
7
|
my $mType = $zip->memberNamed('mimetype'); |
505
|
2
|
100
|
|
|
|
38
|
if ($mType) { |
506
|
1
|
|
|
|
|
5
|
($mime, $status) = $zip->contents($mType); |
507
|
1
|
50
|
33
|
|
|
571
|
if (not $status and $mime =~ /([\x21-\xfe]+)/s) { |
508
|
|
|
|
|
|
|
# clean up MIME type just in case (note that MIME is case insensitive) |
509
|
1
|
|
|
|
|
4
|
$mime = lc $1; |
510
|
1
|
|
50
|
|
|
8
|
$et->SetFileType($openDocType{$mime} || 'ZIP', $mime); |
511
|
1
|
50
|
|
|
|
4
|
$et->Warn("Unrecognized MIMEType $mime") unless $openDocType{$mime}; |
512
|
|
|
|
|
|
|
# extract Open Document metadata from "meta.xml" |
513
|
1
|
|
|
|
|
4
|
my $meta = $zip->memberNamed('meta.xml'); |
514
|
|
|
|
|
|
|
# IDML files have metadata in a different place (ref 6) |
515
|
1
|
50
|
|
|
|
132
|
$meta or $meta = $zip->memberNamed('META-INF/metadata.xml'); |
516
|
1
|
50
|
|
|
|
5
|
if ($meta) { |
517
|
1
|
|
|
|
|
4
|
($buff, $status) = $zip->contents($meta); |
518
|
1
|
50
|
|
|
|
523
|
unless ($status) { |
519
|
1
|
|
|
|
|
7
|
my %dirInfo = ( |
520
|
|
|
|
|
|
|
DirName => 'XML', |
521
|
|
|
|
|
|
|
DataPt => \$buff, |
522
|
|
|
|
|
|
|
DirLen => length $buff, |
523
|
|
|
|
|
|
|
DataLen => length $buff, |
524
|
|
|
|
|
|
|
); |
525
|
|
|
|
|
|
|
# (avoid structure warnings when copying from XML) |
526
|
1
|
|
|
|
|
4
|
my $oldWarn = $$et{NO_STRUCT_WARN}; |
527
|
1
|
|
|
|
|
2
|
$$et{NO_STRUCT_WARN} = 1; |
528
|
1
|
|
|
|
|
4
|
$et->ProcessDirectory(\%dirInfo, GetTagTable('Image::ExifTool::XMP::Main')); |
529
|
1
|
|
|
|
|
4
|
$$et{NO_STRUCT_WARN} = $oldWarn; |
530
|
|
|
|
|
|
|
} |
531
|
|
|
|
|
|
|
} |
532
|
|
|
|
|
|
|
# process rootfile of EPUB container if applicable |
533
|
1
|
|
|
|
|
2
|
for (;;) { |
534
|
1
|
50
|
33
|
|
|
7
|
last if $meta and $mime ne 'application/epub+zip'; |
535
|
0
|
|
|
|
|
0
|
my $container = $zip->memberNamed('META-INF/container.xml'); |
536
|
0
|
|
|
|
|
0
|
($buff, $status) = $zip->contents($container); |
537
|
0
|
0
|
|
|
|
0
|
last if $status; |
538
|
0
|
0
|
|
|
|
0
|
$buff =~ /]*?\bfull-path=(['"])(.*?)\1/s or last; |
539
|
|
|
|
|
|
|
# load the rootfile data (OPF extension; contains XML metadata) |
540
|
0
|
0
|
|
|
|
0
|
my $meta2 = $zip->memberNamed($2) or last; |
541
|
0
|
|
|
|
|
0
|
$meta = $meta2; |
542
|
0
|
|
|
|
|
0
|
($buff, $status) = $zip->contents($meta); |
543
|
0
|
0
|
|
|
|
0
|
last if $status; |
544
|
|
|
|
|
|
|
# use opf:event to generate more meaningful tag names for dc:date |
545
|
0
|
|
|
|
|
0
|
while ($buff =~ s{([^<]+)}{$2}s) { |
546
|
0
|
|
|
|
|
0
|
my $dcTable = GetTagTable('Image::ExifTool::XMP::dc'); |
547
|
0
|
|
|
|
|
0
|
my $tag = "${1}Date"; |
548
|
|
|
|
|
|
|
AddTagToTable($dcTable, $tag, { |
549
|
|
|
|
|
|
|
Name => ucfirst $tag, |
550
|
|
|
|
|
|
|
Groups => { 2 => 'Time' }, |
551
|
|
|
|
|
|
|
List => 'Seq', |
552
|
|
|
|
|
|
|
%Image::ExifTool::XMP::dateTimeInfo |
553
|
0
|
0
|
|
|
|
0
|
}) unless $$dcTable{$tag}; |
554
|
|
|
|
|
|
|
} |
555
|
0
|
|
|
|
|
0
|
my %dirInfo = ( |
556
|
|
|
|
|
|
|
DataPt => \$buff, |
557
|
|
|
|
|
|
|
DirLen => length $buff, |
558
|
|
|
|
|
|
|
DataLen => length $buff, |
559
|
|
|
|
|
|
|
IgnoreProp => { 'package' => 1, metadata => 1 }, |
560
|
|
|
|
|
|
|
); |
561
|
|
|
|
|
|
|
# (avoid structure warnings when copying from XML) |
562
|
0
|
|
|
|
|
0
|
my $oldWarn = $$et{NO_STRUCT_WARN}; |
563
|
0
|
|
|
|
|
0
|
$$et{NO_STRUCT_WARN} = 1; |
564
|
0
|
|
|
|
|
0
|
$et->ProcessDirectory(\%dirInfo, GetTagTable('Image::ExifTool::XMP::XML')); |
565
|
0
|
|
|
|
|
0
|
$$et{NO_STRUCT_WARN} = $oldWarn; |
566
|
0
|
|
|
|
|
0
|
last; |
567
|
|
|
|
|
|
|
} |
568
|
1
|
50
|
33
|
|
|
5
|
if ($openDocType{$mime} or $meta) { |
569
|
|
|
|
|
|
|
# extract preview image(s) from "Thumbnails" directory if they exist |
570
|
1
|
|
|
|
|
2
|
my $type; |
571
|
1
|
|
|
|
|
4
|
my %tag = ( jpg => 'PreviewImage', png => 'PreviewPNG' ); |
572
|
1
|
|
|
|
|
3
|
foreach $type ('jpg', 'png') { |
573
|
2
|
|
|
|
|
8
|
my $thumb = $zip->memberNamed("Thumbnails/thumbnail.$type"); |
574
|
2
|
100
|
|
|
|
219
|
next unless $thumb; |
575
|
1
|
|
|
|
|
4
|
($buff, $status) = $zip->contents($thumb); |
576
|
1
|
50
|
|
|
|
1064
|
$et->FoundTag($tag{$type}, $buff) unless $status; |
577
|
|
|
|
|
|
|
} |
578
|
1
|
|
|
|
|
6
|
last; # all done since we recognized the MIME type or found metadata |
579
|
|
|
|
|
|
|
} |
580
|
|
|
|
|
|
|
# continue on to list ZIP contents... |
581
|
|
|
|
|
|
|
} |
582
|
|
|
|
|
|
|
} |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
# otherwise just extract general ZIP information |
585
|
1
|
|
|
|
|
7
|
$et->SetFileType(); |
586
|
1
|
|
|
|
|
4
|
@members = $zip->members(); |
587
|
1
|
|
|
|
|
6
|
my ($member, $iWorkType); |
588
|
|
|
|
|
|
|
# special files to extract |
589
|
1
|
|
|
|
|
7
|
my %extract = ( |
590
|
|
|
|
|
|
|
'meta.json' => 1, |
591
|
|
|
|
|
|
|
'previews/preview.png' => 'PreviewPNG', |
592
|
|
|
|
|
|
|
'preview.jpg' => 'PreviewImage', # (iWork 2013 files) |
593
|
|
|
|
|
|
|
'preview-web.jpg' => 'OtherImage', # (iWork 2013 files) |
594
|
|
|
|
|
|
|
'preview-micro.jpg' => 'ThumbnailImage', # (iWork 2013 files) |
595
|
|
|
|
|
|
|
'QuickLook/Thumbnail.jpg' => 'ThumbnailImage', # (iWork 2009 files) |
596
|
|
|
|
|
|
|
'QuickLook/Preview.pdf' => 'PreviewPDF', # (iWork 2009 files) |
597
|
|
|
|
|
|
|
); |
598
|
1
|
|
|
|
|
3
|
foreach $member (@members) { |
599
|
1
|
|
|
|
|
3
|
$$et{DOC_NUM} = ++$docNum; |
600
|
1
|
|
|
|
|
4
|
HandleMember($et, $member, $tagTablePtr); |
601
|
1
|
|
|
|
|
17
|
my $file = $member->fileName(); |
602
|
|
|
|
|
|
|
# extract things from Sketch files |
603
|
1
|
50
|
33
|
|
|
15
|
if ($extract{$file}) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
604
|
0
|
|
|
|
|
0
|
($buff, $status) = $zip->contents($member); |
605
|
0
|
0
|
|
|
|
0
|
$status and $et->Warn("Error extracting $file"), next; |
606
|
0
|
0
|
|
|
|
0
|
if ($file eq 'meta.json') { |
607
|
0
|
|
|
|
|
0
|
$et->ExtractInfo(\$buff, { ReEntry => 1 }); |
608
|
0
|
0
|
0
|
|
|
0
|
if ($$et{VALUE}{App} and $$et{VALUE}{App} =~ /sketch/i) { |
609
|
0
|
|
|
|
|
0
|
$et->OverrideFileType('SKETCH'); |
610
|
|
|
|
|
|
|
} |
611
|
|
|
|
|
|
|
} else { |
612
|
0
|
|
|
|
|
0
|
$et->FoundTag($extract{$file} => $buff); |
613
|
|
|
|
|
|
|
} |
614
|
|
|
|
|
|
|
} elsif ($file eq 'Index/Document.iwa' and not $iWorkType) { |
615
|
0
|
|
0
|
|
|
0
|
my $type = $iWorkType{$$et{FILE_EXT} || ''}; |
616
|
0
|
|
0
|
|
|
0
|
$iWorkType = $type || 'PAGES'; |
617
|
|
|
|
|
|
|
} elsif ($iWorkFile{$file}) { |
618
|
0
|
|
|
|
|
0
|
$iWorkType = $iWorkFile{$file}; |
619
|
|
|
|
|
|
|
} |
620
|
|
|
|
|
|
|
} |
621
|
1
|
50
|
|
|
|
4
|
$et->OverrideFileType($iWorkType) if $iWorkType; |
622
|
1
|
|
|
|
|
5
|
last; |
623
|
|
|
|
|
|
|
} |
624
|
|
|
|
|
|
|
# all done if we processed this using Archive::Zip |
625
|
5
|
50
|
|
|
|
17
|
if ($zip) { |
626
|
5
|
|
|
|
|
9
|
delete $$dirInfo{ZIP}; |
627
|
5
|
|
|
|
|
10
|
delete $$et{DOC_NUM}; |
628
|
5
|
50
|
33
|
|
|
14
|
if ($docNum > 1 and not $et->Options('Duplicates')) { |
629
|
0
|
|
|
|
|
0
|
$et->Warn("Use the Duplicates option to extract tags for all $docNum files", 1); |
630
|
|
|
|
|
|
|
} |
631
|
5
|
|
|
|
|
116
|
return 1; |
632
|
|
|
|
|
|
|
} |
633
|
|
|
|
|
|
|
# |
634
|
|
|
|
|
|
|
# process the ZIP file by hand (funny, but this seems easier than using Archive::Zip) |
635
|
|
|
|
|
|
|
# |
636
|
0
|
|
|
|
|
|
$et->VPrint(1, " -- processing as binary data --\n"); |
637
|
0
|
|
|
|
|
|
$raf->Seek(30, 0); |
638
|
0
|
|
|
|
|
|
$et->SetFileType(); |
639
|
0
|
|
|
|
|
|
SetByteOrder('II'); |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
# A. Local file header: |
642
|
|
|
|
|
|
|
# local file header signature 0) 4 bytes (0x04034b50) |
643
|
|
|
|
|
|
|
# version needed to extract 4) 2 bytes |
644
|
|
|
|
|
|
|
# general purpose bit flag 6) 2 bytes |
645
|
|
|
|
|
|
|
# compression method 8) 2 bytes |
646
|
|
|
|
|
|
|
# last mod file time 10) 2 bytes |
647
|
|
|
|
|
|
|
# last mod file date 12) 2 bytes |
648
|
|
|
|
|
|
|
# crc-32 14) 4 bytes |
649
|
|
|
|
|
|
|
# compressed size 18) 4 bytes |
650
|
|
|
|
|
|
|
# uncompressed size 22) 4 bytes |
651
|
|
|
|
|
|
|
# file name length 26) 2 bytes |
652
|
|
|
|
|
|
|
# extra field length 28) 2 bytes |
653
|
0
|
|
|
|
|
|
for (;;) { |
654
|
0
|
|
|
|
|
|
my $len = Get16u(\$buff, 26) + Get16u(\$buff, 28); |
655
|
0
|
0
|
|
|
|
|
$raf->Read($buf2, $len) == $len or last; |
656
|
|
|
|
|
|
|
|
657
|
0
|
|
|
|
|
|
$$et{DOC_NUM} = ++$docNum; |
658
|
0
|
|
|
|
|
|
$buff .= $buf2; |
659
|
0
|
|
|
|
|
|
my %dirInfo = ( |
660
|
|
|
|
|
|
|
DataPt => \$buff, |
661
|
|
|
|
|
|
|
DataPos => $raf->Tell() - 30 - $len, |
662
|
|
|
|
|
|
|
DataLen => 30 + $len, |
663
|
|
|
|
|
|
|
DirStart => 0, |
664
|
|
|
|
|
|
|
DirLen => 30 + $len, |
665
|
|
|
|
|
|
|
MixedTags => 1, # (to ignore FileComment tag) |
666
|
|
|
|
|
|
|
); |
667
|
0
|
|
|
|
|
|
$et->ProcessDirectory(\%dirInfo, $tagTablePtr); |
668
|
0
|
|
|
|
|
|
my $flags = Get16u(\$buff, 6); |
669
|
0
|
0
|
|
|
|
|
if ($flags & 0x08) { |
670
|
|
|
|
|
|
|
# we don't yet support skipping stream mode data |
671
|
|
|
|
|
|
|
# (when this happens, the CRC, compressed size and uncompressed |
672
|
|
|
|
|
|
|
# sizes are set to 0 in the header. Instead, they are stored |
673
|
|
|
|
|
|
|
# after the compressed data with an optional header of 0x08074b50) |
674
|
0
|
|
|
|
|
|
$et->Warn('Stream mode data encountered, file list may be incomplete'); |
675
|
0
|
|
|
|
|
|
last; |
676
|
|
|
|
|
|
|
} |
677
|
0
|
|
|
|
|
|
$len = Get32u(\$buff, 18); # file data length |
678
|
0
|
0
|
|
|
|
|
$raf->Seek($len, 1) or last; # skip file data |
679
|
0
|
0
|
0
|
|
|
|
$raf->Read($buff, 30) == 30 and $buff =~ /^PK\x03\x04/ or last; |
680
|
|
|
|
|
|
|
} |
681
|
0
|
|
|
|
|
|
delete $$et{DOC_NUM}; |
682
|
0
|
0
|
0
|
|
|
|
if ($docNum > 1 and not $et->Options('Duplicates')) { |
683
|
0
|
|
|
|
|
|
$et->Warn("Use the Duplicates option to extract tags for all $docNum files", 1); |
684
|
|
|
|
|
|
|
} |
685
|
0
|
|
|
|
|
|
return 1; |
686
|
|
|
|
|
|
|
} |
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
1; # end |
689
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
__END__ |