File Coverage

blib/lib/Image/ExifTool/ZIP.pm
Criterion Covered Total %
statement 202 321 62.9
branch 68 184 36.9
condition 22 89 24.7
subroutine 9 10 90.0
pod 0 7 0.0
total 301 611 49.2


line stmt bran cond sub pod time code
1             #------------------------------------------------------------------------------
2             # File: ZIP.pm
3             #
4             # Description: Read ZIP archive meta information
5             #
6             # Revisions: 10/28/2007 - P. Harvey Created
7             #
8             # References: 1) http://www.pkware.com/documents/casestudies/APPNOTE.TXT
9             # 2) http://www.cpanforum.com/threads/9046
10             # 3) http://www.gzip.org/zlib/rfc-gzip.html
11             # 4) http://DataCompression.info/ArchiveFormats/RAR202.txt
12             # 5) https://jira.atlassian.com/browse/CONF-21706
13             # 6) http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/indesign/cs55-docs/IDML/idml-specification.pdf
14             # 7) https://www.rarlab.com/technote.htm
15             #------------------------------------------------------------------------------
16              
17             package Image::ExifTool::ZIP;
18              
19 1     1   5743 use strict;
  1         2  
  1         43  
20 1     1   4 use vars qw($VERSION $warnString);
  1         1  
  1         42  
21 1     1   3 use Image::ExifTool qw(:DataAccess :Utils);
  1         1  
  1         4203  
22              
23             $VERSION = '1.32';
24              
25 0     0 0 0 sub WarnProc($) { $warnString = $_[0]; }
26              
27             # file types for recognized Open Document "mimetype" values
28             my %openDocType = (
29             'application/vnd.oasis.opendocument.database' => 'ODB', #5
30             'application/vnd.oasis.opendocument.chart' => 'ODC', #5
31             'application/vnd.oasis.opendocument.formula' => 'ODF', #5
32             'application/vnd.oasis.opendocument.graphics' => 'ODG', #5
33             'application/vnd.oasis.opendocument.image' => 'ODI', #5
34             'application/vnd.oasis.opendocument.presentation' => 'ODP',
35             'application/vnd.oasis.opendocument.spreadsheet' => 'ODS',
36             'application/vnd.oasis.opendocument.text' => 'ODT',
37             'application/vnd.adobe.indesign-idml-package' => 'IDML', #6 (not open doc)
38             'application/epub+zip' => 'EPUB', #PH (not open doc)
39             );
40              
41             # iWork file types based on names of files found in the zip archive
42             my %iWorkFile = (
43             'Index/Slide.iwa' => 'KEY',
44             'Index/Tables/DataList.iwa' => 'NUMBERS',
45             );
46              
47             my %iWorkType = (
48             NUMBERS => 'NUMBERS',
49             PAGES => 'PAGES',
50             KEY => 'KEY',
51             KTH => 'KTH',
52             NMBTEMPLATE => 'NMBTEMPLATE',
53             );
54              
55             # ZIP metadata blocks
56             %Image::ExifTool::ZIP::Main = (
57             PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
58             GROUPS => { 2 => 'Other' },
59             FORMAT => 'int16u',
60             NOTES => q{
61             The following tags are extracted from ZIP archives. ExifTool also extracts
62             additional meta information from compressed documents inside some ZIP-based
63             files such Office Open XML (DOCX, PPTX and XLSX), Open Document (ODB, ODC,
64             ODF, ODG, ODI, ODP, ODS and ODT), iWork (KEY, PAGES, NUMBERS), Capture One
65             Enhanced Image Package (EIP), Adobe InDesign Markup Language (IDML),
66             Electronic Publication (EPUB), and Sketch design files (SKETCH). The
67             ExifTool family 3 groups may be used to organize ZIP tags by embedded
68             document number (ie. the exiftool C<-g3> option).
69             },
70             2 => 'ZipRequiredVersion',
71             3 => {
72             Name => 'ZipBitFlag',
73             PrintConv => '$val ? sprintf("0x%.4x",$val) : $val',
74             },
75             4 => {
76             Name => 'ZipCompression',
77             PrintConv => {
78             0 => 'None',
79             1 => 'Shrunk',
80             2 => 'Reduced with compression factor 1',
81             3 => 'Reduced with compression factor 2',
82             4 => 'Reduced with compression factor 3',
83             5 => 'Reduced with compression factor 4',
84             6 => 'Imploded',
85             7 => 'Tokenized',
86             8 => 'Deflated',
87             9 => 'Enhanced Deflate using Deflate64(tm)',
88             10 => 'Imploded (old IBM TERSE)',
89             12 => 'BZIP2',
90             14 => 'LZMA (EFS)',
91             18 => 'IBM TERSE (new)',
92             19 => 'IBM LZ77 z Architecture (PFS)',
93             96 => 'JPEG recompressed', #2
94             97 => 'WavPack compressed', #2
95             98 => 'PPMd version I, Rev 1',
96             },
97             },
98             5 => {
99             Name => 'ZipModifyDate',
100             Format => 'int32u',
101             Groups => { 2 => 'Time' },
102             ValueConv => sub {
103             my $val = shift;
104             return sprintf('%.4d:%.2d:%.2d %.2d:%.2d:%.2d',
105             ($val >> 25) + 1980, # year
106             ($val >> 21) & 0x0f, # month
107             ($val >> 16) & 0x1f, # day
108             ($val >> 11) & 0x1f, # hour
109             ($val >> 5) & 0x3f, # minute
110             ($val & 0x1f) * 2 # second
111             );
112             },
113             PrintConv => '$self->ConvertDateTime($val)',
114             },
115             7 => { Name => 'ZipCRC', Format => 'int32u', PrintConv => 'sprintf("0x%.8x",$val)' },
116             9 => { Name => 'ZipCompressedSize', Format => 'int32u' },
117             11 => { Name => 'ZipUncompressedSize', Format => 'int32u' },
118             13 => {
119             Name => 'ZipFileNameLength',
120             # don't store a tag -- just extract the value for use with ZipFileName
121             Hidden => 1,
122             RawConv => '$$self{ZipFileNameLength} = $val; undef',
123             },
124             # 14 => 'ZipExtraFieldLength',
125             15 => {
126             Name => 'ZipFileName',
127             Format => 'string[$$self{ZipFileNameLength}]',
128             },
129             _com => 'ZipFileComment',
130             );
131              
132             # GNU ZIP tags (ref 3)
133             %Image::ExifTool::ZIP::GZIP = (
134             PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
135             GROUPS => { 2 => 'Other' },
136             NOTES => q{
137             These tags are extracted from GZIP (GNU ZIP) archives, but currently only
138             for the first file in the archive.
139             },
140             2 => {
141             Name => 'Compression',
142             PrintConv => {
143             8 => 'Deflated',
144             },
145             },
146             3 => {
147             Name => 'Flags',
148             PrintConv => { BITMASK => {
149             0 => 'Text',
150             1 => 'CRC16',
151             2 => 'ExtraFields',
152             3 => 'FileName',
153             4 => 'Comment',
154             }},
155             },
156             4 => {
157             Name => 'ModifyDate',
158             Format => 'int32u',
159             Groups => { 2 => 'Time' },
160             ValueConv => 'ConvertUnixTime($val,1)',
161             PrintConv => '$self->ConvertDateTime($val)',
162             },
163             8 => {
164             Name => 'ExtraFlags',
165             PrintConv => {
166             0 => '(none)',
167             2 => 'Maximum Compression',
168             4 => 'Fastest Algorithm',
169             },
170             },
171             9 => {
172             Name => 'OperatingSystem',
173             PrintConv => {
174             0 => 'FAT filesystem (MS-DOS, OS/2, NT/Win32)',
175             1 => 'Amiga',
176             2 => 'VMS (or OpenVMS)',
177             3 => 'Unix',
178             4 => 'VM/CMS',
179             5 => 'Atari TOS',
180             6 => 'HPFS filesystem (OS/2, NT)',
181             7 => 'Macintosh',
182             8 => 'Z-System',
183             9 => 'CP/M',
184             10 => 'TOPS-20',
185             11 => 'NTFS filesystem (NT)',
186             12 => 'QDOS',
187             13 => 'Acorn RISCOS',
188             255 => 'unknown',
189             },
190             },
191             10 => 'ArchivedFileName',
192             11 => 'Comment',
193             );
194              
195             # RAR v4 tags (ref 4)
196             %Image::ExifTool::ZIP::RAR = (
197             PROCESS_PROC => \&Image::ExifTool::ProcessBinaryData,
198             GROUPS => { 2 => 'Other' },
199             NOTES => 'These tags are extracted from RAR archive files.',
200             0 => {
201             Name => 'CompressedSize',
202             Format => 'int32u',
203             },
204             4 => {
205             Name => 'UncompressedSize',
206             Format => 'int32u',
207             },
208             8 => {
209             Name => 'OperatingSystem',
210             PrintConv => {
211             0 => 'MS-DOS',
212             1 => 'OS/2',
213             2 => 'Win32',
214             3 => 'Unix',
215             },
216             },
217             13 => {
218             Name => 'ModifyDate',
219             Format => 'int32u',
220             Groups => { 2 => 'Time' },
221             ValueConv => sub {
222             my $val = shift;
223             return sprintf('%.4d:%.2d:%.2d %.2d:%.2d:%.2d',
224             ($val >> 25) + 1980, # year
225             ($val >> 21) & 0x0f, # month
226             ($val >> 16) & 0x1f, # day
227             ($val >> 11) & 0x1f, # hour
228             ($val >> 5) & 0x3f, # minute
229             ($val & 0x1f) * 2 # second
230             );
231             },
232             PrintConv => '$self->ConvertDateTime($val)',
233             },
234             18 => {
235             Name => 'PackingMethod',
236             PrintHex => 1,
237             PrintConv => {
238             0x30 => 'Stored',
239             0x31 => 'Fastest',
240             0x32 => 'Fast',
241             0x33 => 'Normal',
242             0x34 => 'Good Compression',
243             0x35 => 'Best Compression',
244             },
245             },
246             19 => {
247             Name => 'FileNameLength',
248             Format => 'int16u',
249             Hidden => 1,
250             RawConv => '$$self{FileNameLength} = $val; undef',
251             },
252             25 => {
253             Name => 'ArchivedFileName',
254             Format => 'string[$$self{FileNameLength}]',
255             },
256             );
257              
258             # RAR v5 tags (ref 7, github#203)
259             %Image::ExifTool::ZIP::RAR5 = (
260             GROUPS => { 2 => 'Other' },
261             VARS => { ID_FMT => 'none' },
262             NOTES => 'These tags are extracted from RAR v5 and 7z archive files.',
263             FileVersion => { },
264             CompressedSize => { },
265             ModifyDate => {
266             Groups => { 2 => 'Time' },
267             ValueConv => 'ConvertUnixTime($val,1)',
268             PrintConv => '$self->ConvertDateTime($val)',
269             },
270             UncompressedSize => { },
271             OperatingSystem => {
272             PrintConv => { 0 => 'Win32', 1 => 'Unix' },
273             },
274             ArchivedFileName => { },
275             );
276              
277             #------------------------------------------------------------------------------
278             # Read unsigned LEB (Little Endian Base) from file
279             # Inputs: 0) RAF ref
280             # Returns: integer value
281             sub ReadULEB($)
282             {
283 15     15 0 16 my $raf = shift;
284 15         15 my ($i, $buff);
285 15         15 my $rtnVal = 0;
286 15         14 for ($i=0; ; ++$i) {
287 18 100       27 $raf->Read($buff, 1) or last;
288 17         35 my $num = ord($buff);
289 17         18 $rtnVal += ($num & 0x7f) << ($i * 7);
290 17 100       25 $num & 0x80 or last;
291             }
292 15         17 return $rtnVal;
293             }
294              
295             #------------------------------------------------------------------------------
296             # Extract information from a RAR file
297             # Inputs: 0) ExifTool object reference, 1) dirInfo reference
298             # Returns: 1 on success, 0 if this wasn't a valid RAR file
299             sub ProcessRAR($$)
300             {
301 1     1 0 3 my ($et, $dirInfo) = @_;
302 1         2 my $raf = $$dirInfo{RAF};
303 1         2 my ($flags, $buff);
304 1         2 my $docNum = 0;
305              
306 1 50 33     2 return 0 unless $raf->Read($buff, 7) and $buff =~ "Rar!\x1a\x07[\0\x01]";
307              
308 1 50       4 if ($buff eq "Rar!\x1a\x07\0") { # RARv4 (ref 4)
309              
310 0         0 $et->SetFileType();
311 0         0 SetByteOrder('II');
312 0         0 my $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::RAR5');
313 0         0 $et->HandleTag($tagTablePtr, 'FileVersion', 'RAR v4');
314 0         0 $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::RAR');
315              
316 0         0 for (;;) {
317             # read block header
318 0 0       0 $raf->Read($buff, 7) == 7 or last;
319 0         0 my ($type, $flags, $size) = unpack('xxCvv', $buff);
320 0         0 $size -= 7;
321 0 0       0 if ($flags & 0x8000) {
322 0 0       0 $raf->Read($buff, 4) == 4 or last;
323 0         0 $size += unpack('V',$buff) - 4;
324             }
325 0 0       0 last if $size < 0;
326 0 0       0 next unless $size; # ignore blocks with no data
327             # don't try to read very large blocks unless LargeFileSupport is enabled
328 0 0       0 if ($size >= 0x80000000) {
329 0 0       0 if (not $et->Options('LargeFileSupport')) {
    0          
330 0         0 $et->Warn('Large block encountered. Aborting.');
331 0         0 last;
332             } elsif ($et->Options('LargeFileSupport') eq '2') {
333 0         0 $et->Warn('Processing large block (LargeFileSupport is 2)');
334             }
335             }
336             # process the block
337 0 0 0     0 if ($type == 0x74) { # file block
    0          
338             # read maximum 4 KB from a file block
339 0 0       0 my $n = $size > 4096 ? 4096 : $size;
340 0 0       0 $raf->Read($buff, $n) == $n or last;
341             # add compressed size to start of data so we can extract it with the other tags
342 0         0 $buff = pack('V',$size) . $buff;
343 0         0 $$et{DOC_NUM} = ++$docNum;
344 0         0 $et->ProcessDirectory({ DataPt => \$buff }, $tagTablePtr);
345 0         0 $size -= $n;
346             } elsif ($type == 0x75 and $size > 6) { # comment block
347 0 0       0 $raf->Read($buff, $size) == $size or last;
348             # save comment, only if "Stored" (this is untested)
349 0 0       0 if (Get8u(\$buff, 3) == 0x30) {
350 0         0 $et->FoundTag('Comment', substr($buff, 6));
351             }
352 0         0 next;
353             }
354             # seek to the start of the next block
355 0 0 0     0 $raf->Seek($size, 1) or last if $size;
356             }
357              
358             } else { # RARv5 (ref 7, github#203)
359              
360 1 50 33     5 return 0 unless $raf->Read($buff, 1) and $buff eq "\0";
361 1         5 $et->SetFileType();
362 1         3 my $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::RAR5');
363 1         4 $et->HandleTag($tagTablePtr, 'FileVersion', 'RAR v5');
364 1         2 $$et{INDENT} .= '| ';
365              
366             # loop through header blocks
367 1         1 for (;;) {
368 4         10 $raf->Seek(4, 1); # skip header CRC
369 4         6 my $headSize = ReadULEB($raf);
370 4 100       9 last if $headSize == 0;
371             # read the header and create new RAF object for reading it
372 3         3 my $header;
373 3 50       4 $raf->Read($header, $headSize) == $headSize or last;
374 3         7 my $rafHdr = File::RandomAccess->new(\$header);
375 3         6 my $headType = ReadULEB($rafHdr); # get header type
376              
377 3 50       6 if ($headType == 4) { # encryption block
378 0         0 $et->Warn("File is encrypted.", 0);
379 0         0 last;
380             }
381             # skip over all headers except file or service header
382 3 100 66     12 next unless $headType == 2 or $headType == 3;
383 1 50       7 $et->VerboseDir('RAR5 file', undef, $headSize) if $headType == 2;
384              
385 1         2 my $headFlag = ReadULEB($rafHdr);
386 1         2 ReadULEB($rafHdr); # skip extraSize
387 1         1 my $dataSize;
388 1 50       3 if ($headFlag & 0x0002) {
389 1         3 $dataSize = ReadULEB($rafHdr); # compressed data size
390 1 50       3 if ($headType == 2) {
391 1         4 $et->HandleTag($tagTablePtr, 'CompressedSize', $dataSize);
392             } else {
393 0         0 $raf->Seek($dataSize, 1); # skip service data section
394 0         0 next;
395             }
396             } else {
397 0 0       0 next if $headType == 3; # all done with service header
398 0         0 $dataSize = 0;
399             }
400 1         3 my $fileFlag = ReadULEB($rafHdr);
401 1         3 my $uncompressedSize = ReadULEB($rafHdr);
402 1 50       4 $et->HandleTag($tagTablePtr, 'UncompressedSize', $uncompressedSize) unless $fileFlag & 0x0008;
403 1         2 ReadULEB($rafHdr); # skip file attributes
404 1 50       3 if ($fileFlag & 0x0002) {
405 0 0       0 $rafHdr->Read($buff, 4) == 4 or last;
406             # (untested)
407 0         0 $et->HandleTag($tagTablePtr, 'ModifyDate', unpack('V', $buff));
408             }
409 1 50       4 $rafHdr->Seek(4, 1) if $fileFlag & 0x0004; # skip CRC if present
410              
411 1         2 ReadULEB($rafHdr); # skip compressionInfo
412              
413             # get operating system
414 1         2 my $os = ReadULEB($rafHdr);
415 1         4 $et->HandleTag($tagTablePtr, 'OperatingSystem', $os);
416              
417             # get filename
418 1 50       3 $rafHdr->Read($buff, 1) == 1 or last;
419 1         2 my $nameLen = ord($buff);
420 1 50       4 $rafHdr->Read($buff, $nameLen) == $nameLen or last;
421 1         3 $buff =~ s/\0+$//; # remove trailing nulls (if any)
422 1         2 $et->HandleTag($tagTablePtr, 'ArchivedFileName', $buff);
423              
424 1         2 $$et{DOC_NUM} = ++$docNum;
425              
426 1         4 $raf->Seek($dataSize, 1); # skip data section
427             }
428 1         4 $$et{INDENT} = substr($$et{INDENT}, 0, -2);
429             }
430              
431 1         2 $$et{DOC_NUM} = 0;
432 1 50 33     5 if ($docNum > 1 and not $et->Options('Duplicates')) {
433 0         0 $et->Warn("Use the Duplicates option to extract tags for all $docNum files", 1);
434             }
435              
436 1         2 return 1;
437             }
438              
439             #------------------------------------------------------------------------------
440             # Extract information from a GNU ZIP file (ref 3)
441             # Inputs: 0) ExifTool object reference, 1) dirInfo reference
442             # Returns: 1 on success, 0 if this wasn't a valid GZIP file
443             sub ProcessGZIP($$)
444             {
445 1     1 0 4 my ($et, $dirInfo) = @_;
446 1         2 my $raf = $$dirInfo{RAF};
447 1         3 my ($flags, $buff);
448              
449 1 50 33     2 return 0 unless $raf->Read($buff, 10) and $buff =~ /^\x1f\x8b\x08/;
450              
451 1         5 $et->SetFileType();
452 1         4 SetByteOrder('II');
453              
454 1         3 my $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::GZIP');
455 1         4 $et->HandleTag($tagTablePtr, 2, Get8u(\$buff, 2));
456 1         3 $et->HandleTag($tagTablePtr, 3, $flags = Get8u(\$buff, 3));
457 1         3 $et->HandleTag($tagTablePtr, 4, Get32u(\$buff, 4));
458 1         3 $et->HandleTag($tagTablePtr, 8, Get8u(\$buff, 8));
459 1         3 $et->HandleTag($tagTablePtr, 9, Get8u(\$buff, 9));
460              
461             # extract file name and comment if they exist
462 1 50       3 if ($flags & 0x18) {
463 1 50       2 if ($flags & 0x04) {
464             # skip extra field
465 0 0       0 $raf->Read($buff, 2) == 2 or return 1;
466 0         0 my $len = Get16u(\$buff, 0);
467 0 0       0 $raf->Read($buff, $len) == $len or return 1;
468             }
469 1 50       22 $raf->Read($buff, 4096) or return 1;
470 1         1 my $pos = 0;
471 1         2 my $tagID;
472             # loop for ArchivedFileName (10) and Comment (11) tags
473 1         2 foreach $tagID (10, 11) {
474 2 100       4 my $mask = $tagID == 10 ? 0x08 : 0x10;
475 2 50       3 next unless $flags & $mask;
476 2 50       14 my $end = $buff =~ /\0/g ? pos($buff) - 1 : length($buff);
477             # (the doc specifies the string should be ISO 8859-1,
478             # but in OS X it seems to be UTF-8, so don't translate
479             # it because I could just as easily screw it up)
480 2         6 my $str = substr($buff, $pos, $end - $pos);
481 2         7 $et->HandleTag($tagTablePtr, $tagID, $str);
482 2 50       4 last if $end >= length $buff;
483 2         5 $pos = $end + 1;
484             }
485             }
486 1         3 return 1;
487             }
488              
489             #------------------------------------------------------------------------------
490             # Call HandleTags for attributes of an Archive::Zip member
491             # Inputs: 0) ExifTool object ref, 1) member ref, 2) optional tag table ref
492             sub HandleMember($$;$)
493             {
494 28     28 0 55 my ($et, $member, $tagTablePtr) = @_;
495 28 100       76 $tagTablePtr or $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::Main');
496 28         77 $et->HandleTag($tagTablePtr, 2, $member->versionNeededToExtract());
497 28         80 $et->HandleTag($tagTablePtr, 3, $member->bitFlag());
498 28         77 $et->HandleTag($tagTablePtr, 4, $member->compressionMethod());
499 28         77 $et->HandleTag($tagTablePtr, 5, $member->lastModFileDateTime());
500 28         95 $et->HandleTag($tagTablePtr, 7, $member->crc32());
501 28         71 $et->HandleTag($tagTablePtr, 9, $member->compressedSize());
502 28         68 $et->HandleTag($tagTablePtr, 11, $member->uncompressedSize());
503 28         71 $et->HandleTag($tagTablePtr, 15, $member->fileName());
504 28         82 my $com = $member->fileComment();
505 28 50 33     230 $et->HandleTag($tagTablePtr, '_com', $com) if defined $com and length $com;
506             }
507              
508             #------------------------------------------------------------------------------
509             # Extract file from ZIP archive
510             # Inputs: 0) ExifTool ref, 1) Zip object ref, 2) file name
511             # Returns: zip member or undef it it didn't exist
512             sub ExtractFile($$$)
513             {
514 10     10 0 21 my ($et, $zip, $file) = @_;
515 10         32 my $result = $zip->memberNamed($file);
516 10         592 $et->VPrint(1, " (Extracting '${file}' from zip archive)\n");
517 10         19 return $result;
518             }
519              
520             #------------------------------------------------------------------------------
521             # Extract information from a ZIP file
522             # Inputs: 0) ExifTool object reference, 1) dirInfo reference
523             # Returns: 1 on success, 0 if this wasn't a valid ZIP file
524             sub ProcessZIP($$)
525             {
526 5     5 0 16 my ($et, $dirInfo) = @_;
527 5         12 my $raf = $$dirInfo{RAF};
528 5         7 my ($buff, $buf2, $zip);
529              
530 5 50 33     18 return 0 unless $raf->Read($buff, 30) == 30 and $buff =~ /^PK\x03\x04/;
531              
532 5         22 my $tagTablePtr = GetTagTable('Image::ExifTool::ZIP::Main');
533 5         9 my $docNum = 0;
534              
535             # use Archive::Zip if available
536 5         9 for (;;) {
537 5 50 33     10 unless (eval { require Archive::Zip } and eval { require IO::File }) {
  5         2972  
  5         72900  
538 0 0 0     0 if ($$et{FILE_EXT} and $$et{FILE_EXT} ne 'ZIP') {
539 0         0 $et->Warn("Install Archive::Zip to decode compressed ZIP information");
540             }
541 0         0 last;
542             }
543             # Archive::Zip requires a seekable IO::File object
544 5         12 my $fh;
545 5 50       21 if ($raf->{TESTED} >= 0) {
    0          
546 5 50       10 unless (eval { require IO::File }) {
  5         25  
547             # (this shouldn't happen because IO::File is a prerequisite of Archive::Zip)
548 0         0 $et->Warn("Install IO::File to decode compressed ZIP information");
549 0         0 last;
550             }
551 5         23 $raf->Seek(0,0);
552 5         12 $fh = $raf->{FILE_PT};
553 5         37 bless $fh, 'IO::File'; # Archive::Zip expects an IO::File object
554 0         0 } elsif (eval { require IO::String }) {
555             # read the whole file into memory (what else can I do?)
556 0         0 $raf->Slurp();
557 0         0 $fh = IO::String->new(${$raf->{BUFF_PT}});
  0         0  
558             } else {
559 0 0       0 my $type = $raf->{FILE_PT} ? 'pipe or socket' : 'scalar reference';
560 0         0 $et->Warn("Install IO::String to decode compressed ZIP information from a $type");
561 0         0 last;
562             }
563 5         37 $et->VPrint(1, " --- using Archive::Zip ---\n");
564 5         41 $zip = Archive::Zip->new;
565             # catch all warnings! (Archive::Zip is bad for this)
566 5         295 local $SIG{'__WARN__'} = \&WarnProc;
567 5         36 my $status = $zip->readFromFileHandle($fh);
568 5 0 33     15292 if ($status eq '4' and $raf->{TESTED} >= 0 and eval { require IO::String } and
  0   33     0  
      0        
      0        
569             $raf->Seek(0,2) and $raf->Tell() < 100000000)
570             {
571             # try again, reading it ourself this time in an attempt to avoid
572             # a failed test with Perl 5.6.2 GNU/Linux 2.6.32-5-686 i686-linux-64int-ld
573 0         0 $raf->Seek(0,0);
574 0         0 $raf->Slurp();
575 0         0 $fh = IO::String->new(${$raf->{BUFF_PT}});
  0         0  
576 0         0 $zip = Archive::Zip->new;
577 0         0 $status = $zip->readFromFileHandle($fh);
578             }
579 5 50       15 if ($status) {
580 0         0 undef $zip;
581 0         0 my %err = ( 1=>'Stream end error', 3=>'Format error', 4=>'IO error' );
582 0   0     0 my $err = $err{$status} || "Error $status";
583 0         0 $et->Warn("$err reading ZIP file");
584 0         0 last;
585             }
586             # extract zip file comment
587 5         18 my $comment = $zip->zipfileComment();
588 5 50 33     51 $et->FoundTag(Comment => $comment) if defined $comment and length $comment;
589              
590 5         15 $$dirInfo{ZIP} = $zip;
591              
592             # check for an Office Open file (DOCX, etc)
593             # --> read '[Content_Types].xml' to determine the file type
594 5         10 my ($mime, @members);
595 5         18 my $cType = ExtractFile($et, $zip, '[Content_Types].xml');
596 5 100       14 if ($cType) {
597 1         6 ($buff, $status) = $zip->contents($cType);
598 1 0 33     1176 if (not $status and (
      33        
599             # first look for the main document with the expected name
600             $buff =~ m{\sPartName\s*=\s*['"](?:/ppt/presentation.xml|/word/document.xml|/xl/workbook.xml)['"][^>]*\sContentType\s*=\s*(['"])([^"']+)\.main(\+xml)?\1} or
601             # then look for the main part
602             $buff =~ /]*\sPartName[^<]+\sContentType\s*=\s*(['"])([^"']+)\.main(\+xml)?\1/ or
603             # and if all else fails, use the default main
604             $buff =~ /ContentType\s*=\s*(['"])([^"']+)\.main(\+xml)?\1/))
605             {
606 1         4 $mime = $2;
607             }
608             }
609             # check for docProps if we couldn't find a MIME type
610 5 100       23 $mime or @members = $zip->membersMatching('^docProps/.*\.(xml|XML)$');
611 5 100 66     389 if ($mime or @members) {
612 1         3 $$dirInfo{MIME} = $mime;
613 1         1751 require Image::ExifTool::OOXML;
614 1         5 Image::ExifTool::OOXML::ProcessDOCX($et, $dirInfo);
615 1         4 delete $$dirInfo{MIME};
616 1         4 last;
617             }
618              
619             # check for an EIP file
620 4         12 @members = $zip->membersMatching('^CaptureOne/.*\.(cos|COS)$');
621 4 100       292 if (@members) {
622 1         1471 require Image::ExifTool::CaptureOne;
623 1         4 Image::ExifTool::CaptureOne::ProcessEIP($et, $dirInfo);
624 1         10 last;
625             }
626              
627             # check for an iWork file
628 3         35 @members = $zip->membersMatching('(?i)^(index\.(xml|apxl)|QuickLook/Thumbnail\.jpg|[^/]+\.(pages|numbers|key)/Index.(zip|xml|apxl))$');
629 3 100       441 if (@members) {
630 1         2117 require Image::ExifTool::iWork;
631 1         6 Image::ExifTool::iWork::Process_iWork($et, $dirInfo);
632 1         7 last;
633             }
634              
635             # check for an Open Document, IDML or EPUB file
636 2         6 my $mType = ExtractFile($et, $zip, 'mimetype');
637 2 100       6 if ($mType) {
638 1         4 ($mime, $status) = $zip->contents($mType);
639 1 50 33     531 if (not $status and $mime =~ /([\x21-\xfe]+)/s) {
640             # clean up MIME type just in case (note that MIME is case insensitive)
641 1         4 $mime = lc $1;
642 1   50     9 $et->SetFileType($openDocType{$mime} || 'ZIP', $mime);
643 1 50       5 $et->Warn("Unrecognized MIMEType $mime") unless $openDocType{$mime};
644             # extract Open Document metadata from "meta.xml"
645 1         2 my $meta = ExtractFile($et, $zip, 'meta.xml');
646             # IDML files have metadata in a different place (ref 6)
647 1 50       4 $meta or $meta = ExtractFile($et, $zip, 'META-INF/metadata.xml');
648 1 50       3 if ($meta) {
649 1         4 ($buff, $status) = $zip->contents($meta);
650 1 50       455 unless ($status) {
651 1         6 my %dirInfo = (
652             DirName => 'XML',
653             DataPt => \$buff,
654             DirLen => length $buff,
655             DataLen => length $buff,
656             );
657             # (avoid structure warnings when copying from XML)
658 1         3 my $oldWarn = $$et{NO_STRUCT_WARN};
659 1         2 $$et{NO_STRUCT_WARN} = 1;
660 1         6 $et->ProcessDirectory(\%dirInfo, GetTagTable('Image::ExifTool::XMP::Main'));
661 1         4 $$et{NO_STRUCT_WARN} = $oldWarn;
662             }
663             }
664             # process rootfile of EPUB container if applicable
665 1         2 for (;;) {
666 1 50 33     6 last if $meta and $mime ne 'application/epub+zip';
667 0         0 my $container = ExtractFile($et, $zip, 'META-INF/container.xml');
668 0         0 ($buff, $status) = $zip->contents($container);
669 0 0       0 last if $status;
670 0 0       0 $buff =~ /]*?\bfull-path=(['"])(.*?)\1/s or last;
671             # load the rootfile data (OPF extension; contains XML metadata)
672 0 0       0 my $meta2 = $zip->memberNamed($2) or last;
673 0         0 $meta = $meta2;
674 0         0 ($buff, $status) = $zip->contents($meta);
675 0 0       0 last if $status;
676             # use opf:event to generate more meaningful tag names for dc:date
677 0         0 while ($buff =~ s{([^<]+)}{$2}s) {
678 0         0 my $dcTable = GetTagTable('Image::ExifTool::XMP::dc');
679 0         0 my $tag = "${1}Date";
680             AddTagToTable($dcTable, $tag, {
681             Name => ucfirst $tag,
682             Groups => { 2 => 'Time' },
683             List => 'Seq',
684             %Image::ExifTool::XMP::dateTimeInfo
685 0 0       0 }) unless $$dcTable{$tag};
686             }
687 0         0 my %dirInfo = (
688             DataPt => \$buff,
689             DirLen => length $buff,
690             DataLen => length $buff,
691             IgnoreProp => { 'package' => 1, metadata => 1 },
692             );
693             # (avoid structure warnings when copying from XML)
694 0         0 my $oldWarn = $$et{NO_STRUCT_WARN};
695 0         0 $$et{NO_STRUCT_WARN} = 1;
696 0         0 $et->ProcessDirectory(\%dirInfo, GetTagTable('Image::ExifTool::XMP::XML'));
697 0         0 $$et{NO_STRUCT_WARN} = $oldWarn;
698 0         0 last;
699             }
700 1 50 33     6 if ($openDocType{$mime} or $meta) {
701             # extract preview image(s) from "Thumbnails" directory if they exist
702 1         1 my $type;
703 1         4 my %tag = ( jpg => 'PreviewImage', png => 'PreviewPNG' );
704 1         2 foreach $type ('jpg', 'png') {
705 2         6 my $thumb = ExtractFile($et, $zip, "Thumbnails/thumbnail.$type");
706 2 100       18 next unless $thumb;
707 1         6 ($buff, $status) = $zip->contents($thumb);
708 1 50       1040 $et->FoundTag($tag{$type}, $buff) unless $status;
709             }
710 1         8 last; # all done since we recognized the MIME type or found metadata
711             }
712             # continue on to list ZIP contents...
713             }
714             }
715              
716             # otherwise just extract general ZIP information
717 1         6 $et->SetFileType();
718 1         3 @members = $zip->members();
719 1         5 my ($member, $iWorkType);
720             # special files to extract
721 1         17 my %extract = (
722             'meta.json' => 1,
723             'previews/preview.png' => 'PreviewPNG',
724             'preview.jpg' => 'PreviewImage', # (iWork 2013 files)
725             'preview-web.jpg' => 'OtherImage', # (iWork 2013 files)
726             'preview-micro.jpg' => 'ThumbnailImage', # (iWork 2013 files)
727             'QuickLook/Thumbnail.jpg' => 'ThumbnailImage', # (iWork 2009 files)
728             'QuickLook/Preview.pdf' => 'PreviewPDF', # (iWork 2009 files)
729             );
730 1         3 foreach $member (@members) {
731 1         3 $$et{DOC_NUM} = ++$docNum;
732 1         4 HandleMember($et, $member, $tagTablePtr);
733 1         3 my $file = $member->fileName();
734             # extract things from Sketch files
735 1 50 33     12 if ($extract{$file}) {
    50          
    50          
736 0         0 ($buff, $status) = $zip->contents($member);
737 0 0       0 $status and $et->Warn("Error extracting $file"), next;
738 0 0       0 if ($file eq 'meta.json') {
739 0         0 $et->ExtractInfo(\$buff, { ReEntry => 1 });
740 0 0 0     0 if ($$et{VALUE}{App} and $$et{VALUE}{App} =~ /sketch/i) {
741 0         0 $et->OverrideFileType('SKETCH');
742             }
743             } else {
744 0         0 $et->FoundTag($extract{$file} => $buff);
745             }
746             } elsif ($file eq 'Index/Document.iwa' and not $iWorkType) {
747 0   0     0 my $type = $iWorkType{$$et{FILE_EXT} || ''};
748 0   0     0 $iWorkType = $type || 'PAGES';
749             } elsif ($iWorkFile{$file}) {
750 0         0 $iWorkType = $iWorkFile{$file};
751             }
752             }
753 1 50       3 $et->OverrideFileType($iWorkType) if $iWorkType;
754 1         6 last;
755             }
756             # all done if we processed this using Archive::Zip
757 5 50       31 if ($zip) {
758 5         17 delete $$dirInfo{ZIP};
759 5         11 delete $$et{DOC_NUM};
760 5 50 33     16 if ($docNum > 1 and not $et->Options('Duplicates')) {
761 0         0 $et->Warn("Use the Duplicates option to extract tags for all $docNum files", 1);
762             }
763 5         204 return 1;
764             }
765             #
766             # process the ZIP file by hand (funny, but this seems easier than using Archive::Zip)
767             #
768 0           $et->VPrint(1, " -- processing as binary data --\n");
769 0           $raf->Seek(30, 0);
770 0           $et->SetFileType();
771 0           SetByteOrder('II');
772              
773             # A. Local file header:
774             # local file header signature 0) 4 bytes (0x04034b50)
775             # version needed to extract 4) 2 bytes
776             # general purpose bit flag 6) 2 bytes
777             # compression method 8) 2 bytes
778             # last mod file time 10) 2 bytes
779             # last mod file date 12) 2 bytes
780             # crc-32 14) 4 bytes
781             # compressed size 18) 4 bytes
782             # uncompressed size 22) 4 bytes
783             # file name length 26) 2 bytes
784             # extra field length 28) 2 bytes
785 0           for (;;) {
786 0           my $len = Get16u(\$buff, 26) + Get16u(\$buff, 28);
787 0 0         $raf->Read($buf2, $len) == $len or last;
788              
789 0           $$et{DOC_NUM} = ++$docNum;
790 0           $buff .= $buf2;
791 0           my %dirInfo = (
792             DataPt => \$buff,
793             DataPos => $raf->Tell() - 30 - $len,
794             DataLen => 30 + $len,
795             DirStart => 0,
796             DirLen => 30 + $len,
797             MixedTags => 1, # (to ignore FileComment tag)
798             );
799 0           $et->ProcessDirectory(\%dirInfo, $tagTablePtr);
800 0           my $flags = Get16u(\$buff, 6);
801 0 0         if ($flags & 0x08) {
802             # we don't yet support skipping stream mode data
803             # (when this happens, the CRC, compressed size and uncompressed
804             # sizes are set to 0 in the header. Instead, they are stored
805             # after the compressed data with an optional header of 0x08074b50)
806 0           $et->Warn('Stream mode data encountered, file list may be incomplete');
807 0           last;
808             }
809 0           $len = Get32u(\$buff, 18); # file data length
810 0 0         $raf->Seek($len, 1) or last; # skip file data
811 0 0 0       $raf->Read($buff, 30) == 30 and $buff =~ /^PK\x03\x04/ or last;
812             }
813 0           delete $$et{DOC_NUM};
814 0 0 0       if ($docNum > 1 and not $et->Options('Duplicates')) {
815 0           $et->Warn("Use the Duplicates option to extract tags for all $docNum files", 1);
816             }
817 0           return 1;
818             }
819              
820             1; # end
821              
822             __END__