| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
2
|
|
|
|
|
|
|
# File: iWork.pm |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# Description: Read Apple iWork '09 XML+ZIP files |
|
5
|
|
|
|
|
|
|
# |
|
6
|
|
|
|
|
|
|
# Revisions: 2009/11/11 - P. Harvey Created |
|
7
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
package Image::ExifTool::iWork; |
|
10
|
|
|
|
|
|
|
|
|
11
|
1
|
|
|
1
|
|
6
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
32
|
|
|
12
|
1
|
|
|
1
|
|
5
|
use vars qw($VERSION); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
45
|
|
|
13
|
1
|
|
|
1
|
|
5
|
use Image::ExifTool qw(:DataAccess :Utils); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
221
|
|
|
14
|
1
|
|
|
1
|
|
5
|
use Image::ExifTool::XMP; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
30
|
|
|
15
|
1
|
|
|
1
|
|
5
|
use Image::ExifTool::ZIP; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
966
|
|
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
$VERSION = '1.06'; |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# test for recognized iWork document extensions and outer XML elements |
|
20
|
|
|
|
|
|
|
my %iWorkType = ( |
|
21
|
|
|
|
|
|
|
# file extensions |
|
22
|
|
|
|
|
|
|
NUMBERS => 'NUMBERS', |
|
23
|
|
|
|
|
|
|
PAGES => 'PAGES', |
|
24
|
|
|
|
|
|
|
KEY => 'KEY', |
|
25
|
|
|
|
|
|
|
KTH => 'KTH', |
|
26
|
|
|
|
|
|
|
NMBTEMPLATE => 'NMBTEMPLATE', |
|
27
|
|
|
|
|
|
|
# we don't support double extensions -- |
|
28
|
|
|
|
|
|
|
# "PAGES.TEMPLATE" => 'Apple Pages Template', |
|
29
|
|
|
|
|
|
|
# outer XML elements |
|
30
|
|
|
|
|
|
|
'ls:document' => 'NUMBERS', |
|
31
|
|
|
|
|
|
|
'sl:document' => 'PAGES', |
|
32
|
|
|
|
|
|
|
'key:presentation' => 'KEY', |
|
33
|
|
|
|
|
|
|
); |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# MIME types for iWork files (Apple has not registered these yet, but these |
|
36
|
|
|
|
|
|
|
# are my best guess after doing some googling. I'm not 100% sure what "sff" |
|
37
|
|
|
|
|
|
|
# indicates, but I think it refers to the new "flattened" package format) |
|
38
|
|
|
|
|
|
|
my %mimeType = ( |
|
39
|
|
|
|
|
|
|
'NUMBERS' => 'application/x-iwork-numbers-sffnumbers', |
|
40
|
|
|
|
|
|
|
'PAGES' => 'application/x-iwork-pages-sffpages', |
|
41
|
|
|
|
|
|
|
'KEY' => 'application/x-iWork-keynote-sffkey', |
|
42
|
|
|
|
|
|
|
'NMBTEMPLATE' => 'application/x-iwork-numbers-sfftemplate', |
|
43
|
|
|
|
|
|
|
'PAGES.TEMPLATE'=> 'application/x-iwork-pages-sfftemplate', |
|
44
|
|
|
|
|
|
|
'KTH' => 'application/x-iWork-keynote-sffkth', |
|
45
|
|
|
|
|
|
|
); |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
# iWork tags |
|
48
|
|
|
|
|
|
|
%Image::ExifTool::iWork::Main = ( |
|
49
|
|
|
|
|
|
|
GROUPS => { 0 => 'XML', 1 => 'XML', 2 => 'Document' }, |
|
50
|
|
|
|
|
|
|
PROCESS_PROC => \&Image::ExifTool::XMP::ProcessXMP, |
|
51
|
|
|
|
|
|
|
VARS => { NO_ID => 1 }, |
|
52
|
|
|
|
|
|
|
NOTES => q{ |
|
53
|
|
|
|
|
|
|
The Apple iWork '09 file format is a ZIP archive containing XML files |
|
54
|
|
|
|
|
|
|
similar to the Office Open XML (OOXML) format. Metadata tags in iWork |
|
55
|
|
|
|
|
|
|
files are extracted even if they don't appear below. |
|
56
|
|
|
|
|
|
|
}, |
|
57
|
|
|
|
|
|
|
authors => { Name => 'Author', Groups => { 2 => 'Author' } }, |
|
58
|
|
|
|
|
|
|
comment => { }, |
|
59
|
|
|
|
|
|
|
copyright => { Groups => { 2 => 'Author' } }, |
|
60
|
|
|
|
|
|
|
keywords => { }, |
|
61
|
|
|
|
|
|
|
projects => { List => 1 }, |
|
62
|
|
|
|
|
|
|
title => { }, |
|
63
|
|
|
|
|
|
|
); |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
66
|
|
|
|
|
|
|
# Generate a tag ID for this XML tag |
|
67
|
|
|
|
|
|
|
# Inputs: 0) tag property name list ref |
|
68
|
|
|
|
|
|
|
# Returns: tagID |
|
69
|
|
|
|
|
|
|
sub GetTagID($) |
|
70
|
|
|
|
|
|
|
{ |
|
71
|
8
|
|
|
8
|
0
|
11
|
my $props = shift; |
|
72
|
8
|
100
|
|
|
|
74
|
return 0 if $$props[-1] =~ /^\w+:ID$/; # ignore ID tags |
|
73
|
7
|
50
|
|
|
|
37
|
return $$props[0] =~ /^.*?:(.*)/ ? $1 : $$props[0]; |
|
74
|
|
|
|
|
|
|
} |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
77
|
|
|
|
|
|
|
# We found an XMP property name/value |
|
78
|
|
|
|
|
|
|
# Inputs: 0) ExifTool object ref, 1) tag table ref |
|
79
|
|
|
|
|
|
|
# 2) reference to array of XMP property names (last is current property) |
|
80
|
|
|
|
|
|
|
# 3) property value, 4) attribute hash ref (not used here) |
|
81
|
|
|
|
|
|
|
# Returns: 1 if valid tag was found |
|
82
|
|
|
|
|
|
|
sub FoundTag($$$$;$) |
|
83
|
|
|
|
|
|
|
{ |
|
84
|
8
|
|
|
8
|
0
|
15
|
my ($et, $tagTablePtr, $props, $val, $attrs) = @_; |
|
85
|
8
|
50
|
|
|
|
13
|
return 0 unless @$props; |
|
86
|
8
|
|
|
|
|
24
|
my $verbose = $et->Options('Verbose'); |
|
87
|
|
|
|
|
|
|
|
|
88
|
8
|
50
|
|
|
|
17
|
$et->VPrint(0, " | - Tag '", join('/',@$props), "'\n") if $verbose > 1; |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# un-escape XML character entities |
|
91
|
8
|
|
|
|
|
18
|
$val = Image::ExifTool::XMP::UnescapeXML($val); |
|
92
|
|
|
|
|
|
|
# convert from UTF8 to ExifTool Charset |
|
93
|
8
|
|
|
|
|
22
|
$val = $et->Decode($val, 'UTF8'); |
|
94
|
8
|
100
|
|
|
|
15
|
my $tag = GetTagID($props) or return 0; |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
# add any unknown tags to table |
|
97
|
7
|
50
|
|
|
|
19
|
unless ($$tagTablePtr{$tag}) { |
|
98
|
0
|
0
|
|
|
|
0
|
$et->VPrint(0, " [adding $tag]\n") if $verbose; |
|
99
|
0
|
|
|
|
|
0
|
AddTagToTable($tagTablePtr, $tag, { Name => ucfirst $tag }); |
|
100
|
|
|
|
|
|
|
} |
|
101
|
|
|
|
|
|
|
# save the tag |
|
102
|
7
|
|
|
|
|
21
|
$et->HandleTag($tagTablePtr, $tag, $val); |
|
103
|
|
|
|
|
|
|
|
|
104
|
7
|
|
|
|
|
14
|
return 1; |
|
105
|
|
|
|
|
|
|
} |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
|
108
|
|
|
|
|
|
|
# Extract information from an iWork file |
|
109
|
|
|
|
|
|
|
# Inputs: 0) ExifTool object reference, 1) dirInfo reference |
|
110
|
|
|
|
|
|
|
# Returns: 1 |
|
111
|
|
|
|
|
|
|
# Notes: Upon entry to this routine, the file type has already been verified |
|
112
|
|
|
|
|
|
|
# as ZIP and the dirInfo hash contains a 'ZIP' Archive::Zip object reference |
|
113
|
|
|
|
|
|
|
sub Process_iWork($$) |
|
114
|
|
|
|
|
|
|
{ |
|
115
|
1
|
|
|
1
|
0
|
2
|
my ($et, $dirInfo) = @_; |
|
116
|
1
|
|
|
|
|
2
|
my $zip = $$dirInfo{ZIP}; |
|
117
|
1
|
|
|
|
|
2
|
my ($type, $index, $indexFile, $status); |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
# try to determine the file type |
|
120
|
1
|
|
|
|
|
4
|
local $SIG{'__WARN__'} = \&Image::ExifTool::ZIP::WarnProc; |
|
121
|
|
|
|
|
|
|
# trust type given by file extension if available |
|
122
|
1
|
50
|
|
|
|
4
|
$type = $iWorkType{$$et{FILE_EXT}} if $$et{FILE_EXT}; |
|
123
|
1
|
50
|
|
|
|
3
|
unless ($type) { |
|
124
|
|
|
|
|
|
|
# read the index file |
|
125
|
0
|
|
|
|
|
0
|
my @members = $zip->membersMatching('^index\.(xml|apxl)$'); |
|
126
|
0
|
0
|
|
|
|
0
|
if (@members) { |
|
127
|
0
|
|
|
|
|
0
|
($index, $status) = $zip->contents($members[0]); |
|
128
|
0
|
0
|
|
|
|
0
|
unless ($status) { |
|
129
|
0
|
|
|
|
|
0
|
$indexFile = $members[0]->fileName(); |
|
130
|
0
|
0
|
|
|
|
0
|
if ($index =~ /^\s*<\?xml version=[^<]+<(\w+:\w+)/s) { |
|
131
|
0
|
0
|
|
|
|
0
|
$type = $iWorkType{$1} if $iWorkType{$1}; |
|
132
|
|
|
|
|
|
|
} |
|
133
|
|
|
|
|
|
|
} |
|
134
|
|
|
|
|
|
|
} else { |
|
135
|
0
|
|
|
|
|
0
|
@members = $zip->membersMatching('(?i)^.*\.(pages|numbers|key)/Index.*'); |
|
136
|
0
|
0
|
|
|
|
0
|
if (@members) { |
|
137
|
0
|
|
|
|
|
0
|
my $tmp = $members[0]->fileName(); |
|
138
|
0
|
0
|
|
|
|
0
|
$type = $iWorkType{uc $1} if $tmp =~ /\.(pages|numbers|key)/i; |
|
139
|
|
|
|
|
|
|
} |
|
140
|
|
|
|
|
|
|
} |
|
141
|
0
|
0
|
|
|
|
0
|
$type or $type = 'ZIP'; # assume ZIP by default |
|
142
|
|
|
|
|
|
|
} |
|
143
|
1
|
|
|
|
|
7
|
$et->SetFileType($type, $mimeType{$type}); |
|
144
|
|
|
|
|
|
|
|
|
145
|
1
|
|
|
|
|
4
|
my @members = $zip->members(); |
|
146
|
1
|
|
|
|
|
5
|
my $docNum = 0; |
|
147
|
1
|
|
|
|
|
2
|
my $member; |
|
148
|
1
|
|
|
|
|
2
|
foreach $member (@members) { |
|
149
|
|
|
|
|
|
|
# get filename of this ZIP member |
|
150
|
4
|
|
|
|
|
12
|
my $file = $member->fileName(); |
|
151
|
4
|
50
|
|
|
|
28
|
next unless defined $file; |
|
152
|
4
|
|
|
|
|
13
|
$et->VPrint(0, "File: $file\n"); |
|
153
|
|
|
|
|
|
|
# set the document number and extract ZIP tags |
|
154
|
4
|
|
|
|
|
6
|
$$et{DOC_NUM} = ++$docNum; |
|
155
|
4
|
|
|
|
|
10
|
Image::ExifTool::ZIP::HandleMember($et, $member); |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
# process only the index XML and JPEG thumbnail/preview files |
|
158
|
4
|
100
|
|
|
|
20
|
next unless $file =~ m{^(index\.(xml|apxl)|QuickLook/Thumbnail\.jpg|[^/]+/preview(-micro|-web)?.jpg)$}i; |
|
159
|
|
|
|
|
|
|
# get the file contents if necessary |
|
160
|
|
|
|
|
|
|
# (CAREFUL! $buff MUST be local since we hand off a value ref to PreviewImage) |
|
161
|
2
|
|
|
|
|
4
|
my ($buff, $buffPt); |
|
162
|
2
|
50
|
33
|
|
|
6
|
if ($indexFile and $indexFile eq $file) { |
|
163
|
|
|
|
|
|
|
# use the index file we already loaded |
|
164
|
0
|
|
|
|
|
0
|
$buffPt = \$index; |
|
165
|
|
|
|
|
|
|
} else { |
|
166
|
2
|
|
|
|
|
6
|
($buff, $status) = $zip->contents($member); |
|
167
|
2
|
50
|
|
|
|
1701
|
$status and $et->Warn("Error extracting $file"), next; |
|
168
|
2
|
|
|
|
|
4
|
$buffPt = \$buff; |
|
169
|
|
|
|
|
|
|
} |
|
170
|
|
|
|
|
|
|
# extract JPEG as PreviewImage (should only be QuickLook/Thumbnail.jpg) |
|
171
|
2
|
100
|
|
|
|
8
|
if ($file =~ /\.jpg$/) { |
|
172
|
1
|
0
|
|
|
|
4
|
my $type = ($file =~ /preview-(\w+)/) ? ($1 eq 'web' ? 'Other' : 'Thumbnail') : 'Preview'; |
|
|
|
50
|
|
|
|
|
|
|
173
|
1
|
|
|
|
|
5
|
$et->FoundTag($type . 'Image', $buffPt); |
|
174
|
1
|
|
|
|
|
2
|
next; |
|
175
|
|
|
|
|
|
|
} |
|
176
|
|
|
|
|
|
|
# process "metadata" section of XML index file |
|
177
|
1
|
50
|
|
|
|
9
|
next unless $$buffPt =~ /<(\w+):metadata>/g; |
|
178
|
1
|
|
|
|
|
3
|
my $ns = $1; |
|
179
|
1
|
|
|
|
|
2
|
my $p1 = pos $$buffPt; |
|
180
|
1
|
50
|
|
|
|
13
|
next unless $$buffPt =~ m{${ns}:metadata>}g; |
|
181
|
|
|
|
|
|
|
# construct XML data from "metadata" section only |
|
182
|
1
|
|
|
|
|
4
|
$$buffPt = '' . substr($$buffPt, $p1, pos($$buffPt)-$p1); |
|
183
|
1
|
|
|
|
|
7
|
my %dirInfo = ( |
|
184
|
|
|
|
|
|
|
DataPt => $buffPt, |
|
185
|
|
|
|
|
|
|
DirLen => length $$buffPt, |
|
186
|
|
|
|
|
|
|
DataLen => length $$buffPt, |
|
187
|
|
|
|
|
|
|
XMPParseOpts => { |
|
188
|
|
|
|
|
|
|
FoundProc => \&FoundTag, |
|
189
|
|
|
|
|
|
|
}, |
|
190
|
|
|
|
|
|
|
); |
|
191
|
1
|
|
|
|
|
3
|
my $tagTablePtr = GetTagTable('Image::ExifTool::iWork::Main'); |
|
192
|
1
|
|
|
|
|
5
|
$et->ProcessDirectory(\%dirInfo, $tagTablePtr); |
|
193
|
1
|
|
|
|
|
4
|
undef $$buffPt; # (free memory now) |
|
194
|
|
|
|
|
|
|
} |
|
195
|
1
|
|
|
|
|
3
|
delete $$et{DOC_NUM}; |
|
196
|
1
|
|
|
|
|
4
|
return 1; |
|
197
|
|
|
|
|
|
|
} |
|
198
|
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
1; # end |
|
200
|
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
__END__ |