line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
2
|
|
|
|
|
|
|
# File: iWork.pm |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Description: Read Apple iWork '09 XML+ZIP files |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Revisions: 2009/11/11 - P. Harvey Created |
7
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
package Image::ExifTool::iWork; |
10
|
|
|
|
|
|
|
|
11
|
1
|
|
|
1
|
|
8
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
46
|
|
12
|
1
|
|
|
1
|
|
5
|
use vars qw($VERSION); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
54
|
|
13
|
1
|
|
|
1
|
|
6
|
use Image::ExifTool qw(:DataAccess :Utils); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
272
|
|
14
|
1
|
|
|
1
|
|
8
|
use Image::ExifTool::XMP; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
38
|
|
15
|
1
|
|
|
1
|
|
5
|
use Image::ExifTool::ZIP; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
1284
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
$VERSION = '1.06'; |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# test for recognized iWork document extensions and outer XML elements |
20
|
|
|
|
|
|
|
my %iWorkType = ( |
21
|
|
|
|
|
|
|
# file extensions |
22
|
|
|
|
|
|
|
NUMBERS => 'NUMBERS', |
23
|
|
|
|
|
|
|
PAGES => 'PAGES', |
24
|
|
|
|
|
|
|
KEY => 'KEY', |
25
|
|
|
|
|
|
|
KTH => 'KTH', |
26
|
|
|
|
|
|
|
NMBTEMPLATE => 'NMBTEMPLATE', |
27
|
|
|
|
|
|
|
# we don't support double extensions -- |
28
|
|
|
|
|
|
|
# "PAGES.TEMPLATE" => 'Apple Pages Template', |
29
|
|
|
|
|
|
|
# outer XML elements |
30
|
|
|
|
|
|
|
'ls:document' => 'NUMBERS', |
31
|
|
|
|
|
|
|
'sl:document' => 'PAGES', |
32
|
|
|
|
|
|
|
'key:presentation' => 'KEY', |
33
|
|
|
|
|
|
|
); |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# MIME types for iWork files (Apple has not registered these yet, but these |
36
|
|
|
|
|
|
|
# are my best guess after doing some googling. I'm not 100% sure what "sff" |
37
|
|
|
|
|
|
|
# indicates, but I think it refers to the new "flattened" package format) |
38
|
|
|
|
|
|
|
my %mimeType = ( |
39
|
|
|
|
|
|
|
'NUMBERS' => 'application/x-iwork-numbers-sffnumbers', |
40
|
|
|
|
|
|
|
'PAGES' => 'application/x-iwork-pages-sffpages', |
41
|
|
|
|
|
|
|
'KEY' => 'application/x-iWork-keynote-sffkey', |
42
|
|
|
|
|
|
|
'NMBTEMPLATE' => 'application/x-iwork-numbers-sfftemplate', |
43
|
|
|
|
|
|
|
'PAGES.TEMPLATE'=> 'application/x-iwork-pages-sfftemplate', |
44
|
|
|
|
|
|
|
'KTH' => 'application/x-iWork-keynote-sffkth', |
45
|
|
|
|
|
|
|
); |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
# iWork tags |
48
|
|
|
|
|
|
|
%Image::ExifTool::iWork::Main = ( |
49
|
|
|
|
|
|
|
GROUPS => { 0 => 'XML', 1 => 'XML', 2 => 'Document' }, |
50
|
|
|
|
|
|
|
PROCESS_PROC => \&Image::ExifTool::XMP::ProcessXMP, |
51
|
|
|
|
|
|
|
VARS => { NO_ID => 1 }, |
52
|
|
|
|
|
|
|
NOTES => q{ |
53
|
|
|
|
|
|
|
The Apple iWork '09 file format is a ZIP archive containing XML files |
54
|
|
|
|
|
|
|
similar to the Office Open XML (OOXML) format. Metadata tags in iWork |
55
|
|
|
|
|
|
|
files are extracted even if they don't appear below. |
56
|
|
|
|
|
|
|
}, |
57
|
|
|
|
|
|
|
authors => { Name => 'Author', Groups => { 2 => 'Author' } }, |
58
|
|
|
|
|
|
|
comment => { }, |
59
|
|
|
|
|
|
|
copyright => { Groups => { 2 => 'Author' } }, |
60
|
|
|
|
|
|
|
keywords => { }, |
61
|
|
|
|
|
|
|
projects => { List => 1 }, |
62
|
|
|
|
|
|
|
title => { }, |
63
|
|
|
|
|
|
|
); |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
66
|
|
|
|
|
|
|
# Generate a tag ID for this XML tag |
67
|
|
|
|
|
|
|
# Inputs: 0) tag property name list ref |
68
|
|
|
|
|
|
|
# Returns: tagID |
69
|
|
|
|
|
|
|
sub GetTagID($) |
70
|
|
|
|
|
|
|
{ |
71
|
8
|
|
|
8
|
0
|
13
|
my $props = shift; |
72
|
8
|
100
|
|
|
|
25
|
return 0 if $$props[-1] =~ /^\w+:ID$/; # ignore ID tags |
73
|
7
|
50
|
|
|
|
43
|
return $$props[0] =~ /^.*?:(.*)/ ? $1 : $$props[0]; |
74
|
|
|
|
|
|
|
} |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
77
|
|
|
|
|
|
|
# We found an XMP property name/value |
78
|
|
|
|
|
|
|
# Inputs: 0) ExifTool object ref, 1) tag table ref |
79
|
|
|
|
|
|
|
# 2) reference to array of XMP property names (last is current property) |
80
|
|
|
|
|
|
|
# 3) property value, 4) attribute hash ref (not used here) |
81
|
|
|
|
|
|
|
# Returns: 1 if valid tag was found |
82
|
|
|
|
|
|
|
sub FoundTag($$$$;$) |
83
|
|
|
|
|
|
|
{ |
84
|
8
|
|
|
8
|
0
|
20
|
my ($et, $tagTablePtr, $props, $val, $attrs) = @_; |
85
|
8
|
50
|
|
|
|
19
|
return 0 unless @$props; |
86
|
8
|
|
|
|
|
26
|
my $verbose = $et->Options('Verbose'); |
87
|
|
|
|
|
|
|
|
88
|
8
|
50
|
|
|
|
18
|
$et->VPrint(0, " | - Tag '", join('/',@$props), "'\n") if $verbose > 1; |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# un-escape XML character entities |
91
|
8
|
|
|
|
|
24
|
$val = Image::ExifTool::XMP::UnescapeXML($val); |
92
|
|
|
|
|
|
|
# convert from UTF8 to ExifTool Charset |
93
|
8
|
|
|
|
|
25
|
$val = $et->Decode($val, 'UTF8'); |
94
|
8
|
100
|
|
|
|
18
|
my $tag = GetTagID($props) or return 0; |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
# add any unknown tags to table |
97
|
7
|
50
|
|
|
|
25
|
unless ($$tagTablePtr{$tag}) { |
98
|
0
|
0
|
|
|
|
0
|
$et->VPrint(0, " [adding $tag]\n") if $verbose; |
99
|
0
|
|
|
|
|
0
|
AddTagToTable($tagTablePtr, $tag, { Name => ucfirst $tag }); |
100
|
|
|
|
|
|
|
} |
101
|
|
|
|
|
|
|
# save the tag |
102
|
7
|
|
|
|
|
23
|
$et->HandleTag($tagTablePtr, $tag, $val); |
103
|
|
|
|
|
|
|
|
104
|
7
|
|
|
|
|
18
|
return 1; |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
#------------------------------------------------------------------------------ |
108
|
|
|
|
|
|
|
# Extract information from an iWork file |
109
|
|
|
|
|
|
|
# Inputs: 0) ExifTool object reference, 1) dirInfo reference |
110
|
|
|
|
|
|
|
# Returns: 1 |
111
|
|
|
|
|
|
|
# Notes: Upon entry to this routine, the file type has already been verified |
112
|
|
|
|
|
|
|
# as ZIP and the dirInfo hash contains a 'ZIP' Archive::Zip object reference |
113
|
|
|
|
|
|
|
sub Process_iWork($$) |
114
|
|
|
|
|
|
|
{ |
115
|
1
|
|
|
1
|
0
|
4
|
my ($et, $dirInfo) = @_; |
116
|
1
|
|
|
|
|
3
|
my $zip = $$dirInfo{ZIP}; |
117
|
1
|
|
|
|
|
2
|
my ($type, $index, $indexFile, $status); |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
# try to determine the file type |
120
|
1
|
|
|
|
|
5
|
local $SIG{'__WARN__'} = \&Image::ExifTool::ZIP::WarnProc; |
121
|
|
|
|
|
|
|
# trust type given by file extension if available |
122
|
1
|
50
|
|
|
|
6
|
$type = $iWorkType{$$et{FILE_EXT}} if $$et{FILE_EXT}; |
123
|
1
|
50
|
|
|
|
114
|
unless ($type) { |
124
|
|
|
|
|
|
|
# read the index file |
125
|
0
|
|
|
|
|
0
|
my @members = $zip->membersMatching('^index\.(xml|apxl)$'); |
126
|
0
|
0
|
|
|
|
0
|
if (@members) { |
127
|
0
|
|
|
|
|
0
|
($index, $status) = $zip->contents($members[0]); |
128
|
0
|
0
|
|
|
|
0
|
unless ($status) { |
129
|
0
|
|
|
|
|
0
|
$indexFile = $members[0]->fileName(); |
130
|
0
|
0
|
|
|
|
0
|
if ($index =~ /^\s*<\?xml version=[^<]+<(\w+:\w+)/s) { |
131
|
0
|
0
|
|
|
|
0
|
$type = $iWorkType{$1} if $iWorkType{$1}; |
132
|
|
|
|
|
|
|
} |
133
|
|
|
|
|
|
|
} |
134
|
|
|
|
|
|
|
} else { |
135
|
0
|
|
|
|
|
0
|
@members = $zip->membersMatching('(?i)^.*\.(pages|numbers|key)/Index.*'); |
136
|
0
|
0
|
|
|
|
0
|
if (@members) { |
137
|
0
|
|
|
|
|
0
|
my $tmp = $members[0]->fileName(); |
138
|
0
|
0
|
|
|
|
0
|
$type = $iWorkType{uc $1} if $tmp =~ /\.(pages|numbers|key)/i; |
139
|
|
|
|
|
|
|
} |
140
|
|
|
|
|
|
|
} |
141
|
0
|
0
|
|
|
|
0
|
$type or $type = 'ZIP'; # assume ZIP by default |
142
|
|
|
|
|
|
|
} |
143
|
1
|
|
|
|
|
12
|
$et->SetFileType($type, $mimeType{$type}); |
144
|
|
|
|
|
|
|
|
145
|
1
|
|
|
|
|
5
|
my @members = $zip->members(); |
146
|
1
|
|
|
|
|
8
|
my $docNum = 0; |
147
|
1
|
|
|
|
|
2
|
my $member; |
148
|
1
|
|
|
|
|
2
|
foreach $member (@members) { |
149
|
|
|
|
|
|
|
# get filename of this ZIP member |
150
|
4
|
|
|
|
|
12
|
my $file = $member->fileName(); |
151
|
4
|
50
|
|
|
|
34
|
next unless defined $file; |
152
|
4
|
|
|
|
|
18
|
$et->VPrint(0, "File: $file\n"); |
153
|
|
|
|
|
|
|
# set the document number and extract ZIP tags |
154
|
4
|
|
|
|
|
9
|
$$et{DOC_NUM} = ++$docNum; |
155
|
4
|
|
|
|
|
13
|
Image::ExifTool::ZIP::HandleMember($et, $member); |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
# process only the index XML and JPEG thumbnail/preview files |
158
|
4
|
100
|
|
|
|
26
|
next unless $file =~ m{^(index\.(xml|apxl)|QuickLook/Thumbnail\.jpg|[^/]+/preview(-micro|-web)?.jpg)$}i; |
159
|
|
|
|
|
|
|
# get the file contents if necessary |
160
|
|
|
|
|
|
|
# (CAREFUL! $buff MUST be local since we hand off a value ref to PreviewImage) |
161
|
2
|
|
|
|
|
5
|
my ($buff, $buffPt); |
162
|
2
|
50
|
33
|
|
|
6
|
if ($indexFile and $indexFile eq $file) { |
163
|
|
|
|
|
|
|
# use the index file we already loaded |
164
|
0
|
|
|
|
|
0
|
$buffPt = \$index; |
165
|
|
|
|
|
|
|
} else { |
166
|
2
|
|
|
|
|
13
|
($buff, $status) = $zip->contents($member); |
167
|
2
|
50
|
|
|
|
2251
|
$status and $et->Warn("Error extracting $file"), next; |
168
|
2
|
|
|
|
|
4
|
$buffPt = \$buff; |
169
|
|
|
|
|
|
|
} |
170
|
|
|
|
|
|
|
# extract JPEG as PreviewImage (should only be QuickLook/Thumbnail.jpg) |
171
|
2
|
100
|
|
|
|
9
|
if ($file =~ /\.jpg$/) { |
172
|
1
|
0
|
|
|
|
4
|
my $type = ($file =~ /preview-(\w+)/) ? ($1 eq 'web' ? 'Other' : 'Thumbnail') : 'Preview'; |
|
|
50
|
|
|
|
|
|
173
|
1
|
|
|
|
|
6
|
$et->FoundTag($type . 'Image', $buffPt); |
174
|
1
|
|
|
|
|
4
|
next; |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
# process "metadata" section of XML index file |
177
|
1
|
50
|
|
|
|
10
|
next unless $$buffPt =~ /<(\w+):metadata>/g; |
178
|
1
|
|
|
|
|
6
|
my $ns = $1; |
179
|
1
|
|
|
|
|
2
|
my $p1 = pos $$buffPt; |
180
|
1
|
50
|
|
|
|
16
|
next unless $$buffPt =~ m{${ns}:metadata>}g; |
181
|
|
|
|
|
|
|
# construct XML data from "metadata" section only |
182
|
1
|
|
|
|
|
5
|
$$buffPt = '' . substr($$buffPt, $p1, pos($$buffPt)-$p1); |
183
|
1
|
|
|
|
|
11
|
my %dirInfo = ( |
184
|
|
|
|
|
|
|
DataPt => $buffPt, |
185
|
|
|
|
|
|
|
DirLen => length $$buffPt, |
186
|
|
|
|
|
|
|
DataLen => length $$buffPt, |
187
|
|
|
|
|
|
|
XMPParseOpts => { |
188
|
|
|
|
|
|
|
FoundProc => \&FoundTag, |
189
|
|
|
|
|
|
|
}, |
190
|
|
|
|
|
|
|
); |
191
|
1
|
|
|
|
|
4
|
my $tagTablePtr = GetTagTable('Image::ExifTool::iWork::Main'); |
192
|
1
|
|
|
|
|
6
|
$et->ProcessDirectory(\%dirInfo, $tagTablePtr); |
193
|
1
|
|
|
|
|
6
|
undef $$buffPt; # (free memory now) |
194
|
|
|
|
|
|
|
} |
195
|
1
|
|
|
|
|
3
|
delete $$et{DOC_NUM}; |
196
|
1
|
|
|
|
|
6
|
return 1; |
197
|
|
|
|
|
|
|
} |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
1; # end |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
__END__ |