line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
########################################################### |
2
|
|
|
|
|
|
|
# A Perl package for showing/modifying JPEG (meta)data. # |
3
|
|
|
|
|
|
|
# Copyright (C) 2004,2005,2006 Stefano Bettelli # |
4
|
|
|
|
|
|
|
# See the COPYING and LICENSE files for license terms. # |
5
|
|
|
|
|
|
|
########################################################### |
6
|
15
|
|
|
15
|
|
69
|
use Image::MetaData::JPEG::data::Tables qw(:TagsAPP13); |
|
15
|
|
|
|
|
23
|
|
|
15
|
|
|
|
|
1895
|
|
7
|
15
|
|
|
15
|
|
85
|
no integer; |
|
15
|
|
|
|
|
22
|
|
|
15
|
|
|
|
|
71
|
|
8
|
15
|
|
|
15
|
|
267
|
use strict; |
|
15
|
|
|
|
|
23
|
|
|
15
|
|
|
|
|
353
|
|
9
|
15
|
|
|
15
|
|
55
|
use warnings; |
|
15
|
|
|
|
|
21
|
|
|
15
|
|
|
|
|
7723
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
########################################################### |
12
|
|
|
|
|
|
|
# This method parses an APP13 segment, often used by pho- # |
13
|
|
|
|
|
|
|
# to-manipulation programs to store IPTC (International # |
14
|
|
|
|
|
|
|
# Press Telecommunications Council) tags, although this # |
15
|
|
|
|
|
|
|
# isn't a formally defined standard (first adopted by # |
16
|
|
|
|
|
|
|
# Adobe). The structure of an APP13 segment is as follows # |
17
|
|
|
|
|
|
|
#---------------------------------------------------------# |
18
|
|
|
|
|
|
|
# 14 bytes identifier, e.g. "Photoshop 3.0\000" # |
19
|
|
|
|
|
|
|
# 8 bytes resolution (?, Photoshop 2.5 only) # |
20
|
|
|
|
|
|
|
# ..... sequence of Photoshop Image Resource blocks # |
21
|
|
|
|
|
|
|
#=========================================================# |
22
|
|
|
|
|
|
|
# The sequence of resource blocks may require additional # |
23
|
|
|
|
|
|
|
# APP13 markers, whose order is always to be preserved. # |
24
|
|
|
|
|
|
|
# TODO: implement parsing of multiple blocks!!!! # |
25
|
|
|
|
|
|
|
#=========================================================# |
26
|
|
|
|
|
|
|
# Ref: "Adobe Photoshop 6.0: File Formats Specifications",# |
27
|
|
|
|
|
|
|
# Adobe System Inc., ver.6.0, rel.2, November 2000. # |
28
|
|
|
|
|
|
|
# and "\"Solo\" Image File Format. RichTIFF and its # |
29
|
|
|
|
|
|
|
# replacement by \"Solo\" JFIF", version 2.0a, # |
30
|
|
|
|
|
|
|
# Coatsworth Comm. Inc., Brampton, Ontario, Canada # |
31
|
|
|
|
|
|
|
########################################################### |
32
|
|
|
|
|
|
|
sub parse_app13 { |
33
|
30
|
|
|
30
|
0
|
52
|
my ($this) = @_; |
34
|
30
|
|
|
|
|
42
|
my $offset = 0; |
35
|
|
|
|
|
|
|
# they say that this segment always starts with a specific |
36
|
|
|
|
|
|
|
# string from Adobe, namely "Photoshop 3.0\000". But some |
37
|
|
|
|
|
|
|
# old pics, with only non-IPTC data, use other strings ... |
38
|
|
|
|
|
|
|
# try all known possibilities and die if no match is found |
39
|
30
|
|
|
|
|
72
|
for my $good_id (@$APP13_PHOTOSHOP_IDS) { |
40
|
60
|
100
|
|
|
|
144
|
next if $this->size() < length $good_id; |
41
|
56
|
|
|
|
|
150
|
my $id = $this->read_record($UNDEF, 0, length $good_id); |
42
|
56
|
100
|
|
|
|
275
|
next unless $good_id eq $id; |
43
|
|
|
|
|
|
|
# store the identifier (and some additional bytes for ver.2.5 only) |
44
|
29
|
|
|
|
|
111
|
$this->store_record('Identifier', $ASCII, $offset, length $id); |
45
|
29
|
100
|
|
|
|
138
|
$this->store_record('Resolution', $SHORT, $offset, 4) if $id =~ /2\.5/; |
46
|
|
|
|
|
|
|
} |
47
|
|
|
|
|
|
|
# Die if no identifier was found (show first ten characters) |
48
|
30
|
100
|
|
|
|
106
|
$this->die('Wrong identifier ('.$this->read_record($UNDEF, 0, 10).')') |
49
|
|
|
|
|
|
|
unless $this->search_record('Identifier'); |
50
|
|
|
|
|
|
|
# not much to do now, except calling repeatedly a method for |
51
|
|
|
|
|
|
|
# parsing resource data blocks. The argument is the current |
52
|
|
|
|
|
|
|
# offset, and the output is the new offset after the block |
53
|
29
|
|
|
|
|
100
|
$offset = $this->parse_resource_data_block($offset) |
54
|
|
|
|
|
|
|
while ($offset < $this->size()); |
55
|
|
|
|
|
|
|
# complain if we read a bit too much ... |
56
|
29
|
|
|
|
|
128
|
$this->test_size($offset, "parsed after segment end"); |
57
|
|
|
|
|
|
|
} |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
########################################################### |
60
|
|
|
|
|
|
|
# This method parses an APP13 resource data block (TODO: # |
61
|
|
|
|
|
|
|
# blocks spanning multiple APP13s). Currently, it treates # |
62
|
|
|
|
|
|
|
# in details IPTC (International Press Telecommunications # |
63
|
|
|
|
|
|
|
# Council) blocks, and just saves the other tags (which # |
64
|
|
|
|
|
|
|
# are, however, in general, much simpler). The only argu- # |
65
|
|
|
|
|
|
|
# ment is the current offset in the data area of this # |
66
|
|
|
|
|
|
|
# object. The output is the new offset after this block. # |
67
|
|
|
|
|
|
|
# The structure of a resource data block is: # |
68
|
|
|
|
|
|
|
#---------------------------------------------------------# |
69
|
|
|
|
|
|
|
# 4 bytes type (Photoshop uses "8BIM" from v.6.0 on) # |
70
|
|
|
|
|
|
|
# 2 bytes unique identifier (e.g. "\004\004" for IPTC) # |
71
|
|
|
|
|
|
|
# 1 byte length of resource data block name # |
72
|
|
|
|
|
|
|
# .... name (padded to make size even incl. length) # |
73
|
|
|
|
|
|
|
# 4 bytes size of resource data (following data only) # |
74
|
|
|
|
|
|
|
# .... data (padded to make size even) # |
75
|
|
|
|
|
|
|
#---------------------------------------------------------# |
76
|
|
|
|
|
|
|
# The content of each Photoshop non-IPTC data block is # |
77
|
|
|
|
|
|
|
# transformed into a record and stored in a first-level # |
78
|
|
|
|
|
|
|
# subdirectory, depending on its type. The block type is, # |
79
|
|
|
|
|
|
|
# in fact, no more supposed to be '8BIM'; however, only # |
80
|
|
|
|
|
|
|
# some known values are accepted. The IPTC data block is # |
81
|
|
|
|
|
|
|
# instead analysed in detail, and all findings are stored # |
82
|
|
|
|
|
|
|
# in another (sub)directory tree. Empty subdirectories # |
83
|
|
|
|
|
|
|
# are not created. # |
84
|
|
|
|
|
|
|
#=========================================================# |
85
|
|
|
|
|
|
|
# Ref: "Adobe Photoshop 6.0: File Formats Specifications",# |
86
|
|
|
|
|
|
|
# Adobe System Inc., ver.6.0, rel.2, November 2000. # |
87
|
|
|
|
|
|
|
# and "\"Solo\" Image File Format. RichTIFF and its # |
88
|
|
|
|
|
|
|
# replacement by \"Solo\" JFIF", version 2.0a, # |
89
|
|
|
|
|
|
|
# Coatsworth Comm. Inc., Brampton, Ontario, Canada # |
90
|
|
|
|
|
|
|
########################################################### |
91
|
|
|
|
|
|
|
sub parse_resource_data_block { |
92
|
407
|
|
|
407
|
0
|
471
|
my ($this, $offset) = @_; |
93
|
|
|
|
|
|
|
# An "Adobe Phostoshop" block usually starts with "8BIM". |
94
|
|
|
|
|
|
|
# Accepted values are listed in @$APP13_PHOTOSHOP_TYPE. |
95
|
407
|
|
|
|
|
780
|
my $type = $this->read_record($ASCII, $offset, 4); |
96
|
1221
|
|
|
|
|
1777
|
$this->die("Wrong resource data block type ($type)") |
97
|
407
|
50
|
|
|
|
989
|
unless grep { $_ eq $type } @$APP13_PHOTOSHOP_TYPE; |
98
|
|
|
|
|
|
|
# then there is the block identifier |
99
|
407
|
|
|
|
|
799
|
my $identifier = $this->read_record($SHORT, $offset); |
100
|
|
|
|
|
|
|
# get the name length and the name. The length is the first byte. |
101
|
|
|
|
|
|
|
# The name can be padded so that length+name span an even number |
102
|
|
|
|
|
|
|
# of bytes. Usually the name is "" (the empty string, with length |
103
|
|
|
|
|
|
|
# 0, not "\000", which has length 1) so we get "\000\000" here. |
104
|
407
|
|
|
|
|
1242
|
my $name_length = $this->read_record($BYTE, $offset); |
105
|
407
|
|
|
|
|
1258
|
my $name = $this->read_record($ASCII, $offset, $name_length); |
106
|
|
|
|
|
|
|
# read the padding byte if length was even |
107
|
407
|
50
|
|
|
|
1560
|
$this->read_record($UNDEF, $offset, 1) if ($name_length % 2) == 0; |
108
|
|
|
|
|
|
|
# the next four bytes encode the resource data size. Also in this |
109
|
|
|
|
|
|
|
# case the total size must be padded to an even number of bytes |
110
|
407
|
|
|
|
|
1046
|
my $data_length = $this->read_record($LONG, $offset); |
111
|
407
|
100
|
|
|
|
1110
|
my $need_padding = ($data_length % 2) ? 1 : 0; |
112
|
|
|
|
|
|
|
# check that there is enough data for this block; obviously, this |
113
|
|
|
|
|
|
|
# break the case of a resource data block spanning multiple segments! |
114
|
407
|
|
|
|
|
908
|
$this->test_size($offset + $data_length + $need_padding, |
115
|
|
|
|
|
|
|
"in IPTC resource data block"); |
116
|
|
|
|
|
|
|
# calculate the absolute end of the resource data block |
117
|
407
|
|
|
|
|
423
|
my $boundary = $offset + $data_length; |
118
|
|
|
|
|
|
|
# Currently, the IPTC block deserves a special treatment: repeatedly |
119
|
|
|
|
|
|
|
# read data from the data block, up to an amount equal to $data_length. |
120
|
|
|
|
|
|
|
# The IPTC-parsing routine, as usual, returns the new working offset at |
121
|
|
|
|
|
|
|
# the end. The IPTC records are written in separate subdirectories. There |
122
|
|
|
|
|
|
|
# should be no resource block description for IPTC, make it an error. |
123
|
407
|
100
|
|
|
|
629
|
if ($identifier eq $APP13_PHOTOSHOP_IPTC) { |
124
|
24
|
50
|
|
|
|
62
|
$this->die("Non-empty IPTC resource block descriptor") if $name ne ''; |
125
|
24
|
|
|
|
|
127
|
$offset=$this->parse_IPTC_dataset($offset) while ($offset<$boundary); } |
126
|
|
|
|
|
|
|
# Less interesting tags are mistreated. However, they should not pollute |
127
|
|
|
|
|
|
|
# the root dir, so a subdirectory is used, which depends on $type. $name |
128
|
|
|
|
|
|
|
# is stored in the "extra" field for use at dump time. |
129
|
383
|
|
|
|
|
604
|
else { my $dirname = $APP13_PHOTOSHOP_DIRNAME . '_' . $type; |
130
|
383
|
|
|
|
|
716
|
my $dir = $this->provide_subdirectory($dirname); |
131
|
383
|
|
|
|
|
794
|
$this->store_record($dir,$identifier,$UNDEF,$offset,$data_length); |
132
|
383
|
50
|
|
|
|
655
|
$this->search_record('LAST_RECORD',$dir)->{extra} = $name if $name;} |
133
|
|
|
|
|
|
|
# pad, if you need padding ... |
134
|
407
|
100
|
|
|
|
619
|
++$offset if $need_padding; |
135
|
|
|
|
|
|
|
# that's it, return the working offset |
136
|
407
|
|
|
|
|
1111
|
return $offset; |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
########################################################### |
140
|
|
|
|
|
|
|
# This method parses one dataset from an APP13 IPTC block # |
141
|
|
|
|
|
|
|
# and creates a corresponding record in the appropriate # |
142
|
|
|
|
|
|
|
# subdirectory (which depends on the IPTC record number). # |
143
|
|
|
|
|
|
|
# The $offset argument is a pointer in the segment data # |
144
|
|
|
|
|
|
|
# area, which must be returned updated at the end of the # |
145
|
|
|
|
|
|
|
# routine. An IPTC record is a sequence of datasets, # |
146
|
|
|
|
|
|
|
# which need not be in numerical order, unless otherwise # |
147
|
|
|
|
|
|
|
# specified. Each dataset consists of a unique tag and a # |
148
|
|
|
|
|
|
|
# data field. A standard tag is used when the data field # |
149
|
|
|
|
|
|
|
# size is less than 32768 bytes; otherwise, an extended # |
150
|
|
|
|
|
|
|
# tag is used. The structure of a dataset is: # |
151
|
|
|
|
|
|
|
#---------------------------------------------------------# |
152
|
|
|
|
|
|
|
# 1 byte tag marker (must be 0x1c) # |
153
|
|
|
|
|
|
|
# 1 byte record number (e.g., 2 for 2:xx datasets) # |
154
|
|
|
|
|
|
|
# 1 byte dataset number # |
155
|
|
|
|
|
|
|
# 2 bytes data length (< 32768 octets) or length of ... # |
156
|
|
|
|
|
|
|
# <....> data length (> 32767 bytes only) # |
157
|
|
|
|
|
|
|
# .... data (its length is specified before) # |
158
|
|
|
|
|
|
|
#=========================================================# |
159
|
|
|
|
|
|
|
# So, standard datasets have a 5 bytes tag; the last two # |
160
|
|
|
|
|
|
|
# bytes in the tag contain the data field length, the msb # |
161
|
|
|
|
|
|
|
# being always 0. For extended datasets instead, these # |
162
|
|
|
|
|
|
|
# two bytes contain the length of the (following) data # |
163
|
|
|
|
|
|
|
# field length, the msb being always 1. The value of the # |
164
|
|
|
|
|
|
|
# msb thus distinguishes "standard" from "extended"; in # |
165
|
|
|
|
|
|
|
# digital photographies, I assume that the datasets which # |
166
|
|
|
|
|
|
|
# are actually used (a subset of the standard) are always # |
167
|
|
|
|
|
|
|
# standard; therefore, we are likely not to have the IPTC # |
168
|
|
|
|
|
|
|
# record not spanning more than one APP13 segment. # |
169
|
|
|
|
|
|
|
#=========================================================# |
170
|
|
|
|
|
|
|
# The record types defined by the IPTC-NAA standard and # |
171
|
|
|
|
|
|
|
# the corresponding dataset ranges are: # |
172
|
|
|
|
|
|
|
# # |
173
|
|
|
|
|
|
|
# Object Envelop Record: 1:xx # |
174
|
|
|
|
|
|
|
# Application Records: 2:xx through 6:xx # |
175
|
|
|
|
|
|
|
# Pre-ObjectData Descriptor Record: 7:xx # |
176
|
|
|
|
|
|
|
# ObjectData Record: 8:xx # |
177
|
|
|
|
|
|
|
# Post-ObjectData Descriptor Record: 9:xx # |
178
|
|
|
|
|
|
|
# # |
179
|
|
|
|
|
|
|
# The Adobe "pseudo"-standard is usually restricted to # |
180
|
|
|
|
|
|
|
# the first application record, so it is unlikely, but # |
181
|
|
|
|
|
|
|
# not impossible, to find datasets outside of 2:xx. # |
182
|
|
|
|
|
|
|
# Record numbers should only be found in increasing # |
183
|
|
|
|
|
|
|
# order, but this rule is currently not enforced here. # |
184
|
|
|
|
|
|
|
#=========================================================# |
185
|
|
|
|
|
|
|
# Ref: "IPTC-NAA: Information Interchange Model Version 4"# |
186
|
|
|
|
|
|
|
# Comité Internat. des Télécommunications de Presse. # |
187
|
|
|
|
|
|
|
########################################################### |
188
|
|
|
|
|
|
|
sub parse_IPTC_dataset { |
189
|
370
|
|
|
370
|
0
|
378
|
my ($this, $offset) = @_; |
190
|
|
|
|
|
|
|
# check that there is enough data for the dataset header |
191
|
370
|
|
|
|
|
780
|
$this->test_size($offset + 5, "in IPTC dataset"); |
192
|
|
|
|
|
|
|
# each record is a sequence of variable length data sets read the |
193
|
|
|
|
|
|
|
# first four fields (five bytes), and store them in local variables. |
194
|
370
|
|
|
|
|
720
|
my $marker = $this->read_record($BYTE , $offset); |
195
|
370
|
|
|
|
|
1137
|
my $rnumber = $this->read_record($BYTE , $offset); |
196
|
370
|
|
|
|
|
1086
|
my $dataset = $this->read_record($BYTE , $offset); |
197
|
370
|
|
|
|
|
1099
|
my $length = $this->read_record($SHORT, $offset); |
198
|
|
|
|
|
|
|
# check that the tag marker is 0x1c as specified by the IPTC standard |
199
|
370
|
50
|
|
|
|
929
|
$this->die("Invalid IPTC tag marker ($marker)") |
200
|
|
|
|
|
|
|
if $marker ne $APP13_IPTC_TAGMARKER; |
201
|
|
|
|
|
|
|
# retrieve or create the correct subdirectory; this depends on |
202
|
|
|
|
|
|
|
# the record number (most often, it is 2, for 2:xx datasets) |
203
|
370
|
|
|
|
|
1089
|
my $dir = $this->provide_subdirectory("${APP13_IPTC_DIRNAME}_$rnumber"); |
204
|
|
|
|
|
|
|
# if $length has the msb set, then we are dealing with an |
205
|
|
|
|
|
|
|
# extended dataset. In this case, abort and write more code |
206
|
370
|
50
|
|
|
|
703
|
$this->die("IPTC extended datasets not yet supported") |
207
|
|
|
|
|
|
|
if $length & (0x01 << 15); |
208
|
|
|
|
|
|
|
# push a new record reference in the correct subdir. Use the |
209
|
|
|
|
|
|
|
# dataset number as identifier, the rest is strightforward |
210
|
|
|
|
|
|
|
# (assume that the data type is always ASCII). |
211
|
370
|
|
|
|
|
762
|
$this->store_record($dir, $dataset, $ASCII, $offset, $length); |
212
|
|
|
|
|
|
|
# return the update offset |
213
|
370
|
|
|
|
|
1078
|
return $offset; |
214
|
|
|
|
|
|
|
} |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
# successful load |
217
|
|
|
|
|
|
|
1; |