| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | ########################################################### | 
| 2 |  |  |  |  |  |  | # A Perl package for showing/modifying JPEG (meta)data.   # | 
| 3 |  |  |  |  |  |  | # Copyright (C) 2004,2005,2006 Stefano Bettelli           # | 
| 4 |  |  |  |  |  |  | # See the COPYING and LICENSE files for license terms.    # | 
| 5 |  |  |  |  |  |  | ########################################################### | 
| 6 | 15 |  |  | 15 |  | 94 | use Image::MetaData::JPEG::data::Tables qw(:TagsAPP13); | 
|  | 15 |  |  |  |  | 31 |  | 
|  | 15 |  |  |  |  | 2780 |  | 
| 7 | 15 |  |  | 15 |  | 107 | no  integer; | 
|  | 15 |  |  |  |  | 34 |  | 
|  | 15 |  |  |  |  | 96 |  | 
| 8 | 15 |  |  | 15 |  | 338 | use strict; | 
|  | 15 |  |  |  |  | 31 |  | 
|  | 15 |  |  |  |  | 568 |  | 
| 9 | 15 |  |  | 15 |  | 81 | use warnings; | 
|  | 15 |  |  |  |  | 27 |  | 
|  | 15 |  |  |  |  | 18229 |  | 
| 10 |  |  |  |  |  |  |  | 
| 11 |  |  |  |  |  |  | ########################################################### | 
| 12 |  |  |  |  |  |  | # This method parses an APP13 segment, often used by pho- # | 
| 13 |  |  |  |  |  |  | # to-manipulation programs to store IPTC (International   # | 
| 14 |  |  |  |  |  |  | # Press Telecommunications Council) tags, although this   # | 
| 15 |  |  |  |  |  |  | # isn't a formally defined standard (first adopted by     # | 
| 16 |  |  |  |  |  |  | # Adobe). The structure of an APP13 segment is as follows # | 
| 17 |  |  |  |  |  |  | #---------------------------------------------------------# | 
| 18 |  |  |  |  |  |  | # 14 bytes  identifier, e.g. "Photoshop 3.0\000"          # | 
| 19 |  |  |  |  |  |  | #  8 bytes  resolution (?, Photoshop 2.5 only)            # | 
| 20 |  |  |  |  |  |  | #   .....   sequence of Photoshop Image Resource blocks   # | 
| 21 |  |  |  |  |  |  | #=========================================================# | 
| 22 |  |  |  |  |  |  | # The sequence of resource blocks may require additional  # | 
| 23 |  |  |  |  |  |  | # APP13 markers, whose order is always to be preserved.   # | 
| 24 |  |  |  |  |  |  | # TODO: implement parsing of multiple blocks!!!!          # | 
| 25 |  |  |  |  |  |  | #=========================================================# | 
| 26 |  |  |  |  |  |  | # Ref: "Adobe Photoshop 6.0: File Formats Specifications",# | 
| 27 |  |  |  |  |  |  | #      Adobe System Inc., ver.6.0, rel.2, November 2000.  # | 
| 28 |  |  |  |  |  |  | # and  "\"Solo\" Image File Format. RichTIFF and its      # | 
| 29 |  |  |  |  |  |  | #       replacement by \"Solo\" JFIF", version 2.0a,      # | 
| 30 |  |  |  |  |  |  | #       Coatsworth Comm. Inc., Brampton, Ontario, Canada  # | 
| 31 |  |  |  |  |  |  | ########################################################### | 
| 32 |  |  |  |  |  |  | sub parse_app13 { | 
| 33 | 30 |  |  | 30 | 0 | 69 | my ($this) = @_; | 
| 34 | 30 |  |  |  |  | 63 | my $offset = 0; | 
| 35 |  |  |  |  |  |  | # they say that this segment always starts with a specific | 
| 36 |  |  |  |  |  |  | # string from Adobe, namely "Photoshop 3.0\000". But some | 
| 37 |  |  |  |  |  |  | # old pics, with only non-IPTC data, use other strings ... | 
| 38 |  |  |  |  |  |  | # try all known possibilities and die if no match is found | 
| 39 | 30 |  |  |  |  | 97 | for my $good_id (@$APP13_PHOTOSHOP_IDS) { | 
| 40 | 60 | 100 |  |  |  | 229 | next if $this->size() < length $good_id; | 
| 41 | 56 |  |  |  |  | 222 | my $id = $this->read_record($UNDEF, 0, length $good_id); | 
| 42 | 56 | 100 |  |  |  | 489 | next unless $good_id eq $id; | 
| 43 |  |  |  |  |  |  | # store the identifier (and some additional bytes for ver.2.5 only) | 
| 44 | 29 |  |  |  |  | 148 | $this->store_record('Identifier', $ASCII, $offset, length $id); | 
| 45 | 29 | 100 |  |  |  | 149 | $this->store_record('Resolution', $SHORT, $offset, 4) if $id =~ /2\.5/; | 
| 46 |  |  |  |  |  |  | } | 
| 47 |  |  |  |  |  |  | # Die if no identifier was found (show first ten characters) | 
| 48 | 30 | 100 |  |  |  | 137 | $this->die('Wrong identifier ('.$this->read_record($UNDEF, 0, 10).')') | 
| 49 |  |  |  |  |  |  | unless $this->search_record('Identifier'); | 
| 50 |  |  |  |  |  |  | # not much to do now, except calling repeatedly a method for | 
| 51 |  |  |  |  |  |  | # parsing resource data blocks. The argument is the current | 
| 52 |  |  |  |  |  |  | # offset, and the output is the new offset after the block | 
| 53 | 29 |  |  |  |  | 115 | $offset = $this->parse_resource_data_block($offset) | 
| 54 |  |  |  |  |  |  | while ($offset < $this->size()); | 
| 55 |  |  |  |  |  |  | # complain if we read a bit too much ... | 
| 56 | 29 |  |  |  |  | 125 | $this->test_size($offset, "parsed after segment end"); | 
| 57 |  |  |  |  |  |  | } | 
| 58 |  |  |  |  |  |  |  | 
| 59 |  |  |  |  |  |  | ########################################################### | 
| 60 |  |  |  |  |  |  | # This method parses an APP13 resource data block (TODO:  # | 
| 61 |  |  |  |  |  |  | # blocks spanning multiple APP13s). Currently, it treates # | 
| 62 |  |  |  |  |  |  | # in details IPTC (International Press Telecommunications # | 
| 63 |  |  |  |  |  |  | # Council) blocks, and just saves the other tags (which   # | 
| 64 |  |  |  |  |  |  | # are, however, in general, much simpler). The only argu- # | 
| 65 |  |  |  |  |  |  | # ment is the current offset in the data area of this     # | 
| 66 |  |  |  |  |  |  | # object. The output is the new offset after this block.  # | 
| 67 |  |  |  |  |  |  | # The structure of a resource data block is:              # | 
| 68 |  |  |  |  |  |  | #---------------------------------------------------------# | 
| 69 |  |  |  |  |  |  | #  4 bytes  type (Photoshop uses "8BIM" from v.6.0 on)    # | 
| 70 |  |  |  |  |  |  | #  2 bytes  unique identifier (e.g. "\004\004" for IPTC)  # | 
| 71 |  |  |  |  |  |  | #  1 byte   length of resource data block name            # | 
| 72 |  |  |  |  |  |  | #   ....    name (padded to make size even incl. length)  # | 
| 73 |  |  |  |  |  |  | #  4 bytes  size of resource data (following data only)   # | 
| 74 |  |  |  |  |  |  | #   ....    data (padded to make size even)               # | 
| 75 |  |  |  |  |  |  | #---------------------------------------------------------# | 
| 76 |  |  |  |  |  |  | # The content of each Photoshop non-IPTC data block is    # | 
| 77 |  |  |  |  |  |  | # transformed into a record and stored in a first-level   # | 
| 78 |  |  |  |  |  |  | # subdirectory, depending on its type. The block type is, # | 
| 79 |  |  |  |  |  |  | # in fact, no more supposed to be '8BIM'; however, only   # | 
| 80 |  |  |  |  |  |  | # some known values are accepted. The IPTC data block is  # | 
| 81 |  |  |  |  |  |  | # instead analysed in detail, and all findings are stored # | 
| 82 |  |  |  |  |  |  | # in another (sub)directory tree. Empty subdirectories    # | 
| 83 |  |  |  |  |  |  | # are not created.                                        # | 
| 84 |  |  |  |  |  |  | #=========================================================# | 
| 85 |  |  |  |  |  |  | # Ref: "Adobe Photoshop 6.0: File Formats Specifications",# | 
| 86 |  |  |  |  |  |  | #      Adobe System Inc., ver.6.0, rel.2, November 2000.  # | 
| 87 |  |  |  |  |  |  | # and  "\"Solo\" Image File Format. RichTIFF and its      # | 
| 88 |  |  |  |  |  |  | #       replacement by \"Solo\" JFIF", version 2.0a,      # | 
| 89 |  |  |  |  |  |  | #       Coatsworth Comm. Inc., Brampton, Ontario, Canada  # | 
| 90 |  |  |  |  |  |  | ########################################################### | 
| 91 |  |  |  |  |  |  | sub parse_resource_data_block { | 
| 92 | 407 |  |  | 407 | 0 | 595 | my ($this, $offset) = @_; | 
| 93 |  |  |  |  |  |  | # An "Adobe Phostoshop" block usually starts with "8BIM". | 
| 94 |  |  |  |  |  |  | # Accepted values are listed in @$APP13_PHOTOSHOP_TYPE. | 
| 95 | 407 |  |  |  |  | 1505 | my $type = $this->read_record($ASCII, $offset, 4); | 
| 96 | 1221 |  |  |  |  | 2818 | $this->die("Wrong resource data block type ($type)") | 
| 97 | 407 | 50 |  |  |  | 2364 | unless grep { $_ eq $type } @$APP13_PHOTOSHOP_TYPE; | 
| 98 |  |  |  |  |  |  | # then there is the block identifier | 
| 99 | 407 |  |  |  |  | 1293 | my $identifier = $this->read_record($SHORT, $offset); | 
| 100 |  |  |  |  |  |  | # get the name length and the name. The length is the first byte. | 
| 101 |  |  |  |  |  |  | # The name can be padded so that length+name span an even number | 
| 102 |  |  |  |  |  |  | # of bytes. Usually the name is "" (the empty string, with length | 
| 103 |  |  |  |  |  |  | # 0, not "\000", which has length 1) so we get "\000\000" here. | 
| 104 | 407 |  |  |  |  | 2199 | my $name_length = $this->read_record($BYTE, $offset); | 
| 105 | 407 |  |  |  |  | 1830 | my $name = $this->read_record($ASCII, $offset, $name_length); | 
| 106 |  |  |  |  |  |  | # read the padding byte if length was even | 
| 107 | 407 | 50 |  |  |  | 3003 | $this->read_record($UNDEF, $offset, 1) if ($name_length % 2) == 0; | 
| 108 |  |  |  |  |  |  | # the next four bytes encode the resource data size. Also in this | 
| 109 |  |  |  |  |  |  | # case the total size must be padded to an even number of bytes | 
| 110 | 407 |  |  |  |  | 1916 | my $data_length = $this->read_record($LONG, $offset); | 
| 111 | 407 | 100 |  |  |  | 1468 | my $need_padding = ($data_length % 2) ? 1 : 0; | 
| 112 |  |  |  |  |  |  | # check that there is enough data for this block; obviously, this | 
| 113 |  |  |  |  |  |  | # break the case of a resource data block spanning multiple segments! | 
| 114 | 407 |  |  |  |  | 1574 | $this->test_size($offset + $data_length + $need_padding, | 
| 115 |  |  |  |  |  |  | "in IPTC resource data block"); | 
| 116 |  |  |  |  |  |  | # calculate the absolute end of the resource data block | 
| 117 | 407 |  |  |  |  | 674 | my $boundary = $offset + $data_length; | 
| 118 |  |  |  |  |  |  | # Currently, the IPTC block deserves a special treatment: repeatedly | 
| 119 |  |  |  |  |  |  | # read data from the data block, up to an amount equal to $data_length. | 
| 120 |  |  |  |  |  |  | # The IPTC-parsing routine, as usual, returns the new working offset at | 
| 121 |  |  |  |  |  |  | # the end. The IPTC records are written in separate subdirectories. There | 
| 122 |  |  |  |  |  |  | # should be no resource block description for IPTC, make it an error. | 
| 123 | 407 | 100 |  |  |  | 892 | if ($identifier eq $APP13_PHOTOSHOP_IPTC) { | 
| 124 | 24 | 50 |  |  |  | 74 | $this->die("Non-empty IPTC resource block descriptor") if $name ne ''; | 
| 125 | 24 |  |  |  |  | 194 | $offset=$this->parse_IPTC_dataset($offset) while ($offset<$boundary); } | 
| 126 |  |  |  |  |  |  | # Less interesting tags are mistreated. However, they should not pollute | 
| 127 |  |  |  |  |  |  | # the root dir, so a subdirectory is used, which depends on $type. $name | 
| 128 |  |  |  |  |  |  | # is stored in the "extra" field for use at dump time. | 
| 129 | 383 |  |  |  |  | 776 | else { my $dirname = $APP13_PHOTOSHOP_DIRNAME . '_' . $type; | 
| 130 | 383 |  |  |  |  | 1097 | my $dir = $this->provide_subdirectory($dirname); | 
| 131 | 383 |  |  |  |  | 1331 | $this->store_record($dir,$identifier,$UNDEF,$offset,$data_length); | 
| 132 | 383 | 50 |  |  |  | 976 | $this->search_record('LAST_RECORD',$dir)->{extra} = $name if $name;} | 
| 133 |  |  |  |  |  |  | # pad, if you need padding ... | 
| 134 | 407 | 100 |  |  |  | 905 | ++$offset if $need_padding; | 
| 135 |  |  |  |  |  |  | # that's it, return the working offset | 
| 136 | 407 |  |  |  |  | 2691 | return $offset; | 
| 137 |  |  |  |  |  |  | } | 
| 138 |  |  |  |  |  |  |  | 
| 139 |  |  |  |  |  |  | ########################################################### | 
| 140 |  |  |  |  |  |  | # This method parses one dataset from an APP13 IPTC block # | 
| 141 |  |  |  |  |  |  | # and creates a corresponding record in the appropriate   # | 
| 142 |  |  |  |  |  |  | # subdirectory (which depends on the IPTC record number). # | 
| 143 |  |  |  |  |  |  | # The $offset argument is a pointer in the segment data   # | 
| 144 |  |  |  |  |  |  | # area, which must be returned updated at the end of the  # | 
| 145 |  |  |  |  |  |  | # routine. An IPTC record is a sequence of datasets,      # | 
| 146 |  |  |  |  |  |  | # which need not be in numerical order, unless otherwise  # | 
| 147 |  |  |  |  |  |  | # specified. Each dataset consists of a unique tag and a  # | 
| 148 |  |  |  |  |  |  | # data field. A standard tag is used when the data field  # | 
| 149 |  |  |  |  |  |  | # size is less than 32768 bytes; otherwise, an extended   # | 
| 150 |  |  |  |  |  |  | # tag is used. The structure of a dataset is:             # | 
| 151 |  |  |  |  |  |  | #---------------------------------------------------------# | 
| 152 |  |  |  |  |  |  | #  1 byte   tag marker (must be 0x1c)                     # | 
| 153 |  |  |  |  |  |  | #  1 byte   record number (e.g., 2 for 2:xx datasets)     # | 
| 154 |  |  |  |  |  |  | #  1 byte   dataset number                                # | 
| 155 |  |  |  |  |  |  | #  2 bytes  data length (< 32768 octets) or length of ... # | 
| 156 |  |  |  |  |  |  | #  <....>   data length (> 32767 bytes only)              # | 
| 157 |  |  |  |  |  |  | #   ....    data (its length is specified before)         # | 
| 158 |  |  |  |  |  |  | #=========================================================# | 
| 159 |  |  |  |  |  |  | # So, standard datasets have a 5 bytes tag; the last two  # | 
| 160 |  |  |  |  |  |  | # bytes in the tag contain the data field length, the msb # | 
| 161 |  |  |  |  |  |  | # being always 0. For extended datasets instead, these    # | 
| 162 |  |  |  |  |  |  | # two bytes contain the length of the (following) data    # | 
| 163 |  |  |  |  |  |  | # field length, the msb being always 1. The value of the  # | 
| 164 |  |  |  |  |  |  | # msb thus distinguishes "standard" from "extended"; in   # | 
| 165 |  |  |  |  |  |  | # digital photographies, I assume that the datasets which # | 
| 166 |  |  |  |  |  |  | # are actually used (a subset of the standard) are always # | 
| 167 |  |  |  |  |  |  | # standard; therefore, we are likely not to have the IPTC # | 
| 168 |  |  |  |  |  |  | # record not spanning more than one APP13 segment.        # | 
| 169 |  |  |  |  |  |  | #=========================================================# | 
| 170 |  |  |  |  |  |  | # The record types defined by the IPTC-NAA standard and   # | 
| 171 |  |  |  |  |  |  | # the corresponding dataset ranges are:                   # | 
| 172 |  |  |  |  |  |  | #                                                         # | 
| 173 |  |  |  |  |  |  | # Object Envelop Record:                       1:xx       # | 
| 174 |  |  |  |  |  |  | # Application Records:                  2:xx through 6:xx # | 
| 175 |  |  |  |  |  |  | # Pre-ObjectData Descriptor Record:            7:xx       # | 
| 176 |  |  |  |  |  |  | # ObjectData Record:                           8:xx       # | 
| 177 |  |  |  |  |  |  | # Post-ObjectData Descriptor Record:           9:xx       # | 
| 178 |  |  |  |  |  |  | #                                                         # | 
| 179 |  |  |  |  |  |  | # The Adobe "pseudo"-standard is usually restricted to    # | 
| 180 |  |  |  |  |  |  | # the first application record, so it is unlikely, but    # | 
| 181 |  |  |  |  |  |  | # not impossible, to find datasets outside of 2:xx.       # | 
| 182 |  |  |  |  |  |  | # Record numbers should only be found in increasing       # | 
| 183 |  |  |  |  |  |  | # order, but this rule is currently not enforced here.    # | 
| 184 |  |  |  |  |  |  | #=========================================================# | 
| 185 |  |  |  |  |  |  | # Ref: "IPTC-NAA: Information Interchange Model Version 4"# | 
| 186 |  |  |  |  |  |  | #      Comité Internat. des Télécommunications de Presse. # | 
| 187 |  |  |  |  |  |  | ########################################################### | 
| 188 |  |  |  |  |  |  | sub parse_IPTC_dataset { | 
| 189 | 370 |  |  | 370 | 0 | 545 | my ($this, $offset) = @_; | 
| 190 |  |  |  |  |  |  | # check that there is enough data for the dataset header | 
| 191 | 370 |  |  |  |  | 1308 | $this->test_size($offset + 5, "in IPTC dataset"); | 
| 192 |  |  |  |  |  |  | # each record is a sequence of variable length data sets read the | 
| 193 |  |  |  |  |  |  | # first four fields (five bytes), and store them in local variables. | 
| 194 | 370 |  |  |  |  | 1173 | my $marker  = $this->read_record($BYTE , $offset); | 
| 195 | 370 |  |  |  |  | 2161 | my $rnumber = $this->read_record($BYTE , $offset); | 
| 196 | 370 |  |  |  |  | 1643 | my $dataset = $this->read_record($BYTE , $offset); | 
| 197 | 370 |  |  |  |  | 1706 | my $length  = $this->read_record($SHORT, $offset); | 
| 198 |  |  |  |  |  |  | # check that the tag marker is 0x1c as specified by the IPTC standard | 
| 199 | 370 | 50 |  |  |  | 1502 | $this->die("Invalid IPTC tag marker ($marker)") | 
| 200 |  |  |  |  |  |  | if $marker ne $APP13_IPTC_TAGMARKER; | 
| 201 |  |  |  |  |  |  | # retrieve or create the correct subdirectory; this depends on | 
| 202 |  |  |  |  |  |  | # the record number (most often, it is 2, for 2:xx datasets) | 
| 203 | 370 |  |  |  |  | 1617 | my $dir = $this->provide_subdirectory("${APP13_IPTC_DIRNAME}_$rnumber"); | 
| 204 |  |  |  |  |  |  | # if $length has the msb set, then we are dealing with an | 
| 205 |  |  |  |  |  |  | # extended dataset. In this case, abort and write more code | 
| 206 | 370 | 50 |  |  |  | 974 | $this->die("IPTC extended datasets not yet supported") | 
| 207 |  |  |  |  |  |  | if $length & (0x01 << 15); | 
| 208 |  |  |  |  |  |  | # push a new record reference in the correct subdir. Use the | 
| 209 |  |  |  |  |  |  | # dataset number as identifier, the rest is strightforward | 
| 210 |  |  |  |  |  |  | # (assume that the data type is always ASCII). | 
| 211 | 370 |  |  |  |  | 1181 | $this->store_record($dir, $dataset, $ASCII, $offset, $length); | 
| 212 |  |  |  |  |  |  | # return the update offset | 
| 213 | 370 |  |  |  |  | 1902 | return $offset; | 
| 214 |  |  |  |  |  |  | } | 
| 215 |  |  |  |  |  |  |  | 
| 216 |  |  |  |  |  |  | # successful load | 
| 217 |  |  |  |  |  |  | 1; |