| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 2 |  |  |  |  |  |  | # File:         PDF.pm | 
| 3 |  |  |  |  |  |  | # | 
| 4 |  |  |  |  |  |  | # Description:  Read PDF meta information | 
| 5 |  |  |  |  |  |  | # | 
| 6 |  |  |  |  |  |  | # Revisions:    07/11/2005 - P. Harvey Created | 
| 7 |  |  |  |  |  |  | #               07/25/2005 - P. Harvey Add support for encrypted documents | 
| 8 |  |  |  |  |  |  | # | 
| 9 |  |  |  |  |  |  | # References:   1) http://www.adobe.com/devnet/pdf/pdf_reference.html | 
| 10 |  |  |  |  |  |  | #               2) http://search.cpan.org/dist/Crypt-RC4/ | 
| 11 |  |  |  |  |  |  | #               3) http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf | 
| 12 |  |  |  |  |  |  | #               4) http://www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/adobe_supplement_iso32000.pdf | 
| 13 |  |  |  |  |  |  | #               5) http://tools.ietf.org/search/rfc3454 | 
| 14 |  |  |  |  |  |  | #               6) http://www.armware.dk/RFC/rfc/rfc4013.html | 
| 15 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 16 |  |  |  |  |  |  |  | 
| 17 |  |  |  |  |  |  | package Image::ExifTool::PDF; | 
| 18 |  |  |  |  |  |  |  | 
| 19 | 25 |  |  | 25 |  | 4841 | use strict; | 
|  | 25 |  |  |  |  | 70 |  | 
|  | 25 |  |  |  |  | 1095 |  | 
| 20 | 25 |  |  | 25 |  | 164 | use vars qw($VERSION $AUTOLOAD $lastFetched); | 
|  | 25 |  |  |  |  | 68 |  | 
|  | 25 |  |  |  |  | 1580 |  | 
| 21 | 25 |  |  | 25 |  | 164 | use Image::ExifTool qw(:DataAccess :Utils); | 
|  | 25 |  |  |  |  | 86 |  | 
|  | 25 |  |  |  |  | 351333 |  | 
| 22 |  |  |  |  |  |  | require Exporter; | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | $VERSION = '1.55'; | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | sub FetchObject($$$$); | 
| 27 |  |  |  |  |  |  | sub ExtractObject($$;$$); | 
| 28 |  |  |  |  |  |  | sub ReadToNested($;$); | 
| 29 |  |  |  |  |  |  | sub ProcessDict($$$$;$$); | 
| 30 |  |  |  |  |  |  | sub ProcessAcroForm($$$$;$$); | 
| 31 |  |  |  |  |  |  | sub ExpandArray($); | 
| 32 |  |  |  |  |  |  | sub ReadPDFValue($); | 
| 33 |  |  |  |  |  |  | sub CheckPDF($$$); | 
| 34 |  |  |  |  |  |  |  | 
| 35 |  |  |  |  |  |  | # $lastFetched - last fetched object reference (used for decryption) | 
| 36 |  |  |  |  |  |  | #                (undefined if fetched object was already decrypted, eg. object from stream) | 
| 37 |  |  |  |  |  |  |  | 
| 38 |  |  |  |  |  |  | my $cryptInfo;      # encryption object reference (plus additional information) | 
| 39 |  |  |  |  |  |  | my $cryptString;    # flag that strings are encrypted | 
| 40 |  |  |  |  |  |  | my $cryptStream;    # flag that streams are encrypted | 
| 41 |  |  |  |  |  |  | my $lastOffset;     # last fetched object offset | 
| 42 |  |  |  |  |  |  | my %streamObjs;     # hash of stream objects | 
| 43 |  |  |  |  |  |  | my %fetched;        # dicts fetched in verbose mode (to avoid cyclical recursion) | 
| 44 |  |  |  |  |  |  | my $pdfVer;         # version of PDF file being processed | 
| 45 |  |  |  |  |  |  |  | 
| 46 |  |  |  |  |  |  | # filters supported in DecodeStream() | 
| 47 |  |  |  |  |  |  | my %supportedFilter = ( | 
| 48 |  |  |  |  |  |  | '/FlateDecode' => 1, | 
| 49 |  |  |  |  |  |  | '/Crypt' => 1, | 
| 50 |  |  |  |  |  |  | '/Identity' => 1,  # (not filtered) | 
| 51 |  |  |  |  |  |  | '/DCTDecode' => 1, # (JPEG image - not filtered) | 
| 52 |  |  |  |  |  |  | '/JPXDecode' => 1, # (Jpeg2000 image - not filtered) | 
| 53 |  |  |  |  |  |  | '/LZWDecode' => 1, # (usually a bitmapped image) | 
| 54 |  |  |  |  |  |  | '/ASCIIHexDecode' => 1, | 
| 55 |  |  |  |  |  |  | '/ASCII85Decode' => 1, | 
| 56 |  |  |  |  |  |  | # other standard filters that we currently don't support | 
| 57 |  |  |  |  |  |  | #'/JBIG2Decode' => 0, # (JBIG2 image format not supported) | 
| 58 |  |  |  |  |  |  | #'/CCITTFaxDecode' => 0, | 
| 59 |  |  |  |  |  |  | #'/RunLengthDecode' => 0, | 
| 60 |  |  |  |  |  |  | ); | 
| 61 |  |  |  |  |  |  |  | 
| 62 |  |  |  |  |  |  | # tags in main PDF directories | 
| 63 |  |  |  |  |  |  | %Image::ExifTool::PDF::Main = ( | 
| 64 |  |  |  |  |  |  | GROUPS => { 2 => 'Document' }, | 
| 65 |  |  |  |  |  |  | VARS => { CAPTURE => ['Main','Prev'] }, | 
| 66 |  |  |  |  |  |  | Info => { | 
| 67 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::Info' }, | 
| 68 |  |  |  |  |  |  | # Adobe Acrobat 10.1.5 will create a duplicate Info dictionary with | 
| 69 |  |  |  |  |  |  | # a different object number when metadata is edited.  This flag | 
| 70 |  |  |  |  |  |  | # is part of a patch to ignore this duplicate information (unless | 
| 71 |  |  |  |  |  |  | # the IgnoreMinorErrors option is used) | 
| 72 |  |  |  |  |  |  | IgnoreDuplicates => 1, | 
| 73 |  |  |  |  |  |  | }, | 
| 74 |  |  |  |  |  |  | Root => { | 
| 75 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::Root' }, | 
| 76 |  |  |  |  |  |  | }, | 
| 77 |  |  |  |  |  |  | Encrypt => { | 
| 78 |  |  |  |  |  |  | NoProcess => 1, # don't process normally (processed in advance) | 
| 79 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::Encrypt' }, | 
| 80 |  |  |  |  |  |  | }, | 
| 81 |  |  |  |  |  |  | _linearized => { | 
| 82 |  |  |  |  |  |  | Name => 'Linearized', | 
| 83 |  |  |  |  |  |  | Notes => 'flag set if document is linearized for fast web display; not a real Tag ID', | 
| 84 |  |  |  |  |  |  | PrintConv => { 'true' => 'Yes', 'false' => 'No' }, | 
| 85 |  |  |  |  |  |  | }, | 
| 86 |  |  |  |  |  |  | ); | 
| 87 |  |  |  |  |  |  |  | 
| 88 |  |  |  |  |  |  | # tags in PDF Info dictionary | 
| 89 |  |  |  |  |  |  | %Image::ExifTool::PDF::Info = ( | 
| 90 |  |  |  |  |  |  | GROUPS => { 2 => 'Document' }, | 
| 91 |  |  |  |  |  |  | VARS => { CAPTURE => ['Info'] }, | 
| 92 |  |  |  |  |  |  | EXTRACT_UNKNOWN => 1, # extract all unknown tags in this directory | 
| 93 |  |  |  |  |  |  | WRITE_PROC => \&Image::ExifTool::DummyWriteProc, | 
| 94 |  |  |  |  |  |  | CHECK_PROC => \&CheckPDF, | 
| 95 |  |  |  |  |  |  | WRITABLE => 'string', | 
| 96 |  |  |  |  |  |  | # set PRIORITY to 0 so most recent Info dictionary takes precedence | 
| 97 |  |  |  |  |  |  | # (Acrobat Pro bug? doesn't use same object/generation number for | 
| 98 |  |  |  |  |  |  | #  new Info dictionary when doing incremental update) | 
| 99 |  |  |  |  |  |  | PRIORITY => 0, | 
| 100 |  |  |  |  |  |  | NOTES => q{ | 
| 101 |  |  |  |  |  |  | As well as the tags listed below, the PDF specification allows for | 
| 102 |  |  |  |  |  |  | user-defined tags to exist in the Info dictionary.  These tags, which should | 
| 103 |  |  |  |  |  |  | have corresponding XMP-pdfx entries in the XMP of the PDF XML Metadata | 
| 104 |  |  |  |  |  |  | object, are also extracted by ExifTool. | 
| 105 |  |  |  |  |  |  |  | 
| 106 |  |  |  |  |  |  | B specifies the value format, and may be C, C, | 
| 107 |  |  |  |  |  |  | C, C, C or C for PDF tags. | 
| 108 |  |  |  |  |  |  | }, | 
| 109 |  |  |  |  |  |  | Title       => { }, | 
| 110 |  |  |  |  |  |  | Author      => { Groups => { 2 => 'Author' } }, | 
| 111 |  |  |  |  |  |  | Subject     => { }, | 
| 112 |  |  |  |  |  |  | Keywords    => { List => 'string' },  # this is a string list | 
| 113 |  |  |  |  |  |  | Creator     => { }, | 
| 114 |  |  |  |  |  |  | Producer    => { }, | 
| 115 |  |  |  |  |  |  | CreationDate => { | 
| 116 |  |  |  |  |  |  | Name => 'CreateDate', | 
| 117 |  |  |  |  |  |  | Writable => 'date', | 
| 118 |  |  |  |  |  |  | Groups => { 2 => 'Time' }, | 
| 119 |  |  |  |  |  |  | Shift => 'Time', | 
| 120 |  |  |  |  |  |  | PrintConv => '$self->ConvertDateTime($val)', | 
| 121 |  |  |  |  |  |  | PrintConvInv => '$self->InverseDateTime($val)', | 
| 122 |  |  |  |  |  |  | }, | 
| 123 |  |  |  |  |  |  | ModDate => { | 
| 124 |  |  |  |  |  |  | Name => 'ModifyDate', | 
| 125 |  |  |  |  |  |  | Writable => 'date', | 
| 126 |  |  |  |  |  |  | Groups => { 2 => 'Time' }, | 
| 127 |  |  |  |  |  |  | Shift => 'Time', | 
| 128 |  |  |  |  |  |  | PrintConv => '$self->ConvertDateTime($val)', | 
| 129 |  |  |  |  |  |  | PrintConvInv => '$self->InverseDateTime($val)', | 
| 130 |  |  |  |  |  |  | }, | 
| 131 |  |  |  |  |  |  | Trapped => { | 
| 132 |  |  |  |  |  |  | Protected => 1, | 
| 133 |  |  |  |  |  |  | # remove leading '/' from '/True' or '/False' | 
| 134 |  |  |  |  |  |  | ValueConv => '$val=~s{^/}{}; $val', | 
| 135 |  |  |  |  |  |  | ValueConvInv => '"/$val"', | 
| 136 |  |  |  |  |  |  | }, | 
| 137 |  |  |  |  |  |  | 'AAPL:Keywords' => { #PH | 
| 138 |  |  |  |  |  |  | Name => 'AppleKeywords', | 
| 139 |  |  |  |  |  |  | List => 'array', # this is an array of values | 
| 140 |  |  |  |  |  |  | Notes => q{ | 
| 141 |  |  |  |  |  |  | keywords written by Apple utilities, although they seem to use PDF:Keywords | 
| 142 |  |  |  |  |  |  | when reading | 
| 143 |  |  |  |  |  |  | }, | 
| 144 |  |  |  |  |  |  | }, | 
| 145 |  |  |  |  |  |  | ); | 
| 146 |  |  |  |  |  |  |  | 
| 147 |  |  |  |  |  |  | # tags in the PDF Root document catalog | 
| 148 |  |  |  |  |  |  | %Image::ExifTool::PDF::Root = ( | 
| 149 |  |  |  |  |  |  | GROUPS => { 2 => 'Document' }, | 
| 150 |  |  |  |  |  |  | # note: can't capture previous versions of Root since they are not parsed | 
| 151 |  |  |  |  |  |  | VARS => { CAPTURE => ['Root'] }, | 
| 152 |  |  |  |  |  |  | NOTES => 'This is the PDF document catalog.', | 
| 153 |  |  |  |  |  |  | MarkInfo => { | 
| 154 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::MarkInfo' }, | 
| 155 |  |  |  |  |  |  | }, | 
| 156 |  |  |  |  |  |  | Metadata => { | 
| 157 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::Metadata' }, | 
| 158 |  |  |  |  |  |  | }, | 
| 159 |  |  |  |  |  |  | Pages => { | 
| 160 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::Pages' }, | 
| 161 |  |  |  |  |  |  | }, | 
| 162 |  |  |  |  |  |  | Perms => { | 
| 163 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::Perms' }, | 
| 164 |  |  |  |  |  |  | }, | 
| 165 |  |  |  |  |  |  | AcroForm => { | 
| 166 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::AcroForm' }, | 
| 167 |  |  |  |  |  |  | }, | 
| 168 |  |  |  |  |  |  | Lang       => 'Language', | 
| 169 |  |  |  |  |  |  | PageLayout => { }, | 
| 170 |  |  |  |  |  |  | PageMode   => { }, | 
| 171 |  |  |  |  |  |  | Version    => 'PDFVersion', | 
| 172 |  |  |  |  |  |  | ); | 
| 173 |  |  |  |  |  |  |  | 
| 174 |  |  |  |  |  |  | # tags extracted from the PDF Encrypt dictionary | 
| 175 |  |  |  |  |  |  | %Image::ExifTool::PDF::Encrypt = ( | 
| 176 |  |  |  |  |  |  | GROUPS => { 2 => 'Document' }, | 
| 177 |  |  |  |  |  |  | NOTES => 'Tags extracted from the document Encrypt dictionary.', | 
| 178 |  |  |  |  |  |  | Filter => { | 
| 179 |  |  |  |  |  |  | Name => 'Encryption', | 
| 180 |  |  |  |  |  |  | Notes => q{ | 
| 181 |  |  |  |  |  |  | extracted value is actually a combination of the Filter, SubFilter, V, R and | 
| 182 |  |  |  |  |  |  | Length information from the Encrypt dictionary | 
| 183 |  |  |  |  |  |  | }, | 
| 184 |  |  |  |  |  |  | }, | 
| 185 |  |  |  |  |  |  | P => { | 
| 186 |  |  |  |  |  |  | Name => 'UserAccess', | 
| 187 |  |  |  |  |  |  | ValueConv => '$val & 0x0f3c',  # ignore reserved bits | 
| 188 |  |  |  |  |  |  | PrintConvColumns => 2, | 
| 189 |  |  |  |  |  |  | PrintConv => { BITMASK => { | 
| 190 |  |  |  |  |  |  | 2 => 'Print', | 
| 191 |  |  |  |  |  |  | 3 => 'Modify', | 
| 192 |  |  |  |  |  |  | 4 => 'Copy', | 
| 193 |  |  |  |  |  |  | 5 => 'Annotate', | 
| 194 |  |  |  |  |  |  | 8 => 'Fill forms', | 
| 195 |  |  |  |  |  |  | 9 => 'Extract', | 
| 196 |  |  |  |  |  |  | 10 => 'Assemble', | 
| 197 |  |  |  |  |  |  | 11 => 'Print high-res', | 
| 198 |  |  |  |  |  |  | }}, | 
| 199 |  |  |  |  |  |  | }, | 
| 200 |  |  |  |  |  |  | ); | 
| 201 |  |  |  |  |  |  |  | 
| 202 |  |  |  |  |  |  | # tags in PDF Pages dictionary | 
| 203 |  |  |  |  |  |  | %Image::ExifTool::PDF::Pages = ( | 
| 204 |  |  |  |  |  |  | GROUPS => { 2 => 'Document' }, | 
| 205 |  |  |  |  |  |  | Count => 'PageCount', | 
| 206 |  |  |  |  |  |  | Kids => { | 
| 207 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::Kids' }, | 
| 208 |  |  |  |  |  |  | }, | 
| 209 |  |  |  |  |  |  | ); | 
| 210 |  |  |  |  |  |  |  | 
| 211 |  |  |  |  |  |  | # tags in PDF Perms dictionary | 
| 212 |  |  |  |  |  |  | %Image::ExifTool::PDF::Perms = ( | 
| 213 |  |  |  |  |  |  | NOTES => 'Additional document permissions imposed by digital signatures.', | 
| 214 |  |  |  |  |  |  | DocMDP => { | 
| 215 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::Signature' }, | 
| 216 |  |  |  |  |  |  | }, | 
| 217 |  |  |  |  |  |  | FieldMDP => { | 
| 218 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::Signature' }, | 
| 219 |  |  |  |  |  |  | }, | 
| 220 |  |  |  |  |  |  | UR3 => { | 
| 221 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::Signature' }, | 
| 222 |  |  |  |  |  |  | }, | 
| 223 |  |  |  |  |  |  | ); | 
| 224 |  |  |  |  |  |  |  | 
| 225 |  |  |  |  |  |  | # tags in PDF Perms dictionary | 
| 226 |  |  |  |  |  |  | %Image::ExifTool::PDF::AcroForm = ( | 
| 227 |  |  |  |  |  |  | PROCESS_PROC => \&ProcessAcroForm, | 
| 228 |  |  |  |  |  |  | _has_xfa => { | 
| 229 |  |  |  |  |  |  | Name => 'HasXFA', | 
| 230 |  |  |  |  |  |  | Notes => q{ | 
| 231 |  |  |  |  |  |  | this tag is defined if a document contains form fields, and is true if it | 
| 232 |  |  |  |  |  |  | uses XML Forms Architecture; not a real Tag ID | 
| 233 |  |  |  |  |  |  | }, | 
| 234 |  |  |  |  |  |  | PrintConv => { 'true' => 'Yes', 'false' => 'No' }, | 
| 235 |  |  |  |  |  |  | }, | 
| 236 |  |  |  |  |  |  | ); | 
| 237 |  |  |  |  |  |  |  | 
| 238 |  |  |  |  |  |  | # tags in PDF Kids dictionary | 
| 239 |  |  |  |  |  |  | %Image::ExifTool::PDF::Kids = ( | 
| 240 |  |  |  |  |  |  | Metadata => { | 
| 241 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::Metadata' }, | 
| 242 |  |  |  |  |  |  | }, | 
| 243 |  |  |  |  |  |  | PieceInfo => { | 
| 244 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::PieceInfo' }, | 
| 245 |  |  |  |  |  |  | }, | 
| 246 |  |  |  |  |  |  | Resources => { | 
| 247 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::Resources' }, | 
| 248 |  |  |  |  |  |  | }, | 
| 249 |  |  |  |  |  |  | Kids => { | 
| 250 |  |  |  |  |  |  | Condition => '$self->Options("ExtractEmbedded")', | 
| 251 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::Kids' }, | 
| 252 |  |  |  |  |  |  | }, | 
| 253 |  |  |  |  |  |  | ); | 
| 254 |  |  |  |  |  |  |  | 
| 255 |  |  |  |  |  |  | # tags in PDF Resources dictionary | 
| 256 |  |  |  |  |  |  | %Image::ExifTool::PDF::Resources = ( | 
| 257 |  |  |  |  |  |  | ColorSpace => { | 
| 258 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::ColorSpace' }, | 
| 259 |  |  |  |  |  |  | }, | 
| 260 |  |  |  |  |  |  | XObject => { | 
| 261 |  |  |  |  |  |  | Condition => '$self->Options("ExtractEmbedded")', | 
| 262 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::XObject' }, | 
| 263 |  |  |  |  |  |  | }, | 
| 264 |  |  |  |  |  |  | Properties => { | 
| 265 |  |  |  |  |  |  | Condition => '$self->Options("ExtractEmbedded")', | 
| 266 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::Properties' }, | 
| 267 |  |  |  |  |  |  | }, | 
| 268 |  |  |  |  |  |  | ); | 
| 269 |  |  |  |  |  |  |  | 
| 270 |  |  |  |  |  |  | # tags in PDF ColorSpace dictionary | 
| 271 |  |  |  |  |  |  | %Image::ExifTool::PDF::ColorSpace = ( | 
| 272 |  |  |  |  |  |  | DefaultRGB => { | 
| 273 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::DefaultRGB' }, | 
| 274 |  |  |  |  |  |  | ConvertToDict => 1, # (not seen yet, but just in case) | 
| 275 |  |  |  |  |  |  | }, | 
| 276 |  |  |  |  |  |  | DefaultCMYK => { | 
| 277 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::DefaultRGB' }, | 
| 278 |  |  |  |  |  |  | # hack: this is stored as an array instead of a dictionary in my | 
| 279 |  |  |  |  |  |  | # sample, so convert to a dictionary to extract the ICCBased element | 
| 280 |  |  |  |  |  |  | ConvertToDict => 1, | 
| 281 |  |  |  |  |  |  | }, | 
| 282 |  |  |  |  |  |  | Cs1 => { | 
| 283 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::DefaultRGB' }, | 
| 284 |  |  |  |  |  |  | ConvertToDict => 1, # (just in case) | 
| 285 |  |  |  |  |  |  | }, | 
| 286 |  |  |  |  |  |  | CS0 => { | 
| 287 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::DefaultRGB' }, | 
| 288 |  |  |  |  |  |  | ConvertToDict => 1, # (just in case) | 
| 289 |  |  |  |  |  |  | }, | 
| 290 |  |  |  |  |  |  | ); | 
| 291 |  |  |  |  |  |  |  | 
| 292 |  |  |  |  |  |  | # tags in PDF DefaultRGB dictionary | 
| 293 |  |  |  |  |  |  | %Image::ExifTool::PDF::DefaultRGB = ( | 
| 294 |  |  |  |  |  |  | ICCBased => { | 
| 295 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::ICCBased' }, | 
| 296 |  |  |  |  |  |  | }, | 
| 297 |  |  |  |  |  |  | ); | 
| 298 |  |  |  |  |  |  |  | 
| 299 |  |  |  |  |  |  | # tags in PDF ICCBased, Cs1 and CS0 dictionaries | 
| 300 |  |  |  |  |  |  | %Image::ExifTool::PDF::ICCBased = ( | 
| 301 |  |  |  |  |  |  | _stream => { | 
| 302 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::ICC_Profile::Main' }, | 
| 303 |  |  |  |  |  |  | }, | 
| 304 |  |  |  |  |  |  | ); | 
| 305 |  |  |  |  |  |  |  | 
| 306 |  |  |  |  |  |  | # tags in PDF XObject dictionary (parsed only if ExtractEmbedded is enabled) | 
| 307 |  |  |  |  |  |  | %Image::ExifTool::PDF::XObject = ( | 
| 308 |  |  |  |  |  |  | EXTRACT_UNKNOWN => 0,   # extract known but numbered tags (Im1, Im2, etc) | 
| 309 |  |  |  |  |  |  | Im => { | 
| 310 |  |  |  |  |  |  | Notes => q{ | 
| 311 |  |  |  |  |  |  | the L option enables information to be extracted from these | 
| 312 |  |  |  |  |  |  | embedded images | 
| 313 |  |  |  |  |  |  | }, | 
| 314 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::Im' }, | 
| 315 |  |  |  |  |  |  | }, | 
| 316 |  |  |  |  |  |  | ); | 
| 317 |  |  |  |  |  |  |  | 
| 318 |  |  |  |  |  |  | # tags in PDF Im# dictionary | 
| 319 |  |  |  |  |  |  | %Image::ExifTool::PDF::Im = ( | 
| 320 |  |  |  |  |  |  | NOTES => q{ | 
| 321 |  |  |  |  |  |  | Information extracted from embedded images with the L option. | 
| 322 |  |  |  |  |  |  | The EmbeddedImage and its metadata are extracted only for JPEG and Jpeg2000 | 
| 323 |  |  |  |  |  |  | image formats. | 
| 324 |  |  |  |  |  |  | }, | 
| 325 |  |  |  |  |  |  | Width => 'EmbeddedImageWidth', | 
| 326 |  |  |  |  |  |  | Height => 'EmbeddedImageHeight', | 
| 327 |  |  |  |  |  |  | Filter => { Name => 'EmbeddedImageFilter', List => 1 }, | 
| 328 |  |  |  |  |  |  | ColorSpace => { | 
| 329 |  |  |  |  |  |  | Name => 'EmbeddedImageColorSpace', | 
| 330 |  |  |  |  |  |  | List => 1, | 
| 331 |  |  |  |  |  |  | RawConv => 'ref $val ? undef : $val', # (ignore color space data) | 
| 332 |  |  |  |  |  |  | }, | 
| 333 |  |  |  |  |  |  | Image_stream => { | 
| 334 |  |  |  |  |  |  | Name => 'EmbeddedImage', | 
| 335 |  |  |  |  |  |  | Groups => { 2 => 'Preview' }, | 
| 336 |  |  |  |  |  |  | Binary => 1, | 
| 337 |  |  |  |  |  |  | }, | 
| 338 |  |  |  |  |  |  | ); | 
| 339 |  |  |  |  |  |  |  | 
| 340 |  |  |  |  |  |  | # tags in PDF Properties dictionary | 
| 341 |  |  |  |  |  |  | %Image::ExifTool::PDF::Properties = ( | 
| 342 |  |  |  |  |  |  | EXTRACT_UNKNOWN => 0,   # extract known but numbered tags (MC0, MC1, etc) | 
| 343 |  |  |  |  |  |  | MC => { | 
| 344 |  |  |  |  |  |  | Notes => q{ | 
| 345 |  |  |  |  |  |  | the L option enables information to be extracted from these | 
| 346 |  |  |  |  |  |  | embedded metadata dictionaries | 
| 347 |  |  |  |  |  |  | }, | 
| 348 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::MC' }, | 
| 349 |  |  |  |  |  |  | } | 
| 350 |  |  |  |  |  |  | ); | 
| 351 |  |  |  |  |  |  |  | 
| 352 |  |  |  |  |  |  | # tags in PDF MC# dictionary | 
| 353 |  |  |  |  |  |  | %Image::ExifTool::PDF::MC = ( | 
| 354 |  |  |  |  |  |  | Metadata => { | 
| 355 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::Metadata' }, | 
| 356 |  |  |  |  |  |  | } | 
| 357 |  |  |  |  |  |  | ); | 
| 358 |  |  |  |  |  |  |  | 
| 359 |  |  |  |  |  |  | # tags in PDF PieceInfo dictionary | 
| 360 |  |  |  |  |  |  | %Image::ExifTool::PDF::PieceInfo = ( | 
| 361 |  |  |  |  |  |  | AdobePhotoshop => { | 
| 362 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::AdobePhotoshop' }, | 
| 363 |  |  |  |  |  |  | }, | 
| 364 |  |  |  |  |  |  | Illustrator => { | 
| 365 |  |  |  |  |  |  | # assume this is an illustrator file if it contains this directory | 
| 366 |  |  |  |  |  |  | # and doesn't have a ".PDF" extension | 
| 367 |  |  |  |  |  |  | Condition => q{ | 
| 368 |  |  |  |  |  |  | $self->OverrideFileType("AI") unless $$self{FILE_EXT} and $$self{FILE_EXT} eq 'PDF'; | 
| 369 |  |  |  |  |  |  | return 1; | 
| 370 |  |  |  |  |  |  | }, | 
| 371 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::Illustrator' }, | 
| 372 |  |  |  |  |  |  | }, | 
| 373 |  |  |  |  |  |  | ); | 
| 374 |  |  |  |  |  |  |  | 
| 375 |  |  |  |  |  |  | # tags in PDF AdobePhotoshop dictionary | 
| 376 |  |  |  |  |  |  | %Image::ExifTool::PDF::AdobePhotoshop = ( | 
| 377 |  |  |  |  |  |  | Private => { | 
| 378 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::Private' }, | 
| 379 |  |  |  |  |  |  | }, | 
| 380 |  |  |  |  |  |  | ); | 
| 381 |  |  |  |  |  |  |  | 
| 382 |  |  |  |  |  |  | # tags in PDF Illustrator dictionary | 
| 383 |  |  |  |  |  |  | %Image::ExifTool::PDF::Illustrator = ( | 
| 384 |  |  |  |  |  |  | Private => { | 
| 385 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::AIPrivate' }, | 
| 386 |  |  |  |  |  |  | }, | 
| 387 |  |  |  |  |  |  | ); | 
| 388 |  |  |  |  |  |  |  | 
| 389 |  |  |  |  |  |  | # tags in PDF Private dictionary | 
| 390 |  |  |  |  |  |  | %Image::ExifTool::PDF::Private = ( | 
| 391 |  |  |  |  |  |  | ImageResources => { | 
| 392 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::ImageResources' }, | 
| 393 |  |  |  |  |  |  | }, | 
| 394 |  |  |  |  |  |  | ); | 
| 395 |  |  |  |  |  |  |  | 
| 396 |  |  |  |  |  |  | # tags in PDF AI Private dictionary | 
| 397 |  |  |  |  |  |  | %Image::ExifTool::PDF::AIPrivate = ( | 
| 398 |  |  |  |  |  |  | GROUPS => { 2 => 'Document' }, | 
| 399 |  |  |  |  |  |  | EXTRACT_UNKNOWN => 0,   # extract known but numbered tags | 
| 400 |  |  |  |  |  |  | AIMetaData => { | 
| 401 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::AIMetaData' }, | 
| 402 |  |  |  |  |  |  | }, | 
| 403 |  |  |  |  |  |  | AIPrivateData => { | 
| 404 |  |  |  |  |  |  | Notes => q{ | 
| 405 |  |  |  |  |  |  | the L option enables information to be extracted from embedded | 
| 406 |  |  |  |  |  |  | PostScript documents in the AIPrivateData# and AIPDFPrivateData# streams | 
| 407 |  |  |  |  |  |  | }, | 
| 408 |  |  |  |  |  |  | JoinStreams => 1,   # join streams from numbered tags and process as one | 
| 409 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PostScript::Main' }, | 
| 410 |  |  |  |  |  |  | }, | 
| 411 |  |  |  |  |  |  | AIPDFPrivateData => { | 
| 412 |  |  |  |  |  |  | JoinStreams => 1,   # join streams from numbered tags and process as one | 
| 413 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PostScript::Main' }, | 
| 414 |  |  |  |  |  |  | }, | 
| 415 |  |  |  |  |  |  | RoundTripVersion => { }, | 
| 416 |  |  |  |  |  |  | ContainerVersion => { }, | 
| 417 |  |  |  |  |  |  | CreatorVersion => { }, | 
| 418 |  |  |  |  |  |  | ); | 
| 419 |  |  |  |  |  |  |  | 
| 420 |  |  |  |  |  |  | # tags in PDF AIMetaData dictionary | 
| 421 |  |  |  |  |  |  | %Image::ExifTool::PDF::AIMetaData = ( | 
| 422 |  |  |  |  |  |  | _stream => { | 
| 423 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PostScript::Main' }, | 
| 424 |  |  |  |  |  |  | }, | 
| 425 |  |  |  |  |  |  | ); | 
| 426 |  |  |  |  |  |  |  | 
| 427 |  |  |  |  |  |  | # tags in PDF ImageResources dictionary | 
| 428 |  |  |  |  |  |  | %Image::ExifTool::PDF::ImageResources = ( | 
| 429 |  |  |  |  |  |  | _stream => { | 
| 430 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::Photoshop::Main' }, | 
| 431 |  |  |  |  |  |  | }, | 
| 432 |  |  |  |  |  |  | ); | 
| 433 |  |  |  |  |  |  |  | 
| 434 |  |  |  |  |  |  | # tags in PDF MarkInfo dictionary | 
| 435 |  |  |  |  |  |  | %Image::ExifTool::PDF::MarkInfo = ( | 
| 436 |  |  |  |  |  |  | GROUPS => { 2 => 'Document' }, | 
| 437 |  |  |  |  |  |  | Marked => { | 
| 438 |  |  |  |  |  |  | Name => 'TaggedPDF', | 
| 439 |  |  |  |  |  |  | Notes => "not a Tagged PDF if this tag is missing", | 
| 440 |  |  |  |  |  |  | PrintConv => { 'true' => 'Yes', 'false' => 'No' }, | 
| 441 |  |  |  |  |  |  | }, | 
| 442 |  |  |  |  |  |  | ); | 
| 443 |  |  |  |  |  |  |  | 
| 444 |  |  |  |  |  |  | # tags in PDF Metadata dictionary | 
| 445 |  |  |  |  |  |  | %Image::ExifTool::PDF::Metadata = ( | 
| 446 |  |  |  |  |  |  | GROUPS => { 2 => 'Document' }, | 
| 447 |  |  |  |  |  |  | XML_stream => { # this is the stream for a Subtype /XML dictionary (not a real tag) | 
| 448 |  |  |  |  |  |  | Name => 'XMP', | 
| 449 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::XMP::Main' }, | 
| 450 |  |  |  |  |  |  | }, | 
| 451 |  |  |  |  |  |  | ); | 
| 452 |  |  |  |  |  |  |  | 
| 453 |  |  |  |  |  |  | # tags in PDF signature directories (DocMDP, FieldMDP or UR3) | 
| 454 |  |  |  |  |  |  | %Image::ExifTool::PDF::Signature = ( | 
| 455 |  |  |  |  |  |  | GROUPS => { 2 => 'Document' }, | 
| 456 |  |  |  |  |  |  | ContactInfo => 'SignerContactInfo', | 
| 457 |  |  |  |  |  |  | Location => 'SigningLocation', | 
| 458 |  |  |  |  |  |  | M => { | 
| 459 |  |  |  |  |  |  | Name => 'SigningDate', | 
| 460 |  |  |  |  |  |  | Format => 'date', | 
| 461 |  |  |  |  |  |  | Groups => { 2 => 'Time' }, | 
| 462 |  |  |  |  |  |  | PrintConv => '$self->ConvertDateTime($val)', | 
| 463 |  |  |  |  |  |  | }, | 
| 464 |  |  |  |  |  |  | Name     => 'SigningAuthority', | 
| 465 |  |  |  |  |  |  | Reason   => 'SigningReason', | 
| 466 |  |  |  |  |  |  | Reference => { | 
| 467 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::Reference' }, | 
| 468 |  |  |  |  |  |  | }, | 
| 469 |  |  |  |  |  |  | Prop_AuthTime => { | 
| 470 |  |  |  |  |  |  | Name => 'AuthenticationTime', | 
| 471 |  |  |  |  |  |  | PrintConv => 'ConvertTimeSpan($val) . " ago"', | 
| 472 |  |  |  |  |  |  | }, | 
| 473 |  |  |  |  |  |  | Prop_AuthType => 'AuthenticationType', | 
| 474 |  |  |  |  |  |  | ); | 
| 475 |  |  |  |  |  |  |  | 
| 476 |  |  |  |  |  |  | # tags in PDF Reference dictionary | 
| 477 |  |  |  |  |  |  | %Image::ExifTool::PDF::Reference = ( | 
| 478 |  |  |  |  |  |  | TransformParams => { | 
| 479 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::TransformParams' }, | 
| 480 |  |  |  |  |  |  | }, | 
| 481 |  |  |  |  |  |  | ); | 
| 482 |  |  |  |  |  |  |  | 
| 483 |  |  |  |  |  |  | # tags in PDF TransformParams dictionary | 
| 484 |  |  |  |  |  |  | %Image::ExifTool::PDF::TransformParams = ( | 
| 485 |  |  |  |  |  |  | GROUPS => { 2 => 'Document' }, | 
| 486 |  |  |  |  |  |  | Annots => { | 
| 487 |  |  |  |  |  |  | Name => 'AnnotationUsageRights', | 
| 488 |  |  |  |  |  |  | Notes => q{ | 
| 489 |  |  |  |  |  |  | possible values are Create, Delete, Modify, Copy, Import and Export; | 
| 490 |  |  |  |  |  |  | additional values for UR3 signatures are Online and SummaryView | 
| 491 |  |  |  |  |  |  | }, | 
| 492 |  |  |  |  |  |  | List => 1, | 
| 493 |  |  |  |  |  |  | }, | 
| 494 |  |  |  |  |  |  | Document => { | 
| 495 |  |  |  |  |  |  | Name => 'DocumentUsageRights', | 
| 496 |  |  |  |  |  |  | Notes => 'only possible value is FullSave', | 
| 497 |  |  |  |  |  |  | List => 1, | 
| 498 |  |  |  |  |  |  | }, | 
| 499 |  |  |  |  |  |  | Form => { | 
| 500 |  |  |  |  |  |  | Name => 'FormUsageRights', | 
| 501 |  |  |  |  |  |  | Notes => q{ | 
| 502 |  |  |  |  |  |  | possible values are FillIn, Import, Export, SubmitStandalone and | 
| 503 |  |  |  |  |  |  | SpawnTemplate; additional values for UR3 signatures are BarcodePlaintext and | 
| 504 |  |  |  |  |  |  | Online | 
| 505 |  |  |  |  |  |  | }, | 
| 506 |  |  |  |  |  |  | List => 1, | 
| 507 |  |  |  |  |  |  | }, | 
| 508 |  |  |  |  |  |  | FormEX => { | 
| 509 |  |  |  |  |  |  | Name => 'FormExtraUsageRights', | 
| 510 |  |  |  |  |  |  | Notes => 'UR signatures only; only possible value is BarcodePlaintext', | 
| 511 |  |  |  |  |  |  | List => 1, | 
| 512 |  |  |  |  |  |  | }, | 
| 513 |  |  |  |  |  |  | Signature => { | 
| 514 |  |  |  |  |  |  | Name => 'SignatureUsageRights', | 
| 515 |  |  |  |  |  |  | Notes => 'only possible value is Modify', | 
| 516 |  |  |  |  |  |  | List => 1, | 
| 517 |  |  |  |  |  |  | }, | 
| 518 |  |  |  |  |  |  | EF => { | 
| 519 |  |  |  |  |  |  | Name => 'EmbeddedFileUsageRights', | 
| 520 |  |  |  |  |  |  | Notes => 'possible values are Create, Delete, Modify and Import', | 
| 521 |  |  |  |  |  |  | List => 1, | 
| 522 |  |  |  |  |  |  | }, | 
| 523 |  |  |  |  |  |  | Msg => 'UsageRightsMessage', | 
| 524 |  |  |  |  |  |  | P => { | 
| 525 |  |  |  |  |  |  | Name => 'ModificationPermissions', | 
| 526 |  |  |  |  |  |  | Notes => q{ | 
| 527 |  |  |  |  |  |  | 1-3 for DocMDP signatures, default 2; true/false for UR3 signatures, default | 
| 528 |  |  |  |  |  |  | false | 
| 529 |  |  |  |  |  |  | }, | 
| 530 |  |  |  |  |  |  | PrintConv => { | 
| 531 |  |  |  |  |  |  | 1 => 'No changes permitted', | 
| 532 |  |  |  |  |  |  | 2 => 'Fill forms, Create page templates, Sign', | 
| 533 |  |  |  |  |  |  | 3 => 'Fill forms, Create page templates, Sign, Create/Delete/Edit annotations', | 
| 534 |  |  |  |  |  |  | 'true' => 'Restrict all applications to reader permissions', | 
| 535 |  |  |  |  |  |  | 'false' => 'Do not restrict applications to reader permissions', | 
| 536 |  |  |  |  |  |  | }, | 
| 537 |  |  |  |  |  |  | }, | 
| 538 |  |  |  |  |  |  | Action => { | 
| 539 |  |  |  |  |  |  | Name => 'FieldPermissions', | 
| 540 |  |  |  |  |  |  | Notes => 'FieldMDP signatures only', | 
| 541 |  |  |  |  |  |  | PrintConv => { | 
| 542 |  |  |  |  |  |  | 'All' => 'Disallow changes to all form fields', | 
| 543 |  |  |  |  |  |  | 'Include' => 'Disallow changes to specified form fields', | 
| 544 |  |  |  |  |  |  | 'Exclude' => 'Allow changes to specified form fields', | 
| 545 |  |  |  |  |  |  | }, | 
| 546 |  |  |  |  |  |  | }, | 
| 547 |  |  |  |  |  |  | Fields => { | 
| 548 |  |  |  |  |  |  | Notes => 'FieldMDP signatures only', | 
| 549 |  |  |  |  |  |  | Name => 'FormFields', | 
| 550 |  |  |  |  |  |  | List => 1, | 
| 551 |  |  |  |  |  |  | }, | 
| 552 |  |  |  |  |  |  | ); | 
| 553 |  |  |  |  |  |  |  | 
| 554 |  |  |  |  |  |  | # unknown tags for use in verbose option | 
| 555 |  |  |  |  |  |  | %Image::ExifTool::PDF::Unknown = ( | 
| 556 |  |  |  |  |  |  | GROUPS => { 2 => 'Unknown' }, | 
| 557 |  |  |  |  |  |  | ); | 
| 558 |  |  |  |  |  |  |  | 
| 559 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 560 |  |  |  |  |  |  | # AutoLoad our writer routines when necessary | 
| 561 |  |  |  |  |  |  | # | 
| 562 |  |  |  |  |  |  | sub AUTOLOAD | 
| 563 |  |  |  |  |  |  | { | 
| 564 | 19 |  |  | 19 |  | 142 | return Image::ExifTool::DoAutoLoad($AUTOLOAD, @_); | 
| 565 |  |  |  |  |  |  | } | 
| 566 |  |  |  |  |  |  |  | 
| 567 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 568 |  |  |  |  |  |  | # Convert from PDF to EXIF-style date/time | 
| 569 |  |  |  |  |  |  | # Inputs: 0) PDF date/time string (D:YYYYmmddHHMMSS+HH'MM') | 
| 570 |  |  |  |  |  |  | # Returns: EXIF date string (YYYY:mm:dd HH:MM:SS+HH:MM) | 
| 571 |  |  |  |  |  |  | sub ConvertPDFDate($) | 
| 572 |  |  |  |  |  |  | { | 
| 573 | 10 |  |  | 10 | 0 | 25 | my $date = shift; | 
| 574 |  |  |  |  |  |  | # remove optional 'D:' prefix | 
| 575 | 10 |  |  |  |  | 52 | $date =~ s/^D://; | 
| 576 |  |  |  |  |  |  | # fill in default values if necessary | 
| 577 |  |  |  |  |  |  | #              YYYYmmddHHMMSS | 
| 578 | 10 |  |  |  |  | 25 | my $default = '00000101000000'; | 
| 579 | 10 | 50 |  |  |  | 31 | if (length $date < length $default) { | 
| 580 | 0 |  |  |  |  | 0 | $date .= substr($default, length $date); | 
| 581 |  |  |  |  |  |  | } | 
| 582 | 10 | 50 |  |  |  | 46 | $date =~ /^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})(.*)/ or return $date; | 
| 583 | 10 |  |  |  |  | 73 | $date = "$1:$2:$3 $4:$5:$6"; | 
| 584 | 10 | 50 |  |  |  | 43 | if ($7) { | 
| 585 | 10 |  |  |  |  | 22 | my $tz = $7; | 
| 586 | 10 | 50 |  |  |  | 64 | if ($tz =~ /^\s*Z/i) { | 
|  |  | 50 |  |  |  |  |  | 
| 587 |  |  |  |  |  |  | # ignore any "HH'mm'" after the Z (OS X 10.6 does this) | 
| 588 | 0 |  |  |  |  | 0 | $date .= 'Z'; | 
| 589 |  |  |  |  |  |  | # tolerate some improper formatting in timezone specification | 
| 590 |  |  |  |  |  |  | } elsif ($tz =~ /^\s*([-+])\s*(\d+)[': ]+(\d*)/) { | 
| 591 | 10 |  | 50 |  |  | 48 | $date .= $1 . $2 . ':' . ($3 || '00'); | 
| 592 |  |  |  |  |  |  | } | 
| 593 |  |  |  |  |  |  | } | 
| 594 | 10 |  |  |  |  | 29 | return $date; | 
| 595 |  |  |  |  |  |  | } | 
| 596 |  |  |  |  |  |  |  | 
| 597 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 598 |  |  |  |  |  |  | # Locate any object in the XRef tables (including compressed objects) | 
| 599 |  |  |  |  |  |  | # Inputs: 0) XRef reference, 1) object reference string (or free object number) | 
| 600 |  |  |  |  |  |  | # Returns: offset to object in file or compressed object reference string, | 
| 601 |  |  |  |  |  |  | #          0 if object is free, or undefined on error | 
| 602 |  |  |  |  |  |  | sub LocateAnyObject($$) | 
| 603 |  |  |  |  |  |  | { | 
| 604 | 238 |  |  | 238 | 0 | 437 | my ($xref, $ref) = @_; | 
| 605 | 238 | 50 |  |  |  | 491 | return undef unless $xref; | 
| 606 | 238 | 100 |  |  |  | 874 | return $$xref{$ref} if exists $$xref{$ref}; | 
| 607 |  |  |  |  |  |  | # get the object number | 
| 608 | 7 | 50 |  |  |  | 52 | return undef unless $ref =~ /^(\d+)/; | 
| 609 | 7 |  |  |  |  | 29 | my $objNum = $1; | 
| 610 |  |  |  |  |  |  | # return 0 if the object number has been reused (old object is free) | 
| 611 | 7 | 100 |  |  |  | 44 | return 0 if defined $$xref{$objNum}; | 
| 612 |  |  |  |  |  |  | # | 
| 613 |  |  |  |  |  |  | # scan our XRef stream dictionaries for this object | 
| 614 |  |  |  |  |  |  | # | 
| 615 | 1 | 50 |  |  |  | 9 | return undef unless $$xref{dicts}; | 
| 616 | 0 |  |  |  |  | 0 | my $dict; | 
| 617 | 0 |  |  |  |  | 0 | foreach $dict (@{$$xref{dicts}}) { | 
|  | 0 |  |  |  |  | 0 |  | 
| 618 |  |  |  |  |  |  | # quick check to see if the object is in the range for this xref stream | 
| 619 | 0 | 0 |  |  |  | 0 | next if $objNum >= $$dict{Size}; | 
| 620 | 0 |  |  |  |  | 0 | my $index = $$dict{Index}; | 
| 621 | 0 | 0 |  |  |  | 0 | next if $objNum < $$index[0]; | 
| 622 |  |  |  |  |  |  | # scan the tables for the specified object | 
| 623 | 0 |  |  |  |  | 0 | my $size = $$dict{_entry_size}; | 
| 624 | 0 |  |  |  |  | 0 | my $num = scalar(@$index) / 2; | 
| 625 | 0 |  |  |  |  | 0 | my $tot = 0; | 
| 626 | 0 |  |  |  |  | 0 | my $i; | 
| 627 | 0 |  |  |  |  | 0 | for ($i=0; $i<$num; ++$i) { | 
| 628 | 0 |  |  |  |  | 0 | my $start = $$index[$i*2]; | 
| 629 | 0 |  |  |  |  | 0 | my $count = $$index[$i*2+1]; | 
| 630 |  |  |  |  |  |  | # table is in ascending order, so quit if we have passed the object | 
| 631 | 0 | 0 |  |  |  | 0 | last if $objNum < $start; | 
| 632 | 0 | 0 |  |  |  | 0 | if ($objNum < $start + $count) { | 
| 633 | 0 |  |  |  |  | 0 | my $offset = $size * ($objNum - $start + $tot); | 
| 634 | 0 | 0 |  |  |  | 0 | last if $offset + $size > length $$dict{_stream}; | 
| 635 | 0 |  |  |  |  | 0 | my @c = unpack("x$offset C$size", $$dict{_stream}); | 
| 636 |  |  |  |  |  |  | # extract values from this table entry | 
| 637 |  |  |  |  |  |  | # (can be 1, 2, 3, 4, etc.. bytes per value) | 
| 638 | 0 |  |  |  |  | 0 | my (@t, $j, $k); | 
| 639 | 0 |  |  |  |  | 0 | my $w = $$dict{W}; | 
| 640 | 0 |  |  |  |  | 0 | for ($j=0; $j<3; ++$j) { | 
| 641 |  |  |  |  |  |  | # use default value if W entry is 0 (as per spec) | 
| 642 |  |  |  |  |  |  | # - 0th element defaults to 1, others default to 0 | 
| 643 | 0 | 0 |  |  |  | 0 | $$w[$j] or $t[$j] = ($j ? 0 : 1), next; | 
|  |  | 0 |  |  |  |  |  | 
| 644 | 0 |  |  |  |  | 0 | $t[$j] = shift(@c); | 
| 645 | 0 |  |  |  |  | 0 | for ($k=1; $k < $$w[$j]; ++$k) { | 
| 646 | 0 |  |  |  |  | 0 | $t[$j] = 256 * $t[$j] + shift(@c); | 
| 647 |  |  |  |  |  |  | } | 
| 648 |  |  |  |  |  |  | } | 
| 649 |  |  |  |  |  |  | # by default, use "o g R" as the xref key | 
| 650 |  |  |  |  |  |  | # (o = object number, g = generation number) | 
| 651 | 0 |  |  |  |  | 0 | my $ref2 = "$objNum $t[2] R"; | 
| 652 | 0 | 0 |  |  |  | 0 | if ($t[0] == 1) { | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 653 |  |  |  |  |  |  | # normal object reference: | 
| 654 |  |  |  |  |  |  | # $t[1]=offset of object from start, $t[2]=generation number | 
| 655 | 0 |  |  |  |  | 0 | $$xref{$ref2} = $t[1]; | 
| 656 |  |  |  |  |  |  | } elsif ($t[0] == 2) { | 
| 657 |  |  |  |  |  |  | # compressed object reference: | 
| 658 |  |  |  |  |  |  | # $t[1]=stream object number, $t[2]=index of object in stream | 
| 659 | 0 |  |  |  |  | 0 | $ref2 = "$objNum 0 R"; | 
| 660 | 0 |  |  |  |  | 0 | $$xref{$ref2} = "I$t[2] $t[1] 0 R"; | 
| 661 |  |  |  |  |  |  | } elsif ($t[0] == 0) { | 
| 662 |  |  |  |  |  |  | # free object: | 
| 663 |  |  |  |  |  |  | # $t[1]=next free object in linked list, $t[2]=generation number | 
| 664 | 0 |  |  |  |  | 0 | $$xref{$ref2} = 0; | 
| 665 |  |  |  |  |  |  | } else { | 
| 666 |  |  |  |  |  |  | # treat as a null object | 
| 667 | 0 |  |  |  |  | 0 | $$xref{$ref2} = undef; | 
| 668 |  |  |  |  |  |  | } | 
| 669 | 0 |  |  |  |  | 0 | $$xref{$objNum} = $t[1];    # remember offsets by object number too | 
| 670 | 0 | 0 |  |  |  | 0 | return $$xref{$ref} if $ref eq $ref2; | 
| 671 | 0 |  |  |  |  | 0 | return 0;   # object is free or was reused | 
| 672 |  |  |  |  |  |  | } | 
| 673 | 0 |  |  |  |  | 0 | $tot += $count; | 
| 674 |  |  |  |  |  |  | } | 
| 675 |  |  |  |  |  |  | } | 
| 676 | 0 |  |  |  |  | 0 | return undef; | 
| 677 |  |  |  |  |  |  | } | 
| 678 |  |  |  |  |  |  |  | 
| 679 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 680 |  |  |  |  |  |  | # Locate a regular object in the XRef tables (does not include compressed objects) | 
| 681 |  |  |  |  |  |  | # Inputs: 0) XRef reference, 1) object reference string (or free object number) | 
| 682 |  |  |  |  |  |  | # Returns: offset to object in file, 0 if object is free, | 
| 683 |  |  |  |  |  |  | #          or undef on error or if object was compressed | 
| 684 |  |  |  |  |  |  | sub LocateObject($$) | 
| 685 |  |  |  |  |  |  | { | 
| 686 | 41 |  |  | 41 | 0 | 150 | my ($xref, $ref) = @_; | 
| 687 | 41 |  |  |  |  | 110 | my $offset = LocateAnyObject($xref, $ref); | 
| 688 | 41 | 50 | 66 |  |  | 332 | return undef if $offset and $offset =~ /^I/; | 
| 689 | 41 |  |  |  |  | 176 | return $offset; | 
| 690 |  |  |  |  |  |  | } | 
| 691 |  |  |  |  |  |  |  | 
| 692 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 693 |  |  |  |  |  |  | # Check that the correct object is located at the specified file offset | 
| 694 |  |  |  |  |  |  | # Inputs: 0) ExifTool ref, 1) object name, 2) object reference string, 3) file offset | 
| 695 |  |  |  |  |  |  | # Returns: first non-blank line at start of object, or undef on error | 
| 696 |  |  |  |  |  |  | sub CheckObject($$$$) | 
| 697 |  |  |  |  |  |  | { | 
| 698 | 217 |  |  | 217 | 0 | 527 | my ($et, $tag, $ref, $offset) = @_; | 
| 699 | 217 |  |  |  |  | 377 | my ($data, $obj, $dat, $pat); | 
| 700 |  |  |  |  |  |  |  | 
| 701 | 217 |  |  |  |  | 508 | my $raf = $$et{RAF}; | 
| 702 | 217 | 50 |  |  |  | 877 | $raf->Seek($offset+$$et{PDFBase}, 0) or $et->Warn("Bad $tag offset"), return undef; | 
| 703 |  |  |  |  |  |  | # verify that we are reading the expected object | 
| 704 | 217 |  |  |  |  | 1089 | ($obj = $ref) =~ s/R/obj/; | 
| 705 | 217 |  |  |  |  | 377 | for (;;) { | 
| 706 | 217 | 50 |  |  |  | 721 | $raf->ReadLine($data) or $et->Warn("Error reading $tag data"), return undef; | 
| 707 | 217 | 50 |  |  |  | 4632 | last if $data =~ s/^$obj//; | 
| 708 | 0 | 0 |  |  |  | 0 | next if $data =~ /^\s+$/;   # keep reading if this was a blank line | 
| 709 |  |  |  |  |  |  | # handle cases where other whitespace characters are used in the object ID string | 
| 710 | 0 |  |  |  |  | 0 | while ($data =~ /^\d+(\s+\d+)?\s*$/) { | 
| 711 | 0 |  |  |  |  | 0 | $raf->ReadLine($dat); | 
| 712 | 0 |  |  |  |  | 0 | $data .= $dat; | 
| 713 |  |  |  |  |  |  | } | 
| 714 | 0 |  |  |  |  | 0 | ($pat = $obj) =~ s/ /\\s+/g; | 
| 715 | 0 | 0 |  |  |  | 0 | unless ($data =~ s/$pat//) { | 
| 716 | 0 |  |  |  |  | 0 | $tag = ucfirst $tag; | 
| 717 | 0 |  |  |  |  | 0 | $et->Warn("$tag object ($obj) not found at offset $offset"); | 
| 718 | 0 |  |  |  |  | 0 | return undef; | 
| 719 |  |  |  |  |  |  | } | 
| 720 | 0 |  |  |  |  | 0 | last; | 
| 721 |  |  |  |  |  |  | } | 
| 722 |  |  |  |  |  |  | # read the first line of data from the object (ignoring blank lines and comments) | 
| 723 | 217 |  |  |  |  | 463 | for (;;) { | 
| 724 | 434 | 100 | 66 |  |  | 2183 | last if $data =~ /\S/ and $data !~ /^\s*%/; | 
| 725 | 217 | 50 |  |  |  | 704 | $raf->ReadLine($data) or $et->Warn("Error reading $tag data"), return undef; | 
| 726 |  |  |  |  |  |  | } | 
| 727 | 217 |  |  |  |  | 699 | return $data; | 
| 728 |  |  |  |  |  |  | } | 
| 729 |  |  |  |  |  |  |  | 
| 730 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 731 |  |  |  |  |  |  | # Fetch indirect object from file (from inside a stream if required) | 
| 732 |  |  |  |  |  |  | # Inputs: 0) ExifTool object reference, 1) object reference string, | 
| 733 |  |  |  |  |  |  | #         2) xref lookup, 3) object name (for warning messages) | 
| 734 |  |  |  |  |  |  | # Returns: object data or undefined on error | 
| 735 |  |  |  |  |  |  | # Notes: sets $lastFetched to the object reference, or undef if the object | 
| 736 |  |  |  |  |  |  | #        was extracted from an encrypted stream | 
| 737 |  |  |  |  |  |  | sub FetchObject($$$$) | 
| 738 |  |  |  |  |  |  | { | 
| 739 | 197 |  |  | 197 | 0 | 450 | my ($et, $ref, $xref, $tag) = @_; | 
| 740 | 197 |  |  |  |  | 342 | $lastFetched = $ref;    # save this for decoding if necessary | 
| 741 | 197 |  |  |  |  | 464 | my $offset = LocateAnyObject($xref, $ref); | 
| 742 | 197 |  |  |  |  | 358 | $lastOffset = $offset; | 
| 743 | 197 | 100 |  |  |  | 428 | unless ($offset) { | 
| 744 | 5 | 50 |  |  |  | 20 | $et->Warn("Bad $tag reference") unless defined $offset; | 
| 745 | 5 |  |  |  |  | 13 | return undef; | 
| 746 |  |  |  |  |  |  | } | 
| 747 | 192 |  |  |  |  | 333 | my ($data, $obj); | 
| 748 | 192 | 50 |  |  |  | 596 | if ($offset =~ s/^I(\d+) //) { | 
| 749 | 0 |  |  |  |  | 0 | my $index = $1; # object index in stream | 
| 750 | 0 |  |  |  |  | 0 | my ($objNum) = split ' ', $ref; # save original object number | 
| 751 | 0 |  |  |  |  | 0 | $ref = $offset; # now a reference to the containing stream object | 
| 752 | 0 |  |  |  |  | 0 | $obj = $streamObjs{$ref}; | 
| 753 | 0 | 0 |  |  |  | 0 | unless ($obj) { | 
| 754 |  |  |  |  |  |  | # don't try to load the same object stream twice | 
| 755 | 0 | 0 |  |  |  | 0 | return undef if defined $obj; | 
| 756 | 0 |  |  |  |  | 0 | $streamObjs{$ref} = ''; | 
| 757 |  |  |  |  |  |  | # load the parent object stream | 
| 758 | 0 |  |  |  |  | 0 | $obj = FetchObject($et, $ref, $xref, $tag); | 
| 759 |  |  |  |  |  |  | # make sure it contains everything we need | 
| 760 | 0 | 0 | 0 |  |  | 0 | return undef unless defined $obj and ref($obj) eq 'HASH'; | 
| 761 | 0 | 0 | 0 |  |  | 0 | return undef unless $$obj{First} and $$obj{N}; | 
| 762 | 0 | 0 |  |  |  | 0 | return undef unless DecodeStream($et, $obj); | 
| 763 |  |  |  |  |  |  | # add a special '_table' entry to this dictionary which contains | 
| 764 |  |  |  |  |  |  | # the list of object number/offset pairs from the stream header | 
| 765 | 0 |  |  |  |  | 0 | my $num = $$obj{N} * 2; | 
| 766 | 0 |  |  |  |  | 0 | my @table = split ' ', $$obj{_stream}, $num; | 
| 767 | 0 | 0 |  |  |  | 0 | return undef unless @table == $num; | 
| 768 |  |  |  |  |  |  | # remove everything before first object in stream | 
| 769 | 0 |  |  |  |  | 0 | $$obj{_stream} = substr($$obj{_stream}, $$obj{First}); | 
| 770 | 0 |  |  |  |  | 0 | $table[$num-1] =~ s/^(\d+).*/$1/s;  # trim excess from last number | 
| 771 | 0 |  |  |  |  | 0 | $$obj{_table} = \@table; | 
| 772 |  |  |  |  |  |  | # save the object stream so we don't have to re-load it later | 
| 773 | 0 |  |  |  |  | 0 | $streamObjs{$ref} = $obj; | 
| 774 |  |  |  |  |  |  | } | 
| 775 |  |  |  |  |  |  | # verify that we have the specified object | 
| 776 | 0 |  |  |  |  | 0 | my $i = 2 * $index; | 
| 777 | 0 |  |  |  |  | 0 | my $table = $$obj{_table}; | 
| 778 | 0 | 0 | 0 |  |  | 0 | unless ($index < $$obj{N} and $$table[$i] == $objNum) { | 
| 779 | 0 |  |  |  |  | 0 | $et->Warn("Bad index for stream object $tag"); | 
| 780 | 0 |  |  |  |  | 0 | return undef; | 
| 781 |  |  |  |  |  |  | } | 
| 782 |  |  |  |  |  |  | # extract the object at the specified index in the stream | 
| 783 |  |  |  |  |  |  | # (offsets in table are in sequential order, so we can subtract from | 
| 784 |  |  |  |  |  |  | #  the next offset to get the object length) | 
| 785 | 0 |  |  |  |  | 0 | $offset = $$table[$i + 1]; | 
| 786 | 0 |  | 0 |  |  | 0 | my $len = ($$table[$i + 3] || length($$obj{_stream})) - $offset; | 
| 787 | 0 |  |  |  |  | 0 | $data = substr($$obj{_stream}, $offset, $len); | 
| 788 |  |  |  |  |  |  | # avoid re-decrypting data in already decrypted streams | 
| 789 | 0 | 0 |  |  |  | 0 | undef $lastFetched if $cryptStream; | 
| 790 | 0 |  |  |  |  | 0 | return ExtractObject($et, \$data); | 
| 791 |  |  |  |  |  |  | } | 
| 792 |  |  |  |  |  |  | # load the start of the object | 
| 793 | 192 |  |  |  |  | 431 | $data = CheckObject($et, $tag, $ref, $offset); | 
| 794 | 192 | 50 |  |  |  | 467 | return undef unless defined $data; | 
| 795 |  |  |  |  |  |  |  | 
| 796 | 192 |  |  |  |  | 628 | return ExtractObject($et, \$data, $$et{RAF}, $xref); | 
| 797 |  |  |  |  |  |  | } | 
| 798 |  |  |  |  |  |  |  | 
| 799 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 800 |  |  |  |  |  |  | # Convert PDF value to something readable | 
| 801 |  |  |  |  |  |  | # Inputs: 0) PDF object data | 
| 802 |  |  |  |  |  |  | # Returns: converted object | 
| 803 |  |  |  |  |  |  | sub ReadPDFValue($) | 
| 804 |  |  |  |  |  |  | { | 
| 805 | 148 |  |  | 148 | 0 | 257 | my $str = shift; | 
| 806 |  |  |  |  |  |  | # decode all strings in an array | 
| 807 | 148 | 100 |  |  |  | 322 | if (ref $str eq 'ARRAY') { | 
| 808 |  |  |  |  |  |  | # create new list to not alter the original data when rewriting | 
| 809 | 12 |  |  |  |  | 91 | my ($val, @vals); | 
| 810 | 12 |  |  |  |  | 45 | foreach $val (@$str) { | 
| 811 | 20 |  |  |  |  | 82 | push @vals, ReadPDFValue($val); | 
| 812 |  |  |  |  |  |  | } | 
| 813 | 12 |  |  |  |  | 51 | return \@vals; | 
| 814 |  |  |  |  |  |  | } | 
| 815 | 136 | 50 |  |  |  | 339 | length $str or return $str; | 
| 816 | 136 |  |  |  |  | 279 | my $delim = substr($str, 0, 1); | 
| 817 | 136 | 100 |  |  |  | 453 | if ($delim eq '(') {    # literal string | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 818 | 58 | 50 |  |  |  | 359 | $str = $1 if $str =~ /^.*?\((.*)\)/s;   # remove brackets | 
| 819 |  |  |  |  |  |  | # decode escape sequences in literal strings | 
| 820 | 58 |  |  |  |  | 177 | while ($str =~ /\\(.)/sg) { | 
| 821 | 0 |  |  |  |  | 0 | my $n = pos($str) - 2; | 
| 822 | 0 |  |  |  |  | 0 | my $c = $1; | 
| 823 | 0 |  |  |  |  | 0 | my $r; | 
| 824 | 0 | 0 |  |  |  | 0 | if ($c =~ /[0-7]/) { | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 825 |  |  |  |  |  |  | # get up to 2 more octal digits | 
| 826 | 0 | 0 |  |  |  | 0 | $c .= $1 if $str =~ /\G([0-7]{1,2})/g; | 
| 827 |  |  |  |  |  |  | # convert octal escape code | 
| 828 | 0 |  |  |  |  | 0 | $r = chr(oct($c) & 0xff); | 
| 829 |  |  |  |  |  |  | } elsif ($c eq "\x0d") { | 
| 830 |  |  |  |  |  |  | # the string is continued if the line ends with '\' | 
| 831 |  |  |  |  |  |  | # (also remove "\x0d\x0a") | 
| 832 | 0 | 0 |  |  |  | 0 | $c .= $1 if $str =~ /\G(\x0a)/g; | 
| 833 | 0 |  |  |  |  | 0 | $r = ''; | 
| 834 |  |  |  |  |  |  | } elsif ($c eq "\x0a") { | 
| 835 | 0 |  |  |  |  | 0 | $r = ''; | 
| 836 |  |  |  |  |  |  | } else { | 
| 837 |  |  |  |  |  |  | # convert escaped characters | 
| 838 | 0 |  |  |  |  | 0 | ($r = $c) =~ tr/nrtbf/\n\r\t\b\f/; | 
| 839 |  |  |  |  |  |  | } | 
| 840 | 0 |  |  |  |  | 0 | substr($str, $n, length($c)+1) = $r; | 
| 841 |  |  |  |  |  |  | # continue search after this character | 
| 842 | 0 |  |  |  |  | 0 | pos($str) = $n + length($r); | 
| 843 |  |  |  |  |  |  | } | 
| 844 | 58 | 50 |  |  |  | 133 | Crypt(\$str, $lastFetched) if $cryptString; | 
| 845 |  |  |  |  |  |  | } elsif ($delim eq '<') {   # hex string | 
| 846 |  |  |  |  |  |  | # decode hex data | 
| 847 | 41 |  |  |  |  | 108 | $str =~ tr/0-9A-Fa-f//dc; | 
| 848 | 41 | 50 |  |  |  | 121 | $str .= '0' if length($str) & 0x01; # (by the spec) | 
| 849 | 41 |  |  |  |  | 170 | $str = pack('H*', $str); | 
| 850 | 41 | 100 |  |  |  | 132 | Crypt(\$str, $lastFetched) if $cryptString; | 
| 851 |  |  |  |  |  |  | } elsif ($delim eq '/') {   # name | 
| 852 | 0 |  |  |  |  | 0 | $str = substr($str, 1); | 
| 853 |  |  |  |  |  |  | # convert escape codes (PDF 1.2 or later) | 
| 854 | 0 | 0 |  |  |  | 0 | $str =~ s/#([0-9a-f]{2})/chr(hex($1))/sgei if $pdfVer >= 1.2; | 
|  | 0 |  |  |  |  | 0 |  | 
| 855 |  |  |  |  |  |  | } | 
| 856 | 136 |  |  |  |  | 472 | return $str; | 
| 857 |  |  |  |  |  |  | } | 
| 858 |  |  |  |  |  |  |  | 
| 859 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 860 |  |  |  |  |  |  | # Extract PDF object from combination of buffered data and file | 
| 861 |  |  |  |  |  |  | # Inputs: 0) ExifTool object reference, 1) data reference, | 
| 862 |  |  |  |  |  |  | #         2) optional raf reference, 3) optional xref table | 
| 863 |  |  |  |  |  |  | # Returns: converted PDF object or undef on error | 
| 864 |  |  |  |  |  |  | #          a) dictionary object --> hash reference | 
| 865 |  |  |  |  |  |  | #          b) array object --> array reference | 
| 866 |  |  |  |  |  |  | #          c) indirect reference --> scalar reference | 
| 867 |  |  |  |  |  |  | #          d) string, name, integer, boolean, null --> scalar value | 
| 868 |  |  |  |  |  |  | # - updates $$dataPt on return to contain unused data | 
| 869 |  |  |  |  |  |  | # - creates two bogus entries ('_stream' and '_tags') in dictionaries to represent | 
| 870 |  |  |  |  |  |  | #   the stream data and a list of the tags (not including '_stream' and '_tags') | 
| 871 |  |  |  |  |  |  | #   in their original order | 
| 872 |  |  |  |  |  |  | sub ExtractObject($$;$$) | 
| 873 |  |  |  |  |  |  | { | 
| 874 | 754 |  |  | 754 | 0 | 1608 | my ($et, $dataPt, $raf, $xref) = @_; | 
| 875 | 754 |  |  |  |  | 1088 | my (@tags, $data, $objData); | 
| 876 | 754 |  |  |  |  | 1168 | my $dict = { }; | 
| 877 | 754 |  |  |  |  | 1308 | my $delim; | 
| 878 |  |  |  |  |  |  |  | 
| 879 | 754 |  |  |  |  | 1135 | for (;;) { | 
| 880 | 824 | 100 |  |  |  | 2842 | if ($$dataPt =~ /^\s*(<{1,2}|\[|\()/s) { | 
|  |  | 50 |  |  |  |  |  | 
| 881 | 754 |  |  |  |  | 1631 | $delim = $1; | 
| 882 | 754 |  |  |  |  | 1587 | $$dataPt =~ s/^\s+//;   # remove leading white space | 
| 883 | 754 |  |  |  |  | 1590 | $objData = ReadToNested($dataPt, $raf); | 
| 884 | 754 | 50 |  |  |  | 1474 | return undef unless defined $objData; | 
| 885 | 754 |  |  |  |  | 1125 | last; | 
| 886 |  |  |  |  |  |  | } elsif ($$dataPt =~ s{^\s*(\S[^[(/<>\s]*)\s*}{}s) { | 
| 887 |  |  |  |  |  |  | # | 
| 888 |  |  |  |  |  |  | # extract boolean, numerical, string, name, null object or indirect reference | 
| 889 |  |  |  |  |  |  | # | 
| 890 | 0 |  |  |  |  | 0 | $objData = $1; | 
| 891 |  |  |  |  |  |  | # look for an indirect reference | 
| 892 | 0 | 0 | 0 |  |  | 0 | if ($objData =~ /^\d+$/ and $$dataPt =~ s/^(\d+)\s+R//s) { | 
| 893 | 0 |  |  |  |  | 0 | $objData .= "$1 R"; | 
| 894 | 0 |  |  |  |  | 0 | $objData = \$objData;   # return scalar reference | 
| 895 |  |  |  |  |  |  | } | 
| 896 | 0 |  |  |  |  | 0 | return $objData;    # return simple scalar or scalar reference | 
| 897 |  |  |  |  |  |  | } | 
| 898 | 70 | 50 | 33 |  |  | 375 | $raf and $raf->ReadLine($data) or return undef; | 
| 899 | 70 |  |  |  |  | 174 | $$dataPt .= $data; | 
| 900 |  |  |  |  |  |  | } | 
| 901 |  |  |  |  |  |  | # | 
| 902 |  |  |  |  |  |  | # return literal string or hex string without parsing | 
| 903 |  |  |  |  |  |  | # | 
| 904 | 754 | 100 | 100 |  |  | 2919 | if ($delim eq '(' or $delim eq '<') { | 
|  |  | 100 |  |  |  |  |  | 
| 905 | 160 |  |  |  |  | 496 | return $objData; | 
| 906 |  |  |  |  |  |  | # | 
| 907 |  |  |  |  |  |  | # extract array | 
| 908 |  |  |  |  |  |  | # | 
| 909 |  |  |  |  |  |  | } elsif ($delim eq '[') { | 
| 910 | 167 | 50 |  |  |  | 711 | $objData =~ /^.*?\[(.*)\]/s or return undef; | 
| 911 | 167 |  |  |  |  | 402 | my $data = $1;    # brackets removed | 
| 912 | 167 |  |  |  |  | 224 | my @list; | 
| 913 | 167 |  |  |  |  | 220 | for (;;) { | 
| 914 | 594 | 100 |  |  |  | 1715 | last unless $data =~ m{\s*(\S[^[(/<>\s]*)}sg; | 
| 915 | 427 |  |  |  |  | 901 | my $val = $1; | 
| 916 | 427 | 100 |  |  |  | 1531 | if ($val =~ /^(<{1,2}|\[|\()/) { | 
|  |  | 100 |  |  |  |  |  | 
| 917 | 78 |  |  |  |  | 180 | my $pos = pos($data) - length($val); | 
| 918 |  |  |  |  |  |  | # nested dict, array, literal string or hex string | 
| 919 | 78 |  |  |  |  | 173 | my $buff = substr($data, $pos); | 
| 920 | 78 |  |  |  |  | 166 | $val = ReadToNested(\$buff); | 
| 921 | 78 | 50 |  |  |  | 231 | last unless defined $val; | 
| 922 | 78 |  |  |  |  | 200 | pos($data) = $pos + length($val); | 
| 923 | 78 |  |  |  |  | 234 | $val = ExtractObject($et, \$val); | 
| 924 |  |  |  |  |  |  | } elsif ($val =~ /^\d/) { | 
| 925 | 245 |  |  |  |  | 359 | my $pos = pos($data); | 
| 926 | 245 | 100 |  |  |  | 585 | if ($data =~ /\G\s+(\d+)\s+R/g) { | 
| 927 | 37 |  |  |  |  | 169 | $val = \ "$val $1 R";   # make a reference | 
| 928 |  |  |  |  |  |  | } else { | 
| 929 | 208 |  |  |  |  | 406 | pos($data) = $pos; | 
| 930 |  |  |  |  |  |  | } | 
| 931 |  |  |  |  |  |  | } | 
| 932 | 427 |  |  |  |  | 931 | push @list, $val; | 
| 933 |  |  |  |  |  |  | } | 
| 934 | 167 |  |  |  |  | 594 | return \@list; | 
| 935 |  |  |  |  |  |  | } | 
| 936 |  |  |  |  |  |  | # | 
| 937 |  |  |  |  |  |  | # extract dictionary | 
| 938 |  |  |  |  |  |  | # | 
| 939 |  |  |  |  |  |  | # Note: entries are not necessarily separated by whitespace (doh!) | 
| 940 |  |  |  |  |  |  | # eg) "/Tag/Name", "/Tag(string)", "/Tag[array]", etc are legal! | 
| 941 |  |  |  |  |  |  | # Also, they may be separated by a comment (eg. "/Tag%comment\nValue"), | 
| 942 |  |  |  |  |  |  | # but comments have already been removed | 
| 943 | 427 |  |  |  |  | 2339 | while ($objData =~ m{(\s*)/([^/[\]()<>{}\s]+)\s*(\S[^[(/<>\s]*)}sg) { | 
| 944 | 1229 |  |  |  |  | 2647 | my $tag = $2; | 
| 945 | 1229 |  |  |  |  | 2330 | my $val = $3; | 
| 946 | 1229 | 100 |  |  |  | 3989 | if ($val =~ /^(<{1,2}|\[|\()/) { | 
|  |  | 100 |  |  |  |  |  | 
| 947 |  |  |  |  |  |  | # nested dict, array, literal string or hex string | 
| 948 | 396 |  |  |  |  | 1112 | $objData = substr($objData, pos($objData)-length($val)); | 
| 949 | 396 |  |  |  |  | 847 | $val = ReadToNested(\$objData, $raf); | 
| 950 | 396 | 50 |  |  |  | 841 | last unless defined $val; | 
| 951 | 396 |  |  |  |  | 974 | $val = ExtractObject($et, \$val); | 
| 952 | 396 |  |  |  |  | 919 | pos($objData) = 0; | 
| 953 |  |  |  |  |  |  | } elsif ($val =~ /^\d/) { | 
| 954 | 618 |  |  |  |  | 1048 | my $pos = pos($objData); | 
| 955 | 618 | 100 |  |  |  | 1807 | if ($objData =~ /\G\s+(\d+)\s+R/sg) { | 
| 956 | 416 |  |  |  |  | 1408 | $val = \ "$val $1 R";   # make a reference | 
| 957 |  |  |  |  |  |  | } else { | 
| 958 | 202 |  |  |  |  | 550 | pos($objData) = $pos; | 
| 959 |  |  |  |  |  |  | } | 
| 960 |  |  |  |  |  |  | } | 
| 961 | 1229 | 50 |  |  |  | 2789 | if ($$dict{$tag}) { | 
| 962 |  |  |  |  |  |  | # duplicate dictionary entries are not allowed | 
| 963 | 0 |  |  |  |  | 0 | $et->Warn("Duplicate '${tag}' entry in dictionary (ignored)"); | 
| 964 |  |  |  |  |  |  | } else { | 
| 965 |  |  |  |  |  |  | # save the entry | 
| 966 | 1229 |  |  |  |  | 2378 | push @tags, $tag; | 
| 967 | 1229 |  |  |  |  | 5564 | $$dict{$tag} = $val; | 
| 968 |  |  |  |  |  |  | } | 
| 969 |  |  |  |  |  |  | } | 
| 970 | 427 | 50 |  |  |  | 1046 | return undef unless @tags; | 
| 971 | 427 |  |  |  |  | 864 | $$dict{_tags} = \@tags; | 
| 972 | 427 | 100 |  |  |  | 1056 | return $dict unless $raf;   # direct objects can not have streams | 
| 973 |  |  |  |  |  |  | # | 
| 974 |  |  |  |  |  |  | # extract the stream object | 
| 975 |  |  |  |  |  |  | # | 
| 976 |  |  |  |  |  |  | # dictionary must specify stream Length | 
| 977 | 262 | 100 |  |  |  | 1097 | my $length = $$dict{Length} or return $dict; | 
| 978 | 43 | 100 |  |  |  | 188 | if (ref $length) { | 
| 979 | 25 |  |  |  |  | 60 | $length = $$length; | 
| 980 | 25 |  |  |  |  | 90 | my $oldpos = $raf->Tell(); | 
| 981 |  |  |  |  |  |  | # get the location of the object specifying the length | 
| 982 |  |  |  |  |  |  | # (compressed objects are not allowed) | 
| 983 | 25 | 50 |  |  |  | 131 | my $offset = LocateObject($xref, $length) or return $dict; | 
| 984 | 25 | 50 |  |  |  | 85 | $offset or $et->Warn('Bad stream Length object'), return $dict; | 
| 985 | 25 |  |  |  |  | 62 | $data = CheckObject($et, 'stream Length', $length, $offset); | 
| 986 | 25 | 50 |  |  |  | 128 | defined $data or return $dict; | 
| 987 | 25 | 50 |  |  |  | 166 | $data =~ /^\s*(\d+)/ or $et->Warn('Stream Length not found'), return $dict; | 
| 988 | 25 |  |  |  |  | 69 | $length = $1; | 
| 989 | 25 |  |  |  |  | 96 | $raf->Seek($oldpos, 0); # restore position to start of stream | 
| 990 |  |  |  |  |  |  | } | 
| 991 |  |  |  |  |  |  | # extract the trailing stream data | 
| 992 | 43 |  |  |  |  | 182 | for (;;) { | 
| 993 |  |  |  |  |  |  | # find the stream token | 
| 994 | 86 | 100 |  |  |  | 407 | if ($$dataPt =~ /(\S+)/) { | 
| 995 | 43 | 50 |  |  |  | 168 | last unless $1 eq 'stream'; | 
| 996 |  |  |  |  |  |  | # read an extra line because it may contain our \x0a | 
| 997 | 43 | 50 |  |  |  | 135 | $$dataPt .= $data if $raf->ReadLine($data); | 
| 998 |  |  |  |  |  |  | # remove our stream header | 
| 999 | 43 |  |  |  |  | 469 | $$dataPt =~ s/^\s*stream(\x0a|\x0d\x0a)//s; | 
| 1000 | 43 |  |  |  |  | 177 | my $more = $length - length($$dataPt); | 
| 1001 | 43 | 100 |  |  |  | 166 | if ($more > 0) { | 
|  |  | 50 |  |  |  |  |  | 
| 1002 | 28 | 50 |  |  |  | 95 | unless ($raf->Read($data, $more) == $more) { | 
| 1003 | 0 |  |  |  |  | 0 | $et->Warn('Error reading stream data'); | 
| 1004 | 0 |  |  |  |  | 0 | $$dataPt = ''; | 
| 1005 | 0 |  |  |  |  | 0 | return $dict; | 
| 1006 |  |  |  |  |  |  | } | 
| 1007 | 28 |  |  |  |  | 209 | $$dict{_stream} = $$dataPt . $data; | 
| 1008 | 28 |  |  |  |  | 71 | $$dataPt = ''; | 
| 1009 |  |  |  |  |  |  | } elsif ($more < 0) { | 
| 1010 | 15 |  |  |  |  | 148 | $$dict{_stream} = substr($$dataPt, 0, $length); | 
| 1011 | 15 |  |  |  |  | 55 | $$dataPt = substr($$dataPt, $length); | 
| 1012 |  |  |  |  |  |  | } else { | 
| 1013 | 0 |  |  |  |  | 0 | $$dict{_stream} = $$dataPt; | 
| 1014 | 0 |  |  |  |  | 0 | $$dataPt = ''; | 
| 1015 |  |  |  |  |  |  | } | 
| 1016 | 43 |  |  |  |  | 92 | last; | 
| 1017 |  |  |  |  |  |  | } | 
| 1018 | 43 | 50 |  |  |  | 221 | $raf->ReadLine($data) or last; | 
| 1019 | 43 |  |  |  |  | 186 | $$dataPt .= $data; | 
| 1020 |  |  |  |  |  |  | } | 
| 1021 | 43 |  |  |  |  | 179 | return $dict; | 
| 1022 |  |  |  |  |  |  | } | 
| 1023 |  |  |  |  |  |  |  | 
| 1024 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 1025 |  |  |  |  |  |  | # Read to nested delimiter | 
| 1026 |  |  |  |  |  |  | # Inputs: 0) data reference, 1) optional raf reference | 
| 1027 |  |  |  |  |  |  | # Returns: data up to and including matching delimiter (or undef on error) | 
| 1028 |  |  |  |  |  |  | # - updates data reference with trailing data | 
| 1029 |  |  |  |  |  |  | # - unescapes characters in literal strings | 
| 1030 |  |  |  |  |  |  | my %closingDelim = (    # lookup for matching delimiter | 
| 1031 |  |  |  |  |  |  | '(' => ')', | 
| 1032 |  |  |  |  |  |  | '[' => ']', | 
| 1033 |  |  |  |  |  |  | '<' => '>', | 
| 1034 |  |  |  |  |  |  | '<<' => '>>', | 
| 1035 |  |  |  |  |  |  | ); | 
| 1036 |  |  |  |  |  |  | sub ReadToNested($;$) | 
| 1037 |  |  |  |  |  |  | { | 
| 1038 | 1228 |  |  | 1228 | 0 | 2034 | my ($dataPt, $raf) = @_; | 
| 1039 | 1228 |  |  |  |  | 2146 | my @delim = ('');   # closing delimiter list, most deeply nested first | 
| 1040 | 1228 |  |  |  |  | 2813 | pos($$dataPt) = 0;  # begin at start of data | 
| 1041 | 1228 |  |  |  |  | 2134 | for (;;) { | 
| 1042 | 5744 | 100 |  |  |  | 37425 | unless ($$dataPt =~ /(\\*)(\(|\)|<{1,2}|>{1,2}|\[|\]|%)/g) { | 
| 1043 |  |  |  |  |  |  | # must read some more data | 
| 1044 | 1148 |  |  |  |  | 1539 | my $buff; | 
| 1045 | 1148 | 50 | 33 |  |  | 3172 | last unless $raf and $raf->ReadLine($buff); | 
| 1046 | 1148 |  |  |  |  | 2834 | $$dataPt .= $buff; | 
| 1047 | 1148 |  |  |  |  | 2611 | pos($$dataPt) = length($$dataPt) - length($buff); | 
| 1048 | 1148 |  |  |  |  | 2060 | next; | 
| 1049 |  |  |  |  |  |  | } | 
| 1050 |  |  |  |  |  |  | # are we in a literal string? | 
| 1051 | 4596 | 100 |  |  |  | 11767 | if ($delim[0] eq ')') { | 
|  |  | 50 |  |  |  |  |  | 
| 1052 |  |  |  |  |  |  | # ignore escaped delimiters (preceded by odd number of \'s) | 
| 1053 | 434 | 50 |  |  |  | 1091 | next if length($1) & 0x01; | 
| 1054 |  |  |  |  |  |  | # ignore all delimiters but unescaped braces | 
| 1055 | 434 | 50 | 33 |  |  | 1579 | next unless $2 eq '(' or $2 eq ')'; | 
| 1056 |  |  |  |  |  |  | } elsif ($2 eq '%') { | 
| 1057 |  |  |  |  |  |  | # ignore the comment | 
| 1058 | 0 |  |  |  |  | 0 | my $pos = pos($$dataPt) - 1; | 
| 1059 |  |  |  |  |  |  | # remove everything from '%' up to but not including newline | 
| 1060 | 0 |  |  |  |  | 0 | $$dataPt =~ /.*/g; | 
| 1061 | 0 |  |  |  |  | 0 | my $end = pos($$dataPt); | 
| 1062 | 0 |  |  |  |  | 0 | $$dataPt = substr($$dataPt, 0, $pos) . substr($$dataPt, $end); | 
| 1063 | 0 |  |  |  |  | 0 | pos($$dataPt) = $pos; | 
| 1064 | 0 |  |  |  |  | 0 | next; | 
| 1065 |  |  |  |  |  |  | } | 
| 1066 | 4596 | 100 |  |  |  | 9746 | if ($closingDelim{$2}) { | 
| 1067 |  |  |  |  |  |  | # push the corresponding closing delimiter | 
| 1068 | 2298 |  |  |  |  | 5271 | unshift @delim, $closingDelim{$2}; | 
| 1069 | 2298 |  |  |  |  | 3201 | next; | 
| 1070 |  |  |  |  |  |  | } | 
| 1071 | 2298 | 50 |  |  |  | 4677 | unless ($2 eq $delim[0]) { | 
| 1072 |  |  |  |  |  |  | # handle the case where we find a ">>>" and interpret it | 
| 1073 |  |  |  |  |  |  | # as ">> >" instead of "> >>" | 
| 1074 | 0 | 0 | 0 |  |  | 0 | next unless $2 eq '>>' and $delim[0] eq '>'; | 
| 1075 | 0 |  |  |  |  | 0 | pos($$dataPt) = pos($$dataPt) - 1; | 
| 1076 |  |  |  |  |  |  | } | 
| 1077 | 2298 |  |  |  |  | 3111 | shift @delim;               # remove from nesting list | 
| 1078 | 2298 | 100 |  |  |  | 4373 | next if $delim[0];          # keep going if we have more nested delimiters | 
| 1079 | 1228 |  |  |  |  | 1911 | my $pos = pos($$dataPt); | 
| 1080 | 1228 |  |  |  |  | 2453 | my $buff = substr($$dataPt, 0, $pos); | 
| 1081 | 1228 |  |  |  |  | 2679 | $$dataPt = substr($$dataPt, $pos); | 
| 1082 | 1228 |  |  |  |  | 2926 | return $buff;   # success! | 
| 1083 |  |  |  |  |  |  | } | 
| 1084 | 0 |  |  |  |  | 0 | return undef;   # didn't find matching delimiter | 
| 1085 |  |  |  |  |  |  | } | 
| 1086 |  |  |  |  |  |  |  | 
| 1087 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 1088 |  |  |  |  |  |  | # Decode LZW-encoded data (ref 1) | 
| 1089 |  |  |  |  |  |  | # Inputs: 0) data reference | 
| 1090 |  |  |  |  |  |  | # Returns: true on success and data is decoded, or false and data is untouched | 
| 1091 |  |  |  |  |  |  | sub DecodeLZW($) | 
| 1092 |  |  |  |  |  |  | { | 
| 1093 | 0 |  |  | 0 | 0 | 0 | my $dataPt = shift; | 
| 1094 | 0 | 0 |  |  |  | 0 | return 0 if length $$dataPt < 4; | 
| 1095 | 0 |  |  |  |  | 0 | my @lzw = (map(chr, 0..255), undef, undef); # LZW code table | 
| 1096 | 0 |  |  |  |  | 0 | my $mask = 0x01ff;  # mask for least-significant 9 bits | 
| 1097 | 0 |  |  |  |  | 0 | my @dat = unpack 'n*', $$dataPt . "\0"; | 
| 1098 | 0 |  |  |  |  | 0 | my $word = ($dat[0] << 16) | $dat[1]; | 
| 1099 | 0 |  |  |  |  | 0 | my ($bit, $pos, $bits, $out) = (0, 2, 9, ''); | 
| 1100 | 0 |  |  |  |  | 0 | my $lastVal; | 
| 1101 | 0 |  |  |  |  | 0 | for (;;) { | 
| 1102 |  |  |  |  |  |  | # bits are packed MSB first in PDF LZW (the PDF spec doesn't mention this) | 
| 1103 | 0 |  |  |  |  | 0 | my $shift = 32 - ($bit + $bits); | 
| 1104 | 0 | 0 |  |  |  | 0 | if ($shift < 0) { | 
| 1105 | 0 | 0 |  |  |  | 0 | return 0 if $pos >= @dat; # missing EOD marker | 
| 1106 | 0 |  |  |  |  | 0 | $word = (($word & 0xffff) << 16) | $dat[$pos++]; # read next word | 
| 1107 | 0 |  |  |  |  | 0 | $bit -= 16; | 
| 1108 | 0 |  |  |  |  | 0 | $shift += 16; | 
| 1109 |  |  |  |  |  |  | }; | 
| 1110 | 0 |  |  |  |  | 0 | my $code = ($word >> $shift) & $mask; | 
| 1111 | 0 |  |  |  |  | 0 | $bit += $bits; | 
| 1112 | 0 |  |  |  |  | 0 | my $val = $lzw[$code]; | 
| 1113 | 0 | 0 |  |  |  | 0 | if (defined $val) { | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 1114 |  |  |  |  |  |  | # store new code as previous sequence plus 1st char of new sequence | 
| 1115 | 0 | 0 |  |  |  | 0 | push @lzw, $lastVal . substr($val, 0, 1) if defined $lastVal; | 
| 1116 |  |  |  |  |  |  | } elsif ($code == @lzw) { # new code | 
| 1117 | 0 | 0 |  |  |  | 0 | return 0 unless defined $lastVal; | 
| 1118 |  |  |  |  |  |  | # we are using the code that we are about to generate, so the last | 
| 1119 |  |  |  |  |  |  | # character in the new sequence must be the same as the first | 
| 1120 |  |  |  |  |  |  | # character in the previous sequence (makes sense if you think about it) | 
| 1121 | 0 |  |  |  |  | 0 | $val = $lastVal . substr($lastVal, 0, 1); | 
| 1122 | 0 |  |  |  |  | 0 | push @lzw, $val; | 
| 1123 |  |  |  |  |  |  | } elsif ($code == 256) { # clear table | 
| 1124 | 0 |  |  |  |  | 0 | splice @lzw, 258; | 
| 1125 | 0 |  |  |  |  | 0 | $bits = 9; | 
| 1126 | 0 |  |  |  |  | 0 | $mask = 0x1ff; | 
| 1127 | 0 |  |  |  |  | 0 | undef $lastVal; | 
| 1128 | 0 |  |  |  |  | 0 | next; | 
| 1129 |  |  |  |  |  |  | } elsif ($code == 257) { # EOD marker | 
| 1130 | 0 |  |  |  |  | 0 | last;   # all done! | 
| 1131 |  |  |  |  |  |  | } else { | 
| 1132 | 0 |  |  |  |  | 0 | return 0; | 
| 1133 |  |  |  |  |  |  | } | 
| 1134 | 0 |  |  |  |  | 0 | $out .= $val;   # add this byte sequence to the output | 
| 1135 |  |  |  |  |  |  | # we added a new entry to the LZW table, so we must increase | 
| 1136 |  |  |  |  |  |  | # the bit width if necessary, up to a maximum of 12 | 
| 1137 | 0 | 0 | 0 |  |  | 0 | @lzw >= $mask and $bits < 12 and ++$bits, $mask |= $mask << 1; | 
| 1138 | 0 |  |  |  |  | 0 | $lastVal = $val; | 
| 1139 |  |  |  |  |  |  | } | 
| 1140 | 0 |  |  |  |  | 0 | $$dataPt = $out;    # return decompressed data | 
| 1141 | 0 |  |  |  |  | 0 | return 1; | 
| 1142 |  |  |  |  |  |  | } | 
| 1143 |  |  |  |  |  |  |  | 
| 1144 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 1145 |  |  |  |  |  |  | # Decode filtered stream | 
| 1146 |  |  |  |  |  |  | # Inputs: 0) ExifTool object reference, 1) dictionary reference | 
| 1147 |  |  |  |  |  |  | # Returns: true if stream has been decoded OK | 
| 1148 |  |  |  |  |  |  | sub DecodeStream($$) | 
| 1149 |  |  |  |  |  |  | { | 
| 1150 | 43 |  |  | 43 | 0 | 100 | local $_; | 
| 1151 | 43 |  |  |  |  | 106 | my ($et, $dict) = @_; | 
| 1152 |  |  |  |  |  |  |  | 
| 1153 | 43 | 50 |  |  |  | 176 | return 0 unless $$dict{_stream}; # no stream to decode | 
| 1154 |  |  |  |  |  |  |  | 
| 1155 |  |  |  |  |  |  | # get list of filters | 
| 1156 | 43 |  |  |  |  | 85 | my (@filters, @decodeParms, $filter); | 
| 1157 | 43 | 50 |  |  |  | 173 | if (ref $$dict{Filter} eq 'ARRAY') { | 
|  |  | 50 |  |  |  |  |  | 
| 1158 | 0 |  |  |  |  | 0 | @filters = @{$$dict{Filter}}; | 
|  | 0 |  |  |  |  | 0 |  | 
| 1159 |  |  |  |  |  |  | } elsif (defined $$dict{Filter}) { | 
| 1160 | 0 |  |  |  |  | 0 | @filters = ($$dict{Filter}); | 
| 1161 |  |  |  |  |  |  | } | 
| 1162 |  |  |  |  |  |  | # be sure we can process all the filters before we take the time to do the decryption | 
| 1163 | 43 |  |  |  |  | 95 | foreach $filter (@filters) { | 
| 1164 | 0 | 0 |  |  |  | 0 | next if $supportedFilter{$filter}; | 
| 1165 | 0 |  |  |  |  | 0 | $et->WarnOnce("Unsupported Filter $filter"); | 
| 1166 | 0 |  |  |  |  | 0 | return 0; | 
| 1167 |  |  |  |  |  |  | } | 
| 1168 |  |  |  |  |  |  | # apply decryption first if required (and if the default encryption | 
| 1169 |  |  |  |  |  |  | # has not been overridden by a Crypt filter. Note: the Crypt filter | 
| 1170 |  |  |  |  |  |  | # must be first in the Filter array: ref 3, page 38) | 
| 1171 | 43 | 50 | 33 |  |  | 237 | unless (defined $$dict{_decrypted} or ($filters[0] and $filters[0] eq '/Crypt')) { | 
|  |  |  | 33 |  |  |  |  | 
| 1172 | 43 |  |  |  |  | 159 | CryptStream($dict, $lastFetched); | 
| 1173 |  |  |  |  |  |  | } | 
| 1174 | 43 | 50 |  |  |  | 184 | return 1 unless $$dict{Filter};         # Filter entry is mandatory | 
| 1175 | 0 | 0 |  |  |  | 0 | return 0 if defined $$dict{_filtered};  # avoid double-filtering | 
| 1176 | 0 |  |  |  |  | 0 | $$dict{_filtered} = 1;                  # set flag to prevent double-filtering | 
| 1177 |  |  |  |  |  |  |  | 
| 1178 |  |  |  |  |  |  | # get array of DecodeParms dictionaries | 
| 1179 | 0 | 0 |  |  |  | 0 | if (ref $$dict{DecodeParms} eq 'ARRAY') { | 
| 1180 | 0 |  |  |  |  | 0 | @decodeParms = @{$$dict{DecodeParms}}; | 
|  | 0 |  |  |  |  | 0 |  | 
| 1181 |  |  |  |  |  |  | } else { | 
| 1182 | 0 |  |  |  |  | 0 | @decodeParms = ($$dict{DecodeParms}); | 
| 1183 |  |  |  |  |  |  | } | 
| 1184 |  |  |  |  |  |  |  | 
| 1185 | 0 |  |  |  |  | 0 | foreach $filter (@filters) { | 
| 1186 | 0 |  |  |  |  | 0 | my $decodeParms = shift @decodeParms; | 
| 1187 |  |  |  |  |  |  |  | 
| 1188 | 0 | 0 |  |  |  | 0 | if ($filter eq '/FlateDecode') { | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 1189 |  |  |  |  |  |  | # make sure we support the predictor (if used) before decoding | 
| 1190 | 0 |  |  |  |  | 0 | my $pre; | 
| 1191 | 0 | 0 |  |  |  | 0 | if (ref $decodeParms eq 'HASH') { | 
| 1192 | 0 |  |  |  |  | 0 | $pre = $$decodeParms{Predictor}; | 
| 1193 | 0 | 0 | 0 |  |  | 0 | if ($pre and $pre ne '1' and $pre ne '12') { | 
|  |  |  | 0 |  |  |  |  | 
| 1194 | 0 |  |  |  |  | 0 | $et->WarnOnce("FlateDecode Predictor $pre currently not supported"); | 
| 1195 | 0 |  |  |  |  | 0 | return 0; | 
| 1196 |  |  |  |  |  |  | } | 
| 1197 |  |  |  |  |  |  | } | 
| 1198 | 0 | 0 |  |  |  | 0 | if (eval { require Compress::Zlib }) { | 
|  | 0 |  |  |  |  | 0 |  | 
| 1199 | 0 |  |  |  |  | 0 | my $inflate = Compress::Zlib::inflateInit(); | 
| 1200 | 0 |  |  |  |  | 0 | my ($buff, $stat); | 
| 1201 | 0 | 0 |  |  |  | 0 | $inflate and ($buff, $stat) = $inflate->inflate($$dict{_stream}); | 
| 1202 | 0 | 0 | 0 |  |  | 0 | if ($inflate and $stat == Compress::Zlib::Z_STREAM_END()) { | 
| 1203 | 0 |  |  |  |  | 0 | $$dict{_stream} = $buff; | 
| 1204 |  |  |  |  |  |  | } else { | 
| 1205 | 0 |  |  |  |  | 0 | $et->Warn('Error inflating stream'); | 
| 1206 | 0 |  |  |  |  | 0 | return 0; | 
| 1207 |  |  |  |  |  |  | } | 
| 1208 |  |  |  |  |  |  | } else { | 
| 1209 | 0 |  |  |  |  | 0 | $et->WarnOnce('Install Compress::Zlib to process filtered streams'); | 
| 1210 | 0 |  |  |  |  | 0 | return 0; | 
| 1211 |  |  |  |  |  |  | } | 
| 1212 | 0 | 0 | 0 |  |  | 0 | next unless $pre and $pre eq '12';  # 12 = 'up' prediction | 
| 1213 |  |  |  |  |  |  |  | 
| 1214 |  |  |  |  |  |  | # apply anti-predictor | 
| 1215 | 0 |  |  |  |  | 0 | my $cols = $$decodeParms{Columns}; | 
| 1216 | 0 | 0 |  |  |  | 0 | unless ($cols) { | 
| 1217 |  |  |  |  |  |  | # currently only support 'up' prediction | 
| 1218 | 0 |  |  |  |  | 0 | $et->WarnOnce('No Columns for decoding stream'); | 
| 1219 | 0 |  |  |  |  | 0 | return 0; | 
| 1220 |  |  |  |  |  |  | } | 
| 1221 | 0 |  |  |  |  | 0 | my @bytes = unpack('C*', $$dict{_stream}); | 
| 1222 | 0 |  |  |  |  | 0 | my @pre = (0) x $cols;  # initialize predictor array | 
| 1223 | 0 |  |  |  |  | 0 | my $buff = ''; | 
| 1224 | 0 |  |  |  |  | 0 | while (@bytes > $cols) { | 
| 1225 | 0 | 0 |  |  |  | 0 | unless (($_ = shift @bytes) == 2) { | 
| 1226 | 0 |  |  |  |  | 0 | $et->WarnOnce("Unsupported PNG filter $_"); # (yes, PNG) | 
| 1227 | 0 |  |  |  |  | 0 | return 0; | 
| 1228 |  |  |  |  |  |  | } | 
| 1229 | 0 |  |  |  |  | 0 | foreach (@pre) { | 
| 1230 | 0 |  |  |  |  | 0 | $_ = ($_ + shift(@bytes)) & 0xff; | 
| 1231 |  |  |  |  |  |  | } | 
| 1232 | 0 |  |  |  |  | 0 | $buff .= pack('C*', @pre); | 
| 1233 |  |  |  |  |  |  | } | 
| 1234 | 0 |  |  |  |  | 0 | $$dict{_stream} = $buff; | 
| 1235 |  |  |  |  |  |  |  | 
| 1236 |  |  |  |  |  |  | } elsif ($filter eq '/Crypt') { | 
| 1237 |  |  |  |  |  |  |  | 
| 1238 |  |  |  |  |  |  | # (we shouldn't have to check the _decrypted flag since we | 
| 1239 |  |  |  |  |  |  | #  already checked the _filtered flag, but what the heck...) | 
| 1240 | 0 | 0 |  |  |  | 0 | next if defined $$dict{_decrypted}; | 
| 1241 |  |  |  |  |  |  | # assume Identity filter (the default) if DecodeParms are missing | 
| 1242 | 0 | 0 |  |  |  | 0 | next unless ref $decodeParms eq 'HASH'; | 
| 1243 | 0 |  |  |  |  | 0 | my $name = $$decodeParms{Name}; | 
| 1244 | 0 | 0 | 0 |  |  | 0 | next unless defined $name or $name eq 'Identity'; | 
| 1245 | 0 | 0 |  |  |  | 0 | if ($name ne 'StdCF') { | 
| 1246 | 0 |  |  |  |  | 0 | $et->WarnOnce("Unsupported Crypt Filter $name"); | 
| 1247 | 0 |  |  |  |  | 0 | return 0; | 
| 1248 |  |  |  |  |  |  | } | 
| 1249 | 0 | 0 |  |  |  | 0 | unless ($cryptInfo) { | 
| 1250 | 0 |  |  |  |  | 0 | $et->WarnOnce('Missing Encrypt StdCF entry'); | 
| 1251 | 0 |  |  |  |  | 0 | return 0; | 
| 1252 |  |  |  |  |  |  | } | 
| 1253 |  |  |  |  |  |  | # decrypt the stream manually because we want to: | 
| 1254 |  |  |  |  |  |  | # 1) ignore $cryptStream (StmF) setting | 
| 1255 |  |  |  |  |  |  | # 2) ignore EncryptMetadata setting (I can't find mention of how to | 
| 1256 |  |  |  |  |  |  | #    reconcile this in the spec., but this would make sense) | 
| 1257 |  |  |  |  |  |  | # 3) avoid adding the crypt key extension (ref 3, page 58, Algorithm 1b) | 
| 1258 |  |  |  |  |  |  | # 4) set _decrypted flag so we will recrypt according to StmF when | 
| 1259 |  |  |  |  |  |  | #    writing (since we don't yet write Filter'd streams) | 
| 1260 | 0 |  |  |  |  | 0 | Crypt(\$$dict{_stream}, 'none'); | 
| 1261 | 0 | 0 |  |  |  | 0 | $$dict{_decrypted} = ($cryptStream ? 1 : 0); | 
| 1262 |  |  |  |  |  |  |  | 
| 1263 |  |  |  |  |  |  | } elsif ($filter eq '/LZWDecode') { | 
| 1264 |  |  |  |  |  |  |  | 
| 1265 |  |  |  |  |  |  | # make sure we don't have any unsupported decoding parameters | 
| 1266 | 0 | 0 |  |  |  | 0 | if (ref $decodeParms eq 'HASH') { | 
| 1267 | 0 | 0 |  |  |  | 0 | if ($$decodeParms{Predictor}) { | 
|  |  | 0 |  |  |  |  |  | 
| 1268 | 0 |  |  |  |  | 0 | $et->WarnOnce("LZWDecode Predictor $$decodeParms{Predictor} currently not supported"); | 
| 1269 | 0 |  |  |  |  | 0 | return 0; | 
| 1270 |  |  |  |  |  |  | } elsif ($$decodeParms{EarlyChange}) { | 
| 1271 | 0 |  |  |  |  | 0 | $et->WarnOnce("LZWDecode EarlyChange currently not supported"); | 
| 1272 | 0 |  |  |  |  | 0 | return 0; | 
| 1273 |  |  |  |  |  |  | } | 
| 1274 |  |  |  |  |  |  | } | 
| 1275 | 0 | 0 |  |  |  | 0 | unless (DecodeLZW(\$$dict{_stream})) { | 
| 1276 | 0 |  |  |  |  | 0 | $et->WarnOnce('LZW decompress error'); | 
| 1277 | 0 |  |  |  |  | 0 | return 0; | 
| 1278 |  |  |  |  |  |  | } | 
| 1279 |  |  |  |  |  |  |  | 
| 1280 |  |  |  |  |  |  | } elsif ($filter eq '/ASCIIHexDecode') { | 
| 1281 |  |  |  |  |  |  |  | 
| 1282 | 0 |  |  |  |  | 0 | $$dict{_stream} =~ s/>.*//; # truncate at '>' (end of data mark) | 
| 1283 | 0 |  |  |  |  | 0 | $$dict{_stream} =~ tr/0-9a-zA-Z//d; # remove illegal characters | 
| 1284 | 0 |  |  |  |  | 0 | $$dict{_stream} = pack 'H*', $$dict{_stream}; | 
| 1285 |  |  |  |  |  |  |  | 
| 1286 |  |  |  |  |  |  | } elsif ($filter eq '/ASCII85Decode') { | 
| 1287 |  |  |  |  |  |  |  | 
| 1288 | 0 |  |  |  |  | 0 | my ($err, @out, $i); | 
| 1289 | 0 |  |  |  |  | 0 | my ($n, $val) = (0, 0); | 
| 1290 | 0 |  |  |  |  | 0 | foreach (split //, $$dict{_stream}) { | 
| 1291 | 0 | 0 | 0 |  |  | 0 | if ($_ ge '!' and $_ le 'u') {; | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 1292 | 0 |  |  |  |  | 0 | $val = 85 * $val + ord($_) - 33; | 
| 1293 | 0 | 0 |  |  |  | 0 | next unless ++$n == 5; | 
| 1294 |  |  |  |  |  |  | } elsif ($_ eq '~') { | 
| 1295 | 0 | 0 |  |  |  | 0 | $n == 1 and $err = 1;   # error to have a single char in the last group of 5 | 
| 1296 | 0 |  |  |  |  | 0 | for ($i=$n; $i<5; ++$i) { $val *= 85; } | 
|  | 0 |  |  |  |  | 0 |  | 
| 1297 |  |  |  |  |  |  | } elsif ($_ eq 'z') { | 
| 1298 | 0 | 0 |  |  |  | 0 | $n and $err = 2, last;  # error if 'z' isn't the first char | 
| 1299 | 0 |  |  |  |  | 0 | $n = 5; | 
| 1300 |  |  |  |  |  |  | } else { | 
| 1301 | 0 | 0 |  |  |  | 0 | next if /^\s$/;         # ignore white space | 
| 1302 | 0 |  |  |  |  | 0 | $err = 3, last;         # any other character is an error | 
| 1303 |  |  |  |  |  |  | } | 
| 1304 | 0 |  |  |  |  | 0 | $val = unpack('V', pack('N', $val)); # reverse byte order | 
| 1305 | 0 |  |  |  |  | 0 | while (--$n > 0) { | 
| 1306 | 0 |  |  |  |  | 0 | push @out, $val & 0xff; | 
| 1307 | 0 |  |  |  |  | 0 | $val >>= 8; | 
| 1308 |  |  |  |  |  |  | } | 
| 1309 | 0 | 0 |  |  |  | 0 | last if $_ eq '~'; | 
| 1310 |  |  |  |  |  |  | # (both $n and $val are zero again now) | 
| 1311 |  |  |  |  |  |  | } | 
| 1312 | 0 | 0 |  |  |  | 0 | $err and $et->WarnOnce("ASCII85Decode error $err"); | 
| 1313 | 0 |  |  |  |  | 0 | $$dict{_stream} = pack('C*', @out); | 
| 1314 |  |  |  |  |  |  | } | 
| 1315 |  |  |  |  |  |  | } | 
| 1316 | 0 |  |  |  |  | 0 | return 1; | 
| 1317 |  |  |  |  |  |  | } | 
| 1318 |  |  |  |  |  |  |  | 
| 1319 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 1320 |  |  |  |  |  |  | # Initialize state for RC4 en/decryption (ref 2) | 
| 1321 |  |  |  |  |  |  | # Inputs: 0) RC4 key string | 
| 1322 |  |  |  |  |  |  | # Returns: RC4 key hash reference | 
| 1323 |  |  |  |  |  |  | sub RC4Init($) | 
| 1324 |  |  |  |  |  |  | { | 
| 1325 | 22 |  |  | 22 | 0 | 62 | my @key = unpack('C*', shift); | 
| 1326 | 22 |  |  |  |  | 223 | my @state = (0 .. 255); | 
| 1327 | 22 |  |  |  |  | 44 | my ($i, $j) = (0, 0); | 
| 1328 | 22 |  |  |  |  | 49 | while ($i < 256) { | 
| 1329 | 5632 |  |  |  |  | 7757 | my $st = $state[$i]; | 
| 1330 | 5632 |  |  |  |  | 8261 | $j = ($j + $st + $key[$i % scalar(@key)]) & 0xff; | 
| 1331 | 5632 |  |  |  |  | 7705 | $state[$i++] = $state[$j]; | 
| 1332 | 5632 |  |  |  |  | 9677 | $state[$j] = $st; | 
| 1333 |  |  |  |  |  |  | } | 
| 1334 | 22 |  |  |  |  | 196 | return { State => \@state, XY => [ 0, 0 ] }; | 
| 1335 |  |  |  |  |  |  | } | 
| 1336 |  |  |  |  |  |  |  | 
| 1337 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 1338 |  |  |  |  |  |  | # Apply RC4 en/decryption (ref 2) | 
| 1339 |  |  |  |  |  |  | # Inputs: 0) data reference, 1) RC4 key hash reference or RC4 key string | 
| 1340 |  |  |  |  |  |  | # - can call this method directly with a key string, or with with the key | 
| 1341 |  |  |  |  |  |  | #   reference returned by RC4Init | 
| 1342 |  |  |  |  |  |  | # - RC4 is a symmetric algorithm, so encryption is the same as decryption | 
| 1343 |  |  |  |  |  |  | sub RC4Crypt($$) | 
| 1344 |  |  |  |  |  |  | { | 
| 1345 | 22 |  |  | 22 | 0 | 46 | my ($dataPt, $key) = @_; | 
| 1346 | 22 | 50 |  |  |  | 68 | $key = RC4Init($key) unless ref $key eq 'HASH'; | 
| 1347 | 22 |  |  |  |  | 54 | my $state = $$key{State}; | 
| 1348 | 22 |  |  |  |  | 39 | my ($x, $y) = @{$$key{XY}}; | 
|  | 22 |  |  |  |  | 51 |  | 
| 1349 |  |  |  |  |  |  |  | 
| 1350 | 22 |  |  |  |  | 69 | my @data = unpack('C*', $$dataPt); | 
| 1351 | 22 |  |  |  |  | 48 | foreach (@data) { | 
| 1352 | 356 |  |  |  |  | 462 | $x = ($x + 1) & 0xff; | 
| 1353 | 356 |  |  |  |  | 470 | my $stx = $$state[$x]; | 
| 1354 | 356 |  |  |  |  | 461 | $y = ($stx + $y) & 0xff; | 
| 1355 | 356 |  |  |  |  | 518 | my $sty = $$state[$x] = $$state[$y]; | 
| 1356 | 356 |  |  |  |  | 455 | $$state[$y] = $stx; | 
| 1357 | 356 |  |  |  |  | 590 | $_ ^= $$state[($stx + $sty) & 0xff]; | 
| 1358 |  |  |  |  |  |  | } | 
| 1359 | 22 |  |  |  |  | 74 | $$key{XY} = [ $x, $y ]; | 
| 1360 | 22 |  |  |  |  | 198 | $$dataPt = pack('C*', @data); | 
| 1361 |  |  |  |  |  |  | } | 
| 1362 |  |  |  |  |  |  |  | 
| 1363 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 1364 |  |  |  |  |  |  | # Update AES cipher with a bit of data | 
| 1365 |  |  |  |  |  |  | # Inputs: 0) data | 
| 1366 |  |  |  |  |  |  | # Returns: encrypted data | 
| 1367 |  |  |  |  |  |  | my $cipherMore; | 
| 1368 |  |  |  |  |  |  | sub CipherUpdate($) | 
| 1369 |  |  |  |  |  |  | { | 
| 1370 | 0 |  |  | 0 | 0 | 0 | my $dat = shift; | 
| 1371 | 0 |  |  |  |  | 0 | my $pos = 0; | 
| 1372 | 0 | 0 |  |  |  | 0 | $dat = $cipherMore . $dat if length $dat; | 
| 1373 | 0 |  |  |  |  | 0 | while ($pos + 16 <= length($dat)) { | 
| 1374 | 0 |  |  |  |  | 0 | substr($dat,$pos,16) = Image::ExifTool::AES::Cipher(substr($dat,$pos,16)); | 
| 1375 | 0 |  |  |  |  | 0 | $pos += 16; | 
| 1376 |  |  |  |  |  |  | } | 
| 1377 | 0 | 0 |  |  |  | 0 | if ($pos < length $dat) { | 
| 1378 | 0 |  |  |  |  | 0 | $cipherMore = substr($dat,$pos); | 
| 1379 | 0 |  |  |  |  | 0 | $dat = substr($dat,0,$pos); | 
| 1380 |  |  |  |  |  |  | } else { | 
| 1381 | 0 |  |  |  |  | 0 | $cipherMore = ''; | 
| 1382 |  |  |  |  |  |  | } | 
| 1383 | 0 |  |  |  |  | 0 | return $dat; | 
| 1384 |  |  |  |  |  |  | } | 
| 1385 |  |  |  |  |  |  |  | 
| 1386 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 1387 |  |  |  |  |  |  | # Get encrypted hash | 
| 1388 |  |  |  |  |  |  | # Inputs: 0) Password, 1) salt, 2) vector, 3) encryption revision | 
| 1389 |  |  |  |  |  |  | # Returns: hash | 
| 1390 |  |  |  |  |  |  | sub GetHash($$$$) | 
| 1391 |  |  |  |  |  |  | { | 
| 1392 | 6 |  |  | 6 | 0 | 28 | my ($password, $salt, $vector, $rev) = @_; | 
| 1393 |  |  |  |  |  |  |  | 
| 1394 |  |  |  |  |  |  | # return Rev 5 hash | 
| 1395 | 6 | 50 |  |  |  | 65 | return Digest::SHA::sha256($password, $salt, $vector) if $rev == 5; | 
| 1396 |  |  |  |  |  |  |  | 
| 1397 |  |  |  |  |  |  | # compute Rev 6 hardened hash | 
| 1398 |  |  |  |  |  |  | # (ref http://code.google.com/p/origami-pdf/source/browse/lib/origami/encryption.rb) | 
| 1399 | 0 |  |  |  |  | 0 | my $blockSize = 32; | 
| 1400 | 0 |  |  |  |  | 0 | my $input = Digest::SHA::sha256($password, $salt, $vector) . ("\0" x 32); | 
| 1401 | 0 |  |  |  |  | 0 | my $key = substr($input, 0, 16); | 
| 1402 | 0 |  |  |  |  | 0 | my $iv = substr($input, 16, 16); | 
| 1403 | 0 |  |  |  |  | 0 | my $h; | 
| 1404 | 0 |  |  |  |  | 0 | my $x = ''; | 
| 1405 | 0 |  |  |  |  | 0 | my $i = 0; | 
| 1406 | 0 |  | 0 |  |  | 0 | while ($i < 64 or $i < ord(substr($x,-1,1))+32) { | 
| 1407 |  |  |  |  |  |  |  | 
| 1408 | 0 |  |  |  |  | 0 | my $block = substr($input, 0, $blockSize); | 
| 1409 | 0 |  |  |  |  | 0 | $x = ''; | 
| 1410 | 0 |  |  |  |  | 0 | Image::ExifTool::AES::Crypt(\$x, $key, $iv, 1); | 
| 1411 | 0 |  |  |  |  | 0 | $cipherMore = ''; | 
| 1412 |  |  |  |  |  |  |  | 
| 1413 | 0 |  |  |  |  | 0 | my ($j, $digest); | 
| 1414 | 0 |  |  |  |  | 0 | for ($j=0; $j<64; ++$j)  { | 
| 1415 | 0 |  |  |  |  | 0 | $x = ''; | 
| 1416 | 0 | 0 |  |  |  | 0 | $x .= CipherUpdate($password) if length $password; | 
| 1417 | 0 |  |  |  |  | 0 | $x .= CipherUpdate($block); | 
| 1418 | 0 | 0 |  |  |  | 0 | $x .= CipherUpdate($vector) if length $vector; | 
| 1419 | 0 | 0 |  |  |  | 0 | if ($j == 0) { | 
| 1420 | 0 |  |  |  |  | 0 | my @a = unpack('C16', $x); | 
| 1421 | 0 |  |  |  |  | 0 | my $sum = 0; | 
| 1422 | 0 |  |  |  |  | 0 | $sum += $_ foreach @a; | 
| 1423 |  |  |  |  |  |  | # set SHA block size (32, 48 or 64 bytes = SHA-256, 384 or 512) | 
| 1424 | 0 |  |  |  |  | 0 | $blockSize = 32 + ($sum % 3) * 16; | 
| 1425 | 0 |  |  |  |  | 0 | $digest = Digest::SHA->new($blockSize * 8); | 
| 1426 |  |  |  |  |  |  | } | 
| 1427 | 0 |  |  |  |  | 0 | $digest->add($x); | 
| 1428 |  |  |  |  |  |  | } | 
| 1429 |  |  |  |  |  |  |  | 
| 1430 | 0 |  |  |  |  | 0 | $h = $digest->digest(); | 
| 1431 | 0 |  |  |  |  | 0 | $key = substr($h, 0, 16); | 
| 1432 | 0 |  |  |  |  | 0 | substr($input,0,16) = $h; | 
| 1433 | 0 |  |  |  |  | 0 | $iv = substr($h, 16, 16); | 
| 1434 | 0 |  |  |  |  | 0 | ++$i; | 
| 1435 |  |  |  |  |  |  | } | 
| 1436 | 0 |  |  |  |  | 0 | return substr($h, 0, 32); | 
| 1437 |  |  |  |  |  |  | } | 
| 1438 |  |  |  |  |  |  |  | 
| 1439 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 1440 |  |  |  |  |  |  | # Initialize decryption | 
| 1441 |  |  |  |  |  |  | # Inputs: 0) ExifTool object reference, 1) Encrypt dictionary reference, | 
| 1442 |  |  |  |  |  |  | #         2) ID from file trailer dictionary | 
| 1443 |  |  |  |  |  |  | # Returns: error string or undef on success (and sets $cryptInfo) | 
| 1444 |  |  |  |  |  |  | sub DecryptInit($$$) | 
| 1445 |  |  |  |  |  |  | { | 
| 1446 | 4 |  |  | 4 | 0 | 31 | local $_; | 
| 1447 | 4 |  |  |  |  | 15 | my ($et, $encrypt, $id) = @_; | 
| 1448 |  |  |  |  |  |  |  | 
| 1449 | 4 |  |  |  |  | 11 | undef $cryptInfo; | 
| 1450 | 4 | 50 | 33 |  |  | 29 | unless ($encrypt and ref $encrypt eq 'HASH') { | 
| 1451 | 0 |  |  |  |  | 0 | return 'Error loading Encrypt object'; | 
| 1452 |  |  |  |  |  |  | } | 
| 1453 | 4 |  |  |  |  | 23 | my $filt = $$encrypt{Filter}; | 
| 1454 | 4 | 50 | 33 |  |  | 39 | unless ($filt and $filt =~ s/^\///) { | 
| 1455 | 0 |  |  |  |  | 0 | return 'Encrypt dictionary has no Filter!'; | 
| 1456 |  |  |  |  |  |  | } | 
| 1457 |  |  |  |  |  |  | # extract some interesting tags | 
| 1458 | 4 |  | 50 |  |  | 18 | my $ver = $$encrypt{V} || 0; | 
| 1459 | 4 |  | 100 |  |  | 18 | my $rev = $$encrypt{R} || 0; | 
| 1460 | 4 |  |  |  |  | 14 | my $enc = "$filt V$ver"; | 
| 1461 | 4 | 50 |  |  |  | 21 | $enc .= ".$rev" if $filt eq 'Standard'; | 
| 1462 | 4 | 50 | 33 |  |  | 18 | $enc .= " ($1)" if $$encrypt{SubFilter} and $$encrypt{SubFilter} =~ /^\/(.*)/; | 
| 1463 | 4 | 50 | 100 |  |  | 30 | $enc .= ' (' . ($$encrypt{Length} || 40) . '-bit)' if $filt eq 'Standard'; | 
| 1464 | 4 |  |  |  |  | 19 | my $tagTablePtr = GetTagTable('Image::ExifTool::PDF::Encrypt'); | 
| 1465 | 4 |  |  |  |  | 31 | $et->HandleTag($tagTablePtr, 'Filter', $enc); | 
| 1466 | 4 | 50 |  |  |  | 24 | if ($filt ne 'Standard') { | 
|  |  | 50 |  |  |  |  |  | 
| 1467 | 0 |  |  |  |  | 0 | return "Encryption filter $filt currently not supported"; | 
| 1468 |  |  |  |  |  |  | } elsif (not defined $$encrypt{R}) { | 
| 1469 | 0 |  |  |  |  | 0 | return 'Standard security handler missing revision'; | 
| 1470 |  |  |  |  |  |  | } | 
| 1471 | 4 | 50 | 33 |  |  | 60 | unless ($$encrypt{O} and $$encrypt{P} and $$encrypt{U}) { | 
|  |  |  | 33 |  |  |  |  | 
| 1472 | 0 |  |  |  |  | 0 | return 'Incomplete Encrypt specification'; | 
| 1473 |  |  |  |  |  |  | } | 
| 1474 | 4 | 50 |  |  |  | 29 | if ("$ver.$rev" >= 5.6) { | 
| 1475 |  |  |  |  |  |  | # apologize for poor performance (AES is a pure Perl implementation) | 
| 1476 | 0 |  |  |  |  | 0 | $et->Warn('Decryption is very slow for encryption V5.6 or higher', 3); | 
| 1477 |  |  |  |  |  |  | } | 
| 1478 | 4 |  |  |  |  | 18 | $et->HandleTag($tagTablePtr, 'P', $$encrypt{P}); | 
| 1479 |  |  |  |  |  |  |  | 
| 1480 | 4 |  |  |  |  | 8 | my %parm;   # optional parameters extracted from Encrypt dictionary | 
| 1481 |  |  |  |  |  |  |  | 
| 1482 | 4 | 100 | 66 |  |  | 55 | if ($ver == 1 or $ver == 2) { | 
|  |  | 50 | 66 |  |  |  |  | 
| 1483 | 1 |  |  |  |  | 4 | $cryptString = $cryptStream = 1; | 
| 1484 |  |  |  |  |  |  | } elsif ($ver == 4 or $ver == 5) { | 
| 1485 |  |  |  |  |  |  | # initialize our $cryptString and $cryptStream flags | 
| 1486 | 3 |  |  |  |  | 9 | foreach ('StrF', 'StmF') { | 
| 1487 | 6 | 100 |  |  |  | 16 | my $flagPt = $_ eq 'StrF' ? \$cryptString : \$cryptStream; | 
| 1488 | 6 |  |  |  |  | 16 | $$flagPt = $$encrypt{$_}; | 
| 1489 | 6 | 50 | 33 |  |  | 25 | undef $$flagPt if $$flagPt and $$flagPt eq '/Identity'; | 
| 1490 | 6 | 50 | 33 |  |  | 58 | return "Unsupported $_ encryption $$flagPt" if $$flagPt and $$flagPt ne '/StdCF'; | 
| 1491 |  |  |  |  |  |  | } | 
| 1492 | 3 | 50 | 33 |  |  | 18 | if ($cryptString or $cryptStream) { | 
| 1493 |  |  |  |  |  |  | return 'Missing or invalid Encrypt StdCF entry' unless ref $$encrypt{CF} eq 'HASH' and | 
| 1494 | 3 | 50 | 33 |  |  | 50 | ref $$encrypt{CF}{StdCF} eq 'HASH' and $$encrypt{CF}{StdCF}{CFM}; | 
|  |  |  | 33 |  |  |  |  | 
| 1495 | 3 |  |  |  |  | 9 | my $cryptMeth = $$encrypt{CF}{StdCF}{CFM}; | 
| 1496 | 3 | 50 |  |  |  | 22 | unless ($cryptMeth =~ /^\/(V2|AESV2|AESV3)$/) { | 
| 1497 | 0 |  |  |  |  | 0 | return "Unsupported encryption method $cryptMeth"; | 
| 1498 |  |  |  |  |  |  | } | 
| 1499 |  |  |  |  |  |  | # set "_aesv2" or "_aesv3" flag in %$encrypt hash if AES encryption was used | 
| 1500 | 3 | 50 |  |  |  | 28 | $$encrypt{'_' . lc($1)} = 1 if $cryptMeth =~ /^\/(AESV2|AESV3)$/; | 
| 1501 |  |  |  |  |  |  | } | 
| 1502 | 3 | 100 |  |  |  | 10 | if ($ver == 5) { | 
| 1503 |  |  |  |  |  |  | # validate OE and UE entries | 
| 1504 | 2 |  |  |  |  | 5 | foreach ('OE', 'UE') { | 
| 1505 | 4 | 50 |  |  |  | 14 | return "Missing Encrypt $_ entry" unless $$encrypt{$_}; | 
| 1506 | 4 |  |  |  |  | 13 | $parm{$_} = ReadPDFValue($$encrypt{$_}); | 
| 1507 | 4 | 50 |  |  |  | 18 | return "Invalid Encrypt $_ entry" unless length $parm{$_} == 32; | 
| 1508 |  |  |  |  |  |  | } | 
| 1509 | 2 |  |  |  |  | 24 | require Image::ExifTool::AES;   # will need this later | 
| 1510 |  |  |  |  |  |  | } | 
| 1511 |  |  |  |  |  |  | } else { | 
| 1512 | 0 |  |  |  |  | 0 | return "Encryption version $ver currently not supported"; | 
| 1513 |  |  |  |  |  |  | } | 
| 1514 | 4 | 50 |  |  |  | 12 | $id or return "Can't decrypt (no document ID)"; | 
| 1515 |  |  |  |  |  |  |  | 
| 1516 |  |  |  |  |  |  | # make sure we have the necessary libraries available | 
| 1517 | 4 | 100 |  |  |  | 13 | if ($ver < 5) { | 
| 1518 | 2 | 50 |  |  |  | 6 | unless (eval { require Digest::MD5 }) { | 
|  | 2 |  |  |  |  | 28 |  | 
| 1519 | 0 |  |  |  |  | 0 | return "Install Digest::MD5 to process encrypted PDF"; | 
| 1520 |  |  |  |  |  |  | } | 
| 1521 |  |  |  |  |  |  | } else { | 
| 1522 | 2 | 50 |  |  |  | 5 | unless (eval { require Digest::SHA }) { | 
|  | 2 |  |  |  |  | 15 |  | 
| 1523 | 0 |  |  |  |  | 0 | return "Install Digest::SHA to process AES-256 encrypted PDF"; | 
| 1524 |  |  |  |  |  |  | } | 
| 1525 |  |  |  |  |  |  | } | 
| 1526 |  |  |  |  |  |  |  | 
| 1527 |  |  |  |  |  |  | # calculate file-level en/decryption key | 
| 1528 | 4 |  |  |  |  | 13 | my $pad = "\x28\xBF\x4E\x5E\x4E\x75\x8A\x41\x64\x00\x4E\x56\xFF\xFA\x01\x08". | 
| 1529 |  |  |  |  |  |  | "\x2E\x2E\x00\xB6\xD0\x68\x3E\x80\x2F\x0C\xA9\xFE\x64\x53\x69\x7A"; | 
| 1530 | 4 |  |  |  |  | 15 | my $o = ReadPDFValue($$encrypt{O}); | 
| 1531 | 4 |  |  |  |  | 10 | my $u = ReadPDFValue($$encrypt{U}); | 
| 1532 |  |  |  |  |  |  |  | 
| 1533 |  |  |  |  |  |  | # set flag indicating whether metadata is encrypted | 
| 1534 |  |  |  |  |  |  | # (in version 4 and higher, metadata streams may not be encrypted) | 
| 1535 | 4 | 100 | 100 |  |  | 47 | if ($ver < 4 or not $$encrypt{EncryptMetadata} or $$encrypt{EncryptMetadata} !~ /false/i) { | 
|  |  |  | 66 |  |  |  |  | 
| 1536 | 3 |  |  |  |  | 12 | $$encrypt{_meta} = 1; | 
| 1537 |  |  |  |  |  |  | } | 
| 1538 |  |  |  |  |  |  | # try no password first, then try provided password if available | 
| 1539 | 4 |  |  |  |  | 9 | my ($try, $key); | 
| 1540 | 4 |  |  |  |  | 10 | for ($try=0; ; ++$try) { | 
| 1541 | 5 |  |  |  |  | 10 | my $password; | 
| 1542 | 5 | 100 |  |  |  | 18 | if ($try == 0) { | 
|  |  | 50 |  |  |  |  |  | 
| 1543 | 4 |  |  |  |  | 10 | $password = ''; | 
| 1544 |  |  |  |  |  |  | } elsif ($try == 1) { | 
| 1545 | 1 |  |  |  |  | 6 | $password = $et->Options('Password'); | 
| 1546 | 1 | 50 |  |  |  | 4 | return 'Document is password protected (use Password option)' unless defined $password; | 
| 1547 |  |  |  |  |  |  | # make sure there is no UTF-8 flag on the password | 
| 1548 | 1 | 50 | 33 |  |  | 6 | if ($] >= 5.006 and (eval { require Encode; Encode::is_utf8($password) } or $@)) { | 
|  |  |  | 33 |  |  |  |  | 
| 1549 |  |  |  |  |  |  | # repack by hand if Encode isn't available | 
| 1550 | 0 | 0 |  |  |  | 0 | $password = $@ ? pack('C*',unpack($] < 5.010000 ? 'U0C*' : 'C0C*',$password)) : Encode::encode('utf8',$password); | 
|  |  | 0 |  |  |  |  |  | 
| 1551 |  |  |  |  |  |  | } | 
| 1552 |  |  |  |  |  |  | } else { | 
| 1553 | 0 |  |  |  |  | 0 | return 'Incorrect password'; | 
| 1554 |  |  |  |  |  |  | } | 
| 1555 | 5 | 100 |  |  |  | 14 | if ($ver < 5) { | 
| 1556 | 2 | 50 |  |  |  | 7 | if (length $password) { | 
| 1557 |  |  |  |  |  |  | # password must be encoding in PDFDocEncoding (ref iso32000) | 
| 1558 | 0 |  |  |  |  | 0 | $password = $et->Encode($password, 'PDFDoc'); | 
| 1559 |  |  |  |  |  |  | # truncate or pad the password to exactly 32 bytes | 
| 1560 | 0 | 0 |  |  |  | 0 | if (length($password) > 32) { | 
|  |  | 0 |  |  |  |  |  | 
| 1561 | 0 |  |  |  |  | 0 | $password = substr($password, 0, 32); | 
| 1562 |  |  |  |  |  |  | } elsif (length($password) < 32) { | 
| 1563 | 0 |  |  |  |  | 0 | $password .= substr($pad, 0, 32-length($password)); | 
| 1564 |  |  |  |  |  |  | } | 
| 1565 |  |  |  |  |  |  | } else { | 
| 1566 | 2 |  |  |  |  | 5 | $password = $pad; | 
| 1567 |  |  |  |  |  |  | } | 
| 1568 | 2 |  |  |  |  | 14 | $key = $password . $o . pack('V', $$encrypt{P}) . $id; | 
| 1569 | 2 |  |  |  |  | 8 | my $rep = 1; | 
| 1570 | 2 | 100 | 66 |  |  | 15 | if ($rev == 3 or $rev == 4) { | 
| 1571 |  |  |  |  |  |  | # must add this if metadata not encrypted | 
| 1572 | 1 | 50 |  |  |  | 7 | $key .= "\xff\xff\xff\xff" unless $$encrypt{_meta}; | 
| 1573 | 1 |  |  |  |  | 6 | $rep += 50; # repeat MD5 50 more times if revision is 3 or greater | 
| 1574 |  |  |  |  |  |  | } | 
| 1575 | 2 |  |  |  |  | 6 | my ($len, $i, $dat); | 
| 1576 | 2 | 100 |  |  |  | 8 | if ($ver == 1) { | 
| 1577 | 1 |  |  |  |  | 2 | $len = 5; | 
| 1578 |  |  |  |  |  |  | } else { | 
| 1579 | 1 |  | 50 |  |  | 5 | $len = $$encrypt{Length} || 40; | 
| 1580 | 1 | 50 |  |  |  | 5 | $len >= 40 or return 'Bad Encrypt Length'; | 
| 1581 | 1 |  |  |  |  | 5 | $len = int($len / 8); | 
| 1582 |  |  |  |  |  |  | } | 
| 1583 | 2 |  |  |  |  | 10 | for ($i=0; $i<$rep; ++$i) { | 
| 1584 | 52 |  |  |  |  | 149 | $key = substr(Digest::MD5::md5($key), 0, $len); | 
| 1585 |  |  |  |  |  |  | } | 
| 1586 |  |  |  |  |  |  | # decrypt U to see if a user password is required | 
| 1587 | 2 | 100 |  |  |  | 34 | if ($rev >= 3) { | 
| 1588 | 1 |  |  |  |  | 6 | $dat = Digest::MD5::md5($pad . $id); | 
| 1589 | 1 |  |  |  |  | 12 | RC4Crypt(\$dat, $key); | 
| 1590 | 1 |  |  |  |  | 9 | for ($i=1; $i<=19; ++$i) { | 
| 1591 | 19 |  |  |  |  | 49 | my @key = unpack('C*', $key); | 
| 1592 | 19 |  |  |  |  | 37 | foreach (@key) { $_ ^= $i; } | 
|  | 304 |  |  |  |  | 383 |  | 
| 1593 | 19 |  |  |  |  | 72 | RC4Crypt(\$dat, pack('C*', @key)); | 
| 1594 |  |  |  |  |  |  | } | 
| 1595 | 1 |  |  |  |  | 10 | $dat .= substr($u, 16); | 
| 1596 |  |  |  |  |  |  | } else { | 
| 1597 | 1 |  |  |  |  | 3 | $dat = $pad; | 
| 1598 | 1 |  |  |  |  | 7 | RC4Crypt(\$dat, $key); | 
| 1599 |  |  |  |  |  |  | } | 
| 1600 | 2 | 50 |  |  |  | 14 | last if $dat eq $u; # all done if this was the correct key | 
| 1601 |  |  |  |  |  |  | } else { | 
| 1602 | 3 | 50 | 33 |  |  | 14 | return 'Invalid O or U Encrypt entries' if length($o) < 48 or length($u) < 48; | 
| 1603 | 3 | 100 |  |  |  | 13 | if (length $password) { | 
| 1604 |  |  |  |  |  |  | # Note: this should be good for passwords containing reasonable characters, | 
| 1605 |  |  |  |  |  |  | # but to be bullet-proof we need to apply the SASLprep (IETF RFC 4013) profile | 
| 1606 |  |  |  |  |  |  | # of stringprep (IETF RFC 3454) to the password before encoding in UTF-8 | 
| 1607 | 1 |  |  |  |  | 6 | $password = $et->Encode($password, 'UTF8'); | 
| 1608 | 1 | 50 |  |  |  | 6 | $password = substr($password, 0, 127) if length($password) > 127; | 
| 1609 |  |  |  |  |  |  | } | 
| 1610 |  |  |  |  |  |  | # test for the owner password | 
| 1611 | 3 |  |  |  |  | 17 | my $sha = GetHash($password, substr($o,32,8), substr($u,0,48), $rev); | 
| 1612 | 3 | 100 |  |  |  | 12 | if ($sha eq substr($o, 0, 32)) { | 
| 1613 | 2 |  |  |  |  | 8 | $key = GetHash($password, substr($o,40,8), substr($u,0,48), $rev); | 
| 1614 | 2 |  |  |  |  | 14 | my $dat = ("\0" x 16) . $parm{OE}; | 
| 1615 |  |  |  |  |  |  | # decrypt with no padding | 
| 1616 | 2 |  |  |  |  | 10 | my $err = Image::ExifTool::AES::Crypt(\$dat, $key, 0, 1); | 
| 1617 | 2 | 50 |  |  |  | 14 | return $err if $err; | 
| 1618 | 2 |  |  |  |  | 9 | $key = $dat;    # use this as the file decryption key | 
| 1619 | 2 |  |  |  |  | 10 | last; | 
| 1620 |  |  |  |  |  |  | } | 
| 1621 |  |  |  |  |  |  | # test for the user password | 
| 1622 | 1 |  |  |  |  | 7 | $sha = GetHash($password, substr($u,32,8), '', $rev); | 
| 1623 | 1 | 50 |  |  |  | 7 | if ($sha eq substr($u, 0, 32)) { | 
| 1624 | 0 |  |  |  |  | 0 | $key = GetHash($password, substr($u,40,8), '', $rev); | 
| 1625 | 0 |  |  |  |  | 0 | my $dat = ("\0" x 16) . $parm{UE}; | 
| 1626 | 0 |  |  |  |  | 0 | my $err = Image::ExifTool::AES::Crypt(\$dat, $key, 0, 1); | 
| 1627 | 0 | 0 |  |  |  | 0 | return $err if $err; | 
| 1628 | 0 |  |  |  |  | 0 | $key = $dat;    # use this as the file decryption key | 
| 1629 | 0 |  |  |  |  | 0 | last; | 
| 1630 |  |  |  |  |  |  | } | 
| 1631 |  |  |  |  |  |  | } | 
| 1632 |  |  |  |  |  |  | } | 
| 1633 | 4 |  |  |  |  | 20 | $$encrypt{_key} = $key; # save the file-level encryption key | 
| 1634 | 4 |  |  |  |  | 14 | $cryptInfo = $encrypt;  # save reference to the file-level Encrypt object | 
| 1635 | 4 |  |  |  |  | 33 | return undef;           # success! | 
| 1636 |  |  |  |  |  |  | } | 
| 1637 |  |  |  |  |  |  |  | 
| 1638 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 1639 |  |  |  |  |  |  | # Decrypt/Encrypt data | 
| 1640 |  |  |  |  |  |  | # Inputs: 0) data ref | 
| 1641 |  |  |  |  |  |  | #         1) PDF object reference to use as crypt key extension (may be 'none' to | 
| 1642 |  |  |  |  |  |  | #            avoid extending the encryption key, as for streams with Crypt Filter) | 
| 1643 |  |  |  |  |  |  | #         2) encrypt flag (false for decryption) | 
| 1644 |  |  |  |  |  |  | sub Crypt($$;$) | 
| 1645 |  |  |  |  |  |  | { | 
| 1646 | 29 | 100 |  | 29 | 0 | 150 | return unless $cryptInfo; | 
| 1647 | 4 |  |  |  |  | 22 | my ($dataPt, $keyExt, $encrypt) = @_; | 
| 1648 |  |  |  |  |  |  | # do not decrypt if the key extension object is undefined | 
| 1649 |  |  |  |  |  |  | # (this doubles as a flag to disable decryption/encryption) | 
| 1650 | 4 | 50 |  |  |  | 16 | return unless defined $keyExt; | 
| 1651 | 4 |  |  |  |  | 12 | my $key = $$cryptInfo{_key}; | 
| 1652 |  |  |  |  |  |  | # apply the necessary crypt key extension | 
| 1653 | 4 | 100 |  |  |  | 21 | unless ($$cryptInfo{_aesv3}) { | 
| 1654 | 2 | 50 |  |  |  | 8 | unless ($keyExt eq 'none') { | 
| 1655 |  |  |  |  |  |  | # extend crypt key using object and generation number | 
| 1656 | 2 | 50 |  |  |  | 23 | unless ($keyExt =~ /^(I\d+ )?(\d+) (\d+)/) { | 
| 1657 | 0 |  |  |  |  | 0 | $$cryptInfo{_error} = 'Invalid object reference for encryption'; | 
| 1658 | 0 |  |  |  |  | 0 | return; | 
| 1659 |  |  |  |  |  |  | } | 
| 1660 | 2 |  |  |  |  | 19 | $key .= substr(pack('V', $2), 0, 3) . substr(pack('V', $3), 0, 2); | 
| 1661 |  |  |  |  |  |  | } | 
| 1662 |  |  |  |  |  |  | # add AES-128 salt if necessary (this little gem is conveniently | 
| 1663 |  |  |  |  |  |  | # omitted from the Adobe PDF 1.6 documentation, causing me to | 
| 1664 |  |  |  |  |  |  | # waste 12 hours trying to figure out why this wasn't working -- | 
| 1665 |  |  |  |  |  |  | # it appears in ISO32000 though, so I should have been using that) | 
| 1666 | 2 | 100 |  |  |  | 11 | $key .= 'sAlT' if $$cryptInfo{_aesv2}; | 
| 1667 | 2 |  |  |  |  | 8 | my $len = length($key); | 
| 1668 | 2 |  |  |  |  | 13 | $key = Digest::MD5::md5($key);              # get 16-byte MD5 digest | 
| 1669 | 2 | 100 |  |  |  | 12 | $key = substr($key, 0, $len) if $len < 16;  # trim if necessary | 
| 1670 |  |  |  |  |  |  | } | 
| 1671 |  |  |  |  |  |  | # perform the decryption/encryption | 
| 1672 | 4 | 100 | 100 |  |  | 28 | if ($$cryptInfo{_aesv2} or $$cryptInfo{_aesv3}) { | 
| 1673 | 3 |  |  |  |  | 47 | require Image::ExifTool::AES; | 
| 1674 | 3 |  |  |  |  | 18 | my $err = Image::ExifTool::AES::Crypt($dataPt, $key, $encrypt); | 
| 1675 | 3 | 50 |  |  |  | 22 | $err and $$cryptInfo{_error} = $err; | 
| 1676 |  |  |  |  |  |  | } else { | 
| 1677 | 1 |  |  |  |  | 3 | RC4Crypt($dataPt, $key); | 
| 1678 |  |  |  |  |  |  | } | 
| 1679 |  |  |  |  |  |  | } | 
| 1680 |  |  |  |  |  |  |  | 
| 1681 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 1682 |  |  |  |  |  |  | # Decrypt/Encrypt stream data | 
| 1683 |  |  |  |  |  |  | # Inputs: 0) dictionary ref, 1) PDF object reference to use as crypt key extension | 
| 1684 |  |  |  |  |  |  | sub CryptStream($$) | 
| 1685 |  |  |  |  |  |  | { | 
| 1686 | 52 | 50 |  | 52 | 0 | 150 | return unless $cryptStream; | 
| 1687 | 0 |  |  |  |  | 0 | my ($dict, $keyExt) = @_; | 
| 1688 | 0 |  | 0 |  |  | 0 | my $type = $$dict{Type} || ''; | 
| 1689 |  |  |  |  |  |  | # XRef streams are not encrypted (ref 3, page 50), | 
| 1690 |  |  |  |  |  |  | # and Metadata may or may not be encrypted | 
| 1691 | 0 | 0 | 0 |  |  | 0 | if ($cryptInfo and $type ne '/XRef' and | 
|  |  |  | 0 |  |  |  |  | 
|  |  |  | 0 |  |  |  |  | 
| 1692 |  |  |  |  |  |  | ($$cryptInfo{_meta} or $type ne '/Metadata')) | 
| 1693 |  |  |  |  |  |  | { | 
| 1694 | 0 |  |  |  |  | 0 | Crypt(\$$dict{_stream}, $keyExt, $$dict{_decrypted}); | 
| 1695 |  |  |  |  |  |  | # toggle _decrypted flag | 
| 1696 | 0 | 0 |  |  |  | 0 | $$dict{_decrypted} = ($$dict{_decrypted} ? undef : 1); | 
| 1697 |  |  |  |  |  |  | } else { | 
| 1698 | 0 |  |  |  |  | 0 | $$dict{_decrypted} = 0; # stream should never be encrypted | 
| 1699 |  |  |  |  |  |  | } | 
| 1700 |  |  |  |  |  |  | } | 
| 1701 |  |  |  |  |  |  |  | 
| 1702 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 1703 |  |  |  |  |  |  | # Generate a new PDF tag (based on its ID) and add it to a tag table | 
| 1704 |  |  |  |  |  |  | # Inputs: 0) tag table ref, 1) tag ID | 
| 1705 |  |  |  |  |  |  | # Returns: tag info ref | 
| 1706 |  |  |  |  |  |  | sub NewPDFTag($$) | 
| 1707 |  |  |  |  |  |  | { | 
| 1708 | 0 |  |  | 0 | 0 | 0 | my ($tagTablePtr, $tag) = @_; | 
| 1709 | 0 |  |  |  |  | 0 | my $name = $tag; | 
| 1710 |  |  |  |  |  |  | # translate URL-like escape sequences | 
| 1711 | 0 |  |  |  |  | 0 | $name =~ s/#([0-9a-f]{2})/chr(hex($1))/ige; | 
|  | 0 |  |  |  |  | 0 |  | 
| 1712 | 0 |  |  |  |  | 0 | $name =~ s/[^-\w]+/_/g;         # translate invalid characters to an underline | 
| 1713 | 0 |  |  |  |  | 0 | $name =~ s/(^|_)([a-z])/\U$2/g; # start words with upper case | 
| 1714 | 0 |  |  |  |  | 0 | my $tagInfo = { Name => $name }; | 
| 1715 | 0 |  |  |  |  | 0 | AddTagToTable($tagTablePtr, $tag, $tagInfo); | 
| 1716 | 0 |  |  |  |  | 0 | return $tagInfo; | 
| 1717 |  |  |  |  |  |  | } | 
| 1718 |  |  |  |  |  |  |  | 
| 1719 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 1720 |  |  |  |  |  |  | # Process AcroForm dictionary to set HasXMLFormsArchitecture flag | 
| 1721 |  |  |  |  |  |  | # Inputs: Same as ProcessDict | 
| 1722 |  |  |  |  |  |  | sub ProcessAcroForm($$$$;$$) | 
| 1723 |  |  |  |  |  |  | { | 
| 1724 | 0 |  |  | 0 | 0 | 0 | my ($et, $tagTablePtr, $dict, $xref, $nesting, $type) = @_; | 
| 1725 | 0 | 0 |  |  |  | 0 | $et->HandleTag($tagTablePtr, '_has_xfa', $$dict{XFA} ? 'true' : 'false'); | 
| 1726 | 0 |  |  |  |  | 0 | return ProcessDict($et, $tagTablePtr, $dict, $xref, $nesting, $type); | 
| 1727 |  |  |  |  |  |  | } | 
| 1728 |  |  |  |  |  |  |  | 
| 1729 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 1730 |  |  |  |  |  |  | # Expand array into a string | 
| 1731 |  |  |  |  |  |  | # Inputs: 0) array ref | 
| 1732 |  |  |  |  |  |  | # Return: string | 
| 1733 |  |  |  |  |  |  | sub ExpandArray($) | 
| 1734 |  |  |  |  |  |  | { | 
| 1735 | 0 |  |  | 0 | 0 | 0 | my $val = shift; | 
| 1736 | 0 |  |  |  |  | 0 | my @list = @$val; | 
| 1737 | 0 |  |  |  |  | 0 | foreach (@list) { | 
| 1738 | 0 | 0 |  |  |  | 0 | ref $_ eq 'SCALAR' and $_ = "ref($$_)", next; | 
| 1739 | 0 | 0 |  |  |  | 0 | ref $_ eq 'ARRAY' and $_ = ExpandArray($_), next; | 
| 1740 | 0 | 0 |  |  |  | 0 | defined $_ or $_ = '', next; | 
| 1741 |  |  |  |  |  |  | } | 
| 1742 | 0 |  |  |  |  | 0 | return '[' . join(',',@list) . ']'; | 
| 1743 |  |  |  |  |  |  | } | 
| 1744 |  |  |  |  |  |  |  | 
| 1745 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 1746 |  |  |  |  |  |  | # Process PDF dictionary extract tag values | 
| 1747 |  |  |  |  |  |  | # Inputs: 0) ExifTool object reference, 1) tag table reference | 
| 1748 |  |  |  |  |  |  | #         2) dictionary reference, 3) cross-reference table reference, | 
| 1749 |  |  |  |  |  |  | #         4) nesting depth, 5) dictionary capture type | 
| 1750 |  |  |  |  |  |  | sub ProcessDict($$$$;$$) | 
| 1751 |  |  |  |  |  |  | { | 
| 1752 | 350 |  |  | 350 | 0 | 900 | my ($et, $tagTablePtr, $dict, $xref, $nesting, $type) = @_; | 
| 1753 | 350 |  |  |  |  | 1057 | my $verbose = $et->Options('Verbose'); | 
| 1754 | 350 |  |  |  |  | 848 | my $unknown = $$tagTablePtr{EXTRACT_UNKNOWN}; | 
| 1755 | 350 |  | 33 |  |  | 1238 | my $embedded = (defined $unknown and not $unknown and $et->Options('ExtractEmbedded')); | 
| 1756 | 350 |  |  |  |  | 516 | my @tags = @{$$dict{_tags}}; | 
|  | 350 |  |  |  |  | 1552 |  | 
| 1757 | 350 |  |  |  |  | 643 | my ($next, %join); | 
| 1758 | 350 |  |  |  |  | 515 | my $index = 0; | 
| 1759 |  |  |  |  |  |  |  | 
| 1760 | 350 |  | 100 |  |  | 865 | $nesting = ($nesting || 0) + 1; | 
| 1761 | 350 | 50 |  |  |  | 724 | if ($nesting > 50) { | 
| 1762 | 0 |  |  |  |  | 0 | $et->WarnOnce('Nesting too deep (directory ignored)'); | 
| 1763 | 0 |  |  |  |  | 0 | return; | 
| 1764 |  |  |  |  |  |  | } | 
| 1765 |  |  |  |  |  |  | # save entire dictionary for rewriting if specified | 
| 1766 | 350 | 50 | 100 |  |  | 1415 | if ($$et{PDF_CAPTURE} and $$tagTablePtr{VARS} and | 
|  |  |  | 66 |  |  |  |  | 
| 1767 |  |  |  |  |  |  | $tagTablePtr->{VARS}->{CAPTURE}) | 
| 1768 |  |  |  |  |  |  | { | 
| 1769 | 66 |  |  |  |  | 112 | my $name; | 
| 1770 | 66 |  |  |  |  | 115 | foreach $name (@{$tagTablePtr->{VARS}->{CAPTURE}}) { | 
|  | 66 |  |  |  |  | 229 |  | 
| 1771 | 82 | 100 |  |  |  | 254 | next if $$et{PDF_CAPTURE}{$name}; | 
| 1772 |  |  |  |  |  |  | # make sure we load the right type if indicated | 
| 1773 | 66 | 50 | 66 |  |  | 284 | next if $type and $type ne $name; | 
| 1774 | 66 |  |  |  |  | 172 | $$et{PDF_CAPTURE}{$name} = $dict; | 
| 1775 | 66 |  |  |  |  | 110 | last; | 
| 1776 |  |  |  |  |  |  | } | 
| 1777 |  |  |  |  |  |  | } | 
| 1778 |  |  |  |  |  |  | # | 
| 1779 |  |  |  |  |  |  | # extract information from all tags in the dictionary | 
| 1780 |  |  |  |  |  |  | # | 
| 1781 | 350 |  |  |  |  | 517 | for (;;) { | 
| 1782 | 1473 |  |  |  |  | 2160 | my ($tag, $isSubDoc); | 
| 1783 | 1473 | 100 | 33 |  |  | 3035 | if (@tags) { | 
|  |  | 50 |  |  |  |  |  | 
| 1784 | 1123 |  |  |  |  | 1944 | $tag = shift @tags; | 
| 1785 |  |  |  |  |  |  | } elsif (defined $next and not $next) { | 
| 1786 | 0 |  |  |  |  | 0 | $tag = 'Next'; | 
| 1787 | 0 |  |  |  |  | 0 | $next = 1; | 
| 1788 |  |  |  |  |  |  | } else { | 
| 1789 | 350 |  |  |  |  | 511 | last; | 
| 1790 |  |  |  |  |  |  | } | 
| 1791 | 1123 |  |  |  |  | 2441 | my $val = $$dict{$tag}; | 
| 1792 | 1123 |  |  |  |  | 2839 | my $tagInfo = $et->GetTagInfo($tagTablePtr, $tag); | 
| 1793 | 1123 | 100 | 33 |  |  | 3175 | if ($tagInfo) { | 
|  |  | 50 | 33 |  |  |  |  | 
|  |  |  | 0 |  |  |  |  | 
|  |  |  | 0 |  |  |  |  | 
| 1794 | 419 | 50 |  |  |  | 1079 | undef $tagInfo if $$tagInfo{NoProcess}; | 
| 1795 |  |  |  |  |  |  | } elsif ($embedded and $tag =~ /^(.*?)(\d+)$/ and | 
| 1796 |  |  |  |  |  |  | $$tagTablePtr{$1} and (ref $val ne 'SCALAR' or not $fetched{$$val})) | 
| 1797 |  |  |  |  |  |  | { | 
| 1798 | 0 |  |  |  |  | 0 | my ($name, $num) = ($1, $2); | 
| 1799 | 0 |  |  |  |  | 0 | $tagInfo = $et->GetTagInfo($tagTablePtr, $name); | 
| 1800 | 0 | 0 | 0 |  |  | 0 | if (ref $tagInfo eq 'HASH' and $$tagInfo{JoinStreams}) { | 
| 1801 | 0 |  |  |  |  | 0 | $fetched{$$val} = 1; | 
| 1802 | 0 |  |  |  |  | 0 | my $obj = FetchObject($et, $$val, $xref, $tag); | 
| 1803 | 0 | 0 |  |  |  | 0 | $join{$name} = [] unless $join{$name}; | 
| 1804 | 0 | 0 | 0 |  |  | 0 | next unless ref $obj eq 'HASH' and $$obj{_stream}; | 
| 1805 |  |  |  |  |  |  | # save all the stream data to join later | 
| 1806 | 0 |  |  |  |  | 0 | DecodeStream($et, $obj); | 
| 1807 | 0 |  |  |  |  | 0 | $join{$name}->[$num] = $$obj{_stream}; | 
| 1808 | 0 |  |  |  |  | 0 | undef $tagInfo;    # don't process | 
| 1809 |  |  |  |  |  |  | } else { | 
| 1810 | 0 |  |  |  |  | 0 | $isSubDoc = 1;  # treat as a sub-document | 
| 1811 |  |  |  |  |  |  | } | 
| 1812 |  |  |  |  |  |  | } | 
| 1813 | 1123 | 50 |  |  |  | 2147 | if ($verbose) { | 
| 1814 | 0 |  |  |  |  | 0 | my ($val2, $extra); | 
| 1815 | 0 | 0 |  |  |  | 0 | if (ref $val eq 'SCALAR') { | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 1816 | 0 |  |  |  |  | 0 | $extra = ", indirect object ($$val)"; | 
| 1817 | 0 | 0 | 0 |  |  | 0 | if ($fetched{$$val}) { | 
|  |  | 0 |  |  |  |  |  | 
| 1818 | 0 |  |  |  |  | 0 | $val2 = "ref($$val)"; | 
| 1819 |  |  |  |  |  |  | } elsif ($tag eq 'Next' and not $next) { | 
| 1820 |  |  |  |  |  |  | # handle 'Next' links after all others | 
| 1821 | 0 |  |  |  |  | 0 | $next = 0; | 
| 1822 | 0 |  |  |  |  | 0 | next; | 
| 1823 |  |  |  |  |  |  | } else { | 
| 1824 | 0 |  |  |  |  | 0 | $fetched{$$val} = 1; | 
| 1825 | 0 |  |  |  |  | 0 | $val = FetchObject($et, $$val, $xref, $tag); | 
| 1826 | 0 | 0 |  |  |  | 0 | unless (defined $val) { | 
| 1827 | 0 |  |  |  |  | 0 | my $str; | 
| 1828 | 0 | 0 |  |  |  | 0 | if (defined $lastOffset) { | 
| 1829 | 0 |  |  |  |  | 0 | $val2 = ''; | 
| 1830 | 0 |  |  |  |  | 0 | $str = 'Object was freed'; | 
| 1831 |  |  |  |  |  |  | } else { | 
| 1832 | 0 |  |  |  |  | 0 | $val2 = ''; | 
| 1833 | 0 |  |  |  |  | 0 | $str = 'Error reading object'; | 
| 1834 |  |  |  |  |  |  | } | 
| 1835 | 0 |  |  |  |  | 0 | $et->VPrint(0, "$$et{INDENT}${str}:\n"); | 
| 1836 |  |  |  |  |  |  | } | 
| 1837 |  |  |  |  |  |  | } | 
| 1838 |  |  |  |  |  |  | } elsif (ref $val eq 'HASH') { | 
| 1839 | 0 |  |  |  |  | 0 | $extra = ', direct dictionary'; | 
| 1840 |  |  |  |  |  |  | } elsif (ref $val eq 'ARRAY') { | 
| 1841 | 0 |  |  |  |  | 0 | $extra = ', direct array of ' . scalar(@$val) . ' objects'; | 
| 1842 |  |  |  |  |  |  | } else { | 
| 1843 | 0 |  |  |  |  | 0 | $extra = ', direct object'; | 
| 1844 |  |  |  |  |  |  | } | 
| 1845 | 0 |  |  |  |  | 0 | my $isSubdir; | 
| 1846 | 0 | 0 |  |  |  | 0 | if (ref $val eq 'HASH') { | 
|  |  | 0 |  |  |  |  |  | 
| 1847 | 0 |  |  |  |  | 0 | $isSubdir = 1; | 
| 1848 |  |  |  |  |  |  | } elsif (ref $val eq 'ARRAY') { | 
| 1849 |  |  |  |  |  |  | # recurse into objects in arrays only if they are lists of | 
| 1850 |  |  |  |  |  |  | # dictionaries or indirect objects which could be dictionaries | 
| 1851 | 0 | 0 |  |  |  | 0 | $isSubdir = 1 if @$val; | 
| 1852 | 0 |  |  |  |  | 0 | foreach (@$val) { | 
| 1853 | 0 | 0 | 0 |  |  | 0 | next if ref $_ eq 'HASH' or ref $_ eq 'SCALAR'; | 
| 1854 | 0 |  |  |  |  | 0 | undef $isSubdir; | 
| 1855 | 0 |  |  |  |  | 0 | last; | 
| 1856 |  |  |  |  |  |  | } | 
| 1857 |  |  |  |  |  |  | } | 
| 1858 | 0 | 0 |  |  |  | 0 | if ($isSubdir) { | 
| 1859 |  |  |  |  |  |  | # create bogus subdirectory to recurse into this dict | 
| 1860 | 0 | 0 |  |  |  | 0 | $tagInfo or $tagInfo = { | 
| 1861 |  |  |  |  |  |  | Name => $tag, | 
| 1862 |  |  |  |  |  |  | SubDirectory => { TagTable => 'Image::ExifTool::PDF::Unknown' }, | 
| 1863 |  |  |  |  |  |  | }; | 
| 1864 |  |  |  |  |  |  | } else { | 
| 1865 | 0 | 0 |  |  |  | 0 | $val2 = ExpandArray($val) if ref $val eq 'ARRAY'; | 
| 1866 |  |  |  |  |  |  | # generate tag info if we will use it later | 
| 1867 | 0 | 0 | 0 |  |  | 0 | if (not $tagInfo and defined $val and $unknown) { | 
|  |  |  | 0 |  |  |  |  | 
| 1868 | 0 |  |  |  |  | 0 | $tagInfo = NewPDFTag($tagTablePtr, $tag); | 
| 1869 |  |  |  |  |  |  | } | 
| 1870 |  |  |  |  |  |  | } | 
| 1871 | 0 |  | 0 |  |  | 0 | $et->VerboseInfo($tag, $tagInfo, | 
| 1872 |  |  |  |  |  |  | Value => $val2 || $val, | 
| 1873 |  |  |  |  |  |  | Extra => $extra, | 
| 1874 |  |  |  |  |  |  | Index => $index++, | 
| 1875 |  |  |  |  |  |  | ); | 
| 1876 | 0 | 0 |  |  |  | 0 | next unless defined $val; | 
| 1877 |  |  |  |  |  |  | } | 
| 1878 | 1123 | 100 |  |  |  | 2074 | unless ($tagInfo) { | 
| 1879 |  |  |  |  |  |  | # add any tag found in Info dictionary to table | 
| 1880 | 704 | 50 |  |  |  | 1487 | next unless $unknown; | 
| 1881 | 0 |  |  |  |  | 0 | $tagInfo = NewPDFTag($tagTablePtr, $tag); | 
| 1882 |  |  |  |  |  |  | } | 
| 1883 |  |  |  |  |  |  | # increment document number if necessary | 
| 1884 | 419 |  |  |  |  | 693 | my ($oldDocNum, $oldNumTags); | 
| 1885 | 419 | 50 |  |  |  | 819 | if ($isSubDoc) { | 
| 1886 | 0 |  |  |  |  | 0 | $oldDocNum = $$et{DOC_NUM}; | 
| 1887 | 0 |  |  |  |  | 0 | $oldNumTags = $$et{NUM_FOUND}; | 
| 1888 | 0 |  |  |  |  | 0 | $$et{DOC_NUM} = ++$$et{DOC_COUNT}; | 
| 1889 |  |  |  |  |  |  | } | 
| 1890 | 419 | 100 |  |  |  | 1133 | if ($$tagInfo{SubDirectory}) { | 
| 1891 |  |  |  |  |  |  | # process the subdirectory | 
| 1892 | 332 |  |  |  |  | 575 | my @subDicts; | 
| 1893 | 332 | 100 |  |  |  | 801 | if (ref $val eq 'ARRAY') { | 
| 1894 |  |  |  |  |  |  | # hack to convert array to dictionary if necessary | 
| 1895 | 37 | 50 | 33 |  |  | 288 | if ($$tagInfo{ConvertToDict} and @$val == 2 and not ref $$val[0]) { | 
|  |  |  | 33 |  |  |  |  | 
| 1896 | 0 |  |  |  |  | 0 | my $tg = $$val[0]; | 
| 1897 | 0 |  |  |  |  | 0 | $tg =~ s(^/)();   # remove name | 
| 1898 | 0 |  |  |  |  | 0 | my %dict = ( _tags => [ $tg ], $tg => $$val[1] ); | 
| 1899 | 0 |  |  |  |  | 0 | @subDicts = ( \%dict ); | 
| 1900 |  |  |  |  |  |  | } else { | 
| 1901 | 37 |  |  |  |  | 88 | @subDicts = @{$val}; | 
|  | 37 |  |  |  |  | 116 |  | 
| 1902 |  |  |  |  |  |  | } | 
| 1903 |  |  |  |  |  |  | } else { | 
| 1904 | 295 |  |  |  |  | 548 | @subDicts = ( $val ); | 
| 1905 |  |  |  |  |  |  | } | 
| 1906 |  |  |  |  |  |  | # loop through all values of this tag | 
| 1907 | 332 |  |  |  |  | 576 | for (;;) { | 
| 1908 | 664 | 100 |  |  |  | 1540 | my $subDict = shift @subDicts or last; | 
| 1909 |  |  |  |  |  |  | # save last fetched object in case we fetch another one here | 
| 1910 | 332 |  |  |  |  | 632 | my $prevFetched = $lastFetched; | 
| 1911 | 332 | 100 |  |  |  | 877 | if (ref $subDict eq 'SCALAR') { | 
| 1912 |  |  |  |  |  |  | # only fetch once (other copies are obsolete) | 
| 1913 | 244 | 100 |  |  |  | 675 | next if $fetched{$$subDict}; | 
| 1914 | 197 | 100 |  |  |  | 467 | if ($$tagInfo{IgnoreDuplicates}) { | 
| 1915 | 28 |  |  |  |  | 97 | my $flag = "ProcessedPDF_$tag"; | 
| 1916 | 28 | 50 |  |  |  | 101 | if ($$et{$flag}) { | 
| 1917 | 0 | 0 |  |  |  | 0 | next if $et->WarnOnce("Ignored duplicate $tag dictionary", 2); | 
| 1918 |  |  |  |  |  |  | } else { | 
| 1919 | 28 |  |  |  |  | 86 | $$et{$flag} = 1; | 
| 1920 |  |  |  |  |  |  | } | 
| 1921 |  |  |  |  |  |  | } | 
| 1922 |  |  |  |  |  |  | # load dictionary via an indirect reference | 
| 1923 | 197 |  |  |  |  | 458 | $fetched{$$subDict} = 1; | 
| 1924 | 197 |  |  |  |  | 502 | my $obj = FetchObject($et, $$subDict, $xref, $tag); | 
| 1925 | 197 | 100 |  |  |  | 603 | unless (defined $obj) { | 
| 1926 | 5 | 50 |  |  |  | 19 | unless (defined $lastOffset) { | 
| 1927 | 0 |  |  |  |  | 0 | $et->Warn("Error reading $tag object ($$subDict)"); | 
| 1928 |  |  |  |  |  |  | } | 
| 1929 | 5 |  |  |  |  | 12 | next; | 
| 1930 |  |  |  |  |  |  | } | 
| 1931 | 192 |  |  |  |  | 381 | $subDict = $obj; | 
| 1932 |  |  |  |  |  |  | } | 
| 1933 | 280 | 50 |  |  |  | 776 | if (ref $subDict eq 'ARRAY') { | 
| 1934 |  |  |  |  |  |  | # convert array of key/value pairs to a hash | 
| 1935 | 0 | 0 |  |  |  | 0 | next if @$subDict < 2; | 
| 1936 | 0 |  |  |  |  | 0 | my %hash = ( _tags => [] ); | 
| 1937 | 0 |  |  |  |  | 0 | while (@$subDict >= 2) { | 
| 1938 | 0 |  |  |  |  | 0 | my $key = shift @$subDict; | 
| 1939 | 0 |  |  |  |  | 0 | $key =~ s/^\///; | 
| 1940 | 0 |  |  |  |  | 0 | push @{$hash{_tags}}, $key; | 
|  | 0 |  |  |  |  | 0 |  | 
| 1941 | 0 |  |  |  |  | 0 | $hash{$key} = shift @$subDict; | 
| 1942 |  |  |  |  |  |  | } | 
| 1943 | 0 |  |  |  |  | 0 | $subDict = \%hash; | 
| 1944 |  |  |  |  |  |  | } else { | 
| 1945 | 280 | 50 |  |  |  | 794 | next unless ref $subDict eq 'HASH'; | 
| 1946 |  |  |  |  |  |  | } | 
| 1947 |  |  |  |  |  |  | # set flag to re-crypt all strings when rewriting if the dictionary | 
| 1948 |  |  |  |  |  |  | # came from an encrypted stream | 
| 1949 | 280 | 50 |  |  |  | 620 | $$subDict{_needCrypt}{'*'} = 1 unless $lastFetched; | 
| 1950 | 280 |  |  |  |  | 1156 | my $subTablePtr = GetTagTable($tagInfo->{SubDirectory}->{TagTable}); | 
| 1951 | 280 | 50 |  |  |  | 755 | if (not $verbose) { | 
|  |  | 0 |  |  |  |  |  | 
| 1952 | 280 |  | 50 |  |  | 1359 | my $proc = $$subTablePtr{PROCESS_PROC} || \&ProcessDict; | 
| 1953 | 280 |  |  |  |  | 964 | &$proc($et, $subTablePtr, $subDict, $xref, $nesting); | 
| 1954 |  |  |  |  |  |  | } elsif ($next) { | 
| 1955 |  |  |  |  |  |  | # handle 'Next' links at this level to avoid deep recursion | 
| 1956 | 0 |  |  |  |  | 0 | undef $next; | 
| 1957 | 0 |  |  |  |  | 0 | $index = 0; | 
| 1958 | 0 |  |  |  |  | 0 | $tagTablePtr = $subTablePtr; | 
| 1959 | 0 |  |  |  |  | 0 | $dict = $subDict; | 
| 1960 | 0 |  |  |  |  | 0 | @tags = @{$$subDict{_tags}}; | 
|  | 0 |  |  |  |  | 0 |  | 
| 1961 | 0 |  |  |  |  | 0 | $et->VerboseDir($tag, scalar(@tags)); | 
| 1962 |  |  |  |  |  |  | } else { | 
| 1963 | 0 |  |  |  |  | 0 | my $oldIndent = $$et{INDENT}; | 
| 1964 | 0 |  |  |  |  | 0 | my $oldDir = $$et{DIR_NAME}; | 
| 1965 | 0 |  |  |  |  | 0 | $$et{INDENT} .= '| '; | 
| 1966 | 0 |  |  |  |  | 0 | $$et{DIR_NAME} = $tag; | 
| 1967 | 0 |  |  |  |  | 0 | $et->VerboseDir($tag, scalar(@{$$subDict{_tags}})); | 
|  | 0 |  |  |  |  | 0 |  | 
| 1968 | 0 |  |  |  |  | 0 | ProcessDict($et, $subTablePtr, $subDict, $xref, $nesting); | 
| 1969 | 0 |  |  |  |  | 0 | $$et{INDENT} = $oldIndent; | 
| 1970 | 0 |  |  |  |  | 0 | $$et{DIR_NAME} = $oldDir; | 
| 1971 |  |  |  |  |  |  | } | 
| 1972 | 280 |  |  |  |  | 1395 | $lastFetched = $prevFetched; | 
| 1973 |  |  |  |  |  |  | } | 
| 1974 |  |  |  |  |  |  | } else { | 
| 1975 |  |  |  |  |  |  | # fetch object if necessary | 
| 1976 |  |  |  |  |  |  | # (OS X 10.6 writes indirect objects in the Info dictionary!) | 
| 1977 | 87 | 50 |  |  |  | 274 | if (ref $val eq 'SCALAR') { | 
| 1978 | 0 |  |  |  |  | 0 | my $prevFetched = $lastFetched; | 
| 1979 |  |  |  |  |  |  | # (note: fetching the same object multiple times is OK here) | 
| 1980 | 0 |  |  |  |  | 0 | $val = FetchObject($et, $$val, $xref, $tag); | 
| 1981 | 0 | 0 |  |  |  | 0 | if (defined $val) { | 
| 1982 | 0 |  |  |  |  | 0 | $val = ReadPDFValue($val); | 
| 1983 |  |  |  |  |  |  | # set flag to re-encrypt if necessary if rewritten | 
| 1984 | 0 | 0 |  |  |  | 0 | $$dict{_needCrypt}{$tag} = ($lastFetched ? 0 : 1) if $cryptString; | 
|  |  | 0 |  |  |  |  |  | 
| 1985 | 0 |  |  |  |  | 0 | $lastFetched = $prevFetched; # restore last fetched object reference | 
| 1986 |  |  |  |  |  |  | } | 
| 1987 |  |  |  |  |  |  | } else { | 
| 1988 | 87 |  |  |  |  | 228 | $val = ReadPDFValue($val); | 
| 1989 |  |  |  |  |  |  | } | 
| 1990 | 87 | 100 |  |  |  | 366 | if (ref $val) { | 
|  |  | 50 |  |  |  |  |  | 
| 1991 | 12 | 50 |  |  |  | 89 | if (ref $val eq 'ARRAY') { | 
| 1992 | 12 | 50 |  |  |  | 68 | delete $$et{LIST_TAGS}{$tagInfo} if $$tagInfo{List}; | 
| 1993 | 12 |  |  |  |  | 23 | my $v; | 
| 1994 | 12 |  |  |  |  | 33 | foreach $v (@$val) { | 
| 1995 | 20 |  |  |  |  | 64 | $et->FoundTag($tagInfo, $v); | 
| 1996 |  |  |  |  |  |  | } | 
| 1997 |  |  |  |  |  |  | } | 
| 1998 |  |  |  |  |  |  | } elsif (defined $val) { | 
| 1999 |  |  |  |  |  |  | # convert from UTF-16 (big endian) to UTF-8 or Latin if necessary | 
| 2000 |  |  |  |  |  |  | # unless this is binary data (hex-encoded strings would not have been converted) | 
| 2001 | 75 |  | 100 |  |  | 534 | my $format = $$tagInfo{Format} || $$tagInfo{Writable} || 'string'; | 
| 2002 | 75 | 100 |  |  |  | 297 | $val = ConvertPDFDate($val) if $format eq 'date'; | 
| 2003 | 75 | 50 | 33 |  |  | 461 | if (not $$tagInfo{Binary} and $val =~ /[\x18-\x1f\x80-\xff]/) { | 
| 2004 |  |  |  |  |  |  | # text string is already in Unicode if it starts with "\xfe\xff", | 
| 2005 |  |  |  |  |  |  | # otherwise we must first convert from PDFDocEncoding | 
| 2006 | 0 | 0 |  |  |  | 0 | $val = $et->Decode($val, ($val=~s/^\xfe\xff// ? 'UCS2' : 'PDFDoc'), 'MM'); | 
| 2007 |  |  |  |  |  |  | } | 
| 2008 | 75 | 100 | 66 |  |  | 277 | if ($$tagInfo{List} and not $$et{OPTIONS}{NoPDFList}) { | 
| 2009 |  |  |  |  |  |  | # separate tokens in comma or whitespace delimited lists | 
| 2010 | 12 | 50 |  |  |  | 157 | my @values = ($val =~ /,/) ? split /,+\s*/, $val : split ' ', $val; | 
| 2011 | 12 |  |  |  |  | 43 | foreach $val (@values) { | 
| 2012 | 28 |  |  |  |  | 84 | $et->FoundTag($tagInfo, $val); | 
| 2013 |  |  |  |  |  |  | } | 
| 2014 |  |  |  |  |  |  | } else { | 
| 2015 |  |  |  |  |  |  | # a simple tag value | 
| 2016 | 63 |  |  |  |  | 195 | $et->FoundTag($tagInfo, $val); | 
| 2017 |  |  |  |  |  |  | } | 
| 2018 |  |  |  |  |  |  | } | 
| 2019 |  |  |  |  |  |  | } | 
| 2020 | 419 | 50 |  |  |  | 1061 | if ($isSubDoc) { | 
| 2021 |  |  |  |  |  |  | # restore original document number | 
| 2022 | 0 |  |  |  |  | 0 | $$et{DOC_NUM} = $oldDocNum; | 
| 2023 | 0 | 0 |  |  |  | 0 | --$$et{DOC_COUNT} if $oldNumTags == $$et{NUM_FOUND}; | 
| 2024 |  |  |  |  |  |  | } | 
| 2025 |  |  |  |  |  |  | } | 
| 2026 |  |  |  |  |  |  | # | 
| 2027 |  |  |  |  |  |  | # extract information from joined streams if necessary | 
| 2028 |  |  |  |  |  |  | # | 
| 2029 |  |  |  |  |  |  |  | 
| 2030 | 350 | 50 |  |  |  | 781 | if (%join) { | 
| 2031 | 0 |  |  |  |  | 0 | my ($tag, $i); | 
| 2032 | 0 |  |  |  |  | 0 | foreach $tag (sort keys %join) { | 
| 2033 | 0 |  |  |  |  | 0 | my $list = $join{$tag}; | 
| 2034 | 0 | 0 | 0 |  |  | 0 | last unless defined $$list[1] and $$list[1] =~ /^%.*?([\x0d\x0a]*)/; | 
| 2035 | 0 |  |  |  |  | 0 | my $buff = "%!PS-Adobe-3.0$1";  # add PS header with same line break | 
| 2036 | 0 |  |  |  |  | 0 | for ($i=1; defined $$list[$i]; ++$i) { | 
| 2037 | 0 |  |  |  |  | 0 | $buff .= $$list[$i]; | 
| 2038 | 0 |  |  |  |  | 0 | undef $$list[$i];   # free memory | 
| 2039 |  |  |  |  |  |  | } | 
| 2040 |  |  |  |  |  |  | # increment document number for tags extracted from embedded EPS | 
| 2041 | 0 |  |  |  |  | 0 | my $oldDocNum = $$et{DOC_NUM}; | 
| 2042 | 0 |  |  |  |  | 0 | my $oldNumTags = $$et{NUM_FOUND}; | 
| 2043 | 0 |  |  |  |  | 0 | $$et{DOC_NUM} = ++$$et{DOC_COUNT}; | 
| 2044 |  |  |  |  |  |  | # extract PostScript information | 
| 2045 | 0 |  |  |  |  | 0 | $et->HandleTag($tagTablePtr, $tag, $buff); | 
| 2046 | 0 |  |  |  |  | 0 | $$et{DOC_NUM} = $oldDocNum; | 
| 2047 |  |  |  |  |  |  | # revert document counter if we didn't add any new tags | 
| 2048 | 0 | 0 |  |  |  | 0 | --$$et{DOC_COUNT} if $oldNumTags == $$et{NUM_FOUND}; | 
| 2049 | 0 |  |  |  |  | 0 | delete $$et{DOC_NUM}; | 
| 2050 |  |  |  |  |  |  | } | 
| 2051 |  |  |  |  |  |  | } | 
| 2052 |  |  |  |  |  |  | # | 
| 2053 |  |  |  |  |  |  | # extract information from stream object if it exists (eg. Metadata stream) | 
| 2054 |  |  |  |  |  |  | # | 
| 2055 | 350 |  |  |  |  | 490 | for (;;) { # (cheap goto) | 
| 2056 | 350 | 100 |  |  |  | 1353 | last unless $$dict{_stream}; | 
| 2057 | 43 |  |  |  |  | 119 | my $tag = '_stream'; | 
| 2058 |  |  |  |  |  |  | # add Subtype (if it exists) to stream name and remove leading '/' | 
| 2059 | 43 | 100 |  |  |  | 282 | ($tag = $$dict{Subtype} . $tag) =~ s/^\/// if $$dict{Subtype}; | 
| 2060 | 43 | 50 |  |  |  | 190 | last unless $$tagTablePtr{$tag}; | 
| 2061 | 43 | 50 |  |  |  | 140 | my $tagInfo = $et->GetTagInfo($tagTablePtr, $tag) or last; | 
| 2062 | 43 | 50 |  |  |  | 223 | unless ($$tagInfo{SubDirectory}) { | 
| 2063 |  |  |  |  |  |  | # don't build filter lists across different images | 
| 2064 | 0 |  |  |  |  | 0 | delete $$et{LIST_TAGS}{$$tagTablePtr{Filter}}; | 
| 2065 |  |  |  |  |  |  | # we arrive here only when extracting embedded images | 
| 2066 |  |  |  |  |  |  | # - only extract known image types and ignore others | 
| 2067 | 0 |  | 0 |  |  | 0 | my $filter = $$dict{Filter} || ''; | 
| 2068 | 0 | 0 |  |  |  | 0 | $filter = @$filter[-1] if ref $filter eq 'ARRAY'; # (get last Filter type) | 
| 2069 | 0 |  |  |  |  | 0 | my $result; | 
| 2070 | 0 | 0 | 0 |  |  | 0 | if ($filter eq '/DCTDecode' or $filter eq '/JPXDecode') { | 
| 2071 | 0 | 0 |  |  |  | 0 | DecodeStream($et, $dict) or last; | 
| 2072 |  |  |  |  |  |  | # save the image itself | 
| 2073 | 0 |  |  |  |  | 0 | $et->FoundTag($tagInfo, \$$dict{_stream}); | 
| 2074 |  |  |  |  |  |  | # extract information from embedded image | 
| 2075 | 0 |  |  |  |  | 0 | $result = $et->ExtractInfo(\$$dict{_stream}, { ReEntry => 1 }); | 
| 2076 |  |  |  |  |  |  | } | 
| 2077 | 0 | 0 |  |  |  | 0 | unless ($result) { | 
| 2078 | 0 | 0 |  |  |  | 0 | $et->FoundTag('FileType', defined $result ? '(unknown)' : '(unsupported)'); | 
| 2079 |  |  |  |  |  |  | } | 
| 2080 | 0 |  |  |  |  | 0 | last; | 
| 2081 |  |  |  |  |  |  | } | 
| 2082 |  |  |  |  |  |  | # decode stream if necessary | 
| 2083 | 43 | 50 |  |  |  | 184 | DecodeStream($et, $dict) or last; | 
| 2084 | 43 | 50 |  |  |  | 167 | if ($verbose > 2) { | 
| 2085 | 0 |  |  |  |  | 0 | $et->VPrint(2,"$$et{INDENT}$$et{DIR_NAME} stream data\n"); | 
| 2086 | 0 |  |  |  |  | 0 | $et->VerboseDump(\$$dict{_stream}); | 
| 2087 |  |  |  |  |  |  | } | 
| 2088 |  |  |  |  |  |  | # extract information from stream | 
| 2089 |  |  |  |  |  |  | my %dirInfo = ( | 
| 2090 |  |  |  |  |  |  | DataPt   => \$$dict{_stream}, | 
| 2091 |  |  |  |  |  |  | DataLen  => length $$dict{_stream}, | 
| 2092 |  |  |  |  |  |  | DirStart => 0, | 
| 2093 |  |  |  |  |  |  | DirLen   => length $$dict{_stream}, | 
| 2094 | 43 |  |  |  |  | 459 | Parent   => 'PDF', | 
| 2095 |  |  |  |  |  |  | ); | 
| 2096 | 43 |  |  |  |  | 172 | my $subTablePtr = GetTagTable($tagInfo->{SubDirectory}->{TagTable}); | 
| 2097 | 43 | 50 |  |  |  | 310 | unless ($et->ProcessDirectory(\%dirInfo, $subTablePtr)) { | 
| 2098 | 0 |  |  |  |  | 0 | $et->Warn("Error processing $$tagInfo{Name} information"); | 
| 2099 |  |  |  |  |  |  | } | 
| 2100 | 43 |  |  |  |  | 249 | last; | 
| 2101 |  |  |  |  |  |  | } | 
| 2102 |  |  |  |  |  |  | } | 
| 2103 |  |  |  |  |  |  |  | 
| 2104 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 2105 |  |  |  |  |  |  | # Extract information from PDF file | 
| 2106 |  |  |  |  |  |  | # Inputs: 0) ExifTool object reference, 1) dirInfo reference | 
| 2107 |  |  |  |  |  |  | # Returns: 0 if not a PDF file, 1 on success, otherwise a negative error number | 
| 2108 |  |  |  |  |  |  | sub ReadPDF($$) | 
| 2109 |  |  |  |  |  |  | { | 
| 2110 | 37 |  |  | 37 | 0 | 92 | my ($et, $dirInfo) = @_; | 
| 2111 | 37 |  |  |  |  | 93 | my $raf = $$dirInfo{RAF}; | 
| 2112 | 37 |  |  |  |  | 142 | my $verbose = $et->Options('Verbose'); | 
| 2113 | 37 |  |  |  |  | 118 | my ($buff, $encrypt, $id); | 
| 2114 |  |  |  |  |  |  | # | 
| 2115 |  |  |  |  |  |  | # validate PDF file | 
| 2116 |  |  |  |  |  |  | # | 
| 2117 |  |  |  |  |  |  | # (linearization dictionary must be in the first 1024 bytes of the file) | 
| 2118 | 37 | 50 |  |  |  | 187 | $raf->Read($buff, 1024) >= 8 or return 0; | 
| 2119 | 37 | 50 |  |  |  | 344 | $buff =~ /^(\s*)%PDF-(\d+\.\d+)/ or return 0; | 
| 2120 | 37 | 50 |  |  |  | 251 | $$et{PDFBase} = length $1 and $et->Warn('PDF header is not at start of file',1); | 
| 2121 | 37 |  |  |  |  | 129 | $pdfVer = $2; | 
| 2122 | 37 |  |  |  |  | 200 | $et->SetFileType();   # set the FileType tag | 
| 2123 | 37 | 50 |  |  |  | 300 | $et->Warn("The PDF $pdfVer specification is held hostage by the ISO") if $pdfVer >= 2.0; | 
| 2124 |  |  |  |  |  |  | # store PDFVersion tag | 
| 2125 | 37 |  |  |  |  | 105 | my $tagTablePtr = GetTagTable('Image::ExifTool::PDF::Root'); | 
| 2126 | 37 |  |  |  |  | 213 | $et->HandleTag($tagTablePtr, 'Version', $pdfVer); | 
| 2127 | 37 |  |  |  |  | 98 | $tagTablePtr = GetTagTable('Image::ExifTool::PDF::Main'); | 
| 2128 |  |  |  |  |  |  | # | 
| 2129 |  |  |  |  |  |  | # check for a linearized PDF (only if reading) | 
| 2130 |  |  |  |  |  |  | # | 
| 2131 | 37 |  |  |  |  | 114 | my $capture = $$et{PDF_CAPTURE}; | 
| 2132 | 37 | 100 |  |  |  | 142 | unless ($capture) { | 
| 2133 | 18 |  |  |  |  | 54 | my $lin = 'false'; | 
| 2134 | 18 | 50 |  |  |  | 117 | if ($buff =~ /< | 
| 2135 | 18 |  |  |  |  | 94 | $buff = substr($buff, pos($buff) - 2); | 
| 2136 | 18 |  |  |  |  | 83 | my $dict = ExtractObject($et, \$buff); | 
| 2137 | 18 | 0 | 33 |  |  | 236 | if (ref $dict eq 'HASH' and $$dict{Linearized} and $$dict{L}) { | 
|  |  |  | 33 |  |  |  |  | 
| 2138 | 0 | 0 |  |  |  | 0 | if (not $$et{VALUE}{FileSize}) { | 
|  |  | 0 |  |  |  |  |  | 
| 2139 | 0 |  |  |  |  | 0 | undef $lin; # can't determine if it is linearized | 
| 2140 |  |  |  |  |  |  | } elsif ($$dict{L} == $$et{VALUE}{FileSize} - $$et{PDFBase}) { | 
| 2141 | 0 |  |  |  |  | 0 | $lin = 'true'; | 
| 2142 |  |  |  |  |  |  | } | 
| 2143 |  |  |  |  |  |  | } | 
| 2144 |  |  |  |  |  |  | } | 
| 2145 | 18 | 50 |  |  |  | 129 | $et->HandleTag($tagTablePtr, '_linearized', $lin) if $lin; | 
| 2146 |  |  |  |  |  |  | } | 
| 2147 |  |  |  |  |  |  | # | 
| 2148 |  |  |  |  |  |  | # read the xref tables referenced from startxref at the end of the file | 
| 2149 |  |  |  |  |  |  | # | 
| 2150 | 37 |  |  |  |  | 95 | my @xrefOffsets; | 
| 2151 | 37 | 50 |  |  |  | 187 | $raf->Seek(0, 2) or return -2; | 
| 2152 |  |  |  |  |  |  | # the %%EOF must occur within the last 1024 bytes of the file (PDF spec, appendix H) | 
| 2153 | 37 |  |  |  |  | 152 | my $len = $raf->Tell(); | 
| 2154 | 37 | 50 |  |  |  | 173 | $len = 1024 if $len > 1024; | 
| 2155 | 37 | 50 |  |  |  | 132 | $raf->Seek(-$len, 2) or return -2; | 
| 2156 | 37 | 50 |  |  |  | 147 | $raf->Read($buff, $len) == $len or return -3; | 
| 2157 |  |  |  |  |  |  | # find the LAST xref table in the file (may be multiple %%EOF marks, | 
| 2158 |  |  |  |  |  |  | # and comments between "startxref" and "%%EOF") | 
| 2159 | 37 | 50 |  |  |  | 469 | $buff =~ /^.*startxref(\s+)(\d+)(\s+)(%[^\x0d\x0a]*\s+)*%%EOF/s or return -4; | 
| 2160 | 37 |  |  |  |  | 165 | my $ws = $1 . $3; | 
| 2161 | 37 |  |  |  |  | 124 | my $xr = $2; | 
| 2162 | 37 |  |  |  |  | 139 | push @xrefOffsets, $xr, 'Main'; | 
| 2163 |  |  |  |  |  |  | # set input record separator | 
| 2164 | 37 | 50 |  |  |  | 413 | local $/ = $ws =~ /(\x0d\x0a|\x0d|\x0a)/ ? $1 : "\x0a"; | 
| 2165 | 37 |  |  |  |  | 103 | my (%xref, @mainDicts, %loaded, $mainFree); | 
| 2166 | 37 |  |  |  |  | 95 | my ($xrefSize, $mainDictSize) = (0, 0); | 
| 2167 |  |  |  |  |  |  | # initialize variables to capture when rewriting | 
| 2168 | 37 | 100 |  |  |  | 105 | if ($capture) { | 
| 2169 | 19 |  |  |  |  | 74 | $capture->{startxref} = $xr; | 
| 2170 | 19 |  |  |  |  | 74 | $capture->{xref} = \%xref; | 
| 2171 | 19 |  |  |  |  | 83 | $capture->{newline} = $/; | 
| 2172 | 19 |  |  |  |  | 65 | $capture->{mainFree} = $mainFree = { }; | 
| 2173 |  |  |  |  |  |  | } | 
| 2174 |  |  |  |  |  |  | XRef: | 
| 2175 | 37 |  |  |  |  | 147 | while (@xrefOffsets) { | 
| 2176 | 70 |  |  |  |  | 157 | my $offset = shift @xrefOffsets; | 
| 2177 | 70 |  |  |  |  | 117 | my $type = shift @xrefOffsets; | 
| 2178 | 70 | 50 |  |  |  | 204 | next if $loaded{$offset};   # avoid infinite recursion | 
| 2179 | 70 | 50 |  |  |  | 355 | unless ($raf->Seek($offset+$$et{PDFBase}, 0)) { | 
| 2180 | 0 | 0 |  |  |  | 0 | %loaded or return -5; | 
| 2181 | 0 |  |  |  |  | 0 | $et->Warn('Bad offset for secondary xref table'); | 
| 2182 | 0 |  |  |  |  | 0 | next; | 
| 2183 |  |  |  |  |  |  | } | 
| 2184 |  |  |  |  |  |  | # Note: care must be taken because ReadLine may read more than we want if | 
| 2185 |  |  |  |  |  |  | # the newline sequence for this table is different than the rest of the file | 
| 2186 | 70 |  |  |  |  | 214 | for (;;) { | 
| 2187 | 70 | 50 |  |  |  | 250 | unless ($raf->ReadLine($buff)) { | 
| 2188 | 0 | 0 |  |  |  | 0 | %loaded or return -6; | 
| 2189 | 0 |  |  |  |  | 0 | $et->Warn('Bad offset for secondary xref table'); | 
| 2190 | 0 |  |  |  |  | 0 | next XRef; | 
| 2191 |  |  |  |  |  |  | } | 
| 2192 | 70 | 50 |  |  |  | 449 | last if $buff =~/\S/;   # skip blank lines | 
| 2193 |  |  |  |  |  |  | } | 
| 2194 | 70 |  |  |  |  | 189 | my $loadXRefStream; | 
| 2195 | 70 | 50 |  |  |  | 487 | if ($buff =~ s/^\s*xref\s+//s) { | 
|  |  | 0 |  |  |  |  |  | 
| 2196 |  |  |  |  |  |  | # load xref table | 
| 2197 | 70 |  |  |  |  | 129 | for (;;) { | 
| 2198 |  |  |  |  |  |  | # read another line if necessary (skipping blank lines) | 
| 2199 | 177 |  | 50 |  |  | 646 | $raf->ReadLine($buff) or return -6 until $buff =~ /\S/; | 
| 2200 | 177 | 100 |  |  |  | 824 | last if $buff =~ s/^\s*trailer([\s<[(])/$1/s; | 
| 2201 | 107 | 50 |  |  |  | 576 | $buff =~ s/^\s*(\d+)\s+(\d+)\s+//s or return -4; | 
| 2202 | 107 |  |  |  |  | 383 | my ($start, $num) = ($1, $2); | 
| 2203 | 107 | 50 |  |  |  | 373 | $raf->Seek(-length($buff), 1) or return -4; | 
| 2204 | 107 |  |  |  |  | 314 | my $i; | 
| 2205 | 107 |  |  |  |  | 352 | for ($i=0; $i<$num; ++$i) { | 
| 2206 | 622 | 50 |  |  |  | 1433 | $raf->Read($buff, 20) == 20 or return -6; | 
| 2207 | 622 | 50 |  |  |  | 2524 | $buff =~ /^\s*(\d{10}) (\d{5}) (f|n)/s or return -4; | 
| 2208 | 622 |  |  |  |  | 1038 | my $num = $start + $i; | 
| 2209 | 622 | 100 |  |  |  | 1200 | $xrefSize = $num if $num > $xrefSize; | 
| 2210 |  |  |  |  |  |  | # locate object to generate entry from stream if necessary | 
| 2211 |  |  |  |  |  |  | # (must do this before we test $xref{$num}) | 
| 2212 | 622 | 50 |  |  |  | 1206 | LocateAnyObject(\%xref, $num) if $xref{dicts}; | 
| 2213 |  |  |  |  |  |  | # save offset for newest copy of all objects | 
| 2214 |  |  |  |  |  |  | # (or next object number for free objects) | 
| 2215 | 622 | 100 |  |  |  | 1427 | unless (defined $xref{$num}) { | 
| 2216 | 526 |  |  |  |  | 1574 | my ($offset, $gen) = (int($1), int($2)); | 
| 2217 | 526 |  |  |  |  | 1109 | $xref{$num} = $offset; | 
| 2218 | 526 | 100 |  |  |  | 1215 | if ($3 eq 'f') { | 
| 2219 |  |  |  |  |  |  | # save free objects in last xref table for rewriting | 
| 2220 | 52 | 100 |  |  |  | 181 | $$mainFree{$num} =  [ $offset, $gen, 'f' ] if $mainFree; | 
| 2221 | 52 |  |  |  |  | 153 | next; | 
| 2222 |  |  |  |  |  |  | } | 
| 2223 |  |  |  |  |  |  | # also save offset keyed by object reference string | 
| 2224 | 474 |  |  |  |  | 1655 | $xref{"$num $gen R"} = $offset; | 
| 2225 |  |  |  |  |  |  | } | 
| 2226 |  |  |  |  |  |  | } | 
| 2227 |  |  |  |  |  |  | # (I have a sample from Adobe which has an empty xref table) | 
| 2228 |  |  |  |  |  |  | # %xref or return -4; # xref table may not be empty | 
| 2229 | 107 |  |  |  |  | 232 | $buff = ''; | 
| 2230 |  |  |  |  |  |  | } | 
| 2231 | 70 |  |  |  |  | 225 | undef $mainFree;    # only do this for the last xref table | 
| 2232 |  |  |  |  |  |  | } elsif ($buff =~ s/^\s*(\d+)\s+(\d+)\s+obj//s) { | 
| 2233 |  |  |  |  |  |  | # this is a PDF-1.5 cross-reference stream dictionary | 
| 2234 | 0 |  |  |  |  | 0 | $loadXRefStream = 1; | 
| 2235 |  |  |  |  |  |  | } else { | 
| 2236 | 0 | 0 |  |  |  | 0 | %loaded or return -4; | 
| 2237 | 0 |  |  |  |  | 0 | $et->Warn('Invalid secondary xref table'); | 
| 2238 | 0 |  |  |  |  | 0 | next; | 
| 2239 |  |  |  |  |  |  | } | 
| 2240 | 70 |  |  |  |  | 257 | my $mainDict = ExtractObject($et, \$buff, $raf, \%xref); | 
| 2241 | 70 | 50 |  |  |  | 322 | unless (ref $mainDict eq 'HASH') { | 
| 2242 | 0 | 0 |  |  |  | 0 | %loaded or return -8; | 
| 2243 | 0 |  |  |  |  | 0 | $et->Warn('Error loading secondary dictionary'); | 
| 2244 | 0 |  |  |  |  | 0 | next; | 
| 2245 |  |  |  |  |  |  | } | 
| 2246 |  |  |  |  |  |  | # keep track of total trailer dictionary Size | 
| 2247 | 70 | 100 | 66 |  |  | 510 | $mainDictSize = $$mainDict{Size} if $$mainDict{Size} and $$mainDict{Size} > $mainDictSize; | 
| 2248 | 70 | 50 |  |  |  | 176 | if ($loadXRefStream) { | 
| 2249 |  |  |  |  |  |  | # decode and save our XRef stream from PDF-1.5 file | 
| 2250 |  |  |  |  |  |  | # (but parse it later as required to save time) | 
| 2251 |  |  |  |  |  |  | # Note: this technique can potentially result in an old object | 
| 2252 |  |  |  |  |  |  | # being used if the file was incrementally updated and an older | 
| 2253 |  |  |  |  |  |  | # object from an xref table was replaced by a newer object in an | 
| 2254 |  |  |  |  |  |  | # xref stream.  But doing so isn't a good idea (if allowed at all) | 
| 2255 |  |  |  |  |  |  | # because a PDF 1.4 consumer would also make this same mistake. | 
| 2256 | 0 | 0 | 0 |  |  | 0 | if ($$mainDict{Type} eq '/XRef' and $$mainDict{W} and | 
|  |  |  | 0 |  |  |  |  | 
|  |  |  | 0 |  |  |  |  | 
|  |  |  | 0 |  |  |  |  | 
| 2257 | 0 |  |  |  |  | 0 | @{$$mainDict{W}} > 2 and $$mainDict{Size} and | 
| 2258 |  |  |  |  |  |  | DecodeStream($et, $mainDict)) | 
| 2259 |  |  |  |  |  |  | { | 
| 2260 |  |  |  |  |  |  | # create Index entry if it doesn't exist | 
| 2261 | 0 | 0 |  |  |  | 0 | $$mainDict{Index} or $$mainDict{Index} = [ 0, $$mainDict{Size} ]; | 
| 2262 |  |  |  |  |  |  | # create '_entry_size' entry for internal use | 
| 2263 | 0 |  |  |  |  | 0 | my $w = $$mainDict{W}; | 
| 2264 | 0 |  |  |  |  | 0 | my $size = 0; | 
| 2265 | 0 |  |  |  |  | 0 | foreach (@$w) { $size += $_; } | 
|  | 0 |  |  |  |  | 0 |  | 
| 2266 | 0 |  |  |  |  | 0 | $$mainDict{_entry_size} = $size; | 
| 2267 |  |  |  |  |  |  | # save this stream dictionary to use later if required | 
| 2268 | 0 | 0 |  |  |  | 0 | $xref{dicts} = [] unless $xref{dicts}; | 
| 2269 | 0 |  |  |  |  | 0 | push @{$xref{dicts}}, $mainDict; | 
|  | 0 |  |  |  |  | 0 |  | 
| 2270 |  |  |  |  |  |  | } else { | 
| 2271 | 0 | 0 |  |  |  | 0 | %loaded or return -9; | 
| 2272 | 0 |  |  |  |  | 0 | $et->Warn('Invalid xref stream in secondary dictionary'); | 
| 2273 |  |  |  |  |  |  | } | 
| 2274 |  |  |  |  |  |  | } | 
| 2275 | 70 |  |  |  |  | 196 | $loaded{$offset} = 1; | 
| 2276 |  |  |  |  |  |  | # load XRef stream in hybrid file if it exists | 
| 2277 | 70 | 50 |  |  |  | 179 | push @xrefOffsets, $$mainDict{XRefStm}, 'XRefStm' if $$mainDict{XRefStm}; | 
| 2278 | 70 | 50 |  |  |  | 174 | $encrypt = $$mainDict{Encrypt} if $$mainDict{Encrypt}; | 
| 2279 | 70 | 50 | 33 |  |  | 206 | undef $encrypt if $encrypt and $encrypt eq 'null'; # (have seen "null") | 
| 2280 | 70 | 100 | 66 |  |  | 262 | if ($$mainDict{ID} and ref $$mainDict{ID} eq 'ARRAY') { | 
| 2281 | 29 |  |  |  |  | 106 | $id = ReadPDFValue($mainDict->{ID}->[0]); | 
| 2282 |  |  |  |  |  |  | } | 
| 2283 | 70 |  |  |  |  | 232 | push @mainDicts, $mainDict, $type; | 
| 2284 |  |  |  |  |  |  | # load previous xref table if it exists | 
| 2285 | 70 | 100 |  |  |  | 363 | push @xrefOffsets, $$mainDict{Prev}, 'Prev' if $$mainDict{Prev}; | 
| 2286 |  |  |  |  |  |  | } | 
| 2287 | 37 | 50 |  |  |  | 114 | if ($xrefSize > $mainDictSize) { | 
| 2288 | 0 |  |  |  |  | 0 | my $str = "Objects in xref table ($xrefSize) exceed trailer dictionary Size ($mainDictSize)"; | 
| 2289 | 0 | 0 |  |  |  | 0 | $capture ? $et->Error($str) : $et->Warn($str); | 
| 2290 |  |  |  |  |  |  | } | 
| 2291 |  |  |  |  |  |  | # | 
| 2292 |  |  |  |  |  |  | # extract encryption information if necessary | 
| 2293 |  |  |  |  |  |  | # | 
| 2294 | 37 | 50 |  |  |  | 122 | if ($encrypt) { | 
| 2295 | 0 | 0 |  |  |  | 0 | if (ref $encrypt eq 'SCALAR') { | 
| 2296 | 0 |  |  |  |  | 0 | $encrypt = FetchObject($et, $$encrypt, \%xref, 'Encrypt'); | 
| 2297 |  |  |  |  |  |  | } | 
| 2298 |  |  |  |  |  |  | # generate Encryption tag information | 
| 2299 | 0 |  |  |  |  | 0 | my $err = DecryptInit($et, $encrypt, $id); | 
| 2300 | 0 | 0 |  |  |  | 0 | if ($err) { | 
| 2301 | 0 |  |  |  |  | 0 | $et->Warn($err); | 
| 2302 | 0 | 0 |  |  |  | 0 | $$capture{Error} = $err if $capture; | 
| 2303 | 0 |  |  |  |  | 0 | return -1; | 
| 2304 |  |  |  |  |  |  | } | 
| 2305 |  |  |  |  |  |  | } | 
| 2306 |  |  |  |  |  |  | # | 
| 2307 |  |  |  |  |  |  | # extract the information beginning with each of the main dictionaries | 
| 2308 |  |  |  |  |  |  | # | 
| 2309 | 37 |  |  |  |  | 73 | my $i = 0; | 
| 2310 | 37 |  |  |  |  | 122 | my $num = (scalar @mainDicts) / 2; | 
| 2311 | 37 |  |  |  |  | 103 | while (@mainDicts) { | 
| 2312 | 70 |  |  |  |  | 135 | my $dict = shift @mainDicts; | 
| 2313 | 70 |  |  |  |  | 132 | my $type = shift @mainDicts; | 
| 2314 | 70 | 50 |  |  |  | 158 | if ($verbose) { | 
| 2315 | 0 |  |  |  |  | 0 | ++$i; | 
| 2316 | 0 |  |  |  |  | 0 | my $n = scalar(@{$$dict{_tags}}); | 
|  | 0 |  |  |  |  | 0 |  | 
| 2317 | 0 |  |  |  |  | 0 | $et->VPrint(0, "PDF dictionary ($i of $num) with $n entries:\n"); | 
| 2318 |  |  |  |  |  |  | } | 
| 2319 | 70 |  |  |  |  | 227 | ProcessDict($et, $tagTablePtr, $dict, \%xref, 0, $type); | 
| 2320 |  |  |  |  |  |  | } | 
| 2321 |  |  |  |  |  |  | # handle any decryption errors | 
| 2322 | 37 | 50 |  |  |  | 183 | if ($encrypt) { | 
| 2323 | 0 |  |  |  |  | 0 | my $err = $$encrypt{_error}; | 
| 2324 | 0 | 0 |  |  |  | 0 | if ($err) { | 
| 2325 | 0 |  |  |  |  | 0 | $et->Warn($err); | 
| 2326 | 0 | 0 |  |  |  | 0 | $$capture{Error} = $err if $capture; | 
| 2327 | 0 |  |  |  |  | 0 | return -1; | 
| 2328 |  |  |  |  |  |  | } | 
| 2329 |  |  |  |  |  |  | } | 
| 2330 | 37 |  |  |  |  | 349 | return 1; | 
| 2331 |  |  |  |  |  |  | } | 
| 2332 |  |  |  |  |  |  |  | 
| 2333 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 2334 |  |  |  |  |  |  | # ReadPDF() warning strings for each error return value | 
| 2335 |  |  |  |  |  |  | my %pdfWarning = ( | 
| 2336 |  |  |  |  |  |  | # -1 is reserved as error return value with no associated warning | 
| 2337 |  |  |  |  |  |  | -2 => 'Error seeking in file', | 
| 2338 |  |  |  |  |  |  | -3 => 'Error reading file', | 
| 2339 |  |  |  |  |  |  | -4 => 'Invalid xref table', | 
| 2340 |  |  |  |  |  |  | -5 => 'Invalid xref offset', | 
| 2341 |  |  |  |  |  |  | -6 => 'Error reading xref table', | 
| 2342 |  |  |  |  |  |  | -7 => 'Error reading trailer', | 
| 2343 |  |  |  |  |  |  | -8 => 'Error reading main dictionary', | 
| 2344 |  |  |  |  |  |  | -9 => 'Invalid xref stream in main dictionary', | 
| 2345 |  |  |  |  |  |  | ); | 
| 2346 |  |  |  |  |  |  |  | 
| 2347 |  |  |  |  |  |  | #------------------------------------------------------------------------------ | 
| 2348 |  |  |  |  |  |  | # Extract information from PDF file | 
| 2349 |  |  |  |  |  |  | # Inputs: 0) ExifTool object reference, 1) dirInfo reference | 
| 2350 |  |  |  |  |  |  | # Returns: 1 if this was a valid PDF file | 
| 2351 |  |  |  |  |  |  | sub ProcessPDF($$) | 
| 2352 |  |  |  |  |  |  | { | 
| 2353 | 37 |  |  | 37 | 0 | 143 | my ($et, $dirInfo) = @_; | 
| 2354 |  |  |  |  |  |  |  | 
| 2355 | 37 |  |  |  |  | 109 | undef $cryptInfo;   # (must not delete after returning so writer can use it) | 
| 2356 | 37 |  |  |  |  | 97 | undef $cryptStream; | 
| 2357 | 37 |  |  |  |  | 88 | undef $cryptString; | 
| 2358 | 37 |  |  |  |  | 128 | my $result = ReadPDF($et, $dirInfo); | 
| 2359 | 37 | 50 |  |  |  | 114 | if ($result < 0) { | 
| 2360 | 0 | 0 |  |  |  | 0 | $et->Warn($pdfWarning{$result}) if $pdfWarning{$result}; | 
| 2361 | 0 |  |  |  |  | 0 | $result = 1; | 
| 2362 |  |  |  |  |  |  | } | 
| 2363 |  |  |  |  |  |  | # clean up and return | 
| 2364 | 37 |  |  |  |  | 90 | undef %streamObjs; | 
| 2365 | 37 |  |  |  |  | 104 | undef %fetched; | 
| 2366 | 37 |  |  |  |  | 112 | return $result; | 
| 2367 |  |  |  |  |  |  | } | 
| 2368 |  |  |  |  |  |  |  | 
| 2369 |  |  |  |  |  |  | 1; # end | 
| 2370 |  |  |  |  |  |  |  | 
| 2371 |  |  |  |  |  |  |  | 
| 2372 |  |  |  |  |  |  | __END__ |