File Coverage

blib/lib/Image/ExifTool/Import.pm
Criterion Covered Total %
statement 94 169 55.6
branch 52 136 38.2
condition 11 34 32.3
subroutine 4 6 66.6
pod 2 4 50.0
total 163 349 46.7


line stmt bran cond sub pod time code
1             #------------------------------------------------------------------------------
2             # File: Import.pm
3             #
4             # Description: Import CSV and JSON database files
5             #
6             # Revisions: 2011-03-05 - P. Harvey Created
7             #------------------------------------------------------------------------------
8             package Image::ExifTool::Import;
9              
10 17     17   132 use strict;
  17         43  
  17         903  
11             require Exporter;
12              
13 17     17   92 use vars qw($VERSION @ISA @EXPORT_OK);
  17         36  
  17         45740  
14              
15             $VERSION = '1.14';
16             @ISA = qw(Exporter);
17             @EXPORT_OK = qw(ReadCSV ReadJSON);
18              
19             sub ReadJSONObject($;$);
20              
21             my %unescapeJSON = ( 't'=>"\t", 'n'=>"\n", 'r'=>"\r", 'b' => "\b", 'f' => "\f" );
22             my $charset;
23              
24             #------------------------------------------------------------------------------
25             # Read CSV file
26             # Inputs: 0) CSV file name, file ref or RAF ref, 1) database hash ref,
27             # 2) missing tag value, 3) delimiter if other than ','
28             # Returns: undef on success, or error string
29             # Notes: There are various flavours of CSV, but here we assume that only
30             # double quotes are escaped, and they are escaped by doubling them
31             sub ReadCSV($$;$$)
32             {
33 0     0 1 0 local ($_, $/);
34 0         0 my ($file, $database, $missingValue, $delim) = @_;
35 0         0 my ($buff, @tags, $found, $err, $raf, $openedFile);
36              
37 0 0       0 if (UNIVERSAL::isa($file, 'File::RandomAccess')) {
    0          
38 0         0 $raf = $file;
39 0         0 $file = 'CSV file';
40             } elsif (ref $file eq 'GLOB') {
41 0         0 $raf = File::RandomAccess->new($file);
42 0         0 $file = 'CSV file';
43             } else {
44 0 0       0 open CSVFILE, $file or return "Error opening CSV file '${file}'";
45 0         0 binmode CSVFILE;
46 0         0 $openedFile = 1;
47 0         0 $raf = File::RandomAccess->new(\*CSVFILE);
48             }
49 0 0       0 $delim = ',' unless defined $delim;
50             # set input record separator by first newline found in the file
51             # (safe because first line should contain only tag names)
52 0         0 while ($raf->Read($buff, 65536)) {
53 0 0       0 $buff =~ /(\x0d\x0a|\x0d|\x0a)/ and $/ = $1, last;
54             }
55 0         0 $raf->Seek(0,0);
56 0         0 while ($raf->ReadLine($buff)) {
57 0         0 my (@vals, $v, $i, %fileInfo);
58 0         0 my @toks = split /\Q$delim/, $buff;
59 0         0 while (@toks) {
60 0         0 ($v = shift @toks) =~ s/^ +//; # remove leading spaces
61 0 0       0 if ($v =~ s/^"//) {
62             # quoted value must end in an odd number of quotes
63 0   0     0 while ($v !~ /("+)\s*$/ or not length($1) & 1) {
64 0 0       0 if (@toks) {
65 0         0 $v .= $delim . shift @toks;
66             } else {
67             # read another line from the file
68 0 0       0 $raf->ReadLine($buff) or last;
69 0         0 @toks = split /\Q$delim/, $buff;
70 0 0       0 last unless @toks;
71 0         0 $v .= shift @toks;
72             }
73             }
74 0         0 $v =~ s/"\s*$//; # remove trailing quote and whitespace
75 0         0 $v =~ s/""/"/g; # un-escape quotes
76             } else {
77 0         0 $v =~ s/[ \n\r]+$//;# remove trailing spaces/newlines
78             }
79 0         0 push @vals, $v;
80             }
81 0 0       0 if (@tags) {
82             # save values for each tag
83 0         0 $fileInfo{_ordered_keys_} = [ ];
84 0   0     0 for ($i=0; $i<@vals and $i<@tags; ++$i) {
85             # ignore empty entries unless missingValue is empty too
86 0 0 0     0 next unless length $vals[$i] or defined $missingValue and $missingValue eq '';
      0        
87             # delete tag (set value to undef) if value is same as missing tag
88 0 0 0     0 $fileInfo{$tags[$i]} =
89             (defined $missingValue and $vals[$i] eq $missingValue) ? undef : $vals[$i];
90 0         0 push @{$fileInfo{_ordered_keys_}}, $tags[$i];
  0         0  
91             }
92             # figure out the file name to use
93 0 0       0 if ($fileInfo{SourceFile}) {
94 0         0 $$database{$fileInfo{SourceFile}} = \%fileInfo;
95 0         0 $found = 1;
96             }
97             } else {
98             # the first row should be the tag names
99 0         0 foreach (@vals) {
100             # terminate at first blank tag name (eg. extra comma at end of line)
101 0 0       0 last unless length $_;
102 0 0       0 @tags or s/^\xef\xbb\xbf//; # remove UTF-8 BOM if it exists
103 0 0       0 /^([-_0-9A-Z]+:)*[-_0-9A-Z]+#?$/i or $err = "Invalid tag name '${_}'", last;
104 0         0 push(@tags, $_);
105             }
106 0 0       0 last if $err;
107 0 0       0 @tags or $err = 'No tags found', last;
108             # fix "SourceFile" case if necessary
109 0 0       0 $tags[0] = 'SourceFile' if lc $tags[0] eq 'sourcefile';
110             }
111             }
112 0 0       0 close CSVFILE if $openedFile;
113 0         0 undef $raf;
114 0 0 0     0 $err = 'No SourceFile column' unless $found or $err;
115 0 0       0 return $err ? "$err in $file" : undef;
116             }
117              
118             #------------------------------------------------------------------------------
119             # Convert unicode code point to UTF-8
120             # Inputs: 0) integer Unicode character
121             # Returns: UTF-8 bytes
122             sub ToUTF8($)
123             {
124 0     0 0 0 require Image::ExifTool::Charset;
125 0         0 return Image::ExifTool::Charset::Recompose(undef, [$_[0]], $charset);
126             }
127              
128             #------------------------------------------------------------------------------
129             # Read JSON object from file
130             # Inputs: 0) RAF reference or undef, 1) optional scalar reference for data
131             # to read before reading from file (ie. the file read buffer)
132             # Returns: JSON object (scalar, hash ref, or array ref), or undef on EOF or
133             # empty object or array (and sets $$buffPt to empty string on EOF)
134             # Notes: position in buffer is significant
135             sub ReadJSONObject($;$)
136             {
137 510     510 0 871 my ($raf, $buffPt) = @_;
138             # initialize buffer if necessary
139 510         626 my ($pos, $readMore, $rtnVal, $tok, $key, $didBOM);
140 510 100       715 if ($buffPt) {
141 449         493 $pos = pos $$buffPt;
142 449 50       642 $pos = pos($$buffPt) = 0 unless defined $pos;
143             } else {
144 61         141 my $buff = '';
145 61         121 $buffPt = \$buff;
146 61         119 $pos = 0;
147             }
148 510         545 Tok: for (;;) {
149             # (didn't spend the time to understand how $pos could be undef, but
150             # put a test here to be safe because one user reported this problem)
151 510 50       740 last unless defined $pos;
152 510 100 66     1312 if ($pos >= length $$buffPt or $readMore) {
153 61 50       160 last unless defined $raf;
154             # read another 64kB and add to unparsed data
155 61         125 my $offset = length($$buffPt) - $pos;
156 61 50       158 if ($offset) {
157 0         0 my $buff;
158 0 0       0 $raf->Read($buff, 65536) or $$buffPt = '', last;
159 0         0 $$buffPt = substr($$buffPt, $pos) . $buff;
160             } else {
161 61 50       298 $raf->Read($$buffPt, 65536) or $$buffPt = '', last;
162             }
163 61 50       141 unless ($didBOM) {
164 61         155 $$buffPt =~ s/^\xef\xbb\xbf//; # remove UTF-8 BOM if it exists
165 61         108 $didBOM = 1;
166             }
167 61         294 $pos = pos($$buffPt) = 0;
168 61         163 $readMore = 0;
169             }
170 510 50       748 unless ($tok) {
171             # skip white space and find next character
172 510 50       1168 $$buffPt =~ /(\S)/g or $pos = length($$buffPt), next;
173 510         745 $tok = $1;
174 510         611 $pos = pos $$buffPt;
175             }
176             # see what type of object this is
177 510 100 100     1240 if ($tok eq '{') { # object (hash)
    100 66        
    100          
    100          
178 107 50       402 $rtnVal = { _ordered_keys_ => [ ] } unless defined $rtnVal;
179 107         162 for (;;) {
180             # read "KEY":"VALUE" pairs
181 208 50       373 unless (defined $key) {
182 208         407 $key = ReadJSONObject($raf, $buffPt);
183 208         321 $pos = pos $$buffPt;
184             }
185             # ($key may be undef for empty JSON object)
186 208 100       379 if (defined $key) {
187             # scan to delimiting ':'
188 205 50       488 $$buffPt =~ /(\S)/g or $readMore = 1, next Tok;
189 205 50       406 $1 eq ':' or return undef; # error if not a colon
190 205         322 my $val = ReadJSONObject($raf, $buffPt);
191 205         280 $pos = pos $$buffPt;
192 205 50       361 return undef unless defined $val;
193 205         463 $$rtnVal{$key} = $val;
194 205         232 push @{$$rtnVal{_ordered_keys_}}, $key;
  205         471  
195 205         289 undef $key;
196             }
197             # scan to delimiting ',' or bounding '}'
198 208 50       538 $$buffPt =~ /(\S)/g or $readMore = 1, next Tok;
199 208 100       447 last if $1 eq '}'; # check for end of object
200 101 50       163 $1 eq ',' or return undef; # error if not a comma
201             }
202             } elsif ($tok eq '[') { # array
203 17 50       39 $rtnVal = [ ] unless defined $rtnVal;
204 17         23 for (;;) {
205 33         67 my $item = ReadJSONObject($raf, $buffPt);
206 33         44 $pos = pos $$buffPt;
207             # ($item may be undef for empty array)
208 33 100       56 push @$rtnVal, $item if defined $item;
209             # scan to delimiting ',' or bounding ']'
210 33 50       62 $$buffPt =~ /(\S)/g or $readMore = 1, next Tok;
211 33 100       58 last if $1 eq ']'; # check for end of array
212 16 50       28 $1 eq ',' or return undef; # error if not a comma
213             }
214             } elsif ($tok eq '"') { # quoted string
215 295         358 for (;;) {
216 295 50       1006 $$buffPt =~ /(\\*)"/g or $readMore = 1, next Tok;
217 295 50       639 last unless length($1) & 1; # check for escaped quote
218             }
219 295         609 $rtnVal = substr($$buffPt, $pos, pos($$buffPt)-$pos-1);
220             # unescape characters
221 295         405 $rtnVal =~ s/\\u([0-9a-f]{4})/ToUTF8(hex $1)/ige;
  0         0  
222 295 0       377 $rtnVal =~ s/\\(.)/$unescapeJSON{$1}||$1/sge;
  0         0  
223             # decode base64 (binary data) values
224 295 50 33     634 if ($rtnVal =~ /^base64:[A-Za-z0-9+\/]*={0,2}$/ and length($rtnVal) % 4 == 3) {
225 0         0 require Image::ExifTool::XMP;
226 0         0 $rtnVal = ${Image::ExifTool::XMP::DecodeBase64(substr($rtnVal,7))};
  0         0  
227             }
228             } elsif ($tok eq ']' or $tok eq '}' or $tok eq ',') {
229             # return undef for empty object, array, or list item
230             # (empty list item actually not valid JSON)
231 8         12 pos($$buffPt) = pos($$buffPt) - 1;
232             } else { # number, 'true', 'false', 'null'
233 83 50       150 $$buffPt =~ /([\s:,\}\]])/g or $readMore = 1, next;
234 83         144 pos($$buffPt) = pos($$buffPt) - 1;
235 83         143 $rtnVal = $tok . substr($$buffPt, $pos, pos($$buffPt)-$pos);
236             }
237 510         583 last;
238             }
239 510         968 return $rtnVal;
240             }
241              
242             #------------------------------------------------------------------------------
243             # Read JSON file
244             # Inputs: 0) JSON file name, file ref, RAF ref or SCALAR ref, 1) database hash ref,
245             # 2) flag to delete "-" tags, 3) character set
246             # Returns: undef on success, or error string
247             sub ReadJSON($$;$$)
248             {
249 61     61 1 123 local $_;
250 61         201 my ($file, $database, $missingValue, $chset) = @_;
251 61         104 my ($raf, $openedFile);
252              
253             # initialize character set for converting "\uHHHH" chars
254 61   50     176 $charset = $chset || 'UTF8';
255 61 50       322 if (UNIVERSAL::isa($file, 'File::RandomAccess')) {
    0          
    0          
256 61         105 $raf = $file;
257 61         125 $file = 'JSON file';
258             } elsif (ref $file eq 'GLOB') {
259 0         0 $raf = File::RandomAccess->new($file);
260 0         0 $file = 'JSON file';
261             } elsif (ref $file eq 'SCALAR') {
262 0         0 $raf = File::RandomAccess->new($file);
263 0         0 $file = 'in memory';
264             } else {
265 0 0       0 open JSONFILE, $file or return "Error opening JSON file '${file}'";
266 0         0 binmode JSONFILE;
267 0         0 $openedFile = 1;
268 0         0 $raf = File::RandomAccess->new(\*JSONFILE);
269             }
270 61         218 my $obj = ReadJSONObject($raf);
271 61 50       157 close JSONFILE if $openedFile;
272 61 50       169 unless (ref $obj eq 'ARRAY') {
273 61 50       169 ref $obj eq 'HASH' or return "Format error in JSON file '${file}'";
274 61         147 $obj = [ $obj ];
275             }
276 61         121 my ($info, $found);
277 61         146 foreach $info (@$obj) {
278 61 50       225 next unless ref $info eq 'HASH';
279             # fix "SourceFile" case, or assume '*' if SourceFile not specified
280 61 50       185 unless (defined $$info{SourceFile}) {
281 61         333 my ($key) = grep /^SourceFile$/i, keys %$info;
282 61 50       151 if ($key) {
283 0         0 $$info{SourceFile} = $$info{$key};
284 0         0 delete $$info{$key};
285             } else {
286 61         178 $$info{SourceFile} = '*';
287             }
288             }
289 61 100       152 if (defined $missingValue) {
290 1   100     14 $$info{$_} eq $missingValue and $$info{$_} = undef foreach keys %$info;
291             }
292 61         177 $$database{$$info{SourceFile}} = $info;
293 61         150 $found = 1;
294             }
295 61 50       314 return $found ? undef : "No valid JSON objects in '${file}'";
296             }
297              
298              
299             1; # end
300              
301             __END__