#
# $Id$
#
# file::csv Brik
#
package Metabrik::File::Csv;
use strict;
use warnings;

use base qw(Metabrik);

#
# Return the static description of this Brik as a HASH reference.
#
# The returned structure holds the revision, tags, author and license strings,
# the list of Attributes with their accepted values, the default value of some
# Attributes, the list of Commands with their arguments, and the modules this
# Brik requires. A new structure is built on every call.
#
sub brik_properties {
    # Attributes and the kind of value each one accepts. Names starting
    # with an underscore are marked INTERNAL.
    my %attributes = (
        input => [ 'file' ],
        output => [ 'file' ],
        first_line_is_header => [ '0|1' ],
        separator => [ 'character' ],
        escape => [ 'character' ],
        header => [ '$column_header_list' ],
        encoding => [ 'utf8|ascii' ],
        overwrite => [ '0|1' ],
        append => [ '0|1' ],
        write_header => [ '0|1' ],
        use_quoting => [ '0|1' ],
        use_locking => [ '0|1' ],
        unbuffered => [ '0|1' ],
        encoded_fields => [ 'fields' ],
        object_fields => [ 'fields' ],
        _csv => [ 'INTERNAL' ],
        _fd => [ 'INTERNAL' ],
        _sb => [ 'INTERNAL' ],
        _sc => [ 'INTERNAL' ],
    );

    # Values used for Attributes the user did not set.
    my %attributes_default = (
        first_line_is_header => 1,
        separator => ',',
        escape => '"',
        encoding => 'utf8',
        overwrite => 0,
        append => 1,
        write_header => 1,
        use_quoting => 0,
        use_locking => 0,
        unbuffered => 0,
    );

    # Commands and their arguments.
    my %commands = (
        read => [ 'input_file|OPTIONAL' ],
        write => [ 'csv_struct', 'output_file|OPTIONAL' ],
        get_column_values => [ '$data', 'column_name|column_int' ],
        read_next => [ 'input_file|OPTIONAL' ],
    );

    # Modules required by this Brik.
    my %require_modules = map { $_ => [ ] } (
        'Data::Dump',
        'Text::CSV_XS',
        'Metabrik::File::Read',
        'Metabrik::File::Write',
        'Metabrik::String::Base64',
        'Metabrik::String::Compress',
    );

    return {
        revision => '$Revision$',
        tags => [ 'unstable' ],
        # NOTE(review): the trailing space suggests a contact address was
        # stripped from this string -- confirm against the upstream file.
        author => 'GomoR ',
        license => 'http://opensource.org/licenses/BSD-3-Clause',
        attributes => \%attributes,
        attributes_default => \%attributes_default,
        commands => \%commands,
        require_modules => \%require_modules,
    };
}

#
# Initialise the Brik: create the Base64 and Compress helper Briks, keep them
# in the _sb and _sc Attributes, then hand over to the parent brik_init.
#
# Returns nothing when one of the helpers cannot be created, otherwise the
# value returned by the parent class brik_init.
#
sub brik_init {
    my ($self) = @_;

    # Both helpers have to exist before either of them is stored.
    my $base64 = Metabrik::String::Base64->new_from_brik_init($self);
    return unless $base64;
    my $compress = Metabrik::String::Compress->new_from_brik_init($self);
    return unless $compress;

    $self->_sb($base64);
    $self->_sc($compress);

    return $self->SUPER::brik_init;
}

#
# Read a whole CSV file and return its rows as an ARRAY reference.
#
# Arguments:
#   $input - path of the CSV file; optional, defaults to the input Attribute.
#
# Returns:
#   - an ARRAYREF of HASHREFs keyed by the column names found on the first
#     line when the first_line_is_header Attribute is true (the header
#     Attribute is then set to that first line),
#   - an ARRAYREF of ARRAYREFs otherwise,
#   - nothing (or the result of log->error) when an argument check, the
#     parser creation or the file opening fails.
#
# When parsing stops before the end of the file, an error is logged and the
# rows read so far are returned. The input file is closed in both cases.
#
sub read {
    my $self = shift;
    my ($input) = @_;

    $input ||= $self->input;
    $self->brik_help_run_undef_arg('read', $input) or return;
    $self->brik_help_run_file_not_found('read', $input) or return;

    my $csv = Text::CSV_XS->new({
        binary => 1,
        sep_char => $self->separator,
        allow_loose_quotes => 1,
        allow_loose_escapes => 1,
        escape_char => $self->escape,
    }) or return $self->log->error('read: Text::CSV_XS new failed');

    my $fr = Metabrik::File::Read->new_from_brik_init($self) or return;
    $fr->encoding($self->encoding);
    my $fd = $fr->open($input) or return;

    # When some content is too complex to be stored as a standard CSV cell,
    # we should encode it as base64.
    my $sb = $self->_sb;
    my $sc = $self->_sc;

    # Turn the lists of field names into lookup tables.
    my $encoded_fields = $self->encoded_fields;
    if (defined($encoded_fields)) {
        my $str = join(',', @$encoded_fields);
        $encoded_fields = { map { $_ => 1 } @$encoded_fields };
        $self->log->debug("read: will decode field(s) [$str] in encoded format");
    }
    my $object_fields = $self->object_fields;
    if (defined($object_fields)) {
        my $str = join(',', @$object_fields);
        $object_fields = { map { $_ => 1 } @$object_fields };
        $self->log->debug("read: will decode field(s) [$str] in object format");
    }

    my $object_re = qr/^OBJECT:(.*)$/;
    my $base64_re = qr/^BASE64:(.*)$/;  # Keep for backward compat.

    my $has_header = $self->first_line_is_header;
    my $headers;   # First line of the file, once read.
    my $count;     # Index of the last header column.
    my @rows = ();

    ROW:
    while (my $row = $csv->getline($fd)) {
        # The CSV has no header, we output an array of arrays
        if (! $has_header) {
            push @rows, $row;
            next ROW;
        }

        # The CSV file has a header, we output an array of hashes
        if (! defined($headers)) {  # This is first line
            $headers = $row;
            $count = $#{$row};
            $self->header($headers);
            next ROW;
        }

        # Always start from an empty HASH so that a row whose fields were all
        # skipped is still returned as a HASHREF.
        my $h = {};

        # We have to decode some fields
        if ($encoded_fields || $object_fields) {
            FIELD:
            for my $idx (0..$count) {
                my $k = $headers->[$idx];
                my $v = $row->[$idx];
                next FIELD unless defined($v);

                # Decode only if it has been asked and the value is not empty.
                # Decode the encode format: base64, then gunzip.
                if ($encoded_fields && exists($encoded_fields->{$k}) && length($v)) {
                    my $decoded = $sb->decode($v);
                    if (! defined($decoded)) {
                        $self->log->error("read: decode encoded format failed, ".
                            "skipping data with length [".length($v)."]");
                        next FIELD;
                    }
                    my $gunzipped = $sc->gunzip($decoded);
                    if (! defined($gunzipped)) {
                        $self->log->error("read: gunzip failed, skipping ".
                            "decoded data with length [".length($decoded)."]");
                        next FIELD;
                    }
                    $v = $$gunzipped;
                }

                # Decode the object format
                if ($object_fields && exists($object_fields->{$k}) && length($v)
                &&  ($v =~ $object_re || $v =~ $base64_re)) {
                    my $decoded = $sb->decode($1);
                    if (! defined($decoded)) {
                        $self->log->error("read: decode object format failed, ".
                            "skipping data with length [".length($v)."]");
                        next FIELD;
                    }
                    # XXX: SECURITY: this runs text taken from the CSV file as
                    # Perl code. Only read object fields from files you trust.
                    $v = eval($decoded);
                }

                $h->{$k} = $v;
            }
        }
        # Or not.
        else {
            for my $idx (0..$count) {
                $h->{$headers->[$idx]} = $row->[$idx];
            }
        }

        push @rows, $h;
    }

    # getline stopped before end of file: report it, but still hand back what
    # we have. The diagnostic is taken before the file is closed.
    if (! $csv->eof) {
        my $error_str = "".$csv->error_diag();
        $self->log->error("read: incomplete: error [$error_str]");
    }

    $fr->close;

    return \@rows;
}

#
# We only handle ARRAY of HASHes format (aoh) for writing
#
# Arguments:
#   $csv_struct - non-empty ARRAYREF of HASHREFs, one HASH per CSV line.
#   $output     - path of the file to write; optional, defaults to the output
#                 Attribute.
#
# Columns are ordered using the header Attribute when set, otherwise using the
# sorted keys of the first HASH. Keys that are not part of the header are
# ignored, missing or undefined values are written as empty strings.
#
# Returns the text that has been written, or the result of log->error when
# nothing has been written, or nothing when an argument check, the file
# opening or the header writing fails.
#
sub write {
    my $self = shift;
    my ($csv_struct, $output) = @_;

    $output ||= $self->output;
    $self->brik_help_run_undef_arg('write', $csv_struct) or return;
    $self->brik_help_run_invalid_arg('write', $csv_struct, 'ARRAY') or return;
    $self->brik_help_run_empty_array_arg('write', $csv_struct, 'ARRAY') or return;
    $self->brik_help_run_undef_arg('write', $output) or return;

    if (ref($csv_struct->[0]) ne 'HASH') {
        return $self->log->error("write: csv structure does not contain HASHes");
    }

    my $fw = Metabrik::File::Write->new_from_brik_init($self) or return;
    $fw->output($output);
    $fw->encoding($self->encoding);
    $fw->overwrite($self->overwrite);
    $fw->append($self->append);
    $fw->use_locking($self->use_locking);
    $fw->unbuffered($self->unbuffered);

    # When some content is too complex to be stored as a standard CSV cell,
    # we should encode it as base64.
    my $sb = $self->_sb;
    my $sc = $self->_sc;

    # Turn the lists of field names into lookup tables.
    my $encoded_fields = $self->encoded_fields;
    if (defined($encoded_fields)) {
        my $str = join(',', @$encoded_fields);
        $encoded_fields = { map { $_ => 1 } @$encoded_fields };
        $self->log->debug("write: will encode field(s) [$str] in encoded format");
    }
    my $object_fields = $self->object_fields;
    if (defined($object_fields)) {
        my $str = join(',', @$object_fields);
        $object_fields = { map { $_ => 1 } @$object_fields };
        $self->log->debug("write: will encode field(s) [$str] in object format");
    }

    #
    # Set header ordering
    #
    # Order headers either by using user provided one or our own default
    # ordering: if user didn't provide her own header, we use the sorted keys
    # of the first element from struct.
    my @header = ();
    if ($self->header) {
        @header = @{$self->header};
    }
    else {
        my $first = $csv_struct->[0];
        @header = sort { $a cmp $b } keys %$first;
    }

    # Column name => column position.
    my %order = ();
    @order{@header} = (0..$#header);

    my $header_count = @header;

    # Must be tested before the file is opened.
    my $is_new_file = (! -f $output);
    my $fd = $fw->open or return;

    my $written = '';

    my $separator = $self->separator;
    my $escape = $self->escape;
    my $use_quoting = $self->use_quoting;

    # Write header if this is a new file and user asked for it.
    if ($self->write_header && ($is_new_file || $self->overwrite)) {
        my $data = join($separator, @header)."\n";
        my $r = $fw->write($data);
        if (! defined($r)) {
            $fw->close;  # Do not leave the output file opened on failure.
            return;
        }
        $written .= $data;
    }

    local $Data::Dump::INDENT = "";     # No indentation shorten length
    local $Data::Dump::TRY_BASE64 = 0;  # Never encode in base64

    # Write the structure to file.
    ROW:
    for my $this (@$csv_struct) {
        # Only the first element has been checked so far.
        if (ref($this) ne 'HASH') {
            $self->log->warning("write: row is not a HASHREF, skipping");
            next ROW;
        }

        # One slot per header column; slots left untouched are written as
        # empty strings.
        my @fields = ('') x $header_count;

        KEY:
        for my $key (keys %$this) {
            # We may have some unwanted data in this HASH, we skip it.
            next KEY if (! defined($order{$key}));

            my $v = $this->{$key};
            next KEY unless defined($v);

            # We have to encode some fields
            if ($encoded_fields || $object_fields) {
                # Encode only if it has been asked and the value is not empty.
                if ($encoded_fields && exists($encoded_fields->{$key}) && length($v)) {
                    # Gzip to handle UTF-like encodings, cause Base64 does not
                    # like that.
                    my $gzipped = $sc->gzip($v);
                    if (! defined($gzipped)) {
                        $self->log->error("write: gzip failed, skipping");
                        next KEY;
                    }
                    $v = $sb->encode($$gzipped);
                    if (! defined($v)) {
                        $self->log->error("write: encode in encoded format failed, skipping");
                        next KEY;
                    }
                }

                # Encode only if it has been asked and the value is not empty.
                if ($object_fields && exists($object_fields->{$key}) && length($v)) {
                    # Encode ARRAYs and HASHes only if they are not empty.
                    if (ref($v) eq 'ARRAY' && @$v > 0
                    ||  ref($v) eq 'HASH' && keys %$v > 0) {
                        my $dumped = Data::Dump::dump($v);
                        $dumped =~ s{\n}{}g;
                        # Check the encoder result before adding the prefix,
                        # otherwise a failure can not be detected.
                        my $encoded = $sb->encode($dumped);
                        if (! defined($encoded)) {
                            $self->log->error("write: encode in object format failed, skipping");
                            next KEY;
                        }
                        $v = 'OBJECT:'.$encoded;
                    }
                    # If this is a simple string, we do not encode at all.
                    # And for empty objects (or any other kind of reference),
                    # we set them to empty string.
                    elsif (ref($v) ne '') {
                        $v = "";
                    }
                }
            }

            $fields[$order{$key}] = $v;
        }

        if ($use_quoting) {
            for my $field (@fields) {
                $field =~ s/"/${escape}"/g;
                $field = '"'.$field.'"';
            }
        }

        my $data = join($separator, @fields)."\n";

        # A line that can not be written is skipped.
        my $r = $fw->write($data);
        if (! defined($r)) {
            next ROW;
        }

        $written .= $data;
    }

    $fw->close;

    if (! length($written)) {
        return $self->log->error("write: nothing to write");
    }

    return $written;
}

#
# Extract the values of one column from an already read CSV structure.
#
# Arguments:
#   $data   - ARRAYREF of rows.
#   $column - column name when the first_line_is_header Attribute is true
#             (rows are expected to be HASHREFs), otherwise a column index
#             made of digits only (rows are expected to be ARRAYREFs).
#
# Returns an ARRAYREF with the value found in each row holding that column.
# Rows of the wrong type are reported with a warning and skipped. The result
# is empty when first_line_is_header is false and $column is not an integer.
# Returns nothing (or the result of log->error) when an argument check fails
# or when first_line_is_header is true but the header Attribute is not a
# non-empty ARRAYREF.
#
sub get_column_values {
    my $self = shift;
    my ($data, $column) = @_;

    $self->brik_help_run_undef_arg('get_column_values', $data) or return;
    $self->brik_help_run_invalid_arg('get_column_values', $data, 'ARRAY') or return;
    $self->brik_help_run_undef_arg('get_column_values', $column) or return;

    my @results = ();

    # CSV structure is an ARRAYREF of HASHREFs
    if ($self->first_line_is_header) {
        # The header Attribute may never have been set: check it is really an
        # ARRAYREF before dereferencing it.
        my $header = $self->header;
        if (ref($header) ne 'ARRAY' || @$header == 0) {
            return $self->log->error("get_column_values: no CSV header found");
        }

        for my $row (@$data) {
            if (ref($row) ne 'HASH') {
                $self->log->warning("get_column_values: row is not a HASHREF");
                next;
            }
            if (exists($row->{$column})) {
                push @results, $row->{$column};
            }
        }
    }
    # CSV structure is an ARRAYREF of ARRAYREFs
    elsif ($column =~ m{^\d+$}) {
        for my $row (@$data) {
            if (ref($row) ne 'ARRAY') {
                $self->log->warning("get_column_values: row is not an ARRAYREF");
                next;
            }
            if (exists($row->[$column])) {
                push @results, $row->[$column];
            }
        }
    }

    return \@results;
}

#
# Read a CSV file one row per call.
#
# Arguments:
#   $input - path of the CSV file; optional, defaults to the input Attribute.
#
# The first call creates the CSV parser and opens the file; both are kept in
# the _csv and _fd Attributes for the following calls. When the
# first_line_is_header Attribute is true, the first line is read at that time
# and stored in the header Attribute.
#
# Returns:
#   - a HASHREF keyed by the header Attribute when it is defined,
#   - an ARRAYREF otherwise,
#   - 0 when the end of the file has been reached (the internal state is then
#     reset, so a later call starts again from the beginning),
#   - nothing (or the result of log->error) when an argument check, the
#     parser creation, the file opening or the parsing of a line fails.
#
sub read_next {
    my $self = shift;
    my ($input) = @_;

    $input ||= $self->input;
    $self->brik_help_run_undef_arg('read_next', $input) or return;
    $self->brik_help_run_file_not_found('read_next', $input) or return;

    my $csv = $self->_csv;
    my $fd = $self->_fd;
    if (! defined($csv)) {
        $self->log->debug('read_next: first call, create _csv');
        $csv = Text::CSV_XS->new({
            binary => 1,
            sep_char => $self->separator,
            allow_loose_quotes => 1,
            allow_loose_escapes => 1,
            escape_char => $self->escape,
        }) or return $self->log->error('read_next: Text::CSV_XS new failed');

        my $fr = Metabrik::File::Read->new_from_brik_init($self) or return;
        $fr->encoding($self->encoding);
        $fd = $fr->open($input) or return;

        # Keep the parser and the file handle only once both exist, so a
        # failed opening does not leave a parser without a file for the
        # next call.
        $self->_csv($csv);
        $self->_fd($fd);

        if ($self->first_line_is_header) {
            my $header = $csv->getline($fd);
            $self->header($header);
        }
    }

    # When some content is too complex to be stored as a standard CSV cell,
    # we should encode it as base64.
    my $sb = $self->_sb;
    my $sc = $self->_sc;

    # Turn the lists of field names into lookup tables.
    my $encoded_fields = $self->encoded_fields;
    if (defined($encoded_fields)) {
        my $str = join(',', @$encoded_fields);
        $encoded_fields = { map { $_ => 1 } @$encoded_fields };
        $self->log->debug("read_next: will decode field(s) [$str] in base64");
    }
    my $object_fields = $self->object_fields;
    if (defined($object_fields)) {
        my $str = join(',', @$object_fields);
        $object_fields = { map { $_ => 1 } @$object_fields };
        $self->log->debug("read_next: will decode field(s) [$str] in object format");
    }

    my $object_re = qr/^OBJECT:(.*)$/;
    my $base64_re = qr/^BASE64:(.*)$/;  # Keep for backward compat.

    my $row = $csv->getline($fd);

    # No row: either the end of the file or a parsing failure. The eof flag
    # is only looked at here, because it may already be set when the last
    # line of a file has no trailing newline, and that line must still be
    # returned.
    if (! defined($row)) {
        if ($csv->eof) {
            $self->log->debug('read_next: eof reached');
            $self->_fd(undef);
            $self->_csv(undef);
            return 0;
        }
        my $error_str = "".$csv->error_diag();
        return $self->log->error("read_next: getline failed: error [$error_str]");
    }

    # If a header is given as an Attribute, we use it to return a HASH
    my $header = $self->header;
    if (defined($header)) {
        my $h = {};
        my $i = 0;

        # We have to decode some fields
        if ($encoded_fields || $object_fields) {
            FIELD:
            for my $k (@$header) {
                my $v = $row->[$i++];
                next FIELD unless defined($v);

                # Decode only if it has been asked and the value is not empty.
                # Decode the encode format: base64, then gunzip.
                if ($encoded_fields && exists($encoded_fields->{$k}) && length($v)) {
                    my $decoded = $sb->decode($v);
                    if (! defined($decoded)) {
                        $self->log->error("read_next: decode failed, skipping data ".
                            "with length [".length($v)."]");
                        next FIELD;
                    }
                    my $gunzipped = $sc->gunzip($decoded);
                    if (! defined($gunzipped)) {
                        $self->log->error("read_next: gunzip failed, skipping ".
                            "decoded data with length [".length($decoded)."]");
                        next FIELD;
                    }
                    $v = $$gunzipped;
                }

                # Decode the object format
                if ($object_fields && exists($object_fields->{$k}) && length($v)
                &&  ($v =~ $object_re || $v =~ $base64_re)) {
                    my $decoded = $sb->decode($1);
                    if (! defined($decoded)) {
                        $self->log->error("read_next: decode object format failed, ".
                            "skipping data with length [".length($v)."]");
                        next FIELD;
                    }
                    # XXX: SECURITY: this runs text taken from the CSV file as
                    # Perl code. Only read object fields from files you trust.
                    $v = eval($decoded);
                }

                $h->{$k} = $v;
            }
        }
        # Or not.
        else {
            for my $k (@$header) {
                $h->{$k} = $row->[$i++];
            }
        }

        $row = $h;
    }

    return $row;
}

1;

__END__