line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#!/usr/bin/perl |
2
|
|
|
|
|
|
|
# ABSTRACT: helper module to automate the use of Text::CSV |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
# vim:tw=100 |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
# Text/AutoCSV.pm |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
# Written by Sébastien Millet |
10
|
|
|
|
|
|
|
# March, July, August, September 2016 |
11
|
|
|
|
|
|
|
# January, February 2017 |
12
|
|
|
|
|
|
|
# |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
package Text::AutoCSV; |
15
|
|
|
|
|
|
|
$Text::AutoCSV::VERSION = '1.1.9'; |
16
|
|
|
|
|
|
|
my $PKG = "Text::AutoCSV"; |
17
|
|
|
|
|
|
|
|
18
|
20
|
|
|
20
|
|
879687
|
use strict; |
|
20
|
|
|
|
|
65
|
|
|
20
|
|
|
|
|
510
|
|
19
|
17
|
|
|
17
|
|
83
|
use warnings; |
|
17
|
|
|
|
|
33
|
|
|
17
|
|
|
|
|
904
|
|
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
require Exporter; |
22
|
|
|
|
|
|
|
our @ISA = 'Exporter'; |
23
|
|
|
|
|
|
|
our @EXPORT_OK = qw(remove_accents); |
24
|
|
|
|
|
|
|
|
25
|
17
|
|
|
17
|
|
91
|
use Carp; |
|
17
|
|
|
|
|
33
|
|
|
17
|
|
|
|
|
995
|
|
26
|
17
|
|
|
17
|
|
5429
|
use Params::Validate qw(validate validate_pos :types); |
|
17
|
|
|
|
|
112305
|
|
|
17
|
|
|
|
|
3238
|
|
27
|
17
|
|
|
17
|
|
5606
|
use List::MoreUtils qw(first_index indexes); |
|
17
|
|
|
|
|
113952
|
|
|
17
|
|
|
|
|
156
|
|
28
|
17
|
|
|
17
|
|
11361
|
use Fcntl qw(SEEK_SET); |
|
17
|
|
|
|
|
34
|
|
|
17
|
|
|
|
|
791
|
|
29
|
17
|
|
|
17
|
|
4861
|
use File::BOM; |
|
17
|
|
|
|
|
398401
|
|
|
17
|
|
|
|
|
868
|
|
30
|
17
|
|
|
17
|
|
7409
|
use Text::CSV; |
|
17
|
|
|
|
|
231961
|
|
|
17
|
|
|
|
|
780
|
|
31
|
17
|
|
|
17
|
|
9579
|
use DateTime; |
|
17
|
|
|
|
|
6663742
|
|
|
17
|
|
|
|
|
958
|
|
32
|
|
|
|
|
|
|
# DateTime::Format::Strptime 1.70 does not work properly with us. |
33
|
|
|
|
|
|
|
# Actually all version as of 1.63 are fine, except 1.70. |
34
|
17
|
|
|
17
|
|
8364
|
use DateTime::Format::Strptime 1.71; |
|
17
|
|
|
|
|
885728
|
|
|
17
|
|
|
|
|
125
|
|
35
|
17
|
|
|
17
|
|
7995
|
use Class::Struct; |
|
17
|
|
|
|
|
25020
|
|
|
17
|
|
|
|
|
102
|
|
36
|
17
|
|
|
17
|
|
7503
|
use Unicode::Normalize; |
|
17
|
|
|
|
|
26892
|
|
|
17
|
|
|
|
|
988
|
|
37
|
|
|
|
|
|
|
# lock_keys is used to prevent accessing non existing keys |
38
|
|
|
|
|
|
|
# Credits: 3381159 on http://stackoverflow.com |
39
|
|
|
|
|
|
|
# "make perl shout when trying to access undefined hash key" |
40
|
17
|
|
|
17
|
|
5396
|
use Hash::Util qw(lock_keys); |
|
17
|
|
|
|
|
33754
|
|
|
17
|
|
|
|
|
97
|
|
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# FIXME |
43
|
|
|
|
|
|
|
# Not needed in release -> should be always commented unless at dev time |
44
|
|
|
|
|
|
|
#use feature qw(say); |
45
|
|
|
|
|
|
|
#use Data::Dumper; |
46
|
|
|
|
|
|
|
#$Data::Dumper::Sortkeys = 1; |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# Set to 1 if you wish to turn on debug without touching caller's code |
49
|
|
|
|
|
|
|
our $ALWAYS_DEBUG = 0; |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# Keep it set to 0 unless you know what you're doing! |
52
|
|
|
|
|
|
|
# Note |
53
|
|
|
|
|
|
|
# Taken into account only if debug is set. |
54
|
|
|
|
|
|
|
my $DEBUG_DATETIME_FORMATS = 0; |
55
|
|
|
|
|
|
|
# The below is taken into account only if $DEBUG_DATETIME_FORMATS is set. |
56
|
|
|
|
|
|
|
# It'll resqult in even more debug output. It becomes really MASSIVE debug output. |
57
|
|
|
|
|
|
|
my $DEBUG_DATETIME_FORMATS_EVEN_MORE = 0; |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# |
60
|
|
|
|
|
|
|
# Uncomment to replace carp and croak with cluck and confess, respectively |
61
|
|
|
|
|
|
|
# Also reachable with perl option: |
62
|
|
|
|
|
|
|
# -MCarp=verbose |
63
|
|
|
|
|
|
|
# See 'perldoc Carp'. |
64
|
|
|
|
|
|
|
# |
65
|
|
|
|
|
|
|
#$Carp::Verbose = 1; |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# * *************** * |
69
|
|
|
|
|
|
|
# * BEHAVIOR TUNING * |
70
|
|
|
|
|
|
|
# * *************** * |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# * **************************************************** * |
74
|
|
|
|
|
|
|
# * ALL THE VARIABLES BELOW ARE RATHER LOW LEVEL. * |
75
|
|
|
|
|
|
|
# * IF YOU UPDATE IT, IT WILL LIKELY BREAK THE TEST PLAN * |
76
|
|
|
|
|
|
|
# * **************************************************** * |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
my $DEF_SEARCH_CASE = 0; # Case insensitive search by default |
79
|
|
|
|
|
|
|
my $DEF_SEARCH_TRIM = 1; # Trim values by default |
80
|
|
|
|
|
|
|
my $DEF_SEARCH_IGNORE_ACCENTS = 1; # Ignore accents |
81
|
|
|
|
|
|
|
my $DEF_SEARCH_IGNORE_EMPTY = 1; # Ignore empty strings in searches by default |
82
|
|
|
|
|
|
|
my $DEF_SEARCH_VALUE_IF_NOT_FOUND = undef; # If not found, returned field value is undef |
83
|
|
|
|
|
|
|
my $DEF_SEARCH_VALUE_IF_AMBIGUOUS = undef; # If more than one record found by search (when a |
84
|
|
|
|
|
|
|
# unique value is expected), return undef |
85
|
|
|
|
|
|
|
my $DEF_SEARCH_IGNORE_AMBIGUOUS = 1; # By default, ignore the fact that multiple records are |
86
|
|
|
|
|
|
|
# found by search and return the first record found |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
my $DETECT_ENCODING = 1; |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
my $DEFAULT_IN_ENCODING = 'UTF-8,latin1'; |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
# By default, input encoding detected is used for output. |
93
|
|
|
|
|
|
|
# -> the constant below is used if and only if: |
94
|
|
|
|
|
|
|
# Inbound encoding is unknown |
95
|
|
|
|
|
|
|
# No providing of out_encoding attribute (out_encoding takes precedence when provided) |
96
|
|
|
|
|
|
|
my $DEFAULT_OUT_ENCODING = 'UTF-8'; |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
my $DEFAULT_ESCAPE_CHAR = '\\'; |
99
|
|
|
|
|
|
|
my $DEFAULT_QUOTE_CHAR = '"'; |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
# |
102
|
|
|
|
|
|
|
# The code that workarounds $FIX_PERLMONKS_823214 (see below) makes sense only under plain |
103
|
|
|
|
|
|
|
# Windows. |
104
|
|
|
|
|
|
|
# |
105
|
|
|
|
|
|
|
# "Plain" Windows? |
106
|
|
|
|
|
|
|
# This code MUST NOT be executed under cygwin because cygwin uses unix line breaks. This is |
107
|
|
|
|
|
|
|
# why we detect /mswin/. Would we detect /win/, we'd catch cygwin, too, and we don't want |
108
|
|
|
|
|
|
|
# that. |
109
|
|
|
|
|
|
|
# |
110
|
|
|
|
|
|
|
my $OS_IS_PLAIN_WINDOWS = !! ($^O =~ /mswin/i); |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
# |
113
|
|
|
|
|
|
|
# Shall we fix the issue reported as #823214 in PerlMonks? See |
114
|
|
|
|
|
|
|
# http://www.perlmonks.org/?node_id=823214 |
115
|
|
|
|
|
|
|
# |
116
|
|
|
|
|
|
|
# In brief (in case the link above would be broken one day): |
117
|
|
|
|
|
|
|
# Under Windows, output mode set to UTF-16LE produces line breaks made of octets "0d 0a 00", |
118
|
|
|
|
|
|
|
# whereas it should be "0d 00 0a 00". |
119
|
|
|
|
|
|
|
# |
120
|
|
|
|
|
|
|
# The code also fixes UTF-16BE (but it was not tested). |
121
|
|
|
|
|
|
|
# |
122
|
|
|
|
|
|
|
my $FIX_PERLMONKS_823214 = 1; |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# * **** * |
126
|
|
|
|
|
|
|
# * CODE * |
127
|
|
|
|
|
|
|
# * **** * |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
sub ERR_UNKNOWN_FIELD() { 0 } |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
# Store meta-data about each column |
133
|
|
|
|
|
|
|
struct ColData => { |
134
|
|
|
|
|
|
|
field_name => '$', |
135
|
|
|
|
|
|
|
header_text => '$', |
136
|
|
|
|
|
|
|
description => '$', |
137
|
|
|
|
|
|
|
dt_format => '$', |
138
|
|
|
|
|
|
|
dt_locale => '$', |
139
|
|
|
|
|
|
|
multiline => '$' |
140
|
|
|
|
|
|
|
}; |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# |
143
|
|
|
|
|
|
|
# Enumeration of ef_type member below |
144
|
|
|
|
|
|
|
# Alternative: |
145
|
|
|
|
|
|
|
# use enum (...) |
146
|
|
|
|
|
|
|
# |
147
|
|
|
|
|
|
|
# But it is not also by default on my distro and installing a package for 3 constants, I find it |
148
|
|
|
|
|
|
|
# a bit overkill! |
149
|
|
|
|
|
|
|
# |
150
|
|
|
|
|
|
|
my ($EF_LINK, $EF_FUNC, $EF_COPY) = 0..2; |
151
|
|
|
|
|
|
|
struct ExtraField => { |
152
|
|
|
|
|
|
|
ef_type => '$', |
153
|
|
|
|
|
|
|
self_name => '$', |
154
|
|
|
|
|
|
|
description => '$', |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
check_field_existence => '$', |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
# For when ef_type is set to $EF_LINK |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
link_self_search => '$', |
161
|
|
|
|
|
|
|
link_remote_obj => '$', |
162
|
|
|
|
|
|
|
link_remote_search => '$', |
163
|
|
|
|
|
|
|
link_remote_read => '$', |
164
|
|
|
|
|
|
|
link_vlookup_opts => '%', |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
# For when ef_type is set to $EF_FUNC |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
func_sub => '$', |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
# For when ef_type is set to $EF_COPY |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
copy_source => '$', |
173
|
|
|
|
|
|
|
copy_sub => '$' |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
}; |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
my $SEARCH_VALIDATE_OPTIONS = { |
178
|
|
|
|
|
|
|
value_if_not_found => {type => UNDEF | SCALAR, optional => 1}, |
179
|
|
|
|
|
|
|
value_if_found => {type => UNDEF | SCALAR, optional => 1}, |
180
|
|
|
|
|
|
|
value_if_ambiguous => {type => UNDEF | SCALAR, optional => 1}, |
181
|
|
|
|
|
|
|
ignore_ambiguous => {type => BOOLEAN, optional => 1}, |
182
|
|
|
|
|
|
|
case => {type => BOOLEAN, optional => 1}, |
183
|
|
|
|
|
|
|
trim => {type => BOOLEAN, optional => 1}, |
184
|
|
|
|
|
|
|
ignore_empty => {type => BOOLEAN, optional => 1}, |
185
|
|
|
|
|
|
|
ignore_accents => {type => BOOLEAN, optional => 1} |
186
|
|
|
|
|
|
|
}; |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
sub _is_utf8 { |
189
|
424
|
|
|
424
|
|
828
|
my $e = shift; |
190
|
|
|
|
|
|
|
|
191
|
424
|
100
|
|
|
|
3130
|
return 1 if $e =~ m/^(utf-?8|ucs-?8)/i; |
192
|
32
|
|
|
|
|
94
|
return 0; |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
# To replace // in old perls: return the first non-undef value in provided list |
196
|
|
|
|
|
|
|
sub _get_def { |
197
|
12079
|
|
|
12079
|
|
19554
|
for (@_) { |
198
|
18709
|
100
|
|
|
|
40127
|
return $_ if defined($_); |
199
|
|
|
|
|
|
|
} |
200
|
895
|
|
|
|
|
1451
|
return undef; |
201
|
|
|
|
|
|
|
} |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
sub _print { |
204
|
15
|
|
|
15
|
|
20
|
my $self = shift; |
205
|
15
|
|
|
|
|
21
|
my $t = shift; |
206
|
|
|
|
|
|
|
|
207
|
15
|
|
|
|
|
26
|
my $infoh = $self->{infoh}; |
208
|
15
|
50
|
|
|
|
27
|
return if ref $infoh ne 'GLOB'; |
209
|
|
|
|
|
|
|
|
210
|
0
|
|
|
|
|
0
|
print($infoh $t); |
211
|
|
|
|
|
|
|
} |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
sub _printf { |
214
|
17
|
|
|
17
|
|
21
|
my $self = shift; |
215
|
|
|
|
|
|
|
|
216
|
17
|
|
|
|
|
20
|
my $infoh = $self->{infoh}; |
217
|
17
|
50
|
|
|
|
53
|
return if ref $infoh ne 'GLOB'; |
218
|
|
|
|
|
|
|
|
219
|
0
|
|
|
|
|
0
|
printf($infoh @_); |
220
|
|
|
|
|
|
|
} |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
sub _print_warning { |
223
|
67
|
|
|
67
|
|
103
|
my $self = shift; |
224
|
67
|
|
|
|
|
93
|
my $warning_message = shift; |
225
|
67
|
|
|
|
|
93
|
my $dont_wrap = shift; |
226
|
|
|
|
|
|
|
|
227
|
67
|
100
|
|
|
|
161
|
my $msg = ($dont_wrap ? $warning_message : "$PKG: warning: $warning_message"); |
228
|
67
|
100
|
|
|
|
4313
|
carp $msg unless $self->{quiet}; |
229
|
|
|
|
|
|
|
} |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
sub _close_inh { |
232
|
316
|
|
|
316
|
|
467
|
my $self = shift; |
233
|
|
|
|
|
|
|
|
234
|
316
|
100
|
|
|
|
3175
|
close $self->{_inh} if $self->{_close_inh_when_finished}; |
235
|
316
|
|
|
|
|
1218
|
$self->{_inh} = undef; |
236
|
316
|
|
|
|
|
602
|
$self->{_close_inh_when_finished} = undef; |
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
sub _close_outh { |
240
|
127
|
|
|
127
|
|
197
|
my $self = shift; |
241
|
|
|
|
|
|
|
|
242
|
127
|
50
|
66
|
|
|
11912
|
close $self->{outh} if defined($self->{outh}) and $self->{_close_outh_when_finished}; |
243
|
127
|
|
|
|
|
413
|
$self->{outh} = undef; |
244
|
127
|
|
|
|
|
278
|
$self->{_close_outh_when_finished} = undef; |
245
|
|
|
|
|
|
|
} |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
sub _print_error { |
248
|
81
|
|
|
81
|
|
316
|
my ($self, $error_message, $dont_stop, $err_code, $err_extra) = @_; |
249
|
|
|
|
|
|
|
|
250
|
81
|
|
|
|
|
194
|
my $msg = "$PKG: error: $error_message"; |
251
|
|
|
|
|
|
|
|
252
|
81
|
100
|
100
|
|
|
267
|
if (defined($err_code) and !$self->{quiet} and $self->{croak_if_error}) { |
|
|
|
100
|
|
|
|
|
253
|
5
|
50
|
|
|
|
15
|
if ($err_code == ERR_UNKNOWN_FIELD) { |
254
|
5
|
|
|
|
|
10
|
my %f = %{$err_extra}; |
|
5
|
|
|
|
|
20
|
|
255
|
5
|
|
|
|
|
9
|
my @cols; |
256
|
5
|
|
|
|
|
18
|
for my $n (keys %f) { |
257
|
15
|
|
|
|
|
30
|
$cols[$f{$n}] = $n; |
258
|
|
|
|
|
|
|
} |
259
|
5
|
|
|
|
|
17
|
$self->_print($self->get_in_file_disp() . " column - field name correspondance:\n"); |
260
|
5
|
|
|
|
|
13
|
$self->_print("COL # FIELD\n"); |
261
|
5
|
|
|
|
|
13
|
$self->_print("----- -----\n"); |
262
|
5
|
|
|
|
|
15
|
for my $i (0..$#cols) { |
263
|
17
|
100
|
|
|
|
45
|
$self->_printf("%05d %s\n", $i, (defined($cols[$i]) ? $cols[$i] : '')); |
264
|
|
|
|
|
|
|
} |
265
|
|
|
|
|
|
|
} else { |
266
|
0
|
|
|
|
|
0
|
confess "Unknown error code: '$err_code'\n"; |
267
|
|
|
|
|
|
|
} |
268
|
|
|
|
|
|
|
} |
269
|
|
|
|
|
|
|
|
270
|
81
|
100
|
100
|
|
|
287
|
if ($self->{croak_if_error} and !$dont_stop) { |
271
|
30
|
|
|
|
|
92
|
$self->_close_read(1); |
272
|
30
|
|
|
|
|
84
|
$self->_close_inh(); |
273
|
30
|
|
|
|
|
72
|
$self->_close_outh(); |
274
|
30
|
|
|
|
|
91
|
$self->_status_reset(1); |
275
|
30
|
|
|
|
|
4526
|
croak $msg; |
276
|
|
|
|
|
|
|
} |
277
|
51
|
|
|
|
|
132
|
$self->_print_warning($msg, 1); |
278
|
|
|
|
|
|
|
} |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
# |
281
|
|
|
|
|
|
|
# Return the string passed in argument with all accents removed from characters. |
282
|
|
|
|
|
|
|
# Do it in a rather general and reliable way, not tied to latin1. |
283
|
|
|
|
|
|
|
# Tested on latin1 and latin2 character sets. |
284
|
|
|
|
|
|
|
# |
285
|
|
|
|
|
|
|
# Credits: |
286
|
|
|
|
|
|
|
# http://stackoverflow.com/questions/17561839/remove-accents-from-accented-characters |
287
|
|
|
|
|
|
|
# |
288
|
|
|
|
|
|
|
sub remove_accents { |
289
|
3163
|
|
|
3163
|
1
|
25231
|
validate_pos(@_, {type => SCALAR}); |
290
|
|
|
|
|
|
|
|
291
|
3163
|
|
|
|
|
7569
|
my $s = $_[0]; |
292
|
3163
|
|
|
|
|
11025
|
my $r = NFKD($s); |
293
|
3163
|
|
|
|
|
7004
|
$r =~ s/\p{Nonspacing_Mark}//g; |
294
|
3163
|
|
|
|
|
6659
|
return $r; |
295
|
|
|
|
|
|
|
} |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
sub _detect_csv_sep { |
298
|
262
|
|
|
262
|
|
435
|
my $ST_OUTSIDE = 0; |
299
|
262
|
|
|
|
|
435
|
my $ST_INSIDE = 1; |
300
|
|
|
|
|
|
|
|
301
|
262
|
|
|
|
|
605
|
my ($self, $escape_char, $quote_char, $sep) = @_; |
302
|
|
|
|
|
|
|
|
303
|
262
|
|
|
|
|
491
|
my $_debugh = $self->{_debugh}; |
304
|
262
|
|
|
|
|
425
|
my $inh = $self->{_inh}; |
305
|
262
|
|
|
|
|
413
|
my $_debug = $self->{_debug}; |
306
|
|
|
|
|
|
|
|
307
|
262
|
|
|
|
|
401
|
delete $self->{_inh_header}; |
308
|
|
|
|
|
|
|
|
309
|
262
|
100
|
|
|
|
596
|
$escape_char = $DEFAULT_ESCAPE_CHAR unless defined($escape_char); |
310
|
|
|
|
|
|
|
|
311
|
262
|
50
|
|
|
|
612
|
$self->_print_error("illegal \$escape_char: '$escape_char' (length >= 2)"), return 0 |
312
|
|
|
|
|
|
|
if length($escape_char) >= 2; |
313
|
|
|
|
|
|
|
|
314
|
262
|
50
|
|
|
|
528
|
$self->_print_error("$PKG: error: illegal \$quote_char '$quote_char' (length >= 2)"), return 0 |
315
|
|
|
|
|
|
|
if length($quote_char) >= 2; |
316
|
|
|
|
|
|
|
|
317
|
262
|
50
|
|
|
|
553
|
$escape_char = '--' if $escape_char eq ''; |
318
|
262
|
50
|
|
|
|
511
|
$quote_char = '--' if $quote_char eq ''; |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
# FIXME (?) |
321
|
|
|
|
|
|
|
# Avoid inlined magic values for separator auto-detection. |
322
|
|
|
|
|
|
|
# Issue is, as you can see below, the behavior is also hard-coded and not straightforward to |
323
|
|
|
|
|
|
|
# render 'tunable' ("," and ";" take precedence over "\t"). |
324
|
262
|
|
|
|
|
857
|
my %Seps = ( |
325
|
|
|
|
|
|
|
";" => 0, |
326
|
|
|
|
|
|
|
"," => 0, |
327
|
|
|
|
|
|
|
"\t" => 0 |
328
|
|
|
|
|
|
|
); |
329
|
|
|
|
|
|
|
|
330
|
262
|
|
|
|
|
2921
|
my $h = <$inh>; |
331
|
262
|
50
|
|
|
|
2566
|
if ($self->{inh_is_stdin}) { |
332
|
0
|
|
|
|
|
0
|
$self->{_inh_header} = $h; |
333
|
0
|
0
|
|
|
|
0
|
print($_debugh "Input is STDIN => saving header line to re-read it " . |
334
|
|
|
|
|
|
|
"later (in-memory)\n") if $_debug; |
335
|
|
|
|
|
|
|
} else { |
336
|
262
|
|
|
|
|
1974
|
seek $inh, 0, SEEK_SET; |
337
|
262
|
50
|
|
|
|
699
|
print($_debugh "Input is not STDIN => using seek function to rewind " . |
338
|
|
|
|
|
|
|
"read head after header line reading\n") if $_debug; |
339
|
|
|
|
|
|
|
} |
340
|
|
|
|
|
|
|
|
341
|
262
|
|
|
|
|
581
|
chomp $h; |
342
|
262
|
|
|
|
|
439
|
my $status = $ST_OUTSIDE; |
343
|
262
|
|
|
|
|
587
|
my $l = length($h); |
344
|
262
|
|
|
|
|
392
|
my $c = 0; |
345
|
262
|
|
|
|
|
600
|
while ($c < $l) { |
346
|
4808
|
|
|
|
|
6085
|
my $ch = substr($h, $c, 1); |
347
|
4808
|
|
|
|
|
5203
|
my $chnext = ''; |
348
|
4808
|
100
|
|
|
|
8032
|
$chnext = substr($h, $c + 1, 1) if ($c < $l - 1); |
349
|
4808
|
100
|
|
|
|
7524
|
if ($status == $ST_INSIDE) { |
|
|
50
|
|
|
|
|
|
350
|
1521
|
50
|
66
|
|
|
2933
|
if ($ch eq $escape_char and $chnext eq $quote_char) { |
|
|
100
|
|
|
|
|
|
351
|
0
|
|
|
|
|
0
|
$c += 2; |
352
|
|
|
|
|
|
|
} elsif ($ch eq $quote_char) { |
353
|
197
|
|
|
|
|
231
|
$status = $ST_OUTSIDE; |
354
|
197
|
|
|
|
|
306
|
$c++; |
355
|
|
|
|
|
|
|
} else { |
356
|
1324
|
|
|
|
|
1982
|
$c++; |
357
|
|
|
|
|
|
|
} |
358
|
|
|
|
|
|
|
} elsif ($status == $ST_OUTSIDE) { |
359
|
3287
|
50
|
33
|
|
|
7145
|
if ($ch eq $escape_char and ($chnext eq $quote_char or |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
|
|
|
|
|
360
|
|
|
|
|
|
|
exists $Seps{$chnext})) { |
361
|
0
|
|
|
|
|
0
|
$c += 2; |
362
|
|
|
|
|
|
|
} elsif (exists $Seps{$ch}) { |
363
|
946
|
|
|
|
|
1175
|
$Seps{$ch}++; |
364
|
946
|
|
|
|
|
1410
|
$c++; |
365
|
|
|
|
|
|
|
} elsif ($ch eq $quote_char) { |
366
|
197
|
|
|
|
|
226
|
$status = $ST_INSIDE; |
367
|
197
|
|
|
|
|
315
|
$c++; |
368
|
|
|
|
|
|
|
} else { |
369
|
2144
|
|
|
|
|
3386
|
$c++; |
370
|
|
|
|
|
|
|
} |
371
|
|
|
|
|
|
|
} |
372
|
|
|
|
|
|
|
} |
373
|
|
|
|
|
|
|
|
374
|
262
|
100
|
100
|
|
|
1342
|
if ($Seps{";"} == 0 and $Seps{","} >= 1) { |
|
|
100
|
66
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
375
|
147
|
|
|
|
|
260
|
$$sep = ","; |
376
|
147
|
|
|
|
|
636
|
return 1; |
377
|
|
|
|
|
|
|
} elsif ($Seps{","} == 0 and $Seps{";"} >= 1) { |
378
|
106
|
|
|
|
|
183
|
$$sep = ";"; |
379
|
106
|
|
|
|
|
434
|
return 1; |
380
|
|
|
|
|
|
|
} elsif ($Seps{","} == 0 and $Seps{";"} == 0 and $Seps{"\t"} >= 1) { |
381
|
0
|
|
|
|
|
0
|
$$sep = "\t"; |
382
|
0
|
|
|
|
|
0
|
return 1; |
383
|
|
|
|
|
|
|
} else { |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
# Check the case where there is one unique column, in which case, |
386
|
|
|
|
|
|
|
# assume comma separator. |
387
|
9
|
|
|
|
|
19
|
my $h_no_accnt = remove_accents($h); |
388
|
9
|
100
|
|
|
|
56
|
if ($h_no_accnt =~ m/^[[:alnum:]_]+$/i) { |
389
|
3
|
|
|
|
|
8
|
$$sep = ","; |
390
|
3
|
|
|
|
|
15
|
return 1; |
391
|
|
|
|
|
|
|
} |
392
|
|
|
|
|
|
|
|
393
|
6
|
|
|
|
|
10
|
$$sep = ""; |
394
|
6
|
50
|
|
|
|
13
|
if ($_debug) { |
395
|
0
|
|
|
|
|
0
|
for my $k (keys %Seps) { |
396
|
0
|
|
|
|
|
0
|
print($_debugh "\$Seps{'$k'} = $Seps{$k}\n"); |
397
|
|
|
|
|
|
|
} |
398
|
|
|
|
|
|
|
} |
399
|
6
|
|
|
|
|
23
|
return 0; |
400
|
|
|
|
|
|
|
} |
401
|
|
|
|
|
|
|
} |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
sub _reopen_input { |
404
|
652
|
|
|
652
|
|
964
|
my $self = shift; |
405
|
|
|
|
|
|
|
|
406
|
652
|
|
|
|
|
967
|
my $in_file = $self->{in_file}; |
407
|
|
|
|
|
|
|
|
408
|
652
|
|
|
|
|
804
|
my $inh; |
409
|
652
|
50
|
|
|
|
13337
|
if (!open($inh, "<", $in_file)) { |
410
|
0
|
|
|
|
|
0
|
$self->_print_error("unable to open file '$in_file': $!"); |
411
|
0
|
|
|
|
|
0
|
return undef; |
412
|
|
|
|
|
|
|
} |
413
|
652
|
50
|
|
|
|
1847
|
if (!$self->{_leave_encoding_alone}) { |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
confess "Oups! _inh_encoding_string undef?" |
416
|
652
|
50
|
|
|
|
1329
|
unless defined($self->{_inh_encoding_string}); |
417
|
|
|
|
|
|
|
|
418
|
652
|
|
|
|
|
4216
|
binmode $inh, $self->{_inh_encoding_string}; |
419
|
|
|
|
|
|
|
} |
420
|
|
|
|
|
|
|
|
421
|
652
|
|
|
|
|
29166
|
return $inh; |
422
|
|
|
|
|
|
|
} |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
# Abstraction layer, not useful Today, could bring added value when looking into Text::CSV I/O |
425
|
|
|
|
|
|
|
sub _mygetline { |
426
|
14863
|
|
|
14863
|
|
26338
|
my ($csvobj, $fh) = @_; |
427
|
|
|
|
|
|
|
|
428
|
14863
|
|
|
|
|
300278
|
return $csvobj->getline($fh); |
429
|
|
|
|
|
|
|
} |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
sub _detect_meta { |
432
|
456
|
|
|
456
|
|
955
|
my ($self, $quote_char, $sep_char) = @_; |
433
|
|
|
|
|
|
|
|
434
|
456
|
|
|
|
|
755
|
my $in_file = $self->{in_file}; |
435
|
456
|
|
|
|
|
688
|
my $_debug = $self->{_debug}; |
436
|
456
|
|
|
|
|
693
|
my $_debugh = $self->{_debugh}; |
437
|
|
|
|
|
|
|
|
438
|
456
|
100
|
|
|
|
896
|
return if $self->{_int_one_pass}; |
439
|
436
|
100
|
|
|
|
1018
|
return if $self->{_detect_meta_done}; # Sans jeu de mot... |
440
|
|
|
|
|
|
|
|
441
|
300
|
100
|
|
|
|
717
|
if (!defined($self->{escape_char})) { |
442
|
298
|
|
|
|
|
742
|
$self->_register_pass("detect escape character"); |
443
|
|
|
|
|
|
|
|
444
|
298
|
|
|
|
|
418
|
my $flag = 0; |
445
|
298
|
|
|
|
|
693
|
my $inh = $self->_reopen_input(); |
446
|
298
|
50
|
|
|
|
752
|
if (defined($inh)) { |
447
|
298
|
|
|
|
|
3892
|
while (my $l = <$inh>) { |
448
|
7131
|
|
|
|
|
16218
|
chomp $l; |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
# Very heuristic criteria... |
451
|
|
|
|
|
|
|
# Tant pis. |
452
|
|
|
|
|
|
|
# $flag = 1 if $l =~ m/(?<!$sep_char)$quote_char$quote_char(?!$sep_char)/; |
453
|
|
|
|
|
|
|
# $flag = 1 if $l =~ m/(\\$quote_char|\\\\)/; |
454
|
7131
|
100
|
|
|
|
24067
|
$flag = 1 if $l =~ m/(\\$quote_char)/; |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
} |
457
|
298
|
|
|
|
|
2174
|
close $inh; |
458
|
|
|
|
|
|
|
} |
459
|
298
|
100
|
|
|
|
1193
|
$self->{escape_char} = ($flag ? '\\' : '"'); |
460
|
298
|
50
|
|
|
|
1002
|
print($_debugh " detected escape_char: '$self->{escape_char}'\n") if $_debug; |
461
|
|
|
|
|
|
|
} |
462
|
|
|
|
|
|
|
|
463
|
300
|
50
|
|
|
|
634
|
print($_debugh " using escape_char: '$self->{escape_char}' " . |
464
|
|
|
|
|
|
|
"to further examine input (is_always_quoted, multiline)\n") if $_debug; |
465
|
|
|
|
|
|
|
|
466
|
300
|
|
|
|
|
444
|
my $is_always_quoted = 0; |
467
|
300
|
|
|
|
|
743
|
my $inh = $self->_reopen_input(); |
468
|
300
|
|
|
|
|
484
|
my @multiline; |
469
|
300
|
50
|
|
|
|
707
|
if (defined($inh)) { |
470
|
300
|
|
|
|
|
782
|
$self->_register_pass("detect is_always_quoted and multiline"); |
471
|
|
|
|
|
|
|
my $csv = Text::CSV->new({sep_char => $sep_char, |
472
|
|
|
|
|
|
|
allow_whitespace => 1, binary => 1, auto_diag => 0, |
473
|
|
|
|
|
|
|
quote_char => $quote_char, escape_char => $self->{escape_char}, |
474
|
300
|
|
|
|
|
3106
|
keep_meta_info => 1, |
475
|
|
|
|
|
|
|
allow_loose_escapes => 1}); |
476
|
300
|
|
|
|
|
56555
|
my $nb_rows = 0; |
477
|
300
|
|
|
|
|
490
|
$is_always_quoted = 1; |
478
|
300
|
|
|
|
|
675
|
while (my $ar = _mygetline($csv, $inh)) { |
479
|
7129
|
|
|
|
|
190964
|
$nb_rows++; |
480
|
|
|
|
|
|
|
|
481
|
7129
|
|
|
|
|
8760
|
my @a = @{$ar}; |
|
7129
|
|
|
|
|
15824
|
|
482
|
7129
|
|
|
|
|
10602
|
my $e = $#a; |
483
|
7129
|
|
|
|
|
12760
|
for my $i (0..$e) { |
484
|
44737
|
100
|
|
|
|
73856
|
$is_always_quoted = 0 unless $csv->is_quoted($i); |
485
|
44737
|
100
|
|
|
|
351542
|
$multiline[$i] = 1 if $a[$i] =~ m/\n/; |
486
|
|
|
|
|
|
|
} |
487
|
|
|
|
|
|
|
} |
488
|
300
|
|
|
|
|
9064
|
$self->{_nb_rows} = $nb_rows; |
489
|
300
|
|
|
|
|
3509
|
close $inh; |
490
|
|
|
|
|
|
|
} |
491
|
300
|
|
|
|
|
1150
|
$self->{_multiline} = [ @multiline ]; |
492
|
|
|
|
|
|
|
|
493
|
300
|
50
|
|
|
|
720
|
print($_debugh " is_always_quoted: $is_always_quoted\n") if $_debug; |
494
|
300
|
|
|
|
|
665
|
$self->{_is_always_quoted} = $is_always_quoted; |
495
|
|
|
|
|
|
|
|
496
|
300
|
|
|
|
|
1405
|
$self->{_detect_meta_done} = 1; |
497
|
|
|
|
|
|
|
} |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
sub _register_pass { |
500
|
1269
|
|
|
1269
|
|
2339
|
my ($self, $pass_name) = @_; |
501
|
1269
|
|
|
|
|
2074
|
my $_debug = $self->{_debug}; |
502
|
1269
|
|
|
|
|
1808
|
my $_debugh = $self->{_debugh}; |
503
|
|
|
|
|
|
|
|
504
|
1269
|
|
|
|
|
2043
|
$self->{_pass_count}++; |
505
|
|
|
|
|
|
|
|
506
|
1269
|
50
|
|
|
|
2809
|
return unless $_debug; |
507
|
|
|
|
|
|
|
|
508
|
0
|
|
|
|
|
0
|
print($_debugh "Pass #" . $self->{_pass_count} . " ($pass_name) done\n"); |
509
|
|
|
|
|
|
|
} |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
sub _update_in_mem_record_count { |
512
|
510
|
|
|
510
|
|
1093
|
my ($self, $nonexistent_arg) = @_; |
513
|
510
|
|
|
|
|
905
|
my $_debug = $self->{_debug}; |
514
|
510
|
|
|
|
|
846
|
my $_debugh = $self->{_debugh}; |
515
|
|
|
|
|
|
|
|
516
|
510
|
50
|
|
|
|
1056
|
confess "Hey! what is this second argument?" if defined($nonexistent_arg); |
517
|
|
|
|
|
|
|
|
518
|
510
|
|
|
|
|
673
|
my $new_count = $#{$self->{_flat}} + 1; |
|
510
|
|
|
|
|
1086
|
|
519
|
|
|
|
|
|
|
|
520
|
510
|
|
|
|
|
861
|
my $updated_max = 0; |
521
|
510
|
100
|
|
|
|
1386
|
if ($new_count > $self->get_max_in_mem_record_count()) { |
522
|
155
|
|
|
|
|
486
|
$self->_set_max_in_mem_record_count($new_count); |
523
|
155
|
|
|
|
|
273
|
$updated_max = 1; |
524
|
|
|
|
|
|
|
} |
525
|
|
|
|
|
|
|
|
526
|
510
|
|
|
|
|
1247
|
$self->{_in_mem_record_count} = $new_count; |
527
|
510
|
50
|
|
|
|
1238
|
if ($_debug) { |
528
|
0
|
|
|
|
|
0
|
print($_debugh "_in_mem_record_count updated, set to $new_count"); |
529
|
0
|
0
|
|
|
|
0
|
print($_debugh " (also updated max)") if $updated_max; |
530
|
0
|
|
|
|
|
0
|
print($_debugh "\n"); |
531
|
|
|
|
|
|
|
} |
532
|
|
|
|
|
|
|
} |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
sub _detect_inh_encoding { |
535
|
320
|
|
|
320
|
|
1287
|
my ($self, $enc, $via, $in_file, $detect_enc) = @_; |
536
|
320
|
|
|
|
|
672
|
my $_debug = $self->{_debug}; |
537
|
320
|
|
|
|
|
508
|
my $_debugh = $self->{_debugh}; |
538
|
|
|
|
|
|
|
|
539
|
320
|
100
|
66
|
|
|
1101
|
$enc = $DEFAULT_IN_ENCODING if !defined($enc) or $enc eq ''; |
540
|
|
|
|
|
|
|
|
541
|
320
|
|
|
|
|
2061
|
my @encodings = split(/\s*,\s*/, $enc); |
542
|
|
|
|
|
|
|
|
543
|
320
|
50
|
|
|
|
932
|
confess "Oups! No encoding to try?" if $#encodings < 0; |
544
|
|
|
|
|
|
|
|
545
|
320
|
50
|
|
|
|
728
|
print($_debugh "[ST] _detect_inh_encoding(): start\n") if $_debug; |
546
|
|
|
|
|
|
|
|
547
|
320
|
|
|
|
|
497
|
my $wrn = 0; |
548
|
320
|
|
|
|
|
777
|
my $m; |
549
|
|
|
|
|
|
|
my $m0; |
550
|
320
|
|
|
|
|
0
|
my $ee; |
551
|
320
|
|
|
|
|
684
|
for my $e (@encodings) { |
552
|
332
|
|
|
|
|
514
|
$ee = $e; |
553
|
332
|
|
|
|
|
692
|
my $viadef = _get_def($via, ''); |
554
|
332
|
|
|
|
|
985
|
$m = ":encoding($e)$viadef"; |
555
|
332
|
100
|
|
|
|
801
|
$m0 = $m unless defined($m0); |
556
|
|
|
|
|
|
|
|
557
|
332
|
100
|
|
|
|
676
|
last unless $detect_enc; |
558
|
|
|
|
|
|
|
|
559
|
320
|
50
|
33
|
|
|
1342
|
confess "Oups! in_file not defined?" if !defined($in_file) or $in_file eq ''; |
560
|
|
|
|
|
|
|
|
561
|
320
|
50
|
|
|
|
641
|
print($_debugh " Checking encoding '$e' / '$m'\n") if $_debug; |
562
|
320
|
|
|
|
|
465
|
$wrn = 0; |
563
|
|
|
|
|
|
|
|
564
|
320
|
|
|
|
|
1138
|
$self->_register_pass("check $e encoding"); |
565
|
|
|
|
|
|
|
|
566
|
320
|
|
|
|
|
522
|
my $utf8_bom = 0; |
567
|
320
|
100
|
|
|
|
733
|
if (_is_utf8($e)) { |
568
|
296
|
50
|
|
|
|
6325
|
if (open my $fh, '<:raw', $in_file) { |
569
|
296
|
|
|
|
|
622
|
my $bom; |
570
|
296
|
|
|
|
|
4840
|
read $fh, $bom, 3; |
571
|
296
|
100
|
66
|
|
|
1554
|
if (length($bom) == 3 and $bom eq "\xef\xbb\xbf") { |
572
|
12
|
100
|
|
|
|
41
|
if (!defined($via)) { |
573
|
10
|
|
|
|
|
34
|
$m .= ":via(File::BOM)"; |
574
|
|
|
|
|
|
|
} |
575
|
|
|
|
|
|
|
} |
576
|
296
|
|
|
|
|
2429
|
close $fh; |
577
|
|
|
|
|
|
|
} |
578
|
|
|
|
|
|
|
} |
579
|
|
|
|
|
|
|
|
580
|
320
|
|
|
|
|
641
|
my $inh; |
581
|
320
|
50
|
|
|
|
4998
|
if (!open($inh, "<", $in_file)) { |
582
|
0
|
|
|
|
|
0
|
$self->_print_error("unable to open file '$in_file': $!"); |
583
|
0
|
|
|
|
|
0
|
return ($encodings[0], $m0); |
584
|
|
|
|
|
|
|
} |
585
|
16
|
|
|
16
|
|
103
|
binmode $inh, $m; |
|
16
|
|
|
|
|
26
|
|
|
16
|
|
|
|
|
98
|
|
|
320
|
|
|
|
|
3529
|
|
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
# TURN OFF WARNINGS OUTPUT |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
{ |
590
|
320
|
|
|
|
|
34466
|
local $SIG{__WARN__} = sub { |
591
|
51
|
|
|
51
|
|
892
|
$wrn++; |
592
|
|
|
|
|
|
|
# Uncomment only for debug! |
593
|
|
|
|
|
|
|
# Otherwise you'll get quite a good deal of output at each execution :-) |
594
|
|
|
|
|
|
|
# print(STDERR @_); |
595
|
320
|
|
|
|
|
2319
|
}; |
596
|
320
|
|
|
|
|
4433
|
while (<$inh>) { } |
597
|
|
|
|
|
|
|
} |
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
# WARNINGS ARE BACK ON |
600
|
|
|
|
|
|
|
|
601
|
320
|
|
|
|
|
15184
|
close $inh; |
602
|
320
|
50
|
|
|
|
973
|
print($_debugh " '$m' counts $wrn warning(s)\n") if $_debug; |
603
|
|
|
|
|
|
|
|
604
|
320
|
100
|
|
|
|
1326
|
last if $wrn == 0; |
605
|
|
|
|
|
|
|
} |
606
|
|
|
|
|
|
|
|
607
|
320
|
50
|
|
|
|
793
|
if ($wrn >= 1) { |
608
|
0
|
|
|
|
|
0
|
$self->_print_warning("encoding warnings encountered during initial check, " . |
609
|
|
|
|
|
|
|
"using '$encodings[0]'"); |
610
|
0
|
|
|
|
|
0
|
return ($encodings[0], $m0); |
611
|
|
|
|
|
|
|
} |
612
|
|
|
|
|
|
|
|
613
|
320
|
50
|
|
|
|
670
|
confess "Oups! undef encoding string?" unless defined($m); |
614
|
|
|
|
|
|
|
|
615
|
320
|
50
|
|
|
|
656
|
print($_debugh " Detected encoding string '$ee' / '$m'\n") if $_debug; |
616
|
320
|
|
|
|
|
1325
|
return ($ee, $m); |
617
|
|
|
|
|
|
|
} |
618
|
|
|
|
|
|
|
|
619
|
|
|
|
|
|
|
# |
620
|
|
|
|
|
|
|
# Each of these functions brings status to the next value (current status + 1). |
621
|
|
|
|
|
|
|
# Each of these functions returns 0 if an error occured, 1 if all good |
622
|
|
|
|
|
|
|
# |
623
|
|
|
|
|
|
|
my @status_forward_functions = ( |
624
|
|
|
|
|
|
|
"_S1_init_input", # To go from S0 to S1 |
625
|
|
|
|
|
|
|
"_S2_init_fields_from_header", # To go form S1 to S2 |
626
|
|
|
|
|
|
|
"_S3_init_fields_extra", # To go from S2 to S3 |
627
|
|
|
|
|
|
|
"_S4_read_all_in_mem", # To go from S3 to S4 |
628
|
|
|
|
|
|
|
); |
629
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
sub _status_reset { |
631
|
494
|
|
|
494
|
|
770
|
my $self = shift; |
632
|
|
|
|
|
|
|
|
633
|
494
|
|
|
|
|
3224
|
validate_pos(@_, {type => SCALAR, optional => 1}); |
634
|
494
|
|
|
|
|
1706
|
my $called_from_print_error = _get_def($_[0], 0); |
635
|
|
|
|
|
|
|
|
636
|
494
|
100
|
100
|
|
|
1742
|
if (defined($self->{_status}) and $self->{_status} == 4) { |
637
|
18
|
100
|
|
|
|
46
|
unless ($called_from_print_error) { |
638
|
16
|
|
|
|
|
27
|
my $msg = "in-memory CSV content discarded, will have to re-read input"; |
639
|
16
|
|
|
|
|
59
|
$self->_print_warning($msg); |
640
|
|
|
|
|
|
|
} |
641
|
18
|
|
|
|
|
159
|
$self->{_flat} = [ ]; |
642
|
18
|
|
|
|
|
61
|
$self->_update_in_mem_record_count(); |
643
|
|
|
|
|
|
|
} |
644
|
|
|
|
|
|
|
|
645
|
494
|
|
|
|
|
993
|
$self->{_status} = 0; |
646
|
494
|
100
|
|
|
|
961
|
return 0 if $called_from_print_error; |
647
|
464
|
|
|
|
|
1069
|
return $self->_status_forward('S1'); |
648
|
|
|
|
|
|
|
} |
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
sub _status_forward { |
651
|
4084
|
|
|
4084
|
|
5417
|
my $self = shift; |
652
|
|
|
|
|
|
|
|
653
|
4084
|
|
|
|
|
7464
|
return $self->___status_move(@_, 1); |
654
|
|
|
|
|
|
|
} |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
sub _status_backward { |
657
|
139
|
|
|
139
|
|
195
|
my $self = shift; |
658
|
|
|
|
|
|
|
|
659
|
139
|
|
|
|
|
310
|
return $self->___status_move(@_, -1); |
660
|
|
|
|
|
|
|
} |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
# You should not call ___status_move() in the code, that is why the name is prefixed with 3 |
663
|
|
|
|
|
|
|
# underscores! Only _status_forward and _status_backward should call it. |
664
|
|
|
|
|
|
|
sub ___status_move { |
665
|
4223
|
|
|
4223
|
|
7145
|
my ($self, $target, $step) = @_; |
666
|
|
|
|
|
|
|
|
667
|
4223
|
|
|
|
|
6270
|
my $_debug = $self->{_debug}; |
668
|
4223
|
|
|
|
|
5514
|
my $_debugh = $self->{_debugh}; |
669
|
|
|
|
|
|
|
|
670
|
4223
|
50
|
66
|
|
|
17189
|
if (!defined($step) or ($step != -1 and $step != 1)) { |
|
|
|
33
|
|
|
|
|
671
|
0
|
|
|
|
|
0
|
confess "Oups! \$step has a wrong value: '$step'"; |
672
|
|
|
|
|
|
|
} |
673
|
|
|
|
|
|
|
|
674
|
4223
|
|
|
|
|
5201
|
my $n; |
675
|
4223
|
50
|
|
|
|
17241
|
confess "Oups! illegal status string: '$target'" unless ($n) = $target =~ m/^S(\d)$/; |
676
|
|
|
|
|
|
|
|
677
|
4223
|
100
|
|
|
|
8602
|
if ($self->{_read_in_progress}) { |
678
|
1
|
|
|
|
|
4
|
$self->_print_error("illegal call while read is in progress, " . |
679
|
|
|
|
|
|
|
"would lead to infinite recursion", 0); |
680
|
0
|
|
|
|
|
0
|
confess "Aborted."; |
681
|
|
|
|
|
|
|
} |
682
|
|
|
|
|
|
|
|
683
|
4222
|
100
|
|
|
|
7047
|
if ($step == -1) { |
684
|
139
|
100
|
|
|
|
313
|
if ($n < $self->{_status}) { |
685
|
19
|
100
|
|
|
|
49
|
if ($self->{_status} == 4) { |
686
|
16
|
50
|
|
|
|
40
|
print($_debugh "[ST] Requested status $n but will go to status 0\n") if $_debug; |
687
|
16
|
|
|
|
|
47
|
return $self->_status_reset(); |
688
|
|
|
|
|
|
|
} |
689
|
3
|
|
|
|
|
5
|
$self->{_status} = $n ; |
690
|
3
|
50
|
|
|
|
7
|
print($_debugh "[ST] New status: ". $self->{_status} . "\n") if $_debug; |
691
|
|
|
|
|
|
|
} |
692
|
123
|
|
|
|
|
336
|
return 1; |
693
|
|
|
|
|
|
|
} |
694
|
|
|
|
|
|
|
|
695
|
4083
|
100
|
|
|
|
8334
|
if ($self->{_status} < $n) { |
696
|
869
|
50
|
|
|
|
1584
|
print($_debugh "[ST] Current status: ". $self->{_status} . "\n") if $_debug; |
697
|
|
|
|
|
|
|
} |
698
|
|
|
|
|
|
|
|
699
|
4083
|
100
|
100
|
|
|
10040
|
if ($self->{_status} <= 1 and $n >= 2 and $self->{_int_one_pass} and |
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
700
|
|
|
|
|
|
|
$self->get_pass_count() >= 1) { |
701
|
12
|
|
|
|
|
23
|
my $msg = "one_pass set, unable to read input again"; |
702
|
12
|
50
|
|
|
|
46
|
$self->_print_error($msg), return 0 if $self->{one_pass}; |
703
|
0
|
0
|
|
|
|
0
|
$self->_print_warning($msg) if !$self->{one_pass}; |
704
|
|
|
|
|
|
|
} |
705
|
|
|
|
|
|
|
|
706
|
4071
|
|
|
|
|
7551
|
while ($self->{_status} < $n) { |
707
|
|
|
|
|
|
|
|
708
|
1347
|
|
|
|
|
2548
|
my $funcname = $status_forward_functions[$self->{_status}]; |
709
|
1347
|
50
|
|
|
|
2661
|
confess "Oups! Unknown status?" unless defined($funcname); |
710
|
|
|
|
|
|
|
|
711
|
1347
|
50
|
|
|
|
2236
|
print($_debugh "[ST] Now executing $funcname\n") if $_debug; |
712
|
|
|
|
|
|
|
|
713
|
1347
|
50
|
|
|
|
5495
|
if (my $member_function = $self->can($funcname)) { |
714
|
1347
|
100
|
|
|
|
3075
|
return 0 unless $self->$member_function(); |
715
|
|
|
|
|
|
|
} else { |
716
|
0
|
|
|
|
|
0
|
confess "Could not find method $funcname in $PKG!"; |
717
|
|
|
|
|
|
|
} |
718
|
|
|
|
|
|
|
|
719
|
1328
|
|
|
|
|
2485
|
$self->{_status} += $step; |
720
|
1328
|
50
|
|
|
|
4024
|
print($_debugh "[ST] New status: ". $self->{_status} . "\n") if $_debug; |
721
|
|
|
|
|
|
|
} |
722
|
|
|
|
|
|
|
|
723
|
4052
|
|
|
|
|
9545
|
return 1; |
724
|
|
|
|
|
|
|
} |
725
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
sub new { |
727
|
333
|
|
|
333
|
1
|
255769
|
my ($class, @args) = @_; |
728
|
|
|
|
|
|
|
|
729
|
333
|
|
|
|
|
39585
|
@args = validate(@args, |
730
|
|
|
|
|
|
|
{ in_file => {type => SCALAR, optional => 1}, |
731
|
|
|
|
|
|
|
infoh => {type => UNDEF | GLOBREF, default => \*STDERR, optional => 1}, |
732
|
|
|
|
|
|
|
verbose => {type => BOOLEAN, default => 0, optional => 1}, |
733
|
|
|
|
|
|
|
quiet => {type => BOOLEAN, optional => 1}, |
734
|
|
|
|
|
|
|
croak_if_error => {type => BOOLEAN, default => 1, optional => 1}, |
735
|
|
|
|
|
|
|
inh => {type => GLOBREF, optional => 1}, |
736
|
|
|
|
|
|
|
in_csvobj => {type => OBJECT, optional => 1}, |
737
|
|
|
|
|
|
|
sep_char => {type => SCALAR, optional => 1}, |
738
|
|
|
|
|
|
|
quote_char => {type => SCALAR, optional => 1}, |
739
|
|
|
|
|
|
|
escape_char => {type => SCALAR, optional => 1}, |
740
|
|
|
|
|
|
|
has_headers => {type => BOOLEAN, default => 1, optional => 1}, |
741
|
|
|
|
|
|
|
out_has_headers => {type => UNDEF | BOOLEAN, default => undef, optional => 1}, |
742
|
|
|
|
|
|
|
fields_ar => {type => ARRAYREF, optional => 1}, |
743
|
|
|
|
|
|
|
fields_hr => {type => HASHREF, optional => 1}, |
744
|
|
|
|
|
|
|
fields_column_names => {type => ARRAYREF, optional => 1}, |
745
|
|
|
|
|
|
|
search_case => {type => SCALAR, optional => 1}, |
746
|
|
|
|
|
|
|
search_trim => {type => SCALAR, optional => 1}, |
747
|
|
|
|
|
|
|
search_ignore_empty => {type => SCALAR, optional => 1}, |
748
|
|
|
|
|
|
|
search_ignore_accents => {type => SCALAR, optional => 1}, |
749
|
|
|
|
|
|
|
search_ignore_ambiguous => {type => SCALAR, optional => 1}, |
750
|
|
|
|
|
|
|
search_value_if_not_found => {type => SCALAR, optional => 1}, |
751
|
|
|
|
|
|
|
search_value_if_found => {type => SCALAR, optional => 1}, |
752
|
|
|
|
|
|
|
search_value_if_ambiguous => {type => SCALAR, optional => 1}, |
753
|
|
|
|
|
|
|
walker_hr => {type => CODEREF, optional => 1}, |
754
|
|
|
|
|
|
|
walker_ar => {type => CODEREF, optional => 1}, |
755
|
|
|
|
|
|
|
read_post_update_hr => {type => CODEREF, optional => 1}, |
756
|
|
|
|
|
|
|
write_filter_hr => {type => CODEREF, optional => 1}, |
757
|
|
|
|
|
|
|
out_filter => {type => CODEREF, optional => 1}, |
758
|
|
|
|
|
|
|
write_fields => {type => ARRAYREF, optional => 1}, |
759
|
|
|
|
|
|
|
out_orderby => {type => ARRAYREF, optional => 1}, |
760
|
|
|
|
|
|
|
out_fields => {type => ARRAYREF, optional => 1}, |
761
|
|
|
|
|
|
|
out_file => {type => SCALAR, optional => 1}, |
762
|
|
|
|
|
|
|
out_always_quote => {type => BOOLEAN, optional => 1}, |
763
|
|
|
|
|
|
|
out_sep_char => {type => SCALAR, optional => 1}, |
764
|
|
|
|
|
|
|
out_quote_char => {type => SCALAR, optional => 1}, |
765
|
|
|
|
|
|
|
out_escape_char => {type => SCALAR, optional => 1}, |
766
|
|
|
|
|
|
|
out_dates_format => {type => SCALAR, optional => 1}, |
767
|
|
|
|
|
|
|
out_dates_locale => {type => SCALAR, optional => 1}, |
768
|
|
|
|
|
|
|
encoding => {type => SCALAR, optional => 1}, |
769
|
|
|
|
|
|
|
via => {type => SCALAR, optional => 1}, |
770
|
|
|
|
|
|
|
out_encoding => {type => SCALAR, optional => 1}, |
771
|
|
|
|
|
|
|
dont_mess_with_encoding => {type => BOOLEAN, optional => 1}, |
772
|
|
|
|
|
|
|
one_pass => {type => BOOLEAN, optional => 1}, |
773
|
|
|
|
|
|
|
no_undef => {type => BOOLEAN, optional => 1}, |
774
|
|
|
|
|
|
|
fields_dates => {type => ARRAYREF, optional => 1}, |
775
|
|
|
|
|
|
|
fields_dates_auto => {type => BOOLEAN, optional => 1}, |
776
|
|
|
|
|
|
|
fields_dates_auto_optimize => {type => BOOLEAN, optional => 1}, |
777
|
|
|
|
|
|
|
dates_formats_to_try => {type => ARRAYREF, optional => 1}, |
778
|
|
|
|
|
|
|
dates_formats_to_try_supp => {type => ARRAYREF, optional => 1}, |
779
|
|
|
|
|
|
|
dates_ignore_trailing_chars => {type => BOOLEAN, optional => 1}, |
780
|
|
|
|
|
|
|
dates_search_time => {type => BOOLEAN, optional => 1}, |
781
|
|
|
|
|
|
|
dates_locales => {type => SCALAR, optional => 1}, |
782
|
|
|
|
|
|
|
out_utf8_bom => {type => SCALAR, optional => 1}, |
783
|
|
|
|
|
|
|
dates_zeros_ok => {type => SCALAR, default => 1, optional => 1}, |
784
|
|
|
|
|
|
|
_debug => {type => BOOLEAN, default => 0, optional => 1}, |
785
|
|
|
|
|
|
|
_debug_read => {type => BOOLEAN, default => 0, optional => 1}, |
786
|
|
|
|
|
|
|
_debug_extra_fields => {type => BOOLEAN, optional => 1}, |
787
|
|
|
|
|
|
|
_debugh => {type => UNDEF | GLOBREF, optional => 1} |
788
|
|
|
|
|
|
|
} |
789
|
|
|
|
|
|
|
); |
790
|
|
|
|
|
|
|
|
791
|
329
|
|
|
|
|
7092
|
my $self = { @args }; |
792
|
|
|
|
|
|
|
|
793
|
329
|
|
|
|
|
686
|
my @fields = keys %{$self}; |
|
329
|
|
|
|
|
1229
|
|
794
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
# croak_if_error |
796
|
|
|
|
|
|
|
|
797
|
329
|
|
|
|
|
815
|
my $croak_if_error = $self->{croak_if_error}; |
798
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
# verbose and _debug management |
800
|
|
|
|
|
|
|
|
801
|
329
|
50
|
|
|
|
1220
|
$self->{_debugh} = $self->{infoh} if !defined($self->{_debugh}); |
802
|
329
|
50
|
|
|
|
879
|
$self->{_debug} = 1 if $ALWAYS_DEBUG; |
803
|
329
|
|
|
|
|
624
|
my $_debug = $self->{_debug}; |
804
|
329
|
50
|
|
|
|
714
|
$self->{verbose} = 1 if $_debug; |
805
|
329
|
|
|
|
|
541
|
my $verbose = $self->{verbose}; |
806
|
|
|
|
|
|
|
|
807
|
329
|
|
|
|
|
502
|
my $_debugh = $self->{_debugh}; |
808
|
|
|
|
|
|
|
|
809
|
329
|
|
|
|
|
618
|
bless $self, $class; |
810
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
# fields_ar, fields_hr |
812
|
|
|
|
|
|
|
|
813
|
329
|
100
|
|
|
|
1086
|
if (defined($self->{fields_ar}) + |
814
|
|
|
|
|
|
|
defined($self->{fields_hr}) + |
815
|
|
|
|
|
|
|
defined($self->{fields_column_names}) |
816
|
|
|
|
|
|
|
>= 2) { |
817
|
1
|
|
|
|
|
7
|
$self->_print_error("mixed use of fields_ar, fields_hr and fields_column_names. " . |
818
|
|
|
|
|
|
|
"Use one at a time."); |
819
|
|
|
|
|
|
|
} |
820
|
329
|
100
|
100
|
|
|
1050
|
if (defined($self->{fields_ar}) and !defined($self->{fields_hr})) { |
821
|
2
|
|
|
|
|
4
|
my @f = @{$self->{fields_ar}}; |
|
2
|
|
|
|
|
6
|
|
822
|
2
|
|
|
|
|
3
|
my %h; |
823
|
2
|
|
|
|
|
6
|
for my $e (@f) { |
824
|
6
|
|
|
|
|
15
|
$h{$e} = "^$e\$"; |
825
|
|
|
|
|
|
|
} |
826
|
2
|
|
|
|
|
6
|
$self->{fields_hr} = \%h; |
827
|
|
|
|
|
|
|
} |
828
|
329
|
100
|
|
|
|
749
|
if (!$self->{has_headers}) { |
829
|
13
|
100
|
|
|
|
33
|
if (defined($self->{fields_ar})) { |
830
|
1
|
|
|
|
|
5
|
$self->_print_error("fields_ar irrelevant if CSV file has no headers"); |
831
|
1
|
|
|
|
|
175
|
return undef; |
832
|
|
|
|
|
|
|
} |
833
|
12
|
100
|
|
|
|
29
|
if (defined($self->{fields_hr})) { |
834
|
1
|
|
|
|
|
4
|
$self->_print_error("fields_hr irrelevant if CSV file has no headers"); |
835
|
1
|
|
|
|
|
5
|
return undef; |
836
|
|
|
|
|
|
|
} |
837
|
|
|
|
|
|
|
} |
838
|
|
|
|
|
|
|
|
839
|
|
|
|
|
|
|
# in_file or inh |
840
|
|
|
|
|
|
|
|
841
|
327
|
|
|
|
|
733
|
$self->{_flat} = [ ]; |
842
|
|
|
|
|
|
|
|
843
|
327
|
|
|
|
|
686
|
$self->{_read_update_after_hr} = { }; |
844
|
327
|
|
|
|
|
652
|
$self->{_write_update_before_hr} = { }; |
845
|
|
|
|
|
|
|
|
846
|
327
|
|
|
|
|
1074
|
$self->_update_in_mem_record_count(); |
847
|
|
|
|
|
|
|
|
848
|
327
|
100
|
|
|
|
882
|
return undef unless $self->_status_reset(); |
849
|
|
|
|
|
|
|
|
850
|
318
|
50
|
|
|
|
672
|
$self->_debug_show_members() if $_debug; |
851
|
|
|
|
|
|
|
|
852
|
318
|
100
|
|
|
|
728
|
if ($self->{dates_zeros_ok}) { |
853
|
|
|
|
|
|
|
$self->{_refsub_is_datetime_empty} = sub { |
854
|
9637
|
|
|
9637
|
|
16324
|
my $v = $_[0]; |
855
|
9637
|
100
|
|
|
|
29969
|
if ($v !~ m/[1-9]/) { |
856
|
4207
|
100
|
|
|
|
10364
|
return 1 if $v =~ m/^[^0:]*0+[^0:]+0+[^0:]+0+/; |
857
|
|
|
|
|
|
|
} |
858
|
9634
|
|
|
|
|
30100
|
return 0; |
859
|
|
|
|
|
|
|
} |
860
|
316
|
|
|
|
|
1466
|
} |
861
|
|
|
|
|
|
|
|
862
|
318
|
|
|
|
|
5336
|
return $self; |
863
|
|
|
|
|
|
|
}; |
864
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
# |
866
|
|
|
|
|
|
|
# Return 0 if error, 1 if all good |
867
|
|
|
|
|
|
|
# |
868
|
|
|
|
|
|
|
# Do all low level activities associated to input: |
869
|
|
|
|
|
|
|
# I/O init |
870
|
|
|
|
|
|
|
# Detect encoding |
871
|
|
|
|
|
|
|
# Detect CSV separator |
872
|
|
|
|
|
|
|
# Detect escape character |
873
|
|
|
|
|
|
|
# |
874
|
|
|
|
|
|
|
sub _S1_init_input { |
875
|
470
|
|
|
470
|
|
726
|
my $self = shift; |
876
|
|
|
|
|
|
|
|
877
|
470
|
|
|
|
|
786
|
my $croak_if_error = $self->{croak_if_error}; |
878
|
470
|
|
|
|
|
751
|
my $_debug = $self->{_debug}; |
879
|
470
|
|
|
|
|
675
|
my $_debugh = $self->{_debugh}; |
880
|
|
|
|
|
|
|
|
881
|
470
|
100
|
|
|
|
952
|
$self->{in_file} = '' unless defined($self->{in_file}); |
882
|
470
|
|
|
|
|
946
|
$self->{_close_inh_when_finished} = 0; |
883
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
$self->{_leave_encoding_alone} = $self->{dont_mess_with_encoding} |
885
|
470
|
50
|
|
|
|
1027
|
if defined($self->{dont_mess_with_encoding}); |
886
|
|
|
|
|
|
|
|
887
|
470
|
|
|
|
|
1341
|
$self->{_int_one_pass} = _get_def($self->{one_pass}, 0); |
888
|
470
|
|
|
|
|
878
|
my $in_file_disp; |
889
|
|
|
|
|
|
|
|
890
|
|
|
|
|
|
|
|
891
|
|
|
|
|
|
|
# |
892
|
|
|
|
|
|
|
# LOW LEVEL INIT STEP 1 OF 4 |
893
|
|
|
|
|
|
|
# |
894
|
|
|
|
|
|
|
# Manage I/O (= in most cases, open input file...) |
895
|
|
|
|
|
|
|
# |
896
|
|
|
|
|
|
|
|
897
|
470
|
100
|
|
|
|
993
|
if (defined($self->{inh})) { |
898
|
4
|
50
|
|
|
|
13
|
$self->{_leave_encoding_alone} = 1 unless defined($self->{dont_mess_with_encoding}); |
899
|
4
|
|
|
|
|
10
|
$in_file_disp = _get_def($self->{in_file}, '<?>'); |
900
|
4
|
50
|
|
|
|
12
|
$self->{_int_one_pass} = 1 unless defined($self->{one_pass}); |
901
|
4
|
|
|
|
|
8
|
$self->{_inh} = $self->{inh}; |
902
|
|
|
|
|
|
|
} else { |
903
|
466
|
50
|
|
|
|
1201
|
$self->{_leave_encoding_alone} = 0 unless defined($self->{dont_mess_with_encoding}); |
904
|
466
|
|
|
|
|
803
|
my $in_file = $self->{in_file}; |
905
|
466
|
|
|
|
|
670
|
my $inh; |
906
|
466
|
50
|
|
|
|
972
|
if ($in_file eq '') { |
907
|
0
|
|
|
|
|
0
|
$inh = \*STDIN; |
908
|
0
|
|
|
|
|
0
|
$self->{inh_is_stdin} = 1; |
909
|
0
|
0
|
|
|
|
0
|
$self->{_int_one_pass} = 1 unless defined($self->{one_pass}); |
910
|
0
|
|
|
|
|
0
|
$in_file_disp = '<stdin>'; |
911
|
|
|
|
|
|
|
} else { |
912
|
466
|
100
|
|
|
|
17077
|
if (!open($inh, '<', $in_file)) { |
913
|
3
|
|
|
|
|
52
|
$self->_print_error("unable to open file '$in_file': $!"); |
914
|
3
|
|
|
|
|
77
|
return 0; |
915
|
|
|
|
|
|
|
} |
916
|
463
|
|
|
|
|
1180
|
$in_file_disp = $in_file; |
917
|
463
|
|
|
|
|
1080
|
$self->{_close_inh_when_finished} = 1; |
918
|
|
|
|
|
|
|
} |
919
|
463
|
|
|
|
|
1005
|
$self->{_inh} = $inh; |
920
|
|
|
|
|
|
|
} |
921
|
|
|
|
|
|
|
|
922
|
467
|
50
|
|
|
|
1021
|
confess "Oups! in_file_disp not defined?" unless defined($in_file_disp); |
923
|
467
|
|
|
|
|
964
|
$self->{_in_file_disp} = $in_file_disp; |
924
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
|
926
|
|
|
|
|
|
|
# |
927
|
|
|
|
|
|
|
# LOW LEVEL INIT STEP 2 OF 4 |
928
|
|
|
|
|
|
|
# |
929
|
|
|
|
|
|
|
# "Detection" of encoding |
930
|
|
|
|
|
|
|
# |
931
|
|
|
|
|
|
|
# WARNING |
932
|
|
|
|
|
|
|
# As explained in the manual, it is a very partial and limited detection... |
933
|
|
|
|
|
|
|
# |
934
|
|
|
|
|
|
|
|
935
|
467
|
100
|
|
|
|
1012
|
unless ($self->{_leave_encoding_alone}) { |
936
|
463
|
100
|
|
|
|
1032
|
unless ($self->{_init_input_already_called}) { |
937
|
|
|
|
|
|
|
my ($e, $m) = $self->_detect_inh_encoding($self->{encoding}, $self->{via}, |
938
|
320
|
100
|
|
|
|
1815
|
$self->{in_file}, ($self->{_int_one_pass} ? 0 : $DETECT_ENCODING)); |
939
|
320
|
|
|
|
|
1083
|
$self->{_inh_encoding} = $e; |
940
|
320
|
|
|
|
|
706
|
$self->{_inh_encoding_string} = $m; |
941
|
|
|
|
|
|
|
} |
942
|
|
|
|
|
|
|
|
943
|
463
|
|
|
|
|
3422
|
binmode $self->{_inh}, $self->{_inh_encoding_string}; |
944
|
|
|
|
|
|
|
print($_debugh "Input encoding: '" . $self->{_inh_encoding} . "' / '" . |
945
|
463
|
50
|
|
|
|
21442
|
$self->{_inh_encoding_string} . "'\n") if $_debug; |
946
|
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
} |
948
|
|
|
|
|
|
|
|
949
|
467
|
100
|
|
|
|
1313
|
$self->{out_file} = '' unless defined($self->{out_file}); |
950
|
|
|
|
|
|
|
|
951
|
|
|
|
|
|
|
|
952
|
|
|
|
|
|
|
# |
953
|
|
|
|
|
|
|
# LOW LEVEL INIT STEP 3 OF 4 |
954
|
|
|
|
|
|
|
# |
955
|
|
|
|
|
|
|
# Detection of CSV separator and escape character |
956
|
|
|
|
|
|
|
# |
957
|
|
|
|
|
|
|
|
958
|
467
|
|
|
|
|
639
|
my $sep_char; |
959
|
467
|
|
|
|
|
805
|
my $escape_char = $self->{escape_char}; |
960
|
467
|
100
|
|
|
|
1180
|
$self->{quote_char} = $DEFAULT_QUOTE_CHAR unless defined($self->{quote_char}); |
961
|
467
|
|
|
|
|
776
|
my $quote_char = $self->{quote_char}; |
962
|
467
|
100
|
|
|
|
1044
|
unless (defined($self->{in_csvobj})) { |
963
|
462
|
100
|
|
|
|
913
|
if (defined($self->{sep_char})) { |
964
|
200
|
|
|
|
|
395
|
$sep_char = $self->{sep_char}; |
965
|
200
|
50
|
|
|
|
473
|
print($_debugh "-- $in_file_disp: CSV separator set to \"") if $_debug; |
966
|
|
|
|
|
|
|
} else { |
967
|
|
|
|
|
|
|
# The test below (on _init_input_already_called) shoud be useless. |
968
|
|
|
|
|
|
|
# Left for the sake of robustness. |
969
|
262
|
50
|
|
|
|
550
|
unless ($self->{_init_input_already_called}) { |
970
|
262
|
100
|
|
|
|
823
|
if (!$self->_detect_csv_sep($escape_char, $quote_char, \$sep_char)) { |
971
|
6
|
|
|
|
|
20
|
$self->_print_error("'$in_file_disp': cannot detect CSV separator"); |
972
|
0
|
|
|
|
|
0
|
return 0; |
973
|
|
|
|
|
|
|
} |
974
|
256
|
50
|
|
|
|
567
|
print($_debugh "-- $in_file_disp: CSV separator detected to \"") if $_debug; |
975
|
256
|
|
|
|
|
581
|
$self->{sep_char} = $sep_char; |
976
|
|
|
|
|
|
|
} |
977
|
|
|
|
|
|
|
} |
978
|
456
|
50
|
|
|
|
865
|
print($_debugh _render($sep_char) . "\"\n") if $_debug; |
979
|
|
|
|
|
|
|
|
980
|
456
|
|
|
|
|
1433
|
$self->_detect_meta($quote_char, $sep_char); |
981
|
|
|
|
|
|
|
|
982
|
|
|
|
|
|
|
$self->{_in_csvobj} = Text::CSV->new({sep_char => $sep_char, |
983
|
|
|
|
|
|
|
allow_whitespace => 1, binary => 1, auto_diag => 0, |
984
|
|
|
|
|
|
|
quote_char => $quote_char, escape_char => $self->{escape_char}, |
985
|
456
|
|
|
|
|
3227
|
allow_loose_escapes => 1}); |
986
|
456
|
50
|
|
|
|
75000
|
unless (defined($self->{_in_csvobj})) { |
987
|
0
|
|
|
|
|
0
|
$self->_print_error("error creating input Text::CSV object"); |
988
|
0
|
|
|
|
|
0
|
return 0; |
989
|
|
|
|
|
|
|
} |
990
|
|
|
|
|
|
|
|
991
|
|
|
|
|
|
|
} else { |
992
|
5
|
|
|
|
|
8
|
$self->{_in_csvobj} = $self->{in_csvobj}; |
993
|
|
|
|
|
|
|
} |
994
|
|
|
|
|
|
|
|
995
|
461
|
|
|
|
|
888
|
$self->{_init_input_already_called} = 1; |
996
|
|
|
|
|
|
|
|
997
|
461
|
|
|
|
|
1297
|
return 1; |
998
|
|
|
|
|
|
|
} |
999
|
|
|
|
|
|
|
|
1000
|
|
|
|
|
|
|
sub get_in_file_disp { |
1001
|
3663
|
|
|
3663
|
1
|
4836
|
my $self = shift; |
1002
|
|
|
|
|
|
|
|
1003
|
3663
|
|
|
|
|
19857
|
validate_pos(@_); |
1004
|
|
|
|
|
|
|
|
1005
|
3663
|
|
|
|
|
9455
|
my $in_file_disp = _get_def($self->{_in_file_disp}, '?'); |
1006
|
3663
|
|
|
|
|
6690
|
return $in_file_disp; |
1007
|
|
|
|
|
|
|
} |
1008
|
|
|
|
|
|
|
|
1009
|
|
|
|
|
|
|
sub get_sep_char { |
1010
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
1011
|
|
|
|
|
|
|
|
1012
|
0
|
|
|
|
|
0
|
validate_pos(@_); |
1013
|
|
|
|
|
|
|
|
1014
|
0
|
|
|
|
|
0
|
return $self->{sep_char}; |
1015
|
|
|
|
|
|
|
} |
1016
|
|
|
|
|
|
|
|
1017
|
|
|
|
|
|
|
sub get_escape_char { |
1018
|
9
|
|
|
9
|
1
|
37
|
my $self = shift; |
1019
|
|
|
|
|
|
|
|
1020
|
9
|
|
|
|
|
53
|
validate_pos(@_); |
1021
|
|
|
|
|
|
|
|
1022
|
9
|
|
|
|
|
52
|
return $self->{escape_char}; |
1023
|
|
|
|
|
|
|
} |
1024
|
|
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
sub get_in_encoding { |
1026
|
27
|
|
|
27
|
1
|
6959
|
my $self = shift; |
1027
|
|
|
|
|
|
|
|
1028
|
27
|
|
|
|
|
271
|
validate_pos(@_); |
1029
|
|
|
|
|
|
|
|
1030
|
27
|
|
|
|
|
118
|
return _get_def($self->{_inh_encoding}, ''); |
1031
|
|
|
|
|
|
|
} |
1032
|
|
|
|
|
|
|
|
1033
|
|
|
|
|
|
|
sub get_is_always_quoted { |
1034
|
15
|
|
|
15
|
1
|
64
|
my $self = shift; |
1035
|
|
|
|
|
|
|
|
1036
|
15
|
|
|
|
|
88
|
validate_pos(@_); |
1037
|
|
|
|
|
|
|
|
1038
|
15
|
|
|
|
|
73
|
return $self->{_is_always_quoted}; |
1039
|
|
|
|
|
|
|
} |
1040
|
|
|
|
|
|
|
|
1041
|
|
|
|
|
|
|
sub get_pass_count { |
1042
|
43
|
|
|
43
|
1
|
687
|
my $self = shift; |
1043
|
|
|
|
|
|
|
|
1044
|
43
|
|
|
|
|
233
|
validate_pos(@_); |
1045
|
|
|
|
|
|
|
|
1046
|
43
|
|
|
|
|
137
|
return _get_def($self->{_pass_count}, 0); |
1047
|
|
|
|
|
|
|
} |
1048
|
|
|
|
|
|
|
|
1049
|
|
|
|
|
|
|
sub get_in_mem_record_count { |
1050
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
1051
|
|
|
|
|
|
|
|
1052
|
0
|
|
|
|
|
0
|
validate_pos(@_); |
1053
|
|
|
|
|
|
|
|
1054
|
0
|
|
|
|
|
0
|
return ($self->{_in_mem_record_count}, 0); |
1055
|
|
|
|
|
|
|
} |
1056
|
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
sub get_max_in_mem_record_count { |
1058
|
513
|
|
|
513
|
1
|
2254
|
my $self = shift; |
1059
|
|
|
|
|
|
|
|
1060
|
513
|
|
|
|
|
3038
|
validate_pos(@_); |
1061
|
|
|
|
|
|
|
|
1062
|
513
|
|
|
|
|
2118
|
return _get_def($self->{_max_in_mem_record_count}, 0); |
1063
|
|
|
|
|
|
|
} |
1064
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
sub _set_max_in_mem_record_count { |
1066
|
155
|
|
|
155
|
|
279
|
my $self = shift; |
1067
|
|
|
|
|
|
|
|
1068
|
155
|
|
|
|
|
1537
|
validate_pos(@_, {type => SCALAR}); |
1069
|
|
|
|
|
|
|
|
1070
|
155
|
|
|
|
|
538
|
$self->{_max_in_mem_record_count} = $_[0]; |
1071
|
|
|
|
|
|
|
} |
1072
|
|
|
|
|
|
|
|
1073
|
|
|
|
|
|
|
sub get_fields_names { |
1074
|
19
|
|
|
19
|
1
|
3445
|
my $self = shift; |
1075
|
|
|
|
|
|
|
|
1076
|
19
|
|
|
|
|
122
|
validate_pos(@_); |
1077
|
|
|
|
|
|
|
|
1078
|
19
|
50
|
|
|
|
58
|
return () unless $self->_status_forward('S3'); |
1079
|
19
|
|
|
|
|
31
|
return @{$self->{_columns}}; |
|
19
|
|
|
|
|
89
|
|
1080
|
|
|
|
|
|
|
} |
1081
|
|
|
|
|
|
|
|
1082
|
|
|
|
|
|
|
sub get_field_name { |
1083
|
1
|
|
|
1
|
1
|
475
|
my $self = shift; |
1084
|
|
|
|
|
|
|
|
1085
|
1
|
|
|
|
|
11
|
validate_pos(@_, {type => SCALAR}); |
1086
|
|
|
|
|
|
|
|
1087
|
1
|
|
|
|
|
3
|
my ($n) = @_; |
1088
|
|
|
|
|
|
|
|
1089
|
1
|
50
|
|
|
|
3
|
return undef unless $self->_status_forward('S3'); |
1090
|
1
|
|
|
|
|
4
|
return $self->{_columns}->[$n]; |
1091
|
|
|
|
|
|
|
} |
1092
|
|
|
|
|
|
|
|
1093
|
|
|
|
|
|
|
sub get_coldata { |
1094
|
1
|
|
|
1
|
1
|
2
|
my $self = shift; |
1095
|
|
|
|
|
|
|
|
1096
|
1
|
|
|
|
|
7
|
validate_pos(@_); |
1097
|
|
|
|
|
|
|
|
1098
|
1
|
50
|
|
|
|
8
|
return () unless $self->_status_forward('S3'); |
1099
|
1
|
|
|
|
|
2
|
my @ret; |
1100
|
1
|
|
|
|
|
3
|
for (@{$self->{_coldata}}) { |
|
1
|
|
|
|
|
2
|
|
1101
|
4
|
|
|
|
|
135
|
push @ret, [ |
1102
|
|
|
|
|
|
|
$_->field_name, |
1103
|
|
|
|
|
|
|
$_->header_text, |
1104
|
|
|
|
|
|
|
$_->description, |
1105
|
|
|
|
|
|
|
$_->dt_format, |
1106
|
|
|
|
|
|
|
$_->dt_locale, |
1107
|
|
|
|
|
|
|
$_->multiline]; |
1108
|
|
|
|
|
|
|
} |
1109
|
|
|
|
|
|
|
|
1110
|
1
|
|
|
|
|
26
|
return @ret; |
1111
|
|
|
|
|
|
|
} |
1112
|
|
|
|
|
|
|
|
1113
|
|
|
|
|
|
|
sub get_stats { |
1114
|
2
|
|
|
2
|
1
|
12
|
my $self = shift; |
1115
|
|
|
|
|
|
|
|
1116
|
2
|
|
|
|
|
11
|
validate_pos(@_); |
1117
|
|
|
|
|
|
|
|
1118
|
2
|
50
|
|
|
|
7
|
return () unless defined($self->{_stats}); |
1119
|
2
|
|
|
|
|
3
|
return %{$self->{_stats}}; |
|
2
|
|
|
|
|
7
|
|
1120
|
|
|
|
|
|
|
} |
1121
|
|
|
|
|
|
|
|
1122
|
|
|
|
|
|
|
sub get_nb_rows { |
1123
|
1
|
|
|
1
|
1
|
2
|
my $self = shift; |
1124
|
|
|
|
|
|
|
|
1125
|
1
|
|
|
|
|
7
|
validate_pos(@_); |
1126
|
|
|
|
|
|
|
|
1127
|
1
|
|
|
|
|
3
|
return $self->{_nb_rows}; |
1128
|
|
|
|
|
|
|
} |
1129
|
|
|
|
|
|
|
|
1130
|
|
|
|
|
|
|
sub _debug_show_members { |
1131
|
0
|
|
|
0
|
|
0
|
my ($self) = @_; |
1132
|
0
|
|
|
|
|
0
|
my $_debugh = $self->{_debugh}; |
1133
|
0
|
0
|
|
|
|
0
|
my @a = @{$self->{fields_ar}} if defined($self->{fields_ar}); |
|
0
|
|
|
|
|
0
|
|
1134
|
0
|
0
|
|
|
|
0
|
my @c = @{$self->{fields_column_names}} if defined($self->{fields_column_names}); |
|
0
|
|
|
|
|
0
|
|
1135
|
0
|
0
|
|
|
|
0
|
my %h = %{$self->{fields_hr}} if defined($self->{fields_hr}); |
|
0
|
|
|
|
|
0
|
|
1136
|
|
|
|
|
|
|
|
1137
|
0
|
|
|
|
|
0
|
print($_debugh "-- _debug_show_members() start\n"); |
1138
|
0
|
|
|
|
|
0
|
print($_debugh " croak_if_error $self->{croak_if_error}\n"); |
1139
|
0
|
|
|
|
|
0
|
print($_debugh " verbose $self->{verbose}\n"); |
1140
|
0
|
|
|
|
|
0
|
print($_debugh " _debug $self->{_debug}\n"); |
1141
|
0
|
|
|
|
|
0
|
print($_debugh " _debug_read $self->{_debug_read}\n"); |
1142
|
0
|
|
|
|
|
0
|
print($_debugh " infoh $self->{infoh}\n"); |
1143
|
0
|
|
|
|
|
0
|
print($_debugh " _debugh $_debugh\n"); |
1144
|
0
|
|
|
|
|
0
|
print($_debugh " inh: $self->{_inh}\n"); |
1145
|
0
|
|
|
|
|
0
|
print($_debugh " in_file_disp " . $self->get_in_file_disp() . "\n"); |
1146
|
0
|
|
|
|
|
0
|
print($_debugh " _in_csvobj $self->{_in_csvobj}\n"); |
1147
|
0
|
|
|
|
|
0
|
print($_debugh " has_headers $self->{has_headers}\n"); |
1148
|
0
|
|
|
|
|
0
|
print($_debugh " fields_ar:\n"); |
1149
|
0
|
|
|
|
|
0
|
for my $e (@a) { |
1150
|
0
|
|
|
|
|
0
|
print($_debugh " '$e'\n"); |
1151
|
|
|
|
|
|
|
} |
1152
|
0
|
|
|
|
|
0
|
print($_debugh " fields_hr:\n"); |
1153
|
0
|
|
|
|
|
0
|
for my $e (keys %h) { |
1154
|
0
|
|
|
|
|
0
|
print($_debugh " '$e' => '$h{$e}'\n"); |
1155
|
|
|
|
|
|
|
} |
1156
|
0
|
|
|
|
|
0
|
print($_debugh " fields_column_names:\n"); |
1157
|
0
|
|
|
|
|
0
|
for my $e (@c) { |
1158
|
0
|
|
|
|
|
0
|
print($_debugh " '$e'\n"); |
1159
|
|
|
|
|
|
|
} |
1160
|
0
|
|
|
|
|
0
|
print($_debugh "-- _debug_show_members() end\n"); |
1161
|
|
|
|
|
|
|
} |
1162
|
|
|
|
|
|
|
|
1163
|
|
|
|
|
|
|
# |
1164
|
|
|
|
|
|
|
# Check headers in CSV header line |
1165
|
|
|
|
|
|
|
# Used to increase robustness by relying on header title rather than |
1166
|
|
|
|
|
|
|
# column number. |
1167
|
|
|
|
|
|
|
# |
1168
|
|
|
|
|
|
|
# Return 1 if success (all fields found), 0 otherwise. |
1169
|
|
|
|
|
|
|
# |
1170
|
|
|
|
|
|
|
sub _process_header { |
1171
|
12
|
|
|
12
|
|
30
|
my $self = shift; |
1172
|
12
|
|
|
|
|
19
|
my @headers = @{shift(@_)}; |
|
12
|
|
|
|
|
43
|
|
1173
|
12
|
|
|
|
|
22
|
my %fields_h = %{shift(@_)}; |
|
12
|
|
|
|
|
69
|
|
1174
|
12
|
|
|
|
|
26
|
my $retval = shift; |
1175
|
|
|
|
|
|
|
|
1176
|
12
|
|
|
|
|
23
|
my @tmp = keys %{$retval}; |
|
12
|
|
|
|
|
51
|
|
1177
|
|
|
|
|
|
|
|
1178
|
12
|
|
|
|
|
38
|
my $in_file_disp = $self->get_in_file_disp(); |
1179
|
|
|
|
|
|
|
|
1180
|
12
|
50
|
|
|
|
40
|
confess '$_[4] must be an empty by-ref hash' if $#tmp >= 0; |
1181
|
|
|
|
|
|
|
|
1182
|
12
|
|
|
|
|
25
|
my $e = 0; |
1183
|
12
|
|
|
|
|
43
|
for my $k (keys %fields_h) { |
1184
|
48
|
|
|
|
|
79
|
my $v = $fields_h{$k}; |
1185
|
|
|
|
|
|
|
|
1186
|
48
|
|
|
462
|
|
205
|
my @all_idx = indexes { /$v/i } @headers; |
|
462
|
|
|
|
|
2576
|
|
1187
|
48
|
50
|
|
|
|
164
|
if ($#all_idx >= 1) { |
1188
|
0
|
|
|
|
|
0
|
$self->_print_error("file $in_file_disp: " . |
1189
|
|
|
|
|
|
|
"more than one column matches the criteria '$v'"); |
1190
|
0
|
|
|
|
|
0
|
$e++; |
1191
|
|
|
|
|
|
|
} |
1192
|
48
|
|
|
258
|
|
172
|
my $idx = first_index { /$v/i } @headers; |
|
258
|
|
|
|
|
1541
|
|
1193
|
48
|
50
|
|
|
|
206
|
if ($idx < 0) { |
1194
|
0
|
|
|
|
|
0
|
$self->_print_error("file $in_file_disp: unable to find field '$v'"); |
1195
|
0
|
|
|
|
|
0
|
$e++; |
1196
|
|
|
|
|
|
|
} else { |
1197
|
48
|
|
|
|
|
117
|
$retval->{$k} = $idx; |
1198
|
|
|
|
|
|
|
} |
1199
|
|
|
|
|
|
|
} |
1200
|
|
|
|
|
|
|
|
1201
|
12
|
50
|
|
|
|
73
|
return ($e >= 1 ? 0 : 1); |
1202
|
|
|
|
|
|
|
} |
1203
|
|
|
|
|
|
|
|
1204
|
|
|
|
|
|
|
sub set_walker_hr { |
1205
|
2
|
|
|
2
|
1
|
1800
|
my $self = shift; |
1206
|
2
|
|
|
|
|
24
|
validate_pos(@_, {type => UNDEF | CODEREF, optional => 1}); |
1207
|
|
|
|
|
|
|
|
1208
|
2
|
|
|
|
|
8
|
my ($walker_hr) = @_; |
1209
|
|
|
|
|
|
|
|
1210
|
2
|
50
|
|
|
|
11
|
return undef unless $self->_status_forward('S2'); |
1211
|
2
|
50
|
|
|
|
7
|
return undef unless $self->_status_backward('S2'); |
1212
|
2
|
|
|
|
|
3
|
$self->{walker_hr} = $walker_hr; |
1213
|
|
|
|
|
|
|
|
1214
|
2
|
|
|
|
|
6
|
return $self; |
1215
|
|
|
|
|
|
|
} |
1216
|
|
|
|
|
|
|
|
1217
|
|
|
|
|
|
|
sub set_walker_ar { |
1218
|
2
|
|
|
2
|
1
|
1460
|
my $self = shift; |
1219
|
2
|
|
|
|
|
24
|
validate_pos(@_, {type => UNDEF | CODEREF, optional => 1}); |
1220
|
|
|
|
|
|
|
|
1221
|
2
|
|
|
|
|
8
|
my ($walker_ar) = @_; |
1222
|
|
|
|
|
|
|
|
1223
|
2
|
50
|
|
|
|
6
|
return undef unless $self->_status_forward('S2'); |
1224
|
2
|
50
|
|
|
|
5
|
return undef unless $self->_status_backward('S2'); |
1225
|
2
|
|
|
|
|
4
|
$self->{walker_ar} = $walker_ar; |
1226
|
|
|
|
|
|
|
|
1227
|
2
|
|
|
|
|
4
|
return $self; |
1228
|
|
|
|
|
|
|
} |
1229
|
|
|
|
|
|
|
|
1230
|
|
|
|
|
|
|
|
1231
|
|
|
|
|
|
|
# * *************************************** * |
1232
|
|
|
|
|
|
|
# * BEGINNING OF DATE FORMAT DETECTION CODE * |
1233
|
|
|
|
|
|
|
# * *************************************** * |
1234
|
|
|
|
|
|
|
|
1235
|
|
|
|
|
|
|
|
1236
|
|
|
|
|
|
|
# |
1237
|
|
|
|
|
|
|
# The '%m.%d.%y' is not at its "logical" location. It is done to make sure the order in which |
1238
|
|
|
|
|
|
|
# entries are written does not impact the result. |
1239
|
|
|
|
|
|
|
# |
1240
|
|
|
|
|
|
|
# It could occur because there is some code that correlates an entry containing %y with another |
1241
|
|
|
|
|
|
|
# one that would contain %Y. The %Y will be called the master, the %y will be called the slave. |
1242
|
|
|
|
|
|
|
# It is important to match such entries, otherwise an identified format with %y would always be |
1243
|
|
|
|
|
|
|
# ambiguous with the same written with %Y. |
1244
|
|
|
|
|
|
|
# |
1245
|
|
|
|
|
|
|
# IMPORTANT |
1246
|
|
|
|
|
|
|
# The list below is written almost as-is in the POD at the bottom of this file. |
1247
|
|
|
|
|
|
|
# |
1248
|
|
|
|
|
|
|
my @DATES_DEFAULT_FORMATS_TO_TRY = ( |
1249
|
|
|
|
|
|
|
'', |
1250
|
|
|
|
|
|
|
'%Y-%m-%d', |
1251
|
|
|
|
|
|
|
'%Y.%m.%d', |
1252
|
|
|
|
|
|
|
'%Y/%m/%d', |
1253
|
|
|
|
|
|
|
|
1254
|
|
|
|
|
|
|
'%m.%d.%y', |
1255
|
|
|
|
|
|
|
|
1256
|
|
|
|
|
|
|
'%m-%d-%Y', |
1257
|
|
|
|
|
|
|
'%m.%d.%Y', |
1258
|
|
|
|
|
|
|
'%m/%d/%Y', |
1259
|
|
|
|
|
|
|
'%d-%m-%Y', |
1260
|
|
|
|
|
|
|
'%d.%m.%Y', |
1261
|
|
|
|
|
|
|
'%d/%m/%Y', |
1262
|
|
|
|
|
|
|
|
1263
|
|
|
|
|
|
|
'%m-%d-%y', |
1264
|
|
|
|
|
|
|
'%m/%d/%y', |
1265
|
|
|
|
|
|
|
'%d-%m-%y', |
1266
|
|
|
|
|
|
|
'%d.%m.%y', |
1267
|
|
|
|
|
|
|
'%d/%m/%y', |
1268
|
|
|
|
|
|
|
|
1269
|
|
|
|
|
|
|
'%Y%m%d%H%M%S', |
1270
|
|
|
|
|
|
|
|
1271
|
|
|
|
|
|
|
# Localizaed formats |
1272
|
|
|
|
|
|
|
|
1273
|
|
|
|
|
|
|
'%b %d, %Y', |
1274
|
|
|
|
|
|
|
'%b %d %Y', |
1275
|
|
|
|
|
|
|
'%b %d %T %Z %Y', |
1276
|
|
|
|
|
|
|
'%d %b %Y', |
1277
|
|
|
|
|
|
|
'%d %b, %Y' |
1278
|
|
|
|
|
|
|
); |
1279
|
|
|
|
|
|
|
|
1280
|
|
|
|
|
|
|
# |
1281
|
|
|
|
|
|
|
# IMPORTANT |
1282
|
|
|
|
|
|
|
# Under Linux, $START is useless. Strptime will match a format exactly as it is, and a tring |
1283
|
|
|
|
|
|
|
# like "01/01/16 13:00:00" won't match with "%T". Under Windows, Strptime is capable of doing |
1284
|
|
|
|
|
|
|
# a match by ignoring characters at the beginning, thus "01/01/2016 13:00:00" for example will |
1285
|
|
|
|
|
|
|
# return success when matched against "%T". |
1286
|
|
|
|
|
|
|
# Possibly it has to do with versionning of Strptime, not Linux versus Windows as such. Any |
1287
|
|
|
|
|
|
|
# way, this difference had to be dealt with. |
1288
|
|
|
|
|
|
|
# |
1289
|
|
|
|
|
|
|
# The flexibility under Windows would screw the code logic so I had to add the prefix string |
1290
|
|
|
|
|
|
|
# below, to avoid unexpected success on match. |
1291
|
|
|
|
|
|
|
# |
1292
|
|
|
|
|
|
|
my $START = '<'; |
1293
|
|
|
|
|
|
|
|
1294
|
|
|
|
|
|
|
struct RecordCounter => { |
1295
|
|
|
|
|
|
|
count_ok => '$', |
1296
|
|
|
|
|
|
|
count_ko => '$', |
1297
|
|
|
|
|
|
|
has_searched_time => '$', |
1298
|
|
|
|
|
|
|
|
1299
|
|
|
|
|
|
|
format => '$', |
1300
|
|
|
|
|
|
|
locale => '$', |
1301
|
|
|
|
|
|
|
|
1302
|
|
|
|
|
|
|
has_found_time => '$', |
1303
|
|
|
|
|
|
|
format_with_addition_of_time => '$', |
1304
|
|
|
|
|
|
|
locale_with_addition_of_time => '$', |
1305
|
|
|
|
|
|
|
parser_with_addition_of_time => '$' |
1306
|
|
|
|
|
|
|
}; |
1307
|
|
|
|
|
|
|
|
1308
|
|
|
|
|
|
|
struct Format => { |
1309
|
|
|
|
|
|
|
id => '$', |
1310
|
|
|
|
|
|
|
format => '$', |
1311
|
|
|
|
|
|
|
locale => '$', |
1312
|
|
|
|
|
|
|
parser => '$', |
1313
|
|
|
|
|
|
|
index_slave => '$', |
1314
|
|
|
|
|
|
|
index_master => '$' |
1315
|
|
|
|
|
|
|
}; |
1316
|
|
|
|
|
|
|
|
1317
|
|
|
|
|
|
|
sub _col_dispname { |
1318
|
539
|
|
|
539
|
|
897
|
my ($self, $n) = @_; |
1319
|
|
|
|
|
|
|
|
1320
|
539
|
|
|
|
|
664
|
my $col; |
1321
|
|
|
|
|
|
|
|
1322
|
|
|
|
|
|
|
# |
1323
|
|
|
|
|
|
|
# IMPORTANT |
1324
|
|
|
|
|
|
|
# |
1325
|
|
|
|
|
|
|
# We cannot execute here a command like |
1326
|
|
|
|
|
|
|
# $self->_status_forward('S3'); |
1327
|
|
|
|
|
|
|
# (to ensure _columns is well defined) because _col_dispname is called by |
1328
|
|
|
|
|
|
|
# _detect_dates_formats that is in turn called by _S3_init_fields_extra. A call to |
1329
|
|
|
|
|
|
|
# _status_forward would trigger a never-ending call loop. |
1330
|
|
|
|
|
|
|
# |
1331
|
539
|
|
|
|
|
1200
|
my $cols = _get_def($self->{'_columns'}, $self->{'_S2_columns'}); |
1332
|
|
|
|
|
|
|
|
1333
|
539
|
50
|
|
|
|
1000
|
if ($self->{has_headers}) { |
1334
|
539
|
|
|
|
|
860
|
$col = $cols->[$n]; |
1335
|
539
|
50
|
|
|
|
874
|
$col = "<UNDEF>" unless defined($col); |
1336
|
|
|
|
|
|
|
} else { |
1337
|
0
|
|
|
|
|
0
|
$col = "[$n]"; |
1338
|
|
|
|
|
|
|
} |
1339
|
539
|
|
|
|
|
1061
|
return $col; |
1340
|
|
|
|
|
|
|
} |
1341
|
|
|
|
|
|
|
|
1342
|
|
|
|
|
|
|
# Used by test plan only... |
1343
|
|
|
|
|
|
|
sub _dds { |
1344
|
45
|
|
|
45
|
|
1111
|
my $self = shift; |
1345
|
|
|
|
|
|
|
|
1346
|
45
|
50
|
|
|
|
107
|
return undef unless $self->_status_forward('S3'); |
1347
|
43
|
50
|
|
|
|
145
|
return undef unless defined($self->{_dates_detailed_status}); |
1348
|
43
|
|
|
|
|
333
|
return $self->{_dates_detailed_status}; |
1349
|
|
|
|
|
|
|
} |
1350
|
|
|
|
|
|
|
|
1351
|
|
|
|
|
|
|
sub _detect_dates_formats { |
1352
|
354
|
|
|
354
|
|
536
|
my $self = shift; |
1353
|
|
|
|
|
|
|
|
1354
|
354
|
100
|
|
|
|
933
|
return if $self->{_detect_dates_formats_has_run}; |
1355
|
287
|
|
|
|
|
545
|
$self->{_detect_dates_formats_has_run} = 1; |
1356
|
287
|
100
|
|
|
|
656
|
my @fields_dates = @{$self->{fields_dates}} if defined($self->{fields_dates}); |
|
11
|
|
|
|
|
34
|
|
1357
|
287
|
100
|
100
|
|
|
1348
|
return unless @fields_dates or $self->{fields_dates_auto}; |
1358
|
|
|
|
|
|
|
|
1359
|
54
|
50
|
|
|
|
157
|
if ($self->{_int_one_pass}) { |
1360
|
0
|
|
|
|
|
0
|
$self->_print_error("date format detection disallowed when one_pass is set"); |
1361
|
0
|
|
|
|
|
0
|
return; |
1362
|
|
|
|
|
|
|
} |
1363
|
|
|
|
|
|
|
|
1364
|
54
|
|
|
|
|
109
|
my $_debug = $self->{_debug}; |
1365
|
54
|
|
|
|
|
92
|
my $_debugh = $self->{_debugh}; |
1366
|
54
|
|
33
|
|
|
171
|
my $debug_fmt = ($_debug and $DEBUG_DATETIME_FORMATS); |
1367
|
|
|
|
|
|
|
|
1368
|
54
|
|
|
|
|
158
|
$self->_register_pass("detect date format"); |
1369
|
|
|
|
|
|
|
|
1370
|
|
|
|
|
|
|
# |
1371
|
|
|
|
|
|
|
# Why re-opening the input? |
1372
|
|
|
|
|
|
|
# I tried two other ways that never worked on some OSes (like freebsd) and/or with older perl |
1373
|
|
|
|
|
|
|
# versions. |
1374
|
|
|
|
|
|
|
# |
1375
|
|
|
|
|
|
|
# 1) The "tell" tactic |
1376
|
|
|
|
|
|
|
# Recording at the beginning of the function the file position with |
1377
|
|
|
|
|
|
|
# my $pos = tell($self->{inh}); |
1378
|
|
|
|
|
|
|
# ... and then recalling with a seek instruction is the most logical. |
1379
|
|
|
|
|
|
|
# But it didn't work = sometimes, reading would go back to first row (the headers) instead |
1380
|
|
|
|
|
|
|
# of the second row, could not figure out why (it would work on my Ubuntu 16.04 / perl 5.22, but |
1381
|
|
|
|
|
|
|
# would fail with other OSes and/or perl versions). |
1382
|
|
|
|
|
|
|
# |
1383
|
|
|
|
|
|
|
# 2) The "complete rewind" tactic |
1384
|
|
|
|
|
|
|
# I then undertook to do (at the end of detection function): |
1385
|
|
|
|
|
|
|
# seek $inh, 0, SEEK_SET; |
1386
|
|
|
|
|
|
|
# $incsv->getline($inh) if $self->{has_headers}; |
1387
|
|
|
|
|
|
|
# based on the assumption that a seek to zero would behave differently from a seek to an |
1388
|
|
|
|
|
|
|
# arbitrary position. |
1389
|
|
|
|
|
|
|
# But still, it would sometimes fail.... |
1390
|
|
|
|
|
|
|
# |
1391
|
|
|
|
|
|
|
|
1392
|
54
|
|
|
|
|
131
|
my $inh = $self->_reopen_input(); |
1393
|
54
|
|
|
|
|
111
|
my $incsv = $self->{_in_csvobj}; |
1394
|
54
|
50
|
|
|
|
223
|
_mygetline($incsv, $inh) if $self->{has_headers}; |
1395
|
|
|
|
|
|
|
|
1396
|
54
|
|
|
|
|
2360
|
my $formats_to_try = $self->{dates_formats_to_try}; |
1397
|
54
|
|
|
|
|
95
|
my $ignore_trailing_chars = $self->{dates_ignore_trailing_chars}; |
1398
|
54
|
|
|
|
|
105
|
my $search_time = $self->{dates_search_time}; |
1399
|
54
|
|
|
|
|
89
|
my $localizations = $self->{dates_locales}; |
1400
|
|
|
|
|
|
|
|
1401
|
54
|
|
|
|
|
79
|
my %regular_named_fields = %{$self->{_regular_named_fields}}; |
|
54
|
|
|
|
|
437
|
|
1402
|
|
|
|
|
|
|
|
1403
|
54
|
|
|
|
|
134
|
my $refsub_is_datetime_empty = $self->{_refsub_is_datetime_empty}; |
1404
|
|
|
|
|
|
|
|
1405
|
54
|
|
|
|
|
88
|
my @fields_to_detect_format; |
1406
|
54
|
100
|
|
|
|
195
|
if (defined($self->{fields_dates})) { |
|
|
50
|
|
|
|
|
|
1407
|
11
|
|
|
|
|
21
|
my $count_field_not_found = 0; |
1408
|
11
|
|
|
|
|
18
|
my %column_seen; |
1409
|
11
|
|
|
|
|
18
|
for my $f (@{$self->{fields_dates}}) { |
|
11
|
|
|
|
|
40
|
|
1410
|
23
|
100
|
|
|
|
55
|
if (!exists $regular_named_fields{$f}) { |
1411
|
1
|
|
|
|
|
10
|
$self->_print_error("fields_dates: unknown field: '$f'", |
1412
|
|
|
|
|
|
|
1, ERR_UNKNOWN_FIELD, { %regular_named_fields } ); |
1413
|
1
|
|
|
|
|
34
|
$count_field_not_found++; |
1414
|
1
|
|
|
|
|
3
|
next; |
1415
|
|
|
|
|
|
|
} |
1416
|
22
|
|
|
|
|
36
|
my $n = $regular_named_fields{$f}; |
1417
|
22
|
50
|
|
|
|
56
|
if (exists $column_seen{$n}) { |
1418
|
0
|
|
|
|
|
0
|
$self->_print_warning("field '$f' already seen"); |
1419
|
0
|
|
|
|
|
0
|
next; |
1420
|
|
|
|
|
|
|
} |
1421
|
22
|
|
|
|
|
41
|
$column_seen{$n} = 1; |
1422
|
22
|
|
|
|
|
45
|
push @fields_to_detect_format, $n; |
1423
|
|
|
|
|
|
|
} |
1424
|
11
|
100
|
|
|
|
34
|
$self->_print_error("non existent field(s) encountered, aborted") if $count_field_not_found; |
1425
|
|
|
|
|
|
|
} elsif ($self->{fields_dates_auto}) { |
1426
|
43
|
|
|
|
|
168
|
my @k = keys %regular_named_fields; |
1427
|
43
|
|
|
|
|
193
|
@fields_to_detect_format = (0..$#k); |
1428
|
|
|
|
|
|
|
} else { |
1429
|
0
|
|
|
|
|
0
|
confess "Hey! check this code, man"; |
1430
|
|
|
|
|
|
|
} |
1431
|
|
|
|
|
|
|
|
1432
|
|
|
|
|
|
|
# |
1433
|
|
|
|
|
|
|
# FIXME? |
1434
|
|
|
|
|
|
|
# Sort by column number of not? |
1435
|
|
|
|
|
|
|
# |
1436
|
|
|
|
|
|
|
# At this moment in time, the author inclines to answer "yes". |
1437
|
|
|
|
|
|
|
# But I must admit it is rather arbitrary decision for now. |
1438
|
|
|
|
|
|
|
# |
1439
|
53
|
|
|
|
|
299
|
@fields_to_detect_format = sort { $a <=> $b } @fields_to_detect_format; |
|
753
|
|
|
|
|
868
|
|
1440
|
|
|
|
|
|
|
|
1441
|
2
|
|
|
|
|
5
|
my @dates_formats_supp = @{$self->{dates_formats_to_try_supp}} |
1442
|
53
|
100
|
|
|
|
143
|
if defined($self->{dates_formats_to_try_supp}); |
1443
|
|
|
|
|
|
|
|
1444
|
53
|
100
|
|
|
|
367
|
$formats_to_try = [ @DATES_DEFAULT_FORMATS_TO_TRY ] unless defined($formats_to_try); |
1445
|
53
|
|
|
|
|
96
|
$formats_to_try = [ @{$formats_to_try}, @dates_formats_supp ]; |
|
53
|
|
|
|
|
219
|
|
1446
|
53
|
|
|
|
|
143
|
my %seen; |
1447
|
53
|
|
|
|
|
105
|
my $f2 = [ ]; |
1448
|
53
|
|
|
|
|
149
|
for (@${formats_to_try}) { |
1449
|
978
|
50
|
|
|
|
1460
|
push @{$f2}, $_ unless exists($seen{$_}); |
|
978
|
|
|
|
|
1290
|
|
1450
|
978
|
|
|
|
|
1693
|
$seen{$_} = undef; |
1451
|
|
|
|
|
|
|
} |
1452
|
53
|
|
|
|
|
122
|
$formats_to_try = $f2; |
1453
|
|
|
|
|
|
|
|
1454
|
53
|
100
|
|
|
|
134
|
$ignore_trailing_chars = 1 unless defined($ignore_trailing_chars); |
1455
|
53
|
100
|
|
|
|
153
|
$search_time = 1 unless defined($search_time); |
1456
|
|
|
|
|
|
|
|
1457
|
53
|
100
|
|
|
|
122
|
my $stop = ($ignore_trailing_chars ? '' : '>'); |
1458
|
|
|
|
|
|
|
|
1459
|
|
|
|
|
|
|
# |
1460
|
|
|
|
|
|
|
# The code below (from # AMB to # AMB-END) aims to remove ambiguity that comes from %Y versus %y. |
1461
|
|
|
|
|
|
|
# That is: provided you have (among others) the formats to try |
1462
|
|
|
|
|
|
|
# '%d-%m-%Y' |
1463
|
|
|
|
|
|
|
# and |
1464
|
|
|
|
|
|
|
# '%d-%m-%y' |
1465
|
|
|
|
|
|
|
# then if parsing 4-digit-year dates (like '31-12-2016'), the two formats will work and you'll end |
1466
|
|
|
|
|
|
|
# up with an ambiguity. To be precise, there'll be no ambiguity if the date is followed by a time, |
1467
|
|
|
|
|
|
|
# but if the date is alone, both formats will work. |
1468
|
|
|
|
|
|
|
# |
1469
|
|
|
|
|
|
|
# Thanks to the below code, the member 'index_slave' (and its counterpart index_master) is populated |
1470
|
|
|
|
|
|
|
# and later, if such an ambiguity is detected, the upper case version (the one containing upper case |
1471
|
|
|
|
|
|
|
# '%Y') will be kept and the other one will be discarded. |
1472
|
|
|
|
|
|
|
# |
1473
|
|
|
|
|
|
|
# NOTE |
1474
|
|
|
|
|
|
|
# Such an ambiguity can exist only when ignore_trailing_chars is set. Otherwise, the remaining two |
1475
|
|
|
|
|
|
|
# digits make the date parsing fail in the '%y' case. |
1476
|
|
|
|
|
|
|
# |
1477
|
|
|
|
|
|
|
# The other members of the 'Format' object are used to work "normally", independently from this |
1478
|
|
|
|
|
|
|
# ambiguity removal feature. |
1479
|
|
|
|
|
|
|
# |
1480
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
# WIP = Work In Progress... |
1482
|
53
|
|
|
|
|
147
|
my @formats_wip; |
1483
|
53
|
100
|
|
|
|
134
|
my @locales = split(/,\s*/, $localizations) if defined($localizations); |
1484
|
53
|
|
|
|
|
87
|
for my $f (@{$formats_to_try}) { |
|
53
|
|
|
|
|
127
|
|
1485
|
978
|
100
|
|
|
|
2006
|
my $has_localized_item = ($f =~ m/%a|%A|%b|%B|%c|%\+/ ? 1 : 0); |
1486
|
978
|
100
|
100
|
|
|
1634
|
unless (@locales and $has_localized_item) { |
1487
|
968
|
|
|
|
|
1524
|
push @formats_wip, [$f, '']; |
1488
|
968
|
|
|
|
|
1322
|
next; |
1489
|
|
|
|
|
|
|
} |
1490
|
10
|
|
|
|
|
33
|
push @formats_wip, [$f, $_] foreach @locales; |
1491
|
|
|
|
|
|
|
} |
1492
|
|
|
|
|
|
|
|
1493
|
|
|
|
|
|
|
# AMB |
1494
|
53
|
|
|
|
|
99
|
my @formats; |
1495
|
|
|
|
|
|
|
my %mates; |
1496
|
53
|
|
|
|
|
151
|
for my $i (0..$#formats_wip) { |
1497
|
988
|
|
|
|
|
2210
|
my $fstr = $formats_wip[$i]->[0]; |
1498
|
988
|
|
|
|
|
1415
|
my $floc = $formats_wip[$i]->[1]; |
1499
|
|
|
|
|
|
|
|
1500
|
|
|
|
|
|
|
# FIXME |
1501
|
|
|
|
|
|
|
# Will not manage correctly a string like |
1502
|
|
|
|
|
|
|
# '%%Y' |
1503
|
|
|
|
|
|
|
# that means (when used with Strptime), the litteral string '%Y' with no substitution. |
1504
|
|
|
|
|
|
|
# Such cases will be complicated to fix, as it'll require to do a kind-of |
1505
|
|
|
|
|
|
|
# Strptime-equivalent parsing of the string, and I find it a bit overkill. |
1506
|
|
|
|
|
|
|
# |
1507
|
|
|
|
|
|
|
# I prefer to push back in caller world saying |
1508
|
|
|
|
|
|
|
# "Hey, if using constructs like '%%Y', you'll be in trouble." |
1509
|
988
|
|
|
|
|
1287
|
my $m = $fstr; |
1510
|
988
|
|
|
|
|
3725
|
$m =~ s/%y//ig; |
1511
|
988
|
|
|
|
|
1696
|
$m .= $floc; |
1512
|
|
|
|
|
|
|
|
1513
|
988
|
|
|
|
|
1374
|
my $index_slave = -1; |
1514
|
988
|
|
|
|
|
1150
|
my $index_master = -1; |
1515
|
988
|
100
|
|
|
|
1989
|
if (exists $mates{$m}) { |
1516
|
262
|
|
|
|
|
584
|
my $alt_fstr = $formats_wip[$mates{$m}]->[0]; |
1517
|
262
|
100
|
|
|
|
720
|
my $m_lower = ($fstr =~ m/%y/ ? 1 : 0); |
1518
|
262
|
100
|
|
|
|
564
|
my $m_upper = ($fstr =~ m/%Y/ ? 1 : 0); |
1519
|
262
|
100
|
|
|
|
510
|
my $a_lower = ($alt_fstr =~ m/%y/ ? 1 : 0); |
1520
|
262
|
100
|
|
|
|
616
|
my $a_upper = ($alt_fstr =~ m/%Y/ ? 1 : 0); |
1521
|
|
|
|
|
|
|
|
1522
|
|
|
|
|
|
|
# We ignore the weird cases where we'd have both %y and %Y in a format string. |
1523
|
|
|
|
|
|
|
|
1524
|
262
|
100
|
66
|
|
|
2167
|
if (!$m_lower and $m_upper and $a_lower and !$a_upper) { |
|
|
50
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
1525
|
45
|
|
|
|
|
100
|
$index_slave = $mates{$m}; |
1526
|
45
|
|
|
|
|
807
|
$formats[$mates{$m}]->index_master($i); |
1527
|
|
|
|
|
|
|
} elsif ($m_lower and !$m_upper and !$a_lower and $a_upper) { |
1528
|
217
|
|
|
|
|
377
|
$index_master = $mates{$m}; |
1529
|
217
|
|
|
|
|
3539
|
$formats[$mates{$m}]->index_slave($i); |
1530
|
|
|
|
|
|
|
} |
1531
|
|
|
|
|
|
|
|
1532
|
|
|
|
|
|
|
} else { |
1533
|
726
|
|
|
|
|
1469
|
$mates{$m} = $i; |
1534
|
|
|
|
|
|
|
} |
1535
|
|
|
|
|
|
|
|
1536
|
988
|
|
|
|
|
4093
|
my %strptime_opts = (pattern => $START . $fstr . $stop); |
1537
|
988
|
100
|
|
|
|
2313
|
$strptime_opts{locale} = $floc if $floc ne ''; |
1538
|
988
|
100
|
|
|
|
5465
|
my $format = Format->new( |
1539
|
|
|
|
|
|
|
id => "$i", |
1540
|
|
|
|
|
|
|
format => $fstr, |
1541
|
|
|
|
|
|
|
locale => $floc, |
1542
|
|
|
|
|
|
|
parser => ($fstr ne '' ? |
1543
|
|
|
|
|
|
|
DateTime::Format::Strptime->new(%strptime_opts) : |
1544
|
|
|
|
|
|
|
undef), |
1545
|
|
|
|
|
|
|
index_slave => $index_slave, |
1546
|
|
|
|
|
|
|
index_master => $index_master |
1547
|
|
|
|
|
|
|
); |
1548
|
988
|
|
|
|
|
1078022
|
push @formats, $format; |
1549
|
|
|
|
|
|
|
} |
1550
|
53
|
|
|
|
|
236
|
for my $i (0..$#formats) { |
1551
|
988
|
|
|
|
|
10442
|
my $format = $formats[$i]; |
1552
|
|
|
|
|
|
|
|
1553
|
|
|
|
|
|
|
# If a master could be itself the slave of another entry, that'd make it a hierarchical |
1554
|
|
|
|
|
|
|
# relation tree with multiple levels. It is not possible, only a direct, unique |
1555
|
|
|
|
|
|
|
# master-slave relation can be managed here. |
1556
|
988
|
50
|
66
|
|
|
11977
|
confess "Inonsistent data, check this module's code urgently!" |
1557
|
|
|
|
|
|
|
if $format->index_slave >= 0 and $format->index_master >= 0; |
1558
|
|
|
|
|
|
|
|
1559
|
988
|
100
|
|
|
|
21334
|
if ($format->index_slave >= 0) { |
1560
|
262
|
|
|
|
|
4607
|
my $mate = $formats[$format->index_slave]; |
1561
|
262
|
50
|
33
|
|
|
4188
|
if ($mate->index_master != $i or $mate->index_slave != -1) { |
1562
|
0
|
|
|
|
|
0
|
confess "Inonsistent data (2), check this module's code urgently!" |
1563
|
|
|
|
|
|
|
} |
1564
|
|
|
|
|
|
|
} |
1565
|
|
|
|
|
|
|
|
1566
|
988
|
100
|
|
|
|
20435
|
if ($format->index_master >= 0) { |
1567
|
262
|
|
|
|
|
4330
|
my $mate = $formats[$format->index_master]; |
1568
|
262
|
50
|
33
|
|
|
4128
|
if ($mate->index_slave != $i or $mate->index_master != -1) { |
1569
|
0
|
|
|
|
|
0
|
confess "Inonsistent data (3), check this module's code urgently!" |
1570
|
|
|
|
|
|
|
} |
1571
|
|
|
|
|
|
|
} |
1572
|
|
|
|
|
|
|
|
1573
|
|
|
|
|
|
|
} |
1574
|
53
|
50
|
|
|
|
464
|
if ($debug_fmt) { |
1575
|
0
|
|
|
|
|
0
|
for (@formats) { |
1576
|
0
|
|
|
|
|
0
|
my ($idx, $rel) = (-1, ""); |
1577
|
0
|
0
|
|
|
|
0
|
$idx = $_->index_slave, $rel = "S: " if $_->index_slave >= 0; |
1578
|
0
|
0
|
|
|
|
0
|
$idx = $_->index_master, $rel = "M: " if $_->index_master >= 0; |
1579
|
0
|
|
|
|
|
0
|
printf($_debugh "%-18s %s %2d", "'" . $_->format . "'", $rel, $idx); |
1580
|
0
|
0
|
|
|
|
0
|
print($_debugh ": '" . $formats[$idx]->format . "'") if $idx >= 0; |
1581
|
0
|
|
|
|
|
0
|
print($_debugh "\n"); |
1582
|
|
|
|
|
|
|
} |
1583
|
|
|
|
|
|
|
} |
1584
|
|
|
|
|
|
|
# AMB-END |
1585
|
|
|
|
|
|
|
|
1586
|
53
|
|
|
|
|
118
|
my %records; |
1587
|
|
|
|
|
|
|
my $record_number; |
1588
|
53
|
|
|
|
|
92
|
my $count_gotit = 0; |
1589
|
53
|
|
|
|
|
74
|
my $count_ambiguous = 0; |
1590
|
53
|
|
|
|
|
85
|
my $count_nodate = 0; |
1591
|
53
|
|
|
|
|
80
|
my $count_empty = 0; |
1592
|
53
|
|
|
|
|
72
|
my $has_signaled_can_start_recording_data = 0; |
1593
|
53
|
|
|
|
|
140
|
$self->{_line_after_which_recording_can_start} = 0; |
1594
|
|
|
|
|
|
|
|
1595
|
|
|
|
|
|
|
# |
1596
|
|
|
|
|
|
|
# Seems a weird optimization here, but it is very important. |
1597
|
|
|
|
|
|
|
# In some cases, divides execution time (to detect date format on big files |
1598
|
|
|
|
|
|
|
# containing numerous fields) by 10. |
1599
|
|
|
|
|
|
|
# |
1600
|
|
|
|
|
|
|
# When evaluates to true, it means the input column has no identified date format, meaning, |
1601
|
|
|
|
|
|
|
# no further check to do. |
1602
|
|
|
|
|
|
|
# |
1603
|
53
|
|
|
|
|
96
|
my @cache_nodate; |
1604
|
|
|
|
|
|
|
|
1605
|
53
|
|
|
|
|
185
|
while (my $f = _mygetline($incsv, $inh)) { |
1606
|
4650
|
|
|
|
|
145432
|
$record_number++; |
1607
|
|
|
|
|
|
|
|
1608
|
4650
|
50
|
|
|
|
8680
|
if ($debug_fmt) { |
1609
|
0
|
|
|
|
|
0
|
print($_debugh "RECORD $record_number:\n"); |
1610
|
0
|
|
|
|
|
0
|
for (0 .. @$f - 1) { printf($_debugh " %02d: '%s'\n", $_, $f->[$_]); } |
|
0
|
|
|
|
|
0
|
|
1611
|
|
|
|
|
|
|
} |
1612
|
|
|
|
|
|
|
|
1613
|
4650
|
|
|
|
|
8426
|
for my $n (@fields_to_detect_format) { |
1614
|
31445
|
100
|
|
|
|
53619
|
next if $cache_nodate[$n]; |
1615
|
|
|
|
|
|
|
|
1616
|
16908
|
|
|
|
|
23080
|
my $v = $f->[$n]; |
1617
|
16908
|
100
|
|
|
|
25435
|
$v = '' unless defined($v); |
1618
|
16908
|
100
|
|
|
|
30164
|
next if $v eq ''; |
1619
|
9406
|
100
|
100
|
|
|
24886
|
next if defined($refsub_is_datetime_empty) and $refsub_is_datetime_empty->($v); |
1620
|
|
|
|
|
|
|
|
1621
|
9403
|
50
|
|
|
|
16351
|
if ($debug_fmt) { |
1622
|
0
|
|
|
|
|
0
|
my $col = $self->_col_dispname($n); |
1623
|
0
|
|
|
|
|
0
|
print($_debugh "Line $record_number, column '$col':\n"); |
1624
|
|
|
|
|
|
|
} |
1625
|
|
|
|
|
|
|
|
1626
|
9403
|
|
|
|
|
16398
|
for my $fmt (@formats) { |
1627
|
158464
|
|
|
|
|
1938371
|
my $fid = $fmt->id; |
1628
|
158464
|
|
|
|
|
2580620
|
my $fstr = $fmt->format; |
1629
|
|
|
|
|
|
|
|
1630
|
158464
|
50
|
|
|
|
961742
|
$self->_debug_output_fmt('** pre ', $fmt, $records{$n}->{$fid}) if $debug_fmt; |
1631
|
|
|
|
|
|
|
|
1632
|
|
|
|
|
|
|
$records{$n}->{$fid} = RecordCounter->new( |
1633
|
|
|
|
|
|
|
count_ok => 0, |
1634
|
|
|
|
|
|
|
count_ko => 0, |
1635
|
|
|
|
|
|
|
has_searched_time => 0, |
1636
|
|
|
|
|
|
|
|
1637
|
|
|
|
|
|
|
format => undef, |
1638
|
|
|
|
|
|
|
locale => undef, |
1639
|
|
|
|
|
|
|
|
1640
|
|
|
|
|
|
|
has_found_time => 0, |
1641
|
|
|
|
|
|
|
format_with_addition_of_time => undef, |
1642
|
|
|
|
|
|
|
locale_with_addition_of_time => undef, |
1643
|
|
|
|
|
|
|
parser_with_addition_of_time => undef |
1644
|
158464
|
100
|
|
|
|
402666
|
) unless defined($records{$n}->{$fid}); |
1645
|
|
|
|
|
|
|
|
1646
|
158464
|
100
|
|
|
|
2396526
|
unless ($records{$n}->{$fid}->count_ko) { |
1647
|
19561
|
|
|
|
|
156101
|
my $is_ok = &_try_parser($fmt, $records{$n}->{$fid}, $START . $v . $stop); |
1648
|
|
|
|
|
|
|
|
1649
|
19561
|
100
|
|
|
|
39790
|
if (!$is_ok) { |
1650
|
7789
|
|
|
|
|
10262
|
my $give_up_time = 0; |
1651
|
7789
|
100
|
66
|
|
|
97784
|
if ($records{$n}->{$fid}->count_ko == 0 and |
|
|
|
100
|
|
|
|
|
1652
|
|
|
|
|
|
|
$records{$n}->{$fid}->has_searched_time and |
1653
|
|
|
|
|
|
|
$records{$n}->{$fid}->has_found_time) { |
1654
|
77
|
|
100
|
|
|
4020
|
$give_up_time = (defined($fmt->parser) and |
1655
|
|
|
|
|
|
|
defined($fmt->parser->parse_datetime($START . $v . $stop)) |
1656
|
|
|
|
|
|
|
? |
1657
|
|
|
|
|
|
|
1 : 0); |
1658
|
77
|
100
|
|
|
|
40222
|
if ($give_up_time) { |
1659
|
4
|
|
|
|
|
74
|
$records{$n}->{$fid}->has_found_time(0); |
1660
|
4
|
|
|
|
|
28
|
$is_ok = 1; |
1661
|
|
|
|
|
|
|
} |
1662
|
|
|
|
|
|
|
} |
1663
|
|
|
|
|
|
|
} |
1664
|
|
|
|
|
|
|
|
1665
|
19561
|
100
|
100
|
|
|
213960
|
if ($is_ok or !$ignore_trailing_chars) { |
1666
|
14159
|
|
100
|
|
|
185995
|
my $incr = (defined($fmt->parser) and $is_ok ? 1: 0); |
1667
|
|
|
|
|
|
|
|
1668
|
14159
|
100
|
|
|
|
281680
|
unless ($records{$n}->{$fid}->has_searched_time) { |
1669
|
3156
|
|
|
|
|
53415
|
$records{$n}->{$fid}->has_searched_time(1); |
1670
|
|
|
|
|
|
|
|
1671
|
|
|
|
|
|
|
croak "Inconsistent status! Issue in module code not in caller's!" |
1672
|
3156
|
50
|
|
|
|
50360
|
if $records{$n}->{$fid}->count_ok != 0; |
1673
|
|
|
|
|
|
|
|
1674
|
3156
|
100
|
|
|
|
21664
|
if ($search_time) { |
|
|
100
|
|
|
|
|
|
1675
|
|
|
|
|
|
|
|
1676
|
1980
|
50
|
|
|
|
3617
|
print($_debugh " Search time in '$v', format '$fstr'\n") |
1677
|
|
|
|
|
|
|
if $debug_fmt; |
1678
|
|
|
|
|
|
|
|
1679
|
1980
|
|
|
|
|
24457
|
my $t = $self->_guess_time_format($fstr, $fmt->locale, $v, $stop); |
1680
|
1980
|
100
|
|
|
|
42084
|
$records{$n}->{$fid}->has_found_time((defined($t) ? 1 : 0)); |
1681
|
1980
|
100
|
|
|
|
16509
|
if (defined($t)) { |
|
|
100
|
|
|
|
|
|
1682
|
254
|
|
|
|
|
3868
|
$records{$n}->{$fid}->format_with_addition_of_time($t->[0]); |
1683
|
254
|
|
|
|
|
4761
|
$records{$n}->{$fid}->locale_with_addition_of_time($t->[1]); |
1684
|
254
|
|
|
|
|
4595
|
$records{$n}->{$fid}->parser_with_addition_of_time($t->[2]); |
1685
|
254
|
|
|
|
|
1744
|
$incr = 1; |
1686
|
|
|
|
|
|
|
} elsif ($fstr eq '') { |
1687
|
200
|
|
|
|
|
3084
|
$records{$n}->{$fid}->count_ko(1); |
1688
|
|
|
|
|
|
|
} |
1689
|
|
|
|
|
|
|
} elsif ($fstr eq '') { |
1690
|
78
|
|
|
|
|
932
|
$records{$n}->{$fid}->count_ko(1); |
1691
|
|
|
|
|
|
|
} |
1692
|
|
|
|
|
|
|
|
1693
|
|
|
|
|
|
|
} |
1694
|
|
|
|
|
|
|
|
1695
|
14159
|
|
|
|
|
240714
|
$records{$n}->{$fid}->count_ok($records{$n}->{$fid}->count_ok + $incr); |
1696
|
|
|
|
|
|
|
|
1697
|
14159
|
100
|
100
|
|
|
181498
|
$records{$n}->{$fid}->count_ko($records{$n}->{$fid}->count_ko + 1) |
1698
|
|
|
|
|
|
|
if !$incr and !$is_ok; |
1699
|
|
|
|
|
|
|
|
1700
|
14159
|
100
|
|
|
|
45090
|
if ($incr) { |
1701
|
|
|
|
|
|
|
# We remove the slave if master is fine. |
1702
|
|
|
|
|
|
|
# Depending on the order in which parsing got done, the master could |
1703
|
|
|
|
|
|
|
# pop up first, or the slave, that is why we need manage both cases. |
1704
|
9161
|
100
|
100
|
|
|
115808
|
if ($fmt->index_slave >= 0 or $fmt->index_master >= 0) { |
1705
|
6843
|
100
|
|
|
|
202853
|
my $has_slave = ($fmt->index_slave >= 0 ? 1 : 0); |
1706
|
6843
|
100
|
|
|
|
119149
|
my $idx = ($has_slave ? $fmt->index_slave : $fmt->index_master); |
1707
|
6843
|
|
|
|
|
114981
|
my $mate = $formats[$idx]->id; |
1708
|
6843
|
100
|
|
|
|
50518
|
if (exists $records{$n}->{$mate}) { |
1709
|
6689
|
100
|
|
|
|
12247
|
if ($has_slave) { |
1710
|
2239
|
100
|
|
|
|
30531
|
if ($records{$n}->{$mate}->count_ko == 0) { |
1711
|
|
|
|
|
|
|
# I am the master: I remove the slave |
1712
|
2
|
|
|
|
|
53
|
$records{$n}->{$mate}->count_ko(1); |
1713
|
|
|
|
|
|
|
} |
1714
|
|
|
|
|
|
|
} else { |
1715
|
4450
|
50
|
66
|
|
|
56764
|
if ($records{$n}->{$mate}->count_ko == 0 and |
|
|
|
66
|
|
|
|
|
1716
|
|
|
|
|
|
|
$records{$n}->{$mate}->count_ok >= 1 and |
1717
|
|
|
|
|
|
|
$records{$n}->{$fid}->count_ko == 0) { |
1718
|
117
|
|
|
|
|
6413
|
$records{$n}->{$fid}->count_ko(1); |
1719
|
|
|
|
|
|
|
} |
1720
|
|
|
|
|
|
|
} |
1721
|
|
|
|
|
|
|
} |
1722
|
|
|
|
|
|
|
} |
1723
|
|
|
|
|
|
|
} |
1724
|
|
|
|
|
|
|
|
1725
|
|
|
|
|
|
|
} else { |
1726
|
5402
|
|
|
|
|
69351
|
$records{$n}->{$fid}->count_ko($records{$n}->{$fid}->count_ko + 1); |
1727
|
|
|
|
|
|
|
} |
1728
|
|
|
|
|
|
|
} |
1729
|
|
|
|
|
|
|
|
1730
|
158464
|
50
|
|
|
|
1103496
|
$self->_debug_output_fmt(' post', $fmt, $records{$n}->{$fid}) if $debug_fmt; |
1731
|
|
|
|
|
|
|
|
1732
|
|
|
|
|
|
|
} |
1733
|
|
|
|
|
|
|
} |
1734
|
|
|
|
|
|
|
|
1735
|
4650
|
|
|
|
|
7265
|
$count_gotit = 0; |
1736
|
4650
|
|
|
|
|
6724
|
$count_ambiguous = 0; |
1737
|
4650
|
|
|
|
|
6590
|
$count_empty = 0; |
1738
|
4650
|
|
|
|
|
9138
|
for my $n (@fields_to_detect_format) { |
1739
|
31445
|
100
|
|
|
|
53472
|
next if $cache_nodate[$n]; |
1740
|
|
|
|
|
|
|
|
1741
|
16908
|
|
|
|
|
21910
|
my $candidate = 0; |
1742
|
16908
|
|
|
|
|
21516
|
my $tt = 0; |
1743
|
16908
|
|
|
|
|
24498
|
for my $fmt (@formats) { |
1744
|
259112
|
|
|
|
|
3130002
|
my $fid = $fmt->id; |
1745
|
259112
|
|
|
|
|
1561608
|
my $rec = $records{$n}->{$fid}; |
1746
|
259112
|
100
|
|
|
|
410964
|
next unless defined($rec); |
1747
|
|
|
|
|
|
|
|
1748
|
200290
|
|
|
|
|
2343828
|
my $ok = $rec->count_ok; |
1749
|
200290
|
|
|
|
|
3145687
|
my $ko = $rec->count_ko; |
1750
|
|
|
|
|
|
|
|
1751
|
200290
|
50
|
66
|
|
|
1352272
|
confess "Oups. Check this module code urgently!" if $ok == 0 and $ko == 0; |
1752
|
200290
|
|
|
|
|
239261
|
$tt += $ok + $ko; |
1753
|
|
|
|
|
|
|
|
1754
|
200290
|
100
|
100
|
|
|
407111
|
$candidate++ if $ok >= 1 and $ko == 0; |
1755
|
|
|
|
|
|
|
} |
1756
|
16908
|
100
|
|
|
|
31488
|
if ($candidate == 1) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
1757
|
9602
|
|
|
|
|
13880
|
$count_gotit++; |
1758
|
|
|
|
|
|
|
} elsif ($candidate >= 2) { |
1759
|
2269
|
|
|
|
|
3620
|
$count_ambiguous++; |
1760
|
|
|
|
|
|
|
} elsif ($tt != 0) { |
1761
|
230
|
|
|
|
|
282
|
$count_nodate++; |
1762
|
230
|
|
|
|
|
423
|
$cache_nodate[$n] = 1; |
1763
|
|
|
|
|
|
|
} else { |
1764
|
4807
|
|
|
|
|
6511
|
$count_empty++; |
1765
|
|
|
|
|
|
|
} |
1766
|
|
|
|
|
|
|
} |
1767
|
|
|
|
|
|
|
|
1768
|
4650
|
50
|
|
|
|
8491
|
if ($debug_fmt) { |
1769
|
0
|
|
|
|
|
0
|
print($_debugh "\$count_gotit = $count_gotit\n"); |
1770
|
0
|
|
|
|
|
0
|
print($_debugh "\$count_ambiguous = $count_ambiguous\n"); |
1771
|
0
|
|
|
|
|
0
|
print($_debugh "\$count_nodate = $count_nodate\n"); |
1772
|
0
|
|
|
|
|
0
|
print($_debugh "\$count_empty = $count_empty\n"); |
1773
|
|
|
|
|
|
|
} |
1774
|
|
|
|
|
|
|
|
1775
|
4650
|
|
|
|
|
7169
|
my $can_start_recording_data = 0; |
1776
|
4650
|
100
|
100
|
|
|
23340
|
$can_start_recording_data = 1 |
|
|
|
100
|
|
|
|
|
1777
|
|
|
|
|
|
|
if $count_gotit + $count_ambiguous + $count_nodate >= 1 and |
1778
|
|
|
|
|
|
|
!$count_ambiguous and !$count_empty; |
1779
|
|
|
|
|
|
|
|
1780
|
4650
|
100
|
100
|
|
|
27375
|
if ($can_start_recording_data and !$has_signaled_can_start_recording_data) { |
1781
|
31
|
|
|
|
|
60
|
$has_signaled_can_start_recording_data = 1; |
1782
|
|
|
|
|
|
|
|
1783
|
31
|
50
|
|
|
|
85
|
print($_debugh "Can start recording (all dates formats detection closed) " . |
1784
|
|
|
|
|
|
|
"after record #$record_number\n") if $_debug; |
1785
|
|
|
|
|
|
|
|
1786
|
31
|
|
|
|
|
79
|
$self->{_line_after_which_recording_can_start} = $record_number; |
1787
|
31
|
100
|
|
|
|
113
|
last unless $self->{fields_dates_auto}; |
1788
|
24
|
100
|
|
|
|
196
|
last if $self->{fields_dates_auto_optimize}; |
1789
|
|
|
|
|
|
|
} |
1790
|
|
|
|
|
|
|
} |
1791
|
|
|
|
|
|
|
|
1792
|
53
|
|
|
|
|
3318
|
close $inh; |
1793
|
|
|
|
|
|
|
|
1794
|
53
|
|
|
|
|
156
|
my %dates_detailed_status; |
1795
|
|
|
|
|
|
|
my @dates_formats; |
1796
|
53
|
|
|
|
|
109
|
my $check_empty = 0; |
1797
|
53
|
|
|
|
|
86
|
my $check_nodate = 0; |
1798
|
53
|
|
|
|
|
85
|
my $check_ambiguous = 0; |
1799
|
53
|
|
|
|
|
111
|
my $check_gotit = 0; |
1800
|
53
|
|
|
|
|
128
|
for my $n (@fields_to_detect_format) { |
1801
|
532
|
|
|
|
|
662
|
my @formats_ok; |
1802
|
532
|
|
|
|
|
650
|
my $tt = 0; |
1803
|
532
|
|
|
|
|
626
|
for my $fid (sort keys %{$records{$n}}) { |
|
532
|
|
|
|
|
4453
|
|
1804
|
8465
|
|
|
|
|
73766
|
my $rec = $records{$n}->{$fid}; |
1805
|
8465
|
100
|
100
|
|
|
99187
|
if ($rec->count_ok >= 1 and $rec->count_ko == 0) { |
1806
|
|
|
|
|
|
|
|
1807
|
298
|
|
|
|
|
9864
|
my ($fstr, $floc) = ($rec->format, $rec->locale); |
1808
|
298
|
100
|
|
|
|
6337
|
($fstr, $floc) = ( |
1809
|
|
|
|
|
|
|
$rec->format_with_addition_of_time, |
1810
|
|
|
|
|
|
|
$rec->locale_with_addition_of_time |
1811
|
|
|
|
|
|
|
) if $rec->has_found_time; |
1812
|
|
|
|
|
|
|
|
1813
|
298
|
|
|
|
|
5808
|
push @formats_ok, [$fstr, $floc]; |
1814
|
|
|
|
|
|
|
} |
1815
|
8465
|
|
|
|
|
139405
|
$tt += $rec->count_ok + $rec->count_ko; |
1816
|
|
|
|
|
|
|
} |
1817
|
532
|
|
|
|
|
5100
|
my $is_ok = 0; |
1818
|
532
|
|
|
|
|
631
|
my $format; |
1819
|
532
|
|
|
|
|
820
|
my $locale = ''; |
1820
|
532
|
100
|
100
|
|
|
1961
|
if ($#formats_ok < 0 and $tt == 0) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
1821
|
16
|
|
|
|
|
29
|
$format = "Z"; |
1822
|
16
|
|
|
|
|
28
|
$check_empty++; |
1823
|
|
|
|
|
|
|
} elsif ($#formats_ok < 0) { |
1824
|
230
|
|
|
|
|
307
|
$format = "N"; |
1825
|
230
|
|
|
|
|
289
|
$check_nodate++; |
1826
|
|
|
|
|
|
|
} elsif ($#formats_ok > 0) { |
1827
|
12
|
|
|
|
|
21
|
$format = "A"; |
1828
|
12
|
|
|
|
|
19
|
$check_ambiguous++; |
1829
|
|
|
|
|
|
|
} else { |
1830
|
274
|
|
|
|
|
386
|
$is_ok = 1; |
1831
|
274
|
|
|
|
|
506
|
$format = $formats_ok[0]->[0]; |
1832
|
274
|
|
|
|
|
376
|
$locale = $formats_ok[0]->[1]; |
1833
|
274
|
|
|
|
|
384
|
$check_gotit++; |
1834
|
|
|
|
|
|
|
} |
1835
|
532
|
|
|
|
|
1111
|
my $col = $self->_col_dispname($n); |
1836
|
|
|
|
|
|
|
|
1837
|
532
|
50
|
|
|
|
1589
|
$dates_detailed_status{$col} = $format unless exists $dates_detailed_status{$col}; |
1838
|
532
|
100
|
66
|
|
|
2036
|
$dates_formats[$n] = [ $format, $locale ] if $is_ok and !defined($dates_formats[$n]); |
1839
|
|
|
|
|
|
|
} |
1840
|
53
|
|
|
|
|
185
|
$dates_detailed_status{'.'} = $self->{_line_after_which_recording_can_start}; |
1841
|
|
|
|
|
|
|
|
1842
|
53
|
50
|
66
|
|
|
479
|
if ($check_empty != $count_empty or $check_nodate != $count_nodate or |
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
1843
|
|
|
|
|
|
|
$check_ambiguous != $count_ambiguous or $check_gotit != $count_gotit) { |
1844
|
|
|
|
|
|
|
# The below condition can happen with an empty CSV (empty file (no header) or |
1845
|
|
|
|
|
|
|
# only a header line). |
1846
|
1
|
50
|
33
|
|
|
37
|
unless (!$count_empty and !$check_nodate and !$count_nodate and |
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
1847
|
|
|
|
|
|
|
!$check_ambiguous and !$count_ambiguous and !$check_gotit and !$count_gotit) { |
1848
|
0
|
|
|
|
|
0
|
print(STDERR "\$check_empty = $check_empty\n"); |
1849
|
0
|
|
|
|
|
0
|
print(STDERR "\$count_empty = $count_empty\n"); |
1850
|
0
|
|
|
|
|
0
|
print(STDERR "\$check_nodate = $check_nodate\n"); |
1851
|
0
|
|
|
|
|
0
|
print(STDERR "\$count_nodate = $count_nodate\n"); |
1852
|
0
|
|
|
|
|
0
|
print(STDERR "\$check_ambiguous = $check_ambiguous\n"); |
1853
|
0
|
|
|
|
|
0
|
print(STDERR "\$count_ambiguous = $count_ambiguous\n"); |
1854
|
0
|
|
|
|
|
0
|
print(STDERR "\$check_gotit = $check_gotit\n"); |
1855
|
0
|
|
|
|
|
0
|
print(STDERR "\$count_gotit = $count_gotit\n"); |
1856
|
0
|
|
|
|
|
0
|
confess "Oups! Check immediately this module code, man!"; |
1857
|
|
|
|
|
|
|
} |
1858
|
|
|
|
|
|
|
} |
1859
|
|
|
|
|
|
|
|
1860
|
53
|
50
|
|
|
|
173
|
if ($debug_fmt) { |
1861
|
|
|
|
|
|
|
# A very detailed debug output |
1862
|
0
|
|
|
|
|
0
|
for my $n (@fields_to_detect_format) { |
1863
|
0
|
|
|
|
|
0
|
my $col = $self->_col_dispname($n); |
1864
|
0
|
|
|
|
|
0
|
print($_debugh "$col\n"); |
1865
|
0
|
|
|
|
|
0
|
printf($_debugh " %-25s %3s %3s\n", "format", "OK", "KO"); |
1866
|
0
|
|
|
|
|
0
|
for my $fid (sort keys %{$records{$n}}) { |
|
0
|
|
|
|
|
0
|
|
1867
|
0
|
|
|
|
|
0
|
my $rec = $records{$n}->{$fid}; |
1868
|
0
|
|
|
|
|
0
|
my $cc = ''; |
1869
|
0
|
0
|
0
|
|
|
0
|
$cc = "(" . $rec->locale . ")" if defined($rec->locale) and $rec->locale ne ''; |
1870
|
0
|
|
|
|
|
0
|
printf($_debugh " %-25s %3d %3d\n", |
1871
|
|
|
|
|
|
|
$rec->format . $cc, $rec->count_ok, $rec->count_ko); |
1872
|
|
|
|
|
|
|
} |
1873
|
|
|
|
|
|
|
} |
1874
|
|
|
|
|
|
|
} |
1875
|
|
|
|
|
|
|
# Not a typo - displaying it IN ADDITION to debug output above is done on purpose... |
1876
|
53
|
50
|
|
|
|
141
|
if ($_debug) { |
1877
|
|
|
|
|
|
|
# A shorter (as compared to above) output of outcome of DateTime detection |
1878
|
0
|
|
|
|
|
0
|
print($_debugh "Result of DateTime detection:\n"); |
1879
|
0
|
|
|
|
|
0
|
printf($_debugh "%-3s %-25s %-30s %s\n", '###', 'FIELD', 'DATETIME FORMAT', |
1880
|
|
|
|
|
|
|
'DATETIME LOCALE'); |
1881
|
0
|
|
|
|
|
0
|
for my $n (@fields_to_detect_format) { |
1882
|
0
|
|
|
|
|
0
|
my ($fmt, $loc) = ('<undef>', '<undef>'); |
1883
|
0
|
0
|
|
|
|
0
|
if (defined($dates_formats[$n])) { |
1884
|
0
|
|
|
|
|
0
|
($fmt, $loc) = @{$dates_formats[$n]}[0, 1]; |
|
0
|
|
|
|
|
0
|
|
1885
|
|
|
|
|
|
|
} |
1886
|
0
|
|
|
|
|
0
|
printf($_debugh "%03d %-25s %-30s %s\n", $n, $self->_col_dispname($n), $fmt, $loc); |
1887
|
|
|
|
|
|
|
} |
1888
|
|
|
|
|
|
|
} |
1889
|
|
|
|
|
|
|
|
1890
|
53
|
100
|
|
|
|
177
|
if (!$self->{fields_dates_auto}) { |
1891
|
10
|
|
|
|
|
21
|
my $e = 0; |
1892
|
10
|
|
|
|
|
25
|
for my $n (@fields_to_detect_format) { |
1893
|
20
|
100
|
|
|
|
50
|
next if defined($dates_formats[$n]); |
1894
|
7
|
|
|
|
|
19
|
$self->_print_error("unable to detect DateTime format of field '" . |
1895
|
|
|
|
|
|
|
$self->_col_dispname($n) . "'", 1); |
1896
|
7
|
|
|
|
|
226
|
$e++; |
1897
|
|
|
|
|
|
|
} |
1898
|
10
|
100
|
|
|
|
38
|
$self->_print_error("$e field(s) encountered with unknown DateTime format") if $e; |
1899
|
|
|
|
|
|
|
} |
1900
|
|
|
|
|
|
|
|
1901
|
51
|
|
|
|
|
629
|
$self->{_dates_detailed_status} = { %dates_detailed_status }; |
1902
|
51
|
|
|
|
|
71913
|
$self->{_dates_formats} = [ @dates_formats ]; |
1903
|
|
|
|
|
|
|
} |
1904
|
|
|
|
|
|
|
|
1905
|
|
|
|
|
|
|
sub _debug_output_fmt { |
1906
|
0
|
|
|
0
|
|
0
|
my ($self, $prefix, $fmt, $rec) = @_; |
1907
|
|
|
|
|
|
|
|
1908
|
0
|
|
|
|
|
0
|
my $_debugh = $self->{_debugh}; |
1909
|
|
|
|
|
|
|
|
1910
|
0
|
|
|
|
|
0
|
my ($fstr, $floc) = ($fmt->format, $fmt->locale); |
1911
|
0
|
0
|
0
|
|
|
0
|
($fstr, $floc) = ( |
1912
|
|
|
|
|
|
|
'<+T>' . $rec->format_with_addition_of_time, |
1913
|
|
|
|
|
|
|
$rec->locale_with_addition_of_time |
1914
|
|
|
|
|
|
|
) if defined($rec) and $rec->has_found_time; |
1915
|
|
|
|
|
|
|
|
1916
|
0
|
|
|
|
|
0
|
my $locstr = ''; |
1917
|
0
|
0
|
0
|
|
|
0
|
$locstr = "(" . $floc . ")" if defined($floc) and $floc ne ''; |
1918
|
|
|
|
|
|
|
|
1919
|
0
|
0
|
|
|
|
0
|
my $tmpok = $rec->count_ok if defined($rec); |
1920
|
0
|
0
|
|
|
|
0
|
$tmpok = '<undef>' unless defined($tmpok); |
1921
|
0
|
0
|
|
|
|
0
|
my $tmpko = $rec->count_ko if defined($rec); |
1922
|
0
|
0
|
|
|
|
0
|
$tmpko = '<undef>' unless defined($tmpko); |
1923
|
|
|
|
|
|
|
|
1924
|
0
|
|
|
|
|
0
|
print($_debugh "$prefix (format '$fstr$locstr': OK = $tmpok, KO = $tmpko)\n"); |
1925
|
|
|
|
|
|
|
} |
1926
|
|
|
|
|
|
|
|
1927
|
|
|
|
|
|
|
# When no parse can be done (parser to test is undef), return 1 |
1928
|
|
|
|
|
|
|
sub _try_parser { |
1929
|
19561
|
|
|
19561
|
|
36197
|
my ($fmt, $rec, $value_to_parse) = @_; |
1930
|
|
|
|
|
|
|
|
1931
|
19561
|
|
|
|
|
251597
|
my $parser = $fmt->parser; |
1932
|
19561
|
100
|
|
|
|
329188
|
$parser = $rec->parser_with_addition_of_time if $rec->has_found_time; |
1933
|
|
|
|
|
|
|
|
1934
|
19561
|
|
|
|
|
199756
|
my $is_ok = 1; |
1935
|
19561
|
100
|
|
|
|
63674
|
$is_ok = (defined($parser->parse_datetime($value_to_parse)) ? 1 : 0) if $parser; |
|
|
100
|
|
|
|
|
|
1936
|
|
|
|
|
|
|
|
1937
|
19561
|
100
|
|
|
|
6762982
|
unless (defined($rec->format)) { |
1938
|
8465
|
|
|
|
|
145360
|
$rec->format($fmt->format); |
1939
|
8465
|
|
|
|
|
255757
|
$rec->locale($fmt->locale); |
1940
|
|
|
|
|
|
|
} |
1941
|
|
|
|
|
|
|
|
1942
|
19561
|
|
|
|
|
268220
|
return $is_ok; |
1943
|
|
|
|
|
|
|
} |
1944
|
|
|
|
|
|
|
|
1945
|
|
|
|
|
|
|
sub _guess_time_format { |
1946
|
|
|
|
|
|
|
|
1947
|
|
|
|
|
|
|
# IMPORTANT |
1948
|
|
|
|
|
|
|
# Formats are tested in the order of the list below, and the first one that succeeds stops the |
1949
|
|
|
|
|
|
|
# tests. |
1950
|
|
|
|
|
|
|
# That makes the order of the elements important: %R would match any value that'd also match |
1951
|
|
|
|
|
|
|
# %T, that'd cause to return %R whereas %T would be possible. Same with AM/PM formats. Thus |
1952
|
|
|
|
|
|
|
# the longest patterns appear first. |
1953
|
1980
|
|
|
1980
|
|
15671
|
my @T = ( |
1954
|
|
|
|
|
|
|
'%I:%M:%S %p', |
1955
|
|
|
|
|
|
|
'%I:%M %p', |
1956
|
|
|
|
|
|
|
'%I:%M:%S%p', |
1957
|
|
|
|
|
|
|
'%I:%M%p', |
1958
|
|
|
|
|
|
|
'%T', |
1959
|
|
|
|
|
|
|
'%R' |
1960
|
|
|
|
|
|
|
); |
1961
|
|
|
|
|
|
|
|
1962
|
1980
|
|
|
|
|
4508
|
my ($self, $format, $locale, $v, $stop) = @_; |
1963
|
|
|
|
|
|
|
|
1964
|
1980
|
|
|
|
|
3199
|
my $_debugh = $self->{_debugh}; |
1965
|
1980
|
|
33
|
|
|
4426
|
my $debug_fmt = ($self->{_debug} and $DEBUG_DATETIME_FORMATS); |
1966
|
|
|
|
|
|
|
|
1967
|
1980
|
100
|
|
|
|
4052
|
return undef if $format =~ /:/; |
1968
|
|
|
|
|
|
|
|
1969
|
1976
|
|
|
|
|
2690
|
my $sep; |
1970
|
1976
|
100
|
|
|
|
3837
|
if ($format eq '') { |
1971
|
325
|
|
|
|
|
639
|
$sep = ''; |
1972
|
|
|
|
|
|
|
} else { |
1973
|
1651
|
100
|
|
|
|
9128
|
unless ((undef, $sep) = $v =~ /(^|\d([^0-9:]+))(\d{1,2}):(\d{1,2})(\D|$)/) { |
1974
|
393
|
100
|
|
|
|
852
|
if ($v =~ /\d{4}:\d{2}(\D|$)/) { |
1975
|
29
|
|
|
|
|
52
|
$sep = ''; |
1976
|
|
|
|
|
|
|
} else { |
1977
|
|
|
|
|
|
|
|
1978
|
364
|
50
|
|
|
|
644
|
print($_debugh "_guess_time_format(): separator candidate not found in '$v'\n") |
1979
|
|
|
|
|
|
|
if $debug_fmt; |
1980
|
|
|
|
|
|
|
|
1981
|
364
|
|
|
|
|
861
|
return undef; |
1982
|
|
|
|
|
|
|
} |
1983
|
|
|
|
|
|
|
} |
1984
|
|
|
|
|
|
|
} |
1985
|
1612
|
100
|
|
|
|
4326
|
$sep = '' unless defined($sep); |
1986
|
|
|
|
|
|
|
|
1987
|
|
|
|
|
|
|
# |
1988
|
|
|
|
|
|
|
# IMPORTANT |
1989
|
|
|
|
|
|
|
# |
1990
|
|
|
|
|
|
|
# The code below allows to successfully detect DateTime format when |
1991
|
|
|
|
|
|
|
# the first lines contain things like: |
1992
|
|
|
|
|
|
|
# Jan 20 2017 2:00AM |
1993
|
|
|
|
|
|
|
# that could lead to a separator set to ' ' while actually it should be ' '. In this case |
1994
|
|
|
|
|
|
|
# if the double-space is kept, then a later value of |
1995
|
|
|
|
|
|
|
# Jan 20 2017 10:00AM |
1996
|
|
|
|
|
|
|
# won't be parsed. |
1997
|
|
|
|
|
|
|
# |
1998
|
|
|
|
|
|
|
# See t/11-bugfix.t, BUG 5, for an explanation of why the line below. |
1999
|
|
|
|
|
|
|
# |
2000
|
|
|
|
|
|
|
|
2001
|
|
|
|
|
|
|
# More generic code, but will also break some separators like ' ' (4 spaces) |
2002
|
|
|
|
|
|
|
# $sep = substr($sep, 0, length($sep) - 1) if length($sep) >= 2 and substr($sep, -2) eq ' '; |
2003
|
1612
|
100
|
|
|
|
3365
|
$sep = ' ' if $sep eq ' '; |
2004
|
|
|
|
|
|
|
|
2005
|
1612
|
50
|
|
|
|
2884
|
if ($debug_fmt) { |
2006
|
0
|
|
|
|
|
0
|
print($_debugh " _guess_time_format(): Searching time in '$v'\n"); |
2007
|
|
|
|
|
|
|
} |
2008
|
|
|
|
|
|
|
|
2009
|
1612
|
|
|
|
|
2649
|
for my $t (@T) { |
2010
|
9193
|
|
|
|
|
527128
|
my $increased_format = "$format$sep$t"; |
2011
|
|
|
|
|
|
|
|
2012
|
9193
|
50
|
|
|
|
19654
|
print($_debugh " _guess_time_format(): Trying format '$increased_format'\n") if $debug_fmt; |
2013
|
|
|
|
|
|
|
|
2014
|
9193
|
|
|
|
|
26155
|
my %opts = (pattern => $START . $increased_format . $stop); |
2015
|
9193
|
100
|
66
|
|
|
33050
|
$opts{locale} = $locale if defined($locale) and $locale ne ''; |
2016
|
9193
|
|
|
|
|
32325
|
my $parser_of_increased_format = DateTime::Format::Strptime->new(%opts); |
2017
|
9193
|
100
|
|
|
|
10184123
|
next unless defined($parser_of_increased_format->parse_datetime($START . $v . $stop)); |
2018
|
|
|
|
|
|
|
|
2019
|
254
|
50
|
|
|
|
151930
|
if ($debug_fmt) { |
2020
|
0
|
|
|
|
|
0
|
print($_debugh " _guess_time_format(): found time in '$v'\n"); |
2021
|
0
|
|
|
|
|
0
|
print($_debugh " Initial format: '$format'\n"); |
2022
|
0
|
|
|
|
|
0
|
print($_debugh " Increased format: '$increased_format'\n"); |
2023
|
|
|
|
|
|
|
} |
2024
|
|
|
|
|
|
|
|
2025
|
254
|
|
|
|
|
1391
|
return [$increased_format, $locale, $parser_of_increased_format]; |
2026
|
|
|
|
|
|
|
} |
2027
|
1358
|
|
|
|
|
98908
|
return undef; |
2028
|
|
|
|
|
|
|
} |
2029
|
|
|
|
|
|
|
|
2030
|
|
|
|
|
|
|
|
2031
|
|
|
|
|
|
|
# * ********************************* * |
2032
|
|
|
|
|
|
|
# * END OF DATE FORMAT DETECTION CODE * |
2033
|
|
|
|
|
|
|
# * ********************************* * |
2034
|
|
|
|
|
|
|
|
2035
|
|
|
|
|
|
|
|
2036
|
|
|
|
|
|
|
# Take the string of a header in $_ and replace it with the corresponding field name |
2037
|
|
|
|
|
|
|
sub _header_to_field_name { |
2038
|
1265
|
|
|
1265
|
|
1978
|
$_ = remove_accents($_); |
2039
|
1265
|
|
|
|
|
2436
|
s/[^[:alnum:]_]//gi; |
2040
|
1265
|
|
|
|
|
6799
|
s/^.*$/\U$&/; |
2041
|
|
|
|
|
|
|
} |
2042
|
|
|
|
|
|
|
|
2043
|
|
|
|
|
|
|
# Return 0 if error, 1 if all good |
2044
|
|
|
|
|
|
|
sub _S2_init_fields_from_header { |
2045
|
355
|
|
|
355
|
|
602
|
my $self = shift; |
2046
|
|
|
|
|
|
|
|
2047
|
355
|
|
|
|
|
645
|
my $has_headers = $self->{has_headers}; |
2048
|
355
|
|
|
|
|
582
|
my $_debug = $self->{_debug}; |
2049
|
355
|
|
|
|
|
553
|
my $_debugh = $self->{_debugh}; |
2050
|
|
|
|
|
|
|
|
2051
|
355
|
|
|
|
|
828
|
my $in_file_disp = $self->get_in_file_disp(); |
2052
|
|
|
|
|
|
|
|
2053
|
355
|
|
|
|
|
641
|
my $inh = $self->{_inh}; |
2054
|
355
|
|
|
|
|
557
|
my $incsv = $self->{_in_csvobj}; |
2055
|
|
|
|
|
|
|
|
2056
|
355
|
|
|
|
|
685
|
$self->{_row_read} = 0; |
2057
|
|
|
|
|
|
|
|
2058
|
355
|
|
|
|
|
648
|
my @columns; |
2059
|
|
|
|
|
|
|
my @headers; |
2060
|
355
|
100
|
|
|
|
763
|
if ($has_headers) { |
2061
|
|
|
|
|
|
|
|
2062
|
|
|
|
|
|
|
print($_debugh "$PKG: '$in_file_disp': will parse header line to get column names\n") |
2063
|
344
|
50
|
|
|
|
812
|
if $self->{_debug_read}; |
2064
|
|
|
|
|
|
|
|
2065
|
344
|
|
|
|
|
579
|
$self->{_row_read}++; |
2066
|
|
|
|
|
|
|
|
2067
|
|
|
|
|
|
|
print($_debugh "$PKG: '$in_file_disp': will read line #" . $self->{_row_read} . "\n") |
2068
|
344
|
50
|
|
|
|
753
|
if $self->{_debug_read}; |
2069
|
|
|
|
|
|
|
|
2070
|
344
|
50
|
|
|
|
768
|
if (defined($self->{_inh_header})) { |
2071
|
0
|
|
|
|
|
0
|
my $l = $self->{_inh_header}; |
2072
|
0
|
|
|
|
|
0
|
my $inmemh; |
2073
|
0
|
0
|
|
|
|
0
|
if (!open ($inmemh, '<', \$l)) { |
2074
|
0
|
|
|
|
|
0
|
$self->_print_error("can't open header line in-memory. CSV read aborted."); |
2075
|
0
|
|
|
|
|
0
|
return 0; |
2076
|
|
|
|
|
|
|
} |
2077
|
0
|
|
|
|
|
0
|
@headers = @{_mygetline($incsv, $inmemh)}; |
|
0
|
|
|
|
|
0
|
|
2078
|
|
|
|
|
|
|
} else { |
2079
|
344
|
|
|
|
|
689
|
my $r = _mygetline($incsv, $inh); |
2080
|
342
|
50
|
|
|
|
16073
|
@headers = @{$r} if defined($r); |
|
342
|
|
|
|
|
1071
|
|
2081
|
|
|
|
|
|
|
} |
2082
|
|
|
|
|
|
|
print($_debugh "Line " . $self->{_row_read} . ":\n--\n" . join('::', @headers) . "\n--\n") |
2083
|
342
|
50
|
|
|
|
1114
|
if $self->{_debug_read}; |
2084
|
|
|
|
|
|
|
} |
2085
|
|
|
|
|
|
|
|
2086
|
353
|
100
|
100
|
|
|
1547
|
if ($has_headers and !defined($self->{fields_column_names})) { |
2087
|
336
|
|
|
|
|
525
|
my %indexes; |
2088
|
336
|
100
|
|
|
|
812
|
if (defined($self->{fields_hr})) { |
2089
|
12
|
50
|
|
|
|
87
|
if (!$self->_process_header(\@headers, $self->{fields_hr}, \%indexes)) { |
2090
|
0
|
|
|
|
|
0
|
$self->_print_error("missing headers. CSV read aborted."); |
2091
|
0
|
|
|
|
|
0
|
return 0; |
2092
|
|
|
|
|
|
|
} |
2093
|
12
|
50
|
|
|
|
44
|
if ($_debug) { |
2094
|
0
|
|
|
|
|
0
|
print($_debugh " \%indexes:\n"); |
2095
|
0
|
|
|
|
|
0
|
for my $k (sort keys %indexes) { |
2096
|
0
|
|
|
|
|
0
|
print($_debugh " \t$k => $indexes{$k}\n"); |
2097
|
|
|
|
|
|
|
} |
2098
|
|
|
|
|
|
|
} |
2099
|
12
|
|
|
|
|
80
|
for (sort keys %indexes) { |
2100
|
48
|
50
|
|
|
|
84
|
next if $_ eq ''; |
2101
|
48
|
|
|
|
|
90
|
$columns[$indexes{$_}] = $_; |
2102
|
|
|
|
|
|
|
} |
2103
|
|
|
|
|
|
|
} else { |
2104
|
324
|
|
|
|
|
764
|
@columns = @headers; |
2105
|
324
|
|
|
|
|
617
|
map { _header_to_field_name } @columns; |
|
1265
|
|
|
|
|
7496
|
|
2106
|
|
|
|
|
|
|
} |
2107
|
|
|
|
|
|
|
} |
2108
|
|
|
|
|
|
|
|
2109
|
353
|
100
|
|
|
|
1064
|
@columns = @{$self->{fields_column_names}} if defined($self->{fields_column_names}); |
|
14
|
|
|
|
|
41
|
|
2110
|
|
|
|
|
|
|
|
2111
|
|
|
|
|
|
|
# Avoid undef in column names... I prefer empty strings |
2112
|
353
|
100
|
|
|
|
702
|
@columns = map { defined($_) ? $_ : '' } @columns; |
|
1389
|
|
|
|
|
3105
|
|
2113
|
|
|
|
|
|
|
|
2114
|
353
|
50
|
|
|
|
828
|
if ($_debug) { |
2115
|
0
|
|
|
|
|
0
|
print($_debugh "-- CSV headers management\n"); |
2116
|
0
|
0
|
|
|
|
0
|
if (@columns) { |
2117
|
0
|
|
|
|
|
0
|
printf($_debugh " %-3s %-40s %-40s\n", 'COL', 'CSV Header', 'Hash Key'); |
2118
|
0
|
|
|
|
|
0
|
for my $i (0..$#columns) { |
2119
|
0
|
|
|
|
|
0
|
my $h = ''; |
2120
|
0
|
0
|
|
|
|
0
|
$h = $headers[$i] if defined($headers[$i]); |
2121
|
0
|
|
|
|
|
0
|
printf($_debugh " %03d %-40s %-40s\n", $i, "'$h'", "'$columns[$i]'"); |
2122
|
|
|
|
|
|
|
} |
2123
|
|
|
|
|
|
|
} else { |
2124
|
0
|
|
|
|
|
0
|
print($_debugh " No headers\n"); |
2125
|
|
|
|
|
|
|
} |
2126
|
|
|
|
|
|
|
} |
2127
|
|
|
|
|
|
|
|
2128
|
353
|
|
|
|
|
548
|
my %regular_named_fields; |
2129
|
353
|
|
|
|
|
953
|
for my $i (0..$#columns) { |
2130
|
1389
|
100
|
66
|
|
|
5084
|
$regular_named_fields{$columns[$i]} = $i if defined($columns[$i]) and $columns[$i] ne ''; |
2131
|
|
|
|
|
|
|
} |
2132
|
353
|
|
|
|
|
1716
|
$self->{_regular_named_fields} = { %regular_named_fields }; |
2133
|
353
|
|
|
|
|
1253
|
$self->{_S2_columns} = [ @columns ]; |
2134
|
353
|
100
|
|
|
|
1245
|
$self->{_S2_headers} = [ @headers ] if $has_headers; |
2135
|
|
|
|
|
|
|
|
2136
|
353
|
|
|
|
|
1368
|
return 1; |
2137
|
|
|
|
|
|
|
} |
2138
|
|
|
|
|
|
|
|
2139
|
|
|
|
|
|
|
sub out_header { |
2140
|
8
|
|
|
8
|
1
|
11
|
my $self = shift; |
2141
|
8
|
|
|
|
|
73
|
validate_pos(@_, {type => SCALAR}, {type => SCALAR}); |
2142
|
|
|
|
|
|
|
|
2143
|
8
|
|
|
|
|
21
|
my ($field, $header) = @_; |
2144
|
8
|
100
|
|
|
|
20
|
$self->{_out_headers} = { } unless exists $self->{_out_headers}; |
2145
|
|
|
|
|
|
|
|
2146
|
|
|
|
|
|
|
$self->_print_warning("out_header: field $field already set") |
2147
|
8
|
50
|
|
|
|
14
|
if exists $self->{_out_headers}->{$field}; |
2148
|
|
|
|
|
|
|
|
2149
|
8
|
|
|
|
|
15
|
$self->{_out_headers}->{$field} = $header; |
2150
|
|
|
|
|
|
|
|
2151
|
8
|
|
|
|
|
24
|
return $self; |
2152
|
|
|
|
|
|
|
} |
2153
|
|
|
|
|
|
|
|
2154
|
|
|
|
|
|
|
# Return 0 if error, 1 if all good |
2155
|
|
|
|
|
|
|
sub _S3_init_fields_extra { |
2156
|
355
|
|
|
355
|
|
572
|
my $self = shift; |
2157
|
|
|
|
|
|
|
|
2158
|
355
|
|
|
|
|
659
|
my $_debug = $self->{_debug}; |
2159
|
355
|
|
|
|
|
579
|
my $_debugh = $self->{_debugh}; |
2160
|
|
|
|
|
|
|
|
2161
|
355
|
|
|
|
|
547
|
my $verbose = $self->{verbose}; |
2162
|
|
|
|
|
|
|
|
2163
|
355
|
|
|
|
|
532
|
my $has_headers = $self->{has_headers}; |
2164
|
|
|
|
|
|
|
|
2165
|
355
|
|
|
|
|
520
|
my %named_fields = %{$self->{_regular_named_fields}}; |
|
355
|
|
|
|
|
1597
|
|
2166
|
355
|
|
|
|
|
666
|
my @columns = @{$self->{_S2_columns}}; |
|
355
|
|
|
|
|
949
|
|
2167
|
355
|
100
|
|
|
|
826
|
my @headers = @{$self->{_S2_headers}} if $has_headers; |
|
344
|
|
|
|
|
860
|
|
2168
|
|
|
|
|
|
|
|
2169
|
355
|
|
|
|
|
514
|
my @extra_fields_indexes; |
2170
|
355
|
100
|
|
|
|
773
|
my @extra_fields_definitions_list = @{$self->{_extra_fields}} if exists $self->{_extra_fields}; |
|
36
|
|
|
|
|
72
|
|
2171
|
355
|
|
|
|
|
505
|
my %extra_fields_definitions; |
2172
|
|
|
|
|
|
|
|
2173
|
355
|
100
|
|
|
|
839
|
my @multiline = @{$self->{_multiline}} if defined($self->{_multiline}); |
|
340
|
|
|
|
|
664
|
|
2174
|
|
|
|
|
|
|
|
2175
|
355
|
|
|
|
|
487
|
my @coldata; |
2176
|
355
|
|
|
|
|
824
|
for my $i (0..$#columns) { |
2177
|
1395
|
|
|
|
|
45332
|
my $col = $columns[$i]; |
2178
|
1395
|
100
|
|
|
|
2682
|
my $h = $headers[$i] if $has_headers; |
2179
|
1395
|
100
|
|
|
|
23430
|
push @coldata, ColData->new( |
2180
|
|
|
|
|
|
|
field_name => $col, |
2181
|
|
|
|
|
|
|
header_text => $h, |
2182
|
|
|
|
|
|
|
description => '', |
2183
|
|
|
|
|
|
|
multiline => ($multiline[$i] ? 'm' : '1') |
2184
|
|
|
|
|
|
|
); |
2185
|
|
|
|
|
|
|
} |
2186
|
|
|
|
|
|
|
|
2187
|
355
|
|
|
|
|
15896
|
for my $edef (@extra_fields_definitions_list) { |
2188
|
92
|
|
|
|
|
4275
|
my $c = $edef->check_field_existence; |
2189
|
92
|
100
|
|
|
|
668
|
if (defined($c)) { |
2190
|
80
|
100
|
|
|
|
174
|
unless (exists $named_fields{$c}) { |
2191
|
9
|
|
|
|
|
121
|
$self->_print_error("unknown field '" . $edef->check_field_existence . "'", |
2192
|
|
|
|
|
|
|
0, ERR_UNKNOWN_FIELD, { %named_fields } ); |
2193
|
8
|
|
|
|
|
23
|
next; |
2194
|
|
|
|
|
|
|
} |
2195
|
|
|
|
|
|
|
} |
2196
|
|
|
|
|
|
|
|
2197
|
83
|
|
|
|
|
155
|
my @e_eclated = $edef; |
2198
|
|
|
|
|
|
|
|
2199
|
83
|
100
|
100
|
|
|
1102
|
if ($edef->ef_type == $EF_LINK and $edef->link_remote_read eq '*') { |
2200
|
2
|
|
|
|
|
76
|
my @cols = $edef->link_remote_obj->get_fields_names(); |
2201
|
|
|
|
|
|
|
|
2202
|
2
|
|
|
|
|
3
|
@e_eclated = (); |
2203
|
2
|
|
|
|
|
9
|
my %nf = %named_fields; |
2204
|
|
|
|
|
|
|
|
2205
|
2
|
|
|
|
|
5
|
for my $c (@cols) { |
2206
|
|
|
|
|
|
|
|
2207
|
4
|
|
|
|
|
130
|
my $ex_base = $edef->self_name . $c; |
2208
|
4
|
|
|
|
|
35
|
my $ex_target = $ex_base; |
2209
|
4
|
|
|
|
|
8
|
my $i = 1; |
2210
|
4
|
|
|
|
|
15
|
while (exists $nf{$ex_target}) { |
2211
|
1
|
|
|
|
|
2
|
$i++; |
2212
|
1
|
|
|
|
|
4
|
$ex_target = $ex_base . '_' . $i; |
2213
|
|
|
|
|
|
|
} |
2214
|
|
|
|
|
|
|
|
2215
|
4
|
|
|
|
|
60
|
my $e = ExtraField->new( |
2216
|
|
|
|
|
|
|
ef_type => $EF_LINK, |
2217
|
|
|
|
|
|
|
self_name => $ex_target, |
2218
|
|
|
|
|
|
|
description => $edef->description . " ($c)", |
2219
|
|
|
|
|
|
|
|
2220
|
|
|
|
|
|
|
link_self_search => $edef->link_self_search, |
2221
|
|
|
|
|
|
|
link_remote_obj => $edef->link_remote_obj, |
2222
|
|
|
|
|
|
|
link_remote_search => $edef->link_remote_search, |
2223
|
|
|
|
|
|
|
link_remote_read => $c, |
2224
|
|
|
|
|
|
|
|
2225
|
|
|
|
|
|
|
link_vlookup_opts => $edef->link_vlookup_opts |
2226
|
|
|
|
|
|
|
); |
2227
|
4
|
|
|
|
|
361
|
push @e_eclated, $e; |
2228
|
4
|
|
|
|
|
14
|
$nf{$ex_target} = undef; |
2229
|
|
|
|
|
|
|
} |
2230
|
|
|
|
|
|
|
} |
2231
|
|
|
|
|
|
|
|
2232
|
83
|
|
|
|
|
1348
|
for my $e1 (@e_eclated) { |
2233
|
85
|
100
|
|
|
|
1249
|
if (exists $named_fields{$e1->self_name}) { |
2234
|
6
|
|
|
|
|
101
|
$self->_print_error("extra field: duplicate field name: '" . $e1->self_name . "'"); |
2235
|
6
|
|
|
|
|
15
|
next; |
2236
|
|
|
|
|
|
|
} |
2237
|
|
|
|
|
|
|
|
2238
|
79
|
|
|
|
|
599
|
my $index_of_new_element = $#columns + 1; |
2239
|
79
|
|
|
|
|
128
|
push @extra_fields_indexes, $index_of_new_element; |
2240
|
79
|
|
|
|
|
1017
|
$columns[$index_of_new_element] = $e1->self_name; |
2241
|
79
|
|
|
|
|
1335
|
$named_fields{$e1->self_name} = $index_of_new_element; |
2242
|
79
|
|
|
|
|
1398
|
$extra_fields_definitions{$e1->self_name} = $e1; |
2243
|
|
|
|
|
|
|
|
2244
|
79
|
50
|
|
|
|
1403
|
push @headers, $e1->self_name if $has_headers; |
2245
|
79
|
|
|
|
|
1328
|
push @coldata, ColData->new( |
2246
|
|
|
|
|
|
|
field_name => $e1->self_name, |
2247
|
|
|
|
|
|
|
header_text => $e1->self_name, |
2248
|
|
|
|
|
|
|
description => $e1->description, |
2249
|
|
|
|
|
|
|
multiline => '?' |
2250
|
|
|
|
|
|
|
); |
2251
|
|
|
|
|
|
|
} |
2252
|
|
|
|
|
|
|
|
2253
|
|
|
|
|
|
|
} |
2254
|
354
|
100
|
|
|
|
3365
|
$self->{_headers} = [ @headers ] if $has_headers; |
2255
|
354
|
|
|
|
|
792
|
$self->{_extra_fields_indexes} = [ @extra_fields_indexes ]; |
2256
|
354
|
|
|
|
|
1079
|
$self->{_columns} = [ @columns ]; |
2257
|
354
|
|
|
|
|
933
|
$self->{_extra_fields_definitions} = { %extra_fields_definitions }; |
2258
|
|
|
|
|
|
|
|
2259
|
354
|
|
|
|
|
1631
|
$self->{_named_fields} = { %named_fields }; |
2260
|
|
|
|
|
|
|
|
2261
|
354
|
|
|
|
|
1338
|
$self->_detect_dates_formats(); |
2262
|
|
|
|
|
|
|
|
2263
|
351
|
|
|
|
|
5487
|
$self->{_read_update_after_ar} = [ ]; |
2264
|
351
|
|
|
|
|
3290
|
$self->{_write_update_before_ar} = [ ]; |
2265
|
351
|
100
|
|
|
|
918
|
my @dates_formats = @{$self->{_dates_formats}} if defined($self->{_dates_formats}); |
|
60
|
|
|
|
|
253
|
|
2266
|
351
|
|
|
|
|
1019
|
for my $i (0..$#columns) { |
2267
|
1455
|
|
|
|
|
2163
|
my $dt_format; |
2268
|
|
|
|
|
|
|
my $dt_locale; |
2269
|
1455
|
100
|
|
|
|
2735
|
if (defined($dates_formats[$i])) { |
2270
|
330
|
|
|
|
|
663
|
$dt_format = $dates_formats[$i]->[0]; |
2271
|
330
|
|
|
|
|
654
|
$dt_locale = $dates_formats[$i]->[1]; |
2272
|
|
|
|
|
|
|
} |
2273
|
1455
|
|
|
|
|
23224
|
$coldata[$i]->dt_format($dt_format); |
2274
|
1455
|
|
|
|
|
26390
|
$coldata[$i]->dt_locale($dt_locale); |
2275
|
|
|
|
|
|
|
|
2276
|
1455
|
100
|
|
|
|
9824
|
next unless defined($dt_format); |
2277
|
|
|
|
|
|
|
|
2278
|
330
|
|
|
|
|
478
|
my %opts_in; |
2279
|
330
|
100
|
66
|
|
|
1369
|
$opts_in{locale} = $dt_locale if defined($dt_locale) and $dt_locale ne ''; |
2280
|
|
|
|
|
|
|
|
2281
|
330
|
|
|
|
|
1369
|
my $obj_strptime_in = DateTime::Format::Strptime->new(pattern => $dt_format, %opts_in); |
2282
|
|
|
|
|
|
|
|
2283
|
330
|
|
|
|
|
361097
|
my %opts_out; |
2284
|
330
|
50
|
|
|
|
960
|
my $loc_out = (exists $self->{out_dates_locale} ? $self->{out_dates_locale} : $dt_locale); |
2285
|
330
|
100
|
66
|
|
|
1625
|
$opts_out{locale} = $loc_out if defined($loc_out) and $loc_out ne ''; |
2286
|
|
|
|
|
|
|
my $obj_strptime_out = DateTime::Format::Strptime->new( |
2287
|
330
|
50
|
|
|
|
1326
|
pattern => (exists $self->{out_dates_format} ? $self->{out_dates_format} :$dt_format), |
2288
|
|
|
|
|
|
|
%opts_out |
2289
|
|
|
|
|
|
|
); |
2290
|
|
|
|
|
|
|
|
2291
|
330
|
|
|
|
|
339767
|
my $refsub_is_datetime_empty = $self->{_refsub_is_datetime_empty}; |
2292
|
330
|
|
|
|
|
1111
|
my $in_file_disp = $self->get_in_file_disp(); |
2293
|
|
|
|
|
|
|
|
2294
|
|
|
|
|
|
|
$self->{_read_update_after_ar}->[$i] = sub { |
2295
|
352
|
100
|
66
|
352
|
|
1818
|
return undef if !defined($_) or $_ eq '' or |
|
|
|
33
|
|
|
|
|
|
|
|
66
|
|
|
|
|
2296
|
|
|
|
|
|
|
(defined($refsub_is_datetime_empty) and $refsub_is_datetime_empty->($_)); |
2297
|
|
|
|
|
|
|
|
2298
|
315
|
|
|
|
|
552
|
my $s = $_[0]; |
2299
|
315
|
|
|
|
|
660
|
my $field = _get_def($_[1], '<?>'); |
2300
|
|
|
|
|
|
|
|
2301
|
315
|
|
|
|
|
937
|
my $dt = $obj_strptime_in->parse_datetime($_); |
2302
|
|
|
|
|
|
|
|
2303
|
315
|
0
|
33
|
|
|
175487
|
if ($_debug and $DEBUG_DATETIME_FORMATS and $DEBUG_DATETIME_FORMATS_EVEN_MORE) { |
|
|
|
33
|
|
|
|
|
2304
|
0
|
0
|
|
|
|
0
|
print($_debugh "-- Record " . $s->get_recnum() . |
2305
|
|
|
|
|
|
|
", field '$field':\n String parsed: '$_'\n Parse format: '$dt_format'\n" . |
2306
|
|
|
|
|
|
|
" DateTime obj: <" . (defined($dt) ? $dt . '' : 'undef') . ">\n"); |
2307
|
|
|
|
|
|
|
} |
2308
|
|
|
|
|
|
|
|
2309
|
315
|
100
|
|
|
|
706
|
if (!defined($dt)) { |
2310
|
2
|
|
|
|
|
10
|
my $recnum = $s->get_recnum(); |
2311
|
2
|
50
|
|
|
|
7
|
if ($verbose) { |
2312
|
0
|
|
|
|
|
0
|
$s->_print("$PKG: " . |
2313
|
|
|
|
|
|
|
"$in_file_disp: record $recnum: field $field: unable to parse DateTime\n"); |
2314
|
0
|
|
|
|
|
0
|
$s->_print("$PKG: field: '$_'\n"); |
2315
|
0
|
|
|
|
|
0
|
$s->_print("$PKG: format: '$dt_format'\n"); |
2316
|
0
|
0
|
|
|
|
0
|
$s->_print("$PKG: " . |
2317
|
|
|
|
|
|
|
"locale: '" . ($dt_locale eq '' ? '<none>' : $dt_locale) . "'\n"); |
2318
|
0
|
|
|
|
|
0
|
$s->_print("$PKG: " . |
2319
|
|
|
|
|
|
|
"Probable cause: when detecting DateTime format, $PKG will stop reading\n"); |
2320
|
0
|
|
|
|
|
0
|
$s->_print("$PKG: " . |
2321
|
|
|
|
|
|
|
"input as soon as the format is worked out. If a value found later\n"); |
2322
|
0
|
|
|
|
|
0
|
$s->_print("$PKG: " . |
2323
|
|
|
|
|
|
|
"turns out to use another DateTime format, it'll generate a DateTime\n"); |
2324
|
0
|
|
|
|
|
0
|
$s->_print("$PKG: parse error, as is the case now.\n"); |
2325
|
0
|
|
|
|
|
0
|
$s->_print_error("unable to parse DateTime"); |
2326
|
|
|
|
|
|
|
} else { |
2327
|
2
|
|
|
|
|
20
|
$s->_print_error("$in_file_disp: record $recnum: field $field: " . |
2328
|
|
|
|
|
|
|
"unable to parse DateTime '$_'"); |
2329
|
|
|
|
|
|
|
} |
2330
|
|
|
|
|
|
|
} |
2331
|
|
|
|
|
|
|
|
2332
|
314
|
|
|
|
|
631
|
return $dt; |
2333
|
330
|
|
|
|
|
2611
|
}; |
2334
|
|
|
|
|
|
|
$self->{_write_update_before_ar}->[$i] = sub { |
2335
|
96
|
100
|
|
96
|
|
194
|
return '' unless defined($_); |
2336
|
87
|
100
|
|
|
|
202
|
return $_ if !ref($_); |
2337
|
81
|
50
|
|
|
|
279
|
return $_ unless $_->isa('DateTime'); |
2338
|
|
|
|
|
|
|
|
2339
|
81
|
|
|
|
|
236
|
my $str = $obj_strptime_out->format_datetime($_); |
2340
|
|
|
|
|
|
|
|
2341
|
81
|
50
|
|
|
|
17709
|
if (!defined($str)) { |
2342
|
0
|
|
|
|
|
0
|
my $s = $_[0]; |
2343
|
0
|
|
|
|
|
0
|
my $recnum = $s->get_recnum(); |
2344
|
0
|
|
|
|
|
0
|
my $field = _get_def($_[1], '<?>'); |
2345
|
0
|
|
|
|
|
0
|
$s->_print_error("$in_file_disp: record $recnum: field $field: " . |
2346
|
|
|
|
|
|
|
"unable to print DateTime '$_'") |
2347
|
|
|
|
|
|
|
} |
2348
|
|
|
|
|
|
|
|
2349
|
81
|
|
|
|
|
181
|
return $str; |
2350
|
330
|
|
|
|
|
1697
|
}; |
2351
|
|
|
|
|
|
|
} |
2352
|
|
|
|
|
|
|
|
2353
|
351
|
|
|
|
|
1366
|
$self->{_coldata} = [ @coldata ]; |
2354
|
|
|
|
|
|
|
|
2355
|
351
|
|
|
|
|
1330
|
my @loop = ( |
2356
|
|
|
|
|
|
|
['_read_update_after_hr', '_read_update_after_ar', 'read post'], |
2357
|
|
|
|
|
|
|
['_write_update_before_hr', '_write_update_before_ar', 'write pre'] |
2358
|
|
|
|
|
|
|
); |
2359
|
351
|
|
|
|
|
956
|
for my $ii (0..$#loop) { |
2360
|
702
|
|
|
|
|
1076
|
my $l = $loop[$ii]; |
2361
|
|
|
|
|
|
|
|
2362
|
702
|
|
|
|
|
1263
|
my $ht = $self->{$l->[0]}; |
2363
|
702
|
|
|
|
|
933
|
my @subrefs = @{$self->{$l->[1]}}; |
|
702
|
|
|
|
|
1435
|
|
2364
|
702
|
|
|
|
|
961
|
for my $field (keys %{$ht}) { |
|
702
|
|
|
|
|
1453
|
|
2365
|
70
|
50
|
|
|
|
141
|
unless (exists $named_fields{$field}) { |
2366
|
0
|
|
|
|
|
0
|
$self->_print_error($l->[2] . ": unknown field '$field'", |
2367
|
|
|
|
|
|
|
0, ERR_UNKNOWN_FIELD, { %named_fields } ); |
2368
|
0
|
|
|
|
|
0
|
next; |
2369
|
|
|
|
|
|
|
} |
2370
|
|
|
|
|
|
|
|
2371
|
70
|
|
|
|
|
123
|
my $i = $named_fields{$field}; |
2372
|
|
|
|
|
|
|
|
2373
|
70
|
|
|
|
|
87
|
my @allsubs; |
2374
|
70
|
|
|
|
|
82
|
push @allsubs, @{$ht->{$field}}; |
|
70
|
|
|
|
|
120
|
|
2375
|
70
|
100
|
|
|
|
151
|
if (defined($subrefs[$i])) { |
2376
|
2
|
50
|
|
|
|
6
|
unshift @allsubs, $subrefs[$i] if $ii == 0; |
2377
|
2
|
50
|
|
|
|
6
|
push @allsubs, $subrefs[$i] if $ii == 1; |
2378
|
|
|
|
|
|
|
} |
2379
|
|
|
|
|
|
|
|
2380
|
|
|
|
|
|
|
my $finalsub = sub { |
2381
|
196
|
|
|
196
|
|
308
|
for my $s (@allsubs) { |
2382
|
262
|
|
|
|
|
778
|
$_ = $s->(@_); |
2383
|
|
|
|
|
|
|
} |
2384
|
189
|
|
|
|
|
3877
|
return $_; |
2385
|
70
|
|
|
|
|
216
|
}; |
2386
|
70
|
|
|
|
|
145
|
$subrefs[$i] = $finalsub; |
2387
|
|
|
|
|
|
|
|
2388
|
|
|
|
|
|
|
} |
2389
|
702
|
|
|
|
|
1915
|
$self->{$l->[1]} = [ @subrefs ]; |
2390
|
|
|
|
|
|
|
} |
2391
|
|
|
|
|
|
|
|
2392
|
351
|
|
|
|
|
1492
|
my $tmp = _get_def($self->{out_fields}, $self->{write_fields}); |
2393
|
351
|
100
|
|
|
|
1034
|
my @wf = @{$tmp} if defined($tmp); |
|
6
|
|
|
|
|
13
|
|
2394
|
351
|
|
|
|
|
570
|
my $count_field_not_found = 0; |
2395
|
351
|
|
|
|
|
683
|
for (@wf) { |
2396
|
16
|
100
|
66
|
|
|
90
|
next if !defined($_) or $_ eq '' or exists $named_fields{$_}; |
|
|
|
100
|
|
|
|
|
2397
|
3
|
|
|
|
|
5
|
$count_field_not_found++; |
2398
|
3
|
|
|
|
|
18
|
$self->_print_error("out_fields: unknown field '$_'", |
2399
|
|
|
|
|
|
|
1, ERR_UNKNOWN_FIELD, { %named_fields } ); |
2400
|
|
|
|
|
|
|
} |
2401
|
351
|
100
|
|
|
|
756
|
if ($count_field_not_found) { |
2402
|
2
|
|
|
|
|
5
|
$self->_print_error("non existent field(s) encountered"); |
2403
|
1
|
|
|
|
|
5
|
delete $self->{out_fields}; |
2404
|
1
|
|
|
|
|
3
|
delete $self->{write_fields}; |
2405
|
|
|
|
|
|
|
} |
2406
|
|
|
|
|
|
|
|
2407
|
350
|
100
|
|
|
|
847
|
my %sh = %{$self->{_out_headers}} if defined($self->{_out_headers}); |
|
4
|
|
|
|
|
13
|
|
2408
|
350
|
|
|
|
|
525
|
$count_field_not_found = 0; |
2409
|
350
|
|
|
|
|
1157
|
for (keys %sh) { |
2410
|
8
|
100
|
33
|
|
|
35
|
next if !defined($_) or $_ eq '' or exists $named_fields{$_}; |
|
|
|
66
|
|
|
|
|
2411
|
2
|
|
|
|
|
3
|
$count_field_not_found++; |
2412
|
2
|
|
|
|
|
12
|
$self->_print_error("out_header: unknown field '$_'", |
2413
|
|
|
|
|
|
|
1, ERR_UNKNOWN_FIELD, { %named_fields } ); |
2414
|
|
|
|
|
|
|
} |
2415
|
350
|
100
|
|
|
|
697
|
$self->_print_error("non existent field(s) encountered") if $count_field_not_found; |
2416
|
|
|
|
|
|
|
|
2417
|
349
|
|
|
|
|
2306
|
return 1; |
2418
|
|
|
|
|
|
|
} |
2419
|
|
|
|
|
|
|
|
2420
|
|
|
|
|
|
|
# |
2421
|
|
|
|
|
|
|
# Return 0 if there's no more records (error or eof reached), 1 if a record got read |
2422
|
|
|
|
|
|
|
# successfully. |
2423
|
|
|
|
|
|
|
# |
2424
|
|
|
|
|
|
|
# If return value is 1: |
2425
|
|
|
|
|
|
|
# $$ref_ar and $$ref_hr are set to array ref and hash ref of the record, respectively |
2426
|
|
|
|
|
|
|
# |
2427
|
|
|
|
|
|
|
# If return value is 0: |
2428
|
|
|
|
|
|
|
# $$ref_ar and $$ref_hr are set to undef if an error occured |
2429
|
|
|
|
|
|
|
# $$ref_ar and $$ref_hr are set to a scalar if eof reached |
2430
|
|
|
|
|
|
|
# |
2431
|
|
|
|
|
|
|
sub _read_one_record_from_input { |
2432
|
2341
|
|
|
2341
|
|
4158
|
my ($self, $ref_ar, $ref_row_hr) = @_; |
2433
|
|
|
|
|
|
|
|
2434
|
2341
|
|
|
|
|
3236
|
my $_debug = $self->{_debug}; |
2435
|
2341
|
|
|
|
|
2856
|
my $_debug_extra_fields = $self->{_debug_extra_fields}; |
2436
|
2341
|
|
|
|
|
3166
|
my $_debugh = $self->{_debugh}; |
2437
|
|
|
|
|
|
|
|
2438
|
2341
|
|
|
|
|
4125
|
my $in_file_disp = $self->get_in_file_disp(); |
2439
|
|
|
|
|
|
|
|
2440
|
2341
|
|
|
|
|
3603
|
my $incsv = $self->{_in_csvobj}; |
2441
|
2341
|
|
|
|
|
2762
|
my $ar; |
2442
|
|
|
|
|
|
|
|
2443
|
|
|
|
|
|
|
print($_debugh "$PKG: '$in_file_disp': will read line #" . ($self->{_row_read} + 1) . "\n") |
2444
|
2341
|
50
|
|
|
|
4000
|
if $self->{_debug_read}; |
2445
|
|
|
|
|
|
|
|
2446
|
2341
|
100
|
|
|
|
4187
|
unless ($ar = _mygetline($incsv, $self->{_inh})) { |
2447
|
286
|
50
|
|
|
|
10010
|
if (!$incsv->eof()) { |
2448
|
0
|
|
|
|
|
0
|
my ($code, $str, $pos) = $incsv->error_diag(); |
2449
|
0
|
|
|
|
|
0
|
$self->_print_error("$code: $str, record " . $incsv->record_number . ", position $pos"); |
2450
|
0
|
|
|
|
|
0
|
$$ref_ar = undef; |
2451
|
0
|
|
|
|
|
0
|
$$ref_row_hr = undef; |
2452
|
|
|
|
|
|
|
} else { |
2453
|
286
|
|
|
|
|
1780
|
$$ref_ar = 1; |
2454
|
286
|
|
|
|
|
709
|
$$ref_row_hr = 1; |
2455
|
|
|
|
|
|
|
} |
2456
|
|
|
|
|
|
|
|
2457
|
286
|
|
|
|
|
859
|
$self->_close_inh(); |
2458
|
|
|
|
|
|
|
|
2459
|
286
|
|
|
|
|
944
|
return 0; |
2460
|
|
|
|
|
|
|
} |
2461
|
|
|
|
|
|
|
|
2462
|
2055
|
|
|
|
|
59052
|
$self->{_row_read}++; |
2463
|
|
|
|
|
|
|
|
2464
|
2055
|
|
|
|
|
2767
|
my %named_fields = %{$self->{_named_fields}}; |
|
2055
|
|
|
|
|
8734
|
|
2465
|
|
|
|
|
|
|
|
2466
|
2055
|
50
|
|
|
|
4666
|
if ($self->{_debug_read}) { |
2467
|
0
|
|
|
|
|
0
|
print($_debugh "Line " . $self->{_row_read} . ":\n--\n"); |
2468
|
0
|
|
|
|
|
0
|
for (sort keys %named_fields) { |
2469
|
0
|
|
|
|
|
0
|
my $c = _get_def($ar->[$named_fields{$_}], '<undef>'); |
2470
|
0
|
|
|
|
|
0
|
print($_debugh " $_ => '" . $c . "'\n"); |
2471
|
|
|
|
|
|
|
} |
2472
|
|
|
|
|
|
|
} |
2473
|
|
|
|
|
|
|
|
2474
|
2055
|
|
|
|
|
2927
|
my $columns_ar = $self->{_columns}; |
2475
|
|
|
|
|
|
|
|
2476
|
2055
|
|
|
|
|
2633
|
my $no_undef = $self->{no_undef}; |
2477
|
2055
|
100
|
|
|
|
3244
|
if ($no_undef) { |
2478
|
30
|
|
|
|
|
37
|
for (0..$#{$columns_ar}) { |
|
30
|
|
|
|
|
71
|
|
2479
|
324
|
100
|
|
|
|
607
|
$ar->[$_] = '' unless defined($ar->[$_]); |
2480
|
|
|
|
|
|
|
} |
2481
|
|
|
|
|
|
|
} |
2482
|
|
|
|
|
|
|
|
2483
|
2055
|
|
|
|
|
2857
|
my $row_hr = { }; |
2484
|
|
|
|
|
|
|
$row_hr->{$_} = $ar->[$self->{_regular_named_fields}->{$_}] |
2485
|
2055
|
|
|
|
|
2483
|
foreach keys %{$self->{_regular_named_fields}}; |
|
2055
|
|
|
|
|
13216
|
|
2486
|
|
|
|
|
|
|
|
2487
|
2055
|
|
|
|
|
3891
|
my $rpost = $self->{_read_update_after_ar}; |
2488
|
2055
|
|
|
|
|
2541
|
for my $i (0..$#{$columns_ar}) { |
|
2055
|
|
|
|
|
4135
|
|
2489
|
10586
|
|
|
|
|
12777
|
my $subref = $rpost->[$i]; |
2490
|
10586
|
100
|
|
|
|
17217
|
next unless defined($subref); |
2491
|
|
|
|
|
|
|
|
2492
|
479
|
|
|
|
|
594
|
do { |
2493
|
479
|
|
|
|
|
712
|
my $field = $columns_ar->[$i]; |
2494
|
479
|
|
|
|
|
809
|
local $_ = $ar->[$i]; |
2495
|
479
|
|
|
|
|
930
|
my $new_val = $subref->($self, $field); |
2496
|
474
|
|
|
|
|
878
|
$ar->[$i] = $new_val; |
2497
|
474
|
50
|
|
|
|
1684
|
$row_hr->{$field} = $new_val if defined($field); |
2498
|
|
|
|
|
|
|
} |
2499
|
|
|
|
|
|
|
|
2500
|
|
|
|
|
|
|
} |
2501
|
|
|
|
|
|
|
|
2502
|
2050
|
|
|
|
|
2627
|
for my $i (@{$self->{_extra_fields_indexes}}) { |
|
2050
|
|
|
|
|
3584
|
|
2503
|
442
|
|
|
|
|
634
|
my $name = $columns_ar->[$i]; |
2504
|
442
|
|
|
|
|
692
|
my $e = $self->{_extra_fields_definitions}->{$name}; |
2505
|
|
|
|
|
|
|
|
2506
|
442
|
50
|
|
|
|
723
|
print($_debugh "Extra field: #$i: $name\n") if $_debug_extra_fields; |
2507
|
|
|
|
|
|
|
|
2508
|
442
|
|
|
|
|
534
|
my $value; |
2509
|
442
|
100
|
|
|
|
7218
|
if ($e->ef_type == $EF_LINK) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
2510
|
|
|
|
|
|
|
|
2511
|
238
|
50
|
|
|
|
1804
|
print($_debugh " linked field\n") if $_debug_extra_fields; |
2512
|
|
|
|
|
|
|
|
2513
|
238
|
|
|
|
|
3293
|
my $remobj = $e->link_remote_obj; |
2514
|
|
|
|
|
|
|
$value = $remobj->vlookup( |
2515
|
|
|
|
|
|
|
$e->link_remote_search, |
2516
|
238
|
|
|
|
|
4381
|
$ar->[$named_fields{$e->link_self_search}], |
2517
|
|
|
|
|
|
|
$e->link_remote_read, |
2518
|
|
|
|
|
|
|
$e->link_vlookup_opts |
2519
|
|
|
|
|
|
|
); |
2520
|
|
|
|
|
|
|
|
2521
|
|
|
|
|
|
|
} elsif ($e->ef_type == $EF_FUNC) { |
2522
|
|
|
|
|
|
|
|
2523
|
51
|
50
|
|
|
|
1176
|
print($_debugh " computed field\n") if $_debug_extra_fields; |
2524
|
|
|
|
|
|
|
|
2525
|
51
|
|
|
|
|
666
|
$value = $e->func_sub->($name, $row_hr, $self->{_stats}); |
2526
|
|
|
|
|
|
|
|
2527
|
|
|
|
|
|
|
} elsif ($e->ef_type == $EF_COPY) { |
2528
|
|
|
|
|
|
|
|
2529
|
153
|
50
|
|
|
|
5733
|
print($_debugh " copy field\n") if $_debug_extra_fields; |
2530
|
|
|
|
|
|
|
|
2531
|
153
|
|
|
|
|
1898
|
my $input = $row_hr->{$e->copy_source}; |
2532
|
153
|
50
|
33
|
|
|
1072
|
$input = '' if !defined($input) and $no_undef; |
2533
|
153
|
100
|
|
|
|
1907
|
if (defined($e->copy_sub)) { |
2534
|
57
|
|
|
|
|
395
|
local $_ = $input; |
2535
|
57
|
|
|
|
|
742
|
$value = $e->copy_sub->(); |
2536
|
|
|
|
|
|
|
} else { |
2537
|
96
|
|
|
|
|
603
|
$value = $input; |
2538
|
|
|
|
|
|
|
} |
2539
|
|
|
|
|
|
|
|
2540
|
153
|
50
|
|
|
|
970
|
print($_debugh " in: '$input', out: '$value'\n") if $_debug_extra_fields; |
2541
|
|
|
|
|
|
|
|
2542
|
|
|
|
|
|
|
} else { |
2543
|
0
|
|
|
|
|
0
|
confess "Unknown ef_type '" . $e->ef_type . "', check this module' code urgently!"; |
2544
|
|
|
|
|
|
|
} |
2545
|
|
|
|
|
|
|
|
2546
|
440
|
100
|
100
|
|
|
1688
|
$value = '' if !defined($value) and $no_undef; |
2547
|
440
|
|
|
|
|
731
|
$ar->[$i] = $value; |
2548
|
440
|
|
|
|
|
780
|
$row_hr->{$name} = $value; |
2549
|
|
|
|
|
|
|
|
2550
|
440
|
50
|
|
|
|
913
|
print($_debugh " $name => '$value'\n") if $_debug_extra_fields; |
2551
|
|
|
|
|
|
|
|
2552
|
|
|
|
|
|
|
} |
2553
|
|
|
|
|
|
|
|
2554
|
2048
|
100
|
|
|
|
3785
|
if (defined($self->{read_post_update_hr})) { |
2555
|
33
|
|
|
|
|
79
|
$self->{read_post_update_hr}->($row_hr, $self->{_stats}, $self->get_recnum()); |
2556
|
33
|
|
|
|
|
288
|
$ar->[$named_fields{$_}] = $row_hr->{$_} foreach keys %named_fields; |
2557
|
|
|
|
|
|
|
} |
2558
|
|
|
|
|
|
|
|
2559
|
2048
|
100
|
|
|
|
5593
|
lock_keys(%$row_hr) if $self->{croak_if_error}; |
2560
|
|
|
|
|
|
|
|
2561
|
|
|
|
|
|
|
$self->{walker_ar}->($ar, $self->{_stats}, $self->get_recnum()) |
2562
|
2048
|
100
|
|
|
|
12845
|
if defined($self->{walker_ar}); |
2563
|
|
|
|
|
|
|
$self->{walker_hr}->($row_hr, $self->{_stats}, $self->get_recnum()) |
2564
|
2048
|
100
|
|
|
|
4067
|
if defined($self->{walker_hr}); |
2565
|
|
|
|
|
|
|
|
2566
|
2047
|
|
|
|
|
3211
|
$$ref_ar = $ar; |
2567
|
2047
|
|
|
|
|
4113
|
$$ref_row_hr = $row_hr; |
2568
|
|
|
|
|
|
|
|
2569
|
2047
|
|
|
|
|
5682
|
return 1; |
2570
|
|
|
|
|
|
|
} |
2571
|
|
|
|
|
|
|
|
2572
|
|
|
|
|
|
|
sub _open_read { |
2573
|
297
|
|
|
297
|
|
468
|
my $self = shift; |
2574
|
|
|
|
|
|
|
|
2575
|
297
|
|
|
|
|
597
|
my $verbose = $self->{verbose}; |
2576
|
297
|
|
|
|
|
696
|
my $in_file_disp = $self->get_in_file_disp(); |
2577
|
|
|
|
|
|
|
|
2578
|
297
|
|
|
|
|
642
|
$self->{_stats} = { }; |
2579
|
297
|
|
|
|
|
574
|
$self->{_read_in_progress} = 1; |
2580
|
|
|
|
|
|
|
|
2581
|
297
|
50
|
|
|
|
692
|
$self->_print("-- $in_file_disp reading start\n") if $verbose; |
2582
|
|
|
|
|
|
|
} |
2583
|
|
|
|
|
|
|
|
2584
|
|
|
|
|
|
|
sub _close_read { |
2585
|
316
|
|
|
316
|
|
483
|
my $self = shift; |
2586
|
316
|
|
|
|
|
491
|
my $keep_quiet = shift; |
2587
|
|
|
|
|
|
|
|
2588
|
316
|
|
|
|
|
567
|
my $verbose = $self->{verbose}; |
2589
|
316
|
|
|
|
|
642
|
my $in_file_disp = $self->get_in_file_disp(); |
2590
|
|
|
|
|
|
|
|
2591
|
316
|
|
|
|
|
570
|
$self->{_read_in_progress} = 0; |
2592
|
|
|
|
|
|
|
|
2593
|
316
|
50
|
33
|
|
|
778
|
if ($verbose and !$keep_quiet) { |
2594
|
0
|
|
|
|
|
0
|
$self->_print("-- $in_file_disp reading end: " . $self->{_row_read} . " row(s) read\n"); |
2595
|
0
|
|
|
|
|
0
|
for my $k (sort keys %{$self->{_stats}}) { |
|
0
|
|
|
|
|
0
|
|
2596
|
0
|
|
|
|
|
0
|
$self->_printf(" %7d %s\n", $self->{_stats}->{$k}, $k); |
2597
|
|
|
|
|
|
|
} |
2598
|
|
|
|
|
|
|
} |
2599
|
|
|
|
|
|
|
|
2600
|
316
|
|
|
|
|
609
|
$self->{_nb_rows} = $self->{_row_read}; |
2601
|
|
|
|
|
|
|
} |
2602
|
|
|
|
|
|
|
|
2603
|
|
|
|
|
|
|
# Return 0 if error, 1 if all good |
2604
|
|
|
|
|
|
|
sub _S4_read_all_in_mem { |
2605
|
167
|
|
|
167
|
|
320
|
my $self = shift; |
2606
|
|
|
|
|
|
|
|
2607
|
167
|
|
|
|
|
495
|
$self->_register_pass("_S4_read_all_in_mem()"); |
2608
|
|
|
|
|
|
|
|
2609
|
167
|
|
|
|
|
524
|
$self->_open_read(); |
2610
|
|
|
|
|
|
|
|
2611
|
167
|
|
|
|
|
311
|
my $ar; |
2612
|
|
|
|
|
|
|
my $row_hr; |
2613
|
167
|
|
|
|
|
547
|
while ($self->_read_one_record_from_input(\$ar, \$row_hr)) { |
2614
|
|
|
|
|
|
|
|
2615
|
1553
|
|
|
|
|
2045
|
push @{$self->{_flat}}, $ar; |
|
1553
|
|
|
|
|
4063
|
|
2616
|
|
|
|
|
|
|
|
2617
|
|
|
|
|
|
|
} |
2618
|
|
|
|
|
|
|
|
2619
|
165
|
50
|
|
|
|
473
|
my $retcode = (defined($ar) ? 1 : 0); |
2620
|
165
|
|
|
|
|
515
|
$self->_update_in_mem_record_count(); |
2621
|
|
|
|
|
|
|
|
2622
|
165
|
|
|
|
|
557
|
$self->_close_read(); |
2623
|
|
|
|
|
|
|
|
2624
|
165
|
|
|
|
|
415
|
return $retcode; |
2625
|
|
|
|
|
|
|
} |
2626
|
|
|
|
|
|
|
|
2627
|
|
|
|
|
|
|
sub _chain_array { |
2628
|
24
|
|
|
24
|
|
146
|
return split(/\s*->\s*/, $_[0]); |
2629
|
|
|
|
|
|
|
} |
2630
|
|
|
|
|
|
|
|
2631
|
|
|
|
|
|
|
sub _chain_str { |
2632
|
2
|
|
|
2
|
|
7
|
return join('->', @_); |
2633
|
|
|
|
|
|
|
} |
2634
|
|
|
|
|
|
|
|
2635
|
|
|
|
|
|
|
sub field_add_link { |
2636
|
25
|
|
|
25
|
1
|
4309
|
my $self = shift; |
2637
|
|
|
|
|
|
|
|
2638
|
25
|
|
|
|
|
852
|
validate_pos(@_, {type => UNDEF | SCALAR}, {type => SCALAR}, {type => SCALAR | OBJECT}, |
2639
|
|
|
|
|
|
|
{type => HASHREF, optional => 1}); |
2640
|
|
|
|
|
|
|
|
2641
|
22
|
|
|
|
|
92
|
my ($new_field, $chain, $obj, $param_opts) = @_; |
2642
|
|
|
|
|
|
|
|
2643
|
22
|
|
|
|
|
44
|
my $croak_if_error = $self->{croak_if_error}; |
2644
|
22
|
|
|
|
|
39
|
my $_debug = $self->{_debug}; |
2645
|
22
|
|
|
|
|
35
|
my $_debugh = $self->{_debugh}; |
2646
|
|
|
|
|
|
|
|
2647
|
22
|
|
|
|
|
57
|
my @c = _chain_array($chain); |
2648
|
22
|
100
|
|
|
|
60
|
$new_field = $c[2] unless defined($new_field); |
2649
|
|
|
|
|
|
|
|
2650
|
22
|
50
|
|
|
|
51
|
print($_debugh "Registering new linked field, new_field = '$new_field', chain = '$chain'\n") |
2651
|
|
|
|
|
|
|
if $_debug; |
2652
|
|
|
|
|
|
|
|
2653
|
22
|
100
|
66
|
|
|
104
|
unless (@c == 3 and $c[2] ne '') { |
2654
|
1
|
|
|
|
|
8
|
$self->_print_error("wrong links chain parameter: '$chain', " . |
2655
|
|
|
|
|
|
|
"look for CHAIN in Text::AutoCSV manual for help"); |
2656
|
1
|
|
|
|
|
12
|
return undef; |
2657
|
|
|
|
|
|
|
} |
2658
|
|
|
|
|
|
|
|
2659
|
21
|
50
|
|
|
|
59
|
return undef unless $self->_status_forward('S2'); |
2660
|
21
|
50
|
|
|
|
54
|
return undef unless $self->_status_backward('S2'); |
2661
|
|
|
|
|
|
|
|
2662
|
21
|
100
|
|
|
|
52
|
my @tmp = %{$param_opts} if $param_opts; |
|
13
|
|
|
|
|
37
|
|
2663
|
21
|
|
|
|
|
513
|
my %opts = validate(@tmp, $SEARCH_VALIDATE_OPTIONS); |
2664
|
|
|
|
|
|
|
|
2665
|
20
|
|
|
|
|
75
|
my $target_name = ''; |
2666
|
20
|
100
|
|
|
|
55
|
if (ref $obj eq '') { |
2667
|
19
|
|
|
|
|
30
|
my $in_file = $obj; |
2668
|
19
|
|
|
|
|
31
|
$target_name = $in_file; |
2669
|
|
|
|
|
|
|
|
2670
|
|
|
|
|
|
|
# |
2671
|
|
|
|
|
|
|
# TODO (?) |
2672
|
|
|
|
|
|
|
# |
2673
|
|
|
|
|
|
|
# Take into account the fact that the OS' file system is case insensitive. At the |
2674
|
|
|
|
|
|
|
# moment, two different strings (even if identical in a case insensitive comparison) |
2675
|
|
|
|
|
|
|
# will be managed as being distinct. |
2676
|
|
|
|
|
|
|
# I put a question mark in this TO DO - after all, the user of this module had better |
2677
|
|
|
|
|
|
|
# use same case when dealing with multiple links of the same file. |
2678
|
|
|
|
|
|
|
# |
2679
|
|
|
|
|
|
|
# Also, tuning this module' behavior depending on the OS' characteristics would be not |
2680
|
|
|
|
|
|
|
# ideal, it'd add a level of complexity to understand how it works and what to expect. |
2681
|
|
|
|
|
|
|
# |
2682
|
19
|
100
|
100
|
|
|
66
|
if (exists $self->{_obj} and exists $self->{_obj}->{$in_file}) { |
2683
|
|
|
|
|
|
|
|
2684
|
4
|
50
|
|
|
|
110
|
print( |
2685
|
|
|
|
|
|
|
$_debugh |
2686
|
|
|
|
|
|
|
"field_add_link: file '$in_file': re-using existing Text::AutoCSV object\n" |
2687
|
|
|
|
|
|
|
) if $_debug; |
2688
|
|
|
|
|
|
|
|
2689
|
4
|
|
|
|
|
15
|
$obj = $self->{_obj}->{$in_file}; |
2690
|
|
|
|
|
|
|
} else { |
2691
|
|
|
|
|
|
|
|
2692
|
15
|
50
|
|
|
|
36
|
print($_debugh "field_add_link: file '$in_file': creating new Text::AutoCSV object\n") |
2693
|
|
|
|
|
|
|
if $_debug; |
2694
|
|
|
|
|
|
|
|
2695
|
15
|
100
|
|
|
|
58
|
$self->{_obj} = { } unless exists $self->{_obj}; |
2696
|
|
|
|
|
|
|
|
2697
|
|
|
|
|
|
|
# |
2698
|
|
|
|
|
|
|
# The created Text::AutoCSV must be created with the same search options as what is |
2699
|
|
|
|
|
|
|
# currently found in $self. |
2700
|
|
|
|
|
|
|
# |
2701
|
|
|
|
|
|
|
# Why? |
2702
|
|
|
|
|
|
|
# Because the link is populated doing a vlookup on the remote object ($obj below), |
2703
|
|
|
|
|
|
|
# not on $self. Therefore, if we don't "propagate" search options from $self to |
2704
|
|
|
|
|
|
|
# $obj, search tunnings won't work as user would expect. |
2705
|
|
|
|
|
|
|
# |
2706
|
15
|
|
|
|
|
24
|
my %search_opts; |
2707
|
15
|
|
|
|
|
40
|
for (qw(search_case search_trim search_ignore_empty search_ignore_accents |
2708
|
|
|
|
|
|
|
search_value_if_not_found search_value_if_ambiguous search_ignore_ambiguous)) { |
2709
|
|
|
|
|
|
|
# We assign depending on whether or not the attribute EXISTS - the definedness |
2710
|
|
|
|
|
|
|
# is not appropriate, in case an attribute would have been assigned to undef. |
2711
|
105
|
100
|
|
|
|
189
|
$search_opts{$_} = $self->{$_} if exists $self->{$_}; |
2712
|
|
|
|
|
|
|
} |
2713
|
|
|
|
|
|
|
|
2714
|
|
|
|
|
|
|
$obj = Text::AutoCSV->new( |
2715
|
|
|
|
|
|
|
in_file => $in_file, |
2716
|
|
|
|
|
|
|
verbose => $self->{verbose}, |
2717
|
|
|
|
|
|
|
infoh => $self->{infoh}, |
2718
|
|
|
|
|
|
|
_debug => $self->{debug}, |
2719
|
|
|
|
|
|
|
_debugh => $self->{debugh}, |
2720
|
15
|
|
|
|
|
103
|
%search_opts |
2721
|
|
|
|
|
|
|
); |
2722
|
15
|
|
|
|
|
61
|
$self->{_obj}->{$in_file} = $obj; |
2723
|
|
|
|
|
|
|
} |
2724
|
|
|
|
|
|
|
} else { |
2725
|
1
|
|
|
|
|
2
|
$target_name = '(object)'; |
2726
|
1
|
50
|
|
|
|
4
|
print($_debugh "field_add_link: Text::AutoCSV object provided\n") if $_debug; |
2727
|
|
|
|
|
|
|
} |
2728
|
|
|
|
|
|
|
|
2729
|
20
|
100
|
|
|
|
64
|
$self->{_extra_fields} = [ ] unless exists $self->{_extra_fields}; |
2730
|
|
|
|
|
|
|
|
2731
|
20
|
|
|
|
|
35
|
push @{$self->{_extra_fields}}, ExtraField->new( |
|
20
|
|
|
|
|
516
|
|
2732
|
|
|
|
|
|
|
ef_type => $EF_LINK, |
2733
|
|
|
|
|
|
|
self_name => $new_field, |
2734
|
|
|
|
|
|
|
description => "link: $target_name, chain: $chain", |
2735
|
|
|
|
|
|
|
check_field_existence => $c[0], |
2736
|
|
|
|
|
|
|
|
2737
|
|
|
|
|
|
|
link_self_search => $c[0], |
2738
|
|
|
|
|
|
|
link_remote_obj => $obj, |
2739
|
|
|
|
|
|
|
link_remote_search => $c[1], |
2740
|
|
|
|
|
|
|
link_remote_read => $c[2], |
2741
|
|
|
|
|
|
|
|
2742
|
|
|
|
|
|
|
link_vlookup_opts => \%opts |
2743
|
|
|
|
|
|
|
); |
2744
|
|
|
|
|
|
|
|
2745
|
20
|
|
|
|
|
2417
|
return $self; |
2746
|
|
|
|
|
|
|
} |
2747
|
|
|
|
|
|
|
|
2748
|
|
|
|
|
|
|
sub links { |
2749
|
2
|
|
|
2
|
1
|
5
|
my $self = shift; |
2750
|
|
|
|
|
|
|
|
2751
|
2
|
|
|
|
|
31
|
validate_pos(@_, {type => UNDEF | SCALAR}, {type => SCALAR}, {type => SCALAR | OBJECT}, |
2752
|
|
|
|
|
|
|
{type => HASHREF, optional => 1}); |
2753
|
|
|
|
|
|
|
|
2754
|
2
|
|
|
|
|
7
|
my $prefix_field = shift; |
2755
|
2
|
|
|
|
|
4
|
my $chain = shift; |
2756
|
2
|
|
|
|
|
5
|
my ($obj, $param_opts) = @_; |
2757
|
|
|
|
|
|
|
|
2758
|
2
|
|
|
|
|
5
|
my @c = _chain_array($chain); |
2759
|
|
|
|
|
|
|
|
2760
|
2
|
50
|
33
|
|
|
17
|
if (@c != 2 or $c[0] eq '' or $c[1] eq '') { |
|
|
|
33
|
|
|
|
|
2761
|
0
|
|
|
|
|
0
|
$self->_print_error("wrong links chain parameter: '$chain', " . |
2762
|
|
|
|
|
|
|
"look for JOINCHAIN in Text::AutoCSV manual for help"); |
2763
|
0
|
|
|
|
|
0
|
return undef; |
2764
|
|
|
|
|
|
|
} |
2765
|
|
|
|
|
|
|
|
2766
|
2
|
100
|
|
|
|
6
|
$prefix_field = '' unless defined($prefix_field); |
2767
|
2
|
|
|
|
|
6
|
my $chain2 = _chain_str(@c, '*'); |
2768
|
|
|
|
|
|
|
|
2769
|
2
|
|
|
|
|
8
|
return $self->field_add_link($prefix_field, $chain2, @_); |
2770
|
|
|
|
|
|
|
} |
2771
|
|
|
|
|
|
|
|
2772
|
|
|
|
|
|
|
sub field_add_computed { |
2773
|
7
|
|
|
7
|
1
|
2550
|
my $self = shift; |
2774
|
|
|
|
|
|
|
|
2775
|
7
|
|
|
|
|
217
|
validate_pos(@_, {type => SCALAR}, {type => CODEREF}); |
2776
|
6
|
|
|
|
|
26
|
my ($new_field, $func) = @_; |
2777
|
|
|
|
|
|
|
|
2778
|
6
|
|
|
|
|
15
|
my $croak_if_error = $self->{croak_if_error}; |
2779
|
|
|
|
|
|
|
|
2780
|
6
|
|
|
|
|
12
|
my $_debug = $self->{_debug}; |
2781
|
6
|
|
|
|
|
12
|
my $_debugh = $self->{_debugh}; |
2782
|
|
|
|
|
|
|
|
2783
|
6
|
50
|
|
|
|
20
|
print($_debugh "Registering new computed field, new_field = '$new_field'\n") if $_debug; |
2784
|
|
|
|
|
|
|
|
2785
|
6
|
100
|
|
|
|
19
|
return undef unless $self->_status_forward('S2'); |
2786
|
5
|
50
|
|
|
|
13
|
return undef unless $self->_status_backward('S2'); |
2787
|
|
|
|
|
|
|
|
2788
|
5
|
|
|
|
|
10
|
push @{$self->{_extra_fields}}, ExtraField->new( |
|
5
|
|
|
|
|
108
|
|
2789
|
|
|
|
|
|
|
ef_type => $EF_FUNC, |
2790
|
|
|
|
|
|
|
self_name => $new_field, |
2791
|
|
|
|
|
|
|
description => "computed", |
2792
|
|
|
|
|
|
|
|
2793
|
|
|
|
|
|
|
func_sub => $func |
2794
|
|
|
|
|
|
|
); |
2795
|
|
|
|
|
|
|
|
2796
|
5
|
|
|
|
|
464
|
return $self; |
2797
|
|
|
|
|
|
|
} |
2798
|
|
|
|
|
|
|
|
2799
|
|
|
|
|
|
|
sub field_add_copy { |
2800
|
14
|
|
|
14
|
1
|
8764
|
my $self = shift; |
2801
|
|
|
|
|
|
|
|
2802
|
14
|
|
|
|
|
213
|
validate_pos(@_, {type => SCALAR}, {type => SCALAR}, {type => CODEREF, optional => 1}); |
2803
|
14
|
|
|
|
|
62
|
my ($new_field, $copy_source, $func) = @_; |
2804
|
|
|
|
|
|
|
|
2805
|
14
|
|
|
|
|
32
|
my $croak_if_error = $self->{croak_if_error}; |
2806
|
|
|
|
|
|
|
|
2807
|
14
|
|
|
|
|
26
|
my $_debug = $self->{_debug}; |
2808
|
14
|
|
|
|
|
25
|
my $_debugh = $self->{_debugh}; |
2809
|
|
|
|
|
|
|
|
2810
|
14
|
50
|
|
|
|
36
|
print($_debugh "Registering field copy, new_field = '$new_field' copied from '$copy_source'\n") |
2811
|
|
|
|
|
|
|
if $_debug; |
2812
|
|
|
|
|
|
|
|
2813
|
14
|
100
|
|
|
|
34
|
return undef unless $self->_status_forward('S2'); |
2814
|
12
|
50
|
|
|
|
35
|
return undef unless $self->_status_backward('S2'); |
2815
|
|
|
|
|
|
|
|
2816
|
12
|
100
|
|
|
|
22
|
push @{$self->{_extra_fields}}, ExtraField->new( |
|
12
|
|
|
|
|
305
|
|
2817
|
|
|
|
|
|
|
ef_type => $EF_COPY, |
2818
|
|
|
|
|
|
|
self_name => $new_field, |
2819
|
|
|
|
|
|
|
description => "copy of $copy_source " . (defined($func) ? '(with sub)' : '(no sub)'), |
2820
|
|
|
|
|
|
|
check_field_existence => $copy_source, |
2821
|
|
|
|
|
|
|
|
2822
|
|
|
|
|
|
|
copy_source => $copy_source, |
2823
|
|
|
|
|
|
|
copy_sub => $func |
2824
|
|
|
|
|
|
|
); |
2825
|
|
|
|
|
|
|
|
2826
|
12
|
|
|
|
|
1147
|
return $self; |
2827
|
|
|
|
|
|
|
} |
2828
|
|
|
|
|
|
|
|
2829
|
|
|
|
|
|
|
sub in_map { |
2830
|
15
|
|
|
15
|
1
|
1062
|
my $self = shift; |
2831
|
|
|
|
|
|
|
|
2832
|
15
|
|
|
|
|
39
|
return $self->read_update_after(@_); |
2833
|
|
|
|
|
|
|
} |
2834
|
|
|
|
|
|
|
|
2835
|
|
|
|
|
|
|
sub read_update_after { |
2836
|
16
|
|
|
16
|
1
|
46
|
my $self = shift; |
2837
|
16
|
|
|
|
|
179
|
validate_pos(@_, {type => SCALAR}, {type => CODEREF}); |
2838
|
|
|
|
|
|
|
|
2839
|
16
|
|
|
|
|
55
|
my ($field, $subref) = @_; |
2840
|
|
|
|
|
|
|
|
2841
|
16
|
|
|
|
|
32
|
my $_debug = $self->{_debug}; |
2842
|
16
|
|
|
|
|
24
|
my $_debugh = $self->{_debugh}; |
2843
|
|
|
|
|
|
|
|
2844
|
16
|
50
|
|
|
|
39
|
return undef unless $self->_status_forward('S2'); |
2845
|
16
|
50
|
|
|
|
40
|
return undef unless $self->_status_backward('S2'); |
2846
|
|
|
|
|
|
|
|
2847
|
16
|
50
|
|
|
|
37
|
print($_debugh "Registering read_post_update subref for field '$field'\n") if $_debug; |
2848
|
|
|
|
|
|
|
|
2849
|
|
|
|
|
|
|
$self->{_read_update_after_hr}->{$field} = [ ] |
2850
|
16
|
100
|
|
|
|
53
|
unless defined($self->{_read_update_after_hr}->{$field}); |
2851
|
|
|
|
|
|
|
|
2852
|
16
|
|
|
|
|
21
|
push @{$self->{_read_update_after_hr}->{$field}}, $subref; |
|
16
|
|
|
|
|
44
|
|
2853
|
|
|
|
|
|
|
|
2854
|
16
|
|
|
|
|
188
|
return $self; |
2855
|
|
|
|
|
|
|
} |
2856
|
|
|
|
|
|
|
|
2857
|
|
|
|
|
|
|
sub out_map { |
2858
|
11
|
|
|
11
|
1
|
24
|
my $self = shift; |
2859
|
|
|
|
|
|
|
|
2860
|
11
|
|
|
|
|
31
|
return $self->write_update_before(@_); |
2861
|
|
|
|
|
|
|
} |
2862
|
|
|
|
|
|
|
|
2863
|
|
|
|
|
|
|
sub write_update_before { |
2864
|
12
|
|
|
12
|
1
|
21
|
my $self = shift; |
2865
|
12
|
|
|
|
|
147
|
validate_pos(@_, {type => SCALAR}, {type => CODEREF}); |
2866
|
|
|
|
|
|
|
|
2867
|
12
|
|
|
|
|
68
|
my ($field, $subref) = @_; |
2868
|
|
|
|
|
|
|
|
2869
|
12
|
|
|
|
|
26
|
my $_debug = $self->{_debug}; |
2870
|
12
|
|
|
|
|
23
|
my $_debugh = $self->{_debugh}; |
2871
|
|
|
|
|
|
|
|
2872
|
12
|
50
|
|
|
|
35
|
return undef unless $self->_status_forward('S2'); |
2873
|
12
|
50
|
|
|
|
34
|
return undef unless $self->_status_backward('S2'); |
2874
|
|
|
|
|
|
|
|
2875
|
12
|
50
|
|
|
|
36
|
print($_debugh "Registering write_pre_update subref for field '$field'\n") if $_debug; |
2876
|
|
|
|
|
|
|
|
2877
|
|
|
|
|
|
|
$self->{_write_update_before_hr}->{$field} = [ ] |
2878
|
12
|
100
|
|
|
|
42
|
unless defined($self->{_write_update_before_hr}->{$field}); |
2879
|
|
|
|
|
|
|
|
2880
|
12
|
|
|
|
|
24
|
push @{$self->{_write_update_before_hr}->{$field}}, $subref; |
|
12
|
|
|
|
|
31
|
|
2881
|
|
|
|
|
|
|
|
2882
|
12
|
|
|
|
|
98
|
return $self; |
2883
|
|
|
|
|
|
|
} |
2884
|
|
|
|
|
|
|
|
2885
|
|
|
|
|
|
|
sub reset_next_record_hr { |
2886
|
189
|
|
|
189
|
1
|
317
|
my $self = shift; |
2887
|
|
|
|
|
|
|
|
2888
|
189
|
|
|
|
|
869
|
validate_pos(@_); |
2889
|
|
|
|
|
|
|
|
2890
|
189
|
|
|
|
|
507
|
$self->{_current_record} = undef; |
2891
|
|
|
|
|
|
|
|
2892
|
189
|
|
|
|
|
313
|
return $self; |
2893
|
|
|
|
|
|
|
} |
2894
|
|
|
|
|
|
|
|
2895
|
|
|
|
|
|
|
sub _create_internal_column_name_from_its_number { |
2896
|
1120
|
|
|
1120
|
|
2658
|
return sprintf("__%04i__", $_[0]); |
2897
|
|
|
|
|
|
|
} |
2898
|
|
|
|
|
|
|
|
2899
|
|
|
|
|
|
|
sub _ar_to_hr { |
2900
|
2079
|
|
|
2079
|
|
2565
|
my $self = shift; |
2901
|
|
|
|
|
|
|
|
2902
|
2079
|
|
|
|
|
14622
|
validate_pos(@_, {type => ARRAYREF}); |
2903
|
|
|
|
|
|
|
|
2904
|
2079
|
|
|
|
|
5186
|
my ($ar) = @_; |
2905
|
2079
|
|
|
|
|
2603
|
my $last_elem_index = scalar(@{$ar}) - 1; |
|
2079
|
|
|
|
|
3428
|
|
2906
|
|
|
|
|
|
|
|
2907
|
2079
|
|
|
|
|
2998
|
my $nr = $self->{_named_fields}; |
2908
|
2079
|
|
|
|
|
2499
|
my %h; |
2909
|
|
|
|
|
|
|
my %n_seen; |
2910
|
2079
|
|
|
|
|
2286
|
for (keys %{$nr}) { |
|
2079
|
|
|
|
|
5571
|
|
2911
|
9934
|
|
|
|
|
18379
|
$h{$_} = $ar->[$nr->{$_}]; |
2912
|
9934
|
|
|
|
|
16214
|
undef $n_seen{$nr->{$_}}; |
2913
|
|
|
|
|
|
|
} |
2914
|
2079
|
|
|
|
|
4437
|
for my $i (0..$last_elem_index) { |
2915
|
11001
|
100
|
|
|
|
18595
|
if (!exists($n_seen{$i})) { |
2916
|
1120
|
|
|
|
|
1509
|
my $k = _create_internal_column_name_from_its_number($i); |
2917
|
1120
|
50
|
|
|
|
3161
|
$h{$k} = $ar->[$i] if !exists $h{$k}; |
2918
|
|
|
|
|
|
|
} |
2919
|
|
|
|
|
|
|
} |
2920
|
|
|
|
|
|
|
|
2921
|
2079
|
100
|
|
|
|
5685
|
lock_keys(%h) if $self->{croak_if_error}; |
2922
|
|
|
|
|
|
|
|
2923
|
2079
|
|
|
|
|
16022
|
return \%h; |
2924
|
|
|
|
|
|
|
} |
2925
|
|
|
|
|
|
|
|
2926
|
|
|
|
|
|
|
sub get_next_record_hr { |
2927
|
2001
|
|
|
2001
|
1
|
2852
|
my $self = shift; |
2928
|
|
|
|
|
|
|
|
2929
|
2001
|
|
|
|
|
13072
|
validate_pos(@_, {type => SCALARREF, optional => 1}); |
2930
|
|
|
|
|
|
|
|
2931
|
2001
|
|
|
|
|
4485
|
my $refkey = $_[0]; |
2932
|
|
|
|
|
|
|
|
2933
|
2001
|
50
|
|
|
|
3628
|
return undef unless $self->_status_forward('S4'); |
2934
|
|
|
|
|
|
|
|
2935
|
1998
|
100
|
|
|
|
3831
|
if (!defined($self->{_current_record})) { |
2936
|
186
|
|
|
|
|
341
|
$self->{_current_record} = 0; |
2937
|
|
|
|
|
|
|
} else { |
2938
|
1812
|
|
|
|
|
2408
|
$self->{_current_record}++; |
2939
|
|
|
|
|
|
|
} |
2940
|
|
|
|
|
|
|
|
2941
|
1998
|
|
|
|
|
3250
|
my $ar = $self->{_flat}->[$self->{_current_record}]; |
2942
|
1998
|
100
|
|
|
|
3183
|
if (!defined($ar)) { |
2943
|
186
|
|
|
|
|
310
|
$self->{_current_record} = undef; |
2944
|
186
|
|
|
|
|
285
|
$$refkey = undef; |
2945
|
186
|
|
|
|
|
495
|
return undef; |
2946
|
|
|
|
|
|
|
} |
2947
|
|
|
|
|
|
|
|
2948
|
1812
|
|
|
|
|
2623
|
$$refkey = $self->{_current_record}; |
2949
|
|
|
|
|
|
|
|
2950
|
1812
|
|
|
|
|
3202
|
return $self->_ar_to_hr($ar); |
2951
|
|
|
|
|
|
|
} |
2952
|
|
|
|
|
|
|
|
2953
|
|
|
|
|
|
|
sub read { |
2954
|
60
|
|
|
60
|
1
|
19497
|
my $self = shift; |
2955
|
|
|
|
|
|
|
|
2956
|
60
|
|
|
|
|
417
|
validate_pos(@_); |
2957
|
|
|
|
|
|
|
|
2958
|
60
|
50
|
|
|
|
185
|
return undef unless $self->_status_backward('S3'); |
2959
|
60
|
100
|
|
|
|
132
|
return undef unless $self->_status_forward('S3'); |
2960
|
|
|
|
|
|
|
|
2961
|
51
|
|
|
|
|
175
|
$self->_register_pass("read()"); |
2962
|
|
|
|
|
|
|
|
2963
|
51
|
|
|
|
|
172
|
$self->_open_read(); |
2964
|
|
|
|
|
|
|
|
2965
|
51
|
|
|
|
|
84
|
my $ar; |
2966
|
|
|
|
|
|
|
my $row_hr; |
2967
|
51
|
|
|
|
|
142
|
while ($self->_read_one_record_from_input(\$ar, \$row_hr)) { |
2968
|
|
|
|
|
|
|
# Ben oui quoi... qu'est-ce que l'on peut bien faire d'autre ? |
2969
|
|
|
|
|
|
|
} |
2970
|
|
|
|
|
|
|
|
2971
|
49
|
|
|
|
|
151
|
$self->_close_read(); |
2972
|
49
|
50
|
|
|
|
109
|
return undef unless defined($ar); |
2973
|
|
|
|
|
|
|
|
2974
|
49
|
50
|
|
|
|
127
|
return undef unless $self->_status_reset(); |
2975
|
|
|
|
|
|
|
|
2976
|
49
|
|
|
|
|
244
|
return $self; |
2977
|
|
|
|
|
|
|
} |
2978
|
|
|
|
|
|
|
|
2979
|
|
|
|
|
|
|
# |
2980
|
|
|
|
|
|
|
# Initially, _read_all_in_mem was intended for the test plan. |
2981
|
|
|
|
|
|
|
# |
2982
|
|
|
|
|
|
|
# Turned out to be sometimes useful for user, thus, is no longer private since 1.1.5. |
2983
|
|
|
|
|
|
|
# Private version below is kept for compatibility. |
2984
|
|
|
|
|
|
|
# |
2985
|
|
|
|
|
|
|
sub read_all_in_mem { |
2986
|
1
|
|
|
1
|
1
|
4
|
my $self = shift; |
2987
|
|
|
|
|
|
|
|
2988
|
1
|
|
|
|
|
3
|
return $self->_read_all_in_mem(); |
2989
|
|
|
|
|
|
|
} |
2990
|
|
|
|
|
|
|
|
2991
|
|
|
|
|
|
|
sub _read_all_in_mem { |
2992
|
9
|
|
|
9
|
|
1002
|
my $self = shift; |
2993
|
|
|
|
|
|
|
|
2994
|
9
|
50
|
|
|
|
17
|
return 0 unless $self->_status_backward('S3'); |
2995
|
9
|
50
|
|
|
|
21
|
return 0 unless $self->_status_forward('S4'); |
2996
|
|
|
|
|
|
|
|
2997
|
9
|
|
|
|
|
63
|
return $self; |
2998
|
|
|
|
|
|
|
} |
2999
|
|
|
|
|
|
|
|
3000
|
|
|
|
|
|
|
sub _render { |
3001
|
0
|
|
|
0
|
|
0
|
my $v = $_[0]; |
3002
|
|
|
|
|
|
|
|
3003
|
0
|
0
|
0
|
|
|
0
|
if (length($v) == 1 and ord($v) < 32) { |
3004
|
0
|
|
|
|
|
0
|
my $n = ord($v); |
3005
|
0
|
0
|
|
|
|
0
|
return '\n' if $n == 10; |
3006
|
0
|
0
|
|
|
|
0
|
return '\r' if $n == 13; |
3007
|
0
|
0
|
|
|
|
0
|
return '\t' if $n == 9; |
3008
|
0
|
0
|
|
|
|
0
|
return '\f' if $n == 12; |
3009
|
0
|
0
|
|
|
|
0
|
return '\b' if $n == 8; |
3010
|
0
|
0
|
|
|
|
0
|
return '\a' if $n == 7; |
3011
|
0
|
0
|
|
|
|
0
|
return '\e' if $n == 27; |
3012
|
0
|
|
|
|
|
0
|
return '\0' . oct($n); |
3013
|
|
|
|
|
|
|
} |
3014
|
0
|
|
|
|
|
0
|
return $v; |
3015
|
|
|
|
|
|
|
} |
3016
|
|
|
|
|
|
|
|
3017
|
|
|
|
|
|
|
sub print_id { |
3018
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
3019
|
|
|
|
|
|
|
|
3020
|
0
|
|
|
|
|
0
|
$self->_printf("-- " . $self->get_in_file_disp() . ":\n"); |
3021
|
0
|
|
|
|
|
0
|
$self->_printf("sep_char: " . _render($self->get_sep_char()) . "\n"); |
3022
|
0
|
|
|
|
|
0
|
$self->_printf("escape_char: " . _render($self->get_escape_char()) . "\n"); |
3023
|
0
|
|
|
|
|
0
|
$self->_printf("in_encoding: " . _render($self->get_in_encoding()) . "\n"); |
3024
|
0
|
0
|
|
|
|
0
|
$self->_printf("is_always_quoted: " . ($self->get_is_always_quoted() ? 'yes' : 'no') . "\n"); |
3025
|
|
|
|
|
|
|
|
3026
|
0
|
|
|
|
|
0
|
my @coldata = $self->get_coldata(); |
3027
|
0
|
|
|
|
|
0
|
my @disp; |
3028
|
0
|
|
|
|
|
0
|
push @disp, [ '#', 'FIELD', 'HEADER', 'EXT DATA', 'DATETIME FORMAT', 'DATETIME LOCALE' ]; |
3029
|
0
|
|
|
|
|
0
|
push @disp, [ map { my $s = $_; $s =~ s/./-/g; $s } @{$disp[0]} ]; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
3030
|
0
|
|
|
|
|
0
|
for my $i (0..$#coldata) { |
3031
|
0
|
|
|
|
|
0
|
my $col = $coldata[$i]; |
3032
|
|
|
|
|
|
|
|
3033
|
0
|
|
|
|
|
0
|
my @row; |
3034
|
0
|
|
|
|
|
0
|
push @row, "$i"; |
3035
|
0
|
0
|
|
|
|
0
|
push @row, (defined($col->[$_]) ? ($col->[$_] . '') : '') for (0..4); |
3036
|
0
|
|
|
|
|
0
|
map { s/\n/\\n/g; s/\r/\\r/g; s/\t/\\t/g } @row; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
3037
|
0
|
|
|
|
|
0
|
push @disp, [ @row ]; |
3038
|
|
|
|
|
|
|
} |
3039
|
0
|
|
|
|
|
0
|
my $n = @{$disp[-1]}; |
|
0
|
|
|
|
|
0
|
|
3040
|
0
|
|
|
|
|
0
|
my @max = (-1) x $n; |
3041
|
0
|
|
|
|
|
0
|
for my $l (@disp) { |
3042
|
0
|
0
|
|
|
|
0
|
do { $max[$_] = length($l->[$_]) if $max[$_] < length($l->[$_]) } for (0 .. $n - 1); |
|
0
|
|
|
|
|
0
|
|
3043
|
|
|
|
|
|
|
} |
3044
|
0
|
|
|
|
|
0
|
my $s = join(' ', map { "%-${_}s" } @max); |
|
0
|
|
|
|
|
0
|
|
3045
|
0
|
|
|
|
|
0
|
$self->_print("\n"); |
3046
|
0
|
|
|
|
|
0
|
$self->_printf("$s\n", @{$_}) for (@disp); |
|
0
|
|
|
|
|
0
|
|
3047
|
|
|
|
|
|
|
} |
3048
|
|
|
|
|
|
|
|
3049
|
|
|
|
|
|
|
sub set_out_file { |
3050
|
2
|
|
|
2
|
1
|
5260
|
my $self = shift; |
3051
|
2
|
|
|
|
|
31
|
validate_pos(@_, {type => SCALAR}); |
3052
|
|
|
|
|
|
|
|
3053
|
2
|
|
|
|
|
10
|
my ($out_file) = @_; |
3054
|
2
|
|
|
|
|
8
|
$self->{out_file} = $out_file; |
3055
|
|
|
|
|
|
|
|
3056
|
2
|
|
|
|
|
11
|
return $self; |
3057
|
|
|
|
|
|
|
} |
3058
|
|
|
|
|
|
|
|
3059
|
|
|
|
|
|
|
# Subrefs set with out_map |
3060
|
|
|
|
|
|
|
sub _execute_write_update_before { |
3061
|
367
|
|
|
367
|
|
614
|
my ($self, $ar) = @_; |
3062
|
|
|
|
|
|
|
|
3063
|
367
|
|
|
|
|
529
|
my $columns_ar = $self->{_columns}; |
3064
|
|
|
|
|
|
|
|
3065
|
367
|
|
|
|
|
511
|
my $wpre = $self->{_write_update_before_ar}; |
3066
|
367
|
|
|
|
|
487
|
for my $i (0..$#{$columns_ar}) { |
|
367
|
|
|
|
|
753
|
|
3067
|
1177
|
|
|
|
|
1495
|
my $subref = $wpre->[$i]; |
3068
|
1177
|
100
|
|
|
|
2077
|
next unless defined($subref); |
3069
|
|
|
|
|
|
|
|
3070
|
159
|
|
|
|
|
181
|
do { |
3071
|
159
|
|
|
|
|
257
|
local $_ = $ar->[$i]; |
3072
|
159
|
|
|
|
|
212
|
my $field = $columns_ar->[$i]; |
3073
|
159
|
|
|
|
|
301
|
my $new_val = $subref->($self, $field); |
3074
|
156
|
|
|
|
|
367
|
$ar->[$i] = $new_val; |
3075
|
|
|
|
|
|
|
} |
3076
|
|
|
|
|
|
|
|
3077
|
|
|
|
|
|
|
} |
3078
|
|
|
|
|
|
|
} |
3079
|
|
|
|
|
|
|
|
3080
|
|
|
|
|
|
|
# Take into account write_fields if it got set |
3081
|
|
|
|
|
|
|
sub _apply_write_fields { |
3082
|
466
|
|
|
466
|
|
797
|
my ($self, $ar) = @_; |
3083
|
|
|
|
|
|
|
|
3084
|
466
|
|
|
|
|
564
|
my @final; |
3085
|
|
|
|
|
|
|
|
3086
|
466
|
|
|
|
|
1308
|
my $tmp = _get_def($self->{out_fields}, $self->{write_fields}); |
3087
|
466
|
100
|
|
|
|
1066
|
my @wf = @{$tmp} if defined($tmp); |
|
16
|
|
|
|
|
26
|
|
3088
|
|
|
|
|
|
|
|
3089
|
466
|
100
|
|
|
|
1001
|
return unless @wf; |
3090
|
|
|
|
|
|
|
|
3091
|
16
|
|
|
|
|
18
|
my %named_fields = %{$self->{_named_fields}}; |
|
16
|
|
|
|
|
63
|
|
3092
|
16
|
|
|
|
|
37
|
for my $i (0..$#wf) { |
3093
|
40
|
|
|
|
|
47
|
my $field = $wf[$i]; |
3094
|
40
|
100
|
66
|
|
|
108
|
my $tmp = $ar->[$named_fields{$field}] if defined($field) and $field ne ''; |
3095
|
|
|
|
|
|
|
|
3096
|
|
|
|
|
|
|
# Put here any post-processing of value |
3097
|
|
|
|
|
|
|
# WARNING |
3098
|
|
|
|
|
|
|
# $tmp can be undef |
3099
|
|
|
|
|
|
|
# ... |
3100
|
|
|
|
|
|
|
|
3101
|
40
|
|
|
|
|
71
|
$final[$i] = $tmp; |
3102
|
|
|
|
|
|
|
} |
3103
|
16
|
|
|
|
|
50
|
$_[1] = [ @final ]; |
3104
|
|
|
|
|
|
|
} |
3105
|
|
|
|
|
|
|
|
3106
|
|
|
|
|
|
|
sub write { |
3107
|
109
|
|
|
109
|
1
|
23249
|
my $self = shift; |
3108
|
|
|
|
|
|
|
|
3109
|
109
|
|
|
|
|
728
|
validate_pos(@_); |
3110
|
|
|
|
|
|
|
|
3111
|
109
|
50
|
|
|
|
390
|
return undef unless $self->_status_forward('S3'); |
3112
|
|
|
|
|
|
|
|
3113
|
104
|
|
|
|
|
211
|
my $verbose = $self->{verbose}; |
3114
|
104
|
|
|
|
|
170
|
my $_debug = $self->{_debug}; |
3115
|
104
|
|
|
|
|
175
|
my $_debugh = $self->{_debugh}; |
3116
|
|
|
|
|
|
|
|
3117
|
104
|
|
|
|
|
195
|
my $out_file = $self->{out_file}; |
3118
|
|
|
|
|
|
|
|
3119
|
104
|
|
|
|
|
176
|
my %stats; |
3120
|
|
|
|
|
|
|
|
3121
|
104
|
50
|
|
|
|
213
|
$self->_print("-- $out_file writing start\n") if $verbose; |
3122
|
104
|
|
|
|
|
157
|
my $rows_written = 0; |
3123
|
|
|
|
|
|
|
|
3124
|
104
|
|
|
|
|
164
|
my $outh = $self->{outh}; |
3125
|
|
|
|
|
|
|
|
3126
|
104
|
|
|
|
|
201
|
$self->{_close_outh_when_finished} = 0; |
3127
|
104
|
50
|
|
|
|
245
|
unless (defined($outh)) { |
3128
|
104
|
50
|
|
|
|
286
|
if ($out_file eq '') { |
3129
|
0
|
|
|
|
|
0
|
$outh = \*STDOUT; |
3130
|
|
|
|
|
|
|
} else { |
3131
|
104
|
50
|
|
|
|
7450
|
unless (open($outh, '>', $out_file)) { |
3132
|
0
|
|
|
|
|
0
|
$self->_print_error("unable to open file '$out_file': $!"); |
3133
|
0
|
|
|
|
|
0
|
return undef; |
3134
|
|
|
|
|
|
|
} |
3135
|
104
|
|
|
|
|
449
|
$self->{_close_outh_when_finished} = 1; |
3136
|
|
|
|
|
|
|
} |
3137
|
104
|
|
|
|
|
226
|
$self->{outh} = $outh; |
3138
|
|
|
|
|
|
|
} |
3139
|
|
|
|
|
|
|
|
3140
|
104
|
50
|
|
|
|
264
|
unless ($self->{_leave_encoding_alone}) { |
3141
|
|
|
|
|
|
|
my $enc = (defined($self->{_inh_encoding}) ? |
3142
|
|
|
|
|
|
|
$self->{_inh_encoding} : |
3143
|
104
|
50
|
|
|
|
341
|
$DEFAULT_OUT_ENCODING); |
3144
|
|
|
|
|
|
|
|
3145
|
|
|
|
|
|
|
# out_encoding option takes precedence |
3146
|
104
|
100
|
|
|
|
258
|
$enc = $self->{out_encoding} if defined($self->{out_encoding}); |
3147
|
104
|
|
|
|
|
289
|
my $m = ":encoding($enc)"; |
3148
|
104
|
50
|
66
|
|
|
264
|
if (_is_utf8($enc) and $self->{out_utf8_bom}) { |
3149
|
0
|
|
|
|
|
0
|
$m .= ':via(File::BOM)'; |
3150
|
|
|
|
|
|
|
} |
3151
|
|
|
|
|
|
|
|
3152
|
104
|
50
|
33
|
|
|
423
|
if ($OS_IS_PLAIN_WINDOWS and $FIX_PERLMONKS_823214) { |
3153
|
|
|
|
|
|
|
# Tested with UTF-16LE, NOT tested with UTF-16BE (it should be the same story) |
3154
|
0
|
0
|
|
|
|
0
|
$m = ":raw:perlio:$m:crlf" if $enc =~ /^utf-?16/i; |
3155
|
|
|
|
|
|
|
} |
3156
|
|
|
|
|
|
|
|
3157
|
104
|
|
|
|
|
982
|
binmode $outh, $m; |
3158
|
104
|
50
|
|
|
|
10140
|
print($_debugh "Encoding string used for output: $m\n") if $_debug; |
3159
|
|
|
|
|
|
|
} |
3160
|
|
|
|
|
|
|
|
3161
|
104
|
|
|
|
|
224
|
my $escape_char = $self->{escape_char}; |
3162
|
104
|
|
|
|
|
195
|
my $quote_char = $self->{quote_char}; |
3163
|
|
|
|
|
|
|
|
3164
|
104
|
|
|
|
|
160
|
my %opts; |
3165
|
104
|
|
|
|
|
202
|
$opts{binary} = 1; |
3166
|
104
|
|
|
|
|
199
|
$opts{eol} = "\n"; |
3167
|
|
|
|
|
|
|
|
3168
|
104
|
50
|
|
|
|
350
|
$opts{sep_char} = $self->{sep_char} if defined($self->{sep_char}); |
3169
|
104
|
100
|
|
|
|
244
|
$opts{sep_char} = $self->{out_sep_char} if defined($self->{out_sep_char}); |
3170
|
|
|
|
|
|
|
|
3171
|
104
|
50
|
|
|
|
316
|
$opts{quote_char} = $self->{quote_char} if defined($self->{quote_char}); |
3172
|
104
|
50
|
|
|
|
240
|
$opts{quote_char} = $self->{out_quote_char} if defined($self->{out_quote_char}); |
3173
|
|
|
|
|
|
|
|
3174
|
104
|
100
|
|
|
|
286
|
$opts{escape_char} = $self->{escape_char} if defined($self->{escape_char}); |
3175
|
104
|
100
|
|
|
|
219
|
$opts{escape_char} = $self->{out_escape_char} if defined($self->{out_escape_char}); |
3176
|
|
|
|
|
|
|
|
3177
|
104
|
|
|
|
|
211
|
$opts{always_quote} = $self->{_is_always_quoted}; |
3178
|
104
|
100
|
|
|
|
215
|
$opts{always_quote} = $self->{out_always_quote} if defined($self->{out_always_quote}); |
3179
|
|
|
|
|
|
|
|
3180
|
104
|
|
|
|
|
774
|
my $csvout = Text::CSV->new({ %opts }); |
3181
|
104
|
50
|
|
|
|
16000
|
if (!defined($csvout)) { |
3182
|
0
|
|
|
|
|
0
|
$self->_print_error("error creating output Text::CSV object"); |
3183
|
0
|
|
|
|
|
0
|
return undef; |
3184
|
|
|
|
|
|
|
} |
3185
|
|
|
|
|
|
|
|
3186
|
104
|
|
|
|
|
440
|
my $write_filter_hr = _get_def($self->{out_filter}, $self->{write_filter_hr}); |
3187
|
|
|
|
|
|
|
|
3188
|
104
|
100
|
66
|
|
|
673
|
if (($self->{has_headers} and |
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
3189
|
|
|
|
|
|
|
!(defined($self->{out_has_headers}) and !$self->{out_has_headers})) |
3190
|
|
|
|
|
|
|
or $self->{out_has_headers}) { |
3191
|
102
|
|
|
|
|
215
|
my $ar = [ ]; |
3192
|
102
|
100
|
|
|
|
245
|
if ($self->{has_headers}) { |
3193
|
98
|
|
|
|
|
192
|
$ar = $self->{_headers}; |
3194
|
|
|
|
|
|
|
} else { |
3195
|
4
|
|
|
|
|
7
|
my $nf = $self->{_named_fields}; |
3196
|
4
|
|
|
|
|
6
|
$ar->[$nf->{$_}] = $_ for (keys %{$nf}); |
|
4
|
|
|
|
|
20
|
|
3197
|
|
|
|
|
|
|
} |
3198
|
|
|
|
|
|
|
|
3199
|
102
|
100
|
|
|
|
271
|
if (exists $self->{_out_headers}) { |
3200
|
3
|
|
|
|
|
4
|
my $h = $self->{_out_headers}; |
3201
|
3
|
|
|
|
|
5
|
for (keys %{$self->{_named_fields}}) { |
|
3
|
|
|
|
|
10
|
|
3202
|
12
|
100
|
|
|
|
22
|
if (exists $h->{$_}) { |
3203
|
5
|
|
|
|
|
13
|
$ar->[$self->{_named_fields}->{$_}] = $h->{$_}; |
3204
|
|
|
|
|
|
|
} |
3205
|
|
|
|
|
|
|
} |
3206
|
|
|
|
|
|
|
} |
3207
|
|
|
|
|
|
|
|
3208
|
102
|
|
|
|
|
347
|
$self->_apply_write_fields($ar); |
3209
|
|
|
|
|
|
|
|
3210
|
102
|
|
|
|
|
1568
|
$csvout->print($outh, $ar); |
3211
|
102
|
|
|
|
|
1115
|
$rows_written++; |
3212
|
|
|
|
|
|
|
} |
3213
|
|
|
|
|
|
|
|
3214
|
104
|
|
|
|
|
182
|
my $do_status_reset = 0; |
3215
|
|
|
|
|
|
|
|
3216
|
|
|
|
|
|
|
|
3217
|
|
|
|
|
|
|
# |
3218
|
|
|
|
|
|
|
# FIXME!!! |
3219
|
|
|
|
|
|
|
# |
3220
|
|
|
|
|
|
|
# Instead of this duplication of code, provide AutoCSV with a "create iterator sub" feature to |
3221
|
|
|
|
|
|
|
# iterate over all records, whatever is going on behind the scene (in-memory or read input). |
3222
|
|
|
|
|
|
|
# |
3223
|
|
|
|
|
|
|
# Such an iterator would also benefit to module users. |
3224
|
|
|
|
|
|
|
# |
3225
|
|
|
|
|
|
|
|
3226
|
|
|
|
|
|
|
|
3227
|
104
|
100
|
|
|
|
280
|
if ($self->{_status} == 4) { |
3228
|
|
|
|
|
|
|
|
3229
|
|
|
|
|
|
|
# |
3230
|
|
|
|
|
|
|
# The content is available in-memory: we write from what we have in-memory then... |
3231
|
|
|
|
|
|
|
# |
3232
|
|
|
|
|
|
|
|
3233
|
25
|
|
|
|
|
80
|
my @keys = $self->get_keys(); |
3234
|
25
|
|
|
|
|
48
|
my @ordered_keys = @keys; |
3235
|
25
|
100
|
|
|
|
65
|
if (exists $self->{'out_orderby'}) { |
3236
|
1
|
|
|
|
|
2
|
my @orderby = @{$self->{'out_orderby'}}; |
|
1
|
|
|
|
|
2
|
|
3237
|
|
|
|
|
|
|
@ordered_keys = sort { |
3238
|
1
|
|
|
|
|
4
|
for my $f (@orderby) { |
|
7
|
|
|
|
|
12
|
|
3239
|
8
|
|
|
|
|
12
|
my $cmp = $self->get_cell($a, $f) cmp $self->get_cell($b, $f); |
3240
|
8
|
100
|
|
|
|
20
|
return $cmp if $cmp; |
3241
|
|
|
|
|
|
|
} |
3242
|
0
|
|
|
|
|
0
|
return 0; |
3243
|
|
|
|
|
|
|
} @keys; |
3244
|
|
|
|
|
|
|
} |
3245
|
|
|
|
|
|
|
|
3246
|
|
|
|
|
|
|
# for my $k ($self->get_keys()) { |
3247
|
25
|
|
|
|
|
54
|
for my $k (@ordered_keys) { |
3248
|
92
|
|
|
|
|
179
|
my $hr = $self->get_row_hr($k); |
3249
|
92
|
50
|
|
|
|
173
|
if (defined($write_filter_hr)) { |
3250
|
0
|
0
|
|
|
|
0
|
next unless $write_filter_hr->($hr); |
3251
|
|
|
|
|
|
|
} |
3252
|
92
|
|
|
|
|
117
|
my $ar = [ @{$self->get_row_ar($k)} ]; |
|
92
|
|
|
|
|
158
|
|
3253
|
|
|
|
|
|
|
|
3254
|
92
|
|
|
|
|
229
|
$self->_execute_write_update_before($ar); |
3255
|
92
|
|
|
|
|
182
|
$self->_apply_write_fields($ar); |
3256
|
|
|
|
|
|
|
|
3257
|
92
|
|
|
|
|
483
|
$csvout->print($outh, $ar); |
3258
|
92
|
|
|
|
|
747
|
$rows_written++; |
3259
|
|
|
|
|
|
|
} |
3260
|
|
|
|
|
|
|
|
3261
|
|
|
|
|
|
|
} else { |
3262
|
|
|
|
|
|
|
|
3263
|
|
|
|
|
|
|
# |
3264
|
|
|
|
|
|
|
# No in-memory content available: we read and write in parallel. |
3265
|
|
|
|
|
|
|
# |
3266
|
|
|
|
|
|
|
|
3267
|
79
|
|
|
|
|
275
|
$self->_register_pass("write()"); |
3268
|
|
|
|
|
|
|
|
3269
|
79
|
|
|
|
|
257
|
$self->_open_read(); |
3270
|
79
|
|
|
|
|
143
|
my $ar; |
3271
|
|
|
|
|
|
|
my $row_hr; |
3272
|
79
|
|
|
|
|
255
|
while ($self->_read_one_record_from_input(\$ar, \$row_hr)) { |
3273
|
301
|
100
|
|
|
|
557
|
if (defined($write_filter_hr)) { |
3274
|
46
|
100
|
|
|
|
103
|
next unless $write_filter_hr->($row_hr, \%stats, $self->get_recnum()); |
3275
|
|
|
|
|
|
|
} |
3276
|
275
|
|
|
|
|
504
|
$ar = [ @{$ar} ]; |
|
275
|
|
|
|
|
655
|
|
3277
|
|
|
|
|
|
|
|
3278
|
275
|
|
|
|
|
831
|
$self->_execute_write_update_before($ar); |
3279
|
272
|
|
|
|
|
617
|
$self->_apply_write_fields($ar); |
3280
|
|
|
|
|
|
|
|
3281
|
272
|
|
|
|
|
1571
|
$csvout->print($outh, $ar); |
3282
|
272
|
|
|
|
|
2270
|
$rows_written++; |
3283
|
|
|
|
|
|
|
} |
3284
|
72
|
|
|
|
|
254
|
$self->_close_read(); |
3285
|
|
|
|
|
|
|
|
3286
|
72
|
|
|
|
|
127
|
$do_status_reset = 1 |
3287
|
|
|
|
|
|
|
} |
3288
|
|
|
|
|
|
|
|
3289
|
97
|
|
|
|
|
267
|
$self->_close_outh(); |
3290
|
|
|
|
|
|
|
|
3291
|
97
|
50
|
|
|
|
286
|
if ($verbose) { |
3292
|
0
|
|
|
|
|
0
|
$self->_print("-- $out_file writing end: $rows_written row(s) written\n"); |
3293
|
0
|
|
|
|
|
0
|
for my $k (sort keys %stats) { |
3294
|
0
|
|
|
|
|
0
|
$self->_printf(" %7d %s\n", $stats{$k}, $k); |
3295
|
|
|
|
|
|
|
} |
3296
|
|
|
|
|
|
|
} |
3297
|
|
|
|
|
|
|
|
3298
|
97
|
100
|
|
|
|
229
|
if ($do_status_reset) { |
3299
|
72
|
50
|
|
|
|
231
|
return undef unless $self->_status_reset(); |
3300
|
|
|
|
|
|
|
} |
3301
|
97
|
|
|
|
|
1192
|
return $self; |
3302
|
|
|
|
|
|
|
} |
3303
|
|
|
|
|
|
|
|
3304
|
|
|
|
|
|
|
|
3305
|
|
|
|
|
|
|
|
3306
|
|
|
|
|
|
|
# |
3307
|
|
|
|
|
|
|
# * *** *************************************************************************** |
3308
|
|
|
|
|
|
|
# * *** *************************************************************************** |
3309
|
|
|
|
|
|
|
# * OBJ *************************************************************************** |
3310
|
|
|
|
|
|
|
# * *** *************************************************************************** |
3311
|
|
|
|
|
|
|
# * *** *************************************************************************** |
3312
|
|
|
|
|
|
|
# |
3313
|
|
|
|
|
|
|
|
3314
|
|
|
|
|
|
|
# |
3315
|
|
|
|
|
|
|
# The subs below assume Text::AutoCSV can be in status S4 = all in memory. |
3316
|
|
|
|
|
|
|
# |
3317
|
|
|
|
|
|
|
|
3318
|
|
|
|
|
|
|
|
3319
|
|
|
|
|
|
|
sub get_keys { |
3320
|
32
|
|
|
32
|
1
|
2003
|
my $self = shift; |
3321
|
32
|
|
|
|
|
216
|
validate_pos(@_); |
3322
|
|
|
|
|
|
|
|
3323
|
32
|
50
|
|
|
|
103
|
return undef unless $self->_status_forward('S4'); |
3324
|
|
|
|
|
|
|
|
3325
|
32
|
|
|
|
|
52
|
my $last_key = @{$self->{_flat}} - 1; |
|
32
|
|
|
|
|
82
|
|
3326
|
32
|
|
|
|
|
85
|
my @r = (0..$last_key); |
3327
|
|
|
|
|
|
|
|
3328
|
32
|
|
|
|
|
81
|
return @r; |
3329
|
|
|
|
|
|
|
} |
3330
|
|
|
|
|
|
|
|
3331
|
|
|
|
|
|
|
sub get_row_ar { |
3332
|
364
|
|
|
364
|
1
|
1051
|
my $self = shift; |
3333
|
364
|
|
|
|
|
2436
|
validate_pos(@_, {type => SCALAR}); |
3334
|
364
|
|
|
|
|
860
|
my ($key) = @_; |
3335
|
|
|
|
|
|
|
|
3336
|
364
|
50
|
|
|
|
703
|
return undef unless $self->_status_forward('S4'); |
3337
|
|
|
|
|
|
|
|
3338
|
364
|
50
|
|
|
|
669
|
unless (defined($key)) { |
3339
|
0
|
|
|
|
|
0
|
$self->_print_error("get_row_ar(): \$key is not defined!"); |
3340
|
0
|
|
|
|
|
0
|
return undef; |
3341
|
|
|
|
|
|
|
} |
3342
|
|
|
|
|
|
|
|
3343
|
364
|
100
|
|
|
|
758
|
$self->_print_error("unknown row '$key'") unless defined($self->{_flat}->[$key]); |
3344
|
364
|
|
|
|
|
721
|
return $self->{_flat}->[$key]; |
3345
|
|
|
|
|
|
|
} |
3346
|
|
|
|
|
|
|
|
3347
|
|
|
|
|
|
|
sub get_row_hr { |
3348
|
269
|
|
|
269
|
1
|
403
|
my $self = shift; |
3349
|
269
|
|
|
|
|
2074
|
validate_pos(@_, {type => SCALAR}); |
3350
|
269
|
|
|
|
|
717
|
my ($key) = @_; |
3351
|
|
|
|
|
|
|
|
3352
|
269
|
|
|
|
|
553
|
my $ar = $self->get_row_ar($key); |
3353
|
269
|
100
|
|
|
|
498
|
return undef unless defined($ar); |
3354
|
|
|
|
|
|
|
|
3355
|
267
|
|
|
|
|
533
|
return $self->_ar_to_hr($ar); |
3356
|
|
|
|
|
|
|
} |
3357
|
|
|
|
|
|
|
|
3358
|
|
|
|
|
|
|
# |
3359
|
|
|
|
|
|
|
# Could be made much more efficient (directly read $self->{_flat} instead of calling get_row_hr |
3360
|
|
|
|
|
|
|
# that itself calls get_row_ar). |
3361
|
|
|
|
|
|
|
# I leave it as is because get_hr_all is not good practice (it is not scalable), it was |
3362
|
|
|
|
|
|
|
# primarily done to ease test plan. |
3363
|
|
|
|
|
|
|
# |
3364
|
|
|
|
|
|
|
# By the way I may make it one day not available by default, requesting caller to tune some |
3365
|
|
|
|
|
|
|
# variable (like { $Text::AutoCSV::i_am_the_test_plan = 1 }) to expose it. |
3366
|
|
|
|
|
|
|
# |
3367
|
|
|
|
|
|
|
sub get_hr_all { |
3368
|
108
|
|
|
108
|
1
|
2663
|
my $self = shift; |
3369
|
108
|
|
|
|
|
771
|
validate_pos(@_); |
3370
|
|
|
|
|
|
|
|
3371
|
108
|
|
|
|
|
244
|
my @resp; |
3372
|
108
|
|
|
|
|
384
|
$self->reset_next_record_hr(); |
3373
|
108
|
|
|
|
|
327
|
while (my $hr = $self->get_next_record_hr()) { |
3374
|
414
|
|
|
|
|
1013
|
push @resp, $hr; |
3375
|
|
|
|
|
|
|
} |
3376
|
105
|
|
|
|
|
631
|
return @resp; |
3377
|
|
|
|
|
|
|
} |
3378
|
|
|
|
|
|
|
|
3379
|
|
|
|
|
|
|
sub get_recnum { |
3380
|
190
|
|
|
190
|
1
|
300
|
my $self = shift; |
3381
|
190
|
|
|
|
|
969
|
validate_pos(@_); |
3382
|
|
|
|
|
|
|
|
3383
|
190
|
50
|
|
|
|
501
|
return -1 unless $self->{_read_in_progress}; |
3384
|
190
|
|
|
|
|
362
|
return _get_def($self->{_row_read}, -1); |
3385
|
|
|
|
|
|
|
} |
3386
|
|
|
|
|
|
|
|
3387
|
|
|
|
|
|
|
sub _check_for_search { |
3388
|
906
|
|
|
906
|
|
1584
|
my ($self, $field) = @_; |
3389
|
906
|
50
|
|
|
|
1558
|
return undef unless $self->_status_forward('S4'); |
3390
|
|
|
|
|
|
|
|
3391
|
905
|
100
|
|
|
|
2450
|
return 1 if exists $self->{_named_fields}->{$field}; |
3392
|
|
|
|
|
|
|
$self->_print_error("search: unknown field '$field'", |
3393
|
6
|
|
|
|
|
34
|
0, ERR_UNKNOWN_FIELD, $self->{_named_fields}); |
3394
|
|
|
|
|
|
|
} |
3395
|
|
|
|
|
|
|
|
3396
|
|
|
|
|
|
|
sub get_cell { |
3397
|
20
|
|
|
20
|
1
|
561
|
my $self = shift; |
3398
|
20
|
|
|
|
|
174
|
validate_pos(@_, {type => SCALAR}, {type => SCALAR}); |
3399
|
20
|
|
|
|
|
57
|
my ($key, $field) = @_; |
3400
|
|
|
|
|
|
|
|
3401
|
20
|
50
|
|
|
|
34
|
return undef unless $self->_check_for_search($field); |
3402
|
19
|
|
|
|
|
36
|
my $row = $self->get_row_hr($key); |
3403
|
19
|
100
|
|
|
|
44
|
return $row unless defined($row); |
3404
|
18
|
|
|
|
|
61
|
return $row->{$field}; |
3405
|
|
|
|
|
|
|
} |
3406
|
|
|
|
|
|
|
|
3407
|
|
|
|
|
|
|
sub get_values { |
3408
|
9
|
|
|
9
|
1
|
3458
|
my $self = shift; |
3409
|
9
|
|
|
|
|
127
|
validate_pos(@_, {type => SCALAR}, {type => UNDEF | CODEREF, optional => 1}); |
3410
|
9
|
|
|
|
|
32
|
my ($field, $filter_subref) = @_; |
3411
|
|
|
|
|
|
|
|
3412
|
9
|
50
|
|
|
|
28
|
return undef unless $self->_check_for_search($field); |
3413
|
|
|
|
|
|
|
|
3414
|
9
|
|
|
|
|
18
|
my @values; |
3415
|
9
|
|
|
|
|
32
|
$self->reset_next_record_hr(); |
3416
|
9
|
|
|
|
|
23
|
while (my $hr = $self->get_next_record_hr()) { |
3417
|
53
|
100
|
|
|
|
94
|
if (defined($filter_subref)) { |
3418
|
23
|
|
|
|
|
40
|
local $_ = $hr->{$field}; |
3419
|
23
|
100
|
|
|
|
40
|
next unless $filter_subref->(); |
3420
|
|
|
|
|
|
|
} |
3421
|
42
|
|
|
|
|
171
|
push @values, $hr->{$field}; |
3422
|
|
|
|
|
|
|
} |
3423
|
9
|
|
|
|
|
47
|
return @values; |
3424
|
|
|
|
|
|
|
} |
3425
|
|
|
|
|
|
|
|
3426
|
|
|
|
|
|
|
sub _get_hash_and_projector { |
3427
|
561
|
|
|
561
|
|
926
|
my ($self, $field, $arg_opts) = @_; |
3428
|
|
|
|
|
|
|
|
3429
|
561
|
|
|
|
|
774
|
my $_debug = $self->{_debug}; |
3430
|
561
|
|
|
|
|
654
|
my $_debugh = $self->{_debugh}; |
3431
|
|
|
|
|
|
|
|
3432
|
561
|
50
|
|
|
|
905
|
my %opts = %{$arg_opts} if defined($arg_opts); |
|
561
|
|
|
|
|
1100
|
|
3433
|
|
|
|
|
|
|
|
3434
|
561
|
|
|
|
|
1685
|
my $opt_case = _get_def($opts{'case'}, $self->{search_case}, $DEF_SEARCH_CASE); |
3435
|
561
|
|
|
|
|
1546
|
my $opt_trim = _get_def($opts{'trim'}, $self->{search_trim}, $DEF_SEARCH_TRIM); |
3436
|
|
|
|
|
|
|
my $opt_ignore_empty = _get_def($opts{'ignore_empty'}, $self->{search_ignore_empty}, |
3437
|
561
|
|
|
|
|
1486
|
$DEF_SEARCH_IGNORE_EMPTY); |
3438
|
|
|
|
|
|
|
my $opt_ignacc = _get_def($opts{'ignore_accents'}, $self->{search_ignore_accents}, |
3439
|
561
|
|
|
|
|
1396
|
$DEF_SEARCH_IGNORE_ACCENTS); |
3440
|
|
|
|
|
|
|
|
3441
|
561
|
|
|
|
|
1315
|
my $opts_stringified = $opt_case . $opt_trim . $opt_ignore_empty . $opt_ignacc; |
3442
|
561
|
|
|
|
|
899
|
my $hash_name = "_h${field}_${opts_stringified}"; |
3443
|
561
|
|
|
|
|
798
|
my $projector_name = "_p${field}_${opts_stringified}"; |
3444
|
|
|
|
|
|
|
|
3445
|
561
|
100
|
66
|
|
|
1917
|
if (exists $self->{$hash_name} and exists $self->{$projector_name}) { |
|
|
50
|
33
|
|
|
|
|
3446
|
489
|
50
|
|
|
|
766
|
print($_debugh "Search by key '$field': using existing hash and projector (" . |
3447
|
|
|
|
|
|
|
"$hash_name, $projector_name)\n") if $_debug; |
3448
|
489
|
|
|
|
|
1315
|
return ($hash_name, $projector_name); |
3449
|
|
|
|
|
|
|
} elsif (exists $self->{$hash_name} or exists $self->{$projector_name}) { |
3450
|
0
|
|
|
|
|
0
|
confess "Man, check your $PKG module code now!"; |
3451
|
|
|
|
|
|
|
} |
3452
|
|
|
|
|
|
|
|
3453
|
72
|
50
|
|
|
|
147
|
print($_debugh "Search by key '$field': building hash\n") if $_debug; |
3454
|
|
|
|
|
|
|
|
3455
|
|
|
|
|
|
|
# |
3456
|
|
|
|
|
|
|
# Projectors |
3457
|
|
|
|
|
|
|
# |
3458
|
|
|
|
|
|
|
# The projector contains subs to derivate the search key from the field value. |
3459
|
|
|
|
|
|
|
# At the moment it is used to manage with case / without case searches and with trim / without trim |
3460
|
|
|
|
|
|
|
# searches (meaning, ignoring spaces at beginning and end of fields) |
3461
|
|
|
|
|
|
|
# |
3462
|
|
|
|
|
|
|
# Why naming it a projector? |
3463
|
|
|
|
|
|
|
# Because if you run it twice on a value, the second run should produce the same result, meaning: |
3464
|
|
|
|
|
|
|
# p(p(x)) = p(x) whatever x |
3465
|
|
|
|
|
|
|
# |
3466
|
|
|
|
|
|
|
|
3467
|
72
|
|
|
|
|
100
|
my @projectors; |
3468
|
|
|
|
|
|
|
|
3469
|
|
|
|
|
|
|
# Add case removal in the projector function list |
3470
|
72
|
100
|
|
1479
|
|
295
|
push @projectors, sub { return lc(shift); } unless $opt_case; |
|
1479
|
|
|
|
|
3507
|
|
3471
|
|
|
|
|
|
|
|
3472
|
|
|
|
|
|
|
# Add trim in the projector function list |
3473
|
72
|
100
|
|
|
|
148
|
if ($opt_trim) { |
3474
|
|
|
|
|
|
|
push @projectors, |
3475
|
|
|
|
|
|
|
sub { |
3476
|
1479
|
|
|
1479
|
|
6391
|
my $v = shift; |
3477
|
1479
|
|
|
|
|
4668
|
$v =~ s/^\s+|\s+$//g; |
3478
|
1479
|
|
|
|
|
3245
|
return $v; |
3479
|
57
|
|
|
|
|
170
|
}; |
3480
|
|
|
|
|
|
|
} |
3481
|
|
|
|
|
|
|
|
3482
|
|
|
|
|
|
|
# Add remove_accents in the projector function list |
3483
|
72
|
100
|
|
1886
|
|
239
|
push @projectors, sub { return remove_accents(shift); } if $opt_ignacc; |
|
1886
|
|
|
|
|
3049
|
|
3484
|
|
|
|
|
|
|
|
3485
|
|
|
|
|
|
|
my $projector = sub { |
3486
|
1906
|
|
|
1906
|
|
3318
|
my $v = _get_def($_[0], ''); |
3487
|
1906
|
|
|
|
|
3587
|
$v = $_->($v) foreach (@projectors); |
3488
|
1906
|
|
|
|
|
3177
|
return $v; |
3489
|
72
|
|
|
|
|
190
|
}; |
3490
|
|
|
|
|
|
|
|
3491
|
|
|
|
|
|
|
# |
3492
|
|
|
|
|
|
|
# Filter |
3493
|
|
|
|
|
|
|
# |
3494
|
|
|
|
|
|
|
# As opposed to projectors above (where a search key is transformed), the idea now is to ignore |
3495
|
|
|
|
|
|
|
# certain keys when doing a search. |
3496
|
|
|
|
|
|
|
# At the moment, used to manage searches with / without empty values. |
3497
|
|
|
|
|
|
|
# |
3498
|
|
|
|
|
|
|
# That is to say: shall we use empty value as a regular value to search on, as in |
3499
|
|
|
|
|
|
|
# my @results = $self->search('FIELDNAME', ''); |
3500
|
|
|
|
|
|
|
# ? |
3501
|
|
|
|
|
|
|
# |
3502
|
|
|
|
|
|
|
# Right now we don't use an array-based construct, that'd allow to chain filters with one another |
3503
|
|
|
|
|
|
|
# (as we now have only one filter to deal with), later, we may use an array of filters, as done with |
3504
|
|
|
|
|
|
|
# projectors... |
3505
|
|
|
|
|
|
|
# |
3506
|
|
|
|
|
|
|
|
3507
|
72
|
|
|
|
|
102
|
my $filter; |
3508
|
72
|
100
|
|
|
|
141
|
if ($opt_ignore_empty) { |
3509
|
1268
|
|
|
1268
|
|
2771
|
$filter = sub { return $_[0] ne ''; } |
3510
|
61
|
|
|
|
|
179
|
} else { |
3511
|
77
|
|
|
77
|
|
123
|
$filter = sub { return 1; } |
3512
|
11
|
|
|
|
|
30
|
} |
3513
|
|
|
|
|
|
|
|
3514
|
72
|
|
|
|
|
136
|
my %h; |
3515
|
|
|
|
|
|
|
my $k; |
3516
|
72
|
|
|
|
|
224
|
$self->reset_next_record_hr(); |
3517
|
72
|
|
|
|
|
202
|
while (my $hr = $self->get_next_record_hr(\$k)) { |
3518
|
1345
|
|
|
|
|
2072
|
my $kv = $hr->{$field}; |
3519
|
1345
|
|
|
|
|
2090
|
my $p = $projector->($kv); |
3520
|
1345
|
100
|
|
|
|
2153
|
unless ($filter->($p)) { |
3521
|
76
|
50
|
|
|
|
145
|
print($_debugh "Ignoring key value '$p' in hash build\n") if $_debug; |
3522
|
76
|
|
|
|
|
218
|
next; |
3523
|
|
|
|
|
|
|
} |
3524
|
1269
|
|
|
|
|
1689
|
push @{$h{$p}}, $k; |
|
1269
|
|
|
|
|
5686
|
|
3525
|
|
|
|
|
|
|
} |
3526
|
72
|
|
|
|
|
494
|
for (keys %h) { |
3527
|
1178
|
|
|
|
|
1330
|
@{$h{$_}} = sort { $a <=> $b } @{$h{$_}}; |
|
1178
|
|
|
|
|
2026
|
|
|
103
|
|
|
|
|
222
|
|
|
1178
|
|
|
|
|
2065
|
|
3528
|
|
|
|
|
|
|
} |
3529
|
|
|
|
|
|
|
|
3530
|
72
|
|
|
|
|
193
|
$self->{_hash_build_count}++; |
3531
|
72
|
|
|
|
|
805
|
$self->{$hash_name} = { %h }; |
3532
|
72
|
|
|
|
|
218
|
$self->{$projector_name} = $projector; |
3533
|
72
|
|
|
|
|
525
|
return ($hash_name, $projector_name); |
3534
|
|
|
|
|
|
|
} |
3535
|
|
|
|
|
|
|
|
3536
|
|
|
|
|
|
|
sub _get_hash_build_count { |
3537
|
6
|
|
|
6
|
|
878
|
my $self = shift; |
3538
|
|
|
|
|
|
|
|
3539
|
6
|
|
|
|
|
18
|
return _get_def($self->{_hash_build_count}, 0); |
3540
|
|
|
|
|
|
|
} |
3541
|
|
|
|
|
|
|
|
3542
|
|
|
|
|
|
|
sub search { |
3543
|
563
|
|
|
563
|
1
|
8507
|
my $self = shift; |
3544
|
563
|
|
|
|
|
4719
|
validate_pos(@_, |
3545
|
|
|
|
|
|
|
{type => SCALAR}, {type => UNDEF | SCALAR}, {type => UNDEF | HASHREF, optional => 1}); |
3546
|
563
|
|
|
|
|
1685
|
my ($field, $value, $param_opts) = @_; |
3547
|
|
|
|
|
|
|
|
3548
|
563
|
|
|
|
|
1140
|
my $croak_if_error = $self->{croak_if_error}; |
3549
|
|
|
|
|
|
|
|
3550
|
|
|
|
|
|
|
# |
3551
|
|
|
|
|
|
|
# FIXME? |
3552
|
|
|
|
|
|
|
# A bit overkill to check options each time search is called... |
3553
|
|
|
|
|
|
|
# To be thought about. |
3554
|
|
|
|
|
|
|
# |
3555
|
|
|
|
|
|
|
|
3556
|
563
|
100
|
|
|
|
1053
|
my @tmp = %{$param_opts} if $param_opts; |
|
421
|
|
|
|
|
924
|
|
3557
|
563
|
|
|
|
|
7579
|
my %opts = validate(@tmp, $SEARCH_VALIDATE_OPTIONS); |
3558
|
|
|
|
|
|
|
|
3559
|
562
|
50
|
|
|
|
1930
|
return undef unless $self->_check_for_search($field); |
3560
|
|
|
|
|
|
|
|
3561
|
|
|
|
|
|
|
# $self->_print_error("undef value in search call") if !defined($value); |
3562
|
561
|
50
|
|
|
|
959
|
$value = '' unless defined($value); |
3563
|
|
|
|
|
|
|
|
3564
|
561
|
|
|
|
|
1094
|
my ($hash_name, $projector_name) = $self->_get_hash_and_projector($field, \%opts); |
3565
|
|
|
|
|
|
|
|
3566
|
561
|
|
|
|
|
1358
|
my $ret = $self->{$hash_name}->{$self->{$projector_name}->($value)}; |
3567
|
|
|
|
|
|
|
|
3568
|
561
|
100
|
|
|
|
1590
|
return $ret if defined($ret); |
3569
|
185
|
|
|
|
|
516
|
return [ ]; |
3570
|
|
|
|
|
|
|
} |
3571
|
|
|
|
|
|
|
|
3572
|
|
|
|
|
|
|
sub search_1hr { |
3573
|
22
|
|
|
22
|
1
|
5923
|
my $self = shift; |
3574
|
22
|
|
|
|
|
275
|
validate_pos(@_, |
3575
|
|
|
|
|
|
|
{type => SCALAR}, {type => UNDEF | SCALAR}, {type => UNDEF | HASHREF, optional => 1}); |
3576
|
22
|
|
|
|
|
77
|
my ($field, $value, $arg_opts) = @_; |
3577
|
|
|
|
|
|
|
|
3578
|
22
|
|
|
|
|
49
|
my $r = $self->search($field, $value, $arg_opts); |
3579
|
|
|
|
|
|
|
|
3580
|
22
|
100
|
|
|
|
57
|
return undef unless defined($r->[0]); |
3581
|
|
|
|
|
|
|
|
3582
|
20
|
|
|
|
|
39
|
my $opts = _get_def($arg_opts, { }); |
3583
|
|
|
|
|
|
|
my $opt_ignore_ambiguous = _get_def($opts->{'ignore_ambiguous'}, |
3584
|
20
|
|
|
|
|
61
|
$self->{'search_ignore_ambiguous'}, $DEF_SEARCH_IGNORE_AMBIGUOUS); |
3585
|
|
|
|
|
|
|
|
3586
|
20
|
100
|
100
|
|
|
34
|
return undef if @{$r} >= 2 and !$opt_ignore_ambiguous; |
|
20
|
|
|
|
|
83
|
|
3587
|
14
|
|
|
|
|
42
|
return $self->get_row_hr($r->[0]); |
3588
|
|
|
|
|
|
|
} |
3589
|
|
|
|
|
|
|
|
3590
|
|
|
|
|
|
|
sub vlookup { |
3591
|
317
|
|
|
317
|
1
|
21706
|
my $self = shift; |
3592
|
317
|
|
|
|
|
4400
|
validate_pos(@_, {type => SCALAR}, {type => UNDEF | SCALAR}, {type => SCALAR}, |
3593
|
|
|
|
|
|
|
{type => UNDEF | HASHREF, optional => 1}); |
3594
|
317
|
|
|
|
|
1207
|
my ($searched_field, $value, $target_field, $arg_opts) = @_; |
3595
|
|
|
|
|
|
|
|
3596
|
317
|
|
|
|
|
735
|
my $r = $self->search($searched_field, $value, $arg_opts); |
3597
|
315
|
50
|
|
|
|
674
|
return undef unless $self->_check_for_search($target_field); |
3598
|
|
|
|
|
|
|
|
3599
|
314
|
|
|
|
|
627
|
my $opts = _get_def($arg_opts, { }); |
3600
|
314
|
100
|
66
|
|
|
745
|
unless (defined($r->[0])) { |
3601
|
|
|
|
|
|
|
return (exists $opts->{'value_if_not_found'} ? $opts->{'value_if_not_found'} : |
3602
|
143
|
100
|
|
|
|
510
|
$self->{'search_value_if_not_found'}); |
3603
|
|
|
|
|
|
|
} elsif (@{$r} >= 2) { |
3604
|
|
|
|
|
|
|
my $opt_ignore_ambiguous = _get_def($opts->{'ignore_ambiguous'}, |
3605
|
|
|
|
|
|
|
$self->{'search_ignore_ambiguous'}, $DEF_SEARCH_IGNORE_AMBIGUOUS); |
3606
|
|
|
|
|
|
|
return (exists $opts->{'value_if_ambiguous'} ? $opts->{'value_if_ambiguous'} : |
3607
|
|
|
|
|
|
|
$self->{'search_value_if_ambiguous'}) if !$opt_ignore_ambiguous; |
3608
|
|
|
|
|
|
|
} |
3609
|
|
|
|
|
|
|
|
3610
|
144
|
100
|
|
|
|
285
|
return $opts->{value_if_found} if exists $opts->{value_if_found}; |
3611
|
140
|
50
|
|
|
|
239
|
return $self->{search_value_if_found} if exists $opts->{search_value_if_found}; |
3612
|
|
|
|
|
|
|
|
3613
|
140
|
|
|
|
|
352
|
my $hr = $self->get_row_hr($r->[0]); |
3614
|
|
|
|
|
|
|
|
3615
|
140
|
|
|
|
|
591
|
return $hr->{$target_field}; |
3616
|
|
|
|
|
|
|
} |
3617
|
|
|
|
|
|
|
|
3618
|
|
|
|
|
|
|
1; |
3619
|
|
|
|
|
|
|
|
3620
|
|
|
|
|
|
|
__END__ |
3621
|
|
|
|
|
|
|
|
3622
|
|
|
|
|
|
|
=pod |
3623
|
|
|
|
|
|
|
|
3624
|
|
|
|
|
|
|
=encoding UTF-8 |
3625
|
|
|
|
|
|
|
|
3626
|
|
|
|
|
|
|
=head1 NAME |
3627
|
|
|
|
|
|
|
|
3628
|
|
|
|
|
|
|
Text::AutoCSV - helper module to automate the use of Text::CSV |
3629
|
|
|
|
|
|
|
|
3630
|
|
|
|
|
|
|
=head1 VERSION |
3631
|
|
|
|
|
|
|
|
3632
|
|
|
|
|
|
|
version 1.1.9 |
3633
|
|
|
|
|
|
|
|
3634
|
|
|
|
|
|
|
=head1 SYNOPSIS |
3635
|
|
|
|
|
|
|
|
3636
|
|
|
|
|
|
|
By default, Text::AutoCSV will detect the following characteristics of the input: |
3637
|
|
|
|
|
|
|
|
3638
|
|
|
|
|
|
|
- The separator, among ",", ";" and "\t" (tab) |
3639
|
|
|
|
|
|
|
|
3640
|
|
|
|
|
|
|
- The escape character, among '"' (double-quote) and '\\' (backslash) |
3641
|
|
|
|
|
|
|
|
3642
|
|
|
|
|
|
|
- Try UTF-8 and if it fails, fall back on latin1 |
3643
|
|
|
|
|
|
|
|
3644
|
|
|
|
|
|
|
- Read the header line and compute field names |
3645
|
|
|
|
|
|
|
|
3646
|
|
|
|
|
|
|
- If asked to (see L</fields_dates_auto>), detect any field that contains a DateTime value, trying |
3647
|
|
|
|
|
|
|
20 date formats, possibly followed by a time (6 time formats tested) |
3648
|
|
|
|
|
|
|
|
3649
|
|
|
|
|
|
|
- If asked to (see L</fields_dates>), detect DateTime format of certain fields, croak if no DateTime |
3650
|
|
|
|
|
|
|
format can be worked out |
3651
|
|
|
|
|
|
|
|
3652
|
|
|
|
|
|
|
- Fields identified as containing a DateTime value (L</fields_dates_auto> or L</fields_dates>) are |
3653
|
|
|
|
|
|
|
stored as DateTime objects by default |
3654
|
|
|
|
|
|
|
|
3655
|
|
|
|
|
|
|
Text::AutoCSV also provides methods to search on fields (using cached hash tables) and it can |
3656
|
|
|
|
|
|
|
populate the value of "remote" fields, made from joining 2 CSV files with a key-value search |
3657
|
|
|
|
|
|
|
|
3658
|
|
|
|
|
|
|
=head2 General |
3659
|
|
|
|
|
|
|
|
3660
|
|
|
|
|
|
|
use Text::AutoCSV; |
3661
|
|
|
|
|
|
|
|
3662
|
|
|
|
|
|
|
Text::AutoCSV->new()->write(); # Read CSV data from std input, write to std output |
3663
|
|
|
|
|
|
|
|
3664
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'f.csv')->write(); # Read CSV data from f.csv, write to std output |
3665
|
|
|
|
|
|
|
|
3666
|
|
|
|
|
|
|
# Read CSV data from f.csv, write to g.csv |
3667
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'f.csv', out_file => 'g.csv')->write(); |
3668
|
|
|
|
|
|
|
|
3669
|
|
|
|
|
|
|
# "Rewrite" CSV file by printing out records as a list (separated by line breaks) of field |
3670
|
|
|
|
|
|
|
# name followed by its value. |
3671
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', walker_hr => \&walk); |
3672
|
|
|
|
|
|
|
my @cols = $csv->get_fields_names(); |
3673
|
|
|
|
|
|
|
$csv->read(); |
3674
|
|
|
|
|
|
|
sub walk { |
3675
|
|
|
|
|
|
|
my %rec = %{$_[0]}; |
3676
|
|
|
|
|
|
|
for (@cols) { |
3677
|
|
|
|
|
|
|
next if $_ eq ''; |
3678
|
|
|
|
|
|
|
print("$_ => ", $rec{$_}, "\n"); |
3679
|
|
|
|
|
|
|
} |
3680
|
|
|
|
|
|
|
print("\n"); |
3681
|
|
|
|
|
|
|
} |
3682
|
|
|
|
|
|
|
|
3683
|
|
|
|
|
|
|
=head2 OBJ-ish functions |
3684
|
|
|
|
|
|
|
|
3685
|
|
|
|
|
|
|
# Identify column internal names with more flexibility as the default mechanism |
3686
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'zips.csv', |
3687
|
|
|
|
|
|
|
fields_hr => {'CITY' => '^(city|town)', 'ZIPCODE' => '^zip(code)?$'}); |
3688
|
|
|
|
|
|
|
# Get zipcode of Claix |
3689
|
|
|
|
|
|
|
my $z = $csv->vlookup('CITY', ' claix ', 'ZIPCODE'); |
3690
|
|
|
|
|
|
|
|
3691
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'zips.csv'); |
3692
|
|
|
|
|
|
|
# Get zipcode of Claix |
3693
|
|
|
|
|
|
|
my $z = $csv->vlookup('CITY', ' claix ', 'ZIPCODE'); |
3694
|
|
|
|
|
|
|
# Same as above, but vlookup is strict for case and spaces around |
3695
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'zips.csv', search_case => 1, search_trim => 0); |
3696
|
|
|
|
|
|
|
my $z = $csv->vlookup('CITY', 'Claix', 'ZIPCODE'); |
3697
|
|
|
|
|
|
|
|
3698
|
|
|
|
|
|
|
# Create field 'MYCITY' made by taking pers.csv' ZIP column value, looking it up in the |
3699
|
|
|
|
|
|
|
# ZIPCODE columns of zips.csv, taking CITY colmun value and naming it 'MYCITY'. Output is |
3700
|
|
|
|
|
|
|
# written in std output. |
3701
|
|
|
|
|
|
|
# If a zipcode is ambiguous, say it. |
3702
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'pers.csv') |
3703
|
|
|
|
|
|
|
->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv', |
3704
|
|
|
|
|
|
|
{ ignore_ambiguous => 0, value_if_ambiguous => '<duplicate zipcode found!>' })->write(); |
3705
|
|
|
|
|
|
|
|
3706
|
|
|
|
|
|
|
# Note the above can also be written using Text::AutoCSV level attributes: |
3707
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'pers.csv', |
3708
|
|
|
|
|
|
|
search_ignore_ambiguous => 0, search_value_if_ambiguous => '<duplicate zipcode found!>') |
3709
|
|
|
|
|
|
|
->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv')->write(); |
3710
|
|
|
|
|
|
|
|
3711
|
|
|
|
|
|
|
# Create 'MYCITY' field as above, then display some statistics |
3712
|
|
|
|
|
|
|
my $nom_compose = 0; |
3713
|
|
|
|
|
|
|
my $zip_not_found = 0; |
3714
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'pers.csv', walker_hr => \&walk) |
3715
|
|
|
|
|
|
|
->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv')->read(); |
3716
|
|
|
|
|
|
|
sub walk { |
3717
|
|
|
|
|
|
|
my $hr = shift; |
3718
|
|
|
|
|
|
|
$nom_compose++ if $hr->{'NAME'} =~ m/[- ]/; |
3719
|
|
|
|
|
|
|
$zip_not_found++ unless defined($hr->{'MYCITY'}); |
3720
|
|
|
|
|
|
|
} |
3721
|
|
|
|
|
|
|
print("Number of persons with a multi-part name: $nom_compose\n"); |
3722
|
|
|
|
|
|
|
print("Number of persons with unknown zipcode: $zip_not_found\n"); |
3723
|
|
|
|
|
|
|
|
3724
|
|
|
|
|
|
|
=head2 Updating |
3725
|
|
|
|
|
|
|
|
3726
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'names.csv', out_file => 'ucnames.csv', |
3727
|
|
|
|
|
|
|
read_post_update_hr => \&updt)->write(); |
3728
|
|
|
|
|
|
|
sub updt { $_[0]->{'LASTNAME'} =~ s/^.*$/\U&/; } |
3729
|
|
|
|
|
|
|
|
3730
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'squares.csv', out_file => 'checkedsquares.csv', |
3731
|
|
|
|
|
|
|
out_filter => \&wf)->write(); |
3732
|
|
|
|
|
|
|
sub wf { return ($_[0]->{'X'} ** 2 == $_[0]->{'SQUAREOFX'}); } |
3733
|
|
|
|
|
|
|
|
3734
|
|
|
|
|
|
|
# Add a field for the full name, made of the concatenation of the |
3735
|
|
|
|
|
|
|
# first name and the last name. |
3736
|
|
|
|
|
|
|
# Also display stats about empty full names. |
3737
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'dirpeople.csv', out_file => 'dirwithfn.csv', verbose => 1) |
3738
|
|
|
|
|
|
|
->field_add_computed('FULLNAME', \&calc_fn)->write(); |
3739
|
|
|
|
|
|
|
sub calc_fn { |
3740
|
|
|
|
|
|
|
my ($field, $hr, $stats) = @_; |
3741
|
|
|
|
|
|
|
my $fn = $hr->{'FIRSTNAME'} . ' ' . uc($hr->{'LASTNAME'}); |
3742
|
|
|
|
|
|
|
$stats->{'empty full name'}++ if $fn eq ' '; |
3743
|
|
|
|
|
|
|
return $fn; |
3744
|
|
|
|
|
|
|
} |
3745
|
|
|
|
|
|
|
|
3746
|
|
|
|
|
|
|
# Read a file with a lot of columns and keep only 2 columns in output |
3747
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'big.csv', out_file => 'addr.csv', |
3748
|
|
|
|
|
|
|
out_fields => ['NAME', 'ADDRESS']) |
3749
|
|
|
|
|
|
|
->out_header('ADDRESS', 'Postal Address') |
3750
|
|
|
|
|
|
|
->write(); |
3751
|
|
|
|
|
|
|
|
3752
|
|
|
|
|
|
|
=head2 Datetime management |
3753
|
|
|
|
|
|
|
|
3754
|
|
|
|
|
|
|
# Detect any field containing a DateTime value and convert it to yyyy-mm-dd whatever the |
3755
|
|
|
|
|
|
|
# input format is. |
3756
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', fields_dates_auto => 1, |
3757
|
|
|
|
|
|
|
out_dates_format => '%F')->write(); |
3758
|
|
|
|
|
|
|
|
3759
|
|
|
|
|
|
|
# Detect any field containing a DateTime value and convert it to a US DateTime whatever the |
3760
|
|
|
|
|
|
|
# input format is. |
3761
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', fields_dates_auto => 1, |
3762
|
|
|
|
|
|
|
out_dates_format => '%b %d, %Y, %I:%M:%S %p', out_dates_locale => 'en')->write(); |
3763
|
|
|
|
|
|
|
|
3764
|
|
|
|
|
|
|
# Find dates of specific formats and convert it into yyyy-mm-dd |
3765
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'raw.csv', out_file => 'cooked.csv', |
3766
|
|
|
|
|
|
|
dates_formats_to_try => ['%d_%m_%Y', '%m_%d_%Y', '%Y_%m_%d'], |
3767
|
|
|
|
|
|
|
out_dates_format => '%F')->write(); |
3768
|
|
|
|
|
|
|
|
3769
|
|
|
|
|
|
|
# Take the dates on columns 'LASTLOGIN' and 'CREATIONDATE' and convert it into French dates |
3770
|
|
|
|
|
|
|
# (day/month/year). |
3771
|
|
|
|
|
|
|
# Text::AutoCSV will croak if LASTLOGIN or CREATIONDATE do not contain a DateTime format. |
3772
|
|
|
|
|
|
|
# By default, Text::AutoCSV will try 20 different formats. |
3773
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', |
3774
|
|
|
|
|
|
|
fields_dates => ['LASTLOGIN', 'CREATIONDATE'], out_dates_format => '%d/%m/%Y')->write(); |
3775
|
|
|
|
|
|
|
|
3776
|
|
|
|
|
|
|
# Convert 2 DateTime fields into unix standard epoch |
3777
|
|
|
|
|
|
|
# Write -1 if DateTime is empty. |
3778
|
|
|
|
|
|
|
sub toepoch { return $_->epoch() if $_; -1; } |
3779
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'stats.csv', out_file => 'stats-epoch.csv', |
3780
|
|
|
|
|
|
|
fields_dates => ['ATIME', 'MTIME']) |
3781
|
|
|
|
|
|
|
->in_map('ATIME', \&toepoch) |
3782
|
|
|
|
|
|
|
->in_map('MTIME', \&toepoch) |
3783
|
|
|
|
|
|
|
->write(); |
3784
|
|
|
|
|
|
|
|
3785
|
|
|
|
|
|
|
# Do the other way round from above: convert 2 fields containing unix standard epoch into a |
3786
|
|
|
|
|
|
|
# string displaying a human-readable DateTime. |
3787
|
|
|
|
|
|
|
my $formatter = DateTime::Format::Strptime->new(pattern => 'DATE=%F, TIME=%T'); |
3788
|
|
|
|
|
|
|
sub fromepoch { |
3789
|
|
|
|
|
|
|
return $formatter->format_datetime(DateTime->from_epoch(epoch => $_)) if $_ >= 0; |
3790
|
|
|
|
|
|
|
''; |
3791
|
|
|
|
|
|
|
} |
3792
|
|
|
|
|
|
|
$csv = Text::AutoCSV->new(in_file => 'stats-epoch.csv', out_file => 'stats2.csv') |
3793
|
|
|
|
|
|
|
->in_map('ATIME', \&fromepoch) |
3794
|
|
|
|
|
|
|
->in_map('MTIME', \&fromepoch) |
3795
|
|
|
|
|
|
|
->write(); |
3796
|
|
|
|
|
|
|
|
3797
|
|
|
|
|
|
|
=head2 Miscellaneous |
3798
|
|
|
|
|
|
|
|
3799
|
|
|
|
|
|
|
use Text::AutoCSV 'remove_accents'; |
3800
|
|
|
|
|
|
|
# Output 'Francais: etre elementaire, Tcheque: sluzba dum' followed by a new line. |
3801
|
|
|
|
|
|
|
print remove_accents("Français: être élémentaire, Tchèque: služba dům"), "\n"; |
3802
|
|
|
|
|
|
|
|
3803
|
|
|
|
|
|
|
=for Pod::Coverage ERR_UNKNOWN_FIELD |
3804
|
|
|
|
|
|
|
|
3805
|
|
|
|
|
|
|
=head1 NAME |
3806
|
|
|
|
|
|
|
|
3807
|
|
|
|
|
|
|
Text::AutoCSV - helper module to automate the use of Text::CSV |
3808
|
|
|
|
|
|
|
|
3809
|
|
|
|
|
|
|
=head1 METHODS |
3810
|
|
|
|
|
|
|
|
3811
|
|
|
|
|
|
|
=head2 new |
3812
|
|
|
|
|
|
|
|
3813
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(%attr); |
3814
|
|
|
|
|
|
|
|
3815
|
|
|
|
|
|
|
(Class method) Returns a new instance of Text::AutoCSV. The object attributes are described by the |
3816
|
|
|
|
|
|
|
hash C<%attr> (can be empty). |
3817
|
|
|
|
|
|
|
|
3818
|
|
|
|
|
|
|
Currently the following attributes are available: |
3819
|
|
|
|
|
|
|
|
3820
|
|
|
|
|
|
|
=over 4 |
3821
|
|
|
|
|
|
|
|
3822
|
|
|
|
|
|
|
=item Preliminary note about L</fields_hr>, L</fields_ar> and L</fields_column_names> attributes |
3823
|
|
|
|
|
|
|
|
3824
|
|
|
|
|
|
|
By default, Text::AutoCSV assumes the input has a header and will use the field values of this first |
3825
|
|
|
|
|
|
|
line (the header) to work out the column internal names. These internal names are used everywhere in |
3826
|
|
|
|
|
|
|
Text::AutoCSV to designate columns. |
3827
|
|
|
|
|
|
|
|
3828
|
|
|
|
|
|
|
The values are transformed as follows: |
3829
|
|
|
|
|
|
|
|
3830
|
|
|
|
|
|
|
- All accents are removed using the exportable function L</remove_accents>. |
3831
|
|
|
|
|
|
|
|
3832
|
|
|
|
|
|
|
- Any non-alphanumeric character is removed (except underscore) and all letters are switched to |
3833
|
|
|
|
|
|
|
upper case. The regex to do this is |
3834
|
|
|
|
|
|
|
|
3835
|
|
|
|
|
|
|
s/[^[:alnum:]_]//gi; s/^.*$/\U$&/; |
3836
|
|
|
|
|
|
|
|
3837
|
|
|
|
|
|
|
Thus a header line of |
3838
|
|
|
|
|
|
|
|
3839
|
|
|
|
|
|
|
'Office Number 1,Office_2,Personal Number' |
3840
|
|
|
|
|
|
|
|
3841
|
|
|
|
|
|
|
will produce the internal column names |
3842
|
|
|
|
|
|
|
|
3843
|
|
|
|
|
|
|
'OFFICENUMBER1' (first column) |
3844
|
|
|
|
|
|
|
|
3845
|
|
|
|
|
|
|
'OFFICE_2' (second column) |
3846
|
|
|
|
|
|
|
|
3847
|
|
|
|
|
|
|
'PERSONALNUMBER' (third column). |
3848
|
|
|
|
|
|
|
|
3849
|
|
|
|
|
|
|
The attribute L</fields_hr>, L</fields_ar> or L</fields_column_names> (only one of the three is |
3850
|
|
|
|
|
|
|
useful at a time) allows to change this behavior. |
3851
|
|
|
|
|
|
|
|
3852
|
|
|
|
|
|
|
B<NOTE> |
3853
|
|
|
|
|
|
|
|
3854
|
|
|
|
|
|
|
The removal of accents is *not* a conversion to us-ascii, see L</remove_accents> for details. |
3855
|
|
|
|
|
|
|
|
3856
|
|
|
|
|
|
|
=item Preliminary note about fields reading |
3857
|
|
|
|
|
|
|
|
3858
|
|
|
|
|
|
|
Functions that are given a field name (L</get_cell>, L</vlookup>, L</field_add_copy>, ...) raise an |
3859
|
|
|
|
|
|
|
error if the field requested does not exist. |
3860
|
|
|
|
|
|
|
|
3861
|
|
|
|
|
|
|
B<SO WILL THE HASHREFS GIVEN BY Text::AutoCSV:> when a function returns a hashref (L</search_1hr>, |
3862
|
|
|
|
|
|
|
L</get_row_hr>, ...), the hash is locked with the C<lock_keys> function of C<Hash::Util>. Any |
3863
|
|
|
|
|
|
|
attempt to read a non-existing key from the hash causes a croak. This feature is de-activated if you |
3864
|
|
|
|
|
|
|
specified C<croak_if_error =E<gt> 0> when creating Text::AutoCSV object. |
3865
|
|
|
|
|
|
|
|
3866
|
|
|
|
|
|
|
=item in_file |
3867
|
|
|
|
|
|
|
|
3868
|
|
|
|
|
|
|
The name of the file to read CSV data from. |
3869
|
|
|
|
|
|
|
|
3870
|
|
|
|
|
|
|
If not specified or empty, read standard input. |
3871
|
|
|
|
|
|
|
|
3872
|
|
|
|
|
|
|
Example: |
3873
|
|
|
|
|
|
|
|
3874
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv'); |
3875
|
|
|
|
|
|
|
|
3876
|
|
|
|
|
|
|
=item inh |
3877
|
|
|
|
|
|
|
|
3878
|
|
|
|
|
|
|
File handle to read CSV data from. |
3879
|
|
|
|
|
|
|
Normally you don't want to specify this attribute. |
3880
|
|
|
|
|
|
|
|
3881
|
|
|
|
|
|
|
C<inh> is useful if you don't like the way Text::AutoCSV opens the input file for you. |
3882
|
|
|
|
|
|
|
|
3883
|
|
|
|
|
|
|
Example: |
3884
|
|
|
|
|
|
|
|
3885
|
|
|
|
|
|
|
open my $inh, "producecsv.sh|"; |
3886
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(inh => $inh); |
3887
|
|
|
|
|
|
|
|
3888
|
|
|
|
|
|
|
=item encoding |
3889
|
|
|
|
|
|
|
|
3890
|
|
|
|
|
|
|
Comma-separated list of encodings to try to read input. |
3891
|
|
|
|
|
|
|
|
3892
|
|
|
|
|
|
|
Note that finding the correct encoding of any given input is overkill. This script just tries |
3893
|
|
|
|
|
|
|
encodings one after the other, and selects the first one that does not trigger a warning during |
3894
|
|
|
|
|
|
|
reading of input. If all produce warnings, select the first one. |
3895
|
|
|
|
|
|
|
|
3896
|
|
|
|
|
|
|
The encoding chosen is used in output, unless attribute L</out_encoding> is specified. |
3897
|
|
|
|
|
|
|
|
3898
|
|
|
|
|
|
|
Value by default: 'UTF-8,latin1' |
3899
|
|
|
|
|
|
|
|
3900
|
|
|
|
|
|
|
B<IMPORTANT> |
3901
|
|
|
|
|
|
|
|
3902
|
|
|
|
|
|
|
If one tries something like C<encoding =E<gt> 'latin1,UTF-8'>, it'll almost never detect UTF-8 |
3903
|
|
|
|
|
|
|
because latin1 rarely triggers warnings during reading. It tends to be also true with encodings like |
3904
|
|
|
|
|
|
|
UTF-16 that can remain happy with various inputs (sometimes resulting in Western languages turned |
3905
|
|
|
|
|
|
|
into Chinese text). |
3906
|
|
|
|
|
|
|
|
3907
|
|
|
|
|
|
|
Ultimately this attribute should be used with a unique value. The result when using more than one |
3908
|
|
|
|
|
|
|
value can produce weird results and should be considered B<experimental>. |
3909
|
|
|
|
|
|
|
|
3910
|
|
|
|
|
|
|
Example: |
3911
|
|
|
|
|
|
|
|
3912
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'w.csv', encoding => 'UTF-16'); |
3913
|
|
|
|
|
|
|
|
3914
|
|
|
|
|
|
|
=item via |
3915
|
|
|
|
|
|
|
|
3916
|
|
|
|
|
|
|
Adds a C<via> to the file opening instruction performed by Text::AutoCSV. You don't want to use it |
3917
|
|
|
|
|
|
|
under normal circumstances. |
3918
|
|
|
|
|
|
|
|
3919
|
|
|
|
|
|
|
The value should start with a ':' character (Text::AutoCSV won't add one for you). |
3920
|
|
|
|
|
|
|
|
3921
|
|
|
|
|
|
|
Value by default: none |
3922
|
|
|
|
|
|
|
|
3923
|
|
|
|
|
|
|
Example: |
3924
|
|
|
|
|
|
|
|
3925
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', via => ':raw:perlio:UTF-32:crlf'); |
3926
|
|
|
|
|
|
|
|
3927
|
|
|
|
|
|
|
=item dont_mess_with_encoding |
3928
|
|
|
|
|
|
|
|
3929
|
|
|
|
|
|
|
If true, just ignore completely encoding and don't try to alter I/O operations with encoding |
3930
|
|
|
|
|
|
|
considerations (using C<binmode> instruction). Note that if inh attribute is specified, then |
3931
|
|
|
|
|
|
|
Text::AutoCSV will consider the caller manages encoding for himself and dont_mess_with_encoding will |
3932
|
|
|
|
|
|
|
be automatically set, too. |
3933
|
|
|
|
|
|
|
|
3934
|
|
|
|
|
|
|
B<IMPORTANT> |
3935
|
|
|
|
|
|
|
|
3936
|
|
|
|
|
|
|
This attribute does not mean perl will totally ignore encoding and would consider character strings |
3937
|
|
|
|
|
|
|
as bytes for example. The meaning of L</dont_mess_with_encoding> is that Text::AutoCSV itself will |
3938
|
|
|
|
|
|
|
totally ignore encoding matters, and leave it entirely to Perl' default. |
3939
|
|
|
|
|
|
|
|
3940
|
|
|
|
|
|
|
Value by default: |
3941
|
|
|
|
|
|
|
|
3942
|
|
|
|
|
|
|
0 if inh attribute is not set |
3943
|
|
|
|
|
|
|
1 if inh attribute is set |
3944
|
|
|
|
|
|
|
|
3945
|
|
|
|
|
|
|
Example: |
3946
|
|
|
|
|
|
|
|
3947
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', dont_mess_with_encoding => 1); |
3948
|
|
|
|
|
|
|
|
3949
|
|
|
|
|
|
|
=item sep_char |
3950
|
|
|
|
|
|
|
|
3951
|
|
|
|
|
|
|
Specify the CSV separator character. Turns off separator auto-detection. This attribute is passed as |
3952
|
|
|
|
|
|
|
is to C<Text::CSV-E<gt>new()>. |
3953
|
|
|
|
|
|
|
|
3954
|
|
|
|
|
|
|
Example: |
3955
|
|
|
|
|
|
|
|
3956
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', sep_char => ';'); |
3957
|
|
|
|
|
|
|
|
3958
|
|
|
|
|
|
|
=item quote_char |
3959
|
|
|
|
|
|
|
|
3960
|
|
|
|
|
|
|
Specify the field quote character. This attribute is passed as is to C<Text::CSV-E<gt>new()>. |
3961
|
|
|
|
|
|
|
|
3962
|
|
|
|
|
|
|
Value by default: double quote ('"') |
3963
|
|
|
|
|
|
|
|
3964
|
|
|
|
|
|
|
Example: |
3965
|
|
|
|
|
|
|
|
3966
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', quote_char => '\''); |
3967
|
|
|
|
|
|
|
|
3968
|
|
|
|
|
|
|
=item escape_char |
3969
|
|
|
|
|
|
|
|
3970
|
|
|
|
|
|
|
Specify the escape character. Turns off escape character auto-detection. This attribute is passed as |
3971
|
|
|
|
|
|
|
is to C<Text::CSV-E<gt>new()>. |
3972
|
|
|
|
|
|
|
|
3973
|
|
|
|
|
|
|
Value by default: backslash ('\\') |
3974
|
|
|
|
|
|
|
|
3975
|
|
|
|
|
|
|
Example: |
3976
|
|
|
|
|
|
|
|
3977
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', escape_char => '"'); |
3978
|
|
|
|
|
|
|
|
3979
|
|
|
|
|
|
|
=item in_csvobj |
3980
|
|
|
|
|
|
|
|
3981
|
|
|
|
|
|
|
Text::CSV object to use. |
3982
|
|
|
|
|
|
|
Normally you don't want to specify this attribute. |
3983
|
|
|
|
|
|
|
|
3984
|
|
|
|
|
|
|
By default, Text::AutoCSV will manage creating such an object and will work hard to detect the |
3985
|
|
|
|
|
|
|
parameters it requires. |
3986
|
|
|
|
|
|
|
|
3987
|
|
|
|
|
|
|
Defining C<in_csvobj> attribute turns off separator character and escape character auto-detection. |
3988
|
|
|
|
|
|
|
|
3989
|
|
|
|
|
|
|
Using this attribute workarounds Text::AutoCSV philosophy a bit, but you may need it in case |
3990
|
|
|
|
|
|
|
Text::AutoCSV behavior is not suitable for Text::CSV creation. |
3991
|
|
|
|
|
|
|
|
3992
|
|
|
|
|
|
|
Example: |
3993
|
|
|
|
|
|
|
|
3994
|
|
|
|
|
|
|
my $tcsv = Text::CSV->new(); |
3995
|
|
|
|
|
|
|
my $acsv = Text::AutoCSV->new(in_file => 'in.csv', in_csvobj => $tcsv); |
3996
|
|
|
|
|
|
|
|
3997
|
|
|
|
|
|
|
=item has_headers |
3998
|
|
|
|
|
|
|
|
3999
|
|
|
|
|
|
|
If true, Text::AutoCSV assumes the input has a header line. |
4000
|
|
|
|
|
|
|
|
4001
|
|
|
|
|
|
|
Value by default: 1 |
4002
|
|
|
|
|
|
|
|
4003
|
|
|
|
|
|
|
Example: |
4004
|
|
|
|
|
|
|
|
4005
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', has_headers => 0); |
4006
|
|
|
|
|
|
|
|
4007
|
|
|
|
|
|
|
=item fields_hr |
4008
|
|
|
|
|
|
|
|
4009
|
|
|
|
|
|
|
(Only if input has a header line) Hash ref that contains column internal names along with a regular |
4010
|
|
|
|
|
|
|
expression to find it in the header line. |
4011
|
|
|
|
|
|
|
For example if you have: |
4012
|
|
|
|
|
|
|
|
4013
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', |
4014
|
|
|
|
|
|
|
fields_hr => {'PHONE OFFICE' => '^office phone nu', |
4015
|
|
|
|
|
|
|
'PHONE PERSONAL' => '^personal phone nu'}); |
4016
|
|
|
|
|
|
|
|
4017
|
|
|
|
|
|
|
And the header line is |
4018
|
|
|
|
|
|
|
|
4019
|
|
|
|
|
|
|
'Personal Phone Number,Office Phone Number' |
4020
|
|
|
|
|
|
|
|
4021
|
|
|
|
|
|
|
the column name 'PHONE OFFICE' will designate the second column and the column name 'PHONE PERSONAL' |
4022
|
|
|
|
|
|
|
will designate the first column. |
4023
|
|
|
|
|
|
|
|
4024
|
|
|
|
|
|
|
You can choose column names like 'Phone Office' and 'Phone Personal' as well. |
4025
|
|
|
|
|
|
|
|
4026
|
|
|
|
|
|
|
The regex search is case insensitive. |
4027
|
|
|
|
|
|
|
|
4028
|
|
|
|
|
|
|
=item fields_ar |
4029
|
|
|
|
|
|
|
|
4030
|
|
|
|
|
|
|
(Only if input has a header line) Array ref that contains column internal names. The array is used |
4031
|
|
|
|
|
|
|
to create a hash ref of the same kind as L</fields_hr>, by wrapping the column name in a regex. The |
4032
|
|
|
|
|
|
|
names are surrounded by a leading '^' and a trailing '$', meaning, the name must match the entire |
4033
|
|
|
|
|
|
|
field name. |
4034
|
|
|
|
|
|
|
|
4035
|
|
|
|
|
|
|
For example |
4036
|
|
|
|
|
|
|
|
4037
|
|
|
|
|
|
|
fields_ar => ['OFFICENUMBER', 'PERSONALNUMBER'] |
4038
|
|
|
|
|
|
|
|
4039
|
|
|
|
|
|
|
is strictly equivalent to |
4040
|
|
|
|
|
|
|
|
4041
|
|
|
|
|
|
|
fields_hr => {'OFFICENUMBER' => '^officenumber$', 'PERSONALNUMBER' = '^personalnumber$'} |
4042
|
|
|
|
|
|
|
|
4043
|
|
|
|
|
|
|
The regex search is case insensitive. |
4044
|
|
|
|
|
|
|
|
4045
|
|
|
|
|
|
|
C<fields_ar> is useful if the internal names are identical to the file column names. It avoids |
4046
|
|
|
|
|
|
|
repeating the names over and over as would happen if using L</fields_hr> attribute. |
4047
|
|
|
|
|
|
|
|
4048
|
|
|
|
|
|
|
I<NOTE> |
4049
|
|
|
|
|
|
|
|
4050
|
|
|
|
|
|
|
You might wonder why using fields_ar as opposed to Text::AutoCSV default' mechanism. There are two |
4051
|
|
|
|
|
|
|
reasons for that: |
4052
|
|
|
|
|
|
|
|
4053
|
|
|
|
|
|
|
1- Text::AutoCSV removes spaces from column names, and some people may want another behavior. A |
4054
|
|
|
|
|
|
|
header name of 'Phone Number' will get an internal column name of 'PHONENUMBER' (default behavior, |
4055
|
|
|
|
|
|
|
if none of fields_hr, fields_ar and fields_column_names attributes is specified), and one may prefer |
4056
|
|
|
|
|
|
|
'PHONE NUMBER' or 'phone number' or whatsoever. |
4057
|
|
|
|
|
|
|
|
4058
|
|
|
|
|
|
|
2- By specifying a list of columns using either of fields_hr or fields_ar, you not only map column |
4059
|
|
|
|
|
|
|
names as found in the header line to internal column names: you also I<request> these columns to be |
4060
|
|
|
|
|
|
|
available. If one of the requested columns cannot be found, Text::AutoCSV will croak (default) or |
4061
|
|
|
|
|
|
|
print an error and return an undef object (if created with C<croak_if_error =E<gt> 0>). |
4062
|
|
|
|
|
|
|
|
4063
|
|
|
|
|
|
|
=item fields_column_names |
4064
|
|
|
|
|
|
|
|
4065
|
|
|
|
|
|
|
Array ref of column internal names, in the order of columns in file. This attribute works like the |
4066
|
|
|
|
|
|
|
C<column_names> attribute of Text::CSV. It'll just assign names to columns one by one, regardless of |
4067
|
|
|
|
|
|
|
what the header line contains. It'll work also if the file has no header line. |
4068
|
|
|
|
|
|
|
|
4069
|
|
|
|
|
|
|
Example: |
4070
|
|
|
|
|
|
|
|
4071
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', |
4072
|
|
|
|
|
|
|
fields_column_names => ['My COL1', '', 'My COL3']); |
4073
|
|
|
|
|
|
|
|
4074
|
|
|
|
|
|
|
=item out_file |
4075
|
|
|
|
|
|
|
|
4076
|
|
|
|
|
|
|
Output file when executing the L</write> method. |
4077
|
|
|
|
|
|
|
|
4078
|
|
|
|
|
|
|
If not specified or empty, write to standard output. |
4079
|
|
|
|
|
|
|
|
4080
|
|
|
|
|
|
|
Example: |
4081
|
|
|
|
|
|
|
|
4082
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv'); |
4083
|
|
|
|
|
|
|
|
4084
|
|
|
|
|
|
|
=item outh |
4085
|
|
|
|
|
|
|
|
4086
|
|
|
|
|
|
|
File handle to write CSV data to when executing the L</write> method. |
4087
|
|
|
|
|
|
|
Normally you don't want to specify this attribute. |
4088
|
|
|
|
|
|
|
|
4089
|
|
|
|
|
|
|
C<outh> is useful if you don't like the way Text::AutoCSV opens the output file for you. |
4090
|
|
|
|
|
|
|
|
4091
|
|
|
|
|
|
|
Example: |
4092
|
|
|
|
|
|
|
|
4093
|
|
|
|
|
|
|
my $outh = open "myin.csv', ">>"; |
4094
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', has_headers => 0, outh => $outh); |
4095
|
|
|
|
|
|
|
|
4096
|
|
|
|
|
|
|
=item out_encoding |
4097
|
|
|
|
|
|
|
|
4098
|
|
|
|
|
|
|
Enforce the encoding of output. |
4099
|
|
|
|
|
|
|
|
4100
|
|
|
|
|
|
|
Value by default: input encoding |
4101
|
|
|
|
|
|
|
|
4102
|
|
|
|
|
|
|
Example: |
4103
|
|
|
|
|
|
|
|
4104
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', |
4105
|
|
|
|
|
|
|
out_encoding => 'UTF-16'); |
4106
|
|
|
|
|
|
|
|
4107
|
|
|
|
|
|
|
=item out_utf8_bom |
4108
|
|
|
|
|
|
|
|
4109
|
|
|
|
|
|
|
Enforce BOM (Byte-Order-Mark) on output, when it is UTF8. If output encoding is not UTF-8, this |
4110
|
|
|
|
|
|
|
attribute is ignored. |
4111
|
|
|
|
|
|
|
|
4112
|
|
|
|
|
|
|
B<NOTE> |
4113
|
|
|
|
|
|
|
|
4114
|
|
|
|
|
|
|
UTF-8 needs no BOM (there is no Byte-Order in UTF-8), and in practice, UTF8-encoded files rarely |
4115
|
|
|
|
|
|
|
have a BOM. |
4116
|
|
|
|
|
|
|
|
4117
|
|
|
|
|
|
|
Using this attribute is not recommended. It is provided for the sake of completeness, and also to |
4118
|
|
|
|
|
|
|
produce Unicode files Microsoft EXCEL will be happy to read. |
4119
|
|
|
|
|
|
|
|
4120
|
|
|
|
|
|
|
At first sight it would seem more logical to make EXCEL happy with something like this: |
4121
|
|
|
|
|
|
|
|
4122
|
|
|
|
|
|
|
out_encoding => 'UTF-16' |
4123
|
|
|
|
|
|
|
|
4124
|
|
|
|
|
|
|
But... While EXCEL will identify UTF-16 and read it as such, it will not take into account the BOM |
4125
|
|
|
|
|
|
|
found at the beginning. In the end the first cell will have 2 useless characters prepended. The only |
4126
|
|
|
|
|
|
|
solution the author knows to workaround this issue if to use UTF-8 as output encoding, and enforce a |
4127
|
|
|
|
|
|
|
BOM. That is, use: |
4128
|
|
|
|
|
|
|
|
4129
|
|
|
|
|
|
|
..., out_encoding => 'UTF-8', out_utf8_bom => 1, ... |
4130
|
|
|
|
|
|
|
|
4131
|
|
|
|
|
|
|
=item out_sep_char |
4132
|
|
|
|
|
|
|
|
4133
|
|
|
|
|
|
|
Enforce the output CSV separator character. |
4134
|
|
|
|
|
|
|
|
4135
|
|
|
|
|
|
|
Value by default: input separator |
4136
|
|
|
|
|
|
|
|
4137
|
|
|
|
|
|
|
Example: |
4138
|
|
|
|
|
|
|
|
4139
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', out_sep_char => ','); |
4140
|
|
|
|
|
|
|
|
4141
|
|
|
|
|
|
|
=item out_quote_char |
4142
|
|
|
|
|
|
|
|
4143
|
|
|
|
|
|
|
Enforce the output CSV quote character. |
4144
|
|
|
|
|
|
|
|
4145
|
|
|
|
|
|
|
Value by default: input quote character |
4146
|
|
|
|
|
|
|
|
4147
|
|
|
|
|
|
|
Example: |
4148
|
|
|
|
|
|
|
|
4149
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', out_quote_char => '"'); |
4150
|
|
|
|
|
|
|
|
4151
|
|
|
|
|
|
|
=item out_escape_char |
4152
|
|
|
|
|
|
|
|
4153
|
|
|
|
|
|
|
Enforce the output CSV escape character. |
4154
|
|
|
|
|
|
|
|
4155
|
|
|
|
|
|
|
Value by default: input escape character |
4156
|
|
|
|
|
|
|
|
4157
|
|
|
|
|
|
|
Example: |
4158
|
|
|
|
|
|
|
|
4159
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', |
4160
|
|
|
|
|
|
|
out_escape_char_char => '\\'); |
4161
|
|
|
|
|
|
|
|
4162
|
|
|
|
|
|
|
=item out_always_quote |
4163
|
|
|
|
|
|
|
|
4164
|
|
|
|
|
|
|
If true, quote all fields of output (set always_quote of Text::CSV). |
4165
|
|
|
|
|
|
|
|
4166
|
|
|
|
|
|
|
If false, don't quote all fields of output (don't set C<always_quote> of Text::CSV). |
4167
|
|
|
|
|
|
|
|
4168
|
|
|
|
|
|
|
Value by default: same as what is found in input |
4169
|
|
|
|
|
|
|
|
4170
|
|
|
|
|
|
|
While reading input, Text::AutoCSV works out whether or not all fields were quoted. If yes, then the |
4171
|
|
|
|
|
|
|
output Text::CSV object has the always_quote attribute set, if no, then the output Text::CSV object |
4172
|
|
|
|
|
|
|
does not have this attribute set. |
4173
|
|
|
|
|
|
|
|
4174
|
|
|
|
|
|
|
Example: |
4175
|
|
|
|
|
|
|
|
4176
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', out_always_quote => 1); |
4177
|
|
|
|
|
|
|
|
4178
|
|
|
|
|
|
|
=item out_has_headers |
4179
|
|
|
|
|
|
|
|
4180
|
|
|
|
|
|
|
If true, when writing output, write a header line on first line. |
4181
|
|
|
|
|
|
|
|
4182
|
|
|
|
|
|
|
If false, when writing output, don't write a header line on first line. |
4183
|
|
|
|
|
|
|
|
4184
|
|
|
|
|
|
|
Value by default: same as has_headers attribute |
4185
|
|
|
|
|
|
|
|
4186
|
|
|
|
|
|
|
Example 1 |
4187
|
|
|
|
|
|
|
|
4188
|
|
|
|
|
|
|
Read standard input and write to standard output, removing the header line. |
4189
|
|
|
|
|
|
|
|
4190
|
|
|
|
|
|
|
Text::AutoCSV->new(out_has_headers => 0)->write(); |
4191
|
|
|
|
|
|
|
|
4192
|
|
|
|
|
|
|
Example 2 |
4193
|
|
|
|
|
|
|
|
4194
|
|
|
|
|
|
|
Read standard input and write to standard output, adding a header line. |
4195
|
|
|
|
|
|
|
|
4196
|
|
|
|
|
|
|
Text::AutoCSV->new(fields_column_names => ['MYCOL1', 'MYCOL2'], out_has_headers => 1)->write(); |
4197
|
|
|
|
|
|
|
|
4198
|
|
|
|
|
|
|
=item no_undef |
4199
|
|
|
|
|
|
|
|
4200
|
|
|
|
|
|
|
If true, non-existent column values are set to an empty string instead of undef. It is also done on |
4201
|
|
|
|
|
|
|
extra fields that happen to have an undef value (for example when the target of a linked field is |
4202
|
|
|
|
|
|
|
not found). |
4203
|
|
|
|
|
|
|
|
4204
|
|
|
|
|
|
|
Note this attribute does not work on callback functions output set with L</in_map>: for example |
4205
|
|
|
|
|
|
|
empty DateTime values (on fields identified as containing a date/time, see C<dates_*> attributes |
4206
|
|
|
|
|
|
|
below) are set to C<undef>, even while C<no_undef> is set. Indeed setting it to an empty string |
4207
|
|
|
|
|
|
|
while non-empty values would contain a Datetime object would not be clean. An empty value in a |
4208
|
|
|
|
|
|
|
placeholder containing an object must be undef. |
4209
|
|
|
|
|
|
|
|
4210
|
|
|
|
|
|
|
Since version 1.1.5 of Text::AutoCSV, C<no_undef> is examined when sending parameter ($_) to |
4211
|
|
|
|
|
|
|
L</in_map> callback: an undef value is now passed as is (as undef), unless C<no_undef> is set. If |
4212
|
|
|
|
|
|
|
C<no_undef> is set, and field value is undef, then $_ is set to the empty string ('') when calling |
4213
|
|
|
|
|
|
|
callback defined by L</in_map>. This new behavior was put in place to be consistent with what is |
4214
|
|
|
|
|
|
|
being done with DateTime values. |
4215
|
|
|
|
|
|
|
|
4216
|
|
|
|
|
|
|
Value by default: 0 |
4217
|
|
|
|
|
|
|
|
4218
|
|
|
|
|
|
|
Example: |
4219
|
|
|
|
|
|
|
|
4220
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', no_undef => 1); |
4221
|
|
|
|
|
|
|
|
4222
|
|
|
|
|
|
|
=item read_post_update_hr |
4223
|
|
|
|
|
|
|
|
4224
|
|
|
|
|
|
|
To be set to a ref sub. Each time a record is read from input, call C<read_post_update_hr> to update |
4225
|
|
|
|
|
|
|
the hash ref of the record. The sub is called with 2 arguments: the hash ref to the record value and |
4226
|
|
|
|
|
|
|
the hash ref to stats. |
4227
|
|
|
|
|
|
|
|
4228
|
|
|
|
|
|
|
The stats allow to count events and are printed in the end of reading in case Text::AutoCSV is |
4229
|
|
|
|
|
|
|
called in verbose mode (C<verbose =E<gt> 1>). |
4230
|
|
|
|
|
|
|
|
4231
|
|
|
|
|
|
|
For example, the C<read_post_update_hr> below will turn column 'CITY' values in upper case and count |
4232
|
|
|
|
|
|
|
occurences of empty cities in stat display: |
4233
|
|
|
|
|
|
|
|
4234
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'addresses.csv', read_post_update_hr => \&updt, verbose => 1) |
4235
|
|
|
|
|
|
|
->write(); |
4236
|
|
|
|
|
|
|
sub updt { |
4237
|
|
|
|
|
|
|
my ($hr, $stats) = @_; |
4238
|
|
|
|
|
|
|
$hr->{'CITY'} =~ s/^.*$/\U$&/; |
4239
|
|
|
|
|
|
|
$stats->{'empty city encountered'}++ if $hr->{'CITY'} eq ''; |
4240
|
|
|
|
|
|
|
} |
4241
|
|
|
|
|
|
|
|
4242
|
|
|
|
|
|
|
B<IMPORTANT> |
4243
|
|
|
|
|
|
|
|
4244
|
|
|
|
|
|
|
You cannot create a field this way. To create a field, you have to use the member functions |
4245
|
|
|
|
|
|
|
L</field_add_link>, L</field_add_copy> or L</field_add_computed>. |
4246
|
|
|
|
|
|
|
|
4247
|
|
|
|
|
|
|
B<NOTE> |
4248
|
|
|
|
|
|
|
|
4249
|
|
|
|
|
|
|
If you wish to manage some updates at field level, consider registering update functions with |
4250
|
|
|
|
|
|
|
L</in_map> and L</out_map> member functions. These functions register callbacks that work at field |
4251
|
|
|
|
|
|
|
level and with $_ variable (thus the callback function invoked is AutoCSV-agnostic). |
4252
|
|
|
|
|
|
|
|
4253
|
|
|
|
|
|
|
L</in_map> updates a field after read, L</out_map> updates the field content before writing it. |
4254
|
|
|
|
|
|
|
|
4255
|
|
|
|
|
|
|
=item walker_hr |
4256
|
|
|
|
|
|
|
|
4257
|
|
|
|
|
|
|
To set to a sub ref that'll be executed each time a record is read from input. It is executed after |
4258
|
|
|
|
|
|
|
L</read_post_update_hr>. The sub is called with 2 arguments: the hash ref to the record value and |
4259
|
|
|
|
|
|
|
the hash ref to stats. |
4260
|
|
|
|
|
|
|
|
4261
|
|
|
|
|
|
|
Note L</read_post_update_hr> is meant for updating record fields just after reading, whereas |
4262
|
|
|
|
|
|
|
L</walker_hr> is read-only. |
4263
|
|
|
|
|
|
|
|
4264
|
|
|
|
|
|
|
The stats allow to count events and are printed in the end of reading in case Text::AutoCSV is |
4265
|
|
|
|
|
|
|
called in verbose mode (C<verbose =E<gt> 1>). If the L</verbose> attribute is not set, the stats are |
4266
|
|
|
|
|
|
|
not displayed, however you can get stats by calling the get_stats function. |
4267
|
|
|
|
|
|
|
|
4268
|
|
|
|
|
|
|
The example below will count in the stats the number of records where the 'CITY' field is empty. |
4269
|
|
|
|
|
|
|
Thanks to C<verbose =E<gt> 1> attribute, at the end of reading the stats are displayed. |
4270
|
|
|
|
|
|
|
|
4271
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'addresses.csv', walker_hr => \&walk1, |
4272
|
|
|
|
|
|
|
verbose => 1)->read(); |
4273
|
|
|
|
|
|
|
sub walk1 { |
4274
|
|
|
|
|
|
|
my ($hr, $stats) = @_; |
4275
|
|
|
|
|
|
|
$stats->{'empty city'}++ if $hr->{'CITY'} eq ''; |
4276
|
|
|
|
|
|
|
} |
4277
|
|
|
|
|
|
|
|
4278
|
|
|
|
|
|
|
=item walker_ar |
4279
|
|
|
|
|
|
|
|
4280
|
|
|
|
|
|
|
To set to a sub ref that'll be executed each time a record is read from input. It is executed after |
4281
|
|
|
|
|
|
|
L</read_post_update_hr>. The sub is called with 2 arguments: the array ref to the record value and |
4282
|
|
|
|
|
|
|
the hash ref to stats. |
4283
|
|
|
|
|
|
|
|
4284
|
|
|
|
|
|
|
Note L</read_post_update_hr> is meant for updating record fields just after reading, whereas |
4285
|
|
|
|
|
|
|
C<walker_hr> is read-only. |
4286
|
|
|
|
|
|
|
|
4287
|
|
|
|
|
|
|
The stats allow to count events and are printed in the end of reading in case Text::AutoCSV is |
4288
|
|
|
|
|
|
|
called in verbose mode (C<verbose =E<gt> 1>). If the L</verbose> attribute is not set, the stats are |
4289
|
|
|
|
|
|
|
lost. |
4290
|
|
|
|
|
|
|
|
4291
|
|
|
|
|
|
|
The array ref contains values in their natural order in the CSV. To be used with the column names, |
4292
|
|
|
|
|
|
|
you have to use L</get_fields_names> member function. |
4293
|
|
|
|
|
|
|
|
4294
|
|
|
|
|
|
|
The example below will count in the stats the number of records where the 'CITY' field is empty. |
4295
|
|
|
|
|
|
|
Thanks to C<verbose =E<gt> 1> attribute, at the end of reading the stats are displayed. It produces |
4296
|
|
|
|
|
|
|
the exact same result as the example in walker_hr attribute, but it uses walker_ar. |
4297
|
|
|
|
|
|
|
|
4298
|
|
|
|
|
|
|
use List::MoreUtils qw(first_index); |
4299
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'addresses.csv', walker_ar => \&walk2, verbose => 1); |
4300
|
|
|
|
|
|
|
my @cols = $csv->get_fields_names(); |
4301
|
|
|
|
|
|
|
my $idxCITY = first_index { /^city$/i } @cols; |
4302
|
|
|
|
|
|
|
die "No city field!??" if $idxCITY < 0; |
4303
|
|
|
|
|
|
|
$csv->read(); |
4304
|
|
|
|
|
|
|
sub walk2 { |
4305
|
|
|
|
|
|
|
my ($ar, $stats) = @_; |
4306
|
|
|
|
|
|
|
$stats->{'empty city'}++ if $ar->[$idxCITY] eq ''; |
4307
|
|
|
|
|
|
|
} |
4308
|
|
|
|
|
|
|
|
4309
|
|
|
|
|
|
|
=item write_filter_hr |
4310
|
|
|
|
|
|
|
|
4311
|
|
|
|
|
|
|
Alias of L</out_filter>. |
4312
|
|
|
|
|
|
|
|
4313
|
|
|
|
|
|
|
=item out_filter |
4314
|
|
|
|
|
|
|
|
4315
|
|
|
|
|
|
|
To be set to a ref sub. Before writing a record to output, C<out_filter> is called and the record |
4316
|
|
|
|
|
|
|
gets writen only if C<out_filter> return value is true. The sub is called with 1 argument: the hash |
4317
|
|
|
|
|
|
|
ref to the record value. |
4318
|
|
|
|
|
|
|
|
4319
|
|
|
|
|
|
|
For example, if you want to output only records where the 'CITY' column value is Grenoble: |
4320
|
|
|
|
|
|
|
|
4321
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'addresses.csv', out_file => 'grenoble.csv', |
4322
|
|
|
|
|
|
|
out_filter => \&filt)->write(); |
4323
|
|
|
|
|
|
|
sub filt { |
4324
|
|
|
|
|
|
|
my $hr = shift; |
4325
|
|
|
|
|
|
|
return 1 if $hr->{'CITY'} =~ /^grenoble$/i; |
4326
|
|
|
|
|
|
|
return 0; |
4327
|
|
|
|
|
|
|
} |
4328
|
|
|
|
|
|
|
|
4329
|
|
|
|
|
|
|
=item write_fields |
4330
|
|
|
|
|
|
|
|
4331
|
|
|
|
|
|
|
Alias of L</out_fields>. |
4332
|
|
|
|
|
|
|
|
4333
|
|
|
|
|
|
|
=item out_fields |
4334
|
|
|
|
|
|
|
|
4335
|
|
|
|
|
|
|
Set to an array ref. List fields to write to output. |
4336
|
|
|
|
|
|
|
|
4337
|
|
|
|
|
|
|
Fields are written in their order in the array ref, the first CSV column being the first element in |
4338
|
|
|
|
|
|
|
the array, and so on. Fields not listed in B<out_fields> are not written in output. |
4339
|
|
|
|
|
|
|
|
4340
|
|
|
|
|
|
|
You can use empty field names to have empty columns in output. |
4341
|
|
|
|
|
|
|
|
4342
|
|
|
|
|
|
|
Value by default: none, meaning, all fields are output in their natural order. What is natural |
4343
|
|
|
|
|
|
|
order? It is the input order for fields that were read from input, and the order in which they got |
4344
|
|
|
|
|
|
|
created for created fields. |
4345
|
|
|
|
|
|
|
|
4346
|
|
|
|
|
|
|
Example: |
4347
|
|
|
|
|
|
|
|
4348
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'allinfos.csv', out_file => 'only-addresses.csv', |
4349
|
|
|
|
|
|
|
out_fields => [ 'NAME', 'ADDRESS' ] )->write(); |
4350
|
|
|
|
|
|
|
|
4351
|
|
|
|
|
|
|
=item out_orderby |
4352
|
|
|
|
|
|
|
|
4353
|
|
|
|
|
|
|
Array reference to a list of fields to sort output with. |
4354
|
|
|
|
|
|
|
|
4355
|
|
|
|
|
|
|
At the moment this feature is a bit of a hack (no option to make sort descending or ascending, |
4356
|
|
|
|
|
|
|
numeric or text, and it is not part of test plan). |
4357
|
|
|
|
|
|
|
|
4358
|
|
|
|
|
|
|
Example: |
4359
|
|
|
|
|
|
|
|
4360
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'names.csv', out_file => 'sortednames.csv', |
4361
|
|
|
|
|
|
|
out_orderby => [ 'LASTNAME', 'FIRSTNAME']); |
4362
|
|
|
|
|
|
|
|
4363
|
|
|
|
|
|
|
=item search_case |
4364
|
|
|
|
|
|
|
|
4365
|
|
|
|
|
|
|
If true, searches are case sensitive by default. Searches are done by the member functions |
4366
|
|
|
|
|
|
|
L</search>, L</search_1hr>, L</vlookup>, and linked fields (L</field_add_link>). |
4367
|
|
|
|
|
|
|
|
4368
|
|
|
|
|
|
|
The search functions can also be called with the option L</case>, that takes precedence over the |
4369
|
|
|
|
|
|
|
object-level C<search_case> attribute value. See L</vlookup> help. |
4370
|
|
|
|
|
|
|
|
4371
|
|
|
|
|
|
|
Value by default: 0 (by default searches are case insensitive) |
4372
|
|
|
|
|
|
|
|
4373
|
|
|
|
|
|
|
Example: |
4374
|
|
|
|
|
|
|
|
4375
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', search_case => 1); |
4376
|
|
|
|
|
|
|
|
4377
|
|
|
|
|
|
|
=item search_trim |
4378
|
|
|
|
|
|
|
|
4379
|
|
|
|
|
|
|
If true, searches ignore the presence of leading or trailing spaces in values. |
4380
|
|
|
|
|
|
|
|
4381
|
|
|
|
|
|
|
The search functions can also be called with the option L</trim>, that takes precedence over the |
4382
|
|
|
|
|
|
|
object-level C<search_trim> attribute value. See L</vlookup> help. |
4383
|
|
|
|
|
|
|
|
4384
|
|
|
|
|
|
|
Value by default: 1 (by default searches ignore leading and trailing spaces) |
4385
|
|
|
|
|
|
|
|
4386
|
|
|
|
|
|
|
Example: |
4387
|
|
|
|
|
|
|
|
4388
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', search_trim => 0); |
4389
|
|
|
|
|
|
|
|
4390
|
|
|
|
|
|
|
=item search_ignore_empty |
4391
|
|
|
|
|
|
|
|
4392
|
|
|
|
|
|
|
If true, empty fields are not included in the search indexes. |
4393
|
|
|
|
|
|
|
|
4394
|
|
|
|
|
|
|
The search functions can also be called with the option L</ignore_empty>, that takes precedence over |
4395
|
|
|
|
|
|
|
the object-level C<search_ignore_empty> attribute value. See L</vlookup> help. |
4396
|
|
|
|
|
|
|
|
4397
|
|
|
|
|
|
|
Value by default: 1 (by default, search of the value '' will find nothing) |
4398
|
|
|
|
|
|
|
|
4399
|
|
|
|
|
|
|
Example: |
4400
|
|
|
|
|
|
|
|
4401
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', search_ignore_empty => 0); |
4402
|
|
|
|
|
|
|
|
4403
|
|
|
|
|
|
|
=item search_ignore_accents |
4404
|
|
|
|
|
|
|
|
4405
|
|
|
|
|
|
|
If true, accents are ignored by search indexes. |
4406
|
|
|
|
|
|
|
|
4407
|
|
|
|
|
|
|
The search functions can also be called with the option L</ignore_accents>, that takes precedence |
4408
|
|
|
|
|
|
|
over the object-level C<search_ignore_accents> attribute value. See L</vlookup> help. |
4409
|
|
|
|
|
|
|
|
4410
|
|
|
|
|
|
|
Value by default: 1 (by default, accents are ignored by search functions) |
4411
|
|
|
|
|
|
|
|
4412
|
|
|
|
|
|
|
Example: |
4413
|
|
|
|
|
|
|
|
4414
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', search_ignore_accents => 0); |
4415
|
|
|
|
|
|
|
|
4416
|
|
|
|
|
|
|
=item search_value_if_not_found |
4417
|
|
|
|
|
|
|
|
4418
|
|
|
|
|
|
|
When a search is done with a unique value to return (field_add_link member function behavior or |
4419
|
|
|
|
|
|
|
return value of vlookup), default value of option L</value_if_not_found>. See L</vlookup>. |
4420
|
|
|
|
|
|
|
|
4421
|
|
|
|
|
|
|
=item search_value_if_found |
4422
|
|
|
|
|
|
|
|
4423
|
|
|
|
|
|
|
When a search is done with a unique value to return (field_add_link member function behavior or |
4424
|
|
|
|
|
|
|
return value of vlookup), default value of option L</value_if_found>. See L</vlookup>. |
4425
|
|
|
|
|
|
|
|
4426
|
|
|
|
|
|
|
B<IMPORTANT> |
4427
|
|
|
|
|
|
|
|
4428
|
|
|
|
|
|
|
This attribute is extremly unusual. Once you've provided it, all vlookups and the target field value |
4429
|
|
|
|
|
|
|
of fields created with field_add_link will all be populated with the value provided with this |
4430
|
|
|
|
|
|
|
option. |
4431
|
|
|
|
|
|
|
|
4432
|
|
|
|
|
|
|
Don't use it unless you know what you are doing. |
4433
|
|
|
|
|
|
|
|
4434
|
|
|
|
|
|
|
=item search_ignore_ambiguous |
4435
|
|
|
|
|
|
|
|
4436
|
|
|
|
|
|
|
When a search is done with a unique value to return (field_add_link member function behavior or |
4437
|
|
|
|
|
|
|
return value of search_1hr and vlookup), default value of option L</ignore_ambiguous>. See |
4438
|
|
|
|
|
|
|
L</vlookup>. |
4439
|
|
|
|
|
|
|
|
4440
|
|
|
|
|
|
|
=item search_value_if_ambiguous |
4441
|
|
|
|
|
|
|
|
4442
|
|
|
|
|
|
|
When a search is done with a unique value to return (field_add_link member function behavior or |
4443
|
|
|
|
|
|
|
return value of vlookup), default value of option L</value_if_ambiguous>. See L</vlookup>. |
4444
|
|
|
|
|
|
|
|
4445
|
|
|
|
|
|
|
=item fields_dates |
4446
|
|
|
|
|
|
|
|
4447
|
|
|
|
|
|
|
Array ref of field names that contain a date. |
4448
|
|
|
|
|
|
|
|
4449
|
|
|
|
|
|
|
Once the formats of these fields is known (auto-detection by default), each of these fields will get |
4450
|
|
|
|
|
|
|
a specific L</in_map> sub that converts the text in a DateTime object and a L</out_map> sub that |
4451
|
|
|
|
|
|
|
converts back from DateTime to text. |
4452
|
|
|
|
|
|
|
|
4453
|
|
|
|
|
|
|
B<NOTE> |
4454
|
|
|
|
|
|
|
|
4455
|
|
|
|
|
|
|
The L</out_map> given to a DateTime field is "defensive code": normally, L</in_map> converts text |
4456
|
|
|
|
|
|
|
into a DateTime object and L</out_map> does the opposite, it takes a DateTime object and converts it |
4457
|
|
|
|
|
|
|
to text. If ever L</out_map> encounters a value that is not a DateTime object, it'll just stringify |
4458
|
|
|
|
|
|
|
it (evaluation in a string context), without calling its DateTime formatter. |
4459
|
|
|
|
|
|
|
|
4460
|
|
|
|
|
|
|
If the format cannot be detected for a given field, output an error message and as always when an |
4461
|
|
|
|
|
|
|
error occurs, croak (unless L</croak_if_error> got set to 0). |
4462
|
|
|
|
|
|
|
|
4463
|
|
|
|
|
|
|
Value by default: none |
4464
|
|
|
|
|
|
|
|
4465
|
|
|
|
|
|
|
Example: |
4466
|
|
|
|
|
|
|
|
4467
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'logins.csv', |
4468
|
|
|
|
|
|
|
fields_dates => ['LASTLOGIN', 'CREATIONDATE']); |
4469
|
|
|
|
|
|
|
|
4470
|
|
|
|
|
|
|
=item fields_dates_auto |
4471
|
|
|
|
|
|
|
|
4472
|
|
|
|
|
|
|
Boolean value. If set to 1, will detect dates formats on all fields. Fields in which a DateTime |
4473
|
|
|
|
|
|
|
format got detected are then managed as if they had been being listed in L</fields_dates> attribute: |
4474
|
|
|
|
|
|
|
they get an appropriate L</in_map> sub and a L</out_map> sub to convert to and from DateTime (see |
4475
|
|
|
|
|
|
|
L</fields_dates> attribute above). |
4476
|
|
|
|
|
|
|
|
4477
|
|
|
|
|
|
|
C<fields_dates_auto> looks for DateTime on all fields, but it expects nothing: it won't raise an |
4478
|
|
|
|
|
|
|
error if no field is found that contains DateTime. |
4479
|
|
|
|
|
|
|
|
4480
|
|
|
|
|
|
|
Value by default: 0 |
4481
|
|
|
|
|
|
|
|
4482
|
|
|
|
|
|
|
Example: |
4483
|
|
|
|
|
|
|
|
4484
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'logins.csv', fields_dates_auto => 1); |
4485
|
|
|
|
|
|
|
|
4486
|
|
|
|
|
|
|
=item fields_dates_auto_optimize |
4487
|
|
|
|
|
|
|
|
4488
|
|
|
|
|
|
|
Relevant only if L</fields_dates_auto> is set. |
4489
|
|
|
|
|
|
|
|
4490
|
|
|
|
|
|
|
Normally when L</fields_dates_auto> is set, the input is read completely to make sure auto-detection |
4491
|
|
|
|
|
|
|
produces a reliable result. If C<fields_dates_auto_optimize> is set, this reading pass will stop as |
4492
|
|
|
|
|
|
|
soon as there is no ambiguity left. That is, for every fields in input, the date format (or the fact |
4493
|
|
|
|
|
|
|
that no date format is suitable) is known. |
4494
|
|
|
|
|
|
|
|
4495
|
|
|
|
|
|
|
Using this option is a bit risky because it could trigger a date format detection that later in the |
4496
|
|
|
|
|
|
|
input, would turn out to be wrong. Should that be the case, strange errors will occur, that are not |
4497
|
|
|
|
|
|
|
easy to understand. Use it at your own risk. |
4498
|
|
|
|
|
|
|
|
4499
|
|
|
|
|
|
|
Value by default: 0 |
4500
|
|
|
|
|
|
|
|
4501
|
|
|
|
|
|
|
Example: |
4502
|
|
|
|
|
|
|
|
4503
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'logins.csv', fields_dates_auto => 1, |
4504
|
|
|
|
|
|
|
fields_dates_auto_optimize => 1); |
4505
|
|
|
|
|
|
|
|
4506
|
|
|
|
|
|
|
=item dates_formats_to_try |
4507
|
|
|
|
|
|
|
|
4508
|
|
|
|
|
|
|
Array ref of string formats. |
4509
|
|
|
|
|
|
|
|
4510
|
|
|
|
|
|
|
Text::AutoCSV has a default built-in list of 20 date formats to try and 6 time formats (also it'll |
4511
|
|
|
|
|
|
|
combine any date format with any time format). |
4512
|
|
|
|
|
|
|
|
4513
|
|
|
|
|
|
|
C<dates_formats_to_try> will replace Text::AutoCSV default format-list will the one you specify, in |
4514
|
|
|
|
|
|
|
case the default would not produce the results you expect. |
4515
|
|
|
|
|
|
|
|
4516
|
|
|
|
|
|
|
The formats are written in Strptime format. |
4517
|
|
|
|
|
|
|
|
4518
|
|
|
|
|
|
|
Value by default (see below about the role of the pseudo-format ''): |
4519
|
|
|
|
|
|
|
|
4520
|
|
|
|
|
|
|
[ '', |
4521
|
|
|
|
|
|
|
'%Y-%m-%d', |
4522
|
|
|
|
|
|
|
'%Y.%m.%d', |
4523
|
|
|
|
|
|
|
'%Y/%m/%d', |
4524
|
|
|
|
|
|
|
'%m.%d.%y', |
4525
|
|
|
|
|
|
|
'%m-%d-%Y', |
4526
|
|
|
|
|
|
|
'%m.%d.%Y', |
4527
|
|
|
|
|
|
|
'%m/%d/%Y', |
4528
|
|
|
|
|
|
|
'%d-%m-%Y', |
4529
|
|
|
|
|
|
|
'%d.%m.%Y', |
4530
|
|
|
|
|
|
|
'%d/%m/%Y', |
4531
|
|
|
|
|
|
|
'%m-%d-%y', |
4532
|
|
|
|
|
|
|
'%m/%d/%y', |
4533
|
|
|
|
|
|
|
'%d-%m-%y', |
4534
|
|
|
|
|
|
|
'%d.%m.%y', |
4535
|
|
|
|
|
|
|
'%d/%m/%y', |
4536
|
|
|
|
|
|
|
'%Y%m%d%H%M%S', |
4537
|
|
|
|
|
|
|
'%b %d, %Y', |
4538
|
|
|
|
|
|
|
'%b %d %Y', |
4539
|
|
|
|
|
|
|
'%b %d %T %Z %Y', |
4540
|
|
|
|
|
|
|
'%d %b %Y', |
4541
|
|
|
|
|
|
|
'%d %b, %Y' ] |
4542
|
|
|
|
|
|
|
|
4543
|
|
|
|
|
|
|
B<IMPORTANT> |
4544
|
|
|
|
|
|
|
|
4545
|
|
|
|
|
|
|
The empty format (empty string) has a special meaning: when specified, Text::AutoCSV will be able to |
4546
|
|
|
|
|
|
|
identify fields that contain only a time (not preceeded by a date). |
4547
|
|
|
|
|
|
|
|
4548
|
|
|
|
|
|
|
B<Note> |
4549
|
|
|
|
|
|
|
|
4550
|
|
|
|
|
|
|
Format identification is over only when there is no more ambiguity. So the usual pitfall of US |
4551
|
|
|
|
|
|
|
versus French dates (month-day versus day-month) gets resolved only when a date is encountered that |
4552
|
|
|
|
|
|
|
disambiguates it (a date of 13th of the month or later). |
4553
|
|
|
|
|
|
|
|
4554
|
|
|
|
|
|
|
Example with a weird format that uses underscores to separate elements, using either US (month, day, |
4555
|
|
|
|
|
|
|
year), French (day, month, year), or international (year, month, day) order: |
4556
|
|
|
|
|
|
|
|
4557
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'logins.csv', |
4558
|
|
|
|
|
|
|
dates_formats_to_try => ['%d_%m_%Y', '%m_%d_%Y', '%Y_%m_%d']); |
4559
|
|
|
|
|
|
|
|
4560
|
|
|
|
|
|
|
=item dates_formats_to_try_supp |
4561
|
|
|
|
|
|
|
|
4562
|
|
|
|
|
|
|
Same as L</dates_formats_to_try> but instead of replacing the default list of formats used during |
4563
|
|
|
|
|
|
|
detection, it is added to this default list. |
4564
|
|
|
|
|
|
|
|
4565
|
|
|
|
|
|
|
You want to use this attribute if you need a specific DateTime format while continuing to benefit |
4566
|
|
|
|
|
|
|
from the default list. |
4567
|
|
|
|
|
|
|
|
4568
|
|
|
|
|
|
|
B<IMPORTANT> |
4569
|
|
|
|
|
|
|
|
4570
|
|
|
|
|
|
|
Text::AutoCSV will identify a given Datetime format only when there is no ambiguity, meaning, one |
4571
|
|
|
|
|
|
|
unique Datetime format matches (all other failed). Adding a format that already exists in the |
4572
|
|
|
|
|
|
|
default list will prevent the format from being identified, as it'll always be ambiguous. See |
4573
|
|
|
|
|
|
|
L</dates_formats_to_try> for the default list of formats. |
4574
|
|
|
|
|
|
|
|
4575
|
|
|
|
|
|
|
Example: |
4576
|
|
|
|
|
|
|
|
4577
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'logins.csv', |
4578
|
|
|
|
|
|
|
dates_formats_to_try_supp => ['%d_%m_%Y', '%m_%d_%Y', '%Y_%m_%d']); |
4579
|
|
|
|
|
|
|
|
4580
|
|
|
|
|
|
|
=item dates_ignore_trailing_chars |
4581
|
|
|
|
|
|
|
|
4582
|
|
|
|
|
|
|
If set to 1, DateTime auto-detection will ignore trailing text that may follow detected |
4583
|
|
|
|
|
|
|
DateTime-like text. |
4584
|
|
|
|
|
|
|
|
4585
|
|
|
|
|
|
|
Value by default: 1 (do ignore trailing chars) |
4586
|
|
|
|
|
|
|
|
4587
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'logins.csv', dates_ignore_trailing_chars => 0); |
4588
|
|
|
|
|
|
|
|
4589
|
|
|
|
|
|
|
=item dates_search_time |
4590
|
|
|
|
|
|
|
|
4591
|
|
|
|
|
|
|
If set to 1, look for times when detecting DateTime format. That is, whenever a date format |
4592
|
|
|
|
|
|
|
candidate is found, a longer candidate that also contains a time (after the date) is tested. |
4593
|
|
|
|
|
|
|
|
4594
|
|
|
|
|
|
|
Value by default: 1 (do look for times when auto-detecting DateTime formats) |
4595
|
|
|
|
|
|
|
|
4596
|
|
|
|
|
|
|
Example: |
4597
|
|
|
|
|
|
|
|
4598
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'logins.csv', dates_search_time => 0); |
4599
|
|
|
|
|
|
|
|
4600
|
|
|
|
|
|
|
=item dates_locales |
4601
|
|
|
|
|
|
|
|
4602
|
|
|
|
|
|
|
Comma-separated string of locales to test when detecting DateTime formats. Ultimately, Text::AutoCSV |
4603
|
|
|
|
|
|
|
will try all combinations of date formats, times and locales. |
4604
|
|
|
|
|
|
|
|
4605
|
|
|
|
|
|
|
Value by default: none (use perl default locale) |
4606
|
|
|
|
|
|
|
|
4607
|
|
|
|
|
|
|
Example: |
4608
|
|
|
|
|
|
|
|
4609
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'logins.csv', dates_locales => 'fr,de,en'); |
4610
|
|
|
|
|
|
|
|
4611
|
|
|
|
|
|
|
=item dates_zeros_ok |
4612
|
|
|
|
|
|
|
|
4613
|
|
|
|
|
|
|
Boolean. If true, a date made only of 0s is regarded as being empty. |
4614
|
|
|
|
|
|
|
|
4615
|
|
|
|
|
|
|
For example if C<dates_zeros_ok> is False, then a date like 0000-00-00 will be always incorrect (as |
4616
|
|
|
|
|
|
|
the day and month are out of bounds), therefore a format like '%Y-%m-%d' will never match for the |
4617
|
|
|
|
|
|
|
field. |
4618
|
|
|
|
|
|
|
|
4619
|
|
|
|
|
|
|
Conversely if C<dates_zeros_ok> is true, then a date like 0000-00-00 will be processed as if being |
4620
|
|
|
|
|
|
|
the empty string, thus the detection of format will work and when parsed, this "full of zeros" dates |
4621
|
|
|
|
|
|
|
will be processed the same way as the empty string (= value will be undef). |
4622
|
|
|
|
|
|
|
|
4623
|
|
|
|
|
|
|
B<IMPORTANT> |
4624
|
|
|
|
|
|
|
|
4625
|
|
|
|
|
|
|
"0s dates" are evaluated to undef when parsed, thus when converted back to text (out_map), they are |
4626
|
|
|
|
|
|
|
set to an empty string, not to the original value. |
4627
|
|
|
|
|
|
|
|
4628
|
|
|
|
|
|
|
Value by default: 1 |
4629
|
|
|
|
|
|
|
|
4630
|
|
|
|
|
|
|
Example: |
4631
|
|
|
|
|
|
|
|
4632
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', dates_zeros_ok => 0); |
4633
|
|
|
|
|
|
|
|
4634
|
|
|
|
|
|
|
=item out_dates_format |
4635
|
|
|
|
|
|
|
|
4636
|
|
|
|
|
|
|
Enforce the format of dates in output, for all fields that contain a DateTime value. |
4637
|
|
|
|
|
|
|
|
4638
|
|
|
|
|
|
|
The format is written in Strptime format. |
4639
|
|
|
|
|
|
|
|
4640
|
|
|
|
|
|
|
Value by default: none (by default, use format detected on input) |
4641
|
|
|
|
|
|
|
|
4642
|
|
|
|
|
|
|
Example: |
4643
|
|
|
|
|
|
|
|
4644
|
|
|
|
|
|
|
# Detect any field containing a DateTime value and convert it to yyyy-mm-dd whatever the |
4645
|
|
|
|
|
|
|
# input format is. |
4646
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', fields_dates_auto => 1, |
4647
|
|
|
|
|
|
|
out_dates_format => '%F')->write(); |
4648
|
|
|
|
|
|
|
|
4649
|
|
|
|
|
|
|
=item out_dates_locale |
4650
|
|
|
|
|
|
|
|
4651
|
|
|
|
|
|
|
Taken into account only if L</out_dates_format> is used. |
4652
|
|
|
|
|
|
|
|
4653
|
|
|
|
|
|
|
Sets the locale to apply on L</out_dates_format>. |
4654
|
|
|
|
|
|
|
|
4655
|
|
|
|
|
|
|
Value by default: none (by default, use the locale detected on input) |
4656
|
|
|
|
|
|
|
|
4657
|
|
|
|
|
|
|
Example: |
4658
|
|
|
|
|
|
|
|
4659
|
|
|
|
|
|
|
# Detect any field containing a DateTime value and convert it to a US DateTime whatever the |
4660
|
|
|
|
|
|
|
# input format is. |
4661
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', fields_dates_auto => 1, |
4662
|
|
|
|
|
|
|
out_dates_format => '%b %d, %Y, %I:%M:%S %p', out_dates_locale => 'en')->write(); |
4663
|
|
|
|
|
|
|
|
4664
|
|
|
|
|
|
|
=item croak_if_error |
4665
|
|
|
|
|
|
|
|
4666
|
|
|
|
|
|
|
If true, stops the program execution in case of error. |
4667
|
|
|
|
|
|
|
|
4668
|
|
|
|
|
|
|
B<IMPORTANT> |
4669
|
|
|
|
|
|
|
|
4670
|
|
|
|
|
|
|
Value by default: 1 |
4671
|
|
|
|
|
|
|
|
4672
|
|
|
|
|
|
|
If set to zero (C<croak_if_error =E<gt> 0>), errors are displayed as warnings. This printing can |
4673
|
|
|
|
|
|
|
then be affected by setting the L</quiet> attribute. |
4674
|
|
|
|
|
|
|
|
4675
|
|
|
|
|
|
|
=item verbose |
4676
|
|
|
|
|
|
|
|
4677
|
|
|
|
|
|
|
If true, get Text::AutoCSV to be a bit talkative instead of speaking only when warnings and errors |
4678
|
|
|
|
|
|
|
occur. Verbose output is printed to STDERR by default, this can be tuned with the L</infoh> |
4679
|
|
|
|
|
|
|
attribute. |
4680
|
|
|
|
|
|
|
|
4681
|
|
|
|
|
|
|
Value by default: 0 |
4682
|
|
|
|
|
|
|
|
4683
|
|
|
|
|
|
|
Example: |
4684
|
|
|
|
|
|
|
|
4685
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', verbose => 1); |
4686
|
|
|
|
|
|
|
|
4687
|
|
|
|
|
|
|
=item infoh |
4688
|
|
|
|
|
|
|
|
4689
|
|
|
|
|
|
|
File handle to display program's verbose output. Has effect *mainly* with attribute |
4690
|
|
|
|
|
|
|
C<verbose =E<gt> 1>. |
4691
|
|
|
|
|
|
|
|
4692
|
|
|
|
|
|
|
Note B<infoh> is used to display extra information in case of error (if a field does not exist, |
4693
|
|
|
|
|
|
|
Text::AutoCSV will display the list of existing fields). If you don't want such output, you can set |
4694
|
|
|
|
|
|
|
C<infoh> to undef. |
4695
|
|
|
|
|
|
|
|
4696
|
|
|
|
|
|
|
Value by default: \*STDERR |
4697
|
|
|
|
|
|
|
|
4698
|
|
|
|
|
|
|
Example: |
4699
|
|
|
|
|
|
|
|
4700
|
|
|
|
|
|
|
open my $infoh, ">", "log.txt"; |
4701
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', infoh => $infoh); |
4702
|
|
|
|
|
|
|
|
4703
|
|
|
|
|
|
|
=item quiet |
4704
|
|
|
|
|
|
|
|
4705
|
|
|
|
|
|
|
If true, don't display warnings and errors, unless croaking. |
4706
|
|
|
|
|
|
|
|
4707
|
|
|
|
|
|
|
If L</croak_if_error> attribute is set (as per default), still, Text::AutoCSV will produce output |
4708
|
|
|
|
|
|
|
(on STDERR) when croaking miserably. |
4709
|
|
|
|
|
|
|
|
4710
|
|
|
|
|
|
|
When using C<croak_if_error =E<gt> 0>, errors are processed as warnings and if L</quiet> is set (in |
4711
|
|
|
|
|
|
|
addition to L</croak_if_error> being set to 0), there'll be no output. Note this way of working is |
4712
|
|
|
|
|
|
|
not recommended, as things can go wrong without any notice to the caller. |
4713
|
|
|
|
|
|
|
|
4714
|
|
|
|
|
|
|
Example: |
4715
|
|
|
|
|
|
|
|
4716
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', quiet => 1); |
4717
|
|
|
|
|
|
|
|
4718
|
|
|
|
|
|
|
=item one_pass |
4719
|
|
|
|
|
|
|
|
4720
|
|
|
|
|
|
|
If true, Text::AutoCSV will perform one reading of the input. If other readings are triggered, it'll |
4721
|
|
|
|
|
|
|
raise an error and no reading will be done. Should that be the case (you ask Text::AutoCSV to do |
4722
|
|
|
|
|
|
|
something that'll trigger more than one reading of input), Text::AutoCSV will croak as is always the |
4723
|
|
|
|
|
|
|
case if an error occurs. |
4724
|
|
|
|
|
|
|
|
4725
|
|
|
|
|
|
|
Normally Text::AutoCSV will do multiple reads of input to work out certain characteristics of the |
4726
|
|
|
|
|
|
|
CSV: guess of encoding and guess of escape character. |
4727
|
|
|
|
|
|
|
|
4728
|
|
|
|
|
|
|
Also if member functions like L</field_add_link>, L</field_add_copy>, L</field_add_computed>, |
4729
|
|
|
|
|
|
|
L</read> or L</write> are called after input has already been read, it'll trigger further reads as |
4730
|
|
|
|
|
|
|
needed. |
4731
|
|
|
|
|
|
|
|
4732
|
|
|
|
|
|
|
If one wishes a unique read of the input to occur, one_pass attribute is to be set. |
4733
|
|
|
|
|
|
|
|
4734
|
|
|
|
|
|
|
When true, encoding will be assumed to be the first one in the provided list (L</encoding> |
4735
|
|
|
|
|
|
|
attribute), if no encoding attribute is provided, it'll be the first one in the default list, to |
4736
|
|
|
|
|
|
|
date, it is UTF-8. |
4737
|
|
|
|
|
|
|
|
4738
|
|
|
|
|
|
|
When true, and if attribute L</escape_char> is not set, escape_char will be assumed to be '\\' |
4739
|
|
|
|
|
|
|
(backslash). |
4740
|
|
|
|
|
|
|
|
4741
|
|
|
|
|
|
|
By default, one_pass is set if inh attribute is set (caller provides the input file handle of input) |
4742
|
|
|
|
|
|
|
or if input file is stdin (in_file attribute not set or set to an empty string). |
4743
|
|
|
|
|
|
|
|
4744
|
|
|
|
|
|
|
Value by default: |
4745
|
|
|
|
|
|
|
|
4746
|
|
|
|
|
|
|
0 if inh attribute is not set and in_file attribute is set to a non empty string |
4747
|
|
|
|
|
|
|
1 if inh attribute is set or in_file is not set or set to an empty string |
4748
|
|
|
|
|
|
|
|
4749
|
|
|
|
|
|
|
Example: |
4750
|
|
|
|
|
|
|
|
4751
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', one_pass => 1); |
4752
|
|
|
|
|
|
|
|
4753
|
|
|
|
|
|
|
=back |
4754
|
|
|
|
|
|
|
|
4755
|
|
|
|
|
|
|
=head2 read |
4756
|
|
|
|
|
|
|
|
4757
|
|
|
|
|
|
|
$csv->read(); |
4758
|
|
|
|
|
|
|
|
4759
|
|
|
|
|
|
|
Read input entirely. |
4760
|
|
|
|
|
|
|
|
4761
|
|
|
|
|
|
|
B<Return value> |
4762
|
|
|
|
|
|
|
|
4763
|
|
|
|
|
|
|
Returns the object itself in case of success. |
4764
|
|
|
|
|
|
|
Returns undef if error. |
4765
|
|
|
|
|
|
|
|
4766
|
|
|
|
|
|
|
Callback functions (when defined) are invoked, in the following order: |
4767
|
|
|
|
|
|
|
|
4768
|
|
|
|
|
|
|
L</read_post_update_hr>, intended to do updates on fields values after each record read |
4769
|
|
|
|
|
|
|
|
4770
|
|
|
|
|
|
|
L</walker_ar>, called after each record read, with an array ref of fields values |
4771
|
|
|
|
|
|
|
|
4772
|
|
|
|
|
|
|
L</walker_hr>, called after each record read, with a hash ref of fields values |
4773
|
|
|
|
|
|
|
|
4774
|
|
|
|
|
|
|
Example: |
4775
|
|
|
|
|
|
|
|
4776
|
|
|
|
|
|
|
# Do nothing - just check CSV can be read successfully |
4777
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'in.csv')->read(); |
4778
|
|
|
|
|
|
|
|
4779
|
|
|
|
|
|
|
=head2 read_all_in_mem |
4780
|
|
|
|
|
|
|
|
4781
|
|
|
|
|
|
|
$csv->read_all_in_mem(); |
4782
|
|
|
|
|
|
|
|
4783
|
|
|
|
|
|
|
Created in version 1.1.5. Before, existed only as _read_all_in_mem, meaning, was private. |
4784
|
|
|
|
|
|
|
|
4785
|
|
|
|
|
|
|
Read input entirely, as with L</read> function, but enforcing content to be kept in-memory. |
4786
|
|
|
|
|
|
|
|
4787
|
|
|
|
|
|
|
Having the content kept in-memory is implied by search functions (L</vlookup> for example). With |
4788
|
|
|
|
|
|
|
C<read_all_in_mem> you can enforce this behavior without doing a fake search. |
4789
|
|
|
|
|
|
|
|
4790
|
|
|
|
|
|
|
=head2 reset_next_record_hr |
4791
|
|
|
|
|
|
|
|
4792
|
|
|
|
|
|
|
$csv->reset_next_record_hr(); |
4793
|
|
|
|
|
|
|
|
4794
|
|
|
|
|
|
|
Reset the internal status to start from the beginning with L</get_next_record_hr>. Used in |
4795
|
|
|
|
|
|
|
conjunction with L</get_next_record_hr>. |
4796
|
|
|
|
|
|
|
|
4797
|
|
|
|
|
|
|
=head2 get_next_record_hr |
4798
|
|
|
|
|
|
|
|
4799
|
|
|
|
|
|
|
my $hr = $csv->get_next_record_hr(\$opt_key); |
4800
|
|
|
|
|
|
|
|
4801
|
|
|
|
|
|
|
Get the next record content as a hash ref. C<$hr> is undef when the end of records has been reached. |
4802
|
|
|
|
|
|
|
|
4803
|
|
|
|
|
|
|
When specified, C<$opt_key> is set to the current (returned) record key. |
4804
|
|
|
|
|
|
|
|
4805
|
|
|
|
|
|
|
B<NOTE> |
4806
|
|
|
|
|
|
|
|
4807
|
|
|
|
|
|
|
You do not need to call L</reset_next_record_hr> once before using C<get_next_record_hr>. |
4808
|
|
|
|
|
|
|
|
4809
|
|
|
|
|
|
|
Therefore L</reset_next_record_hr> is useful only if you wish to restart from the beginning before |
4810
|
|
|
|
|
|
|
you've reached the end of the records. |
4811
|
|
|
|
|
|
|
|
4812
|
|
|
|
|
|
|
B<NOTE bis> |
4813
|
|
|
|
|
|
|
|
4814
|
|
|
|
|
|
|
L</walker_hr> allows to execute some code each time a record is read, and it better fits with |
4815
|
|
|
|
|
|
|
Text::AutoCSV philosophy. Using a loop with C<get_next_record_hr> is primarily meant for |
4816
|
|
|
|
|
|
|
Text::AutoCSV internal usage. Also when using this mechanism, you get very close to original |
4817
|
|
|
|
|
|
|
Text::CSV logic, that makes Text::AutoCSV less useful. |
4818
|
|
|
|
|
|
|
|
4819
|
|
|
|
|
|
|
B<Return value> |
4820
|
|
|
|
|
|
|
|
4821
|
|
|
|
|
|
|
A hashref of the record, or undef once there's no more record to return. |
4822
|
|
|
|
|
|
|
|
4823
|
|
|
|
|
|
|
Example: |
4824
|
|
|
|
|
|
|
|
4825
|
|
|
|
|
|
|
while (my $hr = $csv->get_next_record_hr()) { |
4826
|
|
|
|
|
|
|
say Dumper($hr); |
4827
|
|
|
|
|
|
|
} |
4828
|
|
|
|
|
|
|
|
4829
|
|
|
|
|
|
|
=head2 write |
4830
|
|
|
|
|
|
|
|
4831
|
|
|
|
|
|
|
$csv->write(); |
4832
|
|
|
|
|
|
|
|
4833
|
|
|
|
|
|
|
Write input into output. |
4834
|
|
|
|
|
|
|
|
4835
|
|
|
|
|
|
|
B<Return value> |
4836
|
|
|
|
|
|
|
|
4837
|
|
|
|
|
|
|
Returns the object itself in case of success. |
4838
|
|
|
|
|
|
|
Returns undef if error. |
4839
|
|
|
|
|
|
|
|
4840
|
|
|
|
|
|
|
- If the content is not in-memory at the time write() is called: |
4841
|
|
|
|
|
|
|
|
4842
|
|
|
|
|
|
|
Each record is read (with call of L</read_post_update_hr>, L</walker_ar> and L</walker_hr>) and then |
4843
|
|
|
|
|
|
|
written. The read-and-write is done in sequence, each record is written to output before the next |
4844
|
|
|
|
|
|
|
record is read from input. |
4845
|
|
|
|
|
|
|
|
4846
|
|
|
|
|
|
|
- If the content is in-memory at the time write() is called: |
4847
|
|
|
|
|
|
|
|
4848
|
|
|
|
|
|
|
No L</read> operation is performed, instead, records are directly written to output. |
4849
|
|
|
|
|
|
|
|
4850
|
|
|
|
|
|
|
If defined, L</out_filter> is called for each record. If the return value of L</out_filter> is |
4851
|
|
|
|
|
|
|
false, the record is not written. |
4852
|
|
|
|
|
|
|
|
4853
|
|
|
|
|
|
|
Example: |
4854
|
|
|
|
|
|
|
|
4855
|
|
|
|
|
|
|
# Copy input to output. |
4856
|
|
|
|
|
|
|
# As CSV is parsed in-between, this copy also checks a number of characteristics about the |
4857
|
|
|
|
|
|
|
# input, as opposed to a plain file copy operation. |
4858
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv')->write(); |
4859
|
|
|
|
|
|
|
|
4860
|
|
|
|
|
|
|
=head2 out_header |
4861
|
|
|
|
|
|
|
|
4862
|
|
|
|
|
|
|
$csv->out_header($field, $header); |
4863
|
|
|
|
|
|
|
|
4864
|
|
|
|
|
|
|
Set the header text of C<$field> to C<$header>. |
4865
|
|
|
|
|
|
|
|
4866
|
|
|
|
|
|
|
By default, the input header value is rewritten as is to output. C<out_header> allows you to change |
4867
|
|
|
|
|
|
|
it. |
4868
|
|
|
|
|
|
|
|
4869
|
|
|
|
|
|
|
B<Return value> |
4870
|
|
|
|
|
|
|
|
4871
|
|
|
|
|
|
|
Returns the object itself. |
4872
|
|
|
|
|
|
|
|
4873
|
|
|
|
|
|
|
Example: |
4874
|
|
|
|
|
|
|
|
4875
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv') |
4876
|
|
|
|
|
|
|
->out_header('LOGIN', 'Login') |
4877
|
|
|
|
|
|
|
->out_header('FULLNAME', 'Full Name') |
4878
|
|
|
|
|
|
|
->write(); |
4879
|
|
|
|
|
|
|
|
4880
|
|
|
|
|
|
|
=head2 print_id |
4881
|
|
|
|
|
|
|
|
4882
|
|
|
|
|
|
|
$csv->print_id(); |
4883
|
|
|
|
|
|
|
|
4884
|
|
|
|
|
|
|
Print out a description of input. Write to \*STDERR by default or to L</infoh> attribute if set. |
4885
|
|
|
|
|
|
|
|
4886
|
|
|
|
|
|
|
The description consists in a list of a few characteristics (CSV separator and the like) followed by |
4887
|
|
|
|
|
|
|
the list of columns with the details of each. |
4888
|
|
|
|
|
|
|
|
4889
|
|
|
|
|
|
|
Example of output: |
4890
|
|
|
|
|
|
|
|
4891
|
|
|
|
|
|
|
If you go to the C<utils> directory of this module and execute the following: |
4892
|
|
|
|
|
|
|
|
4893
|
|
|
|
|
|
|
./csvcopy.pl -i f1.csv -l "1:,A->B,f2.csv" --id |
4894
|
|
|
|
|
|
|
|
4895
|
|
|
|
|
|
|
You will get this output: |
4896
|
|
|
|
|
|
|
|
4897
|
|
|
|
|
|
|
-- f1.csv: |
4898
|
|
|
|
|
|
|
sep_char: , |
4899
|
|
|
|
|
|
|
escape_char: \ |
4900
|
|
|
|
|
|
|
in_encoding: UTF-8 |
4901
|
|
|
|
|
|
|
is_always_quoted: no |
4902
|
|
|
|
|
|
|
|
4903
|
|
|
|
|
|
|
# FIELD HEADER EXT DATA DATETIME FORMAT DATETIME LOCALE |
4904
|
|
|
|
|
|
|
- ----- ------ -------- --------------- --------------- |
4905
|
|
|
|
|
|
|
0 TIMESTAMP timestamp %Y%m%d%H%M%S |
4906
|
|
|
|
|
|
|
1 A a |
4907
|
|
|
|
|
|
|
2 B b |
4908
|
|
|
|
|
|
|
3 C c |
4909
|
|
|
|
|
|
|
4 D d %d/%m/%Y |
4910
|
|
|
|
|
|
|
5 1:SITE 1:SITE link: f2.csv, chain: A->B->* (SITE) |
4911
|
|
|
|
|
|
|
6 1:B 1:B link: f2.csv, chain: A->B->* (B) |
4912
|
|
|
|
|
|
|
|
4913
|
|
|
|
|
|
|
=head2 field_add_computed |
4914
|
|
|
|
|
|
|
|
4915
|
|
|
|
|
|
|
$csv->field_add_computed($new_field, $subref); |
4916
|
|
|
|
|
|
|
|
4917
|
|
|
|
|
|
|
C<$new_field> is the name of the created field. |
4918
|
|
|
|
|
|
|
|
4919
|
|
|
|
|
|
|
C<$subref> is a reference to a sub that'll calculate the new field value. |
4920
|
|
|
|
|
|
|
|
4921
|
|
|
|
|
|
|
B<Return value> |
4922
|
|
|
|
|
|
|
|
4923
|
|
|
|
|
|
|
Returns the object itself in case of success. |
4924
|
|
|
|
|
|
|
Returns undef if error. |
4925
|
|
|
|
|
|
|
|
4926
|
|
|
|
|
|
|
Add a field calculated from other fields values. The subref runs like this: |
4927
|
|
|
|
|
|
|
|
4928
|
|
|
|
|
|
|
sub func { |
4929
|
|
|
|
|
|
|
# $new_field is the name of the field (allows to use one subref for more than one field |
4930
|
|
|
|
|
|
|
# calculation). |
4931
|
|
|
|
|
|
|
# $hr is a hash ref of fields values. |
4932
|
|
|
|
|
|
|
# $stats is a hash ref that gets printed (if Text::AutoCSV is created with verbose => 1) |
4933
|
|
|
|
|
|
|
# in the end. |
4934
|
|
|
|
|
|
|
my ($new_field, $hr, $stats) = @_; |
4935
|
|
|
|
|
|
|
|
4936
|
|
|
|
|
|
|
my $field_value; |
4937
|
|
|
|
|
|
|
# ... compute $field_value |
4938
|
|
|
|
|
|
|
|
4939
|
|
|
|
|
|
|
return $field_value; |
4940
|
|
|
|
|
|
|
} |
4941
|
|
|
|
|
|
|
|
4942
|
|
|
|
|
|
|
Example: |
4943
|
|
|
|
|
|
|
|
4944
|
|
|
|
|
|
|
# Add a field for the full name, made of the concatenation of the |
4945
|
|
|
|
|
|
|
# first name and the last name. |
4946
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'dirpeople.csv', out_file => 'dirwithfn.csv', verbose => 1) |
4947
|
|
|
|
|
|
|
->field_add_computed('FULLNAME', \&calc_fn)->write(); |
4948
|
|
|
|
|
|
|
sub calc_fn { |
4949
|
|
|
|
|
|
|
my ($new_field, $hr, $stats) = @_; |
4950
|
|
|
|
|
|
|
die "Man, you are in serious trouble!" unless $new_field eq 'FULLNAME'; |
4951
|
|
|
|
|
|
|
my $fn = $hr->{'FIRSTNAME'} . ' ' . uc($hr->{'LASTNAME'}); |
4952
|
|
|
|
|
|
|
$stats->{'empty full name'}++ if $fn eq ' '; |
4953
|
|
|
|
|
|
|
return $fn; |
4954
|
|
|
|
|
|
|
} |
4955
|
|
|
|
|
|
|
|
4956
|
|
|
|
|
|
|
=head2 field_add_copy |
4957
|
|
|
|
|
|
|
|
4958
|
|
|
|
|
|
|
$csv->field_add_copy($new_field, $src_field, $opt_subref); |
4959
|
|
|
|
|
|
|
|
4960
|
|
|
|
|
|
|
C<$new_field> if the name of the new field. |
4961
|
|
|
|
|
|
|
|
4962
|
|
|
|
|
|
|
C<$src_field> is the name of the field being copied. |
4963
|
|
|
|
|
|
|
|
4964
|
|
|
|
|
|
|
C<$opt_subref> is optional. It is a reference to a sub that takes one string (the value of |
4965
|
|
|
|
|
|
|
C<$src_field>) and returns a string (the value assigned to C<$new_field>). |
4966
|
|
|
|
|
|
|
|
4967
|
|
|
|
|
|
|
B<Return value> |
4968
|
|
|
|
|
|
|
|
4969
|
|
|
|
|
|
|
Returns the object itself in case of success. |
4970
|
|
|
|
|
|
|
Returns undef if error. |
4971
|
|
|
|
|
|
|
|
4972
|
|
|
|
|
|
|
C<field_add_copy> is a special case of L</field_add_computed>. The advantage of C<field_add_copy> is |
4973
|
|
|
|
|
|
|
that it relies on a sub that is Text::AutoCSV "unaware", just taking one string as input and |
4974
|
|
|
|
|
|
|
returning another string as output. |
4975
|
|
|
|
|
|
|
|
4976
|
|
|
|
|
|
|
B<IMPORTANT> |
4977
|
|
|
|
|
|
|
|
4978
|
|
|
|
|
|
|
The current field value is passed to C<field_add_copy> in $_. |
4979
|
|
|
|
|
|
|
|
4980
|
|
|
|
|
|
|
A call to |
4981
|
|
|
|
|
|
|
|
4982
|
|
|
|
|
|
|
$csv->field_add_copy($new_field, $src_field, $subref); |
4983
|
|
|
|
|
|
|
|
4984
|
|
|
|
|
|
|
is equivalent to |
4985
|
|
|
|
|
|
|
|
4986
|
|
|
|
|
|
|
$csv->field_add_computed($new_field, \&subref2); |
4987
|
|
|
|
|
|
|
sub subref2 { |
4988
|
|
|
|
|
|
|
my (undef, $hr) = @_; |
4989
|
|
|
|
|
|
|
local $_ = $hr->{$src_field}; |
4990
|
|
|
|
|
|
|
return $subref->(); |
4991
|
|
|
|
|
|
|
} |
4992
|
|
|
|
|
|
|
|
4993
|
|
|
|
|
|
|
Example of a field copy + pass copied field in upper case and surround content with <<>>: |
4994
|
|
|
|
|
|
|
|
4995
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'dirpeople.csv', out_file => 'd2.csv'); |
4996
|
|
|
|
|
|
|
$csv->field_add_copy('UCLAST', 'LASTNAME', \&myfunc); |
4997
|
|
|
|
|
|
|
$csv->write(); |
4998
|
|
|
|
|
|
|
sub myfunc { s/^.*$/<<\U$&>>/; $_; } |
4999
|
|
|
|
|
|
|
|
5000
|
|
|
|
|
|
|
Note that the calls can be chained as most member functions return the object itself upon success. |
5001
|
|
|
|
|
|
|
The example above is equivalent to: |
5002
|
|
|
|
|
|
|
|
5003
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'dirpeople.csv', out_file => 'd2.csv') |
5004
|
|
|
|
|
|
|
->field_add_copy('UCLAST', 'LASTNAME', \&myfunc) |
5005
|
|
|
|
|
|
|
->write(); |
5006
|
|
|
|
|
|
|
sub myfunc { s/^.*$/<<\U$&>>/; $_; } |
5007
|
|
|
|
|
|
|
|
5008
|
|
|
|
|
|
|
=head2 field_add_link |
5009
|
|
|
|
|
|
|
|
5010
|
|
|
|
|
|
|
$csv->field_add_link($new_field, $chain, $linked_file, \%opts); |
5011
|
|
|
|
|
|
|
|
5012
|
|
|
|
|
|
|
C<$new_field> is the name of the new field. |
5013
|
|
|
|
|
|
|
|
5014
|
|
|
|
|
|
|
C<$chain> is the CHAIN of the link, that is: 'LOCAL->REMOTE->PICK' where: |
5015
|
|
|
|
|
|
|
|
5016
|
|
|
|
|
|
|
C<LOCAL> is the field name to read the value from. |
5017
|
|
|
|
|
|
|
|
5018
|
|
|
|
|
|
|
C<REMOTE> is the linked field to find the value in. This field belongs to $linked_file. |
5019
|
|
|
|
|
|
|
|
5020
|
|
|
|
|
|
|
C<PICK> is the field from which to read the value of, in the record found by the search. This field |
5021
|
|
|
|
|
|
|
belongs to $linked_file. |
5022
|
|
|
|
|
|
|
|
5023
|
|
|
|
|
|
|
If $new_field is undef, the new field name is the name of the third field of $chain (PICK). |
5024
|
|
|
|
|
|
|
|
5025
|
|
|
|
|
|
|
C<$linked_file> is the name of the linked file, that gets read in a Text::AutoCSV object created |
5026
|
|
|
|
|
|
|
on-the-fly to do the search on. C<$linked_file> can also be a Text::AutoCSV object that you created |
5027
|
|
|
|
|
|
|
yourself, allowing for more flexibility. Example: |
5028
|
|
|
|
|
|
|
|
5029
|
|
|
|
|
|
|
my $lcsv = Text::AutoCSV->new(in_file => 'logins.csv', case => 1); |
5030
|
|
|
|
|
|
|
$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', $lcsv); |
5031
|
|
|
|
|
|
|
|
5032
|
|
|
|
|
|
|
C<\%opts> is a hash ref of optional attributes. The same values can be provided as with vlookup. |
5033
|
|
|
|
|
|
|
|
5034
|
|
|
|
|
|
|
=over 4 |
5035
|
|
|
|
|
|
|
|
5036
|
|
|
|
|
|
|
=item trim |
5037
|
|
|
|
|
|
|
|
5038
|
|
|
|
|
|
|
If set to 1, searches will ignore leading and trailing spaces. That is, a C<LOCAL> value of ' x ' |
5039
|
|
|
|
|
|
|
will match with a C<REMOTE> value of 'x'. |
5040
|
|
|
|
|
|
|
|
5041
|
|
|
|
|
|
|
If option is not present, use L</search_value_if_not_found> attribute of object (default value: 1). |
5042
|
|
|
|
|
|
|
|
5043
|
|
|
|
|
|
|
Example: |
5044
|
|
|
|
|
|
|
|
5045
|
|
|
|
|
|
|
$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv', |
5046
|
|
|
|
|
|
|
{ trim => 0 }); |
5047
|
|
|
|
|
|
|
|
5048
|
|
|
|
|
|
|
=item case |
5049
|
|
|
|
|
|
|
|
5050
|
|
|
|
|
|
|
If set to 1, searches will take the case into account. That is, a C<LOCAL> value of 'X' will B<not> |
5051
|
|
|
|
|
|
|
match with a C<REMOTE> value of 'x'. |
5052
|
|
|
|
|
|
|
|
5053
|
|
|
|
|
|
|
If option is not present, use L</search_case> attribute of object (default value: 0). |
5054
|
|
|
|
|
|
|
|
5055
|
|
|
|
|
|
|
Example: |
5056
|
|
|
|
|
|
|
|
5057
|
|
|
|
|
|
|
$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv', |
5058
|
|
|
|
|
|
|
{ case => 1 }); |
5059
|
|
|
|
|
|
|
|
5060
|
|
|
|
|
|
|
=item ignore_empty |
5061
|
|
|
|
|
|
|
|
5062
|
|
|
|
|
|
|
If set to 1, empty values won't match. That is, a C<LOCAL> value of '' will not match with a |
5063
|
|
|
|
|
|
|
C<REMOTE> value of ''. |
5064
|
|
|
|
|
|
|
|
5065
|
|
|
|
|
|
|
If option is not present, use L</search_ignore_empty> attribute of object (default value: 1). |
5066
|
|
|
|
|
|
|
|
5067
|
|
|
|
|
|
|
Example: |
5068
|
|
|
|
|
|
|
|
5069
|
|
|
|
|
|
|
$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv', |
5070
|
|
|
|
|
|
|
{ ignore_empty => 0 }); |
5071
|
|
|
|
|
|
|
|
5072
|
|
|
|
|
|
|
=item value_if_not_found |
5073
|
|
|
|
|
|
|
|
5074
|
|
|
|
|
|
|
If the searched value is not found, the value of the field is undef, that produces an empty string |
5075
|
|
|
|
|
|
|
at write time. Instead, you can specify the value. |
5076
|
|
|
|
|
|
|
|
5077
|
|
|
|
|
|
|
If option is not present, use L</search_value_if_not_found> attribute of object (default value: |
5078
|
|
|
|
|
|
|
undef). |
5079
|
|
|
|
|
|
|
|
5080
|
|
|
|
|
|
|
Example: |
5081
|
|
|
|
|
|
|
|
5082
|
|
|
|
|
|
|
$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv', |
5083
|
|
|
|
|
|
|
{ value_if_not_found => '<not found!>' }); |
5084
|
|
|
|
|
|
|
|
5085
|
|
|
|
|
|
|
=item value_if_found |
5086
|
|
|
|
|
|
|
|
5087
|
|
|
|
|
|
|
If the searched value is found, you can specify the value to return. |
5088
|
|
|
|
|
|
|
|
5089
|
|
|
|
|
|
|
If option is not present, use L</search_value_if_found> attribute of object (default value: none). |
5090
|
|
|
|
|
|
|
|
5091
|
|
|
|
|
|
|
B<NOTE> |
5092
|
|
|
|
|
|
|
|
5093
|
|
|
|
|
|
|
Although the C<PICK> field is ignored when using this option, you must specify it any way. |
5094
|
|
|
|
|
|
|
|
5095
|
|
|
|
|
|
|
Example: |
5096
|
|
|
|
|
|
|
|
5097
|
|
|
|
|
|
|
$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv', |
5098
|
|
|
|
|
|
|
{ value_if_not_found => '0', value_if_found => '1' }); |
5099
|
|
|
|
|
|
|
|
5100
|
|
|
|
|
|
|
=item value_if_ambiguous |
5101
|
|
|
|
|
|
|
|
5102
|
|
|
|
|
|
|
If the searched value is found in more than one record, the value of the field is undef, that |
5103
|
|
|
|
|
|
|
produces an empty string at write time. Instead, you can specify the value. |
5104
|
|
|
|
|
|
|
|
5105
|
|
|
|
|
|
|
If option is not present, use L</search_value_if_ambiguous> attribute of object (default value: |
5106
|
|
|
|
|
|
|
undef). |
5107
|
|
|
|
|
|
|
|
5108
|
|
|
|
|
|
|
Example: |
5109
|
|
|
|
|
|
|
|
5110
|
|
|
|
|
|
|
$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv', |
5111
|
|
|
|
|
|
|
{ value_if_ambiguous => '<ambiguous!>' }); |
5112
|
|
|
|
|
|
|
|
5113
|
|
|
|
|
|
|
=item ignore_ambiguous |
5114
|
|
|
|
|
|
|
|
5115
|
|
|
|
|
|
|
Boolean value. If ignore_ambiguous is true and the searched value is found in more than one record, |
5116
|
|
|
|
|
|
|
then, silently fall back on returning the value of the first record. Obviously if |
5117
|
|
|
|
|
|
|
C<ignore_ambiguous> is true, then the value of L</value_if_ambiguous> is ignored. |
5118
|
|
|
|
|
|
|
|
5119
|
|
|
|
|
|
|
If option is not present, use L</search_ignore_ambiguous> attribute of object (default value: 1). |
5120
|
|
|
|
|
|
|
|
5121
|
|
|
|
|
|
|
Example: |
5122
|
|
|
|
|
|
|
|
5123
|
|
|
|
|
|
|
$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv', |
5124
|
|
|
|
|
|
|
{ ignore_ambiguous => 1 }); |
5125
|
|
|
|
|
|
|
|
5126
|
|
|
|
|
|
|
Example with multiple options: |
5127
|
|
|
|
|
|
|
|
5128
|
|
|
|
|
|
|
$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv', |
5129
|
|
|
|
|
|
|
{ value_if_not_found => '?', ignore_ambiguous => 1 }); |
5130
|
|
|
|
|
|
|
|
5131
|
|
|
|
|
|
|
=back |
5132
|
|
|
|
|
|
|
|
5133
|
|
|
|
|
|
|
B<Return value> |
5134
|
|
|
|
|
|
|
|
5135
|
|
|
|
|
|
|
Returns the object itself in case of success. |
5136
|
|
|
|
|
|
|
Returns undef if error. |
5137
|
|
|
|
|
|
|
|
5138
|
|
|
|
|
|
|
Example of field_add_link usage: |
5139
|
|
|
|
|
|
|
|
5140
|
|
|
|
|
|
|
my $nom_compose = 0; |
5141
|
|
|
|
|
|
|
my $zip_not_found = 0; |
5142
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'pers.csv', walker_hr => \&walk) |
5143
|
|
|
|
|
|
|
->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv')->read(); |
5144
|
|
|
|
|
|
|
sub walk { |
5145
|
|
|
|
|
|
|
my $hr = shift; |
5146
|
|
|
|
|
|
|
$nom_compose++ if $hr->{'NAME'} =~ m/[- ]/; |
5147
|
|
|
|
|
|
|
$zip_not_found++ unless defined($hr->{'MYCITY'}); |
5148
|
|
|
|
|
|
|
} |
5149
|
|
|
|
|
|
|
print("Number of persons with a multi-part name: $nom_compose\n"); |
5150
|
|
|
|
|
|
|
print("Number of persons with unknown zipcode: $zip_not_found\n"); |
5151
|
|
|
|
|
|
|
|
5152
|
|
|
|
|
|
|
=head2 links |
5153
|
|
|
|
|
|
|
|
5154
|
|
|
|
|
|
|
$csv->links($prefix, $chain, $linked_file, \%opts); |
5155
|
|
|
|
|
|
|
|
5156
|
|
|
|
|
|
|
C<$prefix> is the name to add to joined fields |
5157
|
|
|
|
|
|
|
|
5158
|
|
|
|
|
|
|
C<$chain> is the JOINCHAIN of the link, that is: 'LOCAL->REMOTE' where: |
5159
|
|
|
|
|
|
|
|
5160
|
|
|
|
|
|
|
C<LOCAL> is the field name to read the value from. |
5161
|
|
|
|
|
|
|
|
5162
|
|
|
|
|
|
|
C<REMOTE> is the linked field to find the value in. This field belongs to $linked_file. |
5163
|
|
|
|
|
|
|
|
5164
|
|
|
|
|
|
|
As opposed to L</field_add_link>, there is no C<PICK> part, as all fields of target are read. |
5165
|
|
|
|
|
|
|
|
5166
|
|
|
|
|
|
|
As opposed to Text::AutoCSV habits of croaking whenever a field name is duplicate, here, the |
5167
|
|
|
|
|
|
|
duplicates are resolved by appending _2 to the joined field name if it already exists. If _2 already |
5168
|
|
|
|
|
|
|
exists, too, then _3 is appended instead, and so on, until a non-duplicate is found. This mechanism |
5169
|
|
|
|
|
|
|
is executed given the difficulty to control all field names when joining CSVs. |
5170
|
|
|
|
|
|
|
|
5171
|
|
|
|
|
|
|
C<$linked_file> and C<\%opts> work exactly the same way as for L</field_add_link>, see |
5172
|
|
|
|
|
|
|
L</field_add_link> for help. |
5173
|
|
|
|
|
|
|
|
5174
|
|
|
|
|
|
|
B<Return value> |
5175
|
|
|
|
|
|
|
|
5176
|
|
|
|
|
|
|
Returns the object itself in case of success. |
5177
|
|
|
|
|
|
|
Returns undef if error. |
5178
|
|
|
|
|
|
|
|
5179
|
|
|
|
|
|
|
B<NOTE> |
5180
|
|
|
|
|
|
|
|
5181
|
|
|
|
|
|
|
This function used to be called C<join> but got renamed to avoid clash with perl' builtin C<join>. |
5182
|
|
|
|
|
|
|
|
5183
|
|
|
|
|
|
|
Example: |
5184
|
|
|
|
|
|
|
|
5185
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'pers.csv', out_file => 'pers_with_city.csv') |
5186
|
|
|
|
|
|
|
->links('Read from zips.csv:', 'ZIP->ZIPCODE', 'zips.csv')->write(); |
5187
|
|
|
|
|
|
|
|
5188
|
|
|
|
|
|
|
=head2 get_in_encoding |
5189
|
|
|
|
|
|
|
|
5190
|
|
|
|
|
|
|
my $enc = $csv->get_in_encoding(); |
5191
|
|
|
|
|
|
|
|
5192
|
|
|
|
|
|
|
Return the string of input encoding, for example 'latin2' or 'UTF-8', etc. |
5193
|
|
|
|
|
|
|
|
5194
|
|
|
|
|
|
|
=head2 get_in_file_disp |
5195
|
|
|
|
|
|
|
|
5196
|
|
|
|
|
|
|
my $f = $csv->get_in_file_disp(); |
5197
|
|
|
|
|
|
|
|
5198
|
|
|
|
|
|
|
Return the printable name of in_file. |
5199
|
|
|
|
|
|
|
|
5200
|
|
|
|
|
|
|
=head2 get_sep_char |
5201
|
|
|
|
|
|
|
|
5202
|
|
|
|
|
|
|
my $s = $csv->get_sep_char(); |
5203
|
|
|
|
|
|
|
|
5204
|
|
|
|
|
|
|
Return the string of the input CSV separator character, for example ',' or ';'. |
5205
|
|
|
|
|
|
|
|
5206
|
|
|
|
|
|
|
=head2 get_escape_char |
5207
|
|
|
|
|
|
|
|
5208
|
|
|
|
|
|
|
my $e = $csv->get_escape_char(); |
5209
|
|
|
|
|
|
|
|
5210
|
|
|
|
|
|
|
Return the string of the input escape character, for example '"' or '\\'. |
5211
|
|
|
|
|
|
|
|
5212
|
|
|
|
|
|
|
=head2 get_is_always_quoted |
5213
|
|
|
|
|
|
|
|
5214
|
|
|
|
|
|
|
my $a = $csv->get_is_always_quoted(); |
5215
|
|
|
|
|
|
|
|
5216
|
|
|
|
|
|
|
Return 1 if all fields of input are always quoted, 0 otherwise. |
5217
|
|
|
|
|
|
|
|
5218
|
|
|
|
|
|
|
=head2 get_coldata |
5219
|
|
|
|
|
|
|
|
5220
|
|
|
|
|
|
|
my @cd = get_coldata(); |
5221
|
|
|
|
|
|
|
|
5222
|
|
|
|
|
|
|
Return an array that describes each column, from the first one (column 0) to the last. |
5223
|
|
|
|
|
|
|
|
5224
|
|
|
|
|
|
|
Each element of the array is itself an array ref that contains 5 elements: |
5225
|
|
|
|
|
|
|
|
5226
|
|
|
|
|
|
|
0: Name of the field (as accessed in *_hr functions) |
5227
|
|
|
|
|
|
|
1: Content of the field in the header line (if input has a header line) |
5228
|
|
|
|
|
|
|
2: Column content type, shows some meta-data of fields created with field_add_* functions |
5229
|
|
|
|
|
|
|
3: Datetime format detected, if ever, in the format Strptime |
5230
|
|
|
|
|
|
|
4: Locale of DateTime format detected, if ever |
5231
|
|
|
|
|
|
|
5: Multiline field: '1' if not, 'm' if newlines encountered in the field |
5232
|
|
|
|
|
|
|
|
5233
|
|
|
|
|
|
|
=head2 get_pass_count |
5234
|
|
|
|
|
|
|
|
5235
|
|
|
|
|
|
|
my $n = $csv->get_pass_count(); |
5236
|
|
|
|
|
|
|
|
5237
|
|
|
|
|
|
|
Return the number of input readings done. Useful only if you're interested in Text::AutoCSV |
5238
|
|
|
|
|
|
|
internals. |
5239
|
|
|
|
|
|
|
|
5240
|
|
|
|
|
|
|
=head2 get_in_mem_record_count |
5241
|
|
|
|
|
|
|
|
5242
|
|
|
|
|
|
|
my $m = $csv->get_in_mem_record_count(); |
5243
|
|
|
|
|
|
|
|
5244
|
|
|
|
|
|
|
Return the number of records currently stored in-memory. Useful only if you're interested in |
5245
|
|
|
|
|
|
|
Text::AutoCSV internals. |
5246
|
|
|
|
|
|
|
|
5247
|
|
|
|
|
|
|
=head2 get_max_in_mem_record_count |
5248
|
|
|
|
|
|
|
|
5249
|
|
|
|
|
|
|
my $mm = $csv->get_max_in_mem_record_count(); |
5250
|
|
|
|
|
|
|
|
5251
|
|
|
|
|
|
|
Return the maximum number of records ever stored in-memory. Indeed this number can decrease: certain |
5252
|
|
|
|
|
|
|
functions like field_add* member-functions discard in-memory content. Useful only if you're |
5253
|
|
|
|
|
|
|
interested in Text::AutoCSV internals. |
5254
|
|
|
|
|
|
|
|
5255
|
|
|
|
|
|
|
=head2 get_fields_names |
5256
|
|
|
|
|
|
|
|
5257
|
|
|
|
|
|
|
my @f = $csv->get_fields_names(); |
5258
|
|
|
|
|
|
|
|
5259
|
|
|
|
|
|
|
Return an array of the internal names of the columns. |
5260
|
|
|
|
|
|
|
|
5261
|
|
|
|
|
|
|
=head2 get_field_name |
5262
|
|
|
|
|
|
|
|
5263
|
|
|
|
|
|
|
my $name = $csv->get_field_name($n); |
5264
|
|
|
|
|
|
|
|
5265
|
|
|
|
|
|
|
Return the C<$n>-th column name, the first column being number 0. |
5266
|
|
|
|
|
|
|
|
5267
|
|
|
|
|
|
|
Example: |
5268
|
|
|
|
|
|
|
|
5269
|
|
|
|
|
|
|
# Get the field name of the third column |
5270
|
|
|
|
|
|
|
my $col = $csv->get_field_name(2); |
5271
|
|
|
|
|
|
|
|
5272
|
|
|
|
|
|
|
=head2 get_stats |
5273
|
|
|
|
|
|
|
|
5274
|
|
|
|
|
|
|
my %stats = $csv->get_stats(); |
5275
|
|
|
|
|
|
|
|
5276
|
|
|
|
|
|
|
Certain callback functions provide a parameter to record event count: L</field_add_computed>, |
5277
|
|
|
|
|
|
|
L</read_post_update_hr>, L</walker_ar> and L</walker_hr>. By default, these stats are displayed if |
5278
|
|
|
|
|
|
|
Text::AutoCSV got created with attribute C<verbose =E<gt> 1>. get_stats() returns the statistics |
5279
|
|
|
|
|
|
|
hash of the object. |
5280
|
|
|
|
|
|
|
|
5281
|
|
|
|
|
|
|
B<IMPORTANT> |
5282
|
|
|
|
|
|
|
|
5283
|
|
|
|
|
|
|
As opposed to most functions that trigger input reading automatically (search functions and other |
5284
|
|
|
|
|
|
|
get_* functions), C<get_stats> just returns you the stats as it is, regardless of whether some |
5285
|
|
|
|
|
|
|
execution already occured. |
5286
|
|
|
|
|
|
|
|
5287
|
|
|
|
|
|
|
=head2 get_nb_rows |
5288
|
|
|
|
|
|
|
|
5289
|
|
|
|
|
|
|
my $nb_rows = $csv->get_nb_rows(); |
5290
|
|
|
|
|
|
|
|
5291
|
|
|
|
|
|
|
Gives the number of rows of the input. Does not trigger any reading - just provides the number of |
5292
|
|
|
|
|
|
|
rows as known at the moment of the call. If unknown, return undef. Typically, the number of rows is |
5293
|
|
|
|
|
|
|
known after doing the initial detection of CSV options (escape character, etc.), or, after doing one |
5294
|
|
|
|
|
|
|
complete reading. |
5295
|
|
|
|
|
|
|
|
5296
|
|
|
|
|
|
|
The header line counts for one row. |
5297
|
|
|
|
|
|
|
|
5298
|
|
|
|
|
|
|
B<IMPORTANT> |
5299
|
|
|
|
|
|
|
|
5300
|
|
|
|
|
|
|
As some fields can contain new lines, this number is not necessarily identical to the number of |
5301
|
|
|
|
|
|
|
lines. |
5302
|
|
|
|
|
|
|
|
5303
|
|
|
|
|
|
|
=head2 set_walker_ar |
5304
|
|
|
|
|
|
|
|
5305
|
|
|
|
|
|
|
$csv->set_walker_ar($subref); |
5306
|
|
|
|
|
|
|
|
5307
|
|
|
|
|
|
|
Normally one wants to define it at object creation time using L</walker_ar> attribute. |
5308
|
|
|
|
|
|
|
C<set_walker_ar> allows to assign the attribute walker_ar after object creation. |
5309
|
|
|
|
|
|
|
|
5310
|
|
|
|
|
|
|
See attribute L</walker_ar> for help about the way C<$subref> should work. |
5311
|
|
|
|
|
|
|
|
5312
|
|
|
|
|
|
|
B<Return value> |
5313
|
|
|
|
|
|
|
|
5314
|
|
|
|
|
|
|
Returns the object itself in case of success. |
5315
|
|
|
|
|
|
|
Returns undef if error. |
5316
|
|
|
|
|
|
|
|
5317
|
|
|
|
|
|
|
Example: |
5318
|
|
|
|
|
|
|
|
5319
|
|
|
|
|
|
|
# Calculate the total of the two first columns, the first column being money in and the |
5320
|
|
|
|
|
|
|
# second one being money out. |
5321
|
|
|
|
|
|
|
my ($actif, $passif) = (0, 0); |
5322
|
|
|
|
|
|
|
$csv->set_walker_ar(sub { my $ar = $_[0]; $actif += $ar->[0]; $passif += $ar->[1]; })->read(); |
5323
|
|
|
|
|
|
|
print("Actif = $actif\n"); |
5324
|
|
|
|
|
|
|
print("Passif = $passif\n"); |
5325
|
|
|
|
|
|
|
|
5326
|
|
|
|
|
|
|
=head2 set_walker_hr |
5327
|
|
|
|
|
|
|
|
5328
|
|
|
|
|
|
|
$csv->set_walker_hr($subref); |
5329
|
|
|
|
|
|
|
|
5330
|
|
|
|
|
|
|
Normally one wants to define it at object creation time using L</walker_hr> attribute. |
5331
|
|
|
|
|
|
|
C<set_walker_hr> allows to assign the attribute L</walker_hr> after object creation. |
5332
|
|
|
|
|
|
|
|
5333
|
|
|
|
|
|
|
See attribute L</walker_hr> for help about the way C<$subref> should work. |
5334
|
|
|
|
|
|
|
|
5335
|
|
|
|
|
|
|
B<Return value> |
5336
|
|
|
|
|
|
|
|
5337
|
|
|
|
|
|
|
Returns the object itself in case of success. |
5338
|
|
|
|
|
|
|
Returns undef if error. |
5339
|
|
|
|
|
|
|
|
5340
|
|
|
|
|
|
|
Example: |
5341
|
|
|
|
|
|
|
|
5342
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'directory.csv', verbose => 1); |
5343
|
|
|
|
|
|
|
|
5344
|
|
|
|
|
|
|
# ... |
5345
|
|
|
|
|
|
|
|
5346
|
|
|
|
|
|
|
$csv->set_walker_hr( |
5347
|
|
|
|
|
|
|
sub { |
5348
|
|
|
|
|
|
|
my ($hr, $stat) = @_; |
5349
|
|
|
|
|
|
|
$stat{'not capital name'}++, return if $hr->{'NAME'} ne uc($hr->{'NAME'}); |
5350
|
|
|
|
|
|
|
$stat{'name is capital letters'}++; |
5351
|
|
|
|
|
|
|
} |
5352
|
|
|
|
|
|
|
)->read(); |
5353
|
|
|
|
|
|
|
|
5354
|
|
|
|
|
|
|
=head2 set_out_file |
5355
|
|
|
|
|
|
|
|
5356
|
|
|
|
|
|
|
$csv->set_out_file($out_file); |
5357
|
|
|
|
|
|
|
|
5358
|
|
|
|
|
|
|
Normally one wants to define it at object creation time using L</out_file> attribute. |
5359
|
|
|
|
|
|
|
C<set_out_file> allows to assign the attribute L</out_file> after object creation. It is set to |
5360
|
|
|
|
|
|
|
C<$out_file> value. |
5361
|
|
|
|
|
|
|
|
5362
|
|
|
|
|
|
|
B<Return value> |
5363
|
|
|
|
|
|
|
|
5364
|
|
|
|
|
|
|
Returns the object itself in case of success. |
5365
|
|
|
|
|
|
|
Returns undef if error. |
5366
|
|
|
|
|
|
|
|
5367
|
|
|
|
|
|
|
Example: |
5368
|
|
|
|
|
|
|
|
5369
|
|
|
|
|
|
|
$csv->set_out_file('mycopy.csv')->write(); |
5370
|
|
|
|
|
|
|
|
5371
|
|
|
|
|
|
|
=head2 get_keys |
5372
|
|
|
|
|
|
|
|
5373
|
|
|
|
|
|
|
my @allkeys = $csv->get_keys(); |
5374
|
|
|
|
|
|
|
|
5375
|
|
|
|
|
|
|
Returns an array of all the record keys of input. A record key is a unique identifier that |
5376
|
|
|
|
|
|
|
designates the record. |
5377
|
|
|
|
|
|
|
|
5378
|
|
|
|
|
|
|
At the moment it is just an integer being the record number, the first one (that comes after the |
5379
|
|
|
|
|
|
|
header line) being of number 0. For example if $csv input is made of one header line and 3 records |
5380
|
|
|
|
|
|
|
(that is, a 4-line file typically, if no record contains a line break), $csv->get_keys() returns |
5381
|
|
|
|
|
|
|
|
5382
|
|
|
|
|
|
|
(0, 1, 2) |
5383
|
|
|
|
|
|
|
|
5384
|
|
|
|
|
|
|
B<IMPORTANT> |
5385
|
|
|
|
|
|
|
|
5386
|
|
|
|
|
|
|
If not yet done, this function causes the input to be read entirely and stored in-memory. |
5387
|
|
|
|
|
|
|
|
5388
|
|
|
|
|
|
|
=head2 get_hr_all |
5389
|
|
|
|
|
|
|
|
5390
|
|
|
|
|
|
|
my @allin = $csv->get_hr_all(); |
5391
|
|
|
|
|
|
|
|
5392
|
|
|
|
|
|
|
Returns an array of all record contents of the input, each record being a hash ref. |
5393
|
|
|
|
|
|
|
|
5394
|
|
|
|
|
|
|
B<IMPORTANT> |
5395
|
|
|
|
|
|
|
|
5396
|
|
|
|
|
|
|
If not yet done, this function causes the input to be read entirely and stored in-memory. |
5397
|
|
|
|
|
|
|
|
5398
|
|
|
|
|
|
|
=head2 get_row_ar |
5399
|
|
|
|
|
|
|
|
5400
|
|
|
|
|
|
|
my $row_ar = $csv->get_row_ar($record_key); |
5401
|
|
|
|
|
|
|
|
5402
|
|
|
|
|
|
|
Returns an array ref of the record designated by C<$record_key>. |
5403
|
|
|
|
|
|
|
|
5404
|
|
|
|
|
|
|
Example: |
5405
|
|
|
|
|
|
|
|
5406
|
|
|
|
|
|
|
# Get content (as array ref) of last record |
5407
|
|
|
|
|
|
|
my @allkeys = $csv->get_keys(); |
5408
|
|
|
|
|
|
|
my $lastk = $allkeys[-1]; |
5409
|
|
|
|
|
|
|
my $lastrec_ar = $csv->get_row_ar($lastk); |
5410
|
|
|
|
|
|
|
|
5411
|
|
|
|
|
|
|
B<IMPORTANT> |
5412
|
|
|
|
|
|
|
|
5413
|
|
|
|
|
|
|
If not yet done, this function causes the input to be read entirely and stored in-memory. |
5414
|
|
|
|
|
|
|
|
5415
|
|
|
|
|
|
|
=head2 get_row_hr |
5416
|
|
|
|
|
|
|
|
5417
|
|
|
|
|
|
|
my $row_hr = $csv->get_row_hr($record_key); |
5418
|
|
|
|
|
|
|
|
5419
|
|
|
|
|
|
|
Returns a hash ref of the record designated by C<$record_key>. |
5420
|
|
|
|
|
|
|
|
5421
|
|
|
|
|
|
|
Example: |
5422
|
|
|
|
|
|
|
|
5423
|
|
|
|
|
|
|
# Get content (as hash ref) of first record |
5424
|
|
|
|
|
|
|
my @allkeys = $csv->get_keys(); |
5425
|
|
|
|
|
|
|
my $firstk = $allkeys[0]; |
5426
|
|
|
|
|
|
|
my $firstrec_hr = $csv->get_row_hr($firstk); |
5427
|
|
|
|
|
|
|
|
5428
|
|
|
|
|
|
|
B<IMPORTANT> |
5429
|
|
|
|
|
|
|
|
5430
|
|
|
|
|
|
|
If not yet done, this function causes the input to be read entirely and stored in-memory. |
5431
|
|
|
|
|
|
|
|
5432
|
|
|
|
|
|
|
=head2 get_cell |
5433
|
|
|
|
|
|
|
|
5434
|
|
|
|
|
|
|
my $val = $csv->get_cell($record_key, $field_name); |
5435
|
|
|
|
|
|
|
|
5436
|
|
|
|
|
|
|
Return the value of the cell designated by its record key (C<$record_key>) and field name |
5437
|
|
|
|
|
|
|
(C<$field_name>). |
5438
|
|
|
|
|
|
|
|
5439
|
|
|
|
|
|
|
Example: |
5440
|
|
|
|
|
|
|
|
5441
|
|
|
|
|
|
|
my @allkeys = $csv->get_keys(); |
5442
|
|
|
|
|
|
|
my $midk = $allkeys[int($#allkeys / 2)]; |
5443
|
|
|
|
|
|
|
my $midname = $csv->get_cell($midk, 'NAME'); |
5444
|
|
|
|
|
|
|
|
5445
|
|
|
|
|
|
|
Note the above example is equivalent to: |
5446
|
|
|
|
|
|
|
|
5447
|
|
|
|
|
|
|
my @allkeys = $csv->get_keys(); |
5448
|
|
|
|
|
|
|
my $midk = $allkeys[int($#allkeys / 2)]; |
5449
|
|
|
|
|
|
|
my $midrec_hr = $csv->get_row_hr($midk); |
5450
|
|
|
|
|
|
|
my $midname = $midrec_hr->{'NAME'}; |
5451
|
|
|
|
|
|
|
|
5452
|
|
|
|
|
|
|
B<IMPORTANT> |
5453
|
|
|
|
|
|
|
|
5454
|
|
|
|
|
|
|
If not yet done, this function causes the input to be read entirely and stored in-memory. |
5455
|
|
|
|
|
|
|
|
5456
|
|
|
|
|
|
|
=head2 get_values |
5457
|
|
|
|
|
|
|
|
5458
|
|
|
|
|
|
|
my @vals = $csv->get_values($field_name, $opt_filter_subref); |
5459
|
|
|
|
|
|
|
|
5460
|
|
|
|
|
|
|
Return an array made of the values of the given field name (C<$field_name>), for every records, in |
5461
|
|
|
|
|
|
|
the order of the records. |
5462
|
|
|
|
|
|
|
|
5463
|
|
|
|
|
|
|
C<$opt_filter_subref> is an optional subref. If defined, it is called with every values in turn (one |
5464
|
|
|
|
|
|
|
call per value) and only values for which C<$opt_filter_subref> returned True are included in the |
5465
|
|
|
|
|
|
|
returned array. Call to C<$opt_filter_subref> is done with $_ to pass the value. |
5466
|
|
|
|
|
|
|
|
5467
|
|
|
|
|
|
|
Example: |
5468
|
|
|
|
|
|
|
|
5469
|
|
|
|
|
|
|
my @logins = $csv->get_values('LOGIN"); |
5470
|
|
|
|
|
|
|
|
5471
|
|
|
|
|
|
|
This is equivalent to: |
5472
|
|
|
|
|
|
|
|
5473
|
|
|
|
|
|
|
my @allkeys = $csv->get_keys(); |
5474
|
|
|
|
|
|
|
my @logins; |
5475
|
|
|
|
|
|
|
push @logins, $csv->get_cell($_, 'LOGIN') for (@allkeys); |
5476
|
|
|
|
|
|
|
|
5477
|
|
|
|
|
|
|
Example bis |
5478
|
|
|
|
|
|
|
|
5479
|
|
|
|
|
|
|
# @badlogins is the list of logins that contain non alphanumeric characters |
5480
|
|
|
|
|
|
|
my @badlogins = Text::AutoCSV->new(in_file => 'logins.csv') |
5481
|
|
|
|
|
|
|
->get_values('LOGIN', sub { m/[^a-z0-9]/ }); |
5482
|
|
|
|
|
|
|
|
5483
|
|
|
|
|
|
|
This is equivalent to: |
5484
|
|
|
|
|
|
|
|
5485
|
|
|
|
|
|
|
# @badlogins is the list of logins that contain non alphanumeric characters |
5486
|
|
|
|
|
|
|
# This method leads to carrying all values of a given field across function calls... |
5487
|
|
|
|
|
|
|
my @badlogins = grep { m/[^a-z0-9]/ } ( |
5488
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'logins.csv')->get_values('LOGIN') |
5489
|
|
|
|
|
|
|
); |
5490
|
|
|
|
|
|
|
|
5491
|
|
|
|
|
|
|
B<IMPORTANT> |
5492
|
|
|
|
|
|
|
|
5493
|
|
|
|
|
|
|
If not yet done, this function causes the input to be read entirely and stored in-memory. |
5494
|
|
|
|
|
|
|
|
5495
|
|
|
|
|
|
|
=head2 get_recnum |
5496
|
|
|
|
|
|
|
|
5497
|
|
|
|
|
|
|
my $r = $csv->get_recnum(); |
5498
|
|
|
|
|
|
|
|
5499
|
|
|
|
|
|
|
Returns the current record identifier, if a reading is in progress. If no read is in progress, |
5500
|
|
|
|
|
|
|
return undef. |
5501
|
|
|
|
|
|
|
|
5502
|
|
|
|
|
|
|
=head2 in_map |
5503
|
|
|
|
|
|
|
|
5504
|
|
|
|
|
|
|
=head2 read_update_after |
5505
|
|
|
|
|
|
|
|
5506
|
|
|
|
|
|
|
C<read_update_after> is an alias of C<in_map>. |
5507
|
|
|
|
|
|
|
|
5508
|
|
|
|
|
|
|
$csv->in_map($field, $subref); |
5509
|
|
|
|
|
|
|
|
5510
|
|
|
|
|
|
|
After reading a record from input, update C<$field> by calling C<$subref>. The value is put in |
5511
|
|
|
|
|
|
|
C<$_>. Then the field value is set to the return value of C<$subref>. |
5512
|
|
|
|
|
|
|
|
5513
|
|
|
|
|
|
|
This feature is originally meant to manage DateTime fields: the input and output CSVs carry text |
5514
|
|
|
|
|
|
|
content, and in-between, the values dealt with are DateTime objects. |
5515
|
|
|
|
|
|
|
|
5516
|
|
|
|
|
|
|
See L</out_map> for an example. |
5517
|
|
|
|
|
|
|
|
5518
|
|
|
|
|
|
|
=head2 out_map |
5519
|
|
|
|
|
|
|
|
5520
|
|
|
|
|
|
|
=head2 write_update_before |
5521
|
|
|
|
|
|
|
|
5522
|
|
|
|
|
|
|
C<write_update_before> is an alias of C<out_map>. |
5523
|
|
|
|
|
|
|
|
5524
|
|
|
|
|
|
|
$csv->out_map($field, $subref); |
5525
|
|
|
|
|
|
|
|
5526
|
|
|
|
|
|
|
Before writing C<$field> field content into the output file, pass it through C<out_map>. The value |
5527
|
|
|
|
|
|
|
is put in C<$_>. Then the return value of C<$subref> is written in the output. |
5528
|
|
|
|
|
|
|
|
5529
|
|
|
|
|
|
|
Example: |
5530
|
|
|
|
|
|
|
|
5531
|
|
|
|
|
|
|
Suppose you have a CSV file with the convention that a number surrounded by parenthesis is negative. |
5532
|
|
|
|
|
|
|
You can register corresponding L</in_map> and L</out_map> functions. During the processing of data, |
5533
|
|
|
|
|
|
|
the field content will be just a number (positive or negative), while in input and in output, it'll |
5534
|
|
|
|
|
|
|
follow the "negative under parenthesis" convention. |
5535
|
|
|
|
|
|
|
|
5536
|
|
|
|
|
|
|
In the below example, we rely on convention above and add a new field converted from the original |
5537
|
|
|
|
|
|
|
one, that follows the same convention. |
5538
|
|
|
|
|
|
|
|
5539
|
|
|
|
|
|
|
sub in_updt { |
5540
|
|
|
|
|
|
|
return 0 if !defined($_) or $_ eq ''; |
5541
|
|
|
|
|
|
|
my $i; |
5542
|
|
|
|
|
|
|
return -$i if ($i) = $_ =~ m/^\((.*)\)$/; |
5543
|
|
|
|
|
|
|
$_; |
5544
|
|
|
|
|
|
|
} |
5545
|
|
|
|
|
|
|
sub out_updt { |
5546
|
|
|
|
|
|
|
return '' unless defined($_); |
5547
|
|
|
|
|
|
|
return '(' . (-$_) . ')' if $_ < 0; |
5548
|
|
|
|
|
|
|
$_; |
5549
|
|
|
|
|
|
|
} |
5550
|
|
|
|
|
|
|
sub convert { |
5551
|
|
|
|
|
|
|
return ; |
5552
|
|
|
|
|
|
|
} |
5553
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'trans-euros.csv', out_file => 'trans-devises.csv') |
5554
|
|
|
|
|
|
|
->in_map('EUROS', \&in_updt) |
5555
|
|
|
|
|
|
|
->out_map('EUROS', \&out_updt) |
5556
|
|
|
|
|
|
|
->out_map('DEVISE', \&out_updt) |
5557
|
|
|
|
|
|
|
->field_add_copy('DEVISE', 'EUROS', sub { sprintf("%.2f", $_ * 1.141593); } ) |
5558
|
|
|
|
|
|
|
->write(); |
5559
|
|
|
|
|
|
|
|
5560
|
|
|
|
|
|
|
=head2 search |
5561
|
|
|
|
|
|
|
|
5562
|
|
|
|
|
|
|
my $found_ar = $csv->search($field_name, $value, \%opts); |
5563
|
|
|
|
|
|
|
|
5564
|
|
|
|
|
|
|
Returns an array ref of all records keys where the field C<$field_name> has the value C<$value>. |
5565
|
|
|
|
|
|
|
|
5566
|
|
|
|
|
|
|
C<\%opts> is an optional hash ref of options for the search. See help of L</vlookup> about options. |
5567
|
|
|
|
|
|
|
|
5568
|
|
|
|
|
|
|
B<IMPORTANT> |
5569
|
|
|
|
|
|
|
|
5570
|
|
|
|
|
|
|
An unsuccessful search returns an empty array ref, that is, [ ]. Thus you B<cannot> check for |
5571
|
|
|
|
|
|
|
definedness of C<search> return value to know whether or not the search found something. |
5572
|
|
|
|
|
|
|
|
5573
|
|
|
|
|
|
|
On the other hand, you can always examine the value C<search(...)-E<gt>[0]>, as search is always an |
5574
|
|
|
|
|
|
|
array ref. If the search found nothing, then, C<search(...)-E<gt>[0]> is not defined. |
5575
|
|
|
|
|
|
|
|
5576
|
|
|
|
|
|
|
B<IMPORTANT bis> |
5577
|
|
|
|
|
|
|
|
5578
|
|
|
|
|
|
|
If not yet done, this function causes the input to be read entirely and stored in-memory. |
5579
|
|
|
|
|
|
|
|
5580
|
|
|
|
|
|
|
Example: |
5581
|
|
|
|
|
|
|
|
5582
|
|
|
|
|
|
|
my $linux_os_keys_ar = $csv->search('OS', 'linux'); |
5583
|
|
|
|
|
|
|
|
5584
|
|
|
|
|
|
|
=head2 search_1hr |
5585
|
|
|
|
|
|
|
|
5586
|
|
|
|
|
|
|
my $found_hr = $csv->search_1hr($field_name, $value, \%opts); |
5587
|
|
|
|
|
|
|
|
5588
|
|
|
|
|
|
|
Returns a hash ref of the first record where the field C<$field_name> has the value C<$value>. |
5589
|
|
|
|
|
|
|
|
5590
|
|
|
|
|
|
|
C<\%opts> is an optional hash ref of options for the search. See help of L</vlookup> about options. |
5591
|
|
|
|
|
|
|
|
5592
|
|
|
|
|
|
|
Note the options L</value_if_not_found> and L</value_if_ambiguous> are ignored. If not found, return |
5593
|
|
|
|
|
|
|
undef. If the result is ambiguous (more than one record found) and ignore_ambiguous is set to a |
5594
|
|
|
|
|
|
|
false value, return undef. |
5595
|
|
|
|
|
|
|
|
5596
|
|
|
|
|
|
|
The other options are taken into account as for any search: L</ignore_ambiguous>, L</trim>, |
5597
|
|
|
|
|
|
|
L</case>, L</ignore_empty>. |
5598
|
|
|
|
|
|
|
|
5599
|
|
|
|
|
|
|
B<IMPORTANT> |
5600
|
|
|
|
|
|
|
|
5601
|
|
|
|
|
|
|
As opposed to L</search>, an unsuccessful C<search_1hr> will return C<undef>. |
5602
|
|
|
|
|
|
|
|
5603
|
|
|
|
|
|
|
B<IMPORTANT bis> |
5604
|
|
|
|
|
|
|
|
5605
|
|
|
|
|
|
|
If not yet done, this function causes the input to be read entirely and stored in-memory. |
5606
|
|
|
|
|
|
|
|
5607
|
|
|
|
|
|
|
Example: |
5608
|
|
|
|
|
|
|
|
5609
|
|
|
|
|
|
|
my $hr = $csv->search_1hr('LOGIN', $login); |
5610
|
|
|
|
|
|
|
my $full_name = $hr->{'FIRSTNAME'} . ' ' . $hr->{'LASTNAME'}; |
5611
|
|
|
|
|
|
|
|
5612
|
|
|
|
|
|
|
=head2 vlookup |
5613
|
|
|
|
|
|
|
|
5614
|
|
|
|
|
|
|
my $val = $csv->vlookup($searched_field, $value, $target_field, \%opts); |
5615
|
|
|
|
|
|
|
|
5616
|
|
|
|
|
|
|
Find the first record where C<$searched_field> contains C<$value> and out of this record, returns |
5617
|
|
|
|
|
|
|
the value of C<$target_field>. |
5618
|
|
|
|
|
|
|
|
5619
|
|
|
|
|
|
|
C<\%opts> is optional. It is a hash of options for C<vlookup>: |
5620
|
|
|
|
|
|
|
|
5621
|
|
|
|
|
|
|
=over 4 |
5622
|
|
|
|
|
|
|
|
5623
|
|
|
|
|
|
|
=item trim |
5624
|
|
|
|
|
|
|
|
5625
|
|
|
|
|
|
|
If true, ignore spaces before and after the values to search. |
5626
|
|
|
|
|
|
|
|
5627
|
|
|
|
|
|
|
If option is not present, use L</search_trim> attribute of object (default value: 1). |
5628
|
|
|
|
|
|
|
|
5629
|
|
|
|
|
|
|
=item case |
5630
|
|
|
|
|
|
|
|
5631
|
|
|
|
|
|
|
If true, do case sensitive searches. |
5632
|
|
|
|
|
|
|
|
5633
|
|
|
|
|
|
|
If option is not present, use L</search_case> attribute of object (default value: 0). |
5634
|
|
|
|
|
|
|
|
5635
|
|
|
|
|
|
|
=item ignore_empty |
5636
|
|
|
|
|
|
|
|
5637
|
|
|
|
|
|
|
If true, ignore empty values in the search. The consequence is that you won't be able to find |
5638
|
|
|
|
|
|
|
empty values by searching it. |
5639
|
|
|
|
|
|
|
|
5640
|
|
|
|
|
|
|
If option is not present, use L</search_ignore_empty> attribute of object (default value: 1). |
5641
|
|
|
|
|
|
|
|
5642
|
|
|
|
|
|
|
=item ignore_accents |
5643
|
|
|
|
|
|
|
|
5644
|
|
|
|
|
|
|
If true, ignore accents in searches. For exampe, if C<ignore_accents> is set, a string like |
5645
|
|
|
|
|
|
|
"élémentaire" will match "elementaire". |
5646
|
|
|
|
|
|
|
|
5647
|
|
|
|
|
|
|
If option is not present, use L</search_ignore_accents> attribute of object (default value: 1). |
5648
|
|
|
|
|
|
|
|
5649
|
|
|
|
|
|
|
B<NOTE> |
5650
|
|
|
|
|
|
|
|
5651
|
|
|
|
|
|
|
This option uses the function L</remove_accents> to build its internal hash tables. See |
5652
|
|
|
|
|
|
|
L</remove_accents> help for more details. |
5653
|
|
|
|
|
|
|
|
5654
|
|
|
|
|
|
|
=item value_if_not_found |
5655
|
|
|
|
|
|
|
|
5656
|
|
|
|
|
|
|
Return value if vlookup finds nothing. |
5657
|
|
|
|
|
|
|
|
5658
|
|
|
|
|
|
|
If option is not present, use L</search_value_if_not_found> attribute of object (default value: |
5659
|
|
|
|
|
|
|
undef). |
5660
|
|
|
|
|
|
|
|
5661
|
|
|
|
|
|
|
=item value_if_found |
5662
|
|
|
|
|
|
|
|
5663
|
|
|
|
|
|
|
Return value if vlookup finds something. |
5664
|
|
|
|
|
|
|
|
5665
|
|
|
|
|
|
|
If option is not present, use L</search_value_if_found> attribute of object (default value: none). |
5666
|
|
|
|
|
|
|
|
5667
|
|
|
|
|
|
|
This option is to just check whether a value exists, regardless of the target value found. |
5668
|
|
|
|
|
|
|
|
5669
|
|
|
|
|
|
|
B<NOTE> |
5670
|
|
|
|
|
|
|
|
5671
|
|
|
|
|
|
|
Although the B<$target_field> is ignored when using this option, you must specify it any way. |
5672
|
|
|
|
|
|
|
|
5673
|
|
|
|
|
|
|
=item value_if_ambiguous |
5674
|
|
|
|
|
|
|
|
5675
|
|
|
|
|
|
|
Return value if vlookup find more than one result. Tune it only if ignore_ambiguous is unset. |
5676
|
|
|
|
|
|
|
|
5677
|
|
|
|
|
|
|
If option is not present, use L</search_value_if_ambiguous> attribute of object (default value: |
5678
|
|
|
|
|
|
|
undef). |
5679
|
|
|
|
|
|
|
|
5680
|
|
|
|
|
|
|
=item ignore_ambiguous |
5681
|
|
|
|
|
|
|
|
5682
|
|
|
|
|
|
|
If true, then if more than one result is found, silently return the first one. |
5683
|
|
|
|
|
|
|
|
5684
|
|
|
|
|
|
|
If option is not present, use L</search_ignore_ambiguous> attribute of object (default value: 1). |
5685
|
|
|
|
|
|
|
|
5686
|
|
|
|
|
|
|
=back |
5687
|
|
|
|
|
|
|
|
5688
|
|
|
|
|
|
|
B<IMPORTANT> |
5689
|
|
|
|
|
|
|
|
5690
|
|
|
|
|
|
|
If not yet done, this function causes the input to be read entirely and stored in-memory. |
5691
|
|
|
|
|
|
|
|
5692
|
|
|
|
|
|
|
Example: |
5693
|
|
|
|
|
|
|
|
5694
|
|
|
|
|
|
|
my $name = $csv->vlookup('LOGIN', $id, 'NAME', { value_if_not_found => '<login not found>' }); |
5695
|
|
|
|
|
|
|
|
5696
|
|
|
|
|
|
|
=head2 remove_accents |
5697
|
|
|
|
|
|
|
|
5698
|
|
|
|
|
|
|
my $t = $csv->remove_accents($s); |
5699
|
|
|
|
|
|
|
|
5700
|
|
|
|
|
|
|
Take the string C<$s> as argument and return the string without accents. Uses a Unicode |
5701
|
|
|
|
|
|
|
decomposition followed by removal of every characters that have the Unicode property |
5702
|
|
|
|
|
|
|
C<Nonspacing_Mark>. |
5703
|
|
|
|
|
|
|
|
5704
|
|
|
|
|
|
|
B<NOTE> |
5705
|
|
|
|
|
|
|
|
5706
|
|
|
|
|
|
|
Only accents are removed. It is not a C<whatever-encoding -E<gt> us-ascii> conversion. For example, |
5707
|
|
|
|
|
|
|
the French B<Å> character (o followed by e) or the German B<Ã> (eszett) are kept as is. |
5708
|
|
|
|
|
|
|
|
5709
|
|
|
|
|
|
|
B<NOTE bis> |
5710
|
|
|
|
|
|
|
|
5711
|
|
|
|
|
|
|
Tested with some latin1 and latin2 characters. |
5712
|
|
|
|
|
|
|
|
5713
|
|
|
|
|
|
|
B<NOTE ter> |
5714
|
|
|
|
|
|
|
|
5715
|
|
|
|
|
|
|
There is no language-level transformation during accents removal. For example B<Jürgen> is returned |
5716
|
|
|
|
|
|
|
as B<Jurgen>, not B<Juergen>. |
5717
|
|
|
|
|
|
|
|
5718
|
|
|
|
|
|
|
This function is not exported by default. |
5719
|
|
|
|
|
|
|
|
5720
|
|
|
|
|
|
|
Example: |
5721
|
|
|
|
|
|
|
|
5722
|
|
|
|
|
|
|
use Text::AutoCSV qw(remove_accents); |
5723
|
|
|
|
|
|
|
my $s = remove_accents("Français: être élémentaire, Tchèque: služba dům"); |
5724
|
|
|
|
|
|
|
die "This script will never die" if $s ne 'Francais: etre elementaire, Tcheque: sluzba dum'; |
5725
|
|
|
|
|
|
|
|
5726
|
|
|
|
|
|
|
=head1 AUTHOR |
5727
|
|
|
|
|
|
|
|
5728
|
|
|
|
|
|
|
Sébastien Millet <milletseb@laposte.net> |
5729
|
|
|
|
|
|
|
|
5730
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
5731
|
|
|
|
|
|
|
|
5732
|
|
|
|
|
|
|
This software is copyright (c) 2016, 2017 by Sébastien Millet. |
5733
|
|
|
|
|
|
|
|
5734
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
5735
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
5736
|
|
|
|
|
|
|
|
5737
|
|
|
|
|
|
|
=cut |