line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#!/usr/bin/perl |
2
|
|
|
|
|
|
|
# ABSTRACT: helper module to automate the use of Text::CSV |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
# vim:tw=100 |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
# Text/AutoCSV.pm |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
# Written by Sébastien Millet |
10
|
|
|
|
|
|
|
# March, July, August, September 2016 |
11
|
|
|
|
|
|
|
# January, February 2017 |
12
|
|
|
|
|
|
|
# |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
package Text::AutoCSV; |
15
|
|
|
|
|
|
|
$Text::AutoCSV::VERSION = '1.1.8'; |
16
|
|
|
|
|
|
|
my $PKG = "Text::AutoCSV"; |
17
|
|
|
|
|
|
|
|
18
|
19
|
|
|
19
|
|
952532
|
use strict; |
|
19
|
|
|
|
|
48
|
|
|
19
|
|
|
|
|
468
|
|
19
|
16
|
|
|
16
|
|
80
|
use warnings; |
|
16
|
|
|
|
|
32
|
|
|
16
|
|
|
|
|
851
|
|
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
require Exporter; |
22
|
|
|
|
|
|
|
our @ISA = 'Exporter'; |
23
|
|
|
|
|
|
|
our @EXPORT_OK = qw(remove_accents); |
24
|
|
|
|
|
|
|
|
25
|
16
|
|
|
16
|
|
88
|
use Carp; |
|
16
|
|
|
|
|
41
|
|
|
16
|
|
|
|
|
877
|
|
26
|
16
|
|
|
16
|
|
7773
|
use Params::Validate qw(validate validate_pos :types); |
|
16
|
|
|
|
|
126107
|
|
|
16
|
|
|
|
|
3315
|
|
27
|
16
|
|
|
16
|
|
7961
|
use List::MoreUtils qw(first_index indexes); |
|
16
|
|
|
|
|
117145
|
|
|
16
|
|
|
|
|
173
|
|
28
|
16
|
|
|
16
|
|
12604
|
use Fcntl qw(SEEK_SET); |
|
16
|
|
|
|
|
43
|
|
|
16
|
|
|
|
|
828
|
|
29
|
16
|
|
|
16
|
|
7530
|
use File::BOM; |
|
16
|
|
|
|
|
421354
|
|
|
16
|
|
|
|
|
1003
|
|
30
|
16
|
|
|
16
|
|
10630
|
use Text::CSV; |
|
16
|
|
|
|
|
243550
|
|
|
16
|
|
|
|
|
827
|
|
31
|
16
|
|
|
16
|
|
12023
|
use DateTime; |
|
16
|
|
|
|
|
7394925
|
|
|
16
|
|
|
|
|
869
|
|
32
|
|
|
|
|
|
|
# DateTime::Format::Strptime 1.70 does not work properly with us. |
33
|
|
|
|
|
|
|
# Actually all version as of 1.63 are fine, except 1.70. |
34
|
16
|
|
|
16
|
|
10948
|
use DateTime::Format::Strptime 1.71; |
|
16
|
|
|
|
|
956979
|
|
|
16
|
|
|
|
|
145
|
|
35
|
16
|
|
|
16
|
|
11087
|
use Class::Struct; |
|
16
|
|
|
|
|
25547
|
|
|
16
|
|
|
|
|
114
|
|
36
|
16
|
|
|
16
|
|
10377
|
use Unicode::Normalize; |
|
16
|
|
|
|
|
28388
|
|
|
16
|
|
|
|
|
1065
|
|
37
|
|
|
|
|
|
|
# lock_keys is used to prevent accessing non existing keys |
38
|
|
|
|
|
|
|
# Credits: 3381159 on http://stackoverflow.com |
39
|
|
|
|
|
|
|
# "make perl shout when trying to access undefined hash key" |
40
|
16
|
|
|
16
|
|
10330
|
use Hash::Util qw(lock_keys); |
|
16
|
|
|
|
|
58558
|
|
|
16
|
|
|
|
|
106
|
|
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# FIXME |
43
|
|
|
|
|
|
|
# Not needed in release -> should be always commented unless at dev time |
44
|
|
|
|
|
|
|
#use feature qw(say); |
45
|
|
|
|
|
|
|
#use Data::Dumper; |
46
|
|
|
|
|
|
|
#$Data::Dumper::Sortkeys = 1; |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# Set to 1 if you wish to turn on debug without touching caller's code |
49
|
|
|
|
|
|
|
our $ALWAYS_DEBUG = 0; |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# Keep it set to 0 unless you know what you're doing! |
52
|
|
|
|
|
|
|
# Note |
53
|
|
|
|
|
|
|
# Taken into account only if debug is set. |
54
|
|
|
|
|
|
|
my $DEBUG_DATETIME_FORMATS = 0; |
55
|
|
|
|
|
|
|
# The below is taken into account only if $DEBUG_DATETIME_FORMATS is set. |
56
|
|
|
|
|
|
|
# It'll resqult in even more debug output. It becomes really MASSIVE debug output. |
57
|
|
|
|
|
|
|
my $DEBUG_DATETIME_FORMATS_EVEN_MORE = 0; |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# |
60
|
|
|
|
|
|
|
# Uncomment to replace carp and croak with cluck and confess, respectively |
61
|
|
|
|
|
|
|
# Also reachable with perl option: |
62
|
|
|
|
|
|
|
# -MCarp=verbose |
63
|
|
|
|
|
|
|
# See 'perldoc Carp'. |
64
|
|
|
|
|
|
|
# |
65
|
|
|
|
|
|
|
#$Carp::Verbose = 1; |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# * *************** * |
69
|
|
|
|
|
|
|
# * BEHAVIOR TUNING * |
70
|
|
|
|
|
|
|
# * *************** * |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# * **************************************************** * |
74
|
|
|
|
|
|
|
# * ALL THE VARIABLES BELOW ARE RATHER LOW LEVEL. * |
75
|
|
|
|
|
|
|
# * IF YOU UPDATE IT, IT WILL LIKELY BREAK THE TEST PLAN * |
76
|
|
|
|
|
|
|
# * **************************************************** * |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
my $DEF_SEARCH_CASE = 0; # Case insensitive search by default |
79
|
|
|
|
|
|
|
my $DEF_SEARCH_TRIM = 1; # Trim values by default |
80
|
|
|
|
|
|
|
my $DEF_SEARCH_IGNORE_ACCENTS = 1; # Ignore accents |
81
|
|
|
|
|
|
|
my $DEF_SEARCH_IGNORE_EMPTY = 1; # Ignore empty strings in searches by default |
82
|
|
|
|
|
|
|
my $DEF_SEARCH_VALUE_IF_NOT_FOUND = undef; # If not found, returned field value is undef |
83
|
|
|
|
|
|
|
my $DEF_SEARCH_VALUE_IF_AMBIGUOUS = undef; # If more than one record found by search (when a |
84
|
|
|
|
|
|
|
# unique value is expected), return undef |
85
|
|
|
|
|
|
|
my $DEF_SEARCH_IGNORE_AMBIGUOUS = 1; # By default, ignore the fact that multiple records are |
86
|
|
|
|
|
|
|
# found by search and return the first record found |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
my $DETECT_ENCODING = 1; |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
my $DEFAULT_IN_ENCODING = 'UTF-8,latin1'; |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
# By default, input encoding detected is used for output. |
93
|
|
|
|
|
|
|
# -> the constant below is used if and only if: |
94
|
|
|
|
|
|
|
# Inbound encoding is unknown |
95
|
|
|
|
|
|
|
# No providing of out_encoding attribute (out_encoding takes precedence when provided) |
96
|
|
|
|
|
|
|
my $DEFAULT_OUT_ENCODING = 'UTF-8'; |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
my $DEFAULT_ESCAPE_CHAR = '\\'; |
99
|
|
|
|
|
|
|
my $DEFAULT_QUOTE_CHAR = '"'; |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
# |
102
|
|
|
|
|
|
|
# The code that workarounds $FIX_PERLMONKS_823214 (see below) makes sense only under plain |
103
|
|
|
|
|
|
|
# Windows. |
104
|
|
|
|
|
|
|
# |
105
|
|
|
|
|
|
|
# "Plain" Windows? |
106
|
|
|
|
|
|
|
# This code MUST NOT be executed under cygwin because cygwin uses unix line breaks. This is |
107
|
|
|
|
|
|
|
# why we detect /mswin/. Would we detect /win/, we'd catch cygwin, too, and we don't want |
108
|
|
|
|
|
|
|
# that. |
109
|
|
|
|
|
|
|
# |
110
|
|
|
|
|
|
|
my $OS_IS_PLAIN_WINDOWS = !! ($^O =~ /mswin/i); |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
# |
113
|
|
|
|
|
|
|
# Shall we fix the issue reported as #823214 in PerlMonks? See |
114
|
|
|
|
|
|
|
# http://www.perlmonks.org/?node_id=823214 |
115
|
|
|
|
|
|
|
# |
116
|
|
|
|
|
|
|
# In brief (in case the link above would be broken one day): |
117
|
|
|
|
|
|
|
# Under Windows, output mode set to UTF-16LE produces line breaks made of octets "0d 0a 00", |
118
|
|
|
|
|
|
|
# whereas it should be "0d 00 0a 00". |
119
|
|
|
|
|
|
|
# |
120
|
|
|
|
|
|
|
# The code also fixes UTF-16BE (but it was not tested). |
121
|
|
|
|
|
|
|
# |
122
|
|
|
|
|
|
|
my $FIX_PERLMONKS_823214 = 1; |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# * **** * |
126
|
|
|
|
|
|
|
# * CODE * |
127
|
|
|
|
|
|
|
# * **** * |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
sub ERR_UNKNOWN_FIELD() { 0 } |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
# Store meta-data about each column |
133
|
|
|
|
|
|
|
struct ColData => { |
134
|
|
|
|
|
|
|
field_name => '$', |
135
|
|
|
|
|
|
|
header_text => '$', |
136
|
|
|
|
|
|
|
description => '$', |
137
|
|
|
|
|
|
|
dt_format => '$', |
138
|
|
|
|
|
|
|
dt_locale => '$' |
139
|
|
|
|
|
|
|
}; |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
# |
142
|
|
|
|
|
|
|
# Enumeration of ef_type member below |
143
|
|
|
|
|
|
|
# Alternative: |
144
|
|
|
|
|
|
|
# use enum (...) |
145
|
|
|
|
|
|
|
# |
146
|
|
|
|
|
|
|
# But it is not also by default on my distro and installing a package for 3 constants, I find it |
147
|
|
|
|
|
|
|
# a bit overkill! |
148
|
|
|
|
|
|
|
# |
149
|
|
|
|
|
|
|
my ($EF_LINK, $EF_FUNC, $EF_COPY) = 0..2; |
150
|
|
|
|
|
|
|
struct ExtraField => { |
151
|
|
|
|
|
|
|
ef_type => '$', |
152
|
|
|
|
|
|
|
self_name => '$', |
153
|
|
|
|
|
|
|
description => '$', |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
check_field_existence => '$', |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
# For when ef_type is set to $EF_LINK |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
link_self_search => '$', |
160
|
|
|
|
|
|
|
link_remote_obj => '$', |
161
|
|
|
|
|
|
|
link_remote_search => '$', |
162
|
|
|
|
|
|
|
link_remote_read => '$', |
163
|
|
|
|
|
|
|
link_vlookup_opts => '%', |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
# For when ef_type is set to $EF_FUNC |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
func_sub => '$', |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
# For when ef_type is set to $EF_COPY |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
copy_source => '$', |
172
|
|
|
|
|
|
|
copy_sub => '$' |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
}; |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
my $SEARCH_VALIDATE_OPTIONS = { |
177
|
|
|
|
|
|
|
value_if_not_found => {type => UNDEF | SCALAR, optional => 1}, |
178
|
|
|
|
|
|
|
value_if_found => {type => UNDEF | SCALAR, optional => 1}, |
179
|
|
|
|
|
|
|
value_if_ambiguous => {type => UNDEF | SCALAR, optional => 1}, |
180
|
|
|
|
|
|
|
ignore_ambiguous => {type => BOOLEAN, optional => 1}, |
181
|
|
|
|
|
|
|
case => {type => BOOLEAN, optional => 1}, |
182
|
|
|
|
|
|
|
trim => {type => BOOLEAN, optional => 1}, |
183
|
|
|
|
|
|
|
ignore_empty => {type => BOOLEAN, optional => 1}, |
184
|
|
|
|
|
|
|
ignore_accents => {type => BOOLEAN, optional => 1} |
185
|
|
|
|
|
|
|
}; |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
sub _is_utf8 { |
188
|
416
|
|
|
416
|
|
1007
|
my $e = shift; |
189
|
|
|
|
|
|
|
|
190
|
416
|
100
|
|
|
|
3836
|
return 1 if $e =~ m/^(utf-?8|ucs-?8)/i; |
191
|
32
|
|
|
|
|
101
|
return 0; |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
# To replace // in old perls: return the first non-undef value in provided list |
195
|
|
|
|
|
|
|
sub _get_def { |
196
|
12004
|
|
|
12004
|
|
25387
|
for (@_) { |
197
|
18591
|
100
|
|
|
|
53806
|
return $_ if defined($_); |
198
|
|
|
|
|
|
|
} |
199
|
882
|
|
|
|
|
1896
|
return undef; |
200
|
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
sub _print { |
203
|
15
|
|
|
15
|
|
32
|
my $self = shift; |
204
|
15
|
|
|
|
|
27
|
my $t = shift; |
205
|
|
|
|
|
|
|
|
206
|
15
|
|
|
|
|
26
|
my $infoh = $self->{infoh}; |
207
|
15
|
50
|
|
|
|
55
|
return if ref $infoh ne 'GLOB'; |
208
|
|
|
|
|
|
|
|
209
|
0
|
|
|
|
|
0
|
print($infoh $t); |
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
sub _printf { |
213
|
17
|
|
|
17
|
|
31
|
my $self = shift; |
214
|
|
|
|
|
|
|
|
215
|
17
|
|
|
|
|
32
|
my $infoh = $self->{infoh}; |
216
|
17
|
50
|
|
|
|
64
|
return if ref $infoh ne 'GLOB'; |
217
|
|
|
|
|
|
|
|
218
|
0
|
|
|
|
|
0
|
printf($infoh @_); |
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
sub _print_warning { |
222
|
67
|
|
|
67
|
|
145
|
my $self = shift; |
223
|
67
|
|
|
|
|
140
|
my $warning_message = shift; |
224
|
67
|
|
|
|
|
129
|
my $dont_wrap = shift; |
225
|
|
|
|
|
|
|
|
226
|
67
|
100
|
|
|
|
232
|
my $msg = ($dont_wrap ? $warning_message : "$PKG: warning: $warning_message"); |
227
|
67
|
100
|
|
|
|
4925
|
carp $msg unless $self->{quiet}; |
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
sub _close_inh { |
231
|
314
|
|
|
314
|
|
621
|
my $self = shift; |
232
|
|
|
|
|
|
|
|
233
|
314
|
100
|
|
|
|
6037
|
close $self->{_inh} if $self->{_close_inh_when_finished}; |
234
|
314
|
|
|
|
|
1094
|
$self->{_inh} = undef; |
235
|
314
|
|
|
|
|
1277
|
$self->{_close_inh_when_finished} = undef; |
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
sub _close_outh { |
239
|
126
|
|
|
126
|
|
278
|
my $self = shift; |
240
|
|
|
|
|
|
|
|
241
|
126
|
50
|
66
|
|
|
18873
|
close $self->{outh} if defined($self->{outh}) and $self->{_close_outh_when_finished}; |
242
|
126
|
|
|
|
|
464
|
$self->{outh} = undef; |
243
|
126
|
|
|
|
|
327
|
$self->{_close_outh_when_finished} = undef; |
244
|
|
|
|
|
|
|
} |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
sub _print_error { |
247
|
81
|
|
|
81
|
|
636
|
my ($self, $error_message, $dont_stop, $err_code, $err_extra) = @_; |
248
|
|
|
|
|
|
|
|
249
|
81
|
|
|
|
|
294
|
my $msg = "$PKG: error: $error_message"; |
250
|
|
|
|
|
|
|
|
251
|
81
|
100
|
100
|
|
|
432
|
if (defined($err_code) and !$self->{quiet} and $self->{croak_if_error}) { |
|
|
|
100
|
|
|
|
|
252
|
5
|
50
|
|
|
|
21
|
if ($err_code == ERR_UNKNOWN_FIELD) { |
253
|
5
|
|
|
|
|
11
|
my %f = %{$err_extra}; |
|
5
|
|
|
|
|
29
|
|
254
|
5
|
|
|
|
|
14
|
my @cols; |
255
|
5
|
|
|
|
|
20
|
for my $n (keys %f) { |
256
|
15
|
|
|
|
|
41
|
$cols[$f{$n}] = $n; |
257
|
|
|
|
|
|
|
} |
258
|
5
|
|
|
|
|
19
|
$self->_print($self->get_in_file_disp() . " column - field name correspondance:\n"); |
259
|
5
|
|
|
|
|
18
|
$self->_print("COL # FIELD\n"); |
260
|
5
|
|
|
|
|
15
|
$self->_print("----- -----\n"); |
261
|
5
|
|
|
|
|
16
|
for my $i (0..$#cols) { |
262
|
17
|
100
|
|
|
|
58
|
$self->_printf("%05d %s\n", $i, (defined($cols[$i]) ? $cols[$i] : '')); |
263
|
|
|
|
|
|
|
} |
264
|
|
|
|
|
|
|
} else { |
265
|
0
|
|
|
|
|
0
|
confess "Unknown error code: '$err_code'\n"; |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
} |
268
|
|
|
|
|
|
|
|
269
|
81
|
100
|
100
|
|
|
380
|
if ($self->{croak_if_error} and !$dont_stop) { |
270
|
30
|
|
|
|
|
119
|
$self->_close_read(1); |
271
|
30
|
|
|
|
|
123
|
$self->_close_inh(); |
272
|
30
|
|
|
|
|
101
|
$self->_close_outh(); |
273
|
30
|
|
|
|
|
160
|
$self->_status_reset(1); |
274
|
30
|
|
|
|
|
5202
|
croak $msg; |
275
|
|
|
|
|
|
|
} |
276
|
51
|
|
|
|
|
196
|
$self->_print_warning($msg, 1); |
277
|
|
|
|
|
|
|
} |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
# |
280
|
|
|
|
|
|
|
# Return the string passed in argument with all accents removed from characters. |
281
|
|
|
|
|
|
|
# Do it in a rather general and reliable way, not tied to latin1. |
282
|
|
|
|
|
|
|
# Tested on latin1 and latin2 character sets. |
283
|
|
|
|
|
|
|
# |
284
|
|
|
|
|
|
|
# Credits: |
285
|
|
|
|
|
|
|
# http://stackoverflow.com/questions/17561839/remove-accents-from-accented-characters |
286
|
|
|
|
|
|
|
# |
287
|
|
|
|
|
|
|
sub remove_accents { |
288
|
3144
|
|
|
3144
|
1
|
29323
|
validate_pos(@_, {type => SCALAR}); |
289
|
|
|
|
|
|
|
|
290
|
3144
|
|
|
|
|
8564
|
my $s = $_[0]; |
291
|
3144
|
|
|
|
|
13584
|
my $r = NFKD($s); |
292
|
3144
|
|
|
|
|
8753
|
$r =~ s/\p{Nonspacing_Mark}//g; |
293
|
3144
|
|
|
|
|
7763
|
return $r; |
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
sub _detect_csv_sep { |
297
|
255
|
|
|
255
|
|
572
|
my $ST_OUTSIDE = 0; |
298
|
255
|
|
|
|
|
500
|
my $ST_INSIDE = 1; |
299
|
|
|
|
|
|
|
|
300
|
255
|
|
|
|
|
779
|
my ($self, $escape_char, $quote_char, $sep) = @_; |
301
|
|
|
|
|
|
|
|
302
|
255
|
|
|
|
|
553
|
my $_debugh = $self->{_debugh}; |
303
|
255
|
|
|
|
|
559
|
my $inh = $self->{_inh}; |
304
|
255
|
|
|
|
|
502
|
my $_debug = $self->{_debug}; |
305
|
|
|
|
|
|
|
|
306
|
255
|
|
|
|
|
526
|
delete $self->{_inh_header}; |
307
|
|
|
|
|
|
|
|
308
|
255
|
100
|
|
|
|
823
|
$escape_char = $DEFAULT_ESCAPE_CHAR unless defined($escape_char); |
309
|
|
|
|
|
|
|
|
310
|
255
|
50
|
|
|
|
785
|
$self->_print_error("illegal \$escape_char: '$escape_char' (length >= 2)"), return 0 |
311
|
|
|
|
|
|
|
if length($escape_char) >= 2; |
312
|
|
|
|
|
|
|
|
313
|
255
|
50
|
|
|
|
781
|
$self->_print_error("$PKG: error: illegal \$quote_char '$quote_char' (length >= 2)"), return 0 |
314
|
|
|
|
|
|
|
if length($quote_char) >= 2; |
315
|
|
|
|
|
|
|
|
316
|
255
|
50
|
|
|
|
782
|
$escape_char = '--' if $escape_char eq ''; |
317
|
255
|
50
|
|
|
|
714
|
$quote_char = '--' if $quote_char eq ''; |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
# FIXME (?) |
320
|
|
|
|
|
|
|
# Avoid inlined magic values for separator auto-detection. |
321
|
|
|
|
|
|
|
# Issue is, as you can see below, the behavior is also hard-coded and not straightforward to |
322
|
|
|
|
|
|
|
# render 'tunable' ("," and ";" take precedence over "\t"). |
323
|
255
|
|
|
|
|
1286
|
my %Seps = ( |
324
|
|
|
|
|
|
|
";" => 0, |
325
|
|
|
|
|
|
|
"," => 0, |
326
|
|
|
|
|
|
|
"\t" => 0 |
327
|
|
|
|
|
|
|
); |
328
|
|
|
|
|
|
|
|
329
|
255
|
|
|
|
|
3210
|
my $h = <$inh>; |
330
|
255
|
50
|
|
|
|
2860
|
if ($self->{inh_is_stdin}) { |
331
|
0
|
|
|
|
|
0
|
$self->{_inh_header} = $h; |
332
|
0
|
0
|
|
|
|
0
|
print($_debugh "Input is STDIN => saving header line to re-read it " . |
333
|
|
|
|
|
|
|
"later (in-memory)\n") if $_debug; |
334
|
|
|
|
|
|
|
} else { |
335
|
255
|
|
|
|
|
1800
|
seek $inh, 0, SEEK_SET; |
336
|
255
|
50
|
|
|
|
860
|
print($_debugh "Input is not STDIN => using seek function to rewind " . |
337
|
|
|
|
|
|
|
"read head after header line reading\n") if $_debug; |
338
|
|
|
|
|
|
|
} |
339
|
|
|
|
|
|
|
|
340
|
255
|
|
|
|
|
708
|
chomp $h; |
341
|
255
|
|
|
|
|
520
|
my $status = $ST_OUTSIDE; |
342
|
255
|
|
|
|
|
653
|
my $l = length($h); |
343
|
255
|
|
|
|
|
481
|
my $c = 0; |
344
|
255
|
|
|
|
|
782
|
while ($c < $l) { |
345
|
4769
|
|
|
|
|
7927
|
my $ch = substr($h, $c, 1); |
346
|
4769
|
|
|
|
|
6711
|
my $chnext = ''; |
347
|
4769
|
100
|
|
|
|
10870
|
$chnext = substr($h, $c + 1, 1) if ($c < $l - 1); |
348
|
4769
|
100
|
|
|
|
10683
|
if ($status == $ST_INSIDE) { |
|
|
50
|
|
|
|
|
|
349
|
1521
|
50
|
66
|
|
|
4169
|
if ($ch eq $escape_char and $chnext eq $quote_char) { |
|
|
100
|
|
|
|
|
|
350
|
0
|
|
|
|
|
0
|
$c += 2; |
351
|
|
|
|
|
|
|
} elsif ($ch eq $quote_char) { |
352
|
197
|
|
|
|
|
307
|
$status = $ST_OUTSIDE; |
353
|
197
|
|
|
|
|
435
|
$c++; |
354
|
|
|
|
|
|
|
} else { |
355
|
1324
|
|
|
|
|
2585
|
$c++; |
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
} elsif ($status == $ST_OUTSIDE) { |
358
|
3248
|
50
|
33
|
|
|
10777
|
if ($ch eq $escape_char and ($chnext eq $quote_char or |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
|
|
|
|
|
359
|
|
|
|
|
|
|
exists $Seps{$chnext})) { |
360
|
0
|
|
|
|
|
0
|
$c += 2; |
361
|
|
|
|
|
|
|
} elsif (exists $Seps{$ch}) { |
362
|
930
|
|
|
|
|
1420
|
$Seps{$ch}++; |
363
|
930
|
|
|
|
|
1990
|
$c++; |
364
|
|
|
|
|
|
|
} elsif ($ch eq $quote_char) { |
365
|
197
|
|
|
|
|
296
|
$status = $ST_INSIDE; |
366
|
197
|
|
|
|
|
425
|
$c++; |
367
|
|
|
|
|
|
|
} else { |
368
|
2121
|
|
|
|
|
4746
|
$c++; |
369
|
|
|
|
|
|
|
} |
370
|
|
|
|
|
|
|
} |
371
|
|
|
|
|
|
|
} |
372
|
|
|
|
|
|
|
|
373
|
255
|
100
|
100
|
|
|
1852
|
if ($Seps{";"} == 0 and $Seps{","} >= 1) { |
|
|
100
|
66
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
374
|
144
|
|
|
|
|
348
|
$$sep = ","; |
375
|
144
|
|
|
|
|
761
|
return 1; |
376
|
|
|
|
|
|
|
} elsif ($Seps{","} == 0 and $Seps{";"} >= 1) { |
377
|
102
|
|
|
|
|
220
|
$$sep = ";"; |
378
|
102
|
|
|
|
|
499
|
return 1; |
379
|
|
|
|
|
|
|
} elsif ($Seps{","} == 0 and $Seps{";"} == 0 and $Seps{"\t"} >= 1) { |
380
|
0
|
|
|
|
|
0
|
$$sep = "\t"; |
381
|
0
|
|
|
|
|
0
|
return 1; |
382
|
|
|
|
|
|
|
} else { |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
# Check the case where there is one unique column, in which case, |
385
|
|
|
|
|
|
|
# assume comma separator. |
386
|
9
|
|
|
|
|
32
|
my $h_no_accnt = remove_accents($h); |
387
|
9
|
100
|
|
|
|
64
|
if ($h_no_accnt =~ m/^[[:alnum:]_]+$/i) { |
388
|
3
|
|
|
|
|
8
|
$$sep = ","; |
389
|
3
|
|
|
|
|
15
|
return 1; |
390
|
|
|
|
|
|
|
} |
391
|
|
|
|
|
|
|
|
392
|
6
|
|
|
|
|
14
|
$$sep = ""; |
393
|
6
|
50
|
|
|
|
14
|
if ($_debug) { |
394
|
0
|
|
|
|
|
0
|
for my $k (keys %Seps) { |
395
|
0
|
|
|
|
|
0
|
print($_debugh "\$Seps{'$k'} = $Seps{$k}\n"); |
396
|
|
|
|
|
|
|
} |
397
|
|
|
|
|
|
|
} |
398
|
6
|
|
|
|
|
31
|
return 0; |
399
|
|
|
|
|
|
|
} |
400
|
|
|
|
|
|
|
} |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
sub _reopen_input { |
403
|
633
|
|
|
633
|
|
1161
|
my $self = shift; |
404
|
|
|
|
|
|
|
|
405
|
633
|
|
|
|
|
1191
|
my $in_file = $self->{in_file}; |
406
|
|
|
|
|
|
|
|
407
|
633
|
|
|
|
|
972
|
my $inh; |
408
|
633
|
50
|
|
|
|
15399
|
if (!open($inh, "<", $in_file)) { |
409
|
0
|
|
|
|
|
0
|
$self->_print_error("unable to open file '$in_file': $!"); |
410
|
0
|
|
|
|
|
0
|
return undef; |
411
|
|
|
|
|
|
|
} |
412
|
633
|
50
|
|
|
|
2252
|
if (!$self->{_leave_encoding_alone}) { |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
confess "Oups! _inh_encoding_string undef?" |
415
|
633
|
50
|
|
|
|
1690
|
unless defined($self->{_inh_encoding_string}); |
416
|
|
|
|
|
|
|
|
417
|
633
|
|
|
|
|
4315
|
binmode $inh, $self->{_inh_encoding_string}; |
418
|
|
|
|
|
|
|
} |
419
|
|
|
|
|
|
|
|
420
|
633
|
|
|
|
|
30469
|
return $inh; |
421
|
|
|
|
|
|
|
} |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
# Abstraction layer, not useful Today, could bring added value when looking into Text::CSV I/O |
424
|
|
|
|
|
|
|
sub _mygetline { |
425
|
9078
|
|
|
9078
|
|
19491
|
my ($csvobj, $fh) = @_; |
426
|
|
|
|
|
|
|
|
427
|
9078
|
|
|
|
|
210971
|
return $csvobj->getline($fh); |
428
|
|
|
|
|
|
|
} |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
sub _detect_escape_char { |
431
|
304
|
|
|
304
|
|
960
|
my ($self, $quote_char, $sep_char, $ref_escape_char, $ref_is_always_quoted) = @_; |
432
|
|
|
|
|
|
|
|
433
|
304
|
|
|
|
|
670
|
my $in_file = $self->{in_file}; |
434
|
304
|
|
|
|
|
683
|
my $_debug = $self->{_debug}; |
435
|
304
|
|
|
|
|
579
|
my $_debugh = $self->{_debugh}; |
436
|
|
|
|
|
|
|
|
437
|
304
|
|
|
|
|
707
|
$$ref_escape_char = $DEFAULT_ESCAPE_CHAR; |
438
|
304
|
|
|
|
|
582
|
$$ref_is_always_quoted = undef; |
439
|
|
|
|
|
|
|
|
440
|
304
|
100
|
|
|
|
862
|
if ($self->{_int_one_pass}) { |
441
|
13
|
|
|
|
|
37
|
return; |
442
|
|
|
|
|
|
|
} |
443
|
|
|
|
|
|
|
|
444
|
291
|
|
|
|
|
881
|
$self->_register_pass("detect escape character"); |
445
|
|
|
|
|
|
|
|
446
|
291
|
|
|
|
|
534
|
my $qesc = 0; |
447
|
291
|
|
|
|
|
909
|
my $inh = $self->_reopen_input(); |
448
|
291
|
50
|
|
|
|
951
|
if (defined($inh)) { |
449
|
291
|
|
|
|
|
4363
|
while (my $l = <$inh>) { |
450
|
7088
|
|
|
|
|
17838
|
chomp $l; |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
# Very heuristic criteria... |
453
|
|
|
|
|
|
|
# Tant pis. |
454
|
7088
|
100
|
|
|
|
31105
|
$qesc = 1 if $l =~ m/(?<!$sep_char)$quote_char$quote_char(?!$sep_char)/; |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
} |
457
|
291
|
|
|
|
|
2732
|
close $inh; |
458
|
|
|
|
|
|
|
} |
459
|
291
|
100
|
|
|
|
992
|
if ($qesc) { |
460
|
7
|
|
|
|
|
20
|
$$ref_escape_char = '"'; |
461
|
|
|
|
|
|
|
} else { |
462
|
284
|
|
|
|
|
735
|
$$ref_escape_char = '\\' ; |
463
|
|
|
|
|
|
|
} |
464
|
|
|
|
|
|
|
|
465
|
291
|
|
|
|
|
567
|
my $is_always_quoted = 0; |
466
|
291
|
|
|
|
|
842
|
$inh = $self->_reopen_input(); |
467
|
291
|
50
|
|
|
|
1212
|
if (defined($inh)) { |
468
|
291
|
|
|
|
|
3694
|
my $csv = Text::CSV->new({sep_char => $sep_char, |
469
|
|
|
|
|
|
|
allow_whitespace => 1, binary => 1, auto_diag => 0, |
470
|
|
|
|
|
|
|
quote_char => $quote_char, escape_char => $$ref_escape_char, |
471
|
|
|
|
|
|
|
keep_meta_info => 1, |
472
|
|
|
|
|
|
|
allow_loose_escapes => 1}); |
473
|
291
|
|
|
|
|
65381
|
$is_always_quoted = 1; |
474
|
291
|
|
|
|
|
1004
|
while (my $ar = _mygetline($csv, $inh)) { |
475
|
1628
|
|
|
|
|
57281
|
my @a = @{$ar}; |
|
1628
|
|
|
|
|
4629
|
|
476
|
1628
|
|
|
|
|
2907
|
my $e = $#a; |
477
|
1628
|
|
|
|
|
3473
|
for my $i (0..$e) { |
478
|
11018
|
100
|
|
|
|
91320
|
$is_always_quoted = 0 unless $csv->is_quoted($i); |
479
|
|
|
|
|
|
|
} |
480
|
1628
|
100
|
|
|
|
17517
|
last unless $is_always_quoted; |
481
|
|
|
|
|
|
|
} |
482
|
291
|
100
|
|
|
|
2473
|
my $is_ok = ($csv->eof() ? 1 : 0); |
483
|
291
|
|
|
|
|
5639
|
close $inh; |
484
|
|
|
|
|
|
|
} |
485
|
|
|
|
|
|
|
|
486
|
291
|
50
|
|
|
|
1229
|
print($_debugh " is_always_quoted: $is_always_quoted\n") if $_debug; |
487
|
291
|
|
|
|
|
616
|
$$ref_is_always_quoted = $is_always_quoted; |
488
|
|
|
|
|
|
|
|
489
|
291
|
|
|
|
|
1317
|
return; |
490
|
|
|
|
|
|
|
} |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
sub _register_pass { |
493
|
950
|
|
|
950
|
|
2193
|
my ($self, $pass_name) = @_; |
494
|
950
|
|
|
|
|
1756
|
my $_debug = $self->{_debug}; |
495
|
950
|
|
|
|
|
1642
|
my $_debugh = $self->{_debugh}; |
496
|
|
|
|
|
|
|
|
497
|
950
|
|
|
|
|
1916
|
$self->{_pass_count}++; |
498
|
|
|
|
|
|
|
|
499
|
950
|
50
|
|
|
|
2781
|
return unless $_debug; |
500
|
|
|
|
|
|
|
|
501
|
0
|
|
|
|
|
0
|
print($_debugh "Pass #" . $self->{_pass_count} . " ($pass_name) done\n"); |
502
|
|
|
|
|
|
|
} |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
sub _update_in_mem_record_count { |
505
|
501
|
|
|
501
|
|
1407
|
my ($self, $nonexistent_arg) = @_; |
506
|
501
|
|
|
|
|
1062
|
my $_debug = $self->{_debug}; |
507
|
501
|
|
|
|
|
1061
|
my $_debugh = $self->{_debugh}; |
508
|
|
|
|
|
|
|
|
509
|
501
|
50
|
|
|
|
1427
|
confess "Hey! what is this second argument?" if defined($nonexistent_arg); |
510
|
|
|
|
|
|
|
|
511
|
501
|
|
|
|
|
910
|
my $new_count = $#{$self->{_flat}} + 1; |
|
501
|
|
|
|
|
1381
|
|
512
|
|
|
|
|
|
|
|
513
|
501
|
|
|
|
|
1002
|
my $updated_max = 0; |
514
|
501
|
100
|
|
|
|
1776
|
if ($new_count > $self->get_max_in_mem_record_count()) { |
515
|
153
|
|
|
|
|
637
|
$self->_set_max_in_mem_record_count($new_count); |
516
|
153
|
|
|
|
|
310
|
$updated_max = 1; |
517
|
|
|
|
|
|
|
} |
518
|
|
|
|
|
|
|
|
519
|
501
|
|
|
|
|
1528
|
$self->{_in_mem_record_count} = $new_count; |
520
|
501
|
50
|
|
|
|
1687
|
if ($_debug) { |
521
|
0
|
|
|
|
|
0
|
print($_debugh "_in_mem_record_count updated, set to $new_count"); |
522
|
0
|
0
|
|
|
|
0
|
print($_debugh " (also updated max)") if $updated_max; |
523
|
0
|
|
|
|
|
0
|
print($_debugh "\n"); |
524
|
|
|
|
|
|
|
} |
525
|
|
|
|
|
|
|
} |
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
sub _detect_inh_encoding { |
528
|
313
|
|
|
313
|
|
1377
|
my ($self, $enc, $via, $in_file, $detect_enc) = @_; |
529
|
313
|
|
|
|
|
785
|
my $_debug = $self->{_debug}; |
530
|
313
|
|
|
|
|
665
|
my $_debugh = $self->{_debugh}; |
531
|
|
|
|
|
|
|
|
532
|
313
|
100
|
66
|
|
|
1312
|
$enc = $DEFAULT_IN_ENCODING if !defined($enc) or $enc eq ''; |
533
|
|
|
|
|
|
|
|
534
|
313
|
|
|
|
|
2405
|
my @encodings = split(/\s*,\s*/, $enc); |
535
|
|
|
|
|
|
|
|
536
|
313
|
50
|
|
|
|
1158
|
confess "Oups! No encoding to try?" if $#encodings < 0; |
537
|
|
|
|
|
|
|
|
538
|
313
|
50
|
|
|
|
972
|
print($_debugh "[ST] _detect_inh_encoding(): start\n") if $_debug; |
539
|
|
|
|
|
|
|
|
540
|
313
|
|
|
|
|
582
|
my $wrn = 0; |
541
|
313
|
|
|
|
|
944
|
my $m; |
542
|
|
|
|
|
|
|
my $m0; |
543
|
313
|
|
|
|
|
0
|
my $ee; |
544
|
313
|
|
|
|
|
808
|
for my $e (@encodings) { |
545
|
325
|
|
|
|
|
693
|
$ee = $e; |
546
|
325
|
|
|
|
|
856
|
my $viadef = _get_def($via, ''); |
547
|
325
|
|
|
|
|
1113
|
$m = ":encoding($e)$viadef"; |
548
|
325
|
100
|
|
|
|
1081
|
$m0 = $m unless defined($m0); |
549
|
|
|
|
|
|
|
|
550
|
325
|
100
|
|
|
|
905
|
last unless $detect_enc; |
551
|
|
|
|
|
|
|
|
552
|
313
|
50
|
33
|
|
|
1843
|
confess "Oups! in_file not defined?" if !defined($in_file) or $in_file eq ''; |
553
|
|
|
|
|
|
|
|
554
|
313
|
50
|
|
|
|
871
|
print($_debugh " Checking encoding '$e' / '$m'\n") if $_debug; |
555
|
313
|
|
|
|
|
617
|
$wrn = 0; |
556
|
|
|
|
|
|
|
|
557
|
313
|
|
|
|
|
1426
|
$self->_register_pass("check $e encoding"); |
558
|
|
|
|
|
|
|
|
559
|
313
|
|
|
|
|
643
|
my $utf8_bom = 0; |
560
|
313
|
100
|
|
|
|
976
|
if (_is_utf8($e)) { |
561
|
289
|
50
|
|
|
|
7500
|
if (open my $fh, '<:raw', $in_file) { |
562
|
289
|
|
|
|
|
732
|
my $bom; |
563
|
289
|
|
|
|
|
5712
|
read $fh, $bom, 3; |
564
|
289
|
100
|
66
|
|
|
1963
|
if (length($bom) == 3 and $bom eq "\xef\xbb\xbf") { |
565
|
12
|
100
|
|
|
|
84
|
if (!defined($via)) { |
566
|
10
|
|
|
|
|
35
|
$m .= ":via(File::BOM)"; |
567
|
|
|
|
|
|
|
} |
568
|
|
|
|
|
|
|
} |
569
|
289
|
|
|
|
|
3955
|
close $fh; |
570
|
|
|
|
|
|
|
} |
571
|
|
|
|
|
|
|
} |
572
|
|
|
|
|
|
|
|
573
|
313
|
|
|
|
|
753
|
my $inh; |
574
|
313
|
50
|
|
|
|
6191
|
if (!open($inh, "<", $in_file)) { |
575
|
0
|
|
|
|
|
0
|
$self->_print_error("unable to open file '$in_file': $!"); |
576
|
0
|
|
|
|
|
0
|
return ($encodings[0], $m0); |
577
|
|
|
|
|
|
|
} |
578
|
15
|
|
|
15
|
|
116
|
binmode $inh, $m; |
|
15
|
|
|
|
|
32
|
|
|
15
|
|
|
|
|
102
|
|
|
313
|
|
|
|
|
4070
|
|
579
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
# TURN OFF WARNINGS OUTPUT |
581
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
{ |
583
|
313
|
|
|
|
|
42788
|
local $SIG{__WARN__} = sub { |
584
|
51
|
|
|
51
|
|
746
|
$wrn++; |
585
|
|
|
|
|
|
|
# Uncomment only for debug! |
586
|
|
|
|
|
|
|
# Otherwise you'll get quite a good deal of output at each execution :-) |
587
|
|
|
|
|
|
|
# print(STDERR @_); |
588
|
313
|
|
|
|
|
2604
|
}; |
589
|
313
|
|
|
|
|
4928
|
while (<$inh>) { } |
590
|
|
|
|
|
|
|
} |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
# WARNINGS ARE BACK ON |
593
|
|
|
|
|
|
|
|
594
|
313
|
|
|
|
|
18097
|
close $inh; |
595
|
313
|
50
|
|
|
|
1212
|
print($_debugh " '$m' counts $wrn warning(s)\n") if $_debug; |
596
|
|
|
|
|
|
|
|
597
|
313
|
100
|
|
|
|
1608
|
last if $wrn == 0; |
598
|
|
|
|
|
|
|
} |
599
|
|
|
|
|
|
|
|
600
|
313
|
50
|
|
|
|
1086
|
if ($wrn >= 1) { |
601
|
0
|
|
|
|
|
0
|
$self->_print_warning("encoding warnings encountered during initial check, " . |
602
|
|
|
|
|
|
|
"using '$encodings[0]'"); |
603
|
0
|
|
|
|
|
0
|
return ($encodings[0], $m0); |
604
|
|
|
|
|
|
|
} |
605
|
|
|
|
|
|
|
|
606
|
313
|
50
|
|
|
|
958
|
confess "Oups! undef encoding string?" unless defined($m); |
607
|
|
|
|
|
|
|
|
608
|
313
|
50
|
|
|
|
873
|
print($_debugh " Detected encoding string '$ee' / '$m'\n") if $_debug; |
609
|
313
|
|
|
|
|
1561
|
return ($ee, $m); |
610
|
|
|
|
|
|
|
} |
611
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
# |
613
|
|
|
|
|
|
|
# Each of these functions brings status to the next value (current status + 1). |
614
|
|
|
|
|
|
|
# Each of these functions returns 0 if an error occured, 1 if all good |
615
|
|
|
|
|
|
|
# |
616
|
|
|
|
|
|
|
my @status_forward_functions = ( |
617
|
|
|
|
|
|
|
"_S1_init_input", # To go from S0 to S1 |
618
|
|
|
|
|
|
|
"_S2_init_fields_from_header", # To go form S1 to S2 |
619
|
|
|
|
|
|
|
"_S3_init_fields_extra", # To go from S2 to S3 |
620
|
|
|
|
|
|
|
"_S4_read_all_in_mem", # To go from S3 to S4 |
621
|
|
|
|
|
|
|
); |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
sub _status_reset { |
624
|
487
|
|
|
487
|
|
1010
|
my $self = shift; |
625
|
|
|
|
|
|
|
|
626
|
487
|
|
|
|
|
3754
|
validate_pos(@_, {type => SCALAR, optional => 1}); |
627
|
487
|
|
|
|
|
2042
|
my $called_from_print_error = _get_def($_[0], 0); |
628
|
|
|
|
|
|
|
|
629
|
487
|
100
|
100
|
|
|
2351
|
if (defined($self->{_status}) and $self->{_status} == 4) { |
630
|
18
|
100
|
|
|
|
65
|
unless ($called_from_print_error) { |
631
|
16
|
|
|
|
|
39
|
my $msg = "in-memory CSV content discarded, will have to re-read input"; |
632
|
16
|
|
|
|
|
65
|
$self->_print_warning($msg); |
633
|
|
|
|
|
|
|
} |
634
|
18
|
|
|
|
|
93
|
$self->{_flat} = [ ]; |
635
|
18
|
|
|
|
|
173
|
$self->_update_in_mem_record_count(); |
636
|
|
|
|
|
|
|
} |
637
|
|
|
|
|
|
|
|
638
|
487
|
|
|
|
|
1193
|
$self->{_status} = 0; |
639
|
487
|
100
|
|
|
|
1446
|
return 0 if $called_from_print_error; |
640
|
457
|
|
|
|
|
1328
|
return $self->_status_forward('S1'); |
641
|
|
|
|
|
|
|
} |
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
sub _status_forward { |
644
|
4017
|
|
|
4017
|
|
6996
|
my $self = shift; |
645
|
|
|
|
|
|
|
|
646
|
4017
|
|
|
|
|
10067
|
return $self->___status_move(@_, 1); |
647
|
|
|
|
|
|
|
} |
648
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
sub _status_backward { |
650
|
139
|
|
|
139
|
|
269
|
my $self = shift; |
651
|
|
|
|
|
|
|
|
652
|
139
|
|
|
|
|
399
|
return $self->___status_move(@_, -1); |
653
|
|
|
|
|
|
|
} |
654
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
# You should not call ___status_move() in the code, that is why the name is prefixed with 3 |
656
|
|
|
|
|
|
|
# underscores! Only _status_forward and _status_backward should call it. |
657
|
|
|
|
|
|
|
sub ___status_move { |
658
|
4156
|
|
|
4156
|
|
9007
|
my ($self, $target, $step) = @_; |
659
|
|
|
|
|
|
|
|
660
|
4156
|
|
|
|
|
7854
|
my $_debug = $self->{_debug}; |
661
|
4156
|
|
|
|
|
7283
|
my $_debugh = $self->{_debugh}; |
662
|
|
|
|
|
|
|
|
663
|
4156
|
50
|
66
|
|
|
25252
|
if (!defined($step) or ($step != -1 and $step != 1)) { |
|
|
|
33
|
|
|
|
|
664
|
0
|
|
|
|
|
0
|
confess "Oups! \$step has a wrong value: '$step'"; |
665
|
|
|
|
|
|
|
} |
666
|
|
|
|
|
|
|
|
667
|
4156
|
|
|
|
|
6741
|
my $n; |
668
|
4156
|
50
|
|
|
|
20288
|
confess "Oups! illegal status string: '$target'" unless ($n) = $target =~ m/^S(\d)$/; |
669
|
|
|
|
|
|
|
|
670
|
4156
|
100
|
|
|
|
11469
|
if ($self->{_read_in_progress}) { |
671
|
1
|
|
|
|
|
6
|
$self->_print_error("illegal call while read is in progress, " . |
672
|
|
|
|
|
|
|
"would lead to infinite recursion", 0); |
673
|
0
|
|
|
|
|
0
|
confess "Aborted."; |
674
|
|
|
|
|
|
|
} |
675
|
|
|
|
|
|
|
|
676
|
4155
|
100
|
|
|
|
10067
|
if ($step == -1) { |
677
|
139
|
100
|
|
|
|
527
|
if ($n < $self->{_status}) { |
678
|
19
|
100
|
|
|
|
76
|
if ($self->{_status} == 4) { |
679
|
16
|
50
|
|
|
|
72
|
print($_debugh "[ST] Requested status $n but will go to status 0\n") if $_debug; |
680
|
16
|
|
|
|
|
63
|
return $self->_status_reset(); |
681
|
|
|
|
|
|
|
} |
682
|
3
|
|
|
|
|
6
|
$self->{_status} = $n ; |
683
|
3
|
50
|
|
|
|
9
|
print($_debugh "[ST] New status: ". $self->{_status} . "\n") if $_debug; |
684
|
|
|
|
|
|
|
} |
685
|
123
|
|
|
|
|
446
|
return 1; |
686
|
|
|
|
|
|
|
} |
687
|
|
|
|
|
|
|
|
688
|
4016
|
100
|
|
|
|
11991
|
if ($self->{_status} < $n) { |
689
|
856
|
50
|
|
|
|
2278
|
print($_debugh "[ST] Current status: ". $self->{_status} . "\n") if $_debug; |
690
|
|
|
|
|
|
|
} |
691
|
|
|
|
|
|
|
|
692
|
4016
|
100
|
100
|
|
|
13962
|
if ($self->{_status} <= 1 and $n >= 2 and $self->{_int_one_pass} and |
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
693
|
|
|
|
|
|
|
$self->get_pass_count() >= 1) { |
694
|
12
|
|
|
|
|
26
|
my $msg = "one_pass set, unable to read input again"; |
695
|
12
|
50
|
|
|
|
54
|
$self->_print_error($msg), return 0 if $self->{one_pass}; |
696
|
0
|
0
|
|
|
|
0
|
$self->_print_warning($msg) if !$self->{one_pass}; |
697
|
|
|
|
|
|
|
} |
698
|
|
|
|
|
|
|
|
699
|
4004
|
|
|
|
|
10264
|
while ($self->{_status} < $n) { |
700
|
|
|
|
|
|
|
|
701
|
1326
|
|
|
|
|
3229
|
my $funcname = $status_forward_functions[$self->{_status}]; |
702
|
1326
|
50
|
|
|
|
3316
|
confess "Oups! Unknown status?" unless defined($funcname); |
703
|
|
|
|
|
|
|
|
704
|
1326
|
50
|
|
|
|
3175
|
print($_debugh "[ST] Now executing $funcname\n") if $_debug; |
705
|
|
|
|
|
|
|
|
706
|
1326
|
50
|
|
|
|
6403
|
if (my $member_function = $self->can($funcname)) { |
707
|
1326
|
100
|
|
|
|
3958
|
return 0 unless $self->$member_function(); |
708
|
|
|
|
|
|
|
} else { |
709
|
0
|
|
|
|
|
0
|
confess "Could not find method $funcname in $PKG!"; |
710
|
|
|
|
|
|
|
} |
711
|
|
|
|
|
|
|
|
712
|
1307
|
|
|
|
|
3121
|
$self->{_status} += $step; |
713
|
1307
|
50
|
|
|
|
5235
|
print($_debugh "[ST] New status: ". $self->{_status} . "\n") if $_debug; |
714
|
|
|
|
|
|
|
} |
715
|
|
|
|
|
|
|
|
716
|
3985
|
|
|
|
|
12473
|
return 1; |
717
|
|
|
|
|
|
|
} |
718
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
sub new { |
720
|
326
|
|
|
326
|
1
|
317556
|
my ($class, @args) = @_; |
721
|
|
|
|
|
|
|
|
722
|
326
|
|
|
|
|
42533
|
@args = validate(@args, |
723
|
|
|
|
|
|
|
{ in_file => {type => SCALAR, optional => 1}, |
724
|
|
|
|
|
|
|
infoh => {type => UNDEF | GLOBREF, default => \*STDERR, optional => 1}, |
725
|
|
|
|
|
|
|
verbose => {type => BOOLEAN, default => 0, optional => 1}, |
726
|
|
|
|
|
|
|
quiet => {type => BOOLEAN, optional => 1}, |
727
|
|
|
|
|
|
|
croak_if_error => {type => BOOLEAN, default => 1, optional => 1}, |
728
|
|
|
|
|
|
|
inh => {type => GLOBREF, optional => 1}, |
729
|
|
|
|
|
|
|
in_csvobj => {type => OBJECT, optional => 1}, |
730
|
|
|
|
|
|
|
sep_char => {type => SCALAR, optional => 1}, |
731
|
|
|
|
|
|
|
quote_char => {type => SCALAR, optional => 1}, |
732
|
|
|
|
|
|
|
escape_char => {type => SCALAR, optional => 1}, |
733
|
|
|
|
|
|
|
has_headers => {type => BOOLEAN, default => 1, optional => 1}, |
734
|
|
|
|
|
|
|
out_has_headers => {type => UNDEF | BOOLEAN, default => undef, optional => 1}, |
735
|
|
|
|
|
|
|
fields_ar => {type => ARRAYREF, optional => 1}, |
736
|
|
|
|
|
|
|
fields_hr => {type => HASHREF, optional => 1}, |
737
|
|
|
|
|
|
|
fields_column_names => {type => ARRAYREF, optional => 1}, |
738
|
|
|
|
|
|
|
search_case => {type => SCALAR, optional => 1}, |
739
|
|
|
|
|
|
|
search_trim => {type => SCALAR, optional => 1}, |
740
|
|
|
|
|
|
|
search_ignore_empty => {type => SCALAR, optional => 1}, |
741
|
|
|
|
|
|
|
search_ignore_accents => {type => SCALAR, optional => 1}, |
742
|
|
|
|
|
|
|
search_ignore_ambiguous => {type => SCALAR, optional => 1}, |
743
|
|
|
|
|
|
|
search_value_if_not_found => {type => SCALAR, optional => 1}, |
744
|
|
|
|
|
|
|
search_value_if_found => {type => SCALAR, optional => 1}, |
745
|
|
|
|
|
|
|
search_value_if_ambiguous => {type => SCALAR, optional => 1}, |
746
|
|
|
|
|
|
|
walker_hr => {type => CODEREF, optional => 1}, |
747
|
|
|
|
|
|
|
walker_ar => {type => CODEREF, optional => 1}, |
748
|
|
|
|
|
|
|
read_post_update_hr => {type => CODEREF, optional => 1}, |
749
|
|
|
|
|
|
|
write_filter_hr => {type => CODEREF, optional => 1}, |
750
|
|
|
|
|
|
|
out_filter => {type => CODEREF, optional => 1}, |
751
|
|
|
|
|
|
|
write_fields => {type => ARRAYREF, optional => 1}, |
752
|
|
|
|
|
|
|
out_fields => {type => ARRAYREF, optional => 1}, |
753
|
|
|
|
|
|
|
out_file => {type => SCALAR, optional => 1}, |
754
|
|
|
|
|
|
|
out_always_quote => {type => BOOLEAN, optional => 1}, |
755
|
|
|
|
|
|
|
out_sep_char => {type => SCALAR, optional => 1}, |
756
|
|
|
|
|
|
|
out_quote_char => {type => SCALAR, optional => 1}, |
757
|
|
|
|
|
|
|
out_escape_char => {type => SCALAR, optional => 1}, |
758
|
|
|
|
|
|
|
out_dates_format => {type => SCALAR, optional => 1}, |
759
|
|
|
|
|
|
|
out_dates_locale => {type => SCALAR, optional => 1}, |
760
|
|
|
|
|
|
|
encoding => {type => SCALAR, optional => 1}, |
761
|
|
|
|
|
|
|
via => {type => SCALAR, optional => 1}, |
762
|
|
|
|
|
|
|
out_encoding => {type => SCALAR, optional => 1}, |
763
|
|
|
|
|
|
|
dont_mess_with_encoding => {type => BOOLEAN, optional => 1}, |
764
|
|
|
|
|
|
|
one_pass => {type => BOOLEAN, optional => 1}, |
765
|
|
|
|
|
|
|
no_undef => {type => BOOLEAN, optional => 1}, |
766
|
|
|
|
|
|
|
fields_dates => {type => ARRAYREF, optional => 1}, |
767
|
|
|
|
|
|
|
fields_dates_auto => {type => BOOLEAN, optional => 1}, |
768
|
|
|
|
|
|
|
dates_formats_to_try => {type => ARRAYREF, optional => 1}, |
769
|
|
|
|
|
|
|
dates_formats_to_try_supp => {type => ARRAYREF, optional => 1}, |
770
|
|
|
|
|
|
|
dates_ignore_trailing_chars => {type => BOOLEAN, optional => 1}, |
771
|
|
|
|
|
|
|
dates_search_time => {type => BOOLEAN, optional => 1}, |
772
|
|
|
|
|
|
|
dates_locales => {type => SCALAR, optional => 1}, |
773
|
|
|
|
|
|
|
out_utf8_bom => {type => SCALAR, optional => 1}, |
774
|
|
|
|
|
|
|
dates_zeros_ok => {type => SCALAR, default => 1, optional => 1}, |
775
|
|
|
|
|
|
|
_debug => {type => BOOLEAN, default => 0, optional => 1}, |
776
|
|
|
|
|
|
|
_debug_read => {type => BOOLEAN, default => 0, optional => 1}, |
777
|
|
|
|
|
|
|
_debug_extra_fields => {type => BOOLEAN, optional => 1}, |
778
|
|
|
|
|
|
|
_debugh => {type => UNDEF | GLOBREF, optional => 1} |
779
|
|
|
|
|
|
|
} |
780
|
|
|
|
|
|
|
); |
781
|
|
|
|
|
|
|
|
782
|
322
|
|
|
|
|
7777
|
my $self = { @args }; |
783
|
|
|
|
|
|
|
|
784
|
322
|
|
|
|
|
916
|
my @fields = keys %{$self}; |
|
322
|
|
|
|
|
1651
|
|
785
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
# croak_if_error |
787
|
|
|
|
|
|
|
|
788
|
322
|
|
|
|
|
1050
|
my $croak_if_error = $self->{croak_if_error}; |
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
# verbose and _debug management |
791
|
|
|
|
|
|
|
|
792
|
322
|
50
|
|
|
|
1471
|
$self->{_debugh} = $self->{infoh} if !defined($self->{_debugh}); |
793
|
322
|
50
|
|
|
|
1109
|
$self->{_debug} = 1 if $ALWAYS_DEBUG; |
794
|
322
|
|
|
|
|
701
|
my $_debug = $self->{_debug}; |
795
|
322
|
50
|
|
|
|
909
|
$self->{verbose} = 1 if $_debug; |
796
|
322
|
|
|
|
|
690
|
my $verbose = $self->{verbose}; |
797
|
|
|
|
|
|
|
|
798
|
322
|
|
|
|
|
704
|
my $_debugh = $self->{_debugh}; |
799
|
|
|
|
|
|
|
|
800
|
322
|
|
|
|
|
763
|
bless $self, $class; |
801
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
# fields_ar, fields_hr |
803
|
|
|
|
|
|
|
|
804
|
322
|
100
|
|
|
|
1373
|
if (defined($self->{fields_ar}) + |
805
|
|
|
|
|
|
|
defined($self->{fields_hr}) + |
806
|
|
|
|
|
|
|
defined($self->{fields_column_names}) |
807
|
|
|
|
|
|
|
>= 2) { |
808
|
1
|
|
|
|
|
4
|
$self->_print_error("mixed use of fields_ar, fields_hr and fields_column_names. " . |
809
|
|
|
|
|
|
|
"Use one at a time."); |
810
|
|
|
|
|
|
|
} |
811
|
322
|
100
|
100
|
|
|
1316
|
if (defined($self->{fields_ar}) and !defined($self->{fields_hr})) { |
812
|
2
|
|
|
|
|
4
|
my @f = @{$self->{fields_ar}}; |
|
2
|
|
|
|
|
6
|
|
813
|
2
|
|
|
|
|
5
|
my %h; |
814
|
2
|
|
|
|
|
5
|
for my $e (@f) { |
815
|
6
|
|
|
|
|
18
|
$h{$e} = "^$e\$"; |
816
|
|
|
|
|
|
|
} |
817
|
2
|
|
|
|
|
6
|
$self->{fields_hr} = \%h; |
818
|
|
|
|
|
|
|
} |
819
|
322
|
100
|
|
|
|
1020
|
if (!$self->{has_headers}) { |
820
|
13
|
100
|
|
|
|
38
|
if (defined($self->{fields_ar})) { |
821
|
1
|
|
|
|
|
4
|
$self->_print_error("fields_ar irrelevant if CSV file has no headers"); |
822
|
1
|
|
|
|
|
8
|
return undef; |
823
|
|
|
|
|
|
|
} |
824
|
12
|
100
|
|
|
|
35
|
if (defined($self->{fields_hr})) { |
825
|
1
|
|
|
|
|
3
|
$self->_print_error("fields_hr irrelevant if CSV file has no headers"); |
826
|
1
|
|
|
|
|
10
|
return undef; |
827
|
|
|
|
|
|
|
} |
828
|
|
|
|
|
|
|
} |
829
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
# in_file or inh |
831
|
|
|
|
|
|
|
|
832
|
320
|
|
|
|
|
921
|
$self->{_flat} = [ ]; |
833
|
|
|
|
|
|
|
|
834
|
320
|
|
|
|
|
803
|
$self->{_read_update_after_hr} = { }; |
835
|
320
|
|
|
|
|
794
|
$self->{_write_update_before_hr} = { }; |
836
|
|
|
|
|
|
|
|
837
|
320
|
|
|
|
|
1364
|
$self->_update_in_mem_record_count(); |
838
|
|
|
|
|
|
|
|
839
|
320
|
100
|
|
|
|
1070
|
return undef unless $self->_status_reset(); |
840
|
|
|
|
|
|
|
|
841
|
311
|
50
|
|
|
|
899
|
$self->_debug_show_members() if $_debug; |
842
|
|
|
|
|
|
|
|
843
|
311
|
100
|
|
|
|
1062
|
if ($self->{dates_zeros_ok}) { |
844
|
|
|
|
|
|
|
$self->{_refsub_is_datetime_empty} = sub { |
845
|
9626
|
|
|
9626
|
|
18500
|
my $v = $_[0]; |
846
|
9626
|
100
|
|
|
|
36790
|
if ($v !~ m/[1-9]/) { |
847
|
4201
|
100
|
|
|
|
12200
|
return 1 if $v =~ m/^[^0:]*0+[^0:]+0+[^0:]+0+/; |
848
|
|
|
|
|
|
|
} |
849
|
9623
|
|
|
|
|
37148
|
return 0; |
850
|
|
|
|
|
|
|
} |
851
|
309
|
|
|
|
|
1937
|
} |
852
|
|
|
|
|
|
|
|
853
|
311
|
|
|
|
|
2190
|
return $self; |
854
|
|
|
|
|
|
|
}; |
855
|
|
|
|
|
|
|
|
856
|
|
|
|
|
|
|
# |
857
|
|
|
|
|
|
|
# Return 0 if error, 1 if all good |
858
|
|
|
|
|
|
|
# |
859
|
|
|
|
|
|
|
# Do all low level activities associated to input: |
860
|
|
|
|
|
|
|
# I/O init |
861
|
|
|
|
|
|
|
# Detect encoding |
862
|
|
|
|
|
|
|
# Detect CSV separator |
863
|
|
|
|
|
|
|
# Detect escape character |
864
|
|
|
|
|
|
|
# |
865
|
|
|
|
|
|
|
sub _S1_init_input { |
866
|
463
|
|
|
463
|
|
1087
|
my $self = shift; |
867
|
|
|
|
|
|
|
|
868
|
463
|
|
|
|
|
1011
|
my $croak_if_error = $self->{croak_if_error}; |
869
|
463
|
|
|
|
|
914
|
my $_debug = $self->{_debug}; |
870
|
463
|
|
|
|
|
917
|
my $_debugh = $self->{_debugh}; |
871
|
|
|
|
|
|
|
|
872
|
463
|
100
|
|
|
|
1368
|
$self->{in_file} = '' unless defined($self->{in_file}); |
873
|
463
|
|
|
|
|
1160
|
$self->{_close_inh_when_finished} = 0; |
874
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
$self->{_leave_encoding_alone} = $self->{dont_mess_with_encoding} |
876
|
463
|
50
|
|
|
|
1342
|
if defined($self->{dont_mess_with_encoding}); |
877
|
|
|
|
|
|
|
|
878
|
463
|
|
|
|
|
1596
|
$self->{_int_one_pass} = _get_def($self->{one_pass}, 0); |
879
|
463
|
|
|
|
|
1118
|
my $in_file_disp; |
880
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
# |
883
|
|
|
|
|
|
|
# LOW LEVEL INIT STEP 1 OF 4 |
884
|
|
|
|
|
|
|
# |
885
|
|
|
|
|
|
|
# Manage I/O (= in most cases, open input file...) |
886
|
|
|
|
|
|
|
# |
887
|
|
|
|
|
|
|
|
888
|
463
|
100
|
|
|
|
1259
|
if (defined($self->{inh})) { |
889
|
4
|
50
|
|
|
|
11
|
$self->{_leave_encoding_alone} = 1 unless defined($self->{dont_mess_with_encoding}); |
890
|
4
|
|
|
|
|
10
|
$in_file_disp = _get_def($self->{in_file}, '<?>'); |
891
|
4
|
50
|
|
|
|
13
|
$self->{_int_one_pass} = 1 unless defined($self->{one_pass}); |
892
|
4
|
|
|
|
|
6
|
$self->{_inh} = $self->{inh}; |
893
|
|
|
|
|
|
|
} else { |
894
|
459
|
50
|
|
|
|
1568
|
$self->{_leave_encoding_alone} = 0 unless defined($self->{dont_mess_with_encoding}); |
895
|
459
|
|
|
|
|
1013
|
my $in_file = $self->{in_file}; |
896
|
459
|
|
|
|
|
769
|
my $inh; |
897
|
459
|
50
|
|
|
|
1190
|
if ($in_file eq '') { |
898
|
0
|
|
|
|
|
0
|
$inh = \*STDIN; |
899
|
0
|
|
|
|
|
0
|
$self->{inh_is_stdin} = 1; |
900
|
0
|
0
|
|
|
|
0
|
$self->{_int_one_pass} = 1 unless defined($self->{one_pass}); |
901
|
0
|
|
|
|
|
0
|
$in_file_disp = '<stdin>'; |
902
|
|
|
|
|
|
|
} else { |
903
|
459
|
100
|
|
|
|
21694
|
if (!open($inh, '<', $in_file)) { |
904
|
3
|
|
|
|
|
42
|
$self->_print_error("unable to open file '$in_file': $!"); |
905
|
3
|
|
|
|
|
48
|
return 0; |
906
|
|
|
|
|
|
|
} |
907
|
456
|
|
|
|
|
1412
|
$in_file_disp = $in_file; |
908
|
456
|
|
|
|
|
1173
|
$self->{_close_inh_when_finished} = 1; |
909
|
|
|
|
|
|
|
} |
910
|
456
|
|
|
|
|
1226
|
$self->{_inh} = $inh; |
911
|
|
|
|
|
|
|
} |
912
|
|
|
|
|
|
|
|
913
|
460
|
50
|
|
|
|
1262
|
confess "Oups! in_file_disp not defined?" unless defined($in_file_disp); |
914
|
460
|
|
|
|
|
1228
|
$self->{_in_file_disp} = $in_file_disp; |
915
|
|
|
|
|
|
|
|
916
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
# |
918
|
|
|
|
|
|
|
# LOW LEVEL INIT STEP 2 OF 4 |
919
|
|
|
|
|
|
|
# |
920
|
|
|
|
|
|
|
# "Detection" of encoding |
921
|
|
|
|
|
|
|
# |
922
|
|
|
|
|
|
|
# WARNING |
923
|
|
|
|
|
|
|
# As explained in the manual, it is a very partial and limited detection... |
924
|
|
|
|
|
|
|
# |
925
|
|
|
|
|
|
|
|
926
|
460
|
100
|
|
|
|
1405
|
unless ($self->{_leave_encoding_alone}) { |
927
|
456
|
100
|
|
|
|
1323
|
unless ($self->{_init_input_already_called}) { |
928
|
|
|
|
|
|
|
my ($e, $m) = $self->_detect_inh_encoding($self->{encoding}, $self->{via}, |
929
|
313
|
100
|
|
|
|
2098
|
$self->{in_file}, ($self->{_int_one_pass} ? 0 : $DETECT_ENCODING)); |
930
|
313
|
|
|
|
|
1203
|
$self->{_inh_encoding} = $e; |
931
|
313
|
|
|
|
|
859
|
$self->{_inh_encoding_string} = $m; |
932
|
|
|
|
|
|
|
} |
933
|
|
|
|
|
|
|
|
934
|
456
|
|
|
|
|
3848
|
binmode $self->{_inh}, $self->{_inh_encoding_string}; |
935
|
|
|
|
|
|
|
print($_debugh "Input encoding: '" . $self->{_inh_encoding} . "' / '" . |
936
|
456
|
50
|
|
|
|
23439
|
$self->{_inh_encoding_string} . "'\n") if $_debug; |
937
|
|
|
|
|
|
|
|
938
|
|
|
|
|
|
|
} |
939
|
|
|
|
|
|
|
|
940
|
460
|
100
|
|
|
|
1707
|
$self->{out_file} = '' unless defined($self->{out_file}); |
941
|
|
|
|
|
|
|
|
942
|
|
|
|
|
|
|
|
943
|
|
|
|
|
|
|
# |
944
|
|
|
|
|
|
|
# LOW LEVEL INIT STEP 3 OF 4 |
945
|
|
|
|
|
|
|
# |
946
|
|
|
|
|
|
|
# Detection of CSV separator and escape character |
947
|
|
|
|
|
|
|
# |
948
|
|
|
|
|
|
|
|
949
|
460
|
|
|
|
|
872
|
my $sep_char; |
950
|
460
|
|
|
|
|
1047
|
my $escape_char = $self->{escape_char}; |
951
|
460
|
100
|
|
|
|
1535
|
$self->{quote_char} = $DEFAULT_QUOTE_CHAR unless defined($self->{quote_char}); |
952
|
460
|
|
|
|
|
968
|
my $quote_char = $self->{quote_char}; |
953
|
460
|
100
|
|
|
|
1362
|
unless (defined($self->{in_csvobj})) { |
954
|
455
|
100
|
|
|
|
1383
|
if (defined($self->{sep_char})) { |
955
|
200
|
|
|
|
|
442
|
$sep_char = $self->{sep_char}; |
956
|
200
|
50
|
|
|
|
568
|
print($_debugh "-- $in_file_disp: CSV separator set to \"") if $_debug; |
957
|
|
|
|
|
|
|
} else { |
958
|
|
|
|
|
|
|
# The test below (on _init_input_already_called) shoud be useless. |
959
|
|
|
|
|
|
|
# Left for the sake of robustness. |
960
|
255
|
50
|
|
|
|
770
|
unless ($self->{_init_input_already_called}) { |
961
|
255
|
100
|
|
|
|
1099
|
if (!$self->_detect_csv_sep($escape_char, $quote_char, \$sep_char)) { |
962
|
6
|
|
|
|
|
29
|
$self->_print_error("'$in_file_disp': cannot detect CSV separator"); |
963
|
0
|
|
|
|
|
0
|
return 0; |
964
|
|
|
|
|
|
|
} |
965
|
249
|
50
|
|
|
|
746
|
print($_debugh "-- $in_file_disp: CSV separator detected to \"") if $_debug; |
966
|
249
|
|
|
|
|
686
|
$self->{sep_char} = $sep_char; |
967
|
|
|
|
|
|
|
} |
968
|
|
|
|
|
|
|
} |
969
|
449
|
0
|
|
|
|
1168
|
print($_debugh ($sep_char eq "\t" ? '\t' : $sep_char) . "\"\n") if $_debug; |
|
|
50
|
|
|
|
|
|
970
|
|
|
|
|
|
|
|
971
|
449
|
|
|
|
|
796
|
my $is_always_quoted; |
972
|
449
|
100
|
|
|
|
1316
|
unless (defined($self->{escape_char})) { |
973
|
304
|
|
|
|
|
1346
|
$self->_detect_escape_char($quote_char, $sep_char, \$escape_char, \$is_always_quoted); |
974
|
304
|
|
|
|
|
1177
|
$self->{escape_char} = $escape_char; |
975
|
304
|
|
|
|
|
751
|
$self->{_is_always_quoted} = $is_always_quoted; |
976
|
|
|
|
|
|
|
} |
977
|
|
|
|
|
|
|
|
978
|
449
|
|
|
|
|
3655
|
$self->{_in_csvobj} = Text::CSV->new({sep_char => $sep_char, |
979
|
|
|
|
|
|
|
allow_whitespace => 1, binary => 1, auto_diag => 0, |
980
|
|
|
|
|
|
|
quote_char => $quote_char, escape_char => $escape_char, |
981
|
|
|
|
|
|
|
allow_loose_escapes => 1}); |
982
|
449
|
50
|
|
|
|
88376
|
unless (defined($self->{_in_csvobj})) { |
983
|
0
|
|
|
|
|
0
|
$self->_print_error("error creating input Text::CSV object"); |
984
|
0
|
|
|
|
|
0
|
return 0; |
985
|
|
|
|
|
|
|
} |
986
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
} else { |
988
|
5
|
|
|
|
|
9
|
$self->{_in_csvobj} = $self->{in_csvobj}; |
989
|
|
|
|
|
|
|
} |
990
|
|
|
|
|
|
|
|
991
|
454
|
|
|
|
|
1272
|
$self->{_init_input_already_called} = 1; |
992
|
|
|
|
|
|
|
|
993
|
454
|
|
|
|
|
1598
|
return 1; |
994
|
|
|
|
|
|
|
} |
995
|
|
|
|
|
|
|
|
996
|
|
|
|
|
|
|
sub get_in_file_disp { |
997
|
3640
|
|
|
3640
|
1
|
6004
|
my $self = shift; |
998
|
|
|
|
|
|
|
|
999
|
3640
|
|
|
|
|
21415
|
validate_pos(@_); |
1000
|
|
|
|
|
|
|
|
1001
|
3640
|
|
|
|
|
10961
|
my $in_file_disp = _get_def($self->{_in_file_disp}, '?'); |
1002
|
3640
|
|
|
|
|
7890
|
return $in_file_disp; |
1003
|
|
|
|
|
|
|
} |
1004
|
|
|
|
|
|
|
|
1005
|
|
|
|
|
|
|
sub get_sep_char { |
1006
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
1007
|
|
|
|
|
|
|
|
1008
|
0
|
|
|
|
|
0
|
validate_pos(@_); |
1009
|
|
|
|
|
|
|
|
1010
|
0
|
|
|
|
|
0
|
return $self->{sep_char}; |
1011
|
|
|
|
|
|
|
} |
1012
|
|
|
|
|
|
|
|
1013
|
|
|
|
|
|
|
sub get_escape_char { |
1014
|
9
|
|
|
9
|
1
|
255
|
my $self = shift; |
1015
|
|
|
|
|
|
|
|
1016
|
9
|
|
|
|
|
79
|
validate_pos(@_); |
1017
|
|
|
|
|
|
|
|
1018
|
9
|
|
|
|
|
84
|
return $self->{escape_char}; |
1019
|
|
|
|
|
|
|
} |
1020
|
|
|
|
|
|
|
|
1021
|
|
|
|
|
|
|
sub get_in_encoding { |
1022
|
27
|
|
|
27
|
1
|
6035
|
my $self = shift; |
1023
|
|
|
|
|
|
|
|
1024
|
27
|
|
|
|
|
208
|
validate_pos(@_); |
1025
|
|
|
|
|
|
|
|
1026
|
27
|
|
|
|
|
97
|
return _get_def($self->{_inh_encoding}, ''); |
1027
|
|
|
|
|
|
|
} |
1028
|
|
|
|
|
|
|
|
1029
|
|
|
|
|
|
|
sub get_is_always_quoted { |
1030
|
15
|
|
|
15
|
1
|
435
|
my $self = shift; |
1031
|
|
|
|
|
|
|
|
1032
|
15
|
|
|
|
|
138
|
validate_pos(@_); |
1033
|
|
|
|
|
|
|
|
1034
|
15
|
|
|
|
|
130
|
return $self->{_is_always_quoted}; |
1035
|
|
|
|
|
|
|
} |
1036
|
|
|
|
|
|
|
|
1037
|
|
|
|
|
|
|
sub get_pass_count { |
1038
|
43
|
|
|
43
|
1
|
1156
|
my $self = shift; |
1039
|
|
|
|
|
|
|
|
1040
|
43
|
|
|
|
|
275
|
validate_pos(@_); |
1041
|
|
|
|
|
|
|
|
1042
|
43
|
|
|
|
|
170
|
return _get_def($self->{_pass_count}, 0); |
1043
|
|
|
|
|
|
|
} |
1044
|
|
|
|
|
|
|
|
1045
|
|
|
|
|
|
|
sub get_in_mem_record_count { |
1046
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
1047
|
|
|
|
|
|
|
|
1048
|
0
|
|
|
|
|
0
|
validate_pos(@_); |
1049
|
|
|
|
|
|
|
|
1050
|
0
|
|
|
|
|
0
|
return ($self->{_in_mem_record_count}, 0); |
1051
|
|
|
|
|
|
|
} |
1052
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
sub get_max_in_mem_record_count { |
1054
|
504
|
|
|
504
|
1
|
2270
|
my $self = shift; |
1055
|
|
|
|
|
|
|
|
1056
|
504
|
|
|
|
|
3481
|
validate_pos(@_); |
1057
|
|
|
|
|
|
|
|
1058
|
504
|
|
|
|
|
2620
|
return _get_def($self->{_max_in_mem_record_count}, 0); |
1059
|
|
|
|
|
|
|
} |
1060
|
|
|
|
|
|
|
|
1061
|
|
|
|
|
|
|
sub _set_max_in_mem_record_count { |
1062
|
153
|
|
|
153
|
|
341
|
my $self = shift; |
1063
|
|
|
|
|
|
|
|
1064
|
153
|
|
|
|
|
1697
|
validate_pos(@_, {type => SCALAR}); |
1065
|
|
|
|
|
|
|
|
1066
|
153
|
|
|
|
|
619
|
$self->{_max_in_mem_record_count} = $_[0]; |
1067
|
|
|
|
|
|
|
} |
1068
|
|
|
|
|
|
|
|
1069
|
|
|
|
|
|
|
sub get_fields_names { |
1070
|
19
|
|
|
19
|
1
|
3909
|
my $self = shift; |
1071
|
|
|
|
|
|
|
|
1072
|
19
|
|
|
|
|
162
|
validate_pos(@_); |
1073
|
|
|
|
|
|
|
|
1074
|
19
|
50
|
|
|
|
73
|
return () unless $self->_status_forward('S3'); |
1075
|
19
|
|
|
|
|
39
|
return @{$self->{_columns}}; |
|
19
|
|
|
|
|
109
|
|
1076
|
|
|
|
|
|
|
} |
1077
|
|
|
|
|
|
|
|
1078
|
|
|
|
|
|
|
sub get_field_name { |
1079
|
1
|
|
|
1
|
1
|
609
|
my $self = shift; |
1080
|
|
|
|
|
|
|
|
1081
|
1
|
|
|
|
|
12
|
validate_pos(@_, {type => SCALAR}); |
1082
|
|
|
|
|
|
|
|
1083
|
1
|
|
|
|
|
3
|
my ($n) = @_; |
1084
|
|
|
|
|
|
|
|
1085
|
1
|
50
|
|
|
|
4
|
return undef unless $self->_status_forward('S3'); |
1086
|
1
|
|
|
|
|
5
|
return $self->{_columns}->[$n]; |
1087
|
|
|
|
|
|
|
} |
1088
|
|
|
|
|
|
|
|
1089
|
|
|
|
|
|
|
sub get_coldata { |
1090
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
1091
|
|
|
|
|
|
|
|
1092
|
0
|
|
|
|
|
0
|
validate_pos(@_); |
1093
|
|
|
|
|
|
|
|
1094
|
0
|
0
|
|
|
|
0
|
return () unless $self->_status_forward('S3'); |
1095
|
0
|
|
|
|
|
0
|
my @ret; |
1096
|
0
|
|
|
|
|
0
|
for (@{$self->{_coldata}}) { |
|
0
|
|
|
|
|
0
|
|
1097
|
0
|
|
|
|
|
0
|
push @ret, [ |
1098
|
|
|
|
|
|
|
$_->field_name, |
1099
|
|
|
|
|
|
|
$_->header_text, |
1100
|
|
|
|
|
|
|
$_->description, |
1101
|
|
|
|
|
|
|
$_->dt_format, |
1102
|
|
|
|
|
|
|
$_->dt_locale]; |
1103
|
|
|
|
|
|
|
} |
1104
|
|
|
|
|
|
|
|
1105
|
0
|
|
|
|
|
0
|
return @ret; |
1106
|
|
|
|
|
|
|
} |
1107
|
|
|
|
|
|
|
|
1108
|
|
|
|
|
|
|
sub get_stats { |
1109
|
2
|
|
|
2
|
1
|
12
|
my $self = shift; |
1110
|
|
|
|
|
|
|
|
1111
|
2
|
|
|
|
|
13
|
validate_pos(@_); |
1112
|
|
|
|
|
|
|
|
1113
|
2
|
50
|
|
|
|
9
|
return () unless defined($self->{_stats}); |
1114
|
2
|
|
|
|
|
3
|
return %{$self->{_stats}}; |
|
2
|
|
|
|
|
10
|
|
1115
|
|
|
|
|
|
|
} |
1116
|
|
|
|
|
|
|
|
1117
|
|
|
|
|
|
|
sub _debug_show_members { |
1118
|
0
|
|
|
0
|
|
0
|
my ($self) = @_; |
1119
|
0
|
|
|
|
|
0
|
my $_debugh = $self->{_debugh}; |
1120
|
0
|
0
|
|
|
|
0
|
my @a = @{$self->{fields_ar}} if defined($self->{fields_ar}); |
|
0
|
|
|
|
|
0
|
|
1121
|
0
|
0
|
|
|
|
0
|
my @c = @{$self->{fields_column_names}} if defined($self->{fields_column_names}); |
|
0
|
|
|
|
|
0
|
|
1122
|
0
|
0
|
|
|
|
0
|
my %h = %{$self->{fields_hr}} if defined($self->{fields_hr}); |
|
0
|
|
|
|
|
0
|
|
1123
|
|
|
|
|
|
|
|
1124
|
0
|
|
|
|
|
0
|
print($_debugh "-- _debug_show_members() start\n"); |
1125
|
0
|
|
|
|
|
0
|
print($_debugh " croak_if_error $self->{croak_if_error}\n"); |
1126
|
0
|
|
|
|
|
0
|
print($_debugh " verbose $self->{verbose}\n"); |
1127
|
0
|
|
|
|
|
0
|
print($_debugh " _debug $self->{_debug}\n"); |
1128
|
0
|
|
|
|
|
0
|
print($_debugh " _debug_read $self->{_debug_read}\n"); |
1129
|
0
|
|
|
|
|
0
|
print($_debugh " infoh $self->{infoh}\n"); |
1130
|
0
|
|
|
|
|
0
|
print($_debugh " _debugh $_debugh\n"); |
1131
|
0
|
|
|
|
|
0
|
print($_debugh " inh: $self->{_inh}\n"); |
1132
|
0
|
|
|
|
|
0
|
print($_debugh " in_file_disp " . $self->get_in_file_disp() . "\n"); |
1133
|
0
|
|
|
|
|
0
|
print($_debugh " _in_csvobj $self->{_in_csvobj}\n"); |
1134
|
0
|
|
|
|
|
0
|
print($_debugh " has_headers $self->{has_headers}\n"); |
1135
|
0
|
|
|
|
|
0
|
print($_debugh " fields_ar:\n"); |
1136
|
0
|
|
|
|
|
0
|
for my $e (@a) { |
1137
|
0
|
|
|
|
|
0
|
print($_debugh " '$e'\n"); |
1138
|
|
|
|
|
|
|
} |
1139
|
0
|
|
|
|
|
0
|
print($_debugh " fields_hr:\n"); |
1140
|
0
|
|
|
|
|
0
|
for my $e (keys %h) { |
1141
|
0
|
|
|
|
|
0
|
print($_debugh " '$e' => '$h{$e}'\n"); |
1142
|
|
|
|
|
|
|
} |
1143
|
0
|
|
|
|
|
0
|
print($_debugh " fields_column_names:\n"); |
1144
|
0
|
|
|
|
|
0
|
for my $e (@c) { |
1145
|
0
|
|
|
|
|
0
|
print($_debugh " '$e'\n"); |
1146
|
|
|
|
|
|
|
} |
1147
|
0
|
|
|
|
|
0
|
print($_debugh "-- _debug_show_members() end\n"); |
1148
|
|
|
|
|
|
|
} |
1149
|
|
|
|
|
|
|
|
1150
|
|
|
|
|
|
|
# |
1151
|
|
|
|
|
|
|
# Check headers in CSV header line |
1152
|
|
|
|
|
|
|
# Used to increase robustness by relying on header title rather than |
1153
|
|
|
|
|
|
|
# column number. |
1154
|
|
|
|
|
|
|
# |
1155
|
|
|
|
|
|
|
# Return 1 if success (all fields found), 0 otherwise. |
1156
|
|
|
|
|
|
|
# |
1157
|
|
|
|
|
|
|
sub _process_header { |
1158
|
12
|
|
|
12
|
|
27
|
my $self = shift; |
1159
|
12
|
|
|
|
|
23
|
my @headers = @{shift(@_)}; |
|
12
|
|
|
|
|
45
|
|
1160
|
12
|
|
|
|
|
25
|
my %fields_h = %{shift(@_)}; |
|
12
|
|
|
|
|
80
|
|
1161
|
12
|
|
|
|
|
31
|
my $retval = shift; |
1162
|
|
|
|
|
|
|
|
1163
|
12
|
|
|
|
|
22
|
my @tmp = keys %{$retval}; |
|
12
|
|
|
|
|
30
|
|
1164
|
|
|
|
|
|
|
|
1165
|
12
|
|
|
|
|
38
|
my $in_file_disp = $self->get_in_file_disp(); |
1166
|
|
|
|
|
|
|
|
1167
|
12
|
50
|
|
|
|
46
|
confess '$_[4] must be an empty by-ref hash' if $#tmp >= 0; |
1168
|
|
|
|
|
|
|
|
1169
|
12
|
|
|
|
|
23
|
my $e = 0; |
1170
|
12
|
|
|
|
|
45
|
for my $k (keys %fields_h) { |
1171
|
48
|
|
|
|
|
105
|
my $v = $fields_h{$k}; |
1172
|
|
|
|
|
|
|
|
1173
|
48
|
|
|
462
|
|
252
|
my @all_idx = indexes { /$v/i } @headers; |
|
462
|
|
|
|
|
3507
|
|
1174
|
48
|
50
|
|
|
|
210
|
if ($#all_idx >= 1) { |
1175
|
0
|
|
|
|
|
0
|
$self->_print_error("file $in_file_disp: " . |
1176
|
|
|
|
|
|
|
"more than one column matches the criteria '$v'"); |
1177
|
0
|
|
|
|
|
0
|
$e++; |
1178
|
|
|
|
|
|
|
} |
1179
|
48
|
|
|
258
|
|
223
|
my $idx = first_index { /$v/i } @headers; |
|
258
|
|
|
|
|
2172
|
|
1180
|
48
|
50
|
|
|
|
313
|
if ($idx < 0) { |
1181
|
0
|
|
|
|
|
0
|
$self->_print_error("file $in_file_disp: unable to find field '$v'"); |
1182
|
0
|
|
|
|
|
0
|
$e++; |
1183
|
|
|
|
|
|
|
} else { |
1184
|
48
|
|
|
|
|
146
|
$retval->{$k} = $idx; |
1185
|
|
|
|
|
|
|
} |
1186
|
|
|
|
|
|
|
} |
1187
|
|
|
|
|
|
|
|
1188
|
12
|
50
|
|
|
|
82
|
return ($e >= 1 ? 0 : 1); |
1189
|
|
|
|
|
|
|
} |
1190
|
|
|
|
|
|
|
|
1191
|
|
|
|
|
|
|
sub set_walker_hr { |
1192
|
2
|
|
|
2
|
1
|
1928
|
my $self = shift; |
1193
|
2
|
|
|
|
|
22
|
validate_pos(@_, {type => UNDEF | CODEREF, optional => 1}); |
1194
|
|
|
|
|
|
|
|
1195
|
2
|
|
|
|
|
7
|
my ($walker_hr) = @_; |
1196
|
|
|
|
|
|
|
|
1197
|
2
|
50
|
|
|
|
6
|
return undef unless $self->_status_forward('S2'); |
1198
|
2
|
50
|
|
|
|
6
|
return undef unless $self->_status_backward('S2'); |
1199
|
2
|
|
|
|
|
6
|
$self->{walker_hr} = $walker_hr; |
1200
|
|
|
|
|
|
|
|
1201
|
2
|
|
|
|
|
5
|
return $self; |
1202
|
|
|
|
|
|
|
} |
1203
|
|
|
|
|
|
|
|
1204
|
|
|
|
|
|
|
sub set_walker_ar { |
1205
|
2
|
|
|
2
|
1
|
3245
|
my $self = shift; |
1206
|
2
|
|
|
|
|
31
|
validate_pos(@_, {type => UNDEF | CODEREF, optional => 1}); |
1207
|
|
|
|
|
|
|
|
1208
|
2
|
|
|
|
|
8
|
my ($walker_ar) = @_; |
1209
|
|
|
|
|
|
|
|
1210
|
2
|
50
|
|
|
|
8
|
return undef unless $self->_status_forward('S2'); |
1211
|
2
|
50
|
|
|
|
7
|
return undef unless $self->_status_backward('S2'); |
1212
|
2
|
|
|
|
|
6
|
$self->{walker_ar} = $walker_ar; |
1213
|
|
|
|
|
|
|
|
1214
|
2
|
|
|
|
|
7
|
return $self; |
1215
|
|
|
|
|
|
|
} |
1216
|
|
|
|
|
|
|
|
1217
|
|
|
|
|
|
|
|
1218
|
|
|
|
|
|
|
# * *************************************** * |
1219
|
|
|
|
|
|
|
# * BEGINNING OF DATE FORMAT DETECTION CODE * |
1220
|
|
|
|
|
|
|
# * *************************************** * |
1221
|
|
|
|
|
|
|
|
1222
|
|
|
|
|
|
|
|
1223
|
|
|
|
|
|
|
# |
1224
|
|
|
|
|
|
|
# The '%m.%d.%y' is not at its "logical" location. It is done to make sure the order in which |
1225
|
|
|
|
|
|
|
# entries are written does not impact the result. |
1226
|
|
|
|
|
|
|
# |
1227
|
|
|
|
|
|
|
# It could occur because there is some code that correlates an entry containing %y with another |
1228
|
|
|
|
|
|
|
# one that would contain %Y. The %Y will be called the master, the %y will be called the slave. |
1229
|
|
|
|
|
|
|
# It is important to match such entries, otherwise an identified format with %y would always be |
1230
|
|
|
|
|
|
|
# ambiguous with the same written with %Y. |
1231
|
|
|
|
|
|
|
# |
1232
|
|
|
|
|
|
|
# IMPORTANT |
1233
|
|
|
|
|
|
|
# The list below is written almost as-is in the POD at the bottom of this file. |
1234
|
|
|
|
|
|
|
# |
1235
|
|
|
|
|
|
|
my @DATES_DEFAULT_FORMATS_TO_TRY = ( |
1236
|
|
|
|
|
|
|
'', |
1237
|
|
|
|
|
|
|
'%Y-%m-%d', |
1238
|
|
|
|
|
|
|
'%Y.%m.%d', |
1239
|
|
|
|
|
|
|
'%Y/%m/%d', |
1240
|
|
|
|
|
|
|
|
1241
|
|
|
|
|
|
|
'%m.%d.%y', |
1242
|
|
|
|
|
|
|
|
1243
|
|
|
|
|
|
|
'%m-%d-%Y', |
1244
|
|
|
|
|
|
|
'%m.%d.%Y', |
1245
|
|
|
|
|
|
|
'%m/%d/%Y', |
1246
|
|
|
|
|
|
|
'%d-%m-%Y', |
1247
|
|
|
|
|
|
|
'%d.%m.%Y', |
1248
|
|
|
|
|
|
|
'%d/%m/%Y', |
1249
|
|
|
|
|
|
|
|
1250
|
|
|
|
|
|
|
'%m-%d-%y', |
1251
|
|
|
|
|
|
|
'%m/%d/%y', |
1252
|
|
|
|
|
|
|
'%d-%m-%y', |
1253
|
|
|
|
|
|
|
'%d.%m.%y', |
1254
|
|
|
|
|
|
|
'%d/%m/%y', |
1255
|
|
|
|
|
|
|
|
1256
|
|
|
|
|
|
|
'%Y%m%d%H%M%S', |
1257
|
|
|
|
|
|
|
|
1258
|
|
|
|
|
|
|
# Localizaed formats |
1259
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
'%b %d, %Y', |
1261
|
|
|
|
|
|
|
'%b %d %Y', |
1262
|
|
|
|
|
|
|
'%b %d %T %Z %Y', |
1263
|
|
|
|
|
|
|
'%d %b %Y', |
1264
|
|
|
|
|
|
|
'%d %b, %Y' |
1265
|
|
|
|
|
|
|
); |
1266
|
|
|
|
|
|
|
|
1267
|
|
|
|
|
|
|
# |
1268
|
|
|
|
|
|
|
# IMPORTANT |
1269
|
|
|
|
|
|
|
# Under Linux, $START is useless. Strptime will match a format exactly as it is, and a tring |
1270
|
|
|
|
|
|
|
# like "01/01/16 13:00:00" won't match with "%T". Under Windows, Strptime is capable of doing |
1271
|
|
|
|
|
|
|
# a match by ignoring characters at the beginning, thus "01/01/2016 13:00:00" for example will |
1272
|
|
|
|
|
|
|
# return success when matched against "%T". |
1273
|
|
|
|
|
|
|
# Possibly it has to do with versionning of Strptime, not Linux versus Windows as such. Any |
1274
|
|
|
|
|
|
|
# way, this difference had to be dealt with. |
1275
|
|
|
|
|
|
|
# |
1276
|
|
|
|
|
|
|
# The flexibility under Windows would screw the code logic so I had to add the prefix string |
1277
|
|
|
|
|
|
|
# below, to avoid unexpected success on match. |
1278
|
|
|
|
|
|
|
# |
1279
|
|
|
|
|
|
|
my $START = '<'; |
1280
|
|
|
|
|
|
|
|
1281
|
|
|
|
|
|
|
struct RecordCounter => { |
1282
|
|
|
|
|
|
|
count_ok => '$', |
1283
|
|
|
|
|
|
|
count_ko => '$', |
1284
|
|
|
|
|
|
|
has_searched_time => '$', |
1285
|
|
|
|
|
|
|
|
1286
|
|
|
|
|
|
|
format => '$', |
1287
|
|
|
|
|
|
|
locale => '$', |
1288
|
|
|
|
|
|
|
|
1289
|
|
|
|
|
|
|
has_found_time => '$', |
1290
|
|
|
|
|
|
|
format_with_addition_of_time => '$', |
1291
|
|
|
|
|
|
|
locale_with_addition_of_time => '$', |
1292
|
|
|
|
|
|
|
parser_with_addition_of_time => '$' |
1293
|
|
|
|
|
|
|
}; |
1294
|
|
|
|
|
|
|
|
1295
|
|
|
|
|
|
|
struct Format => { |
1296
|
|
|
|
|
|
|
id => '$', |
1297
|
|
|
|
|
|
|
format => '$', |
1298
|
|
|
|
|
|
|
locale => '$', |
1299
|
|
|
|
|
|
|
parser => '$', |
1300
|
|
|
|
|
|
|
index_slave => '$', |
1301
|
|
|
|
|
|
|
index_master => '$' |
1302
|
|
|
|
|
|
|
}; |
1303
|
|
|
|
|
|
|
|
1304
|
|
|
|
|
|
|
sub _col_dispname { |
1305
|
530
|
|
|
530
|
|
1035
|
my ($self, $n) = @_; |
1306
|
|
|
|
|
|
|
|
1307
|
530
|
|
|
|
|
778
|
my $col; |
1308
|
|
|
|
|
|
|
|
1309
|
|
|
|
|
|
|
# |
1310
|
|
|
|
|
|
|
# IMPORTANT |
1311
|
|
|
|
|
|
|
# |
1312
|
|
|
|
|
|
|
# We cannot execute here a command like |
1313
|
|
|
|
|
|
|
# $self->_status_forward('S3'); |
1314
|
|
|
|
|
|
|
# (to ensure _columns is well defined) because _col_dispname is called by |
1315
|
|
|
|
|
|
|
# _detect_dates_formats that is in turn called by _S3_init_fields_extra. A call to |
1316
|
|
|
|
|
|
|
# _status_forward would trigger a never-ending call loop. |
1317
|
|
|
|
|
|
|
# |
1318
|
530
|
|
|
|
|
1478
|
my $cols = _get_def($self->{'_columns'}, $self->{'_S2_columns'}); |
1319
|
|
|
|
|
|
|
|
1320
|
530
|
50
|
|
|
|
1272
|
if ($self->{has_headers}) { |
1321
|
530
|
|
|
|
|
1033
|
$col = $cols->[$n]; |
1322
|
530
|
50
|
|
|
|
1225
|
$col = "<UNDEF>" unless defined($col); |
1323
|
|
|
|
|
|
|
} else { |
1324
|
0
|
|
|
|
|
0
|
$col = "[$n]"; |
1325
|
|
|
|
|
|
|
} |
1326
|
530
|
|
|
|
|
1243
|
return $col; |
1327
|
|
|
|
|
|
|
} |
1328
|
|
|
|
|
|
|
|
1329
|
|
|
|
|
|
|
# Used by test plan only... |
1330
|
|
|
|
|
|
|
sub _dds { |
1331
|
42
|
|
|
42
|
|
1909
|
my $self = shift; |
1332
|
|
|
|
|
|
|
|
1333
|
42
|
50
|
|
|
|
115
|
return undef unless $self->_status_forward('S3'); |
1334
|
40
|
50
|
|
|
|
145
|
return undef unless defined($self->{_dates_detailed_status}); |
1335
|
40
|
|
|
|
|
341
|
return $self->{_dates_detailed_status}; |
1336
|
|
|
|
|
|
|
} |
1337
|
|
|
|
|
|
|
|
1338
|
|
|
|
|
|
|
sub _detect_dates_formats { |
1339
|
348
|
|
|
348
|
|
698
|
my $self = shift; |
1340
|
|
|
|
|
|
|
|
1341
|
348
|
100
|
|
|
|
1150
|
return if $self->{_detect_dates_formats_has_run}; |
1342
|
281
|
|
|
|
|
785
|
$self->{_detect_dates_formats_has_run} = 1; |
1343
|
281
|
100
|
|
|
|
894
|
my @fields_dates = @{$self->{fields_dates}} if defined($self->{fields_dates}); |
|
11
|
|
|
|
|
75
|
|
1344
|
281
|
100
|
100
|
|
|
1755
|
return unless @fields_dates or $self->{fields_dates_auto}; |
1345
|
|
|
|
|
|
|
|
1346
|
51
|
50
|
|
|
|
214
|
if ($self->{_int_one_pass}) { |
1347
|
0
|
|
|
|
|
0
|
$self->_print_error("date format detection disallowed when one_pass is set"); |
1348
|
0
|
|
|
|
|
0
|
return; |
1349
|
|
|
|
|
|
|
} |
1350
|
|
|
|
|
|
|
|
1351
|
51
|
|
|
|
|
113
|
my $_debug = $self->{_debug}; |
1352
|
51
|
|
|
|
|
105
|
my $_debugh = $self->{_debugh}; |
1353
|
51
|
|
33
|
|
|
153
|
my $debug_fmt = ($_debug and $DEBUG_DATETIME_FORMATS); |
1354
|
|
|
|
|
|
|
|
1355
|
51
|
|
|
|
|
186
|
$self->_register_pass("detect date format"); |
1356
|
|
|
|
|
|
|
|
1357
|
|
|
|
|
|
|
# |
1358
|
|
|
|
|
|
|
# Why re-opening the input? |
1359
|
|
|
|
|
|
|
# I tried two other ways that never worked on some OSes (like freebsd) and/or with older perl |
1360
|
|
|
|
|
|
|
# versions. |
1361
|
|
|
|
|
|
|
# |
1362
|
|
|
|
|
|
|
# 1) The "tell" tactic |
1363
|
|
|
|
|
|
|
# Recording at the beginning of the function the file position with |
1364
|
|
|
|
|
|
|
# my $pos = tell($self->{inh}); |
1365
|
|
|
|
|
|
|
# ... and then recalling with a seek instruction is the most logical. |
1366
|
|
|
|
|
|
|
# But it didn't work = sometimes, reading would go back to first row (the headers) instead |
1367
|
|
|
|
|
|
|
# of the second row, could not figure out why (it would work on my Ubuntu 16.04 / perl 5.22, but |
1368
|
|
|
|
|
|
|
# would fail with other OSes and/or perl versions). |
1369
|
|
|
|
|
|
|
# |
1370
|
|
|
|
|
|
|
# 2) The "complete rewind" tactic |
1371
|
|
|
|
|
|
|
# I then undertook to do (at the end of detection function): |
1372
|
|
|
|
|
|
|
# seek $inh, 0, SEEK_SET; |
1373
|
|
|
|
|
|
|
# $incsv->getline($inh) if $self->{has_headers}; |
1374
|
|
|
|
|
|
|
# based on the assumption that a seek to zero would behave differently from a seek to an |
1375
|
|
|
|
|
|
|
# arbitrary position. |
1376
|
|
|
|
|
|
|
# But still, it would sometimes fail.... |
1377
|
|
|
|
|
|
|
# |
1378
|
|
|
|
|
|
|
|
1379
|
51
|
|
|
|
|
135
|
my $inh = $self->_reopen_input(); |
1380
|
51
|
|
|
|
|
112
|
my $incsv = $self->{_in_csvobj}; |
1381
|
51
|
50
|
|
|
|
249
|
_mygetline($incsv, $inh) if $self->{has_headers}; |
1382
|
|
|
|
|
|
|
|
1383
|
51
|
|
|
|
|
2412
|
my $formats_to_try = $self->{dates_formats_to_try}; |
1384
|
51
|
|
|
|
|
112
|
my $ignore_trailing_chars = $self->{dates_ignore_trailing_chars}; |
1385
|
51
|
|
|
|
|
115
|
my $search_time = $self->{dates_search_time}; |
1386
|
51
|
|
|
|
|
111
|
my $localizations = $self->{dates_locales}; |
1387
|
|
|
|
|
|
|
|
1388
|
51
|
|
|
|
|
103
|
my %regular_named_fields = %{$self->{_regular_named_fields}}; |
|
51
|
|
|
|
|
438
|
|
1389
|
|
|
|
|
|
|
|
1390
|
51
|
|
|
|
|
203
|
my $refsub_is_datetime_empty = $self->{_refsub_is_datetime_empty}; |
1391
|
|
|
|
|
|
|
|
1392
|
51
|
|
|
|
|
88
|
my @fields_to_detect_format; |
1393
|
51
|
100
|
|
|
|
211
|
if (defined($self->{fields_dates})) { |
|
|
50
|
|
|
|
|
|
1394
|
11
|
|
|
|
|
23
|
my $count_field_not_found = 0; |
1395
|
11
|
|
|
|
|
25
|
my %column_seen; |
1396
|
11
|
|
|
|
|
25
|
for my $f (@{$self->{fields_dates}}) { |
|
11
|
|
|
|
|
43
|
|
1397
|
23
|
100
|
|
|
|
74
|
if (!exists $regular_named_fields{$f}) { |
1398
|
1
|
|
|
|
|
17
|
$self->_print_error("fields_dates: unknown field: '$f'", |
1399
|
|
|
|
|
|
|
1, ERR_UNKNOWN_FIELD, { %regular_named_fields } ); |
1400
|
1
|
|
|
|
|
64
|
$count_field_not_found++; |
1401
|
1
|
|
|
|
|
5
|
next; |
1402
|
|
|
|
|
|
|
} |
1403
|
22
|
|
|
|
|
42
|
my $n = $regular_named_fields{$f}; |
1404
|
22
|
50
|
|
|
|
67
|
if (exists $column_seen{$n}) { |
1405
|
0
|
|
|
|
|
0
|
$self->_print_warning("field '$f' already seen"); |
1406
|
0
|
|
|
|
|
0
|
next; |
1407
|
|
|
|
|
|
|
} |
1408
|
22
|
|
|
|
|
49
|
$column_seen{$n} = 1; |
1409
|
22
|
|
|
|
|
52
|
push @fields_to_detect_format, $n; |
1410
|
|
|
|
|
|
|
} |
1411
|
11
|
100
|
|
|
|
50
|
$self->_print_error("non existent field(s) encountered, aborted") if $count_field_not_found; |
1412
|
|
|
|
|
|
|
} elsif ($self->{fields_dates_auto}) { |
1413
|
40
|
|
|
|
|
281
|
my @k = keys %regular_named_fields; |
1414
|
40
|
|
|
|
|
228
|
@fields_to_detect_format = (0..$#k); |
1415
|
|
|
|
|
|
|
} else { |
1416
|
0
|
|
|
|
|
0
|
confess "Hey! check this code, man"; |
1417
|
|
|
|
|
|
|
} |
1418
|
|
|
|
|
|
|
|
1419
|
|
|
|
|
|
|
# |
1420
|
|
|
|
|
|
|
# FIXME? |
1421
|
|
|
|
|
|
|
# Sort by column number of not? |
1422
|
|
|
|
|
|
|
# |
1423
|
|
|
|
|
|
|
# At this moment in time, the author inclines to answer "yes". |
1424
|
|
|
|
|
|
|
# But I must admit it is rather arbitrary decision for now. |
1425
|
|
|
|
|
|
|
# |
1426
|
50
|
|
|
|
|
342
|
@fields_to_detect_format = sort { $a <=> $b } @fields_to_detect_format; |
|
744
|
|
|
|
|
1054
|
|
1427
|
|
|
|
|
|
|
|
1428
|
2
|
|
|
|
|
8
|
my @dates_formats_supp = @{$self->{dates_formats_to_try_supp}} |
1429
|
50
|
100
|
|
|
|
172
|
if defined($self->{dates_formats_to_try_supp}); |
1430
|
|
|
|
|
|
|
|
1431
|
50
|
100
|
|
|
|
344
|
$formats_to_try = [ @DATES_DEFAULT_FORMATS_TO_TRY ] unless defined($formats_to_try); |
1432
|
50
|
|
|
|
|
100
|
$formats_to_try = [ @{$formats_to_try}, @dates_formats_supp ]; |
|
50
|
|
|
|
|
212
|
|
1433
|
50
|
|
|
|
|
138
|
my %seen; |
1434
|
50
|
|
|
|
|
102
|
my $f2 = [ ]; |
1435
|
50
|
|
|
|
|
139
|
for (@${formats_to_try}) { |
1436
|
912
|
50
|
|
|
|
1854
|
push @{$f2}, $_ unless exists($seen{$_}); |
|
912
|
|
|
|
|
1468
|
|
1437
|
912
|
|
|
|
|
1843
|
$seen{$_} = undef; |
1438
|
|
|
|
|
|
|
} |
1439
|
50
|
|
|
|
|
103
|
$formats_to_try = $f2; |
1440
|
|
|
|
|
|
|
|
1441
|
50
|
100
|
|
|
|
190
|
$ignore_trailing_chars = 1 unless defined($ignore_trailing_chars); |
1442
|
50
|
100
|
|
|
|
145
|
$search_time = 1 unless defined($search_time); |
1443
|
|
|
|
|
|
|
|
1444
|
50
|
100
|
|
|
|
146
|
my $stop = ($ignore_trailing_chars ? '' : '>'); |
1445
|
|
|
|
|
|
|
|
1446
|
|
|
|
|
|
|
# |
1447
|
|
|
|
|
|
|
# The code below (from # AMB to # AMB-END) aims to remove ambiguity that comes from %Y versus %y. |
1448
|
|
|
|
|
|
|
# That is: provided you have (among others) the formats to try |
1449
|
|
|
|
|
|
|
# '%d-%m-%Y' |
1450
|
|
|
|
|
|
|
# and |
1451
|
|
|
|
|
|
|
# '%d-%m-%y' |
1452
|
|
|
|
|
|
|
# then if parsing 4-digit-year dates (like '31-12-2016'), the two formats will work and you'll end |
1453
|
|
|
|
|
|
|
# up with an ambiguity. To be precise, there'll be no ambiguity if the date is followed by a time, |
1454
|
|
|
|
|
|
|
# but if the date is alone, both formats will work. |
1455
|
|
|
|
|
|
|
# |
1456
|
|
|
|
|
|
|
# Thanks to the below code, the member 'index_slave' (and its counterpart index_master) is populated |
1457
|
|
|
|
|
|
|
# and later, if such an ambiguity is detected, the upper case version (the one containing upper case |
1458
|
|
|
|
|
|
|
# '%Y') will be kept and the other one will be discarded. |
1459
|
|
|
|
|
|
|
# |
1460
|
|
|
|
|
|
|
# NOTE |
1461
|
|
|
|
|
|
|
# Such an ambiguity can exist only when ignore_trailing_chars is set. Otherwise, the remaining two |
1462
|
|
|
|
|
|
|
# digits make the date parsing fail in the '%y' case. |
1463
|
|
|
|
|
|
|
# |
1464
|
|
|
|
|
|
|
# The other members of the 'Format' object are used to work "normally", independently from this |
1465
|
|
|
|
|
|
|
# ambiguity removal feature. |
1466
|
|
|
|
|
|
|
# |
1467
|
|
|
|
|
|
|
|
1468
|
|
|
|
|
|
|
# WIP = Work In Progress... |
1469
|
50
|
|
|
|
|
88
|
my @formats_wip; |
1470
|
50
|
100
|
|
|
|
438
|
my @locales = split(/,\s*/, $localizations) if defined($localizations); |
1471
|
50
|
|
|
|
|
101
|
for my $f (@{$formats_to_try}) { |
|
50
|
|
|
|
|
149
|
|
1472
|
912
|
100
|
|
|
|
2392
|
my $has_localized_item = ($f =~ m/%a|%A|%b|%B|%c|%\+/ ? 1 : 0); |
1473
|
912
|
100
|
100
|
|
|
2189
|
unless (@locales and $has_localized_item) { |
1474
|
902
|
|
|
|
|
1822
|
push @formats_wip, [$f, '']; |
1475
|
902
|
|
|
|
|
1484
|
next; |
1476
|
|
|
|
|
|
|
} |
1477
|
10
|
|
|
|
|
36
|
push @formats_wip, [$f, $_] foreach @locales; |
1478
|
|
|
|
|
|
|
} |
1479
|
|
|
|
|
|
|
|
1480
|
|
|
|
|
|
|
# AMB |
1481
|
50
|
|
|
|
|
108
|
my @formats; |
1482
|
|
|
|
|
|
|
my %mates; |
1483
|
50
|
|
|
|
|
177
|
for my $i (0..$#formats_wip) { |
1484
|
922
|
|
|
|
|
2353
|
my $fstr = $formats_wip[$i]->[0]; |
1485
|
922
|
|
|
|
|
1580
|
my $floc = $formats_wip[$i]->[1]; |
1486
|
|
|
|
|
|
|
|
1487
|
|
|
|
|
|
|
# FIXME |
1488
|
|
|
|
|
|
|
# Will not manage correctly a string like |
1489
|
|
|
|
|
|
|
# '%%Y' |
1490
|
|
|
|
|
|
|
# that means (when used with Strptime), the litteral string '%Y' with no substitution. |
1491
|
|
|
|
|
|
|
# Such cases will be complicated to fix, as it'll require to do a kind-of |
1492
|
|
|
|
|
|
|
# Strptime-equivalent parsing of the string, and I find it a bit overkill. |
1493
|
|
|
|
|
|
|
# |
1494
|
|
|
|
|
|
|
# I prefer to push back in caller world saying |
1495
|
|
|
|
|
|
|
# "Hey, if using constructs like '%%Y', you'll be in trouble." |
1496
|
922
|
|
|
|
|
1581
|
my $m = $fstr; |
1497
|
922
|
|
|
|
|
3903
|
$m =~ s/%y//ig; |
1498
|
922
|
|
|
|
|
1887
|
$m .= $floc; |
1499
|
|
|
|
|
|
|
|
1500
|
922
|
|
|
|
|
1478
|
my $index_slave = -1; |
1501
|
922
|
|
|
|
|
1428
|
my $index_master = -1; |
1502
|
922
|
100
|
|
|
|
2317
|
if (exists $mates{$m}) { |
1503
|
244
|
|
|
|
|
640
|
my $alt_fstr = $formats_wip[$mates{$m}]->[0]; |
1504
|
244
|
100
|
|
|
|
859
|
my $m_lower = ($fstr =~ m/%y/ ? 1 : 0); |
1505
|
244
|
100
|
|
|
|
677
|
my $m_upper = ($fstr =~ m/%Y/ ? 1 : 0); |
1506
|
244
|
100
|
|
|
|
639
|
my $a_lower = ($alt_fstr =~ m/%y/ ? 1 : 0); |
1507
|
244
|
100
|
|
|
|
621
|
my $a_upper = ($alt_fstr =~ m/%Y/ ? 1 : 0); |
1508
|
|
|
|
|
|
|
|
1509
|
|
|
|
|
|
|
# We ignore the weird cases where we'd have both %y and %Y in a format string. |
1510
|
|
|
|
|
|
|
|
1511
|
244
|
100
|
66
|
|
|
2819
|
if (!$m_lower and $m_upper and $a_lower and !$a_upper) { |
|
|
50
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
1512
|
42
|
|
|
|
|
104
|
$index_slave = $mates{$m}; |
1513
|
42
|
|
|
|
|
720
|
$formats[$mates{$m}]->index_master($i); |
1514
|
|
|
|
|
|
|
} elsif ($m_lower and !$m_upper and !$a_lower and $a_upper) { |
1515
|
202
|
|
|
|
|
422
|
$index_master = $mates{$m}; |
1516
|
202
|
|
|
|
|
3565
|
$formats[$mates{$m}]->index_slave($i); |
1517
|
|
|
|
|
|
|
} |
1518
|
|
|
|
|
|
|
|
1519
|
|
|
|
|
|
|
} else { |
1520
|
678
|
|
|
|
|
1660
|
$mates{$m} = $i; |
1521
|
|
|
|
|
|
|
} |
1522
|
|
|
|
|
|
|
|
1523
|
922
|
|
|
|
|
4366
|
my %strptime_opts = (pattern => $START . $fstr . $stop); |
1524
|
922
|
100
|
|
|
|
2748
|
$strptime_opts{locale} = $floc if $floc ne ''; |
1525
|
922
|
100
|
|
|
|
6003
|
my $format = Format->new( |
1526
|
|
|
|
|
|
|
id => "$i", |
1527
|
|
|
|
|
|
|
format => $fstr, |
1528
|
|
|
|
|
|
|
locale => $floc, |
1529
|
|
|
|
|
|
|
parser => ($fstr ne '' ? |
1530
|
|
|
|
|
|
|
DateTime::Format::Strptime->new(%strptime_opts) : |
1531
|
|
|
|
|
|
|
undef), |
1532
|
|
|
|
|
|
|
index_slave => $index_slave, |
1533
|
|
|
|
|
|
|
index_master => $index_master |
1534
|
|
|
|
|
|
|
); |
1535
|
922
|
|
|
|
|
1130237
|
push @formats, $format; |
1536
|
|
|
|
|
|
|
} |
1537
|
50
|
|
|
|
|
242
|
for my $i (0..$#formats) { |
1538
|
922
|
|
|
|
|
11421
|
my $format = $formats[$i]; |
1539
|
|
|
|
|
|
|
|
1540
|
|
|
|
|
|
|
# If a master could be itself the slave of another entry, that'd make it a hierarchical |
1541
|
|
|
|
|
|
|
# relation tree with multiple levels. It is not possible, only a direct, unique |
1542
|
|
|
|
|
|
|
# master-slave relation can be managed here. |
1543
|
922
|
50
|
66
|
|
|
11660
|
confess "Inonsistent data, check this module's code urgently!" |
1544
|
|
|
|
|
|
|
if $format->index_slave >= 0 and $format->index_master >= 0; |
1545
|
|
|
|
|
|
|
|
1546
|
922
|
100
|
|
|
|
21329
|
if ($format->index_slave >= 0) { |
1547
|
244
|
|
|
|
|
4429
|
my $mate = $formats[$format->index_slave]; |
1548
|
244
|
50
|
33
|
|
|
4186
|
if ($mate->index_master != $i or $mate->index_slave != -1) { |
1549
|
0
|
|
|
|
|
0
|
confess "Inonsistent data (2), check this module's code urgently!" |
1550
|
|
|
|
|
|
|
} |
1551
|
|
|
|
|
|
|
} |
1552
|
|
|
|
|
|
|
|
1553
|
922
|
100
|
|
|
|
21239
|
if ($format->index_master >= 0) { |
1554
|
244
|
|
|
|
|
4486
|
my $mate = $formats[$format->index_master]; |
1555
|
244
|
50
|
33
|
|
|
4217
|
if ($mate->index_slave != $i or $mate->index_master != -1) { |
1556
|
0
|
|
|
|
|
0
|
confess "Inonsistent data (3), check this module's code urgently!" |
1557
|
|
|
|
|
|
|
} |
1558
|
|
|
|
|
|
|
} |
1559
|
|
|
|
|
|
|
|
1560
|
|
|
|
|
|
|
} |
1561
|
50
|
50
|
|
|
|
574
|
if ($debug_fmt) { |
1562
|
0
|
|
|
|
|
0
|
for (@formats) { |
1563
|
0
|
|
|
|
|
0
|
my ($idx, $rel) = (-1, ""); |
1564
|
0
|
0
|
|
|
|
0
|
$idx = $_->index_slave, $rel = "S: " if $_->index_slave >= 0; |
1565
|
0
|
0
|
|
|
|
0
|
$idx = $_->index_master, $rel = "M: " if $_->index_master >= 0; |
1566
|
0
|
|
|
|
|
0
|
printf($_debugh "%-18s %s %2d", "'" . $_->format . "'", $rel, $idx); |
1567
|
0
|
0
|
|
|
|
0
|
print($_debugh ": '" . $formats[$idx]->format . "'") if $idx >= 0; |
1568
|
0
|
|
|
|
|
0
|
print($_debugh "\n"); |
1569
|
|
|
|
|
|
|
} |
1570
|
|
|
|
|
|
|
} |
1571
|
|
|
|
|
|
|
# AMB-END |
1572
|
|
|
|
|
|
|
|
1573
|
50
|
|
|
|
|
151
|
my %records; |
1574
|
|
|
|
|
|
|
my $record_number; |
1575
|
50
|
|
|
|
|
104
|
my $count_gotit = 0; |
1576
|
50
|
|
|
|
|
107
|
my $count_ambiguous = 0; |
1577
|
50
|
|
|
|
|
105
|
my $count_nodate = 0; |
1578
|
50
|
|
|
|
|
98
|
my $count_empty = 0; |
1579
|
50
|
|
|
|
|
99
|
my $has_signaled_can_start_recording_data = 0; |
1580
|
50
|
|
|
|
|
154
|
$self->{_line_after_which_recording_can_start} = 0; |
1581
|
|
|
|
|
|
|
|
1582
|
|
|
|
|
|
|
# |
1583
|
|
|
|
|
|
|
# Seems a weird optimization here, but it is very important. |
1584
|
|
|
|
|
|
|
# In some cases, divides execution time (to detect date format on big files |
1585
|
|
|
|
|
|
|
# containing numerous fields) by 10. |
1586
|
|
|
|
|
|
|
# |
1587
|
|
|
|
|
|
|
# When evaluates to true, it means the input column has no identified date format, meaning, |
1588
|
|
|
|
|
|
|
# no further check to do. |
1589
|
|
|
|
|
|
|
# |
1590
|
50
|
|
|
|
|
122
|
my @cache_nodate; |
1591
|
|
|
|
|
|
|
|
1592
|
50
|
|
|
|
|
208
|
while (my $f = _mygetline($incsv, $inh)) { |
1593
|
4645
|
|
|
|
|
147657
|
$record_number++; |
1594
|
|
|
|
|
|
|
|
1595
|
4645
|
50
|
|
|
|
10989
|
if ($debug_fmt) { |
1596
|
0
|
|
|
|
|
0
|
print($_debugh "RECORD $record_number:\n"); |
1597
|
0
|
|
|
|
|
0
|
for (0 .. @$f - 1) { printf($_debugh " %02d: '%s'\n", $_, $f->[$_]); } |
|
0
|
|
|
|
|
0
|
|
1598
|
|
|
|
|
|
|
} |
1599
|
|
|
|
|
|
|
|
1600
|
4645
|
|
|
|
|
9460
|
for my $n (@fields_to_detect_format) { |
1601
|
31430
|
100
|
|
|
|
71512
|
next if $cache_nodate[$n]; |
1602
|
|
|
|
|
|
|
|
1603
|
16897
|
|
|
|
|
30214
|
my $v = $f->[$n]; |
1604
|
16897
|
100
|
|
|
|
35622
|
$v = '' unless defined($v); |
1605
|
16897
|
100
|
|
|
|
39587
|
next if $v eq ''; |
1606
|
9395
|
100
|
100
|
|
|
34623
|
next if defined($refsub_is_datetime_empty) and $refsub_is_datetime_empty->($v); |
1607
|
|
|
|
|
|
|
|
1608
|
9392
|
50
|
|
|
|
22468
|
if ($debug_fmt) { |
1609
|
0
|
|
|
|
|
0
|
my $col = $self->_col_dispname($n); |
1610
|
0
|
|
|
|
|
0
|
print($_debugh "Line $record_number, column '$col':\n"); |
1611
|
|
|
|
|
|
|
} |
1612
|
|
|
|
|
|
|
|
1613
|
9392
|
|
|
|
|
18512
|
for my $fmt (@formats) { |
1614
|
158222
|
|
|
|
|
2011791
|
my $fid = $fmt->id; |
1615
|
158222
|
|
|
|
|
2724809
|
my $fstr = $fmt->format; |
1616
|
|
|
|
|
|
|
|
1617
|
158222
|
50
|
|
|
|
1152556
|
$self->_debug_output_fmt('** pre ', $fmt, $records{$n}->{$fid}) if $debug_fmt; |
1618
|
|
|
|
|
|
|
|
1619
|
|
|
|
|
|
|
$records{$n}->{$fid} = RecordCounter->new( |
1620
|
|
|
|
|
|
|
count_ok => 0, |
1621
|
|
|
|
|
|
|
count_ko => 0, |
1622
|
|
|
|
|
|
|
has_searched_time => 0, |
1623
|
|
|
|
|
|
|
|
1624
|
|
|
|
|
|
|
format => undef, |
1625
|
|
|
|
|
|
|
locale => undef, |
1626
|
|
|
|
|
|
|
|
1627
|
|
|
|
|
|
|
has_found_time => 0, |
1628
|
|
|
|
|
|
|
format_with_addition_of_time => undef, |
1629
|
|
|
|
|
|
|
locale_with_addition_of_time => undef, |
1630
|
|
|
|
|
|
|
parser_with_addition_of_time => undef |
1631
|
158222
|
100
|
|
|
|
501871
|
) unless defined($records{$n}->{$fid}); |
1632
|
|
|
|
|
|
|
|
1633
|
158222
|
100
|
|
|
|
2537923
|
unless ($records{$n}->{$fid}->count_ko) { |
1634
|
19361
|
|
|
|
|
181657
|
my $is_ok = &_try_parser($fmt, $records{$n}->{$fid}, $START . $v . $stop); |
1635
|
|
|
|
|
|
|
|
1636
|
19361
|
100
|
|
|
|
51956
|
if (!$is_ok) { |
1637
|
7604
|
|
|
|
|
12924
|
my $give_up_time = 0; |
1638
|
7604
|
100
|
66
|
|
|
98468
|
if ($records{$n}->{$fid}->count_ko == 0 and |
|
|
|
100
|
|
|
|
|
1639
|
|
|
|
|
|
|
$records{$n}->{$fid}->has_searched_time and |
1640
|
|
|
|
|
|
|
$records{$n}->{$fid}->has_found_time) { |
1641
|
77
|
|
100
|
|
|
4598
|
$give_up_time = (defined($fmt->parser) and |
1642
|
|
|
|
|
|
|
defined($fmt->parser->parse_datetime($START . $v . $stop)) |
1643
|
|
|
|
|
|
|
? |
1644
|
|
|
|
|
|
|
1 : 0); |
1645
|
77
|
100
|
|
|
|
47156
|
if ($give_up_time) { |
1646
|
4
|
|
|
|
|
92
|
$records{$n}->{$fid}->has_found_time(0); |
1647
|
4
|
|
|
|
|
39
|
$is_ok = 1; |
1648
|
|
|
|
|
|
|
} |
1649
|
|
|
|
|
|
|
} |
1650
|
|
|
|
|
|
|
} |
1651
|
|
|
|
|
|
|
|
1652
|
19361
|
100
|
100
|
|
|
252839
|
if ($is_ok or !$ignore_trailing_chars) { |
1653
|
14144
|
|
100
|
|
|
188133
|
my $incr = (defined($fmt->parser) and $is_ok ? 1: 0); |
1654
|
|
|
|
|
|
|
|
1655
|
14144
|
100
|
|
|
|
306619
|
unless ($records{$n}->{$fid}->has_searched_time) { |
1656
|
3141
|
|
|
|
|
58701
|
$records{$n}->{$fid}->has_searched_time(1); |
1657
|
|
|
|
|
|
|
|
1658
|
|
|
|
|
|
|
croak "Inconsistent status! Issue in module code not in caller's!" |
1659
|
3141
|
50
|
|
|
|
53907
|
if $records{$n}->{$fid}->count_ok != 0; |
1660
|
|
|
|
|
|
|
|
1661
|
3141
|
100
|
|
|
|
27660
|
if ($search_time) { |
|
|
100
|
|
|
|
|
|
1662
|
|
|
|
|
|
|
|
1663
|
1965
|
50
|
|
|
|
5577
|
print($_debugh " Search time in '$v', format '$fstr'\n") |
1664
|
|
|
|
|
|
|
if $debug_fmt; |
1665
|
|
|
|
|
|
|
|
1666
|
1965
|
|
|
|
|
25839
|
my $t = $self->_guess_time_format($fstr, $fmt->locale, $v, $stop); |
1667
|
1965
|
100
|
|
|
|
48455
|
$records{$n}->{$fid}->has_found_time((defined($t) ? 1 : 0)); |
1668
|
1965
|
100
|
|
|
|
22083
|
if (defined($t)) { |
|
|
100
|
|
|
|
|
|
1669
|
254
|
|
|
|
|
4087
|
$records{$n}->{$fid}->format_with_addition_of_time($t->[0]); |
1670
|
254
|
|
|
|
|
5234
|
$records{$n}->{$fid}->locale_with_addition_of_time($t->[1]); |
1671
|
254
|
|
|
|
|
5218
|
$records{$n}->{$fid}->parser_with_addition_of_time($t->[2]); |
1672
|
254
|
|
|
|
|
2308
|
$incr = 1; |
1673
|
|
|
|
|
|
|
} elsif ($fstr eq '') { |
1674
|
191
|
|
|
|
|
3066
|
$records{$n}->{$fid}->count_ko(1); |
1675
|
|
|
|
|
|
|
} |
1676
|
|
|
|
|
|
|
} elsif ($fstr eq '') { |
1677
|
78
|
|
|
|
|
1003
|
$records{$n}->{$fid}->count_ko(1); |
1678
|
|
|
|
|
|
|
} |
1679
|
|
|
|
|
|
|
|
1680
|
|
|
|
|
|
|
} |
1681
|
|
|
|
|
|
|
|
1682
|
14144
|
|
|
|
|
258579
|
$records{$n}->{$fid}->count_ok($records{$n}->{$fid}->count_ok + $incr); |
1683
|
|
|
|
|
|
|
|
1684
|
14144
|
100
|
100
|
|
|
212481
|
$records{$n}->{$fid}->count_ko($records{$n}->{$fid}->count_ko + 1) |
1685
|
|
|
|
|
|
|
if !$incr and !$is_ok; |
1686
|
|
|
|
|
|
|
|
1687
|
14144
|
100
|
|
|
|
57896
|
if ($incr) { |
1688
|
|
|
|
|
|
|
# We remove the slave if master is fine. |
1689
|
|
|
|
|
|
|
# Depending on the order in which parsing got done, the master could |
1690
|
|
|
|
|
|
|
# pop up first, or the slave, that is why we need manage both cases. |
1691
|
9155
|
100
|
100
|
|
|
120488
|
if ($fmt->index_slave >= 0 or $fmt->index_master >= 0) { |
1692
|
6837
|
100
|
|
|
|
218610
|
my $has_slave = ($fmt->index_slave >= 0 ? 1 : 0); |
1693
|
6837
|
100
|
|
|
|
129995
|
my $idx = ($has_slave ? $fmt->index_slave : $fmt->index_master); |
1694
|
6837
|
|
|
|
|
121500
|
my $mate = $formats[$idx]->id; |
1695
|
6837
|
100
|
|
|
|
60566
|
if (exists $records{$n}->{$mate}) { |
1696
|
6686
|
100
|
|
|
|
16791
|
if ($has_slave) { |
1697
|
2239
|
100
|
|
|
|
30513
|
if ($records{$n}->{$mate}->count_ko == 0) { |
1698
|
|
|
|
|
|
|
# I am the master: I remove the slave |
1699
|
2
|
|
|
|
|
50
|
$records{$n}->{$mate}->count_ko(1); |
1700
|
|
|
|
|
|
|
} |
1701
|
|
|
|
|
|
|
} else { |
1702
|
4447
|
50
|
66
|
|
|
60285
|
if ($records{$n}->{$mate}->count_ko == 0 and |
|
|
|
66
|
|
|
|
|
1703
|
|
|
|
|
|
|
$records{$n}->{$mate}->count_ok >= 1 and |
1704
|
|
|
|
|
|
|
$records{$n}->{$fid}->count_ko == 0) { |
1705
|
114
|
|
|
|
|
6722
|
$records{$n}->{$fid}->count_ko(1); |
1706
|
|
|
|
|
|
|
} |
1707
|
|
|
|
|
|
|
} |
1708
|
|
|
|
|
|
|
} |
1709
|
|
|
|
|
|
|
} |
1710
|
|
|
|
|
|
|
} |
1711
|
|
|
|
|
|
|
|
1712
|
|
|
|
|
|
|
} else { |
1713
|
5217
|
|
|
|
|
70104
|
$records{$n}->{$fid}->count_ko($records{$n}->{$fid}->count_ko + 1); |
1714
|
|
|
|
|
|
|
} |
1715
|
|
|
|
|
|
|
} |
1716
|
|
|
|
|
|
|
|
1717
|
158222
|
50
|
|
|
|
1339039
|
$self->_debug_output_fmt(' post', $fmt, $records{$n}->{$fid}) if $debug_fmt; |
1718
|
|
|
|
|
|
|
|
1719
|
|
|
|
|
|
|
} |
1720
|
|
|
|
|
|
|
} |
1721
|
|
|
|
|
|
|
|
1722
|
4645
|
|
|
|
|
8710
|
$count_gotit = 0; |
1723
|
4645
|
|
|
|
|
7516
|
$count_ambiguous = 0; |
1724
|
4645
|
|
|
|
|
7526
|
$count_empty = 0; |
1725
|
4645
|
|
|
|
|
10816
|
for my $n (@fields_to_detect_format) { |
1726
|
31430
|
100
|
|
|
|
71592
|
next if $cache_nodate[$n]; |
1727
|
|
|
|
|
|
|
|
1728
|
16897
|
|
|
|
|
25711
|
my $candidate = 0; |
1729
|
16897
|
|
|
|
|
24086
|
my $tt = 0; |
1730
|
16897
|
|
|
|
|
28237
|
for my $fmt (@formats) { |
1731
|
258870
|
|
|
|
|
3239194
|
my $fid = $fmt->id; |
1732
|
258870
|
|
|
|
|
1821426
|
my $rec = $records{$n}->{$fid}; |
1733
|
258870
|
100
|
|
|
|
574678
|
next unless defined($rec); |
1734
|
|
|
|
|
|
|
|
1735
|
200048
|
|
|
|
|
2451503
|
my $ok = $rec->count_ok; |
1736
|
200048
|
|
|
|
|
3426576
|
my $ko = $rec->count_ko; |
1737
|
|
|
|
|
|
|
|
1738
|
200048
|
50
|
66
|
|
|
1752815
|
confess "Oups. Check this module code urgently!" if $ok == 0 and $ko == 0; |
1739
|
200048
|
|
|
|
|
301631
|
$tt += $ok + $ko; |
1740
|
|
|
|
|
|
|
|
1741
|
200048
|
100
|
100
|
|
|
549775
|
$candidate++ if $ok >= 1 and $ko == 0; |
1742
|
|
|
|
|
|
|
} |
1743
|
16897
|
100
|
|
|
|
42993
|
if ($candidate == 1) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
1744
|
9599
|
|
|
|
|
18745
|
$count_gotit++; |
1745
|
|
|
|
|
|
|
} elsif ($candidate >= 2) { |
1746
|
2269
|
|
|
|
|
5566
|
$count_ambiguous++; |
1747
|
|
|
|
|
|
|
} elsif ($tt != 0) { |
1748
|
222
|
|
|
|
|
417
|
$count_nodate++; |
1749
|
222
|
|
|
|
|
572
|
$cache_nodate[$n] = 1; |
1750
|
|
|
|
|
|
|
} else { |
1751
|
4807
|
|
|
|
|
8012
|
$count_empty++; |
1752
|
|
|
|
|
|
|
} |
1753
|
|
|
|
|
|
|
} |
1754
|
|
|
|
|
|
|
|
1755
|
4645
|
50
|
|
|
|
11977
|
if ($debug_fmt) { |
1756
|
0
|
|
|
|
|
0
|
print($_debugh "\$count_gotit = $count_gotit\n"); |
1757
|
0
|
|
|
|
|
0
|
print($_debugh "\$count_ambiguous = $count_ambiguous\n"); |
1758
|
0
|
|
|
|
|
0
|
print($_debugh "\$count_nodate = $count_nodate\n"); |
1759
|
0
|
|
|
|
|
0
|
print($_debugh "\$count_empty = $count_empty\n"); |
1760
|
|
|
|
|
|
|
} |
1761
|
|
|
|
|
|
|
|
1762
|
4645
|
|
|
|
|
7395
|
my $can_start_recording_data = 0; |
1763
|
4645
|
100
|
100
|
|
|
27636
|
$can_start_recording_data = 1 |
|
|
|
100
|
|
|
|
|
1764
|
|
|
|
|
|
|
if $count_gotit + $count_ambiguous + $count_nodate >= 1 and |
1765
|
|
|
|
|
|
|
!$count_ambiguous and !$count_empty; |
1766
|
|
|
|
|
|
|
|
1767
|
4645
|
100
|
100
|
|
|
32531
|
if ($can_start_recording_data and !$has_signaled_can_start_recording_data) { |
1768
|
28
|
|
|
|
|
64
|
$has_signaled_can_start_recording_data = 1; |
1769
|
|
|
|
|
|
|
|
1770
|
28
|
50
|
|
|
|
97
|
print($_debugh "Can start recording (all dates formats detection closed) " . |
1771
|
|
|
|
|
|
|
"after record #$record_number\n") if $_debug; |
1772
|
|
|
|
|
|
|
|
1773
|
28
|
|
|
|
|
98
|
$self->{_line_after_which_recording_can_start} = $record_number; |
1774
|
28
|
100
|
|
|
|
559
|
last unless $self->{fields_dates_auto}; |
1775
|
|
|
|
|
|
|
} |
1776
|
|
|
|
|
|
|
} |
1777
|
|
|
|
|
|
|
|
1778
|
50
|
|
|
|
|
5167
|
close $inh; |
1779
|
|
|
|
|
|
|
|
1780
|
50
|
|
|
|
|
190
|
my %dates_detailed_status; |
1781
|
|
|
|
|
|
|
my @dates_formats; |
1782
|
50
|
|
|
|
|
141
|
my $check_empty = 0; |
1783
|
50
|
|
|
|
|
101
|
my $check_nodate = 0; |
1784
|
50
|
|
|
|
|
104
|
my $check_ambiguous = 0; |
1785
|
50
|
|
|
|
|
114
|
my $check_gotit = 0; |
1786
|
50
|
|
|
|
|
145
|
for my $n (@fields_to_detect_format) { |
1787
|
523
|
|
|
|
|
833
|
my @formats_ok; |
1788
|
523
|
|
|
|
|
798
|
my $tt = 0; |
1789
|
523
|
|
|
|
|
798
|
for my $fid (sort keys %{$records{$n}}) { |
|
523
|
|
|
|
|
4886
|
|
1790
|
8267
|
|
|
|
|
84350
|
my $rec = $records{$n}->{$fid}; |
1791
|
8267
|
100
|
100
|
|
|
99253
|
if ($rec->count_ok >= 1 and $rec->count_ko == 0) { |
1792
|
|
|
|
|
|
|
|
1793
|
297
|
|
|
|
|
10743
|
my ($fstr, $floc) = ($rec->format, $rec->locale); |
1794
|
297
|
100
|
|
|
|
6871
|
($fstr, $floc) = ( |
1795
|
|
|
|
|
|
|
$rec->format_with_addition_of_time, |
1796
|
|
|
|
|
|
|
$rec->locale_with_addition_of_time |
1797
|
|
|
|
|
|
|
) if $rec->has_found_time; |
1798
|
|
|
|
|
|
|
|
1799
|
297
|
|
|
|
|
6572
|
push @formats_ok, [$fstr, $floc]; |
1800
|
|
|
|
|
|
|
} |
1801
|
8267
|
|
|
|
|
147918
|
$tt += $rec->count_ok + $rec->count_ko; |
1802
|
|
|
|
|
|
|
} |
1803
|
523
|
|
|
|
|
5830
|
my $is_ok = 0; |
1804
|
523
|
|
|
|
|
803
|
my $format; |
1805
|
523
|
|
|
|
|
834
|
my $locale = ''; |
1806
|
523
|
100
|
100
|
|
|
2432
|
if ($#formats_ok < 0 and $tt == 0) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
1807
|
16
|
|
|
|
|
32
|
$format = "Z"; |
1808
|
16
|
|
|
|
|
30
|
$check_empty++; |
1809
|
|
|
|
|
|
|
} elsif ($#formats_ok < 0) { |
1810
|
222
|
|
|
|
|
378
|
$format = "N"; |
1811
|
222
|
|
|
|
|
318
|
$check_nodate++; |
1812
|
|
|
|
|
|
|
} elsif ($#formats_ok > 0) { |
1813
|
12
|
|
|
|
|
23
|
$format = "A"; |
1814
|
12
|
|
|
|
|
24
|
$check_ambiguous++; |
1815
|
|
|
|
|
|
|
} else { |
1816
|
273
|
|
|
|
|
457
|
$is_ok = 1; |
1817
|
273
|
|
|
|
|
509
|
$format = $formats_ok[0]->[0]; |
1818
|
273
|
|
|
|
|
529
|
$locale = $formats_ok[0]->[1]; |
1819
|
273
|
|
|
|
|
580
|
$check_gotit++; |
1820
|
|
|
|
|
|
|
} |
1821
|
523
|
|
|
|
|
1318
|
my $col = $self->_col_dispname($n); |
1822
|
|
|
|
|
|
|
|
1823
|
523
|
50
|
|
|
|
1922
|
$dates_detailed_status{$col} = $format unless exists $dates_detailed_status{$col}; |
1824
|
523
|
100
|
66
|
|
|
2593
|
$dates_formats[$n] = [ $format, $locale ] if $is_ok and !defined($dates_formats[$n]); |
1825
|
|
|
|
|
|
|
} |
1826
|
50
|
|
|
|
|
213
|
$dates_detailed_status{'.'} = $self->{_line_after_which_recording_can_start}; |
1827
|
|
|
|
|
|
|
|
1828
|
50
|
50
|
66
|
|
|
591
|
if ($check_empty != $count_empty or $check_nodate != $count_nodate or |
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
1829
|
|
|
|
|
|
|
$check_ambiguous != $count_ambiguous or $check_gotit != $count_gotit) { |
1830
|
|
|
|
|
|
|
# The below condition can happen with an empty CSV (empty file (no header) or |
1831
|
|
|
|
|
|
|
# only a header line). |
1832
|
1
|
50
|
33
|
|
|
21
|
unless (!$count_empty and !$check_nodate and !$count_nodate and |
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
1833
|
|
|
|
|
|
|
!$check_ambiguous and !$count_ambiguous and !$check_gotit and !$count_gotit) { |
1834
|
0
|
|
|
|
|
0
|
print(STDERR "\$check_empty = $check_empty\n"); |
1835
|
0
|
|
|
|
|
0
|
print(STDERR "\$count_empty = $count_empty\n"); |
1836
|
0
|
|
|
|
|
0
|
print(STDERR "\$check_nodate = $check_nodate\n"); |
1837
|
0
|
|
|
|
|
0
|
print(STDERR "\$count_nodate = $count_nodate\n"); |
1838
|
0
|
|
|
|
|
0
|
print(STDERR "\$check_ambiguous = $check_ambiguous\n"); |
1839
|
0
|
|
|
|
|
0
|
print(STDERR "\$count_ambiguous = $count_ambiguous\n"); |
1840
|
0
|
|
|
|
|
0
|
print(STDERR "\$check_gotit = $check_gotit\n"); |
1841
|
0
|
|
|
|
|
0
|
print(STDERR "\$count_gotit = $count_gotit\n"); |
1842
|
0
|
|
|
|
|
0
|
confess "Oups! Check immediately this module code, man!"; |
1843
|
|
|
|
|
|
|
} |
1844
|
|
|
|
|
|
|
} |
1845
|
|
|
|
|
|
|
|
1846
|
50
|
50
|
|
|
|
148
|
if ($debug_fmt) { |
1847
|
|
|
|
|
|
|
# A very detailed debug output |
1848
|
0
|
|
|
|
|
0
|
for my $n (@fields_to_detect_format) { |
1849
|
0
|
|
|
|
|
0
|
my $col = $self->_col_dispname($n); |
1850
|
0
|
|
|
|
|
0
|
print($_debugh "$col\n"); |
1851
|
0
|
|
|
|
|
0
|
printf($_debugh " %-25s %3s %3s\n", "format", "OK", "KO"); |
1852
|
0
|
|
|
|
|
0
|
for my $fid (sort keys %{$records{$n}}) { |
|
0
|
|
|
|
|
0
|
|
1853
|
0
|
|
|
|
|
0
|
my $rec = $records{$n}->{$fid}; |
1854
|
0
|
|
|
|
|
0
|
my $cc = ''; |
1855
|
0
|
0
|
0
|
|
|
0
|
$cc = "(" . $rec->locale . ")" if defined($rec->locale) and $rec->locale ne ''; |
1856
|
0
|
|
|
|
|
0
|
printf($_debugh " %-25s %3d %3d\n", |
1857
|
|
|
|
|
|
|
$rec->format . $cc, $rec->count_ok, $rec->count_ko); |
1858
|
|
|
|
|
|
|
} |
1859
|
|
|
|
|
|
|
} |
1860
|
|
|
|
|
|
|
} |
1861
|
|
|
|
|
|
|
# Not a typo - displaying it IN ADDITION to debug output above is done on purpose... |
1862
|
50
|
50
|
|
|
|
202
|
if ($_debug) { |
1863
|
|
|
|
|
|
|
# A shorter (as compared to above) output of outcome of DateTime detection |
1864
|
0
|
|
|
|
|
0
|
print($_debugh "Result of DateTime detection:\n"); |
1865
|
0
|
|
|
|
|
0
|
printf($_debugh "%-3s %-25s %-30s %s\n", '###', 'FIELD', 'DATETIME FORMAT', |
1866
|
|
|
|
|
|
|
'DATETIME LOCALE'); |
1867
|
0
|
|
|
|
|
0
|
for my $n (@fields_to_detect_format) { |
1868
|
0
|
|
|
|
|
0
|
my ($fmt, $loc) = ('<undef>', '<undef>'); |
1869
|
0
|
0
|
|
|
|
0
|
if (defined($dates_formats[$n])) { |
1870
|
0
|
|
|
|
|
0
|
($fmt, $loc) = @{$dates_formats[$n]}[0, 1]; |
|
0
|
|
|
|
|
0
|
|
1871
|
|
|
|
|
|
|
} |
1872
|
0
|
|
|
|
|
0
|
printf($_debugh "%03d %-25s %-30s %s\n", $n, $self->_col_dispname($n), $fmt, $loc); |
1873
|
|
|
|
|
|
|
} |
1874
|
|
|
|
|
|
|
} |
1875
|
|
|
|
|
|
|
|
1876
|
50
|
100
|
|
|
|
178
|
if (!$self->{fields_dates_auto}) { |
1877
|
10
|
|
|
|
|
28
|
my $e = 0; |
1878
|
10
|
|
|
|
|
32
|
for my $n (@fields_to_detect_format) { |
1879
|
20
|
100
|
|
|
|
69
|
next if defined($dates_formats[$n]); |
1880
|
7
|
|
|
|
|
27
|
$self->_print_error("unable to detect DateTime format of field '" . |
1881
|
|
|
|
|
|
|
$self->_col_dispname($n) . "'", 1); |
1882
|
7
|
|
|
|
|
281
|
$e++; |
1883
|
|
|
|
|
|
|
} |
1884
|
10
|
100
|
|
|
|
57
|
$self->_print_error("$e field(s) encountered with unknown DateTime format") if $e; |
1885
|
|
|
|
|
|
|
} |
1886
|
|
|
|
|
|
|
|
1887
|
48
|
|
|
|
|
774
|
$self->{_dates_detailed_status} = { %dates_detailed_status }; |
1888
|
48
|
|
|
|
|
79659
|
$self->{_dates_formats} = [ @dates_formats ]; |
1889
|
|
|
|
|
|
|
} |
1890
|
|
|
|
|
|
|
|
1891
|
|
|
|
|
|
|
sub _debug_output_fmt { |
1892
|
0
|
|
|
0
|
|
0
|
my ($self, $prefix, $fmt, $rec) = @_; |
1893
|
|
|
|
|
|
|
|
1894
|
0
|
|
|
|
|
0
|
my $_debugh = $self->{_debugh}; |
1895
|
|
|
|
|
|
|
|
1896
|
0
|
|
|
|
|
0
|
my ($fstr, $floc) = ($fmt->format, $fmt->locale); |
1897
|
0
|
0
|
0
|
|
|
0
|
($fstr, $floc) = ( |
1898
|
|
|
|
|
|
|
'<+T>' . $rec->format_with_addition_of_time, |
1899
|
|
|
|
|
|
|
$rec->locale_with_addition_of_time |
1900
|
|
|
|
|
|
|
) if defined($rec) and $rec->has_found_time; |
1901
|
|
|
|
|
|
|
|
1902
|
0
|
|
|
|
|
0
|
my $locstr = ''; |
1903
|
0
|
0
|
0
|
|
|
0
|
$locstr = "(" . $floc . ")" if defined($floc) and $floc ne ''; |
1904
|
|
|
|
|
|
|
|
1905
|
0
|
0
|
|
|
|
0
|
my $tmpok = $rec->count_ok if defined($rec); |
1906
|
0
|
0
|
|
|
|
0
|
$tmpok = '<undef>' unless defined($tmpok); |
1907
|
0
|
0
|
|
|
|
0
|
my $tmpko = $rec->count_ko if defined($rec); |
1908
|
0
|
0
|
|
|
|
0
|
$tmpko = '<undef>' unless defined($tmpko); |
1909
|
|
|
|
|
|
|
|
1910
|
0
|
|
|
|
|
0
|
print($_debugh "$prefix (format '$fstr$locstr': OK = $tmpok, KO = $tmpko)\n"); |
1911
|
|
|
|
|
|
|
} |
1912
|
|
|
|
|
|
|
|
1913
|
|
|
|
|
|
|
# When no parse can be done (parser to test is undef), return 1 |
1914
|
|
|
|
|
|
|
sub _try_parser { |
1915
|
19361
|
|
|
19361
|
|
41937
|
my ($fmt, $rec, $value_to_parse) = @_; |
1916
|
|
|
|
|
|
|
|
1917
|
19361
|
|
|
|
|
248204
|
my $parser = $fmt->parser; |
1918
|
19361
|
100
|
|
|
|
360345
|
$parser = $rec->parser_with_addition_of_time if $rec->has_found_time; |
1919
|
|
|
|
|
|
|
|
1920
|
19361
|
|
|
|
|
219429
|
my $is_ok = 1; |
1921
|
19361
|
100
|
|
|
|
81838
|
$is_ok = (defined($parser->parse_datetime($value_to_parse)) ? 1 : 0) if $parser; |
|
|
100
|
|
|
|
|
|
1922
|
|
|
|
|
|
|
|
1923
|
19361
|
100
|
|
|
|
7599175
|
unless (defined($rec->format)) { |
1924
|
8267
|
|
|
|
|
153643
|
$rec->format($fmt->format); |
1925
|
8267
|
|
|
|
|
265471
|
$rec->locale($fmt->locale); |
1926
|
|
|
|
|
|
|
} |
1927
|
|
|
|
|
|
|
|
1928
|
19361
|
|
|
|
|
296678
|
return $is_ok; |
1929
|
|
|
|
|
|
|
} |
1930
|
|
|
|
|
|
|
|
1931
|
|
|
|
|
|
|
sub _guess_time_format { |
1932
|
|
|
|
|
|
|
|
1933
|
|
|
|
|
|
|
# IMPORTANT |
1934
|
|
|
|
|
|
|
# Formats are tested in the order of the list below, and the first one that succeeds stops the |
1935
|
|
|
|
|
|
|
# tests. |
1936
|
|
|
|
|
|
|
# That makes the order of the elements important: %R would match any value that'd also match |
1937
|
|
|
|
|
|
|
# %T, that'd cause to return %R whereas %T would be possible. Same with AM/PM formats. Thus |
1938
|
|
|
|
|
|
|
# the longest patterns appear first. |
1939
|
1965
|
|
|
1965
|
|
18953
|
my @T = ( |
1940
|
|
|
|
|
|
|
'%I:%M:%S %p', |
1941
|
|
|
|
|
|
|
'%I:%M %p', |
1942
|
|
|
|
|
|
|
'%I:%M:%S%p', |
1943
|
|
|
|
|
|
|
'%I:%M%p', |
1944
|
|
|
|
|
|
|
'%T', |
1945
|
|
|
|
|
|
|
'%R' |
1946
|
|
|
|
|
|
|
); |
1947
|
|
|
|
|
|
|
|
1948
|
1965
|
|
|
|
|
5684
|
my ($self, $format, $locale, $v, $stop) = @_; |
1949
|
|
|
|
|
|
|
|
1950
|
1965
|
|
|
|
|
4895
|
my $_debugh = $self->{_debugh}; |
1951
|
1965
|
|
33
|
|
|
6387
|
my $debug_fmt = ($self->{_debug} and $DEBUG_DATETIME_FORMATS); |
1952
|
|
|
|
|
|
|
|
1953
|
1965
|
100
|
|
|
|
6396
|
return undef if $format =~ /:/; |
1954
|
|
|
|
|
|
|
|
1955
|
1961
|
|
|
|
|
3296
|
my $sep; |
1956
|
1961
|
100
|
|
|
|
6636
|
if ($format eq '') { |
1957
|
316
|
|
|
|
|
663
|
$sep = ''; |
1958
|
|
|
|
|
|
|
} else { |
1959
|
1645
|
100
|
|
|
|
12926
|
unless ((undef, $sep) = $v =~ /(^|\d([^0-9:]+))(\d{1,2}):(\d{1,2})(\D|$)/) { |
1960
|
387
|
100
|
|
|
|
1046
|
if ($v =~ /\d{4}:\d{2}(\D|$)/) { |
1961
|
29
|
|
|
|
|
73
|
$sep = ''; |
1962
|
|
|
|
|
|
|
} else { |
1963
|
|
|
|
|
|
|
|
1964
|
358
|
50
|
|
|
|
862
|
print($_debugh "_guess_time_format(): separator candidate not found in '$v'\n") |
1965
|
|
|
|
|
|
|
if $debug_fmt; |
1966
|
|
|
|
|
|
|
|
1967
|
358
|
|
|
|
|
945
|
return undef; |
1968
|
|
|
|
|
|
|
} |
1969
|
|
|
|
|
|
|
} |
1970
|
|
|
|
|
|
|
} |
1971
|
1603
|
100
|
|
|
|
5468
|
$sep = '' unless defined($sep); |
1972
|
|
|
|
|
|
|
|
1973
|
|
|
|
|
|
|
# |
1974
|
|
|
|
|
|
|
# IMPORTANT |
1975
|
|
|
|
|
|
|
# |
1976
|
|
|
|
|
|
|
# The code below allows to successfully detect DateTime format when |
1977
|
|
|
|
|
|
|
# the first lines contain things like: |
1978
|
|
|
|
|
|
|
# Jan 20 2017 2:00AM |
1979
|
|
|
|
|
|
|
# that could lead to a separator set to ' ' while actually it should be ' '. In this case |
1980
|
|
|
|
|
|
|
# if the double-space is kept, then a later value of |
1981
|
|
|
|
|
|
|
# Jan 20 2017 10:00AM |
1982
|
|
|
|
|
|
|
# won't be parsed. |
1983
|
|
|
|
|
|
|
# |
1984
|
|
|
|
|
|
|
# See t/11-bugfix.t, BUG 5, for an explanation of why the line below. |
1985
|
|
|
|
|
|
|
# |
1986
|
|
|
|
|
|
|
|
1987
|
|
|
|
|
|
|
# More generic code, but will also break some separators like ' ' (4 spaces) |
1988
|
|
|
|
|
|
|
# $sep = substr($sep, 0, length($sep) - 1) if length($sep) >= 2 and substr($sep, -2) eq ' '; |
1989
|
1603
|
100
|
|
|
|
4677
|
$sep = ' ' if $sep eq ' '; |
1990
|
|
|
|
|
|
|
|
1991
|
1603
|
50
|
|
|
|
4046
|
if ($debug_fmt) { |
1992
|
0
|
|
|
|
|
0
|
print($_debugh " _guess_time_format(): Searching time in '$v'\n"); |
1993
|
|
|
|
|
|
|
} |
1994
|
|
|
|
|
|
|
|
1995
|
1603
|
|
|
|
|
3624
|
for my $t (@T) { |
1996
|
9139
|
|
|
|
|
599709
|
my $increased_format = "$format$sep$t"; |
1997
|
|
|
|
|
|
|
|
1998
|
9139
|
50
|
|
|
|
22295
|
print($_debugh " _guess_time_format(): Trying format '$increased_format'\n") if $debug_fmt; |
1999
|
|
|
|
|
|
|
|
2000
|
9139
|
|
|
|
|
31763
|
my %opts = (pattern => $START . $increased_format . $stop); |
2001
|
9139
|
100
|
66
|
|
|
46360
|
$opts{locale} = $locale if defined($locale) and $locale ne ''; |
2002
|
9139
|
|
|
|
|
40199
|
my $parser_of_increased_format = DateTime::Format::Strptime->new(%opts); |
2003
|
9139
|
100
|
|
|
|
11636796
|
next unless defined($parser_of_increased_format->parse_datetime($START . $v . $stop)); |
2004
|
|
|
|
|
|
|
|
2005
|
254
|
50
|
|
|
|
179457
|
if ($debug_fmt) { |
2006
|
0
|
|
|
|
|
0
|
print($_debugh " _guess_time_format(): found time in '$v'\n"); |
2007
|
0
|
|
|
|
|
0
|
print($_debugh " Initial format: '$format'\n"); |
2008
|
0
|
|
|
|
|
0
|
print($_debugh " Increased format: '$increased_format'\n"); |
2009
|
|
|
|
|
|
|
} |
2010
|
|
|
|
|
|
|
|
2011
|
254
|
|
|
|
|
1623
|
return [$increased_format, $locale, $parser_of_increased_format]; |
2012
|
|
|
|
|
|
|
} |
2013
|
1349
|
|
|
|
|
113176
|
return undef; |
2014
|
|
|
|
|
|
|
} |
2015
|
|
|
|
|
|
|
|
2016
|
|
|
|
|
|
|
|
2017
|
|
|
|
|
|
|
# * ********************************* * |
2018
|
|
|
|
|
|
|
# * END OF DATE FORMAT DETECTION CODE * |
2019
|
|
|
|
|
|
|
# * ********************************* * |
2020
|
|
|
|
|
|
|
|
2021
|
|
|
|
|
|
|
|
2022
|
|
|
|
|
|
|
# Take the string of a header in $_ and replace it with the corresponding field name |
2023
|
|
|
|
|
|
|
sub _header_to_field_name { |
2024
|
1246
|
|
|
1246
|
|
2749
|
$_ = remove_accents($_); |
2025
|
1246
|
|
|
|
|
2756
|
s/[^[:alnum:]_]//gi; |
2026
|
1246
|
|
|
|
|
7630
|
s/^.*$/\U$&/; |
2027
|
|
|
|
|
|
|
} |
2028
|
|
|
|
|
|
|
|
2029
|
|
|
|
|
|
|
# Return 0 if error, 1 if all good |
2030
|
|
|
|
|
|
|
sub _S2_init_fields_from_header { |
2031
|
349
|
|
|
349
|
|
737
|
my $self = shift; |
2032
|
|
|
|
|
|
|
|
2033
|
349
|
|
|
|
|
804
|
my $has_headers = $self->{has_headers}; |
2034
|
349
|
|
|
|
|
734
|
my $_debug = $self->{_debug}; |
2035
|
349
|
|
|
|
|
756
|
my $_debugh = $self->{_debugh}; |
2036
|
|
|
|
|
|
|
|
2037
|
349
|
|
|
|
|
1108
|
my $in_file_disp = $self->get_in_file_disp(); |
2038
|
|
|
|
|
|
|
|
2039
|
349
|
|
|
|
|
739
|
my $inh = $self->{_inh}; |
2040
|
349
|
|
|
|
|
758
|
my $incsv = $self->{_in_csvobj}; |
2041
|
|
|
|
|
|
|
|
2042
|
349
|
|
|
|
|
990
|
$self->{_row_read} = 0; |
2043
|
|
|
|
|
|
|
|
2044
|
349
|
|
|
|
|
754
|
my @columns; |
2045
|
|
|
|
|
|
|
my @headers; |
2046
|
349
|
100
|
|
|
|
1042
|
if ($has_headers) { |
2047
|
|
|
|
|
|
|
|
2048
|
|
|
|
|
|
|
print($_debugh "$PKG: '$in_file_disp': will parse header line to get column names\n") |
2049
|
338
|
50
|
|
|
|
1055
|
if $self->{_debug_read}; |
2050
|
|
|
|
|
|
|
|
2051
|
338
|
|
|
|
|
704
|
$self->{_row_read}++; |
2052
|
|
|
|
|
|
|
|
2053
|
|
|
|
|
|
|
print($_debugh "$PKG: '$in_file_disp': will read line #" . $self->{_row_read} . "\n") |
2054
|
338
|
50
|
|
|
|
1003
|
if $self->{_debug_read}; |
2055
|
|
|
|
|
|
|
|
2056
|
338
|
50
|
|
|
|
1067
|
if (defined($self->{_inh_header})) { |
2057
|
0
|
|
|
|
|
0
|
my $l = $self->{_inh_header}; |
2058
|
0
|
|
|
|
|
0
|
my $inmemh; |
2059
|
0
|
0
|
|
|
|
0
|
if (!open ($inmemh, '<', \$l)) { |
2060
|
0
|
|
|
|
|
0
|
$self->_print_error("can't open header line in-memory. CSV read aborted."); |
2061
|
0
|
|
|
|
|
0
|
return 0; |
2062
|
|
|
|
|
|
|
} |
2063
|
0
|
|
|
|
|
0
|
@headers = @{_mygetline($incsv, $inmemh)}; |
|
0
|
|
|
|
|
0
|
|
2064
|
|
|
|
|
|
|
} else { |
2065
|
338
|
|
|
|
|
893
|
my $r = _mygetline($incsv, $inh); |
2066
|
336
|
50
|
|
|
|
17668
|
@headers = @{$r} if defined($r); |
|
336
|
|
|
|
|
1414
|
|
2067
|
|
|
|
|
|
|
} |
2068
|
|
|
|
|
|
|
print($_debugh "Line " . $self->{_row_read} . ":\n--\n" . join('::', @headers) . "\n--\n") |
2069
|
336
|
50
|
|
|
|
1322
|
if $self->{_debug_read}; |
2070
|
|
|
|
|
|
|
} |
2071
|
|
|
|
|
|
|
|
2072
|
347
|
100
|
100
|
|
|
2083
|
if ($has_headers and !defined($self->{fields_column_names})) { |
2073
|
330
|
|
|
|
|
677
|
my %indexes; |
2074
|
330
|
100
|
|
|
|
978
|
if (defined($self->{fields_hr})) { |
2075
|
12
|
50
|
|
|
|
65
|
if (!$self->_process_header(\@headers, $self->{fields_hr}, \%indexes)) { |
2076
|
0
|
|
|
|
|
0
|
$self->_print_error("missing headers. CSV read aborted."); |
2077
|
0
|
|
|
|
|
0
|
return 0; |
2078
|
|
|
|
|
|
|
} |
2079
|
12
|
50
|
|
|
|
40
|
if ($_debug) { |
2080
|
0
|
|
|
|
|
0
|
print($_debugh " \%indexes:\n"); |
2081
|
0
|
|
|
|
|
0
|
for my $k (sort keys %indexes) { |
2082
|
0
|
|
|
|
|
0
|
print($_debugh " \t$k => $indexes{$k}\n"); |
2083
|
|
|
|
|
|
|
} |
2084
|
|
|
|
|
|
|
} |
2085
|
12
|
|
|
|
|
75
|
for (sort keys %indexes) { |
2086
|
48
|
50
|
|
|
|
122
|
next if $_ eq ''; |
2087
|
48
|
|
|
|
|
113
|
$columns[$indexes{$_}] = $_; |
2088
|
|
|
|
|
|
|
} |
2089
|
|
|
|
|
|
|
} else { |
2090
|
318
|
|
|
|
|
1075
|
@columns = @headers; |
2091
|
318
|
|
|
|
|
778
|
map { _header_to_field_name } @columns; |
|
1246
|
|
|
|
|
9060
|
|
2092
|
|
|
|
|
|
|
} |
2093
|
|
|
|
|
|
|
} |
2094
|
|
|
|
|
|
|
|
2095
|
347
|
100
|
|
|
|
1266
|
@columns = @{$self->{fields_column_names}} if defined($self->{fields_column_names}); |
|
14
|
|
|
|
|
40
|
|
2096
|
|
|
|
|
|
|
|
2097
|
|
|
|
|
|
|
# Avoid undef in column names... I prefer empty strings |
2098
|
347
|
100
|
|
|
|
1255
|
@columns = map { defined($_) ? $_ : '' } @columns; |
|
1370
|
|
|
|
|
3967
|
|
2099
|
|
|
|
|
|
|
|
2100
|
347
|
50
|
|
|
|
1185
|
if ($_debug) { |
2101
|
0
|
|
|
|
|
0
|
print($_debugh "-- CSV headers management\n"); |
2102
|
0
|
0
|
|
|
|
0
|
if (@columns) { |
2103
|
0
|
|
|
|
|
0
|
printf($_debugh " %-3s %-40s %-40s\n", 'COL', 'CSV Header', 'Hash Key'); |
2104
|
0
|
|
|
|
|
0
|
for my $i (0..$#columns) { |
2105
|
0
|
|
|
|
|
0
|
my $h = ''; |
2106
|
0
|
0
|
|
|
|
0
|
$h = $headers[$i] if defined($headers[$i]); |
2107
|
0
|
|
|
|
|
0
|
printf($_debugh " %03d %-40s %-40s\n", $i, "'$h'", "'$columns[$i]'"); |
2108
|
|
|
|
|
|
|
} |
2109
|
|
|
|
|
|
|
} else { |
2110
|
0
|
|
|
|
|
0
|
print($_debugh " No headers\n"); |
2111
|
|
|
|
|
|
|
} |
2112
|
|
|
|
|
|
|
} |
2113
|
|
|
|
|
|
|
|
2114
|
347
|
|
|
|
|
695
|
my %regular_named_fields; |
2115
|
347
|
|
|
|
|
1170
|
for my $i (0..$#columns) { |
2116
|
1370
|
100
|
66
|
|
|
6865
|
$regular_named_fields{$columns[$i]} = $i if defined($columns[$i]) and $columns[$i] ne ''; |
2117
|
|
|
|
|
|
|
} |
2118
|
347
|
|
|
|
|
2023
|
$self->{_regular_named_fields} = { %regular_named_fields }; |
2119
|
347
|
|
|
|
|
1446
|
$self->{_S2_columns} = [ @columns ]; |
2120
|
347
|
100
|
|
|
|
1635
|
$self->{_S2_headers} = [ @headers ] if $has_headers; |
2121
|
|
|
|
|
|
|
|
2122
|
347
|
|
|
|
|
1730
|
return 1; |
2123
|
|
|
|
|
|
|
} |
2124
|
|
|
|
|
|
|
|
2125
|
|
|
|
|
|
|
sub out_header { |
2126
|
8
|
|
|
8
|
1
|
15
|
my $self = shift; |
2127
|
8
|
|
|
|
|
131
|
validate_pos(@_, {type => SCALAR}, {type => SCALAR}); |
2128
|
|
|
|
|
|
|
|
2129
|
8
|
|
|
|
|
28
|
my ($field, $header) = @_; |
2130
|
8
|
100
|
|
|
|
28
|
$self->{_out_headers} = { } unless exists $self->{_out_headers}; |
2131
|
|
|
|
|
|
|
|
2132
|
|
|
|
|
|
|
$self->_print_warning("out_header: field $field already set") |
2133
|
8
|
50
|
|
|
|
19
|
if exists $self->{_out_headers}->{$field}; |
2134
|
|
|
|
|
|
|
|
2135
|
8
|
|
|
|
|
17
|
$self->{_out_headers}->{$field} = $header; |
2136
|
|
|
|
|
|
|
|
2137
|
8
|
|
|
|
|
27
|
return $self; |
2138
|
|
|
|
|
|
|
} |
2139
|
|
|
|
|
|
|
|
2140
|
|
|
|
|
|
|
# Return 0 if error, 1 if all good |
2141
|
|
|
|
|
|
|
sub _S3_init_fields_extra { |
2142
|
349
|
|
|
349
|
|
738
|
my $self = shift; |
2143
|
|
|
|
|
|
|
|
2144
|
349
|
|
|
|
|
783
|
my $_debug = $self->{_debug}; |
2145
|
349
|
|
|
|
|
721
|
my $_debugh = $self->{_debugh}; |
2146
|
|
|
|
|
|
|
|
2147
|
349
|
|
|
|
|
760
|
my $verbose = $self->{verbose}; |
2148
|
|
|
|
|
|
|
|
2149
|
349
|
|
|
|
|
676
|
my $has_headers = $self->{has_headers}; |
2150
|
|
|
|
|
|
|
|
2151
|
349
|
|
|
|
|
610
|
my %named_fields = %{$self->{_regular_named_fields}}; |
|
349
|
|
|
|
|
1857
|
|
2152
|
349
|
|
|
|
|
821
|
my @columns = @{$self->{_S2_columns}}; |
|
349
|
|
|
|
|
1176
|
|
2153
|
349
|
100
|
|
|
|
1019
|
my @headers = @{$self->{_S2_headers}} if $has_headers; |
|
338
|
|
|
|
|
1066
|
|
2154
|
|
|
|
|
|
|
|
2155
|
349
|
|
|
|
|
664
|
my @extra_fields_indexes; |
2156
|
349
|
100
|
|
|
|
1173
|
my @extra_fields_definitions_list = @{$self->{_extra_fields}} if exists $self->{_extra_fields}; |
|
36
|
|
|
|
|
135
|
|
2157
|
349
|
|
|
|
|
754
|
my %extra_fields_definitions; |
2158
|
|
|
|
|
|
|
|
2159
|
|
|
|
|
|
|
my @coldata; |
2160
|
349
|
|
|
|
|
1096
|
for my $i (0..$#columns) { |
2161
|
1376
|
|
|
|
|
47229
|
my $col = $columns[$i]; |
2162
|
1376
|
100
|
|
|
|
3304
|
my $h = $headers[$i] if $has_headers; |
2163
|
1376
|
|
|
|
|
24745
|
push @coldata, ColData->new( |
2164
|
|
|
|
|
|
|
field_name => $col, |
2165
|
|
|
|
|
|
|
header_text => $h, |
2166
|
|
|
|
|
|
|
description => '' |
2167
|
|
|
|
|
|
|
); |
2168
|
|
|
|
|
|
|
} |
2169
|
|
|
|
|
|
|
|
2170
|
349
|
|
|
|
|
16487
|
for my $edef (@extra_fields_definitions_list) { |
2171
|
92
|
|
|
|
|
6463
|
my $c = $edef->check_field_existence; |
2172
|
92
|
100
|
|
|
|
1234
|
if (defined($c)) { |
2173
|
80
|
100
|
|
|
|
276
|
unless (exists $named_fields{$c}) { |
2174
|
9
|
|
|
|
|
173
|
$self->_print_error("unknown field '" . $edef->check_field_existence . "'", |
2175
|
|
|
|
|
|
|
0, ERR_UNKNOWN_FIELD, { %named_fields } ); |
2176
|
8
|
|
|
|
|
37
|
next; |
2177
|
|
|
|
|
|
|
} |
2178
|
|
|
|
|
|
|
} |
2179
|
|
|
|
|
|
|
|
2180
|
83
|
|
|
|
|
247
|
my @e_eclated = $edef; |
2181
|
|
|
|
|
|
|
|
2182
|
83
|
100
|
100
|
|
|
1621
|
if ($edef->ef_type == $EF_LINK and $edef->link_remote_read eq '*') { |
2183
|
2
|
|
|
|
|
102
|
my @cols = $edef->link_remote_obj->get_fields_names(); |
2184
|
|
|
|
|
|
|
|
2185
|
2
|
|
|
|
|
7
|
@e_eclated = (); |
2186
|
2
|
|
|
|
|
10
|
my %nf = %named_fields; |
2187
|
|
|
|
|
|
|
|
2188
|
2
|
|
|
|
|
7
|
for my $c (@cols) { |
2189
|
|
|
|
|
|
|
|
2190
|
4
|
|
|
|
|
84
|
my $ex_base = $edef->self_name . $c; |
2191
|
4
|
|
|
|
|
40
|
my $ex_target = $ex_base; |
2192
|
4
|
|
|
|
|
9
|
my $i = 1; |
2193
|
4
|
|
|
|
|
19
|
while (exists $nf{$ex_target}) { |
2194
|
1
|
|
|
|
|
3
|
$i++; |
2195
|
1
|
|
|
|
|
61
|
$ex_target = $ex_base . '_' . $i; |
2196
|
|
|
|
|
|
|
} |
2197
|
|
|
|
|
|
|
|
2198
|
4
|
|
|
|
|
77
|
my $e = ExtraField->new( |
2199
|
|
|
|
|
|
|
ef_type => $EF_LINK, |
2200
|
|
|
|
|
|
|
self_name => $ex_target, |
2201
|
|
|
|
|
|
|
description => $edef->description . " ($c)", |
2202
|
|
|
|
|
|
|
|
2203
|
|
|
|
|
|
|
link_self_search => $edef->link_self_search, |
2204
|
|
|
|
|
|
|
link_remote_obj => $edef->link_remote_obj, |
2205
|
|
|
|
|
|
|
link_remote_search => $edef->link_remote_search, |
2206
|
|
|
|
|
|
|
link_remote_read => $c, |
2207
|
|
|
|
|
|
|
|
2208
|
|
|
|
|
|
|
link_vlookup_opts => $edef->link_vlookup_opts |
2209
|
|
|
|
|
|
|
); |
2210
|
4
|
|
|
|
|
522
|
push @e_eclated, $e; |
2211
|
4
|
|
|
|
|
19
|
$nf{$ex_target} = undef; |
2212
|
|
|
|
|
|
|
} |
2213
|
|
|
|
|
|
|
} |
2214
|
|
|
|
|
|
|
|
2215
|
83
|
|
|
|
|
2010
|
for my $e1 (@e_eclated) { |
2216
|
85
|
100
|
|
|
|
1756
|
if (exists $named_fields{$e1->self_name}) { |
2217
|
6
|
|
|
|
|
184
|
$self->_print_error("extra field: duplicate field name: '" . $e1->self_name . "'"); |
2218
|
6
|
|
|
|
|
36
|
next; |
2219
|
|
|
|
|
|
|
} |
2220
|
|
|
|
|
|
|
|
2221
|
79
|
|
|
|
|
905
|
my $index_of_new_element = $#columns + 1; |
2222
|
79
|
|
|
|
|
193
|
push @extra_fields_indexes, $index_of_new_element; |
2223
|
79
|
|
|
|
|
1463
|
$columns[$index_of_new_element] = $e1->self_name; |
2224
|
79
|
|
|
|
|
2011
|
$named_fields{$e1->self_name} = $index_of_new_element; |
2225
|
79
|
|
|
|
|
1987
|
$extra_fields_definitions{$e1->self_name} = $e1; |
2226
|
|
|
|
|
|
|
|
2227
|
79
|
50
|
|
|
|
2104
|
push @headers, $e1->self_name if $has_headers; |
2228
|
79
|
|
|
|
|
2057
|
push @coldata, ColData->new( |
2229
|
|
|
|
|
|
|
field_name => $e1->self_name, |
2230
|
|
|
|
|
|
|
header_text => $e1->self_name, |
2231
|
|
|
|
|
|
|
description => $e1->description |
2232
|
|
|
|
|
|
|
); |
2233
|
|
|
|
|
|
|
} |
2234
|
|
|
|
|
|
|
|
2235
|
|
|
|
|
|
|
} |
2236
|
348
|
100
|
|
|
|
4319
|
$self->{_headers} = [ @headers ] if $has_headers; |
2237
|
348
|
|
|
|
|
1295
|
$self->{_extra_fields_indexes} = [ @extra_fields_indexes ]; |
2238
|
348
|
|
|
|
|
1375
|
$self->{_columns} = [ @columns ]; |
2239
|
348
|
|
|
|
|
1229
|
$self->{_extra_fields_definitions} = { %extra_fields_definitions }; |
2240
|
|
|
|
|
|
|
|
2241
|
348
|
|
|
|
|
2011
|
$self->{_named_fields} = { %named_fields }; |
2242
|
|
|
|
|
|
|
|
2243
|
348
|
|
|
|
|
1694
|
$self->_detect_dates_formats(); |
2244
|
|
|
|
|
|
|
|
2245
|
345
|
|
|
|
|
1046
|
$self->{_read_update_after_ar} = [ ]; |
2246
|
345
|
|
|
|
|
3959
|
$self->{_write_update_before_ar} = [ ]; |
2247
|
345
|
100
|
|
|
|
3326
|
my @dates_formats = @{$self->{_dates_formats}} if defined($self->{_dates_formats}); |
|
57
|
|
|
|
|
286
|
|
2248
|
345
|
|
|
|
|
1224
|
for my $i (0..$#columns) { |
2249
|
1436
|
|
|
|
|
2591
|
my $dt_format; |
2250
|
|
|
|
|
|
|
my $dt_locale; |
2251
|
1436
|
100
|
|
|
|
3506
|
if (defined($dates_formats[$i])) { |
2252
|
329
|
|
|
|
|
674
|
$dt_format = $dates_formats[$i]->[0]; |
2253
|
329
|
|
|
|
|
607
|
$dt_locale = $dates_formats[$i]->[1]; |
2254
|
|
|
|
|
|
|
} |
2255
|
1436
|
|
|
|
|
23769
|
$coldata[$i]->dt_format($dt_format); |
2256
|
1436
|
|
|
|
|
27651
|
$coldata[$i]->dt_locale($dt_locale); |
2257
|
|
|
|
|
|
|
|
2258
|
1436
|
100
|
|
|
|
11583
|
next unless defined($dt_format); |
2259
|
|
|
|
|
|
|
|
2260
|
329
|
|
|
|
|
540
|
my %opts_in; |
2261
|
329
|
100
|
66
|
|
|
1663
|
$opts_in{locale} = $dt_locale if defined($dt_locale) and $dt_locale ne ''; |
2262
|
|
|
|
|
|
|
|
2263
|
329
|
|
|
|
|
1443
|
my $obj_strptime_in = DateTime::Format::Strptime->new(pattern => $dt_format, %opts_in); |
2264
|
|
|
|
|
|
|
|
2265
|
329
|
|
|
|
|
379557
|
my %opts_out; |
2266
|
329
|
50
|
|
|
|
1038
|
my $loc_out = (exists $self->{out_dates_locale} ? $self->{out_dates_locale} : $dt_locale); |
2267
|
329
|
100
|
66
|
|
|
1922
|
$opts_out{locale} = $loc_out if defined($loc_out) and $loc_out ne ''; |
2268
|
|
|
|
|
|
|
my $obj_strptime_out = DateTime::Format::Strptime->new( |
2269
|
329
|
50
|
|
|
|
1534
|
pattern => (exists $self->{out_dates_format} ? $self->{out_dates_format} :$dt_format), |
2270
|
|
|
|
|
|
|
%opts_out |
2271
|
|
|
|
|
|
|
); |
2272
|
|
|
|
|
|
|
|
2273
|
329
|
|
|
|
|
356151
|
my $refsub_is_datetime_empty = $self->{_refsub_is_datetime_empty}; |
2274
|
329
|
|
|
|
|
1123
|
my $in_file_disp = $self->get_in_file_disp(); |
2275
|
|
|
|
|
|
|
|
2276
|
|
|
|
|
|
|
$self->{_read_update_after_ar}->[$i] = sub { |
2277
|
352
|
100
|
66
|
352
|
|
2215
|
return undef if !defined($_) or $_ eq '' or |
|
|
|
33
|
|
|
|
|
|
|
|
66
|
|
|
|
|
2278
|
|
|
|
|
|
|
(defined($refsub_is_datetime_empty) and $refsub_is_datetime_empty->($_)); |
2279
|
|
|
|
|
|
|
|
2280
|
315
|
|
|
|
|
585
|
my $s = $_[0]; |
2281
|
315
|
|
|
|
|
741
|
my $field = _get_def($_[1], '<?>'); |
2282
|
|
|
|
|
|
|
|
2283
|
315
|
|
|
|
|
1028
|
my $dt = $obj_strptime_in->parse_datetime($_); |
2284
|
|
|
|
|
|
|
|
2285
|
315
|
0
|
33
|
|
|
175356
|
if ($_debug and $DEBUG_DATETIME_FORMATS and $DEBUG_DATETIME_FORMATS_EVEN_MORE) { |
|
|
|
33
|
|
|
|
|
2286
|
0
|
0
|
|
|
|
0
|
print($_debugh "-- Record " . $s->get_recnum() . |
2287
|
|
|
|
|
|
|
", field '$field':\n String parsed: '$_'\n Parse format: '$dt_format'\n" . |
2288
|
|
|
|
|
|
|
" DateTime obj: <" . (defined($dt) ? $dt . '' : 'undef') . ">\n"); |
2289
|
|
|
|
|
|
|
} |
2290
|
|
|
|
|
|
|
|
2291
|
315
|
100
|
|
|
|
753
|
if (!defined($dt)) { |
2292
|
2
|
|
|
|
|
11
|
my $recnum = $s->get_recnum(); |
2293
|
2
|
50
|
|
|
|
8
|
if ($verbose) { |
2294
|
0
|
|
|
|
|
0
|
$s->_print("$PKG: " . |
2295
|
|
|
|
|
|
|
"$in_file_disp: record $recnum: field $field: unable to parse DateTime\n"); |
2296
|
0
|
|
|
|
|
0
|
$s->_print("$PKG: field: '$_'\n"); |
2297
|
0
|
|
|
|
|
0
|
$s->_print("$PKG: format: '$dt_format'\n"); |
2298
|
0
|
0
|
|
|
|
0
|
$s->_print("$PKG: " . |
2299
|
|
|
|
|
|
|
"locale: '" . ($dt_locale eq '' ? '<none>' : $dt_locale) . "'\n"); |
2300
|
0
|
|
|
|
|
0
|
$s->_print("$PKG: " . |
2301
|
|
|
|
|
|
|
"Probable cause: when detecting DateTime format, $PKG will stop reading\n"); |
2302
|
0
|
|
|
|
|
0
|
$s->_print("$PKG: " . |
2303
|
|
|
|
|
|
|
"input as soon as the format is worked out. If a value found later\n"); |
2304
|
0
|
|
|
|
|
0
|
$s->_print("$PKG: " . |
2305
|
|
|
|
|
|
|
"turns out to use another DateTime format, it'll generate a DateTime\n"); |
2306
|
0
|
|
|
|
|
0
|
$s->_print("$PKG: parse error, as is the case now.\n"); |
2307
|
0
|
|
|
|
|
0
|
$s->_print_error("unable to parse DateTime"); |
2308
|
|
|
|
|
|
|
} else { |
2309
|
2
|
|
|
|
|
18
|
$s->_print_error("$in_file_disp: record $recnum: field $field: " . |
2310
|
|
|
|
|
|
|
"unable to parse DateTime '$_'"); |
2311
|
|
|
|
|
|
|
} |
2312
|
|
|
|
|
|
|
} |
2313
|
|
|
|
|
|
|
|
2314
|
314
|
|
|
|
|
651
|
return $dt; |
2315
|
329
|
|
|
|
|
2387
|
}; |
2316
|
|
|
|
|
|
|
$self->{_write_update_before_ar}->[$i] = sub { |
2317
|
96
|
100
|
|
96
|
|
240
|
return '' unless defined($_); |
2318
|
87
|
100
|
|
|
|
211
|
return $_ if !ref($_); |
2319
|
81
|
50
|
|
|
|
297
|
return $_ unless $_->isa('DateTime'); |
2320
|
|
|
|
|
|
|
|
2321
|
81
|
|
|
|
|
268
|
my $str = $obj_strptime_out->format_datetime($_); |
2322
|
|
|
|
|
|
|
|
2323
|
81
|
50
|
|
|
|
16151
|
if (!defined($str)) { |
2324
|
0
|
|
|
|
|
0
|
my $s = $_[0]; |
2325
|
0
|
|
|
|
|
0
|
my $recnum = $s->get_recnum(); |
2326
|
0
|
|
|
|
|
0
|
my $field = _get_def($_[1], '<?>'); |
2327
|
0
|
|
|
|
|
0
|
$s->_print_error("$in_file_disp: record $recnum: field $field: " . |
2328
|
|
|
|
|
|
|
"unable to print DateTime '$_'") |
2329
|
|
|
|
|
|
|
} |
2330
|
|
|
|
|
|
|
|
2331
|
81
|
|
|
|
|
188
|
return $str; |
2332
|
329
|
|
|
|
|
1696
|
}; |
2333
|
|
|
|
|
|
|
} |
2334
|
|
|
|
|
|
|
|
2335
|
345
|
|
|
|
|
1234
|
$self->{_coldata} = [ @coldata ]; |
2336
|
|
|
|
|
|
|
|
2337
|
345
|
|
|
|
|
2110
|
my @loop = ( |
2338
|
|
|
|
|
|
|
['_read_update_after_hr', '_read_update_after_ar', 'read post'], |
2339
|
|
|
|
|
|
|
['_write_update_before_hr', '_write_update_before_ar', 'write pre'] |
2340
|
|
|
|
|
|
|
); |
2341
|
345
|
|
|
|
|
1143
|
for my $ii (0..$#loop) { |
2342
|
690
|
|
|
|
|
1360
|
my $l = $loop[$ii]; |
2343
|
|
|
|
|
|
|
|
2344
|
690
|
|
|
|
|
1583
|
my $ht = $self->{$l->[0]}; |
2345
|
690
|
|
|
|
|
1112
|
my @subrefs = @{$self->{$l->[1]}}; |
|
690
|
|
|
|
|
1821
|
|
2346
|
690
|
|
|
|
|
1171
|
for my $field (keys %{$ht}) { |
|
690
|
|
|
|
|
1776
|
|
2347
|
70
|
50
|
|
|
|
192
|
unless (exists $named_fields{$field}) { |
2348
|
0
|
|
|
|
|
0
|
$self->_print_error($l->[2] . ": unknown field '$field'", |
2349
|
|
|
|
|
|
|
0, ERR_UNKNOWN_FIELD, { %named_fields } ); |
2350
|
0
|
|
|
|
|
0
|
next; |
2351
|
|
|
|
|
|
|
} |
2352
|
|
|
|
|
|
|
|
2353
|
70
|
|
|
|
|
120
|
my $i = $named_fields{$field}; |
2354
|
|
|
|
|
|
|
|
2355
|
70
|
|
|
|
|
96
|
my @allsubs; |
2356
|
70
|
|
|
|
|
108
|
push @allsubs, @{$ht->{$field}}; |
|
70
|
|
|
|
|
135
|
|
2357
|
70
|
100
|
|
|
|
174
|
if (defined($subrefs[$i])) { |
2358
|
2
|
50
|
|
|
|
8
|
unshift @allsubs, $subrefs[$i] if $ii == 0; |
2359
|
2
|
50
|
|
|
|
6
|
push @allsubs, $subrefs[$i] if $ii == 1; |
2360
|
|
|
|
|
|
|
} |
2361
|
|
|
|
|
|
|
|
2362
|
|
|
|
|
|
|
my $finalsub = sub { |
2363
|
196
|
|
|
196
|
|
349
|
for my $s (@allsubs) { |
2364
|
262
|
|
|
|
|
979
|
$_ = $s->(@_); |
2365
|
|
|
|
|
|
|
} |
2366
|
189
|
|
|
|
|
6359
|
return $_; |
2367
|
70
|
|
|
|
|
248
|
}; |
2368
|
70
|
|
|
|
|
157
|
$subrefs[$i] = $finalsub; |
2369
|
|
|
|
|
|
|
|
2370
|
|
|
|
|
|
|
} |
2371
|
690
|
|
|
|
|
2132
|
$self->{$l->[1]} = [ @subrefs ]; |
2372
|
|
|
|
|
|
|
} |
2373
|
|
|
|
|
|
|
|
2374
|
345
|
|
|
|
|
1684
|
my $tmp = _get_def($self->{out_fields}, $self->{write_fields}); |
2375
|
345
|
100
|
|
|
|
1326
|
my @wf = @{$tmp} if defined($tmp); |
|
6
|
|
|
|
|
14
|
|
2376
|
345
|
|
|
|
|
748
|
my $count_field_not_found = 0; |
2377
|
345
|
|
|
|
|
850
|
for (@wf) { |
2378
|
16
|
100
|
66
|
|
|
106
|
next if !defined($_) or $_ eq '' or exists $named_fields{$_}; |
|
|
|
100
|
|
|
|
|
2379
|
3
|
|
|
|
|
6
|
$count_field_not_found++; |
2380
|
3
|
|
|
|
|
18
|
$self->_print_error("out_fields: unknown field '$_'", |
2381
|
|
|
|
|
|
|
1, ERR_UNKNOWN_FIELD, { %named_fields } ); |
2382
|
|
|
|
|
|
|
} |
2383
|
345
|
100
|
|
|
|
1022
|
if ($count_field_not_found) { |
2384
|
2
|
|
|
|
|
5
|
$self->_print_error("non existent field(s) encountered"); |
2385
|
1
|
|
|
|
|
59
|
delete $self->{out_fields}; |
2386
|
1
|
|
|
|
|
4
|
delete $self->{write_fields}; |
2387
|
|
|
|
|
|
|
} |
2388
|
|
|
|
|
|
|
|
2389
|
344
|
100
|
|
|
|
1068
|
my %sh = %{$self->{_out_headers}} if defined($self->{_out_headers}); |
|
4
|
|
|
|
|
16
|
|
2390
|
344
|
|
|
|
|
682
|
$count_field_not_found = 0; |
2391
|
344
|
|
|
|
|
988
|
for (keys %sh) { |
2392
|
8
|
100
|
33
|
|
|
50
|
next if !defined($_) or $_ eq '' or exists $named_fields{$_}; |
|
|
|
66
|
|
|
|
|
2393
|
2
|
|
|
|
|
4
|
$count_field_not_found++; |
2394
|
2
|
|
|
|
|
12
|
$self->_print_error("out_header: unknown field '$_'", |
2395
|
|
|
|
|
|
|
1, ERR_UNKNOWN_FIELD, { %named_fields } ); |
2396
|
|
|
|
|
|
|
} |
2397
|
344
|
100
|
|
|
|
937
|
$self->_print_error("non existent field(s) encountered") if $count_field_not_found; |
2398
|
|
|
|
|
|
|
|
2399
|
343
|
|
|
|
|
2544
|
return 1; |
2400
|
|
|
|
|
|
|
} |
2401
|
|
|
|
|
|
|
|
2402
|
|
|
|
|
|
|
# |
2403
|
|
|
|
|
|
|
# Return 0 if there's no more records (error or eof reached), 1 if a record got read |
2404
|
|
|
|
|
|
|
# successfully. |
2405
|
|
|
|
|
|
|
# |
2406
|
|
|
|
|
|
|
# If return value is 1: |
2407
|
|
|
|
|
|
|
# $$ref_ar and $$ref_hr are set to array ref and hash ref of the record, respectively |
2408
|
|
|
|
|
|
|
# |
2409
|
|
|
|
|
|
|
# If return value is 0: |
2410
|
|
|
|
|
|
|
# $$ref_ar and $$ref_hr are set to undef if an error occured |
2411
|
|
|
|
|
|
|
# $$ref_ar and $$ref_hr are set to a scalar if eof reached |
2412
|
|
|
|
|
|
|
# |
2413
|
|
|
|
|
|
|
sub _read_one_record_from_input { |
2414
|
2329
|
|
|
2329
|
|
4935
|
my ($self, $ref_ar, $ref_row_hr) = @_; |
2415
|
|
|
|
|
|
|
|
2416
|
2329
|
|
|
|
|
4355
|
my $_debug = $self->{_debug}; |
2417
|
2329
|
|
|
|
|
3688
|
my $_debug_extra_fields = $self->{_debug_extra_fields}; |
2418
|
2329
|
|
|
|
|
3707
|
my $_debugh = $self->{_debugh}; |
2419
|
|
|
|
|
|
|
|
2420
|
2329
|
|
|
|
|
5212
|
my $in_file_disp = $self->get_in_file_disp(); |
2421
|
|
|
|
|
|
|
|
2422
|
2329
|
|
|
|
|
3981
|
my $incsv = $self->{_in_csvobj}; |
2423
|
2329
|
|
|
|
|
3539
|
my $ar; |
2424
|
|
|
|
|
|
|
|
2425
|
|
|
|
|
|
|
print($_debugh "$PKG: '$in_file_disp': will read line #" . ($self->{_row_read} + 1) . "\n") |
2426
|
2329
|
50
|
|
|
|
5342
|
if $self->{_debug_read}; |
2427
|
|
|
|
|
|
|
|
2428
|
2329
|
100
|
|
|
|
5116
|
unless ($ar = _mygetline($incsv, $self->{_inh})) { |
2429
|
284
|
50
|
|
|
|
11520
|
if (!$incsv->eof()) { |
2430
|
0
|
|
|
|
|
0
|
my ($code, $str, $pos) = $incsv->error_diag(); |
2431
|
0
|
|
|
|
|
0
|
$self->_print_error("$code: $str, record " . $incsv->record_number . ", position $pos"); |
2432
|
0
|
|
|
|
|
0
|
$$ref_ar = undef; |
2433
|
0
|
|
|
|
|
0
|
$$ref_row_hr = undef; |
2434
|
|
|
|
|
|
|
} else { |
2435
|
284
|
|
|
|
|
2104
|
$$ref_ar = 1; |
2436
|
284
|
|
|
|
|
677
|
$$ref_row_hr = 1; |
2437
|
|
|
|
|
|
|
} |
2438
|
|
|
|
|
|
|
|
2439
|
284
|
|
|
|
|
1443
|
$self->_close_inh(); |
2440
|
|
|
|
|
|
|
|
2441
|
284
|
|
|
|
|
1141
|
return 0; |
2442
|
|
|
|
|
|
|
} |
2443
|
|
|
|
|
|
|
|
2444
|
2045
|
|
|
|
|
63747
|
$self->{_row_read}++; |
2445
|
|
|
|
|
|
|
|
2446
|
2045
|
|
|
|
|
3385
|
my %named_fields = %{$self->{_named_fields}}; |
|
2045
|
|
|
|
|
10002
|
|
2447
|
|
|
|
|
|
|
|
2448
|
2045
|
50
|
|
|
|
5756
|
if ($self->{_debug_read}) { |
2449
|
0
|
|
|
|
|
0
|
print($_debugh "Line " . $self->{_row_read} . ":\n--\n"); |
2450
|
0
|
|
|
|
|
0
|
for (sort keys %named_fields) { |
2451
|
0
|
|
|
|
|
0
|
my $c = _get_def($ar->[$named_fields{$_}], '<undef>'); |
2452
|
0
|
|
|
|
|
0
|
print($_debugh " $_ => '" . $c . "'\n"); |
2453
|
|
|
|
|
|
|
} |
2454
|
|
|
|
|
|
|
} |
2455
|
|
|
|
|
|
|
|
2456
|
2045
|
|
|
|
|
3630
|
my $columns_ar = $self->{_columns}; |
2457
|
|
|
|
|
|
|
|
2458
|
2045
|
|
|
|
|
3399
|
my $no_undef = $self->{no_undef}; |
2459
|
2045
|
100
|
|
|
|
4499
|
if ($no_undef) { |
2460
|
30
|
|
|
|
|
63
|
for (0..$#{$columns_ar}) { |
|
30
|
|
|
|
|
103
|
|
2461
|
324
|
100
|
|
|
|
1043
|
$ar->[$_] = '' unless defined($ar->[$_]); |
2462
|
|
|
|
|
|
|
} |
2463
|
|
|
|
|
|
|
} |
2464
|
|
|
|
|
|
|
|
2465
|
2045
|
|
|
|
|
3621
|
my $row_hr = { }; |
2466
|
|
|
|
|
|
|
$row_hr->{$_} = $ar->[$self->{_regular_named_fields}->{$_}] |
2467
|
2045
|
|
|
|
|
3224
|
foreach keys %{$self->{_regular_named_fields}}; |
|
2045
|
|
|
|
|
14072
|
|
2468
|
|
|
|
|
|
|
|
2469
|
2045
|
|
|
|
|
4523
|
my $rpost = $self->{_read_update_after_ar}; |
2470
|
2045
|
|
|
|
|
3118
|
for my $i (0..$#{$columns_ar}) { |
|
2045
|
|
|
|
|
4791
|
|
2471
|
10556
|
|
|
|
|
15404
|
my $subref = $rpost->[$i]; |
2472
|
10556
|
100
|
|
|
|
23234
|
next unless defined($subref); |
2473
|
|
|
|
|
|
|
|
2474
|
479
|
|
|
|
|
750
|
do { |
2475
|
479
|
|
|
|
|
817
|
my $field = $columns_ar->[$i]; |
2476
|
479
|
|
|
|
|
845
|
local $_ = $ar->[$i]; |
2477
|
479
|
|
|
|
|
1080
|
my $new_val = $subref->($self, $field); |
2478
|
474
|
|
|
|
|
940
|
$ar->[$i] = $new_val; |
2479
|
474
|
50
|
|
|
|
1726
|
$row_hr->{$field} = $new_val if defined($field); |
2480
|
|
|
|
|
|
|
} |
2481
|
|
|
|
|
|
|
|
2482
|
|
|
|
|
|
|
} |
2483
|
|
|
|
|
|
|
|
2484
|
2040
|
|
|
|
|
3337
|
for my $i (@{$self->{_extra_fields_indexes}}) { |
|
2040
|
|
|
|
|
4099
|
|
2485
|
442
|
|
|
|
|
1039
|
my $name = $columns_ar->[$i]; |
2486
|
442
|
|
|
|
|
1023
|
my $e = $self->{_extra_fields_definitions}->{$name}; |
2487
|
|
|
|
|
|
|
|
2488
|
442
|
50
|
|
|
|
1186
|
print($_debugh "Extra field: #$i: $name\n") if $_debug_extra_fields; |
2489
|
|
|
|
|
|
|
|
2490
|
442
|
|
|
|
|
852
|
my $value; |
2491
|
442
|
100
|
|
|
|
9646
|
if ($e->ef_type == $EF_LINK) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
2492
|
|
|
|
|
|
|
|
2493
|
238
|
50
|
|
|
|
2639
|
print($_debugh " linked field\n") if $_debug_extra_fields; |
2494
|
|
|
|
|
|
|
|
2495
|
238
|
|
|
|
|
4405
|
my $remobj = $e->link_remote_obj; |
2496
|
|
|
|
|
|
|
$value = $remobj->vlookup( |
2497
|
|
|
|
|
|
|
$e->link_remote_search, |
2498
|
238
|
|
|
|
|
5846
|
$ar->[$named_fields{$e->link_self_search}], |
2499
|
|
|
|
|
|
|
$e->link_remote_read, |
2500
|
|
|
|
|
|
|
$e->link_vlookup_opts |
2501
|
|
|
|
|
|
|
); |
2502
|
|
|
|
|
|
|
|
2503
|
|
|
|
|
|
|
} elsif ($e->ef_type == $EF_FUNC) { |
2504
|
|
|
|
|
|
|
|
2505
|
51
|
50
|
|
|
|
1616
|
print($_debugh " computed field\n") if $_debug_extra_fields; |
2506
|
|
|
|
|
|
|
|
2507
|
51
|
|
|
|
|
889
|
$value = $e->func_sub->($name, $row_hr, $self->{_stats}); |
2508
|
|
|
|
|
|
|
|
2509
|
|
|
|
|
|
|
} elsif ($e->ef_type == $EF_COPY) { |
2510
|
|
|
|
|
|
|
|
2511
|
153
|
50
|
|
|
|
8580
|
print($_debugh " copy field\n") if $_debug_extra_fields; |
2512
|
|
|
|
|
|
|
|
2513
|
153
|
|
|
|
|
2850
|
my $input = $row_hr->{$e->copy_source}; |
2514
|
153
|
50
|
33
|
|
|
1664
|
$input = '' if !defined($input) and $no_undef; |
2515
|
153
|
100
|
|
|
|
3878
|
if (defined($e->copy_sub)) { |
2516
|
57
|
|
|
|
|
595
|
local $_ = $input; |
2517
|
57
|
|
|
|
|
995
|
$value = $e->copy_sub->(); |
2518
|
|
|
|
|
|
|
} else { |
2519
|
96
|
|
|
|
|
971
|
$value = $input; |
2520
|
|
|
|
|
|
|
} |
2521
|
|
|
|
|
|
|
|
2522
|
153
|
50
|
|
|
|
1448
|
print($_debugh " in: '$input', out: '$value'\n") if $_debug_extra_fields; |
2523
|
|
|
|
|
|
|
|
2524
|
|
|
|
|
|
|
} else { |
2525
|
0
|
|
|
|
|
0
|
confess "Unknown ef_type '" . $e->ef_type . "', check this module' code urgently!"; |
2526
|
|
|
|
|
|
|
} |
2527
|
|
|
|
|
|
|
|
2528
|
440
|
100
|
100
|
|
|
2431
|
$value = '' if !defined($value) and $no_undef; |
2529
|
440
|
|
|
|
|
1151
|
$ar->[$i] = $value; |
2530
|
440
|
|
|
|
|
1217
|
$row_hr->{$name} = $value; |
2531
|
|
|
|
|
|
|
|
2532
|
440
|
50
|
|
|
|
1369
|
print($_debugh " $name => '$value'\n") if $_debug_extra_fields; |
2533
|
|
|
|
|
|
|
|
2534
|
|
|
|
|
|
|
} |
2535
|
|
|
|
|
|
|
|
2536
|
2038
|
100
|
|
|
|
5070
|
if (defined($self->{read_post_update_hr})) { |
2537
|
33
|
|
|
|
|
100
|
$self->{read_post_update_hr}->($row_hr, $self->{_stats}, $self->get_recnum()); |
2538
|
33
|
|
|
|
|
318
|
$ar->[$named_fields{$_}] = $row_hr->{$_} foreach keys %named_fields; |
2539
|
|
|
|
|
|
|
} |
2540
|
|
|
|
|
|
|
|
2541
|
2038
|
100
|
|
|
|
7468
|
lock_keys(%$row_hr) if $self->{croak_if_error}; |
2542
|
|
|
|
|
|
|
|
2543
|
|
|
|
|
|
|
$self->{walker_ar}->($ar, $self->{_stats}, $self->get_recnum()) |
2544
|
2038
|
100
|
|
|
|
15211
|
if defined($self->{walker_ar}); |
2545
|
|
|
|
|
|
|
$self->{walker_hr}->($row_hr, $self->{_stats}, $self->get_recnum()) |
2546
|
2038
|
100
|
|
|
|
5428
|
if defined($self->{walker_hr}); |
2547
|
|
|
|
|
|
|
|
2548
|
2037
|
|
|
|
|
3705
|
$$ref_ar = $ar; |
2549
|
2037
|
|
|
|
|
3314
|
$$ref_row_hr = $row_hr; |
2550
|
|
|
|
|
|
|
|
2551
|
2037
|
|
|
|
|
8424
|
return 1; |
2552
|
|
|
|
|
|
|
} |
2553
|
|
|
|
|
|
|
|
2554
|
|
|
|
|
|
|
sub _open_read { |
2555
|
295
|
|
|
295
|
|
626
|
my $self = shift; |
2556
|
|
|
|
|
|
|
|
2557
|
295
|
|
|
|
|
619
|
my $verbose = $self->{verbose}; |
2558
|
295
|
|
|
|
|
1195
|
my $in_file_disp = $self->get_in_file_disp(); |
2559
|
|
|
|
|
|
|
|
2560
|
295
|
|
|
|
|
849
|
$self->{_stats} = { }; |
2561
|
295
|
|
|
|
|
677
|
$self->{_read_in_progress} = 1; |
2562
|
|
|
|
|
|
|
|
2563
|
295
|
50
|
|
|
|
895
|
$self->_print("-- $in_file_disp reading start\n") if $verbose; |
2564
|
|
|
|
|
|
|
} |
2565
|
|
|
|
|
|
|
|
2566
|
|
|
|
|
|
|
sub _close_read { |
2567
|
314
|
|
|
314
|
|
712
|
my $self = shift; |
2568
|
314
|
|
|
|
|
600
|
my $keep_quiet = shift; |
2569
|
|
|
|
|
|
|
|
2570
|
314
|
|
|
|
|
657
|
my $verbose = $self->{verbose}; |
2571
|
314
|
|
|
|
|
849
|
my $in_file_disp = $self->get_in_file_disp(); |
2572
|
|
|
|
|
|
|
|
2573
|
314
|
|
|
|
|
772
|
$self->{_read_in_progress} = 0; |
2574
|
|
|
|
|
|
|
|
2575
|
314
|
50
|
33
|
|
|
1196
|
if ($verbose and !$keep_quiet) { |
2576
|
0
|
|
|
|
|
0
|
$self->_print("-- $in_file_disp reading end: " . $self->{_row_read} . " row(s) read\n"); |
2577
|
0
|
|
|
|
|
0
|
for my $k (sort keys %{$self->{_stats}}) { |
|
0
|
|
|
|
|
0
|
|
2578
|
0
|
|
|
|
|
0
|
$self->_printf(" %7d %s\n", $self->{_stats}->{$k}, $k); |
2579
|
|
|
|
|
|
|
} |
2580
|
|
|
|
|
|
|
} |
2581
|
|
|
|
|
|
|
} |
2582
|
|
|
|
|
|
|
|
2583
|
|
|
|
|
|
|
# Return 0 if error, 1 if all good |
2584
|
|
|
|
|
|
|
sub _S4_read_all_in_mem { |
2585
|
165
|
|
|
165
|
|
377
|
my $self = shift; |
2586
|
|
|
|
|
|
|
|
2587
|
165
|
|
|
|
|
588
|
$self->_register_pass("_S4_read_all_in_mem()"); |
2588
|
|
|
|
|
|
|
|
2589
|
165
|
|
|
|
|
633
|
$self->_open_read(); |
2590
|
|
|
|
|
|
|
|
2591
|
165
|
|
|
|
|
374
|
my $ar; |
2592
|
|
|
|
|
|
|
my $row_hr; |
2593
|
165
|
|
|
|
|
697
|
while ($self->_read_one_record_from_input(\$ar, \$row_hr)) { |
2594
|
|
|
|
|
|
|
|
2595
|
1543
|
|
|
|
|
2566
|
push @{$self->{_flat}}, $ar; |
|
1543
|
|
|
|
|
4903
|
|
2596
|
|
|
|
|
|
|
|
2597
|
|
|
|
|
|
|
} |
2598
|
|
|
|
|
|
|
|
2599
|
163
|
50
|
|
|
|
549
|
my $retcode = (defined($ar) ? 1 : 0); |
2600
|
163
|
|
|
|
|
645
|
$self->_update_in_mem_record_count(); |
2601
|
|
|
|
|
|
|
|
2602
|
163
|
|
|
|
|
616
|
$self->_close_read(); |
2603
|
|
|
|
|
|
|
|
2604
|
163
|
|
|
|
|
513
|
return $retcode; |
2605
|
|
|
|
|
|
|
} |
2606
|
|
|
|
|
|
|
|
2607
|
|
|
|
|
|
|
sub _chain_array { |
2608
|
24
|
|
|
24
|
|
194
|
return split(/\s*->\s*/, $_[0]); |
2609
|
|
|
|
|
|
|
} |
2610
|
|
|
|
|
|
|
|
2611
|
|
|
|
|
|
|
sub _chain_str { |
2612
|
2
|
|
|
2
|
|
11
|
return join('->', @_); |
2613
|
|
|
|
|
|
|
} |
2614
|
|
|
|
|
|
|
|
2615
|
|
|
|
|
|
|
sub field_add_link { |
2616
|
25
|
|
|
25
|
1
|
5483
|
my $self = shift; |
2617
|
|
|
|
|
|
|
|
2618
|
25
|
|
|
|
|
821
|
validate_pos(@_, {type => UNDEF | SCALAR}, {type => SCALAR}, {type => SCALAR | OBJECT}, |
2619
|
|
|
|
|
|
|
{type => HASHREF, optional => 1}); |
2620
|
|
|
|
|
|
|
|
2621
|
22
|
|
|
|
|
109
|
my ($new_field, $chain, $obj, $param_opts) = @_; |
2622
|
|
|
|
|
|
|
|
2623
|
22
|
|
|
|
|
51
|
my $croak_if_error = $self->{croak_if_error}; |
2624
|
22
|
|
|
|
|
52
|
my $_debug = $self->{_debug}; |
2625
|
22
|
|
|
|
|
44
|
my $_debugh = $self->{_debugh}; |
2626
|
|
|
|
|
|
|
|
2627
|
22
|
|
|
|
|
75
|
my @c = _chain_array($chain); |
2628
|
22
|
100
|
|
|
|
85
|
$new_field = $c[2] unless defined($new_field); |
2629
|
|
|
|
|
|
|
|
2630
|
22
|
50
|
|
|
|
65
|
print($_debugh "Registering new linked field, new_field = '$new_field', chain = '$chain'\n") |
2631
|
|
|
|
|
|
|
if $_debug; |
2632
|
|
|
|
|
|
|
|
2633
|
22
|
100
|
66
|
|
|
164
|
unless (@c == 3 and $c[2] ne '') { |
2634
|
1
|
|
|
|
|
5
|
$self->_print_error("wrong links chain parameter: '$chain', " . |
2635
|
|
|
|
|
|
|
"look for CHAIN in Text::AutoCSV manual for help"); |
2636
|
1
|
|
|
|
|
8
|
return undef; |
2637
|
|
|
|
|
|
|
} |
2638
|
|
|
|
|
|
|
|
2639
|
21
|
50
|
|
|
|
68
|
return undef unless $self->_status_forward('S2'); |
2640
|
21
|
50
|
|
|
|
76
|
return undef unless $self->_status_backward('S2'); |
2641
|
|
|
|
|
|
|
|
2642
|
21
|
100
|
|
|
|
73
|
my @tmp = %{$param_opts} if $param_opts; |
|
13
|
|
|
|
|
52
|
|
2643
|
21
|
|
|
|
|
585
|
my %opts = validate(@tmp, $SEARCH_VALIDATE_OPTIONS); |
2644
|
|
|
|
|
|
|
|
2645
|
20
|
|
|
|
|
92
|
my $target_name = ''; |
2646
|
20
|
100
|
|
|
|
69
|
if (ref $obj eq '') { |
2647
|
19
|
|
|
|
|
38
|
my $in_file = $obj; |
2648
|
19
|
|
|
|
|
39
|
$target_name = $in_file; |
2649
|
|
|
|
|
|
|
|
2650
|
|
|
|
|
|
|
# |
2651
|
|
|
|
|
|
|
# TODO (?) |
2652
|
|
|
|
|
|
|
# |
2653
|
|
|
|
|
|
|
# Take into account the fact that the OS' file system is case insensitive. At the |
2654
|
|
|
|
|
|
|
# moment, two different strings (even if identical in a case insensitive comparison) |
2655
|
|
|
|
|
|
|
# will be managed as being distinct. |
2656
|
|
|
|
|
|
|
# I put a question mark in this TO DO - after all, the user of this module had better |
2657
|
|
|
|
|
|
|
# use same case when dealing with multiple links of the same file. |
2658
|
|
|
|
|
|
|
# |
2659
|
|
|
|
|
|
|
# Also, tuning this module' behavior depending on the OS' characteristics would be not |
2660
|
|
|
|
|
|
|
# ideal, it'd add a level of complexity to understand how it works and what to expect. |
2661
|
|
|
|
|
|
|
# |
2662
|
19
|
100
|
100
|
|
|
95
|
if (exists $self->{_obj} and exists $self->{_obj}->{$in_file}) { |
2663
|
|
|
|
|
|
|
|
2664
|
4
|
50
|
|
|
|
11
|
print( |
2665
|
|
|
|
|
|
|
$_debugh |
2666
|
|
|
|
|
|
|
"field_add_link: file '$in_file': re-using existing Text::AutoCSV object\n" |
2667
|
|
|
|
|
|
|
) if $_debug; |
2668
|
|
|
|
|
|
|
|
2669
|
4
|
|
|
|
|
8
|
$obj = $self->{_obj}->{$in_file}; |
2670
|
|
|
|
|
|
|
} else { |
2671
|
|
|
|
|
|
|
|
2672
|
15
|
50
|
|
|
|
49
|
print($_debugh "field_add_link: file '$in_file': creating new Text::AutoCSV object\n") |
2673
|
|
|
|
|
|
|
if $_debug; |
2674
|
|
|
|
|
|
|
|
2675
|
15
|
100
|
|
|
|
64
|
$self->{_obj} = { } unless exists $self->{_obj}; |
2676
|
|
|
|
|
|
|
|
2677
|
|
|
|
|
|
|
# |
2678
|
|
|
|
|
|
|
# The created Text::AutoCSV must be created with the same search options as what is |
2679
|
|
|
|
|
|
|
# currently found in $self. |
2680
|
|
|
|
|
|
|
# |
2681
|
|
|
|
|
|
|
# Why? |
2682
|
|
|
|
|
|
|
# Because the link is populated doing a vlookup on the remote object ($obj below), |
2683
|
|
|
|
|
|
|
# not on $self. Therefore, if we don't "propagate" search options from $self to |
2684
|
|
|
|
|
|
|
# $obj, search tunnings won't work as user would expect. |
2685
|
|
|
|
|
|
|
# |
2686
|
15
|
|
|
|
|
36
|
my %search_opts; |
2687
|
15
|
|
|
|
|
41
|
for (qw(search_case search_trim search_ignore_empty search_ignore_accents |
2688
|
|
|
|
|
|
|
search_value_if_not_found search_value_if_ambiguous search_ignore_ambiguous)) { |
2689
|
|
|
|
|
|
|
# We assign depending on whether or not the attribute EXISTS - the definedness |
2690
|
|
|
|
|
|
|
# is not appropriate, in case an attribute would have been assigned to undef. |
2691
|
105
|
100
|
|
|
|
267
|
$search_opts{$_} = $self->{$_} if exists $self->{$_}; |
2692
|
|
|
|
|
|
|
} |
2693
|
|
|
|
|
|
|
|
2694
|
|
|
|
|
|
|
$obj = Text::AutoCSV->new( |
2695
|
|
|
|
|
|
|
in_file => $in_file, |
2696
|
|
|
|
|
|
|
verbose => $self->{verbose}, |
2697
|
|
|
|
|
|
|
infoh => $self->{infoh}, |
2698
|
|
|
|
|
|
|
_debug => $self->{debug}, |
2699
|
|
|
|
|
|
|
_debugh => $self->{debugh}, |
2700
|
15
|
|
|
|
|
130
|
%search_opts |
2701
|
|
|
|
|
|
|
); |
2702
|
15
|
|
|
|
|
83
|
$self->{_obj}->{$in_file} = $obj; |
2703
|
|
|
|
|
|
|
} |
2704
|
|
|
|
|
|
|
} else { |
2705
|
1
|
|
|
|
|
3
|
$target_name = '(object)'; |
2706
|
1
|
50
|
|
|
|
6
|
print($_debugh "field_add_link: Text::AutoCSV object provided\n") if $_debug; |
2707
|
|
|
|
|
|
|
} |
2708
|
|
|
|
|
|
|
|
2709
|
20
|
100
|
|
|
|
90
|
$self->{_extra_fields} = [ ] unless exists $self->{_extra_fields}; |
2710
|
|
|
|
|
|
|
|
2711
|
20
|
|
|
|
|
45
|
push @{$self->{_extra_fields}}, ExtraField->new( |
|
20
|
|
|
|
|
596
|
|
2712
|
|
|
|
|
|
|
ef_type => $EF_LINK, |
2713
|
|
|
|
|
|
|
self_name => $new_field, |
2714
|
|
|
|
|
|
|
description => "link: $target_name, chain: $chain", |
2715
|
|
|
|
|
|
|
check_field_existence => $c[0], |
2716
|
|
|
|
|
|
|
|
2717
|
|
|
|
|
|
|
link_self_search => $c[0], |
2718
|
|
|
|
|
|
|
link_remote_obj => $obj, |
2719
|
|
|
|
|
|
|
link_remote_search => $c[1], |
2720
|
|
|
|
|
|
|
link_remote_read => $c[2], |
2721
|
|
|
|
|
|
|
|
2722
|
|
|
|
|
|
|
link_vlookup_opts => \%opts |
2723
|
|
|
|
|
|
|
); |
2724
|
|
|
|
|
|
|
|
2725
|
20
|
|
|
|
|
2778
|
return $self; |
2726
|
|
|
|
|
|
|
} |
2727
|
|
|
|
|
|
|
|
2728
|
|
|
|
|
|
|
sub links { |
2729
|
2
|
|
|
2
|
1
|
6
|
my $self = shift; |
2730
|
|
|
|
|
|
|
|
2731
|
2
|
|
|
|
|
44
|
validate_pos(@_, {type => UNDEF | SCALAR}, {type => SCALAR}, {type => SCALAR | OBJECT}, |
2732
|
|
|
|
|
|
|
{type => HASHREF, optional => 1}); |
2733
|
|
|
|
|
|
|
|
2734
|
2
|
|
|
|
|
10
|
my $prefix_field = shift; |
2735
|
2
|
|
|
|
|
6
|
my $chain = shift; |
2736
|
2
|
|
|
|
|
5
|
my ($obj, $param_opts) = @_; |
2737
|
|
|
|
|
|
|
|
2738
|
2
|
|
|
|
|
9
|
my @c = _chain_array($chain); |
2739
|
|
|
|
|
|
|
|
2740
|
2
|
50
|
33
|
|
|
25
|
if (@c != 2 or $c[0] eq '' or $c[1] eq '') { |
|
|
|
33
|
|
|
|
|
2741
|
0
|
|
|
|
|
0
|
$self->_print_error("wrong links chain parameter: '$chain', " . |
2742
|
|
|
|
|
|
|
"look for JOINCHAIN in Text::AutoCSV manual for help"); |
2743
|
0
|
|
|
|
|
0
|
return undef; |
2744
|
|
|
|
|
|
|
} |
2745
|
|
|
|
|
|
|
|
2746
|
2
|
100
|
|
|
|
10
|
$prefix_field = '' unless defined($prefix_field); |
2747
|
2
|
|
|
|
|
7
|
my $chain2 = _chain_str(@c, '*'); |
2748
|
|
|
|
|
|
|
|
2749
|
2
|
|
|
|
|
8
|
return $self->field_add_link($prefix_field, $chain2, @_); |
2750
|
|
|
|
|
|
|
} |
2751
|
|
|
|
|
|
|
|
2752
|
|
|
|
|
|
|
sub field_add_computed { |
2753
|
7
|
|
|
7
|
1
|
2517
|
my $self = shift; |
2754
|
|
|
|
|
|
|
|
2755
|
7
|
|
|
|
|
209
|
validate_pos(@_, {type => SCALAR}, {type => CODEREF}); |
2756
|
6
|
|
|
|
|
23
|
my ($new_field, $func) = @_; |
2757
|
|
|
|
|
|
|
|
2758
|
6
|
|
|
|
|
17
|
my $croak_if_error = $self->{croak_if_error}; |
2759
|
|
|
|
|
|
|
|
2760
|
6
|
|
|
|
|
14
|
my $_debug = $self->{_debug}; |
2761
|
6
|
|
|
|
|
13
|
my $_debugh = $self->{_debugh}; |
2762
|
|
|
|
|
|
|
|
2763
|
6
|
50
|
|
|
|
20
|
print($_debugh "Registering new computed field, new_field = '$new_field'\n") if $_debug; |
2764
|
|
|
|
|
|
|
|
2765
|
6
|
100
|
|
|
|
17
|
return undef unless $self->_status_forward('S2'); |
2766
|
5
|
50
|
|
|
|
20
|
return undef unless $self->_status_backward('S2'); |
2767
|
|
|
|
|
|
|
|
2768
|
5
|
|
|
|
|
14
|
push @{$self->{_extra_fields}}, ExtraField->new( |
|
5
|
|
|
|
|
133
|
|
2769
|
|
|
|
|
|
|
ef_type => $EF_FUNC, |
2770
|
|
|
|
|
|
|
self_name => $new_field, |
2771
|
|
|
|
|
|
|
description => "computed", |
2772
|
|
|
|
|
|
|
|
2773
|
|
|
|
|
|
|
func_sub => $func |
2774
|
|
|
|
|
|
|
); |
2775
|
|
|
|
|
|
|
|
2776
|
5
|
|
|
|
|
644
|
return $self; |
2777
|
|
|
|
|
|
|
} |
2778
|
|
|
|
|
|
|
|
2779
|
|
|
|
|
|
|
sub field_add_copy { |
2780
|
14
|
|
|
14
|
1
|
13785
|
my $self = shift; |
2781
|
|
|
|
|
|
|
|
2782
|
14
|
|
|
|
|
244
|
validate_pos(@_, {type => SCALAR}, {type => SCALAR}, {type => CODEREF, optional => 1}); |
2783
|
14
|
|
|
|
|
72
|
my ($new_field, $copy_source, $func) = @_; |
2784
|
|
|
|
|
|
|
|
2785
|
14
|
|
|
|
|
42
|
my $croak_if_error = $self->{croak_if_error}; |
2786
|
|
|
|
|
|
|
|
2787
|
14
|
|
|
|
|
33
|
my $_debug = $self->{_debug}; |
2788
|
14
|
|
|
|
|
33
|
my $_debugh = $self->{_debugh}; |
2789
|
|
|
|
|
|
|
|
2790
|
14
|
50
|
|
|
|
49
|
print($_debugh "Registering field copy, new_field = '$new_field' copied from '$copy_source'\n") |
2791
|
|
|
|
|
|
|
if $_debug; |
2792
|
|
|
|
|
|
|
|
2793
|
14
|
100
|
|
|
|
47
|
return undef unless $self->_status_forward('S2'); |
2794
|
12
|
50
|
|
|
|
47
|
return undef unless $self->_status_backward('S2'); |
2795
|
|
|
|
|
|
|
|
2796
|
12
|
100
|
|
|
|
36
|
push @{$self->{_extra_fields}}, ExtraField->new( |
|
12
|
|
|
|
|
528
|
|
2797
|
|
|
|
|
|
|
ef_type => $EF_COPY, |
2798
|
|
|
|
|
|
|
self_name => $new_field, |
2799
|
|
|
|
|
|
|
description => "copy of $copy_source " . (defined($func) ? '(with sub)' : '(no sub)'), |
2800
|
|
|
|
|
|
|
check_field_existence => $copy_source, |
2801
|
|
|
|
|
|
|
|
2802
|
|
|
|
|
|
|
copy_source => $copy_source, |
2803
|
|
|
|
|
|
|
copy_sub => $func |
2804
|
|
|
|
|
|
|
); |
2805
|
|
|
|
|
|
|
|
2806
|
12
|
|
|
|
|
1680
|
return $self; |
2807
|
|
|
|
|
|
|
} |
2808
|
|
|
|
|
|
|
|
2809
|
|
|
|
|
|
|
sub in_map { |
2810
|
15
|
|
|
15
|
1
|
1349
|
my $self = shift; |
2811
|
|
|
|
|
|
|
|
2812
|
15
|
|
|
|
|
48
|
return $self->read_update_after(@_); |
2813
|
|
|
|
|
|
|
} |
2814
|
|
|
|
|
|
|
|
2815
|
|
|
|
|
|
|
sub read_update_after { |
2816
|
16
|
|
|
16
|
1
|
31
|
my $self = shift; |
2817
|
16
|
|
|
|
|
194
|
validate_pos(@_, {type => SCALAR}, {type => CODEREF}); |
2818
|
|
|
|
|
|
|
|
2819
|
16
|
|
|
|
|
60
|
my ($field, $subref) = @_; |
2820
|
|
|
|
|
|
|
|
2821
|
16
|
|
|
|
|
33
|
my $_debug = $self->{_debug}; |
2822
|
16
|
|
|
|
|
32
|
my $_debugh = $self->{_debugh}; |
2823
|
|
|
|
|
|
|
|
2824
|
16
|
50
|
|
|
|
45
|
return undef unless $self->_status_forward('S2'); |
2825
|
16
|
50
|
|
|
|
45
|
return undef unless $self->_status_backward('S2'); |
2826
|
|
|
|
|
|
|
|
2827
|
16
|
50
|
|
|
|
42
|
print($_debugh "Registering read_post_update subref for field '$field'\n") if $_debug; |
2828
|
|
|
|
|
|
|
|
2829
|
|
|
|
|
|
|
$self->{_read_update_after_hr}->{$field} = [ ] |
2830
|
16
|
100
|
|
|
|
65
|
unless defined($self->{_read_update_after_hr}->{$field}); |
2831
|
|
|
|
|
|
|
|
2832
|
16
|
|
|
|
|
26
|
push @{$self->{_read_update_after_hr}->{$field}}, $subref; |
|
16
|
|
|
|
|
44
|
|
2833
|
|
|
|
|
|
|
|
2834
|
16
|
|
|
|
|
140
|
return $self; |
2835
|
|
|
|
|
|
|
} |
2836
|
|
|
|
|
|
|
|
2837
|
|
|
|
|
|
|
sub out_map { |
2838
|
11
|
|
|
11
|
1
|
17
|
my $self = shift; |
2839
|
|
|
|
|
|
|
|
2840
|
11
|
|
|
|
|
32
|
return $self->write_update_before(@_); |
2841
|
|
|
|
|
|
|
} |
2842
|
|
|
|
|
|
|
|
2843
|
|
|
|
|
|
|
sub write_update_before { |
2844
|
12
|
|
|
12
|
1
|
19
|
my $self = shift; |
2845
|
12
|
|
|
|
|
108
|
validate_pos(@_, {type => SCALAR}, {type => CODEREF}); |
2846
|
|
|
|
|
|
|
|
2847
|
12
|
|
|
|
|
36
|
my ($field, $subref) = @_; |
2848
|
|
|
|
|
|
|
|
2849
|
12
|
|
|
|
|
23
|
my $_debug = $self->{_debug}; |
2850
|
12
|
|
|
|
|
20
|
my $_debugh = $self->{_debugh}; |
2851
|
|
|
|
|
|
|
|
2852
|
12
|
50
|
|
|
|
26
|
return undef unless $self->_status_forward('S2'); |
2853
|
12
|
50
|
|
|
|
27
|
return undef unless $self->_status_backward('S2'); |
2854
|
|
|
|
|
|
|
|
2855
|
12
|
50
|
|
|
|
32
|
print($_debugh "Registering write_pre_update subref for field '$field'\n") if $_debug; |
2856
|
|
|
|
|
|
|
|
2857
|
|
|
|
|
|
|
$self->{_write_update_before_hr}->{$field} = [ ] |
2858
|
12
|
100
|
|
|
|
47
|
unless defined($self->{_write_update_before_hr}->{$field}); |
2859
|
|
|
|
|
|
|
|
2860
|
12
|
|
|
|
|
18
|
push @{$self->{_write_update_before_hr}->{$field}}, $subref; |
|
12
|
|
|
|
|
29
|
|
2861
|
|
|
|
|
|
|
|
2862
|
12
|
|
|
|
|
70
|
return $self; |
2863
|
|
|
|
|
|
|
} |
2864
|
|
|
|
|
|
|
|
2865
|
|
|
|
|
|
|
sub reset_next_record_hr { |
2866
|
187
|
|
|
187
|
1
|
418
|
my $self = shift; |
2867
|
|
|
|
|
|
|
|
2868
|
187
|
|
|
|
|
995
|
validate_pos(@_); |
2869
|
|
|
|
|
|
|
|
2870
|
187
|
|
|
|
|
684
|
$self->{_current_record} = undef; |
2871
|
|
|
|
|
|
|
|
2872
|
187
|
|
|
|
|
378
|
return $self; |
2873
|
|
|
|
|
|
|
} |
2874
|
|
|
|
|
|
|
|
2875
|
|
|
|
|
|
|
sub _create_internal_column_name_from_its_number { |
2876
|
1120
|
|
|
1120
|
|
2425
|
return sprintf("__%04i__", $_[0]); |
2877
|
|
|
|
|
|
|
} |
2878
|
|
|
|
|
|
|
|
2879
|
|
|
|
|
|
|
sub _ar_to_hr { |
2880
|
2048
|
|
|
2048
|
|
3221
|
my $self = shift; |
2881
|
|
|
|
|
|
|
|
2882
|
2048
|
|
|
|
|
15471
|
validate_pos(@_, {type => ARRAYREF}); |
2883
|
|
|
|
|
|
|
|
2884
|
2048
|
|
|
|
|
5758
|
my ($ar) = @_; |
2885
|
2048
|
|
|
|
|
3191
|
my $last_elem_index = scalar(@{$ar}) - 1; |
|
2048
|
|
|
|
|
4209
|
|
2886
|
|
|
|
|
|
|
|
2887
|
2048
|
|
|
|
|
3472
|
my $nr = $self->{_named_fields}; |
2888
|
2048
|
|
|
|
|
3125
|
my %h; |
2889
|
|
|
|
|
|
|
my %n_seen; |
2890
|
2048
|
|
|
|
|
3017
|
for (keys %{$nr}) { |
|
2048
|
|
|
|
|
6236
|
|
2891
|
9841
|
|
|
|
|
21307
|
$h{$_} = $ar->[$nr->{$_}]; |
2892
|
9841
|
|
|
|
|
19241
|
undef $n_seen{$nr->{$_}}; |
2893
|
|
|
|
|
|
|
} |
2894
|
2048
|
|
|
|
|
4913
|
for my $i (0..$last_elem_index) { |
2895
|
10908
|
100
|
|
|
|
24216
|
if (!exists($n_seen{$i})) { |
2896
|
1120
|
|
|
|
|
2010
|
my $k = _create_internal_column_name_from_its_number($i); |
2897
|
1120
|
50
|
|
|
|
3403
|
$h{$k} = $ar->[$i] if !exists $h{$k}; |
2898
|
|
|
|
|
|
|
} |
2899
|
|
|
|
|
|
|
} |
2900
|
|
|
|
|
|
|
|
2901
|
2048
|
100
|
|
|
|
7180
|
lock_keys(%h) if $self->{croak_if_error}; |
2902
|
|
|
|
|
|
|
|
2903
|
2048
|
|
|
|
|
18621
|
return \%h; |
2904
|
|
|
|
|
|
|
} |
2905
|
|
|
|
|
|
|
|
2906
|
|
|
|
|
|
|
sub get_next_record_hr { |
2907
|
1989
|
|
|
1989
|
1
|
3527
|
my $self = shift; |
2908
|
|
|
|
|
|
|
|
2909
|
1989
|
|
|
|
|
13767
|
validate_pos(@_, {type => SCALARREF, optional => 1}); |
2910
|
|
|
|
|
|
|
|
2911
|
1989
|
|
|
|
|
4830
|
my $refkey = $_[0]; |
2912
|
|
|
|
|
|
|
|
2913
|
1989
|
50
|
|
|
|
4396
|
return undef unless $self->_status_forward('S4'); |
2914
|
|
|
|
|
|
|
|
2915
|
1986
|
100
|
|
|
|
4581
|
if (!defined($self->{_current_record})) { |
2916
|
184
|
|
|
|
|
425
|
$self->{_current_record} = 0; |
2917
|
|
|
|
|
|
|
} else { |
2918
|
1802
|
|
|
|
|
2884
|
$self->{_current_record}++; |
2919
|
|
|
|
|
|
|
} |
2920
|
|
|
|
|
|
|
|
2921
|
1986
|
|
|
|
|
3798
|
my $ar = $self->{_flat}->[$self->{_current_record}]; |
2922
|
1986
|
100
|
|
|
|
4196
|
if (!defined($ar)) { |
2923
|
184
|
|
|
|
|
354
|
$self->{_current_record} = undef; |
2924
|
184
|
|
|
|
|
359
|
$$refkey = undef; |
2925
|
184
|
|
|
|
|
606
|
return undef; |
2926
|
|
|
|
|
|
|
} |
2927
|
|
|
|
|
|
|
|
2928
|
1802
|
|
|
|
|
2987
|
$$refkey = $self->{_current_record}; |
2929
|
|
|
|
|
|
|
|
2930
|
1802
|
|
|
|
|
3971
|
return $self->_ar_to_hr($ar); |
2931
|
|
|
|
|
|
|
} |
2932
|
|
|
|
|
|
|
|
2933
|
|
|
|
|
|
|
sub read { |
2934
|
60
|
|
|
60
|
1
|
24762
|
my $self = shift; |
2935
|
|
|
|
|
|
|
|
2936
|
60
|
|
|
|
|
459
|
validate_pos(@_); |
2937
|
|
|
|
|
|
|
|
2938
|
60
|
50
|
|
|
|
225
|
return undef unless $self->_status_backward('S3'); |
2939
|
60
|
100
|
|
|
|
187
|
return undef unless $self->_status_forward('S3'); |
2940
|
|
|
|
|
|
|
|
2941
|
51
|
|
|
|
|
209
|
$self->_register_pass("read()"); |
2942
|
|
|
|
|
|
|
|
2943
|
51
|
|
|
|
|
192
|
$self->_open_read(); |
2944
|
|
|
|
|
|
|
|
2945
|
51
|
|
|
|
|
113
|
my $ar; |
2946
|
|
|
|
|
|
|
my $row_hr; |
2947
|
51
|
|
|
|
|
230
|
while ($self->_read_one_record_from_input(\$ar, \$row_hr)) { |
2948
|
|
|
|
|
|
|
# Ben oui quoi... qu'est-ce que l'on peut bien faire d'autre ? |
2949
|
|
|
|
|
|
|
} |
2950
|
|
|
|
|
|
|
|
2951
|
49
|
|
|
|
|
202
|
$self->_close_read(); |
2952
|
49
|
50
|
|
|
|
147
|
return undef unless defined($ar); |
2953
|
|
|
|
|
|
|
|
2954
|
49
|
50
|
|
|
|
158
|
return undef unless $self->_status_reset(); |
2955
|
|
|
|
|
|
|
|
2956
|
49
|
|
|
|
|
228
|
return $self; |
2957
|
|
|
|
|
|
|
} |
2958
|
|
|
|
|
|
|
|
2959
|
|
|
|
|
|
|
# |
2960
|
|
|
|
|
|
|
# Initially, _read_all_in_mem was intended for the test plan. |
2961
|
|
|
|
|
|
|
# |
2962
|
|
|
|
|
|
|
# Turned out to be sometimes useful for user, thus, is no longer private since 1.1.5. |
2963
|
|
|
|
|
|
|
# Private version below is kept for compatibility. |
2964
|
|
|
|
|
|
|
# |
2965
|
|
|
|
|
|
|
sub read_all_in_mem { |
2966
|
1
|
|
|
1
|
1
|
3
|
my $self = shift; |
2967
|
|
|
|
|
|
|
|
2968
|
1
|
|
|
|
|
5
|
return $self->_read_all_in_mem(); |
2969
|
|
|
|
|
|
|
} |
2970
|
|
|
|
|
|
|
|
2971
|
|
|
|
|
|
|
sub _read_all_in_mem { |
2972
|
9
|
|
|
9
|
|
1281
|
my $self = shift; |
2973
|
|
|
|
|
|
|
|
2974
|
9
|
50
|
|
|
|
31
|
return 0 unless $self->_status_backward('S3'); |
2975
|
9
|
50
|
|
|
|
32
|
return 0 unless $self->_status_forward('S4'); |
2976
|
|
|
|
|
|
|
|
2977
|
9
|
|
|
|
|
49
|
return $self; |
2978
|
|
|
|
|
|
|
} |
2979
|
|
|
|
|
|
|
|
2980
|
|
|
|
|
|
|
sub print_id { |
2981
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
2982
|
|
|
|
|
|
|
|
2983
|
0
|
|
|
|
|
0
|
$self->_printf("-- " . $self->get_in_file_disp() . ":\n"); |
2984
|
0
|
|
|
|
|
0
|
$self->_printf("sep_char: " . $self->get_sep_char() . "\n"); |
2985
|
0
|
|
|
|
|
0
|
$self->_printf("escape_char: " . $self->get_escape_char() . "\n"); |
2986
|
0
|
|
|
|
|
0
|
$self->_printf("in_encoding: " . $self->get_in_encoding() . "\n"); |
2987
|
0
|
0
|
|
|
|
0
|
$self->_printf("is_always_quoted: " . ($self->get_is_always_quoted() ? 'yes' : 'no') . "\n"); |
2988
|
|
|
|
|
|
|
|
2989
|
0
|
|
|
|
|
0
|
my @coldata = $self->get_coldata(); |
2990
|
0
|
|
|
|
|
0
|
my @disp; |
2991
|
0
|
|
|
|
|
0
|
push @disp, [ '#', 'FIELD', 'HEADER', 'EXT DATA', 'DATETIME FORMAT', 'DATETIME LOCALE' ]; |
2992
|
0
|
|
|
|
|
0
|
push @disp, [ map { my $s = $_; $s =~ s/./-/g; $s } @{$disp[0]} ]; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
2993
|
0
|
|
|
|
|
0
|
for my $i (0..$#coldata) { |
2994
|
0
|
|
|
|
|
0
|
my $col = $coldata[$i]; |
2995
|
|
|
|
|
|
|
|
2996
|
0
|
|
|
|
|
0
|
my @row; |
2997
|
0
|
|
|
|
|
0
|
push @row, "$i"; |
2998
|
0
|
0
|
|
|
|
0
|
push @row, (defined($col->[$_]) ? ($col->[$_] . '') : '') for (0..4); |
2999
|
0
|
|
|
|
|
0
|
map { s/\n/\\n/g; s/\r/\\r/g; s/\t/\\t/g } @row; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
3000
|
0
|
|
|
|
|
0
|
push @disp, [ @row ]; |
3001
|
|
|
|
|
|
|
} |
3002
|
0
|
|
|
|
|
0
|
my $n = @{$disp[-1]}; |
|
0
|
|
|
|
|
0
|
|
3003
|
0
|
|
|
|
|
0
|
my @max = (-1) x $n; |
3004
|
0
|
|
|
|
|
0
|
for my $l (@disp) { |
3005
|
0
|
0
|
|
|
|
0
|
do { $max[$_] = length($l->[$_]) if $max[$_] < length($l->[$_]) } for (0 .. $n - 1); |
|
0
|
|
|
|
|
0
|
|
3006
|
|
|
|
|
|
|
} |
3007
|
0
|
|
|
|
|
0
|
my $s = join(' ', map { "%-${_}s" } @max); |
|
0
|
|
|
|
|
0
|
|
3008
|
0
|
|
|
|
|
0
|
$self->_print("\n"); |
3009
|
0
|
|
|
|
|
0
|
$self->_printf("$s\n", @{$_}) for (@disp); |
|
0
|
|
|
|
|
0
|
|
3010
|
|
|
|
|
|
|
} |
3011
|
|
|
|
|
|
|
|
3012
|
|
|
|
|
|
|
sub set_out_file { |
3013
|
2
|
|
|
2
|
1
|
8983
|
my $self = shift; |
3014
|
2
|
|
|
|
|
50
|
validate_pos(@_, {type => SCALAR}); |
3015
|
|
|
|
|
|
|
|
3016
|
2
|
|
|
|
|
11
|
my ($out_file) = @_; |
3017
|
2
|
|
|
|
|
152
|
$self->{out_file} = $out_file; |
3018
|
|
|
|
|
|
|
|
3019
|
2
|
|
|
|
|
18
|
return $self; |
3020
|
|
|
|
|
|
|
} |
3021
|
|
|
|
|
|
|
|
3022
|
|
|
|
|
|
|
# Subrefs set with out_map |
3023
|
|
|
|
|
|
|
sub _execute_write_update_before { |
3024
|
362
|
|
|
362
|
|
763
|
my ($self, $ar) = @_; |
3025
|
|
|
|
|
|
|
|
3026
|
362
|
|
|
|
|
705
|
my $columns_ar = $self->{_columns}; |
3027
|
|
|
|
|
|
|
|
3028
|
362
|
|
|
|
|
666
|
my $wpre = $self->{_write_update_before_ar}; |
3029
|
362
|
|
|
|
|
641
|
for my $i (0..$#{$columns_ar}) { |
|
362
|
|
|
|
|
949
|
|
3030
|
1162
|
|
|
|
|
1984
|
my $subref = $wpre->[$i]; |
3031
|
1162
|
100
|
|
|
|
3022
|
next unless defined($subref); |
3032
|
|
|
|
|
|
|
|
3033
|
159
|
|
|
|
|
245
|
do { |
3034
|
159
|
|
|
|
|
273
|
local $_ = $ar->[$i]; |
3035
|
159
|
|
|
|
|
263
|
my $field = $columns_ar->[$i]; |
3036
|
159
|
|
|
|
|
338
|
my $new_val = $subref->($self, $field); |
3037
|
156
|
|
|
|
|
390
|
$ar->[$i] = $new_val; |
3038
|
|
|
|
|
|
|
} |
3039
|
|
|
|
|
|
|
|
3040
|
|
|
|
|
|
|
} |
3041
|
|
|
|
|
|
|
} |
3042
|
|
|
|
|
|
|
|
3043
|
|
|
|
|
|
|
# Take into account write_fields if it got set |
3044
|
|
|
|
|
|
|
sub _apply_write_fields { |
3045
|
460
|
|
|
460
|
|
948
|
my ($self, $ar) = @_; |
3046
|
|
|
|
|
|
|
|
3047
|
460
|
|
|
|
|
792
|
my @final; |
3048
|
|
|
|
|
|
|
|
3049
|
460
|
|
|
|
|
1745
|
my $tmp = _get_def($self->{out_fields}, $self->{write_fields}); |
3050
|
460
|
100
|
|
|
|
1480
|
my @wf = @{$tmp} if defined($tmp); |
|
16
|
|
|
|
|
36
|
|
3051
|
|
|
|
|
|
|
|
3052
|
460
|
100
|
|
|
|
1388
|
return unless @wf; |
3053
|
|
|
|
|
|
|
|
3054
|
16
|
|
|
|
|
22
|
my %named_fields = %{$self->{_named_fields}}; |
|
16
|
|
|
|
|
50
|
|
3055
|
16
|
|
|
|
|
38
|
for my $i (0..$#wf) { |
3056
|
40
|
|
|
|
|
58
|
my $field = $wf[$i]; |
3057
|
40
|
100
|
66
|
|
|
158
|
my $tmp = $ar->[$named_fields{$field}] if defined($field) and $field ne ''; |
3058
|
|
|
|
|
|
|
|
3059
|
|
|
|
|
|
|
# Put here any post-processing of value |
3060
|
|
|
|
|
|
|
# WARNING |
3061
|
|
|
|
|
|
|
# $tmp can be undef |
3062
|
|
|
|
|
|
|
# ... |
3063
|
|
|
|
|
|
|
|
3064
|
40
|
|
|
|
|
79
|
$final[$i] = $tmp; |
3065
|
|
|
|
|
|
|
} |
3066
|
16
|
|
|
|
|
51
|
$_[1] = [ @final ]; |
3067
|
|
|
|
|
|
|
} |
3068
|
|
|
|
|
|
|
|
3069
|
|
|
|
|
|
|
sub write { |
3070
|
108
|
|
|
108
|
1
|
27763
|
my $self = shift; |
3071
|
|
|
|
|
|
|
|
3072
|
108
|
|
|
|
|
850
|
validate_pos(@_); |
3073
|
|
|
|
|
|
|
|
3074
|
108
|
50
|
|
|
|
416
|
return undef unless $self->_status_forward('S3'); |
3075
|
|
|
|
|
|
|
|
3076
|
103
|
|
|
|
|
260
|
my $verbose = $self->{verbose}; |
3077
|
103
|
|
|
|
|
209
|
my $_debug = $self->{_debug}; |
3078
|
103
|
|
|
|
|
255
|
my $_debugh = $self->{_debugh}; |
3079
|
|
|
|
|
|
|
|
3080
|
103
|
|
|
|
|
228
|
my $out_file = $self->{out_file}; |
3081
|
|
|
|
|
|
|
|
3082
|
103
|
|
|
|
|
212
|
my %stats; |
3083
|
|
|
|
|
|
|
|
3084
|
103
|
50
|
|
|
|
297
|
$self->_print("-- $out_file writing start\n") if $verbose; |
3085
|
103
|
|
|
|
|
207
|
my $rows_written = 0; |
3086
|
|
|
|
|
|
|
|
3087
|
103
|
|
|
|
|
210
|
my $outh = $self->{outh}; |
3088
|
|
|
|
|
|
|
|
3089
|
103
|
|
|
|
|
241
|
$self->{_close_outh_when_finished} = 0; |
3090
|
103
|
50
|
|
|
|
331
|
unless (defined($outh)) { |
3091
|
103
|
50
|
|
|
|
335
|
if ($out_file eq '') { |
3092
|
0
|
|
|
|
|
0
|
$outh = \*STDOUT; |
3093
|
|
|
|
|
|
|
} else { |
3094
|
103
|
50
|
|
|
|
10569
|
unless (open($outh, '>', $out_file)) { |
3095
|
0
|
|
|
|
|
0
|
$self->_print_error("unable to open file '$out_file': $!"); |
3096
|
0
|
|
|
|
|
0
|
return undef; |
3097
|
|
|
|
|
|
|
} |
3098
|
103
|
|
|
|
|
457
|
$self->{_close_outh_when_finished} = 1; |
3099
|
|
|
|
|
|
|
} |
3100
|
103
|
|
|
|
|
280
|
$self->{outh} = $outh; |
3101
|
|
|
|
|
|
|
} |
3102
|
|
|
|
|
|
|
|
3103
|
103
|
50
|
|
|
|
383
|
unless ($self->{_leave_encoding_alone}) { |
3104
|
|
|
|
|
|
|
my $enc = (defined($self->{_inh_encoding}) ? |
3105
|
|
|
|
|
|
|
$self->{_inh_encoding} : |
3106
|
103
|
50
|
|
|
|
408
|
$DEFAULT_OUT_ENCODING); |
3107
|
|
|
|
|
|
|
|
3108
|
|
|
|
|
|
|
# out_encoding option takes precedence |
3109
|
103
|
100
|
|
|
|
390
|
$enc = $self->{out_encoding} if defined($self->{out_encoding}); |
3110
|
103
|
|
|
|
|
350
|
my $m = ":encoding($enc)"; |
3111
|
103
|
50
|
66
|
|
|
345
|
if (_is_utf8($enc) and $self->{out_utf8_bom}) { |
3112
|
0
|
|
|
|
|
0
|
$m .= ':via(File::BOM)'; |
3113
|
|
|
|
|
|
|
} |
3114
|
|
|
|
|
|
|
|
3115
|
103
|
50
|
33
|
|
|
440
|
if ($OS_IS_PLAIN_WINDOWS and $FIX_PERLMONKS_823214) { |
3116
|
|
|
|
|
|
|
# Tested with UTF-16LE, NOT tested with UTF-16BE (it should be the same story) |
3117
|
0
|
0
|
|
|
|
0
|
$m = ":raw:perlio:$m:crlf" if $enc =~ /^utf-?16/i; |
3118
|
|
|
|
|
|
|
} |
3119
|
|
|
|
|
|
|
|
3120
|
103
|
|
|
|
|
1044
|
binmode $outh, $m; |
3121
|
103
|
50
|
|
|
|
11702
|
print($_debugh "Encoding string used for output: $m\n") if $_debug; |
3122
|
|
|
|
|
|
|
} |
3123
|
|
|
|
|
|
|
|
3124
|
103
|
|
|
|
|
266
|
my $escape_char = $self->{escape_char}; |
3125
|
103
|
|
|
|
|
231
|
my $quote_char = $self->{quote_char}; |
3126
|
|
|
|
|
|
|
|
3127
|
103
|
|
|
|
|
205
|
my %opts; |
3128
|
103
|
|
|
|
|
322
|
$opts{binary} = 1; |
3129
|
103
|
|
|
|
|
541
|
$opts{eol} = "\n"; |
3130
|
|
|
|
|
|
|
|
3131
|
103
|
50
|
|
|
|
437
|
$opts{sep_char} = $self->{sep_char} if defined($self->{sep_char}); |
3132
|
103
|
100
|
|
|
|
346
|
$opts{sep_char} = $self->{out_sep_char} if defined($self->{out_sep_char}); |
3133
|
|
|
|
|
|
|
|
3134
|
103
|
50
|
|
|
|
389
|
$opts{quote_char} = $self->{quote_char} if defined($self->{quote_char}); |
3135
|
103
|
50
|
|
|
|
323
|
$opts{quote_char} = $self->{out_quote_char} if defined($self->{out_quote_char}); |
3136
|
|
|
|
|
|
|
|
3137
|
103
|
50
|
|
|
|
383
|
$opts{escape_char} = $self->{escape_char} if defined($self->{escape_char}); |
3138
|
103
|
100
|
|
|
|
306
|
$opts{escape_char} = $self->{out_escape_char} if defined($self->{out_escape_char}); |
3139
|
|
|
|
|
|
|
|
3140
|
103
|
|
|
|
|
264
|
$opts{always_quote} = $self->{_is_always_quoted}; |
3141
|
103
|
100
|
|
|
|
311
|
$opts{always_quote} = $self->{out_always_quote} if defined($self->{out_always_quote}); |
3142
|
|
|
|
|
|
|
|
3143
|
103
|
|
|
|
|
927
|
my $csvout = Text::CSV->new({ %opts }); |
3144
|
103
|
50
|
|
|
|
19593
|
if (!defined($csvout)) { |
3145
|
0
|
|
|
|
|
0
|
$self->_print_error("error creating output Text::CSV object"); |
3146
|
0
|
|
|
|
|
0
|
return undef; |
3147
|
|
|
|
|
|
|
} |
3148
|
|
|
|
|
|
|
|
3149
|
103
|
|
|
|
|
520
|
my $write_filter_hr = _get_def($self->{out_filter}, $self->{write_filter_hr}); |
3150
|
|
|
|
|
|
|
|
3151
|
103
|
100
|
66
|
|
|
915
|
if (($self->{has_headers} and |
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
3152
|
|
|
|
|
|
|
!(defined($self->{out_has_headers}) and !$self->{out_has_headers})) |
3153
|
|
|
|
|
|
|
or $self->{out_has_headers}) { |
3154
|
101
|
|
|
|
|
253
|
my $ar = [ ]; |
3155
|
101
|
100
|
|
|
|
366
|
if ($self->{has_headers}) { |
3156
|
97
|
|
|
|
|
251
|
$ar = $self->{_headers}; |
3157
|
|
|
|
|
|
|
} else { |
3158
|
4
|
|
|
|
|
8
|
my $nf = $self->{_named_fields}; |
3159
|
4
|
|
|
|
|
7
|
$ar->[$nf->{$_}] = $_ for (keys %{$nf}); |
|
4
|
|
|
|
|
25
|
|
3160
|
|
|
|
|
|
|
} |
3161
|
|
|
|
|
|
|
|
3162
|
101
|
100
|
|
|
|
343
|
if (exists $self->{_out_headers}) { |
3163
|
3
|
|
|
|
|
5
|
my $h = $self->{_out_headers}; |
3164
|
3
|
|
|
|
|
6
|
for (keys %{$self->{_named_fields}}) { |
|
3
|
|
|
|
|
11
|
|
3165
|
12
|
100
|
|
|
|
30
|
if (exists $h->{$_}) { |
3166
|
5
|
|
|
|
|
13
|
$ar->[$self->{_named_fields}->{$_}] = $h->{$_}; |
3167
|
|
|
|
|
|
|
} |
3168
|
|
|
|
|
|
|
} |
3169
|
|
|
|
|
|
|
} |
3170
|
|
|
|
|
|
|
|
3171
|
101
|
|
|
|
|
445
|
$self->_apply_write_fields($ar); |
3172
|
|
|
|
|
|
|
|
3173
|
101
|
|
|
|
|
1848
|
$csvout->print($outh, $ar); |
3174
|
101
|
|
|
|
|
1337
|
$rows_written++; |
3175
|
|
|
|
|
|
|
} |
3176
|
|
|
|
|
|
|
|
3177
|
103
|
|
|
|
|
233
|
my $do_status_reset = 0; |
3178
|
|
|
|
|
|
|
|
3179
|
|
|
|
|
|
|
|
3180
|
|
|
|
|
|
|
# |
3181
|
|
|
|
|
|
|
# FIXME!!! |
3182
|
|
|
|
|
|
|
# |
3183
|
|
|
|
|
|
|
# Instead of this duplication of code, provide AutoCSV with a "create iterator sub" feature to |
3184
|
|
|
|
|
|
|
# iterate over all records, whatever is going on behind the scene (in-memory or read input). |
3185
|
|
|
|
|
|
|
# |
3186
|
|
|
|
|
|
|
# Such an iterator would also benefit to module users. |
3187
|
|
|
|
|
|
|
# |
3188
|
|
|
|
|
|
|
|
3189
|
|
|
|
|
|
|
|
3190
|
103
|
100
|
|
|
|
351
|
if ($self->{_status} == 4) { |
3191
|
|
|
|
|
|
|
|
3192
|
|
|
|
|
|
|
# |
3193
|
|
|
|
|
|
|
# The content is available in-memory: we write from what we have in-memory then... |
3194
|
|
|
|
|
|
|
# |
3195
|
|
|
|
|
|
|
|
3196
|
24
|
|
|
|
|
105
|
for my $k ($self->get_keys()) { |
3197
|
87
|
|
|
|
|
253
|
my $hr = $self->get_row_hr($k); |
3198
|
87
|
50
|
|
|
|
251
|
if (defined($write_filter_hr)) { |
3199
|
0
|
0
|
|
|
|
0
|
next unless $write_filter_hr->($hr); |
3200
|
|
|
|
|
|
|
} |
3201
|
87
|
|
|
|
|
154
|
my $ar = [ @{$self->get_row_ar($k)} ]; |
|
87
|
|
|
|
|
208
|
|
3202
|
|
|
|
|
|
|
|
3203
|
87
|
|
|
|
|
344
|
$self->_execute_write_update_before($ar); |
3204
|
87
|
|
|
|
|
278
|
$self->_apply_write_fields($ar); |
3205
|
|
|
|
|
|
|
|
3206
|
87
|
|
|
|
|
628
|
$csvout->print($outh, $ar); |
3207
|
87
|
|
|
|
|
989
|
$rows_written++; |
3208
|
|
|
|
|
|
|
} |
3209
|
|
|
|
|
|
|
|
3210
|
|
|
|
|
|
|
} else { |
3211
|
|
|
|
|
|
|
|
3212
|
|
|
|
|
|
|
# |
3213
|
|
|
|
|
|
|
# No in-memory content available: we read and write in parallel. |
3214
|
|
|
|
|
|
|
# |
3215
|
|
|
|
|
|
|
|
3216
|
79
|
|
|
|
|
317
|
$self->_register_pass("write()"); |
3217
|
|
|
|
|
|
|
|
3218
|
79
|
|
|
|
|
335
|
$self->_open_read(); |
3219
|
79
|
|
|
|
|
173
|
my $ar; |
3220
|
|
|
|
|
|
|
my $row_hr; |
3221
|
79
|
|
|
|
|
339
|
while ($self->_read_one_record_from_input(\$ar, \$row_hr)) { |
3222
|
301
|
100
|
|
|
|
812
|
if (defined($write_filter_hr)) { |
3223
|
46
|
100
|
|
|
|
112
|
next unless $write_filter_hr->($row_hr, \%stats, $self->get_recnum()); |
3224
|
|
|
|
|
|
|
} |
3225
|
275
|
|
|
|
|
610
|
$ar = [ @{$ar} ]; |
|
275
|
|
|
|
|
803
|
|
3226
|
|
|
|
|
|
|
|
3227
|
275
|
|
|
|
|
1138
|
$self->_execute_write_update_before($ar); |
3228
|
272
|
|
|
|
|
800
|
$self->_apply_write_fields($ar); |
3229
|
|
|
|
|
|
|
|
3230
|
272
|
|
|
|
|
1914
|
$csvout->print($outh, $ar); |
3231
|
272
|
|
|
|
|
2864
|
$rows_written++; |
3232
|
|
|
|
|
|
|
} |
3233
|
72
|
|
|
|
|
330
|
$self->_close_read(); |
3234
|
|
|
|
|
|
|
|
3235
|
72
|
|
|
|
|
177
|
$do_status_reset = 1 |
3236
|
|
|
|
|
|
|
} |
3237
|
|
|
|
|
|
|
|
3238
|
96
|
|
|
|
|
400
|
$self->_close_outh(); |
3239
|
|
|
|
|
|
|
|
3240
|
96
|
50
|
|
|
|
323
|
if ($verbose) { |
3241
|
0
|
|
|
|
|
0
|
$self->_print("-- $out_file writing end: $rows_written row(s) written\n"); |
3242
|
0
|
|
|
|
|
0
|
for my $k (sort keys %stats) { |
3243
|
0
|
|
|
|
|
0
|
$self->_printf(" %7d %s\n", $stats{$k}, $k); |
3244
|
|
|
|
|
|
|
} |
3245
|
|
|
|
|
|
|
} |
3246
|
|
|
|
|
|
|
|
3247
|
96
|
100
|
|
|
|
303
|
if ($do_status_reset) { |
3248
|
72
|
50
|
|
|
|
310
|
return undef unless $self->_status_reset(); |
3249
|
|
|
|
|
|
|
} |
3250
|
96
|
|
|
|
|
1094
|
return $self; |
3251
|
|
|
|
|
|
|
} |
3252
|
|
|
|
|
|
|
|
3253
|
|
|
|
|
|
|
|
3254
|
|
|
|
|
|
|
|
3255
|
|
|
|
|
|
|
# |
3256
|
|
|
|
|
|
|
# * *** *************************************************************************** |
3257
|
|
|
|
|
|
|
# * *** *************************************************************************** |
3258
|
|
|
|
|
|
|
# * OBJ *************************************************************************** |
3259
|
|
|
|
|
|
|
# * *** *************************************************************************** |
3260
|
|
|
|
|
|
|
# * *** *************************************************************************** |
3261
|
|
|
|
|
|
|
# |
3262
|
|
|
|
|
|
|
|
3263
|
|
|
|
|
|
|
# |
3264
|
|
|
|
|
|
|
# The subs below assume Text::AutoCSV can be in status S4 = all in memory. |
3265
|
|
|
|
|
|
|
# |
3266
|
|
|
|
|
|
|
|
3267
|
|
|
|
|
|
|
|
3268
|
|
|
|
|
|
|
sub get_keys { |
3269
|
31
|
|
|
31
|
1
|
2672
|
my $self = shift; |
3270
|
31
|
|
|
|
|
242
|
validate_pos(@_); |
3271
|
|
|
|
|
|
|
|
3272
|
31
|
50
|
|
|
|
113
|
return undef unless $self->_status_forward('S4'); |
3273
|
|
|
|
|
|
|
|
3274
|
31
|
|
|
|
|
63
|
my $last_key = @{$self->{_flat}} - 1; |
|
31
|
|
|
|
|
85
|
|
3275
|
31
|
|
|
|
|
106
|
my @r = (0..$last_key); |
3276
|
|
|
|
|
|
|
|
3277
|
31
|
|
|
|
|
108
|
return @r; |
3278
|
|
|
|
|
|
|
} |
3279
|
|
|
|
|
|
|
|
3280
|
|
|
|
|
|
|
sub get_row_ar { |
3281
|
338
|
|
|
338
|
1
|
1307
|
my $self = shift; |
3282
|
338
|
|
|
|
|
2565
|
validate_pos(@_, {type => SCALAR}); |
3283
|
338
|
|
|
|
|
1052
|
my ($key) = @_; |
3284
|
|
|
|
|
|
|
|
3285
|
338
|
50
|
|
|
|
912
|
return undef unless $self->_status_forward('S4'); |
3286
|
|
|
|
|
|
|
|
3287
|
338
|
50
|
|
|
|
983
|
unless (defined($key)) { |
3288
|
0
|
|
|
|
|
0
|
$self->_print_error("get_row_ar(): \$key is not defined!"); |
3289
|
0
|
|
|
|
|
0
|
return undef; |
3290
|
|
|
|
|
|
|
} |
3291
|
|
|
|
|
|
|
|
3292
|
338
|
100
|
|
|
|
997
|
$self->_print_error("unknown row '$key'") unless defined($self->{_flat}->[$key]); |
3293
|
338
|
|
|
|
|
882
|
return $self->{_flat}->[$key]; |
3294
|
|
|
|
|
|
|
} |
3295
|
|
|
|
|
|
|
|
3296
|
|
|
|
|
|
|
sub get_row_hr { |
3297
|
248
|
|
|
248
|
1
|
795
|
my $self = shift; |
3298
|
248
|
|
|
|
|
2558
|
validate_pos(@_, {type => SCALAR}); |
3299
|
248
|
|
|
|
|
881
|
my ($key) = @_; |
3300
|
|
|
|
|
|
|
|
3301
|
248
|
|
|
|
|
747
|
my $ar = $self->get_row_ar($key); |
3302
|
248
|
100
|
|
|
|
719
|
return undef unless defined($ar); |
3303
|
|
|
|
|
|
|
|
3304
|
246
|
|
|
|
|
790
|
return $self->_ar_to_hr($ar); |
3305
|
|
|
|
|
|
|
} |
3306
|
|
|
|
|
|
|
|
3307
|
|
|
|
|
|
|
# |
3308
|
|
|
|
|
|
|
# Could be made much more efficient (directly read $self->{_flat} instead of calling get_row_hr |
3309
|
|
|
|
|
|
|
# that itself calls get_row_ar). |
3310
|
|
|
|
|
|
|
# I leave it as is because get_hr_all is not good practice (it is not scalable), it was |
3311
|
|
|
|
|
|
|
# primarily done to ease test plan. |
3312
|
|
|
|
|
|
|
# |
3313
|
|
|
|
|
|
|
# By the way I may make it one day not available by default, requesting caller to tune some |
3314
|
|
|
|
|
|
|
# variable (like { $Text::AutoCSV::i_am_the_test_plan = 1 }) to expose it. |
3315
|
|
|
|
|
|
|
# |
3316
|
|
|
|
|
|
|
sub get_hr_all { |
3317
|
106
|
|
|
106
|
1
|
5160
|
my $self = shift; |
3318
|
106
|
|
|
|
|
859
|
validate_pos(@_); |
3319
|
|
|
|
|
|
|
|
3320
|
106
|
|
|
|
|
290
|
my @resp; |
3321
|
106
|
|
|
|
|
448
|
$self->reset_next_record_hr(); |
3322
|
106
|
|
|
|
|
387
|
while (my $hr = $self->get_next_record_hr()) { |
3323
|
404
|
|
|
|
|
1187
|
push @resp, $hr; |
3324
|
|
|
|
|
|
|
} |
3325
|
103
|
|
|
|
|
476
|
return @resp; |
3326
|
|
|
|
|
|
|
} |
3327
|
|
|
|
|
|
|
|
3328
|
|
|
|
|
|
|
sub get_recnum { |
3329
|
190
|
|
|
190
|
1
|
350
|
my $self = shift; |
3330
|
190
|
|
|
|
|
1102
|
validate_pos(@_); |
3331
|
|
|
|
|
|
|
|
3332
|
190
|
50
|
|
|
|
642
|
return -1 unless $self->{_read_in_progress}; |
3333
|
190
|
|
|
|
|
458
|
return _get_def($self->{_row_read}, -1); |
3334
|
|
|
|
|
|
|
} |
3335
|
|
|
|
|
|
|
|
3336
|
|
|
|
|
|
|
sub _check_for_search { |
3337
|
890
|
|
|
890
|
|
2116
|
my ($self, $field) = @_; |
3338
|
890
|
50
|
|
|
|
2331
|
return undef unless $self->_status_forward('S4'); |
3339
|
|
|
|
|
|
|
|
3340
|
889
|
100
|
|
|
|
3707
|
return 1 if exists $self->{_named_fields}->{$field}; |
3341
|
|
|
|
|
|
|
$self->_print_error("search: unknown field '$field'", |
3342
|
6
|
|
|
|
|
30
|
0, ERR_UNKNOWN_FIELD, $self->{_named_fields}); |
3343
|
|
|
|
|
|
|
} |
3344
|
|
|
|
|
|
|
|
3345
|
|
|
|
|
|
|
sub get_cell { |
3346
|
4
|
|
|
4
|
1
|
671
|
my $self = shift; |
3347
|
4
|
|
|
|
|
48
|
validate_pos(@_, {type => SCALAR}, {type => SCALAR}); |
3348
|
4
|
|
|
|
|
15
|
my ($key, $field) = @_; |
3349
|
|
|
|
|
|
|
|
3350
|
4
|
50
|
|
|
|
13
|
return undef unless $self->_check_for_search($field); |
3351
|
3
|
|
|
|
|
10
|
my $row = $self->get_row_hr($key); |
3352
|
3
|
100
|
|
|
|
19
|
return $row unless defined($row); |
3353
|
2
|
|
|
|
|
12
|
return $row->{$field}; |
3354
|
|
|
|
|
|
|
} |
3355
|
|
|
|
|
|
|
|
3356
|
|
|
|
|
|
|
sub get_values { |
3357
|
9
|
|
|
9
|
1
|
4150
|
my $self = shift; |
3358
|
9
|
|
|
|
|
132
|
validate_pos(@_, {type => SCALAR}, {type => UNDEF | CODEREF, optional => 1}); |
3359
|
9
|
|
|
|
|
44
|
my ($field, $filter_subref) = @_; |
3360
|
|
|
|
|
|
|
|
3361
|
9
|
50
|
|
|
|
34
|
return undef unless $self->_check_for_search($field); |
3362
|
|
|
|
|
|
|
|
3363
|
9
|
|
|
|
|
18
|
my @values; |
3364
|
9
|
|
|
|
|
35
|
$self->reset_next_record_hr(); |
3365
|
9
|
|
|
|
|
31
|
while (my $hr = $self->get_next_record_hr()) { |
3366
|
53
|
100
|
|
|
|
118
|
if (defined($filter_subref)) { |
3367
|
23
|
|
|
|
|
41
|
local $_ = $hr->{$field}; |
3368
|
23
|
100
|
|
|
|
51
|
next unless $filter_subref->(); |
3369
|
|
|
|
|
|
|
} |
3370
|
42
|
|
|
|
|
181
|
push @values, $hr->{$field}; |
3371
|
|
|
|
|
|
|
} |
3372
|
9
|
|
|
|
|
43
|
return @values; |
3373
|
|
|
|
|
|
|
} |
3374
|
|
|
|
|
|
|
|
3375
|
|
|
|
|
|
|
sub _get_hash_and_projector { |
3376
|
561
|
|
|
561
|
|
1318
|
my ($self, $field, $arg_opts) = @_; |
3377
|
|
|
|
|
|
|
|
3378
|
561
|
|
|
|
|
1105
|
my $_debug = $self->{_debug}; |
3379
|
561
|
|
|
|
|
1042
|
my $_debugh = $self->{_debugh}; |
3380
|
|
|
|
|
|
|
|
3381
|
561
|
50
|
|
|
|
1415
|
my %opts = %{$arg_opts} if defined($arg_opts); |
|
561
|
|
|
|
|
1726
|
|
3382
|
|
|
|
|
|
|
|
3383
|
561
|
|
|
|
|
2351
|
my $opt_case = _get_def($opts{'case'}, $self->{search_case}, $DEF_SEARCH_CASE); |
3384
|
561
|
|
|
|
|
2184
|
my $opt_trim = _get_def($opts{'trim'}, $self->{search_trim}, $DEF_SEARCH_TRIM); |
3385
|
|
|
|
|
|
|
my $opt_ignore_empty = _get_def($opts{'ignore_empty'}, $self->{search_ignore_empty}, |
3386
|
561
|
|
|
|
|
2058
|
$DEF_SEARCH_IGNORE_EMPTY); |
3387
|
|
|
|
|
|
|
my $opt_ignacc = _get_def($opts{'ignore_accents'}, $self->{search_ignore_accents}, |
3388
|
561
|
|
|
|
|
2042
|
$DEF_SEARCH_IGNORE_ACCENTS); |
3389
|
|
|
|
|
|
|
|
3390
|
561
|
|
|
|
|
1868
|
my $opts_stringified = $opt_case . $opt_trim . $opt_ignore_empty . $opt_ignacc; |
3391
|
561
|
|
|
|
|
1423
|
my $hash_name = "_h${field}_${opts_stringified}"; |
3392
|
561
|
|
|
|
|
1188
|
my $projector_name = "_p${field}_${opts_stringified}"; |
3393
|
|
|
|
|
|
|
|
3394
|
561
|
100
|
66
|
|
|
2959
|
if (exists $self->{$hash_name} and exists $self->{$projector_name}) { |
|
|
50
|
33
|
|
|
|
|
3395
|
489
|
50
|
|
|
|
1268
|
print($_debugh "Search by key '$field': using existing hash and projector (" . |
3396
|
|
|
|
|
|
|
"$hash_name, $projector_name)\n") if $_debug; |
3397
|
489
|
|
|
|
|
1906
|
return ($hash_name, $projector_name); |
3398
|
|
|
|
|
|
|
} elsif (exists $self->{$hash_name} or exists $self->{$projector_name}) { |
3399
|
0
|
|
|
|
|
0
|
confess "Man, check your $PKG module code now!"; |
3400
|
|
|
|
|
|
|
} |
3401
|
|
|
|
|
|
|
|
3402
|
72
|
50
|
|
|
|
237
|
print($_debugh "Search by key '$field': building hash\n") if $_debug; |
3403
|
|
|
|
|
|
|
|
3404
|
|
|
|
|
|
|
# |
3405
|
|
|
|
|
|
|
# Projectors |
3406
|
|
|
|
|
|
|
# |
3407
|
|
|
|
|
|
|
# The projector contains subs to derivate the search key from the field value. |
3408
|
|
|
|
|
|
|
# At the moment it is used to manage with case / without case searches and with trim / without trim |
3409
|
|
|
|
|
|
|
# searches (meaning, ignoring spaces at beginning and end of fields) |
3410
|
|
|
|
|
|
|
# |
3411
|
|
|
|
|
|
|
# Why naming it a projector? |
3412
|
|
|
|
|
|
|
# Because if you run it twice on a value, the second run should produce the same result, meaning: |
3413
|
|
|
|
|
|
|
# p(p(x)) = p(x) whatever x |
3414
|
|
|
|
|
|
|
# |
3415
|
|
|
|
|
|
|
|
3416
|
72
|
|
|
|
|
145
|
my @projectors; |
3417
|
|
|
|
|
|
|
|
3418
|
|
|
|
|
|
|
# Add case removal in the projector function list |
3419
|
72
|
100
|
|
1479
|
|
398
|
push @projectors, sub { return lc(shift); } unless $opt_case; |
|
1479
|
|
|
|
|
4573
|
|
3420
|
|
|
|
|
|
|
|
3421
|
|
|
|
|
|
|
# Add trim in the projector function list |
3422
|
72
|
100
|
|
|
|
219
|
if ($opt_trim) { |
3423
|
|
|
|
|
|
|
push @projectors, |
3424
|
|
|
|
|
|
|
sub { |
3425
|
1479
|
|
|
1479
|
|
8134
|
my $v = shift; |
3426
|
1479
|
|
|
|
|
5239
|
$v =~ s/^\s+|\s+$//g; |
3427
|
1479
|
|
|
|
|
3808
|
return $v; |
3428
|
57
|
|
|
|
|
213
|
}; |
3429
|
|
|
|
|
|
|
} |
3430
|
|
|
|
|
|
|
|
3431
|
|
|
|
|
|
|
# Add remove_accents in the projector function list |
3432
|
72
|
100
|
|
1886
|
|
328
|
push @projectors, sub { return remove_accents(shift); } if $opt_ignacc; |
|
1886
|
|
|
|
|
4174
|
|
3433
|
|
|
|
|
|
|
|
3434
|
|
|
|
|
|
|
my $projector = sub { |
3435
|
1906
|
|
|
1906
|
|
4245
|
my $v = _get_def($_[0], ''); |
3436
|
1906
|
|
|
|
|
4688
|
$v = $_->($v) foreach (@projectors); |
3437
|
1906
|
|
|
|
|
4083
|
return $v; |
3438
|
72
|
|
|
|
|
251
|
}; |
3439
|
|
|
|
|
|
|
|
3440
|
|
|
|
|
|
|
# |
3441
|
|
|
|
|
|
|
# Filter |
3442
|
|
|
|
|
|
|
# |
3443
|
|
|
|
|
|
|
# As opposed to projectors above (where a search key is transformed), the idea now is to ignore |
3444
|
|
|
|
|
|
|
# certain keys when doing a search. |
3445
|
|
|
|
|
|
|
# At the moment, used to manage searches with / without empty values. |
3446
|
|
|
|
|
|
|
# |
3447
|
|
|
|
|
|
|
# That is to say: shall we use empty value as a regular value to search on, as in |
3448
|
|
|
|
|
|
|
# my @results = $self->search('FIELDNAME', ''); |
3449
|
|
|
|
|
|
|
# ? |
3450
|
|
|
|
|
|
|
# |
3451
|
|
|
|
|
|
|
# Right now we don't use an array-based construct, that'd allow to chain filters with one another |
3452
|
|
|
|
|
|
|
# (as we now have only one filter to deal with), later, we may use an array of filters, as done with |
3453
|
|
|
|
|
|
|
# projectors... |
3454
|
|
|
|
|
|
|
# |
3455
|
|
|
|
|
|
|
|
3456
|
72
|
|
|
|
|
130
|
my $filter; |
3457
|
72
|
100
|
|
|
|
175
|
if ($opt_ignore_empty) { |
3458
|
1268
|
|
|
1268
|
|
3588
|
$filter = sub { return $_[0] ne ''; } |
3459
|
61
|
|
|
|
|
191
|
} else { |
3460
|
77
|
|
|
77
|
|
183
|
$filter = sub { return 1; } |
3461
|
11
|
|
|
|
|
48
|
} |
3462
|
|
|
|
|
|
|
|
3463
|
72
|
|
|
|
|
154
|
my %h; |
3464
|
|
|
|
|
|
|
my $k; |
3465
|
72
|
|
|
|
|
286
|
$self->reset_next_record_hr(); |
3466
|
72
|
|
|
|
|
320
|
while (my $hr = $self->get_next_record_hr(\$k)) { |
3467
|
1345
|
|
|
|
|
2470
|
my $kv = $hr->{$field}; |
3468
|
1345
|
|
|
|
|
2567
|
my $p = $projector->($kv); |
3469
|
1345
|
100
|
|
|
|
2723
|
unless ($filter->($p)) { |
3470
|
76
|
50
|
|
|
|
184
|
print($_debugh "Ignoring key value '$p' in hash build\n") if $_debug; |
3471
|
76
|
|
|
|
|
256
|
next; |
3472
|
|
|
|
|
|
|
} |
3473
|
1269
|
|
|
|
|
2012
|
push @{$h{$p}}, $k; |
|
1269
|
|
|
|
|
6049
|
|
3474
|
|
|
|
|
|
|
} |
3475
|
72
|
|
|
|
|
486
|
for (keys %h) { |
3476
|
1178
|
|
|
|
|
1638
|
@{$h{$_}} = sort { $a <=> $b } @{$h{$_}}; |
|
1178
|
|
|
|
|
2405
|
|
|
103
|
|
|
|
|
273
|
|
|
1178
|
|
|
|
|
2368
|
|
3477
|
|
|
|
|
|
|
} |
3478
|
|
|
|
|
|
|
|
3479
|
72
|
|
|
|
|
247
|
$self->{_hash_build_count}++; |
3480
|
72
|
|
|
|
|
853
|
$self->{$hash_name} = { %h }; |
3481
|
72
|
|
|
|
|
260
|
$self->{$projector_name} = $projector; |
3482
|
72
|
|
|
|
|
564
|
return ($hash_name, $projector_name); |
3483
|
|
|
|
|
|
|
} |
3484
|
|
|
|
|
|
|
|
3485
|
|
|
|
|
|
|
sub _get_hash_build_count { |
3486
|
6
|
|
|
6
|
|
1488
|
my $self = shift; |
3487
|
|
|
|
|
|
|
|
3488
|
6
|
|
|
|
|
33
|
return _get_def($self->{_hash_build_count}, 0); |
3489
|
|
|
|
|
|
|
} |
3490
|
|
|
|
|
|
|
|
3491
|
|
|
|
|
|
|
sub search { |
3492
|
563
|
|
|
563
|
1
|
14062
|
my $self = shift; |
3493
|
563
|
|
|
|
|
6299
|
validate_pos(@_, |
3494
|
|
|
|
|
|
|
{type => SCALAR}, {type => UNDEF | SCALAR}, {type => UNDEF | HASHREF, optional => 1}); |
3495
|
563
|
|
|
|
|
2234
|
my ($field, $value, $param_opts) = @_; |
3496
|
|
|
|
|
|
|
|
3497
|
563
|
|
|
|
|
1201
|
my $croak_if_error = $self->{croak_if_error}; |
3498
|
|
|
|
|
|
|
|
3499
|
|
|
|
|
|
|
# |
3500
|
|
|
|
|
|
|
# FIXME? |
3501
|
|
|
|
|
|
|
# A bit overkill to check options each time search is called... |
3502
|
|
|
|
|
|
|
# To be thought about. |
3503
|
|
|
|
|
|
|
# |
3504
|
|
|
|
|
|
|
|
3505
|
563
|
100
|
|
|
|
1548
|
my @tmp = %{$param_opts} if $param_opts; |
|
421
|
|
|
|
|
1572
|
|
3506
|
563
|
|
|
|
|
9833
|
my %opts = validate(@tmp, $SEARCH_VALIDATE_OPTIONS); |
3507
|
|
|
|
|
|
|
|
3508
|
562
|
50
|
|
|
|
2778
|
return undef unless $self->_check_for_search($field); |
3509
|
|
|
|
|
|
|
|
3510
|
|
|
|
|
|
|
# $self->_print_error("undef value in search call") if !defined($value); |
3511
|
561
|
50
|
|
|
|
1448
|
$value = '' unless defined($value); |
3512
|
|
|
|
|
|
|
|
3513
|
561
|
|
|
|
|
1661
|
my ($hash_name, $projector_name) = $self->_get_hash_and_projector($field, \%opts); |
3514
|
|
|
|
|
|
|
|
3515
|
561
|
|
|
|
|
1925
|
my $ret = $self->{$hash_name}->{$self->{$projector_name}->($value)}; |
3516
|
|
|
|
|
|
|
|
3517
|
561
|
100
|
|
|
|
2643
|
return $ret if defined($ret); |
3518
|
185
|
|
|
|
|
724
|
return [ ]; |
3519
|
|
|
|
|
|
|
} |
3520
|
|
|
|
|
|
|
|
3521
|
|
|
|
|
|
|
sub search_1hr { |
3522
|
22
|
|
|
22
|
1
|
10058
|
my $self = shift; |
3523
|
22
|
|
|
|
|
332
|
validate_pos(@_, |
3524
|
|
|
|
|
|
|
{type => SCALAR}, {type => UNDEF | SCALAR}, {type => UNDEF | HASHREF, optional => 1}); |
3525
|
22
|
|
|
|
|
96
|
my ($field, $value, $arg_opts) = @_; |
3526
|
|
|
|
|
|
|
|
3527
|
22
|
|
|
|
|
72
|
my $r = $self->search($field, $value, $arg_opts); |
3528
|
|
|
|
|
|
|
|
3529
|
22
|
100
|
|
|
|
74
|
return undef unless defined($r->[0]); |
3530
|
|
|
|
|
|
|
|
3531
|
20
|
|
|
|
|
48
|
my $opts = _get_def($arg_opts, { }); |
3532
|
|
|
|
|
|
|
my $opt_ignore_ambiguous = _get_def($opts->{'ignore_ambiguous'}, |
3533
|
20
|
|
|
|
|
72
|
$self->{'search_ignore_ambiguous'}, $DEF_SEARCH_IGNORE_AMBIGUOUS); |
3534
|
|
|
|
|
|
|
|
3535
|
20
|
100
|
100
|
|
|
38
|
return undef if @{$r} >= 2 and !$opt_ignore_ambiguous; |
|
20
|
|
|
|
|
107
|
|
3536
|
14
|
|
|
|
|
50
|
return $self->get_row_hr($r->[0]); |
3537
|
|
|
|
|
|
|
} |
3538
|
|
|
|
|
|
|
|
3539
|
|
|
|
|
|
|
sub vlookup { |
3540
|
317
|
|
|
317
|
1
|
33785
|
my $self = shift; |
3541
|
317
|
|
|
|
|
5554
|
validate_pos(@_, {type => SCALAR}, {type => UNDEF | SCALAR}, {type => SCALAR}, |
3542
|
|
|
|
|
|
|
{type => UNDEF | HASHREF, optional => 1}); |
3543
|
317
|
|
|
|
|
1630
|
my ($searched_field, $value, $target_field, $arg_opts) = @_; |
3544
|
|
|
|
|
|
|
|
3545
|
317
|
|
|
|
|
991
|
my $r = $self->search($searched_field, $value, $arg_opts); |
3546
|
315
|
50
|
|
|
|
945
|
return undef unless $self->_check_for_search($target_field); |
3547
|
|
|
|
|
|
|
|
3548
|
314
|
|
|
|
|
924
|
my $opts = _get_def($arg_opts, { }); |
3549
|
314
|
100
|
66
|
|
|
989
|
unless (defined($r->[0])) { |
3550
|
|
|
|
|
|
|
return (exists $opts->{'value_if_not_found'} ? $opts->{'value_if_not_found'} : |
3551
|
143
|
100
|
|
|
|
682
|
$self->{'search_value_if_not_found'}); |
3552
|
|
|
|
|
|
|
} elsif (@{$r} >= 2) { |
3553
|
|
|
|
|
|
|
my $opt_ignore_ambiguous = _get_def($opts->{'ignore_ambiguous'}, |
3554
|
|
|
|
|
|
|
$self->{'search_ignore_ambiguous'}, $DEF_SEARCH_IGNORE_AMBIGUOUS); |
3555
|
|
|
|
|
|
|
return (exists $opts->{'value_if_ambiguous'} ? $opts->{'value_if_ambiguous'} : |
3556
|
|
|
|
|
|
|
$self->{'search_value_if_ambiguous'}) if !$opt_ignore_ambiguous; |
3557
|
|
|
|
|
|
|
} |
3558
|
|
|
|
|
|
|
|
3559
|
144
|
100
|
|
|
|
458
|
return $opts->{value_if_found} if exists $opts->{value_if_found}; |
3560
|
140
|
50
|
|
|
|
394
|
return $self->{search_value_if_found} if exists $opts->{search_value_if_found}; |
3561
|
|
|
|
|
|
|
|
3562
|
140
|
|
|
|
|
449
|
my $hr = $self->get_row_hr($r->[0]); |
3563
|
|
|
|
|
|
|
|
3564
|
140
|
|
|
|
|
730
|
return $hr->{$target_field}; |
3565
|
|
|
|
|
|
|
} |
3566
|
|
|
|
|
|
|
|
3567
|
|
|
|
|
|
|
1; |
3568
|
|
|
|
|
|
|
|
3569
|
|
|
|
|
|
|
__END__ |
3570
|
|
|
|
|
|
|
|
3571
|
|
|
|
|
|
|
=pod |
3572
|
|
|
|
|
|
|
|
3573
|
|
|
|
|
|
|
=encoding UTF-8 |
3574
|
|
|
|
|
|
|
|
3575
|
|
|
|
|
|
|
=head1 NAME |
3576
|
|
|
|
|
|
|
|
3577
|
|
|
|
|
|
|
Text::AutoCSV - helper module to automate the use of Text::CSV |
3578
|
|
|
|
|
|
|
|
3579
|
|
|
|
|
|
|
=head1 VERSION |
3580
|
|
|
|
|
|
|
|
3581
|
|
|
|
|
|
|
version 1.1.8 |
3582
|
|
|
|
|
|
|
|
3583
|
|
|
|
|
|
|
=head1 SYNOPSIS |
3584
|
|
|
|
|
|
|
|
3585
|
|
|
|
|
|
|
By default, Text::AutoCSV will detect the following characteristics of the input: |
3586
|
|
|
|
|
|
|
|
3587
|
|
|
|
|
|
|
- The separator, among ",", ";" and "\t" (tab) |
3588
|
|
|
|
|
|
|
|
3589
|
|
|
|
|
|
|
- The escape character, among '"' (double-quote) and '\\' (backslash) |
3590
|
|
|
|
|
|
|
|
3591
|
|
|
|
|
|
|
- Try UTF-8 and if it fails, fall back on latin1 |
3592
|
|
|
|
|
|
|
|
3593
|
|
|
|
|
|
|
- Read the header line and compute field names |
3594
|
|
|
|
|
|
|
|
3595
|
|
|
|
|
|
|
- If asked to (see L</fields_dates_auto>), detect any field that contains a DateTime value, trying |
3596
|
|
|
|
|
|
|
20 date formats, possibly followed by a time (6 time formats tested) |
3597
|
|
|
|
|
|
|
|
3598
|
|
|
|
|
|
|
- If asked to (see L</fields_dates>), detect DateTime format of certain fields, croak if no DateTime |
3599
|
|
|
|
|
|
|
format can be worked out |
3600
|
|
|
|
|
|
|
|
3601
|
|
|
|
|
|
|
- Fields identified as containing a DateTime value (L</fields_dates_auto> or L</fields_dates>) are |
3602
|
|
|
|
|
|
|
stored as DateTime objects by default |
3603
|
|
|
|
|
|
|
|
3604
|
|
|
|
|
|
|
Text::AutoCSV also provides methods to search on fields (using cached hash tables) and it can |
3605
|
|
|
|
|
|
|
populate the value of "remote" fields, made from joining 2 CSV files with a key-value search |
3606
|
|
|
|
|
|
|
|
3607
|
|
|
|
|
|
|
=head2 General |
3608
|
|
|
|
|
|
|
|
3609
|
|
|
|
|
|
|
use Text::AutoCSV; |
3610
|
|
|
|
|
|
|
|
3611
|
|
|
|
|
|
|
Text::AutoCSV->new()->write(); # Read CSV data from std input, write to std output |
3612
|
|
|
|
|
|
|
|
3613
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'f.csv')->write(); # Read CSV data from f.csv, write to std output |
3614
|
|
|
|
|
|
|
|
3615
|
|
|
|
|
|
|
# Read CSV data from f.csv, write to g.csv |
3616
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'f.csv', out_file => 'g.csv')->write(); |
3617
|
|
|
|
|
|
|
|
3618
|
|
|
|
|
|
|
# "Rewrite" CSV file by printing out records as a list (separated by line breaks) of field |
3619
|
|
|
|
|
|
|
# name followed by its value. |
3620
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', walker_hr => \&walk); |
3621
|
|
|
|
|
|
|
my @cols = $csv->get_fields_names(); |
3622
|
|
|
|
|
|
|
$csv->read(); |
3623
|
|
|
|
|
|
|
sub walk { |
3624
|
|
|
|
|
|
|
my %rec = %{$_[0]}; |
3625
|
|
|
|
|
|
|
for (@cols) { |
3626
|
|
|
|
|
|
|
next if $_ eq ''; |
3627
|
|
|
|
|
|
|
print("$_ => ", $rec{$_}, "\n"); |
3628
|
|
|
|
|
|
|
} |
3629
|
|
|
|
|
|
|
print("\n"); |
3630
|
|
|
|
|
|
|
} |
3631
|
|
|
|
|
|
|
|
3632
|
|
|
|
|
|
|
=head2 OBJ-ish functions |
3633
|
|
|
|
|
|
|
|
3634
|
|
|
|
|
|
|
# Identify column internal names with more flexibility as the default mechanism |
3635
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'zips.csv', |
3636
|
|
|
|
|
|
|
fields_hr => {'CITY' => '^(city|town)', 'ZIPCODE' => '^zip(code)?$'}); |
3637
|
|
|
|
|
|
|
# Get zipcode of Claix |
3638
|
|
|
|
|
|
|
my $z = $csv->vlookup('CITY', ' claix ', 'ZIPCODE'); |
3639
|
|
|
|
|
|
|
|
3640
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'zips.csv'); |
3641
|
|
|
|
|
|
|
# Get zipcode of Claix |
3642
|
|
|
|
|
|
|
my $z = $csv->vlookup('CITY', ' claix ', 'ZIPCODE'); |
3643
|
|
|
|
|
|
|
# Same as above, but vlookup is strict for case and spaces around |
3644
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'zips.csv', search_case => 1, search_trim => 0); |
3645
|
|
|
|
|
|
|
my $z = $csv->vlookup('CITY', 'Claix', 'ZIPCODE'); |
3646
|
|
|
|
|
|
|
|
3647
|
|
|
|
|
|
|
# Create field 'MYCITY' made by taking pers.csv' ZIP column value, looking it up in the |
3648
|
|
|
|
|
|
|
# ZIPCODE columns of zips.csv, taking CITY colmun value and naming it 'MYCITY'. Output is |
3649
|
|
|
|
|
|
|
# written in std output. |
3650
|
|
|
|
|
|
|
# If a zipcode is ambiguous, say it. |
3651
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'pers.csv') |
3652
|
|
|
|
|
|
|
->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv', |
3653
|
|
|
|
|
|
|
{ ignore_ambiguous => 0, value_if_ambiguous => '<duplicate zipcode found!>' })->write(); |
3654
|
|
|
|
|
|
|
|
3655
|
|
|
|
|
|
|
# Note the above can also be written using Text::AutoCSV level attributes: |
3656
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'pers.csv', |
3657
|
|
|
|
|
|
|
search_ignore_ambiguous => 0, search_value_if_ambiguous => '<duplicate zipcode found!>') |
3658
|
|
|
|
|
|
|
->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv')->write(); |
3659
|
|
|
|
|
|
|
|
3660
|
|
|
|
|
|
|
# Create 'MYCITY' field as above, then display some statistics |
3661
|
|
|
|
|
|
|
my $nom_compose = 0; |
3662
|
|
|
|
|
|
|
my $zip_not_found = 0; |
3663
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'pers.csv', walker_hr => \&walk) |
3664
|
|
|
|
|
|
|
->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv')->read(); |
3665
|
|
|
|
|
|
|
sub walk { |
3666
|
|
|
|
|
|
|
my $hr = shift; |
3667
|
|
|
|
|
|
|
$nom_compose++ if $hr->{'NAME'} =~ m/[- ]/; |
3668
|
|
|
|
|
|
|
$zip_not_found++ unless defined($hr->{'MYCITY'}); |
3669
|
|
|
|
|
|
|
} |
3670
|
|
|
|
|
|
|
print("Number of persons with a multi-part name: $nom_compose\n"); |
3671
|
|
|
|
|
|
|
print("Number of persons with unknown zipcode: $zip_not_found\n"); |
3672
|
|
|
|
|
|
|
|
3673
|
|
|
|
|
|
|
=head2 Updating |
3674
|
|
|
|
|
|
|
|
3675
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'names.csv', out_file => 'ucnames.csv', |
3676
|
|
|
|
|
|
|
read_post_update_hr => \&updt)->write(); |
3677
|
|
|
|
|
|
|
sub updt { $_[0]->{'LASTNAME'} =~ s/^.*$/\U&/; } |
3678
|
|
|
|
|
|
|
|
3679
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'squares.csv', out_file => 'checkedsquares.csv', |
3680
|
|
|
|
|
|
|
out_filter => \&wf)->write(); |
3681
|
|
|
|
|
|
|
sub wf { return ($_[0]->{'X'} ** 2 == $_[0]->{'SQUAREOFX'}); } |
3682
|
|
|
|
|
|
|
|
3683
|
|
|
|
|
|
|
# Add a field for the full name, made of the concatenation of the |
3684
|
|
|
|
|
|
|
# first name and the last name. |
3685
|
|
|
|
|
|
|
# Also display stats about empty full names. |
3686
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'dirpeople.csv', out_file => 'dirwithfn.csv', verbose => 1) |
3687
|
|
|
|
|
|
|
->field_add_computed('FULLNAME', \&calc_fn)->write(); |
3688
|
|
|
|
|
|
|
sub calc_fn { |
3689
|
|
|
|
|
|
|
my ($field, $hr, $stats) = @_; |
3690
|
|
|
|
|
|
|
my $fn = $hr->{'FIRSTNAME'} . ' ' . uc($hr->{'LASTNAME'}); |
3691
|
|
|
|
|
|
|
$stats->{'empty full name'}++ if $fn eq ' '; |
3692
|
|
|
|
|
|
|
return $fn; |
3693
|
|
|
|
|
|
|
} |
3694
|
|
|
|
|
|
|
|
3695
|
|
|
|
|
|
|
# Read a file with a lot of columns and keep only 2 columns in output |
3696
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'big.csv', out_file => 'addr.csv', |
3697
|
|
|
|
|
|
|
out_fields => ['NAME', 'ADDRESS']) |
3698
|
|
|
|
|
|
|
->out_header('ADDRESS', 'Postal Address') |
3699
|
|
|
|
|
|
|
->write(); |
3700
|
|
|
|
|
|
|
|
3701
|
|
|
|
|
|
|
=head2 Datetime management |
3702
|
|
|
|
|
|
|
|
3703
|
|
|
|
|
|
|
# Detect any field containing a DateTime value and convert it to yyyy-mm-dd whatever the |
3704
|
|
|
|
|
|
|
# input format is. |
3705
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', fields_dates_auto => 1, |
3706
|
|
|
|
|
|
|
out_dates_format => '%F')->write(); |
3707
|
|
|
|
|
|
|
|
3708
|
|
|
|
|
|
|
# Detect any field containing a DateTime value and convert it to a US DateTime whatever the |
3709
|
|
|
|
|
|
|
# input format is. |
3710
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', fields_dates_auto => 1, |
3711
|
|
|
|
|
|
|
out_dates_format => '%b %d, %Y, %I:%M:%S %p', out_dates_locale => 'en')->write(); |
3712
|
|
|
|
|
|
|
|
3713
|
|
|
|
|
|
|
# Find dates of specific formats and convert it into yyyy-mm-dd |
3714
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'raw.csv', out_file => 'cooked.csv', |
3715
|
|
|
|
|
|
|
dates_formats_to_try => ['%d_%m_%Y', '%m_%d_%Y', '%Y_%m_%d'], |
3716
|
|
|
|
|
|
|
out_dates_format => '%F')->write(); |
3717
|
|
|
|
|
|
|
|
3718
|
|
|
|
|
|
|
# Take the dates on columns 'LASTLOGIN' and 'CREATIONDATE' and convert it into French dates |
3719
|
|
|
|
|
|
|
# (day/month/year). |
3720
|
|
|
|
|
|
|
# Text::AutoCSV will croak if LASTLOGIN or CREATIONDATE do not contain a DateTime format. |
3721
|
|
|
|
|
|
|
# By default, Text::AutoCSV will try 20 different formats. |
3722
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', |
3723
|
|
|
|
|
|
|
fields_dates => ['LASTLOGIN', 'CREATIONDATE'], out_dates_format => '%d/%m/%Y')->write(); |
3724
|
|
|
|
|
|
|
|
3725
|
|
|
|
|
|
|
# Convert 2 DateTime fields into unix standard epoch |
3726
|
|
|
|
|
|
|
# Write -1 if DateTime is empty. |
3727
|
|
|
|
|
|
|
sub toepoch { return $_->epoch() if $_; -1; } |
3728
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'stats.csv', out_file => 'stats-epoch.csv', |
3729
|
|
|
|
|
|
|
fields_dates => ['ATIME', 'MTIME']) |
3730
|
|
|
|
|
|
|
->in_map('ATIME', \&toepoch) |
3731
|
|
|
|
|
|
|
->in_map('MTIME', \&toepoch) |
3732
|
|
|
|
|
|
|
->write(); |
3733
|
|
|
|
|
|
|
|
3734
|
|
|
|
|
|
|
# Do the other way round from above: convert 2 fields containing unix standard epoch into a |
3735
|
|
|
|
|
|
|
# string displaying a human-readable DateTime. |
3736
|
|
|
|
|
|
|
my $formatter = DateTime::Format::Strptime->new(pattern => 'DATE=%F, TIME=%T'); |
3737
|
|
|
|
|
|
|
sub fromepoch { |
3738
|
|
|
|
|
|
|
return $formatter->format_datetime(DateTime->from_epoch(epoch => $_)) if $_ >= 0; |
3739
|
|
|
|
|
|
|
''; |
3740
|
|
|
|
|
|
|
} |
3741
|
|
|
|
|
|
|
$csv = Text::AutoCSV->new(in_file => 'stats-epoch.csv', out_file => 'stats2.csv') |
3742
|
|
|
|
|
|
|
->in_map('ATIME', \&fromepoch) |
3743
|
|
|
|
|
|
|
->in_map('MTIME', \&fromepoch) |
3744
|
|
|
|
|
|
|
->write(); |
3745
|
|
|
|
|
|
|
|
3746
|
|
|
|
|
|
|
=head2 Miscellaneous |
3747
|
|
|
|
|
|
|
|
3748
|
|
|
|
|
|
|
use Text::AutoCSV 'remove_accents'; |
3749
|
|
|
|
|
|
|
# Output 'Francais: etre elementaire, Tcheque: sluzba dum' followed by a new line. |
3750
|
|
|
|
|
|
|
print remove_accents("Français: être élémentaire, Tchèque: služba dům"), "\n"; |
3751
|
|
|
|
|
|
|
|
3752
|
|
|
|
|
|
|
=for Pod::Coverage ERR_UNKNOWN_FIELD |
3753
|
|
|
|
|
|
|
|
3754
|
|
|
|
|
|
|
=head1 NAME |
3755
|
|
|
|
|
|
|
|
3756
|
|
|
|
|
|
|
Text::AutoCSV - helper module to automate the use of Text::CSV |
3757
|
|
|
|
|
|
|
|
3758
|
|
|
|
|
|
|
=head1 METHODS |
3759
|
|
|
|
|
|
|
|
3760
|
|
|
|
|
|
|
=head2 new |
3761
|
|
|
|
|
|
|
|
3762
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(%attr); |
3763
|
|
|
|
|
|
|
|
3764
|
|
|
|
|
|
|
(Class method) Returns a new instance of Text::AutoCSV. The object attributes are described by the |
3765
|
|
|
|
|
|
|
hash C<%attr> (can be empty). |
3766
|
|
|
|
|
|
|
|
3767
|
|
|
|
|
|
|
Currently the following attributes are available: |
3768
|
|
|
|
|
|
|
|
3769
|
|
|
|
|
|
|
=over 4 |
3770
|
|
|
|
|
|
|
|
3771
|
|
|
|
|
|
|
=item Preliminary note about L</fields_hr>, L</fields_ar> and L</fields_column_names> attributes |
3772
|
|
|
|
|
|
|
|
3773
|
|
|
|
|
|
|
By default, Text::AutoCSV assumes the input has a header and will use the field values of this first |
3774
|
|
|
|
|
|
|
line (the header) to work out the column internal names. These internal names are used everywhere in |
3775
|
|
|
|
|
|
|
Text::AutoCSV to designate columns. |
3776
|
|
|
|
|
|
|
|
3777
|
|
|
|
|
|
|
The values are transformed as follows: |
3778
|
|
|
|
|
|
|
|
3779
|
|
|
|
|
|
|
- All accents are removed using the exportable function L</remove_accents>. |
3780
|
|
|
|
|
|
|
|
3781
|
|
|
|
|
|
|
- Any non-alphanumeric character is removed (except underscore) and all letters are switched to |
3782
|
|
|
|
|
|
|
upper case. The regex to do this is |
3783
|
|
|
|
|
|
|
|
3784
|
|
|
|
|
|
|
s/[^[:alnum:]_]//gi; s/^.*$/\U$&/; |
3785
|
|
|
|
|
|
|
|
3786
|
|
|
|
|
|
|
Thus a header line of |
3787
|
|
|
|
|
|
|
|
3788
|
|
|
|
|
|
|
'Office Number 1,Office_2,Personal Number' |
3789
|
|
|
|
|
|
|
|
3790
|
|
|
|
|
|
|
will produce the internal column names |
3791
|
|
|
|
|
|
|
|
3792
|
|
|
|
|
|
|
'OFFICENUMBER1' (first column) |
3793
|
|
|
|
|
|
|
|
3794
|
|
|
|
|
|
|
'OFFICE_2' (second column) |
3795
|
|
|
|
|
|
|
|
3796
|
|
|
|
|
|
|
'PERSONALNUMBER' (third column). |
3797
|
|
|
|
|
|
|
|
3798
|
|
|
|
|
|
|
The attribute L</fields_hr>, L</fields_ar> or L</fields_column_names> (only one of the three is |
3799
|
|
|
|
|
|
|
useful at a time) allows to change this behavior. |
3800
|
|
|
|
|
|
|
|
3801
|
|
|
|
|
|
|
B<NOTE> |
3802
|
|
|
|
|
|
|
|
3803
|
|
|
|
|
|
|
The removal of accents is *not* a conversion to us-ascii, see L</remove_accents> for details. |
3804
|
|
|
|
|
|
|
|
3805
|
|
|
|
|
|
|
=item Preliminary note about fields reading |
3806
|
|
|
|
|
|
|
|
3807
|
|
|
|
|
|
|
Functions that are given a field name (L</get_cell>, L</vlookup>, L</field_add_copy>, ...) raise an |
3808
|
|
|
|
|
|
|
error if the field requested does not exist. |
3809
|
|
|
|
|
|
|
|
3810
|
|
|
|
|
|
|
B<SO WILL THE HASHREFS GIVEN BY Text::AutoCSV:> when a function returns a hashref (L</search_1hr>, |
3811
|
|
|
|
|
|
|
L</get_row_hr>, ...), the hash is locked with the C<lock_keys> function of C<Hash::Util>. Any |
3812
|
|
|
|
|
|
|
attempt to read a non-existing key from the hash causes a croak. This feature is de-activated if you |
3813
|
|
|
|
|
|
|
specified C<croak_if_error =E<gt> 0> when creating Text::AutoCSV object. |
3814
|
|
|
|
|
|
|
|
3815
|
|
|
|
|
|
|
=item in_file |
3816
|
|
|
|
|
|
|
|
3817
|
|
|
|
|
|
|
The name of the file to read CSV data from. |
3818
|
|
|
|
|
|
|
|
3819
|
|
|
|
|
|
|
If not specified or empty, read standard input. |
3820
|
|
|
|
|
|
|
|
3821
|
|
|
|
|
|
|
Example: |
3822
|
|
|
|
|
|
|
|
3823
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv'); |
3824
|
|
|
|
|
|
|
|
3825
|
|
|
|
|
|
|
=item inh |
3826
|
|
|
|
|
|
|
|
3827
|
|
|
|
|
|
|
File handle to read CSV data from. |
3828
|
|
|
|
|
|
|
Normally you don't want to specify this attribute. |
3829
|
|
|
|
|
|
|
|
3830
|
|
|
|
|
|
|
C<inh> is useful if you don't like the way Text::AutoCSV opens the input file for you. |
3831
|
|
|
|
|
|
|
|
3832
|
|
|
|
|
|
|
Example: |
3833
|
|
|
|
|
|
|
|
3834
|
|
|
|
|
|
|
open my $inh, "producecsv.sh|"; |
3835
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(inh => $inh); |
3836
|
|
|
|
|
|
|
|
3837
|
|
|
|
|
|
|
=item encoding |
3838
|
|
|
|
|
|
|
|
3839
|
|
|
|
|
|
|
Comma-separated list of encodings to try to read input. |
3840
|
|
|
|
|
|
|
|
3841
|
|
|
|
|
|
|
Note that finding the correct encoding of any given input is overkill. This script just tries |
3842
|
|
|
|
|
|
|
encodings one after the other, and selects the first one that does not trigger a warning during |
3843
|
|
|
|
|
|
|
reading of input. If all produce warnings, select the first one. |
3844
|
|
|
|
|
|
|
|
3845
|
|
|
|
|
|
|
The encoding chosen is used in output, unless attribute L</out_encoding> is specified. |
3846
|
|
|
|
|
|
|
|
3847
|
|
|
|
|
|
|
Value by default: 'UTF-8,latin1' |
3848
|
|
|
|
|
|
|
|
3849
|
|
|
|
|
|
|
B<IMPORTANT> |
3850
|
|
|
|
|
|
|
|
3851
|
|
|
|
|
|
|
If one tries something like C<encoding =E<gt> 'latin1,UTF-8'>, it'll almost never detect UTF-8 |
3852
|
|
|
|
|
|
|
because latin1 rarely triggers warnings during reading. It tends to be also true with encodings like |
3853
|
|
|
|
|
|
|
UTF-16 that can remain happy with various inputs (sometimes resulting in Western languages turned |
3854
|
|
|
|
|
|
|
into Chinese text). |
3855
|
|
|
|
|
|
|
|
3856
|
|
|
|
|
|
|
Ultimately this attribute should be used with a unique value. The result when using more than one |
3857
|
|
|
|
|
|
|
value can produce weird results and should be considered B<experimental>. |
3858
|
|
|
|
|
|
|
|
3859
|
|
|
|
|
|
|
Example: |
3860
|
|
|
|
|
|
|
|
3861
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'w.csv', encoding => 'UTF-16'); |
3862
|
|
|
|
|
|
|
|
3863
|
|
|
|
|
|
|
=item via |
3864
|
|
|
|
|
|
|
|
3865
|
|
|
|
|
|
|
Adds a C<via> to the file opening instruction performed by Text::AutoCSV. You don't want to use it |
3866
|
|
|
|
|
|
|
under normal circumstances. |
3867
|
|
|
|
|
|
|
|
3868
|
|
|
|
|
|
|
The value should start with a ':' character (Text::AutoCSV won't add one for you). |
3869
|
|
|
|
|
|
|
|
3870
|
|
|
|
|
|
|
Value by default: none |
3871
|
|
|
|
|
|
|
|
3872
|
|
|
|
|
|
|
Example: |
3873
|
|
|
|
|
|
|
|
3874
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', via => ':raw:perlio:UTF-32:crlf'); |
3875
|
|
|
|
|
|
|
|
3876
|
|
|
|
|
|
|
=item dont_mess_with_encoding |
3877
|
|
|
|
|
|
|
|
3878
|
|
|
|
|
|
|
If true, just ignore completely encoding and don't try to alter I/O operations with encoding |
3879
|
|
|
|
|
|
|
considerations (using C<binmode> instruction). Note that if inh attribute is specified, then |
3880
|
|
|
|
|
|
|
Text::AutoCSV will consider the caller manages encoding for himself and dont_mess_with_encoding will |
3881
|
|
|
|
|
|
|
be automatically set, too. |
3882
|
|
|
|
|
|
|
|
3883
|
|
|
|
|
|
|
B<IMPORTANT> |
3884
|
|
|
|
|
|
|
|
3885
|
|
|
|
|
|
|
This attribute does not mean perl will totally ignore encoding and would consider character strings |
3886
|
|
|
|
|
|
|
as bytes for example. The meaning of L</dont_mess_with_encoding> is that Text::AutoCSV itself will |
3887
|
|
|
|
|
|
|
totally ignore encoding matters, and leave it entirely to Perl' default. |
3888
|
|
|
|
|
|
|
|
3889
|
|
|
|
|
|
|
Value by default: |
3890
|
|
|
|
|
|
|
|
3891
|
|
|
|
|
|
|
0 if inh attribute is not set |
3892
|
|
|
|
|
|
|
1 if inh attribute is set |
3893
|
|
|
|
|
|
|
|
3894
|
|
|
|
|
|
|
Example: |
3895
|
|
|
|
|
|
|
|
3896
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', dont_mess_with_encoding => 1); |
3897
|
|
|
|
|
|
|
|
3898
|
|
|
|
|
|
|
=item sep_char |
3899
|
|
|
|
|
|
|
|
3900
|
|
|
|
|
|
|
Specify the CSV separator character. Turns off separator auto-detection. This attribute is passed as |
3901
|
|
|
|
|
|
|
is to C<Text::CSV-E<gt>new()>. |
3902
|
|
|
|
|
|
|
|
3903
|
|
|
|
|
|
|
Example: |
3904
|
|
|
|
|
|
|
|
3905
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', sep_char => ';'); |
3906
|
|
|
|
|
|
|
|
3907
|
|
|
|
|
|
|
=item quote_char |
3908
|
|
|
|
|
|
|
|
3909
|
|
|
|
|
|
|
Specify the field quote character. This attribute is passed as is to C<Text::CSV-E<gt>new()>. |
3910
|
|
|
|
|
|
|
|
3911
|
|
|
|
|
|
|
Value by default: double quote ('"') |
3912
|
|
|
|
|
|
|
|
3913
|
|
|
|
|
|
|
Example: |
3914
|
|
|
|
|
|
|
|
3915
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', quote_char => '\''); |
3916
|
|
|
|
|
|
|
|
3917
|
|
|
|
|
|
|
=item escape_char |
3918
|
|
|
|
|
|
|
|
3919
|
|
|
|
|
|
|
Specify the escape character. Turns off escape character auto-detection. This attribute is passed as |
3920
|
|
|
|
|
|
|
is to C<Text::CSV-E<gt>new()>. |
3921
|
|
|
|
|
|
|
|
3922
|
|
|
|
|
|
|
Value by default: backslash ('\\') |
3923
|
|
|
|
|
|
|
|
3924
|
|
|
|
|
|
|
Example: |
3925
|
|
|
|
|
|
|
|
3926
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', escape_char => '"'); |
3927
|
|
|
|
|
|
|
|
3928
|
|
|
|
|
|
|
=item in_csvobj |
3929
|
|
|
|
|
|
|
|
3930
|
|
|
|
|
|
|
Text::CSV object to use. |
3931
|
|
|
|
|
|
|
Normally you don't want to specify this attribute. |
3932
|
|
|
|
|
|
|
|
3933
|
|
|
|
|
|
|
By default, Text::AutoCSV will manage creating such an object and will work hard to detect the |
3934
|
|
|
|
|
|
|
parameters it requires. |
3935
|
|
|
|
|
|
|
|
3936
|
|
|
|
|
|
|
Defining C<in_csvobj> attribute turns off separator character and escape character auto-detection. |
3937
|
|
|
|
|
|
|
|
3938
|
|
|
|
|
|
|
Using this attribute workarounds Text::AutoCSV philosophy a bit, but you may need it in case |
3939
|
|
|
|
|
|
|
Text::AutoCSV behavior is not suitable for Text::CSV creation. |
3940
|
|
|
|
|
|
|
|
3941
|
|
|
|
|
|
|
Example: |
3942
|
|
|
|
|
|
|
|
3943
|
|
|
|
|
|
|
my $tcsv = Text::CSV->new(); |
3944
|
|
|
|
|
|
|
my $acsv = Text::AutoCSV->new(in_file => 'in.csv', in_csvobj => $tcsv); |
3945
|
|
|
|
|
|
|
|
3946
|
|
|
|
|
|
|
=item has_headers |
3947
|
|
|
|
|
|
|
|
3948
|
|
|
|
|
|
|
If true, Text::AutoCSV assumes the input has a header line. |
3949
|
|
|
|
|
|
|
|
3950
|
|
|
|
|
|
|
Value by default: 1 |
3951
|
|
|
|
|
|
|
|
3952
|
|
|
|
|
|
|
Example: |
3953
|
|
|
|
|
|
|
|
3954
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', has_headers => 0); |
3955
|
|
|
|
|
|
|
|
3956
|
|
|
|
|
|
|
=item fields_hr |
3957
|
|
|
|
|
|
|
|
3958
|
|
|
|
|
|
|
(Only if input has a header line) Hash ref that contains column internal names along with a regular |
3959
|
|
|
|
|
|
|
expression to find it in the header line. |
3960
|
|
|
|
|
|
|
For example if you have: |
3961
|
|
|
|
|
|
|
|
3962
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', |
3963
|
|
|
|
|
|
|
fields_hr => {'PHONE OFFICE' => '^office phone nu', |
3964
|
|
|
|
|
|
|
'PHONE PERSONAL' => '^personal phone nu'}); |
3965
|
|
|
|
|
|
|
|
3966
|
|
|
|
|
|
|
And the header line is |
3967
|
|
|
|
|
|
|
|
3968
|
|
|
|
|
|
|
'Personal Phone Number,Office Phone Number' |
3969
|
|
|
|
|
|
|
|
3970
|
|
|
|
|
|
|
the column name 'PHONE OFFICE' will designate the second column and the column name 'PHONE PERSONAL' |
3971
|
|
|
|
|
|
|
will designate the first column. |
3972
|
|
|
|
|
|
|
|
3973
|
|
|
|
|
|
|
You can choose column names like 'Phone Office' and 'Phone Personal' as well. |
3974
|
|
|
|
|
|
|
|
3975
|
|
|
|
|
|
|
The regex search is case insensitive. |
3976
|
|
|
|
|
|
|
|
3977
|
|
|
|
|
|
|
=item fields_ar |
3978
|
|
|
|
|
|
|
|
3979
|
|
|
|
|
|
|
(Only if input has a header line) Array ref that contains column internal names. The array is used |
3980
|
|
|
|
|
|
|
to create a hash ref of the same kind as L</fields_hr>, by wrapping the column name in a regex. The |
3981
|
|
|
|
|
|
|
names are surrounded by a leading '^' and a trailing '$', meaning, the name must match the entire |
3982
|
|
|
|
|
|
|
field name. |
3983
|
|
|
|
|
|
|
|
3984
|
|
|
|
|
|
|
For example |
3985
|
|
|
|
|
|
|
|
3986
|
|
|
|
|
|
|
fields_ar => ['OFFICENUMBER', 'PERSONALNUMBER'] |
3987
|
|
|
|
|
|
|
|
3988
|
|
|
|
|
|
|
is strictly equivalent to |
3989
|
|
|
|
|
|
|
|
3990
|
|
|
|
|
|
|
fields_hr => {'OFFICENUMBER' => '^officenumber$', 'PERSONALNUMBER' = '^personalnumber$'} |
3991
|
|
|
|
|
|
|
|
3992
|
|
|
|
|
|
|
The regex search is case insensitive. |
3993
|
|
|
|
|
|
|
|
3994
|
|
|
|
|
|
|
C<fields_ar> is useful if the internal names are identical to the file column names. It avoids |
3995
|
|
|
|
|
|
|
repeating the names over and over as would happen if using L</fields_hr> attribute. |
3996
|
|
|
|
|
|
|
|
3997
|
|
|
|
|
|
|
I<NOTE> |
3998
|
|
|
|
|
|
|
|
3999
|
|
|
|
|
|
|
You might wonder why using fields_ar as opposed to Text::AutoCSV default' mechanism. There are two |
4000
|
|
|
|
|
|
|
reasons for that: |
4001
|
|
|
|
|
|
|
|
4002
|
|
|
|
|
|
|
1- Text::AutoCSV removes spaces from column names, and some people may want another behavior. A |
4003
|
|
|
|
|
|
|
header name of 'Phone Number' will get an internal column name of 'PHONENUMBER' (default behavior, |
4004
|
|
|
|
|
|
|
if none of fields_hr, fields_ar and fields_column_names attributes is specified), and one may prefer |
4005
|
|
|
|
|
|
|
'PHONE NUMBER' or 'phone number' or whatsoever. |
4006
|
|
|
|
|
|
|
|
4007
|
|
|
|
|
|
|
2- By specifying a list of columns using either of fields_hr or fields_ar, you not only map column |
4008
|
|
|
|
|
|
|
names as found in the header line to internal column names: you also I<request> these columns to be |
4009
|
|
|
|
|
|
|
available. If one of the requested columns cannot be found, Text::AutoCSV will croak (default) or |
4010
|
|
|
|
|
|
|
print an error and return an undef object (if created with C<croak_if_error =E<gt> 0>). |
4011
|
|
|
|
|
|
|
|
4012
|
|
|
|
|
|
|
=item fields_column_names |
4013
|
|
|
|
|
|
|
|
4014
|
|
|
|
|
|
|
Array ref of column internal names, in the order of columns in file. This attribute works like the |
4015
|
|
|
|
|
|
|
C<column_names> attribute of Text::CSV. It'll just assign names to columns one by one, regardless of |
4016
|
|
|
|
|
|
|
what the header line contains. It'll work also if the file has no header line. |
4017
|
|
|
|
|
|
|
|
4018
|
|
|
|
|
|
|
Example: |
4019
|
|
|
|
|
|
|
|
4020
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', |
4021
|
|
|
|
|
|
|
fields_column_names => ['My COL1', '', 'My COL3']); |
4022
|
|
|
|
|
|
|
|
4023
|
|
|
|
|
|
|
=item out_file |
4024
|
|
|
|
|
|
|
|
4025
|
|
|
|
|
|
|
Output file when executing the L</write> method. |
4026
|
|
|
|
|
|
|
|
4027
|
|
|
|
|
|
|
If not specified or empty, write to standard output. |
4028
|
|
|
|
|
|
|
|
4029
|
|
|
|
|
|
|
Example: |
4030
|
|
|
|
|
|
|
|
4031
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv'); |
4032
|
|
|
|
|
|
|
|
4033
|
|
|
|
|
|
|
=item outh |
4034
|
|
|
|
|
|
|
|
4035
|
|
|
|
|
|
|
File handle to write CSV data to when executing the L</write> method. |
4036
|
|
|
|
|
|
|
Normally you don't want to specify this attribute. |
4037
|
|
|
|
|
|
|
|
4038
|
|
|
|
|
|
|
C<outh> is useful if you don't like the way Text::AutoCSV opens the output file for you. |
4039
|
|
|
|
|
|
|
|
4040
|
|
|
|
|
|
|
Example: |
4041
|
|
|
|
|
|
|
|
4042
|
|
|
|
|
|
|
my $outh = open "myin.csv', ">>"; |
4043
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', has_headers => 0, outh => $outh); |
4044
|
|
|
|
|
|
|
|
4045
|
|
|
|
|
|
|
=item out_encoding |
4046
|
|
|
|
|
|
|
|
4047
|
|
|
|
|
|
|
Enforce the encoding of output. |
4048
|
|
|
|
|
|
|
|
4049
|
|
|
|
|
|
|
Value by default: input encoding |
4050
|
|
|
|
|
|
|
|
4051
|
|
|
|
|
|
|
Example: |
4052
|
|
|
|
|
|
|
|
4053
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', |
4054
|
|
|
|
|
|
|
out_encoding => 'UTF-16'); |
4055
|
|
|
|
|
|
|
|
4056
|
|
|
|
|
|
|
=item out_utf8_bom |
4057
|
|
|
|
|
|
|
|
4058
|
|
|
|
|
|
|
Enforce BOM (Byte-Order-Mark) on output, when it is UTF8. If output encoding is not UTF-8, this |
4059
|
|
|
|
|
|
|
attribute is ignored. |
4060
|
|
|
|
|
|
|
|
4061
|
|
|
|
|
|
|
B<NOTE> |
4062
|
|
|
|
|
|
|
|
4063
|
|
|
|
|
|
|
UTF-8 needs no BOM (there is no Byte-Order in UTF-8), and in practice, UTF8-encoded files rarely |
4064
|
|
|
|
|
|
|
have a BOM. |
4065
|
|
|
|
|
|
|
|
4066
|
|
|
|
|
|
|
Using this attribute is not recommended. It is provided for the sake of completeness, and also to |
4067
|
|
|
|
|
|
|
produce Unicode files Microsoft EXCEL will be happy to read. |
4068
|
|
|
|
|
|
|
|
4069
|
|
|
|
|
|
|
At first sight it would seem more logical to make EXCEL happy with something like this: |
4070
|
|
|
|
|
|
|
|
4071
|
|
|
|
|
|
|
out_encoding => 'UTF-16' |
4072
|
|
|
|
|
|
|
|
4073
|
|
|
|
|
|
|
But... While EXCEL will identify UTF-16 and read it as such, it will not take into account the BOM |
4074
|
|
|
|
|
|
|
found at the beginning. In the end the first cell will have 2 useless characters prepended. The only |
4075
|
|
|
|
|
|
|
solution the author knows to workaround this issue if to use UTF-8 as output encoding, and enforce a |
4076
|
|
|
|
|
|
|
BOM. That is, use: |
4077
|
|
|
|
|
|
|
|
4078
|
|
|
|
|
|
|
..., out_encoding => 'UTF-8', out_utf8_bom => 1, ... |
4079
|
|
|
|
|
|
|
|
4080
|
|
|
|
|
|
|
=item out_sep_char |
4081
|
|
|
|
|
|
|
|
4082
|
|
|
|
|
|
|
Enforce the output CSV separator character. |
4083
|
|
|
|
|
|
|
|
4084
|
|
|
|
|
|
|
Value by default: input separator |
4085
|
|
|
|
|
|
|
|
4086
|
|
|
|
|
|
|
Example: |
4087
|
|
|
|
|
|
|
|
4088
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', out_sep_char => ','); |
4089
|
|
|
|
|
|
|
|
4090
|
|
|
|
|
|
|
=item out_quote_char |
4091
|
|
|
|
|
|
|
|
4092
|
|
|
|
|
|
|
Enforce the output CSV quote character. |
4093
|
|
|
|
|
|
|
|
4094
|
|
|
|
|
|
|
Value by default: input quote character |
4095
|
|
|
|
|
|
|
|
4096
|
|
|
|
|
|
|
Example: |
4097
|
|
|
|
|
|
|
|
4098
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', out_quote_char => '"'); |
4099
|
|
|
|
|
|
|
|
4100
|
|
|
|
|
|
|
=item out_escape_char |
4101
|
|
|
|
|
|
|
|
4102
|
|
|
|
|
|
|
Enforce the output CSV escape character. |
4103
|
|
|
|
|
|
|
|
4104
|
|
|
|
|
|
|
Value by default: input escape character |
4105
|
|
|
|
|
|
|
|
4106
|
|
|
|
|
|
|
Example: |
4107
|
|
|
|
|
|
|
|
4108
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', |
4109
|
|
|
|
|
|
|
out_escape_char_char => '\\'); |
4110
|
|
|
|
|
|
|
|
4111
|
|
|
|
|
|
|
=item out_always_quote |
4112
|
|
|
|
|
|
|
|
4113
|
|
|
|
|
|
|
If true, quote all fields of output (set always_quote of Text::CSV). |
4114
|
|
|
|
|
|
|
|
4115
|
|
|
|
|
|
|
If false, don't quote all fields of output (don't set C<always_quote> of Text::CSV). |
4116
|
|
|
|
|
|
|
|
4117
|
|
|
|
|
|
|
Value by default: same as what is found in input |
4118
|
|
|
|
|
|
|
|
4119
|
|
|
|
|
|
|
While reading input, Text::AutoCSV works out whether or not all fields were quoted. If yes, then the |
4120
|
|
|
|
|
|
|
output Text::CSV object has the always_quote attribute set, if no, then the output Text::CSV object |
4121
|
|
|
|
|
|
|
does not have this attribute set. |
4122
|
|
|
|
|
|
|
|
4123
|
|
|
|
|
|
|
Example: |
4124
|
|
|
|
|
|
|
|
4125
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', out_always_quote => 1); |
4126
|
|
|
|
|
|
|
|
4127
|
|
|
|
|
|
|
=item out_has_headers |
4128
|
|
|
|
|
|
|
|
4129
|
|
|
|
|
|
|
If true, when writing output, write a header line on first line. |
4130
|
|
|
|
|
|
|
|
4131
|
|
|
|
|
|
|
If false, when writing output, don't write a header line on first line. |
4132
|
|
|
|
|
|
|
|
4133
|
|
|
|
|
|
|
Value by default: same as has_headers attribute |
4134
|
|
|
|
|
|
|
|
4135
|
|
|
|
|
|
|
Example 1 |
4136
|
|
|
|
|
|
|
|
4137
|
|
|
|
|
|
|
Read standard input and write to standard output, removing the header line. |
4138
|
|
|
|
|
|
|
|
4139
|
|
|
|
|
|
|
Text::AutoCSV->new(out_has_headers => 0)->write(); |
4140
|
|
|
|
|
|
|
|
4141
|
|
|
|
|
|
|
Example 2 |
4142
|
|
|
|
|
|
|
|
4143
|
|
|
|
|
|
|
Read standard input and write to standard output, adding a header line. |
4144
|
|
|
|
|
|
|
|
4145
|
|
|
|
|
|
|
Text::AutoCSV->new(fields_column_names => ['MYCOL1', 'MYCOL2'], out_has_headers => 1)->write(); |
4146
|
|
|
|
|
|
|
|
4147
|
|
|
|
|
|
|
=item no_undef |
4148
|
|
|
|
|
|
|
|
4149
|
|
|
|
|
|
|
If true, non-existent column values are set to an empty string instead of undef. It is also done on |
4150
|
|
|
|
|
|
|
extra fields that happen to have an undef value (for example when the target of a linked field is |
4151
|
|
|
|
|
|
|
not found). |
4152
|
|
|
|
|
|
|
|
4153
|
|
|
|
|
|
|
Note this attribute does not work on callback functions output set with L</in_map>: for example |
4154
|
|
|
|
|
|
|
empty DateTime values (on fields identified as containing a date/time, see C<dates_*> attributes |
4155
|
|
|
|
|
|
|
below) are set to C<undef>, even while C<no_undef> is set. Indeed setting it to an empty string |
4156
|
|
|
|
|
|
|
while non-empty values would contain a Datetime object would not be clean. An empty value in a |
4157
|
|
|
|
|
|
|
placeholder containing an object must be undef. |
4158
|
|
|
|
|
|
|
|
4159
|
|
|
|
|
|
|
Since version 1.1.5 of Text::AutoCSV, C<no_undef> is examined when sending parameter ($_) to |
4160
|
|
|
|
|
|
|
L</in_map> callback: an undef value is now passed as is (as undef), unless C<no_undef> is set. If |
4161
|
|
|
|
|
|
|
C<no_undef> is set, and field value is undef, then $_ is set to the empty string ('') when calling |
4162
|
|
|
|
|
|
|
callback defined by L</in_map>. This new behavior was put in place to be consistent with what is |
4163
|
|
|
|
|
|
|
being done with DateTime values. |
4164
|
|
|
|
|
|
|
|
4165
|
|
|
|
|
|
|
Value by default: 0 |
4166
|
|
|
|
|
|
|
|
4167
|
|
|
|
|
|
|
Example: |
4168
|
|
|
|
|
|
|
|
4169
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', no_undef => 1); |
4170
|
|
|
|
|
|
|
|
4171
|
|
|
|
|
|
|
=item read_post_update_hr |
4172
|
|
|
|
|
|
|
|
4173
|
|
|
|
|
|
|
To be set to a ref sub. Each time a record is read from input, call C<read_post_update_hr> to update |
4174
|
|
|
|
|
|
|
the hash ref of the record. The sub is called with 2 arguments: the hash ref to the record value and |
4175
|
|
|
|
|
|
|
the hash ref to stats. |
4176
|
|
|
|
|
|
|
|
4177
|
|
|
|
|
|
|
The stats allow to count events and are printed in the end of reading in case Text::AutoCSV is |
4178
|
|
|
|
|
|
|
called in verbose mode (C<verbose =E<gt> 1>). |
4179
|
|
|
|
|
|
|
|
4180
|
|
|
|
|
|
|
For example, the C<read_post_update_hr> below will turn column 'CITY' values in upper case and count |
4181
|
|
|
|
|
|
|
occurences of empty cities in stat display: |
4182
|
|
|
|
|
|
|
|
4183
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'addresses.csv', read_post_update_hr => \&updt, verbose => 1) |
4184
|
|
|
|
|
|
|
->write(); |
4185
|
|
|
|
|
|
|
sub updt { |
4186
|
|
|
|
|
|
|
my ($hr, $stats) = @_; |
4187
|
|
|
|
|
|
|
$hr->{'CITY'} =~ s/^.*$/\U$&/; |
4188
|
|
|
|
|
|
|
$stats->{'empty city encountered'}++ if $hr->{'CITY'} eq ''; |
4189
|
|
|
|
|
|
|
} |
4190
|
|
|
|
|
|
|
|
4191
|
|
|
|
|
|
|
B<IMPORTANT> |
4192
|
|
|
|
|
|
|
|
4193
|
|
|
|
|
|
|
You cannot create a field this way. To create a field, you have to use the member functions |
4194
|
|
|
|
|
|
|
L</field_add_link>, L</field_add_copy> or L</field_add_computed>. |
4195
|
|
|
|
|
|
|
|
4196
|
|
|
|
|
|
|
B<NOTE> |
4197
|
|
|
|
|
|
|
|
4198
|
|
|
|
|
|
|
If you wish to manage some updates at field level, consider registering update functions with |
4199
|
|
|
|
|
|
|
L</in_map> and L</out_map> member functions. These functions register callbacks that work at field |
4200
|
|
|
|
|
|
|
level and with $_ variable (thus the callback function invoked is AutoCSV-agnostic). |
4201
|
|
|
|
|
|
|
|
4202
|
|
|
|
|
|
|
L</in_map> updates a field after read, L</out_map> updates the field content before writing it. |
4203
|
|
|
|
|
|
|
|
4204
|
|
|
|
|
|
|
=item walker_hr |
4205
|
|
|
|
|
|
|
|
4206
|
|
|
|
|
|
|
To set to a sub ref that'll be executed each time a record is read from input. It is executed after |
4207
|
|
|
|
|
|
|
L</read_post_update_hr>. The sub is called with 2 arguments: the hash ref to the record value and |
4208
|
|
|
|
|
|
|
the hash ref to stats. |
4209
|
|
|
|
|
|
|
|
4210
|
|
|
|
|
|
|
Note L</read_post_update_hr> is meant for updating record fields just after reading, whereas |
4211
|
|
|
|
|
|
|
L</walker_hr> is read-only. |
4212
|
|
|
|
|
|
|
|
4213
|
|
|
|
|
|
|
The stats allow to count events and are printed in the end of reading in case Text::AutoCSV is |
4214
|
|
|
|
|
|
|
called in verbose mode (C<verbose =E<gt> 1>). If the L</verbose> attribute is not set, the stats are |
4215
|
|
|
|
|
|
|
not displayed, however you can get stats by calling the get_stats function. |
4216
|
|
|
|
|
|
|
|
4217
|
|
|
|
|
|
|
The example below will count in the stats the number of records where the 'CITY' field is empty. |
4218
|
|
|
|
|
|
|
Thanks to C<verbose =E<gt> 1> attribute, at the end of reading the stats are displayed. |
4219
|
|
|
|
|
|
|
|
4220
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'addresses.csv', walker_hr => \&walk1, |
4221
|
|
|
|
|
|
|
verbose => 1)->read(); |
4222
|
|
|
|
|
|
|
sub walk1 { |
4223
|
|
|
|
|
|
|
my ($hr, $stats) = @_; |
4224
|
|
|
|
|
|
|
$stats->{'empty city'}++ if $hr->{'CITY'} eq ''; |
4225
|
|
|
|
|
|
|
} |
4226
|
|
|
|
|
|
|
|
4227
|
|
|
|
|
|
|
=item walker_ar |
4228
|
|
|
|
|
|
|
|
4229
|
|
|
|
|
|
|
To set to a sub ref that'll be executed each time a record is read from input. It is executed after |
4230
|
|
|
|
|
|
|
L</read_post_update_hr>. The sub is called with 2 arguments: the array ref to the record value and |
4231
|
|
|
|
|
|
|
the hash ref to stats. |
4232
|
|
|
|
|
|
|
|
4233
|
|
|
|
|
|
|
Note L</read_post_update_hr> is meant for updating record fields just after reading, whereas |
4234
|
|
|
|
|
|
|
C<walker_hr> is read-only. |
4235
|
|
|
|
|
|
|
|
4236
|
|
|
|
|
|
|
The stats allow to count events and are printed in the end of reading in case Text::AutoCSV is |
4237
|
|
|
|
|
|
|
called in verbose mode (C<verbose =E<gt> 1>). If the L</verbose> attribute is not set, the stats are |
4238
|
|
|
|
|
|
|
lost. |
4239
|
|
|
|
|
|
|
|
4240
|
|
|
|
|
|
|
The array ref contains values in their natural order in the CSV. To be used with the column names, |
4241
|
|
|
|
|
|
|
you have to use L</get_fields_names> member function. |
4242
|
|
|
|
|
|
|
|
4243
|
|
|
|
|
|
|
The example below will count in the stats the number of records where the 'CITY' field is empty. |
4244
|
|
|
|
|
|
|
Thanks to C<verbose =E<gt> 1> attribute, at the end of reading the stats are displayed. It produces |
4245
|
|
|
|
|
|
|
the exact same result as the example in walker_hr attribute, but it uses walker_ar. |
4246
|
|
|
|
|
|
|
|
4247
|
|
|
|
|
|
|
use List::MoreUtils qw(first_index); |
4248
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'addresses.csv', walker_ar => \&walk2, verbose => 1); |
4249
|
|
|
|
|
|
|
my @cols = $csv->get_fields_names(); |
4250
|
|
|
|
|
|
|
my $idxCITY = first_index { /^city$/i } @cols; |
4251
|
|
|
|
|
|
|
die "No city field!??" if $idxCITY < 0; |
4252
|
|
|
|
|
|
|
$csv->read(); |
4253
|
|
|
|
|
|
|
sub walk2 { |
4254
|
|
|
|
|
|
|
my ($ar, $stats) = @_; |
4255
|
|
|
|
|
|
|
$stats->{'empty city'}++ if $ar->[$idxCITY] eq ''; |
4256
|
|
|
|
|
|
|
} |
4257
|
|
|
|
|
|
|
|
4258
|
|
|
|
|
|
|
=item write_filter_hr |
4259
|
|
|
|
|
|
|
|
4260
|
|
|
|
|
|
|
Alias of L</out_filter>. |
4261
|
|
|
|
|
|
|
|
4262
|
|
|
|
|
|
|
=item out_filter |
4263
|
|
|
|
|
|
|
|
4264
|
|
|
|
|
|
|
To be set to a ref sub. Before writing a record to output, C<out_filter> is called and the record |
4265
|
|
|
|
|
|
|
gets writen only if C<out_filter> return value is true. The sub is called with 1 argument: the hash |
4266
|
|
|
|
|
|
|
ref to the record value. |
4267
|
|
|
|
|
|
|
|
4268
|
|
|
|
|
|
|
For example, if you want to output only records where the 'CITY' column value is Grenoble: |
4269
|
|
|
|
|
|
|
|
4270
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'addresses.csv', out_file => 'grenoble.csv', |
4271
|
|
|
|
|
|
|
out_filter => \&filt)->write(); |
4272
|
|
|
|
|
|
|
sub filt { |
4273
|
|
|
|
|
|
|
my $hr = shift; |
4274
|
|
|
|
|
|
|
return 1 if $hr->{'CITY'} =~ /^grenoble$/i; |
4275
|
|
|
|
|
|
|
return 0; |
4276
|
|
|
|
|
|
|
} |
4277
|
|
|
|
|
|
|
|
4278
|
|
|
|
|
|
|
=item write_fields |
4279
|
|
|
|
|
|
|
|
4280
|
|
|
|
|
|
|
Alias of L</out_fields>. |
4281
|
|
|
|
|
|
|
|
4282
|
|
|
|
|
|
|
=item out_fields |
4283
|
|
|
|
|
|
|
|
4284
|
|
|
|
|
|
|
Set to an array ref. List fields to write to output. |
4285
|
|
|
|
|
|
|
|
4286
|
|
|
|
|
|
|
Fields are written in their order in the array ref, the first CSV column being the first element in |
4287
|
|
|
|
|
|
|
the array, and so on. Fields not listed in B<out_fields> are not written in output. |
4288
|
|
|
|
|
|
|
|
4289
|
|
|
|
|
|
|
You can use empty field names to have empty columns in output. |
4290
|
|
|
|
|
|
|
|
4291
|
|
|
|
|
|
|
Example: |
4292
|
|
|
|
|
|
|
|
4293
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'allinfos.csv', out_file => 'only-addresses.csv', |
4294
|
|
|
|
|
|
|
out_fields => [ 'NAME', 'ADDRESS' ] )->write(); |
4295
|
|
|
|
|
|
|
|
4296
|
|
|
|
|
|
|
=item search_case |
4297
|
|
|
|
|
|
|
|
4298
|
|
|
|
|
|
|
If true, searches are case sensitive by default. Searches are done by the member functions |
4299
|
|
|
|
|
|
|
L</search>, L</search_1hr>, L</vlookup>, and linked fields (L</field_add_link>). |
4300
|
|
|
|
|
|
|
|
4301
|
|
|
|
|
|
|
The search functions can also be called with the option L</case>, that takes precedence over the |
4302
|
|
|
|
|
|
|
object-level C<search_case> attribute value. See L</vlookup> help. |
4303
|
|
|
|
|
|
|
|
4304
|
|
|
|
|
|
|
Value by default: 0 (by default searches are case insensitive) |
4305
|
|
|
|
|
|
|
|
4306
|
|
|
|
|
|
|
Example: |
4307
|
|
|
|
|
|
|
|
4308
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', search_case => 1); |
4309
|
|
|
|
|
|
|
|
4310
|
|
|
|
|
|
|
=item search_trim |
4311
|
|
|
|
|
|
|
|
4312
|
|
|
|
|
|
|
If true, searches ignore the presence of leading or trailing spaces in values. |
4313
|
|
|
|
|
|
|
|
4314
|
|
|
|
|
|
|
The search functions can also be called with the option L</trim>, that takes precedence over the |
4315
|
|
|
|
|
|
|
object-level C<search_trim> attribute value. See L</vlookup> help. |
4316
|
|
|
|
|
|
|
|
4317
|
|
|
|
|
|
|
Value by default: 1 (by default searches ignore leading and trailing spaces) |
4318
|
|
|
|
|
|
|
|
4319
|
|
|
|
|
|
|
Example: |
4320
|
|
|
|
|
|
|
|
4321
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', search_trim => 0); |
4322
|
|
|
|
|
|
|
|
4323
|
|
|
|
|
|
|
=item search_ignore_empty |
4324
|
|
|
|
|
|
|
|
4325
|
|
|
|
|
|
|
If true, empty fields are not included in the search indexes. |
4326
|
|
|
|
|
|
|
|
4327
|
|
|
|
|
|
|
The search functions can also be called with the option L</ignore_empty>, that takes precedence over |
4328
|
|
|
|
|
|
|
the object-level C<search_ignore_empty> attribute value. See L</vlookup> help. |
4329
|
|
|
|
|
|
|
|
4330
|
|
|
|
|
|
|
Value by default: 1 (by default, search of the value '' will find nothing) |
4331
|
|
|
|
|
|
|
|
4332
|
|
|
|
|
|
|
Example: |
4333
|
|
|
|
|
|
|
|
4334
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', search_ignore_empty => 0); |
4335
|
|
|
|
|
|
|
|
4336
|
|
|
|
|
|
|
=item search_ignore_accents |
4337
|
|
|
|
|
|
|
|
4338
|
|
|
|
|
|
|
If true, accents are ignored by search indexes. |
4339
|
|
|
|
|
|
|
|
4340
|
|
|
|
|
|
|
The search functions can also be called with the option L</ignore_accents>, that takes precedence |
4341
|
|
|
|
|
|
|
over the object-level C<search_ignore_accents> attribute value. See L</vlookup> help. |
4342
|
|
|
|
|
|
|
|
4343
|
|
|
|
|
|
|
Value by default: 1 (by default, accents are ignored by search functions) |
4344
|
|
|
|
|
|
|
|
4345
|
|
|
|
|
|
|
Example: |
4346
|
|
|
|
|
|
|
|
4347
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', search_ignore_accents => 0); |
4348
|
|
|
|
|
|
|
|
4349
|
|
|
|
|
|
|
=item search_value_if_not_found |
4350
|
|
|
|
|
|
|
|
4351
|
|
|
|
|
|
|
When a search is done with a unique value to return (field_add_link member function behavior or |
4352
|
|
|
|
|
|
|
return value of vlookup), default value of option L</value_if_not_found>. See L</vlookup>. |
4353
|
|
|
|
|
|
|
|
4354
|
|
|
|
|
|
|
=item search_value_if_found |
4355
|
|
|
|
|
|
|
|
4356
|
|
|
|
|
|
|
When a search is done with a unique value to return (field_add_link member function behavior or |
4357
|
|
|
|
|
|
|
return value of vlookup), default value of option L</value_if_found>. See L</vlookup>. |
4358
|
|
|
|
|
|
|
|
4359
|
|
|
|
|
|
|
B<IMPORTANT> |
4360
|
|
|
|
|
|
|
|
4361
|
|
|
|
|
|
|
This attribute is extremly unusual. Once you've provided it, all vlookups and the target field value |
4362
|
|
|
|
|
|
|
of fields created with field_add_link will all be populated with the value provided with this |
4363
|
|
|
|
|
|
|
option. |
4364
|
|
|
|
|
|
|
|
4365
|
|
|
|
|
|
|
Don't use it unless you know what you are doing. |
4366
|
|
|
|
|
|
|
|
4367
|
|
|
|
|
|
|
=item search_ignore_ambiguous |
4368
|
|
|
|
|
|
|
|
4369
|
|
|
|
|
|
|
When a search is done with a unique value to return (field_add_link member function behavior or |
4370
|
|
|
|
|
|
|
return value of search_1hr and vlookup), default value of option L</ignore_ambiguous>. See |
4371
|
|
|
|
|
|
|
L</vlookup>. |
4372
|
|
|
|
|
|
|
|
4373
|
|
|
|
|
|
|
=item search_value_if_ambiguous |
4374
|
|
|
|
|
|
|
|
4375
|
|
|
|
|
|
|
When a search is done with a unique value to return (field_add_link member function behavior or |
4376
|
|
|
|
|
|
|
return value of vlookup), default value of option L</value_if_ambiguous>. See L</vlookup>. |
4377
|
|
|
|
|
|
|
|
4378
|
|
|
|
|
|
|
=item fields_dates |
4379
|
|
|
|
|
|
|
|
4380
|
|
|
|
|
|
|
Array ref of field names that contain a date. |
4381
|
|
|
|
|
|
|
|
4382
|
|
|
|
|
|
|
Once the formats of these fields is known (auto-detection by default), each of these fields will get |
4383
|
|
|
|
|
|
|
a specific L</in_map> sub that converts the text in a DateTime object and a L</out_map> sub that |
4384
|
|
|
|
|
|
|
converts back from DateTime to text. |
4385
|
|
|
|
|
|
|
|
4386
|
|
|
|
|
|
|
B<NOTE> |
4387
|
|
|
|
|
|
|
|
4388
|
|
|
|
|
|
|
The L</out_map> given to a DateTime field is "defensive code": normally, L</in_map> converts text |
4389
|
|
|
|
|
|
|
into a DateTime object and L</out_map> does the opposite, it takes a DateTime object and converts it |
4390
|
|
|
|
|
|
|
to text. If ever L</out_map> encounters a value that is not a DateTime object, it'll just stringify |
4391
|
|
|
|
|
|
|
it (evaluation in a string context), without calling its DateTime formatter. |
4392
|
|
|
|
|
|
|
|
4393
|
|
|
|
|
|
|
If the format cannot be detected for a given field, output an error message and as always when an |
4394
|
|
|
|
|
|
|
error occurs, croak (unless L</croak_if_error> got set to 0). |
4395
|
|
|
|
|
|
|
|
4396
|
|
|
|
|
|
|
Value by default: none |
4397
|
|
|
|
|
|
|
|
4398
|
|
|
|
|
|
|
Example: |
4399
|
|
|
|
|
|
|
|
4400
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'logins.csv', |
4401
|
|
|
|
|
|
|
fields_dates => ['LASTLOGIN', 'CREATIONDATE']); |
4402
|
|
|
|
|
|
|
|
4403
|
|
|
|
|
|
|
=item fields_dates_auto |
4404
|
|
|
|
|
|
|
|
4405
|
|
|
|
|
|
|
Boolean value. If set to 1, will detect dates formats on all fields. Fields in which a DateTime |
4406
|
|
|
|
|
|
|
format got detected are then managed as if they had been being listed in L</fields_dates> attribute: |
4407
|
|
|
|
|
|
|
they get an appropriate L</in_map> sub and a L</out_map> sub to convert to and from DateTime (see |
4408
|
|
|
|
|
|
|
L</fields_dates> attribute above). |
4409
|
|
|
|
|
|
|
|
4410
|
|
|
|
|
|
|
C<fields_dates_auto> looks for DateTime on all fields, but it expects nothing: it won't raise an |
4411
|
|
|
|
|
|
|
error if no field is found that contains DateTime. |
4412
|
|
|
|
|
|
|
|
4413
|
|
|
|
|
|
|
Value by default: 0 |
4414
|
|
|
|
|
|
|
|
4415
|
|
|
|
|
|
|
Example: |
4416
|
|
|
|
|
|
|
|
4417
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'logins.csv', fields_dates_auto => 1); |
4418
|
|
|
|
|
|
|
|
4419
|
|
|
|
|
|
|
=item dates_formats_to_try |
4420
|
|
|
|
|
|
|
|
4421
|
|
|
|
|
|
|
Array ref of string formats. |
4422
|
|
|
|
|
|
|
|
4423
|
|
|
|
|
|
|
Text::AutoCSV has a default built-in list of 20 date formats to try and 6 time formats (also it'll |
4424
|
|
|
|
|
|
|
combine any date format with any time format). |
4425
|
|
|
|
|
|
|
|
4426
|
|
|
|
|
|
|
C<dates_formats_to_try> will replace Text::AutoCSV default format-list will the one you specify, in |
4427
|
|
|
|
|
|
|
case the default would not produce the results you expect. |
4428
|
|
|
|
|
|
|
|
4429
|
|
|
|
|
|
|
The formats are written in Strptime format. |
4430
|
|
|
|
|
|
|
|
4431
|
|
|
|
|
|
|
Value by default (see below about the role of the pseudo-format ''): |
4432
|
|
|
|
|
|
|
|
4433
|
|
|
|
|
|
|
[ '', |
4434
|
|
|
|
|
|
|
'%Y-%m-%d', |
4435
|
|
|
|
|
|
|
'%Y.%m.%d', |
4436
|
|
|
|
|
|
|
'%Y/%m/%d', |
4437
|
|
|
|
|
|
|
'%m.%d.%y', |
4438
|
|
|
|
|
|
|
'%m-%d-%Y', |
4439
|
|
|
|
|
|
|
'%m.%d.%Y', |
4440
|
|
|
|
|
|
|
'%m/%d/%Y', |
4441
|
|
|
|
|
|
|
'%d-%m-%Y', |
4442
|
|
|
|
|
|
|
'%d.%m.%Y', |
4443
|
|
|
|
|
|
|
'%d/%m/%Y', |
4444
|
|
|
|
|
|
|
'%m-%d-%y', |
4445
|
|
|
|
|
|
|
'%m/%d/%y', |
4446
|
|
|
|
|
|
|
'%d-%m-%y', |
4447
|
|
|
|
|
|
|
'%d.%m.%y', |
4448
|
|
|
|
|
|
|
'%d/%m/%y', |
4449
|
|
|
|
|
|
|
'%Y%m%d%H%M%S', |
4450
|
|
|
|
|
|
|
'%b %d, %Y', |
4451
|
|
|
|
|
|
|
'%b %d %Y', |
4452
|
|
|
|
|
|
|
'%b %d %T %Z %Y', |
4453
|
|
|
|
|
|
|
'%d %b %Y', |
4454
|
|
|
|
|
|
|
'%d %b, %Y' ] |
4455
|
|
|
|
|
|
|
|
4456
|
|
|
|
|
|
|
B<IMPORTANT> |
4457
|
|
|
|
|
|
|
|
4458
|
|
|
|
|
|
|
The empty format (empty string) has a special meaning: when specified, Text::AutoCSV will be able to |
4459
|
|
|
|
|
|
|
identify fields that contain only a time (not preceeded by a date). |
4460
|
|
|
|
|
|
|
|
4461
|
|
|
|
|
|
|
B<Note> |
4462
|
|
|
|
|
|
|
|
4463
|
|
|
|
|
|
|
Format identification is over only when there is no more ambiguity. So the usual pitfall of US |
4464
|
|
|
|
|
|
|
versus French dates (month-day versus day-month) gets resolved only when a date is encountered that |
4465
|
|
|
|
|
|
|
disambiguates it (a date of 13th of the month or later). |
4466
|
|
|
|
|
|
|
|
4467
|
|
|
|
|
|
|
Example with a weird format that uses underscores to separate elements, using either US (month, day, |
4468
|
|
|
|
|
|
|
year), French (day, month, year), or international (year, month, day) order: |
4469
|
|
|
|
|
|
|
|
4470
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'logins.csv', |
4471
|
|
|
|
|
|
|
dates_formats_to_try => ['%d_%m_%Y', '%m_%d_%Y', '%Y_%m_%d']); |
4472
|
|
|
|
|
|
|
|
4473
|
|
|
|
|
|
|
=item dates_formats_to_try_supp |
4474
|
|
|
|
|
|
|
|
4475
|
|
|
|
|
|
|
Same as L</dates_formats_to_try> but instead of replacing the default list of formats used during |
4476
|
|
|
|
|
|
|
detection, it is added to this default list. |
4477
|
|
|
|
|
|
|
|
4478
|
|
|
|
|
|
|
You want to use this attribute if you need a specific DateTime format while continuing to benefit |
4479
|
|
|
|
|
|
|
from the default list. |
4480
|
|
|
|
|
|
|
|
4481
|
|
|
|
|
|
|
B<IMPORTANT> |
4482
|
|
|
|
|
|
|
|
4483
|
|
|
|
|
|
|
Text::AutoCSV will identify a given Datetime format only when there is no ambiguity, meaning, one |
4484
|
|
|
|
|
|
|
unique Datetime format matches (all other failed). Adding a format that already exists in the |
4485
|
|
|
|
|
|
|
default list will prevent the format from being identified, as it'll always be ambiguous. See |
4486
|
|
|
|
|
|
|
L</dates_formats_to_try> for the default list of formats. |
4487
|
|
|
|
|
|
|
|
4488
|
|
|
|
|
|
|
Example: |
4489
|
|
|
|
|
|
|
|
4490
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'logins.csv', |
4491
|
|
|
|
|
|
|
dates_formats_to_try_supp => ['%d_%m_%Y', '%m_%d_%Y', '%Y_%m_%d']); |
4492
|
|
|
|
|
|
|
|
4493
|
|
|
|
|
|
|
=item dates_ignore_trailing_chars |
4494
|
|
|
|
|
|
|
|
4495
|
|
|
|
|
|
|
If set to 1, DateTime auto-detection will ignore trailing text that may follow detected |
4496
|
|
|
|
|
|
|
DateTime-like text. |
4497
|
|
|
|
|
|
|
|
4498
|
|
|
|
|
|
|
Value by default: 1 (do ignore trailing chars) |
4499
|
|
|
|
|
|
|
|
4500
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'logins.csv', dates_ignore_trailing_chars => 0); |
4501
|
|
|
|
|
|
|
|
4502
|
|
|
|
|
|
|
=item dates_search_time |
4503
|
|
|
|
|
|
|
|
4504
|
|
|
|
|
|
|
If set to 1, look for times when detecting DateTime format. That is, whenever a date format |
4505
|
|
|
|
|
|
|
candidate is found, a longer candidate that also contains a time (after the date) is tested. |
4506
|
|
|
|
|
|
|
|
4507
|
|
|
|
|
|
|
Value by default: 1 (do look for times when auto-detecting DateTime formats) |
4508
|
|
|
|
|
|
|
|
4509
|
|
|
|
|
|
|
Example: |
4510
|
|
|
|
|
|
|
|
4511
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'logins.csv', dates_search_time => 0); |
4512
|
|
|
|
|
|
|
|
4513
|
|
|
|
|
|
|
=item dates_locales |
4514
|
|
|
|
|
|
|
|
4515
|
|
|
|
|
|
|
Comma-separated string of locales to test when detecting DateTime formats. Ultimately, Text::AutoCSV |
4516
|
|
|
|
|
|
|
will try all combinations of date formats, times and locales. |
4517
|
|
|
|
|
|
|
|
4518
|
|
|
|
|
|
|
Value by default: none (use perl default locale) |
4519
|
|
|
|
|
|
|
|
4520
|
|
|
|
|
|
|
Example: |
4521
|
|
|
|
|
|
|
|
4522
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'logins.csv', dates_locales => 'fr,de,en'); |
4523
|
|
|
|
|
|
|
|
4524
|
|
|
|
|
|
|
=item dates_zeros_ok |
4525
|
|
|
|
|
|
|
|
4526
|
|
|
|
|
|
|
Boolean. If true, a date made only of 0s is regarded as being empty. |
4527
|
|
|
|
|
|
|
|
4528
|
|
|
|
|
|
|
For example if C<dates_zeros_ok> is False, then a date like 0000-00-00 will be always incorrect (as |
4529
|
|
|
|
|
|
|
the day and month are out of bounds), therefore a format like '%Y-%m-%d' will never match for the |
4530
|
|
|
|
|
|
|
field. |
4531
|
|
|
|
|
|
|
|
4532
|
|
|
|
|
|
|
Conversely if C<dates_zeros_ok> is true, then a date like 0000-00-00 will be processed as if being |
4533
|
|
|
|
|
|
|
the empty string, thus the detection of format will work and when parsed, this "full of zeros" dates |
4534
|
|
|
|
|
|
|
will be processed the same way as the empty string (= value will be undef). |
4535
|
|
|
|
|
|
|
|
4536
|
|
|
|
|
|
|
B<IMPORTANT> |
4537
|
|
|
|
|
|
|
|
4538
|
|
|
|
|
|
|
"0s dates" are evaluated to undef when parsed, thus when converted back to text (out_map), they are |
4539
|
|
|
|
|
|
|
set to an empty string, not to the original value. |
4540
|
|
|
|
|
|
|
|
4541
|
|
|
|
|
|
|
Value by default: 1 |
4542
|
|
|
|
|
|
|
|
4543
|
|
|
|
|
|
|
Example: |
4544
|
|
|
|
|
|
|
|
4545
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', dates_zeros_ok => 0); |
4546
|
|
|
|
|
|
|
|
4547
|
|
|
|
|
|
|
=item out_dates_format |
4548
|
|
|
|
|
|
|
|
4549
|
|
|
|
|
|
|
Enforce the format of dates in output, for all fields that contain a DateTime value. |
4550
|
|
|
|
|
|
|
|
4551
|
|
|
|
|
|
|
The format is written in Strptime format. |
4552
|
|
|
|
|
|
|
|
4553
|
|
|
|
|
|
|
Value by default: none (by default, use format detected on input) |
4554
|
|
|
|
|
|
|
|
4555
|
|
|
|
|
|
|
Example: |
4556
|
|
|
|
|
|
|
|
4557
|
|
|
|
|
|
|
# Detect any field containing a DateTime value and convert it to yyyy-mm-dd whatever the |
4558
|
|
|
|
|
|
|
# input format is. |
4559
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', fields_dates_auto => 1, |
4560
|
|
|
|
|
|
|
out_dates_format => '%F')->write(); |
4561
|
|
|
|
|
|
|
|
4562
|
|
|
|
|
|
|
=item out_dates_locale |
4563
|
|
|
|
|
|
|
|
4564
|
|
|
|
|
|
|
Taken into account only if L</out_dates_format> is used. |
4565
|
|
|
|
|
|
|
|
4566
|
|
|
|
|
|
|
Sets the locale to apply on L</out_dates_format>. |
4567
|
|
|
|
|
|
|
|
4568
|
|
|
|
|
|
|
Value by default: none (by default, use the locale detected on input) |
4569
|
|
|
|
|
|
|
|
4570
|
|
|
|
|
|
|
Example: |
4571
|
|
|
|
|
|
|
|
4572
|
|
|
|
|
|
|
# Detect any field containing a DateTime value and convert it to a US DateTime whatever the |
4573
|
|
|
|
|
|
|
# input format is. |
4574
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', fields_dates_auto => 1, |
4575
|
|
|
|
|
|
|
out_dates_format => '%b %d, %Y, %I:%M:%S %p', out_dates_locale => 'en')->write(); |
4576
|
|
|
|
|
|
|
|
4577
|
|
|
|
|
|
|
=item croak_if_error |
4578
|
|
|
|
|
|
|
|
4579
|
|
|
|
|
|
|
If true, stops the program execution in case of error. |
4580
|
|
|
|
|
|
|
|
4581
|
|
|
|
|
|
|
B<IMPORTANT> |
4582
|
|
|
|
|
|
|
|
4583
|
|
|
|
|
|
|
Value by default: 1 |
4584
|
|
|
|
|
|
|
|
4585
|
|
|
|
|
|
|
If set to zero (C<croak_if_error =E<gt> 0>), errors are displayed as warnings. This printing can |
4586
|
|
|
|
|
|
|
then be affected by setting the L</quiet> attribute. |
4587
|
|
|
|
|
|
|
|
4588
|
|
|
|
|
|
|
=item verbose |
4589
|
|
|
|
|
|
|
|
4590
|
|
|
|
|
|
|
If true, get Text::AutoCSV to be a bit talkative instead of speaking only when warnings and errors |
4591
|
|
|
|
|
|
|
occur. Verbose output is printed to STDERR by default, this can be tuned with the L</infoh> |
4592
|
|
|
|
|
|
|
attribute. |
4593
|
|
|
|
|
|
|
|
4594
|
|
|
|
|
|
|
Value by default: 0 |
4595
|
|
|
|
|
|
|
|
4596
|
|
|
|
|
|
|
Example: |
4597
|
|
|
|
|
|
|
|
4598
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', verbose => 1); |
4599
|
|
|
|
|
|
|
|
4600
|
|
|
|
|
|
|
=item infoh |
4601
|
|
|
|
|
|
|
|
4602
|
|
|
|
|
|
|
File handle to display program's verbose output. Has effect *mainly* with attribute |
4603
|
|
|
|
|
|
|
C<verbose =E<gt> 1>. |
4604
|
|
|
|
|
|
|
|
4605
|
|
|
|
|
|
|
Note B<infoh> is used to display extra information in case of error (if a field does not exist, |
4606
|
|
|
|
|
|
|
Text::AutoCSV will display the list of existing fields). If you don't want such output, you can set |
4607
|
|
|
|
|
|
|
C<infoh> to undef. |
4608
|
|
|
|
|
|
|
|
4609
|
|
|
|
|
|
|
Value by default: \*STDERR |
4610
|
|
|
|
|
|
|
|
4611
|
|
|
|
|
|
|
Example: |
4612
|
|
|
|
|
|
|
|
4613
|
|
|
|
|
|
|
open my $infoh, ">", "log.txt"; |
4614
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', infoh => $infoh); |
4615
|
|
|
|
|
|
|
|
4616
|
|
|
|
|
|
|
=item quiet |
4617
|
|
|
|
|
|
|
|
4618
|
|
|
|
|
|
|
If true, don't display warnings and errors, unless croaking. |
4619
|
|
|
|
|
|
|
|
4620
|
|
|
|
|
|
|
If L</croak_if_error> attribute is set (as per default), still, Text::AutoCSV will produce output |
4621
|
|
|
|
|
|
|
(on STDERR) when croaking miserably. |
4622
|
|
|
|
|
|
|
|
4623
|
|
|
|
|
|
|
When using C<croak_if_error =E<gt> 0>, errors are processed as warnings and if L</quiet> is set (in |
4624
|
|
|
|
|
|
|
addition to L</croak_if_error> being set to 0), there'll be no output. Note this way of working is |
4625
|
|
|
|
|
|
|
not recommended, as things can go wrong without any notice to the caller. |
4626
|
|
|
|
|
|
|
|
4627
|
|
|
|
|
|
|
Example: |
4628
|
|
|
|
|
|
|
|
4629
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', quiet => 1); |
4630
|
|
|
|
|
|
|
|
4631
|
|
|
|
|
|
|
=item one_pass |
4632
|
|
|
|
|
|
|
|
4633
|
|
|
|
|
|
|
If true, Text::AutoCSV will perform one reading of the input. If other readings are triggered, it'll |
4634
|
|
|
|
|
|
|
raise an error and no reading will be done. Should that be the case (you ask Text::AutoCSV to do |
4635
|
|
|
|
|
|
|
something that'll trigger more than one reading of input), Text::AutoCSV will croak as is always the |
4636
|
|
|
|
|
|
|
case if an error occurs. |
4637
|
|
|
|
|
|
|
|
4638
|
|
|
|
|
|
|
Normally Text::AutoCSV will do multiple reads of input to work out certain characteristics of the |
4639
|
|
|
|
|
|
|
CSV: guess of encoding and guess of escape character. |
4640
|
|
|
|
|
|
|
|
4641
|
|
|
|
|
|
|
Also if member functions like L</field_add_link>, L</field_add_copy>, L</field_add_computed>, |
4642
|
|
|
|
|
|
|
L</read> or L</write> are called after input has already been read, it'll trigger further reads as |
4643
|
|
|
|
|
|
|
needed. |
4644
|
|
|
|
|
|
|
|
4645
|
|
|
|
|
|
|
If one wishes a unique read of the input to occur, one_pass attribute is to be set. |
4646
|
|
|
|
|
|
|
|
4647
|
|
|
|
|
|
|
When true, encoding will be assumed to be the first one in the provided list (L</encoding> |
4648
|
|
|
|
|
|
|
attribute), if no encoding attribute is provided, it'll be the first one in the default list, to |
4649
|
|
|
|
|
|
|
date, it is UTF-8. |
4650
|
|
|
|
|
|
|
|
4651
|
|
|
|
|
|
|
When true, and if attribute L</escape_char> is not set, escape_char will be assumed to be '\\' |
4652
|
|
|
|
|
|
|
(backslash). |
4653
|
|
|
|
|
|
|
|
4654
|
|
|
|
|
|
|
By default, one_pass is set if inh attribute is set (caller provides the input file handle of input) |
4655
|
|
|
|
|
|
|
or if input file is stdin (in_file attribute not set or set to an empty string). |
4656
|
|
|
|
|
|
|
|
4657
|
|
|
|
|
|
|
Value by default: |
4658
|
|
|
|
|
|
|
|
4659
|
|
|
|
|
|
|
0 if inh attribute is not set and in_file attribute is set to a non empty string |
4660
|
|
|
|
|
|
|
1 if inh attribute is set or in_file is not set or set to an empty string |
4661
|
|
|
|
|
|
|
|
4662
|
|
|
|
|
|
|
Example: |
4663
|
|
|
|
|
|
|
|
4664
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', one_pass => 1); |
4665
|
|
|
|
|
|
|
|
4666
|
|
|
|
|
|
|
=back |
4667
|
|
|
|
|
|
|
|
4668
|
|
|
|
|
|
|
=head2 read |
4669
|
|
|
|
|
|
|
|
4670
|
|
|
|
|
|
|
$csv->read(); |
4671
|
|
|
|
|
|
|
|
4672
|
|
|
|
|
|
|
Read input entirely. |
4673
|
|
|
|
|
|
|
|
4674
|
|
|
|
|
|
|
B<Return value> |
4675
|
|
|
|
|
|
|
|
4676
|
|
|
|
|
|
|
Returns the object itself in case of success. |
4677
|
|
|
|
|
|
|
Returns undef if error. |
4678
|
|
|
|
|
|
|
|
4679
|
|
|
|
|
|
|
Callback functions (when defined) are invoked, in the following order: |
4680
|
|
|
|
|
|
|
|
4681
|
|
|
|
|
|
|
L</read_post_update_hr>, intended to do updates on fields values after each record read |
4682
|
|
|
|
|
|
|
|
4683
|
|
|
|
|
|
|
L</walker_ar>, called after each record read, with an array ref of fields values |
4684
|
|
|
|
|
|
|
|
4685
|
|
|
|
|
|
|
L</walker_hr>, called after each record read, with a hash ref of fields values |
4686
|
|
|
|
|
|
|
|
4687
|
|
|
|
|
|
|
Example: |
4688
|
|
|
|
|
|
|
|
4689
|
|
|
|
|
|
|
# Do nothing - just check CSV can be read successfully |
4690
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'in.csv')->read(); |
4691
|
|
|
|
|
|
|
|
4692
|
|
|
|
|
|
|
=head2 read_all_in_mem |
4693
|
|
|
|
|
|
|
|
4694
|
|
|
|
|
|
|
$csv->read_all_in_mem(); |
4695
|
|
|
|
|
|
|
|
4696
|
|
|
|
|
|
|
Created in version 1.1.5. Before, existed only as _read_all_in_mem, meaning, was private. |
4697
|
|
|
|
|
|
|
|
4698
|
|
|
|
|
|
|
Read input entirely, as with L</read> function, but enforcing content to be kept in-memory. |
4699
|
|
|
|
|
|
|
|
4700
|
|
|
|
|
|
|
Having the content kept in-memory is implied by search functions (L</vlookup> for example). With |
4701
|
|
|
|
|
|
|
C<read_all_in_mem> you can enforce this behavior without doing a fake search. |
4702
|
|
|
|
|
|
|
|
4703
|
|
|
|
|
|
|
=head2 reset_next_record_hr |
4704
|
|
|
|
|
|
|
|
4705
|
|
|
|
|
|
|
$csv->reset_next_record_hr(); |
4706
|
|
|
|
|
|
|
|
4707
|
|
|
|
|
|
|
Reset the internal status to start from the beginning with L</get_next_record_hr>. Used in |
4708
|
|
|
|
|
|
|
conjunction with L</get_next_record_hr>. |
4709
|
|
|
|
|
|
|
|
4710
|
|
|
|
|
|
|
=head2 get_next_record_hr |
4711
|
|
|
|
|
|
|
|
4712
|
|
|
|
|
|
|
my $hr = $csv->get_next_record_hr(\$opt_key); |
4713
|
|
|
|
|
|
|
|
4714
|
|
|
|
|
|
|
Get the next record content as a hash ref. C<$hr> is undef when the end of records has been reached. |
4715
|
|
|
|
|
|
|
|
4716
|
|
|
|
|
|
|
When specified, C<$opt_key> is set to the current (returned) record key. |
4717
|
|
|
|
|
|
|
|
4718
|
|
|
|
|
|
|
B<NOTE> |
4719
|
|
|
|
|
|
|
|
4720
|
|
|
|
|
|
|
You do not need to call L</reset_next_record_hr> once before using C<get_next_record_hr>. |
4721
|
|
|
|
|
|
|
|
4722
|
|
|
|
|
|
|
Therefore L</reset_next_record_hr> is useful only if you wish to restart from the beginning before |
4723
|
|
|
|
|
|
|
you've reached the end of the records. |
4724
|
|
|
|
|
|
|
|
4725
|
|
|
|
|
|
|
B<NOTE bis> |
4726
|
|
|
|
|
|
|
|
4727
|
|
|
|
|
|
|
L</walker_hr> allows to execute some code each time a record is read, and it better fits with |
4728
|
|
|
|
|
|
|
Text::AutoCSV philosophy. Using a loop with C<get_next_record_hr> is primarily meant for |
4729
|
|
|
|
|
|
|
Text::AutoCSV internal usage. Also when using this mechanism, you get very close to original |
4730
|
|
|
|
|
|
|
Text::CSV logic, that makes Text::AutoCSV less useful. |
4731
|
|
|
|
|
|
|
|
4732
|
|
|
|
|
|
|
B<Return value> |
4733
|
|
|
|
|
|
|
|
4734
|
|
|
|
|
|
|
A hashref of the record, or undef once there's no more record to return. |
4735
|
|
|
|
|
|
|
|
4736
|
|
|
|
|
|
|
Example: |
4737
|
|
|
|
|
|
|
|
4738
|
|
|
|
|
|
|
while (my $hr = $csv->get_next_record_hr()) { |
4739
|
|
|
|
|
|
|
say Dumper($hr); |
4740
|
|
|
|
|
|
|
} |
4741
|
|
|
|
|
|
|
|
4742
|
|
|
|
|
|
|
=head2 write |
4743
|
|
|
|
|
|
|
|
4744
|
|
|
|
|
|
|
$csv->write(); |
4745
|
|
|
|
|
|
|
|
4746
|
|
|
|
|
|
|
Write input into output. |
4747
|
|
|
|
|
|
|
|
4748
|
|
|
|
|
|
|
B<Return value> |
4749
|
|
|
|
|
|
|
|
4750
|
|
|
|
|
|
|
Returns the object itself in case of success. |
4751
|
|
|
|
|
|
|
Returns undef if error. |
4752
|
|
|
|
|
|
|
|
4753
|
|
|
|
|
|
|
- If the content is not in-memory at the time write() is called: |
4754
|
|
|
|
|
|
|
|
4755
|
|
|
|
|
|
|
Each record is read (with call of L</read_post_update_hr>, L</walker_ar> and L</walker_hr>) and then |
4756
|
|
|
|
|
|
|
written. The read-and-write is done in sequence, each record is written to output before the next |
4757
|
|
|
|
|
|
|
record is read from input. |
4758
|
|
|
|
|
|
|
|
4759
|
|
|
|
|
|
|
- If the content is in-memory at the time write() is called: |
4760
|
|
|
|
|
|
|
|
4761
|
|
|
|
|
|
|
No L</read> operation is performed, instead, records are directly written to output. |
4762
|
|
|
|
|
|
|
|
4763
|
|
|
|
|
|
|
If defined, L</out_filter> is called for each record. If the return value of L</out_filter> is |
4764
|
|
|
|
|
|
|
false, the record is not written. |
4765
|
|
|
|
|
|
|
|
4766
|
|
|
|
|
|
|
Example: |
4767
|
|
|
|
|
|
|
|
4768
|
|
|
|
|
|
|
# Copy input to output. |
4769
|
|
|
|
|
|
|
# As CSV is parsed in-between, this copy also checks a number of characteristics about the |
4770
|
|
|
|
|
|
|
# input, as opposed to a plain file copy operation. |
4771
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv')->write(); |
4772
|
|
|
|
|
|
|
|
4773
|
|
|
|
|
|
|
=head2 out_header |
4774
|
|
|
|
|
|
|
|
4775
|
|
|
|
|
|
|
$csv->out_header($field, $header); |
4776
|
|
|
|
|
|
|
|
4777
|
|
|
|
|
|
|
Set the header text of C<$field> to C<$header>. |
4778
|
|
|
|
|
|
|
|
4779
|
|
|
|
|
|
|
By default, the input header value is rewritten as is to output. C<out_header> allows you to change |
4780
|
|
|
|
|
|
|
it. |
4781
|
|
|
|
|
|
|
|
4782
|
|
|
|
|
|
|
B<Return value> |
4783
|
|
|
|
|
|
|
|
4784
|
|
|
|
|
|
|
Returns the object itself. |
4785
|
|
|
|
|
|
|
|
4786
|
|
|
|
|
|
|
Example: |
4787
|
|
|
|
|
|
|
|
4788
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv') |
4789
|
|
|
|
|
|
|
->out_header('LOGIN', 'Login') |
4790
|
|
|
|
|
|
|
->out_header('FULLNAME', 'Full Name') |
4791
|
|
|
|
|
|
|
->write(); |
4792
|
|
|
|
|
|
|
|
4793
|
|
|
|
|
|
|
=head2 print_id |
4794
|
|
|
|
|
|
|
|
4795
|
|
|
|
|
|
|
$csv->print_id(); |
4796
|
|
|
|
|
|
|
|
4797
|
|
|
|
|
|
|
Print out a description of input. Write to \*STDERR by default or to L</infoh> attribute if set. |
4798
|
|
|
|
|
|
|
|
4799
|
|
|
|
|
|
|
The description consists in a list of a few characteristics (CSV separator and the like) followed by |
4800
|
|
|
|
|
|
|
the list of columns with the details of each. |
4801
|
|
|
|
|
|
|
|
4802
|
|
|
|
|
|
|
Example of output: |
4803
|
|
|
|
|
|
|
|
4804
|
|
|
|
|
|
|
If you go to the C<utils> directory of this module and execute the following: |
4805
|
|
|
|
|
|
|
|
4806
|
|
|
|
|
|
|
./csvcopy.pl -i f1.csv -l "1:,A->B,f2.csv" --id |
4807
|
|
|
|
|
|
|
|
4808
|
|
|
|
|
|
|
You will get this output: |
4809
|
|
|
|
|
|
|
|
4810
|
|
|
|
|
|
|
-- f1.csv: |
4811
|
|
|
|
|
|
|
sep_char: , |
4812
|
|
|
|
|
|
|
escape_char: \ |
4813
|
|
|
|
|
|
|
in_encoding: UTF-8 |
4814
|
|
|
|
|
|
|
is_always_quoted: no |
4815
|
|
|
|
|
|
|
|
4816
|
|
|
|
|
|
|
# FIELD HEADER EXT DATA DATETIME FORMAT DATETIME LOCALE |
4817
|
|
|
|
|
|
|
- ----- ------ -------- --------------- --------------- |
4818
|
|
|
|
|
|
|
0 TIMESTAMP timestamp %Y%m%d%H%M%S |
4819
|
|
|
|
|
|
|
1 A a |
4820
|
|
|
|
|
|
|
2 B b |
4821
|
|
|
|
|
|
|
3 C c |
4822
|
|
|
|
|
|
|
4 D d %d/%m/%Y |
4823
|
|
|
|
|
|
|
5 1:SITE 1:SITE link: f2.csv, chain: A->B->* (SITE) |
4824
|
|
|
|
|
|
|
6 1:B 1:B link: f2.csv, chain: A->B->* (B) |
4825
|
|
|
|
|
|
|
|
4826
|
|
|
|
|
|
|
=head2 field_add_computed |
4827
|
|
|
|
|
|
|
|
4828
|
|
|
|
|
|
|
$csv->field_add_computed($new_field, $subref); |
4829
|
|
|
|
|
|
|
|
4830
|
|
|
|
|
|
|
C<$new_field> is the name of the created field. |
4831
|
|
|
|
|
|
|
|
4832
|
|
|
|
|
|
|
C<$subref> is a reference to a sub that'll calculate the new field value. |
4833
|
|
|
|
|
|
|
|
4834
|
|
|
|
|
|
|
B<Return value> |
4835
|
|
|
|
|
|
|
|
4836
|
|
|
|
|
|
|
Returns the object itself in case of success. |
4837
|
|
|
|
|
|
|
Returns undef if error. |
4838
|
|
|
|
|
|
|
|
4839
|
|
|
|
|
|
|
Add a field calculated from other fields values. The subref runs like this: |
4840
|
|
|
|
|
|
|
|
4841
|
|
|
|
|
|
|
sub func { |
4842
|
|
|
|
|
|
|
# $new_field is the name of the field (allows to use one subref for more than one field |
4843
|
|
|
|
|
|
|
# calculation). |
4844
|
|
|
|
|
|
|
# $hr is a hash ref of fields values. |
4845
|
|
|
|
|
|
|
# $stats is a hash ref that gets printed (if Text::AutoCSV is created with verbose => 1) |
4846
|
|
|
|
|
|
|
# in the end. |
4847
|
|
|
|
|
|
|
my ($new_field, $hr, $stats) = @_; |
4848
|
|
|
|
|
|
|
|
4849
|
|
|
|
|
|
|
my $field_value; |
4850
|
|
|
|
|
|
|
# ... compute $field_value |
4851
|
|
|
|
|
|
|
|
4852
|
|
|
|
|
|
|
return $field_value; |
4853
|
|
|
|
|
|
|
} |
4854
|
|
|
|
|
|
|
|
4855
|
|
|
|
|
|
|
Example: |
4856
|
|
|
|
|
|
|
|
4857
|
|
|
|
|
|
|
# Add a field for the full name, made of the concatenation of the |
4858
|
|
|
|
|
|
|
# first name and the last name. |
4859
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'dirpeople.csv', out_file => 'dirwithfn.csv', verbose => 1) |
4860
|
|
|
|
|
|
|
->field_add_computed('FULLNAME', \&calc_fn)->write(); |
4861
|
|
|
|
|
|
|
sub calc_fn { |
4862
|
|
|
|
|
|
|
my ($new_field, $hr, $stats) = @_; |
4863
|
|
|
|
|
|
|
die "Man, you are in serious trouble!" unless $new_field eq 'FULLNAME'; |
4864
|
|
|
|
|
|
|
my $fn = $hr->{'FIRSTNAME'} . ' ' . uc($hr->{'LASTNAME'}); |
4865
|
|
|
|
|
|
|
$stats->{'empty full name'}++ if $fn eq ' '; |
4866
|
|
|
|
|
|
|
return $fn; |
4867
|
|
|
|
|
|
|
} |
4868
|
|
|
|
|
|
|
|
4869
|
|
|
|
|
|
|
=head2 field_add_copy |
4870
|
|
|
|
|
|
|
|
4871
|
|
|
|
|
|
|
$csv->field_add_copy($new_field, $src_field, $opt_subref); |
4872
|
|
|
|
|
|
|
|
4873
|
|
|
|
|
|
|
C<$new_field> if the name of the new field. |
4874
|
|
|
|
|
|
|
|
4875
|
|
|
|
|
|
|
C<$src_field> is the name of the field being copied. |
4876
|
|
|
|
|
|
|
|
4877
|
|
|
|
|
|
|
C<$opt_subref> is optional. It is a reference to a sub that takes one string (the value of |
4878
|
|
|
|
|
|
|
C<$src_field>) and returns a string (the value assigned to C<$new_field>). |
4879
|
|
|
|
|
|
|
|
4880
|
|
|
|
|
|
|
B<Return value> |
4881
|
|
|
|
|
|
|
|
4882
|
|
|
|
|
|
|
Returns the object itself in case of success. |
4883
|
|
|
|
|
|
|
Returns undef if error. |
4884
|
|
|
|
|
|
|
|
4885
|
|
|
|
|
|
|
C<field_add_copy> is a special case of L</field_add_computed>. The advantage of C<field_add_copy> is |
4886
|
|
|
|
|
|
|
that it relies on a sub that is Text::AutoCSV "unaware", just taking one string as input and |
4887
|
|
|
|
|
|
|
returning another string as output. |
4888
|
|
|
|
|
|
|
|
4889
|
|
|
|
|
|
|
B<IMPORTANT> |
4890
|
|
|
|
|
|
|
|
4891
|
|
|
|
|
|
|
The current field value is passed to C<field_add_copy> in $_. |
4892
|
|
|
|
|
|
|
|
4893
|
|
|
|
|
|
|
A call to |
4894
|
|
|
|
|
|
|
|
4895
|
|
|
|
|
|
|
$csv->field_add_copy($new_field, $src_field, $subref); |
4896
|
|
|
|
|
|
|
|
4897
|
|
|
|
|
|
|
is equivalent to |
4898
|
|
|
|
|
|
|
|
4899
|
|
|
|
|
|
|
$csv->field_add_computed($new_field, \&subref2); |
4900
|
|
|
|
|
|
|
sub subref2 { |
4901
|
|
|
|
|
|
|
my (undef, $hr) = @_; |
4902
|
|
|
|
|
|
|
local $_ = $hr->{$src_field}; |
4903
|
|
|
|
|
|
|
return $subref->(); |
4904
|
|
|
|
|
|
|
} |
4905
|
|
|
|
|
|
|
|
4906
|
|
|
|
|
|
|
Example of a field copy + pass copied field in upper case and surround content with <<>>: |
4907
|
|
|
|
|
|
|
|
4908
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'dirpeople.csv', out_file => 'd2.csv'); |
4909
|
|
|
|
|
|
|
$csv->field_add_copy('UCLAST', 'LASTNAME', \&myfunc); |
4910
|
|
|
|
|
|
|
$csv->write(); |
4911
|
|
|
|
|
|
|
sub myfunc { s/^.*$/<<\U$&>>/; $_; } |
4912
|
|
|
|
|
|
|
|
4913
|
|
|
|
|
|
|
Note that the calls can be chained as most member functions return the object itself upon success. |
4914
|
|
|
|
|
|
|
The example above is equivalent to: |
4915
|
|
|
|
|
|
|
|
4916
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'dirpeople.csv', out_file => 'd2.csv') |
4917
|
|
|
|
|
|
|
->field_add_copy('UCLAST', 'LASTNAME', \&myfunc) |
4918
|
|
|
|
|
|
|
->write(); |
4919
|
|
|
|
|
|
|
sub myfunc { s/^.*$/<<\U$&>>/; $_; } |
4920
|
|
|
|
|
|
|
|
4921
|
|
|
|
|
|
|
=head2 field_add_link |
4922
|
|
|
|
|
|
|
|
4923
|
|
|
|
|
|
|
$csv->field_add_link($new_field, $chain, $linked_file, \%opts); |
4924
|
|
|
|
|
|
|
|
4925
|
|
|
|
|
|
|
C<$new_field> is the name of the new field. |
4926
|
|
|
|
|
|
|
|
4927
|
|
|
|
|
|
|
C<$chain> is the CHAIN of the link, that is: 'LOCAL->REMOTE->PICK' where: |
4928
|
|
|
|
|
|
|
|
4929
|
|
|
|
|
|
|
C<LOCAL> is the field name to read the value from. |
4930
|
|
|
|
|
|
|
|
4931
|
|
|
|
|
|
|
C<REMOTE> is the linked field to find the value in. This field belongs to $linked_file. |
4932
|
|
|
|
|
|
|
|
4933
|
|
|
|
|
|
|
C<PICK> is the field from which to read the value of, in the record found by the search. This field |
4934
|
|
|
|
|
|
|
belongs to $linked_file. |
4935
|
|
|
|
|
|
|
|
4936
|
|
|
|
|
|
|
If $new_field is undef, the new field name is the name of the third field of $chain (PICK). |
4937
|
|
|
|
|
|
|
|
4938
|
|
|
|
|
|
|
C<$linked_file> is the name of the linked file, that gets read in a Text::AutoCSV object created |
4939
|
|
|
|
|
|
|
on-the-fly to do the search on. C<$linked_file> can also be a Text::AutoCSV object that you created |
4940
|
|
|
|
|
|
|
yourself, allowing for more flexibility. Example: |
4941
|
|
|
|
|
|
|
|
4942
|
|
|
|
|
|
|
my $lcsv = Text::AutoCSV->new(in_file => 'logins.csv', case => 1); |
4943
|
|
|
|
|
|
|
$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', $lcsv); |
4944
|
|
|
|
|
|
|
|
4945
|
|
|
|
|
|
|
C<\%opts> is a hash ref of optional attributes. The same values can be provided as with vlookup. |
4946
|
|
|
|
|
|
|
|
4947
|
|
|
|
|
|
|
=over 4 |
4948
|
|
|
|
|
|
|
|
4949
|
|
|
|
|
|
|
=item trim |
4950
|
|
|
|
|
|
|
|
4951
|
|
|
|
|
|
|
If set to 1, searches will ignore leading and trailing spaces. That is, a C<LOCAL> value of ' x ' |
4952
|
|
|
|
|
|
|
will match with a C<REMOTE> value of 'x'. |
4953
|
|
|
|
|
|
|
|
4954
|
|
|
|
|
|
|
If option is not present, use L</search_value_if_not_found> attribute of object (default value: 1). |
4955
|
|
|
|
|
|
|
|
4956
|
|
|
|
|
|
|
Example: |
4957
|
|
|
|
|
|
|
|
4958
|
|
|
|
|
|
|
$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv', |
4959
|
|
|
|
|
|
|
{ trim => 0 }); |
4960
|
|
|
|
|
|
|
|
4961
|
|
|
|
|
|
|
=item case |
4962
|
|
|
|
|
|
|
|
4963
|
|
|
|
|
|
|
If set to 1, searches will take the case into account. That is, a C<LOCAL> value of 'X' will B<not> |
4964
|
|
|
|
|
|
|
match with a C<REMOTE> value of 'x'. |
4965
|
|
|
|
|
|
|
|
4966
|
|
|
|
|
|
|
If option is not present, use L</search_case> attribute of object (default value: 0). |
4967
|
|
|
|
|
|
|
|
4968
|
|
|
|
|
|
|
Example: |
4969
|
|
|
|
|
|
|
|
4970
|
|
|
|
|
|
|
$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv', |
4971
|
|
|
|
|
|
|
{ case => 1 }); |
4972
|
|
|
|
|
|
|
|
4973
|
|
|
|
|
|
|
=item ignore_empty |
4974
|
|
|
|
|
|
|
|
4975
|
|
|
|
|
|
|
If set to 1, empty values won't match. That is, a C<LOCAL> value of '' will not match with a |
4976
|
|
|
|
|
|
|
C<REMOTE> value of ''. |
4977
|
|
|
|
|
|
|
|
4978
|
|
|
|
|
|
|
If option is not present, use L</search_ignore_empty> attribute of object (default value: 1). |
4979
|
|
|
|
|
|
|
|
4980
|
|
|
|
|
|
|
Example: |
4981
|
|
|
|
|
|
|
|
4982
|
|
|
|
|
|
|
$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv', |
4983
|
|
|
|
|
|
|
{ ignore_empty => 0 }); |
4984
|
|
|
|
|
|
|
|
4985
|
|
|
|
|
|
|
=item value_if_not_found |
4986
|
|
|
|
|
|
|
|
4987
|
|
|
|
|
|
|
If the searched value is not found, the value of the field is undef, that produces an empty string |
4988
|
|
|
|
|
|
|
at write time. Instead, you can specify the value. |
4989
|
|
|
|
|
|
|
|
4990
|
|
|
|
|
|
|
If option is not present, use L</search_value_if_not_found> attribute of object (default value: |
4991
|
|
|
|
|
|
|
undef). |
4992
|
|
|
|
|
|
|
|
4993
|
|
|
|
|
|
|
Example: |
4994
|
|
|
|
|
|
|
|
4995
|
|
|
|
|
|
|
$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv', |
4996
|
|
|
|
|
|
|
{ value_if_not_found => '<not found!>' }); |
4997
|
|
|
|
|
|
|
|
4998
|
|
|
|
|
|
|
=item value_if_found |
4999
|
|
|
|
|
|
|
|
5000
|
|
|
|
|
|
|
If the searched value is found, you can specify the value to return. |
5001
|
|
|
|
|
|
|
|
5002
|
|
|
|
|
|
|
If option is not present, use L</search_value_if_found> attribute of object (default value: none). |
5003
|
|
|
|
|
|
|
|
5004
|
|
|
|
|
|
|
B<NOTE> |
5005
|
|
|
|
|
|
|
|
5006
|
|
|
|
|
|
|
Although the C<PICK> field is ignored when using this option, you must specify it any way. |
5007
|
|
|
|
|
|
|
|
5008
|
|
|
|
|
|
|
Example: |
5009
|
|
|
|
|
|
|
|
5010
|
|
|
|
|
|
|
$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv', |
5011
|
|
|
|
|
|
|
{ value_if_not_found => '0', value_if_found => '1' }); |
5012
|
|
|
|
|
|
|
|
5013
|
|
|
|
|
|
|
=item value_if_ambiguous |
5014
|
|
|
|
|
|
|
|
5015
|
|
|
|
|
|
|
If the searched value is found in more than one record, the value of the field is undef, that |
5016
|
|
|
|
|
|
|
produces an empty string at write time. Instead, you can specify the value. |
5017
|
|
|
|
|
|
|
|
5018
|
|
|
|
|
|
|
If option is not present, use L</search_value_if_ambiguous> attribute of object (default value: |
5019
|
|
|
|
|
|
|
undef). |
5020
|
|
|
|
|
|
|
|
5021
|
|
|
|
|
|
|
Example: |
5022
|
|
|
|
|
|
|
|
5023
|
|
|
|
|
|
|
$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv', |
5024
|
|
|
|
|
|
|
{ value_if_ambiguous => '<ambiguous!>' }); |
5025
|
|
|
|
|
|
|
|
5026
|
|
|
|
|
|
|
=item ignore_ambiguous |
5027
|
|
|
|
|
|
|
|
5028
|
|
|
|
|
|
|
Boolean value. If ignore_ambiguous is true and the searched value is found in more than one record, |
5029
|
|
|
|
|
|
|
then, silently fall back on returning the value of the first record. Obviously if |
5030
|
|
|
|
|
|
|
C<ignore_ambiguous> is true, then the value of L</value_if_ambiguous> is ignored. |
5031
|
|
|
|
|
|
|
|
5032
|
|
|
|
|
|
|
If option is not present, use L</search_ignore_ambiguous> attribute of object (default value: 1). |
5033
|
|
|
|
|
|
|
|
5034
|
|
|
|
|
|
|
Example: |
5035
|
|
|
|
|
|
|
|
5036
|
|
|
|
|
|
|
$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv', |
5037
|
|
|
|
|
|
|
{ ignore_ambiguous => 1 }); |
5038
|
|
|
|
|
|
|
|
5039
|
|
|
|
|
|
|
Example with multiple options: |
5040
|
|
|
|
|
|
|
|
5041
|
|
|
|
|
|
|
$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv', |
5042
|
|
|
|
|
|
|
{ value_if_not_found => '?', ignore_ambiguous => 1 }); |
5043
|
|
|
|
|
|
|
|
5044
|
|
|
|
|
|
|
=back |
5045
|
|
|
|
|
|
|
|
5046
|
|
|
|
|
|
|
B<Return value> |
5047
|
|
|
|
|
|
|
|
5048
|
|
|
|
|
|
|
Returns the object itself in case of success. |
5049
|
|
|
|
|
|
|
Returns undef if error. |
5050
|
|
|
|
|
|
|
|
5051
|
|
|
|
|
|
|
Example of field_add_link usage: |
5052
|
|
|
|
|
|
|
|
5053
|
|
|
|
|
|
|
my $nom_compose = 0; |
5054
|
|
|
|
|
|
|
my $zip_not_found = 0; |
5055
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'pers.csv', walker_hr => \&walk) |
5056
|
|
|
|
|
|
|
->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv')->read(); |
5057
|
|
|
|
|
|
|
sub walk { |
5058
|
|
|
|
|
|
|
my $hr = shift; |
5059
|
|
|
|
|
|
|
$nom_compose++ if $hr->{'NAME'} =~ m/[- ]/; |
5060
|
|
|
|
|
|
|
$zip_not_found++ unless defined($hr->{'MYCITY'}); |
5061
|
|
|
|
|
|
|
} |
5062
|
|
|
|
|
|
|
print("Number of persons with a multi-part name: $nom_compose\n"); |
5063
|
|
|
|
|
|
|
print("Number of persons with unknown zipcode: $zip_not_found\n"); |
5064
|
|
|
|
|
|
|
|
5065
|
|
|
|
|
|
|
=head2 links |
5066
|
|
|
|
|
|
|
|
5067
|
|
|
|
|
|
|
$csv->links($prefix, $chain, $linked_file, \%opts); |
5068
|
|
|
|
|
|
|
|
5069
|
|
|
|
|
|
|
C<$prefix> is the name to add to joined fields |
5070
|
|
|
|
|
|
|
|
5071
|
|
|
|
|
|
|
C<$chain> is the JOINCHAIN of the link, that is: 'LOCAL->REMOTE' where: |
5072
|
|
|
|
|
|
|
|
5073
|
|
|
|
|
|
|
C<LOCAL> is the field name to read the value from. |
5074
|
|
|
|
|
|
|
|
5075
|
|
|
|
|
|
|
C<REMOTE> is the linked field to find the value in. This field belongs to $linked_file. |
5076
|
|
|
|
|
|
|
|
5077
|
|
|
|
|
|
|
As opposed to L</field_add_link>, there is no C<PICK> part, as all fields of target are read. |
5078
|
|
|
|
|
|
|
|
5079
|
|
|
|
|
|
|
As opposed to Text::AutoCSV habits of croaking whenever a field name is duplicate, here, the |
5080
|
|
|
|
|
|
|
duplicates are resolved by appending _2 to the joined field name if it already exists. If _2 already |
5081
|
|
|
|
|
|
|
exists, too, then _3 is appended instead, and so on, until a non-duplicate is found. This mechanism |
5082
|
|
|
|
|
|
|
is executed given the difficulty to control all field names when joining CSVs. |
5083
|
|
|
|
|
|
|
|
5084
|
|
|
|
|
|
|
C<$linked_file> and C<\%opts> work exactly the same way as for L</field_add_link>, see |
5085
|
|
|
|
|
|
|
L</field_add_link> for help. |
5086
|
|
|
|
|
|
|
|
5087
|
|
|
|
|
|
|
B<Return value> |
5088
|
|
|
|
|
|
|
|
5089
|
|
|
|
|
|
|
Returns the object itself in case of success. |
5090
|
|
|
|
|
|
|
Returns undef if error. |
5091
|
|
|
|
|
|
|
|
5092
|
|
|
|
|
|
|
B<NOTE> |
5093
|
|
|
|
|
|
|
|
5094
|
|
|
|
|
|
|
This function used to be called C<join> but got renamed to avoid clash with perl' builtin C<join>. |
5095
|
|
|
|
|
|
|
|
5096
|
|
|
|
|
|
|
Example: |
5097
|
|
|
|
|
|
|
|
5098
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'pers.csv', out_file => 'pers_with_city.csv') |
5099
|
|
|
|
|
|
|
->links('Read from zips.csv:', 'ZIP->ZIPCODE', 'zips.csv')->write(); |
5100
|
|
|
|
|
|
|
|
5101
|
|
|
|
|
|
|
=head2 get_in_encoding |
5102
|
|
|
|
|
|
|
|
5103
|
|
|
|
|
|
|
my $enc = $csv->get_in_encoding(); |
5104
|
|
|
|
|
|
|
|
5105
|
|
|
|
|
|
|
Return the string of input encoding, for example 'latin2' or 'UTF-8', etc. |
5106
|
|
|
|
|
|
|
|
5107
|
|
|
|
|
|
|
=head2 get_in_file_disp |
5108
|
|
|
|
|
|
|
|
5109
|
|
|
|
|
|
|
my $f = $csv->get_in_file_disp(); |
5110
|
|
|
|
|
|
|
|
5111
|
|
|
|
|
|
|
Return the printable name of in_file. |
5112
|
|
|
|
|
|
|
|
5113
|
|
|
|
|
|
|
=head2 get_sep_char |
5114
|
|
|
|
|
|
|
|
5115
|
|
|
|
|
|
|
my $s = $csv->get_sep_char(); |
5116
|
|
|
|
|
|
|
|
5117
|
|
|
|
|
|
|
Return the string of the input CSV separator character, for example ',' or ';'. |
5118
|
|
|
|
|
|
|
|
5119
|
|
|
|
|
|
|
=head2 get_escape_char |
5120
|
|
|
|
|
|
|
|
5121
|
|
|
|
|
|
|
my $e = $csv->get_escape_char(); |
5122
|
|
|
|
|
|
|
|
5123
|
|
|
|
|
|
|
Return the string of the input escape character, for example '"' or '\\'. |
5124
|
|
|
|
|
|
|
|
5125
|
|
|
|
|
|
|
=head2 get_is_always_quoted |
5126
|
|
|
|
|
|
|
|
5127
|
|
|
|
|
|
|
my $a = $csv->get_is_always_quoted(); |
5128
|
|
|
|
|
|
|
|
5129
|
|
|
|
|
|
|
Return 1 if all fields of input are always quoted, 0 otherwise. |
5130
|
|
|
|
|
|
|
|
5131
|
|
|
|
|
|
|
=head2 get_coldata |
5132
|
|
|
|
|
|
|
|
5133
|
|
|
|
|
|
|
my @cd = get_coldata(); |
5134
|
|
|
|
|
|
|
|
5135
|
|
|
|
|
|
|
Return an array that describes each column, from the first one (column 0) to the last. |
5136
|
|
|
|
|
|
|
|
5137
|
|
|
|
|
|
|
Each element of the array is itself an array ref that contains 5 elements: |
5138
|
|
|
|
|
|
|
|
5139
|
|
|
|
|
|
|
0: Name of the field (as accessed in *_hr functions) |
5140
|
|
|
|
|
|
|
1: Content of the field in the header line (if input has a header line) |
5141
|
|
|
|
|
|
|
2: Column content type, shows some meta-data of fields created with field_add_* functions |
5142
|
|
|
|
|
|
|
3: Datetime format detected, if ever, in the format Strptime |
5143
|
|
|
|
|
|
|
4: Locale of DateTime format detected, if ever |
5144
|
|
|
|
|
|
|
|
5145
|
|
|
|
|
|
|
=head2 get_pass_count |
5146
|
|
|
|
|
|
|
|
5147
|
|
|
|
|
|
|
my $n = $csv->get_pass_count(); |
5148
|
|
|
|
|
|
|
|
5149
|
|
|
|
|
|
|
Return the number of input readings done. Useful only if you're interested in Text::AutoCSV |
5150
|
|
|
|
|
|
|
internals. |
5151
|
|
|
|
|
|
|
|
5152
|
|
|
|
|
|
|
=head2 get_in_mem_record_count |
5153
|
|
|
|
|
|
|
|
5154
|
|
|
|
|
|
|
my $m = $csv->get_in_mem_record_count(); |
5155
|
|
|
|
|
|
|
|
5156
|
|
|
|
|
|
|
Return the number of records currently stored in-memory. Useful only if you're interested in |
5157
|
|
|
|
|
|
|
Text::AutoCSV internals. |
5158
|
|
|
|
|
|
|
|
5159
|
|
|
|
|
|
|
=head2 get_max_in_mem_record_count |
5160
|
|
|
|
|
|
|
|
5161
|
|
|
|
|
|
|
my $mm = $csv->get_max_in_mem_record_count(); |
5162
|
|
|
|
|
|
|
|
5163
|
|
|
|
|
|
|
Return the maximum number of records ever stored in-memory. Indeed this number can decrease: certain |
5164
|
|
|
|
|
|
|
functions like field_add* member-functions discard in-memory content. Useful only if you're |
5165
|
|
|
|
|
|
|
interested in Text::AutoCSV internals. |
5166
|
|
|
|
|
|
|
|
5167
|
|
|
|
|
|
|
=head2 get_fields_names |
5168
|
|
|
|
|
|
|
|
5169
|
|
|
|
|
|
|
my @f = $csv->get_fields_names(); |
5170
|
|
|
|
|
|
|
|
5171
|
|
|
|
|
|
|
Return an array of the internal names of the columns. |
5172
|
|
|
|
|
|
|
|
5173
|
|
|
|
|
|
|
=head2 get_field_name |
5174
|
|
|
|
|
|
|
|
5175
|
|
|
|
|
|
|
my $name = $csv->get_field_name($n); |
5176
|
|
|
|
|
|
|
|
5177
|
|
|
|
|
|
|
Return the C<$n>-th column name, the first column being number 0. |
5178
|
|
|
|
|
|
|
|
5179
|
|
|
|
|
|
|
Example: |
5180
|
|
|
|
|
|
|
|
5181
|
|
|
|
|
|
|
# Get the field name of the third column |
5182
|
|
|
|
|
|
|
my $col = $csv->get_field_name(2); |
5183
|
|
|
|
|
|
|
|
5184
|
|
|
|
|
|
|
=head2 get_stats |
5185
|
|
|
|
|
|
|
|
5186
|
|
|
|
|
|
|
my %stats = $csv->get_stats(); |
5187
|
|
|
|
|
|
|
|
5188
|
|
|
|
|
|
|
Certain callback functions provide a parameter to record event count: L</field_add_computed>, |
5189
|
|
|
|
|
|
|
L</read_post_update_hr>, L</walker_ar> and L</walker_hr>. By default, these stats are displayed if |
5190
|
|
|
|
|
|
|
Text::AutoCSV got created with attribute C<verbose =E<gt> 1>. get_stats() returns the statistics |
5191
|
|
|
|
|
|
|
hash of the object. |
5192
|
|
|
|
|
|
|
|
5193
|
|
|
|
|
|
|
B<IMPORTANT> |
5194
|
|
|
|
|
|
|
|
5195
|
|
|
|
|
|
|
As opposed to most functions that trigger input reading automatically (search functions and other |
5196
|
|
|
|
|
|
|
get_* functions), C<get_stats> just returns you the stats as it is, regardless of whether some |
5197
|
|
|
|
|
|
|
execution already occured. |
5198
|
|
|
|
|
|
|
|
5199
|
|
|
|
|
|
|
=head2 set_walker_ar |
5200
|
|
|
|
|
|
|
|
5201
|
|
|
|
|
|
|
$csv->set_walker_ar($subref); |
5202
|
|
|
|
|
|
|
|
5203
|
|
|
|
|
|
|
Normally one wants to define it at object creation time using L</walker_ar> attribute. |
5204
|
|
|
|
|
|
|
C<set_walker_ar> allows to assign the attribute walker_ar after object creation. |
5205
|
|
|
|
|
|
|
|
5206
|
|
|
|
|
|
|
See attribute L</walker_ar> for help about the way C<$subref> should work. |
5207
|
|
|
|
|
|
|
|
5208
|
|
|
|
|
|
|
B<Return value> |
5209
|
|
|
|
|
|
|
|
5210
|
|
|
|
|
|
|
Returns the object itself in case of success. |
5211
|
|
|
|
|
|
|
Returns undef if error. |
5212
|
|
|
|
|
|
|
|
5213
|
|
|
|
|
|
|
Example: |
5214
|
|
|
|
|
|
|
|
5215
|
|
|
|
|
|
|
# Calculate the total of the two first columns, the first column being money in and the |
5216
|
|
|
|
|
|
|
# second one being money out. |
5217
|
|
|
|
|
|
|
my ($actif, $passif) = (0, 0); |
5218
|
|
|
|
|
|
|
$csv->set_walker_ar(sub { my $ar = $_[0]; $actif += $ar->[0]; $passif += $ar->[1]; })->read(); |
5219
|
|
|
|
|
|
|
print("Actif = $actif\n"); |
5220
|
|
|
|
|
|
|
print("Passif = $passif\n"); |
5221
|
|
|
|
|
|
|
|
5222
|
|
|
|
|
|
|
=head2 set_walker_hr |
5223
|
|
|
|
|
|
|
|
5224
|
|
|
|
|
|
|
$csv->set_walker_hr($subref); |
5225
|
|
|
|
|
|
|
|
5226
|
|
|
|
|
|
|
Normally one wants to define it at object creation time using L</walker_hr> attribute. |
5227
|
|
|
|
|
|
|
C<set_walker_hr> allows to assign the attribute L</walker_hr> after object creation. |
5228
|
|
|
|
|
|
|
|
5229
|
|
|
|
|
|
|
See attribute L</walker_hr> for help about the way C<$subref> should work. |
5230
|
|
|
|
|
|
|
|
5231
|
|
|
|
|
|
|
B<Return value> |
5232
|
|
|
|
|
|
|
|
5233
|
|
|
|
|
|
|
Returns the object itself in case of success. |
5234
|
|
|
|
|
|
|
Returns undef if error. |
5235
|
|
|
|
|
|
|
|
5236
|
|
|
|
|
|
|
Example: |
5237
|
|
|
|
|
|
|
|
5238
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'directory.csv', verbose => 1); |
5239
|
|
|
|
|
|
|
|
5240
|
|
|
|
|
|
|
# ... |
5241
|
|
|
|
|
|
|
|
5242
|
|
|
|
|
|
|
$csv->set_walker_hr( |
5243
|
|
|
|
|
|
|
sub { |
5244
|
|
|
|
|
|
|
my ($hr, $stat) = @_; |
5245
|
|
|
|
|
|
|
$stat{'not capital name'}++, return if $hr->{'NAME'} ne uc($hr->{'NAME'}); |
5246
|
|
|
|
|
|
|
$stat{'name is capital letters'}++; |
5247
|
|
|
|
|
|
|
} |
5248
|
|
|
|
|
|
|
)->read(); |
5249
|
|
|
|
|
|
|
|
5250
|
|
|
|
|
|
|
=head2 set_out_file |
5251
|
|
|
|
|
|
|
|
5252
|
|
|
|
|
|
|
$csv->set_out_file($out_file); |
5253
|
|
|
|
|
|
|
|
5254
|
|
|
|
|
|
|
Normally one wants to define it at object creation time using L</out_file> attribute. |
5255
|
|
|
|
|
|
|
C<set_out_file> allows to assign the attribute L</out_file> after object creation. It is set to |
5256
|
|
|
|
|
|
|
C<$out_file> value. |
5257
|
|
|
|
|
|
|
|
5258
|
|
|
|
|
|
|
B<Return value> |
5259
|
|
|
|
|
|
|
|
5260
|
|
|
|
|
|
|
Returns the object itself in case of success. |
5261
|
|
|
|
|
|
|
Returns undef if error. |
5262
|
|
|
|
|
|
|
|
5263
|
|
|
|
|
|
|
Example: |
5264
|
|
|
|
|
|
|
|
5265
|
|
|
|
|
|
|
$csv->set_out_file('mycopy.csv')->write(); |
5266
|
|
|
|
|
|
|
|
5267
|
|
|
|
|
|
|
=head2 get_keys |
5268
|
|
|
|
|
|
|
|
5269
|
|
|
|
|
|
|
my @allkeys = $csv->get_keys(); |
5270
|
|
|
|
|
|
|
|
5271
|
|
|
|
|
|
|
Returns an array of all the record keys of input. A record key is a unique identifier that |
5272
|
|
|
|
|
|
|
designates the record. |
5273
|
|
|
|
|
|
|
|
5274
|
|
|
|
|
|
|
At the moment it is just an integer being the record number, the first one (that comes after the |
5275
|
|
|
|
|
|
|
header line) being of number 0. For example if $csv input is made of one header line and 3 records |
5276
|
|
|
|
|
|
|
(that is, a 4-line file typically, if no record contains a line break), $csv->get_keys() returns |
5277
|
|
|
|
|
|
|
|
5278
|
|
|
|
|
|
|
(0, 1, 2) |
5279
|
|
|
|
|
|
|
|
5280
|
|
|
|
|
|
|
B<IMPORTANT> |
5281
|
|
|
|
|
|
|
|
5282
|
|
|
|
|
|
|
If not yet done, this function causes the input to be read entirely and stored in-memory. |
5283
|
|
|
|
|
|
|
|
5284
|
|
|
|
|
|
|
=head2 get_hr_all |
5285
|
|
|
|
|
|
|
|
5286
|
|
|
|
|
|
|
my @allin = $csv->get_hr_all(); |
5287
|
|
|
|
|
|
|
|
5288
|
|
|
|
|
|
|
Returns an array of all record contents of the input, each record being a hash ref. |
5289
|
|
|
|
|
|
|
|
5290
|
|
|
|
|
|
|
B<IMPORTANT> |
5291
|
|
|
|
|
|
|
|
5292
|
|
|
|
|
|
|
If not yet done, this function causes the input to be read entirely and stored in-memory. |
5293
|
|
|
|
|
|
|
|
5294
|
|
|
|
|
|
|
=head2 get_row_ar |
5295
|
|
|
|
|
|
|
|
5296
|
|
|
|
|
|
|
my $row_ar = $csv->get_row_ar($record_key); |
5297
|
|
|
|
|
|
|
|
5298
|
|
|
|
|
|
|
Returns an array ref of the record designated by C<$record_key>. |
5299
|
|
|
|
|
|
|
|
5300
|
|
|
|
|
|
|
Example: |
5301
|
|
|
|
|
|
|
|
5302
|
|
|
|
|
|
|
# Get content (as array ref) of last record |
5303
|
|
|
|
|
|
|
my @allkeys = $csv->get_keys(); |
5304
|
|
|
|
|
|
|
my $lastk = $allkeys[-1]; |
5305
|
|
|
|
|
|
|
my $lastrec_ar = $csv->get_row_ar($lastk); |
5306
|
|
|
|
|
|
|
|
5307
|
|
|
|
|
|
|
B<IMPORTANT> |
5308
|
|
|
|
|
|
|
|
5309
|
|
|
|
|
|
|
If not yet done, this function causes the input to be read entirely and stored in-memory. |
5310
|
|
|
|
|
|
|
|
5311
|
|
|
|
|
|
|
=head2 get_row_hr |
5312
|
|
|
|
|
|
|
|
5313
|
|
|
|
|
|
|
my $row_hr = $csv->get_row_hr($record_key); |
5314
|
|
|
|
|
|
|
|
5315
|
|
|
|
|
|
|
Returns a hash ref of the record designated by C<$record_key>. |
5316
|
|
|
|
|
|
|
|
5317
|
|
|
|
|
|
|
Example: |
5318
|
|
|
|
|
|
|
|
5319
|
|
|
|
|
|
|
# Get content (as hash ref) of first record |
5320
|
|
|
|
|
|
|
my @allkeys = $csv->get_keys(); |
5321
|
|
|
|
|
|
|
my $firstk = $allkeys[0]; |
5322
|
|
|
|
|
|
|
my $firstrec_hr = $csv->get_row_hr($firstk); |
5323
|
|
|
|
|
|
|
|
5324
|
|
|
|
|
|
|
B<IMPORTANT> |
5325
|
|
|
|
|
|
|
|
5326
|
|
|
|
|
|
|
If not yet done, this function causes the input to be read entirely and stored in-memory. |
5327
|
|
|
|
|
|
|
|
5328
|
|
|
|
|
|
|
=head2 get_cell |
5329
|
|
|
|
|
|
|
|
5330
|
|
|
|
|
|
|
my $val = $csv->get_cell($record_key, $field_name); |
5331
|
|
|
|
|
|
|
|
5332
|
|
|
|
|
|
|
Return the value of the cell designated by its record key (C<$record_key>) and field name |
5333
|
|
|
|
|
|
|
(C<$field_name>). |
5334
|
|
|
|
|
|
|
|
5335
|
|
|
|
|
|
|
Example: |
5336
|
|
|
|
|
|
|
|
5337
|
|
|
|
|
|
|
my @allkeys = $csv->get_keys(); |
5338
|
|
|
|
|
|
|
my $midk = $allkeys[int($#allkeys / 2)]; |
5339
|
|
|
|
|
|
|
my $midname = $csv->get_cell($midk, 'NAME'); |
5340
|
|
|
|
|
|
|
|
5341
|
|
|
|
|
|
|
Note the above example is equivalent to: |
5342
|
|
|
|
|
|
|
|
5343
|
|
|
|
|
|
|
my @allkeys = $csv->get_keys(); |
5344
|
|
|
|
|
|
|
my $midk = $allkeys[int($#allkeys / 2)]; |
5345
|
|
|
|
|
|
|
my $midrec_hr = $csv->get_row_hr($midk); |
5346
|
|
|
|
|
|
|
my $midname = $midrec_hr->{'NAME'}; |
5347
|
|
|
|
|
|
|
|
5348
|
|
|
|
|
|
|
B<IMPORTANT> |
5349
|
|
|
|
|
|
|
|
5350
|
|
|
|
|
|
|
If not yet done, this function causes the input to be read entirely and stored in-memory. |
5351
|
|
|
|
|
|
|
|
5352
|
|
|
|
|
|
|
=head2 get_values |
5353
|
|
|
|
|
|
|
|
5354
|
|
|
|
|
|
|
my @vals = $csv->get_values($field_name, $opt_filter_subref); |
5355
|
|
|
|
|
|
|
|
5356
|
|
|
|
|
|
|
Return an array made of the values of the given field name (C<$field_name>), for every records, in |
5357
|
|
|
|
|
|
|
the order of the records. |
5358
|
|
|
|
|
|
|
|
5359
|
|
|
|
|
|
|
C<$opt_filter_subref> is an optional subref. If defined, it is called with every values in turn (one |
5360
|
|
|
|
|
|
|
call per value) and only values for which C<$opt_filter_subref> returned True are included in the |
5361
|
|
|
|
|
|
|
returned array. Call to C<$opt_filter_subref> is done with $_ to pass the value. |
5362
|
|
|
|
|
|
|
|
5363
|
|
|
|
|
|
|
Example: |
5364
|
|
|
|
|
|
|
|
5365
|
|
|
|
|
|
|
my @logins = $csv->get_values('LOGIN"); |
5366
|
|
|
|
|
|
|
|
5367
|
|
|
|
|
|
|
This is equivalent to: |
5368
|
|
|
|
|
|
|
|
5369
|
|
|
|
|
|
|
my @allkeys = $csv->get_keys(); |
5370
|
|
|
|
|
|
|
my @logins; |
5371
|
|
|
|
|
|
|
push @logins, $csv->get_cell($_, 'LOGIN') for (@allkeys); |
5372
|
|
|
|
|
|
|
|
5373
|
|
|
|
|
|
|
Example bis |
5374
|
|
|
|
|
|
|
|
5375
|
|
|
|
|
|
|
# @badlogins is the list of logins that contain non alphanumeric characters |
5376
|
|
|
|
|
|
|
my @badlogins = Text::AutoCSV->new(in_file => 'logins.csv') |
5377
|
|
|
|
|
|
|
->get_values('LOGIN', sub { m/[^a-z0-9]/ }); |
5378
|
|
|
|
|
|
|
|
5379
|
|
|
|
|
|
|
This is equivalent to: |
5380
|
|
|
|
|
|
|
|
5381
|
|
|
|
|
|
|
# @badlogins is the list of logins that contain non alphanumeric characters |
5382
|
|
|
|
|
|
|
# This method leads to carrying all values of a given field across function calls... |
5383
|
|
|
|
|
|
|
my @badlogins = grep { m/[^a-z0-9]/ } ( |
5384
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'logins.csv')->get_values('LOGIN') |
5385
|
|
|
|
|
|
|
); |
5386
|
|
|
|
|
|
|
|
5387
|
|
|
|
|
|
|
B<IMPORTANT> |
5388
|
|
|
|
|
|
|
|
5389
|
|
|
|
|
|
|
If not yet done, this function causes the input to be read entirely and stored in-memory. |
5390
|
|
|
|
|
|
|
|
5391
|
|
|
|
|
|
|
=head2 get_recnum |
5392
|
|
|
|
|
|
|
|
5393
|
|
|
|
|
|
|
my $r = $csv->get_recnum(); |
5394
|
|
|
|
|
|
|
|
5395
|
|
|
|
|
|
|
Returns the current record identifier, if a reading is in progress. If no read is in progress, |
5396
|
|
|
|
|
|
|
return undef. |
5397
|
|
|
|
|
|
|
|
5398
|
|
|
|
|
|
|
=head2 in_map |
5399
|
|
|
|
|
|
|
|
5400
|
|
|
|
|
|
|
=head2 read_update_after |
5401
|
|
|
|
|
|
|
|
5402
|
|
|
|
|
|
|
C<read_update_after> is an alias of C<in_map>. |
5403
|
|
|
|
|
|
|
|
5404
|
|
|
|
|
|
|
$csv->in_map($field, $subref); |
5405
|
|
|
|
|
|
|
|
5406
|
|
|
|
|
|
|
After reading a record from input, update C<$field> by calling C<$subref>. The value is put in |
5407
|
|
|
|
|
|
|
C<$_>. Then the field value is set to the return value of C<$subref>. |
5408
|
|
|
|
|
|
|
|
5409
|
|
|
|
|
|
|
This feature is originally meant to manage DateTime fields: the input and output CSVs carry text |
5410
|
|
|
|
|
|
|
content, and in-between, the values dealt with are DateTime objects. |
5411
|
|
|
|
|
|
|
|
5412
|
|
|
|
|
|
|
See L</out_map> for an example. |
5413
|
|
|
|
|
|
|
|
5414
|
|
|
|
|
|
|
=head2 out_map |
5415
|
|
|
|
|
|
|
|
5416
|
|
|
|
|
|
|
=head2 write_update_before |
5417
|
|
|
|
|
|
|
|
5418
|
|
|
|
|
|
|
C<write_update_before> is an alias of C<out_map>. |
5419
|
|
|
|
|
|
|
|
5420
|
|
|
|
|
|
|
$csv->out_map($field, $subref); |
5421
|
|
|
|
|
|
|
|
5422
|
|
|
|
|
|
|
Before writing C<$field> field content into the output file, pass it through C<out_map>. The value |
5423
|
|
|
|
|
|
|
is put in C<$_>. Then the return value of C<$subref> is written in the output. |
5424
|
|
|
|
|
|
|
|
5425
|
|
|
|
|
|
|
Example: |
5426
|
|
|
|
|
|
|
|
5427
|
|
|
|
|
|
|
Suppose you have a CSV file with the convention that a number surrounded by parenthesis is negative. |
5428
|
|
|
|
|
|
|
You can register corresponding L</in_map> and L</out_map> functions. During the processing of data, |
5429
|
|
|
|
|
|
|
the field content will be just a number (positive or negative), while in input and in output, it'll |
5430
|
|
|
|
|
|
|
follow the "negative under parenthesis" convention. |
5431
|
|
|
|
|
|
|
|
5432
|
|
|
|
|
|
|
In the below example, we rely on convention above and add a new field converted from the original |
5433
|
|
|
|
|
|
|
one, that follows the same convention. |
5434
|
|
|
|
|
|
|
|
5435
|
|
|
|
|
|
|
sub in_updt { |
5436
|
|
|
|
|
|
|
return 0 if !defined($_) or $_ eq ''; |
5437
|
|
|
|
|
|
|
my $i; |
5438
|
|
|
|
|
|
|
return -$i if ($i) = $_ =~ m/^\((.*)\)$/; |
5439
|
|
|
|
|
|
|
$_; |
5440
|
|
|
|
|
|
|
} |
5441
|
|
|
|
|
|
|
sub out_updt { |
5442
|
|
|
|
|
|
|
return '' unless defined($_); |
5443
|
|
|
|
|
|
|
return '(' . (-$_) . ')' if $_ < 0; |
5444
|
|
|
|
|
|
|
$_; |
5445
|
|
|
|
|
|
|
} |
5446
|
|
|
|
|
|
|
sub convert { |
5447
|
|
|
|
|
|
|
return ; |
5448
|
|
|
|
|
|
|
} |
5449
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'trans-euros.csv', out_file => 'trans-devises.csv') |
5450
|
|
|
|
|
|
|
->in_map('EUROS', \&in_updt) |
5451
|
|
|
|
|
|
|
->out_map('EUROS', \&out_updt) |
5452
|
|
|
|
|
|
|
->out_map('DEVISE', \&out_updt) |
5453
|
|
|
|
|
|
|
->field_add_copy('DEVISE', 'EUROS', sub { sprintf("%.2f", $_ * 1.141593); } ) |
5454
|
|
|
|
|
|
|
->write(); |
5455
|
|
|
|
|
|
|
|
5456
|
|
|
|
|
|
|
=head2 search |
5457
|
|
|
|
|
|
|
|
5458
|
|
|
|
|
|
|
my $found_ar = $csv->search($field_name, $value, \%opts); |
5459
|
|
|
|
|
|
|
|
5460
|
|
|
|
|
|
|
Returns an array ref of all records keys where the field C<$field_name> has the value C<$value>. |
5461
|
|
|
|
|
|
|
|
5462
|
|
|
|
|
|
|
C<\%opts> is an optional hash ref of options for the search. See help of L</vlookup> about options. |
5463
|
|
|
|
|
|
|
|
5464
|
|
|
|
|
|
|
B<IMPORTANT> |
5465
|
|
|
|
|
|
|
|
5466
|
|
|
|
|
|
|
An unsuccessful search returns an empty array ref, that is, [ ]. Thus you B<cannot> check for |
5467
|
|
|
|
|
|
|
definedness of C<search> return value to know whether or not the search found something. |
5468
|
|
|
|
|
|
|
|
5469
|
|
|
|
|
|
|
On the other hand, you can always examine the value C<search(...)-E<gt>[0]>, as search is always an |
5470
|
|
|
|
|
|
|
array ref. If the search found nothing, then, C<search(...)-E<gt>[0]> is not defined. |
5471
|
|
|
|
|
|
|
|
5472
|
|
|
|
|
|
|
B<IMPORTANT bis> |
5473
|
|
|
|
|
|
|
|
5474
|
|
|
|
|
|
|
If not yet done, this function causes the input to be read entirely and stored in-memory. |
5475
|
|
|
|
|
|
|
|
5476
|
|
|
|
|
|
|
Example: |
5477
|
|
|
|
|
|
|
|
5478
|
|
|
|
|
|
|
my $linux_os_keys_ar = $csv->search('OS', 'linux'); |
5479
|
|
|
|
|
|
|
|
5480
|
|
|
|
|
|
|
=head2 search_1hr |
5481
|
|
|
|
|
|
|
|
5482
|
|
|
|
|
|
|
my $found_hr = $csv->search_1hr($field_name, $value, \%opts); |
5483
|
|
|
|
|
|
|
|
5484
|
|
|
|
|
|
|
Returns a hash ref of the first record where the field C<$field_name> has the value C<$value>. |
5485
|
|
|
|
|
|
|
|
5486
|
|
|
|
|
|
|
C<\%opts> is an optional hash ref of options for the search. See help of L</vlookup> about options. |
5487
|
|
|
|
|
|
|
|
5488
|
|
|
|
|
|
|
Note the options L</value_if_not_found> and L</value_if_ambiguous> are ignored. If not found, return |
5489
|
|
|
|
|
|
|
undef. If the result is ambiguous (more than one record found) and ignore_ambiguous is set to a |
5490
|
|
|
|
|
|
|
false value, return undef. |
5491
|
|
|
|
|
|
|
|
5492
|
|
|
|
|
|
|
The other options are taken into account as for any search: L</ignore_ambiguous>, L</trim>, |
5493
|
|
|
|
|
|
|
L</case>, L</ignore_empty>. |
5494
|
|
|
|
|
|
|
|
5495
|
|
|
|
|
|
|
B<IMPORTANT> |
5496
|
|
|
|
|
|
|
|
5497
|
|
|
|
|
|
|
As opposed to L</search>, an unsuccessful C<search_1hr> will return C<undef>. |
5498
|
|
|
|
|
|
|
|
5499
|
|
|
|
|
|
|
B<IMPORTANT bis> |
5500
|
|
|
|
|
|
|
|
5501
|
|
|
|
|
|
|
If not yet done, this function causes the input to be read entirely and stored in-memory. |
5502
|
|
|
|
|
|
|
|
5503
|
|
|
|
|
|
|
Example: |
5504
|
|
|
|
|
|
|
|
5505
|
|
|
|
|
|
|
my $hr = $csv->search_1hr('LOGIN', $login); |
5506
|
|
|
|
|
|
|
my $full_name = $hr->{'FIRSTNAME'} . ' ' . $hr->{'LASTNAME'}; |
5507
|
|
|
|
|
|
|
|
5508
|
|
|
|
|
|
|
=head2 vlookup |
5509
|
|
|
|
|
|
|
|
5510
|
|
|
|
|
|
|
my $val = $csv->vlookup($searched_field, $value, $target_field, \%opts); |
5511
|
|
|
|
|
|
|
|
5512
|
|
|
|
|
|
|
Find the first record where C<$searched_field> contains C<$value> and out of this record, returns |
5513
|
|
|
|
|
|
|
the value of C<$target_field>. |
5514
|
|
|
|
|
|
|
|
5515
|
|
|
|
|
|
|
C<\%opts> is optional. It is a hash of options for C<vlookup>: |
5516
|
|
|
|
|
|
|
|
5517
|
|
|
|
|
|
|
=over 4 |
5518
|
|
|
|
|
|
|
|
5519
|
|
|
|
|
|
|
=item trim |
5520
|
|
|
|
|
|
|
|
5521
|
|
|
|
|
|
|
If true, ignore spaces before and after the values to search. |
5522
|
|
|
|
|
|
|
|
5523
|
|
|
|
|
|
|
If option is not present, use L</search_trim> attribute of object (default value: 1). |
5524
|
|
|
|
|
|
|
|
5525
|
|
|
|
|
|
|
=item case |
5526
|
|
|
|
|
|
|
|
5527
|
|
|
|
|
|
|
If true, do case sensitive searches. |
5528
|
|
|
|
|
|
|
|
5529
|
|
|
|
|
|
|
If option is not present, use L</search_case> attribute of object (default value: 0). |
5530
|
|
|
|
|
|
|
|
5531
|
|
|
|
|
|
|
=item ignore_empty |
5532
|
|
|
|
|
|
|
|
5533
|
|
|
|
|
|
|
If true, ignore empty values in the search. The consequence is that you won't be able to find |
5534
|
|
|
|
|
|
|
empty values by searching it. |
5535
|
|
|
|
|
|
|
|
5536
|
|
|
|
|
|
|
If option is not present, use L</search_ignore_empty> attribute of object (default value: 1). |
5537
|
|
|
|
|
|
|
|
5538
|
|
|
|
|
|
|
=item ignore_accents |
5539
|
|
|
|
|
|
|
|
5540
|
|
|
|
|
|
|
If true, ignore accents in searches. For exampe, if C<ignore_accents> is set, a string like |
5541
|
|
|
|
|
|
|
"élémentaire" will match "elementaire". |
5542
|
|
|
|
|
|
|
|
5543
|
|
|
|
|
|
|
If option is not present, use L</search_ignore_accents> attribute of object (default value: 1). |
5544
|
|
|
|
|
|
|
|
5545
|
|
|
|
|
|
|
B<NOTE> |
5546
|
|
|
|
|
|
|
|
5547
|
|
|
|
|
|
|
This option uses the function L</remove_accents> to build its internal hash tables. See |
5548
|
|
|
|
|
|
|
L</remove_accents> help for more details. |
5549
|
|
|
|
|
|
|
|
5550
|
|
|
|
|
|
|
=item value_if_not_found |
5551
|
|
|
|
|
|
|
|
5552
|
|
|
|
|
|
|
Return value if vlookup finds nothing. |
5553
|
|
|
|
|
|
|
|
5554
|
|
|
|
|
|
|
If option is not present, use L</search_value_if_not_found> attribute of object (default value: |
5555
|
|
|
|
|
|
|
undef). |
5556
|
|
|
|
|
|
|
|
5557
|
|
|
|
|
|
|
=item value_if_found |
5558
|
|
|
|
|
|
|
|
5559
|
|
|
|
|
|
|
Return value if vlookup finds something. |
5560
|
|
|
|
|
|
|
|
5561
|
|
|
|
|
|
|
If option is not present, use L</search_value_if_found> attribute of object (default value: none). |
5562
|
|
|
|
|
|
|
|
5563
|
|
|
|
|
|
|
This option is to just check whether a value exists, regardless of the target value found. |
5564
|
|
|
|
|
|
|
|
5565
|
|
|
|
|
|
|
B<NOTE> |
5566
|
|
|
|
|
|
|
|
5567
|
|
|
|
|
|
|
Although the B<$target_field> is ignored when using this option, you must specify it any way. |
5568
|
|
|
|
|
|
|
|
5569
|
|
|
|
|
|
|
=item value_if_ambiguous |
5570
|
|
|
|
|
|
|
|
5571
|
|
|
|
|
|
|
Return value if vlookup find more than one result. Tune it only if ignore_ambiguous is unset. |
5572
|
|
|
|
|
|
|
|
5573
|
|
|
|
|
|
|
If option is not present, use L</search_value_if_ambiguous> attribute of object (default value: |
5574
|
|
|
|
|
|
|
undef). |
5575
|
|
|
|
|
|
|
|
5576
|
|
|
|
|
|
|
=item ignore_ambiguous |
5577
|
|
|
|
|
|
|
|
5578
|
|
|
|
|
|
|
If true, then if more than one result is found, silently return the first one. |
5579
|
|
|
|
|
|
|
|
5580
|
|
|
|
|
|
|
If option is not present, use L</search_ignore_ambiguous> attribute of object (default value: 1). |
5581
|
|
|
|
|
|
|
|
5582
|
|
|
|
|
|
|
=back |
5583
|
|
|
|
|
|
|
|
5584
|
|
|
|
|
|
|
B<IMPORTANT> |
5585
|
|
|
|
|
|
|
|
5586
|
|
|
|
|
|
|
If not yet done, this function causes the input to be read entirely and stored in-memory. |
5587
|
|
|
|
|
|
|
|
5588
|
|
|
|
|
|
|
Example: |
5589
|
|
|
|
|
|
|
|
5590
|
|
|
|
|
|
|
my $name = $csv->vlookup('LOGIN', $id, 'NAME', { value_if_not_found => '<login not found>' }); |
5591
|
|
|
|
|
|
|
|
5592
|
|
|
|
|
|
|
=head2 remove_accents |
5593
|
|
|
|
|
|
|
|
5594
|
|
|
|
|
|
|
my $t = $csv->remove_accents($s); |
5595
|
|
|
|
|
|
|
|
5596
|
|
|
|
|
|
|
Take the string C<$s> as argument and return the string without accents. Uses a Unicode |
5597
|
|
|
|
|
|
|
decomposition followed by removal of every characters that have the Unicode property |
5598
|
|
|
|
|
|
|
C<Nonspacing_Mark>. |
5599
|
|
|
|
|
|
|
|
5600
|
|
|
|
|
|
|
B<NOTE> |
5601
|
|
|
|
|
|
|
|
5602
|
|
|
|
|
|
|
Only accents are removed. It is not a C<whatever-encoding -E<gt> us-ascii> conversion. For example, |
5603
|
|
|
|
|
|
|
the French B<Å> character (o followed by e) or the German B<Ã> (eszett) are kept as is. |
5604
|
|
|
|
|
|
|
|
5605
|
|
|
|
|
|
|
B<NOTE bis> |
5606
|
|
|
|
|
|
|
|
5607
|
|
|
|
|
|
|
Tested with some latin1 and latin2 characters. |
5608
|
|
|
|
|
|
|
|
5609
|
|
|
|
|
|
|
B<NOTE ter> |
5610
|
|
|
|
|
|
|
|
5611
|
|
|
|
|
|
|
There is no language-level transformation during accents removal. For example B<Jürgen> is returned |
5612
|
|
|
|
|
|
|
as B<Jurgen>, not B<Juergen>. |
5613
|
|
|
|
|
|
|
|
5614
|
|
|
|
|
|
|
This function is not exported by default. |
5615
|
|
|
|
|
|
|
|
5616
|
|
|
|
|
|
|
Example: |
5617
|
|
|
|
|
|
|
|
5618
|
|
|
|
|
|
|
use Text::AutoCSV qw(remove_accents); |
5619
|
|
|
|
|
|
|
my $s = remove_accents("Français: être élémentaire, Tchèque: služba dům"); |
5620
|
|
|
|
|
|
|
die "This script will never die" if $s ne 'Francais: etre elementaire, Tcheque: sluzba dum'; |
5621
|
|
|
|
|
|
|
|
5622
|
|
|
|
|
|
|
=head1 AUTHOR |
5623
|
|
|
|
|
|
|
|
5624
|
|
|
|
|
|
|
Sébastien Millet <milletseb@laposte.net> |
5625
|
|
|
|
|
|
|
|
5626
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
5627
|
|
|
|
|
|
|
|
5628
|
|
|
|
|
|
|
This software is copyright (c) 2016, 2017 by Sébastien Millet. |
5629
|
|
|
|
|
|
|
|
5630
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
5631
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
5632
|
|
|
|
|
|
|
|
5633
|
|
|
|
|
|
|
=cut |