line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#!/usr/bin/perl |
2
|
|
|
|
|
|
|
# ABSTRACT: helper module to automate the use of Text::CSV |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
# vim:tw=80 :et |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
# Text/AutoCSV.pm |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
# Written by Sébastien Millet |
10
|
|
|
|
|
|
|
# March, July, August, September 2016 |
11
|
|
|
|
|
|
|
# January - August 2017 |
12
|
|
|
|
|
|
|
# |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
package Text::AutoCSV; |
15
|
|
|
|
|
|
|
$Text::AutoCSV::VERSION = '1.2.0'; |
16
|
20
|
|
|
20
|
|
972116
|
use strict; |
|
20
|
|
|
|
|
51
|
|
|
20
|
|
|
|
|
556
|
|
17
|
17
|
|
|
17
|
|
102
|
use warnings; |
|
17
|
|
|
|
|
43
|
|
|
17
|
|
|
|
|
1175
|
|
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
my $PKG = "Text::AutoCSV"; |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
require Exporter; |
22
|
|
|
|
|
|
|
our @ISA = 'Exporter'; |
23
|
|
|
|
|
|
|
our @EXPORT_OK = qw(remove_accents); |
24
|
|
|
|
|
|
|
|
25
|
17
|
|
|
17
|
|
109
|
use Carp; |
|
17
|
|
|
|
|
39
|
|
|
17
|
|
|
|
|
1118
|
|
26
|
17
|
|
|
17
|
|
6036
|
use Params::Validate qw(validate validate_pos :types); |
|
17
|
|
|
|
|
123272
|
|
|
17
|
|
|
|
|
3646
|
|
27
|
17
|
|
|
17
|
|
5779
|
use List::MoreUtils qw(first_index indexes); |
|
17
|
|
|
|
|
118583
|
|
|
17
|
|
|
|
|
167
|
|
28
|
17
|
|
|
17
|
|
11395
|
use Fcntl qw(SEEK_SET); |
|
17
|
|
|
|
|
36
|
|
|
17
|
|
|
|
|
812
|
|
29
|
17
|
|
|
17
|
|
4997
|
use File::BOM; |
|
17
|
|
|
|
|
427647
|
|
|
17
|
|
|
|
|
956
|
|
30
|
17
|
|
|
17
|
|
8115
|
use Text::CSV; |
|
17
|
|
|
|
|
243285
|
|
|
17
|
|
|
|
|
775
|
|
31
|
17
|
|
|
17
|
|
9732
|
use DateTime; |
|
17
|
|
|
|
|
6851560
|
|
|
17
|
|
|
|
|
927
|
|
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# DateTime::Format::Strptime 1.70 does not work properly with us. |
34
|
|
|
|
|
|
|
# Actually all version as of 1.63 are fine, except 1.70. |
35
|
17
|
|
|
17
|
|
8007
|
use DateTime::Format::Strptime 1.71; |
|
17
|
|
|
|
|
933433
|
|
|
17
|
|
|
|
|
169
|
|
36
|
17
|
|
|
17
|
|
8184
|
use Class::Struct; |
|
17
|
|
|
|
|
26792
|
|
|
17
|
|
|
|
|
100
|
|
37
|
17
|
|
|
17
|
|
7963
|
use Unicode::Normalize; |
|
17
|
|
|
|
|
27240
|
|
|
17
|
|
|
|
|
1060
|
|
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# lock_keys is used to prevent accessing non existing keys |
40
|
|
|
|
|
|
|
# Credits: 3381159 on http://stackoverflow.com |
41
|
|
|
|
|
|
|
# "make perl shout when trying to access undefined hash key" |
42
|
17
|
|
|
17
|
|
5522
|
use Hash::Util qw(lock_keys); |
|
17
|
|
|
|
|
35279
|
|
|
17
|
|
|
|
|
100
|
|
43
|
17
|
|
|
17
|
|
7461
|
use Memoize; |
|
17
|
|
|
|
|
32077
|
|
|
17
|
|
|
|
|
14334
|
|
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
# FIXME |
46
|
|
|
|
|
|
|
# Not needed in release -> should be always commented unless at dev time |
47
|
|
|
|
|
|
|
#use feature qw(say); |
48
|
|
|
|
|
|
|
#use Data::Dumper; |
49
|
|
|
|
|
|
|
#$Data::Dumper::Sortkeys = 1; |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# Set to 1 if you wish to turn on debug without touching caller's code |
52
|
|
|
|
|
|
|
our $ALWAYS_DEBUG = 0; |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# Keep it set to 0 unless you know what you're doing! |
55
|
|
|
|
|
|
|
# Note |
56
|
|
|
|
|
|
|
# Taken into account only if debug is set. |
57
|
|
|
|
|
|
|
my $DEBUG_DATETIME_FORMATS = 0; |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# The below is taken into account only if $DEBUG_DATETIME_FORMATS is set. |
60
|
|
|
|
|
|
|
# It becomes really MASSIVE debug output. |
61
|
|
|
|
|
|
|
my $DEBUG_DATETIME_FORMATS_EVEN_MORE = 0; |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# |
64
|
|
|
|
|
|
|
# Uncomment to replace carp and croak with cluck and confess, respectively |
65
|
|
|
|
|
|
|
# Also reachable with perl option: |
66
|
|
|
|
|
|
|
# -MCarp=verbose |
67
|
|
|
|
|
|
|
# See 'perldoc Carp'. |
68
|
|
|
|
|
|
|
# |
69
|
|
|
|
|
|
|
#$Carp::Verbose = 1; |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
# * *************** * |
72
|
|
|
|
|
|
|
# * BEHAVIOR TUNING * |
73
|
|
|
|
|
|
|
# * *************** * |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
# * **************************************************** * |
76
|
|
|
|
|
|
|
# * ALL THE VARIABLES BELOW ARE RATHER LOW LEVEL. * |
77
|
|
|
|
|
|
|
# * IF YOU UPDATE IT, IT WILL LIKELY BREAK THE TEST PLAN * |
78
|
|
|
|
|
|
|
# * **************************************************** * |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
my $DEF_SEARCH_CASE = 0; # Case insensitive search by default |
81
|
|
|
|
|
|
|
my $DEF_SEARCH_TRIM = 1; # Trim values by default |
82
|
|
|
|
|
|
|
my $DEF_SEARCH_IGNORE_ACCENTS = 1; # Ignore accents |
83
|
|
|
|
|
|
|
my $DEF_SEARCH_IGNORE_EMPTY = 1; # Ignore empty strings in searches by default |
84
|
|
|
|
|
|
|
my $DEF_SEARCH_VALUE_IF_NOT_FOUND = undef; # If not found, returned field value is undef |
85
|
|
|
|
|
|
|
my $DEF_SEARCH_VALUE_IF_AMBIGUOUS = undef; # If more than one record found by search (when a |
86
|
|
|
|
|
|
|
# unique value is expected), return undef |
87
|
|
|
|
|
|
|
my $DEF_SEARCH_IGNORE_AMBIGUOUS = 1; # By default, ignore the fact that multiple records are |
88
|
|
|
|
|
|
|
# found by search and return the first record found |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
my $DETECT_ENCODING = 1; |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
my $DEFAULT_IN_ENCODING = 'UTF-8,latin1'; |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
# By default, input encoding detected is used for output. |
95
|
|
|
|
|
|
|
# -> the constant below is used if and only if: |
96
|
|
|
|
|
|
|
# Inbound encoding is unknown |
97
|
|
|
|
|
|
|
# No providing of out_encoding attribute (out_encoding takes precedence when |
98
|
|
|
|
|
|
|
# provided) |
99
|
|
|
|
|
|
|
my $DEFAULT_OUT_ENCODING = 'UTF-8'; |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
my $DEFAULT_ESCAPE_CHAR = '\\'; |
102
|
|
|
|
|
|
|
my $DEFAULT_QUOTE_CHAR = '"'; |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# |
105
|
|
|
|
|
|
|
# The code that workarounds $FIX_PERLMONKS_823214 (see below) makes sense only |
106
|
|
|
|
|
|
|
# under plain Windows. |
107
|
|
|
|
|
|
|
# |
108
|
|
|
|
|
|
|
# "Plain" Windows? |
109
|
|
|
|
|
|
|
# This code MUST NOT be executed under cygwin because cygwin uses unix line |
110
|
|
|
|
|
|
|
# breaks. This is why we detect /mswin/. Would we detect /win/, we'd catch |
111
|
|
|
|
|
|
|
# cygwin, too, and we don't want that. |
112
|
|
|
|
|
|
|
# |
113
|
|
|
|
|
|
|
my $OS_IS_PLAIN_WINDOWS = !!( $^O =~ /mswin/i ); |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
# |
116
|
|
|
|
|
|
|
# Shall we fix the issue reported as #823214 in PerlMonks? See |
117
|
|
|
|
|
|
|
# http://www.perlmonks.org/?node_id=823214 |
118
|
|
|
|
|
|
|
# |
119
|
|
|
|
|
|
|
# In brief (in case the link above would be broken one day): |
120
|
|
|
|
|
|
|
# Under Windows, output mode set to UTF-16LE produces line breaks made of |
121
|
|
|
|
|
|
|
# octets "0d 0a 00", whereas it should be "0d 00 0a 00". |
122
|
|
|
|
|
|
|
# |
123
|
|
|
|
|
|
|
# The code also fixes UTF-16BE (but it was not tested). |
124
|
|
|
|
|
|
|
# |
125
|
|
|
|
|
|
|
my $FIX_PERLMONKS_823214 = 1; |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
# * **** * |
128
|
|
|
|
|
|
|
# * CODE * |
129
|
|
|
|
|
|
|
# * **** * |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
sub ERR_UNKNOWN_FIELD() { 0 } |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# Store meta-data about each column |
134
|
|
|
|
|
|
|
struct ColData => { |
135
|
|
|
|
|
|
|
field_name => '$', |
136
|
|
|
|
|
|
|
header_text => '$', |
137
|
|
|
|
|
|
|
description => '$', |
138
|
|
|
|
|
|
|
dt_format => '$', |
139
|
|
|
|
|
|
|
dt_locale => '$', |
140
|
|
|
|
|
|
|
multiline => '$' |
141
|
|
|
|
|
|
|
}; |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
# |
144
|
|
|
|
|
|
|
# Enumeration of ef_type member below |
145
|
|
|
|
|
|
|
# Alternative: |
146
|
|
|
|
|
|
|
# use enum (...) |
147
|
|
|
|
|
|
|
# |
148
|
|
|
|
|
|
|
# But it is not also by default on my distro and installing a package for 3 |
149
|
|
|
|
|
|
|
# constants, I find it a bit overkill! |
150
|
|
|
|
|
|
|
# |
151
|
|
|
|
|
|
|
my ( $EF_LINK, $EF_FUNC, $EF_COPY ) = 0 .. 2; |
152
|
|
|
|
|
|
|
struct ExtraField => { |
153
|
|
|
|
|
|
|
ef_type => '$', |
154
|
|
|
|
|
|
|
self_name => '$', |
155
|
|
|
|
|
|
|
description => '$', |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
check_field_existence => '$', |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
# For when ef_type is set to $EF_LINK |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
link_self_search => '$', |
162
|
|
|
|
|
|
|
link_remote_obj => '$', |
163
|
|
|
|
|
|
|
link_remote_search => '$', |
164
|
|
|
|
|
|
|
link_remote_read => '$', |
165
|
|
|
|
|
|
|
link_vlookup_opts => '%', |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
# For when ef_type is set to $EF_FUNC |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
func_sub => '$', |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
# For when ef_type is set to $EF_COPY |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
copy_source => '$', |
174
|
|
|
|
|
|
|
copy_sub => '$' |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
}; |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
my $SEARCH_VALIDATE_OPTIONS = { |
179
|
|
|
|
|
|
|
value_if_not_found => { type => UNDEF | SCALAR, optional => 1 }, |
180
|
|
|
|
|
|
|
value_if_found => { type => UNDEF | SCALAR, optional => 1 }, |
181
|
|
|
|
|
|
|
value_if_ambiguous => { type => UNDEF | SCALAR, optional => 1 }, |
182
|
|
|
|
|
|
|
ignore_ambiguous => { type => BOOLEAN, optional => 1 }, |
183
|
|
|
|
|
|
|
case => { type => BOOLEAN, optional => 1 }, |
184
|
|
|
|
|
|
|
trim => { type => BOOLEAN, optional => 1 }, |
185
|
|
|
|
|
|
|
ignore_empty => { type => BOOLEAN, optional => 1 }, |
186
|
|
|
|
|
|
|
ignore_accents => { type => BOOLEAN, optional => 1 } |
187
|
|
|
|
|
|
|
}; |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
sub _is_utf8 { |
190
|
424
|
|
|
424
|
|
758
|
my $e = shift; |
191
|
|
|
|
|
|
|
|
192
|
424
|
100
|
|
|
|
2836
|
return 1 if $e =~ m/^(utf-?8|ucs-?8)/i; |
193
|
32
|
|
|
|
|
67
|
return 0; |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
# To replace // in old perls: return the first non-undef value in provided list |
197
|
|
|
|
|
|
|
sub _get_def { |
198
|
12079
|
|
|
12079
|
|
21704
|
for (@_) { |
199
|
18709
|
100
|
|
|
|
44202
|
return $_ if defined($_); |
200
|
|
|
|
|
|
|
} |
201
|
895
|
|
|
|
|
1739
|
return; |
202
|
|
|
|
|
|
|
} |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
sub _print { |
205
|
15
|
|
|
15
|
|
19
|
my $self = shift; |
206
|
15
|
|
|
|
|
20
|
my $t = shift; |
207
|
|
|
|
|
|
|
|
208
|
15
|
|
|
|
|
18
|
my $infoh = $self->{infoh}; |
209
|
15
|
50
|
|
|
|
31
|
return if ref $infoh ne 'GLOB'; |
210
|
|
|
|
|
|
|
|
211
|
0
|
|
|
|
|
0
|
print( $infoh $t ); |
212
|
|
|
|
|
|
|
} |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
sub _printf { |
215
|
17
|
|
|
17
|
|
22
|
my $self = shift; |
216
|
|
|
|
|
|
|
|
217
|
17
|
|
|
|
|
23
|
my $infoh = $self->{infoh}; |
218
|
17
|
50
|
|
|
|
45
|
return if ref $infoh ne 'GLOB'; |
219
|
|
|
|
|
|
|
|
220
|
0
|
|
|
|
|
0
|
printf( $infoh @_ ); |
221
|
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
sub _print_warning { |
224
|
67
|
|
|
67
|
|
104
|
my $self = shift; |
225
|
67
|
|
|
|
|
109
|
my $warning_message = shift; |
226
|
67
|
|
|
|
|
94
|
my $dont_wrap = shift; |
227
|
|
|
|
|
|
|
|
228
|
67
|
100
|
|
|
|
168
|
my $msg = |
229
|
|
|
|
|
|
|
( $dont_wrap ? $warning_message : "$PKG: warning: $warning_message" ); |
230
|
67
|
100
|
|
|
|
4487
|
carp $msg unless $self->{quiet}; |
231
|
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
sub _close_inh { |
234
|
316
|
|
|
316
|
|
516
|
my $self = shift; |
235
|
|
|
|
|
|
|
|
236
|
316
|
100
|
|
|
|
3003
|
close $self->{_inh} if $self->{_close_inh_when_finished}; |
237
|
316
|
|
|
|
|
1194
|
$self->{_inh} = undef; |
238
|
316
|
|
|
|
|
577
|
$self->{_close_inh_when_finished} = undef; |
239
|
|
|
|
|
|
|
} |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
sub _close_outh { |
242
|
127
|
|
|
127
|
|
193
|
my $self = shift; |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
close $self->{outh} |
245
|
|
|
|
|
|
|
if defined( $self->{outh} ) |
246
|
127
|
50
|
66
|
|
|
4759
|
and $self->{_close_outh_when_finished}; |
247
|
127
|
|
|
|
|
359
|
$self->{outh} = undef; |
248
|
127
|
|
|
|
|
246
|
$self->{_close_outh_when_finished} = undef; |
249
|
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
sub _print_error { |
252
|
81
|
|
|
81
|
|
340
|
my ( $self, $error_message, $dont_stop, $err_code, $err_extra ) = @_; |
253
|
|
|
|
|
|
|
|
254
|
81
|
|
|
|
|
196
|
my $msg = "$PKG: error: $error_message"; |
255
|
|
|
|
|
|
|
|
256
|
81
|
100
|
100
|
|
|
263
|
if ( defined($err_code) |
|
|
|
100
|
|
|
|
|
257
|
|
|
|
|
|
|
and ( !$self->{quiet} ) |
258
|
|
|
|
|
|
|
and $self->{croak_if_error} ) |
259
|
|
|
|
|
|
|
{ |
260
|
5
|
50
|
|
|
|
15
|
if ( $err_code == ERR_UNKNOWN_FIELD ) { |
261
|
5
|
|
|
|
|
9
|
my %f = %{$err_extra}; |
|
5
|
|
|
|
|
23
|
|
262
|
5
|
|
|
|
|
10
|
my @cols; |
263
|
5
|
|
|
|
|
16
|
for my $n ( keys %f ) { |
264
|
15
|
|
|
|
|
31
|
$cols[ $f{$n} ] = $n; |
265
|
|
|
|
|
|
|
} |
266
|
5
|
|
|
|
|
17
|
$self->_print( $self->get_in_file_disp() |
267
|
|
|
|
|
|
|
. " column - field name correspondance:\n" ); |
268
|
5
|
|
|
|
|
14
|
$self->_print("COL # FIELD\n"); |
269
|
5
|
|
|
|
|
13
|
$self->_print("----- -----\n"); |
270
|
5
|
|
|
|
|
13
|
for my $i ( 0 .. $#cols ) { |
271
|
17
|
100
|
|
|
|
48
|
$self->_printf( "%05d %s\n", $i, |
272
|
|
|
|
|
|
|
( defined( $cols[$i] ) ? $cols[$i] : '' ) ); |
273
|
|
|
|
|
|
|
} |
274
|
|
|
|
|
|
|
} |
275
|
|
|
|
|
|
|
else { |
276
|
0
|
|
|
|
|
0
|
confess "Unknown error code: '$err_code'\n"; |
277
|
|
|
|
|
|
|
} |
278
|
|
|
|
|
|
|
} |
279
|
|
|
|
|
|
|
|
280
|
81
|
100
|
100
|
|
|
289
|
if ( $self->{croak_if_error} and ( !$dont_stop ) ) { |
281
|
30
|
|
|
|
|
98
|
$self->_close_read(1); |
282
|
30
|
|
|
|
|
92
|
$self->_close_inh(); |
283
|
30
|
|
|
|
|
87
|
$self->_close_outh(); |
284
|
30
|
|
|
|
|
113
|
$self->_status_reset(1); |
285
|
30
|
|
|
|
|
4480
|
croak $msg; |
286
|
|
|
|
|
|
|
} |
287
|
51
|
|
|
|
|
129
|
$self->_print_warning( $msg, 1 ); |
288
|
|
|
|
|
|
|
} |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
# |
291
|
|
|
|
|
|
|
# Return the string passed in argument with all accents removed from characters. |
292
|
|
|
|
|
|
|
# Do it in a rather general and reliable way, not tied to latin1. |
293
|
|
|
|
|
|
|
# Tested on latin1 and latin2 character sets. |
294
|
|
|
|
|
|
|
# |
295
|
|
|
|
|
|
|
# Credits: |
296
|
|
|
|
|
|
|
# http://stackoverflow.com/questions/17561839/remove-accents-from-accented-characters |
297
|
|
|
|
|
|
|
# |
298
|
|
|
|
|
|
|
sub remove_accents { |
299
|
3163
|
|
|
3163
|
1
|
29098
|
validate_pos( @_, { type => SCALAR } ); |
300
|
|
|
|
|
|
|
|
301
|
3163
|
|
|
|
|
9118
|
my $s = $_[0]; |
302
|
3163
|
|
|
|
|
12216
|
my $r = NFKD($s); |
303
|
3163
|
|
|
|
|
7912
|
$r =~ s/\p{Nonspacing_Mark}//g; |
304
|
3163
|
|
|
|
|
7972
|
return $r; |
305
|
|
|
|
|
|
|
} |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
sub _detect_csv_sep { |
308
|
262
|
|
|
262
|
|
452
|
my $ST_OUTSIDE = 0; |
309
|
262
|
|
|
|
|
423
|
my $ST_INSIDE = 1; |
310
|
|
|
|
|
|
|
|
311
|
262
|
|
|
|
|
634
|
my ( $self, $escape_char, $quote_char, $sep ) = @_; |
312
|
|
|
|
|
|
|
|
313
|
262
|
|
|
|
|
465
|
my $_debugh = $self->{_debugh}; |
314
|
262
|
|
|
|
|
391
|
my $inh = $self->{_inh}; |
315
|
262
|
|
|
|
|
434
|
my $_debug = $self->{_debug}; |
316
|
|
|
|
|
|
|
|
317
|
262
|
|
|
|
|
409
|
delete $self->{_inh_header}; |
318
|
|
|
|
|
|
|
|
319
|
262
|
100
|
|
|
|
647
|
$escape_char = $DEFAULT_ESCAPE_CHAR unless defined($escape_char); |
320
|
|
|
|
|
|
|
|
321
|
262
|
50
|
|
|
|
636
|
do { |
322
|
0
|
|
|
|
|
0
|
$self->_print_error( |
323
|
|
|
|
|
|
|
"illegal \$escape_char: '$escape_char' (length >= 2)"); |
324
|
0
|
|
|
|
|
0
|
return 0; |
325
|
|
|
|
|
|
|
} if length($escape_char) >= 2; |
326
|
|
|
|
|
|
|
|
327
|
262
|
50
|
|
|
|
569
|
do { |
328
|
0
|
|
|
|
|
0
|
$self->_print_error( |
329
|
|
|
|
|
|
|
"$PKG: error: illegal \$quote_char '$quote_char' (length >= 2)"); |
330
|
0
|
|
|
|
|
0
|
return 0; |
331
|
|
|
|
|
|
|
} if length($quote_char) >= 2; |
332
|
|
|
|
|
|
|
|
333
|
262
|
50
|
|
|
|
575
|
$escape_char = '--' if $escape_char eq ''; |
334
|
262
|
50
|
|
|
|
524
|
$quote_char = '--' if $quote_char eq ''; |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
# FIXME (?) |
337
|
|
|
|
|
|
|
# Avoid inlined magic values for separator auto-detection. |
338
|
|
|
|
|
|
|
# Issue is, as you can see below, the behavior is also hard-coded and not |
339
|
|
|
|
|
|
|
# straightforward to render 'tunable' ("," and ";" take precedence over "\t"). |
340
|
262
|
|
|
|
|
877
|
my %Seps = ( |
341
|
|
|
|
|
|
|
";" => 0, |
342
|
|
|
|
|
|
|
"," => 0, |
343
|
|
|
|
|
|
|
"\t" => 0 |
344
|
|
|
|
|
|
|
); |
345
|
|
|
|
|
|
|
|
346
|
262
|
|
|
|
|
2958
|
my $h = <$inh>; |
347
|
262
|
50
|
|
|
|
2577
|
if ( $self->{inh_is_stdin} ) { |
348
|
0
|
|
|
|
|
0
|
$self->{_inh_header} = $h; |
349
|
0
|
0
|
|
|
|
0
|
print( $_debugh "Input is STDIN => saving header line to re-read it " |
350
|
|
|
|
|
|
|
. "later (in-memory)\n" ) |
351
|
|
|
|
|
|
|
if $_debug; |
352
|
|
|
|
|
|
|
} |
353
|
|
|
|
|
|
|
else { |
354
|
262
|
|
|
|
|
1760
|
seek $inh, 0, SEEK_SET; |
355
|
262
|
50
|
|
|
|
755
|
print( $_debugh "Input is not STDIN => using seek function to rewind " |
356
|
|
|
|
|
|
|
. "read head after header line reading\n" ) |
357
|
|
|
|
|
|
|
if $_debug; |
358
|
|
|
|
|
|
|
} |
359
|
|
|
|
|
|
|
|
360
|
262
|
|
|
|
|
593
|
chomp $h; |
361
|
262
|
|
|
|
|
409
|
my $status = $ST_OUTSIDE; |
362
|
262
|
|
|
|
|
600
|
my $l = length($h); |
363
|
262
|
|
|
|
|
406
|
my $c = 0; |
364
|
262
|
|
|
|
|
636
|
while ( $c < $l ) { |
365
|
4808
|
|
|
|
|
6459
|
my $ch = substr( $h, $c, 1 ); |
366
|
4808
|
|
|
|
|
5733
|
my $chnext = ''; |
367
|
4808
|
100
|
|
|
|
8310
|
$chnext = substr( $h, $c + 1, 1 ) if ( $c < $l - 1 ); |
368
|
4808
|
100
|
|
|
|
8025
|
if ( $status == $ST_INSIDE ) { |
|
|
50
|
|
|
|
|
|
369
|
1521
|
50
|
66
|
|
|
2962
|
if ( $ch eq $escape_char and $chnext eq $quote_char ) { |
|
|
100
|
|
|
|
|
|
370
|
0
|
|
|
|
|
0
|
$c += 2; |
371
|
|
|
|
|
|
|
} |
372
|
|
|
|
|
|
|
elsif ( $ch eq $quote_char ) { |
373
|
197
|
|
|
|
|
230
|
$status = $ST_OUTSIDE; |
374
|
197
|
|
|
|
|
331
|
$c++; |
375
|
|
|
|
|
|
|
} |
376
|
|
|
|
|
|
|
else { |
377
|
1324
|
|
|
|
|
2043
|
$c++; |
378
|
|
|
|
|
|
|
} |
379
|
|
|
|
|
|
|
} |
380
|
|
|
|
|
|
|
elsif ( $status == $ST_OUTSIDE ) { |
381
|
3287
|
50
|
33
|
|
|
7523
|
if ( |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
|
|
|
|
|
382
|
|
|
|
|
|
|
$ch eq $escape_char |
383
|
|
|
|
|
|
|
and ( $chnext eq $quote_char |
384
|
|
|
|
|
|
|
or exists $Seps{$chnext} ) |
385
|
|
|
|
|
|
|
) |
386
|
|
|
|
|
|
|
{ |
387
|
0
|
|
|
|
|
0
|
$c += 2; |
388
|
|
|
|
|
|
|
} |
389
|
|
|
|
|
|
|
elsif ( exists $Seps{$ch} ) { |
390
|
946
|
|
|
|
|
1181
|
$Seps{$ch}++; |
391
|
946
|
|
|
|
|
1485
|
$c++; |
392
|
|
|
|
|
|
|
} |
393
|
|
|
|
|
|
|
elsif ( $ch eq $quote_char ) { |
394
|
197
|
|
|
|
|
538
|
$status = $ST_INSIDE; |
395
|
197
|
|
|
|
|
318
|
$c++; |
396
|
|
|
|
|
|
|
} |
397
|
|
|
|
|
|
|
else { |
398
|
2144
|
|
|
|
|
3484
|
$c++; |
399
|
|
|
|
|
|
|
} |
400
|
|
|
|
|
|
|
} |
401
|
|
|
|
|
|
|
} |
402
|
|
|
|
|
|
|
|
403
|
262
|
100
|
100
|
|
|
1383
|
if ( $Seps{";"} == 0 and $Seps{","} >= 1 ) { |
|
|
100
|
66
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
404
|
147
|
|
|
|
|
277
|
$$sep = ","; |
405
|
147
|
|
|
|
|
620
|
return 1; |
406
|
|
|
|
|
|
|
} |
407
|
|
|
|
|
|
|
elsif ( $Seps{","} == 0 and $Seps{";"} >= 1 ) { |
408
|
106
|
|
|
|
|
222
|
$$sep = ";"; |
409
|
106
|
|
|
|
|
492
|
return 1; |
410
|
|
|
|
|
|
|
} |
411
|
|
|
|
|
|
|
elsif ( $Seps{","} == 0 and $Seps{";"} == 0 and $Seps{"\t"} >= 1 ) { |
412
|
0
|
|
|
|
|
0
|
$$sep = "\t"; |
413
|
0
|
|
|
|
|
0
|
return 1; |
414
|
|
|
|
|
|
|
} |
415
|
|
|
|
|
|
|
else { |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
# Check the case where there is one unique column, in which case, |
418
|
|
|
|
|
|
|
# assume comma separator. |
419
|
9
|
|
|
|
|
23
|
my $h_no_accnt = remove_accents($h); |
420
|
9
|
100
|
|
|
|
58
|
if ( $h_no_accnt =~ m/^[[:alnum:]_]+$/i ) { |
421
|
3
|
|
|
|
|
7
|
$$sep = ","; |
422
|
3
|
|
|
|
|
13
|
return 1; |
423
|
|
|
|
|
|
|
} |
424
|
|
|
|
|
|
|
|
425
|
6
|
|
|
|
|
11
|
$$sep = ""; |
426
|
6
|
50
|
|
|
|
12
|
if ($_debug) { |
427
|
0
|
|
|
|
|
0
|
for my $k ( keys %Seps ) { |
428
|
0
|
|
|
|
|
0
|
print( $_debugh "\$Seps{'$k'} = $Seps{$k}\n" ); |
429
|
|
|
|
|
|
|
} |
430
|
|
|
|
|
|
|
} |
431
|
6
|
|
|
|
|
23
|
return 0; |
432
|
|
|
|
|
|
|
} |
433
|
|
|
|
|
|
|
} |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
sub _reopen_input { |
436
|
652
|
|
|
652
|
|
948
|
my $self = shift; |
437
|
|
|
|
|
|
|
|
438
|
652
|
|
|
|
|
985
|
my $in_file = $self->{in_file}; |
439
|
|
|
|
|
|
|
|
440
|
652
|
|
|
|
|
805
|
my $inh; |
441
|
652
|
50
|
|
|
|
12642
|
if ( !open( $inh, "<", $in_file ) ) { ## no critic (InputOutput::RequireBriefOpen) |
442
|
0
|
|
|
|
|
0
|
$self->_print_error("unable to open file '$in_file': $!"); |
443
|
0
|
|
|
|
|
0
|
return; |
444
|
|
|
|
|
|
|
} |
445
|
652
|
50
|
|
|
|
1833
|
if ( !$self->{_leave_encoding_alone} ) { |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
confess "Oups! _inh_encoding_string undef?" |
448
|
652
|
50
|
|
|
|
1310
|
unless defined( $self->{_inh_encoding_string} ); |
449
|
|
|
|
|
|
|
|
450
|
652
|
|
|
|
|
4100
|
binmode $inh, $self->{_inh_encoding_string}; |
451
|
|
|
|
|
|
|
} |
452
|
|
|
|
|
|
|
|
453
|
652
|
|
|
|
|
29058
|
return $inh; |
454
|
|
|
|
|
|
|
} |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
# Abstraction layer, not useful Today, could bring added value when looking into |
457
|
|
|
|
|
|
|
# Text::CSV I/O |
458
|
|
|
|
|
|
|
sub _mygetline { |
459
|
8399
|
|
|
8399
|
|
14351
|
my ( $csvobj, $fh ) = @_; |
460
|
|
|
|
|
|
|
|
461
|
8399
|
|
|
|
|
172425
|
return $csvobj->getline($fh); |
462
|
|
|
|
|
|
|
} |
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
sub _detect_meta { |
465
|
456
|
|
|
456
|
|
978
|
my ( $self, $quote_char, $sep_char ) = @_; |
466
|
|
|
|
|
|
|
|
467
|
456
|
|
|
|
|
765
|
my $in_file = $self->{in_file}; |
468
|
456
|
|
|
|
|
682
|
my $_debug = $self->{_debug}; |
469
|
456
|
|
|
|
|
702
|
my $_debugh = $self->{_debugh}; |
470
|
|
|
|
|
|
|
|
471
|
456
|
100
|
|
|
|
955
|
return if $self->{_int_one_pass}; |
472
|
436
|
100
|
|
|
|
986
|
return if $self->{_detect_meta_done}; # Sans jeu de mot... |
473
|
|
|
|
|
|
|
|
474
|
300
|
100
|
|
|
|
682
|
if ( !defined( $self->{escape_char} ) ) { |
475
|
298
|
|
|
|
|
727
|
$self->_register_pass("detect escape character"); |
476
|
|
|
|
|
|
|
|
477
|
298
|
|
|
|
|
459
|
my $flag = 0; |
478
|
298
|
|
|
|
|
642
|
my $inh = $self->_reopen_input(); |
479
|
298
|
50
|
|
|
|
756
|
if ( defined($inh) ) { |
480
|
298
|
|
|
|
|
3897
|
while ( my $l = <$inh> ) { |
481
|
3899
|
|
|
|
|
10878
|
chomp $l; |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
# Very heuristic criteria... |
484
|
|
|
|
|
|
|
# Tant pis. |
485
|
|
|
|
|
|
|
# $flag = 1 if $l =~ m/(?<!$sep_char)$quote_char$quote_char(?!$sep_char)/; |
486
|
|
|
|
|
|
|
# $flag = 1 if $l =~ m/(\\$quote_char|\\\\)/; |
487
|
3899
|
100
|
|
|
|
14272
|
$flag = 1 if $l =~ m/(\\$quote_char)/; |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
} |
490
|
298
|
|
|
|
|
2040
|
close $inh; |
491
|
|
|
|
|
|
|
} |
492
|
298
|
100
|
|
|
|
1085
|
$self->{escape_char} = ( $flag ? '\\' : '"' ); |
493
|
298
|
50
|
|
|
|
1006
|
print( $_debugh " detected escape_char: '$self->{escape_char}'\n" ) |
494
|
|
|
|
|
|
|
if $_debug; |
495
|
|
|
|
|
|
|
} |
496
|
|
|
|
|
|
|
|
497
|
300
|
50
|
|
|
|
647
|
print( $_debugh " using escape_char: '$self->{escape_char}' " |
498
|
|
|
|
|
|
|
. "to further examine input (is_always_quoted, multiline)\n" ) |
499
|
|
|
|
|
|
|
if $_debug; |
500
|
|
|
|
|
|
|
|
501
|
300
|
|
|
|
|
437
|
my $is_always_quoted = 0; |
502
|
300
|
|
|
|
|
724
|
my $inh = $self->_reopen_input(); |
503
|
300
|
|
|
|
|
501
|
my @multiline; |
504
|
300
|
50
|
|
|
|
677
|
if ( defined($inh) ) { |
505
|
300
|
|
|
|
|
758
|
$self->_register_pass("detect is_always_quoted and multiline"); |
506
|
|
|
|
|
|
|
my $csv = Text::CSV->new( |
507
|
|
|
|
|
|
|
{ |
508
|
|
|
|
|
|
|
sep_char => $sep_char, |
509
|
|
|
|
|
|
|
allow_whitespace => 1, |
510
|
|
|
|
|
|
|
binary => 1, |
511
|
|
|
|
|
|
|
auto_diag => 0, |
512
|
|
|
|
|
|
|
quote_char => $quote_char, |
513
|
|
|
|
|
|
|
escape_char => $self->{escape_char}, |
514
|
300
|
|
|
|
|
3169
|
keep_meta_info => 1, |
515
|
|
|
|
|
|
|
allow_loose_escapes => 1 |
516
|
|
|
|
|
|
|
} |
517
|
|
|
|
|
|
|
); |
518
|
300
|
|
|
|
|
55576
|
my $nb_rows = 0; |
519
|
300
|
|
|
|
|
494
|
$is_always_quoted = 1; |
520
|
300
|
|
|
|
|
686
|
while ( my $ar = _mygetline( $csv, $inh ) ) { |
521
|
3897
|
|
|
|
|
110314
|
$nb_rows++; |
522
|
|
|
|
|
|
|
|
523
|
3897
|
|
|
|
|
4637
|
my @aa = @{$ar}; |
|
3897
|
|
|
|
|
9471
|
|
524
|
3897
|
|
|
|
|
5790
|
my $e = $#aa; |
525
|
3897
|
|
|
|
|
6876
|
for my $i ( 0 .. $e ) { |
526
|
28577
|
100
|
|
|
|
47046
|
$is_always_quoted = 0 unless $csv->is_quoted($i); |
527
|
28577
|
100
|
|
|
|
225458
|
$multiline[$i] = 1 if $aa[$i] =~ m/\n/; |
528
|
|
|
|
|
|
|
} |
529
|
|
|
|
|
|
|
} |
530
|
300
|
|
|
|
|
8749
|
$self->{_nb_rows} = $nb_rows; |
531
|
300
|
|
|
|
|
3349
|
close $inh; |
532
|
|
|
|
|
|
|
} |
533
|
300
|
|
|
|
|
1033
|
$self->{_multiline} = [@multiline]; |
534
|
|
|
|
|
|
|
|
535
|
300
|
50
|
|
|
|
713
|
print( $_debugh " is_always_quoted: $is_always_quoted\n" ) if $_debug; |
536
|
300
|
|
|
|
|
630
|
$self->{_is_always_quoted} = $is_always_quoted; |
537
|
|
|
|
|
|
|
|
538
|
300
|
|
|
|
|
1357
|
$self->{_detect_meta_done} = 1; |
539
|
|
|
|
|
|
|
} |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
sub _register_pass { |
542
|
1269
|
|
|
1269
|
|
2350
|
my ( $self, $pass_name ) = @_; |
543
|
1269
|
|
|
|
|
1967
|
my $_debug = $self->{_debug}; |
544
|
1269
|
|
|
|
|
2351
|
my $_debugh = $self->{_debugh}; |
545
|
|
|
|
|
|
|
|
546
|
1269
|
|
|
|
|
2079
|
$self->{_pass_count}++; |
547
|
|
|
|
|
|
|
|
548
|
1269
|
50
|
|
|
|
3308
|
return unless $_debug; |
549
|
|
|
|
|
|
|
|
550
|
0
|
|
|
|
|
0
|
print( $_debugh "Pass #" . $self->{_pass_count} . " ($pass_name) done\n" ); |
551
|
|
|
|
|
|
|
} |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
sub _update_in_mem_record_count { |
554
|
510
|
|
|
510
|
|
1113
|
my ( $self, $nonexistent_arg ) = @_; |
555
|
510
|
|
|
|
|
842
|
my $_debug = $self->{_debug}; |
556
|
510
|
|
|
|
|
811
|
my $_debugh = $self->{_debugh}; |
557
|
|
|
|
|
|
|
|
558
|
510
|
50
|
|
|
|
1116
|
confess "Hey! what is this second argument?" if defined($nonexistent_arg); |
559
|
|
|
|
|
|
|
|
560
|
510
|
|
|
|
|
711
|
my $new_count = $#{ $self->{_flat} } + 1; |
|
510
|
|
|
|
|
1096
|
|
561
|
|
|
|
|
|
|
|
562
|
510
|
|
|
|
|
787
|
my $updated_max = 0; |
563
|
510
|
100
|
|
|
|
1334
|
if ( $new_count > $self->get_max_in_mem_record_count() ) { |
564
|
155
|
|
|
|
|
450
|
$self->_set_max_in_mem_record_count($new_count); |
565
|
155
|
|
|
|
|
252
|
$updated_max = 1; |
566
|
|
|
|
|
|
|
} |
567
|
|
|
|
|
|
|
|
568
|
510
|
|
|
|
|
1276
|
$self->{_in_mem_record_count} = $new_count; |
569
|
510
|
50
|
|
|
|
1232
|
if ($_debug) { |
570
|
0
|
|
|
|
|
0
|
print( $_debugh "_in_mem_record_count updated, set to $new_count" ); |
571
|
0
|
0
|
|
|
|
0
|
print( $_debugh " (also updated max)" ) if $updated_max; |
572
|
0
|
|
|
|
|
0
|
print( $_debugh "\n" ); |
573
|
|
|
|
|
|
|
} |
574
|
|
|
|
|
|
|
} |
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
sub _detect_inh_encoding { |
577
|
320
|
|
|
320
|
|
1262
|
my ( $self, $enc, $via, $in_file, $detect_enc ) = @_; |
578
|
320
|
|
|
|
|
685
|
my $_debug = $self->{_debug}; |
579
|
320
|
|
|
|
|
538
|
my $_debugh = $self->{_debugh}; |
580
|
|
|
|
|
|
|
|
581
|
320
|
100
|
66
|
|
|
1074
|
$enc = $DEFAULT_IN_ENCODING if ( !defined $enc ) or $enc eq ''; |
582
|
|
|
|
|
|
|
|
583
|
320
|
|
|
|
|
1995
|
my @encodings = split( /\s*,\s*/, $enc ); |
584
|
|
|
|
|
|
|
|
585
|
320
|
50
|
|
|
|
916
|
confess "Oups! No encoding to try?" if $#encodings < 0; |
586
|
|
|
|
|
|
|
|
587
|
320
|
50
|
|
|
|
716
|
print( $_debugh "[ST] _detect_inh_encoding(): start\n" ) if $_debug; |
588
|
|
|
|
|
|
|
|
589
|
320
|
|
|
|
|
494
|
my $wrn = 0; |
590
|
320
|
|
|
|
|
724
|
my $m; |
591
|
|
|
|
|
|
|
my $m0; |
592
|
320
|
|
|
|
|
0
|
my $ee; |
593
|
320
|
|
|
|
|
626
|
for my $e (@encodings) { |
594
|
332
|
|
|
|
|
516
|
$ee = $e; |
595
|
332
|
|
|
|
|
696
|
my $viadef = _get_def( $via, '' ); |
596
|
332
|
|
|
|
|
931
|
$m = ":encoding($e)$viadef"; |
597
|
332
|
100
|
|
|
|
774
|
$m0 = $m unless defined($m0); |
598
|
|
|
|
|
|
|
|
599
|
332
|
100
|
|
|
|
705
|
last unless $detect_enc; |
600
|
|
|
|
|
|
|
|
601
|
320
|
50
|
33
|
|
|
1252
|
confess "Oups! in_file not defined?" |
602
|
|
|
|
|
|
|
if ( !defined $in_file ) |
603
|
|
|
|
|
|
|
or $in_file eq ''; |
604
|
|
|
|
|
|
|
|
605
|
320
|
50
|
|
|
|
621
|
print( $_debugh " Checking encoding '$e' / '$m'\n" ) if $_debug; |
606
|
320
|
|
|
|
|
498
|
$wrn = 0; |
607
|
|
|
|
|
|
|
|
608
|
320
|
|
|
|
|
1093
|
$self->_register_pass("check $e encoding"); |
609
|
|
|
|
|
|
|
|
610
|
320
|
|
|
|
|
493
|
my $utf8_bom = 0; |
611
|
320
|
100
|
|
|
|
695
|
if ( _is_utf8($e) ) { |
612
|
296
|
50
|
|
|
|
6370
|
if ( open my $fh, '<:raw', $in_file ) { |
613
|
296
|
|
|
|
|
588
|
my $bom; |
614
|
296
|
|
|
|
|
4148
|
read $fh, $bom, 3; |
615
|
296
|
100
|
66
|
|
|
1476
|
if ( length($bom) == 3 and $bom eq "\xef\xbb\xbf" ) { |
616
|
12
|
100
|
|
|
|
29
|
if ( !defined($via) ) { |
617
|
10
|
|
|
|
|
25
|
$m .= ":via(File::BOM)"; |
618
|
|
|
|
|
|
|
} |
619
|
|
|
|
|
|
|
} |
620
|
296
|
|
|
|
|
2227
|
close $fh; |
621
|
|
|
|
|
|
|
} |
622
|
|
|
|
|
|
|
} |
623
|
|
|
|
|
|
|
|
624
|
320
|
|
|
|
|
602
|
my $inh; |
625
|
320
|
50
|
|
|
|
4609
|
if ( !open( $inh, "<", $in_file ) ) { ## no critic (InputOutput::RequireBriefOpen) |
626
|
0
|
|
|
|
|
0
|
$self->_print_error("unable to open file '$in_file': $!"); |
627
|
0
|
|
|
|
|
0
|
return ( $encodings[0], $m0 ); |
628
|
|
|
|
|
|
|
} |
629
|
16
|
|
|
16
|
|
109
|
binmode $inh, $m; |
|
16
|
|
|
|
|
28
|
|
|
16
|
|
|
|
|
106
|
|
|
320
|
|
|
|
|
3265
|
|
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
# TURN OFF WARNINGS OUTPUT |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
{ |
634
|
320
|
|
|
|
|
35455
|
local $SIG{__WARN__} = sub { |
635
|
51
|
|
|
51
|
|
545
|
$wrn++; |
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
# Uncomment only for debug! |
638
|
|
|
|
|
|
|
# Otherwise you'll get quite a good deal of output at each execution :-) |
639
|
|
|
|
|
|
|
# print(STDERR @_); |
640
|
320
|
|
|
|
|
2114
|
}; |
641
|
320
|
|
|
|
|
4069
|
while (<$inh>) { } |
642
|
|
|
|
|
|
|
} |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
# WARNINGS ARE BACK ON |
645
|
|
|
|
|
|
|
|
646
|
320
|
|
|
|
|
11980
|
close $inh; |
647
|
320
|
50
|
|
|
|
949
|
print( $_debugh " '$m' counts $wrn warning(s)\n" ) if $_debug; |
648
|
|
|
|
|
|
|
|
649
|
320
|
100
|
|
|
|
1274
|
last if $wrn == 0; |
650
|
|
|
|
|
|
|
} |
651
|
|
|
|
|
|
|
|
652
|
320
|
50
|
|
|
|
789
|
if ( $wrn >= 1 ) { |
653
|
0
|
|
|
|
|
0
|
$self->_print_warning( |
654
|
|
|
|
|
|
|
"encoding warnings encountered during initial check, " |
655
|
|
|
|
|
|
|
. "using '$encodings[0]'" ); |
656
|
0
|
|
|
|
|
0
|
return ( $encodings[0], $m0 ); |
657
|
|
|
|
|
|
|
} |
658
|
|
|
|
|
|
|
|
659
|
320
|
50
|
|
|
|
689
|
confess "Oups! undef encoding string?" unless defined($m); |
660
|
|
|
|
|
|
|
|
661
|
320
|
50
|
|
|
|
622
|
print( $_debugh " Detected encoding string '$ee' / '$m'\n" ) if $_debug; |
662
|
320
|
|
|
|
|
1330
|
return ( $ee, $m ); |
663
|
|
|
|
|
|
|
} |
664
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
# |
666
|
|
|
|
|
|
|
# Each of these functions brings status to the next value (current status + 1). |
667
|
|
|
|
|
|
|
# Each of these functions returns 0 if an error occured, 1 if all good |
668
|
|
|
|
|
|
|
# |
669
|
|
|
|
|
|
|
my @status_forward_functions = ( |
670
|
|
|
|
|
|
|
"_S1_init_input", # To go from S0 to S1 |
671
|
|
|
|
|
|
|
"_S2_init_fields_from_header", # To go form S1 to S2 |
672
|
|
|
|
|
|
|
"_S3_init_fields_extra", # To go from S2 to S3 |
673
|
|
|
|
|
|
|
"_S4_read_all_in_mem", # To go from S3 to S4 |
674
|
|
|
|
|
|
|
); |
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
sub _status_reset { |
677
|
494
|
|
|
494
|
|
822
|
my $self = shift; |
678
|
|
|
|
|
|
|
|
679
|
494
|
|
|
|
|
3173
|
validate_pos( @_, { type => SCALAR, optional => 1 } ); |
680
|
494
|
|
|
|
|
1704
|
my $called_from_print_error = _get_def( $_[0], 0 ); |
681
|
|
|
|
|
|
|
|
682
|
494
|
100
|
100
|
|
|
1745
|
if ( defined( $self->{_status} ) and $self->{_status} == 4 ) { |
683
|
18
|
100
|
|
|
|
43
|
unless ($called_from_print_error) { |
684
|
16
|
|
|
|
|
28
|
my $msg = |
685
|
|
|
|
|
|
|
"in-memory CSV content discarded, will have to re-read input"; |
686
|
16
|
|
|
|
|
52
|
$self->_print_warning($msg); |
687
|
|
|
|
|
|
|
} |
688
|
18
|
|
|
|
|
176
|
$self->{_flat} = []; |
689
|
18
|
|
|
|
|
54
|
$self->_update_in_mem_record_count(); |
690
|
|
|
|
|
|
|
} |
691
|
|
|
|
|
|
|
|
692
|
494
|
|
|
|
|
948
|
$self->{_status} = 0; |
693
|
494
|
100
|
|
|
|
1032
|
return 0 if $called_from_print_error; |
694
|
464
|
|
|
|
|
981
|
return $self->_status_forward('S1'); |
695
|
|
|
|
|
|
|
} |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
sub _status_forward { |
698
|
4085
|
|
|
4085
|
|
6172
|
my $self = shift; |
699
|
|
|
|
|
|
|
|
700
|
4085
|
|
|
|
|
8393
|
return $self->___status_move( @_, 1 ); |
701
|
|
|
|
|
|
|
} |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
sub _status_backward { |
704
|
139
|
|
|
139
|
|
209
|
my $self = shift; |
705
|
|
|
|
|
|
|
|
706
|
139
|
|
|
|
|
319
|
return $self->___status_move( @_, -1 ); |
707
|
|
|
|
|
|
|
} |
708
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
# You should not call ___status_move() in the code, that is why the name is |
710
|
|
|
|
|
|
|
# prefixed with 3 underscores! Only _status_forward and _status_backward should |
711
|
|
|
|
|
|
|
# call it. |
712
|
|
|
|
|
|
|
sub ___status_move { |
713
|
4224
|
|
|
4224
|
|
8425
|
my ( $self, $target, $step ) = @_; |
714
|
|
|
|
|
|
|
|
715
|
4224
|
|
|
|
|
7258
|
my $_debug = $self->{_debug}; |
716
|
4224
|
|
|
|
|
6124
|
my $_debugh = $self->{_debugh}; |
717
|
|
|
|
|
|
|
|
718
|
4224
|
50
|
66
|
|
|
18528
|
if ( ( !defined $step ) or ( $step != -1 and $step != 1 ) ) { |
|
|
|
33
|
|
|
|
|
719
|
0
|
|
|
|
|
0
|
confess "Oups! \$step has a wrong value: '$step'"; |
720
|
|
|
|
|
|
|
} |
721
|
|
|
|
|
|
|
|
722
|
4224
|
|
|
|
|
5740
|
my $n; |
723
|
4224
|
50
|
|
|
|
18668
|
confess "Oups! illegal status string: '$target'" |
724
|
|
|
|
|
|
|
unless ($n) = $target =~ m/^S(\d)$/; |
725
|
|
|
|
|
|
|
|
726
|
4224
|
100
|
|
|
|
9727
|
if ( $self->{_read_in_progress} ) { |
727
|
1
|
|
|
|
|
4
|
$self->_print_error( |
728
|
|
|
|
|
|
|
"illegal call while read is in progress, " |
729
|
|
|
|
|
|
|
. "would lead to infinite recursion", |
730
|
|
|
|
|
|
|
0 |
731
|
|
|
|
|
|
|
); |
732
|
0
|
|
|
|
|
0
|
confess "Aborted."; |
733
|
|
|
|
|
|
|
} |
734
|
|
|
|
|
|
|
|
735
|
4223
|
100
|
|
|
|
7936
|
if ( $step == -1 ) { |
736
|
139
|
100
|
|
|
|
406
|
if ( $n < $self->{_status} ) { |
737
|
19
|
100
|
|
|
|
52
|
if ( $self->{_status} == 4 ) { |
738
|
16
|
50
|
|
|
|
42
|
print( $_debugh |
739
|
|
|
|
|
|
|
"[ST] Requested status $n but will go to status 0\n" ) |
740
|
|
|
|
|
|
|
if $_debug; |
741
|
16
|
|
|
|
|
63
|
return $self->_status_reset(); |
742
|
|
|
|
|
|
|
} |
743
|
3
|
|
|
|
|
6
|
$self->{_status} = $n; |
744
|
3
|
50
|
|
|
|
6
|
print( $_debugh "[ST] New status: " . $self->{_status} . "\n" ) |
745
|
|
|
|
|
|
|
if $_debug; |
746
|
|
|
|
|
|
|
} |
747
|
123
|
|
|
|
|
352
|
return 1; |
748
|
|
|
|
|
|
|
} |
749
|
|
|
|
|
|
|
|
750
|
4084
|
100
|
|
|
|
9168
|
if ( $self->{_status} < $n ) { |
751
|
869
|
50
|
|
|
|
1658
|
print( $_debugh "[ST] Current status: " . $self->{_status} . "\n" ) |
752
|
|
|
|
|
|
|
if $_debug; |
753
|
|
|
|
|
|
|
} |
754
|
|
|
|
|
|
|
|
755
|
4084
|
100
|
100
|
|
|
10716
|
if ( $self->{_status} <= 1 |
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
756
|
|
|
|
|
|
|
and $n >= 2 |
757
|
|
|
|
|
|
|
and $self->{_int_one_pass} |
758
|
|
|
|
|
|
|
and $self->get_pass_count() >= 1 ) |
759
|
|
|
|
|
|
|
{ |
760
|
12
|
|
|
|
|
24
|
my $msg = "one_pass set, unable to read input again"; |
761
|
|
|
|
|
|
|
do { |
762
|
12
|
|
|
|
|
44
|
$self->_print_error($msg); |
763
|
5
|
|
|
|
|
77
|
return 0; |
764
|
12
|
50
|
|
|
|
32
|
} if $self->{one_pass}; |
765
|
0
|
0
|
|
|
|
0
|
$self->_print_warning($msg) if !$self->{one_pass}; |
766
|
|
|
|
|
|
|
} |
767
|
|
|
|
|
|
|
|
768
|
4072
|
|
|
|
|
8248
|
while ( $self->{_status} < $n ) { |
769
|
|
|
|
|
|
|
|
770
|
1347
|
|
|
|
|
2522
|
my $funcname = $status_forward_functions[ $self->{_status} ]; |
771
|
1347
|
50
|
|
|
|
2437
|
confess "Oups! Unknown status?" unless defined($funcname); |
772
|
|
|
|
|
|
|
|
773
|
1347
|
50
|
|
|
|
2577
|
print( $_debugh "[ST] Now executing $funcname\n" ) if $_debug; |
774
|
|
|
|
|
|
|
|
775
|
1347
|
50
|
|
|
|
5511
|
if ( my $member_function = $self->can($funcname) ) { |
776
|
1347
|
100
|
|
|
|
3089
|
return 0 unless $self->$member_function(); |
777
|
|
|
|
|
|
|
} |
778
|
|
|
|
|
|
|
else { |
779
|
0
|
|
|
|
|
0
|
confess "Could not find method $funcname in $PKG!"; |
780
|
|
|
|
|
|
|
} |
781
|
|
|
|
|
|
|
|
782
|
1328
|
|
|
|
|
2519
|
$self->{_status} += $step; |
783
|
1328
|
50
|
|
|
|
3999
|
print( $_debugh "[ST] New status: " . $self->{_status} . "\n" ) |
784
|
|
|
|
|
|
|
if $_debug; |
785
|
|
|
|
|
|
|
} |
786
|
|
|
|
|
|
|
|
787
|
4053
|
|
|
|
|
10418
|
return 1; |
788
|
|
|
|
|
|
|
} |
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
sub new { |
791
|
333
|
|
|
333
|
1
|
247908
|
my ( $class, @args ) = @_; |
792
|
|
|
|
|
|
|
|
793
|
333
|
|
|
|
|
37965
|
@args = validate( |
794
|
|
|
|
|
|
|
@args, |
795
|
|
|
|
|
|
|
{ |
796
|
|
|
|
|
|
|
in_file => { type => SCALAR, optional => 1 }, |
797
|
|
|
|
|
|
|
infoh => |
798
|
|
|
|
|
|
|
{ type => UNDEF | GLOBREF, default => \*STDERR, optional => 1 }, |
799
|
|
|
|
|
|
|
verbose => { type => BOOLEAN, default => 0, optional => 1 }, |
800
|
|
|
|
|
|
|
quiet => { type => BOOLEAN, optional => 1 }, |
801
|
|
|
|
|
|
|
croak_if_error => { type => BOOLEAN, default => 1, optional => 1 }, |
802
|
|
|
|
|
|
|
inh => { type => GLOBREF, optional => 1 }, |
803
|
|
|
|
|
|
|
in_csvobj => { type => OBJECT, optional => 1 }, |
804
|
|
|
|
|
|
|
sep_char => { type => SCALAR, optional => 1 }, |
805
|
|
|
|
|
|
|
quote_char => { type => SCALAR, optional => 1 }, |
806
|
|
|
|
|
|
|
escape_char => { type => SCALAR, optional => 1 }, |
807
|
|
|
|
|
|
|
has_headers => { type => BOOLEAN, default => 1, optional => 1 }, |
808
|
|
|
|
|
|
|
out_has_headers => |
809
|
|
|
|
|
|
|
{ type => UNDEF | BOOLEAN, default => undef, optional => 1 }, |
810
|
|
|
|
|
|
|
fields_ar => { type => ARRAYREF, optional => 1 }, |
811
|
|
|
|
|
|
|
fields_hr => { type => HASHREF, optional => 1 }, |
812
|
|
|
|
|
|
|
fields_column_names => { type => ARRAYREF, optional => 1 }, |
813
|
|
|
|
|
|
|
search_case => { type => SCALAR, optional => 1 }, |
814
|
|
|
|
|
|
|
search_trim => { type => SCALAR, optional => 1 }, |
815
|
|
|
|
|
|
|
search_ignore_empty => { type => SCALAR, optional => 1 }, |
816
|
|
|
|
|
|
|
search_ignore_accents => { type => SCALAR, optional => 1 }, |
817
|
|
|
|
|
|
|
search_ignore_ambiguous => { type => SCALAR, optional => 1 }, |
818
|
|
|
|
|
|
|
search_value_if_not_found => { type => SCALAR, optional => 1 }, |
819
|
|
|
|
|
|
|
search_value_if_found => { type => SCALAR, optional => 1 }, |
820
|
|
|
|
|
|
|
search_value_if_ambiguous => { type => SCALAR, optional => 1 }, |
821
|
|
|
|
|
|
|
walker_hr => { type => CODEREF, optional => 1 }, |
822
|
|
|
|
|
|
|
walker_ar => { type => CODEREF, optional => 1 }, |
823
|
|
|
|
|
|
|
read_post_update_hr => { type => CODEREF, optional => 1 }, |
824
|
|
|
|
|
|
|
write_filter_hr => { type => CODEREF, optional => 1 }, |
825
|
|
|
|
|
|
|
out_filter => { type => CODEREF, optional => 1 }, |
826
|
|
|
|
|
|
|
write_fields => { type => ARRAYREF, optional => 1 }, |
827
|
|
|
|
|
|
|
out_orderby => { type => ARRAYREF, optional => 1 }, |
828
|
|
|
|
|
|
|
out_fields => { type => ARRAYREF, optional => 1 }, |
829
|
|
|
|
|
|
|
out_file => { type => SCALAR, optional => 1 }, |
830
|
|
|
|
|
|
|
out_always_quote => { type => BOOLEAN, optional => 1 }, |
831
|
|
|
|
|
|
|
out_sep_char => { type => SCALAR, optional => 1 }, |
832
|
|
|
|
|
|
|
out_quote_char => { type => SCALAR, optional => 1 }, |
833
|
|
|
|
|
|
|
out_escape_char => { type => SCALAR, optional => 1 }, |
834
|
|
|
|
|
|
|
out_dates_format => { type => SCALAR, optional => 1 }, |
835
|
|
|
|
|
|
|
out_dates_locale => { type => SCALAR, optional => 1 }, |
836
|
|
|
|
|
|
|
encoding => { type => SCALAR, optional => 1 }, |
837
|
|
|
|
|
|
|
via => { type => SCALAR, optional => 1 }, |
838
|
|
|
|
|
|
|
out_encoding => { type => SCALAR, optional => 1 }, |
839
|
|
|
|
|
|
|
dont_mess_with_encoding => { type => BOOLEAN, optional => 1 }, |
840
|
|
|
|
|
|
|
one_pass => { type => BOOLEAN, optional => 1 }, |
841
|
|
|
|
|
|
|
no_undef => { type => BOOLEAN, optional => 1 }, |
842
|
|
|
|
|
|
|
fields_dates => { type => ARRAYREF, optional => 1 }, |
843
|
|
|
|
|
|
|
fields_dates_auto => { type => BOOLEAN, optional => 1 }, |
844
|
|
|
|
|
|
|
fields_dates_auto_optimize => { type => BOOLEAN, optional => 1 }, |
845
|
|
|
|
|
|
|
dates_formats_to_try => { type => ARRAYREF, optional => 1 }, |
846
|
|
|
|
|
|
|
dates_formats_to_try_supp => { type => ARRAYREF, optional => 1 }, |
847
|
|
|
|
|
|
|
dates_ignore_trailing_chars => { type => BOOLEAN, optional => 1 }, |
848
|
|
|
|
|
|
|
dates_search_time => { type => BOOLEAN, optional => 1 }, |
849
|
|
|
|
|
|
|
dates_locales => { type => SCALAR, optional => 1 }, |
850
|
|
|
|
|
|
|
out_utf8_bom => { type => SCALAR, optional => 1 }, |
851
|
|
|
|
|
|
|
dates_zeros_ok => { type => SCALAR, default => 1, optional => 1 }, |
852
|
|
|
|
|
|
|
_debug => { type => BOOLEAN, default => 0, optional => 1 }, |
853
|
|
|
|
|
|
|
_debug_read => { type => BOOLEAN, default => 0, optional => 1 }, |
854
|
|
|
|
|
|
|
_debug_extra_fields => { type => BOOLEAN, optional => 1 }, |
855
|
|
|
|
|
|
|
_debugh => { type => UNDEF | GLOBREF, optional => 1 } |
856
|
|
|
|
|
|
|
} |
857
|
|
|
|
|
|
|
); |
858
|
|
|
|
|
|
|
|
859
|
329
|
|
|
|
|
7074
|
my $self = {@args}; |
860
|
|
|
|
|
|
|
|
861
|
329
|
|
|
|
|
719
|
my @fields = keys %{$self}; |
|
329
|
|
|
|
|
1202
|
|
862
|
|
|
|
|
|
|
|
863
|
|
|
|
|
|
|
# croak_if_error |
864
|
|
|
|
|
|
|
|
865
|
329
|
|
|
|
|
821
|
my $croak_if_error = $self->{croak_if_error}; |
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
# verbose and _debug management |
868
|
|
|
|
|
|
|
|
869
|
329
|
50
|
|
|
|
1143
|
$self->{_debugh} = $self->{infoh} if !defined( $self->{_debugh} ); |
870
|
329
|
50
|
|
|
|
842
|
$self->{_debug} = 1 if $ALWAYS_DEBUG; |
871
|
329
|
|
|
|
|
650
|
my $_debug = $self->{_debug}; |
872
|
329
|
50
|
|
|
|
682
|
$self->{verbose} = 1 if $_debug; |
873
|
329
|
|
|
|
|
596
|
my $verbose = $self->{verbose}; |
874
|
|
|
|
|
|
|
|
875
|
329
|
|
|
|
|
511
|
my $_debugh = $self->{_debugh}; |
876
|
|
|
|
|
|
|
|
877
|
329
|
|
|
|
|
594
|
bless $self, $class; |
878
|
|
|
|
|
|
|
|
879
|
|
|
|
|
|
|
# fields_ar, fields_hr |
880
|
|
|
|
|
|
|
|
881
|
329
|
100
|
|
|
|
1083
|
if ( |
882
|
|
|
|
|
|
|
defined( $self->{fields_ar} ) + |
883
|
|
|
|
|
|
|
defined( $self->{fields_hr} ) + |
884
|
|
|
|
|
|
|
defined( $self->{fields_column_names} ) >= 2 ) |
885
|
|
|
|
|
|
|
{ |
886
|
1
|
|
|
|
|
4
|
$self->_print_error( |
887
|
|
|
|
|
|
|
"mixed use of fields_ar, fields_hr and fields_column_names. " |
888
|
|
|
|
|
|
|
. "Use one at a time." ); |
889
|
|
|
|
|
|
|
} |
890
|
329
|
100
|
100
|
|
|
984
|
if ( defined( $self->{fields_ar} ) and ( !defined $self->{fields_hr} ) ) { |
891
|
2
|
|
|
|
|
4
|
my @f = @{ $self->{fields_ar} }; |
|
2
|
|
|
|
|
5
|
|
892
|
2
|
|
|
|
|
4
|
my %h; |
893
|
2
|
|
|
|
|
5
|
for my $e (@f) { |
894
|
6
|
|
|
|
|
17
|
$h{$e} = "^$e\$"; |
895
|
|
|
|
|
|
|
} |
896
|
2
|
|
|
|
|
6
|
$self->{fields_hr} = \%h; |
897
|
|
|
|
|
|
|
} |
898
|
329
|
100
|
|
|
|
739
|
if ( !$self->{has_headers} ) { |
899
|
13
|
100
|
|
|
|
35
|
if ( defined( $self->{fields_ar} ) ) { |
900
|
1
|
|
|
|
|
5
|
$self->_print_error( |
901
|
|
|
|
|
|
|
"fields_ar irrelevant if CSV file has no headers"); |
902
|
1
|
|
|
|
|
104
|
return; |
903
|
|
|
|
|
|
|
} |
904
|
12
|
100
|
|
|
|
33
|
if ( defined( $self->{fields_hr} ) ) { |
905
|
1
|
|
|
|
|
3
|
$self->_print_error( |
906
|
|
|
|
|
|
|
"fields_hr irrelevant if CSV file has no headers"); |
907
|
1
|
|
|
|
|
6
|
return; |
908
|
|
|
|
|
|
|
} |
909
|
|
|
|
|
|
|
} |
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
# in_file or inh |
912
|
|
|
|
|
|
|
|
913
|
327
|
|
|
|
|
704
|
$self->{_flat} = []; |
914
|
|
|
|
|
|
|
|
915
|
327
|
|
|
|
|
646
|
$self->{_read_update_after_hr} = {}; |
916
|
327
|
|
|
|
|
635
|
$self->{_write_update_before_hr} = {}; |
917
|
|
|
|
|
|
|
|
918
|
327
|
|
|
|
|
1042
|
$self->_update_in_mem_record_count(); |
919
|
|
|
|
|
|
|
|
920
|
327
|
100
|
|
|
|
841
|
return unless $self->_status_reset(); |
921
|
|
|
|
|
|
|
|
922
|
318
|
50
|
|
|
|
668
|
$self->_debug_show_members() if $_debug; |
923
|
|
|
|
|
|
|
|
924
|
318
|
100
|
|
|
|
741
|
if ( $self->{dates_zeros_ok} ) { |
925
|
|
|
|
|
|
|
$self->{_refsub_is_datetime_empty} = sub { |
926
|
3173
|
|
|
3173
|
|
4677
|
my $v = $_[0]; |
927
|
3173
|
100
|
|
|
|
9569
|
if ( $v !~ m/[1-9]/ ) { |
928
|
975
|
100
|
|
|
|
2228
|
return 1 if $v =~ m/^[^0:]*0+[^0:]+0+[^0:]+0+/; |
929
|
|
|
|
|
|
|
} |
930
|
3170
|
|
|
|
|
8634
|
return 0; |
931
|
|
|
|
|
|
|
} |
932
|
316
|
|
|
|
|
1415
|
} |
933
|
|
|
|
|
|
|
|
934
|
318
|
|
|
|
|
5217
|
return $self; |
935
|
|
|
|
|
|
|
} |
936
|
|
|
|
|
|
|
|
937
|
|
|
|
|
|
|
# |
938
|
|
|
|
|
|
|
# Return 0 if error, 1 if all good |
939
|
|
|
|
|
|
|
# |
940
|
|
|
|
|
|
|
# Do all low level activities associated to input: |
941
|
|
|
|
|
|
|
# I/O init |
942
|
|
|
|
|
|
|
# Detect encoding |
943
|
|
|
|
|
|
|
# Detect CSV separator |
944
|
|
|
|
|
|
|
# Detect escape character |
945
|
|
|
|
|
|
|
# |
946
|
|
|
|
|
|
|
sub _S1_init_input { |
947
|
470
|
|
|
470
|
|
829
|
my $self = shift; |
948
|
|
|
|
|
|
|
|
949
|
470
|
|
|
|
|
760
|
my $croak_if_error = $self->{croak_if_error}; |
950
|
470
|
|
|
|
|
692
|
my $_debug = $self->{_debug}; |
951
|
470
|
|
|
|
|
744
|
my $_debugh = $self->{_debugh}; |
952
|
|
|
|
|
|
|
|
953
|
470
|
100
|
|
|
|
994
|
$self->{in_file} = '' unless defined( $self->{in_file} ); |
954
|
470
|
|
|
|
|
923
|
$self->{_close_inh_when_finished} = 0; |
955
|
|
|
|
|
|
|
|
956
|
|
|
|
|
|
|
$self->{_leave_encoding_alone} = $self->{dont_mess_with_encoding} |
957
|
470
|
50
|
|
|
|
978
|
if defined( $self->{dont_mess_with_encoding} ); |
958
|
|
|
|
|
|
|
|
959
|
470
|
|
|
|
|
1279
|
$self->{_int_one_pass} = _get_def( $self->{one_pass}, 0 ); |
960
|
470
|
|
|
|
|
880
|
my $in_file_disp; |
961
|
|
|
|
|
|
|
|
962
|
|
|
|
|
|
|
# |
963
|
|
|
|
|
|
|
# LOW LEVEL INIT STEP 1 OF 4 |
964
|
|
|
|
|
|
|
# |
965
|
|
|
|
|
|
|
# Manage I/O (= in most cases, open input file...) |
966
|
|
|
|
|
|
|
# |
967
|
|
|
|
|
|
|
|
968
|
470
|
100
|
|
|
|
1077
|
if ( defined( $self->{inh} ) ) { |
969
|
|
|
|
|
|
|
$self->{_leave_encoding_alone} = 1 |
970
|
4
|
50
|
|
|
|
10
|
unless defined( $self->{dont_mess_with_encoding} ); |
971
|
4
|
|
|
|
|
7
|
$in_file_disp = _get_def( $self->{in_file}, '<?>' ); |
972
|
4
|
50
|
|
|
|
10
|
$self->{_int_one_pass} = 1 unless defined( $self->{one_pass} ); |
973
|
4
|
|
|
|
|
6
|
$self->{_inh} = $self->{inh}; |
974
|
|
|
|
|
|
|
} |
975
|
|
|
|
|
|
|
else { |
976
|
|
|
|
|
|
|
$self->{_leave_encoding_alone} = 0 |
977
|
466
|
50
|
|
|
|
1124
|
unless defined( $self->{dont_mess_with_encoding} ); |
978
|
466
|
|
|
|
|
764
|
my $in_file = $self->{in_file}; |
979
|
466
|
|
|
|
|
642
|
my $inh; |
980
|
466
|
50
|
|
|
|
956
|
if ( $in_file eq '' ) { |
981
|
0
|
|
|
|
|
0
|
$inh = \*STDIN; |
982
|
0
|
|
|
|
|
0
|
$self->{inh_is_stdin} = 1; |
983
|
0
|
0
|
|
|
|
0
|
$self->{_int_one_pass} = 1 unless defined( $self->{one_pass} ); |
984
|
0
|
|
|
|
|
0
|
$in_file_disp = '<stdin>'; |
985
|
|
|
|
|
|
|
} |
986
|
|
|
|
|
|
|
else { |
987
|
466
|
100
|
|
|
|
14335
|
if ( !open( $inh, '<', $in_file ) ) { ## no critic (InputOutput::RequireBriefOpen) |
988
|
3
|
|
|
|
|
50
|
$self->_print_error("unable to open file '$in_file': $!"); |
989
|
3
|
|
|
|
|
105
|
return 0; |
990
|
|
|
|
|
|
|
} |
991
|
463
|
|
|
|
|
1175
|
$in_file_disp = $in_file; |
992
|
463
|
|
|
|
|
1009
|
$self->{_close_inh_when_finished} = 1; |
993
|
|
|
|
|
|
|
} |
994
|
463
|
|
|
|
|
1297
|
$self->{_inh} = $inh; |
995
|
|
|
|
|
|
|
} |
996
|
|
|
|
|
|
|
|
997
|
467
|
50
|
|
|
|
983
|
confess "Oups! in_file_disp not defined?" unless defined($in_file_disp); |
998
|
467
|
|
|
|
|
857
|
$self->{_in_file_disp} = $in_file_disp; |
999
|
|
|
|
|
|
|
|
1000
|
|
|
|
|
|
|
# |
1001
|
|
|
|
|
|
|
# LOW LEVEL INIT STEP 2 OF 4 |
1002
|
|
|
|
|
|
|
# |
1003
|
|
|
|
|
|
|
# "Detection" of encoding |
1004
|
|
|
|
|
|
|
# |
1005
|
|
|
|
|
|
|
# WARNING |
1006
|
|
|
|
|
|
|
# As explained in the manual, it is a very partial and limited detection... |
1007
|
|
|
|
|
|
|
# |
1008
|
|
|
|
|
|
|
|
1009
|
467
|
100
|
|
|
|
1030
|
unless ( $self->{_leave_encoding_alone} ) { |
1010
|
463
|
100
|
|
|
|
1026
|
unless ( $self->{_init_input_already_called} ) { |
1011
|
|
|
|
|
|
|
my ( $e, $m ) = |
1012
|
|
|
|
|
|
|
$self->_detect_inh_encoding( $self->{encoding}, $self->{via}, |
1013
|
|
|
|
|
|
|
$self->{in_file}, |
1014
|
320
|
100
|
|
|
|
1697
|
( $self->{_int_one_pass} ? 0 : $DETECT_ENCODING ) ); |
1015
|
320
|
|
|
|
|
1048
|
$self->{_inh_encoding} = $e; |
1016
|
320
|
|
|
|
|
682
|
$self->{_inh_encoding_string} = $m; |
1017
|
|
|
|
|
|
|
} |
1018
|
|
|
|
|
|
|
|
1019
|
463
|
|
|
|
|
3395
|
binmode $self->{_inh}, $self->{_inh_encoding_string}; |
1020
|
|
|
|
|
|
|
print( $_debugh "Input encoding: '" |
1021
|
|
|
|
|
|
|
. $self->{_inh_encoding} . "' / '" |
1022
|
|
|
|
|
|
|
. $self->{_inh_encoding_string} |
1023
|
463
|
50
|
|
|
|
20771
|
. "'\n" ) |
1024
|
|
|
|
|
|
|
if $_debug; |
1025
|
|
|
|
|
|
|
|
1026
|
|
|
|
|
|
|
} |
1027
|
|
|
|
|
|
|
|
1028
|
467
|
100
|
|
|
|
1266
|
$self->{out_file} = '' unless defined( $self->{out_file} ); |
1029
|
|
|
|
|
|
|
|
1030
|
|
|
|
|
|
|
# |
1031
|
|
|
|
|
|
|
# LOW LEVEL INIT STEP 3 OF 4 |
1032
|
|
|
|
|
|
|
# |
1033
|
|
|
|
|
|
|
# Detection of CSV separator and escape character |
1034
|
|
|
|
|
|
|
# |
1035
|
|
|
|
|
|
|
|
1036
|
467
|
|
|
|
|
672
|
my $sep_char; |
1037
|
467
|
|
|
|
|
799
|
my $escape_char = $self->{escape_char}; |
1038
|
|
|
|
|
|
|
$self->{quote_char} = $DEFAULT_QUOTE_CHAR |
1039
|
467
|
100
|
|
|
|
1193
|
unless defined( $self->{quote_char} ); |
1040
|
467
|
|
|
|
|
771
|
my $quote_char = $self->{quote_char}; |
1041
|
467
|
100
|
|
|
|
1013
|
unless ( defined( $self->{in_csvobj} ) ) { |
1042
|
462
|
100
|
|
|
|
916
|
if ( defined( $self->{sep_char} ) ) { |
1043
|
200
|
|
|
|
|
324
|
$sep_char = $self->{sep_char}; |
1044
|
200
|
50
|
|
|
|
418
|
print( $_debugh "-- $in_file_disp: CSV separator set to \"" ) |
1045
|
|
|
|
|
|
|
if $_debug; |
1046
|
|
|
|
|
|
|
} |
1047
|
|
|
|
|
|
|
else { |
1048
|
|
|
|
|
|
|
# The test below (on _init_input_already_called) shoud be useless. |
1049
|
|
|
|
|
|
|
# Left for the sake of robustness. |
1050
|
262
|
50
|
|
|
|
566
|
unless ( $self->{_init_input_already_called} ) { |
1051
|
262
|
100
|
|
|
|
864
|
if ( |
1052
|
|
|
|
|
|
|
!$self->_detect_csv_sep( |
1053
|
|
|
|
|
|
|
$escape_char, $quote_char, \$sep_char |
1054
|
|
|
|
|
|
|
) |
1055
|
|
|
|
|
|
|
) |
1056
|
|
|
|
|
|
|
{ |
1057
|
6
|
|
|
|
|
24
|
$self->_print_error( |
1058
|
|
|
|
|
|
|
"'$in_file_disp': cannot detect CSV separator"); |
1059
|
0
|
|
|
|
|
0
|
return 0; |
1060
|
|
|
|
|
|
|
} |
1061
|
256
|
50
|
|
|
|
636
|
print( $_debugh |
1062
|
|
|
|
|
|
|
"-- $in_file_disp: CSV separator detected to \"" ) |
1063
|
|
|
|
|
|
|
if $_debug; |
1064
|
256
|
|
|
|
|
596
|
$self->{sep_char} = $sep_char; |
1065
|
|
|
|
|
|
|
} |
1066
|
|
|
|
|
|
|
} |
1067
|
456
|
50
|
|
|
|
941
|
print( $_debugh _render($sep_char) . "\"\n" ) if $_debug; |
1068
|
|
|
|
|
|
|
|
1069
|
456
|
|
|
|
|
1335
|
$self->_detect_meta( $quote_char, $sep_char ); |
1070
|
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
$self->{_in_csvobj} = Text::CSV->new( |
1072
|
|
|
|
|
|
|
{ |
1073
|
|
|
|
|
|
|
sep_char => $sep_char, |
1074
|
|
|
|
|
|
|
allow_whitespace => 1, |
1075
|
|
|
|
|
|
|
binary => 1, |
1076
|
|
|
|
|
|
|
auto_diag => 0, |
1077
|
|
|
|
|
|
|
quote_char => $quote_char, |
1078
|
|
|
|
|
|
|
escape_char => $self->{escape_char}, |
1079
|
456
|
|
|
|
|
3410
|
allow_loose_escapes => 1 |
1080
|
|
|
|
|
|
|
} |
1081
|
|
|
|
|
|
|
); |
1082
|
456
|
50
|
|
|
|
74413
|
unless ( defined( $self->{_in_csvobj} ) ) { |
1083
|
0
|
|
|
|
|
0
|
$self->_print_error("error creating input Text::CSV object"); |
1084
|
0
|
|
|
|
|
0
|
return 0; |
1085
|
|
|
|
|
|
|
} |
1086
|
|
|
|
|
|
|
|
1087
|
|
|
|
|
|
|
} |
1088
|
|
|
|
|
|
|
else { |
1089
|
5
|
|
|
|
|
9
|
$self->{_in_csvobj} = $self->{in_csvobj}; |
1090
|
|
|
|
|
|
|
} |
1091
|
|
|
|
|
|
|
|
1092
|
461
|
|
|
|
|
899
|
$self->{_init_input_already_called} = 1; |
1093
|
|
|
|
|
|
|
|
1094
|
461
|
|
|
|
|
1259
|
return 1; |
1095
|
|
|
|
|
|
|
} |
1096
|
|
|
|
|
|
|
|
1097
|
|
|
|
|
|
|
sub get_in_file_disp { |
1098
|
3663
|
|
|
3663
|
1
|
4957
|
my $self = shift; |
1099
|
|
|
|
|
|
|
|
1100
|
3663
|
|
|
|
|
18910
|
validate_pos(@_); |
1101
|
|
|
|
|
|
|
|
1102
|
3663
|
|
|
|
|
9196
|
my $in_file_disp = _get_def( $self->{_in_file_disp}, '?' ); |
1103
|
3663
|
|
|
|
|
6583
|
return $in_file_disp; |
1104
|
|
|
|
|
|
|
} |
1105
|
|
|
|
|
|
|
|
1106
|
|
|
|
|
|
|
sub get_sep_char { |
1107
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
1108
|
|
|
|
|
|
|
|
1109
|
0
|
|
|
|
|
0
|
validate_pos(@_); |
1110
|
|
|
|
|
|
|
|
1111
|
0
|
|
|
|
|
0
|
return $self->{sep_char}; |
1112
|
|
|
|
|
|
|
} |
1113
|
|
|
|
|
|
|
|
1114
|
|
|
|
|
|
|
sub get_escape_char { |
1115
|
9
|
|
|
9
|
1
|
50
|
my $self = shift; |
1116
|
|
|
|
|
|
|
|
1117
|
9
|
|
|
|
|
65
|
validate_pos(@_); |
1118
|
|
|
|
|
|
|
|
1119
|
9
|
|
|
|
|
52
|
return $self->{escape_char}; |
1120
|
|
|
|
|
|
|
} |
1121
|
|
|
|
|
|
|
|
1122
|
|
|
|
|
|
|
sub get_in_encoding { |
1123
|
27
|
|
|
27
|
1
|
3324
|
my $self = shift; |
1124
|
|
|
|
|
|
|
|
1125
|
27
|
|
|
|
|
172
|
validate_pos(@_); |
1126
|
|
|
|
|
|
|
|
1127
|
27
|
|
|
|
|
81
|
return _get_def( $self->{_inh_encoding}, '' ); |
1128
|
|
|
|
|
|
|
} |
1129
|
|
|
|
|
|
|
|
1130
|
|
|
|
|
|
|
sub get_is_always_quoted { |
1131
|
15
|
|
|
15
|
1
|
80
|
my $self = shift; |
1132
|
|
|
|
|
|
|
|
1133
|
15
|
|
|
|
|
96
|
validate_pos(@_); |
1134
|
|
|
|
|
|
|
|
1135
|
15
|
|
|
|
|
95
|
return $self->{_is_always_quoted}; |
1136
|
|
|
|
|
|
|
} |
1137
|
|
|
|
|
|
|
|
1138
|
|
|
|
|
|
|
sub get_pass_count { |
1139
|
43
|
|
|
43
|
1
|
559
|
my $self = shift; |
1140
|
|
|
|
|
|
|
|
1141
|
43
|
|
|
|
|
286
|
validate_pos(@_); |
1142
|
|
|
|
|
|
|
|
1143
|
43
|
|
|
|
|
158
|
return _get_def( $self->{_pass_count}, 0 ); |
1144
|
|
|
|
|
|
|
} |
1145
|
|
|
|
|
|
|
|
1146
|
|
|
|
|
|
|
sub get_in_mem_record_count { |
1147
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
1148
|
|
|
|
|
|
|
|
1149
|
0
|
|
|
|
|
0
|
validate_pos(@_); |
1150
|
|
|
|
|
|
|
|
1151
|
0
|
|
|
|
|
0
|
return ( $self->{_in_mem_record_count}, 0 ); |
1152
|
|
|
|
|
|
|
} |
1153
|
|
|
|
|
|
|
|
1154
|
|
|
|
|
|
|
sub get_max_in_mem_record_count { |
1155
|
513
|
|
|
513
|
1
|
1553
|
my $self = shift; |
1156
|
|
|
|
|
|
|
|
1157
|
513
|
|
|
|
|
3035
|
validate_pos(@_); |
1158
|
|
|
|
|
|
|
|
1159
|
513
|
|
|
|
|
2102
|
return _get_def( $self->{_max_in_mem_record_count}, 0 ); |
1160
|
|
|
|
|
|
|
} |
1161
|
|
|
|
|
|
|
|
1162
|
|
|
|
|
|
|
sub _set_max_in_mem_record_count { |
1163
|
155
|
|
|
155
|
|
253
|
my $self = shift; |
1164
|
|
|
|
|
|
|
|
1165
|
155
|
|
|
|
|
1493
|
validate_pos( @_, { type => SCALAR } ); |
1166
|
|
|
|
|
|
|
|
1167
|
155
|
|
|
|
|
525
|
$self->{_max_in_mem_record_count} = $_[0]; |
1168
|
|
|
|
|
|
|
} |
1169
|
|
|
|
|
|
|
|
1170
|
|
|
|
|
|
|
sub get_fields_names { |
1171
|
19
|
|
|
19
|
1
|
2757
|
my $self = shift; |
1172
|
|
|
|
|
|
|
|
1173
|
19
|
|
|
|
|
134
|
validate_pos(@_); |
1174
|
|
|
|
|
|
|
|
1175
|
19
|
50
|
|
|
|
68
|
return () unless $self->_status_forward('S3'); |
1176
|
19
|
|
|
|
|
34
|
return @{ $self->{_columns} }; |
|
19
|
|
|
|
|
82
|
|
1177
|
|
|
|
|
|
|
} |
1178
|
|
|
|
|
|
|
|
1179
|
|
|
|
|
|
|
sub get_field_name { |
1180
|
1
|
|
|
1
|
1
|
477
|
my $self = shift; |
1181
|
|
|
|
|
|
|
|
1182
|
1
|
|
|
|
|
16
|
validate_pos( @_, { type => SCALAR } ); |
1183
|
|
|
|
|
|
|
|
1184
|
1
|
|
|
|
|
7
|
my ($n) = @_; |
1185
|
|
|
|
|
|
|
|
1186
|
1
|
50
|
|
|
|
5
|
return unless $self->_status_forward('S3'); |
1187
|
1
|
|
|
|
|
4
|
return $self->{_columns}->[$n]; |
1188
|
|
|
|
|
|
|
} |
1189
|
|
|
|
|
|
|
|
1190
|
|
|
|
|
|
|
sub get_coldata { |
1191
|
1
|
|
|
1
|
1
|
2
|
my $self = shift; |
1192
|
|
|
|
|
|
|
|
1193
|
1
|
|
|
|
|
7
|
validate_pos(@_); |
1194
|
|
|
|
|
|
|
|
1195
|
1
|
50
|
|
|
|
3
|
return () unless $self->_status_forward('S3'); |
1196
|
1
|
|
|
|
|
2
|
my @ret; |
1197
|
1
|
|
|
|
|
2
|
for ( @{ $self->{_coldata} } ) { |
|
1
|
|
|
|
|
3
|
|
1198
|
4
|
|
|
|
|
123
|
push @ret, |
1199
|
|
|
|
|
|
|
[ |
1200
|
|
|
|
|
|
|
$_->field_name, $_->header_text, $_->description, |
1201
|
|
|
|
|
|
|
$_->dt_format, $_->dt_locale, $_->multiline |
1202
|
|
|
|
|
|
|
]; |
1203
|
|
|
|
|
|
|
} |
1204
|
|
|
|
|
|
|
|
1205
|
1
|
|
|
|
|
25
|
return @ret; |
1206
|
|
|
|
|
|
|
} |
1207
|
|
|
|
|
|
|
|
1208
|
|
|
|
|
|
|
sub get_stats { |
1209
|
2
|
|
|
2
|
1
|
9
|
my $self = shift; |
1210
|
|
|
|
|
|
|
|
1211
|
2
|
|
|
|
|
11
|
validate_pos(@_); |
1212
|
|
|
|
|
|
|
|
1213
|
2
|
50
|
|
|
|
6
|
return () unless defined( $self->{_stats} ); |
1214
|
2
|
|
|
|
|
4
|
return %{ $self->{_stats} }; |
|
2
|
|
|
|
|
8
|
|
1215
|
|
|
|
|
|
|
} |
1216
|
|
|
|
|
|
|
|
1217
|
|
|
|
|
|
|
sub get_nb_rows { |
1218
|
1
|
|
|
1
|
1
|
2
|
my $self = shift; |
1219
|
|
|
|
|
|
|
|
1220
|
1
|
|
|
|
|
11
|
validate_pos(@_); |
1221
|
|
|
|
|
|
|
|
1222
|
1
|
|
|
|
|
6
|
return $self->{_nb_rows}; |
1223
|
|
|
|
|
|
|
} |
1224
|
|
|
|
|
|
|
|
1225
|
|
|
|
|
|
|
sub _debug_show_members { |
1226
|
0
|
|
|
0
|
|
0
|
my ($self) = @_; |
1227
|
0
|
|
|
|
|
0
|
my $_debugh = $self->{_debugh}; |
1228
|
0
|
|
|
|
|
0
|
my @aa; |
1229
|
0
|
0
|
|
|
|
0
|
@aa = @{ $self->{fields_ar} } if defined( $self->{fields_ar} ); |
|
0
|
|
|
|
|
0
|
|
1230
|
0
|
|
|
|
|
0
|
my @c; |
1231
|
0
|
|
|
|
|
0
|
@c = @{ $self->{fields_column_names} } |
1232
|
0
|
0
|
|
|
|
0
|
if defined( $self->{fields_column_names} ); |
1233
|
0
|
|
|
|
|
0
|
my %h; |
1234
|
0
|
0
|
|
|
|
0
|
%h = %{ $self->{fields_hr} } if defined( $self->{fields_hr} ); |
|
0
|
|
|
|
|
0
|
|
1235
|
|
|
|
|
|
|
|
1236
|
0
|
|
|
|
|
0
|
print( $_debugh "-- _debug_show_members() start\n" ); |
1237
|
0
|
|
|
|
|
0
|
print( $_debugh " croak_if_error $self->{croak_if_error}\n" ); |
1238
|
0
|
|
|
|
|
0
|
print( $_debugh " verbose $self->{verbose}\n" ); |
1239
|
0
|
|
|
|
|
0
|
print( $_debugh " _debug $self->{_debug}\n" ); |
1240
|
0
|
|
|
|
|
0
|
print( $_debugh " _debug_read $self->{_debug_read}\n" ); |
1241
|
0
|
|
|
|
|
0
|
print( $_debugh " infoh $self->{infoh}\n" ); |
1242
|
0
|
|
|
|
|
0
|
print( $_debugh " _debugh $_debugh\n" ); |
1243
|
0
|
|
|
|
|
0
|
print( $_debugh " inh: $self->{_inh}\n" ); |
1244
|
0
|
|
|
|
|
0
|
print( $_debugh " in_file_disp " . $self->get_in_file_disp() . "\n" ); |
1245
|
0
|
|
|
|
|
0
|
print( $_debugh " _in_csvobj $self->{_in_csvobj}\n" ); |
1246
|
0
|
|
|
|
|
0
|
print( $_debugh " has_headers $self->{has_headers}\n" ); |
1247
|
0
|
|
|
|
|
0
|
print( $_debugh " fields_ar:\n" ); |
1248
|
|
|
|
|
|
|
|
1249
|
0
|
|
|
|
|
0
|
for my $e (@aa) { |
1250
|
0
|
|
|
|
|
0
|
print( $_debugh " '$e'\n" ); |
1251
|
|
|
|
|
|
|
} |
1252
|
0
|
|
|
|
|
0
|
print( $_debugh " fields_hr:\n" ); |
1253
|
0
|
|
|
|
|
0
|
for my $e ( keys %h ) { |
1254
|
0
|
|
|
|
|
0
|
print( $_debugh " '$e' => '$h{$e}'\n" ); |
1255
|
|
|
|
|
|
|
} |
1256
|
0
|
|
|
|
|
0
|
print( $_debugh " fields_column_names:\n" ); |
1257
|
0
|
|
|
|
|
0
|
for my $e (@c) { |
1258
|
0
|
|
|
|
|
0
|
print( $_debugh " '$e'\n" ); |
1259
|
|
|
|
|
|
|
} |
1260
|
0
|
|
|
|
|
0
|
print( $_debugh "-- _debug_show_members() end\n" ); |
1261
|
|
|
|
|
|
|
} |
1262
|
|
|
|
|
|
|
|
1263
|
|
|
|
|
|
|
# |
1264
|
|
|
|
|
|
|
# Check headers in CSV header line |
1265
|
|
|
|
|
|
|
# Used to increase robustness by relying on header title rather than |
1266
|
|
|
|
|
|
|
# column number. |
1267
|
|
|
|
|
|
|
# |
1268
|
|
|
|
|
|
|
# Return 1 if success (all fields found), 0 otherwise. |
1269
|
|
|
|
|
|
|
# |
1270
|
|
|
|
|
|
|
sub _process_header { |
1271
|
12
|
|
|
12
|
|
22
|
my $self = shift; |
1272
|
12
|
|
|
|
|
23
|
my @headers = @{ shift(@_) }; |
|
12
|
|
|
|
|
39
|
|
1273
|
12
|
|
|
|
|
22
|
my %fields_h = %{ shift(@_) }; |
|
12
|
|
|
|
|
68
|
|
1274
|
12
|
|
|
|
|
29
|
my $retval = shift; |
1275
|
|
|
|
|
|
|
|
1276
|
12
|
|
|
|
|
20
|
my @tmp = keys %{$retval}; |
|
12
|
|
|
|
|
30
|
|
1277
|
|
|
|
|
|
|
|
1278
|
12
|
|
|
|
|
34
|
my $in_file_disp = $self->get_in_file_disp(); |
1279
|
|
|
|
|
|
|
|
1280
|
12
|
50
|
|
|
|
35
|
confess '$_[4] must be an empty by-ref hash' if $#tmp >= 0; |
1281
|
|
|
|
|
|
|
|
1282
|
12
|
|
|
|
|
23
|
my $e = 0; |
1283
|
12
|
|
|
|
|
38
|
for my $k ( keys %fields_h ) { |
1284
|
48
|
|
|
|
|
85
|
my $v = $fields_h{$k}; |
1285
|
|
|
|
|
|
|
|
1286
|
48
|
|
|
462
|
|
199
|
my @all_idx = indexes { /$v/i } @headers; |
|
462
|
|
|
|
|
2609
|
|
1287
|
48
|
50
|
|
|
|
164
|
if ( $#all_idx >= 1 ) { |
1288
|
0
|
|
|
|
|
0
|
$self->_print_error( "file $in_file_disp: " |
1289
|
|
|
|
|
|
|
. "more than one column matches the criteria '$v'" ); |
1290
|
0
|
|
|
|
|
0
|
$e++; |
1291
|
|
|
|
|
|
|
} |
1292
|
48
|
|
|
258
|
|
179
|
my $idx = first_index { /$v/i } @headers; |
|
258
|
|
|
|
|
1630
|
|
1293
|
48
|
50
|
|
|
|
211
|
if ( $idx < 0 ) { |
1294
|
0
|
|
|
|
|
0
|
$self->_print_error( |
1295
|
|
|
|
|
|
|
"file $in_file_disp: unable to find field '$v'"); |
1296
|
0
|
|
|
|
|
0
|
$e++; |
1297
|
|
|
|
|
|
|
} |
1298
|
|
|
|
|
|
|
else { |
1299
|
48
|
|
|
|
|
121
|
$retval->{$k} = $idx; |
1300
|
|
|
|
|
|
|
} |
1301
|
|
|
|
|
|
|
} |
1302
|
|
|
|
|
|
|
|
1303
|
12
|
50
|
|
|
|
72
|
return ( $e >= 1 ? 0 : 1 ); |
1304
|
|
|
|
|
|
|
} |
1305
|
|
|
|
|
|
|
|
1306
|
|
|
|
|
|
|
sub set_walker_hr { |
1307
|
2
|
|
|
2
|
1
|
1593
|
my $self = shift; |
1308
|
2
|
|
|
|
|
25
|
validate_pos( @_, { type => UNDEF | CODEREF, optional => 1 } ); |
1309
|
|
|
|
|
|
|
|
1310
|
2
|
|
|
|
|
7
|
my ($walker_hr) = @_; |
1311
|
|
|
|
|
|
|
|
1312
|
2
|
50
|
|
|
|
5
|
return unless $self->_status_forward('S2'); |
1313
|
2
|
50
|
|
|
|
6
|
return unless $self->_status_backward('S2'); |
1314
|
2
|
|
|
|
|
4
|
$self->{walker_hr} = $walker_hr; |
1315
|
|
|
|
|
|
|
|
1316
|
2
|
|
|
|
|
5
|
return $self; |
1317
|
|
|
|
|
|
|
} |
1318
|
|
|
|
|
|
|
|
1319
|
|
|
|
|
|
|
sub set_walker_ar { |
1320
|
2
|
|
|
2
|
1
|
1404
|
my $self = shift; |
1321
|
2
|
|
|
|
|
22
|
validate_pos( @_, { type => UNDEF | CODEREF, optional => 1 } ); |
1322
|
|
|
|
|
|
|
|
1323
|
2
|
|
|
|
|
8
|
my ($walker_ar) = @_; |
1324
|
|
|
|
|
|
|
|
1325
|
2
|
50
|
|
|
|
6
|
return unless $self->_status_forward('S2'); |
1326
|
2
|
50
|
|
|
|
5
|
return unless $self->_status_backward('S2'); |
1327
|
2
|
|
|
|
|
3
|
$self->{walker_ar} = $walker_ar; |
1328
|
|
|
|
|
|
|
|
1329
|
2
|
|
|
|
|
5
|
return $self; |
1330
|
|
|
|
|
|
|
} |
1331
|
|
|
|
|
|
|
|
1332
|
|
|
|
|
|
|
# * *************************************** * |
1333
|
|
|
|
|
|
|
# * BEGINNING OF DATE FORMAT DETECTION CODE * |
1334
|
|
|
|
|
|
|
# * *************************************** * |
1335
|
|
|
|
|
|
|
|
1336
|
|
|
|
|
|
|
# |
1337
|
|
|
|
|
|
|
# The '%m.%d.%y' is not at its "logical" location. It is done to make sure the |
1338
|
|
|
|
|
|
|
# order in which entries are written does not impact the result. |
1339
|
|
|
|
|
|
|
# |
1340
|
|
|
|
|
|
|
# It could occur because there is some code that correlates an entry containing |
1341
|
|
|
|
|
|
|
# %y with another one that would contain %Y. The %Y will be called the master, |
1342
|
|
|
|
|
|
|
# the %y will be called the slave. It is important to match such entries, |
1343
|
|
|
|
|
|
|
# otherwise an identified format with %y would always be ambiguous with the same |
1344
|
|
|
|
|
|
|
# written with %Y. |
1345
|
|
|
|
|
|
|
# |
1346
|
|
|
|
|
|
|
# IMPORTANT |
1347
|
|
|
|
|
|
|
# The list below is written almost as-is in the POD at the bottom of this |
1348
|
|
|
|
|
|
|
# file. |
1349
|
|
|
|
|
|
|
# |
1350
|
|
|
|
|
|
|
my @DATES_DEFAULT_FORMATS_TO_TRY = ( |
1351
|
|
|
|
|
|
|
'', |
1352
|
|
|
|
|
|
|
'%Y-%m-%d', |
1353
|
|
|
|
|
|
|
'%Y.%m.%d', |
1354
|
|
|
|
|
|
|
'%Y/%m/%d', |
1355
|
|
|
|
|
|
|
|
1356
|
|
|
|
|
|
|
'%m.%d.%y', |
1357
|
|
|
|
|
|
|
|
1358
|
|
|
|
|
|
|
'%m-%d-%Y', |
1359
|
|
|
|
|
|
|
'%m.%d.%Y', |
1360
|
|
|
|
|
|
|
'%m/%d/%Y', |
1361
|
|
|
|
|
|
|
'%d-%m-%Y', |
1362
|
|
|
|
|
|
|
'%d.%m.%Y', |
1363
|
|
|
|
|
|
|
'%d/%m/%Y', |
1364
|
|
|
|
|
|
|
|
1365
|
|
|
|
|
|
|
'%m-%d-%y', |
1366
|
|
|
|
|
|
|
'%m/%d/%y', |
1367
|
|
|
|
|
|
|
'%d-%m-%y', |
1368
|
|
|
|
|
|
|
'%d.%m.%y', |
1369
|
|
|
|
|
|
|
'%d/%m/%y', |
1370
|
|
|
|
|
|
|
|
1371
|
|
|
|
|
|
|
'%Y%m%d%H%M%S', |
1372
|
|
|
|
|
|
|
|
1373
|
|
|
|
|
|
|
# Localizaed formats |
1374
|
|
|
|
|
|
|
|
1375
|
|
|
|
|
|
|
'%b %d, %Y', |
1376
|
|
|
|
|
|
|
'%b %d %Y', |
1377
|
|
|
|
|
|
|
'%b %d %T %Z %Y', |
1378
|
|
|
|
|
|
|
'%d %b %Y', |
1379
|
|
|
|
|
|
|
'%d %b, %Y' |
1380
|
|
|
|
|
|
|
); |
1381
|
|
|
|
|
|
|
|
1382
|
|
|
|
|
|
|
# |
1383
|
|
|
|
|
|
|
# IMPORTANT |
1384
|
|
|
|
|
|
|
# Under Linux, $START is useless. Strptime will match a format exactly as it |
1385
|
|
|
|
|
|
|
# is, and a tring like "01/01/16 13:00:00" won't match with "%T". Under |
1386
|
|
|
|
|
|
|
# Windows, Strptime is capable of doing a match by ignoring characters at the |
1387
|
|
|
|
|
|
|
# beginning, thus "01/01/2016 13:00:00" for example will return success when |
1388
|
|
|
|
|
|
|
# matched against "%T". |
1389
|
|
|
|
|
|
|
# Possibly it has to do with versionning of Strptime, not Linux versus Windows |
1390
|
|
|
|
|
|
|
# as such. Any way, this difference had to be dealt with. |
1391
|
|
|
|
|
|
|
# |
1392
|
|
|
|
|
|
|
# The flexibility under Windows would screw the code logic so I had to add the |
1393
|
|
|
|
|
|
|
# prefix string below, to avoid unexpected success on match. |
1394
|
|
|
|
|
|
|
# |
1395
|
|
|
|
|
|
|
my $START = '<'; |
1396
|
|
|
|
|
|
|
|
1397
|
|
|
|
|
|
|
struct RecordCounter => { |
1398
|
|
|
|
|
|
|
count_ok => '$', |
1399
|
|
|
|
|
|
|
count_ko => '$', |
1400
|
|
|
|
|
|
|
has_searched_time => '$', |
1401
|
|
|
|
|
|
|
|
1402
|
|
|
|
|
|
|
format => '$', |
1403
|
|
|
|
|
|
|
locale => '$', |
1404
|
|
|
|
|
|
|
|
1405
|
|
|
|
|
|
|
has_found_time => '$', |
1406
|
|
|
|
|
|
|
format_with_addition_of_time => '$', |
1407
|
|
|
|
|
|
|
locale_with_addition_of_time => '$', |
1408
|
|
|
|
|
|
|
parser_with_addition_of_time => '$' |
1409
|
|
|
|
|
|
|
}; |
1410
|
|
|
|
|
|
|
|
1411
|
|
|
|
|
|
|
struct Format => { |
1412
|
|
|
|
|
|
|
id => '$', |
1413
|
|
|
|
|
|
|
format => '$', |
1414
|
|
|
|
|
|
|
locale => '$', |
1415
|
|
|
|
|
|
|
parser => '$', |
1416
|
|
|
|
|
|
|
index_slave => '$', |
1417
|
|
|
|
|
|
|
index_master => '$' |
1418
|
|
|
|
|
|
|
}; |
1419
|
|
|
|
|
|
|
|
1420
|
|
|
|
|
|
|
sub _col_dispname { |
1421
|
539
|
|
|
539
|
|
867
|
my ( $self, $n ) = @_; |
1422
|
|
|
|
|
|
|
|
1423
|
539
|
|
|
|
|
639
|
my $col; |
1424
|
|
|
|
|
|
|
|
1425
|
|
|
|
|
|
|
# |
1426
|
|
|
|
|
|
|
# IMPORTANT |
1427
|
|
|
|
|
|
|
# |
1428
|
|
|
|
|
|
|
# We cannot execute here a command like |
1429
|
|
|
|
|
|
|
# $self->_status_forward('S3'); |
1430
|
|
|
|
|
|
|
# (to ensure _columns is well defined) because _col_dispname is called by |
1431
|
|
|
|
|
|
|
# _detect_dates_formats that is in turn called by _S3_init_fields_extra. A call |
1432
|
|
|
|
|
|
|
# to _status_forward would trigger a never-ending call loop. |
1433
|
|
|
|
|
|
|
# |
1434
|
539
|
|
|
|
|
1103
|
my $cols = _get_def( $self->{'_columns'}, $self->{'_S2_columns'} ); |
1435
|
|
|
|
|
|
|
|
1436
|
539
|
50
|
|
|
|
934
|
if ( $self->{has_headers} ) { |
1437
|
539
|
|
|
|
|
929
|
$col = $cols->[$n]; |
1438
|
539
|
50
|
|
|
|
864
|
$col = "<UNDEF>" unless defined($col); |
1439
|
|
|
|
|
|
|
} |
1440
|
|
|
|
|
|
|
else { |
1441
|
0
|
|
|
|
|
0
|
$col = "[$n]"; |
1442
|
|
|
|
|
|
|
} |
1443
|
539
|
|
|
|
|
1034
|
return $col; |
1444
|
|
|
|
|
|
|
} |
1445
|
|
|
|
|
|
|
|
1446
|
|
|
|
|
|
|
# Used by test plan only... |
1447
|
|
|
|
|
|
|
sub _dds { |
1448
|
45
|
|
|
45
|
|
917
|
my $self = shift; |
1449
|
|
|
|
|
|
|
|
1450
|
45
|
50
|
|
|
|
110
|
return unless $self->_status_forward('S3'); |
1451
|
43
|
50
|
|
|
|
121
|
return unless defined( $self->{_dates_detailed_status} ); |
1452
|
43
|
|
|
|
|
352
|
return $self->{_dates_detailed_status}; |
1453
|
|
|
|
|
|
|
} |
1454
|
|
|
|
|
|
|
|
1455
|
|
|
|
|
|
|
sub _detect_dates_formats { |
1456
|
354
|
|
|
354
|
|
524
|
my $self = shift; |
1457
|
|
|
|
|
|
|
|
1458
|
354
|
100
|
|
|
|
868
|
return if $self->{_detect_dates_formats_has_run}; |
1459
|
287
|
|
|
|
|
564
|
$self->{_detect_dates_formats_has_run} = 1; |
1460
|
287
|
|
|
|
|
383
|
my @fields_dates; |
1461
|
11
|
|
|
|
|
35
|
@fields_dates = @{ $self->{fields_dates} } |
1462
|
287
|
100
|
|
|
|
643
|
if defined( $self->{fields_dates} ); |
1463
|
287
|
100
|
100
|
|
|
1293
|
return unless @fields_dates or $self->{fields_dates_auto}; |
1464
|
|
|
|
|
|
|
|
1465
|
54
|
50
|
|
|
|
151
|
if ( $self->{_int_one_pass} ) { |
1466
|
0
|
|
|
|
|
0
|
$self->_print_error( |
1467
|
|
|
|
|
|
|
"date format detection disallowed when one_pass is set"); |
1468
|
0
|
|
|
|
|
0
|
return; |
1469
|
|
|
|
|
|
|
} |
1470
|
|
|
|
|
|
|
|
1471
|
54
|
|
|
|
|
100
|
my $_debug = $self->{_debug}; |
1472
|
54
|
|
|
|
|
85
|
my $_debugh = $self->{_debugh}; |
1473
|
54
|
|
33
|
|
|
127
|
my $debug_fmt = ( $_debug and $DEBUG_DATETIME_FORMATS ); |
1474
|
|
|
|
|
|
|
|
1475
|
54
|
|
|
|
|
155
|
$self->_register_pass("detect date format"); |
1476
|
|
|
|
|
|
|
|
1477
|
|
|
|
|
|
|
# |
1478
|
|
|
|
|
|
|
# Why re-opening the input? |
1479
|
|
|
|
|
|
|
# I tried two other ways that never worked on some OSes (like freebsd) and/or |
1480
|
|
|
|
|
|
|
# with older perl versions. |
1481
|
|
|
|
|
|
|
# |
1482
|
|
|
|
|
|
|
# 1) The "tell" tactic |
1483
|
|
|
|
|
|
|
# Recording at the beginning of the function the file position with |
1484
|
|
|
|
|
|
|
# my $pos = tell($self->{inh}); |
1485
|
|
|
|
|
|
|
# ... and then recalling with a seek instruction is the most logical. |
1486
|
|
|
|
|
|
|
# But it didn't work = sometimes, reading would go back to first row (the |
1487
|
|
|
|
|
|
|
# headers) instead of the second row, could not figure out why (it would work |
1488
|
|
|
|
|
|
|
# on my Ubuntu 16.04 / perl 5.22, but would fail with other OSes and/or perl |
1489
|
|
|
|
|
|
|
# versions). |
1490
|
|
|
|
|
|
|
# |
1491
|
|
|
|
|
|
|
# 2) The "complete rewind" tactic |
1492
|
|
|
|
|
|
|
# I then undertook to do (at the end of detection function): |
1493
|
|
|
|
|
|
|
# seek $inh, 0, SEEK_SET; |
1494
|
|
|
|
|
|
|
# $incsv->getline($inh) if $self->{has_headers}; |
1495
|
|
|
|
|
|
|
# based on the assumption that a seek to zero would behave differently from a |
1496
|
|
|
|
|
|
|
# seek to an arbitrary position. |
1497
|
|
|
|
|
|
|
# But still, it would sometimes fail.... |
1498
|
|
|
|
|
|
|
# |
1499
|
|
|
|
|
|
|
|
1500
|
54
|
|
|
|
|
119
|
my $inh = $self->_reopen_input(); |
1501
|
54
|
|
|
|
|
113
|
my $incsv = $self->{_in_csvobj}; |
1502
|
54
|
50
|
|
|
|
218
|
_mygetline( $incsv, $inh ) if $self->{has_headers}; |
1503
|
|
|
|
|
|
|
|
1504
|
54
|
|
|
|
|
2401
|
my $formats_to_try = $self->{dates_formats_to_try}; |
1505
|
54
|
|
|
|
|
114
|
my $ignore_trailing_chars = $self->{dates_ignore_trailing_chars}; |
1506
|
54
|
|
|
|
|
133
|
my $search_time = $self->{dates_search_time}; |
1507
|
54
|
|
|
|
|
91
|
my $localizations = $self->{dates_locales}; |
1508
|
|
|
|
|
|
|
|
1509
|
54
|
|
|
|
|
92
|
my %regular_named_fields = %{ $self->{_regular_named_fields} }; |
|
54
|
|
|
|
|
423
|
|
1510
|
|
|
|
|
|
|
|
1511
|
54
|
|
|
|
|
144
|
my $refsub_is_datetime_empty = $self->{_refsub_is_datetime_empty}; |
1512
|
|
|
|
|
|
|
|
1513
|
54
|
|
|
|
|
82
|
my @fields_to_detect_format; |
1514
|
54
|
100
|
|
|
|
203
|
if ( defined( $self->{fields_dates} ) ) { |
|
|
50
|
|
|
|
|
|
1515
|
11
|
|
|
|
|
20
|
my $count_field_not_found = 0; |
1516
|
11
|
|
|
|
|
17
|
my %column_seen; |
1517
|
11
|
|
|
|
|
32
|
for my $f ( @{ $self->{fields_dates} } ) { |
|
11
|
|
|
|
|
31
|
|
1518
|
23
|
100
|
|
|
|
58
|
if ( !exists $regular_named_fields{$f} ) { |
1519
|
1
|
|
|
|
|
9
|
$self->_print_error( "fields_dates: unknown field: '$f'", |
1520
|
|
|
|
|
|
|
1, ERR_UNKNOWN_FIELD, {%regular_named_fields} ); |
1521
|
1
|
|
|
|
|
32
|
$count_field_not_found++; |
1522
|
1
|
|
|
|
|
4
|
next; |
1523
|
|
|
|
|
|
|
} |
1524
|
22
|
|
|
|
|
34
|
my $n = $regular_named_fields{$f}; |
1525
|
22
|
50
|
|
|
|
52
|
if ( exists $column_seen{$n} ) { |
1526
|
0
|
|
|
|
|
0
|
$self->_print_warning("field '$f' already seen"); |
1527
|
0
|
|
|
|
|
0
|
next; |
1528
|
|
|
|
|
|
|
} |
1529
|
22
|
|
|
|
|
43
|
$column_seen{$n} = 1; |
1530
|
22
|
|
|
|
|
47
|
push @fields_to_detect_format, $n; |
1531
|
|
|
|
|
|
|
} |
1532
|
11
|
100
|
|
|
|
88
|
$self->_print_error("non existent field(s) encountered, aborted") |
1533
|
|
|
|
|
|
|
if $count_field_not_found; |
1534
|
|
|
|
|
|
|
} |
1535
|
|
|
|
|
|
|
elsif ( $self->{fields_dates_auto} ) { |
1536
|
43
|
|
|
|
|
174
|
my @k = keys %regular_named_fields; |
1537
|
43
|
|
|
|
|
181
|
@fields_to_detect_format = ( 0 .. $#k ); |
1538
|
|
|
|
|
|
|
} |
1539
|
|
|
|
|
|
|
else { |
1540
|
0
|
|
|
|
|
0
|
confess "Hey! check this code, man"; |
1541
|
|
|
|
|
|
|
} |
1542
|
|
|
|
|
|
|
|
1543
|
|
|
|
|
|
|
# |
1544
|
|
|
|
|
|
|
# FIXME? |
1545
|
|
|
|
|
|
|
# Sort by column number of not? |
1546
|
|
|
|
|
|
|
# |
1547
|
|
|
|
|
|
|
# At this moment in time, the author inclines to answer "yes". |
1548
|
|
|
|
|
|
|
# But I must admit it is rather arbitrary decision for now. |
1549
|
|
|
|
|
|
|
# |
1550
|
53
|
|
|
|
|
294
|
@fields_to_detect_format = sort { $a <=> $b } @fields_to_detect_format; |
|
753
|
|
|
|
|
898
|
|
1551
|
|
|
|
|
|
|
|
1552
|
53
|
|
|
|
|
86
|
my @dates_formats_supp; |
1553
|
2
|
|
|
|
|
6
|
@dates_formats_supp = @{ $self->{dates_formats_to_try_supp} } |
1554
|
53
|
100
|
|
|
|
149
|
if defined( $self->{dates_formats_to_try_supp} ); |
1555
|
|
|
|
|
|
|
|
1556
|
53
|
100
|
|
|
|
346
|
$formats_to_try = [@DATES_DEFAULT_FORMATS_TO_TRY] |
1557
|
|
|
|
|
|
|
unless defined($formats_to_try); |
1558
|
53
|
|
|
|
|
94
|
$formats_to_try = [ @{$formats_to_try}, @dates_formats_supp ]; |
|
53
|
|
|
|
|
217
|
|
1559
|
53
|
|
|
|
|
146
|
my %seen; |
1560
|
53
|
|
|
|
|
94
|
my $f2 = []; |
1561
|
53
|
|
|
|
|
176
|
for ( @${formats_to_try} ) { |
1562
|
978
|
50
|
|
|
|
1463
|
push @{$f2}, $_ unless exists( $seen{$_} ); |
|
978
|
|
|
|
|
1353
|
|
1563
|
978
|
|
|
|
|
1685
|
$seen{$_} = undef; |
1564
|
|
|
|
|
|
|
} |
1565
|
53
|
|
|
|
|
134
|
$formats_to_try = $f2; |
1566
|
|
|
|
|
|
|
|
1567
|
53
|
100
|
|
|
|
149
|
$ignore_trailing_chars = 1 unless defined($ignore_trailing_chars); |
1568
|
53
|
100
|
|
|
|
187
|
$search_time = 1 unless defined($search_time); |
1569
|
|
|
|
|
|
|
|
1570
|
53
|
100
|
|
|
|
128
|
my $stop = ( $ignore_trailing_chars ? '' : '>' ); |
1571
|
|
|
|
|
|
|
|
1572
|
|
|
|
|
|
|
# |
1573
|
|
|
|
|
|
|
# The code below (from # AMB to # AMB-END) aims to remove ambiguity that comes |
1574
|
|
|
|
|
|
|
# from %Y versus %y. |
1575
|
|
|
|
|
|
|
# That is: provided you have (among others) the formats to try |
1576
|
|
|
|
|
|
|
# '%d-%m-%Y' |
1577
|
|
|
|
|
|
|
# and |
1578
|
|
|
|
|
|
|
# '%d-%m-%y' |
1579
|
|
|
|
|
|
|
# then if parsing 4-digit-year dates (like '31-12-2016'), the two formats will |
1580
|
|
|
|
|
|
|
# work and you'll end up with an ambiguity. To be precise, there'll be no |
1581
|
|
|
|
|
|
|
# ambiguity if the date is followed by a time, but if the date is alone, both |
1582
|
|
|
|
|
|
|
# formats will work. |
1583
|
|
|
|
|
|
|
# |
1584
|
|
|
|
|
|
|
# Thanks to the below code, the member 'index_slave' (and its counterpart |
1585
|
|
|
|
|
|
|
# index_master) is populated and later, if such an ambiguity is detected, the |
1586
|
|
|
|
|
|
|
# upper case version (the one containing upper case '%Y') will be kept and the |
1587
|
|
|
|
|
|
|
# other one will be discarded. |
1588
|
|
|
|
|
|
|
# |
1589
|
|
|
|
|
|
|
# NOTE |
1590
|
|
|
|
|
|
|
# Such an ambiguity can exist only when ignore_trailing_chars is set. |
1591
|
|
|
|
|
|
|
# Otherwise, the remaining two digits make the date parsing fail in the '%y' |
1592
|
|
|
|
|
|
|
# case. |
1593
|
|
|
|
|
|
|
# |
1594
|
|
|
|
|
|
|
# The other members of the 'Format' object are used to work "normally", |
1595
|
|
|
|
|
|
|
# independently from this ambiguity removal feature. |
1596
|
|
|
|
|
|
|
# |
1597
|
|
|
|
|
|
|
|
1598
|
|
|
|
|
|
|
# WIP = Work In Progress... |
1599
|
53
|
|
|
|
|
92
|
my @formats_wip; |
1600
|
|
|
|
|
|
|
my @locales; |
1601
|
53
|
100
|
|
|
|
147
|
@locales = split( /,\s*/, $localizations ) if defined($localizations); |
1602
|
53
|
|
|
|
|
73
|
for my $f ( @{$formats_to_try} ) { |
|
53
|
|
|
|
|
129
|
|
1603
|
978
|
100
|
|
|
|
2135
|
my $has_localized_item = ( $f =~ m/%a|%A|%b|%B|%c|%\+/ ? 1 : 0 ); |
1604
|
978
|
100
|
100
|
|
|
1765
|
unless ( @locales and $has_localized_item ) { |
1605
|
968
|
|
|
|
|
1583
|
push @formats_wip, [ $f, '' ]; |
1606
|
968
|
|
|
|
|
1387
|
next; |
1607
|
|
|
|
|
|
|
} |
1608
|
10
|
|
|
|
|
34
|
push @formats_wip, [ $f, $_ ] foreach @locales; |
1609
|
|
|
|
|
|
|
} |
1610
|
|
|
|
|
|
|
|
1611
|
|
|
|
|
|
|
# AMB |
1612
|
53
|
|
|
|
|
95
|
my @formats; |
1613
|
|
|
|
|
|
|
my %mates; |
1614
|
53
|
|
|
|
|
151
|
for my $i ( 0 .. $#formats_wip ) { |
1615
|
988
|
|
|
|
|
1734
|
my $fstr = $formats_wip[$i]->[0]; |
1616
|
988
|
|
|
|
|
1263
|
my $floc = $formats_wip[$i]->[1]; |
1617
|
|
|
|
|
|
|
|
1618
|
|
|
|
|
|
|
# FIXME |
1619
|
|
|
|
|
|
|
# Will not manage correctly a string like |
1620
|
|
|
|
|
|
|
# '%%Y' |
1621
|
|
|
|
|
|
|
# that means (when used with Strptime), the litteral string '%Y' with no |
1622
|
|
|
|
|
|
|
# substitution. |
1623
|
|
|
|
|
|
|
# Such cases will be complicated to fix, as it'll require to do a kind-of |
1624
|
|
|
|
|
|
|
# Strptime-equivalent parsing of the string, and I find it a bit overkill. |
1625
|
|
|
|
|
|
|
# |
1626
|
|
|
|
|
|
|
# I prefer to push back in caller world saying |
1627
|
|
|
|
|
|
|
# "Hey, if using constructs like '%%Y', you'll be in trouble." |
1628
|
988
|
|
|
|
|
1181
|
my $m = $fstr; |
1629
|
988
|
|
|
|
|
3181
|
$m =~ s/%y//ig; |
1630
|
988
|
|
|
|
|
1608
|
$m .= $floc; |
1631
|
|
|
|
|
|
|
|
1632
|
988
|
|
|
|
|
1273
|
my $index_slave = -1; |
1633
|
988
|
|
|
|
|
1119
|
my $index_master = -1; |
1634
|
988
|
100
|
|
|
|
1757
|
if ( exists $mates{$m} ) { |
1635
|
262
|
|
|
|
|
516
|
my $alt_fstr = $formats_wip[ $mates{$m} ]->[0]; |
1636
|
262
|
100
|
|
|
|
705
|
my $m_lower = ( $fstr =~ m/%y/ ? 1 : 0 ); |
1637
|
262
|
100
|
|
|
|
570
|
my $m_upper = ( $fstr =~ m/%Y/ ? 1 : 0 ); |
1638
|
262
|
100
|
|
|
|
492
|
my $a_lower = ( $alt_fstr =~ m/%y/ ? 1 : 0 ); |
1639
|
262
|
100
|
|
|
|
516
|
my $a_upper = ( $alt_fstr =~ m/%Y/ ? 1 : 0 ); |
1640
|
|
|
|
|
|
|
|
1641
|
|
|
|
|
|
|
# We ignore the weird cases where we'd have both %y and %Y in a format string. |
1642
|
|
|
|
|
|
|
|
1643
|
262
|
100
|
66
|
|
|
1919
|
if ( ( !$m_lower ) and $m_upper and $a_lower and ( !$a_upper ) ) { |
|
|
50
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
1644
|
45
|
|
|
|
|
101
|
$index_slave = $mates{$m}; |
1645
|
45
|
|
|
|
|
749
|
$formats[ $mates{$m} ]->index_master($i); |
1646
|
|
|
|
|
|
|
} |
1647
|
|
|
|
|
|
|
elsif ( $m_lower and ( !$m_upper ) and ( !$a_lower ) and $a_upper ) |
1648
|
|
|
|
|
|
|
{ |
1649
|
217
|
|
|
|
|
352
|
$index_master = $mates{$m}; |
1650
|
217
|
|
|
|
|
3444
|
$formats[ $mates{$m} ]->index_slave($i); |
1651
|
|
|
|
|
|
|
} |
1652
|
|
|
|
|
|
|
|
1653
|
|
|
|
|
|
|
} |
1654
|
|
|
|
|
|
|
else { |
1655
|
726
|
|
|
|
|
1389
|
$mates{$m} = $i; |
1656
|
|
|
|
|
|
|
} |
1657
|
|
|
|
|
|
|
|
1658
|
988
|
|
|
|
|
3597
|
my %strptime_opts = ( pattern => $START . $fstr . $stop ); |
1659
|
988
|
100
|
|
|
|
1936
|
$strptime_opts{locale} = $floc if $floc ne ''; |
1660
|
988
|
100
|
|
|
|
15300
|
my $format = Format->new( |
1661
|
|
|
|
|
|
|
id => "$i", |
1662
|
|
|
|
|
|
|
format => $fstr, |
1663
|
|
|
|
|
|
|
locale => $floc, |
1664
|
|
|
|
|
|
|
parser => ( |
1665
|
|
|
|
|
|
|
$fstr ne '' |
1666
|
|
|
|
|
|
|
? _build_strptime_parser(%strptime_opts) |
1667
|
|
|
|
|
|
|
: undef |
1668
|
|
|
|
|
|
|
), |
1669
|
|
|
|
|
|
|
index_slave => $index_slave, |
1670
|
|
|
|
|
|
|
index_master => $index_master |
1671
|
|
|
|
|
|
|
); |
1672
|
988
|
|
|
|
|
247159
|
push @formats, $format; |
1673
|
|
|
|
|
|
|
} |
1674
|
53
|
|
|
|
|
201
|
for my $i ( 0 .. $#formats ) { |
1675
|
988
|
|
|
|
|
11052
|
my $format = $formats[$i]; |
1676
|
|
|
|
|
|
|
|
1677
|
|
|
|
|
|
|
# If a master could be itself the slave of another entry, that'd make it a |
1678
|
|
|
|
|
|
|
# hierarchical relation tree with multiple levels. It is not possible, only a |
1679
|
|
|
|
|
|
|
# direct, unique master-slave relation can be managed here. |
1680
|
988
|
50
|
66
|
|
|
12683
|
confess "Inonsistent data, check this module's code urgently!" |
1681
|
|
|
|
|
|
|
if $format->index_slave >= 0 and $format->index_master >= 0; |
1682
|
|
|
|
|
|
|
|
1683
|
988
|
100
|
|
|
|
22232
|
if ( $format->index_slave >= 0 ) { |
1684
|
262
|
|
|
|
|
4781
|
my $mate = $formats[ $format->index_slave ]; |
1685
|
262
|
50
|
33
|
|
|
4634
|
if ( $mate->index_master != $i or $mate->index_slave != -1 ) { |
1686
|
0
|
|
|
|
|
0
|
confess |
1687
|
|
|
|
|
|
|
"Inonsistent data (2), check this module's code urgently!"; |
1688
|
|
|
|
|
|
|
} |
1689
|
|
|
|
|
|
|
} |
1690
|
|
|
|
|
|
|
|
1691
|
988
|
100
|
|
|
|
21627
|
if ( $format->index_master >= 0 ) { |
1692
|
262
|
|
|
|
|
4681
|
my $mate = $formats[ $format->index_master ]; |
1693
|
262
|
50
|
33
|
|
|
4439
|
if ( $mate->index_slave != $i or $mate->index_master != -1 ) { |
1694
|
0
|
|
|
|
|
0
|
confess |
1695
|
|
|
|
|
|
|
"Inonsistent data (3), check this module's code urgently!"; |
1696
|
|
|
|
|
|
|
} |
1697
|
|
|
|
|
|
|
} |
1698
|
|
|
|
|
|
|
|
1699
|
|
|
|
|
|
|
} |
1700
|
53
|
50
|
|
|
|
470
|
if ($debug_fmt) { |
1701
|
0
|
|
|
|
|
0
|
for (@formats) { |
1702
|
0
|
|
|
|
|
0
|
my ( $idx, $rel ) = ( -1, "" ); |
1703
|
0
|
0
|
|
|
|
0
|
$idx = $_->index_slave, $rel = "S: " if $_->index_slave >= 0; |
1704
|
0
|
0
|
|
|
|
0
|
$idx = $_->index_master, $rel = "M: " if $_->index_master >= 0; |
1705
|
0
|
|
|
|
|
0
|
printf( $_debugh "%-18s %s %2d", |
1706
|
|
|
|
|
|
|
"'" . $_->format . "'", |
1707
|
|
|
|
|
|
|
$rel, $idx |
1708
|
|
|
|
|
|
|
); |
1709
|
0
|
0
|
|
|
|
0
|
print( $_debugh ": '" . $formats[$idx]->format . "'" ) if $idx >= 0; |
1710
|
0
|
|
|
|
|
0
|
print( $_debugh "\n" ); |
1711
|
|
|
|
|
|
|
} |
1712
|
|
|
|
|
|
|
} |
1713
|
|
|
|
|
|
|
|
1714
|
|
|
|
|
|
|
# AMB-END |
1715
|
|
|
|
|
|
|
|
1716
|
53
|
|
|
|
|
127
|
my %records; |
1717
|
|
|
|
|
|
|
my $record_number; |
1718
|
53
|
|
|
|
|
95
|
my $count_gotit = 0; |
1719
|
53
|
|
|
|
|
78
|
my $count_ambiguous = 0; |
1720
|
53
|
|
|
|
|
90
|
my $count_nodate = 0; |
1721
|
53
|
|
|
|
|
81
|
my $count_empty = 0; |
1722
|
53
|
|
|
|
|
71
|
my $has_signaled_can_start_recording_data = 0; |
1723
|
53
|
|
|
|
|
115
|
$self->{_line_after_which_recording_can_start} = 0; |
1724
|
|
|
|
|
|
|
|
1725
|
|
|
|
|
|
|
# |
1726
|
|
|
|
|
|
|
# Seems a weird optimization here, but it is very important. |
1727
|
|
|
|
|
|
|
# In some cases, divides execution time (to detect date format on big files |
1728
|
|
|
|
|
|
|
# containing numerous fields) by 10. |
1729
|
|
|
|
|
|
|
# |
1730
|
|
|
|
|
|
|
# When evaluates to true, it means the input column has no identified date |
1731
|
|
|
|
|
|
|
# format, meaning, no further check to do. |
1732
|
|
|
|
|
|
|
# |
1733
|
53
|
|
|
|
|
85
|
my @cache_nodate; |
1734
|
|
|
|
|
|
|
|
1735
|
53
|
|
|
|
|
144
|
while ( my $f = _mygetline( $incsv, $inh ) ) { |
1736
|
1418
|
|
|
|
|
43503
|
$record_number++; |
1737
|
|
|
|
|
|
|
|
1738
|
1418
|
50
|
|
|
|
3439
|
if ($debug_fmt) { |
1739
|
0
|
|
|
|
|
0
|
print( $_debugh "RECORD $record_number:\n" ); |
1740
|
0
|
|
|
|
|
0
|
for ( 0 .. @$f - 1 ) { |
1741
|
0
|
|
|
|
|
0
|
printf( $_debugh " %02d: '%s'\n", $_, $f->[$_] ); |
1742
|
|
|
|
|
|
|
} |
1743
|
|
|
|
|
|
|
} |
1744
|
|
|
|
|
|
|
|
1745
|
1418
|
|
|
|
|
2452
|
for my $n (@fields_to_detect_format) { |
1746
|
15285
|
100
|
|
|
|
24502
|
next if $cache_nodate[$n]; |
1747
|
|
|
|
|
|
|
|
1748
|
10444
|
|
|
|
|
13470
|
my $v = $f->[$n]; |
1749
|
10444
|
100
|
|
|
|
14980
|
$v = '' unless defined($v); |
1750
|
10444
|
100
|
|
|
|
17038
|
next if $v eq ''; |
1751
|
|
|
|
|
|
|
next |
1752
|
2942
|
100
|
100
|
|
|
8788
|
if defined($refsub_is_datetime_empty) |
1753
|
|
|
|
|
|
|
and $refsub_is_datetime_empty->($v); |
1754
|
|
|
|
|
|
|
|
1755
|
2939
|
50
|
|
|
|
5069
|
if ($debug_fmt) { |
1756
|
0
|
|
|
|
|
0
|
my $col = $self->_col_dispname($n); |
1757
|
0
|
|
|
|
|
0
|
print( $_debugh "Line $record_number, column '$col':\n" ); |
1758
|
|
|
|
|
|
|
} |
1759
|
|
|
|
|
|
|
|
1760
|
2939
|
|
|
|
|
4647
|
for my $fmt (@formats) { |
1761
|
50192
|
|
|
|
|
595888
|
my $fid = $fmt->id; |
1762
|
50192
|
|
|
|
|
811538
|
my $fstr = $fmt->format; |
1763
|
|
|
|
|
|
|
|
1764
|
|
|
|
|
|
|
$self->_debug_output_fmt( '** pre ', $fmt, |
1765
|
50192
|
50
|
|
|
|
304131
|
$records{$n}->{$fid} ) |
1766
|
|
|
|
|
|
|
if $debug_fmt; |
1767
|
|
|
|
|
|
|
|
1768
|
|
|
|
|
|
|
$records{$n}->{$fid} = RecordCounter->new( |
1769
|
|
|
|
|
|
|
count_ok => 0, |
1770
|
|
|
|
|
|
|
count_ko => 0, |
1771
|
|
|
|
|
|
|
has_searched_time => 0, |
1772
|
|
|
|
|
|
|
|
1773
|
|
|
|
|
|
|
format => undef, |
1774
|
|
|
|
|
|
|
locale => undef, |
1775
|
|
|
|
|
|
|
|
1776
|
|
|
|
|
|
|
has_found_time => 0, |
1777
|
|
|
|
|
|
|
format_with_addition_of_time => undef, |
1778
|
|
|
|
|
|
|
locale_with_addition_of_time => undef, |
1779
|
|
|
|
|
|
|
parser_with_addition_of_time => undef |
1780
|
50192
|
100
|
|
|
|
192820
|
) unless defined( $records{$n}->{$fid} ); |
1781
|
|
|
|
|
|
|
|
1782
|
50192
|
100
|
|
|
|
1068410
|
unless ( $records{$n}->{$fid}->count_ko ) { |
1783
|
|
|
|
|
|
|
my $is_ok = &_try_parser( |
1784
|
|
|
|
|
|
|
$fmt, |
1785
|
11481
|
|
|
|
|
90500
|
$records{$n}->{$fid}, |
1786
|
|
|
|
|
|
|
$START . $v . $stop |
1787
|
|
|
|
|
|
|
); |
1788
|
|
|
|
|
|
|
|
1789
|
11481
|
100
|
|
|
|
21489
|
if ( !$is_ok ) { |
1790
|
7789
|
|
|
|
|
10535
|
my $give_up_time = 0; |
1791
|
7789
|
100
|
66
|
|
|
99359
|
if ( $records{$n}->{$fid}->count_ko == 0 |
|
|
|
100
|
|
|
|
|
1792
|
|
|
|
|
|
|
and $records{$n}->{$fid}->has_searched_time |
1793
|
|
|
|
|
|
|
and $records{$n}->{$fid}->has_found_time ) |
1794
|
|
|
|
|
|
|
{ |
1795
|
77
|
|
100
|
|
|
4346
|
$give_up_time = ( |
1796
|
|
|
|
|
|
|
defined( $fmt->parser ) and defined( |
1797
|
|
|
|
|
|
|
$fmt->parser->parse_datetime( |
1798
|
|
|
|
|
|
|
$START . $v . $stop |
1799
|
|
|
|
|
|
|
) |
1800
|
|
|
|
|
|
|
) ? 1 : 0 |
1801
|
|
|
|
|
|
|
); |
1802
|
77
|
100
|
|
|
|
42441
|
if ($give_up_time) { |
1803
|
4
|
|
|
|
|
80
|
$records{$n}->{$fid}->has_found_time(0); |
1804
|
4
|
|
|
|
|
33
|
$is_ok = 1; |
1805
|
|
|
|
|
|
|
} |
1806
|
|
|
|
|
|
|
} |
1807
|
|
|
|
|
|
|
} |
1808
|
|
|
|
|
|
|
|
1809
|
11481
|
100
|
100
|
|
|
200084
|
if ( $is_ok or ( !$ignore_trailing_chars ) ) { |
1810
|
6079
|
|
100
|
|
|
76756
|
my $incr = |
1811
|
|
|
|
|
|
|
( defined( $fmt->parser ) and $is_ok ? 1 : 0 ); |
1812
|
|
|
|
|
|
|
|
1813
|
6079
|
100
|
|
|
|
115704
|
unless ( $records{$n}->{$fid}->has_searched_time ) { |
1814
|
3156
|
|
|
|
|
55838
|
$records{$n}->{$fid}->has_searched_time(1); |
1815
|
|
|
|
|
|
|
|
1816
|
|
|
|
|
|
|
croak |
1817
|
|
|
|
|
|
|
"Inconsistent status! Issue in module code not in caller's!" |
1818
|
3156
|
50
|
|
|
|
51260
|
if $records{$n}->{$fid}->count_ok != 0; |
1819
|
|
|
|
|
|
|
|
1820
|
3156
|
100
|
|
|
|
21966
|
if ($search_time) { |
|
|
100
|
|
|
|
|
|
1821
|
|
|
|
|
|
|
|
1822
|
1980
|
50
|
|
|
|
3352
|
print( $_debugh |
1823
|
|
|
|
|
|
|
" Search time in '$v', format '$fstr'\n" |
1824
|
|
|
|
|
|
|
) if $debug_fmt; |
1825
|
|
|
|
|
|
|
|
1826
|
1980
|
|
|
|
|
24125
|
my $t = |
1827
|
|
|
|
|
|
|
$self->_guess_time_format( $fstr, |
1828
|
|
|
|
|
|
|
$fmt->locale, $v, $stop ); |
1829
|
1980
|
100
|
|
|
|
34402
|
$records{$n}->{$fid} |
1830
|
|
|
|
|
|
|
->has_found_time( ( defined($t) ? 1 : 0 ) ); |
1831
|
1980
|
100
|
|
|
|
14908
|
if ( defined($t) ) { |
|
|
100
|
|
|
|
|
|
1832
|
254
|
|
|
|
|
3696
|
$records{$n}->{$fid} |
1833
|
|
|
|
|
|
|
->format_with_addition_of_time( $t->[0] ); |
1834
|
254
|
|
|
|
|
4677
|
$records{$n}->{$fid} |
1835
|
|
|
|
|
|
|
->locale_with_addition_of_time( $t->[1] ); |
1836
|
254
|
|
|
|
|
4603
|
$records{$n}->{$fid} |
1837
|
|
|
|
|
|
|
->parser_with_addition_of_time( $t->[2] ); |
1838
|
254
|
|
|
|
|
1804
|
$incr = 1; |
1839
|
|
|
|
|
|
|
} |
1840
|
|
|
|
|
|
|
elsif ( $fstr eq '' ) { |
1841
|
200
|
|
|
|
|
2676
|
$records{$n}->{$fid}->count_ko(1); |
1842
|
|
|
|
|
|
|
} |
1843
|
|
|
|
|
|
|
} |
1844
|
|
|
|
|
|
|
elsif ( $fstr eq '' ) { |
1845
|
78
|
|
|
|
|
973
|
$records{$n}->{$fid}->count_ko(1); |
1846
|
|
|
|
|
|
|
} |
1847
|
|
|
|
|
|
|
|
1848
|
|
|
|
|
|
|
} |
1849
|
|
|
|
|
|
|
|
1850
|
|
|
|
|
|
|
$records{$n}->{$fid} |
1851
|
6079
|
|
|
|
|
95074
|
->count_ok( $records{$n}->{$fid}->count_ok + $incr ); |
1852
|
|
|
|
|
|
|
|
1853
|
|
|
|
|
|
|
$records{$n}->{$fid} |
1854
|
6079
|
100
|
100
|
|
|
94759
|
->count_ko( $records{$n}->{$fid}->count_ko + 1 ) |
1855
|
|
|
|
|
|
|
if ( !$incr ) |
1856
|
|
|
|
|
|
|
and ( !$is_ok ); |
1857
|
|
|
|
|
|
|
|
1858
|
6079
|
100
|
|
|
|
31004
|
if ($incr) { |
1859
|
|
|
|
|
|
|
|
1860
|
|
|
|
|
|
|
# We remove the slave if master is fine. |
1861
|
|
|
|
|
|
|
# Depending on the order in which parsing got done, the master could |
1862
|
|
|
|
|
|
|
# pop up first, or the slave, that is why we need manage both cases. |
1863
|
2697
|
100
|
100
|
|
|
32947
|
if ( $fmt->index_slave >= 0 |
1864
|
|
|
|
|
|
|
or $fmt->index_master >= 0 ) |
1865
|
|
|
|
|
|
|
{ |
1866
|
1995
|
100
|
|
|
|
56452
|
my $has_slave = |
1867
|
|
|
|
|
|
|
( $fmt->index_slave >= 0 ? 1 : 0 ); |
1868
|
1995
|
100
|
|
|
|
33339
|
my $idx = ( |
1869
|
|
|
|
|
|
|
$has_slave |
1870
|
|
|
|
|
|
|
? $fmt->index_slave |
1871
|
|
|
|
|
|
|
: $fmt->index_master |
1872
|
|
|
|
|
|
|
); |
1873
|
1995
|
|
|
|
|
32688
|
my $mate = $formats[$idx]->id; |
1874
|
1995
|
100
|
|
|
|
15143
|
if ( exists $records{$n}->{$mate} ) { |
1875
|
1841
|
100
|
|
|
|
3444
|
if ($has_slave) { |
1876
|
623
|
100
|
|
|
|
7673
|
if ( $records{$n}->{$mate}->count_ko == |
1877
|
|
|
|
|
|
|
0 ) |
1878
|
|
|
|
|
|
|
{ |
1879
|
|
|
|
|
|
|
# I am the master: I remove the slave |
1880
|
2
|
|
|
|
|
36
|
$records{$n}->{$mate}->count_ko(1); |
1881
|
|
|
|
|
|
|
} |
1882
|
|
|
|
|
|
|
} |
1883
|
|
|
|
|
|
|
else { |
1884
|
1218
|
50
|
66
|
|
|
15021
|
if ( |
|
|
|
66
|
|
|
|
|
1885
|
|
|
|
|
|
|
$records{$n}->{$mate}->count_ko == 0 |
1886
|
|
|
|
|
|
|
and |
1887
|
|
|
|
|
|
|
$records{$n}->{$mate}->count_ok >= 1 |
1888
|
|
|
|
|
|
|
and |
1889
|
|
|
|
|
|
|
$records{$n}->{$fid}->count_ko == |
1890
|
|
|
|
|
|
|
0 ) |
1891
|
|
|
|
|
|
|
{ |
1892
|
117
|
|
|
|
|
6262
|
$records{$n}->{$fid}->count_ko(1); |
1893
|
|
|
|
|
|
|
} |
1894
|
|
|
|
|
|
|
} |
1895
|
|
|
|
|
|
|
} |
1896
|
|
|
|
|
|
|
} |
1897
|
|
|
|
|
|
|
} |
1898
|
|
|
|
|
|
|
|
1899
|
|
|
|
|
|
|
} |
1900
|
|
|
|
|
|
|
else { |
1901
|
|
|
|
|
|
|
$records{$n}->{$fid} |
1902
|
5402
|
|
|
|
|
71277
|
->count_ko( $records{$n}->{$fid}->count_ko + 1 ); |
1903
|
|
|
|
|
|
|
} |
1904
|
|
|
|
|
|
|
} |
1905
|
|
|
|
|
|
|
|
1906
|
|
|
|
|
|
|
$self->_debug_output_fmt( ' post', $fmt, |
1907
|
50192
|
50
|
|
|
|
346497
|
$records{$n}->{$fid} ) |
1908
|
|
|
|
|
|
|
if $debug_fmt; |
1909
|
|
|
|
|
|
|
|
1910
|
|
|
|
|
|
|
} |
1911
|
|
|
|
|
|
|
} |
1912
|
|
|
|
|
|
|
|
1913
|
1418
|
|
|
|
|
2199
|
$count_gotit = 0; |
1914
|
1418
|
|
|
|
|
1811
|
$count_ambiguous = 0; |
1915
|
1418
|
|
|
|
|
2187
|
$count_empty = 0; |
1916
|
1418
|
|
|
|
|
2563
|
for my $n (@fields_to_detect_format) { |
1917
|
15285
|
100
|
|
|
|
24296
|
next if $cache_nodate[$n]; |
1918
|
|
|
|
|
|
|
|
1919
|
10444
|
|
|
|
|
12135
|
my $candidate = 0; |
1920
|
10444
|
|
|
|
|
10843
|
my $tt = 0; |
1921
|
10444
|
|
|
|
|
13604
|
for my $fmt (@formats) { |
1922
|
150840
|
|
|
|
|
1783376
|
my $fid = $fmt->id; |
1923
|
150840
|
|
|
|
|
910867
|
my $rec = $records{$n}->{$fid}; |
1924
|
150840
|
100
|
|
|
|
247074
|
next unless defined($rec); |
1925
|
|
|
|
|
|
|
|
1926
|
92018
|
|
|
|
|
1059566
|
my $ok = $rec->count_ok; |
1927
|
92018
|
|
|
|
|
1420139
|
my $ko = $rec->count_ko; |
1928
|
|
|
|
|
|
|
|
1929
|
92018
|
50
|
66
|
|
|
608679
|
confess "Oups. Check this module code urgently!" |
1930
|
|
|
|
|
|
|
if $ok == 0 and $ko == 0; |
1931
|
92018
|
|
|
|
|
103875
|
$tt += $ok + $ko; |
1932
|
|
|
|
|
|
|
|
1933
|
92018
|
100
|
100
|
|
|
177244
|
$candidate++ if $ok >= 1 and $ko == 0; |
1934
|
|
|
|
|
|
|
} |
1935
|
10444
|
100
|
|
|
|
19328
|
if ( $candidate == 1 ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
1936
|
4754
|
|
|
|
|
6947
|
$count_gotit++; |
1937
|
|
|
|
|
|
|
} |
1938
|
|
|
|
|
|
|
elsif ( $candidate >= 2 ) { |
1939
|
653
|
|
|
|
|
1242
|
$count_ambiguous++; |
1940
|
|
|
|
|
|
|
} |
1941
|
|
|
|
|
|
|
elsif ( $tt != 0 ) { |
1942
|
230
|
|
|
|
|
289
|
$count_nodate++; |
1943
|
230
|
|
|
|
|
466
|
$cache_nodate[$n] = 1; |
1944
|
|
|
|
|
|
|
} |
1945
|
|
|
|
|
|
|
else { |
1946
|
4807
|
|
|
|
|
6378
|
$count_empty++; |
1947
|
|
|
|
|
|
|
} |
1948
|
|
|
|
|
|
|
} |
1949
|
|
|
|
|
|
|
|
1950
|
1418
|
50
|
|
|
|
2765
|
if ($debug_fmt) { |
1951
|
0
|
|
|
|
|
0
|
print( $_debugh "\$count_gotit = $count_gotit\n" ); |
1952
|
0
|
|
|
|
|
0
|
print( $_debugh "\$count_ambiguous = $count_ambiguous\n" ); |
1953
|
0
|
|
|
|
|
0
|
print( $_debugh "\$count_nodate = $count_nodate\n" ); |
1954
|
0
|
|
|
|
|
0
|
print( $_debugh "\$count_empty = $count_empty\n" ); |
1955
|
|
|
|
|
|
|
} |
1956
|
|
|
|
|
|
|
|
1957
|
1418
|
|
|
|
|
2495
|
my $can_start_recording_data = 0; |
1958
|
1418
|
100
|
100
|
|
|
6083
|
$can_start_recording_data = 1 |
|
|
|
100
|
|
|
|
|
1959
|
|
|
|
|
|
|
if $count_gotit + $count_ambiguous + $count_nodate >= 1 |
1960
|
|
|
|
|
|
|
and ( !$count_ambiguous ) |
1961
|
|
|
|
|
|
|
and ( !$count_empty ); |
1962
|
|
|
|
|
|
|
|
1963
|
1418
|
100
|
100
|
|
|
8344
|
if ( $can_start_recording_data |
1964
|
|
|
|
|
|
|
and ( !$has_signaled_can_start_recording_data ) ) |
1965
|
|
|
|
|
|
|
{ |
1966
|
31
|
|
|
|
|
54
|
$has_signaled_can_start_recording_data = 1; |
1967
|
|
|
|
|
|
|
|
1968
|
31
|
50
|
|
|
|
86
|
print( $_debugh |
1969
|
|
|
|
|
|
|
"Can start recording (all dates formats detection closed) " |
1970
|
|
|
|
|
|
|
. "after record #$record_number\n" ) |
1971
|
|
|
|
|
|
|
if $_debug; |
1972
|
|
|
|
|
|
|
|
1973
|
31
|
|
|
|
|
87
|
$self->{_line_after_which_recording_can_start} = $record_number; |
1974
|
31
|
100
|
|
|
|
119
|
last unless $self->{fields_dates_auto}; |
1975
|
24
|
100
|
|
|
|
181
|
last if $self->{fields_dates_auto_optimize}; |
1976
|
|
|
|
|
|
|
} |
1977
|
|
|
|
|
|
|
} |
1978
|
|
|
|
|
|
|
|
1979
|
53
|
|
|
|
|
2822
|
close $inh; |
1980
|
|
|
|
|
|
|
|
1981
|
53
|
|
|
|
|
168
|
my %dates_detailed_status; |
1982
|
|
|
|
|
|
|
my @dates_formats; |
1983
|
53
|
|
|
|
|
208
|
my $check_empty = 0; |
1984
|
53
|
|
|
|
|
93
|
my $check_nodate = 0; |
1985
|
53
|
|
|
|
|
117
|
my $check_ambiguous = 0; |
1986
|
53
|
|
|
|
|
102
|
my $check_gotit = 0; |
1987
|
53
|
|
|
|
|
284
|
for my $n (@fields_to_detect_format) { |
1988
|
532
|
|
|
|
|
635
|
my @formats_ok; |
1989
|
532
|
|
|
|
|
686
|
my $tt = 0; |
1990
|
532
|
|
|
|
|
662
|
for my $fid ( sort keys %{ $records{$n} } ) { |
|
532
|
|
|
|
|
4539
|
|
1991
|
8465
|
|
|
|
|
74146
|
my $rec = $records{$n}->{$fid}; |
1992
|
8465
|
100
|
100
|
|
|
100208
|
if ( $rec->count_ok >= 1 and $rec->count_ko == 0 ) { |
1993
|
|
|
|
|
|
|
|
1994
|
298
|
|
|
|
|
10002
|
my ( $fstr, $floc ) = ( $rec->format, $rec->locale ); |
1995
|
298
|
100
|
|
|
|
6546
|
( $fstr, $floc ) = ( |
1996
|
|
|
|
|
|
|
$rec->format_with_addition_of_time, |
1997
|
|
|
|
|
|
|
$rec->locale_with_addition_of_time |
1998
|
|
|
|
|
|
|
) if $rec->has_found_time; |
1999
|
|
|
|
|
|
|
|
2000
|
298
|
|
|
|
|
5779
|
push @formats_ok, [ $fstr, $floc ]; |
2001
|
|
|
|
|
|
|
} |
2002
|
8465
|
|
|
|
|
141069
|
$tt += $rec->count_ok + $rec->count_ko; |
2003
|
|
|
|
|
|
|
} |
2004
|
532
|
|
|
|
|
5172
|
my $is_ok = 0; |
2005
|
532
|
|
|
|
|
621
|
my $format; |
2006
|
532
|
|
|
|
|
657
|
my $locale = ''; |
2007
|
532
|
100
|
100
|
|
|
1835
|
if ( $#formats_ok < 0 and $tt == 0 ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
2008
|
16
|
|
|
|
|
36
|
$format = "Z"; |
2009
|
16
|
|
|
|
|
34
|
$check_empty++; |
2010
|
|
|
|
|
|
|
} |
2011
|
|
|
|
|
|
|
elsif ( $#formats_ok < 0 ) { |
2012
|
230
|
|
|
|
|
304
|
$format = "N"; |
2013
|
230
|
|
|
|
|
279
|
$check_nodate++; |
2014
|
|
|
|
|
|
|
} |
2015
|
|
|
|
|
|
|
elsif ( $#formats_ok > 0 ) { |
2016
|
12
|
|
|
|
|
25
|
$format = "A"; |
2017
|
12
|
|
|
|
|
16
|
$check_ambiguous++; |
2018
|
|
|
|
|
|
|
} |
2019
|
|
|
|
|
|
|
else { |
2020
|
274
|
|
|
|
|
337
|
$is_ok = 1; |
2021
|
274
|
|
|
|
|
438
|
$format = $formats_ok[0]->[0]; |
2022
|
274
|
|
|
|
|
345
|
$locale = $formats_ok[0]->[1]; |
2023
|
274
|
|
|
|
|
349
|
$check_gotit++; |
2024
|
|
|
|
|
|
|
} |
2025
|
532
|
|
|
|
|
1034
|
my $col = $self->_col_dispname($n); |
2026
|
|
|
|
|
|
|
|
2027
|
|
|
|
|
|
|
$dates_detailed_status{$col} = $format |
2028
|
532
|
50
|
|
|
|
1586
|
unless exists $dates_detailed_status{$col}; |
2029
|
532
|
100
|
66
|
|
|
2047
|
$dates_formats[$n] = [ $format, $locale ] |
2030
|
|
|
|
|
|
|
if $is_ok and ( !defined $dates_formats[$n] ); |
2031
|
|
|
|
|
|
|
} |
2032
|
|
|
|
|
|
|
$dates_detailed_status{'.'} = |
2033
|
53
|
|
|
|
|
187
|
$self->{_line_after_which_recording_can_start}; |
2034
|
|
|
|
|
|
|
|
2035
|
53
|
50
|
66
|
|
|
440
|
if ( $check_empty != $count_empty |
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
2036
|
|
|
|
|
|
|
or $check_nodate != $count_nodate |
2037
|
|
|
|
|
|
|
or $check_ambiguous != $count_ambiguous |
2038
|
|
|
|
|
|
|
or $check_gotit != $count_gotit ) |
2039
|
|
|
|
|
|
|
{ |
2040
|
|
|
|
|
|
|
# The below condition can happen with an empty CSV (empty file (no header) or |
2041
|
|
|
|
|
|
|
# only a header line). |
2042
|
1
|
50
|
33
|
|
|
71
|
if ( $count_empty |
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
2043
|
|
|
|
|
|
|
or $check_nodate |
2044
|
|
|
|
|
|
|
or $count_nodate |
2045
|
|
|
|
|
|
|
or $check_ambiguous |
2046
|
|
|
|
|
|
|
or $count_ambiguous |
2047
|
|
|
|
|
|
|
or $check_gotit |
2048
|
|
|
|
|
|
|
or $count_gotit ) |
2049
|
|
|
|
|
|
|
{ |
2050
|
0
|
|
|
|
|
0
|
print( STDERR "\$check_empty = $check_empty\n" ); |
2051
|
0
|
|
|
|
|
0
|
print( STDERR "\$count_empty = $count_empty\n" ); |
2052
|
0
|
|
|
|
|
0
|
print( STDERR "\$check_nodate = $check_nodate\n" ); |
2053
|
0
|
|
|
|
|
0
|
print( STDERR "\$count_nodate = $count_nodate\n" ); |
2054
|
0
|
|
|
|
|
0
|
print( STDERR "\$check_ambiguous = $check_ambiguous\n" ); |
2055
|
0
|
|
|
|
|
0
|
print( STDERR "\$count_ambiguous = $count_ambiguous\n" ); |
2056
|
0
|
|
|
|
|
0
|
print( STDERR "\$check_gotit = $check_gotit\n" ); |
2057
|
0
|
|
|
|
|
0
|
print( STDERR "\$count_gotit = $count_gotit\n" ); |
2058
|
0
|
|
|
|
|
0
|
confess "Oups! Check immediately this module code, man!"; |
2059
|
|
|
|
|
|
|
} |
2060
|
|
|
|
|
|
|
} |
2061
|
|
|
|
|
|
|
|
2062
|
53
|
50
|
|
|
|
140
|
if ($debug_fmt) { |
2063
|
|
|
|
|
|
|
|
2064
|
|
|
|
|
|
|
# A very detailed debug output |
2065
|
0
|
|
|
|
|
0
|
for my $n (@fields_to_detect_format) { |
2066
|
0
|
|
|
|
|
0
|
my $col = $self->_col_dispname($n); |
2067
|
0
|
|
|
|
|
0
|
print( $_debugh "$col\n" ); |
2068
|
0
|
|
|
|
|
0
|
printf( $_debugh " %-25s %3s %3s\n", "format", "OK", "KO" ); |
2069
|
0
|
|
|
|
|
0
|
for my $fid ( sort keys %{ $records{$n} } ) { |
|
0
|
|
|
|
|
0
|
|
2070
|
0
|
|
|
|
|
0
|
my $rec = $records{$n}->{$fid}; |
2071
|
0
|
|
|
|
|
0
|
my $cc = ''; |
2072
|
0
|
0
|
0
|
|
|
0
|
$cc = "(" . $rec->locale . ")" |
2073
|
|
|
|
|
|
|
if defined( $rec->locale ) |
2074
|
|
|
|
|
|
|
and $rec->locale ne ''; |
2075
|
0
|
|
|
|
|
0
|
printf( $_debugh " %-25s %3d %3d\n", |
2076
|
|
|
|
|
|
|
$rec->format . $cc, |
2077
|
|
|
|
|
|
|
$rec->count_ok, $rec->count_ko |
2078
|
|
|
|
|
|
|
); |
2079
|
|
|
|
|
|
|
} |
2080
|
|
|
|
|
|
|
} |
2081
|
|
|
|
|
|
|
} |
2082
|
|
|
|
|
|
|
|
2083
|
|
|
|
|
|
|
# Not a typo - displaying it IN ADDITION to debug output above is done on |
2084
|
|
|
|
|
|
|
# purpose... |
2085
|
53
|
50
|
|
|
|
140
|
if ($_debug) { |
2086
|
|
|
|
|
|
|
|
2087
|
|
|
|
|
|
|
# A shorter (as compared to above) output of outcome of DateTime detection |
2088
|
0
|
|
|
|
|
0
|
print( $_debugh "Result of DateTime detection:\n" ); |
2089
|
0
|
|
|
|
|
0
|
printf( $_debugh "%-3s %-25s %-30s %s\n", |
2090
|
|
|
|
|
|
|
'###', 'FIELD', |
2091
|
|
|
|
|
|
|
'DATETIME FORMAT', |
2092
|
|
|
|
|
|
|
'DATETIME LOCALE' |
2093
|
|
|
|
|
|
|
); |
2094
|
0
|
|
|
|
|
0
|
for my $n (@fields_to_detect_format) { |
2095
|
0
|
|
|
|
|
0
|
my ( $fmt, $loc ) = ( '<undef>', '<undef>' ); |
2096
|
0
|
0
|
|
|
|
0
|
if ( defined( $dates_formats[$n] ) ) { |
2097
|
0
|
|
|
|
|
0
|
( $fmt, $loc ) = @{ $dates_formats[$n] }[ 0, 1 ]; |
|
0
|
|
|
|
|
0
|
|
2098
|
|
|
|
|
|
|
} |
2099
|
0
|
|
|
|
|
0
|
printf( $_debugh "%03d %-25s %-30s %s\n", |
2100
|
|
|
|
|
|
|
$n, $self->_col_dispname($n), |
2101
|
|
|
|
|
|
|
$fmt, $loc |
2102
|
|
|
|
|
|
|
); |
2103
|
|
|
|
|
|
|
} |
2104
|
|
|
|
|
|
|
} |
2105
|
|
|
|
|
|
|
|
2106
|
53
|
100
|
|
|
|
168
|
if ( !$self->{fields_dates_auto} ) { |
2107
|
10
|
|
|
|
|
20
|
my $e = 0; |
2108
|
10
|
|
|
|
|
23
|
for my $n (@fields_to_detect_format) { |
2109
|
20
|
100
|
|
|
|
53
|
next if defined( $dates_formats[$n] ); |
2110
|
7
|
|
|
|
|
17
|
$self->_print_error( |
2111
|
|
|
|
|
|
|
"unable to detect DateTime format of field '" |
2112
|
|
|
|
|
|
|
. $self->_col_dispname($n) . "'", |
2113
|
|
|
|
|
|
|
1 |
2114
|
|
|
|
|
|
|
); |
2115
|
7
|
|
|
|
|
195
|
$e++; |
2116
|
|
|
|
|
|
|
} |
2117
|
|
|
|
|
|
|
$self->_print_error( |
2118
|
10
|
100
|
|
|
|
44
|
"$e field(s) encountered with unknown DateTime format") |
2119
|
|
|
|
|
|
|
if $e; |
2120
|
|
|
|
|
|
|
} |
2121
|
|
|
|
|
|
|
|
2122
|
51
|
|
|
|
|
642
|
$self->{_dates_detailed_status} = {%dates_detailed_status}; |
2123
|
51
|
|
|
|
|
10202
|
$self->{_dates_formats} = [@dates_formats]; |
2124
|
|
|
|
|
|
|
} |
2125
|
|
|
|
|
|
|
|
2126
|
|
|
|
|
|
|
sub _debug_output_fmt { |
2127
|
0
|
|
|
0
|
|
0
|
my ( $self, $prefix, $fmt, $rec ) = @_; |
2128
|
|
|
|
|
|
|
|
2129
|
0
|
|
|
|
|
0
|
my $_debugh = $self->{_debugh}; |
2130
|
|
|
|
|
|
|
|
2131
|
0
|
|
|
|
|
0
|
my ( $fstr, $floc ) = ( $fmt->format, $fmt->locale ); |
2132
|
0
|
0
|
0
|
|
|
0
|
( $fstr, $floc ) = ( |
2133
|
|
|
|
|
|
|
'<+T>' . $rec->format_with_addition_of_time, |
2134
|
|
|
|
|
|
|
$rec->locale_with_addition_of_time |
2135
|
|
|
|
|
|
|
) |
2136
|
|
|
|
|
|
|
if defined($rec) |
2137
|
|
|
|
|
|
|
and $rec->has_found_time; |
2138
|
|
|
|
|
|
|
|
2139
|
0
|
|
|
|
|
0
|
my $locstr = ''; |
2140
|
0
|
0
|
0
|
|
|
0
|
$locstr = "(" . $floc . ")" if defined($floc) and $floc ne ''; |
2141
|
|
|
|
|
|
|
|
2142
|
0
|
|
|
|
|
0
|
my $tmpok; |
2143
|
0
|
0
|
|
|
|
0
|
$tmpok = $rec->count_ok if defined($rec); |
2144
|
0
|
0
|
|
|
|
0
|
$tmpok = '<undef>' unless defined($tmpok); |
2145
|
0
|
|
|
|
|
0
|
my $tmpko; |
2146
|
0
|
0
|
|
|
|
0
|
$tmpko = $rec->count_ko if defined($rec); |
2147
|
0
|
0
|
|
|
|
0
|
$tmpko = '<undef>' unless defined($tmpko); |
2148
|
|
|
|
|
|
|
|
2149
|
0
|
|
|
|
|
0
|
print( $_debugh |
2150
|
|
|
|
|
|
|
"$prefix (format '$fstr$locstr': OK = $tmpok, KO = $tmpko)\n" ); |
2151
|
|
|
|
|
|
|
} |
2152
|
|
|
|
|
|
|
|
2153
|
|
|
|
|
|
|
# When no parse can be done (parser to test is undef), return 1 |
2154
|
|
|
|
|
|
|
sub _try_parser { |
2155
|
11481
|
|
|
11481
|
|
21108
|
my ( $fmt, $rec, $value_to_parse ) = @_; |
2156
|
|
|
|
|
|
|
|
2157
|
11481
|
|
|
|
|
142300
|
my $parser = $fmt->parser; |
2158
|
11481
|
100
|
|
|
|
191018
|
$parser = $rec->parser_with_addition_of_time if $rec->has_found_time; |
2159
|
|
|
|
|
|
|
|
2160
|
11481
|
|
|
|
|
92946
|
my $is_ok = 1; |
2161
|
11481
|
100
|
|
|
|
37082
|
$is_ok = ( defined( $parser->parse_datetime($value_to_parse) ) ? 1 : 0 ) |
|
|
100
|
|
|
|
|
|
2162
|
|
|
|
|
|
|
if $parser; |
2163
|
|
|
|
|
|
|
|
2164
|
11481
|
100
|
|
|
|
2283436
|
unless ( defined( $rec->format ) ) { |
2165
|
8465
|
|
|
|
|
148944
|
$rec->format( $fmt->format ); |
2166
|
8465
|
|
|
|
|
263129
|
$rec->locale( $fmt->locale ); |
2167
|
|
|
|
|
|
|
} |
2168
|
|
|
|
|
|
|
|
2169
|
11481
|
|
|
|
|
205131
|
return $is_ok; |
2170
|
|
|
|
|
|
|
} |
2171
|
|
|
|
|
|
|
|
2172
|
|
|
|
|
|
|
# |
2173
|
|
|
|
|
|
|
# By checking Text::AutoCSV with NYTProf, I saw the t/02-dates2.t test was long |
2174
|
|
|
|
|
|
|
# because of the time to create Strptime parsers all along execution (while |
2175
|
|
|
|
|
|
|
# detecting formats of dates/times). I spent some time caching (manually, and |
2176
|
|
|
|
|
|
|
# painfully) Strptime parsers for naked dates, but didn't afford to doing it for |
2177
|
|
|
|
|
|
|
# the time detection. |
2178
|
|
|
|
|
|
|
# |
2179
|
|
|
|
|
|
|
# Then I came upon knowing of the Memoize module while reading |
2180
|
|
|
|
|
|
|
# HIGHER-ORDER PERL |
2181
|
|
|
|
|
|
|
# of |
2182
|
|
|
|
|
|
|
# Mark Jason Dominus, editor: Morgan Kaufmann Publishers (Elsevier) |
2183
|
|
|
|
|
|
|
# You can find here (I strongly recommend your reading this book): |
2184
|
|
|
|
|
|
|
# http://hop.perl.plover.com/book/pdf/HigherOrderPerl.pdf |
2185
|
|
|
|
|
|
|
# |
2186
|
|
|
|
|
|
|
# TODO |
2187
|
|
|
|
|
|
|
# Actually a code rewriting should be done, to discard all the caching work done |
2188
|
|
|
|
|
|
|
# for naked dates and just rely on this memoization, that is just perfect |
2189
|
|
|
|
|
|
|
# feature for this case. |
2190
|
|
|
|
|
|
|
# |
2191
|
|
|
|
|
|
|
memoize('_build_strptime_parser'); |
2192
|
|
|
|
|
|
|
sub _build_strptime_parser { DateTime::Format::Strptime->new(@_) } |
2193
|
|
|
|
|
|
|
|
2194
|
|
|
|
|
|
|
sub _guess_time_format { |
2195
|
|
|
|
|
|
|
|
2196
|
|
|
|
|
|
|
# IMPORTANT |
2197
|
|
|
|
|
|
|
# Formats are tested in the order of the list below, and the first one that |
2198
|
|
|
|
|
|
|
# succeeds stops the tests. |
2199
|
|
|
|
|
|
|
# That makes the order of the elements important: %R would match any value |
2200
|
|
|
|
|
|
|
# that'd also match %T, that'd cause to return %R whereas %T would be |
2201
|
|
|
|
|
|
|
# possible. Same with AM/PM formats. Thus the longest patterns appear first, |
2202
|
|
|
|
|
|
|
# and in particular, the patterns ending with "AM/PM" appear before all |
2203
|
|
|
|
|
|
|
# others. |
2204
|
1980
|
|
|
1980
|
|
15866
|
my @T = ( '%I:%M:%S %p', '%I:%M %p', '%I:%M:%S%p', '%I:%M%p', '%T', '%R' ); |
2205
|
|
|
|
|
|
|
|
2206
|
1980
|
|
|
|
|
4053
|
my ( $self, $format, $locale, $v, $stop ) = @_; |
2207
|
|
|
|
|
|
|
|
2208
|
1980
|
|
|
|
|
3132
|
my $_debugh = $self->{_debugh}; |
2209
|
1980
|
|
33
|
|
|
4227
|
my $debug_fmt = ( $self->{_debug} and $DEBUG_DATETIME_FORMATS ); |
2210
|
|
|
|
|
|
|
|
2211
|
1980
|
100
|
|
|
|
4136
|
return if $format =~ /:/; |
2212
|
|
|
|
|
|
|
|
2213
|
1976
|
|
|
|
|
2392
|
my $sep; |
2214
|
1976
|
100
|
|
|
|
3441
|
if ( $format eq '' ) { |
2215
|
325
|
|
|
|
|
599
|
$sep = ''; |
2216
|
|
|
|
|
|
|
} |
2217
|
|
|
|
|
|
|
else { |
2218
|
1651
|
100
|
|
|
|
8853
|
unless ( ( undef, $sep ) = |
2219
|
|
|
|
|
|
|
$v =~ /(^|\d([^0-9:]+))(\d{1,2}):(\d{1,2})(\D|$)/ ) |
2220
|
|
|
|
|
|
|
{ |
2221
|
393
|
100
|
|
|
|
789
|
if ( $v =~ /\d{4}:\d{2}(\D|$)/ ) { |
2222
|
29
|
|
|
|
|
59
|
$sep = ''; |
2223
|
|
|
|
|
|
|
} |
2224
|
|
|
|
|
|
|
else { |
2225
|
|
|
|
|
|
|
|
2226
|
364
|
50
|
|
|
|
691
|
print( $_debugh |
2227
|
|
|
|
|
|
|
"_guess_time_format(): separator candidate not found in '$v'\n" |
2228
|
|
|
|
|
|
|
) if $debug_fmt; |
2229
|
|
|
|
|
|
|
|
2230
|
364
|
|
|
|
|
867
|
return; |
2231
|
|
|
|
|
|
|
} |
2232
|
|
|
|
|
|
|
} |
2233
|
|
|
|
|
|
|
} |
2234
|
1612
|
100
|
|
|
|
3839
|
$sep = '' unless defined($sep); |
2235
|
|
|
|
|
|
|
|
2236
|
|
|
|
|
|
|
# |
2237
|
|
|
|
|
|
|
# IMPORTANT |
2238
|
|
|
|
|
|
|
# |
2239
|
|
|
|
|
|
|
# The code below allows to successfully detect DateTime format when |
2240
|
|
|
|
|
|
|
# the first lines contain things like: |
2241
|
|
|
|
|
|
|
# Jan 20 2017 2:00AM |
2242
|
|
|
|
|
|
|
# that could lead to a separator set to ' ' while actually it should be ' '. In |
2243
|
|
|
|
|
|
|
# this case |
2244
|
|
|
|
|
|
|
# if the double-space is kept, then a later value of |
2245
|
|
|
|
|
|
|
# Jan 20 2017 10:00AM |
2246
|
|
|
|
|
|
|
# won't be parsed. |
2247
|
|
|
|
|
|
|
# |
2248
|
|
|
|
|
|
|
# See t/11-bugfix.t, BUG 5, for an explanation of why the line below. |
2249
|
|
|
|
|
|
|
# |
2250
|
1612
|
100
|
|
|
|
3048
|
$sep = ' ' if $sep eq ' '; |
2251
|
|
|
|
|
|
|
|
2252
|
1612
|
50
|
|
|
|
2937
|
if ($debug_fmt) { |
2253
|
0
|
|
|
|
|
0
|
print( $_debugh " _guess_time_format(): Searching time in '$v'\n" ); |
2254
|
|
|
|
|
|
|
} |
2255
|
|
|
|
|
|
|
|
2256
|
1612
|
|
|
|
|
2707
|
for my $t (@T) { |
2257
|
9193
|
|
|
|
|
220786
|
my $increased_format = "$format$sep$t"; |
2258
|
|
|
|
|
|
|
|
2259
|
9193
|
50
|
|
|
|
15009
|
print( $_debugh |
2260
|
|
|
|
|
|
|
" _guess_time_format(): Trying format '$increased_format'\n" ) |
2261
|
|
|
|
|
|
|
if $debug_fmt; |
2262
|
|
|
|
|
|
|
|
2263
|
9193
|
|
|
|
|
20363
|
my %opts = ( pattern => $START . $increased_format . $stop ); |
2264
|
9193
|
100
|
66
|
|
|
27603
|
$opts{locale} = $locale if defined($locale) and $locale ne ''; |
2265
|
9193
|
|
|
|
|
133851
|
my $parser_of_increased_format = _build_strptime_parser(%opts); |
2266
|
|
|
|
|
|
|
next |
2267
|
|
|
|
|
|
|
unless defined( |
2268
|
9193
|
100
|
|
|
|
1359577
|
$parser_of_increased_format->parse_datetime( $START . $v . $stop ) |
2269
|
|
|
|
|
|
|
); |
2270
|
|
|
|
|
|
|
|
2271
|
254
|
50
|
|
|
|
155067
|
if ($debug_fmt) { |
2272
|
0
|
|
|
|
|
0
|
print( $_debugh " _guess_time_format(): found time in '$v'\n" ); |
2273
|
0
|
|
|
|
|
0
|
print( $_debugh " Initial format: '$format'\n" ); |
2274
|
0
|
|
|
|
|
0
|
print( $_debugh " Increased format: '$increased_format'\n" ); |
2275
|
|
|
|
|
|
|
} |
2276
|
|
|
|
|
|
|
|
2277
|
254
|
|
|
|
|
1360
|
return [ $increased_format, $locale, $parser_of_increased_format ]; |
2278
|
|
|
|
|
|
|
} |
2279
|
1358
|
|
|
|
|
42918
|
return; |
2280
|
|
|
|
|
|
|
} |
2281
|
|
|
|
|
|
|
|
2282
|
|
|
|
|
|
|
# * ********************************* * |
2283
|
|
|
|
|
|
|
# * END OF DATE FORMAT DETECTION CODE * |
2284
|
|
|
|
|
|
|
# * ********************************* * |
2285
|
|
|
|
|
|
|
|
2286
|
|
|
|
|
|
|
# Take the string of a header in $_ and replace it with the corresponding field |
2287
|
|
|
|
|
|
|
# name |
2288
|
|
|
|
|
|
|
sub _header_to_field_name { |
2289
|
1265
|
|
|
1265
|
|
2011
|
$_ = remove_accents($_); |
2290
|
1265
|
|
|
|
|
2483
|
s/[^[:alnum:]_]//gi; |
2291
|
1265
|
|
|
|
|
6845
|
s/^.*$/\U$&/; |
2292
|
|
|
|
|
|
|
} |
2293
|
|
|
|
|
|
|
|
2294
|
|
|
|
|
|
|
# Return 0 if error, 1 if all good |
2295
|
|
|
|
|
|
|
sub _S2_init_fields_from_header { |
2296
|
355
|
|
|
355
|
|
617
|
my $self = shift; |
2297
|
|
|
|
|
|
|
|
2298
|
355
|
|
|
|
|
613
|
my $has_headers = $self->{has_headers}; |
2299
|
355
|
|
|
|
|
613
|
my $_debug = $self->{_debug}; |
2300
|
355
|
|
|
|
|
570
|
my $_debugh = $self->{_debugh}; |
2301
|
|
|
|
|
|
|
|
2302
|
355
|
|
|
|
|
827
|
my $in_file_disp = $self->get_in_file_disp(); |
2303
|
|
|
|
|
|
|
|
2304
|
355
|
|
|
|
|
603
|
my $inh = $self->{_inh}; |
2305
|
355
|
|
|
|
|
572
|
my $incsv = $self->{_in_csvobj}; |
2306
|
|
|
|
|
|
|
|
2307
|
355
|
|
|
|
|
626
|
$self->{_row_read} = 0; |
2308
|
|
|
|
|
|
|
|
2309
|
355
|
|
|
|
|
581
|
my @columns; |
2310
|
|
|
|
|
|
|
my @headers; |
2311
|
355
|
100
|
|
|
|
740
|
if ($has_headers) { |
2312
|
|
|
|
|
|
|
|
2313
|
|
|
|
|
|
|
print( $_debugh |
2314
|
|
|
|
|
|
|
"$PKG: '$in_file_disp': will parse header line to get column names\n" |
2315
|
344
|
50
|
|
|
|
793
|
) if $self->{_debug_read}; |
2316
|
|
|
|
|
|
|
|
2317
|
344
|
|
|
|
|
558
|
$self->{_row_read}++; |
2318
|
|
|
|
|
|
|
|
2319
|
|
|
|
|
|
|
print( $_debugh "$PKG: '$in_file_disp': will read line #" |
2320
|
|
|
|
|
|
|
. $self->{_row_read} |
2321
|
|
|
|
|
|
|
. "\n" ) |
2322
|
344
|
50
|
|
|
|
716
|
if $self->{_debug_read}; |
2323
|
|
|
|
|
|
|
|
2324
|
344
|
50
|
|
|
|
790
|
if ( defined( $self->{_inh_header} ) ) { |
2325
|
0
|
|
|
|
|
0
|
my $l = $self->{_inh_header}; |
2326
|
0
|
|
|
|
|
0
|
my $inmemh; |
2327
|
0
|
0
|
|
|
|
0
|
if ( !open( $inmemh, '<', \$l ) ) { ## no critic (InputOutput::RequireBriefOpen) |
2328
|
0
|
|
|
|
|
0
|
$self->_print_error( |
2329
|
|
|
|
|
|
|
"can't open header line in-memory. CSV read aborted."); |
2330
|
0
|
|
|
|
|
0
|
return 0; |
2331
|
|
|
|
|
|
|
} |
2332
|
0
|
|
|
|
|
0
|
@headers = @{ _mygetline( $incsv, $inmemh ) }; |
|
0
|
|
|
|
|
0
|
|
2333
|
|
|
|
|
|
|
} |
2334
|
|
|
|
|
|
|
else { |
2335
|
344
|
|
|
|
|
705
|
my $r = _mygetline( $incsv, $inh ); |
2336
|
342
|
50
|
|
|
|
15499
|
@headers = @{$r} if defined($r); |
|
342
|
|
|
|
|
1168
|
|
2337
|
|
|
|
|
|
|
} |
2338
|
|
|
|
|
|
|
print( $_debugh "Line " |
2339
|
|
|
|
|
|
|
. $self->{_row_read} |
2340
|
|
|
|
|
|
|
. ":\n--\n" |
2341
|
|
|
|
|
|
|
. join( '::', @headers ) |
2342
|
|
|
|
|
|
|
. "\n--\n" ) |
2343
|
342
|
50
|
|
|
|
1068
|
if $self->{_debug_read}; |
2344
|
|
|
|
|
|
|
} |
2345
|
|
|
|
|
|
|
|
2346
|
353
|
100
|
100
|
|
|
1513
|
if ( $has_headers and ( !defined $self->{fields_column_names} ) ) { |
2347
|
336
|
|
|
|
|
545
|
my %indexes; |
2348
|
336
|
100
|
|
|
|
782
|
if ( defined( $self->{fields_hr} ) ) { |
2349
|
12
|
50
|
|
|
|
56
|
if ( |
2350
|
|
|
|
|
|
|
!$self->_process_header( |
2351
|
|
|
|
|
|
|
\@headers, $self->{fields_hr}, \%indexes |
2352
|
|
|
|
|
|
|
) |
2353
|
|
|
|
|
|
|
) |
2354
|
|
|
|
|
|
|
{ |
2355
|
0
|
|
|
|
|
0
|
$self->_print_error("missing headers. CSV read aborted."); |
2356
|
0
|
|
|
|
|
0
|
return 0; |
2357
|
|
|
|
|
|
|
} |
2358
|
12
|
50
|
|
|
|
34
|
if ($_debug) { |
2359
|
0
|
|
|
|
|
0
|
print( $_debugh " \%indexes:\n" ); |
2360
|
0
|
|
|
|
|
0
|
for my $k ( sort keys %indexes ) { |
2361
|
0
|
|
|
|
|
0
|
print( $_debugh " \t$k => $indexes{$k}\n" ); |
2362
|
|
|
|
|
|
|
} |
2363
|
|
|
|
|
|
|
} |
2364
|
12
|
|
|
|
|
68
|
for ( sort keys %indexes ) { |
2365
|
48
|
50
|
|
|
|
90
|
next if $_ eq ''; |
2366
|
48
|
|
|
|
|
95
|
$columns[ $indexes{$_} ] = $_; |
2367
|
|
|
|
|
|
|
} |
2368
|
|
|
|
|
|
|
} |
2369
|
|
|
|
|
|
|
else { |
2370
|
324
|
|
|
|
|
772
|
@columns = @headers; |
2371
|
324
|
|
|
|
|
632
|
map { _header_to_field_name } @columns; |
|
1265
|
|
|
|
|
7838
|
|
2372
|
|
|
|
|
|
|
} |
2373
|
|
|
|
|
|
|
} |
2374
|
|
|
|
|
|
|
|
2375
|
14
|
|
|
|
|
46
|
@columns = @{ $self->{fields_column_names} } |
2376
|
353
|
100
|
|
|
|
1107
|
if defined( $self->{fields_column_names} ); |
2377
|
|
|
|
|
|
|
|
2378
|
|
|
|
|
|
|
# Avoid undef in column names... I prefer empty strings |
2379
|
353
|
100
|
|
|
|
685
|
@columns = map { defined($_) ? $_ : '' } @columns; |
|
1389
|
|
|
|
|
3204
|
|
2380
|
|
|
|
|
|
|
|
2381
|
353
|
50
|
|
|
|
841
|
if ($_debug) { |
2382
|
0
|
|
|
|
|
0
|
print( $_debugh "-- CSV headers management\n" ); |
2383
|
0
|
0
|
|
|
|
0
|
if (@columns) { |
2384
|
0
|
|
|
|
|
0
|
printf( $_debugh " %-3s %-40s %-40s\n", |
2385
|
|
|
|
|
|
|
'COL', 'CSV Header', 'Hash Key' ); |
2386
|
0
|
|
|
|
|
0
|
for my $i ( 0 .. $#columns ) { |
2387
|
0
|
|
|
|
|
0
|
my $h = ''; |
2388
|
0
|
0
|
|
|
|
0
|
$h = $headers[$i] if defined( $headers[$i] ); |
2389
|
0
|
|
|
|
|
0
|
printf( $_debugh " %03d %-40s %-40s\n", |
2390
|
|
|
|
|
|
|
$i, "'$h'", "'$columns[$i]'" ); |
2391
|
|
|
|
|
|
|
} |
2392
|
|
|
|
|
|
|
} |
2393
|
|
|
|
|
|
|
else { |
2394
|
0
|
|
|
|
|
0
|
print( $_debugh " No headers\n" ); |
2395
|
|
|
|
|
|
|
} |
2396
|
|
|
|
|
|
|
} |
2397
|
|
|
|
|
|
|
|
2398
|
353
|
|
|
|
|
511
|
my %regular_named_fields; |
2399
|
353
|
|
|
|
|
1027
|
for my $i ( 0 .. $#columns ) { |
2400
|
1389
|
100
|
66
|
|
|
5341
|
$regular_named_fields{ $columns[$i] } = $i |
2401
|
|
|
|
|
|
|
if defined( $columns[$i] ) |
2402
|
|
|
|
|
|
|
and $columns[$i] ne ''; |
2403
|
|
|
|
|
|
|
} |
2404
|
353
|
|
|
|
|
1773
|
$self->{_regular_named_fields} = {%regular_named_fields}; |
2405
|
353
|
|
|
|
|
1243
|
$self->{_S2_columns} = [@columns]; |
2406
|
353
|
100
|
|
|
|
1256
|
$self->{_S2_headers} = [@headers] if $has_headers; |
2407
|
|
|
|
|
|
|
|
2408
|
353
|
|
|
|
|
1363
|
return 1; |
2409
|
|
|
|
|
|
|
} |
2410
|
|
|
|
|
|
|
|
2411
|
|
|
|
|
|
|
sub out_header { |
2412
|
8
|
|
|
8
|
1
|
17
|
my $self = shift; |
2413
|
8
|
|
|
|
|
108
|
validate_pos( @_, { type => SCALAR }, { type => SCALAR } ); |
2414
|
|
|
|
|
|
|
|
2415
|
8
|
|
|
|
|
34
|
my ( $field, $header ) = @_; |
2416
|
8
|
100
|
|
|
|
25
|
$self->{_out_headers} = {} unless exists $self->{_out_headers}; |
2417
|
|
|
|
|
|
|
|
2418
|
|
|
|
|
|
|
$self->_print_warning("out_header: field $field already set") |
2419
|
8
|
50
|
|
|
|
23
|
if exists $self->{_out_headers}->{$field}; |
2420
|
|
|
|
|
|
|
|
2421
|
8
|
|
|
|
|
21
|
$self->{_out_headers}->{$field} = $header; |
2422
|
|
|
|
|
|
|
|
2423
|
8
|
|
|
|
|
35
|
return $self; |
2424
|
|
|
|
|
|
|
} |
2425
|
|
|
|
|
|
|
|
2426
|
|
|
|
|
|
|
# Return 0 if error, 1 if all good |
2427
|
|
|
|
|
|
|
sub _S3_init_fields_extra { |
2428
|
355
|
|
|
355
|
|
591
|
my $self = shift; |
2429
|
|
|
|
|
|
|
|
2430
|
355
|
|
|
|
|
633
|
my $_debug = $self->{_debug}; |
2431
|
355
|
|
|
|
|
591
|
my $_debugh = $self->{_debugh}; |
2432
|
|
|
|
|
|
|
|
2433
|
355
|
|
|
|
|
564
|
my $verbose = $self->{verbose}; |
2434
|
|
|
|
|
|
|
|
2435
|
355
|
|
|
|
|
546
|
my $has_headers = $self->{has_headers}; |
2436
|
|
|
|
|
|
|
|
2437
|
355
|
|
|
|
|
471
|
my %named_fields = %{ $self->{_regular_named_fields} }; |
|
355
|
|
|
|
|
1619
|
|
2438
|
355
|
|
|
|
|
730
|
my @columns = @{ $self->{_S2_columns} }; |
|
355
|
|
|
|
|
917
|
|
2439
|
355
|
|
|
|
|
554
|
my @headers; |
2440
|
355
|
100
|
|
|
|
798
|
@headers = @{ $self->{_S2_headers} } if $has_headers; |
|
344
|
|
|
|
|
856
|
|
2441
|
|
|
|
|
|
|
|
2442
|
355
|
|
|
|
|
629
|
my @extra_fields_indexes; |
2443
|
|
|
|
|
|
|
my @extra_fields_definitions_list; |
2444
|
36
|
|
|
|
|
70
|
@extra_fields_definitions_list = @{ $self->{_extra_fields} } |
2445
|
355
|
100
|
|
|
|
808
|
if exists $self->{_extra_fields}; |
2446
|
355
|
|
|
|
|
579
|
my %extra_fields_definitions; |
2447
|
|
|
|
|
|
|
|
2448
|
|
|
|
|
|
|
my @multiline; |
2449
|
355
|
100
|
|
|
|
843
|
@multiline = @{ $self->{_multiline} } if defined( $self->{_multiline} ); |
|
340
|
|
|
|
|
641
|
|
2450
|
|
|
|
|
|
|
|
2451
|
355
|
|
|
|
|
502
|
my @coldata; |
2452
|
355
|
|
|
|
|
819
|
for my $i ( 0 .. $#columns ) { |
2453
|
1395
|
|
|
|
|
46899
|
my $col = $columns[$i]; |
2454
|
1395
|
|
|
|
|
1771
|
my $h; |
2455
|
1395
|
100
|
|
|
|
2637
|
$h = $headers[$i] if $has_headers; |
2456
|
1395
|
100
|
|
|
|
23846
|
push @coldata, |
2457
|
|
|
|
|
|
|
ColData->new( |
2458
|
|
|
|
|
|
|
field_name => $col, |
2459
|
|
|
|
|
|
|
header_text => $h, |
2460
|
|
|
|
|
|
|
description => '', |
2461
|
|
|
|
|
|
|
multiline => ( $multiline[$i] ? 'm' : '1' ) |
2462
|
|
|
|
|
|
|
); |
2463
|
|
|
|
|
|
|
} |
2464
|
|
|
|
|
|
|
|
2465
|
355
|
|
|
|
|
15094
|
for my $edef (@extra_fields_definitions_list) { |
2466
|
92
|
|
|
|
|
3788
|
my $c = $edef->check_field_existence; |
2467
|
92
|
100
|
|
|
|
612
|
if ( defined($c) ) { |
2468
|
80
|
100
|
|
|
|
178
|
unless ( exists $named_fields{$c} ) { |
2469
|
9
|
|
|
|
|
112
|
$self->_print_error( |
2470
|
|
|
|
|
|
|
"unknown field '" . $edef->check_field_existence . "'", |
2471
|
|
|
|
|
|
|
0, ERR_UNKNOWN_FIELD, {%named_fields} ); |
2472
|
8
|
|
|
|
|
21
|
next; |
2473
|
|
|
|
|
|
|
} |
2474
|
|
|
|
|
|
|
} |
2475
|
|
|
|
|
|
|
|
2476
|
83
|
|
|
|
|
139
|
my @e_eclated = $edef; |
2477
|
|
|
|
|
|
|
|
2478
|
83
|
100
|
100
|
|
|
970
|
if ( $edef->ef_type == $EF_LINK and $edef->link_remote_read eq '*' ) { |
2479
|
2
|
|
|
|
|
68
|
my @cols = $edef->link_remote_obj->get_fields_names(); |
2480
|
|
|
|
|
|
|
|
2481
|
2
|
|
|
|
|
4
|
@e_eclated = (); |
2482
|
2
|
|
|
|
|
8
|
my %nf = %named_fields; |
2483
|
|
|
|
|
|
|
|
2484
|
2
|
|
|
|
|
5
|
for my $c (@cols) { |
2485
|
|
|
|
|
|
|
|
2486
|
4
|
|
|
|
|
59
|
my $ex_base = $edef->self_name . $c; |
2487
|
4
|
|
|
|
|
27
|
my $ex_target = $ex_base; |
2488
|
4
|
|
|
|
|
7
|
my $i = 1; |
2489
|
4
|
|
|
|
|
11
|
while ( exists $nf{$ex_target} ) { |
2490
|
1
|
|
|
|
|
2
|
$i++; |
2491
|
1
|
|
|
|
|
5
|
$ex_target = $ex_base . '_' . $i; |
2492
|
|
|
|
|
|
|
} |
2493
|
|
|
|
|
|
|
|
2494
|
4
|
|
|
|
|
50
|
my $e = ExtraField->new( |
2495
|
|
|
|
|
|
|
ef_type => $EF_LINK, |
2496
|
|
|
|
|
|
|
self_name => $ex_target, |
2497
|
|
|
|
|
|
|
description => $edef->description . " ($c)", |
2498
|
|
|
|
|
|
|
|
2499
|
|
|
|
|
|
|
link_self_search => $edef->link_self_search, |
2500
|
|
|
|
|
|
|
link_remote_obj => $edef->link_remote_obj, |
2501
|
|
|
|
|
|
|
link_remote_search => $edef->link_remote_search, |
2502
|
|
|
|
|
|
|
link_remote_read => $c, |
2503
|
|
|
|
|
|
|
|
2504
|
|
|
|
|
|
|
link_vlookup_opts => $edef->link_vlookup_opts |
2505
|
|
|
|
|
|
|
); |
2506
|
4
|
|
|
|
|
354
|
push @e_eclated, $e; |
2507
|
4
|
|
|
|
|
14
|
$nf{$ex_target} = undef; |
2508
|
|
|
|
|
|
|
} |
2509
|
|
|
|
|
|
|
} |
2510
|
|
|
|
|
|
|
|
2511
|
83
|
|
|
|
|
1166
|
for my $e1 (@e_eclated) { |
2512
|
85
|
100
|
|
|
|
1126
|
if ( exists $named_fields{ $e1->self_name } ) { |
2513
|
6
|
|
|
|
|
101
|
$self->_print_error( "extra field: duplicate field name: '" |
2514
|
|
|
|
|
|
|
. $e1->self_name |
2515
|
|
|
|
|
|
|
. "'" ); |
2516
|
6
|
|
|
|
|
17
|
next; |
2517
|
|
|
|
|
|
|
} |
2518
|
|
|
|
|
|
|
|
2519
|
79
|
|
|
|
|
523
|
my $index_of_new_element = $#columns + 1; |
2520
|
79
|
|
|
|
|
117
|
push @extra_fields_indexes, $index_of_new_element; |
2521
|
79
|
|
|
|
|
925
|
$columns[$index_of_new_element] = $e1->self_name; |
2522
|
79
|
|
|
|
|
1304
|
$named_fields{ $e1->self_name } = $index_of_new_element; |
2523
|
79
|
|
|
|
|
1248
|
$extra_fields_definitions{ $e1->self_name } = $e1; |
2524
|
|
|
|
|
|
|
|
2525
|
79
|
50
|
|
|
|
1325
|
push @headers, $e1->self_name if $has_headers; |
2526
|
79
|
|
|
|
|
1244
|
push @coldata, |
2527
|
|
|
|
|
|
|
ColData->new( |
2528
|
|
|
|
|
|
|
field_name => $e1->self_name, |
2529
|
|
|
|
|
|
|
header_text => $e1->self_name, |
2530
|
|
|
|
|
|
|
description => $e1->description, |
2531
|
|
|
|
|
|
|
multiline => '?' |
2532
|
|
|
|
|
|
|
); |
2533
|
|
|
|
|
|
|
} |
2534
|
|
|
|
|
|
|
|
2535
|
|
|
|
|
|
|
} |
2536
|
354
|
100
|
|
|
|
3314
|
$self->{_headers} = [@headers] if $has_headers; |
2537
|
354
|
|
|
|
|
770
|
$self->{_extra_fields_indexes} = [@extra_fields_indexes]; |
2538
|
354
|
|
|
|
|
1075
|
$self->{_columns} = [@columns]; |
2539
|
354
|
|
|
|
|
858
|
$self->{_extra_fields_definitions} = {%extra_fields_definitions}; |
2540
|
|
|
|
|
|
|
|
2541
|
354
|
|
|
|
|
1651
|
$self->{_named_fields} = {%named_fields}; |
2542
|
|
|
|
|
|
|
|
2543
|
354
|
|
|
|
|
1269
|
$self->_detect_dates_formats(); |
2544
|
|
|
|
|
|
|
|
2545
|
351
|
|
|
|
|
1478
|
$self->{_read_update_after_ar} = []; |
2546
|
351
|
|
|
|
|
931
|
$self->{_write_update_before_ar} = []; |
2547
|
351
|
|
|
|
|
508
|
my @dates_formats; |
2548
|
60
|
|
|
|
|
231
|
@dates_formats = @{ $self->{_dates_formats} } |
2549
|
351
|
100
|
|
|
|
843
|
if defined( $self->{_dates_formats} ); |
2550
|
351
|
|
|
|
|
958
|
for my $i ( 0 .. $#columns ) { |
2551
|
1455
|
|
|
|
|
2097
|
my $dt_format; |
2552
|
|
|
|
|
|
|
my $dt_locale; |
2553
|
1455
|
100
|
|
|
|
2652
|
if ( defined( $dates_formats[$i] ) ) { |
2554
|
330
|
|
|
|
|
598
|
$dt_format = $dates_formats[$i]->[0]; |
2555
|
330
|
|
|
|
|
506
|
$dt_locale = $dates_formats[$i]->[1]; |
2556
|
|
|
|
|
|
|
} |
2557
|
1455
|
|
|
|
|
21525
|
$coldata[$i]->dt_format($dt_format); |
2558
|
1455
|
|
|
|
|
25988
|
$coldata[$i]->dt_locale($dt_locale); |
2559
|
|
|
|
|
|
|
|
2560
|
1455
|
100
|
|
|
|
9777
|
next unless defined($dt_format); |
2561
|
|
|
|
|
|
|
|
2562
|
330
|
|
|
|
|
449
|
my %opts_in; |
2563
|
330
|
100
|
66
|
|
|
1077
|
$opts_in{locale} = $dt_locale |
2564
|
|
|
|
|
|
|
if defined($dt_locale) |
2565
|
|
|
|
|
|
|
and $dt_locale ne ''; |
2566
|
|
|
|
|
|
|
|
2567
|
330
|
|
|
|
|
4295
|
my $obj_strptime_in = |
2568
|
|
|
|
|
|
|
_build_strptime_parser( pattern => $dt_format, %opts_in ); |
2569
|
|
|
|
|
|
|
|
2570
|
330
|
|
|
|
|
74967
|
my %opts_out; |
2571
|
|
|
|
|
|
|
my $loc_out = ( |
2572
|
|
|
|
|
|
|
exists $self->{out_dates_locale} |
2573
|
|
|
|
|
|
|
? $self->{out_dates_locale} |
2574
|
330
|
50
|
|
|
|
786
|
: $dt_locale |
2575
|
|
|
|
|
|
|
); |
2576
|
330
|
100
|
66
|
|
|
1285
|
$opts_out{locale} = $loc_out if defined($loc_out) and $loc_out ne ''; |
2577
|
|
|
|
|
|
|
my $obj_strptime_out = _build_strptime_parser( |
2578
|
|
|
|
|
|
|
pattern => ( |
2579
|
|
|
|
|
|
|
exists $self->{out_dates_format} |
2580
|
|
|
|
|
|
|
? $self->{out_dates_format} |
2581
|
330
|
50
|
|
|
|
4947
|
: $dt_format |
2582
|
|
|
|
|
|
|
), |
2583
|
|
|
|
|
|
|
%opts_out |
2584
|
|
|
|
|
|
|
); |
2585
|
|
|
|
|
|
|
|
2586
|
330
|
|
|
|
|
6947
|
my $refsub_is_datetime_empty = $self->{_refsub_is_datetime_empty}; |
2587
|
330
|
|
|
|
|
849
|
my $in_file_disp = $self->get_in_file_disp(); |
2588
|
|
|
|
|
|
|
|
2589
|
|
|
|
|
|
|
$self->{_read_update_after_ar}->[$i] = sub { |
2590
|
|
|
|
|
|
|
return |
2591
|
352
|
100
|
66
|
352
|
|
1657
|
if ( !defined $_ ) |
|
|
|
33
|
|
|
|
|
|
|
|
66
|
|
|
|
|
2592
|
|
|
|
|
|
|
or $_ eq '' |
2593
|
|
|
|
|
|
|
or ( defined($refsub_is_datetime_empty) |
2594
|
|
|
|
|
|
|
and $refsub_is_datetime_empty->($_) ); |
2595
|
|
|
|
|
|
|
|
2596
|
315
|
|
|
|
|
447
|
my $s = $_[0]; |
2597
|
315
|
|
|
|
|
603
|
my $field = _get_def( $_[1], '<?>' ); |
2598
|
|
|
|
|
|
|
|
2599
|
315
|
|
|
|
|
905
|
my $dt = $obj_strptime_in->parse_datetime($_); |
2600
|
|
|
|
|
|
|
|
2601
|
315
|
0
|
33
|
|
|
167140
|
if ( $_debug |
|
|
|
33
|
|
|
|
|
2602
|
|
|
|
|
|
|
and $DEBUG_DATETIME_FORMATS |
2603
|
|
|
|
|
|
|
and $DEBUG_DATETIME_FORMATS_EVEN_MORE ) |
2604
|
|
|
|
|
|
|
{ |
2605
|
0
|
0
|
|
|
|
0
|
print( $_debugh "-- Record " |
2606
|
|
|
|
|
|
|
. $s->get_recnum() |
2607
|
|
|
|
|
|
|
. ", field '$field':\n String parsed: '$_'\n" |
2608
|
|
|
|
|
|
|
. " Parse format: '$dt_format'\n" |
2609
|
|
|
|
|
|
|
. " DateTime obj: <" |
2610
|
|
|
|
|
|
|
. ( defined($dt) ? $dt . '' : 'undef' ) |
2611
|
|
|
|
|
|
|
. ">\n" ); |
2612
|
|
|
|
|
|
|
} |
2613
|
|
|
|
|
|
|
|
2614
|
315
|
100
|
|
|
|
643
|
if ( !defined($dt) ) { |
2615
|
2
|
|
|
|
|
10
|
my $recnum = $s->get_recnum(); |
2616
|
2
|
50
|
|
|
|
6
|
if ($verbose) { |
2617
|
0
|
|
|
|
|
0
|
$s->_print( "$PKG: " |
2618
|
|
|
|
|
|
|
. "$in_file_disp: record $recnum: field $field: " |
2619
|
|
|
|
|
|
|
. "unable to parse DateTime\n" ); |
2620
|
0
|
|
|
|
|
0
|
$s->_print("$PKG: field: '$_'\n"); |
2621
|
0
|
|
|
|
|
0
|
$s->_print("$PKG: format: '$dt_format'\n"); |
2622
|
0
|
0
|
|
|
|
0
|
$s->_print( "$PKG: " |
2623
|
|
|
|
|
|
|
. "locale: '" |
2624
|
|
|
|
|
|
|
. ( $dt_locale eq '' ? '<none>' : $dt_locale ) |
2625
|
|
|
|
|
|
|
. "'\n" ); |
2626
|
0
|
|
|
|
|
0
|
$s->_print( "$PKG: " |
2627
|
|
|
|
|
|
|
. "Probable cause: when detecting DateTime format, " |
2628
|
|
|
|
|
|
|
. "$PKG will stop reading\n" ); |
2629
|
0
|
|
|
|
|
0
|
$s->_print( "$PKG: " |
2630
|
|
|
|
|
|
|
. "input as soon as the format is worked out. " |
2631
|
|
|
|
|
|
|
. "If a value found later\n" ); |
2632
|
0
|
|
|
|
|
0
|
$s->_print( "$PKG: " |
2633
|
|
|
|
|
|
|
. "turns out to use another DateTime format, " |
2634
|
|
|
|
|
|
|
. "it'll generate a DateTime\n" ); |
2635
|
0
|
|
|
|
|
0
|
$s->_print("$PKG: parse error, as is the case now.\n"); |
2636
|
0
|
|
|
|
|
0
|
$s->_print_error("unable to parse DateTime"); |
2637
|
|
|
|
|
|
|
} |
2638
|
|
|
|
|
|
|
else { |
2639
|
2
|
|
|
|
|
18
|
$s->_print_error( |
2640
|
|
|
|
|
|
|
"$in_file_disp: record $recnum: field $field: " |
2641
|
|
|
|
|
|
|
. "unable to parse DateTime '$_'" ); |
2642
|
|
|
|
|
|
|
} |
2643
|
|
|
|
|
|
|
} |
2644
|
|
|
|
|
|
|
|
2645
|
314
|
|
|
|
|
545
|
return $dt; |
2646
|
330
|
|
|
|
|
2241
|
}; |
2647
|
|
|
|
|
|
|
$self->{_write_update_before_ar}->[$i] = sub { |
2648
|
96
|
100
|
|
96
|
|
181
|
return '' unless defined($_); |
2649
|
87
|
100
|
|
|
|
188
|
return $_ if !ref($_); |
2650
|
81
|
50
|
|
|
|
231
|
return $_ unless $_->isa('DateTime'); |
2651
|
|
|
|
|
|
|
|
2652
|
81
|
|
|
|
|
259
|
my $str = $obj_strptime_out->format_datetime($_); |
2653
|
|
|
|
|
|
|
|
2654
|
81
|
50
|
|
|
|
14831
|
if ( !defined($str) ) { |
2655
|
0
|
|
|
|
|
0
|
my $s = $_[0]; |
2656
|
0
|
|
|
|
|
0
|
my $recnum = $s->get_recnum(); |
2657
|
0
|
|
|
|
|
0
|
my $field = _get_def( $_[1], '<?>' ); |
2658
|
0
|
|
|
|
|
0
|
$s->_print_error( |
2659
|
|
|
|
|
|
|
"$in_file_disp: record $recnum: field $field: " |
2660
|
|
|
|
|
|
|
. "unable to print DateTime '$_'" ); |
2661
|
|
|
|
|
|
|
} |
2662
|
|
|
|
|
|
|
|
2663
|
81
|
|
|
|
|
164
|
return $str; |
2664
|
330
|
|
|
|
|
1479
|
}; |
2665
|
|
|
|
|
|
|
} |
2666
|
|
|
|
|
|
|
|
2667
|
351
|
|
|
|
|
1352
|
$self->{_coldata} = [@coldata]; |
2668
|
|
|
|
|
|
|
|
2669
|
351
|
|
|
|
|
1321
|
my @loop = ( |
2670
|
|
|
|
|
|
|
[ '_read_update_after_hr', '_read_update_after_ar', 'read post' ], |
2671
|
|
|
|
|
|
|
[ '_write_update_before_hr', '_write_update_before_ar', 'write pre' ] |
2672
|
|
|
|
|
|
|
); |
2673
|
351
|
|
|
|
|
955
|
for my $ii ( 0 .. $#loop ) { |
2674
|
702
|
|
|
|
|
1074
|
my $l = $loop[$ii]; |
2675
|
|
|
|
|
|
|
|
2676
|
702
|
|
|
|
|
1283
|
my $ht = $self->{ $l->[0] }; |
2677
|
702
|
|
|
|
|
907
|
my @subrefs = @{ $self->{ $l->[1] } }; |
|
702
|
|
|
|
|
1421
|
|
2678
|
702
|
|
|
|
|
953
|
for my $field ( keys %{$ht} ) { |
|
702
|
|
|
|
|
1503
|
|
2679
|
70
|
50
|
|
|
|
136
|
unless ( exists $named_fields{$field} ) { |
2680
|
0
|
|
|
|
|
0
|
$self->_print_error( $l->[2] . ": unknown field '$field'", |
2681
|
|
|
|
|
|
|
0, ERR_UNKNOWN_FIELD, {%named_fields} ); |
2682
|
0
|
|
|
|
|
0
|
next; |
2683
|
|
|
|
|
|
|
} |
2684
|
|
|
|
|
|
|
|
2685
|
70
|
|
|
|
|
93
|
my $i = $named_fields{$field}; |
2686
|
|
|
|
|
|
|
|
2687
|
70
|
|
|
|
|
106
|
my @allsubs; |
2688
|
70
|
|
|
|
|
86
|
push @allsubs, @{ $ht->{$field} }; |
|
70
|
|
|
|
|
119
|
|
2689
|
70
|
100
|
|
|
|
128
|
if ( defined( $subrefs[$i] ) ) { |
2690
|
2
|
50
|
|
|
|
6
|
unshift @allsubs, $subrefs[$i] if $ii == 0; |
2691
|
2
|
50
|
|
|
|
4
|
push @allsubs, $subrefs[$i] if $ii == 1; |
2692
|
|
|
|
|
|
|
} |
2693
|
|
|
|
|
|
|
|
2694
|
|
|
|
|
|
|
my $finalsub = sub { |
2695
|
196
|
|
|
196
|
|
253
|
for my $s (@allsubs) { |
2696
|
262
|
|
|
|
|
802
|
$_ = $s->(@_); |
2697
|
|
|
|
|
|
|
} |
2698
|
189
|
|
|
|
|
3462
|
return $_; |
2699
|
70
|
|
|
|
|
198
|
}; |
2700
|
70
|
|
|
|
|
138
|
$subrefs[$i] = $finalsub; |
2701
|
|
|
|
|
|
|
|
2702
|
|
|
|
|
|
|
} |
2703
|
702
|
|
|
|
|
1859
|
$self->{ $l->[1] } = [@subrefs]; |
2704
|
|
|
|
|
|
|
} |
2705
|
|
|
|
|
|
|
|
2706
|
351
|
|
|
|
|
601
|
my $tmp; |
2707
|
351
|
|
|
|
|
1337
|
$tmp = _get_def( $self->{out_fields}, $self->{write_fields} ); |
2708
|
351
|
|
|
|
|
826
|
my @wf; |
2709
|
351
|
100
|
|
|
|
817
|
@wf = @$tmp if defined $tmp; |
2710
|
351
|
|
|
|
|
571
|
my $count_field_not_found = 0; |
2711
|
351
|
|
|
|
|
770
|
for (@wf) { |
2712
|
16
|
100
|
66
|
|
|
102
|
next if ( !defined $_ ) or $_ eq '' or exists $named_fields{$_}; |
|
|
|
100
|
|
|
|
|
2713
|
3
|
|
|
|
|
6
|
$count_field_not_found++; |
2714
|
3
|
|
|
|
|
25
|
$self->_print_error( "out_fields: unknown field '$_'", |
2715
|
|
|
|
|
|
|
1, ERR_UNKNOWN_FIELD, {%named_fields} ); |
2716
|
|
|
|
|
|
|
} |
2717
|
351
|
100
|
|
|
|
751
|
if ($count_field_not_found) { |
2718
|
2
|
|
|
|
|
18
|
$self->_print_error("non existent field(s) encountered"); |
2719
|
1
|
|
|
|
|
10
|
delete $self->{out_fields}; |
2720
|
1
|
|
|
|
|
4
|
delete $self->{write_fields}; |
2721
|
|
|
|
|
|
|
} |
2722
|
|
|
|
|
|
|
|
2723
|
350
|
|
|
|
|
507
|
my %sh; |
2724
|
350
|
100
|
|
|
|
819
|
%sh = %{ $self->{_out_headers} } if defined( $self->{_out_headers} ); |
|
4
|
|
|
|
|
19
|
|
2725
|
350
|
|
|
|
|
559
|
$count_field_not_found = 0; |
2726
|
350
|
|
|
|
|
837
|
for ( keys %sh ) { |
2727
|
8
|
100
|
33
|
|
|
47
|
next if ( !defined $_ ) or $_ eq '' or exists $named_fields{$_}; |
|
|
|
66
|
|
|
|
|
2728
|
2
|
|
|
|
|
3
|
$count_field_not_found++; |
2729
|
2
|
|
|
|
|
13
|
$self->_print_error( "out_header: unknown field '$_'", |
2730
|
|
|
|
|
|
|
1, ERR_UNKNOWN_FIELD, {%named_fields} ); |
2731
|
|
|
|
|
|
|
} |
2732
|
350
|
100
|
|
|
|
668
|
$self->_print_error("non existent field(s) encountered") |
2733
|
|
|
|
|
|
|
if $count_field_not_found; |
2734
|
|
|
|
|
|
|
|
2735
|
349
|
|
|
|
|
2311
|
return 1; |
2736
|
|
|
|
|
|
|
} |
2737
|
|
|
|
|
|
|
|
2738
|
|
|
|
|
|
|
# |
2739
|
|
|
|
|
|
|
# Return 0 if there's no more records (error or eof reached), 1 if a record got |
2740
|
|
|
|
|
|
|
# read successfully. |
2741
|
|
|
|
|
|
|
# |
2742
|
|
|
|
|
|
|
# If return value is 1: |
2743
|
|
|
|
|
|
|
# $$ref_ar and $$ref_hr are set to array ref and hash ref of the record, |
2744
|
|
|
|
|
|
|
# respectively |
2745
|
|
|
|
|
|
|
# |
2746
|
|
|
|
|
|
|
# If return value is 0: |
2747
|
|
|
|
|
|
|
# $$ref_ar and $$ref_hr are set to undef if an error occured |
2748
|
|
|
|
|
|
|
# $$ref_ar and $$ref_hr are set to a scalar if eof reached |
2749
|
|
|
|
|
|
|
# |
2750
|
|
|
|
|
|
|
sub _read_one_record_from_input { |
2751
|
2341
|
|
|
2341
|
|
4250
|
my ( $self, $ref_ar, $ref_row_hr ) = @_; |
2752
|
|
|
|
|
|
|
|
2753
|
2341
|
|
|
|
|
3549
|
my $_debug = $self->{_debug}; |
2754
|
2341
|
|
|
|
|
3369
|
my $_debug_extra_fields = $self->{_debug_extra_fields}; |
2755
|
2341
|
|
|
|
|
3050
|
my $_debugh = $self->{_debugh}; |
2756
|
|
|
|
|
|
|
|
2757
|
2341
|
|
|
|
|
4328
|
my $in_file_disp = $self->get_in_file_disp(); |
2758
|
|
|
|
|
|
|
|
2759
|
2341
|
|
|
|
|
3477
|
my $incsv = $self->{_in_csvobj}; |
2760
|
2341
|
|
|
|
|
2822
|
my $ar; |
2761
|
|
|
|
|
|
|
|
2762
|
|
|
|
|
|
|
print( $_debugh "$PKG: '$in_file_disp': will read line #" |
2763
|
|
|
|
|
|
|
. ( $self->{_row_read} + 1 ) |
2764
|
|
|
|
|
|
|
. "\n" ) |
2765
|
2341
|
50
|
|
|
|
4048
|
if $self->{_debug_read}; |
2766
|
|
|
|
|
|
|
|
2767
|
2341
|
100
|
|
|
|
4056
|
unless ( $ar = _mygetline( $incsv, $self->{_inh} ) ) { |
2768
|
286
|
50
|
|
|
|
10007
|
if ( !$incsv->eof() ) { |
2769
|
0
|
|
|
|
|
0
|
my ( $code, $str, $pos ) = $incsv->error_diag(); |
2770
|
0
|
|
|
|
|
0
|
$self->_print_error( "$code: $str, record " |
2771
|
|
|
|
|
|
|
. $incsv->record_number |
2772
|
|
|
|
|
|
|
. ", position $pos" ); |
2773
|
0
|
|
|
|
|
0
|
$$ref_ar = undef; |
2774
|
0
|
|
|
|
|
0
|
$$ref_row_hr = undef; |
2775
|
|
|
|
|
|
|
} |
2776
|
|
|
|
|
|
|
else { |
2777
|
286
|
|
|
|
|
1781
|
$$ref_ar = 1; |
2778
|
286
|
|
|
|
|
683
|
$$ref_row_hr = 1; |
2779
|
|
|
|
|
|
|
} |
2780
|
|
|
|
|
|
|
|
2781
|
286
|
|
|
|
|
861
|
$self->_close_inh(); |
2782
|
|
|
|
|
|
|
|
2783
|
286
|
|
|
|
|
944
|
return 0; |
2784
|
|
|
|
|
|
|
} |
2785
|
|
|
|
|
|
|
|
2786
|
2055
|
|
|
|
|
58344
|
$self->{_row_read}++; |
2787
|
|
|
|
|
|
|
|
2788
|
2055
|
|
|
|
|
2789
|
my %named_fields = %{ $self->{_named_fields} }; |
|
2055
|
|
|
|
|
9074
|
|
2789
|
|
|
|
|
|
|
|
2790
|
2055
|
50
|
|
|
|
4801
|
if ( $self->{_debug_read} ) { |
2791
|
0
|
|
|
|
|
0
|
print( $_debugh "Line " . $self->{_row_read} . ":\n--\n" ); |
2792
|
0
|
|
|
|
|
0
|
for ( sort keys %named_fields ) { |
2793
|
0
|
|
|
|
|
0
|
my $c = _get_def( $ar->[ $named_fields{$_} ], '<undef>' ); |
2794
|
0
|
|
|
|
|
0
|
print( $_debugh " $_ => '" . $c . "'\n" ); |
2795
|
|
|
|
|
|
|
} |
2796
|
|
|
|
|
|
|
} |
2797
|
|
|
|
|
|
|
|
2798
|
2055
|
|
|
|
|
2949
|
my $columns_ar = $self->{_columns}; |
2799
|
|
|
|
|
|
|
|
2800
|
2055
|
|
|
|
|
2871
|
my $no_undef = $self->{no_undef}; |
2801
|
2055
|
100
|
|
|
|
3325
|
if ($no_undef) { |
2802
|
30
|
|
|
|
|
37
|
for ( 0 .. $#{$columns_ar} ) { |
|
30
|
|
|
|
|
68
|
|
2803
|
324
|
100
|
|
|
|
575
|
$ar->[$_] = '' unless defined( $ar->[$_] ); |
2804
|
|
|
|
|
|
|
} |
2805
|
|
|
|
|
|
|
} |
2806
|
|
|
|
|
|
|
|
2807
|
2055
|
|
|
|
|
2988
|
my $row_hr = {}; |
2808
|
|
|
|
|
|
|
$row_hr->{$_} = $ar->[ $self->{_regular_named_fields}->{$_} ] |
2809
|
2055
|
|
|
|
|
2751
|
foreach keys %{ $self->{_regular_named_fields} }; |
|
2055
|
|
|
|
|
12825
|
|
2810
|
|
|
|
|
|
|
|
2811
|
2055
|
|
|
|
|
4016
|
my $rpost = $self->{_read_update_after_ar}; |
2812
|
2055
|
|
|
|
|
2729
|
for my $i ( 0 .. $#{$columns_ar} ) { |
|
2055
|
|
|
|
|
4431
|
|
2813
|
10586
|
|
|
|
|
13242
|
my $subref = $rpost->[$i]; |
2814
|
10586
|
100
|
|
|
|
17051
|
next unless defined($subref); |
2815
|
|
|
|
|
|
|
|
2816
|
479
|
|
|
|
|
545
|
do { |
2817
|
479
|
|
|
|
|
650
|
my $field = $columns_ar->[$i]; |
2818
|
479
|
|
|
|
|
746
|
local $_ = $ar->[$i]; |
2819
|
479
|
|
|
|
|
820
|
my $new_val = $subref->( $self, $field ); |
2820
|
474
|
|
|
|
|
820
|
$ar->[$i] = $new_val; |
2821
|
474
|
50
|
|
|
|
1515
|
$row_hr->{$field} = $new_val if defined($field); |
2822
|
|
|
|
|
|
|
} |
2823
|
|
|
|
|
|
|
|
2824
|
|
|
|
|
|
|
} |
2825
|
|
|
|
|
|
|
|
2826
|
2050
|
|
|
|
|
2815
|
for my $i ( @{ $self->{_extra_fields_indexes} } ) { |
|
2050
|
|
|
|
|
3742
|
|
2827
|
442
|
|
|
|
|
599
|
my $name = $columns_ar->[$i]; |
2828
|
442
|
|
|
|
|
628
|
my $e = $self->{_extra_fields_definitions}->{$name}; |
2829
|
|
|
|
|
|
|
|
2830
|
442
|
50
|
|
|
|
714
|
print( $_debugh "Extra field: #$i: $name\n" ) if $_debug_extra_fields; |
2831
|
|
|
|
|
|
|
|
2832
|
442
|
|
|
|
|
483
|
my $value; |
2833
|
442
|
100
|
|
|
|
6481
|
if ( $e->ef_type == $EF_LINK ) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
2834
|
|
|
|
|
|
|
|
2835
|
238
|
50
|
|
|
|
1592
|
print( $_debugh " linked field\n" ) if $_debug_extra_fields; |
2836
|
|
|
|
|
|
|
|
2837
|
238
|
|
|
|
|
2874
|
my $remobj = $e->link_remote_obj; |
2838
|
|
|
|
|
|
|
$value = |
2839
|
|
|
|
|
|
|
$remobj->vlookup( $e->link_remote_search, |
2840
|
238
|
|
|
|
|
3799
|
$ar->[ $named_fields{ $e->link_self_search } ], |
2841
|
|
|
|
|
|
|
$e->link_remote_read, $e->link_vlookup_opts ); |
2842
|
|
|
|
|
|
|
|
2843
|
|
|
|
|
|
|
} |
2844
|
|
|
|
|
|
|
elsif ( $e->ef_type == $EF_FUNC ) { |
2845
|
|
|
|
|
|
|
|
2846
|
51
|
50
|
|
|
|
1085
|
print( $_debugh " computed field\n" ) if $_debug_extra_fields; |
2847
|
|
|
|
|
|
|
|
2848
|
51
|
|
|
|
|
622
|
$value = $e->func_sub->( $name, $row_hr, $self->{_stats} ); |
2849
|
|
|
|
|
|
|
|
2850
|
|
|
|
|
|
|
} |
2851
|
|
|
|
|
|
|
elsif ( $e->ef_type == $EF_COPY ) { |
2852
|
|
|
|
|
|
|
|
2853
|
153
|
50
|
|
|
|
5323
|
print( $_debugh " copy field\n" ) if $_debug_extra_fields; |
2854
|
|
|
|
|
|
|
|
2855
|
153
|
|
|
|
|
1768
|
my $input = $row_hr->{ $e->copy_source }; |
2856
|
153
|
50
|
33
|
|
|
988
|
$input = '' if ( !defined $input ) and $no_undef; |
2857
|
153
|
100
|
|
|
|
1709
|
if ( defined( $e->copy_sub ) ) { |
2858
|
57
|
|
|
|
|
347
|
local $_ = $input; |
2859
|
57
|
|
|
|
|
641
|
$value = $e->copy_sub->(); |
2860
|
|
|
|
|
|
|
} |
2861
|
|
|
|
|
|
|
else { |
2862
|
96
|
|
|
|
|
576
|
$value = $input; |
2863
|
|
|
|
|
|
|
} |
2864
|
|
|
|
|
|
|
|
2865
|
153
|
50
|
|
|
|
868
|
print( $_debugh " in: '$input', out: '$value'\n" ) |
2866
|
|
|
|
|
|
|
if $_debug_extra_fields; |
2867
|
|
|
|
|
|
|
|
2868
|
|
|
|
|
|
|
} |
2869
|
|
|
|
|
|
|
else { |
2870
|
0
|
|
|
|
|
0
|
confess "Unknown ef_type '" |
2871
|
|
|
|
|
|
|
. $e->ef_type |
2872
|
|
|
|
|
|
|
. "', check this module' code urgently!"; |
2873
|
|
|
|
|
|
|
} |
2874
|
|
|
|
|
|
|
|
2875
|
440
|
100
|
100
|
|
|
1456
|
$value = '' if ( !defined $value ) and $no_undef; |
2876
|
440
|
|
|
|
|
755
|
$ar->[$i] = $value; |
2877
|
440
|
|
|
|
|
694
|
$row_hr->{$name} = $value; |
2878
|
|
|
|
|
|
|
|
2879
|
440
|
50
|
|
|
|
854
|
print( $_debugh " $name => '$value'\n" ) if $_debug_extra_fields; |
2880
|
|
|
|
|
|
|
|
2881
|
|
|
|
|
|
|
} |
2882
|
|
|
|
|
|
|
|
2883
|
2048
|
100
|
|
|
|
3762
|
if ( defined( $self->{read_post_update_hr} ) ) { |
2884
|
|
|
|
|
|
|
$self->{read_post_update_hr} |
2885
|
33
|
|
|
|
|
78
|
->( $row_hr, $self->{_stats}, $self->get_recnum() ); |
2886
|
33
|
|
|
|
|
264
|
$ar->[ $named_fields{$_} ] = $row_hr->{$_} foreach keys %named_fields; |
2887
|
|
|
|
|
|
|
} |
2888
|
|
|
|
|
|
|
|
2889
|
2048
|
100
|
|
|
|
5774
|
lock_keys(%$row_hr) if $self->{croak_if_error}; |
2890
|
|
|
|
|
|
|
|
2891
|
|
|
|
|
|
|
$self->{walker_ar}->( $ar, $self->{_stats}, $self->get_recnum() ) |
2892
|
2048
|
100
|
|
|
|
13469
|
if defined( $self->{walker_ar} ); |
2893
|
|
|
|
|
|
|
$self->{walker_hr}->( $row_hr, $self->{_stats}, $self->get_recnum() ) |
2894
|
2048
|
100
|
|
|
|
4136
|
if defined( $self->{walker_hr} ); |
2895
|
|
|
|
|
|
|
|
2896
|
2047
|
|
|
|
|
3249
|
$$ref_ar = $ar; |
2897
|
2047
|
|
|
|
|
4093
|
$$ref_row_hr = $row_hr; |
2898
|
|
|
|
|
|
|
|
2899
|
2047
|
|
|
|
|
5928
|
return 1; |
2900
|
|
|
|
|
|
|
} |
2901
|
|
|
|
|
|
|
|
2902
|
|
|
|
|
|
|
sub _open_read { |
2903
|
297
|
|
|
297
|
|
476
|
my $self = shift; |
2904
|
|
|
|
|
|
|
|
2905
|
297
|
|
|
|
|
499
|
my $verbose = $self->{verbose}; |
2906
|
297
|
|
|
|
|
700
|
my $in_file_disp = $self->get_in_file_disp(); |
2907
|
|
|
|
|
|
|
|
2908
|
297
|
|
|
|
|
687
|
$self->{_stats} = {}; |
2909
|
297
|
|
|
|
|
531
|
$self->{_read_in_progress} = 1; |
2910
|
|
|
|
|
|
|
|
2911
|
297
|
50
|
|
|
|
739
|
$self->_print("-- $in_file_disp reading start\n") if $verbose; |
2912
|
|
|
|
|
|
|
} |
2913
|
|
|
|
|
|
|
|
2914
|
|
|
|
|
|
|
sub _close_read { |
2915
|
316
|
|
|
316
|
|
490
|
my $self = shift; |
2916
|
316
|
|
|
|
|
441
|
my $keep_quiet = shift; |
2917
|
|
|
|
|
|
|
|
2918
|
316
|
|
|
|
|
505
|
my $verbose = $self->{verbose}; |
2919
|
316
|
|
|
|
|
623
|
my $in_file_disp = $self->get_in_file_disp(); |
2920
|
|
|
|
|
|
|
|
2921
|
316
|
|
|
|
|
568
|
$self->{_read_in_progress} = 0; |
2922
|
|
|
|
|
|
|
|
2923
|
316
|
50
|
33
|
|
|
721
|
if ( $verbose and ( !$keep_quiet ) ) { |
2924
|
|
|
|
|
|
|
$self->_print( "-- $in_file_disp reading end: " |
2925
|
|
|
|
|
|
|
. $self->{_row_read} |
2926
|
0
|
|
|
|
|
0
|
. " row(s) read\n" ); |
2927
|
0
|
|
|
|
|
0
|
for my $k ( sort keys %{ $self->{_stats} } ) { |
|
0
|
|
|
|
|
0
|
|
2928
|
0
|
|
|
|
|
0
|
$self->_printf( " %7d %s\n", $self->{_stats}->{$k}, $k ); |
2929
|
|
|
|
|
|
|
} |
2930
|
|
|
|
|
|
|
} |
2931
|
|
|
|
|
|
|
|
2932
|
316
|
|
|
|
|
617
|
$self->{_nb_rows} = $self->{_row_read}; |
2933
|
|
|
|
|
|
|
} |
2934
|
|
|
|
|
|
|
|
2935
|
|
|
|
|
|
|
# Return 0 if error, 1 if all good |
2936
|
|
|
|
|
|
|
sub _S4_read_all_in_mem { |
2937
|
167
|
|
|
167
|
|
267
|
my $self = shift; |
2938
|
|
|
|
|
|
|
|
2939
|
167
|
|
|
|
|
469
|
$self->_register_pass("_S4_read_all_in_mem()"); |
2940
|
|
|
|
|
|
|
|
2941
|
167
|
|
|
|
|
486
|
$self->_open_read(); |
2942
|
|
|
|
|
|
|
|
2943
|
167
|
|
|
|
|
259
|
my $ar; |
2944
|
|
|
|
|
|
|
my $row_hr; |
2945
|
167
|
|
|
|
|
498
|
while ( $self->_read_one_record_from_input( \$ar, \$row_hr ) ) { |
2946
|
|
|
|
|
|
|
|
2947
|
1553
|
|
|
|
|
2124
|
push @{ $self->{_flat} }, $ar; |
|
1553
|
|
|
|
|
4335
|
|
2948
|
|
|
|
|
|
|
|
2949
|
|
|
|
|
|
|
} |
2950
|
|
|
|
|
|
|
|
2951
|
165
|
50
|
|
|
|
451
|
my $retcode = ( defined($ar) ? 1 : 0 ); |
2952
|
165
|
|
|
|
|
482
|
$self->_update_in_mem_record_count(); |
2953
|
|
|
|
|
|
|
|
2954
|
165
|
|
|
|
|
456
|
$self->_close_read(); |
2955
|
|
|
|
|
|
|
|
2956
|
165
|
|
|
|
|
409
|
return $retcode; |
2957
|
|
|
|
|
|
|
} |
2958
|
|
|
|
|
|
|
|
2959
|
|
|
|
|
|
|
sub _chain_array { |
2960
|
24
|
|
|
24
|
|
140
|
return split( /\s*->\s*/, $_[0] ); |
2961
|
|
|
|
|
|
|
} |
2962
|
|
|
|
|
|
|
|
2963
|
|
|
|
|
|
|
sub _chain_str { |
2964
|
2
|
|
|
2
|
|
6
|
return join( '->', @_ ); |
2965
|
|
|
|
|
|
|
} |
2966
|
|
|
|
|
|
|
|
2967
|
|
|
|
|
|
|
sub field_add_link { |
2968
|
25
|
|
|
25
|
1
|
4901
|
my $self = shift; |
2969
|
|
|
|
|
|
|
|
2970
|
25
|
|
|
|
|
756
|
validate_pos( |
2971
|
|
|
|
|
|
|
@_, |
2972
|
|
|
|
|
|
|
{ type => UNDEF | SCALAR }, |
2973
|
|
|
|
|
|
|
{ type => SCALAR }, |
2974
|
|
|
|
|
|
|
{ type => SCALAR | OBJECT }, |
2975
|
|
|
|
|
|
|
{ type => HASHREF, optional => 1 } |
2976
|
|
|
|
|
|
|
); |
2977
|
|
|
|
|
|
|
|
2978
|
22
|
|
|
|
|
95
|
my ( $new_field, $chain, $obj, $param_opts ) = @_; |
2979
|
|
|
|
|
|
|
|
2980
|
22
|
|
|
|
|
39
|
my $croak_if_error = $self->{croak_if_error}; |
2981
|
22
|
|
|
|
|
36
|
my $_debug = $self->{_debug}; |
2982
|
22
|
|
|
|
|
34
|
my $_debugh = $self->{_debugh}; |
2983
|
|
|
|
|
|
|
|
2984
|
22
|
|
|
|
|
51
|
my @c = _chain_array($chain); |
2985
|
22
|
100
|
|
|
|
52
|
$new_field = $c[2] unless defined($new_field); |
2986
|
|
|
|
|
|
|
|
2987
|
22
|
50
|
|
|
|
47
|
print( $_debugh |
2988
|
|
|
|
|
|
|
"Registering new linked field, new_field = '$new_field', chain = '$chain'\n" |
2989
|
|
|
|
|
|
|
) if $_debug; |
2990
|
|
|
|
|
|
|
|
2991
|
22
|
100
|
66
|
|
|
101
|
unless ( @c == 3 and $c[2] ne '' ) { |
2992
|
1
|
|
|
|
|
7
|
$self->_print_error( "wrong links chain parameter: '$chain', " |
2993
|
|
|
|
|
|
|
. "look for CHAIN in Text::AutoCSV manual for help" ); |
2994
|
1
|
|
|
|
|
9
|
return; |
2995
|
|
|
|
|
|
|
} |
2996
|
|
|
|
|
|
|
|
2997
|
21
|
50
|
|
|
|
52
|
return unless $self->_status_forward('S2'); |
2998
|
21
|
50
|
|
|
|
52
|
return unless $self->_status_backward('S2'); |
2999
|
|
|
|
|
|
|
|
3000
|
21
|
|
|
|
|
36
|
my @tmp; |
3001
|
21
|
100
|
|
|
|
47
|
@tmp = %{$param_opts} if $param_opts; |
|
13
|
|
|
|
|
42
|
|
3002
|
21
|
|
|
|
|
479
|
my %opts = validate( @tmp, $SEARCH_VALIDATE_OPTIONS ); |
3003
|
|
|
|
|
|
|
|
3004
|
20
|
|
|
|
|
73
|
my $target_name = ''; |
3005
|
20
|
100
|
|
|
|
46
|
if ( ref $obj eq '' ) { |
3006
|
19
|
|
|
|
|
27
|
my $in_file = $obj; |
3007
|
19
|
|
|
|
|
27
|
$target_name = $in_file; |
3008
|
|
|
|
|
|
|
|
3009
|
|
|
|
|
|
|
# |
3010
|
|
|
|
|
|
|
# TODO (?) |
3011
|
|
|
|
|
|
|
# |
3012
|
|
|
|
|
|
|
# Take into account the fact that the OS' file system is case insensitive. At |
3013
|
|
|
|
|
|
|
# the moment, two different strings (even if identical in a case insensitive |
3014
|
|
|
|
|
|
|
# comparison) will be managed as being distinct. |
3015
|
|
|
|
|
|
|
# I put a question mark in this TO DO - after all, the user of this module had |
3016
|
|
|
|
|
|
|
# better use same case when dealing with multiple links of the same file. |
3017
|
|
|
|
|
|
|
# |
3018
|
|
|
|
|
|
|
# Also, tuning this module' behavior depending on the OS' characteristics |
3019
|
|
|
|
|
|
|
# would be not ideal, it'd add a level of complexity to understand how it |
3020
|
|
|
|
|
|
|
# works and what to expect. |
3021
|
|
|
|
|
|
|
# |
3022
|
19
|
100
|
100
|
|
|
72
|
if ( exists $self->{_obj} and exists $self->{_obj}->{$in_file} ) { |
3023
|
|
|
|
|
|
|
|
3024
|
4
|
50
|
|
|
|
9
|
print( $_debugh |
3025
|
|
|
|
|
|
|
"field_add_link: file '$in_file': re-using existing Text::AutoCSV object\n" |
3026
|
|
|
|
|
|
|
) if $_debug; |
3027
|
|
|
|
|
|
|
|
3028
|
4
|
|
|
|
|
7
|
$obj = $self->{_obj}->{$in_file}; |
3029
|
|
|
|
|
|
|
} |
3030
|
|
|
|
|
|
|
else { |
3031
|
|
|
|
|
|
|
|
3032
|
15
|
50
|
|
|
|
31
|
print( $_debugh |
3033
|
|
|
|
|
|
|
"field_add_link: file '$in_file': creating new Text::AutoCSV object\n" |
3034
|
|
|
|
|
|
|
) if $_debug; |
3035
|
|
|
|
|
|
|
|
3036
|
15
|
100
|
|
|
|
46
|
$self->{_obj} = {} unless exists $self->{_obj}; |
3037
|
|
|
|
|
|
|
|
3038
|
|
|
|
|
|
|
# |
3039
|
|
|
|
|
|
|
# The created Text::AutoCSV must be created with the same search options as what |
3040
|
|
|
|
|
|
|
# is currently found in $self. |
3041
|
|
|
|
|
|
|
# |
3042
|
|
|
|
|
|
|
# Why? |
3043
|
|
|
|
|
|
|
# Because the link is populated doing a vlookup on the remote object ($obj |
3044
|
|
|
|
|
|
|
# below), not on $self. Therefore, if we don't "propagate" search options from |
3045
|
|
|
|
|
|
|
# $self to $obj, search tunnings won't work as user would expect. |
3046
|
|
|
|
|
|
|
# |
3047
|
15
|
|
|
|
|
20
|
my %search_opts; |
3048
|
15
|
|
|
|
|
30
|
for ( |
3049
|
|
|
|
|
|
|
qw(search_case search_trim search_ignore_empty |
3050
|
|
|
|
|
|
|
search_ignore_accents search_value_if_not_found |
3051
|
|
|
|
|
|
|
search_value_if_ambiguous search_ignore_ambiguous) |
3052
|
|
|
|
|
|
|
) |
3053
|
|
|
|
|
|
|
{ |
3054
|
|
|
|
|
|
|
# We assign depending on whether or not the attribute EXISTS - the definedness |
3055
|
|
|
|
|
|
|
# is not appropriate, in case an attribute would have been assigned to undef. |
3056
|
105
|
100
|
|
|
|
179
|
$search_opts{$_} = $self->{$_} if exists $self->{$_}; |
3057
|
|
|
|
|
|
|
} |
3058
|
|
|
|
|
|
|
|
3059
|
|
|
|
|
|
|
$obj = Text::AutoCSV->new( |
3060
|
|
|
|
|
|
|
in_file => $in_file, |
3061
|
|
|
|
|
|
|
verbose => $self->{verbose}, |
3062
|
|
|
|
|
|
|
infoh => $self->{infoh}, |
3063
|
|
|
|
|
|
|
_debug => $self->{debug}, |
3064
|
|
|
|
|
|
|
_debugh => $self->{debugh}, |
3065
|
15
|
|
|
|
|
95
|
%search_opts |
3066
|
|
|
|
|
|
|
); |
3067
|
15
|
|
|
|
|
55
|
$self->{_obj}->{$in_file} = $obj; |
3068
|
|
|
|
|
|
|
} |
3069
|
|
|
|
|
|
|
} |
3070
|
|
|
|
|
|
|
else { |
3071
|
1
|
|
|
|
|
57
|
$target_name = '(object)'; |
3072
|
1
|
50
|
|
|
|
5
|
print( $_debugh "field_add_link: Text::AutoCSV object provided\n" ) |
3073
|
|
|
|
|
|
|
if $_debug; |
3074
|
|
|
|
|
|
|
} |
3075
|
|
|
|
|
|
|
|
3076
|
20
|
100
|
|
|
|
55
|
$self->{_extra_fields} = [] unless exists $self->{_extra_fields}; |
3077
|
|
|
|
|
|
|
|
3078
|
20
|
|
|
|
|
30
|
push @{ $self->{_extra_fields} }, ExtraField->new( |
|
20
|
|
|
|
|
450
|
|
3079
|
|
|
|
|
|
|
ef_type => $EF_LINK, |
3080
|
|
|
|
|
|
|
self_name => $new_field, |
3081
|
|
|
|
|
|
|
description => "link: $target_name, chain: $chain", |
3082
|
|
|
|
|
|
|
check_field_existence => $c[0], |
3083
|
|
|
|
|
|
|
|
3084
|
|
|
|
|
|
|
link_self_search => $c[0], |
3085
|
|
|
|
|
|
|
link_remote_obj => $obj, |
3086
|
|
|
|
|
|
|
link_remote_search => $c[1], |
3087
|
|
|
|
|
|
|
link_remote_read => $c[2], |
3088
|
|
|
|
|
|
|
|
3089
|
|
|
|
|
|
|
link_vlookup_opts => \%opts |
3090
|
|
|
|
|
|
|
); |
3091
|
|
|
|
|
|
|
|
3092
|
20
|
|
|
|
|
2172
|
return $self; |
3093
|
|
|
|
|
|
|
} |
3094
|
|
|
|
|
|
|
|
3095
|
|
|
|
|
|
|
sub links { |
3096
|
2
|
|
|
2
|
1
|
5
|
my $self = shift; |
3097
|
|
|
|
|
|
|
|
3098
|
2
|
|
|
|
|
31
|
validate_pos( |
3099
|
|
|
|
|
|
|
@_, |
3100
|
|
|
|
|
|
|
{ type => UNDEF | SCALAR }, |
3101
|
|
|
|
|
|
|
{ type => SCALAR }, |
3102
|
|
|
|
|
|
|
{ type => SCALAR | OBJECT }, |
3103
|
|
|
|
|
|
|
{ type => HASHREF, optional => 1 } |
3104
|
|
|
|
|
|
|
); |
3105
|
|
|
|
|
|
|
|
3106
|
2
|
|
|
|
|
7
|
my $prefix_field = shift; |
3107
|
2
|
|
|
|
|
4
|
my $chain = shift; |
3108
|
2
|
|
|
|
|
4
|
my ( $obj, $param_opts ) = @_; |
3109
|
|
|
|
|
|
|
|
3110
|
2
|
|
|
|
|
6
|
my @c = _chain_array($chain); |
3111
|
|
|
|
|
|
|
|
3112
|
2
|
50
|
33
|
|
|
57
|
if ( @c != 2 or $c[0] eq '' or $c[1] eq '' ) { |
|
|
|
33
|
|
|
|
|
3113
|
0
|
|
|
|
|
0
|
$self->_print_error( "wrong links chain parameter: '$chain', " |
3114
|
|
|
|
|
|
|
. "look for JOINCHAIN in Text::AutoCSV manual for help" ); |
3115
|
0
|
|
|
|
|
0
|
return; |
3116
|
|
|
|
|
|
|
} |
3117
|
|
|
|
|
|
|
|
3118
|
2
|
100
|
|
|
|
8
|
$prefix_field = '' unless defined($prefix_field); |
3119
|
2
|
|
|
|
|
6
|
my $chain2 = _chain_str( @c, '*' ); |
3120
|
|
|
|
|
|
|
|
3121
|
2
|
|
|
|
|
8
|
return $self->field_add_link( $prefix_field, $chain2, @_ ); |
3122
|
|
|
|
|
|
|
} |
3123
|
|
|
|
|
|
|
|
3124
|
|
|
|
|
|
|
sub field_add_computed { |
3125
|
7
|
|
|
7
|
1
|
2410
|
my $self = shift; |
3126
|
|
|
|
|
|
|
|
3127
|
7
|
|
|
|
|
202
|
validate_pos( @_, { type => SCALAR }, { type => CODEREF } ); |
3128
|
6
|
|
|
|
|
24
|
my ( $new_field, $func ) = @_; |
3129
|
|
|
|
|
|
|
|
3130
|
6
|
|
|
|
|
15
|
my $croak_if_error = $self->{croak_if_error}; |
3131
|
|
|
|
|
|
|
|
3132
|
6
|
|
|
|
|
9
|
my $_debug = $self->{_debug}; |
3133
|
6
|
|
|
|
|
13
|
my $_debugh = $self->{_debugh}; |
3134
|
|
|
|
|
|
|
|
3135
|
6
|
50
|
|
|
|
19
|
print( $_debugh |
3136
|
|
|
|
|
|
|
"Registering new computed field, new_field = '$new_field'\n" ) |
3137
|
|
|
|
|
|
|
if $_debug; |
3138
|
|
|
|
|
|
|
|
3139
|
6
|
100
|
|
|
|
18
|
return unless $self->_status_forward('S2'); |
3140
|
5
|
50
|
|
|
|
14
|
return unless $self->_status_backward('S2'); |
3141
|
|
|
|
|
|
|
|
3142
|
5
|
|
|
|
|
10
|
push @{ $self->{_extra_fields} }, ExtraField->new( |
|
5
|
|
|
|
|
108
|
|
3143
|
|
|
|
|
|
|
ef_type => $EF_FUNC, |
3144
|
|
|
|
|
|
|
self_name => $new_field, |
3145
|
|
|
|
|
|
|
description => "computed", |
3146
|
|
|
|
|
|
|
|
3147
|
|
|
|
|
|
|
func_sub => $func |
3148
|
|
|
|
|
|
|
); |
3149
|
|
|
|
|
|
|
|
3150
|
5
|
|
|
|
|
449
|
return $self; |
3151
|
|
|
|
|
|
|
} |
3152
|
|
|
|
|
|
|
|
3153
|
|
|
|
|
|
|
sub field_add_copy { |
3154
|
14
|
|
|
14
|
1
|
8161
|
my $self = shift; |
3155
|
|
|
|
|
|
|
|
3156
|
14
|
|
|
|
|
199
|
validate_pos( |
3157
|
|
|
|
|
|
|
@_, |
3158
|
|
|
|
|
|
|
{ type => SCALAR }, |
3159
|
|
|
|
|
|
|
{ type => SCALAR }, |
3160
|
|
|
|
|
|
|
{ type => CODEREF, optional => 1 } |
3161
|
|
|
|
|
|
|
); |
3162
|
14
|
|
|
|
|
56
|
my ( $new_field, $copy_source, $func ) = @_; |
3163
|
|
|
|
|
|
|
|
3164
|
14
|
|
|
|
|
29
|
my $croak_if_error = $self->{croak_if_error}; |
3165
|
|
|
|
|
|
|
|
3166
|
14
|
|
|
|
|
25
|
my $_debug = $self->{_debug}; |
3167
|
14
|
|
|
|
|
25
|
my $_debugh = $self->{_debugh}; |
3168
|
|
|
|
|
|
|
|
3169
|
14
|
50
|
|
|
|
37
|
print( $_debugh |
3170
|
|
|
|
|
|
|
"Registering field copy, new_field = '$new_field' copied from '$copy_source'\n" |
3171
|
|
|
|
|
|
|
) if $_debug; |
3172
|
|
|
|
|
|
|
|
3173
|
14
|
100
|
|
|
|
31
|
return unless $self->_status_forward('S2'); |
3174
|
12
|
50
|
|
|
|
45
|
return unless $self->_status_backward('S2'); |
3175
|
|
|
|
|
|
|
|
3176
|
12
|
100
|
|
|
|
24
|
push @{ $self->{_extra_fields} }, ExtraField->new( |
|
12
|
|
|
|
|
284
|
|
3177
|
|
|
|
|
|
|
ef_type => $EF_COPY, |
3178
|
|
|
|
|
|
|
self_name => $new_field, |
3179
|
|
|
|
|
|
|
description => "copy of $copy_source " |
3180
|
|
|
|
|
|
|
. ( defined($func) ? '(with sub)' : '(no sub)' ), |
3181
|
|
|
|
|
|
|
check_field_existence => $copy_source, |
3182
|
|
|
|
|
|
|
|
3183
|
|
|
|
|
|
|
copy_source => $copy_source, |
3184
|
|
|
|
|
|
|
copy_sub => $func |
3185
|
|
|
|
|
|
|
); |
3186
|
|
|
|
|
|
|
|
3187
|
12
|
|
|
|
|
1058
|
return $self; |
3188
|
|
|
|
|
|
|
} |
3189
|
|
|
|
|
|
|
|
3190
|
|
|
|
|
|
|
sub in_map { |
3191
|
15
|
|
|
15
|
1
|
1355
|
my $self = shift; |
3192
|
|
|
|
|
|
|
|
3193
|
15
|
|
|
|
|
36
|
return $self->read_update_after(@_); |
3194
|
|
|
|
|
|
|
} |
3195
|
|
|
|
|
|
|
|
3196
|
|
|
|
|
|
|
sub read_update_after { |
3197
|
16
|
|
|
16
|
1
|
25
|
my $self = shift; |
3198
|
16
|
|
|
|
|
191
|
validate_pos( @_, { type => SCALAR }, { type => CODEREF } ); |
3199
|
|
|
|
|
|
|
|
3200
|
16
|
|
|
|
|
52
|
my ( $field, $subref ) = @_; |
3201
|
|
|
|
|
|
|
|
3202
|
16
|
|
|
|
|
31
|
my $_debug = $self->{_debug}; |
3203
|
16
|
|
|
|
|
23
|
my $_debugh = $self->{_debugh}; |
3204
|
|
|
|
|
|
|
|
3205
|
16
|
50
|
|
|
|
33
|
return unless $self->_status_forward('S2'); |
3206
|
16
|
50
|
|
|
|
36
|
return unless $self->_status_backward('S2'); |
3207
|
|
|
|
|
|
|
|
3208
|
16
|
50
|
|
|
|
36
|
print( $_debugh "Registering read_post_update subref for field '$field'\n" ) |
3209
|
|
|
|
|
|
|
if $_debug; |
3210
|
|
|
|
|
|
|
|
3211
|
|
|
|
|
|
|
$self->{_read_update_after_hr}->{$field} = [] |
3212
|
16
|
100
|
|
|
|
50
|
unless defined( $self->{_read_update_after_hr}->{$field} ); |
3213
|
|
|
|
|
|
|
|
3214
|
16
|
|
|
|
|
20
|
push @{ $self->{_read_update_after_hr}->{$field} }, $subref; |
|
16
|
|
|
|
|
42
|
|
3215
|
|
|
|
|
|
|
|
3216
|
16
|
|
|
|
|
169
|
return $self; |
3217
|
|
|
|
|
|
|
} |
3218
|
|
|
|
|
|
|
|
3219
|
|
|
|
|
|
|
sub out_map { |
3220
|
11
|
|
|
11
|
1
|
19
|
my $self = shift; |
3221
|
|
|
|
|
|
|
|
3222
|
11
|
|
|
|
|
27
|
return $self->write_update_before(@_); |
3223
|
|
|
|
|
|
|
} |
3224
|
|
|
|
|
|
|
|
3225
|
|
|
|
|
|
|
sub write_update_before { |
3226
|
12
|
|
|
12
|
1
|
17
|
my $self = shift; |
3227
|
12
|
|
|
|
|
124
|
validate_pos( @_, { type => SCALAR }, { type => CODEREF } ); |
3228
|
|
|
|
|
|
|
|
3229
|
12
|
|
|
|
|
42
|
my ( $field, $subref ) = @_; |
3230
|
|
|
|
|
|
|
|
3231
|
12
|
|
|
|
|
20
|
my $_debug = $self->{_debug}; |
3232
|
12
|
|
|
|
|
19
|
my $_debugh = $self->{_debugh}; |
3233
|
|
|
|
|
|
|
|
3234
|
12
|
50
|
|
|
|
27
|
return unless $self->_status_forward('S2'); |
3235
|
12
|
50
|
|
|
|
23
|
return unless $self->_status_backward('S2'); |
3236
|
|
|
|
|
|
|
|
3237
|
12
|
50
|
|
|
|
25
|
print( $_debugh "Registering write_pre_update subref for field '$field'\n" ) |
3238
|
|
|
|
|
|
|
if $_debug; |
3239
|
|
|
|
|
|
|
|
3240
|
|
|
|
|
|
|
$self->{_write_update_before_hr}->{$field} = [] |
3241
|
12
|
100
|
|
|
|
45
|
unless defined( $self->{_write_update_before_hr}->{$field} ); |
3242
|
|
|
|
|
|
|
|
3243
|
12
|
|
|
|
|
19
|
push @{ $self->{_write_update_before_hr}->{$field} }, $subref; |
|
12
|
|
|
|
|
24
|
|
3244
|
|
|
|
|
|
|
|
3245
|
12
|
|
|
|
|
75
|
return $self; |
3246
|
|
|
|
|
|
|
} |
3247
|
|
|
|
|
|
|
|
3248
|
|
|
|
|
|
|
sub reset_next_record_hr { |
3249
|
189
|
|
|
189
|
1
|
310
|
my $self = shift; |
3250
|
|
|
|
|
|
|
|
3251
|
189
|
|
|
|
|
973
|
validate_pos(@_); |
3252
|
|
|
|
|
|
|
|
3253
|
189
|
|
|
|
|
535
|
$self->{_current_record} = undef; |
3254
|
|
|
|
|
|
|
|
3255
|
189
|
|
|
|
|
355
|
return $self; |
3256
|
|
|
|
|
|
|
} |
3257
|
|
|
|
|
|
|
|
3258
|
|
|
|
|
|
|
sub _create_internal_column_name_from_its_number { |
3259
|
1120
|
|
|
1120
|
|
2326
|
return sprintf( "__%04i__", $_[0] ); |
3260
|
|
|
|
|
|
|
} |
3261
|
|
|
|
|
|
|
|
3262
|
|
|
|
|
|
|
sub _ar_to_hr { |
3263
|
2079
|
|
|
2079
|
|
2952
|
my $self = shift; |
3264
|
|
|
|
|
|
|
|
3265
|
2079
|
|
|
|
|
15865
|
validate_pos( @_, { type => ARRAYREF } ); |
3266
|
|
|
|
|
|
|
|
3267
|
2079
|
|
|
|
|
5752
|
my ($ar) = @_; |
3268
|
2079
|
|
|
|
|
2856
|
my $last_elem_index = scalar( @{$ar} ) - 1; |
|
2079
|
|
|
|
|
3828
|
|
3269
|
|
|
|
|
|
|
|
3270
|
2079
|
|
|
|
|
3679
|
my $nr = $self->{_named_fields}; |
3271
|
2079
|
|
|
|
|
3059
|
my %h; |
3272
|
|
|
|
|
|
|
my %n_seen; |
3273
|
2079
|
|
|
|
|
2700
|
for ( keys %{$nr} ) { |
|
2079
|
|
|
|
|
6083
|
|
3274
|
9934
|
|
|
|
|
19949
|
$h{$_} = $ar->[ $nr->{$_} ]; |
3275
|
9934
|
|
|
|
|
18336
|
undef $n_seen{ $nr->{$_} }; |
3276
|
|
|
|
|
|
|
} |
3277
|
2079
|
|
|
|
|
4919
|
for my $i ( 0 .. $last_elem_index ) { |
3278
|
11001
|
100
|
|
|
|
19943
|
if ( !exists( $n_seen{$i} ) ) { |
3279
|
1120
|
|
|
|
|
1575
|
my $k = _create_internal_column_name_from_its_number($i); |
3280
|
1120
|
50
|
|
|
|
2895
|
$h{$k} = $ar->[$i] if !exists $h{$k}; |
3281
|
|
|
|
|
|
|
} |
3282
|
|
|
|
|
|
|
} |
3283
|
|
|
|
|
|
|
|
3284
|
2079
|
100
|
|
|
|
6375
|
lock_keys(%h) if $self->{croak_if_error}; |
3285
|
|
|
|
|
|
|
|
3286
|
2079
|
|
|
|
|
18557
|
return \%h; |
3287
|
|
|
|
|
|
|
} |
3288
|
|
|
|
|
|
|
|
3289
|
|
|
|
|
|
|
sub get_next_record_hr { |
3290
|
2001
|
|
|
2001
|
1
|
3288
|
my $self = shift; |
3291
|
|
|
|
|
|
|
|
3292
|
2001
|
|
|
|
|
14019
|
validate_pos( @_, { type => SCALARREF, optional => 1 } ); |
3293
|
|
|
|
|
|
|
|
3294
|
2001
|
|
|
|
|
4712
|
my $refkey = $_[0]; |
3295
|
|
|
|
|
|
|
|
3296
|
2001
|
50
|
|
|
|
3754
|
return unless $self->_status_forward('S4'); |
3297
|
|
|
|
|
|
|
|
3298
|
1998
|
100
|
|
|
|
4304
|
if ( !defined( $self->{_current_record} ) ) { |
3299
|
186
|
|
|
|
|
358
|
$self->{_current_record} = 0; |
3300
|
|
|
|
|
|
|
} |
3301
|
|
|
|
|
|
|
else { |
3302
|
1812
|
|
|
|
|
2721
|
$self->{_current_record}++; |
3303
|
|
|
|
|
|
|
} |
3304
|
|
|
|
|
|
|
|
3305
|
1998
|
|
|
|
|
3499
|
my $ar = $self->{_flat}->[ $self->{_current_record} ]; |
3306
|
1998
|
100
|
|
|
|
3623
|
if ( !defined($ar) ) { |
3307
|
186
|
|
|
|
|
319
|
$self->{_current_record} = undef; |
3308
|
186
|
|
|
|
|
342
|
$$refkey = undef; |
3309
|
186
|
|
|
|
|
540
|
return; |
3310
|
|
|
|
|
|
|
} |
3311
|
|
|
|
|
|
|
|
3312
|
1812
|
|
|
|
|
2768
|
$$refkey = $self->{_current_record}; |
3313
|
|
|
|
|
|
|
|
3314
|
1812
|
|
|
|
|
3471
|
return $self->_ar_to_hr($ar); |
3315
|
|
|
|
|
|
|
} |
3316
|
|
|
|
|
|
|
|
3317
|
|
|
|
|
|
|
sub read { |
3318
|
60
|
|
|
60
|
1
|
20399
|
my $self = shift; |
3319
|
|
|
|
|
|
|
|
3320
|
60
|
|
|
|
|
399
|
validate_pos(@_); |
3321
|
|
|
|
|
|
|
|
3322
|
60
|
50
|
|
|
|
226
|
return unless $self->_status_backward('S3'); |
3323
|
|
|
|
|
|
|
|
3324
|
60
|
50
|
|
|
|
151
|
if ( exists $self->{out_orderby} ) { |
3325
|
0
|
0
|
|
|
|
0
|
return unless $self->_status_forward('S4'); |
3326
|
0
|
|
|
|
|
0
|
return $self; |
3327
|
|
|
|
|
|
|
} |
3328
|
|
|
|
|
|
|
|
3329
|
60
|
100
|
|
|
|
129
|
return unless $self->_status_forward('S3'); |
3330
|
|
|
|
|
|
|
|
3331
|
51
|
|
|
|
|
178
|
$self->_register_pass("read()"); |
3332
|
|
|
|
|
|
|
|
3333
|
51
|
|
|
|
|
149
|
$self->_open_read(); |
3334
|
|
|
|
|
|
|
|
3335
|
51
|
|
|
|
|
88
|
my $ar; |
3336
|
|
|
|
|
|
|
my $row_hr; |
3337
|
51
|
|
|
|
|
170
|
while ( $self->_read_one_record_from_input( \$ar, \$row_hr ) ) { |
3338
|
|
|
|
|
|
|
|
3339
|
|
|
|
|
|
|
# Ben oui quoi... qu'est-ce que l'on peut bien faire d'autre ? |
3340
|
|
|
|
|
|
|
} |
3341
|
|
|
|
|
|
|
|
3342
|
49
|
|
|
|
|
166
|
$self->_close_read(); |
3343
|
49
|
50
|
|
|
|
126
|
return unless defined($ar); |
3344
|
|
|
|
|
|
|
|
3345
|
49
|
50
|
|
|
|
127
|
return unless $self->_status_reset(); |
3346
|
|
|
|
|
|
|
|
3347
|
49
|
|
|
|
|
246
|
return $self; |
3348
|
|
|
|
|
|
|
} |
3349
|
|
|
|
|
|
|
|
3350
|
|
|
|
|
|
|
# |
3351
|
|
|
|
|
|
|
# Initially, _read_all_in_mem was intended for the test plan. |
3352
|
|
|
|
|
|
|
# |
3353
|
|
|
|
|
|
|
# Turned out to be sometimes useful for user, thus, is no longer private since |
3354
|
|
|
|
|
|
|
# 1.1.5. Private version below is kept for compatibility. |
3355
|
|
|
|
|
|
|
# |
3356
|
|
|
|
|
|
|
sub read_all_in_mem { |
3357
|
1
|
|
|
1
|
1
|
4
|
my $self = shift; |
3358
|
|
|
|
|
|
|
|
3359
|
1
|
|
|
|
|
6
|
return $self->_read_all_in_mem(); |
3360
|
|
|
|
|
|
|
} |
3361
|
|
|
|
|
|
|
|
3362
|
|
|
|
|
|
|
sub _read_all_in_mem { |
3363
|
9
|
|
|
9
|
|
1173
|
my $self = shift; |
3364
|
|
|
|
|
|
|
|
3365
|
9
|
50
|
|
|
|
29
|
return 0 unless $self->_status_backward('S3'); |
3366
|
9
|
50
|
|
|
|
24
|
return 0 unless $self->_status_forward('S4'); |
3367
|
|
|
|
|
|
|
|
3368
|
9
|
|
|
|
|
99
|
return $self; |
3369
|
|
|
|
|
|
|
} |
3370
|
|
|
|
|
|
|
|
3371
|
|
|
|
|
|
|
sub _render { |
3372
|
0
|
|
|
0
|
|
0
|
my $v = $_[0]; |
3373
|
|
|
|
|
|
|
|
3374
|
0
|
0
|
0
|
|
|
0
|
if ( length($v) == 1 and ord($v) < 32 ) { |
3375
|
0
|
|
|
|
|
0
|
my $n = ord($v); |
3376
|
0
|
0
|
|
|
|
0
|
return '\n' if $n == 10; |
3377
|
0
|
0
|
|
|
|
0
|
return '\r' if $n == 13; |
3378
|
0
|
0
|
|
|
|
0
|
return '\t' if $n == 9; |
3379
|
0
|
0
|
|
|
|
0
|
return '\f' if $n == 12; |
3380
|
0
|
0
|
|
|
|
0
|
return '\b' if $n == 8; |
3381
|
0
|
0
|
|
|
|
0
|
return '\a' if $n == 7; |
3382
|
0
|
0
|
|
|
|
0
|
return '\e' if $n == 27; |
3383
|
0
|
|
|
|
|
0
|
return '\0' . oct($n); |
3384
|
|
|
|
|
|
|
} |
3385
|
0
|
|
|
|
|
0
|
return $v; |
3386
|
|
|
|
|
|
|
} |
3387
|
|
|
|
|
|
|
|
3388
|
|
|
|
|
|
|
sub print_id { |
3389
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
3390
|
|
|
|
|
|
|
|
3391
|
0
|
|
|
|
|
0
|
$self->_printf( "-- " . $self->get_in_file_disp() . ":\n" ); |
3392
|
0
|
|
|
|
|
0
|
$self->_printf( |
3393
|
|
|
|
|
|
|
"sep_char: " . _render( $self->get_sep_char() ) . "\n" ); |
3394
|
0
|
|
|
|
|
0
|
$self->_printf( |
3395
|
|
|
|
|
|
|
"escape_char: " . _render( $self->get_escape_char() ) . "\n" ); |
3396
|
0
|
|
|
|
|
0
|
$self->_printf( |
3397
|
|
|
|
|
|
|
"in_encoding: " . _render( $self->get_in_encoding() ) . "\n" ); |
3398
|
0
|
0
|
|
|
|
0
|
$self->_printf( "is_always_quoted: " |
3399
|
|
|
|
|
|
|
. ( $self->get_is_always_quoted() ? 'yes' : 'no' ) |
3400
|
|
|
|
|
|
|
. "\n" ); |
3401
|
|
|
|
|
|
|
|
3402
|
0
|
|
|
|
|
0
|
my @coldata = $self->get_coldata(); |
3403
|
0
|
|
|
|
|
0
|
my @disp = [ |
3404
|
|
|
|
|
|
|
'#', 'FIELD', |
3405
|
|
|
|
|
|
|
'HEADER', 'EXT DATA', |
3406
|
|
|
|
|
|
|
'DATETIME FORMAT', 'DATETIME LOCALE', |
3407
|
|
|
|
|
|
|
'ML' |
3408
|
|
|
|
|
|
|
]; |
3409
|
0
|
|
|
|
|
0
|
my $NCOLS = @{ $disp[0] }; |
|
0
|
|
|
|
|
0
|
|
3410
|
0
|
|
|
|
|
0
|
push @disp, [ map { my $s = $_; $s =~ s/./-/g; $s } @{ $disp[0] } ]; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
3411
|
|
|
|
|
|
|
|
3412
|
0
|
|
|
|
|
0
|
for my $i ( 0 .. $#coldata ) { |
3413
|
0
|
|
|
|
|
0
|
my $col = $coldata[$i]; |
3414
|
|
|
|
|
|
|
|
3415
|
0
|
|
|
|
|
0
|
my @row = "$i"; |
3416
|
|
|
|
|
|
|
push @row, ( defined( $col->[$_] ) ? ( $col->[$_] . '' ) : '' ) |
3417
|
0
|
0
|
|
|
|
0
|
for 0 .. $NCOLS - 2; |
3418
|
0
|
|
|
|
|
0
|
for (@row) { |
3419
|
0
|
|
|
|
|
0
|
s/\n/\\n/g; |
3420
|
0
|
|
|
|
|
0
|
s/\r/\\r/g; |
3421
|
0
|
|
|
|
|
0
|
s/\t/\\t/g; |
3422
|
|
|
|
|
|
|
} |
3423
|
0
|
|
|
|
|
0
|
push @disp, [@row]; |
3424
|
|
|
|
|
|
|
} |
3425
|
0
|
|
|
|
|
0
|
my $n = @{ $disp[-1] }; |
|
0
|
|
|
|
|
0
|
|
3426
|
0
|
|
|
|
|
0
|
my @max = (-1) x $n; |
3427
|
0
|
|
|
|
|
0
|
for my $l (@disp) { |
3428
|
0
|
0
|
|
|
|
0
|
do { $max[$_] = length( $l->[$_] ) if $max[$_] < length( $l->[$_] ) } |
3429
|
0
|
|
|
|
|
0
|
for ( 0 .. $n - 1 ); |
3430
|
|
|
|
|
|
|
} |
3431
|
0
|
|
|
|
|
0
|
my $s = join( ' ', map { "%-${_}s" } @max ); |
|
0
|
|
|
|
|
0
|
|
3432
|
0
|
|
|
|
|
0
|
$self->_print("\n"); |
3433
|
0
|
|
|
|
|
0
|
$self->_printf( "$s\n", @{$_} ) for (@disp); |
|
0
|
|
|
|
|
0
|
|
3434
|
|
|
|
|
|
|
} |
3435
|
|
|
|
|
|
|
|
3436
|
|
|
|
|
|
|
sub set_out_file { |
3437
|
2
|
|
|
2
|
1
|
3949
|
my $self = shift; |
3438
|
2
|
|
|
|
|
23
|
validate_pos( @_, { type => SCALAR } ); |
3439
|
|
|
|
|
|
|
|
3440
|
2
|
|
|
|
|
8
|
my ($out_file) = @_; |
3441
|
2
|
|
|
|
|
4
|
$self->{out_file} = $out_file; |
3442
|
|
|
|
|
|
|
|
3443
|
2
|
|
|
|
|
9
|
return $self; |
3444
|
|
|
|
|
|
|
} |
3445
|
|
|
|
|
|
|
|
3446
|
|
|
|
|
|
|
# Subrefs set with out_map |
3447
|
|
|
|
|
|
|
sub _execute_write_update_before { |
3448
|
367
|
|
|
367
|
|
654
|
my ( $self, $ar ) = @_; |
3449
|
|
|
|
|
|
|
|
3450
|
367
|
|
|
|
|
563
|
my $columns_ar = $self->{_columns}; |
3451
|
|
|
|
|
|
|
|
3452
|
367
|
|
|
|
|
561
|
my $wpre = $self->{_write_update_before_ar}; |
3453
|
367
|
|
|
|
|
504
|
for my $i ( 0 .. $#{$columns_ar} ) { |
|
367
|
|
|
|
|
814
|
|
3454
|
1177
|
|
|
|
|
1599
|
my $subref = $wpre->[$i]; |
3455
|
1177
|
100
|
|
|
|
2237
|
next unless defined($subref); |
3456
|
|
|
|
|
|
|
|
3457
|
159
|
|
|
|
|
174
|
do { |
3458
|
159
|
|
|
|
|
230
|
local $_ = $ar->[$i]; |
3459
|
159
|
|
|
|
|
218
|
my $field = $columns_ar->[$i]; |
3460
|
159
|
|
|
|
|
280
|
my $new_val = $subref->( $self, $field ); |
3461
|
156
|
|
|
|
|
345
|
$ar->[$i] = $new_val; |
3462
|
|
|
|
|
|
|
} |
3463
|
|
|
|
|
|
|
|
3464
|
|
|
|
|
|
|
} |
3465
|
|
|
|
|
|
|
} |
3466
|
|
|
|
|
|
|
|
3467
|
|
|
|
|
|
|
# Take into account write_fields if it got set |
3468
|
|
|
|
|
|
|
sub _apply_write_fields { |
3469
|
466
|
|
|
466
|
|
808
|
my ( $self, $ar ) = @_; |
3470
|
|
|
|
|
|
|
|
3471
|
466
|
|
|
|
|
597
|
my @final; |
3472
|
|
|
|
|
|
|
|
3473
|
466
|
|
|
|
|
1461
|
my $tmp = _get_def( $self->{out_fields}, $self->{write_fields} ); |
3474
|
466
|
|
|
|
|
865
|
my @wf; |
3475
|
466
|
100
|
|
|
|
823
|
@wf = @{$tmp} if defined($tmp); |
|
16
|
|
|
|
|
38
|
|
3476
|
|
|
|
|
|
|
|
3477
|
466
|
100
|
|
|
|
1047
|
return unless @wf; |
3478
|
|
|
|
|
|
|
|
3479
|
16
|
|
|
|
|
23
|
my %named_fields = %{ $self->{_named_fields} }; |
|
16
|
|
|
|
|
67
|
|
3480
|
16
|
|
|
|
|
103
|
for my $i ( 0 .. $#wf ) { |
3481
|
40
|
|
|
|
|
75
|
my $field = $wf[$i]; |
3482
|
40
|
|
|
|
|
59
|
my $tmp; |
3483
|
40
|
100
|
66
|
|
|
158
|
$tmp = $ar->[ $named_fields{$field} ] |
3484
|
|
|
|
|
|
|
if defined($field) |
3485
|
|
|
|
|
|
|
and $field ne ''; |
3486
|
|
|
|
|
|
|
|
3487
|
|
|
|
|
|
|
# Put here any post-processing of value |
3488
|
|
|
|
|
|
|
# WARNING |
3489
|
|
|
|
|
|
|
# $tmp can be undef |
3490
|
|
|
|
|
|
|
# ... |
3491
|
|
|
|
|
|
|
|
3492
|
40
|
|
|
|
|
101
|
$final[$i] = $tmp; |
3493
|
|
|
|
|
|
|
} |
3494
|
16
|
|
|
|
|
70
|
$_[1] = [@final]; |
3495
|
|
|
|
|
|
|
} |
3496
|
|
|
|
|
|
|
|
3497
|
|
|
|
|
|
|
sub write { |
3498
|
109
|
|
|
109
|
1
|
24409
|
my $self = shift; |
3499
|
|
|
|
|
|
|
|
3500
|
109
|
|
|
|
|
737
|
validate_pos(@_); |
3501
|
|
|
|
|
|
|
|
3502
|
109
|
50
|
|
|
|
339
|
return unless $self->_status_forward('S3'); |
3503
|
|
|
|
|
|
|
|
3504
|
104
|
|
|
|
|
223
|
my $verbose = $self->{verbose}; |
3505
|
104
|
|
|
|
|
199
|
my $_debug = $self->{_debug}; |
3506
|
104
|
|
|
|
|
198
|
my $_debugh = $self->{_debugh}; |
3507
|
|
|
|
|
|
|
|
3508
|
104
|
|
|
|
|
191
|
my $out_file = $self->{out_file}; |
3509
|
|
|
|
|
|
|
|
3510
|
104
|
|
|
|
|
151
|
my %stats; |
3511
|
|
|
|
|
|
|
|
3512
|
104
|
50
|
|
|
|
209
|
$self->_print("-- $out_file writing start\n") if $verbose; |
3513
|
104
|
|
|
|
|
169
|
my $rows_written = 0; |
3514
|
|
|
|
|
|
|
|
3515
|
104
|
|
|
|
|
158
|
my $outh = $self->{outh}; |
3516
|
|
|
|
|
|
|
|
3517
|
104
|
|
|
|
|
197
|
$self->{_close_outh_when_finished} = 0; |
3518
|
104
|
50
|
|
|
|
236
|
unless ( defined($outh) ) { |
3519
|
104
|
50
|
|
|
|
259
|
if ( $out_file eq '' ) { |
3520
|
0
|
|
|
|
|
0
|
$outh = \*STDOUT; |
3521
|
|
|
|
|
|
|
} |
3522
|
|
|
|
|
|
|
else { |
3523
|
104
|
50
|
|
|
|
163273
|
unless ( open( $outh, '>', $out_file ) ) { ## no critic (InputOutput::RequireBriefOpen) |
3524
|
0
|
|
|
|
|
0
|
$self->_print_error("unable to open file '$out_file': $!"); |
3525
|
0
|
|
|
|
|
0
|
return; |
3526
|
|
|
|
|
|
|
} |
3527
|
104
|
|
|
|
|
435
|
$self->{_close_outh_when_finished} = 1; |
3528
|
|
|
|
|
|
|
} |
3529
|
104
|
|
|
|
|
260
|
$self->{outh} = $outh; |
3530
|
|
|
|
|
|
|
} |
3531
|
|
|
|
|
|
|
|
3532
|
104
|
50
|
|
|
|
261
|
unless ( $self->{_leave_encoding_alone} ) { |
3533
|
|
|
|
|
|
|
my $enc = ( |
3534
|
|
|
|
|
|
|
defined( $self->{_inh_encoding} ) |
3535
|
|
|
|
|
|
|
? $self->{_inh_encoding} |
3536
|
104
|
50
|
|
|
|
342
|
: $DEFAULT_OUT_ENCODING |
3537
|
|
|
|
|
|
|
); |
3538
|
|
|
|
|
|
|
|
3539
|
|
|
|
|
|
|
# out_encoding option takes precedence |
3540
|
104
|
100
|
|
|
|
295
|
$enc = $self->{out_encoding} if defined( $self->{out_encoding} ); |
3541
|
104
|
|
|
|
|
276
|
my $m = ":encoding($enc)"; |
3542
|
104
|
50
|
66
|
|
|
276
|
if ( _is_utf8($enc) and $self->{out_utf8_bom} ) { |
3543
|
0
|
|
|
|
|
0
|
$m .= ':via(File::BOM)'; |
3544
|
|
|
|
|
|
|
} |
3545
|
|
|
|
|
|
|
|
3546
|
104
|
50
|
33
|
|
|
364
|
if ( $OS_IS_PLAIN_WINDOWS and $FIX_PERLMONKS_823214 ) { |
3547
|
|
|
|
|
|
|
|
3548
|
|
|
|
|
|
|
# Tested with UTF-16LE, NOT tested with UTF-16BE (it should be the same story) |
3549
|
0
|
0
|
|
|
|
0
|
$m = ":raw:perlio:$m:crlf" if $enc =~ /^utf-?16/i; |
3550
|
|
|
|
|
|
|
} |
3551
|
|
|
|
|
|
|
|
3552
|
104
|
|
|
|
|
970
|
binmode $outh, $m; |
3553
|
104
|
50
|
|
|
|
9346
|
print( $_debugh "Encoding string used for output: $m\n" ) if $_debug; |
3554
|
|
|
|
|
|
|
} |
3555
|
|
|
|
|
|
|
|
3556
|
104
|
|
|
|
|
214
|
my $escape_char = $self->{escape_char}; |
3557
|
104
|
|
|
|
|
205
|
my $quote_char = $self->{quote_char}; |
3558
|
|
|
|
|
|
|
|
3559
|
104
|
|
|
|
|
164
|
my %opts; |
3560
|
104
|
|
|
|
|
218
|
$opts{binary} = 1; |
3561
|
104
|
|
|
|
|
198
|
$opts{eol} = "\n"; |
3562
|
|
|
|
|
|
|
|
3563
|
104
|
50
|
|
|
|
346
|
$opts{sep_char} = $self->{sep_char} if defined( $self->{sep_char} ); |
3564
|
104
|
100
|
|
|
|
247
|
$opts{sep_char} = $self->{out_sep_char} if defined( $self->{out_sep_char} ); |
3565
|
|
|
|
|
|
|
|
3566
|
104
|
50
|
|
|
|
298
|
$opts{quote_char} = $self->{quote_char} if defined( $self->{quote_char} ); |
3567
|
|
|
|
|
|
|
$opts{quote_char} = $self->{out_quote_char} |
3568
|
104
|
50
|
|
|
|
237
|
if defined( $self->{out_quote_char} ); |
3569
|
|
|
|
|
|
|
|
3570
|
|
|
|
|
|
|
$opts{escape_char} = $self->{escape_char} |
3571
|
104
|
100
|
|
|
|
291
|
if defined( $self->{escape_char} ); |
3572
|
|
|
|
|
|
|
$opts{escape_char} = $self->{out_escape_char} |
3573
|
104
|
100
|
|
|
|
237
|
if defined( $self->{out_escape_char} ); |
3574
|
|
|
|
|
|
|
|
3575
|
104
|
|
|
|
|
205
|
$opts{always_quote} = $self->{_is_always_quoted}; |
3576
|
|
|
|
|
|
|
$opts{always_quote} = $self->{out_always_quote} |
3577
|
104
|
100
|
|
|
|
249
|
if defined( $self->{out_always_quote} ); |
3578
|
|
|
|
|
|
|
|
3579
|
104
|
|
|
|
|
744
|
my $csvout = Text::CSV->new( {%opts} ); |
3580
|
104
|
50
|
|
|
|
16140
|
if ( !defined($csvout) ) { |
3581
|
0
|
|
|
|
|
0
|
$self->_print_error("error creating output Text::CSV object"); |
3582
|
0
|
|
|
|
|
0
|
return; |
3583
|
|
|
|
|
|
|
} |
3584
|
|
|
|
|
|
|
|
3585
|
|
|
|
|
|
|
my $write_filter_hr = |
3586
|
104
|
|
|
|
|
407
|
_get_def( $self->{out_filter}, $self->{write_filter_hr} ); |
3587
|
|
|
|
|
|
|
|
3588
|
104
|
100
|
66
|
|
|
725
|
if ( |
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
3589
|
|
|
|
|
|
|
( |
3590
|
|
|
|
|
|
|
$self->{has_headers} |
3591
|
|
|
|
|
|
|
and ( |
3592
|
|
|
|
|
|
|
!( |
3593
|
|
|
|
|
|
|
defined( $self->{out_has_headers} ) |
3594
|
|
|
|
|
|
|
and ( !$self->{out_has_headers} ) |
3595
|
|
|
|
|
|
|
) |
3596
|
|
|
|
|
|
|
) |
3597
|
|
|
|
|
|
|
) |
3598
|
|
|
|
|
|
|
or $self->{out_has_headers} |
3599
|
|
|
|
|
|
|
) |
3600
|
|
|
|
|
|
|
{ |
3601
|
102
|
|
|
|
|
220
|
my $ar = []; |
3602
|
102
|
100
|
|
|
|
255
|
if ( $self->{has_headers} ) { |
3603
|
98
|
|
|
|
|
201
|
$ar = $self->{_headers}; |
3604
|
|
|
|
|
|
|
} |
3605
|
|
|
|
|
|
|
else { |
3606
|
4
|
|
|
|
|
4
|
my $nf = $self->{_named_fields}; |
3607
|
4
|
|
|
|
|
5
|
$ar->[ $nf->{$_} ] = $_ for ( keys %{$nf} ); |
|
4
|
|
|
|
|
16
|
|
3608
|
|
|
|
|
|
|
} |
3609
|
|
|
|
|
|
|
|
3610
|
102
|
100
|
|
|
|
255
|
if ( exists $self->{_out_headers} ) { |
3611
|
3
|
|
|
|
|
8
|
my $h = $self->{_out_headers}; |
3612
|
3
|
|
|
|
|
5
|
for ( keys %{ $self->{_named_fields} } ) { |
|
3
|
|
|
|
|
16
|
|
3613
|
12
|
100
|
|
|
|
31
|
if ( exists $h->{$_} ) { |
3614
|
5
|
|
|
|
|
17
|
$ar->[ $self->{_named_fields}->{$_} ] = $h->{$_}; |
3615
|
|
|
|
|
|
|
} |
3616
|
|
|
|
|
|
|
} |
3617
|
|
|
|
|
|
|
} |
3618
|
|
|
|
|
|
|
|
3619
|
102
|
|
|
|
|
370
|
$self->_apply_write_fields($ar); |
3620
|
|
|
|
|
|
|
|
3621
|
102
|
|
|
|
|
1531
|
$csvout->print( $outh, $ar ); |
3622
|
102
|
|
|
|
|
1083
|
$rows_written++; |
3623
|
|
|
|
|
|
|
} |
3624
|
|
|
|
|
|
|
|
3625
|
104
|
|
|
|
|
177
|
my $do_status_reset = 0; |
3626
|
|
|
|
|
|
|
|
3627
|
|
|
|
|
|
|
# |
3628
|
|
|
|
|
|
|
# FIXME!!! |
3629
|
|
|
|
|
|
|
# |
3630
|
|
|
|
|
|
|
# Instead of this duplication of code, provide AutoCSV with a "create iterator |
3631
|
|
|
|
|
|
|
# sub" feature to iterate over all records, whatever is going on behind the |
3632
|
|
|
|
|
|
|
# scene (in-memory or read input). |
3633
|
|
|
|
|
|
|
# |
3634
|
|
|
|
|
|
|
# Such an iterator would also benefit to module users. |
3635
|
|
|
|
|
|
|
# |
3636
|
|
|
|
|
|
|
|
3637
|
104
|
100
|
|
|
|
275
|
if ( exists $self->{out_orderby} ) { |
3638
|
1
|
50
|
|
|
|
3
|
return unless $self->_status_forward('S4'); |
3639
|
|
|
|
|
|
|
} |
3640
|
|
|
|
|
|
|
|
3641
|
104
|
100
|
|
|
|
281
|
if ( $self->{_status} == 4 ) { |
3642
|
|
|
|
|
|
|
|
3643
|
|
|
|
|
|
|
# |
3644
|
|
|
|
|
|
|
# The content is available in-memory: we write from what we have in-memory |
3645
|
|
|
|
|
|
|
# then... |
3646
|
|
|
|
|
|
|
# |
3647
|
|
|
|
|
|
|
|
3648
|
25
|
|
|
|
|
88
|
my @keys = $self->get_keys(); |
3649
|
25
|
|
|
|
|
64
|
my @ordered_keys = @keys; |
3650
|
25
|
100
|
|
|
|
74
|
if ( exists $self->{'out_orderby'} ) { |
3651
|
1
|
|
|
|
|
2
|
my @orderby = @{ $self->{'out_orderby'} }; |
|
1
|
|
|
|
|
3
|
|
3652
|
|
|
|
|
|
|
@ordered_keys = sort { |
3653
|
1
|
|
|
|
|
5
|
for my $f (@orderby) { |
|
7
|
|
|
|
|
15
|
|
3654
|
8
|
|
|
|
|
17
|
my $cmp = |
3655
|
|
|
|
|
|
|
$self->get_cell( $a, $f ) cmp $self->get_cell( $b, $f ); |
3656
|
8
|
100
|
|
|
|
22
|
return $cmp if $cmp; |
3657
|
|
|
|
|
|
|
} |
3658
|
0
|
|
|
|
|
0
|
return 0; |
3659
|
|
|
|
|
|
|
} @keys; |
3660
|
|
|
|
|
|
|
} |
3661
|
|
|
|
|
|
|
|
3662
|
|
|
|
|
|
|
# for my $k ($self->get_keys()) { |
3663
|
25
|
|
|
|
|
63
|
for my $k (@ordered_keys) { |
3664
|
92
|
|
|
|
|
246
|
my $hr = $self->get_row_hr($k); |
3665
|
92
|
50
|
|
|
|
274
|
if ( defined($write_filter_hr) ) { |
3666
|
0
|
0
|
|
|
|
0
|
next unless $write_filter_hr->($hr); |
3667
|
|
|
|
|
|
|
} |
3668
|
92
|
|
|
|
|
151
|
my $ar = [ @{ $self->get_row_ar($k) } ]; |
|
92
|
|
|
|
|
207
|
|
3669
|
|
|
|
|
|
|
|
3670
|
92
|
|
|
|
|
291
|
$self->_execute_write_update_before($ar); |
3671
|
92
|
|
|
|
|
271
|
$self->_apply_write_fields($ar); |
3672
|
|
|
|
|
|
|
|
3673
|
92
|
|
|
|
|
662
|
$csvout->print( $outh, $ar ); |
3674
|
92
|
|
|
|
|
1032
|
$rows_written++; |
3675
|
|
|
|
|
|
|
} |
3676
|
|
|
|
|
|
|
|
3677
|
|
|
|
|
|
|
} |
3678
|
|
|
|
|
|
|
else { |
3679
|
|
|
|
|
|
|
|
3680
|
|
|
|
|
|
|
# |
3681
|
|
|
|
|
|
|
# No in-memory content available: we read and write in parallel. |
3682
|
|
|
|
|
|
|
# |
3683
|
|
|
|
|
|
|
|
3684
|
79
|
|
|
|
|
244
|
$self->_register_pass("write()"); |
3685
|
|
|
|
|
|
|
|
3686
|
79
|
|
|
|
|
227
|
$self->_open_read(); |
3687
|
79
|
|
|
|
|
145
|
my $ar; |
3688
|
|
|
|
|
|
|
my $row_hr; |
3689
|
79
|
|
|
|
|
254
|
while ( $self->_read_one_record_from_input( \$ar, \$row_hr ) ) { |
3690
|
301
|
100
|
|
|
|
543
|
if ( defined($write_filter_hr) ) { |
3691
|
|
|
|
|
|
|
next |
3692
|
46
|
100
|
|
|
|
91
|
unless $write_filter_hr->( $row_hr, \%stats, |
3693
|
|
|
|
|
|
|
$self->get_recnum() ); |
3694
|
|
|
|
|
|
|
} |
3695
|
275
|
|
|
|
|
470
|
$ar = [ @{$ar} ]; |
|
275
|
|
|
|
|
642
|
|
3696
|
|
|
|
|
|
|
|
3697
|
275
|
|
|
|
|
798
|
$self->_execute_write_update_before($ar); |
3698
|
272
|
|
|
|
|
580
|
$self->_apply_write_fields($ar); |
3699
|
|
|
|
|
|
|
|
3700
|
272
|
|
|
|
|
1441
|
$csvout->print( $outh, $ar ); |
3701
|
272
|
|
|
|
|
2199
|
$rows_written++; |
3702
|
|
|
|
|
|
|
} |
3703
|
72
|
|
|
|
|
223
|
$self->_close_read(); |
3704
|
|
|
|
|
|
|
|
3705
|
72
|
|
|
|
|
124
|
$do_status_reset = 1; |
3706
|
|
|
|
|
|
|
} |
3707
|
|
|
|
|
|
|
|
3708
|
97
|
|
|
|
|
281
|
$self->_close_outh(); |
3709
|
|
|
|
|
|
|
|
3710
|
97
|
50
|
|
|
|
234
|
if ($verbose) { |
3711
|
0
|
|
|
|
|
0
|
$self->_print( |
3712
|
|
|
|
|
|
|
"-- $out_file writing end: $rows_written row(s) written\n"); |
3713
|
0
|
|
|
|
|
0
|
for my $k ( sort keys %stats ) { |
3714
|
0
|
|
|
|
|
0
|
$self->_printf( " %7d %s\n", $stats{$k}, $k ); |
3715
|
|
|
|
|
|
|
} |
3716
|
|
|
|
|
|
|
} |
3717
|
|
|
|
|
|
|
|
3718
|
97
|
100
|
|
|
|
216
|
if ($do_status_reset) { |
3719
|
72
|
50
|
|
|
|
206
|
return unless $self->_status_reset(); |
3720
|
|
|
|
|
|
|
} |
3721
|
97
|
|
|
|
|
1026
|
return $self; |
3722
|
|
|
|
|
|
|
} |
3723
|
|
|
|
|
|
|
|
3724
|
|
|
|
|
|
|
# |
3725
|
|
|
|
|
|
|
# * *** ************************************************************************ |
3726
|
|
|
|
|
|
|
# * *** ************************************************************************ |
3727
|
|
|
|
|
|
|
# * OBJ ************************************************************************ |
3728
|
|
|
|
|
|
|
# * *** ************************************************************************ |
3729
|
|
|
|
|
|
|
# * *** ************************************************************************ |
3730
|
|
|
|
|
|
|
# |
3731
|
|
|
|
|
|
|
|
3732
|
|
|
|
|
|
|
# |
3733
|
|
|
|
|
|
|
# The subs below assume Text::AutoCSV can be in status S4 = all in memory. |
3734
|
|
|
|
|
|
|
# |
3735
|
|
|
|
|
|
|
|
3736
|
|
|
|
|
|
|
sub get_keys { |
3737
|
32
|
|
|
32
|
1
|
2171
|
my $self = shift; |
3738
|
32
|
|
|
|
|
238
|
validate_pos(@_); |
3739
|
|
|
|
|
|
|
|
3740
|
32
|
50
|
|
|
|
122
|
return unless $self->_status_forward('S4'); |
3741
|
|
|
|
|
|
|
|
3742
|
32
|
|
|
|
|
63
|
my $last_key = @{ $self->{_flat} } - 1; |
|
32
|
|
|
|
|
86
|
|
3743
|
32
|
|
|
|
|
100
|
my @r = ( 0 .. $last_key ); |
3744
|
|
|
|
|
|
|
|
3745
|
32
|
|
|
|
|
101
|
return @r; |
3746
|
|
|
|
|
|
|
} |
3747
|
|
|
|
|
|
|
|
3748
|
|
|
|
|
|
|
sub get_row_ar { |
3749
|
364
|
|
|
364
|
1
|
1100
|
my $self = shift; |
3750
|
364
|
|
|
|
|
2759
|
validate_pos( @_, { type => SCALAR } ); |
3751
|
364
|
|
|
|
|
1032
|
my ($key) = @_; |
3752
|
|
|
|
|
|
|
|
3753
|
364
|
50
|
|
|
|
802
|
return unless $self->_status_forward('S4'); |
3754
|
|
|
|
|
|
|
|
3755
|
364
|
50
|
|
|
|
806
|
unless ( defined($key) ) { |
3756
|
0
|
|
|
|
|
0
|
$self->_print_error("get_row_ar(): \$key is not defined!"); |
3757
|
0
|
|
|
|
|
0
|
return; |
3758
|
|
|
|
|
|
|
} |
3759
|
|
|
|
|
|
|
|
3760
|
|
|
|
|
|
|
$self->_print_error("unknown row '$key'") |
3761
|
364
|
100
|
|
|
|
846
|
unless defined( $self->{_flat}->[$key] ); |
3762
|
364
|
|
|
|
|
885
|
return $self->{_flat}->[$key]; |
3763
|
|
|
|
|
|
|
} |
3764
|
|
|
|
|
|
|
|
3765
|
|
|
|
|
|
|
sub get_row_hr { |
3766
|
269
|
|
|
269
|
1
|
461
|
my $self = shift; |
3767
|
269
|
|
|
|
|
2443
|
validate_pos( @_, { type => SCALAR } ); |
3768
|
269
|
|
|
|
|
886
|
my ($key) = @_; |
3769
|
|
|
|
|
|
|
|
3770
|
269
|
|
|
|
|
696
|
my $ar = $self->get_row_ar($key); |
3771
|
269
|
100
|
|
|
|
568
|
return unless defined($ar); |
3772
|
|
|
|
|
|
|
|
3773
|
267
|
|
|
|
|
606
|
return $self->_ar_to_hr($ar); |
3774
|
|
|
|
|
|
|
} |
3775
|
|
|
|
|
|
|
|
3776
|
|
|
|
|
|
|
# |
3777
|
|
|
|
|
|
|
# Could be made much more efficient (directly read $self->{_flat} instead of |
3778
|
|
|
|
|
|
|
# calling get_row_hr that itself calls get_row_ar). |
3779
|
|
|
|
|
|
|
# I leave it as is because get_hr_all is not good practice (it is not scalable), |
3780
|
|
|
|
|
|
|
# it was primarily done to ease test plan. |
3781
|
|
|
|
|
|
|
# |
3782
|
|
|
|
|
|
|
# By the way I may make it one day not available by default, requesting caller |
3783
|
|
|
|
|
|
|
# to tune some variable (like { $Text::AutoCSV::i_am_the_test_plan = 1 }) to |
3784
|
|
|
|
|
|
|
# expose it. |
3785
|
|
|
|
|
|
|
# |
3786
|
|
|
|
|
|
|
sub get_hr_all { |
3787
|
108
|
|
|
108
|
1
|
2130
|
my $self = shift; |
3788
|
108
|
|
|
|
|
720
|
validate_pos(@_); |
3789
|
|
|
|
|
|
|
|
3790
|
108
|
|
|
|
|
226
|
my @resp; |
3791
|
108
|
|
|
|
|
337
|
$self->reset_next_record_hr(); |
3792
|
108
|
|
|
|
|
272
|
while ( my $hr = $self->get_next_record_hr() ) { |
3793
|
414
|
|
|
|
|
897
|
push @resp, $hr; |
3794
|
|
|
|
|
|
|
} |
3795
|
105
|
|
|
|
|
558
|
return @resp; |
3796
|
|
|
|
|
|
|
} |
3797
|
|
|
|
|
|
|
|
3798
|
|
|
|
|
|
|
sub get_recnum { |
3799
|
190
|
|
|
190
|
1
|
313
|
my $self = shift; |
3800
|
190
|
|
|
|
|
923
|
validate_pos(@_); |
3801
|
|
|
|
|
|
|
|
3802
|
190
|
50
|
|
|
|
557
|
return -1 unless $self->{_read_in_progress}; |
3803
|
190
|
|
|
|
|
395
|
return _get_def( $self->{_row_read}, -1 ); |
3804
|
|
|
|
|
|
|
} |
3805
|
|
|
|
|
|
|
|
3806
|
|
|
|
|
|
|
sub _check_for_search { |
3807
|
906
|
|
|
906
|
|
1978
|
my ( $self, $field ) = @_; |
3808
|
906
|
50
|
|
|
|
1984
|
return unless $self->_status_forward('S4'); |
3809
|
|
|
|
|
|
|
|
3810
|
905
|
100
|
|
|
|
3246
|
return 1 if exists $self->{_named_fields}->{$field}; |
3811
|
|
|
|
|
|
|
$self->_print_error( "search: unknown field '$field'", |
3812
|
6
|
|
|
|
|
24
|
0, ERR_UNKNOWN_FIELD, $self->{_named_fields} ); |
3813
|
|
|
|
|
|
|
} |
3814
|
|
|
|
|
|
|
|
3815
|
|
|
|
|
|
|
sub get_cell { |
3816
|
20
|
|
|
20
|
1
|
1353
|
my $self = shift; |
3817
|
20
|
|
|
|
|
184
|
validate_pos( @_, { type => SCALAR }, { type => SCALAR } ); |
3818
|
20
|
|
|
|
|
60
|
my ( $key, $field ) = @_; |
3819
|
|
|
|
|
|
|
|
3820
|
20
|
50
|
|
|
|
39
|
return unless $self->_check_for_search($field); |
3821
|
19
|
|
|
|
|
39
|
my $row = $self->get_row_hr($key); |
3822
|
19
|
100
|
|
|
|
41
|
return $row unless defined($row); |
3823
|
18
|
|
|
|
|
51
|
return $row->{$field}; |
3824
|
|
|
|
|
|
|
} |
3825
|
|
|
|
|
|
|
|
3826
|
|
|
|
|
|
|
sub get_values { |
3827
|
9
|
|
|
9
|
1
|
2812
|
my $self = shift; |
3828
|
9
|
|
|
|
|
102
|
validate_pos( |
3829
|
|
|
|
|
|
|
@_, |
3830
|
|
|
|
|
|
|
{ type => SCALAR }, |
3831
|
|
|
|
|
|
|
{ type => UNDEF | CODEREF, optional => 1 } |
3832
|
|
|
|
|
|
|
); |
3833
|
9
|
|
|
|
|
30
|
my ( $field, $filter_subref ) = @_; |
3834
|
|
|
|
|
|
|
|
3835
|
9
|
50
|
|
|
|
24
|
return unless $self->_check_for_search($field); |
3836
|
|
|
|
|
|
|
|
3837
|
9
|
|
|
|
|
15
|
my @values; |
3838
|
9
|
|
|
|
|
25
|
$self->reset_next_record_hr(); |
3839
|
9
|
|
|
|
|
32
|
while ( my $hr = $self->get_next_record_hr() ) { |
3840
|
53
|
100
|
|
|
|
91
|
if ( defined($filter_subref) ) { |
3841
|
23
|
|
|
|
|
38
|
local $_ = $hr->{$field}; |
3842
|
23
|
100
|
|
|
|
41
|
next unless $filter_subref->(); |
3843
|
|
|
|
|
|
|
} |
3844
|
42
|
|
|
|
|
150
|
push @values, $hr->{$field}; |
3845
|
|
|
|
|
|
|
} |
3846
|
9
|
|
|
|
|
41
|
return @values; |
3847
|
|
|
|
|
|
|
} |
3848
|
|
|
|
|
|
|
|
3849
|
|
|
|
|
|
|
sub _get_hash_and_projector { |
3850
|
561
|
|
|
561
|
|
1243
|
my ( $self, $field, $arg_opts ) = @_; |
3851
|
|
|
|
|
|
|
|
3852
|
561
|
|
|
|
|
1015
|
my $_debug = $self->{_debug}; |
3853
|
561
|
|
|
|
|
933
|
my $_debugh = $self->{_debugh}; |
3854
|
|
|
|
|
|
|
|
3855
|
561
|
|
|
|
|
813
|
my %opts; |
3856
|
561
|
50
|
|
|
|
1192
|
%opts = %{$arg_opts} if defined($arg_opts); |
|
561
|
|
|
|
|
1576
|
|
3857
|
|
|
|
|
|
|
|
3858
|
|
|
|
|
|
|
my $opt_case = |
3859
|
561
|
|
|
|
|
2285
|
_get_def( $opts{'case'}, $self->{search_case}, $DEF_SEARCH_CASE ); |
3860
|
|
|
|
|
|
|
my $opt_trim = |
3861
|
561
|
|
|
|
|
2076
|
_get_def( $opts{'trim'}, $self->{search_trim}, $DEF_SEARCH_TRIM ); |
3862
|
|
|
|
|
|
|
my $opt_ignore_empty = |
3863
|
|
|
|
|
|
|
_get_def( $opts{'ignore_empty'}, $self->{search_ignore_empty}, |
3864
|
561
|
|
|
|
|
1940
|
$DEF_SEARCH_IGNORE_EMPTY ); |
3865
|
|
|
|
|
|
|
my $opt_ignacc = |
3866
|
|
|
|
|
|
|
_get_def( $opts{'ignore_accents'}, $self->{search_ignore_accents}, |
3867
|
561
|
|
|
|
|
2106
|
$DEF_SEARCH_IGNORE_ACCENTS ); |
3868
|
|
|
|
|
|
|
|
3869
|
561
|
|
|
|
|
1819
|
my $opts_stringified = |
3870
|
|
|
|
|
|
|
$opt_case . $opt_trim . $opt_ignore_empty . $opt_ignacc; |
3871
|
561
|
|
|
|
|
1297
|
my $hash_name = "_h${field}_${opts_stringified}"; |
3872
|
561
|
|
|
|
|
1172
|
my $projector_name = "_p${field}_${opts_stringified}"; |
3873
|
|
|
|
|
|
|
|
3874
|
561
|
100
|
66
|
|
|
3116
|
if ( exists $self->{$hash_name} and exists $self->{$projector_name} ) { |
|
|
50
|
33
|
|
|
|
|
3875
|
489
|
50
|
|
|
|
9752
|
print( $_debugh |
3876
|
|
|
|
|
|
|
"Search by key '$field': using existing hash and projector (" |
3877
|
|
|
|
|
|
|
. "$hash_name, $projector_name)\n" ) |
3878
|
|
|
|
|
|
|
if $_debug; |
3879
|
489
|
|
|
|
|
1971
|
return ( $hash_name, $projector_name ); |
3880
|
|
|
|
|
|
|
} |
3881
|
|
|
|
|
|
|
elsif ( exists $self->{$hash_name} or exists $self->{$projector_name} ) { |
3882
|
0
|
|
|
|
|
0
|
confess "Man, check your $PKG module code now!"; |
3883
|
|
|
|
|
|
|
} |
3884
|
|
|
|
|
|
|
|
3885
|
72
|
50
|
|
|
|
198
|
print( $_debugh "Search by key '$field': building hash\n" ) if $_debug; |
3886
|
|
|
|
|
|
|
|
3887
|
|
|
|
|
|
|
# |
3888
|
|
|
|
|
|
|
# Projectors |
3889
|
|
|
|
|
|
|
# |
3890
|
|
|
|
|
|
|
# The projector contains subs to derivate the search key from the field value. |
3891
|
|
|
|
|
|
|
# At the moment it is used to manage with case / without case searches and with |
3892
|
|
|
|
|
|
|
# trim / without trim searches (meaning, ignoring spaces at beginning and end of |
3893
|
|
|
|
|
|
|
# fields) |
3894
|
|
|
|
|
|
|
# |
3895
|
|
|
|
|
|
|
# Why naming it a projector? |
3896
|
|
|
|
|
|
|
# Because if you run it twice on a value, the second run should produce the |
3897
|
|
|
|
|
|
|
# same result, meaning: p(p(x)) = p(x) whatever x |
3898
|
|
|
|
|
|
|
# |
3899
|
|
|
|
|
|
|
|
3900
|
72
|
|
|
|
|
134
|
my @projectors; |
3901
|
|
|
|
|
|
|
|
3902
|
|
|
|
|
|
|
# Add case removal in the projector function list |
3903
|
1479
|
|
|
1479
|
|
4103
|
push @projectors, sub { return lc(shift); } |
3904
|
72
|
100
|
|
|
|
401
|
unless $opt_case; |
3905
|
|
|
|
|
|
|
|
3906
|
|
|
|
|
|
|
# Add trim in the projector function list |
3907
|
72
|
100
|
|
|
|
215
|
if ($opt_trim) { |
3908
|
|
|
|
|
|
|
push @projectors, sub { |
3909
|
1479
|
|
|
1479
|
|
7212
|
my $v = shift; |
3910
|
1479
|
|
|
|
|
5181
|
$v =~ s/^\s+|\s+$//g; |
3911
|
1479
|
|
|
|
|
3736
|
return $v; |
3912
|
57
|
|
|
|
|
221
|
}; |
3913
|
|
|
|
|
|
|
} |
3914
|
|
|
|
|
|
|
|
3915
|
|
|
|
|
|
|
# Add remove_accents in the projector function list |
3916
|
1886
|
|
|
1886
|
|
4272
|
push @projectors, sub { return remove_accents(shift); } |
3917
|
72
|
100
|
|
|
|
373
|
if $opt_ignacc; |
3918
|
|
|
|
|
|
|
|
3919
|
|
|
|
|
|
|
my $projector = sub { |
3920
|
1906
|
|
|
1906
|
|
3888
|
my $v = _get_def( $_[0], '' ); |
3921
|
1906
|
|
|
|
|
4499
|
$v = $_->($v) foreach (@projectors); |
3922
|
1906
|
|
|
|
|
4218
|
return $v; |
3923
|
72
|
|
|
|
|
295
|
}; |
3924
|
|
|
|
|
|
|
|
3925
|
|
|
|
|
|
|
# |
3926
|
|
|
|
|
|
|
# Filter |
3927
|
|
|
|
|
|
|
# |
3928
|
|
|
|
|
|
|
# As opposed to projectors above (where a search key is transformed), the idea |
3929
|
|
|
|
|
|
|
# now is to ignore certain keys when doing a search. |
3930
|
|
|
|
|
|
|
# At the moment, used to manage searches with / without empty values. |
3931
|
|
|
|
|
|
|
# |
3932
|
|
|
|
|
|
|
# That is to say: shall we use empty value as a regular value to search on, as |
3933
|
|
|
|
|
|
|
# in |
3934
|
|
|
|
|
|
|
# my @results = $self->search('FIELDNAME', ''); |
3935
|
|
|
|
|
|
|
# ? |
3936
|
|
|
|
|
|
|
# |
3937
|
|
|
|
|
|
|
# Right now we don't use an array-based construct, that'd allow to chain filters |
3938
|
|
|
|
|
|
|
# with one another (as we now have only one filter to deal with), later, we may |
3939
|
|
|
|
|
|
|
# use an array of filters, as done with projectors... |
3940
|
|
|
|
|
|
|
# |
3941
|
|
|
|
|
|
|
|
3942
|
72
|
|
|
|
|
140
|
my $filter; |
3943
|
72
|
100
|
|
|
|
241
|
if ($opt_ignore_empty) { |
3944
|
1268
|
|
|
1268
|
|
2906
|
$filter = sub { return $_[0] ne ''; } |
3945
|
61
|
|
|
|
|
195
|
} |
3946
|
|
|
|
|
|
|
else { |
3947
|
77
|
|
|
77
|
|
234
|
$filter = sub { return 1; } |
3948
|
11
|
|
|
|
|
65
|
} |
3949
|
|
|
|
|
|
|
|
3950
|
72
|
|
|
|
|
162
|
my %h; |
3951
|
|
|
|
|
|
|
my $k; |
3952
|
72
|
|
|
|
|
408
|
$self->reset_next_record_hr(); |
3953
|
72
|
|
|
|
|
274
|
while ( my $hr = $self->get_next_record_hr( \$k ) ) { |
3954
|
1345
|
|
|
|
|
2484
|
my $kv = $hr->{$field}; |
3955
|
1345
|
|
|
|
|
2412
|
my $p = $projector->($kv); |
3956
|
1345
|
100
|
|
|
|
2403
|
unless ( $filter->($p) ) { |
3957
|
76
|
50
|
|
|
|
195
|
print( $_debugh "Ignoring key value '$p' in hash build\n" ) |
3958
|
|
|
|
|
|
|
if $_debug; |
3959
|
76
|
|
|
|
|
388
|
next; |
3960
|
|
|
|
|
|
|
} |
3961
|
1269
|
|
|
|
|
1869
|
push @{ $h{$p} }, $k; |
|
1269
|
|
|
|
|
6288
|
|
3962
|
|
|
|
|
|
|
} |
3963
|
72
|
|
|
|
|
507
|
for ( keys %h ) { |
3964
|
1178
|
|
|
|
|
1582
|
@{ $h{$_} } = sort { $a <=> $b } @{ $h{$_} }; |
|
1178
|
|
|
|
|
2478
|
|
|
103
|
|
|
|
|
315
|
|
|
1178
|
|
|
|
|
2405
|
|
3965
|
|
|
|
|
|
|
} |
3966
|
|
|
|
|
|
|
|
3967
|
72
|
|
|
|
|
417
|
$self->{_hash_build_count}++; |
3968
|
72
|
|
|
|
|
869
|
$self->{$hash_name} = {%h}; |
3969
|
72
|
|
|
|
|
310
|
$self->{$projector_name} = $projector; |
3970
|
72
|
|
|
|
|
674
|
return ( $hash_name, $projector_name ); |
3971
|
|
|
|
|
|
|
} |
3972
|
|
|
|
|
|
|
|
3973
|
|
|
|
|
|
|
sub _get_hash_build_count { |
3974
|
6
|
|
|
6
|
|
3379
|
my $self = shift; |
3975
|
|
|
|
|
|
|
|
3976
|
6
|
|
|
|
|
29
|
return _get_def( $self->{_hash_build_count}, 0 ); |
3977
|
|
|
|
|
|
|
} |
3978
|
|
|
|
|
|
|
|
3979
|
|
|
|
|
|
|
sub search { |
3980
|
563
|
|
|
563
|
1
|
22942
|
my $self = shift; |
3981
|
563
|
|
|
|
|
7217
|
validate_pos( |
3982
|
|
|
|
|
|
|
@_, |
3983
|
|
|
|
|
|
|
{ type => SCALAR }, |
3984
|
|
|
|
|
|
|
{ type => UNDEF | SCALAR }, |
3985
|
|
|
|
|
|
|
{ type => UNDEF | HASHREF, optional => 1 } |
3986
|
|
|
|
|
|
|
); |
3987
|
563
|
|
|
|
|
2358
|
my ( $field, $value, $param_opts ) = @_; |
3988
|
|
|
|
|
|
|
|
3989
|
563
|
|
|
|
|
1354
|
my $croak_if_error = $self->{croak_if_error}; |
3990
|
|
|
|
|
|
|
|
3991
|
|
|
|
|
|
|
# |
3992
|
|
|
|
|
|
|
# FIXME? |
3993
|
|
|
|
|
|
|
# A bit overkill to check options each time search is called... |
3994
|
|
|
|
|
|
|
# To be thought about. |
3995
|
|
|
|
|
|
|
# |
3996
|
|
|
|
|
|
|
|
3997
|
563
|
|
|
|
|
895
|
my @tmp; |
3998
|
563
|
100
|
|
|
|
1455
|
@tmp = %{$param_opts} if $param_opts; |
|
421
|
|
|
|
|
1260
|
|
3999
|
563
|
|
|
|
|
25596
|
my %opts = validate( @tmp, $SEARCH_VALIDATE_OPTIONS ); |
4000
|
|
|
|
|
|
|
|
4001
|
562
|
50
|
|
|
|
2988
|
return unless $self->_check_for_search($field); |
4002
|
|
|
|
|
|
|
|
4003
|
|
|
|
|
|
|
# $self->_print_error("undef value in search call") if !defined($value); |
4004
|
561
|
50
|
|
|
|
1289
|
$value = '' unless defined($value); |
4005
|
|
|
|
|
|
|
|
4006
|
561
|
|
|
|
|
1695
|
my ( $hash_name, $projector_name ) = |
4007
|
|
|
|
|
|
|
$self->_get_hash_and_projector( $field, \%opts ); |
4008
|
|
|
|
|
|
|
|
4009
|
561
|
|
|
|
|
1918
|
my $ret = $self->{$hash_name}->{ $self->{$projector_name}->($value) }; |
4010
|
|
|
|
|
|
|
|
4011
|
561
|
100
|
|
|
|
2654
|
return $ret if defined($ret); |
4012
|
185
|
|
|
|
|
621
|
return []; |
4013
|
|
|
|
|
|
|
} |
4014
|
|
|
|
|
|
|
|
4015
|
|
|
|
|
|
|
sub search_1hr { |
4016
|
22
|
|
|
22
|
1
|
15913
|
my $self = shift; |
4017
|
22
|
|
|
|
|
534
|
validate_pos( |
4018
|
|
|
|
|
|
|
@_, |
4019
|
|
|
|
|
|
|
{ type => SCALAR }, |
4020
|
|
|
|
|
|
|
{ type => UNDEF | SCALAR }, |
4021
|
|
|
|
|
|
|
{ type => UNDEF | HASHREF, optional => 1 } |
4022
|
|
|
|
|
|
|
); |
4023
|
22
|
|
|
|
|
148
|
my ( $field, $value, $arg_opts ) = @_; |
4024
|
|
|
|
|
|
|
|
4025
|
22
|
|
|
|
|
109
|
my $r = $self->search( $field, $value, $arg_opts ); |
4026
|
|
|
|
|
|
|
|
4027
|
22
|
100
|
|
|
|
110
|
return unless defined( $r->[0] ); |
4028
|
|
|
|
|
|
|
|
4029
|
20
|
|
|
|
|
87
|
my $opts = _get_def( $arg_opts, {} ); |
4030
|
|
|
|
|
|
|
my $opt_ignore_ambiguous = _get_def( |
4031
|
|
|
|
|
|
|
$opts->{'ignore_ambiguous'}, |
4032
|
20
|
|
|
|
|
124
|
$self->{'search_ignore_ambiguous'}, |
4033
|
|
|
|
|
|
|
$DEF_SEARCH_IGNORE_AMBIGUOUS |
4034
|
|
|
|
|
|
|
); |
4035
|
|
|
|
|
|
|
|
4036
|
20
|
100
|
100
|
|
|
65
|
return if @{$r} >= 2 and ( !$opt_ignore_ambiguous ); |
|
20
|
|
|
|
|
155
|
|
4037
|
14
|
|
|
|
|
86
|
return $self->get_row_hr( $r->[0] ); |
4038
|
|
|
|
|
|
|
} |
4039
|
|
|
|
|
|
|
|
4040
|
|
|
|
|
|
|
sub vlookup { |
4041
|
317
|
|
|
317
|
1
|
35958
|
my $self = shift; |
4042
|
317
|
|
|
|
|
4212
|
validate_pos( |
4043
|
|
|
|
|
|
|
@_, |
4044
|
|
|
|
|
|
|
{ type => SCALAR }, |
4045
|
|
|
|
|
|
|
{ type => UNDEF | SCALAR }, |
4046
|
|
|
|
|
|
|
{ type => SCALAR }, |
4047
|
|
|
|
|
|
|
{ type => UNDEF | HASHREF, optional => 1 } |
4048
|
|
|
|
|
|
|
); |
4049
|
317
|
|
|
|
|
1251
|
my ( $searched_field, $value, $target_field, $arg_opts ) = @_; |
4050
|
|
|
|
|
|
|
|
4051
|
317
|
|
|
|
|
739
|
my $r = $self->search( $searched_field, $value, $arg_opts ); |
4052
|
315
|
50
|
|
|
|
661
|
return unless $self->_check_for_search($target_field); |
4053
|
|
|
|
|
|
|
|
4054
|
314
|
|
|
|
|
652
|
my $opts = _get_def( $arg_opts, {} ); |
4055
|
314
|
100
|
66
|
|
|
720
|
unless ( defined( $r->[0] ) ) { |
4056
|
|
|
|
|
|
|
return ( |
4057
|
|
|
|
|
|
|
exists $opts->{'value_if_not_found'} |
4058
|
|
|
|
|
|
|
? $opts->{'value_if_not_found'} |
4059
|
143
|
100
|
|
|
|
505
|
: $self->{'search_value_if_not_found'} |
4060
|
|
|
|
|
|
|
); |
4061
|
|
|
|
|
|
|
} |
4062
|
|
|
|
|
|
|
elsif ( @{$r} >= 2 ) { |
4063
|
|
|
|
|
|
|
my $opt_ignore_ambiguous = _get_def( |
4064
|
|
|
|
|
|
|
$opts->{'ignore_ambiguous'}, |
4065
|
|
|
|
|
|
|
$self->{'search_ignore_ambiguous'}, |
4066
|
|
|
|
|
|
|
$DEF_SEARCH_IGNORE_AMBIGUOUS |
4067
|
|
|
|
|
|
|
); |
4068
|
|
|
|
|
|
|
return ( |
4069
|
|
|
|
|
|
|
exists $opts->{'value_if_ambiguous'} |
4070
|
|
|
|
|
|
|
? $opts->{'value_if_ambiguous'} |
4071
|
|
|
|
|
|
|
: $self->{'search_value_if_ambiguous'} |
4072
|
|
|
|
|
|
|
) if !$opt_ignore_ambiguous; |
4073
|
|
|
|
|
|
|
} |
4074
|
|
|
|
|
|
|
|
4075
|
144
|
100
|
|
|
|
304
|
return $opts->{value_if_found} if exists $opts->{value_if_found}; |
4076
|
|
|
|
|
|
|
return $self->{search_value_if_found} |
4077
|
140
|
50
|
|
|
|
255
|
if exists $opts->{search_value_if_found}; |
4078
|
|
|
|
|
|
|
|
4079
|
140
|
|
|
|
|
313
|
my $hr = $self->get_row_hr( $r->[0] ); |
4080
|
|
|
|
|
|
|
|
4081
|
140
|
|
|
|
|
574
|
return $hr->{$target_field}; |
4082
|
|
|
|
|
|
|
} |
4083
|
|
|
|
|
|
|
|
4084
|
|
|
|
|
|
|
1; |
4085
|
|
|
|
|
|
|
|
4086
|
|
|
|
|
|
|
__END__ |
4087
|
|
|
|
|
|
|
|
4088
|
|
|
|
|
|
|
=pod |
4089
|
|
|
|
|
|
|
|
4090
|
|
|
|
|
|
|
=encoding UTF-8 |
4091
|
|
|
|
|
|
|
|
4092
|
|
|
|
|
|
|
=head1 NAME |
4093
|
|
|
|
|
|
|
|
4094
|
|
|
|
|
|
|
Text::AutoCSV - helper module to automate the use of Text::CSV |
4095
|
|
|
|
|
|
|
|
4096
|
|
|
|
|
|
|
=head1 VERSION |
4097
|
|
|
|
|
|
|
|
4098
|
|
|
|
|
|
|
version 1.2.0 |
4099
|
|
|
|
|
|
|
|
4100
|
|
|
|
|
|
|
=head1 SYNOPSIS |
4101
|
|
|
|
|
|
|
|
4102
|
|
|
|
|
|
|
By default, Text::AutoCSV will detect the following characteristics of the |
4103
|
|
|
|
|
|
|
input: |
4104
|
|
|
|
|
|
|
|
4105
|
|
|
|
|
|
|
- The separator, among ",", ";" and "\t" (tab) |
4106
|
|
|
|
|
|
|
|
4107
|
|
|
|
|
|
|
- The escape character, among '"' (double-quote) and '\\' (backslash) |
4108
|
|
|
|
|
|
|
|
4109
|
|
|
|
|
|
|
- Try UTF-8 and if it fails, fall back on latin1 |
4110
|
|
|
|
|
|
|
|
4111
|
|
|
|
|
|
|
- Read the header line and compute field names |
4112
|
|
|
|
|
|
|
|
4113
|
|
|
|
|
|
|
- If asked to (see L</fields_dates_auto>), detect any field that contains a |
4114
|
|
|
|
|
|
|
DateTime value, trying 20 date formats, possibly followed by a time (6 time |
4115
|
|
|
|
|
|
|
formats tested) |
4116
|
|
|
|
|
|
|
|
4117
|
|
|
|
|
|
|
- If asked to (see L</fields_dates>), detect DateTime format of certain fields, |
4118
|
|
|
|
|
|
|
croak if no DateTime format can be worked out |
4119
|
|
|
|
|
|
|
|
4120
|
|
|
|
|
|
|
- Fields identified as containing a DateTime value (L</fields_dates_auto> or |
4121
|
|
|
|
|
|
|
L</fields_dates>) are stored as DateTime objects by default |
4122
|
|
|
|
|
|
|
|
4123
|
|
|
|
|
|
|
Text::AutoCSV also provides methods to search on fields (using cached hash |
4124
|
|
|
|
|
|
|
tables) and it can populate the value of "remote" fields, made from joining 2 |
4125
|
|
|
|
|
|
|
CSV files with a key-value search |
4126
|
|
|
|
|
|
|
|
4127
|
|
|
|
|
|
|
=head2 General |
4128
|
|
|
|
|
|
|
|
4129
|
|
|
|
|
|
|
use Text::AutoCSV; |
4130
|
|
|
|
|
|
|
|
4131
|
|
|
|
|
|
|
# Read CSV from std input, write to std output |
4132
|
|
|
|
|
|
|
Text::AutoCSV->new()->write(); |
4133
|
|
|
|
|
|
|
|
4134
|
|
|
|
|
|
|
# Read CSV data from f.csv, write to std output |
4135
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'f.csv')->write(); |
4136
|
|
|
|
|
|
|
|
4137
|
|
|
|
|
|
|
# Read CSV data from f.csv, write to g.csv |
4138
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'f.csv', out_file => 'g.csv')->write(); |
4139
|
|
|
|
|
|
|
|
4140
|
|
|
|
|
|
|
# "Rewrite" CSV file by printing out records as a list (separated by |
4141
|
|
|
|
|
|
|
# line breaks) of field name followed by its value. |
4142
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', walker_hr => \&walk); |
4143
|
|
|
|
|
|
|
my @cols = $csv->get_fields_names(); |
4144
|
|
|
|
|
|
|
$csv->read(); |
4145
|
|
|
|
|
|
|
sub walk { |
4146
|
|
|
|
|
|
|
my %rec = %{$_[0]}; |
4147
|
|
|
|
|
|
|
for (@cols) { |
4148
|
|
|
|
|
|
|
next if $_ eq ''; |
4149
|
|
|
|
|
|
|
print("$_ => ", $rec{$_}, "\n"); |
4150
|
|
|
|
|
|
|
} |
4151
|
|
|
|
|
|
|
print("\n"); |
4152
|
|
|
|
|
|
|
} |
4153
|
|
|
|
|
|
|
|
4154
|
|
|
|
|
|
|
=head2 OBJ-ish functions |
4155
|
|
|
|
|
|
|
|
4156
|
|
|
|
|
|
|
# Identify column internal names with more flexibility as the default |
4157
|
|
|
|
|
|
|
# mechanism |
4158
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'zips.csv', |
4159
|
|
|
|
|
|
|
fields_hr => {'CITY' => '^(city|town)', 'ZIPCODE' => '^zip(code)?$'}); |
4160
|
|
|
|
|
|
|
# Get zipcode of Claix |
4161
|
|
|
|
|
|
|
my $z = $csv->vlookup('CITY', ' claix ', 'ZIPCODE'); |
4162
|
|
|
|
|
|
|
|
4163
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'zips.csv'); |
4164
|
|
|
|
|
|
|
# Get zipcode of Claix |
4165
|
|
|
|
|
|
|
my $z = $csv->vlookup('CITY', ' claix ', 'ZIPCODE'); |
4166
|
|
|
|
|
|
|
# Same as above, but vlookup is strict for case and spaces around |
4167
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'zips.csv', search_case => 1, |
4168
|
|
|
|
|
|
|
search_trim => 0); |
4169
|
|
|
|
|
|
|
my $z = $csv->vlookup('CITY', 'Claix', 'ZIPCODE'); |
4170
|
|
|
|
|
|
|
|
4171
|
|
|
|
|
|
|
# Create field 'MYCITY' made by taking pers.csv' ZIP column value, looking |
4172
|
|
|
|
|
|
|
# it up in the ZIPCODE columns of zips.csv, taking CITY colmun value and |
4173
|
|
|
|
|
|
|
# naming it 'MYCITY'. Output is written in std output. If a zipcode is |
4174
|
|
|
|
|
|
|
# ambiguous, say it. |
4175
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'pers.csv') |
4176
|
|
|
|
|
|
|
->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv', |
4177
|
|
|
|
|
|
|
{ ignore_ambiguous => 0, |
4178
|
|
|
|
|
|
|
value_if_ambiguous => '<duplicate zipcode found!>' } |
4179
|
|
|
|
|
|
|
)->write(); |
4180
|
|
|
|
|
|
|
|
4181
|
|
|
|
|
|
|
# Note the above can also be written using Text::AutoCSV level attributes: |
4182
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'pers.csv', |
4183
|
|
|
|
|
|
|
search_ignore_ambiguous => 0, |
4184
|
|
|
|
|
|
|
search_value_if_ambiguous => '<duplicate zipcode found!>') |
4185
|
|
|
|
|
|
|
->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv')->write(); |
4186
|
|
|
|
|
|
|
|
4187
|
|
|
|
|
|
|
# Create 'MYCITY' field as above, then display some statistics |
4188
|
|
|
|
|
|
|
my $nom_compose = 0; |
4189
|
|
|
|
|
|
|
my $zip_not_found = 0; |
4190
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'pers.csv', walker_hr => \&walk) |
4191
|
|
|
|
|
|
|
->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv')->read(); |
4192
|
|
|
|
|
|
|
sub walk { |
4193
|
|
|
|
|
|
|
my $hr = shift; |
4194
|
|
|
|
|
|
|
$nom_compose++ if $hr->{'NAME'} =~ m/[- ]/; |
4195
|
|
|
|
|
|
|
$zip_not_found++ unless defined($hr->{'MYCITY'}); |
4196
|
|
|
|
|
|
|
} |
4197
|
|
|
|
|
|
|
print("Number of persons with a multi-part name: $nom_compose\n"); |
4198
|
|
|
|
|
|
|
print("Number of persons with unknown zipcode: $zip_not_found\n"); |
4199
|
|
|
|
|
|
|
|
4200
|
|
|
|
|
|
|
=head2 Updating |
4201
|
|
|
|
|
|
|
|
4202
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'names.csv', out_file => 'ucnames.csv', |
4203
|
|
|
|
|
|
|
read_post_update_hr => \&updt)->write(); |
4204
|
|
|
|
|
|
|
sub updt { $_[0]->{'LASTNAME'} =~ s/^.*$/\U&/; } |
4205
|
|
|
|
|
|
|
|
4206
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'squares.csv', |
4207
|
|
|
|
|
|
|
out_file => 'checkedsquares.csv', |
4208
|
|
|
|
|
|
|
out_filter => \&wf)->write(); |
4209
|
|
|
|
|
|
|
sub wf { return ($_[0]->{'X'} ** 2 == $_[0]->{'SQUAREOFX'}); } |
4210
|
|
|
|
|
|
|
|
4211
|
|
|
|
|
|
|
# Add a field for the full name, made of the concatenation of the |
4212
|
|
|
|
|
|
|
# first name and the last name. |
4213
|
|
|
|
|
|
|
# Also display stats about empty full names. |
4214
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'dirpeople.csv', out_file => 'dirwithfn.csv', |
4215
|
|
|
|
|
|
|
verbose => 1) |
4216
|
|
|
|
|
|
|
->field_add_computed('FULLNAME', \&calc_fn)->write(); |
4217
|
|
|
|
|
|
|
sub calc_fn { |
4218
|
|
|
|
|
|
|
my ($field, $hr, $stats) = @_; |
4219
|
|
|
|
|
|
|
my $fn = $hr->{'FIRSTNAME'} . ' ' . uc($hr->{'LASTNAME'}); |
4220
|
|
|
|
|
|
|
$stats->{'empty full name'}++ if $fn eq ' '; |
4221
|
|
|
|
|
|
|
return $fn; |
4222
|
|
|
|
|
|
|
} |
4223
|
|
|
|
|
|
|
|
4224
|
|
|
|
|
|
|
# Read a file with a lot of columns and keep only 2 columns in output |
4225
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'big.csv', out_file => 'addr.csv', |
4226
|
|
|
|
|
|
|
out_fields => ['NAME', 'ADDRESS']) |
4227
|
|
|
|
|
|
|
->out_header('ADDRESS', 'Postal Address') |
4228
|
|
|
|
|
|
|
->write(); |
4229
|
|
|
|
|
|
|
|
4230
|
|
|
|
|
|
|
=head2 Datetime management |
4231
|
|
|
|
|
|
|
|
4232
|
|
|
|
|
|
|
# Detect any field containing a DateTime value and convert it to yyyy-mm-dd |
4233
|
|
|
|
|
|
|
# whatever the input format is. |
4234
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', |
4235
|
|
|
|
|
|
|
fields_dates_auto => 1, out_dates_format => '%F')->write(); |
4236
|
|
|
|
|
|
|
|
4237
|
|
|
|
|
|
|
# Detect any field containing a DateTime value and convert it to a US |
4238
|
|
|
|
|
|
|
# DateTime whatever the input format is. |
4239
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', |
4240
|
|
|
|
|
|
|
fields_dates_auto => 1, |
4241
|
|
|
|
|
|
|
out_dates_format => '%b %d, %Y, %I:%M:%S %p', |
4242
|
|
|
|
|
|
|
out_dates_locale => 'en')->write(); |
4243
|
|
|
|
|
|
|
|
4244
|
|
|
|
|
|
|
# Find dates of specific formats and convert it into yyyy-mm-dd |
4245
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'raw.csv', out_file => 'cooked.csv', |
4246
|
|
|
|
|
|
|
dates_formats_to_try => ['%d_%m_%Y', '%m_%d_%Y', '%Y_%m_%d'], |
4247
|
|
|
|
|
|
|
out_dates_format => '%F')->write(); |
4248
|
|
|
|
|
|
|
|
4249
|
|
|
|
|
|
|
# Take the dates on columns 'LASTLOGIN' and 'CREATIONDATE' and convert it |
4250
|
|
|
|
|
|
|
# into French dates (day/month/year). |
4251
|
|
|
|
|
|
|
# Text::AutoCSV will croak if LASTLOGIN or CREATIONDATE do not contain a |
4252
|
|
|
|
|
|
|
# DateTime format. By default, Text::AutoCSV will try 20 different formats. |
4253
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', |
4254
|
|
|
|
|
|
|
fields_dates => ['LASTLOGIN', 'CREATIONDATE'], |
4255
|
|
|
|
|
|
|
out_dates_format => '%d/%m/%Y')->write(); |
4256
|
|
|
|
|
|
|
|
4257
|
|
|
|
|
|
|
# Convert 2 DateTime fields into unix standard epoch |
4258
|
|
|
|
|
|
|
# Write -1 if DateTime is empty. |
4259
|
|
|
|
|
|
|
sub toepoch { return $_->epoch() if $_; -1; } |
4260
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'stats.csv', out_file => 'stats-epoch.csv', |
4261
|
|
|
|
|
|
|
fields_dates => ['ATIME', 'MTIME']) |
4262
|
|
|
|
|
|
|
->in_map('ATIME', \&toepoch) |
4263
|
|
|
|
|
|
|
->in_map('MTIME', \&toepoch) |
4264
|
|
|
|
|
|
|
->write(); |
4265
|
|
|
|
|
|
|
|
4266
|
|
|
|
|
|
|
# Do the other way round from above: convert 2 fields containing unix |
4267
|
|
|
|
|
|
|
# standard epoch into a string displaying a human-readable DateTime. |
4268
|
|
|
|
|
|
|
my $formatter = DateTime::Format::Strptime->new( |
4269
|
|
|
|
|
|
|
pattern => 'DATE=%F, TIME=%T' |
4270
|
|
|
|
|
|
|
); |
4271
|
|
|
|
|
|
|
sub fromepoch { |
4272
|
|
|
|
|
|
|
return $formatter->format_datetime(DateTime->from_epoch(epoch => $_)) |
4273
|
|
|
|
|
|
|
if $_ >= 0; |
4274
|
|
|
|
|
|
|
''; |
4275
|
|
|
|
|
|
|
} |
4276
|
|
|
|
|
|
|
$csv = Text::AutoCSV->new(in_file => 'stats-epoch.csv', |
4277
|
|
|
|
|
|
|
out_file => 'stats2.csv' |
4278
|
|
|
|
|
|
|
) |
4279
|
|
|
|
|
|
|
->in_map('ATIME', \&fromepoch) |
4280
|
|
|
|
|
|
|
->in_map('MTIME', \&fromepoch) |
4281
|
|
|
|
|
|
|
->write(); |
4282
|
|
|
|
|
|
|
|
4283
|
|
|
|
|
|
|
=head2 Miscellaneous |
4284
|
|
|
|
|
|
|
|
4285
|
|
|
|
|
|
|
use Text::AutoCSV 'remove_accents'; |
4286
|
|
|
|
|
|
|
# Output 'Francais: etre elementaire, Tcheque: sluzba dum' followed by a new |
4287
|
|
|
|
|
|
|
# line. |
4288
|
|
|
|
|
|
|
print remove_accents("Français: être élémentaire, Tchèque: služba dům"), |
4289
|
|
|
|
|
|
|
"\n"; |
4290
|
|
|
|
|
|
|
|
4291
|
|
|
|
|
|
|
=head2 csvcopy.pl |
4292
|
|
|
|
|
|
|
|
4293
|
|
|
|
|
|
|
As of version 1.2.0, csvcopy.pl is installed along with Text::AutoCSV. |
4294
|
|
|
|
|
|
|
It is a command-line wrapper to Text::AutoCSV, run |
4295
|
|
|
|
|
|
|
|
4296
|
|
|
|
|
|
|
csvcopy.pl --help |
4297
|
|
|
|
|
|
|
|
4298
|
|
|
|
|
|
|
to get help. |
4299
|
|
|
|
|
|
|
|
4300
|
|
|
|
|
|
|
=for Pod::Coverage ERR_UNKNOWN_FIELD |
4301
|
|
|
|
|
|
|
|
4302
|
|
|
|
|
|
|
=head1 NAME |
4303
|
|
|
|
|
|
|
|
4304
|
|
|
|
|
|
|
Text::AutoCSV - helper module to automate the use of Text::CSV |
4305
|
|
|
|
|
|
|
|
4306
|
|
|
|
|
|
|
=head1 METHODS |
4307
|
|
|
|
|
|
|
|
4308
|
|
|
|
|
|
|
=head2 new |
4309
|
|
|
|
|
|
|
|
4310
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(%attr); |
4311
|
|
|
|
|
|
|
|
4312
|
|
|
|
|
|
|
(Class method) Returns a new instance of Text::AutoCSV. The object attributes |
4313
|
|
|
|
|
|
|
are described by the hash C<%attr> (can be empty). |
4314
|
|
|
|
|
|
|
|
4315
|
|
|
|
|
|
|
Currently the following attributes are available: |
4316
|
|
|
|
|
|
|
|
4317
|
|
|
|
|
|
|
=over 4 |
4318
|
|
|
|
|
|
|
|
4319
|
|
|
|
|
|
|
=item Preliminary note about L</fields_hr>, L</fields_ar> and L</fields_column_names> attributes |
4320
|
|
|
|
|
|
|
|
4321
|
|
|
|
|
|
|
By default, Text::AutoCSV assumes the input has a header and will use the field |
4322
|
|
|
|
|
|
|
values of this first line (the header) to work out the column internal names. |
4323
|
|
|
|
|
|
|
These internal names are used everywhere in Text::AutoCSV to designate columns. |
4324
|
|
|
|
|
|
|
|
4325
|
|
|
|
|
|
|
The values are transformed as follows: |
4326
|
|
|
|
|
|
|
|
4327
|
|
|
|
|
|
|
- All accents are removed using the exportable function L</remove_accents>. |
4328
|
|
|
|
|
|
|
|
4329
|
|
|
|
|
|
|
- Any non-alphanumeric character is removed (except underscore) and all letters |
4330
|
|
|
|
|
|
|
are switched to upper case. The regex to do this is |
4331
|
|
|
|
|
|
|
|
4332
|
|
|
|
|
|
|
s/[^[:alnum:]_]//gi; s/^.*$/\U$&/; |
4333
|
|
|
|
|
|
|
|
4334
|
|
|
|
|
|
|
Thus a header line of |
4335
|
|
|
|
|
|
|
|
4336
|
|
|
|
|
|
|
'Office Number 1,Office_2,Personal Number' |
4337
|
|
|
|
|
|
|
|
4338
|
|
|
|
|
|
|
will produce the internal column names |
4339
|
|
|
|
|
|
|
|
4340
|
|
|
|
|
|
|
'OFFICENUMBER1' (first column) |
4341
|
|
|
|
|
|
|
|
4342
|
|
|
|
|
|
|
'OFFICE_2' (second column) |
4343
|
|
|
|
|
|
|
|
4344
|
|
|
|
|
|
|
'PERSONALNUMBER' (third column). |
4345
|
|
|
|
|
|
|
|
4346
|
|
|
|
|
|
|
The attribute L</fields_hr>, L</fields_ar> or L</fields_column_names> (only one |
4347
|
|
|
|
|
|
|
of the three is useful at a time) allows to change this behavior. |
4348
|
|
|
|
|
|
|
|
4349
|
|
|
|
|
|
|
B<NOTE> |
4350
|
|
|
|
|
|
|
|
4351
|
|
|
|
|
|
|
The removal of accents is *not* a conversion to us-ascii, see L</remove_accents> |
4352
|
|
|
|
|
|
|
for details. |
4353
|
|
|
|
|
|
|
|
4354
|
|
|
|
|
|
|
=item Preliminary note about fields reading |
4355
|
|
|
|
|
|
|
|
4356
|
|
|
|
|
|
|
Functions that are given a field name (L</get_cell>, L</vlookup>, |
4357
|
|
|
|
|
|
|
L</field_add_copy>, ...) raise an error if the field requested does not exist. |
4358
|
|
|
|
|
|
|
|
4359
|
|
|
|
|
|
|
B<SO WILL THE HASHREFS GIVEN BY Text::AutoCSV:> when a function returns a |
4360
|
|
|
|
|
|
|
hashref (L</search_1hr>, L</get_row_hr>, ...), the hash is locked with the |
4361
|
|
|
|
|
|
|
C<lock_keys> function of C<Hash::Util>. Any attempt to read a non-existing key |
4362
|
|
|
|
|
|
|
from the hash causes a croak. This feature is de-activated if you specified |
4363
|
|
|
|
|
|
|
C<croak_if_error =E<gt> 0> when creating Text::AutoCSV object. |
4364
|
|
|
|
|
|
|
|
4365
|
|
|
|
|
|
|
=item in_file |
4366
|
|
|
|
|
|
|
|
4367
|
|
|
|
|
|
|
The name of the file to read CSV data from. |
4368
|
|
|
|
|
|
|
|
4369
|
|
|
|
|
|
|
If not specified or empty, read standard input. |
4370
|
|
|
|
|
|
|
|
4371
|
|
|
|
|
|
|
Example: |
4372
|
|
|
|
|
|
|
|
4373
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv'); |
4374
|
|
|
|
|
|
|
|
4375
|
|
|
|
|
|
|
=item inh |
4376
|
|
|
|
|
|
|
|
4377
|
|
|
|
|
|
|
File handle to read CSV data from. |
4378
|
|
|
|
|
|
|
Normally you don't want to specify this attribute. |
4379
|
|
|
|
|
|
|
|
4380
|
|
|
|
|
|
|
C<inh> is useful if you don't like the way Text::AutoCSV opens the input file |
4381
|
|
|
|
|
|
|
for you. |
4382
|
|
|
|
|
|
|
|
4383
|
|
|
|
|
|
|
Example: |
4384
|
|
|
|
|
|
|
|
4385
|
|
|
|
|
|
|
open my $inh, "producecsv.sh|"; |
4386
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(inh => $inh); |
4387
|
|
|
|
|
|
|
|
4388
|
|
|
|
|
|
|
=item encoding |
4389
|
|
|
|
|
|
|
|
4390
|
|
|
|
|
|
|
Comma-separated list of encodings to try to read input. |
4391
|
|
|
|
|
|
|
|
4392
|
|
|
|
|
|
|
Note that finding the correct encoding of any given input is overkill. This |
4393
|
|
|
|
|
|
|
script just tries encodings one after the other, and selects the first one that |
4394
|
|
|
|
|
|
|
does not trigger a warning during reading of input. If all produce warnings, |
4395
|
|
|
|
|
|
|
select the first one. |
4396
|
|
|
|
|
|
|
|
4397
|
|
|
|
|
|
|
The encoding chosen is used in output, unless attribute L</out_encoding> is |
4398
|
|
|
|
|
|
|
specified. |
4399
|
|
|
|
|
|
|
|
4400
|
|
|
|
|
|
|
Value by default: 'UTF-8,latin1' |
4401
|
|
|
|
|
|
|
|
4402
|
|
|
|
|
|
|
B<IMPORTANT> |
4403
|
|
|
|
|
|
|
|
4404
|
|
|
|
|
|
|
If one tries something like C<encoding =E<gt> 'latin1,UTF-8'>, it'll almost |
4405
|
|
|
|
|
|
|
never detect UTF-8 because latin1 rarely triggers warnings during reading. It |
4406
|
|
|
|
|
|
|
tends to be also true with encodings like UTF-16 that can remain happy with |
4407
|
|
|
|
|
|
|
various inputs (sometimes resulting in Western languages turned into Chinese |
4408
|
|
|
|
|
|
|
text). |
4409
|
|
|
|
|
|
|
|
4410
|
|
|
|
|
|
|
Ultimately this attribute should be used with a unique value. The result when |
4411
|
|
|
|
|
|
|
using more than one value can produce weird results and should be considered |
4412
|
|
|
|
|
|
|
B<experimental>. |
4413
|
|
|
|
|
|
|
|
4414
|
|
|
|
|
|
|
Example: |
4415
|
|
|
|
|
|
|
|
4416
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'w.csv', encoding => 'UTF-16'); |
4417
|
|
|
|
|
|
|
|
4418
|
|
|
|
|
|
|
=item via |
4419
|
|
|
|
|
|
|
|
4420
|
|
|
|
|
|
|
Adds a C<via> to the file opening instruction performed by Text::AutoCSV. You |
4421
|
|
|
|
|
|
|
don't want to use it under normal circumstances. |
4422
|
|
|
|
|
|
|
|
4423
|
|
|
|
|
|
|
The value should start with a ':' character (Text::AutoCSV won't add one for |
4424
|
|
|
|
|
|
|
you). |
4425
|
|
|
|
|
|
|
|
4426
|
|
|
|
|
|
|
Value by default: none |
4427
|
|
|
|
|
|
|
|
4428
|
|
|
|
|
|
|
Example: |
4429
|
|
|
|
|
|
|
|
4430
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', |
4431
|
|
|
|
|
|
|
via => ':raw:perlio:UTF-32:crlf'); |
4432
|
|
|
|
|
|
|
|
4433
|
|
|
|
|
|
|
=item dont_mess_with_encoding |
4434
|
|
|
|
|
|
|
|
4435
|
|
|
|
|
|
|
If true, just ignore completely encoding and don't try to alter I/O operations |
4436
|
|
|
|
|
|
|
with encoding considerations (using C<binmode> instruction). Note that if inh |
4437
|
|
|
|
|
|
|
attribute is specified, then Text::AutoCSV will consider the caller manages |
4438
|
|
|
|
|
|
|
encoding for himself and dont_mess_with_encoding will be automatically set, too. |
4439
|
|
|
|
|
|
|
|
4440
|
|
|
|
|
|
|
B<IMPORTANT> |
4441
|
|
|
|
|
|
|
|
4442
|
|
|
|
|
|
|
This attribute does not mean perl will totally ignore encoding and would |
4443
|
|
|
|
|
|
|
consider character strings as bytes for example. The meaning of |
4444
|
|
|
|
|
|
|
L</dont_mess_with_encoding> is that Text::AutoCSV itself will totally ignore |
4445
|
|
|
|
|
|
|
encoding matters, and leave it entirely to Perl' default. |
4446
|
|
|
|
|
|
|
|
4447
|
|
|
|
|
|
|
Value by default: |
4448
|
|
|
|
|
|
|
|
4449
|
|
|
|
|
|
|
0 if inh attribute is not set |
4450
|
|
|
|
|
|
|
1 if inh attribute is set |
4451
|
|
|
|
|
|
|
|
4452
|
|
|
|
|
|
|
Example: |
4453
|
|
|
|
|
|
|
|
4454
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', |
4455
|
|
|
|
|
|
|
dont_mess_with_encoding => 1); |
4456
|
|
|
|
|
|
|
|
4457
|
|
|
|
|
|
|
=item sep_char |
4458
|
|
|
|
|
|
|
|
4459
|
|
|
|
|
|
|
Specify the CSV separator character. Turns off separator auto-detection. This |
4460
|
|
|
|
|
|
|
attribute is passed as is to C<Text::CSV-E<gt>new()>. |
4461
|
|
|
|
|
|
|
|
4462
|
|
|
|
|
|
|
Example: |
4463
|
|
|
|
|
|
|
|
4464
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', sep_char => ';'); |
4465
|
|
|
|
|
|
|
|
4466
|
|
|
|
|
|
|
=item quote_char |
4467
|
|
|
|
|
|
|
|
4468
|
|
|
|
|
|
|
Specify the field quote character. This attribute is passed as is to |
4469
|
|
|
|
|
|
|
C<Text::CSV-E<gt>new()>. |
4470
|
|
|
|
|
|
|
|
4471
|
|
|
|
|
|
|
Value by default: double quote ('"') |
4472
|
|
|
|
|
|
|
|
4473
|
|
|
|
|
|
|
Example: |
4474
|
|
|
|
|
|
|
|
4475
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', quote_char => '\''); |
4476
|
|
|
|
|
|
|
|
4477
|
|
|
|
|
|
|
=item escape_char |
4478
|
|
|
|
|
|
|
|
4479
|
|
|
|
|
|
|
Specify the escape character. Turns off escape character auto-detection. This |
4480
|
|
|
|
|
|
|
attribute is passed as is to C<Text::CSV-E<gt>new()>. |
4481
|
|
|
|
|
|
|
|
4482
|
|
|
|
|
|
|
Value by default: backslash ('\\') |
4483
|
|
|
|
|
|
|
|
4484
|
|
|
|
|
|
|
Example: |
4485
|
|
|
|
|
|
|
|
4486
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', escape_char => '"'); |
4487
|
|
|
|
|
|
|
|
4488
|
|
|
|
|
|
|
=item in_csvobj |
4489
|
|
|
|
|
|
|
|
4490
|
|
|
|
|
|
|
Text::CSV object to use. |
4491
|
|
|
|
|
|
|
Normally you don't want to specify this attribute. |
4492
|
|
|
|
|
|
|
|
4493
|
|
|
|
|
|
|
By default, Text::AutoCSV will manage creating such an object and will work hard |
4494
|
|
|
|
|
|
|
to detect the parameters it requires. |
4495
|
|
|
|
|
|
|
|
4496
|
|
|
|
|
|
|
Defining C<in_csvobj> attribute turns off separator character and escape |
4497
|
|
|
|
|
|
|
character auto-detection. |
4498
|
|
|
|
|
|
|
|
4499
|
|
|
|
|
|
|
Using this attribute workarounds Text::AutoCSV philosophy a bit, but you may |
4500
|
|
|
|
|
|
|
need it in case Text::AutoCSV behavior is not suitable for Text::CSV creation. |
4501
|
|
|
|
|
|
|
|
4502
|
|
|
|
|
|
|
Example: |
4503
|
|
|
|
|
|
|
|
4504
|
|
|
|
|
|
|
my $tcsv = Text::CSV->new(); |
4505
|
|
|
|
|
|
|
my $acsv = Text::AutoCSV->new(in_file => 'in.csv', in_csvobj => $tcsv); |
4506
|
|
|
|
|
|
|
|
4507
|
|
|
|
|
|
|
=item has_headers |
4508
|
|
|
|
|
|
|
|
4509
|
|
|
|
|
|
|
If true, Text::AutoCSV assumes the input has a header line. |
4510
|
|
|
|
|
|
|
|
4511
|
|
|
|
|
|
|
Value by default: 1 |
4512
|
|
|
|
|
|
|
|
4513
|
|
|
|
|
|
|
Example: |
4514
|
|
|
|
|
|
|
|
4515
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', has_headers => 0); |
4516
|
|
|
|
|
|
|
|
4517
|
|
|
|
|
|
|
=item fields_hr |
4518
|
|
|
|
|
|
|
|
4519
|
|
|
|
|
|
|
(Only if input has a header line) Hash ref that contains column internal names |
4520
|
|
|
|
|
|
|
along with a regular expression to find it in the header line. |
4521
|
|
|
|
|
|
|
For example if you have: |
4522
|
|
|
|
|
|
|
|
4523
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', |
4524
|
|
|
|
|
|
|
fields_hr => {'PHONE OFFICE' => '^office phone nu', |
4525
|
|
|
|
|
|
|
'PHONE PERSONAL' => '^personal phone nu'}); |
4526
|
|
|
|
|
|
|
|
4527
|
|
|
|
|
|
|
And the header line is |
4528
|
|
|
|
|
|
|
|
4529
|
|
|
|
|
|
|
'Personal Phone Number,Office Phone Number' |
4530
|
|
|
|
|
|
|
|
4531
|
|
|
|
|
|
|
the column name 'PHONE OFFICE' will designate the second column and the column |
4532
|
|
|
|
|
|
|
name 'PHONE PERSONAL' will designate the first column. |
4533
|
|
|
|
|
|
|
|
4534
|
|
|
|
|
|
|
You can choose column names like 'Phone Office' and 'Phone Personal' as well. |
4535
|
|
|
|
|
|
|
|
4536
|
|
|
|
|
|
|
The regex search is case insensitive. |
4537
|
|
|
|
|
|
|
|
4538
|
|
|
|
|
|
|
=item fields_ar |
4539
|
|
|
|
|
|
|
|
4540
|
|
|
|
|
|
|
(Only if input has a header line) Array ref that contains column internal names. |
4541
|
|
|
|
|
|
|
The array is used to create a hash ref of the same kind as L</fields_hr>, by |
4542
|
|
|
|
|
|
|
wrapping the column name in a regex. The names are surrounded by a leading '^' |
4543
|
|
|
|
|
|
|
and a trailing '$', meaning, the name must match the entire field name. |
4544
|
|
|
|
|
|
|
|
4545
|
|
|
|
|
|
|
For example |
4546
|
|
|
|
|
|
|
|
4547
|
|
|
|
|
|
|
fields_ar => ['OFFICENUMBER', 'PERSONALNUMBER'] |
4548
|
|
|
|
|
|
|
|
4549
|
|
|
|
|
|
|
is strictly equivalent to |
4550
|
|
|
|
|
|
|
|
4551
|
|
|
|
|
|
|
fields_hr => {'OFFICENUMBER' => '^officenumber$', |
4552
|
|
|
|
|
|
|
'PERSONALNUMBER' = '^personalnumber$'} |
4553
|
|
|
|
|
|
|
|
4554
|
|
|
|
|
|
|
The regex search is case insensitive. |
4555
|
|
|
|
|
|
|
|
4556
|
|
|
|
|
|
|
C<fields_ar> is useful if the internal names are identical to the file column |
4557
|
|
|
|
|
|
|
names. It avoids repeating the names over and over as would happen if using |
4558
|
|
|
|
|
|
|
L</fields_hr> attribute. |
4559
|
|
|
|
|
|
|
|
4560
|
|
|
|
|
|
|
I<NOTE> |
4561
|
|
|
|
|
|
|
|
4562
|
|
|
|
|
|
|
You might wonder why using fields_ar as opposed to Text::AutoCSV default' |
4563
|
|
|
|
|
|
|
mechanism. There are two reasons for that: |
4564
|
|
|
|
|
|
|
|
4565
|
|
|
|
|
|
|
1- Text::AutoCSV removes spaces from column names, and some people may want |
4566
|
|
|
|
|
|
|
another behavior. A header name of 'Phone Number' will get an internal column |
4567
|
|
|
|
|
|
|
name of 'PHONENUMBER' (default behavior, if none of fields_hr, fields_ar and |
4568
|
|
|
|
|
|
|
fields_column_names attributes is specified), and one may prefer 'PHONE NUMBER' |
4569
|
|
|
|
|
|
|
or 'phone number' or whatsoever. |
4570
|
|
|
|
|
|
|
|
4571
|
|
|
|
|
|
|
2- By specifying a list of columns using either of fields_hr or fields_ar, you |
4572
|
|
|
|
|
|
|
not only map column names as found in the header line to internal column names: |
4573
|
|
|
|
|
|
|
you also I<request> these columns to be available. If one of the requested |
4574
|
|
|
|
|
|
|
columns cannot be found, Text::AutoCSV will croak (default) or print an error |
4575
|
|
|
|
|
|
|
and return an undef object (if created with C<croak_if_error =E<gt> 0>). |
4576
|
|
|
|
|
|
|
|
4577
|
|
|
|
|
|
|
=item fields_column_names |
4578
|
|
|
|
|
|
|
|
4579
|
|
|
|
|
|
|
Array ref of column internal names, in the order of columns in file. This |
4580
|
|
|
|
|
|
|
attribute works like the C<column_names> attribute of Text::CSV. It'll just |
4581
|
|
|
|
|
|
|
assign names to columns one by one, regardless of what the header line contains. |
4582
|
|
|
|
|
|
|
It'll work also if the file has no header line. |
4583
|
|
|
|
|
|
|
|
4584
|
|
|
|
|
|
|
Example: |
4585
|
|
|
|
|
|
|
|
4586
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', |
4587
|
|
|
|
|
|
|
fields_column_names => ['My COL1', '', 'My COL3']); |
4588
|
|
|
|
|
|
|
|
4589
|
|
|
|
|
|
|
=item out_file |
4590
|
|
|
|
|
|
|
|
4591
|
|
|
|
|
|
|
Output file when executing the L</write> method. |
4592
|
|
|
|
|
|
|
|
4593
|
|
|
|
|
|
|
If not specified or empty, write to standard output. |
4594
|
|
|
|
|
|
|
|
4595
|
|
|
|
|
|
|
Example: |
4596
|
|
|
|
|
|
|
|
4597
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv'); |
4598
|
|
|
|
|
|
|
|
4599
|
|
|
|
|
|
|
=item outh |
4600
|
|
|
|
|
|
|
|
4601
|
|
|
|
|
|
|
File handle to write CSV data to when executing the L</write> method. |
4602
|
|
|
|
|
|
|
Normally you don't want to specify this attribute. |
4603
|
|
|
|
|
|
|
|
4604
|
|
|
|
|
|
|
C<outh> is useful if you don't like the way Text::AutoCSV opens the output file |
4605
|
|
|
|
|
|
|
for you. |
4606
|
|
|
|
|
|
|
|
4607
|
|
|
|
|
|
|
Example: |
4608
|
|
|
|
|
|
|
|
4609
|
|
|
|
|
|
|
my $outh = open "myin.csv', ">>"; |
4610
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', has_headers => 0, |
4611
|
|
|
|
|
|
|
outh => $outh); |
4612
|
|
|
|
|
|
|
|
4613
|
|
|
|
|
|
|
=item out_encoding |
4614
|
|
|
|
|
|
|
|
4615
|
|
|
|
|
|
|
Enforce the encoding of output. |
4616
|
|
|
|
|
|
|
|
4617
|
|
|
|
|
|
|
Value by default: input encoding |
4618
|
|
|
|
|
|
|
|
4619
|
|
|
|
|
|
|
Example: |
4620
|
|
|
|
|
|
|
|
4621
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', |
4622
|
|
|
|
|
|
|
out_encoding => 'UTF-16'); |
4623
|
|
|
|
|
|
|
|
4624
|
|
|
|
|
|
|
=item out_utf8_bom |
4625
|
|
|
|
|
|
|
|
4626
|
|
|
|
|
|
|
Enforce BOM (Byte-Order-Mark) on output, when it is UTF8. If output encoding is |
4627
|
|
|
|
|
|
|
not UTF-8, this attribute is ignored. |
4628
|
|
|
|
|
|
|
|
4629
|
|
|
|
|
|
|
B<NOTE> |
4630
|
|
|
|
|
|
|
|
4631
|
|
|
|
|
|
|
UTF-8 needs no BOM (there is no Byte-Order in UTF-8), and in practice, |
4632
|
|
|
|
|
|
|
UTF8-encoded files rarely have a BOM. |
4633
|
|
|
|
|
|
|
|
4634
|
|
|
|
|
|
|
Using this attribute is not recommended. It is provided for the sake of |
4635
|
|
|
|
|
|
|
completeness, and also to produce Unicode files Microsoft EXCEL will be happy to |
4636
|
|
|
|
|
|
|
read. |
4637
|
|
|
|
|
|
|
|
4638
|
|
|
|
|
|
|
At first sight it would seem more logical to make EXCEL happy with something |
4639
|
|
|
|
|
|
|
like this: |
4640
|
|
|
|
|
|
|
|
4641
|
|
|
|
|
|
|
out_encoding => 'UTF-16' |
4642
|
|
|
|
|
|
|
|
4643
|
|
|
|
|
|
|
But... While EXCEL will identify UTF-16 and read it as such, it will not take |
4644
|
|
|
|
|
|
|
into account the BOM found at the beginning. In the end the first cell will have |
4645
|
|
|
|
|
|
|
2 useless characters prepended. The only solution the author knows to workaround |
4646
|
|
|
|
|
|
|
this issue if to use UTF-8 as output encoding, and enforce a BOM. That is, use: |
4647
|
|
|
|
|
|
|
|
4648
|
|
|
|
|
|
|
..., out_encoding => 'UTF-8', out_utf8_bom => 1, ... |
4649
|
|
|
|
|
|
|
|
4650
|
|
|
|
|
|
|
=item out_sep_char |
4651
|
|
|
|
|
|
|
|
4652
|
|
|
|
|
|
|
Enforce the output CSV separator character. |
4653
|
|
|
|
|
|
|
|
4654
|
|
|
|
|
|
|
Value by default: input separator |
4655
|
|
|
|
|
|
|
|
4656
|
|
|
|
|
|
|
Example: |
4657
|
|
|
|
|
|
|
|
4658
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', |
4659
|
|
|
|
|
|
|
out_sep_char => ','); |
4660
|
|
|
|
|
|
|
|
4661
|
|
|
|
|
|
|
=item out_quote_char |
4662
|
|
|
|
|
|
|
|
4663
|
|
|
|
|
|
|
Enforce the output CSV quote character. |
4664
|
|
|
|
|
|
|
|
4665
|
|
|
|
|
|
|
Value by default: input quote character |
4666
|
|
|
|
|
|
|
|
4667
|
|
|
|
|
|
|
Example: |
4668
|
|
|
|
|
|
|
|
4669
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', |
4670
|
|
|
|
|
|
|
out_quote_char => '"'); |
4671
|
|
|
|
|
|
|
|
4672
|
|
|
|
|
|
|
=item out_escape_char |
4673
|
|
|
|
|
|
|
|
4674
|
|
|
|
|
|
|
Enforce the output CSV escape character. |
4675
|
|
|
|
|
|
|
|
4676
|
|
|
|
|
|
|
Value by default: input escape character |
4677
|
|
|
|
|
|
|
|
4678
|
|
|
|
|
|
|
Example: |
4679
|
|
|
|
|
|
|
|
4680
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', |
4681
|
|
|
|
|
|
|
out_escape_char_char => '\\'); |
4682
|
|
|
|
|
|
|
|
4683
|
|
|
|
|
|
|
=item out_always_quote |
4684
|
|
|
|
|
|
|
|
4685
|
|
|
|
|
|
|
If true, quote all fields of output (set always_quote of Text::CSV). |
4686
|
|
|
|
|
|
|
|
4687
|
|
|
|
|
|
|
If false, don't quote all fields of output (don't set C<always_quote> of |
4688
|
|
|
|
|
|
|
Text::CSV). |
4689
|
|
|
|
|
|
|
|
4690
|
|
|
|
|
|
|
Value by default: same as what is found in input |
4691
|
|
|
|
|
|
|
|
4692
|
|
|
|
|
|
|
While reading input, Text::AutoCSV works out whether or not all fields were |
4693
|
|
|
|
|
|
|
quoted. If yes, then the output Text::CSV object has the always_quote attribute |
4694
|
|
|
|
|
|
|
set, if no, then the output Text::CSV object does not have this attribute set. |
4695
|
|
|
|
|
|
|
|
4696
|
|
|
|
|
|
|
Example: |
4697
|
|
|
|
|
|
|
|
4698
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', |
4699
|
|
|
|
|
|
|
out_always_quote => 1); |
4700
|
|
|
|
|
|
|
|
4701
|
|
|
|
|
|
|
=item out_has_headers |
4702
|
|
|
|
|
|
|
|
4703
|
|
|
|
|
|
|
If true, when writing output, write a header line on first line. |
4704
|
|
|
|
|
|
|
|
4705
|
|
|
|
|
|
|
If false, when writing output, don't write a header line on first line. |
4706
|
|
|
|
|
|
|
|
4707
|
|
|
|
|
|
|
Value by default: same as has_headers attribute |
4708
|
|
|
|
|
|
|
|
4709
|
|
|
|
|
|
|
Example 1 |
4710
|
|
|
|
|
|
|
|
4711
|
|
|
|
|
|
|
Read standard input and write to standard output, removing the header line. |
4712
|
|
|
|
|
|
|
|
4713
|
|
|
|
|
|
|
Text::AutoCSV->new(out_has_headers => 0)->write(); |
4714
|
|
|
|
|
|
|
|
4715
|
|
|
|
|
|
|
Example 2 |
4716
|
|
|
|
|
|
|
|
4717
|
|
|
|
|
|
|
Read standard input and write to standard output, adding a header line. |
4718
|
|
|
|
|
|
|
|
4719
|
|
|
|
|
|
|
Text::AutoCSV->new(fields_column_names => ['MYCOL1', 'MYCOL2'], |
4720
|
|
|
|
|
|
|
out_has_headers => 1)->write(); |
4721
|
|
|
|
|
|
|
|
4722
|
|
|
|
|
|
|
=item no_undef |
4723
|
|
|
|
|
|
|
|
4724
|
|
|
|
|
|
|
If true, non-existent column values are set to an empty string instead of undef. |
4725
|
|
|
|
|
|
|
It is also done on extra fields that happen to have an undef value (for example |
4726
|
|
|
|
|
|
|
when the target of a linked field is not found). |
4727
|
|
|
|
|
|
|
|
4728
|
|
|
|
|
|
|
Note this attribute does not work on callback functions output set with |
4729
|
|
|
|
|
|
|
L</in_map>: for example empty DateTime values (on fields identified as |
4730
|
|
|
|
|
|
|
containing a date/time, see C<dates_*> attributes below) are set to C<undef>, |
4731
|
|
|
|
|
|
|
even while C<no_undef> is set. Indeed setting it to an empty string while |
4732
|
|
|
|
|
|
|
non-empty values would contain a Datetime object would not be clean. An empty |
4733
|
|
|
|
|
|
|
value in a placeholder containing an object must be undef. |
4734
|
|
|
|
|
|
|
|
4735
|
|
|
|
|
|
|
Since version 1.1.5 of Text::AutoCSV, C<no_undef> is examined when sending |
4736
|
|
|
|
|
|
|
parameter ($_) to L</in_map> callback: an undef value is now passed as is (as |
4737
|
|
|
|
|
|
|
undef), unless C<no_undef> is set. If C<no_undef> is set, and field value is |
4738
|
|
|
|
|
|
|
undef, then $_ is set to the empty string ('') when calling callback defined by |
4739
|
|
|
|
|
|
|
L</in_map>. This new behavior was put in place to be consistent with what is |
4740
|
|
|
|
|
|
|
being done with DateTime values. |
4741
|
|
|
|
|
|
|
|
4742
|
|
|
|
|
|
|
Value by default: 0 |
4743
|
|
|
|
|
|
|
|
4744
|
|
|
|
|
|
|
Example: |
4745
|
|
|
|
|
|
|
|
4746
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', no_undef => 1); |
4747
|
|
|
|
|
|
|
|
4748
|
|
|
|
|
|
|
=item read_post_update_hr |
4749
|
|
|
|
|
|
|
|
4750
|
|
|
|
|
|
|
To be set to a ref sub. Each time a record is read from input, call |
4751
|
|
|
|
|
|
|
C<read_post_update_hr> to update the hash ref of the record. The sub is called |
4752
|
|
|
|
|
|
|
with 2 arguments: the hash ref to the record value and the hash ref to stats. |
4753
|
|
|
|
|
|
|
|
4754
|
|
|
|
|
|
|
The stats allow to count events and are printed in the end of reading in case |
4755
|
|
|
|
|
|
|
Text::AutoCSV is called in verbose mode (C<verbose =E<gt> 1>). |
4756
|
|
|
|
|
|
|
|
4757
|
|
|
|
|
|
|
For example, the C<read_post_update_hr> below will turn column 'CITY' values in |
4758
|
|
|
|
|
|
|
upper case and count occurences of empty cities in stat display: |
4759
|
|
|
|
|
|
|
|
4760
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'addresses.csv', |
4761
|
|
|
|
|
|
|
read_post_update_hr => \&updt, verbose => 1) |
4762
|
|
|
|
|
|
|
->write(); |
4763
|
|
|
|
|
|
|
sub updt { |
4764
|
|
|
|
|
|
|
my ($hr, $stats) = @_; |
4765
|
|
|
|
|
|
|
$hr->{'CITY'} =~ s/^.*$/\U$&/; |
4766
|
|
|
|
|
|
|
$stats->{'empty city encountered'}++ if $hr->{'CITY'} eq ''; |
4767
|
|
|
|
|
|
|
} |
4768
|
|
|
|
|
|
|
|
4769
|
|
|
|
|
|
|
B<IMPORTANT> |
4770
|
|
|
|
|
|
|
|
4771
|
|
|
|
|
|
|
You cannot create a field this way. To create a field, you have to use the |
4772
|
|
|
|
|
|
|
member functions L</field_add_link>, L</field_add_copy> or |
4773
|
|
|
|
|
|
|
L</field_add_computed>. |
4774
|
|
|
|
|
|
|
|
4775
|
|
|
|
|
|
|
B<NOTE> |
4776
|
|
|
|
|
|
|
|
4777
|
|
|
|
|
|
|
If you wish to manage some updates at field level, consider registering update |
4778
|
|
|
|
|
|
|
functions with L</in_map> and L</out_map> member functions. These functions |
4779
|
|
|
|
|
|
|
register callbacks that work at field level and with $_ variable (thus the |
4780
|
|
|
|
|
|
|
callback function invoked is AutoCSV-agnostic). |
4781
|
|
|
|
|
|
|
|
4782
|
|
|
|
|
|
|
L</in_map> updates a field after read, L</out_map> updates the field content |
4783
|
|
|
|
|
|
|
before writing it. |
4784
|
|
|
|
|
|
|
|
4785
|
|
|
|
|
|
|
=item walker_hr |
4786
|
|
|
|
|
|
|
|
4787
|
|
|
|
|
|
|
To set to a sub ref that'll be executed each time a record is read from input. |
4788
|
|
|
|
|
|
|
It is executed after L</read_post_update_hr>. The sub is called with 2 |
4789
|
|
|
|
|
|
|
arguments: the hash ref to the record value and the hash ref to stats. |
4790
|
|
|
|
|
|
|
|
4791
|
|
|
|
|
|
|
Note L</read_post_update_hr> is meant for updating record fields just after |
4792
|
|
|
|
|
|
|
reading, whereas L</walker_hr> is read-only. |
4793
|
|
|
|
|
|
|
|
4794
|
|
|
|
|
|
|
The stats allow to count events and are printed in the end of reading in case |
4795
|
|
|
|
|
|
|
Text::AutoCSV is called in verbose mode (C<verbose =E<gt> 1>). If the |
4796
|
|
|
|
|
|
|
L</verbose> attribute is not set, the stats are not displayed, however you can |
4797
|
|
|
|
|
|
|
get stats by calling the get_stats function. |
4798
|
|
|
|
|
|
|
|
4799
|
|
|
|
|
|
|
The example below will count in the stats the number of records where the 'CITY' |
4800
|
|
|
|
|
|
|
field is empty. Thanks to C<verbose =E<gt> 1> attribute, at the end of reading |
4801
|
|
|
|
|
|
|
the stats are displayed. |
4802
|
|
|
|
|
|
|
|
4803
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'addresses.csv', |
4804
|
|
|
|
|
|
|
walker_hr => \&walk1, verbose => 1)->read(); |
4805
|
|
|
|
|
|
|
sub walk1 { |
4806
|
|
|
|
|
|
|
my ($hr, $stats) = @_; |
4807
|
|
|
|
|
|
|
$stats->{'empty city'}++ if $hr->{'CITY'} eq ''; |
4808
|
|
|
|
|
|
|
} |
4809
|
|
|
|
|
|
|
|
4810
|
|
|
|
|
|
|
=item walker_ar |
4811
|
|
|
|
|
|
|
|
4812
|
|
|
|
|
|
|
To set to a sub ref that'll be executed each time a record is read from input. |
4813
|
|
|
|
|
|
|
It is executed after L</read_post_update_hr>. The sub is called with 2 |
4814
|
|
|
|
|
|
|
arguments: the array ref to the record value and the hash ref to stats. |
4815
|
|
|
|
|
|
|
|
4816
|
|
|
|
|
|
|
Note L</read_post_update_hr> is meant for updating record fields just after |
4817
|
|
|
|
|
|
|
reading, whereas C<walker_hr> is read-only. |
4818
|
|
|
|
|
|
|
|
4819
|
|
|
|
|
|
|
The stats allow to count events and are printed in the end of reading in case |
4820
|
|
|
|
|
|
|
Text::AutoCSV is called in verbose mode (C<verbose =E<gt> 1>). If the |
4821
|
|
|
|
|
|
|
L</verbose> attribute is not set, the stats are lost. |
4822
|
|
|
|
|
|
|
|
4823
|
|
|
|
|
|
|
The array ref contains values in their natural order in the CSV. To be used with |
4824
|
|
|
|
|
|
|
the column names, you have to use L</get_fields_names> member function. |
4825
|
|
|
|
|
|
|
|
4826
|
|
|
|
|
|
|
The example below will count in the stats the number of records where the 'CITY' |
4827
|
|
|
|
|
|
|
field is empty. Thanks to C<verbose =E<gt> 1> attribute, at the end of reading |
4828
|
|
|
|
|
|
|
the stats are displayed. It produces the exact same result as the example in |
4829
|
|
|
|
|
|
|
walker_hr attribute, but it uses walker_ar. |
4830
|
|
|
|
|
|
|
|
4831
|
|
|
|
|
|
|
use List::MoreUtils qw(first_index); |
4832
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'addresses.csv', |
4833
|
|
|
|
|
|
|
walker_ar => \&walk2, verbose => 1); |
4834
|
|
|
|
|
|
|
my @cols = $csv->get_fields_names(); |
4835
|
|
|
|
|
|
|
my $idxCITY = first_index { /^city$/i } @cols; |
4836
|
|
|
|
|
|
|
die "No city field!??" if $idxCITY < 0; |
4837
|
|
|
|
|
|
|
$csv->read(); |
4838
|
|
|
|
|
|
|
sub walk2 { |
4839
|
|
|
|
|
|
|
my ($ar, $stats) = @_; |
4840
|
|
|
|
|
|
|
$stats->{'empty city'}++ if $ar->[$idxCITY] eq ''; |
4841
|
|
|
|
|
|
|
} |
4842
|
|
|
|
|
|
|
|
4843
|
|
|
|
|
|
|
=item write_filter_hr |
4844
|
|
|
|
|
|
|
|
4845
|
|
|
|
|
|
|
Alias of L</out_filter>. |
4846
|
|
|
|
|
|
|
|
4847
|
|
|
|
|
|
|
=item out_filter |
4848
|
|
|
|
|
|
|
|
4849
|
|
|
|
|
|
|
To be set to a ref sub. Before writing a record to output, C<out_filter> is |
4850
|
|
|
|
|
|
|
called and the record gets writen only if C<out_filter> return value is true. |
4851
|
|
|
|
|
|
|
The sub is called with 1 argument: the hash ref to the record value. |
4852
|
|
|
|
|
|
|
|
4853
|
|
|
|
|
|
|
For example, if you want to output only records where the 'CITY' column value is |
4854
|
|
|
|
|
|
|
Grenoble: |
4855
|
|
|
|
|
|
|
|
4856
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'addresses.csv', out_file => 'grenoble.csv', |
4857
|
|
|
|
|
|
|
out_filter => \&filt)->write(); |
4858
|
|
|
|
|
|
|
sub filt { |
4859
|
|
|
|
|
|
|
my $hr = shift; |
4860
|
|
|
|
|
|
|
return 1 if $hr->{'CITY'} =~ /^grenoble$/i; |
4861
|
|
|
|
|
|
|
return 0; |
4862
|
|
|
|
|
|
|
} |
4863
|
|
|
|
|
|
|
|
4864
|
|
|
|
|
|
|
=item write_fields |
4865
|
|
|
|
|
|
|
|
4866
|
|
|
|
|
|
|
Alias of L</out_fields>. |
4867
|
|
|
|
|
|
|
|
4868
|
|
|
|
|
|
|
=item out_fields |
4869
|
|
|
|
|
|
|
|
4870
|
|
|
|
|
|
|
Set to an array ref. List fields to write to output. |
4871
|
|
|
|
|
|
|
|
4872
|
|
|
|
|
|
|
Fields are written in their order in the array ref, the first CSV column being |
4873
|
|
|
|
|
|
|
the first element in the array, and so on. Fields not listed in B<out_fields> |
4874
|
|
|
|
|
|
|
are not written in output. |
4875
|
|
|
|
|
|
|
|
4876
|
|
|
|
|
|
|
You can use empty field names to have empty columns in output. |
4877
|
|
|
|
|
|
|
|
4878
|
|
|
|
|
|
|
Value by default: none, meaning, all fields are output in their natural order. |
4879
|
|
|
|
|
|
|
What is natural order? It is the input order for fields that were read from |
4880
|
|
|
|
|
|
|
input, and the order in which they got created for created fields. |
4881
|
|
|
|
|
|
|
|
4882
|
|
|
|
|
|
|
Example: |
4883
|
|
|
|
|
|
|
|
4884
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'allinfos.csv', |
4885
|
|
|
|
|
|
|
out_file => 'only-addresses.csv', out_fields => [ 'NAME', 'ADDRESS' ] |
4886
|
|
|
|
|
|
|
)->write(); |
4887
|
|
|
|
|
|
|
|
4888
|
|
|
|
|
|
|
=item out_orderby |
4889
|
|
|
|
|
|
|
|
4890
|
|
|
|
|
|
|
Array reference to a list of fields to sort output with. |
4891
|
|
|
|
|
|
|
|
4892
|
|
|
|
|
|
|
At the moment this feature is a bit of a hack (no option to make sort descending |
4893
|
|
|
|
|
|
|
or ascending, numeric or text, and it is not part of test plan). |
4894
|
|
|
|
|
|
|
|
4895
|
|
|
|
|
|
|
Example: |
4896
|
|
|
|
|
|
|
|
4897
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'names.csv', out_file => 'sortednames.csv', |
4898
|
|
|
|
|
|
|
out_orderby => [ 'LASTNAME', 'FIRSTNAME']); |
4899
|
|
|
|
|
|
|
|
4900
|
|
|
|
|
|
|
=item search_case |
4901
|
|
|
|
|
|
|
|
4902
|
|
|
|
|
|
|
If true, searches are case sensitive by default. Searches are done by the member |
4903
|
|
|
|
|
|
|
functions L</search>, L</search_1hr>, L</vlookup>, and linked fields |
4904
|
|
|
|
|
|
|
(L</field_add_link>). |
4905
|
|
|
|
|
|
|
|
4906
|
|
|
|
|
|
|
The search functions can also be called with the option L</case>, that takes |
4907
|
|
|
|
|
|
|
precedence over the object-level C<search_case> attribute value. See L</vlookup> |
4908
|
|
|
|
|
|
|
help. |
4909
|
|
|
|
|
|
|
|
4910
|
|
|
|
|
|
|
Value by default: 0 (by default searches are case insensitive) |
4911
|
|
|
|
|
|
|
|
4912
|
|
|
|
|
|
|
Example: |
4913
|
|
|
|
|
|
|
|
4914
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', search_case => 1); |
4915
|
|
|
|
|
|
|
|
4916
|
|
|
|
|
|
|
=item search_trim |
4917
|
|
|
|
|
|
|
|
4918
|
|
|
|
|
|
|
If true, searches ignore the presence of leading or trailing spaces in values. |
4919
|
|
|
|
|
|
|
|
4920
|
|
|
|
|
|
|
The search functions can also be called with the option L</trim>, that takes |
4921
|
|
|
|
|
|
|
precedence over the object-level C<search_trim> attribute value. See L</vlookup> |
4922
|
|
|
|
|
|
|
help. |
4923
|
|
|
|
|
|
|
|
4924
|
|
|
|
|
|
|
Value by default: 1 (by default searches ignore leading and trailing spaces) |
4925
|
|
|
|
|
|
|
|
4926
|
|
|
|
|
|
|
Example: |
4927
|
|
|
|
|
|
|
|
4928
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', search_trim => 0); |
4929
|
|
|
|
|
|
|
|
4930
|
|
|
|
|
|
|
=item search_ignore_empty |
4931
|
|
|
|
|
|
|
|
4932
|
|
|
|
|
|
|
If true, empty fields are not included in the search indexes. |
4933
|
|
|
|
|
|
|
|
4934
|
|
|
|
|
|
|
The search functions can also be called with the option L</ignore_empty>, that |
4935
|
|
|
|
|
|
|
takes precedence over the object-level C<search_ignore_empty> attribute value. |
4936
|
|
|
|
|
|
|
See L</vlookup> help. |
4937
|
|
|
|
|
|
|
|
4938
|
|
|
|
|
|
|
Value by default: 1 (by default, search of the value '' will find nothing) |
4939
|
|
|
|
|
|
|
|
4940
|
|
|
|
|
|
|
Example: |
4941
|
|
|
|
|
|
|
|
4942
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', search_ignore_empty => 0); |
4943
|
|
|
|
|
|
|
|
4944
|
|
|
|
|
|
|
=item search_ignore_accents |
4945
|
|
|
|
|
|
|
|
4946
|
|
|
|
|
|
|
If true, accents are ignored by search indexes. |
4947
|
|
|
|
|
|
|
|
4948
|
|
|
|
|
|
|
The search functions can also be called with the option L</ignore_accents>, that |
4949
|
|
|
|
|
|
|
takes precedence over the object-level C<search_ignore_accents> attribute value. |
4950
|
|
|
|
|
|
|
See L</vlookup> help. |
4951
|
|
|
|
|
|
|
|
4952
|
|
|
|
|
|
|
Value by default: 1 (by default, accents are ignored by search functions) |
4953
|
|
|
|
|
|
|
|
4954
|
|
|
|
|
|
|
Example: |
4955
|
|
|
|
|
|
|
|
4956
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', |
4957
|
|
|
|
|
|
|
search_ignore_accents => 0); |
4958
|
|
|
|
|
|
|
|
4959
|
|
|
|
|
|
|
=item search_value_if_not_found |
4960
|
|
|
|
|
|
|
|
4961
|
|
|
|
|
|
|
When a search is done with a unique value to return (field_add_link member |
4962
|
|
|
|
|
|
|
function behavior or return value of vlookup), default value of option |
4963
|
|
|
|
|
|
|
L</value_if_not_found>. See L</vlookup>. |
4964
|
|
|
|
|
|
|
|
4965
|
|
|
|
|
|
|
=item search_value_if_found |
4966
|
|
|
|
|
|
|
|
4967
|
|
|
|
|
|
|
When a search is done with a unique value to return (field_add_link member |
4968
|
|
|
|
|
|
|
function behavior or return value of vlookup), default value of option |
4969
|
|
|
|
|
|
|
L</value_if_found>. See L</vlookup>. |
4970
|
|
|
|
|
|
|
|
4971
|
|
|
|
|
|
|
B<IMPORTANT> |
4972
|
|
|
|
|
|
|
|
4973
|
|
|
|
|
|
|
This attribute is extremly unusual. Once you've provided it, all vlookups and |
4974
|
|
|
|
|
|
|
the target field value of fields created with field_add_link will all be |
4975
|
|
|
|
|
|
|
populated with the value provided with this option. |
4976
|
|
|
|
|
|
|
|
4977
|
|
|
|
|
|
|
Don't use it unless you know what you are doing. |
4978
|
|
|
|
|
|
|
|
4979
|
|
|
|
|
|
|
=item search_ignore_ambiguous |
4980
|
|
|
|
|
|
|
|
4981
|
|
|
|
|
|
|
When a search is done with a unique value to return (field_add_link member |
4982
|
|
|
|
|
|
|
function behavior or return value of search_1hr and vlookup), default value of |
4983
|
|
|
|
|
|
|
option L</ignore_ambiguous>. See L</vlookup>. |
4984
|
|
|
|
|
|
|
|
4985
|
|
|
|
|
|
|
=item search_value_if_ambiguous |
4986
|
|
|
|
|
|
|
|
4987
|
|
|
|
|
|
|
When a search is done with a unique value to return (field_add_link member |
4988
|
|
|
|
|
|
|
function behavior or return value of vlookup), default value of option |
4989
|
|
|
|
|
|
|
L</value_if_ambiguous>. See L</vlookup>. |
4990
|
|
|
|
|
|
|
|
4991
|
|
|
|
|
|
|
=item fields_dates |
4992
|
|
|
|
|
|
|
|
4993
|
|
|
|
|
|
|
Array ref of field names that contain a date. |
4994
|
|
|
|
|
|
|
|
4995
|
|
|
|
|
|
|
Once the formats of these fields is known (auto-detection by default), each of |
4996
|
|
|
|
|
|
|
these fields will get a specific L</in_map> sub that converts the text in a |
4997
|
|
|
|
|
|
|
DateTime object and a L</out_map> sub that converts back from DateTime to text. |
4998
|
|
|
|
|
|
|
|
4999
|
|
|
|
|
|
|
B<NOTE> |
5000
|
|
|
|
|
|
|
|
5001
|
|
|
|
|
|
|
The L</out_map> given to a DateTime field is "defensive code": normally, |
5002
|
|
|
|
|
|
|
L</in_map> converts text into a DateTime object and L</out_map> does the |
5003
|
|
|
|
|
|
|
opposite, it takes a DateTime object and converts it to text. If ever |
5004
|
|
|
|
|
|
|
L</out_map> encounters a value that is not a DateTime object, it'll just |
5005
|
|
|
|
|
|
|
stringify it (evaluation in a string context), without calling its DateTime |
5006
|
|
|
|
|
|
|
formatter. |
5007
|
|
|
|
|
|
|
|
5008
|
|
|
|
|
|
|
If the format cannot be detected for a given field, output an error message and |
5009
|
|
|
|
|
|
|
as always when an error occurs, croak (unless L</croak_if_error> got set to 0). |
5010
|
|
|
|
|
|
|
|
5011
|
|
|
|
|
|
|
Value by default: none |
5012
|
|
|
|
|
|
|
|
5013
|
|
|
|
|
|
|
Example: |
5014
|
|
|
|
|
|
|
|
5015
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'logins.csv', |
5016
|
|
|
|
|
|
|
fields_dates => ['LASTLOGIN', 'CREATIONDATE']); |
5017
|
|
|
|
|
|
|
|
5018
|
|
|
|
|
|
|
=item fields_dates_auto |
5019
|
|
|
|
|
|
|
|
5020
|
|
|
|
|
|
|
Boolean value. If set to 1, will detect dates formats on all fields. Fields in |
5021
|
|
|
|
|
|
|
which a DateTime format got detected are then managed as if they had been being |
5022
|
|
|
|
|
|
|
listed in L</fields_dates> attribute: they get an appropriate L</in_map> sub and |
5023
|
|
|
|
|
|
|
a L</out_map> sub to convert to and from DateTime (see L</fields_dates> |
5024
|
|
|
|
|
|
|
attribute above). |
5025
|
|
|
|
|
|
|
|
5026
|
|
|
|
|
|
|
C<fields_dates_auto> looks for DateTime on all fields, but it expects nothing: |
5027
|
|
|
|
|
|
|
it won't raise an error if no field is found that contains DateTime. |
5028
|
|
|
|
|
|
|
|
5029
|
|
|
|
|
|
|
Value by default: 0 |
5030
|
|
|
|
|
|
|
|
5031
|
|
|
|
|
|
|
Example: |
5032
|
|
|
|
|
|
|
|
5033
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'logins.csv', |
5034
|
|
|
|
|
|
|
fields_dates_auto => 1); |
5035
|
|
|
|
|
|
|
|
5036
|
|
|
|
|
|
|
=item fields_dates_auto_optimize |
5037
|
|
|
|
|
|
|
|
5038
|
|
|
|
|
|
|
Relevant only if L</fields_dates_auto> is set. |
5039
|
|
|
|
|
|
|
|
5040
|
|
|
|
|
|
|
Normally when L</fields_dates_auto> is set, the input is read completely to make |
5041
|
|
|
|
|
|
|
sure auto-detection produces a reliable result. If C<fields_dates_auto_optimize> |
5042
|
|
|
|
|
|
|
is set, this reading pass will stop as soon as there is no ambiguity left. That |
5043
|
|
|
|
|
|
|
is, for every fields in input, the date format (or the fact that no date format |
5044
|
|
|
|
|
|
|
is suitable) is known. |
5045
|
|
|
|
|
|
|
|
5046
|
|
|
|
|
|
|
Using this option is a bit risky because it could trigger a date format |
5047
|
|
|
|
|
|
|
detection that later in the input, would turn out to be wrong. Should that be |
5048
|
|
|
|
|
|
|
the case, strange errors will occur, that are not easy to understand. Use it at |
5049
|
|
|
|
|
|
|
your own risk. |
5050
|
|
|
|
|
|
|
|
5051
|
|
|
|
|
|
|
Value by default: 0 |
5052
|
|
|
|
|
|
|
|
5053
|
|
|
|
|
|
|
Example: |
5054
|
|
|
|
|
|
|
|
5055
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'logins.csv', |
5056
|
|
|
|
|
|
|
fields_dates_auto => 1, fields_dates_auto_optimize => 1); |
5057
|
|
|
|
|
|
|
|
5058
|
|
|
|
|
|
|
=item dates_formats_to_try |
5059
|
|
|
|
|
|
|
|
5060
|
|
|
|
|
|
|
Array ref of string formats. |
5061
|
|
|
|
|
|
|
|
5062
|
|
|
|
|
|
|
Text::AutoCSV has a default built-in list of 20 date formats to try and 6 time |
5063
|
|
|
|
|
|
|
formats (also it'll combine any date format with any time format). |
5064
|
|
|
|
|
|
|
|
5065
|
|
|
|
|
|
|
C<dates_formats_to_try> will replace Text::AutoCSV default format-list will the |
5066
|
|
|
|
|
|
|
one you specify, in case the default would not produce the results you expect. |
5067
|
|
|
|
|
|
|
|
5068
|
|
|
|
|
|
|
The formats are written in Strptime format. |
5069
|
|
|
|
|
|
|
|
5070
|
|
|
|
|
|
|
Value by default (see below about the role of the pseudo-format ''): |
5071
|
|
|
|
|
|
|
|
5072
|
|
|
|
|
|
|
[ '', |
5073
|
|
|
|
|
|
|
'%Y-%m-%d', |
5074
|
|
|
|
|
|
|
'%Y.%m.%d', |
5075
|
|
|
|
|
|
|
'%Y/%m/%d', |
5076
|
|
|
|
|
|
|
'%m.%d.%y', |
5077
|
|
|
|
|
|
|
'%m-%d-%Y', |
5078
|
|
|
|
|
|
|
'%m.%d.%Y', |
5079
|
|
|
|
|
|
|
'%m/%d/%Y', |
5080
|
|
|
|
|
|
|
'%d-%m-%Y', |
5081
|
|
|
|
|
|
|
'%d.%m.%Y', |
5082
|
|
|
|
|
|
|
'%d/%m/%Y', |
5083
|
|
|
|
|
|
|
'%m-%d-%y', |
5084
|
|
|
|
|
|
|
'%m/%d/%y', |
5085
|
|
|
|
|
|
|
'%d-%m-%y', |
5086
|
|
|
|
|
|
|
'%d.%m.%y', |
5087
|
|
|
|
|
|
|
'%d/%m/%y', |
5088
|
|
|
|
|
|
|
'%Y%m%d%H%M%S', |
5089
|
|
|
|
|
|
|
'%b %d, %Y', |
5090
|
|
|
|
|
|
|
'%b %d %Y', |
5091
|
|
|
|
|
|
|
'%b %d %T %Z %Y', |
5092
|
|
|
|
|
|
|
'%d %b %Y', |
5093
|
|
|
|
|
|
|
'%d %b, %Y' ] |
5094
|
|
|
|
|
|
|
|
5095
|
|
|
|
|
|
|
B<IMPORTANT> |
5096
|
|
|
|
|
|
|
|
5097
|
|
|
|
|
|
|
The empty format (empty string) has a special meaning: when specified, |
5098
|
|
|
|
|
|
|
Text::AutoCSV will be able to identify fields that contain only a time (not |
5099
|
|
|
|
|
|
|
preceeded by a date). |
5100
|
|
|
|
|
|
|
|
5101
|
|
|
|
|
|
|
B<Note> |
5102
|
|
|
|
|
|
|
|
5103
|
|
|
|
|
|
|
Format identification is over only when there is no more ambiguity. So the usual |
5104
|
|
|
|
|
|
|
pitfall of US versus French dates (month-day versus day-month) gets resolved |
5105
|
|
|
|
|
|
|
only when a date is encountered that disambiguates it (a date of 13th of the |
5106
|
|
|
|
|
|
|
month or later). |
5107
|
|
|
|
|
|
|
|
5108
|
|
|
|
|
|
|
Example with a weird format that uses underscores to separate elements, using |
5109
|
|
|
|
|
|
|
either US (month, day, year), French (day, month, year), or international (year, |
5110
|
|
|
|
|
|
|
month, day) order: |
5111
|
|
|
|
|
|
|
|
5112
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'logins.csv', |
5113
|
|
|
|
|
|
|
dates_formats_to_try => ['%d_%m_%Y', '%m_%d_%Y', '%Y_%m_%d']); |
5114
|
|
|
|
|
|
|
|
5115
|
|
|
|
|
|
|
=item dates_formats_to_try_supp |
5116
|
|
|
|
|
|
|
|
5117
|
|
|
|
|
|
|
Same as L</dates_formats_to_try> but instead of replacing the default list of |
5118
|
|
|
|
|
|
|
formats used during detection, it is added to this default list. |
5119
|
|
|
|
|
|
|
|
5120
|
|
|
|
|
|
|
You want to use this attribute if you need a specific DateTime format while |
5121
|
|
|
|
|
|
|
continuing to benefit from the default list. |
5122
|
|
|
|
|
|
|
|
5123
|
|
|
|
|
|
|
B<IMPORTANT> |
5124
|
|
|
|
|
|
|
|
5125
|
|
|
|
|
|
|
Text::AutoCSV will identify a given Datetime format only when there is no |
5126
|
|
|
|
|
|
|
ambiguity, meaning, one unique Datetime format matches (all other failed). |
5127
|
|
|
|
|
|
|
Adding a format that already exists in the default list will prevent the format |
5128
|
|
|
|
|
|
|
from being identified, as it'll always be ambiguous. See |
5129
|
|
|
|
|
|
|
L</dates_formats_to_try> for the default list of formats. |
5130
|
|
|
|
|
|
|
|
5131
|
|
|
|
|
|
|
Example: |
5132
|
|
|
|
|
|
|
|
5133
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'logins.csv', |
5134
|
|
|
|
|
|
|
dates_formats_to_try_supp => ['%d_%m_%Y', '%m_%d_%Y', '%Y_%m_%d']); |
5135
|
|
|
|
|
|
|
|
5136
|
|
|
|
|
|
|
=item dates_ignore_trailing_chars |
5137
|
|
|
|
|
|
|
|
5138
|
|
|
|
|
|
|
If set to 1, DateTime auto-detection will ignore trailing text that may follow |
5139
|
|
|
|
|
|
|
detected DateTime-like text. |
5140
|
|
|
|
|
|
|
|
5141
|
|
|
|
|
|
|
Value by default: 1 (do ignore trailing chars) |
5142
|
|
|
|
|
|
|
|
5143
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'logins.csv', |
5144
|
|
|
|
|
|
|
dates_ignore_trailing_chars => 0); |
5145
|
|
|
|
|
|
|
|
5146
|
|
|
|
|
|
|
=item dates_search_time |
5147
|
|
|
|
|
|
|
|
5148
|
|
|
|
|
|
|
If set to 1, look for times when detecting DateTime format. That is, whenever a |
5149
|
|
|
|
|
|
|
date format candidate is found, a longer candidate that also contains a time |
5150
|
|
|
|
|
|
|
(after the date) is tested. |
5151
|
|
|
|
|
|
|
|
5152
|
|
|
|
|
|
|
Value by default: 1 (do look for times when auto-detecting DateTime formats) |
5153
|
|
|
|
|
|
|
|
5154
|
|
|
|
|
|
|
Example: |
5155
|
|
|
|
|
|
|
|
5156
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'logins.csv', |
5157
|
|
|
|
|
|
|
dates_search_time => 0); |
5158
|
|
|
|
|
|
|
|
5159
|
|
|
|
|
|
|
=item dates_locales |
5160
|
|
|
|
|
|
|
|
5161
|
|
|
|
|
|
|
Comma-separated string of locales to test when detecting DateTime formats. |
5162
|
|
|
|
|
|
|
Ultimately, Text::AutoCSV will try all combinations of date formats, times and |
5163
|
|
|
|
|
|
|
locales. |
5164
|
|
|
|
|
|
|
|
5165
|
|
|
|
|
|
|
Value by default: none (use perl default locale) |
5166
|
|
|
|
|
|
|
|
5167
|
|
|
|
|
|
|
Example: |
5168
|
|
|
|
|
|
|
|
5169
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'logins.csv', |
5170
|
|
|
|
|
|
|
dates_locales => 'fr,de,en'); |
5171
|
|
|
|
|
|
|
|
5172
|
|
|
|
|
|
|
=item dates_zeros_ok |
5173
|
|
|
|
|
|
|
|
5174
|
|
|
|
|
|
|
Boolean. If true, a date made only of 0s is regarded as being empty. |
5175
|
|
|
|
|
|
|
|
5176
|
|
|
|
|
|
|
For example if C<dates_zeros_ok> is False, then a date like 0000-00-00 will be |
5177
|
|
|
|
|
|
|
always incorrect (as the day and month are out of bounds), therefore a format |
5178
|
|
|
|
|
|
|
like '%Y-%m-%d' will never match for the field. |
5179
|
|
|
|
|
|
|
|
5180
|
|
|
|
|
|
|
Conversely if C<dates_zeros_ok> is true, then a date like 0000-00-00 will be |
5181
|
|
|
|
|
|
|
processed as if being the empty string, thus the detection of format will work |
5182
|
|
|
|
|
|
|
and when parsed, this "full of zeros" dates will be processed the same way as |
5183
|
|
|
|
|
|
|
the empty string (= value will be undef). |
5184
|
|
|
|
|
|
|
|
5185
|
|
|
|
|
|
|
B<IMPORTANT> |
5186
|
|
|
|
|
|
|
|
5187
|
|
|
|
|
|
|
"0s dates" are evaluated to undef when parsed, thus when converted back to text |
5188
|
|
|
|
|
|
|
(out_map), they are set to an empty string, not to the original value. |
5189
|
|
|
|
|
|
|
|
5190
|
|
|
|
|
|
|
Value by default: 1 |
5191
|
|
|
|
|
|
|
|
5192
|
|
|
|
|
|
|
Example: |
5193
|
|
|
|
|
|
|
|
5194
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', dates_zeros_ok => 0); |
5195
|
|
|
|
|
|
|
|
5196
|
|
|
|
|
|
|
=item out_dates_format |
5197
|
|
|
|
|
|
|
|
5198
|
|
|
|
|
|
|
Enforce the format of dates in output, for all fields that contain a DateTime |
5199
|
|
|
|
|
|
|
value. |
5200
|
|
|
|
|
|
|
|
5201
|
|
|
|
|
|
|
The format is written in Strptime format. |
5202
|
|
|
|
|
|
|
|
5203
|
|
|
|
|
|
|
Value by default: none (by default, use format detected on input) |
5204
|
|
|
|
|
|
|
|
5205
|
|
|
|
|
|
|
Example: |
5206
|
|
|
|
|
|
|
|
5207
|
|
|
|
|
|
|
# Detect any field containing a DateTime value and convert it to |
5208
|
|
|
|
|
|
|
# yyyy-mm-dd whatever the input format is. |
5209
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', |
5210
|
|
|
|
|
|
|
fields_dates_auto => 1, out_dates_format => '%F')->write(); |
5211
|
|
|
|
|
|
|
|
5212
|
|
|
|
|
|
|
=item out_dates_locale |
5213
|
|
|
|
|
|
|
|
5214
|
|
|
|
|
|
|
Taken into account only if L</out_dates_format> is used. |
5215
|
|
|
|
|
|
|
|
5216
|
|
|
|
|
|
|
Sets the locale to apply on L</out_dates_format>. |
5217
|
|
|
|
|
|
|
|
5218
|
|
|
|
|
|
|
Value by default: none (by default, use the locale detected on input) |
5219
|
|
|
|
|
|
|
|
5220
|
|
|
|
|
|
|
Example: |
5221
|
|
|
|
|
|
|
|
5222
|
|
|
|
|
|
|
# Detect any field containing a DateTime value and convert it to a US |
5223
|
|
|
|
|
|
|
# DateTime whatever the input format is. |
5224
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', |
5225
|
|
|
|
|
|
|
fields_dates_auto => 1, out_dates_format => '%b %d, %Y, %I:%M:%S %p', |
5226
|
|
|
|
|
|
|
out_dates_locale => 'en')->write(); |
5227
|
|
|
|
|
|
|
|
5228
|
|
|
|
|
|
|
=item croak_if_error |
5229
|
|
|
|
|
|
|
|
5230
|
|
|
|
|
|
|
If true, stops the program execution in case of error. |
5231
|
|
|
|
|
|
|
|
5232
|
|
|
|
|
|
|
B<IMPORTANT> |
5233
|
|
|
|
|
|
|
|
5234
|
|
|
|
|
|
|
Value by default: 1 |
5235
|
|
|
|
|
|
|
|
5236
|
|
|
|
|
|
|
If set to zero (C<croak_if_error =E<gt> 0>), errors are displayed as warnings. |
5237
|
|
|
|
|
|
|
This printing can then be affected by setting the L</quiet> attribute. |
5238
|
|
|
|
|
|
|
|
5239
|
|
|
|
|
|
|
=item verbose |
5240
|
|
|
|
|
|
|
|
5241
|
|
|
|
|
|
|
If true, get Text::AutoCSV to be a bit talkative instead of speaking only when |
5242
|
|
|
|
|
|
|
warnings and errors occur. Verbose output is printed to STDERR by default, this |
5243
|
|
|
|
|
|
|
can be tuned with the L</infoh> attribute. |
5244
|
|
|
|
|
|
|
|
5245
|
|
|
|
|
|
|
Value by default: 0 |
5246
|
|
|
|
|
|
|
|
5247
|
|
|
|
|
|
|
Example: |
5248
|
|
|
|
|
|
|
|
5249
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', verbose => 1); |
5250
|
|
|
|
|
|
|
|
5251
|
|
|
|
|
|
|
=item infoh |
5252
|
|
|
|
|
|
|
|
5253
|
|
|
|
|
|
|
File handle to display program's verbose output. Has effect *mainly* with |
5254
|
|
|
|
|
|
|
attribute C<verbose =E<gt> 1>. |
5255
|
|
|
|
|
|
|
|
5256
|
|
|
|
|
|
|
Note B<infoh> is used to display extra information in case of error (if a field |
5257
|
|
|
|
|
|
|
does not exist, Text::AutoCSV will display the list of existing fields). If you |
5258
|
|
|
|
|
|
|
don't want such output, you can set C<infoh> to undef. |
5259
|
|
|
|
|
|
|
|
5260
|
|
|
|
|
|
|
Value by default: \*STDERR |
5261
|
|
|
|
|
|
|
|
5262
|
|
|
|
|
|
|
Example: |
5263
|
|
|
|
|
|
|
|
5264
|
|
|
|
|
|
|
open my $infoh, ">", "log.txt"; |
5265
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', infoh => $infoh); |
5266
|
|
|
|
|
|
|
|
5267
|
|
|
|
|
|
|
=item quiet |
5268
|
|
|
|
|
|
|
|
5269
|
|
|
|
|
|
|
If true, don't display warnings and errors, unless croaking. |
5270
|
|
|
|
|
|
|
|
5271
|
|
|
|
|
|
|
If L</croak_if_error> attribute is set (as per default), still, Text::AutoCSV |
5272
|
|
|
|
|
|
|
will produce output (on STDERR) when croaking miserably. |
5273
|
|
|
|
|
|
|
|
5274
|
|
|
|
|
|
|
When using C<croak_if_error =E<gt> 0>, errors are processed as warnings and if |
5275
|
|
|
|
|
|
|
L</quiet> is set (in addition to L</croak_if_error> being set to 0), there'll be |
5276
|
|
|
|
|
|
|
no output. Note this way of working is not recommended, as things can go wrong |
5277
|
|
|
|
|
|
|
without any notice to the caller. |
5278
|
|
|
|
|
|
|
|
5279
|
|
|
|
|
|
|
Example: |
5280
|
|
|
|
|
|
|
|
5281
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', quiet => 1); |
5282
|
|
|
|
|
|
|
|
5283
|
|
|
|
|
|
|
=item one_pass |
5284
|
|
|
|
|
|
|
|
5285
|
|
|
|
|
|
|
If true, Text::AutoCSV will perform one reading of the input. If other readings |
5286
|
|
|
|
|
|
|
are triggered, it'll raise an error and no reading will be done. Should that be |
5287
|
|
|
|
|
|
|
the case (you ask Text::AutoCSV to do something that'll trigger more than one |
5288
|
|
|
|
|
|
|
reading of input), Text::AutoCSV will croak as is always the case if an error |
5289
|
|
|
|
|
|
|
occurs. |
5290
|
|
|
|
|
|
|
|
5291
|
|
|
|
|
|
|
Normally Text::AutoCSV will do multiple reads of input to work out certain |
5292
|
|
|
|
|
|
|
characteristics of the CSV: guess of encoding and guess of escape character. |
5293
|
|
|
|
|
|
|
|
5294
|
|
|
|
|
|
|
Also if member functions like L</field_add_link>, L</field_add_copy>, |
5295
|
|
|
|
|
|
|
L</field_add_computed>, L</read> or L</write> are called after input has already |
5296
|
|
|
|
|
|
|
been read, it'll trigger further reads as needed. |
5297
|
|
|
|
|
|
|
|
5298
|
|
|
|
|
|
|
If one wishes a unique read of the input to occur, one_pass attribute is to be |
5299
|
|
|
|
|
|
|
set. |
5300
|
|
|
|
|
|
|
|
5301
|
|
|
|
|
|
|
When true, encoding will be assumed to be the first one in the provided list |
5302
|
|
|
|
|
|
|
(L</encoding> attribute), if no encoding attribute is provided, it'll be the |
5303
|
|
|
|
|
|
|
first one in the default list, to date, it is UTF-8. |
5304
|
|
|
|
|
|
|
|
5305
|
|
|
|
|
|
|
When true, and if attribute L</escape_char> is not set, escape_char will be |
5306
|
|
|
|
|
|
|
assumed to be '\\' (backslash). |
5307
|
|
|
|
|
|
|
|
5308
|
|
|
|
|
|
|
By default, one_pass is set if inh attribute is set (caller provides the input |
5309
|
|
|
|
|
|
|
file handle of input) or if input file is stdin (in_file attribute not set or |
5310
|
|
|
|
|
|
|
set to an empty string). |
5311
|
|
|
|
|
|
|
|
5312
|
|
|
|
|
|
|
Value by default: |
5313
|
|
|
|
|
|
|
|
5314
|
|
|
|
|
|
|
0 if inh attribute is not set and in_file attribute is set to a non empty |
5315
|
|
|
|
|
|
|
string |
5316
|
|
|
|
|
|
|
1 if inh attribute is set or in_file is not set or set to an empty string |
5317
|
|
|
|
|
|
|
|
5318
|
|
|
|
|
|
|
Example: |
5319
|
|
|
|
|
|
|
|
5320
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'in.csv', one_pass => 1); |
5321
|
|
|
|
|
|
|
|
5322
|
|
|
|
|
|
|
=back |
5323
|
|
|
|
|
|
|
|
5324
|
|
|
|
|
|
|
=head2 read |
5325
|
|
|
|
|
|
|
|
5326
|
|
|
|
|
|
|
$csv->read(); |
5327
|
|
|
|
|
|
|
|
5328
|
|
|
|
|
|
|
Read input entirely. |
5329
|
|
|
|
|
|
|
|
5330
|
|
|
|
|
|
|
B<Return value> |
5331
|
|
|
|
|
|
|
|
5332
|
|
|
|
|
|
|
Returns the object itself in case of success. |
5333
|
|
|
|
|
|
|
Returns undef if error. |
5334
|
|
|
|
|
|
|
|
5335
|
|
|
|
|
|
|
Callback functions (when defined) are invoked, in the following order: |
5336
|
|
|
|
|
|
|
|
5337
|
|
|
|
|
|
|
L</read_post_update_hr>, intended to do updates on fields values after each |
5338
|
|
|
|
|
|
|
record read |
5339
|
|
|
|
|
|
|
|
5340
|
|
|
|
|
|
|
L</walker_ar>, called after each record read, with an array ref of fields values |
5341
|
|
|
|
|
|
|
|
5342
|
|
|
|
|
|
|
L</walker_hr>, called after each record read, with a hash ref of fields values |
5343
|
|
|
|
|
|
|
|
5344
|
|
|
|
|
|
|
Example: |
5345
|
|
|
|
|
|
|
|
5346
|
|
|
|
|
|
|
# Do nothing - just check CSV can be read successfully |
5347
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'in.csv')->read(); |
5348
|
|
|
|
|
|
|
|
5349
|
|
|
|
|
|
|
=head2 read_all_in_mem |
5350
|
|
|
|
|
|
|
|
5351
|
|
|
|
|
|
|
$csv->read_all_in_mem(); |
5352
|
|
|
|
|
|
|
|
5353
|
|
|
|
|
|
|
Created in version 1.1.5. Before, existed only as _read_all_in_mem, meaning, was |
5354
|
|
|
|
|
|
|
private. |
5355
|
|
|
|
|
|
|
|
5356
|
|
|
|
|
|
|
Read input entirely, as with L</read> function, but enforcing content to be kept |
5357
|
|
|
|
|
|
|
in-memory. |
5358
|
|
|
|
|
|
|
|
5359
|
|
|
|
|
|
|
Having the content kept in-memory is implied by search functions (L</vlookup> |
5360
|
|
|
|
|
|
|
for example). With C<read_all_in_mem> you can enforce this behavior without |
5361
|
|
|
|
|
|
|
doing a fake search. |
5362
|
|
|
|
|
|
|
|
5363
|
|
|
|
|
|
|
=head2 reset_next_record_hr |
5364
|
|
|
|
|
|
|
|
5365
|
|
|
|
|
|
|
$csv->reset_next_record_hr(); |
5366
|
|
|
|
|
|
|
|
5367
|
|
|
|
|
|
|
Reset the internal status to start from the beginning with |
5368
|
|
|
|
|
|
|
L</get_next_record_hr>. Used in conjunction with L</get_next_record_hr>. |
5369
|
|
|
|
|
|
|
|
5370
|
|
|
|
|
|
|
=head2 get_next_record_hr |
5371
|
|
|
|
|
|
|
|
5372
|
|
|
|
|
|
|
my $hr = $csv->get_next_record_hr(\$opt_key); |
5373
|
|
|
|
|
|
|
|
5374
|
|
|
|
|
|
|
Get the next record content as a hash ref. C<$hr> is undef when the end of |
5375
|
|
|
|
|
|
|
records has been reached. |
5376
|
|
|
|
|
|
|
|
5377
|
|
|
|
|
|
|
When specified, C<$opt_key> is set to the current (returned) record key. |
5378
|
|
|
|
|
|
|
|
5379
|
|
|
|
|
|
|
B<NOTE> |
5380
|
|
|
|
|
|
|
|
5381
|
|
|
|
|
|
|
You do not need to call L</reset_next_record_hr> once before using |
5382
|
|
|
|
|
|
|
C<get_next_record_hr>. |
5383
|
|
|
|
|
|
|
|
5384
|
|
|
|
|
|
|
Therefore L</reset_next_record_hr> is useful only if you wish to restart from |
5385
|
|
|
|
|
|
|
the beginning before you've reached the end of the records. |
5386
|
|
|
|
|
|
|
|
5387
|
|
|
|
|
|
|
B<NOTE bis> |
5388
|
|
|
|
|
|
|
|
5389
|
|
|
|
|
|
|
L</walker_hr> allows to execute some code each time a record is read, and it |
5390
|
|
|
|
|
|
|
better fits with Text::AutoCSV philosophy. Using a loop with |
5391
|
|
|
|
|
|
|
C<get_next_record_hr> is primarily meant for Text::AutoCSV internal usage. Also |
5392
|
|
|
|
|
|
|
when using this mechanism, you get very close to original Text::CSV logic, that |
5393
|
|
|
|
|
|
|
makes Text::AutoCSV less useful. |
5394
|
|
|
|
|
|
|
|
5395
|
|
|
|
|
|
|
B<Return value> |
5396
|
|
|
|
|
|
|
|
5397
|
|
|
|
|
|
|
A hashref of the record, or undef once there's no more record to return. |
5398
|
|
|
|
|
|
|
|
5399
|
|
|
|
|
|
|
Example: |
5400
|
|
|
|
|
|
|
|
5401
|
|
|
|
|
|
|
while (my $hr = $csv->get_next_record_hr()) { |
5402
|
|
|
|
|
|
|
say Dumper($hr); |
5403
|
|
|
|
|
|
|
} |
5404
|
|
|
|
|
|
|
|
5405
|
|
|
|
|
|
|
=head2 write |
5406
|
|
|
|
|
|
|
|
5407
|
|
|
|
|
|
|
$csv->write(); |
5408
|
|
|
|
|
|
|
|
5409
|
|
|
|
|
|
|
Write input into output. |
5410
|
|
|
|
|
|
|
|
5411
|
|
|
|
|
|
|
B<Return value> |
5412
|
|
|
|
|
|
|
|
5413
|
|
|
|
|
|
|
Returns the object itself in case of success. |
5414
|
|
|
|
|
|
|
Returns undef if error. |
5415
|
|
|
|
|
|
|
|
5416
|
|
|
|
|
|
|
- If the content is not in-memory at the time write() is called: |
5417
|
|
|
|
|
|
|
|
5418
|
|
|
|
|
|
|
Each record is read (with call of L</read_post_update_hr>, L</walker_ar> and |
5419
|
|
|
|
|
|
|
L</walker_hr>) and then written. The read-and-write is done in sequence, each |
5420
|
|
|
|
|
|
|
record is written to output before the next record is read from input. |
5421
|
|
|
|
|
|
|
|
5422
|
|
|
|
|
|
|
- If the content is in-memory at the time write() is called: |
5423
|
|
|
|
|
|
|
|
5424
|
|
|
|
|
|
|
No L</read> operation is performed, instead, records are directly written to |
5425
|
|
|
|
|
|
|
output. |
5426
|
|
|
|
|
|
|
|
5427
|
|
|
|
|
|
|
If defined, L</out_filter> is called for each record. If the return value of |
5428
|
|
|
|
|
|
|
L</out_filter> is false, the record is not written. |
5429
|
|
|
|
|
|
|
|
5430
|
|
|
|
|
|
|
Example: |
5431
|
|
|
|
|
|
|
|
5432
|
|
|
|
|
|
|
# Copy input to output. |
5433
|
|
|
|
|
|
|
# As CSV is parsed in-between, this copy also checks a number of |
5434
|
|
|
|
|
|
|
# characteristics about the input, as opposed to a plain file copy |
5435
|
|
|
|
|
|
|
# operation. |
5436
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv')->write(); |
5437
|
|
|
|
|
|
|
|
5438
|
|
|
|
|
|
|
=head2 out_header |
5439
|
|
|
|
|
|
|
|
5440
|
|
|
|
|
|
|
$csv->out_header($field, $header); |
5441
|
|
|
|
|
|
|
|
5442
|
|
|
|
|
|
|
Set the header text of C<$field> to C<$header>. |
5443
|
|
|
|
|
|
|
|
5444
|
|
|
|
|
|
|
By default, the input header value is rewritten as is to output. C<out_header> |
5445
|
|
|
|
|
|
|
allows you to change it. |
5446
|
|
|
|
|
|
|
|
5447
|
|
|
|
|
|
|
B<Return value> |
5448
|
|
|
|
|
|
|
|
5449
|
|
|
|
|
|
|
Returns the object itself. |
5450
|
|
|
|
|
|
|
|
5451
|
|
|
|
|
|
|
Example: |
5452
|
|
|
|
|
|
|
|
5453
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv') |
5454
|
|
|
|
|
|
|
->out_header('LOGIN', 'Login') |
5455
|
|
|
|
|
|
|
->out_header('FULLNAME', 'Full Name') |
5456
|
|
|
|
|
|
|
->write(); |
5457
|
|
|
|
|
|
|
|
5458
|
|
|
|
|
|
|
=head2 print_id |
5459
|
|
|
|
|
|
|
|
5460
|
|
|
|
|
|
|
$csv->print_id(); |
5461
|
|
|
|
|
|
|
|
5462
|
|
|
|
|
|
|
Print out a description of input. Write to \*STDERR by default or to L</infoh> |
5463
|
|
|
|
|
|
|
attribute if set. |
5464
|
|
|
|
|
|
|
|
5465
|
|
|
|
|
|
|
The description consists in a list of a few characteristics (CSV separator and |
5466
|
|
|
|
|
|
|
the like) followed by the list of columns with the details of each. |
5467
|
|
|
|
|
|
|
|
5468
|
|
|
|
|
|
|
Example of output: |
5469
|
|
|
|
|
|
|
|
5470
|
|
|
|
|
|
|
If you go to the C<utils> directory of this module and execute the following: |
5471
|
|
|
|
|
|
|
|
5472
|
|
|
|
|
|
|
./csvcopy.pl -i f1.csv -l "1:,A->B,f2.csv" --id |
5473
|
|
|
|
|
|
|
|
5474
|
|
|
|
|
|
|
You will get this output: |
5475
|
|
|
|
|
|
|
|
5476
|
|
|
|
|
|
|
-- f1.csv: |
5477
|
|
|
|
|
|
|
sep_char: , |
5478
|
|
|
|
|
|
|
escape_char: \ |
5479
|
|
|
|
|
|
|
in_encoding: UTF-8 |
5480
|
|
|
|
|
|
|
is_always_quoted: no |
5481
|
|
|
|
|
|
|
|
5482
|
|
|
|
|
|
|
# FIELD HEADER EXT DATA DATETIME FORMAT DATETIME LOCALE |
5483
|
|
|
|
|
|
|
- ----- ------ -------- --------------- --------------- |
5484
|
|
|
|
|
|
|
0 TIMESTAMP timestamp %Y%m%d%H%M%S |
5485
|
|
|
|
|
|
|
1 A a |
5486
|
|
|
|
|
|
|
2 B b |
5487
|
|
|
|
|
|
|
3 C c |
5488
|
|
|
|
|
|
|
4 D d %d/%m/%Y |
5489
|
|
|
|
|
|
|
5 1:SITE 1:SITE link: f2.csv, chain: A->B->* (SITE) |
5490
|
|
|
|
|
|
|
6 1:B 1:B link: f2.csv, chain: A->B->* (B) |
5491
|
|
|
|
|
|
|
|
5492
|
|
|
|
|
|
|
=head2 field_add_computed |
5493
|
|
|
|
|
|
|
|
5494
|
|
|
|
|
|
|
$csv->field_add_computed($new_field, $subref); |
5495
|
|
|
|
|
|
|
|
5496
|
|
|
|
|
|
|
C<$new_field> is the name of the created field. |
5497
|
|
|
|
|
|
|
|
5498
|
|
|
|
|
|
|
C<$subref> is a reference to a sub that'll calculate the new field value. |
5499
|
|
|
|
|
|
|
|
5500
|
|
|
|
|
|
|
B<Return value> |
5501
|
|
|
|
|
|
|
|
5502
|
|
|
|
|
|
|
Returns the object itself in case of success. |
5503
|
|
|
|
|
|
|
Returns undef if error. |
5504
|
|
|
|
|
|
|
|
5505
|
|
|
|
|
|
|
Add a field calculated from other fields values. The subref runs like this: |
5506
|
|
|
|
|
|
|
|
5507
|
|
|
|
|
|
|
sub func { |
5508
|
|
|
|
|
|
|
# $new_field is the name of the field (allows to use one subref for |
5509
|
|
|
|
|
|
|
# more than one field calculation). |
5510
|
|
|
|
|
|
|
# $hr is a hash ref of fields values. |
5511
|
|
|
|
|
|
|
# $stats is a hash ref that gets printed (if Text::AutoCSV is |
5512
|
|
|
|
|
|
|
# created with verbose => 1) in the end. |
5513
|
|
|
|
|
|
|
my ($new_field, $hr, $stats) = @_; |
5514
|
|
|
|
|
|
|
|
5515
|
|
|
|
|
|
|
my $field_value; |
5516
|
|
|
|
|
|
|
# ... compute $field_value |
5517
|
|
|
|
|
|
|
|
5518
|
|
|
|
|
|
|
return $field_value; |
5519
|
|
|
|
|
|
|
} |
5520
|
|
|
|
|
|
|
|
5521
|
|
|
|
|
|
|
Example: |
5522
|
|
|
|
|
|
|
|
5523
|
|
|
|
|
|
|
# Add a field for the full name, made of the concatenation of the first |
5524
|
|
|
|
|
|
|
# name and the last name. |
5525
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'dirpeople.csv', out_file => 'dirwithfn.csv', |
5526
|
|
|
|
|
|
|
verbose => 1) |
5527
|
|
|
|
|
|
|
->field_add_computed('FULLNAME', \&calc_fn)->write(); |
5528
|
|
|
|
|
|
|
sub calc_fn { |
5529
|
|
|
|
|
|
|
my ($new_field, $hr, $stats) = @_; |
5530
|
|
|
|
|
|
|
die "Man, you are in serious trouble!" unless $new_field eq 'FULLNAME'; |
5531
|
|
|
|
|
|
|
my $fn = $hr->{'FIRSTNAME'} . ' ' . uc($hr->{'LASTNAME'}); |
5532
|
|
|
|
|
|
|
$stats->{'empty full name'}++ if $fn eq ' '; |
5533
|
|
|
|
|
|
|
return $fn; |
5534
|
|
|
|
|
|
|
} |
5535
|
|
|
|
|
|
|
|
5536
|
|
|
|
|
|
|
=head2 field_add_copy |
5537
|
|
|
|
|
|
|
|
5538
|
|
|
|
|
|
|
$csv->field_add_copy($new_field, $src_field, $opt_subref); |
5539
|
|
|
|
|
|
|
|
5540
|
|
|
|
|
|
|
C<$new_field> if the name of the new field. |
5541
|
|
|
|
|
|
|
|
5542
|
|
|
|
|
|
|
C<$src_field> is the name of the field being copied. |
5543
|
|
|
|
|
|
|
|
5544
|
|
|
|
|
|
|
C<$opt_subref> is optional. It is a reference to a sub that takes one string |
5545
|
|
|
|
|
|
|
(the value of C<$src_field>) and returns a string (the value assigned to |
5546
|
|
|
|
|
|
|
C<$new_field>). |
5547
|
|
|
|
|
|
|
|
5548
|
|
|
|
|
|
|
B<Return value> |
5549
|
|
|
|
|
|
|
|
5550
|
|
|
|
|
|
|
Returns the object itself in case of success. |
5551
|
|
|
|
|
|
|
Returns undef if error. |
5552
|
|
|
|
|
|
|
|
5553
|
|
|
|
|
|
|
C<field_add_copy> is a special case of L</field_add_computed>. The advantage of |
5554
|
|
|
|
|
|
|
C<field_add_copy> is that it relies on a sub that is Text::AutoCSV "unaware", |
5555
|
|
|
|
|
|
|
just taking one string as input and returning another string as output. |
5556
|
|
|
|
|
|
|
|
5557
|
|
|
|
|
|
|
B<IMPORTANT> |
5558
|
|
|
|
|
|
|
|
5559
|
|
|
|
|
|
|
The current field value is passed to C<field_add_copy> in $_. |
5560
|
|
|
|
|
|
|
|
5561
|
|
|
|
|
|
|
A call to |
5562
|
|
|
|
|
|
|
|
5563
|
|
|
|
|
|
|
$csv->field_add_copy($new_field, $src_field, $subref); |
5564
|
|
|
|
|
|
|
|
5565
|
|
|
|
|
|
|
is equivalent to |
5566
|
|
|
|
|
|
|
|
5567
|
|
|
|
|
|
|
$csv->field_add_computed($new_field, \&subref2); |
5568
|
|
|
|
|
|
|
sub subref2 { |
5569
|
|
|
|
|
|
|
my (undef, $hr) = @_; |
5570
|
|
|
|
|
|
|
local $_ = $hr->{$src_field}; |
5571
|
|
|
|
|
|
|
return $subref->(); |
5572
|
|
|
|
|
|
|
} |
5573
|
|
|
|
|
|
|
|
5574
|
|
|
|
|
|
|
Example of a field copy + pass copied field in upper case and surround content |
5575
|
|
|
|
|
|
|
with <<>>: |
5576
|
|
|
|
|
|
|
|
5577
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'dirpeople.csv', |
5578
|
|
|
|
|
|
|
out_file => 'd2.csv'); |
5579
|
|
|
|
|
|
|
$csv->field_add_copy('UCLAST', 'LASTNAME', \&myfunc); |
5580
|
|
|
|
|
|
|
$csv->write(); |
5581
|
|
|
|
|
|
|
sub myfunc { s/^.*$/<<\U$&>>/; $_; } |
5582
|
|
|
|
|
|
|
|
5583
|
|
|
|
|
|
|
Note that the calls can be chained as most member functions return the object |
5584
|
|
|
|
|
|
|
itself upon success. The example above is equivalent to: |
5585
|
|
|
|
|
|
|
|
5586
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'dirpeople.csv', out_file => 'd2.csv') |
5587
|
|
|
|
|
|
|
->field_add_copy('UCLAST', 'LASTNAME', \&myfunc) |
5588
|
|
|
|
|
|
|
->write(); |
5589
|
|
|
|
|
|
|
sub myfunc { s/^.*$/<<\U$&>>/; $_; } |
5590
|
|
|
|
|
|
|
|
5591
|
|
|
|
|
|
|
=head2 field_add_link |
5592
|
|
|
|
|
|
|
|
5593
|
|
|
|
|
|
|
$csv->field_add_link($new_field, $chain, $linked_file, \%opts); |
5594
|
|
|
|
|
|
|
|
5595
|
|
|
|
|
|
|
C<$new_field> is the name of the new field. |
5596
|
|
|
|
|
|
|
|
5597
|
|
|
|
|
|
|
C<$chain> is the CHAIN of the link, that is: 'LOCAL->REMOTE->PICK' where: |
5598
|
|
|
|
|
|
|
|
5599
|
|
|
|
|
|
|
C<LOCAL> is the field name to read the value from. |
5600
|
|
|
|
|
|
|
|
5601
|
|
|
|
|
|
|
C<REMOTE> is the linked field to find the value in. This field belongs to |
5602
|
|
|
|
|
|
|
$linked_file. |
5603
|
|
|
|
|
|
|
|
5604
|
|
|
|
|
|
|
C<PICK> is the field from which to read the value of, in the record found by the |
5605
|
|
|
|
|
|
|
search. This field belongs to $linked_file. |
5606
|
|
|
|
|
|
|
|
5607
|
|
|
|
|
|
|
If $new_field is undef, the new field name is the name of the third field of |
5608
|
|
|
|
|
|
|
$chain (PICK). |
5609
|
|
|
|
|
|
|
|
5610
|
|
|
|
|
|
|
C<$linked_file> is the name of the linked file, that gets read in a |
5611
|
|
|
|
|
|
|
Text::AutoCSV object created on-the-fly to do the search on. C<$linked_file> can |
5612
|
|
|
|
|
|
|
also be a Text::AutoCSV object that you created yourself, allowing for more |
5613
|
|
|
|
|
|
|
flexibility. Example: |
5614
|
|
|
|
|
|
|
|
5615
|
|
|
|
|
|
|
my $lcsv = Text::AutoCSV->new(in_file => 'logins.csv', case => 1); |
5616
|
|
|
|
|
|
|
$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', $lcsv); |
5617
|
|
|
|
|
|
|
|
5618
|
|
|
|
|
|
|
C<\%opts> is a hash ref of optional attributes. The same values can be provided |
5619
|
|
|
|
|
|
|
as with vlookup. |
5620
|
|
|
|
|
|
|
|
5621
|
|
|
|
|
|
|
=over 4 |
5622
|
|
|
|
|
|
|
|
5623
|
|
|
|
|
|
|
=item trim |
5624
|
|
|
|
|
|
|
|
5625
|
|
|
|
|
|
|
If set to 1, searches will ignore leading and trailing spaces. That is, a |
5626
|
|
|
|
|
|
|
C<LOCAL> value of ' x ' will match with a C<REMOTE> value of 'x'. |
5627
|
|
|
|
|
|
|
|
5628
|
|
|
|
|
|
|
If option is not present, use L</search_value_if_not_found> attribute of object |
5629
|
|
|
|
|
|
|
(default value: 1). |
5630
|
|
|
|
|
|
|
|
5631
|
|
|
|
|
|
|
Example: |
5632
|
|
|
|
|
|
|
|
5633
|
|
|
|
|
|
|
$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv', |
5634
|
|
|
|
|
|
|
{ trim => 0 }); |
5635
|
|
|
|
|
|
|
|
5636
|
|
|
|
|
|
|
=item case |
5637
|
|
|
|
|
|
|
|
5638
|
|
|
|
|
|
|
If set to 1, searches will take the case into account. That is, a C<LOCAL> value |
5639
|
|
|
|
|
|
|
of 'X' will B<not> match with a C<REMOTE> value of 'x'. |
5640
|
|
|
|
|
|
|
|
5641
|
|
|
|
|
|
|
If option is not present, use L</search_case> attribute of object (default |
5642
|
|
|
|
|
|
|
value: 0). |
5643
|
|
|
|
|
|
|
|
5644
|
|
|
|
|
|
|
Example: |
5645
|
|
|
|
|
|
|
|
5646
|
|
|
|
|
|
|
$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv', |
5647
|
|
|
|
|
|
|
{ case => 1 }); |
5648
|
|
|
|
|
|
|
|
5649
|
|
|
|
|
|
|
=item ignore_empty |
5650
|
|
|
|
|
|
|
|
5651
|
|
|
|
|
|
|
If set to 1, empty values won't match. That is, a C<LOCAL> value of '' will not |
5652
|
|
|
|
|
|
|
match with a C<REMOTE> value of ''. |
5653
|
|
|
|
|
|
|
|
5654
|
|
|
|
|
|
|
If option is not present, use L</search_ignore_empty> attribute of object |
5655
|
|
|
|
|
|
|
(default value: 1). |
5656
|
|
|
|
|
|
|
|
5657
|
|
|
|
|
|
|
Example: |
5658
|
|
|
|
|
|
|
|
5659
|
|
|
|
|
|
|
$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv', |
5660
|
|
|
|
|
|
|
{ ignore_empty => 0 }); |
5661
|
|
|
|
|
|
|
|
5662
|
|
|
|
|
|
|
=item value_if_not_found |
5663
|
|
|
|
|
|
|
|
5664
|
|
|
|
|
|
|
If the searched value is not found, the value of the field is undef, that |
5665
|
|
|
|
|
|
|
produces an empty string at write time. Instead, you can specify the value. |
5666
|
|
|
|
|
|
|
|
5667
|
|
|
|
|
|
|
If option is not present, use L</search_value_if_not_found> attribute of object |
5668
|
|
|
|
|
|
|
(default value: undef). |
5669
|
|
|
|
|
|
|
|
5670
|
|
|
|
|
|
|
Example: |
5671
|
|
|
|
|
|
|
|
5672
|
|
|
|
|
|
|
$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv', |
5673
|
|
|
|
|
|
|
{ value_if_not_found => '<not found!>' }); |
5674
|
|
|
|
|
|
|
|
5675
|
|
|
|
|
|
|
=item value_if_found |
5676
|
|
|
|
|
|
|
|
5677
|
|
|
|
|
|
|
If the searched value is found, you can specify the value to return. |
5678
|
|
|
|
|
|
|
|
5679
|
|
|
|
|
|
|
If option is not present, use L</search_value_if_found> attribute of object |
5680
|
|
|
|
|
|
|
(default value: none). |
5681
|
|
|
|
|
|
|
|
5682
|
|
|
|
|
|
|
B<NOTE> |
5683
|
|
|
|
|
|
|
|
5684
|
|
|
|
|
|
|
Although the C<PICK> field is ignored when using this option, you must specify |
5685
|
|
|
|
|
|
|
it any way. |
5686
|
|
|
|
|
|
|
|
5687
|
|
|
|
|
|
|
Example: |
5688
|
|
|
|
|
|
|
|
5689
|
|
|
|
|
|
|
$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv', |
5690
|
|
|
|
|
|
|
{ value_if_not_found => '0', value_if_found => '1' }); |
5691
|
|
|
|
|
|
|
|
5692
|
|
|
|
|
|
|
=item value_if_ambiguous |
5693
|
|
|
|
|
|
|
|
5694
|
|
|
|
|
|
|
If the searched value is found in more than one record, the value of the field |
5695
|
|
|
|
|
|
|
is undef, that produces an empty string at write time. Instead, you can specify |
5696
|
|
|
|
|
|
|
the value. |
5697
|
|
|
|
|
|
|
|
5698
|
|
|
|
|
|
|
If option is not present, use L</search_value_if_ambiguous> attribute of object |
5699
|
|
|
|
|
|
|
(default value: undef). |
5700
|
|
|
|
|
|
|
|
5701
|
|
|
|
|
|
|
Example: |
5702
|
|
|
|
|
|
|
|
5703
|
|
|
|
|
|
|
$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv', |
5704
|
|
|
|
|
|
|
{ value_if_ambiguous => '<ambiguous!>' }); |
5705
|
|
|
|
|
|
|
|
5706
|
|
|
|
|
|
|
=item ignore_ambiguous |
5707
|
|
|
|
|
|
|
|
5708
|
|
|
|
|
|
|
Boolean value. If ignore_ambiguous is true and the searched value is found in |
5709
|
|
|
|
|
|
|
more than one record, then, silently fall back on returning the value of the |
5710
|
|
|
|
|
|
|
first record. Obviously if C<ignore_ambiguous> is true, then the value of |
5711
|
|
|
|
|
|
|
L</value_if_ambiguous> is ignored. |
5712
|
|
|
|
|
|
|
|
5713
|
|
|
|
|
|
|
If option is not present, use L</search_ignore_ambiguous> attribute of object |
5714
|
|
|
|
|
|
|
(default value: 1). |
5715
|
|
|
|
|
|
|
|
5716
|
|
|
|
|
|
|
Example: |
5717
|
|
|
|
|
|
|
|
5718
|
|
|
|
|
|
|
$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv', |
5719
|
|
|
|
|
|
|
{ ignore_ambiguous => 1 }); |
5720
|
|
|
|
|
|
|
|
5721
|
|
|
|
|
|
|
Example with multiple options: |
5722
|
|
|
|
|
|
|
|
5723
|
|
|
|
|
|
|
$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv', |
5724
|
|
|
|
|
|
|
{ value_if_not_found => '?', ignore_ambiguous => 1 }); |
5725
|
|
|
|
|
|
|
|
5726
|
|
|
|
|
|
|
=back |
5727
|
|
|
|
|
|
|
|
5728
|
|
|
|
|
|
|
B<Return value> |
5729
|
|
|
|
|
|
|
|
5730
|
|
|
|
|
|
|
Returns the object itself in case of success. |
5731
|
|
|
|
|
|
|
Returns undef if error. |
5732
|
|
|
|
|
|
|
|
5733
|
|
|
|
|
|
|
Example of field_add_link usage: |
5734
|
|
|
|
|
|
|
|
5735
|
|
|
|
|
|
|
my $nom_compose = 0; |
5736
|
|
|
|
|
|
|
my $zip_not_found = 0; |
5737
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'pers.csv', walker_hr => \&walk) |
5738
|
|
|
|
|
|
|
->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv')->read(); |
5739
|
|
|
|
|
|
|
sub walk { |
5740
|
|
|
|
|
|
|
my $hr = shift; |
5741
|
|
|
|
|
|
|
$nom_compose++ if $hr->{'NAME'} =~ m/[- ]/; |
5742
|
|
|
|
|
|
|
$zip_not_found++ unless defined($hr->{'MYCITY'}); |
5743
|
|
|
|
|
|
|
} |
5744
|
|
|
|
|
|
|
print("Number of persons with a multi-part name: $nom_compose\n"); |
5745
|
|
|
|
|
|
|
print("Number of persons with unknown zipcode: $zip_not_found\n"); |
5746
|
|
|
|
|
|
|
|
5747
|
|
|
|
|
|
|
=head2 links |
5748
|
|
|
|
|
|
|
|
5749
|
|
|
|
|
|
|
$csv->links($prefix, $chain, $linked_file, \%opts); |
5750
|
|
|
|
|
|
|
|
5751
|
|
|
|
|
|
|
C<$prefix> is the name to add to joined fields |
5752
|
|
|
|
|
|
|
|
5753
|
|
|
|
|
|
|
C<$chain> is the JOINCHAIN of the link, that is: 'LOCAL->REMOTE' where: |
5754
|
|
|
|
|
|
|
|
5755
|
|
|
|
|
|
|
C<LOCAL> is the field name to read the value from. |
5756
|
|
|
|
|
|
|
|
5757
|
|
|
|
|
|
|
C<REMOTE> is the linked field to find the value in. This field belongs to |
5758
|
|
|
|
|
|
|
$linked_file. |
5759
|
|
|
|
|
|
|
|
5760
|
|
|
|
|
|
|
As opposed to L</field_add_link>, there is no C<PICK> part, as all fields of |
5761
|
|
|
|
|
|
|
target are read. |
5762
|
|
|
|
|
|
|
|
5763
|
|
|
|
|
|
|
As opposed to Text::AutoCSV habits of croaking whenever a field name is |
5764
|
|
|
|
|
|
|
duplicate, here, the duplicates are resolved by appending _2 to the joined field |
5765
|
|
|
|
|
|
|
name if it already exists. If _2 already exists, too, then _3 is appended |
5766
|
|
|
|
|
|
|
instead, and so on, until a non-duplicate is found. This mechanism is executed |
5767
|
|
|
|
|
|
|
given the difficulty to control all field names when joining CSVs. |
5768
|
|
|
|
|
|
|
|
5769
|
|
|
|
|
|
|
C<$linked_file> and C<\%opts> work exactly the same way as for |
5770
|
|
|
|
|
|
|
L</field_add_link>, see L</field_add_link> for help. |
5771
|
|
|
|
|
|
|
|
5772
|
|
|
|
|
|
|
B<Return value> |
5773
|
|
|
|
|
|
|
|
5774
|
|
|
|
|
|
|
Returns the object itself in case of success. |
5775
|
|
|
|
|
|
|
Returns undef if error. |
5776
|
|
|
|
|
|
|
|
5777
|
|
|
|
|
|
|
B<NOTE> |
5778
|
|
|
|
|
|
|
|
5779
|
|
|
|
|
|
|
This function used to be called C<join> but got renamed to avoid clash with |
5780
|
|
|
|
|
|
|
perl' builtin C<join>. |
5781
|
|
|
|
|
|
|
|
5782
|
|
|
|
|
|
|
Example: |
5783
|
|
|
|
|
|
|
|
5784
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'pers.csv', out_file => 'pers_with_city.csv') |
5785
|
|
|
|
|
|
|
->links('Read from zips.csv:', 'ZIP->ZIPCODE', 'zips.csv')->write(); |
5786
|
|
|
|
|
|
|
|
5787
|
|
|
|
|
|
|
=head2 get_in_encoding |
5788
|
|
|
|
|
|
|
|
5789
|
|
|
|
|
|
|
my $enc = $csv->get_in_encoding(); |
5790
|
|
|
|
|
|
|
|
5791
|
|
|
|
|
|
|
Return the string of input encoding, for example 'latin2' or 'UTF-8', etc. |
5792
|
|
|
|
|
|
|
|
5793
|
|
|
|
|
|
|
=head2 get_in_file_disp |
5794
|
|
|
|
|
|
|
|
5795
|
|
|
|
|
|
|
my $f = $csv->get_in_file_disp(); |
5796
|
|
|
|
|
|
|
|
5797
|
|
|
|
|
|
|
Return the printable name of in_file. |
5798
|
|
|
|
|
|
|
|
5799
|
|
|
|
|
|
|
=head2 get_sep_char |
5800
|
|
|
|
|
|
|
|
5801
|
|
|
|
|
|
|
my $s = $csv->get_sep_char(); |
5802
|
|
|
|
|
|
|
|
5803
|
|
|
|
|
|
|
Return the string of the input CSV separator character, for example ',' or ';'. |
5804
|
|
|
|
|
|
|
|
5805
|
|
|
|
|
|
|
=head2 get_escape_char |
5806
|
|
|
|
|
|
|
|
5807
|
|
|
|
|
|
|
my $e = $csv->get_escape_char(); |
5808
|
|
|
|
|
|
|
|
5809
|
|
|
|
|
|
|
Return the string of the input escape character, for example '"' or '\\'. |
5810
|
|
|
|
|
|
|
|
5811
|
|
|
|
|
|
|
=head2 get_is_always_quoted |
5812
|
|
|
|
|
|
|
|
5813
|
|
|
|
|
|
|
my $a = $csv->get_is_always_quoted(); |
5814
|
|
|
|
|
|
|
|
5815
|
|
|
|
|
|
|
Return 1 if all fields of input are always quoted, 0 otherwise. |
5816
|
|
|
|
|
|
|
|
5817
|
|
|
|
|
|
|
=head2 get_coldata |
5818
|
|
|
|
|
|
|
|
5819
|
|
|
|
|
|
|
my @cd = get_coldata(); |
5820
|
|
|
|
|
|
|
|
5821
|
|
|
|
|
|
|
Return an array that describes each column, from the first one (column 0) to the |
5822
|
|
|
|
|
|
|
last. |
5823
|
|
|
|
|
|
|
|
5824
|
|
|
|
|
|
|
Each element of the array is itself an array ref that contains 5 elements: |
5825
|
|
|
|
|
|
|
|
5826
|
|
|
|
|
|
|
0: Name of the field (as accessed in *_hr functions) |
5827
|
|
|
|
|
|
|
1: Content of the field in the header line (if input has a header line) |
5828
|
|
|
|
|
|
|
2: Column content type, shows some meta-data of fields created with |
5829
|
|
|
|
|
|
|
field_add_* functions |
5830
|
|
|
|
|
|
|
3: Datetime format detected, if ever, in the format Strptime |
5831
|
|
|
|
|
|
|
4: Locale of DateTime format detected, if ever |
5832
|
|
|
|
|
|
|
5: Multiline field: '1' if not, 'm' if newlines encountered in the field |
5833
|
|
|
|
|
|
|
|
5834
|
|
|
|
|
|
|
=head2 get_pass_count |
5835
|
|
|
|
|
|
|
|
5836
|
|
|
|
|
|
|
my $n = $csv->get_pass_count(); |
5837
|
|
|
|
|
|
|
|
5838
|
|
|
|
|
|
|
Return the number of input readings done. Useful only if you're interested in |
5839
|
|
|
|
|
|
|
Text::AutoCSV internals. |
5840
|
|
|
|
|
|
|
|
5841
|
|
|
|
|
|
|
=head2 get_in_mem_record_count |
5842
|
|
|
|
|
|
|
|
5843
|
|
|
|
|
|
|
my $m = $csv->get_in_mem_record_count(); |
5844
|
|
|
|
|
|
|
|
5845
|
|
|
|
|
|
|
Return the number of records currently stored in-memory. Useful only if you're |
5846
|
|
|
|
|
|
|
interested in Text::AutoCSV internals. |
5847
|
|
|
|
|
|
|
|
5848
|
|
|
|
|
|
|
=head2 get_max_in_mem_record_count |
5849
|
|
|
|
|
|
|
|
5850
|
|
|
|
|
|
|
my $mm = $csv->get_max_in_mem_record_count(); |
5851
|
|
|
|
|
|
|
|
5852
|
|
|
|
|
|
|
Return the maximum number of records ever stored in-memory. Indeed this number |
5853
|
|
|
|
|
|
|
can decrease: certain functions like field_add* member-functions discard |
5854
|
|
|
|
|
|
|
in-memory content. Useful only if you're interested in Text::AutoCSV internals. |
5855
|
|
|
|
|
|
|
|
5856
|
|
|
|
|
|
|
=head2 get_fields_names |
5857
|
|
|
|
|
|
|
|
5858
|
|
|
|
|
|
|
my @f = $csv->get_fields_names(); |
5859
|
|
|
|
|
|
|
|
5860
|
|
|
|
|
|
|
Return an array of the internal names of the columns. |
5861
|
|
|
|
|
|
|
|
5862
|
|
|
|
|
|
|
=head2 get_field_name |
5863
|
|
|
|
|
|
|
|
5864
|
|
|
|
|
|
|
my $name = $csv->get_field_name($n); |
5865
|
|
|
|
|
|
|
|
5866
|
|
|
|
|
|
|
Return the C<$n>-th column name, the first column being number 0. |
5867
|
|
|
|
|
|
|
|
5868
|
|
|
|
|
|
|
Example: |
5869
|
|
|
|
|
|
|
|
5870
|
|
|
|
|
|
|
# Get the field name of the third column |
5871
|
|
|
|
|
|
|
my $col = $csv->get_field_name(2); |
5872
|
|
|
|
|
|
|
|
5873
|
|
|
|
|
|
|
=head2 get_stats |
5874
|
|
|
|
|
|
|
|
5875
|
|
|
|
|
|
|
my %stats = $csv->get_stats(); |
5876
|
|
|
|
|
|
|
|
5877
|
|
|
|
|
|
|
Certain callback functions provide a parameter to record event count: |
5878
|
|
|
|
|
|
|
L</field_add_computed>, L</read_post_update_hr>, L</walker_ar> and |
5879
|
|
|
|
|
|
|
L</walker_hr>. By default, these stats are displayed if Text::AutoCSV got |
5880
|
|
|
|
|
|
|
created with attribute C<verbose =E<gt> 1>. get_stats() returns the statistics |
5881
|
|
|
|
|
|
|
hash of the object. |
5882
|
|
|
|
|
|
|
|
5883
|
|
|
|
|
|
|
B<IMPORTANT> |
5884
|
|
|
|
|
|
|
|
5885
|
|
|
|
|
|
|
As opposed to most functions that trigger input reading automatically (search |
5886
|
|
|
|
|
|
|
functions and other get_* functions), C<get_stats> just returns you the stats as |
5887
|
|
|
|
|
|
|
it is, regardless of whether some execution already occured. |
5888
|
|
|
|
|
|
|
|
5889
|
|
|
|
|
|
|
=head2 get_nb_rows |
5890
|
|
|
|
|
|
|
|
5891
|
|
|
|
|
|
|
my $nb_rows = $csv->get_nb_rows(); |
5892
|
|
|
|
|
|
|
|
5893
|
|
|
|
|
|
|
Gives the number of rows of the input. Does not trigger any reading - just |
5894
|
|
|
|
|
|
|
provides the number of rows as known at the moment of the call. If unknown, |
5895
|
|
|
|
|
|
|
return undef. Typically, the number of rows is known after doing the initial |
5896
|
|
|
|
|
|
|
detection of CSV options (escape character, etc.), or, after doing one complete |
5897
|
|
|
|
|
|
|
reading. |
5898
|
|
|
|
|
|
|
|
5899
|
|
|
|
|
|
|
The header line counts for one row. |
5900
|
|
|
|
|
|
|
|
5901
|
|
|
|
|
|
|
B<IMPORTANT> |
5902
|
|
|
|
|
|
|
|
5903
|
|
|
|
|
|
|
As some fields can contain new lines, this number is not necessarily identical |
5904
|
|
|
|
|
|
|
to the number of lines. |
5905
|
|
|
|
|
|
|
|
5906
|
|
|
|
|
|
|
=head2 set_walker_ar |
5907
|
|
|
|
|
|
|
|
5908
|
|
|
|
|
|
|
$csv->set_walker_ar($subref); |
5909
|
|
|
|
|
|
|
|
5910
|
|
|
|
|
|
|
Normally one wants to define it at object creation time using L</walker_ar> |
5911
|
|
|
|
|
|
|
attribute. C<set_walker_ar> allows to assign the attribute walker_ar after |
5912
|
|
|
|
|
|
|
object creation. |
5913
|
|
|
|
|
|
|
|
5914
|
|
|
|
|
|
|
See attribute L</walker_ar> for help about the way C<$subref> should work. |
5915
|
|
|
|
|
|
|
|
5916
|
|
|
|
|
|
|
B<Return value> |
5917
|
|
|
|
|
|
|
|
5918
|
|
|
|
|
|
|
Returns the object itself in case of success. |
5919
|
|
|
|
|
|
|
Returns undef if error. |
5920
|
|
|
|
|
|
|
|
5921
|
|
|
|
|
|
|
Example: |
5922
|
|
|
|
|
|
|
|
5923
|
|
|
|
|
|
|
# Calculate the total of the two first columns, the first column being |
5924
|
|
|
|
|
|
|
# money in and the second one being money out. |
5925
|
|
|
|
|
|
|
my ($actif, $passif) = (0, 0); |
5926
|
|
|
|
|
|
|
$csv->set_walker_ar( |
5927
|
|
|
|
|
|
|
sub { |
5928
|
|
|
|
|
|
|
my $ar = $_[0]; $actif += $ar->[0]; $passif += $ar->[1]; |
5929
|
|
|
|
|
|
|
} |
5930
|
|
|
|
|
|
|
) |
5931
|
|
|
|
|
|
|
->read(); |
5932
|
|
|
|
|
|
|
print("Actif = $actif\n"); |
5933
|
|
|
|
|
|
|
print("Passif = $passif\n"); |
5934
|
|
|
|
|
|
|
|
5935
|
|
|
|
|
|
|
=head2 set_walker_hr |
5936
|
|
|
|
|
|
|
|
5937
|
|
|
|
|
|
|
$csv->set_walker_hr($subref); |
5938
|
|
|
|
|
|
|
|
5939
|
|
|
|
|
|
|
Normally one wants to define it at object creation time using L</walker_hr> |
5940
|
|
|
|
|
|
|
attribute. C<set_walker_hr> allows to assign the attribute L</walker_hr> after |
5941
|
|
|
|
|
|
|
object creation. |
5942
|
|
|
|
|
|
|
|
5943
|
|
|
|
|
|
|
See attribute L</walker_hr> for help about the way C<$subref> should work. |
5944
|
|
|
|
|
|
|
|
5945
|
|
|
|
|
|
|
B<Return value> |
5946
|
|
|
|
|
|
|
|
5947
|
|
|
|
|
|
|
Returns the object itself in case of success. |
5948
|
|
|
|
|
|
|
Returns undef if error. |
5949
|
|
|
|
|
|
|
|
5950
|
|
|
|
|
|
|
Example: |
5951
|
|
|
|
|
|
|
|
5952
|
|
|
|
|
|
|
my $csv = Text::AutoCSV->new(in_file => 'directory.csv', verbose => 1); |
5953
|
|
|
|
|
|
|
|
5954
|
|
|
|
|
|
|
# ... |
5955
|
|
|
|
|
|
|
|
5956
|
|
|
|
|
|
|
$csv->set_walker_hr( |
5957
|
|
|
|
|
|
|
sub { |
5958
|
|
|
|
|
|
|
my ($hr, $stat) = @_; |
5959
|
|
|
|
|
|
|
$stat{'not capital name'}++, return |
5960
|
|
|
|
|
|
|
if $hr->{'NAME'} ne uc($hr->{'NAME'}); |
5961
|
|
|
|
|
|
|
$stat{'name is capital letters'}++; |
5962
|
|
|
|
|
|
|
} |
5963
|
|
|
|
|
|
|
)->read(); |
5964
|
|
|
|
|
|
|
|
5965
|
|
|
|
|
|
|
=head2 set_out_file |
5966
|
|
|
|
|
|
|
|
5967
|
|
|
|
|
|
|
$csv->set_out_file($out_file); |
5968
|
|
|
|
|
|
|
|
5969
|
|
|
|
|
|
|
Normally one wants to define it at object creation time using L</out_file> |
5970
|
|
|
|
|
|
|
attribute. C<set_out_file> allows to assign the attribute L</out_file> after |
5971
|
|
|
|
|
|
|
object creation. It is set to C<$out_file> value. |
5972
|
|
|
|
|
|
|
|
5973
|
|
|
|
|
|
|
B<Return value> |
5974
|
|
|
|
|
|
|
|
5975
|
|
|
|
|
|
|
Returns the object itself in case of success. |
5976
|
|
|
|
|
|
|
Returns undef if error. |
5977
|
|
|
|
|
|
|
|
5978
|
|
|
|
|
|
|
Example: |
5979
|
|
|
|
|
|
|
|
5980
|
|
|
|
|
|
|
$csv->set_out_file('mycopy.csv')->write(); |
5981
|
|
|
|
|
|
|
|
5982
|
|
|
|
|
|
|
=head2 get_keys |
5983
|
|
|
|
|
|
|
|
5984
|
|
|
|
|
|
|
my @allkeys = $csv->get_keys(); |
5985
|
|
|
|
|
|
|
|
5986
|
|
|
|
|
|
|
Returns an array of all the record keys of input. A record key is a unique |
5987
|
|
|
|
|
|
|
identifier that designates the record. |
5988
|
|
|
|
|
|
|
|
5989
|
|
|
|
|
|
|
At the moment it is just an integer being the record number, the first one (that |
5990
|
|
|
|
|
|
|
comes after the header line) being of number 0. For example if $csv input is |
5991
|
|
|
|
|
|
|
made of one header line and 3 records (that is, a 4-line file typically, if no |
5992
|
|
|
|
|
|
|
record contains a line break), $csv->get_keys() returns: |
5993
|
|
|
|
|
|
|
|
5994
|
|
|
|
|
|
|
(0, 1, 2) |
5995
|
|
|
|
|
|
|
|
5996
|
|
|
|
|
|
|
B<IMPORTANT> |
5997
|
|
|
|
|
|
|
|
5998
|
|
|
|
|
|
|
If not yet done, this function causes the input to be read entirely and stored |
5999
|
|
|
|
|
|
|
in-memory. |
6000
|
|
|
|
|
|
|
|
6001
|
|
|
|
|
|
|
=head2 get_hr_all |
6002
|
|
|
|
|
|
|
|
6003
|
|
|
|
|
|
|
my @allin = $csv->get_hr_all(); |
6004
|
|
|
|
|
|
|
|
6005
|
|
|
|
|
|
|
Returns an array of all record contents of the input, each record being a hash |
6006
|
|
|
|
|
|
|
ref. |
6007
|
|
|
|
|
|
|
|
6008
|
|
|
|
|
|
|
B<IMPORTANT> |
6009
|
|
|
|
|
|
|
|
6010
|
|
|
|
|
|
|
If not yet done, this function causes the input to be read entirely and stored |
6011
|
|
|
|
|
|
|
in-memory. |
6012
|
|
|
|
|
|
|
|
6013
|
|
|
|
|
|
|
=head2 get_row_ar |
6014
|
|
|
|
|
|
|
|
6015
|
|
|
|
|
|
|
my $row_ar = $csv->get_row_ar($record_key); |
6016
|
|
|
|
|
|
|
|
6017
|
|
|
|
|
|
|
Returns an array ref of the record designated by C<$record_key>. |
6018
|
|
|
|
|
|
|
|
6019
|
|
|
|
|
|
|
Example: |
6020
|
|
|
|
|
|
|
|
6021
|
|
|
|
|
|
|
# Get content (as array ref) of last record |
6022
|
|
|
|
|
|
|
my @allkeys = $csv->get_keys(); |
6023
|
|
|
|
|
|
|
my $lastk = $allkeys[-1]; |
6024
|
|
|
|
|
|
|
my $lastrec_ar = $csv->get_row_ar($lastk); |
6025
|
|
|
|
|
|
|
|
6026
|
|
|
|
|
|
|
B<IMPORTANT> |
6027
|
|
|
|
|
|
|
|
6028
|
|
|
|
|
|
|
If not yet done, this function causes the input to be read entirely and stored |
6029
|
|
|
|
|
|
|
in-memory. |
6030
|
|
|
|
|
|
|
|
6031
|
|
|
|
|
|
|
=head2 get_row_hr |
6032
|
|
|
|
|
|
|
|
6033
|
|
|
|
|
|
|
my $row_hr = $csv->get_row_hr($record_key); |
6034
|
|
|
|
|
|
|
|
6035
|
|
|
|
|
|
|
Returns a hash ref of the record designated by C<$record_key>. |
6036
|
|
|
|
|
|
|
|
6037
|
|
|
|
|
|
|
Example: |
6038
|
|
|
|
|
|
|
|
6039
|
|
|
|
|
|
|
# Get content (as hash ref) of first record |
6040
|
|
|
|
|
|
|
my @allkeys = $csv->get_keys(); |
6041
|
|
|
|
|
|
|
my $firstk = $allkeys[0]; |
6042
|
|
|
|
|
|
|
my $firstrec_hr = $csv->get_row_hr($firstk); |
6043
|
|
|
|
|
|
|
|
6044
|
|
|
|
|
|
|
B<IMPORTANT> |
6045
|
|
|
|
|
|
|
|
6046
|
|
|
|
|
|
|
If not yet done, this function causes the input to be read entirely and stored |
6047
|
|
|
|
|
|
|
in-memory. |
6048
|
|
|
|
|
|
|
|
6049
|
|
|
|
|
|
|
=head2 get_cell |
6050
|
|
|
|
|
|
|
|
6051
|
|
|
|
|
|
|
my $val = $csv->get_cell($record_key, $field_name); |
6052
|
|
|
|
|
|
|
|
6053
|
|
|
|
|
|
|
Return the value of the cell designated by its record key (C<$record_key>) and |
6054
|
|
|
|
|
|
|
field name (C<$field_name>). |
6055
|
|
|
|
|
|
|
|
6056
|
|
|
|
|
|
|
Example: |
6057
|
|
|
|
|
|
|
|
6058
|
|
|
|
|
|
|
my @allkeys = $csv->get_keys(); |
6059
|
|
|
|
|
|
|
my $midk = $allkeys[int($#allkeys / 2)]; |
6060
|
|
|
|
|
|
|
my $midname = $csv->get_cell($midk, 'NAME'); |
6061
|
|
|
|
|
|
|
|
6062
|
|
|
|
|
|
|
Note the above example is equivalent to: |
6063
|
|
|
|
|
|
|
|
6064
|
|
|
|
|
|
|
my @allkeys = $csv->get_keys(); |
6065
|
|
|
|
|
|
|
my $midk = $allkeys[int($#allkeys / 2)]; |
6066
|
|
|
|
|
|
|
my $midrec_hr = $csv->get_row_hr($midk); |
6067
|
|
|
|
|
|
|
my $midname = $midrec_hr->{'NAME'}; |
6068
|
|
|
|
|
|
|
|
6069
|
|
|
|
|
|
|
B<IMPORTANT> |
6070
|
|
|
|
|
|
|
|
6071
|
|
|
|
|
|
|
If not yet done, this function causes the input to be read entirely and stored |
6072
|
|
|
|
|
|
|
in-memory. |
6073
|
|
|
|
|
|
|
|
6074
|
|
|
|
|
|
|
=head2 get_values |
6075
|
|
|
|
|
|
|
|
6076
|
|
|
|
|
|
|
my @vals = $csv->get_values($field_name, $opt_filter_subref); |
6077
|
|
|
|
|
|
|
|
6078
|
|
|
|
|
|
|
Return an array made of the values of the given field name (C<$field_name>), for |
6079
|
|
|
|
|
|
|
every records, in the order of the records. |
6080
|
|
|
|
|
|
|
|
6081
|
|
|
|
|
|
|
C<$opt_filter_subref> is an optional subref. If defined, it is called with every |
6082
|
|
|
|
|
|
|
values in turn (one call per value) and only values for which |
6083
|
|
|
|
|
|
|
C<$opt_filter_subref> returned True are included in the returned array. Call to |
6084
|
|
|
|
|
|
|
C<$opt_filter_subref> is done with $_ to pass the value. |
6085
|
|
|
|
|
|
|
|
6086
|
|
|
|
|
|
|
Example: |
6087
|
|
|
|
|
|
|
|
6088
|
|
|
|
|
|
|
my @logins = $csv->get_values('LOGIN"); |
6089
|
|
|
|
|
|
|
|
6090
|
|
|
|
|
|
|
This is equivalent to: |
6091
|
|
|
|
|
|
|
|
6092
|
|
|
|
|
|
|
my @allkeys = $csv->get_keys(); |
6093
|
|
|
|
|
|
|
my @logins; |
6094
|
|
|
|
|
|
|
push @logins, $csv->get_cell($_, 'LOGIN') for (@allkeys); |
6095
|
|
|
|
|
|
|
|
6096
|
|
|
|
|
|
|
Example bis |
6097
|
|
|
|
|
|
|
|
6098
|
|
|
|
|
|
|
# @badlogins is the list of logins that contain non alphanumeric |
6099
|
|
|
|
|
|
|
# characters |
6100
|
|
|
|
|
|
|
my @badlogins = Text::AutoCSV->new(in_file => 'logins.csv') |
6101
|
|
|
|
|
|
|
->get_values('LOGIN', sub { m/[^a-z0-9]/ }); |
6102
|
|
|
|
|
|
|
|
6103
|
|
|
|
|
|
|
This is equivalent to: |
6104
|
|
|
|
|
|
|
|
6105
|
|
|
|
|
|
|
# @badlogins is the list of logins that contain non alphanumeric |
6106
|
|
|
|
|
|
|
# characters This method leads to carrying all values of a given field |
6107
|
|
|
|
|
|
|
# across function calls... |
6108
|
|
|
|
|
|
|
my @badlogins = grep { m/[^a-z0-9]/ } ( |
6109
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'logins.csv')->get_values('LOGIN') |
6110
|
|
|
|
|
|
|
); |
6111
|
|
|
|
|
|
|
|
6112
|
|
|
|
|
|
|
B<IMPORTANT> |
6113
|
|
|
|
|
|
|
|
6114
|
|
|
|
|
|
|
If not yet done, this function causes the input to be read entirely and stored |
6115
|
|
|
|
|
|
|
in-memory. |
6116
|
|
|
|
|
|
|
|
6117
|
|
|
|
|
|
|
=head2 get_recnum |
6118
|
|
|
|
|
|
|
|
6119
|
|
|
|
|
|
|
my $r = $csv->get_recnum(); |
6120
|
|
|
|
|
|
|
|
6121
|
|
|
|
|
|
|
Returns the current record identifier, if a reading is in progress. If no read |
6122
|
|
|
|
|
|
|
is in progress, return undef. |
6123
|
|
|
|
|
|
|
|
6124
|
|
|
|
|
|
|
=head2 in_map |
6125
|
|
|
|
|
|
|
|
6126
|
|
|
|
|
|
|
See L</read_update_after> below. |
6127
|
|
|
|
|
|
|
|
6128
|
|
|
|
|
|
|
=head2 read_update_after |
6129
|
|
|
|
|
|
|
|
6130
|
|
|
|
|
|
|
C<read_update_after> is an alias of C<in_map>. |
6131
|
|
|
|
|
|
|
|
6132
|
|
|
|
|
|
|
$csv->in_map($field, $subref); |
6133
|
|
|
|
|
|
|
|
6134
|
|
|
|
|
|
|
After reading a record from input, update C<$field> by calling C<$subref>. The |
6135
|
|
|
|
|
|
|
value is put in C<$_>. Then the field value is set to the return value of |
6136
|
|
|
|
|
|
|
C<$subref>. |
6137
|
|
|
|
|
|
|
|
6138
|
|
|
|
|
|
|
This feature is originally meant to manage DateTime fields: the input and output |
6139
|
|
|
|
|
|
|
CSVs carry text content, and in-between, the values dealt with are DateTime |
6140
|
|
|
|
|
|
|
objects. |
6141
|
|
|
|
|
|
|
|
6142
|
|
|
|
|
|
|
See L</out_map> for an example. |
6143
|
|
|
|
|
|
|
|
6144
|
|
|
|
|
|
|
=head2 out_map |
6145
|
|
|
|
|
|
|
|
6146
|
|
|
|
|
|
|
See L</write_update_before> below. |
6147
|
|
|
|
|
|
|
|
6148
|
|
|
|
|
|
|
=head2 write_update_before |
6149
|
|
|
|
|
|
|
|
6150
|
|
|
|
|
|
|
C<write_update_before> is an alias of C<out_map>. |
6151
|
|
|
|
|
|
|
|
6152
|
|
|
|
|
|
|
$csv->out_map($field, $subref); |
6153
|
|
|
|
|
|
|
|
6154
|
|
|
|
|
|
|
Before writing C<$field> field content into the output file, pass it through |
6155
|
|
|
|
|
|
|
C<out_map>. The value is put in C<$_>. Then the return value of C<$subref> is |
6156
|
|
|
|
|
|
|
written in the output. |
6157
|
|
|
|
|
|
|
|
6158
|
|
|
|
|
|
|
Example: |
6159
|
|
|
|
|
|
|
|
6160
|
|
|
|
|
|
|
Suppose you have a CSV file with the convention that a number surrounded by |
6161
|
|
|
|
|
|
|
parenthesis is negative. You can register corresponding L</in_map> and |
6162
|
|
|
|
|
|
|
L</out_map> functions. During the processing of data, the field content will be |
6163
|
|
|
|
|
|
|
just a number (positive or negative), while in input and in output, it'll follow |
6164
|
|
|
|
|
|
|
the "negative under parenthesis" convention. |
6165
|
|
|
|
|
|
|
|
6166
|
|
|
|
|
|
|
In the below example, we rely on convention above and add a new field converted |
6167
|
|
|
|
|
|
|
from the original one, that follows the same convention. |
6168
|
|
|
|
|
|
|
|
6169
|
|
|
|
|
|
|
sub in_updt { |
6170
|
|
|
|
|
|
|
return 0 if !defined($_) or $_ eq ''; |
6171
|
|
|
|
|
|
|
my $i; |
6172
|
|
|
|
|
|
|
return -$i if ($i) = $_ =~ m/^\((.*)\)$/; |
6173
|
|
|
|
|
|
|
$_; |
6174
|
|
|
|
|
|
|
} |
6175
|
|
|
|
|
|
|
sub out_updt { |
6176
|
|
|
|
|
|
|
return '' unless defined($_); |
6177
|
|
|
|
|
|
|
return '(' . (-$_) . ')' if $_ < 0; |
6178
|
|
|
|
|
|
|
$_; |
6179
|
|
|
|
|
|
|
} |
6180
|
|
|
|
|
|
|
sub convert { |
6181
|
|
|
|
|
|
|
return ; |
6182
|
|
|
|
|
|
|
} |
6183
|
|
|
|
|
|
|
Text::AutoCSV->new(in_file => 'trans-euros.csv', |
6184
|
|
|
|
|
|
|
out_file => 'trans-devises.csv') |
6185
|
|
|
|
|
|
|
->in_map('EUROS', \&in_updt) |
6186
|
|
|
|
|
|
|
->out_map('EUROS', \&out_updt) |
6187
|
|
|
|
|
|
|
->out_map('DEVISE', \&out_updt) |
6188
|
|
|
|
|
|
|
->field_add_copy('DEVISE', 'EUROS', |
6189
|
|
|
|
|
|
|
sub { sprintf("%.2f", $_ * 1.141593); } ) |
6190
|
|
|
|
|
|
|
->write(); |
6191
|
|
|
|
|
|
|
|
6192
|
|
|
|
|
|
|
=head2 search |
6193
|
|
|
|
|
|
|
|
6194
|
|
|
|
|
|
|
my $found_ar = $csv->search($field_name, $value, \%opts); |
6195
|
|
|
|
|
|
|
|
6196
|
|
|
|
|
|
|
Returns an array ref of all records keys where the field C<$field_name> has the |
6197
|
|
|
|
|
|
|
value C<$value>. |
6198
|
|
|
|
|
|
|
|
6199
|
|
|
|
|
|
|
C<\%opts> is an optional hash ref of options for the search. See help of |
6200
|
|
|
|
|
|
|
L</vlookup> about options. |
6201
|
|
|
|
|
|
|
|
6202
|
|
|
|
|
|
|
B<IMPORTANT> |
6203
|
|
|
|
|
|
|
|
6204
|
|
|
|
|
|
|
An unsuccessful search returns an empty array ref, that is, [ ]. Thus you |
6205
|
|
|
|
|
|
|
B<cannot> check for definedness of C<search> return value to know whether or not |
6206
|
|
|
|
|
|
|
the search found something. |
6207
|
|
|
|
|
|
|
|
6208
|
|
|
|
|
|
|
On the other hand, you can always examine the value C<search(...)-E<gt>[0]>, as |
6209
|
|
|
|
|
|
|
search is always an array ref. If the search found nothing, then, |
6210
|
|
|
|
|
|
|
C<search(...)-E<gt>[0]> is not defined. |
6211
|
|
|
|
|
|
|
|
6212
|
|
|
|
|
|
|
B<IMPORTANT bis> |
6213
|
|
|
|
|
|
|
|
6214
|
|
|
|
|
|
|
If not yet done, this function causes the input to be read entirely and stored |
6215
|
|
|
|
|
|
|
in-memory. |
6216
|
|
|
|
|
|
|
|
6217
|
|
|
|
|
|
|
Example: |
6218
|
|
|
|
|
|
|
|
6219
|
|
|
|
|
|
|
my $linux_os_keys_ar = $csv->search('OS', 'linux'); |
6220
|
|
|
|
|
|
|
|
6221
|
|
|
|
|
|
|
=head2 search_1hr |
6222
|
|
|
|
|
|
|
|
6223
|
|
|
|
|
|
|
my $found_hr = $csv->search_1hr($field_name, $value, \%opts); |
6224
|
|
|
|
|
|
|
|
6225
|
|
|
|
|
|
|
Returns a hash ref of the first record where the field C<$field_name> has the |
6226
|
|
|
|
|
|
|
value C<$value>. |
6227
|
|
|
|
|
|
|
|
6228
|
|
|
|
|
|
|
C<\%opts> is an optional hash ref of options for the search. See help of |
6229
|
|
|
|
|
|
|
L</vlookup> about options. |
6230
|
|
|
|
|
|
|
|
6231
|
|
|
|
|
|
|
Note the options L</value_if_not_found> and L</value_if_ambiguous> are ignored. |
6232
|
|
|
|
|
|
|
If not found, return undef. If the result is ambiguous (more than one record |
6233
|
|
|
|
|
|
|
found) and ignore_ambiguous is set to a false value, return undef. |
6234
|
|
|
|
|
|
|
|
6235
|
|
|
|
|
|
|
The other options are taken into account as for any search: |
6236
|
|
|
|
|
|
|
L</ignore_ambiguous>, L</trim>, L</case>, L</ignore_empty>. |
6237
|
|
|
|
|
|
|
|
6238
|
|
|
|
|
|
|
B<IMPORTANT> |
6239
|
|
|
|
|
|
|
|
6240
|
|
|
|
|
|
|
As opposed to L</search>, an unsuccessful C<search_1hr> will return C<undef>. |
6241
|
|
|
|
|
|
|
|
6242
|
|
|
|
|
|
|
B<IMPORTANT bis> |
6243
|
|
|
|
|
|
|
|
6244
|
|
|
|
|
|
|
If not yet done, this function causes the input to be read entirely and stored |
6245
|
|
|
|
|
|
|
in-memory. |
6246
|
|
|
|
|
|
|
|
6247
|
|
|
|
|
|
|
Example: |
6248
|
|
|
|
|
|
|
|
6249
|
|
|
|
|
|
|
my $hr = $csv->search_1hr('LOGIN', $login); |
6250
|
|
|
|
|
|
|
my $full_name = $hr->{'FIRSTNAME'} . ' ' . $hr->{'LASTNAME'}; |
6251
|
|
|
|
|
|
|
|
6252
|
|
|
|
|
|
|
=head2 vlookup |
6253
|
|
|
|
|
|
|
|
6254
|
|
|
|
|
|
|
my $val = $csv->vlookup($searched_field, $value, $target_field, \%opts); |
6255
|
|
|
|
|
|
|
|
6256
|
|
|
|
|
|
|
Find the first record where C<$searched_field> contains C<$value> and out of |
6257
|
|
|
|
|
|
|
this record, returns the value of C<$target_field>. |
6258
|
|
|
|
|
|
|
|
6259
|
|
|
|
|
|
|
C<\%opts> is optional. It is a hash of options for C<vlookup>: |
6260
|
|
|
|
|
|
|
|
6261
|
|
|
|
|
|
|
=over 4 |
6262
|
|
|
|
|
|
|
|
6263
|
|
|
|
|
|
|
=item trim |
6264
|
|
|
|
|
|
|
|
6265
|
|
|
|
|
|
|
If true, ignore spaces before and after the values to search. |
6266
|
|
|
|
|
|
|
|
6267
|
|
|
|
|
|
|
If option is not present, use L</search_trim> attribute of object (default |
6268
|
|
|
|
|
|
|
value: 1). |
6269
|
|
|
|
|
|
|
|
6270
|
|
|
|
|
|
|
=item case |
6271
|
|
|
|
|
|
|
|
6272
|
|
|
|
|
|
|
If true, do case sensitive searches. |
6273
|
|
|
|
|
|
|
|
6274
|
|
|
|
|
|
|
If option is not present, use L</search_case> attribute of object (default |
6275
|
|
|
|
|
|
|
value: 0). |
6276
|
|
|
|
|
|
|
|
6277
|
|
|
|
|
|
|
=item ignore_empty |
6278
|
|
|
|
|
|
|
|
6279
|
|
|
|
|
|
|
If true, ignore empty values in the search. The consequence is that you won't be |
6280
|
|
|
|
|
|
|
able to find empty values by searching it. |
6281
|
|
|
|
|
|
|
|
6282
|
|
|
|
|
|
|
If option is not present, use L</search_ignore_empty> attribute of object |
6283
|
|
|
|
|
|
|
(default value: 1). |
6284
|
|
|
|
|
|
|
|
6285
|
|
|
|
|
|
|
=item ignore_accents |
6286
|
|
|
|
|
|
|
|
6287
|
|
|
|
|
|
|
If true, ignore accents in searches. For exampe, if C<ignore_accents> is set, a |
6288
|
|
|
|
|
|
|
string like "élémentaire" will match "elementaire". |
6289
|
|
|
|
|
|
|
|
6290
|
|
|
|
|
|
|
If option is not present, use L</search_ignore_accents> attribute of object |
6291
|
|
|
|
|
|
|
(default value: 1). |
6292
|
|
|
|
|
|
|
|
6293
|
|
|
|
|
|
|
B<NOTE> |
6294
|
|
|
|
|
|
|
|
6295
|
|
|
|
|
|
|
This option uses the function L</remove_accents> to build its internal hash |
6296
|
|
|
|
|
|
|
tables. See L</remove_accents> help for more details. |
6297
|
|
|
|
|
|
|
|
6298
|
|
|
|
|
|
|
=item value_if_not_found |
6299
|
|
|
|
|
|
|
|
6300
|
|
|
|
|
|
|
Return value if vlookup finds nothing. |
6301
|
|
|
|
|
|
|
|
6302
|
|
|
|
|
|
|
If option is not present, use L</search_value_if_not_found> attribute of object |
6303
|
|
|
|
|
|
|
(default value: undef). |
6304
|
|
|
|
|
|
|
|
6305
|
|
|
|
|
|
|
=item value_if_found |
6306
|
|
|
|
|
|
|
|
6307
|
|
|
|
|
|
|
Return value if vlookup finds something. |
6308
|
|
|
|
|
|
|
|
6309
|
|
|
|
|
|
|
If option is not present, use L</search_value_if_found> attribute of object |
6310
|
|
|
|
|
|
|
(default value: none). |
6311
|
|
|
|
|
|
|
|
6312
|
|
|
|
|
|
|
This option is to just check whether a value exists, regardless of the target |
6313
|
|
|
|
|
|
|
value found. |
6314
|
|
|
|
|
|
|
|
6315
|
|
|
|
|
|
|
B<NOTE> |
6316
|
|
|
|
|
|
|
|
6317
|
|
|
|
|
|
|
Although the B<$target_field> is ignored when using this option, you must |
6318
|
|
|
|
|
|
|
specify it any way. |
6319
|
|
|
|
|
|
|
|
6320
|
|
|
|
|
|
|
=item value_if_ambiguous |
6321
|
|
|
|
|
|
|
|
6322
|
|
|
|
|
|
|
Return value if vlookup find more than one result. Tune it only if |
6323
|
|
|
|
|
|
|
ignore_ambiguous is unset. |
6324
|
|
|
|
|
|
|
|
6325
|
|
|
|
|
|
|
If option is not present, use L</search_value_if_ambiguous> attribute of object |
6326
|
|
|
|
|
|
|
(default value: undef). |
6327
|
|
|
|
|
|
|
|
6328
|
|
|
|
|
|
|
=item ignore_ambiguous |
6329
|
|
|
|
|
|
|
|
6330
|
|
|
|
|
|
|
If true, then if more than one result is found, silently return the first one. |
6331
|
|
|
|
|
|
|
|
6332
|
|
|
|
|
|
|
If option is not present, use L</search_ignore_ambiguous> attribute of object |
6333
|
|
|
|
|
|
|
(default value: 1). |
6334
|
|
|
|
|
|
|
|
6335
|
|
|
|
|
|
|
=back |
6336
|
|
|
|
|
|
|
|
6337
|
|
|
|
|
|
|
B<IMPORTANT> |
6338
|
|
|
|
|
|
|
|
6339
|
|
|
|
|
|
|
If not yet done, this function causes the input to be read entirely and stored |
6340
|
|
|
|
|
|
|
in-memory. |
6341
|
|
|
|
|
|
|
|
6342
|
|
|
|
|
|
|
Example: |
6343
|
|
|
|
|
|
|
|
6344
|
|
|
|
|
|
|
my $name = $csv->vlookup('LOGIN', $id, 'NAME', |
6345
|
|
|
|
|
|
|
{ value_if_not_found => '<login not found>' }); |
6346
|
|
|
|
|
|
|
|
6347
|
|
|
|
|
|
|
=head2 remove_accents |
6348
|
|
|
|
|
|
|
|
6349
|
|
|
|
|
|
|
my $t = $csv->remove_accents($s); |
6350
|
|
|
|
|
|
|
|
6351
|
|
|
|
|
|
|
Take the string C<$s> as argument and return the string without accents. Uses a |
6352
|
|
|
|
|
|
|
Unicode decomposition followed by removal of every characters that have the |
6353
|
|
|
|
|
|
|
Unicode property C<Nonspacing_Mark>. |
6354
|
|
|
|
|
|
|
|
6355
|
|
|
|
|
|
|
B<NOTE> |
6356
|
|
|
|
|
|
|
|
6357
|
|
|
|
|
|
|
Only accents are removed. It is not a C<whatever-encoding -E<gt> us-ascii> |
6358
|
|
|
|
|
|
|
conversion. For example, the French B<Å> character (o followed by e) or the |
6359
|
|
|
|
|
|
|
German B<Ã> (eszett) are kept as is. |
6360
|
|
|
|
|
|
|
|
6361
|
|
|
|
|
|
|
B<NOTE bis> |
6362
|
|
|
|
|
|
|
|
6363
|
|
|
|
|
|
|
Tested with some latin1 and latin2 characters. |
6364
|
|
|
|
|
|
|
|
6365
|
|
|
|
|
|
|
B<NOTE ter> |
6366
|
|
|
|
|
|
|
|
6367
|
|
|
|
|
|
|
There is no language-level transformation during accents removal. For example |
6368
|
|
|
|
|
|
|
B<Jürgen> is returned as B<Jurgen>, not B<Juergen>. |
6369
|
|
|
|
|
|
|
|
6370
|
|
|
|
|
|
|
This function is not exported by default. |
6371
|
|
|
|
|
|
|
|
6372
|
|
|
|
|
|
|
Example: |
6373
|
|
|
|
|
|
|
|
6374
|
|
|
|
|
|
|
use Text::AutoCSV qw(remove_accents); |
6375
|
|
|
|
|
|
|
my $s = remove_accents("Français: être élémentaire, Tchèque: služba dům"); |
6376
|
|
|
|
|
|
|
die "This script will never die" |
6377
|
|
|
|
|
|
|
if $s ne 'Francais: etre elementaire, Tcheque: sluzba dum'; |
6378
|
|
|
|
|
|
|
|
6379
|
|
|
|
|
|
|
=head1 AUTHOR |
6380
|
|
|
|
|
|
|
|
6381
|
|
|
|
|
|
|
Sébastien Millet <milletseb@laposte.net> |
6382
|
|
|
|
|
|
|
|
6383
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
6384
|
|
|
|
|
|
|
|
6385
|
|
|
|
|
|
|
This software is copyright (c) 2016, 2017 by Sébastien Millet. |
6386
|
|
|
|
|
|
|
|
6387
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
6388
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
6389
|
|
|
|
|
|
|
|
6390
|
|
|
|
|
|
|
=cut |