File Coverage

blib/lib/Text/AutoCSV.pm

Criterion	Covered	Total	%
statement	1480	1730	85.5
branch	686	958	71.6
condition	188	291	64.6
subroutine	111	117	94.8
pod	43	43	100.0
total	2508	3139	79.9

line	stmt	bran	cond	sub	pod	time	code
1							#!/usr/bin/perl
2							# ABSTRACT: helper module to automate the use of Text::CSV
3
4							# vim:tw=100
5
6							# Text/AutoCSV.pm
7
8							#
9							# Written by SÃ©bastien Millet
10							# March, July, August, September 2016
11							# January, February 2017
12							#
13
14							package Text::AutoCSV;
15							$Text::AutoCSV::VERSION = '1.1.9';
16							my $PKG = "Text::AutoCSV";
17
18	20			20		879687	use strict;
	20					65
	20					510
19	17			17		83	use warnings;
	17					33
	17					904
20
21							require Exporter;
22							our @ISA = 'Exporter';
23							our @EXPORT_OK = qw(remove_accents);
24
25	17			17		91	use Carp;
	17					33
	17					995
26	17			17		5429	use Params::Validate qw(validate validate_pos :types);
	17					112305
	17					3238
27	17			17		5606	use List::MoreUtils qw(first_index indexes);
	17					113952
	17					156
28	17			17		11361	use Fcntl qw(SEEK_SET);
	17					34
	17					791
29	17			17		4861	use File::BOM;
	17					398401
	17					868
30	17			17		7409	use Text::CSV;
	17					231961
	17					780
31	17			17		9579	use DateTime;
	17					6663742
	17					958
32							# DateTime::Format::Strptime 1.70 does not work properly with us.
33							# Actually all version as of 1.63 are fine, except 1.70.
34	17			17		8364	use DateTime::Format::Strptime 1.71;
	17					885728
	17					125
35	17			17		7995	use Class::Struct;
	17					25020
	17					102
36	17			17		7503	use Unicode::Normalize;
	17					26892
	17					988
37							# lock_keys is used to prevent accessing non existing keys
38							# Credits: 3381159 on http://stackoverflow.com
39							# "make perl shout when trying to access undefined hash key"
40	17			17		5396	use Hash::Util qw(lock_keys);
	17					33754
	17					97
41
42							# FIXME
43							# Not needed in release -> should be always commented unless at dev time
44							#use feature qw(say);
45							#use Data::Dumper;
46							#$Data::Dumper::Sortkeys = 1;
47
48							# Set to 1 if you wish to turn on debug without touching caller's code
49							our $ALWAYS_DEBUG = 0;
50
51							# Keep it set to 0 unless you know what you're doing!
52							# Note
53							# Taken into account only if debug is set.
54							my $DEBUG_DATETIME_FORMATS = 0;
55							# The below is taken into account only if $DEBUG_DATETIME_FORMATS is set.
56							# It'll resqult in even more debug output. It becomes really MASSIVE debug output.
57							my $DEBUG_DATETIME_FORMATS_EVEN_MORE = 0;
58
59							#
60							# Uncomment to replace carp and croak with cluck and confess, respectively
61							# Also reachable with perl option:
62							# -MCarp=verbose
63							# See 'perldoc Carp'.
64							#
65							#$Carp::Verbose = 1;
66
67
68							# * *************** *
69							# * BEHAVIOR TUNING *
70							# * *************** *
71
72
73							# * **************************************************** *
74							# * ALL THE VARIABLES BELOW ARE RATHER LOW LEVEL. *
75							# * IF YOU UPDATE IT, IT WILL LIKELY BREAK THE TEST PLAN *
76							# * **************************************************** *
77
78							my $DEF_SEARCH_CASE = 0; # Case insensitive search by default
79							my $DEF_SEARCH_TRIM = 1; # Trim values by default
80							my $DEF_SEARCH_IGNORE_ACCENTS = 1; # Ignore accents
81							my $DEF_SEARCH_IGNORE_EMPTY = 1; # Ignore empty strings in searches by default
82							my $DEF_SEARCH_VALUE_IF_NOT_FOUND = undef; # If not found, returned field value is undef
83							my $DEF_SEARCH_VALUE_IF_AMBIGUOUS = undef; # If more than one record found by search (when a
84							# unique value is expected), return undef
85							my $DEF_SEARCH_IGNORE_AMBIGUOUS = 1; # By default, ignore the fact that multiple records are
86							# found by search and return the first record found
87
88							my $DETECT_ENCODING = 1;
89
90							my $DEFAULT_IN_ENCODING = 'UTF-8,latin1';
91
92							# By default, input encoding detected is used for output.
93							# -> the constant below is used if and only if:
94							# Inbound encoding is unknown
95							# No providing of out_encoding attribute (out_encoding takes precedence when provided)
96							my $DEFAULT_OUT_ENCODING = 'UTF-8';
97
98							my $DEFAULT_ESCAPE_CHAR = '\\';
99							my $DEFAULT_QUOTE_CHAR = '"';
100
101							#
102							# The code that workarounds $FIX_PERLMONKS_823214 (see below) makes sense only under plain
103							# Windows.
104							#
105							# "Plain" Windows?
106							# This code MUST NOT be executed under cygwin because cygwin uses unix line breaks. This is
107							# why we detect /mswin/. Would we detect /win/, we'd catch cygwin, too, and we don't want
108							# that.
109							#
110							my $OS_IS_PLAIN_WINDOWS = !! ($^O =~ /mswin/i);
111
112							#
113							# Shall we fix the issue reported as #823214 in PerlMonks? See
114							# http://www.perlmonks.org/?node_id=823214
115							#
116							# In brief (in case the link above would be broken one day):
117							# Under Windows, output mode set to UTF-16LE produces line breaks made of octets "0d 0a 00",
118							# whereas it should be "0d 00 0a 00".
119							#
120							# The code also fixes UTF-16BE (but it was not tested).
121							#
122							my $FIX_PERLMONKS_823214 = 1;
123
124
125							# * **** *
126							# * CODE *
127							# * **** *
128
129
130							sub ERR_UNKNOWN_FIELD() { 0 }
131
132							# Store meta-data about each column
133							struct ColData => {
134							field_name => '$',
135							header_text => '$',
136							description => '$',
137							dt_format => '$',
138							dt_locale => '$',
139							multiline => '$'
140							};
141
142							#
143							# Enumeration of ef_type member below
144							# Alternative:
145							# use enum (...)
146							#
147							# But it is not also by default on my distro and installing a package for 3 constants, I find it
148							# a bit overkill!
149							#
150							my ($EF_LINK, $EF_FUNC, $EF_COPY) = 0..2;
151							struct ExtraField => {
152							ef_type => '$',
153							self_name => '$',
154							description => '$',
155
156							check_field_existence => '$',
157
158							# For when ef_type is set to $EF_LINK
159
160							link_self_search => '$',
161							link_remote_obj => '$',
162							link_remote_search => '$',
163							link_remote_read => '$',
164							link_vlookup_opts => '%',
165
166							# For when ef_type is set to $EF_FUNC
167
168							func_sub => '$',
169
170							# For when ef_type is set to $EF_COPY
171
172							copy_source => '$',
173							copy_sub => '$'
174
175							};
176
177							my $SEARCH_VALIDATE_OPTIONS = {
178							value_if_not_found => {type => UNDEF \| SCALAR, optional => 1},
179							value_if_found => {type => UNDEF \| SCALAR, optional => 1},
180							value_if_ambiguous => {type => UNDEF \| SCALAR, optional => 1},
181							ignore_ambiguous => {type => BOOLEAN, optional => 1},
182							case => {type => BOOLEAN, optional => 1},
183							trim => {type => BOOLEAN, optional => 1},
184							ignore_empty => {type => BOOLEAN, optional => 1},
185							ignore_accents => {type => BOOLEAN, optional => 1}
186							};
187
188							sub _is_utf8 {
189	424			424		828	my $e = shift;
190
191	424	100				3130	return 1 if $e =~ m/^(utf-?8\|ucs-?8)/i;
192	32					94	return 0;
193							}
194
195							# To replace // in old perls: return the first non-undef value in provided list
196							sub _get_def {
197	12079			12079		19554	for (@_) {
198	18709	100				40127	return $_ if defined($_);
199							}
200	895					1451	return undef;
201							}
202
203							sub _print {
204	15			15		20	my $self = shift;
205	15					21	my $t = shift;
206
207	15					26	my $infoh = $self->{infoh};
208	15	50				27	return if ref $infoh ne 'GLOB';
209
210	0					0	print($infoh $t);
211							}
212
213							sub _printf {
214	17			17		21	my $self = shift;
215
216	17					20	my $infoh = $self->{infoh};
217	17	50				53	return if ref $infoh ne 'GLOB';
218
219	0					0	printf($infoh @_);
220							}
221
222							sub _print_warning {
223	67			67		103	my $self = shift;
224	67					93	my $warning_message = shift;
225	67					93	my $dont_wrap = shift;
226
227	67	100				161	my $msg = ($dont_wrap ? $warning_message : "$PKG: warning: $warning_message");
228	67	100				4313	carp $msg unless $self->{quiet};
229							}
230
231							sub _close_inh {
232	316			316		467	my $self = shift;
233
234	316	100				3175	close $self->{_inh} if $self->{_close_inh_when_finished};
235	316					1218	$self->{_inh} = undef;
236	316					602	$self->{_close_inh_when_finished} = undef;
237							}
238
239							sub _close_outh {
240	127			127		197	my $self = shift;
241
242	127	50	66			11912	close $self->{outh} if defined($self->{outh}) and $self->{_close_outh_when_finished};
243	127					413	$self->{outh} = undef;
244	127					278	$self->{_close_outh_when_finished} = undef;
245							}
246
247							sub _print_error {
248	81			81		316	my ($self, $error_message, $dont_stop, $err_code, $err_extra) = @_;
249
250	81					194	my $msg = "$PKG: error: $error_message";
251
252	81	100	100			267	if (defined($err_code) and !$self->{quiet} and $self->{croak_if_error}) {
			100
253	5	50				15	if ($err_code == ERR_UNKNOWN_FIELD) {
254	5					10	my %f = %{$err_extra};
	5					20
255	5					9	my @cols;
256	5					18	for my $n (keys %f) {
257	15					30	$cols[$f{$n}] = $n;
258							}
259	5					17	$self->_print($self->get_in_file_disp() . " column - field name correspondance:\n");
260	5					13	$self->_print("COL # FIELD\n");
261	5					13	$self->_print("----- -----\n");
262	5					15	for my $i (0..$#cols) {
263	17	100				45	$self->_printf("%05d %s\n", $i, (defined($cols[$i]) ? $cols[$i] : ''));
264							}
265							} else {
266	0					0	confess "Unknown error code: '$err_code'\n";
267							}
268							}
269
270	81	100	100			287	if ($self->{croak_if_error} and !$dont_stop) {
271	30					92	$self->_close_read(1);
272	30					84	$self->_close_inh();
273	30					72	$self->_close_outh();
274	30					91	$self->_status_reset(1);
275	30					4526	croak $msg;
276							}
277	51					132	$self->_print_warning($msg, 1);
278							}
279
280							#
281							# Return the string passed in argument with all accents removed from characters.
282							# Do it in a rather general and reliable way, not tied to latin1.
283							# Tested on latin1 and latin2 character sets.
284							#
285							# Credits:
286							# http://stackoverflow.com/questions/17561839/remove-accents-from-accented-characters
287							#
288							sub remove_accents {
289	3163			3163	1	25231	validate_pos(@_, {type => SCALAR});
290
291	3163					7569	my $s = $_[0];
292	3163					11025	my $r = NFKD($s);
293	3163					7004	$r =~ s/\p{Nonspacing_Mark}//g;
294	3163					6659	return $r;
295							}
296
297							sub _detect_csv_sep {
298	262			262		435	my $ST_OUTSIDE = 0;
299	262					435	my $ST_INSIDE = 1;
300
301	262					605	my ($self, $escape_char, $quote_char, $sep) = @_;
302
303	262					491	my $_debugh = $self->{_debugh};
304	262					425	my $inh = $self->{_inh};
305	262					413	my $_debug = $self->{_debug};
306
307	262					401	delete $self->{_inh_header};
308
309	262	100				596	$escape_char = $DEFAULT_ESCAPE_CHAR unless defined($escape_char);
310
311	262	50				612	$self->_print_error("illegal \$escape_char: '$escape_char' (length >= 2)"), return 0
312							if length($escape_char) >= 2;
313
314	262	50				528	$self->_print_error("$PKG: error: illegal \$quote_char '$quote_char' (length >= 2)"), return 0
315							if length($quote_char) >= 2;
316
317	262	50				553	$escape_char = '--' if $escape_char eq '';
318	262	50				511	$quote_char = '--' if $quote_char eq '';
319
320							# FIXME (?)
321							# Avoid inlined magic values for separator auto-detection.
322							# Issue is, as you can see below, the behavior is also hard-coded and not straightforward to
323							# render 'tunable' ("," and ";" take precedence over "\t").
324	262					857	my %Seps = (
325							";" => 0,
326							"," => 0,
327							"\t" => 0
328							);
329
330	262					2921	my $h = <$inh>;
331	262	50				2566	if ($self->{inh_is_stdin}) {
332	0					0	$self->{_inh_header} = $h;
333	0	0				0	print($_debugh "Input is STDIN => saving header line to re-read it " .
334							"later (in-memory)\n") if $_debug;
335							} else {
336	262					1974	seek $inh, 0, SEEK_SET;
337	262	50				699	print($_debugh "Input is not STDIN => using seek function to rewind " .
338							"read head after header line reading\n") if $_debug;
339							}
340
341	262					581	chomp $h;
342	262					439	my $status = $ST_OUTSIDE;
343	262					587	my $l = length($h);
344	262					392	my $c = 0;
345	262					600	while ($c < $l) {
346	4808					6085	my $ch = substr($h, $c, 1);
347	4808					5203	my $chnext = '';
348	4808	100				8032	$chnext = substr($h, $c + 1, 1) if ($c < $l - 1);
349	4808	100				7524	if ($status == $ST_INSIDE) {
		50
350	1521	50	66			2933	if ($ch eq $escape_char and $chnext eq $quote_char) {
		100
351	0					0	$c += 2;
352							} elsif ($ch eq $quote_char) {
353	197					231	$status = $ST_OUTSIDE;
354	197					306	$c++;
355							} else {
356	1324					1982	$c++;
357							}
358							} elsif ($status == $ST_OUTSIDE) {
359	3287	50	33			7145	if ($ch eq $escape_char and ($chnext eq $quote_char or
		100	66
		100
360							exists $Seps{$chnext})) {
361	0					0	$c += 2;
362							} elsif (exists $Seps{$ch}) {
363	946					1175	$Seps{$ch}++;
364	946					1410	$c++;
365							} elsif ($ch eq $quote_char) {
366	197					226	$status = $ST_INSIDE;
367	197					315	$c++;
368							} else {
369	2144					3386	$c++;
370							}
371							}
372							}
373
374	262	100	100			1342	if ($Seps{";"} == 0 and $Seps{","} >= 1) {
		100	66
		50	33
			33
375	147					260	$$sep = ",";
376	147					636	return 1;
377							} elsif ($Seps{","} == 0 and $Seps{";"} >= 1) {
378	106					183	$$sep = ";";
379	106					434	return 1;
380							} elsif ($Seps{","} == 0 and $Seps{";"} == 0 and $Seps{"\t"} >= 1) {
381	0					0	$$sep = "\t";
382	0					0	return 1;
383							} else {
384
385							# Check the case where there is one unique column, in which case,
386							# assume comma separator.
387	9					19	my $h_no_accnt = remove_accents($h);
388	9	100				56	if ($h_no_accnt =~ m/^[[:alnum:]_]+$/i) {
389	3					8	$$sep = ",";
390	3					15	return 1;
391							}
392
393	6					10	$$sep = "";
394	6	50				13	if ($_debug) {
395	0					0	for my $k (keys %Seps) {
396	0					0	print($_debugh "\$Seps{'$k'} = $Seps{$k}\n");
397							}
398							}
399	6					23	return 0;
400							}
401							}
402
403							sub _reopen_input {
404	652			652		964	my $self = shift;
405
406	652					967	my $in_file = $self->{in_file};
407
408	652					804	my $inh;
409	652	50				13337	if (!open($inh, "<", $in_file)) {
410	0					0	$self->_print_error("unable to open file '$in_file': $!");
411	0					0	return undef;
412							}
413	652	50				1847	if (!$self->{_leave_encoding_alone}) {
414
415							confess "Oups! _inh_encoding_string undef?"
416	652	50				1329	unless defined($self->{_inh_encoding_string});
417
418	652					4216	binmode $inh, $self->{_inh_encoding_string};
419							}
420
421	652					29166	return $inh;
422							}
423
424							# Abstraction layer, not useful Today, could bring added value when looking into Text::CSV I/O
425							sub _mygetline {
426	14863			14863		26338	my ($csvobj, $fh) = @_;
427
428	14863					300278	return $csvobj->getline($fh);
429							}
430
431							sub _detect_meta {
432	456			456		955	my ($self, $quote_char, $sep_char) = @_;
433
434	456					755	my $in_file = $self->{in_file};
435	456					688	my $_debug = $self->{_debug};
436	456					693	my $_debugh = $self->{_debugh};
437
438	456	100				896	return if $self->{_int_one_pass};
439	436	100				1018	return if $self->{_detect_meta_done}; # Sans jeu de mot...
440
441	300	100				717	if (!defined($self->{escape_char})) {
442	298					742	$self->_register_pass("detect escape character");
443
444	298					418	my $flag = 0;
445	298					693	my $inh = $self->_reopen_input();
446	298	50				752	if (defined($inh)) {
447	298					3892	while (my $l = <$inh>) {
448	7131					16218	chomp $l;
449
450							# Very heuristic criteria...
451							# Tant pis.
452							# $flag = 1 if $l =~ m/(?<!$sep_char)$quote_char$quote_char(?!$sep_char)/;
453							# $flag = 1 if $l =~ m/(\\$quote_char\|\\\\)/;
454	7131	100				24067	$flag = 1 if $l =~ m/(\\$quote_char)/;
455
456							}
457	298					2174	close $inh;
458							}
459	298	100				1193	$self->{escape_char} = ($flag ? '\\' : '"');
460	298	50				1002	print($_debugh " detected escape_char: '$self->{escape_char}'\n") if $_debug;
461							}
462
463	300	50				634	print($_debugh " using escape_char: '$self->{escape_char}' " .
464							"to further examine input (is_always_quoted, multiline)\n") if $_debug;
465
466	300					444	my $is_always_quoted = 0;
467	300					743	my $inh = $self->_reopen_input();
468	300					484	my @multiline;
469	300	50				707	if (defined($inh)) {
470	300					782	$self->_register_pass("detect is_always_quoted and multiline");
471							my $csv = Text::CSV->new({sep_char => $sep_char,
472							allow_whitespace => 1, binary => 1, auto_diag => 0,
473							quote_char => $quote_char, escape_char => $self->{escape_char},
474	300					3106	keep_meta_info => 1,
475							allow_loose_escapes => 1});
476	300					56555	my $nb_rows = 0;
477	300					490	$is_always_quoted = 1;
478	300					675	while (my $ar = _mygetline($csv, $inh)) {
479	7129					190964	$nb_rows++;
480
481	7129					8760	my @a = @{$ar};
	7129					15824
482	7129					10602	my $e = $#a;
483	7129					12760	for my $i (0..$e) {
484	44737	100				73856	$is_always_quoted = 0 unless $csv->is_quoted($i);
485	44737	100				351542	$multiline[$i] = 1 if $a[$i] =~ m/\n/;
486							}
487							}
488	300					9064	$self->{_nb_rows} = $nb_rows;
489	300					3509	close $inh;
490							}
491	300					1150	$self->{_multiline} = [ @multiline ];
492
493	300	50				720	print($_debugh " is_always_quoted: $is_always_quoted\n") if $_debug;
494	300					665	$self->{_is_always_quoted} = $is_always_quoted;
495
496	300					1405	$self->{_detect_meta_done} = 1;
497							}
498
499							sub _register_pass {
500	1269			1269		2339	my ($self, $pass_name) = @_;
501	1269					2074	my $_debug = $self->{_debug};
502	1269					1808	my $_debugh = $self->{_debugh};
503
504	1269					2043	$self->{_pass_count}++;
505
506	1269	50				2809	return unless $_debug;
507
508	0					0	print($_debugh "Pass #" . $self->{_pass_count} . " ($pass_name) done\n");
509							}
510
511							sub _update_in_mem_record_count {
512	510			510		1093	my ($self, $nonexistent_arg) = @_;
513	510					905	my $_debug = $self->{_debug};
514	510					846	my $_debugh = $self->{_debugh};
515
516	510	50				1056	confess "Hey! what is this second argument?" if defined($nonexistent_arg);
517
518	510					673	my $new_count = $#{$self->{_flat}} + 1;
	510					1086
519
520	510					861	my $updated_max = 0;
521	510	100				1386	if ($new_count > $self->get_max_in_mem_record_count()) {
522	155					486	$self->_set_max_in_mem_record_count($new_count);
523	155					273	$updated_max = 1;
524							}
525
526	510					1247	$self->{_in_mem_record_count} = $new_count;
527	510	50				1238	if ($_debug) {
528	0					0	print($_debugh "_in_mem_record_count updated, set to $new_count");
529	0	0				0	print($_debugh " (also updated max)") if $updated_max;
530	0					0	print($_debugh "\n");
531							}
532							}
533
534							sub _detect_inh_encoding {
535	320			320		1287	my ($self, $enc, $via, $in_file, $detect_enc) = @_;
536	320					672	my $_debug = $self->{_debug};
537	320					508	my $_debugh = $self->{_debugh};
538
539	320	100	66			1101	$enc = $DEFAULT_IN_ENCODING if !defined($enc) or $enc eq '';
540
541	320					2061	my @encodings = split(/\s,\s/, $enc);
542
543	320	50				932	confess "Oups! No encoding to try?" if $#encodings < 0;
544
545	320	50				728	print($_debugh "[ST] _detect_inh_encoding(): start\n") if $_debug;
546
547	320					497	my $wrn = 0;
548	320					777	my $m;
549							my $m0;
550	320					0	my $ee;
551	320					684	for my $e (@encodings) {
552	332					514	$ee = $e;
553	332					692	my $viadef = _get_def($via, '');
554	332					985	$m = ":encoding($e)$viadef";
555	332	100				801	$m0 = $m unless defined($m0);
556
557	332	100				676	last unless $detect_enc;
558
559	320	50	33			1342	confess "Oups! in_file not defined?" if !defined($in_file) or $in_file eq '';
560
561	320	50				641	print($_debugh " Checking encoding '$e' / '$m'\n") if $_debug;
562	320					465	$wrn = 0;
563
564	320					1138	$self->_register_pass("check $e encoding");
565
566	320					522	my $utf8_bom = 0;
567	320	100				733	if (_is_utf8($e)) {
568	296	50				6325	if (open my $fh, '<:raw', $in_file) {
569	296					622	my $bom;
570	296					4840	read $fh, $bom, 3;
571	296	100	66			1554	if (length($bom) == 3 and $bom eq "\xef\xbb\xbf") {
572	12	100				41	if (!defined($via)) {
573	10					34	$m .= ":via(File::BOM)";
574							}
575							}
576	296					2429	close $fh;
577							}
578							}
579
580	320					641	my $inh;
581	320	50				4998	if (!open($inh, "<", $in_file)) {
582	0					0	$self->_print_error("unable to open file '$in_file': $!");
583	0					0	return ($encodings[0], $m0);
584							}
585	16			16		103	binmode $inh, $m;
	16					26
	16					98
	320					3529
586
587							# TURN OFF WARNINGS OUTPUT
588
589							{
590	320					34466	local $SIG{__WARN__} = sub {
591	51			51		892	$wrn++;
592							# Uncomment only for debug!
593							# Otherwise you'll get quite a good deal of output at each execution :-)
594							# print(STDERR @_);
595	320					2319	};
596	320					4433	while (<$inh>) { }
597							}
598
599							# WARNINGS ARE BACK ON
600
601	320					15184	close $inh;
602	320	50				973	print($_debugh " '$m' counts $wrn warning(s)\n") if $_debug;
603
604	320	100				1326	last if $wrn == 0;
605							}
606
607	320	50				793	if ($wrn >= 1) {
608	0					0	$self->_print_warning("encoding warnings encountered during initial check, " .
609							"using '$encodings[0]'");
610	0					0	return ($encodings[0], $m0);
611							}
612
613	320	50				670	confess "Oups! undef encoding string?" unless defined($m);
614
615	320	50				656	print($_debugh " Detected encoding string '$ee' / '$m'\n") if $_debug;
616	320					1325	return ($ee, $m);
617							}
618
619							#
620							# Each of these functions brings status to the next value (current status + 1).
621							# Each of these functions returns 0 if an error occured, 1 if all good
622							#
623							my @status_forward_functions = (
624							"_S1_init_input", # To go from S0 to S1
625							"_S2_init_fields_from_header", # To go form S1 to S2
626							"_S3_init_fields_extra", # To go from S2 to S3
627							"_S4_read_all_in_mem", # To go from S3 to S4
628							);
629
630							sub _status_reset {
631	494			494		770	my $self = shift;
632
633	494					3224	validate_pos(@_, {type => SCALAR, optional => 1});
634	494					1706	my $called_from_print_error = _get_def($_[0], 0);
635
636	494	100	100			1742	if (defined($self->{_status}) and $self->{_status} == 4) {
637	18	100				46	unless ($called_from_print_error) {
638	16					27	my $msg = "in-memory CSV content discarded, will have to re-read input";
639	16					59	$self->_print_warning($msg);
640							}
641	18					159	$self->{_flat} = [ ];
642	18					61	$self->_update_in_mem_record_count();
643							}
644
645	494					993	$self->{_status} = 0;
646	494	100				961	return 0 if $called_from_print_error;
647	464					1069	return $self->_status_forward('S1');
648							}
649
650							sub _status_forward {
651	4084			4084		5417	my $self = shift;
652
653	4084					7464	return $self->___status_move(@_, 1);
654							}
655
656							sub _status_backward {
657	139			139		195	my $self = shift;
658
659	139					310	return $self->___status_move(@_, -1);
660							}
661
662							# You should not call ___status_move() in the code, that is why the name is prefixed with 3
663							# underscores! Only _status_forward and _status_backward should call it.
664							sub ___status_move {
665	4223			4223		7145	my ($self, $target, $step) = @_;
666
667	4223					6270	my $_debug = $self->{_debug};
668	4223					5514	my $_debugh = $self->{_debugh};
669
670	4223	50	66			17189	if (!defined($step) or ($step != -1 and $step != 1)) {
			33
671	0					0	confess "Oups! \$step has a wrong value: '$step'";
672							}
673
674	4223					5201	my $n;
675	4223	50				17241	confess "Oups! illegal status string: '$target'" unless ($n) = $target =~ m/^S(\d)$/;
676
677	4223	100				8602	if ($self->{_read_in_progress}) {
678	1					4	$self->_print_error("illegal call while read is in progress, " .
679							"would lead to infinite recursion", 0);
680	0					0	confess "Aborted.";
681							}
682
683	4222	100				7047	if ($step == -1) {
684	139	100				313	if ($n < $self->{_status}) {
685	19	100				49	if ($self->{_status} == 4) {
686	16	50				40	print($_debugh "[ST] Requested status $n but will go to status 0\n") if $_debug;
687	16					47	return $self->_status_reset();
688							}
689	3					5	$self->{_status} = $n ;
690	3	50				7	print($_debugh "[ST] New status: ". $self->{_status} . "\n") if $_debug;
691							}
692	123					336	return 1;
693							}
694
695	4083	100				8334	if ($self->{_status} < $n) {
696	869	50				1584	print($_debugh "[ST] Current status: ". $self->{_status} . "\n") if $_debug;
697							}
698
699	4083	100	100			10040	if ($self->{_status} <= 1 and $n >= 2 and $self->{_int_one_pass} and
			100
			100
700							$self->get_pass_count() >= 1) {
701	12					23	my $msg = "one_pass set, unable to read input again";
702	12	50				46	$self->_print_error($msg), return 0 if $self->{one_pass};
703	0	0				0	$self->_print_warning($msg) if !$self->{one_pass};
704							}
705
706	4071					7551	while ($self->{_status} < $n) {
707
708	1347					2548	my $funcname = $status_forward_functions[$self->{_status}];
709	1347	50				2661	confess "Oups! Unknown status?" unless defined($funcname);
710
711	1347	50				2236	print($_debugh "[ST] Now executing $funcname\n") if $_debug;
712
713	1347	50				5495	if (my $member_function = $self->can($funcname)) {
714	1347	100				3075	return 0 unless $self->$member_function();
715							} else {
716	0					0	confess "Could not find method $funcname in $PKG!";
717							}
718
719	1328					2485	$self->{_status} += $step;
720	1328	50				4024	print($_debugh "[ST] New status: ". $self->{_status} . "\n") if $_debug;
721							}
722
723	4052					9545	return 1;
724							}
725
726							sub new {
727	333			333	1	255769	my ($class, @args) = @_;
728
729	333					39585	@args = validate(@args,
730							{ in_file => {type => SCALAR, optional => 1},
731							infoh => {type => UNDEF \| GLOBREF, default => \*STDERR, optional => 1},
732							verbose => {type => BOOLEAN, default => 0, optional => 1},
733							quiet => {type => BOOLEAN, optional => 1},
734							croak_if_error => {type => BOOLEAN, default => 1, optional => 1},
735							inh => {type => GLOBREF, optional => 1},
736							in_csvobj => {type => OBJECT, optional => 1},
737							sep_char => {type => SCALAR, optional => 1},
738							quote_char => {type => SCALAR, optional => 1},
739							escape_char => {type => SCALAR, optional => 1},
740							has_headers => {type => BOOLEAN, default => 1, optional => 1},
741							out_has_headers => {type => UNDEF \| BOOLEAN, default => undef, optional => 1},
742							fields_ar => {type => ARRAYREF, optional => 1},
743							fields_hr => {type => HASHREF, optional => 1},
744							fields_column_names => {type => ARRAYREF, optional => 1},
745							search_case => {type => SCALAR, optional => 1},
746							search_trim => {type => SCALAR, optional => 1},
747							search_ignore_empty => {type => SCALAR, optional => 1},
748							search_ignore_accents => {type => SCALAR, optional => 1},
749							search_ignore_ambiguous => {type => SCALAR, optional => 1},
750							search_value_if_not_found => {type => SCALAR, optional => 1},
751							search_value_if_found => {type => SCALAR, optional => 1},
752							search_value_if_ambiguous => {type => SCALAR, optional => 1},
753							walker_hr => {type => CODEREF, optional => 1},
754							walker_ar => {type => CODEREF, optional => 1},
755							read_post_update_hr => {type => CODEREF, optional => 1},
756							write_filter_hr => {type => CODEREF, optional => 1},
757							out_filter => {type => CODEREF, optional => 1},
758							write_fields => {type => ARRAYREF, optional => 1},
759							out_orderby => {type => ARRAYREF, optional => 1},
760							out_fields => {type => ARRAYREF, optional => 1},
761							out_file => {type => SCALAR, optional => 1},
762							out_always_quote => {type => BOOLEAN, optional => 1},
763							out_sep_char => {type => SCALAR, optional => 1},
764							out_quote_char => {type => SCALAR, optional => 1},
765							out_escape_char => {type => SCALAR, optional => 1},
766							out_dates_format => {type => SCALAR, optional => 1},
767							out_dates_locale => {type => SCALAR, optional => 1},
768							encoding => {type => SCALAR, optional => 1},
769							via => {type => SCALAR, optional => 1},
770							out_encoding => {type => SCALAR, optional => 1},
771							dont_mess_with_encoding => {type => BOOLEAN, optional => 1},
772							one_pass => {type => BOOLEAN, optional => 1},
773							no_undef => {type => BOOLEAN, optional => 1},
774							fields_dates => {type => ARRAYREF, optional => 1},
775							fields_dates_auto => {type => BOOLEAN, optional => 1},
776							fields_dates_auto_optimize => {type => BOOLEAN, optional => 1},
777							dates_formats_to_try => {type => ARRAYREF, optional => 1},
778							dates_formats_to_try_supp => {type => ARRAYREF, optional => 1},
779							dates_ignore_trailing_chars => {type => BOOLEAN, optional => 1},
780							dates_search_time => {type => BOOLEAN, optional => 1},
781							dates_locales => {type => SCALAR, optional => 1},
782							out_utf8_bom => {type => SCALAR, optional => 1},
783							dates_zeros_ok => {type => SCALAR, default => 1, optional => 1},
784							_debug => {type => BOOLEAN, default => 0, optional => 1},
785							_debug_read => {type => BOOLEAN, default => 0, optional => 1},
786							_debug_extra_fields => {type => BOOLEAN, optional => 1},
787							_debugh => {type => UNDEF \| GLOBREF, optional => 1}
788							}
789							);
790
791	329					7092	my $self = { @args };
792
793	329					686	my @fields = keys %{$self};
	329					1229
794
795							# croak_if_error
796
797	329					815	my $croak_if_error = $self->{croak_if_error};
798
799							# verbose and _debug management
800
801	329	50				1220	$self->{_debugh} = $self->{infoh} if !defined($self->{_debugh});
802	329	50				879	$self->{_debug} = 1 if $ALWAYS_DEBUG;
803	329					624	my $_debug = $self->{_debug};
804	329	50				714	$self->{verbose} = 1 if $_debug;
805	329					541	my $verbose = $self->{verbose};
806
807	329					502	my $_debugh = $self->{_debugh};
808
809	329					618	bless $self, $class;
810
811							# fields_ar, fields_hr
812
813	329	100				1086	if (defined($self->{fields_ar}) +
814							defined($self->{fields_hr}) +
815							defined($self->{fields_column_names})
816							>= 2) {
817	1					7	$self->_print_error("mixed use of fields_ar, fields_hr and fields_column_names. " .
818							"Use one at a time.");
819							}
820	329	100	100			1050	if (defined($self->{fields_ar}) and !defined($self->{fields_hr})) {
821	2					4	my @f = @{$self->{fields_ar}};
	2					6
822	2					3	my %h;
823	2					6	for my $e (@f) {
824	6					15	$h{$e} = "^$e\$";
825							}
826	2					6	$self->{fields_hr} = \%h;
827							}
828	329	100				749	if (!$self->{has_headers}) {
829	13	100				33	if (defined($self->{fields_ar})) {
830	1					5	$self->_print_error("fields_ar irrelevant if CSV file has no headers");
831	1					175	return undef;
832							}
833	12	100				29	if (defined($self->{fields_hr})) {
834	1					4	$self->_print_error("fields_hr irrelevant if CSV file has no headers");
835	1					5	return undef;
836							}
837							}
838
839							# in_file or inh
840
841	327					733	$self->{_flat} = [ ];
842
843	327					686	$self->{_read_update_after_hr} = { };
844	327					652	$self->{_write_update_before_hr} = { };
845
846	327					1074	$self->_update_in_mem_record_count();
847
848	327	100				882	return undef unless $self->_status_reset();
849
850	318	50				672	$self->_debug_show_members() if $_debug;
851
852	318	100				728	if ($self->{dates_zeros_ok}) {
853							$self->{_refsub_is_datetime_empty} = sub {
854	9637			9637		16324	my $v = $_[0];
855	9637	100				29969	if ($v !~ m/[1-9]/) {
856	4207	100				10364	return 1 if $v =~ m/^[^0:]*0+[^0:]+0+[^0:]+0+/;
857							}
858	9634					30100	return 0;
859							}
860	316					1466	}
861
862	318					5336	return $self;
863							};
864
865							#
866							# Return 0 if error, 1 if all good
867							#
868							# Do all low level activities associated to input:
869							# I/O init
870							# Detect encoding
871							# Detect CSV separator
872							# Detect escape character
873							#
874							sub _S1_init_input {
875	470			470		726	my $self = shift;
876
877	470					786	my $croak_if_error = $self->{croak_if_error};
878	470					751	my $_debug = $self->{_debug};
879	470					675	my $_debugh = $self->{_debugh};
880
881	470	100				952	$self->{in_file} = '' unless defined($self->{in_file});
882	470					946	$self->{_close_inh_when_finished} = 0;
883
884							$self->{_leave_encoding_alone} = $self->{dont_mess_with_encoding}
885	470	50				1027	if defined($self->{dont_mess_with_encoding});
886
887	470					1341	$self->{_int_one_pass} = _get_def($self->{one_pass}, 0);
888	470					878	my $in_file_disp;
889
890
891							#
892							# LOW LEVEL INIT STEP 1 OF 4
893							#
894							# Manage I/O (= in most cases, open input file...)
895							#
896
897	470	100				993	if (defined($self->{inh})) {
898	4	50				13	$self->{_leave_encoding_alone} = 1 unless defined($self->{dont_mess_with_encoding});
899	4					10	$in_file_disp = _get_def($self->{in_file}, '<?>');
900	4	50				12	$self->{_int_one_pass} = 1 unless defined($self->{one_pass});
901	4					8	$self->{_inh} = $self->{inh};
902							} else {
903	466	50				1201	$self->{_leave_encoding_alone} = 0 unless defined($self->{dont_mess_with_encoding});
904	466					803	my $in_file = $self->{in_file};
905	466					670	my $inh;
906	466	50				972	if ($in_file eq '') {
907	0					0	$inh = \*STDIN;
908	0					0	$self->{inh_is_stdin} = 1;
909	0	0				0	$self->{_int_one_pass} = 1 unless defined($self->{one_pass});
910	0					0	$in_file_disp = '<stdin>';
911							} else {
912	466	100				17077	if (!open($inh, '<', $in_file)) {
913	3					52	$self->_print_error("unable to open file '$in_file': $!");
914	3					77	return 0;
915							}
916	463					1180	$in_file_disp = $in_file;
917	463					1080	$self->{_close_inh_when_finished} = 1;
918							}
919	463					1005	$self->{_inh} = $inh;
920							}
921
922	467	50				1021	confess "Oups! in_file_disp not defined?" unless defined($in_file_disp);
923	467					964	$self->{_in_file_disp} = $in_file_disp;
924
925
926							#
927							# LOW LEVEL INIT STEP 2 OF 4
928							#
929							# "Detection" of encoding
930							#
931							# WARNING
932							# As explained in the manual, it is a very partial and limited detection...
933							#
934
935	467	100				1012	unless ($self->{_leave_encoding_alone}) {
936	463	100				1032	unless ($self->{_init_input_already_called}) {
937							my ($e, $m) = $self->_detect_inh_encoding($self->{encoding}, $self->{via},
938	320	100				1815	$self->{in_file}, ($self->{_int_one_pass} ? 0 : $DETECT_ENCODING));
939	320					1083	$self->{_inh_encoding} = $e;
940	320					706	$self->{_inh_encoding_string} = $m;
941							}
942
943	463					3422	binmode $self->{_inh}, $self->{_inh_encoding_string};
944							print($_debugh "Input encoding: '" . $self->{_inh_encoding} . "' / '" .
945	463	50				21442	$self->{_inh_encoding_string} . "'\n") if $_debug;
946
947							}
948
949	467	100				1313	$self->{out_file} = '' unless defined($self->{out_file});
950
951
952							#
953							# LOW LEVEL INIT STEP 3 OF 4
954							#
955							# Detection of CSV separator and escape character
956							#
957
958	467					639	my $sep_char;
959	467					805	my $escape_char = $self->{escape_char};
960	467	100				1180	$self->{quote_char} = $DEFAULT_QUOTE_CHAR unless defined($self->{quote_char});
961	467					776	my $quote_char = $self->{quote_char};
962	467	100				1044	unless (defined($self->{in_csvobj})) {
963	462	100				913	if (defined($self->{sep_char})) {
964	200					395	$sep_char = $self->{sep_char};
965	200	50				473	print($_debugh "-- $in_file_disp: CSV separator set to \"") if $_debug;
966							} else {
967							# The test below (on _init_input_already_called) shoud be useless.
968							# Left for the sake of robustness.
969	262	50				550	unless ($self->{_init_input_already_called}) {
970	262	100				823	if (!$self->_detect_csv_sep($escape_char, $quote_char, \$sep_char)) {
971	6					20	$self->_print_error("'$in_file_disp': cannot detect CSV separator");
972	0					0	return 0;
973							}
974	256	50				567	print($_debugh "-- $in_file_disp: CSV separator detected to \"") if $_debug;
975	256					581	$self->{sep_char} = $sep_char;
976							}
977							}
978	456	50				865	print($_debugh _render($sep_char) . "\"\n") if $_debug;
979
980	456					1433	$self->_detect_meta($quote_char, $sep_char);
981
982							$self->{_in_csvobj} = Text::CSV->new({sep_char => $sep_char,
983							allow_whitespace => 1, binary => 1, auto_diag => 0,
984							quote_char => $quote_char, escape_char => $self->{escape_char},
985	456					3227	allow_loose_escapes => 1});
986	456	50				75000	unless (defined($self->{_in_csvobj})) {
987	0					0	$self->_print_error("error creating input Text::CSV object");
988	0					0	return 0;
989							}
990
991							} else {
992	5					8	$self->{_in_csvobj} = $self->{in_csvobj};
993							}
994
995	461					888	$self->{_init_input_already_called} = 1;
996
997	461					1297	return 1;
998							}
999
1000							sub get_in_file_disp {
1001	3663			3663	1	4836	my $self = shift;
1002
1003	3663					19857	validate_pos(@_);
1004
1005	3663					9455	my $in_file_disp = _get_def($self->{_in_file_disp}, '?');
1006	3663					6690	return $in_file_disp;
1007							}
1008
1009							sub get_sep_char {
1010	0			0	1	0	my $self = shift;
1011
1012	0					0	validate_pos(@_);
1013
1014	0					0	return $self->{sep_char};
1015							}
1016
1017							sub get_escape_char {
1018	9			9	1	37	my $self = shift;
1019
1020	9					53	validate_pos(@_);
1021
1022	9					52	return $self->{escape_char};
1023							}
1024
1025							sub get_in_encoding {
1026	27			27	1	6959	my $self = shift;
1027
1028	27					271	validate_pos(@_);
1029
1030	27					118	return _get_def($self->{_inh_encoding}, '');
1031							}
1032
1033							sub get_is_always_quoted {
1034	15			15	1	64	my $self = shift;
1035
1036	15					88	validate_pos(@_);
1037
1038	15					73	return $self->{_is_always_quoted};
1039							}
1040
1041							sub get_pass_count {
1042	43			43	1	687	my $self = shift;
1043
1044	43					233	validate_pos(@_);
1045
1046	43					137	return _get_def($self->{_pass_count}, 0);
1047							}
1048
1049							sub get_in_mem_record_count {
1050	0			0	1	0	my $self = shift;
1051
1052	0					0	validate_pos(@_);
1053
1054	0					0	return ($self->{_in_mem_record_count}, 0);
1055							}
1056
1057							sub get_max_in_mem_record_count {
1058	513			513	1	2254	my $self = shift;
1059
1060	513					3038	validate_pos(@_);
1061
1062	513					2118	return _get_def($self->{_max_in_mem_record_count}, 0);
1063							}
1064
1065							sub _set_max_in_mem_record_count {
1066	155			155		279	my $self = shift;
1067
1068	155					1537	validate_pos(@_, {type => SCALAR});
1069
1070	155					538	$self->{_max_in_mem_record_count} = $_[0];
1071							}
1072
1073							sub get_fields_names {
1074	19			19	1	3445	my $self = shift;
1075
1076	19					122	validate_pos(@_);
1077
1078	19	50				58	return () unless $self->_status_forward('S3');
1079	19					31	return @{$self->{_columns}};
	19					89
1080							}
1081
1082							sub get_field_name {
1083	1			1	1	475	my $self = shift;
1084
1085	1					11	validate_pos(@_, {type => SCALAR});
1086
1087	1					3	my ($n) = @_;
1088
1089	1	50				3	return undef unless $self->_status_forward('S3');
1090	1					4	return $self->{_columns}->[$n];
1091							}
1092
1093							sub get_coldata {
1094	1			1	1	2	my $self = shift;
1095
1096	1					7	validate_pos(@_);
1097
1098	1	50				8	return () unless $self->_status_forward('S3');
1099	1					2	my @ret;
1100	1					3	for (@{$self->{_coldata}}) {
	1					2
1101	4					135	push @ret, [
1102							$_->field_name,
1103							$_->header_text,
1104							$_->description,
1105							$_->dt_format,
1106							$_->dt_locale,
1107							$_->multiline];
1108							}
1109
1110	1					26	return @ret;
1111							}
1112
1113							sub get_stats {
1114	2			2	1	12	my $self = shift;
1115
1116	2					11	validate_pos(@_);
1117
1118	2	50				7	return () unless defined($self->{_stats});
1119	2					3	return %{$self->{_stats}};
	2					7
1120							}
1121
1122							sub get_nb_rows {
1123	1			1	1	2	my $self = shift;
1124
1125	1					7	validate_pos(@_);
1126
1127	1					3	return $self->{_nb_rows};
1128							}
1129
1130							sub _debug_show_members {
1131	0			0		0	my ($self) = @_;
1132	0					0	my $_debugh = $self->{_debugh};
1133	0	0				0	my @a = @{$self->{fields_ar}} if defined($self->{fields_ar});
	0					0
1134	0	0				0	my @c = @{$self->{fields_column_names}} if defined($self->{fields_column_names});
	0					0
1135	0	0				0	my %h = %{$self->{fields_hr}} if defined($self->{fields_hr});
	0					0
1136
1137	0					0	print($_debugh "-- _debug_show_members() start\n");
1138	0					0	print($_debugh " croak_if_error $self->{croak_if_error}\n");
1139	0					0	print($_debugh " verbose $self->{verbose}\n");
1140	0					0	print($_debugh " _debug $self->{_debug}\n");
1141	0					0	print($_debugh " _debug_read $self->{_debug_read}\n");
1142	0					0	print($_debugh " infoh $self->{infoh}\n");
1143	0					0	print($_debugh " _debugh $_debugh\n");
1144	0					0	print($_debugh " inh: $self->{_inh}\n");
1145	0					0	print($_debugh " in_file_disp " . $self->get_in_file_disp() . "\n");
1146	0					0	print($_debugh " _in_csvobj $self->{_in_csvobj}\n");
1147	0					0	print($_debugh " has_headers $self->{has_headers}\n");
1148	0					0	print($_debugh " fields_ar:\n");
1149	0					0	for my $e (@a) {
1150	0					0	print($_debugh " '$e'\n");
1151							}
1152	0					0	print($_debugh " fields_hr:\n");
1153	0					0	for my $e (keys %h) {
1154	0					0	print($_debugh " '$e' => '$h{$e}'\n");
1155							}
1156	0					0	print($_debugh " fields_column_names:\n");
1157	0					0	for my $e (@c) {
1158	0					0	print($_debugh " '$e'\n");
1159							}
1160	0					0	print($_debugh "-- _debug_show_members() end\n");
1161							}
1162
1163							#
1164							# Check headers in CSV header line
1165							# Used to increase robustness by relying on header title rather than
1166							# column number.
1167							#
1168							# Return 1 if success (all fields found), 0 otherwise.
1169							#
1170							sub _process_header {
1171	12			12		30	my $self = shift;
1172	12					19	my @headers = @{shift(@_)};
	12					43
1173	12					22	my %fields_h = %{shift(@_)};
	12					69
1174	12					26	my $retval = shift;
1175
1176	12					23	my @tmp = keys %{$retval};
	12					51
1177
1178	12					38	my $in_file_disp = $self->get_in_file_disp();
1179
1180	12	50				40	confess '$_[4] must be an empty by-ref hash' if $#tmp >= 0;
1181
1182	12					25	my $e = 0;
1183	12					43	for my $k (keys %fields_h) {
1184	48					79	my $v = $fields_h{$k};
1185
1186	48			462		205	my @all_idx = indexes { /$v/i } @headers;
	462					2576
1187	48	50				164	if ($#all_idx >= 1) {
1188	0					0	$self->_print_error("file $in_file_disp: " .
1189							"more than one column matches the criteria '$v'");
1190	0					0	$e++;
1191							}
1192	48			258		172	my $idx = first_index { /$v/i } @headers;
	258					1541
1193	48	50				206	if ($idx < 0) {
1194	0					0	$self->_print_error("file $in_file_disp: unable to find field '$v'");
1195	0					0	$e++;
1196							} else {
1197	48					117	$retval->{$k} = $idx;
1198							}
1199							}
1200
1201	12	50				73	return ($e >= 1 ? 0 : 1);
1202							}
1203
1204							sub set_walker_hr {
1205	2			2	1	1800	my $self = shift;
1206	2					24	validate_pos(@_, {type => UNDEF \| CODEREF, optional => 1});
1207
1208	2					8	my ($walker_hr) = @_;
1209
1210	2	50				11	return undef unless $self->_status_forward('S2');
1211	2	50				7	return undef unless $self->_status_backward('S2');
1212	2					3	$self->{walker_hr} = $walker_hr;
1213
1214	2					6	return $self;
1215							}
1216
1217							sub set_walker_ar {
1218	2			2	1	1460	my $self = shift;
1219	2					24	validate_pos(@_, {type => UNDEF \| CODEREF, optional => 1});
1220
1221	2					8	my ($walker_ar) = @_;
1222
1223	2	50				6	return undef unless $self->_status_forward('S2');
1224	2	50				5	return undef unless $self->_status_backward('S2');
1225	2					4	$self->{walker_ar} = $walker_ar;
1226
1227	2					4	return $self;
1228							}
1229
1230
1231							# * *************************************** *
1232							# * BEGINNING OF DATE FORMAT DETECTION CODE *
1233							# * *************************************** *
1234
1235
1236							#
1237							# The '%m.%d.%y' is not at its "logical" location. It is done to make sure the order in which
1238							# entries are written does not impact the result.
1239							#
1240							# It could occur because there is some code that correlates an entry containing %y with another
1241							# one that would contain %Y. The %Y will be called the master, the %y will be called the slave.
1242							# It is important to match such entries, otherwise an identified format with %y would always be
1243							# ambiguous with the same written with %Y.
1244							#
1245							# IMPORTANT
1246							# The list below is written almost as-is in the POD at the bottom of this file.
1247							#
1248							my @DATES_DEFAULT_FORMATS_TO_TRY = (
1249							'',
1250							'%Y-%m-%d',
1251							'%Y.%m.%d',
1252							'%Y/%m/%d',
1253
1254							'%m.%d.%y',
1255
1256							'%m-%d-%Y',
1257							'%m.%d.%Y',
1258							'%m/%d/%Y',
1259							'%d-%m-%Y',
1260							'%d.%m.%Y',
1261							'%d/%m/%Y',
1262
1263							'%m-%d-%y',
1264							'%m/%d/%y',
1265							'%d-%m-%y',
1266							'%d.%m.%y',
1267							'%d/%m/%y',
1268
1269							'%Y%m%d%H%M%S',
1270
1271							# Localizaed formats
1272
1273							'%b %d, %Y',
1274							'%b %d %Y',
1275							'%b %d %T %Z %Y',
1276							'%d %b %Y',
1277							'%d %b, %Y'
1278							);
1279
1280							#
1281							# IMPORTANT
1282							# Under Linux, $START is useless. Strptime will match a format exactly as it is, and a tring
1283							# like "01/01/16 13:00:00" won't match with "%T". Under Windows, Strptime is capable of doing
1284							# a match by ignoring characters at the beginning, thus "01/01/2016 13:00:00" for example will
1285							# return success when matched against "%T".
1286							# Possibly it has to do with versionning of Strptime, not Linux versus Windows as such. Any
1287							# way, this difference had to be dealt with.
1288							#
1289							# The flexibility under Windows would screw the code logic so I had to add the prefix string
1290							# below, to avoid unexpected success on match.
1291							#
1292							my $START = '<';
1293
1294							struct RecordCounter => {
1295							count_ok => '$',
1296							count_ko => '$',
1297							has_searched_time => '$',
1298
1299							format => '$',
1300							locale => '$',
1301
1302							has_found_time => '$',
1303							format_with_addition_of_time => '$',
1304							locale_with_addition_of_time => '$',
1305							parser_with_addition_of_time => '$'
1306							};
1307
1308							struct Format => {
1309							id => '$',
1310							format => '$',
1311							locale => '$',
1312							parser => '$',
1313							index_slave => '$',
1314							index_master => '$'
1315							};
1316
1317							sub _col_dispname {
1318	539			539		897	my ($self, $n) = @_;
1319
1320	539					664	my $col;
1321
1322							#
1323							# IMPORTANT
1324							#
1325							# We cannot execute here a command like
1326							# $self->_status_forward('S3');
1327							# (to ensure _columns is well defined) because _col_dispname is called by
1328							# _detect_dates_formats that is in turn called by _S3_init_fields_extra. A call to
1329							# _status_forward would trigger a never-ending call loop.
1330							#
1331	539					1200	my $cols = _get_def($self->{'_columns'}, $self->{'_S2_columns'});
1332
1333	539	50				1000	if ($self->{has_headers}) {
1334	539					860	$col = $cols->[$n];
1335	539	50				874	$col = "<UNDEF>" unless defined($col);
1336							} else {
1337	0					0	$col = "[$n]";
1338							}
1339	539					1061	return $col;
1340							}
1341
1342							# Used by test plan only...
1343							sub _dds {
1344	45			45		1111	my $self = shift;
1345
1346	45	50				107	return undef unless $self->_status_forward('S3');
1347	43	50				145	return undef unless defined($self->{_dates_detailed_status});
1348	43					333	return $self->{_dates_detailed_status};
1349							}
1350
1351							sub _detect_dates_formats {
1352	354			354		536	my $self = shift;
1353
1354	354	100				933	return if $self->{_detect_dates_formats_has_run};
1355	287					545	$self->{_detect_dates_formats_has_run} = 1;
1356	287	100				656	my @fields_dates = @{$self->{fields_dates}} if defined($self->{fields_dates});
	11					34
1357	287	100	100			1348	return unless @fields_dates or $self->{fields_dates_auto};
1358
1359	54	50				157	if ($self->{_int_one_pass}) {
1360	0					0	$self->_print_error("date format detection disallowed when one_pass is set");
1361	0					0	return;
1362							}
1363
1364	54					109	my $_debug = $self->{_debug};
1365	54					92	my $_debugh = $self->{_debugh};
1366	54		33			171	my $debug_fmt = ($_debug and $DEBUG_DATETIME_FORMATS);
1367
1368	54					158	$self->_register_pass("detect date format");
1369
1370							#
1371							# Why re-opening the input?
1372							# I tried two other ways that never worked on some OSes (like freebsd) and/or with older perl
1373							# versions.
1374							#
1375							# 1) The "tell" tactic
1376							# Recording at the beginning of the function the file position with
1377							# my $pos = tell($self->{inh});
1378							# ... and then recalling with a seek instruction is the most logical.
1379							# But it didn't work = sometimes, reading would go back to first row (the headers) instead
1380							# of the second row, could not figure out why (it would work on my Ubuntu 16.04 / perl 5.22, but
1381							# would fail with other OSes and/or perl versions).
1382							#
1383							# 2) The "complete rewind" tactic
1384							# I then undertook to do (at the end of detection function):
1385							# seek $inh, 0, SEEK_SET;
1386							# $incsv->getline($inh) if $self->{has_headers};
1387							# based on the assumption that a seek to zero would behave differently from a seek to an
1388							# arbitrary position.
1389							# But still, it would sometimes fail....
1390							#
1391
1392	54					131	my $inh = $self->_reopen_input();
1393	54					111	my $incsv = $self->{_in_csvobj};
1394	54	50				223	_mygetline($incsv, $inh) if $self->{has_headers};
1395
1396	54					2360	my $formats_to_try = $self->{dates_formats_to_try};
1397	54					95	my $ignore_trailing_chars = $self->{dates_ignore_trailing_chars};
1398	54					105	my $search_time = $self->{dates_search_time};
1399	54					89	my $localizations = $self->{dates_locales};
1400
1401	54					79	my %regular_named_fields = %{$self->{_regular_named_fields}};
	54					437
1402
1403	54					134	my $refsub_is_datetime_empty = $self->{_refsub_is_datetime_empty};
1404
1405	54					88	my @fields_to_detect_format;
1406	54	100				195	if (defined($self->{fields_dates})) {
		50
1407	11					21	my $count_field_not_found = 0;
1408	11					18	my %column_seen;
1409	11					18	for my $f (@{$self->{fields_dates}}) {
	11					40
1410	23	100				55	if (!exists $regular_named_fields{$f}) {
1411	1					10	$self->_print_error("fields_dates: unknown field: '$f'",
1412							1, ERR_UNKNOWN_FIELD, { %regular_named_fields } );
1413	1					34	$count_field_not_found++;
1414	1					3	next;
1415							}
1416	22					36	my $n = $regular_named_fields{$f};
1417	22	50				56	if (exists $column_seen{$n}) {
1418	0					0	$self->_print_warning("field '$f' already seen");
1419	0					0	next;
1420							}
1421	22					41	$column_seen{$n} = 1;
1422	22					45	push @fields_to_detect_format, $n;
1423							}
1424	11	100				34	$self->_print_error("non existent field(s) encountered, aborted") if $count_field_not_found;
1425							} elsif ($self->{fields_dates_auto}) {
1426	43					168	my @k = keys %regular_named_fields;
1427	43					193	@fields_to_detect_format = (0..$#k);
1428							} else {
1429	0					0	confess "Hey! check this code, man";
1430							}
1431
1432							#
1433							# FIXME?
1434							# Sort by column number of not?
1435							#
1436							# At this moment in time, the author inclines to answer "yes".
1437							# But I must admit it is rather arbitrary decision for now.
1438							#
1439	53					299	@fields_to_detect_format = sort { $a <=> $b } @fields_to_detect_format;
	753					868
1440
1441	2					5	my @dates_formats_supp = @{$self->{dates_formats_to_try_supp}}
1442	53	100				143	if defined($self->{dates_formats_to_try_supp});
1443
1444	53	100				367	$formats_to_try = [ @DATES_DEFAULT_FORMATS_TO_TRY ] unless defined($formats_to_try);
1445	53					96	$formats_to_try = [ @{$formats_to_try}, @dates_formats_supp ];
	53					219
1446	53					143	my %seen;
1447	53					105	my $f2 = [ ];
1448	53					149	for (@${formats_to_try}) {
1449	978	50				1460	push @{$f2}, $_ unless exists($seen{$_});
	978					1290
1450	978					1693	$seen{$_} = undef;
1451							}
1452	53					122	$formats_to_try = $f2;
1453
1454	53	100				134	$ignore_trailing_chars = 1 unless defined($ignore_trailing_chars);
1455	53	100				153	$search_time = 1 unless defined($search_time);
1456
1457	53	100				122	my $stop = ($ignore_trailing_chars ? '' : '>');
1458
1459							#
1460							# The code below (from # AMB to # AMB-END) aims to remove ambiguity that comes from %Y versus %y.
1461							# That is: provided you have (among others) the formats to try
1462							# '%d-%m-%Y'
1463							# and
1464							# '%d-%m-%y'
1465							# then if parsing 4-digit-year dates (like '31-12-2016'), the two formats will work and you'll end
1466							# up with an ambiguity. To be precise, there'll be no ambiguity if the date is followed by a time,
1467							# but if the date is alone, both formats will work.
1468							#
1469							# Thanks to the below code, the member 'index_slave' (and its counterpart index_master) is populated
1470							# and later, if such an ambiguity is detected, the upper case version (the one containing upper case
1471							# '%Y') will be kept and the other one will be discarded.
1472							#
1473							# NOTE
1474							# Such an ambiguity can exist only when ignore_trailing_chars is set. Otherwise, the remaining two
1475							# digits make the date parsing fail in the '%y' case.
1476							#
1477							# The other members of the 'Format' object are used to work "normally", independently from this
1478							# ambiguity removal feature.
1479							#
1480
1481							# WIP = Work In Progress...
1482	53					147	my @formats_wip;
1483	53	100				134	my @locales = split(/,\s*/, $localizations) if defined($localizations);
1484	53					87	for my $f (@{$formats_to_try}) {
	53					127
1485	978	100				2006	my $has_localized_item = ($f =~ m/%a\|%A\|%b\|%B\|%c\|%\+/ ? 1 : 0);
1486	978	100	100			1634	unless (@locales and $has_localized_item) {
1487	968					1524	push @formats_wip, [$f, ''];
1488	968					1322	next;
1489							}
1490	10					33	push @formats_wip, [$f, $_] foreach @locales;
1491							}
1492
1493							# AMB
1494	53					99	my @formats;
1495							my %mates;
1496	53					151	for my $i (0..$#formats_wip) {
1497	988					2210	my $fstr = $formats_wip[$i]->[0];
1498	988					1415	my $floc = $formats_wip[$i]->[1];
1499
1500							# FIXME
1501							# Will not manage correctly a string like
1502							# '%%Y'
1503							# that means (when used with Strptime), the litteral string '%Y' with no substitution.
1504							# Such cases will be complicated to fix, as it'll require to do a kind-of
1505							# Strptime-equivalent parsing of the string, and I find it a bit overkill.
1506							#
1507							# I prefer to push back in caller world saying
1508							# "Hey, if using constructs like '%%Y', you'll be in trouble."
1509	988					1287	my $m = $fstr;
1510	988					3725	$m =~ s/%y//ig;
1511	988					1696	$m .= $floc;
1512
1513	988					1374	my $index_slave = -1;
1514	988					1150	my $index_master = -1;
1515	988	100				1989	if (exists $mates{$m}) {
1516	262					584	my $alt_fstr = $formats_wip[$mates{$m}]->[0];
1517	262	100				720	my $m_lower = ($fstr =~ m/%y/ ? 1 : 0);
1518	262	100				564	my $m_upper = ($fstr =~ m/%Y/ ? 1 : 0);
1519	262	100				510	my $a_lower = ($alt_fstr =~ m/%y/ ? 1 : 0);
1520	262	100				616	my $a_upper = ($alt_fstr =~ m/%Y/ ? 1 : 0);
1521
1522							# We ignore the weird cases where we'd have both %y and %Y in a format string.
1523
1524	262	100	66			2167	if (!$m_lower and $m_upper and $a_lower and !$a_upper) {
		50	66
			33
			33
			33
			33
1525	45					100	$index_slave = $mates{$m};
1526	45					807	$formats[$mates{$m}]->index_master($i);
1527							} elsif ($m_lower and !$m_upper and !$a_lower and $a_upper) {
1528	217					377	$index_master = $mates{$m};
1529	217					3539	$formats[$mates{$m}]->index_slave($i);
1530							}
1531
1532							} else {
1533	726					1469	$mates{$m} = $i;
1534							}
1535
1536	988					4093	my %strptime_opts = (pattern => $START . $fstr . $stop);
1537	988	100				2313	$strptime_opts{locale} = $floc if $floc ne '';
1538	988	100				5465	my $format = Format->new(
1539							id => "$i",
1540							format => $fstr,
1541							locale => $floc,
1542							parser => ($fstr ne '' ?
1543							DateTime::Format::Strptime->new(%strptime_opts) :
1544							undef),
1545							index_slave => $index_slave,
1546							index_master => $index_master
1547							);
1548	988					1078022	push @formats, $format;
1549							}
1550	53					236	for my $i (0..$#formats) {
1551	988					10442	my $format = $formats[$i];
1552
1553							# If a master could be itself the slave of another entry, that'd make it a hierarchical
1554							# relation tree with multiple levels. It is not possible, only a direct, unique
1555							# master-slave relation can be managed here.
1556	988	50	66			11977	confess "Inonsistent data, check this module's code urgently!"
1557							if $format->index_slave >= 0 and $format->index_master >= 0;
1558
1559	988	100				21334	if ($format->index_slave >= 0) {
1560	262					4607	my $mate = $formats[$format->index_slave];
1561	262	50	33			4188	if ($mate->index_master != $i or $mate->index_slave != -1) {
1562	0					0	confess "Inonsistent data (2), check this module's code urgently!"
1563							}
1564							}
1565
1566	988	100				20435	if ($format->index_master >= 0) {
1567	262					4330	my $mate = $formats[$format->index_master];
1568	262	50	33			4128	if ($mate->index_slave != $i or $mate->index_master != -1) {
1569	0					0	confess "Inonsistent data (3), check this module's code urgently!"
1570							}
1571							}
1572
1573							}
1574	53	50				464	if ($debug_fmt) {
1575	0					0	for (@formats) {
1576	0					0	my ($idx, $rel) = (-1, "");
1577	0	0				0	$idx = $_->index_slave, $rel = "S: " if $_->index_slave >= 0;
1578	0	0				0	$idx = $_->index_master, $rel = "M: " if $_->index_master >= 0;
1579	0					0	printf($_debugh "%-18s %s %2d", "'" . $_->format . "'", $rel, $idx);
1580	0	0				0	print($_debugh ": '" . $formats[$idx]->format . "'") if $idx >= 0;
1581	0					0	print($_debugh "\n");
1582							}
1583							}
1584							# AMB-END
1585
1586	53					118	my %records;
1587							my $record_number;
1588	53					92	my $count_gotit = 0;
1589	53					74	my $count_ambiguous = 0;
1590	53					85	my $count_nodate = 0;
1591	53					80	my $count_empty = 0;
1592	53					72	my $has_signaled_can_start_recording_data = 0;
1593	53					140	$self->{_line_after_which_recording_can_start} = 0;
1594
1595							#
1596							# Seems a weird optimization here, but it is very important.
1597							# In some cases, divides execution time (to detect date format on big files
1598							# containing numerous fields) by 10.
1599							#
1600							# When evaluates to true, it means the input column has no identified date format, meaning,
1601							# no further check to do.
1602							#
1603	53					96	my @cache_nodate;
1604
1605	53					185	while (my $f = _mygetline($incsv, $inh)) {
1606	4650					145432	$record_number++;
1607
1608	4650	50				8680	if ($debug_fmt) {
1609	0					0	print($_debugh "RECORD $record_number:\n");
1610	0					0	for (0 .. @$f - 1) { printf($_debugh " %02d: '%s'\n", $_, $f->[$_]); }
	0					0
1611							}
1612
1613	4650					8426	for my $n (@fields_to_detect_format) {
1614	31445	100				53619	next if $cache_nodate[$n];
1615
1616	16908					23080	my $v = $f->[$n];
1617	16908	100				25435	$v = '' unless defined($v);
1618	16908	100				30164	next if $v eq '';
1619	9406	100	100			24886	next if defined($refsub_is_datetime_empty) and $refsub_is_datetime_empty->($v);
1620
1621	9403	50				16351	if ($debug_fmt) {
1622	0					0	my $col = $self->_col_dispname($n);
1623	0					0	print($_debugh "Line $record_number, column '$col':\n");
1624							}
1625
1626	9403					16398	for my $fmt (@formats) {
1627	158464					1938371	my $fid = $fmt->id;
1628	158464					2580620	my $fstr = $fmt->format;
1629
1630	158464	50				961742	$self->_debug_output_fmt('** pre ', $fmt, $records{$n}->{$fid}) if $debug_fmt;
1631
1632							$records{$n}->{$fid} = RecordCounter->new(
1633							count_ok => 0,
1634							count_ko => 0,
1635							has_searched_time => 0,
1636
1637							format => undef,
1638							locale => undef,
1639
1640							has_found_time => 0,
1641							format_with_addition_of_time => undef,
1642							locale_with_addition_of_time => undef,
1643							parser_with_addition_of_time => undef
1644	158464	100				402666	) unless defined($records{$n}->{$fid});
1645
1646	158464	100				2396526	unless ($records{$n}->{$fid}->count_ko) {
1647	19561					156101	my $is_ok = &_try_parser($fmt, $records{$n}->{$fid}, $START . $v . $stop);
1648
1649	19561	100				39790	if (!$is_ok) {
1650	7789					10262	my $give_up_time = 0;
1651	7789	100	66			97784	if ($records{$n}->{$fid}->count_ko == 0 and
			100
1652							$records{$n}->{$fid}->has_searched_time and
1653							$records{$n}->{$fid}->has_found_time) {
1654	77		100			4020	$give_up_time = (defined($fmt->parser) and
1655							defined($fmt->parser->parse_datetime($START . $v . $stop))
1656							?
1657							1 : 0);
1658	77	100				40222	if ($give_up_time) {
1659	4					74	$records{$n}->{$fid}->has_found_time(0);
1660	4					28	$is_ok = 1;
1661							}
1662							}
1663							}
1664
1665	19561	100	100			213960	if ($is_ok or !$ignore_trailing_chars) {
1666	14159		100			185995	my $incr = (defined($fmt->parser) and $is_ok ? 1: 0);
1667
1668	14159	100				281680	unless ($records{$n}->{$fid}->has_searched_time) {
1669	3156					53415	$records{$n}->{$fid}->has_searched_time(1);
1670
1671							croak "Inconsistent status! Issue in module code not in caller's!"
1672	3156	50				50360	if $records{$n}->{$fid}->count_ok != 0;
1673
1674	3156	100				21664	if ($search_time) {
		100
1675
1676	1980	50				3617	print($_debugh " Search time in '$v', format '$fstr'\n")
1677							if $debug_fmt;
1678
1679	1980					24457	my $t = $self->_guess_time_format($fstr, $fmt->locale, $v, $stop);
1680	1980	100				42084	$records{$n}->{$fid}->has_found_time((defined($t) ? 1 : 0));
1681	1980	100				16509	if (defined($t)) {
		100
1682	254					3868	$records{$n}->{$fid}->format_with_addition_of_time($t->[0]);
1683	254					4761	$records{$n}->{$fid}->locale_with_addition_of_time($t->[1]);
1684	254					4595	$records{$n}->{$fid}->parser_with_addition_of_time($t->[2]);
1685	254					1744	$incr = 1;
1686							} elsif ($fstr eq '') {
1687	200					3084	$records{$n}->{$fid}->count_ko(1);
1688							}
1689							} elsif ($fstr eq '') {
1690	78					932	$records{$n}->{$fid}->count_ko(1);
1691							}
1692
1693							}
1694
1695	14159					240714	$records{$n}->{$fid}->count_ok($records{$n}->{$fid}->count_ok + $incr);
1696
1697	14159	100	100			181498	$records{$n}->{$fid}->count_ko($records{$n}->{$fid}->count_ko + 1)
1698							if !$incr and !$is_ok;
1699
1700	14159	100				45090	if ($incr) {
1701							# We remove the slave if master is fine.
1702							# Depending on the order in which parsing got done, the master could
1703							# pop up first, or the slave, that is why we need manage both cases.
1704	9161	100	100			115808	if ($fmt->index_slave >= 0 or $fmt->index_master >= 0) {
1705	6843	100				202853	my $has_slave = ($fmt->index_slave >= 0 ? 1 : 0);
1706	6843	100				119149	my $idx = ($has_slave ? $fmt->index_slave : $fmt->index_master);
1707	6843					114981	my $mate = $formats[$idx]->id;
1708	6843	100				50518	if (exists $records{$n}->{$mate}) {
1709	6689	100				12247	if ($has_slave) {
1710	2239	100				30531	if ($records{$n}->{$mate}->count_ko == 0) {
1711							# I am the master: I remove the slave
1712	2					53	$records{$n}->{$mate}->count_ko(1);
1713							}
1714							} else {
1715	4450	50	66			56764	if ($records{$n}->{$mate}->count_ko == 0 and
			66
1716							$records{$n}->{$mate}->count_ok >= 1 and
1717							$records{$n}->{$fid}->count_ko == 0) {
1718	117					6413	$records{$n}->{$fid}->count_ko(1);
1719							}
1720							}
1721							}
1722							}
1723							}
1724
1725							} else {
1726	5402					69351	$records{$n}->{$fid}->count_ko($records{$n}->{$fid}->count_ko + 1);
1727							}
1728							}
1729
1730	158464	50				1103496	$self->_debug_output_fmt(' post', $fmt, $records{$n}->{$fid}) if $debug_fmt;
1731
1732							}
1733							}
1734
1735	4650					7265	$count_gotit = 0;
1736	4650					6724	$count_ambiguous = 0;
1737	4650					6590	$count_empty = 0;
1738	4650					9138	for my $n (@fields_to_detect_format) {
1739	31445	100				53472	next if $cache_nodate[$n];
1740
1741	16908					21910	my $candidate = 0;
1742	16908					21516	my $tt = 0;
1743	16908					24498	for my $fmt (@formats) {
1744	259112					3130002	my $fid = $fmt->id;
1745	259112					1561608	my $rec = $records{$n}->{$fid};
1746	259112	100				410964	next unless defined($rec);
1747
1748	200290					2343828	my $ok = $rec->count_ok;
1749	200290					3145687	my $ko = $rec->count_ko;
1750
1751	200290	50	66			1352272	confess "Oups. Check this module code urgently!" if $ok == 0 and $ko == 0;
1752	200290					239261	$tt += $ok + $ko;
1753
1754	200290	100	100			407111	$candidate++ if $ok >= 1 and $ko == 0;
1755							}
1756	16908	100				31488	if ($candidate == 1) {
		100
		100
1757	9602					13880	$count_gotit++;
1758							} elsif ($candidate >= 2) {
1759	2269					3620	$count_ambiguous++;
1760							} elsif ($tt != 0) {
1761	230					282	$count_nodate++;
1762	230					423	$cache_nodate[$n] = 1;
1763							} else {
1764	4807					6511	$count_empty++;
1765							}
1766							}
1767
1768	4650	50				8491	if ($debug_fmt) {
1769	0					0	print($_debugh "\$count_gotit = $count_gotit\n");
1770	0					0	print($_debugh "\$count_ambiguous = $count_ambiguous\n");
1771	0					0	print($_debugh "\$count_nodate = $count_nodate\n");
1772	0					0	print($_debugh "\$count_empty = $count_empty\n");
1773							}
1774
1775	4650					7169	my $can_start_recording_data = 0;
1776	4650	100	100			23340	$can_start_recording_data = 1
			100
1777							if $count_gotit + $count_ambiguous + $count_nodate >= 1 and
1778							!$count_ambiguous and !$count_empty;
1779
1780	4650	100	100			27375	if ($can_start_recording_data and !$has_signaled_can_start_recording_data) {
1781	31					60	$has_signaled_can_start_recording_data = 1;
1782
1783	31	50				85	print($_debugh "Can start recording (all dates formats detection closed) " .
1784							"after record #$record_number\n") if $_debug;
1785
1786	31					79	$self->{_line_after_which_recording_can_start} = $record_number;
1787	31	100				113	last unless $self->{fields_dates_auto};
1788	24	100				196	last if $self->{fields_dates_auto_optimize};
1789							}
1790							}
1791
1792	53					3318	close $inh;
1793
1794	53					156	my %dates_detailed_status;
1795							my @dates_formats;
1796	53					109	my $check_empty = 0;
1797	53					86	my $check_nodate = 0;
1798	53					85	my $check_ambiguous = 0;
1799	53					111	my $check_gotit = 0;
1800	53					128	for my $n (@fields_to_detect_format) {
1801	532					662	my @formats_ok;
1802	532					650	my $tt = 0;
1803	532					626	for my $fid (sort keys %{$records{$n}}) {
	532					4453
1804	8465					73766	my $rec = $records{$n}->{$fid};
1805	8465	100	100			99187	if ($rec->count_ok >= 1 and $rec->count_ko == 0) {
1806
1807	298					9864	my ($fstr, $floc) = ($rec->format, $rec->locale);
1808	298	100				6337	($fstr, $floc) = (
1809							$rec->format_with_addition_of_time,
1810							$rec->locale_with_addition_of_time
1811							) if $rec->has_found_time;
1812
1813	298					5808	push @formats_ok, [$fstr, $floc];
1814							}
1815	8465					139405	$tt += $rec->count_ok + $rec->count_ko;
1816							}
1817	532					5100	my $is_ok = 0;
1818	532					631	my $format;
1819	532					820	my $locale = '';
1820	532	100	100			1961	if ($#formats_ok < 0 and $tt == 0) {
		100
		100
1821	16					29	$format = "Z";
1822	16					28	$check_empty++;
1823							} elsif ($#formats_ok < 0) {
1824	230					307	$format = "N";
1825	230					289	$check_nodate++;
1826							} elsif ($#formats_ok > 0) {
1827	12					21	$format = "A";
1828	12					19	$check_ambiguous++;
1829							} else {
1830	274					386	$is_ok = 1;
1831	274					506	$format = $formats_ok[0]->[0];
1832	274					376	$locale = $formats_ok[0]->[1];
1833	274					384	$check_gotit++;
1834							}
1835	532					1111	my $col = $self->_col_dispname($n);
1836
1837	532	50				1589	$dates_detailed_status{$col} = $format unless exists $dates_detailed_status{$col};
1838	532	100	66			2036	$dates_formats[$n] = [ $format, $locale ] if $is_ok and !defined($dates_formats[$n]);
1839							}
1840	53					185	$dates_detailed_status{'.'} = $self->{_line_after_which_recording_can_start};
1841
1842	53	50	66			479	if ($check_empty != $count_empty or $check_nodate != $count_nodate or
			66
			33
1843							$check_ambiguous != $count_ambiguous or $check_gotit != $count_gotit) {
1844							# The below condition can happen with an empty CSV (empty file (no header) or
1845							# only a header line).
1846	1	50	33			37	unless (!$count_empty and !$check_nodate and !$count_nodate and
			33
			33
			33
			33
			33
1847							!$check_ambiguous and !$count_ambiguous and !$check_gotit and !$count_gotit) {
1848	0					0	print(STDERR "\$check_empty = $check_empty\n");
1849	0					0	print(STDERR "\$count_empty = $count_empty\n");
1850	0					0	print(STDERR "\$check_nodate = $check_nodate\n");
1851	0					0	print(STDERR "\$count_nodate = $count_nodate\n");
1852	0					0	print(STDERR "\$check_ambiguous = $check_ambiguous\n");
1853	0					0	print(STDERR "\$count_ambiguous = $count_ambiguous\n");
1854	0					0	print(STDERR "\$check_gotit = $check_gotit\n");
1855	0					0	print(STDERR "\$count_gotit = $count_gotit\n");
1856	0					0	confess "Oups! Check immediately this module code, man!";
1857							}
1858							}
1859
1860	53	50				173	if ($debug_fmt) {
1861							# A very detailed debug output
1862	0					0	for my $n (@fields_to_detect_format) {
1863	0					0	my $col = $self->_col_dispname($n);
1864	0					0	print($_debugh "$col\n");
1865	0					0	printf($_debugh " %-25s %3s %3s\n", "format", "OK", "KO");
1866	0					0	for my $fid (sort keys %{$records{$n}}) {
	0					0
1867	0					0	my $rec = $records{$n}->{$fid};
1868	0					0	my $cc = '';
1869	0	0	0			0	$cc = "(" . $rec->locale . ")" if defined($rec->locale) and $rec->locale ne '';
1870	0					0	printf($_debugh " %-25s %3d %3d\n",
1871							$rec->format . $cc, $rec->count_ok, $rec->count_ko);
1872							}
1873							}
1874							}
1875							# Not a typo - displaying it IN ADDITION to debug output above is done on purpose...
1876	53	50				141	if ($_debug) {
1877							# A shorter (as compared to above) output of outcome of DateTime detection
1878	0					0	print($_debugh "Result of DateTime detection:\n");
1879	0					0	printf($_debugh "%-3s %-25s %-30s %s\n", '###', 'FIELD', 'DATETIME FORMAT',
1880							'DATETIME LOCALE');
1881	0					0	for my $n (@fields_to_detect_format) {
1882	0					0	my ($fmt, $loc) = ('<undef>', '<undef>');
1883	0	0				0	if (defined($dates_formats[$n])) {
1884	0					0	($fmt, $loc) = @{$dates_formats[$n]}[0, 1];
	0					0
1885							}
1886	0					0	printf($_debugh "%03d %-25s %-30s %s\n", $n, $self->_col_dispname($n), $fmt, $loc);
1887							}
1888							}
1889
1890	53	100				177	if (!$self->{fields_dates_auto}) {
1891	10					21	my $e = 0;
1892	10					25	for my $n (@fields_to_detect_format) {
1893	20	100				50	next if defined($dates_formats[$n]);
1894	7					19	$self->_print_error("unable to detect DateTime format of field '" .
1895							$self->_col_dispname($n) . "'", 1);
1896	7					226	$e++;
1897							}
1898	10	100				38	$self->_print_error("$e field(s) encountered with unknown DateTime format") if $e;
1899							}
1900
1901	51					629	$self->{_dates_detailed_status} = { %dates_detailed_status };
1902	51					71913	$self->{_dates_formats} = [ @dates_formats ];
1903							}
1904
1905							sub _debug_output_fmt {
1906	0			0		0	my ($self, $prefix, $fmt, $rec) = @_;
1907
1908	0					0	my $_debugh = $self->{_debugh};
1909
1910	0					0	my ($fstr, $floc) = ($fmt->format, $fmt->locale);
1911	0	0	0			0	($fstr, $floc) = (
1912							'<+T>' . $rec->format_with_addition_of_time,
1913							$rec->locale_with_addition_of_time
1914							) if defined($rec) and $rec->has_found_time;
1915
1916	0					0	my $locstr = '';
1917	0	0	0			0	$locstr = "(" . $floc . ")" if defined($floc) and $floc ne '';
1918
1919	0	0				0	my $tmpok = $rec->count_ok if defined($rec);
1920	0	0				0	$tmpok = '<undef>' unless defined($tmpok);
1921	0	0				0	my $tmpko = $rec->count_ko if defined($rec);
1922	0	0				0	$tmpko = '<undef>' unless defined($tmpko);
1923
1924	0					0	print($_debugh "$prefix (format '$fstr$locstr': OK = $tmpok, KO = $tmpko)\n");
1925							}
1926
1927							# When no parse can be done (parser to test is undef), return 1
1928							sub _try_parser {
1929	19561			19561		36197	my ($fmt, $rec, $value_to_parse) = @_;
1930
1931	19561					251597	my $parser = $fmt->parser;
1932	19561	100				329188	$parser = $rec->parser_with_addition_of_time if $rec->has_found_time;
1933
1934	19561					199756	my $is_ok = 1;
1935	19561	100				63674	$is_ok = (defined($parser->parse_datetime($value_to_parse)) ? 1 : 0) if $parser;
		100
1936
1937	19561	100				6762982	unless (defined($rec->format)) {
1938	8465					145360	$rec->format($fmt->format);
1939	8465					255757	$rec->locale($fmt->locale);
1940							}
1941
1942	19561					268220	return $is_ok;
1943							}
1944
1945							sub _guess_time_format {
1946
1947							# IMPORTANT
1948							# Formats are tested in the order of the list below, and the first one that succeeds stops the
1949							# tests.
1950							# That makes the order of the elements important: %R would match any value that'd also match
1951							# %T, that'd cause to return %R whereas %T would be possible. Same with AM/PM formats. Thus
1952							# the longest patterns appear first.
1953	1980			1980		15671	my @T = (
1954							'%I:%M:%S %p',
1955							'%I:%M %p',
1956							'%I:%M:%S%p',
1957							'%I:%M%p',
1958							'%T',
1959							'%R'
1960							);
1961
1962	1980					4508	my ($self, $format, $locale, $v, $stop) = @_;
1963
1964	1980					3199	my $_debugh = $self->{_debugh};
1965	1980		33			4426	my $debug_fmt = ($self->{_debug} and $DEBUG_DATETIME_FORMATS);
1966
1967	1980	100				4052	return undef if $format =~ /:/;
1968
1969	1976					2690	my $sep;
1970	1976	100				3837	if ($format eq '') {
1971	325					639	$sep = '';
1972							} else {
1973	1651	100				9128	unless ((undef, $sep) = $v =~ /(^\|\d([^0-9:]+))(\d{1,2}):(\d{1,2})(\D\|$)/) {
1974	393	100				852	if ($v =~ /\d{4}:\d{2}(\D\|$)/) {
1975	29					52	$sep = '';
1976							} else {
1977
1978	364	50				644	print($_debugh "_guess_time_format(): separator candidate not found in '$v'\n")
1979							if $debug_fmt;
1980
1981	364					861	return undef;
1982							}
1983							}
1984							}
1985	1612	100				4326	$sep = '' unless defined($sep);
1986
1987							#
1988							# IMPORTANT
1989							#
1990							# The code below allows to successfully detect DateTime format when
1991							# the first lines contain things like:
1992							# Jan 20 2017 2:00AM
1993							# that could lead to a separator set to ' ' while actually it should be ' '. In this case
1994							# if the double-space is kept, then a later value of
1995							# Jan 20 2017 10:00AM
1996							# won't be parsed.
1997							#
1998							# See t/11-bugfix.t, BUG 5, for an explanation of why the line below.
1999							#
2000
2001							# More generic code, but will also break some separators like ' ' (4 spaces)
2002							# $sep = substr($sep, 0, length($sep) - 1) if length($sep) >= 2 and substr($sep, -2) eq ' ';
2003	1612	100				3365	$sep = ' ' if $sep eq ' ';
2004
2005	1612	50				2884	if ($debug_fmt) {
2006	0					0	print($_debugh " _guess_time_format(): Searching time in '$v'\n");
2007							}
2008
2009	1612					2649	for my $t (@T) {
2010	9193					527128	my $increased_format = "$format$sep$t";
2011
2012	9193	50				19654	print($_debugh " _guess_time_format(): Trying format '$increased_format'\n") if $debug_fmt;
2013
2014	9193					26155	my %opts = (pattern => $START . $increased_format . $stop);
2015	9193	100	66			33050	$opts{locale} = $locale if defined($locale) and $locale ne '';
2016	9193					32325	my $parser_of_increased_format = DateTime::Format::Strptime->new(%opts);
2017	9193	100				10184123	next unless defined($parser_of_increased_format->parse_datetime($START . $v . $stop));
2018
2019	254	50				151930	if ($debug_fmt) {
2020	0					0	print($_debugh " _guess_time_format(): found time in '$v'\n");
2021	0					0	print($_debugh " Initial format: '$format'\n");
2022	0					0	print($_debugh " Increased format: '$increased_format'\n");
2023							}
2024
2025	254					1391	return [$increased_format, $locale, $parser_of_increased_format];
2026							}
2027	1358					98908	return undef;
2028							}
2029
2030
2031							# * ********************************* *
2032							# * END OF DATE FORMAT DETECTION CODE *
2033							# * ********************************* *
2034
2035
2036							# Take the string of a header in $_ and replace it with the corresponding field name
2037							sub _header_to_field_name {
2038	1265			1265		1978	$_ = remove_accents($_);
2039	1265					2436	s/[^[:alnum:]_]//gi;
2040	1265					6799	s/^.*$/\U$&/;
2041							}
2042
2043							# Return 0 if error, 1 if all good
2044							sub _S2_init_fields_from_header {
2045	355			355		602	my $self = shift;
2046
2047	355					645	my $has_headers = $self->{has_headers};
2048	355					582	my $_debug = $self->{_debug};
2049	355					553	my $_debugh = $self->{_debugh};
2050
2051	355					828	my $in_file_disp = $self->get_in_file_disp();
2052
2053	355					641	my $inh = $self->{_inh};
2054	355					557	my $incsv = $self->{_in_csvobj};
2055
2056	355					685	$self->{_row_read} = 0;
2057
2058	355					648	my @columns;
2059							my @headers;
2060	355	100				763	if ($has_headers) {
2061
2062							print($_debugh "$PKG: '$in_file_disp': will parse header line to get column names\n")
2063	344	50				812	if $self->{_debug_read};
2064
2065	344					579	$self->{_row_read}++;
2066
2067							print($_debugh "$PKG: '$in_file_disp': will read line #" . $self->{_row_read} . "\n")
2068	344	50				753	if $self->{_debug_read};
2069
2070	344	50				768	if (defined($self->{_inh_header})) {
2071	0					0	my $l = $self->{_inh_header};
2072	0					0	my $inmemh;
2073	0	0				0	if (!open ($inmemh, '<', \$l)) {
2074	0					0	$self->_print_error("can't open header line in-memory. CSV read aborted.");
2075	0					0	return 0;
2076							}
2077	0					0	@headers = @{_mygetline($incsv, $inmemh)};
	0					0
2078							} else {
2079	344					689	my $r = _mygetline($incsv, $inh);
2080	342	50				16073	@headers = @{$r} if defined($r);
	342					1071
2081							}
2082							print($_debugh "Line " . $self->{_row_read} . ":\n--\n" . join('::', @headers) . "\n--\n")
2083	342	50				1114	if $self->{_debug_read};
2084							}
2085
2086	353	100	100			1547	if ($has_headers and !defined($self->{fields_column_names})) {
2087	336					525	my %indexes;
2088	336	100				812	if (defined($self->{fields_hr})) {
2089	12	50				87	if (!$self->_process_header(\@headers, $self->{fields_hr}, \%indexes)) {
2090	0					0	$self->_print_error("missing headers. CSV read aborted.");
2091	0					0	return 0;
2092							}
2093	12	50				44	if ($_debug) {
2094	0					0	print($_debugh " \%indexes:\n");
2095	0					0	for my $k (sort keys %indexes) {
2096	0					0	print($_debugh " \t$k => $indexes{$k}\n");
2097							}
2098							}
2099	12					80	for (sort keys %indexes) {
2100	48	50				84	next if $_ eq '';
2101	48					90	$columns[$indexes{$_}] = $_;
2102							}
2103							} else {
2104	324					764	@columns = @headers;
2105	324					617	map { _header_to_field_name } @columns;
	1265					7496
2106							}
2107							}
2108
2109	353	100				1064	@columns = @{$self->{fields_column_names}} if defined($self->{fields_column_names});
	14					41
2110
2111							# Avoid undef in column names... I prefer empty strings
2112	353	100				702	@columns = map { defined($_) ? $_ : '' } @columns;
	1389					3105
2113
2114	353	50				828	if ($_debug) {
2115	0					0	print($_debugh "-- CSV headers management\n");
2116	0	0				0	if (@columns) {
2117	0					0	printf($_debugh " %-3s %-40s %-40s\n", 'COL', 'CSV Header', 'Hash Key');
2118	0					0	for my $i (0..$#columns) {
2119	0					0	my $h = '';
2120	0	0				0	$h = $headers[$i] if defined($headers[$i]);
2121	0					0	printf($_debugh " %03d %-40s %-40s\n", $i, "'$h'", "'$columns[$i]'");
2122							}
2123							} else {
2124	0					0	print($_debugh " No headers\n");
2125							}
2126							}
2127
2128	353					548	my %regular_named_fields;
2129	353					953	for my $i (0..$#columns) {
2130	1389	100	66			5084	$regular_named_fields{$columns[$i]} = $i if defined($columns[$i]) and $columns[$i] ne '';
2131							}
2132	353					1716	$self->{_regular_named_fields} = { %regular_named_fields };
2133	353					1253	$self->{_S2_columns} = [ @columns ];
2134	353	100				1245	$self->{_S2_headers} = [ @headers ] if $has_headers;
2135
2136	353					1368	return 1;
2137							}
2138
2139							sub out_header {
2140	8			8	1	11	my $self = shift;
2141	8					73	validate_pos(@_, {type => SCALAR}, {type => SCALAR});
2142
2143	8					21	my ($field, $header) = @_;
2144	8	100				20	$self->{_out_headers} = { } unless exists $self->{_out_headers};
2145
2146							$self->_print_warning("out_header: field $field already set")
2147	8	50				14	if exists $self->{_out_headers}->{$field};
2148
2149	8					15	$self->{_out_headers}->{$field} = $header;
2150
2151	8					24	return $self;
2152							}
2153
2154							# Return 0 if error, 1 if all good
2155							sub _S3_init_fields_extra {
2156	355			355		572	my $self = shift;
2157
2158	355					659	my $_debug = $self->{_debug};
2159	355					579	my $_debugh = $self->{_debugh};
2160
2161	355					547	my $verbose = $self->{verbose};
2162
2163	355					532	my $has_headers = $self->{has_headers};
2164
2165	355					520	my %named_fields = %{$self->{_regular_named_fields}};
	355					1597
2166	355					666	my @columns = @{$self->{_S2_columns}};
	355					949
2167	355	100				826	my @headers = @{$self->{_S2_headers}} if $has_headers;
	344					860
2168
2169	355					514	my @extra_fields_indexes;
2170	355	100				773	my @extra_fields_definitions_list = @{$self->{_extra_fields}} if exists $self->{_extra_fields};
	36					72
2171	355					505	my %extra_fields_definitions;
2172
2173	355	100				839	my @multiline = @{$self->{_multiline}} if defined($self->{_multiline});
	340					664
2174
2175	355					487	my @coldata;
2176	355					824	for my $i (0..$#columns) {
2177	1395					45332	my $col = $columns[$i];
2178	1395	100				2682	my $h = $headers[$i] if $has_headers;
2179	1395	100				23430	push @coldata, ColData->new(
2180							field_name => $col,
2181							header_text => $h,
2182							description => '',
2183							multiline => ($multiline[$i] ? 'm' : '1')
2184							);
2185							}
2186
2187	355					15896	for my $edef (@extra_fields_definitions_list) {
2188	92					4275	my $c = $edef->check_field_existence;
2189	92	100				668	if (defined($c)) {
2190	80	100				174	unless (exists $named_fields{$c}) {
2191	9					121	$self->_print_error("unknown field '" . $edef->check_field_existence . "'",
2192							0, ERR_UNKNOWN_FIELD, { %named_fields } );
2193	8					23	next;
2194							}
2195							}
2196
2197	83					155	my @e_eclated = $edef;
2198
2199	83	100	100			1102	if ($edef->ef_type == $EF_LINK and $edef->link_remote_read eq '*') {
2200	2					76	my @cols = $edef->link_remote_obj->get_fields_names();
2201
2202	2					3	@e_eclated = ();
2203	2					9	my %nf = %named_fields;
2204
2205	2					5	for my $c (@cols) {
2206
2207	4					130	my $ex_base = $edef->self_name . $c;
2208	4					35	my $ex_target = $ex_base;
2209	4					8	my $i = 1;
2210	4					15	while (exists $nf{$ex_target}) {
2211	1					2	$i++;
2212	1					4	$ex_target = $ex_base . '_' . $i;
2213							}
2214
2215	4					60	my $e = ExtraField->new(
2216							ef_type => $EF_LINK,
2217							self_name => $ex_target,
2218							description => $edef->description . " ($c)",
2219
2220							link_self_search => $edef->link_self_search,
2221							link_remote_obj => $edef->link_remote_obj,
2222							link_remote_search => $edef->link_remote_search,
2223							link_remote_read => $c,
2224
2225							link_vlookup_opts => $edef->link_vlookup_opts
2226							);
2227	4					361	push @e_eclated, $e;
2228	4					14	$nf{$ex_target} = undef;
2229							}
2230							}
2231
2232	83					1348	for my $e1 (@e_eclated) {
2233	85	100				1249	if (exists $named_fields{$e1->self_name}) {
2234	6					101	$self->_print_error("extra field: duplicate field name: '" . $e1->self_name . "'");
2235	6					15	next;
2236							}
2237
2238	79					599	my $index_of_new_element = $#columns + 1;
2239	79					128	push @extra_fields_indexes, $index_of_new_element;
2240	79					1017	$columns[$index_of_new_element] = $e1->self_name;
2241	79					1335	$named_fields{$e1->self_name} = $index_of_new_element;
2242	79					1398	$extra_fields_definitions{$e1->self_name} = $e1;
2243
2244	79	50				1403	push @headers, $e1->self_name if $has_headers;
2245	79					1328	push @coldata, ColData->new(
2246							field_name => $e1->self_name,
2247							header_text => $e1->self_name,
2248							description => $e1->description,
2249							multiline => '?'
2250							);
2251							}
2252
2253							}
2254	354	100				3365	$self->{_headers} = [ @headers ] if $has_headers;
2255	354					792	$self->{_extra_fields_indexes} = [ @extra_fields_indexes ];
2256	354					1079	$self->{_columns} = [ @columns ];
2257	354					933	$self->{_extra_fields_definitions} = { %extra_fields_definitions };
2258
2259	354					1631	$self->{_named_fields} = { %named_fields };
2260
2261	354					1338	$self->_detect_dates_formats();
2262
2263	351					5487	$self->{_read_update_after_ar} = [ ];
2264	351					3290	$self->{_write_update_before_ar} = [ ];
2265	351	100				918	my @dates_formats = @{$self->{_dates_formats}} if defined($self->{_dates_formats});
	60					253
2266	351					1019	for my $i (0..$#columns) {
2267	1455					2163	my $dt_format;
2268							my $dt_locale;
2269	1455	100				2735	if (defined($dates_formats[$i])) {
2270	330					663	$dt_format = $dates_formats[$i]->[0];
2271	330					654	$dt_locale = $dates_formats[$i]->[1];
2272							}
2273	1455					23224	$coldata[$i]->dt_format($dt_format);
2274	1455					26390	$coldata[$i]->dt_locale($dt_locale);
2275
2276	1455	100				9824	next unless defined($dt_format);
2277
2278	330					478	my %opts_in;
2279	330	100	66			1369	$opts_in{locale} = $dt_locale if defined($dt_locale) and $dt_locale ne '';
2280
2281	330					1369	my $obj_strptime_in = DateTime::Format::Strptime->new(pattern => $dt_format, %opts_in);
2282
2283	330					361097	my %opts_out;
2284	330	50				960	my $loc_out = (exists $self->{out_dates_locale} ? $self->{out_dates_locale} : $dt_locale);
2285	330	100	66			1625	$opts_out{locale} = $loc_out if defined($loc_out) and $loc_out ne '';
2286							my $obj_strptime_out = DateTime::Format::Strptime->new(
2287	330	50				1326	pattern => (exists $self->{out_dates_format} ? $self->{out_dates_format} :$dt_format),
2288							%opts_out
2289							);
2290
2291	330					339767	my $refsub_is_datetime_empty = $self->{_refsub_is_datetime_empty};
2292	330					1111	my $in_file_disp = $self->get_in_file_disp();
2293
2294							$self->{_read_update_after_ar}->[$i] = sub {
2295	352	100	66	352		1818	return undef if !defined($_) or $_ eq '' or
			33
			66
2296							(defined($refsub_is_datetime_empty) and $refsub_is_datetime_empty->($_));
2297
2298	315					552	my $s = $_[0];
2299	315					660	my $field = _get_def($_[1], '<?>');
2300
2301	315					937	my $dt = $obj_strptime_in->parse_datetime($_);
2302
2303	315	0	33			175487	if ($_debug and $DEBUG_DATETIME_FORMATS and $DEBUG_DATETIME_FORMATS_EVEN_MORE) {
			33
2304	0	0				0	print($_debugh "-- Record " . $s->get_recnum() .
2305							", field '$field':\n String parsed: '$_'\n Parse format: '$dt_format'\n" .
2306							" DateTime obj: <" . (defined($dt) ? $dt . '' : 'undef') . ">\n");
2307							}
2308
2309	315	100				706	if (!defined($dt)) {
2310	2					10	my $recnum = $s->get_recnum();
2311	2	50				7	if ($verbose) {
2312	0					0	$s->_print("$PKG: " .
2313							"$in_file_disp: record $recnum: field $field: unable to parse DateTime\n");
2314	0					0	$s->_print("$PKG: field: '$_'\n");
2315	0					0	$s->_print("$PKG: format: '$dt_format'\n");
2316	0	0				0	$s->_print("$PKG: " .
2317							"locale: '" . ($dt_locale eq '' ? '<none>' : $dt_locale) . "'\n");
2318	0					0	$s->_print("$PKG: " .
2319							"Probable cause: when detecting DateTime format, $PKG will stop reading\n");
2320	0					0	$s->_print("$PKG: " .
2321							"input as soon as the format is worked out. If a value found later\n");
2322	0					0	$s->_print("$PKG: " .
2323							"turns out to use another DateTime format, it'll generate a DateTime\n");
2324	0					0	$s->_print("$PKG: parse error, as is the case now.\n");
2325	0					0	$s->_print_error("unable to parse DateTime");
2326							} else {
2327	2					20	$s->_print_error("$in_file_disp: record $recnum: field $field: " .
2328							"unable to parse DateTime '$_'");
2329							}
2330							}
2331
2332	314					631	return $dt;
2333	330					2611	};
2334							$self->{_write_update_before_ar}->[$i] = sub {
2335	96	100		96		194	return '' unless defined($_);
2336	87	100				202	return $_ if !ref($_);
2337	81	50				279	return $_ unless $_->isa('DateTime');
2338
2339	81					236	my $str = $obj_strptime_out->format_datetime($_);
2340
2341	81	50				17709	if (!defined($str)) {
2342	0					0	my $s = $_[0];
2343	0					0	my $recnum = $s->get_recnum();
2344	0					0	my $field = _get_def($_[1], '<?>');
2345	0					0	$s->_print_error("$in_file_disp: record $recnum: field $field: " .
2346							"unable to print DateTime '$_'")
2347							}
2348
2349	81					181	return $str;
2350	330					1697	};
2351							}
2352
2353	351					1366	$self->{_coldata} = [ @coldata ];
2354
2355	351					1330	my @loop = (
2356							['_read_update_after_hr', '_read_update_after_ar', 'read post'],
2357							['_write_update_before_hr', '_write_update_before_ar', 'write pre']
2358							);
2359	351					956	for my $ii (0..$#loop) {
2360	702					1076	my $l = $loop[$ii];
2361
2362	702					1263	my $ht = $self->{$l->[0]};
2363	702					933	my @subrefs = @{$self->{$l->[1]}};
	702					1435
2364	702					961	for my $field (keys %{$ht}) {
	702					1453
2365	70	50				141	unless (exists $named_fields{$field}) {
2366	0					0	$self->_print_error($l->[2] . ": unknown field '$field'",
2367							0, ERR_UNKNOWN_FIELD, { %named_fields } );
2368	0					0	next;
2369							}
2370
2371	70					123	my $i = $named_fields{$field};
2372
2373	70					87	my @allsubs;
2374	70					82	push @allsubs, @{$ht->{$field}};
	70					120
2375	70	100				151	if (defined($subrefs[$i])) {
2376	2	50				6	unshift @allsubs, $subrefs[$i] if $ii == 0;
2377	2	50				6	push @allsubs, $subrefs[$i] if $ii == 1;
2378							}
2379
2380							my $finalsub = sub {
2381	196			196		308	for my $s (@allsubs) {
2382	262					778	$_ = $s->(@_);
2383							}
2384	189					3877	return $_;
2385	70					216	};
2386	70					145	$subrefs[$i] = $finalsub;
2387
2388							}
2389	702					1915	$self->{$l->[1]} = [ @subrefs ];
2390							}
2391
2392	351					1492	my $tmp = _get_def($self->{out_fields}, $self->{write_fields});
2393	351	100				1034	my @wf = @{$tmp} if defined($tmp);
	6					13
2394	351					570	my $count_field_not_found = 0;
2395	351					683	for (@wf) {
2396	16	100	66			90	next if !defined($_) or $_ eq '' or exists $named_fields{$_};
			100
2397	3					5	$count_field_not_found++;
2398	3					18	$self->_print_error("out_fields: unknown field '$_'",
2399							1, ERR_UNKNOWN_FIELD, { %named_fields } );
2400							}
2401	351	100				756	if ($count_field_not_found) {
2402	2					5	$self->_print_error("non existent field(s) encountered");
2403	1					5	delete $self->{out_fields};
2404	1					3	delete $self->{write_fields};
2405							}
2406
2407	350	100				847	my %sh = %{$self->{_out_headers}} if defined($self->{_out_headers});
	4					13
2408	350					525	$count_field_not_found = 0;
2409	350					1157	for (keys %sh) {
2410	8	100	33			35	next if !defined($_) or $_ eq '' or exists $named_fields{$_};
			66
2411	2					3	$count_field_not_found++;
2412	2					12	$self->_print_error("out_header: unknown field '$_'",
2413							1, ERR_UNKNOWN_FIELD, { %named_fields } );
2414							}
2415	350	100				697	$self->_print_error("non existent field(s) encountered") if $count_field_not_found;
2416
2417	349					2306	return 1;
2418							}
2419
2420							#
2421							# Return 0 if there's no more records (error or eof reached), 1 if a record got read
2422							# successfully.
2423							#
2424							# If return value is 1:
2425							# $$ref_ar and $$ref_hr are set to array ref and hash ref of the record, respectively
2426							#
2427							# If return value is 0:
2428							# $$ref_ar and $$ref_hr are set to undef if an error occured
2429							# $$ref_ar and $$ref_hr are set to a scalar if eof reached
2430							#
2431							sub _read_one_record_from_input {
2432	2341			2341		4158	my ($self, $ref_ar, $ref_row_hr) = @_;
2433
2434	2341					3236	my $_debug = $self->{_debug};
2435	2341					2856	my $_debug_extra_fields = $self->{_debug_extra_fields};
2436	2341					3166	my $_debugh = $self->{_debugh};
2437
2438	2341					4125	my $in_file_disp = $self->get_in_file_disp();
2439
2440	2341					3603	my $incsv = $self->{_in_csvobj};
2441	2341					2762	my $ar;
2442
2443							print($_debugh "$PKG: '$in_file_disp': will read line #" . ($self->{_row_read} + 1) . "\n")
2444	2341	50				4000	if $self->{_debug_read};
2445
2446	2341	100				4187	unless ($ar = _mygetline($incsv, $self->{_inh})) {
2447	286	50				10010	if (!$incsv->eof()) {
2448	0					0	my ($code, $str, $pos) = $incsv->error_diag();
2449	0					0	$self->_print_error("$code: $str, record " . $incsv->record_number . ", position $pos");
2450	0					0	$$ref_ar = undef;
2451	0					0	$$ref_row_hr = undef;
2452							} else {
2453	286					1780	$$ref_ar = 1;
2454	286					709	$$ref_row_hr = 1;
2455							}
2456
2457	286					859	$self->_close_inh();
2458
2459	286					944	return 0;
2460							}
2461
2462	2055					59052	$self->{_row_read}++;
2463
2464	2055					2767	my %named_fields = %{$self->{_named_fields}};
	2055					8734
2465
2466	2055	50				4666	if ($self->{_debug_read}) {
2467	0					0	print($_debugh "Line " . $self->{_row_read} . ":\n--\n");
2468	0					0	for (sort keys %named_fields) {
2469	0					0	my $c = _get_def($ar->[$named_fields{$_}], '<undef>');
2470	0					0	print($_debugh " $_ => '" . $c . "'\n");
2471							}
2472							}
2473
2474	2055					2927	my $columns_ar = $self->{_columns};
2475
2476	2055					2633	my $no_undef = $self->{no_undef};
2477	2055	100				3244	if ($no_undef) {
2478	30					37	for (0..$#{$columns_ar}) {
	30					71
2479	324	100				607	$ar->[$_] = '' unless defined($ar->[$_]);
2480							}
2481							}
2482
2483	2055					2857	my $row_hr = { };
2484							$row_hr->{$_} = $ar->[$self->{_regular_named_fields}->{$_}]
2485	2055					2483	foreach keys %{$self->{_regular_named_fields}};
	2055					13216
2486
2487	2055					3891	my $rpost = $self->{_read_update_after_ar};
2488	2055					2541	for my $i (0..$#{$columns_ar}) {
	2055					4135
2489	10586					12777	my $subref = $rpost->[$i];
2490	10586	100				17217	next unless defined($subref);
2491
2492	479					594	do {
2493	479					712	my $field = $columns_ar->[$i];
2494	479					809	local $_ = $ar->[$i];
2495	479					930	my $new_val = $subref->($self, $field);
2496	474					878	$ar->[$i] = $new_val;
2497	474	50				1684	$row_hr->{$field} = $new_val if defined($field);
2498							}
2499
2500							}
2501
2502	2050					2627	for my $i (@{$self->{_extra_fields_indexes}}) {
	2050					3584
2503	442					634	my $name = $columns_ar->[$i];
2504	442					692	my $e = $self->{_extra_fields_definitions}->{$name};
2505
2506	442	50				723	print($_debugh "Extra field: #$i: $name\n") if $_debug_extra_fields;
2507
2508	442					534	my $value;
2509	442	100				7218	if ($e->ef_type == $EF_LINK) {
		100
		50
2510
2511	238	50				1804	print($_debugh " linked field\n") if $_debug_extra_fields;
2512
2513	238					3293	my $remobj = $e->link_remote_obj;
2514							$value = $remobj->vlookup(
2515							$e->link_remote_search,
2516	238					4381	$ar->[$named_fields{$e->link_self_search}],
2517							$e->link_remote_read,
2518							$e->link_vlookup_opts
2519							);
2520
2521							} elsif ($e->ef_type == $EF_FUNC) {
2522
2523	51	50				1176	print($_debugh " computed field\n") if $_debug_extra_fields;
2524
2525	51					666	$value = $e->func_sub->($name, $row_hr, $self->{_stats});
2526
2527							} elsif ($e->ef_type == $EF_COPY) {
2528
2529	153	50				5733	print($_debugh " copy field\n") if $_debug_extra_fields;
2530
2531	153					1898	my $input = $row_hr->{$e->copy_source};
2532	153	50	33			1072	$input = '' if !defined($input) and $no_undef;
2533	153	100				1907	if (defined($e->copy_sub)) {
2534	57					395	local $_ = $input;
2535	57					742	$value = $e->copy_sub->();
2536							} else {
2537	96					603	$value = $input;
2538							}
2539
2540	153	50				970	print($_debugh " in: '$input', out: '$value'\n") if $_debug_extra_fields;
2541
2542							} else {
2543	0					0	confess "Unknown ef_type '" . $e->ef_type . "', check this module' code urgently!";
2544							}
2545
2546	440	100	100			1688	$value = '' if !defined($value) and $no_undef;
2547	440					731	$ar->[$i] = $value;
2548	440					780	$row_hr->{$name} = $value;
2549
2550	440	50				913	print($_debugh " $name => '$value'\n") if $_debug_extra_fields;
2551
2552							}
2553
2554	2048	100				3785	if (defined($self->{read_post_update_hr})) {
2555	33					79	$self->{read_post_update_hr}->($row_hr, $self->{_stats}, $self->get_recnum());
2556	33					288	$ar->[$named_fields{$_}] = $row_hr->{$_} foreach keys %named_fields;
2557							}
2558
2559	2048	100				5593	lock_keys(%$row_hr) if $self->{croak_if_error};
2560
2561							$self->{walker_ar}->($ar, $self->{_stats}, $self->get_recnum())
2562	2048	100				12845	if defined($self->{walker_ar});
2563							$self->{walker_hr}->($row_hr, $self->{_stats}, $self->get_recnum())
2564	2048	100				4067	if defined($self->{walker_hr});
2565
2566	2047					3211	$$ref_ar = $ar;
2567	2047					4113	$$ref_row_hr = $row_hr;
2568
2569	2047					5682	return 1;
2570							}
2571
2572							sub _open_read {
2573	297			297		468	my $self = shift;
2574
2575	297					597	my $verbose = $self->{verbose};
2576	297					696	my $in_file_disp = $self->get_in_file_disp();
2577
2578	297					642	$self->{_stats} = { };
2579	297					574	$self->{_read_in_progress} = 1;
2580
2581	297	50				692	$self->_print("-- $in_file_disp reading start\n") if $verbose;
2582							}
2583
2584							sub _close_read {
2585	316			316		483	my $self = shift;
2586	316					491	my $keep_quiet = shift;
2587
2588	316					567	my $verbose = $self->{verbose};
2589	316					642	my $in_file_disp = $self->get_in_file_disp();
2590
2591	316					570	$self->{_read_in_progress} = 0;
2592
2593	316	50	33			778	if ($verbose and !$keep_quiet) {
2594	0					0	$self->_print("-- $in_file_disp reading end: " . $self->{_row_read} . " row(s) read\n");
2595	0					0	for my $k (sort keys %{$self->{_stats}}) {
	0					0
2596	0					0	$self->_printf(" %7d %s\n", $self->{_stats}->{$k}, $k);
2597							}
2598							}
2599
2600	316					609	$self->{_nb_rows} = $self->{_row_read};
2601							}
2602
2603							# Return 0 if error, 1 if all good
2604							sub _S4_read_all_in_mem {
2605	167			167		320	my $self = shift;
2606
2607	167					495	$self->_register_pass("_S4_read_all_in_mem()");
2608
2609	167					524	$self->_open_read();
2610
2611	167					311	my $ar;
2612							my $row_hr;
2613	167					547	while ($self->_read_one_record_from_input(\$ar, \$row_hr)) {
2614
2615	1553					2045	push @{$self->{_flat}}, $ar;
	1553					4063
2616
2617							}
2618
2619	165	50				473	my $retcode = (defined($ar) ? 1 : 0);
2620	165					515	$self->_update_in_mem_record_count();
2621
2622	165					557	$self->_close_read();
2623
2624	165					415	return $retcode;
2625							}
2626
2627							sub _chain_array {
2628	24			24		146	return split(/\s->\s/, $_[0]);
2629							}
2630
2631							sub _chain_str {
2632	2			2		7	return join('->', @_);
2633							}
2634
2635							sub field_add_link {
2636	25			25	1	4309	my $self = shift;
2637
2638	25					852	validate_pos(@_, {type => UNDEF \| SCALAR}, {type => SCALAR}, {type => SCALAR \| OBJECT},
2639							{type => HASHREF, optional => 1});
2640
2641	22					92	my ($new_field, $chain, $obj, $param_opts) = @_;
2642
2643	22					44	my $croak_if_error = $self->{croak_if_error};
2644	22					39	my $_debug = $self->{_debug};
2645	22					35	my $_debugh = $self->{_debugh};
2646
2647	22					57	my @c = _chain_array($chain);
2648	22	100				60	$new_field = $c[2] unless defined($new_field);
2649
2650	22	50				51	print($_debugh "Registering new linked field, new_field = '$new_field', chain = '$chain'\n")
2651							if $_debug;
2652
2653	22	100	66			104	unless (@c == 3 and $c[2] ne '') {
2654	1					8	$self->_print_error("wrong links chain parameter: '$chain', " .
2655							"look for CHAIN in Text::AutoCSV manual for help");
2656	1					12	return undef;
2657							}
2658
2659	21	50				59	return undef unless $self->_status_forward('S2');
2660	21	50				54	return undef unless $self->_status_backward('S2');
2661
2662	21	100				52	my @tmp = %{$param_opts} if $param_opts;
	13					37
2663	21					513	my %opts = validate(@tmp, $SEARCH_VALIDATE_OPTIONS);
2664
2665	20					75	my $target_name = '';
2666	20	100				55	if (ref $obj eq '') {
2667	19					30	my $in_file = $obj;
2668	19					31	$target_name = $in_file;
2669
2670							#
2671							# TODO (?)
2672							#
2673							# Take into account the fact that the OS' file system is case insensitive. At the
2674							# moment, two different strings (even if identical in a case insensitive comparison)
2675							# will be managed as being distinct.
2676							# I put a question mark in this TO DO - after all, the user of this module had better
2677							# use same case when dealing with multiple links of the same file.
2678							#
2679							# Also, tuning this module' behavior depending on the OS' characteristics would be not
2680							# ideal, it'd add a level of complexity to understand how it works and what to expect.
2681							#
2682	19	100	100			66	if (exists $self->{_obj} and exists $self->{_obj}->{$in_file}) {
2683
2684	4	50				110	print(
2685							$_debugh
2686							"field_add_link: file '$in_file': re-using existing Text::AutoCSV object\n"
2687							) if $_debug;
2688
2689	4					15	$obj = $self->{_obj}->{$in_file};
2690							} else {
2691
2692	15	50				36	print($_debugh "field_add_link: file '$in_file': creating new Text::AutoCSV object\n")
2693							if $_debug;
2694
2695	15	100				58	$self->{_obj} = { } unless exists $self->{_obj};
2696
2697							#
2698							# The created Text::AutoCSV must be created with the same search options as what is
2699							# currently found in $self.
2700							#
2701							# Why?
2702							# Because the link is populated doing a vlookup on the remote object ($obj below),
2703							# not on $self. Therefore, if we don't "propagate" search options from $self to
2704							# $obj, search tunnings won't work as user would expect.
2705							#
2706	15					24	my %search_opts;
2707	15					40	for (qw(search_case search_trim search_ignore_empty search_ignore_accents
2708							search_value_if_not_found search_value_if_ambiguous search_ignore_ambiguous)) {
2709							# We assign depending on whether or not the attribute EXISTS - the definedness
2710							# is not appropriate, in case an attribute would have been assigned to undef.
2711	105	100				189	$search_opts{$_} = $self->{$_} if exists $self->{$_};
2712							}
2713
2714							$obj = Text::AutoCSV->new(
2715							in_file => $in_file,
2716							verbose => $self->{verbose},
2717							infoh => $self->{infoh},
2718							_debug => $self->{debug},
2719							_debugh => $self->{debugh},
2720	15					103	%search_opts
2721							);
2722	15					61	$self->{_obj}->{$in_file} = $obj;
2723							}
2724							} else {
2725	1					2	$target_name = '(object)';
2726	1	50				4	print($_debugh "field_add_link: Text::AutoCSV object provided\n") if $_debug;
2727							}
2728
2729	20	100				64	$self->{_extra_fields} = [ ] unless exists $self->{_extra_fields};
2730
2731	20					35	push @{$self->{_extra_fields}}, ExtraField->new(
	20					516
2732							ef_type => $EF_LINK,
2733							self_name => $new_field,
2734							description => "link: $target_name, chain: $chain",
2735							check_field_existence => $c[0],
2736
2737							link_self_search => $c[0],
2738							link_remote_obj => $obj,
2739							link_remote_search => $c[1],
2740							link_remote_read => $c[2],
2741
2742							link_vlookup_opts => \%opts
2743							);
2744
2745	20					2417	return $self;
2746							}
2747
2748							sub links {
2749	2			2	1	5	my $self = shift;
2750
2751	2					31	validate_pos(@_, {type => UNDEF \| SCALAR}, {type => SCALAR}, {type => SCALAR \| OBJECT},
2752							{type => HASHREF, optional => 1});
2753
2754	2					7	my $prefix_field = shift;
2755	2					4	my $chain = shift;
2756	2					5	my ($obj, $param_opts) = @_;
2757
2758	2					5	my @c = _chain_array($chain);
2759
2760	2	50	33			17	if (@c != 2 or $c[0] eq '' or $c[1] eq '') {
			33
2761	0					0	$self->_print_error("wrong links chain parameter: '$chain', " .
2762							"look for JOINCHAIN in Text::AutoCSV manual for help");
2763	0					0	return undef;
2764							}
2765
2766	2	100				6	$prefix_field = '' unless defined($prefix_field);
2767	2					6	my $chain2 = _chain_str(@c, '*');
2768
2769	2					8	return $self->field_add_link($prefix_field, $chain2, @_);
2770							}
2771
2772							sub field_add_computed {
2773	7			7	1	2550	my $self = shift;
2774
2775	7					217	validate_pos(@_, {type => SCALAR}, {type => CODEREF});
2776	6					26	my ($new_field, $func) = @_;
2777
2778	6					15	my $croak_if_error = $self->{croak_if_error};
2779
2780	6					12	my $_debug = $self->{_debug};
2781	6					12	my $_debugh = $self->{_debugh};
2782
2783	6	50				20	print($_debugh "Registering new computed field, new_field = '$new_field'\n") if $_debug;
2784
2785	6	100				19	return undef unless $self->_status_forward('S2');
2786	5	50				13	return undef unless $self->_status_backward('S2');
2787
2788	5					10	push @{$self->{_extra_fields}}, ExtraField->new(
	5					108
2789							ef_type => $EF_FUNC,
2790							self_name => $new_field,
2791							description => "computed",
2792
2793							func_sub => $func
2794							);
2795
2796	5					464	return $self;
2797							}
2798
2799							sub field_add_copy {
2800	14			14	1	8764	my $self = shift;
2801
2802	14					213	validate_pos(@_, {type => SCALAR}, {type => SCALAR}, {type => CODEREF, optional => 1});
2803	14					62	my ($new_field, $copy_source, $func) = @_;
2804
2805	14					32	my $croak_if_error = $self->{croak_if_error};
2806
2807	14					26	my $_debug = $self->{_debug};
2808	14					25	my $_debugh = $self->{_debugh};
2809
2810	14	50				36	print($_debugh "Registering field copy, new_field = '$new_field' copied from '$copy_source'\n")
2811							if $_debug;
2812
2813	14	100				34	return undef unless $self->_status_forward('S2');
2814	12	50				35	return undef unless $self->_status_backward('S2');
2815
2816	12	100				22	push @{$self->{_extra_fields}}, ExtraField->new(
	12					305
2817							ef_type => $EF_COPY,
2818							self_name => $new_field,
2819							description => "copy of $copy_source " . (defined($func) ? '(with sub)' : '(no sub)'),
2820							check_field_existence => $copy_source,
2821
2822							copy_source => $copy_source,
2823							copy_sub => $func
2824							);
2825
2826	12					1147	return $self;
2827							}
2828
2829							sub in_map {
2830	15			15	1	1062	my $self = shift;
2831
2832	15					39	return $self->read_update_after(@_);
2833							}
2834
2835							sub read_update_after {
2836	16			16	1	46	my $self = shift;
2837	16					179	validate_pos(@_, {type => SCALAR}, {type => CODEREF});
2838
2839	16					55	my ($field, $subref) = @_;
2840
2841	16					32	my $_debug = $self->{_debug};
2842	16					24	my $_debugh = $self->{_debugh};
2843
2844	16	50				39	return undef unless $self->_status_forward('S2');
2845	16	50				40	return undef unless $self->_status_backward('S2');
2846
2847	16	50				37	print($_debugh "Registering read_post_update subref for field '$field'\n") if $_debug;
2848
2849							$self->{_read_update_after_hr}->{$field} = [ ]
2850	16	100				53	unless defined($self->{_read_update_after_hr}->{$field});
2851
2852	16					21	push @{$self->{_read_update_after_hr}->{$field}}, $subref;
	16					44
2853
2854	16					188	return $self;
2855							}
2856
2857							sub out_map {
2858	11			11	1	24	my $self = shift;
2859
2860	11					31	return $self->write_update_before(@_);
2861							}
2862
2863							sub write_update_before {
2864	12			12	1	21	my $self = shift;
2865	12					147	validate_pos(@_, {type => SCALAR}, {type => CODEREF});
2866
2867	12					68	my ($field, $subref) = @_;
2868
2869	12					26	my $_debug = $self->{_debug};
2870	12					23	my $_debugh = $self->{_debugh};
2871
2872	12	50				35	return undef unless $self->_status_forward('S2');
2873	12	50				34	return undef unless $self->_status_backward('S2');
2874
2875	12	50				36	print($_debugh "Registering write_pre_update subref for field '$field'\n") if $_debug;
2876
2877							$self->{_write_update_before_hr}->{$field} = [ ]
2878	12	100				42	unless defined($self->{_write_update_before_hr}->{$field});
2879
2880	12					24	push @{$self->{_write_update_before_hr}->{$field}}, $subref;
	12					31
2881
2882	12					98	return $self;
2883							}
2884
2885							sub reset_next_record_hr {
2886	189			189	1	317	my $self = shift;
2887
2888	189					869	validate_pos(@_);
2889
2890	189					507	$self->{_current_record} = undef;
2891
2892	189					313	return $self;
2893							}
2894
2895							sub _create_internal_column_name_from_its_number {
2896	1120			1120		2658	return sprintf("__%04i__", $_[0]);
2897							}
2898
2899							sub _ar_to_hr {
2900	2079			2079		2565	my $self = shift;
2901
2902	2079					14622	validate_pos(@_, {type => ARRAYREF});
2903
2904	2079					5186	my ($ar) = @_;
2905	2079					2603	my $last_elem_index = scalar(@{$ar}) - 1;
	2079					3428
2906
2907	2079					2998	my $nr = $self->{_named_fields};
2908	2079					2499	my %h;
2909							my %n_seen;
2910	2079					2286	for (keys %{$nr}) {
	2079					5571
2911	9934					18379	$h{$_} = $ar->[$nr->{$_}];
2912	9934					16214	undef $n_seen{$nr->{$_}};
2913							}
2914	2079					4437	for my $i (0..$last_elem_index) {
2915	11001	100				18595	if (!exists($n_seen{$i})) {
2916	1120					1509	my $k = _create_internal_column_name_from_its_number($i);
2917	1120	50				3161	$h{$k} = $ar->[$i] if !exists $h{$k};
2918							}
2919							}
2920
2921	2079	100				5685	lock_keys(%h) if $self->{croak_if_error};
2922
2923	2079					16022	return \%h;
2924							}
2925
2926							sub get_next_record_hr {
2927	2001			2001	1	2852	my $self = shift;
2928
2929	2001					13072	validate_pos(@_, {type => SCALARREF, optional => 1});
2930
2931	2001					4485	my $refkey = $_[0];
2932
2933	2001	50				3628	return undef unless $self->_status_forward('S4');
2934
2935	1998	100				3831	if (!defined($self->{_current_record})) {
2936	186					341	$self->{_current_record} = 0;
2937							} else {
2938	1812					2408	$self->{_current_record}++;
2939							}
2940
2941	1998					3250	my $ar = $self->{_flat}->[$self->{_current_record}];
2942	1998	100				3183	if (!defined($ar)) {
2943	186					310	$self->{_current_record} = undef;
2944	186					285	$$refkey = undef;
2945	186					495	return undef;
2946							}
2947
2948	1812					2623	$$refkey = $self->{_current_record};
2949
2950	1812					3202	return $self->_ar_to_hr($ar);
2951							}
2952
2953							sub read {
2954	60			60	1	19497	my $self = shift;
2955
2956	60					417	validate_pos(@_);
2957
2958	60	50				185	return undef unless $self->_status_backward('S3');
2959	60	100				132	return undef unless $self->_status_forward('S3');
2960
2961	51					175	$self->_register_pass("read()");
2962
2963	51					172	$self->_open_read();
2964
2965	51					84	my $ar;
2966							my $row_hr;
2967	51					142	while ($self->_read_one_record_from_input(\$ar, \$row_hr)) {
2968							# Ben oui quoi... qu'est-ce que l'on peut bien faire d'autre ?
2969							}
2970
2971	49					151	$self->_close_read();
2972	49	50				109	return undef unless defined($ar);
2973
2974	49	50				127	return undef unless $self->_status_reset();
2975
2976	49					244	return $self;
2977							}
2978
2979							#
2980							# Initially, _read_all_in_mem was intended for the test plan.
2981							#
2982							# Turned out to be sometimes useful for user, thus, is no longer private since 1.1.5.
2983							# Private version below is kept for compatibility.
2984							#
2985							sub read_all_in_mem {
2986	1			1	1	4	my $self = shift;
2987
2988	1					3	return $self->_read_all_in_mem();
2989							}
2990
2991							sub _read_all_in_mem {
2992	9			9		1002	my $self = shift;
2993
2994	9	50				17	return 0 unless $self->_status_backward('S3');
2995	9	50				21	return 0 unless $self->_status_forward('S4');
2996
2997	9					63	return $self;
2998							}
2999
3000							sub _render {
3001	0			0		0	my $v = $_[0];
3002
3003	0	0	0			0	if (length($v) == 1 and ord($v) < 32) {
3004	0					0	my $n = ord($v);
3005	0	0				0	return '\n' if $n == 10;
3006	0	0				0	return '\r' if $n == 13;
3007	0	0				0	return '\t' if $n == 9;
3008	0	0				0	return '\f' if $n == 12;
3009	0	0				0	return '\b' if $n == 8;
3010	0	0				0	return '\a' if $n == 7;
3011	0	0				0	return '\e' if $n == 27;
3012	0					0	return '\0' . oct($n);
3013							}
3014	0					0	return $v;
3015							}
3016
3017							sub print_id {
3018	0			0	1	0	my $self = shift;
3019
3020	0					0	$self->_printf("-- " . $self->get_in_file_disp() . ":\n");
3021	0					0	$self->_printf("sep_char: " . _render($self->get_sep_char()) . "\n");
3022	0					0	$self->_printf("escape_char: " . _render($self->get_escape_char()) . "\n");
3023	0					0	$self->_printf("in_encoding: " . _render($self->get_in_encoding()) . "\n");
3024	0	0				0	$self->_printf("is_always_quoted: " . ($self->get_is_always_quoted() ? 'yes' : 'no') . "\n");
3025
3026	0					0	my @coldata = $self->get_coldata();
3027	0					0	my @disp;
3028	0					0	push @disp, [ '#', 'FIELD', 'HEADER', 'EXT DATA', 'DATETIME FORMAT', 'DATETIME LOCALE' ];
3029	0					0	push @disp, [ map { my $s = $_; $s =~ s/./-/g; $s } @{$disp[0]} ];
	0					0
	0					0
	0					0
	0					0
3030	0					0	for my $i (0..$#coldata) {
3031	0					0	my $col = $coldata[$i];
3032
3033	0					0	my @row;
3034	0					0	push @row, "$i";
3035	0	0				0	push @row, (defined($col->[$_]) ? ($col->[$_] . '') : '') for (0..4);
3036	0					0	map { s/\n/\\n/g; s/\r/\\r/g; s/\t/\\t/g } @row;
	0					0
	0					0
	0					0
3037	0					0	push @disp, [ @row ];
3038							}
3039	0					0	my $n = @{$disp[-1]};
	0					0
3040	0					0	my @max = (-1) x $n;
3041	0					0	for my $l (@disp) {
3042	0	0				0	do { $max[$_] = length($l->[$_]) if $max[$_] < length($l->[$_]) } for (0 .. $n - 1);
	0					0
3043							}
3044	0					0	my $s = join(' ', map { "%-${_}s" } @max);
	0					0
3045	0					0	$self->_print("\n");
3046	0					0	$self->_printf("$s\n", @{$_}) for (@disp);
	0					0
3047							}
3048
3049							sub set_out_file {
3050	2			2	1	5260	my $self = shift;
3051	2					31	validate_pos(@_, {type => SCALAR});
3052
3053	2					10	my ($out_file) = @_;
3054	2					8	$self->{out_file} = $out_file;
3055
3056	2					11	return $self;
3057							}
3058
3059							# Subrefs set with out_map
3060							sub _execute_write_update_before {
3061	367			367		614	my ($self, $ar) = @_;
3062
3063	367					529	my $columns_ar = $self->{_columns};
3064
3065	367					511	my $wpre = $self->{_write_update_before_ar};
3066	367					487	for my $i (0..$#{$columns_ar}) {
	367					753
3067	1177					1495	my $subref = $wpre->[$i];
3068	1177	100				2077	next unless defined($subref);
3069
3070	159					181	do {
3071	159					257	local $_ = $ar->[$i];
3072	159					212	my $field = $columns_ar->[$i];
3073	159					301	my $new_val = $subref->($self, $field);
3074	156					367	$ar->[$i] = $new_val;
3075							}
3076
3077							}
3078							}
3079
3080							# Take into account write_fields if it got set
3081							sub _apply_write_fields {
3082	466			466		797	my ($self, $ar) = @_;
3083
3084	466					564	my @final;
3085
3086	466					1308	my $tmp = _get_def($self->{out_fields}, $self->{write_fields});
3087	466	100				1066	my @wf = @{$tmp} if defined($tmp);
	16					26
3088
3089	466	100				1001	return unless @wf;
3090
3091	16					18	my %named_fields = %{$self->{_named_fields}};
	16					63
3092	16					37	for my $i (0..$#wf) {
3093	40					47	my $field = $wf[$i];
3094	40	100	66			108	my $tmp = $ar->[$named_fields{$field}] if defined($field) and $field ne '';
3095
3096							# Put here any post-processing of value
3097							# WARNING
3098							# $tmp can be undef
3099							# ...
3100
3101	40					71	$final[$i] = $tmp;
3102							}
3103	16					50	$_[1] = [ @final ];
3104							}
3105
3106							sub write {
3107	109			109	1	23249	my $self = shift;
3108
3109	109					728	validate_pos(@_);
3110
3111	109	50				390	return undef unless $self->_status_forward('S3');
3112
3113	104					211	my $verbose = $self->{verbose};
3114	104					170	my $_debug = $self->{_debug};
3115	104					175	my $_debugh = $self->{_debugh};
3116
3117	104					195	my $out_file = $self->{out_file};
3118
3119	104					176	my %stats;
3120
3121	104	50				213	$self->_print("-- $out_file writing start\n") if $verbose;
3122	104					157	my $rows_written = 0;
3123
3124	104					164	my $outh = $self->{outh};
3125
3126	104					201	$self->{_close_outh_when_finished} = 0;
3127	104	50				245	unless (defined($outh)) {
3128	104	50				286	if ($out_file eq '') {
3129	0					0	$outh = \*STDOUT;
3130							} else {
3131	104	50				7450	unless (open($outh, '>', $out_file)) {
3132	0					0	$self->_print_error("unable to open file '$out_file': $!");
3133	0					0	return undef;
3134							}
3135	104					449	$self->{_close_outh_when_finished} = 1;
3136							}
3137	104					226	$self->{outh} = $outh;
3138							}
3139
3140	104	50				264	unless ($self->{_leave_encoding_alone}) {
3141							my $enc = (defined($self->{_inh_encoding}) ?
3142							$self->{_inh_encoding} :
3143	104	50				341	$DEFAULT_OUT_ENCODING);
3144
3145							# out_encoding option takes precedence
3146	104	100				258	$enc = $self->{out_encoding} if defined($self->{out_encoding});
3147	104					289	my $m = ":encoding($enc)";
3148	104	50	66			264	if (_is_utf8($enc) and $self->{out_utf8_bom}) {
3149	0					0	$m .= ':via(File::BOM)';
3150							}
3151
3152	104	50	33			423	if ($OS_IS_PLAIN_WINDOWS and $FIX_PERLMONKS_823214) {
3153							# Tested with UTF-16LE, NOT tested with UTF-16BE (it should be the same story)
3154	0	0				0	$m = ":raw:perlio:$m:crlf" if $enc =~ /^utf-?16/i;
3155							}
3156
3157	104					982	binmode $outh, $m;
3158	104	50				10140	print($_debugh "Encoding string used for output: $m\n") if $_debug;
3159							}
3160
3161	104					224	my $escape_char = $self->{escape_char};
3162	104					195	my $quote_char = $self->{quote_char};
3163
3164	104					160	my %opts;
3165	104					202	$opts{binary} = 1;
3166	104					199	$opts{eol} = "\n";
3167
3168	104	50				350	$opts{sep_char} = $self->{sep_char} if defined($self->{sep_char});
3169	104	100				244	$opts{sep_char} = $self->{out_sep_char} if defined($self->{out_sep_char});
3170
3171	104	50				316	$opts{quote_char} = $self->{quote_char} if defined($self->{quote_char});
3172	104	50				240	$opts{quote_char} = $self->{out_quote_char} if defined($self->{out_quote_char});
3173
3174	104	100				286	$opts{escape_char} = $self->{escape_char} if defined($self->{escape_char});
3175	104	100				219	$opts{escape_char} = $self->{out_escape_char} if defined($self->{out_escape_char});
3176
3177	104					211	$opts{always_quote} = $self->{_is_always_quoted};
3178	104	100				215	$opts{always_quote} = $self->{out_always_quote} if defined($self->{out_always_quote});
3179
3180	104					774	my $csvout = Text::CSV->new({ %opts });
3181	104	50				16000	if (!defined($csvout)) {
3182	0					0	$self->_print_error("error creating output Text::CSV object");
3183	0					0	return undef;
3184							}
3185
3186	104					440	my $write_filter_hr = _get_def($self->{out_filter}, $self->{write_filter_hr});
3187
3188	104	100	66			673	if (($self->{has_headers} and
			100
			100
3189							!(defined($self->{out_has_headers}) and !$self->{out_has_headers}))
3190							or $self->{out_has_headers}) {
3191	102					215	my $ar = [ ];
3192	102	100				245	if ($self->{has_headers}) {
3193	98					192	$ar = $self->{_headers};
3194							} else {
3195	4					7	my $nf = $self->{_named_fields};
3196	4					6	$ar->[$nf->{$_}] = $_ for (keys %{$nf});
	4					20
3197							}
3198
3199	102	100				271	if (exists $self->{_out_headers}) {
3200	3					4	my $h = $self->{_out_headers};
3201	3					5	for (keys %{$self->{_named_fields}}) {
	3					10
3202	12	100				22	if (exists $h->{$_}) {
3203	5					13	$ar->[$self->{_named_fields}->{$_}] = $h->{$_};
3204							}
3205							}
3206							}
3207
3208	102					347	$self->_apply_write_fields($ar);
3209
3210	102					1568	$csvout->print($outh, $ar);
3211	102					1115	$rows_written++;
3212							}
3213
3214	104					182	my $do_status_reset = 0;
3215
3216
3217							#
3218							# FIXME!!!
3219							#
3220							# Instead of this duplication of code, provide AutoCSV with a "create iterator sub" feature to
3221							# iterate over all records, whatever is going on behind the scene (in-memory or read input).
3222							#
3223							# Such an iterator would also benefit to module users.
3224							#
3225
3226
3227	104	100				280	if ($self->{_status} == 4) {
3228
3229							#
3230							# The content is available in-memory: we write from what we have in-memory then...
3231							#
3232
3233	25					80	my @keys = $self->get_keys();
3234	25					48	my @ordered_keys = @keys;
3235	25	100				65	if (exists $self->{'out_orderby'}) {
3236	1					2	my @orderby = @{$self->{'out_orderby'}};
	1					2
3237							@ordered_keys = sort {
3238	1					4	for my $f (@orderby) {
	7					12
3239	8					12	my $cmp = $self->get_cell($a, $f) cmp $self->get_cell($b, $f);
3240	8	100				20	return $cmp if $cmp;
3241							}
3242	0					0	return 0;
3243							} @keys;
3244							}
3245
3246							# for my $k ($self->get_keys()) {
3247	25					54	for my $k (@ordered_keys) {
3248	92					179	my $hr = $self->get_row_hr($k);
3249	92	50				173	if (defined($write_filter_hr)) {
3250	0	0				0	next unless $write_filter_hr->($hr);
3251							}
3252	92					117	my $ar = [ @{$self->get_row_ar($k)} ];
	92					158
3253
3254	92					229	$self->_execute_write_update_before($ar);
3255	92					182	$self->_apply_write_fields($ar);
3256
3257	92					483	$csvout->print($outh, $ar);
3258	92					747	$rows_written++;
3259							}
3260
3261							} else {
3262
3263							#
3264							# No in-memory content available: we read and write in parallel.
3265							#
3266
3267	79					275	$self->_register_pass("write()");
3268
3269	79					257	$self->_open_read();
3270	79					143	my $ar;
3271							my $row_hr;
3272	79					255	while ($self->_read_one_record_from_input(\$ar, \$row_hr)) {
3273	301	100				557	if (defined($write_filter_hr)) {
3274	46	100				103	next unless $write_filter_hr->($row_hr, \%stats, $self->get_recnum());
3275							}
3276	275					504	$ar = [ @{$ar} ];
	275					655
3277
3278	275					831	$self->_execute_write_update_before($ar);
3279	272					617	$self->_apply_write_fields($ar);
3280
3281	272					1571	$csvout->print($outh, $ar);
3282	272					2270	$rows_written++;
3283							}
3284	72					254	$self->_close_read();
3285
3286	72					127	$do_status_reset = 1
3287							}
3288
3289	97					267	$self->_close_outh();
3290
3291	97	50				286	if ($verbose) {
3292	0					0	$self->_print("-- $out_file writing end: $rows_written row(s) written\n");
3293	0					0	for my $k (sort keys %stats) {
3294	0					0	$self->_printf(" %7d %s\n", $stats{$k}, $k);
3295							}
3296							}
3297
3298	97	100				229	if ($do_status_reset) {
3299	72	50				231	return undef unless $self->_status_reset();
3300							}
3301	97					1192	return $self;
3302							}
3303
3304
3305
3306							#
3307							# * * *************************************************************************
3308							# * * *************************************************************************
3309							# * OBJ ***************************************************************************
3310							# * * *************************************************************************
3311							# * * *************************************************************************
3312							#
3313
3314							#
3315							# The subs below assume Text::AutoCSV can be in status S4 = all in memory.
3316							#
3317
3318
3319							sub get_keys {
3320	32			32	1	2003	my $self = shift;
3321	32					216	validate_pos(@_);
3322
3323	32	50				103	return undef unless $self->_status_forward('S4');
3324
3325	32					52	my $last_key = @{$self->{_flat}} - 1;
	32					82
3326	32					85	my @r = (0..$last_key);
3327
3328	32					81	return @r;
3329							}
3330
3331							sub get_row_ar {
3332	364			364	1	1051	my $self = shift;
3333	364					2436	validate_pos(@_, {type => SCALAR});
3334	364					860	my ($key) = @_;
3335
3336	364	50				703	return undef unless $self->_status_forward('S4');
3337
3338	364	50				669	unless (defined($key)) {
3339	0					0	$self->_print_error("get_row_ar(): \$key is not defined!");
3340	0					0	return undef;
3341							}
3342
3343	364	100				758	$self->_print_error("unknown row '$key'") unless defined($self->{_flat}->[$key]);
3344	364					721	return $self->{_flat}->[$key];
3345							}
3346
3347							sub get_row_hr {
3348	269			269	1	403	my $self = shift;
3349	269					2074	validate_pos(@_, {type => SCALAR});
3350	269					717	my ($key) = @_;
3351
3352	269					553	my $ar = $self->get_row_ar($key);
3353	269	100				498	return undef unless defined($ar);
3354
3355	267					533	return $self->_ar_to_hr($ar);
3356							}
3357
3358							#
3359							# Could be made much more efficient (directly read $self->{_flat} instead of calling get_row_hr
3360							# that itself calls get_row_ar).
3361							# I leave it as is because get_hr_all is not good practice (it is not scalable), it was
3362							# primarily done to ease test plan.
3363							#
3364							# By the way I may make it one day not available by default, requesting caller to tune some
3365							# variable (like { $Text::AutoCSV::i_am_the_test_plan = 1 }) to expose it.
3366							#
3367							sub get_hr_all {
3368	108			108	1	2663	my $self = shift;
3369	108					771	validate_pos(@_);
3370
3371	108					244	my @resp;
3372	108					384	$self->reset_next_record_hr();
3373	108					327	while (my $hr = $self->get_next_record_hr()) {
3374	414					1013	push @resp, $hr;
3375							}
3376	105					631	return @resp;
3377							}
3378
3379							sub get_recnum {
3380	190			190	1	300	my $self = shift;
3381	190					969	validate_pos(@_);
3382
3383	190	50				501	return -1 unless $self->{_read_in_progress};
3384	190					362	return _get_def($self->{_row_read}, -1);
3385							}
3386
3387							sub _check_for_search {
3388	906			906		1584	my ($self, $field) = @_;
3389	906	50				1558	return undef unless $self->_status_forward('S4');
3390
3391	905	100				2450	return 1 if exists $self->{_named_fields}->{$field};
3392							$self->_print_error("search: unknown field '$field'",
3393	6					34	0, ERR_UNKNOWN_FIELD, $self->{_named_fields});
3394							}
3395
3396							sub get_cell {
3397	20			20	1	561	my $self = shift;
3398	20					174	validate_pos(@_, {type => SCALAR}, {type => SCALAR});
3399	20					57	my ($key, $field) = @_;
3400
3401	20	50				34	return undef unless $self->_check_for_search($field);
3402	19					36	my $row = $self->get_row_hr($key);
3403	19	100				44	return $row unless defined($row);
3404	18					61	return $row->{$field};
3405							}
3406
3407							sub get_values {
3408	9			9	1	3458	my $self = shift;
3409	9					127	validate_pos(@_, {type => SCALAR}, {type => UNDEF \| CODEREF, optional => 1});
3410	9					32	my ($field, $filter_subref) = @_;
3411
3412	9	50				28	return undef unless $self->_check_for_search($field);
3413
3414	9					18	my @values;
3415	9					32	$self->reset_next_record_hr();
3416	9					23	while (my $hr = $self->get_next_record_hr()) {
3417	53	100				94	if (defined($filter_subref)) {
3418	23					40	local $_ = $hr->{$field};
3419	23	100				40	next unless $filter_subref->();
3420							}
3421	42					171	push @values, $hr->{$field};
3422							}
3423	9					47	return @values;
3424							}
3425
3426							sub _get_hash_and_projector {
3427	561			561		926	my ($self, $field, $arg_opts) = @_;
3428
3429	561					774	my $_debug = $self->{_debug};
3430	561					654	my $_debugh = $self->{_debugh};
3431
3432	561	50				905	my %opts = %{$arg_opts} if defined($arg_opts);
	561					1100
3433
3434	561					1685	my $opt_case = _get_def($opts{'case'}, $self->{search_case}, $DEF_SEARCH_CASE);
3435	561					1546	my $opt_trim = _get_def($opts{'trim'}, $self->{search_trim}, $DEF_SEARCH_TRIM);
3436							my $opt_ignore_empty = _get_def($opts{'ignore_empty'}, $self->{search_ignore_empty},
3437	561					1486	$DEF_SEARCH_IGNORE_EMPTY);
3438							my $opt_ignacc = _get_def($opts{'ignore_accents'}, $self->{search_ignore_accents},
3439	561					1396	$DEF_SEARCH_IGNORE_ACCENTS);
3440
3441	561					1315	my $opts_stringified = $opt_case . $opt_trim . $opt_ignore_empty . $opt_ignacc;
3442	561					899	my $hash_name = "_h${field}_${opts_stringified}";
3443	561					798	my $projector_name = "_p${field}_${opts_stringified}";
3444
3445	561	100	66			1917	if (exists $self->{$hash_name} and exists $self->{$projector_name}) {
		50	33
3446	489	50				766	print($_debugh "Search by key '$field': using existing hash and projector (" .
3447							"$hash_name, $projector_name)\n") if $_debug;
3448	489					1315	return ($hash_name, $projector_name);
3449							} elsif (exists $self->{$hash_name} or exists $self->{$projector_name}) {
3450	0					0	confess "Man, check your $PKG module code now!";
3451							}
3452
3453	72	50				147	print($_debugh "Search by key '$field': building hash\n") if $_debug;
3454
3455							#
3456							# Projectors
3457							#
3458							# The projector contains subs to derivate the search key from the field value.
3459							# At the moment it is used to manage with case / without case searches and with trim / without trim
3460							# searches (meaning, ignoring spaces at beginning and end of fields)
3461							#
3462							# Why naming it a projector?
3463							# Because if you run it twice on a value, the second run should produce the same result, meaning:
3464							# p(p(x)) = p(x) whatever x
3465							#
3466
3467	72					100	my @projectors;
3468
3469							# Add case removal in the projector function list
3470	72	100		1479		295	push @projectors, sub { return lc(shift); } unless $opt_case;
	1479					3507
3471
3472							# Add trim in the projector function list
3473	72	100				148	if ($opt_trim) {
3474							push @projectors,
3475							sub {
3476	1479			1479		6391	my $v = shift;
3477	1479					4668	$v =~ s/^\s+\|\s+$//g;
3478	1479					3245	return $v;
3479	57					170	};
3480							}
3481
3482							# Add remove_accents in the projector function list
3483	72	100		1886		239	push @projectors, sub { return remove_accents(shift); } if $opt_ignacc;
	1886					3049
3484
3485							my $projector = sub {
3486	1906			1906		3318	my $v = _get_def($_[0], '');
3487	1906					3587	$v = $_->($v) foreach (@projectors);
3488	1906					3177	return $v;
3489	72					190	};
3490
3491							#
3492							# Filter
3493							#
3494							# As opposed to projectors above (where a search key is transformed), the idea now is to ignore
3495							# certain keys when doing a search.
3496							# At the moment, used to manage searches with / without empty values.
3497							#
3498							# That is to say: shall we use empty value as a regular value to search on, as in
3499							# my @results = $self->search('FIELDNAME', '');
3500							# ?
3501							#
3502							# Right now we don't use an array-based construct, that'd allow to chain filters with one another
3503							# (as we now have only one filter to deal with), later, we may use an array of filters, as done with
3504							# projectors...
3505							#
3506
3507	72					102	my $filter;
3508	72	100				141	if ($opt_ignore_empty) {
3509	1268			1268		2771	$filter = sub { return $_[0] ne ''; }
3510	61					179	} else {
3511	77			77		123	$filter = sub { return 1; }
3512	11					30	}
3513
3514	72					136	my %h;
3515							my $k;
3516	72					224	$self->reset_next_record_hr();
3517	72					202	while (my $hr = $self->get_next_record_hr(\$k)) {
3518	1345					2072	my $kv = $hr->{$field};
3519	1345					2090	my $p = $projector->($kv);
3520	1345	100				2153	unless ($filter->($p)) {
3521	76	50				145	print($_debugh "Ignoring key value '$p' in hash build\n") if $_debug;
3522	76					218	next;
3523							}
3524	1269					1689	push @{$h{$p}}, $k;
	1269					5686
3525							}
3526	72					494	for (keys %h) {
3527	1178					1330	@{$h{$_}} = sort { $a <=> $b } @{$h{$_}};
	1178					2026
	103					222
	1178					2065
3528							}
3529
3530	72					193	$self->{_hash_build_count}++;
3531	72					805	$self->{$hash_name} = { %h };
3532	72					218	$self->{$projector_name} = $projector;
3533	72					525	return ($hash_name, $projector_name);
3534							}
3535
3536							sub _get_hash_build_count {
3537	6			6		878	my $self = shift;
3538
3539	6					18	return _get_def($self->{_hash_build_count}, 0);
3540							}
3541
3542							sub search {
3543	563			563	1	8507	my $self = shift;
3544	563					4719	validate_pos(@_,
3545							{type => SCALAR}, {type => UNDEF \| SCALAR}, {type => UNDEF \| HASHREF, optional => 1});
3546	563					1685	my ($field, $value, $param_opts) = @_;
3547
3548	563					1140	my $croak_if_error = $self->{croak_if_error};
3549
3550							#
3551							# FIXME?
3552							# A bit overkill to check options each time search is called...
3553							# To be thought about.
3554							#
3555
3556	563	100				1053	my @tmp = %{$param_opts} if $param_opts;
	421					924
3557	563					7579	my %opts = validate(@tmp, $SEARCH_VALIDATE_OPTIONS);
3558
3559	562	50				1930	return undef unless $self->_check_for_search($field);
3560
3561							# $self->_print_error("undef value in search call") if !defined($value);
3562	561	50				959	$value = '' unless defined($value);
3563
3564	561					1094	my ($hash_name, $projector_name) = $self->_get_hash_and_projector($field, \%opts);
3565
3566	561					1358	my $ret = $self->{$hash_name}->{$self->{$projector_name}->($value)};
3567
3568	561	100				1590	return $ret if defined($ret);
3569	185					516	return [ ];
3570							}
3571
3572							sub search_1hr {
3573	22			22	1	5923	my $self = shift;
3574	22					275	validate_pos(@_,
3575							{type => SCALAR}, {type => UNDEF \| SCALAR}, {type => UNDEF \| HASHREF, optional => 1});
3576	22					77	my ($field, $value, $arg_opts) = @_;
3577
3578	22					49	my $r = $self->search($field, $value, $arg_opts);
3579
3580	22	100				57	return undef unless defined($r->[0]);
3581
3582	20					39	my $opts = _get_def($arg_opts, { });
3583							my $opt_ignore_ambiguous = _get_def($opts->{'ignore_ambiguous'},
3584	20					61	$self->{'search_ignore_ambiguous'}, $DEF_SEARCH_IGNORE_AMBIGUOUS);
3585
3586	20	100	100			34	return undef if @{$r} >= 2 and !$opt_ignore_ambiguous;
	20					83
3587	14					42	return $self->get_row_hr($r->[0]);
3588							}
3589
3590							sub vlookup {
3591	317			317	1	21706	my $self = shift;
3592	317					4400	validate_pos(@_, {type => SCALAR}, {type => UNDEF \| SCALAR}, {type => SCALAR},
3593							{type => UNDEF \| HASHREF, optional => 1});
3594	317					1207	my ($searched_field, $value, $target_field, $arg_opts) = @_;
3595
3596	317					735	my $r = $self->search($searched_field, $value, $arg_opts);
3597	315	50				674	return undef unless $self->_check_for_search($target_field);
3598
3599	314					627	my $opts = _get_def($arg_opts, { });
3600	314	100	66			745	unless (defined($r->[0])) {
3601							return (exists $opts->{'value_if_not_found'} ? $opts->{'value_if_not_found'} :
3602	143	100				510	$self->{'search_value_if_not_found'});
3603							} elsif (@{$r} >= 2) {
3604							my $opt_ignore_ambiguous = _get_def($opts->{'ignore_ambiguous'},
3605							$self->{'search_ignore_ambiguous'}, $DEF_SEARCH_IGNORE_AMBIGUOUS);
3606							return (exists $opts->{'value_if_ambiguous'} ? $opts->{'value_if_ambiguous'} :
3607							$self->{'search_value_if_ambiguous'}) if !$opt_ignore_ambiguous;
3608							}
3609
3610	144	100				285	return $opts->{value_if_found} if exists $opts->{value_if_found};
3611	140	50				239	return $self->{search_value_if_found} if exists $opts->{search_value_if_found};
3612
3613	140					352	my $hr = $self->get_row_hr($r->[0]);
3614
3615	140					591	return $hr->{$target_field};
3616							}
3617
3618							1;
3619
3620							__END__
3621
3622							=pod
3623
3624							=encoding UTF-8
3625
3626							=head1 NAME
3627
3628							Text::AutoCSV - helper module to automate the use of Text::CSV
3629
3630							=head1 VERSION
3631
3632							version 1.1.9
3633
3634							=head1 SYNOPSIS
3635
3636							By default, Text::AutoCSV will detect the following characteristics of the input:
3637
3638							- The separator, among ",", ";" and "\t" (tab)
3639
3640							- The escape character, among '"' (double-quote) and '\\' (backslash)
3641
3642							- Try UTF-8 and if it fails, fall back on latin1
3643
3644							- Read the header line and compute field names
3645
3646							- If asked to (see L</fields_dates_auto>), detect any field that contains a DateTime value, trying
3647							20 date formats, possibly followed by a time (6 time formats tested)
3648
3649							- If asked to (see L</fields_dates>), detect DateTime format of certain fields, croak if no DateTime
3650							format can be worked out
3651
3652							- Fields identified as containing a DateTime value (L</fields_dates_auto> or L</fields_dates>) are
3653							stored as DateTime objects by default
3654
3655							Text::AutoCSV also provides methods to search on fields (using cached hash tables) and it can
3656							populate the value of "remote" fields, made from joining 2 CSV files with a key-value search
3657
3658							=head2 General
3659
3660							use Text::AutoCSV;
3661
3662							Text::AutoCSV->new()->write(); # Read CSV data from std input, write to std output
3663
3664							Text::AutoCSV->new(in_file => 'f.csv')->write(); # Read CSV data from f.csv, write to std output
3665
3666							# Read CSV data from f.csv, write to g.csv
3667							Text::AutoCSV->new(in_file => 'f.csv', out_file => 'g.csv')->write();
3668
3669							# "Rewrite" CSV file by printing out records as a list (separated by line breaks) of field
3670							# name followed by its value.
3671							my $csv = Text::AutoCSV->new(in_file => 'in.csv', walker_hr => \&walk);
3672							my @cols = $csv->get_fields_names();
3673							$csv->read();
3674							sub walk {
3675							my %rec = %{$_[0]};
3676							for (@cols) {
3677							next if $_ eq '';
3678							print("$_ => ", $rec{$_}, "\n");
3679							}
3680							print("\n");
3681							}
3682
3683							=head2 OBJ-ish functions
3684
3685							# Identify column internal names with more flexibility as the default mechanism
3686							my $csv = Text::AutoCSV->new(in_file => 'zips.csv',
3687							fields_hr => {'CITY' => '^(city\|town)', 'ZIPCODE' => '^zip(code)?$'});
3688							# Get zipcode of Claix
3689							my $z = $csv->vlookup('CITY', ' claix ', 'ZIPCODE');
3690
3691							my $csv = Text::AutoCSV->new(in_file => 'zips.csv');
3692							# Get zipcode of Claix
3693							my $z = $csv->vlookup('CITY', ' claix ', 'ZIPCODE');
3694							# Same as above, but vlookup is strict for case and spaces around
3695							my $csv = Text::AutoCSV->new(in_file => 'zips.csv', search_case => 1, search_trim => 0);
3696							my $z = $csv->vlookup('CITY', 'Claix', 'ZIPCODE');
3697
3698							# Create field 'MYCITY' made by taking pers.csv' ZIP column value, looking it up in the
3699							# ZIPCODE columns of zips.csv, taking CITY colmun value and naming it 'MYCITY'. Output is
3700							# written in std output.
3701							# If a zipcode is ambiguous, say it.
3702							Text::AutoCSV->new(in_file => 'pers.csv')
3703							->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv',
3704							{ ignore_ambiguous => 0, value_if_ambiguous => '<duplicate zipcode found!>' })->write();
3705
3706							# Note the above can also be written using Text::AutoCSV level attributes:
3707							Text::AutoCSV->new(in_file => 'pers.csv',
3708							search_ignore_ambiguous => 0, search_value_if_ambiguous => '<duplicate zipcode found!>')
3709							->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv')->write();
3710
3711							# Create 'MYCITY' field as above, then display some statistics
3712							my $nom_compose = 0;
3713							my $zip_not_found = 0;
3714							Text::AutoCSV->new(in_file => 'pers.csv', walker_hr => \&walk)
3715							->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv')->read();
3716							sub walk {
3717							my $hr = shift;
3718							$nom_compose++ if $hr->{'NAME'} =~ m/[- ]/;
3719							$zip_not_found++ unless defined($hr->{'MYCITY'});
3720							}
3721							print("Number of persons with a multi-part name: $nom_compose\n");
3722							print("Number of persons with unknown zipcode: $zip_not_found\n");
3723
3724							=head2 Updating
3725
3726							Text::AutoCSV->new(in_file => 'names.csv', out_file => 'ucnames.csv',
3727							read_post_update_hr => \&updt)->write();
3728							sub updt { $_[0]->{'LASTNAME'} =~ s/^.*$/\U&/; }
3729
3730							Text::AutoCSV->new(in_file => 'squares.csv', out_file => 'checkedsquares.csv',
3731							out_filter => \&wf)->write();
3732							sub wf { return ($_[0]->{'X'} ** 2 == $_[0]->{'SQUAREOFX'}); }
3733
3734							# Add a field for the full name, made of the concatenation of the
3735							# first name and the last name.
3736							# Also display stats about empty full names.
3737							Text::AutoCSV->new(in_file => 'dirpeople.csv', out_file => 'dirwithfn.csv', verbose => 1)
3738							->field_add_computed('FULLNAME', \&calc_fn)->write();
3739							sub calc_fn {
3740							my ($field, $hr, $stats) = @_;
3741							my $fn = $hr->{'FIRSTNAME'} . ' ' . uc($hr->{'LASTNAME'});
3742							$stats->{'empty full name'}++ if $fn eq ' ';
3743							return $fn;
3744							}
3745
3746							# Read a file with a lot of columns and keep only 2 columns in output
3747							Text::AutoCSV->new(in_file => 'big.csv', out_file => 'addr.csv',
3748							out_fields => ['NAME', 'ADDRESS'])
3749							->out_header('ADDRESS', 'Postal Address')
3750							->write();
3751
3752							=head2 Datetime management
3753
3754							# Detect any field containing a DateTime value and convert it to yyyy-mm-dd whatever the
3755							# input format is.
3756							Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', fields_dates_auto => 1,
3757							out_dates_format => '%F')->write();
3758
3759							# Detect any field containing a DateTime value and convert it to a US DateTime whatever the
3760							# input format is.
3761							Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', fields_dates_auto => 1,
3762							out_dates_format => '%b %d, %Y, %I:%M:%S %p', out_dates_locale => 'en')->write();
3763
3764							# Find dates of specific formats and convert it into yyyy-mm-dd
3765							Text::AutoCSV->new(in_file => 'raw.csv', out_file => 'cooked.csv',
3766							dates_formats_to_try => ['%d_%m_%Y', '%m_%d_%Y', '%Y_%m_%d'],
3767							out_dates_format => '%F')->write();
3768
3769							# Take the dates on columns 'LASTLOGIN' and 'CREATIONDATE' and convert it into French dates
3770							# (day/month/year).
3771							# Text::AutoCSV will croak if LASTLOGIN or CREATIONDATE do not contain a DateTime format.
3772							# By default, Text::AutoCSV will try 20 different formats.
3773							Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv',
3774							fields_dates => ['LASTLOGIN', 'CREATIONDATE'], out_dates_format => '%d/%m/%Y')->write();
3775
3776							# Convert 2 DateTime fields into unix standard epoch
3777							# Write -1 if DateTime is empty.
3778							sub toepoch { return $_->epoch() if $_; -1; }
3779							Text::AutoCSV->new(in_file => 'stats.csv', out_file => 'stats-epoch.csv',
3780							fields_dates => ['ATIME', 'MTIME'])
3781							->in_map('ATIME', \&toepoch)
3782							->in_map('MTIME', \&toepoch)
3783							->write();
3784
3785							# Do the other way round from above: convert 2 fields containing unix standard epoch into a
3786							# string displaying a human-readable DateTime.
3787							my $formatter = DateTime::Format::Strptime->new(pattern => 'DATE=%F, TIME=%T');
3788							sub fromepoch {
3789							return $formatter->format_datetime(DateTime->from_epoch(epoch => $_)) if $_ >= 0;
3790							'';
3791							}
3792							$csv = Text::AutoCSV->new(in_file => 'stats-epoch.csv', out_file => 'stats2.csv')
3793							->in_map('ATIME', \&fromepoch)
3794							->in_map('MTIME', \&fromepoch)
3795							->write();
3796
3797							=head2 Miscellaneous
3798
3799							use Text::AutoCSV 'remove_accents';
3800							# Output 'Francais: etre elementaire, Tcheque: sluzba dum' followed by a new line.
3801							print remove_accents("FranÃ§ais: Ãªtre Ã©lÃ©mentaire, TchÃ¨que: sluÅ¾ba dÅ¯m"), "\n";
3802
3803							=for Pod::Coverage ERR_UNKNOWN_FIELD
3804
3805							=head1 NAME
3806
3807							Text::AutoCSV - helper module to automate the use of Text::CSV
3808
3809							=head1 METHODS
3810
3811							=head2 new
3812
3813							my $csv = Text::AutoCSV->new(%attr);
3814
3815							(Class method) Returns a new instance of Text::AutoCSV. The object attributes are described by the
3816							hash C<%attr> (can be empty).
3817
3818							Currently the following attributes are available:
3819
3820							=over 4
3821
3822							=item Preliminary note about L</fields_hr>, L</fields_ar> and L</fields_column_names> attributes
3823
3824							By default, Text::AutoCSV assumes the input has a header and will use the field values of this first
3825							line (the header) to work out the column internal names. These internal names are used everywhere in
3826							Text::AutoCSV to designate columns.
3827
3828							The values are transformed as follows:
3829
3830							- All accents are removed using the exportable function L</remove_accents>.
3831
3832							- Any non-alphanumeric character is removed (except underscore) and all letters are switched to
3833							upper case. The regex to do this is
3834
3835							s/[^[:alnum:]_]//gi; s/^.*$/\U$&/;
3836
3837							Thus a header line of
3838
3839							'Office Number 1,Office_2,Personal Number'
3840
3841							will produce the internal column names
3842
3843							'OFFICENUMBER1' (first column)
3844
3845							'OFFICE_2' (second column)
3846
3847							'PERSONALNUMBER' (third column).
3848
3849							The attribute L</fields_hr>, L</fields_ar> or L</fields_column_names> (only one of the three is
3850							useful at a time) allows to change this behavior.
3851
3852							B<NOTE>
3853
3854							The removal of accents is not a conversion to us-ascii, see L</remove_accents> for details.
3855
3856							=item Preliminary note about fields reading
3857
3858							Functions that are given a field name (L</get_cell>, L</vlookup>, L</field_add_copy>, ...) raise an
3859							error if the field requested does not exist.
3860
3861							B<SO WILL THE HASHREFS GIVEN BY Text::AutoCSV:> when a function returns a hashref (L</search_1hr>,
3862							L</get_row_hr>, ...), the hash is locked with the C<lock_keys> function of C<Hash::Util>. Any
3863							attempt to read a non-existing key from the hash causes a croak. This feature is de-activated if you
3864							specified C<croak_if_error =E<gt> 0> when creating Text::AutoCSV object.
3865
3866							=item in_file
3867
3868							The name of the file to read CSV data from.
3869
3870							If not specified or empty, read standard input.
3871
3872							Example:
3873
3874							my $csv = Text::AutoCSV->new(in_file => 'in.csv');
3875
3876							=item inh
3877
3878							File handle to read CSV data from.
3879							Normally you don't want to specify this attribute.
3880
3881							C<inh> is useful if you don't like the way Text::AutoCSV opens the input file for you.
3882
3883							Example:
3884
3885							open my $inh, "producecsv.sh\|";
3886							my $csv = Text::AutoCSV->new(inh => $inh);
3887
3888							=item encoding
3889
3890							Comma-separated list of encodings to try to read input.
3891
3892							Note that finding the correct encoding of any given input is overkill. This script just tries
3893							encodings one after the other, and selects the first one that does not trigger a warning during
3894							reading of input. If all produce warnings, select the first one.
3895
3896							The encoding chosen is used in output, unless attribute L</out_encoding> is specified.
3897
3898							Value by default: 'UTF-8,latin1'
3899
3900							B<IMPORTANT>
3901
3902							If one tries something like C<encoding =E<gt> 'latin1,UTF-8'>, it'll almost never detect UTF-8
3903							because latin1 rarely triggers warnings during reading. It tends to be also true with encodings like
3904							UTF-16 that can remain happy with various inputs (sometimes resulting in Western languages turned
3905							into Chinese text).
3906
3907							Ultimately this attribute should be used with a unique value. The result when using more than one
3908							value can produce weird results and should be considered B<experimental>.
3909
3910							Example:
3911
3912							my $csv = Text::AutoCSV->new(in_file => 'w.csv', encoding => 'UTF-16');
3913
3914							=item via
3915
3916							Adds a C<via> to the file opening instruction performed by Text::AutoCSV. You don't want to use it
3917							under normal circumstances.
3918
3919							The value should start with a ':' character (Text::AutoCSV won't add one for you).
3920
3921							Value by default: none
3922
3923							Example:
3924
3925							my $csv = Text::AutoCSV->new(in_file => 'in.csv', via => ':raw:perlio:UTF-32:crlf');
3926
3927							=item dont_mess_with_encoding
3928
3929							If true, just ignore completely encoding and don't try to alter I/O operations with encoding
3930							considerations (using C<binmode> instruction). Note that if inh attribute is specified, then
3931							Text::AutoCSV will consider the caller manages encoding for himself and dont_mess_with_encoding will
3932							be automatically set, too.
3933
3934							B<IMPORTANT>
3935
3936							This attribute does not mean perl will totally ignore encoding and would consider character strings
3937							as bytes for example. The meaning of L</dont_mess_with_encoding> is that Text::AutoCSV itself will
3938							totally ignore encoding matters, and leave it entirely to Perl' default.
3939
3940							Value by default:
3941
3942							0 if inh attribute is not set
3943							1 if inh attribute is set
3944
3945							Example:
3946
3947							my $csv = Text::AutoCSV->new(in_file => 'in.csv', dont_mess_with_encoding => 1);
3948
3949							=item sep_char
3950
3951							Specify the CSV separator character. Turns off separator auto-detection. This attribute is passed as
3952							is to C<Text::CSV-E<gt>new()>.
3953
3954							Example:
3955
3956							my $csv = Text::AutoCSV->new(in_file => 'in.csv', sep_char => ';');
3957
3958							=item quote_char
3959
3960							Specify the field quote character. This attribute is passed as is to C<Text::CSV-E<gt>new()>.
3961
3962							Value by default: double quote ('"')
3963
3964							Example:
3965
3966							my $csv = Text::AutoCSV->new(in_file => 'in.csv', quote_char => '\'');
3967
3968							=item escape_char
3969
3970							Specify the escape character. Turns off escape character auto-detection. This attribute is passed as
3971							is to C<Text::CSV-E<gt>new()>.
3972
3973							Value by default: backslash ('\\')
3974
3975							Example:
3976
3977							my $csv = Text::AutoCSV->new(in_file => 'in.csv', escape_char => '"');
3978
3979							=item in_csvobj
3980
3981							Text::CSV object to use.
3982							Normally you don't want to specify this attribute.
3983
3984							By default, Text::AutoCSV will manage creating such an object and will work hard to detect the
3985							parameters it requires.
3986
3987							Defining C<in_csvobj> attribute turns off separator character and escape character auto-detection.
3988
3989							Using this attribute workarounds Text::AutoCSV philosophy a bit, but you may need it in case
3990							Text::AutoCSV behavior is not suitable for Text::CSV creation.
3991
3992							Example:
3993
3994							my $tcsv = Text::CSV->new();
3995							my $acsv = Text::AutoCSV->new(in_file => 'in.csv', in_csvobj => $tcsv);
3996
3997							=item has_headers
3998
3999							If true, Text::AutoCSV assumes the input has a header line.
4000
4001							Value by default: 1
4002
4003							Example:
4004
4005							my $csv = Text::AutoCSV->new(in_file => 'in.csv', has_headers => 0);
4006
4007							=item fields_hr
4008
4009							(Only if input has a header line) Hash ref that contains column internal names along with a regular
4010							expression to find it in the header line.
4011							For example if you have:
4012
4013							my $csv = Text::AutoCSV->new(in_file => 'in.csv',
4014							fields_hr => {'PHONE OFFICE' => '^office phone nu',
4015							'PHONE PERSONAL' => '^personal phone nu'});
4016
4017							And the header line is
4018
4019							'Personal Phone Number,Office Phone Number'
4020
4021							the column name 'PHONE OFFICE' will designate the second column and the column name 'PHONE PERSONAL'
4022							will designate the first column.
4023
4024							You can choose column names like 'Phone Office' and 'Phone Personal' as well.
4025
4026							The regex search is case insensitive.
4027
4028							=item fields_ar
4029
4030							(Only if input has a header line) Array ref that contains column internal names. The array is used
4031							to create a hash ref of the same kind as L</fields_hr>, by wrapping the column name in a regex. The
4032							names are surrounded by a leading '^' and a trailing '$', meaning, the name must match the entire
4033							field name.
4034
4035							For example
4036
4037							fields_ar => ['OFFICENUMBER', 'PERSONALNUMBER']
4038
4039							is strictly equivalent to
4040
4041							fields_hr => {'OFFICENUMBER' => '^officenumber$', 'PERSONALNUMBER' = '^personalnumber$'}
4042
4043							The regex search is case insensitive.
4044
4045							C<fields_ar> is useful if the internal names are identical to the file column names. It avoids
4046							repeating the names over and over as would happen if using L</fields_hr> attribute.
4047
4048							I<NOTE>
4049
4050							You might wonder why using fields_ar as opposed to Text::AutoCSV default' mechanism. There are two
4051							reasons for that:
4052
4053							1- Text::AutoCSV removes spaces from column names, and some people may want another behavior. A
4054							header name of 'Phone Number' will get an internal column name of 'PHONENUMBER' (default behavior,
4055							if none of fields_hr, fields_ar and fields_column_names attributes is specified), and one may prefer
4056							'PHONE NUMBER' or 'phone number' or whatsoever.
4057
4058							2- By specifying a list of columns using either of fields_hr or fields_ar, you not only map column
4059							names as found in the header line to internal column names: you also I<request> these columns to be
4060							available. If one of the requested columns cannot be found, Text::AutoCSV will croak (default) or
4061							print an error and return an undef object (if created with C<croak_if_error =E<gt> 0>).
4062
4063							=item fields_column_names
4064
4065							Array ref of column internal names, in the order of columns in file. This attribute works like the
4066							C<column_names> attribute of Text::CSV. It'll just assign names to columns one by one, regardless of
4067							what the header line contains. It'll work also if the file has no header line.
4068
4069							Example:
4070
4071							my $csv = Text::AutoCSV->new(in_file => 'in.csv',
4072							fields_column_names => ['My COL1', '', 'My COL3']);
4073
4074							=item out_file
4075
4076							Output file when executing the L</write> method.
4077
4078							If not specified or empty, write to standard output.
4079
4080							Example:
4081
4082							my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv');
4083
4084							=item outh
4085
4086							File handle to write CSV data to when executing the L</write> method.
4087							Normally you don't want to specify this attribute.
4088
4089							C<outh> is useful if you don't like the way Text::AutoCSV opens the output file for you.
4090
4091							Example:
4092
4093							my $outh = open "myin.csv', ">>";
4094							my $csv = Text::AutoCSV->new(in_file => 'in.csv', has_headers => 0, outh => $outh);
4095
4096							=item out_encoding
4097
4098							Enforce the encoding of output.
4099
4100							Value by default: input encoding
4101
4102							Example:
4103
4104							my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv',
4105							out_encoding => 'UTF-16');
4106
4107							=item out_utf8_bom
4108
4109							Enforce BOM (Byte-Order-Mark) on output, when it is UTF8. If output encoding is not UTF-8, this
4110							attribute is ignored.
4111
4112							B<NOTE>
4113
4114							UTF-8 needs no BOM (there is no Byte-Order in UTF-8), and in practice, UTF8-encoded files rarely
4115							have a BOM.
4116
4117							Using this attribute is not recommended. It is provided for the sake of completeness, and also to
4118							produce Unicode files Microsoft EXCEL will be happy to read.
4119
4120							At first sight it would seem more logical to make EXCEL happy with something like this:
4121
4122							out_encoding => 'UTF-16'
4123
4124							But... While EXCEL will identify UTF-16 and read it as such, it will not take into account the BOM
4125							found at the beginning. In the end the first cell will have 2 useless characters prepended. The only
4126							solution the author knows to workaround this issue if to use UTF-8 as output encoding, and enforce a
4127							BOM. That is, use:
4128
4129							..., out_encoding => 'UTF-8', out_utf8_bom => 1, ...
4130
4131							=item out_sep_char
4132
4133							Enforce the output CSV separator character.
4134
4135							Value by default: input separator
4136
4137							Example:
4138
4139							my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', out_sep_char => ',');
4140
4141							=item out_quote_char
4142
4143							Enforce the output CSV quote character.
4144
4145							Value by default: input quote character
4146
4147							Example:
4148
4149							my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', out_quote_char => '"');
4150
4151							=item out_escape_char
4152
4153							Enforce the output CSV escape character.
4154
4155							Value by default: input escape character
4156
4157							Example:
4158
4159							my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv',
4160							out_escape_char_char => '\\');
4161
4162							=item out_always_quote
4163
4164							If true, quote all fields of output (set always_quote of Text::CSV).
4165
4166							If false, don't quote all fields of output (don't set C<always_quote> of Text::CSV).
4167
4168							Value by default: same as what is found in input
4169
4170							While reading input, Text::AutoCSV works out whether or not all fields were quoted. If yes, then the
4171							output Text::CSV object has the always_quote attribute set, if no, then the output Text::CSV object
4172							does not have this attribute set.
4173
4174							Example:
4175
4176							my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', out_always_quote => 1);
4177
4178							=item out_has_headers
4179
4180							If true, when writing output, write a header line on first line.
4181
4182							If false, when writing output, don't write a header line on first line.
4183
4184							Value by default: same as has_headers attribute
4185
4186							Example 1
4187
4188							Read standard input and write to standard output, removing the header line.
4189
4190							Text::AutoCSV->new(out_has_headers => 0)->write();
4191
4192							Example 2
4193
4194							Read standard input and write to standard output, adding a header line.
4195
4196							Text::AutoCSV->new(fields_column_names => ['MYCOL1', 'MYCOL2'], out_has_headers => 1)->write();
4197
4198							=item no_undef
4199
4200							If true, non-existent column values are set to an empty string instead of undef. It is also done on
4201							extra fields that happen to have an undef value (for example when the target of a linked field is
4202							not found).
4203
4204							Note this attribute does not work on callback functions output set with L</in_map>: for example
4205							empty DateTime values (on fields identified as containing a date/time, see C<dates_*> attributes
4206							below) are set to C<undef>, even while C<no_undef> is set. Indeed setting it to an empty string
4207							while non-empty values would contain a Datetime object would not be clean. An empty value in a
4208							placeholder containing an object must be undef.
4209
4210							Since version 1.1.5 of Text::AutoCSV, C<no_undef> is examined when sending parameter ($_) to
4211							L</in_map> callback: an undef value is now passed as is (as undef), unless C<no_undef> is set. If
4212							C<no_undef> is set, and field value is undef, then $_ is set to the empty string ('') when calling
4213							callback defined by L</in_map>. This new behavior was put in place to be consistent with what is
4214							being done with DateTime values.
4215
4216							Value by default: 0
4217
4218							Example:
4219
4220							my $csv = Text::AutoCSV->new(in_file => 'in.csv', no_undef => 1);
4221
4222							=item read_post_update_hr
4223
4224							To be set to a ref sub. Each time a record is read from input, call C<read_post_update_hr> to update
4225							the hash ref of the record. The sub is called with 2 arguments: the hash ref to the record value and
4226							the hash ref to stats.
4227
4228							The stats allow to count events and are printed in the end of reading in case Text::AutoCSV is
4229							called in verbose mode (C<verbose =E<gt> 1>).
4230
4231							For example, the C<read_post_update_hr> below will turn column 'CITY' values in upper case and count
4232							occurences of empty cities in stat display:
4233
4234							Text::AutoCSV->new(in_file => 'addresses.csv', read_post_update_hr => \&updt, verbose => 1)
4235							->write();
4236							sub updt {
4237							my ($hr, $stats) = @_;
4238							$hr->{'CITY'} =~ s/^.*$/\U$&/;
4239							$stats->{'empty city encountered'}++ if $hr->{'CITY'} eq '';
4240							}
4241
4242							B<IMPORTANT>
4243
4244							You cannot create a field this way. To create a field, you have to use the member functions
4245							L</field_add_link>, L</field_add_copy> or L</field_add_computed>.
4246
4247							B<NOTE>
4248
4249							If you wish to manage some updates at field level, consider registering update functions with
4250							L</in_map> and L</out_map> member functions. These functions register callbacks that work at field
4251							level and with $_ variable (thus the callback function invoked is AutoCSV-agnostic).
4252
4253							L</in_map> updates a field after read, L</out_map> updates the field content before writing it.
4254
4255							=item walker_hr
4256
4257							To set to a sub ref that'll be executed each time a record is read from input. It is executed after
4258							L</read_post_update_hr>. The sub is called with 2 arguments: the hash ref to the record value and
4259							the hash ref to stats.
4260
4261							Note L</read_post_update_hr> is meant for updating record fields just after reading, whereas
4262							L</walker_hr> is read-only.
4263
4264							The stats allow to count events and are printed in the end of reading in case Text::AutoCSV is
4265							called in verbose mode (C<verbose =E<gt> 1>). If the L</verbose> attribute is not set, the stats are
4266							not displayed, however you can get stats by calling the get_stats function.
4267
4268							The example below will count in the stats the number of records where the 'CITY' field is empty.
4269							Thanks to C<verbose =E<gt> 1> attribute, at the end of reading the stats are displayed.
4270
4271							my $csv = Text::AutoCSV->new(in_file => 'addresses.csv', walker_hr => \&walk1,
4272							verbose => 1)->read();
4273							sub walk1 {
4274							my ($hr, $stats) = @_;
4275							$stats->{'empty city'}++ if $hr->{'CITY'} eq '';
4276							}
4277
4278							=item walker_ar
4279
4280							To set to a sub ref that'll be executed each time a record is read from input. It is executed after
4281							L</read_post_update_hr>. The sub is called with 2 arguments: the array ref to the record value and
4282							the hash ref to stats.
4283
4284							Note L</read_post_update_hr> is meant for updating record fields just after reading, whereas
4285							C<walker_hr> is read-only.
4286
4287							The stats allow to count events and are printed in the end of reading in case Text::AutoCSV is
4288							called in verbose mode (C<verbose =E<gt> 1>). If the L</verbose> attribute is not set, the stats are
4289							lost.
4290
4291							The array ref contains values in their natural order in the CSV. To be used with the column names,
4292							you have to use L</get_fields_names> member function.
4293
4294							The example below will count in the stats the number of records where the 'CITY' field is empty.
4295							Thanks to C<verbose =E<gt> 1> attribute, at the end of reading the stats are displayed. It produces
4296							the exact same result as the example in walker_hr attribute, but it uses walker_ar.
4297
4298							use List::MoreUtils qw(first_index);
4299							my $csv = Text::AutoCSV->new(in_file => 'addresses.csv', walker_ar => \&walk2, verbose => 1);
4300							my @cols = $csv->get_fields_names();
4301							my $idxCITY = first_index { /^city$/i } @cols;
4302							die "No city field!??" if $idxCITY < 0;
4303							$csv->read();
4304							sub walk2 {
4305							my ($ar, $stats) = @_;
4306							$stats->{'empty city'}++ if $ar->[$idxCITY] eq '';
4307							}
4308
4309							=item write_filter_hr
4310
4311							Alias of L</out_filter>.
4312
4313							=item out_filter
4314
4315							To be set to a ref sub. Before writing a record to output, C<out_filter> is called and the record
4316							gets writen only if C<out_filter> return value is true. The sub is called with 1 argument: the hash
4317							ref to the record value.
4318
4319							For example, if you want to output only records where the 'CITY' column value is Grenoble:
4320
4321							Text::AutoCSV->new(in_file => 'addresses.csv', out_file => 'grenoble.csv',
4322							out_filter => \&filt)->write();
4323							sub filt {
4324							my $hr = shift;
4325							return 1 if $hr->{'CITY'} =~ /^grenoble$/i;
4326							return 0;
4327							}
4328
4329							=item write_fields
4330
4331							Alias of L</out_fields>.
4332
4333							=item out_fields
4334
4335							Set to an array ref. List fields to write to output.
4336
4337							Fields are written in their order in the array ref, the first CSV column being the first element in
4338							the array, and so on. Fields not listed in B<out_fields> are not written in output.
4339
4340							You can use empty field names to have empty columns in output.
4341
4342							Value by default: none, meaning, all fields are output in their natural order. What is natural
4343							order? It is the input order for fields that were read from input, and the order in which they got
4344							created for created fields.
4345
4346							Example:
4347
4348							Text::AutoCSV->new(in_file => 'allinfos.csv', out_file => 'only-addresses.csv',
4349							out_fields => [ 'NAME', 'ADDRESS' ] )->write();
4350
4351							=item out_orderby
4352
4353							Array reference to a list of fields to sort output with.
4354
4355							At the moment this feature is a bit of a hack (no option to make sort descending or ascending,
4356							numeric or text, and it is not part of test plan).
4357
4358							Example:
4359
4360							Text::AutoCSV->new(in_file => 'names.csv', out_file => 'sortednames.csv',
4361							out_orderby => [ 'LASTNAME', 'FIRSTNAME']);
4362
4363							=item search_case
4364
4365							If true, searches are case sensitive by default. Searches are done by the member functions
4366							L</search>, L</search_1hr>, L</vlookup>, and linked fields (L</field_add_link>).
4367
4368							The search functions can also be called with the option L</case>, that takes precedence over the
4369							object-level C<search_case> attribute value. See L</vlookup> help.
4370
4371							Value by default: 0 (by default searches are case insensitive)
4372
4373							Example:
4374
4375							my $csv = Text::AutoCSV->new(in_file => 'in.csv', search_case => 1);
4376
4377							=item search_trim
4378
4379							If true, searches ignore the presence of leading or trailing spaces in values.
4380
4381							The search functions can also be called with the option L</trim>, that takes precedence over the
4382							object-level C<search_trim> attribute value. See L</vlookup> help.
4383
4384							Value by default: 1 (by default searches ignore leading and trailing spaces)
4385
4386							Example:
4387
4388							my $csv = Text::AutoCSV->new(in_file => 'in.csv', search_trim => 0);
4389
4390							=item search_ignore_empty
4391
4392							If true, empty fields are not included in the search indexes.
4393
4394							The search functions can also be called with the option L</ignore_empty>, that takes precedence over
4395							the object-level C<search_ignore_empty> attribute value. See L</vlookup> help.
4396
4397							Value by default: 1 (by default, search of the value '' will find nothing)
4398
4399							Example:
4400
4401							my $csv = Text::AutoCSV->new(in_file => 'in.csv', search_ignore_empty => 0);
4402
4403							=item search_ignore_accents
4404
4405							If true, accents are ignored by search indexes.
4406
4407							The search functions can also be called with the option L</ignore_accents>, that takes precedence
4408							over the object-level C<search_ignore_accents> attribute value. See L</vlookup> help.
4409
4410							Value by default: 1 (by default, accents are ignored by search functions)
4411
4412							Example:
4413
4414							my $csv = Text::AutoCSV->new(in_file => 'in.csv', search_ignore_accents => 0);
4415
4416							=item search_value_if_not_found
4417
4418							When a search is done with a unique value to return (field_add_link member function behavior or
4419							return value of vlookup), default value of option L</value_if_not_found>. See L</vlookup>.
4420
4421							=item search_value_if_found
4422
4423							When a search is done with a unique value to return (field_add_link member function behavior or
4424							return value of vlookup), default value of option L</value_if_found>. See L</vlookup>.
4425
4426							B<IMPORTANT>
4427
4428							This attribute is extremly unusual. Once you've provided it, all vlookups and the target field value
4429							of fields created with field_add_link will all be populated with the value provided with this
4430							option.
4431
4432							Don't use it unless you know what you are doing.
4433
4434							=item search_ignore_ambiguous
4435
4436							When a search is done with a unique value to return (field_add_link member function behavior or
4437							return value of search_1hr and vlookup), default value of option L</ignore_ambiguous>. See
4438							L</vlookup>.
4439
4440							=item search_value_if_ambiguous
4441
4442							When a search is done with a unique value to return (field_add_link member function behavior or
4443							return value of vlookup), default value of option L</value_if_ambiguous>. See L</vlookup>.
4444
4445							=item fields_dates
4446
4447							Array ref of field names that contain a date.
4448
4449							Once the formats of these fields is known (auto-detection by default), each of these fields will get
4450							a specific L</in_map> sub that converts the text in a DateTime object and a L</out_map> sub that
4451							converts back from DateTime to text.
4452
4453							B<NOTE>
4454
4455							The L</out_map> given to a DateTime field is "defensive code": normally, L</in_map> converts text
4456							into a DateTime object and L</out_map> does the opposite, it takes a DateTime object and converts it
4457							to text. If ever L</out_map> encounters a value that is not a DateTime object, it'll just stringify
4458							it (evaluation in a string context), without calling its DateTime formatter.
4459
4460							If the format cannot be detected for a given field, output an error message and as always when an
4461							error occurs, croak (unless L</croak_if_error> got set to 0).
4462
4463							Value by default: none
4464
4465							Example:
4466
4467							my $csv = Text::AutoCSV->new(in_file => 'logins.csv',
4468							fields_dates => ['LASTLOGIN', 'CREATIONDATE']);
4469
4470							=item fields_dates_auto
4471
4472							Boolean value. If set to 1, will detect dates formats on all fields. Fields in which a DateTime
4473							format got detected are then managed as if they had been being listed in L</fields_dates> attribute:
4474							they get an appropriate L</in_map> sub and a L</out_map> sub to convert to and from DateTime (see
4475							L</fields_dates> attribute above).
4476
4477							C<fields_dates_auto> looks for DateTime on all fields, but it expects nothing: it won't raise an
4478							error if no field is found that contains DateTime.
4479
4480							Value by default: 0
4481
4482							Example:
4483
4484							my $csv = Text::AutoCSV->new(in_file => 'logins.csv', fields_dates_auto => 1);
4485
4486							=item fields_dates_auto_optimize
4487
4488							Relevant only if L</fields_dates_auto> is set.
4489
4490							Normally when L</fields_dates_auto> is set, the input is read completely to make sure auto-detection
4491							produces a reliable result. If C<fields_dates_auto_optimize> is set, this reading pass will stop as
4492							soon as there is no ambiguity left. That is, for every fields in input, the date format (or the fact
4493							that no date format is suitable) is known.
4494
4495							Using this option is a bit risky because it could trigger a date format detection that later in the
4496							input, would turn out to be wrong. Should that be the case, strange errors will occur, that are not
4497							easy to understand. Use it at your own risk.
4498
4499							Value by default: 0
4500
4501							Example:
4502
4503							my $csv = Text::AutoCSV->new(in_file => 'logins.csv', fields_dates_auto => 1,
4504							fields_dates_auto_optimize => 1);
4505
4506							=item dates_formats_to_try
4507
4508							Array ref of string formats.
4509
4510							Text::AutoCSV has a default built-in list of 20 date formats to try and 6 time formats (also it'll
4511							combine any date format with any time format).
4512
4513							C<dates_formats_to_try> will replace Text::AutoCSV default format-list will the one you specify, in
4514							case the default would not produce the results you expect.
4515
4516							The formats are written in Strptime format.
4517
4518							Value by default (see below about the role of the pseudo-format ''):
4519
4520							[ '',
4521							'%Y-%m-%d',
4522							'%Y.%m.%d',
4523							'%Y/%m/%d',
4524							'%m.%d.%y',
4525							'%m-%d-%Y',
4526							'%m.%d.%Y',
4527							'%m/%d/%Y',
4528							'%d-%m-%Y',
4529							'%d.%m.%Y',
4530							'%d/%m/%Y',
4531							'%m-%d-%y',
4532							'%m/%d/%y',
4533							'%d-%m-%y',
4534							'%d.%m.%y',
4535							'%d/%m/%y',
4536							'%Y%m%d%H%M%S',
4537							'%b %d, %Y',
4538							'%b %d %Y',
4539							'%b %d %T %Z %Y',
4540							'%d %b %Y',
4541							'%d %b, %Y' ]
4542
4543							B<IMPORTANT>
4544
4545							The empty format (empty string) has a special meaning: when specified, Text::AutoCSV will be able to
4546							identify fields that contain only a time (not preceeded by a date).
4547
4548							B<Note>
4549
4550							Format identification is over only when there is no more ambiguity. So the usual pitfall of US
4551							versus French dates (month-day versus day-month) gets resolved only when a date is encountered that
4552							disambiguates it (a date of 13th of the month or later).
4553
4554							Example with a weird format that uses underscores to separate elements, using either US (month, day,
4555							year), French (day, month, year), or international (year, month, day) order:
4556
4557							my $csv = Text::AutoCSV->new(in_file => 'logins.csv',
4558							dates_formats_to_try => ['%d_%m_%Y', '%m_%d_%Y', '%Y_%m_%d']);
4559
4560							=item dates_formats_to_try_supp
4561
4562							Same as L</dates_formats_to_try> but instead of replacing the default list of formats used during
4563							detection, it is added to this default list.
4564
4565							You want to use this attribute if you need a specific DateTime format while continuing to benefit
4566							from the default list.
4567
4568							B<IMPORTANT>
4569
4570							Text::AutoCSV will identify a given Datetime format only when there is no ambiguity, meaning, one
4571							unique Datetime format matches (all other failed). Adding a format that already exists in the
4572							default list will prevent the format from being identified, as it'll always be ambiguous. See
4573							L</dates_formats_to_try> for the default list of formats.
4574
4575							Example:
4576
4577							my $csv = Text::AutoCSV->new(in_file => 'logins.csv',
4578							dates_formats_to_try_supp => ['%d_%m_%Y', '%m_%d_%Y', '%Y_%m_%d']);
4579
4580							=item dates_ignore_trailing_chars
4581
4582							If set to 1, DateTime auto-detection will ignore trailing text that may follow detected
4583							DateTime-like text.
4584
4585							Value by default: 1 (do ignore trailing chars)
4586
4587							my $csv = Text::AutoCSV->new(in_file => 'logins.csv', dates_ignore_trailing_chars => 0);
4588
4589							=item dates_search_time
4590
4591							If set to 1, look for times when detecting DateTime format. That is, whenever a date format
4592							candidate is found, a longer candidate that also contains a time (after the date) is tested.
4593
4594							Value by default: 1 (do look for times when auto-detecting DateTime formats)
4595
4596							Example:
4597
4598							my $csv = Text::AutoCSV->new(in_file => 'logins.csv', dates_search_time => 0);
4599
4600							=item dates_locales
4601
4602							Comma-separated string of locales to test when detecting DateTime formats. Ultimately, Text::AutoCSV
4603							will try all combinations of date formats, times and locales.
4604
4605							Value by default: none (use perl default locale)
4606
4607							Example:
4608
4609							my $csv = Text::AutoCSV->new(in_file => 'logins.csv', dates_locales => 'fr,de,en');
4610
4611							=item dates_zeros_ok
4612
4613							Boolean. If true, a date made only of 0s is regarded as being empty.
4614
4615							For example if C<dates_zeros_ok> is False, then a date like 0000-00-00 will be always incorrect (as
4616							the day and month are out of bounds), therefore a format like '%Y-%m-%d' will never match for the
4617							field.
4618
4619							Conversely if C<dates_zeros_ok> is true, then a date like 0000-00-00 will be processed as if being
4620							the empty string, thus the detection of format will work and when parsed, this "full of zeros" dates
4621							will be processed the same way as the empty string (= value will be undef).
4622
4623							B<IMPORTANT>
4624
4625							"0s dates" are evaluated to undef when parsed, thus when converted back to text (out_map), they are
4626							set to an empty string, not to the original value.
4627
4628							Value by default: 1
4629
4630							Example:
4631
4632							my $csv = Text::AutoCSV->new(in_file => 'in.csv', dates_zeros_ok => 0);
4633
4634							=item out_dates_format
4635
4636							Enforce the format of dates in output, for all fields that contain a DateTime value.
4637
4638							The format is written in Strptime format.
4639
4640							Value by default: none (by default, use format detected on input)
4641
4642							Example:
4643
4644							# Detect any field containing a DateTime value and convert it to yyyy-mm-dd whatever the
4645							# input format is.
4646							Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', fields_dates_auto => 1,
4647							out_dates_format => '%F')->write();
4648
4649							=item out_dates_locale
4650
4651							Taken into account only if L</out_dates_format> is used.
4652
4653							Sets the locale to apply on L</out_dates_format>.
4654
4655							Value by default: none (by default, use the locale detected on input)
4656
4657							Example:
4658
4659							# Detect any field containing a DateTime value and convert it to a US DateTime whatever the
4660							# input format is.
4661							Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv', fields_dates_auto => 1,
4662							out_dates_format => '%b %d, %Y, %I:%M:%S %p', out_dates_locale => 'en')->write();
4663
4664							=item croak_if_error
4665
4666							If true, stops the program execution in case of error.
4667
4668							B<IMPORTANT>
4669
4670							Value by default: 1
4671
4672							If set to zero (C<croak_if_error =E<gt> 0>), errors are displayed as warnings. This printing can
4673							then be affected by setting the L</quiet> attribute.
4674
4675							=item verbose
4676
4677							If true, get Text::AutoCSV to be a bit talkative instead of speaking only when warnings and errors
4678							occur. Verbose output is printed to STDERR by default, this can be tuned with the L</infoh>
4679							attribute.
4680
4681							Value by default: 0
4682
4683							Example:
4684
4685							my $csv = Text::AutoCSV->new(in_file => 'in.csv', verbose => 1);
4686
4687							=item infoh
4688
4689							File handle to display program's verbose output. Has effect mainly with attribute
4690							C<verbose =E<gt> 1>.
4691
4692							Note B<infoh> is used to display extra information in case of error (if a field does not exist,
4693							Text::AutoCSV will display the list of existing fields). If you don't want such output, you can set
4694							C<infoh> to undef.
4695
4696							Value by default: \*STDERR
4697
4698							Example:
4699
4700							open my $infoh, ">", "log.txt";
4701							my $csv = Text::AutoCSV->new(in_file => 'in.csv', infoh => $infoh);
4702
4703							=item quiet
4704
4705							If true, don't display warnings and errors, unless croaking.
4706
4707							If L</croak_if_error> attribute is set (as per default), still, Text::AutoCSV will produce output
4708							(on STDERR) when croaking miserably.
4709
4710							When using C<croak_if_error =E<gt> 0>, errors are processed as warnings and if L</quiet> is set (in
4711							addition to L</croak_if_error> being set to 0), there'll be no output. Note this way of working is
4712							not recommended, as things can go wrong without any notice to the caller.
4713
4714							Example:
4715
4716							my $csv = Text::AutoCSV->new(in_file => 'in.csv', quiet => 1);
4717
4718							=item one_pass
4719
4720							If true, Text::AutoCSV will perform one reading of the input. If other readings are triggered, it'll
4721							raise an error and no reading will be done. Should that be the case (you ask Text::AutoCSV to do
4722							something that'll trigger more than one reading of input), Text::AutoCSV will croak as is always the
4723							case if an error occurs.
4724
4725							Normally Text::AutoCSV will do multiple reads of input to work out certain characteristics of the
4726							CSV: guess of encoding and guess of escape character.
4727
4728							Also if member functions like L</field_add_link>, L</field_add_copy>, L</field_add_computed>,
4729							L</read> or L</write> are called after input has already been read, it'll trigger further reads as
4730							needed.
4731
4732							If one wishes a unique read of the input to occur, one_pass attribute is to be set.
4733
4734							When true, encoding will be assumed to be the first one in the provided list (L</encoding>
4735							attribute), if no encoding attribute is provided, it'll be the first one in the default list, to
4736							date, it is UTF-8.
4737
4738							When true, and if attribute L</escape_char> is not set, escape_char will be assumed to be '\\'
4739							(backslash).
4740
4741							By default, one_pass is set if inh attribute is set (caller provides the input file handle of input)
4742							or if input file is stdin (in_file attribute not set or set to an empty string).
4743
4744							Value by default:
4745
4746							0 if inh attribute is not set and in_file attribute is set to a non empty string
4747							1 if inh attribute is set or in_file is not set or set to an empty string
4748
4749							Example:
4750
4751							my $csv = Text::AutoCSV->new(in_file => 'in.csv', one_pass => 1);
4752
4753							=back
4754
4755							=head2 read
4756
4757							$csv->read();
4758
4759							Read input entirely.
4760
4761							B<Return value>
4762
4763							Returns the object itself in case of success.
4764							Returns undef if error.
4765
4766							Callback functions (when defined) are invoked, in the following order:
4767
4768							L</read_post_update_hr>, intended to do updates on fields values after each record read
4769
4770							L</walker_ar>, called after each record read, with an array ref of fields values
4771
4772							L</walker_hr>, called after each record read, with a hash ref of fields values
4773
4774							Example:
4775
4776							# Do nothing - just check CSV can be read successfully
4777							Text::AutoCSV->new(in_file => 'in.csv')->read();
4778
4779							=head2 read_all_in_mem
4780
4781							$csv->read_all_in_mem();
4782
4783							Created in version 1.1.5. Before, existed only as _read_all_in_mem, meaning, was private.
4784
4785							Read input entirely, as with L</read> function, but enforcing content to be kept in-memory.
4786
4787							Having the content kept in-memory is implied by search functions (L</vlookup> for example). With
4788							C<read_all_in_mem> you can enforce this behavior without doing a fake search.
4789
4790							=head2 reset_next_record_hr
4791
4792							$csv->reset_next_record_hr();
4793
4794							Reset the internal status to start from the beginning with L</get_next_record_hr>. Used in
4795							conjunction with L</get_next_record_hr>.
4796
4797							=head2 get_next_record_hr
4798
4799							my $hr = $csv->get_next_record_hr(\$opt_key);
4800
4801							Get the next record content as a hash ref. C<$hr> is undef when the end of records has been reached.
4802
4803							When specified, C<$opt_key> is set to the current (returned) record key.
4804
4805							B<NOTE>
4806
4807							You do not need to call L</reset_next_record_hr> once before using C<get_next_record_hr>.
4808
4809							Therefore L</reset_next_record_hr> is useful only if you wish to restart from the beginning before
4810							you've reached the end of the records.
4811
4812							B<NOTE bis>
4813
4814							L</walker_hr> allows to execute some code each time a record is read, and it better fits with
4815							Text::AutoCSV philosophy. Using a loop with C<get_next_record_hr> is primarily meant for
4816							Text::AutoCSV internal usage. Also when using this mechanism, you get very close to original
4817							Text::CSV logic, that makes Text::AutoCSV less useful.
4818
4819							B<Return value>
4820
4821							A hashref of the record, or undef once there's no more record to return.
4822
4823							Example:
4824
4825							while (my $hr = $csv->get_next_record_hr()) {
4826							say Dumper($hr);
4827							}
4828
4829							=head2 write
4830
4831							$csv->write();
4832
4833							Write input into output.
4834
4835							B<Return value>
4836
4837							Returns the object itself in case of success.
4838							Returns undef if error.
4839
4840							- If the content is not in-memory at the time write() is called:
4841
4842							Each record is read (with call of L</read_post_update_hr>, L</walker_ar> and L</walker_hr>) and then
4843							written. The read-and-write is done in sequence, each record is written to output before the next
4844							record is read from input.
4845
4846							- If the content is in-memory at the time write() is called:
4847
4848							No L</read> operation is performed, instead, records are directly written to output.
4849
4850							If defined, L</out_filter> is called for each record. If the return value of L</out_filter> is
4851							false, the record is not written.
4852
4853							Example:
4854
4855							# Copy input to output.
4856							# As CSV is parsed in-between, this copy also checks a number of characteristics about the
4857							# input, as opposed to a plain file copy operation.
4858							Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv')->write();
4859
4860							=head2 out_header
4861
4862							$csv->out_header($field, $header);
4863
4864							Set the header text of C<$field> to C<$header>.
4865
4866							By default, the input header value is rewritten as is to output. C<out_header> allows you to change
4867							it.
4868
4869							B<Return value>
4870
4871							Returns the object itself.
4872
4873							Example:
4874
4875							Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv')
4876							->out_header('LOGIN', 'Login')
4877							->out_header('FULLNAME', 'Full Name')
4878							->write();
4879
4880							=head2 print_id
4881
4882							$csv->print_id();
4883
4884							Print out a description of input. Write to \*STDERR by default or to L</infoh> attribute if set.
4885
4886							The description consists in a list of a few characteristics (CSV separator and the like) followed by
4887							the list of columns with the details of each.
4888
4889							Example of output:
4890
4891							If you go to the C<utils> directory of this module and execute the following:
4892
4893							./csvcopy.pl -i f1.csv -l "1:,A->B,f2.csv" --id
4894
4895							You will get this output:
4896
4897							-- f1.csv:
4898							sep_char: ,
4899							escape_char: \
4900							in_encoding: UTF-8
4901							is_always_quoted: no
4902
4903							# FIELD HEADER EXT DATA DATETIME FORMAT DATETIME LOCALE
4904							- ----- ------ -------- --------------- ---------------
4905							0 TIMESTAMP timestamp %Y%m%d%H%M%S
4906							1 A a
4907							2 B b
4908							3 C c
4909							4 D d %d/%m/%Y
4910							5 1:SITE 1:SITE link: f2.csv, chain: A->B->* (SITE)
4911							6 1:B 1:B link: f2.csv, chain: A->B->* (B)
4912
4913							=head2 field_add_computed
4914
4915							$csv->field_add_computed($new_field, $subref);
4916
4917							C<$new_field> is the name of the created field.
4918
4919							C<$subref> is a reference to a sub that'll calculate the new field value.
4920
4921							B<Return value>
4922
4923							Returns the object itself in case of success.
4924							Returns undef if error.
4925
4926							Add a field calculated from other fields values. The subref runs like this:
4927
4928							sub func {
4929							# $new_field is the name of the field (allows to use one subref for more than one field
4930							# calculation).
4931							# $hr is a hash ref of fields values.
4932							# $stats is a hash ref that gets printed (if Text::AutoCSV is created with verbose => 1)
4933							# in the end.
4934							my ($new_field, $hr, $stats) = @_;
4935
4936							my $field_value;
4937							# ... compute $field_value
4938
4939							return $field_value;
4940							}
4941
4942							Example:
4943
4944							# Add a field for the full name, made of the concatenation of the
4945							# first name and the last name.
4946							Text::AutoCSV->new(in_file => 'dirpeople.csv', out_file => 'dirwithfn.csv', verbose => 1)
4947							->field_add_computed('FULLNAME', \&calc_fn)->write();
4948							sub calc_fn {
4949							my ($new_field, $hr, $stats) = @_;
4950							die "Man, you are in serious trouble!" unless $new_field eq 'FULLNAME';
4951							my $fn = $hr->{'FIRSTNAME'} . ' ' . uc($hr->{'LASTNAME'});
4952							$stats->{'empty full name'}++ if $fn eq ' ';
4953							return $fn;
4954							}
4955
4956							=head2 field_add_copy
4957
4958							$csv->field_add_copy($new_field, $src_field, $opt_subref);
4959
4960							C<$new_field> if the name of the new field.
4961
4962							C<$src_field> is the name of the field being copied.
4963
4964							C<$opt_subref> is optional. It is a reference to a sub that takes one string (the value of
4965							C<$src_field>) and returns a string (the value assigned to C<$new_field>).
4966
4967							B<Return value>
4968
4969							Returns the object itself in case of success.
4970							Returns undef if error.
4971
4972							C<field_add_copy> is a special case of L</field_add_computed>. The advantage of C<field_add_copy> is
4973							that it relies on a sub that is Text::AutoCSV "unaware", just taking one string as input and
4974							returning another string as output.
4975
4976							B<IMPORTANT>
4977
4978							The current field value is passed to C<field_add_copy> in $_.
4979
4980							A call to
4981
4982							$csv->field_add_copy($new_field, $src_field, $subref);
4983
4984							is equivalent to
4985
4986							$csv->field_add_computed($new_field, \&subref2);
4987							sub subref2 {
4988							my (undef, $hr) = @_;
4989							local $_ = $hr->{$src_field};
4990							return $subref->();
4991							}
4992
4993							Example of a field copy + pass copied field in upper case and surround content with <<>>:
4994
4995							my $csv = Text::AutoCSV->new(in_file => 'dirpeople.csv', out_file => 'd2.csv');
4996							$csv->field_add_copy('UCLAST', 'LASTNAME', \&myfunc);
4997							$csv->write();
4998							sub myfunc { s/^.*$/<<\U$&>>/; $_; }
4999
5000							Note that the calls can be chained as most member functions return the object itself upon success.
5001							The example above is equivalent to:
5002
5003							Text::AutoCSV->new(in_file => 'dirpeople.csv', out_file => 'd2.csv')
5004							->field_add_copy('UCLAST', 'LASTNAME', \&myfunc)
5005							->write();
5006							sub myfunc { s/^.*$/<<\U$&>>/; $_; }
5007
5008							=head2 field_add_link
5009
5010							$csv->field_add_link($new_field, $chain, $linked_file, \%opts);
5011
5012							C<$new_field> is the name of the new field.
5013
5014							C<$chain> is the CHAIN of the link, that is: 'LOCAL->REMOTE->PICK' where:
5015
5016							C<LOCAL> is the field name to read the value from.
5017
5018							C<REMOTE> is the linked field to find the value in. This field belongs to $linked_file.
5019
5020							C<PICK> is the field from which to read the value of, in the record found by the search. This field
5021							belongs to $linked_file.
5022
5023							If $new_field is undef, the new field name is the name of the third field of $chain (PICK).
5024
5025							C<$linked_file> is the name of the linked file, that gets read in a Text::AutoCSV object created
5026							on-the-fly to do the search on. C<$linked_file> can also be a Text::AutoCSV object that you created
5027							yourself, allowing for more flexibility. Example:
5028
5029							my $lcsv = Text::AutoCSV->new(in_file => 'logins.csv', case => 1);
5030							$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', $lcsv);
5031
5032							C<\%opts> is a hash ref of optional attributes. The same values can be provided as with vlookup.
5033
5034							=over 4
5035
5036							=item trim
5037
5038							If set to 1, searches will ignore leading and trailing spaces. That is, a C<LOCAL> value of ' x '
5039							will match with a C<REMOTE> value of 'x'.
5040
5041							If option is not present, use L</search_value_if_not_found> attribute of object (default value: 1).
5042
5043							Example:
5044
5045							$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5046							{ trim => 0 });
5047
5048							=item case
5049
5050							If set to 1, searches will take the case into account. That is, a C<LOCAL> value of 'X' will B<not>
5051							match with a C<REMOTE> value of 'x'.
5052
5053							If option is not present, use L</search_case> attribute of object (default value: 0).
5054
5055							Example:
5056
5057							$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5058							{ case => 1 });
5059
5060							=item ignore_empty
5061
5062							If set to 1, empty values won't match. That is, a C<LOCAL> value of '' will not match with a
5063							C<REMOTE> value of ''.
5064
5065							If option is not present, use L</search_ignore_empty> attribute of object (default value: 1).
5066
5067							Example:
5068
5069							$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5070							{ ignore_empty => 0 });
5071
5072							=item value_if_not_found
5073
5074							If the searched value is not found, the value of the field is undef, that produces an empty string
5075							at write time. Instead, you can specify the value.
5076
5077							If option is not present, use L</search_value_if_not_found> attribute of object (default value:
5078							undef).
5079
5080							Example:
5081
5082							$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5083							{ value_if_not_found => '<not found!>' });
5084
5085							=item value_if_found
5086
5087							If the searched value is found, you can specify the value to return.
5088
5089							If option is not present, use L</search_value_if_found> attribute of object (default value: none).
5090
5091							B<NOTE>
5092
5093							Although the C<PICK> field is ignored when using this option, you must specify it any way.
5094
5095							Example:
5096
5097							$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5098							{ value_if_not_found => '0', value_if_found => '1' });
5099
5100							=item value_if_ambiguous
5101
5102							If the searched value is found in more than one record, the value of the field is undef, that
5103							produces an empty string at write time. Instead, you can specify the value.
5104
5105							If option is not present, use L</search_value_if_ambiguous> attribute of object (default value:
5106							undef).
5107
5108							Example:
5109
5110							$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5111							{ value_if_ambiguous => '<ambiguous!>' });
5112
5113							=item ignore_ambiguous
5114
5115							Boolean value. If ignore_ambiguous is true and the searched value is found in more than one record,
5116							then, silently fall back on returning the value of the first record. Obviously if
5117							C<ignore_ambiguous> is true, then the value of L</value_if_ambiguous> is ignored.
5118
5119							If option is not present, use L</search_ignore_ambiguous> attribute of object (default value: 1).
5120
5121							Example:
5122
5123							$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5124							{ ignore_ambiguous => 1 });
5125
5126							Example with multiple options:
5127
5128							$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5129							{ value_if_not_found => '?', ignore_ambiguous => 1 });
5130
5131							=back
5132
5133							B<Return value>
5134
5135							Returns the object itself in case of success.
5136							Returns undef if error.
5137
5138							Example of field_add_link usage:
5139
5140							my $nom_compose = 0;
5141							my $zip_not_found = 0;
5142							Text::AutoCSV->new(in_file => 'pers.csv', walker_hr => \&walk)
5143							->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv')->read();
5144							sub walk {
5145							my $hr = shift;
5146							$nom_compose++ if $hr->{'NAME'} =~ m/[- ]/;
5147							$zip_not_found++ unless defined($hr->{'MYCITY'});
5148							}
5149							print("Number of persons with a multi-part name: $nom_compose\n");
5150							print("Number of persons with unknown zipcode: $zip_not_found\n");
5151
5152							=head2 links
5153
5154							$csv->links($prefix, $chain, $linked_file, \%opts);
5155
5156							C<$prefix> is the name to add to joined fields
5157
5158							C<$chain> is the JOINCHAIN of the link, that is: 'LOCAL->REMOTE' where:
5159
5160							C<LOCAL> is the field name to read the value from.
5161
5162							C<REMOTE> is the linked field to find the value in. This field belongs to $linked_file.
5163
5164							As opposed to L</field_add_link>, there is no C<PICK> part, as all fields of target are read.
5165
5166							As opposed to Text::AutoCSV habits of croaking whenever a field name is duplicate, here, the
5167							duplicates are resolved by appending _2 to the joined field name if it already exists. If _2 already
5168							exists, too, then _3 is appended instead, and so on, until a non-duplicate is found. This mechanism
5169							is executed given the difficulty to control all field names when joining CSVs.
5170
5171							C<$linked_file> and C<\%opts> work exactly the same way as for L</field_add_link>, see
5172							L</field_add_link> for help.
5173
5174							B<Return value>
5175
5176							Returns the object itself in case of success.
5177							Returns undef if error.
5178
5179							B<NOTE>
5180
5181							This function used to be called C<join> but got renamed to avoid clash with perl' builtin C<join>.
5182
5183							Example:
5184
5185							Text::AutoCSV->new(in_file => 'pers.csv', out_file => 'pers_with_city.csv')
5186							->links('Read from zips.csv:', 'ZIP->ZIPCODE', 'zips.csv')->write();
5187
5188							=head2 get_in_encoding
5189
5190							my $enc = $csv->get_in_encoding();
5191
5192							Return the string of input encoding, for example 'latin2' or 'UTF-8', etc.
5193
5194							=head2 get_in_file_disp
5195
5196							my $f = $csv->get_in_file_disp();
5197
5198							Return the printable name of in_file.
5199
5200							=head2 get_sep_char
5201
5202							my $s = $csv->get_sep_char();
5203
5204							Return the string of the input CSV separator character, for example ',' or ';'.
5205
5206							=head2 get_escape_char
5207
5208							my $e = $csv->get_escape_char();
5209
5210							Return the string of the input escape character, for example '"' or '\\'.
5211
5212							=head2 get_is_always_quoted
5213
5214							my $a = $csv->get_is_always_quoted();
5215
5216							Return 1 if all fields of input are always quoted, 0 otherwise.
5217
5218							=head2 get_coldata
5219
5220							my @cd = get_coldata();
5221
5222							Return an array that describes each column, from the first one (column 0) to the last.
5223
5224							Each element of the array is itself an array ref that contains 5 elements:
5225
5226							0: Name of the field (as accessed in *_hr functions)
5227							1: Content of the field in the header line (if input has a header line)
5228							2: Column content type, shows some meta-data of fields created with field_add_* functions
5229							3: Datetime format detected, if ever, in the format Strptime
5230							4: Locale of DateTime format detected, if ever
5231							5: Multiline field: '1' if not, 'm' if newlines encountered in the field
5232
5233							=head2 get_pass_count
5234
5235							my $n = $csv->get_pass_count();
5236
5237							Return the number of input readings done. Useful only if you're interested in Text::AutoCSV
5238							internals.
5239
5240							=head2 get_in_mem_record_count
5241
5242							my $m = $csv->get_in_mem_record_count();
5243
5244							Return the number of records currently stored in-memory. Useful only if you're interested in
5245							Text::AutoCSV internals.
5246
5247							=head2 get_max_in_mem_record_count
5248
5249							my $mm = $csv->get_max_in_mem_record_count();
5250
5251							Return the maximum number of records ever stored in-memory. Indeed this number can decrease: certain
5252							functions like field_add* member-functions discard in-memory content. Useful only if you're
5253							interested in Text::AutoCSV internals.
5254
5255							=head2 get_fields_names
5256
5257							my @f = $csv->get_fields_names();
5258
5259							Return an array of the internal names of the columns.
5260
5261							=head2 get_field_name
5262
5263							my $name = $csv->get_field_name($n);
5264
5265							Return the C<$n>-th column name, the first column being number 0.
5266
5267							Example:
5268
5269							# Get the field name of the third column
5270							my $col = $csv->get_field_name(2);
5271
5272							=head2 get_stats
5273
5274							my %stats = $csv->get_stats();
5275
5276							Certain callback functions provide a parameter to record event count: L</field_add_computed>,
5277							L</read_post_update_hr>, L</walker_ar> and L</walker_hr>. By default, these stats are displayed if
5278							Text::AutoCSV got created with attribute C<verbose =E<gt> 1>. get_stats() returns the statistics
5279							hash of the object.
5280
5281							B<IMPORTANT>
5282
5283							As opposed to most functions that trigger input reading automatically (search functions and other
5284							get_* functions), C<get_stats> just returns you the stats as it is, regardless of whether some
5285							execution already occured.
5286
5287							=head2 get_nb_rows
5288
5289							my $nb_rows = $csv->get_nb_rows();
5290
5291							Gives the number of rows of the input. Does not trigger any reading - just provides the number of
5292							rows as known at the moment of the call. If unknown, return undef. Typically, the number of rows is
5293							known after doing the initial detection of CSV options (escape character, etc.), or, after doing one
5294							complete reading.
5295
5296							The header line counts for one row.
5297
5298							B<IMPORTANT>
5299
5300							As some fields can contain new lines, this number is not necessarily identical to the number of
5301							lines.
5302
5303							=head2 set_walker_ar
5304
5305							$csv->set_walker_ar($subref);
5306
5307							Normally one wants to define it at object creation time using L</walker_ar> attribute.
5308							C<set_walker_ar> allows to assign the attribute walker_ar after object creation.
5309
5310							See attribute L</walker_ar> for help about the way C<$subref> should work.
5311
5312							B<Return value>
5313
5314							Returns the object itself in case of success.
5315							Returns undef if error.
5316
5317							Example:
5318
5319							# Calculate the total of the two first columns, the first column being money in and the
5320							# second one being money out.
5321							my ($actif, $passif) = (0, 0);
5322							$csv->set_walker_ar(sub { my $ar = $_[0]; $actif += $ar->[0]; $passif += $ar->[1]; })->read();
5323							print("Actif = $actif\n");
5324							print("Passif = $passif\n");
5325
5326							=head2 set_walker_hr
5327
5328							$csv->set_walker_hr($subref);
5329
5330							Normally one wants to define it at object creation time using L</walker_hr> attribute.
5331							C<set_walker_hr> allows to assign the attribute L</walker_hr> after object creation.
5332
5333							See attribute L</walker_hr> for help about the way C<$subref> should work.
5334
5335							B<Return value>
5336
5337							Returns the object itself in case of success.
5338							Returns undef if error.
5339
5340							Example:
5341
5342							my $csv = Text::AutoCSV->new(in_file => 'directory.csv', verbose => 1);
5343
5344							# ...
5345
5346							$csv->set_walker_hr(
5347							sub {
5348							my ($hr, $stat) = @_;
5349							$stat{'not capital name'}++, return if $hr->{'NAME'} ne uc($hr->{'NAME'});
5350							$stat{'name is capital letters'}++;
5351							}
5352							)->read();
5353
5354							=head2 set_out_file
5355
5356							$csv->set_out_file($out_file);
5357
5358							Normally one wants to define it at object creation time using L</out_file> attribute.
5359							C<set_out_file> allows to assign the attribute L</out_file> after object creation. It is set to
5360							C<$out_file> value.
5361
5362							B<Return value>
5363
5364							Returns the object itself in case of success.
5365							Returns undef if error.
5366
5367							Example:
5368
5369							$csv->set_out_file('mycopy.csv')->write();
5370
5371							=head2 get_keys
5372
5373							my @allkeys = $csv->get_keys();
5374
5375							Returns an array of all the record keys of input. A record key is a unique identifier that
5376							designates the record.
5377
5378							At the moment it is just an integer being the record number, the first one (that comes after the
5379							header line) being of number 0. For example if $csv input is made of one header line and 3 records
5380							(that is, a 4-line file typically, if no record contains a line break), $csv->get_keys() returns
5381
5382							(0, 1, 2)
5383
5384							B<IMPORTANT>
5385
5386							If not yet done, this function causes the input to be read entirely and stored in-memory.
5387
5388							=head2 get_hr_all
5389
5390							my @allin = $csv->get_hr_all();
5391
5392							Returns an array of all record contents of the input, each record being a hash ref.
5393
5394							B<IMPORTANT>
5395
5396							If not yet done, this function causes the input to be read entirely and stored in-memory.
5397
5398							=head2 get_row_ar
5399
5400							my $row_ar = $csv->get_row_ar($record_key);
5401
5402							Returns an array ref of the record designated by C<$record_key>.
5403
5404							Example:
5405
5406							# Get content (as array ref) of last record
5407							my @allkeys = $csv->get_keys();
5408							my $lastk = $allkeys[-1];
5409							my $lastrec_ar = $csv->get_row_ar($lastk);
5410
5411							B<IMPORTANT>
5412
5413							If not yet done, this function causes the input to be read entirely and stored in-memory.
5414
5415							=head2 get_row_hr
5416
5417							my $row_hr = $csv->get_row_hr($record_key);
5418
5419							Returns a hash ref of the record designated by C<$record_key>.
5420
5421							Example:
5422
5423							# Get content (as hash ref) of first record
5424							my @allkeys = $csv->get_keys();
5425							my $firstk = $allkeys[0];
5426							my $firstrec_hr = $csv->get_row_hr($firstk);
5427
5428							B<IMPORTANT>
5429
5430							If not yet done, this function causes the input to be read entirely and stored in-memory.
5431
5432							=head2 get_cell
5433
5434							my $val = $csv->get_cell($record_key, $field_name);
5435
5436							Return the value of the cell designated by its record key (C<$record_key>) and field name
5437							(C<$field_name>).
5438
5439							Example:
5440
5441							my @allkeys = $csv->get_keys();
5442							my $midk = $allkeys[int($#allkeys / 2)];
5443							my $midname = $csv->get_cell($midk, 'NAME');
5444
5445							Note the above example is equivalent to:
5446
5447							my @allkeys = $csv->get_keys();
5448							my $midk = $allkeys[int($#allkeys / 2)];
5449							my $midrec_hr = $csv->get_row_hr($midk);
5450							my $midname = $midrec_hr->{'NAME'};
5451
5452							B<IMPORTANT>
5453
5454							If not yet done, this function causes the input to be read entirely and stored in-memory.
5455
5456							=head2 get_values
5457
5458							my @vals = $csv->get_values($field_name, $opt_filter_subref);
5459
5460							Return an array made of the values of the given field name (C<$field_name>), for every records, in
5461							the order of the records.
5462
5463							C<$opt_filter_subref> is an optional subref. If defined, it is called with every values in turn (one
5464							call per value) and only values for which C<$opt_filter_subref> returned True are included in the
5465							returned array. Call to C<$opt_filter_subref> is done with $_ to pass the value.
5466
5467							Example:
5468
5469							my @logins = $csv->get_values('LOGIN");
5470
5471							This is equivalent to:
5472
5473							my @allkeys = $csv->get_keys();
5474							my @logins;
5475							push @logins, $csv->get_cell($_, 'LOGIN') for (@allkeys);
5476
5477							Example bis
5478
5479							# @badlogins is the list of logins that contain non alphanumeric characters
5480							my @badlogins = Text::AutoCSV->new(in_file => 'logins.csv')
5481							->get_values('LOGIN', sub { m/[^a-z0-9]/ });
5482
5483							This is equivalent to:
5484
5485							# @badlogins is the list of logins that contain non alphanumeric characters
5486							# This method leads to carrying all values of a given field across function calls...
5487							my @badlogins = grep { m/[^a-z0-9]/ } (
5488							Text::AutoCSV->new(in_file => 'logins.csv')->get_values('LOGIN')
5489							);
5490
5491							B<IMPORTANT>
5492
5493							If not yet done, this function causes the input to be read entirely and stored in-memory.
5494
5495							=head2 get_recnum
5496
5497							my $r = $csv->get_recnum();
5498
5499							Returns the current record identifier, if a reading is in progress. If no read is in progress,
5500							return undef.
5501
5502							=head2 in_map
5503
5504							=head2 read_update_after
5505
5506							C<read_update_after> is an alias of C<in_map>.
5507
5508							$csv->in_map($field, $subref);
5509
5510							After reading a record from input, update C<$field> by calling C<$subref>. The value is put in
5511							C<$_>. Then the field value is set to the return value of C<$subref>.
5512
5513							This feature is originally meant to manage DateTime fields: the input and output CSVs carry text
5514							content, and in-between, the values dealt with are DateTime objects.
5515
5516							See L</out_map> for an example.
5517
5518							=head2 out_map
5519
5520							=head2 write_update_before
5521
5522							C<write_update_before> is an alias of C<out_map>.
5523
5524							$csv->out_map($field, $subref);
5525
5526							Before writing C<$field> field content into the output file, pass it through C<out_map>. The value
5527							is put in C<$_>. Then the return value of C<$subref> is written in the output.
5528
5529							Example:
5530
5531							Suppose you have a CSV file with the convention that a number surrounded by parenthesis is negative.
5532							You can register corresponding L</in_map> and L</out_map> functions. During the processing of data,
5533							the field content will be just a number (positive or negative), while in input and in output, it'll
5534							follow the "negative under parenthesis" convention.
5535
5536							In the below example, we rely on convention above and add a new field converted from the original
5537							one, that follows the same convention.
5538
5539							sub in_updt {
5540							return 0 if !defined($_) or $_ eq '';
5541							my $i;
5542							return -$i if ($i) = $_ =~ m/^$(.*)$$/;
5543							$_;
5544							}
5545							sub out_updt {
5546							return '' unless defined($_);
5547							return '(' . (-$_) . ')' if $_ < 0;
5548							$_;
5549							}
5550							sub convert {
5551							return ;
5552							}
5553							Text::AutoCSV->new(in_file => 'trans-euros.csv', out_file => 'trans-devises.csv')
5554							->in_map('EUROS', \&in_updt)
5555							->out_map('EUROS', \&out_updt)
5556							->out_map('DEVISE', \&out_updt)
5557							->field_add_copy('DEVISE', 'EUROS', sub { sprintf("%.2f", $_ * 1.141593); } )
5558							->write();
5559
5560							=head2 search
5561
5562							my $found_ar = $csv->search($field_name, $value, \%opts);
5563
5564							Returns an array ref of all records keys where the field C<$field_name> has the value C<$value>.
5565
5566							C<\%opts> is an optional hash ref of options for the search. See help of L</vlookup> about options.
5567
5568							B<IMPORTANT>
5569
5570							An unsuccessful search returns an empty array ref, that is, [ ]. Thus you B<cannot> check for
5571							definedness of C<search> return value to know whether or not the search found something.
5572
5573							On the other hand, you can always examine the value C<search(...)-E<gt>[0]>, as search is always an
5574							array ref. If the search found nothing, then, C<search(...)-E<gt>[0]> is not defined.
5575
5576							B<IMPORTANT bis>
5577
5578							If not yet done, this function causes the input to be read entirely and stored in-memory.
5579
5580							Example:
5581
5582							my $linux_os_keys_ar = $csv->search('OS', 'linux');
5583
5584							=head2 search_1hr
5585
5586							my $found_hr = $csv->search_1hr($field_name, $value, \%opts);
5587
5588							Returns a hash ref of the first record where the field C<$field_name> has the value C<$value>.
5589
5590							C<\%opts> is an optional hash ref of options for the search. See help of L</vlookup> about options.
5591
5592							Note the options L</value_if_not_found> and L</value_if_ambiguous> are ignored. If not found, return
5593							undef. If the result is ambiguous (more than one record found) and ignore_ambiguous is set to a
5594							false value, return undef.
5595
5596							The other options are taken into account as for any search: L</ignore_ambiguous>, L</trim>,
5597							L</case>, L</ignore_empty>.
5598
5599							B<IMPORTANT>
5600
5601							As opposed to L</search>, an unsuccessful C<search_1hr> will return C<undef>.
5602
5603							B<IMPORTANT bis>
5604
5605							If not yet done, this function causes the input to be read entirely and stored in-memory.
5606
5607							Example:
5608
5609							my $hr = $csv->search_1hr('LOGIN', $login);
5610							my $full_name = $hr->{'FIRSTNAME'} . ' ' . $hr->{'LASTNAME'};
5611
5612							=head2 vlookup
5613
5614							my $val = $csv->vlookup($searched_field, $value, $target_field, \%opts);
5615
5616							Find the first record where C<$searched_field> contains C<$value> and out of this record, returns
5617							the value of C<$target_field>.
5618
5619							C<\%opts> is optional. It is a hash of options for C<vlookup>:
5620
5621							=over 4
5622
5623							=item trim
5624
5625							If true, ignore spaces before and after the values to search.
5626
5627							If option is not present, use L</search_trim> attribute of object (default value: 1).
5628
5629							=item case
5630
5631							If true, do case sensitive searches.
5632
5633							If option is not present, use L</search_case> attribute of object (default value: 0).
5634
5635							=item ignore_empty
5636
5637							If true, ignore empty values in the search. The consequence is that you won't be able to find
5638							empty values by searching it.
5639
5640							If option is not present, use L</search_ignore_empty> attribute of object (default value: 1).
5641
5642							=item ignore_accents
5643
5644							If true, ignore accents in searches. For exampe, if C<ignore_accents> is set, a string like
5645							"Ã©lÃ©mentaire" will match "elementaire".
5646
5647							If option is not present, use L</search_ignore_accents> attribute of object (default value: 1).
5648
5649							B<NOTE>
5650
5651							This option uses the function L</remove_accents> to build its internal hash tables. See
5652							L</remove_accents> help for more details.
5653
5654							=item value_if_not_found
5655
5656							Return value if vlookup finds nothing.
5657
5658							If option is not present, use L</search_value_if_not_found> attribute of object (default value:
5659							undef).
5660
5661							=item value_if_found
5662
5663							Return value if vlookup finds something.
5664
5665							If option is not present, use L</search_value_if_found> attribute of object (default value: none).
5666
5667							This option is to just check whether a value exists, regardless of the target value found.
5668
5669							B<NOTE>
5670
5671							Although the B<$target_field> is ignored when using this option, you must specify it any way.
5672
5673							=item value_if_ambiguous
5674
5675							Return value if vlookup find more than one result. Tune it only if ignore_ambiguous is unset.
5676
5677							If option is not present, use L</search_value_if_ambiguous> attribute of object (default value:
5678							undef).
5679
5680							=item ignore_ambiguous
5681
5682							If true, then if more than one result is found, silently return the first one.
5683
5684							If option is not present, use L</search_ignore_ambiguous> attribute of object (default value: 1).
5685
5686							=back
5687
5688							B<IMPORTANT>
5689
5690							If not yet done, this function causes the input to be read entirely and stored in-memory.
5691
5692							Example:
5693
5694							my $name = $csv->vlookup('LOGIN', $id, 'NAME', { value_if_not_found => '<login not found>' });
5695
5696							=head2 remove_accents
5697
5698							my $t = $csv->remove_accents($s);
5699
5700							Take the string C<$s> as argument and return the string without accents. Uses a Unicode
5701							decomposition followed by removal of every characters that have the Unicode property
5702							C<Nonspacing_Mark>.
5703
5704							B<NOTE>
5705
5706							Only accents are removed. It is not a C<whatever-encoding -E<gt> us-ascii> conversion. For example,
5707							the French B<Å“> character (o followed by e) or the German B<ÃŸ> (eszett) are kept as is.
5708
5709							B<NOTE bis>
5710
5711							Tested with some latin1 and latin2 characters.
5712
5713							B<NOTE ter>
5714
5715							There is no language-level transformation during accents removal. For example B<JÃ¼rgen> is returned
5716							as B<Jurgen>, not B<Juergen>.
5717
5718							This function is not exported by default.
5719
5720							Example:
5721
5722							use Text::AutoCSV qw(remove_accents);
5723							my $s = remove_accents("FranÃ§ais: Ãªtre Ã©lÃ©mentaire, TchÃ¨que: sluÅ¾ba dÅ¯m");
5724							die "This script will never die" if $s ne 'Francais: etre elementaire, Tcheque: sluzba dum';
5725
5726							=head1 AUTHOR
5727
5728							SÃ©bastien Millet <milletseb@laposte.net>
5729
5730							=head1 COPYRIGHT AND LICENSE
5731
5732							This software is copyright (c) 2016, 2017 by SÃ©bastien Millet.
5733
5734							This is free software; you can redistribute it and/or modify it under
5735							the same terms as the Perl 5 programming language system itself.
5736
5737							=cut