File Coverage

blib/lib/Text/AutoCSV.pm

Criterion	Covered	Total	%
statement	1500	1761	85.1
branch	690	966	71.4
condition	188	291	64.6
subroutine	112	118	94.9
pod	43	43	100.0
total	2533	3179	79.6

line	stmt	bran	cond	sub	pod	time	code
1							#!/usr/bin/perl
2							# ABSTRACT: helper module to automate the use of Text::CSV
3
4							# vim:tw=80 :et
5
6							# Text/AutoCSV.pm
7
8							#
9							# Written by SÃ©bastien Millet
10							# March, July, August, September 2016
11							# January - August 2017
12							#
13
14							package Text::AutoCSV;
15							$Text::AutoCSV::VERSION = '1.2.0';
16	20			20		972116	use strict;
	20					51
	20					556
17	17			17		102	use warnings;
	17					43
	17					1175
18
19							my $PKG = "Text::AutoCSV";
20
21							require Exporter;
22							our @ISA = 'Exporter';
23							our @EXPORT_OK = qw(remove_accents);
24
25	17			17		109	use Carp;
	17					39
	17					1118
26	17			17		6036	use Params::Validate qw(validate validate_pos :types);
	17					123272
	17					3646
27	17			17		5779	use List::MoreUtils qw(first_index indexes);
	17					118583
	17					167
28	17			17		11395	use Fcntl qw(SEEK_SET);
	17					36
	17					812
29	17			17		4997	use File::BOM;
	17					427647
	17					956
30	17			17		8115	use Text::CSV;
	17					243285
	17					775
31	17			17		9732	use DateTime;
	17					6851560
	17					927
32
33							# DateTime::Format::Strptime 1.70 does not work properly with us.
34							# Actually all version as of 1.63 are fine, except 1.70.
35	17			17		8007	use DateTime::Format::Strptime 1.71;
	17					933433
	17					169
36	17			17		8184	use Class::Struct;
	17					26792
	17					100
37	17			17		7963	use Unicode::Normalize;
	17					27240
	17					1060
38
39							# lock_keys is used to prevent accessing non existing keys
40							# Credits: 3381159 on http://stackoverflow.com
41							# "make perl shout when trying to access undefined hash key"
42	17			17		5522	use Hash::Util qw(lock_keys);
	17					35279
	17					100
43	17			17		7461	use Memoize;
	17					32077
	17					14334
44
45							# FIXME
46							# Not needed in release -> should be always commented unless at dev time
47							#use feature qw(say);
48							#use Data::Dumper;
49							#$Data::Dumper::Sortkeys = 1;
50
51							# Set to 1 if you wish to turn on debug without touching caller's code
52							our $ALWAYS_DEBUG = 0;
53
54							# Keep it set to 0 unless you know what you're doing!
55							# Note
56							# Taken into account only if debug is set.
57							my $DEBUG_DATETIME_FORMATS = 0;
58
59							# The below is taken into account only if $DEBUG_DATETIME_FORMATS is set.
60							# It becomes really MASSIVE debug output.
61							my $DEBUG_DATETIME_FORMATS_EVEN_MORE = 0;
62
63							#
64							# Uncomment to replace carp and croak with cluck and confess, respectively
65							# Also reachable with perl option:
66							# -MCarp=verbose
67							# See 'perldoc Carp'.
68							#
69							#$Carp::Verbose = 1;
70
71							# * *************** *
72							# * BEHAVIOR TUNING *
73							# * *************** *
74
75							# * **************************************************** *
76							# * ALL THE VARIABLES BELOW ARE RATHER LOW LEVEL. *
77							# * IF YOU UPDATE IT, IT WILL LIKELY BREAK THE TEST PLAN *
78							# * **************************************************** *
79
80							my $DEF_SEARCH_CASE = 0; # Case insensitive search by default
81							my $DEF_SEARCH_TRIM = 1; # Trim values by default
82							my $DEF_SEARCH_IGNORE_ACCENTS = 1; # Ignore accents
83							my $DEF_SEARCH_IGNORE_EMPTY = 1; # Ignore empty strings in searches by default
84							my $DEF_SEARCH_VALUE_IF_NOT_FOUND = undef; # If not found, returned field value is undef
85							my $DEF_SEARCH_VALUE_IF_AMBIGUOUS = undef; # If more than one record found by search (when a
86							# unique value is expected), return undef
87							my $DEF_SEARCH_IGNORE_AMBIGUOUS = 1; # By default, ignore the fact that multiple records are
88							# found by search and return the first record found
89
90							my $DETECT_ENCODING = 1;
91
92							my $DEFAULT_IN_ENCODING = 'UTF-8,latin1';
93
94							# By default, input encoding detected is used for output.
95							# -> the constant below is used if and only if:
96							# Inbound encoding is unknown
97							# No providing of out_encoding attribute (out_encoding takes precedence when
98							# provided)
99							my $DEFAULT_OUT_ENCODING = 'UTF-8';
100
101							my $DEFAULT_ESCAPE_CHAR = '\\';
102							my $DEFAULT_QUOTE_CHAR = '"';
103
104							#
105							# The code that workarounds $FIX_PERLMONKS_823214 (see below) makes sense only
106							# under plain Windows.
107							#
108							# "Plain" Windows?
109							# This code MUST NOT be executed under cygwin because cygwin uses unix line
110							# breaks. This is why we detect /mswin/. Would we detect /win/, we'd catch
111							# cygwin, too, and we don't want that.
112							#
113							my $OS_IS_PLAIN_WINDOWS = !!( $^O =~ /mswin/i );
114
115							#
116							# Shall we fix the issue reported as #823214 in PerlMonks? See
117							# http://www.perlmonks.org/?node_id=823214
118							#
119							# In brief (in case the link above would be broken one day):
120							# Under Windows, output mode set to UTF-16LE produces line breaks made of
121							# octets "0d 0a 00", whereas it should be "0d 00 0a 00".
122							#
123							# The code also fixes UTF-16BE (but it was not tested).
124							#
125							my $FIX_PERLMONKS_823214 = 1;
126
127							# * **** *
128							# * CODE *
129							# * **** *
130
131							sub ERR_UNKNOWN_FIELD() { 0 }
132
133							# Store meta-data about each column
134							struct ColData => {
135							field_name => '$',
136							header_text => '$',
137							description => '$',
138							dt_format => '$',
139							dt_locale => '$',
140							multiline => '$'
141							};
142
143							#
144							# Enumeration of ef_type member below
145							# Alternative:
146							# use enum (...)
147							#
148							# But it is not also by default on my distro and installing a package for 3
149							# constants, I find it a bit overkill!
150							#
151							my ( $EF_LINK, $EF_FUNC, $EF_COPY ) = 0 .. 2;
152							struct ExtraField => {
153							ef_type => '$',
154							self_name => '$',
155							description => '$',
156
157							check_field_existence => '$',
158
159							# For when ef_type is set to $EF_LINK
160
161							link_self_search => '$',
162							link_remote_obj => '$',
163							link_remote_search => '$',
164							link_remote_read => '$',
165							link_vlookup_opts => '%',
166
167							# For when ef_type is set to $EF_FUNC
168
169							func_sub => '$',
170
171							# For when ef_type is set to $EF_COPY
172
173							copy_source => '$',
174							copy_sub => '$'
175
176							};
177
178							my $SEARCH_VALIDATE_OPTIONS = {
179							value_if_not_found => { type => UNDEF \| SCALAR, optional => 1 },
180							value_if_found => { type => UNDEF \| SCALAR, optional => 1 },
181							value_if_ambiguous => { type => UNDEF \| SCALAR, optional => 1 },
182							ignore_ambiguous => { type => BOOLEAN, optional => 1 },
183							case => { type => BOOLEAN, optional => 1 },
184							trim => { type => BOOLEAN, optional => 1 },
185							ignore_empty => { type => BOOLEAN, optional => 1 },
186							ignore_accents => { type => BOOLEAN, optional => 1 }
187							};
188
189							sub _is_utf8 {
190	424			424		758	my $e = shift;
191
192	424	100				2836	return 1 if $e =~ m/^(utf-?8\|ucs-?8)/i;
193	32					67	return 0;
194							}
195
196							# To replace // in old perls: return the first non-undef value in provided list
197							sub _get_def {
198	12079			12079		21704	for (@_) {
199	18709	100				44202	return $_ if defined($_);
200							}
201	895					1739	return;
202							}
203
204							sub _print {
205	15			15		19	my $self = shift;
206	15					20	my $t = shift;
207
208	15					18	my $infoh = $self->{infoh};
209	15	50				31	return if ref $infoh ne 'GLOB';
210
211	0					0	print( $infoh $t );
212							}
213
214							sub _printf {
215	17			17		22	my $self = shift;
216
217	17					23	my $infoh = $self->{infoh};
218	17	50				45	return if ref $infoh ne 'GLOB';
219
220	0					0	printf( $infoh @_ );
221							}
222
223							sub _print_warning {
224	67			67		104	my $self = shift;
225	67					109	my $warning_message = shift;
226	67					94	my $dont_wrap = shift;
227
228	67	100				168	my $msg =
229							( $dont_wrap ? $warning_message : "$PKG: warning: $warning_message" );
230	67	100				4487	carp $msg unless $self->{quiet};
231							}
232
233							sub _close_inh {
234	316			316		516	my $self = shift;
235
236	316	100				3003	close $self->{_inh} if $self->{_close_inh_when_finished};
237	316					1194	$self->{_inh} = undef;
238	316					577	$self->{_close_inh_when_finished} = undef;
239							}
240
241							sub _close_outh {
242	127			127		193	my $self = shift;
243
244							close $self->{outh}
245							if defined( $self->{outh} )
246	127	50	66			4759	and $self->{_close_outh_when_finished};
247	127					359	$self->{outh} = undef;
248	127					246	$self->{_close_outh_when_finished} = undef;
249							}
250
251							sub _print_error {
252	81			81		340	my ( $self, $error_message, $dont_stop, $err_code, $err_extra ) = @_;
253
254	81					196	my $msg = "$PKG: error: $error_message";
255
256	81	100	100			263	if ( defined($err_code)
			100
257							and ( !$self->{quiet} )
258							and $self->{croak_if_error} )
259							{
260	5	50				15	if ( $err_code == ERR_UNKNOWN_FIELD ) {
261	5					9	my %f = %{$err_extra};
	5					23
262	5					10	my @cols;
263	5					16	for my $n ( keys %f ) {
264	15					31	$cols[ $f{$n} ] = $n;
265							}
266	5					17	$self->_print( $self->get_in_file_disp()
267							. " column - field name correspondance:\n" );
268	5					14	$self->_print("COL # FIELD\n");
269	5					13	$self->_print("----- -----\n");
270	5					13	for my $i ( 0 .. $#cols ) {
271	17	100				48	$self->_printf( "%05d %s\n", $i,
272							( defined( $cols[$i] ) ? $cols[$i] : '' ) );
273							}
274							}
275							else {
276	0					0	confess "Unknown error code: '$err_code'\n";
277							}
278							}
279
280	81	100	100			289	if ( $self->{croak_if_error} and ( !$dont_stop ) ) {
281	30					98	$self->_close_read(1);
282	30					92	$self->_close_inh();
283	30					87	$self->_close_outh();
284	30					113	$self->_status_reset(1);
285	30					4480	croak $msg;
286							}
287	51					129	$self->_print_warning( $msg, 1 );
288							}
289
290							#
291							# Return the string passed in argument with all accents removed from characters.
292							# Do it in a rather general and reliable way, not tied to latin1.
293							# Tested on latin1 and latin2 character sets.
294							#
295							# Credits:
296							# http://stackoverflow.com/questions/17561839/remove-accents-from-accented-characters
297							#
298							sub remove_accents {
299	3163			3163	1	29098	validate_pos( @_, { type => SCALAR } );
300
301	3163					9118	my $s = $_[0];
302	3163					12216	my $r = NFKD($s);
303	3163					7912	$r =~ s/\p{Nonspacing_Mark}//g;
304	3163					7972	return $r;
305							}
306
307							sub _detect_csv_sep {
308	262			262		452	my $ST_OUTSIDE = 0;
309	262					423	my $ST_INSIDE = 1;
310
311	262					634	my ( $self, $escape_char, $quote_char, $sep ) = @_;
312
313	262					465	my $_debugh = $self->{_debugh};
314	262					391	my $inh = $self->{_inh};
315	262					434	my $_debug = $self->{_debug};
316
317	262					409	delete $self->{_inh_header};
318
319	262	100				647	$escape_char = $DEFAULT_ESCAPE_CHAR unless defined($escape_char);
320
321	262	50				636	do {
322	0					0	$self->_print_error(
323							"illegal \$escape_char: '$escape_char' (length >= 2)");
324	0					0	return 0;
325							} if length($escape_char) >= 2;
326
327	262	50				569	do {
328	0					0	$self->_print_error(
329							"$PKG: error: illegal \$quote_char '$quote_char' (length >= 2)");
330	0					0	return 0;
331							} if length($quote_char) >= 2;
332
333	262	50				575	$escape_char = '--' if $escape_char eq '';
334	262	50				524	$quote_char = '--' if $quote_char eq '';
335
336							# FIXME (?)
337							# Avoid inlined magic values for separator auto-detection.
338							# Issue is, as you can see below, the behavior is also hard-coded and not
339							# straightforward to render 'tunable' ("," and ";" take precedence over "\t").
340	262					877	my %Seps = (
341							";" => 0,
342							"," => 0,
343							"\t" => 0
344							);
345
346	262					2958	my $h = <$inh>;
347	262	50				2577	if ( $self->{inh_is_stdin} ) {
348	0					0	$self->{_inh_header} = $h;
349	0	0				0	print( $_debugh "Input is STDIN => saving header line to re-read it "
350							. "later (in-memory)\n" )
351							if $_debug;
352							}
353							else {
354	262					1760	seek $inh, 0, SEEK_SET;
355	262	50				755	print( $_debugh "Input is not STDIN => using seek function to rewind "
356							. "read head after header line reading\n" )
357							if $_debug;
358							}
359
360	262					593	chomp $h;
361	262					409	my $status = $ST_OUTSIDE;
362	262					600	my $l = length($h);
363	262					406	my $c = 0;
364	262					636	while ( $c < $l ) {
365	4808					6459	my $ch = substr( $h, $c, 1 );
366	4808					5733	my $chnext = '';
367	4808	100				8310	$chnext = substr( $h, $c + 1, 1 ) if ( $c < $l - 1 );
368	4808	100				8025	if ( $status == $ST_INSIDE ) {
		50
369	1521	50	66			2962	if ( $ch eq $escape_char and $chnext eq $quote_char ) {
		100
370	0					0	$c += 2;
371							}
372							elsif ( $ch eq $quote_char ) {
373	197					230	$status = $ST_OUTSIDE;
374	197					331	$c++;
375							}
376							else {
377	1324					2043	$c++;
378							}
379							}
380							elsif ( $status == $ST_OUTSIDE ) {
381	3287	50	33			7523	if (
		100	66
		100
382							$ch eq $escape_char
383							and ( $chnext eq $quote_char
384							or exists $Seps{$chnext} )
385							)
386							{
387	0					0	$c += 2;
388							}
389							elsif ( exists $Seps{$ch} ) {
390	946					1181	$Seps{$ch}++;
391	946					1485	$c++;
392							}
393							elsif ( $ch eq $quote_char ) {
394	197					538	$status = $ST_INSIDE;
395	197					318	$c++;
396							}
397							else {
398	2144					3484	$c++;
399							}
400							}
401							}
402
403	262	100	100			1383	if ( $Seps{";"} == 0 and $Seps{","} >= 1 ) {
		100	66
		50	33
			33
404	147					277	$$sep = ",";
405	147					620	return 1;
406							}
407							elsif ( $Seps{","} == 0 and $Seps{";"} >= 1 ) {
408	106					222	$$sep = ";";
409	106					492	return 1;
410							}
411							elsif ( $Seps{","} == 0 and $Seps{";"} == 0 and $Seps{"\t"} >= 1 ) {
412	0					0	$$sep = "\t";
413	0					0	return 1;
414							}
415							else {
416
417							# Check the case where there is one unique column, in which case,
418							# assume comma separator.
419	9					23	my $h_no_accnt = remove_accents($h);
420	9	100				58	if ( $h_no_accnt =~ m/^[[:alnum:]_]+$/i ) {
421	3					7	$$sep = ",";
422	3					13	return 1;
423							}
424
425	6					11	$$sep = "";
426	6	50				12	if ($_debug) {
427	0					0	for my $k ( keys %Seps ) {
428	0					0	print( $_debugh "\$Seps{'$k'} = $Seps{$k}\n" );
429							}
430							}
431	6					23	return 0;
432							}
433							}
434
435							sub _reopen_input {
436	652			652		948	my $self = shift;
437
438	652					985	my $in_file = $self->{in_file};
439
440	652					805	my $inh;
441	652	50				12642	if ( !open( $inh, "<", $in_file ) ) { ## no critic (InputOutput::RequireBriefOpen)
442	0					0	$self->_print_error("unable to open file '$in_file': $!");
443	0					0	return;
444							}
445	652	50				1833	if ( !$self->{_leave_encoding_alone} ) {
446
447							confess "Oups! _inh_encoding_string undef?"
448	652	50				1310	unless defined( $self->{_inh_encoding_string} );
449
450	652					4100	binmode $inh, $self->{_inh_encoding_string};
451							}
452
453	652					29058	return $inh;
454							}
455
456							# Abstraction layer, not useful Today, could bring added value when looking into
457							# Text::CSV I/O
458							sub _mygetline {
459	8399			8399		14351	my ( $csvobj, $fh ) = @_;
460
461	8399					172425	return $csvobj->getline($fh);
462							}
463
464							sub _detect_meta {
465	456			456		978	my ( $self, $quote_char, $sep_char ) = @_;
466
467	456					765	my $in_file = $self->{in_file};
468	456					682	my $_debug = $self->{_debug};
469	456					702	my $_debugh = $self->{_debugh};
470
471	456	100				955	return if $self->{_int_one_pass};
472	436	100				986	return if $self->{_detect_meta_done}; # Sans jeu de mot...
473
474	300	100				682	if ( !defined( $self->{escape_char} ) ) {
475	298					727	$self->_register_pass("detect escape character");
476
477	298					459	my $flag = 0;
478	298					642	my $inh = $self->_reopen_input();
479	298	50				756	if ( defined($inh) ) {
480	298					3897	while ( my $l = <$inh> ) {
481	3899					10878	chomp $l;
482
483							# Very heuristic criteria...
484							# Tant pis.
485							# $flag = 1 if $l =~ m/(?<!$sep_char)$quote_char$quote_char(?!$sep_char)/;
486							# $flag = 1 if $l =~ m/(\\$quote_char\|\\\\)/;
487	3899	100				14272	$flag = 1 if $l =~ m/(\\$quote_char)/;
488
489							}
490	298					2040	close $inh;
491							}
492	298	100				1085	$self->{escape_char} = ( $flag ? '\\' : '"' );
493	298	50				1006	print( $_debugh " detected escape_char: '$self->{escape_char}'\n" )
494							if $_debug;
495							}
496
497	300	50				647	print( $_debugh " using escape_char: '$self->{escape_char}' "
498							. "to further examine input (is_always_quoted, multiline)\n" )
499							if $_debug;
500
501	300					437	my $is_always_quoted = 0;
502	300					724	my $inh = $self->_reopen_input();
503	300					501	my @multiline;
504	300	50				677	if ( defined($inh) ) {
505	300					758	$self->_register_pass("detect is_always_quoted and multiline");
506							my $csv = Text::CSV->new(
507							{
508							sep_char => $sep_char,
509							allow_whitespace => 1,
510							binary => 1,
511							auto_diag => 0,
512							quote_char => $quote_char,
513							escape_char => $self->{escape_char},
514	300					3169	keep_meta_info => 1,
515							allow_loose_escapes => 1
516							}
517							);
518	300					55576	my $nb_rows = 0;
519	300					494	$is_always_quoted = 1;
520	300					686	while ( my $ar = _mygetline( $csv, $inh ) ) {
521	3897					110314	$nb_rows++;
522
523	3897					4637	my @aa = @{$ar};
	3897					9471
524	3897					5790	my $e = $#aa;
525	3897					6876	for my $i ( 0 .. $e ) {
526	28577	100				47046	$is_always_quoted = 0 unless $csv->is_quoted($i);
527	28577	100				225458	$multiline[$i] = 1 if $aa[$i] =~ m/\n/;
528							}
529							}
530	300					8749	$self->{_nb_rows} = $nb_rows;
531	300					3349	close $inh;
532							}
533	300					1033	$self->{_multiline} = [@multiline];
534
535	300	50				713	print( $_debugh " is_always_quoted: $is_always_quoted\n" ) if $_debug;
536	300					630	$self->{_is_always_quoted} = $is_always_quoted;
537
538	300					1357	$self->{_detect_meta_done} = 1;
539							}
540
541							sub _register_pass {
542	1269			1269		2350	my ( $self, $pass_name ) = @_;
543	1269					1967	my $_debug = $self->{_debug};
544	1269					2351	my $_debugh = $self->{_debugh};
545
546	1269					2079	$self->{_pass_count}++;
547
548	1269	50				3308	return unless $_debug;
549
550	0					0	print( $_debugh "Pass #" . $self->{_pass_count} . " ($pass_name) done\n" );
551							}
552
553							sub _update_in_mem_record_count {
554	510			510		1113	my ( $self, $nonexistent_arg ) = @_;
555	510					842	my $_debug = $self->{_debug};
556	510					811	my $_debugh = $self->{_debugh};
557
558	510	50				1116	confess "Hey! what is this second argument?" if defined($nonexistent_arg);
559
560	510					711	my $new_count = $#{ $self->{_flat} } + 1;
	510					1096
561
562	510					787	my $updated_max = 0;
563	510	100				1334	if ( $new_count > $self->get_max_in_mem_record_count() ) {
564	155					450	$self->_set_max_in_mem_record_count($new_count);
565	155					252	$updated_max = 1;
566							}
567
568	510					1276	$self->{_in_mem_record_count} = $new_count;
569	510	50				1232	if ($_debug) {
570	0					0	print( $_debugh "_in_mem_record_count updated, set to $new_count" );
571	0	0				0	print( $_debugh " (also updated max)" ) if $updated_max;
572	0					0	print( $_debugh "\n" );
573							}
574							}
575
576							sub _detect_inh_encoding {
577	320			320		1262	my ( $self, $enc, $via, $in_file, $detect_enc ) = @_;
578	320					685	my $_debug = $self->{_debug};
579	320					538	my $_debugh = $self->{_debugh};
580
581	320	100	66			1074	$enc = $DEFAULT_IN_ENCODING if ( !defined $enc ) or $enc eq '';
582
583	320					1995	my @encodings = split( /\s,\s/, $enc );
584
585	320	50				916	confess "Oups! No encoding to try?" if $#encodings < 0;
586
587	320	50				716	print( $_debugh "[ST] _detect_inh_encoding(): start\n" ) if $_debug;
588
589	320					494	my $wrn = 0;
590	320					724	my $m;
591							my $m0;
592	320					0	my $ee;
593	320					626	for my $e (@encodings) {
594	332					516	$ee = $e;
595	332					696	my $viadef = _get_def( $via, '' );
596	332					931	$m = ":encoding($e)$viadef";
597	332	100				774	$m0 = $m unless defined($m0);
598
599	332	100				705	last unless $detect_enc;
600
601	320	50	33			1252	confess "Oups! in_file not defined?"
602							if ( !defined $in_file )
603							or $in_file eq '';
604
605	320	50				621	print( $_debugh " Checking encoding '$e' / '$m'\n" ) if $_debug;
606	320					498	$wrn = 0;
607
608	320					1093	$self->_register_pass("check $e encoding");
609
610	320					493	my $utf8_bom = 0;
611	320	100				695	if ( _is_utf8($e) ) {
612	296	50				6370	if ( open my $fh, '<:raw', $in_file ) {
613	296					588	my $bom;
614	296					4148	read $fh, $bom, 3;
615	296	100	66			1476	if ( length($bom) == 3 and $bom eq "\xef\xbb\xbf" ) {
616	12	100				29	if ( !defined($via) ) {
617	10					25	$m .= ":via(File::BOM)";
618							}
619							}
620	296					2227	close $fh;
621							}
622							}
623
624	320					602	my $inh;
625	320	50				4609	if ( !open( $inh, "<", $in_file ) ) { ## no critic (InputOutput::RequireBriefOpen)
626	0					0	$self->_print_error("unable to open file '$in_file': $!");
627	0					0	return ( $encodings[0], $m0 );
628							}
629	16			16		109	binmode $inh, $m;
	16					28
	16					106
	320					3265
630
631							# TURN OFF WARNINGS OUTPUT
632
633							{
634	320					35455	local $SIG{__WARN__} = sub {
635	51			51		545	$wrn++;
636
637							# Uncomment only for debug!
638							# Otherwise you'll get quite a good deal of output at each execution :-)
639							# print(STDERR @_);
640	320					2114	};
641	320					4069	while (<$inh>) { }
642							}
643
644							# WARNINGS ARE BACK ON
645
646	320					11980	close $inh;
647	320	50				949	print( $_debugh " '$m' counts $wrn warning(s)\n" ) if $_debug;
648
649	320	100				1274	last if $wrn == 0;
650							}
651
652	320	50				789	if ( $wrn >= 1 ) {
653	0					0	$self->_print_warning(
654							"encoding warnings encountered during initial check, "
655							. "using '$encodings[0]'" );
656	0					0	return ( $encodings[0], $m0 );
657							}
658
659	320	50				689	confess "Oups! undef encoding string?" unless defined($m);
660
661	320	50				622	print( $_debugh " Detected encoding string '$ee' / '$m'\n" ) if $_debug;
662	320					1330	return ( $ee, $m );
663							}
664
665							#
666							# Each of these functions brings status to the next value (current status + 1).
667							# Each of these functions returns 0 if an error occured, 1 if all good
668							#
669							my @status_forward_functions = (
670							"_S1_init_input", # To go from S0 to S1
671							"_S2_init_fields_from_header", # To go form S1 to S2
672							"_S3_init_fields_extra", # To go from S2 to S3
673							"_S4_read_all_in_mem", # To go from S3 to S4
674							);
675
676							sub _status_reset {
677	494			494		822	my $self = shift;
678
679	494					3173	validate_pos( @_, { type => SCALAR, optional => 1 } );
680	494					1704	my $called_from_print_error = _get_def( $_[0], 0 );
681
682	494	100	100			1745	if ( defined( $self->{_status} ) and $self->{_status} == 4 ) {
683	18	100				43	unless ($called_from_print_error) {
684	16					28	my $msg =
685							"in-memory CSV content discarded, will have to re-read input";
686	16					52	$self->_print_warning($msg);
687							}
688	18					176	$self->{_flat} = [];
689	18					54	$self->_update_in_mem_record_count();
690							}
691
692	494					948	$self->{_status} = 0;
693	494	100				1032	return 0 if $called_from_print_error;
694	464					981	return $self->_status_forward('S1');
695							}
696
697							sub _status_forward {
698	4085			4085		6172	my $self = shift;
699
700	4085					8393	return $self->___status_move( @_, 1 );
701							}
702
703							sub _status_backward {
704	139			139		209	my $self = shift;
705
706	139					319	return $self->___status_move( @_, -1 );
707							}
708
709							# You should not call ___status_move() in the code, that is why the name is
710							# prefixed with 3 underscores! Only _status_forward and _status_backward should
711							# call it.
712							sub ___status_move {
713	4224			4224		8425	my ( $self, $target, $step ) = @_;
714
715	4224					7258	my $_debug = $self->{_debug};
716	4224					6124	my $_debugh = $self->{_debugh};
717
718	4224	50	66			18528	if ( ( !defined $step ) or ( $step != -1 and $step != 1 ) ) {
			33
719	0					0	confess "Oups! \$step has a wrong value: '$step'";
720							}
721
722	4224					5740	my $n;
723	4224	50				18668	confess "Oups! illegal status string: '$target'"
724							unless ($n) = $target =~ m/^S(\d)$/;
725
726	4224	100				9727	if ( $self->{_read_in_progress} ) {
727	1					4	$self->_print_error(
728							"illegal call while read is in progress, "
729							. "would lead to infinite recursion",
730							0
731							);
732	0					0	confess "Aborted.";
733							}
734
735	4223	100				7936	if ( $step == -1 ) {
736	139	100				406	if ( $n < $self->{_status} ) {
737	19	100				52	if ( $self->{_status} == 4 ) {
738	16	50				42	print( $_debugh
739							"[ST] Requested status $n but will go to status 0\n" )
740							if $_debug;
741	16					63	return $self->_status_reset();
742							}
743	3					6	$self->{_status} = $n;
744	3	50				6	print( $_debugh "[ST] New status: " . $self->{_status} . "\n" )
745							if $_debug;
746							}
747	123					352	return 1;
748							}
749
750	4084	100				9168	if ( $self->{_status} < $n ) {
751	869	50				1658	print( $_debugh "[ST] Current status: " . $self->{_status} . "\n" )
752							if $_debug;
753							}
754
755	4084	100	100			10716	if ( $self->{_status} <= 1
			100
			100
756							and $n >= 2
757							and $self->{_int_one_pass}
758							and $self->get_pass_count() >= 1 )
759							{
760	12					24	my $msg = "one_pass set, unable to read input again";
761							do {
762	12					44	$self->_print_error($msg);
763	5					77	return 0;
764	12	50				32	} if $self->{one_pass};
765	0	0				0	$self->_print_warning($msg) if !$self->{one_pass};
766							}
767
768	4072					8248	while ( $self->{_status} < $n ) {
769
770	1347					2522	my $funcname = $status_forward_functions[ $self->{_status} ];
771	1347	50				2437	confess "Oups! Unknown status?" unless defined($funcname);
772
773	1347	50				2577	print( $_debugh "[ST] Now executing $funcname\n" ) if $_debug;
774
775	1347	50				5511	if ( my $member_function = $self->can($funcname) ) {
776	1347	100				3089	return 0 unless $self->$member_function();
777							}
778							else {
779	0					0	confess "Could not find method $funcname in $PKG!";
780							}
781
782	1328					2519	$self->{_status} += $step;
783	1328	50				3999	print( $_debugh "[ST] New status: " . $self->{_status} . "\n" )
784							if $_debug;
785							}
786
787	4053					10418	return 1;
788							}
789
790							sub new {
791	333			333	1	247908	my ( $class, @args ) = @_;
792
793	333					37965	@args = validate(
794							@args,
795							{
796							in_file => { type => SCALAR, optional => 1 },
797							infoh =>
798							{ type => UNDEF \| GLOBREF, default => \*STDERR, optional => 1 },
799							verbose => { type => BOOLEAN, default => 0, optional => 1 },
800							quiet => { type => BOOLEAN, optional => 1 },
801							croak_if_error => { type => BOOLEAN, default => 1, optional => 1 },
802							inh => { type => GLOBREF, optional => 1 },
803							in_csvobj => { type => OBJECT, optional => 1 },
804							sep_char => { type => SCALAR, optional => 1 },
805							quote_char => { type => SCALAR, optional => 1 },
806							escape_char => { type => SCALAR, optional => 1 },
807							has_headers => { type => BOOLEAN, default => 1, optional => 1 },
808							out_has_headers =>
809							{ type => UNDEF \| BOOLEAN, default => undef, optional => 1 },
810							fields_ar => { type => ARRAYREF, optional => 1 },
811							fields_hr => { type => HASHREF, optional => 1 },
812							fields_column_names => { type => ARRAYREF, optional => 1 },
813							search_case => { type => SCALAR, optional => 1 },
814							search_trim => { type => SCALAR, optional => 1 },
815							search_ignore_empty => { type => SCALAR, optional => 1 },
816							search_ignore_accents => { type => SCALAR, optional => 1 },
817							search_ignore_ambiguous => { type => SCALAR, optional => 1 },
818							search_value_if_not_found => { type => SCALAR, optional => 1 },
819							search_value_if_found => { type => SCALAR, optional => 1 },
820							search_value_if_ambiguous => { type => SCALAR, optional => 1 },
821							walker_hr => { type => CODEREF, optional => 1 },
822							walker_ar => { type => CODEREF, optional => 1 },
823							read_post_update_hr => { type => CODEREF, optional => 1 },
824							write_filter_hr => { type => CODEREF, optional => 1 },
825							out_filter => { type => CODEREF, optional => 1 },
826							write_fields => { type => ARRAYREF, optional => 1 },
827							out_orderby => { type => ARRAYREF, optional => 1 },
828							out_fields => { type => ARRAYREF, optional => 1 },
829							out_file => { type => SCALAR, optional => 1 },
830							out_always_quote => { type => BOOLEAN, optional => 1 },
831							out_sep_char => { type => SCALAR, optional => 1 },
832							out_quote_char => { type => SCALAR, optional => 1 },
833							out_escape_char => { type => SCALAR, optional => 1 },
834							out_dates_format => { type => SCALAR, optional => 1 },
835							out_dates_locale => { type => SCALAR, optional => 1 },
836							encoding => { type => SCALAR, optional => 1 },
837							via => { type => SCALAR, optional => 1 },
838							out_encoding => { type => SCALAR, optional => 1 },
839							dont_mess_with_encoding => { type => BOOLEAN, optional => 1 },
840							one_pass => { type => BOOLEAN, optional => 1 },
841							no_undef => { type => BOOLEAN, optional => 1 },
842							fields_dates => { type => ARRAYREF, optional => 1 },
843							fields_dates_auto => { type => BOOLEAN, optional => 1 },
844							fields_dates_auto_optimize => { type => BOOLEAN, optional => 1 },
845							dates_formats_to_try => { type => ARRAYREF, optional => 1 },
846							dates_formats_to_try_supp => { type => ARRAYREF, optional => 1 },
847							dates_ignore_trailing_chars => { type => BOOLEAN, optional => 1 },
848							dates_search_time => { type => BOOLEAN, optional => 1 },
849							dates_locales => { type => SCALAR, optional => 1 },
850							out_utf8_bom => { type => SCALAR, optional => 1 },
851							dates_zeros_ok => { type => SCALAR, default => 1, optional => 1 },
852							_debug => { type => BOOLEAN, default => 0, optional => 1 },
853							_debug_read => { type => BOOLEAN, default => 0, optional => 1 },
854							_debug_extra_fields => { type => BOOLEAN, optional => 1 },
855							_debugh => { type => UNDEF \| GLOBREF, optional => 1 }
856							}
857							);
858
859	329					7074	my $self = {@args};
860
861	329					719	my @fields = keys %{$self};
	329					1202
862
863							# croak_if_error
864
865	329					821	my $croak_if_error = $self->{croak_if_error};
866
867							# verbose and _debug management
868
869	329	50				1143	$self->{_debugh} = $self->{infoh} if !defined( $self->{_debugh} );
870	329	50				842	$self->{_debug} = 1 if $ALWAYS_DEBUG;
871	329					650	my $_debug = $self->{_debug};
872	329	50				682	$self->{verbose} = 1 if $_debug;
873	329					596	my $verbose = $self->{verbose};
874
875	329					511	my $_debugh = $self->{_debugh};
876
877	329					594	bless $self, $class;
878
879							# fields_ar, fields_hr
880
881	329	100				1083	if (
882							defined( $self->{fields_ar} ) +
883							defined( $self->{fields_hr} ) +
884							defined( $self->{fields_column_names} ) >= 2 )
885							{
886	1					4	$self->_print_error(
887							"mixed use of fields_ar, fields_hr and fields_column_names. "
888							. "Use one at a time." );
889							}
890	329	100	100			984	if ( defined( $self->{fields_ar} ) and ( !defined $self->{fields_hr} ) ) {
891	2					4	my @f = @{ $self->{fields_ar} };
	2					5
892	2					4	my %h;
893	2					5	for my $e (@f) {
894	6					17	$h{$e} = "^$e\$";
895							}
896	2					6	$self->{fields_hr} = \%h;
897							}
898	329	100				739	if ( !$self->{has_headers} ) {
899	13	100				35	if ( defined( $self->{fields_ar} ) ) {
900	1					5	$self->_print_error(
901							"fields_ar irrelevant if CSV file has no headers");
902	1					104	return;
903							}
904	12	100				33	if ( defined( $self->{fields_hr} ) ) {
905	1					3	$self->_print_error(
906							"fields_hr irrelevant if CSV file has no headers");
907	1					6	return;
908							}
909							}
910
911							# in_file or inh
912
913	327					704	$self->{_flat} = [];
914
915	327					646	$self->{_read_update_after_hr} = {};
916	327					635	$self->{_write_update_before_hr} = {};
917
918	327					1042	$self->_update_in_mem_record_count();
919
920	327	100				841	return unless $self->_status_reset();
921
922	318	50				668	$self->_debug_show_members() if $_debug;
923
924	318	100				741	if ( $self->{dates_zeros_ok} ) {
925							$self->{_refsub_is_datetime_empty} = sub {
926	3173			3173		4677	my $v = $_[0];
927	3173	100				9569	if ( $v !~ m/[1-9]/ ) {
928	975	100				2228	return 1 if $v =~ m/^[^0:]*0+[^0:]+0+[^0:]+0+/;
929							}
930	3170					8634	return 0;
931							}
932	316					1415	}
933
934	318					5217	return $self;
935							}
936
937							#
938							# Return 0 if error, 1 if all good
939							#
940							# Do all low level activities associated to input:
941							# I/O init
942							# Detect encoding
943							# Detect CSV separator
944							# Detect escape character
945							#
946							sub _S1_init_input {
947	470			470		829	my $self = shift;
948
949	470					760	my $croak_if_error = $self->{croak_if_error};
950	470					692	my $_debug = $self->{_debug};
951	470					744	my $_debugh = $self->{_debugh};
952
953	470	100				994	$self->{in_file} = '' unless defined( $self->{in_file} );
954	470					923	$self->{_close_inh_when_finished} = 0;
955
956							$self->{_leave_encoding_alone} = $self->{dont_mess_with_encoding}
957	470	50				978	if defined( $self->{dont_mess_with_encoding} );
958
959	470					1279	$self->{_int_one_pass} = _get_def( $self->{one_pass}, 0 );
960	470					880	my $in_file_disp;
961
962							#
963							# LOW LEVEL INIT STEP 1 OF 4
964							#
965							# Manage I/O (= in most cases, open input file...)
966							#
967
968	470	100				1077	if ( defined( $self->{inh} ) ) {
969							$self->{_leave_encoding_alone} = 1
970	4	50				10	unless defined( $self->{dont_mess_with_encoding} );
971	4					7	$in_file_disp = _get_def( $self->{in_file}, '<?>' );
972	4	50				10	$self->{_int_one_pass} = 1 unless defined( $self->{one_pass} );
973	4					6	$self->{_inh} = $self->{inh};
974							}
975							else {
976							$self->{_leave_encoding_alone} = 0
977	466	50				1124	unless defined( $self->{dont_mess_with_encoding} );
978	466					764	my $in_file = $self->{in_file};
979	466					642	my $inh;
980	466	50				956	if ( $in_file eq '' ) {
981	0					0	$inh = \*STDIN;
982	0					0	$self->{inh_is_stdin} = 1;
983	0	0				0	$self->{_int_one_pass} = 1 unless defined( $self->{one_pass} );
984	0					0	$in_file_disp = '<stdin>';
985							}
986							else {
987	466	100				14335	if ( !open( $inh, '<', $in_file ) ) { ## no critic (InputOutput::RequireBriefOpen)
988	3					50	$self->_print_error("unable to open file '$in_file': $!");
989	3					105	return 0;
990							}
991	463					1175	$in_file_disp = $in_file;
992	463					1009	$self->{_close_inh_when_finished} = 1;
993							}
994	463					1297	$self->{_inh} = $inh;
995							}
996
997	467	50				983	confess "Oups! in_file_disp not defined?" unless defined($in_file_disp);
998	467					857	$self->{_in_file_disp} = $in_file_disp;
999
1000							#
1001							# LOW LEVEL INIT STEP 2 OF 4
1002							#
1003							# "Detection" of encoding
1004							#
1005							# WARNING
1006							# As explained in the manual, it is a very partial and limited detection...
1007							#
1008
1009	467	100				1030	unless ( $self->{_leave_encoding_alone} ) {
1010	463	100				1026	unless ( $self->{_init_input_already_called} ) {
1011							my ( $e, $m ) =
1012							$self->_detect_inh_encoding( $self->{encoding}, $self->{via},
1013							$self->{in_file},
1014	320	100				1697	( $self->{_int_one_pass} ? 0 : $DETECT_ENCODING ) );
1015	320					1048	$self->{_inh_encoding} = $e;
1016	320					682	$self->{_inh_encoding_string} = $m;
1017							}
1018
1019	463					3395	binmode $self->{_inh}, $self->{_inh_encoding_string};
1020							print( $_debugh "Input encoding: '"
1021							. $self->{_inh_encoding} . "' / '"
1022							. $self->{_inh_encoding_string}
1023	463	50				20771	. "'\n" )
1024							if $_debug;
1025
1026							}
1027
1028	467	100				1266	$self->{out_file} = '' unless defined( $self->{out_file} );
1029
1030							#
1031							# LOW LEVEL INIT STEP 3 OF 4
1032							#
1033							# Detection of CSV separator and escape character
1034							#
1035
1036	467					672	my $sep_char;
1037	467					799	my $escape_char = $self->{escape_char};
1038							$self->{quote_char} = $DEFAULT_QUOTE_CHAR
1039	467	100				1193	unless defined( $self->{quote_char} );
1040	467					771	my $quote_char = $self->{quote_char};
1041	467	100				1013	unless ( defined( $self->{in_csvobj} ) ) {
1042	462	100				916	if ( defined( $self->{sep_char} ) ) {
1043	200					324	$sep_char = $self->{sep_char};
1044	200	50				418	print( $_debugh "-- $in_file_disp: CSV separator set to \"" )
1045							if $_debug;
1046							}
1047							else {
1048							# The test below (on _init_input_already_called) shoud be useless.
1049							# Left for the sake of robustness.
1050	262	50				566	unless ( $self->{_init_input_already_called} ) {
1051	262	100				864	if (
1052							!$self->_detect_csv_sep(
1053							$escape_char, $quote_char, \$sep_char
1054							)
1055							)
1056							{
1057	6					24	$self->_print_error(
1058							"'$in_file_disp': cannot detect CSV separator");
1059	0					0	return 0;
1060							}
1061	256	50				636	print( $_debugh
1062							"-- $in_file_disp: CSV separator detected to \"" )
1063							if $_debug;
1064	256					596	$self->{sep_char} = $sep_char;
1065							}
1066							}
1067	456	50				941	print( $_debugh _render($sep_char) . "\"\n" ) if $_debug;
1068
1069	456					1335	$self->_detect_meta( $quote_char, $sep_char );
1070
1071							$self->{_in_csvobj} = Text::CSV->new(
1072							{
1073							sep_char => $sep_char,
1074							allow_whitespace => 1,
1075							binary => 1,
1076							auto_diag => 0,
1077							quote_char => $quote_char,
1078							escape_char => $self->{escape_char},
1079	456					3410	allow_loose_escapes => 1
1080							}
1081							);
1082	456	50				74413	unless ( defined( $self->{_in_csvobj} ) ) {
1083	0					0	$self->_print_error("error creating input Text::CSV object");
1084	0					0	return 0;
1085							}
1086
1087							}
1088							else {
1089	5					9	$self->{_in_csvobj} = $self->{in_csvobj};
1090							}
1091
1092	461					899	$self->{_init_input_already_called} = 1;
1093
1094	461					1259	return 1;
1095							}
1096
1097							sub get_in_file_disp {
1098	3663			3663	1	4957	my $self = shift;
1099
1100	3663					18910	validate_pos(@_);
1101
1102	3663					9196	my $in_file_disp = _get_def( $self->{_in_file_disp}, '?' );
1103	3663					6583	return $in_file_disp;
1104							}
1105
1106							sub get_sep_char {
1107	0			0	1	0	my $self = shift;
1108
1109	0					0	validate_pos(@_);
1110
1111	0					0	return $self->{sep_char};
1112							}
1113
1114							sub get_escape_char {
1115	9			9	1	50	my $self = shift;
1116
1117	9					65	validate_pos(@_);
1118
1119	9					52	return $self->{escape_char};
1120							}
1121
1122							sub get_in_encoding {
1123	27			27	1	3324	my $self = shift;
1124
1125	27					172	validate_pos(@_);
1126
1127	27					81	return _get_def( $self->{_inh_encoding}, '' );
1128							}
1129
1130							sub get_is_always_quoted {
1131	15			15	1	80	my $self = shift;
1132
1133	15					96	validate_pos(@_);
1134
1135	15					95	return $self->{_is_always_quoted};
1136							}
1137
1138							sub get_pass_count {
1139	43			43	1	559	my $self = shift;
1140
1141	43					286	validate_pos(@_);
1142
1143	43					158	return _get_def( $self->{_pass_count}, 0 );
1144							}
1145
1146							sub get_in_mem_record_count {
1147	0			0	1	0	my $self = shift;
1148
1149	0					0	validate_pos(@_);
1150
1151	0					0	return ( $self->{_in_mem_record_count}, 0 );
1152							}
1153
1154							sub get_max_in_mem_record_count {
1155	513			513	1	1553	my $self = shift;
1156
1157	513					3035	validate_pos(@_);
1158
1159	513					2102	return _get_def( $self->{_max_in_mem_record_count}, 0 );
1160							}
1161
1162							sub _set_max_in_mem_record_count {
1163	155			155		253	my $self = shift;
1164
1165	155					1493	validate_pos( @_, { type => SCALAR } );
1166
1167	155					525	$self->{_max_in_mem_record_count} = $_[0];
1168							}
1169
1170							sub get_fields_names {
1171	19			19	1	2757	my $self = shift;
1172
1173	19					134	validate_pos(@_);
1174
1175	19	50				68	return () unless $self->_status_forward('S3');
1176	19					34	return @{ $self->{_columns} };
	19					82
1177							}
1178
1179							sub get_field_name {
1180	1			1	1	477	my $self = shift;
1181
1182	1					16	validate_pos( @_, { type => SCALAR } );
1183
1184	1					7	my ($n) = @_;
1185
1186	1	50				5	return unless $self->_status_forward('S3');
1187	1					4	return $self->{_columns}->[$n];
1188							}
1189
1190							sub get_coldata {
1191	1			1	1	2	my $self = shift;
1192
1193	1					7	validate_pos(@_);
1194
1195	1	50				3	return () unless $self->_status_forward('S3');
1196	1					2	my @ret;
1197	1					2	for ( @{ $self->{_coldata} } ) {
	1					3
1198	4					123	push @ret,
1199							[
1200							$_->field_name, $_->header_text, $_->description,
1201							$_->dt_format, $_->dt_locale, $_->multiline
1202							];
1203							}
1204
1205	1					25	return @ret;
1206							}
1207
1208							sub get_stats {
1209	2			2	1	9	my $self = shift;
1210
1211	2					11	validate_pos(@_);
1212
1213	2	50				6	return () unless defined( $self->{_stats} );
1214	2					4	return %{ $self->{_stats} };
	2					8
1215							}
1216
1217							sub get_nb_rows {
1218	1			1	1	2	my $self = shift;
1219
1220	1					11	validate_pos(@_);
1221
1222	1					6	return $self->{_nb_rows};
1223							}
1224
1225							sub _debug_show_members {
1226	0			0		0	my ($self) = @_;
1227	0					0	my $_debugh = $self->{_debugh};
1228	0					0	my @aa;
1229	0	0				0	@aa = @{ $self->{fields_ar} } if defined( $self->{fields_ar} );
	0					0
1230	0					0	my @c;
1231	0					0	@c = @{ $self->{fields_column_names} }
1232	0	0				0	if defined( $self->{fields_column_names} );
1233	0					0	my %h;
1234	0	0				0	%h = %{ $self->{fields_hr} } if defined( $self->{fields_hr} );
	0					0
1235
1236	0					0	print( $_debugh "-- _debug_show_members() start\n" );
1237	0					0	print( $_debugh " croak_if_error $self->{croak_if_error}\n" );
1238	0					0	print( $_debugh " verbose $self->{verbose}\n" );
1239	0					0	print( $_debugh " _debug $self->{_debug}\n" );
1240	0					0	print( $_debugh " _debug_read $self->{_debug_read}\n" );
1241	0					0	print( $_debugh " infoh $self->{infoh}\n" );
1242	0					0	print( $_debugh " _debugh $_debugh\n" );
1243	0					0	print( $_debugh " inh: $self->{_inh}\n" );
1244	0					0	print( $_debugh " in_file_disp " . $self->get_in_file_disp() . "\n" );
1245	0					0	print( $_debugh " _in_csvobj $self->{_in_csvobj}\n" );
1246	0					0	print( $_debugh " has_headers $self->{has_headers}\n" );
1247	0					0	print( $_debugh " fields_ar:\n" );
1248
1249	0					0	for my $e (@aa) {
1250	0					0	print( $_debugh " '$e'\n" );
1251							}
1252	0					0	print( $_debugh " fields_hr:\n" );
1253	0					0	for my $e ( keys %h ) {
1254	0					0	print( $_debugh " '$e' => '$h{$e}'\n" );
1255							}
1256	0					0	print( $_debugh " fields_column_names:\n" );
1257	0					0	for my $e (@c) {
1258	0					0	print( $_debugh " '$e'\n" );
1259							}
1260	0					0	print( $_debugh "-- _debug_show_members() end\n" );
1261							}
1262
1263							#
1264							# Check headers in CSV header line
1265							# Used to increase robustness by relying on header title rather than
1266							# column number.
1267							#
1268							# Return 1 if success (all fields found), 0 otherwise.
1269							#
1270							sub _process_header {
1271	12			12		22	my $self = shift;
1272	12					23	my @headers = @{ shift(@_) };
	12					39
1273	12					22	my %fields_h = %{ shift(@_) };
	12					68
1274	12					29	my $retval = shift;
1275
1276	12					20	my @tmp = keys %{$retval};
	12					30
1277
1278	12					34	my $in_file_disp = $self->get_in_file_disp();
1279
1280	12	50				35	confess '$_[4] must be an empty by-ref hash' if $#tmp >= 0;
1281
1282	12					23	my $e = 0;
1283	12					38	for my $k ( keys %fields_h ) {
1284	48					85	my $v = $fields_h{$k};
1285
1286	48			462		199	my @all_idx = indexes { /$v/i } @headers;
	462					2609
1287	48	50				164	if ( $#all_idx >= 1 ) {
1288	0					0	$self->_print_error( "file $in_file_disp: "
1289							. "more than one column matches the criteria '$v'" );
1290	0					0	$e++;
1291							}
1292	48			258		179	my $idx = first_index { /$v/i } @headers;
	258					1630
1293	48	50				211	if ( $idx < 0 ) {
1294	0					0	$self->_print_error(
1295							"file $in_file_disp: unable to find field '$v'");
1296	0					0	$e++;
1297							}
1298							else {
1299	48					121	$retval->{$k} = $idx;
1300							}
1301							}
1302
1303	12	50				72	return ( $e >= 1 ? 0 : 1 );
1304							}
1305
1306							sub set_walker_hr {
1307	2			2	1	1593	my $self = shift;
1308	2					25	validate_pos( @_, { type => UNDEF \| CODEREF, optional => 1 } );
1309
1310	2					7	my ($walker_hr) = @_;
1311
1312	2	50				5	return unless $self->_status_forward('S2');
1313	2	50				6	return unless $self->_status_backward('S2');
1314	2					4	$self->{walker_hr} = $walker_hr;
1315
1316	2					5	return $self;
1317							}
1318
1319							sub set_walker_ar {
1320	2			2	1	1404	my $self = shift;
1321	2					22	validate_pos( @_, { type => UNDEF \| CODEREF, optional => 1 } );
1322
1323	2					8	my ($walker_ar) = @_;
1324
1325	2	50				6	return unless $self->_status_forward('S2');
1326	2	50				5	return unless $self->_status_backward('S2');
1327	2					3	$self->{walker_ar} = $walker_ar;
1328
1329	2					5	return $self;
1330							}
1331
1332							# * *************************************** *
1333							# * BEGINNING OF DATE FORMAT DETECTION CODE *
1334							# * *************************************** *
1335
1336							#
1337							# The '%m.%d.%y' is not at its "logical" location. It is done to make sure the
1338							# order in which entries are written does not impact the result.
1339							#
1340							# It could occur because there is some code that correlates an entry containing
1341							# %y with another one that would contain %Y. The %Y will be called the master,
1342							# the %y will be called the slave. It is important to match such entries,
1343							# otherwise an identified format with %y would always be ambiguous with the same
1344							# written with %Y.
1345							#
1346							# IMPORTANT
1347							# The list below is written almost as-is in the POD at the bottom of this
1348							# file.
1349							#
1350							my @DATES_DEFAULT_FORMATS_TO_TRY = (
1351							'',
1352							'%Y-%m-%d',
1353							'%Y.%m.%d',
1354							'%Y/%m/%d',
1355
1356							'%m.%d.%y',
1357
1358							'%m-%d-%Y',
1359							'%m.%d.%Y',
1360							'%m/%d/%Y',
1361							'%d-%m-%Y',
1362							'%d.%m.%Y',
1363							'%d/%m/%Y',
1364
1365							'%m-%d-%y',
1366							'%m/%d/%y',
1367							'%d-%m-%y',
1368							'%d.%m.%y',
1369							'%d/%m/%y',
1370
1371							'%Y%m%d%H%M%S',
1372
1373							# Localizaed formats
1374
1375							'%b %d, %Y',
1376							'%b %d %Y',
1377							'%b %d %T %Z %Y',
1378							'%d %b %Y',
1379							'%d %b, %Y'
1380							);
1381
1382							#
1383							# IMPORTANT
1384							# Under Linux, $START is useless. Strptime will match a format exactly as it
1385							# is, and a tring like "01/01/16 13:00:00" won't match with "%T". Under
1386							# Windows, Strptime is capable of doing a match by ignoring characters at the
1387							# beginning, thus "01/01/2016 13:00:00" for example will return success when
1388							# matched against "%T".
1389							# Possibly it has to do with versionning of Strptime, not Linux versus Windows
1390							# as such. Any way, this difference had to be dealt with.
1391							#
1392							# The flexibility under Windows would screw the code logic so I had to add the
1393							# prefix string below, to avoid unexpected success on match.
1394							#
1395							my $START = '<';
1396
1397							struct RecordCounter => {
1398							count_ok => '$',
1399							count_ko => '$',
1400							has_searched_time => '$',
1401
1402							format => '$',
1403							locale => '$',
1404
1405							has_found_time => '$',
1406							format_with_addition_of_time => '$',
1407							locale_with_addition_of_time => '$',
1408							parser_with_addition_of_time => '$'
1409							};
1410
1411							struct Format => {
1412							id => '$',
1413							format => '$',
1414							locale => '$',
1415							parser => '$',
1416							index_slave => '$',
1417							index_master => '$'
1418							};
1419
1420							sub _col_dispname {
1421	539			539		867	my ( $self, $n ) = @_;
1422
1423	539					639	my $col;
1424
1425							#
1426							# IMPORTANT
1427							#
1428							# We cannot execute here a command like
1429							# $self->_status_forward('S3');
1430							# (to ensure _columns is well defined) because _col_dispname is called by
1431							# _detect_dates_formats that is in turn called by _S3_init_fields_extra. A call
1432							# to _status_forward would trigger a never-ending call loop.
1433							#
1434	539					1103	my $cols = _get_def( $self->{'_columns'}, $self->{'_S2_columns'} );
1435
1436	539	50				934	if ( $self->{has_headers} ) {
1437	539					929	$col = $cols->[$n];
1438	539	50				864	$col = "<UNDEF>" unless defined($col);
1439							}
1440							else {
1441	0					0	$col = "[$n]";
1442							}
1443	539					1034	return $col;
1444							}
1445
1446							# Used by test plan only...
1447							sub _dds {
1448	45			45		917	my $self = shift;
1449
1450	45	50				110	return unless $self->_status_forward('S3');
1451	43	50				121	return unless defined( $self->{_dates_detailed_status} );
1452	43					352	return $self->{_dates_detailed_status};
1453							}
1454
1455							sub _detect_dates_formats {
1456	354			354		524	my $self = shift;
1457
1458	354	100				868	return if $self->{_detect_dates_formats_has_run};
1459	287					564	$self->{_detect_dates_formats_has_run} = 1;
1460	287					383	my @fields_dates;
1461	11					35	@fields_dates = @{ $self->{fields_dates} }
1462	287	100				643	if defined( $self->{fields_dates} );
1463	287	100	100			1293	return unless @fields_dates or $self->{fields_dates_auto};
1464
1465	54	50				151	if ( $self->{_int_one_pass} ) {
1466	0					0	$self->_print_error(
1467							"date format detection disallowed when one_pass is set");
1468	0					0	return;
1469							}
1470
1471	54					100	my $_debug = $self->{_debug};
1472	54					85	my $_debugh = $self->{_debugh};
1473	54		33			127	my $debug_fmt = ( $_debug and $DEBUG_DATETIME_FORMATS );
1474
1475	54					155	$self->_register_pass("detect date format");
1476
1477							#
1478							# Why re-opening the input?
1479							# I tried two other ways that never worked on some OSes (like freebsd) and/or
1480							# with older perl versions.
1481							#
1482							# 1) The "tell" tactic
1483							# Recording at the beginning of the function the file position with
1484							# my $pos = tell($self->{inh});
1485							# ... and then recalling with a seek instruction is the most logical.
1486							# But it didn't work = sometimes, reading would go back to first row (the
1487							# headers) instead of the second row, could not figure out why (it would work
1488							# on my Ubuntu 16.04 / perl 5.22, but would fail with other OSes and/or perl
1489							# versions).
1490							#
1491							# 2) The "complete rewind" tactic
1492							# I then undertook to do (at the end of detection function):
1493							# seek $inh, 0, SEEK_SET;
1494							# $incsv->getline($inh) if $self->{has_headers};
1495							# based on the assumption that a seek to zero would behave differently from a
1496							# seek to an arbitrary position.
1497							# But still, it would sometimes fail....
1498							#
1499
1500	54					119	my $inh = $self->_reopen_input();
1501	54					113	my $incsv = $self->{_in_csvobj};
1502	54	50				218	_mygetline( $incsv, $inh ) if $self->{has_headers};
1503
1504	54					2401	my $formats_to_try = $self->{dates_formats_to_try};
1505	54					114	my $ignore_trailing_chars = $self->{dates_ignore_trailing_chars};
1506	54					133	my $search_time = $self->{dates_search_time};
1507	54					91	my $localizations = $self->{dates_locales};
1508
1509	54					92	my %regular_named_fields = %{ $self->{_regular_named_fields} };
	54					423
1510
1511	54					144	my $refsub_is_datetime_empty = $self->{_refsub_is_datetime_empty};
1512
1513	54					82	my @fields_to_detect_format;
1514	54	100				203	if ( defined( $self->{fields_dates} ) ) {
		50
1515	11					20	my $count_field_not_found = 0;
1516	11					17	my %column_seen;
1517	11					32	for my $f ( @{ $self->{fields_dates} } ) {
	11					31
1518	23	100				58	if ( !exists $regular_named_fields{$f} ) {
1519	1					9	$self->_print_error( "fields_dates: unknown field: '$f'",
1520							1, ERR_UNKNOWN_FIELD, {%regular_named_fields} );
1521	1					32	$count_field_not_found++;
1522	1					4	next;
1523							}
1524	22					34	my $n = $regular_named_fields{$f};
1525	22	50				52	if ( exists $column_seen{$n} ) {
1526	0					0	$self->_print_warning("field '$f' already seen");
1527	0					0	next;
1528							}
1529	22					43	$column_seen{$n} = 1;
1530	22					47	push @fields_to_detect_format, $n;
1531							}
1532	11	100				88	$self->_print_error("non existent field(s) encountered, aborted")
1533							if $count_field_not_found;
1534							}
1535							elsif ( $self->{fields_dates_auto} ) {
1536	43					174	my @k = keys %regular_named_fields;
1537	43					181	@fields_to_detect_format = ( 0 .. $#k );
1538							}
1539							else {
1540	0					0	confess "Hey! check this code, man";
1541							}
1542
1543							#
1544							# FIXME?
1545							# Sort by column number of not?
1546							#
1547							# At this moment in time, the author inclines to answer "yes".
1548							# But I must admit it is rather arbitrary decision for now.
1549							#
1550	53					294	@fields_to_detect_format = sort { $a <=> $b } @fields_to_detect_format;
	753					898
1551
1552	53					86	my @dates_formats_supp;
1553	2					6	@dates_formats_supp = @{ $self->{dates_formats_to_try_supp} }
1554	53	100				149	if defined( $self->{dates_formats_to_try_supp} );
1555
1556	53	100				346	$formats_to_try = [@DATES_DEFAULT_FORMATS_TO_TRY]
1557							unless defined($formats_to_try);
1558	53					94	$formats_to_try = [ @{$formats_to_try}, @dates_formats_supp ];
	53					217
1559	53					146	my %seen;
1560	53					94	my $f2 = [];
1561	53					176	for ( @${formats_to_try} ) {
1562	978	50				1463	push @{$f2}, $_ unless exists( $seen{$_} );
	978					1353
1563	978					1685	$seen{$_} = undef;
1564							}
1565	53					134	$formats_to_try = $f2;
1566
1567	53	100				149	$ignore_trailing_chars = 1 unless defined($ignore_trailing_chars);
1568	53	100				187	$search_time = 1 unless defined($search_time);
1569
1570	53	100				128	my $stop = ( $ignore_trailing_chars ? '' : '>' );
1571
1572							#
1573							# The code below (from # AMB to # AMB-END) aims to remove ambiguity that comes
1574							# from %Y versus %y.
1575							# That is: provided you have (among others) the formats to try
1576							# '%d-%m-%Y'
1577							# and
1578							# '%d-%m-%y'
1579							# then if parsing 4-digit-year dates (like '31-12-2016'), the two formats will
1580							# work and you'll end up with an ambiguity. To be precise, there'll be no
1581							# ambiguity if the date is followed by a time, but if the date is alone, both
1582							# formats will work.
1583							#
1584							# Thanks to the below code, the member 'index_slave' (and its counterpart
1585							# index_master) is populated and later, if such an ambiguity is detected, the
1586							# upper case version (the one containing upper case '%Y') will be kept and the
1587							# other one will be discarded.
1588							#
1589							# NOTE
1590							# Such an ambiguity can exist only when ignore_trailing_chars is set.
1591							# Otherwise, the remaining two digits make the date parsing fail in the '%y'
1592							# case.
1593							#
1594							# The other members of the 'Format' object are used to work "normally",
1595							# independently from this ambiguity removal feature.
1596							#
1597
1598							# WIP = Work In Progress...
1599	53					92	my @formats_wip;
1600							my @locales;
1601	53	100				147	@locales = split( /,\s*/, $localizations ) if defined($localizations);
1602	53					73	for my $f ( @{$formats_to_try} ) {
	53					129
1603	978	100				2135	my $has_localized_item = ( $f =~ m/%a\|%A\|%b\|%B\|%c\|%\+/ ? 1 : 0 );
1604	978	100	100			1765	unless ( @locales and $has_localized_item ) {
1605	968					1583	push @formats_wip, [ $f, '' ];
1606	968					1387	next;
1607							}
1608	10					34	push @formats_wip, [ $f, $_ ] foreach @locales;
1609							}
1610
1611							# AMB
1612	53					95	my @formats;
1613							my %mates;
1614	53					151	for my $i ( 0 .. $#formats_wip ) {
1615	988					1734	my $fstr = $formats_wip[$i]->[0];
1616	988					1263	my $floc = $formats_wip[$i]->[1];
1617
1618							# FIXME
1619							# Will not manage correctly a string like
1620							# '%%Y'
1621							# that means (when used with Strptime), the litteral string '%Y' with no
1622							# substitution.
1623							# Such cases will be complicated to fix, as it'll require to do a kind-of
1624							# Strptime-equivalent parsing of the string, and I find it a bit overkill.
1625							#
1626							# I prefer to push back in caller world saying
1627							# "Hey, if using constructs like '%%Y', you'll be in trouble."
1628	988					1181	my $m = $fstr;
1629	988					3181	$m =~ s/%y//ig;
1630	988					1608	$m .= $floc;
1631
1632	988					1273	my $index_slave = -1;
1633	988					1119	my $index_master = -1;
1634	988	100				1757	if ( exists $mates{$m} ) {
1635	262					516	my $alt_fstr = $formats_wip[ $mates{$m} ]->[0];
1636	262	100				705	my $m_lower = ( $fstr =~ m/%y/ ? 1 : 0 );
1637	262	100				570	my $m_upper = ( $fstr =~ m/%Y/ ? 1 : 0 );
1638	262	100				492	my $a_lower = ( $alt_fstr =~ m/%y/ ? 1 : 0 );
1639	262	100				516	my $a_upper = ( $alt_fstr =~ m/%Y/ ? 1 : 0 );
1640
1641							# We ignore the weird cases where we'd have both %y and %Y in a format string.
1642
1643	262	100	66			1919	if ( ( !$m_lower ) and $m_upper and $a_lower and ( !$a_upper ) ) {
		50	66
			33
			33
			33
			33
1644	45					101	$index_slave = $mates{$m};
1645	45					749	$formats[ $mates{$m} ]->index_master($i);
1646							}
1647							elsif ( $m_lower and ( !$m_upper ) and ( !$a_lower ) and $a_upper )
1648							{
1649	217					352	$index_master = $mates{$m};
1650	217					3444	$formats[ $mates{$m} ]->index_slave($i);
1651							}
1652
1653							}
1654							else {
1655	726					1389	$mates{$m} = $i;
1656							}
1657
1658	988					3597	my %strptime_opts = ( pattern => $START . $fstr . $stop );
1659	988	100				1936	$strptime_opts{locale} = $floc if $floc ne '';
1660	988	100				15300	my $format = Format->new(
1661							id => "$i",
1662							format => $fstr,
1663							locale => $floc,
1664							parser => (
1665							$fstr ne ''
1666							? _build_strptime_parser(%strptime_opts)
1667							: undef
1668							),
1669							index_slave => $index_slave,
1670							index_master => $index_master
1671							);
1672	988					247159	push @formats, $format;
1673							}
1674	53					201	for my $i ( 0 .. $#formats ) {
1675	988					11052	my $format = $formats[$i];
1676
1677							# If a master could be itself the slave of another entry, that'd make it a
1678							# hierarchical relation tree with multiple levels. It is not possible, only a
1679							# direct, unique master-slave relation can be managed here.
1680	988	50	66			12683	confess "Inonsistent data, check this module's code urgently!"
1681							if $format->index_slave >= 0 and $format->index_master >= 0;
1682
1683	988	100				22232	if ( $format->index_slave >= 0 ) {
1684	262					4781	my $mate = $formats[ $format->index_slave ];
1685	262	50	33			4634	if ( $mate->index_master != $i or $mate->index_slave != -1 ) {
1686	0					0	confess
1687							"Inonsistent data (2), check this module's code urgently!";
1688							}
1689							}
1690
1691	988	100				21627	if ( $format->index_master >= 0 ) {
1692	262					4681	my $mate = $formats[ $format->index_master ];
1693	262	50	33			4439	if ( $mate->index_slave != $i or $mate->index_master != -1 ) {
1694	0					0	confess
1695							"Inonsistent data (3), check this module's code urgently!";
1696							}
1697							}
1698
1699							}
1700	53	50				470	if ($debug_fmt) {
1701	0					0	for (@formats) {
1702	0					0	my ( $idx, $rel ) = ( -1, "" );
1703	0	0				0	$idx = $_->index_slave, $rel = "S: " if $_->index_slave >= 0;
1704	0	0				0	$idx = $_->index_master, $rel = "M: " if $_->index_master >= 0;
1705	0					0	printf( $_debugh "%-18s %s %2d",
1706							"'" . $_->format . "'",
1707							$rel, $idx
1708							);
1709	0	0				0	print( $_debugh ": '" . $formats[$idx]->format . "'" ) if $idx >= 0;
1710	0					0	print( $_debugh "\n" );
1711							}
1712							}
1713
1714							# AMB-END
1715
1716	53					127	my %records;
1717							my $record_number;
1718	53					95	my $count_gotit = 0;
1719	53					78	my $count_ambiguous = 0;
1720	53					90	my $count_nodate = 0;
1721	53					81	my $count_empty = 0;
1722	53					71	my $has_signaled_can_start_recording_data = 0;
1723	53					115	$self->{_line_after_which_recording_can_start} = 0;
1724
1725							#
1726							# Seems a weird optimization here, but it is very important.
1727							# In some cases, divides execution time (to detect date format on big files
1728							# containing numerous fields) by 10.
1729							#
1730							# When evaluates to true, it means the input column has no identified date
1731							# format, meaning, no further check to do.
1732							#
1733	53					85	my @cache_nodate;
1734
1735	53					144	while ( my $f = _mygetline( $incsv, $inh ) ) {
1736	1418					43503	$record_number++;
1737
1738	1418	50				3439	if ($debug_fmt) {
1739	0					0	print( $_debugh "RECORD $record_number:\n" );
1740	0					0	for ( 0 .. @$f - 1 ) {
1741	0					0	printf( $_debugh " %02d: '%s'\n", $_, $f->[$_] );
1742							}
1743							}
1744
1745	1418					2452	for my $n (@fields_to_detect_format) {
1746	15285	100				24502	next if $cache_nodate[$n];
1747
1748	10444					13470	my $v = $f->[$n];
1749	10444	100				14980	$v = '' unless defined($v);
1750	10444	100				17038	next if $v eq '';
1751							next
1752	2942	100	100			8788	if defined($refsub_is_datetime_empty)
1753							and $refsub_is_datetime_empty->($v);
1754
1755	2939	50				5069	if ($debug_fmt) {
1756	0					0	my $col = $self->_col_dispname($n);
1757	0					0	print( $_debugh "Line $record_number, column '$col':\n" );
1758							}
1759
1760	2939					4647	for my $fmt (@formats) {
1761	50192					595888	my $fid = $fmt->id;
1762	50192					811538	my $fstr = $fmt->format;
1763
1764							$self->_debug_output_fmt( '** pre ', $fmt,
1765	50192	50				304131	$records{$n}->{$fid} )
1766							if $debug_fmt;
1767
1768							$records{$n}->{$fid} = RecordCounter->new(
1769							count_ok => 0,
1770							count_ko => 0,
1771							has_searched_time => 0,
1772
1773							format => undef,
1774							locale => undef,
1775
1776							has_found_time => 0,
1777							format_with_addition_of_time => undef,
1778							locale_with_addition_of_time => undef,
1779							parser_with_addition_of_time => undef
1780	50192	100				192820	) unless defined( $records{$n}->{$fid} );
1781
1782	50192	100				1068410	unless ( $records{$n}->{$fid}->count_ko ) {
1783							my $is_ok = &_try_parser(
1784							$fmt,
1785	11481					90500	$records{$n}->{$fid},
1786							$START . $v . $stop
1787							);
1788
1789	11481	100				21489	if ( !$is_ok ) {
1790	7789					10535	my $give_up_time = 0;
1791	7789	100	66			99359	if ( $records{$n}->{$fid}->count_ko == 0
			100
1792							and $records{$n}->{$fid}->has_searched_time
1793							and $records{$n}->{$fid}->has_found_time )
1794							{
1795	77		100			4346	$give_up_time = (
1796							defined( $fmt->parser ) and defined(
1797							$fmt->parser->parse_datetime(
1798							$START . $v . $stop
1799							)
1800							) ? 1 : 0
1801							);
1802	77	100				42441	if ($give_up_time) {
1803	4					80	$records{$n}->{$fid}->has_found_time(0);
1804	4					33	$is_ok = 1;
1805							}
1806							}
1807							}
1808
1809	11481	100	100			200084	if ( $is_ok or ( !$ignore_trailing_chars ) ) {
1810	6079		100			76756	my $incr =
1811							( defined( $fmt->parser ) and $is_ok ? 1 : 0 );
1812
1813	6079	100				115704	unless ( $records{$n}->{$fid}->has_searched_time ) {
1814	3156					55838	$records{$n}->{$fid}->has_searched_time(1);
1815
1816							croak
1817							"Inconsistent status! Issue in module code not in caller's!"
1818	3156	50				51260	if $records{$n}->{$fid}->count_ok != 0;
1819
1820	3156	100				21966	if ($search_time) {
		100
1821
1822	1980	50				3352	print( $_debugh
1823							" Search time in '$v', format '$fstr'\n"
1824							) if $debug_fmt;
1825
1826	1980					24125	my $t =
1827							$self->_guess_time_format( $fstr,
1828							$fmt->locale, $v, $stop );
1829	1980	100				34402	$records{$n}->{$fid}
1830							->has_found_time( ( defined($t) ? 1 : 0 ) );
1831	1980	100				14908	if ( defined($t) ) {
		100
1832	254					3696	$records{$n}->{$fid}
1833							->format_with_addition_of_time( $t->[0] );
1834	254					4677	$records{$n}->{$fid}
1835							->locale_with_addition_of_time( $t->[1] );
1836	254					4603	$records{$n}->{$fid}
1837							->parser_with_addition_of_time( $t->[2] );
1838	254					1804	$incr = 1;
1839							}
1840							elsif ( $fstr eq '' ) {
1841	200					2676	$records{$n}->{$fid}->count_ko(1);
1842							}
1843							}
1844							elsif ( $fstr eq '' ) {
1845	78					973	$records{$n}->{$fid}->count_ko(1);
1846							}
1847
1848							}
1849
1850							$records{$n}->{$fid}
1851	6079					95074	->count_ok( $records{$n}->{$fid}->count_ok + $incr );
1852
1853							$records{$n}->{$fid}
1854	6079	100	100			94759	->count_ko( $records{$n}->{$fid}->count_ko + 1 )
1855							if ( !$incr )
1856							and ( !$is_ok );
1857
1858	6079	100				31004	if ($incr) {
1859
1860							# We remove the slave if master is fine.
1861							# Depending on the order in which parsing got done, the master could
1862							# pop up first, or the slave, that is why we need manage both cases.
1863	2697	100	100			32947	if ( $fmt->index_slave >= 0
1864							or $fmt->index_master >= 0 )
1865							{
1866	1995	100				56452	my $has_slave =
1867							( $fmt->index_slave >= 0 ? 1 : 0 );
1868	1995	100				33339	my $idx = (
1869							$has_slave
1870							? $fmt->index_slave
1871							: $fmt->index_master
1872							);
1873	1995					32688	my $mate = $formats[$idx]->id;
1874	1995	100				15143	if ( exists $records{$n}->{$mate} ) {
1875	1841	100				3444	if ($has_slave) {
1876	623	100				7673	if ( $records{$n}->{$mate}->count_ko ==
1877							0 )
1878							{
1879							# I am the master: I remove the slave
1880	2					36	$records{$n}->{$mate}->count_ko(1);
1881							}
1882							}
1883							else {
1884	1218	50	66			15021	if (
			66
1885							$records{$n}->{$mate}->count_ko == 0
1886							and
1887							$records{$n}->{$mate}->count_ok >= 1
1888							and
1889							$records{$n}->{$fid}->count_ko ==
1890							0 )
1891							{
1892	117					6262	$records{$n}->{$fid}->count_ko(1);
1893							}
1894							}
1895							}
1896							}
1897							}
1898
1899							}
1900							else {
1901							$records{$n}->{$fid}
1902	5402					71277	->count_ko( $records{$n}->{$fid}->count_ko + 1 );
1903							}
1904							}
1905
1906							$self->_debug_output_fmt( ' post', $fmt,
1907	50192	50				346497	$records{$n}->{$fid} )
1908							if $debug_fmt;
1909
1910							}
1911							}
1912
1913	1418					2199	$count_gotit = 0;
1914	1418					1811	$count_ambiguous = 0;
1915	1418					2187	$count_empty = 0;
1916	1418					2563	for my $n (@fields_to_detect_format) {
1917	15285	100				24296	next if $cache_nodate[$n];
1918
1919	10444					12135	my $candidate = 0;
1920	10444					10843	my $tt = 0;
1921	10444					13604	for my $fmt (@formats) {
1922	150840					1783376	my $fid = $fmt->id;
1923	150840					910867	my $rec = $records{$n}->{$fid};
1924	150840	100				247074	next unless defined($rec);
1925
1926	92018					1059566	my $ok = $rec->count_ok;
1927	92018					1420139	my $ko = $rec->count_ko;
1928
1929	92018	50	66			608679	confess "Oups. Check this module code urgently!"
1930							if $ok == 0 and $ko == 0;
1931	92018					103875	$tt += $ok + $ko;
1932
1933	92018	100	100			177244	$candidate++ if $ok >= 1 and $ko == 0;
1934							}
1935	10444	100				19328	if ( $candidate == 1 ) {
		100
		100
1936	4754					6947	$count_gotit++;
1937							}
1938							elsif ( $candidate >= 2 ) {
1939	653					1242	$count_ambiguous++;
1940							}
1941							elsif ( $tt != 0 ) {
1942	230					289	$count_nodate++;
1943	230					466	$cache_nodate[$n] = 1;
1944							}
1945							else {
1946	4807					6378	$count_empty++;
1947							}
1948							}
1949
1950	1418	50				2765	if ($debug_fmt) {
1951	0					0	print( $_debugh "\$count_gotit = $count_gotit\n" );
1952	0					0	print( $_debugh "\$count_ambiguous = $count_ambiguous\n" );
1953	0					0	print( $_debugh "\$count_nodate = $count_nodate\n" );
1954	0					0	print( $_debugh "\$count_empty = $count_empty\n" );
1955							}
1956
1957	1418					2495	my $can_start_recording_data = 0;
1958	1418	100	100			6083	$can_start_recording_data = 1
			100
1959							if $count_gotit + $count_ambiguous + $count_nodate >= 1
1960							and ( !$count_ambiguous )
1961							and ( !$count_empty );
1962
1963	1418	100	100			8344	if ( $can_start_recording_data
1964							and ( !$has_signaled_can_start_recording_data ) )
1965							{
1966	31					54	$has_signaled_can_start_recording_data = 1;
1967
1968	31	50				86	print( $_debugh
1969							"Can start recording (all dates formats detection closed) "
1970							. "after record #$record_number\n" )
1971							if $_debug;
1972
1973	31					87	$self->{_line_after_which_recording_can_start} = $record_number;
1974	31	100				119	last unless $self->{fields_dates_auto};
1975	24	100				181	last if $self->{fields_dates_auto_optimize};
1976							}
1977							}
1978
1979	53					2822	close $inh;
1980
1981	53					168	my %dates_detailed_status;
1982							my @dates_formats;
1983	53					208	my $check_empty = 0;
1984	53					93	my $check_nodate = 0;
1985	53					117	my $check_ambiguous = 0;
1986	53					102	my $check_gotit = 0;
1987	53					284	for my $n (@fields_to_detect_format) {
1988	532					635	my @formats_ok;
1989	532					686	my $tt = 0;
1990	532					662	for my $fid ( sort keys %{ $records{$n} } ) {
	532					4539
1991	8465					74146	my $rec = $records{$n}->{$fid};
1992	8465	100	100			100208	if ( $rec->count_ok >= 1 and $rec->count_ko == 0 ) {
1993
1994	298					10002	my ( $fstr, $floc ) = ( $rec->format, $rec->locale );
1995	298	100				6546	( $fstr, $floc ) = (
1996							$rec->format_with_addition_of_time,
1997							$rec->locale_with_addition_of_time
1998							) if $rec->has_found_time;
1999
2000	298					5779	push @formats_ok, [ $fstr, $floc ];
2001							}
2002	8465					141069	$tt += $rec->count_ok + $rec->count_ko;
2003							}
2004	532					5172	my $is_ok = 0;
2005	532					621	my $format;
2006	532					657	my $locale = '';
2007	532	100	100			1835	if ( $#formats_ok < 0 and $tt == 0 ) {
		100
		100
2008	16					36	$format = "Z";
2009	16					34	$check_empty++;
2010							}
2011							elsif ( $#formats_ok < 0 ) {
2012	230					304	$format = "N";
2013	230					279	$check_nodate++;
2014							}
2015							elsif ( $#formats_ok > 0 ) {
2016	12					25	$format = "A";
2017	12					16	$check_ambiguous++;
2018							}
2019							else {
2020	274					337	$is_ok = 1;
2021	274					438	$format = $formats_ok[0]->[0];
2022	274					345	$locale = $formats_ok[0]->[1];
2023	274					349	$check_gotit++;
2024							}
2025	532					1034	my $col = $self->_col_dispname($n);
2026
2027							$dates_detailed_status{$col} = $format
2028	532	50				1586	unless exists $dates_detailed_status{$col};
2029	532	100	66			2047	$dates_formats[$n] = [ $format, $locale ]
2030							if $is_ok and ( !defined $dates_formats[$n] );
2031							}
2032							$dates_detailed_status{'.'} =
2033	53					187	$self->{_line_after_which_recording_can_start};
2034
2035	53	50	66			440	if ( $check_empty != $count_empty
			66
			33
2036							or $check_nodate != $count_nodate
2037							or $check_ambiguous != $count_ambiguous
2038							or $check_gotit != $count_gotit )
2039							{
2040							# The below condition can happen with an empty CSV (empty file (no header) or
2041							# only a header line).
2042	1	50	33			71	if ( $count_empty
			33
			33
			33
			33
			33
2043							or $check_nodate
2044							or $count_nodate
2045							or $check_ambiguous
2046							or $count_ambiguous
2047							or $check_gotit
2048							or $count_gotit )
2049							{
2050	0					0	print( STDERR "\$check_empty = $check_empty\n" );
2051	0					0	print( STDERR "\$count_empty = $count_empty\n" );
2052	0					0	print( STDERR "\$check_nodate = $check_nodate\n" );
2053	0					0	print( STDERR "\$count_nodate = $count_nodate\n" );
2054	0					0	print( STDERR "\$check_ambiguous = $check_ambiguous\n" );
2055	0					0	print( STDERR "\$count_ambiguous = $count_ambiguous\n" );
2056	0					0	print( STDERR "\$check_gotit = $check_gotit\n" );
2057	0					0	print( STDERR "\$count_gotit = $count_gotit\n" );
2058	0					0	confess "Oups! Check immediately this module code, man!";
2059							}
2060							}
2061
2062	53	50				140	if ($debug_fmt) {
2063
2064							# A very detailed debug output
2065	0					0	for my $n (@fields_to_detect_format) {
2066	0					0	my $col = $self->_col_dispname($n);
2067	0					0	print( $_debugh "$col\n" );
2068	0					0	printf( $_debugh " %-25s %3s %3s\n", "format", "OK", "KO" );
2069	0					0	for my $fid ( sort keys %{ $records{$n} } ) {
	0					0
2070	0					0	my $rec = $records{$n}->{$fid};
2071	0					0	my $cc = '';
2072	0	0	0			0	$cc = "(" . $rec->locale . ")"
2073							if defined( $rec->locale )
2074							and $rec->locale ne '';
2075	0					0	printf( $_debugh " %-25s %3d %3d\n",
2076							$rec->format . $cc,
2077							$rec->count_ok, $rec->count_ko
2078							);
2079							}
2080							}
2081							}
2082
2083							# Not a typo - displaying it IN ADDITION to debug output above is done on
2084							# purpose...
2085	53	50				140	if ($_debug) {
2086
2087							# A shorter (as compared to above) output of outcome of DateTime detection
2088	0					0	print( $_debugh "Result of DateTime detection:\n" );
2089	0					0	printf( $_debugh "%-3s %-25s %-30s %s\n",
2090							'###', 'FIELD',
2091							'DATETIME FORMAT',
2092							'DATETIME LOCALE'
2093							);
2094	0					0	for my $n (@fields_to_detect_format) {
2095	0					0	my ( $fmt, $loc ) = ( '<undef>', '<undef>' );
2096	0	0				0	if ( defined( $dates_formats[$n] ) ) {
2097	0					0	( $fmt, $loc ) = @{ $dates_formats[$n] }[ 0, 1 ];
	0					0
2098							}
2099	0					0	printf( $_debugh "%03d %-25s %-30s %s\n",
2100							$n, $self->_col_dispname($n),
2101							$fmt, $loc
2102							);
2103							}
2104							}
2105
2106	53	100				168	if ( !$self->{fields_dates_auto} ) {
2107	10					20	my $e = 0;
2108	10					23	for my $n (@fields_to_detect_format) {
2109	20	100				53	next if defined( $dates_formats[$n] );
2110	7					17	$self->_print_error(
2111							"unable to detect DateTime format of field '"
2112							. $self->_col_dispname($n) . "'",
2113							1
2114							);
2115	7					195	$e++;
2116							}
2117							$self->_print_error(
2118	10	100				44	"$e field(s) encountered with unknown DateTime format")
2119							if $e;
2120							}
2121
2122	51					642	$self->{_dates_detailed_status} = {%dates_detailed_status};
2123	51					10202	$self->{_dates_formats} = [@dates_formats];
2124							}
2125
2126							sub _debug_output_fmt {
2127	0			0		0	my ( $self, $prefix, $fmt, $rec ) = @_;
2128
2129	0					0	my $_debugh = $self->{_debugh};
2130
2131	0					0	my ( $fstr, $floc ) = ( $fmt->format, $fmt->locale );
2132	0	0	0			0	( $fstr, $floc ) = (
2133							'<+T>' . $rec->format_with_addition_of_time,
2134							$rec->locale_with_addition_of_time
2135							)
2136							if defined($rec)
2137							and $rec->has_found_time;
2138
2139	0					0	my $locstr = '';
2140	0	0	0			0	$locstr = "(" . $floc . ")" if defined($floc) and $floc ne '';
2141
2142	0					0	my $tmpok;
2143	0	0				0	$tmpok = $rec->count_ok if defined($rec);
2144	0	0				0	$tmpok = '<undef>' unless defined($tmpok);
2145	0					0	my $tmpko;
2146	0	0				0	$tmpko = $rec->count_ko if defined($rec);
2147	0	0				0	$tmpko = '<undef>' unless defined($tmpko);
2148
2149	0					0	print( $_debugh
2150							"$prefix (format '$fstr$locstr': OK = $tmpok, KO = $tmpko)\n" );
2151							}
2152
2153							# When no parse can be done (parser to test is undef), return 1
2154							sub _try_parser {
2155	11481			11481		21108	my ( $fmt, $rec, $value_to_parse ) = @_;
2156
2157	11481					142300	my $parser = $fmt->parser;
2158	11481	100				191018	$parser = $rec->parser_with_addition_of_time if $rec->has_found_time;
2159
2160	11481					92946	my $is_ok = 1;
2161	11481	100				37082	$is_ok = ( defined( $parser->parse_datetime($value_to_parse) ) ? 1 : 0 )
		100
2162							if $parser;
2163
2164	11481	100				2283436	unless ( defined( $rec->format ) ) {
2165	8465					148944	$rec->format( $fmt->format );
2166	8465					263129	$rec->locale( $fmt->locale );
2167							}
2168
2169	11481					205131	return $is_ok;
2170							}
2171
2172							#
2173							# By checking Text::AutoCSV with NYTProf, I saw the t/02-dates2.t test was long
2174							# because of the time to create Strptime parsers all along execution (while
2175							# detecting formats of dates/times). I spent some time caching (manually, and
2176							# painfully) Strptime parsers for naked dates, but didn't afford to doing it for
2177							# the time detection.
2178							#
2179							# Then I came upon knowing of the Memoize module while reading
2180							# HIGHER-ORDER PERL
2181							# of
2182							# Mark Jason Dominus, editor: Morgan Kaufmann Publishers (Elsevier)
2183							# You can find here (I strongly recommend your reading this book):
2184							# http://hop.perl.plover.com/book/pdf/HigherOrderPerl.pdf
2185							#
2186							# TODO
2187							# Actually a code rewriting should be done, to discard all the caching work done
2188							# for naked dates and just rely on this memoization, that is just perfect
2189							# feature for this case.
2190							#
2191							memoize('_build_strptime_parser');
2192							sub _build_strptime_parser { DateTime::Format::Strptime->new(@_) }
2193
2194							sub _guess_time_format {
2195
2196							# IMPORTANT
2197							# Formats are tested in the order of the list below, and the first one that
2198							# succeeds stops the tests.
2199							# That makes the order of the elements important: %R would match any value
2200							# that'd also match %T, that'd cause to return %R whereas %T would be
2201							# possible. Same with AM/PM formats. Thus the longest patterns appear first,
2202							# and in particular, the patterns ending with "AM/PM" appear before all
2203							# others.
2204	1980			1980		15866	my @T = ( '%I:%M:%S %p', '%I:%M %p', '%I:%M:%S%p', '%I:%M%p', '%T', '%R' );
2205
2206	1980					4053	my ( $self, $format, $locale, $v, $stop ) = @_;
2207
2208	1980					3132	my $_debugh = $self->{_debugh};
2209	1980		33			4227	my $debug_fmt = ( $self->{_debug} and $DEBUG_DATETIME_FORMATS );
2210
2211	1980	100				4136	return if $format =~ /:/;
2212
2213	1976					2392	my $sep;
2214	1976	100				3441	if ( $format eq '' ) {
2215	325					599	$sep = '';
2216							}
2217							else {
2218	1651	100				8853	unless ( ( undef, $sep ) =
2219							$v =~ /(^\|\d([^0-9:]+))(\d{1,2}):(\d{1,2})(\D\|$)/ )
2220							{
2221	393	100				789	if ( $v =~ /\d{4}:\d{2}(\D\|$)/ ) {
2222	29					59	$sep = '';
2223							}
2224							else {
2225
2226	364	50				691	print( $_debugh
2227							"_guess_time_format(): separator candidate not found in '$v'\n"
2228							) if $debug_fmt;
2229
2230	364					867	return;
2231							}
2232							}
2233							}
2234	1612	100				3839	$sep = '' unless defined($sep);
2235
2236							#
2237							# IMPORTANT
2238							#
2239							# The code below allows to successfully detect DateTime format when
2240							# the first lines contain things like:
2241							# Jan 20 2017 2:00AM
2242							# that could lead to a separator set to ' ' while actually it should be ' '. In
2243							# this case
2244							# if the double-space is kept, then a later value of
2245							# Jan 20 2017 10:00AM
2246							# won't be parsed.
2247							#
2248							# See t/11-bugfix.t, BUG 5, for an explanation of why the line below.
2249							#
2250	1612	100				3048	$sep = ' ' if $sep eq ' ';
2251
2252	1612	50				2937	if ($debug_fmt) {
2253	0					0	print( $_debugh " _guess_time_format(): Searching time in '$v'\n" );
2254							}
2255
2256	1612					2707	for my $t (@T) {
2257	9193					220786	my $increased_format = "$format$sep$t";
2258
2259	9193	50				15009	print( $_debugh
2260							" _guess_time_format(): Trying format '$increased_format'\n" )
2261							if $debug_fmt;
2262
2263	9193					20363	my %opts = ( pattern => $START . $increased_format . $stop );
2264	9193	100	66			27603	$opts{locale} = $locale if defined($locale) and $locale ne '';
2265	9193					133851	my $parser_of_increased_format = _build_strptime_parser(%opts);
2266							next
2267							unless defined(
2268	9193	100				1359577	$parser_of_increased_format->parse_datetime( $START . $v . $stop )
2269							);
2270
2271	254	50				155067	if ($debug_fmt) {
2272	0					0	print( $_debugh " _guess_time_format(): found time in '$v'\n" );
2273	0					0	print( $_debugh " Initial format: '$format'\n" );
2274	0					0	print( $_debugh " Increased format: '$increased_format'\n" );
2275							}
2276
2277	254					1360	return [ $increased_format, $locale, $parser_of_increased_format ];
2278							}
2279	1358					42918	return;
2280							}
2281
2282							# * ********************************* *
2283							# * END OF DATE FORMAT DETECTION CODE *
2284							# * ********************************* *
2285
2286							# Take the string of a header in $_ and replace it with the corresponding field
2287							# name
2288							sub _header_to_field_name {
2289	1265			1265		2011	$_ = remove_accents($_);
2290	1265					2483	s/[^[:alnum:]_]//gi;
2291	1265					6845	s/^.*$/\U$&/;
2292							}
2293
2294							# Return 0 if error, 1 if all good
2295							sub _S2_init_fields_from_header {
2296	355			355		617	my $self = shift;
2297
2298	355					613	my $has_headers = $self->{has_headers};
2299	355					613	my $_debug = $self->{_debug};
2300	355					570	my $_debugh = $self->{_debugh};
2301
2302	355					827	my $in_file_disp = $self->get_in_file_disp();
2303
2304	355					603	my $inh = $self->{_inh};
2305	355					572	my $incsv = $self->{_in_csvobj};
2306
2307	355					626	$self->{_row_read} = 0;
2308
2309	355					581	my @columns;
2310							my @headers;
2311	355	100				740	if ($has_headers) {
2312
2313							print( $_debugh
2314							"$PKG: '$in_file_disp': will parse header line to get column names\n"
2315	344	50				793	) if $self->{_debug_read};
2316
2317	344					558	$self->{_row_read}++;
2318
2319							print( $_debugh "$PKG: '$in_file_disp': will read line #"
2320							. $self->{_row_read}
2321							. "\n" )
2322	344	50				716	if $self->{_debug_read};
2323
2324	344	50				790	if ( defined( $self->{_inh_header} ) ) {
2325	0					0	my $l = $self->{_inh_header};
2326	0					0	my $inmemh;
2327	0	0				0	if ( !open( $inmemh, '<', \$l ) ) { ## no critic (InputOutput::RequireBriefOpen)
2328	0					0	$self->_print_error(
2329							"can't open header line in-memory. CSV read aborted.");
2330	0					0	return 0;
2331							}
2332	0					0	@headers = @{ _mygetline( $incsv, $inmemh ) };
	0					0
2333							}
2334							else {
2335	344					705	my $r = _mygetline( $incsv, $inh );
2336	342	50				15499	@headers = @{$r} if defined($r);
	342					1168
2337							}
2338							print( $_debugh "Line "
2339							. $self->{_row_read}
2340							. ":\n--\n"
2341							. join( '::', @headers )
2342							. "\n--\n" )
2343	342	50				1068	if $self->{_debug_read};
2344							}
2345
2346	353	100	100			1513	if ( $has_headers and ( !defined $self->{fields_column_names} ) ) {
2347	336					545	my %indexes;
2348	336	100				782	if ( defined( $self->{fields_hr} ) ) {
2349	12	50				56	if (
2350							!$self->_process_header(
2351							\@headers, $self->{fields_hr}, \%indexes
2352							)
2353							)
2354							{
2355	0					0	$self->_print_error("missing headers. CSV read aborted.");
2356	0					0	return 0;
2357							}
2358	12	50				34	if ($_debug) {
2359	0					0	print( $_debugh " \%indexes:\n" );
2360	0					0	for my $k ( sort keys %indexes ) {
2361	0					0	print( $_debugh " \t$k => $indexes{$k}\n" );
2362							}
2363							}
2364	12					68	for ( sort keys %indexes ) {
2365	48	50				90	next if $_ eq '';
2366	48					95	$columns[ $indexes{$_} ] = $_;
2367							}
2368							}
2369							else {
2370	324					772	@columns = @headers;
2371	324					632	map { _header_to_field_name } @columns;
	1265					7838
2372							}
2373							}
2374
2375	14					46	@columns = @{ $self->{fields_column_names} }
2376	353	100				1107	if defined( $self->{fields_column_names} );
2377
2378							# Avoid undef in column names... I prefer empty strings
2379	353	100				685	@columns = map { defined($_) ? $_ : '' } @columns;
	1389					3204
2380
2381	353	50				841	if ($_debug) {
2382	0					0	print( $_debugh "-- CSV headers management\n" );
2383	0	0				0	if (@columns) {
2384	0					0	printf( $_debugh " %-3s %-40s %-40s\n",
2385							'COL', 'CSV Header', 'Hash Key' );
2386	0					0	for my $i ( 0 .. $#columns ) {
2387	0					0	my $h = '';
2388	0	0				0	$h = $headers[$i] if defined( $headers[$i] );
2389	0					0	printf( $_debugh " %03d %-40s %-40s\n",
2390							$i, "'$h'", "'$columns[$i]'" );
2391							}
2392							}
2393							else {
2394	0					0	print( $_debugh " No headers\n" );
2395							}
2396							}
2397
2398	353					511	my %regular_named_fields;
2399	353					1027	for my $i ( 0 .. $#columns ) {
2400	1389	100	66			5341	$regular_named_fields{ $columns[$i] } = $i
2401							if defined( $columns[$i] )
2402							and $columns[$i] ne '';
2403							}
2404	353					1773	$self->{_regular_named_fields} = {%regular_named_fields};
2405	353					1243	$self->{_S2_columns} = [@columns];
2406	353	100				1256	$self->{_S2_headers} = [@headers] if $has_headers;
2407
2408	353					1363	return 1;
2409							}
2410
2411							sub out_header {
2412	8			8	1	17	my $self = shift;
2413	8					108	validate_pos( @_, { type => SCALAR }, { type => SCALAR } );
2414
2415	8					34	my ( $field, $header ) = @_;
2416	8	100				25	$self->{_out_headers} = {} unless exists $self->{_out_headers};
2417
2418							$self->_print_warning("out_header: field $field already set")
2419	8	50				23	if exists $self->{_out_headers}->{$field};
2420
2421	8					21	$self->{_out_headers}->{$field} = $header;
2422
2423	8					35	return $self;
2424							}
2425
2426							# Return 0 if error, 1 if all good
2427							sub _S3_init_fields_extra {
2428	355			355		591	my $self = shift;
2429
2430	355					633	my $_debug = $self->{_debug};
2431	355					591	my $_debugh = $self->{_debugh};
2432
2433	355					564	my $verbose = $self->{verbose};
2434
2435	355					546	my $has_headers = $self->{has_headers};
2436
2437	355					471	my %named_fields = %{ $self->{_regular_named_fields} };
	355					1619
2438	355					730	my @columns = @{ $self->{_S2_columns} };
	355					917
2439	355					554	my @headers;
2440	355	100				798	@headers = @{ $self->{_S2_headers} } if $has_headers;
	344					856
2441
2442	355					629	my @extra_fields_indexes;
2443							my @extra_fields_definitions_list;
2444	36					70	@extra_fields_definitions_list = @{ $self->{_extra_fields} }
2445	355	100				808	if exists $self->{_extra_fields};
2446	355					579	my %extra_fields_definitions;
2447
2448							my @multiline;
2449	355	100				843	@multiline = @{ $self->{_multiline} } if defined( $self->{_multiline} );
	340					641
2450
2451	355					502	my @coldata;
2452	355					819	for my $i ( 0 .. $#columns ) {
2453	1395					46899	my $col = $columns[$i];
2454	1395					1771	my $h;
2455	1395	100				2637	$h = $headers[$i] if $has_headers;
2456	1395	100				23846	push @coldata,
2457							ColData->new(
2458							field_name => $col,
2459							header_text => $h,
2460							description => '',
2461							multiline => ( $multiline[$i] ? 'm' : '1' )
2462							);
2463							}
2464
2465	355					15094	for my $edef (@extra_fields_definitions_list) {
2466	92					3788	my $c = $edef->check_field_existence;
2467	92	100				612	if ( defined($c) ) {
2468	80	100				178	unless ( exists $named_fields{$c} ) {
2469	9					112	$self->_print_error(
2470							"unknown field '" . $edef->check_field_existence . "'",
2471							0, ERR_UNKNOWN_FIELD, {%named_fields} );
2472	8					21	next;
2473							}
2474							}
2475
2476	83					139	my @e_eclated = $edef;
2477
2478	83	100	100			970	if ( $edef->ef_type == $EF_LINK and $edef->link_remote_read eq '*' ) {
2479	2					68	my @cols = $edef->link_remote_obj->get_fields_names();
2480
2481	2					4	@e_eclated = ();
2482	2					8	my %nf = %named_fields;
2483
2484	2					5	for my $c (@cols) {
2485
2486	4					59	my $ex_base = $edef->self_name . $c;
2487	4					27	my $ex_target = $ex_base;
2488	4					7	my $i = 1;
2489	4					11	while ( exists $nf{$ex_target} ) {
2490	1					2	$i++;
2491	1					5	$ex_target = $ex_base . '_' . $i;
2492							}
2493
2494	4					50	my $e = ExtraField->new(
2495							ef_type => $EF_LINK,
2496							self_name => $ex_target,
2497							description => $edef->description . " ($c)",
2498
2499							link_self_search => $edef->link_self_search,
2500							link_remote_obj => $edef->link_remote_obj,
2501							link_remote_search => $edef->link_remote_search,
2502							link_remote_read => $c,
2503
2504							link_vlookup_opts => $edef->link_vlookup_opts
2505							);
2506	4					354	push @e_eclated, $e;
2507	4					14	$nf{$ex_target} = undef;
2508							}
2509							}
2510
2511	83					1166	for my $e1 (@e_eclated) {
2512	85	100				1126	if ( exists $named_fields{ $e1->self_name } ) {
2513	6					101	$self->_print_error( "extra field: duplicate field name: '"
2514							. $e1->self_name
2515							. "'" );
2516	6					17	next;
2517							}
2518
2519	79					523	my $index_of_new_element = $#columns + 1;
2520	79					117	push @extra_fields_indexes, $index_of_new_element;
2521	79					925	$columns[$index_of_new_element] = $e1->self_name;
2522	79					1304	$named_fields{ $e1->self_name } = $index_of_new_element;
2523	79					1248	$extra_fields_definitions{ $e1->self_name } = $e1;
2524
2525	79	50				1325	push @headers, $e1->self_name if $has_headers;
2526	79					1244	push @coldata,
2527							ColData->new(
2528							field_name => $e1->self_name,
2529							header_text => $e1->self_name,
2530							description => $e1->description,
2531							multiline => '?'
2532							);
2533							}
2534
2535							}
2536	354	100				3314	$self->{_headers} = [@headers] if $has_headers;
2537	354					770	$self->{_extra_fields_indexes} = [@extra_fields_indexes];
2538	354					1075	$self->{_columns} = [@columns];
2539	354					858	$self->{_extra_fields_definitions} = {%extra_fields_definitions};
2540
2541	354					1651	$self->{_named_fields} = {%named_fields};
2542
2543	354					1269	$self->_detect_dates_formats();
2544
2545	351					1478	$self->{_read_update_after_ar} = [];
2546	351					931	$self->{_write_update_before_ar} = [];
2547	351					508	my @dates_formats;
2548	60					231	@dates_formats = @{ $self->{_dates_formats} }
2549	351	100				843	if defined( $self->{_dates_formats} );
2550	351					958	for my $i ( 0 .. $#columns ) {
2551	1455					2097	my $dt_format;
2552							my $dt_locale;
2553	1455	100				2652	if ( defined( $dates_formats[$i] ) ) {
2554	330					598	$dt_format = $dates_formats[$i]->[0];
2555	330					506	$dt_locale = $dates_formats[$i]->[1];
2556							}
2557	1455					21525	$coldata[$i]->dt_format($dt_format);
2558	1455					25988	$coldata[$i]->dt_locale($dt_locale);
2559
2560	1455	100				9777	next unless defined($dt_format);
2561
2562	330					449	my %opts_in;
2563	330	100	66			1077	$opts_in{locale} = $dt_locale
2564							if defined($dt_locale)
2565							and $dt_locale ne '';
2566
2567	330					4295	my $obj_strptime_in =
2568							_build_strptime_parser( pattern => $dt_format, %opts_in );
2569
2570	330					74967	my %opts_out;
2571							my $loc_out = (
2572							exists $self->{out_dates_locale}
2573							? $self->{out_dates_locale}
2574	330	50				786	: $dt_locale
2575							);
2576	330	100	66			1285	$opts_out{locale} = $loc_out if defined($loc_out) and $loc_out ne '';
2577							my $obj_strptime_out = _build_strptime_parser(
2578							pattern => (
2579							exists $self->{out_dates_format}
2580							? $self->{out_dates_format}
2581	330	50				4947	: $dt_format
2582							),
2583							%opts_out
2584							);
2585
2586	330					6947	my $refsub_is_datetime_empty = $self->{_refsub_is_datetime_empty};
2587	330					849	my $in_file_disp = $self->get_in_file_disp();
2588
2589							$self->{_read_update_after_ar}->[$i] = sub {
2590							return
2591	352	100	66	352		1657	if ( !defined $_ )
			33
			66
2592							or $_ eq ''
2593							or ( defined($refsub_is_datetime_empty)
2594							and $refsub_is_datetime_empty->($_) );
2595
2596	315					447	my $s = $_[0];
2597	315					603	my $field = _get_def( $_[1], '<?>' );
2598
2599	315					905	my $dt = $obj_strptime_in->parse_datetime($_);
2600
2601	315	0	33			167140	if ( $_debug
			33
2602							and $DEBUG_DATETIME_FORMATS
2603							and $DEBUG_DATETIME_FORMATS_EVEN_MORE )
2604							{
2605	0	0				0	print( $_debugh "-- Record "
2606							. $s->get_recnum()
2607							. ", field '$field':\n String parsed: '$_'\n"
2608							. " Parse format: '$dt_format'\n"
2609							. " DateTime obj: <"
2610							. ( defined($dt) ? $dt . '' : 'undef' )
2611							. ">\n" );
2612							}
2613
2614	315	100				643	if ( !defined($dt) ) {
2615	2					10	my $recnum = $s->get_recnum();
2616	2	50				6	if ($verbose) {
2617	0					0	$s->_print( "$PKG: "
2618							. "$in_file_disp: record $recnum: field $field: "
2619							. "unable to parse DateTime\n" );
2620	0					0	$s->_print("$PKG: field: '$_'\n");
2621	0					0	$s->_print("$PKG: format: '$dt_format'\n");
2622	0	0				0	$s->_print( "$PKG: "
2623							. "locale: '"
2624							. ( $dt_locale eq '' ? '<none>' : $dt_locale )
2625							. "'\n" );
2626	0					0	$s->_print( "$PKG: "
2627							. "Probable cause: when detecting DateTime format, "
2628							. "$PKG will stop reading\n" );
2629	0					0	$s->_print( "$PKG: "
2630							. "input as soon as the format is worked out. "
2631							. "If a value found later\n" );
2632	0					0	$s->_print( "$PKG: "
2633							. "turns out to use another DateTime format, "
2634							. "it'll generate a DateTime\n" );
2635	0					0	$s->_print("$PKG: parse error, as is the case now.\n");
2636	0					0	$s->_print_error("unable to parse DateTime");
2637							}
2638							else {
2639	2					18	$s->_print_error(
2640							"$in_file_disp: record $recnum: field $field: "
2641							. "unable to parse DateTime '$_'" );
2642							}
2643							}
2644
2645	314					545	return $dt;
2646	330					2241	};
2647							$self->{_write_update_before_ar}->[$i] = sub {
2648	96	100		96		181	return '' unless defined($_);
2649	87	100				188	return $_ if !ref($_);
2650	81	50				231	return $_ unless $_->isa('DateTime');
2651
2652	81					259	my $str = $obj_strptime_out->format_datetime($_);
2653
2654	81	50				14831	if ( !defined($str) ) {
2655	0					0	my $s = $_[0];
2656	0					0	my $recnum = $s->get_recnum();
2657	0					0	my $field = _get_def( $_[1], '<?>' );
2658	0					0	$s->_print_error(
2659							"$in_file_disp: record $recnum: field $field: "
2660							. "unable to print DateTime '$_'" );
2661							}
2662
2663	81					164	return $str;
2664	330					1479	};
2665							}
2666
2667	351					1352	$self->{_coldata} = [@coldata];
2668
2669	351					1321	my @loop = (
2670							[ '_read_update_after_hr', '_read_update_after_ar', 'read post' ],
2671							[ '_write_update_before_hr', '_write_update_before_ar', 'write pre' ]
2672							);
2673	351					955	for my $ii ( 0 .. $#loop ) {
2674	702					1074	my $l = $loop[$ii];
2675
2676	702					1283	my $ht = $self->{ $l->[0] };
2677	702					907	my @subrefs = @{ $self->{ $l->[1] } };
	702					1421
2678	702					953	for my $field ( keys %{$ht} ) {
	702					1503
2679	70	50				136	unless ( exists $named_fields{$field} ) {
2680	0					0	$self->_print_error( $l->[2] . ": unknown field '$field'",
2681							0, ERR_UNKNOWN_FIELD, {%named_fields} );
2682	0					0	next;
2683							}
2684
2685	70					93	my $i = $named_fields{$field};
2686
2687	70					106	my @allsubs;
2688	70					86	push @allsubs, @{ $ht->{$field} };
	70					119
2689	70	100				128	if ( defined( $subrefs[$i] ) ) {
2690	2	50				6	unshift @allsubs, $subrefs[$i] if $ii == 0;
2691	2	50				4	push @allsubs, $subrefs[$i] if $ii == 1;
2692							}
2693
2694							my $finalsub = sub {
2695	196			196		253	for my $s (@allsubs) {
2696	262					802	$_ = $s->(@_);
2697							}
2698	189					3462	return $_;
2699	70					198	};
2700	70					138	$subrefs[$i] = $finalsub;
2701
2702							}
2703	702					1859	$self->{ $l->[1] } = [@subrefs];
2704							}
2705
2706	351					601	my $tmp;
2707	351					1337	$tmp = _get_def( $self->{out_fields}, $self->{write_fields} );
2708	351					826	my @wf;
2709	351	100				817	@wf = @$tmp if defined $tmp;
2710	351					571	my $count_field_not_found = 0;
2711	351					770	for (@wf) {
2712	16	100	66			102	next if ( !defined $_ ) or $_ eq '' or exists $named_fields{$_};
			100
2713	3					6	$count_field_not_found++;
2714	3					25	$self->_print_error( "out_fields: unknown field '$_'",
2715							1, ERR_UNKNOWN_FIELD, {%named_fields} );
2716							}
2717	351	100				751	if ($count_field_not_found) {
2718	2					18	$self->_print_error("non existent field(s) encountered");
2719	1					10	delete $self->{out_fields};
2720	1					4	delete $self->{write_fields};
2721							}
2722
2723	350					507	my %sh;
2724	350	100				819	%sh = %{ $self->{_out_headers} } if defined( $self->{_out_headers} );
	4					19
2725	350					559	$count_field_not_found = 0;
2726	350					837	for ( keys %sh ) {
2727	8	100	33			47	next if ( !defined $_ ) or $_ eq '' or exists $named_fields{$_};
			66
2728	2					3	$count_field_not_found++;
2729	2					13	$self->_print_error( "out_header: unknown field '$_'",
2730							1, ERR_UNKNOWN_FIELD, {%named_fields} );
2731							}
2732	350	100				668	$self->_print_error("non existent field(s) encountered")
2733							if $count_field_not_found;
2734
2735	349					2311	return 1;
2736							}
2737
2738							#
2739							# Return 0 if there's no more records (error or eof reached), 1 if a record got
2740							# read successfully.
2741							#
2742							# If return value is 1:
2743							# $$ref_ar and $$ref_hr are set to array ref and hash ref of the record,
2744							# respectively
2745							#
2746							# If return value is 0:
2747							# $$ref_ar and $$ref_hr are set to undef if an error occured
2748							# $$ref_ar and $$ref_hr are set to a scalar if eof reached
2749							#
2750							sub _read_one_record_from_input {
2751	2341			2341		4250	my ( $self, $ref_ar, $ref_row_hr ) = @_;
2752
2753	2341					3549	my $_debug = $self->{_debug};
2754	2341					3369	my $_debug_extra_fields = $self->{_debug_extra_fields};
2755	2341					3050	my $_debugh = $self->{_debugh};
2756
2757	2341					4328	my $in_file_disp = $self->get_in_file_disp();
2758
2759	2341					3477	my $incsv = $self->{_in_csvobj};
2760	2341					2822	my $ar;
2761
2762							print( $_debugh "$PKG: '$in_file_disp': will read line #"
2763							. ( $self->{_row_read} + 1 )
2764							. "\n" )
2765	2341	50				4048	if $self->{_debug_read};
2766
2767	2341	100				4056	unless ( $ar = _mygetline( $incsv, $self->{_inh} ) ) {
2768	286	50				10007	if ( !$incsv->eof() ) {
2769	0					0	my ( $code, $str, $pos ) = $incsv->error_diag();
2770	0					0	$self->_print_error( "$code: $str, record "
2771							. $incsv->record_number
2772							. ", position $pos" );
2773	0					0	$$ref_ar = undef;
2774	0					0	$$ref_row_hr = undef;
2775							}
2776							else {
2777	286					1781	$$ref_ar = 1;
2778	286					683	$$ref_row_hr = 1;
2779							}
2780
2781	286					861	$self->_close_inh();
2782
2783	286					944	return 0;
2784							}
2785
2786	2055					58344	$self->{_row_read}++;
2787
2788	2055					2789	my %named_fields = %{ $self->{_named_fields} };
	2055					9074
2789
2790	2055	50				4801	if ( $self->{_debug_read} ) {
2791	0					0	print( $_debugh "Line " . $self->{_row_read} . ":\n--\n" );
2792	0					0	for ( sort keys %named_fields ) {
2793	0					0	my $c = _get_def( $ar->[ $named_fields{$_} ], '<undef>' );
2794	0					0	print( $_debugh " $_ => '" . $c . "'\n" );
2795							}
2796							}
2797
2798	2055					2949	my $columns_ar = $self->{_columns};
2799
2800	2055					2871	my $no_undef = $self->{no_undef};
2801	2055	100				3325	if ($no_undef) {
2802	30					37	for ( 0 .. $#{$columns_ar} ) {
	30					68
2803	324	100				575	$ar->[$_] = '' unless defined( $ar->[$_] );
2804							}
2805							}
2806
2807	2055					2988	my $row_hr = {};
2808							$row_hr->{$_} = $ar->[ $self->{_regular_named_fields}->{$_} ]
2809	2055					2751	foreach keys %{ $self->{_regular_named_fields} };
	2055					12825
2810
2811	2055					4016	my $rpost = $self->{_read_update_after_ar};
2812	2055					2729	for my $i ( 0 .. $#{$columns_ar} ) {
	2055					4431
2813	10586					13242	my $subref = $rpost->[$i];
2814	10586	100				17051	next unless defined($subref);
2815
2816	479					545	do {
2817	479					650	my $field = $columns_ar->[$i];
2818	479					746	local $_ = $ar->[$i];
2819	479					820	my $new_val = $subref->( $self, $field );
2820	474					820	$ar->[$i] = $new_val;
2821	474	50				1515	$row_hr->{$field} = $new_val if defined($field);
2822							}
2823
2824							}
2825
2826	2050					2815	for my $i ( @{ $self->{_extra_fields_indexes} } ) {
	2050					3742
2827	442					599	my $name = $columns_ar->[$i];
2828	442					628	my $e = $self->{_extra_fields_definitions}->{$name};
2829
2830	442	50				714	print( $_debugh "Extra field: #$i: $name\n" ) if $_debug_extra_fields;
2831
2832	442					483	my $value;
2833	442	100				6481	if ( $e->ef_type == $EF_LINK ) {
		100
		50
2834
2835	238	50				1592	print( $_debugh " linked field\n" ) if $_debug_extra_fields;
2836
2837	238					2874	my $remobj = $e->link_remote_obj;
2838							$value =
2839							$remobj->vlookup( $e->link_remote_search,
2840	238					3799	$ar->[ $named_fields{ $e->link_self_search } ],
2841							$e->link_remote_read, $e->link_vlookup_opts );
2842
2843							}
2844							elsif ( $e->ef_type == $EF_FUNC ) {
2845
2846	51	50				1085	print( $_debugh " computed field\n" ) if $_debug_extra_fields;
2847
2848	51					622	$value = $e->func_sub->( $name, $row_hr, $self->{_stats} );
2849
2850							}
2851							elsif ( $e->ef_type == $EF_COPY ) {
2852
2853	153	50				5323	print( $_debugh " copy field\n" ) if $_debug_extra_fields;
2854
2855	153					1768	my $input = $row_hr->{ $e->copy_source };
2856	153	50	33			988	$input = '' if ( !defined $input ) and $no_undef;
2857	153	100				1709	if ( defined( $e->copy_sub ) ) {
2858	57					347	local $_ = $input;
2859	57					641	$value = $e->copy_sub->();
2860							}
2861							else {
2862	96					576	$value = $input;
2863							}
2864
2865	153	50				868	print( $_debugh " in: '$input', out: '$value'\n" )
2866							if $_debug_extra_fields;
2867
2868							}
2869							else {
2870	0					0	confess "Unknown ef_type '"
2871							. $e->ef_type
2872							. "', check this module' code urgently!";
2873							}
2874
2875	440	100	100			1456	$value = '' if ( !defined $value ) and $no_undef;
2876	440					755	$ar->[$i] = $value;
2877	440					694	$row_hr->{$name} = $value;
2878
2879	440	50				854	print( $_debugh " $name => '$value'\n" ) if $_debug_extra_fields;
2880
2881							}
2882
2883	2048	100				3762	if ( defined( $self->{read_post_update_hr} ) ) {
2884							$self->{read_post_update_hr}
2885	33					78	->( $row_hr, $self->{_stats}, $self->get_recnum() );
2886	33					264	$ar->[ $named_fields{$_} ] = $row_hr->{$_} foreach keys %named_fields;
2887							}
2888
2889	2048	100				5774	lock_keys(%$row_hr) if $self->{croak_if_error};
2890
2891							$self->{walker_ar}->( $ar, $self->{_stats}, $self->get_recnum() )
2892	2048	100				13469	if defined( $self->{walker_ar} );
2893							$self->{walker_hr}->( $row_hr, $self->{_stats}, $self->get_recnum() )
2894	2048	100				4136	if defined( $self->{walker_hr} );
2895
2896	2047					3249	$$ref_ar = $ar;
2897	2047					4093	$$ref_row_hr = $row_hr;
2898
2899	2047					5928	return 1;
2900							}
2901
2902							sub _open_read {
2903	297			297		476	my $self = shift;
2904
2905	297					499	my $verbose = $self->{verbose};
2906	297					700	my $in_file_disp = $self->get_in_file_disp();
2907
2908	297					687	$self->{_stats} = {};
2909	297					531	$self->{_read_in_progress} = 1;
2910
2911	297	50				739	$self->_print("-- $in_file_disp reading start\n") if $verbose;
2912							}
2913
2914							sub _close_read {
2915	316			316		490	my $self = shift;
2916	316					441	my $keep_quiet = shift;
2917
2918	316					505	my $verbose = $self->{verbose};
2919	316					623	my $in_file_disp = $self->get_in_file_disp();
2920
2921	316					568	$self->{_read_in_progress} = 0;
2922
2923	316	50	33			721	if ( $verbose and ( !$keep_quiet ) ) {
2924							$self->_print( "-- $in_file_disp reading end: "
2925							. $self->{_row_read}
2926	0					0	. " row(s) read\n" );
2927	0					0	for my $k ( sort keys %{ $self->{_stats} } ) {
	0					0
2928	0					0	$self->_printf( " %7d %s\n", $self->{_stats}->{$k}, $k );
2929							}
2930							}
2931
2932	316					617	$self->{_nb_rows} = $self->{_row_read};
2933							}
2934
2935							# Return 0 if error, 1 if all good
2936							sub _S4_read_all_in_mem {
2937	167			167		267	my $self = shift;
2938
2939	167					469	$self->_register_pass("_S4_read_all_in_mem()");
2940
2941	167					486	$self->_open_read();
2942
2943	167					259	my $ar;
2944							my $row_hr;
2945	167					498	while ( $self->_read_one_record_from_input( \$ar, \$row_hr ) ) {
2946
2947	1553					2124	push @{ $self->{_flat} }, $ar;
	1553					4335
2948
2949							}
2950
2951	165	50				451	my $retcode = ( defined($ar) ? 1 : 0 );
2952	165					482	$self->_update_in_mem_record_count();
2953
2954	165					456	$self->_close_read();
2955
2956	165					409	return $retcode;
2957							}
2958
2959							sub _chain_array {
2960	24			24		140	return split( /\s->\s/, $_[0] );
2961							}
2962
2963							sub _chain_str {
2964	2			2		6	return join( '->', @_ );
2965							}
2966
2967							sub field_add_link {
2968	25			25	1	4901	my $self = shift;
2969
2970	25					756	validate_pos(
2971							@_,
2972							{ type => UNDEF \| SCALAR },
2973							{ type => SCALAR },
2974							{ type => SCALAR \| OBJECT },
2975							{ type => HASHREF, optional => 1 }
2976							);
2977
2978	22					95	my ( $new_field, $chain, $obj, $param_opts ) = @_;
2979
2980	22					39	my $croak_if_error = $self->{croak_if_error};
2981	22					36	my $_debug = $self->{_debug};
2982	22					34	my $_debugh = $self->{_debugh};
2983
2984	22					51	my @c = _chain_array($chain);
2985	22	100				52	$new_field = $c[2] unless defined($new_field);
2986
2987	22	50				47	print( $_debugh
2988							"Registering new linked field, new_field = '$new_field', chain = '$chain'\n"
2989							) if $_debug;
2990
2991	22	100	66			101	unless ( @c == 3 and $c[2] ne '' ) {
2992	1					7	$self->_print_error( "wrong links chain parameter: '$chain', "
2993							. "look for CHAIN in Text::AutoCSV manual for help" );
2994	1					9	return;
2995							}
2996
2997	21	50				52	return unless $self->_status_forward('S2');
2998	21	50				52	return unless $self->_status_backward('S2');
2999
3000	21					36	my @tmp;
3001	21	100				47	@tmp = %{$param_opts} if $param_opts;
	13					42
3002	21					479	my %opts = validate( @tmp, $SEARCH_VALIDATE_OPTIONS );
3003
3004	20					73	my $target_name = '';
3005	20	100				46	if ( ref $obj eq '' ) {
3006	19					27	my $in_file = $obj;
3007	19					27	$target_name = $in_file;
3008
3009							#
3010							# TODO (?)
3011							#
3012							# Take into account the fact that the OS' file system is case insensitive. At
3013							# the moment, two different strings (even if identical in a case insensitive
3014							# comparison) will be managed as being distinct.
3015							# I put a question mark in this TO DO - after all, the user of this module had
3016							# better use same case when dealing with multiple links of the same file.
3017							#
3018							# Also, tuning this module' behavior depending on the OS' characteristics
3019							# would be not ideal, it'd add a level of complexity to understand how it
3020							# works and what to expect.
3021							#
3022	19	100	100			72	if ( exists $self->{_obj} and exists $self->{_obj}->{$in_file} ) {
3023
3024	4	50				9	print( $_debugh
3025							"field_add_link: file '$in_file': re-using existing Text::AutoCSV object\n"
3026							) if $_debug;
3027
3028	4					7	$obj = $self->{_obj}->{$in_file};
3029							}
3030							else {
3031
3032	15	50				31	print( $_debugh
3033							"field_add_link: file '$in_file': creating new Text::AutoCSV object\n"
3034							) if $_debug;
3035
3036	15	100				46	$self->{_obj} = {} unless exists $self->{_obj};
3037
3038							#
3039							# The created Text::AutoCSV must be created with the same search options as what
3040							# is currently found in $self.
3041							#
3042							# Why?
3043							# Because the link is populated doing a vlookup on the remote object ($obj
3044							# below), not on $self. Therefore, if we don't "propagate" search options from
3045							# $self to $obj, search tunnings won't work as user would expect.
3046							#
3047	15					20	my %search_opts;
3048	15					30	for (
3049							qw(search_case search_trim search_ignore_empty
3050							search_ignore_accents search_value_if_not_found
3051							search_value_if_ambiguous search_ignore_ambiguous)
3052							)
3053							{
3054							# We assign depending on whether or not the attribute EXISTS - the definedness
3055							# is not appropriate, in case an attribute would have been assigned to undef.
3056	105	100				179	$search_opts{$_} = $self->{$_} if exists $self->{$_};
3057							}
3058
3059							$obj = Text::AutoCSV->new(
3060							in_file => $in_file,
3061							verbose => $self->{verbose},
3062							infoh => $self->{infoh},
3063							_debug => $self->{debug},
3064							_debugh => $self->{debugh},
3065	15					95	%search_opts
3066							);
3067	15					55	$self->{_obj}->{$in_file} = $obj;
3068							}
3069							}
3070							else {
3071	1					57	$target_name = '(object)';
3072	1	50				5	print( $_debugh "field_add_link: Text::AutoCSV object provided\n" )
3073							if $_debug;
3074							}
3075
3076	20	100				55	$self->{_extra_fields} = [] unless exists $self->{_extra_fields};
3077
3078	20					30	push @{ $self->{_extra_fields} }, ExtraField->new(
	20					450
3079							ef_type => $EF_LINK,
3080							self_name => $new_field,
3081							description => "link: $target_name, chain: $chain",
3082							check_field_existence => $c[0],
3083
3084							link_self_search => $c[0],
3085							link_remote_obj => $obj,
3086							link_remote_search => $c[1],
3087							link_remote_read => $c[2],
3088
3089							link_vlookup_opts => \%opts
3090							);
3091
3092	20					2172	return $self;
3093							}
3094
3095							sub links {
3096	2			2	1	5	my $self = shift;
3097
3098	2					31	validate_pos(
3099							@_,
3100							{ type => UNDEF \| SCALAR },
3101							{ type => SCALAR },
3102							{ type => SCALAR \| OBJECT },
3103							{ type => HASHREF, optional => 1 }
3104							);
3105
3106	2					7	my $prefix_field = shift;
3107	2					4	my $chain = shift;
3108	2					4	my ( $obj, $param_opts ) = @_;
3109
3110	2					6	my @c = _chain_array($chain);
3111
3112	2	50	33			57	if ( @c != 2 or $c[0] eq '' or $c[1] eq '' ) {
			33
3113	0					0	$self->_print_error( "wrong links chain parameter: '$chain', "
3114							. "look for JOINCHAIN in Text::AutoCSV manual for help" );
3115	0					0	return;
3116							}
3117
3118	2	100				8	$prefix_field = '' unless defined($prefix_field);
3119	2					6	my $chain2 = _chain_str( @c, '*' );
3120
3121	2					8	return $self->field_add_link( $prefix_field, $chain2, @_ );
3122							}
3123
3124							sub field_add_computed {
3125	7			7	1	2410	my $self = shift;
3126
3127	7					202	validate_pos( @_, { type => SCALAR }, { type => CODEREF } );
3128	6					24	my ( $new_field, $func ) = @_;
3129
3130	6					15	my $croak_if_error = $self->{croak_if_error};
3131
3132	6					9	my $_debug = $self->{_debug};
3133	6					13	my $_debugh = $self->{_debugh};
3134
3135	6	50				19	print( $_debugh
3136							"Registering new computed field, new_field = '$new_field'\n" )
3137							if $_debug;
3138
3139	6	100				18	return unless $self->_status_forward('S2');
3140	5	50				14	return unless $self->_status_backward('S2');
3141
3142	5					10	push @{ $self->{_extra_fields} }, ExtraField->new(
	5					108
3143							ef_type => $EF_FUNC,
3144							self_name => $new_field,
3145							description => "computed",
3146
3147							func_sub => $func
3148							);
3149
3150	5					449	return $self;
3151							}
3152
3153							sub field_add_copy {
3154	14			14	1	8161	my $self = shift;
3155
3156	14					199	validate_pos(
3157							@_,
3158							{ type => SCALAR },
3159							{ type => SCALAR },
3160							{ type => CODEREF, optional => 1 }
3161							);
3162	14					56	my ( $new_field, $copy_source, $func ) = @_;
3163
3164	14					29	my $croak_if_error = $self->{croak_if_error};
3165
3166	14					25	my $_debug = $self->{_debug};
3167	14					25	my $_debugh = $self->{_debugh};
3168
3169	14	50				37	print( $_debugh
3170							"Registering field copy, new_field = '$new_field' copied from '$copy_source'\n"
3171							) if $_debug;
3172
3173	14	100				31	return unless $self->_status_forward('S2');
3174	12	50				45	return unless $self->_status_backward('S2');
3175
3176	12	100				24	push @{ $self->{_extra_fields} }, ExtraField->new(
	12					284
3177							ef_type => $EF_COPY,
3178							self_name => $new_field,
3179							description => "copy of $copy_source "
3180							. ( defined($func) ? '(with sub)' : '(no sub)' ),
3181							check_field_existence => $copy_source,
3182
3183							copy_source => $copy_source,
3184							copy_sub => $func
3185							);
3186
3187	12					1058	return $self;
3188							}
3189
3190							sub in_map {
3191	15			15	1	1355	my $self = shift;
3192
3193	15					36	return $self->read_update_after(@_);
3194							}
3195
3196							sub read_update_after {
3197	16			16	1	25	my $self = shift;
3198	16					191	validate_pos( @_, { type => SCALAR }, { type => CODEREF } );
3199
3200	16					52	my ( $field, $subref ) = @_;
3201
3202	16					31	my $_debug = $self->{_debug};
3203	16					23	my $_debugh = $self->{_debugh};
3204
3205	16	50				33	return unless $self->_status_forward('S2');
3206	16	50				36	return unless $self->_status_backward('S2');
3207
3208	16	50				36	print( $_debugh "Registering read_post_update subref for field '$field'\n" )
3209							if $_debug;
3210
3211							$self->{_read_update_after_hr}->{$field} = []
3212	16	100				50	unless defined( $self->{_read_update_after_hr}->{$field} );
3213
3214	16					20	push @{ $self->{_read_update_after_hr}->{$field} }, $subref;
	16					42
3215
3216	16					169	return $self;
3217							}
3218
3219							sub out_map {
3220	11			11	1	19	my $self = shift;
3221
3222	11					27	return $self->write_update_before(@_);
3223							}
3224
3225							sub write_update_before {
3226	12			12	1	17	my $self = shift;
3227	12					124	validate_pos( @_, { type => SCALAR }, { type => CODEREF } );
3228
3229	12					42	my ( $field, $subref ) = @_;
3230
3231	12					20	my $_debug = $self->{_debug};
3232	12					19	my $_debugh = $self->{_debugh};
3233
3234	12	50				27	return unless $self->_status_forward('S2');
3235	12	50				23	return unless $self->_status_backward('S2');
3236
3237	12	50				25	print( $_debugh "Registering write_pre_update subref for field '$field'\n" )
3238							if $_debug;
3239
3240							$self->{_write_update_before_hr}->{$field} = []
3241	12	100				45	unless defined( $self->{_write_update_before_hr}->{$field} );
3242
3243	12					19	push @{ $self->{_write_update_before_hr}->{$field} }, $subref;
	12					24
3244
3245	12					75	return $self;
3246							}
3247
3248							sub reset_next_record_hr {
3249	189			189	1	310	my $self = shift;
3250
3251	189					973	validate_pos(@_);
3252
3253	189					535	$self->{_current_record} = undef;
3254
3255	189					355	return $self;
3256							}
3257
3258							sub _create_internal_column_name_from_its_number {
3259	1120			1120		2326	return sprintf( "__%04i__", $_[0] );
3260							}
3261
3262							sub _ar_to_hr {
3263	2079			2079		2952	my $self = shift;
3264
3265	2079					15865	validate_pos( @_, { type => ARRAYREF } );
3266
3267	2079					5752	my ($ar) = @_;
3268	2079					2856	my $last_elem_index = scalar( @{$ar} ) - 1;
	2079					3828
3269
3270	2079					3679	my $nr = $self->{_named_fields};
3271	2079					3059	my %h;
3272							my %n_seen;
3273	2079					2700	for ( keys %{$nr} ) {
	2079					6083
3274	9934					19949	$h{$_} = $ar->[ $nr->{$_} ];
3275	9934					18336	undef $n_seen{ $nr->{$_} };
3276							}
3277	2079					4919	for my $i ( 0 .. $last_elem_index ) {
3278	11001	100				19943	if ( !exists( $n_seen{$i} ) ) {
3279	1120					1575	my $k = _create_internal_column_name_from_its_number($i);
3280	1120	50				2895	$h{$k} = $ar->[$i] if !exists $h{$k};
3281							}
3282							}
3283
3284	2079	100				6375	lock_keys(%h) if $self->{croak_if_error};
3285
3286	2079					18557	return \%h;
3287							}
3288
3289							sub get_next_record_hr {
3290	2001			2001	1	3288	my $self = shift;
3291
3292	2001					14019	validate_pos( @_, { type => SCALARREF, optional => 1 } );
3293
3294	2001					4712	my $refkey = $_[0];
3295
3296	2001	50				3754	return unless $self->_status_forward('S4');
3297
3298	1998	100				4304	if ( !defined( $self->{_current_record} ) ) {
3299	186					358	$self->{_current_record} = 0;
3300							}
3301							else {
3302	1812					2721	$self->{_current_record}++;
3303							}
3304
3305	1998					3499	my $ar = $self->{_flat}->[ $self->{_current_record} ];
3306	1998	100				3623	if ( !defined($ar) ) {
3307	186					319	$self->{_current_record} = undef;
3308	186					342	$$refkey = undef;
3309	186					540	return;
3310							}
3311
3312	1812					2768	$$refkey = $self->{_current_record};
3313
3314	1812					3471	return $self->_ar_to_hr($ar);
3315							}
3316
3317							sub read {
3318	60			60	1	20399	my $self = shift;
3319
3320	60					399	validate_pos(@_);
3321
3322	60	50				226	return unless $self->_status_backward('S3');
3323
3324	60	50				151	if ( exists $self->{out_orderby} ) {
3325	0	0				0	return unless $self->_status_forward('S4');
3326	0					0	return $self;
3327							}
3328
3329	60	100				129	return unless $self->_status_forward('S3');
3330
3331	51					178	$self->_register_pass("read()");
3332
3333	51					149	$self->_open_read();
3334
3335	51					88	my $ar;
3336							my $row_hr;
3337	51					170	while ( $self->_read_one_record_from_input( \$ar, \$row_hr ) ) {
3338
3339							# Ben oui quoi... qu'est-ce que l'on peut bien faire d'autre ?
3340							}
3341
3342	49					166	$self->_close_read();
3343	49	50				126	return unless defined($ar);
3344
3345	49	50				127	return unless $self->_status_reset();
3346
3347	49					246	return $self;
3348							}
3349
3350							#
3351							# Initially, _read_all_in_mem was intended for the test plan.
3352							#
3353							# Turned out to be sometimes useful for user, thus, is no longer private since
3354							# 1.1.5. Private version below is kept for compatibility.
3355							#
3356							sub read_all_in_mem {
3357	1			1	1	4	my $self = shift;
3358
3359	1					6	return $self->_read_all_in_mem();
3360							}
3361
3362							sub _read_all_in_mem {
3363	9			9		1173	my $self = shift;
3364
3365	9	50				29	return 0 unless $self->_status_backward('S3');
3366	9	50				24	return 0 unless $self->_status_forward('S4');
3367
3368	9					99	return $self;
3369							}
3370
3371							sub _render {
3372	0			0		0	my $v = $_[0];
3373
3374	0	0	0			0	if ( length($v) == 1 and ord($v) < 32 ) {
3375	0					0	my $n = ord($v);
3376	0	0				0	return '\n' if $n == 10;
3377	0	0				0	return '\r' if $n == 13;
3378	0	0				0	return '\t' if $n == 9;
3379	0	0				0	return '\f' if $n == 12;
3380	0	0				0	return '\b' if $n == 8;
3381	0	0				0	return '\a' if $n == 7;
3382	0	0				0	return '\e' if $n == 27;
3383	0					0	return '\0' . oct($n);
3384							}
3385	0					0	return $v;
3386							}
3387
3388							sub print_id {
3389	0			0	1	0	my $self = shift;
3390
3391	0					0	$self->_printf( "-- " . $self->get_in_file_disp() . ":\n" );
3392	0					0	$self->_printf(
3393							"sep_char: " . _render( $self->get_sep_char() ) . "\n" );
3394	0					0	$self->_printf(
3395							"escape_char: " . _render( $self->get_escape_char() ) . "\n" );
3396	0					0	$self->_printf(
3397							"in_encoding: " . _render( $self->get_in_encoding() ) . "\n" );
3398	0	0				0	$self->_printf( "is_always_quoted: "
3399							. ( $self->get_is_always_quoted() ? 'yes' : 'no' )
3400							. "\n" );
3401
3402	0					0	my @coldata = $self->get_coldata();
3403	0					0	my @disp = [
3404							'#', 'FIELD',
3405							'HEADER', 'EXT DATA',
3406							'DATETIME FORMAT', 'DATETIME LOCALE',
3407							'ML'
3408							];
3409	0					0	my $NCOLS = @{ $disp[0] };
	0					0
3410	0					0	push @disp, [ map { my $s = $_; $s =~ s/./-/g; $s } @{ $disp[0] } ];
	0					0
	0					0
	0					0
	0					0
3411
3412	0					0	for my $i ( 0 .. $#coldata ) {
3413	0					0	my $col = $coldata[$i];
3414
3415	0					0	my @row = "$i";
3416							push @row, ( defined( $col->[$_] ) ? ( $col->[$_] . '' ) : '' )
3417	0	0				0	for 0 .. $NCOLS - 2;
3418	0					0	for (@row) {
3419	0					0	s/\n/\\n/g;
3420	0					0	s/\r/\\r/g;
3421	0					0	s/\t/\\t/g;
3422							}
3423	0					0	push @disp, [@row];
3424							}
3425	0					0	my $n = @{ $disp[-1] };
	0					0
3426	0					0	my @max = (-1) x $n;
3427	0					0	for my $l (@disp) {
3428	0	0				0	do { $max[$_] = length( $l->[$_] ) if $max[$_] < length( $l->[$_] ) }
3429	0					0	for ( 0 .. $n - 1 );
3430							}
3431	0					0	my $s = join( ' ', map { "%-${_}s" } @max );
	0					0
3432	0					0	$self->_print("\n");
3433	0					0	$self->_printf( "$s\n", @{$_} ) for (@disp);
	0					0
3434							}
3435
3436							sub set_out_file {
3437	2			2	1	3949	my $self = shift;
3438	2					23	validate_pos( @_, { type => SCALAR } );
3439
3440	2					8	my ($out_file) = @_;
3441	2					4	$self->{out_file} = $out_file;
3442
3443	2					9	return $self;
3444							}
3445
3446							# Subrefs set with out_map
3447							sub _execute_write_update_before {
3448	367			367		654	my ( $self, $ar ) = @_;
3449
3450	367					563	my $columns_ar = $self->{_columns};
3451
3452	367					561	my $wpre = $self->{_write_update_before_ar};
3453	367					504	for my $i ( 0 .. $#{$columns_ar} ) {
	367					814
3454	1177					1599	my $subref = $wpre->[$i];
3455	1177	100				2237	next unless defined($subref);
3456
3457	159					174	do {
3458	159					230	local $_ = $ar->[$i];
3459	159					218	my $field = $columns_ar->[$i];
3460	159					280	my $new_val = $subref->( $self, $field );
3461	156					345	$ar->[$i] = $new_val;
3462							}
3463
3464							}
3465							}
3466
3467							# Take into account write_fields if it got set
3468							sub _apply_write_fields {
3469	466			466		808	my ( $self, $ar ) = @_;
3470
3471	466					597	my @final;
3472
3473	466					1461	my $tmp = _get_def( $self->{out_fields}, $self->{write_fields} );
3474	466					865	my @wf;
3475	466	100				823	@wf = @{$tmp} if defined($tmp);
	16					38
3476
3477	466	100				1047	return unless @wf;
3478
3479	16					23	my %named_fields = %{ $self->{_named_fields} };
	16					67
3480	16					103	for my $i ( 0 .. $#wf ) {
3481	40					75	my $field = $wf[$i];
3482	40					59	my $tmp;
3483	40	100	66			158	$tmp = $ar->[ $named_fields{$field} ]
3484							if defined($field)
3485							and $field ne '';
3486
3487							# Put here any post-processing of value
3488							# WARNING
3489							# $tmp can be undef
3490							# ...
3491
3492	40					101	$final[$i] = $tmp;
3493							}
3494	16					70	$_[1] = [@final];
3495							}
3496
3497							sub write {
3498	109			109	1	24409	my $self = shift;
3499
3500	109					737	validate_pos(@_);
3501
3502	109	50				339	return unless $self->_status_forward('S3');
3503
3504	104					223	my $verbose = $self->{verbose};
3505	104					199	my $_debug = $self->{_debug};
3506	104					198	my $_debugh = $self->{_debugh};
3507
3508	104					191	my $out_file = $self->{out_file};
3509
3510	104					151	my %stats;
3511
3512	104	50				209	$self->_print("-- $out_file writing start\n") if $verbose;
3513	104					169	my $rows_written = 0;
3514
3515	104					158	my $outh = $self->{outh};
3516
3517	104					197	$self->{_close_outh_when_finished} = 0;
3518	104	50				236	unless ( defined($outh) ) {
3519	104	50				259	if ( $out_file eq '' ) {
3520	0					0	$outh = \*STDOUT;
3521							}
3522							else {
3523	104	50				163273	unless ( open( $outh, '>', $out_file ) ) { ## no critic (InputOutput::RequireBriefOpen)
3524	0					0	$self->_print_error("unable to open file '$out_file': $!");
3525	0					0	return;
3526							}
3527	104					435	$self->{_close_outh_when_finished} = 1;
3528							}
3529	104					260	$self->{outh} = $outh;
3530							}
3531
3532	104	50				261	unless ( $self->{_leave_encoding_alone} ) {
3533							my $enc = (
3534							defined( $self->{_inh_encoding} )
3535							? $self->{_inh_encoding}
3536	104	50				342	: $DEFAULT_OUT_ENCODING
3537							);
3538
3539							# out_encoding option takes precedence
3540	104	100				295	$enc = $self->{out_encoding} if defined( $self->{out_encoding} );
3541	104					276	my $m = ":encoding($enc)";
3542	104	50	66			276	if ( _is_utf8($enc) and $self->{out_utf8_bom} ) {
3543	0					0	$m .= ':via(File::BOM)';
3544							}
3545
3546	104	50	33			364	if ( $OS_IS_PLAIN_WINDOWS and $FIX_PERLMONKS_823214 ) {
3547
3548							# Tested with UTF-16LE, NOT tested with UTF-16BE (it should be the same story)
3549	0	0				0	$m = ":raw:perlio:$m:crlf" if $enc =~ /^utf-?16/i;
3550							}
3551
3552	104					970	binmode $outh, $m;
3553	104	50				9346	print( $_debugh "Encoding string used for output: $m\n" ) if $_debug;
3554							}
3555
3556	104					214	my $escape_char = $self->{escape_char};
3557	104					205	my $quote_char = $self->{quote_char};
3558
3559	104					164	my %opts;
3560	104					218	$opts{binary} = 1;
3561	104					198	$opts{eol} = "\n";
3562
3563	104	50				346	$opts{sep_char} = $self->{sep_char} if defined( $self->{sep_char} );
3564	104	100				247	$opts{sep_char} = $self->{out_sep_char} if defined( $self->{out_sep_char} );
3565
3566	104	50				298	$opts{quote_char} = $self->{quote_char} if defined( $self->{quote_char} );
3567							$opts{quote_char} = $self->{out_quote_char}
3568	104	50				237	if defined( $self->{out_quote_char} );
3569
3570							$opts{escape_char} = $self->{escape_char}
3571	104	100				291	if defined( $self->{escape_char} );
3572							$opts{escape_char} = $self->{out_escape_char}
3573	104	100				237	if defined( $self->{out_escape_char} );
3574
3575	104					205	$opts{always_quote} = $self->{_is_always_quoted};
3576							$opts{always_quote} = $self->{out_always_quote}
3577	104	100				249	if defined( $self->{out_always_quote} );
3578
3579	104					744	my $csvout = Text::CSV->new( {%opts} );
3580	104	50				16140	if ( !defined($csvout) ) {
3581	0					0	$self->_print_error("error creating output Text::CSV object");
3582	0					0	return;
3583							}
3584
3585							my $write_filter_hr =
3586	104					407	_get_def( $self->{out_filter}, $self->{write_filter_hr} );
3587
3588	104	100	66			725	if (
			100
			100
3589							(
3590							$self->{has_headers}
3591							and (
3592							!(
3593							defined( $self->{out_has_headers} )
3594							and ( !$self->{out_has_headers} )
3595							)
3596							)
3597							)
3598							or $self->{out_has_headers}
3599							)
3600							{
3601	102					220	my $ar = [];
3602	102	100				255	if ( $self->{has_headers} ) {
3603	98					201	$ar = $self->{_headers};
3604							}
3605							else {
3606	4					4	my $nf = $self->{_named_fields};
3607	4					5	$ar->[ $nf->{$_} ] = $_ for ( keys %{$nf} );
	4					16
3608							}
3609
3610	102	100				255	if ( exists $self->{_out_headers} ) {
3611	3					8	my $h = $self->{_out_headers};
3612	3					5	for ( keys %{ $self->{_named_fields} } ) {
	3					16
3613	12	100				31	if ( exists $h->{$_} ) {
3614	5					17	$ar->[ $self->{_named_fields}->{$_} ] = $h->{$_};
3615							}
3616							}
3617							}
3618
3619	102					370	$self->_apply_write_fields($ar);
3620
3621	102					1531	$csvout->print( $outh, $ar );
3622	102					1083	$rows_written++;
3623							}
3624
3625	104					177	my $do_status_reset = 0;
3626
3627							#
3628							# FIXME!!!
3629							#
3630							# Instead of this duplication of code, provide AutoCSV with a "create iterator
3631							# sub" feature to iterate over all records, whatever is going on behind the
3632							# scene (in-memory or read input).
3633							#
3634							# Such an iterator would also benefit to module users.
3635							#
3636
3637	104	100				275	if ( exists $self->{out_orderby} ) {
3638	1	50				3	return unless $self->_status_forward('S4');
3639							}
3640
3641	104	100				281	if ( $self->{_status} == 4 ) {
3642
3643							#
3644							# The content is available in-memory: we write from what we have in-memory
3645							# then...
3646							#
3647
3648	25					88	my @keys = $self->get_keys();
3649	25					64	my @ordered_keys = @keys;
3650	25	100				74	if ( exists $self->{'out_orderby'} ) {
3651	1					2	my @orderby = @{ $self->{'out_orderby'} };
	1					3
3652							@ordered_keys = sort {
3653	1					5	for my $f (@orderby) {
	7					15
3654	8					17	my $cmp =
3655							$self->get_cell( $a, $f ) cmp $self->get_cell( $b, $f );
3656	8	100				22	return $cmp if $cmp;
3657							}
3658	0					0	return 0;
3659							} @keys;
3660							}
3661
3662							# for my $k ($self->get_keys()) {
3663	25					63	for my $k (@ordered_keys) {
3664	92					246	my $hr = $self->get_row_hr($k);
3665	92	50				274	if ( defined($write_filter_hr) ) {
3666	0	0				0	next unless $write_filter_hr->($hr);
3667							}
3668	92					151	my $ar = [ @{ $self->get_row_ar($k) } ];
	92					207
3669
3670	92					291	$self->_execute_write_update_before($ar);
3671	92					271	$self->_apply_write_fields($ar);
3672
3673	92					662	$csvout->print( $outh, $ar );
3674	92					1032	$rows_written++;
3675							}
3676
3677							}
3678							else {
3679
3680							#
3681							# No in-memory content available: we read and write in parallel.
3682							#
3683
3684	79					244	$self->_register_pass("write()");
3685
3686	79					227	$self->_open_read();
3687	79					145	my $ar;
3688							my $row_hr;
3689	79					254	while ( $self->_read_one_record_from_input( \$ar, \$row_hr ) ) {
3690	301	100				543	if ( defined($write_filter_hr) ) {
3691							next
3692	46	100				91	unless $write_filter_hr->( $row_hr, \%stats,
3693							$self->get_recnum() );
3694							}
3695	275					470	$ar = [ @{$ar} ];
	275					642
3696
3697	275					798	$self->_execute_write_update_before($ar);
3698	272					580	$self->_apply_write_fields($ar);
3699
3700	272					1441	$csvout->print( $outh, $ar );
3701	272					2199	$rows_written++;
3702							}
3703	72					223	$self->_close_read();
3704
3705	72					124	$do_status_reset = 1;
3706							}
3707
3708	97					281	$self->_close_outh();
3709
3710	97	50				234	if ($verbose) {
3711	0					0	$self->_print(
3712							"-- $out_file writing end: $rows_written row(s) written\n");
3713	0					0	for my $k ( sort keys %stats ) {
3714	0					0	$self->_printf( " %7d %s\n", $stats{$k}, $k );
3715							}
3716							}
3717
3718	97	100				216	if ($do_status_reset) {
3719	72	50				206	return unless $self->_status_reset();
3720							}
3721	97					1026	return $self;
3722							}
3723
3724							#
3725							# * * **********************************************************************
3726							# * * **********************************************************************
3727							# * OBJ ************************************************************************
3728							# * * **********************************************************************
3729							# * * **********************************************************************
3730							#
3731
3732							#
3733							# The subs below assume Text::AutoCSV can be in status S4 = all in memory.
3734							#
3735
3736							sub get_keys {
3737	32			32	1	2171	my $self = shift;
3738	32					238	validate_pos(@_);
3739
3740	32	50				122	return unless $self->_status_forward('S4');
3741
3742	32					63	my $last_key = @{ $self->{_flat} } - 1;
	32					86
3743	32					100	my @r = ( 0 .. $last_key );
3744
3745	32					101	return @r;
3746							}
3747
3748							sub get_row_ar {
3749	364			364	1	1100	my $self = shift;
3750	364					2759	validate_pos( @_, { type => SCALAR } );
3751	364					1032	my ($key) = @_;
3752
3753	364	50				802	return unless $self->_status_forward('S4');
3754
3755	364	50				806	unless ( defined($key) ) {
3756	0					0	$self->_print_error("get_row_ar(): \$key is not defined!");
3757	0					0	return;
3758							}
3759
3760							$self->_print_error("unknown row '$key'")
3761	364	100				846	unless defined( $self->{_flat}->[$key] );
3762	364					885	return $self->{_flat}->[$key];
3763							}
3764
3765							sub get_row_hr {
3766	269			269	1	461	my $self = shift;
3767	269					2443	validate_pos( @_, { type => SCALAR } );
3768	269					886	my ($key) = @_;
3769
3770	269					696	my $ar = $self->get_row_ar($key);
3771	269	100				568	return unless defined($ar);
3772
3773	267					606	return $self->_ar_to_hr($ar);
3774							}
3775
3776							#
3777							# Could be made much more efficient (directly read $self->{_flat} instead of
3778							# calling get_row_hr that itself calls get_row_ar).
3779							# I leave it as is because get_hr_all is not good practice (it is not scalable),
3780							# it was primarily done to ease test plan.
3781							#
3782							# By the way I may make it one day not available by default, requesting caller
3783							# to tune some variable (like { $Text::AutoCSV::i_am_the_test_plan = 1 }) to
3784							# expose it.
3785							#
3786							sub get_hr_all {
3787	108			108	1	2130	my $self = shift;
3788	108					720	validate_pos(@_);
3789
3790	108					226	my @resp;
3791	108					337	$self->reset_next_record_hr();
3792	108					272	while ( my $hr = $self->get_next_record_hr() ) {
3793	414					897	push @resp, $hr;
3794							}
3795	105					558	return @resp;
3796							}
3797
3798							sub get_recnum {
3799	190			190	1	313	my $self = shift;
3800	190					923	validate_pos(@_);
3801
3802	190	50				557	return -1 unless $self->{_read_in_progress};
3803	190					395	return _get_def( $self->{_row_read}, -1 );
3804							}
3805
3806							sub _check_for_search {
3807	906			906		1978	my ( $self, $field ) = @_;
3808	906	50				1984	return unless $self->_status_forward('S4');
3809
3810	905	100				3246	return 1 if exists $self->{_named_fields}->{$field};
3811							$self->_print_error( "search: unknown field '$field'",
3812	6					24	0, ERR_UNKNOWN_FIELD, $self->{_named_fields} );
3813							}
3814
3815							sub get_cell {
3816	20			20	1	1353	my $self = shift;
3817	20					184	validate_pos( @_, { type => SCALAR }, { type => SCALAR } );
3818	20					60	my ( $key, $field ) = @_;
3819
3820	20	50				39	return unless $self->_check_for_search($field);
3821	19					39	my $row = $self->get_row_hr($key);
3822	19	100				41	return $row unless defined($row);
3823	18					51	return $row->{$field};
3824							}
3825
3826							sub get_values {
3827	9			9	1	2812	my $self = shift;
3828	9					102	validate_pos(
3829							@_,
3830							{ type => SCALAR },
3831							{ type => UNDEF \| CODEREF, optional => 1 }
3832							);
3833	9					30	my ( $field, $filter_subref ) = @_;
3834
3835	9	50				24	return unless $self->_check_for_search($field);
3836
3837	9					15	my @values;
3838	9					25	$self->reset_next_record_hr();
3839	9					32	while ( my $hr = $self->get_next_record_hr() ) {
3840	53	100				91	if ( defined($filter_subref) ) {
3841	23					38	local $_ = $hr->{$field};
3842	23	100				41	next unless $filter_subref->();
3843							}
3844	42					150	push @values, $hr->{$field};
3845							}
3846	9					41	return @values;
3847							}
3848
3849							sub _get_hash_and_projector {
3850	561			561		1243	my ( $self, $field, $arg_opts ) = @_;
3851
3852	561					1015	my $_debug = $self->{_debug};
3853	561					933	my $_debugh = $self->{_debugh};
3854
3855	561					813	my %opts;
3856	561	50				1192	%opts = %{$arg_opts} if defined($arg_opts);
	561					1576
3857
3858							my $opt_case =
3859	561					2285	_get_def( $opts{'case'}, $self->{search_case}, $DEF_SEARCH_CASE );
3860							my $opt_trim =
3861	561					2076	_get_def( $opts{'trim'}, $self->{search_trim}, $DEF_SEARCH_TRIM );
3862							my $opt_ignore_empty =
3863							_get_def( $opts{'ignore_empty'}, $self->{search_ignore_empty},
3864	561					1940	$DEF_SEARCH_IGNORE_EMPTY );
3865							my $opt_ignacc =
3866							_get_def( $opts{'ignore_accents'}, $self->{search_ignore_accents},
3867	561					2106	$DEF_SEARCH_IGNORE_ACCENTS );
3868
3869	561					1819	my $opts_stringified =
3870							$opt_case . $opt_trim . $opt_ignore_empty . $opt_ignacc;
3871	561					1297	my $hash_name = "_h${field}_${opts_stringified}";
3872	561					1172	my $projector_name = "_p${field}_${opts_stringified}";
3873
3874	561	100	66			3116	if ( exists $self->{$hash_name} and exists $self->{$projector_name} ) {
		50	33
3875	489	50				9752	print( $_debugh
3876							"Search by key '$field': using existing hash and projector ("
3877							. "$hash_name, $projector_name)\n" )
3878							if $_debug;
3879	489					1971	return ( $hash_name, $projector_name );
3880							}
3881							elsif ( exists $self->{$hash_name} or exists $self->{$projector_name} ) {
3882	0					0	confess "Man, check your $PKG module code now!";
3883							}
3884
3885	72	50				198	print( $_debugh "Search by key '$field': building hash\n" ) if $_debug;
3886
3887							#
3888							# Projectors
3889							#
3890							# The projector contains subs to derivate the search key from the field value.
3891							# At the moment it is used to manage with case / without case searches and with
3892							# trim / without trim searches (meaning, ignoring spaces at beginning and end of
3893							# fields)
3894							#
3895							# Why naming it a projector?
3896							# Because if you run it twice on a value, the second run should produce the
3897							# same result, meaning: p(p(x)) = p(x) whatever x
3898							#
3899
3900	72					134	my @projectors;
3901
3902							# Add case removal in the projector function list
3903	1479			1479		4103	push @projectors, sub { return lc(shift); }
3904	72	100				401	unless $opt_case;
3905
3906							# Add trim in the projector function list
3907	72	100				215	if ($opt_trim) {
3908							push @projectors, sub {
3909	1479			1479		7212	my $v = shift;
3910	1479					5181	$v =~ s/^\s+\|\s+$//g;
3911	1479					3736	return $v;
3912	57					221	};
3913							}
3914
3915							# Add remove_accents in the projector function list
3916	1886			1886		4272	push @projectors, sub { return remove_accents(shift); }
3917	72	100				373	if $opt_ignacc;
3918
3919							my $projector = sub {
3920	1906			1906		3888	my $v = _get_def( $_[0], '' );
3921	1906					4499	$v = $_->($v) foreach (@projectors);
3922	1906					4218	return $v;
3923	72					295	};
3924
3925							#
3926							# Filter
3927							#
3928							# As opposed to projectors above (where a search key is transformed), the idea
3929							# now is to ignore certain keys when doing a search.
3930							# At the moment, used to manage searches with / without empty values.
3931							#
3932							# That is to say: shall we use empty value as a regular value to search on, as
3933							# in
3934							# my @results = $self->search('FIELDNAME', '');
3935							# ?
3936							#
3937							# Right now we don't use an array-based construct, that'd allow to chain filters
3938							# with one another (as we now have only one filter to deal with), later, we may
3939							# use an array of filters, as done with projectors...
3940							#
3941
3942	72					140	my $filter;
3943	72	100				241	if ($opt_ignore_empty) {
3944	1268			1268		2906	$filter = sub { return $_[0] ne ''; }
3945	61					195	}
3946							else {
3947	77			77		234	$filter = sub { return 1; }
3948	11					65	}
3949
3950	72					162	my %h;
3951							my $k;
3952	72					408	$self->reset_next_record_hr();
3953	72					274	while ( my $hr = $self->get_next_record_hr( \$k ) ) {
3954	1345					2484	my $kv = $hr->{$field};
3955	1345					2412	my $p = $projector->($kv);
3956	1345	100				2403	unless ( $filter->($p) ) {
3957	76	50				195	print( $_debugh "Ignoring key value '$p' in hash build\n" )
3958							if $_debug;
3959	76					388	next;
3960							}
3961	1269					1869	push @{ $h{$p} }, $k;
	1269					6288
3962							}
3963	72					507	for ( keys %h ) {
3964	1178					1582	@{ $h{$_} } = sort { $a <=> $b } @{ $h{$_} };
	1178					2478
	103					315
	1178					2405
3965							}
3966
3967	72					417	$self->{_hash_build_count}++;
3968	72					869	$self->{$hash_name} = {%h};
3969	72					310	$self->{$projector_name} = $projector;
3970	72					674	return ( $hash_name, $projector_name );
3971							}
3972
3973							sub _get_hash_build_count {
3974	6			6		3379	my $self = shift;
3975
3976	6					29	return _get_def( $self->{_hash_build_count}, 0 );
3977							}
3978
3979							sub search {
3980	563			563	1	22942	my $self = shift;
3981	563					7217	validate_pos(
3982							@_,
3983							{ type => SCALAR },
3984							{ type => UNDEF \| SCALAR },
3985							{ type => UNDEF \| HASHREF, optional => 1 }
3986							);
3987	563					2358	my ( $field, $value, $param_opts ) = @_;
3988
3989	563					1354	my $croak_if_error = $self->{croak_if_error};
3990
3991							#
3992							# FIXME?
3993							# A bit overkill to check options each time search is called...
3994							# To be thought about.
3995							#
3996
3997	563					895	my @tmp;
3998	563	100				1455	@tmp = %{$param_opts} if $param_opts;
	421					1260
3999	563					25596	my %opts = validate( @tmp, $SEARCH_VALIDATE_OPTIONS );
4000
4001	562	50				2988	return unless $self->_check_for_search($field);
4002
4003							# $self->_print_error("undef value in search call") if !defined($value);
4004	561	50				1289	$value = '' unless defined($value);
4005
4006	561					1695	my ( $hash_name, $projector_name ) =
4007							$self->_get_hash_and_projector( $field, \%opts );
4008
4009	561					1918	my $ret = $self->{$hash_name}->{ $self->{$projector_name}->($value) };
4010
4011	561	100				2654	return $ret if defined($ret);
4012	185					621	return [];
4013							}
4014
4015							sub search_1hr {
4016	22			22	1	15913	my $self = shift;
4017	22					534	validate_pos(
4018							@_,
4019							{ type => SCALAR },
4020							{ type => UNDEF \| SCALAR },
4021							{ type => UNDEF \| HASHREF, optional => 1 }
4022							);
4023	22					148	my ( $field, $value, $arg_opts ) = @_;
4024
4025	22					109	my $r = $self->search( $field, $value, $arg_opts );
4026
4027	22	100				110	return unless defined( $r->[0] );
4028
4029	20					87	my $opts = _get_def( $arg_opts, {} );
4030							my $opt_ignore_ambiguous = _get_def(
4031							$opts->{'ignore_ambiguous'},
4032	20					124	$self->{'search_ignore_ambiguous'},
4033							$DEF_SEARCH_IGNORE_AMBIGUOUS
4034							);
4035
4036	20	100	100			65	return if @{$r} >= 2 and ( !$opt_ignore_ambiguous );
	20					155
4037	14					86	return $self->get_row_hr( $r->[0] );
4038							}
4039
4040							sub vlookup {
4041	317			317	1	35958	my $self = shift;
4042	317					4212	validate_pos(
4043							@_,
4044							{ type => SCALAR },
4045							{ type => UNDEF \| SCALAR },
4046							{ type => SCALAR },
4047							{ type => UNDEF \| HASHREF, optional => 1 }
4048							);
4049	317					1251	my ( $searched_field, $value, $target_field, $arg_opts ) = @_;
4050
4051	317					739	my $r = $self->search( $searched_field, $value, $arg_opts );
4052	315	50				661	return unless $self->_check_for_search($target_field);
4053
4054	314					652	my $opts = _get_def( $arg_opts, {} );
4055	314	100	66			720	unless ( defined( $r->[0] ) ) {
4056							return (
4057							exists $opts->{'value_if_not_found'}
4058							? $opts->{'value_if_not_found'}
4059	143	100				505	: $self->{'search_value_if_not_found'}
4060							);
4061							}
4062							elsif ( @{$r} >= 2 ) {
4063							my $opt_ignore_ambiguous = _get_def(
4064							$opts->{'ignore_ambiguous'},
4065							$self->{'search_ignore_ambiguous'},
4066							$DEF_SEARCH_IGNORE_AMBIGUOUS
4067							);
4068							return (
4069							exists $opts->{'value_if_ambiguous'}
4070							? $opts->{'value_if_ambiguous'}
4071							: $self->{'search_value_if_ambiguous'}
4072							) if !$opt_ignore_ambiguous;
4073							}
4074
4075	144	100				304	return $opts->{value_if_found} if exists $opts->{value_if_found};
4076							return $self->{search_value_if_found}
4077	140	50				255	if exists $opts->{search_value_if_found};
4078
4079	140					313	my $hr = $self->get_row_hr( $r->[0] );
4080
4081	140					574	return $hr->{$target_field};
4082							}
4083
4084							1;
4085
4086							__END__
4087
4088							=pod
4089
4090							=encoding UTF-8
4091
4092							=head1 NAME
4093
4094							Text::AutoCSV - helper module to automate the use of Text::CSV
4095
4096							=head1 VERSION
4097
4098							version 1.2.0
4099
4100							=head1 SYNOPSIS
4101
4102							By default, Text::AutoCSV will detect the following characteristics of the
4103							input:
4104
4105							- The separator, among ",", ";" and "\t" (tab)
4106
4107							- The escape character, among '"' (double-quote) and '\\' (backslash)
4108
4109							- Try UTF-8 and if it fails, fall back on latin1
4110
4111							- Read the header line and compute field names
4112
4113							- If asked to (see L</fields_dates_auto>), detect any field that contains a
4114							DateTime value, trying 20 date formats, possibly followed by a time (6 time
4115							formats tested)
4116
4117							- If asked to (see L</fields_dates>), detect DateTime format of certain fields,
4118							croak if no DateTime format can be worked out
4119
4120							- Fields identified as containing a DateTime value (L</fields_dates_auto> or
4121							L</fields_dates>) are stored as DateTime objects by default
4122
4123							Text::AutoCSV also provides methods to search on fields (using cached hash
4124							tables) and it can populate the value of "remote" fields, made from joining 2
4125							CSV files with a key-value search
4126
4127							=head2 General
4128
4129							use Text::AutoCSV;
4130
4131							# Read CSV from std input, write to std output
4132							Text::AutoCSV->new()->write();
4133
4134							# Read CSV data from f.csv, write to std output
4135							Text::AutoCSV->new(in_file => 'f.csv')->write();
4136
4137							# Read CSV data from f.csv, write to g.csv
4138							Text::AutoCSV->new(in_file => 'f.csv', out_file => 'g.csv')->write();
4139
4140							# "Rewrite" CSV file by printing out records as a list (separated by
4141							# line breaks) of field name followed by its value.
4142							my $csv = Text::AutoCSV->new(in_file => 'in.csv', walker_hr => \&walk);
4143							my @cols = $csv->get_fields_names();
4144							$csv->read();
4145							sub walk {
4146							my %rec = %{$_[0]};
4147							for (@cols) {
4148							next if $_ eq '';
4149							print("$_ => ", $rec{$_}, "\n");
4150							}
4151							print("\n");
4152							}
4153
4154							=head2 OBJ-ish functions
4155
4156							# Identify column internal names with more flexibility as the default
4157							# mechanism
4158							my $csv = Text::AutoCSV->new(in_file => 'zips.csv',
4159							fields_hr => {'CITY' => '^(city\|town)', 'ZIPCODE' => '^zip(code)?$'});
4160							# Get zipcode of Claix
4161							my $z = $csv->vlookup('CITY', ' claix ', 'ZIPCODE');
4162
4163							my $csv = Text::AutoCSV->new(in_file => 'zips.csv');
4164							# Get zipcode of Claix
4165							my $z = $csv->vlookup('CITY', ' claix ', 'ZIPCODE');
4166							# Same as above, but vlookup is strict for case and spaces around
4167							my $csv = Text::AutoCSV->new(in_file => 'zips.csv', search_case => 1,
4168							search_trim => 0);
4169							my $z = $csv->vlookup('CITY', 'Claix', 'ZIPCODE');
4170
4171							# Create field 'MYCITY' made by taking pers.csv' ZIP column value, looking
4172							# it up in the ZIPCODE columns of zips.csv, taking CITY colmun value and
4173							# naming it 'MYCITY'. Output is written in std output. If a zipcode is
4174							# ambiguous, say it.
4175							Text::AutoCSV->new(in_file => 'pers.csv')
4176							->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv',
4177							{ ignore_ambiguous => 0,
4178							value_if_ambiguous => '<duplicate zipcode found!>' }
4179							)->write();
4180
4181							# Note the above can also be written using Text::AutoCSV level attributes:
4182							Text::AutoCSV->new(in_file => 'pers.csv',
4183							search_ignore_ambiguous => 0,
4184							search_value_if_ambiguous => '<duplicate zipcode found!>')
4185							->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv')->write();
4186
4187							# Create 'MYCITY' field as above, then display some statistics
4188							my $nom_compose = 0;
4189							my $zip_not_found = 0;
4190							Text::AutoCSV->new(in_file => 'pers.csv', walker_hr => \&walk)
4191							->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv')->read();
4192							sub walk {
4193							my $hr = shift;
4194							$nom_compose++ if $hr->{'NAME'} =~ m/[- ]/;
4195							$zip_not_found++ unless defined($hr->{'MYCITY'});
4196							}
4197							print("Number of persons with a multi-part name: $nom_compose\n");
4198							print("Number of persons with unknown zipcode: $zip_not_found\n");
4199
4200							=head2 Updating
4201
4202							Text::AutoCSV->new(in_file => 'names.csv', out_file => 'ucnames.csv',
4203							read_post_update_hr => \&updt)->write();
4204							sub updt { $_[0]->{'LASTNAME'} =~ s/^.*$/\U&/; }
4205
4206							Text::AutoCSV->new(in_file => 'squares.csv',
4207							out_file => 'checkedsquares.csv',
4208							out_filter => \&wf)->write();
4209							sub wf { return ($_[0]->{'X'} ** 2 == $_[0]->{'SQUAREOFX'}); }
4210
4211							# Add a field for the full name, made of the concatenation of the
4212							# first name and the last name.
4213							# Also display stats about empty full names.
4214							Text::AutoCSV->new(in_file => 'dirpeople.csv', out_file => 'dirwithfn.csv',
4215							verbose => 1)
4216							->field_add_computed('FULLNAME', \&calc_fn)->write();
4217							sub calc_fn {
4218							my ($field, $hr, $stats) = @_;
4219							my $fn = $hr->{'FIRSTNAME'} . ' ' . uc($hr->{'LASTNAME'});
4220							$stats->{'empty full name'}++ if $fn eq ' ';
4221							return $fn;
4222							}
4223
4224							# Read a file with a lot of columns and keep only 2 columns in output
4225							Text::AutoCSV->new(in_file => 'big.csv', out_file => 'addr.csv',
4226							out_fields => ['NAME', 'ADDRESS'])
4227							->out_header('ADDRESS', 'Postal Address')
4228							->write();
4229
4230							=head2 Datetime management
4231
4232							# Detect any field containing a DateTime value and convert it to yyyy-mm-dd
4233							# whatever the input format is.
4234							Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv',
4235							fields_dates_auto => 1, out_dates_format => '%F')->write();
4236
4237							# Detect any field containing a DateTime value and convert it to a US
4238							# DateTime whatever the input format is.
4239							Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv',
4240							fields_dates_auto => 1,
4241							out_dates_format => '%b %d, %Y, %I:%M:%S %p',
4242							out_dates_locale => 'en')->write();
4243
4244							# Find dates of specific formats and convert it into yyyy-mm-dd
4245							Text::AutoCSV->new(in_file => 'raw.csv', out_file => 'cooked.csv',
4246							dates_formats_to_try => ['%d_%m_%Y', '%m_%d_%Y', '%Y_%m_%d'],
4247							out_dates_format => '%F')->write();
4248
4249							# Take the dates on columns 'LASTLOGIN' and 'CREATIONDATE' and convert it
4250							# into French dates (day/month/year).
4251							# Text::AutoCSV will croak if LASTLOGIN or CREATIONDATE do not contain a
4252							# DateTime format. By default, Text::AutoCSV will try 20 different formats.
4253							Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv',
4254							fields_dates => ['LASTLOGIN', 'CREATIONDATE'],
4255							out_dates_format => '%d/%m/%Y')->write();
4256
4257							# Convert 2 DateTime fields into unix standard epoch
4258							# Write -1 if DateTime is empty.
4259							sub toepoch { return $_->epoch() if $_; -1; }
4260							Text::AutoCSV->new(in_file => 'stats.csv', out_file => 'stats-epoch.csv',
4261							fields_dates => ['ATIME', 'MTIME'])
4262							->in_map('ATIME', \&toepoch)
4263							->in_map('MTIME', \&toepoch)
4264							->write();
4265
4266							# Do the other way round from above: convert 2 fields containing unix
4267							# standard epoch into a string displaying a human-readable DateTime.
4268							my $formatter = DateTime::Format::Strptime->new(
4269							pattern => 'DATE=%F, TIME=%T'
4270							);
4271							sub fromepoch {
4272							return $formatter->format_datetime(DateTime->from_epoch(epoch => $_))
4273							if $_ >= 0;
4274							'';
4275							}
4276							$csv = Text::AutoCSV->new(in_file => 'stats-epoch.csv',
4277							out_file => 'stats2.csv'
4278							)
4279							->in_map('ATIME', \&fromepoch)
4280							->in_map('MTIME', \&fromepoch)
4281							->write();
4282
4283							=head2 Miscellaneous
4284
4285							use Text::AutoCSV 'remove_accents';
4286							# Output 'Francais: etre elementaire, Tcheque: sluzba dum' followed by a new
4287							# line.
4288							print remove_accents("FranÃ§ais: Ãªtre Ã©lÃ©mentaire, TchÃ¨que: sluÅ¾ba dÅ¯m"),
4289							"\n";
4290
4291							=head2 csvcopy.pl
4292
4293							As of version 1.2.0, csvcopy.pl is installed along with Text::AutoCSV.
4294							It is a command-line wrapper to Text::AutoCSV, run
4295
4296							csvcopy.pl --help
4297
4298							to get help.
4299
4300							=for Pod::Coverage ERR_UNKNOWN_FIELD
4301
4302							=head1 NAME
4303
4304							Text::AutoCSV - helper module to automate the use of Text::CSV
4305
4306							=head1 METHODS
4307
4308							=head2 new
4309
4310							my $csv = Text::AutoCSV->new(%attr);
4311
4312							(Class method) Returns a new instance of Text::AutoCSV. The object attributes
4313							are described by the hash C<%attr> (can be empty).
4314
4315							Currently the following attributes are available:
4316
4317							=over 4
4318
4319							=item Preliminary note about L</fields_hr>, L</fields_ar> and L</fields_column_names> attributes
4320
4321							By default, Text::AutoCSV assumes the input has a header and will use the field
4322							values of this first line (the header) to work out the column internal names.
4323							These internal names are used everywhere in Text::AutoCSV to designate columns.
4324
4325							The values are transformed as follows:
4326
4327							- All accents are removed using the exportable function L</remove_accents>.
4328
4329							- Any non-alphanumeric character is removed (except underscore) and all letters
4330							are switched to upper case. The regex to do this is
4331
4332							s/[^[:alnum:]_]//gi; s/^.*$/\U$&/;
4333
4334							Thus a header line of
4335
4336							'Office Number 1,Office_2,Personal Number'
4337
4338							will produce the internal column names
4339
4340							'OFFICENUMBER1' (first column)
4341
4342							'OFFICE_2' (second column)
4343
4344							'PERSONALNUMBER' (third column).
4345
4346							The attribute L</fields_hr>, L</fields_ar> or L</fields_column_names> (only one
4347							of the three is useful at a time) allows to change this behavior.
4348
4349							B<NOTE>
4350
4351							The removal of accents is not a conversion to us-ascii, see L</remove_accents>
4352							for details.
4353
4354							=item Preliminary note about fields reading
4355
4356							Functions that are given a field name (L</get_cell>, L</vlookup>,
4357							L</field_add_copy>, ...) raise an error if the field requested does not exist.
4358
4359							B<SO WILL THE HASHREFS GIVEN BY Text::AutoCSV:> when a function returns a
4360							hashref (L</search_1hr>, L</get_row_hr>, ...), the hash is locked with the
4361							C<lock_keys> function of C<Hash::Util>. Any attempt to read a non-existing key
4362							from the hash causes a croak. This feature is de-activated if you specified
4363							C<croak_if_error =E<gt> 0> when creating Text::AutoCSV object.
4364
4365							=item in_file
4366
4367							The name of the file to read CSV data from.
4368
4369							If not specified or empty, read standard input.
4370
4371							Example:
4372
4373							my $csv = Text::AutoCSV->new(in_file => 'in.csv');
4374
4375							=item inh
4376
4377							File handle to read CSV data from.
4378							Normally you don't want to specify this attribute.
4379
4380							C<inh> is useful if you don't like the way Text::AutoCSV opens the input file
4381							for you.
4382
4383							Example:
4384
4385							open my $inh, "producecsv.sh\|";
4386							my $csv = Text::AutoCSV->new(inh => $inh);
4387
4388							=item encoding
4389
4390							Comma-separated list of encodings to try to read input.
4391
4392							Note that finding the correct encoding of any given input is overkill. This
4393							script just tries encodings one after the other, and selects the first one that
4394							does not trigger a warning during reading of input. If all produce warnings,
4395							select the first one.
4396
4397							The encoding chosen is used in output, unless attribute L</out_encoding> is
4398							specified.
4399
4400							Value by default: 'UTF-8,latin1'
4401
4402							B<IMPORTANT>
4403
4404							If one tries something like C<encoding =E<gt> 'latin1,UTF-8'>, it'll almost
4405							never detect UTF-8 because latin1 rarely triggers warnings during reading. It
4406							tends to be also true with encodings like UTF-16 that can remain happy with
4407							various inputs (sometimes resulting in Western languages turned into Chinese
4408							text).
4409
4410							Ultimately this attribute should be used with a unique value. The result when
4411							using more than one value can produce weird results and should be considered
4412							B<experimental>.
4413
4414							Example:
4415
4416							my $csv = Text::AutoCSV->new(in_file => 'w.csv', encoding => 'UTF-16');
4417
4418							=item via
4419
4420							Adds a C<via> to the file opening instruction performed by Text::AutoCSV. You
4421							don't want to use it under normal circumstances.
4422
4423							The value should start with a ':' character (Text::AutoCSV won't add one for
4424							you).
4425
4426							Value by default: none
4427
4428							Example:
4429
4430							my $csv = Text::AutoCSV->new(in_file => 'in.csv',
4431							via => ':raw:perlio:UTF-32:crlf');
4432
4433							=item dont_mess_with_encoding
4434
4435							If true, just ignore completely encoding and don't try to alter I/O operations
4436							with encoding considerations (using C<binmode> instruction). Note that if inh
4437							attribute is specified, then Text::AutoCSV will consider the caller manages
4438							encoding for himself and dont_mess_with_encoding will be automatically set, too.
4439
4440							B<IMPORTANT>
4441
4442							This attribute does not mean perl will totally ignore encoding and would
4443							consider character strings as bytes for example. The meaning of
4444							L</dont_mess_with_encoding> is that Text::AutoCSV itself will totally ignore
4445							encoding matters, and leave it entirely to Perl' default.
4446
4447							Value by default:
4448
4449							0 if inh attribute is not set
4450							1 if inh attribute is set
4451
4452							Example:
4453
4454							my $csv = Text::AutoCSV->new(in_file => 'in.csv',
4455							dont_mess_with_encoding => 1);
4456
4457							=item sep_char
4458
4459							Specify the CSV separator character. Turns off separator auto-detection. This
4460							attribute is passed as is to C<Text::CSV-E<gt>new()>.
4461
4462							Example:
4463
4464							my $csv = Text::AutoCSV->new(in_file => 'in.csv', sep_char => ';');
4465
4466							=item quote_char
4467
4468							Specify the field quote character. This attribute is passed as is to
4469							C<Text::CSV-E<gt>new()>.
4470
4471							Value by default: double quote ('"')
4472
4473							Example:
4474
4475							my $csv = Text::AutoCSV->new(in_file => 'in.csv', quote_char => '\'');
4476
4477							=item escape_char
4478
4479							Specify the escape character. Turns off escape character auto-detection. This
4480							attribute is passed as is to C<Text::CSV-E<gt>new()>.
4481
4482							Value by default: backslash ('\\')
4483
4484							Example:
4485
4486							my $csv = Text::AutoCSV->new(in_file => 'in.csv', escape_char => '"');
4487
4488							=item in_csvobj
4489
4490							Text::CSV object to use.
4491							Normally you don't want to specify this attribute.
4492
4493							By default, Text::AutoCSV will manage creating such an object and will work hard
4494							to detect the parameters it requires.
4495
4496							Defining C<in_csvobj> attribute turns off separator character and escape
4497							character auto-detection.
4498
4499							Using this attribute workarounds Text::AutoCSV philosophy a bit, but you may
4500							need it in case Text::AutoCSV behavior is not suitable for Text::CSV creation.
4501
4502							Example:
4503
4504							my $tcsv = Text::CSV->new();
4505							my $acsv = Text::AutoCSV->new(in_file => 'in.csv', in_csvobj => $tcsv);
4506
4507							=item has_headers
4508
4509							If true, Text::AutoCSV assumes the input has a header line.
4510
4511							Value by default: 1
4512
4513							Example:
4514
4515							my $csv = Text::AutoCSV->new(in_file => 'in.csv', has_headers => 0);
4516
4517							=item fields_hr
4518
4519							(Only if input has a header line) Hash ref that contains column internal names
4520							along with a regular expression to find it in the header line.
4521							For example if you have:
4522
4523							my $csv = Text::AutoCSV->new(in_file => 'in.csv',
4524							fields_hr => {'PHONE OFFICE' => '^office phone nu',
4525							'PHONE PERSONAL' => '^personal phone nu'});
4526
4527							And the header line is
4528
4529							'Personal Phone Number,Office Phone Number'
4530
4531							the column name 'PHONE OFFICE' will designate the second column and the column
4532							name 'PHONE PERSONAL' will designate the first column.
4533
4534							You can choose column names like 'Phone Office' and 'Phone Personal' as well.
4535
4536							The regex search is case insensitive.
4537
4538							=item fields_ar
4539
4540							(Only if input has a header line) Array ref that contains column internal names.
4541							The array is used to create a hash ref of the same kind as L</fields_hr>, by
4542							wrapping the column name in a regex. The names are surrounded by a leading '^'
4543							and a trailing '$', meaning, the name must match the entire field name.
4544
4545							For example
4546
4547							fields_ar => ['OFFICENUMBER', 'PERSONALNUMBER']
4548
4549							is strictly equivalent to
4550
4551							fields_hr => {'OFFICENUMBER' => '^officenumber$',
4552							'PERSONALNUMBER' = '^personalnumber$'}
4553
4554							The regex search is case insensitive.
4555
4556							C<fields_ar> is useful if the internal names are identical to the file column
4557							names. It avoids repeating the names over and over as would happen if using
4558							L</fields_hr> attribute.
4559
4560							I<NOTE>
4561
4562							You might wonder why using fields_ar as opposed to Text::AutoCSV default'
4563							mechanism. There are two reasons for that:
4564
4565							1- Text::AutoCSV removes spaces from column names, and some people may want
4566							another behavior. A header name of 'Phone Number' will get an internal column
4567							name of 'PHONENUMBER' (default behavior, if none of fields_hr, fields_ar and
4568							fields_column_names attributes is specified), and one may prefer 'PHONE NUMBER'
4569							or 'phone number' or whatsoever.
4570
4571							2- By specifying a list of columns using either of fields_hr or fields_ar, you
4572							not only map column names as found in the header line to internal column names:
4573							you also I<request> these columns to be available. If one of the requested
4574							columns cannot be found, Text::AutoCSV will croak (default) or print an error
4575							and return an undef object (if created with C<croak_if_error =E<gt> 0>).
4576
4577							=item fields_column_names
4578
4579							Array ref of column internal names, in the order of columns in file. This
4580							attribute works like the C<column_names> attribute of Text::CSV. It'll just
4581							assign names to columns one by one, regardless of what the header line contains.
4582							It'll work also if the file has no header line.
4583
4584							Example:
4585
4586							my $csv = Text::AutoCSV->new(in_file => 'in.csv',
4587							fields_column_names => ['My COL1', '', 'My COL3']);
4588
4589							=item out_file
4590
4591							Output file when executing the L</write> method.
4592
4593							If not specified or empty, write to standard output.
4594
4595							Example:
4596
4597							my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv');
4598
4599							=item outh
4600
4601							File handle to write CSV data to when executing the L</write> method.
4602							Normally you don't want to specify this attribute.
4603
4604							C<outh> is useful if you don't like the way Text::AutoCSV opens the output file
4605							for you.
4606
4607							Example:
4608
4609							my $outh = open "myin.csv', ">>";
4610							my $csv = Text::AutoCSV->new(in_file => 'in.csv', has_headers => 0,
4611							outh => $outh);
4612
4613							=item out_encoding
4614
4615							Enforce the encoding of output.
4616
4617							Value by default: input encoding
4618
4619							Example:
4620
4621							my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv',
4622							out_encoding => 'UTF-16');
4623
4624							=item out_utf8_bom
4625
4626							Enforce BOM (Byte-Order-Mark) on output, when it is UTF8. If output encoding is
4627							not UTF-8, this attribute is ignored.
4628
4629							B<NOTE>
4630
4631							UTF-8 needs no BOM (there is no Byte-Order in UTF-8), and in practice,
4632							UTF8-encoded files rarely have a BOM.
4633
4634							Using this attribute is not recommended. It is provided for the sake of
4635							completeness, and also to produce Unicode files Microsoft EXCEL will be happy to
4636							read.
4637
4638							At first sight it would seem more logical to make EXCEL happy with something
4639							like this:
4640
4641							out_encoding => 'UTF-16'
4642
4643							But... While EXCEL will identify UTF-16 and read it as such, it will not take
4644							into account the BOM found at the beginning. In the end the first cell will have
4645							2 useless characters prepended. The only solution the author knows to workaround
4646							this issue if to use UTF-8 as output encoding, and enforce a BOM. That is, use:
4647
4648							..., out_encoding => 'UTF-8', out_utf8_bom => 1, ...
4649
4650							=item out_sep_char
4651
4652							Enforce the output CSV separator character.
4653
4654							Value by default: input separator
4655
4656							Example:
4657
4658							my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv',
4659							out_sep_char => ',');
4660
4661							=item out_quote_char
4662
4663							Enforce the output CSV quote character.
4664
4665							Value by default: input quote character
4666
4667							Example:
4668
4669							my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv',
4670							out_quote_char => '"');
4671
4672							=item out_escape_char
4673
4674							Enforce the output CSV escape character.
4675
4676							Value by default: input escape character
4677
4678							Example:
4679
4680							my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv',
4681							out_escape_char_char => '\\');
4682
4683							=item out_always_quote
4684
4685							If true, quote all fields of output (set always_quote of Text::CSV).
4686
4687							If false, don't quote all fields of output (don't set C<always_quote> of
4688							Text::CSV).
4689
4690							Value by default: same as what is found in input
4691
4692							While reading input, Text::AutoCSV works out whether or not all fields were
4693							quoted. If yes, then the output Text::CSV object has the always_quote attribute
4694							set, if no, then the output Text::CSV object does not have this attribute set.
4695
4696							Example:
4697
4698							my $csv = Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv',
4699							out_always_quote => 1);
4700
4701							=item out_has_headers
4702
4703							If true, when writing output, write a header line on first line.
4704
4705							If false, when writing output, don't write a header line on first line.
4706
4707							Value by default: same as has_headers attribute
4708
4709							Example 1
4710
4711							Read standard input and write to standard output, removing the header line.
4712
4713							Text::AutoCSV->new(out_has_headers => 0)->write();
4714
4715							Example 2
4716
4717							Read standard input and write to standard output, adding a header line.
4718
4719							Text::AutoCSV->new(fields_column_names => ['MYCOL1', 'MYCOL2'],
4720							out_has_headers => 1)->write();
4721
4722							=item no_undef
4723
4724							If true, non-existent column values are set to an empty string instead of undef.
4725							It is also done on extra fields that happen to have an undef value (for example
4726							when the target of a linked field is not found).
4727
4728							Note this attribute does not work on callback functions output set with
4729							L</in_map>: for example empty DateTime values (on fields identified as
4730							containing a date/time, see C<dates_*> attributes below) are set to C<undef>,
4731							even while C<no_undef> is set. Indeed setting it to an empty string while
4732							non-empty values would contain a Datetime object would not be clean. An empty
4733							value in a placeholder containing an object must be undef.
4734
4735							Since version 1.1.5 of Text::AutoCSV, C<no_undef> is examined when sending
4736							parameter ($_) to L</in_map> callback: an undef value is now passed as is (as
4737							undef), unless C<no_undef> is set. If C<no_undef> is set, and field value is
4738							undef, then $_ is set to the empty string ('') when calling callback defined by
4739							L</in_map>. This new behavior was put in place to be consistent with what is
4740							being done with DateTime values.
4741
4742							Value by default: 0
4743
4744							Example:
4745
4746							my $csv = Text::AutoCSV->new(in_file => 'in.csv', no_undef => 1);
4747
4748							=item read_post_update_hr
4749
4750							To be set to a ref sub. Each time a record is read from input, call
4751							C<read_post_update_hr> to update the hash ref of the record. The sub is called
4752							with 2 arguments: the hash ref to the record value and the hash ref to stats.
4753
4754							The stats allow to count events and are printed in the end of reading in case
4755							Text::AutoCSV is called in verbose mode (C<verbose =E<gt> 1>).
4756
4757							For example, the C<read_post_update_hr> below will turn column 'CITY' values in
4758							upper case and count occurences of empty cities in stat display:
4759
4760							Text::AutoCSV->new(in_file => 'addresses.csv',
4761							read_post_update_hr => \&updt, verbose => 1)
4762							->write();
4763							sub updt {
4764							my ($hr, $stats) = @_;
4765							$hr->{'CITY'} =~ s/^.*$/\U$&/;
4766							$stats->{'empty city encountered'}++ if $hr->{'CITY'} eq '';
4767							}
4768
4769							B<IMPORTANT>
4770
4771							You cannot create a field this way. To create a field, you have to use the
4772							member functions L</field_add_link>, L</field_add_copy> or
4773							L</field_add_computed>.
4774
4775							B<NOTE>
4776
4777							If you wish to manage some updates at field level, consider registering update
4778							functions with L</in_map> and L</out_map> member functions. These functions
4779							register callbacks that work at field level and with $_ variable (thus the
4780							callback function invoked is AutoCSV-agnostic).
4781
4782							L</in_map> updates a field after read, L</out_map> updates the field content
4783							before writing it.
4784
4785							=item walker_hr
4786
4787							To set to a sub ref that'll be executed each time a record is read from input.
4788							It is executed after L</read_post_update_hr>. The sub is called with 2
4789							arguments: the hash ref to the record value and the hash ref to stats.
4790
4791							Note L</read_post_update_hr> is meant for updating record fields just after
4792							reading, whereas L</walker_hr> is read-only.
4793
4794							The stats allow to count events and are printed in the end of reading in case
4795							Text::AutoCSV is called in verbose mode (C<verbose =E<gt> 1>). If the
4796							L</verbose> attribute is not set, the stats are not displayed, however you can
4797							get stats by calling the get_stats function.
4798
4799							The example below will count in the stats the number of records where the 'CITY'
4800							field is empty. Thanks to C<verbose =E<gt> 1> attribute, at the end of reading
4801							the stats are displayed.
4802
4803							my $csv = Text::AutoCSV->new(in_file => 'addresses.csv',
4804							walker_hr => \&walk1, verbose => 1)->read();
4805							sub walk1 {
4806							my ($hr, $stats) = @_;
4807							$stats->{'empty city'}++ if $hr->{'CITY'} eq '';
4808							}
4809
4810							=item walker_ar
4811
4812							To set to a sub ref that'll be executed each time a record is read from input.
4813							It is executed after L</read_post_update_hr>. The sub is called with 2
4814							arguments: the array ref to the record value and the hash ref to stats.
4815
4816							Note L</read_post_update_hr> is meant for updating record fields just after
4817							reading, whereas C<walker_hr> is read-only.
4818
4819							The stats allow to count events and are printed in the end of reading in case
4820							Text::AutoCSV is called in verbose mode (C<verbose =E<gt> 1>). If the
4821							L</verbose> attribute is not set, the stats are lost.
4822
4823							The array ref contains values in their natural order in the CSV. To be used with
4824							the column names, you have to use L</get_fields_names> member function.
4825
4826							The example below will count in the stats the number of records where the 'CITY'
4827							field is empty. Thanks to C<verbose =E<gt> 1> attribute, at the end of reading
4828							the stats are displayed. It produces the exact same result as the example in
4829							walker_hr attribute, but it uses walker_ar.
4830
4831							use List::MoreUtils qw(first_index);
4832							my $csv = Text::AutoCSV->new(in_file => 'addresses.csv',
4833							walker_ar => \&walk2, verbose => 1);
4834							my @cols = $csv->get_fields_names();
4835							my $idxCITY = first_index { /^city$/i } @cols;
4836							die "No city field!??" if $idxCITY < 0;
4837							$csv->read();
4838							sub walk2 {
4839							my ($ar, $stats) = @_;
4840							$stats->{'empty city'}++ if $ar->[$idxCITY] eq '';
4841							}
4842
4843							=item write_filter_hr
4844
4845							Alias of L</out_filter>.
4846
4847							=item out_filter
4848
4849							To be set to a ref sub. Before writing a record to output, C<out_filter> is
4850							called and the record gets writen only if C<out_filter> return value is true.
4851							The sub is called with 1 argument: the hash ref to the record value.
4852
4853							For example, if you want to output only records where the 'CITY' column value is
4854							Grenoble:
4855
4856							Text::AutoCSV->new(in_file => 'addresses.csv', out_file => 'grenoble.csv',
4857							out_filter => \&filt)->write();
4858							sub filt {
4859							my $hr = shift;
4860							return 1 if $hr->{'CITY'} =~ /^grenoble$/i;
4861							return 0;
4862							}
4863
4864							=item write_fields
4865
4866							Alias of L</out_fields>.
4867
4868							=item out_fields
4869
4870							Set to an array ref. List fields to write to output.
4871
4872							Fields are written in their order in the array ref, the first CSV column being
4873							the first element in the array, and so on. Fields not listed in B<out_fields>
4874							are not written in output.
4875
4876							You can use empty field names to have empty columns in output.
4877
4878							Value by default: none, meaning, all fields are output in their natural order.
4879							What is natural order? It is the input order for fields that were read from
4880							input, and the order in which they got created for created fields.
4881
4882							Example:
4883
4884							Text::AutoCSV->new(in_file => 'allinfos.csv',
4885							out_file => 'only-addresses.csv', out_fields => [ 'NAME', 'ADDRESS' ]
4886							)->write();
4887
4888							=item out_orderby
4889
4890							Array reference to a list of fields to sort output with.
4891
4892							At the moment this feature is a bit of a hack (no option to make sort descending
4893							or ascending, numeric or text, and it is not part of test plan).
4894
4895							Example:
4896
4897							Text::AutoCSV->new(in_file => 'names.csv', out_file => 'sortednames.csv',
4898							out_orderby => [ 'LASTNAME', 'FIRSTNAME']);
4899
4900							=item search_case
4901
4902							If true, searches are case sensitive by default. Searches are done by the member
4903							functions L</search>, L</search_1hr>, L</vlookup>, and linked fields
4904							(L</field_add_link>).
4905
4906							The search functions can also be called with the option L</case>, that takes
4907							precedence over the object-level C<search_case> attribute value. See L</vlookup>
4908							help.
4909
4910							Value by default: 0 (by default searches are case insensitive)
4911
4912							Example:
4913
4914							my $csv = Text::AutoCSV->new(in_file => 'in.csv', search_case => 1);
4915
4916							=item search_trim
4917
4918							If true, searches ignore the presence of leading or trailing spaces in values.
4919
4920							The search functions can also be called with the option L</trim>, that takes
4921							precedence over the object-level C<search_trim> attribute value. See L</vlookup>
4922							help.
4923
4924							Value by default: 1 (by default searches ignore leading and trailing spaces)
4925
4926							Example:
4927
4928							my $csv = Text::AutoCSV->new(in_file => 'in.csv', search_trim => 0);
4929
4930							=item search_ignore_empty
4931
4932							If true, empty fields are not included in the search indexes.
4933
4934							The search functions can also be called with the option L</ignore_empty>, that
4935							takes precedence over the object-level C<search_ignore_empty> attribute value.
4936							See L</vlookup> help.
4937
4938							Value by default: 1 (by default, search of the value '' will find nothing)
4939
4940							Example:
4941
4942							my $csv = Text::AutoCSV->new(in_file => 'in.csv', search_ignore_empty => 0);
4943
4944							=item search_ignore_accents
4945
4946							If true, accents are ignored by search indexes.
4947
4948							The search functions can also be called with the option L</ignore_accents>, that
4949							takes precedence over the object-level C<search_ignore_accents> attribute value.
4950							See L</vlookup> help.
4951
4952							Value by default: 1 (by default, accents are ignored by search functions)
4953
4954							Example:
4955
4956							my $csv = Text::AutoCSV->new(in_file => 'in.csv',
4957							search_ignore_accents => 0);
4958
4959							=item search_value_if_not_found
4960
4961							When a search is done with a unique value to return (field_add_link member
4962							function behavior or return value of vlookup), default value of option
4963							L</value_if_not_found>. See L</vlookup>.
4964
4965							=item search_value_if_found
4966
4967							When a search is done with a unique value to return (field_add_link member
4968							function behavior or return value of vlookup), default value of option
4969							L</value_if_found>. See L</vlookup>.
4970
4971							B<IMPORTANT>
4972
4973							This attribute is extremly unusual. Once you've provided it, all vlookups and
4974							the target field value of fields created with field_add_link will all be
4975							populated with the value provided with this option.
4976
4977							Don't use it unless you know what you are doing.
4978
4979							=item search_ignore_ambiguous
4980
4981							When a search is done with a unique value to return (field_add_link member
4982							function behavior or return value of search_1hr and vlookup), default value of
4983							option L</ignore_ambiguous>. See L</vlookup>.
4984
4985							=item search_value_if_ambiguous
4986
4987							When a search is done with a unique value to return (field_add_link member
4988							function behavior or return value of vlookup), default value of option
4989							L</value_if_ambiguous>. See L</vlookup>.
4990
4991							=item fields_dates
4992
4993							Array ref of field names that contain a date.
4994
4995							Once the formats of these fields is known (auto-detection by default), each of
4996							these fields will get a specific L</in_map> sub that converts the text in a
4997							DateTime object and a L</out_map> sub that converts back from DateTime to text.
4998
4999							B<NOTE>
5000
5001							The L</out_map> given to a DateTime field is "defensive code": normally,
5002							L</in_map> converts text into a DateTime object and L</out_map> does the
5003							opposite, it takes a DateTime object and converts it to text. If ever
5004							L</out_map> encounters a value that is not a DateTime object, it'll just
5005							stringify it (evaluation in a string context), without calling its DateTime
5006							formatter.
5007
5008							If the format cannot be detected for a given field, output an error message and
5009							as always when an error occurs, croak (unless L</croak_if_error> got set to 0).
5010
5011							Value by default: none
5012
5013							Example:
5014
5015							my $csv = Text::AutoCSV->new(in_file => 'logins.csv',
5016							fields_dates => ['LASTLOGIN', 'CREATIONDATE']);
5017
5018							=item fields_dates_auto
5019
5020							Boolean value. If set to 1, will detect dates formats on all fields. Fields in
5021							which a DateTime format got detected are then managed as if they had been being
5022							listed in L</fields_dates> attribute: they get an appropriate L</in_map> sub and
5023							a L</out_map> sub to convert to and from DateTime (see L</fields_dates>
5024							attribute above).
5025
5026							C<fields_dates_auto> looks for DateTime on all fields, but it expects nothing:
5027							it won't raise an error if no field is found that contains DateTime.
5028
5029							Value by default: 0
5030
5031							Example:
5032
5033							my $csv = Text::AutoCSV->new(in_file => 'logins.csv',
5034							fields_dates_auto => 1);
5035
5036							=item fields_dates_auto_optimize
5037
5038							Relevant only if L</fields_dates_auto> is set.
5039
5040							Normally when L</fields_dates_auto> is set, the input is read completely to make
5041							sure auto-detection produces a reliable result. If C<fields_dates_auto_optimize>
5042							is set, this reading pass will stop as soon as there is no ambiguity left. That
5043							is, for every fields in input, the date format (or the fact that no date format
5044							is suitable) is known.
5045
5046							Using this option is a bit risky because it could trigger a date format
5047							detection that later in the input, would turn out to be wrong. Should that be
5048							the case, strange errors will occur, that are not easy to understand. Use it at
5049							your own risk.
5050
5051							Value by default: 0
5052
5053							Example:
5054
5055							my $csv = Text::AutoCSV->new(in_file => 'logins.csv',
5056							fields_dates_auto => 1, fields_dates_auto_optimize => 1);
5057
5058							=item dates_formats_to_try
5059
5060							Array ref of string formats.
5061
5062							Text::AutoCSV has a default built-in list of 20 date formats to try and 6 time
5063							formats (also it'll combine any date format with any time format).
5064
5065							C<dates_formats_to_try> will replace Text::AutoCSV default format-list will the
5066							one you specify, in case the default would not produce the results you expect.
5067
5068							The formats are written in Strptime format.
5069
5070							Value by default (see below about the role of the pseudo-format ''):
5071
5072							[ '',
5073							'%Y-%m-%d',
5074							'%Y.%m.%d',
5075							'%Y/%m/%d',
5076							'%m.%d.%y',
5077							'%m-%d-%Y',
5078							'%m.%d.%Y',
5079							'%m/%d/%Y',
5080							'%d-%m-%Y',
5081							'%d.%m.%Y',
5082							'%d/%m/%Y',
5083							'%m-%d-%y',
5084							'%m/%d/%y',
5085							'%d-%m-%y',
5086							'%d.%m.%y',
5087							'%d/%m/%y',
5088							'%Y%m%d%H%M%S',
5089							'%b %d, %Y',
5090							'%b %d %Y',
5091							'%b %d %T %Z %Y',
5092							'%d %b %Y',
5093							'%d %b, %Y' ]
5094
5095							B<IMPORTANT>
5096
5097							The empty format (empty string) has a special meaning: when specified,
5098							Text::AutoCSV will be able to identify fields that contain only a time (not
5099							preceeded by a date).
5100
5101							B<Note>
5102
5103							Format identification is over only when there is no more ambiguity. So the usual
5104							pitfall of US versus French dates (month-day versus day-month) gets resolved
5105							only when a date is encountered that disambiguates it (a date of 13th of the
5106							month or later).
5107
5108							Example with a weird format that uses underscores to separate elements, using
5109							either US (month, day, year), French (day, month, year), or international (year,
5110							month, day) order:
5111
5112							my $csv = Text::AutoCSV->new(in_file => 'logins.csv',
5113							dates_formats_to_try => ['%d_%m_%Y', '%m_%d_%Y', '%Y_%m_%d']);
5114
5115							=item dates_formats_to_try_supp
5116
5117							Same as L</dates_formats_to_try> but instead of replacing the default list of
5118							formats used during detection, it is added to this default list.
5119
5120							You want to use this attribute if you need a specific DateTime format while
5121							continuing to benefit from the default list.
5122
5123							B<IMPORTANT>
5124
5125							Text::AutoCSV will identify a given Datetime format only when there is no
5126							ambiguity, meaning, one unique Datetime format matches (all other failed).
5127							Adding a format that already exists in the default list will prevent the format
5128							from being identified, as it'll always be ambiguous. See
5129							L</dates_formats_to_try> for the default list of formats.
5130
5131							Example:
5132
5133							my $csv = Text::AutoCSV->new(in_file => 'logins.csv',
5134							dates_formats_to_try_supp => ['%d_%m_%Y', '%m_%d_%Y', '%Y_%m_%d']);
5135
5136							=item dates_ignore_trailing_chars
5137
5138							If set to 1, DateTime auto-detection will ignore trailing text that may follow
5139							detected DateTime-like text.
5140
5141							Value by default: 1 (do ignore trailing chars)
5142
5143							my $csv = Text::AutoCSV->new(in_file => 'logins.csv',
5144							dates_ignore_trailing_chars => 0);
5145
5146							=item dates_search_time
5147
5148							If set to 1, look for times when detecting DateTime format. That is, whenever a
5149							date format candidate is found, a longer candidate that also contains a time
5150							(after the date) is tested.
5151
5152							Value by default: 1 (do look for times when auto-detecting DateTime formats)
5153
5154							Example:
5155
5156							my $csv = Text::AutoCSV->new(in_file => 'logins.csv',
5157							dates_search_time => 0);
5158
5159							=item dates_locales
5160
5161							Comma-separated string of locales to test when detecting DateTime formats.
5162							Ultimately, Text::AutoCSV will try all combinations of date formats, times and
5163							locales.
5164
5165							Value by default: none (use perl default locale)
5166
5167							Example:
5168
5169							my $csv = Text::AutoCSV->new(in_file => 'logins.csv',
5170							dates_locales => 'fr,de,en');
5171
5172							=item dates_zeros_ok
5173
5174							Boolean. If true, a date made only of 0s is regarded as being empty.
5175
5176							For example if C<dates_zeros_ok> is False, then a date like 0000-00-00 will be
5177							always incorrect (as the day and month are out of bounds), therefore a format
5178							like '%Y-%m-%d' will never match for the field.
5179
5180							Conversely if C<dates_zeros_ok> is true, then a date like 0000-00-00 will be
5181							processed as if being the empty string, thus the detection of format will work
5182							and when parsed, this "full of zeros" dates will be processed the same way as
5183							the empty string (= value will be undef).
5184
5185							B<IMPORTANT>
5186
5187							"0s dates" are evaluated to undef when parsed, thus when converted back to text
5188							(out_map), they are set to an empty string, not to the original value.
5189
5190							Value by default: 1
5191
5192							Example:
5193
5194							my $csv = Text::AutoCSV->new(in_file => 'in.csv', dates_zeros_ok => 0);
5195
5196							=item out_dates_format
5197
5198							Enforce the format of dates in output, for all fields that contain a DateTime
5199							value.
5200
5201							The format is written in Strptime format.
5202
5203							Value by default: none (by default, use format detected on input)
5204
5205							Example:
5206
5207							# Detect any field containing a DateTime value and convert it to
5208							# yyyy-mm-dd whatever the input format is.
5209							Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv',
5210							fields_dates_auto => 1, out_dates_format => '%F')->write();
5211
5212							=item out_dates_locale
5213
5214							Taken into account only if L</out_dates_format> is used.
5215
5216							Sets the locale to apply on L</out_dates_format>.
5217
5218							Value by default: none (by default, use the locale detected on input)
5219
5220							Example:
5221
5222							# Detect any field containing a DateTime value and convert it to a US
5223							# DateTime whatever the input format is.
5224							Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv',
5225							fields_dates_auto => 1, out_dates_format => '%b %d, %Y, %I:%M:%S %p',
5226							out_dates_locale => 'en')->write();
5227
5228							=item croak_if_error
5229
5230							If true, stops the program execution in case of error.
5231
5232							B<IMPORTANT>
5233
5234							Value by default: 1
5235
5236							If set to zero (C<croak_if_error =E<gt> 0>), errors are displayed as warnings.
5237							This printing can then be affected by setting the L</quiet> attribute.
5238
5239							=item verbose
5240
5241							If true, get Text::AutoCSV to be a bit talkative instead of speaking only when
5242							warnings and errors occur. Verbose output is printed to STDERR by default, this
5243							can be tuned with the L</infoh> attribute.
5244
5245							Value by default: 0
5246
5247							Example:
5248
5249							my $csv = Text::AutoCSV->new(in_file => 'in.csv', verbose => 1);
5250
5251							=item infoh
5252
5253							File handle to display program's verbose output. Has effect mainly with
5254							attribute C<verbose =E<gt> 1>.
5255
5256							Note B<infoh> is used to display extra information in case of error (if a field
5257							does not exist, Text::AutoCSV will display the list of existing fields). If you
5258							don't want such output, you can set C<infoh> to undef.
5259
5260							Value by default: \*STDERR
5261
5262							Example:
5263
5264							open my $infoh, ">", "log.txt";
5265							my $csv = Text::AutoCSV->new(in_file => 'in.csv', infoh => $infoh);
5266
5267							=item quiet
5268
5269							If true, don't display warnings and errors, unless croaking.
5270
5271							If L</croak_if_error> attribute is set (as per default), still, Text::AutoCSV
5272							will produce output (on STDERR) when croaking miserably.
5273
5274							When using C<croak_if_error =E<gt> 0>, errors are processed as warnings and if
5275							L</quiet> is set (in addition to L</croak_if_error> being set to 0), there'll be
5276							no output. Note this way of working is not recommended, as things can go wrong
5277							without any notice to the caller.
5278
5279							Example:
5280
5281							my $csv = Text::AutoCSV->new(in_file => 'in.csv', quiet => 1);
5282
5283							=item one_pass
5284
5285							If true, Text::AutoCSV will perform one reading of the input. If other readings
5286							are triggered, it'll raise an error and no reading will be done. Should that be
5287							the case (you ask Text::AutoCSV to do something that'll trigger more than one
5288							reading of input), Text::AutoCSV will croak as is always the case if an error
5289							occurs.
5290
5291							Normally Text::AutoCSV will do multiple reads of input to work out certain
5292							characteristics of the CSV: guess of encoding and guess of escape character.
5293
5294							Also if member functions like L</field_add_link>, L</field_add_copy>,
5295							L</field_add_computed>, L</read> or L</write> are called after input has already
5296							been read, it'll trigger further reads as needed.
5297
5298							If one wishes a unique read of the input to occur, one_pass attribute is to be
5299							set.
5300
5301							When true, encoding will be assumed to be the first one in the provided list
5302							(L</encoding> attribute), if no encoding attribute is provided, it'll be the
5303							first one in the default list, to date, it is UTF-8.
5304
5305							When true, and if attribute L</escape_char> is not set, escape_char will be
5306							assumed to be '\\' (backslash).
5307
5308							By default, one_pass is set if inh attribute is set (caller provides the input
5309							file handle of input) or if input file is stdin (in_file attribute not set or
5310							set to an empty string).
5311
5312							Value by default:
5313
5314							0 if inh attribute is not set and in_file attribute is set to a non empty
5315							string
5316							1 if inh attribute is set or in_file is not set or set to an empty string
5317
5318							Example:
5319
5320							my $csv = Text::AutoCSV->new(in_file => 'in.csv', one_pass => 1);
5321
5322							=back
5323
5324							=head2 read
5325
5326							$csv->read();
5327
5328							Read input entirely.
5329
5330							B<Return value>
5331
5332							Returns the object itself in case of success.
5333							Returns undef if error.
5334
5335							Callback functions (when defined) are invoked, in the following order:
5336
5337							L</read_post_update_hr>, intended to do updates on fields values after each
5338							record read
5339
5340							L</walker_ar>, called after each record read, with an array ref of fields values
5341
5342							L</walker_hr>, called after each record read, with a hash ref of fields values
5343
5344							Example:
5345
5346							# Do nothing - just check CSV can be read successfully
5347							Text::AutoCSV->new(in_file => 'in.csv')->read();
5348
5349							=head2 read_all_in_mem
5350
5351							$csv->read_all_in_mem();
5352
5353							Created in version 1.1.5. Before, existed only as _read_all_in_mem, meaning, was
5354							private.
5355
5356							Read input entirely, as with L</read> function, but enforcing content to be kept
5357							in-memory.
5358
5359							Having the content kept in-memory is implied by search functions (L</vlookup>
5360							for example). With C<read_all_in_mem> you can enforce this behavior without
5361							doing a fake search.
5362
5363							=head2 reset_next_record_hr
5364
5365							$csv->reset_next_record_hr();
5366
5367							Reset the internal status to start from the beginning with
5368							L</get_next_record_hr>. Used in conjunction with L</get_next_record_hr>.
5369
5370							=head2 get_next_record_hr
5371
5372							my $hr = $csv->get_next_record_hr(\$opt_key);
5373
5374							Get the next record content as a hash ref. C<$hr> is undef when the end of
5375							records has been reached.
5376
5377							When specified, C<$opt_key> is set to the current (returned) record key.
5378
5379							B<NOTE>
5380
5381							You do not need to call L</reset_next_record_hr> once before using
5382							C<get_next_record_hr>.
5383
5384							Therefore L</reset_next_record_hr> is useful only if you wish to restart from
5385							the beginning before you've reached the end of the records.
5386
5387							B<NOTE bis>
5388
5389							L</walker_hr> allows to execute some code each time a record is read, and it
5390							better fits with Text::AutoCSV philosophy. Using a loop with
5391							C<get_next_record_hr> is primarily meant for Text::AutoCSV internal usage. Also
5392							when using this mechanism, you get very close to original Text::CSV logic, that
5393							makes Text::AutoCSV less useful.
5394
5395							B<Return value>
5396
5397							A hashref of the record, or undef once there's no more record to return.
5398
5399							Example:
5400
5401							while (my $hr = $csv->get_next_record_hr()) {
5402							say Dumper($hr);
5403							}
5404
5405							=head2 write
5406
5407							$csv->write();
5408
5409							Write input into output.
5410
5411							B<Return value>
5412
5413							Returns the object itself in case of success.
5414							Returns undef if error.
5415
5416							- If the content is not in-memory at the time write() is called:
5417
5418							Each record is read (with call of L</read_post_update_hr>, L</walker_ar> and
5419							L</walker_hr>) and then written. The read-and-write is done in sequence, each
5420							record is written to output before the next record is read from input.
5421
5422							- If the content is in-memory at the time write() is called:
5423
5424							No L</read> operation is performed, instead, records are directly written to
5425							output.
5426
5427							If defined, L</out_filter> is called for each record. If the return value of
5428							L</out_filter> is false, the record is not written.
5429
5430							Example:
5431
5432							# Copy input to output.
5433							# As CSV is parsed in-between, this copy also checks a number of
5434							# characteristics about the input, as opposed to a plain file copy
5435							# operation.
5436							Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv')->write();
5437
5438							=head2 out_header
5439
5440							$csv->out_header($field, $header);
5441
5442							Set the header text of C<$field> to C<$header>.
5443
5444							By default, the input header value is rewritten as is to output. C<out_header>
5445							allows you to change it.
5446
5447							B<Return value>
5448
5449							Returns the object itself.
5450
5451							Example:
5452
5453							Text::AutoCSV->new(in_file => 'in.csv', out_file => 'out.csv')
5454							->out_header('LOGIN', 'Login')
5455							->out_header('FULLNAME', 'Full Name')
5456							->write();
5457
5458							=head2 print_id
5459
5460							$csv->print_id();
5461
5462							Print out a description of input. Write to \*STDERR by default or to L</infoh>
5463							attribute if set.
5464
5465							The description consists in a list of a few characteristics (CSV separator and
5466							the like) followed by the list of columns with the details of each.
5467
5468							Example of output:
5469
5470							If you go to the C<utils> directory of this module and execute the following:
5471
5472							./csvcopy.pl -i f1.csv -l "1:,A->B,f2.csv" --id
5473
5474							You will get this output:
5475
5476							-- f1.csv:
5477							sep_char: ,
5478							escape_char: \
5479							in_encoding: UTF-8
5480							is_always_quoted: no
5481
5482							# FIELD HEADER EXT DATA DATETIME FORMAT DATETIME LOCALE
5483							- ----- ------ -------- --------------- ---------------
5484							0 TIMESTAMP timestamp %Y%m%d%H%M%S
5485							1 A a
5486							2 B b
5487							3 C c
5488							4 D d %d/%m/%Y
5489							5 1:SITE 1:SITE link: f2.csv, chain: A->B->* (SITE)
5490							6 1:B 1:B link: f2.csv, chain: A->B->* (B)
5491
5492							=head2 field_add_computed
5493
5494							$csv->field_add_computed($new_field, $subref);
5495
5496							C<$new_field> is the name of the created field.
5497
5498							C<$subref> is a reference to a sub that'll calculate the new field value.
5499
5500							B<Return value>
5501
5502							Returns the object itself in case of success.
5503							Returns undef if error.
5504
5505							Add a field calculated from other fields values. The subref runs like this:
5506
5507							sub func {
5508							# $new_field is the name of the field (allows to use one subref for
5509							# more than one field calculation).
5510							# $hr is a hash ref of fields values.
5511							# $stats is a hash ref that gets printed (if Text::AutoCSV is
5512							# created with verbose => 1) in the end.
5513							my ($new_field, $hr, $stats) = @_;
5514
5515							my $field_value;
5516							# ... compute $field_value
5517
5518							return $field_value;
5519							}
5520
5521							Example:
5522
5523							# Add a field for the full name, made of the concatenation of the first
5524							# name and the last name.
5525							Text::AutoCSV->new(in_file => 'dirpeople.csv', out_file => 'dirwithfn.csv',
5526							verbose => 1)
5527							->field_add_computed('FULLNAME', \&calc_fn)->write();
5528							sub calc_fn {
5529							my ($new_field, $hr, $stats) = @_;
5530							die "Man, you are in serious trouble!" unless $new_field eq 'FULLNAME';
5531							my $fn = $hr->{'FIRSTNAME'} . ' ' . uc($hr->{'LASTNAME'});
5532							$stats->{'empty full name'}++ if $fn eq ' ';
5533							return $fn;
5534							}
5535
5536							=head2 field_add_copy
5537
5538							$csv->field_add_copy($new_field, $src_field, $opt_subref);
5539
5540							C<$new_field> if the name of the new field.
5541
5542							C<$src_field> is the name of the field being copied.
5543
5544							C<$opt_subref> is optional. It is a reference to a sub that takes one string
5545							(the value of C<$src_field>) and returns a string (the value assigned to
5546							C<$new_field>).
5547
5548							B<Return value>
5549
5550							Returns the object itself in case of success.
5551							Returns undef if error.
5552
5553							C<field_add_copy> is a special case of L</field_add_computed>. The advantage of
5554							C<field_add_copy> is that it relies on a sub that is Text::AutoCSV "unaware",
5555							just taking one string as input and returning another string as output.
5556
5557							B<IMPORTANT>
5558
5559							The current field value is passed to C<field_add_copy> in $_.
5560
5561							A call to
5562
5563							$csv->field_add_copy($new_field, $src_field, $subref);
5564
5565							is equivalent to
5566
5567							$csv->field_add_computed($new_field, \&subref2);
5568							sub subref2 {
5569							my (undef, $hr) = @_;
5570							local $_ = $hr->{$src_field};
5571							return $subref->();
5572							}
5573
5574							Example of a field copy + pass copied field in upper case and surround content
5575							with <<>>:
5576
5577							my $csv = Text::AutoCSV->new(in_file => 'dirpeople.csv',
5578							out_file => 'd2.csv');
5579							$csv->field_add_copy('UCLAST', 'LASTNAME', \&myfunc);
5580							$csv->write();
5581							sub myfunc { s/^.*$/<<\U$&>>/; $_; }
5582
5583							Note that the calls can be chained as most member functions return the object
5584							itself upon success. The example above is equivalent to:
5585
5586							Text::AutoCSV->new(in_file => 'dirpeople.csv', out_file => 'd2.csv')
5587							->field_add_copy('UCLAST', 'LASTNAME', \&myfunc)
5588							->write();
5589							sub myfunc { s/^.*$/<<\U$&>>/; $_; }
5590
5591							=head2 field_add_link
5592
5593							$csv->field_add_link($new_field, $chain, $linked_file, \%opts);
5594
5595							C<$new_field> is the name of the new field.
5596
5597							C<$chain> is the CHAIN of the link, that is: 'LOCAL->REMOTE->PICK' where:
5598
5599							C<LOCAL> is the field name to read the value from.
5600
5601							C<REMOTE> is the linked field to find the value in. This field belongs to
5602							$linked_file.
5603
5604							C<PICK> is the field from which to read the value of, in the record found by the
5605							search. This field belongs to $linked_file.
5606
5607							If $new_field is undef, the new field name is the name of the third field of
5608							$chain (PICK).
5609
5610							C<$linked_file> is the name of the linked file, that gets read in a
5611							Text::AutoCSV object created on-the-fly to do the search on. C<$linked_file> can
5612							also be a Text::AutoCSV object that you created yourself, allowing for more
5613							flexibility. Example:
5614
5615							my $lcsv = Text::AutoCSV->new(in_file => 'logins.csv', case => 1);
5616							$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', $lcsv);
5617
5618							C<\%opts> is a hash ref of optional attributes. The same values can be provided
5619							as with vlookup.
5620
5621							=over 4
5622
5623							=item trim
5624
5625							If set to 1, searches will ignore leading and trailing spaces. That is, a
5626							C<LOCAL> value of ' x ' will match with a C<REMOTE> value of 'x'.
5627
5628							If option is not present, use L</search_value_if_not_found> attribute of object
5629							(default value: 1).
5630
5631							Example:
5632
5633							$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5634							{ trim => 0 });
5635
5636							=item case
5637
5638							If set to 1, searches will take the case into account. That is, a C<LOCAL> value
5639							of 'X' will B<not> match with a C<REMOTE> value of 'x'.
5640
5641							If option is not present, use L</search_case> attribute of object (default
5642							value: 0).
5643
5644							Example:
5645
5646							$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5647							{ case => 1 });
5648
5649							=item ignore_empty
5650
5651							If set to 1, empty values won't match. That is, a C<LOCAL> value of '' will not
5652							match with a C<REMOTE> value of ''.
5653
5654							If option is not present, use L</search_ignore_empty> attribute of object
5655							(default value: 1).
5656
5657							Example:
5658
5659							$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5660							{ ignore_empty => 0 });
5661
5662							=item value_if_not_found
5663
5664							If the searched value is not found, the value of the field is undef, that
5665							produces an empty string at write time. Instead, you can specify the value.
5666
5667							If option is not present, use L</search_value_if_not_found> attribute of object
5668							(default value: undef).
5669
5670							Example:
5671
5672							$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5673							{ value_if_not_found => '<not found!>' });
5674
5675							=item value_if_found
5676
5677							If the searched value is found, you can specify the value to return.
5678
5679							If option is not present, use L</search_value_if_found> attribute of object
5680							(default value: none).
5681
5682							B<NOTE>
5683
5684							Although the C<PICK> field is ignored when using this option, you must specify
5685							it any way.
5686
5687							Example:
5688
5689							$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5690							{ value_if_not_found => '0', value_if_found => '1' });
5691
5692							=item value_if_ambiguous
5693
5694							If the searched value is found in more than one record, the value of the field
5695							is undef, that produces an empty string at write time. Instead, you can specify
5696							the value.
5697
5698							If option is not present, use L</search_value_if_ambiguous> attribute of object
5699							(default value: undef).
5700
5701							Example:
5702
5703							$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5704							{ value_if_ambiguous => '<ambiguous!>' });
5705
5706							=item ignore_ambiguous
5707
5708							Boolean value. If ignore_ambiguous is true and the searched value is found in
5709							more than one record, then, silently fall back on returning the value of the
5710							first record. Obviously if C<ignore_ambiguous> is true, then the value of
5711							L</value_if_ambiguous> is ignored.
5712
5713							If option is not present, use L</search_ignore_ambiguous> attribute of object
5714							(default value: 1).
5715
5716							Example:
5717
5718							$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5719							{ ignore_ambiguous => 1 });
5720
5721							Example with multiple options:
5722
5723							$csv->field_add_link('NAME', 'ID->LOGIN->DISPLAYNAME', 'logins.csv',
5724							{ value_if_not_found => '?', ignore_ambiguous => 1 });
5725
5726							=back
5727
5728							B<Return value>
5729
5730							Returns the object itself in case of success.
5731							Returns undef if error.
5732
5733							Example of field_add_link usage:
5734
5735							my $nom_compose = 0;
5736							my $zip_not_found = 0;
5737							Text::AutoCSV->new(in_file => 'pers.csv', walker_hr => \&walk)
5738							->field_add_link('MYCITY', 'ZIP->ZIPCODE->CITY', 'zips.csv')->read();
5739							sub walk {
5740							my $hr = shift;
5741							$nom_compose++ if $hr->{'NAME'} =~ m/[- ]/;
5742							$zip_not_found++ unless defined($hr->{'MYCITY'});
5743							}
5744							print("Number of persons with a multi-part name: $nom_compose\n");
5745							print("Number of persons with unknown zipcode: $zip_not_found\n");
5746
5747							=head2 links
5748
5749							$csv->links($prefix, $chain, $linked_file, \%opts);
5750
5751							C<$prefix> is the name to add to joined fields
5752
5753							C<$chain> is the JOINCHAIN of the link, that is: 'LOCAL->REMOTE' where:
5754
5755							C<LOCAL> is the field name to read the value from.
5756
5757							C<REMOTE> is the linked field to find the value in. This field belongs to
5758							$linked_file.
5759
5760							As opposed to L</field_add_link>, there is no C<PICK> part, as all fields of
5761							target are read.
5762
5763							As opposed to Text::AutoCSV habits of croaking whenever a field name is
5764							duplicate, here, the duplicates are resolved by appending _2 to the joined field
5765							name if it already exists. If _2 already exists, too, then _3 is appended
5766							instead, and so on, until a non-duplicate is found. This mechanism is executed
5767							given the difficulty to control all field names when joining CSVs.
5768
5769							C<$linked_file> and C<\%opts> work exactly the same way as for
5770							L</field_add_link>, see L</field_add_link> for help.
5771
5772							B<Return value>
5773
5774							Returns the object itself in case of success.
5775							Returns undef if error.
5776
5777							B<NOTE>
5778
5779							This function used to be called C<join> but got renamed to avoid clash with
5780							perl' builtin C<join>.
5781
5782							Example:
5783
5784							Text::AutoCSV->new(in_file => 'pers.csv', out_file => 'pers_with_city.csv')
5785							->links('Read from zips.csv:', 'ZIP->ZIPCODE', 'zips.csv')->write();
5786
5787							=head2 get_in_encoding
5788
5789							my $enc = $csv->get_in_encoding();
5790
5791							Return the string of input encoding, for example 'latin2' or 'UTF-8', etc.
5792
5793							=head2 get_in_file_disp
5794
5795							my $f = $csv->get_in_file_disp();
5796
5797							Return the printable name of in_file.
5798
5799							=head2 get_sep_char
5800
5801							my $s = $csv->get_sep_char();
5802
5803							Return the string of the input CSV separator character, for example ',' or ';'.
5804
5805							=head2 get_escape_char
5806
5807							my $e = $csv->get_escape_char();
5808
5809							Return the string of the input escape character, for example '"' or '\\'.
5810
5811							=head2 get_is_always_quoted
5812
5813							my $a = $csv->get_is_always_quoted();
5814
5815							Return 1 if all fields of input are always quoted, 0 otherwise.
5816
5817							=head2 get_coldata
5818
5819							my @cd = get_coldata();
5820
5821							Return an array that describes each column, from the first one (column 0) to the
5822							last.
5823
5824							Each element of the array is itself an array ref that contains 5 elements:
5825
5826							0: Name of the field (as accessed in *_hr functions)
5827							1: Content of the field in the header line (if input has a header line)
5828							2: Column content type, shows some meta-data of fields created with
5829							field_add_* functions
5830							3: Datetime format detected, if ever, in the format Strptime
5831							4: Locale of DateTime format detected, if ever
5832							5: Multiline field: '1' if not, 'm' if newlines encountered in the field
5833
5834							=head2 get_pass_count
5835
5836							my $n = $csv->get_pass_count();
5837
5838							Return the number of input readings done. Useful only if you're interested in
5839							Text::AutoCSV internals.
5840
5841							=head2 get_in_mem_record_count
5842
5843							my $m = $csv->get_in_mem_record_count();
5844
5845							Return the number of records currently stored in-memory. Useful only if you're
5846							interested in Text::AutoCSV internals.
5847
5848							=head2 get_max_in_mem_record_count
5849
5850							my $mm = $csv->get_max_in_mem_record_count();
5851
5852							Return the maximum number of records ever stored in-memory. Indeed this number
5853							can decrease: certain functions like field_add* member-functions discard
5854							in-memory content. Useful only if you're interested in Text::AutoCSV internals.
5855
5856							=head2 get_fields_names
5857
5858							my @f = $csv->get_fields_names();
5859
5860							Return an array of the internal names of the columns.
5861
5862							=head2 get_field_name
5863
5864							my $name = $csv->get_field_name($n);
5865
5866							Return the C<$n>-th column name, the first column being number 0.
5867
5868							Example:
5869
5870							# Get the field name of the third column
5871							my $col = $csv->get_field_name(2);
5872
5873							=head2 get_stats
5874
5875							my %stats = $csv->get_stats();
5876
5877							Certain callback functions provide a parameter to record event count:
5878							L</field_add_computed>, L</read_post_update_hr>, L</walker_ar> and
5879							L</walker_hr>. By default, these stats are displayed if Text::AutoCSV got
5880							created with attribute C<verbose =E<gt> 1>. get_stats() returns the statistics
5881							hash of the object.
5882
5883							B<IMPORTANT>
5884
5885							As opposed to most functions that trigger input reading automatically (search
5886							functions and other get_* functions), C<get_stats> just returns you the stats as
5887							it is, regardless of whether some execution already occured.
5888
5889							=head2 get_nb_rows
5890
5891							my $nb_rows = $csv->get_nb_rows();
5892
5893							Gives the number of rows of the input. Does not trigger any reading - just
5894							provides the number of rows as known at the moment of the call. If unknown,
5895							return undef. Typically, the number of rows is known after doing the initial
5896							detection of CSV options (escape character, etc.), or, after doing one complete
5897							reading.
5898
5899							The header line counts for one row.
5900
5901							B<IMPORTANT>
5902
5903							As some fields can contain new lines, this number is not necessarily identical
5904							to the number of lines.
5905
5906							=head2 set_walker_ar
5907
5908							$csv->set_walker_ar($subref);
5909
5910							Normally one wants to define it at object creation time using L</walker_ar>
5911							attribute. C<set_walker_ar> allows to assign the attribute walker_ar after
5912							object creation.
5913
5914							See attribute L</walker_ar> for help about the way C<$subref> should work.
5915
5916							B<Return value>
5917
5918							Returns the object itself in case of success.
5919							Returns undef if error.
5920
5921							Example:
5922
5923							# Calculate the total of the two first columns, the first column being
5924							# money in and the second one being money out.
5925							my ($actif, $passif) = (0, 0);
5926							$csv->set_walker_ar(
5927							sub {
5928							my $ar = $_[0]; $actif += $ar->[0]; $passif += $ar->[1];
5929							}
5930							)
5931							->read();
5932							print("Actif = $actif\n");
5933							print("Passif = $passif\n");
5934
5935							=head2 set_walker_hr
5936
5937							$csv->set_walker_hr($subref);
5938
5939							Normally one wants to define it at object creation time using L</walker_hr>
5940							attribute. C<set_walker_hr> allows to assign the attribute L</walker_hr> after
5941							object creation.
5942
5943							See attribute L</walker_hr> for help about the way C<$subref> should work.
5944
5945							B<Return value>
5946
5947							Returns the object itself in case of success.
5948							Returns undef if error.
5949
5950							Example:
5951
5952							my $csv = Text::AutoCSV->new(in_file => 'directory.csv', verbose => 1);
5953
5954							# ...
5955
5956							$csv->set_walker_hr(
5957							sub {
5958							my ($hr, $stat) = @_;
5959							$stat{'not capital name'}++, return
5960							if $hr->{'NAME'} ne uc($hr->{'NAME'});
5961							$stat{'name is capital letters'}++;
5962							}
5963							)->read();
5964
5965							=head2 set_out_file
5966
5967							$csv->set_out_file($out_file);
5968
5969							Normally one wants to define it at object creation time using L</out_file>
5970							attribute. C<set_out_file> allows to assign the attribute L</out_file> after
5971							object creation. It is set to C<$out_file> value.
5972
5973							B<Return value>
5974
5975							Returns the object itself in case of success.
5976							Returns undef if error.
5977
5978							Example:
5979
5980							$csv->set_out_file('mycopy.csv')->write();
5981
5982							=head2 get_keys
5983
5984							my @allkeys = $csv->get_keys();
5985
5986							Returns an array of all the record keys of input. A record key is a unique
5987							identifier that designates the record.
5988
5989							At the moment it is just an integer being the record number, the first one (that
5990							comes after the header line) being of number 0. For example if $csv input is
5991							made of one header line and 3 records (that is, a 4-line file typically, if no
5992							record contains a line break), $csv->get_keys() returns:
5993
5994							(0, 1, 2)
5995
5996							B<IMPORTANT>
5997
5998							If not yet done, this function causes the input to be read entirely and stored
5999							in-memory.
6000
6001							=head2 get_hr_all
6002
6003							my @allin = $csv->get_hr_all();
6004
6005							Returns an array of all record contents of the input, each record being a hash
6006							ref.
6007
6008							B<IMPORTANT>
6009
6010							If not yet done, this function causes the input to be read entirely and stored
6011							in-memory.
6012
6013							=head2 get_row_ar
6014
6015							my $row_ar = $csv->get_row_ar($record_key);
6016
6017							Returns an array ref of the record designated by C<$record_key>.
6018
6019							Example:
6020
6021							# Get content (as array ref) of last record
6022							my @allkeys = $csv->get_keys();
6023							my $lastk = $allkeys[-1];
6024							my $lastrec_ar = $csv->get_row_ar($lastk);
6025
6026							B<IMPORTANT>
6027
6028							If not yet done, this function causes the input to be read entirely and stored
6029							in-memory.
6030
6031							=head2 get_row_hr
6032
6033							my $row_hr = $csv->get_row_hr($record_key);
6034
6035							Returns a hash ref of the record designated by C<$record_key>.
6036
6037							Example:
6038
6039							# Get content (as hash ref) of first record
6040							my @allkeys = $csv->get_keys();
6041							my $firstk = $allkeys[0];
6042							my $firstrec_hr = $csv->get_row_hr($firstk);
6043
6044							B<IMPORTANT>
6045
6046							If not yet done, this function causes the input to be read entirely and stored
6047							in-memory.
6048
6049							=head2 get_cell
6050
6051							my $val = $csv->get_cell($record_key, $field_name);
6052
6053							Return the value of the cell designated by its record key (C<$record_key>) and
6054							field name (C<$field_name>).
6055
6056							Example:
6057
6058							my @allkeys = $csv->get_keys();
6059							my $midk = $allkeys[int($#allkeys / 2)];
6060							my $midname = $csv->get_cell($midk, 'NAME');
6061
6062							Note the above example is equivalent to:
6063
6064							my @allkeys = $csv->get_keys();
6065							my $midk = $allkeys[int($#allkeys / 2)];
6066							my $midrec_hr = $csv->get_row_hr($midk);
6067							my $midname = $midrec_hr->{'NAME'};
6068
6069							B<IMPORTANT>
6070
6071							If not yet done, this function causes the input to be read entirely and stored
6072							in-memory.
6073
6074							=head2 get_values
6075
6076							my @vals = $csv->get_values($field_name, $opt_filter_subref);
6077
6078							Return an array made of the values of the given field name (C<$field_name>), for
6079							every records, in the order of the records.
6080
6081							C<$opt_filter_subref> is an optional subref. If defined, it is called with every
6082							values in turn (one call per value) and only values for which
6083							C<$opt_filter_subref> returned True are included in the returned array. Call to
6084							C<$opt_filter_subref> is done with $_ to pass the value.
6085
6086							Example:
6087
6088							my @logins = $csv->get_values('LOGIN");
6089
6090							This is equivalent to:
6091
6092							my @allkeys = $csv->get_keys();
6093							my @logins;
6094							push @logins, $csv->get_cell($_, 'LOGIN') for (@allkeys);
6095
6096							Example bis
6097
6098							# @badlogins is the list of logins that contain non alphanumeric
6099							# characters
6100							my @badlogins = Text::AutoCSV->new(in_file => 'logins.csv')
6101							->get_values('LOGIN', sub { m/[^a-z0-9]/ });
6102
6103							This is equivalent to:
6104
6105							# @badlogins is the list of logins that contain non alphanumeric
6106							# characters This method leads to carrying all values of a given field
6107							# across function calls...
6108							my @badlogins = grep { m/[^a-z0-9]/ } (
6109							Text::AutoCSV->new(in_file => 'logins.csv')->get_values('LOGIN')
6110							);
6111
6112							B<IMPORTANT>
6113
6114							If not yet done, this function causes the input to be read entirely and stored
6115							in-memory.
6116
6117							=head2 get_recnum
6118
6119							my $r = $csv->get_recnum();
6120
6121							Returns the current record identifier, if a reading is in progress. If no read
6122							is in progress, return undef.
6123
6124							=head2 in_map
6125
6126							See L</read_update_after> below.
6127
6128							=head2 read_update_after
6129
6130							C<read_update_after> is an alias of C<in_map>.
6131
6132							$csv->in_map($field, $subref);
6133
6134							After reading a record from input, update C<$field> by calling C<$subref>. The
6135							value is put in C<$_>. Then the field value is set to the return value of
6136							C<$subref>.
6137
6138							This feature is originally meant to manage DateTime fields: the input and output
6139							CSVs carry text content, and in-between, the values dealt with are DateTime
6140							objects.
6141
6142							See L</out_map> for an example.
6143
6144							=head2 out_map
6145
6146							See L</write_update_before> below.
6147
6148							=head2 write_update_before
6149
6150							C<write_update_before> is an alias of C<out_map>.
6151
6152							$csv->out_map($field, $subref);
6153
6154							Before writing C<$field> field content into the output file, pass it through
6155							C<out_map>. The value is put in C<$_>. Then the return value of C<$subref> is
6156							written in the output.
6157
6158							Example:
6159
6160							Suppose you have a CSV file with the convention that a number surrounded by
6161							parenthesis is negative. You can register corresponding L</in_map> and
6162							L</out_map> functions. During the processing of data, the field content will be
6163							just a number (positive or negative), while in input and in output, it'll follow
6164							the "negative under parenthesis" convention.
6165
6166							In the below example, we rely on convention above and add a new field converted
6167							from the original one, that follows the same convention.
6168
6169							sub in_updt {
6170							return 0 if !defined($_) or $_ eq '';
6171							my $i;
6172							return -$i if ($i) = $_ =~ m/^$(.*)$$/;
6173							$_;
6174							}
6175							sub out_updt {
6176							return '' unless defined($_);
6177							return '(' . (-$_) . ')' if $_ < 0;
6178							$_;
6179							}
6180							sub convert {
6181							return ;
6182							}
6183							Text::AutoCSV->new(in_file => 'trans-euros.csv',
6184							out_file => 'trans-devises.csv')
6185							->in_map('EUROS', \&in_updt)
6186							->out_map('EUROS', \&out_updt)
6187							->out_map('DEVISE', \&out_updt)
6188							->field_add_copy('DEVISE', 'EUROS',
6189							sub { sprintf("%.2f", $_ * 1.141593); } )
6190							->write();
6191
6192							=head2 search
6193
6194							my $found_ar = $csv->search($field_name, $value, \%opts);
6195
6196							Returns an array ref of all records keys where the field C<$field_name> has the
6197							value C<$value>.
6198
6199							C<\%opts> is an optional hash ref of options for the search. See help of
6200							L</vlookup> about options.
6201
6202							B<IMPORTANT>
6203
6204							An unsuccessful search returns an empty array ref, that is, [ ]. Thus you
6205							B<cannot> check for definedness of C<search> return value to know whether or not
6206							the search found something.
6207
6208							On the other hand, you can always examine the value C<search(...)-E<gt>[0]>, as
6209							search is always an array ref. If the search found nothing, then,
6210							C<search(...)-E<gt>[0]> is not defined.
6211
6212							B<IMPORTANT bis>
6213
6214							If not yet done, this function causes the input to be read entirely and stored
6215							in-memory.
6216
6217							Example:
6218
6219							my $linux_os_keys_ar = $csv->search('OS', 'linux');
6220
6221							=head2 search_1hr
6222
6223							my $found_hr = $csv->search_1hr($field_name, $value, \%opts);
6224
6225							Returns a hash ref of the first record where the field C<$field_name> has the
6226							value C<$value>.
6227
6228							C<\%opts> is an optional hash ref of options for the search. See help of
6229							L</vlookup> about options.
6230
6231							Note the options L</value_if_not_found> and L</value_if_ambiguous> are ignored.
6232							If not found, return undef. If the result is ambiguous (more than one record
6233							found) and ignore_ambiguous is set to a false value, return undef.
6234
6235							The other options are taken into account as for any search:
6236							L</ignore_ambiguous>, L</trim>, L</case>, L</ignore_empty>.
6237
6238							B<IMPORTANT>
6239
6240							As opposed to L</search>, an unsuccessful C<search_1hr> will return C<undef>.
6241
6242							B<IMPORTANT bis>
6243
6244							If not yet done, this function causes the input to be read entirely and stored
6245							in-memory.
6246
6247							Example:
6248
6249							my $hr = $csv->search_1hr('LOGIN', $login);
6250							my $full_name = $hr->{'FIRSTNAME'} . ' ' . $hr->{'LASTNAME'};
6251
6252							=head2 vlookup
6253
6254							my $val = $csv->vlookup($searched_field, $value, $target_field, \%opts);
6255
6256							Find the first record where C<$searched_field> contains C<$value> and out of
6257							this record, returns the value of C<$target_field>.
6258
6259							C<\%opts> is optional. It is a hash of options for C<vlookup>:
6260
6261							=over 4
6262
6263							=item trim
6264
6265							If true, ignore spaces before and after the values to search.
6266
6267							If option is not present, use L</search_trim> attribute of object (default
6268							value: 1).
6269
6270							=item case
6271
6272							If true, do case sensitive searches.
6273
6274							If option is not present, use L</search_case> attribute of object (default
6275							value: 0).
6276
6277							=item ignore_empty
6278
6279							If true, ignore empty values in the search. The consequence is that you won't be
6280							able to find empty values by searching it.
6281
6282							If option is not present, use L</search_ignore_empty> attribute of object
6283							(default value: 1).
6284
6285							=item ignore_accents
6286
6287							If true, ignore accents in searches. For exampe, if C<ignore_accents> is set, a
6288							string like "Ã©lÃ©mentaire" will match "elementaire".
6289
6290							If option is not present, use L</search_ignore_accents> attribute of object
6291							(default value: 1).
6292
6293							B<NOTE>
6294
6295							This option uses the function L</remove_accents> to build its internal hash
6296							tables. See L</remove_accents> help for more details.
6297
6298							=item value_if_not_found
6299
6300							Return value if vlookup finds nothing.
6301
6302							If option is not present, use L</search_value_if_not_found> attribute of object
6303							(default value: undef).
6304
6305							=item value_if_found
6306
6307							Return value if vlookup finds something.
6308
6309							If option is not present, use L</search_value_if_found> attribute of object
6310							(default value: none).
6311
6312							This option is to just check whether a value exists, regardless of the target
6313							value found.
6314
6315							B<NOTE>
6316
6317							Although the B<$target_field> is ignored when using this option, you must
6318							specify it any way.
6319
6320							=item value_if_ambiguous
6321
6322							Return value if vlookup find more than one result. Tune it only if
6323							ignore_ambiguous is unset.
6324
6325							If option is not present, use L</search_value_if_ambiguous> attribute of object
6326							(default value: undef).
6327
6328							=item ignore_ambiguous
6329
6330							If true, then if more than one result is found, silently return the first one.
6331
6332							If option is not present, use L</search_ignore_ambiguous> attribute of object
6333							(default value: 1).
6334
6335							=back
6336
6337							B<IMPORTANT>
6338
6339							If not yet done, this function causes the input to be read entirely and stored
6340							in-memory.
6341
6342							Example:
6343
6344							my $name = $csv->vlookup('LOGIN', $id, 'NAME',
6345							{ value_if_not_found => '<login not found>' });
6346
6347							=head2 remove_accents
6348
6349							my $t = $csv->remove_accents($s);
6350
6351							Take the string C<$s> as argument and return the string without accents. Uses a
6352							Unicode decomposition followed by removal of every characters that have the
6353							Unicode property C<Nonspacing_Mark>.
6354
6355							B<NOTE>
6356
6357							Only accents are removed. It is not a C<whatever-encoding -E<gt> us-ascii>
6358							conversion. For example, the French B<Å“> character (o followed by e) or the
6359							German B<ÃŸ> (eszett) are kept as is.
6360
6361							B<NOTE bis>
6362
6363							Tested with some latin1 and latin2 characters.
6364
6365							B<NOTE ter>
6366
6367							There is no language-level transformation during accents removal. For example
6368							B<JÃ¼rgen> is returned as B<Jurgen>, not B<Juergen>.
6369
6370							This function is not exported by default.
6371
6372							Example:
6373
6374							use Text::AutoCSV qw(remove_accents);
6375							my $s = remove_accents("FranÃ§ais: Ãªtre Ã©lÃ©mentaire, TchÃ¨que: sluÅ¾ba dÅ¯m");
6376							die "This script will never die"
6377							if $s ne 'Francais: etre elementaire, Tcheque: sluzba dum';
6378
6379							=head1 AUTHOR
6380
6381							SÃ©bastien Millet <milletseb@laposte.net>
6382
6383							=head1 COPYRIGHT AND LICENSE
6384
6385							This software is copyright (c) 2016, 2017 by SÃ©bastien Millet.
6386
6387							This is free software; you can redistribute it and/or modify it under
6388							the same terms as the Perl 5 programming language system itself.
6389
6390							=cut