File Coverage

blib/lib/Text/CSV_XS.pm

Criterion	Covered	Total	%
statement	937	950	98.7
branch	740	790	93.6
condition	397	466	84.9
subroutine	90	90	100.0
pod	68	68	100.0
total	2232	2364	94.4

line	stmt	bran	cond	sub	pod	time	code
1							package Text::CSV_XS;
2
3							# Copyright (c) 2007-2025 H.Merijn Brand. All rights reserved.
4							# Copyright (c) 1998-2001 Jochen Wiedmann. All rights reserved.
5							# Copyright (c) 1997 Alan Citterman. All rights reserved.
6							#
7							# This program is free software; you can redistribute it and/or
8							# modify it under the same terms as Perl itself.
9
10							# HISTORY
11							#
12							# 0.24 - H.Merijn Brand <perl5@tux.freedom.nl>
13							# 0.10 - 0.23 Jochen Wiedmann <joe@ispsoft.de>
14							# Based on (the original) Text::CSV by Alan Citterman <alan@mfgrtl.com>
15
16							require 5.006001;
17
18	34			34		4112731	use strict;
	34					107
	34					1587
19	34			34		232	use warnings;
	34					99
	34					2639
20
21							require Exporter;
22	34			34		245	use XSLoader;
	34					61
	34					1019
23	34			34		172	use Carp;
	34					133
	34					3176
24
25	34			34		248	use vars qw( $VERSION @ISA @EXPORT_OK %EXPORT_TAGS );
	34					105
	34					11151
26							$VERSION = "1.61";
27							@ISA = qw( Exporter );
28							XSLoader::load ("Text::CSV_XS", $VERSION);
29
30	4			4	1	13	sub PV { 0 } sub CSV_TYPE_PV { PV }
	12			12	1	268176
31	4			4	1	16	sub IV { 1 } sub CSV_TYPE_IV { IV }
	12			12	1	229950
32	4			4	1	11	sub NV { 2 } sub CSV_TYPE_NV { NV }
	12			12	1	95
33
34	11			11	1	71	sub CSV_FLAGS_IS_QUOTED { 0x0001 }
35	12			12	1	70	sub CSV_FLAGS_IS_BINARY { 0x0002 }
36	4			4	1	26	sub CSV_FLAGS_ERROR_IN_FIELD { 0x0004 }
37	20			20	1	80	sub CSV_FLAGS_IS_MISSING { 0x0010 }
38
39							%EXPORT_TAGS = (
40							CONSTANTS => [qw(
41							CSV_FLAGS_IS_QUOTED
42							CSV_FLAGS_IS_BINARY
43							CSV_FLAGS_ERROR_IN_FIELD
44							CSV_FLAGS_IS_MISSING
45
46							CSV_TYPE_PV
47							CSV_TYPE_IV
48							CSV_TYPE_NV
49							)],
50							);
51							@EXPORT_OK = (qw( csv PV IV NV ), @{$EXPORT_TAGS{'CONSTANTS'}});
52
53							if ($] < 5.008002) {
54	34			34		256	no warnings "redefine";
	34					112
	34					544317
55							*utf8::decode = sub {};
56							}
57
58							# version
59							#
60							# class/object method expecting no arguments and returning the version
61							# number of Text::CSV_XS. There are no side-effects.
62
63							sub version {
64	2			2	1	1012	return $VERSION;
65							} # version
66
67							# new
68							#
69							# class/object method taking an optional hash reference of attributes and
70							# returning a newly created, blessed object (or nothing on failure).
71
72							my %def_attr = (
73							'eol' => '',
74							'sep_char' => ',',
75							'quote_char' => '"',
76							'escape_char' => '"',
77							'binary' => 0,
78							'decode_utf8' => 1,
79							'auto_diag' => 0,
80							'diag_verbose' => 0,
81							'strict' => 0,
82							'strict_eol' => 0,
83							'blank_is_undef' => 0,
84							'empty_is_undef' => 0,
85							'allow_whitespace' => 0,
86							'allow_loose_quotes' => 0,
87							'allow_loose_escapes' => 0,
88							'allow_unquoted_escape' => 0,
89							'always_quote' => 0,
90							'quote_empty' => 0,
91							'quote_space' => 1,
92							'quote_binary' => 1,
93							'escape_null' => 1,
94							'keep_meta_info' => 0,
95							'verbatim' => 0,
96							'formula' => 0,
97							'skip_empty_rows' => 0,
98							'undef_str' => undef,
99							'comment_str' => undef,
100							'types' => undef,
101							'callbacks' => undef,
102
103							'_EOF' => "",
104							'_RECNO' => 0,
105							'_STATUS' => undef,
106							'_FIELDS' => undef,
107							'_FFLAGS' => undef,
108							'_STRING' => undef,
109							'_ERROR_INPUT' => undef,
110							'_COLUMN_NAMES' => undef,
111							'_BOUND_COLUMNS' => undef,
112							'_AHEAD' => undef,
113							'_FORMULA_CB' => undef,
114							'_EMPTROW_CB' => undef,
115
116							'ENCODING' => undef,
117							);
118							my %attr_alias = (
119							'quote_always' => "always_quote",
120							'verbose_diag' => "diag_verbose",
121							'quote_null' => "escape_null",
122							'escape' => "escape_char",
123							'comment' => "comment_str",
124							);
125							my $last_err = Text::CSV_XS->SetDiag (0);
126							my $ebcdic = ord ("A") == 0xC1; # Faster than $Config{'ebcdic'}
127							my @internal_kh;
128
129							# NOT a method: is also used before bless
130							sub _unhealthy_whitespace {
131	15734			15734		35355	my ($self, $aw) = @_;
132	15734	100				54426	$aw or return 0; # no checks needed without allow_whitespace
133
134	3569					7710	my $quo = $self->{'quote'};
135	3569	100	100			13703	defined $quo && length ($quo) or $quo = $self->{'quote_char'};
136	3569					7341	my $esc = $self->{'escape_char'};
137
138	3569	100	100			68167	defined $quo && $quo =~ m/^[ \t]/ and return 1002;
139	3327	100	100			75399	defined $esc && $esc =~ m/^[ \t]/ and return 1002;
140
141	3037					9374	return 0;
142							} # _unhealthy_whitespace
143
144							sub _check_sanity {
145	12428			12428		20030	my $self = shift;
146
147	12428					23496	my $eol = $self->{'eol'};
148	12428					21635	my $sep = $self->{'sep'};
149	12428	100	100			48530	defined $sep && length ($sep) or $sep = $self->{'sep_char'};
150	12428					20744	my $quo = $self->{'quote'};
151	12428	100	100			40755	defined $quo && length ($quo) or $quo = $self->{'quote_char'};
152	12428					23591	my $esc = $self->{'escape_char'};
153
154							# use DP;::diag ("SEP: '", DPeek ($sep),
155							# "', QUO: '", DPeek ($quo),
156							# "', ESC: '", DPeek ($esc),"'");
157
158							# sep_char should not be undefined
159	12428	100				33562	$sep ne "" or return 1008;
160	12426	100				28359	length ($sep) > 16 and return 1006;
161	12425	100				42665	$sep =~ m/[\r\n]/ and return 1003;
162
163	12419	100				24998	if (defined $quo) {
164	12409	100				87299	$quo eq $sep and return 1001;
165	12181	100				26538	length ($quo) > 16 and return 1007;
166	12180	100				29343	$quo =~ m/[\r\n]/ and return 1003;
167							}
168	12184	100				23751	if (defined $esc) {
169	12168	100				67722	$esc eq $sep and return 1001;
170	12000	100				29671	$esc =~ m/[\r\n]/ and return 1003;
171							}
172	12010	100				23053	if (defined $eol) {
173	12005	100				23198	length ($eol) > 16 and return 1005;
174							}
175
176	12009					33864	return _unhealthy_whitespace ($self, $self->{'allow_whitespace'});
177							} # _check_sanity
178
179							sub known_attributes {
180	3			3	1	777	sort grep !m/^_/ => "sep", "quote", keys %def_attr;
181							} # known_attributes
182
183							sub new {
184	1004			1004	1	72071138	$last_err = Text::CSV_XS->SetDiag (1000,
185							"usage: my \$csv = Text::CSV_XS->new ([{ option => value, ... }]);");
186
187	1004					2541	my $proto = shift;
188	1004	100	100			6178	my $class = ref $proto || $proto or return;
189	1003	100	100			5908	@_ > 0 && ref $_[0] ne "HASH" and return;
190	995		100			2832	my $attr = shift || {};
191							my %attr = map {
192	2756	100				11666	my $k = m/^[a-zA-Z]\w+$/ ? lc $_ : $_;
193	2756	100				6666	exists $attr_alias{$k} and $k = $attr_alias{$k};
194	2756					8807	($k => $attr->{$_});
195	995					1748	} keys %{$attr};
	995					3728
196
197	995					3410	my $sep_aliased = 0;
198	995	100				2775	if (exists $attr{'sep'}) {
199	10					35	$attr{'sep_char'} = delete $attr{'sep'};
200	10					28	$sep_aliased = 1;
201							}
202	995					1714	my $quote_aliased = 0;
203	995	100				2476	if (exists $attr{'quote'}) {
204	25					80	$attr{'quote_char'} = delete $attr{'quote'};
205	25					44	$quote_aliased = 1;
206							}
207							exists $attr{'formula_handling'} and
208	995	100				2354	$attr{'formula'} = delete $attr{'formula_handling'};
209	995					1874	my $attr_formula = delete $attr{'formula'};
210
211	995					2724	for (keys %attr) {
212	2720	100	100			10748	if (m/^[a-z]/ && exists $def_attr{$_}) {
213							# uncoverable condition false
214	2713	100	100			9899	defined $attr{$_} && m/_char$/ and utf8::decode ($attr{$_});
215	2713					4942	next;
216							}
217							# croak?
218	7					40	$last_err = Text::CSV_XS->SetDiag (1000, "INI - Unknown attribute '$_'");
219	7	100				27	$attr{'auto_diag'} and error_diag ();
220	7					48	return;
221							}
222	988	100				2581	if ($sep_aliased) {
223	10					59	my @b = unpack "U0C*", $attr{'sep_char'};
224	10	100				40	if (@b > 1) {
225	6					19	$attr{'sep'} = $attr{'sep_char'};
226	6					18	$attr{'sep_char'} = "\0";
227							}
228							else {
229	4					13	$attr{'sep'} = undef;
230							}
231							}
232	988	100	100			2608	if ($quote_aliased and defined $attr{'quote_char'}) {
233	21					84	my @b = unpack "U0C*", $attr{'quote_char'};
234	21	100				55	if (@b > 1) {
235	7					21	$attr{'quote'} = $attr{'quote_char'};
236	7					21	$attr{'quote_char'} = "\0";
237							}
238							else {
239	14					31	$attr{'quote'} = undef;
240							}
241							}
242
243	988					24669	my $self = { %def_attr, %attr };
244	988	100				5100	if (my $ec = _check_sanity ($self)) {
245	35					142	$last_err = Text::CSV_XS->SetDiag ($ec);
246	35	100				82	$attr{'auto_diag'} and error_diag ();
247	35					291	return;
248							}
249	953	100	100			3618	if (defined $self->{'callbacks'} && ref $self->{'callbacks'} ne "HASH") {
250	6					1069	carp ("The 'callbacks' attribute is set but is not a hash: ignored\n");
251	6					86	$self->{'callbacks'} = undef;
252							}
253
254	953					4771	$last_err = Text::CSV_XS->SetDiag (0);
255	953	100	100			3451	defined $\ && !exists $attr{'eol'} and $self->{'eol'} = $\;
256	953					2210	bless $self, $class;
257	953	100				2632	defined $self->{'types'} and $self->types ($self->{'types'});
258	953	50				3708	defined $self->{'skip_empty_rows'} and $self->{'skip_empty_rows'} = _supported_skip_empty_rows ($self, $self->{'skip_empty_rows'});
259	953	100				2420	defined $attr_formula and $self->{'formula'} = _supported_formula ($self, $attr_formula);
260	952					7356	$self;
261							} # new
262
263							# Keep in sync with XS!
264							my %_cache_id = ( # Only expose what is accessed from within PM
265							'quote_char' => 0,
266							'escape_char' => 1,
267							'sep_char' => 2,
268							'always_quote' => 4,
269							'quote_empty' => 5,
270							'quote_space' => 6,
271							'quote_binary' => 7,
272							'allow_loose_quotes' => 8,
273							'allow_loose_escapes' => 9,
274							'allow_unquoted_escape' => 10,
275							'allow_whitespace' => 11,
276							'blank_is_undef' => 12,
277							'empty_is_undef' => 13,
278							'auto_diag' => 14,
279							'diag_verbose' => 15,
280							'escape_null' => 16,
281							'formula' => 18,
282							'decode_utf8' => 21,
283							'verbatim' => 23,
284							'strict_eol' => 24,
285							'strict' => 28,
286							'skip_empty_rows' => 29,
287							'binary' => 30,
288							'keep_meta_info' => 31,
289							'_has_hooks' => 32,
290							'_has_ahead' => 33,
291							'_is_bound' => 44,
292							'eol' => 100,
293							'sep' => 116,
294							'quote' => 132,
295							'undef_str' => 148,
296							'comment_str' => 156,
297							'types' => 92,
298							);
299
300							# A `character'
301							sub _set_attr_C {
302	11108			11108		31027	my ($self, $name, $val, $ec) = @_;
303	11108	100				50264	defined $val and utf8::decode ($val);
304	11108					31667	$self->{$name} = $val;
305	11108	100				27351	$ec = _check_sanity ($self) and croak ($self->SetDiag ($ec));
306	10198					50313	$self->_cache_set ($_cache_id{$name}, $val);
307							} # _set_attr_C
308
309							# A flag
310							sub _set_attr_X {
311	5643			5643		15964	my ($self, $name, $val) = @_;
312	5643	100				13887	defined $val or $val = 0;
313	5643					14364	$self->{$name} = $val;
314	5643					35192	$self->_cache_set ($_cache_id{$name}, 0 + $val);
315							} # _set_attr_X
316
317							# A number
318							sub _set_attr_N {
319	68			68		151	my ($self, $name, $val) = @_;
320	68					131	$self->{$name} = $val;
321	68					383	$self->_cache_set ($_cache_id{$name}, 0 + $val);
322							} # _set_attr_N
323
324							# Accessor methods.
325							# It is unwise to change them halfway through a single file!
326							sub quote_char {
327	4836			4836	1	1095536	my $self = shift;
328	4836	100				14291	if (@_) {
329	3601					10428	$self->_set_attr_C ("quote_char", shift);
330	3374					10708	$self->_cache_set ($_cache_id{'quote'}, "");
331							}
332	4609					18990	$self->{'quote_char'};
333							} # quote_char
334
335							sub quote {
336	20			20	1	50	my $self = shift;
337	20	100				72	if (@_) {
338	11					19	my $quote = shift;
339	11	100				30	defined $quote or $quote = "";
340	11					38	utf8::decode ($quote);
341	11					47	my @b = unpack "U0C*", $quote;
342	11	100				32	if (@b > 1) {
343	5	100				141	@b > 16 and croak ($self->SetDiag (1007));
344	4					13	$self->quote_char ("\0");
345							}
346							else {
347	6					18	$self->quote_char ($quote);
348	6					10	$quote = "";
349							}
350	10					26	$self->{'quote'} = $quote;
351
352	10					20	my $ec = _check_sanity ($self);
353	10	100				148	$ec and croak ($self->SetDiag ($ec));
354
355	9					26	$self->_cache_set ($_cache_id{'quote'}, $quote);
356							}
357	18					40	my $quote = $self->{'quote'};
358	18	100	100			220	defined $quote && length ($quote) ? $quote : $self->{'quote_char'};
359							} # quote
360
361							sub escape_char {
362	4826			4826	1	1117206	my $self = shift;
363	4826	100				15787	if (@_) {
364	3595					7700	my $ec = shift;
365	3595					11114	$self->_set_attr_C ("escape_char", $ec);
366	3480	100				8618	$ec or $self->_set_attr_X ("escape_null", 0);
367							}
368	4711					25297	$self->{'escape_char'};
369							} # escape_char
370
371							sub sep_char {
372	5155			5155	1	1100887	my $self = shift;
373	5155	100				17080	if (@_) {
374	3912					12297	$self->_set_attr_C ("sep_char", shift);
375	3344					11473	$self->_cache_set ($_cache_id{'sep'}, "");
376							}
377	4587					21092	$self->{'sep_char'};
378							} # sep_char
379
380							sub sep {
381	359			359	1	7348	my $self = shift;
382	359	100				881	if (@_) {
383	326					696	my $sep = shift;
384	326	100				661	defined $sep or $sep = "";
385	326					1242	utf8::decode ($sep);
386	326					1354	my @b = unpack "U0C*", $sep;
387	326	100				916	if (@b > 1) {
388	13	100				164	@b > 16 and croak ($self->SetDiag (1006));
389	12					41	$self->sep_char ("\0");
390							}
391							else {
392	313					953	$self->sep_char ($sep);
393	310					497	$sep = "";
394							}
395	322					742	$self->{'sep'} = $sep;
396
397	322					774	my $ec = _check_sanity ($self);
398	322	100				816	$ec and croak ($self->SetDiag ($ec));
399
400	321					1141	$self->_cache_set ($_cache_id{'sep'}, $sep);
401							}
402	354					722	my $sep = $self->{'sep'};
403	354	100	100			1658	defined $sep && length ($sep) ? $sep : $self->{'sep_char'};
404							} # sep
405
406							sub eol {
407	280			280	1	6829	my $self = shift;
408	280	100				660	if (@_) {
409	227					385	my $eol = shift;
410	227	100				552	defined $eol or $eol = ""; # Also reset strict_eol?
411	227	100				681	length ($eol) > 16 and croak ($self->SetDiag (1005));
412	226					425	$self->{'eol'} = $eol;
413	226					790	$self->_cache_set ($_cache_id{'eol'}, $eol);
414							}
415	279					1621	$self->{'eol'};
416							} # eol
417
418							sub eol_type {
419	32			32	1	65	my $self = shift;
420	32					169	$self->_cache_get_eolt;
421							} # eol_type
422
423							sub always_quote {
424	3032			3032	1	1143717	my $self = shift;
425	3032	100				12556	@_ and $self->_set_attr_X ("always_quote", shift);
426	3032					10907	$self->{'always_quote'};
427							} # always_quote
428
429							sub quote_space {
430	10			10	1	23	my $self = shift;
431	10	100				42	@_ and $self->_set_attr_X ("quote_space", shift);
432	10					38	$self->{'quote_space'};
433							} # quote_space
434
435							sub quote_empty {
436	5			5	1	13	my $self = shift;
437	5	100				24	@_ and $self->_set_attr_X ("quote_empty", shift);
438	5					29	$self->{'quote_empty'};
439							} # quote_empty
440
441							sub escape_null {
442	6			6	1	11	my $self = shift;
443	6	100				28	@_ and $self->_set_attr_X ("escape_null", shift);
444	6					27	$self->{'escape_null'};
445							} # escape_null
446	3			3	1	14	sub quote_null { goto &escape_null; }
447
448							sub quote_binary {
449	7			7	1	23	my $self = shift;
450	7	100				38	@_ and $self->_set_attr_X ("quote_binary", shift);
451	7					50	$self->{'quote_binary'};
452							} # quote_binary
453
454							sub binary {
455	21			21	1	94566	my $self = shift;
456	21	100				111	@_ and $self->_set_attr_X ("binary", shift);
457	21					73	$self->{'binary'};
458							} # binary
459
460							sub strict {
461	2			2	1	5	my $self = shift;
462	2	100				22	@_ and $self->_set_attr_X ("strict", shift);
463	2					9	$self->{'strict'};
464							} # strict
465
466							sub strict_eol {
467	2			2	1	4	my $self = shift;
468	2	100				9	@_ and $self->_set_attr_X ("strict_eol", shift);
469	2					8	$self->{'strict_eol'};
470							} # strict_eol
471
472							sub _supported_skip_empty_rows {
473	974			974		1984	my ($self, $f) = @_;
474	974	100				2187	defined $f or return 0;
475	973	100	66			4284	if ($self && $f && ref $f && ref $f eq "CODE") {
			100
			66
476	5					9	$self->{'_EMPTROW_CB'} = $f;
477	5					12	return 6;
478							}
479							$f =~ m/^(?: 0 | undef )$/xi ? 0 :
480							$f =~ m/^(?: 1 | skip )$/xi ? 1 :
481							$f =~ m/^(?: 2 | eof | stop )$/xi ? 2 :
482							$f =~ m/^(?: 3 | die )$/xi ? 3 :
483							$f =~ m/^(?: 4 | croak )$/xi ? 4 :
484							$f =~ m/^(?: 5 | error )$/xi ? 5 :
485	968	0				5338	$f =~ m/^(?: 6 | cb )$/xi ? 6 : do {
		50
		100
		100
		100
		100
		100
486	0		0			0	$self ||= "Text::CSV_XS";
487	0					0	croak ($self->_SetDiagInfo (1500, "skip_empty_rows '$f' is not supported"));
488							};
489							} # _supported_skip_empty_rows
490
491							sub skip_empty_rows {
492	23			23	1	51	my $self = shift;
493	23	100				86	@_ and $self->_set_attr_N ("skip_empty_rows", _supported_skip_empty_rows ($self, shift));
494	23					43	my $ser = $self->{'skip_empty_rows'};
495	23	100				63	$ser == 6 or $self->{'_EMPTROW_CB'} = undef;
496							$ser <= 1 ? $ser : $ser == 2 ? "eof" : $ser == 3 ? "die" :
497							$ser == 4 ? "croak" : $ser == 5 ? "error" :
498	23	100				126	$self->{'_EMPTROW_CB'};
		100
		100
		100
		100
499							} # skip_empty_rows
500
501							sub _SetDiagInfo {
502	17			17		35	my ($self, $err, $msg) = @_;
503	17					146	$self->SetDiag ($err);
504	17					52	my $em = $self->error_diag ();
505	17	50				75	$em =~ s/^\d+$// and $msg =~ s/^/# /;
506	17	50				66	my $sep = $em =~ m/[;\n]$/ ? "\n\t" : ": ";
507	17					2613	join $sep => grep m/\S\S\S/ => $em, $msg;
508							} # _SetDiagInfo
509
510							sub _supported_formula {
511	103			103		156	my ($self, $f) = @_;
512	103	100				177	defined $f or return 5;
513	102	100	66			455	if ($self && $f && ref $f && ref $f eq "CODE") {
			100
			100
514	6					11	$self->{'_FORMULA_CB'} = $f;
515	6					16	return 6;
516							}
517							$f =~ m/^(?: 0 | none )$/xi ? 0 :
518							$f =~ m/^(?: 1 | die )$/xi ? 1 :
519							$f =~ m/^(?: 2 | croak )$/xi ? 2 :
520							$f =~ m/^(?: 3 | diag )$/xi ? 3 :
521							$f =~ m/^(?: 4 | empty | )$/xi ? 4 :
522							$f =~ m/^(?: 5 | undef )$/xi ? 5 :
523	96	100				791	$f =~ m/^(?: 6 | cb )$/xi ? 6 : do {
		100
		100
		100
		100
		100
		100
524	7		50			15	$self ||= "Text::CSV_XS";
525	7					20	croak ($self->_SetDiagInfo (1500, "formula-handling '$f' is not supported"));
526							};
527							} # _supported_formula
528
529							sub formula {
530	44			44	1	2763	my $self = shift;
531	44	100				120	@_ and $self->_set_attr_N ("formula", _supported_formula ($self, shift));
532	38	100				97	$self->{'formula'} == 6 or $self->{'_FORMULA_CB'} = undef;
533	38					132	[qw( none die croak diag empty undef cb )]->[_supported_formula ($self, $self->{'formula'})];
534							} # formula
535
536							sub formula_handling {
537	7			7	1	11	my $self = shift;
538	7					16	$self->formula (@_);
539							} # formula_handling
540
541							sub decode_utf8 {
542	2			2	1	5	my $self = shift;
543	2	100				9	@_ and $self->_set_attr_X ("decode_utf8", shift);
544	2					9	$self->{'decode_utf8'};
545							} # decode_utf8
546
547							sub keep_meta_info {
548	12			12	1	843	my $self = shift;
549	12	100				46	if (@_) {
550	11					18	my $v = shift;
551	11	100	100			66	!defined $v || $v eq "" and $v = 0;
552	11	100				52	$v =~ m/^[0-9]/ or $v = lc $v eq "false" ? 0 : 1; # true/truth = 1
		100
553	11					37	$self->_set_attr_X ("keep_meta_info", $v);
554							}
555	12					62	$self->{'keep_meta_info'};
556							} # keep_meta_info
557
558							sub allow_loose_quotes {
559	12			12	1	26	my $self = shift;
560	12	100				60	@_ and $self->_set_attr_X ("allow_loose_quotes", shift);
561	12					35	$self->{'allow_loose_quotes'};
562							} # allow_loose_quotes
563
564							sub allow_loose_escapes {
565	12			12	1	1874	my $self = shift;
566	12	100				84	@_ and $self->_set_attr_X ("allow_loose_escapes", shift);
567	12					37	$self->{'allow_loose_escapes'};
568							} # allow_loose_escapes
569
570							sub allow_whitespace {
571	4954			4954	1	3329645	my $self = shift;
572	4954	100				18660	if (@_) {
573	3725					9356	my $aw = shift;
574	3725	100				11897	_unhealthy_whitespace ($self, $aw) and
575							croak ($self->SetDiag (1002));
576	3721					14301	$self->_set_attr_X ("allow_whitespace", $aw);
577							}
578	4950					20564	$self->{'allow_whitespace'};
579							} # allow_whitespace
580
581							sub allow_unquoted_escape {
582	3			3	1	20	my $self = shift;
583	3	100				18	@_ and $self->_set_attr_X ("allow_unquoted_escape", shift);
584	3					9	$self->{'allow_unquoted_escape'};
585							} # allow_unquoted_escape
586
587							sub blank_is_undef {
588	2			2	1	5	my $self = shift;
589	2	100				9	@_ and $self->_set_attr_X ("blank_is_undef", shift);
590	2					10	$self->{'blank_is_undef'};
591							} # blank_is_undef
592
593							sub empty_is_undef {
594	2			2	1	4	my $self = shift;
595	2	100				10	@_ and $self->_set_attr_X ("empty_is_undef", shift);
596	2					9	$self->{'empty_is_undef'};
597							} # empty_is_undef
598
599							sub verbatim {
600	9			9	1	16311	my $self = shift;
601	9	100				72	@_ and $self->_set_attr_X ("verbatim", shift);
602	9					32	$self->{'verbatim'};
603							} # verbatim
604
605							sub undef_str {
606	12			12	1	3788	my $self = shift;
607	12	100				31	if (@_) {
608	11					20	my $v = shift;
609	11	100				38	$self->{'undef_str'} = defined $v ? "$v" : undef;
610	11					60	$self->_cache_set ($_cache_id{'undef_str'}, $self->{'undef_str'});
611							}
612	12					40	$self->{'undef_str'};
613							} # undef_str
614
615							sub comment_str {
616	15			15	1	82	my $self = shift;
617	15	100				48	if (@_) {
618	14					28	my $v = shift;
619	14	100				53	$self->{'comment_str'} = defined $v ? "$v" : undef;
620	14					99	$self->_cache_set ($_cache_id{'comment_str'}, $self->{'comment_str'});
621							}
622	15					50	$self->{'comment_str'};
623							} # comment_str
624
625							sub auto_diag {
626	12			12	1	482	my $self = shift;
627	12	100				44	if (@_) {
628	9					17	my $v = shift;
629	9	100	100			54	!defined $v || $v eq "" and $v = 0;
630	9	100				43	$v =~ m/^[0-9]/ or $v = lc $v eq "false" ? 0 : 1; # true/truth = 1
		100
631	9					35	$self->_set_attr_X ("auto_diag", $v);
632							}
633	12					58	$self->{'auto_diag'};
634							} # auto_diag
635
636							sub diag_verbose {
637	10			10	1	896	my $self = shift;
638	10	100				32	if (@_) {
639	8					13	my $v = shift;
640	8	100	100			109	!defined $v || $v eq "" and $v = 0;
641	8	100				39	$v =~ m/^[0-9]/ or $v = lc $v eq "false" ? 0 : 1; # true/truth = 1
		100
642	8					26	$self->_set_attr_X ("diag_verbose", $v);
643							}
644	10					47	$self->{'diag_verbose'};
645							} # diag_verbose
646
647							# status
648							#
649							# object method returning the success or failure of the most recent
650							# combine () or parse (). there are no side-effects.
651
652							sub status {
653	5			5	1	13	my $self = shift;
654	5					22	return $self->{'_STATUS'};
655							} # status
656
657							sub eof {
658	33			33	1	15706	my $self = shift;
659	33					137	return $self->{'_EOF'};
660							} # eof
661
662							sub types {
663	7			7	1	2052	my $self = shift;
664	7	100				23	if (@_) {
665	2	100				7	if (my $types = shift) {
666	1					2	$self->{'_types'} = join "", map { chr } @{$types};
	3					23
	1					3
667	1					3	$self->{'types'} = $types;
668	1					7	$self->_cache_set ($_cache_id{'types'}, $self->{'_types'});
669							}
670							else {
671	1					3	delete $self->{'types'};
672	1					3	delete $self->{'_types'};
673	1					6	$self->_cache_set ($_cache_id{'types'}, undef);
674	1					4	undef;
675							}
676							}
677							else {
678	5					24	$self->{'types'};
679							}
680							} # types
681
682							sub callbacks {
683	74			74	1	63857	my $self = shift;
684	74	100				237	if (@_) {
685	44					99	my $cb;
686	44					69	my $hf = 0x00;
687	44	100				119	if (defined $_[0]) {
		100
688	42	100				3028	grep { !defined } @_ and croak ($self->SetDiag (1004));
	75					612
689	40	100	100			1151	$cb = @_ == 1 && ref $_[0] eq "HASH" ? shift
		100
690							: @_ % 2 == 0 ? { @_ }
691							: croak ($self->SetDiag (1004));
692	35					70	foreach my $cbk (keys %{$cb}) {
	35					117
693							# A key cannot be a ref. That would be stored as the *string
694							# 'SCALAR(0x1f3e710)' or 'ARRAY(0x1a5ae18)'
695	37	100	100			2660	$cbk =~ m/^[\w.]+$/ && ref $cb->{$cbk} eq "CODE" or
696							croak ($self->SetDiag (1004));
697							}
698	21	100				133	exists $cb->{'error'} and $hf |= 0x01;
699	21	100				63	exists $cb->{'after_parse'} and $hf |= 0x02;
700	21	100				54	exists $cb->{'before_print'} and $hf |= 0x04;
701							}
702							elsif (@_ > 1) {
703							# (undef, whatever)
704	1					169	croak ($self->SetDiag (1004));
705							}
706	22					116	$self->_set_attr_X ("_has_hooks", $hf);
707	22					69	$self->{'callbacks'} = $cb;
708							}
709	52					205	$self->{'callbacks'};
710							} # callbacks
711
712							# error_diag
713							#
714							# If (and only if) an error occurred, this function returns a code that
715							# indicates the reason of failure
716
717							sub error_diag {
718	1820			1820	1	84425	my $self = shift;
719	1820					6662	my @diag = (0 + $last_err, $last_err, 0, 0, 0, 0);
720
721							# Docs state to NEVER use UNIVERSAL::isa, because it will never call an
722							# overridden isa method in any class. Well, that is exactly what I want here
723	1820	100	100			34874	if ($self && ref $self and # Not a class method or direct call
			100
			100
724							UNIVERSAL::isa ($self, __PACKAGE__) && exists $self->{'_ERROR_DIAG'}) {
725	1641					3515	$diag[0] = 0 + $self->{'_ERROR_DIAG'};
726	1641					6866	$diag[1] = $self->{'_ERROR_DIAG'};
727	1641	100				4566	$diag[2] = 1 + $self->{'_ERROR_POS'} if exists $self->{'_ERROR_POS'};
728	1641					2836	$diag[3] = $self->{'_RECNO'};
729	1641	100				4016	$diag[4] = $self->{'_ERROR_FLD'} if exists $self->{'_ERROR_FLD'};
730	1641	100	66			6928	$diag[5] = $self->{'_ERROR_SRC'} if exists $self->{'_ERROR_SRC'} && $self->{'diag_verbose'};
731
732							$diag[0] && $self->{'callbacks'} && $self->{'callbacks'}{'error'} and
733	1641	100	100			10964	return $self->{'callbacks'}{'error'}->(@diag);
			100
734							}
735
736	1810					3339	my $context = wantarray;
737	1810	100				4420	unless (defined $context) { # Void context, auto-diag
738	343	100	100			1325	if ($diag[0] && $diag[0] != 2012) {
739	36					173	my $msg = "# CSV_XS ERROR: $diag[0] - $diag[1] \@ rec $diag[3] pos $diag[2]\n";
740	36	100				286	$diag[4] and $msg =~ s/$/ field $diag[4]/;
741	36	100				159	$diag[5] and $msg =~ s/$/ (XS#$diag[5])/;
742
743	36	100	100			167	unless ($self && ref $self) { # auto_diag
744							# called without args in void context
745	4					66	warn $msg;
746	4					54	return;
747							}
748
749							$self->{'diag_verbose'} && $self->{'_ERROR_INPUT'} and
750	32	50	66			168	$msg .= $self->{'_ERROR_INPUT'}."\n".
751							(" " x ($diag[2] - 1))."^\n";
752
753	32					72	my $lvl = $self->{'auto_diag'};
754	32	100				124	if ($lvl < 2) {
755	29					122	my @c = caller (2);
756	29	50	66			166	if (@c >= 11 && $c[10] && ref $c[10] eq "HASH") {
			33
757	0					0	my $hints = $c[10];
758							(exists $hints->{'autodie'} && $hints->{'autodie'} or
759							exists $hints->{'guard Fatal'} &&
760	0	0	0			0	!exists $hints->{'no Fatal'}) and
			0
			0
761							$lvl++;
762							# Future releases of autodie will probably set $^H{autodie}
763							# to "autodie @args", like "autodie :all" or "autodie open"
764							# so we can/should check for "open" or "new"
765							}
766							}
767	32	100				346	$lvl > 1 ? die $msg : warn $msg;
768							}
769	336					3526	return;
770							}
771	1467	100				8592	return $context ? @diag : $diag[1];
772							} # error_diag
773
774							sub record_number {
775	14			14	1	4588	my $self = shift;
776	14					59	return $self->{'_RECNO'};
777							} # record_number
778
779							# string
780							#
781							# object method returning the result of the most recent combine () or the
782							# input to the most recent parse (), whichever is more recent. there are
783							# no side-effects.
784
785							sub string {
786	1398			1398	1	496966	my $self = shift;
787	1398	100				6091	return ref $self->{'_STRING'} ? ${$self->{'_STRING'}} : undef;
	1397					8039
788							} # string
789
790							# fields
791							#
792							# object method returning the result of the most recent parse () or the
793							# input to the most recent combine (), whichever is more recent. there
794							# are no side-effects.
795
796							sub fields {
797	1603			1603	1	29060	my $self = shift;
798	1603	100				6203	return ref $self->{'_FIELDS'} ? @{$self->{'_FIELDS'}} : undef;
	1602					12376
799							} # fields
800
801							# meta_info
802							#
803							# object method returning the result of the most recent parse () or the
804							# input to the most recent combine (), whichever is more recent. there
805							# are no side-effects. meta_info () returns (if available) some of the
806							# field's properties
807
808							sub meta_info {
809	21			21	1	897	my $self = shift;
810	21	100				102	return ref $self->{'_FFLAGS'} ? @{$self->{'_FFLAGS'}} : undef;
	16					86
811							} # meta_info
812
813							sub is_quoted {
814	12			12	1	2582	my ($self, $idx) = @_;
815							ref $self->{'_FFLAGS'} &&
816	12	100	100			114	$idx >= 0 && $idx < @{$self->{'_FFLAGS'}} or return;
	8		100			40
817	7	100				31	$self->{'_FFLAGS'}[$idx] & CSV_FLAGS_IS_QUOTED () ? 1 : 0;
818							} # is_quoted
819
820							sub is_binary {
821	11			11	1	1736	my ($self, $idx) = @_;
822							ref $self->{'_FFLAGS'} &&
823	11	100	100			109	$idx >= 0 && $idx < @{$self->{'_FFLAGS'}} or return;
	9		100			61
824	8	100				25	$self->{'_FFLAGS'}[$idx] & CSV_FLAGS_IS_BINARY () ? 1 : 0;
825							} # is_binary
826
827							sub is_missing {
828	19			19	1	41	my ($self, $idx) = @_;
829	19	100	100			119	$idx < 0 || !ref $self->{'_FFLAGS'} and return;
830	11	100				16	$idx >= @{$self->{'_FFLAGS'}} and return 1;
	11					31
831	10	100				25	$self->{'_FFLAGS'}[$idx] & CSV_FLAGS_IS_MISSING () ? 1 : 0;
832							} # is_missing
833
834							# combine
835							#
836							# Object method returning success or failure. The given arguments are
837							# combined into a single comma-separated value. Failure can be the
838							# result of no arguments or an argument containing an invalid character.
839							# side-effects include:
840							# setting status ()
841							# setting fields ()
842							# setting string ()
843							# setting error_input ()
844
845							sub combine {
846	1397			1397	1	1130292	my $self = shift;
847	1397					4108	my $str = "";
848	1397					11795	$self->{'_FIELDS'} = \@_;
849	1397		100			44925	$self->{'_STATUS'} = (@_ > 0) && $self->Combine (\$str, \@_, 0);
850	1393					5602	$self->{'_STRING'} = \$str;
851	1393					5933	$self->{'_STATUS'};
852							} # combine
853
854							# parse
855							#
856							# Object method returning success or failure. The given argument is
857							# expected to be a valid comma-separated value. Failure can be the
858							# result of no arguments or an argument containing an invalid sequence
859							# of characters. Side-effects include:
860							# setting status ()
861							# setting fields ()
862							# setting meta_info ()
863							# setting string ()
864							# setting error_input ()
865
866							sub parse {
867	1947			1947	1	140773	my ($self, $str) = @_;
868
869	1947	100				8459	ref $str and croak ($self->SetDiag (1500));
870
871	1943					3796	my $fields = [];
872	1943					3529	my $fflags = [];
873	1943					5420	$self->{'_STRING'} = \$str;
874	1943	100	100			59658	if (defined $str && $self->Parse ($str, $fields, $fflags)) {
875	1729					6264	$self->{'_FIELDS'} = $fields;
876	1729					4210	$self->{'_FFLAGS'} = $fflags;
877	1729					3990	$self->{'_STATUS'} = 1;
878							}
879							else {
880	211					560	$self->{'_FIELDS'} = undef;
881	211					406	$self->{'_FFLAGS'} = undef;
882	211					463	$self->{'_STATUS'} = 0;
883							}
884	1940					10269	$self->{'_STATUS'};
885							} # parse
886
887							sub column_names {
888	1024			1024	1	75564	my ($self, @keys) = @_;
889							@keys or
890	1024	100				3192	return defined $self->{'_COLUMN_NAMES'} ? @{$self->{'_COLUMN_NAMES'}} : ();
	293	100				1521
891
892							@keys == 1 && ! defined $keys[0] and
893	688	100	100			2788	return $self->{'_COLUMN_NAMES'} = undef;
894
895	550	100	100			2032	if (@keys == 1 && ref $keys[0] eq "ARRAY") {
		100
896	226					391	@keys = @{$keys[0]};
	226					631
897							}
898	712	100				2503	elsif (join "", map { defined $_ ? ref $_ : "" } @keys) {
899	5					1013	croak ($self->SetDiag (3001));
900							}
901
902	545	100	100			1628	$self->{'_BOUND_COLUMNS'} && @keys != @{$self->{'_BOUND_COLUMNS'}} and
	2					178
903							croak ($self->SetDiag (3003));
904
905	544	100				993	$self->{'_COLUMN_NAMES'} = [ map { defined $_ ? $_ : "\cAUNDEF\cA" } @keys ];
	1259					3558
906	544					965	@{$self->{'_COLUMN_NAMES'}};
	544					1610
907							} # column_names
908
909							sub header {
910	333			333	1	54299	my ($self, $fh, @args) = @_;
911
912	333	100				1031	$fh or croak ($self->SetDiag (1014));
913
914	332					727	my (@seps, %args);
915	332					855	for (@args) {
916	225	100				711	if (ref $_ eq "ARRAY") {
917	18					33	push @seps, @{$_};
	18					58
918	18					40	next;
919							}
920	207	100				498	if (ref $_ eq "HASH") {
921	206					296	%args = %{$_};
	206					719
922	206					625	next;
923							}
924	1					149	croak ('usage: $csv->header ($fh, [ seps ], { options })');
925							}
926
927							defined $args{'munge'} && !defined $args{'munge_column_names'} and
928	331	100	66			1324	$args{'munge_column_names'} = $args{'munge'}; # munge as alias
929	331	100				1153	defined $args{'detect_bom'} or $args{'detect_bom'} = 1;
930	331	100				1069	defined $args{'set_column_names'} or $args{'set_column_names'} = 1;
931	331	100				1673	defined $args{'munge_column_names'} or $args{'munge_column_names'} = "lc";
932
933							# Reset any previous leftovers
934	331					753	$self->{'_RECNO'} = 0;
935	331					890	$self->{'_AHEAD'} = undef;
936	331	100				910	$self->{'_COLUMN_NAMES'} = undef if $args{'set_column_names'};
937	331	100				853	$self->{'_BOUND_COLUMNS'} = undef if $args{'set_column_names'};
938
939	331	100				786	if (defined $args{'sep_set'}) {
940	27	100				110	ref $args{'sep_set'} eq "ARRAY" or
941							croak ($self->_SetDiagInfo (1500, "sep_set should be an array ref"));
942	22					41	@seps = @{$args{'sep_set'}};
	22					66
943							}
944
945	326	50				1186	$^O eq "MSWin32" and binmode $fh;
946	326					9251	my $hdr = <$fh>;
947							# check if $hdr can be empty here, I don't think so
948	326	100	66			2490	defined $hdr && $hdr ne "" or croak ($self->SetDiag (1010));
949
950	324					633	my %sep;
951	324	100				1015	@seps or @seps = (",", ";");
952	324					767	foreach my $sep (@seps) {
953	732	100				2242	index ($hdr, $sep) >= 0 and $sep{$sep}++;
954							}
955
956	324	100				990	keys %sep >= 2 and croak ($self->SetDiag (1011));
957
958	320					1437	$self->sep (keys %sep);
959	320					787	my $enc = "";
960	320	100				908	if ($args{'detect_bom'}) { # UTF-7 is not supported
961	319	100				3579	if ($hdr =~ s/^\x00\x00\xfe\xff//) { $enc = "utf-32be" }
	24	100				49
		100
		100
		100
		100
		100
		100
		100
		100
		100
962	24					53	elsif ($hdr =~ s/^\xff\xfe\x00\x00//) { $enc = "utf-32le" }
963	25					49	elsif ($hdr =~ s/^\xfe\xff//) { $enc = "utf-16be" }
964	24					76	elsif ($hdr =~ s/^\xff\xfe//) { $enc = "utf-16le" }
965	48					94	elsif ($hdr =~ s/^\xef\xbb\xbf//) { $enc = "utf-8" }
966	1					3	elsif ($hdr =~ s/^\xf7\x64\x4c//) { $enc = "utf-1" }
967	1					4	elsif ($hdr =~ s/^\xdd\x73\x66\x73//) { $enc = "utf-ebcdic" }
968	1					5	elsif ($hdr =~ s/^\x0e\xfe\xff//) { $enc = "scsu" }
969	1					3	elsif ($hdr =~ s/^\xfb\xee\x28//) { $enc = "bocu-1" }
970	1					4	elsif ($hdr =~ s/^\x84\x31\x95\x33//) { $enc = "gb-18030" }
971	36					75	elsif ($hdr =~ s/^\x{feff}//) { $enc = "" }
972
973	319	100				970	$self->{'ENCODING'} = $enc ? uc $enc : undef;
974
975	319	100				1823	$hdr eq "" and croak ($self->SetDiag (1010));
976
977	313	100				754	if ($enc) {
978	144	50	33			482	$ebcdic && $enc eq "utf-ebcdic" and $enc = "";
979	144	100				571	if ($enc =~ m/([13]).le$/) {
980	48					234	my $l = 0 + $1;
981	48					78	my $x;
982	48					154	$hdr .= "\0" x $l;
983	48					200	read $fh, $x, $l;
984							}
985	144	50				346	if ($enc) {
986	144	100				381	if ($enc ne "utf-8") {
987	96					766	require Encode;
988	96					667	$hdr = Encode::decode ($enc, $hdr);
989							}
990	144			2		6914	binmode $fh, ":encoding($enc)";
	2					1868
	2					38
	2					12
991							}
992							}
993							}
994
995	314					10045	my ($ahead, $eol);
996	314	100	66			1454	if ($hdr and $hdr =~ s/\Asep=(\S)([\r\n]+)//i) { # Also look in xs:Parse
997	1					4	$self->sep ($1);
998	1	50				6	length $hdr or $hdr = <$fh>;
999							}
1000	314	100				2674	if ($hdr =~ s/^([^\r\n]+)([\r\n]+)([^\r\n].+)\z/$1/s) {
1001	142					386	$eol = $2;
1002	142					418	$ahead = $3;
1003							}
1004
1005	314					715	my $hr = \$hdr; # Will cause croak on perl-5.6.x
1006	314	50				3605	open my $h, "<", $hr or croak ($self->SetDiag (1010));
1007
1008	314	100				8537	my $row = $self->getline ($h) or croak ();
1009	312					1061	close $h;
1010
1011	312	100				963	if ( $args{'munge_column_names'} eq "lc") {
		100
		100
1012	293					447	$_ = lc for @{$row};
	293					1294
1013							}
1014							elsif ($args{'munge_column_names'} eq "uc") {
1015	7					12	$_ = uc for @{$row};
	7					39
1016							}
1017							elsif ($args{'munge_column_names'} eq "db") {
1018	3					6	for (@{$row}) {
	3					12
1019	7					17	s/\W+/_/g;
1020	7					21	s/^_+//;
1021	7					18	$_ = lc;
1022							}
1023							}
1024
1025	312	100				716	if ($ahead) { # Must be after getline, which creates the cache
1026	142					588	$self->_cache_set ($_cache_id{'_has_ahead'}, 1);
1027	142					286	$self->{'_AHEAD'} = $ahead;
1028	142	100				706	$eol =~ m/^\r([^\n]\|\z)/ and $self->eol ($eol);
1029							}
1030
1031	312					433	my @hdr = @{$row};
	312					1012
1032							ref $args{'munge_column_names'} eq "CODE" and
1033	312	100				897	@hdr = map { $args{'munge_column_names'}->($_) } @hdr;
	4					21
1034							ref $args{'munge_column_names'} eq "HASH" and
1035	312	100				763	@hdr = map { $args{'munge_column_names'}->{$_} \|\| $_ } @hdr;
	3	100				17
1036	312					563	my %hdr; $hdr{$_}++ for @hdr;
	312					1273
1037	312	100				897	exists $hdr{''} and croak ($self->SetDiag (1012));
1038	310	100				794	unless (keys %hdr == @hdr) {
1039							croak ($self->_SetDiagInfo (1013, join ", " =>
1040	1					5	map { "$_ ($hdr{$_})" } grep { $hdr{$_} > 1 } keys %hdr));
	1					11
	2					8
1041							}
1042	309	100				1527	$args{'set_column_names'} and $self->column_names (@hdr);
1043	309	100				3645	wantarray ? @hdr : $self;
1044							} # header
1045
1046							sub bind_columns {
1047	36			36	1	10127	my ($self, @refs) = @_;
1048							@refs or
1049	36	100				141	return defined $self->{'_BOUND_COLUMNS'} ? @{$self->{'_BOUND_COLUMNS'}} : undef;
	2	100				13
1050
1051	32	100	100			155	if (@refs == 1 && ! defined $refs[0]) {
1052	5					14	$self->{'_COLUMN_NAMES'} = undef;
1053	5					26	return $self->{'_BOUND_COLUMNS'} = undef;
1054							}
1055
1056	27	100	100			109	$self->{'_COLUMN_NAMES'} && @refs != @{$self->{'_COLUMN_NAMES'}} and
	3					234
1057							croak ($self->SetDiag (3003));
1058
1059	26	100				349	join "", map { ref $_ eq "SCALAR" ? "" : "*" } @refs and
	74632	100				137466
1060							croak ($self->SetDiag (3004));
1061
1062	24					2581	$self->_set_attr_N ("_is_bound", scalar @refs);
1063	24					4095	$self->{'_BOUND_COLUMNS'} = [ @refs ];
1064	24					1141	@refs;
1065							} # bind_columns
1066
1067							sub getline_hr {
1068	131			131	1	27048	my ($self, @args, %hr) = @_;
1069	131	100				676	$self->{'_COLUMN_NAMES'} or croak ($self->SetDiag (3002));
1070	130	100				2524	my $fr = $self->getline (@args) or return;
1071	127	100				539	if (ref $self->{'_FFLAGS'}) { # missing
1072							$self->{'_FFLAGS'}[$_] = CSV_FLAGS_IS_MISSING ()
1073	5	50				6	for (@{$fr} ? $#{$fr} + 1 : 0) .. $#{$self->{'_COLUMN_NAMES'}};
	5					11
	5					6
	5					18
1074	5					33	@{$fr} == 1 && (!defined $fr->[0] \|\| $fr->[0] eq "") and
1075	5	100	33			5	$self->{'_FFLAGS'}[0] \|\|= CSV_FLAGS_IS_MISSING ();
			66
			100
1076							}
1077	127					217	@hr{@{$self->{'_COLUMN_NAMES'}}} = @{$fr};
	127					704
	127					266
1078	127					752	\%hr;
1079							} # getline_hr
1080
1081							sub getline_hr_all {
1082	250			250	1	560	my ($self, @args) = @_;
1083	250	100				1165	$self->{'_COLUMN_NAMES'} or croak ($self->SetDiag (3002));
1084	248					388	my @cn = @{$self->{'_COLUMN_NAMES'}};
	248					715
1085	248					476	[ map { my %h; @h{@cn} = @{$_}; \%h } @{$self->getline_all (@args)} ];
	375					678
	375					558
	375					1659
	375					2143
	248					7282
1086							} # getline_hr_all
1087
1088							sub say {
1089	34			34	1	3613	my ($self, $io, @f) = @_;
1090	34					92	my $eol = $self->eol ();
1091							# say ($fh, undef) does not propagate actual undef to print ()
1092	34	100	66			443	my $state = $self->print ($io, @f == 1 && !defined $f[0] ? undef : @f);
1093	34	100				446	unless (length $eol) {
1094	32		33			94	$eol = $self->eol_type () \|\| $\ \|\| $/;
1095	32					68	print $io $eol;
1096							}
1097	34					127	return $state;
1098							} # say
1099
1100							sub print_hr {
1101	3			3	1	738	my ($self, $io, $hr) = @_;
1102	3	100				267	$self->{'_COLUMN_NAMES'} or croak ($self->SetDiag (3009));
1103	2	100				138	ref $hr eq "HASH" or croak ($self->SetDiag (3010));
1104	1					3	$self->print ($io, [ map { $hr->{$_} } $self->column_names () ]);
	3					13
1105							} # print_hr
1106
1107							sub fragment {
1108	58			58	1	45836	my ($self, $io, $spec) = @_;
1109
1110	58					360	my $qd = qr{\s* [0-9]+ \s* }x; # digit
1111	58					151	my $qs = qr{\s* (?: [0-9]+ \| \* ) \s*}x; # digit or star
1112	58					767	my $qr = qr{$qd (?: - $qs )?}x; # range
1113	58					664	my $qc = qr{$qr (?: ; $qr )*}x; # list
1114	58	100	100			6418	defined $spec && $spec =~ m{^ \s*
1115							\x23 ? \s* # optional leading #
1116							( row \| col \| cell ) \s* =
1117							( $qc # for row and col
1118							\| $qd , $qd (?: - $qs , $qs)? # for cell (ranges)
1119							(?: ; $qd , $qd (?: - $qs , $qs)? )* # and cell (range) lists
1120							) \s* $}xi or croak ($self->SetDiag (2013));
1121	38					249	my ($type, $range) = (lc $1, $2);
1122
1123	38					153	my @h = $self->column_names ();
1124
1125	38					72	my @c;
1126	38	100				107	if ($type eq "cell") {
1127	21					42	my @spec;
1128							my $min_row;
1129	21					39	my $max_row = 0;
1130	21					137	for (split m/\s*;\s*/ => $range) {
1131	37	100				4652	my ($tlr, $tlc, $brr, $brc) = (m{
1132							^ \s* ([0-9]+ ) \s* , \s* ([0-9]+ ) \s*
1133							(?: - \s* ([0-9]+ \| \*) \s* , \s* ([0-9]+ \| \*) \s* )?
1134							$}x) or croak ($self->SetDiag (2013));
1135	36	100				129	defined $brr or ($brr, $brc) = ($tlr, $tlc);
1136	36	100	100			2134	$tlr == 0 \|\| $tlc == 0 \|\|
			66
			100
			100
			66
			100
			100
1137							($brr ne "*" && ($brr == 0 \|\| $brr < $tlr)) \|\|
1138							($brc ne "*" && ($brc == 0 \|\| $brc < $tlc))
1139							and croak ($self->SetDiag (2013));
1140	28					48	$tlc--;
1141	28	100				104	$brc-- unless $brc eq "*";
1142	28	100				73	defined $min_row or $min_row = $tlr;
1143	28	100				63	$tlr < $min_row and $min_row = $tlr;
1144	28	100	100			102	$brr eq "*" \|\| $brr > $max_row and
1145							$max_row = $brr;
1146	28					111	push @spec, [ $tlr, $tlc, $brr, $brc ];
1147							}
1148	12					70	my $r = 0;
1149	12					282	while (my $row = $self->getline ($io)) {
1150	77	100				577	++$r < $min_row and next;
1151	33					56	my %row;
1152							my $lc;
1153	33					71	foreach my $s (@spec) {
1154	77					142	my ($tlr, $tlc, $brr, $brc) = @{$s};
	77					184
1155	77	100	100			324	$r < $tlr \|\| ($brr ne "*" && $r > $brr) and next;
			100
1156	45	100	100			134	!defined $lc \|\| $tlc < $lc and $lc = $tlc;
1157	45	100				102	my $rr = $brc eq "*" ? $#{$row} : $brc;
	5					9
1158	45					337	$row{$_} = $row->[$_] for $tlc .. $rr;
1159							}
1160	33					156	push @c, [ @row{sort { $a <=> $b } keys %row } ];
	64					241
1161	33	100				91	if (@h) {
1162	2					4	my %h; @h{@h} = @{$c[-1]};
	2					3
	2					10
1163	2					5	$c[-1] = \%h;
1164							}
1165	33	100	100			465	$max_row ne "*" && $r == $max_row and last;
1166							}
1167	12					139	return \@c;
1168							}
1169
1170							# row or col
1171	17					31	my @r;
1172	17					32	my $eod = 0;
1173	17					93	for (split m/\s*;\s*/ => $range) {
1174	25	50				168	my ($from, $to) = m/^\s* ([0-9]+) (?: \s* - \s* ([0-9]+ \| \* ))? \s* $/x
1175							or croak ($self->SetDiag (2013));
1176	25		100			151	$to \|\|= $from;
1177	25	100				64	$to eq "*" and ($to, $eod) = ($from, 1);
1178							# $to cannot be <= 0 due to regex and \|\|=
1179	25	100	100			635	$from <= 0 \|\| $to < $from and croak ($self->SetDiag (2013));
1180	22					124	$r[$_] = 1 for $from .. $to;
1181							}
1182
1183	14					29	my $r = 0;
1184	14	100				40	$type eq "col" and shift @r;
1185	14		100			153	$_ \|\|= 0 for @r;
1186	14					498	while (my $row = $self->getline ($io)) {
1187	109					227	$r++;
1188	109	100				222	if ($type eq "row") {
1189	64	100	100			295	if (($r > $#r && $eod) \|\| $r[$r]) {
			100
1190	20					64	push @c, $row;
1191	20	100				50	if (@h) {
1192	3					7	my %h; @h{@h} = @{$c[-1]};
	3					8
	3					19
1193	3					8	$c[-1] = \%h;
1194							}
1195							}
1196	64					653	next;
1197							}
1198	45	100	100			99	push @c, [ map { ($_ > $#r && $eod) \|\| $r[$_] ? $row->[$_] : () } 0..$#{$row} ];
	405					1738
	45					124
1199	45	100				544	if (@h) {
1200	9					15	my %h; @h{@h} = @{$c[-1]};
	9					15
	9					24
1201	9					139	$c[-1] = \%h;
1202							}
1203							}
1204
1205	14					132	return \@c;
1206							} # fragment
1207
1208							my $csv_usage = q{usage: my $aoa = csv (in => $file);};
1209
1210							sub _csv_attr {
1211	343	100	66	343		2449	my %attr = (@_ == 1 && ref $_[0] eq "HASH" ? %{$_[0]} : @_) or croak ();
	4	50				16
1212
1213	343					809	$attr{'binary'} = 1;
1214	343					728	$attr{'strict_eol'} = 1;
1215
1216	343		100			1939	my $enc = delete $attr{'enc'} \|\| delete $attr{'encoding'} \|\| "";
1217	343	100				970	$enc eq "auto" and ($attr{'detect_bom'}, $enc) = (1, "");
1218	343	50				1058	my $stack = $enc =~ s/(:\w.*)// ? $1 : "";
1219	343	100				897	$enc =~ m/^[-\w.]+$/ and $enc = ":encoding($enc)";
1220	343					629	$enc .= $stack;
1221
1222	343					711	my $hdrs = delete $attr{'headers'};
1223	343					663	my $frag = delete $attr{'fragment'};
1224	343					713	my $key = delete $attr{'key'};
1225	343					603	my $val = delete $attr{'value'};
1226							my $kh = delete $attr{'keep_headers'} \|\|
1227							delete $attr{'keep_column_names'} \|\|
1228	343		100			1953	delete $attr{'kh'};
1229
1230							my $cbai = delete $attr{'callbacks'}{'after_in'} \|\|
1231							delete $attr{'after_in'} \|\|
1232							delete $attr{'callbacks'}{'after_parse'} \|\|
1233	343		100			2470	delete $attr{'after_parse'};
1234							my $cbbo = delete $attr{'callbacks'}{'before_out'} \|\|
1235	343		100			1085	delete $attr{'before_out'};
1236							my $cboi = delete $attr{'callbacks'}{'on_in'} \|\|
1237	343		100			2136	delete $attr{'on_in'};
1238							my $cboe = delete $attr{'callbacks'}{'on_error'} \|\|
1239	343		66			1124	delete $attr{'on_error'};
1240
1241							my $hd_s = delete $attr{'sep_set'} \|\|
1242	343		100			1080	delete $attr{'seps'};
1243							my $hd_b = delete $attr{'detect_bom'} \|\|
1244	343		100			1213	delete $attr{'bom'};
1245							my $hd_m = delete $attr{'munge'} \|\|
1246	343		100			1121	delete $attr{'munge_column_names'};
1247	343					515	my $hd_c = delete $attr{'set_column_names'};
1248
1249	343					631	my $fh;
1250	343					520	my $sink = 0;
1251	343					502	my $cls = 0; # If I open a file, I have to close it
1252	343	100	100			1724	my $in = delete $attr{'in'} \|\| delete $attr{'file'} or croak ($csv_usage);
1253							my $out = exists $attr{'out'} && !$attr{'out'} ? \"skip"
1254	340	100	100			1583	: delete $attr{'out'} \|\| delete $attr{'file'};
			100
1255
1256	340	100	100			1438	ref $in eq "CODE" \|\| ref $in eq "ARRAY" and $out \|\|= \*STDOUT;
			100
1257
1258	340					634	my ($fho, $fho_cls);
1259	340	100	66			1545	if ($in && $out and (!ref $in \|\| ref $in eq "GLOB" \|\| ref \$in eq "GLOB")
			66
			100
			66
			66
1260							and (!ref $out \|\| ref $out eq "GLOB" \|\| ref \$out eq "GLOB")) {
1261	7	100	66			33	if (ref $out or "GLOB" eq ref \$out) {
1262	2					5	$fho = $out;
1263							}
1264							else {
1265	5	50				739	open $fho, ">", $out or croak "$out: $!\n";
1266	5	50				30	if (my $e = $attr{'encoding'}) {
1267	0					0	binmode $fho, ":encoding($e)";
1268	0	0				0	$hd_b and print $fho "\x{feff}";
1269							}
1270	5					13	$fho_cls = 1;
1271							}
1272	7	100	66			26	if ($cboi && !$cbai) {
1273	1					3	$cbai = $cboi;
1274	1					3	$cboi = undef;
1275							}
1276	7	100				20	if ($cbai) {
1277	2					4	my $cb = $cbai;
1278	2			6		14	$cbai = sub { $cb->(@_); $_[0]->say ($fho, $_[1]); 0 };
	6					29
	6					34
	6					80
1279							}
1280							else {
1281	5			15		56	$cbai = sub { $_[0]->say ($fho, $_[1]); 0 };
	15					80
	15					217
1282							}
1283
1284							# Put all callbacks back in place for streaming behavior
1285	7					20	$attr{'callbacks'}{'after_parse'} = $cbai; $cbai = undef;
	7					14
1286	7					15	$attr{'callbacks'}{'before_out'} = $cbbo; $cbbo = undef;
	7					41
1287	7					18	$attr{'callbacks'}{'on_in'} = $cboi; $cboi = undef;
	7					9
1288	7					14	$attr{'callbacks'}{'on_error'} = $cboe; $cboe = undef;
	7					13
1289	7					37	$out = undef;
1290	7					44	$sink = 1;
1291							}
1292
1293	340	100				787	if ($out) {
1294	33	100	100			270	if (ref $out and ("ARRAY" eq ref $out or "HASH" eq ref $out)) {
		100	100
		100	100
			100
			66
			66
			66
1295	5					7	delete $attr{'out'};
1296	5					8	$sink = 1;
1297							}
1298							elsif ((ref $out and "SCALAR" ne ref $out) or "GLOB" eq ref \$out) {
1299	14					21	$fh = $out;
1300							}
1301	7					42	elsif (ref $out and "SCALAR" eq ref $out and defined ${$out} and ${$out} eq "skip") {
	7					22
1302	2					5	delete $attr{'out'};
1303	2					5	$sink = 1;
1304							}
1305							else {
1306	12	100				1029	open $fh, ">", $out or croak ("$out: $!");
1307	11					37	$cls = 1;
1308							}
1309	32	100				65	if ($fh) {
1310	25	100				54	if ($enc) {
1311	1					11	binmode $fh, $enc;
1312	1					61	my $fn = fileno $fh; # This is a workaround for a bug in PerlIO::via::gzip
1313							}
1314	25	100	66			98	unless (defined $attr{'eol'} \|\| defined $fho) {
1315	18					30	my @layers = eval { PerlIO::get_layers ($fh) };
	18					125
1316	18	100				109	$attr{'eol'} = (grep m/crlf/ => @layers) ? "\n" : "\r\n";
1317							}
1318							}
1319							}
1320
1321	339	100	100			2153	if ( ref $in eq "CODE" or ref $in eq "ARRAY") {
		100	100
		100
1322							# All done
1323							}
1324							elsif (ref $in eq "SCALAR") {
1325							# Strings with code points over 0xFF may not be mapped into in-memory file handles
1326							# "<$enc" does not change that :(
1327	30	50				381	open $fh, "<", $in or croak ("Cannot open from SCALAR using PerlIO");
1328	30					64	$cls = 1;
1329							}
1330							elsif (ref $in or "GLOB" eq ref \$in) {
1331	18	50	66			47	if (!ref $in && $] < 5.008005) {
1332	0					0	$fh = \*{$in}; # uncoverable statement ancient perl version required
	0					0
1333							}
1334							else {
1335	18					23	$fh = $in;
1336							}
1337							}
1338							else {
1339	267	100				13904	open $fh, "<$enc", $in or croak ("$in: $!");
1340	265					2440	$cls = 1;
1341							}
1342	337	50	33			1012	$fh \|\| $sink or croak (qq{No valid source passed. "in" is required});
1343
1344	337					1461	for ([ 'quo' => "quote" ],
1345							[ 'esc' => "escape" ],
1346							[ 'escape' => "escape_char" ],
1347							) {
1348	1011					1323	my ($f, $t) = @{$_};
	1011					2030
1349	1011	100	100			2893	exists $attr{$f} and !exists $attr{$t} and $attr{$t} = delete $attr{$f};
1350							}
1351
1352	337					1031	my $fltr = delete $attr{'filter'};
1353							my %fltr = (
1354	10	100	33	10		14	'not_blank' => sub { @{$_[1]} > 1 or defined $_[1][0] && $_[1][0] ne "" },
	10					63
1355	10	50		10		17	'not_empty' => sub { grep { defined && $_ ne "" } @{$_[1]} },
	26					144
	10					22
1356	10	50		10		20	'filled' => sub { grep { defined && m/\S/ } @{$_[1]} },
	26					211
	10					19
1357	337					3248	);
1358							defined $fltr && !ref $fltr && exists $fltr{$fltr} and
1359	337	50	100			1071	$fltr = { '0' => $fltr{$fltr} };
			66
1360	337	100				800	ref $fltr eq "CODE" and $fltr = { 0 => $fltr };
1361	337	100				849	ref $fltr eq "HASH" or $fltr = undef;
1362
1363	337					690	my $form = delete $attr{'formula'};
1364
1365	337	100				992	defined $attr{'auto_diag'} or $attr{'auto_diag'} = 1;
1366	337	100				979	defined $attr{'escape_null'} or $attr{'escape_null'} = 0;
1367	337	50	66			2347	my $csv = delete $attr{'csv'} \|\| Text::CSV_XS->new (\%attr)
1368							or croak ($last_err);
1369	337	100				819	defined $form and $csv->formula ($form);
1370	337	100				933	defined $cboe and $csv->callbacks (error => $cboe);
1371
1372	337	100	100			981	$kh && !ref $kh && $kh =~ m/^(?:1\|yes\|true\|internal\|auto)$/i and
			100
1373							$kh = \@internal_kh;
1374
1375							return {
1376	337					8614	'csv' => $csv,
1377							'attr' => { %attr },
1378							'fh' => $fh,
1379							'cls' => $cls,
1380							'in' => $in,
1381							'sink' => $sink,
1382							'out' => $out,
1383							'enc' => $enc,
1384							'fho' => $fho,
1385							'fhoc' => $fho_cls,
1386							'hdrs' => $hdrs,
1387							'key' => $key,
1388							'val' => $val,
1389							'kh' => $kh,
1390							'frag' => $frag,
1391							'fltr' => $fltr,
1392							'cbai' => $cbai,
1393							'cbbo' => $cbbo,
1394							'cboi' => $cboi,
1395							'hd_s' => $hd_s,
1396							'hd_b' => $hd_b,
1397							'hd_m' => $hd_m,
1398							'hd_c' => $hd_c,
1399							};
1400							} # _csv_attr
1401
1402							sub csv {
1403	344	100	100	344	1	906070	@_ && ref $_[0] eq __PACKAGE__ and splice @_, 0, 0, "csv";
1404	344	100				1179	@_ or croak ($csv_usage);
1405
1406	343					1007	my $c = _csv_attr (@_);
1407
1408	337					720	my ($csv, $in, $fh, $hdrs) = @{$c}{qw( csv in fh hdrs )};
	337					1131
1409	337					694	my %hdr;
1410	337	100				866	if (ref $hdrs eq "HASH") {
1411	2					25	%hdr = %{$hdrs};
	2					7
1412	2					5	$hdrs = "auto";
1413							}
1414
1415	337	100	100			953	if ($c->{'out'} && !$c->{'sink'}) {
1416							!$hdrs && ref $c->{'kh'} && $c->{'kh'} == \@internal_kh and
1417	24	100	100			107	$hdrs = $c->{'kh'};
			66
1418
1419	24	100	100			42	if (ref $in eq "CODE") {
		100
1420	3					7	my $hdr = 1;
1421	3					13	while (my $row = $in->($csv)) {
1422	7	100				73	if (ref $row eq "ARRAY") {
1423	3					32	$csv->print ($fh, $row);
1424	3					44	next;
1425							}
1426	4	50				11	if (ref $row eq "HASH") {
1427	4	100				10	if ($hdr) {
1428	2	50	100			7	$hdrs \|\|= [ map { $hdr{$_} \|\| $_ } keys %{$row} ];
	3					10
	1					3
1429	2					35	$csv->print ($fh, $hdrs);
1430	2					42	$hdr = 0;
1431							}
1432	4					5	$csv->print ($fh, [ @{$row}{@{$hdrs}} ]);
	4					38
	4					7
1433							}
1434							}
1435							}
1436	21					100	elsif (@{$in} == 0 or ref $in->[0] eq "ARRAY") { # aoa
1437	10	50				22	ref $hdrs and $csv->print ($fh, $hdrs);
1438	10					11	for (@{$in}) {
	10					19
1439	12	100				73	$c->{'cboi'} and $c->{'cboi'}->($csv, $_);
1440	12	50				1186	$c->{'cbbo'} and $c->{'cbbo'}->($csv, $_);
1441	12					169	$csv->print ($fh, $_);
1442							}
1443							}
1444							else { # aoh
1445	11	100				34	my @hdrs = ref $hdrs ? @{$hdrs} : keys %{$in->[0]};
	5					17
	6					17
1446	11	100				26	defined $hdrs or $hdrs = "auto";
1447							ref $hdrs \|\| $hdrs eq "auto" and @hdrs and
1448	11	100	100			59	$csv->print ($fh, [ map { $hdr{$_} \|\| $_ } @hdrs ]);
	20	100	66			238
1449	11					106	for (@{$in}) {
	11					45
1450	17					76	local %_;
1451	17					38	*_ = $_;
1452	17	50				39	$c->{'cboi'} and $c->{'cboi'}->($csv, $_);
1453	17	50				35	$c->{'cbbo'} and $c->{'cbbo'}->($csv, $_);
1454	17					25	$csv->print ($fh, [ @{$_}{@hdrs} ]);
	17					117
1455							}
1456							}
1457
1458	24	100				858	$c->{'cls'} and close $fh;
1459	24	50				61	$c->{'fho_cls'} and close $c->{'fho'};
1460	24					373	return 1;
1461							}
1462
1463	313					517	my @row1;
1464	313	100	100			1670	if (defined $c->{'hd_s'} \|\| defined $c->{'hd_b'} \|\| defined $c->{'hd_m'} \|\| defined $c->{'hd_c'}) {
			100
			100
1465	173					277	my %harg;
1466							!defined $c->{'hd_s'} && $c->{'attr'}{'sep_char'} and
1467	173	100	100			739	$c->{'hd_s'} = [ $c->{'attr'}{'sep_char'} ];
1468							!defined $c->{'hd_s'} && $c->{'attr'}{'sep'} and
1469	173	100	100			781	$c->{'hd_s'} = [ $c->{'attr'}{'sep'} ];
1470	173	100				520	defined $c->{'hd_s'} and $harg{'sep_set'} = $c->{'hd_s'};
1471	173	100				494	defined $c->{'hd_b'} and $harg{'detect_bom'} = $c->{'hd_b'};
1472	173	50				423	defined $c->{'hd_m'} and $harg{'munge_column_names'} = $hdrs ? "none" : $c->{'hd_m'};
		100
1473	173	50				394	defined $c->{'hd_c'} and $harg{'set_column_names'} = $hdrs ? 0 : $c->{'hd_c'};
		100
1474	173					654	@row1 = $csv->header ($fh, \%harg);
1475	170					495	my @hdr = $csv->column_names ();
1476	170	100	100			1000	@hdr and $hdrs \|\|= \@hdr;
1477							}
1478
1479	310	100				746	if ($c->{'kh'}) {
1480	15					26	@internal_kh = ();
1481	15	100				665	ref $c->{'kh'} eq "ARRAY" or croak ($csv->SetDiag (1501));
1482	10		100			24	$hdrs \|\|= "auto";
1483							}
1484
1485	305					626	my $key = $c->{'key'};
1486	305	100				685	if ($key) {
1487	27	100	100			702	!ref $key or ref $key eq "ARRAY" && @{$key} > 1 or croak ($csv->SetDiag (1501));
	8		100			470
1488	20		100			57	$hdrs \|\|= "auto";
1489							}
1490	298					680	my $val = $c->{'val'};
1491	298	100				600	if ($val) {
1492	9	100				153	$key or croak ($csv->SetDiag (1502));
1493	8	100	100			415	!ref $val or ref $val eq "ARRAY" && @{$val} > 0 or croak ($csv->SetDiag (1503));
	3		100			181
1494							}
1495
1496	294	100	100			726	$c->{'fltr'} && grep m/\D/ => keys %{$c->{'fltr'}} and $hdrs \|\|= "auto";
	16		100			150
1497	294	100				754	if (defined $hdrs) {
1498	223	100	100			904	if (!ref $hdrs or ref $hdrs eq "CODE") {
1499	52	100				2209	my $h = $c->{'hd_b'}
1500							? [ $csv->column_names () ]
1501							: $csv->getline ($fh);
1502	52		33			250	my $has_h = $h && @$h;
1503
1504	52	100				191	if (ref $hdrs) {
		100
		100
		100
		50
1505	1	50				5	$has_h or return;
1506	1					3	my $cr = $hdrs;
1507	1		33			3	$hdrs = [ map { $cr->($hdr{$_} \|\| $_) } @{$h} ];
	3					24
	1					3
1508							}
1509							elsif ($hdrs eq "skip") {
1510							# discard;
1511							}
1512							elsif ($hdrs eq "auto") {
1513	48	50				101	$has_h or return;
1514	48	100				68	$hdrs = [ map { $hdr{$_} \|\| $_ } @{$h} ];
	136					508
	48					96
1515							}
1516							elsif ($hdrs eq "lc") {
1517	1	50				4	$has_h or return;
1518	1		33			4	$hdrs = [ map { lc ($hdr{$_} \|\| $_) } @{$h} ];
	3					19
	1					2
1519							}
1520							elsif ($hdrs eq "uc") {
1521	1	50				4	$has_h or return;
1522	1		33			3	$hdrs = [ map { uc ($hdr{$_} \|\| $_) } @{$h} ];
	3					41
	1					3
1523							}
1524							}
1525	223	100	66			649	$c->{'kh'} and $hdrs and @{$c->{'kh'}} = @{$hdrs};
	10					25
	10					13
1526							}
1527
1528	294	100				729	if ($c->{'fltr'}) {
1529	16					24	my %f = %{$c->{'fltr'}};
	16					64
1530							# convert headers to index
1531	16					28	my @hdr;
1532	16	100				39	if (ref $hdrs) {
1533	7					12	@hdr = @{$hdrs};
	7					23
1534	7					29	for (0 .. $#hdr) {
1535	21	100				72	exists $f{$hdr[$_]} and $f{$_ + 1} = delete $f{$hdr[$_]};
1536							}
1537							}
1538							$csv->callbacks ('after_parse' => sub {
1539	114			114		825	my ($CSV, $ROW) = @_; # lexical sub-variables in caps
1540	114					288	foreach my $FLD (sort keys %f) {
1541	115					296	local $_ = $ROW->[$FLD - 1];
1542	115					198	local %_;
1543	115	100				236	@hdr and @_{@hdr} = @{$ROW};
	51					178
1544	115	100				306	$f{$FLD}->($CSV, $ROW) or return \"skip";
1545	52					896	$ROW->[$FLD - 1] = $_;
1546							}
1547	16					130	});
1548							}
1549
1550	294					511	my $frag = $c->{'frag'};
1551							my $ref = ref $hdrs
1552							? # aoh
1553	294	100				5401	do {
		100
1554	222					591	my @h = $csv->column_names ($hdrs);
1555	222					361	my %h; $h{$_}++ for @h;
	222					877
1556	222	50				574	exists $h{''} and croak ($csv->SetDiag (1012));
1557	222	50				551	unless (keys %h == @h) {
1558							croak ($csv->_SetDiagInfo (1013, join ", " =>
1559	0					0	map { "$_ ($h{$_})" } grep { $h{$_} > 1 } keys %h));
	0					0
	0					0
1560							}
1561							$frag ? $csv->fragment ($fh, $frag) :
1562	222	100				948	$key ? do {
		100
		100
1563	17	100				46	my ($k, $j, @f) = ref $key ? (undef, @{$key}) : ($key);
	5					11
1564	17	100				28	if (my @mk = grep { !exists $h{$_} } grep { defined } $k, @f) {
	22					71
	27					44
1565	2					9	croak ($csv->_SetDiagInfo (4001, join ", " => @mk));
1566							}
1567							+{ map {
1568	26					32	my $r = $_;
1569	26	100				60	my $K = defined $k ? $r->{$k} : join $j => @{$r}{@f};
	4					11
1570							( $K => (
1571							$val
1572							? ref $val
1573	4					38	? { map { $_ => $r->{$_} } @{$val} }
	2					4
1574	26	100				113	: $r->{$val}
		100
1575							: $r ));
1576	15					23	} @{$csv->getline_hr_all ($fh)} }
	15					51
1577							}
1578							: $csv->getline_hr_all ($fh);
1579							}
1580							: # aoa
1581							$frag ? $csv->fragment ($fh, $frag)
1582							: $csv->getline_all ($fh);
1583	286	50				1845	if ($ref) {
1584	286	100	66			1238	@row1 && !$c->{'hd_c'} && !ref $hdrs and unshift @{$ref}, \@row1;
	4		100			9
1585							}
1586							else {
1587	0					0	Text::CSV_XS->auto_diag ();
1588							}
1589	286	100				4830	$c->{'cls'} and close $fh;
1590	286	50				929	$c->{'fho_cls'} and close $c->{'fho'};
1591	286	100	100			1588	if ($ref and $c->{'cbai'} \|\| $c->{'cboi'}) {
			66
1592							# Default is ARRAYref, but with key =>, you'll get a hashref
1593	23	100				77	foreach my $r (ref $ref eq "ARRAY" ? @{$ref} : values %{$ref}) {
	22					62
	1					5
1594	74					9748	local %_;
1595	74	100				247	ref $r eq "HASH" and *_ = $r;
1596	74	100				280	$c->{'cbai'} and $c->{'cbai'}->($csv, $r);
1597	74	100				6306	$c->{'cboi'} and $c->{'cboi'}->($csv, $r);
1598							}
1599							}
1600
1601	286	100				2619	if ($c->{'sink'}) {
1602	14	100				407	my $ro = ref $c->{'out'} or return;
1603
1604	7	100	66			23	$ro eq "SCALAR" && ${$c->{'out'}} eq "skip" and
	2					52
1605							return;
1606
1607	5	50				7	$ro eq ref $ref or
1608							croak ($csv->_SetDiagInfo (5001, "Output type mismatch"));
1609
1610	5	100				10	if ($ro eq "ARRAY") {
1611	4	100	33			4	if (@{$c->{'out'}} and @$ref and ref $c->{'out'}[0] eq ref $ref->[0]) {
	4		66			20
1612	2					3	push @{$c->{'out'}} => @$ref;
	2					5
1613	2					29	return $c->{'out'};
1614							}
1615	2					6	croak ($csv->_SetDiagInfo (5001, "Output type mismatch"));
1616							}
1617
1618	1	50				3	if ($ro eq "HASH") {
1619	1					2	@{$c->{'out'}}{keys %{$ref}} = values %{$ref};
	1					12
	1					1
	1					2
1620	1					17	return $c->{'out'};
1621							}
1622
1623	0					0	croak ($csv->_SetDiagInfo (5002, "Unsupported output type"));
1624							}
1625
1626							defined wantarray or
1627							return csv (
1628							'in' => $ref,
1629							'headers' => $hdrs,
1630	272	100				605	%{$c->{'attr'}},
	1					11
1631							);
1632
1633	271		100			1223	$last_err \|\|= $csv->{'_ERROR_DIAG'};
1634	271					6051	return $ref;
1635							} # csv
1636
1637							1;
1638
1639							__END__
1640
1641							=encoding utf-8
1642
1643							=head1 NAME
1644
1645							Text::CSV_XS - comma-separated values manipulation routines
1646
1647							=head1 SYNOPSIS
1648
1649							# Functional interface
1650							use Text::CSV_XS qw( csv );
1651
1652							# Read whole file in memory
1653							my $aoa = csv (in => "data.csv"); # as array of array
1654							my $aoh = csv (in => "data.csv",
1655							headers => "auto"); # as array of hash
1656
1657							# Write array of arrays as csv file
1658							csv (in => $aoa, out => "file.csv", sep_char => ";");
1659
1660							# Only show lines where "code" is odd
1661							csv (in => "data.csv", filter => { code => sub { $_ % 2 }});
1662
1663
1664							# Object interface
1665							use Text::CSV_XS;
1666
1667							my @rows;
1668							# Read/parse CSV
1669							my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });
1670							open my $fh, "<:encoding(utf8)", "test.csv" or die "test.csv: $!";
1671							while (my $row = $csv->getline ($fh)) {
1672							$row->[2] =~ m/pattern/ or next; # 3rd field should match
1673							push @rows, $row;
1674							}
1675							close $fh;
1676
1677							# and write as CSV
1678							open $fh, ">:encoding(utf8)", "new.csv" or die "new.csv: $!";
1679							$csv->say ($fh, $_) for @rows;
1680							close $fh or die "new.csv: $!";
1681
1682							=head1 DESCRIPTION
1683
1684							Text::CSV_XS provides facilities for the composition and decomposition of
1685							comma-separated values. An instance of the Text::CSV_XS class will combine
1686							fields into a C<CSV> string and parse a C<CSV> string into fields.
1687
1688							The module accepts either strings or files as input and supports the use of
1689							user-specified characters for delimiters, separators, and escapes.
1690
1691							=head2 Embedded newlines
1692
1693							B<Important Note>: The default behavior is to accept only ASCII characters
1694							in the range from C<0x20> (space) to C<0x7E> (tilde). This means that the
1695							fields can not contain newlines. If your data contains newlines embedded in
1696							fields, or characters above C<0x7E> (tilde), or binary data, you B<I<must>>
1697							set C<< binary => 1 >> in the call to L</new>. To cover the widest range of
1698							parsing options, you will always want to set binary.
1699
1700							But you still have the problem that you have to pass a correct line to the
1701							L</parse> method, which is more complicated than the usual way of reading lines:
1702
1703							my $csv = Text::CSV_XS->new ({ binary => 1, eol => $/ });
1704							while (<>) { # WRONG!
1705							$csv->parse ($_);
1706							my @fields = $csv->fields ();
1707							}
1708
1709							this will break, as the C<while> might read broken lines: it does not care
1710							about the quoting. If you need to support embedded newlines, the way to go
1711							is to B<not> pass L<C<eol>\|/eol> in the parser (it accepts C<\n>, C<\r>,
1712							B<and> C<\r\n> by default) and then
1713
1714							my $csv = Text::CSV_XS->new ({ binary => 1 });
1715							open my $fh, "<", $file or die "$file: $!";
1716							while (my $row = $csv->getline ($fh)) {
1717							my @fields = @$row;
1718							}
1719
1720							The old(er) way of using global file handles is still supported
1721
1722							while (my $row = $csv->getline (*ARGV)) { ... }
1723
1724							=head2 Unicode
1725
1726							Unicode is only tested to work with perl-5.8.2 and up.
1727
1728							See also L</BOM>.
1729
1730							The simplest way to ensure the correct encoding is used for in- and output
1731							is by either setting layers on the filehandles, or setting the L</encoding>
1732							argument for L</csv>.
1733
1734							open my $fh, "<:encoding(UTF-8)", "in.csv" or die "in.csv: $!";
1735							or
1736							my $aoa = csv (in => "in.csv", encoding => "UTF-8");
1737
1738							open my $fh, ">:encoding(UTF-8)", "out.csv" or die "out.csv: $!";
1739							or
1740							csv (in => $aoa, out => "out.csv", encoding => "UTF-8");
1741
1742							On parsing (both for L</getline> and L</parse>), if the source is marked
1743							being UTF8, then all fields that are marked binary will also be marked UTF8.
1744
1745							On combining (L</print> and L</combine>): if any of the combining fields
1746							was marked UTF8, the resulting string will be marked as UTF8. Note however
1747							that if fields I<before> the first field marked UTF8 contained 8-bit
1748							characters that were not upgraded to UTF8, these will be C<bytes> in the
1749							resulting string too, possibly causing unexpected errors. If you pass data
1750							of different encoding, or you don't know if there is different encoding,
1751							force it to be upgraded before you pass them on:
1752
1753							$csv->print ($fh, [ map { utf8::upgrade (my $x = $_); $x } @data ]);
1754
1755							For complete control over encoding, please use L<Text::CSV::Encoded>:
1756
1757							use Text::CSV::Encoded;
1758							my $csv = Text::CSV::Encoded->new ({
1759							encoding_in => "iso-8859-1", # the encoding comes into Perl
1760							encoding_out => "cp1252", # the encoding comes out of Perl
1761							});
1762
1763							$csv = Text::CSV::Encoded->new ({ encoding => "utf8" });
1764							# combine () and print () accept literally utf8 encoded data
1765							# parse () and getline () return literally utf8 encoded data
1766
1767							$csv = Text::CSV::Encoded->new ({ encoding => undef }); # default
1768							# combine () and print () accept UTF8 marked data
1769							# parse () and getline () return UTF8 marked data
1770
1771							=head2 BOM
1772
1773							BOM (or Byte Order Mark) handling is available only inside the L</header>
1774							method. This method supports the following encodings: C<utf-8>, C<utf-1>,
1775							C<utf-32be>, C<utf-32le>, C<utf-16be>, C<utf-16le>, C<utf-ebcdic>, C<scsu>,
1776							C<bocu-1>, and C<gb-18030>. See L<Wikipedia\|https://en.wikipedia.org/wiki/Byte_order_mark>.
1777
1778							If a file has a BOM, the easiest way to deal with that is
1779
1780							my $aoh = csv (in => $file, detect_bom => 1);
1781
1782							All records will be encoded based on the detected BOM.
1783
1784							This implies a call to the L</header> method, which defaults to also set
1785							the L</column_names>. So this is B<not> the same as
1786
1787							my $aoh = csv (in => $file, headers => "auto");
1788
1789							which only reads the first record to set L</column_names> but ignores any
1790							meaning of a possibly present BOM.
1791
1792							=head1 SPECIFICATION
1793
1794							While no formal specification for CSV exists, L<RFC 4180\|https://datatracker.ietf.org/doc/html/rfc4180>
1795							(I<1>) describes the common format and establishes C<text/csv> as the MIME
1796							type registered with the IANA. L<RFC 7111\|https://datatracker.ietf.org/doc/html/rfc7111>
1797							(I<2>) adds fragments to CSV.
1798
1799							Many informal documents exist that describe the C<CSV> format. L<"How To:
1800							The Comma Separated Value (CSV) File Format"\|http://creativyst.com/Doc/Articles/CSV/CSV01.shtml>
1801							(I<3>) provides an overview of the C<CSV> format in the most widely used
1802							applications and explains how it can best be used and supported.
1803
1804							1) https://datatracker.ietf.org/doc/html/rfc4180
1805							2) https://datatracker.ietf.org/doc/html/rfc7111
1806							3) http://creativyst.com/Doc/Articles/CSV/CSV01.shtml
1807
1808							The basic rules are as follows:
1809
1810							B<CSV> is a delimited data format that has fields/columns separated by the
1811							comma character and records/rows separated by newlines. Fields that contain
1812							a special character (comma, newline, or double quote), must be enclosed in
1813							double quotes. However, if a line contains a single entry that is the empty
1814							string, it may be enclosed in double quotes. If a field's value contains a
1815							double quote character it is escaped by placing another double quote
1816							character next to it. The C<CSV> file format does not require a specific
1817							character encoding, byte order, or line terminator format.
1818
1819							=over 2
1820
1821							=item *
1822
1823							Each record is a single line ended by a line feed (ASCII/C<LF>=C<0x0A>) or
1824							a carriage return and line feed pair (ASCII/C<CRLF>=C<0x0D 0x0A>), however,
1825							line-breaks may be embedded.
1826
1827							=item *
1828
1829							Fields are separated by commas.
1830
1831							=item *
1832
1833							Allowable characters within a C<CSV> field include C<0x09> (C<TAB>) and the
1834							inclusive range of C<0x20> (space) through C<0x7E> (tilde). In binary mode
1835							all characters are accepted, at least in quoted fields.
1836
1837							=item *
1838
1839							A field within C<CSV> must be surrounded by double-quotes to contain a
1840							separator character (comma).
1841
1842							=back
1843
1844							Though this is the most clear and restrictive definition, Text::CSV_XS is
1845							way more liberal than this, and allows extension:
1846
1847							=over 2
1848
1849							=item *
1850
1851							Line termination by a single carriage return is accepted by default
1852
1853							=item *
1854
1855							The separation-, quote-, and escape character(s) can be any ASCII character
1856							in the range from C<0x20> (space) to C<0x7E> (tilde). Characters outside
1857							this range may or may not work as expected. Multibyte characters, like UTF
1858							C<U+060C> (ARABIC COMMA), C<U+FF0C> (FULLWIDTH COMMA), C<U+241B> (SYMBOL
1859							FOR ESCAPE), C<U+2424> (SYMBOL FOR NEWLINE), C<U+FF02> (FULLWIDTH QUOTATION
1860							MARK), and C<U+201C> (LEFT DOUBLE QUOTATION MARK) (to give some examples of
1861							what might look promising) work for newer versions of perl for C<sep_char>,
1862							and C<quote_char> but not for C<escape_char>.
1863
1864							If you use perl-5.8.2 or higher these three attributes are utf8-decoded, to
1865							increase the likelihood of success. This way C<U+00FE> will be allowed as a
1866							quote character.
1867
1868							=item *
1869
1870							A field in C<CSV> must be surrounded by double-quotes to make an embedded
1871							double-quote, represented by a pair of consecutive double-quotes, valid. In
1872							binary mode you may additionally use the sequence C<"0> for representation
1873							of a NULL byte. Using C<0x00> in binary mode is just as valid.
1874
1875							=item *
1876
1877							Several violations of the above specification may be lifted by passing some
1878							options as attributes to the object constructor.
1879
1880							=back
1881
1882							=head1 METHODS
1883
1884							=head2 version
1885							X<version>
1886
1887							(Class method) Returns the current module version.
1888
1889							=head2 new
1890							X<new>
1891
1892							(Class method) Returns a new instance of class Text::CSV_XS. The attributes
1893							are described by the (optional) hash ref C<\%attr>.
1894
1895							my $csv = Text::CSV_XS->new ({ attributes ... });
1896
1897							The following attributes are available:
1898
1899							=head3 eol
1900							X<eol>
1901
1902							my $csv = Text::CSV_XS->new ({ eol => $/ });
1903							$csv->eol (undef);
1904							my $eol = $csv->eol;
1905
1906							The end-of-line string to add to rows for L</print> or the record separator
1907							for L</getline>.
1908
1909							When not passed in a B<parser> instance, the default behavior is to accept
1910							C<\n>, C<\r>, and C<\r\n>, so it is probably safer to not specify C<eol> at
1911							all. Passing C<undef> or the empty string behave the same.
1912
1913							When not passed in a B<generating> instance, records are not terminated at
1914							all, so it is probably wise to pass something you expect. A safe choice for
1915							C<eol> on output is either C<$/> or C<\r\n>.
1916
1917							Common values for C<eol> are C<"\012"> (C<\n> or Line Feed), C<"\015\012">
1918							(C<\r\n> or Carriage Return, Line Feed), and C<"\015"> (C<\r> or Carriage
1919							Return). The L<C<eol>\|/eol> attribute cannot exceed 7 (ASCII) characters.
1920
1921							If both C<$/> and L<C<eol>\|/eol> equal C<"\015">, parsing lines that end on
1922							only a Carriage Return without Line Feed, will be L</parse>d correctly.
1923
1924							=head3 eol_type
1925							X<eol_type>
1926
1927							my $eol = $csv->eol_type;
1928
1929							This read-only method returns the internal state of what is considered the
1930							valid EOL for parsing.
1931
1932							=head3 sep_char
1933							X<sep_char>
1934
1935							my $csv = Text::CSV_XS->new ({ sep_char => ";" });
1936							$csv->sep_char (";");
1937							my $c = $csv->sep_char;
1938
1939							The char used to separate fields, by default a comma (C<,>). Limited to a
1940							single-byte character, usually in the range from C<0x20> (space) to C<0x7E>
1941							(tilde). When longer sequences are required, use L<C<sep>\|/sep>.
1942
1943							The separation character can not be equal to the quote character or to the
1944							escape character.
1945
1946							See also L</CAVEATS>
1947
1948							=head3 sep
1949							X<sep>
1950
1951							my $csv = Text::CSV_XS->new ({ sep => "\N{FULLWIDTH COMMA}" });
1952							$csv->sep (";");
1953							my $sep = $csv->sep;
1954
1955							The chars used to separate fields, by default undefined. Limited to 8 bytes.
1956
1957							When set, overrules L<C<sep_char>\|/sep_char>. If its length is one byte it
1958							acts as an alias to L<C<sep_char>\|/sep_char>.
1959
1960							See also L</CAVEATS>
1961
1962							=head3 quote_char
1963							X<quote_char>
1964
1965							my $csv = Text::CSV_XS->new ({ quote_char => "'" });
1966							$csv->quote_char (undef);
1967							my $c = $csv->quote_char;
1968
1969							The character to quote fields containing blanks or binary data, by default
1970							the double quote character (C<">). A value of undef suppresses quote chars
1971							(for simple cases only). Limited to a single-byte character, usually in the
1972							range from C<0x20> (space) to C<0x7E> (tilde). When longer sequences are
1973							required, use L<C<quote>\|/quote>.
1974
1975							C<quote_char> can not be equal to L<C<sep_char>\|/sep_char>.
1976
1977							=head3 quote
1978							X<quote>
1979
1980							my $csv = Text::CSV_XS->new ({ quote => "\N{FULLWIDTH QUOTATION MARK}" });
1981							$csv->quote ("'");
1982							my $quote = $csv->quote;
1983
1984							The chars used to quote fields, by default undefined. Limited to 8 bytes.
1985
1986							When set, overrules L<C<quote_char>\|/quote_char>. If its length is one byte
1987							it acts as an alias to L<C<quote_char>\|/quote_char>.
1988
1989							This method does not support C<undef>. Use L<C<quote_char>\|/quote_char> to
1990							disable quotation.
1991
1992							See also L</CAVEATS>
1993
1994							=head3 escape_char
1995							X<escape_char>
1996
1997							my $csv = Text::CSV_XS->new ({ escape_char => "\\" });
1998							$csv->escape_char (":");
1999							my $c = $csv->escape_char;
2000
2001							The character to escape certain characters inside quoted fields. This is
2002							limited to a single-byte character, usually in the range from C<0x20>
2003							(space) to C<0x7E> (tilde).
2004
2005							The C<escape_char> defaults to being the double-quote mark (C<">). In other
2006							words the same as the default L<C<quote_char>\|/quote_char>. This means that
2007							doubling the quote mark in a field escapes it:
2008
2009							"foo","bar","Escape ""quote mark"" with two ""quote marks""","baz"
2010
2011							If you change the L<C<quote_char>\|/quote_char> without changing the
2012							C<escape_char>, the C<escape_char> will still be the double-quote (C<">).
2013							If instead you want to escape the L<C<quote_char>\|/quote_char> by doubling
2014							it you will need to also change the C<escape_char> to be the same as what
2015							you have changed the L<C<quote_char>\|/quote_char> to.
2016
2017							Setting C<escape_char> to C<undef> or C<""> will completely disable escapes
2018							and is greatly discouraged. This will also disable C<escape_null>.
2019
2020							The escape character can not be equal to the separation character.
2021
2022							=head3 binary
2023							X<binary>
2024
2025							my $csv = Text::CSV_XS->new ({ binary => 1 });
2026							$csv->binary (0);
2027							my $f = $csv->binary;
2028
2029							If this attribute is C<1>, you may use binary characters in quoted fields,
2030							including line feeds, carriage returns and C<NULL> bytes. (The latter could
2031							be escaped as C<"0>.) By default this feature is off.
2032
2033							If a string is marked UTF8, C<binary> will be turned on automatically when
2034							binary characters other than C<CR> and C<NL> are encountered. Note that a
2035							simple string like C<"\x{00a0}"> might still be binary, but not marked UTF8,
2036							so setting C<< { binary => 1 } >> is still a wise option.
2037
2038							=head3 strict
2039							X<strict>
2040
2041							my $csv = Text::CSV_XS->new ({ strict => 1 });
2042							$csv->strict (0);
2043							my $f = $csv->strict;
2044
2045							If this attribute is set to C<1>, any row that parses to a different number
2046							of fields than the previous row will cause the parser to throw error 2014.
2047
2048							Empty rows or rows that result in no fields (like comment lines) are exempt
2049							from these checks.
2050
2051							=head3 strict_eol
2052							X<strict_eol>
2053
2054							my $csv = Text::CSV_XS->new ({ strict_eol => 1 });
2055							$csv->strict_eol (0);
2056							my $f = $csv->strict_eol;
2057
2058							If this attribute is set to C<0>, no EOL consistency checks are done.
2059
2060							If this attribute is set to C<1>, any row that parses with an EOL other than
2061							the EOL from the first row will cause a warning. The error will be ignored
2062							and parsing continues. This warning is only thrown once. Note that in data
2063							with various different line endings, C<\r\r> will still throw an error that
2064							cannot be ignored.
2065
2066							If this attribute is set to C<2> or higher, any row that parses with an EOL
2067							other than the EOL from the first row will cause error C<2016> to be thrown.
2068							The line being parsed to this error might not be stored in the result.
2069
2070							=head3 skip_empty_rows
2071							X<skip_empty_rows>
2072
2073							my $csv = Text::CSV_XS->new ({ skip_empty_rows => 1 });
2074							$csv->skip_empty_rows ("eof");
2075							my $f = $csv->skip_empty_rows;
2076
2077							This attribute defines the behavior for empty rows: an L</eol> immediately
2078							following the start of line. Default behavior is to return one single empty
2079							field.
2080
2081							This attribute is only used in parsing. This attribute is ineffective when
2082							using L</parse> and L</fields>.
2083
2084							Possible values for this attribute are
2085
2086							=over 2
2087
2088							=item 0 \| undef
2089
2090							my $csv = Text::CSV_XS->new ({ skip_empty_rows => 0 });
2091							$csv->skip_empty_rows (undef);
2092
2093							No special action is taken. The result will be one single empty field.
2094
2095							=item 1 \| "skip"
2096
2097							my $csv = Text::CSV_XS->new ({ skip_empty_rows => 1 });
2098							$csv->skip_empty_rows ("skip");
2099
2100							The row will be skipped.
2101
2102							=item 2 \| "eof" \| "stop"
2103
2104							my $csv = Text::CSV_XS->new ({ skip_empty_rows => 2 });
2105							$csv->skip_empty_rows ("eof");
2106
2107							The parsing will stop as if an L</eof> was detected.
2108
2109							=item 3 \| "die"
2110
2111							my $csv = Text::CSV_XS->new ({ skip_empty_rows => 3 });
2112							$csv->skip_empty_rows ("die");
2113
2114							The parsing will stop. The internal error code will be set to 2015 and the
2115							parser will C<die>.
2116
2117							=item 4 \| "croak"
2118
2119							my $csv = Text::CSV_XS->new ({ skip_empty_rows => 4 });
2120							$csv->skip_empty_rows ("croak");
2121
2122							The parsing will stop. The internal error code will be set to 2015 and the
2123							parser will C<croak>.
2124
2125							=item 5 \| "error"
2126
2127							my $csv = Text::CSV_XS->new ({ skip_empty_rows => 5 });
2128							$csv->skip_empty_rows ("error");
2129
2130							The parsing will fail. The internal error code will be set to 2015.
2131
2132							=item callback
2133
2134							my $csv = Text::CSV_XS->new ({ skip_empty_rows => sub { [] } });
2135							$csv->skip_empty_rows (sub { [ 42, $., undef, "empty" ] });
2136
2137							The callback is invoked and its result used instead. If you want the parse
2138							to stop after the callback, make sure to return a false value.
2139
2140							The returned value from the callback should be an array-ref. Any other type
2141							will cause the parse to stop, so these are equivalent in behavior:
2142
2143							csv (in => $fh, skip_empty_rows => "stop");
2144							csv (in => $fh, skip_empty_rows => sub { 0; });
2145
2146							=back
2147
2148							Without arguments, the current value is returned: C<0>, C<1>, C<eof>, C<die>,
2149							C<croak> or the callback.
2150
2151							=head3 formula_handling
2152							X<formula_handling>
2153
2154							Alias for L</formula>
2155
2156							=head3 formula
2157							X<formula>
2158
2159							my $csv = Text::CSV_XS->new ({ formula => "none" });
2160							$csv->formula ("none");
2161							my $f = $csv->formula;
2162
2163							This defines the behavior of fields containing I<formulas>. As formulas are
2164							considered dangerous in spreadsheets, this attribute can define an optional
2165							action to be taken if a field starts with an equal sign (C<=>).
2166
2167							For purpose of code-readability, this can also be written as
2168
2169							my $csv = Text::CSV_XS->new ({ formula_handling => "none" });
2170							$csv->formula_handling ("none");
2171							my $f = $csv->formula_handling;
2172
2173							Possible values for this attribute are
2174
2175							=over 2
2176
2177							=item none
2178
2179							Take no specific action. This is the default.
2180
2181							$csv->formula ("none");
2182
2183							=item die
2184
2185							Cause the process to C<die> whenever a leading C<=> is encountered.
2186
2187							$csv->formula ("die");
2188
2189							=item croak
2190
2191							Cause the process to C<croak> whenever a leading C<=> is encountered. (See
2192							L<Carp>)
2193
2194							$csv->formula ("croak");
2195
2196							=item diag
2197
2198							Report position and content of the field whenever a leading C<=> is found.
2199							The value of the field is unchanged.
2200
2201							$csv->formula ("diag");
2202
2203							=item empty
2204
2205							Replace the content of fields that start with a C<=> with the empty string.
2206
2207							$csv->formula ("empty");
2208							$csv->formula ("");
2209
2210							=item undef
2211
2212							Replace the content of fields that start with a C<=> with C<undef>.
2213
2214							$csv->formula ("undef");
2215							$csv->formula (undef);
2216
2217							=item a callback
2218
2219							Modify the content of fields that start with a C<=> with the return-value
2220							of the callback. The original content of the field is available inside the
2221							callback as C<$_>:
2222
2223							# Replace all formulas with 42
2224							$csv->formula (sub { 42; });
2225
2226							# same as $csv->formula ("empty") but slower
2227							$csv->formula (sub { "" });
2228
2229							# Allow =4+12
2230							$csv->formula (sub { s/^=(\d+\+\d+)$/$1/eer });
2231
2232							# Allow more complex calculations
2233							$csv->formula (sub { eval { s{^=([-+*/0-9()]+)$}{$1}ee }; $_ });
2234
2235							=back
2236
2237							All other values will give a warning and then fallback to C<diag>.
2238
2239							=head3 decode_utf8
2240							X<decode_utf8>
2241
2242							my $csv = Text::CSV_XS->new ({ decode_utf8 => 1 });
2243							$csv->decode_utf8 (0);
2244							my $f = $csv->decode_utf8;
2245
2246							This attribute defaults to TRUE.
2247
2248							While I<parsing>, fields that are valid UTF-8, are automatically set to be
2249							UTF-8, so that
2250
2251							$csv->parse ("\xC4\xA8\n");
2252
2253							results in
2254
2255							PV("\304\250"\0) [UTF8 "\x{128}"]
2256
2257							Sometimes it might not be a desired action. To prevent those upgrades, set
2258							this attribute to false, and the result will be
2259
2260							PV("\304\250"\0)
2261
2262							=head3 auto_diag
2263							X<auto_diag>
2264
2265							my $csv = Text::CSV_XS->new ({ auto_diag => 1 });
2266							$csv->auto_diag (2);
2267							my $l = $csv->auto_diag;
2268
2269							Setting this attribute to a number between C<1> and C<9> causes L</error_diag>
2270							to be automatically called in void context upon errors.
2271
2272							In case of error C<2012 - EOF>, this call will be void.
2273
2274							If C<auto_diag> is set to a numeric value greater than C<1>, it will C<die>
2275							on errors instead of C<warn>. If set to anything unrecognized, it will be
2276							silently ignored.
2277
2278							Future extensions to this feature will include more reliable auto-detection
2279							of C<autodie> being active in the scope of which the error occurred which
2280							will increment the value of C<auto_diag> with C<1> the moment the error is
2281							detected.
2282
2283							=head3 diag_verbose
2284							X<diag_verbose>
2285
2286							my $csv = Text::CSV_XS->new ({ diag_verbose => 1 });
2287							$csv->diag_verbose (2);
2288							my $l = $csv->diag_verbose;
2289
2290							Set the verbosity of the output triggered by C<auto_diag>. Currently only
2291							adds the current input-record-number (if known) to the diagnostic output
2292							with an indication of the position of the error.
2293
2294							=head3 blank_is_undef
2295							X<blank_is_undef>
2296
2297							my $csv = Text::CSV_XS->new ({ blank_is_undef => 1 });
2298							$csv->blank_is_undef (0);
2299							my $f = $csv->blank_is_undef;
2300
2301							Under normal circumstances, C<CSV> data makes no distinction between quoted-
2302							and unquoted empty fields. These both end up in an empty string field once
2303							read, thus
2304
2305							1,"",," ",2
2306
2307							is read as
2308
2309							("1", "", "", " ", "2")
2310
2311							When I<writing> C<CSV> files with either L<C<always_quote>\|/always_quote>
2312							or L<C<quote_empty>\|/quote_empty> set, the unquoted I<empty> field is the
2313							result of an undefined value. To enable this distinction when I<reading>
2314							C<CSV> data, the C<blank_is_undef> attribute will cause unquoted empty
2315							fields to be set to C<undef>, causing the above to be parsed as
2316
2317							("1", "", undef, " ", "2")
2318
2319							Note that this is specifically important when loading C<CSV> fields into a
2320							database that allows C<NULL> values, as the perl equivalent for C<NULL> is
2321							C<undef> in L<DBI> land.
2322
2323							=head3 empty_is_undef
2324							X<empty_is_undef>
2325
2326							my $csv = Text::CSV_XS->new ({ empty_is_undef => 1 });
2327							$csv->empty_is_undef (0);
2328							my $f = $csv->empty_is_undef;
2329
2330							Going one step further than L<C<blank_is_undef>\|/blank_is_undef>, this
2331							attribute converts all empty fields to C<undef>, so
2332
2333							1,"",," ",2
2334
2335							is read as
2336
2337							(1, undef, undef, " ", 2)
2338
2339							Note that this affects only fields that are originally empty, not fields
2340							that are empty after stripping allowed whitespace. YMMV.
2341
2342							=head3 allow_whitespace
2343							X<allow_whitespace>
2344
2345							my $csv = Text::CSV_XS->new ({ allow_whitespace => 1 });
2346							$csv->allow_whitespace (0);
2347							my $f = $csv->allow_whitespace;
2348
2349							When this option is set to true, the whitespace (C<TAB>'s and C<SPACE>'s)
2350							surrounding the separation character is removed when parsing. If either
2351							C<TAB> or C<SPACE> is one of the three characters L<C<sep_char>\|/sep_char>,
2352							L<C<quote_char>\|/quote_char>, or L<C<escape_char>\|/escape_char> it will not
2353							be considered whitespace.
2354
2355							Now lines like:
2356
2357							1 , "foo" , bar , 3 , zapp
2358
2359							are parsed as valid C<CSV>, even though it violates the C<CSV> specs.
2360
2361							Note that B<all> whitespace is stripped from both start and end of each
2362							field. That would make it I<more> than a I<feature> to enable parsing bad
2363							C<CSV> lines, as
2364
2365							1, 2.0, 3, ape , monkey
2366
2367							will now be parsed as
2368
2369							("1", "2.0", "3", "ape", "monkey")
2370
2371							even if the original line was perfectly acceptable C<CSV>.
2372
2373							=head3 allow_loose_quotes
2374							X<allow_loose_quotes>
2375
2376							my $csv = Text::CSV_XS->new ({ allow_loose_quotes => 1 });
2377							$csv->allow_loose_quotes (0);
2378							my $f = $csv->allow_loose_quotes;
2379
2380							By default, parsing unquoted fields containing L<C<quote_char>\|/quote_char>
2381							characters like
2382
2383							1,foo "bar" baz,42
2384
2385							would result in parse error 2034. Though it is still bad practice to allow
2386							this format, we cannot help the fact that some vendors make their
2387							applications spit out lines styled this way.
2388
2389							If there is B<really> bad C<CSV> data, like
2390
2391							1,"foo "bar" baz",42
2392
2393							or
2394
2395							1,""foo bar baz"",42
2396
2397							there is a way to get this data-line parsed and leave the quotes inside the
2398							quoted field as-is. This can be achieved by setting C<allow_loose_quotes>
2399							B<AND> making sure that the L<C<escape_char>\|/escape_char> is I<not> equal
2400							to L<C<quote_char>\|/quote_char>.
2401
2402							=head3 allow_loose_escapes
2403							X<allow_loose_escapes>
2404
2405							my $csv = Text::CSV_XS->new ({ allow_loose_escapes => 1 });
2406							$csv->allow_loose_escapes (0);
2407							my $f = $csv->allow_loose_escapes;
2408
2409							Parsing fields that have L<C<escape_char>\|/escape_char> characters that
2410							escape characters that do not need to be escaped, like:
2411
2412							my $csv = Text::CSV_XS->new ({ escape_char => "\\" });
2413							$csv->parse (qq{1,"my bar\'s",baz,42});
2414
2415							would result in parse error 2025. Though it is bad practice to allow this
2416							format, this attribute enables you to treat all escape character sequences
2417							equal.
2418
2419							=head3 allow_unquoted_escape
2420							X<allow_unquoted_escape>
2421
2422							my $csv = Text::CSV_XS->new ({ allow_unquoted_escape => 1 });
2423							$csv->allow_unquoted_escape (0);
2424							my $f = $csv->allow_unquoted_escape;
2425
2426							A backward compatibility issue where L<C<escape_char>\|/escape_char> differs
2427							from L<C<quote_char>\|/quote_char> prevents L<C<escape_char>\|/escape_char>
2428							from being in the first position of a field. If L<C<quote_char>\|/quote_char> is
2429							equal to the default C<"> and L<C<escape_char>\|/escape_char> is set to C<\>,
2430							this would be illegal:
2431
2432							1,\0,2
2433
2434							Setting this attribute to C<1> might help to overcome issues with backward
2435							compatibility and allow this style.
2436
2437							=head3 always_quote
2438							X<always_quote>
2439
2440							my $csv = Text::CSV_XS->new ({ always_quote => 1 });
2441							$csv->always_quote (0);
2442							my $f = $csv->always_quote;
2443
2444							By default the generated fields are quoted only if they I<need> to be. For
2445							example, if they contain the separator character. If you set this attribute
2446							to C<1> then I<all> defined fields will be quoted. (C<undef> fields are not
2447							quoted, see L</blank_is_undef>). This makes it quite often easier to handle
2448							exported data in external applications. (Poor creatures who would be better
2449							off using Text::CSV_XS. :)
2450
2451							=head3 quote_space
2452							X<quote_space>
2453
2454							my $csv = Text::CSV_XS->new ({ quote_space => 1 });
2455							$csv->quote_space (0);
2456							my $f = $csv->quote_space;
2457
2458							By default, a space in a field would trigger quotation. As no rule exists
2459							for this to be forced in C<CSV>, nor any for the opposite, the default is true
2460							for safety. You can exclude the space from this trigger by setting this
2461							attribute to 0.
2462
2463							=head3 quote_empty
2464							X<quote_empty>
2465
2466							my $csv = Text::CSV_XS->new ({ quote_empty => 1 });
2467							$csv->quote_empty (0);
2468							my $f = $csv->quote_empty;
2469
2470							By default the generated fields are quoted only if they I<need> to be. An
2471							empty (defined) field does not need quotation. If you set this attribute to
2472							C<1> then I<empty> defined fields will be quoted. (C<undef> fields are not
2473							quoted, see L</blank_is_undef>). See also L<C<always_quote>\|/always_quote>.
2474
2475							=head3 quote_binary
2476							X<quote_binary>
2477
2478							my $csv = Text::CSV_XS->new ({ quote_binary => 1 });
2479							$csv->quote_binary (0);
2480							my $f = $csv->quote_binary;
2481
2482							By default, all "unsafe" bytes inside a string cause the combined field to
2483							be quoted. By setting this attribute to C<0>, you can disable that trigger
2484							for bytes C<< >= 0x7F >>.
2485
2486							=head3 escape_null
2487							X<escape_null>
2488							X<quote_null>
2489
2490							my $csv = Text::CSV_XS->new ({ escape_null => 1 });
2491							$csv->escape_null (0);
2492							my $f = $csv->escape_null;
2493
2494							By default, a C<NULL> byte in a field would be escaped. This option enables
2495							you to treat the C<NULL> byte as a simple binary character in binary mode
2496							(the C<< { binary => 1 } >> is set). The default is true. You can prevent
2497							C<NULL> escapes by setting this attribute to C<0>.
2498
2499							When the C<escape_char> attribute is set to undefined, this attribute will
2500							be set to false.
2501
2502							The default setting will encode "=\x00=" as
2503
2504							"="0="
2505
2506							With C<escape_null> set to C<0>, this will result in
2507
2508							"=\x00="
2509
2510							The default when using the C<csv> function is C<false>.
2511
2512							For backward compatibility reasons, the deprecated old name C<quote_null>
2513							is still recognized.
2514
2515							=head3 keep_meta_info
2516							X<keep_meta_info>
2517
2518							my $csv = Text::CSV_XS->new ({ keep_meta_info => 1 });
2519							$csv->keep_meta_info (0);
2520							my $f = $csv->keep_meta_info;
2521
2522							By default, the parsing of input records is as simple and fast as possible.
2523							However, some parsing information - like quotation of the original field -
2524							is lost in that process. Setting this flag to true enables retrieving that
2525							information after parsing with the methods L</meta_info>, L</is_quoted>,
2526							and L</is_binary> described below. Default is false for performance.
2527
2528							If you set this attribute to a value greater than 9, then you can control
2529							output quotation style like it was used in the input of the last parsed
2530							record (unless quotation was added because of other reasons).
2531
2532							my $csv = Text::CSV_XS->new ({
2533							binary => 1,
2534							keep_meta_info => 1,
2535							quote_space => 0,
2536							});
2537
2538							$csv->parse (q{1,,"", ," ",f,"g","h""h",help,"help"});
2539							my @row = $csv->fields;
2540							$csv->print (*STDOUT, \@row);
2541							# 1,,, , ,f,g,"h""h",help,help
2542							$csv->keep_meta_info (11);
2543							$csv->print (*STDOUT, \@row);
2544							# 1,,"", ," ",f,"g","h""h",help,"help"
2545
2546							=head3 undef_str
2547							X<undef_str>
2548
2549							my $csv = Text::CSV_XS->new ({ undef_str => "\\N" });
2550							$csv->undef_str (undef);
2551							my $s = $csv->undef_str;
2552
2553							This attribute optionally defines the output of undefined fields. The value
2554							passed is not changed at all, so if it needs quotation, the quotation needs
2555							to be included in the value of the attribute. Use with caution, as passing
2556							a value like C<",",,,,"""> will for sure mess up your output. The default
2557							for this attribute is C<undef>, meaning no special treatment.
2558
2559							This attribute is useful when exporting CSV data to be imported in custom
2560							loaders, like for MySQL, that recognize special sequences for C<NULL> data.
2561
2562							This attribute has no meaning when parsing CSV data.
2563
2564							=head3 comment_str
2565							X<comment_str>
2566
2567							my $csv = Text::CSV_XS->new ({ comment_str => "#" });
2568							$csv->comment_str (undef);
2569							my $s = $csv->comment_str;
2570
2571							This attribute optionally defines a string to be recognized as comment. If
2572							this attribute is defined, all lines starting with this sequence will not
2573							be parsed as CSV but skipped as comment.
2574
2575							This attribute has no meaning when generating CSV.
2576
2577							Comment strings that start with any of the special characters/sequences are
2578							not supported (so it cannot start with any of L</sep_char>, L</quote_char>,
2579							L</escape_char>, L</sep>, L</quote>, or L</eol>).
2580
2581							For convenience, C<comment> is an alias for C<comment_str>.
2582
2583							=head3 verbatim
2584							X<verbatim>
2585
2586							my $csv = Text::CSV_XS->new ({ verbatim => 1 });
2587							$csv->verbatim (0);
2588							my $f = $csv->verbatim;
2589
2590							This is a quite controversial attribute to set, but makes some hard things
2591							possible.
2592
2593							The rationale behind this attribute is to tell the parser that the normally
2594							special characters newline (C<NL>) and Carriage Return (C<CR>) will not be
2595							special when this flag is set, and be dealt with as being ordinary binary
2596							characters. This will ease working with data with embedded newlines.
2597
2598							When C<verbatim> is used with L</getline>, L</getline> auto-C<chomp>'s
2599							every line.
2600
2601							Imagine a file format like
2602
2603							M^^Hans^Janssen^Klas 2\n2A^Ja^11-06-2007#\r\n
2604
2605							where the line ending is a very specific C<"#\r\n">, and the sep_char is a
2606							C<^> (caret). None of the fields is quoted, but embedded binary data is
2607							likely to be present. With the specific line ending, this should not be too
2608							hard to detect.
2609
2610							By default, Text::CSV_XS' parse function is instructed to only know about
2611							C<"\n"> and C<"\r"> to be legal line endings, and so has to deal with the
2612							embedded newline as a real C<end-of-line>, so it can scan the next line if
2613							binary is true, and the newline is inside a quoted field. With this option,
2614							we tell L</parse> to parse the line as if C<"\n"> is just nothing more than
2615							a binary character.
2616
2617							For L</parse> this means that the parser no longer has any notion of line
2618							endings, and L</getline> C<chomp>s line endings on reading.
2619
2620							=head3 types
2621
2622							A set of column types; the attribute is immediately passed to the L</types>
2623							method.
2624
2625							=head3 callbacks
2626							X<callbacks>
2627
2628							See the L</Callbacks> section below.
2629
2630							=head3 accessors
2631
2632							To sum it up,
2633
2634							$csv = Text::CSV_XS->new ();
2635
2636							is equivalent to
2637
2638							$csv = Text::CSV_XS->new ({
2639							eol => undef, # \r, \n, or \r\n
2640							sep_char => ',',
2641							sep => undef,
2642							quote_char => '"',
2643							quote => undef,
2644							escape_char => '"',
2645							binary => 0,
2646							decode_utf8 => 1,
2647							auto_diag => 0,
2648							diag_verbose => 0,
2649							blank_is_undef => 0,
2650							empty_is_undef => 0,
2651							allow_whitespace => 0,
2652							allow_loose_quotes => 0,
2653							allow_loose_escapes => 0,
2654							allow_unquoted_escape => 0,
2655							always_quote => 0,
2656							quote_empty => 0,
2657							quote_space => 1,
2658							escape_null => 1,
2659							quote_binary => 1,
2660							keep_meta_info => 0,
2661							strict => 0,
2662							skip_empty_rows => 0,
2663							formula => 0,
2664							verbatim => 0,
2665							undef_str => undef,
2666							comment_str => undef,
2667							types => undef,
2668							callbacks => undef,
2669							});
2670
2671							For all of the above mentioned flags, an accessor method is available where
2672							you can inquire the current value, or change the value:
2673
2674							my $quote = $csv->quote_char;
2675							$csv->binary (1);
2676
2677							It is not wise to change these settings halfway through writing C<CSV> data
2678							to a stream. If however you want to create a new stream using the available
2679							C<CSV> object, there is no harm in changing them.
2680
2681							If the L</new> constructor call fails, it returns C<undef>, and makes the
2682							failure reason available through the L</error_diag> method.
2683
2684							$csv = Text::CSV_XS->new ({ ecs_char => 1 }) or
2685							die "".Text::CSV_XS->error_diag ();
2686
2687							L</error_diag> will return a string like
2688
2689							"INI - Unknown attribute 'ecs_char'"
2690
2691							=head2 known_attributes
2692							X<known_attributes>
2693
2694							@attr = Text::CSV_XS->known_attributes;
2695							@attr = Text::CSV_XS::known_attributes;
2696							@attr = $csv->known_attributes;
2697
2698							This method will return an ordered list of all the supported attributes as
2699							described above. This can be useful for knowing what attributes are valid
2700							in classes that use or extend Text::CSV_XS.
2701
2702							=head2 print
2703							X<print>
2704
2705							$status = $csv->print ($fh, $colref);
2706
2707							Similar to L</combine> + L</string> + L</print>, but much more efficient.
2708							It expects an array ref as input (not an array!) and the resulting string
2709							is not really created, but immediately written to the C<$fh> object,
2710							typically an IO handle or any other object that offers a L</print> method.
2711
2712							For performance reasons C<print> does not create a result string, so all
2713							L</string>, L</status>, L</fields>, and L</error_input> methods will return
2714							undefined information after executing this method.
2715
2716							If C<$colref> is C<undef> (explicit, not through a variable argument) and
2717							L</bind_columns> was used to specify fields to be printed, it is possible
2718							to make performance improvements, as otherwise data would have to be copied
2719							as arguments to the method call:
2720
2721							$csv->bind_columns (\($foo, $bar));
2722							$status = $csv->print ($fh, undef);
2723
2724							A short benchmark
2725
2726							my @data = ("aa" .. "zz");
2727							$csv->bind_columns (\(@data));
2728
2729							$csv->print ($fh, [ @data ]); # 11800 recs/sec
2730							$csv->print ($fh, \@data ); # 57600 recs/sec
2731							$csv->print ($fh, undef ); # 48500 recs/sec
2732
2733							=head2 say
2734							X<say>
2735
2736							$status = $csv->say ($fh, $colref);
2737
2738							Like L<C<print>\|/print>, but L<C<eol>\|/eol> defaults to C<$\>.
2739
2740							=head2 print_hr
2741							X<print_hr>
2742
2743							$csv->print_hr ($fh, $ref);
2744
2745							Provides an easy way to print a C<$ref> (as fetched with L</getline_hr>)
2746							provided the column names are set with L</column_names>.
2747
2748							It is just a wrapper method with basic parameter checks over
2749
2750							$csv->print ($fh, [ map { $ref->{$_} } $csv->column_names ]);
2751
2752							=head2 combine
2753							X<combine>
2754
2755							$status = $csv->combine (@fields);
2756
2757							This method constructs a C<CSV> record from C<@fields>, returning success
2758							or failure. Failure can result from lack of arguments or an argument that
2759							contains an invalid character. Upon success, L</string> can be called to
2760							retrieve the resultant C<CSV> string. Upon failure, the value returned by
2761							L</string> is undefined and L</error_input> could be called to retrieve the
2762							invalid argument.
2763
2764							=head2 string
2765							X<string>
2766
2767							$line = $csv->string ();
2768
2769							This method returns the input to L</parse> or the resultant C<CSV> string
2770							of L</combine>, whichever was called more recently.
2771
2772							=head2 getline
2773							X<getline>
2774
2775							$colref = $csv->getline ($fh);
2776
2777							This is the counterpart to L</print>, as L</parse> is the counterpart to
2778							L</combine>: it parses a row from the C<$fh> handle using the L</getline>
2779							method associated with C<$fh> and parses this row into an array ref. This
2780							array ref is returned by the function or C<undef> for failure. When C<$fh>
2781							does not support C<getline>, you are likely to hit errors.
2782
2783							When fields are bound with L</bind_columns> the return value is a reference
2784							to an empty list.
2785
2786							The L</string>, L</fields>, and L</status> methods are meaningless again.
2787
2788							=head2 getline_all
2789							X<getline_all>
2790
2791							$arrayref = $csv->getline_all ($fh);
2792							$arrayref = $csv->getline_all ($fh, $offset);
2793							$arrayref = $csv->getline_all ($fh, $offset, $length);
2794
2795							This will return a reference to a list of L<getline ($fh)\|/getline> results.
2796							In this call, C<keep_meta_info> is disabled. If C<$offset> is negative, as
2797							with C<splice>, only the last C<abs ($offset)> records of C<$fh> are taken
2798							into consideration. Parameters C<$offset> and C<$length> are expected to be
2799							integers. Non-integer values are interpreted as integer without check.
2800
2801							Given a CSV file with 10 lines:
2802
2803							lines call
2804							----- ---------------------------------------------------------
2805							0..9 $csv->getline_all ($fh) # all
2806							0..9 $csv->getline_all ($fh, 0) # all
2807							8..9 $csv->getline_all ($fh, 8) # start at 8
2808							- $csv->getline_all ($fh, 0, 0) # start at 0 first 0 rows
2809							0..4 $csv->getline_all ($fh, 0, 5) # start at 0 first 5 rows
2810							4..5 $csv->getline_all ($fh, 4, 2) # start at 4 first 2 rows
2811							8..9 $csv->getline_all ($fh, -2) # last 2 rows
2812							6..7 $csv->getline_all ($fh, -4, 2) # first 2 of last 4 rows
2813
2814							=head2 getline_hr
2815							X<getline_hr>
2816
2817							The L</getline_hr> and L</column_names> methods work together to allow you
2818							to have rows returned as hashrefs. You must call L</column_names> first to
2819							declare your column names.
2820
2821							$csv->column_names (qw( code name price description ));
2822							$hr = $csv->getline_hr ($fh);
2823							print "Price for $hr->{name} is $hr->{price} EUR\n";
2824
2825							L</getline_hr> will croak if called before L</column_names>.
2826
2827							Note that L</getline_hr> creates a hashref for every row and will be much
2828							slower than the combined use of L</bind_columns> and L</getline> but still
2829							offering the same easy to use hashref inside the loop:
2830
2831							my @cols = @{$csv->getline ($fh)};
2832							$csv->column_names (@cols);
2833							while (my $row = $csv->getline_hr ($fh)) {
2834							print $row->{price};
2835							}
2836
2837							Could easily be rewritten to the much faster:
2838
2839							my @cols = @{$csv->getline ($fh)};
2840							my $row = {};
2841							$csv->bind_columns (\@{$row}{@cols});
2842							while ($csv->getline ($fh)) {
2843							print $row->{price};
2844							}
2845
2846							Your mileage may vary for the size of the data and the number of rows. With
2847							perl-5.14.2 the comparison for a 100_000 line file with 14 columns:
2848
2849							Rate hashrefs getlines
2850							hashrefs 1.00/s -- -76%
2851							getlines 4.15/s 313% --
2852
2853							=head2 getline_hr_all
2854							X<getline_hr_all>
2855
2856							$arrayref = $csv->getline_hr_all ($fh);
2857							$arrayref = $csv->getline_hr_all ($fh, $offset);
2858							$arrayref = $csv->getline_hr_all ($fh, $offset, $length);
2859
2860							This will return a reference to a list of L<getline_hr ($fh)\|/getline_hr>
2861							results. In this call, L<C<keep_meta_info>\|/keep_meta_info> is disabled.
2862
2863							=head2 parse
2864							X<parse>
2865
2866							$status = $csv->parse ($line);
2867
2868							This method decomposes a C<CSV> string into fields, returning success or
2869							failure. Failure can result from a lack of argument or the given C<CSV>
2870							string is improperly formatted. Upon success, L</fields> can be called to
2871							retrieve the decomposed fields. Upon failure calling L</fields> will return
2872							undefined data and L</error_input> can be called to retrieve the invalid
2873							argument.
2874
2875							You may use the L</types> method for setting column types. See L</types>'
2876							description below.
2877
2878							The C<$line> argument is supposed to be a simple scalar. Everything else is
2879							supposed to croak and set error 1500.
2880
2881							=head2 fragment
2882							X<fragment>
2883
2884							This function tries to implement RFC7111 (URI Fragment Identifiers for the
2885							text/csv Media Type) - https://datatracker.ietf.org/doc/html/rfc7111
2886
2887							my $AoA = $csv->fragment ($fh, $spec);
2888
2889							In specifications, C<*> is used to specify the I<last> item, a dash (C<->)
2890							to indicate a range. All indices are C<1>-based: the first row or column
2891							has index C<1>. Selections can be combined with the semi-colon (C<;>).
2892
2893							When using this method in combination with L</column_names>, the returned
2894							reference will point to a list of hashes instead of a list of lists. A
2895							disjointed cell-based combined selection might return rows with different
2896							number of columns making the use of hashes unpredictable.
2897
2898							$csv->column_names ("Name", "Age");
2899							my $AoH = $csv->fragment ($fh, "col=3;8");
2900
2901							If the L</after_parse> callback is active, it is also called on every line
2902							parsed and skipped before the fragment.
2903
2904							=over 2
2905
2906							=item row
2907
2908							row=4
2909							row=5-7
2910							row=6-*
2911							row=1-2;4;6-*
2912
2913							=item col
2914
2915							col=2
2916							col=1-3
2917							col=4-*
2918							col=1-2;4;7-*
2919
2920							=item cell
2921
2922							In cell-based selection, the comma (C<,>) is used to pair row and column
2923
2924							cell=4,1
2925
2926							The range operator (C<->) using C<cell>s can be used to define top-left and
2927							bottom-right C<cell> location
2928
2929							cell=3,1-4,6
2930
2931							The C<*> is only allowed in the second part of a pair
2932
2933							cell=3,2-*,2 # row 3 till end, only column 2
2934							cell=3,2-3,* # column 2 till end, only row 3
2935							cell=3,2-*,* # strip row 1 and 2, and column 1
2936
2937							Cells and cell ranges may be combined with C<;>, possibly resulting in rows
2938							with different numbers of columns
2939
2940							cell=1,1-2,2;3,3-4,4;1,4;4,1
2941
2942							Disjointed selections will only return selected cells. The cells that are
2943							not specified will not be included in the returned set, not even as
2944							C<undef>. As an example given a C<CSV> like
2945
2946							11,12,13,...19
2947							21,22,...28,29
2948							: :
2949							91,...97,98,99
2950
2951							with C<cell=1,1-2,2;3,3-4,4;1,4;4,1> will return:
2952
2953							11,12,14
2954							21,22
2955							33,34
2956							41,43,44
2957
2958							Overlapping cell-specs will return those cells only once, so
2959							C<cell=1,1-3,3;2,2-4,4;2,3;4,2> will return:
2960
2961							11,12,13
2962							21,22,23,24
2963							31,32,33,34
2964							42,43,44
2965
2966							=back
2967
2968							L<RFC7111\|https://datatracker.ietf.org/doc/html/rfc7111> does B<not> allow different
2969							types of specs to be combined (either C<row> I<or> C<col> I<or> C<cell>).
2970							Passing an invalid fragment specification will croak and set error 2013.
2971
2972							=head2 column_names
2973							X<column_names>
2974
2975							Set the "keys" that will be used in the L</getline_hr> calls. If no keys
2976							(column names) are passed, it will return the current setting as a list.
2977
2978							L</column_names> accepts a list of scalars (the column names) or a single
2979							array_ref, so you can pass the return value from L</getline> too:
2980
2981							$csv->column_names ($csv->getline ($fh));
2982
2983							L</column_names> does B<no> checking on duplicates at all, which might lead
2984							to unexpected results. Undefined entries will be replaced with the string
2985							C<"\cAUNDEF\cA">, so
2986
2987							$csv->column_names (undef, "", "name", "name");
2988							$hr = $csv->getline_hr ($fh);
2989
2990							will set C<< $hr->{"\cAUNDEF\cA"} >> to the 1st field, C<< $hr->{""} >> to
2991							the 2nd field, and C<< $hr->{name} >> to the 4th field, discarding the 3rd
2992							field.
2993
2994							L</column_names> croaks on invalid arguments.
2995
2996							=head2 header
2997
2998							This method does NOT work in perl-5.6.x
2999
3000							Parse the CSV header and set L<C<sep>\|/sep>, column_names and encoding.
3001
3002							my @hdr = $csv->header ($fh);
3003							$csv->header ($fh, { sep_set => [ ";", ",", "\|", "\t" ] });
3004							$csv->header ($fh, { detect_bom => 1, munge_column_names => "lc" });
3005
3006							The first argument should be a file handle.
3007
3008							This method resets some object properties, as it is supposed to be invoked
3009							only once per file or stream. It will leave attributes C<column_names> and
3010							C<bound_columns> alone if setting column names is disabled. Reading headers
3011							on previously processed objects might fail on perl-5.8.0 and older.
3012
3013							Assuming that the file opened for parsing has a header, and the header does
3014							not contain problematic characters like embedded newlines, read the first
3015							line from the open handle then auto-detect whether the header separates the
3016							column names with a character from the allowed separator list.
3017
3018							If any of the allowed separators matches, and none of the I<other> allowed
3019							separators match, set L<C<sep>\|/sep> to that separator for the current
3020							CSV_XS instance and use it to parse the first line, map those to lowercase,
3021							and use that to set the instance L</column_names>:
3022
3023							my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });
3024							open my $fh, "<", "file.csv";
3025							binmode $fh; # for Windows
3026							$csv->header ($fh);
3027							while (my $row = $csv->getline_hr ($fh)) {
3028							...
3029							}
3030
3031							If the header is empty, contains more than one unique separator out of the
3032							allowed set, contains empty fields, or contains identical fields (after
3033							folding), it will croak with error 1010, 1011, 1012, or 1013 respectively.
3034
3035							If the header contains embedded newlines or is not valid CSV in any other
3036							way, this method will croak and leave the parse error untouched.
3037
3038							A successful call to C<header> will always set the L<C<sep>\|/sep> of the
3039							C<$csv> object. This behavior can not be disabled.
3040
3041							=head3 return value
3042
3043							On error this method will croak.
3044
3045							In list context, the headers will be returned whether they are used to set
3046							L</column_names> or not.
3047
3048							In scalar context, the instance itself is returned. B<Note>: the values as
3049							found in the header will effectively be B<lost> if C<set_column_names> is
3050							false.
3051
3052							=head3 Options
3053
3054							=over 2
3055
3056							=item sep_set
3057							X<sep_set>
3058
3059							$csv->header ($fh, { sep_set => [ ";", ",", "\|", "\t" ] });
3060
3061							The list of legal separators defaults to C<[ ";", "," ]> and can be changed
3062							by this option. As this is probably the most often used option, it can be
3063							passed on its own as an unnamed argument:
3064
3065							$csv->header ($fh, [ ";", ",", "\|", "\t", "::", "\x{2063}" ]);
3066
3067							Multi-byte sequences are allowed, both multi-character and Unicode. See
3068							L<C<sep>\|/sep>.
3069
3070							=item detect_bom
3071							X<detect_bom>
3072
3073							$csv->header ($fh, { detect_bom => 1 });
3074
3075							The default behavior is to detect if the header line starts with a BOM. If
3076							the header has a BOM, use that to set the encoding of C<$fh>. This default
3077							behavior can be disabled by passing a false value to C<detect_bom>.
3078
3079							Supported encodings from BOM are: UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, and
3080							UTF-32LE. BOM also supports UTF-1, UTF-EBCDIC, SCSU, BOCU-1, and GB-18030
3081							but L<Encode> does not (yet). UTF-7 is not supported.
3082
3083							If a supported BOM was detected as start of the stream, it is stored in the
3084							object attribute C<ENCODING>.
3085
3086							my $enc = $csv->{ENCODING};
3087
3088							The encoding is used with C<binmode> on C<$fh>.
3089
3090							If the handle was opened in a (correct) encoding, this method will B<not>
3091							alter the encoding, as it checks the leading B<bytes> of the first line. In
3092							case the stream starts with a decoded BOM (C<U+FEFF>), C<{ENCODING}> will be
3093							C<""> (empty) instead of the default C<undef>.
3094
3095							=item munge_column_names
3096							X<munge_column_names>
3097
3098							This option offers the means to modify the column names into something that
3099							is most useful to the application. The default is to map all column names
3100							to lower case.
3101
3102							$csv->header ($fh, { munge_column_names => "lc" });
3103
3104							The following values are available:
3105
3106							lc - lower case
3107							uc - upper case
3108							db - valid DB field names
3109							none - do not change
3110							\%hash - supply a mapping
3111							\&cb - supply a callback
3112
3113							=over 2
3114
3115							=item Lower case
3116
3117							$csv->header ($fh, { munge_column_names => "lc" });
3118
3119							The header is changed to all lower-case
3120
3121							$_ = lc;
3122
3123							=item Upper case
3124
3125							$csv->header ($fh, { munge_column_names => "uc" });
3126
3127							The header is changed to all upper-case
3128
3129							$_ = uc;
3130
3131							=item Literal
3132
3133							$csv->header ($fh, { munge_column_names => "none" });
3134
3135							=item Hash
3136
3137							$csv->header ($fh, { munge_column_names => { foo => "sombrero" }});
3138
3139							If a value does not exist, the original value is used unchanged.
3140
3141							=item Database
3142
3143							$csv->header ($fh, { munge_column_names => "db" });
3144
3145							=over 2
3146
3147							=item -
3148
3149							lower-case
3150
3151							=item -
3152
3153							all sequences of non-word characters are replaced with an underscore
3154
3155							=item -
3156
3157							all leading underscores are removed
3158
3159							=back
3160
3161							$_ = lc (s/\W+/_/gr =~ s/^_+//r);
3162
3163							=item Callback
3164
3165							$csv->header ($fh, { munge_column_names => sub { fc } });
3166							$csv->header ($fh, { munge_column_names => sub { "column_".$col++ } });
3167							$csv->header ($fh, { munge_column_names => sub { lc (s/\W+/_/gr) } });
3168
3169							As this callback is called in a C<map>, you can use C<$_> directly.
3170
3171							=back
3172
3173							=item set_column_names
3174							X<set_column_names>
3175
3176							$csv->header ($fh, { set_column_names => 1 });
3177
3178							The default is to set the instance's column names using L</column_names> if
3179							the method is successful, so subsequent calls to L</getline_hr> can return
3180							a hash. Disabling the setting of column names can be forced by using a false value for
3181							this option.
3182
3183							As described in L</return value> above, content is lost in scalar context.
3184
3185							=back
3186
3187							=head3 Validation
3188
3189							When receiving CSV files from external sources, this method can be used to
3190							protect against changes in the layout by restricting to known headers (and
3191							typos in the header fields).
3192
3193							my %known = (
3194							"record key" => "c_rec",
3195							"rec id" => "c_rec",
3196							"id_rec" => "c_rec",
3197							"kode" => "code",
3198							"code" => "code",
3199							"vaule" => "value",
3200							"value" => "value",
3201							);
3202							my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });
3203							open my $fh, "<", $source or die "$source: $!";
3204							$csv->header ($fh, { munge_column_names => sub {
3205							s/\s+$//;
3206							s/^\s+//;
3207							$known{lc $_} or die "Unknown column '$_' in $source";
3208							}});
3209							while (my $row = $csv->getline_hr ($fh)) {
3210							say join "\t", $row->{c_rec}, $row->{code}, $row->{value};
3211							}
3212
3213							=head2 bind_columns
3214							X<bind_columns>
3215
3216							Takes a list of scalar references to be used for output with L</print> or
3217							to store the fields fetched by L</getline>. When you do not pass enough
3218							references to store the fetched fields in, L</getline> will fail with error
3219							C<3006>. If you pass more than there are fields to return, the content of
3220							the remaining references is left untouched. Under C<strict> the two should
3221							match, otherwise L</getline> will fail with error C<2014>.
3222
3223							$csv->bind_columns (\$code, \$name, \$price, \$description);
3224							while ($csv->getline ($fh)) {
3225							print "The price of a $name is \x{20ac} $price\n";
3226							}
3227
3228							To reset or clear all column binding, call L</bind_columns> with the single
3229							argument C<undef>. This will also clear column names.
3230
3231							$csv->bind_columns (undef);
3232
3233							If no arguments are passed at all, L</bind_columns> will return the list of
3234							current bindings or C<undef> if no binds are active.
3235
3236							Note that in parsing with C<bind_columns>, the fields are set on the fly.
3237							That implies that if the third field of a row causes an error (or this row
3238							has just two fields where the previous row had more), the first two fields
3239							already have been assigned the values of the current row, while the rest of
3240							the fields will still hold the values of the previous row. If you want the
3241							parser to fail in these cases, use the L<C<strict>\|/strict> attribute.
3242
3243							=head2 eof
3244							X<eof>
3245
3246							$eof = $csv->eof ();
3247
3248							If L</parse> or L</getline> was used with an IO stream, this method will
3249							return true (1) if the last call hit end of file, otherwise it will return
3250							false (''). This is useful to see the difference between a failure and end
3251							of file.
3252
3253							Note that if the parsing of the last line caused an error, C<eof> is still
3254							true. That means that if you are I<not> using L</auto_diag>, an idiom like
3255
3256							while (my $row = $csv->getline ($fh)) {
3257							# ...
3258							}
3259							$csv->eof or $csv->error_diag;
3260
3261							will I<not> report the error. You would have to change that to
3262
3263							while (my $row = $csv->getline ($fh)) {
3264							# ...
3265							}
3266							+$csv->error_diag and $csv->error_diag;
3267
3268							=head2 types
3269							X<types>
3270
3271							$csv->types (\@tref);
3272
3273							This method is used to force that (all) columns are of a given type. For
3274							example, if you have an integer column, two columns with doubles and a
3275							string column, then you might do a
3276
3277							$csv->types ([Text::CSV_XS::IV (),
3278							Text::CSV_XS::NV (),
3279							Text::CSV_XS::NV (),
3280							Text::CSV_XS::PV ()]);
3281
3282							Column types are used only for I<decoding> columns while parsing, in other
3283							words by the L</parse> and L</getline> methods.
3284
3285							You can unset column types by doing a
3286
3287							$csv->types (undef);
3288
3289							or fetch the current type settings with
3290
3291							$types = $csv->types ();
3292
3293							=over 4
3294
3295							=item IV
3296							X<IV>
3297
3298							=item CSV_TYPE_IV
3299							X<CSV_TYPE_IV>
3300
3301							Set field type to integer.
3302
3303							=item NV
3304							X<NV>
3305
3306							=item CSV_TYPE_NV
3307							X<CSV_TYPE_NV>
3308
3309							Set field type to numeric/float.
3310
3311							=item PV
3312							X<PV>
3313
3314							=item CSV_TYPE_PV
3315							X<CSV_TYPE_PV>
3316
3317							Set field type to string.
3318
3319							=back
3320
3321							=head2 fields
3322							X<fields>
3323
3324							@columns = $csv->fields ();
3325
3326							This method returns the input to L</combine> or the resultant decomposed
3327							fields of a successful L</parse>, whichever was called more recently.
3328
3329							Note that the return value is undefined after using L</getline>, which does
3330							not fill the data structures returned by L</parse>.
3331
3332							=head2 meta_info
3333							X<meta_info>
3334
3335							@flags = $csv->meta_info ();
3336
3337							This method returns the "flags" of the input to L</combine> or the flags of
3338							the resultant decomposed fields of L</parse>, whichever was called more
3339							recently.
3340
3341							For each field, a meta_info field will hold flags that inform something
3342							about the field returned by the L</fields> method or passed to the
3343							L</combine> method. The flags are bit-wise-C<or>'d like:
3344
3345							=over 2
3346
3347							=item C<0x0001>
3348
3349							=item C<CSV_FLAGS_IS_QUOTED>
3350							X<CSV_FLAGS_IS_QUOTED>
3351
3352							The field was quoted.
3353
3354							=item C<0x0002>
3355
3356							=item C<CSV_FLAGS_IS_BINARY>
3357							X<CSV_FLAGS_IS_BINARY>
3358
3359							The field was binary.
3360
3361							=item C<0x0004>
3362
3363							=item C<CSV_FLAGS_ERROR_IN_FIELD>
3364							X<CSV_FLAGS_ERROR_IN_FIELD>
3365
3366							The field was invalid.
3367
3368							Currently only used when C<allow_loose_quotes> is active.
3369
3370							=item C<0x0010>
3371
3372							=item C<CSV_FLAGS_IS_MISSING>
3373							X<CSV_FLAGS_IS_MISSING>
3374
3375							The field was missing.
3376
3377							=back
3378
3379							See the C<is_***> methods below.
3380
3381							=head2 is_quoted
3382							X<is_quoted>
3383
3384							my $quoted = $csv->is_quoted ($column_idx);
3385
3386							where C<$column_idx> is the (zero-based) index of the column in the last
3387							result of L</parse>.
3388
3389							This returns a true value if the data in the indicated column was enclosed
3390							in L<C<quote_char>\|/quote_char> quotes. This might be important for fields
3391							where content C<,20070108,> is to be treated as a numeric value, and where
3392							C<,"20070108",> is explicitly marked as character string data.
3393
3394							This method is only valid when L</keep_meta_info> is set to a true value.
3395
3396							=head2 is_binary
3397							X<is_binary>
3398
3399							my $binary = $csv->is_binary ($column_idx);
3400
3401							where C<$column_idx> is the (zero-based) index of the column in the last
3402							result of L</parse>.
3403
3404							This returns a true value if the data in the indicated column contained any
3405							byte in the range C<[\x00-\x08,\x10-\x1F,\x7F-\xFF]>.
3406
3407							This method is only valid when L</keep_meta_info> is set to a true value.
3408
3409							=head2 is_missing
3410							X<is_missing>
3411
3412							my $missing = $csv->is_missing ($column_idx);
3413
3414							where C<$column_idx> is the (zero-based) index of the column in the last
3415							result of L</getline_hr>.
3416
3417							$csv->keep_meta_info (1);
3418							while (my $hr = $csv->getline_hr ($fh)) {
3419							$csv->is_missing (0) and next; # This was an empty line
3420							}
3421
3422							When using L</getline_hr>, it is impossible to tell if the parsed fields
3423							are C<undef> because they were not filled in the C<CSV> stream or because
3424							they were not read at all, as B<all> the fields defined by L</column_names>
3425							are set in the hash-ref. If you still need to know if all fields in each
3426							row are provided, you should enable L<C<keep_meta_info>\|/keep_meta_info> so
3427							you can check the flags.
3428
3429							If L<C<keep_meta_info>\|/keep_meta_info> is C<false>, C<is_missing> will
3430							always return C<undef>, regardless of C<$column_idx> being valid or not. If
3431							this attribute is C<true> it will return either C<0> (the field is present)
3432							or C<1> (the field is missing).
3433
3434							A special case is the empty line. If the line is completely empty - after
3435							dealing with the flags - this is still a valid CSV line: it is a record of
3436							just one single empty field. However, if C<keep_meta_info> is set, invoking
3437							C<is_missing> with index C<0> will now return true.
3438
3439							=head2 status
3440							X<status>
3441
3442							$status = $csv->status ();
3443
3444							This method returns the status of the last invoked L</combine> or L</parse>
3445							call. Status is success (true: C<1>) or failure (false: C<undef> or C<0>).
3446
3447							Note that as this only keeps track of the status of above mentioned methods,
3448							you are probably looking for L<C<error_diag>\|/error_diag> instead.
3449
3450							=head2 error_input
3451							X<error_input>
3452
3453							$bad_argument = $csv->error_input ();
3454
3455							This method returns the erroneous argument (if it exists) of L</combine> or
3456							L</parse>, whichever was called more recently. If the last invocation was
3457							successful, C<error_input> will return C<undef>.
3458
3459							Depending on the type of error, it I<might> also hold the data for the last
3460							error-input of L</getline>.
3461
3462							=head2 error_diag
3463							X<error_diag>
3464
3465							Text::CSV_XS->error_diag ();
3466							$csv->error_diag ();
3467							$error_code = 0 + $csv->error_diag ();
3468							$error_str = "" . $csv->error_diag ();
3469							($cde, $str, $pos, $rec, $fld, $xs) = $csv->error_diag ();
3470
3471							If (and only if) an error occurred, this function returns the diagnostics
3472							of that error.
3473
3474							If called in void context, this will print the internal error code and the
3475							associated error message to STDERR.
3476
3477							If called in list context, this will return the error code and the error
3478							message in that order. If the last error was from parsing, the rest of the
3479							values returned are a best guess at the location within the line that was
3480							being parsed. Their values are 1-based. The position currently is the index of
3481							the byte at which the parsing failed in the current record. It might change
3482							to be the index of the current character in a later release. The record number is
3483							the index of the record parsed by the csv instance. The field number is the
3484							index of the field the parser thinks it is currently trying to parse. See
3485							F<examples/csv-check> for how this can be used. If C<$xs> is set, it is the
3486							line number in XS where the error was triggered (for debugging). C<XS> will
3487							show in void context only when L</diag_verbose> is set.
3488
3489							If called in scalar context, it will return the diagnostics in a single
3490							scalar, a-la C<$!>. It will contain the error code in numeric context, and
3491							the diagnostics message in string context.
3492
3493							When called as a class method or a direct function call, the diagnostics
3494							are that of the last L</new> call.
3495
3496							=head3 _cache_diag
3497
3498							Note: This is an internal function only, and output cannot be relied upon.
3499							Use at own risk.
3500
3501							If debugging beyond what L</error_diag> is able to show, the internal cache
3502							can be shown with this function.
3503
3504							# Something failed ..
3505							$csv->error_diag;
3506							$csv->_cache_diag ();
3507
3508							=head2 record_number
3509							X<record_number>
3510
3511							$recno = $csv->record_number ();
3512
3513							Returns the number of records parsed by this csv instance. This value should be more
3514							accurate than C<$.> when embedded newlines come in play. Records written by
3515							this instance are not counted.
3516
3517							=head2 SetDiag
3518							X<SetDiag>
3519
3520							$csv->SetDiag (0);
3521
3522							Use to reset the diagnostics if you are dealing with errors.
3523
3524							=head1 IMPORTS/EXPORTS
3525
3526							By default none of these are exported.
3527
3528							=over 2
3529
3530							=item csv
3531
3532							use Text::CSV_XS qw( csv );
3533
3534							Import the L</csv> function. See below.
3535
3536							=item :CONSTANTS
3537
3538							use Text::CSV_XS qw( :CONSTANTS );
3539
3540							Import module constants L</CSV_FLAGS_IS_QUOTED>, L</CSV_FLAGS_IS_BINARY>,
3541							L</CSV_FLAGS_ERROR_IN_FIELD>, L</CSV_FLAGS_IS_MISSING>, L</CSV_TYPE_PV>,
3542							L</CSV_TYPE_IV>, and L</CSV_TYPE_NV>. Each can be imported alone
3543
3544							use Text::CSV_XS qw( CSV_FLAGS_IS_BINARY CSV_TYPE_NV );
3545
3546							=back
3547
3548							=head1 FUNCTIONS
3549
3550							=head2 csv
3551							X<csv>
3552
3553							This function is not exported by default and should be explicitly requested:
3554
3555							use Text::CSV_XS qw( csv );
3556
3557							This is a high-level function that aims at simple (user) interfaces. This
3558							can be used to read/parse a C<CSV> file or stream (the default behavior) or
3559							to produce a file or write to a stream (define the C<out> attribute). It
3560							returns an array- or hash-reference on parsing (or C<undef> on fail) or the
3561							numeric value of L</error_diag> on writing. When this function fails you
3562							can get to the error using the class call to L</error_diag>
3563
3564							my $aoa = csv (in => "test.csv") or
3565							die Text::CSV_XS->error_diag;
3566
3567							Note that failure here is the inability to start the parser, like when the
3568							input does not exist or the arguments are unknown or conflicting. Run-time
3569							parsing errors will return a valid reference, which can be empty, but still
3570							contains all results up till the error. See L</on_error>.
3571
3572							This function takes the arguments as key-value pairs. This can be passed as
3573							a list or as an anonymous hash:
3574
3575							my $aoa = csv ( in => "test.csv", sep_char => ";");
3576							my $aoh = csv ({ in => $fh, headers => "auto" });
3577
3578							The arguments passed consist of two parts: the arguments to L</csv> itself
3579							and the optional attributes to the C<CSV> object used inside the function
3580							as enumerated and explained in L</new>.
3581
3582							If not overridden, the default options used for CSV are
3583
3584							auto_diag => 1
3585							escape_null => 0
3586							strict_eol => 1
3587
3588							The option that is always set and cannot be altered is
3589
3590							binary => 1
3591
3592							As this function will likely be used in one-liners, it allows C<quote> to
3593							be abbreviated as C<quo>, and C<escape_char> to be abbreviated as C<esc>
3594							or C<escape>.
3595
3596							Alternative invocations:
3597
3598							my $aoa = Text::CSV_XS::csv (in => "file.csv");
3599
3600							my $csv = Text::CSV_XS->new ();
3601							my $aoa = $csv->csv (in => "file.csv");
3602
3603							In the latter case, the object attributes are used from the existing object
3604							and the attribute arguments in the function call are ignored:
3605
3606							my $csv = Text::CSV_XS->new ({ sep_char => ";" });
3607							my $aoh = $csv->csv (in => "file.csv", bom => 1);
3608
3609							will parse using C<;> as C<sep_char>, not C<,>.
3610
3611							=head3 in
3612							X<in>
3613
3614							Used to specify the source. C<in> can be a file name (e.g. C<"file.csv">),
3615							which will be opened for reading and closed when finished, a file handle
3616							(e.g. C<$fh> or C<FH>), a reference to a glob (e.g. C<\*ARGV>), the glob
3617							itself (e.g. C<*STDIN>), or a reference to a scalar (e.g. C<\q{1,2,"csv"}>).
3618
3619							When used with L</out>, C<in> should be a reference to a CSV structure (AoA
3620							or AoH) or a CODE-ref that returns an array-reference or a hash-reference.
3621							The code-ref will be invoked with no arguments.
3622
3623							my $aoa = csv (in => "file.csv");
3624
3625							open my $fh, "<", "file.csv";
3626							my $aoa = csv (in => $fh);
3627
3628							my $csv = [ [qw( Foo Bar )], [ 1, 2 ], [ 2, 3 ]];
3629							my $err = csv (in => $csv, out => "file.csv");
3630
3631							If called in void context without the L</out> attribute, the resulting ref
3632							will be used as input to a subsequent call to csv:
3633
3634							csv (in => "file.csv", filter => { 2 => sub { length > 2 }})
3635
3636							will be a shortcut to
3637
3638							csv (in => csv (in => "file.csv", filter => { 2 => sub { length > 2 }}))
3639
3640							where, in the absence of the C<out> attribute, this is a shortcut to
3641
3642							csv (in => csv (in => "file.csv", filter => { 2 => sub { length > 2 }}),
3643							out => *STDOUT)
3644
3645							=head3 out
3646							X<out>
3647
3648							csv (in => $aoa, out => "file.csv");
3649							csv (in => $aoa, out => $fh);
3650							csv (in => $aoa, out => STDOUT);
3651							csv (in => $aoa, out => *STDOUT);
3652							csv (in => $aoa, out => \*STDOUT);
3653							csv (in => $aoa, out => \my $data);
3654							csv (in => $aoa, out => undef);
3655							csv (in => $aoa, out => \"skip");
3656
3657							csv (in => $fh, out => \@aoa);
3658							csv (in => $fh, out => \@aoh, bom => 1);
3659							csv (in => $fh, out => \%hsh, key => "key");
3660
3661							csv (in => $file, out => $file);
3662							csv (in => $file, out => $fh);
3663							csv (in => $fh, out => $file);
3664							csv (in => $fh, out => $fh);
3665
3666							In output mode, the default CSV options when producing CSV are
3667
3668							eol => "\r\n"
3669
3670							The L</fragment> attribute is ignored in output mode.
3671
3672							C<out> can be a file name (e.g. C<"file.csv">), which will be opened for
3673							writing and closed when finished, a file handle (e.g. C<$fh> or C<FH>), a
3674							reference to a glob (e.g. C<\*STDOUT>), the glob itself (e.g. C<*STDOUT>),
3675							or a reference to a scalar (e.g. C<\my $data>).
3676
3677							csv (in => sub { $sth->fetch }, out => "dump.csv");
3678							csv (in => sub { $sth->fetchrow_hashref }, out => "dump.csv",
3679							headers => $sth->{NAME_lc});
3680
3681							When a code-ref is used for C<in>, the output is generated per invocation,
3682							so no buffering is involved. This implies that there is no size restriction
3683							on the number of records. The C<csv> function ends when the coderef returns
3684							a false value.
3685
3686							If C<out> is set to a reference of the literal string C<"skip">, the output
3687							will be suppressed completely, which might be useful in combination with a
3688							filter for side effects only.
3689
3690							my %cache;
3691							csv (in => "dump.csv",
3692							out => \"skip",
3693							on_in => sub { $cache{$_[1][1]}++ });
3694
3695							Currently, setting C<out> to any false value (C<undef>, C<"">, 0) will be
3696							equivalent to C<\"skip">.
3697
3698							If the C<in> argument points to something to parse, and the C<out> is set to
3699							a reference to an C<ARRAY> or a C<HASH>, the output is appended to the data
3700							in the existing reference. The result of the parse should match what exists
3701							in the reference passed. This might come in handy when you have to parse a set
3702							of files with similar content (like data stored per period) and you want to
3703							collect that into a single data structure:
3704
3705							my %hash;
3706							csv (in => $_, out => \%hash, key => "id") for sort glob "foo-[0-9]*.csv";
3707
3708							my @list; # List of arrays
3709							csv (in => $_, out => \@list) for sort glob "foo-[0-9]*.csv";
3710
3711							my @list; # List of hashes
3712							csv (in => $_, out => \@list, bom => 1) for sort glob "foo-[0-9]*.csv";
3713
3714							=head4 Streaming
3715							X<streaming>
3716
3717							If B<both> C<in> and C<out> are files, file handles or globs, streaming is
3718							enforced by injecting an C<after_parse> callback that immediately uses the
3719							L<C<say ()>\|/say> method of the same instance to output the result and then
3720							rejects the record.
3721
3722							If an C<after_parse> was already passed as attribute, that will be included
3723							in the injected call. If C<on_in> was passed and C<after_parse> was not, it
3724							will be used instead. If both were passed, C<on_in> is ignored.
3725
3726							The EOL of the first record of the C<in> source is consistently used as EOL
3727							for all records in the C<out> destination.
3728
3729							The C<filter> attribute is not available.
3730
3731							All other attributes are shared for C<in> and C<out>, so you cannot define
3732							different encodings for C<in> and C<out>. You need to pass a C<$fh>, where
3733							C<binmode> was used to apply the encoding layers.
3734
3735							Note that this is work in progress and things might change.
3736
3737							=head3 encoding
3738							X<encoding>
3739
3740							If passed, it should be an encoding accepted by the C<:encoding()> option
3741							to C<open>. There is no default value. This attribute does not work in perl
3742							5.6.x. C<encoding> can be abbreviated to C<enc> for ease of use in command
3743							line invocations.
3744
3745							If C<encoding> is set to the literal value C<"auto">, the method L</header>
3746							will be invoked on the opened stream to check if there is a BOM and set the
3747							encoding accordingly. This is equal to passing a true value in the option
3748							L<C<detect_bom>\|/detect_bom>.
3749
3750							Encodings can be stacked, as supported by C<binmode>:
3751
3752							# Using PerlIO::via::gzip
3753							csv (in => \@csv,
3754							out => "test.csv:via.gz",
3755							encoding => ":via(gzip):encoding(utf-8)",
3756							);
3757							$aoa = csv (in => "test.csv:via.gz", encoding => ":via(gzip)");
3758
3759							# Using PerlIO::gzip
3760							csv (in => \@csv,
3761							out => "test.csv:gzip.gz",
3762							encoding => ":gzip:encoding(utf-8)",
3763							);
3764							$aoa = csv (in => "test.csv:gzip.gz", encoding => ":gzip");
3765
3766							=head3 detect_bom
3767							X<detect_bom>
3768
3769							If C<detect_bom> is given, the method L</header> will be invoked on the
3770							opened stream to check if there is a BOM and set the encoding accordingly.
3771							Note that the attribute L<C<headers>\|/headers> can be used to overrule the
3772							default behavior of how that method automatically sets the attribute.
3773
3774							C<detect_bom> can be abbreviated to C<bom>.
3775
3776							This is the same as setting L<C<encoding>\|/encoding> to C<"auto">.
3777
3778							=head3 headers
3779							X<headers>
3780
3781							If this attribute is not given, the default behavior is to produce an array
3782							of arrays.
3783
3784							If C<headers> is supplied, it should be an anonymous list of column names,
3785							an anonymous hashref, a coderef, or a literal flag: C<auto>, C<lc>, C<uc>,
3786							or C<skip>.
3787
3788							=over 2
3789
3790							=item skip
3791							X<skip>
3792
3793							When C<skip> is used, the header will not be included in the output.
3794
3795							my $aoa = csv (in => $fh, headers => "skip");
3796
3797							C<skip> is invalid/ignored in combination with L<C<detect_bom>\|/detect_bom>.
3798
3799							=item auto
3800							X<auto>
3801
3802							If C<auto> is used, the first line of the C<CSV> source will be read as the
3803							list of field headers and used to produce an array of hashes.
3804
3805							my $aoh = csv (in => $fh, headers => "auto");
3806
3807							=item lc
3808							X<lc>
3809
3810							If C<lc> is used, the first line of the C<CSV> source will be read as the
3811							list of field headers mapped to lower case and used to produce an array of
3812							hashes. This is a variation of C<auto>.
3813
3814							my $aoh = csv (in => $fh, headers => "lc");
3815
3816							=item uc
3817							X<uc>
3818
3819							If C<uc> is used, the first line of the C<CSV> source will be read as the
3820							list of field headers mapped to upper case and used to produce an array of
3821							hashes. This is a variation of C<auto>.
3822
3823							my $aoh = csv (in => $fh, headers => "uc");
3824
3825							=item CODE
3826							X<CODE>
3827
3828							If a coderef is used, the first line of the C<CSV> source will be read as
3829							the list of mangled field headers in which each field is passed as the only
3830							argument to the coderef. This list is used to produce an array of hashes.
3831
3832							my $aoh = csv (in => $fh,
3833							headers => sub { lc ($_[0]) =~ s/kode/code/gr });
3834
3835							this example is a variation of using C<lc> where all occurrences of C<kode>
3836							are replaced with C<code>.
3837
3838							=item ARRAY
3839							X<ARRAY>
3840
3841							If C<headers> is an anonymous list, the entries in the list will be used
3842							as field names. The first line is considered data instead of headers.
3843
3844							my $aoh = csv (in => $fh, headers => [qw( Foo Bar )]);
3845							csv (in => $aoa, out => $fh, headers => [qw( code description price )]);
3846
3847							=item HASH
3848							X<HASH>
3849
3850							If C<headers> is a hash reference, this implies C<auto>, but header fields
3851							that exist as key in the hashref will be replaced by the value for that
3852							key. Given a CSV file like
3853
3854							post-kode,city,name,id number,fubble
3855							1234AA,Duckstad,Donald,13,"X313DF"
3856
3857							using
3858
3859							csv (headers => { "post-kode" => "pc", "id number" => "ID" }, ...
3860
3861							will return an entry like
3862
3863							{ pc => "1234AA",
3864							city => "Duckstad",
3865							name => "Donald",
3866							ID => "13",
3867							fubble => "X313DF",
3868							}
3869
3870							=back
3871
3872							See also L<C<munge_column_names>\|/munge_column_names> and
3873							L<C<set_column_names>\|/set_column_names>.
3874
3875							=head3 munge_column_names
3876							X<munge_column_names>
3877
3878							If C<munge_column_names> is set, the method L</header> is invoked on the
3879							opened stream with all matching arguments to detect and set the headers.
3880
3881							C<munge_column_names> can be abbreviated to C<munge>.
3882
3883							=head3 key
3884							X<key>
3885
3886							If passed, will default L<C<headers>\|/headers> to C<"auto"> and return a
3887							hashref instead of an array of hashes. Allowed values are simple scalars or
3888							array-references where the first element is the joiner and the rest are the
3889							fields to join to combine the key.
3890
3891							my $ref = csv (in => "test.csv", key => "code");
3892							my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ]);
3893
3894							with test.csv like
3895
3896							code,product,price,color
3897							1,pc,850,gray
3898							2,keyboard,12,white
3899							3,mouse,5,black
3900
3901							the first example will return
3902
3903							{ 1 => {
3904							code => 1,
3905							color => 'gray',
3906							price => 850,
3907							product => 'pc'
3908							},
3909							2 => {
3910							code => 2,
3911							color => 'white',
3912							price => 12,
3913							product => 'keyboard'
3914							},
3915							3 => {
3916							code => 3,
3917							color => 'black',
3918							price => 5,
3919							product => 'mouse'
3920							}
3921							}
3922
3923							the second example will return
3924
3925							{ "1:gray" => {
3926							code => 1,
3927							color => 'gray',
3928							price => 850,
3929							product => 'pc'
3930							},
3931							"2:white" => {
3932							code => 2,
3933							color => 'white',
3934							price => 12,
3935							product => 'keyboard'
3936							},
3937							"3:black" => {
3938							code => 3,
3939							color => 'black',
3940							price => 5,
3941							product => 'mouse'
3942							}
3943							}
3944
3945							The C<key> attribute can be combined with L<C<headers>\|/headers> for C<CSV>
3946							data that has no header line, like
3947
3948							my $ref = csv (
3949							in => "foo.csv",
3950							headers => [qw( c_foo foo bar description stock )],
3951							key => "c_foo",
3952							);
3953
3954							=head3 value
3955							X<value>
3956
3957							Used to create key-value hashes.
3958
3959							Only allowed when C<key> is valid. A C<value> can be either a single column
3960							label or an anonymous list of column labels. In the first case, the value
3961							will be a simple scalar value, in the latter case, it will be a hashref.
3962
3963							my $ref = csv (in => "test.csv", key => "code",
3964							value => "price");
3965							my $ref = csv (in => "test.csv", key => "code",
3966							value => [ "product", "price" ]);
3967							my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ],
3968							value => "price");
3969							my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ],
3970							value => [ "product", "price" ]);
3971
3972							with test.csv like
3973
3974							code,product,price,color
3975							1,pc,850,gray
3976							2,keyboard,12,white
3977							3,mouse,5,black
3978
3979							the first example will return
3980
3981							{ 1 => 850,
3982							2 => 12,
3983							3 => 5,
3984							}
3985
3986							the second example will return
3987
3988							{ 1 => {
3989							price => 850,
3990							product => 'pc'
3991							},
3992							2 => {
3993							price => 12,
3994							product => 'keyboard'
3995							},
3996							3 => {
3997							price => 5,
3998							product => 'mouse'
3999							}
4000							}
4001
4002							the third example will return
4003
4004							{ "1:gray" => 850,
4005							"2:white" => 12,
4006							"3:black" => 5,
4007							}
4008
4009							the fourth example will return
4010
4011							{ "1:gray" => {
4012							price => 850,
4013							product => 'pc'
4014							},
4015							"2:white" => {
4016							price => 12,
4017							product => 'keyboard'
4018							},
4019							"3:black" => {
4020							price => 5,
4021							product => 'mouse'
4022							}
4023							}
4024
4025							=head3 keep_headers
4026							X<keep_headers>
4027							X<keep_column_names>
4028							X<kh>
4029
4030							When using hashes, store the column names in the arrayref passed, so all
4031							headers are available after the call in the original order.
4032
4033							my $aoh = csv (in => "file.csv", keep_headers => \my @hdr);
4034
4035							This attribute can be abbreviated to C<kh> or passed as C<keep_column_names>.
4036
4037							This attribute implies a default of C<auto> for the C<headers> attribute.
4038
4039							X<stable header order>
4040							X<internal headers>
4041							The headers can also be kept internally to keep stable header order:
4042
4043							csv (in => csv (in => "file.csv", kh => "internal"),
4044							out => "new.csv",
4045							kh => "internal");
4046
4047							where C<internal> can also be C<1>, C<yes>, or C<true>. This is similar to
4048
4049							my @h;
4050							csv (in => csv (in => "file.csv", kh => \@h),
4051							out => "new.csv",
4052							headers => \@h);
4053
4054							=head3 fragment
4055							X<fragment>
4056
4057							Only output the fragment as defined in the L</fragment> method. This option
4058							is ignored when I<generating> C<CSV>. See L</out>.
4059
4060							Combining all of them could give something like
4061
4062							use Text::CSV_XS qw( csv );
4063							my $aoh = csv (
4064							in => "test.txt",
4065							encoding => "utf-8",
4066							headers => "auto",
4067							sep_char => "\|",
4068							fragment => "row=3;6-9;15-*",
4069							);
4070							say $aoh->[15]{Foo};
4071
4072							=head3 sep_set
4073							X<sep_set>
4074							X<seps>
4075
4076							If C<sep_set> is set, the method L</header> is invoked on the opened stream
4077							to detect and set L<C<sep_char>\|/sep_char> with the given set.
4078
4079							C<sep_set> can be abbreviated to C<seps>. If neither C<sep_set> nor C<seps>
4080							is given, but C<sep> is defined, C<sep_set> defaults to C<[ sep ]>. This is
4081							only supported for perl version 5.10 and up.
4082
4083							Note that as the L</header> method is invoked, its default is to also set
4084							the headers.
4085
4086							=head3 set_column_names
4087							X<set_column_names>
4088
4089							If C<set_column_names> is passed, the method L</header> is invoked on the
4090							opened stream with all arguments meant for L</header>.
4091
4092							If C<set_column_names> is passed as a false value, the content of the first
4093							row is only preserved if the output is AoA:
4094
4095							With an input-file like
4096
4097							bAr,foo
4098							1,2
4099							3,4,5
4100
4101							This call
4102
4103							my $aoa = csv (in => $file, set_column_names => 0);
4104
4105							will result in
4106
4107							[[ "bar", "foo" ],
4108							[ "1", "2" ],
4109							[ "3", "4", "5" ]]
4110
4111							and
4112
4113							my $aoa = csv (in => $file, set_column_names => 0, munge => "none");
4114
4115							will result in
4116
4117							[[ "bAr", "foo" ],
4118							[ "1", "2" ],
4119							[ "3", "4", "5" ]]
4120
4121							=head3 csv
4122							X<csv>
4123
4124							The I<function> L</csv> can also be called as a method or with an existing
4125							Text::CSV_XS object. This could help if the function is to be invoked a lot
4126							of times and the overhead of creating the object internally over and over
4127							again would be prevented by passing an existing instance.
4128
4129							my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });
4130
4131							my $aoa = $csv->csv (in => $fh);
4132							my $aoa = csv (in => $fh, csv => $csv);
4133
4134							both act the same. Running this 20000 times on a 20 lines CSV file, showed
4135							a 53% speedup.
4136
4137							=head2 Callbacks
4138							X<Callbacks>
4139
4140							Callbacks enable actions triggered from the I<inside> of Text::CSV_XS.
4141
4142							While most of what this enables can easily be done in an unrolled loop as
4143							described in the L</SYNOPSIS>, callbacks can be used to meet special demands
4144							or enhance the L</csv> function.
4145
4146							=over 2
4147
4148							=item error
4149							X<error>
4150
4151							$csv->callbacks (error => sub { $csv->SetDiag (0) });
4152
4153							the C<error> callback is invoked when an error occurs, but I<only> when
4154							L</auto_diag> is set to a true value. A callback is invoked with the values
4155							returned by L</error_diag>:
4156
4157							my ($c, $s);
4158
4159							sub ignore3006 {
4160							my ($err, $msg, $pos, $recno, $fldno) = @_;
4161							if ($err == 3006) {
4162							# ignore this error
4163							($c, $s) = (undef, undef);
4164							Text::CSV_XS->SetDiag (0);
4165							}
4166							# Any other error
4167							return;
4168							} # ignore3006
4169
4170							$csv->callbacks (error => \&ignore3006);
4171							$csv->bind_columns (\$c, \$s);
4172							while ($csv->getline ($fh)) {
4173							# Error 3006 will not stop the loop
4174							}
4175
4176							=item after_parse
4177							X<after_parse>
4178
4179							$csv->callbacks (after_parse => sub { push @{$_[1]}, "NEW" });
4180							while (my $row = $csv->getline ($fh)) {
4181							$row->[-1] eq "NEW";
4182							}
4183
4184							This callback is invoked after parsing with L</getline> only if no error
4185							occurred. The callback is invoked with two arguments: the current C<CSV>
4186							parser object and an array reference to the fields parsed.
4187
4188							The return code of the callback is ignored unless it is a reference to the
4189							string "skip", in which case the record will be skipped in L</getline_all>.
4190
4191							sub add_from_db {
4192							my ($csv, $row) = @_;
4193							$sth->execute ($row->[4]);
4194							push @$row, $sth->fetchrow_array;
4195							} # add_from_db
4196
4197							my $aoa = csv (in => "file.csv", callbacks => {
4198							after_parse => \&add_from_db });
4199
4200							This hook can be used for validation:
4201							X<data_validation>
4202
4203							=over 2
4204
4205							=item FAIL
4206
4207							Die if any of the records does not validate a rule:
4208
4209							after_parse => sub {
4210							$_[1][4] =~ m/^[0-9]{4}\s?[A-Z]{2}$/ or
4211							die "5th field does not have a valid Dutch zipcode";
4212							}
4213
4214							=item DEFAULT
4215
4216							Replace invalid fields with a default value:
4217
4218							after_parse => sub { $_[1][2] =~ m/^\d+$/ or $_[1][2] = 0 }
4219
4220							=item SKIP
4221
4222							Skip records that have invalid fields (only applies to L</getline_all>):
4223
4224							after_parse => sub { $_[1][0] =~ m/^\d+$/ or return \"skip"; }
4225
4226							=back
4227
4228							=item before_print
4229							X<before_print>
4230
4231							my $idx = 1;
4232							$csv->callbacks (before_print => sub { $_[1][0] = $idx++ });
4233							$csv->print (*STDOUT, [ 0, $_ ]) for @members;
4234
4235							This callback is invoked before printing with L</print> only if no error
4236							occurred. The callback is invoked with two arguments: the current C<CSV>
4237							parser object and an array reference to the fields passed.
4238
4239							The return code of the callback is ignored.
4240
4241							sub max_4_fields {
4242							my ($csv, $row) = @_;
4243							@$row > 4 and splice @$row, 4;
4244							} # max_4_fields
4245
4246							csv (in => csv (in => "file.csv"), out => *STDOUT,
4247							callbacks => { before_print => \&max_4_fields });
4248
4249							This callback is not active for L</combine>.
4250
4251							=back
4252
4253							=head3 Callbacks for csv ()
4254
4255							The L</csv> allows for some callbacks that do not integrate in XS internals
4256							but only feature the L</csv> function.
4257
4258							csv (in => "file.csv",
4259							callbacks => {
4260							filter => { 6 => sub { $_ > 15 } }, # first
4261							after_parse => sub { say "AFTER PARSE"; }, # first
4262							after_in => sub { say "AFTER IN"; }, # second
4263							on_in => sub { say "ON IN"; }, # third
4264							},
4265							);
4266
4267							csv (in => $aoh,
4268							out => "file.csv",
4269							callbacks => {
4270							on_in => sub { say "ON IN"; }, # first
4271							before_out => sub { say "BEFORE OUT"; }, # second
4272							before_print => sub { say "BEFORE PRINT"; }, # third
4273							},
4274							);
4275
4276							=over 2
4277
4278							=item filter
4279							X<filter>
4280
4281							This callback can be used to filter records. It is called just after a new
4282							record has been scanned. The callback accepts a:
4283
4284							=over 2
4285
4286							=item hashref
4287
4288							The keys are the index to the row (the field name or field number, 1-based)
4289							and the values are subs to return a true or false value.
4290
4291							csv (in => "file.csv", filter => {
4292							3 => sub { m/a/ }, # third field should contain an "a"
4293							5 => sub { length > 4 }, # length of the 5th field minimal 5
4294							});
4295
4296							csv (in => "file.csv", filter => { foo => sub { $_ > 4 }});
4297
4298							If the keys to the filter hash contain any character that is not a digit it
4299							will also implicitly set L</headers> to C<"auto"> unless L</headers> was
4300							already passed as argument. When headers are active, returning an array of
4301							hashes, the filter is not applicable to the header itself.
4302
4303							All sub results should match, as in AND.
4304
4305							The context of the callback sets C<$_> localized to the field indicated by
4306							the filter. The two arguments are as with all other callbacks, so the other
4307							fields in the current row can be seen:
4308
4309							filter => { 3 => sub { $_ > 100 ? $_[1][1] =~ m/A/ : $_[1][6] =~ m/B/ }}
4310
4311							If the context is set to return a list of hashes (L</headers> is defined),
4312							the current record will also be available in the localized C<%_>:
4313
4314							filter => { 3 => sub { $_ > 100 && $_{foo} =~ m/A/ && $_{bar} < 1000 }}
4315
4316							If the filter is used to I<alter> the content by changing C<$_>, make sure
4317							that the sub returns true in order not to have that record skipped:
4318
4319							filter => { 2 => sub { $_ = uc }}
4320
4321							will upper-case the second field, and then skip it if the resulting content
4322							evaluates to false. To always accept, end with truth:
4323
4324							filter => { 2 => sub { $_ = uc; 1 }}
4325
4326							=item coderef
4327
4328							csv (in => "file.csv", filter => sub { $n++; 0; });
4329
4330							If the argument to C<filter> is a coderef, it is an alias or shortcut to a
4331							filter on column 0:
4332
4333							csv (filter => sub { $n++; 0 });
4334
4335							is equal to
4336
4337							csv (filter => { 0 => sub { $n++; 0 } });
4338
4339							=item filter-name
4340
4341							csv (in => "file.csv", filter => "not_blank");
4342							csv (in => "file.csv", filter => "not_empty");
4343							csv (in => "file.csv", filter => "filled");
4344
4345							These are predefined filters
4346
4347							Given a file like (line numbers prefixed for doc purpose only):
4348
4349							1:1,2,3
4350							2:
4351							3:,
4352							4:""
4353							5:,,
4354							6:, ,
4355							7:"",
4356							8:" "
4357							9:4,5,6
4358
4359							=over 2
4360
4361							=item not_blank
4362
4363							Filter out the blank lines
4364
4365							This filter is a shortcut for
4366
4367							filter => { 0 => sub { @{$_[1]} > 1 or
4368							defined $_[1][0] && $_[1][0] ne "" } }
4369
4370							Due to the implementation, it is currently impossible to also filter lines
4371							that consist only of a quoted empty field. These lines are also considered
4372							blank lines.
4373
4374							With the given example, lines 2 and 4 will be skipped.
4375
4376							=item not_empty
4377
4378							Filter out lines where all the fields are empty.
4379
4380							This filter is a shortcut for
4381
4382							filter => { 0 => sub { grep { defined && $_ ne "" } @{$_[1]} } }
4383
4384							A space is not regarded as being empty, so given the example data, lines 2, 3,
4385							4, 5, and 7 are skipped.
4386
4387							=item filled
4388
4389							Filter out lines that have no visible data
4390
4391							This filter is a shortcut for
4392
4393							filter => { 0 => sub { grep { defined && m/\S/ } @{$_[1]} } }
4394
4395							This filter rejects all lines that do I<not> have at least one field that
4396							contains a non-whitespace character.
4397
4398							With the given example data, this filter would skip lines 2 through 8.
4399
4400							=back
4401
4402							=back
4403
4404							One could also use modules like L<Types::Standard>:
4405
4406							use Types::Standard -types;
4407
4408							my $type = Tuple[Str, Str, Int, Bool, Optional[Num]];
4409							my $check = $type->compiled_check;
4410
4411							# filter with compiled check and warnings
4412							my $aoa = csv (
4413							in => \$data,
4414							filter => {
4415							0 => sub {
4416							my $ok = $check->($_[1]) or
4417							warn $type->get_message ($_[1]), "\n";
4418							return $ok;
4419							},
4420							},
4421							);
4422
4423							=item after_in
4424							X<after_in>
4425
4426							This callback is invoked for each record after all records have been parsed
4427							but before returning the reference to the caller. The hook is invoked with
4428							two arguments: the current C<CSV> parser object and a reference to the
4429							record. The reference can be a reference to a HASH or a reference to an
4430							ARRAY as determined by the arguments.
4431
4432							This callback can also be passed as an attribute without the C<callbacks>
4433							wrapper.
4434
4435							=item before_out
4436							X<before_out>
4437
4438							This callback is invoked for each record before the record is printed. The
4439							hook is invoked with two arguments: the current C<CSV> parser object and a
4440							reference to the record. The reference can be a reference to a HASH or a
4441							reference to an ARRAY as determined by the arguments.
4442
4443							This callback can also be passed as an attribute without the C<callbacks>
4444							wrapper.
4445
4446							This callback makes the row available in C<%_> if the row is a hashref. In
4447							this case C<%_> is writable and will change the original row.
4448
4449							=item on_in
4450							X<on_in>
4451
4452							This callback acts exactly as the L</after_in> or the L</before_out> hooks.
4453
4454							This callback can also be passed as an attribute without the C<callbacks>
4455							wrapper.
4456
4457							This callback makes the row available in C<%_> if the row is a hashref. In
4458							this case C<%_> is writable and will change the original row. So e.g. with
4459
4460							my $aoh = csv (
4461							in => \"foo\n1\n2\n",
4462							headers => "auto",
4463							on_in => sub { $_{bar} = 2; },
4464							);
4465
4466							C<$aoh> will be:
4467
4468							[ { foo => 1,
4469							bar => 2,
4470							}
4471							{ foo => 2,
4472							bar => 2,
4473							}
4474							]
4475
4476							=item on_error
4477							X<on_error>
4478
4479							This callback acts exactly as the L</error> hook.
4480
4481							my @err;
4482							my $aoa = csv (in => $fh, on_error => sub { @err = @_ });
4483
4484							is identical to
4485
4486							my $aoa = csv (in => $fh, callbacks => {
4487							error => sub { @err = @_ },
4488							});
4489
4490							It can be used for ignoring errors as well as for just keeping the error in
4491							case of analysis after the C<csv ()> function has returned.
4492
4493							my @err;
4494							my $aoa = csv (in => "bad.csv", on_error => sub { @err = @_ });
4495							die Text::CSV_XS->error_diag if @err or !$aoa;
4496
4497							=back
4498
4499							=head1 INTERNALS
4500
4501							=over 4
4502
4503							=item Combine (...)
4504
4505							=item Parse (...)
4506
4507							=back
4508
4509							The arguments to these internal functions are deliberately not described or
4510							documented in order to enable the module authors to change them when they
4511							feel the need for it. Using them is highly discouraged as the API may
4512							change in future releases.
4513
4514							=head1 EXAMPLES
4515
4516							=head2 Reading a CSV file line by line:
4517
4518							my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });
4519							open my $fh, "<", "file.csv" or die "file.csv: $!";
4520							while (my $row = $csv->getline ($fh)) {
4521							# do something with @$row
4522							}
4523							close $fh or die "file.csv: $!";
4524
4525							or
4526
4527							my $aoa = csv (in => "file.csv", on_in => sub {
4528							# do something with %_
4529							});
4530
4531							=head3 Reading only a single column
4532
4533							my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });
4534							open my $fh, "<", "file.csv" or die "file.csv: $!";
4535							# get only the 4th column
4536							my @column = map { $_->[3] } @{$csv->getline_all ($fh)};
4537							close $fh or die "file.csv: $!";
4538
4539							with L</csv>, you could do
4540
4541							my @column = map { $_->[0] }
4542							@{csv (in => "file.csv", fragment => "col=4")};
4543
4544							=head2 Parsing CSV strings:
4545
4546							my $csv = Text::CSV_XS->new ({ keep_meta_info => 1, binary => 1 });
4547
4548							my $sample_input_string =
4549							qq{"I said, ""Hi!""",Yes,"",2.34,,"1.09","\x{20ac}",};
4550							if ($csv->parse ($sample_input_string)) {
4551							my @field = $csv->fields;
4552							foreach my $col (0 .. $#field) {
4553							my $quo = $csv->is_quoted ($col) ? $csv->{quote_char} : "";
4554							printf "%2d: %s%s%s\n", $col, $quo, $field[$col], $quo;
4555							}
4556							}
4557							else {
4558							print STDERR "parse () failed on argument: ",
4559							$csv->error_input, "\n";
4560							$csv->error_diag ();
4561							}
4562
4563							=head3 Parsing CSV from memory
4564
4565							Given a complete CSV data-set in scalar C<$data>, generate a list of lists
4566							to represent the rows and fields
4567
4568							# The data
4569							my $data = join "\r\n" => map { join "," => 0 .. 5 } 0 .. 5;
4570
4571							# in a loop
4572							my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });
4573							open my $fh, "<", \$data;
4574							my @foo;
4575							while (my $row = $csv->getline ($fh)) {
4576							push @foo, $row;
4577							}
4578							close $fh;
4579
4580							# a single call
4581							my $foo = csv (in => \$data);
4582
4583							=head2 Printing CSV data
4584
4585							=head3 The fast way: using L</print>
4586
4587							An example for creating C<CSV> files using the L</print> method:
4588
4589							my $csv = Text::CSV_XS->new ({ binary => 1, eol => $/ });
4590							open my $fh, ">", "foo.csv" or die "foo.csv: $!";
4591							for (1 .. 10) {
4592							$csv->print ($fh, [ $_, "$_" ]) or $csv->error_diag;
4593							}
4594							close $fh or die "foo.csv: $!";
4595
4596							=head3 The slow way: using L</combine> and L</string>
4597
4598							or using the slower L</combine> and L</string> methods:
4599
4600							my $csv = Text::CSV_XS->new;
4601
4602							open my $csv_fh, ">", "hello.csv" or die "hello.csv: $!";
4603
4604							my @sample_input_fields = (
4605							'You said, "Hello!"', 5.67,
4606							'"Surely"', '', '3.14159');
4607							if ($csv->combine (@sample_input_fields)) {
4608							print $csv_fh $csv->string, "\n";
4609							}
4610							else {
4611							print "combine () failed on argument: ",
4612							$csv->error_input, "\n";
4613							}
4614							close $csv_fh or die "hello.csv: $!";
4615
4616							=head3 Generating CSV into memory
4617
4618							Format a data-set (C<@foo>) into a scalar value in memory (C<$data>):
4619
4620							# The data
4621							my @foo = map { [ 0 .. 5 ] } 0 .. 3;
4622
4623							# in a loop
4624							my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1, eol => "\r\n" });
4625							open my $fh, ">", \my $data;
4626							$csv->print ($fh, $_) for @foo;
4627							close $fh;
4628
4629							# a single call
4630							csv (in => \@foo, out => \my $data);
4631
4632							=head2 Rewriting CSV
4633
4634							=head3 Changing separator
4635
4636							Rewrite C<CSV> files with C<;> as separator character to well-formed C<CSV>:
4637
4638							use Text::CSV_XS qw( csv );
4639							csv (in => csv (in => "bad.csv", sep_char => ";"), out => *STDOUT);
4640
4641							As C<STDOUT> is now default in L</csv>, a one-liner converting a UTF-16 CSV
4642							file with BOM and TAB-separation to valid UTF-8 CSV could be:
4643
4644							$ perl -C3 -MText::CSV_XS=csv -we\
4645							'csv(in=>"utf16tab.csv",encoding=>"utf16",sep=>"\t")' >utf8.csv
4646
4647							=head3 Unifying EOL
4648
4649							Rewrite a CSV file with mixed EOL and/or inconsistent quotation into a new
4650							CSV file with consistent EOL and quotation. Attributes apply.
4651
4652							use Text::CSV_XS qw( csv );
4653							csv (in => "file.csv", out => "newfile.csv", quote_space => 1);
4654
4655							=head2 Dumping database tables to CSV
4656
4657							Dumping a database table can be as simple as this (TIMTOWTDI):
4658
4659							my $dbh = DBI->connect (...);
4660							my $sql = "select * from foo";
4661
4662							# using your own loop
4663							open my $fh, ">", "foo.csv" or die "foo.csv: $!\n";
4664							my $csv = Text::CSV_XS->new ({ binary => 1, eol => "\r\n" });
4665							my $sth = $dbh->prepare ($sql); $sth->execute;
4666							$csv->print ($fh, $sth->{NAME_lc});
4667							while (my $row = $sth->fetch) {
4668							$csv->print ($fh, $row);
4669							}
4670
4671							# using the csv function, all in memory
4672							csv (out => "foo.csv", in => $dbh->selectall_arrayref ($sql));
4673
4674							# using the csv function, streaming with callbacks
4675							my $sth = $dbh->prepare ($sql); $sth->execute;
4676							csv (out => "foo.csv", in => sub { $sth->fetch });
4677							csv (out => "foo.csv", in => sub { $sth->fetchrow_hashref });
4678
4679							Note that this does not discriminate between "empty" values and NULL-values
4680							from the database, as both will be the same empty field in CSV. To enable
4681							distinction between the two, use L<C<quote_empty>\|/quote_empty>.
4682
4683							csv (out => "foo.csv", in => sub { $sth->fetch }, quote_empty => 1);
4684
4685							If the database import utility supports special sequences to insert C<NULL>
4686							values into the database, like MySQL/MariaDB supports C<\N>, use a filter
4687							or a map
4688
4689							csv (out => "foo.csv", in => sub { $sth->fetch },
4690							on_in => sub { $_ //= "\\N" for @{$_[1]} });
4691
4692							while (my $row = $sth->fetch) {
4693							$csv->print ($fh, [ map { $_ // "\\N" } @$row ]);
4694							}
4695
4696							Note that this will not work as expected when choosing the backslash (C<\>)
4697							as C<escape_char>, as that will cause the C<\> to need to be escaped by yet
4698							another C<\>, which will cause the field to need quotation and thus ending
4699							up as C<"\\N"> instead of C<\N>. See also L<C<undef_str>\|/undef_str>.
4700
4701							csv (out => "foo.csv", in => sub { $sth->fetch }, undef_str => "\\N");
4702
4703							These special sequences are not recognized by Text::CSV_XS on parsing the
4704							CSV generated like this, but map and filter are your friends again
4705
4706							while (my $row = $csv->getline ($fh)) {
4707							$sth->execute (map { $_ eq "\\N" ? undef : $_ } @$row);
4708							}
4709
4710							csv (in => "foo.csv", filter => { 1 => sub {
4711							$sth->execute (map { $_ eq "\\N" ? undef : $_ } @{$_[1]}); 0; }});
4712
4713							=head2 Converting CSV to JSON
4714
4715							use Text::CSV_XS qw( csv );
4716							use JSON; # or Cpanel::JSON::XS for better performance
4717
4718							# AoA (no header interpretation)
4719							say encode_json (csv (in => "file.csv"));
4720
4721							# AoH (convert to structures)
4722							say encode_json (csv (in => "file.csv", bom => 1));
4723
4724							Yes, it is that simple.
4725
4726							=head2 The examples folder
4727
4728							For more extended examples, see the F<examples/> C<1>. sub-directory in the
4729							original distribution or the git repository C<2>.
4730
4731							1. https://github.com/Tux/Text-CSV_XS/tree/master/examples
4732							2. https://github.com/Tux/Text-CSV_XS
4733
4734							The following files can be found there:
4735
4736							=over 2
4737
4738							=item parser-xs.pl
4739							X<parser-xs.pl>
4740
4741							This can be used as a boilerplate to parse invalid C<CSV> and parse beyond
4742							(expected) errors as an alternative to using the L</error> callback.
4743
4744							$ perl examples/parser-xs.pl bad.csv >good.csv
4745
4746							=item csv-check
4747							X<csv-check>
4748
4749							This is a command-line tool that uses parser-xs.pl techniques to check the
4750							C<CSV> file and report on its content.
4751
4752							$ csv-check files/utf8.csv
4753							Checked files/utf8.csv with csv-check 1.9
4754							using Text::CSV_XS 1.32 with perl 5.26.0 and Unicode 9.0.0
4755							OK: rows: 1, columns: 2
4756							sep = <,>, quo = <">, bin = <1>, eol = <"\n">
4757
4758							=item csv-split
4759							X<csv-split>
4760
4761							This command splits C<CSV> files into smaller files, keeping (part of) the
4762							header. Options include maximum number of (data) rows per file and maximum
4763							number of columns per file or a combination of the two.
4764
4765							=item csv2xls
4766							X<csv2xls>
4767
4768							A script to convert C<CSV> to Microsoft Excel (C<XLS>). This requires extra
4769							modules L<Date::Calc> and L<Spreadsheet::WriteExcel>. The converter accepts
4770							various options and can produce UTF-8 compliant Excel files.
4771
4772							=item csv2xlsx
4773							X<csv2xlsx>
4774
4775							A script to convert C<CSV> to Microsoft Excel (C<XLSX>). This requires the
4776							modules L<Date::Calc> and L<Spreadsheet::Writer::XLSX>. The converter does
4777							accept various options including merging several C<CSV> files into a single
4778							Excel file.
4779
4780							=item csvdiff
4781							X<csvdiff>
4782
4783							A script that provides colorized diff on sorted CSV files, assuming first
4784							line is header and first field is the key. Output options include colorized
4785							ANSI escape codes or HTML.
4786
4787							$ csvdiff --html --output=diff.html file1.csv file2.csv
4788
4789							=item rewrite.pl
4790							X<rewrite.pl>
4791
4792							A script to rewrite (in)valid CSV into valid CSV files. Script has options
4793							to generate confusing CSV files or CSV files that conform to Dutch MS-Excel
4794							exports (using C<;> as separation).
4795
4796							Script - by default - honors BOM and auto-detects separation converting it
4797							to default standard CSV with C<,> as separator.
4798
4799							=back
4800
4801							=head1 CAVEATS
4802
4803							Text::CSV_XS is I<not> designed to detect the characters used to quote and
4804							separate fields. The parsing is done using predefined (default) settings.
4805							In the examples sub-directory, you can find scripts that demonstrate how
4806							you could try to detect these characters yourself.
4807
4808							=head2 Microsoft Excel
4809
4810							The import/export from Microsoft Excel is a I<risky task>, according to the
4811							documentation in C<Text::CSV::Separator>. Microsoft uses the system's list
4812							separator defined in the regional settings, which happens to be a semicolon
4813							for Dutch, German and Spanish (and probably some others as well). For the
4814							English locale, the default is a comma. In Windows however, the user is
4815							free to choose a predefined locale, and then change I<every> individual
4816							setting in it, so checking the locale is no solution.
4817
4818							As of version 1.17, a lone first line with just
4819
4820							sep=;
4821
4822							will be recognized and honored when parsing with L</getline>.
4823
4824							=head1 TODO
4825
4826							=over 2
4827
4828							=item More Errors & Warnings
4829
4830							New extensions ought to be clear and concise in reporting what error has
4831							occurred where and why, and maybe also offer a remedy to the problem.
4832
4833							L</error_diag> is a (very) good start, but there is more work to be done in
4834							this area.
4835
4836							Basic calls should croak or warn on illegal parameters. Errors should be
4837							documented.
4838
4839							=item setting meta info
4840
4841							Future extensions might include extending the L</meta_info>, L</is_quoted>,
4842							and L</is_binary> to accept setting these flags for fields, so you can
4843							specify which fields are quoted in the L</combine>/L</string> combination.
4844
4845							$csv->meta_info (0, 1, 1, 3, 0, 0);
4846							$csv->is_quoted (3, 1);
4847
4848							L<Metadata Vocabulary for Tabular Data\|http://w3c.github.io/csvw/metadata/>
4849							(a W3C editor's draft) could be an example for supporting more metadata.
4850
4851							=item Parse the whole file at once
4852
4853							Implement new methods or functions that enable parsing of a complete file
4854							at once, returning a list of hashes. Possible extension to this could be to
4855							enable a column selection on the call:
4856
4857							my @AoH = $csv->parse_file ($filename, { cols => [ 1, 4..8, 12 ]});
4858
4859							returning something like
4860
4861							[ { fields => [ 1, 2, "foo", 4.5, undef, "", 8 ],
4862							flags => [ ... ],
4863							},
4864							{ fields => [ ... ],
4865							.
4866							},
4867							]
4868
4869							Note that the L</csv> function already supports most of this, but does not
4870							return flags. L</getline_all> returns all rows for an open stream, but this
4871							will not return flags either. L</fragment> can reduce the required rows
4872							I<or> columns, but cannot combine them.
4873
4874							=item provider
4875
4876							csv (in => $fh) vs csv (provider => sub { get_line });
4877
4878							Whatever the attribute name might end up to be, this should make it easier
4879							to add input providers for parsing. Currently most special variations for
4880							the C<in> attribute are aimed at CSV generation: e.g. a callback is defined
4881							to return a reference to a record. This new attribute should enable passing
4882							data to parse, like getline.
4883
4884							Suggested by Johan Vromans.
4885
4886							=item Cookbook
4887
4888							Write a document that has recipes for most known non-standard (and maybe
4889							some standard) C<CSV> formats, including formats that use C<TAB>, C<;>,
4890							C<\|>, or other non-comma separators.
4891
4892							Examples could be taken from W3C's L<CSV on the Web: Use Cases and
4893							Requirements\|http://w3c.github.io/csvw/use-cases-and-requirements/index.html>
4894
4895							=item Steal
4896
4897							Steal good new ideas and features from L<PapaParse\|http://papaparse.com> or
4898							L<csvkit\|http://csvkit.readthedocs.org>.
4899
4900							=item Raku support
4901
4902							Raku support can be found L<here\|https://github.com/Tux/CSV>. The interface
4903							is richer in support than the Perl5 API, as Raku supports more types.
4904
4905							The Raku version does not (yet) support pure binary CSV datasets.
4906
4907							=back
4908
4909							=head2 NOT TODO
4910
4911							=over 2
4912
4913							=item combined methods
4914
4915							Requests for adding means (methods) that combine L</combine> and L</string>
4916							in a single call will B<not> be honored (use L</print> instead). Likewise
4917							for L</parse> and L</fields> (use L</getline> instead), given the problems
4918							with embedded newlines.
4919
4920							=back
4921
4922							=head2 Release plan
4923
4924							No guarantees, but this is what I had in mind some time ago:
4925
4926							=over 2
4927
4928							=item *
4929
4930							DIAGNOSTICS section in pod to describe the errors (see below)
4931
4932							=back
4933
4934							=head1 EBCDIC
4935
4936							Everything should now work on native EBCDIC systems. As the test does not
4937							cover all possible codepoints and L<Encode> does not support C<utf-ebcdic>,
4938							there is no guarantee that all handling of Unicode is done correctly.
4939
4940							Opening C<EBCDIC> encoded files on C<ASCII>+ systems is likely to succeed
4941							using Encode's C<cp37>, C<cp1047>, or C<posix-bc>:
4942
4943							open my $fh, "<:encoding(cp1047)", "ebcdic_file.csv" or die "...";
4944
4945							=head1 DIAGNOSTICS
4946
4947							Still under construction ...
4948
4949							If an error occurs, C<< $csv->error_diag >> can be used to get information
4950							on the cause of the failure. Note that for speed reasons the internal value
4951							is never cleared on success, so using the value returned by L</error_diag>
4952							in normal cases - when no error occurred - may cause unexpected results.
4953
4954							If the constructor failed, the cause can be found using L</error_diag> as a
4955							class method, like C<< Text::CSV_XS->error_diag >>.
4956
4957							The C<< $csv->error_diag >> method is automatically invoked upon error when
4958							the constructor was called with L<C<auto_diag>\|/auto_diag> set to C<1> or
4959							C<2>, or when L<autodie> is in effect. When set to C<1>, this will cause a
4960							C<warn> with the error message, when set to C<2>, it will C<die>. C<2012 -
4961							EOF> is excluded from L<C<auto_diag>\|/auto_diag> reports.
4962
4963							Errors can be (individually) caught using the L</error> callback.
4964
4965							The errors as described below are available. I have tried to make the error
4966							itself explanatory enough, but more descriptions will be added. For most of
4967							these errors, the first three capitals describe the error category:
4968
4969							=over 2
4970
4971							=item *
4972							INI
4973
4974							Initialization error or option conflict.
4975
4976							=item *
4977							ECR
4978
4979							Carriage-Return related parse error.
4980
4981							=item *
4982							EOF
4983
4984							End-Of-File related parse error.
4985
4986							=item *
4987							EIQ
4988
4989							Parse error inside quotation.
4990
4991							=item *
4992							EIF
4993
4994							Parse error inside field.
4995
4996							=item *
4997							ECB
4998
4999							Combine error.
5000
5001							=item *
5002							EHR
5003
5004							HashRef parse related error.
5005
5006							=back
5007
5008							And below should be the complete list of error codes that can be returned:
5009
5010							=over 2
5011
5012							=item *
5013							1001 "INI - sep_char is equal to quote_char or escape_char"
5014							X<1001>
5015
5016							The L<separation character\|/sep_char> cannot be equal to L<the quotation
5017							character\|/quote_char> or to L<the escape character\|/escape_char>, as this
5018							would invalidate all parsing rules.
5019
5020							=item *
5021							1002 "INI - allow_whitespace with escape_char or quote_char SP or TAB"
5022							X<1002>
5023
5024							Using the L<C<allow_whitespace>\|/allow_whitespace> attribute when either
5025							L<C<quote_char>\|/quote_char> or L<C<escape_char>\|/escape_char> is equal to
5026							C<SPACE> or C<TAB> is too ambiguous to allow.
5027
5028							=item *
5029							1003 "INI - \r or \n in main attr not allowed"
5030							X<1003>
5031
5032							Using default L<C<eol>\|/eol> characters in either L<C<sep_char>\|/sep_char>,
5033							L<C<quote_char>\|/quote_char>, or L<C<escape_char>\|/escape_char> is not
5034							allowed.
5035
5036							=item *
5037							1004 "INI - callbacks should be undef or a hashref"
5038							X<1004>
5039
5040							The L<C<callbacks>\|/Callbacks> attribute only accepts C<undef> or
5041							a hash reference.
5042
5043							=item *
5044							1005 "INI - EOL too long"
5045							X<1005>
5046
5047							The value passed for EOL exceeds its maximum length (16).
5048
5049							=item *
5050							1006 "INI - SEP too long"
5051							X<1006>
5052
5053							The value passed for SEP exceeds its maximum length (16).
5054
5055							=item *
5056							1007 "INI - QUOTE too long"
5057							X<1007>
5058
5059							The value passed for QUOTE exceeds its maximum length (16).
5060
5061							=item *
5062							1008 "INI - SEP undefined"
5063							X<1008>
5064
5065							The value passed for SEP should be defined and not empty.
5066
5067							=item *
5068							1010 "INI - the header is empty"
5069							X<1010>
5070
5071							The header line parsed in the L</header> is empty.
5072
5073							=item *
5074							1011 "INI - the header contains more than one valid separator"
5075							X<1011>
5076
5077							The header line parsed in the L</header> contains more than one (unique)
5078							separator character out of the allowed set of separators.
5079
5080							=item *
5081							1012 "INI - the header contains an empty field"
5082							X<1012>
5083
5084							The header line parsed in the L</header> contains an empty field.
5085
5086							=item *
5087							1013 "INI - the header contains non-unique fields"
5088							X<1013>
5089
5090							The header line parsed in the L</header> contains at least two identical
5091							fields.
5092
5093							=item *
5094							1014 "INI - header called on undefined stream"
5095							X<1014>
5096
5097							The header line cannot be parsed from an undefined source.
5098
5099							=item *
5100							1500 "PRM - Invalid/unsupported argument(s)"
5101							X<1500>
5102
5103							Function or method called with invalid argument(s) or parameter(s).
5104
5105							=item *
5106							1501 "PRM - The key attribute is passed as an unsupported type"
5107							X<1501>
5108
5109							The C<key> attribute is of an unsupported type.
5110
5111							=item *
5112							1502 "PRM - The value attribute is passed without the key attribute"
5113							X<1502>
5114
5115							The C<value> attribute is only allowed when a valid key is given.
5116
5117							=item *
5118							1503 "PRM - The value attribute is passed as an unsupported type"
5119							X<1503>
5120
5121							The C<value> attribute is of an unsupported type.
5122
5123							=item *
5124							2010 "ECR - QUO char inside quotes followed by CR not part of EOL"
5125							X<2010>
5126
5127							When L<C<eol>\|/eol> has been set to anything but the default, like
5128							C<"\r\t\n">, and the C<"\r"> is following the B<second> (closing)
5129							L<C<quote_char>\|/quote_char>, where the characters following the C<"\r"> do
5130							not make up the L<C<eol>\|/eol> sequence, this is an error.
5131
5132							=item *
5133							2011 "ECR - Characters after end of quoted field"
5134							X<2011>
5135
5136							Sequences like C<1,foo,"bar"baz,22,1> are not allowed. C<"bar"> is a quoted
5137							field and after the closing double-quote, there should be either a new-line
5138							sequence or a separation character.
5139
5140							=item *
5141							2012 "EOF - End of data in parsing input stream"
5142							X<2012>
5143
5144							Self-explaining. End-of-file was reached while parsing a stream. This can
5145							happen only when reading from streams with L</getline>, as L</parse> works on
5146							strings that are not required to have a trailing L<C<eol>\|/eol>.
5147
5148							=item *
5149							2013 "INI - Specification error for fragments RFC7111"
5150							X<2013>
5151
5152							Invalid specification for the URI L</fragment>.
5153
5154							=item *
5155							2014 "ENF - Inconsistent number of fields"
5156							X<2014>
5157
5158							Inconsistent number of fields under strict parsing.
5159
5160							=item *
5161							2015 "ERW - Empty row"
5162							X<2015>
5163
5164							An empty row was not allowed.
5165
5166							=item *
5167							2016 "EOL - Inconsistent EOL"
5168							X<2016>
5169
5170							Inconsistent End-Of-Line detected under strict_eol parsing.
5171
5172							=item *
5173							2021 "EIQ - NL char inside quotes, binary off"
5174							X<2021>
5175
5176							Sequences like C<1,"foo\nbar",22,1> are allowed only when the binary option
5177							has been selected with the constructor.
5178
5179							=item *
5180							2022 "EIQ - CR char inside quotes, binary off"
5181							X<2022>
5182
5183							Sequences like C<1,"foo\rbar",22,1> are allowed only when the binary option
5184							has been selected with the constructor.
5185
5186							=item *
5187							2023 "EIQ - QUO character not allowed"
5188							X<2023>
5189
5190							Sequences like C<"foo "bar" baz",qu> and C<2023,",2008-04-05,"Foo, Bar",\n>
5191							will cause this error.
5192
5193							=item *
5194							2024 "EIQ - EOF cannot be escaped, not even inside quotes"
5195							X<2024>
5196
5197							The escape character is not allowed as last character in an input stream.
5198
5199							=item *
5200							2025 "EIQ - Loose unescaped escape"
5201							X<2025>
5202
5203							An escape character should escape only characters that need escaping.
5204
5205							Allowing the escape for other characters is possible with the attribute
5206							L</allow_loose_escapes>.
5207
5208							=item *
5209							2026 "EIQ - Binary character inside quoted field, binary off"
5210							X<2026>
5211
5212							Binary characters are not allowed by default. Exceptions are fields that
5213							contain valid UTF-8, which will automatically be upgraded. Set
5214							L<C<binary>\|/binary> to C<1> to accept binary data.
5215
5216							=item *
5217							2027 "EIQ - Quoted field not terminated"
5218							X<2027>
5219
5220							When parsing a field that started with a quotation character, the field is
5221							expected to be closed with a quotation character. When the parsed line is
5222							exhausted before the quote is found, that field is not terminated.
5223
5224							=item *
5225							2030 "EIF - NL char inside unquoted verbatim, binary off"
5226							X<2030>
5227
5228							=item *
5229							2031 "EIF - CR char is first char of field, not part of EOL"
5230							X<2031>
5231
5232							=item *
5233							2032 "EIF - CR char inside unquoted, not part of EOL"
5234							X<2032>
5235
5236							=item *
5237							2034 "EIF - Loose unescaped quote"
5238							X<2034>
5239
5240							=item *
5241							2035 "EIF - Escaped EOF in unquoted field"
5242							X<2035>
5243
5244							=item *
5245							2036 "EIF - ESC error"
5246							X<2036>
5247
5248							=item *
5249							2037 "EIF - Binary character in unquoted field, binary off"
5250							X<2037>
5251
5252							=item *
5253							2110 "ECB - Binary character in Combine, binary off"
5254							X<2110>
5255
5256							=item *
5257							2200 "EIO - print to IO failed. See errno"
5258							X<2200>
5259
5260							=item *
5261							3001 "EHR - Unsupported syntax for column_names ()"
5262							X<3001>
5263
5264							=item *
5265							3002 "EHR - getline_hr () called before column_names ()"
5266							X<3002>
5267
5268							=item *
5269							3003 "EHR - bind_columns () and column_names () fields count mismatch"
5270							X<3003>
5271
5272							=item *
5273							3004 "EHR - bind_columns () only accepts refs to scalars"
5274							X<3004>
5275
5276							=item *
5277							3006 "EHR - bind_columns () did not pass enough refs for parsed fields"
5278							X<3006>
5279
5280							=item *
5281							3007 "EHR - bind_columns needs refs to writable scalars"
5282							X<3007>
5283
5284							=item *
5285							3008 "EHR - unexpected error in bound fields"
5286							X<3008>
5287
5288							=item *
5289							3009 "EHR - print_hr () called before column_names ()"
5290							X<3009>
5291
5292							=item *
5293							3010 "EHR - print_hr () called with invalid arguments"
5294							X<3010>
5295
5296							=back
5297
5298							=head1 SEE ALSO
5299
5300							L<IO::File>, L<IO::Handle>, L<IO::Wrap>, L<Text::CSV>, L<Text::CSV_PP>,
5301							L<Text::CSV::Encoded>, L<Text::CSV::Separator>, L<Text::CSV::Slurp>,
5302							L<Spreadsheet::CSV> and L<Spreadsheet::Read>, and of course L<perl>.
5303
5304							If you are using Raku, have a look at C<Text::CSV> in the Raku ecosystem,
5305							offering the same features.
5306
5307							A beautiful L<Love Letter\|https://github.com/medialab/xan/blob/master/docs/LOVE_LETTER.md>
5308							to C<CSV> by the developers of L<xan\|https://github.com/medialab/xan#readme>.
5309
5310							=head3 non-perl
5311
5312							A CSV parser in JavaScript, also used by L<W3C\|http://www.w3.org>, is the
5313							multi-threaded in-browser L<PapaParse\|http://papaparse.com/>.
5314
5315							L<csvkit\|http://csvkit.readthedocs.org> is a python CSV parsing toolkit.
5316
5317							=head1 AUTHOR
5318
5319							Alan Citterman F<E<lt>alan@mfgrtl.comE<gt>> wrote the original Perl module.
5320							Please don't send mail concerning Text::CSV_XS to Alan, who is not involved
5321							in the C/XS part that is now the main part of the module.
5322
5323							Jochen Wiedmann F<E<lt>joe@ispsoft.deE<gt>> rewrote the en- and decoding in
5324							C by implementing a simple finite-state machine. He added variable quote,
5325							escape and separator characters, the binary mode and the print and getline
5326							methods. See F<ChangeLog> releases 0.10 through 0.23.
5327
5328							H.Merijn Brand F<E<lt>hmbrand@cpan.orgE<gt>> cleaned up the code, added
5329							the field flags methods, wrote the major part of the test suite, completed
5330							the documentation, fixed most RT bugs, added all the allow flags and the
5331							L</csv> function. See F<ChangeLog> releases 0.25 and on.
5332
5333							=head1 COPYRIGHT AND LICENSE
5334
5335							Copyright (C) 2007-2025 H.Merijn Brand. All rights reserved.
5336							Copyright (C) 1998-2001 Jochen Wiedmann. All rights reserved.
5337							Copyright (C) 1997 Alan Citterman. All rights reserved.
5338
5339							This library is free software; you can redistribute it and/or modify it
5340							under the same terms as Perl itself.
5341
5342							=cut
5343
5344							=for elvis
5345							:ex:se gw=75\|color guide #ff0000:
5346
5347							=cut