File Coverage

blib/lib/Text/CSV_XS.pm
Criterion Covered Total %
statement 937 950 98.7
branch 740 790 93.6
condition 397 466 84.9
subroutine 90 90 100.0
pod 68 68 100.0
total 2232 2364 94.4


line stmt bran cond sub pod time code
1             package Text::CSV_XS;
2              
3             # Copyright (c) 2007-2025 H.Merijn Brand. All rights reserved.
4             # Copyright (c) 1998-2001 Jochen Wiedmann. All rights reserved.
5             # Copyright (c) 1997 Alan Citterman. All rights reserved.
6             #
7             # This program is free software; you can redistribute it and/or
8             # modify it under the same terms as Perl itself.
9              
10             # HISTORY
11             #
12             # 0.24 - H.Merijn Brand <perl5@tux.freedom.nl>
13             # 0.10 - 0.23 Jochen Wiedmann <joe@ispsoft.de>
14             # Based on (the original) Text::CSV by Alan Citterman <alan@mfgrtl.com>
15              
16             require 5.006001;
17              
18 34     34   4112731 use strict;
  34         107  
  34         1587  
19 34     34   232 use warnings;
  34         99  
  34         2639  
20              
21             require Exporter;
22 34     34   245 use XSLoader;
  34         61  
  34         1019  
23 34     34   172 use Carp;
  34         133  
  34         3176  
24              
25 34     34   248 use vars qw( $VERSION @ISA @EXPORT_OK %EXPORT_TAGS );
  34         105  
  34         11151  
26             $VERSION = "1.61";
27             @ISA = qw( Exporter );
28             XSLoader::load ("Text::CSV_XS", $VERSION);
29              
30 4     4 1 13 sub PV { 0 } sub CSV_TYPE_PV { PV }
  12     12 1 268176  
31 4     4 1 16 sub IV { 1 } sub CSV_TYPE_IV { IV }
  12     12 1 229950  
32 4     4 1 11 sub NV { 2 } sub CSV_TYPE_NV { NV }
  12     12 1 95  
33              
34 11     11 1 71 sub CSV_FLAGS_IS_QUOTED { 0x0001 }
35 12     12 1 70 sub CSV_FLAGS_IS_BINARY { 0x0002 }
36 4     4 1 26 sub CSV_FLAGS_ERROR_IN_FIELD { 0x0004 }
37 20     20 1 80 sub CSV_FLAGS_IS_MISSING { 0x0010 }
38              
39             %EXPORT_TAGS = (
40             CONSTANTS => [qw(
41             CSV_FLAGS_IS_QUOTED
42             CSV_FLAGS_IS_BINARY
43             CSV_FLAGS_ERROR_IN_FIELD
44             CSV_FLAGS_IS_MISSING
45              
46             CSV_TYPE_PV
47             CSV_TYPE_IV
48             CSV_TYPE_NV
49             )],
50             );
51             @EXPORT_OK = (qw( csv PV IV NV ), @{$EXPORT_TAGS{'CONSTANTS'}});
52              
53             if ($] < 5.008002) {
54 34     34   256 no warnings "redefine";
  34         112  
  34         544317  
55             *utf8::decode = sub {};
56             }
57              
58             # version
59             #
60             # class/object method expecting no arguments and returning the version
61             # number of Text::CSV. there are no side-effects.
62              
63             sub version {
64 2     2 1 1012 return $VERSION;
65             } # version
66              
67             # new
68             #
69             # class/object method expecting no arguments and returning a reference to
70             # a newly created Text::CSV object.
71              
72             my %def_attr = (
73             'eol' => '',
74             'sep_char' => ',',
75             'quote_char' => '"',
76             'escape_char' => '"',
77             'binary' => 0,
78             'decode_utf8' => 1,
79             'auto_diag' => 0,
80             'diag_verbose' => 0,
81             'strict' => 0,
82             'strict_eol' => 0,
83             'blank_is_undef' => 0,
84             'empty_is_undef' => 0,
85             'allow_whitespace' => 0,
86             'allow_loose_quotes' => 0,
87             'allow_loose_escapes' => 0,
88             'allow_unquoted_escape' => 0,
89             'always_quote' => 0,
90             'quote_empty' => 0,
91             'quote_space' => 1,
92             'quote_binary' => 1,
93             'escape_null' => 1,
94             'keep_meta_info' => 0,
95             'verbatim' => 0,
96             'formula' => 0,
97             'skip_empty_rows' => 0,
98             'undef_str' => undef,
99             'comment_str' => undef,
100             'types' => undef,
101             'callbacks' => undef,
102              
103             '_EOF' => "",
104             '_RECNO' => 0,
105             '_STATUS' => undef,
106             '_FIELDS' => undef,
107             '_FFLAGS' => undef,
108             '_STRING' => undef,
109             '_ERROR_INPUT' => undef,
110             '_COLUMN_NAMES' => undef,
111             '_BOUND_COLUMNS' => undef,
112             '_AHEAD' => undef,
113             '_FORMULA_CB' => undef,
114             '_EMPTROW_CB' => undef,
115              
116             'ENCODING' => undef,
117             );
118             my %attr_alias = (
119             'quote_always' => "always_quote",
120             'verbose_diag' => "diag_verbose",
121             'quote_null' => "escape_null",
122             'escape' => "escape_char",
123             'comment' => "comment_str",
124             );
125             my $last_err = Text::CSV_XS->SetDiag (0);
126             my $ebcdic = ord ("A") == 0xC1; # Faster than $Config{'ebcdic'}
127             my @internal_kh;
128              
129             # NOT a method: is also used before bless
130             sub _unhealthy_whitespace {
131 15734     15734   35355 my ($self, $aw) = @_;
132 15734 100       54426 $aw or return 0; # no checks needed without allow_whitespace
133              
134 3569         7710 my $quo = $self->{'quote'};
135 3569 100 100     13703 defined $quo && length ($quo) or $quo = $self->{'quote_char'};
136 3569         7341 my $esc = $self->{'escape_char'};
137              
138 3569 100 100     68167 defined $quo && $quo =~ m/^[ \t]/ and return 1002;
139 3327 100 100     75399 defined $esc && $esc =~ m/^[ \t]/ and return 1002;
140              
141 3037         9374 return 0;
142             } # _unhealty_whitespace
143              
144             sub _check_sanity {
145 12428     12428   20030 my $self = shift;
146              
147 12428         23496 my $eol = $self->{'eol'};
148 12428         21635 my $sep = $self->{'sep'};
149 12428 100 100     48530 defined $sep && length ($sep) or $sep = $self->{'sep_char'};
150 12428         20744 my $quo = $self->{'quote'};
151 12428 100 100     40755 defined $quo && length ($quo) or $quo = $self->{'quote_char'};
152 12428         23591 my $esc = $self->{'escape_char'};
153              
154             # use DP;::diag ("SEP: '", DPeek ($sep),
155             # "', QUO: '", DPeek ($quo),
156             # "', ESC: '", DPeek ($esc),"'");
157              
158             # sep_char should not be undefined
159 12428 100       33562 $sep ne "" or return 1008;
160 12426 100       28359 length ($sep) > 16 and return 1006;
161 12425 100       42665 $sep =~ m/[\r\n]/ and return 1003;
162              
163 12419 100       24998 if (defined $quo) {
164 12409 100       87299 $quo eq $sep and return 1001;
165 12181 100       26538 length ($quo) > 16 and return 1007;
166 12180 100       29343 $quo =~ m/[\r\n]/ and return 1003;
167             }
168 12184 100       23751 if (defined $esc) {
169 12168 100       67722 $esc eq $sep and return 1001;
170 12000 100       29671 $esc =~ m/[\r\n]/ and return 1003;
171             }
172 12010 100       23053 if (defined $eol) {
173 12005 100       23198 length ($eol) > 16 and return 1005;
174             }
175              
176 12009         33864 return _unhealthy_whitespace ($self, $self->{'allow_whitespace'});
177             } # _check_sanity
178              
179             sub known_attributes {
180 3     3 1 777 sort grep !m/^_/ => "sep", "quote", keys %def_attr;
181             } # known_attributes
182              
183             sub new {
184 1004     1004 1 72071138 $last_err = Text::CSV_XS->SetDiag (1000,
185             "usage: my \$csv = Text::CSV_XS->new ([{ option => value, ... }]);");
186              
187 1004         2541 my $proto = shift;
188 1004 100 100     6178 my $class = ref $proto || $proto or return;
189 1003 100 100     5908 @_ > 0 && ref $_[0] ne "HASH" and return;
190 995   100     2832 my $attr = shift || {};
191             my %attr = map {
192 2756 100       11666 my $k = m/^[a-zA-Z]\w+$/ ? lc $_ : $_;
193 2756 100       6666 exists $attr_alias{$k} and $k = $attr_alias{$k};
194 2756         8807 ($k => $attr->{$_});
195 995         1748 } keys %{$attr};
  995         3728  
196              
197 995         3410 my $sep_aliased = 0;
198 995 100       2775 if (exists $attr{'sep'}) {
199 10         35 $attr{'sep_char'} = delete $attr{'sep'};
200 10         28 $sep_aliased = 1;
201             }
202 995         1714 my $quote_aliased = 0;
203 995 100       2476 if (exists $attr{'quote'}) {
204 25         80 $attr{'quote_char'} = delete $attr{'quote'};
205 25         44 $quote_aliased = 1;
206             }
207             exists $attr{'formula_handling'} and
208 995 100       2354 $attr{'formula'} = delete $attr{'formula_handling'};
209 995         1874 my $attr_formula = delete $attr{'formula'};
210              
211 995         2724 for (keys %attr) {
212 2720 100 100     10748 if (m/^[a-z]/ && exists $def_attr{$_}) {
213             # uncoverable condition false
214 2713 100 100     9899 defined $attr{$_} && m/_char$/ and utf8::decode ($attr{$_});
215 2713         4942 next;
216             }
217             # croak?
218 7         40 $last_err = Text::CSV_XS->SetDiag (1000, "INI - Unknown attribute '$_'");
219 7 100       27 $attr{'auto_diag'} and error_diag ();
220 7         48 return;
221             }
222 988 100       2581 if ($sep_aliased) {
223 10         59 my @b = unpack "U0C*", $attr{'sep_char'};
224 10 100       40 if (@b > 1) {
225 6         19 $attr{'sep'} = $attr{'sep_char'};
226 6         18 $attr{'sep_char'} = "\0";
227             }
228             else {
229 4         13 $attr{'sep'} = undef;
230             }
231             }
232 988 100 100     2608 if ($quote_aliased and defined $attr{'quote_char'}) {
233 21         84 my @b = unpack "U0C*", $attr{'quote_char'};
234 21 100       55 if (@b > 1) {
235 7         21 $attr{'quote'} = $attr{'quote_char'};
236 7         21 $attr{'quote_char'} = "\0";
237             }
238             else {
239 14         31 $attr{'quote'} = undef;
240             }
241             }
242              
243 988         24669 my $self = { %def_attr, %attr };
244 988 100       5100 if (my $ec = _check_sanity ($self)) {
245 35         142 $last_err = Text::CSV_XS->SetDiag ($ec);
246 35 100       82 $attr{'auto_diag'} and error_diag ();
247 35         291 return;
248             }
249 953 100 100     3618 if (defined $self->{'callbacks'} && ref $self->{'callbacks'} ne "HASH") {
250 6         1069 carp ("The 'callbacks' attribute is set but is not a hash: ignored\n");
251 6         86 $self->{'callbacks'} = undef;
252             }
253              
254 953         4771 $last_err = Text::CSV_XS->SetDiag (0);
255 953 100 100     3451 defined $\ && !exists $attr{'eol'} and $self->{'eol'} = $\;
256 953         2210 bless $self, $class;
257 953 100       2632 defined $self->{'types'} and $self->types ($self->{'types'});
258 953 50       3708 defined $self->{'skip_empty_rows'} and $self->{'skip_empty_rows'} = _supported_skip_empty_rows ($self, $self->{'skip_empty_rows'});
259 953 100       2420 defined $attr_formula and $self->{'formula'} = _supported_formula ($self, $attr_formula);
260 952         7356 $self;
261             } # new
262              
263             # Keep in sync with XS!
264             my %_cache_id = ( # Only expose what is accessed from within PM
265             'quote_char' => 0,
266             'escape_char' => 1,
267             'sep_char' => 2,
268             'always_quote' => 4,
269             'quote_empty' => 5,
270             'quote_space' => 6,
271             'quote_binary' => 7,
272             'allow_loose_quotes' => 8,
273             'allow_loose_escapes' => 9,
274             'allow_unquoted_escape' => 10,
275             'allow_whitespace' => 11,
276             'blank_is_undef' => 12,
277             'empty_is_undef' => 13,
278             'auto_diag' => 14,
279             'diag_verbose' => 15,
280             'escape_null' => 16,
281             'formula' => 18,
282             'decode_utf8' => 21,
283             'verbatim' => 23,
284             'strict_eol' => 24,
285             'strict' => 28,
286             'skip_empty_rows' => 29,
287             'binary' => 30,
288             'keep_meta_info' => 31,
289             '_has_hooks' => 32,
290             '_has_ahead' => 33,
291             '_is_bound' => 44,
292             'eol' => 100,
293             'sep' => 116,
294             'quote' => 132,
295             'undef_str' => 148,
296             'comment_str' => 156,
297             'types' => 92,
298             );
299              
300             # A `character'
301             sub _set_attr_C {
302 11108     11108   31027 my ($self, $name, $val, $ec) = @_;
303 11108 100       50264 defined $val and utf8::decode ($val);
304 11108         31667 $self->{$name} = $val;
305 11108 100       27351 $ec = _check_sanity ($self) and croak ($self->SetDiag ($ec));
306 10198         50313 $self->_cache_set ($_cache_id{$name}, $val);
307             } # _set_attr_C
308              
309             # A flag
310             sub _set_attr_X {
311 5643     5643   15964 my ($self, $name, $val) = @_;
312 5643 100       13887 defined $val or $val = 0;
313 5643         14364 $self->{$name} = $val;
314 5643         35192 $self->_cache_set ($_cache_id{$name}, 0 + $val);
315             } # _set_attr_X
316              
317             # A number
318             sub _set_attr_N {
319 68     68   151 my ($self, $name, $val) = @_;
320 68         131 $self->{$name} = $val;
321 68         383 $self->_cache_set ($_cache_id{$name}, 0 + $val);
322             } # _set_attr_N
323              
324             # Accessor methods.
325             # It is unwise to change them halfway through a single file!
326             sub quote_char {
327 4836     4836 1 1095536 my $self = shift;
328 4836 100       14291 if (@_) {
329 3601         10428 $self->_set_attr_C ("quote_char", shift);
330 3374         10708 $self->_cache_set ($_cache_id{'quote'}, "");
331             }
332 4609         18990 $self->{'quote_char'};
333             } # quote_char
334              
335             sub quote {
336 20     20 1 50 my $self = shift;
337 20 100       72 if (@_) {
338 11         19 my $quote = shift;
339 11 100       30 defined $quote or $quote = "";
340 11         38 utf8::decode ($quote);
341 11         47 my @b = unpack "U0C*", $quote;
342 11 100       32 if (@b > 1) {
343 5 100       141 @b > 16 and croak ($self->SetDiag (1007));
344 4         13 $self->quote_char ("\0");
345             }
346             else {
347 6         18 $self->quote_char ($quote);
348 6         10 $quote = "";
349             }
350 10         26 $self->{'quote'} = $quote;
351              
352 10         20 my $ec = _check_sanity ($self);
353 10 100       148 $ec and croak ($self->SetDiag ($ec));
354              
355 9         26 $self->_cache_set ($_cache_id{'quote'}, $quote);
356             }
357 18         40 my $quote = $self->{'quote'};
358 18 100 100     220 defined $quote && length ($quote) ? $quote : $self->{'quote_char'};
359             } # quote
360              
361             sub escape_char {
362 4826     4826 1 1117206 my $self = shift;
363 4826 100       15787 if (@_) {
364 3595         7700 my $ec = shift;
365 3595         11114 $self->_set_attr_C ("escape_char", $ec);
366 3480 100       8618 $ec or $self->_set_attr_X ("escape_null", 0);
367             }
368 4711         25297 $self->{'escape_char'};
369             } # escape_char
370              
371             sub sep_char {
372 5155     5155 1 1100887 my $self = shift;
373 5155 100       17080 if (@_) {
374 3912         12297 $self->_set_attr_C ("sep_char", shift);
375 3344         11473 $self->_cache_set ($_cache_id{'sep'}, "");
376             }
377 4587         21092 $self->{'sep_char'};
378             } # sep_char
379              
380             sub sep {
381 359     359 1 7348 my $self = shift;
382 359 100       881 if (@_) {
383 326         696 my $sep = shift;
384 326 100       661 defined $sep or $sep = "";
385 326         1242 utf8::decode ($sep);
386 326         1354 my @b = unpack "U0C*", $sep;
387 326 100       916 if (@b > 1) {
388 13 100       164 @b > 16 and croak ($self->SetDiag (1006));
389 12         41 $self->sep_char ("\0");
390             }
391             else {
392 313         953 $self->sep_char ($sep);
393 310         497 $sep = "";
394             }
395 322         742 $self->{'sep'} = $sep;
396              
397 322         774 my $ec = _check_sanity ($self);
398 322 100       816 $ec and croak ($self->SetDiag ($ec));
399              
400 321         1141 $self->_cache_set ($_cache_id{'sep'}, $sep);
401             }
402 354         722 my $sep = $self->{'sep'};
403 354 100 100     1658 defined $sep && length ($sep) ? $sep : $self->{'sep_char'};
404             } # sep
405              
406             sub eol {
407 280     280 1 6829 my $self = shift;
408 280 100       660 if (@_) {
409 227         385 my $eol = shift;
410 227 100       552 defined $eol or $eol = ""; # Also reset strict_eol?
411 227 100       681 length ($eol) > 16 and croak ($self->SetDiag (1005));
412 226         425 $self->{'eol'} = $eol;
413 226         790 $self->_cache_set ($_cache_id{'eol'}, $eol);
414             }
415 279         1621 $self->{'eol'};
416             } # eol
417              
418             sub eol_type {
419 32     32 1 65 my $self = shift;
420 32         169 $self->_cache_get_eolt;
421             } # eol_type
422              
423             sub always_quote {
424 3032     3032 1 1143717 my $self = shift;
425 3032 100       12556 @_ and $self->_set_attr_X ("always_quote", shift);
426 3032         10907 $self->{'always_quote'};
427             } # always_quote
428              
429             sub quote_space {
430 10     10 1 23 my $self = shift;
431 10 100       42 @_ and $self->_set_attr_X ("quote_space", shift);
432 10         38 $self->{'quote_space'};
433             } # quote_space
434              
435             sub quote_empty {
436 5     5 1 13 my $self = shift;
437 5 100       24 @_ and $self->_set_attr_X ("quote_empty", shift);
438 5         29 $self->{'quote_empty'};
439             } # quote_empty
440              
441             sub escape_null {
442 6     6 1 11 my $self = shift;
443 6 100       28 @_ and $self->_set_attr_X ("escape_null", shift);
444 6         27 $self->{'escape_null'};
445             } # escape_null
446 3     3 1 14 sub quote_null { goto &escape_null; }
447              
448             sub quote_binary {
449 7     7 1 23 my $self = shift;
450 7 100       38 @_ and $self->_set_attr_X ("quote_binary", shift);
451 7         50 $self->{'quote_binary'};
452             } # quote_binary
453              
454             sub binary {
455 21     21 1 94566 my $self = shift;
456 21 100       111 @_ and $self->_set_attr_X ("binary", shift);
457 21         73 $self->{'binary'};
458             } # binary
459              
460             sub strict {
461 2     2 1 5 my $self = shift;
462 2 100       22 @_ and $self->_set_attr_X ("strict", shift);
463 2         9 $self->{'strict'};
464             } # strict
465              
466             sub strict_eol {
467 2     2 1 4 my $self = shift;
468 2 100       9 @_ and $self->_set_attr_X ("strict_eol", shift);
469 2         8 $self->{'strict_eol'};
470             } # strict_eol
471              
472             sub _supported_skip_empty_rows {
473 974     974   1984 my ($self, $f) = @_;
474 974 100       2187 defined $f or return 0;
475 973 100 66     4284 if ($self && $f && ref $f && ref $f eq "CODE") {
      100        
      66        
476 5         9 $self->{'_EMPTROW_CB'} = $f;
477 5         12 return 6;
478             }
479             $f =~ m/^(?: 0 | undef )$/xi ? 0 :
480             $f =~ m/^(?: 1 | skip )$/xi ? 1 :
481             $f =~ m/^(?: 2 | eof | stop )$/xi ? 2 :
482             $f =~ m/^(?: 3 | die )$/xi ? 3 :
483             $f =~ m/^(?: 4 | croak )$/xi ? 4 :
484             $f =~ m/^(?: 5 | error )$/xi ? 5 :
485 968 0       5338 $f =~ m/^(?: 6 | cb )$/xi ? 6 : do {
    50          
    100          
    100          
    100          
    100          
    100          
486 0   0     0 $self ||= "Text::CSV_XS";
487 0         0 croak ($self->_SetDiagInfo (1500, "skip_empty_rows '$f' is not supported"));
488             };
489             } # _supported_skip_empty_rows
490              
491             sub skip_empty_rows {
492 23     23 1 51 my $self = shift;
493 23 100       86 @_ and $self->_set_attr_N ("skip_empty_rows", _supported_skip_empty_rows ($self, shift));
494 23         43 my $ser = $self->{'skip_empty_rows'};
495 23 100       63 $ser == 6 or $self->{'_EMPTROW_CB'} = undef;
496             $ser <= 1 ? $ser : $ser == 2 ? "eof" : $ser == 3 ? "die" :
497             $ser == 4 ? "croak" : $ser == 5 ? "error" :
498 23 100       126 $self->{'_EMPTROW_CB'};
    100          
    100          
    100          
    100          
499             } # skip_empty_rows
500              
501             sub _SetDiagInfo {
502 17     17   35 my ($self, $err, $msg) = @_;
503 17         146 $self->SetDiag ($err);
504 17         52 my $em = $self->error_diag ();
505 17 50       75 $em =~ s/^\d+$// and $msg =~ s/^/# /;
506 17 50       66 my $sep = $em =~ m/[;\n]$/ ? "\n\t" : ": ";
507 17         2613 join $sep => grep m/\S\S\S/ => $em, $msg;
508             } # _SetDiagInfo
509              
510             sub _supported_formula {
511 103     103   156 my ($self, $f) = @_;
512 103 100       177 defined $f or return 5;
513 102 100 66     455 if ($self && $f && ref $f && ref $f eq "CODE") {
      100        
      100        
514 6         11 $self->{'_FORMULA_CB'} = $f;
515 6         16 return 6;
516             }
517             $f =~ m/^(?: 0 | none )$/xi ? 0 :
518             $f =~ m/^(?: 1 | die )$/xi ? 1 :
519             $f =~ m/^(?: 2 | croak )$/xi ? 2 :
520             $f =~ m/^(?: 3 | diag )$/xi ? 3 :
521             $f =~ m/^(?: 4 | empty | )$/xi ? 4 :
522             $f =~ m/^(?: 5 | undef )$/xi ? 5 :
523 96 100       791 $f =~ m/^(?: 6 | cb )$/xi ? 6 : do {
    100          
    100          
    100          
    100          
    100          
    100          
524 7   50     15 $self ||= "Text::CSV_XS";
525 7         20 croak ($self->_SetDiagInfo (1500, "formula-handling '$f' is not supported"));
526             };
527             } # _supported_formula
528              
529             sub formula {
530 44     44 1 2763 my $self = shift;
531 44 100       120 @_ and $self->_set_attr_N ("formula", _supported_formula ($self, shift));
532 38 100       97 $self->{'formula'} == 6 or $self->{'_FORMULA_CB'} = undef;
533 38         132 [qw( none die croak diag empty undef cb )]->[_supported_formula ($self, $self->{'formula'})];
534             } # formula
535              
536             sub formula_handling {
537 7     7 1 11 my $self = shift;
538 7         16 $self->formula (@_);
539             } # formula_handling
540              
541             sub decode_utf8 {
542 2     2 1 5 my $self = shift;
543 2 100       9 @_ and $self->_set_attr_X ("decode_utf8", shift);
544 2         9 $self->{'decode_utf8'};
545             } # decode_utf8
546              
547             sub keep_meta_info {
548 12     12 1 843 my $self = shift;
549 12 100       46 if (@_) {
550 11         18 my $v = shift;
551 11 100 100     66 !defined $v || $v eq "" and $v = 0;
552 11 100       52 $v =~ m/^[0-9]/ or $v = lc $v eq "false" ? 0 : 1; # true/truth = 1
    100          
553 11         37 $self->_set_attr_X ("keep_meta_info", $v);
554             }
555 12         62 $self->{'keep_meta_info'};
556             } # keep_meta_info
557              
558             sub allow_loose_quotes {
559 12     12 1 26 my $self = shift;
560 12 100       60 @_ and $self->_set_attr_X ("allow_loose_quotes", shift);
561 12         35 $self->{'allow_loose_quotes'};
562             } # allow_loose_quotes
563              
564             sub allow_loose_escapes {
565 12     12 1 1874 my $self = shift;
566 12 100       84 @_ and $self->_set_attr_X ("allow_loose_escapes", shift);
567 12         37 $self->{'allow_loose_escapes'};
568             } # allow_loose_escapes
569              
570             sub allow_whitespace {
571 4954     4954 1 3329645 my $self = shift;
572 4954 100       18660 if (@_) {
573 3725         9356 my $aw = shift;
574 3725 100       11897 _unhealthy_whitespace ($self, $aw) and
575             croak ($self->SetDiag (1002));
576 3721         14301 $self->_set_attr_X ("allow_whitespace", $aw);
577             }
578 4950         20564 $self->{'allow_whitespace'};
579             } # allow_whitespace
580              
581             sub allow_unquoted_escape {
582 3     3 1 20 my $self = shift;
583 3 100       18 @_ and $self->_set_attr_X ("allow_unquoted_escape", shift);
584 3         9 $self->{'allow_unquoted_escape'};
585             } # allow_unquoted_escape
586              
587             sub blank_is_undef {
588 2     2 1 5 my $self = shift;
589 2 100       9 @_ and $self->_set_attr_X ("blank_is_undef", shift);
590 2         10 $self->{'blank_is_undef'};
591             } # blank_is_undef
592              
593             sub empty_is_undef {
594 2     2 1 4 my $self = shift;
595 2 100       10 @_ and $self->_set_attr_X ("empty_is_undef", shift);
596 2         9 $self->{'empty_is_undef'};
597             } # empty_is_undef
598              
599             sub verbatim {
600 9     9 1 16311 my $self = shift;
601 9 100       72 @_ and $self->_set_attr_X ("verbatim", shift);
602 9         32 $self->{'verbatim'};
603             } # verbatim
604              
605             sub undef_str {
606 12     12 1 3788 my $self = shift;
607 12 100       31 if (@_) {
608 11         20 my $v = shift;
609 11 100       38 $self->{'undef_str'} = defined $v ? "$v" : undef;
610 11         60 $self->_cache_set ($_cache_id{'undef_str'}, $self->{'undef_str'});
611             }
612 12         40 $self->{'undef_str'};
613             } # undef_str
614              
615             sub comment_str {
616 15     15 1 82 my $self = shift;
617 15 100       48 if (@_) {
618 14         28 my $v = shift;
619 14 100       53 $self->{'comment_str'} = defined $v ? "$v" : undef;
620 14         99 $self->_cache_set ($_cache_id{'comment_str'}, $self->{'comment_str'});
621             }
622 15         50 $self->{'comment_str'};
623             } # comment_str
624              
625             sub auto_diag {
626 12     12 1 482 my $self = shift;
627 12 100       44 if (@_) {
628 9         17 my $v = shift;
629 9 100 100     54 !defined $v || $v eq "" and $v = 0;
630 9 100       43 $v =~ m/^[0-9]/ or $v = lc $v eq "false" ? 0 : 1; # true/truth = 1
    100          
631 9         35 $self->_set_attr_X ("auto_diag", $v);
632             }
633 12         58 $self->{'auto_diag'};
634             } # auto_diag
635              
636             sub diag_verbose {
637 10     10 1 896 my $self = shift;
638 10 100       32 if (@_) {
639 8         13 my $v = shift;
640 8 100 100     109 !defined $v || $v eq "" and $v = 0;
641 8 100       39 $v =~ m/^[0-9]/ or $v = lc $v eq "false" ? 0 : 1; # true/truth = 1
    100          
642 8         26 $self->_set_attr_X ("diag_verbose", $v);
643             }
644 10         47 $self->{'diag_verbose'};
645             } # diag_verbose
646              
647             # status
648             #
649             # object method returning the success or failure of the most recent
650             # combine () or parse (). there are no side-effects.
651              
652             sub status {
653 5     5 1 13 my $self = shift;
654 5         22 return $self->{'_STATUS'};
655             } # status
656              
657             sub eof {
658 33     33 1 15706 my $self = shift;
659 33         137 return $self->{'_EOF'};
660             } # eof
661              
662             sub types {
663 7     7 1 2052 my $self = shift;
664 7 100       23 if (@_) {
665 2 100       7 if (my $types = shift) {
666 1         2 $self->{'_types'} = join "", map { chr } @{$types};
  3         23  
  1         3  
667 1         3 $self->{'types'} = $types;
668 1         7 $self->_cache_set ($_cache_id{'types'}, $self->{'_types'});
669             }
670             else {
671 1         3 delete $self->{'types'};
672 1         3 delete $self->{'_types'};
673 1         6 $self->_cache_set ($_cache_id{'types'}, undef);
674 1         4 undef;
675             }
676             }
677             else {
678 5         24 $self->{'types'};
679             }
680             } # types
681              
682             sub callbacks {
683 74     74 1 63857 my $self = shift;
684 74 100       237 if (@_) {
685 44         99 my $cb;
686 44         69 my $hf = 0x00;
687 44 100       119 if (defined $_[0]) {
    100          
688 42 100       3028 grep { !defined } @_ and croak ($self->SetDiag (1004));
  75         612  
689 40 100 100     1151 $cb = @_ == 1 && ref $_[0] eq "HASH" ? shift
    100          
690             : @_ % 2 == 0 ? { @_ }
691             : croak ($self->SetDiag (1004));
692 35         70 foreach my $cbk (keys %{$cb}) {
  35         117  
693             # A key cannot be a ref. That would be stored as the *string
694             # 'SCALAR(0x1f3e710)' or 'ARRAY(0x1a5ae18)'
695 37 100 100     2660 $cbk =~ m/^[\w.]+$/ && ref $cb->{$cbk} eq "CODE" or
696             croak ($self->SetDiag (1004));
697             }
698 21 100       133 exists $cb->{'error'} and $hf |= 0x01;
699 21 100       63 exists $cb->{'after_parse'} and $hf |= 0x02;
700 21 100       54 exists $cb->{'before_print'} and $hf |= 0x04;
701             }
702             elsif (@_ > 1) {
703             # (undef, whatever)
704 1         169 croak ($self->SetDiag (1004));
705             }
706 22         116 $self->_set_attr_X ("_has_hooks", $hf);
707 22         69 $self->{'callbacks'} = $cb;
708             }
709 52         205 $self->{'callbacks'};
710             } # callbacks
711              
712             # error_diag
713             #
714             # If (and only if) an error occurred, this function returns a code that
715             # indicates the reason of failure
716              
717             sub error_diag {
718 1820     1820 1 84425 my $self = shift;
719 1820         6662 my @diag = (0 + $last_err, $last_err, 0, 0, 0, 0);
720              
721             # Docs state to NEVER use UNIVERSAL::isa, because it will *never* call an
722             # overridden isa method in any class. Well, that is exacly what I want here
723 1820 100 100     34874 if ($self && ref $self and # Not a class method or direct call
      100        
      100        
724             UNIVERSAL::isa ($self, __PACKAGE__) && exists $self->{'_ERROR_DIAG'}) {
725 1641         3515 $diag[0] = 0 + $self->{'_ERROR_DIAG'};
726 1641         6866 $diag[1] = $self->{'_ERROR_DIAG'};
727 1641 100       4566 $diag[2] = 1 + $self->{'_ERROR_POS'} if exists $self->{'_ERROR_POS'};
728 1641         2836 $diag[3] = $self->{'_RECNO'};
729 1641 100       4016 $diag[4] = $self->{'_ERROR_FLD'} if exists $self->{'_ERROR_FLD'};
730 1641 100 66     6928 $diag[5] = $self->{'_ERROR_SRC'} if exists $self->{'_ERROR_SRC'} && $self->{'diag_verbose'};
731              
732             $diag[0] && $self->{'callbacks'} && $self->{'callbacks'}{'error'} and
733 1641 100 100     10964 return $self->{'callbacks'}{'error'}->(@diag);
      100        
734             }
735              
736 1810         3339 my $context = wantarray;
737 1810 100       4420 unless (defined $context) { # Void context, auto-diag
738 343 100 100     1325 if ($diag[0] && $diag[0] != 2012) {
739 36         173 my $msg = "# CSV_XS ERROR: $diag[0] - $diag[1] \@ rec $diag[3] pos $diag[2]\n";
740 36 100       286 $diag[4] and $msg =~ s/$/ field $diag[4]/;
741 36 100       159 $diag[5] and $msg =~ s/$/ (XS#$diag[5])/;
742              
743 36 100 100     167 unless ($self && ref $self) { # auto_diag
744             # called without args in void context
745 4         66 warn $msg;
746 4         54 return;
747             }
748              
749             $self->{'diag_verbose'} && $self->{'_ERROR_INPUT'} and
750 32 50 66     168 $msg .= $self->{'_ERROR_INPUT'}."\n".
751             (" " x ($diag[2] - 1))."^\n";
752              
753 32         72 my $lvl = $self->{'auto_diag'};
754 32 100       124 if ($lvl < 2) {
755 29         122 my @c = caller (2);
756 29 50 66     166 if (@c >= 11 && $c[10] && ref $c[10] eq "HASH") {
      33        
757 0         0 my $hints = $c[10];
758             (exists $hints->{'autodie'} && $hints->{'autodie'} or
759             exists $hints->{'guard Fatal'} &&
760 0 0 0     0 !exists $hints->{'no Fatal'}) and
      0        
      0        
761             $lvl++;
762             # Future releases of autodie will probably set $^H{autodie}
763             # to "autodie @args", like "autodie :all" or "autodie open"
764             # so we can/should check for "open" or "new"
765             }
766             }
767 32 100       346 $lvl > 1 ? die $msg : warn $msg;
768             }
769 336         3526 return;
770             }
771 1467 100       8592 return $context ? @diag : $diag[1];
772             } # error_diag
773              
774             sub record_number {
775 14     14 1 4588 my $self = shift;
776 14         59 return $self->{'_RECNO'};
777             } # record_number
778              
779             # string
780             #
781             # object method returning the result of the most recent combine () or the
782             # input to the most recent parse (), whichever is more recent. there are
783             # no side-effects.
784              
785             sub string {
786 1398     1398 1 496966 my $self = shift;
787 1398 100       6091 return ref $self->{'_STRING'} ? ${$self->{'_STRING'}} : undef;
  1397         8039  
788             } # string
789              
790             # fields
791             #
792             # object method returning the result of the most recent parse () or the
793             # input to the most recent combine (), whichever is more recent. there
794             # are no side-effects.
795              
796             sub fields {
797 1603     1603 1 29060 my $self = shift;
798 1603 100       6203 return ref $self->{'_FIELDS'} ? @{$self->{'_FIELDS'}} : undef;
  1602         12376  
799             } # fields
800              
801             # meta_info
802             #
803             # object method returning the result of the most recent parse () or the
804             # input to the most recent combine (), whichever is more recent. there
805             # are no side-effects. meta_info () returns (if available) some of the
806             # field's properties
807              
808             sub meta_info {
809 21     21 1 897 my $self = shift;
810 21 100       102 return ref $self->{'_FFLAGS'} ? @{$self->{'_FFLAGS'}} : undef;
  16         86  
811             } # meta_info
812              
813             sub is_quoted {
814 12     12 1 2582 my ($self, $idx) = @_;
815             ref $self->{'_FFLAGS'} &&
816 12 100 100     114 $idx >= 0 && $idx < @{$self->{'_FFLAGS'}} or return;
  8   100     40  
817 7 100       31 $self->{'_FFLAGS'}[$idx] & CSV_FLAGS_IS_QUOTED () ? 1 : 0;
818             } # is_quoted
819              
820             sub is_binary {
821 11     11 1 1736 my ($self, $idx) = @_;
822             ref $self->{'_FFLAGS'} &&
823 11 100 100     109 $idx >= 0 && $idx < @{$self->{'_FFLAGS'}} or return;
  9   100     61  
824 8 100       25 $self->{'_FFLAGS'}[$idx] & CSV_FLAGS_IS_BINARY () ? 1 : 0;
825             } # is_binary
826              
827             sub is_missing {
828 19     19 1 41 my ($self, $idx) = @_;
829 19 100 100     119 $idx < 0 || !ref $self->{'_FFLAGS'} and return;
830 11 100       16 $idx >= @{$self->{'_FFLAGS'}} and return 1;
  11         31  
831 10 100       25 $self->{'_FFLAGS'}[$idx] & CSV_FLAGS_IS_MISSING () ? 1 : 0;
832             } # is_missing
833              
834             # combine
835             #
836             # Object method returning success or failure. The given arguments are
837             # combined into a single comma-separated value. Failure can be the
838             # result of no arguments or an argument containing an invalid character.
839             # side-effects include:
840             # setting status ()
841             # setting fields ()
842             # setting string ()
843             # setting error_input ()
844              
845             sub combine {
846 1397     1397 1 1130292 my $self = shift;
847 1397         4108 my $str = "";
848 1397         11795 $self->{'_FIELDS'} = \@_;
849 1397   100     44925 $self->{'_STATUS'} = (@_ > 0) && $self->Combine (\$str, \@_, 0);
850 1393         5602 $self->{'_STRING'} = \$str;
851 1393         5933 $self->{'_STATUS'};
852             } # combine
853              
854             # parse
855             #
856             # Object method returning success or failure. The given argument is
857             # expected to be a valid comma-separated value. Failure can be the
858             # result of no arguments or an argument containing an invalid sequence
859             # of characters. Side-effects include:
860             # setting status ()
861             # setting fields ()
862             # setting meta_info ()
863             # setting string ()
864             # setting error_input ()
865              
866             sub parse {
867 1947     1947 1 140773 my ($self, $str) = @_;
868              
869 1947 100       8459 ref $str and croak ($self->SetDiag (1500));
870              
871 1943         3796 my $fields = [];
872 1943         3529 my $fflags = [];
873 1943         5420 $self->{'_STRING'} = \$str;
874 1943 100 100     59658 if (defined $str && $self->Parse ($str, $fields, $fflags)) {
875 1729         6264 $self->{'_FIELDS'} = $fields;
876 1729         4210 $self->{'_FFLAGS'} = $fflags;
877 1729         3990 $self->{'_STATUS'} = 1;
878             }
879             else {
880 211         560 $self->{'_FIELDS'} = undef;
881 211         406 $self->{'_FFLAGS'} = undef;
882 211         463 $self->{'_STATUS'} = 0;
883             }
884 1940         10269 $self->{'_STATUS'};
885             } # parse
886              
887             sub column_names {
888 1024     1024 1 75564 my ($self, @keys) = @_;
889             @keys or
890 1024 100       3192 return defined $self->{'_COLUMN_NAMES'} ? @{$self->{'_COLUMN_NAMES'}} : ();
  293 100       1521  
891              
892             @keys == 1 && ! defined $keys[0] and
893 688 100 100     2788 return $self->{'_COLUMN_NAMES'} = undef;
894              
895 550 100 100     2032 if (@keys == 1 && ref $keys[0] eq "ARRAY") {
    100          
896 226         391 @keys = @{$keys[0]};
  226         631  
897             }
898 712 100       2503 elsif (join "", map { defined $_ ? ref $_ : "" } @keys) {
899 5         1013 croak ($self->SetDiag (3001));
900             }
901              
902 545 100 100     1628 $self->{'_BOUND_COLUMNS'} && @keys != @{$self->{'_BOUND_COLUMNS'}} and
  2         178  
903             croak ($self->SetDiag (3003));
904              
905 544 100       993 $self->{'_COLUMN_NAMES'} = [ map { defined $_ ? $_ : "\cAUNDEF\cA" } @keys ];
  1259         3558  
906 544         965 @{$self->{'_COLUMN_NAMES'}};
  544         1610  
907             } # column_names
908              
909             sub header {
910 333     333 1 54299 my ($self, $fh, @args) = @_;
911              
912 333 100       1031 $fh or croak ($self->SetDiag (1014));
913              
914 332         727 my (@seps, %args);
915 332         855 for (@args) {
916 225 100       711 if (ref $_ eq "ARRAY") {
917 18         33 push @seps, @{$_};
  18         58  
918 18         40 next;
919             }
920 207 100       498 if (ref $_ eq "HASH") {
921 206         296 %args = %{$_};
  206         719  
922 206         625 next;
923             }
924 1         149 croak ('usage: $csv->header ($fh, [ seps ], { options })');
925             }
926              
927             defined $args{'munge'} && !defined $args{'munge_column_names'} and
928 331 100 66     1324 $args{'munge_column_names'} = $args{'munge'}; # munge as alias
929 331 100       1153 defined $args{'detect_bom'} or $args{'detect_bom'} = 1;
930 331 100       1069 defined $args{'set_column_names'} or $args{'set_column_names'} = 1;
931 331 100       1673 defined $args{'munge_column_names'} or $args{'munge_column_names'} = "lc";
932              
933             # Reset any previous leftovers
934 331         753 $self->{'_RECNO'} = 0;
935 331         890 $self->{'_AHEAD'} = undef;
936 331 100       910 $self->{'_COLUMN_NAMES'} = undef if $args{'set_column_names'};
937 331 100       853 $self->{'_BOUND_COLUMNS'} = undef if $args{'set_column_names'};
938              
939 331 100       786 if (defined $args{'sep_set'}) {
940 27 100       110 ref $args{'sep_set'} eq "ARRAY" or
941             croak ($self->_SetDiagInfo (1500, "sep_set should be an array ref"));
942 22         41 @seps = @{$args{'sep_set'}};
  22         66  
943             }
944              
945 326 50       1186 $^O eq "MSWin32" and binmode $fh;
946 326         9251 my $hdr = <$fh>;
947             # check if $hdr can be empty here, I don't think so
948 326 100 66     2490 defined $hdr && $hdr ne "" or croak ($self->SetDiag (1010));
949              
950 324         633 my %sep;
951 324 100       1015 @seps or @seps = (",", ";");
952 324         767 foreach my $sep (@seps) {
953 732 100       2242 index ($hdr, $sep) >= 0 and $sep{$sep}++;
954             }
955              
956 324 100       990 keys %sep >= 2 and croak ($self->SetDiag (1011));
957              
958 320         1437 $self->sep (keys %sep);
959 320         787 my $enc = "";
960 320 100       908 if ($args{'detect_bom'}) { # UTF-7 is not supported
961 319 100       3579 if ($hdr =~ s/^\x00\x00\xfe\xff//) { $enc = "utf-32be" }
  24 100       49  
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
962 24         53 elsif ($hdr =~ s/^\xff\xfe\x00\x00//) { $enc = "utf-32le" }
963 25         49 elsif ($hdr =~ s/^\xfe\xff//) { $enc = "utf-16be" }
964 24         76 elsif ($hdr =~ s/^\xff\xfe//) { $enc = "utf-16le" }
965 48         94 elsif ($hdr =~ s/^\xef\xbb\xbf//) { $enc = "utf-8" }
966 1         3 elsif ($hdr =~ s/^\xf7\x64\x4c//) { $enc = "utf-1" }
967 1         4 elsif ($hdr =~ s/^\xdd\x73\x66\x73//) { $enc = "utf-ebcdic" }
968 1         5 elsif ($hdr =~ s/^\x0e\xfe\xff//) { $enc = "scsu" }
969 1         3 elsif ($hdr =~ s/^\xfb\xee\x28//) { $enc = "bocu-1" }
970 1         4 elsif ($hdr =~ s/^\x84\x31\x95\x33//) { $enc = "gb-18030" }
971 36         75 elsif ($hdr =~ s/^\x{feff}//) { $enc = "" }
972              
973 319 100       970 $self->{'ENCODING'} = $enc ? uc $enc : undef;
974              
975 319 100       1823 $hdr eq "" and croak ($self->SetDiag (1010));
976              
977 313 100       754 if ($enc) {
978 144 50 33     482 $ebcdic && $enc eq "utf-ebcdic" and $enc = "";
979 144 100       571 if ($enc =~ m/([13]).le$/) {
980 48         234 my $l = 0 + $1;
981 48         78 my $x;
982 48         154 $hdr .= "\0" x $l;
983 48         200 read $fh, $x, $l;
984             }
985 144 50       346 if ($enc) {
986 144 100       381 if ($enc ne "utf-8") {
987 96         766 require Encode;
988 96         667 $hdr = Encode::decode ($enc, $hdr);
989             }
990 144     2   6914 binmode $fh, ":encoding($enc)";
  2         1868  
  2         38  
  2         12  
991             }
992             }
993             }
994              
995 314         10045 my ($ahead, $eol);
996 314 100 66     1454 if ($hdr and $hdr =~ s/\Asep=(\S)([\r\n]+)//i) { # Also look in xs:Parse
997 1         4 $self->sep ($1);
998 1 50       6 length $hdr or $hdr = <$fh>;
999             }
1000 314 100       2674 if ($hdr =~ s/^([^\r\n]+)([\r\n]+)([^\r\n].+)\z/$1/s) {
1001 142         386 $eol = $2;
1002 142         418 $ahead = $3;
1003             }
1004              
1005 314         715 my $hr = \$hdr; # Will cause croak on perl-5.6.x
1006 314 50       3605 open my $h, "<", $hr or croak ($self->SetDiag (1010));
1007              
1008 314 100       8537 my $row = $self->getline ($h) or croak ();
1009 312         1061 close $h;
1010              
1011 312 100       963 if ( $args{'munge_column_names'} eq "lc") {
    100          
    100          
1012 293         447 $_ = lc for @{$row};
  293         1294  
1013             }
1014             elsif ($args{'munge_column_names'} eq "uc") {
1015 7         12 $_ = uc for @{$row};
  7         39  
1016             }
1017             elsif ($args{'munge_column_names'} eq "db") {
1018 3         6 for (@{$row}) {
  3         12  
1019 7         17 s/\W+/_/g;
1020 7         21 s/^_+//;
1021 7         18 $_ = lc;
1022             }
1023             }
1024              
1025 312 100       716 if ($ahead) { # Must be after getline, which creates the cache
1026 142         588 $self->_cache_set ($_cache_id{'_has_ahead'}, 1);
1027 142         286 $self->{'_AHEAD'} = $ahead;
1028 142 100       706 $eol =~ m/^\r([^\n]|\z)/ and $self->eol ($eol);
1029             }
1030              
1031 312         433 my @hdr = @{$row};
  312         1012  
1032             ref $args{'munge_column_names'} eq "CODE" and
1033 312 100       897 @hdr = map { $args{'munge_column_names'}->($_) } @hdr;
  4         21  
1034             ref $args{'munge_column_names'} eq "HASH" and
1035 312 100       763 @hdr = map { $args{'munge_column_names'}->{$_} || $_ } @hdr;
  3 100       17  
1036 312         563 my %hdr; $hdr{$_}++ for @hdr;
  312         1273  
1037 312 100       897 exists $hdr{''} and croak ($self->SetDiag (1012));
1038 310 100       794 unless (keys %hdr == @hdr) {
1039             croak ($self->_SetDiagInfo (1013, join ", " =>
1040 1         5 map { "$_ ($hdr{$_})" } grep { $hdr{$_} > 1 } keys %hdr));
  1         11  
  2         8  
1041             }
1042 309 100       1527 $args{'set_column_names'} and $self->column_names (@hdr);
1043 309 100       3645 wantarray ? @hdr : $self;
1044             } # header
1045              
1046             sub bind_columns {
1047 36     36 1 10127 my ($self, @refs) = @_;
1048             @refs or
1049 36 100       141 return defined $self->{'_BOUND_COLUMNS'} ? @{$self->{'_BOUND_COLUMNS'}} : undef;
  2 100       13  
1050              
1051 32 100 100     155 if (@refs == 1 && ! defined $refs[0]) {
1052 5         14 $self->{'_COLUMN_NAMES'} = undef;
1053 5         26 return $self->{'_BOUND_COLUMNS'} = undef;
1054             }
1055              
1056 27 100 100     109 $self->{'_COLUMN_NAMES'} && @refs != @{$self->{'_COLUMN_NAMES'}} and
  3         234  
1057             croak ($self->SetDiag (3003));
1058              
1059 26 100       349 join "", map { ref $_ eq "SCALAR" ? "" : "*" } @refs and
  74632 100       137466  
1060             croak ($self->SetDiag (3004));
1061              
1062 24         2581 $self->_set_attr_N ("_is_bound", scalar @refs);
1063 24         4095 $self->{'_BOUND_COLUMNS'} = [ @refs ];
1064 24         1141 @refs;
1065             } # bind_columns
1066              
1067             sub getline_hr {
1068 131     131 1 27048 my ($self, @args, %hr) = @_;
1069 131 100       676 $self->{'_COLUMN_NAMES'} or croak ($self->SetDiag (3002));
1070 130 100       2524 my $fr = $self->getline (@args) or return;
1071 127 100       539 if (ref $self->{'_FFLAGS'}) { # missing
1072             $self->{'_FFLAGS'}[$_] = CSV_FLAGS_IS_MISSING ()
1073 5 50       6 for (@{$fr} ? $#{$fr} + 1 : 0) .. $#{$self->{'_COLUMN_NAMES'}};
  5         11  
  5         6  
  5         18  
1074 5         33 @{$fr} == 1 && (!defined $fr->[0] || $fr->[0] eq "") and
1075 5 100 33     5 $self->{'_FFLAGS'}[0] ||= CSV_FLAGS_IS_MISSING ();
      66        
      100        
1076             }
1077 127         217 @hr{@{$self->{'_COLUMN_NAMES'}}} = @{$fr};
  127         704  
  127         266  
1078 127         752 \%hr;
1079             } # getline_hr
1080              
1081             sub getline_hr_all {
1082 250     250 1 560 my ($self, @args) = @_;
1083 250 100       1165 $self->{'_COLUMN_NAMES'} or croak ($self->SetDiag (3002));
1084 248         388 my @cn = @{$self->{'_COLUMN_NAMES'}};
  248         715  
1085 248         476 [ map { my %h; @h{@cn} = @{$_}; \%h } @{$self->getline_all (@args)} ];
  375         678  
  375         558  
  375         1659  
  375         2143  
  248         7282  
1086             } # getline_hr_all
1087              
1088             sub say {
1089 34     34 1 3613 my ($self, $io, @f) = @_;
1090 34         92 my $eol = $self->eol ();
1091             # say ($fh, undef) does not propage actual undef to print ()
1092 34 100 66     443 my $state = $self->print ($io, @f == 1 && !defined $f[0] ? undef : @f);
1093 34 100       446 unless (length $eol) {
1094 32   33     94 $eol = $self->eol_type () || $\ || $/;
1095 32         68 print $io $eol;
1096             }
1097 34         127 return $state;
1098             } # say
1099              
1100             sub print_hr {
1101 3     3 1 738 my ($self, $io, $hr) = @_;
1102 3 100       267 $self->{'_COLUMN_NAMES'} or croak ($self->SetDiag (3009));
1103 2 100       138 ref $hr eq "HASH" or croak ($self->SetDiag (3010));
1104 1         3 $self->print ($io, [ map { $hr->{$_} } $self->column_names () ]);
  3         13  
1105             } # print_hr
1106              
1107             sub fragment {
1108 58     58 1 45836 my ($self, $io, $spec) = @_;
1109              
1110 58         360 my $qd = qr{\s* [0-9]+ \s* }x; # digit
1111 58         151 my $qs = qr{\s* (?: [0-9]+ | \* ) \s*}x; # digit or star
1112 58         767 my $qr = qr{$qd (?: - $qs )?}x; # range
1113 58         664 my $qc = qr{$qr (?: ; $qr )*}x; # list
1114 58 100 100     6418 defined $spec && $spec =~ m{^ \s*
1115             \x23 ? \s* # optional leading #
1116             ( row | col | cell ) \s* =
1117             ( $qc # for row and col
1118             | $qd , $qd (?: - $qs , $qs)? # for cell (ranges)
1119             (?: ; $qd , $qd (?: - $qs , $qs)? )* # and cell (range) lists
1120             ) \s* $}xi or croak ($self->SetDiag (2013));
1121 38         249 my ($type, $range) = (lc $1, $2);
1122              
1123 38         153 my @h = $self->column_names ();
1124              
1125 38         72 my @c;
1126 38 100       107 if ($type eq "cell") {
1127 21         42 my @spec;
1128             my $min_row;
1129 21         39 my $max_row = 0;
1130 21         137 for (split m/\s*;\s*/ => $range) {
1131 37 100       4652 my ($tlr, $tlc, $brr, $brc) = (m{
1132             ^ \s* ([0-9]+ ) \s* , \s* ([0-9]+ ) \s*
1133             (?: - \s* ([0-9]+ | \*) \s* , \s* ([0-9]+ | \*) \s* )?
1134             $}x) or croak ($self->SetDiag (2013));
1135 36 100       129 defined $brr or ($brr, $brc) = ($tlr, $tlc);
1136 36 100 100     2134 $tlr == 0 || $tlc == 0 ||
      66        
      100        
      100        
      66        
      100        
      100        
1137             ($brr ne "*" && ($brr == 0 || $brr < $tlr)) ||
1138             ($brc ne "*" && ($brc == 0 || $brc < $tlc))
1139             and croak ($self->SetDiag (2013));
1140 28         48 $tlc--;
1141 28 100       104 $brc-- unless $brc eq "*";
1142 28 100       73 defined $min_row or $min_row = $tlr;
1143 28 100       63 $tlr < $min_row and $min_row = $tlr;
1144 28 100 100     102 $brr eq "*" || $brr > $max_row and
1145             $max_row = $brr;
1146 28         111 push @spec, [ $tlr, $tlc, $brr, $brc ];
1147             }
1148 12         70 my $r = 0;
1149 12         282 while (my $row = $self->getline ($io)) {
1150 77 100       577 ++$r < $min_row and next;
1151 33         56 my %row;
1152             my $lc;
1153 33         71 foreach my $s (@spec) {
1154 77         142 my ($tlr, $tlc, $brr, $brc) = @{$s};
  77         184  
1155 77 100 100     324 $r < $tlr || ($brr ne "*" && $r > $brr) and next;
      100        
1156 45 100 100     134 !defined $lc || $tlc < $lc and $lc = $tlc;
1157 45 100       102 my $rr = $brc eq "*" ? $#{$row} : $brc;
  5         9  
1158 45         337 $row{$_} = $row->[$_] for $tlc .. $rr;
1159             }
1160 33         156 push @c, [ @row{sort { $a <=> $b } keys %row } ];
  64         241  
1161 33 100       91 if (@h) {
1162 2         4 my %h; @h{@h} = @{$c[-1]};
  2         3  
  2         10  
1163 2         5 $c[-1] = \%h;
1164             }
1165 33 100 100     465 $max_row ne "*" && $r == $max_row and last;
1166             }
1167 12         139 return \@c;
1168             }
1169              
1170             # row or col
1171 17         31 my @r;
1172 17         32 my $eod = 0;
1173 17         93 for (split m/\s*;\s*/ => $range) {
1174 25 50       168 my ($from, $to) = m/^\s* ([0-9]+) (?: \s* - \s* ([0-9]+ | \* ))? \s* $/x
1175             or croak ($self->SetDiag (2013));
1176 25   100     151 $to ||= $from;
1177 25 100       64 $to eq "*" and ($to, $eod) = ($from, 1);
1178             # $to cannot be <= 0 due to regex and ||=
1179 25 100 100     635 $from <= 0 || $to < $from and croak ($self->SetDiag (2013));
1180 22         124 $r[$_] = 1 for $from .. $to;
1181             }
1182              
1183 14         29 my $r = 0;
1184 14 100       40 $type eq "col" and shift @r;
1185 14   100     153 $_ ||= 0 for @r;
1186 14         498 while (my $row = $self->getline ($io)) {
1187 109         227 $r++;
1188 109 100       222 if ($type eq "row") {
1189 64 100 100     295 if (($r > $#r && $eod) || $r[$r]) {
      100        
1190 20         64 push @c, $row;
1191 20 100       50 if (@h) {
1192 3         7 my %h; @h{@h} = @{$c[-1]};
  3         8  
  3         19  
1193 3         8 $c[-1] = \%h;
1194             }
1195             }
1196 64         653 next;
1197             }
1198 45 100 100     99 push @c, [ map { ($_ > $#r && $eod) || $r[$_] ? $row->[$_] : () } 0..$#{$row} ];
  405         1738  
  45         124  
1199 45 100       544 if (@h) {
1200 9         15 my %h; @h{@h} = @{$c[-1]};
  9         15  
  9         24  
1201 9         139 $c[-1] = \%h;
1202             }
1203             }
1204              
1205 14         132 return \@c;
1206             } # fragment
1207              
1208             my $csv_usage = q{usage: my $aoa = csv (in => $file);};
1209              
1210             sub _csv_attr {
1211 343 100 66 343   2449 my %attr = (@_ == 1 && ref $_[0] eq "HASH" ? %{$_[0]} : @_) or croak ();
  4 50       16  
1212              
1213 343         809 $attr{'binary'} = 1;
1214 343         728 $attr{'strict_eol'} = 1;
1215              
1216 343   100     1939 my $enc = delete $attr{'enc'} || delete $attr{'encoding'} || "";
1217 343 100       970 $enc eq "auto" and ($attr{'detect_bom'}, $enc) = (1, "");
1218 343 50       1058 my $stack = $enc =~ s/(:\w.*)// ? $1 : "";
1219 343 100       897 $enc =~ m/^[-\w.]+$/ and $enc = ":encoding($enc)";
1220 343         629 $enc .= $stack;
1221              
1222 343         711 my $hdrs = delete $attr{'headers'};
1223 343         663 my $frag = delete $attr{'fragment'};
1224 343         713 my $key = delete $attr{'key'};
1225 343         603 my $val = delete $attr{'value'};
1226             my $kh = delete $attr{'keep_headers'} ||
1227             delete $attr{'keep_column_names'} ||
1228 343   100     1953 delete $attr{'kh'};
1229              
1230             my $cbai = delete $attr{'callbacks'}{'after_in'} ||
1231             delete $attr{'after_in'} ||
1232             delete $attr{'callbacks'}{'after_parse'} ||
1233 343   100     2470 delete $attr{'after_parse'};
1234             my $cbbo = delete $attr{'callbacks'}{'before_out'} ||
1235 343   100     1085 delete $attr{'before_out'};
1236             my $cboi = delete $attr{'callbacks'}{'on_in'} ||
1237 343   100     2136 delete $attr{'on_in'};
1238             my $cboe = delete $attr{'callbacks'}{'on_error'} ||
1239 343   66     1124 delete $attr{'on_error'};
1240              
1241             my $hd_s = delete $attr{'sep_set'} ||
1242 343   100     1080 delete $attr{'seps'};
1243             my $hd_b = delete $attr{'detect_bom'} ||
1244 343   100     1213 delete $attr{'bom'};
1245             my $hd_m = delete $attr{'munge'} ||
1246 343   100     1121 delete $attr{'munge_column_names'};
1247 343         515 my $hd_c = delete $attr{'set_column_names'};
1248              
1249 343         631 my $fh;
1250 343         520 my $sink = 0;
1251 343         502 my $cls = 0; # If I open a file, I have to close it
1252 343 100 100     1724 my $in = delete $attr{'in'} || delete $attr{'file'} or croak ($csv_usage);
1253             my $out = exists $attr{'out'} && !$attr{'out'} ? \"skip"
1254 340 100 100     1583 : delete $attr{'out'} || delete $attr{'file'};
      100        
1255              
1256 340 100 100     1438 ref $in eq "CODE" || ref $in eq "ARRAY" and $out ||= \*STDOUT;
      100        
1257              
1258 340         634 my ($fho, $fho_cls);
1259 340 100 66     1545 if ($in && $out and (!ref $in || ref $in eq "GLOB" || ref \$in eq "GLOB")
      66        
      100        
      66        
      66        
1260             and (!ref $out || ref $out eq "GLOB" || ref \$out eq "GLOB")) {
1261 7 100 66     33 if (ref $out or "GLOB" eq ref \$out) {
1262 2         5 $fho = $out;
1263             }
1264             else {
1265 5 50       739 open $fho, ">", $out or croak "$out: $!\n";
1266 5 50       30 if (my $e = $attr{'encoding'}) {
1267 0         0 binmode $fho, ":encoding($e)";
1268 0 0       0 $hd_b and print $fho "\x{feff}";
1269             }
1270 5         13 $fho_cls = 1;
1271             }
1272 7 100 66     26 if ($cboi && !$cbai) {
1273 1         3 $cbai = $cboi;
1274 1         3 $cboi = undef;
1275             }
1276 7 100       20 if ($cbai) {
1277 2         4 my $cb = $cbai;
1278 2     6   14 $cbai = sub { $cb->(@_); $_[0]->say ($fho, $_[1]); 0 };
  6         29  
  6         34  
  6         80  
1279             }
1280             else {
1281 5     15   56 $cbai = sub { $_[0]->say ($fho, $_[1]); 0 };
  15         80  
  15         217  
1282             }
1283              
1284             # Put all callbacks back in place for streaming behavior
1285 7         20 $attr{'callbacks'}{'after_parse'} = $cbai; $cbai = undef;
  7         14  
1286 7         15 $attr{'callbacks'}{'before_out'} = $cbbo; $cbbo = undef;
  7         41  
1287 7         18 $attr{'callbacks'}{'on_in'} = $cboi; $cboi = undef;
  7         9  
1288 7         14 $attr{'callbacks'}{'on_error'} = $cboe; $cboe = undef;
  7         13  
1289 7         37 $out = undef;
1290 7         44 $sink = 1;
1291             }
1292              
1293 340 100       787 if ($out) {
1294 33 100 100     270 if (ref $out and ("ARRAY" eq ref $out or "HASH" eq ref $out)) {
    100 100        
    100 100        
      100        
      66        
      66        
      66        
1295 5         7 delete $attr{'out'};
1296 5         8 $sink = 1;
1297             }
1298             elsif ((ref $out and "SCALAR" ne ref $out) or "GLOB" eq ref \$out) {
1299 14         21 $fh = $out;
1300             }
1301 7         42 elsif (ref $out and "SCALAR" eq ref $out and defined ${$out} and ${$out} eq "skip") {
  7         22  
1302 2         5 delete $attr{'out'};
1303 2         5 $sink = 1;
1304             }
1305             else {
1306 12 100       1029 open $fh, ">", $out or croak ("$out: $!");
1307 11         37 $cls = 1;
1308             }
1309 32 100       65 if ($fh) {
1310 25 100       54 if ($enc) {
1311 1         11 binmode $fh, $enc;
1312 1         61 my $fn = fileno $fh; # This is a workaround for a bug in PerlIO::via::gzip
1313             }
1314 25 100 66     98 unless (defined $attr{'eol'} || defined $fho) {
1315 18         30 my @layers = eval { PerlIO::get_layers ($fh) };
  18         125  
1316 18 100       109 $attr{'eol'} = (grep m/crlf/ => @layers) ? "\n" : "\r\n";
1317             }
1318             }
1319             }
1320              
1321 339 100 100     2153 if ( ref $in eq "CODE" or ref $in eq "ARRAY") {
    100 100        
    100          
1322             # All done
1323             }
1324             elsif (ref $in eq "SCALAR") {
1325             # Strings with code points over 0xFF may not be mapped into in-memory file handles
1326             # "<$enc" does not change that :(
1327 30 50       381 open $fh, "<", $in or croak ("Cannot open from SCALAR using PerlIO");
1328 30         64 $cls = 1;
1329             }
1330             elsif (ref $in or "GLOB" eq ref \$in) {
1331 18 50 66     47 if (!ref $in && $] < 5.008005) {
1332 0         0 $fh = \*{$in}; # uncoverable statement ancient perl version required
  0         0  
1333             }
1334             else {
1335 18         23 $fh = $in;
1336             }
1337             }
1338             else {
1339 267 100       13904 open $fh, "<$enc", $in or croak ("$in: $!");
1340 265         2440 $cls = 1;
1341             }
1342 337 50 33     1012 $fh || $sink or croak (qq{No valid source passed. "in" is required});
1343              
1344 337         1461 for ([ 'quo' => "quote" ],
1345             [ 'esc' => "escape" ],
1346             [ 'escape' => "escape_char" ],
1347             ) {
1348 1011         1323 my ($f, $t) = @{$_};
  1011         2030  
1349 1011 100 100     2893 exists $attr{$f} and !exists $attr{$t} and $attr{$t} = delete $attr{$f};
1350             }
1351              
1352 337         1031 my $fltr = delete $attr{'filter'};
1353             my %fltr = (
1354 10 100 33 10   14 'not_blank' => sub { @{$_[1]} > 1 or defined $_[1][0] && $_[1][0] ne "" },
  10         63  
1355 10 50   10   17 'not_empty' => sub { grep { defined && $_ ne "" } @{$_[1]} },
  26         144  
  10         22  
1356 10 50   10   20 'filled' => sub { grep { defined && m/\S/ } @{$_[1]} },
  26         211  
  10         19  
1357 337         3248 );
1358             defined $fltr && !ref $fltr && exists $fltr{$fltr} and
1359 337 50 100     1071 $fltr = { '0' => $fltr{$fltr} };
      66        
1360 337 100       800 ref $fltr eq "CODE" and $fltr = { 0 => $fltr };
1361 337 100       849 ref $fltr eq "HASH" or $fltr = undef;
1362              
1363 337         690 my $form = delete $attr{'formula'};
1364              
1365 337 100       992 defined $attr{'auto_diag'} or $attr{'auto_diag'} = 1;
1366 337 100       979 defined $attr{'escape_null'} or $attr{'escape_null'} = 0;
1367 337 50 66     2347 my $csv = delete $attr{'csv'} || Text::CSV_XS->new (\%attr)
1368             or croak ($last_err);
1369 337 100       819 defined $form and $csv->formula ($form);
1370 337 100       933 defined $cboe and $csv->callbacks (error => $cboe);
1371              
1372 337 100 100     981 $kh && !ref $kh && $kh =~ m/^(?:1|yes|true|internal|auto)$/i and
      100        
1373             $kh = \@internal_kh;
1374              
1375             return {
1376 337         8614 'csv' => $csv,
1377             'attr' => { %attr },
1378             'fh' => $fh,
1379             'cls' => $cls,
1380             'in' => $in,
1381             'sink' => $sink,
1382             'out' => $out,
1383             'enc' => $enc,
1384             'fho' => $fho,
1385             'fhoc' => $fho_cls,
1386             'hdrs' => $hdrs,
1387             'key' => $key,
1388             'val' => $val,
1389             'kh' => $kh,
1390             'frag' => $frag,
1391             'fltr' => $fltr,
1392             'cbai' => $cbai,
1393             'cbbo' => $cbbo,
1394             'cboi' => $cboi,
1395             'hd_s' => $hd_s,
1396             'hd_b' => $hd_b,
1397             'hd_m' => $hd_m,
1398             'hd_c' => $hd_c,
1399             };
1400             } # _csv_attr
1401              
1402             sub csv {
1403 344 100 100 344 1 906070 @_ && ref $_[0] eq __PACKAGE__ and splice @_, 0, 0, "csv";
1404 344 100       1179 @_ or croak ($csv_usage);
1405              
1406 343         1007 my $c = _csv_attr (@_);
1407              
1408 337         720 my ($csv, $in, $fh, $hdrs) = @{$c}{qw( csv in fh hdrs )};
  337         1131  
1409 337         694 my %hdr;
1410 337 100       866 if (ref $hdrs eq "HASH") {
1411 2         25 %hdr = %{$hdrs};
  2         7  
1412 2         5 $hdrs = "auto";
1413             }
1414              
1415 337 100 100     953 if ($c->{'out'} && !$c->{'sink'}) {
1416             !$hdrs && ref $c->{'kh'} && $c->{'kh'} == \@internal_kh and
1417 24 100 100     107 $hdrs = $c->{'kh'};
      66        
1418              
1419 24 100 100     42 if (ref $in eq "CODE") {
    100          
1420 3         7 my $hdr = 1;
1421 3         13 while (my $row = $in->($csv)) {
1422 7 100       73 if (ref $row eq "ARRAY") {
1423 3         32 $csv->print ($fh, $row);
1424 3         44 next;
1425             }
1426 4 50       11 if (ref $row eq "HASH") {
1427 4 100       10 if ($hdr) {
1428 2 50 100     7 $hdrs ||= [ map { $hdr{$_} || $_ } keys %{$row} ];
  3         10  
  1         3  
1429 2         35 $csv->print ($fh, $hdrs);
1430 2         42 $hdr = 0;
1431             }
1432 4         5 $csv->print ($fh, [ @{$row}{@{$hdrs}} ]);
  4         38  
  4         7  
1433             }
1434             }
1435             }
1436 21         100 elsif (@{$in} == 0 or ref $in->[0] eq "ARRAY") { # aoa
1437 10 50       22 ref $hdrs and $csv->print ($fh, $hdrs);
1438 10         11 for (@{$in}) {
  10         19  
1439 12 100       73 $c->{'cboi'} and $c->{'cboi'}->($csv, $_);
1440 12 50       1186 $c->{'cbbo'} and $c->{'cbbo'}->($csv, $_);
1441 12         169 $csv->print ($fh, $_);
1442             }
1443             }
1444             else { # aoh
1445 11 100       34 my @hdrs = ref $hdrs ? @{$hdrs} : keys %{$in->[0]};
  5         17  
  6         17  
1446 11 100       26 defined $hdrs or $hdrs = "auto";
1447             ref $hdrs || $hdrs eq "auto" and @hdrs and
1448 11 100 100     59 $csv->print ($fh, [ map { $hdr{$_} || $_ } @hdrs ]);
  20 100 66     238  
1449 11         106 for (@{$in}) {
  11         45  
1450 17         76 local %_;
1451 17         38 *_ = $_;
1452 17 50       39 $c->{'cboi'} and $c->{'cboi'}->($csv, $_);
1453 17 50       35 $c->{'cbbo'} and $c->{'cbbo'}->($csv, $_);
1454 17         25 $csv->print ($fh, [ @{$_}{@hdrs} ]);
  17         117  
1455             }
1456             }
1457              
1458 24 100       858 $c->{'cls'} and close $fh;
1459 24 50       61 $c->{'fho_cls'} and close $c->{'fho'};
1460 24         373 return 1;
1461             }
1462              
1463 313         517 my @row1;
1464 313 100 100     1670 if (defined $c->{'hd_s'} || defined $c->{'hd_b'} || defined $c->{'hd_m'} || defined $c->{'hd_c'}) {
      100        
      100        
1465 173         277 my %harg;
1466             !defined $c->{'hd_s'} && $c->{'attr'}{'sep_char'} and
1467 173 100 100     739 $c->{'hd_s'} = [ $c->{'attr'}{'sep_char'} ];
1468             !defined $c->{'hd_s'} && $c->{'attr'}{'sep'} and
1469 173 100 100     781 $c->{'hd_s'} = [ $c->{'attr'}{'sep'} ];
1470 173 100       520 defined $c->{'hd_s'} and $harg{'sep_set'} = $c->{'hd_s'};
1471 173 100       494 defined $c->{'hd_b'} and $harg{'detect_bom'} = $c->{'hd_b'};
1472 173 50       423 defined $c->{'hd_m'} and $harg{'munge_column_names'} = $hdrs ? "none" : $c->{'hd_m'};
    100          
1473 173 50       394 defined $c->{'hd_c'} and $harg{'set_column_names'} = $hdrs ? 0 : $c->{'hd_c'};
    100          
1474 173         654 @row1 = $csv->header ($fh, \%harg);
1475 170         495 my @hdr = $csv->column_names ();
1476 170 100 100     1000 @hdr and $hdrs ||= \@hdr;
1477             }
1478              
1479 310 100       746 if ($c->{'kh'}) {
1480 15         26 @internal_kh = ();
1481 15 100       665 ref $c->{'kh'} eq "ARRAY" or croak ($csv->SetDiag (1501));
1482 10   100     24 $hdrs ||= "auto";
1483             }
1484              
1485 305         626 my $key = $c->{'key'};
1486 305 100       685 if ($key) {
1487 27 100 100     702 !ref $key or ref $key eq "ARRAY" && @{$key} > 1 or croak ($csv->SetDiag (1501));
  8   100     470  
1488 20   100     57 $hdrs ||= "auto";
1489             }
1490 298         680 my $val = $c->{'val'};
1491 298 100       600 if ($val) {
1492 9 100       153 $key or croak ($csv->SetDiag (1502));
1493 8 100 100     415 !ref $val or ref $val eq "ARRAY" && @{$val} > 0 or croak ($csv->SetDiag (1503));
  3   100     181  
1494             }
1495              
1496 294 100 100     726 $c->{'fltr'} && grep m/\D/ => keys %{$c->{'fltr'}} and $hdrs ||= "auto";
  16   100     150  
1497 294 100       754 if (defined $hdrs) {
1498 223 100 100     904 if (!ref $hdrs or ref $hdrs eq "CODE") {
1499 52 100       2209 my $h = $c->{'hd_b'}
1500             ? [ $csv->column_names () ]
1501             : $csv->getline ($fh);
1502 52   33     250 my $has_h = $h && @$h;
1503              
1504 52 100       191 if (ref $hdrs) {
    100          
    100          
    100          
    50          
1505 1 50       5 $has_h or return;
1506 1         3 my $cr = $hdrs;
1507 1   33     3 $hdrs = [ map { $cr->($hdr{$_} || $_) } @{$h} ];
  3         24  
  1         3  
1508             }
1509             elsif ($hdrs eq "skip") {
1510             # discard;
1511             }
1512             elsif ($hdrs eq "auto") {
1513 48 50       101 $has_h or return;
1514 48 100       68 $hdrs = [ map { $hdr{$_} || $_ } @{$h} ];
  136         508  
  48         96  
1515             }
1516             elsif ($hdrs eq "lc") {
1517 1 50       4 $has_h or return;
1518 1   33     4 $hdrs = [ map { lc ($hdr{$_} || $_) } @{$h} ];
  3         19  
  1         2  
1519             }
1520             elsif ($hdrs eq "uc") {
1521 1 50       4 $has_h or return;
1522 1   33     3 $hdrs = [ map { uc ($hdr{$_} || $_) } @{$h} ];
  3         41  
  1         3  
1523             }
1524             }
1525 223 100 66     649 $c->{'kh'} and $hdrs and @{$c->{'kh'}} = @{$hdrs};
  10         25  
  10         13  
1526             }
1527              
1528 294 100       729 if ($c->{'fltr'}) {
1529 16         24 my %f = %{$c->{'fltr'}};
  16         64  
1530             # convert headers to index
1531 16         28 my @hdr;
1532 16 100       39 if (ref $hdrs) {
1533 7         12 @hdr = @{$hdrs};
  7         23  
1534 7         29 for (0 .. $#hdr) {
1535 21 100       72 exists $f{$hdr[$_]} and $f{$_ + 1} = delete $f{$hdr[$_]};
1536             }
1537             }
1538             $csv->callbacks ('after_parse' => sub {
1539 114     114   825 my ($CSV, $ROW) = @_; # lexical sub-variables in caps
1540 114         288 foreach my $FLD (sort keys %f) {
1541 115         296 local $_ = $ROW->[$FLD - 1];
1542 115         198 local %_;
1543 115 100       236 @hdr and @_{@hdr} = @{$ROW};
  51         178  
1544 115 100       306 $f{$FLD}->($CSV, $ROW) or return \"skip";
1545 52         896 $ROW->[$FLD - 1] = $_;
1546             }
1547 16         130 });
1548             }
1549              
1550 294         511 my $frag = $c->{'frag'};
1551             my $ref = ref $hdrs
1552             ? # aoh
1553 294 100       5401 do {
    100          
1554 222         591 my @h = $csv->column_names ($hdrs);
1555 222         361 my %h; $h{$_}++ for @h;
  222         877  
1556 222 50       574 exists $h{''} and croak ($csv->SetDiag (1012));
1557 222 50       551 unless (keys %h == @h) {
1558             croak ($csv->_SetDiagInfo (1013, join ", " =>
1559 0         0 map { "$_ ($h{$_})" } grep { $h{$_} > 1 } keys %h));
  0         0  
  0         0  
1560             }
1561             $frag ? $csv->fragment ($fh, $frag) :
1562 222 100       948 $key ? do {
    100          
    100          
1563 17 100       46 my ($k, $j, @f) = ref $key ? (undef, @{$key}) : ($key);
  5         11  
1564 17 100       28 if (my @mk = grep { !exists $h{$_} } grep { defined } $k, @f) {
  22         71  
  27         44  
1565 2         9 croak ($csv->_SetDiagInfo (4001, join ", " => @mk));
1566             }
1567             +{ map {
1568 26         32 my $r = $_;
1569 26 100       60 my $K = defined $k ? $r->{$k} : join $j => @{$r}{@f};
  4         11  
1570             ( $K => (
1571             $val
1572             ? ref $val
1573 4         38 ? { map { $_ => $r->{$_} } @{$val} }
  2         4  
1574 26 100       113 : $r->{$val}
    100          
1575             : $r ));
1576 15         23 } @{$csv->getline_hr_all ($fh)} }
  15         51  
1577             }
1578             : $csv->getline_hr_all ($fh);
1579             }
1580             : # aoa
1581             $frag ? $csv->fragment ($fh, $frag)
1582             : $csv->getline_all ($fh);
1583 286 50       1845 if ($ref) {
1584 286 100 66     1238 @row1 && !$c->{'hd_c'} && !ref $hdrs and unshift @{$ref}, \@row1;
  4   100     9  
1585             }
1586             else {
1587 0         0 Text::CSV_XS->auto_diag ();
1588             }
1589 286 100       4830 $c->{'cls'} and close $fh;
1590 286 50       929 $c->{'fho_cls'} and close $c->{'fho'};
1591 286 100 100     1588 if ($ref and $c->{'cbai'} || $c->{'cboi'}) {
      66        
1592             # Default is ARRAYref, but with key =>, you'll get a hashref
1593 23 100       77 foreach my $r (ref $ref eq "ARRAY" ? @{$ref} : values %{$ref}) {
  22         62  
  1         5  
1594 74         9748 local %_;
1595 74 100       247 ref $r eq "HASH" and *_ = $r;
1596 74 100       280 $c->{'cbai'} and $c->{'cbai'}->($csv, $r);
1597 74 100       6306 $c->{'cboi'} and $c->{'cboi'}->($csv, $r);
1598             }
1599             }
1600              
1601 286 100       2619 if ($c->{'sink'}) {
1602 14 100       407 my $ro = ref $c->{'out'} or return;
1603              
1604 7 100 66     23 $ro eq "SCALAR" && ${$c->{'out'}} eq "skip" and
  2         52  
1605             return;
1606              
1607 5 50       7 $ro eq ref $ref or
1608             croak ($csv->_SetDiagInfo (5001, "Output type mismatch"));
1609              
1610 5 100       10 if ($ro eq "ARRAY") {
1611 4 100 33     4 if (@{$c->{'out'}} and @$ref and ref $c->{'out'}[0] eq ref $ref->[0]) {
  4   66     20  
1612 2         3 push @{$c->{'out'}} => @$ref;
  2         5  
1613 2         29 return $c->{'out'};
1614             }
1615 2         6 croak ($csv->_SetDiagInfo (5001, "Output type mismatch"));
1616             }
1617              
1618 1 50       3 if ($ro eq "HASH") {
1619 1         2 @{$c->{'out'}}{keys %{$ref}} = values %{$ref};
  1         12  
  1         1  
  1         2  
1620 1         17 return $c->{'out'};
1621             }
1622              
1623 0         0 croak ($csv->_SetDiagInfo (5002, "Unsupported output type"));
1624             }
1625              
1626             defined wantarray or
1627             return csv (
1628             'in' => $ref,
1629             'headers' => $hdrs,
1630 272 100       605 %{$c->{'attr'}},
  1         11  
1631             );
1632              
1633 271   100     1223 $last_err ||= $csv->{'_ERROR_DIAG'};
1634 271         6051 return $ref;
1635             } # csv
1636              
1637             1;
1638              
1639             __END__
1640              
1641             =encoding utf-8
1642              
1643             =head1 NAME
1644              
1645             Text::CSV_XS - comma-separated values manipulation routines
1646              
1647             =head1 SYNOPSIS
1648              
1649             # Functional interface
1650             use Text::CSV_XS qw( csv );
1651              
1652             # Read whole file in memory
1653             my $aoa = csv (in => "data.csv"); # as array of array
1654             my $aoh = csv (in => "data.csv",
1655             headers => "auto"); # as array of hash
1656              
1657             # Write array of arrays as csv file
1658             csv (in => $aoa, out => "file.csv", sep_char => ";");
1659              
1660             # Only show lines where "code" is odd
1661             csv (in => "data.csv", filter => { code => sub { $_ % 2 }});
1662              
1663              
1664             # Object interface
1665             use Text::CSV_XS;
1666              
1667             my @rows;
1668             # Read/parse CSV
1669             my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });
1670             open my $fh, "<:encoding(utf8)", "test.csv" or die "test.csv: $!";
1671             while (my $row = $csv->getline ($fh)) {
1672             $row->[2] =~ m/pattern/ or next; # 3rd field should match
1673             push @rows, $row;
1674             }
1675             close $fh;
1676              
1677             # and write as CSV
1678             open $fh, ">:encoding(utf8)", "new.csv" or die "new.csv: $!";
1679             $csv->say ($fh, $_) for @rows;
1680             close $fh or die "new.csv: $!";
1681              
1682             =head1 DESCRIPTION
1683              
1684             Text::CSV_XS provides facilities for the composition and decomposition of
1685             comma-separated values. An instance of the Text::CSV_XS class will combine
1686             fields into a C<CSV> string and parse a C<CSV> string into fields.
1687              
1688             The module accepts either strings or files as input and supports the use of
1689             user-specified characters for delimiters, separators, and escapes.
1690              
1691             =head2 Embedded newlines
1692              
1693             B<Important Note>: The default behavior is to accept only ASCII characters
1694             in the range from C<0x20> (space) to C<0x7E> (tilde). This means that the
1695             fields can not contain newlines. If your data contains newlines embedded in
1696             fields, or characters above C<0x7E> (tilde), or binary data, you B<I<must>>
1697             set C<< binary => 1 >> in the call to L</new>. To cover the widest range of
1698             parsing options, you will always want to set binary.
1699              
1700             But you still have the problem that you have to pass a correct line to the
1701             L</parse> method, which is more complicated from the usual point of usage:
1702              
1703             my $csv = Text::CSV_XS->new ({ binary => 1, eol => $/ });
1704             while (<>) { # WRONG!
1705             $csv->parse ($_);
1706             my @fields = $csv->fields ();
1707             }
1708              
1709             this will break, as the C<while> might read broken lines: it does not care
1710             about the quoting. If you need to support embedded newlines, the way to go
1711             is to B<not> pass L<C<eol>|/eol> in the parser (it accepts C<\n>, C<\r>,
1712             B<and> C<\r\n> by default) and then
1713              
1714             my $csv = Text::CSV_XS->new ({ binary => 1 });
1715             open my $fh, "<", $file or die "$file: $!";
1716             while (my $row = $csv->getline ($fh)) {
1717             my @fields = @$row;
1718             }
1719              
1720             The old(er) way of using global file handles is still supported
1721              
1722             while (my $row = $csv->getline (*ARGV)) { ... }
1723              
1724             =head2 Unicode
1725              
1726             Unicode is only tested to work with perl-5.8.2 and up.
1727              
1728             See also L</BOM>.
1729              
1730             The simplest way to ensure the correct encoding is used for in- and output
1731             is by either setting layers on the filehandles, or setting the L</encoding>
1732             argument for L</csv>.
1733              
1734             open my $fh, "<:encoding(UTF-8)", "in.csv" or die "in.csv: $!";
1735             or
1736             my $aoa = csv (in => "in.csv", encoding => "UTF-8");
1737              
1738             open my $fh, ">:encoding(UTF-8)", "out.csv" or die "out.csv: $!";
1739             or
1740             csv (in => $aoa, out => "out.csv", encoding => "UTF-8");
1741              
1742             On parsing (both for L</getline> and L</parse>), if the source is marked
1743             being UTF8, then all fields that are marked binary will also be marked UTF8.
1744              
1745             On combining (L</print> and L</combine>): if any of the combining fields
1746             was marked UTF8, the resulting string will be marked as UTF8. Note however
1747             that any fields I<before> the first field marked UTF8 that contain 8-bit
1748             characters and were not upgraded to UTF8 will be C<bytes> in the
1749             resulting string too, possibly causing unexpected errors. If you pass data
1750             of different encodings, or you don't know whether the encodings differ,
1751             force the fields to be upgraded before you pass them on:
1752              
1753             $csv->print ($fh, [ map { utf8::upgrade (my $x = $_); $x } @data ]);
1754              
1755             For complete control over encoding, please use L<Text::CSV::Encoded>:
1756              
1757             use Text::CSV::Encoded;
1758             my $csv = Text::CSV::Encoded->new ({
1759             encoding_in => "iso-8859-1", # the encoding comes into Perl
1760             encoding_out => "cp1252", # the encoding comes out of Perl
1761             });
1762              
1763             $csv = Text::CSV::Encoded->new ({ encoding => "utf8" });
1764             # combine () and print () accept *literally* utf8 encoded data
1765             # parse () and getline () return *literally* utf8 encoded data
1766              
1767             $csv = Text::CSV::Encoded->new ({ encoding => undef }); # default
1768             # combine () and print () accept UTF8 marked data
1769             # parse () and getline () return UTF8 marked data
1770              
1771             =head2 BOM
1772              
1773             BOM (or Byte Order Mark) handling is available only inside the L</header>
1774             method. This method supports the following encodings: C<utf-8>, C<utf-1>,
1775             C<utf-32be>, C<utf-32le>, C<utf-16be>, C<utf-16le>, C<utf-ebcdic>, C<scsu>,
1776             C<bocu-1>, and C<gb-18030>. See L<Wikipedia|https://en.wikipedia.org/wiki/Byte_order_mark>.
1777              
1778             If a file has a BOM, the easiest way to deal with that is
1779              
1780             my $aoh = csv (in => $file, detect_bom => 1);
1781              
1782             All records will be encoded based on the detected BOM.
1783              
1784             This implies a call to the L</header> method, which defaults to also set
1785             the L</column_names>. So this is B<not> the same as
1786              
1787             my $aoh = csv (in => $file, headers => "auto");
1788              
1789             which only reads the first record to set L</column_names> but ignores any
1790             meaning of a possibly present BOM.
1791              
1792             =head1 SPECIFICATION
1793              
1794             While no formal specification for CSV exists, L<RFC 4180|https://datatracker.ietf.org/doc/html/rfc4180>
1795             (I<1>) describes the common format and establishes C<text/csv> as the MIME
1796             type registered with the IANA. L<RFC 7111|https://datatracker.ietf.org/doc/html/rfc7111>
1797             (I<2>) adds fragments to CSV.
1798              
1799             Many informal documents exist that describe the C<CSV> format. L<"How To:
1800             The Comma Separated Value (CSV) File Format"|http://creativyst.com/Doc/Articles/CSV/CSV01.shtml>
1801             (I<3>) provides an overview of the C<CSV> format in the most widely used
1802             applications and explains how it can best be used and supported.
1803              
1804             1) https://datatracker.ietf.org/doc/html/rfc4180
1805             2) https://datatracker.ietf.org/doc/html/rfc7111
1806             3) http://creativyst.com/Doc/Articles/CSV/CSV01.shtml
1807              
1808             The basic rules are as follows:
1809              
1810             B<CSV> is a delimited data format that has fields/columns separated by the
1811             comma character and records/rows separated by newlines. Fields that contain
1812             a special character (comma, newline, or double quote), must be enclosed in
1813             double quotes. However, if a line contains a single entry that is the empty
1814             string, it may be enclosed in double quotes. If a field's value contains a
1815             double quote character it is escaped by placing another double quote
1816             character next to it. The C<CSV> file format does not require a specific
1817             character encoding, byte order, or line terminator format.
1818              
1819             =over 2
1820              
1821             =item *
1822              
1823             Each record is a single line ended by a line feed (ASCII/C<LF>=C<0x0A>) or
1824             a carriage return and line feed pair (ASCII/C<CRLF>=C<0x0D 0x0A>), however,
1825             line-breaks may be embedded.
1826              
1827             =item *
1828              
1829             Fields are separated by commas.
1830              
1831             =item *
1832              
1833             Allowable characters within a C<CSV> field include C<0x09> (C<TAB>) and the
1834             inclusive range of C<0x20> (space) through C<0x7E> (tilde). In binary mode
1835             all characters are accepted, at least in quoted fields.
1836              
1837             =item *
1838              
1839             A field within C<CSV> must be surrounded by double-quotes to contain a
1840             separator character (comma).
1841              
1842             =back
1843              
1844             Though this is the most clear and restrictive definition, Text::CSV_XS is
1845             way more liberal than this, and allows extension:
1846              
1847             =over 2
1848              
1849             =item *
1850              
1851             Line termination by a single carriage return is accepted by default
1852              
1853             =item *
1854              
1855             The separation-, quote-, and escape character(s) can be any ASCII character
1856             in the range from C<0x20> (space) to C<0x7E> (tilde). Characters outside
1857             this range may or may not work as expected. Multibyte characters, like UTF
1858             C<U+060C> (ARABIC COMMA), C<U+FF0C> (FULLWIDTH COMMA), C<U+241B> (SYMBOL
1859             FOR ESCAPE), C<U+2424> (SYMBOL FOR NEWLINE), C<U+FF02> (FULLWIDTH QUOTATION
1860             MARK), and C<U+201C> (LEFT DOUBLE QUOTATION MARK) (to give some examples of
1861             what might look promising) work for newer versions of perl for C<sep_char>,
1862             and C<quote_char> but not for C<escape_char>.
1863              
1864             If you use perl-5.8.2 or higher these three attributes are utf8-decoded, to
1865             increase the likelihood of success. This way C<U+00FE> will be allowed as a
1866             quote character.
1867              
1868             =item *
1869              
1870             A field in C<CSV> must be surrounded by double-quotes to make an embedded
1871             double-quote, represented by a pair of consecutive double-quotes, valid. In
1872             binary mode you may additionally use the sequence C<"0> for representation
1873             of a NULL byte. Using C<0x00> in binary mode is just as valid.
1874              
1875             =item *
1876              
1877             Several violations of the above specification may be lifted by passing some
1878             options as attributes to the object constructor.
1879              
1880             =back
1881              
1882             =head1 METHODS
1883              
1884             =head2 version
1885             X<version>
1886              
1887             (Class method) Returns the current module version.
1888              
1889             =head2 new
1890             X<new>
1891              
1892             (Class method) Returns a new instance of class Text::CSV_XS. The attributes
1893             are described by the (optional) hash ref C<\%attr>.
1894              
1895             my $csv = Text::CSV_XS->new ({ attributes ... });
1896              
1897             The following attributes are available:
1898              
1899             =head3 eol
1900             X<eol>
1901              
1902             my $csv = Text::CSV_XS->new ({ eol => $/ });
1903             $csv->eol (undef);
1904             my $eol = $csv->eol;
1905              
1906             The end-of-line string to add to rows for L</print> or the record separator
1907             for L</getline>.
1908              
1909             When not passed in a B<parser> instance, the default behavior is to accept
1910             C<\n>, C<\r>, and C<\r\n>, so it is probably safer to not specify C<eol> at
1911             all. Passing C<undef> or the empty string behaves the same.
1912              
1913             When not passed in a B<generating> instance, records are not terminated at
1914             all, so it is probably wise to pass something you expect. A safe choice for
1915             C<eol> on output is either C<$/> or C<\r\n>.
1916              
1917             Common values for C<eol> are C<"\012"> (C<\n> or Line Feed), C<"\015\012">
1918             (C<\r\n> or Carriage Return, Line Feed), and C<"\015"> (C<\r> or Carriage
1919             Return). The L<C<eol>|/eol> attribute cannot exceed 7 (ASCII) characters.
1920              
1921             If both C<$/> and L<C<eol>|/eol> equal C<"\015">, parsing lines that end on
1922             only a Carriage Return without Line Feed, will be L</parse>d correctly.
1923              
1924             =head3 eol_type
1925             X<eol_type>
1926              
1927             my $eol = $csv->eol_type;
1928              
1929             This read-only method returns the internal state of what is considered the
1930             valid EOL for parsing.
1931              
1932             =head3 sep_char
1933             X<sep_char>
1934              
1935             my $csv = Text::CSV_XS->new ({ sep_char => ";" });
1936             $csv->sep_char (";");
1937             my $c = $csv->sep_char;
1938              
1939             The char used to separate fields, by default a comma (C<,>). Limited to a
1940             single-byte character, usually in the range from C<0x20> (space) to C<0x7E>
1941             (tilde). When longer sequences are required, use L<C<sep>|/sep>.
1942              
1943             The separation character can not be equal to the quote character or to the
1944             escape character.
1945              
1946             See also L</CAVEATS>
1947              
1948             =head3 sep
1949             X<sep>
1950              
1951             my $csv = Text::CSV_XS->new ({ sep => "\N{FULLWIDTH COMMA}" });
1952             $csv->sep (";");
1953             my $sep = $csv->sep;
1954              
1955             The chars used to separate fields, by default undefined. Limited to 8 bytes.
1956              
1957             When set, overrules L<C<sep_char>|/sep_char>. If its length is one byte it
1958             acts as an alias to L<C<sep_char>|/sep_char>.
1959              
1960             See also L</CAVEATS>
1961              
1962             =head3 quote_char
1963             X<quote_char>
1964              
1965             my $csv = Text::CSV_XS->new ({ quote_char => "'" });
1966             $csv->quote_char (undef);
1967             my $c = $csv->quote_char;
1968              
1969             The character to quote fields containing blanks or binary data, by default
1970             the double quote character (C<">). A value of undef suppresses quote chars
1971             (for simple cases only). Limited to a single-byte character, usually in the
1972             range from C<0x20> (space) to C<0x7E> (tilde). When longer sequences are
1973             required, use L<C<quote>|/quote>.
1974              
1975             C<quote_char> can not be equal to L<C<sep_char>|/sep_char>.
1976              
1977             =head3 quote
1978             X<quote>
1979              
1980             my $csv = Text::CSV_XS->new ({ quote => "\N{FULLWIDTH QUOTATION MARK}" });
1981             $csv->quote ("'");
1982             my $quote = $csv->quote;
1983              
1984             The chars used to quote fields, by default undefined. Limited to 8 bytes.
1985              
1986             When set, overrules L<C<quote_char>|/quote_char>. If its length is one byte
1987             it acts as an alias to L<C<quote_char>|/quote_char>.
1988              
1989             This method does not support C<undef>. Use L<C<quote_char>|/quote_char> to
1990             disable quotation.
1991              
1992             See also L</CAVEATS>
1993              
1994             =head3 escape_char
1995             X<escape_char>
1996              
1997             my $csv = Text::CSV_XS->new ({ escape_char => "\\" });
1998             $csv->escape_char (":");
1999             my $c = $csv->escape_char;
2000              
2001             The character to escape certain characters inside quoted fields. This is
2002             limited to a single-byte character, usually in the range from C<0x20>
2003             (space) to C<0x7E> (tilde).
2004              
2005             The C<escape_char> defaults to being the double-quote mark (C<">). In other
2006             words the same as the default L<C<quote_char>|/quote_char>. This means that
2007             doubling the quote mark in a field escapes it:
2008              
2009             "foo","bar","Escape ""quote mark"" with two ""quote marks""","baz"
2010              
2011             If you change the L<C<quote_char>|/quote_char> without changing the
2012             C<escape_char>, the C<escape_char> will still be the double-quote (C<">).
2013             If instead you want to escape the L<C<quote_char>|/quote_char> by doubling
2014             it you will need to also change the C<escape_char> to be the same as what
2015             you have changed the L<C<quote_char>|/quote_char> to.
2016              
2017             Setting C<escape_char> to C<undef> or C<""> will completely disable escapes
2018             and is greatly discouraged. This will also disable C<escape_null>.
2019              
2020             The escape character can not be equal to the separation character.
2021              
2022             =head3 binary
2023             X<binary>
2024              
2025             my $csv = Text::CSV_XS->new ({ binary => 1 });
2026             $csv->binary (0);
2027             my $f = $csv->binary;
2028              
2029             If this attribute is C<1>, you may use binary characters in quoted fields,
2030             including line feeds, carriage returns and C<NULL> bytes. (The latter could
2031             be escaped as C<"0>.) By default this feature is off.
2032              
2033             If a string is marked UTF8, C<binary> will be turned on automatically when
2034             binary characters other than C<CR> and C<NL> are encountered. Note that a
2035             simple string like C<"\x{00a0}"> might still be binary, but not marked UTF8,
2036             so setting C<< { binary => 1 } >> is still a wise option.
2037              
2038             =head3 strict
2039             X<strict>
2040              
2041             my $csv = Text::CSV_XS->new ({ strict => 1 });
2042             $csv->strict (0);
2043             my $f = $csv->strict;
2044              
2045             If this attribute is set to C<1>, any row that parses to a different number
2046             of fields than the previous row will cause the parser to throw error 2014.
2047              
2048             Empty rows or rows that result in no fields (like comment lines) are exempt
2049             from these checks.
2050              
2051             =head3 strict_eol
2052             X<strict_eol>
2053              
2054             my $csv = Text::CSV_XS->new ({ strict_eol => 1 });
2055             $csv->strict_eol (0);
2056             my $f = $csv->strict_eol;
2057              
2058             If this attribute is set to C<0>, no EOL consistency checks are done.
2059              
2060             If this attribute is set to C<1>, any row that parses with an EOL other than
2061             the EOL from the first row will cause a warning. The error will be ignored
2062             and parsing continues. This warning is only thrown once. Note that in data
2063             with various different line endings, C<\r\r> will still throw an error that
2064             cannot be ignored.
2065              
2066             If this attribute is set to C<2> or higher, any row that parses with an EOL
2067             other than the EOL from the first row will cause error C<2016> to be thrown.
2068             The line being parsed to this error might not be stored in the result.
2069              
2070             =head3 skip_empty_rows
2071             X<skip_empty_rows>
2072              
2073             my $csv = Text::CSV_XS->new ({ skip_empty_rows => 1 });
2074             $csv->skip_empty_rows ("eof");
2075             my $f = $csv->skip_empty_rows;
2076              
2077             This attribute defines the behavior for empty rows: an L</eol> immediately
2078             following the start of line. Default behavior is to return one single empty
2079             field.
2080              
2081             This attribute is only used in parsing. This attribute is ineffective when
2082             using L</parse> and L</fields>.
2083              
2084             Possible values for this attribute are
2085              
2086             =over 2
2087              
2088             =item 0 | undef
2089              
2090             my $csv = Text::CSV_XS->new ({ skip_empty_rows => 0 });
2091             $csv->skip_empty_rows (undef);
2092              
2093             No special action is taken. The result will be one single empty field.
2094              
2095             =item 1 | "skip"
2096              
2097             my $csv = Text::CSV_XS->new ({ skip_empty_rows => 1 });
2098             $csv->skip_empty_rows ("skip");
2099              
2100             The row will be skipped.
2101              
2102             =item 2 | "eof" | "stop"
2103              
2104             my $csv = Text::CSV_XS->new ({ skip_empty_rows => 2 });
2105             $csv->skip_empty_rows ("eof");
2106              
2107             The parsing will stop as if an L</eof> was detected.
2108              
2109             =item 3 | "die"
2110              
2111             my $csv = Text::CSV_XS->new ({ skip_empty_rows => 3 });
2112             $csv->skip_empty_rows ("die");
2113              
2114             The parsing will stop. The internal error code will be set to 2015 and the
2115             parser will C<die>.
2116              
2117             =item 4 | "croak"
2118              
2119             my $csv = Text::CSV_XS->new ({ skip_empty_rows => 4 });
2120             $csv->skip_empty_rows ("croak");
2121              
2122             The parsing will stop. The internal error code will be set to 2015 and the
2123             parser will C<croak>.
2124              
2125             =item 5 | "error"
2126              
2127             my $csv = Text::CSV_XS->new ({ skip_empty_rows => 5 });
2128             $csv->skip_empty_rows ("error");
2129              
2130             The parsing will fail. The internal error code will be set to 2015.
2131              
2132             =item callback
2133              
2134             my $csv = Text::CSV_XS->new ({ skip_empty_rows => sub { [] } });
2135             $csv->skip_empty_rows (sub { [ 42, $., undef, "empty" ] });
2136              
2137             The callback is invoked and its result used instead. If you want the parse
2138             to stop after the callback, make sure to return a false value.
2139              
2140             The returned value from the callback should be an array-ref. Any other type
2141             will cause the parse to stop, so these are equivalent in behavior:
2142              
2143             csv (in => $fh, skip_empty_rows => "stop");
2144             csv (in => $fh, skip_empty_rows => sub { 0; });
2145              
2146             =back
2147              
2148             Without arguments, the current value is returned: C<0>, C<1>, C<eof>, C<die>,
2149             C<croak> or the callback.
2150              
2151             =head3 formula_handling
2152             X<formula_handling>
2153              
2154             Alias for L</formula>
2155              
2156             =head3 formula
2157             X<formula>
2158              
2159             my $csv = Text::CSV_XS->new ({ formula => "none" });
2160             $csv->formula ("none");
2161             my $f = $csv->formula;
2162              
2163             This defines the behavior of fields containing I<formulas>. As formulas are
2164             considered dangerous in spreadsheets, this attribute can define an optional
2165             action to be taken if a field starts with an equal sign (C<=>).
2166              
2167             For purpose of code-readability, this can also be written as
2168              
2169             my $csv = Text::CSV_XS->new ({ formula_handling => "none" });
2170             $csv->formula_handling ("none");
2171             my $f = $csv->formula_handling;
2172              
2173             Possible values for this attribute are
2174              
2175             =over 2
2176              
2177             =item none
2178              
2179             Take no specific action. This is the default.
2180              
2181             $csv->formula ("none");
2182              
2183             =item die
2184              
2185             Cause the process to C<die> whenever a leading C<=> is encountered.
2186              
2187             $csv->formula ("die");
2188              
2189             =item croak
2190              
2191             Cause the process to C<croak> whenever a leading C<=> is encountered. (See
2192             L<Carp>)
2193              
2194             $csv->formula ("croak");
2195              
2196             =item diag
2197              
2198             Report position and content of the field whenever a leading C<=> is found.
2199             The value of the field is unchanged.
2200              
2201             $csv->formula ("diag");
2202              
2203             =item empty
2204              
2205             Replace the content of fields that start with a C<=> with the empty string.
2206              
2207             $csv->formula ("empty");
2208             $csv->formula ("");
2209              
2210             =item undef
2211              
2212             Replace the content of fields that start with a C<=> with C<undef>.
2213              
2214             $csv->formula ("undef");
2215             $csv->formula (undef);
2216              
2217             =item a callback
2218              
2219             Modify the content of fields that start with a C<=> with the return-value
2220             of the callback. The original content of the field is available inside the
2221             callback as C<$_>.
2222              
2223             # Replace all formulas with 42
2224             $csv->formula (sub { 42; });
2225              
2226             # same as $csv->formula ("empty") but slower
2227             $csv->formula (sub { "" });
2228              
2229             # Allow =4+12
2230             $csv->formula (sub { s/^=(\d+\+\d+)$/$1/eer });
2231              
2232             # Allow more complex calculations
2233             $csv->formula (sub { eval { s{^=([-+*/0-9()]+)$}{$1}ee }; $_ });
2234              
2235             =back
2236              
2237             All other values will give a warning and then fall back to C<diag>.
2238              
2239             =head3 decode_utf8
2240             X<decode_utf8>
2241              
2242             my $csv = Text::CSV_XS->new ({ decode_utf8 => 1 });
2243             $csv->decode_utf8 (0);
2244             my $f = $csv->decode_utf8;
2245              
2246             This attribute defaults to TRUE.
2247              
2248             While I<parsing>, fields that are valid UTF-8, are automatically set to be
2249             UTF-8, so that
2250              
2251             $csv->parse ("\xC4\xA8\n");
2252              
2253             results in
2254              
2255             PV("\304\250"\0) [UTF8 "\x{128}"]
2256              
2257             Sometimes it might not be a desired action. To prevent those upgrades, set
2258             this attribute to false, and the result will be
2259              
2260             PV("\304\250"\0)
2261              
2262             =head3 auto_diag
2263             X<auto_diag>
2264              
2265             my $csv = Text::CSV_XS->new ({ auto_diag => 1 });
2266             $csv->auto_diag (2);
2267             my $l = $csv->auto_diag;
2268              
2269             Setting this attribute to a number between C<1> and C<9> causes L</error_diag>
2270             to be automatically called in void context upon errors.
2271              
2272             In case of error C<2012 - EOF>, this call will be void.
2273              
2274             If C<auto_diag> is set to a numeric value greater than C<1>, it will C<die>
2275             on errors instead of C<warn>. If set to anything unrecognized, it will be
2276             silently ignored.
2277              
2278             Future extensions to this feature will include more reliable auto-detection
2279             of C<autodie> being active in the scope in which the error occurred, which
2280             will increment the value of C<auto_diag> with C<1> the moment the error is
2281             detected.
2282              
2283             =head3 diag_verbose
2284             X<diag_verbose>
2285              
2286             my $csv = Text::CSV_XS->new ({ diag_verbose => 1 });
2287             $csv->diag_verbose (2);
2288             my $l = $csv->diag_verbose;
2289              
2290             Set the verbosity of the output triggered by C<auto_diag>. Currently only
2291             adds the current input-record-number (if known) to the diagnostic output
2292             with an indication of the position of the error.
2293              
2294             =head3 blank_is_undef
2295             X<blank_is_undef>
2296              
2297             my $csv = Text::CSV_XS->new ({ blank_is_undef => 1 });
2298             $csv->blank_is_undef (0);
2299             my $f = $csv->blank_is_undef;
2300              
2301             Under normal circumstances, C<CSV> data makes no distinction between quoted-
2302             and unquoted empty fields. These both end up in an empty string field once
2303             read, thus
2304              
2305             1,"",," ",2
2306              
2307             is read as
2308              
2309             ("1", "", "", " ", "2")
2310              
2311             When I<writing> C<CSV> files with either L<C<always_quote>|/always_quote>
2312             or L<C<quote_empty>|/quote_empty> set, the unquoted I<empty> field is the
2313             result of an undefined value. To enable this distinction when I<reading>
2314             C<CSV> data, the C<blank_is_undef> attribute will cause unquoted empty
2315             fields to be set to C<undef>, causing the above to be parsed as
2316              
2317             ("1", "", undef, " ", "2")
2318              
2319             Note that this is specifically important when loading C<CSV> fields into a
2320             database that allows C<NULL> values, as the perl equivalent for C<NULL> is
2321             C<undef> in L<DBI> land.
2322              
2323             =head3 empty_is_undef
2324             X<empty_is_undef>
2325              
2326             my $csv = Text::CSV_XS->new ({ empty_is_undef => 1 });
2327             $csv->empty_is_undef (0);
2328             my $f = $csv->empty_is_undef;
2329              
2330             Going one step further than L<C<blank_is_undef>|/blank_is_undef>, this
2331             attribute converts all empty fields to C<undef>, so
2332              
2333             1,"",," ",2
2334              
2335             is read as
2336              
2337             (1, undef, undef, " ", 2)
2338              
2339             Note that this affects only fields that are originally empty, not fields
2340             that are empty after stripping allowed whitespace. YMMV.
2341              
2342             =head3 allow_whitespace
2343             X<allow_whitespace>
2344              
2345             my $csv = Text::CSV_XS->new ({ allow_whitespace => 1 });
2346             $csv->allow_whitespace (0);
2347             my $f = $csv->allow_whitespace;
2348              
2349             When this option is set to true, the whitespace (C<TAB>'s and C<SPACE>'s)
2350             surrounding the separation character is removed when parsing. If either
2351             C<TAB> or C<SPACE> is one of the three characters L<C<sep_char>|/sep_char>,
2352             L<C<quote_char>|/quote_char>, or L<C<escape_char>|/escape_char> it will not
2353             be considered whitespace.
2354              
2355             Now lines like:
2356              
2357             1 , "foo" , bar , 3 , zapp
2358              
2359             are parsed as valid C<CSV>, even though it violates the C<CSV> specs.
2360              
2361             Note that B<all> whitespace is stripped from both start and end of each
2362             field. That would make it I<more> than a I<feature> to enable parsing bad
2363             C<CSV> lines, as
2364              
2365             1, 2.0, 3, ape , monkey
2366              
2367             will now be parsed as
2368              
2369             ("1", "2.0", "3", "ape", "monkey")
2370              
2371             even if the original line was perfectly acceptable C<CSV>.
2372              
2373             =head3 allow_loose_quotes
2374             X<allow_loose_quotes>
2375              
2376             my $csv = Text::CSV_XS->new ({ allow_loose_quotes => 1 });
2377             $csv->allow_loose_quotes (0);
2378             my $f = $csv->allow_loose_quotes;
2379              
2380             By default, parsing unquoted fields containing L<C<quote_char>|/quote_char>
2381             characters like
2382              
2383             1,foo "bar" baz,42
2384              
2385             would result in parse error 2034. Though it is still bad practice to allow
2386             this format, we cannot help the fact that some vendors make their
2387             applications spit out lines styled this way.
2388              
2389             If there is B<really> bad C<CSV> data, like
2390              
2391             1,"foo "bar" baz",42
2392              
2393             or
2394              
2395             1,""foo bar baz"",42
2396              
2397             there is a way to get this data-line parsed and leave the quotes inside the
2398             quoted field as-is. This can be achieved by setting C<allow_loose_quotes>
2399             B<AND> making sure that the L<C<escape_char>|/escape_char> is I<not> equal
2400             to L<C<quote_char>|/quote_char>.
2401              
2402             =head3 allow_loose_escapes
2403             X<allow_loose_escapes>
2404              
2405             my $csv = Text::CSV_XS->new ({ allow_loose_escapes => 1 });
2406             $csv->allow_loose_escapes (0);
2407             my $f = $csv->allow_loose_escapes;
2408              
2409             Parsing fields that have L<C<escape_char>|/escape_char> characters that
2410             escape characters that do not need to be escaped, like:
2411              
2412             my $csv = Text::CSV_XS->new ({ escape_char => "\\" });
2413             $csv->parse (qq{1,"my bar\'s",baz,42});
2414              
2415             would result in parse error 2025. Though it is bad practice to allow this
2416             format, this attribute enables you to treat all escape character sequences
2417             equal.
2418              
2419             =head3 allow_unquoted_escape
2420             X<allow_unquoted_escape>
2421              
2422             my $csv = Text::CSV_XS->new ({ allow_unquoted_escape => 1 });
2423             $csv->allow_unquoted_escape (0);
2424             my $f = $csv->allow_unquoted_escape;
2425              
2426             A backward compatibility issue where L<C<escape_char>|/escape_char> differs
2427             from L<C<quote_char>|/quote_char> prevents L<C<escape_char>|/escape_char>
2428             from being in the first position of a field. If L<C<quote_char>|/quote_char> is
2429             equal to the default C<"> and L<C<escape_char>|/escape_char> is set to C<\>,
2430             this would be illegal:
2431              
2432             1,\0,2
2433              
2434             Setting this attribute to C<1> might help to overcome issues with backward
2435             compatibility and allow this style.
2436              
2437             =head3 always_quote
2438             X<always_quote>
2439              
2440             my $csv = Text::CSV_XS->new ({ always_quote => 1 });
2441             $csv->always_quote (0);
2442             my $f = $csv->always_quote;
2443              
2444             By default the generated fields are quoted only if they I<need> to be. For
2445             example, if they contain the separator character. If you set this attribute
2446             to C<1> then I<all> defined fields will be quoted. (C<undef> fields are not
2447             quoted, see L</blank_is_undef>). This makes it quite often easier to handle
2448             exported data in external applications. (Poor creatures who are better to
2449             use Text::CSV_XS. :)
2450              
2451             =head3 quote_space
2452             X<quote_space>
2453              
2454             my $csv = Text::CSV_XS->new ({ quote_space => 1 });
2455             $csv->quote_space (0);
2456             my $f = $csv->quote_space;
2457              
2458             By default, a space in a field would trigger quotation. As no rule exists
2459             for this to be forced in C<CSV>, nor any for the opposite, the default is true
2460             for safety. You can exclude the space from this trigger by setting this
2461             attribute to 0.
2462              
2463             =head3 quote_empty
2464             X<quote_empty>
2465              
2466             my $csv = Text::CSV_XS->new ({ quote_empty => 1 });
2467             $csv->quote_empty (0);
2468             my $f = $csv->quote_empty;
2469              
2470             By default the generated fields are quoted only if they I<need> to be. An
2471             empty (defined) field does not need quotation. If you set this attribute to
2472             C<1> then I<empty> defined fields will be quoted. (C<undef> fields are not
2473             quoted, see L</blank_is_undef>). See also L<C<always_quote>|/always_quote>.
2474              
2475             =head3 quote_binary
2476             X<quote_binary>
2477              
2478             my $csv = Text::CSV_XS->new ({ quote_binary => 1 });
2479             $csv->quote_binary (0);
2480             my $f = $csv->quote_binary;
2481              
2482             By default, all "unsafe" bytes inside a string cause the combined field to
2483             be quoted. By setting this attribute to C<0>, you can disable that trigger
2484             for bytes C<< >= 0x7F >>.
2485              
2486             =head3 escape_null
2487             X<escape_null>
2488             X<quote_null>
2489              
2490             my $csv = Text::CSV_XS->new ({ escape_null => 1 });
2491             $csv->escape_null (0);
2492             my $f = $csv->escape_null;
2493              
2494             By default, a C<NULL> byte in a field would be escaped. This option enables
2495             you to treat the C<NULL> byte as a simple binary character in binary mode
2496             (the C<< { binary => 1 } >> is set). The default is true. You can prevent
2497             C<NULL> escapes by setting this attribute to C<0>.
2498              
2499             When the C<escape_char> attribute is set to undefined, this attribute will
2500             be set to false.
2501              
2502             The default setting will encode "=\x00=" as
2503              
2504             "="0="
2505              
2506             With C<escape_null> set to C<0>, this will result in
2507              
2508             "=\x00="
2509              
2510             The default when using the C<csv> function is C<false>.
2511              
2512             For backward compatibility reasons, the deprecated old name C<quote_null>
2513             is still recognized.
2514              
2515             =head3 keep_meta_info
2516             X<keep_meta_info>
2517              
2518             my $csv = Text::CSV_XS->new ({ keep_meta_info => 1 });
2519             $csv->keep_meta_info (0);
2520             my $f = $csv->keep_meta_info;
2521              
2522             By default, the parsing of input records is as simple and fast as possible.
2523             However, some parsing information - like quotation of the original field -
2524             is lost in that process. Setting this flag to true enables retrieving that
2525             information after parsing with the methods L</meta_info>, L</is_quoted>,
2526             and L</is_binary> described below. Default is false for performance.
2527              
2528             If you set this attribute to a value greater than 9, then you can control
2529             output quotation style like it was used in the input of the last parsed
2530             record (unless quotation was added because of other reasons).
2531              
2532             my $csv = Text::CSV_XS->new ({
2533             binary => 1,
2534             keep_meta_info => 1,
2535             quote_space => 0,
2536             });
2537              
2538             my $row = $csv->parse (q{1,,"", ," ",f,"g","h""h",help,"help"});
2539              
2540             $csv->print (*STDOUT, \@row);
2541             # 1,,, , ,f,g,"h""h",help,help
2542             $csv->keep_meta_info (11);
2543             $csv->print (*STDOUT, \@row);
2544             # 1,,"", ," ",f,"g","h""h",help,"help"
2545              
2546             =head3 undef_str
2547             X<undef_str>
2548              
2549             my $csv = Text::CSV_XS->new ({ undef_str => "\\N" });
2550             $csv->undef_str (undef);
2551             my $s = $csv->undef_str;
2552              
2553             This attribute optionally defines the output of undefined fields. The value
2554             passed is not changed at all, so if it needs quotation, the quotation needs
2555             to be included in the value of the attribute. Use with caution, as passing
2556             a value like C<",",,,,"""> will for sure mess up your output. The default
2557             for this attribute is C<undef>, meaning no special treatment.
2558              
2559             This attribute is useful when exporting CSV data to be imported in custom
2560             loaders, like for MySQL, that recognize special sequences for C<NULL> data.
2561              
2562             This attribute has no meaning when parsing CSV data.
2563              
2564             =head3 comment_str
2565             X<comment_str>
2566              
2567             my $csv = Text::CSV_XS->new ({ comment_str => "#" });
2568             $csv->comment_str (undef);
2569             my $s = $csv->comment_str;
2570              
2571             This attribute optionally defines a string to be recognized as comment. If
2572             this attribute is defined, all lines starting with this sequence will not
2573             be parsed as CSV but skipped as comment.
2574              
2575             This attribute has no meaning when generating CSV.
2576              
2577             Comment strings that start with any of the special characters/sequences are
2578             not supported (so it cannot start with any of L</sep_char>, L</quote_char>,
2579             L</escape_char>, L</sep>, L</quote>, or L</eol>).
2580              
2581             For convenience, C<comment> is an alias for C<comment_str>.
2582              
2583             =head3 verbatim
2584             X<verbatim>
2585              
2586             my $csv = Text::CSV_XS->new ({ verbatim => 1 });
2587             $csv->verbatim (0);
2588             my $f = $csv->verbatim;
2589              
2590             This is a quite controversial attribute to set, but makes some hard things
2591             possible.
2592              
2593             The rationale behind this attribute is to tell the parser that the normally
2594             special characters newline (C<NL>) and Carriage Return (C<CR>) will not be
2595             special when this flag is set, and be dealt with as being ordinary binary
2596             characters. This will ease working with data with embedded newlines.
2597              
2598             When C<verbatim> is used with L</getline>, L</getline> auto-C<chomp>'s
2599             every line.
2600              
2601             Imagine a file format like
2602              
2603             M^^Hans^Janssen^Klas 2\n2A^Ja^11-06-2007#\r\n
2604              
2605             where the line ending is a very specific C<"#\r\n">, and the sep_char is a
2606             C<^> (caret). None of the fields is quoted, but embedded binary data is
2607             likely to be present. With the specific line ending, this should not be too
2608             hard to detect.
2609              
2610             By default, Text::CSV_XS' parse function is instructed to only know about
2611             C<"\n"> and C<"\r"> to be legal line endings, and so has to deal with the
2612             embedded newline as a real C<end-of-line>, so it can scan the next line if
2613             binary is true, and the newline is inside a quoted field. With this option,
2614             we tell L</parse> to parse the line as if C<"\n"> is just nothing more than
2615             a binary character.
2616              
2617             For L</parse> this means that the parser no longer has any notion of line endings
2618             and L</getline> C<chomp>s line endings on reading.
2619              
2620             =head3 types
2621              
2622             A set of column types; the attribute is immediately passed to the L</types>
2623             method.
2624              
2625             =head3 callbacks
2626             X<callbacks>
2627              
2628             See the L</Callbacks> section below.
2629              
2630             =head3 accessors
2631              
2632             To sum it up,
2633              
2634             $csv = Text::CSV_XS->new ();
2635              
2636             is equivalent to
2637              
2638             $csv = Text::CSV_XS->new ({
2639             eol => undef, # \r, \n, or \r\n
2640             sep_char => ',',
2641             sep => undef,
2642             quote_char => '"',
2643             quote => undef,
2644             escape_char => '"',
2645             binary => 0,
2646             decode_utf8 => 1,
2647             auto_diag => 0,
2648             diag_verbose => 0,
2649             blank_is_undef => 0,
2650             empty_is_undef => 0,
2651             allow_whitespace => 0,
2652             allow_loose_quotes => 0,
2653             allow_loose_escapes => 0,
2654             allow_unquoted_escape => 0,
2655             always_quote => 0,
2656             quote_empty => 0,
2657             quote_space => 1,
2658             escape_null => 1,
2659             quote_binary => 1,
2660             keep_meta_info => 0,
2661             strict => 0,
2662             skip_empty_rows => 0,
2663             formula => 0,
2664             verbatim => 0,
2665             undef_str => undef,
2666             comment_str => undef,
2667             types => undef,
2668             callbacks => undef,
2669             });
2670              
2671             For all of the above mentioned flags, an accessor method is available where
2672             you can inquire the current value, or change the value
2673              
2674             my $quote = $csv->quote_char;
2675             $csv->binary (1);
2676              
2677             It is not wise to change these settings halfway through writing C<CSV> data
2678             to a stream. If however you want to create a new stream using the available
2679             C<CSV> object, there is no harm in changing them.
2680              
2681             If the L</new> constructor call fails, it returns C<undef>, and makes the
2682             fail reason available through the L</error_diag> method.
2683              
2684             $csv = Text::CSV_XS->new ({ ecs_char => 1 }) or
2685             die "".Text::CSV_XS->error_diag ();
2686              
2687             L</error_diag> will return a string like
2688              
2689             "INI - Unknown attribute 'ecs_char'"
2690              
2691             =head2 known_attributes
2692             X<known_attributes>
2693              
2694             @attr = Text::CSV_XS->known_attributes;
2695             @attr = Text::CSV_XS::known_attributes;
2696             @attr = $csv->known_attributes;
2697              
2698             This method will return an ordered list of all the supported attributes as
2699             described above. This can be useful for knowing what attributes are valid
2700             in classes that use or extend Text::CSV_XS.
2701              
2702             =head2 print
2703             X<print>
2704              
2705             $status = $csv->print ($fh, $colref);
2706              
2707             Similar to L</combine> + L</string> + L</print>, but much more efficient.
2708             It expects an array ref as input (not an array!) and the resulting string
2709             is not really created, but immediately written to the C<$fh> object,
2710             typically an IO handle or any other object that offers a L</print> method.
2711              
2712             For performance reasons C<print> does not create a result string, so all
2713             L</string>, L</status>, L</fields>, and L</error_input> methods will return
2714             undefined information after executing this method.
2715              
2716             If C<$colref> is C<undef> (explicit, not through a variable argument) and
2717             L</bind_columns> was used to specify fields to be printed, it is possible
2718             to make performance improvements, as otherwise data would have to be copied
2719             as arguments to the method call:
2720              
2721             $csv->bind_columns (\($foo, $bar));
2722             $status = $csv->print ($fh, undef);
2723              
2724             A short benchmark
2725              
2726             my @data = ("aa" .. "zz");
2727             $csv->bind_columns (\(@data));
2728              
2729             $csv->print ($fh, [ @data ]); # 11800 recs/sec
2730             $csv->print ($fh, \@data ); # 57600 recs/sec
2731             $csv->print ($fh, undef ); # 48500 recs/sec
2732              
2733             =head2 say
2734             X<say>
2735              
2736             $status = $csv->say ($fh, $colref);
2737              
2738             Like L<C<print>|/print>, but L<C<eol>|/eol> defaults to C<$\>.
2739              
2740             =head2 print_hr
2741             X<print_hr>
2742              
2743             $csv->print_hr ($fh, $ref);
2744              
2745             Provides an easy way to print a C<$ref> (as fetched with L</getline_hr>)
2746             provided the column names are set with L</column_names>.
2747              
2748             It is just a wrapper method with basic parameter checks over
2749              
2750             $csv->print ($fh, [ map { $ref->{$_} } $csv->column_names ]);
2751              
2752             =head2 combine
2753             X<combine>
2754              
2755             $status = $csv->combine (@fields);
2756              
2757             This method constructs a C<CSV> record from C<@fields>, returning success
2758             or failure. Failure can result from lack of arguments or an argument that
2759             contains an invalid character. Upon success, L</string> can be called to
2760             retrieve the resultant C<CSV> string. Upon failure, the value returned by
2761             L</string> is undefined and L</error_input> could be called to retrieve the
2762             invalid argument.
2763              
2764             =head2 string
2765             X<string>
2766              
2767             $line = $csv->string ();
2768              
2769             This method returns the input to L</parse> or the resultant C<CSV> string
2770             of L</combine>, whichever was called more recently.
2771              
2772             =head2 getline
2773             X<getline>
2774              
2775             $colref = $csv->getline ($fh);
2776              
2777             This is the counterpart to L</print>, as L</parse> is the counterpart to
2778             L</combine>: it parses a row from the C<$fh> handle using the L</getline>
2779             method associated with C<$fh> and parses this row into an array ref. This
2780             array ref is returned by the function or C<undef> for failure. When C<$fh>
2781             does not support C<getline>, you are likely to hit errors.
2782              
2783             When fields are bound with L</bind_columns> the return value is a reference
2784             to an empty list.
2785              
2786             The L</string>, L</fields>, and L</status> methods are meaningless again.
2787              
2788             =head2 getline_all
2789             X<getline_all>
2790              
2791             $arrayref = $csv->getline_all ($fh);
2792             $arrayref = $csv->getline_all ($fh, $offset);
2793             $arrayref = $csv->getline_all ($fh, $offset, $length);
2794              
2795             This will return a reference to a list of L<getline ($fh)|/getline> results.
2796             In this call, C<keep_meta_info> is disabled. If C<$offset> is negative, as
2797             with C<splice>, only the last C<abs ($offset)> records of C<$fh> are taken
2798             into consideration. Parameters C<$offset> and C<$length> are expected to be
2799             integers. Non-integer values are interpreted as integer without check.
2800              
2801             Given a CSV file with 10 lines:
2802              
2803             lines call
2804             ----- ---------------------------------------------------------
2805             0..9 $csv->getline_all ($fh) # all
2806             0..9 $csv->getline_all ($fh, 0) # all
2807             8..9 $csv->getline_all ($fh, 8) # start at 8
2808             - $csv->getline_all ($fh, 0, 0) # start at 0 first 0 rows
2809             0..4 $csv->getline_all ($fh, 0, 5) # start at 0 first 5 rows
2810             4..5 $csv->getline_all ($fh, 4, 2) # start at 4 first 2 rows
2811             8..9 $csv->getline_all ($fh, -2) # last 2 rows
2812             6..7 $csv->getline_all ($fh, -4, 2) # first 2 of last 4 rows
2813              
2814             =head2 getline_hr
2815             X<getline_hr>
2816              
2817             The L</getline_hr> and L</column_names> methods work together to allow you
2818             to have rows returned as hashrefs. You must call L</column_names> first to
2819             declare your column names.
2820              
2821             $csv->column_names (qw( code name price description ));
2822             $hr = $csv->getline_hr ($fh);
2823             print "Price for $hr->{name} is $hr->{price} EUR\n";
2824              
2825             L</getline_hr> will croak if called before L</column_names>.
2826              
2827             Note that L</getline_hr> creates a hashref for every row and will be much
2828             slower than the combined use of L</bind_columns> and L</getline> but still
2829             offering the same easy to use hashref inside the loop:
2830              
2831             my @cols = @{$csv->getline ($fh)};
2832             $csv->column_names (@cols);
2833             while (my $row = $csv->getline_hr ($fh)) {
2834             print $row->{price};
2835             }
2836              
2837             Could easily be rewritten to the much faster:
2838              
2839             my @cols = @{$csv->getline ($fh)};
2840             my $row = {};
2841             $csv->bind_columns (\@{$row}{@cols});
2842             while ($csv->getline ($fh)) {
2843             print $row->{price};
2844             }
2845              
2846             Your mileage may vary for the size of the data and the number of rows. With
2847             perl-5.14.2 the comparison for a 100_000 line file with 14 columns:
2848              
2849             Rate hashrefs getlines
2850             hashrefs 1.00/s -- -76%
2851             getlines 4.15/s 313% --
2852              
2853             =head2 getline_hr_all
2854             X<getline_hr_all>
2855              
2856             $arrayref = $csv->getline_hr_all ($fh);
2857             $arrayref = $csv->getline_hr_all ($fh, $offset);
2858             $arrayref = $csv->getline_hr_all ($fh, $offset, $length);
2859              
2860             This will return a reference to a list of L<getline_hr ($fh)|/getline_hr>
2861             results. In this call, L<C<keep_meta_info>|/keep_meta_info> is disabled.
2862              
2863             =head2 parse
2864             X<parse>
2865              
2866             $status = $csv->parse ($line);
2867              
2868             This method decomposes a C<CSV> string into fields, returning success or
2869             failure. Failure can result from a lack of argument or from the given C<CSV>
2870             string being improperly formatted. Upon success, L</fields> can be called to
2871             retrieve the decomposed fields. Upon failure calling L</fields> will return
2872             undefined data and L</error_input> can be called to retrieve the invalid
2873             argument.
2874              
2875             You may use the L</types> method for setting column types. See L</types>'
2876             description below.
2877              
2878             The C<$line> argument is supposed to be a simple scalar. Everything else is
2879             supposed to croak and set error 1500.
2880              
2881             =head2 fragment
2882             X<fragment>
2883              
2884             This function tries to implement RFC7111 (URI Fragment Identifiers for the
2885             text/csv Media Type) - https://datatracker.ietf.org/doc/html/rfc7111
2886              
2887             my $AoA = $csv->fragment ($fh, $spec);
2888              
2889             In specifications, C<*> is used to specify the I<last> item, a dash (C<->)
2890             to indicate a range. All indices are C<1>-based: the first row or column
2891             has index C<1>. Selections can be combined with the semi-colon (C<;>).
2892              
2893             When using this method in combination with L</column_names>, the returned
2894             reference will point to a list of hashes instead of a list of lists. A
2895             disjointed cell-based combined selection might return rows with different
2896             number of columns making the use of hashes unpredictable.
2897              
2898             $csv->column_names ("Name", "Age");
2899             my $AoH = $csv->fragment ($fh, "col=3;8");
2900              
2901             If the L</after_parse> callback is active, it is also called on every line
2902             parsed and skipped before the fragment.
2903              
2904             =over 2
2905              
2906             =item row
2907              
2908             row=4
2909             row=5-7
2910             row=6-*
2911             row=1-2;4;6-*
2912              
2913             =item col
2914              
2915             col=2
2916             col=1-3
2917             col=4-*
2918             col=1-2;4;7-*
2919              
2920             =item cell
2921              
2922             In cell-based selection, the comma (C<,>) is used to pair row and column
2923              
2924             cell=4,1
2925              
2926             The range operator (C<->) using C<cell>s can be used to define top-left and
2927             bottom-right C<cell> location
2928              
2929             cell=3,1-4,6
2930              
2931             The C<*> is only allowed in the second part of a pair
2932              
2933             cell=3,2-*,2 # row 3 till end, only column 2
2934             cell=3,2-3,* # column 2 till end, only row 3
2935             cell=3,2-*,* # strip row 1 and 2, and column 1
2936              
2937             Cells and cell ranges may be combined with C<;>, possibly resulting in rows
2938             with different numbers of columns
2939              
2940             cell=1,1-2,2;3,3-4,4;1,4;4,1
2941              
2942             Disjointed selections will only return selected cells. The cells that are
2943             not specified will not be included in the returned set, not even as
2944             C<undef>. As an example given a C<CSV> like
2945              
2946             11,12,13,...19
2947             21,22,...28,29
2948             : :
2949             91,...97,98,99
2950              
2951             with C<cell=1,1-2,2;3,3-4,4;1,4;4,1> will return:
2952              
2953             11,12,14
2954             21,22
2955             33,34
2956             41,43,44
2957              
2958             Overlapping cell-specs will return those cells only once, so
2959             C<cell=1,1-3,3;2,2-4,4;2,3;4,2> will return:
2960              
2961             11,12,13
2962             21,22,23,24
2963             31,32,33,34
2964             42,43,44
2965              
2966             =back
2967              
2968             L<RFC7111|https://datatracker.ietf.org/doc/html/rfc7111> does B<not> allow different
2969             types of specs to be combined (either C<row> I<or> C<col> I<or> C<cell>).
2970             Passing an invalid fragment specification will croak and set error 2013.
2971              
2972             =head2 column_names
2973             X<column_names>
2974              
2975             Set the "keys" that will be used in the L</getline_hr> calls. If no keys
2976             (column names) are passed, it will return the current setting as a list.
2977              
2978             L</column_names> accepts a list of scalars (the column names) or a single
2979             array_ref, so you can pass the return value from L</getline> too:
2980              
2981             $csv->column_names ($csv->getline ($fh));
2982              
2983             L</column_names> does B<no> checking on duplicates at all, which might lead
2984             to unexpected results. Undefined entries will be replaced with the string
2985             C<"\cAUNDEF\cA">, so
2986              
2987             $csv->column_names (undef, "", "name", "name");
2988             $hr = $csv->getline_hr ($fh);
2989              
2990             will set C<< $hr->{"\cAUNDEF\cA"} >> to the 1st field, C<< $hr->{""} >> to
2991             the 2nd field, and C<< $hr->{name} >> to the 4th field, discarding the 3rd
2992             field.
2993              
2994             L</column_names> croaks on invalid arguments.
2995              
2996             =head2 header
2997              
2998             This method does NOT work in perl-5.6.x
2999              
3000             Parse the CSV header and set L<C<sep>|/sep>, column_names and encoding.
3001              
3002             my @hdr = $csv->header ($fh);
3003             $csv->header ($fh, { sep_set => [ ";", ",", "|", "\t" ] });
3004             $csv->header ($fh, { detect_bom => 1, munge_column_names => "lc" });
3005              
3006             The first argument should be a file handle.
3007              
3008             This method resets some object properties, as it is supposed to be invoked
3009             only once per file or stream. It will leave attributes C<column_names> and
3010             C<bound_columns> alone if setting column names is disabled. Reading headers
3011             on previously processed objects might fail on perl-5.8.0 and older.
3012              
3013             Assuming that the file opened for parsing has a header, and the header does
3014             not contain problematic characters like embedded newlines, read the first
3015             line from the open handle then auto-detect whether the header separates the
3016             column names with a character from the allowed separator list.
3017              
3018             If any of the allowed separators matches, and none of the I<other> allowed
3019             separators match, set L<C<sep>|/sep> to that separator for the current
3020             CSV_XS instance and use it to parse the first line, map those to lowercase,
3021             and use that to set the instance L</column_names>:
3022              
3023             my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });
3024             open my $fh, "<", "file.csv";
3025             binmode $fh; # for Windows
3026             $csv->header ($fh);
3027             while (my $row = $csv->getline_hr ($fh)) {
3028             ...
3029             }
3030              
3031             If the header is empty, contains more than one unique separator out of the
3032             allowed set, contains empty fields, or contains identical fields (after
3033             folding), it will croak with error 1010, 1011, 1012, or 1013 respectively.
3034              
3035             If the header contains embedded newlines or is not valid CSV in any other
3036             way, this method will croak and leave the parse error untouched.
3037              
3038             A successful call to C<header> will always set the L<C<sep>|/sep> of the
3039             C<$csv> object. This behavior can not be disabled.
3040              
3041             =head3 return value
3042              
3043             On error this method will croak.
3044              
3045             In list context, the headers will be returned whether they are used to set
3046             L</column_names> or not.
3047              
3048             In scalar context, the instance itself is returned. B<Note>: the values as
3049             found in the header will effectively be B<lost> if C<set_column_names> is
3050             false.
3051              
3052             =head3 Options
3053              
3054             =over 2
3055              
3056             =item sep_set
3057             X<sep_set>
3058              
3059             $csv->header ($fh, { sep_set => [ ";", ",", "|", "\t" ] });
3060              
3061             The list of legal separators defaults to C<[ ";", "," ]> and can be changed
3062             by this option. As this is probably the most often used option, it can be
3063             passed on its own as an unnamed argument:
3064              
3065             $csv->header ($fh, [ ";", ",", "|", "\t", "::", "\x{2063}" ]);
3066              
3067             Multi-byte sequences are allowed, both multi-character and Unicode. See
3068             L<C<sep>|/sep>.
3069              
3070             =item detect_bom
3071             X<detect_bom>
3072              
3073             $csv->header ($fh, { detect_bom => 1 });
3074              
3075             The default behavior is to detect if the header line starts with a BOM. If
3076             the header has a BOM, use that to set the encoding of C<$fh>. This default
3077             behavior can be disabled by passing a false value to C<detect_bom>.
3078              
3079             Supported encodings from BOM are: UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, and
3080             UTF-32LE. BOM also supports UTF-1, UTF-EBCDIC, SCSU, BOCU-1, and GB-18030
3081             but L<Encode> does not (yet). UTF-7 is not supported.
3082              
3083             If a supported BOM was detected as start of the stream, it is stored in the
3084             object attribute C<ENCODING>.
3085              
3086             my $enc = $csv->{ENCODING};
3087              
3088             The encoding is used with C<binmode> on C<$fh>.
3089              
3090             If the handle was opened in a (correct) encoding, this method will B<not>
3091             alter the encoding, as it checks the leading B<bytes> of the first line. In
3092             case the stream starts with a decoded BOM (C<U+FEFF>), C<{ENCODING}> will be
3093             C<""> (empty) instead of the default C<undef>.
3094              
3095             =item munge_column_names
3096             X<munge_column_names>
3097              
3098             This option offers the means to modify the column names into something that
3099             is most useful to the application. The default is to map all column names
3100             to lower case.
3101              
3102             $csv->header ($fh, { munge_column_names => "lc" });
3103              
3104             The following values are available:
3105              
3106             lc - lower case
3107             uc - upper case
3108             db - valid DB field names
3109             none - do not change
3110             \%hash - supply a mapping
3111             \&cb - supply a callback
3112              
3113             =over 2
3114              
3115             =item Lower case
3116              
3117             $csv->header ($fh, { munge_column_names => "lc" });
3118              
3119             The header is changed to all lower-case
3120              
3121             $_ = lc;
3122              
3123             =item Upper case
3124              
3125             $csv->header ($fh, { munge_column_names => "uc" });
3126              
3127             The header is changed to all upper-case
3128              
3129             $_ = uc;
3130              
3131             =item Literal
3132              
3133             $csv->header ($fh, { munge_column_names => "none" });
3134              
3135             =item Hash
3136              
3137             $csv->header ($fh, { munge_column_names => { foo => "sombrero" }});
3138              
3139             If a value does not exist, the original value is used unchanged.
3140              
3141             =item Database
3142              
3143             $csv->header ($fh, { munge_column_names => "db" });
3144              
3145             =over 2
3146              
3147             =item -
3148              
3149             lower-case
3150              
3151             =item -
3152              
3153             all sequences of non-word characters are replaced with an underscore
3154              
3155             =item -
3156              
3157             all leading underscores are removed
3158              
3159             =back
3160              
3161             $_ = lc (s/\W+/_/gr =~ s/^_+//r);
3162              
3163             =item Callback
3164              
3165             $csv->header ($fh, { munge_column_names => sub { fc } });
3166             $csv->header ($fh, { munge_column_names => sub { "column_".$col++ } });
3167             $csv->header ($fh, { munge_column_names => sub { lc (s/\W+/_/gr) } });
3168              
3169             As this callback is called in a C<map>, you can use C<$_> directly.
3170              
3171             =back
3172              
3173             =item set_column_names
3174             X<set_column_names>
3175              
3176             $csv->header ($fh, { set_column_names => 1 });
3177              
3178             The default is to set the instance's column names using L</column_names> if
3179             the method is successful, so subsequent calls to L</getline_hr> can return
3180             a hash. Setting the column names can be disabled by passing a false value
3181             for this option.
3182              
3183             As described in L</return value> above, content is lost in scalar context.
3184              
3185             =back
3186              
3187             =head3 Validation
3188              
3189             When receiving CSV files from external sources, this method can be used to
3190             protect against changes in the layout by restricting to known headers (and
3191             typos in the header fields).
3192              
3193             my %known = (
3194             "record key" => "c_rec",
3195             "rec id" => "c_rec",
3196             "id_rec" => "c_rec",
3197             "kode" => "code",
3198             "code" => "code",
3199             "vaule" => "value",
3200             "value" => "value",
3201             );
3202             my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });
3203             open my $fh, "<", $source or die "$source: $!";
3204             $csv->header ($fh, { munge_column_names => sub {
3205             s/\s+$//;
3206             s/^\s+//;
3207             $known{lc $_} or die "Unknown column '$_' in $source";
3208             }});
3209             while (my $row = $csv->getline_hr ($fh)) {
3210             say join "\t", $row->{c_rec}, $row->{code}, $row->{value};
3211             }
3212              
3213             =head2 bind_columns
3214             X<bind_columns>
3215              
3216             Takes a list of scalar references to be used for output with L</print> or
3217             to store in the fields fetched by L</getline>. When you do not pass enough
3218             references to store the fetched fields in, L</getline> will fail with error
3219             C<3006>. If you pass more than there are fields to return, the content of
3220             the remaining references is left untouched. Under C<strict> the two should
3221             match, otherwise L</getline> will fail with error C<2014>.
3222              
3223             $csv->bind_columns (\$code, \$name, \$price, \$description);
3224             while ($csv->getline ($fh)) {
3225             print "The price of a $name is \x{20ac} $price\n";
3226             }
3227              
3228             To reset or clear all column binding, call L</bind_columns> with the single
3229             argument C<undef>. This will also clear column names.
3230              
3231             $csv->bind_columns (undef);
3232              
3233             If no arguments are passed at all, L</bind_columns> will return the list of
3234             current bindings or C<undef> if no binds are active.
3235              
3236             Note that in parsing with C<bind_columns>, the fields are set on the fly.
3237             That implies that if the third field of a row causes an error (or this row
3238             has just two fields where the previous row had more), the first two fields
3239             already have been assigned the values of the current row, while the rest of
3240             the fields will still hold the values of the previous row. If you want the
3241             parser to fail in these cases, use the L<C<strict>|/strict> attribute.
3242              
3243             =head2 eof
3244             X<eof>
3245              
3246             $eof = $csv->eof ();
3247              
3248             If L</parse> or L</getline> was used with an IO stream, this method will
3249             return true (1) if the last call hit end of file, otherwise it will return
3250             false (''). This is useful to see the difference between a failure and end
3251             of file.
3252              
3253             Note that if the parsing of the last line caused an error, C<eof> is still
3254             true. That means that if you are I<not> using L</auto_diag>, an idiom like
3255              
3256             while (my $row = $csv->getline ($fh)) {
3257             # ...
3258             }
3259             $csv->eof or $csv->error_diag;
3260              
3261             will I<not> report the error. You would have to change that to
3262              
3263             while (my $row = $csv->getline ($fh)) {
3264             # ...
3265             }
3266             +$csv->error_diag and $csv->error_diag;
3267              
3268             =head2 types
3269             X<types>
3270              
3271             $csv->types (\@tref);
3272              
3273             This method is used to force that (all) columns are of a given type. For
3274             example, if you have an integer column, two columns with doubles and a
3275             string column, then you might do a
3276              
3277             $csv->types ([Text::CSV_XS::IV (),
3278             Text::CSV_XS::NV (),
3279             Text::CSV_XS::NV (),
3280             Text::CSV_XS::PV ()]);
3281              
3282             Column types are used only for I<decoding> columns while parsing, in other
3283             words by the L</parse> and L</getline> methods.
3284              
3285             You can unset column types by doing a
3286              
3287             $csv->types (undef);
3288              
3289             or fetch the current type settings with
3290              
3291             $types = $csv->types ();
3292              
3293             =over 4
3294              
3295             =item IV
3296             X<IV>
3297              
3298             =item CSV_TYPE_IV
3299             X<CSV_TYPE_IV>
3300              
3301             Set field type to integer.
3302              
3303             =item NV
3304             X<NV>
3305              
3306             =item CSV_TYPE_NV
3307             X<CSV_TYPE_NV>
3308              
3309             Set field type to numeric/float.
3310              
3311             =item PV
3312             X<PV>
3313              
3314             =item CSV_TYPE_PV
3315             X<CSV_TYPE_PV>
3316              
3317             Set field type to string.
3318              
3319             =back
3320              
3321             =head2 fields
3322             X<fields>
3323              
3324             @columns = $csv->fields ();
3325              
3326             This method returns the input to L</combine> or the resultant decomposed
3327             fields of a successful L</parse>, whichever was called more recently.
3328              
3329             Note that the return value is undefined after using L</getline>, which does
3330             not fill the data structures returned by L</parse>.
3331              
3332             =head2 meta_info
3333             X<meta_info>
3334              
3335             @flags = $csv->meta_info ();
3336              
3337             This method returns the "flags" of the input to L</combine> or the flags of
3338             the resultant decomposed fields of L</parse>, whichever was called more
3339             recently.
3340              
3341             For each field, a meta_info field will hold flags that inform something
3342             about the field returned by the L</fields> method or passed to the
3343             L</combine> method. The flags are bit-wise-C<or>'d like:
3344              
3345             =over 2
3346              
3347             =item C<0x0001>
3348              
3349             =item C<CSV_FLAGS_IS_QUOTED>
3350             X<CSV_FLAGS_IS_QUOTED>
3351              
3352             The field was quoted.
3353              
3354             =item C<0x0002>
3355              
3356             =item C<CSV_FLAGS_IS_BINARY>
3357             X<CSV_FLAGS_IS_BINARY>
3358              
3359             The field was binary.
3360              
3361             =item C<0x0004>
3362              
3363             =item C<CSV_FLAGS_ERROR_IN_FIELD>
3364             X<CSV_FLAGS_ERROR_IN_FIELD>
3365              
3366             The field was invalid.
3367              
3368             Currently only used when C<allow_loose_quotes> is active.
3369              
3370             =item C<0x0010>
3371              
3372             =item C<CSV_FLAGS_IS_MISSING>
3373             X<CSV_FLAGS_IS_MISSING>
3374              
3375             The field was missing.
3376              
3377             =back
3378              
3379             See the C<is_***> methods below.
3380              
3381             =head2 is_quoted
3382             X<is_quoted>
3383              
3384             my $quoted = $csv->is_quoted ($column_idx);
3385              
3386             where C<$column_idx> is the (zero-based) index of the column in the last
3387             result of L</parse>.
3388              
3389             This returns a true value if the data in the indicated column was enclosed
3390             in L<C<quote_char>|/quote_char> quotes. This might be important for fields
3391             where content C<,20070108,> is to be treated as a numeric value, and where
3392             C<,"20070108",> is explicitly marked as character string data.
3393              
3394             This method is only valid when L</keep_meta_info> is set to a true value.
3395              
3396             =head2 is_binary
3397             X<is_binary>
3398              
3399             my $binary = $csv->is_binary ($column_idx);
3400              
3401             where C<$column_idx> is the (zero-based) index of the column in the last
3402             result of L</parse>.
3403              
3404             This returns a true value if the data in the indicated column contained any
3405             byte in the range C<[\x00-\x08,\x10-\x1F,\x7F-\xFF]>.
3406              
3407             This method is only valid when L</keep_meta_info> is set to a true value.
3408              
3409             =head2 is_missing
3410             X<is_missing>
3411              
3412             my $missing = $csv->is_missing ($column_idx);
3413              
3414             where C<$column_idx> is the (zero-based) index of the column in the last
3415             result of L</getline_hr>.
3416              
3417             $csv->keep_meta_info (1);
3418             while (my $hr = $csv->getline_hr ($fh)) {
3419             $csv->is_missing (0) and next; # This was an empty line
3420             }
3421              
3422             When using L</getline_hr>, it is impossible to tell if the parsed fields
3423             are C<undef> because they were not filled in the C<CSV> stream or because
3424             they were not read at all, as B<all> the fields defined by L</column_names>
3425             are set in the hash-ref. If you still need to know if all fields in each
3426             row are provided, you should enable L<C<keep_meta_info>|/keep_meta_info> so
3427             you can check the flags.
3428              
3429             If L<C<keep_meta_info>|/keep_meta_info> is C<false>, C<is_missing> will
3430             always return C<undef>, regardless of C<$column_idx> being valid or not. If
3431             this attribute is C<true> it will return either C<0> (the field is present)
3432             or C<1> (the field is missing).
3433              
3434             A special case is the empty line. If the line is completely empty - after
3435             dealing with the flags - this is still a valid CSV line: it is a record of
3436             just one single empty field. However, if C<keep_meta_info> is set, invoking
3437             C<is_missing> with index C<0> will now return true.
3438              
3439             =head2 status
3440             X<status>
3441              
3442             $status = $csv->status ();
3443              
3444             This method returns the status of the last invoked L</combine> or L</parse>
3445             call. Status is success (true: C<1>) or failure (false: C<undef> or C<0>).
3446              
3447             Note that as this only keeps track of the status of above mentioned methods,
3448             you are probably looking for L<C<error_diag>|/error_diag> instead.
3449              
3450             =head2 error_input
3451             X<error_input>
3452              
3453             $bad_argument = $csv->error_input ();
3454              
3455             This method returns the erroneous argument (if it exists) of L</combine> or
3456             L</parse>, whichever was called more recently. If the last invocation was
3457             successful, C<error_input> will return C<undef>.
3458              
3459             Depending on the type of error, it I<might> also hold the data for the last
3460             error-input of L</getline>.
3461              
3462             =head2 error_diag
3463             X<error_diag>
3464              
3465             Text::CSV_XS->error_diag ();
3466             $csv->error_diag ();
3467             $error_code = 0 + $csv->error_diag ();
3468             $error_str = "" . $csv->error_diag ();
3469             ($cde, $str, $pos, $rec, $fld, $xs) = $csv->error_diag ();
3470              
3471             If (and only if) an error occurred, this function returns the diagnostics
3472             of that error.
3473              
3474             If called in void context, this will print the internal error code and the
3475             associated error message to STDERR.
3476              
3477             If called in list context, this will return the error code and the error
3478             message in that order. If the last error was from parsing, the rest of the
3479             values returned are a best guess at the location within the line that was
3480             being parsed. Their values are 1-based. The position currently is the index of
3481             the byte at which the parsing failed in the current record. It might change
3482             to be the index of the current character in a later release. The record is
3483             the index of the record parsed by the csv instance. The field number is the
3484             index of the field the parser thinks it is currently trying to parse. See
3485             F<examples/csv-check> for how this can be used. If C<$xs> is set, it is the
3486             line number in XS where the error was triggered (for debugging). C<XS> will
3487             show in void context only when L</diag_verbose> is set.
3488              
3489             If called in scalar context, it will return the diagnostics in a single
3490             scalar, a-la C<$!>. It will contain the error code in numeric context, and
3491             the diagnostics message in string context.
3492              
3493             When called as a class method or a direct function call, the diagnostics
3494             are that of the last L</new> call.
3495              
3496             =head3 _cache_diag
3497              
3498             Note: This is an internal function only, and output cannot be relied upon.
3499             Use at own risk.
3500              
3501             If debugging beyond what L</error_diag> is able to show, the internal cache
3502             can be shown with this function.
3503              
3504             # Something failed ..
3505             $csv->error_diag;
3506             $csv->_cache_diag ();
3507              
3508             =head2 record_number
3509             X<record_number>
3510              
3511             $recno = $csv->record_number ();
3512              
3513             Returns the number of records parsed by this csv instance. This value should be more
3514             accurate than C<$.> when embedded newlines come in play. Records written by
3515             this instance are not counted.
3516              
3517             =head2 SetDiag
3518             X<SetDiag>
3519              
3520             $csv->SetDiag (0);
3521              
3522             Use to reset the diagnostics if you are dealing with errors.
3523              
3524             =head1 IMPORTS/EXPORTS
3525              
3526             By default none of these are exported.
3527              
3528             =over 2
3529              
3530             =item csv
3531              
3532             use Text::CSV_XS qw( csv );
3533              
3534             Import the L</csv> function. See below.
3535              
3536             =item :CONSTANTS
3537              
3538             use Text::CSV_XS qw( :CONSTANTS );
3539              
3540             Import module constants L</CSV_FLAGS_IS_QUOTED>, L</CSV_FLAGS_IS_BINARY>,
3541             L</CSV_FLAGS_ERROR_IN_FIELD>, L</CSV_FLAGS_IS_MISSING>, L</CSV_TYPE_PV>,
3542             L</CSV_TYPE_IV>, and L</CSV_TYPE_NV>. Each can be imported alone
3543              
3544             use Text::CSV_XS qw( CSV_FLAGS_IS_BINARY CSV_TYPE_NV );
3545              
3546             =back
3547              
3548             =head1 FUNCTIONS
3549              
3550             =head2 csv
3551             X<csv>
3552              
3553             This function is not exported by default and should be explicitly requested:
3554              
3555             use Text::CSV_XS qw( csv );
3556              
3557             This is a high-level function that aims at simple (user) interfaces. This
3558             can be used to read/parse a C<CSV> file or stream (the default behavior) or
3559             to produce a file or write to a stream (define the C<out> attribute). It
3560             returns an array- or hash-reference on parsing (or C<undef> on fail) or the
3561             numeric value of L</error_diag> on writing. When this function fails you
3562             can get to the error using the class call to L</error_diag>
3563              
3564             my $aoa = csv (in => "test.csv") or
3565             die Text::CSV_XS->error_diag;
3566              
3567             Note that failure here is the inability to start the parser, like when the
3568             input does not exist or the arguments are unknown or conflicting. Run-time
3569             parsing errors will return a valid reference, which can be empty, but still
3570             contains all results up till the error. See L</on_error>.
3571              
3572             This function takes the arguments as key-value pairs. This can be passed as
3573             a list or as an anonymous hash:
3574              
3575             my $aoa = csv ( in => "test.csv", sep_char => ";");
3576             my $aoh = csv ({ in => $fh, headers => "auto" });
3577              
3578             The arguments passed consist of two parts: the arguments to L</csv> itself
3579             and the optional attributes to the C<CSV> object used inside the function
3580             as enumerated and explained in L</new>.
3581              
3582             If not overridden, the default options used for CSV are
3583              
3584             auto_diag => 1
3585             escape_null => 0
3586             strict_eol => 1
3587              
3588             The option that is always set and cannot be altered is
3589              
3590             binary => 1
3591              
3592             As this function will likely be used in one-liners, it allows C<quote> to
3593             be abbreviated as C<quo>, and C<escape_char> to be abbreviated as C<esc>
3594             or C<escape>.
3595              
3596             Alternative invocations:
3597              
3598             my $aoa = Text::CSV_XS::csv (in => "file.csv");
3599              
3600             my $csv = Text::CSV_XS->new ();
3601             my $aoa = $csv->csv (in => "file.csv");
3602              
3603             In the latter case, the object attributes are used from the existing object
3604             and the attribute arguments in the function call are ignored:
3605              
3606             my $csv = Text::CSV_XS->new ({ sep_char => ";" });
3607             my $aoh = $csv->csv (in => "file.csv", bom => 1);
3608              
3609             will parse using C<;> as C<sep_char>, not C<,>.
3610              
3611             =head3 in
3612             X<in>
3613              
3614             Used to specify the source. C<in> can be a file name (e.g. C<"file.csv">),
3615             which will be opened for reading and closed when finished, a file handle
3616             (e.g. C<$fh> or C<FH>), a reference to a glob (e.g. C<\*ARGV>), the glob
3617             itself (e.g. C<*STDIN>), or a reference to a scalar (e.g. C<\q{1,2,"csv"}>).
3618              
3619             When used with L</out>, C<in> should be a reference to a CSV structure (AoA
3620             or AoH) or a CODE-ref that returns an array-reference or a hash-reference.
3621             The code-ref will be invoked with no arguments.
3622              
3623             my $aoa = csv (in => "file.csv");
3624              
3625             open my $fh, "<", "file.csv";
3626             my $aoa = csv (in => $fh);
3627              
3628             my $csv = [ [qw( Foo Bar )], [ 1, 2 ], [ 2, 3 ]];
3629             my $err = csv (in => $csv, out => "file.csv");
3630              
3631             If called in void context without the L</out> attribute, the resulting ref
3632             will be used as input to a subsequent call to csv:
3633              
3634             csv (in => "file.csv", filter => { 2 => sub { length > 2 }})
3635              
3636             will be a shortcut to
3637              
3638             csv (in => csv (in => "file.csv", filter => { 2 => sub { length > 2 }}))
3639              
3640             where, in the absence of the C<out> attribute, this is a shortcut to
3641              
3642             csv (in => csv (in => "file.csv", filter => { 2 => sub { length > 2 }}),
3643             out => *STDOUT)
3644              
3645             =head3 out
3646             X<out>
3647              
3648             csv (in => $aoa, out => "file.csv");
3649             csv (in => $aoa, out => $fh);
3650             csv (in => $aoa, out => STDOUT);
3651             csv (in => $aoa, out => *STDOUT);
3652             csv (in => $aoa, out => \*STDOUT);
3653             csv (in => $aoa, out => \my $data);
3654             csv (in => $aoa, out => undef);
3655             csv (in => $aoa, out => \"skip");
3656              
3657             csv (in => $fh, out => \@aoa);
3658             csv (in => $fh, out => \@aoh, bom => 1);
3659             csv (in => $fh, out => \%hsh, key => "key");
3660              
3661             csv (in => $file, out => $file);
3662             csv (in => $file, out => $fh);
3663             csv (in => $fh, out => $file);
3664             csv (in => $fh, out => $fh);
3665              
3666             In output mode, the default CSV options when producing CSV are
3667              
3668             eol => "\r\n"
3669              
3670             The L</fragment> attribute is ignored in output mode.
3671              
3672             C<out> can be a file name (e.g. C<"file.csv">), which will be opened for
3673             writing and closed when finished, a file handle (e.g. C<$fh> or C<FH>), a
3674             reference to a glob (e.g. C<\*STDOUT>), the glob itself (e.g. C<*STDOUT>),
3675             or a reference to a scalar (e.g. C<\my $data>).
3676              
3677             csv (in => sub { $sth->fetch }, out => "dump.csv");
3678             csv (in => sub { $sth->fetchrow_hashref }, out => "dump.csv",
3679             headers => $sth->{NAME_lc});
3680              
3681             When a code-ref is used for C<in>, the output is generated per invocation,
3682             so no buffering is involved. This implies that there is no size restriction
3683             on the number of records. The C<csv> function ends when the coderef returns
3684             a false value.
3685              
3686             If C<out> is set to a reference of the literal string C<"skip">, the output
3687             will be suppressed completely, which might be useful in combination with a
3688             filter for side effects only.
3689              
3690             my %cache;
3691             csv (in => "dump.csv",
3692             out => \"skip",
3693             on_in => sub { $cache{$_[1][1]}++ });
3694              
3695             Currently, setting C<out> to any false value (C<undef>, C<"">, 0) will be
3696             equivalent to C<\"skip">.
3697              
3698             If the C<in> argument points to something to parse, and the C<out> is set to
3699             a reference to an C<ARRAY> or a C<HASH>, the output is appended to the data
3700             in the existing reference. The result of the parse should match what exists
3701             in the reference passed. This might come in handy when you have to parse a set
3702             of files with similar content (like data stored per period) and you want to
3703             collect that into a single data structure:
3704              
3705             my %hash;
3706             csv (in => $_, out => \%hash, key => "id") for sort glob "foo-[0-9]*.csv";
3707              
3708             my @list; # List of arrays
3709             csv (in => $_, out => \@list) for sort glob "foo-[0-9]*.csv";
3710              
3711             my @list; # List of hashes
3712             csv (in => $_, out => \@list, bom => 1) for sort glob "foo-[0-9]*.csv";
3713              
3714             =head4 Streaming
3715             X<streaming>
3716              
3717             If B<both> C<in> and C<out> are files, file handles or globs, streaming is
3718             enforced by injecting an C<after_parse> callback that immediately uses the
3719             L<C<say ()>|/say> method of the same instance to output the result and then
3720             rejects the record.
3721              
3722             If an C<after_parse> was already passed as attribute, that will be included
3723             in the injected call. If C<on_in> was passed and C<after_parse> was not, it
3724             will be used instead. If both were passed, C<on_in> is ignored.
3725              
3726             The EOL of the first record of the C<in> source is consistently used as EOL
3727             for all records in the C<out> destination.
3728              
3729             The C<filter> attribute is not available.
3730              
3731             All other attributes are shared for C<in> and C<out>, so you cannot define
3732             different encodings for C<in> and C<out>. You need to pass a C<$fh>, where
3733             C<binmode> was used to apply the encoding layers.
3734              
3735             Note that this is work in progress and things might change.
3736              
3737             =head3 encoding
3738             X<encoding>
3739              
3740             If passed, it should be an encoding accepted by the C<:encoding()> option
3741             to C<open>. There is no default value. This attribute does not work in perl
3742             5.6.x. C<encoding> can be abbreviated to C<enc> for ease of use in command
3743             line invocations.
3744              
3745             If C<encoding> is set to the literal value C<"auto">, the method L</header>
3746             will be invoked on the opened stream to check if there is a BOM and set the
3747             encoding accordingly. This is equal to passing a true value in the option
3748             L<C<detect_bom>|/detect_bom>.
3749              
3750             Encodings can be stacked, as supported by C<binmode>:
3751              
3752             # Using PerlIO::via::gzip
3753             csv (in => \@csv,
3754             out => "test.csv:via.gz",
3755             encoding => ":via(gzip):encoding(utf-8)",
3756             );
3757             $aoa = csv (in => "test.csv:via.gz", encoding => ":via(gzip)");
3758              
3759             # Using PerlIO::gzip
3760             csv (in => \@csv,
3761             out => "test.csv:gzip.gz",
3762             encoding => ":gzip:encoding(utf-8)",
3763             );
3764             $aoa = csv (in => "test.csv:gzip.gz", encoding => ":gzip");
3765              
3766             =head3 detect_bom
3767             X<detect_bom>
3768              
3769             If C<detect_bom> is given, the method L</header> will be invoked on the
3770             opened stream to check if there is a BOM and set the encoding accordingly.
3771             Note that the attribute L<C<headers>|/headers> can be used to overrule the
3772             default behavior of how that method automatically sets the attribute.
3773              
3774             C<detect_bom> can be abbreviated to C<bom>.
3775              
3776             This is the same as setting L<C<encoding>|/encoding> to C<"auto">.
3777              
3778             =head3 headers
3779             X<headers>
3780              
3781             If this attribute is not given, the default behavior is to produce an array
3782             of arrays.
3783              
3784             If C<headers> is supplied, it should be an anonymous list of column names,
3785             an anonymous hashref, a coderef, or a literal flag: C<auto>, C<lc>, C<uc>,
3786             or C<skip>.
3787              
3788             =over 2
3789              
3790             =item skip
3791             X<skip>
3792              
3793             When C<skip> is used, the header will not be included in the output.
3794              
3795             my $aoa = csv (in => $fh, headers => "skip");
3796              
3797             C<skip> is invalid/ignored in combination with L<C<detect_bom>|/detect_bom>.
3798              
3799             =item auto
3800             X<auto>
3801              
3802             If C<auto> is used, the first line of the C<CSV> source will be read as the
3803             list of field headers and used to produce an array of hashes.
3804              
3805             my $aoh = csv (in => $fh, headers => "auto");
3806              
3807             =item lc
3808             X<lc>
3809              
3810             If C<lc> is used, the first line of the C<CSV> source will be read as the
3811             list of field headers mapped to lower case and used to produce an array of
3812             hashes. This is a variation of C<auto>.
3813              
3814             my $aoh = csv (in => $fh, headers => "lc");
3815              
3816             =item uc
3817             X<uc>
3818              
3819             If C<uc> is used, the first line of the C<CSV> source will be read as the
3820             list of field headers mapped to upper case and used to produce an array of
3821             hashes. This is a variation of C<auto>.
3822              
3823             my $aoh = csv (in => $fh, headers => "uc");
3824              
3825             =item CODE
3826             X<CODE>
3827              
3828             If a coderef is used, the first line of the C<CSV> source will be read as
3829             the list of mangled field headers in which each field is passed as the only
3830             argument to the coderef. This list is used to produce an array of hashes.
3831              
3832             my $aoh = csv (in => $fh,
3833             headers => sub { lc ($_[0]) =~ s/kode/code/gr });
3834              
3835             this example is a variation of using C<lc> where all occurrences of C<kode>
3836             are replaced with C<code>.
3837              
3838             =item ARRAY
3839             X<ARRAY>
3840              
3841             If C<headers> is an anonymous list, the entries in the list will be used
3842             as field names. The first line is considered data instead of headers.
3843              
3844             my $aoh = csv (in => $fh, headers => [qw( Foo Bar )]);
3845             csv (in => $aoa, out => $fh, headers => [qw( code description price )]);
3846              
3847             =item HASH
3848             X<HASH>
3849              
3850             If C<headers> is a hash reference, this implies C<auto>, but header fields
3851             that exist as key in the hashref will be replaced by the value for that
3852             key. Given a CSV file like
3853              
3854             post-kode,city,name,id number,fubble
3855             1234AA,Duckstad,Donald,13,"X313DF"
3856              
3857             using
3858              
3859             csv (headers => { "post-kode" => "pc", "id number" => "ID" }, ...
3860              
3861             will return an entry like
3862              
3863             { pc => "1234AA",
3864             city => "Duckstad",
3865             name => "Donald",
3866             ID => "13",
3867             fubble => "X313DF",
3868             }
3869              
3870             =back
3871              
3872             See also L<C<munge_column_names>|/munge_column_names> and
3873             L<C<set_column_names>|/set_column_names>.
3874              
3875             =head3 munge_column_names
3876             X<munge_column_names>
3877              
3878             If C<munge_column_names> is set, the method L</header> is invoked on the
3879             opened stream with all matching arguments to detect and set the headers.
3880              
3881             C<munge_column_names> can be abbreviated to C<munge>.
3882              
3883             =head3 key
3884             X<key>
3885              
3886             If passed, will default L<C<headers>|/headers> to C<"auto"> and return a
3887             hashref instead of an array of hashes. Allowed values are simple scalars or
3888             array-references where the first element is the joiner and the rest are the
3889             fields to join to combine the key.
3890              
3891             my $ref = csv (in => "test.csv", key => "code");
3892             my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ]);
3893              
3894             with test.csv like
3895              
3896             code,product,price,color
3897             1,pc,850,gray
3898             2,keyboard,12,white
3899             3,mouse,5,black
3900              
3901             the first example will return
3902              
3903             { 1 => {
3904             code => 1,
3905             color => 'gray',
3906             price => 850,
3907             product => 'pc'
3908             },
3909             2 => {
3910             code => 2,
3911             color => 'white',
3912             price => 12,
3913             product => 'keyboard'
3914             },
3915             3 => {
3916             code => 3,
3917             color => 'black',
3918             price => 5,
3919             product => 'mouse'
3920             }
3921             }
3922              
3923             the second example will return
3924              
3925             { "1:gray" => {
3926             code => 1,
3927             color => 'gray',
3928             price => 850,
3929             product => 'pc'
3930             },
3931             "2:white" => {
3932             code => 2,
3933             color => 'white',
3934             price => 12,
3935             product => 'keyboard'
3936             },
3937             "3:black" => {
3938             code => 3,
3939             color => 'black',
3940             price => 5,
3941             product => 'mouse'
3942             }
3943             }
3944              
3945             The C<key> attribute can be combined with L<C<headers>|/headers> for C<CSV>
3946             data that has no header line, like
3947              
3948             my $ref = csv (
3949             in => "foo.csv",
3950             headers => [qw( c_foo foo bar description stock )],
3951             key => "c_foo",
3952             );
3953              
3954             =head3 value
3955             X<value>
3956              
3957             Used to create key-value hashes.
3958              
3959             Only allowed when C<key> is valid. A C<value> can be either a single column
3960             label or an anonymous list of column labels. In the first case, the value
3961             will be a simple scalar value, in the latter case, it will be a hashref.
3962              
3963             my $ref = csv (in => "test.csv", key => "code",
3964             value => "price");
3965             my $ref = csv (in => "test.csv", key => "code",
3966             value => [ "product", "price" ]);
3967             my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ],
3968             value => "price");
3969             my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ],
3970             value => [ "product", "price" ]);
3971              
3972             with test.csv like
3973              
3974             code,product,price,color
3975             1,pc,850,gray
3976             2,keyboard,12,white
3977             3,mouse,5,black
3978              
3979             the first example will return
3980              
3981             { 1 => 850,
3982             2 => 12,
3983             3 => 5,
3984             }
3985              
3986             the second example will return
3987              
3988             { 1 => {
3989             price => 850,
3990             product => 'pc'
3991             },
3992             2 => {
3993             price => 12,
3994             product => 'keyboard'
3995             },
3996             3 => {
3997             price => 5,
3998             product => 'mouse'
3999             }
4000             }
4001              
4002             the third example will return
4003              
4004             { "1:gray" => 850,
4005             "2:white" => 12,
4006             "3:black" => 5,
4007             }
4008              
4009             the fourth example will return
4010              
4011             { "1:gray" => {
4012             price => 850,
4013             product => 'pc'
4014             },
4015             "2:white" => {
4016             price => 12,
4017             product => 'keyboard'
4018             },
4019             "3:black" => {
4020             price => 5,
4021             product => 'mouse'
4022             }
4023             }
4024              
4025             =head3 keep_headers
4026             X<keep_headers>
4027             X<keep_column_names>
4028             X<kh>
4029              
4030             When using hashes, keep the column names into the arrayref passed, so all
4031             headers are available after the call in the original order.
4032              
4033             my $aoh = csv (in => "file.csv", keep_headers => \my @hdr);
4034              
4035             This attribute can be abbreviated to C<kh> or passed as C<keep_column_names>.
4036              
4037             This attribute implies a default of C<auto> for the C<headers> attribute.
4038              
4039             X<stable header order>
4040             X<internal headers>
4041             The headers can also be kept internally to keep stable header order:
4042              
4043             csv (in => csv (in => "file.csv", kh => "internal"),
4044             out => "new.csv",
4045             kh => "internal");
4046              
4047             where C<internal> can also be C<1>, C<yes>, or C<true>. This is similar to
4048              
4049             my @h;
4050             csv (in => csv (in => "file.csv", kh => \@h),
4051             out => "new.csv",
4052             headers => \@h);
4053              
4054             =head3 fragment
4055             X<fragment>
4056              
4057             Only output the fragment as defined in the L</fragment> method. This option
4058             is ignored when I<generating> C<CSV>. See L</out>.
4059              
4060             Combining all of them could give something like
4061              
4062             use Text::CSV_XS qw( csv );
4063             my $aoh = csv (
4064             in => "test.txt",
4065             encoding => "utf-8",
4066             headers => "auto",
4067             sep_char => "|",
4068             fragment => "row=3;6-9;15-*",
4069             );
4070             say $aoh->[15]{Foo};
4071              
4072             =head3 sep_set
4073             X<sep_set>
4074             X<seps>
4075              
4076             If C<sep_set> is set, the method L</header> is invoked on the opened stream
4077             to detect and set L<C<sep_char>|/sep_char> with the given set.
4078              
4079             C<sep_set> can be abbreviated to C<seps>. If neither C<sep_set> nor C<seps>
4080             is given, but C<sep> is defined, C<sep_set> defaults to C<[ sep ]>. This is
4081             only supported for perl version 5.10 and up.
4082              
4083             Note that as the L</header> method is invoked, its default is to also set
4084             the headers.
4085              
4086             =head3 set_column_names
4087             X<set_column_names>
4088              
4089             If C<set_column_names> is passed, the method L</header> is invoked on the
4090             opened stream with all arguments meant for L</header>.
4091              
4092             If C<set_column_names> is passed as a false value, the content of the first
4093             row is only preserved if the output is AoA:
4094              
4095             With an input-file like
4096              
4097             bAr,foo
4098             1,2
4099             3,4,5
4100              
4101             This call
4102              
4103             my $aoa = csv (in => $file, set_column_names => 0);
4104              
4105             will result in
4106              
4107             [[ "bar", "foo" ],
4108             [ "1", "2" ],
4109             [ "3", "4", "5" ]]
4110              
4111             and
4112              
4113             my $aoa = csv (in => $file, set_column_names => 0, munge => "none");
4114              
4115             will result in
4116              
4117             [[ "bAr", "foo" ],
4118             [ "1", "2" ],
4119             [ "3", "4", "5" ]]
4120              
4121             =head3 csv
4122             X<csv>
4123              
4124             The I<function> L</csv> can also be called as a method or with an existing
4125             Text::CSV_XS object. This could help if the function is to be invoked a lot
4126             of times and the overhead of creating the object internally over and over
4127             again would be prevented by passing an existing instance.
4128              
4129             my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });
4130              
4131             my $aoa = $csv->csv (in => $fh);
4132             my $aoa = csv (in => $fh, csv => $csv);
4133              
4134             both act the same. Running this 20000 times on a 20-line CSV file showed
4135             a 53% speedup.
4136              
4137             =head2 Callbacks
4138             X<Callbacks>
4139              
4140             Callbacks enable actions triggered from the I<inside> of Text::CSV_XS.
4141              
4142             While most of what this enables can easily be done in an unrolled loop as
4143             described in the L</SYNOPSIS>, callbacks can be used to meet special demands
4144             or enhance the L</csv> function.
4145              
4146             =over 2
4147              
4148             =item error
4149             X<error>
4150              
4151             $csv->callbacks (error => sub { $csv->SetDiag (0) });
4152              
4153             The C<error> callback is invoked when an error occurs, but I<only> when
4154             L</auto_diag> is set to a true value. A callback is invoked with the values
4155             returned by L</error_diag>:
4156              
4157             my ($c, $s);
4158              
4159             sub ignore3006 {
4160             my ($err, $msg, $pos, $recno, $fldno) = @_;
4161             if ($err == 3006) {
4162             # ignore this error
4163             ($c, $s) = (undef, undef);
4164             Text::CSV_XS->SetDiag (0);
4165             }
4166             # Any other error
4167             return;
4168             } # ignore3006
4169              
4170             $csv->callbacks (error => \&ignore3006);
4171             $csv->bind_columns (\$c, \$s);
4172             while ($csv->getline ($fh)) {
4173             # Error 3006 will not stop the loop
4174             }
4175              
4176             =item after_parse
4177             X<after_parse>
4178              
4179             $csv->callbacks (after_parse => sub { push @{$_[1]}, "NEW" });
4180             while (my $row = $csv->getline ($fh)) {
4181             $row->[-1] eq "NEW";
4182             }
4183              
4184             This callback is invoked after parsing with L</getline> only if no error
4185             occurred. The callback is invoked with two arguments: the current C<CSV>
4186             parser object and an array reference to the fields parsed.
4187              
4188             The return code of the callback is ignored unless it is a reference to the
4189             string "skip", in which case the record will be skipped in L</getline_all>.
4190              
4191             sub add_from_db {
4192             my ($csv, $row) = @_;
4193             $sth->execute ($row->[4]);
4194             push @$row, $sth->fetchrow_array;
4195             } # add_from_db
4196              
4197             my $aoa = csv (in => "file.csv", callbacks => {
4198             after_parse => \&add_from_db });
4199              
4200             This hook can be used for validation:
4201             X<data_validation>
4202              
4203             =over 2
4204              
4205             =item FAIL
4206              
4207             Die if any of the records does not validate a rule:
4208              
4209             after_parse => sub {
4210             $_[1][4] =~ m/^[0-9]{4}\s?[A-Z]{2}$/ or
4211             die "5th field does not have a valid Dutch zipcode";
4212             }
4213              
4214             =item DEFAULT
4215              
4216             Replace invalid fields with a default value:
4217              
4218             after_parse => sub { $_[1][2] =~ m/^\d+$/ or $_[1][2] = 0 }
4219              
4220             =item SKIP
4221              
4222             Skip records that have invalid fields (only applies to L</getline_all>):
4223              
4224             after_parse => sub { $_[1][0] =~ m/^\d+$/ or return \"skip"; }
4225              
4226             =back
4227              
4228             =item before_print
4229             X<before_print>
4230              
4231             my $idx = 1;
4232             $csv->callbacks (before_print => sub { $_[1][0] = $idx++ });
4233             $csv->print (*STDOUT, [ 0, $_ ]) for @members;
4234              
4235             This callback is invoked before printing with L</print> only if no error
4236             occurred. The callback is invoked with two arguments: the current C<CSV>
4237             parser object and an array reference to the fields passed.
4238              
4239             The return code of the callback is ignored.
4240              
4241             sub max_4_fields {
4242             my ($csv, $row) = @_;
4243             @$row > 4 and splice @$row, 4;
4244             } # max_4_fields
4245              
4246             csv (in => csv (in => "file.csv"), out => *STDOUT,
4247             callbacks => { before_print => \&max_4_fields });
4248              
4249             This callback is not active for L</combine>.
4250              
4251             =back
4252              
4253             =head3 Callbacks for csv ()
4254              
4255             The L</csv> function allows for some callbacks that do not integrate in XS
4256             internals but are only available in the L</csv> function.
4257              
4258             csv (in => "file.csv",
4259             callbacks => {
4260             filter => { 6 => sub { $_ > 15 } }, # first
4261             after_parse => sub { say "AFTER PARSE"; }, # first
4262             after_in => sub { say "AFTER IN"; }, # second
4263             on_in => sub { say "ON IN"; }, # third
4264             },
4265             );
4266              
4267             csv (in => $aoh,
4268             out => "file.csv",
4269             callbacks => {
4270             on_in => sub { say "ON IN"; }, # first
4271             before_out => sub { say "BEFORE OUT"; }, # second
4272             before_print => sub { say "BEFORE PRINT"; }, # third
4273             },
4274             );
4275              
4276             =over 2
4277              
4278             =item filter
4279             X<filter>
4280              
4281             This callback can be used to filter records. It is called just after a new
4282             record has been scanned. The callback accepts a:
4283              
4284             =over 2
4285              
4286             =item hashref
4287              
4288             The keys are the index to the row (the field name or field number, 1-based)
4289             and the values are subs to return a true or false value.
4290              
4291             csv (in => "file.csv", filter => {
4292             3 => sub { m/a/ }, # third field should contain an "a"
4293             5 => sub { length > 4 }, # length of the 5th field minimal 5
4294             });
4295              
4296             csv (in => "file.csv", filter => { foo => sub { $_ > 4 }});
4297              
4298             If the keys to the filter hash contain any character that is not a digit it
4299             will also implicitly set L</headers> to C<"auto"> unless L</headers> was
4300             already passed as argument. When headers are active, returning an array of
4301             hashes, the filter is not applicable to the header itself.
4302              
4303             All sub results should match, as in AND.
4304              
4305             The context of the callback sets C<$_> localized to the field indicated by
4306             the filter. The two arguments are as with all other callbacks, so the other
4307             fields in the current row can be seen:
4308              
4309             filter => { 3 => sub { $_ > 100 ? $_[1][1] =~ m/A/ : $_[1][6] =~ m/B/ }}
4310              
4311             If the context is set to return a list of hashes (L</headers> is defined),
4312             the current record will also be available in the localized C<%_>:
4313              
4314             filter => { 3 => sub { $_ > 100 && $_{foo} =~ m/A/ && $_{bar} < 1000 }}
4315              
4316             If the filter is used to I<alter> the content by changing C<$_>, make sure
4317             that the sub returns true in order not to have that record skipped:
4318              
4319             filter => { 2 => sub { $_ = uc }}
4320              
4321             will upper-case the second field, and then skip it if the resulting content
4322             evaluates to false. To always accept, end with truth:
4323              
4324             filter => { 2 => sub { $_ = uc; 1 }}
4325              
4326             =item coderef
4327              
4328             csv (in => "file.csv", filter => sub { $n++; 0; });
4329              
4330             If the argument to C<filter> is a coderef, it is an alias or shortcut to a
4331             filter on column 0:
4332              
4333             csv (filter => sub { $n++; 0 });
4334              
4335             is equal to
4336              
4337             csv (filter => { 0 => sub { $n++; 0 } });
4338              
4339             =item filter-name
4340              
4341             csv (in => "file.csv", filter => "not_blank");
4342             csv (in => "file.csv", filter => "not_empty");
4343             csv (in => "file.csv", filter => "filled");
4344              
4345             These are predefined filters
4346              
4347             Given a file like (line numbers prefixed for doc purpose only):
4348              
4349             1:1,2,3
4350             2:
4351             3:,
4352             4:""
4353             5:,,
4354             6:, ,
4355             7:"",
4356             8:" "
4357             9:4,5,6
4358              
4359             =over 2
4360              
4361             =item not_blank
4362              
4363             Filter out the blank lines
4364              
4365             This filter is a shortcut for
4366              
4367             filter => { 0 => sub { @{$_[1]} > 1 or
4368             defined $_[1][0] && $_[1][0] ne "" } }
4369              
4370             Due to the implementation, it is currently impossible to also filter lines
4371             that consist only of a quoted empty field. These lines are also considered
4372             blank lines.
4373              
4374             With the given example, lines 2 and 4 will be skipped.
4375              
4376             =item not_empty
4377              
4378             Filter out lines where all the fields are empty.
4379              
4380             This filter is a shortcut for
4381              
4382             filter => { 0 => sub { grep { defined && $_ ne "" } @{$_[1]} } }
4383              
4384             A space is not regarded as being empty, so given the example data, lines 2, 3,
4385             4, 5, and 7 are skipped.
4386              
4387             =item filled
4388              
4389             Filter out lines that have no visible data
4390              
4391             This filter is a shortcut for
4392              
4393             filter => { 0 => sub { grep { defined && m/\S/ } @{$_[1]} } }
4394              
4395             This filter rejects all lines that do I<not> have at least one field that
4396             contains a non-whitespace character.
4397              
4398             With the given example data, this filter would skip lines 2 through 8.
4399              
4400             =back
4401              
4402             =back
4403              
4404             One could also use modules like L<Types::Standard>:
4405              
4406             use Types::Standard -types;
4407              
4408             my $type = Tuple[Str, Str, Int, Bool, Optional[Num]];
4409             my $check = $type->compiled_check;
4410              
4411             # filter with compiled check and warnings
4412             my $aoa = csv (
4413             in => \$data,
4414             filter => {
4415             0 => sub {
4416             my $ok = $check->($_[1]) or
4417             warn $type->get_message ($_[1]), "\n";
4418             return $ok;
4419             },
4420             },
4421             );
4422              
4423             =item after_in
4424             X<after_in>
4425              
4426             This callback is invoked for each record after all records have been parsed
4427             but before returning the reference to the caller. The hook is invoked with
4428             two arguments: the current C<CSV> parser object and a reference to the
4429             record. The reference can be a reference to a HASH or a reference to an
4430             ARRAY as determined by the arguments.
4431              
4432             This callback can also be passed as an attribute without the C<callbacks>
4433             wrapper.
4434              
4435             =item before_out
4436             X<before_out>
4437              
4438             This callback is invoked for each record before the record is printed. The
4439             hook is invoked with two arguments: the current C<CSV> parser object and a
4440             reference to the record. The reference can be a reference to a HASH or a
4441             reference to an ARRAY as determined by the arguments.
4442              
4443             This callback can also be passed as an attribute without the C<callbacks>
4444             wrapper.
4445              
4446             This callback makes the row available in C<%_> if the row is a hashref. In
4447             this case C<%_> is writable and will change the original row.
4448              
4449             =item on_in
4450             X<on_in>
4451              
4452             This callback acts exactly as the L</after_in> or the L</before_out> hooks.
4453              
4454             This callback can also be passed as an attribute without the C<callbacks>
4455             wrapper.
4456              
4457             This callback makes the row available in C<%_> if the row is a hashref. In
4458             this case C<%_> is writable and will change the original row. So e.g. with
4459              
4460             my $aoh = csv (
4461             in => \"foo\n1\n2\n",
4462             headers => "auto",
4463             on_in => sub { $_{bar} = 2; },
4464             );
4465              
4466             C<$aoh> will be:
4467              
4468             [ { foo => 1,
4469             bar => 2,
4470             },
4471             { foo => 2,
4472             bar => 2,
4473             }
4474             ]
4475              
4476             =item on_error
4477             X<on_error>
4478              
4479             This callback acts exactly as the L</error> hook.
4480              
4481             my @err;
4482             my $aoa = csv (in => $fh, on_error => sub { @err = @_ });
4483              
4484             is identical to
4485              
4486             my $aoa = csv (in => $fh, callbacks => {
4487             error => sub { @err = @_ },
4488             });
4489              
4490             It can be used for ignoring errors as well as for just keeping the error in
4491             case of analysis after the C<csv ()> function has returned.
4492              
4493             my @err;
4494             my $aoa = csv (in => "bad.csv", on_error => sub { @err = @_ });
4495             die Text::CSV_XS->error_diag if @err or !$aoa;
4496              
4497             =back
4498              
4499             =head1 INTERNALS
4500              
4501             =over 4
4502              
4503             =item Combine (...)
4504              
4505             =item Parse (...)
4506              
4507             =back
4508              
4509             The arguments to these internal functions are deliberately not described or
4510             documented in order to enable the module authors to change them when they
4511             feel the need for it. Using them is highly discouraged as the API may
4512             change in future releases.
4513              
4514             =head1 EXAMPLES
4515              
4516             =head2 Reading a CSV file line by line:
4517              
4518             my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });
4519             open my $fh, "<", "file.csv" or die "file.csv: $!";
4520             while (my $row = $csv->getline ($fh)) {
4521             # do something with @$row
4522             }
4523             close $fh or die "file.csv: $!";
4524              
4525             or
4526              
4527             my $aoa = csv (in => "file.csv", on_in => sub {
4528             # do something with %_
4529             });
4530              
4531             =head3 Reading only a single column
4532              
4533             my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });
4534             open my $fh, "<", "file.csv" or die "file.csv: $!";
4535             # get only the 4th column
4536             my @column = map { $_->[3] } @{$csv->getline_all ($fh)};
4537             close $fh or die "file.csv: $!";
4538              
4539             with L</csv>, you could do
4540              
4541             my @column = map { $_->[0] }
4542             @{csv (in => "file.csv", fragment => "col=4")};
4543              
4544             =head2 Parsing CSV strings:
4545              
4546             my $csv = Text::CSV_XS->new ({ keep_meta_info => 1, binary => 1 });
4547              
4548             my $sample_input_string =
4549             qq{"I said, ""Hi!""",Yes,"",2.34,,"1.09","\x{20ac}",};
4550             if ($csv->parse ($sample_input_string)) {
4551             my @field = $csv->fields;
4552             foreach my $col (0 .. $#field) {
4553             my $quo = $csv->is_quoted ($col) ? $csv->{quote_char} : "";
4554             printf "%2d: %s%s%s\n", $col, $quo, $field[$col], $quo;
4555             }
4556             }
4557             else {
4558             print STDERR "parse () failed on argument: ",
4559             $csv->error_input, "\n";
4560             $csv->error_diag ();
4561             }
4562              
4563             =head3 Parsing CSV from memory
4564              
4565             Given a complete CSV data-set in scalar C<$data>, generate a list of lists
4566             to represent the rows and fields
4567              
4568             # The data
4569             my $data = join "\r\n" => map { join "," => 0 .. 5 } 0 .. 5;
4570              
4571             # in a loop
4572             my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 });
4573             open my $fh, "<", \$data;
4574             my @foo;
4575             while (my $row = $csv->getline ($fh)) {
4576             push @foo, $row;
4577             }
4578             close $fh;
4579              
4580             # a single call
4581             my $foo = csv (in => \$data);
4582              
4583             =head2 Printing CSV data
4584              
4585             =head3 The fast way: using L</print>
4586              
4587             An example for creating C<CSV> files using the L</print> method:
4588              
4589             my $csv = Text::CSV_XS->new ({ binary => 1, eol => $/ });
4590             open my $fh, ">", "foo.csv" or die "foo.csv: $!";
4591             for (1 .. 10) {
4592             $csv->print ($fh, [ $_, "$_" ]) or $csv->error_diag;
4593             }
4594             close $fh or die "foo.csv: $!";
4595              
4596             =head3 The slow way: using L</combine> and L</string>
4597              
4598             or using the slower L</combine> and L</string> methods:
4599              
4600             my $csv = Text::CSV_XS->new;
4601              
4602             open my $csv_fh, ">", "hello.csv" or die "hello.csv: $!";
4603              
4604             my @sample_input_fields = (
4605             'You said, "Hello!"', 5.67,
4606             '"Surely"', '', '3.14159');
4607             if ($csv->combine (@sample_input_fields)) {
4608             print $csv_fh $csv->string, "\n";
4609             }
4610             else {
4611             print "combine () failed on argument: ",
4612             $csv->error_input, "\n";
4613             }
4614             close $csv_fh or die "hello.csv: $!";
4615              
4616             =head3 Generating CSV into memory
4617              
4618             Format a data-set (C<@foo>) into a scalar value in memory (C<$data>):
4619              
4620             # The data
4621             my @foo = map { [ 0 .. 5 ] } 0 .. 3;
4622              
4623             # in a loop
4624             my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1, eol => "\r\n" });
4625             open my $fh, ">", \my $data;
4626             $csv->print ($fh, $_) for @foo;
4627             close $fh;
4628              
4629             # a single call
4630             csv (in => \@foo, out => \my $data);
4631              
4632             =head2 Rewriting CSV
4633              
4634             =head3 Changing separator
4635              
4636             Rewrite C<CSV> files with C<;> as separator character to well-formed C<CSV>:
4637              
4638             use Text::CSV_XS qw( csv );
4639             csv (in => csv (in => "bad.csv", sep_char => ";"), out => *STDOUT);
4640              
4641             As C<STDOUT> is now default in L</csv>, a one-liner converting a UTF-16 CSV
4642             file with BOM and TAB-separation to valid UTF-8 CSV could be:
4643              
4644             $ perl -C3 -MText::CSV_XS=csv -we\
4645             'csv(in=>"utf16tab.csv",encoding=>"utf16",sep=>"\t")' >utf8.csv
4646              
4647             =head3 Unifying EOL
4648              
4649             Rewrite a CSV file with mixed EOL and/or inconsistent quotation into a new
4650             CSV file with consistent EOL and quotation. Attributes apply.
4651              
4652             use Text::CSV_XS qw( csv );
4653             csv (in => "file.csv", out => "newfile.csv", quote_space => 1);
4654              
4655             =head2 Dumping database tables to CSV
4656              
4657             Dumping a database table can be as simple as this (TIMTOWTDI):
4658              
4659             my $dbh = DBI->connect (...);
4660             my $sql = "select * from foo";
4661              
4662             # using your own loop
4663             open my $fh, ">", "foo.csv" or die "foo.csv: $!\n";
4664             my $csv = Text::CSV_XS->new ({ binary => 1, eol => "\r\n" });
4665             my $sth = $dbh->prepare ($sql); $sth->execute;
4666             $csv->print ($fh, $sth->{NAME_lc});
4667             while (my $row = $sth->fetch) {
4668             $csv->print ($fh, $row);
4669             }
4670              
4671             # using the csv function, all in memory
4672             csv (out => "foo.csv", in => $dbh->selectall_arrayref ($sql));
4673              
4674             # using the csv function, streaming with callbacks
4675             my $sth = $dbh->prepare ($sql); $sth->execute;
4676             csv (out => "foo.csv", in => sub { $sth->fetch });
4677             csv (out => "foo.csv", in => sub { $sth->fetchrow_hashref });
4678              
4679             Note that this does not discriminate between "empty" values and NULL-values
4680             from the database, as both will be the same empty field in CSV. To enable
4681             distinction between the two, use L<C<quote_empty>|/quote_empty>.
4682              
4683             csv (out => "foo.csv", in => sub { $sth->fetch }, quote_empty => 1);
4684              
4685             If the database import utility supports special sequences to insert C<NULL>
4686             values into the database, like MySQL/MariaDB supports C<\N>, use a filter
4687             or a map
4688              
4689             csv (out => "foo.csv", in => sub { $sth->fetch },
4690             on_in => sub { $_ //= "\\N" for @{$_[1]} });
4691              
4692             while (my $row = $sth->fetch) {
4693             $csv->print ($fh, [ map { $_ // "\\N" } @$row ]);
4694             }
4695              
4696             Note that this will not work as expected when choosing the backslash (C<\>)
4697             as C<escape_char>, as that will cause the C<\> to need to be escaped by yet
4698             another C<\>, which will cause the field to need quotation and thus ending
4699             up as C<"\\N"> instead of C<\N>. See also L<C<undef_str>|/undef_str>.
4700              
4701             csv (out => "foo.csv", in => sub { $sth->fetch }, undef_str => "\\N");
4702              
4703             These special sequences are not recognized by Text::CSV_XS on parsing the
4704             CSV generated like this, but map and filter are your friends again
4705              
4706             while (my $row = $csv->getline ($fh)) {
4707             $sth->execute (map { $_ eq "\\N" ? undef : $_ } @$row);
4708             }
4709              
4710             csv (in => "foo.csv", filter => { 1 => sub {
4711             $sth->execute (map { $_ eq "\\N" ? undef : $_ } @{$_[1]}); 0; }});
4712              
4713             =head2 Converting CSV to JSON
4714              
4715             use Text::CSV_XS qw( csv );
4716             use JSON; # or Cpanel::JSON::XS for better performance
4717              
4718             # AoA (no header interpretation)
4719             say encode_json (csv (in => "file.csv"));
4720              
4721             # AoH (convert to structures)
4722             say encode_json (csv (in => "file.csv", bom => 1));
4723              
4724             Yes, it is that simple.
4725              
4726             =head2 The examples folder
4727              
4728             For more extended examples, see the F<examples/> C<1>. sub-directory in the
4729             original distribution or the git repository C<2>.
4730              
4731             1. https://github.com/Tux/Text-CSV_XS/tree/master/examples
4732             2. https://github.com/Tux/Text-CSV_XS
4733              
4734             The following files can be found there:
4735              
4736             =over 2
4737              
4738             =item parser-xs.pl
4739             X<parser-xs.pl>
4740              
4741             This can be used as a boilerplate to parse invalid C<CSV> and parse beyond
4742             (expected) errors as an alternative to using the L</error> callback.
4743              
4744             $ perl examples/parser-xs.pl bad.csv >good.csv
4745              
4746             =item csv-check
4747             X<csv-check>
4748              
4749             This is a command-line tool that uses parser-xs.pl techniques to check the
4750             C<CSV> file and report on its content.
4751              
4752             $ csv-check files/utf8.csv
4753             Checked files/utf8.csv with csv-check 1.9
4754             using Text::CSV_XS 1.32 with perl 5.26.0 and Unicode 9.0.0
4755             OK: rows: 1, columns: 2
4756             sep = <,>, quo = <">, bin = <1>, eol = <"\n">
4757              
4758             =item csv-split
4759             X<csv-split>
4760              
4761             This command splits C<CSV> files into smaller files, keeping (part of) the
4762             header. Options include maximum number of (data) rows per file and maximum
4763             number of columns per file or a combination of the two.
4764              
4765             =item csv2xls
4766             X<csv2xls>
4767              
4768             A script to convert C<CSV> to Microsoft Excel (C<XLS>). This requires extra
4769             modules L<Date::Calc> and L<Spreadsheet::WriteExcel>. The converter accepts
4770             various options and can produce UTF-8 compliant Excel files.
4771              
4772             =item csv2xlsx
4773             X<csv2xlsx>
4774              
4775             A script to convert C<CSV> to Microsoft Excel (C<XLSX>). This requires the
4776             modules L<Date::Calc> and L<Spreadsheet::Writer::XLSX>. The converter does
4777             accept various options including merging several C<CSV> files into a single
4778             Excel file.
4779              
4780             =item csvdiff
4781             X<csvdiff>
4782              
4783             A script that provides colorized diff on sorted CSV files, assuming first
4784             line is header and first field is the key. Output options include colorized
4785             ANSI escape codes or HTML.
4786              
4787             $ csvdiff --html --output=diff.html file1.csv file2.csv
4788              
4789             =item rewrite.pl
4790             X<rewrite.pl>
4791              
4792             A script to rewrite (in)valid CSV into valid CSV files. Script has options
4793             to generate confusing CSV files or CSV files that conform to Dutch MS-Excel
4794             exports (using C<;> as separation).
4795              
4796             Script - by default - honors BOM and auto-detects separation converting it
4797             to default standard CSV with C<,> as separator.
4798              
4799             =back
4800              
4801             =head1 CAVEATS
4802              
4803             Text::CSV_XS is I<not> designed to detect the characters used to quote and
4804             separate fields. The parsing is done using predefined (default) settings.
4805             In the examples sub-directory, you can find scripts that demonstrate how
4806             you could try to detect these characters yourself.
4807              
4808             =head2 Microsoft Excel
4809              
4810             The import/export from Microsoft Excel is a I<risky task>, according to the
4811             documentation in C<Text::CSV::Separator>. Microsoft uses the system's list
4812             separator defined in the regional settings, which happens to be a semicolon
4813             for Dutch, German and Spanish (and probably some others as well). For the
4814             English locale, the default is a comma. In Windows however, the user is
4815             free to choose a predefined locale, and then change I<every> individual
4816             setting in it, so checking the locale is no solution.
4817              
4818             As of version 1.17, a lone first line with just
4819              
4820             sep=;
4821              
4822             will be recognized and honored when parsing with L</getline>.
4823              
4824             =head1 TODO
4825              
4826             =over 2
4827              
4828             =item More Errors & Warnings
4829              
4830             New extensions ought to be clear and concise in reporting what error has
4831             occurred where and why, and maybe also offer a remedy to the problem.
4832              
4833             L</error_diag> is a (very) good start, but there is more work to be done in
4834             this area.
4835              
4836             Basic calls should croak or warn on illegal parameters. Errors should be
4837             documented.
4838              
4839             =item setting meta info
4840              
4841             Future extensions might include extending the L</meta_info>, L</is_quoted>,
4842             and L</is_binary> to accept setting these flags for fields, so you can
4843             specify which fields are quoted in the L</combine>/L</string> combination.
4844              
4845             $csv->meta_info (0, 1, 1, 3, 0, 0);
4846             $csv->is_quoted (3, 1);
4847              
4848             L<Metadata Vocabulary for Tabular Data|http://w3c.github.io/csvw/metadata/>
4849             (a W3C editor's draft) could be an example for supporting more metadata.
4850              
4851             =item Parse the whole file at once
4852              
4853             Implement new methods or functions that enable parsing of a complete file
4854             at once, returning a list of hashes. Possible extension to this could be to
4855             enable a column selection on the call:
4856              
4857             my @AoH = $csv->parse_file ($filename, { cols => [ 1, 4..8, 12 ]});
4858              
4859             returning something like
4860              
4861             [ { fields => [ 1, 2, "foo", 4.5, undef, "", 8 ],
4862             flags => [ ... ],
4863             },
4864             { fields => [ ... ],
4865             .
4866             },
4867             ]
4868              
4869             Note that the L</csv> function already supports most of this, but does not
4870             return flags. L</getline_all> returns all rows for an open stream, but this
4871             will not return flags either. L</fragment> can reduce the required rows
4872             I<or> columns, but cannot combine them.
4873              
4874             =item provider
4875              
4876             csv (in => $fh) vs csv (provider => sub { get_line });
4877              
4878             Whatever the attribute name might end up being, this should make it easier
4879             to add input providers for parsing. Currently most special variations for
4880             the C<in> attribute are aimed at CSV generation: e.g. a callback is defined
4881             to return a reference to a record. This new attribute should enable passing
4882             data to parse, like getline.
4883              
4884             Suggested by Johan Vromans.
4885              
4886             =item Cookbook
4887              
4888             Write a document that has recipes for most known non-standard (and maybe
4889             some standard) C<CSV> formats, including formats that use C<TAB>, C<;>,
4890             C<|>, or other non-comma separators.
4891              
4892             Examples could be taken from W3C's L<CSV on the Web: Use Cases and
4893             Requirements|http://w3c.github.io/csvw/use-cases-and-requirements/index.html>.
4894              
4895             =item Steal
4896              
4897             Steal good new ideas and features from L<PapaParse|http://papaparse.com> or
4898             L<csvkit|http://csvkit.readthedocs.org>.
4899              
4900             =item Raku support
4901              
4902             Raku support can be found L<here|https://github.com/Tux/CSV>. The interface
4903             is richer in support than the Perl5 API, as Raku supports more types.
4904              
4905             The Raku version does not (yet) support pure binary CSV datasets.
4906              
4907             =back
4908              
4909             =head2 NOT TODO
4910              
4911             =over 2
4912              
4913             =item combined methods
4914              
4915             Requests for adding means (methods) that combine L</combine> and L</string>
4916             in a single call will B<not> be honored (use L</print> instead). Likewise
4917             for L</parse> and L</fields> (use L</getline> instead), given the problems
4918             with embedded newlines.
4919              
4920             =back
4921              
4922             =head2 Release plan
4923              
4924             No guarantees, but this is what I had in mind some time ago:
4925              
4926             =over 2
4927              
4928             =item *
4929              
4930             DIAGNOSTICS section in pod to *describe* the errors (see below)
4931              
4932             =back
4933              
4934             =head1 EBCDIC
4935              
4936             Everything should now work on native EBCDIC systems. As the test does not
4937             cover all possible codepoints and L<Encode> does not support C<utf-ebcdic>,
4938             there is no guarantee that all handling of Unicode is done correctly.
4939              
4940             Opening C<EBCDIC> encoded files on C<ASCII>+ systems is likely to succeed
4941             using Encode's C<cp37>, C<cp1047>, or C<posix-bc>:
4942              
4943             open my $fh, "<:encoding(cp1047)", "ebcdic_file.csv" or die "...";
4944              
4945             =head1 DIAGNOSTICS
4946              
4947             Still under construction ...
4948              
4949             If an error occurs, C<< $csv->error_diag >> can be used to get information
4950             on the cause of the failure. Note that for speed reasons the internal value
4951             is never cleared on success, so using the value returned by L</error_diag>
4952             in normal cases - when no error occurred - may cause unexpected results.
4953              
4954             If the constructor failed, the cause can be found using L</error_diag> as a
4955             class method, like C<< Text::CSV_XS->error_diag >>.
4956              
4957             The C<< $csv->error_diag >> method is automatically invoked upon error when
4958             the constructor was called with L<C<auto_diag>|/auto_diag> set to C<1> or
4959             C<2>, or when L<autodie> is in effect. When set to C<1>, this will cause a
4960             C<warn> with the error message, when set to C<2>, it will C<die>. C<2012 -
4961             EOF> is excluded from L<C<auto_diag>|/auto_diag> reports.
4962              
4963             Errors can be (individually) caught using the L</error> callback.
4964              
4965             The errors as described below are available. I have tried to make the error
4966             itself explanatory enough, but more descriptions will be added. For most of
4967             these errors, the first three capitals describe the error category:
4968              
4969             =over 2
4970              
4971             =item *
4972             INI
4973              
4974             Initialization error or option conflict.
4975              
4976             =item *
4977             ECR
4978              
4979             Carriage-Return related parse error.
4980              
4981             =item *
4982             EOF
4983              
4984             End-Of-File related parse error.
4985              
4986             =item *
4987             EIQ
4988              
4989             Parse error inside quotation.
4990              
4991             =item *
4992             EIF
4993              
4994             Parse error inside field.
4995              
4996             =item *
4997             ECB
4998              
4999             Combine error.
5000              
5001             =item *
5002             EHR
5003              
5004             HashRef parse related error.
5005              
5006             =back
5007              
5008             And below should be the complete list of error codes that can be returned:
5009              
5010             =over 2
5011              
5012             =item *
5013             1001 "INI - sep_char is equal to quote_char or escape_char"
5014             X<1001>
5015              
5016             The L<separation character|/sep_char> cannot be equal to L<the quotation
5017             character|/quote_char> or to L<the escape character|/escape_char>, as this
5018             would invalidate all parsing rules.
5019              
5020             =item *
5021             1002 "INI - allow_whitespace with escape_char or quote_char SP or TAB"
5022             X<1002>
5023              
5024             Using the L<C<allow_whitespace>|/allow_whitespace> attribute when either
5025             L<C<quote_char>|/quote_char> or L<C<escape_char>|/escape_char> is equal to
5026             C<SPACE> or C<TAB> is too ambiguous to allow.
5027              
5028             =item *
5029             1003 "INI - \r or \n in main attr not allowed"
5030             X<1003>
5031              
5032             Using default L<C<eol>|/eol> characters in either L<C<sep_char>|/sep_char>,
5033             L<C<quote_char>|/quote_char>, or L<C<escape_char>|/escape_char> is not
5034             allowed.
5035              
5036             =item *
5037             1004 "INI - callbacks should be undef or a hashref"
5038             X<1004>
5039              
5040             The L<C<callbacks>|/Callbacks> attribute only accepts C<undef> or a hash
5041             reference.
5042              
5043             =item *
5044             1005 "INI - EOL too long"
5045             X<1005>
5046              
5047             The value passed for EOL is exceeding its maximum length (16).
5048              
5049             =item *
5050             1006 "INI - SEP too long"
5051             X<1006>
5052              
5053             The value passed for SEP is exceeding its maximum length (16).
5054              
5055             =item *
5056             1007 "INI - QUOTE too long"
5057             X<1007>
5058              
5059             The value passed for QUOTE is exceeding its maximum length (16).
5060              
5061             =item *
5062             1008 "INI - SEP undefined"
5063             X<1008>
5064              
5065             The value passed for SEP should be defined and not empty.
5066              
5067             =item *
5068             1010 "INI - the header is empty"
5069             X<1010>
5070              
5071             The header line parsed in the L</header> is empty.
5072              
5073             =item *
5074             1011 "INI - the header contains more than one valid separator"
5075             X<1011>
5076              
5077             The header line parsed in the L</header> contains more than one (unique)
5078             separator character out of the allowed set of separators.
5079              
5080             =item *
5081             1012 "INI - the header contains an empty field"
5082             X<1012>
5083              
5084             The header line parsed in the L</header> contains an empty field.
5085              
5086             =item *
5087             1013 "INI - the header contains non-unique fields"
5088             X<1013>
5089              
5090             The header line parsed in the L</header> contains at least two identical
5091             fields.
5092              
5093             =item *
5094             1014 "INI - header called on undefined stream"
5095             X<1014>
5096              
5097             The header line cannot be parsed from an undefined source.
5098              
5099             =item *
5100             1500 "PRM - Invalid/unsupported argument(s)"
5101             X<1500>
5102              
5103             Function or method called with invalid argument(s) or parameter(s).
5104              
5105             =item *
5106             1501 "PRM - The key attribute is passed as an unsupported type"
5107             X<1501>
5108              
5109             The C<key> attribute is of an unsupported type.
5110              
5111             =item *
5112             1502 "PRM - The value attribute is passed without the key attribute"
5113             X<1502>
5114              
5115             The C<value> attribute is only allowed when a valid key is given.
5116              
5117             =item *
5118             1503 "PRM - The value attribute is passed as an unsupported type"
5119             X<1503>
5120              
5121             The C<value> attribute is of an unsupported type.
5122              
5123             =item *
5124             2010 "ECR - QUO char inside quotes followed by CR not part of EOL"
5125             X<2010>
5126              
5127             When L<C<eol>|/eol> has been set to anything but the default, like
5128             C<"\r\t\n">, and the C<"\r"> is following the B<second> (closing)
5129             L<C<quote_char>|/quote_char>, where the characters following the C<"\r"> do
5130             not make up the L<C<eol>|/eol> sequence, this is an error.
5131              
5132             =item *
5133             2011 "ECR - Characters after end of quoted field"
5134             X<2011>
5135              
5136             Sequences like C<1,foo,"bar"baz,22,1> are not allowed. C<"bar"> is a quoted
5137             field and after the closing double-quote, there should be either a new-line
5138             sequence or a separation character.
5139              
5140             =item *
5141             2012 "EOF - End of data in parsing input stream"
5142             X<2012>
5143              
5144             Self-explanatory. End-of-file while parsing a stream. Can happen only
5145             when reading from streams with L</getline>, as using L</parse> is done on
5146             strings that are not required to have a trailing L<C<eol>|/eol>.
5147              
5148             =item *
5149             2013 "INI - Specification error for fragments RFC7111"
5150             X<2013>
5151              
5152             Invalid specification for a URI L</fragment>.
5153              
5154             =item *
5155             2014 "ENF - Inconsistent number of fields"
5156             X<2014>
5157              
5158             Inconsistent number of fields under strict parsing.
5159              
5160             =item *
5161             2015 "ERW - Empty row"
5162             X<2015>
5163              
5164             An empty row was not allowed.
5165              
5166             =item *
5167             2016 "EOL - Inconsistent EOL"
5168             X<2016>
5169              
5170             Inconsistent End-Of-Line detected under strict_eol parsing.
5171              
5172             =item *
5173             2021 "EIQ - NL char inside quotes, binary off"
5174             X<2021>
5175              
5176             Sequences like C<1,"foo\nbar",22,1> are allowed only when the binary option
5177             has been selected with the constructor.
5178              
5179             =item *
5180             2022 "EIQ - CR char inside quotes, binary off"
5181             X<2022>
5182              
5183             Sequences like C<1,"foo\rbar",22,1> are allowed only when the binary option
5184             has been selected with the constructor.
5185              
5186             =item *
5187             2023 "EIQ - QUO character not allowed"
5188             X<2023>
5189              
5190             Sequences like C<"foo "bar" baz",qu> and C<2023,",2008-04-05,"Foo, Bar",\n>
5191             will cause this error.
5192              
5193             =item *
5194             2024 "EIQ - EOF cannot be escaped, not even inside quotes"
5195             X<2024>
5196              
5197             The escape character is not allowed as last character in an input stream.
5198              
5199             =item *
5200             2025 "EIQ - Loose unescaped escape"
5201             X<2025>
5202              
5203             An escape character should escape only characters that need escaping.
5204              
5205             Allowing the escape for other characters is possible with the attribute
5206             L</allow_loose_escapes>.
5207              
5208             =item *
5209             2026 "EIQ - Binary character inside quoted field, binary off"
5210             X<2026>
5211              
5212             Binary characters are not allowed by default. Exceptions are fields that
5213             contain valid UTF-8, that will automatically be upgraded if the content is
5214             valid UTF-8. Set L<C<binary>|/binary> to C<1> to accept binary data.
5215              
5216             =item *
5217             2027 "EIQ - Quoted field not terminated"
5218             X<2027>
5219              
5220             When parsing a field that started with a quotation character, the field is
5221             expected to be closed with a quotation character. When the parsed line is
5222             exhausted before the quote is found, that field is not terminated.
5223              
5224             =item *
5225             2030 "EIF - NL char inside unquoted verbatim, binary off"
5226             X<2030>
5227              
5228             =item *
5229             2031 "EIF - CR char is first char of field, not part of EOL"
5230             X<2031>
5231              
5232             =item *
5233             2032 "EIF - CR char inside unquoted, not part of EOL"
5234             X<2032>
5235              
5236             =item *
5237             2034 "EIF - Loose unescaped quote"
5238             X<2034>
5239              
5240             =item *
5241             2035 "EIF - Escaped EOF in unquoted field"
5242             X<2035>
5243              
5244             =item *
5245             2036 "EIF - ESC error"
5246             X<2036>
5247              
5248             =item *
5249             2037 "EIF - Binary character in unquoted field, binary off"
5250             X<2037>
5251              
5252             =item *
5253             2110 "ECB - Binary character in Combine, binary off"
5254             X<2110>
5255              
5256             =item *
5257             2200 "EIO - print to IO failed. See errno"
5258             X<2200>
5259              
5260             =item *
5261             3001 "EHR - Unsupported syntax for column_names ()"
5262             X<3001>
5263              
5264             =item *
5265             3002 "EHR - getline_hr () called before column_names ()"
5266             X<3002>
5267              
5268             =item *
5269             3003 "EHR - bind_columns () and column_names () fields count mismatch"
5270             X<3003>
5271              
5272             =item *
5273             3004 "EHR - bind_columns () only accepts refs to scalars"
5274             X<3004>
5275              
5276             =item *
5277             3006 "EHR - bind_columns () did not pass enough refs for parsed fields"
5278             X<3006>
5279              
5280             =item *
5281             3007 "EHR - bind_columns needs refs to writable scalars"
5282             X<3007>
5283              
5284             =item *
5285             3008 "EHR - unexpected error in bound fields"
5286             X<3008>
5287              
5288             =item *
5289             3009 "EHR - print_hr () called before column_names ()"
5290             X<3009>
5291              
5292             =item *
5293             3010 "EHR - print_hr () called with invalid arguments"
5294             X<3010>
5295              
5296             =back
5297              
5298             =head1 SEE ALSO
5299              
5300             L<IO::File>, L<IO::Handle>, L<IO::Wrap>, L<Text::CSV>, L<Text::CSV_PP>,
5301             L<Text::CSV::Encoded>, L<Text::CSV::Separator>, L<Text::CSV::Slurp>,
5302             L<Spreadsheet::CSV> and L<Spreadsheet::Read>, and of course L<perl>.
5303              
5304             If you are using Raku, have a look at C<Text::CSV> in the Raku ecosystem,
5305             offering the same features.
5306              
5307             A beautiful L<Love Letter|https://github.com/medialab/xan/blob/master/docs/LOVE_LETTER.md>
5308             to C<CSV> by the developers of L<xan|https://github.com/medialab/xan#readme>.
5309              
5310             =head3 non-perl
5311              
5312             A CSV parser in JavaScript, also used by L<W3C|http://www.w3.org>, is the
5313             multi-threaded in-browser L<PapaParse|http://papaparse.com/>.
5314              
5315             L<csvkit|http://csvkit.readthedocs.org> is a Python CSV parsing toolkit.
5316              
5317             =head1 AUTHOR
5318              
5319             Alan Citterman F<E<lt>alan@mfgrtl.comE<gt>> wrote the original Perl module.
5320             Please don't send mail concerning Text::CSV_XS to Alan, who is not involved
5321             in the C/XS part that is now the main part of the module.
5322              
5323             Jochen Wiedmann F<E<lt>joe@ispsoft.deE<gt>> rewrote the en- and decoding in
5324             C by implementing a simple finite-state machine. He added variable quote,
5325             escape and separator characters, the binary mode and the print and getline
5326             methods. See F<ChangeLog> releases 0.10 through 0.23.
5327              
5328             H.Merijn Brand F<E<lt>hmbrand@cpan.orgE<gt>> cleaned up the code, added
5329             the field flags methods, wrote the major part of the test suite, completed
5330             the documentation, fixed most RT bugs, added all the allow flags and the
5331             L</csv> function. See F<ChangeLog> releases 0.25 and on.
5332              
5333             =head1 COPYRIGHT AND LICENSE
5334              
5335             Copyright (C) 2007-2025 H.Merijn Brand. All rights reserved.
5336             Copyright (C) 1998-2001 Jochen Wiedmann. All rights reserved.
5337             Copyright (C) 1997 Alan Citterman. All rights reserved.
5338              
5339             This library is free software; you can redistribute and/or modify it under
5340             the same terms as Perl itself.
5341              
5342             =cut
5343              
5344             =for elvis
5345             :ex:se gw=75|color guide #ff0000:
5346              
5347             =cut