| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Text::CSV_XS; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# Copyright (c) 2007-2025 H.Merijn Brand. All rights reserved. |
|
4
|
|
|
|
|
|
|
# Copyright (c) 1998-2001 Jochen Wiedmann. All rights reserved. |
|
5
|
|
|
|
|
|
|
# Copyright (c) 1997 Alan Citterman. All rights reserved. |
|
6
|
|
|
|
|
|
|
# |
|
7
|
|
|
|
|
|
|
# This program is free software; you can redistribute it and/or |
|
8
|
|
|
|
|
|
|
# modify it under the same terms as Perl itself. |
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
# HISTORY |
|
11
|
|
|
|
|
|
|
# |
|
12
|
|
|
|
|
|
|
# 0.24 - H.Merijn Brand <perl5@tux.freedom.nl> |
|
13
|
|
|
|
|
|
|
# 0.10 - 0.23 Jochen Wiedmann <joe@ispsoft.de> |
|
14
|
|
|
|
|
|
|
# Based on (the original) Text::CSV by Alan Citterman <alan@mfgrtl.com> |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
require 5.006001; |
|
17
|
|
|
|
|
|
|
|
|
18
|
34
|
|
|
34
|
|
4112731
|
use strict; |
|
|
34
|
|
|
|
|
107
|
|
|
|
34
|
|
|
|
|
1587
|
|
|
19
|
34
|
|
|
34
|
|
232
|
use warnings; |
|
|
34
|
|
|
|
|
99
|
|
|
|
34
|
|
|
|
|
2639
|
|
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
require Exporter; |
|
22
|
34
|
|
|
34
|
|
245
|
use XSLoader; |
|
|
34
|
|
|
|
|
61
|
|
|
|
34
|
|
|
|
|
1019
|
|
|
23
|
34
|
|
|
34
|
|
172
|
use Carp; |
|
|
34
|
|
|
|
|
133
|
|
|
|
34
|
|
|
|
|
3176
|
|
|
24
|
|
|
|
|
|
|
|
|
25
|
34
|
|
|
34
|
|
248
|
use vars qw( $VERSION @ISA @EXPORT_OK %EXPORT_TAGS ); |
|
|
34
|
|
|
|
|
105
|
|
|
|
34
|
|
|
|
|
11151
|
|
|
26
|
|
|
|
|
|
|
$VERSION = "1.61"; |
|
27
|
|
|
|
|
|
|
@ISA = qw( Exporter ); |
|
28
|
|
|
|
|
|
|
XSLoader::load ("Text::CSV_XS", $VERSION); |
|
29
|
|
|
|
|
|
|
|
|
30
|
4
|
|
|
4
|
1
|
13
|
sub PV { 0 } sub CSV_TYPE_PV { PV } |
|
|
12
|
|
|
12
|
1
|
268176
|
|
|
31
|
4
|
|
|
4
|
1
|
16
|
sub IV { 1 } sub CSV_TYPE_IV { IV } |
|
|
12
|
|
|
12
|
1
|
229950
|
|
|
32
|
4
|
|
|
4
|
1
|
11
|
sub NV { 2 } sub CSV_TYPE_NV { NV } |
|
|
12
|
|
|
12
|
1
|
95
|
|
|
33
|
|
|
|
|
|
|
|
|
34
|
11
|
|
|
11
|
1
|
71
|
sub CSV_FLAGS_IS_QUOTED { 0x0001 } |
|
35
|
12
|
|
|
12
|
1
|
70
|
sub CSV_FLAGS_IS_BINARY { 0x0002 } |
|
36
|
4
|
|
|
4
|
1
|
26
|
sub CSV_FLAGS_ERROR_IN_FIELD { 0x0004 } |
|
37
|
20
|
|
|
20
|
1
|
80
|
sub CSV_FLAGS_IS_MISSING { 0x0010 } |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
%EXPORT_TAGS = ( |
|
40
|
|
|
|
|
|
|
CONSTANTS => [qw( |
|
41
|
|
|
|
|
|
|
CSV_FLAGS_IS_QUOTED |
|
42
|
|
|
|
|
|
|
CSV_FLAGS_IS_BINARY |
|
43
|
|
|
|
|
|
|
CSV_FLAGS_ERROR_IN_FIELD |
|
44
|
|
|
|
|
|
|
CSV_FLAGS_IS_MISSING |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
CSV_TYPE_PV |
|
47
|
|
|
|
|
|
|
CSV_TYPE_IV |
|
48
|
|
|
|
|
|
|
CSV_TYPE_NV |
|
49
|
|
|
|
|
|
|
)], |
|
50
|
|
|
|
|
|
|
); |
|
51
|
|
|
|
|
|
|
@EXPORT_OK = (qw( csv PV IV NV ), @{$EXPORT_TAGS{'CONSTANTS'}}); |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
if ($] < 5.008002) { |
|
54
|
34
|
|
|
34
|
|
256
|
no warnings "redefine"; |
|
|
34
|
|
|
|
|
112
|
|
|
|
34
|
|
|
|
|
544317
|
|
|
55
|
|
|
|
|
|
|
*utf8::decode = sub {}; |
|
56
|
|
|
|
|
|
|
} |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
# version |
|
59
|
|
|
|
|
|
|
# |
|
60
|
|
|
|
|
|
|
# class/object method expecting no arguments and returning the version |
|
61
|
|
|
|
|
|
|
# number of Text::CSV. there are no side-effects. |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
sub version { |
|
64
|
2
|
|
|
2
|
1
|
1012
|
return $VERSION; |
|
65
|
|
|
|
|
|
|
} # version |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
# new |
|
68
|
|
|
|
|
|
|
# |
|
69
|
|
|
|
|
|
|
# class/object method expecting no arguments and returning a reference to |
|
70
|
|
|
|
|
|
|
# a newly created Text::CSV object. |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
my %def_attr = ( |
|
73
|
|
|
|
|
|
|
'eol' => '', |
|
74
|
|
|
|
|
|
|
'sep_char' => ',', |
|
75
|
|
|
|
|
|
|
'quote_char' => '"', |
|
76
|
|
|
|
|
|
|
'escape_char' => '"', |
|
77
|
|
|
|
|
|
|
'binary' => 0, |
|
78
|
|
|
|
|
|
|
'decode_utf8' => 1, |
|
79
|
|
|
|
|
|
|
'auto_diag' => 0, |
|
80
|
|
|
|
|
|
|
'diag_verbose' => 0, |
|
81
|
|
|
|
|
|
|
'strict' => 0, |
|
82
|
|
|
|
|
|
|
'strict_eol' => 0, |
|
83
|
|
|
|
|
|
|
'blank_is_undef' => 0, |
|
84
|
|
|
|
|
|
|
'empty_is_undef' => 0, |
|
85
|
|
|
|
|
|
|
'allow_whitespace' => 0, |
|
86
|
|
|
|
|
|
|
'allow_loose_quotes' => 0, |
|
87
|
|
|
|
|
|
|
'allow_loose_escapes' => 0, |
|
88
|
|
|
|
|
|
|
'allow_unquoted_escape' => 0, |
|
89
|
|
|
|
|
|
|
'always_quote' => 0, |
|
90
|
|
|
|
|
|
|
'quote_empty' => 0, |
|
91
|
|
|
|
|
|
|
'quote_space' => 1, |
|
92
|
|
|
|
|
|
|
'quote_binary' => 1, |
|
93
|
|
|
|
|
|
|
'escape_null' => 1, |
|
94
|
|
|
|
|
|
|
'keep_meta_info' => 0, |
|
95
|
|
|
|
|
|
|
'verbatim' => 0, |
|
96
|
|
|
|
|
|
|
'formula' => 0, |
|
97
|
|
|
|
|
|
|
'skip_empty_rows' => 0, |
|
98
|
|
|
|
|
|
|
'undef_str' => undef, |
|
99
|
|
|
|
|
|
|
'comment_str' => undef, |
|
100
|
|
|
|
|
|
|
'types' => undef, |
|
101
|
|
|
|
|
|
|
'callbacks' => undef, |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
'_EOF' => "", |
|
104
|
|
|
|
|
|
|
'_RECNO' => 0, |
|
105
|
|
|
|
|
|
|
'_STATUS' => undef, |
|
106
|
|
|
|
|
|
|
'_FIELDS' => undef, |
|
107
|
|
|
|
|
|
|
'_FFLAGS' => undef, |
|
108
|
|
|
|
|
|
|
'_STRING' => undef, |
|
109
|
|
|
|
|
|
|
'_ERROR_INPUT' => undef, |
|
110
|
|
|
|
|
|
|
'_COLUMN_NAMES' => undef, |
|
111
|
|
|
|
|
|
|
'_BOUND_COLUMNS' => undef, |
|
112
|
|
|
|
|
|
|
'_AHEAD' => undef, |
|
113
|
|
|
|
|
|
|
'_FORMULA_CB' => undef, |
|
114
|
|
|
|
|
|
|
'_EMPTROW_CB' => undef, |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
'ENCODING' => undef, |
|
117
|
|
|
|
|
|
|
); |
|
118
|
|
|
|
|
|
|
my %attr_alias = ( |
|
119
|
|
|
|
|
|
|
'quote_always' => "always_quote", |
|
120
|
|
|
|
|
|
|
'verbose_diag' => "diag_verbose", |
|
121
|
|
|
|
|
|
|
'quote_null' => "escape_null", |
|
122
|
|
|
|
|
|
|
'escape' => "escape_char", |
|
123
|
|
|
|
|
|
|
'comment' => "comment_str", |
|
124
|
|
|
|
|
|
|
); |
|
125
|
|
|
|
|
|
|
my $last_err = Text::CSV_XS->SetDiag (0); |
|
126
|
|
|
|
|
|
|
my $ebcdic = ord ("A") == 0xC1; # Faster than $Config{'ebcdic'} |
|
127
|
|
|
|
|
|
|
my @internal_kh; |
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# NOT a method: is also used before bless |
|
130
|
|
|
|
|
|
|
sub _unhealthy_whitespace { |
|
131
|
15734
|
|
|
15734
|
|
35355
|
my ($self, $aw) = @_; |
|
132
|
15734
|
100
|
|
|
|
54426
|
$aw or return 0; # no checks needed without allow_whitespace |
|
133
|
|
|
|
|
|
|
|
|
134
|
3569
|
|
|
|
|
7710
|
my $quo = $self->{'quote'}; |
|
135
|
3569
|
100
|
100
|
|
|
13703
|
defined $quo && length ($quo) or $quo = $self->{'quote_char'}; |
|
136
|
3569
|
|
|
|
|
7341
|
my $esc = $self->{'escape_char'}; |
|
137
|
|
|
|
|
|
|
|
|
138
|
3569
|
100
|
100
|
|
|
68167
|
defined $quo && $quo =~ m/^[ \t]/ and return 1002; |
|
139
|
3327
|
100
|
100
|
|
|
75399
|
defined $esc && $esc =~ m/^[ \t]/ and return 1002; |
|
140
|
|
|
|
|
|
|
|
|
141
|
3037
|
|
|
|
|
9374
|
return 0; |
|
142
|
|
|
|
|
|
|
} # _unhealty_whitespace |
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
sub _check_sanity { |
|
145
|
12428
|
|
|
12428
|
|
20030
|
my $self = shift; |
|
146
|
|
|
|
|
|
|
|
|
147
|
12428
|
|
|
|
|
23496
|
my $eol = $self->{'eol'}; |
|
148
|
12428
|
|
|
|
|
21635
|
my $sep = $self->{'sep'}; |
|
149
|
12428
|
100
|
100
|
|
|
48530
|
defined $sep && length ($sep) or $sep = $self->{'sep_char'}; |
|
150
|
12428
|
|
|
|
|
20744
|
my $quo = $self->{'quote'}; |
|
151
|
12428
|
100
|
100
|
|
|
40755
|
defined $quo && length ($quo) or $quo = $self->{'quote_char'}; |
|
152
|
12428
|
|
|
|
|
23591
|
my $esc = $self->{'escape_char'}; |
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# use DP;::diag ("SEP: '", DPeek ($sep), |
|
155
|
|
|
|
|
|
|
# "', QUO: '", DPeek ($quo), |
|
156
|
|
|
|
|
|
|
# "', ESC: '", DPeek ($esc),"'"); |
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
# sep_char should not be undefined |
|
159
|
12428
|
100
|
|
|
|
33562
|
$sep ne "" or return 1008; |
|
160
|
12426
|
100
|
|
|
|
28359
|
length ($sep) > 16 and return 1006; |
|
161
|
12425
|
100
|
|
|
|
42665
|
$sep =~ m/[\r\n]/ and return 1003; |
|
162
|
|
|
|
|
|
|
|
|
163
|
12419
|
100
|
|
|
|
24998
|
if (defined $quo) { |
|
164
|
12409
|
100
|
|
|
|
87299
|
$quo eq $sep and return 1001; |
|
165
|
12181
|
100
|
|
|
|
26538
|
length ($quo) > 16 and return 1007; |
|
166
|
12180
|
100
|
|
|
|
29343
|
$quo =~ m/[\r\n]/ and return 1003; |
|
167
|
|
|
|
|
|
|
} |
|
168
|
12184
|
100
|
|
|
|
23751
|
if (defined $esc) { |
|
169
|
12168
|
100
|
|
|
|
67722
|
$esc eq $sep and return 1001; |
|
170
|
12000
|
100
|
|
|
|
29671
|
$esc =~ m/[\r\n]/ and return 1003; |
|
171
|
|
|
|
|
|
|
} |
|
172
|
12010
|
100
|
|
|
|
23053
|
if (defined $eol) { |
|
173
|
12005
|
100
|
|
|
|
23198
|
length ($eol) > 16 and return 1005; |
|
174
|
|
|
|
|
|
|
} |
|
175
|
|
|
|
|
|
|
|
|
176
|
12009
|
|
|
|
|
33864
|
return _unhealthy_whitespace ($self, $self->{'allow_whitespace'}); |
|
177
|
|
|
|
|
|
|
} # _check_sanity |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
sub known_attributes { |
|
180
|
3
|
|
|
3
|
1
|
777
|
sort grep !m/^_/ => "sep", "quote", keys %def_attr; |
|
181
|
|
|
|
|
|
|
} # known_attributes |
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
sub new { |
|
184
|
1004
|
|
|
1004
|
1
|
72071138
|
$last_err = Text::CSV_XS->SetDiag (1000, |
|
185
|
|
|
|
|
|
|
"usage: my \$csv = Text::CSV_XS->new ([{ option => value, ... }]);"); |
|
186
|
|
|
|
|
|
|
|
|
187
|
1004
|
|
|
|
|
2541
|
my $proto = shift; |
|
188
|
1004
|
100
|
100
|
|
|
6178
|
my $class = ref $proto || $proto or return; |
|
189
|
1003
|
100
|
100
|
|
|
5908
|
@_ > 0 && ref $_[0] ne "HASH" and return; |
|
190
|
995
|
|
100
|
|
|
2832
|
my $attr = shift || {}; |
|
191
|
|
|
|
|
|
|
my %attr = map { |
|
192
|
2756
|
100
|
|
|
|
11666
|
my $k = m/^[a-zA-Z]\w+$/ ? lc $_ : $_; |
|
193
|
2756
|
100
|
|
|
|
6666
|
exists $attr_alias{$k} and $k = $attr_alias{$k}; |
|
194
|
2756
|
|
|
|
|
8807
|
($k => $attr->{$_}); |
|
195
|
995
|
|
|
|
|
1748
|
} keys %{$attr}; |
|
|
995
|
|
|
|
|
3728
|
|
|
196
|
|
|
|
|
|
|
|
|
197
|
995
|
|
|
|
|
3410
|
my $sep_aliased = 0; |
|
198
|
995
|
100
|
|
|
|
2775
|
if (exists $attr{'sep'}) { |
|
199
|
10
|
|
|
|
|
35
|
$attr{'sep_char'} = delete $attr{'sep'}; |
|
200
|
10
|
|
|
|
|
28
|
$sep_aliased = 1; |
|
201
|
|
|
|
|
|
|
} |
|
202
|
995
|
|
|
|
|
1714
|
my $quote_aliased = 0; |
|
203
|
995
|
100
|
|
|
|
2476
|
if (exists $attr{'quote'}) { |
|
204
|
25
|
|
|
|
|
80
|
$attr{'quote_char'} = delete $attr{'quote'}; |
|
205
|
25
|
|
|
|
|
44
|
$quote_aliased = 1; |
|
206
|
|
|
|
|
|
|
} |
|
207
|
|
|
|
|
|
|
exists $attr{'formula_handling'} and |
|
208
|
995
|
100
|
|
|
|
2354
|
$attr{'formula'} = delete $attr{'formula_handling'}; |
|
209
|
995
|
|
|
|
|
1874
|
my $attr_formula = delete $attr{'formula'}; |
|
210
|
|
|
|
|
|
|
|
|
211
|
995
|
|
|
|
|
2724
|
for (keys %attr) { |
|
212
|
2720
|
100
|
100
|
|
|
10748
|
if (m/^[a-z]/ && exists $def_attr{$_}) { |
|
213
|
|
|
|
|
|
|
# uncoverable condition false |
|
214
|
2713
|
100
|
100
|
|
|
9899
|
defined $attr{$_} && m/_char$/ and utf8::decode ($attr{$_}); |
|
215
|
2713
|
|
|
|
|
4942
|
next; |
|
216
|
|
|
|
|
|
|
} |
|
217
|
|
|
|
|
|
|
# croak? |
|
218
|
7
|
|
|
|
|
40
|
$last_err = Text::CSV_XS->SetDiag (1000, "INI - Unknown attribute '$_'"); |
|
219
|
7
|
100
|
|
|
|
27
|
$attr{'auto_diag'} and error_diag (); |
|
220
|
7
|
|
|
|
|
48
|
return; |
|
221
|
|
|
|
|
|
|
} |
|
222
|
988
|
100
|
|
|
|
2581
|
if ($sep_aliased) { |
|
223
|
10
|
|
|
|
|
59
|
my @b = unpack "U0C*", $attr{'sep_char'}; |
|
224
|
10
|
100
|
|
|
|
40
|
if (@b > 1) { |
|
225
|
6
|
|
|
|
|
19
|
$attr{'sep'} = $attr{'sep_char'}; |
|
226
|
6
|
|
|
|
|
18
|
$attr{'sep_char'} = "\0"; |
|
227
|
|
|
|
|
|
|
} |
|
228
|
|
|
|
|
|
|
else { |
|
229
|
4
|
|
|
|
|
13
|
$attr{'sep'} = undef; |
|
230
|
|
|
|
|
|
|
} |
|
231
|
|
|
|
|
|
|
} |
|
232
|
988
|
100
|
100
|
|
|
2608
|
if ($quote_aliased and defined $attr{'quote_char'}) { |
|
233
|
21
|
|
|
|
|
84
|
my @b = unpack "U0C*", $attr{'quote_char'}; |
|
234
|
21
|
100
|
|
|
|
55
|
if (@b > 1) { |
|
235
|
7
|
|
|
|
|
21
|
$attr{'quote'} = $attr{'quote_char'}; |
|
236
|
7
|
|
|
|
|
21
|
$attr{'quote_char'} = "\0"; |
|
237
|
|
|
|
|
|
|
} |
|
238
|
|
|
|
|
|
|
else { |
|
239
|
14
|
|
|
|
|
31
|
$attr{'quote'} = undef; |
|
240
|
|
|
|
|
|
|
} |
|
241
|
|
|
|
|
|
|
} |
|
242
|
|
|
|
|
|
|
|
|
243
|
988
|
|
|
|
|
24669
|
my $self = { %def_attr, %attr }; |
|
244
|
988
|
100
|
|
|
|
5100
|
if (my $ec = _check_sanity ($self)) { |
|
245
|
35
|
|
|
|
|
142
|
$last_err = Text::CSV_XS->SetDiag ($ec); |
|
246
|
35
|
100
|
|
|
|
82
|
$attr{'auto_diag'} and error_diag (); |
|
247
|
35
|
|
|
|
|
291
|
return; |
|
248
|
|
|
|
|
|
|
} |
|
249
|
953
|
100
|
100
|
|
|
3618
|
if (defined $self->{'callbacks'} && ref $self->{'callbacks'} ne "HASH") { |
|
250
|
6
|
|
|
|
|
1069
|
carp ("The 'callbacks' attribute is set but is not a hash: ignored\n"); |
|
251
|
6
|
|
|
|
|
86
|
$self->{'callbacks'} = undef; |
|
252
|
|
|
|
|
|
|
} |
|
253
|
|
|
|
|
|
|
|
|
254
|
953
|
|
|
|
|
4771
|
$last_err = Text::CSV_XS->SetDiag (0); |
|
255
|
953
|
100
|
100
|
|
|
3451
|
defined $\ && !exists $attr{'eol'} and $self->{'eol'} = $\; |
|
256
|
953
|
|
|
|
|
2210
|
bless $self, $class; |
|
257
|
953
|
100
|
|
|
|
2632
|
defined $self->{'types'} and $self->types ($self->{'types'}); |
|
258
|
953
|
50
|
|
|
|
3708
|
defined $self->{'skip_empty_rows'} and $self->{'skip_empty_rows'} = _supported_skip_empty_rows ($self, $self->{'skip_empty_rows'}); |
|
259
|
953
|
100
|
|
|
|
2420
|
defined $attr_formula and $self->{'formula'} = _supported_formula ($self, $attr_formula); |
|
260
|
952
|
|
|
|
|
7356
|
$self; |
|
261
|
|
|
|
|
|
|
} # new |
|
262
|
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
# Keep in sync with XS! |
|
264
|
|
|
|
|
|
|
my %_cache_id = ( # Only expose what is accessed from within PM |
|
265
|
|
|
|
|
|
|
'quote_char' => 0, |
|
266
|
|
|
|
|
|
|
'escape_char' => 1, |
|
267
|
|
|
|
|
|
|
'sep_char' => 2, |
|
268
|
|
|
|
|
|
|
'always_quote' => 4, |
|
269
|
|
|
|
|
|
|
'quote_empty' => 5, |
|
270
|
|
|
|
|
|
|
'quote_space' => 6, |
|
271
|
|
|
|
|
|
|
'quote_binary' => 7, |
|
272
|
|
|
|
|
|
|
'allow_loose_quotes' => 8, |
|
273
|
|
|
|
|
|
|
'allow_loose_escapes' => 9, |
|
274
|
|
|
|
|
|
|
'allow_unquoted_escape' => 10, |
|
275
|
|
|
|
|
|
|
'allow_whitespace' => 11, |
|
276
|
|
|
|
|
|
|
'blank_is_undef' => 12, |
|
277
|
|
|
|
|
|
|
'empty_is_undef' => 13, |
|
278
|
|
|
|
|
|
|
'auto_diag' => 14, |
|
279
|
|
|
|
|
|
|
'diag_verbose' => 15, |
|
280
|
|
|
|
|
|
|
'escape_null' => 16, |
|
281
|
|
|
|
|
|
|
'formula' => 18, |
|
282
|
|
|
|
|
|
|
'decode_utf8' => 21, |
|
283
|
|
|
|
|
|
|
'verbatim' => 23, |
|
284
|
|
|
|
|
|
|
'strict_eol' => 24, |
|
285
|
|
|
|
|
|
|
'strict' => 28, |
|
286
|
|
|
|
|
|
|
'skip_empty_rows' => 29, |
|
287
|
|
|
|
|
|
|
'binary' => 30, |
|
288
|
|
|
|
|
|
|
'keep_meta_info' => 31, |
|
289
|
|
|
|
|
|
|
'_has_hooks' => 32, |
|
290
|
|
|
|
|
|
|
'_has_ahead' => 33, |
|
291
|
|
|
|
|
|
|
'_is_bound' => 44, |
|
292
|
|
|
|
|
|
|
'eol' => 100, |
|
293
|
|
|
|
|
|
|
'sep' => 116, |
|
294
|
|
|
|
|
|
|
'quote' => 132, |
|
295
|
|
|
|
|
|
|
'undef_str' => 148, |
|
296
|
|
|
|
|
|
|
'comment_str' => 156, |
|
297
|
|
|
|
|
|
|
'types' => 92, |
|
298
|
|
|
|
|
|
|
); |
|
299
|
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
# A `character' |
|
301
|
|
|
|
|
|
|
sub _set_attr_C { |
|
302
|
11108
|
|
|
11108
|
|
31027
|
my ($self, $name, $val, $ec) = @_; |
|
303
|
11108
|
100
|
|
|
|
50264
|
defined $val and utf8::decode ($val); |
|
304
|
11108
|
|
|
|
|
31667
|
$self->{$name} = $val; |
|
305
|
11108
|
100
|
|
|
|
27351
|
$ec = _check_sanity ($self) and croak ($self->SetDiag ($ec)); |
|
306
|
10198
|
|
|
|
|
50313
|
$self->_cache_set ($_cache_id{$name}, $val); |
|
307
|
|
|
|
|
|
|
} # _set_attr_C |
|
308
|
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
# A flag |
|
310
|
|
|
|
|
|
|
sub _set_attr_X { |
|
311
|
5643
|
|
|
5643
|
|
15964
|
my ($self, $name, $val) = @_; |
|
312
|
5643
|
100
|
|
|
|
13887
|
defined $val or $val = 0; |
|
313
|
5643
|
|
|
|
|
14364
|
$self->{$name} = $val; |
|
314
|
5643
|
|
|
|
|
35192
|
$self->_cache_set ($_cache_id{$name}, 0 + $val); |
|
315
|
|
|
|
|
|
|
} # _set_attr_X |
|
316
|
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
# A number |
|
318
|
|
|
|
|
|
|
sub _set_attr_N { |
|
319
|
68
|
|
|
68
|
|
151
|
my ($self, $name, $val) = @_; |
|
320
|
68
|
|
|
|
|
131
|
$self->{$name} = $val; |
|
321
|
68
|
|
|
|
|
383
|
$self->_cache_set ($_cache_id{$name}, 0 + $val); |
|
322
|
|
|
|
|
|
|
} # _set_attr_N |
|
323
|
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
# Accessor methods. |
|
325
|
|
|
|
|
|
|
# It is unwise to change them halfway through a single file! |
|
326
|
|
|
|
|
|
|
sub quote_char { |
|
327
|
4836
|
|
|
4836
|
1
|
1095536
|
my $self = shift; |
|
328
|
4836
|
100
|
|
|
|
14291
|
if (@_) { |
|
329
|
3601
|
|
|
|
|
10428
|
$self->_set_attr_C ("quote_char", shift); |
|
330
|
3374
|
|
|
|
|
10708
|
$self->_cache_set ($_cache_id{'quote'}, ""); |
|
331
|
|
|
|
|
|
|
} |
|
332
|
4609
|
|
|
|
|
18990
|
$self->{'quote_char'}; |
|
333
|
|
|
|
|
|
|
} # quote_char |
|
334
|
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
sub quote { |
|
336
|
20
|
|
|
20
|
1
|
50
|
my $self = shift; |
|
337
|
20
|
100
|
|
|
|
72
|
if (@_) { |
|
338
|
11
|
|
|
|
|
19
|
my $quote = shift; |
|
339
|
11
|
100
|
|
|
|
30
|
defined $quote or $quote = ""; |
|
340
|
11
|
|
|
|
|
38
|
utf8::decode ($quote); |
|
341
|
11
|
|
|
|
|
47
|
my @b = unpack "U0C*", $quote; |
|
342
|
11
|
100
|
|
|
|
32
|
if (@b > 1) { |
|
343
|
5
|
100
|
|
|
|
141
|
@b > 16 and croak ($self->SetDiag (1007)); |
|
344
|
4
|
|
|
|
|
13
|
$self->quote_char ("\0"); |
|
345
|
|
|
|
|
|
|
} |
|
346
|
|
|
|
|
|
|
else { |
|
347
|
6
|
|
|
|
|
18
|
$self->quote_char ($quote); |
|
348
|
6
|
|
|
|
|
10
|
$quote = ""; |
|
349
|
|
|
|
|
|
|
} |
|
350
|
10
|
|
|
|
|
26
|
$self->{'quote'} = $quote; |
|
351
|
|
|
|
|
|
|
|
|
352
|
10
|
|
|
|
|
20
|
my $ec = _check_sanity ($self); |
|
353
|
10
|
100
|
|
|
|
148
|
$ec and croak ($self->SetDiag ($ec)); |
|
354
|
|
|
|
|
|
|
|
|
355
|
9
|
|
|
|
|
26
|
$self->_cache_set ($_cache_id{'quote'}, $quote); |
|
356
|
|
|
|
|
|
|
} |
|
357
|
18
|
|
|
|
|
40
|
my $quote = $self->{'quote'}; |
|
358
|
18
|
100
|
100
|
|
|
220
|
defined $quote && length ($quote) ? $quote : $self->{'quote_char'}; |
|
359
|
|
|
|
|
|
|
} # quote |
|
360
|
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
sub escape_char { |
|
362
|
4826
|
|
|
4826
|
1
|
1117206
|
my $self = shift; |
|
363
|
4826
|
100
|
|
|
|
15787
|
if (@_) { |
|
364
|
3595
|
|
|
|
|
7700
|
my $ec = shift; |
|
365
|
3595
|
|
|
|
|
11114
|
$self->_set_attr_C ("escape_char", $ec); |
|
366
|
3480
|
100
|
|
|
|
8618
|
$ec or $self->_set_attr_X ("escape_null", 0); |
|
367
|
|
|
|
|
|
|
} |
|
368
|
4711
|
|
|
|
|
25297
|
$self->{'escape_char'}; |
|
369
|
|
|
|
|
|
|
} # escape_char |
|
370
|
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
sub sep_char { |
|
372
|
5155
|
|
|
5155
|
1
|
1100887
|
my $self = shift; |
|
373
|
5155
|
100
|
|
|
|
17080
|
if (@_) { |
|
374
|
3912
|
|
|
|
|
12297
|
$self->_set_attr_C ("sep_char", shift); |
|
375
|
3344
|
|
|
|
|
11473
|
$self->_cache_set ($_cache_id{'sep'}, ""); |
|
376
|
|
|
|
|
|
|
} |
|
377
|
4587
|
|
|
|
|
21092
|
$self->{'sep_char'}; |
|
378
|
|
|
|
|
|
|
} # sep_char |
|
379
|
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
sub sep { |
|
381
|
359
|
|
|
359
|
1
|
7348
|
my $self = shift; |
|
382
|
359
|
100
|
|
|
|
881
|
if (@_) { |
|
383
|
326
|
|
|
|
|
696
|
my $sep = shift; |
|
384
|
326
|
100
|
|
|
|
661
|
defined $sep or $sep = ""; |
|
385
|
326
|
|
|
|
|
1242
|
utf8::decode ($sep); |
|
386
|
326
|
|
|
|
|
1354
|
my @b = unpack "U0C*", $sep; |
|
387
|
326
|
100
|
|
|
|
916
|
if (@b > 1) { |
|
388
|
13
|
100
|
|
|
|
164
|
@b > 16 and croak ($self->SetDiag (1006)); |
|
389
|
12
|
|
|
|
|
41
|
$self->sep_char ("\0"); |
|
390
|
|
|
|
|
|
|
} |
|
391
|
|
|
|
|
|
|
else { |
|
392
|
313
|
|
|
|
|
953
|
$self->sep_char ($sep); |
|
393
|
310
|
|
|
|
|
497
|
$sep = ""; |
|
394
|
|
|
|
|
|
|
} |
|
395
|
322
|
|
|
|
|
742
|
$self->{'sep'} = $sep; |
|
396
|
|
|
|
|
|
|
|
|
397
|
322
|
|
|
|
|
774
|
my $ec = _check_sanity ($self); |
|
398
|
322
|
100
|
|
|
|
816
|
$ec and croak ($self->SetDiag ($ec)); |
|
399
|
|
|
|
|
|
|
|
|
400
|
321
|
|
|
|
|
1141
|
$self->_cache_set ($_cache_id{'sep'}, $sep); |
|
401
|
|
|
|
|
|
|
} |
|
402
|
354
|
|
|
|
|
722
|
my $sep = $self->{'sep'}; |
|
403
|
354
|
100
|
100
|
|
|
1658
|
defined $sep && length ($sep) ? $sep : $self->{'sep_char'}; |
|
404
|
|
|
|
|
|
|
} # sep |
|
405
|
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
sub eol { |
|
407
|
280
|
|
|
280
|
1
|
6829
|
my $self = shift; |
|
408
|
280
|
100
|
|
|
|
660
|
if (@_) { |
|
409
|
227
|
|
|
|
|
385
|
my $eol = shift; |
|
410
|
227
|
100
|
|
|
|
552
|
defined $eol or $eol = ""; # Also reset strict_eol? |
|
411
|
227
|
100
|
|
|
|
681
|
length ($eol) > 16 and croak ($self->SetDiag (1005)); |
|
412
|
226
|
|
|
|
|
425
|
$self->{'eol'} = $eol; |
|
413
|
226
|
|
|
|
|
790
|
$self->_cache_set ($_cache_id{'eol'}, $eol); |
|
414
|
|
|
|
|
|
|
} |
|
415
|
279
|
|
|
|
|
1621
|
$self->{'eol'}; |
|
416
|
|
|
|
|
|
|
} # eol |
|
417
|
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
sub eol_type { |
|
419
|
32
|
|
|
32
|
1
|
65
|
my $self = shift; |
|
420
|
32
|
|
|
|
|
169
|
$self->_cache_get_eolt; |
|
421
|
|
|
|
|
|
|
} # eol_type |
|
422
|
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
sub always_quote { |
|
424
|
3032
|
|
|
3032
|
1
|
1143717
|
my $self = shift; |
|
425
|
3032
|
100
|
|
|
|
12556
|
@_ and $self->_set_attr_X ("always_quote", shift); |
|
426
|
3032
|
|
|
|
|
10907
|
$self->{'always_quote'}; |
|
427
|
|
|
|
|
|
|
} # always_quote |
|
428
|
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
sub quote_space { |
|
430
|
10
|
|
|
10
|
1
|
23
|
my $self = shift; |
|
431
|
10
|
100
|
|
|
|
42
|
@_ and $self->_set_attr_X ("quote_space", shift); |
|
432
|
10
|
|
|
|
|
38
|
$self->{'quote_space'}; |
|
433
|
|
|
|
|
|
|
} # quote_space |
|
434
|
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
sub quote_empty { |
|
436
|
5
|
|
|
5
|
1
|
13
|
my $self = shift; |
|
437
|
5
|
100
|
|
|
|
24
|
@_ and $self->_set_attr_X ("quote_empty", shift); |
|
438
|
5
|
|
|
|
|
29
|
$self->{'quote_empty'}; |
|
439
|
|
|
|
|
|
|
} # quote_empty |
|
440
|
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
sub escape_null { |
|
442
|
6
|
|
|
6
|
1
|
11
|
my $self = shift; |
|
443
|
6
|
100
|
|
|
|
28
|
@_ and $self->_set_attr_X ("escape_null", shift); |
|
444
|
6
|
|
|
|
|
27
|
$self->{'escape_null'}; |
|
445
|
|
|
|
|
|
|
} # escape_null |
|
446
|
3
|
|
|
3
|
1
|
14
|
sub quote_null { goto &escape_null; } |
|
447
|
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
sub quote_binary { |
|
449
|
7
|
|
|
7
|
1
|
23
|
my $self = shift; |
|
450
|
7
|
100
|
|
|
|
38
|
@_ and $self->_set_attr_X ("quote_binary", shift); |
|
451
|
7
|
|
|
|
|
50
|
$self->{'quote_binary'}; |
|
452
|
|
|
|
|
|
|
} # quote_binary |
|
453
|
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
sub binary { |
|
455
|
21
|
|
|
21
|
1
|
94566
|
my $self = shift; |
|
456
|
21
|
100
|
|
|
|
111
|
@_ and $self->_set_attr_X ("binary", shift); |
|
457
|
21
|
|
|
|
|
73
|
$self->{'binary'}; |
|
458
|
|
|
|
|
|
|
} # binary |
|
459
|
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
sub strict { |
|
461
|
2
|
|
|
2
|
1
|
5
|
my $self = shift; |
|
462
|
2
|
100
|
|
|
|
22
|
@_ and $self->_set_attr_X ("strict", shift); |
|
463
|
2
|
|
|
|
|
9
|
$self->{'strict'}; |
|
464
|
|
|
|
|
|
|
} # strict |
|
465
|
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
sub strict_eol { |
|
467
|
2
|
|
|
2
|
1
|
4
|
my $self = shift; |
|
468
|
2
|
100
|
|
|
|
9
|
@_ and $self->_set_attr_X ("strict_eol", shift); |
|
469
|
2
|
|
|
|
|
8
|
$self->{'strict_eol'}; |
|
470
|
|
|
|
|
|
|
} # strict_eol |
|
471
|
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
sub _supported_skip_empty_rows { |
|
473
|
974
|
|
|
974
|
|
1984
|
my ($self, $f) = @_; |
|
474
|
974
|
100
|
|
|
|
2187
|
defined $f or return 0; |
|
475
|
973
|
100
|
66
|
|
|
4284
|
if ($self && $f && ref $f && ref $f eq "CODE") { |
|
|
|
|
100
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
476
|
5
|
|
|
|
|
9
|
$self->{'_EMPTROW_CB'} = $f; |
|
477
|
5
|
|
|
|
|
12
|
return 6; |
|
478
|
|
|
|
|
|
|
} |
|
479
|
|
|
|
|
|
|
$f =~ m/^(?: 0 | undef )$/xi ? 0 : |
|
480
|
|
|
|
|
|
|
$f =~ m/^(?: 1 | skip )$/xi ? 1 : |
|
481
|
|
|
|
|
|
|
$f =~ m/^(?: 2 | eof | stop )$/xi ? 2 : |
|
482
|
|
|
|
|
|
|
$f =~ m/^(?: 3 | die )$/xi ? 3 : |
|
483
|
|
|
|
|
|
|
$f =~ m/^(?: 4 | croak )$/xi ? 4 : |
|
484
|
|
|
|
|
|
|
$f =~ m/^(?: 5 | error )$/xi ? 5 : |
|
485
|
968
|
0
|
|
|
|
5338
|
$f =~ m/^(?: 6 | cb )$/xi ? 6 : do { |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
486
|
0
|
|
0
|
|
|
0
|
$self ||= "Text::CSV_XS"; |
|
487
|
0
|
|
|
|
|
0
|
croak ($self->_SetDiagInfo (1500, "skip_empty_rows '$f' is not supported")); |
|
488
|
|
|
|
|
|
|
}; |
|
489
|
|
|
|
|
|
|
} # _supported_skip_empty_rows |
|
490
|
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
sub skip_empty_rows { |
|
492
|
23
|
|
|
23
|
1
|
51
|
my $self = shift; |
|
493
|
23
|
100
|
|
|
|
86
|
@_ and $self->_set_attr_N ("skip_empty_rows", _supported_skip_empty_rows ($self, shift)); |
|
494
|
23
|
|
|
|
|
43
|
my $ser = $self->{'skip_empty_rows'}; |
|
495
|
23
|
100
|
|
|
|
63
|
$ser == 6 or $self->{'_EMPTROW_CB'} = undef; |
|
496
|
|
|
|
|
|
|
$ser <= 1 ? $ser : $ser == 2 ? "eof" : $ser == 3 ? "die" : |
|
497
|
|
|
|
|
|
|
$ser == 4 ? "croak" : $ser == 5 ? "error" : |
|
498
|
23
|
100
|
|
|
|
126
|
$self->{'_EMPTROW_CB'}; |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
} # skip_empty_rows |
|
500
|
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
sub _SetDiagInfo { |
|
502
|
17
|
|
|
17
|
|
35
|
my ($self, $err, $msg) = @_; |
|
503
|
17
|
|
|
|
|
146
|
$self->SetDiag ($err); |
|
504
|
17
|
|
|
|
|
52
|
my $em = $self->error_diag (); |
|
505
|
17
|
50
|
|
|
|
75
|
$em =~ s/^\d+$// and $msg =~ s/^/# /; |
|
506
|
17
|
50
|
|
|
|
66
|
my $sep = $em =~ m/[;\n]$/ ? "\n\t" : ": "; |
|
507
|
17
|
|
|
|
|
2613
|
join $sep => grep m/\S\S\S/ => $em, $msg; |
|
508
|
|
|
|
|
|
|
} # _SetDiagInfo |
|
509
|
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
sub _supported_formula { |
|
511
|
103
|
|
|
103
|
|
156
|
my ($self, $f) = @_; |
|
512
|
103
|
100
|
|
|
|
177
|
defined $f or return 5; |
|
513
|
102
|
100
|
66
|
|
|
455
|
if ($self && $f && ref $f && ref $f eq "CODE") { |
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
514
|
6
|
|
|
|
|
11
|
$self->{'_FORMULA_CB'} = $f; |
|
515
|
6
|
|
|
|
|
16
|
return 6; |
|
516
|
|
|
|
|
|
|
} |
|
517
|
|
|
|
|
|
|
$f =~ m/^(?: 0 | none )$/xi ? 0 : |
|
518
|
|
|
|
|
|
|
$f =~ m/^(?: 1 | die )$/xi ? 1 : |
|
519
|
|
|
|
|
|
|
$f =~ m/^(?: 2 | croak )$/xi ? 2 : |
|
520
|
|
|
|
|
|
|
$f =~ m/^(?: 3 | diag )$/xi ? 3 : |
|
521
|
|
|
|
|
|
|
$f =~ m/^(?: 4 | empty | )$/xi ? 4 : |
|
522
|
|
|
|
|
|
|
$f =~ m/^(?: 5 | undef )$/xi ? 5 : |
|
523
|
96
|
100
|
|
|
|
791
|
$f =~ m/^(?: 6 | cb )$/xi ? 6 : do { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
524
|
7
|
|
50
|
|
|
15
|
$self ||= "Text::CSV_XS"; |
|
525
|
7
|
|
|
|
|
20
|
croak ($self->_SetDiagInfo (1500, "formula-handling '$f' is not supported")); |
|
526
|
|
|
|
|
|
|
}; |
|
527
|
|
|
|
|
|
|
} # _supported_formula |
|
528
|
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
sub formula { |
|
530
|
44
|
|
|
44
|
1
|
2763
|
my $self = shift; |
|
531
|
44
|
100
|
|
|
|
120
|
@_ and $self->_set_attr_N ("formula", _supported_formula ($self, shift)); |
|
532
|
38
|
100
|
|
|
|
97
|
$self->{'formula'} == 6 or $self->{'_FORMULA_CB'} = undef; |
|
533
|
38
|
|
|
|
|
132
|
[qw( none die croak diag empty undef cb )]->[_supported_formula ($self, $self->{'formula'})]; |
|
534
|
|
|
|
|
|
|
} # formula |
|
535
|
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
sub formula_handling { |
|
537
|
7
|
|
|
7
|
1
|
11
|
my $self = shift; |
|
538
|
7
|
|
|
|
|
16
|
$self->formula (@_); |
|
539
|
|
|
|
|
|
|
} # formula_handling |
|
540
|
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
sub decode_utf8 { |
|
542
|
2
|
|
|
2
|
1
|
5
|
my $self = shift; |
|
543
|
2
|
100
|
|
|
|
9
|
@_ and $self->_set_attr_X ("decode_utf8", shift); |
|
544
|
2
|
|
|
|
|
9
|
$self->{'decode_utf8'}; |
|
545
|
|
|
|
|
|
|
} # decode_utf8 |
|
546
|
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
sub keep_meta_info { |
|
548
|
12
|
|
|
12
|
1
|
843
|
my $self = shift; |
|
549
|
12
|
100
|
|
|
|
46
|
if (@_) { |
|
550
|
11
|
|
|
|
|
18
|
my $v = shift; |
|
551
|
11
|
100
|
100
|
|
|
66
|
!defined $v || $v eq "" and $v = 0; |
|
552
|
11
|
100
|
|
|
|
52
|
$v =~ m/^[0-9]/ or $v = lc $v eq "false" ? 0 : 1; # true/truth = 1 |
|
|
|
100
|
|
|
|
|
|
|
553
|
11
|
|
|
|
|
37
|
$self->_set_attr_X ("keep_meta_info", $v); |
|
554
|
|
|
|
|
|
|
} |
|
555
|
12
|
|
|
|
|
62
|
$self->{'keep_meta_info'}; |
|
556
|
|
|
|
|
|
|
} # keep_meta_info |
|
557
|
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
sub allow_loose_quotes { |
|
559
|
12
|
|
|
12
|
1
|
26
|
my $self = shift; |
|
560
|
12
|
100
|
|
|
|
60
|
@_ and $self->_set_attr_X ("allow_loose_quotes", shift); |
|
561
|
12
|
|
|
|
|
35
|
$self->{'allow_loose_quotes'}; |
|
562
|
|
|
|
|
|
|
} # allow_loose_quotes |
|
563
|
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
sub allow_loose_escapes { |
|
565
|
12
|
|
|
12
|
1
|
1874
|
my $self = shift; |
|
566
|
12
|
100
|
|
|
|
84
|
@_ and $self->_set_attr_X ("allow_loose_escapes", shift); |
|
567
|
12
|
|
|
|
|
37
|
$self->{'allow_loose_escapes'}; |
|
568
|
|
|
|
|
|
|
} # allow_loose_escapes |
|
569
|
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
sub allow_whitespace { |
|
571
|
4954
|
|
|
4954
|
1
|
3329645
|
my $self = shift; |
|
572
|
4954
|
100
|
|
|
|
18660
|
if (@_) { |
|
573
|
3725
|
|
|
|
|
9356
|
my $aw = shift; |
|
574
|
3725
|
100
|
|
|
|
11897
|
_unhealthy_whitespace ($self, $aw) and |
|
575
|
|
|
|
|
|
|
croak ($self->SetDiag (1002)); |
|
576
|
3721
|
|
|
|
|
14301
|
$self->_set_attr_X ("allow_whitespace", $aw); |
|
577
|
|
|
|
|
|
|
} |
|
578
|
4950
|
|
|
|
|
20564
|
$self->{'allow_whitespace'}; |
|
579
|
|
|
|
|
|
|
} # allow_whitespace |
|
580
|
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
sub allow_unquoted_escape { |
|
582
|
3
|
|
|
3
|
1
|
20
|
my $self = shift; |
|
583
|
3
|
100
|
|
|
|
18
|
@_ and $self->_set_attr_X ("allow_unquoted_escape", shift); |
|
584
|
3
|
|
|
|
|
9
|
$self->{'allow_unquoted_escape'}; |
|
585
|
|
|
|
|
|
|
} # allow_unquoted_escape |
|
586
|
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
sub blank_is_undef { |
|
588
|
2
|
|
|
2
|
1
|
5
|
my $self = shift; |
|
589
|
2
|
100
|
|
|
|
9
|
@_ and $self->_set_attr_X ("blank_is_undef", shift); |
|
590
|
2
|
|
|
|
|
10
|
$self->{'blank_is_undef'}; |
|
591
|
|
|
|
|
|
|
} # blank_is_undef |
|
592
|
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
sub empty_is_undef { |
|
594
|
2
|
|
|
2
|
1
|
4
|
my $self = shift; |
|
595
|
2
|
100
|
|
|
|
10
|
@_ and $self->_set_attr_X ("empty_is_undef", shift); |
|
596
|
2
|
|
|
|
|
9
|
$self->{'empty_is_undef'}; |
|
597
|
|
|
|
|
|
|
} # empty_is_undef |
|
598
|
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
# Get/set accessor for the "verbatim" attribute.
# A supplied value is handed to _set_attr_X (); the value held in the
# object hash is returned in either case.
sub verbatim {
    my $csv = shift;
    if (@_) {
        $csv->_set_attr_X ("verbatim", shift);
    }
    return $csv->{'verbatim'};
} # verbatim
|
604
|
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
# Get/set accessor for the "undef_str" attribute.
# A defined new value is stored stringified, undef is stored as undef;
# the stored value is then pushed to the cache through _cache_set ().
sub undef_str {
    my ($csv, @arg) = @_;
    if (@arg) {
        my $str = $arg[0];
        if (defined $str) {
            $csv->{'undef_str'} = "$str";
        }
        else {
            $csv->{'undef_str'} = undef;
        }
        $csv->_cache_set ($_cache_id{'undef_str'}, $csv->{'undef_str'});
    }
    return $csv->{'undef_str'};
} # undef_str
|
614
|
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
# Get/set accessor for the "comment_str" attribute.
# A defined new value is stored stringified, undef is stored as undef;
# the stored value is then pushed to the cache through _cache_set ().
sub comment_str {
    my ($csv, @arg) = @_;
    if (@arg) {
        my $str = $arg[0];
        if (defined $str) {
            $csv->{'comment_str'} = "$str";
        }
        else {
            $csv->{'comment_str'} = undef;
        }
        $csv->_cache_set ($_cache_id{'comment_str'}, $csv->{'comment_str'});
    }
    return $csv->{'comment_str'};
} # comment_str
|
624
|
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
# Get/set accessor for the "auto_diag" attribute.
# The new value is normalised before it is stored:
#   undef or ""                       => 0
#   anything starting with a digit    => kept as is
#   "false" (any case)                => 0
#   any other text (true/truth/...)   => 1
sub auto_diag {
    my ($csv, @arg) = @_;
    if (@arg) {
        my $lvl = $arg[0];
        if (!defined $lvl || $lvl eq "") {
            $lvl = 0;
        }
        unless ($lvl =~ m/^[0-9]/) {
            $lvl = lc ($lvl) eq "false" ? 0 : 1;
        }
        $csv->_set_attr_X ("auto_diag", $lvl);
    }
    return $csv->{'auto_diag'};
} # auto_diag
|
635
|
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
# Get/set accessor for the "diag_verbose" attribute.
# The new value is normalised before it is stored:
#   undef or ""                       => 0
#   anything starting with a digit    => kept as is
#   "false" (any case)                => 0
#   any other text (true/truth/...)   => 1
sub diag_verbose {
    my ($csv, @arg) = @_;
    if (@arg) {
        my $lvl = $arg[0];
        if (!defined $lvl || $lvl eq "") {
            $lvl = 0;
        }
        unless ($lvl =~ m/^[0-9]/) {
            $lvl = lc ($lvl) eq "false" ? 0 : 1;
        }
        $csv->_set_attr_X ("diag_verbose", $lvl);
    }
    return $csv->{'diag_verbose'};
} # diag_verbose
|
646
|
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
# status |
|
648
|
|
|
|
|
|
|
# |
|
649
|
|
|
|
|
|
|
# object method returning the success or failure of the most recent |
|
650
|
|
|
|
|
|
|
# combine () or parse (). there are no side-effects. |
|
651
|
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
sub status {
    # Plain read of the stored status flag; nothing is modified.
    my ($csv) = @_;
    return $csv->{'_STATUS'};
} # status
|
656
|
|
|
|
|
|
|
|
|
657
|
|
|
|
|
|
|
# Returns the object's stored '_EOF' value; nothing is modified.
sub eof {
    my ($csv) = @_;
    return $csv->{'_EOF'};
} # eof
|
661
|
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
# Get/set accessor for the column types.
#   no argument     - returns the stored 'types' value
#   true argument   - must be an array ref of numbers; each is turned into
#                     a character with chr and the joined string is kept in
#                     '_types' and pushed to the cache; the array ref itself
#                     is kept in 'types'. Returns what _cache_set () returns.
#   false argument  - removes both 'types' and '_types', clears the cache
#                     entry and returns undef
sub types {
    my ($csv, @arg) = @_;
    return $csv->{'types'} unless @arg;

    my $list = $arg[0];
    unless ($list) {
        delete $csv->{'types'};
        delete $csv->{'_types'};
        $csv->_cache_set ($_cache_id{'types'}, undef);
        return undef;
    }

    $csv->{'_types'} = join "", map { chr $_ } @{$list};
    $csv->{'types'}  = $list;
    return $csv->_cache_set ($_cache_id{'types'}, $csv->{'_types'});
} # types
|
681
|
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
# Get/set accessor for the callbacks hash.
#
# Without arguments the stored hash ref (or undef) is returned.
# Accepted setter forms:
#   (undef)              - clears the callbacks
#   ({ name => \&code })  - a single hash ref, stored as is
#   (name => \&code, ...) - an even-sized list, stored as a new hash
# Every other form, any undefined element next to a defined first one, a
# key not matching m/^[\w.]+$/ or a value that is not a CODE ref croaks
# with diag 1004.
# The "_has_hooks" attribute is set to a bit mask: 0x01 when an 'error'
# entry exists, 0x02 for 'after_parse', 0x04 for 'before_print'.
sub callbacks {
    my ($csv, @arg) = @_;
    return $csv->{'callbacks'} unless @arg;

    my $hooks;
    my $mask = 0x00;
    if (defined $arg[0]) {
        if (grep { !defined $_ } @arg) {
            croak ($csv->SetDiag (1004));
        }

        if (@arg == 1 && ref $arg[0] eq "HASH") {
            $hooks = $arg[0];
        }
        elsif (@arg % 2 == 0) {
            $hooks = { @arg };
        }
        else {
            croak ($csv->SetDiag (1004));
        }

        for my $name (keys %{$hooks}) {
            # A key cannot be a ref: it would have been stored as a string
            # like 'SCALAR(0x1f3e710)' or 'ARRAY(0x1a5ae18)'
            next if $name =~ m/^[\w.]+$/ && ref $hooks->{$name} eq "CODE";
            croak ($csv->SetDiag (1004));
        }

        $mask |= 0x01 if exists $hooks->{'error'};
        $mask |= 0x02 if exists $hooks->{'after_parse'};
        $mask |= 0x04 if exists $hooks->{'before_print'};
    }
    elsif (@arg > 1) {
        # (undef, whatever)
        croak ($csv->SetDiag (1004));
    }

    $csv->_set_attr_X ("_has_hooks", $mask);
    $csv->{'callbacks'} = $hooks;
    return $csv->{'callbacks'};
} # callbacks
|
711
|
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
# error_diag |
|
713
|
|
|
|
|
|
|
# |
|
714
|
|
|
|
|
|
|
# If (and only if) an error occurred, this function returns a code that |
|
715
|
|
|
|
|
|
|
# indicates the reason of failure |
|
716
|
|
|
|
|
|
|
|
|
717
|
|
|
|
|
|
|
sub error_diag {
    my ($csv) = @_;

    # Slots: 0 numeric code, 1 message, 2 position, 3 record number,
    #        4 field, 5 '_ERROR_SRC' (only when diag_verbose is set).
    # Start from the file-wide $last_err, used when there is no object.
    my @diag = (0 + $last_err, $last_err, 0, 0, 0, 0);

    # Docs state to NEVER use UNIVERSAL::isa, because it will *never* call
    # an overridden isa method in any class. That is exactly what is wanted
    # here. The first two tests rule out class-method and direct calls.
    if ($csv && ref $csv &&
            UNIVERSAL::isa ($csv, __PACKAGE__) &&
            exists $csv->{'_ERROR_DIAG'}) {
        $diag[0] = 0 + $csv->{'_ERROR_DIAG'};
        $diag[1] = $csv->{'_ERROR_DIAG'};
        if (exists $csv->{'_ERROR_POS'}) {
            $diag[2] = 1 + $csv->{'_ERROR_POS'};
        }
        $diag[3] = $csv->{'_RECNO'};
        if (exists $csv->{'_ERROR_FLD'}) {
            $diag[4] = $csv->{'_ERROR_FLD'};
        }
        if (exists $csv->{'_ERROR_SRC'} && $csv->{'diag_verbose'}) {
            $diag[5] = $csv->{'_ERROR_SRC'};
        }

        # A registered 'error' callback takes over completely
        if ($diag[0] && $csv->{'callbacks'} && $csv->{'callbacks'}{'error'}) {
            return $csv->{'callbacks'}{'error'}->(@diag);
        }
    }

    my $context = wantarray;
    if (defined $context) {
        return $context ? @diag : $diag[1];
    }

    # Void context from here on: behave as auto-diag.
    # Code 0 and code 2012 are not reported.
    return unless $diag[0] && $diag[0] != 2012;

    my $msg = "# CSV_XS ERROR: $diag[0] - $diag[1] \@ rec $diag[3] pos $diag[2]\n";
    if ($diag[4]) {
        $msg =~ s/$/ field $diag[4]/;
    }
    if ($diag[5]) {
        $msg =~ s/$/ (XS#$diag[5])/;
    }

    if (!($csv && ref $csv)) {
        # called without args in void context
        warn $msg;
        return;
    }

    if ($csv->{'diag_verbose'} && $csv->{'_ERROR_INPUT'}) {
        # Show the offending input with a caret under the error position
        $msg .= $csv->{'_ERROR_INPUT'}."\n".
                (" " x ($diag[2] - 1))."^\n";
    }

    my $lvl = $csv->{'auto_diag'};
    if ($lvl < 2) {
        # Look at the compile-time hints hash two frames up: with autodie
        # (or a guarded Fatal) in effect the level is raised by one.
        my @c = caller (2);
        if (@c >= 11 && $c[10] && ref $c[10] eq "HASH") {
            my $hints = $c[10];
            if ((exists $hints->{'autodie'} && $hints->{'autodie'}) ||
                (exists $hints->{'guard Fatal'} &&
                !exists $hints->{'no Fatal'})) {
                $lvl++;
            }
            # Future releases of autodie will probably set $^H{autodie}
            #  to "autodie @args", like "autodie :all" or "autodie open"
            #  so we can/should check for "open" or "new"
        }
    }

    die $msg if $lvl > 1;
    warn $msg;
    return;
} # error_diag
|
773
|
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
# Returns the object's stored '_RECNO' value; nothing is modified.
sub record_number {
    my ($csv) = @_;
    return $csv->{'_RECNO'};
} # record_number
|
778
|
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
# string |
|
780
|
|
|
|
|
|
|
# |
|
781
|
|
|
|
|
|
|
# object method returning the result of the most recent combine () or the |
|
782
|
|
|
|
|
|
|
# input to the most recent parse (), whichever is more recent. there are |
|
783
|
|
|
|
|
|
|
# no side-effects. |
|
784
|
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
sub string {
    # '_STRING' holds a reference to the string; without such a
    # reference there is nothing to report and undef is returned.
    my ($csv) = @_;
    my $ref = $csv->{'_STRING'};
    return undef unless ref $ref;
    return ${$ref};
} # string
|
789
|
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
# fields |
|
791
|
|
|
|
|
|
|
# |
|
792
|
|
|
|
|
|
|
# object method returning the result of the most recent parse () or the |
|
793
|
|
|
|
|
|
|
# input to the most recent combine (), whichever is more recent. there |
|
794
|
|
|
|
|
|
|
# are no side-effects. |
|
795
|
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
sub fields {
    # '_FIELDS' holds an array reference; its elements are returned.
    # Without such a reference a single undef is returned.
    my ($csv) = @_;
    my $ref = $csv->{'_FIELDS'};
    return undef unless ref $ref;
    return @{$ref};
} # fields
|
800
|
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
# meta_info |
|
802
|
|
|
|
|
|
|
# |
|
803
|
|
|
|
|
|
|
# object method returning the result of the most recent parse () or the |
|
804
|
|
|
|
|
|
|
# input to the most recent combine (), whichever is more recent. there |
|
805
|
|
|
|
|
|
|
# are no side-effects. meta_info () returns (if available) some of the |
|
806
|
|
|
|
|
|
|
# field's properties |
|
807
|
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
sub meta_info {
    # '_FFLAGS' holds an array reference with one flag value per field;
    # its elements are returned. Without such a reference a single undef
    # is returned.
    my ($csv) = @_;
    my $ref = $csv->{'_FFLAGS'};
    return undef unless ref $ref;
    return @{$ref};
} # meta_info
|
812
|
|
|
|
|
|
|
|
|
813
|
|
|
|
|
|
|
# Returns 1 or 0 depending on whether the CSV_FLAGS_IS_QUOTED bit is set
# in the stored flags of field $idx. Returns nothing when no flags are
# stored or $idx is outside the range of stored flags.
sub is_quoted {
    my ($csv, $idx) = @_;
    my $flags = $csv->{'_FFLAGS'};
    return unless ref ($flags) && $idx >= 0 && $idx < @{$flags};
    return ($flags->[$idx] & CSV_FLAGS_IS_QUOTED ()) ? 1 : 0;
} # is_quoted
|
819
|
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
# Returns 1 or 0 depending on whether the CSV_FLAGS_IS_BINARY bit is set
# in the stored flags of field $idx. Returns nothing when no flags are
# stored or $idx is outside the range of stored flags.
sub is_binary {
    my ($csv, $idx) = @_;
    my $flags = $csv->{'_FFLAGS'};
    return unless ref ($flags) && $idx >= 0 && $idx < @{$flags};
    return ($flags->[$idx] & CSV_FLAGS_IS_BINARY ()) ? 1 : 0;
} # is_binary
|
826
|
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
# Tells whether field $idx is flagged as missing.
#   negative $idx or no stored flags  - returns nothing
#   $idx beyond the stored flags      - returns 1
#   otherwise                         - 1 or 0 for the CSV_FLAGS_IS_MISSING bit
sub is_missing {
    my ($csv, $idx) = @_;
    my $flags = $csv->{'_FFLAGS'};
    return   if $idx < 0 || !ref $flags;
    return 1 if $idx >= @{$flags};
    return ($flags->[$idx] & CSV_FLAGS_IS_MISSING ()) ? 1 : 0;
} # is_missing
|
833
|
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
# combine |
|
835
|
|
|
|
|
|
|
# |
|
836
|
|
|
|
|
|
|
# Object method returning success or failure. The given arguments are |
|
837
|
|
|
|
|
|
|
# combined into a single comma-separated value. Failure can be the |
|
838
|
|
|
|
|
|
|
# result of no arguments or an argument containing an invalid character. |
|
839
|
|
|
|
|
|
|
# side-effects include: |
|
840
|
|
|
|
|
|
|
# setting status () |
|
841
|
|
|
|
|
|
|
# setting fields () |
|
842
|
|
|
|
|
|
|
# setting string () |
|
843
|
|
|
|
|
|
|
# setting error_input () |
|
844
|
|
|
|
|
|
|
|
|
845
|
|
|
|
|
|
|
sub combine {
    my $csv = shift;
    my $out = "";

    # A reference to @_ itself (not a copy) is what gets stored and passed on
    my $fields = \@_;
    $csv->{'_FIELDS'} = $fields;

    # Without any fields Combine () is not called and the status is false
    $csv->{'_STATUS'} = @{$fields} > 0 && $csv->Combine (\$out, $fields, 0);
    $csv->{'_STRING'} = \$out;
    return $csv->{'_STATUS'};
} # combine
|
853
|
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
# parse |
|
855
|
|
|
|
|
|
|
# |
|
856
|
|
|
|
|
|
|
# Object method returning success or failure. The given argument is |
|
857
|
|
|
|
|
|
|
# expected to be a valid comma-separated value. Failure can be the |
|
858
|
|
|
|
|
|
|
# result of no arguments or an argument containing an invalid sequence |
|
859
|
|
|
|
|
|
|
# of characters. Side-effects include: |
|
860
|
|
|
|
|
|
|
# setting status () |
|
861
|
|
|
|
|
|
|
# setting fields () |
|
862
|
|
|
|
|
|
|
# setting meta_info () |
|
863
|
|
|
|
|
|
|
# setting string () |
|
864
|
|
|
|
|
|
|
# setting error_input () |
|
865
|
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
sub parse {
    my ($csv, $line) = @_;

    # Only plain strings are accepted
    croak ($csv->SetDiag (1500)) if ref $line;

    # The input is recorded before parsing starts
    $csv->{'_STRING'} = \$line;

    my ($fields, $fflags) = ([], []);
    my $ok = defined $line && $csv->Parse ($line, $fields, $fflags);

    # On failure (or undefined input) fields and flags are reset to undef
    @{$csv}{'_FIELDS', '_FFLAGS', '_STATUS'} = $ok
        ? ($fields, $fflags, 1)
        : (undef,   undef,   0);

    return $csv->{'_STATUS'};
} # parse
|
886
|
|
|
|
|
|
|
|
|
887
|
|
|
|
|
|
|
# Get/set accessor for the column names.
#   ()            - returns the stored names (empty list when none are set)
#   (undef)       - clears the stored names, returns undef
#   ([ names ])   - a single array ref is expanded into its elements
#   (names ...)   - a plain list; any reference in it croaks with diag 3001
# When columns are bound and the number of names differs from the number
# of bound columns, the call croaks with diag 3003.
# Undefined names are stored as "\cAUNDEF\cA". Returns the stored list.
sub column_names {
    my ($csv, @names) = @_;

    unless (@names) {
        my $current = $csv->{'_COLUMN_NAMES'};
        return defined $current ? @{$current} : ();
    }

    if (@names == 1 && !defined $names[0]) {
        return $csv->{'_COLUMN_NAMES'} = undef;
    }

    if (@names == 1 && ref $names[0] eq "ARRAY") {
        @names = @{$names[0]};
    }
    else {
        my $kinds = join "", map { defined $_ ? ref $_ : "" } @names;
        croak ($csv->SetDiag (3001)) if $kinds;
    }

    my $bound = $csv->{'_BOUND_COLUMNS'};
    if ($bound && @names != @{$bound}) {
        croak ($csv->SetDiag (3003));
    }

    my @stored = map { defined $_ ? $_ : "\cAUNDEF\cA" } @names;
    $csv->{'_COLUMN_NAMES'} = \@stored;
    return @stored;
} # column_names
|
908
|
|
|
|
|
|
|
|
|
909
|
|
|
|
|
|
|
# header
#
# Reads the first line from $fh, derives the separator from it (and, unless
# disabled, the encoding from a leading BOM), parses that line as the header
# row and optionally installs the fields as column names.
#
# Arguments after $fh may be array refs (candidate separators, accumulated)
# and/or a hash ref of options (a later hash ref replaces an earlier one):
#   sep_set             array ref of candidate separators (default "," ";")
#   detect_bom          default 1
#   set_column_names    default 1
#   munge_column_names  "lc" (default), "uc", "db", a code ref or a hash ref;
#                       any other value leaves the names untouched
#   munge               alias, used when munge_column_names is not given
# Any other argument croaks with the usage message.
#
# Croaks with diag 1014 without a handle, 1010 on an empty header, 1011 when
# more than one candidate separator occurs in the header line, 1012 on an
# empty field name, 1013 on duplicate field names and 1500 when sep_set is
# not an array ref.
#
# Returns the header fields in list context, the object itself otherwise.
sub header {
    my ($self, $fh, @args) = @_;

    $fh or croak ($self->SetDiag (1014));

    my (@seps, %args);
    for (@args) {
        if (ref $_ eq "ARRAY") {
            push @seps, @{$_};
            next;
        }
        if (ref $_ eq "HASH") {
            %args = %{$_};
            next;
        }
        croak ('usage: $csv->header ($fh, [ seps ], { options })');
    }

    defined $args{'munge'} && !defined $args{'munge_column_names'} and
        $args{'munge_column_names'} = $args{'munge'}; # munge as alias
    defined $args{'detect_bom'}         or $args{'detect_bom'}         = 1;
    defined $args{'set_column_names'}   or $args{'set_column_names'}   = 1;
    defined $args{'munge_column_names'} or $args{'munge_column_names'} = "lc";

    # Reset any previous leftovers
    $self->{'_RECNO'}         = 0;
    $self->{'_AHEAD'}         = undef;
    $self->{'_COLUMN_NAMES'}  = undef if $args{'set_column_names'};
    $self->{'_BOUND_COLUMNS'} = undef if $args{'set_column_names'};

    # An explicit sep_set option overrides separators passed as array refs
    if (defined $args{'sep_set'}) {
        ref $args{'sep_set'} eq "ARRAY" or
            croak ($self->_SetDiagInfo (1500, "sep_set should be an array ref"));
        @seps = @{$args{'sep_set'}};
    }

    $^O eq "MSWin32" and binmode $fh;
    my $hdr = <$fh>;
    # check if $hdr can be empty here, I don't think so
    defined $hdr && $hdr ne "" or croak ($self->SetDiag (1010));

    # Record which of the candidate separators occur in the raw header line
    my %sep;
    @seps or @seps = (",", ";");
    foreach my $sep (@seps) {
        index ($hdr, $sep) >= 0 and $sep{$sep}++;
    }

    keys %sep >= 2 and croak ($self->SetDiag (1011));

    # Zero or one key left: with none, sep () is called without arguments
    $self->sep (keys %sep);
    my $enc = "";
    if ($args{'detect_bom'}) { # UTF-7 is not supported
        # The 4-byte UTF-32 marks are tested before the 2-byte UTF-16 marks
        # they start with. Each match strips the BOM from the header.
           if ($hdr =~ s/^\x00\x00\xfe\xff//) { $enc = "utf-32be"   }
        elsif ($hdr =~ s/^\xff\xfe\x00\x00//) { $enc = "utf-32le"   }
        elsif ($hdr =~ s/^\xfe\xff//)         { $enc = "utf-16be"   }
        elsif ($hdr =~ s/^\xff\xfe//)         { $enc = "utf-16le"   }
        elsif ($hdr =~ s/^\xef\xbb\xbf//)     { $enc = "utf-8"      }
        elsif ($hdr =~ s/^\xf7\x64\x4c//)     { $enc = "utf-1"      }
        elsif ($hdr =~ s/^\xdd\x73\x66\x73//) { $enc = "utf-ebcdic" }
        elsif ($hdr =~ s/^\x0e\xfe\xff//)     { $enc = "scsu"       }
        elsif ($hdr =~ s/^\xfb\xee\x28//)     { $enc = "bocu-1"     }
        elsif ($hdr =~ s/^\x84\x31\x95\x33//) { $enc = "gb-18030"   }
        elsif ($hdr =~ s/^\x{feff}//)         { $enc = ""           }

        $self->{'ENCODING'} = $enc ? uc $enc : undef;

        # Nothing but a BOM on the first line
        $hdr eq "" and croak ($self->SetDiag (1010));

        if ($enc) {
            $ebcdic && $enc eq "utf-ebcdic" and $enc = "";
            if ($enc =~ m/([13]).le$/) {
                # utf-16le / utf-32le: $1 is the "1" or "3" from the
                # encoding name. That many NUL bytes are appended to the
                # header and the same number of bytes is read from the
                # handle and discarded.
                my $pad = 0 + $1;
                my $skipped;
                $hdr .= "\0" x $pad;
                read $fh, $skipped, $pad;
            }
            if ($enc) {
                if ($enc ne "utf-8") {
                    require Encode;
                    $hdr = Encode::decode ($enc, $hdr);
                }
                binmode $fh, ":encoding($enc)";
            }
        }
    }

    my ($ahead, $eol);
    if ($hdr and $hdr =~ s/\Asep=(\S)([\r\n]+)//i) { # Also look in xs:Parse
        $self->sep ($1);
        unless (length $hdr) {
            # The sep= line was all that was read so far: the real header
            # is the next line. At end of file the read yields undef, which
            # means there is no header at all; report that the same way as
            # the empty-header check above instead of carrying undef on.
            $hdr = <$fh>;
            defined $hdr or croak ($self->SetDiag (1010));
        }
    }
    # When more than one line was read in one go, keep only the first line
    # as header and remember the line ending and the remainder
    if ($hdr =~ s/^([^\r\n]+)([\r\n]+)([^\r\n].+)\z/$1/s) {
        $eol   = $2;
        $ahead = $3;
    }

    # Parse the header line through an in-memory file handle
    my $hr = \$hdr; # Will cause croak on perl-5.6.x
    open my $h, "<", $hr or croak ($self->SetDiag (1010));

    my $row = $self->getline ($h) or croak ();
    close $h;

    if (   $args{'munge_column_names'} eq "lc") {
        $_ = lc for @{$row};
    }
    elsif ($args{'munge_column_names'} eq "uc") {
        $_ = uc for @{$row};
    }
    elsif ($args{'munge_column_names'} eq "db") {
        for (@{$row}) {
            s/\W+/_/g;
            s/^_+//;
            $_ = lc;
        }
    }

    if ($ahead) { # Must be after getline, which creates the cache
        $self->_cache_set ($_cache_id{'_has_ahead'}, 1);
        $self->{'_AHEAD'} = $ahead;
        # A line ending that starts with CR not followed by LF is set as eol
        $eol =~ m/^\r([^\n]|\z)/ and $self->eol ($eol);
    }

    my @hdr = @{$row};
    ref $args{'munge_column_names'} eq "CODE" and
        @hdr = map { $args{'munge_column_names'}->($_)       } @hdr;
    ref $args{'munge_column_names'} eq "HASH" and
        @hdr = map { $args{'munge_column_names'}->{$_} || $_ } @hdr;

    # Count the occurrences of each name to detect empty and duplicate names
    my %hdr; $hdr{$_}++ for @hdr;
    exists $hdr{''} and croak ($self->SetDiag (1012));
    unless (keys %hdr == @hdr) {
        croak ($self->_SetDiagInfo (1013, join ", " =>
            map { "$_ ($hdr{$_})" } grep { $hdr{$_} > 1 } keys %hdr));
    }
    $args{'set_column_names'} and $self->column_names (@hdr);
    wantarray ? @hdr : $self;
} # header
|
1045
|
|
|
|
|
|
|
|
|
1046
|
|
|
|
|
|
|
# Get/set accessor for the bound columns.
#   ()           - returns the bound references, or undef when none are bound
#   (undef)      - clears both the bound columns and the column names
#   (\$a, \$b …) - binds the given scalar references
# Croaks with diag 3003 when column names are set and their number differs
# from the number of references, and with diag 3004 when any argument is
# not a SCALAR reference. Returns the list of references.
sub bind_columns {
    my ($csv, @refs) = @_;

    unless (@refs) {
        my $bound = $csv->{'_BOUND_COLUMNS'};
        return defined $bound ? @{$bound} : undef;
    }

    if (@refs == 1 && !defined $refs[0]) {
        $csv->{'_COLUMN_NAMES'} = undef;
        return $csv->{'_BOUND_COLUMNS'} = undef;
    }

    my $names = $csv->{'_COLUMN_NAMES'};
    if ($names && @refs != @{$names}) {
        croak ($csv->SetDiag (3003));
    }

    if (grep { ref $_ ne "SCALAR" } @refs) {
        croak ($csv->SetDiag (3004));
    }

    $csv->_set_attr_N ("_is_bound", scalar @refs);
    $csv->{'_BOUND_COLUMNS'} = [ @refs ];
    return @refs;
} # bind_columns
|
1066
|
|
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
|
# Reads one record through getline () and returns it as a hash reference
# keyed on the stored column names. Croaks with diag 3002 when no column
# names are set; returns nothing when getline () returns a false value.
sub getline_hr {
    my ($csv, @args) = @_;

    croak ($csv->SetDiag (3002)) unless $csv->{'_COLUMN_NAMES'};

    my $row = $csv->getline (@args);
    return unless $row;

    my $flags = $csv->{'_FFLAGS'};
    if (ref $flags) {
        # Columns that have a name but no field in this record are
        # flagged as missing
        my $last = $#{$csv->{'_COLUMN_NAMES'}};
        for my $i (scalar @{$row} .. $last) {
            $flags->[$i] = CSV_FLAGS_IS_MISSING ();
        }

        # A record made of one undefined or empty field: flag that field
        # as missing too, unless it already carries flags
        if (@{$row} == 1 && (!defined $row->[0] || $row->[0] eq "")) {
            $flags->[0] ||= CSV_FLAGS_IS_MISSING ();
        }
    }

    my %rec;
    @rec{@{$csv->{'_COLUMN_NAMES'}}} = @{$row};
    return \%rec;
} # getline_hr
|
1080
|
|
|
|
|
|
|
|
|
1081
|
|
|
|
|
|
|
# Fetch all remaining records with getline_all () and return them as a
# reference to a list of hash references, each keyed by the column names
# stored in _COLUMN_NAMES. All arguments after the invocant are passed on
# to getline_all () unchanged.
#
# Croaks with diagnostic 3002 when no column names have been set.
sub getline_hr_all {
    my ($self, @args) = @_;

    unless ($self->{'_COLUMN_NAMES'}) {
        croak ($self->SetDiag (3002));
    }

    # Snapshot of the names taken before reading
    my @names = @{$self->{'_COLUMN_NAMES'}};

    my @records;
    foreach my $fields (@{$self->getline_all (@args)}) {
        my %record;
        @record{@names} = @{$fields};
        push @records, \%record;
    }
    return \@records;
} # getline_hr_all
|
1087
|
|
|
|
|
|
|
|
|
1088
|
|
|
|
|
|
|
# Like print (), but make sure the record is followed by a line ending.
#
# The fields are handed to print (). If the object's eol attribute has no
# length, the first true value of eol_type (), $\ and $/ is written to the
# handle after the record. Returns whatever print () returned.
sub say {
    my ($self, $io, @f) = @_;
    my $eol = $self->eol ();

    # say ($fh, undef) does not propagate an actual undef to print ()
    # by itself, so pass it explicitly
    my $state;
    if (@f == 1 and !defined $f[0]) {
        $state = $self->print ($io, undef);
    }
    else {
        $state = $self->print ($io, @f);
    }

    if (!length $eol) {
        my $line_end = $self->eol_type () || $\ || $/;
        print {$io} $line_end;
    }
    return $state;
} # say
|
1099
|
|
|
|
|
|
|
|
|
1100
|
|
|
|
|
|
|
# Print the values of a hash reference as one record, in the order given by
# column_names ().
#
# Croaks with diagnostic 3009 when no column names have been set and with
# diagnostic 3010 when $hr is not a plain hash reference. Returns the result
# of print ().
sub print_hr {
    my ($self, $io, $hr) = @_;

    croak ($self->SetDiag (3009)) unless $self->{'_COLUMN_NAMES'};
    croak ($self->SetDiag (3010)) unless ref $hr eq "HASH";

    my @values = @{$hr}{$self->column_names ()};
    return $self->print ($io, \@values);
} # print_hr
|
1106
|
|
|
|
|
|
|
|
|
1107
|
|
|
|
|
|
|
# Read a rectangular selection of the data on $io.
#
# $spec has an optional leading "#", then one of (case insensitive)
#
#   row=R[-R|-*][;...]             - select complete rows
#   col=C[-C|-*][;...]             - select columns out of every row
#   cell=R,C[-R|*,C|*][;...]       - select (ranges of) cells
#
# Row and column numbers are 1-based, "*" means "up to the end", and ";"
# separates the entries of a list.
#
# Returns a reference to a list with one entry per selected row. Each entry
# is a reference to a list of fields, or - when column_names () returns
# names - a reference to a hash keyed by these names.
#
# Croaks with diagnostic 2013 on an undefined or invalid specification.
sub fragment {
    my ($self, $io, $spec) = @_;

    my $qd = qr{\s* [0-9]+ \s* }x;              # digit
    my $qs = qr{\s* (?: [0-9]+ | \* ) \s*}x;    # digit or star
    my $qr = qr{$qd (?: - $qs )?}x;             # range
    my $qc = qr{$qr (?: ; $qr )*}x;             # list
    defined $spec && $spec =~ m{^ \s*
        \x23 ? \s*                              # optional leading #
        ( row | col | cell ) \s* =
        ( $qc                                   # for row and col
        | $qd , $qd (?: - $qs , $qs)?           # for cell (ranges)
          (?: ; $qd , $qd (?: - $qs , $qs)? )*  # and cell (range) lists
        ) \s* $}xi or croak ($self->SetDiag (2013));
    my ($type, $range) = (lc $1, $2);

    my @h = $self->column_names ();

    my @c;
    if ($type eq "cell") {
        my @spec;
        my $min_row;
        my $max_row = 0;
        for (split m/\s*;\s*/ => $range) {
            my ($tlr, $tlc, $brr, $brc) = (m{
                    ^ \s* ([0-9]+     ) \s* , \s* ([0-9]+     ) \s*
                (?: - \s* ([0-9]+ | \*) \s* , \s* ([0-9]+ | \*) \s* )?
                    $}x) or croak ($self->SetDiag (2013));
            # A single cell is a range that ends where it starts
            defined $brr or ($brr, $brc) = ($tlr, $tlc);
            $tlr == 0 || $tlc == 0 ||
                ($brr ne "*" && ($brr == 0 || $brr < $tlr)) ||
                ($brc ne "*" && ($brc == 0 || $brc < $tlc))
                    and croak ($self->SetDiag (2013));
            # Columns are used as 0-based indices below, rows stay 1-based
            $tlc--;
            $brc-- unless $brc eq "*";
            defined $min_row or $min_row = $tlr;
            $tlr < $min_row and $min_row = $tlr;
            # Once any range runs to the end of the data, $max_row has to
            # stay "*": comparing a number against "*" would treat the star
            # as 0 and a later finite range would cut off the reading.
            if ($max_row ne "*" and $brr eq "*" || $brr > $max_row) {
                $max_row = $brr;
            }
            push @spec, [ $tlr, $tlc, $brr, $brc ];
        }
        my $r = 0;
        while (my $row = $self->getline ($io)) {
            ++$r < $min_row and next;
            my %row;    # column index => value, merged over all ranges
            foreach my $s (@spec) {
                my ($tlr, $tlc, $brr, $brc) = @{$s};
                $r < $tlr || ($brr ne "*" && $r > $brr) and next;
                my $rr = $brc eq "*" ? $#{$row} : $brc;
                $row{$_} = $row->[$_] for $tlc .. $rr;
            }
            push @c, [ @row{sort { $a <=> $b } keys %row } ];
            if (@h) {
                my %h; @h{@h} = @{$c[-1]};
                $c[-1] = \%h;
            }
            $max_row ne "*" && $r == $max_row and last;
        }
        return \@c;
    }

    # row or col
    my @r;      # $r[N] is true if row/column N is selected
    my $eod;    # lowest start of all open-ended ("N-*") ranges, if any
    for (split m/\s*;\s*/ => $range) {
        my ($from, $to) = m/^\s* ([0-9]+) (?: \s* - \s* ([0-9]+ | \* ))? \s* $/x
            or croak ($self->SetDiag (2013));
        $to ||= $from;
        my $open = $to eq "*";
        $open and $to = $from;
        # $to cannot be <= 0 due to regex and ||=
        $from <= 0 || $to < $from and croak ($self->SetDiag (2013));
        # Remember where the open range starts, so everything from there on
        # is selected, even if another entry of the list has a higher number
        $open && (!defined $eod || $from < $eod) and $eod = $from;
        $r[$_] = 1 for $from .. $to;
    }

    my $r = 0;
    if ($type eq "col") {
        # Columns are matched against 0-based field indices
        shift @r;
        defined $eod and $eod--;
    }
    $_ ||= 0 for @r;
    while (my $row = $self->getline ($io)) {
        $r++;
        if ($type eq "row") {
            if ((defined $eod && $r >= $eod) || $r[$r]) {
                push @c, $row;
                if (@h) {
                    my %h; @h{@h} = @{$c[-1]};
                    $c[-1] = \%h;
                }
            }
            next;
        }
        push @c, [ map { (defined $eod && $_ >= $eod) || $r[$_] ? $row->[$_] : () } 0..$#{$row} ];
        if (@h) {
            my %h; @h{@h} = @{$c[-1]};
            $c[-1] = \%h;
        }
    }

    return \@c;
} # fragment
|
1207
|
|
|
|
|
|
|
|
|
1208
|
|
|
|
|
|
|
# Usage message that is croaked when csv () is called without arguments or
# when no "in" (or "file") source is passed
my $csv_usage = q{usage: my $aoa = csv (in => $file);};
|
1209
|
|
|
|
|
|
|
|
|
1210
|
|
|
|
|
|
|
# Digest the arguments of csv () - either a single hash reference or a list
# of key/value pairs - into everything csv () needs to do its job.
#
# * forces the attributes binary and strict_eol to 1
# * takes the csv ()-only options (enc/encoding, headers, fragment, key,
#   value, keep_headers, filter, formula, the callbacks and the options for
#   header ()) out of the attribute list
# * resolves "in" and "out" to file handles; files opened here are flagged
#   so the caller can close them
# * if both "in" and "out" are a file name or a file handle, sets up
#   streaming: an after_parse callback writes every record to the output
#   handle with say () as soon as it is parsed
# * creates the Text::CSV_XS object from the remaining attributes, unless an
#   object was passed as "csv"
#
# Croaks when there are no arguments, when no input is given, when a file
# cannot be opened or when the CSV object cannot be created.
#
# Returns a hash reference with the keys csv, attr, fh, cls, in, sink, out,
# enc, fho, fhoc, hdrs, key, val, kh, frag, fltr, cbai, cbbo, cboi, hd_s,
# hd_b, hd_m and hd_c.
sub _csv_attr {
    my %attr = (@_ == 1 && ref $_[0] eq "HASH" ? %{$_[0]} : @_) or croak ();

    $attr{'binary'}     = 1;
    $attr{'strict_eol'} = 1;

    # Normalise the encoding to a PerlIO layer specification:
    # "utf-8" => ":encoding(utf-8)", trailing layers (":...") are kept
    my $enc = delete $attr{'enc'} || delete $attr{'encoding'} || "";
    $enc eq "auto" and ($attr{'detect_bom'}, $enc) = (1, "");
    my $stack = $enc =~ s/(:\w.*)// ? $1 : "";
    $enc =~ m/^[-\w.]+$/ and $enc = ":encoding($enc)";
    $enc .= $stack;

    my $hdrs = delete $attr{'headers'};
    my $frag = delete $attr{'fragment'};
    my $key  = delete $attr{'key'};
    my $val  = delete $attr{'value'};
    my $kh   = delete $attr{'keep_headers'}             ||
               delete $attr{'keep_column_names'}        ||
               delete $attr{'kh'};

    my $cbai = delete $attr{'callbacks'}{'after_in'}    ||
               delete $attr{'after_in'}                 ||
               delete $attr{'callbacks'}{'after_parse'} ||
               delete $attr{'after_parse'};
    my $cbbo = delete $attr{'callbacks'}{'before_out'}  ||
               delete $attr{'before_out'};
    my $cboi = delete $attr{'callbacks'}{'on_in'}       ||
               delete $attr{'on_in'};
    my $cboe = delete $attr{'callbacks'}{'on_error'}    ||
               delete $attr{'on_error'};

    my $hd_s = delete $attr{'sep_set'}                  ||
               delete $attr{'seps'};
    my $hd_b = delete $attr{'detect_bom'}               ||
               delete $attr{'bom'};
    my $hd_m = delete $attr{'munge'}                    ||
               delete $attr{'munge_column_names'};
    my $hd_c = delete $attr{'set_column_names'};

    my $fh;
    my $sink = 0;
    my $cls  = 0;       # If I open a file, I have to close it
    my $in  = delete $attr{'in'}  || delete $attr{'file'} or croak ($csv_usage);
    # An "out" that exists but is false means: do not produce any output
    my $out = exists $attr{'out'} && !$attr{'out'} ? \"skip"
            : delete $attr{'out'} || delete $attr{'file'};

    ref $in eq "CODE" || ref $in eq "ARRAY" and $out ||= \*STDOUT;

    my ($fho, $fho_cls);
    if ($in && $out and (!ref $in  || ref $in  eq "GLOB" || ref \$in  eq "GLOB")
                    and (!ref $out || ref $out eq "GLOB" || ref \$out eq "GLOB")) {
        # Streaming: both in and out are a file name or a file handle
        if (ref $out or "GLOB" eq ref \$out) {
            $fho = $out;
        }
        else {
            open $fho, ">", $out or croak "$out: $!\n";
            # The encoding options have been taken out of %attr above, so
            # the layer to apply is in $enc, just like for non-streaming
            # output below
            if ($enc) {
                binmode $fho, $enc;
                $hd_b and print $fho "\x{feff}";
            }
            $fho_cls = 1;
        }
        if ($cboi && !$cbai) {
            $cbai = $cboi;
            $cboi = undef;
        }
        if ($cbai) {
            my $cb = $cbai;
            $cbai = sub { $cb->(@_); $_[0]->say ($fho, $_[1]); 0 };
        }
        else {
            $cbai = sub {            $_[0]->say ($fho, $_[1]); 0 };
        }

        # Put all callbacks back in place for streaming behavior
        $attr{'callbacks'}{'after_parse'} = $cbai; $cbai = undef;
        $attr{'callbacks'}{'before_out'}  = $cbbo; $cbbo = undef;
        $attr{'callbacks'}{'on_in'}       = $cboi; $cboi = undef;
        $attr{'callbacks'}{'on_error'}    = $cboe; $cboe = undef;
        $out  = undef;
        $sink = 1;
    }

    if ($out) {
        if (ref $out and ("ARRAY" eq ref $out or "HASH" eq ref $out)) {
            delete $attr{'out'};
            $sink = 1;
        }
        elsif ((ref $out and "SCALAR" ne ref $out) or "GLOB" eq ref \$out) {
            $fh = $out;
        }
        elsif (ref $out and "SCALAR" eq ref $out and defined ${$out} and ${$out} eq "skip") {
            delete $attr{'out'};
            $sink = 1;
        }
        else {
            open $fh, ">", $out or croak ("$out: $!");
            $cls = 1;
        }
        if ($fh) {
            if ($enc) {
                binmode $fh, $enc;
                my $fn = fileno $fh; # This is a workaround for a bug in PerlIO::via::gzip
            }
            # Without an explicit eol, pick one that ends up as CR LF in the
            # output: "\n" if the handle has a crlf layer, "\r\n" otherwise
            unless (defined $attr{'eol'} || defined $fho) {
                my @layers = eval { PerlIO::get_layers ($fh) };
                $attr{'eol'} = (grep m/crlf/ => @layers) ? "\n" : "\r\n";
            }
        }
    }

    if (   ref $in eq "CODE" or ref $in eq "ARRAY") {
        # All done
    }
    elsif (ref $in eq "SCALAR") {
        # Strings with code points over 0xFF may not be mapped into in-memory file handles
        # "<$enc" does not change that :(
        open $fh, "<", $in or croak ("Cannot open from SCALAR using PerlIO");
        $cls = 1;
    }
    elsif (ref $in or "GLOB" eq ref \$in) {
        if (!ref $in && $] < 5.008005) {
            $fh = \*{$in}; # uncoverable statement ancient perl version required
        }
        else {
            $fh = $in;
        }
    }
    else {
        open $fh, "<$enc", $in or croak ("$in: $!");
        $cls = 1;
    }
    $fh || $sink or croak (qq{No valid source passed. "in" is required});

    # Map the short attribute aliases to their real name, unless the real
    # name was passed too
    for ([ 'quo'    => "quote"       ],
         [ 'esc'    => "escape"      ],
         [ 'escape' => "escape_char" ],
         ) {
        my ($f, $t) = @{$_};
        exists $attr{$f} and !exists $attr{$t} and $attr{$t} = delete $attr{$f};
    }

    # A filter is either the name of a predefined filter, a code reference
    # or a hash of field => code reference. Anything else is dropped.
    my $fltr = delete $attr{'filter'};
    my %fltr = (
        'not_blank' => sub { @{$_[1]} > 1 or defined $_[1][0] && $_[1][0] ne "" },
        'not_empty' => sub { grep { defined && $_ ne "" } @{$_[1]} },
        'filled'    => sub { grep { defined && m/\S/    } @{$_[1]} },
        );
    defined $fltr && !ref $fltr && exists $fltr{$fltr} and
        $fltr = { '0' => $fltr{$fltr} };
    ref $fltr eq "CODE" and $fltr = { 0 => $fltr };
    ref $fltr eq "HASH" or  $fltr = undef;

    my $form = delete $attr{'formula'};

    defined $attr{'auto_diag'}   or $attr{'auto_diag'}   = 1;
    defined $attr{'escape_null'} or $attr{'escape_null'} = 0;
    my $csv = delete $attr{'csv'} || Text::CSV_XS->new (\%attr)
        or croak ($last_err);
    defined $form and $csv->formula ($form);
    defined $cboe and $csv->callbacks (error => $cboe);

    $kh && !ref $kh && $kh =~ m/^(?:1|yes|true|internal|auto)$/i and
        $kh = \@internal_kh;

    return {
        'csv'  => $csv,
        'attr' => { %attr },
        'fh'   => $fh,
        'cls'  => $cls,
        'in'   => $in,
        'sink' => $sink,
        'out'  => $out,
        'enc'  => $enc,
        'fho'  => $fho,
        'fhoc' => $fho_cls,
        'hdrs' => $hdrs,
        'key'  => $key,
        'val'  => $val,
        'kh'   => $kh,
        'frag' => $frag,
        'fltr' => $fltr,
        'cbai' => $cbai,
        'cbbo' => $cbbo,
        'cboi' => $cboi,
        'hd_s' => $hd_s,
        'hd_b' => $hd_b,
        'hd_m' => $hd_m,
        'hd_c' => $hd_c,
        };
} # _csv_attr
|
1401
|
|
|
|
|
|
|
|
|
1402
|
|
|
|
|
|
|
# The functional interface: read or write a complete CSV resource in a
# single call.
#
# The arguments are digested by _csv_attr (). May also be called as a method
# on a Text::CSV_XS object, which is then passed on as the "csv" attribute.
#
# * With an "out" that is not a sink, the data in "in" (a code reference, a
#   list of lists or a list of hashes) is written to the output handle and 1
#   is returned.
# * Otherwise the input is parsed and returned as a reference to a list of
#   lists, a list of hashes (with headers) or a hash (with key).
# * If "out" is an array or hash reference, the parsed data is added to it
#   and that reference is returned. In streaming mode and with a skipped
#   output nothing is returned.
# * In void context the parsed data is written out again through a
#   recursive call with the data as "in".
#
# Croaks without arguments and on invalid key, value or keep_headers
# arguments, on invalid headers and on an output type mismatch.
sub csv {
    @_ && ref $_[0] eq __PACKAGE__ and splice @_, 0, 0, "csv";
    @_ or croak ($csv_usage);

    my $c = _csv_attr (@_);

    my ($csv, $in, $fh, $hdrs) = @{$c}{qw( csv in fh hdrs )};
    my %hdr;    # header rename map if headers was passed as a hash
    if (ref $hdrs eq "HASH") {
        %hdr  = %{$hdrs};
        $hdrs = "auto";
    }

    if ($c->{'out'} && !$c->{'sink'}) {
        !$hdrs && ref $c->{'kh'} && $c->{'kh'} == \@internal_kh and
            $hdrs = $c->{'kh'};

        if (ref $in eq "CODE") {
            my $hdr = 1;
            while (my $row = $in->($csv)) {
                if (ref $row eq "ARRAY") {
                    $csv->print ($fh, $row);
                    next;
                }
                if (ref $row eq "HASH") {
                    if ($hdr) {
                        $hdrs ||= [ map { $hdr{$_} || $_ } keys %{$row} ];
                        $csv->print ($fh, $hdrs);
                        $hdr = 0;
                    }
                    $csv->print ($fh, [ @{$row}{@{$hdrs}} ]);
                }
            }
        }
        elsif (@{$in} == 0 or ref $in->[0] eq "ARRAY") { # aoa
            ref $hdrs and $csv->print ($fh, $hdrs);
            for (@{$in}) {
                $c->{'cboi'} and $c->{'cboi'}->($csv, $_);
                $c->{'cbbo'} and $c->{'cbbo'}->($csv, $_);
                $csv->print ($fh, $_);
            }
        }
        else { # aoh
            my @hdrs = ref $hdrs ? @{$hdrs} : keys %{$in->[0]};
            defined $hdrs or $hdrs = "auto";
            ref $hdrs || $hdrs eq "auto" and @hdrs and
                $csv->print ($fh, [ map { $hdr{$_} || $_ } @hdrs ]);
            for (@{$in}) {
                # Make the record available to the callbacks as %_
                local %_;
                *_ = $_;
                $c->{'cboi'} and $c->{'cboi'}->($csv, $_);
                $c->{'cbbo'} and $c->{'cbbo'}->($csv, $_);
                $csv->print ($fh, [ @{$_}{@hdrs} ]);
            }
        }

        $c->{'cls'}  and close $fh;
        # _csv_attr () returns the "I opened the streaming output myself"
        # flag under the key "fhoc"
        $c->{'fhoc'} and close $c->{'fho'};
        return 1;
    }

    my @row1;
    if (defined $c->{'hd_s'} || defined $c->{'hd_b'} || defined $c->{'hd_m'} || defined $c->{'hd_c'}) {
        # Any of the header () options was passed: let header () deal with
        # the first line
        my %harg;
        !defined $c->{'hd_s'} && $c->{'attr'}{'sep_char'} and
            $c->{'hd_s'} = [ $c->{'attr'}{'sep_char'} ];
        !defined $c->{'hd_s'} && $c->{'attr'}{'sep'} and
            $c->{'hd_s'} = [ $c->{'attr'}{'sep'} ];
        defined $c->{'hd_s'} and $harg{'sep_set'}            = $c->{'hd_s'};
        defined $c->{'hd_b'} and $harg{'detect_bom'}         = $c->{'hd_b'};
        defined $c->{'hd_m'} and $harg{'munge_column_names'} = $hdrs ? "none" : $c->{'hd_m'};
        defined $c->{'hd_c'} and $harg{'set_column_names'}   = $hdrs ? 0      : $c->{'hd_c'};
        @row1 = $csv->header ($fh, \%harg);
        my @hdr = $csv->column_names ();
        @hdr and $hdrs ||= \@hdr;
    }

    if ($c->{'kh'}) {
        @internal_kh = ();
        ref $c->{'kh'} eq "ARRAY" or croak ($csv->SetDiag (1501));
        $hdrs ||= "auto";
    }

    my $key = $c->{'key'};
    if ($key) {
        !ref $key or ref $key eq "ARRAY" && @{$key} > 1 or croak ($csv->SetDiag (1501));
        $hdrs ||= "auto";
    }
    my $val = $c->{'val'};
    if ($val) {
        $key                                            or croak ($csv->SetDiag (1502));
        !ref $val or ref $val eq "ARRAY" && @{$val} > 0 or croak ($csv->SetDiag (1503));
    }

    # Filters on named fields need the headers
    $c->{'fltr'} && grep m/\D/ => keys %{$c->{'fltr'}} and $hdrs ||= "auto";
    if (defined $hdrs) {
        if (!ref $hdrs or ref $hdrs eq "CODE") {
            my $h = $c->{'hd_b'}
                ? [ $csv->column_names () ]
                : $csv->getline ($fh);
            my $has_h = $h && @$h;

            if (ref $hdrs) {
                $has_h or return;
                my $cr = $hdrs;
                $hdrs  = [ map {  $cr->($hdr{$_} || $_) } @{$h} ];
            }
            elsif ($hdrs eq "skip") {
                # discard;
            }
            elsif ($hdrs eq "auto") {
                $has_h or return;
                $hdrs = [ map {       $hdr{$_} || $_  } @{$h} ];
            }
            elsif ($hdrs eq "lc") {
                $has_h or return;
                $hdrs = [ map { lc ($hdr{$_} || $_) } @{$h} ];
            }
            elsif ($hdrs eq "uc") {
                $has_h or return;
                $hdrs = [ map { uc ($hdr{$_} || $_) } @{$h} ];
            }
        }
        $c->{'kh'} and $hdrs and @{$c->{'kh'}} = @{$hdrs};
    }

    if ($c->{'fltr'}) {
        my %f = %{$c->{'fltr'}};
        # convert headers to index
        my @hdr;
        if (ref $hdrs) {
            @hdr = @{$hdrs};
            for (0 .. $#hdr) {
                exists $f{$hdr[$_]} and $f{$_ + 1} = delete $f{$hdr[$_]};
            }
        }
        $csv->callbacks ('after_parse' => sub {
            my ($CSV, $ROW) = @_; # lexical sub-variables in caps
            foreach my $FLD (sort keys %f) {
                # The filter sees the field as $_ and the record as %_
                local $_ = $ROW->[$FLD - 1];
                local %_;
                @hdr and @_{@hdr} = @{$ROW};
                $f{$FLD}->($CSV, $ROW) or return \"skip";
                $ROW->[$FLD - 1] = $_;
            }
            });
    }

    my $frag = $c->{'frag'};
    my $ref = ref $hdrs
        ? # aoh
          do {
            my @h = $csv->column_names ($hdrs);
            my %h; $h{$_}++ for @h;
            exists $h{''} and croak ($csv->SetDiag (1012));
            unless (keys %h == @h) {
                croak ($csv->_SetDiagInfo (1013, join ", " =>
                    map { "$_ ($h{$_})" } grep { $h{$_} > 1 } keys %h));
            }
            $frag ? $csv->fragment ($fh, $frag) :
            $key  ? do {
                        # A key list is ( separator, field, field, ... )
                        my ($k, $j, @f) = ref $key ? (undef, @{$key}) : ($key);
                        if (my @mk = grep { !exists $h{$_} } grep { defined } $k, @f) {
                            croak ($csv->_SetDiagInfo (4001, join ", " => @mk));
                        }
                        +{ map {
                            my $r = $_;
                            my $K = defined $k ? $r->{$k} : join $j => @{$r}{@f};
                            ( $K => (
                            $val
                                ? ref $val
                                    ? { map { $_ => $r->{$_} } @{$val} }
                                    : $r->{$val}
                                : $r ));
                            } @{$csv->getline_hr_all ($fh)} }
                        }
                  : $csv->getline_hr_all ($fh);
            }
        : # aoa
            $frag ? $csv->fragment ($fh, $frag)
                  : $csv->getline_all ($fh);
    if ($ref) {
        @row1 && !$c->{'hd_c'} && !ref $hdrs and unshift @{$ref}, \@row1;
    }
    else {
        Text::CSV_XS->auto_diag ();
    }
    $c->{'cls'}  and close $fh;
    # _csv_attr () returns the "I opened the streaming output myself" flag
    # under the key "fhoc"
    $c->{'fhoc'} and close $c->{'fho'};
    if ($ref and $c->{'cbai'} || $c->{'cboi'}) {
        # Default is ARRAYref, but with key =>, you'll get a hashref
        foreach my $r (ref $ref eq "ARRAY" ? @{$ref} : values %{$ref}) {
            local %_;
            ref $r eq "HASH" and *_ = $r;
            $c->{'cbai'} and $c->{'cbai'}->($csv, $r);
            $c->{'cboi'} and $c->{'cboi'}->($csv, $r);
        }
    }

    if ($c->{'sink'}) {
        my $ro = ref $c->{'out'} or return;

        $ro eq "SCALAR" && ${$c->{'out'}} eq "skip" and
            return;

        $ro eq ref $ref or
            croak ($csv->_SetDiagInfo (5001, "Output type mismatch"));

        if ($ro eq "ARRAY") {
            if (@{$c->{'out'}} and @$ref and ref $c->{'out'}[0] eq ref $ref->[0]) {
                push @{$c->{'out'}} => @$ref;
                return $c->{'out'};
            }
            croak ($csv->_SetDiagInfo (5001, "Output type mismatch"));
        }

        if ($ro eq "HASH") {
            @{$c->{'out'}}{keys %{$ref}} = values %{$ref};
            return $c->{'out'};
        }

        croak ($csv->_SetDiagInfo (5002, "Unsupported output type"));
    }

    # In void context, write the data that was just read
    defined wantarray or
        return csv (
            'in'      => $ref,
            'headers' => $hdrs,
            %{$c->{'attr'}},
            );

    $last_err ||= $csv->{'_ERROR_DIAG'};
    return $ref;
} # csv
|
1636
|
|
|
|
|
|
|
|
|
1637
|
|
|
|
|
|
|
1; |
|
1638
|
|
|
|
|
|
|
|
|
1639
|
|
|
|
|
|
|
__END__ |
|
1640
|
|
|
|
|
|
|
|
|
1641
|
|
|
|
|
|
|
=encoding utf-8 |
|
1642
|
|
|
|
|
|
|
|
|
1643
|
|
|
|
|
|
|
=head1 NAME |
|
1644
|
|
|
|
|
|
|
|
|
1645
|
|
|
|
|
|
|
Text::CSV_XS - comma-separated values manipulation routines |
|
1646
|
|
|
|
|
|
|
|
|
1647
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
1648
|
|
|
|
|
|
|
|
|
1649
|
|
|
|
|
|
|
# Functional interface |
|
1650
|
|
|
|
|
|
|
use Text::CSV_XS qw( csv ); |
|
1651
|
|
|
|
|
|
|
|
|
1652
|
|
|
|
|
|
|
# Read whole file in memory |
|
1653
|
|
|
|
|
|
|
my $aoa = csv (in => "data.csv"); # as array of array |
|
1654
|
|
|
|
|
|
|
my $aoh = csv (in => "data.csv", |
|
1655
|
|
|
|
|
|
|
headers => "auto"); # as array of hash |
|
1656
|
|
|
|
|
|
|
|
|
1657
|
|
|
|
|
|
|
# Write array of arrays as csv file |
|
1658
|
|
|
|
|
|
|
csv (in => $aoa, out => "file.csv", sep_char => ";"); |
|
1659
|
|
|
|
|
|
|
|
|
1660
|
|
|
|
|
|
|
# Only show lines where "code" is odd |
|
1661
|
|
|
|
|
|
|
csv (in => "data.csv", filter => { code => sub { $_ % 2 }}); |
|
1662
|
|
|
|
|
|
|
|
|
1663
|
|
|
|
|
|
|
|
|
1664
|
|
|
|
|
|
|
# Object interface |
|
1665
|
|
|
|
|
|
|
use Text::CSV_XS; |
|
1666
|
|
|
|
|
|
|
|
|
1667
|
|
|
|
|
|
|
my @rows; |
|
1668
|
|
|
|
|
|
|
# Read/parse CSV |
|
1669
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 }); |
|
1670
|
|
|
|
|
|
|
open my $fh, "<:encoding(utf8)", "test.csv" or die "test.csv: $!"; |
|
1671
|
|
|
|
|
|
|
while (my $row = $csv->getline ($fh)) { |
|
1672
|
|
|
|
|
|
|
$row->[2] =~ m/pattern/ or next; # 3rd field should match |
|
1673
|
|
|
|
|
|
|
push @rows, $row; |
|
1674
|
|
|
|
|
|
|
} |
|
1675
|
|
|
|
|
|
|
close $fh; |
|
1676
|
|
|
|
|
|
|
|
|
1677
|
|
|
|
|
|
|
# and write as CSV |
|
1678
|
|
|
|
|
|
|
open $fh, ">:encoding(utf8)", "new.csv" or die "new.csv: $!"; |
|
1679
|
|
|
|
|
|
|
$csv->say ($fh, $_) for @rows; |
|
1680
|
|
|
|
|
|
|
close $fh or die "new.csv: $!"; |
|
1681
|
|
|
|
|
|
|
|
|
1682
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
1683
|
|
|
|
|
|
|
|
|
1684
|
|
|
|
|
|
|
Text::CSV_XS provides facilities for the composition and decomposition of |
|
1685
|
|
|
|
|
|
|
comma-separated values. An instance of the Text::CSV_XS class will combine |
|
1686
|
|
|
|
|
|
|
fields into a C<CSV> string and parse a C<CSV> string into fields. |
|
1687
|
|
|
|
|
|
|
|
|
1688
|
|
|
|
|
|
|
The module accepts either strings or files as input and supports the use of |
|
1689
|
|
|
|
|
|
|
user-specified characters for delimiters, separators, and escapes. |
|
1690
|
|
|
|
|
|
|
|
|
1691
|
|
|
|
|
|
|
=head2 Embedded newlines |
|
1692
|
|
|
|
|
|
|
|
|
1693
|
|
|
|
|
|
|
B<Important Note>: The default behavior is to accept only ASCII characters |
|
1694
|
|
|
|
|
|
|
in the range from C<0x20> (space) to C<0x7E> (tilde). This means that the |
|
1695
|
|
|
|
|
|
|
fields can not contain newlines. If your data contains newlines embedded in |
|
1696
|
|
|
|
|
|
|
fields, or characters above C<0x7E> (tilde), or binary data, you B<I<must>> |
|
1697
|
|
|
|
|
|
|
set C<< binary => 1 >> in the call to L</new>. To cover the widest range of |
|
1698
|
|
|
|
|
|
|
parsing options, you will always want to set binary. |
|
1699
|
|
|
|
|
|
|
|
|
1700
|
|
|
|
|
|
|
But you still have the problem that you have to pass a correct line to the |
|
1701
|
|
|
|
|
|
|
L</parse> method, which is more complicated from the usual point of usage: |
|
1702
|
|
|
|
|
|
|
|
|
1703
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ binary => 1, eol => $/ }); |
|
1704
|
|
|
|
|
|
|
while (<>) { # WRONG! |
|
1705
|
|
|
|
|
|
|
$csv->parse ($_); |
|
1706
|
|
|
|
|
|
|
my @fields = $csv->fields (); |
|
1707
|
|
|
|
|
|
|
} |
|
1708
|
|
|
|
|
|
|
|
|
1709
|
|
|
|
|
|
|
this will break, as the C<while> might read broken lines: it does not care |
|
1710
|
|
|
|
|
|
|
about the quoting. If you need to support embedded newlines, the way to go |
|
1711
|
|
|
|
|
|
|
is to B<not> pass L<C<eol>|/eol> in the parser (it accepts C<\n>, C<\r>, |
|
1712
|
|
|
|
|
|
|
B<and> C<\r\n> by default) and then |
|
1713
|
|
|
|
|
|
|
|
|
1714
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ binary => 1 }); |
|
1715
|
|
|
|
|
|
|
open my $fh, "<", $file or die "$file: $!"; |
|
1716
|
|
|
|
|
|
|
while (my $row = $csv->getline ($fh)) { |
|
1717
|
|
|
|
|
|
|
my @fields = @$row; |
|
1718
|
|
|
|
|
|
|
} |
|
1719
|
|
|
|
|
|
|
|
|
1720
|
|
|
|
|
|
|
The old(er) way of using global file handles is still supported |
|
1721
|
|
|
|
|
|
|
|
|
1722
|
|
|
|
|
|
|
while (my $row = $csv->getline (*ARGV)) { ... } |
|
1723
|
|
|
|
|
|
|
|
|
1724
|
|
|
|
|
|
|
=head2 Unicode |
|
1725
|
|
|
|
|
|
|
|
|
1726
|
|
|
|
|
|
|
Unicode is only tested to work with perl-5.8.2 and up. |
|
1727
|
|
|
|
|
|
|
|
|
1728
|
|
|
|
|
|
|
See also L</BOM>. |
|
1729
|
|
|
|
|
|
|
|
|
1730
|
|
|
|
|
|
|
The simplest way to ensure the correct encoding is used for in- and output |
|
1731
|
|
|
|
|
|
|
is by either setting layers on the filehandles, or setting the L</encoding> |
|
1732
|
|
|
|
|
|
|
argument for L</csv>. |
|
1733
|
|
|
|
|
|
|
|
|
1734
|
|
|
|
|
|
|
open my $fh, "<:encoding(UTF-8)", "in.csv" or die "in.csv: $!"; |
|
1735
|
|
|
|
|
|
|
or |
|
1736
|
|
|
|
|
|
|
my $aoa = csv (in => "in.csv", encoding => "UTF-8"); |
|
1737
|
|
|
|
|
|
|
|
|
1738
|
|
|
|
|
|
|
open my $fh, ">:encoding(UTF-8)", "out.csv" or die "out.csv: $!"; |
|
1739
|
|
|
|
|
|
|
or |
|
1740
|
|
|
|
|
|
|
csv (in => $aoa, out => "out.csv", encoding => "UTF-8"); |
|
1741
|
|
|
|
|
|
|
|
|
1742
|
|
|
|
|
|
|
On parsing (both for L</getline> and L</parse>), if the source is marked |
|
1743
|
|
|
|
|
|
|
being UTF8, then all fields that are marked binary will also be marked UTF8. |
|
1744
|
|
|
|
|
|
|
|
|
1745
|
|
|
|
|
|
|
On combining (L</print> and L</combine>): if any of the combining fields |
|
1746
|
|
|
|
|
|
|
was marked UTF8, the resulting string will be marked as UTF8. Note however |
|
1747
|
|
|
|
|
|
|
that if fields I<before> the first field marked UTF8 contained 8-bit |
|
1748
|
|
|
|
|
|
|
characters that were not upgraded to UTF8, these will be C<bytes> in the |
|
1749
|
|
|
|
|
|
|
resulting string too, possibly causing unexpected errors. If you pass data |
|
1750
|
|
|
|
|
|
|
of different encoding, or you don't know if there is different encoding, |
|
1751
|
|
|
|
|
|
|
force it to be upgraded before you pass them on: |
|
1752
|
|
|
|
|
|
|
|
|
1753
|
|
|
|
|
|
|
$csv->print ($fh, [ map { utf8::upgrade (my $x = $_); $x } @data ]); |
|
1754
|
|
|
|
|
|
|
|
|
1755
|
|
|
|
|
|
|
For complete control over encoding, please use L<Text::CSV::Encoded>: |
|
1756
|
|
|
|
|
|
|
|
|
1757
|
|
|
|
|
|
|
use Text::CSV::Encoded; |
|
1758
|
|
|
|
|
|
|
my $csv = Text::CSV::Encoded->new ({ |
|
1759
|
|
|
|
|
|
|
encoding_in => "iso-8859-1", # the encoding comes into Perl |
|
1760
|
|
|
|
|
|
|
encoding_out => "cp1252", # the encoding comes out of Perl |
|
1761
|
|
|
|
|
|
|
}); |
|
1762
|
|
|
|
|
|
|
|
|
1763
|
|
|
|
|
|
|
$csv = Text::CSV::Encoded->new ({ encoding => "utf8" }); |
|
1764
|
|
|
|
|
|
|
# combine () and print () accept *literally* utf8 encoded data |
|
1765
|
|
|
|
|
|
|
# parse () and getline () return *literally* utf8 encoded data |
|
1766
|
|
|
|
|
|
|
|
|
1767
|
|
|
|
|
|
|
$csv = Text::CSV::Encoded->new ({ encoding => undef }); # default |
|
1768
|
|
|
|
|
|
|
# combine () and print () accept UTF8 marked data |
|
1769
|
|
|
|
|
|
|
# parse () and getline () return UTF8 marked data |
|
1770
|
|
|
|
|
|
|
|
|
1771
|
|
|
|
|
|
|
=head2 BOM |
|
1772
|
|
|
|
|
|
|
|
|
1773
|
|
|
|
|
|
|
BOM (or Byte Order Mark) handling is available only inside the L</header> |
|
1774
|
|
|
|
|
|
|
method. This method supports the following encodings: C<utf-8>, C<utf-1>, |
|
1775
|
|
|
|
|
|
|
C<utf-32be>, C<utf-32le>, C<utf-16be>, C<utf-16le>, C<utf-ebcdic>, C<scsu>, |
|
1776
|
|
|
|
|
|
|
C<bocu-1>, and C<gb-18030>. See L<Wikipedia|https://en.wikipedia.org/wiki/Byte_order_mark>. |
|
1777
|
|
|
|
|
|
|
|
|
1778
|
|
|
|
|
|
|
If a file has a BOM, the easiest way to deal with that is |
|
1779
|
|
|
|
|
|
|
|
|
1780
|
|
|
|
|
|
|
my $aoh = csv (in => $file, detect_bom => 1); |
|
1781
|
|
|
|
|
|
|
|
|
1782
|
|
|
|
|
|
|
All records will be encoded based on the detected BOM. |
|
1783
|
|
|
|
|
|
|
|
|
1784
|
|
|
|
|
|
|
This implies a call to the L</header> method, which by default also sets |
|
1785
|
|
|
|
|
|
|
the L</column_names>. So this is B<not> the same as |
|
1786
|
|
|
|
|
|
|
|
|
1787
|
|
|
|
|
|
|
my $aoh = csv (in => $file, headers => "auto"); |
|
1788
|
|
|
|
|
|
|
|
|
1789
|
|
|
|
|
|
|
which only reads the first record to set L</column_names> but ignores any |
|
1790
|
|
|
|
|
|
|
meaning of a possibly present BOM. |
|
1791
|
|
|
|
|
|
|
|
|
1792
|
|
|
|
|
|
|
=head1 SPECIFICATION |
|
1793
|
|
|
|
|
|
|
|
|
1794
|
|
|
|
|
|
|
While no formal specification for CSV exists, L<RFC 4180|https://datatracker.ietf.org/doc/html/rfc4180> |
|
1795
|
|
|
|
|
|
|
(I<1>) describes the common format and establishes C<text/csv> as the MIME |
|
1796
|
|
|
|
|
|
|
type registered with the IANA. L<RFC 7111|https://datatracker.ietf.org/doc/html/rfc7111> |
|
1797
|
|
|
|
|
|
|
(I<2>) adds fragments to CSV. |
|
1798
|
|
|
|
|
|
|
|
|
1799
|
|
|
|
|
|
|
Many informal documents exist that describe the C<CSV> format. L<"How To: |
|
1800
|
|
|
|
|
|
|
The Comma Separated Value (CSV) File Format"|http://creativyst.com/Doc/Articles/CSV/CSV01.shtml> |
|
1801
|
|
|
|
|
|
|
(I<3>) provides an overview of the C<CSV> format in the most widely used |
|
1802
|
|
|
|
|
|
|
applications and explains how it can best be used and supported. |
|
1803
|
|
|
|
|
|
|
|
|
1804
|
|
|
|
|
|
|
1) https://datatracker.ietf.org/doc/html/rfc4180 |
|
1805
|
|
|
|
|
|
|
2) https://datatracker.ietf.org/doc/html/rfc7111 |
|
1806
|
|
|
|
|
|
|
3) http://creativyst.com/Doc/Articles/CSV/CSV01.shtml |
|
1807
|
|
|
|
|
|
|
|
|
1808
|
|
|
|
|
|
|
The basic rules are as follows: |
|
1809
|
|
|
|
|
|
|
|
|
1810
|
|
|
|
|
|
|
B<CSV> is a delimited data format that has fields/columns separated by the |
|
1811
|
|
|
|
|
|
|
comma character and records/rows separated by newlines. Fields that contain |
|
1812
|
|
|
|
|
|
|
a special character (comma, newline, or double quote), must be enclosed in |
|
1813
|
|
|
|
|
|
|
double quotes. However, if a line contains a single entry that is the empty |
|
1814
|
|
|
|
|
|
|
string, it may be enclosed in double quotes. If a field's value contains a |
|
1815
|
|
|
|
|
|
|
double quote character it is escaped by placing another double quote |
|
1816
|
|
|
|
|
|
|
character next to it. The C<CSV> file format does not require a specific |
|
1817
|
|
|
|
|
|
|
character encoding, byte order, or line terminator format. |
|
1818
|
|
|
|
|
|
|
|
|
1819
|
|
|
|
|
|
|
=over 2 |
|
1820
|
|
|
|
|
|
|
|
|
1821
|
|
|
|
|
|
|
=item * |
|
1822
|
|
|
|
|
|
|
|
|
1823
|
|
|
|
|
|
|
Each record is a single line ended by a line feed (ASCII/C<LF>=C<0x0A>) or |
|
1824
|
|
|
|
|
|
|
a carriage return and line feed pair (ASCII/C<CRLF>=C<0x0D 0x0A>), however, |
|
1825
|
|
|
|
|
|
|
line-breaks may be embedded. |
|
1826
|
|
|
|
|
|
|
|
|
1827
|
|
|
|
|
|
|
=item * |
|
1828
|
|
|
|
|
|
|
|
|
1829
|
|
|
|
|
|
|
Fields are separated by commas. |
|
1830
|
|
|
|
|
|
|
|
|
1831
|
|
|
|
|
|
|
=item * |
|
1832
|
|
|
|
|
|
|
|
|
1833
|
|
|
|
|
|
|
Allowable characters within a C<CSV> field include C<0x09> (C<TAB>) and the |
|
1834
|
|
|
|
|
|
|
inclusive range of C<0x20> (space) through C<0x7E> (tilde). In binary mode |
|
1835
|
|
|
|
|
|
|
all characters are accepted, at least in quoted fields. |
|
1836
|
|
|
|
|
|
|
|
|
1837
|
|
|
|
|
|
|
=item * |
|
1838
|
|
|
|
|
|
|
|
|
1839
|
|
|
|
|
|
|
A field within C<CSV> must be surrounded by double-quotes to contain a |
|
1840
|
|
|
|
|
|
|
separator character (comma). |
|
1841
|
|
|
|
|
|
|
|
|
1842
|
|
|
|
|
|
|
=back |
|
1843
|
|
|
|
|
|
|
|
|
1844
|
|
|
|
|
|
|
Though this is the most clear and restrictive definition, Text::CSV_XS is |
|
1845
|
|
|
|
|
|
|
way more liberal than this, and allows extension: |
|
1846
|
|
|
|
|
|
|
|
|
1847
|
|
|
|
|
|
|
=over 2 |
|
1848
|
|
|
|
|
|
|
|
|
1849
|
|
|
|
|
|
|
=item * |
|
1850
|
|
|
|
|
|
|
|
|
1851
|
|
|
|
|
|
|
Line termination by a single carriage return is accepted by default |
|
1852
|
|
|
|
|
|
|
|
|
1853
|
|
|
|
|
|
|
=item * |
|
1854
|
|
|
|
|
|
|
|
|
1855
|
|
|
|
|
|
|
The separation-, quote-, and escape character(s) can be any ASCII character |
|
1856
|
|
|
|
|
|
|
in the range from C<0x20> (space) to C<0x7E> (tilde). Characters outside |
|
1857
|
|
|
|
|
|
|
this range may or may not work as expected. Multibyte characters, like UTF |
|
1858
|
|
|
|
|
|
|
C<U+060C> (ARABIC COMMA), C<U+FF0C> (FULLWIDTH COMMA), C<U+241B> (SYMBOL |
|
1859
|
|
|
|
|
|
|
FOR ESCAPE), C<U+2424> (SYMBOL FOR NEWLINE), C<U+FF02> (FULLWIDTH QUOTATION |
|
1860
|
|
|
|
|
|
|
MARK), and C<U+201C> (LEFT DOUBLE QUOTATION MARK) (to give some examples of |
|
1861
|
|
|
|
|
|
|
what might look promising) work for newer versions of perl for C<sep_char>, |
|
1862
|
|
|
|
|
|
|
and C<quote_char> but not for C<escape_char>. |
|
1863
|
|
|
|
|
|
|
|
|
1864
|
|
|
|
|
|
|
If you use perl-5.8.2 or higher these three attributes are utf8-decoded, to |
|
1865
|
|
|
|
|
|
|
increase the likelihood of success. This way C<U+00FE> will be allowed as a |
|
1866
|
|
|
|
|
|
|
quote character. |
|
1867
|
|
|
|
|
|
|
|
|
1868
|
|
|
|
|
|
|
=item * |
|
1869
|
|
|
|
|
|
|
|
|
1870
|
|
|
|
|
|
|
A field in C<CSV> must be surrounded by double-quotes to make an embedded |
|
1871
|
|
|
|
|
|
|
double-quote, represented by a pair of consecutive double-quotes, valid. In |
|
1872
|
|
|
|
|
|
|
binary mode you may additionally use the sequence C<"0> for representation |
|
1873
|
|
|
|
|
|
|
of a NULL byte. Using C<0x00> in binary mode is just as valid. |
|
1874
|
|
|
|
|
|
|
|
|
1875
|
|
|
|
|
|
|
=item * |
|
1876
|
|
|
|
|
|
|
|
|
1877
|
|
|
|
|
|
|
Several violations of the above specification may be lifted by passing some |
|
1878
|
|
|
|
|
|
|
options as attributes to the object constructor. |
|
1879
|
|
|
|
|
|
|
|
|
1880
|
|
|
|
|
|
|
=back |
|
1881
|
|
|
|
|
|
|
|
|
1882
|
|
|
|
|
|
|
=head1 METHODS |
|
1883
|
|
|
|
|
|
|
|
|
1884
|
|
|
|
|
|
|
=head2 version |
|
1885
|
|
|
|
|
|
|
X<version> |
|
1886
|
|
|
|
|
|
|
|
|
1887
|
|
|
|
|
|
|
(Class method) Returns the current module version. |
|
1888
|
|
|
|
|
|
|
|
|
1889
|
|
|
|
|
|
|
=head2 new |
|
1890
|
|
|
|
|
|
|
X<new> |
|
1891
|
|
|
|
|
|
|
|
|
1892
|
|
|
|
|
|
|
(Class method) Returns a new instance of class Text::CSV_XS. The attributes |
|
1893
|
|
|
|
|
|
|
are described by the (optional) hash ref C<\%attr>. |
|
1894
|
|
|
|
|
|
|
|
|
1895
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ attributes ... }); |
|
1896
|
|
|
|
|
|
|
|
|
1897
|
|
|
|
|
|
|
The following attributes are available: |
|
1898
|
|
|
|
|
|
|
|
|
1899
|
|
|
|
|
|
|
=head3 eol |
|
1900
|
|
|
|
|
|
|
X<eol> |
|
1901
|
|
|
|
|
|
|
|
|
1902
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ eol => $/ }); |
|
1903
|
|
|
|
|
|
|
$csv->eol (undef); |
|
1904
|
|
|
|
|
|
|
my $eol = $csv->eol; |
|
1905
|
|
|
|
|
|
|
|
|
1906
|
|
|
|
|
|
|
The end-of-line string to add to rows for L</print> or the record separator |
|
1907
|
|
|
|
|
|
|
for L</getline>. |
|
1908
|
|
|
|
|
|
|
|
|
1909
|
|
|
|
|
|
|
When not passed in a B<parser> instance, the default behavior is to accept |
|
1910
|
|
|
|
|
|
|
C<\n>, C<\r>, and C<\r\n>, so it is probably safer to not specify C<eol> at |
|
1911
|
|
|
|
|
|
|
all. Passing C<undef> or the empty string behaves the same. |
|
1912
|
|
|
|
|
|
|
|
|
1913
|
|
|
|
|
|
|
When not passed in a B<generating> instance, records are not terminated at |
|
1914
|
|
|
|
|
|
|
all, so it is probably wise to pass something you expect. A safe choice for |
|
1915
|
|
|
|
|
|
|
C<eol> on output is either C<$/> or C<\r\n>. |
|
1916
|
|
|
|
|
|
|
|
|
1917
|
|
|
|
|
|
|
Common values for C<eol> are C<"\012"> (C<\n> or Line Feed), C<"\015\012"> |
|
1918
|
|
|
|
|
|
|
(C<\r\n> or Carriage Return, Line Feed), and C<"\015"> (C<\r> or Carriage |
|
1919
|
|
|
|
|
|
|
Return). The L<C<eol>|/eol> attribute cannot exceed 7 (ASCII) characters. |
|
1920
|
|
|
|
|
|
|
|
|
1921
|
|
|
|
|
|
|
If both C<$/> and L<C<eol>|/eol> equal C<"\015">, parsing lines that end on |
|
1922
|
|
|
|
|
|
|
only a Carriage Return without Line Feed, will be L</parse>d correctly. |
|
1923
|
|
|
|
|
|
|
|
|
1924
|
|
|
|
|
|
|
=head3 eol_type |
|
1925
|
|
|
|
|
|
|
X<eol_type> |
|
1926
|
|
|
|
|
|
|
|
|
1927
|
|
|
|
|
|
|
my $eol = $csv->eol_type; |
|
1928
|
|
|
|
|
|
|
|
|
1929
|
|
|
|
|
|
|
This read-only method returns the internal state of what is considered the |
|
1930
|
|
|
|
|
|
|
valid EOL for parsing. |
|
1931
|
|
|
|
|
|
|
|
|
1932
|
|
|
|
|
|
|
=head3 sep_char |
|
1933
|
|
|
|
|
|
|
X<sep_char> |
|
1934
|
|
|
|
|
|
|
|
|
1935
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ sep_char => ";" }); |
|
1936
|
|
|
|
|
|
|
$csv->sep_char (";"); |
|
1937
|
|
|
|
|
|
|
my $c = $csv->sep_char; |
|
1938
|
|
|
|
|
|
|
|
|
1939
|
|
|
|
|
|
|
The char used to separate fields, by default a comma (C<,>). Limited to a |
|
1940
|
|
|
|
|
|
|
single-byte character, usually in the range from C<0x20> (space) to C<0x7E> |
|
1941
|
|
|
|
|
|
|
(tilde). When longer sequences are required, use L<C<sep>|/sep>. |
|
1942
|
|
|
|
|
|
|
|
|
1943
|
|
|
|
|
|
|
The separation character can not be equal to the quote character or to the |
|
1944
|
|
|
|
|
|
|
escape character. |
|
1945
|
|
|
|
|
|
|
|
|
1946
|
|
|
|
|
|
|
See also L</CAVEATS> |
|
1947
|
|
|
|
|
|
|
|
|
1948
|
|
|
|
|
|
|
=head3 sep |
|
1949
|
|
|
|
|
|
|
X<sep> |
|
1950
|
|
|
|
|
|
|
|
|
1951
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ sep => "\N{FULLWIDTH COMMA}" }); |
|
1952
|
|
|
|
|
|
|
$csv->sep (";"); |
|
1953
|
|
|
|
|
|
|
my $sep = $csv->sep; |
|
1954
|
|
|
|
|
|
|
|
|
1955
|
|
|
|
|
|
|
The chars used to separate fields, by default undefined. Limited to 8 bytes. |
|
1956
|
|
|
|
|
|
|
|
|
1957
|
|
|
|
|
|
|
When set, overrules L<C<sep_char>|/sep_char>. If its length is one byte it |
|
1958
|
|
|
|
|
|
|
acts as an alias to L<C<sep_char>|/sep_char>. |
|
1959
|
|
|
|
|
|
|
|
|
1960
|
|
|
|
|
|
|
See also L</CAVEATS> |
|
1961
|
|
|
|
|
|
|
|
|
1962
|
|
|
|
|
|
|
=head3 quote_char |
|
1963
|
|
|
|
|
|
|
X<quote_char> |
|
1964
|
|
|
|
|
|
|
|
|
1965
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ quote_char => "'" }); |
|
1966
|
|
|
|
|
|
|
$csv->quote_char (undef); |
|
1967
|
|
|
|
|
|
|
my $c = $csv->quote_char; |
|
1968
|
|
|
|
|
|
|
|
|
1969
|
|
|
|
|
|
|
The character to quote fields containing blanks or binary data, by default |
|
1970
|
|
|
|
|
|
|
the double quote character (C<">). A value of undef suppresses quote chars |
|
1971
|
|
|
|
|
|
|
(for simple cases only). Limited to a single-byte character, usually in the |
|
1972
|
|
|
|
|
|
|
range from C<0x20> (space) to C<0x7E> (tilde). When longer sequences are |
|
1973
|
|
|
|
|
|
|
required, use L<C<quote>|/quote>. |
|
1974
|
|
|
|
|
|
|
|
|
1975
|
|
|
|
|
|
|
C<quote_char> can not be equal to L<C<sep_char>|/sep_char>. |
|
1976
|
|
|
|
|
|
|
|
|
1977
|
|
|
|
|
|
|
=head3 quote |
|
1978
|
|
|
|
|
|
|
X<quote> |
|
1979
|
|
|
|
|
|
|
|
|
1980
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ quote => "\N{FULLWIDTH QUOTATION MARK}" }); |
|
1981
|
|
|
|
|
|
|
$csv->quote ("'"); |
|
1982
|
|
|
|
|
|
|
my $quote = $csv->quote; |
|
1983
|
|
|
|
|
|
|
|
|
1984
|
|
|
|
|
|
|
The chars used to quote fields, by default undefined. Limited to 8 bytes. |
|
1985
|
|
|
|
|
|
|
|
|
1986
|
|
|
|
|
|
|
When set, overrules L<C<quote_char>|/quote_char>. If its length is one byte |
|
1987
|
|
|
|
|
|
|
it acts as an alias to L<C<quote_char>|/quote_char>. |
|
1988
|
|
|
|
|
|
|
|
|
1989
|
|
|
|
|
|
|
This method does not support C<undef>. Use L<C<quote_char>|/quote_char> to |
|
1990
|
|
|
|
|
|
|
disable quotation. |
|
1991
|
|
|
|
|
|
|
|
|
1992
|
|
|
|
|
|
|
See also L</CAVEATS> |
|
1993
|
|
|
|
|
|
|
|
|
1994
|
|
|
|
|
|
|
=head3 escape_char |
|
1995
|
|
|
|
|
|
|
X<escape_char> |
|
1996
|
|
|
|
|
|
|
|
|
1997
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ escape_char => "\\" }); |
|
1998
|
|
|
|
|
|
|
$csv->escape_char (":"); |
|
1999
|
|
|
|
|
|
|
my $c = $csv->escape_char; |
|
2000
|
|
|
|
|
|
|
|
|
2001
|
|
|
|
|
|
|
The character to escape certain characters inside quoted fields. This is |
|
2002
|
|
|
|
|
|
|
limited to a single-byte character, usually in the range from C<0x20> |
|
2003
|
|
|
|
|
|
|
(space) to C<0x7E> (tilde). |
|
2004
|
|
|
|
|
|
|
|
|
2005
|
|
|
|
|
|
|
The C<escape_char> defaults to being the double-quote mark (C<">). In other |
|
2006
|
|
|
|
|
|
|
words the same as the default L<C<quote_char>|/quote_char>. This means that |
|
2007
|
|
|
|
|
|
|
doubling the quote mark in a field escapes it: |
|
2008
|
|
|
|
|
|
|
|
|
2009
|
|
|
|
|
|
|
"foo","bar","Escape ""quote mark"" with two ""quote marks""","baz" |
|
2010
|
|
|
|
|
|
|
|
|
2011
|
|
|
|
|
|
|
If you change the L<C<quote_char>|/quote_char> without changing the |
|
2012
|
|
|
|
|
|
|
C<escape_char>, the C<escape_char> will still be the double-quote (C<">). |
|
2013
|
|
|
|
|
|
|
If instead you want to escape the L<C<quote_char>|/quote_char> by doubling |
|
2014
|
|
|
|
|
|
|
it you will need to also change the C<escape_char> to be the same as what |
|
2015
|
|
|
|
|
|
|
you have changed the L<C<quote_char>|/quote_char> to. |
|
2016
|
|
|
|
|
|
|
|
|
2017
|
|
|
|
|
|
|
Setting C<escape_char> to C<undef> or C<""> will completely disable escapes |
|
2018
|
|
|
|
|
|
|
and is greatly discouraged. This will also disable C<escape_null>. |
|
2019
|
|
|
|
|
|
|
|
|
2020
|
|
|
|
|
|
|
The escape character can not be equal to the separation character. |
|
2021
|
|
|
|
|
|
|
|
|
2022
|
|
|
|
|
|
|
=head3 binary |
|
2023
|
|
|
|
|
|
|
X<binary> |
|
2024
|
|
|
|
|
|
|
|
|
2025
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ binary => 1 }); |
|
2026
|
|
|
|
|
|
|
$csv->binary (0); |
|
2027
|
|
|
|
|
|
|
my $f = $csv->binary; |
|
2028
|
|
|
|
|
|
|
|
|
2029
|
|
|
|
|
|
|
If this attribute is C<1>, you may use binary characters in quoted fields, |
|
2030
|
|
|
|
|
|
|
including line feeds, carriage returns and C<NULL> bytes. (The latter could |
|
2031
|
|
|
|
|
|
|
be escaped as C<"0>.) By default this feature is off. |
|
2032
|
|
|
|
|
|
|
|
|
2033
|
|
|
|
|
|
|
If a string is marked UTF8, C<binary> will be turned on automatically when |
|
2034
|
|
|
|
|
|
|
binary characters other than C<CR> and C<NL> are encountered. Note that a |
|
2035
|
|
|
|
|
|
|
simple string like C<"\x{00a0}"> might still be binary, but not marked UTF8, |
|
2036
|
|
|
|
|
|
|
so setting C<< { binary => 1 } >> is still a wise option. |
|
2037
|
|
|
|
|
|
|
|
|
2038
|
|
|
|
|
|
|
=head3 strict |
|
2039
|
|
|
|
|
|
|
X<strict> |
|
2040
|
|
|
|
|
|
|
|
|
2041
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ strict => 1 }); |
|
2042
|
|
|
|
|
|
|
$csv->strict (0); |
|
2043
|
|
|
|
|
|
|
my $f = $csv->strict; |
|
2044
|
|
|
|
|
|
|
|
|
2045
|
|
|
|
|
|
|
If this attribute is set to C<1>, any row that parses to a different number |
|
2046
|
|
|
|
|
|
|
of fields than the previous row will cause the parser to throw error 2014. |
|
2047
|
|
|
|
|
|
|
|
|
2048
|
|
|
|
|
|
|
Empty rows or rows that result in no fields (like comment lines) are exempt |
|
2049
|
|
|
|
|
|
|
from these checks. |
|
2050
|
|
|
|
|
|
|
|
|
2051
|
|
|
|
|
|
|
=head3 strict_eol |
|
2052
|
|
|
|
|
|
|
X<strict_eol> |
|
2053
|
|
|
|
|
|
|
|
|
2054
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ strict_eol => 1 }); |
|
2055
|
|
|
|
|
|
|
$csv->strict_eol (0); |
|
2056
|
|
|
|
|
|
|
my $f = $csv->strict_eol; |
|
2057
|
|
|
|
|
|
|
|
|
2058
|
|
|
|
|
|
|
If this attribute is set to C<0>, no EOL consistency checks are done. |
|
2059
|
|
|
|
|
|
|
|
|
2060
|
|
|
|
|
|
|
If this attribute is set to C<1>, any row that parses with an EOL other than |
|
2061
|
|
|
|
|
|
|
the EOL from the first row will cause a warning. The error will be ignored |
|
2062
|
|
|
|
|
|
|
and parsing continues. This warning is only thrown once. Note that in data |
|
2063
|
|
|
|
|
|
|
with various different line endings, C<\r\r> will still throw an error that |
|
2064
|
|
|
|
|
|
|
cannot be ignored. |
|
2065
|
|
|
|
|
|
|
|
|
2066
|
|
|
|
|
|
|
If this attribute is set to C<2> or higher, any row that parses with an EOL |
|
2067
|
|
|
|
|
|
|
other than the EOL from the first row will cause error C<2016> to be thrown. |
|
2068
|
|
|
|
|
|
|
The line being parsed to this error might not be stored in the result. |
|
2069
|
|
|
|
|
|
|
|
|
2070
|
|
|
|
|
|
|
=head3 skip_empty_rows |
|
2071
|
|
|
|
|
|
|
X<skip_empty_rows> |
|
2072
|
|
|
|
|
|
|
|
|
2073
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ skip_empty_rows => 1 }); |
|
2074
|
|
|
|
|
|
|
$csv->skip_empty_rows ("eof"); |
|
2075
|
|
|
|
|
|
|
my $f = $csv->skip_empty_rows; |
|
2076
|
|
|
|
|
|
|
|
|
2077
|
|
|
|
|
|
|
This attribute defines the behavior for empty rows: an L</eol> immediately |
|
2078
|
|
|
|
|
|
|
following the start of line. Default behavior is to return one single empty |
|
2079
|
|
|
|
|
|
|
field. |
|
2080
|
|
|
|
|
|
|
|
|
2081
|
|
|
|
|
|
|
This attribute is only used in parsing. This attribute is ineffective when |
|
2082
|
|
|
|
|
|
|
using L</parse> and L</fields>. |
|
2083
|
|
|
|
|
|
|
|
|
2084
|
|
|
|
|
|
|
Possible values for this attribute are |
|
2085
|
|
|
|
|
|
|
|
|
2086
|
|
|
|
|
|
|
=over 2 |
|
2087
|
|
|
|
|
|
|
|
|
2088
|
|
|
|
|
|
|
=item 0 | undef |
|
2089
|
|
|
|
|
|
|
|
|
2090
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ skip_empty_rows => 0 }); |
|
2091
|
|
|
|
|
|
|
$csv->skip_empty_rows (undef); |
|
2092
|
|
|
|
|
|
|
|
|
2093
|
|
|
|
|
|
|
No special action is taken. The result will be one single empty field. |
|
2094
|
|
|
|
|
|
|
|
|
2095
|
|
|
|
|
|
|
=item 1 | "skip" |
|
2096
|
|
|
|
|
|
|
|
|
2097
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ skip_empty_rows => 1 }); |
|
2098
|
|
|
|
|
|
|
$csv->skip_empty_rows ("skip"); |
|
2099
|
|
|
|
|
|
|
|
|
2100
|
|
|
|
|
|
|
The row will be skipped. |
|
2101
|
|
|
|
|
|
|
|
|
2102
|
|
|
|
|
|
|
=item 2 | "eof" | "stop" |
|
2103
|
|
|
|
|
|
|
|
|
2104
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ skip_empty_rows => 2 }); |
|
2105
|
|
|
|
|
|
|
$csv->skip_empty_rows ("eof"); |
|
2106
|
|
|
|
|
|
|
|
|
2107
|
|
|
|
|
|
|
The parsing will stop as if an L</eof> was detected. |
|
2108
|
|
|
|
|
|
|
|
|
2109
|
|
|
|
|
|
|
=item 3 | "die" |
|
2110
|
|
|
|
|
|
|
|
|
2111
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ skip_empty_rows => 3 }); |
|
2112
|
|
|
|
|
|
|
$csv->skip_empty_rows ("die"); |
|
2113
|
|
|
|
|
|
|
|
|
2114
|
|
|
|
|
|
|
The parsing will stop. The internal error code will be set to 2015 and the |
|
2115
|
|
|
|
|
|
|
parser will C<die>. |
|
2116
|
|
|
|
|
|
|
|
|
2117
|
|
|
|
|
|
|
=item 4 | "croak" |
|
2118
|
|
|
|
|
|
|
|
|
2119
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ skip_empty_rows => 4 }); |
|
2120
|
|
|
|
|
|
|
$csv->skip_empty_rows ("croak"); |
|
2121
|
|
|
|
|
|
|
|
|
2122
|
|
|
|
|
|
|
The parsing will stop. The internal error code will be set to 2015 and the |
|
2123
|
|
|
|
|
|
|
parser will C<croak>. |
|
2124
|
|
|
|
|
|
|
|
|
2125
|
|
|
|
|
|
|
=item 5 | "error" |
|
2126
|
|
|
|
|
|
|
|
|
2127
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ skip_empty_rows => 5 }); |
|
2128
|
|
|
|
|
|
|
$csv->skip_empty_rows ("error"); |
|
2129
|
|
|
|
|
|
|
|
|
2130
|
|
|
|
|
|
|
The parsing will fail. The internal error code will be set to 2015. |
|
2131
|
|
|
|
|
|
|
|
|
2132
|
|
|
|
|
|
|
=item callback |
|
2133
|
|
|
|
|
|
|
|
|
2134
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ skip_empty_rows => sub { [] } }); |
|
2135
|
|
|
|
|
|
|
$csv->skip_empty_rows (sub { [ 42, $., undef, "empty" ] }); |
|
2136
|
|
|
|
|
|
|
|
|
2137
|
|
|
|
|
|
|
The callback is invoked and its result used instead. If you want the parse |
|
2138
|
|
|
|
|
|
|
to stop after the callback, make sure to return a false value. |
|
2139
|
|
|
|
|
|
|
|
|
2140
|
|
|
|
|
|
|
The returned value from the callback should be an array-ref. Any other type |
|
2141
|
|
|
|
|
|
|
will cause the parse to stop, so these are equivalent in behavior: |
|
2142
|
|
|
|
|
|
|
|
|
2143
|
|
|
|
|
|
|
csv (in => $fh, skip_empty_rows => "stop"); |
|
2144
|
|
|
|
|
|
|
csv (in => $fh, skip_empty_rows => sub { 0; }); |
|
2145
|
|
|
|
|
|
|
|
|
2146
|
|
|
|
|
|
|
=back |
|
2147
|
|
|
|
|
|
|
|
|
2148
|
|
|
|
|
|
|
Without arguments, the current value is returned: C<0>, C<1>, C<eof>, C<die>, |
|
2149
|
|
|
|
|
|
|
C<croak> or the callback. |
|
2150
|
|
|
|
|
|
|
|
|
2151
|
|
|
|
|
|
|
=head3 formula_handling |
|
2152
|
|
|
|
|
|
|
X<formula_handling> |
|
2153
|
|
|
|
|
|
|
|
|
2154
|
|
|
|
|
|
|
Alias for L</formula> |
|
2155
|
|
|
|
|
|
|
|
|
2156
|
|
|
|
|
|
|
=head3 formula |
|
2157
|
|
|
|
|
|
|
X<formula> |
|
2158
|
|
|
|
|
|
|
|
|
2159
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ formula => "none" }); |
|
2160
|
|
|
|
|
|
|
$csv->formula ("none"); |
|
2161
|
|
|
|
|
|
|
my $f = $csv->formula; |
|
2162
|
|
|
|
|
|
|
|
|
2163
|
|
|
|
|
|
|
This defines the behavior of fields containing I<formulas>. As formulas are |
|
2164
|
|
|
|
|
|
|
considered dangerous in spreadsheets, this attribute can define an optional |
|
2165
|
|
|
|
|
|
|
action to be taken if a field starts with an equal sign (C<=>). |
|
2166
|
|
|
|
|
|
|
|
|
2167
|
|
|
|
|
|
|
For purpose of code-readability, this can also be written as |
|
2168
|
|
|
|
|
|
|
|
|
2169
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ formula_handling => "none" }); |
|
2170
|
|
|
|
|
|
|
$csv->formula_handling ("none"); |
|
2171
|
|
|
|
|
|
|
my $f = $csv->formula_handling; |
|
2172
|
|
|
|
|
|
|
|
|
2173
|
|
|
|
|
|
|
Possible values for this attribute are |
|
2174
|
|
|
|
|
|
|
|
|
2175
|
|
|
|
|
|
|
=over 2 |
|
2176
|
|
|
|
|
|
|
|
|
2177
|
|
|
|
|
|
|
=item none |
|
2178
|
|
|
|
|
|
|
|
|
2179
|
|
|
|
|
|
|
Take no specific action. This is the default. |
|
2180
|
|
|
|
|
|
|
|
|
2181
|
|
|
|
|
|
|
$csv->formula ("none"); |
|
2182
|
|
|
|
|
|
|
|
|
2183
|
|
|
|
|
|
|
=item die |
|
2184
|
|
|
|
|
|
|
|
|
2185
|
|
|
|
|
|
|
Cause the process to C<die> whenever a leading C<=> is encountered. |
|
2186
|
|
|
|
|
|
|
|
|
2187
|
|
|
|
|
|
|
$csv->formula ("die"); |
|
2188
|
|
|
|
|
|
|
|
|
2189
|
|
|
|
|
|
|
=item croak |
|
2190
|
|
|
|
|
|
|
|
|
2191
|
|
|
|
|
|
|
Cause the process to C<croak> whenever a leading C<=> is encountered. (See |
|
2192
|
|
|
|
|
|
|
L<Carp>) |
|
2193
|
|
|
|
|
|
|
|
|
2194
|
|
|
|
|
|
|
$csv->formula ("croak"); |
|
2195
|
|
|
|
|
|
|
|
|
2196
|
|
|
|
|
|
|
=item diag |
|
2197
|
|
|
|
|
|
|
|
|
2198
|
|
|
|
|
|
|
Report position and content of the field whenever a leading C<=> is found. |
|
2199
|
|
|
|
|
|
|
The value of the field is unchanged. |
|
2200
|
|
|
|
|
|
|
|
|
2201
|
|
|
|
|
|
|
$csv->formula ("diag"); |
|
2202
|
|
|
|
|
|
|
|
|
2203
|
|
|
|
|
|
|
=item empty |
|
2204
|
|
|
|
|
|
|
|
|
2205
|
|
|
|
|
|
|
Replace the content of fields that start with a C<=> with the empty string. |
|
2206
|
|
|
|
|
|
|
|
|
2207
|
|
|
|
|
|
|
$csv->formula ("empty"); |
|
2208
|
|
|
|
|
|
|
$csv->formula (""); |
|
2209
|
|
|
|
|
|
|
|
|
2210
|
|
|
|
|
|
|
=item undef |
|
2211
|
|
|
|
|
|
|
|
|
2212
|
|
|
|
|
|
|
Replace the content of fields that start with a C<=> with C<undef>. |
|
2213
|
|
|
|
|
|
|
|
|
2214
|
|
|
|
|
|
|
$csv->formula ("undef"); |
|
2215
|
|
|
|
|
|
|
$csv->formula (undef); |
|
2216
|
|
|
|
|
|
|
|
|
2217
|
|
|
|
|
|
|
=item a callback |
|
2218
|
|
|
|
|
|
|
|
|
2219
|
|
|
|
|
|
|
Modify the content of fields that start with a C<=> with the return-value |
|
2220
|
|
|
|
|
|
|
of the callback. The original content of the field is available inside the |
|
2221
|
|
|
|
|
|
|
callback as C<$_>; |
|
2222
|
|
|
|
|
|
|
|
|
2223
|
|
|
|
|
|
|
# Replace all formulas with 42 |
|
2224
|
|
|
|
|
|
|
$csv->formula (sub { 42; }); |
|
2225
|
|
|
|
|
|
|
|
|
2226
|
|
|
|
|
|
|
# same as $csv->formula ("empty") but slower |
|
2227
|
|
|
|
|
|
|
$csv->formula (sub { "" }); |
|
2228
|
|
|
|
|
|
|
|
|
2229
|
|
|
|
|
|
|
# Allow =4+12 |
|
2230
|
|
|
|
|
|
|
$csv->formula (sub { s/^=(\d+\+\d+)$/$1/eer }); |
|
2231
|
|
|
|
|
|
|
|
|
2232
|
|
|
|
|
|
|
# Allow more complex calculations |
|
2233
|
|
|
|
|
|
|
$csv->formula (sub { eval { s{^=([-+*/0-9()]+)$}{$1}ee }; $_ }); |
|
2234
|
|
|
|
|
|
|
|
|
2235
|
|
|
|
|
|
|
=back |
|
2236
|
|
|
|
|
|
|
|
|
2237
|
|
|
|
|
|
|
All other values will give a warning and then fall back to C<diag>. |
|
2238
|
|
|
|
|
|
|
|
|
2239
|
|
|
|
|
|
|
=head3 decode_utf8 |
|
2240
|
|
|
|
|
|
|
X<decode_utf8> |
|
2241
|
|
|
|
|
|
|
|
|
2242
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ decode_utf8 => 1 }); |
|
2243
|
|
|
|
|
|
|
$csv->decode_utf8 (0); |
|
2244
|
|
|
|
|
|
|
my $f = $csv->decode_utf8; |
|
2245
|
|
|
|
|
|
|
|
|
2246
|
|
|
|
|
|
|
This attribute defaults to TRUE. |
|
2247
|
|
|
|
|
|
|
|
|
2248
|
|
|
|
|
|
|
While I<parsing>, fields that are valid UTF-8, are automatically set to be |
|
2249
|
|
|
|
|
|
|
UTF-8, so that |
|
2250
|
|
|
|
|
|
|
|
|
2251
|
|
|
|
|
|
|
$csv->parse ("\xC4\xA8\n"); |
|
2252
|
|
|
|
|
|
|
|
|
2253
|
|
|
|
|
|
|
results in |
|
2254
|
|
|
|
|
|
|
|
|
2255
|
|
|
|
|
|
|
PV("\304\250"\0) [UTF8 "\x{128}"] |
|
2256
|
|
|
|
|
|
|
|
|
2257
|
|
|
|
|
|
|
Sometimes it might not be a desired action. To prevent those upgrades, set |
|
2258
|
|
|
|
|
|
|
this attribute to false, and the result will be |
|
2259
|
|
|
|
|
|
|
|
|
2260
|
|
|
|
|
|
|
PV("\304\250"\0) |
|
2261
|
|
|
|
|
|
|
|
|
2262
|
|
|
|
|
|
|
=head3 auto_diag |
|
2263
|
|
|
|
|
|
|
X<auto_diag> |
|
2264
|
|
|
|
|
|
|
|
|
2265
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ auto_diag => 1 }); |
|
2266
|
|
|
|
|
|
|
$csv->auto_diag (2); |
|
2267
|
|
|
|
|
|
|
my $l = $csv->auto_diag; |
|
2268
|
|
|
|
|
|
|
|
|
2269
|
|
|
|
|
|
|
Setting this attribute to a number between C<1> and C<9> causes L</error_diag> |
|
2270
|
|
|
|
|
|
|
to be automatically called in void context upon errors. |
|
2271
|
|
|
|
|
|
|
|
|
2272
|
|
|
|
|
|
|
In case of error C<2012 - EOF>, this call will be void. |
|
2273
|
|
|
|
|
|
|
|
|
2274
|
|
|
|
|
|
|
If C<auto_diag> is set to a numeric value greater than C<1>, it will C<die> |
|
2275
|
|
|
|
|
|
|
on errors instead of C<warn>. If set to anything unrecognized, it will be |
|
2276
|
|
|
|
|
|
|
silently ignored. |
|
2277
|
|
|
|
|
|
|
|
|
2278
|
|
|
|
|
|
|
Future extensions to this feature will include more reliable auto-detection |
|
2279
|
|
|
|
|
|
|
of C<autodie> being active in the scope in which the error occurred, which |
|
2280
|
|
|
|
|
|
|
will increment the value of C<auto_diag> with C<1> the moment the error is |
|
2281
|
|
|
|
|
|
|
detected. |
|
2282
|
|
|
|
|
|
|
|
|
2283
|
|
|
|
|
|
|
=head3 diag_verbose |
|
2284
|
|
|
|
|
|
|
X<diag_verbose> |
|
2285
|
|
|
|
|
|
|
|
|
2286
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ diag_verbose => 1 }); |
|
2287
|
|
|
|
|
|
|
$csv->diag_verbose (2); |
|
2288
|
|
|
|
|
|
|
my $l = $csv->diag_verbose; |
|
2289
|
|
|
|
|
|
|
|
|
2290
|
|
|
|
|
|
|
Set the verbosity of the output triggered by C<auto_diag>. Currently only |
|
2291
|
|
|
|
|
|
|
adds the current input-record-number (if known) to the diagnostic output |
|
2292
|
|
|
|
|
|
|
with an indication of the position of the error. |
|
2293
|
|
|
|
|
|
|
|
|
2294
|
|
|
|
|
|
|
=head3 blank_is_undef |
|
2295
|
|
|
|
|
|
|
X<blank_is_undef> |
|
2296
|
|
|
|
|
|
|
|
|
2297
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ blank_is_undef => 1 }); |
|
2298
|
|
|
|
|
|
|
$csv->blank_is_undef (0); |
|
2299
|
|
|
|
|
|
|
my $f = $csv->blank_is_undef; |
|
2300
|
|
|
|
|
|
|
|
|
2301
|
|
|
|
|
|
|
Under normal circumstances, C<CSV> data makes no distinction between quoted- |
|
2302
|
|
|
|
|
|
|
and unquoted empty fields. These both end up in an empty string field once |
|
2303
|
|
|
|
|
|
|
read, thus |
|
2304
|
|
|
|
|
|
|
|
|
2305
|
|
|
|
|
|
|
1,"",," ",2 |
|
2306
|
|
|
|
|
|
|
|
|
2307
|
|
|
|
|
|
|
is read as |
|
2308
|
|
|
|
|
|
|
|
|
2309
|
|
|
|
|
|
|
("1", "", "", " ", "2") |
|
2310
|
|
|
|
|
|
|
|
|
2311
|
|
|
|
|
|
|
When I<writing> C<CSV> files with either L<C<always_quote>|/always_quote> |
|
2312
|
|
|
|
|
|
|
or L<C<quote_empty>|/quote_empty> set, the unquoted I<empty> field is the |
|
2313
|
|
|
|
|
|
|
result of an undefined value. To enable this distinction when I<reading> |
|
2314
|
|
|
|
|
|
|
C<CSV> data, the C<blank_is_undef> attribute will cause unquoted empty |
|
2315
|
|
|
|
|
|
|
fields to be set to C<undef>, causing the above to be parsed as |
|
2316
|
|
|
|
|
|
|
|
|
2317
|
|
|
|
|
|
|
("1", "", undef, " ", "2") |
|
2318
|
|
|
|
|
|
|
|
|
2319
|
|
|
|
|
|
|
Note that this is specifically important when loading C<CSV> fields into a |
|
2320
|
|
|
|
|
|
|
database that allows C<NULL> values, as the perl equivalent for C<NULL> is |
|
2321
|
|
|
|
|
|
|
C<undef> in L<DBI> land. |
|
2322
|
|
|
|
|
|
|
|
|
2323
|
|
|
|
|
|
|
=head3 empty_is_undef |
|
2324
|
|
|
|
|
|
|
X<empty_is_undef> |
|
2325
|
|
|
|
|
|
|
|
|
2326
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ empty_is_undef => 1 }); |
|
2327
|
|
|
|
|
|
|
$csv->empty_is_undef (0); |
|
2328
|
|
|
|
|
|
|
my $f = $csv->empty_is_undef; |
|
2329
|
|
|
|
|
|
|
|
|
2330
|
|
|
|
|
|
|
Going one step further than L<C<blank_is_undef>|/blank_is_undef>, this |
|
2331
|
|
|
|
|
|
|
attribute converts all empty fields to C<undef>, so |
|
2332
|
|
|
|
|
|
|
|
|
2333
|
|
|
|
|
|
|
1,"",," ",2 |
|
2334
|
|
|
|
|
|
|
|
|
2335
|
|
|
|
|
|
|
is read as |
|
2336
|
|
|
|
|
|
|
|
|
2337
|
|
|
|
|
|
|
(1, undef, undef, " ", 2) |
|
2338
|
|
|
|
|
|
|
|
|
2339
|
|
|
|
|
|
|
Note that this affects only fields that are originally empty, not fields |
|
2340
|
|
|
|
|
|
|
that are empty after stripping allowed whitespace. YMMV. |
|
2341
|
|
|
|
|
|
|
|
|
2342
|
|
|
|
|
|
|
=head3 allow_whitespace |
|
2343
|
|
|
|
|
|
|
X<allow_whitespace> |
|
2344
|
|
|
|
|
|
|
|
|
2345
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ allow_whitespace => 1 }); |
|
2346
|
|
|
|
|
|
|
$csv->allow_whitespace (0); |
|
2347
|
|
|
|
|
|
|
my $f = $csv->allow_whitespace; |
|
2348
|
|
|
|
|
|
|
|
|
2349
|
|
|
|
|
|
|
When this option is set to true, the whitespace (C<TAB>'s and C<SPACE>'s) |
|
2350
|
|
|
|
|
|
|
surrounding the separation character is removed when parsing. If either |
|
2351
|
|
|
|
|
|
|
C<TAB> or C<SPACE> is one of the three characters L<C<sep_char>|/sep_char>, |
|
2352
|
|
|
|
|
|
|
L<C<quote_char>|/quote_char>, or L<C<escape_char>|/escape_char> it will not |
|
2353
|
|
|
|
|
|
|
be considered whitespace. |
|
2354
|
|
|
|
|
|
|
|
|
2355
|
|
|
|
|
|
|
Now lines like: |
|
2356
|
|
|
|
|
|
|
|
|
2357
|
|
|
|
|
|
|
1 , "foo" , bar , 3 , zapp |
|
2358
|
|
|
|
|
|
|
|
|
2359
|
|
|
|
|
|
|
are parsed as valid C<CSV>, even though it violates the C<CSV> specs. |
|
2360
|
|
|
|
|
|
|
|
|
2361
|
|
|
|
|
|
|
Note that B<all> whitespace is stripped from both start and end of each |
|
2362
|
|
|
|
|
|
|
field. That would make it I<more> than a I<feature> to enable parsing bad |
|
2363
|
|
|
|
|
|
|
C<CSV> lines, as |
|
2364
|
|
|
|
|
|
|
|
|
2365
|
|
|
|
|
|
|
1, 2.0, 3, ape , monkey |
|
2366
|
|
|
|
|
|
|
|
|
2367
|
|
|
|
|
|
|
will now be parsed as |
|
2368
|
|
|
|
|
|
|
|
|
2369
|
|
|
|
|
|
|
("1", "2.0", "3", "ape", "monkey") |
|
2370
|
|
|
|
|
|
|
|
|
2371
|
|
|
|
|
|
|
even if the original line was perfectly acceptable C<CSV>. |
|
2372
|
|
|
|
|
|
|
|
|
2373
|
|
|
|
|
|
|
=head3 allow_loose_quotes |
|
2374
|
|
|
|
|
|
|
X<allow_loose_quotes> |
|
2375
|
|
|
|
|
|
|
|
|
2376
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ allow_loose_quotes => 1 }); |
|
2377
|
|
|
|
|
|
|
$csv->allow_loose_quotes (0); |
|
2378
|
|
|
|
|
|
|
my $f = $csv->allow_loose_quotes; |
|
2379
|
|
|
|
|
|
|
|
|
2380
|
|
|
|
|
|
|
By default, parsing unquoted fields containing L<C<quote_char>|/quote_char> |
|
2381
|
|
|
|
|
|
|
characters like |
|
2382
|
|
|
|
|
|
|
|
|
2383
|
|
|
|
|
|
|
1,foo "bar" baz,42 |
|
2384
|
|
|
|
|
|
|
|
|
2385
|
|
|
|
|
|
|
would result in parse error 2034. Though it is still bad practice to allow |
|
2386
|
|
|
|
|
|
|
this format, we cannot help the fact that some vendors make their |
|
2387
|
|
|
|
|
|
|
applications spit out lines styled this way. |
|
2388
|
|
|
|
|
|
|
|
|
2389
|
|
|
|
|
|
|
If there is B<really> bad C<CSV> data, like |
|
2390
|
|
|
|
|
|
|
|
|
2391
|
|
|
|
|
|
|
1,"foo "bar" baz",42 |
|
2392
|
|
|
|
|
|
|
|
|
2393
|
|
|
|
|
|
|
or |
|
2394
|
|
|
|
|
|
|
|
|
2395
|
|
|
|
|
|
|
1,""foo bar baz"",42 |
|
2396
|
|
|
|
|
|
|
|
|
2397
|
|
|
|
|
|
|
there is a way to get this data-line parsed and leave the quotes inside the |
|
2398
|
|
|
|
|
|
|
quoted field as-is. This can be achieved by setting C<allow_loose_quotes> |
|
2399
|
|
|
|
|
|
|
B<AND> making sure that the L<C<escape_char>|/escape_char> is I<not> equal |
|
2400
|
|
|
|
|
|
|
to L<C<quote_char>|/quote_char>. |
|
2401
|
|
|
|
|
|
|
|
|
2402
|
|
|
|
|
|
|
=head3 allow_loose_escapes |
|
2403
|
|
|
|
|
|
|
X<allow_loose_escapes> |
|
2404
|
|
|
|
|
|
|
|
|
2405
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ allow_loose_escapes => 1 }); |
|
2406
|
|
|
|
|
|
|
$csv->allow_loose_escapes (0); |
|
2407
|
|
|
|
|
|
|
my $f = $csv->allow_loose_escapes; |
|
2408
|
|
|
|
|
|
|
|
|
2409
|
|
|
|
|
|
|
Parsing fields that have L<C<escape_char>|/escape_char> characters that |
|
2410
|
|
|
|
|
|
|
escape characters that do not need to be escaped, like: |
|
2411
|
|
|
|
|
|
|
|
|
2412
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ escape_char => "\\" }); |
|
2413
|
|
|
|
|
|
|
$csv->parse (qq{1,"my bar\'s",baz,42}); |
|
2414
|
|
|
|
|
|
|
|
|
2415
|
|
|
|
|
|
|
would result in parse error 2025. Though it is bad practice to allow this |
|
2416
|
|
|
|
|
|
|
format, this attribute enables you to treat all escape character sequences |
|
2417
|
|
|
|
|
|
|
equal. |
|
2418
|
|
|
|
|
|
|
|
|
2419
|
|
|
|
|
|
|
=head3 allow_unquoted_escape |
|
2420
|
|
|
|
|
|
|
X<allow_unquoted_escape> |
|
2421
|
|
|
|
|
|
|
|
|
2422
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ allow_unquoted_escape => 1 }); |
|
2423
|
|
|
|
|
|
|
$csv->allow_unquoted_escape (0); |
|
2424
|
|
|
|
|
|
|
my $f = $csv->allow_unquoted_escape; |
|
2425
|
|
|
|
|
|
|
|
|
2426
|
|
|
|
|
|
|
A backward compatibility issue where L<C<escape_char>|/escape_char> differs |
|
2427
|
|
|
|
|
|
|
from L<C<quote_char>|/quote_char> prevents L<C<escape_char>|/escape_char> |
|
2428
|
|
|
|
|
|
|
from being in the first position of a field. If L<C<quote_char>|/quote_char> is |
|
2429
|
|
|
|
|
|
|
equal to the default C<"> and L<C<escape_char>|/escape_char> is set to C<\>, |
|
2430
|
|
|
|
|
|
|
this would be illegal: |
|
2431
|
|
|
|
|
|
|
|
|
2432
|
|
|
|
|
|
|
1,\0,2 |
|
2433
|
|
|
|
|
|
|
|
|
2434
|
|
|
|
|
|
|
Setting this attribute to C<1> might help to overcome issues with backward |
|
2435
|
|
|
|
|
|
|
compatibility and allow this style. |
|
2436
|
|
|
|
|
|
|
|
|
2437
|
|
|
|
|
|
|
=head3 always_quote |
|
2438
|
|
|
|
|
|
|
X<always_quote> |
|
2439
|
|
|
|
|
|
|
|
|
2440
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ always_quote => 1 }); |
|
2441
|
|
|
|
|
|
|
$csv->always_quote (0); |
|
2442
|
|
|
|
|
|
|
my $f = $csv->always_quote; |
|
2443
|
|
|
|
|
|
|
|
|
2444
|
|
|
|
|
|
|
By default the generated fields are quoted only if they I<need> to be. For |
|
2445
|
|
|
|
|
|
|
example, if they contain the separator character. If you set this attribute |
|
2446
|
|
|
|
|
|
|
to C<1> then I<all> defined fields will be quoted. (C<undef> fields are not |
|
2447
|
|
|
|
|
|
|
quoted, see L</blank_is_undef>). This makes it quite often easier to handle |
|
2448
|
|
|
|
|
|
|
exported data in external applications. (Poor creatures who are better to |
|
2449
|
|
|
|
|
|
|
use Text::CSV_XS. :) |
|
2450
|
|
|
|
|
|
|
|
|
2451
|
|
|
|
|
|
|
=head3 quote_space |
|
2452
|
|
|
|
|
|
|
X<quote_space> |
|
2453
|
|
|
|
|
|
|
|
|
2454
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ quote_space => 1 }); |
|
2455
|
|
|
|
|
|
|
$csv->quote_space (0); |
|
2456
|
|
|
|
|
|
|
my $f = $csv->quote_space; |
|
2457
|
|
|
|
|
|
|
|
|
2458
|
|
|
|
|
|
|
By default, a space in a field would trigger quotation. As no rule exists |
|
2459
|
|
|
|
|
|
|
for this to be forced in C<CSV>, nor any for the opposite, the default is true |
|
2460
|
|
|
|
|
|
|
for safety. You can exclude the space from this trigger by setting this |
|
2461
|
|
|
|
|
|
|
attribute to 0. |
|
2462
|
|
|
|
|
|
|
|
|
2463
|
|
|
|
|
|
|
=head3 quote_empty |
|
2464
|
|
|
|
|
|
|
X<quote_empty> |
|
2465
|
|
|
|
|
|
|
|
|
2466
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ quote_empty => 1 }); |
|
2467
|
|
|
|
|
|
|
$csv->quote_empty (0); |
|
2468
|
|
|
|
|
|
|
my $f = $csv->quote_empty; |
|
2469
|
|
|
|
|
|
|
|
|
2470
|
|
|
|
|
|
|
By default the generated fields are quoted only if they I<need> to be. An |
|
2471
|
|
|
|
|
|
|
empty (defined) field does not need quotation. If you set this attribute to |
|
2472
|
|
|
|
|
|
|
C<1> then I<empty> defined fields will be quoted. (C<undef> fields are not |
|
2473
|
|
|
|
|
|
|
quoted, see L</blank_is_undef>). See also L<C<always_quote>|/always_quote>. |
|
2474
|
|
|
|
|
|
|
|
|
2475
|
|
|
|
|
|
|
=head3 quote_binary |
|
2476
|
|
|
|
|
|
|
X<quote_binary> |
|
2477
|
|
|
|
|
|
|
|
|
2478
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ quote_binary => 1 }); |
|
2479
|
|
|
|
|
|
|
$csv->quote_binary (0); |
|
2480
|
|
|
|
|
|
|
my $f = $csv->quote_binary; |
|
2481
|
|
|
|
|
|
|
|
|
2482
|
|
|
|
|
|
|
By default, all "unsafe" bytes inside a string cause the combined field to |
|
2483
|
|
|
|
|
|
|
be quoted. By setting this attribute to C<0>, you can disable that trigger |
|
2484
|
|
|
|
|
|
|
for bytes C<< >= 0x7F >>. |
|
2485
|
|
|
|
|
|
|
|
|
2486
|
|
|
|
|
|
|
=head3 escape_null |
|
2487
|
|
|
|
|
|
|
X<escape_null> |
|
2488
|
|
|
|
|
|
|
X<quote_null> |
|
2489
|
|
|
|
|
|
|
|
|
2490
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ escape_null => 1 }); |
|
2491
|
|
|
|
|
|
|
$csv->escape_null (0); |
|
2492
|
|
|
|
|
|
|
my $f = $csv->escape_null; |
|
2493
|
|
|
|
|
|
|
|
|
2494
|
|
|
|
|
|
|
By default, a C<NULL> byte in a field would be escaped. This option enables |
|
2495
|
|
|
|
|
|
|
you to treat the C<NULL> byte as a simple binary character in binary mode |
|
2496
|
|
|
|
|
|
|
(the C<< { binary => 1 } >> is set). The default is true. You can prevent |
|
2497
|
|
|
|
|
|
|
C<NULL> escapes by setting this attribute to C<0>. |
|
2498
|
|
|
|
|
|
|
|
|
2499
|
|
|
|
|
|
|
When the C<escape_char> attribute is set to undefined, this attribute will |
|
2500
|
|
|
|
|
|
|
be set to false. |
|
2501
|
|
|
|
|
|
|
|
|
2502
|
|
|
|
|
|
|
The default setting will encode "=\x00=" as |
|
2503
|
|
|
|
|
|
|
|
|
2504
|
|
|
|
|
|
|
"="0=" |
|
2505
|
|
|
|
|
|
|
|
|
2506
|
|
|
|
|
|
|
With C<escape_null> set to C<0>, this will result in |
|
2507
|
|
|
|
|
|
|
|
|
2508
|
|
|
|
|
|
|
"=\x00=" |
|
2509
|
|
|
|
|
|
|
|
|
2510
|
|
|
|
|
|
|
The default when using the C<csv> function is C<false>. |
|
2511
|
|
|
|
|
|
|
|
|
2512
|
|
|
|
|
|
|
For backward compatibility reasons, the deprecated old name C<quote_null> |
|
2513
|
|
|
|
|
|
|
is still recognized. |
|
2514
|
|
|
|
|
|
|
|
|
2515
|
|
|
|
|
|
|
=head3 keep_meta_info |
|
2516
|
|
|
|
|
|
|
X<keep_meta_info> |
|
2517
|
|
|
|
|
|
|
|
|
2518
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ keep_meta_info => 1 }); |
|
2519
|
|
|
|
|
|
|
$csv->keep_meta_info (0); |
|
2520
|
|
|
|
|
|
|
my $f = $csv->keep_meta_info; |
|
2521
|
|
|
|
|
|
|
|
|
2522
|
|
|
|
|
|
|
By default, the parsing of input records is as simple and fast as possible. |
|
2523
|
|
|
|
|
|
|
However, some parsing information - like quotation of the original field - |
|
2524
|
|
|
|
|
|
|
is lost in that process. Setting this flag to true enables retrieving that |
|
2525
|
|
|
|
|
|
|
information after parsing with the methods L</meta_info>, L</is_quoted>, |
|
2526
|
|
|
|
|
|
|
and L</is_binary> described below. Default is false for performance. |
|
2527
|
|
|
|
|
|
|
|
|
2528
|
|
|
|
|
|
|
If you set this attribute to a value greater than 9, then you can control |
|
2529
|
|
|
|
|
|
|
output quotation style like it was used in the input of the last parsed |
|
2530
|
|
|
|
|
|
|
record (unless quotation was added because of other reasons). |
|
2531
|
|
|
|
|
|
|
|
|
2532
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ |
|
2533
|
|
|
|
|
|
|
binary => 1, |
|
2534
|
|
|
|
|
|
|
keep_meta_info => 1, |
|
2535
|
|
|
|
|
|
|
quote_space => 0, |
|
2536
|
|
|
|
|
|
|
}); |
|
2537
|
|
|
|
|
|
|
|
|
2538
|
|
|
|
|
|
|
my $row = $csv->parse (q{1,,"", ," ",f,"g","h""h",help,"help"}); |
|
2539
|
|
|
|
|
|
|
|
|
2540
|
|
|
|
|
|
|
$csv->print (*STDOUT, \@row); |
|
2541
|
|
|
|
|
|
|
# 1,,, , ,f,g,"h""h",help,help |
|
2542
|
|
|
|
|
|
|
$csv->keep_meta_info (11); |
|
2543
|
|
|
|
|
|
|
$csv->print (*STDOUT, \@row); |
|
2544
|
|
|
|
|
|
|
# 1,,"", ," ",f,"g","h""h",help,"help" |
|
2545
|
|
|
|
|
|
|
|
|
2546
|
|
|
|
|
|
|
=head3 undef_str |
|
2547
|
|
|
|
|
|
|
X<undef_str> |
|
2548
|
|
|
|
|
|
|
|
|
2549
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ undef_str => "\\N" }); |
|
2550
|
|
|
|
|
|
|
$csv->undef_str (undef); |
|
2551
|
|
|
|
|
|
|
my $s = $csv->undef_str; |
|
2552
|
|
|
|
|
|
|
|
|
2553
|
|
|
|
|
|
|
This attribute optionally defines the output of undefined fields. The value |
|
2554
|
|
|
|
|
|
|
passed is not changed at all, so if it needs quotation, the quotation needs |
|
2555
|
|
|
|
|
|
|
to be included in the value of the attribute. Use with caution, as passing |
|
2556
|
|
|
|
|
|
|
a value like C<",",,,,"""> will for sure mess up your output. The default |
|
2557
|
|
|
|
|
|
|
for this attribute is C<undef>, meaning no special treatment. |
|
2558
|
|
|
|
|
|
|
|
|
2559
|
|
|
|
|
|
|
This attribute is useful when exporting CSV data to be imported in custom |
|
2560
|
|
|
|
|
|
|
loaders, like for MySQL, that recognize special sequences for C<NULL> data. |
|
2561
|
|
|
|
|
|
|
|
|
2562
|
|
|
|
|
|
|
This attribute has no meaning when parsing CSV data. |
|
2563
|
|
|
|
|
|
|
|
|
2564
|
|
|
|
|
|
|
=head3 comment_str |
|
2565
|
|
|
|
|
|
|
X<comment_str> |
|
2566
|
|
|
|
|
|
|
|
|
2567
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ comment_str => "#" }); |
|
2568
|
|
|
|
|
|
|
$csv->comment_str (undef); |
|
2569
|
|
|
|
|
|
|
my $s = $csv->comment_str; |
|
2570
|
|
|
|
|
|
|
|
|
2571
|
|
|
|
|
|
|
This attribute optionally defines a string to be recognized as comment. If |
|
2572
|
|
|
|
|
|
|
this attribute is defined, all lines starting with this sequence will not |
|
2573
|
|
|
|
|
|
|
be parsed as CSV but skipped as comment. |
|
2574
|
|
|
|
|
|
|
|
|
2575
|
|
|
|
|
|
|
This attribute has no meaning when generating CSV. |
|
2576
|
|
|
|
|
|
|
|
|
2577
|
|
|
|
|
|
|
Comment strings that start with any of the special characters/sequences are |
|
2578
|
|
|
|
|
|
|
not supported (so it cannot start with any of L</sep_char>, L</quote_char>, |
|
2579
|
|
|
|
|
|
|
L</escape_char>, L</sep>, L</quote>, or L</eol>). |
|
2580
|
|
|
|
|
|
|
|
|
2581
|
|
|
|
|
|
|
For convenience, C<comment> is an alias for C<comment_str>. |
|
2582
|
|
|
|
|
|
|
|
|
2583
|
|
|
|
|
|
|
=head3 verbatim |
|
2584
|
|
|
|
|
|
|
X<verbatim> |
|
2585
|
|
|
|
|
|
|
|
|
2586
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ verbatim => 1 }); |
|
2587
|
|
|
|
|
|
|
$csv->verbatim (0); |
|
2588
|
|
|
|
|
|
|
my $f = $csv->verbatim; |
|
2589
|
|
|
|
|
|
|
|
|
2590
|
|
|
|
|
|
|
This is a quite controversial attribute to set, but makes some hard things |
|
2591
|
|
|
|
|
|
|
possible. |
|
2592
|
|
|
|
|
|
|
|
|
2593
|
|
|
|
|
|
|
The rationale behind this attribute is to tell the parser that the normally |
|
2594
|
|
|
|
|
|
|
special characters newline (C<NL>) and Carriage Return (C<CR>) will not be |
|
2595
|
|
|
|
|
|
|
special when this flag is set, and be dealt with as being ordinary binary |
|
2596
|
|
|
|
|
|
|
characters. This will ease working with data with embedded newlines. |
|
2597
|
|
|
|
|
|
|
|
|
2598
|
|
|
|
|
|
|
When C<verbatim> is used with L</getline>, L</getline> auto-C<chomp>'s |
|
2599
|
|
|
|
|
|
|
every line. |
|
2600
|
|
|
|
|
|
|
|
|
2601
|
|
|
|
|
|
|
Imagine a file format like |
|
2602
|
|
|
|
|
|
|
|
|
2603
|
|
|
|
|
|
|
M^^Hans^Janssen^Klas 2\n2A^Ja^11-06-2007#\r\n |
|
2604
|
|
|
|
|
|
|
|
|
2605
|
|
|
|
|
|
|
where the line ending is a very specific C<"#\r\n">, and the sep_char is a |
|
2606
|
|
|
|
|
|
|
C<^> (caret). None of the fields is quoted, but embedded binary data is |
|
2607
|
|
|
|
|
|
|
likely to be present. With the specific line ending, this should not be too |
|
2608
|
|
|
|
|
|
|
hard to detect. |
|
2609
|
|
|
|
|
|
|
|
|
2610
|
|
|
|
|
|
|
By default, Text::CSV_XS' parse function is instructed to only know about |
|
2611
|
|
|
|
|
|
|
C<"\n"> and C<"\r"> to be legal line endings, and so has to deal with the |
|
2612
|
|
|
|
|
|
|
embedded newline as a real C<end-of-line>, so it can scan the next line if |
|
2613
|
|
|
|
|
|
|
binary is true, and the newline is inside a quoted field. With this option, |
|
2614
|
|
|
|
|
|
|
we tell L</parse> to parse the line as if C<"\n"> is just nothing more than |
|
2615
|
|
|
|
|
|
|
a binary character. |
|
2616
|
|
|
|
|
|
|
|
|
2617
|
|
|
|
|
|
|
For L</parse> this means that the parser no longer knows about line endings |
|
2618
|
|
|
|
|
|
|
and L</getline> C<chomp>s line endings on reading. |
|
2619
|
|
|
|
|
|
|
|
|
2620
|
|
|
|
|
|
|
=head3 types |
|
2621
|
|
|
|
|
|
|
|
|
2622
|
|
|
|
|
|
|
A set of column types; the attribute is immediately passed to the L</types> |
|
2623
|
|
|
|
|
|
|
method. |
|
2624
|
|
|
|
|
|
|
|
|
2625
|
|
|
|
|
|
|
=head3 callbacks |
|
2626
|
|
|
|
|
|
|
X<callbacks> |
|
2627
|
|
|
|
|
|
|
|
|
2628
|
|
|
|
|
|
|
See the L</Callbacks> section below. |
|
2629
|
|
|
|
|
|
|
|
|
2630
|
|
|
|
|
|
|
=head3 accessors |
|
2631
|
|
|
|
|
|
|
|
|
2632
|
|
|
|
|
|
|
To sum it up, |
|
2633
|
|
|
|
|
|
|
|
|
2634
|
|
|
|
|
|
|
$csv = Text::CSV_XS->new (); |
|
2635
|
|
|
|
|
|
|
|
|
2636
|
|
|
|
|
|
|
is equivalent to |
|
2637
|
|
|
|
|
|
|
|
|
2638
|
|
|
|
|
|
|
$csv = Text::CSV_XS->new ({ |
|
2639
|
|
|
|
|
|
|
eol => undef, # \r, \n, or \r\n |
|
2640
|
|
|
|
|
|
|
sep_char => ',', |
|
2641
|
|
|
|
|
|
|
sep => undef, |
|
2642
|
|
|
|
|
|
|
quote_char => '"', |
|
2643
|
|
|
|
|
|
|
quote => undef, |
|
2644
|
|
|
|
|
|
|
escape_char => '"', |
|
2645
|
|
|
|
|
|
|
binary => 0, |
|
2646
|
|
|
|
|
|
|
decode_utf8 => 1, |
|
2647
|
|
|
|
|
|
|
auto_diag => 0, |
|
2648
|
|
|
|
|
|
|
diag_verbose => 0, |
|
2649
|
|
|
|
|
|
|
blank_is_undef => 0, |
|
2650
|
|
|
|
|
|
|
empty_is_undef => 0, |
|
2651
|
|
|
|
|
|
|
allow_whitespace => 0, |
|
2652
|
|
|
|
|
|
|
allow_loose_quotes => 0, |
|
2653
|
|
|
|
|
|
|
allow_loose_escapes => 0, |
|
2654
|
|
|
|
|
|
|
allow_unquoted_escape => 0, |
|
2655
|
|
|
|
|
|
|
always_quote => 0, |
|
2656
|
|
|
|
|
|
|
quote_empty => 0, |
|
2657
|
|
|
|
|
|
|
quote_space => 1, |
|
2658
|
|
|
|
|
|
|
escape_null => 1, |
|
2659
|
|
|
|
|
|
|
quote_binary => 1, |
|
2660
|
|
|
|
|
|
|
keep_meta_info => 0, |
|
2661
|
|
|
|
|
|
|
strict => 0, |
|
2662
|
|
|
|
|
|
|
skip_empty_rows => 0, |
|
2663
|
|
|
|
|
|
|
formula => 0, |
|
2664
|
|
|
|
|
|
|
verbatim => 0, |
|
2665
|
|
|
|
|
|
|
undef_str => undef, |
|
2666
|
|
|
|
|
|
|
comment_str => undef, |
|
2667
|
|
|
|
|
|
|
types => undef, |
|
2668
|
|
|
|
|
|
|
callbacks => undef, |
|
2669
|
|
|
|
|
|
|
}); |
|
2670
|
|
|
|
|
|
|
|
|
2671
|
|
|
|
|
|
|
For all of the above mentioned flags, an accessor method is available where |
|
2672
|
|
|
|
|
|
|
you can inquire the current value, or change the value |
|
2673
|
|
|
|
|
|
|
|
|
2674
|
|
|
|
|
|
|
my $quote = $csv->quote_char; |
|
2675
|
|
|
|
|
|
|
$csv->binary (1); |
|
2676
|
|
|
|
|
|
|
|
|
2677
|
|
|
|
|
|
|
It is not wise to change these settings halfway through writing C<CSV> data |
|
2678
|
|
|
|
|
|
|
to a stream. If however you want to create a new stream using the available |
|
2679
|
|
|
|
|
|
|
C<CSV> object, there is no harm in changing them. |
|
2680
|
|
|
|
|
|
|
|
|
2681
|
|
|
|
|
|
|
If the L</new> constructor call fails, it returns C<undef>, and makes the |
|
2682
|
|
|
|
|
|
|
fail reason available through the L</error_diag> method. |
|
2683
|
|
|
|
|
|
|
|
|
2684
|
|
|
|
|
|
|
$csv = Text::CSV_XS->new ({ ecs_char => 1 }) or |
|
2685
|
|
|
|
|
|
|
die "".Text::CSV_XS->error_diag (); |
|
2686
|
|
|
|
|
|
|
|
|
2687
|
|
|
|
|
|
|
L</error_diag> will return a string like |
|
2688
|
|
|
|
|
|
|
|
|
2689
|
|
|
|
|
|
|
"INI - Unknown attribute 'ecs_char'" |
|
2690
|
|
|
|
|
|
|
|
|
2691
|
|
|
|
|
|
|
=head2 known_attributes |
|
2692
|
|
|
|
|
|
|
X<known_attributes> |
|
2693
|
|
|
|
|
|
|
|
|
2694
|
|
|
|
|
|
|
@attr = Text::CSV_XS->known_attributes; |
|
2695
|
|
|
|
|
|
|
@attr = Text::CSV_XS::known_attributes; |
|
2696
|
|
|
|
|
|
|
@attr = $csv->known_attributes; |
|
2697
|
|
|
|
|
|
|
|
|
2698
|
|
|
|
|
|
|
This method will return an ordered list of all the supported attributes as |
|
2699
|
|
|
|
|
|
|
described above. This can be useful for knowing what attributes are valid |
|
2700
|
|
|
|
|
|
|
in classes that use or extend Text::CSV_XS. |
|
2701
|
|
|
|
|
|
|
|
|
2702
|
|
|
|
|
|
|
=head2 print |
|
2703
|
|
|
|
|
|
|
X<print> |
|
2704
|
|
|
|
|
|
|
|
|
2705
|
|
|
|
|
|
|
$status = $csv->print ($fh, $colref); |
|
2706
|
|
|
|
|
|
|
|
|
2707
|
|
|
|
|
|
|
Similar to L</combine> + L</string> + L</print>, but much more efficient. |
|
2708
|
|
|
|
|
|
|
It expects an array ref as input (not an array!) and the resulting string |
|
2709
|
|
|
|
|
|
|
is not really created, but immediately written to the C<$fh> object, |
|
2710
|
|
|
|
|
|
|
typically an IO handle or any other object that offers a L</print> method. |
|
2711
|
|
|
|
|
|
|
|
|
2712
|
|
|
|
|
|
|
For performance reasons C<print> does not create a result string, so all |
|
2713
|
|
|
|
|
|
|
L</string>, L</status>, L</fields>, and L</error_input> methods will return |
|
2714
|
|
|
|
|
|
|
undefined information after executing this method. |
|
2715
|
|
|
|
|
|
|
|
|
2716
|
|
|
|
|
|
|
If C<$colref> is C<undef> (explicit, not through a variable argument) and |
|
2717
|
|
|
|
|
|
|
L</bind_columns> was used to specify fields to be printed, it is possible |
|
2718
|
|
|
|
|
|
|
to make performance improvements, as otherwise data would have to be copied |
|
2719
|
|
|
|
|
|
|
as arguments to the method call: |
|
2720
|
|
|
|
|
|
|
|
|
2721
|
|
|
|
|
|
|
$csv->bind_columns (\($foo, $bar)); |
|
2722
|
|
|
|
|
|
|
$status = $csv->print ($fh, undef); |
|
2723
|
|
|
|
|
|
|
|
|
2724
|
|
|
|
|
|
|
A short benchmark |
|
2725
|
|
|
|
|
|
|
|
|
2726
|
|
|
|
|
|
|
my @data = ("aa" .. "zz"); |
|
2727
|
|
|
|
|
|
|
$csv->bind_columns (\(@data)); |
|
2728
|
|
|
|
|
|
|
|
|
2729
|
|
|
|
|
|
|
$csv->print ($fh, [ @data ]); # 11800 recs/sec |
|
2730
|
|
|
|
|
|
|
$csv->print ($fh, \@data ); # 57600 recs/sec |
|
2731
|
|
|
|
|
|
|
$csv->print ($fh, undef ); # 48500 recs/sec |
|
2732
|
|
|
|
|
|
|
|
|
2733
|
|
|
|
|
|
|
=head2 say |
|
2734
|
|
|
|
|
|
|
X<say> |
|
2735
|
|
|
|
|
|
|
|
|
2736
|
|
|
|
|
|
|
$status = $csv->say ($fh, $colref); |
|
2737
|
|
|
|
|
|
|
|
|
2738
|
|
|
|
|
|
|
Like L<C<print>|/print>, but L<C<eol>|/eol> defaults to C<$\>. |
|
2739
|
|
|
|
|
|
|
|
|
2740
|
|
|
|
|
|
|
=head2 print_hr |
|
2741
|
|
|
|
|
|
|
X<print_hr> |
|
2742
|
|
|
|
|
|
|
|
|
2743
|
|
|
|
|
|
|
$csv->print_hr ($fh, $ref); |
|
2744
|
|
|
|
|
|
|
|
|
2745
|
|
|
|
|
|
|
Provides an easy way to print a C<$ref> (as fetched with L</getline_hr>) |
|
2746
|
|
|
|
|
|
|
provided the column names are set with L</column_names>. |
|
2747
|
|
|
|
|
|
|
|
|
2748
|
|
|
|
|
|
|
It is just a wrapper method with basic parameter checks over |
|
2749
|
|
|
|
|
|
|
|
|
2750
|
|
|
|
|
|
|
$csv->print ($fh, [ map { $ref->{$_} } $csv->column_names ]); |
|
2751
|
|
|
|
|
|
|
|
|
2752
|
|
|
|
|
|
|
=head2 combine |
|
2753
|
|
|
|
|
|
|
X<combine> |
|
2754
|
|
|
|
|
|
|
|
|
2755
|
|
|
|
|
|
|
$status = $csv->combine (@fields); |
|
2756
|
|
|
|
|
|
|
|
|
2757
|
|
|
|
|
|
|
This method constructs a C<CSV> record from C<@fields>, returning success |
|
2758
|
|
|
|
|
|
|
or failure. Failure can result from lack of arguments or an argument that |
|
2759
|
|
|
|
|
|
|
contains an invalid character. Upon success, L</string> can be called to |
|
2760
|
|
|
|
|
|
|
retrieve the resultant C<CSV> string. Upon failure, the value returned by |
|
2761
|
|
|
|
|
|
|
L</string> is undefined and L</error_input> could be called to retrieve the |
|
2762
|
|
|
|
|
|
|
invalid argument. |
|
2763
|
|
|
|
|
|
|
|
|
2764
|
|
|
|
|
|
|
=head2 string |
|
2765
|
|
|
|
|
|
|
X<string> |
|
2766
|
|
|
|
|
|
|
|
|
2767
|
|
|
|
|
|
|
$line = $csv->string (); |
|
2768
|
|
|
|
|
|
|
|
|
2769
|
|
|
|
|
|
|
This method returns the input to L</parse> or the resultant C<CSV> string |
|
2770
|
|
|
|
|
|
|
of L</combine>, whichever was called more recently. |
|
2771
|
|
|
|
|
|
|
|
|
2772
|
|
|
|
|
|
|
=head2 getline |
|
2773
|
|
|
|
|
|
|
X<getline> |
|
2774
|
|
|
|
|
|
|
|
|
2775
|
|
|
|
|
|
|
$colref = $csv->getline ($fh); |
|
2776
|
|
|
|
|
|
|
|
|
2777
|
|
|
|
|
|
|
This is the counterpart to L</print>, as L</parse> is the counterpart to |
|
2778
|
|
|
|
|
|
|
L</combine>: it parses a row from the C<$fh> handle using the L</getline> |
|
2779
|
|
|
|
|
|
|
method associated with C<$fh> and parses this row into an array ref. This |
|
2780
|
|
|
|
|
|
|
array ref is returned by the function or C<undef> for failure. When C<$fh> |
|
2781
|
|
|
|
|
|
|
does not support C<getline>, you are likely to hit errors. |
|
2782
|
|
|
|
|
|
|
|
|
2783
|
|
|
|
|
|
|
When fields are bound with L</bind_columns> the return value is a reference |
|
2784
|
|
|
|
|
|
|
to an empty list. |
|
2785
|
|
|
|
|
|
|
|
|
2786
|
|
|
|
|
|
|
The L</string>, L</fields>, and L</status> methods are meaningless again. |
|
2787
|
|
|
|
|
|
|
|
|
2788
|
|
|
|
|
|
|
=head2 getline_all |
|
2789
|
|
|
|
|
|
|
X<getline_all> |
|
2790
|
|
|
|
|
|
|
|
|
2791
|
|
|
|
|
|
|
$arrayref = $csv->getline_all ($fh); |
|
2792
|
|
|
|
|
|
|
$arrayref = $csv->getline_all ($fh, $offset); |
|
2793
|
|
|
|
|
|
|
$arrayref = $csv->getline_all ($fh, $offset, $length); |
|
2794
|
|
|
|
|
|
|
|
|
2795
|
|
|
|
|
|
|
This will return a reference to a list of L<getline ($fh)|/getline> results. |
|
2796
|
|
|
|
|
|
|
In this call, C<keep_meta_info> is disabled. If C<$offset> is negative, as |
|
2797
|
|
|
|
|
|
|
with C<splice>, only the last C<abs ($offset)> records of C<$fh> are taken |
|
2798
|
|
|
|
|
|
|
into consideration. Parameters C<$offset> and C<$length> are expected to be |
|
2799
|
|
|
|
|
|
|
integers. Non-integer values are interpreted as integer without check. |
|
2800
|
|
|
|
|
|
|
|
|
2801
|
|
|
|
|
|
|
Given a CSV file with 10 lines: |
|
2802
|
|
|
|
|
|
|
|
|
2803
|
|
|
|
|
|
|
lines call |
|
2804
|
|
|
|
|
|
|
----- --------------------------------------------------------- |
|
2805
|
|
|
|
|
|
|
0..9 $csv->getline_all ($fh) # all |
|
2806
|
|
|
|
|
|
|
0..9 $csv->getline_all ($fh, 0) # all |
|
2807
|
|
|
|
|
|
|
8..9 $csv->getline_all ($fh, 8) # start at 8 |
|
2808
|
|
|
|
|
|
|
- $csv->getline_all ($fh, 0, 0) # start at 0 first 0 rows |
|
2809
|
|
|
|
|
|
|
0..4 $csv->getline_all ($fh, 0, 5) # start at 0 first 5 rows |
|
2810
|
|
|
|
|
|
|
4..5 $csv->getline_all ($fh, 4, 2) # start at 4 first 2 rows |
|
2811
|
|
|
|
|
|
|
8..9 $csv->getline_all ($fh, -2) # last 2 rows |
|
2812
|
|
|
|
|
|
|
6..7 $csv->getline_all ($fh, -4, 2) # first 2 of last 4 rows |
|
2813
|
|
|
|
|
|
|
|
|
2814
|
|
|
|
|
|
|
=head2 getline_hr |
|
2815
|
|
|
|
|
|
|
X<getline_hr> |
|
2816
|
|
|
|
|
|
|
|
|
2817
|
|
|
|
|
|
|
The L</getline_hr> and L</column_names> methods work together to allow you |
|
2818
|
|
|
|
|
|
|
to have rows returned as hashrefs. You must call L</column_names> first to |
|
2819
|
|
|
|
|
|
|
declare your column names. |
|
2820
|
|
|
|
|
|
|
|
|
2821
|
|
|
|
|
|
|
$csv->column_names (qw( code name price description )); |
|
2822
|
|
|
|
|
|
|
$hr = $csv->getline_hr ($fh); |
|
2823
|
|
|
|
|
|
|
print "Price for $hr->{name} is $hr->{price} EUR\n"; |
|
2824
|
|
|
|
|
|
|
|
|
2825
|
|
|
|
|
|
|
L</getline_hr> will croak if called before L</column_names>. |
|
2826
|
|
|
|
|
|
|
|
|
2827
|
|
|
|
|
|
|
Note that L</getline_hr> creates a hashref for every row and will be much |
|
2828
|
|
|
|
|
|
|
slower than the combined use of L</bind_columns> and L</getline> but still |
|
2829
|
|
|
|
|
|
|
offering the same easy to use hashref inside the loop: |
|
2830
|
|
|
|
|
|
|
|
|
2831
|
|
|
|
|
|
|
my @cols = @{$csv->getline ($fh)}; |
|
2832
|
|
|
|
|
|
|
$csv->column_names (@cols); |
|
2833
|
|
|
|
|
|
|
while (my $row = $csv->getline_hr ($fh)) { |
|
2834
|
|
|
|
|
|
|
print $row->{price}; |
|
2835
|
|
|
|
|
|
|
} |
|
2836
|
|
|
|
|
|
|
|
|
2837
|
|
|
|
|
|
|
Could easily be rewritten to the much faster: |
|
2838
|
|
|
|
|
|
|
|
|
2839
|
|
|
|
|
|
|
my @cols = @{$csv->getline ($fh)}; |
|
2840
|
|
|
|
|
|
|
my $row = {}; |
|
2841
|
|
|
|
|
|
|
$csv->bind_columns (\@{$row}{@cols}); |
|
2842
|
|
|
|
|
|
|
while ($csv->getline ($fh)) { |
|
2843
|
|
|
|
|
|
|
print $row->{price}; |
|
2844
|
|
|
|
|
|
|
} |
|
2845
|
|
|
|
|
|
|
|
|
2846
|
|
|
|
|
|
|
Your mileage may vary for the size of the data and the number of rows. With |
|
2847
|
|
|
|
|
|
|
perl-5.14.2 the comparison for a 100_000 line file with 14 columns: |
|
2848
|
|
|
|
|
|
|
|
|
2849
|
|
|
|
|
|
|
Rate hashrefs getlines |
|
2850
|
|
|
|
|
|
|
hashrefs 1.00/s -- -76% |
|
2851
|
|
|
|
|
|
|
getlines 4.15/s 313% -- |
|
2852
|
|
|
|
|
|
|
|
|
2853
|
|
|
|
|
|
|
=head2 getline_hr_all |
|
2854
|
|
|
|
|
|
|
X<getline_hr_all> |
|
2855
|
|
|
|
|
|
|
|
|
2856
|
|
|
|
|
|
|
$arrayref = $csv->getline_hr_all ($fh); |
|
2857
|
|
|
|
|
|
|
$arrayref = $csv->getline_hr_all ($fh, $offset); |
|
2858
|
|
|
|
|
|
|
$arrayref = $csv->getline_hr_all ($fh, $offset, $length); |
|
2859
|
|
|
|
|
|
|
|
|
2860
|
|
|
|
|
|
|
This will return a reference to a list of L<getline_hr ($fh)|/getline_hr> |
|
2861
|
|
|
|
|
|
|
results. In this call, L<C<keep_meta_info>|/keep_meta_info> is disabled. |
|
2862
|
|
|
|
|
|
|
|
|
2863
|
|
|
|
|
|
|
=head2 parse |
|
2864
|
|
|
|
|
|
|
X<parse> |
|
2865
|
|
|
|
|
|
|
|
|
2866
|
|
|
|
|
|
|
$status = $csv->parse ($line); |
|
2867
|
|
|
|
|
|
|
|
|
2868
|
|
|
|
|
|
|
This method decomposes a C<CSV> string into fields, returning success or |
|
2869
|
|
|
|
|
|
|
failure. Failure can result from a lack of argument or the given C<CSV> |
|
2870
|
|
|
|
|
|
|
string is improperly formatted. Upon success, L</fields> can be called to |
|
2871
|
|
|
|
|
|
|
retrieve the decomposed fields. Upon failure calling L</fields> will return |
|
2872
|
|
|
|
|
|
|
undefined data and L</error_input> can be called to retrieve the invalid |
|
2873
|
|
|
|
|
|
|
argument. |
|
2874
|
|
|
|
|
|
|
|
|
2875
|
|
|
|
|
|
|
You may use the L</types> method for setting column types. See L</types>' |
|
2876
|
|
|
|
|
|
|
description below. |
|
2877
|
|
|
|
|
|
|
|
|
2878
|
|
|
|
|
|
|
The C<$line> argument is supposed to be a simple scalar. Everything else is |
|
2879
|
|
|
|
|
|
|
supposed to croak and set error 1500. |
|
2880
|
|
|
|
|
|
|
|
|
2881
|
|
|
|
|
|
|
=head2 fragment |
|
2882
|
|
|
|
|
|
|
X<fragment> |
|
2883
|
|
|
|
|
|
|
|
|
2884
|
|
|
|
|
|
|
This function tries to implement RFC7111 (URI Fragment Identifiers for the |
|
2885
|
|
|
|
|
|
|
text/csv Media Type) - https://datatracker.ietf.org/doc/html/rfc7111 |
|
2886
|
|
|
|
|
|
|
|
|
2887
|
|
|
|
|
|
|
my $AoA = $csv->fragment ($fh, $spec); |
|
2888
|
|
|
|
|
|
|
|
|
2889
|
|
|
|
|
|
|
In specifications, C<*> is used to specify the I<last> item, a dash (C<->) |
|
2890
|
|
|
|
|
|
|
to indicate a range. All indices are C<1>-based: the first row or column |
|
2891
|
|
|
|
|
|
|
has index C<1>. Selections can be combined with the semi-colon (C<;>). |
|
2892
|
|
|
|
|
|
|
|
|
2893
|
|
|
|
|
|
|
When using this method in combination with L</column_names>, the returned |
|
2894
|
|
|
|
|
|
|
reference will point to a list of hashes instead of a list of lists. A |
|
2895
|
|
|
|
|
|
|
disjointed cell-based combined selection might return rows with different |
|
2896
|
|
|
|
|
|
|
number of columns making the use of hashes unpredictable. |
|
2897
|
|
|
|
|
|
|
|
|
2898
|
|
|
|
|
|
|
$csv->column_names ("Name", "Age"); |
|
2899
|
|
|
|
|
|
|
my $AoH = $csv->fragment ($fh, "col=3;8"); |
|
2900
|
|
|
|
|
|
|
|
|
2901
|
|
|
|
|
|
|
If the L</after_parse> callback is active, it is also called on every line |
|
2902
|
|
|
|
|
|
|
parsed and skipped before the fragment. |
|
2903
|
|
|
|
|
|
|
|
|
2904
|
|
|
|
|
|
|
=over 2 |
|
2905
|
|
|
|
|
|
|
|
|
2906
|
|
|
|
|
|
|
=item row |
|
2907
|
|
|
|
|
|
|
|
|
2908
|
|
|
|
|
|
|
row=4 |
|
2909
|
|
|
|
|
|
|
row=5-7 |
|
2910
|
|
|
|
|
|
|
row=6-* |
|
2911
|
|
|
|
|
|
|
row=1-2;4;6-* |
|
2912
|
|
|
|
|
|
|
|
|
2913
|
|
|
|
|
|
|
=item col |
|
2914
|
|
|
|
|
|
|
|
|
2915
|
|
|
|
|
|
|
col=2 |
|
2916
|
|
|
|
|
|
|
col=1-3 |
|
2917
|
|
|
|
|
|
|
col=4-* |
|
2918
|
|
|
|
|
|
|
col=1-2;4;7-* |
|
2919
|
|
|
|
|
|
|
|
|
2920
|
|
|
|
|
|
|
=item cell |
|
2921
|
|
|
|
|
|
|
|
|
2922
|
|
|
|
|
|
|
In cell-based selection, the comma (C<,>) is used to pair row and column |
|
2923
|
|
|
|
|
|
|
|
|
2924
|
|
|
|
|
|
|
cell=4,1 |
|
2925
|
|
|
|
|
|
|
|
|
2926
|
|
|
|
|
|
|
The range operator (C<->) using C<cell>s can be used to define top-left and |
|
2927
|
|
|
|
|
|
|
bottom-right C<cell> location |
|
2928
|
|
|
|
|
|
|
|
|
2929
|
|
|
|
|
|
|
cell=3,1-4,6 |
|
2930
|
|
|
|
|
|
|
|
|
2931
|
|
|
|
|
|
|
The C<*> is only allowed in the second part of a pair |
|
2932
|
|
|
|
|
|
|
|
|
2933
|
|
|
|
|
|
|
cell=3,2-*,2 # row 3 till end, only column 2 |
|
2934
|
|
|
|
|
|
|
cell=3,2-3,* # column 2 till end, only row 3 |
|
2935
|
|
|
|
|
|
|
cell=3,2-*,* # strip row 1 and 2, and column 1 |
|
2936
|
|
|
|
|
|
|
|
|
2937
|
|
|
|
|
|
|
Cells and cell ranges may be combined with C<;>, possibly resulting in rows |
|
2938
|
|
|
|
|
|
|
with different numbers of columns |
|
2939
|
|
|
|
|
|
|
|
|
2940
|
|
|
|
|
|
|
cell=1,1-2,2;3,3-4,4;1,4;4,1 |
|
2941
|
|
|
|
|
|
|
|
|
2942
|
|
|
|
|
|
|
Disjointed selections will only return selected cells. The cells that are |
|
2943
|
|
|
|
|
|
|
not specified will not be included in the returned set, not even as |
|
2944
|
|
|
|
|
|
|
C<undef>. As an example given a C<CSV> like |
|
2945
|
|
|
|
|
|
|
|
|
2946
|
|
|
|
|
|
|
11,12,13,...19 |
|
2947
|
|
|
|
|
|
|
21,22,...28,29 |
|
2948
|
|
|
|
|
|
|
: : |
|
2949
|
|
|
|
|
|
|
91,...97,98,99 |
|
2950
|
|
|
|
|
|
|
|
|
2951
|
|
|
|
|
|
|
with C<cell=1,1-2,2;3,3-4,4;1,4;4,1> will return: |
|
2952
|
|
|
|
|
|
|
|
|
2953
|
|
|
|
|
|
|
11,12,14 |
|
2954
|
|
|
|
|
|
|
21,22 |
|
2955
|
|
|
|
|
|
|
33,34 |
|
2956
|
|
|
|
|
|
|
41,43,44 |
|
2957
|
|
|
|
|
|
|
|
|
2958
|
|
|
|
|
|
|
Overlapping cell-specs will return those cells only once, so |
|
2959
|
|
|
|
|
|
|
C<cell=1,1-3,3;2,2-4,4;2,3;4,2> will return: |
|
2960
|
|
|
|
|
|
|
|
|
2961
|
|
|
|
|
|
|
11,12,13 |
|
2962
|
|
|
|
|
|
|
21,22,23,24 |
|
2963
|
|
|
|
|
|
|
31,32,33,34 |
|
2964
|
|
|
|
|
|
|
42,43,44 |
|
2965
|
|
|
|
|
|
|
|
|
2966
|
|
|
|
|
|
|
=back |
|
2967
|
|
|
|
|
|
|
|
|
2968
|
|
|
|
|
|
|
L<RFC7111|https://datatracker.ietf.org/doc/html/rfc7111> does B<not> allow different |
|
2969
|
|
|
|
|
|
|
types of specs to be combined (either C<row> I<or> C<col> I<or> C<cell>). |
|
2970
|
|
|
|
|
|
|
Passing an invalid fragment specification will croak and set error 2013. |
|
2971
|
|
|
|
|
|
|
|
|
2972
|
|
|
|
|
|
|
=head2 column_names |
|
2973
|
|
|
|
|
|
|
X<column_names> |
|
2974
|
|
|
|
|
|
|
|
|
2975
|
|
|
|
|
|
|
Set the "keys" that will be used in the L</getline_hr> calls. If no keys |
|
2976
|
|
|
|
|
|
|
(column names) are passed, it will return the current setting as a list. |
|
2977
|
|
|
|
|
|
|
|
|
2978
|
|
|
|
|
|
|
L</column_names> accepts a list of scalars (the column names) or a single |
|
2979
|
|
|
|
|
|
|
array_ref, so you can pass the return value from L</getline> too: |
|
2980
|
|
|
|
|
|
|
|
|
2981
|
|
|
|
|
|
|
$csv->column_names ($csv->getline ($fh)); |
|
2982
|
|
|
|
|
|
|
|
|
2983
|
|
|
|
|
|
|
L</column_names> does B<no> checking on duplicates at all, which might lead |
|
2984
|
|
|
|
|
|
|
to unexpected results. Undefined entries will be replaced with the string |
|
2985
|
|
|
|
|
|
|
C<"\cAUNDEF\cA">, so |
|
2986
|
|
|
|
|
|
|
|
|
2987
|
|
|
|
|
|
|
$csv->column_names (undef, "", "name", "name"); |
|
2988
|
|
|
|
|
|
|
$hr = $csv->getline_hr ($fh); |
|
2989
|
|
|
|
|
|
|
|
|
2990
|
|
|
|
|
|
|
will set C<< $hr->{"\cAUNDEF\cA"} >> to the 1st field, C<< $hr->{""} >> to |
|
2991
|
|
|
|
|
|
|
the 2nd field, and C<< $hr->{name} >> to the 4th field, discarding the 3rd |
|
2992
|
|
|
|
|
|
|
field. |
|
2993
|
|
|
|
|
|
|
|
|
2994
|
|
|
|
|
|
|
L</column_names> croaks on invalid arguments. |
|
2995
|
|
|
|
|
|
|
|
|
2996
|
|
|
|
|
|
|
=head2 header |
|
2997
|
|
|
|
|
|
|
|
|
2998
|
|
|
|
|
|
|
This method does NOT work in perl-5.6.x |
|
2999
|
|
|
|
|
|
|
|
|
3000
|
|
|
|
|
|
|
Parse the CSV header and set L<C<sep>|/sep>, column_names and encoding. |
|
3001
|
|
|
|
|
|
|
|
|
3002
|
|
|
|
|
|
|
my @hdr = $csv->header ($fh); |
|
3003
|
|
|
|
|
|
|
$csv->header ($fh, { sep_set => [ ";", ",", "|", "\t" ] }); |
|
3004
|
|
|
|
|
|
|
$csv->header ($fh, { detect_bom => 1, munge_column_names => "lc" }); |
|
3005
|
|
|
|
|
|
|
|
|
3006
|
|
|
|
|
|
|
The first argument should be a file handle. |
|
3007
|
|
|
|
|
|
|
|
|
3008
|
|
|
|
|
|
|
This method resets some object properties, as it is supposed to be invoked |
|
3009
|
|
|
|
|
|
|
only once per file or stream. It will leave attributes C<column_names> and |
|
3010
|
|
|
|
|
|
|
C<bound_columns> alone if setting column names is disabled. Reading headers |
|
3011
|
|
|
|
|
|
|
on previously processed objects might fail on perl-5.8.0 and older. |
|
3012
|
|
|
|
|
|
|
|
|
3013
|
|
|
|
|
|
|
Assuming that the file opened for parsing has a header, and the header does |
|
3014
|
|
|
|
|
|
|
not contain problematic characters like embedded newlines, read the first |
|
3015
|
|
|
|
|
|
|
line from the open handle then auto-detect whether the header separates the |
|
3016
|
|
|
|
|
|
|
column names with a character from the allowed separator list. |
|
3017
|
|
|
|
|
|
|
|
|
3018
|
|
|
|
|
|
|
If any of the allowed separators matches, and none of the I<other> allowed |
|
3019
|
|
|
|
|
|
|
separators match, set L<C<sep>|/sep> to that separator for the current |
|
3020
|
|
|
|
|
|
|
CSV_XS instance and use it to parse the first line, map those to lowercase, |
|
3021
|
|
|
|
|
|
|
and use that to set the instance L</column_names>: |
|
3022
|
|
|
|
|
|
|
|
|
3023
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 }); |
|
3024
|
|
|
|
|
|
|
open my $fh, "<", "file.csv"; |
|
3025
|
|
|
|
|
|
|
binmode $fh; # for Windows |
|
3026
|
|
|
|
|
|
|
$csv->header ($fh); |
|
3027
|
|
|
|
|
|
|
while (my $row = $csv->getline_hr ($fh)) { |
|
3028
|
|
|
|
|
|
|
... |
|
3029
|
|
|
|
|
|
|
} |
|
3030
|
|
|
|
|
|
|
|
|
3031
|
|
|
|
|
|
|
If the header is empty, contains more than one unique separator out of the |
|
3032
|
|
|
|
|
|
|
allowed set, contains empty fields, or contains identical fields (after |
|
3033
|
|
|
|
|
|
|
folding), it will croak with error 1010, 1011, 1012, or 1013 respectively. |
|
3034
|
|
|
|
|
|
|
|
|
3035
|
|
|
|
|
|
|
If the header contains embedded newlines or is not valid CSV in any other |
|
3036
|
|
|
|
|
|
|
way, this method will croak and leave the parse error untouched. |
|
3037
|
|
|
|
|
|
|
|
|
3038
|
|
|
|
|
|
|
A successful call to C<header> will always set the L<C<sep>|/sep> of the |
|
3039
|
|
|
|
|
|
|
C<$csv> object. This behavior can not be disabled. |
|
3040
|
|
|
|
|
|
|
|
|
3041
|
|
|
|
|
|
|
=head3 return value |
|
3042
|
|
|
|
|
|
|
|
|
3043
|
|
|
|
|
|
|
On error this method will croak. |
|
3044
|
|
|
|
|
|
|
|
|
3045
|
|
|
|
|
|
|
In list context, the headers will be returned whether they are used to set |
|
3046
|
|
|
|
|
|
|
L</column_names> or not. |
|
3047
|
|
|
|
|
|
|
|
|
3048
|
|
|
|
|
|
|
In scalar context, the instance itself is returned. B<Note>: the values as |
|
3049
|
|
|
|
|
|
|
found in the header will effectively be B<lost> if C<set_column_names> is |
|
3050
|
|
|
|
|
|
|
false. |
|
3051
|
|
|
|
|
|
|
|
|
3052
|
|
|
|
|
|
|
=head3 Options |
|
3053
|
|
|
|
|
|
|
|
|
3054
|
|
|
|
|
|
|
=over 2 |
|
3055
|
|
|
|
|
|
|
|
|
3056
|
|
|
|
|
|
|
=item sep_set |
|
3057
|
|
|
|
|
|
|
X<sep_set> |
|
3058
|
|
|
|
|
|
|
|
|
3059
|
|
|
|
|
|
|
$csv->header ($fh, { sep_set => [ ";", ",", "|", "\t" ] }); |
|
3060
|
|
|
|
|
|
|
|
|
3061
|
|
|
|
|
|
|
The list of legal separators defaults to C<[ ";", "," ]> and can be changed |
|
3062
|
|
|
|
|
|
|
by this option. As this is probably the most often used option, it can be |
|
3063
|
|
|
|
|
|
|
passed on its own as an unnamed argument: |
|
3064
|
|
|
|
|
|
|
|
|
3065
|
|
|
|
|
|
|
$csv->header ($fh, [ ";", ",", "|", "\t", "::", "\x{2063}" ]); |
|
3066
|
|
|
|
|
|
|
|
|
3067
|
|
|
|
|
|
|
Multi-byte sequences are allowed, both multi-character and Unicode. See |
|
3068
|
|
|
|
|
|
|
L<C<sep>|/sep>. |
|
3069
|
|
|
|
|
|
|
|
|
3070
|
|
|
|
|
|
|
=item detect_bom |
|
3071
|
|
|
|
|
|
|
X<detect_bom> |
|
3072
|
|
|
|
|
|
|
|
|
3073
|
|
|
|
|
|
|
$csv->header ($fh, { detect_bom => 1 }); |
|
3074
|
|
|
|
|
|
|
|
|
3075
|
|
|
|
|
|
|
The default behavior is to detect if the header line starts with a BOM. If |
|
3076
|
|
|
|
|
|
|
the header has a BOM, use that to set the encoding of C<$fh>. This default |
|
3077
|
|
|
|
|
|
|
behavior can be disabled by passing a false value to C<detect_bom>. |
|
3078
|
|
|
|
|
|
|
|
|
3079
|
|
|
|
|
|
|
Supported encodings from BOM are: UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, and |
|
3080
|
|
|
|
|
|
|
UTF-32LE. BOM also supports UTF-1, UTF-EBCDIC, SCSU, BOCU-1, and GB-18030 |
|
3081
|
|
|
|
|
|
|
but L<Encode> does not (yet). UTF-7 is not supported. |
|
3082
|
|
|
|
|
|
|
|
|
3083
|
|
|
|
|
|
|
If a supported BOM was detected as start of the stream, it is stored in the |
|
3084
|
|
|
|
|
|
|
object attribute C<ENCODING>. |
|
3085
|
|
|
|
|
|
|
|
|
3086
|
|
|
|
|
|
|
my $enc = $csv->{ENCODING}; |
|
3087
|
|
|
|
|
|
|
|
|
3088
|
|
|
|
|
|
|
The encoding is used with C<binmode> on C<$fh>. |
|
3089
|
|
|
|
|
|
|
|
|
3090
|
|
|
|
|
|
|
If the handle was opened in a (correct) encoding, this method will B<not> |
|
3091
|
|
|
|
|
|
|
alter the encoding, as it checks the leading B<bytes> of the first line. In |
|
3092
|
|
|
|
|
|
|
case the stream starts with a decoded BOM (C<U+FEFF>), C<{ENCODING}> will be |
|
3093
|
|
|
|
|
|
|
C<""> (empty) instead of the default C<undef>. |
|
3094
|
|
|
|
|
|
|
|
|
3095
|
|
|
|
|
|
|
=item munge_column_names |
|
3096
|
|
|
|
|
|
|
X<munge_column_names> |
|
3097
|
|
|
|
|
|
|
|
|
3098
|
|
|
|
|
|
|
This option offers the means to modify the column names into something that |
|
3099
|
|
|
|
|
|
|
is most useful to the application. The default is to map all column names |
|
3100
|
|
|
|
|
|
|
to lower case. |
|
3101
|
|
|
|
|
|
|
|
|
3102
|
|
|
|
|
|
|
$csv->header ($fh, { munge_column_names => "lc" }); |
|
3103
|
|
|
|
|
|
|
|
|
3104
|
|
|
|
|
|
|
The following values are available: |
|
3105
|
|
|
|
|
|
|
|
|
3106
|
|
|
|
|
|
|
lc - lower case |
|
3107
|
|
|
|
|
|
|
uc - upper case |
|
3108
|
|
|
|
|
|
|
db - valid DB field names |
|
3109
|
|
|
|
|
|
|
none - do not change |
|
3110
|
|
|
|
|
|
|
\%hash - supply a mapping |
|
3111
|
|
|
|
|
|
|
\&cb - supply a callback |
|
3112
|
|
|
|
|
|
|
|
|
3113
|
|
|
|
|
|
|
=over 2 |
|
3114
|
|
|
|
|
|
|
|
|
3115
|
|
|
|
|
|
|
=item Lower case |
|
3116
|
|
|
|
|
|
|
|
|
3117
|
|
|
|
|
|
|
$csv->header ($fh, { munge_column_names => "lc" }); |
|
3118
|
|
|
|
|
|
|
|
|
3119
|
|
|
|
|
|
|
The header is changed to all lower-case |
|
3120
|
|
|
|
|
|
|
|
|
3121
|
|
|
|
|
|
|
$_ = lc; |
|
3122
|
|
|
|
|
|
|
|
|
3123
|
|
|
|
|
|
|
=item Upper case |
|
3124
|
|
|
|
|
|
|
|
|
3125
|
|
|
|
|
|
|
$csv->header ($fh, { munge_column_names => "uc" }); |
|
3126
|
|
|
|
|
|
|
|
|
3127
|
|
|
|
|
|
|
The header is changed to all upper-case |
|
3128
|
|
|
|
|
|
|
|
|
3129
|
|
|
|
|
|
|
$_ = uc; |
|
3130
|
|
|
|
|
|
|
|
|
3131
|
|
|
|
|
|
|
=item Literal |
|
3132
|
|
|
|
|
|
|
|
|
3133
|
|
|
|
|
|
|
$csv->header ($fh, { munge_column_names => "none" }); |
|
3134
|
|
|
|
|
|
|
|
|
3135
|
|
|
|
|
|
|
=item Hash |
|
3136
|
|
|
|
|
|
|
|
|
3137
|
|
|
|
|
|
|
$csv->header ($fh, { munge_column_names => { foo => "sombrero" }}); |
|
3138
|
|
|
|
|
|
|
|
|
3139
|
|
|
|
|
|
|
If a column name has no entry in the hash, the original value is used unchanged. |
|
3140
|
|
|
|
|
|
|
|
|
3141
|
|
|
|
|
|
|
=item Database |
|
3142
|
|
|
|
|
|
|
|
|
3143
|
|
|
|
|
|
|
$csv->header ($fh, { munge_column_names => "db" }); |
|
3144
|
|
|
|
|
|
|
|
|
3145
|
|
|
|
|
|
|
=over 2 |
|
3146
|
|
|
|
|
|
|
|
|
3147
|
|
|
|
|
|
|
=item - |
|
3148
|
|
|
|
|
|
|
|
|
3149
|
|
|
|
|
|
|
lower-case |
|
3150
|
|
|
|
|
|
|
|
|
3151
|
|
|
|
|
|
|
=item - |
|
3152
|
|
|
|
|
|
|
|
|
3153
|
|
|
|
|
|
|
all sequences of non-word characters are replaced with an underscore |
|
3154
|
|
|
|
|
|
|
|
|
3155
|
|
|
|
|
|
|
=item - |
|
3156
|
|
|
|
|
|
|
|
|
3157
|
|
|
|
|
|
|
all leading underscores are removed |
|
3158
|
|
|
|
|
|
|
|
|
3159
|
|
|
|
|
|
|
=back |
|
3160
|
|
|
|
|
|
|
|
|
3161
|
|
|
|
|
|
|
$_ = lc (s/\W+/_/gr =~ s/^_+//r); |
|
3162
|
|
|
|
|
|
|
|
|
3163
|
|
|
|
|
|
|
=item Callback |
|
3164
|
|
|
|
|
|
|
|
|
3165
|
|
|
|
|
|
|
$csv->header ($fh, { munge_column_names => sub { fc } }); |
|
3166
|
|
|
|
|
|
|
$csv->header ($fh, { munge_column_names => sub { "column_".$col++ } }); |
|
3167
|
|
|
|
|
|
|
$csv->header ($fh, { munge_column_names => sub { lc (s/\W+/_/gr) } }); |
|
3168
|
|
|
|
|
|
|
|
|
3169
|
|
|
|
|
|
|
As this callback is called in a C<map>, you can use C<$_> directly. |
|
3170
|
|
|
|
|
|
|
|
|
3171
|
|
|
|
|
|
|
=back |
|
3172
|
|
|
|
|
|
|
|
|
3173
|
|
|
|
|
|
|
=item set_column_names |
|
3174
|
|
|
|
|
|
|
X<set_column_names> |
|
3175
|
|
|
|
|
|
|
|
|
3176
|
|
|
|
|
|
|
$csv->header ($fh, { set_column_names => 1 }); |
|
3177
|
|
|
|
|
|
|
|
|
3178
|
|
|
|
|
|
|
The default is to set the instance's column names using L</column_names> if |
|
3179
|
|
|
|
|
|
|
the method is successful, so subsequent calls to L</getline_hr> can return |
|
3180
|
|
|
|
|
|
|
a hash. Disabling this can be forced by using a false value for |
|
3181
|
|
|
|
|
|
|
this option. |
|
3182
|
|
|
|
|
|
|
|
|
3183
|
|
|
|
|
|
|
As described in L</return value> above, content is lost in scalar context. |
|
3184
|
|
|
|
|
|
|
|
|
3185
|
|
|
|
|
|
|
=back |
|
3186
|
|
|
|
|
|
|
|
|
3187
|
|
|
|
|
|
|
=head3 Validation |
|
3188
|
|
|
|
|
|
|
|
|
3189
|
|
|
|
|
|
|
When receiving CSV files from external sources, this method can be used to |
|
3190
|
|
|
|
|
|
|
protect against changes in the layout by restricting to known headers (and |
|
3191
|
|
|
|
|
|
|
typos in the header fields). |
|
3192
|
|
|
|
|
|
|
|
|
3193
|
|
|
|
|
|
|
my %known = ( |
|
3194
|
|
|
|
|
|
|
"record key" => "c_rec", |
|
3195
|
|
|
|
|
|
|
"rec id" => "c_rec", |
|
3196
|
|
|
|
|
|
|
"id_rec" => "c_rec", |
|
3197
|
|
|
|
|
|
|
"kode" => "code", |
|
3198
|
|
|
|
|
|
|
"code" => "code", |
|
3199
|
|
|
|
|
|
|
"vaule" => "value", |
|
3200
|
|
|
|
|
|
|
"value" => "value", |
|
3201
|
|
|
|
|
|
|
); |
|
3202
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 }); |
|
3203
|
|
|
|
|
|
|
open my $fh, "<", $source or die "$source: $!"; |
|
3204
|
|
|
|
|
|
|
$csv->header ($fh, { munge_column_names => sub { |
|
3205
|
|
|
|
|
|
|
s/\s+$//; |
|
3206
|
|
|
|
|
|
|
s/^\s+//; |
|
3207
|
|
|
|
|
|
|
$known{lc $_} or die "Unknown column '$_' in $source"; |
|
3208
|
|
|
|
|
|
|
}}); |
|
3209
|
|
|
|
|
|
|
while (my $row = $csv->getline_hr ($fh)) { |
|
3210
|
|
|
|
|
|
|
say join "\t", $row->{c_rec}, $row->{code}, $row->{value}; |
|
3211
|
|
|
|
|
|
|
} |
|
3212
|
|
|
|
|
|
|
|
|
3213
|
|
|
|
|
|
|
=head2 bind_columns |
|
3214
|
|
|
|
|
|
|
X<bind_columns> |
|
3215
|
|
|
|
|
|
|
|
|
3216
|
|
|
|
|
|
|
Takes a list of scalar references to be used for output with L</print> or |
|
3217
|
|
|
|
|
|
|
to store in the fields fetched by L</getline>. When you do not pass enough |
|
3218
|
|
|
|
|
|
|
references to store the fetched fields in, L</getline> will fail with error |
|
3219
|
|
|
|
|
|
|
C<3006>. If you pass more than there are fields to return, the content of |
|
3220
|
|
|
|
|
|
|
the remaining references is left untouched. Under C<strict> the two should |
|
3221
|
|
|
|
|
|
|
match, otherwise L</getline> will fail with error C<2014>. |
|
3222
|
|
|
|
|
|
|
|
|
3223
|
|
|
|
|
|
|
$csv->bind_columns (\$code, \$name, \$price, \$description); |
|
3224
|
|
|
|
|
|
|
while ($csv->getline ($fh)) { |
|
3225
|
|
|
|
|
|
|
print "The price of a $name is \x{20ac} $price\n"; |
|
3226
|
|
|
|
|
|
|
} |
|
3227
|
|
|
|
|
|
|
|
|
3228
|
|
|
|
|
|
|
To reset or clear all column binding, call L</bind_columns> with the single |
|
3229
|
|
|
|
|
|
|
argument C<undef>. This will also clear column names. |
|
3230
|
|
|
|
|
|
|
|
|
3231
|
|
|
|
|
|
|
$csv->bind_columns (undef); |
|
3232
|
|
|
|
|
|
|
|
|
3233
|
|
|
|
|
|
|
If no arguments are passed at all, L</bind_columns> will return the list of |
|
3234
|
|
|
|
|
|
|
current bindings or C<undef> if no binds are active. |
|
3235
|
|
|
|
|
|
|
|
|
3236
|
|
|
|
|
|
|
Note that in parsing with C<bind_columns>, the fields are set on the fly. |
|
3237
|
|
|
|
|
|
|
That implies that if the third field of a row causes an error (or this row |
|
3238
|
|
|
|
|
|
|
has just two fields where the previous row had more), the first two fields |
|
3239
|
|
|
|
|
|
|
already have been assigned the values of the current row, while the rest of |
|
3240
|
|
|
|
|
|
|
the fields will still hold the values of the previous row. If you want the |
|
3241
|
|
|
|
|
|
|
parser to fail in these cases, use the L<C<strict>|/strict> attribute. |
|
3242
|
|
|
|
|
|
|
|
|
3243
|
|
|
|
|
|
|
=head2 eof |
|
3244
|
|
|
|
|
|
|
X<eof> |
|
3245
|
|
|
|
|
|
|
|
|
3246
|
|
|
|
|
|
|
$eof = $csv->eof (); |
|
3247
|
|
|
|
|
|
|
|
|
3248
|
|
|
|
|
|
|
If L</parse> or L</getline> was used with an IO stream, this method will |
|
3249
|
|
|
|
|
|
|
return true (1) if the last call hit end of file, otherwise it will return |
|
3250
|
|
|
|
|
|
|
false (''). This is useful to see the difference between a failure and end |
|
3251
|
|
|
|
|
|
|
of file. |
|
3252
|
|
|
|
|
|
|
|
|
3253
|
|
|
|
|
|
|
Note that if the parsing of the last line caused an error, C<eof> is still |
|
3254
|
|
|
|
|
|
|
true. That means that if you are I<not> using L</auto_diag>, an idiom like |
|
3255
|
|
|
|
|
|
|
|
|
3256
|
|
|
|
|
|
|
while (my $row = $csv->getline ($fh)) { |
|
3257
|
|
|
|
|
|
|
# ... |
|
3258
|
|
|
|
|
|
|
} |
|
3259
|
|
|
|
|
|
|
$csv->eof or $csv->error_diag; |
|
3260
|
|
|
|
|
|
|
|
|
3261
|
|
|
|
|
|
|
will I<not> report the error. You would have to change that to |
|
3262
|
|
|
|
|
|
|
|
|
3263
|
|
|
|
|
|
|
while (my $row = $csv->getline ($fh)) { |
|
3264
|
|
|
|
|
|
|
# ... |
|
3265
|
|
|
|
|
|
|
} |
|
3266
|
|
|
|
|
|
|
+$csv->error_diag and $csv->error_diag; |
|
3267
|
|
|
|
|
|
|
|
|
3268
|
|
|
|
|
|
|
=head2 types |
|
3269
|
|
|
|
|
|
|
X<types> |
|
3270
|
|
|
|
|
|
|
|
|
3271
|
|
|
|
|
|
|
$csv->types (\@tref); |
|
3272
|
|
|
|
|
|
|
|
|
3273
|
|
|
|
|
|
|
This method is used to force that (all) columns are of a given type. For |
|
3274
|
|
|
|
|
|
|
example, if you have an integer column, two columns with doubles and a |
|
3275
|
|
|
|
|
|
|
string column, then you might do a |
|
3276
|
|
|
|
|
|
|
|
|
3277
|
|
|
|
|
|
|
$csv->types ([Text::CSV_XS::IV (), |
|
3278
|
|
|
|
|
|
|
Text::CSV_XS::NV (), |
|
3279
|
|
|
|
|
|
|
Text::CSV_XS::NV (), |
|
3280
|
|
|
|
|
|
|
Text::CSV_XS::PV ()]); |
|
3281
|
|
|
|
|
|
|
|
|
3282
|
|
|
|
|
|
|
Column types are used only for I<decoding> columns while parsing, in other |
|
3283
|
|
|
|
|
|
|
words by the L</parse> and L</getline> methods. |
|
3284
|
|
|
|
|
|
|
|
|
3285
|
|
|
|
|
|
|
You can unset column types by doing a |
|
3286
|
|
|
|
|
|
|
|
|
3287
|
|
|
|
|
|
|
$csv->types (undef); |
|
3288
|
|
|
|
|
|
|
|
|
3289
|
|
|
|
|
|
|
or fetch the current type settings with |
|
3290
|
|
|
|
|
|
|
|
|
3291
|
|
|
|
|
|
|
$types = $csv->types (); |
|
3292
|
|
|
|
|
|
|
|
|
3293
|
|
|
|
|
|
|
=over 4 |
|
3294
|
|
|
|
|
|
|
|
|
3295
|
|
|
|
|
|
|
=item IV |
|
3296
|
|
|
|
|
|
|
X<IV> |
|
3297
|
|
|
|
|
|
|
|
|
3298
|
|
|
|
|
|
|
=item CSV_TYPE_IV |
|
3299
|
|
|
|
|
|
|
X<CSV_TYPE_IV> |
|
3300
|
|
|
|
|
|
|
|
|
3301
|
|
|
|
|
|
|
Set field type to integer. |
|
3302
|
|
|
|
|
|
|
|
|
3303
|
|
|
|
|
|
|
=item NV |
|
3304
|
|
|
|
|
|
|
X<NV> |
|
3305
|
|
|
|
|
|
|
|
|
3306
|
|
|
|
|
|
|
=item CSV_TYPE_NV |
|
3307
|
|
|
|
|
|
|
X<CSV_TYPE_NV> |
|
3308
|
|
|
|
|
|
|
|
|
3309
|
|
|
|
|
|
|
Set field type to numeric/float. |
|
3310
|
|
|
|
|
|
|
|
|
3311
|
|
|
|
|
|
|
=item PV |
|
3312
|
|
|
|
|
|
|
X<PV> |
|
3313
|
|
|
|
|
|
|
|
|
3314
|
|
|
|
|
|
|
=item CSV_TYPE_PV |
|
3315
|
|
|
|
|
|
|
X<CSV_TYPE_PV> |
|
3316
|
|
|
|
|
|
|
|
|
3317
|
|
|
|
|
|
|
Set field type to string. |
|
3318
|
|
|
|
|
|
|
|
|
3319
|
|
|
|
|
|
|
=back |
|
3320
|
|
|
|
|
|
|
|
|
3321
|
|
|
|
|
|
|
=head2 fields |
|
3322
|
|
|
|
|
|
|
X<fields> |
|
3323
|
|
|
|
|
|
|
|
|
3324
|
|
|
|
|
|
|
@columns = $csv->fields (); |
|
3325
|
|
|
|
|
|
|
|
|
3326
|
|
|
|
|
|
|
This method returns the input to L</combine> or the resultant decomposed |
|
3327
|
|
|
|
|
|
|
fields of a successful L</parse>, whichever was called more recently. |
|
3328
|
|
|
|
|
|
|
|
|
3329
|
|
|
|
|
|
|
Note that the return value is undefined after using L</getline>, which does |
|
3330
|
|
|
|
|
|
|
not fill the data structures returned by L</parse>. |
|
3331
|
|
|
|
|
|
|
|
|
3332
|
|
|
|
|
|
|
=head2 meta_info |
|
3333
|
|
|
|
|
|
|
X<meta_info> |
|
3334
|
|
|
|
|
|
|
|
|
3335
|
|
|
|
|
|
|
@flags = $csv->meta_info (); |
|
3336
|
|
|
|
|
|
|
|
|
3337
|
|
|
|
|
|
|
This method returns the "flags" of the input to L</combine> or the flags of |
|
3338
|
|
|
|
|
|
|
the resultant decomposed fields of L</parse>, whichever was called more |
|
3339
|
|
|
|
|
|
|
recently. |
|
3340
|
|
|
|
|
|
|
|
|
3341
|
|
|
|
|
|
|
For each field, a meta_info field will hold flags that inform something |
|
3342
|
|
|
|
|
|
|
about the field returned by the L</fields> method or passed to the |
|
3343
|
|
|
|
|
|
|
L</combine> method. The flags are bit-wise-C<or>'d like: |
|
3344
|
|
|
|
|
|
|
|
|
3345
|
|
|
|
|
|
|
=over 2 |
|
3346
|
|
|
|
|
|
|
|
|
3347
|
|
|
|
|
|
|
=item C<0x0001> |
|
3348
|
|
|
|
|
|
|
|
|
3349
|
|
|
|
|
|
|
=item C<CSV_FLAGS_IS_QUOTED> |
|
3350
|
|
|
|
|
|
|
X<CSV_FLAGS_IS_QUOTED> |
|
3351
|
|
|
|
|
|
|
|
|
3352
|
|
|
|
|
|
|
The field was quoted. |
|
3353
|
|
|
|
|
|
|
|
|
3354
|
|
|
|
|
|
|
=item C<0x0002> |
|
3355
|
|
|
|
|
|
|
|
|
3356
|
|
|
|
|
|
|
=item C<CSV_FLAGS_IS_BINARY> |
|
3357
|
|
|
|
|
|
|
X<CSV_FLAGS_IS_BINARY> |
|
3358
|
|
|
|
|
|
|
|
|
3359
|
|
|
|
|
|
|
The field was binary. |
|
3360
|
|
|
|
|
|
|
|
|
3361
|
|
|
|
|
|
|
=item C<0x0004> |
|
3362
|
|
|
|
|
|
|
|
|
3363
|
|
|
|
|
|
|
=item C<CSV_FLAGS_ERROR_IN_FIELD> |
|
3364
|
|
|
|
|
|
|
X<CSV_FLAGS_ERROR_IN_FIELD> |
|
3365
|
|
|
|
|
|
|
|
|
3366
|
|
|
|
|
|
|
The field was invalid. |
|
3367
|
|
|
|
|
|
|
|
|
3368
|
|
|
|
|
|
|
Currently only used when C<allow_loose_quotes> is active. |
|
3369
|
|
|
|
|
|
|
|
|
3370
|
|
|
|
|
|
|
=item C<0x0010> |
|
3371
|
|
|
|
|
|
|
|
|
3372
|
|
|
|
|
|
|
=item C<CSV_FLAGS_IS_MISSING> |
|
3373
|
|
|
|
|
|
|
X<CSV_FLAGS_IS_MISSING> |
|
3374
|
|
|
|
|
|
|
|
|
3375
|
|
|
|
|
|
|
The field was missing. |
|
3376
|
|
|
|
|
|
|
|
|
3377
|
|
|
|
|
|
|
=back |
|
3378
|
|
|
|
|
|
|
|
|
3379
|
|
|
|
|
|
|
See the C<is_***> methods below. |
|
3380
|
|
|
|
|
|
|
|
|
3381
|
|
|
|
|
|
|
=head2 is_quoted |
|
3382
|
|
|
|
|
|
|
X<is_quoted> |
|
3383
|
|
|
|
|
|
|
|
|
3384
|
|
|
|
|
|
|
my $quoted = $csv->is_quoted ($column_idx); |
|
3385
|
|
|
|
|
|
|
|
|
3386
|
|
|
|
|
|
|
where C<$column_idx> is the (zero-based) index of the column in the last |
|
3387
|
|
|
|
|
|
|
result of L</parse>. |
|
3388
|
|
|
|
|
|
|
|
|
3389
|
|
|
|
|
|
|
This returns a true value if the data in the indicated column was enclosed |
|
3390
|
|
|
|
|
|
|
in L<C<quote_char>|/quote_char> quotes. This might be important for fields |
|
3391
|
|
|
|
|
|
|
where content C<,20070108,> is to be treated as a numeric value, and where |
|
3392
|
|
|
|
|
|
|
C<,"20070108",> is explicitly marked as character string data. |
|
3393
|
|
|
|
|
|
|
|
|
3394
|
|
|
|
|
|
|
This method is only valid when L</keep_meta_info> is set to a true value. |
|
3395
|
|
|
|
|
|
|
|
|
3396
|
|
|
|
|
|
|
=head2 is_binary |
|
3397
|
|
|
|
|
|
|
X<is_binary> |
|
3398
|
|
|
|
|
|
|
|
|
3399
|
|
|
|
|
|
|
my $binary = $csv->is_binary ($column_idx); |
|
3400
|
|
|
|
|
|
|
|
|
3401
|
|
|
|
|
|
|
where C<$column_idx> is the (zero-based) index of the column in the last |
|
3402
|
|
|
|
|
|
|
result of L</parse>. |
|
3403
|
|
|
|
|
|
|
|
|
3404
|
|
|
|
|
|
|
This returns a true value if the data in the indicated column contained any |
|
3405
|
|
|
|
|
|
|
byte in the range C<[\x00-\x08,\x10-\x1F,\x7F-\xFF]>. |
|
3406
|
|
|
|
|
|
|
|
|
3407
|
|
|
|
|
|
|
This method is only valid when L</keep_meta_info> is set to a true value. |
|
3408
|
|
|
|
|
|
|
|
|
3409
|
|
|
|
|
|
|
=head2 is_missing |
|
3410
|
|
|
|
|
|
|
X<is_missing> |
|
3411
|
|
|
|
|
|
|
|
|
3412
|
|
|
|
|
|
|
my $missing = $csv->is_missing ($column_idx); |
|
3413
|
|
|
|
|
|
|
|
|
3414
|
|
|
|
|
|
|
where C<$column_idx> is the (zero-based) index of the column in the last |
|
3415
|
|
|
|
|
|
|
result of L</getline_hr>. |
|
3416
|
|
|
|
|
|
|
|
|
3417
|
|
|
|
|
|
|
$csv->keep_meta_info (1); |
|
3418
|
|
|
|
|
|
|
while (my $hr = $csv->getline_hr ($fh)) { |
|
3419
|
|
|
|
|
|
|
$csv->is_missing (0) and next; # This was an empty line |
|
3420
|
|
|
|
|
|
|
} |
|
3421
|
|
|
|
|
|
|
|
|
3422
|
|
|
|
|
|
|
When using L</getline_hr>, it is impossible to tell if the parsed fields |
|
3423
|
|
|
|
|
|
|
are C<undef> because they were not filled in the C<CSV> stream or because |
|
3424
|
|
|
|
|
|
|
they were not read at all, as B<all> the fields defined by L</column_names> |
|
3425
|
|
|
|
|
|
|
are set in the hash-ref. If you still need to know if all fields in each |
|
3426
|
|
|
|
|
|
|
row are provided, you should enable L<C<keep_meta_info>|/keep_meta_info> so |
|
3427
|
|
|
|
|
|
|
you can check the flags. |
|
3428
|
|
|
|
|
|
|
|
|
3429
|
|
|
|
|
|
|
If L<C<keep_meta_info>|/keep_meta_info> is C<false>, C<is_missing> will |
|
3430
|
|
|
|
|
|
|
always return C<undef>, regardless of C<$column_idx> being valid or not. If |
|
3431
|
|
|
|
|
|
|
this attribute is C<true> it will return either C<0> (the field is present) |
|
3432
|
|
|
|
|
|
|
or C<1> (the field is missing). |
|
3433
|
|
|
|
|
|
|
|
|
3434
|
|
|
|
|
|
|
A special case is the empty line. If the line is completely empty - after |
|
3435
|
|
|
|
|
|
|
dealing with the flags - this is still a valid CSV line: it is a record of |
|
3436
|
|
|
|
|
|
|
just one single empty field. However, if C<keep_meta_info> is set, invoking |
|
3437
|
|
|
|
|
|
|
C<is_missing> with index C<0> will now return true. |
|
3438
|
|
|
|
|
|
|
|
|
3439
|
|
|
|
|
|
|
=head2 status |
|
3440
|
|
|
|
|
|
|
X<status> |
|
3441
|
|
|
|
|
|
|
|
|
3442
|
|
|
|
|
|
|
$status = $csv->status (); |
|
3443
|
|
|
|
|
|
|
|
|
3444
|
|
|
|
|
|
|
This method returns the status of the last invoked L</combine> or L</parse> |
|
3445
|
|
|
|
|
|
|
call. Status is success (true: C<1>) or failure (false: C<undef> or C<0>). |
|
3446
|
|
|
|
|
|
|
|
|
3447
|
|
|
|
|
|
|
Note that as this only keeps track of the status of above mentioned methods, |
|
3448
|
|
|
|
|
|
|
you are probably looking for L<C<error_diag>|/error_diag> instead. |
|
3449
|
|
|
|
|
|
|
|
|
3450
|
|
|
|
|
|
|
=head2 error_input |
|
3451
|
|
|
|
|
|
|
X<error_input> |
|
3452
|
|
|
|
|
|
|
|
|
3453
|
|
|
|
|
|
|
$bad_argument = $csv->error_input (); |
|
3454
|
|
|
|
|
|
|
|
|
3455
|
|
|
|
|
|
|
This method returns the erroneous argument (if it exists) of L</combine> or |
|
3456
|
|
|
|
|
|
|
L</parse>, whichever was called more recently. If the last invocation was |
|
3457
|
|
|
|
|
|
|
successful, C<error_input> will return C<undef>. |
|
3458
|
|
|
|
|
|
|
|
|
3459
|
|
|
|
|
|
|
Depending on the type of error, it I<might> also hold the data for the last |
|
3460
|
|
|
|
|
|
|
error-input of L</getline>. |
|
3461
|
|
|
|
|
|
|
|
|
3462
|
|
|
|
|
|
|
=head2 error_diag |
|
3463
|
|
|
|
|
|
|
X<error_diag> |
|
3464
|
|
|
|
|
|
|
|
|
3465
|
|
|
|
|
|
|
Text::CSV_XS->error_diag (); |
|
3466
|
|
|
|
|
|
|
$csv->error_diag (); |
|
3467
|
|
|
|
|
|
|
$error_code = 0 + $csv->error_diag (); |
|
3468
|
|
|
|
|
|
|
$error_str = "" . $csv->error_diag (); |
|
3469
|
|
|
|
|
|
|
($cde, $str, $pos, $rec, $fld, $xs) = $csv->error_diag (); |
|
3470
|
|
|
|
|
|
|
|
|
3471
|
|
|
|
|
|
|
If (and only if) an error occurred, this function returns the diagnostics |
|
3472
|
|
|
|
|
|
|
of that error. |
|
3473
|
|
|
|
|
|
|
|
|
3474
|
|
|
|
|
|
|
If called in void context, this will print the internal error code and the |
|
3475
|
|
|
|
|
|
|
associated error message to STDERR. |
|
3476
|
|
|
|
|
|
|
|
|
3477
|
|
|
|
|
|
|
If called in list context, this will return the error code and the error |
|
3478
|
|
|
|
|
|
|
message in that order. If the last error was from parsing, the rest of the |
|
3479
|
|
|
|
|
|
|
values returned are a best guess at the location within the line that was |
|
3480
|
|
|
|
|
|
|
being parsed. Their values are 1-based. The position currently is index of |
|
3481
|
|
|
|
|
|
|
the byte at which the parsing failed in the current record. It might change |
|
3482
|
|
|
|
|
|
|
to be the index of the current character in a later release. The record number is |
|
3483
|
|
|
|
|
|
|
the index of the record parsed by the csv instance. The field number is the |
|
3484
|
|
|
|
|
|
|
index of the field the parser thinks it is currently trying to parse. See |
|
3485
|
|
|
|
|
|
|
F<examples/csv-check> for how this can be used. If C<$xs> is set, it is the |
|
3486
|
|
|
|
|
|
|
line number in XS where the error was triggered (for debugging). C<XS> will |
|
3487
|
|
|
|
|
|
|
show in void context only when L</diag_verbose> is set. |
|
3488
|
|
|
|
|
|
|
|
|
3489
|
|
|
|
|
|
|
If called in scalar context, it will return the diagnostics in a single |
|
3490
|
|
|
|
|
|
|
scalar, a-la C<$!>. It will contain the error code in numeric context, and |
|
3491
|
|
|
|
|
|
|
the diagnostics message in string context. |
|
3492
|
|
|
|
|
|
|
|
|
3493
|
|
|
|
|
|
|
When called as a class method or a direct function call, the diagnostics |
|
3494
|
|
|
|
|
|
|
are that of the last L</new> call. |
|
3495
|
|
|
|
|
|
|
|
|
3496
|
|
|
|
|
|
|
=head3 _cache_diag |
|
3497
|
|
|
|
|
|
|
|
|
3498
|
|
|
|
|
|
|
Note: This is an internal function only, and output cannot be relied upon. |
|
3499
|
|
|
|
|
|
|
Use at your own risk. |
|
3500
|
|
|
|
|
|
|
|
|
3501
|
|
|
|
|
|
|
If debugging beyond what L</error_diag> is able to show, the internal cache |
|
3502
|
|
|
|
|
|
|
can be shown with this function. |
|
3503
|
|
|
|
|
|
|
|
|
3504
|
|
|
|
|
|
|
# Something failed .. |
|
3505
|
|
|
|
|
|
|
$csv->error_diag; |
|
3506
|
|
|
|
|
|
|
$csv->_cache_diag (); |
|
3507
|
|
|
|
|
|
|
|
|
3508
|
|
|
|
|
|
|
=head2 record_number |
|
3509
|
|
|
|
|
|
|
X<record_number> |
|
3510
|
|
|
|
|
|
|
|
|
3511
|
|
|
|
|
|
|
$recno = $csv->record_number (); |
|
3512
|
|
|
|
|
|
|
|
|
3513
|
|
|
|
|
|
|
Returns the number of records parsed by this csv instance. This value should be more |
|
3514
|
|
|
|
|
|
|
accurate than C<$.> when embedded newlines come into play. Records written by |
|
3515
|
|
|
|
|
|
|
this instance are not counted. |
|
3516
|
|
|
|
|
|
|
|
|
3517
|
|
|
|
|
|
|
=head2 SetDiag |
|
3518
|
|
|
|
|
|
|
X<SetDiag> |
|
3519
|
|
|
|
|
|
|
|
|
3520
|
|
|
|
|
|
|
$csv->SetDiag (0); |
|
3521
|
|
|
|
|
|
|
|
|
3522
|
|
|
|
|
|
|
Use to reset the diagnostics if you are dealing with errors. |
|
3523
|
|
|
|
|
|
|
|
|
3524
|
|
|
|
|
|
|
=head1 IMPORTS/EXPORTS |
|
3525
|
|
|
|
|
|
|
|
|
3526
|
|
|
|
|
|
|
By default none of these are exported. |
|
3527
|
|
|
|
|
|
|
|
|
3528
|
|
|
|
|
|
|
=over 2 |
|
3529
|
|
|
|
|
|
|
|
|
3530
|
|
|
|
|
|
|
=item csv |
|
3531
|
|
|
|
|
|
|
|
|
3532
|
|
|
|
|
|
|
use Text::CSV_XS qw( csv ); |
|
3533
|
|
|
|
|
|
|
|
|
3534
|
|
|
|
|
|
|
Import the L</csv> function. See below. |
|
3535
|
|
|
|
|
|
|
|
|
3536
|
|
|
|
|
|
|
=item :CONSTANTS |
|
3537
|
|
|
|
|
|
|
|
|
3538
|
|
|
|
|
|
|
use Text::CSV_XS qw( :CONSTANTS ); |
|
3539
|
|
|
|
|
|
|
|
|
3540
|
|
|
|
|
|
|
Import module constants L</CSV_FLAGS_IS_QUOTED>, L</CSV_FLAGS_IS_BINARY>, |
|
3541
|
|
|
|
|
|
|
L</CSV_FLAGS_ERROR_IN_FIELD>, L</CSV_FLAGS_IS_MISSING>, L</CSV_TYPE_PV>, |
|
3542
|
|
|
|
|
|
|
L</CSV_TYPE_IV>, and L</CSV_TYPE_NV>. Each can be imported alone |
|
3543
|
|
|
|
|
|
|
|
|
3544
|
|
|
|
|
|
|
use Text::CSV_XS qw( CSV_FLAGS_IS_BINARY CSV_TYPE_NV ); |
|
3545
|
|
|
|
|
|
|
|
|
3546
|
|
|
|
|
|
|
=back |
|
3547
|
|
|
|
|
|
|
|
|
3548
|
|
|
|
|
|
|
=head1 FUNCTIONS |
|
3549
|
|
|
|
|
|
|
|
|
3550
|
|
|
|
|
|
|
=head2 csv |
|
3551
|
|
|
|
|
|
|
X<csv> |
|
3552
|
|
|
|
|
|
|
|
|
3553
|
|
|
|
|
|
|
This function is not exported by default and should be explicitly requested: |
|
3554
|
|
|
|
|
|
|
|
|
3555
|
|
|
|
|
|
|
use Text::CSV_XS qw( csv ); |
|
3556
|
|
|
|
|
|
|
|
|
3557
|
|
|
|
|
|
|
This is a high-level function that aims at simple (user) interfaces. This |
|
3558
|
|
|
|
|
|
|
can be used to read/parse a C<CSV> file or stream (the default behavior) or |
|
3559
|
|
|
|
|
|
|
to produce a file or write to a stream (define the C<out> attribute). It |
|
3560
|
|
|
|
|
|
|
returns an array- or hash-reference on parsing (or C<undef> on fail) or the |
|
3561
|
|
|
|
|
|
|
numeric value of L</error_diag> on writing. When this function fails you |
|
3562
|
|
|
|
|
|
|
can get to the error using the class call to L</error_diag> |
|
3563
|
|
|
|
|
|
|
|
|
3564
|
|
|
|
|
|
|
my $aoa = csv (in => "test.csv") or |
|
3565
|
|
|
|
|
|
|
die Text::CSV_XS->error_diag; |
|
3566
|
|
|
|
|
|
|
|
|
3567
|
|
|
|
|
|
|
Note that failure here is the inability to start the parser, like when the |
|
3568
|
|
|
|
|
|
|
input does not exist or the arguments are unknown or conflicting. Run-time |
|
3569
|
|
|
|
|
|
|
parsing errors will return a valid reference, which can be empty, but still |
|
3570
|
|
|
|
|
|
|
contains all results up till the error. See L</on_error>. |
|
3571
|
|
|
|
|
|
|
|
|
3572
|
|
|
|
|
|
|
This function takes the arguments as key-value pairs. This can be passed as |
|
3573
|
|
|
|
|
|
|
a list or as an anonymous hash: |
|
3574
|
|
|
|
|
|
|
|
|
3575
|
|
|
|
|
|
|
my $aoa = csv ( in => "test.csv", sep_char => ";"); |
|
3576
|
|
|
|
|
|
|
my $aoh = csv ({ in => $fh, headers => "auto" }); |
|
3577
|
|
|
|
|
|
|
|
|
3578
|
|
|
|
|
|
|
The arguments passed consist of two parts: the arguments to L</csv> itself |
|
3579
|
|
|
|
|
|
|
and the optional attributes to the C<CSV> object used inside the function |
|
3580
|
|
|
|
|
|
|
as enumerated and explained in L</new>. |
|
3581
|
|
|
|
|
|
|
|
|
3582
|
|
|
|
|
|
|
If not overridden, the default options used for CSV are |
|
3583
|
|
|
|
|
|
|
|
|
3584
|
|
|
|
|
|
|
auto_diag => 1 |
|
3585
|
|
|
|
|
|
|
escape_null => 0 |
|
3586
|
|
|
|
|
|
|
strict_eol => 1 |
|
3587
|
|
|
|
|
|
|
|
|
3588
|
|
|
|
|
|
|
The option that is always set and cannot be altered is |
|
3589
|
|
|
|
|
|
|
|
|
3590
|
|
|
|
|
|
|
binary => 1 |
|
3591
|
|
|
|
|
|
|
|
|
3592
|
|
|
|
|
|
|
As this function will likely be used in one-liners, it allows C<quote> to |
|
3593
|
|
|
|
|
|
|
be abbreviated as C<quo>, and C<escape_char> to be abbreviated as C<esc> |
|
3594
|
|
|
|
|
|
|
or C<escape>. |
|
3595
|
|
|
|
|
|
|
|
|
3596
|
|
|
|
|
|
|
Alternative invocations: |
|
3597
|
|
|
|
|
|
|
|
|
3598
|
|
|
|
|
|
|
my $aoa = Text::CSV_XS::csv (in => "file.csv"); |
|
3599
|
|
|
|
|
|
|
|
|
3600
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new (); |
|
3601
|
|
|
|
|
|
|
my $aoa = $csv->csv (in => "file.csv"); |
|
3602
|
|
|
|
|
|
|
|
|
3603
|
|
|
|
|
|
|
In the latter case, the object attributes are used from the existing object |
|
3604
|
|
|
|
|
|
|
and the attribute arguments in the function call are ignored: |
|
3605
|
|
|
|
|
|
|
|
|
3606
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ sep_char => ";" }); |
|
3607
|
|
|
|
|
|
|
my $aoh = $csv->csv (in => "file.csv", bom => 1); |
|
3608
|
|
|
|
|
|
|
|
|
3609
|
|
|
|
|
|
|
will parse using C<;> as C<sep_char>, not C<,>. |
|
3610
|
|
|
|
|
|
|
|
|
3611
|
|
|
|
|
|
|
=head3 in |
|
3612
|
|
|
|
|
|
|
X<in> |
|
3613
|
|
|
|
|
|
|
|
|
3614
|
|
|
|
|
|
|
Used to specify the source. C<in> can be a file name (e.g. C<"file.csv">), |
|
3615
|
|
|
|
|
|
|
which will be opened for reading and closed when finished, a file handle |
|
3616
|
|
|
|
|
|
|
(e.g. C<$fh> or C<FH>), a reference to a glob (e.g. C<\*ARGV>), the glob |
|
3617
|
|
|
|
|
|
|
itself (e.g. C<*STDIN>), or a reference to a scalar (e.g. C<\q{1,2,"csv"}>). |
|
3618
|
|
|
|
|
|
|
|
|
3619
|
|
|
|
|
|
|
When used with L</out>, C<in> should be a reference to a CSV structure (AoA |
|
3620
|
|
|
|
|
|
|
or AoH) or a CODE-ref that returns an array-reference or a hash-reference. |
|
3621
|
|
|
|
|
|
|
The code-ref will be invoked with no arguments. |
|
3622
|
|
|
|
|
|
|
|
|
3623
|
|
|
|
|
|
|
my $aoa = csv (in => "file.csv"); |
|
3624
|
|
|
|
|
|
|
|
|
3625
|
|
|
|
|
|
|
open my $fh, "<", "file.csv"; |
|
3626
|
|
|
|
|
|
|
my $aoa = csv (in => $fh); |
|
3627
|
|
|
|
|
|
|
|
|
3628
|
|
|
|
|
|
|
my $csv = [ [qw( Foo Bar )], [ 1, 2 ], [ 2, 3 ]]; |
|
3629
|
|
|
|
|
|
|
my $err = csv (in => $csv, out => "file.csv"); |
|
3630
|
|
|
|
|
|
|
|
|
3631
|
|
|
|
|
|
|
If called in void context without the L</out> attribute, the resulting ref |
|
3632
|
|
|
|
|
|
|
will be used as input to a subsequent call to csv: |
|
3633
|
|
|
|
|
|
|
|
|
3634
|
|
|
|
|
|
|
csv (in => "file.csv", filter => { 2 => sub { length > 2 }}) |
|
3635
|
|
|
|
|
|
|
|
|
3636
|
|
|
|
|
|
|
will be a shortcut to |
|
3637
|
|
|
|
|
|
|
|
|
3638
|
|
|
|
|
|
|
csv (in => csv (in => "file.csv", filter => { 2 => sub { length > 2 }})) |
|
3639
|
|
|
|
|
|
|
|
|
3640
|
|
|
|
|
|
|
where, in the absence of the C<out> attribute, this is a shortcut to |
|
3641
|
|
|
|
|
|
|
|
|
3642
|
|
|
|
|
|
|
csv (in => csv (in => "file.csv", filter => { 2 => sub { length > 2 }}), |
|
3643
|
|
|
|
|
|
|
out => *STDOUT) |
|
3644
|
|
|
|
|
|
|
|
|
3645
|
|
|
|
|
|
|
=head3 out |
|
3646
|
|
|
|
|
|
|
X<out> |
|
3647
|
|
|
|
|
|
|
|
|
3648
|
|
|
|
|
|
|
csv (in => $aoa, out => "file.csv"); |
|
3649
|
|
|
|
|
|
|
csv (in => $aoa, out => $fh); |
|
3650
|
|
|
|
|
|
|
csv (in => $aoa, out => STDOUT); |
|
3651
|
|
|
|
|
|
|
csv (in => $aoa, out => *STDOUT); |
|
3652
|
|
|
|
|
|
|
csv (in => $aoa, out => \*STDOUT); |
|
3653
|
|
|
|
|
|
|
csv (in => $aoa, out => \my $data); |
|
3654
|
|
|
|
|
|
|
csv (in => $aoa, out => undef); |
|
3655
|
|
|
|
|
|
|
csv (in => $aoa, out => \"skip"); |
|
3656
|
|
|
|
|
|
|
|
|
3657
|
|
|
|
|
|
|
csv (in => $fh, out => \@aoa); |
|
3658
|
|
|
|
|
|
|
csv (in => $fh, out => \@aoh, bom => 1); |
|
3659
|
|
|
|
|
|
|
csv (in => $fh, out => \%hsh, key => "key"); |
|
3660
|
|
|
|
|
|
|
|
|
3661
|
|
|
|
|
|
|
csv (in => $file, out => $file); |
|
3662
|
|
|
|
|
|
|
csv (in => $file, out => $fh); |
|
3663
|
|
|
|
|
|
|
csv (in => $fh, out => $file); |
|
3664
|
|
|
|
|
|
|
csv (in => $fh, out => $fh); |
|
3665
|
|
|
|
|
|
|
|
|
3666
|
|
|
|
|
|
|
In output mode, the default CSV options when producing CSV are |
|
3667
|
|
|
|
|
|
|
|
|
3668
|
|
|
|
|
|
|
eol => "\r\n" |
|
3669
|
|
|
|
|
|
|
|
|
3670
|
|
|
|
|
|
|
The L</fragment> attribute is ignored in output mode. |
|
3671
|
|
|
|
|
|
|
|
|
3672
|
|
|
|
|
|
|
C<out> can be a file name (e.g. C<"file.csv">), which will be opened for |
|
3673
|
|
|
|
|
|
|
writing and closed when finished, a file handle (e.g. C<$fh> or C<FH>), a |
|
3674
|
|
|
|
|
|
|
reference to a glob (e.g. C<\*STDOUT>), the glob itself (e.g. C<*STDOUT>), |
|
3675
|
|
|
|
|
|
|
or a reference to a scalar (e.g. C<\my $data>). |
|
3676
|
|
|
|
|
|
|
|
|
3677
|
|
|
|
|
|
|
csv (in => sub { $sth->fetch }, out => "dump.csv"); |
|
3678
|
|
|
|
|
|
|
csv (in => sub { $sth->fetchrow_hashref }, out => "dump.csv", |
|
3679
|
|
|
|
|
|
|
headers => $sth->{NAME_lc}); |
|
3680
|
|
|
|
|
|
|
|
|
3681
|
|
|
|
|
|
|
When a code-ref is used for C<in>, the output is generated per invocation, |
|
3682
|
|
|
|
|
|
|
so no buffering is involved. This implies that there is no size restriction |
|
3683
|
|
|
|
|
|
|
on the number of records. The C<csv> function ends when the coderef returns |
|
3684
|
|
|
|
|
|
|
a false value. |
|
3685
|
|
|
|
|
|
|
|
|
3686
|
|
|
|
|
|
|
If C<out> is set to a reference of the literal string C<"skip">, the output |
|
3687
|
|
|
|
|
|
|
will be suppressed completely, which might be useful in combination with a |
|
3688
|
|
|
|
|
|
|
filter for side effects only. |
|
3689
|
|
|
|
|
|
|
|
|
3690
|
|
|
|
|
|
|
my %cache; |
|
3691
|
|
|
|
|
|
|
csv (in => "dump.csv", |
|
3692
|
|
|
|
|
|
|
out => \"skip", |
|
3693
|
|
|
|
|
|
|
on_in => sub { $cache{$_[1][1]}++ }); |
|
3694
|
|
|
|
|
|
|
|
|
3695
|
|
|
|
|
|
|
Currently, setting C<out> to any false value (C<undef>, C<"">, 0) will be |
|
3696
|
|
|
|
|
|
|
equivalent to C<\"skip">. |
|
3697
|
|
|
|
|
|
|
|
|
3698
|
|
|
|
|
|
|
If the C<in> argument points to something to parse, and the C<out> is set to |
|
3699
|
|
|
|
|
|
|
a reference to an C<ARRAY> or a C<HASH>, the output is appended to the data |
|
3700
|
|
|
|
|
|
|
in the existing reference. The result of the parse should match what exists |
|
3701
|
|
|
|
|
|
|
in the reference passed. This might come in handy when you have to parse a set |
|
3702
|
|
|
|
|
|
|
of files with similar content (like data stored per period) and you want to |
|
3703
|
|
|
|
|
|
|
collect that into a single data structure: |
|
3704
|
|
|
|
|
|
|
|
|
3705
|
|
|
|
|
|
|
my %hash; |
|
3706
|
|
|
|
|
|
|
csv (in => $_, out => \%hash, key => "id") for sort glob "foo-[0-9]*.csv"; |
|
3707
|
|
|
|
|
|
|
|
|
3708
|
|
|
|
|
|
|
my @list; # List of arrays |
|
3709
|
|
|
|
|
|
|
csv (in => $_, out => \@list) for sort glob "foo-[0-9]*.csv"; |
|
3710
|
|
|
|
|
|
|
|
|
3711
|
|
|
|
|
|
|
my @list; # List of hashes |
|
3712
|
|
|
|
|
|
|
csv (in => $_, out => \@list, bom => 1) for sort glob "foo-[0-9]*.csv"; |
|
3713
|
|
|
|
|
|
|
|
|
3714
|
|
|
|
|
|
|
=head4 Streaming |
|
3715
|
|
|
|
|
|
|
X<streaming> |
|
3716
|
|
|
|
|
|
|
|
|
3717
|
|
|
|
|
|
|
If B<both> C<in> and C<out> are files, file handles or globs, streaming is |
|
3718
|
|
|
|
|
|
|
enforced by injecting an C<after_parse> callback that immediately uses the |
|
3719
|
|
|
|
|
|
|
L<C<say ()>|/say> method of the same instance to output the result and then |
|
3720
|
|
|
|
|
|
|
rejects the record. |
|
3721
|
|
|
|
|
|
|
|
|
3722
|
|
|
|
|
|
|
If an C<after_parse> was already passed as attribute, that will be included |
|
3723
|
|
|
|
|
|
|
in the injected call. If C<on_in> was passed and C<after_parse> was not, it |
|
3724
|
|
|
|
|
|
|
will be used instead. If both were passed, C<on_in> is ignored. |
|
3725
|
|
|
|
|
|
|
|
|
3726
|
|
|
|
|
|
|
The EOL of the first record of the C<in> source is consistently used as EOL |
|
3727
|
|
|
|
|
|
|
for all records in the C<out> destination. |
|
3728
|
|
|
|
|
|
|
|
|
3729
|
|
|
|
|
|
|
The C<filter> attribute is not available. |
|
3730
|
|
|
|
|
|
|
|
|
3731
|
|
|
|
|
|
|
All other attributes are shared for C<in> and C<out>, so you cannot define |
|
3732
|
|
|
|
|
|
|
different encodings for C<in> and C<out>. You need to pass a C<$fh>, where |
|
3733
|
|
|
|
|
|
|
C<binmode> was used to apply the encoding layers. |
|
3734
|
|
|
|
|
|
|
|
|
3735
|
|
|
|
|
|
|
Note that this is work in progress and things might change. |
|
3736
|
|
|
|
|
|
|
|
|
3737
|
|
|
|
|
|
|
=head3 encoding |
|
3738
|
|
|
|
|
|
|
X<encoding> |
|
3739
|
|
|
|
|
|
|
|
|
3740
|
|
|
|
|
|
|
If passed, it should be an encoding accepted by the C<:encoding()> option |
|
3741
|
|
|
|
|
|
|
to C<open>. There is no default value. This attribute does not work in perl |
|
3742
|
|
|
|
|
|
|
5.6.x. C<encoding> can be abbreviated to C<enc> for ease of use in command |
|
3743
|
|
|
|
|
|
|
line invocations. |
|
3744
|
|
|
|
|
|
|
|
|
3745
|
|
|
|
|
|
|
If C<encoding> is set to the literal value C<"auto">, the method L</header> |
|
3746
|
|
|
|
|
|
|
will be invoked on the opened stream to check if there is a BOM and set the |
|
3747
|
|
|
|
|
|
|
encoding accordingly. This is equal to passing a true value in the option |
|
3748
|
|
|
|
|
|
|
L<C<detect_bom>|/detect_bom>. |
|
3749
|
|
|
|
|
|
|
|
|
3750
|
|
|
|
|
|
|
Encodings can be stacked, as supported by C<binmode>: |
|
3751
|
|
|
|
|
|
|
|
|
3752
|
|
|
|
|
|
|
# Using PerlIO::via::gzip |
|
3753
|
|
|
|
|
|
|
csv (in => \@csv, |
|
3754
|
|
|
|
|
|
|
out => "test.csv:via.gz", |
|
3755
|
|
|
|
|
|
|
encoding => ":via(gzip):encoding(utf-8)", |
|
3756
|
|
|
|
|
|
|
); |
|
3757
|
|
|
|
|
|
|
$aoa = csv (in => "test.csv:via.gz", encoding => ":via(gzip)"); |
|
3758
|
|
|
|
|
|
|
|
|
3759
|
|
|
|
|
|
|
# Using PerlIO::gzip |
|
3760
|
|
|
|
|
|
|
csv (in => \@csv, |
|
3761
|
|
|
|
|
|
|
out => "test.csv:gzip.gz", |
|
3762
|
|
|
|
|
|
|
encoding => ":gzip:encoding(utf-8)", |
|
3763
|
|
|
|
|
|
|
); |
|
3764
|
|
|
|
|
|
|
$aoa = csv (in => "test.csv:gzip.gz", encoding => ":gzip"); |
|
3765
|
|
|
|
|
|
|
|
|
3766
|
|
|
|
|
|
|
=head3 detect_bom |
|
3767
|
|
|
|
|
|
|
X<detect_bom> |
|
3768
|
|
|
|
|
|
|
|
|
3769
|
|
|
|
|
|
|
If C<detect_bom> is given, the method L</header> will be invoked on the |
|
3770
|
|
|
|
|
|
|
opened stream to check if there is a BOM and set the encoding accordingly. |
|
3771
|
|
|
|
|
|
|
Note that the attribute L<C<headers>|/headers> can be used to overrule the |
|
3772
|
|
|
|
|
|
|
default behavior of how that method automatically sets the attribute. |
|
3773
|
|
|
|
|
|
|
|
|
3774
|
|
|
|
|
|
|
C<detect_bom> can be abbreviated to C<bom>. |
|
3775
|
|
|
|
|
|
|
|
|
3776
|
|
|
|
|
|
|
This is the same as setting L<C<encoding>|/encoding> to C<"auto">. |
|
3777
|
|
|
|
|
|
|
|
|
3778
|
|
|
|
|
|
|
=head3 headers |
|
3779
|
|
|
|
|
|
|
X<headers> |
|
3780
|
|
|
|
|
|
|
|
|
3781
|
|
|
|
|
|
|
If this attribute is not given, the default behavior is to produce an array |
|
3782
|
|
|
|
|
|
|
of arrays. |
|
3783
|
|
|
|
|
|
|
|
|
3784
|
|
|
|
|
|
|
If C<headers> is supplied, it should be an anonymous list of column names, |
|
3785
|
|
|
|
|
|
|
an anonymous hashref, a coderef, or a literal flag: C<auto>, C<lc>, C<uc>, |
|
3786
|
|
|
|
|
|
|
or C<skip>. |
|
3787
|
|
|
|
|
|
|
|
|
3788
|
|
|
|
|
|
|
=over 2 |
|
3789
|
|
|
|
|
|
|
|
|
3790
|
|
|
|
|
|
|
=item skip |
|
3791
|
|
|
|
|
|
|
X<skip> |
|
3792
|
|
|
|
|
|
|
|
|
3793
|
|
|
|
|
|
|
When C<skip> is used, the header will not be included in the output. |
|
3794
|
|
|
|
|
|
|
|
|
3795
|
|
|
|
|
|
|
my $aoa = csv (in => $fh, headers => "skip"); |
|
3796
|
|
|
|
|
|
|
|
|
3797
|
|
|
|
|
|
|
C<skip> is invalid/ignored in combination with L<C<detect_bom>|/detect_bom>. |
|
3798
|
|
|
|
|
|
|
|
|
3799
|
|
|
|
|
|
|
=item auto |
|
3800
|
|
|
|
|
|
|
X<auto> |
|
3801
|
|
|
|
|
|
|
|
|
3802
|
|
|
|
|
|
|
If C<auto> is used, the first line of the C<CSV> source will be read as the |
|
3803
|
|
|
|
|
|
|
list of field headers and used to produce an array of hashes. |
|
3804
|
|
|
|
|
|
|
|
|
3805
|
|
|
|
|
|
|
my $aoh = csv (in => $fh, headers => "auto"); |
|
3806
|
|
|
|
|
|
|
|
|
3807
|
|
|
|
|
|
|
=item lc |
|
3808
|
|
|
|
|
|
|
X<lc> |
|
3809
|
|
|
|
|
|
|
|
|
3810
|
|
|
|
|
|
|
If C<lc> is used, the first line of the C<CSV> source will be read as the |
|
3811
|
|
|
|
|
|
|
list of field headers mapped to lower case and used to produce an array of |
|
3812
|
|
|
|
|
|
|
hashes. This is a variation of C<auto>. |
|
3813
|
|
|
|
|
|
|
|
|
3814
|
|
|
|
|
|
|
my $aoh = csv (in => $fh, headers => "lc"); |
|
3815
|
|
|
|
|
|
|
|
|
3816
|
|
|
|
|
|
|
=item uc |
|
3817
|
|
|
|
|
|
|
X<uc> |
|
3818
|
|
|
|
|
|
|
|
|
3819
|
|
|
|
|
|
|
If C<uc> is used, the first line of the C<CSV> source will be read as the |
|
3820
|
|
|
|
|
|
|
list of field headers mapped to upper case and used to produce an array of |
|
3821
|
|
|
|
|
|
|
hashes. This is a variation of C<auto>. |
|
3822
|
|
|
|
|
|
|
|
|
3823
|
|
|
|
|
|
|
my $aoh = csv (in => $fh, headers => "uc"); |
|
3824
|
|
|
|
|
|
|
|
|
3825
|
|
|
|
|
|
|
=item CODE |
|
3826
|
|
|
|
|
|
|
X<CODE> |
|
3827
|
|
|
|
|
|
|
|
|
3828
|
|
|
|
|
|
|
If a coderef is used, the first line of the C<CSV> source will be read as |
|
3829
|
|
|
|
|
|
|
the list of mangled field headers in which each field is passed as the only |
|
3830
|
|
|
|
|
|
|
argument to the coderef. This list is used to produce an array of hashes. |
|
3831
|
|
|
|
|
|
|
|
|
3832
|
|
|
|
|
|
|
my $aoh = csv (in => $fh, |
|
3833
|
|
|
|
|
|
|
headers => sub { lc ($_[0]) =~ s/kode/code/gr }); |
|
3834
|
|
|
|
|
|
|
|
|
3835
|
|
|
|
|
|
|
this example is a variation of using C<lc> where all occurrences of C<kode> |
|
3836
|
|
|
|
|
|
|
are replaced with C<code>. |
|
3837
|
|
|
|
|
|
|
|
|
3838
|
|
|
|
|
|
|
=item ARRAY |
|
3839
|
|
|
|
|
|
|
X<ARRAY> |
|
3840
|
|
|
|
|
|
|
|
|
3841
|
|
|
|
|
|
|
If C<headers> is an anonymous list, the entries in the list will be used |
|
3842
|
|
|
|
|
|
|
as field names. The first line is considered data instead of headers. |
|
3843
|
|
|
|
|
|
|
|
|
3844
|
|
|
|
|
|
|
my $aoh = csv (in => $fh, headers => [qw( Foo Bar )]); |
|
3845
|
|
|
|
|
|
|
csv (in => $aoa, out => $fh, headers => [qw( code description price )]); |
|
3846
|
|
|
|
|
|
|
|
|
3847
|
|
|
|
|
|
|
=item HASH |
|
3848
|
|
|
|
|
|
|
X<HASH> |
|
3849
|
|
|
|
|
|
|
|
|
3850
|
|
|
|
|
|
|
If C<headers> is a hash reference, this implies C<auto>, but header fields |
|
3851
|
|
|
|
|
|
|
that exist as key in the hashref will be replaced by the value for that |
|
3852
|
|
|
|
|
|
|
key. Given a CSV file like |
|
3853
|
|
|
|
|
|
|
|
|
3854
|
|
|
|
|
|
|
post-kode,city,name,id number,fubble |
|
3855
|
|
|
|
|
|
|
1234AA,Duckstad,Donald,13,"X313DF" |
|
3856
|
|
|
|
|
|
|
|
|
3857
|
|
|
|
|
|
|
using |
|
3858
|
|
|
|
|
|
|
|
|
3859
|
|
|
|
|
|
|
csv (headers => { "post-kode" => "pc", "id number" => "ID" }, ... |
|
3860
|
|
|
|
|
|
|
|
|
3861
|
|
|
|
|
|
|
will return an entry like |
|
3862
|
|
|
|
|
|
|
|
|
3863
|
|
|
|
|
|
|
{ pc => "1234AA", |
|
3864
|
|
|
|
|
|
|
city => "Duckstad", |
|
3865
|
|
|
|
|
|
|
name => "Donald", |
|
3866
|
|
|
|
|
|
|
ID => "13", |
|
3867
|
|
|
|
|
|
|
fubble => "X313DF", |
|
3868
|
|
|
|
|
|
|
} |
|
3869
|
|
|
|
|
|
|
|
|
3870
|
|
|
|
|
|
|
=back |
|
3871
|
|
|
|
|
|
|
|
|
3872
|
|
|
|
|
|
|
See also L<C<munge_column_names>|/munge_column_names> and |
|
3873
|
|
|
|
|
|
|
L<C<set_column_names>|/set_column_names>. |
|
3874
|
|
|
|
|
|
|
|
|
3875
|
|
|
|
|
|
|
=head3 munge_column_names |
|
3876
|
|
|
|
|
|
|
X<munge_column_names> |
|
3877
|
|
|
|
|
|
|
|
|
3878
|
|
|
|
|
|
|
If C<munge_column_names> is set, the method L</header> is invoked on the |
|
3879
|
|
|
|
|
|
|
opened stream with all matching arguments to detect and set the headers. |
|
3880
|
|
|
|
|
|
|
|
|
3881
|
|
|
|
|
|
|
C<munge_column_names> can be abbreviated to C<munge>. |
|
3882
|
|
|
|
|
|
|
|
|
3883
|
|
|
|
|
|
|
=head3 key |
|
3884
|
|
|
|
|
|
|
X<key> |
|
3885
|
|
|
|
|
|
|
|
|
3886
|
|
|
|
|
|
|
If passed, will default L<C<headers>|/headers> to C<"auto"> and return a |
|
3887
|
|
|
|
|
|
|
hashref instead of an array of hashes. Allowed values are simple scalars or |
|
3888
|
|
|
|
|
|
|
array-references where the first element is the joiner and the rest are the |
|
3889
|
|
|
|
|
|
|
fields to join to combine the key. |
|
3890
|
|
|
|
|
|
|
|
|
3891
|
|
|
|
|
|
|
my $ref = csv (in => "test.csv", key => "code"); |
|
3892
|
|
|
|
|
|
|
my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ]); |
|
3893
|
|
|
|
|
|
|
|
|
3894
|
|
|
|
|
|
|
with test.csv like |
|
3895
|
|
|
|
|
|
|
|
|
3896
|
|
|
|
|
|
|
code,product,price,color |
|
3897
|
|
|
|
|
|
|
1,pc,850,gray |
|
3898
|
|
|
|
|
|
|
2,keyboard,12,white |
|
3899
|
|
|
|
|
|
|
3,mouse,5,black |
|
3900
|
|
|
|
|
|
|
|
|
3901
|
|
|
|
|
|
|
the first example will return |
|
3902
|
|
|
|
|
|
|
|
|
3903
|
|
|
|
|
|
|
{ 1 => { |
|
3904
|
|
|
|
|
|
|
code => 1, |
|
3905
|
|
|
|
|
|
|
color => 'gray', |
|
3906
|
|
|
|
|
|
|
price => 850, |
|
3907
|
|
|
|
|
|
|
product => 'pc' |
|
3908
|
|
|
|
|
|
|
}, |
|
3909
|
|
|
|
|
|
|
2 => { |
|
3910
|
|
|
|
|
|
|
code => 2, |
|
3911
|
|
|
|
|
|
|
color => 'white', |
|
3912
|
|
|
|
|
|
|
price => 12, |
|
3913
|
|
|
|
|
|
|
product => 'keyboard' |
|
3914
|
|
|
|
|
|
|
}, |
|
3915
|
|
|
|
|
|
|
3 => { |
|
3916
|
|
|
|
|
|
|
code => 3, |
|
3917
|
|
|
|
|
|
|
color => 'black', |
|
3918
|
|
|
|
|
|
|
price => 5, |
|
3919
|
|
|
|
|
|
|
product => 'mouse' |
|
3920
|
|
|
|
|
|
|
} |
|
3921
|
|
|
|
|
|
|
} |
|
3922
|
|
|
|
|
|
|
|
|
3923
|
|
|
|
|
|
|
the second example will return |
|
3924
|
|
|
|
|
|
|
|
|
3925
|
|
|
|
|
|
|
{ "1:gray" => { |
|
3926
|
|
|
|
|
|
|
code => 1, |
|
3927
|
|
|
|
|
|
|
color => 'gray', |
|
3928
|
|
|
|
|
|
|
price => 850, |
|
3929
|
|
|
|
|
|
|
product => 'pc' |
|
3930
|
|
|
|
|
|
|
}, |
|
3931
|
|
|
|
|
|
|
"2:white" => { |
|
3932
|
|
|
|
|
|
|
code => 2, |
|
3933
|
|
|
|
|
|
|
color => 'white', |
|
3934
|
|
|
|
|
|
|
price => 12, |
|
3935
|
|
|
|
|
|
|
product => 'keyboard' |
|
3936
|
|
|
|
|
|
|
}, |
|
3937
|
|
|
|
|
|
|
"3:black" => { |
|
3938
|
|
|
|
|
|
|
code => 3, |
|
3939
|
|
|
|
|
|
|
color => 'black', |
|
3940
|
|
|
|
|
|
|
price => 5, |
|
3941
|
|
|
|
|
|
|
product => 'mouse' |
|
3942
|
|
|
|
|
|
|
} |
|
3943
|
|
|
|
|
|
|
} |
|
3944
|
|
|
|
|
|
|
|
|
3945
|
|
|
|
|
|
|
The C<key> attribute can be combined with L<C<headers>|/headers> for C<CSV> |
|
3946
|
|
|
|
|
|
|
data that has no header line, like |
|
3947
|
|
|
|
|
|
|
|
|
3948
|
|
|
|
|
|
|
my $ref = csv ( |
|
3949
|
|
|
|
|
|
|
in => "foo.csv", |
|
3950
|
|
|
|
|
|
|
headers => [qw( c_foo foo bar description stock )], |
|
3951
|
|
|
|
|
|
|
key => "c_foo", |
|
3952
|
|
|
|
|
|
|
); |
|
3953
|
|
|
|
|
|
|
|
|
3954
|
|
|
|
|
|
|
=head3 value |
|
3955
|
|
|
|
|
|
|
X<value> |
|
3956
|
|
|
|
|
|
|
|
|
3957
|
|
|
|
|
|
|
Used to create key-value hashes. |
|
3958
|
|
|
|
|
|
|
|
|
3959
|
|
|
|
|
|
|
Only allowed when C<key> is valid. A C<value> can be either a single column |
|
3960
|
|
|
|
|
|
|
label or an anonymous list of column labels. In the first case, the value |
|
3961
|
|
|
|
|
|
|
will be a simple scalar value, in the latter case, it will be a hashref. |
|
3962
|
|
|
|
|
|
|
|
|
3963
|
|
|
|
|
|
|
my $ref = csv (in => "test.csv", key => "code", |
|
3964
|
|
|
|
|
|
|
value => "price"); |
|
3965
|
|
|
|
|
|
|
my $ref = csv (in => "test.csv", key => "code", |
|
3966
|
|
|
|
|
|
|
value => [ "product", "price" ]); |
|
3967
|
|
|
|
|
|
|
my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ], |
|
3968
|
|
|
|
|
|
|
value => "price"); |
|
3969
|
|
|
|
|
|
|
my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ], |
|
3970
|
|
|
|
|
|
|
value => [ "product", "price" ]); |
|
3971
|
|
|
|
|
|
|
|
|
3972
|
|
|
|
|
|
|
with test.csv like |
|
3973
|
|
|
|
|
|
|
|
|
3974
|
|
|
|
|
|
|
code,product,price,color |
|
3975
|
|
|
|
|
|
|
1,pc,850,gray |
|
3976
|
|
|
|
|
|
|
2,keyboard,12,white |
|
3977
|
|
|
|
|
|
|
3,mouse,5,black |
|
3978
|
|
|
|
|
|
|
|
|
3979
|
|
|
|
|
|
|
the first example will return |
|
3980
|
|
|
|
|
|
|
|
|
3981
|
|
|
|
|
|
|
{ 1 => 850, |
|
3982
|
|
|
|
|
|
|
2 => 12, |
|
3983
|
|
|
|
|
|
|
3 => 5, |
|
3984
|
|
|
|
|
|
|
} |
|
3985
|
|
|
|
|
|
|
|
|
3986
|
|
|
|
|
|
|
the second example will return |
|
3987
|
|
|
|
|
|
|
|
|
3988
|
|
|
|
|
|
|
{ 1 => { |
|
3989
|
|
|
|
|
|
|
price => 850, |
|
3990
|
|
|
|
|
|
|
product => 'pc' |
|
3991
|
|
|
|
|
|
|
}, |
|
3992
|
|
|
|
|
|
|
2 => { |
|
3993
|
|
|
|
|
|
|
price => 12, |
|
3994
|
|
|
|
|
|
|
product => 'keyboard' |
|
3995
|
|
|
|
|
|
|
}, |
|
3996
|
|
|
|
|
|
|
3 => { |
|
3997
|
|
|
|
|
|
|
price => 5, |
|
3998
|
|
|
|
|
|
|
product => 'mouse' |
|
3999
|
|
|
|
|
|
|
} |
|
4000
|
|
|
|
|
|
|
} |
|
4001
|
|
|
|
|
|
|
|
|
4002
|
|
|
|
|
|
|
the third example will return |
|
4003
|
|
|
|
|
|
|
|
|
4004
|
|
|
|
|
|
|
{ "1:gray" => 850, |
|
4005
|
|
|
|
|
|
|
"2:white" => 12, |
|
4006
|
|
|
|
|
|
|
"3:black" => 5, |
|
4007
|
|
|
|
|
|
|
} |
|
4008
|
|
|
|
|
|
|
|
|
4009
|
|
|
|
|
|
|
the fourth example will return |
|
4010
|
|
|
|
|
|
|
|
|
4011
|
|
|
|
|
|
|
{ "1:gray" => { |
|
4012
|
|
|
|
|
|
|
price => 850, |
|
4013
|
|
|
|
|
|
|
product => 'pc' |
|
4014
|
|
|
|
|
|
|
}, |
|
4015
|
|
|
|
|
|
|
"2:white" => { |
|
4016
|
|
|
|
|
|
|
price => 12, |
|
4017
|
|
|
|
|
|
|
product => 'keyboard' |
|
4018
|
|
|
|
|
|
|
}, |
|
4019
|
|
|
|
|
|
|
"3:black" => { |
|
4020
|
|
|
|
|
|
|
price => 5, |
|
4021
|
|
|
|
|
|
|
product => 'mouse' |
|
4022
|
|
|
|
|
|
|
} |
|
4023
|
|
|
|
|
|
|
} |
|
4024
|
|
|
|
|
|
|
|
|
4025
|
|
|
|
|
|
|
=head3 keep_headers |
|
4026
|
|
|
|
|
|
|
X<keep_headers> |
|
4027
|
|
|
|
|
|
|
X<keep_column_names> |
|
4028
|
|
|
|
|
|
|
X<kh> |
|
4029
|
|
|
|
|
|
|
|
|
4030
|
|
|
|
|
|
|
When using hashes, keep the column names in the arrayref passed, so all |
|
4031
|
|
|
|
|
|
|
headers are available after the call in the original order. |
|
4032
|
|
|
|
|
|
|
|
|
4033
|
|
|
|
|
|
|
my $aoh = csv (in => "file.csv", keep_headers => \my @hdr); |
|
4034
|
|
|
|
|
|
|
|
|
4035
|
|
|
|
|
|
|
This attribute can be abbreviated to C<kh> or passed as C<keep_column_names>. |
|
4036
|
|
|
|
|
|
|
|
|
4037
|
|
|
|
|
|
|
This attribute implies a default of C<auto> for the C<headers> attribute. |
|
4038
|
|
|
|
|
|
|
|
|
4039
|
|
|
|
|
|
|
X<stable header order> |
|
4040
|
|
|
|
|
|
|
X<internal headers> |
|
4041
|
|
|
|
|
|
|
The headers can also be kept internally to keep stable header order: |
|
4042
|
|
|
|
|
|
|
|
|
4043
|
|
|
|
|
|
|
csv (in => csv (in => "file.csv", kh => "internal"), |
|
4044
|
|
|
|
|
|
|
out => "new.csv", |
|
4045
|
|
|
|
|
|
|
kh => "internal"); |
|
4046
|
|
|
|
|
|
|
|
|
4047
|
|
|
|
|
|
|
where C<internal> can also be C<1>, C<yes>, or C<true>. This is similar to |
|
4048
|
|
|
|
|
|
|
|
|
4049
|
|
|
|
|
|
|
my @h; |
|
4050
|
|
|
|
|
|
|
csv (in => csv (in => "file.csv", kh => \@h), |
|
4051
|
|
|
|
|
|
|
out => "new.csv", |
|
4052
|
|
|
|
|
|
|
headers => \@h); |
|
4053
|
|
|
|
|
|
|
|
|
4054
|
|
|
|
|
|
|
=head3 fragment |
|
4055
|
|
|
|
|
|
|
X<fragment> |
|
4056
|
|
|
|
|
|
|
|
|
4057
|
|
|
|
|
|
|
Only output the fragment as defined in the L</fragment> method. This option |
|
4058
|
|
|
|
|
|
|
is ignored when I<generating> C<CSV>. See L</out>. |
|
4059
|
|
|
|
|
|
|
|
|
4060
|
|
|
|
|
|
|
Combining all of them could give something like |
|
4061
|
|
|
|
|
|
|
|
|
4062
|
|
|
|
|
|
|
use Text::CSV_XS qw( csv ); |
|
4063
|
|
|
|
|
|
|
my $aoh = csv ( |
|
4064
|
|
|
|
|
|
|
in => "test.txt", |
|
4065
|
|
|
|
|
|
|
encoding => "utf-8", |
|
4066
|
|
|
|
|
|
|
headers => "auto", |
|
4067
|
|
|
|
|
|
|
sep_char => "|", |
|
4068
|
|
|
|
|
|
|
fragment => "row=3;6-9;15-*", |
|
4069
|
|
|
|
|
|
|
); |
|
4070
|
|
|
|
|
|
|
say $aoh->[15]{Foo}; |
|
4071
|
|
|
|
|
|
|
|
|
4072
|
|
|
|
|
|
|
=head3 sep_set |
|
4073
|
|
|
|
|
|
|
X<sep_set> |
|
4074
|
|
|
|
|
|
|
X<seps> |
|
4075
|
|
|
|
|
|
|
|
|
4076
|
|
|
|
|
|
|
If C<sep_set> is set, the method L</header> is invoked on the opened stream |
|
4077
|
|
|
|
|
|
|
to detect and set L<C<sep_char>|/sep_char> with the given set. |
|
4078
|
|
|
|
|
|
|
|
|
4079
|
|
|
|
|
|
|
C<sep_set> can be abbreviated to C<seps>. If neither C<sep_set> nor C<seps> |
|
4080
|
|
|
|
|
|
|
is given, but C<sep> is defined, C<sep_set> defaults to C<[ sep ]>. This is |
|
4081
|
|
|
|
|
|
|
only supported for perl version 5.10 and up. |
|
4082
|
|
|
|
|
|
|
|
|
4083
|
|
|
|
|
|
|
Note that as the L</header> method is invoked, its default is to also set |
|
4084
|
|
|
|
|
|
|
the headers. |
|
4085
|
|
|
|
|
|
|
|
|
4086
|
|
|
|
|
|
|
=head3 set_column_names |
|
4087
|
|
|
|
|
|
|
X<set_column_names> |
|
4088
|
|
|
|
|
|
|
|
|
4089
|
|
|
|
|
|
|
If C<set_column_names> is passed, the method L</header> is invoked on the |
|
4090
|
|
|
|
|
|
|
opened stream with all arguments meant for L</header>. |
|
4091
|
|
|
|
|
|
|
|
|
4092
|
|
|
|
|
|
|
If C<set_column_names> is passed as a false value, the content of the first |
|
4093
|
|
|
|
|
|
|
row is only preserved if the output is AoA: |
|
4094
|
|
|
|
|
|
|
|
|
4095
|
|
|
|
|
|
|
With an input-file like |
|
4096
|
|
|
|
|
|
|
|
|
4097
|
|
|
|
|
|
|
bAr,foo |
|
4098
|
|
|
|
|
|
|
1,2 |
|
4099
|
|
|
|
|
|
|
3,4,5 |
|
4100
|
|
|
|
|
|
|
|
|
4101
|
|
|
|
|
|
|
This call |
|
4102
|
|
|
|
|
|
|
|
|
4103
|
|
|
|
|
|
|
my $aoa = csv (in => $file, set_column_names => 0); |
|
4104
|
|
|
|
|
|
|
|
|
4105
|
|
|
|
|
|
|
will result in |
|
4106
|
|
|
|
|
|
|
|
|
4107
|
|
|
|
|
|
|
[[ "bar", "foo" ], |
|
4108
|
|
|
|
|
|
|
[ "1", "2" ], |
|
4109
|
|
|
|
|
|
|
[ "3", "4", "5" ]] |
|
4110
|
|
|
|
|
|
|
|
|
4111
|
|
|
|
|
|
|
and |
|
4112
|
|
|
|
|
|
|
|
|
4113
|
|
|
|
|
|
|
my $aoa = csv (in => $file, set_column_names => 0, munge => "none"); |
|
4114
|
|
|
|
|
|
|
|
|
4115
|
|
|
|
|
|
|
will result in |
|
4116
|
|
|
|
|
|
|
|
|
4117
|
|
|
|
|
|
|
[[ "bAr", "foo" ], |
|
4118
|
|
|
|
|
|
|
[ "1", "2" ], |
|
4119
|
|
|
|
|
|
|
[ "3", "4", "5" ]] |
|
4120
|
|
|
|
|
|
|
|
|
4121
|
|
|
|
|
|
|
=head3 csv |
|
4122
|
|
|
|
|
|
|
X<csv> |
|
4123
|
|
|
|
|
|
|
|
|
4124
|
|
|
|
|
|
|
The I<function> L</csv> can also be called as a method or with an existing |
|
4125
|
|
|
|
|
|
|
Text::CSV_XS object. This could help if the function is to be invoked a lot |
|
4126
|
|
|
|
|
|
|
of times and the overhead of creating the object internally over and over |
|
4127
|
|
|
|
|
|
|
again would be prevented by passing an existing instance. |
|
4128
|
|
|
|
|
|
|
|
|
4129
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 }); |
|
4130
|
|
|
|
|
|
|
|
|
4131
|
|
|
|
|
|
|
my $aoa = $csv->csv (in => $fh); |
|
4132
|
|
|
|
|
|
|
my $aoa = csv (in => $fh, csv => $csv); |
|
4133
|
|
|
|
|
|
|
|
|
4134
|
|
|
|
|
|
|
both act the same. Running this 20000 times on a 20-line CSV file showed |
|
4135
|
|
|
|
|
|
|
a 53% speedup. |
|
4136
|
|
|
|
|
|
|
|
|
4137
|
|
|
|
|
|
|
=head2 Callbacks |
|
4138
|
|
|
|
|
|
|
X<Callbacks> |
|
4139
|
|
|
|
|
|
|
|
|
4140
|
|
|
|
|
|
|
Callbacks enable actions triggered from the I<inside> of Text::CSV_XS. |
|
4141
|
|
|
|
|
|
|
|
|
4142
|
|
|
|
|
|
|
While most of what this enables can easily be done in an unrolled loop as |
|
4143
|
|
|
|
|
|
|
described in the L</SYNOPSIS>, callbacks can be used to meet special demands |
|
4144
|
|
|
|
|
|
|
or enhance the L</csv> function. |
|
4145
|
|
|
|
|
|
|
|
|
4146
|
|
|
|
|
|
|
=over 2 |
|
4147
|
|
|
|
|
|
|
|
|
4148
|
|
|
|
|
|
|
=item error |
|
4149
|
|
|
|
|
|
|
X<error> |
|
4150
|
|
|
|
|
|
|
|
|
4151
|
|
|
|
|
|
|
$csv->callbacks (error => sub { $csv->SetDiag (0) }); |
|
4152
|
|
|
|
|
|
|
|
|
4153
|
|
|
|
|
|
|
the C<error> callback is invoked when an error occurs, but I<only> when |
|
4154
|
|
|
|
|
|
|
L</auto_diag> is set to a true value. A callback is invoked with the values |
|
4155
|
|
|
|
|
|
|
returned by L</error_diag>: |
|
4156
|
|
|
|
|
|
|
|
|
4157
|
|
|
|
|
|
|
my ($c, $s); |
|
4158
|
|
|
|
|
|
|
|
|
4159
|
|
|
|
|
|
|
sub ignore3006 { |
|
4160
|
|
|
|
|
|
|
my ($err, $msg, $pos, $recno, $fldno) = @_; |
|
4161
|
|
|
|
|
|
|
if ($err == 3006) { |
|
4162
|
|
|
|
|
|
|
# ignore this error |
|
4163
|
|
|
|
|
|
|
($c, $s) = (undef, undef); |
|
4164
|
|
|
|
|
|
|
Text::CSV_XS->SetDiag (0); |
|
4165
|
|
|
|
|
|
|
} |
|
4166
|
|
|
|
|
|
|
# Any other error |
|
4167
|
|
|
|
|
|
|
return; |
|
4168
|
|
|
|
|
|
|
} # ignore3006 |
|
4169
|
|
|
|
|
|
|
|
|
4170
|
|
|
|
|
|
|
$csv->callbacks (error => \&ignore3006); |
|
4171
|
|
|
|
|
|
|
$csv->bind_columns (\$c, \$s); |
|
4172
|
|
|
|
|
|
|
while ($csv->getline ($fh)) { |
|
4173
|
|
|
|
|
|
|
# Error 3006 will not stop the loop |
|
4174
|
|
|
|
|
|
|
} |
|
4175
|
|
|
|
|
|
|
|
|
4176
|
|
|
|
|
|
|
=item after_parse |
|
4177
|
|
|
|
|
|
|
X<after_parse> |
|
4178
|
|
|
|
|
|
|
|
|
4179
|
|
|
|
|
|
|
$csv->callbacks (after_parse => sub { push @{$_[1]}, "NEW" }); |
|
4180
|
|
|
|
|
|
|
while (my $row = $csv->getline ($fh)) { |
|
4181
|
|
|
|
|
|
|
$row->[-1] eq "NEW"; |
|
4182
|
|
|
|
|
|
|
} |
|
4183
|
|
|
|
|
|
|
|
|
4184
|
|
|
|
|
|
|
This callback is invoked after parsing with L</getline> only if no error |
|
4185
|
|
|
|
|
|
|
occurred. The callback is invoked with two arguments: the current C<CSV> |
|
4186
|
|
|
|
|
|
|
parser object and an array reference to the fields parsed. |
|
4187
|
|
|
|
|
|
|
|
|
4188
|
|
|
|
|
|
|
The return code of the callback is ignored unless it is a reference to the |
|
4189
|
|
|
|
|
|
|
string "skip", in which case the record will be skipped in L</getline_all>. |
|
4190
|
|
|
|
|
|
|
|
|
4191
|
|
|
|
|
|
|
sub add_from_db { |
|
4192
|
|
|
|
|
|
|
my ($csv, $row) = @_; |
|
4193
|
|
|
|
|
|
|
$sth->execute ($row->[4]); |
|
4194
|
|
|
|
|
|
|
push @$row, $sth->fetchrow_array; |
|
4195
|
|
|
|
|
|
|
} # add_from_db |
|
4196
|
|
|
|
|
|
|
|
|
4197
|
|
|
|
|
|
|
my $aoa = csv (in => "file.csv", callbacks => { |
|
4198
|
|
|
|
|
|
|
after_parse => \&add_from_db }); |
|
4199
|
|
|
|
|
|
|
|
|
4200
|
|
|
|
|
|
|
This hook can be used for validation: |
|
4201
|
|
|
|
|
|
|
X<data_validation> |
|
4202
|
|
|
|
|
|
|
|
|
4203
|
|
|
|
|
|
|
=over 2 |
|
4204
|
|
|
|
|
|
|
|
|
4205
|
|
|
|
|
|
|
=item FAIL |
|
4206
|
|
|
|
|
|
|
|
|
4207
|
|
|
|
|
|
|
Die if any of the records does not validate a rule: |
|
4208
|
|
|
|
|
|
|
|
|
4209
|
|
|
|
|
|
|
after_parse => sub { |
|
4210
|
|
|
|
|
|
|
$_[1][4] =~ m/^[0-9]{4}\s?[A-Z]{2}$/ or |
|
4211
|
|
|
|
|
|
|
die "5th field does not have a valid Dutch zipcode"; |
|
4212
|
|
|
|
|
|
|
} |
|
4213
|
|
|
|
|
|
|
|
|
4214
|
|
|
|
|
|
|
=item DEFAULT |
|
4215
|
|
|
|
|
|
|
|
|
4216
|
|
|
|
|
|
|
Replace invalid fields with a default value: |
|
4217
|
|
|
|
|
|
|
|
|
4218
|
|
|
|
|
|
|
after_parse => sub { $_[1][2] =~ m/^\d+$/ or $_[1][2] = 0 } |
|
4219
|
|
|
|
|
|
|
|
|
4220
|
|
|
|
|
|
|
=item SKIP |
|
4221
|
|
|
|
|
|
|
|
|
4222
|
|
|
|
|
|
|
Skip records that have invalid fields (only applies to L</getline_all>): |
|
4223
|
|
|
|
|
|
|
|
|
4224
|
|
|
|
|
|
|
after_parse => sub { $_[1][0] =~ m/^\d+$/ or return \"skip"; } |
|
4225
|
|
|
|
|
|
|
|
|
4226
|
|
|
|
|
|
|
=back |
|
4227
|
|
|
|
|
|
|
|
|
4228
|
|
|
|
|
|
|
=item before_print |
|
4229
|
|
|
|
|
|
|
X<before_print> |
|
4230
|
|
|
|
|
|
|
|
|
4231
|
|
|
|
|
|
|
my $idx = 1; |
|
4232
|
|
|
|
|
|
|
$csv->callbacks (before_print => sub { $_[1][0] = $idx++ }); |
|
4233
|
|
|
|
|
|
|
$csv->print (*STDOUT, [ 0, $_ ]) for @members; |
|
4234
|
|
|
|
|
|
|
|
|
4235
|
|
|
|
|
|
|
This callback is invoked before printing with L</print> only if no error |
|
4236
|
|
|
|
|
|
|
occurred. The callback is invoked with two arguments: the current C<CSV> |
|
4237
|
|
|
|
|
|
|
parser object and an array reference to the fields passed. |
|
4238
|
|
|
|
|
|
|
|
|
4239
|
|
|
|
|
|
|
The return code of the callback is ignored. |
|
4240
|
|
|
|
|
|
|
|
|
4241
|
|
|
|
|
|
|
sub max_4_fields { |
|
4242
|
|
|
|
|
|
|
my ($csv, $row) = @_; |
|
4243
|
|
|
|
|
|
|
@$row > 4 and splice @$row, 4; |
|
4244
|
|
|
|
|
|
|
} # max_4_fields |
|
4245
|
|
|
|
|
|
|
|
|
4246
|
|
|
|
|
|
|
csv (in => csv (in => "file.csv"), out => *STDOUT, |
|
4247
|
|
|
|
|
|
|
callbacks => { before_print => \&max_4_fields }); |
|
4248
|
|
|
|
|
|
|
|
|
4249
|
|
|
|
|
|
|
This callback is not active for L</combine>. |
|
4250
|
|
|
|
|
|
|
|
|
4251
|
|
|
|
|
|
|
=back |
|
4252
|
|
|
|
|
|
|
|
|
4253
|
|
|
|
|
|
|
=head3 Callbacks for csv () |
|
4254
|
|
|
|
|
|
|
|
|
4255
|
|
|
|
|
|
|
The L</csv> allows for some callbacks that do not integrate in XS internals |
|
4256
|
|
|
|
|
|
|
but only feature the L</csv> function. |
|
4257
|
|
|
|
|
|
|
|
|
4258
|
|
|
|
|
|
|
csv (in => "file.csv", |
|
4259
|
|
|
|
|
|
|
callbacks => { |
|
4260
|
|
|
|
|
|
|
filter => { 6 => sub { $_ > 15 } }, # first |
|
4261
|
|
|
|
|
|
|
after_parse => sub { say "AFTER PARSE"; }, # first |
|
4262
|
|
|
|
|
|
|
after_in => sub { say "AFTER IN"; }, # second |
|
4263
|
|
|
|
|
|
|
on_in => sub { say "ON IN"; }, # third |
|
4264
|
|
|
|
|
|
|
}, |
|
4265
|
|
|
|
|
|
|
); |
|
4266
|
|
|
|
|
|
|
|
|
4267
|
|
|
|
|
|
|
csv (in => $aoh, |
|
4268
|
|
|
|
|
|
|
out => "file.csv", |
|
4269
|
|
|
|
|
|
|
callbacks => { |
|
4270
|
|
|
|
|
|
|
on_in => sub { say "ON IN"; }, # first |
|
4271
|
|
|
|
|
|
|
before_out => sub { say "BEFORE OUT"; }, # second |
|
4272
|
|
|
|
|
|
|
before_print => sub { say "BEFORE PRINT"; }, # third |
|
4273
|
|
|
|
|
|
|
}, |
|
4274
|
|
|
|
|
|
|
); |
|
4275
|
|
|
|
|
|
|
|
|
4276
|
|
|
|
|
|
|
=over 2 |
|
4277
|
|
|
|
|
|
|
|
|
4278
|
|
|
|
|
|
|
=item filter |
|
4279
|
|
|
|
|
|
|
X<filter> |
|
4280
|
|
|
|
|
|
|
|
|
4281
|
|
|
|
|
|
|
This callback can be used to filter records. It is called just after a new |
|
4282
|
|
|
|
|
|
|
record has been scanned. The callback accepts a: |
|
4283
|
|
|
|
|
|
|
|
|
4284
|
|
|
|
|
|
|
=over 2 |
|
4285
|
|
|
|
|
|
|
|
|
4286
|
|
|
|
|
|
|
=item hashref |
|
4287
|
|
|
|
|
|
|
|
|
4288
|
|
|
|
|
|
|
The keys are the index to the row (the field name or field number, 1-based) |
|
4289
|
|
|
|
|
|
|
and the values are subs to return a true or false value. |
|
4290
|
|
|
|
|
|
|
|
|
4291
|
|
|
|
|
|
|
csv (in => "file.csv", filter => { |
|
4292
|
|
|
|
|
|
|
3 => sub { m/a/ }, # third field should contain an "a" |
|
4293
|
|
|
|
|
|
|
5 => sub { length > 4 }, # length of the 5th field minimal 5 |
|
4294
|
|
|
|
|
|
|
}); |
|
4295
|
|
|
|
|
|
|
|
|
4296
|
|
|
|
|
|
|
csv (in => "file.csv", filter => { foo => sub { $_ > 4 }}); |
|
4297
|
|
|
|
|
|
|
|
|
4298
|
|
|
|
|
|
|
If the keys to the filter hash contain any character that is not a digit it |
|
4299
|
|
|
|
|
|
|
will also implicitly set L</headers> to C<"auto"> unless L</headers> was |
|
4300
|
|
|
|
|
|
|
already passed as argument. When headers are active, returning an array of |
|
4301
|
|
|
|
|
|
|
hashes, the filter is not applicable to the header itself. |
|
4302
|
|
|
|
|
|
|
|
|
4303
|
|
|
|
|
|
|
All sub results should match, as in AND. |
|
4304
|
|
|
|
|
|
|
|
|
4305
|
|
|
|
|
|
|
The context of the callback sets C<$_> localized to the field indicated by |
|
4306
|
|
|
|
|
|
|
the filter. The two arguments are as with all other callbacks, so the other |
|
4307
|
|
|
|
|
|
|
fields in the current row can be seen: |
|
4308
|
|
|
|
|
|
|
|
|
4309
|
|
|
|
|
|
|
filter => { 3 => sub { $_ > 100 ? $_[1][1] =~ m/A/ : $_[1][6] =~ m/B/ }} |
|
4310
|
|
|
|
|
|
|
|
|
4311
|
|
|
|
|
|
|
If the context is set to return a list of hashes (L</headers> is defined), |
|
4312
|
|
|
|
|
|
|
the current record will also be available in the localized C<%_>: |
|
4313
|
|
|
|
|
|
|
|
|
4314
|
|
|
|
|
|
|
filter => { 3 => sub { $_ > 100 && $_{foo} =~ m/A/ && $_{bar} < 1000 }} |
|
4315
|
|
|
|
|
|
|
|
|
4316
|
|
|
|
|
|
|
If the filter is used to I<alter> the content by changing C<$_>, make sure |
|
4317
|
|
|
|
|
|
|
that the sub returns true in order not to have that record skipped: |
|
4318
|
|
|
|
|
|
|
|
|
4319
|
|
|
|
|
|
|
filter => { 2 => sub { $_ = uc }} |
|
4320
|
|
|
|
|
|
|
|
|
4321
|
|
|
|
|
|
|
will upper-case the second field, and then skip it if the resulting content |
|
4322
|
|
|
|
|
|
|
evaluates to false. To always accept, end with truth: |
|
4323
|
|
|
|
|
|
|
|
|
4324
|
|
|
|
|
|
|
filter => { 2 => sub { $_ = uc; 1 }} |
|
4325
|
|
|
|
|
|
|
|
|
4326
|
|
|
|
|
|
|
=item coderef |
|
4327
|
|
|
|
|
|
|
|
|
4328
|
|
|
|
|
|
|
csv (in => "file.csv", filter => sub { $n++; 0; }); |
|
4329
|
|
|
|
|
|
|
|
|
4330
|
|
|
|
|
|
|
If the argument to C<filter> is a coderef, it is an alias or shortcut to a |
|
4331
|
|
|
|
|
|
|
filter on column 0: |
|
4332
|
|
|
|
|
|
|
|
|
4333
|
|
|
|
|
|
|
csv (filter => sub { $n++; 0 }); |
|
4334
|
|
|
|
|
|
|
|
|
4335
|
|
|
|
|
|
|
is equal to |
|
4336
|
|
|
|
|
|
|
|
|
4337
|
|
|
|
|
|
|
csv (filter => { 0 => sub { $n++; 0 } }); |
|
4338
|
|
|
|
|
|
|
|
|
4339
|
|
|
|
|
|
|
=item filter-name |
|
4340
|
|
|
|
|
|
|
|
|
4341
|
|
|
|
|
|
|
csv (in => "file.csv", filter => "not_blank"); |
|
4342
|
|
|
|
|
|
|
csv (in => "file.csv", filter => "not_empty"); |
|
4343
|
|
|
|
|
|
|
csv (in => "file.csv", filter => "filled"); |
|
4344
|
|
|
|
|
|
|
|
|
4345
|
|
|
|
|
|
|
These are predefined filters |
|
4346
|
|
|
|
|
|
|
|
|
4347
|
|
|
|
|
|
|
Given a file like (line numbers prefixed for doc purpose only): |
|
4348
|
|
|
|
|
|
|
|
|
4349
|
|
|
|
|
|
|
1:1,2,3 |
|
4350
|
|
|
|
|
|
|
2: |
|
4351
|
|
|
|
|
|
|
3:, |
|
4352
|
|
|
|
|
|
|
4:"" |
|
4353
|
|
|
|
|
|
|
5:,, |
|
4354
|
|
|
|
|
|
|
6:, , |
|
4355
|
|
|
|
|
|
|
7:"", |
|
4356
|
|
|
|
|
|
|
8:" " |
|
4357
|
|
|
|
|
|
|
9:4,5,6 |
|
4358
|
|
|
|
|
|
|
|
|
4359
|
|
|
|
|
|
|
=over 2 |
|
4360
|
|
|
|
|
|
|
|
|
4361
|
|
|
|
|
|
|
=item not_blank |
|
4362
|
|
|
|
|
|
|
|
|
4363
|
|
|
|
|
|
|
Filter out the blank lines |
|
4364
|
|
|
|
|
|
|
|
|
4365
|
|
|
|
|
|
|
This filter is a shortcut for |
|
4366
|
|
|
|
|
|
|
|
|
4367
|
|
|
|
|
|
|
filter => { 0 => sub { @{$_[1]} > 1 or |
|
4368
|
|
|
|
|
|
|
defined $_[1][0] && $_[1][0] ne "" } } |
|
4369
|
|
|
|
|
|
|
|
|
4370
|
|
|
|
|
|
|
Due to the implementation, it is currently impossible to also filter lines |
|
4371
|
|
|
|
|
|
|
that consist only of a quoted empty field. These lines are also considered |
|
4372
|
|
|
|
|
|
|
blank lines. |
|
4373
|
|
|
|
|
|
|
|
|
4374
|
|
|
|
|
|
|
With the given example, lines 2 and 4 will be skipped. |
|
4375
|
|
|
|
|
|
|
|
|
4376
|
|
|
|
|
|
|
=item not_empty |
|
4377
|
|
|
|
|
|
|
|
|
4378
|
|
|
|
|
|
|
Filter out lines where all the fields are empty. |
|
4379
|
|
|
|
|
|
|
|
|
4380
|
|
|
|
|
|
|
This filter is a shortcut for |
|
4381
|
|
|
|
|
|
|
|
|
4382
|
|
|
|
|
|
|
filter => { 0 => sub { grep { defined && $_ ne "" } @{$_[1]} } } |
|
4383
|
|
|
|
|
|
|
|
|
4384
|
|
|
|
|
|
|
A space is not regarded as being empty, so given the example data, lines 2, 3, |
|
4385
|
|
|
|
|
|
|
4, 5, and 7 are skipped. |
|
4386
|
|
|
|
|
|
|
|
|
4387
|
|
|
|
|
|
|
=item filled |
|
4388
|
|
|
|
|
|
|
|
|
4389
|
|
|
|
|
|
|
Filter out lines that have no visible data |
|
4390
|
|
|
|
|
|
|
|
|
4391
|
|
|
|
|
|
|
This filter is a shortcut for |
|
4392
|
|
|
|
|
|
|
|
|
4393
|
|
|
|
|
|
|
filter => { 0 => sub { grep { defined && m/\S/ } @{$_[1]} } } |
|
4394
|
|
|
|
|
|
|
|
|
4395
|
|
|
|
|
|
|
This filter rejects all lines that do I<not> have at least one field that does |
|
4396
|
|
|
|
|
|
|
not evaluate to the empty string. |
|
4397
|
|
|
|
|
|
|
|
|
4398
|
|
|
|
|
|
|
With the given example data, this filter would skip lines 2 through 8. |
|
4399
|
|
|
|
|
|
|
|
|
4400
|
|
|
|
|
|
|
=back |
|
4401
|
|
|
|
|
|
|
|
|
4402
|
|
|
|
|
|
|
=back |
|
4403
|
|
|
|
|
|
|
|
|
4404
|
|
|
|
|
|
|
One could also use modules like L<Types::Standard>: |
|
4405
|
|
|
|
|
|
|
|
|
4406
|
|
|
|
|
|
|
use Types::Standard -types; |
|
4407
|
|
|
|
|
|
|
|
|
4408
|
|
|
|
|
|
|
my $type = Tuple[Str, Str, Int, Bool, Optional[Num]]; |
|
4409
|
|
|
|
|
|
|
my $check = $type->compiled_check; |
|
4410
|
|
|
|
|
|
|
|
|
4411
|
|
|
|
|
|
|
# filter with compiled check and warnings |
|
4412
|
|
|
|
|
|
|
my $aoa = csv ( |
|
4413
|
|
|
|
|
|
|
in => \$data, |
|
4414
|
|
|
|
|
|
|
filter => { |
|
4415
|
|
|
|
|
|
|
0 => sub { |
|
4416
|
|
|
|
|
|
|
my $ok = $check->($_[1]) or |
|
4417
|
|
|
|
|
|
|
warn $type->get_message ($_[1]), "\n"; |
|
4418
|
|
|
|
|
|
|
return $ok; |
|
4419
|
|
|
|
|
|
|
}, |
|
4420
|
|
|
|
|
|
|
}, |
|
4421
|
|
|
|
|
|
|
); |
|
4422
|
|
|
|
|
|
|
|
|
4423
|
|
|
|
|
|
|
=item after_in |
|
4424
|
|
|
|
|
|
|
X<after_in> |
|
4425
|
|
|
|
|
|
|
|
|
4426
|
|
|
|
|
|
|
This callback is invoked for each record after all records have been parsed |
|
4427
|
|
|
|
|
|
|
but before returning the reference to the caller. The hook is invoked with |
|
4428
|
|
|
|
|
|
|
two arguments: the current C<CSV> parser object and a reference to the |
|
4429
|
|
|
|
|
|
|
record. The reference can be a reference to a HASH or a reference to an |
|
4430
|
|
|
|
|
|
|
ARRAY as determined by the arguments. |
|
4431
|
|
|
|
|
|
|
|
|
4432
|
|
|
|
|
|
|
This callback can also be passed as an attribute without the C<callbacks> |
|
4433
|
|
|
|
|
|
|
wrapper. |
|
4434
|
|
|
|
|
|
|
|
|
4435
|
|
|
|
|
|
|
=item before_out |
|
4436
|
|
|
|
|
|
|
X<before_out> |
|
4437
|
|
|
|
|
|
|
|
|
4438
|
|
|
|
|
|
|
This callback is invoked for each record before the record is printed. The |
|
4439
|
|
|
|
|
|
|
hook is invoked with two arguments: the current C<CSV> parser object and a |
|
4440
|
|
|
|
|
|
|
reference to the record. The reference can be a reference to a HASH or a |
|
4441
|
|
|
|
|
|
|
reference to an ARRAY as determined by the arguments. |
|
4442
|
|
|
|
|
|
|
|
|
4443
|
|
|
|
|
|
|
This callback can also be passed as an attribute without the C<callbacks> |
|
4444
|
|
|
|
|
|
|
wrapper. |
|
4445
|
|
|
|
|
|
|
|
|
4446
|
|
|
|
|
|
|
This callback makes the row available in C<%_> if the row is a hashref. In |
|
4447
|
|
|
|
|
|
|
this case C<%_> is writable and will change the original row. |
|
4448
|
|
|
|
|
|
|
|
|
4449
|
|
|
|
|
|
|
=item on_in |
|
4450
|
|
|
|
|
|
|
X<on_in> |
|
4451
|
|
|
|
|
|
|
|
|
4452
|
|
|
|
|
|
|
This callback acts exactly as the L</after_in> or the L</before_out> hooks. |
|
4453
|
|
|
|
|
|
|
|
|
4454
|
|
|
|
|
|
|
This callback can also be passed as an attribute without the C<callbacks> |
|
4455
|
|
|
|
|
|
|
wrapper. |
|
4456
|
|
|
|
|
|
|
|
|
4457
|
|
|
|
|
|
|
This callback makes the row available in C<%_> if the row is a hashref. In |
|
4458
|
|
|
|
|
|
|
this case C<%_> is writable and will change the original row. So e.g. with |
|
4459
|
|
|
|
|
|
|
|
|
4460
|
|
|
|
|
|
|
my $aoh = csv ( |
|
4461
|
|
|
|
|
|
|
in => \"foo\n1\n2\n", |
|
4462
|
|
|
|
|
|
|
headers => "auto", |
|
4463
|
|
|
|
|
|
|
on_in => sub { $_{bar} = 2; }, |
|
4464
|
|
|
|
|
|
|
); |
|
4465
|
|
|
|
|
|
|
|
|
4466
|
|
|
|
|
|
|
C<$aoh> will be: |
|
4467
|
|
|
|
|
|
|
|
|
4468
|
|
|
|
|
|
|
[ { foo => 1, |
|
4469
|
|
|
|
|
|
|
bar => 2, |
|
4470
|
|
|
|
|
|
|
}, |
|
4471
|
|
|
|
|
|
|
{ foo => 2, |
|
4472
|
|
|
|
|
|
|
bar => 2, |
|
4473
|
|
|
|
|
|
|
} |
|
4474
|
|
|
|
|
|
|
] |
|
4475
|
|
|
|
|
|
|
|
|
4476
|
|
|
|
|
|
|
=item on_error |
|
4477
|
|
|
|
|
|
|
X<on_error> |
|
4478
|
|
|
|
|
|
|
|
|
4479
|
|
|
|
|
|
|
This callback acts exactly as the L</error> hook. |
|
4480
|
|
|
|
|
|
|
|
|
4481
|
|
|
|
|
|
|
my @err; |
|
4482
|
|
|
|
|
|
|
my $aoa = csv (in => $fh, on_error => sub { @err = @_ }); |
|
4483
|
|
|
|
|
|
|
|
|
4484
|
|
|
|
|
|
|
is identical to |
|
4485
|
|
|
|
|
|
|
|
|
4486
|
|
|
|
|
|
|
my $aoa = csv (in => $fh, callbacks => { |
|
4487
|
|
|
|
|
|
|
error => sub { @err = @_ }, |
|
4488
|
|
|
|
|
|
|
}); |
|
4489
|
|
|
|
|
|
|
|
|
4490
|
|
|
|
|
|
|
It can be used for ignoring errors as well as for just keeping the error in |
|
4491
|
|
|
|
|
|
|
case of analysis after the C<csv ()> function has returned. |
|
4492
|
|
|
|
|
|
|
|
|
4493
|
|
|
|
|
|
|
my @err; |
|
4494
|
|
|
|
|
|
|
my $aoa = csv (in => "bad.csv", on_error => sub { @err = @_ }); |
|
4495
|
|
|
|
|
|
|
die Text::CSV_XS->error_diag if @err or !$aoa; |
|
4496
|
|
|
|
|
|
|
|
|
4497
|
|
|
|
|
|
|
=back |
|
4498
|
|
|
|
|
|
|
|
|
4499
|
|
|
|
|
|
|
=head1 INTERNALS |
|
4500
|
|
|
|
|
|
|
|
|
4501
|
|
|
|
|
|
|
=over 4 |
|
4502
|
|
|
|
|
|
|
|
|
4503
|
|
|
|
|
|
|
=item Combine (...) |
|
4504
|
|
|
|
|
|
|
|
|
4505
|
|
|
|
|
|
|
=item Parse (...) |
|
4506
|
|
|
|
|
|
|
|
|
4507
|
|
|
|
|
|
|
=back |
|
4508
|
|
|
|
|
|
|
|
|
4509
|
|
|
|
|
|
|
The arguments to these internal functions are deliberately not described or |
|
4510
|
|
|
|
|
|
|
documented in order to enable the module authors to make changes when they |
|
4511
|
|
|
|
|
|
|
feel the need for it. Using them is highly discouraged as the API may |
|
4512
|
|
|
|
|
|
|
change in future releases. |
|
4513
|
|
|
|
|
|
|
|
|
4514
|
|
|
|
|
|
|
=head1 EXAMPLES |
|
4515
|
|
|
|
|
|
|
|
|
4516
|
|
|
|
|
|
|
=head2 Reading a CSV file line by line: |
|
4517
|
|
|
|
|
|
|
|
|
4518
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 }); |
|
4519
|
|
|
|
|
|
|
open my $fh, "<", "file.csv" or die "file.csv: $!"; |
|
4520
|
|
|
|
|
|
|
while (my $row = $csv->getline ($fh)) { |
|
4521
|
|
|
|
|
|
|
# do something with @$row |
|
4522
|
|
|
|
|
|
|
} |
|
4523
|
|
|
|
|
|
|
close $fh or die "file.csv: $!"; |
|
4524
|
|
|
|
|
|
|
|
|
4525
|
|
|
|
|
|
|
or |
|
4526
|
|
|
|
|
|
|
|
|
4527
|
|
|
|
|
|
|
my $aoa = csv (in => "file.csv", on_in => sub { |
|
4528
|
|
|
|
|
|
|
# do something with %_ |
|
4529
|
|
|
|
|
|
|
}); |
|
4530
|
|
|
|
|
|
|
|
|
4531
|
|
|
|
|
|
|
=head3 Reading only a single column |
|
4532
|
|
|
|
|
|
|
|
|
4533
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 }); |
|
4534
|
|
|
|
|
|
|
open my $fh, "<", "file.csv" or die "file.csv: $!"; |
|
4535
|
|
|
|
|
|
|
# get only the 4th column |
|
4536
|
|
|
|
|
|
|
my @column = map { $_->[3] } @{$csv->getline_all ($fh)}; |
|
4537
|
|
|
|
|
|
|
close $fh or die "file.csv: $!"; |
|
4538
|
|
|
|
|
|
|
|
|
4539
|
|
|
|
|
|
|
with L</csv>, you could do |
|
4540
|
|
|
|
|
|
|
|
|
4541
|
|
|
|
|
|
|
my @column = map { $_->[0] } |
|
4542
|
|
|
|
|
|
|
@{csv (in => "file.csv", fragment => "col=4")}; |
|
4543
|
|
|
|
|
|
|
|
|
4544
|
|
|
|
|
|
|
=head2 Parsing CSV strings: |
|
4545
|
|
|
|
|
|
|
|
|
4546
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ keep_meta_info => 1, binary => 1 }); |
|
4547
|
|
|
|
|
|
|
|
|
4548
|
|
|
|
|
|
|
my $sample_input_string = |
|
4549
|
|
|
|
|
|
|
qq{"I said, ""Hi!""",Yes,"",2.34,,"1.09","\x{20ac}",}; |
|
4550
|
|
|
|
|
|
|
if ($csv->parse ($sample_input_string)) { |
|
4551
|
|
|
|
|
|
|
my @field = $csv->fields; |
|
4552
|
|
|
|
|
|
|
foreach my $col (0 .. $#field) { |
|
4553
|
|
|
|
|
|
|
my $quo = $csv->is_quoted ($col) ? $csv->{quote_char} : ""; |
|
4554
|
|
|
|
|
|
|
printf "%2d: %s%s%s\n", $col, $quo, $field[$col], $quo; |
|
4555
|
|
|
|
|
|
|
} |
|
4556
|
|
|
|
|
|
|
} |
|
4557
|
|
|
|
|
|
|
else { |
|
4558
|
|
|
|
|
|
|
print STDERR "parse () failed on argument: ", |
|
4559
|
|
|
|
|
|
|
$csv->error_input, "\n"; |
|
4560
|
|
|
|
|
|
|
$csv->error_diag (); |
|
4561
|
|
|
|
|
|
|
} |
|
4562
|
|
|
|
|
|
|
|
|
4563
|
|
|
|
|
|
|
=head3 Parsing CSV from memory |
|
4564
|
|
|
|
|
|
|
|
|
4565
|
|
|
|
|
|
|
Given a complete CSV data-set in scalar C<$data>, generate a list of lists |
|
4566
|
|
|
|
|
|
|
to represent the rows and fields |
|
4567
|
|
|
|
|
|
|
|
|
4568
|
|
|
|
|
|
|
# The data |
|
4569
|
|
|
|
|
|
|
my $data = join "\r\n" => map { join "," => 0 .. 5 } 0 .. 5; |
|
4570
|
|
|
|
|
|
|
|
|
4571
|
|
|
|
|
|
|
# in a loop |
|
4572
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1 }); |
|
4573
|
|
|
|
|
|
|
open my $fh, "<", \$data; |
|
4574
|
|
|
|
|
|
|
my @foo; |
|
4575
|
|
|
|
|
|
|
while (my $row = $csv->getline ($fh)) { |
|
4576
|
|
|
|
|
|
|
push @foo, $row; |
|
4577
|
|
|
|
|
|
|
} |
|
4578
|
|
|
|
|
|
|
close $fh; |
|
4579
|
|
|
|
|
|
|
|
|
4580
|
|
|
|
|
|
|
# a single call |
|
4581
|
|
|
|
|
|
|
my $foo = csv (in => \$data); |
|
4582
|
|
|
|
|
|
|
|
|
4583
|
|
|
|
|
|
|
=head2 Printing CSV data |
|
4584
|
|
|
|
|
|
|
|
|
4585
|
|
|
|
|
|
|
=head3 The fast way: using L</print> |
|
4586
|
|
|
|
|
|
|
|
|
4587
|
|
|
|
|
|
|
An example for creating C<CSV> files using the L</print> method: |
|
4588
|
|
|
|
|
|
|
|
|
4589
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ binary => 1, eol => $/ }); |
|
4590
|
|
|
|
|
|
|
open my $fh, ">", "foo.csv" or die "foo.csv: $!"; |
|
4591
|
|
|
|
|
|
|
for (1 .. 10) { |
|
4592
|
|
|
|
|
|
|
$csv->print ($fh, [ $_, "$_" ]) or $csv->error_diag; |
|
4593
|
|
|
|
|
|
|
} |
|
4594
|
|
|
|
|
|
|
close $fh or die "foo.csv: $!"; |
|
4595
|
|
|
|
|
|
|
|
|
4596
|
|
|
|
|
|
|
=head3 The slow way: using L</combine> and L</string> |
|
4597
|
|
|
|
|
|
|
|
|
4598
|
|
|
|
|
|
|
or using the slower L</combine> and L</string> methods: |
|
4599
|
|
|
|
|
|
|
|
|
4600
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new; |
|
4601
|
|
|
|
|
|
|
|
|
4602
|
|
|
|
|
|
|
open my $csv_fh, ">", "hello.csv" or die "hello.csv: $!"; |
|
4603
|
|
|
|
|
|
|
|
|
4604
|
|
|
|
|
|
|
my @sample_input_fields = ( |
|
4605
|
|
|
|
|
|
|
'You said, "Hello!"', 5.67, |
|
4606
|
|
|
|
|
|
|
'"Surely"', '', '3.14159'); |
|
4607
|
|
|
|
|
|
|
if ($csv->combine (@sample_input_fields)) { |
|
4608
|
|
|
|
|
|
|
print $csv_fh $csv->string, "\n"; |
|
4609
|
|
|
|
|
|
|
} |
|
4610
|
|
|
|
|
|
|
else { |
|
4611
|
|
|
|
|
|
|
print "combine () failed on argument: ", |
|
4612
|
|
|
|
|
|
|
$csv->error_input, "\n"; |
|
4613
|
|
|
|
|
|
|
} |
|
4614
|
|
|
|
|
|
|
close $csv_fh or die "hello.csv: $!"; |
|
4615
|
|
|
|
|
|
|
|
|
4616
|
|
|
|
|
|
|
=head3 Generating CSV into memory |
|
4617
|
|
|
|
|
|
|
|
|
4618
|
|
|
|
|
|
|
Format a data-set (C<@foo>) into a scalar value in memory (C<$data>): |
|
4619
|
|
|
|
|
|
|
|
|
4620
|
|
|
|
|
|
|
# The data |
|
4621
|
|
|
|
|
|
|
my @foo = map { [ 0 .. 5 ] } 0 .. 3; |
|
4622
|
|
|
|
|
|
|
|
|
4623
|
|
|
|
|
|
|
# in a loop |
|
4624
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ binary => 1, auto_diag => 1, eol => "\r\n" }); |
|
4625
|
|
|
|
|
|
|
open my $fh, ">", \my $data; |
|
4626
|
|
|
|
|
|
|
$csv->print ($fh, $_) for @foo; |
|
4627
|
|
|
|
|
|
|
close $fh; |
|
4628
|
|
|
|
|
|
|
|
|
4629
|
|
|
|
|
|
|
# a single call |
|
4630
|
|
|
|
|
|
|
csv (in => \@foo, out => \my $data); |
|
4631
|
|
|
|
|
|
|
|
|
4632
|
|
|
|
|
|
|
=head2 Rewriting CSV |
|
4633
|
|
|
|
|
|
|
|
|
4634
|
|
|
|
|
|
|
=head3 Changing separator |
|
4635
|
|
|
|
|
|
|
|
|
4636
|
|
|
|
|
|
|
Rewrite C<CSV> files with C<;> as separator character to well-formed C<CSV>: |
|
4637
|
|
|
|
|
|
|
|
|
4638
|
|
|
|
|
|
|
use Text::CSV_XS qw( csv ); |
|
4639
|
|
|
|
|
|
|
csv (in => csv (in => "bad.csv", sep_char => ";"), out => *STDOUT); |
|
4640
|
|
|
|
|
|
|
|
|
4641
|
|
|
|
|
|
|
As C<STDOUT> is now the default in L</csv>, a one-liner converting a UTF-16 CSV |
|
4642
|
|
|
|
|
|
|
file with BOM and TAB-separation to valid UTF-8 CSV could be: |
|
4643
|
|
|
|
|
|
|
|
|
4644
|
|
|
|
|
|
|
$ perl -C3 -MText::CSV_XS=csv -we\ |
|
4645
|
|
|
|
|
|
|
'csv(in=>"utf16tab.csv",encoding=>"utf16",sep=>"\t")' >utf8.csv |
|
4646
|
|
|
|
|
|
|
|
|
4647
|
|
|
|
|
|
|
=head3 Unifying EOL |
|
4648
|
|
|
|
|
|
|
|
|
4649
|
|
|
|
|
|
|
Rewrite a CSV file with mixed EOL and/or inconsistent quotation into a new |
|
4650
|
|
|
|
|
|
|
CSV file with consistent EOL and quotation. Attributes apply. |
|
4651
|
|
|
|
|
|
|
|
|
4652
|
|
|
|
|
|
|
use Text::CSV_XS qw( csv ); |
|
4653
|
|
|
|
|
|
|
csv (in => "file.csv", out => "newfile.csv", quote_space => 1); |
|
4654
|
|
|
|
|
|
|
|
|
4655
|
|
|
|
|
|
|
=head2 Dumping database tables to CSV |
|
4656
|
|
|
|
|
|
|
|
|
4657
|
|
|
|
|
|
|
Dumping a database table can be as simple as this (TIMTOWTDI): |
|
4658
|
|
|
|
|
|
|
|
|
4659
|
|
|
|
|
|
|
my $dbh = DBI->connect (...); |
|
4660
|
|
|
|
|
|
|
my $sql = "select * from foo"; |
|
4661
|
|
|
|
|
|
|
|
|
4662
|
|
|
|
|
|
|
# using your own loop |
|
4663
|
|
|
|
|
|
|
open my $fh, ">", "foo.csv" or die "foo.csv: $!\n"; |
|
4664
|
|
|
|
|
|
|
my $csv = Text::CSV_XS->new ({ binary => 1, eol => "\r\n" }); |
|
4665
|
|
|
|
|
|
|
my $sth = $dbh->prepare ($sql); $sth->execute; |
|
4666
|
|
|
|
|
|
|
$csv->print ($fh, $sth->{NAME_lc}); |
|
4667
|
|
|
|
|
|
|
while (my $row = $sth->fetch) { |
|
4668
|
|
|
|
|
|
|
$csv->print ($fh, $row); |
|
4669
|
|
|
|
|
|
|
} |
|
4670
|
|
|
|
|
|
|
|
|
4671
|
|
|
|
|
|
|
# using the csv function, all in memory |
|
4672
|
|
|
|
|
|
|
csv (out => "foo.csv", in => $dbh->selectall_arrayref ($sql)); |
|
4673
|
|
|
|
|
|
|
|
|
4674
|
|
|
|
|
|
|
# using the csv function, streaming with callbacks |
|
4675
|
|
|
|
|
|
|
my $sth = $dbh->prepare ($sql); $sth->execute; |
|
4676
|
|
|
|
|
|
|
csv (out => "foo.csv", in => sub { $sth->fetch }); |
|
4677
|
|
|
|
|
|
|
csv (out => "foo.csv", in => sub { $sth->fetchrow_hashref }); |
|
4678
|
|
|
|
|
|
|
|
|
4679
|
|
|
|
|
|
|
Note that this does not discriminate between "empty" values and NULL-values |
|
4680
|
|
|
|
|
|
|
from the database, as both will be the same empty field in CSV. To enable |
|
4681
|
|
|
|
|
|
|
distinction between the two, use L<C<quote_empty>|/quote_empty>. |
|
4682
|
|
|
|
|
|
|
|
|
4683
|
|
|
|
|
|
|
csv (out => "foo.csv", in => sub { $sth->fetch }, quote_empty => 1); |
|
4684
|
|
|
|
|
|
|
|
|
4685
|
|
|
|
|
|
|
If the database import utility supports special sequences to insert C<NULL> |
|
4686
|
|
|
|
|
|
|
values into the database, like MySQL/MariaDB supports C<\N>, use a filter |
|
4687
|
|
|
|
|
|
|
or a map |
|
4688
|
|
|
|
|
|
|
|
|
4689
|
|
|
|
|
|
|
csv (out => "foo.csv", in => sub { $sth->fetch }, |
|
4690
|
|
|
|
|
|
|
on_in => sub { $_ //= "\\N" for @{$_[1]} }); |
|
4691
|
|
|
|
|
|
|
|
|
4692
|
|
|
|
|
|
|
while (my $row = $sth->fetch) { |
|
4693
|
|
|
|
|
|
|
$csv->print ($fh, [ map { $_ // "\\N" } @$row ]); |
|
4694
|
|
|
|
|
|
|
} |
|
4695
|
|
|
|
|
|
|
|
|
4696
|
|
|
|
|
|
|
Note that this will not work as expected when choosing the backslash (C<\>) |
|
4697
|
|
|
|
|
|
|
as C<escape_char>, as that will cause the C<\> to need to be escaped by yet |
|
4698
|
|
|
|
|
|
|
another C<\>, which will cause the field to need quotation and thus end |
|
4699
|
|
|
|
|
|
|
up as C<"\\N"> instead of C<\N>. See also L<C<undef_str>|/undef_str>. |
|
4700
|
|
|
|
|
|
|
|
|
4701
|
|
|
|
|
|
|
csv (out => "foo.csv", in => sub { $sth->fetch }, undef_str => "\\N"); |
|
4702
|
|
|
|
|
|
|
|
|
4703
|
|
|
|
|
|
|
These special sequences are not recognized by Text::CSV_XS on parsing the |
|
4704
|
|
|
|
|
|
|
CSV generated like this, but map and filter are your friends again |
|
4705
|
|
|
|
|
|
|
|
|
4706
|
|
|
|
|
|
|
while (my $row = $csv->getline ($fh)) { |
|
4707
|
|
|
|
|
|
|
$sth->execute (map { $_ eq "\\N" ? undef : $_ } @$row); |
|
4708
|
|
|
|
|
|
|
} |
|
4709
|
|
|
|
|
|
|
|
|
4710
|
|
|
|
|
|
|
csv (in => "foo.csv", filter => { 1 => sub { |
|
4711
|
|
|
|
|
|
|
$sth->execute (map { $_ eq "\\N" ? undef : $_ } @{$_[1]}); 0; }}); |
|
4712
|
|
|
|
|
|
|
|
|
4713
|
|
|
|
|
|
|
=head2 Converting CSV to JSON |
|
4714
|
|
|
|
|
|
|
|
|
4715
|
|
|
|
|
|
|
use Text::CSV_XS qw( csv ); |
|
4716
|
|
|
|
|
|
|
use JSON; # or Cpanel::JSON::XS for better performance |
|
4717
|
|
|
|
|
|
|
|
|
4718
|
|
|
|
|
|
|
# AoA (no header interpretation) |
|
4719
|
|
|
|
|
|
|
say encode_json (csv (in => "file.csv")); |
|
4720
|
|
|
|
|
|
|
|
|
4721
|
|
|
|
|
|
|
# AoH (convert to structures) |
|
4722
|
|
|
|
|
|
|
say encode_json (csv (in => "file.csv", bom => 1)); |
|
4723
|
|
|
|
|
|
|
|
|
4724
|
|
|
|
|
|
|
Yes, it is that simple. |
|
4725
|
|
|
|
|
|
|
|
|
4726
|
|
|
|
|
|
|
=head2 The examples folder |
|
4727
|
|
|
|
|
|
|
|
|
4728
|
|
|
|
|
|
|
For more extended examples, see the F<examples/> C<1> sub-directory in the |
|
4729
|
|
|
|
|
|
|
original distribution or the git repository C<2>. |
|
4730
|
|
|
|
|
|
|
|
|
4731
|
|
|
|
|
|
|
1. https://github.com/Tux/Text-CSV_XS/tree/master/examples |
|
4732
|
|
|
|
|
|
|
2. https://github.com/Tux/Text-CSV_XS |
|
4733
|
|
|
|
|
|
|
|
|
4734
|
|
|
|
|
|
|
The following files can be found there: |
|
4735
|
|
|
|
|
|
|
|
|
4736
|
|
|
|
|
|
|
=over 2 |
|
4737
|
|
|
|
|
|
|
|
|
4738
|
|
|
|
|
|
|
=item parser-xs.pl |
|
4739
|
|
|
|
|
|
|
X<parser-xs.pl> |
|
4740
|
|
|
|
|
|
|
|
|
4741
|
|
|
|
|
|
|
This can be used as a boilerplate to parse invalid C<CSV> and parse beyond |
|
4742
|
|
|
|
|
|
|
(expected) errors, as an alternative to using the L</error> callback. |
|
4743
|
|
|
|
|
|
|
|
|
4744
|
|
|
|
|
|
|
$ perl examples/parser-xs.pl bad.csv >good.csv |
|
4745
|
|
|
|
|
|
|
|
|
4746
|
|
|
|
|
|
|
=item csv-check |
|
4747
|
|
|
|
|
|
|
X<csv-check> |
|
4748
|
|
|
|
|
|
|
|
|
4749
|
|
|
|
|
|
|
This is a command-line tool that uses parser-xs.pl techniques to check the |
|
4750
|
|
|
|
|
|
|
C<CSV> file and report on its content. |
|
4751
|
|
|
|
|
|
|
|
|
4752
|
|
|
|
|
|
|
$ csv-check files/utf8.csv |
|
4753
|
|
|
|
|
|
|
Checked files/utf8.csv with csv-check 1.9 |
|
4754
|
|
|
|
|
|
|
using Text::CSV_XS 1.32 with perl 5.26.0 and Unicode 9.0.0 |
|
4755
|
|
|
|
|
|
|
OK: rows: 1, columns: 2 |
|
4756
|
|
|
|
|
|
|
sep = <,>, quo = <">, bin = <1>, eol = <"\n"> |
|
4757
|
|
|
|
|
|
|
|
|
4758
|
|
|
|
|
|
|
=item csv-split |
|
4759
|
|
|
|
|
|
|
X<csv-split> |
|
4760
|
|
|
|
|
|
|
|
|
4761
|
|
|
|
|
|
|
This command splits C<CSV> files into smaller files, keeping (part of) the |
|
4762
|
|
|
|
|
|
|
header. Options include maximum number of (data) rows per file and maximum |
|
4763
|
|
|
|
|
|
|
number of columns per file or a combination of the two. |
|
4764
|
|
|
|
|
|
|
|
|
4765
|
|
|
|
|
|
|
=item csv2xls |
|
4766
|
|
|
|
|
|
|
X<csv2xls> |
|
4767
|
|
|
|
|
|
|
|
|
4768
|
|
|
|
|
|
|
A script to convert C<CSV> to Microsoft Excel (C<XLS>). This requires extra |
|
4769
|
|
|
|
|
|
|
modules L<Date::Calc> and L<Spreadsheet::WriteExcel>. The converter accepts |
|
4770
|
|
|
|
|
|
|
various options and can produce UTF-8 compliant Excel files. |
|
4771
|
|
|
|
|
|
|
|
|
4772
|
|
|
|
|
|
|
=item csv2xlsx |
|
4773
|
|
|
|
|
|
|
X<csv2xlsx> |
|
4774
|
|
|
|
|
|
|
|
|
4775
|
|
|
|
|
|
|
A script to convert C<CSV> to Microsoft Excel (C<XLSX>). This requires the |
|
4776
|
|
|
|
|
|
|
modules L<Date::Calc> and L<Excel::Writer::XLSX>. The converter does |
|
4777
|
|
|
|
|
|
|
accept various options including merging several C<CSV> files into a single |
|
4778
|
|
|
|
|
|
|
Excel file. |
|
4779
|
|
|
|
|
|
|
|
|
4780
|
|
|
|
|
|
|
=item csvdiff |
|
4781
|
|
|
|
|
|
|
X<csvdiff> |
|
4782
|
|
|
|
|
|
|
|
|
4783
|
|
|
|
|
|
|
A script that provides colorized diff on sorted CSV files, assuming first |
|
4784
|
|
|
|
|
|
|
line is header and first field is the key. Output options include colorized |
|
4785
|
|
|
|
|
|
|
ANSI escape codes or HTML. |
|
4786
|
|
|
|
|
|
|
|
|
4787
|
|
|
|
|
|
|
$ csvdiff --html --output=diff.html file1.csv file2.csv |
|
4788
|
|
|
|
|
|
|
|
|
4789
|
|
|
|
|
|
|
=item rewrite.pl |
|
4790
|
|
|
|
|
|
|
X<rewrite.pl> |
|
4791
|
|
|
|
|
|
|
|
|
4792
|
|
|
|
|
|
|
A script to rewrite (in)valid CSV into valid CSV files. Script has options |
|
4793
|
|
|
|
|
|
|
to generate confusing CSV files or CSV files that conform to Dutch MS-Excel |
|
4794
|
|
|
|
|
|
|
exports (using C<;> as separation). |
|
4795
|
|
|
|
|
|
|
|
|
4796
|
|
|
|
|
|
|
Script - by default - honors BOM and auto-detects separation converting it |
|
4797
|
|
|
|
|
|
|
to default standard CSV with C<,> as separator. |
|
4798
|
|
|
|
|
|
|
|
|
4799
|
|
|
|
|
|
|
=back |
|
4800
|
|
|
|
|
|
|
|
|
4801
|
|
|
|
|
|
|
=head1 CAVEATS |
|
4802
|
|
|
|
|
|
|
|
|
4803
|
|
|
|
|
|
|
Text::CSV_XS is I<not> designed to detect the characters used to quote and |
|
4804
|
|
|
|
|
|
|
separate fields. The parsing is done using predefined (default) settings. |
|
4805
|
|
|
|
|
|
|
In the examples sub-directory, you can find scripts that demonstrate how |
|
4806
|
|
|
|
|
|
|
you could try to detect these characters yourself. |
|
4807
|
|
|
|
|
|
|
|
|
4808
|
|
|
|
|
|
|
=head2 Microsoft Excel |
|
4809
|
|
|
|
|
|
|
|
|
4810
|
|
|
|
|
|
|
The import/export from Microsoft Excel is a I<risky task>, according to the |
|
4811
|
|
|
|
|
|
|
documentation in C<Text::CSV::Separator>. Microsoft uses the system's list |
|
4812
|
|
|
|
|
|
|
separator defined in the regional settings, which happens to be a semicolon |
|
4813
|
|
|
|
|
|
|
for Dutch, German and Spanish (and probably some others as well). For the |
|
4814
|
|
|
|
|
|
|
English locale, the default is a comma. In Windows however, the user is |
|
4815
|
|
|
|
|
|
|
free to choose a predefined locale, and then change I<every> individual |
|
4816
|
|
|
|
|
|
|
setting in it, so checking the locale is no solution. |
|
4817
|
|
|
|
|
|
|
|
|
4818
|
|
|
|
|
|
|
As of version 1.17, a lone first line with just |
|
4819
|
|
|
|
|
|
|
|
|
4820
|
|
|
|
|
|
|
sep=; |
|
4821
|
|
|
|
|
|
|
|
|
4822
|
|
|
|
|
|
|
will be recognized and honored when parsing with L</getline>. |
|
4823
|
|
|
|
|
|
|
|
|
4824
|
|
|
|
|
|
|
=head1 TODO |
|
4825
|
|
|
|
|
|
|
|
|
4826
|
|
|
|
|
|
|
=over 2 |
|
4827
|
|
|
|
|
|
|
|
|
4828
|
|
|
|
|
|
|
=item More Errors & Warnings |
|
4829
|
|
|
|
|
|
|
|
|
4830
|
|
|
|
|
|
|
New extensions ought to be clear and concise in reporting what error has |
|
4831
|
|
|
|
|
|
|
occurred where and why, and maybe also offer a remedy to the problem. |
|
4832
|
|
|
|
|
|
|
|
|
4833
|
|
|
|
|
|
|
L</error_diag> is a (very) good start, but there is more work to be done in |
|
4834
|
|
|
|
|
|
|
this area. |
|
4835
|
|
|
|
|
|
|
|
|
4836
|
|
|
|
|
|
|
Basic calls should croak or warn on illegal parameters. Errors should be |
|
4837
|
|
|
|
|
|
|
documented. |
|
4838
|
|
|
|
|
|
|
|
|
4839
|
|
|
|
|
|
|
=item setting meta info |
|
4840
|
|
|
|
|
|
|
|
|
4841
|
|
|
|
|
|
|
Future extensions might include extending the L</meta_info>, L</is_quoted>, |
|
4842
|
|
|
|
|
|
|
and L</is_binary> to accept setting these flags for fields, so you can |
|
4843
|
|
|
|
|
|
|
specify which fields are quoted in the L</combine>/L</string> combination. |
|
4844
|
|
|
|
|
|
|
|
|
4845
|
|
|
|
|
|
|
$csv->meta_info (0, 1, 1, 3, 0, 0); |
|
4846
|
|
|
|
|
|
|
$csv->is_quoted (3, 1); |
|
4847
|
|
|
|
|
|
|
|
|
4848
|
|
|
|
|
|
|
L<Metadata Vocabulary for Tabular Data|http://w3c.github.io/csvw/metadata/> |
|
4849
|
|
|
|
|
|
|
(a W3C editor's draft) could be an example for supporting more metadata. |
|
4850
|
|
|
|
|
|
|
|
|
4851
|
|
|
|
|
|
|
=item Parse the whole file at once |
|
4852
|
|
|
|
|
|
|
|
|
4853
|
|
|
|
|
|
|
Implement new methods or functions that enable parsing of a complete file |
|
4854
|
|
|
|
|
|
|
at once, returning a list of hashes. Possible extension to this could be to |
|
4855
|
|
|
|
|
|
|
enable a column selection on the call: |
|
4856
|
|
|
|
|
|
|
|
|
4857
|
|
|
|
|
|
|
my @AoH = $csv->parse_file ($filename, { cols => [ 1, 4..8, 12 ]}); |
|
4858
|
|
|
|
|
|
|
|
|
4859
|
|
|
|
|
|
|
returning something like |
|
4860
|
|
|
|
|
|
|
|
|
4861
|
|
|
|
|
|
|
[ { fields => [ 1, 2, "foo", 4.5, undef, "", 8 ], |
|
4862
|
|
|
|
|
|
|
flags => [ ... ], |
|
4863
|
|
|
|
|
|
|
}, |
|
4864
|
|
|
|
|
|
|
{ fields => [ ... ], |
|
4865
|
|
|
|
|
|
|
. |
|
4866
|
|
|
|
|
|
|
}, |
|
4867
|
|
|
|
|
|
|
] |
|
4868
|
|
|
|
|
|
|
|
|
4869
|
|
|
|
|
|
|
Note that the L</csv> function already supports most of this, but does not |
|
4870
|
|
|
|
|
|
|
return flags. L</getline_all> returns all rows for an open stream, but this |
|
4871
|
|
|
|
|
|
|
will not return flags either. L</fragment> can reduce the required rows |
|
4872
|
|
|
|
|
|
|
I<or> columns, but cannot combine them. |
|
4873
|
|
|
|
|
|
|
|
|
4874
|
|
|
|
|
|
|
=item provider |
|
4875
|
|
|
|
|
|
|
|
|
4876
|
|
|
|
|
|
|
csv (in => $fh) vs csv (provider => sub { get_line }); |
|
4877
|
|
|
|
|
|
|
|
|
4878
|
|
|
|
|
|
|
Whatever the attribute name might end up being, this should make it easier |
|
4879
|
|
|
|
|
|
|
to add input providers for parsing. Currently most special variations for |
|
4880
|
|
|
|
|
|
|
the C<in> attribute are aimed at CSV generation: e.g. a callback is defined |
|
4881
|
|
|
|
|
|
|
to return a reference to a record. This new attribute should enable passing |
|
4882
|
|
|
|
|
|
|
data to parse, like getline. |
|
4883
|
|
|
|
|
|
|
|
|
4884
|
|
|
|
|
|
|
Suggested by Johan Vromans. |
|
4885
|
|
|
|
|
|
|
|
|
4886
|
|
|
|
|
|
|
=item Cookbook |
|
4887
|
|
|
|
|
|
|
|
|
4888
|
|
|
|
|
|
|
Write a document that has recipes for most known non-standard (and maybe |
|
4889
|
|
|
|
|
|
|
some standard) C<CSV> formats, including formats that use C<TAB>, C<;>, |
|
4890
|
|
|
|
|
|
|
C<|>, or other non-comma separators. |
|
4891
|
|
|
|
|
|
|
|
|
4892
|
|
|
|
|
|
|
Examples could be taken from W3C's L<CSV on the Web: Use Cases and |
|
4893
|
|
|
|
|
|
|
Requirements|http://w3c.github.io/csvw/use-cases-and-requirements/index.html> |
|
4894
|
|
|
|
|
|
|
|
|
4895
|
|
|
|
|
|
|
=item Steal |
|
4896
|
|
|
|
|
|
|
|
|
4897
|
|
|
|
|
|
|
Steal good new ideas and features from L<PapaParse|http://papaparse.com> or |
|
4898
|
|
|
|
|
|
|
L<csvkit|http://csvkit.readthedocs.org>. |
|
4899
|
|
|
|
|
|
|
|
|
4900
|
|
|
|
|
|
|
=item Raku support |
|
4901
|
|
|
|
|
|
|
|
|
4902
|
|
|
|
|
|
|
Raku support can be found L<here|https://github.com/Tux/CSV>. The interface |
|
4903
|
|
|
|
|
|
|
is richer in support than the Perl5 API, as Raku supports more types. |
|
4904
|
|
|
|
|
|
|
|
|
4905
|
|
|
|
|
|
|
The Raku version does not (yet) support pure binary CSV datasets. |
|
4906
|
|
|
|
|
|
|
|
|
4907
|
|
|
|
|
|
|
=back |
|
4908
|
|
|
|
|
|
|
|
|
4909
|
|
|
|
|
|
|
=head2 NOT TODO |
|
4910
|
|
|
|
|
|
|
|
|
4911
|
|
|
|
|
|
|
=over 2 |
|
4912
|
|
|
|
|
|
|
|
|
4913
|
|
|
|
|
|
|
=item combined methods |
|
4914
|
|
|
|
|
|
|
|
|
4915
|
|
|
|
|
|
|
Requests for adding means (methods) that combine L</combine> and L</string> |
|
4916
|
|
|
|
|
|
|
in a single call will B<not> be honored (use L</print> instead). Likewise |
|
4917
|
|
|
|
|
|
|
for L</parse> and L</fields> (use L</getline> instead), given the problems |
|
4918
|
|
|
|
|
|
|
with embedded newlines. |
|
4919
|
|
|
|
|
|
|
|
|
4920
|
|
|
|
|
|
|
=back |
|
4921
|
|
|
|
|
|
|
|
|
4922
|
|
|
|
|
|
|
=head2 Release plan |
|
4923
|
|
|
|
|
|
|
|
|
4924
|
|
|
|
|
|
|
No guarantees, but this is what I had in mind some time ago: |
|
4925
|
|
|
|
|
|
|
|
|
4926
|
|
|
|
|
|
|
=over 2 |
|
4927
|
|
|
|
|
|
|
|
|
4928
|
|
|
|
|
|
|
=item * |
|
4929
|
|
|
|
|
|
|
|
|
4930
|
|
|
|
|
|
|
DIAGNOSTICS section in pod to *describe* the errors (see below) |
|
4931
|
|
|
|
|
|
|
|
|
4932
|
|
|
|
|
|
|
=back |
|
4933
|
|
|
|
|
|
|
|
|
4934
|
|
|
|
|
|
|
=head1 EBCDIC |
|
4935
|
|
|
|
|
|
|
|
|
4936
|
|
|
|
|
|
|
Everything should now work on native EBCDIC systems. As the test does not |
|
4937
|
|
|
|
|
|
|
cover all possible codepoints and L<Encode> does not support C<utf-ebcdic>, |
|
4938
|
|
|
|
|
|
|
there is no guarantee that all handling of Unicode is done correctly. |
|
4939
|
|
|
|
|
|
|
|
|
4940
|
|
|
|
|
|
|
Opening C<EBCDIC> encoded files on C<ASCII>+ systems is likely to succeed |
|
4941
|
|
|
|
|
|
|
using Encode's C<cp37>, C<cp1047>, or C<posix-bc>: |
|
4942
|
|
|
|
|
|
|
|
|
4943
|
|
|
|
|
|
|
open my $fh, "<:encoding(cp1047)", "ebcdic_file.csv" or die "..."; |
|
4944
|
|
|
|
|
|
|
|
|
4945
|
|
|
|
|
|
|
=head1 DIAGNOSTICS |
|
4946
|
|
|
|
|
|
|
|
|
4947
|
|
|
|
|
|
|
Still under construction ... |
|
4948
|
|
|
|
|
|
|
|
|
4949
|
|
|
|
|
|
|
If an error occurs, C<< $csv->error_diag >> can be used to get information |
|
4950
|
|
|
|
|
|
|
on the cause of the failure. Note that for speed reasons the internal value |
|
4951
|
|
|
|
|
|
|
is never cleared on success, so using the value returned by L</error_diag> |
|
4952
|
|
|
|
|
|
|
in normal cases - when no error occurred - may cause unexpected results. |
|
4953
|
|
|
|
|
|
|
|
|
4954
|
|
|
|
|
|
|
If the constructor failed, the cause can be found using L</error_diag> as a |
|
4955
|
|
|
|
|
|
|
class method, like C<< Text::CSV_XS->error_diag >>. |
|
4956
|
|
|
|
|
|
|
|
|
4957
|
|
|
|
|
|
|
The C<< $csv->error_diag >> method is automatically invoked upon error when |
|
4958
|
|
|
|
|
|
|
the constructor was called with L<C<auto_diag>|/auto_diag> set to C<1> or |
|
4959
|
|
|
|
|
|
|
C<2>, or when L<autodie> is in effect. When set to C<1>, this will cause a |
|
4960
|
|
|
|
|
|
|
C<warn> with the error message, when set to C<2>, it will C<die>. C<2012 - |
|
4961
|
|
|
|
|
|
|
EOF> is excluded from L<C<auto_diag>|/auto_diag> reports. |
|
4962
|
|
|
|
|
|
|
|
|
4963
|
|
|
|
|
|
|
Errors can be (individually) caught using the L</error> callback. |
|
4964
|
|
|
|
|
|
|
|
|
4965
|
|
|
|
|
|
|
The errors as described below are available. I have tried to make the error |
|
4966
|
|
|
|
|
|
|
itself explanatory enough, but more descriptions will be added. For most of |
|
4967
|
|
|
|
|
|
|
these errors, the first three capitals describe the error category: |
|
4968
|
|
|
|
|
|
|
|
|
4969
|
|
|
|
|
|
|
=over 2 |
|
4970
|
|
|
|
|
|
|
|
|
4971
|
|
|
|
|
|
|
=item * |
|
4972
|
|
|
|
|
|
|
INI |
|
4973
|
|
|
|
|
|
|
|
|
4974
|
|
|
|
|
|
|
Initialization error or option conflict. |
|
4975
|
|
|
|
|
|
|
|
|
4976
|
|
|
|
|
|
|
=item * |
|
4977
|
|
|
|
|
|
|
ECR |
|
4978
|
|
|
|
|
|
|
|
|
4979
|
|
|
|
|
|
|
Carriage-Return related parse error. |
|
4980
|
|
|
|
|
|
|
|
|
4981
|
|
|
|
|
|
|
=item * |
|
4982
|
|
|
|
|
|
|
EOF |
|
4983
|
|
|
|
|
|
|
|
|
4984
|
|
|
|
|
|
|
End-Of-File related parse error. |
|
4985
|
|
|
|
|
|
|
|
|
4986
|
|
|
|
|
|
|
=item * |
|
4987
|
|
|
|
|
|
|
EIQ |
|
4988
|
|
|
|
|
|
|
|
|
4989
|
|
|
|
|
|
|
Parse error inside quotation. |
|
4990
|
|
|
|
|
|
|
|
|
4991
|
|
|
|
|
|
|
=item * |
|
4992
|
|
|
|
|
|
|
EIF |
|
4993
|
|
|
|
|
|
|
|
|
4994
|
|
|
|
|
|
|
Parse error inside field. |
|
4995
|
|
|
|
|
|
|
|
|
4996
|
|
|
|
|
|
|
=item * |
|
4997
|
|
|
|
|
|
|
ECB |
|
4998
|
|
|
|
|
|
|
|
|
4999
|
|
|
|
|
|
|
Combine error. |
|
5000
|
|
|
|
|
|
|
|
|
5001
|
|
|
|
|
|
|
=item * |
|
5002
|
|
|
|
|
|
|
EHR |
|
5003
|
|
|
|
|
|
|
|
|
5004
|
|
|
|
|
|
|
HashRef parse related error. |
|
5005
|
|
|
|
|
|
|
|
|
5006
|
|
|
|
|
|
|
=back |
|
5007
|
|
|
|
|
|
|
|
|
5008
|
|
|
|
|
|
|
And below should be the complete list of error codes that can be returned: |
|
5009
|
|
|
|
|
|
|
|
|
5010
|
|
|
|
|
|
|
=over 2 |
|
5011
|
|
|
|
|
|
|
|
|
5012
|
|
|
|
|
|
|
=item * |
|
5013
|
|
|
|
|
|
|
1001 "INI - sep_char is equal to quote_char or escape_char" |
|
5014
|
|
|
|
|
|
|
X<1001> |
|
5015
|
|
|
|
|
|
|
|
|
5016
|
|
|
|
|
|
|
The L<separation character|/sep_char> cannot be equal to L<the quotation |
|
5017
|
|
|
|
|
|
|
character|/quote_char> or to L<the escape character|/escape_char>, as this |
|
5018
|
|
|
|
|
|
|
would invalidate all parsing rules. |
|
5019
|
|
|
|
|
|
|
|
|
5020
|
|
|
|
|
|
|
=item * |
|
5021
|
|
|
|
|
|
|
1002 "INI - allow_whitespace with escape_char or quote_char SP or TAB" |
|
5022
|
|
|
|
|
|
|
X<1002> |
|
5023
|
|
|
|
|
|
|
|
|
5024
|
|
|
|
|
|
|
Using the L<C<allow_whitespace>|/allow_whitespace> attribute when either |
|
5025
|
|
|
|
|
|
|
L<C<quote_char>|/quote_char> or L<C<escape_char>|/escape_char> is equal to |
|
5026
|
|
|
|
|
|
|
C<SPACE> or C<TAB> is too ambiguous to allow. |
|
5027
|
|
|
|
|
|
|
|
|
5028
|
|
|
|
|
|
|
=item * |
|
5029
|
|
|
|
|
|
|
1003 "INI - \r or \n in main attr not allowed" |
|
5030
|
|
|
|
|
|
|
X<1003> |
|
5031
|
|
|
|
|
|
|
|
|
5032
|
|
|
|
|
|
|
Using default L<C<eol>|/eol> characters in either L<C<sep_char>|/sep_char>, |
|
5033
|
|
|
|
|
|
|
L<C<quote_char>|/quote_char>, or L<C<escape_char>|/escape_char> is not |
|
5034
|
|
|
|
|
|
|
allowed. |
|
5035
|
|
|
|
|
|
|
|
|
5036
|
|
|
|
|
|
|
=item * |
|
5037
|
|
|
|
|
|
|
1004 "INI - callbacks should be undef or a hashref" |
|
5038
|
|
|
|
|
|
|
X<1004> |
|
5039
|
|
|
|
|
|
|
|
|
5040
|
|
|
|
|
|
|
The L<C<callbacks>|/Callbacks> attribute is only allowed to be C<undef> or |
|
5041
|
|
|
|
|
|
|
a hash reference. |
|
5042
|
|
|
|
|
|
|
|
|
5043
|
|
|
|
|
|
|
=item * |
|
5044
|
|
|
|
|
|
|
1005 "INI - EOL too long" |
|
5045
|
|
|
|
|
|
|
X<1005> |
|
5046
|
|
|
|
|
|
|
|
|
5047
|
|
|
|
|
|
|
The value passed for EOL is exceeding its maximum length (16). |
|
5048
|
|
|
|
|
|
|
|
|
5049
|
|
|
|
|
|
|
=item * |
|
5050
|
|
|
|
|
|
|
1006 "INI - SEP too long" |
|
5051
|
|
|
|
|
|
|
X<1006> |
|
5052
|
|
|
|
|
|
|
|
|
5053
|
|
|
|
|
|
|
The value passed for SEP is exceeding its maximum length (16). |
|
5054
|
|
|
|
|
|
|
|
|
5055
|
|
|
|
|
|
|
=item * |
|
5056
|
|
|
|
|
|
|
1007 "INI - QUOTE too long" |
|
5057
|
|
|
|
|
|
|
X<1007> |
|
5058
|
|
|
|
|
|
|
|
|
5059
|
|
|
|
|
|
|
The value passed for QUOTE is exceeding its maximum length (16). |
|
5060
|
|
|
|
|
|
|
|
|
5061
|
|
|
|
|
|
|
=item * |
|
5062
|
|
|
|
|
|
|
1008 "INI - SEP undefined" |
|
5063
|
|
|
|
|
|
|
X<1008> |
|
5064
|
|
|
|
|
|
|
|
|
5065
|
|
|
|
|
|
|
The value passed for SEP should be defined and not empty. |
|
5066
|
|
|
|
|
|
|
|
|
5067
|
|
|
|
|
|
|
=item * |
|
5068
|
|
|
|
|
|
|
1010 "INI - the header is empty" |
|
5069
|
|
|
|
|
|
|
X<1010> |
|
5070
|
|
|
|
|
|
|
|
|
5071
|
|
|
|
|
|
|
The header line parsed in the L</header> is empty. |
|
5072
|
|
|
|
|
|
|
|
|
5073
|
|
|
|
|
|
|
=item * |
|
5074
|
|
|
|
|
|
|
1011 "INI - the header contains more than one valid separator" |
|
5075
|
|
|
|
|
|
|
X<1011> |
|
5076
|
|
|
|
|
|
|
|
|
5077
|
|
|
|
|
|
|
The header line parsed in the L</header> contains more than one (unique) |
|
5078
|
|
|
|
|
|
|
separator character out of the allowed set of separators. |
|
5079
|
|
|
|
|
|
|
|
|
5080
|
|
|
|
|
|
|
=item * |
|
5081
|
|
|
|
|
|
|
1012 "INI - the header contains an empty field" |
|
5082
|
|
|
|
|
|
|
X<1012> |
|
5083
|
|
|
|
|
|
|
|
|
5084
|
|
|
|
|
|
|
The header line parsed in the L</header> contains an empty field. |
|
5085
|
|
|
|
|
|
|
|
|
5086
|
|
|
|
|
|
|
=item * |
|
5087
|
|
|
|
|
|
|
1013 "INI - the header contains non-unique fields" |
|
5088
|
|
|
|
|
|
|
X<1013> |
|
5089
|
|
|
|
|
|
|
|
|
5090
|
|
|
|
|
|
|
The header line parsed in the L</header> contains at least two identical |
|
5091
|
|
|
|
|
|
|
fields. |
|
5092
|
|
|
|
|
|
|
|
|
5093
|
|
|
|
|
|
|
=item * |
|
5094
|
|
|
|
|
|
|
1014 "INI - header called on undefined stream" |
|
5095
|
|
|
|
|
|
|
X<1014> |
|
5096
|
|
|
|
|
|
|
|
|
5097
|
|
|
|
|
|
|
The header line cannot be parsed from an undefined source. |
|
5098
|
|
|
|
|
|
|
|
|
5099
|
|
|
|
|
|
|
=item * |
|
5100
|
|
|
|
|
|
|
1500 "PRM - Invalid/unsupported argument(s)" |
|
5101
|
|
|
|
|
|
|
X<1500> |
|
5102
|
|
|
|
|
|
|
|
|
5103
|
|
|
|
|
|
|
Function or method called with invalid argument(s) or parameter(s). |
|
5104
|
|
|
|
|
|
|
|
|
5105
|
|
|
|
|
|
|
=item * |
|
5106
|
|
|
|
|
|
|
1501 "PRM - The key attribute is passed as an unsupported type" |
|
5107
|
|
|
|
|
|
|
X<1501> |
|
5108
|
|
|
|
|
|
|
|
|
5109
|
|
|
|
|
|
|
The C<key> attribute is of an unsupported type. |
|
5110
|
|
|
|
|
|
|
|
|
5111
|
|
|
|
|
|
|
=item * |
|
5112
|
|
|
|
|
|
|
1502 "PRM - The value attribute is passed without the key attribute" |
|
5113
|
|
|
|
|
|
|
X<1502> |
|
5114
|
|
|
|
|
|
|
|
|
5115
|
|
|
|
|
|
|
The C<value> attribute is only allowed when a valid key is given. |
|
5116
|
|
|
|
|
|
|
|
|
5117
|
|
|
|
|
|
|
=item * |
|
5118
|
|
|
|
|
|
|
1503 "PRM - The value attribute is passed as an unsupported type" |
|
5119
|
|
|
|
|
|
|
X<1503> |
|
5120
|
|
|
|
|
|
|
|
|
5121
|
|
|
|
|
|
|
The C<value> attribute is of an unsupported type. |
|
5122
|
|
|
|
|
|
|
|
|
5123
|
|
|
|
|
|
|
=item * |
|
5124
|
|
|
|
|
|
|
2010 "ECR - QUO char inside quotes followed by CR not part of EOL" |
|
5125
|
|
|
|
|
|
|
X<2010> |
|
5126
|
|
|
|
|
|
|
|
|
5127
|
|
|
|
|
|
|
When L<C<eol>|/eol> has been set to anything but the default, like |
|
5128
|
|
|
|
|
|
|
C<"\r\t\n">, and the C<"\r"> is following the B<second> (closing) |
|
5129
|
|
|
|
|
|
|
L<C<quote_char>|/quote_char>, where the characters following the C<"\r"> do |
|
5130
|
|
|
|
|
|
|
not make up the L<C<eol>|/eol> sequence, this is an error. |
|
5131
|
|
|
|
|
|
|
|
|
5132
|
|
|
|
|
|
|
=item * |
|
5133
|
|
|
|
|
|
|
2011 "ECR - Characters after end of quoted field" |
|
5134
|
|
|
|
|
|
|
X<2011> |
|
5135
|
|
|
|
|
|
|
|
|
5136
|
|
|
|
|
|
|
Sequences like C<1,foo,"bar"baz,22,1> are not allowed. C<"bar"> is a quoted |
|
5137
|
|
|
|
|
|
|
field and after the closing double-quote, there should be either a new-line |
|
5138
|
|
|
|
|
|
|
sequence or a separation character. |
|
5139
|
|
|
|
|
|
|
|
|
5140
|
|
|
|
|
|
|
=item * |
|
5141
|
|
|
|
|
|
|
2012 "EOF - End of data in parsing input stream" |
|
5142
|
|
|
|
|
|
|
X<2012> |
|
5143
|
|
|
|
|
|
|
|
|
5144
|
|
|
|
|
|
|
Self-explanatory. End-of-file while parsing a stream. This can happen only |
|
5145
|
|
|
|
|
|
|
when reading from streams with L</getline>, as using L</parse> is done on |
|
5146
|
|
|
|
|
|
|
strings that are not required to have a trailing L<C<eol>|/eol>. |
|
5147
|
|
|
|
|
|
|
|
|
5148
|
|
|
|
|
|
|
=item * |
|
5149
|
|
|
|
|
|
|
2013 "INI - Specification error for fragments RFC7111" |
|
5150
|
|
|
|
|
|
|
X<2013> |
|
5151
|
|
|
|
|
|
|
|
|
5152
|
|
|
|
|
|
|
Invalid specification for the URI L</fragment>. |
|
5153
|
|
|
|
|
|
|
|
|
5154
|
|
|
|
|
|
|
=item * |
|
5155
|
|
|
|
|
|
|
2014 "ENF - Inconsistent number of fields" |
|
5156
|
|
|
|
|
|
|
X<2014> |
|
5157
|
|
|
|
|
|
|
|
|
5158
|
|
|
|
|
|
|
Inconsistent number of fields under strict parsing. |
|
5159
|
|
|
|
|
|
|
|
|
5160
|
|
|
|
|
|
|
=item * |
|
5161
|
|
|
|
|
|
|
2015 "ERW - Empty row" |
|
5162
|
|
|
|
|
|
|
X<2015> |
|
5163
|
|
|
|
|
|
|
|
|
5164
|
|
|
|
|
|
|
An empty row was not allowed. |
|
5165
|
|
|
|
|
|
|
|
|
5166
|
|
|
|
|
|
|
=item * |
|
5167
|
|
|
|
|
|
|
2016 "EOL - Inconsistent EOL" |
|
5168
|
|
|
|
|
|
|
X<2016> |
|
5169
|
|
|
|
|
|
|
|
|
5170
|
|
|
|
|
|
|
Inconsistent End-Of-Line detected under strict_eol parsing. |
|
5171
|
|
|
|
|
|
|
|
|
5172
|
|
|
|
|
|
|
=item * |
|
5173
|
|
|
|
|
|
|
2021 "EIQ - NL char inside quotes, binary off" |
|
5174
|
|
|
|
|
|
|
X<2021> |
|
5175
|
|
|
|
|
|
|
|
|
5176
|
|
|
|
|
|
|
Sequences like C<1,"foo\nbar",22,1> are allowed only when the binary option |
|
5177
|
|
|
|
|
|
|
has been selected with the constructor. |
|
5178
|
|
|
|
|
|
|
|
|
5179
|
|
|
|
|
|
|
=item * |
|
5180
|
|
|
|
|
|
|
2022 "EIQ - CR char inside quotes, binary off" |
|
5181
|
|
|
|
|
|
|
X<2022> |
|
5182
|
|
|
|
|
|
|
|
|
5183
|
|
|
|
|
|
|
Sequences like C<1,"foo\rbar",22,1> are allowed only when the binary option |
|
5184
|
|
|
|
|
|
|
has been selected with the constructor. |
|
5185
|
|
|
|
|
|
|
|
|
5186
|
|
|
|
|
|
|
=item * |
|
5187
|
|
|
|
|
|
|
2023 "EIQ - QUO character not allowed" |
|
5188
|
|
|
|
|
|
|
X<2023> |
|
5189
|
|
|
|
|
|
|
|
|
5190
|
|
|
|
|
|
|
Sequences like C<"foo "bar" baz",qu> and C<2023,",2008-04-05,"Foo, Bar",\n> |
|
5191
|
|
|
|
|
|
|
will cause this error. |
|
5192
|
|
|
|
|
|
|
|
|
5193
|
|
|
|
|
|
|
=item * |
|
5194
|
|
|
|
|
|
|
2024 "EIQ - EOF cannot be escaped, not even inside quotes" |
|
5195
|
|
|
|
|
|
|
X<2024> |
|
5196
|
|
|
|
|
|
|
|
|
5197
|
|
|
|
|
|
|
The escape character is not allowed as last character in an input stream. |
|
5198
|
|
|
|
|
|
|
|
|
5199
|
|
|
|
|
|
|
=item * |
|
5200
|
|
|
|
|
|
|
2025 "EIQ - Loose unescaped escape" |
|
5201
|
|
|
|
|
|
|
X<2025> |
|
5202
|
|
|
|
|
|
|
|
|
5203
|
|
|
|
|
|
|
An escape character should escape only characters that need escaping. |
|
5204
|
|
|
|
|
|
|
|
|
5205
|
|
|
|
|
|
|
Allowing the escape for other characters is possible with the attribute |
|
5206
|
|
|
|
|
|
|
L</allow_loose_escapes>. |
|
5207
|
|
|
|
|
|
|
|
|
5208
|
|
|
|
|
|
|
=item * |
|
5209
|
|
|
|
|
|
|
2026 "EIQ - Binary character inside quoted field, binary off" |
|
5210
|
|
|
|
|
|
|
X<2026> |
|
5211
|
|
|
|
|
|
|
|
|
5212
|
|
|
|
|
|
|
Binary characters are not allowed by default. Exceptions are fields that |
|
5213
|
|
|
|
|
|
|
contain valid UTF-8, which will automatically be upgraded if the content is |
|
5214
|
|
|
|
|
|
|
valid UTF-8. Set L<C<binary>|/binary> to C<1> to accept binary data. |
|
5215
|
|
|
|
|
|
|
|
|
5216
|
|
|
|
|
|
|
=item * |
|
5217
|
|
|
|
|
|
|
2027 "EIQ - Quoted field not terminated" |
|
5218
|
|
|
|
|
|
|
X<2027> |
|
5219
|
|
|
|
|
|
|
|
|
5220
|
|
|
|
|
|
|
When parsing a field that started with a quotation character, the field is |
|
5221
|
|
|
|
|
|
|
expected to be closed with a quotation character. When the parsed line is |
|
5222
|
|
|
|
|
|
|
exhausted before the quote is found, that field is not terminated. |
|
5223
|
|
|
|
|
|
|
|
|
5224
|
|
|
|
|
|
|
=item * |
|
5225
|
|
|
|
|
|
|
2030 "EIF - NL char inside unquoted verbatim, binary off" |
|
5226
|
|
|
|
|
|
|
X<2030> |
|
5227
|
|
|
|
|
|
|
|
|
5228
|
|
|
|
|
|
|
=item * |
|
5229
|
|
|
|
|
|
|
2031 "EIF - CR char is first char of field, not part of EOL" |
|
5230
|
|
|
|
|
|
|
X<2031> |
|
5231
|
|
|
|
|
|
|
|
|
5232
|
|
|
|
|
|
|
=item * |
|
5233
|
|
|
|
|
|
|
2032 "EIF - CR char inside unquoted, not part of EOL" |
|
5234
|
|
|
|
|
|
|
X<2032> |
|
5235
|
|
|
|
|
|
|
|
|
5236
|
|
|
|
|
|
|
=item * |
|
5237
|
|
|
|
|
|
|
2034 "EIF - Loose unescaped quote" |
|
5238
|
|
|
|
|
|
|
X<2034> |
|
5239
|
|
|
|
|
|
|
|
|
5240
|
|
|
|
|
|
|
=item * |
|
5241
|
|
|
|
|
|
|
2035 "EIF - Escaped EOF in unquoted field" |
|
5242
|
|
|
|
|
|
|
X<2035> |
|
5243
|
|
|
|
|
|
|
|
|
5244
|
|
|
|
|
|
|
=item * |
|
5245
|
|
|
|
|
|
|
2036 "EIF - ESC error" |
|
5246
|
|
|
|
|
|
|
X<2036> |
|
5247
|
|
|
|
|
|
|
|
|
5248
|
|
|
|
|
|
|
=item * |
|
5249
|
|
|
|
|
|
|
2037 "EIF - Binary character in unquoted field, binary off" |
|
5250
|
|
|
|
|
|
|
X<2037> |
|
5251
|
|
|
|
|
|
|
|
|
5252
|
|
|
|
|
|
|
=item * |
|
5253
|
|
|
|
|
|
|
2110 "ECB - Binary character in Combine, binary off" |
|
5254
|
|
|
|
|
|
|
X<2110> |
|
5255
|
|
|
|
|
|
|
|
|
5256
|
|
|
|
|
|
|
=item * |
|
5257
|
|
|
|
|
|
|
2200 "EIO - print to IO failed. See errno" |
|
5258
|
|
|
|
|
|
|
X<2200> |
|
5259
|
|
|
|
|
|
|
|
|
5260
|
|
|
|
|
|
|
=item * |
|
5261
|
|
|
|
|
|
|
3001 "EHR - Unsupported syntax for column_names ()" |
|
5262
|
|
|
|
|
|
|
X<3001> |
|
5263
|
|
|
|
|
|
|
|
|
5264
|
|
|
|
|
|
|
=item * |
|
5265
|
|
|
|
|
|
|
3002 "EHR - getline_hr () called before column_names ()" |
|
5266
|
|
|
|
|
|
|
X<3002> |
|
5267
|
|
|
|
|
|
|
|
|
5268
|
|
|
|
|
|
|
=item * |
|
5269
|
|
|
|
|
|
|
3003 "EHR - bind_columns () and column_names () fields count mismatch" |
|
5270
|
|
|
|
|
|
|
X<3003> |
|
5271
|
|
|
|
|
|
|
|
|
5272
|
|
|
|
|
|
|
=item * |
|
5273
|
|
|
|
|
|
|
3004 "EHR - bind_columns () only accepts refs to scalars" |
|
5274
|
|
|
|
|
|
|
X<3004> |
|
5275
|
|
|
|
|
|
|
|
|
5276
|
|
|
|
|
|
|
=item * |
|
5277
|
|
|
|
|
|
|
3006 "EHR - bind_columns () did not pass enough refs for parsed fields" |
|
5278
|
|
|
|
|
|
|
X<3006> |
|
5279
|
|
|
|
|
|
|
|
|
5280
|
|
|
|
|
|
|
=item * |
|
5281
|
|
|
|
|
|
|
3007 "EHR - bind_columns needs refs to writable scalars" |
|
5282
|
|
|
|
|
|
|
X<3007> |
|
5283
|
|
|
|
|
|
|
|
|
5284
|
|
|
|
|
|
|
=item * |
|
5285
|
|
|
|
|
|
|
3008 "EHR - unexpected error in bound fields" |
|
5286
|
|
|
|
|
|
|
X<3008> |
|
5287
|
|
|
|
|
|
|
|
|
5288
|
|
|
|
|
|
|
=item * |
|
5289
|
|
|
|
|
|
|
3009 "EHR - print_hr () called before column_names ()" |
|
5290
|
|
|
|
|
|
|
X<3009> |
|
5291
|
|
|
|
|
|
|
|
|
5292
|
|
|
|
|
|
|
=item * |
|
5293
|
|
|
|
|
|
|
3010 "EHR - print_hr () called with invalid arguments" |
|
5294
|
|
|
|
|
|
|
X<3010> |
|
5295
|
|
|
|
|
|
|
|
|
5296
|
|
|
|
|
|
|
=back |
|
5297
|
|
|
|
|
|
|
|
|
5298
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
5299
|
|
|
|
|
|
|
|
|
5300
|
|
|
|
|
|
|
L<IO::File>, L<IO::Handle>, L<IO::Wrap>, L<Text::CSV>, L<Text::CSV_PP>, |
|
5301
|
|
|
|
|
|
|
L<Text::CSV::Encoded>, L<Text::CSV::Separator>, L<Text::CSV::Slurp>, |
|
5302
|
|
|
|
|
|
|
L<Spreadsheet::CSV> and L<Spreadsheet::Read>, and of course L<perl>. |
|
5303
|
|
|
|
|
|
|
|
|
5304
|
|
|
|
|
|
|
If you are using Raku, have a look at C<Text::CSV> in the Raku ecosystem, |
|
5305
|
|
|
|
|
|
|
offering the same features. |
|
5306
|
|
|
|
|
|
|
|
|
5307
|
|
|
|
|
|
|
A beautiful L<Love Letter|https://github.com/medialab/xan/blob/master/docs/LOVE_LETTER.md> |
|
5308
|
|
|
|
|
|
|
to C<CSV> by the developers of L<xan|https://github.com/medialab/xan#readme>. |
|
5309
|
|
|
|
|
|
|
|
|
5310
|
|
|
|
|
|
|
=head3 non-perl |
|
5311
|
|
|
|
|
|
|
|
|
5312
|
|
|
|
|
|
|
A CSV parser in JavaScript, also used by L<W3C|http://www.w3.org>, is the |
|
5313
|
|
|
|
|
|
|
multi-threaded in-browser L<PapaParse|http://papaparse.com/>. |
|
5314
|
|
|
|
|
|
|
|
|
5315
|
|
|
|
|
|
|
L<csvkit|http://csvkit.readthedocs.org> is a Python CSV parsing toolkit. |
|
5316
|
|
|
|
|
|
|
|
|
5317
|
|
|
|
|
|
|
=head1 AUTHOR |
|
5318
|
|
|
|
|
|
|
|
|
5319
|
|
|
|
|
|
|
Alan Citterman F<E<lt>alan@mfgrtl.comE<gt>> wrote the original Perl module. |
|
5320
|
|
|
|
|
|
|
Please don't send mail concerning Text::CSV_XS to Alan, who is not involved |
|
5321
|
|
|
|
|
|
|
in the C/XS part that is now the main part of the module. |
|
5322
|
|
|
|
|
|
|
|
|
5323
|
|
|
|
|
|
|
Jochen Wiedmann F<E<lt>joe@ispsoft.deE<gt>> rewrote the en- and decoding in |
|
5324
|
|
|
|
|
|
|
C by implementing a simple finite-state machine. He added variable quote, |
|
5325
|
|
|
|
|
|
|
escape and separator characters, the binary mode and the print and getline |
|
5326
|
|
|
|
|
|
|
methods. See F<ChangeLog> releases 0.10 through 0.23. |
|
5327
|
|
|
|
|
|
|
|
|
5328
|
|
|
|
|
|
|
H.Merijn Brand F<E<lt>hmbrand@cpan.orgE<gt>> cleaned up the code, added |
|
5329
|
|
|
|
|
|
|
the field flags methods, wrote the major part of the test suite, completed |
|
5330
|
|
|
|
|
|
|
the documentation, fixed most RT bugs, added all the allow flags and the |
|
5331
|
|
|
|
|
|
|
L</csv> function. See F<ChangeLog> releases 0.25 and on. |
|
5332
|
|
|
|
|
|
|
|
|
5333
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
5334
|
|
|
|
|
|
|
|
|
5335
|
|
|
|
|
|
|
Copyright (C) 2007-2025 H.Merijn Brand. All rights reserved. |
|
5336
|
|
|
|
|
|
|
Copyright (C) 1998-2001 Jochen Wiedmann. All rights reserved. |
|
5337
|
|
|
|
|
|
|
Copyright (C) 1997 Alan Citterman. All rights reserved. |
|
5338
|
|
|
|
|
|
|
|
|
5339
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify it under |
|
5340
|
|
|
|
|
|
|
the same terms as Perl itself. |
|
5341
|
|
|
|
|
|
|
|
|
5342
|
|
|
|
|
|
|
=cut |
|
5343
|
|
|
|
|
|
|
|
|
5344
|
|
|
|
|
|
|
=for elvis |
|
5345
|
|
|
|
|
|
|
:ex:se gw=75|color guide #ff0000: |
|
5346
|
|
|
|
|
|
|
|
|
5347
|
|
|
|
|
|
|
=cut |