| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Data::TableReader::Decoder::CSV; |
|
2
|
|
|
|
|
|
|
$Data::TableReader::Decoder::CSV::VERSION = '0.010'; |
|
3
|
4
|
|
|
4
|
|
103808
|
use Moo 2; |
|
|
4
|
|
|
|
|
11032
|
|
|
|
4
|
|
|
|
|
28
|
|
|
4
|
4
|
|
|
4
|
|
2485
|
use Try::Tiny; |
|
|
4
|
|
|
|
|
10
|
|
|
|
4
|
|
|
|
|
257
|
|
|
5
|
4
|
|
|
4
|
|
32
|
use Carp; |
|
|
4
|
|
|
|
|
11
|
|
|
|
4
|
|
|
|
|
247
|
|
|
6
|
4
|
|
|
4
|
|
2153
|
use IO::Handle; |
|
|
4
|
|
|
|
|
24554
|
|
|
|
4
|
|
|
|
|
3976
|
|
|
7
|
|
|
|
|
|
|
extends 'Data::TableReader::Decoder'; |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our @csv_probe_modules= ( ['Text::CSV_XS' => 1.06], ['Text::CSV' => 1.91] ); |
|
10
|
|
|
|
|
|
|
our $default_csv_module; |
|
11
|
|
|
|
|
|
|
sub default_csv_module { |
|
12
|
21
|
|
66
|
21
|
0
|
619
|
$default_csv_module ||= |
|
13
|
|
|
|
|
|
|
Data::TableReader::Decoder::_first_sufficient_module('CSV parser', \@csv_probe_modules); |
|
14
|
|
|
|
|
|
|
} |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
# ABSTRACT: Access rows of a comma-delimited text file |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
has _parser_args => ( is => 'ro', init_arg => 'parser' ); |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
has parser => ( is => 'lazy', init_arg => undef ); |
|
22
|
|
|
|
|
|
|
sub _build_parser { |
|
23
|
16
|
|
|
16
|
|
154
|
my $self= shift; |
|
24
|
16
|
|
50
|
|
|
96
|
my $args= $self->_parser_args || {}; |
|
25
|
16
|
50
|
|
|
|
130
|
return $args if ref($args)->can('getline'); |
|
26
|
16
|
|
|
|
|
54
|
return $self->default_csv_module->new({ |
|
27
|
|
|
|
|
|
|
binary => 1, |
|
28
|
|
|
|
|
|
|
allow_loose_quotes => 1, |
|
29
|
|
|
|
|
|
|
auto_diag => 2, |
|
30
|
|
|
|
|
|
|
%$args |
|
31
|
|
|
|
|
|
|
}); |
|
32
|
|
|
|
|
|
|
} |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
has autodetect_encoding => ( is => 'rw', default => sub { 1 } ); |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
sub encoding { |
|
38
|
18
|
|
|
18
|
1
|
52
|
my ($self, $enc)= @_; |
|
39
|
18
|
|
|
|
|
43
|
my $fh= $self->file_handle; |
|
40
|
18
|
50
|
|
|
|
55
|
if (defined $enc) { |
|
41
|
0
|
|
|
|
|
0
|
binmode($fh, ":encoding($enc)"); |
|
42
|
0
|
|
|
|
|
0
|
return $enc; |
|
43
|
|
|
|
|
|
|
} |
|
44
|
|
|
|
|
|
|
|
|
45
|
18
|
|
|
|
|
123
|
my @layers= PerlIO::get_layers($fh); |
|
46
|
18
|
50
|
|
|
|
55
|
if (($enc)= grep { /^encoding|^utf/ } @layers) { |
|
|
23
|
|
|
|
|
137
|
|
|
47
|
|
|
|
|
|
|
# extract encoding name |
|
48
|
0
|
0
|
|
|
|
0
|
return 'UTF-8' if $enc eq 'utf8'; |
|
49
|
0
|
0
|
|
|
|
0
|
return uc($1) if $enc =~ /encoding\(([^)]+)\)/; |
|
50
|
0
|
|
|
|
|
0
|
return uc($enc); # could throw a parse error, but this is probably more useful behavior |
|
51
|
|
|
|
|
|
|
} |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
# fh_start_pos will be set if we have already checked for BOM |
|
54
|
18
|
50
|
33
|
|
|
175
|
if ($self->autodetect_encoding && !defined $self->_fh_start_pos) { |
|
55
|
18
|
|
50
|
|
|
124
|
$self->_fh_start_pos(tell $fh or 0); |
|
56
|
18
|
100
|
|
|
|
63
|
if (($enc= $self->_autodetect_bom($fh))) { |
|
57
|
1
|
|
|
1
|
|
10
|
binmode($fh, ":encoding($enc)"); |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
8
|
|
|
|
6
|
|
|
|
|
107
|
|
|
58
|
|
|
|
|
|
|
# re-mark the start after the BOM |
|
59
|
6
|
|
50
|
|
|
21021
|
$self->_fh_start_pos(tell $fh or 0); |
|
60
|
6
|
|
|
|
|
24
|
return $enc; |
|
61
|
|
|
|
|
|
|
} |
|
62
|
|
|
|
|
|
|
} |
|
63
|
11
|
|
|
|
|
35
|
return ''; |
|
64
|
|
|
|
|
|
|
} |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
has _fh_start_pos => ( is => 'rw' ); |
|
68
|
|
|
|
|
|
|
has _iterator => ( is => 'rw', weak_ref => 1 ); |
|
69
|
|
|
|
|
|
|
has _row_ref => ( is => 'rw' ); |
|
70
|
|
|
|
|
|
|
sub iterator { |
|
71
|
20
|
|
|
20
|
1
|
7211
|
my $self= shift; |
|
72
|
20
|
100
|
|
|
|
458
|
croak "Multiple iterators on CSV stream not supported yet" if $self->_iterator; |
|
73
|
19
|
|
|
|
|
449
|
my $parser= $self->parser; |
|
74
|
19
|
|
|
|
|
14976
|
my $fh= $self->file_handle; |
|
75
|
19
|
|
|
|
|
61
|
my $row_ref= $self->_row_ref; |
|
76
|
|
|
|
|
|
|
# Keeping this object is just an indication of whether an iterator has been used yet |
|
77
|
19
|
100
|
|
|
|
66
|
if (!$row_ref) { |
|
|
|
50
|
|
|
|
|
|
|
78
|
18
|
|
|
|
|
55
|
$self->_row_ref($row_ref= \(my $row= 0)); |
|
79
|
|
|
|
|
|
|
# trigger BOM detection if needed |
|
80
|
18
|
|
|
|
|
72
|
my $enc= $self->encoding; |
|
81
|
17
|
|
100
|
|
|
150
|
$self->_log->('debug', "encoding is ".($enc||'maybe utf8')); |
|
82
|
|
|
|
|
|
|
# ensure _fh_start_pos is set |
|
83
|
17
|
|
100
|
|
|
262
|
$self->_fh_start_pos(tell $fh or 0); |
|
84
|
|
|
|
|
|
|
} |
|
85
|
|
|
|
|
|
|
elsif ($$row_ref) { |
|
86
|
0
|
|
|
|
|
0
|
$self->_log->('debug', 'Seeking back to start of input'); |
|
87
|
0
|
0
|
|
|
|
0
|
seek($fh, $self->_fh_start_pos, 0) |
|
88
|
|
|
|
|
|
|
or die "Can't seek back to start of stream"; |
|
89
|
0
|
|
|
|
|
0
|
$$row_ref= 0; |
|
90
|
|
|
|
|
|
|
} |
|
91
|
|
|
|
|
|
|
my $i= Data::TableReader::Decoder::CSV::_Iter->new( |
|
92
|
|
|
|
|
|
|
sub { |
|
93
|
87
|
|
|
87
|
|
176
|
++$$row_ref; |
|
94
|
87
|
100
|
|
|
|
2583
|
my $r= $parser->getline($fh) or return undef; |
|
95
|
71
|
100
|
|
|
|
2329
|
@$r= @{$r}[ @{$_[0]} ] if $_[0]; # optional slice argument |
|
|
26
|
|
|
|
|
83
|
|
|
|
26
|
|
|
|
|
50
|
|
|
96
|
71
|
|
|
|
|
378
|
return $r; |
|
97
|
|
|
|
|
|
|
}, |
|
98
|
|
|
|
|
|
|
{ |
|
99
|
18
|
|
|
|
|
294
|
row => $row_ref, |
|
100
|
|
|
|
|
|
|
fh => $fh, |
|
101
|
|
|
|
|
|
|
origin => $self->_fh_start_pos, |
|
102
|
|
|
|
|
|
|
} |
|
103
|
|
|
|
|
|
|
); |
|
104
|
18
|
|
|
|
|
492
|
$self->_iterator($i); |
|
105
|
18
|
|
|
|
|
274
|
return $i; |
|
106
|
|
|
|
|
|
|
} |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
# This design is simplified from File::BOM in that it ignores UTF-32 |
|
109
|
|
|
|
|
|
|
# and in any "normal" case it can read from a pipe with only one |
|
110
|
|
|
|
|
|
|
# character to push back, avoiding the need to tie the file handle. |
|
111
|
|
|
|
|
|
|
# It also checks for whether layers have already been enabled. |
|
112
|
|
|
|
|
|
|
# It also avoids seeking to the start of the file handle, in case |
|
113
|
|
|
|
|
|
|
# the user deliberately seeked to a position. |
|
114
|
|
|
|
|
|
|
sub _autodetect_bom { |
|
115
|
18
|
|
|
18
|
|
40
|
my ($self, $fh)= @_; |
|
116
|
18
|
|
|
|
|
45
|
my $fpos= tell($fh); |
|
117
|
|
|
|
|
|
|
|
|
118
|
18
|
|
|
|
|
109
|
local $!; |
|
119
|
18
|
50
|
|
|
|
170
|
read($fh, my $buf, 1) || return; |
|
120
|
18
|
100
|
100
|
|
|
128
|
if ($buf eq "\xFF" || $buf eq "\xFE" || $buf eq "\xEF") { |
|
|
|
|
100
|
|
|
|
|
|
121
|
8
|
50
|
|
|
|
30
|
if (read($fh, $buf, 1, 1)) { |
|
122
|
8
|
100
|
66
|
|
|
55
|
if ($buf eq "\xFF\xFE") { |
|
|
|
100
|
66
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
123
|
2
|
|
|
|
|
12
|
return 'UTF-16LE'; |
|
124
|
|
|
|
|
|
|
} elsif ($buf eq "\xFE\xFF") { |
|
125
|
2
|
|
|
|
|
11
|
return 'UTF-16BE'; |
|
126
|
|
|
|
|
|
|
} elsif ($buf eq "\xEF\xBB" and read($fh, $buf, 1, 2) and $buf eq "\xEF\xBB\xBF") { |
|
127
|
2
|
|
|
|
|
15
|
return 'UTF-8'; |
|
128
|
|
|
|
|
|
|
} |
|
129
|
|
|
|
|
|
|
} |
|
130
|
|
|
|
|
|
|
} |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
# It wasn't a BOM. Try to undo our read. |
|
133
|
12
|
|
|
|
|
77
|
$self->_log->('debug', 'No BOM in stream, seeking back to start'); |
|
134
|
12
|
100
|
|
|
|
208
|
if (length $buf == 1) { |
|
|
|
100
|
|
|
|
|
|
|
135
|
10
|
|
|
|
|
196
|
$fh->ungetc(ord $buf); |
|
136
|
|
|
|
|
|
|
} elsif (!seek($fh, $fpos, 0)) { |
|
137
|
|
|
|
|
|
|
# Can't seek |
|
138
|
1
|
50
|
|
|
|
26
|
if ($fh->can('ungets')) { # support for FileHandle::Unget |
|
139
|
0
|
|
|
|
|
0
|
$fh->ungets($buf); |
|
140
|
|
|
|
|
|
|
} else { |
|
141
|
1
|
|
|
|
|
218
|
croak "Can't seek input handle after BOM detection; You should set an encoding manually, buffer the entire input, or use FileHandle::Unget"; |
|
142
|
|
|
|
|
|
|
} |
|
143
|
|
|
|
|
|
|
} |
|
144
|
11
|
|
|
|
|
72
|
return; |
|
145
|
|
|
|
|
|
|
} |
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# If you need to subclass this iterator, don't. Just implement your own. |
|
148
|
|
|
|
|
|
|
# i.e. I'm not declaring this implementation stable, yet. |
|
149
|
4
|
|
|
4
|
|
1476
|
use Data::TableReader::Iterator; |
|
|
4
|
|
|
|
|
11
|
|
|
|
4
|
|
|
|
|
163
|
|
|
150
|
4
|
|
|
4
|
|
1467
|
BEGIN { @Data::TableReader::Decoder::CSV::_Iter::ISA= ('Data::TableReader::Iterator'); } |
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
sub Data::TableReader::Decoder::CSV::_Iter::position { |
|
153
|
12
|
|
|
12
|
|
34
|
my $f= shift->_fields; |
|
154
|
12
|
|
|
|
|
20
|
'row '.${ $f->{row} }; |
|
|
12
|
|
|
|
|
57
|
|
|
155
|
|
|
|
|
|
|
} |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
sub Data::TableReader::Decoder::CSV::_Iter::progress { |
|
158
|
0
|
|
|
0
|
|
0
|
my $f= shift->_fields; |
|
159
|
|
|
|
|
|
|
# lazy-build the file size, using seek |
|
160
|
0
|
0
|
|
|
|
0
|
unless (exists $f->{file_size}) { |
|
161
|
0
|
|
|
|
|
0
|
my $pos= tell $f->{fh}; |
|
162
|
0
|
0
|
0
|
|
|
0
|
if (defined $pos and $pos >= 0 and seek($f->{fh}, 0, 2)) { |
|
|
|
|
0
|
|
|
|
|
|
163
|
0
|
|
|
|
|
0
|
$f->{file_size}= tell($f->{fh}); |
|
164
|
0
|
0
|
|
|
|
0
|
seek($f->{fh}, $pos, 0) or die "seek: $!"; |
|
165
|
|
|
|
|
|
|
} else { |
|
166
|
0
|
|
|
|
|
0
|
$f->{file_size}= undef; |
|
167
|
|
|
|
|
|
|
} |
|
168
|
|
|
|
|
|
|
} |
|
169
|
0
|
0
|
|
|
|
0
|
return $f->{file_size}? (tell $f->{fh})/$f->{file_size} : undef; |
|
170
|
|
|
|
|
|
|
} |
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
sub Data::TableReader::Decoder::CSV::_Iter::tell { |
|
173
|
4
|
|
|
4
|
|
11
|
my $f= shift->_fields; |
|
174
|
4
|
|
|
|
|
11
|
my $pos= tell($f->{fh}); |
|
175
|
4
|
50
|
33
|
|
|
17
|
return undef unless defined $pos && $pos >= 0; |
|
176
|
4
|
|
|
|
|
10
|
return [ $pos, ${$f->{row}} ]; |
|
|
4
|
|
|
|
|
14
|
|
|
177
|
|
|
|
|
|
|
} |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
sub Data::TableReader::Decoder::CSV::_Iter::seek { |
|
180
|
7
|
|
|
7
|
|
25
|
my ($self, $to)= @_; |
|
181
|
7
|
|
|
|
|
39
|
my $f= $self->_fields; |
|
182
|
7
|
50
|
|
|
|
59
|
seek($f->{fh}, ($to? $to->[0] : $f->{origin}), 0) or croak("seek failed: $!"); |
|
|
|
50
|
|
|
|
|
|
|
183
|
7
|
50
|
|
|
|
21
|
${ $f->{row} }= $to? $to->[1] : 0; |
|
|
7
|
|
|
|
|
22
|
|
|
184
|
7
|
|
|
|
|
26
|
1; |
|
185
|
|
|
|
|
|
|
} |
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
1; |
|
188
|
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
__END__ |