line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package App::RecordStream::Operation::fromcsv; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = "4.0.25"; |
4
|
|
|
|
|
|
|
|
5
|
3
|
|
|
3
|
|
1978
|
use strict; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
64
|
|
6
|
|
|
|
|
|
|
|
7
|
3
|
|
|
3
|
|
11
|
use base qw(App::RecordStream::Operation); |
|
3
|
|
|
|
|
3
|
|
|
3
|
|
|
|
|
191
|
|
8
|
|
|
|
|
|
|
|
9
|
3
|
|
|
3
|
|
1531
|
use Text::CSV; |
|
3
|
|
|
|
|
35682
|
|
|
3
|
|
|
|
|
1400
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
sub init { |
12
|
11
|
|
|
11
|
0
|
15
|
my $this = shift; |
13
|
11
|
|
|
|
|
15
|
my $args = shift; |
14
|
|
|
|
|
|
|
|
15
|
11
|
|
|
|
|
14
|
my @fields; |
16
|
11
|
|
|
|
|
13
|
my $header_line = undef; |
17
|
11
|
|
|
|
|
13
|
my $strict = 0; |
18
|
11
|
|
|
|
|
14
|
my $delim = ','; |
19
|
11
|
|
|
|
|
13
|
my $escape = '"'; |
20
|
11
|
|
|
|
|
13
|
my $quote = '"'; |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
my $spec = { |
23
|
3
|
|
|
3
|
|
1667
|
"keys|k|field|f=s" => sub { push @fields, split(/,/, $_[1]); }, |
24
|
11
|
|
|
|
|
58
|
"header" => \$header_line, |
25
|
|
|
|
|
|
|
"strict" => \$strict, |
26
|
|
|
|
|
|
|
"delim|d=s" => \$delim, |
27
|
|
|
|
|
|
|
"escape=s" => \$escape, |
28
|
|
|
|
|
|
|
"quote=s" => \$quote, |
29
|
|
|
|
|
|
|
}; |
30
|
|
|
|
|
|
|
|
31
|
11
|
|
|
|
|
43
|
$this->parse_options($args, $spec); |
32
|
|
|
|
|
|
|
|
33
|
11
|
50
|
|
|
|
26
|
die "Delimiter must be a single character\n\n" |
34
|
|
|
|
|
|
|
unless length $delim == 1; |
35
|
|
|
|
|
|
|
|
36
|
11
|
50
|
|
|
|
54
|
my $csv_args = { |
37
|
|
|
|
|
|
|
binary => 1, |
38
|
|
|
|
|
|
|
eol => $/, |
39
|
|
|
|
|
|
|
sep_char => $delim, |
40
|
|
|
|
|
|
|
escape_char => $escape, |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# Text::CSV wants undef, but it's easier to pass the empty string at the shell. |
43
|
|
|
|
|
|
|
quote_char => ($quote eq '' ? undef : $quote), |
44
|
|
|
|
|
|
|
}; |
45
|
|
|
|
|
|
|
|
46
|
11
|
100
|
|
|
|
21
|
if ( !$strict ) { |
47
|
7
|
|
|
|
|
15
|
$csv_args->{'allow_whitespace'} = 1; |
48
|
7
|
|
|
|
|
10
|
$csv_args->{'allow_loose_quotes'} = 1; |
49
|
7
|
|
|
|
|
16
|
$csv_args->{'allow_loose_escapes'} = 1; |
50
|
|
|
|
|
|
|
} |
51
|
|
|
|
|
|
|
|
52
|
11
|
|
|
|
|
21
|
$this->{'FIELDS'} = \@fields; |
53
|
11
|
|
|
|
|
19
|
$this->{'HEADER_LINE'} = $header_line; |
54
|
11
|
|
|
|
|
42
|
$this->{'PARSER'} = new Text::CSV($csv_args); |
55
|
11
|
|
|
|
|
1670
|
$this->{'EXTRA_ARGS'} = $args; |
56
|
|
|
|
|
|
|
} |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
sub wants_input { |
59
|
22
|
|
|
22
|
0
|
49
|
return 0; |
60
|
|
|
|
|
|
|
} |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
sub stream_done { |
63
|
11
|
|
|
11
|
0
|
16
|
my $this = shift; |
64
|
|
|
|
|
|
|
|
65
|
11
|
|
|
|
|
16
|
my $files = $this->{'EXTRA_ARGS'}; |
66
|
|
|
|
|
|
|
|
67
|
11
|
100
|
|
|
|
22
|
if ( scalar @$files > 0 ) { |
68
|
2
|
|
|
|
|
7
|
foreach my $file ( @$files ) { |
69
|
4
|
|
|
|
|
22
|
$this->update_current_filename($file); |
70
|
|
|
|
|
|
|
|
71
|
4
|
50
|
|
|
|
141
|
open(my $fh, '<', $file) or die "Could not open file: $!\n"; |
72
|
4
|
|
|
|
|
24
|
$this->get_records_from_handle($fh); |
73
|
4
|
|
|
|
|
81
|
close $fh; |
74
|
|
|
|
|
|
|
} |
75
|
|
|
|
|
|
|
} |
76
|
|
|
|
|
|
|
else { |
77
|
9
|
|
|
|
|
18
|
$this->get_records_from_handle(\*STDIN); |
78
|
|
|
|
|
|
|
} |
79
|
|
|
|
|
|
|
} |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
sub get_records_from_handle { |
82
|
13
|
|
|
13
|
0
|
24
|
my ($this, $handle) = @_; |
83
|
|
|
|
|
|
|
|
84
|
13
|
|
|
|
|
21
|
my $parser = $this->{'PARSER'}; |
85
|
13
|
|
|
|
|
19
|
my $do_headers = $this->{'HEADER_LINE'}; |
86
|
13
|
|
|
|
|
16
|
my @fields = @{ $this->{'FIELDS'} }; |
|
13
|
|
|
|
|
24
|
|
87
|
|
|
|
|
|
|
|
88
|
13
|
|
|
|
|
396
|
while(my $row = $parser->getline($handle)) { |
89
|
25
|
100
|
|
|
|
743
|
if ( $do_headers ) { |
90
|
3
|
|
|
|
|
7
|
push @fields, @$row; |
91
|
3
|
|
|
|
|
4
|
$do_headers = 0; |
92
|
3
|
|
|
|
|
45
|
next; |
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
|
95
|
22
|
|
|
|
|
47
|
my @values = @$row; |
96
|
|
|
|
|
|
|
|
97
|
22
|
|
|
|
|
93
|
my $record = App::RecordStream::Record->new(); |
98
|
22
|
|
|
|
|
50
|
for(my $i = 0; $i < @values; ++$i) { |
99
|
62
|
|
100
|
|
|
157
|
my $key = $fields[$i] || $i; |
100
|
62
|
|
|
|
|
83
|
${$record->guess_key_from_spec($key)} = $values[$i]; |
|
62
|
|
|
|
|
116
|
|
101
|
|
|
|
|
|
|
} |
102
|
22
|
|
|
|
|
69
|
$this->push_record($record); |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
# Parsing was a success only if we reached EOF and we got no error. Code |
106
|
|
|
|
|
|
|
# 2012 is used by Text::CSV_XS for normal EOF condition. |
107
|
13
|
|
|
|
|
318
|
my ($code, $msg, $pos) = $parser->error_diag; |
108
|
13
|
100
|
33
|
|
|
326
|
unless ($parser->eof and ($code == 0 or $code == 2012)) { |
|
|
|
66
|
|
|
|
|
109
|
2
|
|
|
|
|
15
|
my ($line, $file) = ($., $this->get_current_filename); |
110
|
2
|
|
|
|
|
36
|
die "fromcsv: parse error: $msg ($code)", |
111
|
|
|
|
|
|
|
", roughly at position $pos, line $line, file $file\n"; |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
sub add_help_types { |
116
|
11
|
|
|
11
|
0
|
17
|
my $this = shift; |
117
|
11
|
|
|
|
|
29
|
$this->use_help_type('keyspecs'); |
118
|
|
|
|
|
|
|
} |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
sub usage |
121
|
|
|
|
|
|
|
{ |
122
|
0
|
|
|
0
|
0
|
|
my $this = shift; |
123
|
|
|
|
|
|
|
|
124
|
0
|
|
|
|
|
|
my $options = [ |
125
|
|
|
|
|
|
|
[ 'key|k ', 'Comma separated list of field names. May be specified multiple times, may be key specs' ], |
126
|
|
|
|
|
|
|
[ 'header', 'Take field names from the first line of input' ], |
127
|
|
|
|
|
|
|
[ 'strict', 'Do not trim whitespaces, allow loose quoting (quotes inside quotes), or allow the use of escape characters when not strictly needed. (not recommended, for most cases, though may help with parsing quoted fields containing newlines)' ], |
128
|
|
|
|
|
|
|
[ 'delim|-d ', "Field delimiter to use when reading input lines (default ',')."], |
129
|
|
|
|
|
|
|
[ 'escape ', "Escape character used in quoted fields (default '\x22')."], |
130
|
|
|
|
|
|
|
[ 'quote ', "Quote character used in quoted fields (default '\x22'). Use the empty string to indicate no quoted fields."], |
131
|
|
|
|
|
|
|
]; |
132
|
|
|
|
|
|
|
|
133
|
0
|
|
|
|
|
|
my $args_string = $this->options_string($options); |
134
|
|
|
|
|
|
|
|
135
|
0
|
|
|
|
|
|
return <
|
136
|
|
|
|
|
|
|
Usage: recs-fromcsv [] |
137
|
|
|
|
|
|
|
__FORMAT_TEXT__ |
138
|
|
|
|
|
|
|
Each line of input (or lines of ) is split on commas to |
139
|
|
|
|
|
|
|
produce an output record. Fields are named numerically (0, 1, etc.), or as |
140
|
|
|
|
|
|
|
given by --field, or as read by --header. Lines may be split on delimiters |
141
|
|
|
|
|
|
|
other than commas by providing --delim. |
142
|
|
|
|
|
|
|
__FORMAT_TEXT__ |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
Arguments: |
145
|
|
|
|
|
|
|
$args_string |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
Examples: |
148
|
|
|
|
|
|
|
Parse csv separated fields x and y. |
149
|
|
|
|
|
|
|
recs-fromcsv --field x,y |
150
|
|
|
|
|
|
|
Parse data with a header line specifying fields |
151
|
|
|
|
|
|
|
recs-fromcsv --header |
152
|
|
|
|
|
|
|
Parse tsv data (using bash syntax for a literal tab) |
153
|
|
|
|
|
|
|
recs-fromcsv --delim \$'\\t' |
154
|
|
|
|
|
|
|
USAGE |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
1; |