File Coverage

blib/lib/App/RecordStream/Operation/fromcsv.pm
Criterion Covered Total %
statement 63 67 94.0
branch 11 14 78.5
condition 6 9 66.6
subroutine 9 10 90.0
pod 0 6 0.0
total 89 106 83.9


line stmt bran cond sub pod time code
1             package App::RecordStream::Operation::fromcsv;
2              
3             our $VERSION = "4.0.25";
4              
5 3     3   1978 use strict;
  3         5  
  3         64  
6              
7 3     3   11 use base qw(App::RecordStream::Operation);
  3         3  
  3         191  
8              
9 3     3   1531 use Text::CSV;
  3         35682  
  3         1400  
10              
11             sub init {
12 11     11 0 15 my $this = shift;
13 11         15 my $args = shift;
14              
15 11         14 my @fields;
16 11         13 my $header_line = undef;
17 11         13 my $strict = 0;
18 11         14 my $delim = ',';
19 11         13 my $escape = '"';
20 11         13 my $quote = '"';
21              
22             my $spec = {
23 3     3   1667 "keys|k|field|f=s" => sub { push @fields, split(/,/, $_[1]); },
24 11         58 "header" => \$header_line,
25             "strict" => \$strict,
26             "delim|d=s" => \$delim,
27             "escape=s" => \$escape,
28             "quote=s" => \$quote,
29             };
30              
31 11         43 $this->parse_options($args, $spec);
32              
33 11 50       26 die "Delimiter must be a single character\n\n"
34             unless length $delim == 1;
35              
36 11 50       54 my $csv_args = {
37             binary => 1,
38             eol => $/,
39             sep_char => $delim,
40             escape_char => $escape,
41              
42             # Text::CSV wants undef, but it's easier to pass the empty string at the shell.
43             quote_char => ($quote eq '' ? undef : $quote),
44             };
45              
46 11 100       21 if ( !$strict ) {
47 7         15 $csv_args->{'allow_whitespace'} = 1;
48 7         10 $csv_args->{'allow_loose_quotes'} = 1;
49 7         16 $csv_args->{'allow_loose_escapes'} = 1;
50             }
51              
52 11         21 $this->{'FIELDS'} = \@fields;
53 11         19 $this->{'HEADER_LINE'} = $header_line;
54 11         42 $this->{'PARSER'} = new Text::CSV($csv_args);
55 11         1670 $this->{'EXTRA_ARGS'} = $args;
56             }
57              
58             sub wants_input {
59 22     22 0 49 return 0;
60             }
61              
62             sub stream_done {
63 11     11 0 16 my $this = shift;
64              
65 11         16 my $files = $this->{'EXTRA_ARGS'};
66              
67 11 100       22 if ( scalar @$files > 0 ) {
68 2         7 foreach my $file ( @$files ) {
69 4         22 $this->update_current_filename($file);
70              
71 4 50       141 open(my $fh, '<', $file) or die "Could not open file: $!\n";
72 4         24 $this->get_records_from_handle($fh);
73 4         81 close $fh;
74             }
75             }
76             else {
77 9         18 $this->get_records_from_handle(\*STDIN);
78             }
79             }
80              
81             sub get_records_from_handle {
82 13     13 0 24 my ($this, $handle) = @_;
83              
84 13         21 my $parser = $this->{'PARSER'};
85 13         19 my $do_headers = $this->{'HEADER_LINE'};
86 13         16 my @fields = @{ $this->{'FIELDS'} };
  13         24  
87              
88 13         396 while(my $row = $parser->getline($handle)) {
89 25 100       743 if ( $do_headers ) {
90 3         7 push @fields, @$row;
91 3         4 $do_headers = 0;
92 3         45 next;
93             }
94              
95 22         47 my @values = @$row;
96              
97 22         93 my $record = App::RecordStream::Record->new();
98 22         50 for(my $i = 0; $i < @values; ++$i) {
99 62   100     157 my $key = $fields[$i] || $i;
100 62         83 ${$record->guess_key_from_spec($key)} = $values[$i];
  62         116  
101             }
102 22         69 $this->push_record($record);
103             }
104              
105             # Parsing was a success only if we reached EOF and we got no error. Code
106             # 2012 is used by Text::CSV_XS for normal EOF condition.
107 13         318 my ($code, $msg, $pos) = $parser->error_diag;
108 13 100 33     326 unless ($parser->eof and ($code == 0 or $code == 2012)) {
      66        
109 2         15 my ($line, $file) = ($., $this->get_current_filename);
110 2         36 die "fromcsv: parse error: $msg ($code)",
111             ", roughly at position $pos, line $line, file $file\n";
112             }
113             }
114              
115             sub add_help_types {
116 11     11 0 17 my $this = shift;
117 11         29 $this->use_help_type('keyspecs');
118             }
119              
120             sub usage
121             {
122 0     0 0   my $this = shift;
123              
124 0           my $options = [
125             [ 'key|k ', 'Comma separated list of field names. May be specified multiple times, may be key specs' ],
126             [ 'header', 'Take field names from the first line of input' ],
127             [ 'strict', 'Do not trim whitespaces, allow loose quoting (quotes inside quotes), or allow the use of escape characters when not strictly needed. (not recommended, for most cases, though may help with parsing quoted fields containing newlines)' ],
128             [ 'delim|-d ', "Field delimiter to use when reading input lines (default ',')."],
129             [ 'escape ', "Escape character used in quoted fields (default '\x22')."],
130             [ 'quote ', "Quote character used in quoted fields (default '\x22'). Use the empty string to indicate no quoted fields."],
131             ];
132              
133 0           my $args_string = $this->options_string($options);
134              
135 0           return <
136             Usage: recs-fromcsv []
137             __FORMAT_TEXT__
138             Each line of input (or lines of ) is split on commas to
139             produce an output record. Fields are named numerically (0, 1, etc.), or as
140             given by --field, or as read by --header. Lines may be split on delimiters
141             other than commas by providing --delim.
142             __FORMAT_TEXT__
143              
144             Arguments:
145             $args_string
146              
147             Examples:
148             Parse csv separated fields x and y.
149             recs-fromcsv --field x,y
150             Parse data with a header line specifying fields
151             recs-fromcsv --header
152             Parse tsv data (using bash syntax for a literal tab)
153             recs-fromcsv --delim \$'\\t'
154             USAGE
155             }
156              
157             1;