File Coverage

blib/lib/App/CSVUtils.pm
Criterion Covered Total %
statement 727 868 83.7
branch 182 322 56.5
condition 63 122 51.6
subroutine 111 122 90.9
pod 3 3 100.0
total 1086 1437 75.5


line stmt bran cond sub pod time code
1             package App::CSVUtils;
2              
3 1     1   110261 use 5.010001;
  1         12  
4 1     1   5 use strict;
  1         2  
  1         19  
5 1     1   4 use warnings;
  1         2  
  1         22  
6 1     1   2431 use Log::ger;
  1         54  
  1         4  
7              
8 1     1   287 use Cwd;
  1         1  
  1         64  
9 1     1   6 use Exporter qw(import);
  1         2  
  1         621  
10              
11             our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
12             our $DATE = '2023-04-01'; # DATE
13             our $DIST = 'App-CSVUtils'; # DIST
14             our $VERSION = '1.024'; # VERSION
15              
16             our @EXPORT_OK = qw(
17             gen_csv_util
18             compile_eval_code
19             eval_code
20             );
21              
22             our %SPEC;
23              
24             our $sch_req_str_or_code = ['any*', of=>['str*', 'code*']];
25              
26             sub _open_file_read {
27 129     129   205 my $filename = shift;
28              
29 129         179 my ($fh, $err);
30 129 50       275 if ($filename eq '-') {
31 0         0 $fh = *STDIN;
32             } else {
33 129 50       5004 open $fh, "<", $filename or do {
34 0         0 $err = [500, "Can't open input filename '$filename': $!"];
35 0         0 goto RETURN;
36             };
37             }
38 129         1643 binmode $fh, ":encoding(utf8)";
39              
40 129         5268 RETURN:
41             ($fh, $err);
42             }
43              
44             sub _open_file_write {
45 2     2   4 my $filename = shift;
46              
47 2         5 my ($fh, $err);
48 2 50       7 if ($filename eq '-') {
49 0         0 $fh = *STDOUT;
50             } else {
51 2 50       128 open $fh, ">", $filename or do {
52 0         0 $err = [500, "Can't open output filename '$filename': $!"];
53 0         0 goto RETURN;
54             };
55             }
56 2         27 binmode $fh, ":encoding(utf8)";
57              
58 2         82 RETURN:
59             ($fh, $err);
60             }
61              
62             sub _return_or_write_file {
63 0     0   0 my ($res, $filename, $overwrite) = @_;
64 0 0       0 return $res if !defined($filename);
65              
66 0         0 my $fh;
67 0 0       0 if ($filename eq '-') {
68 0         0 $fh = \*STDOUT;
69             } else {
70 0 0       0 if (-f $filename) {
71 0 0       0 if ($overwrite) {
72 0         0 log_info "[csvutil] Overwriting output file $filename";
73             } else {
74 0         0 return [412, "Refusing to ovewrite existing output file '$filename', please select another path or specify --overwrite"];
75             }
76             }
77 0 0       0 open my $fh, ">", $filename or do {
78 0         0 return [500, "Can't open output file '$filename': $!"];
79             };
80 0         0 binmode $fh, ":encoding(utf8)";
81 0         0 print $fh $res->[2];
82 0 0       0 close $fh or warn "Can't write to '$filename': $!";
83 0         0 return [$res->[0], $res->[1]];
84             }
85             }
86              
87             sub compile_eval_code {
88 44 100   44 1 114 return $_[0] if ref $_[0] eq 'CODE';
89 43         107 my ($str, $label) = @_;
90 43 50 33     172 defined($str) && length($str) or die [400, "Please specify code ($label)"];
91 43         98 $str = "package main; no strict; no warnings; sub { $str }";
92 43         128 log_trace "[csvutil] Compiling Perl code: $str";
93 1     1   7 my $code = eval $str; ## no critic: BuiltinFunctions::ProhibitStringyEval
  1     1   2  
  1     1   40  
  1     1   6  
  1     1   2  
  1     1   54  
  1     1   8  
  1     1   2  
  1     1   48  
  1     1   8  
  1     1   2  
  1     1   51  
  1     1   7  
  1     1   2  
  1     1   41  
  1     1   7  
  1     1   2  
  1     1   64  
  1     1   6  
  1     1   8  
  1     1   23  
  1     1   5  
  1     1   2  
  1     1   50  
  1     1   7  
  1     1   2  
  1     1   23  
  1     1   5  
  1     1   2  
  1     1   50  
  1     1   7  
  1     1   10  
  1     1   36  
  1     1   7  
  1     1   1  
  1     1   52  
  1     1   7  
  1     1   3  
  1     1   27  
  1     1   5  
  1     1   2  
  1     1   52  
  1     1   7  
  1     1   3  
  1     1   35  
  1     1   6  
  1     1   2  
  1     1   40  
  1     1   7  
  1     1   2  
  1     1   35  
  1     1   6  
  1     1   3  
  1     1   40  
  1     1   7  
  1     1   2  
  1     1   35  
  1     1   5  
  1     1   3  
  1     1   47  
  1     1   7  
  1     1   2  
  1     1   33  
  1     1   6  
  1     1   2  
  1     1   53  
  1     1   816  
  1     1   3  
  1     1   31  
  1     1   5  
  1     1   2  
  1     1   43  
  1     1   6  
  1     1   3  
  1     1   23  
  1     1   4  
  1     1   2  
  1     1   61  
  1     1   6  
  1     1   2  
  1     1   23  
  1     1   5  
  1     1   2  
  1     1   59  
  1     1   7  
  1     1   111  
  1         36  
  1         7  
  1         2  
  1         129  
  1         7  
  1         2  
  1         35  
  1         6  
  1         2  
  1         49  
  1         7  
  1         2  
  1         37  
  1         6  
  1         3  
  1         50  
  1         7  
  1         2  
  1         28  
  1         5  
  1         3  
  1         57  
  1         7  
  1         2  
  1         26  
  1         5  
  1         2  
  1         68  
  1         7  
  1         14  
  1         35  
  1         6  
  1         2  
  1         82  
  1         7  
  1         1  
  1         35  
  1         7  
  1         6  
  1         58  
  1         7  
  1         3  
  1         34  
  1         6  
  1         2  
  1         65  
  1         6  
  1         3  
  1         45  
  1         8  
  1         2  
  1         54  
  1         6  
  1         2  
  1         20  
  1         4  
  1         2  
  1         60  
  1         8  
  1         3  
  1         26  
  1         15  
  1         2  
  1         53  
  1         6  
  1         2  
  1         20  
  1         4  
  1         2  
  1         51  
  1         9  
  1         2  
  1         25  
  1         5  
  1         1  
  1         52  
  1         6  
  1         2  
  1         26  
  1         5  
  1         2  
  1         52  
  1         7  
  1         2  
  1         25  
  1         5  
  1         2  
  1         68  
  1         6  
  1         3  
  1         24  
  1         5  
  1         2  
  1         48  
  1         7  
  1         3  
  1         25  
  1         5  
  1         2  
  1         55  
  1         7  
  1         2  
  1         25  
  1         5  
  1         2  
  1         56  
  1         7  
  1         4  
  1         45  
  1         7  
  1         2  
  1         45  
  1         7  
  1         3  
  1         35  
  1         6  
  1         2  
  1         59  
  1         7  
  1         2  
  1         25  
  1         5  
  1         11  
  1         59  
  1         8  
  1         2  
  1         41  
  1         6  
  1         2  
  1         48  
  1         7  
  1         6  
  1         24  
  1         5  
  1         2  
  1         58  
  1         7  
  1         2  
  1         25  
  1         5  
  1         2  
  1         54  
  1         6  
  1         2  
  1         39  
  1         5  
  1         2  
  1         78  
  1         13  
  1         2  
  1         36  
  1         6  
  1         2  
  1         72  
  1         6  
  1         3  
  1         26  
  1         4  
  1         3  
  1         84  
  1         7  
  1         2  
  1         48  
  1         11  
  1         2  
  1         75  
  1         8  
  1         2  
  1         28  
  1         5  
  1         1  
  1         90  
  43         3451  
94 43 100       171 die [400, "Can't compile code ($label) '$str': $@"] if $@;
95 40         124 $code;
96             }
97              
98             sub eval_code {
99 1     1   7 no warnings 'once';
  1         2  
  1         7515  
100 60     60 1 133 my ($code, $r, $value_for_topic, $return_topic) = @_;
101 60         96 local $_ = $value_for_topic;
102 60         82 local $main::r = $r;
103 60         86 local $main::row = $r->{input_row};
104 60         81 local $main::rownum = $r->{input_rownum};
105 60         85 local $main::data_rownum = $r->{input_data_rownum};
106 60         81 local $main::csv = $r->{input_parser};
107 60         84 local $main::fields_idx = $r->{input_fields_idx};
108 60 100       99 if ($return_topic) {
109 9         187 $code->($_);
110 9         30 $_;
111             } else {
112 51         1030 $code->($_);
113             }
114             }
115              
116             sub _get_field_idx {
117 0     0   0 my ($field, $field_idxs) = @_;
118 0 0 0     0 defined($field) && length($field) or die "Please specify at least a field\n";
119 0         0 my $idx = $field_idxs->{$field};
120             die "Unknown field '$field' (known fields include: ".
121 0 0       0 join(", ", map { "'$_'" } sort {$field_idxs->{$a} <=> $field_idxs->{$b}}
  0         0  
  0         0  
122             keys %$field_idxs).")\n" unless defined $idx;
123 0         0 $idx;
124             }
125              
126             sub _get_csv_row {
127 0     0   0 my ($csv, $row, $i, $outputs_header) = @_;
128             #use DD; print " "; dd $row;
129 0 0 0     0 return "" if $i == 1 && !$outputs_header;
130 0 0       0 my $status = $csv->combine(@$row)
131             or die "Error in line $i: ".$csv->error_input."\n";
132 0         0 $csv->string . "\n";
133             }
134              
135             sub _instantiate_parser_default {
136 0     0   0 require Text::CSV_XS;
137              
138 0         0 Text::CSV_XS->new({binary=>1});
139             }
140              
141             sub _instantiate_parser {
142 195     195   1936 require Text::CSV_XS;
143              
144 195         12911 my ($args, $prefix) = @_;
145 195   50     397 $prefix //= '';
146              
147 195         418 my %tcsv_opts = (binary=>1);
148 195 100 66     1480 if (defined $args->{"${prefix}sep_char"} ||
    100 66        
149             defined $args->{"${prefix}quote_char"} ||
150             defined $args->{"${prefix}escape_char"}) {
151 1 50       7 $tcsv_opts{"sep_char"} = $args->{"${prefix}sep_char"} if defined $args->{"${prefix}sep_char"};
152 1 50       6 $tcsv_opts{"quote_char"} = $args->{"${prefix}quote_char"} if defined $args->{"${prefix}quote_char"};
153 1 50       4 $tcsv_opts{"escape_char"} = $args->{"${prefix}escape_char"} if defined $args->{"${prefix}escape_char"};
154             } elsif ($args->{"${prefix}tsv"}) {
155 1         3 $tcsv_opts{"sep_char"} = "\t";
156 1         2 $tcsv_opts{"quote_char"} = undef;
157 1         2 $tcsv_opts{"escape_char"} = undef;
158             }
159 195 100       453 $tcsv_opts{always_quote} = 1 if $args->{"${prefix}always_quote"};
160 195 50       386 $tcsv_opts{quote_empty} = 1 if $args->{"${prefix}quote_empty"};
161              
162 195         758 Text::CSV_XS->new(\%tcsv_opts);
163             }
164              
165             sub _instantiate_emitter {
166 85     85   130 my $args = shift;
167 85         198 _instantiate_parser($args, 'output_');
168             }
169              
170             sub _complete_field_or_field_list {
171             # return list of known fields of a CSV
172              
173 0     0   0 my $which = shift;
174              
175 0         0 my %args = @_;
176 0   0     0 my $word = $args{word} // '';
177 0         0 my $cmdline = $args{cmdline};
178 0         0 my $r = $args{r};
179              
180             # we are not called from cmdline, bail
181 0 0       0 return undef unless $cmdline; ## no critic: Subroutines::ProhibitExplicitReturnUndef
182              
183             # let's parse argv first
184 0         0 my $args;
185             {
186             # this is not activated yet
187 0         0 $r->{read_config} = 1;
  0         0  
188              
189 0         0 my $res = $cmdline->parse_argv($r);
190             #return undef unless $res->[0] == 200;
191              
192 0 0       0 $cmdline->_read_config($r) unless $r->{config};
193 0         0 $args = $res->[2];
194             }
195              
196             # user hasn't specified -f, bail
197 0 0 0     0 return {message=>"Please specify input filename first"} unless defined $args && $args->{input_filename};
198              
199             # user wants to read CSV from stdin, bail
200 0 0       0 return {message=>"Can't get field list when input is stdin"} if $args->{input_filename} eq '-';
201              
202             # can the file be opened?
203 0         0 my $csv_parser = _instantiate_parser(\%args, 'input_');
204 0 0       0 open my($fh), "<encoding(utf8)", $args->{input_filename} or do {
205             #warn "csvutils: Cannot open file '$args->{input_filename}': $!\n";
206 0         0 return [];
207             };
208              
209             # can the header row be read?
210 0 0       0 my $row = $csv_parser->getline($fh) or return [];
211              
212 0 0 0     0 if (defined $args->{input_header} && !$args->{input_header}) {
213 0         0 $row = [map {"field$_"} 1 .. @$row];
  0         0  
214             }
215              
216 0 0       0 if ($which =~ /sort/) {
217 0         0 $row = [map {($_,"-$_","+$_","~$_")} @$row];
  0         0  
218             }
219              
220 0         0 require Complete::Util;
221 0 0       0 if ($which =~ /field_list/) {
222 0         0 return Complete::Util::complete_comma_sep(
223             word => $word,
224             elems => $row,
225             uniq => 1,
226             );
227             } else {
228 0         0 return Complete::Util::complete_array_elem(
229             word => $word,
230             array => $row,
231             );
232             }
233             }
234              
235             sub _complete_field {
236 0     0   0 _complete_field_or_field_list('field', @_);
237             }
238              
239             sub _complete_field_list {
240 0     0   0 _complete_field_or_field_list('field_list', @_);
241             }
242              
243             sub _complete_sort_field_list {
244 0     0   0 _complete_field_or_field_list('sort_field_list', @_);
245             }
246              
247             sub _complete_sort_field {
248 0     0   0 _complete_field_or_field_list('sort_field', @_);
249             }
250              
251             sub _array2hash {
252 0     0   0 my ($row, $fields) = @_;
253 0         0 my $rowhash = {};
254 0         0 for my $i (0..$#{$fields}) {
  0         0  
255 0         0 $rowhash->{ $fields->[$i] } = $row->[$i];
256             }
257 0         0 $rowhash;
258             }
259              
260             # check that the first N values of a field are all defined and numeric. if there
261             # are now rows or less than N values, return true.
262             sub _is_numeric_field {
263 0     0   0 require Scalar::Util::Numeric;
264              
265 0         0 my ($rows, $field_idx, $num_samples) = @_;
266 0   0     0 $num_samples //= 5;
267              
268 0         0 my $is_numeric = 1;
269 0         0 for my $row (@$rows) {
270 0         0 my $val = $row->[$field_idx];
271 0 0       0 return 0 unless defined $val;
272 0 0       0 return 0 unless Scalar::Util::Numeric::isnum($val);
273             }
274 0         0 $is_numeric;
275             }
276              
277             # find a single field by name or index (1-based), return index (0-based). die
278             # when requested field does not exist.
279             sub _find_field {
280 17     17   38 my ($fields, $name_or_idx) = @_;
281              
282             # search by name first
283 17         27 for my $i (0 .. $#{$fields}) {
  17         39  
284 29         54 my $field = $fields->[$i];
285 29 100       92 return $i if $field eq $name_or_idx;
286             }
287              
288 2 50       23 if ($name_or_idx eq '0') {
    100          
    50          
289 0         0 die [400, "Field index 0 is requested, you probably meant 1 for the first field?"];
290             } elsif ($name_or_idx =~ /\A[1-9][0-9]*\z/) {
291 1 50       5 if ($name_or_idx > @$fields) {
292 0         0 die [400, "There are only ".scalar(@$fields)." field(s) but field index $name_or_idx is requested"];
293             } else {
294 1         5 return $name_or_idx-1;
295             }
296             } elsif ($name_or_idx =~ /\A-[1-9][0-9]*\z/) {
297 0 0       0 if (-$name_or_idx > @$fields) {
298 0         0 die [400, "There are only ".scalar(@$fields)." field(s) but field index $name_or_idx is requested"];
299             } else {
300 0         0 return @$fields + $name_or_idx;
301             }
302             }
303              
304             # not found
305             die [404, "Unknown field name/index '$name_or_idx' (known fields include: ".
306 1         6 join(", ", map { "'$_'" } @$fields).")"];
  3         19  
307             }
308              
309             # select one or more fields with options like --include-field, etc
310             sub _select_fields {
311 12     12   34 my ($fields, $field_idxs, $args, $default_select_choice) = @_;
312              
313 12         22 my @selected_fields;
314              
315             my $select_field_options_used;
316              
317 12 100       31 if (defined $args->{include_field_pat}) {
318 4         18 $select_field_options_used++;
319 4         11 for my $field (@$fields) {
320 10 50       45 if ($field =~ $args->{include_field_pat}) {
321 10         24 push @selected_fields, $field;
322             }
323             }
324             }
325 12 100       34 if (defined $args->{exclude_field_pat}) {
326 1         6 $select_field_options_used++;
327 1         14 @selected_fields = grep { $_ !~ $args->{exclude_field_pat} }
  3         16  
328             @selected_fields;
329             }
330 12 100       28 if (defined $args->{include_fields}) {
331 8         16 $select_field_options_used++;
332             FIELD:
333 8         14 for my $field (@{ $args->{include_fields} }) {
  8         19  
334 13 100       31 unless (defined $field_idxs->{$field}) {
335 4 100       24 return [400, "Unknown field '$field'"] unless $args->{ignore_unknown_fields};
336 2         7 next FIELD;
337             }
338 9 50       26 next if grep { $field eq $_ } @selected_fields;
  3         12  
339 9         21 push @selected_fields, $field;
340             }
341             }
342 10 100       29 if (defined $args->{exclude_fields}) {
343 2         3 $select_field_options_used++;
344             FIELD:
345 2         6 for my $field (@{ $args->{exclude_fields} }) {
  2         6  
346 2 50       7 unless (defined $field_idxs->{$field}) {
347 0 0       0 return [400, "Unknown field '$field'"] unless $args->{ignore_unknown_fields};
348 0         0 next FIELD;
349             }
350 2         6 @selected_fields = grep { $field ne $_ } @selected_fields;
  6         16  
351             }
352             }
353              
354 10 50 33     28 if (!$select_field_options_used && $default_select_choice) {
355 0 0       0 if ($default_select_choice eq 'all') {
    0          
    0          
    0          
356 0         0 @selected_fields = @$fields;
357             } elsif ($default_select_choice eq 'first') {
358 0 0       0 @selected_fields = ($fields->[0]) if @$fields;
359             } elsif ($default_select_choice eq 'last') {
360 0 0       0 @selected_fields = ($fields->[-1]) if @$fields;
361             } elsif ($default_select_choice eq 'first-if-only-field') {
362 0 0       0 @selected_fields = ($fields->[0]) if @$fields == 1;
363             }
364             }
365              
366 10 100       21 if ($args->{show_selected_fields}) {
367 1         7 return [200, "OK", \@selected_fields];
368             }
369              
370             #my %selected_field_idxs;
371             #$selected_field_idxs{$_} = $fields_idx->{$_} for @selected_fields;
372              
373 9         15 my @selected_field_idxs_array;
374 9         27 push @selected_field_idxs_array, $field_idxs->{$_} for @selected_fields;
375              
376 9         47 [100, "Continue", [\@selected_fields, \@selected_field_idxs_array]];
377             }
378              
379             our $xcomp_csvfiles = [filename => {file_ext_filter => qr/^[tc]sv$/i}];
380              
381             our %argspecs_csv_input = (
382             input_header => {
383             summary => 'Specify whether input CSV has a header row',
384             'summary.alt.bool.not' => 'Specify that input CSV does not have a header row',
385             schema => 'bool*',
386             default => 1,
387             description => <<'_',
388              
389             By default, the first row of the input CSV will be assumed to contain field
390             names (and the second row contains the first data row). When you declare that
391             input CSV does not have header row (`--no-input-header`), the first row of the
392             CSV is assumed to contain the first data row. Fields will be named `field1`,
393             `field2`, and so on.
394              
395             _
396             cmdline_aliases => {
397             },
398             tags => ['category:input'],
399             },
400             input_tsv => {
401             summary => "Inform that input file is in TSV (tab-separated) format instead of CSV",
402             schema => 'true*',
403             description => <<'_',
404              
405             Overriden by `--input-sep-char`, `--input-quote-char`, `--input-escape-char`
406             options. If one of those options is specified, then `--input-tsv` will be
407             ignored.
408              
409             _
410             tags => ['category:input'],
411             },
412             input_sep_char => {
413             summary => 'Specify field separator character in input CSV, will be passed to Text::CSV_XS',
414             schema => ['str*', len=>1],
415             description => <<'_',
416              
417             Defaults to `,` (comma). Overrides `--input-tsv` option.
418              
419             _
420             tags => ['category:input'],
421             },
422             input_quote_char => {
423             summary => 'Specify field quote character in input CSV, will be passed to Text::CSV_XS',
424             schema => ['str*', len=>1],
425             description => <<'_',
426              
427             Defaults to `"` (double quote). Overrides `--input-tsv` option.
428              
429             _
430             tags => ['category:input'],
431             },
432             input_escape_char => {
433             summary => 'Specify character to escape value in field in input CSV, will be passed to Text::CSV_XS',
434             schema => ['str*', len=>1],
435             description => <<'_',
436              
437             Defaults to `\\` (backslash). Overrides `--input-tsv` option.
438              
439             _
440             tags => ['category:input'],
441             },
442             );
443              
444             our %argspecs_csv_output = (
445             output_header => {
446             summary => 'Whether output CSV should have a header row',
447             schema => 'bool*',
448             description => <<'_',
449              
450             By default, a header row will be output *if* input CSV has header row. Under
451             `--output-header`, a header row will be output even if input CSV does not have
452             header row (value will be something like "col0,col1,..."). Under
453             `--no-output-header`, header row will *not* be printed even if input CSV has
454             header row. So this option can be used to unconditionally add or remove header
455             row.
456              
457             _
458             tags => ['category:output'],
459             },
460             output_tsv => {
461             summary => "Inform that output file is TSV (tab-separated) format instead of CSV",
462             schema => 'bool*',
463             description => <<'_',
464              
465             This is like `--input-tsv` option but for output instead of input.
466              
467             Overriden by `--output-sep-char`, `--output-quote-char`, `--output-escape-char`
468             options. If one of those options is specified, then `--output-tsv` will be
469             ignored.
470              
471             _
472             tags => ['category:output'],
473             },
474             output_sep_char => {
475             summary => 'Specify field separator character in output CSV, will be passed to Text::CSV_XS',
476             schema => ['str*', len=>1],
477             description => <<'_',
478              
479             This is like `--input-sep-char` option but for output instead of input.
480              
481             Defaults to `,` (comma). Overrides `--output-tsv` option.
482              
483             _
484             tags => ['category:output'],
485             },
486             output_quote_char => {
487             summary => 'Specify field quote character in output CSV, will be passed to Text::CSV_XS',
488             schema => ['str*', len=>1],
489             description => <<'_',
490              
491             This is like `--input-quote-char` option but for output instead of input.
492              
493             Defaults to `"` (double quote). Overrides `--output-tsv` option.
494              
495             _
496             tags => ['category:output'],
497             },
498             output_escape_char => {
499             summary => 'Specify character to escape value in field in output CSV, will be passed to Text::CSV_XS',
500             schema => ['str*', len=>1],
501             description => <<'_',
502              
503             This is like `--input-escape-char` option but for output instead of input.
504              
505             Defaults to `\\` (backslash). Overrides `--output-tsv` option.
506              
507             _
508             tags => ['category:output'],
509             },
510             output_always_quote => {
511             summary => 'Whether to always quote values',
512             schema => 'bool*',
513             default => 0,
514             description => <<'_',
515              
516             When set to false (the default), values are quoted only when necessary:
517              
518             field1,field2,"field three contains comma (,)",field4
519              
520             When set to true, then all values will be quoted:
521              
522             "field1","field2","field three contains comma (,)","field4"
523              
524             _
525             tags => ['category:output'],
526             },
527             output_quote_empty => {
528             summary => 'Whether to quote empty values',
529             schema => 'bool*',
530             default => 0,
531             description => <<'_',
532              
533             When set to false (the default), empty values are not quoted:
534              
535             field1,field2,,field4
536              
537             When set to true, then empty values will be quoted:
538              
539             field1,field2,"",field4
540              
541             _
542             tags => ['category:output'],
543             },
544             );
545              
546             our %argspecopt_input_filename = (
547             input_filename => {
548             summary => 'Input CSV file',
549             description => <<'_',
550              
551             Use `-` to read from stdin.
552              
553             Encoding of input file is assumed to be UTF-8.
554              
555             _
556             schema => 'filename*',
557             default => '-',
558             'x.completion' => $xcomp_csvfiles,
559             tags => ['category:input'],
560             },
561             );
562              
563             our %argspecopt_input_filenames = (
564             input_filenames => {
565             'x.name.is_plural' => 1,
566             'x.name.singular' => 'input_filename',
567             summary => 'Input CSV files',
568             description => <<'_',
569              
570             Use `-` to read from stdin.
571              
572             Encoding of input file is assumed to be UTF-8.
573              
574             _
575             schema => ['array*', of=>'filename*'],
576             default => ['-'],
577             'x.completion' => $xcomp_csvfiles,
578             tags => ['category:input'],
579             },
580             );
581              
582             our %argspecopt_overwrite = (
583             overwrite => {
584             summary => 'Whether to override existing output file',
585             schema => 'bool*',
586             cmdline_aliases=>{O=>{}},
587             tags => ['category:output'],
588             },
589             );
590              
591             our %argspecsopt_inplace = (
592             inplace => {
593             summary => 'Output to the same file as input',
594             schema => 'true*',
595             description => <<'_',
596              
597             Normally, you output to a different file than input. If you try to output to the
598             same file (`-o INPUT.csv -O`) you will clobber the input file; thus the utility
599             prevents you from doing it. However, with this `--inplace` option, you can
600             output to the same file. Like perl's `-i` option, this will first output to a
601             temporary file in the same directory as the input file then rename to the final
602             file at the end. You cannot specify output file (`-o`) when using this option,
603             but you can specify backup extension with `-b` option.
604              
605             Some caveats:
606              
607             - if input file is a symbolic link, it will be replaced with a regular file;
608             - renaming (implemented using `rename()`) can fail if input filename is too long;
609             - value specified in `-b` is currently not checked for acceptable characters;
610             - things can also fail if permissions are restrictive;
611              
612             _
613             tags => ['category:output'],
614             },
615             inplace_backup_ext => {
616             summary => 'Extension to add for backup of input file',
617             schema => 'str*',
618             default => '',
619             description => <<'_',
620              
621             In inplace mode (`--inplace`), if this option is set to a non-empty string, will
622             rename the input file using this extension as a backup. The old existing backup
623             will be overwritten, if any.
624              
625             _
626             cmdline_aliases => {b=>{}},
627             tags => ['category:output'],
628             },
629             );
630              
631             our %argspecopt_output_filename = (
632             output_filename => {
633             summary => 'Output filename',
634             description => <<'_',
635              
636             Use `-` to output to stdout (the default if you don't specify this option).
637              
638             Encoding of output file is assumed to be UTF-8.
639              
640             _
641             schema => 'filename*',
642             cmdline_aliases=>{o=>{}},
643             tags => ['category:output'],
644             },
645             );
646              
647             our %argspecopt_output_filenames = (
648             output_filenames => {
649             summary => 'Output filenames',
650             description => <<'_',
651              
652             Use `-` to output to stdout (the default if you don't specify this option).
653              
654             Encoding of output file is assumed to be UTF-8.
655              
656             _
657             schema => ['array*', of=>'filename*'],
658             cmdline_aliases=>{o=>{}},
659             tags => ['category:output'],
660             },
661             );
662              
663             our %argspecopt_field = (
664             field => {
665             summary => 'Field name',
666             schema => 'str*',
667             cmdline_aliases => { f=>{} },
668             completion => \&_complete_field,
669             },
670             );
671              
672             our %argspecopt_field_1 = (
673             field => {
674             summary => 'Field name',
675             schema => 'str*',
676             pos => 1,
677             cmdline_aliases => { f=>{} },
678             completion => \&_complete_field,
679             },
680             );
681              
682             our %argspec_field_1 = (
683             field => {
684             summary => 'Field name',
685             schema => 'str*',
686             cmdline_aliases => { f=>{} },
687             req => 1,
688             pos => 1,
689             completion => \&_complete_field,
690             },
691             );
692              
693             our %argspec_fields_1plus = (
694             fields => {
695             'x.name.is_plural' => 1,
696             'x.name.singular' => 'field',
697             summary => 'Field names',
698             schema => ['array*', of=>['str*', min_len=>1], min_len=>1],
699             req => 1,
700             pos => 1,
701             slurpy => 1,
702             cmdline_aliases => {f=>{}},
703             element_completion => \&_complete_field,
704             },
705             );
706              
707             # without completion, for adding new field
708             our %argspec_field_1_nocomp = (
709             field => {
710             summary => 'Field name',
711             schema => 'str*',
712             cmdline_aliases => { f=>{} },
713             req => 1,
714             pos => 1,
715             },
716             );
717              
718             # without completion, for adding new fields
719             our %argspec_fields_1plus_nocomp = (
720             fields => {
721             'x.name.is_plural' => 1,
722             'x.name.singular' => 'field',
723             summary => 'Field names',
724             'summary.alt.plurality.singular' => 'Field name',
725             schema => ['array*', of=>['str*', min_len=>1], min_len=>1],
726             cmdline_aliases => { f=>{} },
727             req => 1,
728             pos => 1,
729             slurpy => 1,
730             },
731             );
732              
733             our %argspec_fields = (
734             fields => {
735             'x.name.is_plural' => 1,
736             'x.name.singular' => 'field',
737             summary => 'Field names',
738             schema => ['array*', of=>['str*', min_len=>1], min_len=>1],
739             req => 1,
740             cmdline_aliases => {f=>{}},
741             element_completion => \&_complete_field,
742             },
743             );
744              
745             our %argspecopt_fields = (
746             fields => {
747             'x.name.is_plural' => 1,
748             'x.name.singular' => 'field',
749             summary => 'Field names',
750             schema => ['array*', of=>['str*', min_len=>1], min_len=>1],
751             cmdline_aliases => {f=>{}},
752             element_completion => \&_complete_field,
753             },
754             );
755              
756             our %argspecsopt_field_selection = (
757             include_fields => {
758             'x.name.is_plural' => 1,
759             'x.name.singular' => 'include_field',
760             summary => 'Field names to include, takes precedence over --exclude-field-pat',
761             schema => ['array*', of=>'str*'],
762             cmdline_aliases => {
763             f => {},
764             field => {}, # backward compatibility
765             },
766             element_completion => \&_complete_field,
767             tags => ['category:field-selection'],
768             },
769             include_field_pat => {
770             summary => 'Field regex pattern to select, overidden by --exclude-field-pat',
771             schema => 're*',
772             cmdline_aliases => {
773             field_pat => {}, # backward compatibility
774             include_all_fields => { summary => 'Shortcut for --field-pat=.*, effectively selecting all fields', is_flag=>1, code => sub { $_[0]{include_field_pat} = '.*' } },
775             a => { summary => 'Shortcut for --field-pat=.*, effectively selecting all fields', is_flag=>1, code => sub { $_[0]{include_field_pat} = '.*' } },
776             },
777             tags => ['category:field-selection'],
778             },
779             exclude_fields => {
780             'x.name.is_plural' => 1,
781             'x.name.singular' => 'exclude_field',
782             summary => 'Field names to exclude, takes precedence over --fields',
783             schema => ['array*', of=>'str*'],
784             cmdline_aliases => {
785             F => {},
786             },
787             element_completion => \&_complete_field,
788             tags => ['category:field-selection'],
789             },
790             exclude_field_pat => {
791             summary => 'Field regex pattern to exclude, takes precedence over --field-pat',
792             schema => 're*',
793             cmdline_aliases => {
794             exclude_all_fields => { summary => 'Shortcut for --exclude-field-pat=.*, effectively excluding all fields', is_flag=>1, code => sub { $_[0]{exclude_field_pat} = '.*' } },
795             A => { summary => 'Shortcut for --exclude-field-pat=.*, effectively excluding all fields', is_flag=>1, code => sub { $_[0]{exclude_field_pat} = '.*' } },
796             },
797             tags => ['category:field-selection'],
798             },
799             ignore_unknown_fields => {
800             summary => 'When unknown fields are specified in --include-field (--field) or --exclude-field options, ignore them instead of throwing an error',
801             schema => 'bool*',
802             },
803             show_selected_fields => {
804             summary => 'Show selected fields and then immediately exit',
805             schema => 'true*',
806             },
807             );
808              
809             our %argspec_eval = (
810             eval => {
811             summary => 'Perl code',
812             schema => $sch_req_str_or_code,
813             cmdline_aliases => { e=>{} },
814             req => 1,
815             },
816             );
817              
818             our %argspecopt_eval = (
819             eval => {
820             summary => 'Perl code',
821             schema => $sch_req_str_or_code,
822             cmdline_aliases => { e=>{} },
823             },
824             );
825              
826             our %argspec_eval_1 = (
827             eval => {
828             summary => 'Perl code',
829             schema => $sch_req_str_or_code,
830             cmdline_aliases => { e=>{} },
831             req => 1,
832             pos => 1,
833             },
834             );
835              
836             our %argspec_eval_2 = (
837             eval => {
838             summary => 'Perl code',
839             schema => $sch_req_str_or_code,
840             cmdline_aliases => { e=>{} },
841             req => 1,
842             pos => 2,
843             },
844             );
845              
846             our %argspecopt_eval_2 = (
847             eval => {
848             summary => 'Perl code',
849             schema => $sch_req_str_or_code,
850             cmdline_aliases => { e=>{} },
851             pos => 2,
852             },
853             );
854              
855             our %argspecsopt_sortsub = (
856             by_sortsub => {
857             schema => 'str*',
858             description => <<'_',
859              
860             When sorting rows, usually combined with `--key` because most Sort::Sub routine
861             expects a string to be compared against.
862              
863             When sorting fields, the Sort::Sub routine will get the field name as argument.
864              
865             _
866             summary => 'Sort using a Sort::Sub routine',
867             'x.completion' => ['sortsub_spec'],
868             },
869             sortsub_args => {
870             summary => 'Arguments to pass to Sort::Sub routine',
871             schema => ['hash*', of=>'str*'],
872             },
873             );
874              
875             our %argspecopt_key = (
876             key => {
877             summary => 'Generate sort keys with this Perl code',
878             description => <<'_',
879              
880             If specified, then will compute sort keys using Perl code and sort using the
881             keys. Relevant when sorting using `--by-code` or `--by-sortsub`. If specified,
882             then instead of row when sorting rows, the code (or Sort::Sub routine) will
883             receive these sort keys to sort against.
884              
885             The code will receive the row (arrayref, or if -H is specified, hashref) as the
886             argument.
887              
888             _
889             schema => $sch_req_str_or_code,
890             cmdline_aliases => {k=>{}},
891             },
892             );
893              
894             our %argspecs_sort_rows = (
895             reverse => {
896             schema => ['bool', is=>1],
897             cmdline_aliases => {r=>{}},
898             },
899             ci => {
900             schema => ['bool', is=>1],
901             cmdline_aliases => {i=>{}},
902             },
903             by_fields => {
904             summary => 'Sort by a list of field specifications',
905             'summary.alt.plurality.singular' => 'Add a sort field specification',
906             'x.name.is_plural' => 1,
907             'x.name.singular' => 'by_field',
908             description => <<'_',
909              
910             Each field specification is a field name with an optional prefix. `FIELD`
911             (without prefix) means sort asciibetically ascending (smallest to largest),
912             `~FIELD` means sort asciibetically descending (largest to smallest), `+FIELD`
913             means sort numerically ascending, `-FIELD` means sort numerically descending.
914              
915             _
916             schema => ['array*', of=>'str*'],
917             element_completion => \&_complete_sort_field,
918             },
919             by_code => {
920             summary => 'Sort by using Perl code',
921             schema => $sch_req_str_or_code,
922             description => <<'_',
923              
924             `$a` and `$b` (or the first and second argument) will contain the two rows to be
925             compared. Which are arrayrefs; or if `--hash` (`-H`) is specified, hashrefs; or
926             if `--key` is specified, whatever the code in `--key` returns.
927              
928             _
929             },
930             %argspecopt_key,
931             %argspecsopt_sortsub,
932             );
933              
934             our %argspecs_sort_fields = (
935             reverse => {
936             schema => ['bool', is=>1],
937             cmdline_aliases => {r=>{}},
938             },
939             ci => {
940             schema => ['bool', is=>1],
941             cmdline_aliases => {i=>{}},
942             },
943             by_examples => {
944             summary => 'Sort by a list of field names as examples',
945             'summary.alt.plurality.singular' => 'Add a field to sort by example',
946             'x.name.is_plural' => 1,
947             'x.name.singular' => 'by_example',
948             schema => ['array*', of=>'str*'],
949             element_completion => \&_complete_field,
950             },
951             by_code => {
952             summary => 'Sort fields using Perl code',
953             schema => $sch_req_str_or_code,
954             description => <<'_',
955              
956             `$a` and `$b` (or the first and second argument) will contain `[$field_name,
957             $field_idx]`.
958              
959             _
960             },
961             %argspecsopt_sortsub,
962             );
963              
964             our %argspecopt_with_data_rows = (
965             with_data_rows => {
966             summary => 'Whether to also output data rows',
967             schema => 'bool',
968             },
969             );
970              
971             our %argspecopt_hash = (
972             hash => {
973             summary => 'Provide row in $_ as hashref instead of arrayref',
974             schema => ['bool*', is=>1],
975             cmdline_aliases => {H=>{}},
976             },
977             );
978              
979             # add a position to specified argument, if possible
980             sub _add_arg_pos {
981 40     40   87 my ($args, $argname, $is_slurpy) = @_;
982              
983             # argument already has a position, return
984 40 50       108 return if defined $args->{$argname}{pos};
985              
986             # position of slurpy argument
987 40         65 my $slurpy_pos;
988 40         180 for (keys %$args) {
989 482 100       938 next unless $args->{$_}{slurpy};
990 6         14 $slurpy_pos = $args->{$_}{pos};
991 6         8 last;
992             }
993              
994             # there is already a slurpy arg, return
995 40 50 66     130 return if $is_slurpy && defined $slurpy_pos;
996              
997             # find the lowest position that's not available
998             ARG:
999 40         120 for my $j (0 .. scalar(keys %$args)-1) {
1000 57 100 100     151 last if defined $slurpy_pos && $j >= $slurpy_pos;
1001 53         154 for (keys %$args) {
1002 586 100 100     1203 next ARG if defined $args->{$_}{pos} && $args->{$_}{pos} == $j;
1003             }
1004 36         93 $args->{$argname}{pos} = $j;
1005 36 100       69 $args->{$argname}{slurpy} = 1 if $is_slurpy;
1006 36         78 last;
1007             }
1008             }
1009              
1010             sub _randext {
1011 2     2   23 state $charset = [0..9, "A".."Z","a".."z"];
1012 2         5 my $len = shift;
1013 2         4 my $ext = "";
1014 2         5 for (1..$len) { $ext .= $charset->[rand @$charset] }
  10         20  
1015 2         6 $ext;
1016             }
1017              
1018             $SPEC{gen_csv_util} = {
1019             v => 1.1,
1020             summary => 'Generate a CSV utility',
1021             description => <<'_',
1022              
1023             This routine is used to generate a CSV utility in the form of a <pm:Rinci>
1024             function (code and metadata). You can then produce a CLI from the Rinci function
1025             simply using <pm:Perinci::CmdLine::Gen> or, if you use <pm:Dist::Zilla>,
1026             <pm:Dist::Zilla::Plugin::GenPericmdScript> or, if on the command-line,
1027             <prog:gen-pericmd-script>.
1028              
1029             Using this routine, by providing just one or a few hooks and setting some
1030             parameters like a couple of extra arguments, you will get a complete CLI with
1031             decent POD/manpage, ability to read one or multiple CSV's and write one or
1032             multiple CSV's, some command-line options to customize how the input CSV's
1033             should be parsed and how the output CSV's should be formatted and named. Your
1034             CLI also has tab completion, usage and help message, and other features.
1035              
1036             To create a CSV utility, you specify a `name` (e.g. `csv_dump`; must be a valid
1037             unqualified Perl identifier/function name) and optionally `summary`,
1038             `description`, and other metadata like `links` or even `add_meta_props`. Then
1039             you specify one or more of `on_*` or `before_*` or `after_*` arguments to supply
1040             handlers (coderefs) for your CSV utility at various hook points.
1041              
1042              
1043             *THE HOOKS*
1044              
1045             All code for hooks should accept a single argument `r`. `r` is a stash (hashref)
1046             of various data, the keys of which will depend on which hook point being called.
1047             You can also add more keys to store data or for flow control (see hook
1048             documentation below for more details).
1049              
1050             The order of the hooks, in processing chronological order:
1051              
1052             * on_begin
1053              
1054             Called when utility begins, before reading CSV. You can use this hook e.g. to
1055             process arguments, set output filenames (if you allow custom output
1056             filenames).
1057              
1058             * before_read_input
1059              
1060             Called before opening any input CSV file. This hook is *still* called even if
1061             your utility sets `reads_csv` to false.
1062              
1063             At this point, the `input_filenames` stash key (as well as other keys like
1064             `input_filename`, `input_filenum`, etc) has not been set. You can use this
1065             hook e.g. to set a custom `input_filenames`.
1066              
1067             * before_open_input_files
1068              
1069             Called before an input CSV file is about to be opened, including for stdin
1070             (`-`). You can use this hook e.g. to check/preprocess input file. Flow control
1071             is available by setting `$r->{wants_skip_files}` to skip reading all the input
1072             file and go directly to the `after_read_input` hook.
1073              
1074             * before_open_input_file
1075              
1076             Called before an input CSV file is about to be opened, including for stdin
1077             (`-`). For the first file, called after `before_open_input_file` hook. You can
1078             use this hook e.g. to check/preprocess input file. Flow control is available
1079             by setting `$r->{wants_skip_file}` to skip reading a single input file and go
1080             to the next file, or `$r->{wants_skip_files}` to skip reading the rest of the
1081             files and go directly to the `after_read_input` hook.
1082              
1083             * on_input_header_row
1084              
1085             Called when receiving header row. Will be called for every input file, and
1086             called even when user specify `--no-input-header`, in which case the header
1087             row will be the generated `["field1", "field2", ...]`. You can use this hook
1088             e.g. to add/remove/rearrange fields.
1089              
1090             You can set `$r->{wants_fill_rows}` to a defined false if you do not want
1091             `$r->{input_rows}` to be filled with empty string elements when it contains
1092             less than the number of fields (in case of sparse values at the end). Normally
1093             you only want to do this when you want to do checking, e.g. in
1094             <prog:csv-check-rows>.
1095              
1096             * on_input_data_row
1097              
1098             Called when receiving each data row. You can use this hook e.g. to modify the
1099             row or print output (for line-by-line transformation or filtering).
1100              
1101             * after_close_input_file
1102              
1103             Called after each input file is closed, including for stdin (`-`) (although
1104             for stdin, the handle is not actually closed). Flow control is possible by
1105             setting `$r->{wants_skip_files}` to skip reading the rest of the files and go
1106             straight to the `after_close_input_files` hook.
1107              
1108             * after_close_input_files
1109              
1110             Called after the last input file is closed, after the last
1111             `after_close_input_file` hook, including for stdin (`-`) (although for stdin,
1112             the handle is not actually closed).
1113              
1114             * after_read_input
1115              
1116             Called after the last row of the last CSV file is read and the last file is
1117             closed. This hook is *still* called, if you set `reads_csv` option to false.
1118             At this point the stash keys related to CSV reading have all been cleared,
1119             including `input_filenames`, `input_filename`, `input_fh`, etc.
1120              
1121             You can use this hook e.g. to print output if you buffer the output.
1122              
1123             * on_end
1124              
1125             Called when utility is about to exit. You can use this hook e.g. to return the
1126             final result.
1127              
1128              
1129             *THE STASH*
1130              
1131             The common keys that `r` will contain:
1132              
1133             - `gen_args`, hash. The arguments used to generate the CSV utility.
1134              
1135             - `util_args`, hash. The arguments that your CSV utility accepts. Parsed from
1136             command-line arguments (or configuration files, or environment variables).
1137              
1138             - `name`, str. The name of the CSV utility. Which can also be retrieved via
1139             `gen_args`.
1140              
1141             - `code_print`, coderef. Routine provided for you to print something. Accepts a
1142             string. Takes care of opening the output files for you.
1143              
1144             - `code_print_row`, coderef. Routine provided for you to print a data row. You
1145             pass the row (either arrayref or hashref). Takes care of opening the output
1146             files for you, as well as printing header row the first time, if needed.
1147              
1148             - `code_print_header_row`, coderef. Routine provided for you to print header
1149             row. You don't need to pass any arguments. Will only print the header row once
1150             per output file if output header is enabled, even if called multiple times.
1151              
1152             If you are accepting CSV data (`reads_csv` gen argument set to true), the
1153             following keys will also be available (in `on_input_header_row` and
1154             `on_input_data_row` hooks):
1155              
1156             - `input_parser`, a <pm:Text::CSV_XS> instance for input parsing.
1157              
1158             - `input_filenames`, array of str.
1159              
1160             - `input_filename`, str. The name of the current input file being read (`-` if
1161             reading from stdin).
1162              
1163             - `input_filenum`, uint. The number of the current input file, 1 being the first
1164             file, 2 for the second, and so on.
1165              
1166             - `input_fh`, the handle to the current file being read.
1167              
1168             - `input_rownum`, uint. The number of rows that have been read (reset after each
1169             input file). In `on_input_header_row` phase, this will be 1 since header row
1170             (including the generated one) is the first row. Then in `on_input_data_row`
1171             phase (called the first time for a file), it will be 2 for the first data row,
1172             even if physically it is the first row for CSV file that does not have a
1173             header.
1174              
1175             - `input_data_rownum`, uint. The number of data rows that have been read (reset
1176             after each input file). This will be equal to `input_rownum` less 1 if input
1177             file has header.
1178              
1179             - `input_row`, aos (array of str). The current input CSV row as an arrayref.
1180              
1181             - `input_row_as_hashref`, hos (hash of str). The current input CSV row as a
1182             hashref, with field names as hash keys and field values as hash values. This
1183             will only be calculated if utility wants it. Utility can express so by setting
1184             `$r->{wants_input_row_as_hashref}` to true, e.g. in the `on_begin` hook.
1185              
1186             - `input_header_row_count`, uint. Contains the number of actual header rows that
1187             have been read. If CLI user specifies `--no-input-header`, this will stay at
1188             zero. Will be reset for each CSV file.
1189              
1190             - `input_data_row_count`, int. Contains the number of actual data rows that have
1191             read. Will be reset for each CSV file.
1192              
1193             If you are outputting CSV (`writes_csv` gen argument set to true), the following
1194             keys will be available:
1195              
1196             - `output_emitter`, a <pm:Text::CSV_XS> instance for output.
1197              
1198             - `output_filenames`, array of str.
1199              
1200             - `output_filename`, str, name of current output file.
1201              
1202             - `output_filenum`, uint, the number of the current output file, 1 being the
1203             first file, 2 for the second, and so on.
1204              
1205             - `output_fh`, handle to the current output file.
1206              
1207             - `output_rownum`, uint. The number of rows that have been outputted (reset
1208             after each output file).
1209              
1210             - `output_data_rownum`, uint. The number of data rows that have been outputted
1211             (reset after each output file). This will be equal to `input_rownum` less 1 if
1212             input file has header.
1213              
1214             For other hook-specific keys, see the documentation for associated hook point.
1215              
1216              
1217             *ACCEPTING ADDITIONAL COMMAND-LINE OPTIONS/ARGUMENTS*
1218              
1219             As mentioned above, you will get additional command-line options/arguments in
1220             `$r->{util_args}` hashref. Some options/arguments are already added by
1221             `gen_csv_util`, e.g. `input_filename` or `input_filenames` along with
1222             `input_sep_char`, etc (when your utility declares `reads_csv`),
1223             `output_filename` or `output_filenames` along with `overwrite`,
1224             `output_sep_char`, etc (when your utility declares `writes_csv`).
1225              
1226             If you want to accept additional arguments/options, you specify them in
1227             `add_args` (hashref, with key being Each option/argument has to be specified
1228             first via `add_args` (as hashref, with key being argument name and value the
1229             argument specification as defined in <pm:Rinci::function>)). Some argument
1230             specifications have been defined in <pm:App::CSVUtils> and can be used. See
1231             existing utilities for examples.
1232              
1233              
1234             *READING CSV DATA*
1235              
1236             To read CSV data, normally your utility would provide handler for the
1237             `on_input_data_row` hook and sometimes additionally `on_input_header_row`.
1238              
1239              
1240             *OUTPUTTING STRING OR RETURNING RESULT*
1241              
1242             To output string, usually you call the provided routine `$r->{code_print}`. This
1243             routine will open the output files for you.
1244              
1245             You can also return enveloped result directly by setting `$r->{result}`.
1246              
1247              
1248             *OUTPUTTING CSV DATA*
1249              
1250             To output CSV data, usually you call the provided routine `$r->{code_print_row}`.
1251             This routine accepts a row (arrayref or hashref). This routine will open the
1252             output files for you when needed, as well as print header row automatically.
1253              
1254             You can also buffer rows from input to e.g. `$r->{output_rows}`, then call
1255             `$r->{code_print_row}` repeatedly in the `after_read_input` hook to print all the
1256             rows.
1257              
1258              
1259             *READING MULTIPLE CSV FILES*
1260              
1261             To read multiple CSV files, you first specify `reads_multiple_csv`. Then, you
1262             can supply handler for `on_input_header_row` and `on_input_data_row` as usual.
1263             If you want to do something before/after each input file, you can also supply
1264             handler for `before_open_input_file` or `after_close_input_file`.
1265              
1266              
1267             *WRITING TO MULTIPLE CSV FILES*
1268              
1269             Similarly, to write to many CSv files, you first specify `writes_multiple_csv`.
1270             Then, you can supply handler for `on_input_header_row` and `on_input_data_row`
1271             as usual. To switch to the next file, set
1272             `$r->{wants_switch_to_next_output_file}` to true, in which case the next call to
1273             `$r->{code_print_row}` will close the current file and open the next file.
1274              
1275              
1276             *CHANGING THE OUTPUT FIELDS*
1277              
1278             When calling `$r->{code_print_row}`, you can output whatever fields you want. By
1279             convention, you can set `$r->{output_fields}` and `$r->{output_fields_idx}` to
1280             let other handlers know about the output fields. For example, see the
1281             implementation of <prog:csv-concat>.
1282              
1283             _
1284             args => {
1285             name => {
1286             schema => 'perl::identifier::unqualified_ascii*',
1287             req => 1,
1288             tags => ['category:metadata'],
1289             },
1290             summary => {
1291             schema => 'str*',
1292             tags => ['category:metadata'],
1293             },
1294             description => {
1295             schema => 'str*',
1296             tags => ['category:metadata'],
1297             },
1298             links => {
1299             schema => ['array*', of=>'hash*'], # XXX defhashes
1300             tags => ['category:metadata'],
1301             },
1302             examples => {
1303             schema => ['array*'], # defhashes
1304             tags => ['category:metadata'],
1305             },
1306             add_meta_props => {
1307             summary => 'Add additional Rinci function metadata properties',
1308             schema => ['hash*'],
1309             tags => ['category:metadata'],
1310             },
1311             add_args => {
1312             schema => ['hash*'],
1313             tags => ['category:metadata'],
1314             },
1315             add_args_rels => {
1316             schema => ['hash*'],
1317             tags => ['category:metadata'],
1318             },
1319              
1320             reads_csv => {
1321             summary => 'Whether utility reads CSV data',
1322             'summary.alt.bool.not' => 'Specify that utility does not read CSV data',
1323             schema => 'bool*',
1324             default => 1,
1325             },
1326             reads_multiple_csv => {
1327             summary => 'Whether utility accepts CSV data',
1328             schema => 'bool*',
1329             description => <<'_',
1330              
1331             Setting this option to true will implicitly set the `reads_csv` option to true,
1332             obviously.
1333              
1334             _
1335             },
1336             writes_csv => {
1337             summary => 'Whether utility writes CSV data',
1338             'summary.alt.bool.not' => 'Specify that utility does not write CSV data',
1339             schema => 'bool*',
1340             default => 1,
1341             },
1342             writes_multiple_csv => {
1343             summary => 'Whether utility outputs CSV data',
1344             schema => 'bool*',
1345             description => <<'_',
1346              
1347             Setting this option to true will implicitly set the `writes_csv` option to true,
1348             obviously.
1349              
1350             _
1351             },
1352              
1353             on_begin => {
1354             schema => 'code*',
1355             },
1356             before_read_input => {
1357             schema => 'code*',
1358             },
1359             before_open_input_files => {
1360             schema => 'code*',
1361             },
1362             before_open_input_file => {
1363             schema => 'code*',
1364             },
1365             on_input_header_row => {
1366             schema => 'code*',
1367             },
1368             on_input_data_row => {
1369             schema => 'code*',
1370             },
1371             after_close_input_file => {
1372             schema => 'code*',
1373             },
1374             after_close_input_files => {
1375             schema => 'code*',
1376             },
1377             after_read_input => {
1378             schema => 'code*',
1379             },
1380             on_end => {
1381             schema => 'code*',
1382             },
1383             },
1384             result_naked => 1,
1385             result => {
1386             schema => 'bool*',
1387             },
1388             };
1389             sub gen_csv_util {
1390 24     24 1 167 my %gen_args = @_;
1391              
1392 24 50       104 my $name = delete($gen_args{name}) or die "Please specify name";
1393 24   50     76 my $summary = delete($gen_args{summary}) // '(No summary)';
1394 24   100     67 my $description = delete($gen_args{description}) // '(No description)';
1395 24   100     110 my $links = delete($gen_args{links}) // [];
1396 24   100     89 my $examples = delete($gen_args{examples}) // [];
1397 24         45 my $add_meta_props = delete $gen_args{add_meta_props};
1398 24         38 my $add_args = delete $gen_args{add_args};
1399 24         36 my $add_args_rels = delete $gen_args{add_args_rels};
1400 24         33 my $reads_multiple_csv = delete($gen_args{reads_multiple_csv});
1401 24   50     76 my $reads_csv = delete($gen_args{reads_csv}) // 1;
1402 24   50     36 my $tags = [ @{ delete($gen_args{tags}) // [] } ];
  24         73  
1403 24 100       66 $reads_csv = 1 if $reads_multiple_csv;
1404 24         32 my $writes_multiple_csv = delete($gen_args{writes_multiple_csv});
1405 24   100     71 my $writes_csv = delete($gen_args{writes_csv}) // 1;
1406 24 50       51 $writes_csv = 1 if $writes_multiple_csv;
1407 24         34 my $on_begin = delete $gen_args{on_begin};
1408 24         40 my $before_read_input = delete $gen_args{before_read_input};
1409 24         37 my $before_open_input_files = delete $gen_args{before_open_input_files};
1410 24         34 my $before_open_input_file = delete $gen_args{before_open_input_file};
1411 24         34 my $on_input_header_row = delete $gen_args{on_input_header_row};
1412 24         36 my $on_input_data_row = delete $gen_args{on_input_data_row};
1413 24         32 my $after_close_input_file = delete $gen_args{after_close_input_file};
1414 24         40 my $after_close_input_files = delete $gen_args{after_close_input_files};
1415 24         38 my $after_read_input = delete $gen_args{after_read_input};
1416 24         30 my $on_end = delete $gen_args{on_end};
1417              
1418 24 50       82 scalar(keys %gen_args) and die "Unknown argument(s): ".join(", ", keys %gen_args);
1419              
1420 24         37 my $code;
1421             CREATE_CODE: {
1422 24         36 $code = sub {
1423 110     110   278604 my %util_args = @_;
1424              
1425 110   100     499 my $has_header = $util_args{input_header} // 1;
1426 110   66     319 my $outputs_header = $util_args{output_header} // $has_header;
1427              
1428 110         349 my $r = {
1429             gen_args => \%gen_args,
1430             util_args => \%util_args,
1431             name => $name,
1432             };
1433              
1434             # inside the main eval block, we call hook handlers. A handler can
1435             # throw an exception (which can be a string or an enveloped response
1436             # like [500, "some error message"], see Rinci::function). we trap
1437             # the exception so we can return the appropriate enveloped response.
1438             MAIN_EVAL:
1439 110         211 eval {
1440              
1441             # do some checking
1442 110 50 33     291 if ($util_args{inplace} && (!$reads_csv || !$writes_csv)) {
      66        
1443 0         0 die [412, "--inplace cannot be specified when we do not read & write CSV"];
1444             }
1445              
1446 110 100       244 if ($on_begin) {
1447 37         125 log_trace "[csvutil] Calling on_begin hook handler ...";
1448 37         147 $on_begin->($r);
1449             }
1450              
1451             my $code_open_file = sub {
1452             # set output filenames, if not yet
1453 184 100   184   362 unless ($r->{output_filenames}) {
1454 70         99 my @output_filenames;
1455 70 100       170 if ($util_args{inplace}) {
    50          
1456 2         2 for my $input_filename (@{ $r->{input_filenames} }) {
  2         6  
1457 2         4 my $output_filename;
1458 2         3 while (1) {
1459 2         9 $output_filename = $input_filename . "." . _randext(5);
1460 2 50       49 last unless -e $output_filename;
1461             }
1462 2         9 push @output_filenames, $output_filename;
1463             }
1464             } elsif ($writes_multiple_csv) {
1465 0   0     0 @output_filenames = @{ $util_args{output_filenames} // ['-'] };
  0         0  
1466             } else {
1467 68   50     347 @output_filenames = ($util_args{output_filename} // '-');
1468             }
1469              
1470             CHECK_OUTPUT_FILENAME_SAME_AS_INPUT_FILENAME: {
1471 70         108 my %seen_output_abs_path; # key = output filename
  70         94  
1472 70 100 66     223 last unless $reads_csv && $writes_csv;
1473 67         91 for my $input_filename (@{ $r->{input_filenames} }) {
  67         129  
1474 78 50       161 next if $input_filename eq '-';
1475 78         2193 my $input_abs_path = Cwd::abs_path($input_filename);
1476 78 50       229 die [500, "Can't get absolute path of input filename '$input_filename'"] unless $input_abs_path;
1477 78         142 for my $output_filename (@output_filenames) {
1478 78 100       259 next if $output_filename eq '-';
1479 2 50       6 next if $seen_output_abs_path{$output_filename};
1480 2         39 my $output_abs_path = Cwd::abs_path($output_filename);
1481 2 50       8 die [500, "Can't get absolute path of output filename '$output_filename'"] unless $output_abs_path;
1482 2 0       8 die [412, "Cannot set output filename to '$output_filename' ".
    50          
1483             ($output_filename ne $output_abs_path ? "($output_abs_path) ":"").
1484             "because it is the same as input filename and input will be clobbered; use --inplace to avoid clobbering<"]
1485             if $output_abs_path eq $input_abs_path;
1486             }
1487             }
1488             } # CHECK_OUTPUT_FILENAME_SAME_AS_INPUT_FILENAME
1489              
1490 70         200 $r->{output_filenames} = \@output_filenames;
1491 70   50     337 $r->{output_num_of_files} //= scalar(@output_filenames);
1492             } # set output filenames
1493              
1494             # open the next file, if not yet
1495 184 100 66     678 if (!$r->{output_fh} || $r->{wants_switch_to_next_output_file}) {
1496 70   50     265 $r->{output_filenum} //= 0;
1497 70         112 $r->{output_filenum}++;
1498              
1499 70         194 $r->{output_rownum} = 0;
1500 70         116 $r->{output_data_rownum} = 0;
1501              
1502             # close the previous file, if any
1503 70 50 33     154 if ($r->{output_fh} && $r->{output_filename} ne '-') {
1504 0         0 log_info "[csvutil] Closing output file '$r->{output_filename}' ...";
1505 0 0       0 close $r->{output_fh} or die [500, "Can't close output file '$r->{output_filename}': $!"];
1506 0         0 delete $r->{has_printed_header};
1507 0         0 delete $r->{wants_switch_to_next_output_file};
1508             }
1509              
1510             # we have exhausted all the files, do nothing & return
1511 70 50       102 return if $r->{output_filenum} > @{ $r->{output_filenames} };
  70         167  
1512              
1513 70         164 $r->{output_filename} = $r->{output_filenames}[ $r->{output_filenum}-1 ];
1514             log_info "[csvutil] [%d/%s] Opening output file %s ...",
1515 70         249 $r->{output_filenum}, $r->{output_num_of_files}, $r->{output_filename};
1516 70 100       257 if ($r->{output_filename} eq '-') {
1517 68         147 $r->{output_fh} = \*STDOUT;
1518             } else {
1519 2 50       19 if (-f $r->{output_filename}) {
1520 0 0       0 if ($r->{util_args}{overwrite}) {
1521 0         0 log_info "[csvutil] Will be overwriting output file %s", $r->{output_filename};
1522             } else {
1523 0         0 die [412, "Refusing to overwrite existing output file '$r->{output_filename}', choose another name or use --overwrite (-O)"];
1524             }
1525             }
1526 2         10 my ($fh, $err) = _open_file_write($r->{output_filename});
1527 2 50       5 die $err if $err;
1528 2         6 $r->{output_fh} = $fh;
1529             }
1530             } # open the next file
1531 110         688 }; # code_open_file
1532              
1533             my $code_print = sub {
1534 9     9   14 my $str = shift;
1535 9         21 $code_open_file->();
1536 9         15 print { $r->{output_fh} } $str;
  9         240  
1537 110         370 }; # code_print
1538 110         226 $r->{code_print} = $code_print;
1539              
1540 110 100       248 if ($writes_csv) {
1541 85         193 my $output_emitter = _instantiate_emitter(\%util_args);
1542 85         10084 $r->{output_emitter} = $output_emitter;
1543 85         160 $r->{has_printed_header} = 0;
1544              
1545             my $code_print_header_row = sub {
1546             # set output fields, if not yet
1547 175 100   175   429 unless ($r->{output_fields}) {
1548             # by default, use the
1549 24         50 $r->{output_fields} = $r->{input_fields};
1550             }
1551              
1552             # index the output fields, if not yet
1553 175 100       368 unless ($r->{output_fields_idx}) {
1554 66         175 $r->{output_fields_idx} = {};
1555 66         148 for my $j (0 .. $#{ $r->{output_fields} }) {
  66         181  
1556 179         374 $r->{output_fields_idx}{ $r->{output_fields}[$j] } = $j;
1557             }
1558             }
1559              
1560 175         437 $code_open_file->();
1561              
1562             # print header line, if not yet
1563 175 100 100     606 if ($outputs_header && !$r->{has_printed_header}) {
1564 65         97 $r->{has_printed_header}++;
1565 65         1116 $r->{output_emitter}->print($r->{output_fh}, $r->{output_fields});
1566 65         3310 print { $r->{output_fh} } "\n";
  65         830  
1567 65         233 $r->{output_rownum}++;
1568             }
1569 85         444 };
1570 85         158 $r->{code_print_header_row} = $code_print_header_row;
1571              
1572             my $code_print_row = sub {
1573 175     175   274 my $row = shift;
1574              
1575 175         394 $code_print_header_row->();
1576              
1577             # print data line
1578 175 50       369 if ($row) {
1579 175 50       440 if (ref $row eq 'HASH') {
1580 0         0 my $row0 = $row;
1581 0         0 $row = [];
1582 0         0 for my $j (0 .. $#{ $r->{output_fields} }) {
  0         0  
1583 0   0     0 $row->[$j] = $row0->{ $r->{output_fields}[$j] } // '';
1584             }
1585             }
1586 175         1182 $r->{output_emitter}->print( $r->{output_fh}, $row );
1587 175         3726 print { $r->{output_fh} } "\n";
  175         1696  
1588 175         481 $r->{output_rownum}++;
1589 175         943 $r->{output_data_rownum}++;
1590             }
1591 85         274 }; # code_print_row
1592 85         216 $r->{code_print_row} = $code_print_row;
1593             } # if outputs csv
1594              
1595 110 50       227 if ($before_read_input) {
1596 0         0 log_trace "[csvutil] Calling before_read_input handler ...";
1597 0         0 $before_read_input->($r);
1598             }
1599              
1600             READ_CSV: {
1601 110 50       150 last unless $reads_csv;
  110         239  
1602              
1603 110         244 my $input_parser = _instantiate_parser(\%util_args, 'input_');
1604 110         11764 $r->{input_parser} = $input_parser;
1605              
1606 110         188 my @input_filenames;
1607 110 100       251 if ($reads_multiple_csv) {
1608 18   50     32 @input_filenames = @{ $util_args{input_filenames} // ['-'] };
  18         63  
1609             } else {
1610 92   50     363 @input_filenames = ($util_args{input_filename} // '-');
1611             }
1612 110   50     497 $r->{input_filenames} //= \@input_filenames;
1613              
1614             BEFORE_INPUT_FILENAME:
1615 110         197 $r->{input_filenum} = 0;
1616              
1617             INPUT_FILENAME:
1618 110         200 for my $input_filename (@input_filenames) {
1619 129         195 $r->{input_filenum}++;
1620 129         253 $r->{input_filename} = $input_filename;
1621              
1622 129 100 100     439 if ($r->{input_filenum} == 1 && $before_open_input_files) {
1623 1         5 log_trace "[csvutil] Calling before_open_input_files handler ...";
1624 1         5 $before_open_input_files->($r);
1625 1 50       4 if (delete $r->{wants_skip_files}) {
1626 0         0 log_trace "[csvutil] Handler wants to skip files, skipping all input files";
1627 0         0 last READ_CSV;
1628             }
1629             }
1630              
1631 129 50       244 if ($before_open_input_file) {
1632 0         0 log_trace "[csvutil] Calling before_open_input_file handler ...";
1633 0         0 $before_open_input_file->($r);
1634 0 0       0 if (delete $r->{wants_skip_file}) {
    0          
1635 0         0 log_trace "[csvutil] Handler wants to skip this file, moving on to the next file";
1636 0         0 next INPUT_FILENAME;
1637             } elsif (delete $r->{wants_skip_files}) {
1638 0         0 log_trace "[csvutil] Handler wants to skip all files, skipping all input files";
1639 0         0 last READ_CSV;
1640             }
1641             }
1642              
1643             log_info "[csvutil] [file %d/%d] Reading input file %s ...",
1644 129         442 $r->{input_filenum}, scalar(@input_filenames), $input_filename;
1645 129         475 my ($fh, $err) = _open_file_read($input_filename);
1646 129 50       359 die $err if $err;
1647 129         474 $r->{input_fh} = $r->{input_fhs}[ $r->{input_filenum}-1 ] = $fh;
1648              
1649 129         216 my $i;
1650 129         196 $r->{input_header_row_count} = 0;
1651 129         366 $r->{input_data_row_count} = 0;
1652 129         266 $r->{input_fields} = []; # array, field names in order
1653 129         243 $r->{input_field_idxs} = {}; # key=field name, value=index (0-based)
1654 129         172 my $row0;
1655             my $code_getline = sub {
1656 562 50 33 562   2739 if ($r->{stdin_input_fields} && $r->{input_filename} eq '-') {
    100 100        
    100 100        
1657 0 0       0 if ($i == 0) {
1658             # we have read the header for stdin. since
1659             # we can't seek to the beginning, we return
1660             # the saved fields
1661 0         0 $r->{input_header_row_count}++;
1662 0         0 return $r->{stdin_input_fields};
1663             } else {
1664 0         0 my $row = $input_parser->getline($r->{input_fh});
1665 0 0       0 $r->{input_data_row_count}++ if $row;
1666 0         0 return $row;
1667             }
1668             } elsif ($i == 0 && !$has_header) {
1669             # this is the first line of a file and user
1670             # specifies there is no input header. we save
1671             # the line and return the generated field names
1672             # instead.
1673 4         145 $row0 = $input_parser->getline($r->{input_fh});
1674 4 50       224 return unless $row0;
1675 4         12 return [map { "field$_" } 1..@$row0];
  12         51  
1676             } elsif ($i == 1 && !$has_header) {
1677             # we return the saved first line
1678 4 50       10 $r->{input_data_row_count}++ if $row0;
1679 4         11 return $row0;
1680             }
1681 554         14456 my $res = $input_parser->getline($r->{input_fh});
1682 554 100       20919 if ($res) {
1683 447 100       939 $r->{input_header_row_count}++ if $i==0;
1684 447 100       938 $r->{input_data_row_count}++ if $i;
1685             }
1686 554         1589 $res;
1687 129         686 };
1688 129         332 $r->{code_getline} = $code_getline;
1689              
1690 129         191 $i = 0;
1691 129         278 while ($r->{input_row} = $code_getline->()) {
1692 455         628 $i++;
1693 455         647 $r->{input_rownum} = $i;
1694 455 100       930 $r->{input_data_rownum} = $has_header ? $i-1 : $i;
1695 455 100       748 if ($i == 1) {
1696             # gather the list of fields
1697 129         216 $r->{input_fields} = $r->{input_row};
1698 129 50 0     269 $r->{stdin_input_fields} //= $r->{input_row} if $input_filename eq '-';
1699 129         225 $r->{orig_input_fields} = $r->{input_fields};
1700 129         260 $r->{input_fields_idx} = {};
1701 129         197 for my $j (0 .. $#{ $r->{input_fields} }) {
  129         419  
1702 361         800 $r->{input_fields_idx}{ $r->{input_fields}[$j] } = $j;
1703             }
1704              
1705 129 100       269 if ($on_input_header_row) {
1706 115         362 log_trace "[csvutil] Calling on_input_header_row hook handler ...";
1707 115         557 $on_input_header_row->($r);
1708              
1709 106 100       322 if (delete $r->{wants_skip_file}) {
    100          
1710 3         10 log_trace "[csvutil] Handler wants to skip this file, moving on to the next file";
1711 3         12 next INPUT_FILENAME;
1712             } elsif (delete $r->{wants_skip_files}) {
1713 1         4 log_trace "[csvutil] Handler wants to skip all files, skipping all input files";
1714 1         5 last READ_CSV;
1715             }
1716             }
1717              
1718             # reindex the fields, should the above hook
1719             # handler adds/removes fields. let's save the
1720             # old fields_idx to orig_fields_idx.
1721 116         221 $r->{orig_input_fields_idx} = $r->{input_fields_idx};
1722 116         176 $r->{input_fields_idx} = {};
1723 116         207 for my $j (0 .. $#{ $r->{input_fields} }) {
  116         293  
1724 326         746 $r->{input_fields_idx}{ $r->{input_fields}[$j] } = $j;
1725             }
1726              
1727             } else {
1728             # fill up the elements of row to the number of
1729             # fields, in case the row contains sparse values
1730 326 50 33     761 unless (defined $r->{wants_fill_rows} && !$r->{wants_fill_rows}) {
1731 326 100       433 if (@{ $r->{input_row} } < @{ $r->{input_fields} }) {
  326         463  
  326         653  
1732 3         6 splice @{ $r->{input_row} }, scalar(@{ $r->{input_row} }), 0, (("") x (@{ $r->{input_fields} } - @{ $r->{input_row} }));
  3         6  
  3         6  
  3         5  
  3         10  
1733             }
1734             }
1735              
1736             # generate the hashref version of row if utility
1737             # requires it
1738 326 100       622 if ($r->{wants_input_row_as_hashref}) {
1739 34         78 $r->{input_row_as_hashref} = {};
1740 34         49 for my $j (0 .. $#{ $r->{input_row} }) {
  34         124  
1741             # ignore extraneous data fields
1742 93 50       116 last if $j >= @{ $r->{input_fields} };
  93         167  
1743 93         225 $r->{input_row_as_hashref}{ $r->{input_fields}[$j] } = $r->{input_row}[$j];
1744             }
1745             }
1746              
1747 326 50       534 if ($on_input_data_row) {
1748 326 100       797 log_trace "[csvutil] Calling on_input_data_row hook handler (for first data row) ..." if $r->{input_rownum} <= 2;
1749 326         1026 $on_input_data_row->($r);
1750              
1751 321 100       1073 if (delete $r->{wants_skip_file}) {
    100          
1752 2         12 log_trace "[csvutil] Handler wants to skip this file, moving on to the next file";
1753 2         13 next INPUT_FILENAME;
1754             } elsif (delete $r->{wants_skip_files}) {
1755 2         8 log_trace "[csvutil] Handler wants to skip all files, skipping all input files";
1756 2         10 last READ_CSV;
1757             }
1758             }
1759             }
1760              
1761             } # while getline
1762              
1763             # XXX actually close filehandle except stdin
1764              
1765 107 50       312 if ($after_close_input_file) {
1766 0         0 log_trace "[csvutil] Calling after_close_input_file handler ...";
1767 0         0 $after_close_input_file->($r);
1768 0 0       0 if (delete $r->{wants_skip_files}) {
1769 0         0 log_trace "[csvutil] Handler wants to skip reading all file, skipping";
1770 0         0 last READ_CSV;
1771             }
1772             }
1773             } # for input_filename
1774              
1775 93 100       202 if ($after_close_input_files) {
1776 32         106 log_trace "[csvutil] Calling after_close_input_files handler ...";
1777 32         120 $after_close_input_files->($r);
1778             }
1779              
1780             } # READ_CSV
1781              
1782             # cleanup stash from csv-reading-related keys
1783 96         408 delete $r->{input_filenames};
1784 96         153 delete $r->{input_filenum};
1785 96         165 delete $r->{input_filename};
1786 96         150 delete $r->{input_fh};
1787 96         148 delete $r->{input_rownum};
1788 96         133 delete $r->{input_data_rownum};
1789 96         156 delete $r->{input_row};
1790 96         129 delete $r->{input_row_as_hashref};
1791 96         129 delete $r->{input_fields};
1792 96         195 delete $r->{input_fields_idx};
1793 96         188 delete $r->{orig_input_fields_idx};
1794 96         748 delete $r->{code_getline};
1795 96         157 delete $r->{wants_input_row_as_hashref};
1796              
1797 96 100       196 if ($after_read_input) {
1798 6         21 log_trace "[csvutil] Calling after_read_input handler ...";
1799 6         24 $after_read_input->($r);
1800             }
1801              
1802             # cleanup stash from csv-outputting-related keys
1803 96         138 delete $r->{output_num_of_files};
1804 96         166 delete $r->{output_filenum};
1805 96 100       190 if ($r->{output_fh}) {
1806 70 100       200 if ($r->{output_filename} ne '-') {
1807 2         13 log_info "[csvutil] Closing output file '$r->{output_filename}' ...";
1808 2 50       124 close $r->{output_fh} or die [500, "Can't close output file '$r->{output_filename}': $!"];
1809             }
1810 70         122 delete $r->{output_fh};
1811             }
1812 96 100       205 if ($r->{util_args}{inplace}) {
1813 2   50     10 my $output_filenum = $r->{output_filenum} // 0;
1814 2         7 my $i = -1;
1815 2         4 for my $output_filename (@{ $r->{output_filenames} }) {
  2         5  
1816 2         5 $i++;
1817 2 50       6 last if $i > $output_filenum;
1818 2 50       18 (my $input_filename = $output_filename) =~ s/\.\w{5}\z//
1819             or die [500, "BUG: Can't get original input file '$output_filename'"];
1820 2 100       11 if (length(my $ext = $r->{util_args}{inplace_backup_ext})) {
1821 1         7 my $backup_filename = $input_filename . $ext;
1822 1         9 log_info "[csvutil] Backing up input file '$output_filename' -> '$backup_filename' ...";
1823 1 50       49 rename $input_filename, $backup_filename or die [500, "Can't rename '$input_filename' -> '$backup_filename': $!"];
1824             }
1825 2         14 log_info "[csvutil] Renaming from temporary output file '$output_filename' -> '$input_filename' ...";
1826 2 50       198 rename $output_filename, $input_filename or die [500, "Can't rename back '$output_filename' -> '$input_filename': $!"];
1827             }
1828             }
1829 96         171 delete $r->{output_filenames};
1830 96         139 delete $r->{output_filename};
1831 96         132 delete $r->{output_rownum};
1832 96         119 delete $r->{output_data_rownum};
1833 96         153 delete $r->{code_print};
1834 96         431 delete $r->{code_print_row};
1835 96         370 delete $r->{code_print_header_row};
1836 96         138 delete $r->{has_printed_header};
1837 96         125 delete $r->{wants_switch_to_next_output_file};
1838              
1839 96 100       1446 if ($on_end) {
1840 11         36 log_trace "[csvutil] Calling on_end hook handler ...";
1841 11         44 $on_end->($r);
1842             }
1843              
1844             }; # MAIN_EVAL
1845              
1846 110         237 my $err = $@;
1847 110 100       205 if ($err) {
1848 14 50       38 $err = [500, $err] unless ref $err;
1849 14         49 return $err;
1850             }
1851              
1852             RETURN_RESULT:
1853 96 100       283 if (!$r->{result}) {
    50          
    50          
1854 82         279 $r->{result} = [200];
1855             } elsif (!ref($r->{result})) {
1856 0         0 $r->{result} = [500, "BUG: Result (r->{result}) is set to a non-reference ($r->{result}), probably by one of the handlers"];
1857             } elsif (ref($r->{result}) ne 'ARRAY') {
1858 0         0 $r->{result} = [500, "BUG: Result (r->{result}) is not set to an enveloped result (arrayref) ($r->{result}), probably by one of the handlers"];
1859             }
1860 96         3008 $r->{result};
1861 24         257 };
1862             } # CREATE_CODE
1863              
1864 24         40 my $meta;
1865             CREATE_META: {
1866              
1867 24         129 $meta = {
1868             v => 1.1,
1869             summary => $summary,
1870             description => $description,
1871             args => {},
1872             args_rels => {},
1873             links => $links,
1874             examples => $examples,
1875             tags => $tags,
1876             };
1877              
1878             CREATE_ARGS_PROP: {
1879 24 50       42 if ($add_args) {
  24         54  
1880 24         131 $meta->{args}{$_} = $add_args->{$_} for keys %$add_args;
1881             }
1882              
1883 24 50       56 if ($reads_csv) {
1884 24         76 $meta->{args}{$_} = {%{$argspecs_csv_input{$_}}} for keys %argspecs_csv_input;
  120         484  
1885              
1886 24 100       60 if ($reads_multiple_csv) {
1887 3         5 $meta->{args}{input_filenames} = {%{$argspecopt_input_filenames{input_filenames}}};
  3         28  
1888 3         13 _add_arg_pos($meta->{args}, 'input_filenames', 'slurpy');
1889 3         10 push @$tags, 'reads-multiple-csv';
1890             } else {
1891 21         30 $meta->{args}{input_filename} = {%{$argspecopt_input_filename{input_filename}}};
  21         93  
1892 21         63 _add_arg_pos($meta->{args}, 'input_filename');
1893             }
1894              
1895 24         52 push @$tags, 'reads-csv';
1896             } # if reads_csv
1897              
1898 24 100       55 if ($writes_csv) {
1899 16         48 $meta->{args}{$_} = {%{$argspecs_csv_output{$_}}} for keys %argspecs_csv_output;
  112         426  
1900              
1901 16 50       38 if ($reads_csv) {
1902 16         47 $meta->{args}{$_} = {%{$argspecsopt_inplace{$_}}} for keys %argspecsopt_inplace;
  32         176  
1903 16   50     80 $meta->{args_rels}{'dep_all&'} //= [];
1904 16         25 push @{ $meta->{args_rels}{'dep_all&'} }, ['inplace_backup_ext', ['inplace']];
  16         53  
1905 16   50     78 $meta->{args_rels}{'choose_one&'} //= [];
1906 16         21 push @{ $meta->{args_rels}{'choose_one&'} }, ['inplace', 'output_filename'];
  16         39  
1907 16         22 push @{ $meta->{args_rels}{'choose_one&'} }, ['inplace', 'output_filenames'];
  16         34  
1908             }
1909              
1910 16 50       33 if ($writes_multiple_csv) {
1911 0         0 $meta->{args}{output_filenames} = {%{$argspecopt_output_filenames{output_filenames}}};
  0         0  
1912 0         0 _add_arg_pos($meta->{args}, 'output_filenames', 'slurpy');
1913 0 0       0 if ($reads_csv) {
1914 0   0     0 $meta->{args_rels}{'choose_one&'} //= [];
1915 0         0 push @{ $meta->{args_rels}{'choose_one&'} }, [qw/output_filenames inplace/];
  0         0  
1916             }
1917 0         0 push @$tags, 'writes-multiple-csv';
1918             } else {
1919 16         24 $meta->{args}{output_filename} = {%{$argspecopt_output_filename{output_filename}}};
  16         67  
1920 16         48 _add_arg_pos($meta->{args}, 'output_filename');
1921 16 50       32 if ($reads_csv) {
1922 16   50     39 $meta->{args_rels}{'choose_one&'} //= [];
1923 16         23 push @{ $meta->{args_rels}{'choose_one&'} }, [qw/output_filename inplace/];
  16         39  
1924             }
1925             }
1926              
1927 16         28 $meta->{args}{overwrite} = {%{$argspecopt_overwrite{overwrite}}};
  16         59  
1928 16   50     72 $meta->{args_rels}{'dep_any&'} //= [];
1929 16         27 push @{ $meta->{args_rels}{'dep_any&'} }, ['overwrite', ['output_filename', 'output_filenames']];
  16         46  
1930              
1931 16         36 push @$tags, 'writes-csv';
1932             } # if writes csv
1933              
1934             } # CREATE_ARGS_PROP
1935              
1936             CREATE_ARGS_RELS_PROP: {
1937 24         32 $meta->{args_rels} = {};
  24         75  
1938 24 100       58 if ($add_args_rels) {
1939 3         14 $meta->{args_rels}{$_} = $add_args_rels->{$_} for keys %$add_args_rels;
1940             }
1941             } # CREATE_ARGS_RELS_PROP
1942              
1943 24 50       53 if ($add_meta_props) {
1944 0         0 $meta->{$_} = $add_meta_props->{$_} for keys %$add_meta_props;
1945             }
1946              
1947             } # CREATE_META
1948              
1949             {
1950 24         39 my $package = caller();
  24         30  
  24         76  
1951 1     1   9 no strict 'refs'; ## no critic: TestingAndDebugging::ProhibitNoStrict
  1         2  
  1         304  
1952 24         466 *{"$package\::$name"} = $code;
  24         118  
1953             #use DD; dd $meta;
1954 24         40 ${"$package\::SPEC"}{$name} = $meta;
  24         119  
1955             }
1956              
1957 24         80 1;
1958             }
1959              
1960             1;
1961             # ABSTRACT: CLI utilities related to CSV
1962              
1963             __END__
1964              
1965             =pod
1966              
1967             =encoding UTF-8
1968              
1969             =head1 NAME
1970              
1971             App::CSVUtils - CLI utilities related to CSV
1972              
1973             =head1 VERSION
1974              
1975             This document describes version 1.024 of App::CSVUtils (from Perl distribution App-CSVUtils), released on 2023-04-01.
1976              
1977             =head1 DESCRIPTION
1978              
1979             This distribution contains the following CLI utilities:
1980              
1981             =over
1982              
1983             =item 1. L<csv-add-fields>
1984              
1985             =item 2. L<csv-avg>
1986              
1987             =item 3. L<csv-check-cell-values>
1988              
1989             =item 4. L<csv-check-field-names>
1990              
1991             =item 5. L<csv-check-field-values>
1992              
1993             =item 6. L<csv-check-rows>
1994              
1995             =item 7. L<csv-cmp>
1996              
1997             =item 8. L<csv-concat>
1998              
1999             =item 9. L<csv-convert-to-hash>
2000              
2001             =item 10. L<csv-csv>
2002              
2003             =item 11. L<csv-delete-fields>
2004              
2005             =item 12. L<csv-dump>
2006              
2007             =item 13. L<csv-each-row>
2008              
2009             =item 14. L<csv-fill-template>
2010              
2011             =item 15. L<csv-find-values>
2012              
2013             =item 16. L<csv-freqtable>
2014              
2015             =item 17. L<csv-gen>
2016              
2017             =item 18. L<csv-get-cells>
2018              
2019             =item 19. L<csv-grep>
2020              
2021             =item 20. L<csv-info>
2022              
2023             =item 21. L<csv-intrange>
2024              
2025             =item 22. L<csv-list-field-names>
2026              
2027             =item 23. L<csv-lookup-fields>
2028              
2029             =item 24. L<csv-ltrim>
2030              
2031             =item 25. L<csv-map>
2032              
2033             =item 26. L<csv-munge-field>
2034              
2035             =item 27. L<csv-munge-rows>
2036              
2037             =item 28. L<csv-pick>
2038              
2039             =item 29. L<csv-pick-fields>
2040              
2041             =item 30. L<csv-pick-rows>
2042              
2043             =item 31. L<csv-quote>
2044              
2045             =item 32. L<csv-replace-newline>
2046              
2047             =item 33. L<csv-rtrim>
2048              
2049             =item 34. L<csv-select-fields>
2050              
2051             =item 35. L<csv-select-rows>
2052              
2053             =item 36. L<csv-setop>
2054              
2055             =item 37. L<csv-shuf>
2056              
2057             =item 38. L<csv-shuf-fields>
2058              
2059             =item 39. L<csv-shuf-rows>
2060              
2061             =item 40. L<csv-sort>
2062              
2063             =item 41. L<csv-sort-fields>
2064              
2065             =item 42. L<csv-sort-rows>
2066              
2067             =item 43. L<csv-sorted>
2068              
2069             =item 44. L<csv-sorted-fields>
2070              
2071             =item 45. L<csv-sorted-rows>
2072              
2073             =item 46. L<csv-split>
2074              
2075             =item 47. L<csv-sum>
2076              
2077             =item 48. L<csv-transpose>
2078              
2079             =item 49. L<csv-trim>
2080              
2081             =item 50. L<csv-uniq>
2082              
2083             =item 51. L<csv-unquote>
2084              
2085             =item 52. L<csv2ltsv>
2086              
2087             =item 53. L<csv2paras>
2088              
2089             =item 54. L<csv2td>
2090              
2091             =item 55. L<csv2tsv>
2092              
2093             =item 56. L<csv2vcf>
2094              
2095             =item 57. L<list-csvutils>
2096              
2097             =item 58. L<paras2csv>
2098              
2099             =item 59. L<tsv2csv>
2100              
2101             =back
2102              
2103             =head1 FUNCTIONS
2104              
2105              
2106             =head2 gen_csv_util
2107              
2108             Usage:
2109              
2110             gen_csv_util(%args) -> bool
2111              
2112             Generate a CSV utility.
2113              
2114             This routine is used to generate a CSV utility in the form of a L<Rinci>
2115             function (code and metadata). You can then produce a CLI from the Rinci function
2116             simply using L<Perinci::CmdLine::Gen> or, if you use L<Dist::Zilla>,
2117             L<Dist::Zilla::Plugin::GenPericmdScript> or, if on the command-line,
2118             L<gen-pericmd-script>.
2119              
2120             Using this routine, by providing just one or a few hooks and setting some
2121             parameters like a couple of extra arguments, you will get a complete CLI with
2122             decent POD/manpage, ability to read one or multiple CSV's and write one or
2123             multiple CSV's, some command-line options to customize how the input CSV's
2124             should be parsed and how the output CSV's should be formatted and named. Your
2125             CLI also has tab completion, usage and help message, and other features.
2126              
2127             To create a CSV utility, you specify a C<name> (e.g. C<csv_dump>; must be a valid
2128             unqualified Perl identifier/function name) and optionally C<summary>,
2129             C<description>, and other metadata like C<links> or even C<add_meta_props>. Then
2130             you specify one or more of C<on_*> or C<before_*> or C<after_*> arguments to supply
2131             handlers (coderefs) for your CSV utility at various hook points.
2132              
2133             I<THE HOOKS>
2134              
2135             All code for hooks should accept a single argument C<r>. C<r> is a stash (hashref)
2136             of various data, the keys of which will depend on which hook point being called.
2137             You can also add more keys to store data or for flow control (see hook
2138             documentation below for more details).
2139              
2140             The order of the hooks, in processing chronological order:
2141              
2142             =over
2143              
2144             =item * on_begin
2145              
2146             Called when utility begins, before reading CSV. You can use this hook e.g. to
2147             process arguments, set output filenames (if you allow custom output
2148             filenames).
2149              
2150             =item * before_read_input
2151              
2152             Called before opening any input CSV file. This hook is I<still> called even if
2153             your utility sets C<reads_csv> to false.
2154              
2155             At this point, the C<input_filenames> stash key (as well as other keys like
2156             C<input_filename>, C<input_filenum>, etc) has not been set. You can use this
2157             hook e.g. to set a custom C<input_filenames>.
2158              
2159             =item * before_open_input_files
2160              
2161             Called before an input CSV file is about to be opened, including for stdin
2162             (C<->). You can use this hook e.g. to check/preprocess input file. Flow control
2163             is available by setting C<< $r-E<gt>{wants_skip_files} >> to skip reading all the input
2164             file and go directly to the C<after_read_input> hook.
2165              
2166             =item * before_open_input_file
2167              
2168             Called before an input CSV file is about to be opened, including for stdin
2169             (C<->). For the first file, called after C<before_open_input_file> hook. You can
2170             use this hook e.g. to check/preprocess input file. Flow control is available
2171             by setting C<< $r-E<gt>{wants_skip_file} >> to skip reading a single input file and go
2172             to the next file, or C<< $r-E<gt>{wants_skip_files} >> to skip reading the rest of the
2173             files and go directly to the C<after_read_input> hook.
2174              
2175             =item * on_input_header_row
2176              
2177             Called when receiving header row. Will be called for every input file, and
2178             called even when user specify C<--no-input-header>, in which case the header
2179             row will be the generated C<["field1", "field2", ...]>. You can use this hook
2180             e.g. to add/remove/rearrange fields.
2181              
2182             You can set C<< $r-E<gt>{wants_fill_rows} >> to a defined false if you do not want
2183             C<< $r-E<gt>{input_rows} >> to be filled with empty string elements when it contains
2184             less than the number of fields (in case of sparse values at the end). Normally
2185             you only want to do this when you want to do checking, e.g. in
2186             L<csv-check-rows>.
2187              
2188             =item * on_input_data_row
2189              
2190             Called when receiving each data row. You can use this hook e.g. to modify the
2191             row or print output (for line-by-line transformation or filtering).
2192              
2193             =item * after_close_input_file
2194              
2195             Called after each input file is closed, including for stdin (C<->) (although
2196             for stdin, the handle is not actually closed). Flow control is possible by
2197             setting C<< $r-E<gt>{wants_skip_files} >> to skip reading the rest of the files and go
2198             straight to the C<after_close_input_files> hook.
2199              
2200             =item * after_close_input_files
2201              
2202             Called after the last input file is closed, after the last
2203             C<after_close_input_file> hook, including for stdin (C<->) (although for stdin,
2204             the handle is not actually closed).
2205              
2206             =item * after_read_input
2207              
2208             Called after the last row of the last CSV file is read and the last file is
2209             closed. This hook is I<still> called, if you set C<reads_csv> option to false.
2210             At this point the stash keys related to CSV reading have all been cleared,
2211             including C<input_filenames>, C<input_filename>, C<input_fh>, etc.
2212              
2213             You can use this hook e.g. to print output if you buffer the output.
2214              
2215             =item * on_end
2216              
2217             Called when utility is about to exit. You can use this hook e.g. to return the
2218             final result.
2219              
2220             =back
2221              
2222             I<THE STASH>
2223              
2224             The common keys that C<r> will contain:
2225              
2226             =over
2227              
2228             =item * C<gen_args>, hash. The arguments used to generate the CSV utility.
2229              
2230             =item * C<util_args>, hash. The arguments that your CSV utility accepts. Parsed from
2231             command-line arguments (or configuration files, or environment variables).
2232              
2233             =item * C<name>, str. The name of the CSV utility. Which can also be retrieved via
2234             C<gen_args>.
2235              
2236             =item * C<code_print>, coderef. Routine provided for you to print something. Accepts a
2237             string. Takes care of opening the output files for you.
2238              
2239             =item * C<code_print_row>, coderef. Routine provided for you to print a data row. You
2240             pass the row (either arrayref or hashref). Takes care of opening the output
2241             files for you, as well as printing header row the first time, if needed.
2242              
2243             =item * C<code_print_header_row>, coderef. Routine provided for you to print header
2244             row. You don't need to pass any arguments. Will only print the header row once
2245             per output file if output header is enabled, even if called multiple times.
2246              
2247             =back
2248              
2249             If you are accepting CSV data (C<reads_csv> gen argument set to true), the
2250             following keys will also be available (in C<on_input_header_row> and
2251             C<on_input_data_row> hooks):
2252              
2253             =over
2254              
2255             =item * C<input_parser>, a L<Text::CSV_XS> instance for input parsing.
2256              
2257             =item * C<input_filenames>, array of str.
2258              
2259             =item * C<input_filename>, str. The name of the current input file being read (C<-> if
2260             reading from stdin).
2261              
2262             =item * C<input_filenum>, uint. The number of the current input file, 1 being the first
2263             file, 2 for the second, and so on.
2264              
2265             =item * C<input_fh>, the handle to the current file being read.
2266              
2267             =item * C<input_rownum>, uint. The number of rows that have been read (reset after each
2268             input file). In C<on_input_header_row> phase, this will be 1 since header row
2269             (including the generated one) is the first row. Then in C<on_input_data_row>
2270             phase (called the first time for a file), it will be 2 for the first data row,
2271             even if physically it is the first row for CSV file that does not have a
2272             header.
2273              
2274             =item * C<input_data_rownum>, uint. The number of data rows that have been read (reset
2275             after each input file). This will be equal to C<input_rownum> less 1 if input
2276             file has header.
2277              
2278             =item * C<input_row>, aos (array of str). The current input CSV row as an arrayref.
2279              
2280             =item * C<input_row_as_hashref>, hos (hash of str). The current input CSV row as a
2281             hashref, with field names as hash keys and field values as hash values. This
2282             will only be calculated if utility wants it. Utility can express so by setting
2283             C<< $r-E<gt>{wants_input_row_as_hashref} >> to true, e.g. in the C<on_begin> hook.
2284              
2285             =item * C<input_header_row_count>, uint. Contains the number of actual header rows that
2286             have been read. If CLI user specifies C<--no-input-header>, this will stay at
2287             zero. Will be reset for each CSV file.
2288              
2289             =item * C<input_data_row_count>, int. Contains the number of actual data rows that have
2290             read. Will be reset for each CSV file.
2291              
2292             =back
2293              
2294             If you are outputting CSV (C<writes_csv> gen argument set to true), the following
2295             keys will be available:
2296              
2297             =over
2298              
2299             =item * C<output_emitter>, a L<Text::CSV_XS> instance for output.
2300              
2301             =item * C<output_filenames>, array of str.
2302              
2303             =item * C<output_filename>, str, name of current output file.
2304              
2305             =item * C<output_filenum>, uint, the number of the current output file, 1 being the
2306             first file, 2 for the second, and so on.
2307              
2308             =item * C<output_fh>, handle to the current output file.
2309              
2310             =item * C<output_rownum>, uint. The number of rows that have been outputted (reset
2311             after each output file).
2312              
2313             =item * C<output_data_rownum>, uint. The number of data rows that have been outputted
2314             (reset after each output file). This will be equal to C<input_rownum> less 1 if
2315             input file has header.
2316              
2317             =back
2318              
2319             For other hook-specific keys, see the documentation for associated hook point.
2320              
2321             I<ACCEPTING ADDITIONAL COMMAND-LINE OPTIONS/ARGUMENTS>
2322              
2323             As mentioned above, you will get additional command-line options/arguments in
2324             C<< $r-E<gt>{util_args} >> hashref. Some options/arguments are already added by
2325             C<gen_csv_util>, e.g. C<input_filename> or C<input_filenames> along with
2326             C<input_sep_char>, etc (when your utility declares C<reads_csv>),
2327             C<output_filename> or C<output_filenames> along with C<overwrite>,
2328             C<output_sep_char>, etc (when your utility declares C<writes_csv>).
2329              
2330             If you want to accept additional arguments/options, you specify them in
2331             C<add_args> (hashref, with key being Each option/argument has to be specified
2332             first via C<add_args> (as hashref, with key being argument name and value the
2333             argument specification as defined in L<Rinci::function>)). Some argument
2334             specifications have been defined in L<App::CSVUtils> and can be used. See
2335             existing utilities for examples.
2336              
2337             I<READING CSV DATA>
2338              
2339             To read CSV data, normally your utility would provide handler for the
2340             C<on_input_data_row> hook and sometimes additionally C<on_input_header_row>.
2341              
2342             I<OUTPUTTING STRING OR RETURNING RESULT>
2343              
2344             To output string, usually you call the provided routine C<< $r-E<gt>{code_print} >>. This
2345             routine will open the output files for you.
2346              
2347             You can also return enveloped result directly by setting C<< $r-E<gt>{result} >>.
2348              
2349             I<OUTPUTTING CSV DATA>
2350              
2351             To output CSV data, usually you call the provided routine C<< $r-E<gt>{code_print_row} >>.
2352             This routine accepts a row (arrayref or hashref). This routine will open the
2353             output files for you when needed, as well as print header row automatically.
2354              
2355             You can also buffer rows from input to e.g. C<< $r-E<gt>{output_rows} >>, then call
2356             C<< $r-E<gt>{code_print_row} >> repeatedly in the C<after_read_input> hook to print all the
2357             rows.
2358              
2359             I<READING MULTIPLE CSV FILES>
2360              
2361             To read multiple CSV files, you first specify C<reads_multiple_csv>. Then, you
2362             can supply handler for C<on_input_header_row> and C<on_input_data_row> as usual.
2363             If you want to do something before/after each input file, you can also supply
2364             handler for C<before_open_input_file> or C<after_close_input_file>.
2365              
2366             I<WRITING TO MULTIPLE CSV FILES>
2367              
2368             Similarly, to write to many CSv files, you first specify C<writes_multiple_csv>.
2369             Then, you can supply handler for C<on_input_header_row> and C<on_input_data_row>
2370             as usual. To switch to the next file, set
2371             C<< $r-E<gt>{wants_switch_to_next_output_file} >> to true, in which case the next call to
2372             C<< $r-E<gt>{code_print_row} >> will close the current file and open the next file.
2373              
2374             I<CHANGING THE OUTPUT FIELDS>
2375              
2376             When calling C<< $r-E<gt>{code_print_row} >>, you can output whatever fields you want. By
2377             convention, you can set C<< $r-E<gt>{output_fields} >> and C<< $r-E<gt>{output_fields_idx} >> to
2378             let other handlers know about the output fields. For example, see the
2379             implementation of L<csv-concat>.
2380              
2381             This function is not exported by default, but exportable.
2382              
2383             Arguments ('*' denotes required arguments):
2384              
2385             =over 4
2386              
2387             =item * B<add_args> => I<hash>
2388              
2389             (No description)
2390              
2391             =item * B<add_args_rels> => I<hash>
2392              
2393             (No description)
2394              
2395             =item * B<add_meta_props> => I<hash>
2396              
2397             Add additional Rinci function metadata properties.
2398              
2399             =item * B<after_close_input_file> => I<code>
2400              
2401             (No description)
2402              
2403             =item * B<after_close_input_files> => I<code>
2404              
2405             (No description)
2406              
2407             =item * B<after_read_input> => I<code>
2408              
2409             (No description)
2410              
2411             =item * B<before_open_input_file> => I<code>
2412              
2413             (No description)
2414              
2415             =item * B<before_open_input_files> => I<code>
2416              
2417             (No description)
2418              
2419             =item * B<before_read_input> => I<code>
2420              
2421             (No description)
2422              
2423             =item * B<description> => I<str>
2424              
2425             (No description)
2426              
2427             =item * B<examples> => I<array>
2428              
2429             (No description)
2430              
2431             =item * B<links> => I<array[hash]>
2432              
2433             (No description)
2434              
2435             =item * B<name>* => I<perl::identifier::unqualified_ascii>
2436              
2437             (No description)
2438              
2439             =item * B<on_begin> => I<code>
2440              
2441             (No description)
2442              
2443             =item * B<on_end> => I<code>
2444              
2445             (No description)
2446              
2447             =item * B<on_input_data_row> => I<code>
2448              
2449             (No description)
2450              
2451             =item * B<on_input_header_row> => I<code>
2452              
2453             (No description)
2454              
2455             =item * B<reads_csv> => I<bool> (default: 1)
2456              
2457             Whether utility reads CSV data.
2458              
2459             =item * B<reads_multiple_csv> => I<bool>
2460              
2461             Whether utility accepts CSV data.
2462              
2463             Setting this option to true will implicitly set the C<reads_csv> option to true,
2464             obviously.
2465              
2466             =item * B<summary> => I<str>
2467              
2468             (No description)
2469              
2470             =item * B<writes_csv> => I<bool> (default: 1)
2471              
2472             Whether utility writes CSV data.
2473              
2474             =item * B<writes_multiple_csv> => I<bool>
2475              
2476             Whether utility outputs CSV data.
2477              
2478             Setting this option to true will implicitly set the C<writes_csv> option to true,
2479             obviously.
2480              
2481              
2482             =back
2483              
2484             Return value: (bool)
2485              
2486              
2487             =head2 compile_eval_code
2488              
2489             Usage:
2490              
2491             $coderef = compile_eval_code($str, $label);
2492              
2493             Compile string code C<$str> to coderef in 'main' package, without C<use strict>
2494             or C<use warnings>. Die on compile error.
2495              
2496             =head2 eval_code
2497              
2498             Usage:
2499              
2500             $res = eval_code($coderef, $r, $topic_var_value, $return_topic_var);
2501              
2502             =for Pod::Coverage ^(csvutil)$
2503              
2504             =head1 FAQ
2505              
2506             =head2 My CSV does not have a header?
2507              
2508             Use the C<--no-header> option. Fields will be named C<field1>, C<field2>, and so
2509             on.
2510              
2511             =head2 My data is TSV, not CSV?
2512              
2513             Use the C<--tsv> option.
2514              
2515             =head2 I have a big CSV and the utilities are too slow or eat too much RAM!
2516              
2517             These utilities are not (yet) optimized, patches welcome. If your CSV is very
2518             big, perhaps a C-based solution is what you need.
2519              
2520             =head1 HOMEPAGE
2521              
2522             Please visit the project's homepage at L<https://metacpan.org/release/App-CSVUtils>.
2523              
2524             =head1 SOURCE
2525              
2526             Source repository is at L<https://github.com/perlancar/perl-App-CSVUtils>.
2527              
2528             =head1 SEE ALSO
2529              
2530             =head2 Similar CLI bundles for other format
2531              
2532             L<App::TSVUtils>, L<App::LTSVUtils>, L<App::SerializeUtils>.
2533              
2534             =head2 Other CSV-related utilities
2535              
2536             L<xls2csv> and L<xlsx2csv> from L<Spreadsheet::Read>
2537              
2538             L<import-csv-to-sqlite> from L<App::SQLiteUtils>
2539              
2540             Query CSV with SQL using L<fsql> from L<App::fsql>
2541              
2542             L<csvgrep> from L<csvgrep>
2543              
2544             =head2 Other non-Perl-based CSV utilities
2545              
2546             =head3 Python
2547              
2548             B<csvkit>, L<https://csvkit.readthedocs.io/en/latest/>
2549              
2550             =head1 AUTHOR
2551              
2552             perlancar <perlancar@cpan.org>
2553              
2554             =head1 CONTRIBUTOR
2555              
2556             =for stopwords Adam Hopkins
2557              
2558             Adam Hopkins <violapiratejunky@gmail.com>
2559              
2560             =head1 CONTRIBUTING
2561              
2562              
2563             To contribute, you can send patches by email/via RT, or send pull requests on
2564             GitHub.
2565              
2566             Most of the time, you don't need to build the distribution yourself. You can
2567             simply modify the code, then test via:
2568              
2569             % prove -l
2570              
2571             If you want to build the distribution (e.g. to try to install it locally on your
2572             system), you can install L<Dist::Zilla>,
2573             L<Dist::Zilla::PluginBundle::Author::PERLANCAR>,
2574             L<Pod::Weaver::PluginBundle::Author::PERLANCAR>, and sometimes one or two other
2575             Dist::Zilla- and/or Pod::Weaver plugins. Any additional steps required beyond
2576             that are considered a bug and can be reported to me.
2577              
2578             =head1 COPYRIGHT AND LICENSE
2579              
2580             This software is copyright (c) 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016 by perlancar <perlancar@cpan.org>.
2581              
2582             This is free software; you can redistribute it and/or modify it under
2583             the same terms as the Perl 5 programming language system itself.
2584              
2585             =head1 BUGS
2586              
2587             Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=App-CSVUtils>
2588              
2589             When submitting a bug or request, please include a test-file or a
2590             patch to an existing test-file that illustrates the bug or desired
2591             feature.
2592              
2593             =cut