File Coverage

blib/lib/App/CSVUtils.pm
Criterion Covered Total %
statement 727 868 83.7
branch 182 322 56.5
condition 63 122 51.6
subroutine 111 122 90.9
pod 3 3 100.0
total 1086 1437 75.5


line stmt bran cond sub pod time code
1             package App::CSVUtils;
2              
3 1     1   113841 use 5.010001;
  1         15  
4 1     1   5 use strict;
  1         2  
  1         19  
5 1     1   5 use warnings;
  1         1  
  1         22  
6 1     1   2261 use Log::ger;
  1         82  
  1         5  
7              
8 1     1   262 use Cwd;
  1         2  
  1         63  
9 1     1   6 use Exporter qw(import);
  1         3  
  1         621  
10              
11             our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
12             our $DATE = '2023-07-25'; # DATE
13             our $DIST = 'App-CSVUtils'; # DIST
14             our $VERSION = '1.030'; # VERSION
15              
16             our @EXPORT_OK = qw(
17             gen_csv_util
18             compile_eval_code
19             eval_code
20             );
21              
22             our %SPEC;
23              
24             our $sch_req_str_or_code = ['any*', of=>['str*', 'code*']];
25              
26             sub _open_file_read {
27 138     138   196 my $filename = shift;
28              
29 138         240 my ($fh, $err);
30 138 50       288 if ($filename eq '-') {
31 0         0 $fh = *STDIN;
32             } else {
33 138 50       5860 open $fh, "<", $filename or do {
34 0         0 $err = [500, "Can't open input filename '$filename': $!"];
35 0         0 goto RETURN;
36             };
37             }
38 138         1944 binmode $fh, ":encoding(utf8)";
39              
40 138         5874 RETURN:
41             ($fh, $err);
42             }
43              
44             sub _open_file_write {
45 2     2   6 my $filename = shift;
46              
47 2         5 my ($fh, $err);
48 2 50       7 if ($filename eq '-') {
49 0         0 $fh = *STDOUT;
50             } else {
51 2 50       137 open $fh, ">", $filename or do {
52 0         0 $err = [500, "Can't open output filename '$filename': $!"];
53 0         0 goto RETURN;
54             };
55             }
56 2         31 binmode $fh, ":encoding(utf8)";
57              
58 2         84 RETURN:
59             ($fh, $err);
60             }
61              
62             sub _return_or_write_file {
63 0     0   0 my ($res, $filename, $overwrite) = @_;
64 0 0       0 return $res if !defined($filename);
65              
66 0         0 my $fh;
67 0 0       0 if ($filename eq '-') {
68 0         0 $fh = \*STDOUT;
69             } else {
70 0 0       0 if (-f $filename) {
71 0 0       0 if ($overwrite) {
72 0         0 log_info "[csvutil] Overwriting output file $filename";
73             } else {
74 0         0 return [412, "Refusing to ovewrite existing output file '$filename', please select another path or specify --overwrite"];
75             }
76             }
77 0 0       0 open my $fh, ">", $filename or do {
78 0         0 return [500, "Can't open output file '$filename': $!"];
79             };
80 0         0 binmode $fh, ":encoding(utf8)";
81 0         0 print $fh $res->[2];
82 0 0       0 close $fh or warn "Can't write to '$filename': $!";
83 0         0 return [$res->[0], $res->[1]];
84             }
85             }
86              
87             sub compile_eval_code {
88 45 100   45 1 148 return $_[0] if ref $_[0] eq 'CODE';
89 43         117 my ($str, $label) = @_;
90 43 50 33     184 defined($str) && length($str) or die [400, "Please specify code ($label)"];
91 43         104 $str = "package main; no strict; no warnings; sub { $str }";
92 43         139 log_trace "[csvutil] Compiling Perl code: $str";
93 1     1   7 my $code = eval $str; ## no critic: BuiltinFunctions::ProhibitStringyEval
  1     1   2  
  1     1   26  
  1     1   5  
  1     1   12  
  1     1   62  
  1     1   7  
  1     1   2  
  1     1   26  
  1     1   5  
  1     1   2  
  1     1   57  
  1     1   7  
  1     1   3  
  1     1   47  
  1     1   6  
  1     1   2  
  1     1   45  
  1     1   6  
  1     1   2  
  1     1   38  
  1     1   7  
  1     1   2  
  1     1   42  
  1     1   7  
  1     1   2  
  1     1   37  
  1     1   7  
  1     1   2  
  1     1   54  
  1     1   23  
  1     1   4  
  1     1   30  
  1     1   11  
  1     1   15  
  1     1   70  
  1     1   7  
  1     1   3  
  1     1   37  
  1     1   6  
  1     1   5  
  1     1   59  
  1     1   7  
  1     1   3  
  1     1   50  
  1     1   9  
  1     1   2  
  1     1   55  
  1     1   6  
  1     1   4  
  1     1   29  
  1     1   5  
  1     1   2  
  1     1   59  
  1     1   7  
  1     1   2  
  1     1   29  
  1     1   4  
  1     1   2  
  1     1   67  
  1     1   7  
  1     1   2  
  1     1   25  
  1     1   5  
  1     1   2  
  1     1   55  
  1     1   6  
  1     1   2  
  1     1   24  
  1     1   5  
  1     1   2  
  1     1   43  
  1     1   6  
  1     1   3  
  1     1   23  
  1     1   5  
  1     1   1  
  1     1   45  
  1     1   6  
  1     1   2  
  1     1   23  
  1     1   5  
  1     1   1  
  1     1   82  
  1     1   9  
  1     1   108  
  1         39  
  1         6  
  1         2  
  1         140  
  1         8  
  1         2  
  1         23  
  1         5  
  1         2  
  1         64  
  1         18  
  1         2  
  1         49  
  1         7  
  1         2  
  1         77  
  1         7  
  1         2  
  1         40  
  1         10  
  1         7  
  1         116  
  1         12  
  1         3  
  1         48  
  1         6  
  1         2  
  1         51  
  1         7  
  1         2  
  1         25  
  1         5  
  1         3  
  1         100  
  1         8  
  1         2  
  1         42  
  1         6  
  1         3  
  1         61  
  1         7  
  1         2  
  1         67  
  1         7  
  1         2  
  1         71  
  1         9  
  1         2  
  1         25  
  1         4  
  1         3  
  1         57  
  1         6  
  1         2  
  1         36  
  1         7  
  1         1  
  1         40  
  1         10  
  1         2  
  1         24  
  1         6  
  1         3  
  1         56  
  1         7  
  1         2  
  1         40  
  1         7  
  1         2  
  1         40  
  1         7  
  1         3  
  1         35  
  1         6  
  1         3  
  1         54  
  1         7  
  1         4  
  1         38  
  1         6  
  1         2  
  1         58  
  1         6  
  1         3  
  1         27  
  1         7  
  1         2  
  1         49  
  1         7  
  1         2  
  1         24  
  1         4  
  1         3  
  1         52  
  1         7  
  1         2  
  1         29  
  1         6  
  1         7  
  1         57  
  1         7  
  1         3  
  1         36  
  1         5  
  1         9  
  1         69  
  1         8  
  1         2  
  1         23  
  1         5  
  1         2  
  1         57  
  1         8  
  1         1  
  1         25  
  1         5  
  1         2  
  1         61  
  1         7  
  1         2  
  1         31  
  1         5  
  1         7  
  1         66  
  1         7  
  1         3  
  1         70  
  1         10  
  1         2  
  1         49  
  1         12  
  1         2  
  1         24  
  1         14  
  1         3  
  1         60  
  1         7  
  1         2  
  1         71  
  1         8  
  1         6  
  1         51  
  1         8  
  1         2  
  1         27  
  1         5  
  1         2  
  1         80  
  1         8  
  1         2  
  1         25  
  1         6  
  1         2  
  1         86  
  1         8  
  1         1  
  1         38  
  1         5  
  1         2  
  1         85  
  1         7  
  1         5  
  1         37  
  1         6  
  1         2  
  1         84  
  1         8  
  1         3  
  1         36  
  1         7  
  1         2  
  1         80  
  43         3558  
94 43 100       191 die [400, "Can't compile code ($label) '$str': $@"] if $@;
95 40         141 $code;
96             }
97              
98             sub eval_code {
99 1     1   8 no warnings 'once';
  1         2  
  1         7703  
100 60     60 1 145 my ($code, $r, $value_for_topic, $return_topic) = @_;
101 60         124 local $_ = $value_for_topic;
102 60         81 local $main::r = $r;
103 60         126 local $main::row = $r->{input_row};
104 60         84 local $main::rownum = $r->{input_rownum};
105 60         100 local $main::data_rownum = $r->{input_data_rownum};
106 60         91 local $main::csv = $r->{input_parser};
107 60         77 local $main::fields_idx = $r->{input_fields_idx};
108 60 100       107 if ($return_topic) {
109 9         181 $code->($_);
110 9         27 $_;
111             } else {
112 51         1065 $code->($_);
113             }
114             }
115              
116             sub _get_field_idx {
117 0     0   0 my ($field, $field_idxs) = @_;
118 0 0 0     0 defined($field) && length($field) or die "Please specify at least a field\n";
119 0         0 my $idx = $field_idxs->{$field};
120             die "Unknown field '$field' (known fields include: ".
121 0 0       0 join(", ", map { "'$_'" } sort {$field_idxs->{$a} <=> $field_idxs->{$b}}
  0         0  
  0         0  
122             keys %$field_idxs).")\n" unless defined $idx;
123 0         0 $idx;
124             }
125              
126             sub _get_csv_row {
127 0     0   0 my ($csv, $row, $i, $outputs_header) = @_;
128             #use DD; print " "; dd $row;
129 0 0 0     0 return "" if $i == 1 && !$outputs_header;
130 0 0       0 my $status = $csv->combine(@$row)
131             or die "Error in line $i: ".$csv->error_input."\n";
132 0         0 $csv->string . "\n";
133             }
134              
135             sub _instantiate_parser_default {
136 0     0   0 require Text::CSV_XS;
137              
138 0         0 Text::CSV_XS->new({binary=>1});
139             }
140              
141             sub _instantiate_parser {
142 213     213   2082 require Text::CSV_XS;
143              
144 213         13748 my ($args, $prefix) = @_;
145 213   50     438 $prefix //= '';
146              
147 213         508 my %tcsv_opts = (binary=>1);
148 213 100 66     1619 if (defined $args->{"${prefix}sep_char"} ||
    100 66        
149             defined $args->{"${prefix}quote_char"} ||
150             defined $args->{"${prefix}escape_char"}) {
151 1 50       7 $tcsv_opts{"sep_char"} = $args->{"${prefix}sep_char"} if defined $args->{"${prefix}sep_char"};
152 1 50       4 $tcsv_opts{"quote_char"} = $args->{"${prefix}quote_char"} if defined $args->{"${prefix}quote_char"};
153 1 50       5 $tcsv_opts{"escape_char"} = $args->{"${prefix}escape_char"} if defined $args->{"${prefix}escape_char"};
154             } elsif ($args->{"${prefix}tsv"}) {
155 1         3 $tcsv_opts{"sep_char"} = "\t";
156 1         3 $tcsv_opts{"quote_char"} = undef;
157 1         2 $tcsv_opts{"escape_char"} = undef;
158             }
159 213 100       491 $tcsv_opts{always_quote} = 1 if $args->{"${prefix}always_quote"};
160 213 50       427 $tcsv_opts{quote_empty} = 1 if $args->{"${prefix}quote_empty"};
161              
162 213         895 Text::CSV_XS->new(\%tcsv_opts);
163             }
164              
165             sub _instantiate_emitter {
166 94     94   154 my $args = shift;
167 94         203 _instantiate_parser($args, 'output_');
168             }
169              
170             sub _complete_field_or_field_list {
171             # return list of known fields of a CSV
172              
173 0     0   0 my $which = shift;
174              
175 0         0 my %args = @_;
176 0   0     0 my $word = $args{word} // '';
177 0         0 my $cmdline = $args{cmdline};
178 0         0 my $r = $args{r};
179              
180             # we are not called from cmdline, bail
181 0 0       0 return undef unless $cmdline; ## no critic: Subroutines::ProhibitExplicitReturnUndef
182              
183             # let's parse argv first
184 0         0 my $args;
185             {
186             # this is not activated yet
187 0         0 $r->{read_config} = 1;
  0         0  
188              
189 0         0 my $res = $cmdline->parse_argv($r);
190             #return undef unless $res->[0] == 200;
191              
192 0 0       0 $cmdline->_read_config($r) unless $r->{config};
193 0         0 $args = $res->[2];
194             }
195              
196             # user hasn't specified -f, bail
197 0 0 0     0 return {message=>"Please specify input filename first"} unless defined $args && $args->{input_filename};
198              
199             # user wants to read CSV from stdin, bail
200 0 0       0 return {message=>"Can't get field list when input is stdin"} if $args->{input_filename} eq '-';
201              
202             # can the file be opened?
203 0         0 my $csv_parser = _instantiate_parser(\%args, 'input_');
204 0 0       0 open my($fh), "<encoding(utf8)", $args->{input_filename} or do {
205             #warn "csvutils: Cannot open file '$args->{input_filename}': $!\n";
206 0         0 return [];
207             };
208              
209             # can the header row be read?
210 0 0       0 my $row = $csv_parser->getline($fh) or return [];
211              
212 0 0 0     0 if (defined $args->{input_header} && !$args->{input_header}) {
213 0         0 $row = [map {"field$_"} 1 .. @$row];
  0         0  
214             }
215              
216 0 0       0 if ($which =~ /sort/) {
217 0         0 $row = [map {($_,"-$_","+$_","~$_")} @$row];
  0         0  
218             }
219              
220 0         0 require Complete::Util;
221 0 0       0 if ($which =~ /field_list/) {
222 0         0 return Complete::Util::complete_comma_sep(
223             word => $word,
224             elems => $row,
225             uniq => 1,
226             );
227             } else {
228 0         0 return Complete::Util::complete_array_elem(
229             word => $word,
230             array => $row,
231             );
232             }
233             }
234              
235             sub _complete_field {
236 0     0   0 _complete_field_or_field_list('field', @_);
237             }
238              
239             sub _complete_field_list {
240 0     0   0 _complete_field_or_field_list('field_list', @_);
241             }
242              
243             sub _complete_sort_field_list {
244 0     0   0 _complete_field_or_field_list('sort_field_list', @_);
245             }
246              
247             sub _complete_sort_field {
248 0     0   0 _complete_field_or_field_list('sort_field', @_);
249             }
250              
251             sub _array2hash {
252 0     0   0 my ($row, $fields) = @_;
253 0         0 my $rowhash = {};
254 0         0 for my $i (0..$#{$fields}) {
  0         0  
255 0         0 $rowhash->{ $fields->[$i] } = $row->[$i];
256             }
257 0         0 $rowhash;
258             }
259              
260             # check that the first N values of a field are all defined and numeric. if there
261             # are now rows or less than N values, return true.
262             sub _is_numeric_field {
263 0     0   0 require Scalar::Util::Numeric;
264              
265 0         0 my ($rows, $field_idx, $num_samples) = @_;
266 0   0     0 $num_samples //= 5;
267              
268 0         0 my $is_numeric = 1;
269 0         0 for my $row (@$rows) {
270 0         0 my $val = $row->[$field_idx];
271 0 0       0 return 0 unless defined $val;
272 0 0       0 return 0 unless Scalar::Util::Numeric::isnum($val);
273             }
274 0         0 $is_numeric;
275             }
276              
277             # find a single field by name or index (1-based), return index (0-based). die
278             # when requested field does not exist.
279             sub _find_field {
280 17     17   42 my ($fields, $name_or_idx) = @_;
281              
282             # search by name first
283 17         23 for my $i (0 .. $#{$fields}) {
  17         54  
284 29         47 my $field = $fields->[$i];
285 29 100       100 return $i if $field eq $name_or_idx;
286             }
287              
288 2 50       32 if ($name_or_idx eq '0') {
    100          
    50          
289 0         0 die [400, "Field index 0 is requested, you probably meant 1 for the first field?"];
290             } elsif ($name_or_idx =~ /\A[1-9][0-9]*\z/) {
291 1 50       7 if ($name_or_idx > @$fields) {
292 0         0 die [400, "There are only ".scalar(@$fields)." field(s) but field index $name_or_idx is requested"];
293             } else {
294 1         4 return $name_or_idx-1;
295             }
296             } elsif ($name_or_idx =~ /\A-[1-9][0-9]*\z/) {
297 0 0       0 if (-$name_or_idx > @$fields) {
298 0         0 die [400, "There are only ".scalar(@$fields)." field(s) but field index $name_or_idx is requested"];
299             } else {
300 0         0 return @$fields + $name_or_idx;
301             }
302             }
303              
304             # not found
305             die [404, "Unknown field name/index '$name_or_idx' (known fields include: ".
306 1         7 join(", ", map { "'$_'" } @$fields).")"];
  3         17  
307             }
308              
309             # select one or more fields with options like --include-field, etc
310             sub _select_fields {
311 12     12   35 my ($fields, $field_idxs, $args, $default_select_choice) = @_;
312              
313 12         18 my @selected_fields;
314              
315             my $select_field_options_used;
316              
317 12 100       33 if (defined $args->{include_field_pat}) {
318 4         7 $select_field_options_used++;
319 4         7 for my $field (@$fields) {
320 10 50       59 if ($field =~ $args->{include_field_pat}) {
321 10         25 push @selected_fields, $field;
322             }
323             }
324             }
325 12 100       26 if (defined $args->{exclude_field_pat}) {
326 1         5 $select_field_options_used++;
327 1         3 @selected_fields = grep { $_ !~ $args->{exclude_field_pat} }
  3         13  
328             @selected_fields;
329             }
330 12 100       30 if (defined $args->{include_fields}) {
331 8         14 $select_field_options_used++;
332             FIELD:
333 8         14 for my $field (@{ $args->{include_fields} }) {
  8         24  
334 13 100       33 unless (defined $field_idxs->{$field}) {
335 4 100       28 return [400, "Unknown field '$field'"] unless $args->{ignore_unknown_fields};
336 2         6 next FIELD;
337             }
338 9 50       23 next if grep { $field eq $_ } @selected_fields;
  3         16  
339 9         23 push @selected_fields, $field;
340             }
341             }
342 10 100       33 if (defined $args->{exclude_fields}) {
343 2         5 $select_field_options_used++;
344             FIELD:
345 2         4 for my $field (@{ $args->{exclude_fields} }) {
  2         6  
346 2 50       6 unless (defined $field_idxs->{$field}) {
347 0 0       0 return [400, "Unknown field '$field'"] unless $args->{ignore_unknown_fields};
348 0         0 next FIELD;
349             }
350 2         4 @selected_fields = grep { $field ne $_ } @selected_fields;
  6         16  
351             }
352             }
353              
354 10 50 33     39 if (!$select_field_options_used && $default_select_choice) {
355 0 0       0 if ($default_select_choice eq 'all') {
    0          
    0          
    0          
356 0         0 @selected_fields = @$fields;
357             } elsif ($default_select_choice eq 'first') {
358 0 0       0 @selected_fields = ($fields->[0]) if @$fields;
359             } elsif ($default_select_choice eq 'last') {
360 0 0       0 @selected_fields = ($fields->[-1]) if @$fields;
361             } elsif ($default_select_choice eq 'first-if-only-field') {
362 0 0       0 @selected_fields = ($fields->[0]) if @$fields == 1;
363             }
364             }
365              
366 10 100       22 if ($args->{show_selected_fields}) {
367 1         8 return [200, "OK", \@selected_fields];
368             }
369              
370             #my %selected_field_idxs;
371             #$selected_field_idxs{$_} = $fields_idx->{$_} for @selected_fields;
372              
373 9         14 my @selected_field_idxs_array;
374 9         29 push @selected_field_idxs_array, $field_idxs->{$_} for @selected_fields;
375              
376 9         48 [100, "Continue", [\@selected_fields, \@selected_field_idxs_array]];
377             }
378              
379             our $xcomp_csvfiles = [filename => {file_ext_filter => qr/^[tc]sv$/i}];
380              
381             our %argspecs_csv_input = (
382             input_header => {
383             summary => 'Specify whether input CSV has a header row',
384             'summary.alt.bool.not' => 'Specify that input CSV does not have a header row',
385             schema => 'bool*',
386             default => 1,
387             description => <<'_',
388              
389             By default, the first row of the input CSV will be assumed to contain field
390             names (and the second row contains the first data row). When you declare that
391             input CSV does not have header row (`--no-input-header`), the first row of the
392             CSV is assumed to contain the first data row. Fields will be named `field1`,
393             `field2`, and so on.
394              
395             _
396             cmdline_aliases => {
397             },
398             tags => ['category:input'],
399             },
400             input_tsv => {
401             summary => "Inform that input file is in TSV (tab-separated) format instead of CSV",
402             schema => 'true*',
403             description => <<'_',
404              
405             Overriden by `--input-sep-char`, `--input-quote-char`, `--input-escape-char`
406             options. If one of those options is specified, then `--input-tsv` will be
407             ignored.
408              
409             _
410             tags => ['category:input'],
411             },
412             input_sep_char => {
413             summary => 'Specify field separator character in input CSV, will be passed to Text::CSV_XS',
414             schema => ['str*', len=>1],
415             description => <<'_',
416              
417             Defaults to `,` (comma). Overrides `--input-tsv` option.
418              
419             _
420             tags => ['category:input'],
421             },
422             input_quote_char => {
423             summary => 'Specify field quote character in input CSV, will be passed to Text::CSV_XS',
424             schema => ['str*', len=>1],
425             description => <<'_',
426              
427             Defaults to `"` (double quote). Overrides `--input-tsv` option.
428              
429             _
430             tags => ['category:input'],
431             },
432             input_escape_char => {
433             summary => 'Specify character to escape value in field in input CSV, will be passed to Text::CSV_XS',
434             schema => ['str*', len=>1],
435             description => <<'_',
436              
437             Defaults to `\\` (backslash). Overrides `--input-tsv` option.
438              
439             _
440             tags => ['category:input'],
441             },
442             );
443              
444             our %argspecs_csv_output = (
445             output_header => {
446             summary => 'Whether output CSV should have a header row',
447             schema => 'bool*',
448             description => <<'_',
449              
450             By default, a header row will be output *if* input CSV has header row. Under
451             `--output-header`, a header row will be output even if input CSV does not have
452             header row (value will be something like "col0,col1,..."). Under
453             `--no-output-header`, header row will *not* be printed even if input CSV has
454             header row. So this option can be used to unconditionally add or remove header
455             row.
456              
457             _
458             tags => ['category:output'],
459             },
460             output_tsv => {
461             summary => "Inform that output file is TSV (tab-separated) format instead of CSV",
462             schema => 'bool*',
463             description => <<'_',
464              
465             This is like `--input-tsv` option but for output instead of input.
466              
467             Overriden by `--output-sep-char`, `--output-quote-char`, `--output-escape-char`
468             options. If one of those options is specified, then `--output-tsv` will be
469             ignored.
470              
471             _
472             tags => ['category:output'],
473             },
474             output_sep_char => {
475             summary => 'Specify field separator character in output CSV, will be passed to Text::CSV_XS',
476             schema => ['str*', len=>1],
477             description => <<'_',
478              
479             This is like `--input-sep-char` option but for output instead of input.
480              
481             Defaults to `,` (comma). Overrides `--output-tsv` option.
482              
483             _
484             tags => ['category:output'],
485             },
486             output_quote_char => {
487             summary => 'Specify field quote character in output CSV, will be passed to Text::CSV_XS',
488             schema => ['str*', len=>1],
489             description => <<'_',
490              
491             This is like `--input-quote-char` option but for output instead of input.
492              
493             Defaults to `"` (double quote). Overrides `--output-tsv` option.
494              
495             _
496             tags => ['category:output'],
497             },
498             output_escape_char => {
499             summary => 'Specify character to escape value in field in output CSV, will be passed to Text::CSV_XS',
500             schema => ['str*', len=>1],
501             description => <<'_',
502              
503             This is like `--input-escape-char` option but for output instead of input.
504              
505             Defaults to `\\` (backslash). Overrides `--output-tsv` option.
506              
507             _
508             tags => ['category:output'],
509             },
510             output_always_quote => {
511             summary => 'Whether to always quote values',
512             schema => 'bool*',
513             default => 0,
514             description => <<'_',
515              
516             When set to false (the default), values are quoted only when necessary:
517              
518             field1,field2,"field three contains comma (,)",field4
519              
520             When set to true, then all values will be quoted:
521              
522             "field1","field2","field three contains comma (,)","field4"
523              
524             _
525             tags => ['category:output'],
526             },
527             output_quote_empty => {
528             summary => 'Whether to quote empty values',
529             schema => 'bool*',
530             default => 0,
531             description => <<'_',
532              
533             When set to false (the default), empty values are not quoted:
534              
535             field1,field2,,field4
536              
537             When set to true, then empty values will be quoted:
538              
539             field1,field2,"",field4
540              
541             _
542             tags => ['category:output'],
543             },
544             );
545              
546             our %argspecopt_input_filename = (
547             input_filename => {
548             summary => 'Input CSV file',
549             description => <<'_',
550              
551             Use `-` to read from stdin.
552              
553             Encoding of input file is assumed to be UTF-8.
554              
555             _
556             schema => 'filename*',
557             default => '-',
558             'x.completion' => $xcomp_csvfiles,
559             tags => ['category:input'],
560             },
561             );
562              
563             our %argspecopt_input_filenames = (
564             input_filenames => {
565             'x.name.is_plural' => 1,
566             'x.name.singular' => 'input_filename',
567             summary => 'Input CSV files',
568             description => <<'_',
569              
570             Use `-` to read from stdin.
571              
572             Encoding of input file is assumed to be UTF-8.
573              
574             _
575             schema => ['array*', of=>'filename*'],
576             default => ['-'],
577             'x.completion' => $xcomp_csvfiles,
578             tags => ['category:input'],
579             },
580             );
581              
582             our %argspecopt_overwrite = (
583             overwrite => {
584             summary => 'Whether to override existing output file',
585             schema => 'bool*',
586             cmdline_aliases=>{O=>{}},
587             tags => ['category:output'],
588             },
589             );
590              
591             our %argspecsopt_inplace = (
592             inplace => {
593             summary => 'Output to the same file as input',
594             schema => 'true*',
595             description => <<'_',
596              
597             Normally, you output to a different file than input. If you try to output to the
598             same file (`-o INPUT.csv -O`) you will clobber the input file; thus the utility
599             prevents you from doing it. However, with this `--inplace` option, you can
600             output to the same file. Like perl's `-i` option, this will first output to a
601             temporary file in the same directory as the input file then rename to the final
602             file at the end. You cannot specify output file (`-o`) when using this option,
603             but you can specify backup extension with `-b` option.
604              
605             Some caveats:
606              
607             - if input file is a symbolic link, it will be replaced with a regular file;
608             - renaming (implemented using `rename()`) can fail if input filename is too long;
609             - value specified in `-b` is currently not checked for acceptable characters;
610             - things can also fail if permissions are restrictive;
611              
612             _
613             tags => ['category:output'],
614             },
615             inplace_backup_ext => {
616             summary => 'Extension to add for backup of input file',
617             schema => 'str*',
618             default => '',
619             description => <<'_',
620              
621             In inplace mode (`--inplace`), if this option is set to a non-empty string, will
622             rename the input file using this extension as a backup. The old existing backup
623             will be overwritten, if any.
624              
625             _
626             cmdline_aliases => {b=>{}},
627             tags => ['category:output'],
628             },
629             );
630              
631             our %argspecopt_output_filename = (
632             output_filename => {
633             summary => 'Output filename',
634             description => <<'_',
635              
636             Use `-` to output to stdout (the default if you don't specify this option).
637              
638             Encoding of output file is assumed to be UTF-8.
639              
640             _
641             schema => 'filename*',
642             cmdline_aliases=>{o=>{}},
643             tags => ['category:output'],
644             },
645             );
646              
647             our %argspecopt_output_filenames = (
648             output_filenames => {
649             summary => 'Output filenames',
650             description => <<'_',
651              
652             Use `-` to output to stdout (the default if you don't specify this option).
653              
654             Encoding of output file is assumed to be UTF-8.
655              
656             _
657             schema => ['array*', of=>'filename*'],
658             cmdline_aliases=>{o=>{}},
659             tags => ['category:output'],
660             },
661             );
662              
663             our %argspecopt_field = (
664             field => {
665             summary => 'Field name',
666             schema => 'str*',
667             cmdline_aliases => { f=>{} },
668             completion => \&_complete_field,
669             },
670             );
671              
672             our %argspecopt_field_1 = (
673             field => {
674             summary => 'Field name',
675             schema => 'str*',
676             pos => 1,
677             cmdline_aliases => { f=>{} },
678             completion => \&_complete_field,
679             },
680             );
681              
682             our %argspec_field_1 = (
683             field => {
684             summary => 'Field name',
685             schema => 'str*',
686             cmdline_aliases => { f=>{} },
687             req => 1,
688             pos => 1,
689             completion => \&_complete_field,
690             },
691             );
692              
693             our %argspec_fields_1plus = (
694             fields => {
695             'x.name.is_plural' => 1,
696             'x.name.singular' => 'field',
697             summary => 'Field names',
698             schema => ['array*', of=>['str*', min_len=>1], min_len=>1],
699             req => 1,
700             pos => 1,
701             slurpy => 1,
702             cmdline_aliases => {f=>{}},
703             element_completion => \&_complete_field,
704             },
705             );
706              
707             # without completion, for adding new field
708             our %argspec_field_1_nocomp = (
709             field => {
710             summary => 'Field name',
711             schema => 'str*',
712             cmdline_aliases => { f=>{} },
713             req => 1,
714             pos => 1,
715             },
716             );
717              
718             # without completion, for adding new fields
719             our %argspec_fields_1plus_nocomp = (
720             fields => {
721             'x.name.is_plural' => 1,
722             'x.name.singular' => 'field',
723             summary => 'Field names',
724             'summary.alt.plurality.singular' => 'Field name',
725             schema => ['array*', of=>['str*', min_len=>1], min_len=>1],
726             cmdline_aliases => { f=>{} },
727             req => 1,
728             pos => 1,
729             slurpy => 1,
730             },
731             );
732              
733             our %argspec_fields = (
734             fields => {
735             'x.name.is_plural' => 1,
736             'x.name.singular' => 'field',
737             summary => 'Field names',
738             schema => ['array*', of=>['str*', min_len=>1], min_len=>1],
739             req => 1,
740             cmdline_aliases => {f=>{}},
741             element_completion => \&_complete_field,
742             },
743             );
744              
745             our %argspecopt_fields = (
746             fields => {
747             'x.name.is_plural' => 1,
748             'x.name.singular' => 'field',
749             summary => 'Field names',
750             schema => ['array*', of=>['str*', min_len=>1], min_len=>1],
751             cmdline_aliases => {f=>{}},
752             element_completion => \&_complete_field,
753             },
754             );
755              
756             our %argspecsopt_field_selection = (
757             include_fields => {
758             'x.name.is_plural' => 1,
759             'x.name.singular' => 'include_field',
760             summary => 'Field names to include, takes precedence over --exclude-field-pat',
761             schema => ['array*', of=>'str*'],
762             cmdline_aliases => {
763             f => {},
764             field => {}, # backward compatibility
765             },
766             element_completion => \&_complete_field,
767             tags => ['category:field-selection'],
768             },
769             include_field_pat => {
770             summary => 'Field regex pattern to select, overidden by --exclude-field-pat',
771             schema => 're*',
772             cmdline_aliases => {
773             field_pat => {}, # backward compatibility
774             include_all_fields => { summary => 'Shortcut for --field-pat=.*, effectively selecting all fields', is_flag=>1, code => sub { $_[0]{include_field_pat} = '.*' } },
775             a => { summary => 'Shortcut for --field-pat=.*, effectively selecting all fields', is_flag=>1, code => sub { $_[0]{include_field_pat} = '.*' } },
776             },
777             tags => ['category:field-selection'],
778             },
779             exclude_fields => {
780             'x.name.is_plural' => 1,
781             'x.name.singular' => 'exclude_field',
782             summary => 'Field names to exclude, takes precedence over --fields',
783             schema => ['array*', of=>'str*'],
784             cmdline_aliases => {
785             F => {},
786             },
787             element_completion => \&_complete_field,
788             tags => ['category:field-selection'],
789             },
790             exclude_field_pat => {
791             summary => 'Field regex pattern to exclude, takes precedence over --field-pat',
792             schema => 're*',
793             cmdline_aliases => {
794             exclude_all_fields => { summary => 'Shortcut for --exclude-field-pat=.*, effectively excluding all fields', is_flag=>1, code => sub { $_[0]{exclude_field_pat} = '.*' } },
795             A => { summary => 'Shortcut for --exclude-field-pat=.*, effectively excluding all fields', is_flag=>1, code => sub { $_[0]{exclude_field_pat} = '.*' } },
796             },
797             tags => ['category:field-selection'],
798             },
799             ignore_unknown_fields => {
800             summary => 'When unknown fields are specified in --include-field (--field) or --exclude-field options, ignore them instead of throwing an error',
801             schema => 'bool*',
802             },
803             show_selected_fields => {
804             summary => 'Show selected fields and then immediately exit',
805             schema => 'true*',
806             },
807             );
808              
809             our %argspec_eval = (
810             eval => {
811             summary => 'Perl code',
812             schema => $sch_req_str_or_code,
813             cmdline_aliases => { e=>{} },
814             req => 1,
815             },
816             );
817              
818             our %argspecopt_eval = (
819             eval => {
820             summary => 'Perl code',
821             schema => $sch_req_str_or_code,
822             cmdline_aliases => { e=>{} },
823             },
824             );
825              
826             our %argspec_eval_1 = (
827             eval => {
828             summary => 'Perl code',
829             schema => $sch_req_str_or_code,
830             cmdline_aliases => { e=>{} },
831             req => 1,
832             pos => 1,
833             },
834             );
835              
836             our %argspec_eval_2 = (
837             eval => {
838             summary => 'Perl code',
839             schema => $sch_req_str_or_code,
840             cmdline_aliases => { e=>{} },
841             req => 1,
842             pos => 2,
843             },
844             );
845              
846             our %argspecopt_eval_2 = (
847             eval => {
848             summary => 'Perl code',
849             schema => $sch_req_str_or_code,
850             cmdline_aliases => { e=>{} },
851             pos => 2,
852             },
853             );
854              
855             our %argspecsopt_sortsub = (
856             by_sortsub => {
857             schema => 'str*',
858             description => <<'_',
859              
860             When sorting rows, usually combined with `--key` because most Sort::Sub routine
861             expects a string to be compared against.
862              
863             When sorting fields, the Sort::Sub routine will get the field name as argument.
864              
865             _
866             summary => 'Sort using a Sort::Sub routine',
867             'x.completion' => ['sortsub_spec'],
868             },
869             sortsub_args => {
870             summary => 'Arguments to pass to Sort::Sub routine',
871             schema => ['hash*', of=>'str*'],
872             },
873             );
874              
875             our %argspecopt_key = (
876             key => {
877             summary => 'Generate sort keys with this Perl code',
878             description => <<'_',
879              
880             If specified, then will compute sort keys using Perl code and sort using the
881             keys. Relevant when sorting using `--by-code` or `--by-sortsub`. If specified,
882             then instead of row when sorting rows, the code (or Sort::Sub routine) will
883             receive these sort keys to sort against.
884              
885             The code will receive the row (arrayref, or if -H is specified, hashref) as the
886             argument.
887              
888             _
889             schema => $sch_req_str_or_code,
890             cmdline_aliases => {k=>{}},
891             },
892             );
893              
894             our %argspecs_sort_rows = (
895             reverse => {
896             schema => ['bool', is=>1],
897             cmdline_aliases => {r=>{}},
898             },
899             ci => {
900             schema => ['bool', is=>1],
901             cmdline_aliases => {i=>{}},
902             },
903             by_fields => {
904             summary => 'Sort by a list of field specifications',
905             'summary.alt.plurality.singular' => 'Add a sort field specification',
906             'x.name.is_plural' => 1,
907             'x.name.singular' => 'by_field',
908             description => <<'_',
909              
910             Each field specification is a field name with an optional prefix. `FIELD`
911             (without prefix) means sort asciibetically ascending (smallest to largest),
912             `~FIELD` means sort asciibetically descending (largest to smallest), `+FIELD`
913             means sort numerically ascending, `-FIELD` means sort numerically descending.
914              
915             _
916             schema => ['array*', of=>'str*'],
917             element_completion => \&_complete_sort_field,
918             },
919             by_code => {
920             summary => 'Sort by using Perl code',
921             schema => $sch_req_str_or_code,
922             description => <<'_',
923              
924             `$a` and `$b` (or the first and second argument) will contain the two rows to be
925             compared. Which are arrayrefs; or if `--hash` (`-H`) is specified, hashrefs; or
926             if `--key` is specified, whatever the code in `--key` returns.
927              
928             _
929             },
930             %argspecopt_key,
931             %argspecsopt_sortsub,
932             );
933              
934             our %argspecs_sort_fields = (
935             reverse => {
936             schema => ['bool', is=>1],
937             cmdline_aliases => {r=>{}},
938             },
939             ci => {
940             schema => ['bool', is=>1],
941             cmdline_aliases => {i=>{}},
942             },
943             by_examples => {
944             summary => 'Sort by a list of field names as examples',
945             'summary.alt.plurality.singular' => 'Add a field to sort by example',
946             'x.name.is_plural' => 1,
947             'x.name.singular' => 'by_example',
948             schema => ['array*', of=>'str*'],
949             element_completion => \&_complete_field,
950             },
951             by_code => {
952             summary => 'Sort fields using Perl code',
953             schema => $sch_req_str_or_code,
954             description => <<'_',
955              
956             `$a` and `$b` (or the first and second argument) will contain `[$field_name,
957             $field_idx]`.
958              
959             _
960             },
961             %argspecsopt_sortsub,
962             );
963              
964             our %argspecopt_with_data_rows = (
965             with_data_rows => {
966             summary => 'Whether to also output data rows',
967             schema => 'bool',
968             },
969             );
970              
971             our %argspecopt_hash = (
972             hash => {
973             summary => 'Provide row in $_ as hashref instead of arrayref',
974             schema => ['bool*', is=>1],
975             cmdline_aliases => {H=>{}},
976             },
977             );
978              
979             # add a position to specified argument, if possible
980             sub _add_arg_pos {
981 42     42   100 my ($args, $argname, $is_slurpy) = @_;
982              
983             # argument already has a position, return
984 42 50       105 return if defined $args->{$argname}{pos};
985              
986             # position of slurpy argument
987 42         61 my $slurpy_pos;
988 42         149 for (keys %$args) {
989 509 100       993 next unless $args->{$_}{slurpy};
990 6         19 $slurpy_pos = $args->{$_}{pos};
991 6         13 last;
992             }
993              
994             # there is already a slurpy arg, return
995 42 50 66     161 return if $is_slurpy && defined $slurpy_pos;
996              
997             # find the lowest position that's not available
998             ARG:
999 42         130 for my $j (0 .. scalar(keys %$args)-1) {
1000 60 100 100     160 last if defined $slurpy_pos && $j >= $slurpy_pos;
1001 56         177 for (keys %$args) {
1002 648 100 100     1381 next ARG if defined $args->{$_}{pos} && $args->{$_}{pos} == $j;
1003             }
1004 38         104 $args->{$argname}{pos} = $j;
1005 38 100       89 $args->{$argname}{slurpy} = 1 if $is_slurpy;
1006 38         104 last;
1007             }
1008             }
1009              
1010             sub _randext {
1011 2     2   16 state $charset = [0..9, "A".."Z","a".."z"];
1012 2         6 my $len = shift;
1013 2         3 my $ext = "";
1014 2         5 for (1..$len) { $ext .= $charset->[rand @$charset] }
  10         21  
1015 2         8 $ext;
1016             }
1017              
1018             $SPEC{gen_csv_util} = {
1019             v => 1.1,
1020             summary => 'Generate a CSV utility',
1021             description => <<'_',
1022              
1023             This routine is used to generate a CSV utility in the form of a <pm:Rinci>
1024             function (code and metadata). You can then produce a CLI from the Rinci function
1025             simply using <pm:Perinci::CmdLine::Gen> or, if you use <pm:Dist::Zilla>,
1026             <pm:Dist::Zilla::Plugin::GenPericmdScript> or, if on the command-line,
1027             <prog:gen-pericmd-script>.
1028              
1029             Using this routine, by providing just one or a few hooks and setting some
1030             parameters like a couple of extra arguments, you will get a complete CLI with
1031             decent POD/manpage, ability to read one or multiple CSV's and write one or
1032             multiple CSV's, some command-line options to customize how the input CSV's
1033             should be parsed and how the output CSV's should be formatted and named. Your
1034             CLI also has tab completion, usage and help message, and other features.
1035              
1036             To create a CSV utility, you specify a `name` (e.g. `csv_dump`; must be a valid
1037             unqualified Perl identifier/function name) and optionally `summary`,
1038             `description`, and other metadata like `links` or even `add_meta_props`. Then
1039             you specify one or more of `on_*` or `before_*` or `after_*` arguments to supply
1040             handlers (coderefs) for your CSV utility at various hook points.
1041              
1042              
1043             *THE HOOKS*
1044              
1045             All code for hooks should accept a single argument `r`. `r` is a stash (hashref)
1046             of various data, the keys of which will depend on which hook point being called.
1047             You can also add more keys to store data or for flow control (see hook
1048             documentation below for more details).
1049              
1050             The order of the hooks, in processing chronological order:
1051              
1052             * on_begin
1053              
1054             Called when utility begins, before reading CSV. You can use this hook e.g. to
1055             process arguments, set output filenames (if you allow custom output
1056             filenames).
1057              
1058             * before_read_input
1059              
1060             Called before opening any input CSV file. This hook is *still* called even if
1061             your utility sets `reads_csv` to false.
1062              
1063             At this point, the `input_filenames` stash key (as well as other keys like
1064             `input_filename`, `input_filenum`, etc) has not been set. You can use this
1065             hook e.g. to set a custom `input_filenames`.
1066              
1067             * before_open_input_files
1068              
1069             Called before an input CSV file is about to be opened, including for stdin
1070             (`-`). You can use this hook e.g. to check/preprocess input file. Flow control
1071             is available by setting `$r->{wants_skip_files}` to skip reading all the input
1072             file and go directly to the `after_read_input` hook.
1073              
1074             * before_open_input_file
1075              
1076             Called before an input CSV file is about to be opened, including for stdin
1077             (`-`). For the first file, called after `before_open_input_file` hook. You can
1078             use this hook e.g. to check/preprocess input file. Flow control is available
1079             by setting `$r->{wants_skip_file}` to skip reading a single input file and go
1080             to the next file, or `$r->{wants_skip_files}` to skip reading the rest of the
1081             files and go directly to the `after_read_input` hook.
1082              
1083             * on_input_header_row
1084              
1085             Called when receiving header row. Will be called for every input file, and
1086             called even when user specify `--no-input-header`, in which case the header
1087             row will be the generated `["field1", "field2", ...]`. You can use this hook
1088             e.g. to add/remove/rearrange fields.
1089              
1090             You can set `$r->{wants_fill_rows}` to a defined false if you do not want
1091             `$r->{input_rows}` to be filled with empty string elements when it contains
1092             less than the number of fields (in case of sparse values at the end). Normally
1093             you only want to do this when you want to do checking, e.g. in
1094             <prog:csv-check-rows>.
1095              
1096             * on_input_data_row
1097              
1098             Called when receiving each data row. You can use this hook e.g. to modify the
1099             row or print output (for line-by-line transformation or filtering).
1100              
1101             * after_close_input_file
1102              
1103             Called after each input file is closed, including for stdin (`-`) (although
1104             for stdin, the handle is not actually closed). Flow control is possible by
1105             setting `$r->{wants_skip_files}` to skip reading the rest of the files and go
1106             straight to the `after_close_input_files` hook.
1107              
1108             * after_close_input_files
1109              
1110             Called after the last input file is closed, after the last
1111             `after_close_input_file` hook, including for stdin (`-`) (although for stdin,
1112             the handle is not actually closed).
1113              
1114             * after_read_input
1115              
1116             Called after the last row of the last CSV file is read and the last file is
1117             closed. This hook is *still* called, if you set `reads_csv` option to false.
1118             At this point the stash keys related to CSV reading have all been cleared,
1119             including `input_filenames`, `input_filename`, `input_fh`, etc.
1120              
1121             You can use this hook e.g. to print output if you buffer the output.
1122              
1123             * on_end
1124              
1125             Called when utility is about to exit. You can use this hook e.g. to return the
1126             final result.
1127              
1128              
1129             *THE STASH*
1130              
1131             The common keys that `r` will contain:
1132              
1133             - `gen_args`, hash. The arguments used to generate the CSV utility.
1134              
1135             - `util_args`, hash. The arguments that your CSV utility accepts. Parsed from
1136             command-line arguments (or configuration files, or environment variables).
1137              
1138             - `name`, str. The name of the CSV utility. Which can also be retrieved via
1139             `gen_args`.
1140              
1141             - `code_print`, coderef. Routine provided for you to print something. Accepts a
1142             string. Takes care of opening the output files for you.
1143              
1144             - `code_print_row`, coderef. Routine provided for you to print a data row. You
1145             pass the row (either arrayref or hashref). Takes care of opening the output
1146             files for you, as well as printing header row the first time, if needed.
1147              
1148             - `code_print_header_row`, coderef. Routine provided for you to print header
1149             row. You don't need to pass any arguments. Will only print the header row once
1150             per output file if output header is enabled, even if called multiple times.
1151              
1152             If you are accepting CSV data (`reads_csv` gen argument set to true), the
1153             following keys will also be available (in `on_input_header_row` and
1154             `on_input_data_row` hooks):
1155              
1156             - `input_parser`, a <pm:Text::CSV_XS> instance for input parsing.
1157              
1158             - `input_fields`, array of str. Input CSV's field names.
1159              
1160             - `input_fields_idx`, hash with field name as keys and field index (0-based
1161             integer) as values.
1162              
1163             - `input_filenames`, array of str.
1164              
1165             - `input_filename`, str. The name of the current input file being read (`-` if
1166             reading from stdin).
1167              
1168             - `input_filenum`, uint. The number of the current input file, 1 being the first
1169             file, 2 for the second, and so on.
1170              
1171             - `input_fh`, the handle to the current file being read.
1172              
1173             - `input_rownum`, uint. The number of rows that have been read (reset after each
1174             input file). In `on_input_header_row` phase, this will be 1 since header row
1175             (including the generated one) is the first row. Then in `on_input_data_row`
1176             phase (called the first time for a file), it will be 2 for the first data row,
1177             even if physically it is the first row for CSV file that does not have a
1178             header.
1179              
1180             - `input_data_rownum`, uint. The number of data rows that have been read (reset
1181             after each input file). This will be equal to `input_rownum` less 1 if input
1182             file has header.
1183              
1184             - `input_row`, aos (array of str). The current input CSV row as an arrayref.
1185              
1186             - `input_row_as_hashref`, hos (hash of str). The current input CSV row as a
1187             hashref, with field names as hash keys and field values as hash values. This
1188             will only be calculated if utility wants it. Utility can express so by setting
1189             `$r->{wants_input_row_as_hashref}` to true, e.g. in the `on_begin` hook.
1190              
1191             - `input_header_row_count`, uint. Contains the number of actual header rows that
1192             have been read. If CLI user specifies `--no-input-header`, this will stay at
1193             zero. Will be reset for each CSV file.
1194              
1195             - `input_data_row_count`, int. Contains the number of actual data rows that have
1196             read. Will be reset for each CSV file.
1197              
1198             If you are outputting CSV (`writes_csv` gen argument set to true), the following
1199             keys will be available:
1200              
1201             - `output_emitter`, a <pm:Text::CSV_XS> instance for output.
1202              
1203             - `output_fields`, array of str. Should be set to list of output field names. If
1204             unset, will be set to be the same as `input_fields`.
1205              
1206             - `output_fields_idx`, hash with field names as keys and field indexes (0-based
1207             integer) as values. Normally you do not need to set this manually; you just
1208             need to set `output_fields` and this hash will be computed automatically for
1209             you just before the first output row is outputted.
1210              
1211             - `output_filenames`, array of str.
1212              
1213             - `output_filename`, str, name of current output file.
1214              
1215             - `output_filenum`, uint, the number of the current output file, 1 being the
1216             first file, 2 for the second, and so on.
1217              
1218             - `output_fh`, handle to the current output file.
1219              
1220             - `output_rownum`, uint. The number of rows that have been outputted (reset
1221             after each output file).
1222              
1223             - `output_data_rownum`, uint. The number of data rows that have been outputted
1224             (reset after each output file). This will be equal to `input_rownum` less 1 if
1225             input file has header.
1226              
1227             For other hook-specific keys, see the documentation for associated hook point.
1228              
1229              
1230             *ACCEPTING ADDITIONAL COMMAND-LINE OPTIONS/ARGUMENTS*
1231              
1232             As mentioned above, you will get additional command-line options/arguments in
1233             `$r->{util_args}` hashref. Some options/arguments are already added by
1234             `gen_csv_util`, e.g. `input_filename` or `input_filenames` along with
1235             `input_sep_char`, etc (when your utility declares `reads_csv`),
1236             `output_filename` or `output_filenames` along with `overwrite`,
1237             `output_sep_char`, etc (when your utility declares `writes_csv`).
1238              
1239             If you want to accept additional arguments/options, you specify them in
1240             `add_args` (hashref, with key being Each option/argument has to be specified
1241             first via `add_args` (as hashref, with key being argument name and value the
1242             argument specification as defined in <pm:Rinci::function>)). Some argument
1243             specifications have been defined in <pm:App::CSVUtils> and can be used. See
1244             existing utilities for examples.
1245              
1246              
1247             *READING CSV DATA*
1248              
1249             To read CSV data, normally your utility would provide handler for the
1250             `on_input_data_row` hook and sometimes additionally `on_input_header_row`.
1251              
1252              
1253             *OUTPUTTING STRING OR RETURNING RESULT*
1254              
1255             To output string, usually you call the provided routine `$r->{code_print}`. This
1256             routine will open the output files for you.
1257              
1258             You can also return enveloped result directly by setting `$r->{result}`.
1259              
1260              
1261             *OUTPUTTING CSV DATA*
1262              
1263             To output CSV data, usually you call the provided routine `$r->{code_print_row}`.
1264             This routine accepts a row (arrayref or hashref). This routine will open the
1265             output files for you when needed, as well as print header row automatically.
1266              
1267             You can also buffer rows from input to e.g. `$r->{output_rows}`, then call
1268             `$r->{code_print_row}` repeatedly in the `after_read_input` hook to print all the
1269             rows.
1270              
1271              
1272             *READING MULTIPLE CSV FILES*
1273              
1274             To read multiple CSV files, you first specify `reads_multiple_csv`. Then, you
1275             can supply handler for `on_input_header_row` and `on_input_data_row` as usual.
1276             If you want to do something before/after each input file, you can also supply
1277             handler for `before_open_input_file` or `after_close_input_file`.
1278              
1279              
1280             *WRITING TO MULTIPLE CSV FILES*
1281              
1282             Similarly, to write to many CSv files, you first specify `writes_multiple_csv`.
1283             Then, you can supply handler for `on_input_header_row` and `on_input_data_row`
1284             as usual. To switch to the next file, set
1285             `$r->{wants_switch_to_next_output_file}` to true, in which case the next call to
1286             `$r->{code_print_row}` will close the current file and open the next file.
1287              
1288              
1289             *CHANGING THE OUTPUT FIELDS*
1290              
1291             When calling `$r->{code_print_row}`, you can output whatever fields you want. By
1292             convention, you can set `$r->{output_fields}` and `$r->{output_fields_idx}` to
1293             let other handlers know about the output fields. For example, see the
1294             implementation of <prog:csv-concat>.
1295              
1296             _
1297             args => {
1298             name => {
1299             schema => 'perl::identifier::unqualified_ascii*',
1300             req => 1,
1301             tags => ['category:metadata'],
1302             },
1303             summary => {
1304             schema => 'str*',
1305             tags => ['category:metadata'],
1306             },
1307             description => {
1308             schema => 'str*',
1309             tags => ['category:metadata'],
1310             },
1311             links => {
1312             schema => ['array*', of=>'hash*'], # XXX defhashes
1313             tags => ['category:metadata'],
1314             },
1315             examples => {
1316             schema => ['array*'], # defhashes
1317             tags => ['category:metadata'],
1318             },
1319             add_meta_props => {
1320             summary => 'Add additional Rinci function metadata properties',
1321             schema => ['hash*'],
1322             tags => ['category:metadata'],
1323             },
1324             add_args => {
1325             schema => ['hash*'],
1326             tags => ['category:metadata'],
1327             },
1328             add_args_rels => {
1329             schema => ['hash*'],
1330             tags => ['category:metadata'],
1331             },
1332              
1333             reads_csv => {
1334             summary => 'Whether utility reads CSV data',
1335             'summary.alt.bool.not' => 'Specify that utility does not read CSV data',
1336             schema => 'bool*',
1337             default => 1,
1338             },
1339             reads_multiple_csv => {
1340             summary => 'Whether utility accepts CSV data',
1341             schema => 'bool*',
1342             description => <<'_',
1343              
1344             Setting this option to true will implicitly set the `reads_csv` option to true,
1345             obviously.
1346              
1347             _
1348             },
1349             writes_csv => {
1350             summary => 'Whether utility writes CSV data',
1351             'summary.alt.bool.not' => 'Specify that utility does not write CSV data',
1352             schema => 'bool*',
1353             default => 1,
1354             },
1355             writes_multiple_csv => {
1356             summary => 'Whether utility outputs CSV data',
1357             schema => 'bool*',
1358             description => <<'_',
1359              
1360             Setting this option to true will implicitly set the `writes_csv` option to true,
1361             obviously.
1362              
1363             _
1364             },
1365              
1366             on_begin => {
1367             schema => 'code*',
1368             },
1369             before_read_input => {
1370             schema => 'code*',
1371             },
1372             before_open_input_files => {
1373             schema => 'code*',
1374             },
1375             before_open_input_file => {
1376             schema => 'code*',
1377             },
1378             on_input_header_row => {
1379             schema => 'code*',
1380             },
1381             on_input_data_row => {
1382             schema => 'code*',
1383             },
1384             after_close_input_file => {
1385             schema => 'code*',
1386             },
1387             after_close_input_files => {
1388             schema => 'code*',
1389             },
1390             after_read_input => {
1391             schema => 'code*',
1392             },
1393             on_end => {
1394             schema => 'code*',
1395             },
1396             },
1397             result_naked => 1,
1398             result => {
1399             schema => 'bool*',
1400             },
1401             };
1402             sub gen_csv_util {
1403 25     25 1 204 my %gen_args = @_;
1404              
1405 25 50       470 my $name = delete($gen_args{name}) or die "Please specify name";
1406 25   50     89 my $summary = delete($gen_args{summary}) // '(No summary)';
1407 25   100     92 my $description = delete($gen_args{description}) // '(No description)';
1408 25   100     108 my $links = delete($gen_args{links}) // [];
1409 25   100     83 my $examples = delete($gen_args{examples}) // [];
1410 25         46 my $add_meta_props = delete $gen_args{add_meta_props};
1411 25         44 my $add_args = delete $gen_args{add_args};
1412 25         47 my $add_args_rels = delete $gen_args{add_args_rels};
1413 25         39 my $reads_multiple_csv = delete($gen_args{reads_multiple_csv});
1414 25   50     84 my $reads_csv = delete($gen_args{reads_csv}) // 1;
1415 25   50     53 my $tags = [ @{ delete($gen_args{tags}) // [] } ];
  25         90  
1416 25 100       62 $reads_csv = 1 if $reads_multiple_csv;
1417 25         36 my $writes_multiple_csv = delete($gen_args{writes_multiple_csv});
1418 25   100     83 my $writes_csv = delete($gen_args{writes_csv}) // 1;
1419 25 50       68 $writes_csv = 1 if $writes_multiple_csv;
1420 25         48 my $on_begin = delete $gen_args{on_begin};
1421 25         39 my $before_read_input = delete $gen_args{before_read_input};
1422 25         43 my $before_open_input_files = delete $gen_args{before_open_input_files};
1423 25         42 my $before_open_input_file = delete $gen_args{before_open_input_file};
1424 25         33 my $on_input_header_row = delete $gen_args{on_input_header_row};
1425 25         39 my $on_input_data_row = delete $gen_args{on_input_data_row};
1426 25         37 my $after_close_input_file = delete $gen_args{after_close_input_file};
1427 25         36 my $after_close_input_files = delete $gen_args{after_close_input_files};
1428 25         40 my $after_read_input = delete $gen_args{after_read_input};
1429 25         47 my $on_end = delete $gen_args{on_end};
1430              
1431 25 50       77 scalar(keys %gen_args) and die "Unknown argument(s): ".join(", ", keys %gen_args);
1432              
1433 25         41 my $code;
1434             CREATE_CODE: {
1435 25         39 $code = sub {
1436 119     119   302386 my %util_args = @_;
1437              
1438 119   100     576 my $has_header = $util_args{input_header} // 1;
1439 119   66     381 my $outputs_header = $util_args{output_header} // $has_header;
1440              
1441 119         398 my $r = {
1442             gen_args => \%gen_args,
1443             util_args => \%util_args,
1444             name => $name,
1445             };
1446              
1447             # inside the main eval block, we call hook handlers. A handler can
1448             # throw an exception (which can be a string or an enveloped response
1449             # like [500, "some error message"], see Rinci::function). we trap
1450             # the exception so we can return the appropriate enveloped response.
1451             MAIN_EVAL:
1452 119         211 eval {
1453              
1454             # do some checking
1455 119 50 33     342 if ($util_args{inplace} && (!$reads_csv || !$writes_csv)) {
      66        
1456 0         0 die [412, "--inplace cannot be specified when we do not read & write CSV"];
1457             }
1458              
1459 119 100       251 if ($on_begin) {
1460 37         122 log_trace "[csvutil] Calling on_begin hook handler ...";
1461 37         150 $on_begin->($r);
1462             }
1463              
1464             my $code_open_file = sub {
1465             # set output filenames, if not yet
1466 211 100   211   414 unless ($r->{output_filenames}) {
1467 79         114 my @output_filenames;
1468 79 100       188 if ($util_args{inplace}) {
    50          
1469 2         3 for my $input_filename (@{ $r->{input_filenames} }) {
  2         5  
1470 2         4 my $output_filename;
1471 2         3 while (1) {
1472 2         10 $output_filename = $input_filename . "." . _randext(5);
1473 2 50       56 last unless -e $output_filename;
1474             }
1475 2         20 push @output_filenames, $output_filename;
1476             }
1477             } elsif ($writes_multiple_csv) {
1478 0   0     0 @output_filenames = @{ $util_args{output_filenames} // ['-'] };
  0         0  
1479             } else {
1480 77   50     340 @output_filenames = ($util_args{output_filename} // '-');
1481             }
1482              
1483             CHECK_OUTPUT_FILENAME_SAME_AS_INPUT_FILENAME: {
1484 79         129 my %seen_output_abs_path; # key = output filename
  79         101  
1485 79 100 66     255 last unless $reads_csv && $writes_csv;
1486 76         104 for my $input_filename (@{ $r->{input_filenames} }) {
  76         157  
1487 78 50       156 next if $input_filename eq '-';
1488 78         2231 my $input_abs_path = Cwd::abs_path($input_filename);
1489 78 50       269 die [500, "Can't get absolute path of input filename '$input_filename'"] unless $input_abs_path;
1490 78         174 for my $output_filename (@output_filenames) {
1491 78 100       269 next if $output_filename eq '-';
1492 2 50       5 next if $seen_output_abs_path{$output_filename};
1493 2         41 my $output_abs_path = Cwd::abs_path($output_filename);
1494 2 50       8 die [500, "Can't get absolute path of output filename '$output_filename'"] unless $output_abs_path;
1495 2 0       8 die [412, "Cannot set output filename to '$output_filename' ".
    50          
1496             ($output_filename ne $output_abs_path ? "($output_abs_path) ":"").
1497             "because it is the same as input filename and input will be clobbered; use --inplace to avoid clobbering<"]
1498             if $output_abs_path eq $input_abs_path;
1499             }
1500             }
1501             } # CHECK_OUTPUT_FILENAME_SAME_AS_INPUT_FILENAME
1502              
1503 79         211 $r->{output_filenames} = \@output_filenames;
1504 79   50     369 $r->{output_num_of_files} //= scalar(@output_filenames);
1505             } # set output filenames
1506              
1507             # open the next file, if not yet
1508 211 100 66     890 if (!$r->{output_fh} || $r->{wants_switch_to_next_output_file}) {
1509 79   50     368 $r->{output_filenum} //= 0;
1510 79         107 $r->{output_filenum}++;
1511              
1512 79         233 $r->{output_rownum} = 0;
1513 79         142 $r->{output_data_rownum} = 0;
1514              
1515             # close the previous file, if any
1516 79 50 33     187 if ($r->{output_fh} && $r->{output_filename} ne '-') {
1517 0         0 log_info "[csvutil] Closing output file '$r->{output_filename}' ...";
1518 0 0       0 close $r->{output_fh} or die [500, "Can't close output file '$r->{output_filename}': $!"];
1519 0         0 delete $r->{has_printed_header};
1520 0         0 delete $r->{wants_switch_to_next_output_file};
1521             }
1522              
1523             # we have exhausted all the files, do nothing & return
1524 79 50       113 return if $r->{output_filenum} > @{ $r->{output_filenames} };
  79         267  
1525              
1526 79         195 $r->{output_filename} = $r->{output_filenames}[ $r->{output_filenum}-1 ];
1527             log_info "[csvutil] [%d/%s] Opening output file %s ...",
1528 79         305 $r->{output_filenum}, $r->{output_num_of_files}, $r->{output_filename};
1529 79 100       287 if ($r->{output_filename} eq '-') {
1530 77         190 $r->{output_fh} = \*STDOUT;
1531             } else {
1532 2 50       21 if (-f $r->{output_filename}) {
1533 0 0       0 if ($r->{util_args}{overwrite}) {
1534 0         0 log_info "[csvutil] Will be overwriting output file %s", $r->{output_filename};
1535             } else {
1536 0         0 die [412, "Refusing to overwrite existing output file '$r->{output_filename}', choose another name or use --overwrite (-O)"];
1537             }
1538             }
1539 2         11 my ($fh, $err) = _open_file_write($r->{output_filename});
1540 2 50       9 die $err if $err;
1541 2         6 $r->{output_fh} = $fh;
1542             }
1543             } # open the next file
1544 119         742 }; # code_open_file
1545              
1546             my $code_print = sub {
1547 9     9   15 my $str = shift;
1548 9         23 $code_open_file->();
1549 9         14 print { $r->{output_fh} } $str;
  9         275  
1550 119         365 }; # code_print
1551 119         268 $r->{code_print} = $code_print;
1552              
1553 119 100       269 if ($writes_csv) {
1554 94         219 my $output_emitter = _instantiate_emitter(\%util_args);
1555 94         11219 $r->{output_emitter} = $output_emitter;
1556 94         206 $r->{has_printed_header} = 0;
1557              
1558             my $code_print_header_row = sub {
1559             # set output fields, if not yet
1560 202 100   202   492 unless ($r->{output_fields}) {
1561             # by default, use the
1562 24         55 $r->{output_fields} = $r->{input_fields};
1563             }
1564              
1565             # index the output fields, if not yet
1566 202 100       424 unless ($r->{output_fields_idx}) {
1567 75         187 $r->{output_fields_idx} = {};
1568 75         107 for my $j (0 .. $#{ $r->{output_fields} }) {
  75         210  
1569 206         430 $r->{output_fields_idx}{ $r->{output_fields}[$j] } = $j;
1570             }
1571             }
1572              
1573 202         473 $code_open_file->();
1574              
1575             # print header line, if not yet
1576 202 100 100     682 if ($outputs_header && !$r->{has_printed_header}) {
1577 74         110 $r->{has_printed_header}++;
1578 74         1388 $r->{output_emitter}->print($r->{output_fh}, $r->{output_fields});
1579 74         3880 print { $r->{output_fh} } "\n";
  74         969  
1580 74         288 $r->{output_rownum}++;
1581             }
1582 94         419 };
1583 94         177 $r->{code_print_header_row} = $code_print_header_row;
1584              
1585             my $code_print_row = sub {
1586 202     202   337 my $row = shift;
1587              
1588 202         490 $code_print_header_row->();
1589              
1590             # print data line
1591 202 50       506 if ($row) {
1592 202 50       504 if (ref $row eq 'HASH') {
1593 0         0 my $row0 = $row;
1594 0         0 $row = [];
1595 0         0 for my $j (0 .. $#{ $r->{output_fields} }) {
  0         0  
1596 0   0     0 $row->[$j] = $row0->{ $r->{output_fields}[$j] } // '';
1597             }
1598             }
1599 202         1376 $r->{output_emitter}->print( $r->{output_fh}, $row );
1600 202         4434 print { $r->{output_fh} } "\n";
  202         2113  
1601 202         618 $r->{output_rownum}++;
1602 202         1077 $r->{output_data_rownum}++;
1603             }
1604 94         288 }; # code_print_row
1605 94         230 $r->{code_print_row} = $code_print_row;
1606             } # if outputs csv
1607              
1608 119 50       269 if ($before_read_input) {
1609 0         0 log_trace "[csvutil] Calling before_read_input handler ...";
1610 0         0 $before_read_input->($r);
1611             }
1612              
1613             READ_CSV: {
1614 119 50       178 last unless $reads_csv;
  119         235  
1615              
1616 119         257 my $input_parser = _instantiate_parser(\%util_args, 'input_');
1617 119         13022 $r->{input_parser} = $input_parser;
1618              
1619 119         206 my @input_filenames;
1620 119 100       266 if ($reads_multiple_csv) {
1621 18   50     32 @input_filenames = @{ $util_args{input_filenames} // ['-'] };
  18         74  
1622             } else {
1623 101   50     334 @input_filenames = ($util_args{input_filename} // '-');
1624             }
1625 119   50     552 $r->{input_filenames} //= \@input_filenames;
1626              
1627             BEFORE_INPUT_FILENAME:
1628 119         221 $r->{input_filenum} = 0;
1629              
1630             INPUT_FILENAME:
1631 119         223 for my $input_filename (@input_filenames) {
1632 138         218 $r->{input_filenum}++;
1633 138         269 $r->{input_filename} = $input_filename;
1634              
1635 138 100 100     478 if ($r->{input_filenum} == 1 && $before_open_input_files) {
1636 1         6 log_trace "[csvutil] Calling before_open_input_files handler ...";
1637 1         7 $before_open_input_files->($r);
1638 1 50       4 if (delete $r->{wants_skip_files}) {
1639 0         0 log_trace "[csvutil] Handler wants to skip files, skipping all input files";
1640 0         0 last READ_CSV;
1641             }
1642             }
1643              
1644 138 50       311 if ($before_open_input_file) {
1645 0         0 log_trace "[csvutil] Calling before_open_input_file handler ...";
1646 0         0 $before_open_input_file->($r);
1647 0 0       0 if (delete $r->{wants_skip_file}) {
    0          
1648 0         0 log_trace "[csvutil] Handler wants to skip this file, moving on to the next file";
1649 0         0 next INPUT_FILENAME;
1650             } elsif (delete $r->{wants_skip_files}) {
1651 0         0 log_trace "[csvutil] Handler wants to skip all files, skipping all input files";
1652 0         0 last READ_CSV;
1653             }
1654             }
1655              
1656             log_info "[csvutil] [file %d/%d] Reading input file %s ...",
1657 138         495 $r->{input_filenum}, scalar(@input_filenames), $input_filename;
1658 138         470 my ($fh, $err) = _open_file_read($input_filename);
1659 138 50       331 die $err if $err;
1660 138         523 $r->{input_fh} = $r->{input_fhs}[ $r->{input_filenum}-1 ] = $fh;
1661              
1662 138         193 my $i;
1663 138         224 $r->{input_header_row_count} = 0;
1664 138         365 $r->{input_data_row_count} = 0;
1665 138         255 $r->{input_fields} = []; # array, field names in order
1666 138         258 $r->{input_field_idxs} = {}; # key=field name, value=index (0-based)
1667 138         207 my $row0;
1668             my $code_getline = sub {
1669 661 50 33 661   3310 if ($r->{stdin_input_fields} && $r->{input_filename} eq '-') {
    100 100        
    100 100        
1670 0 0       0 if ($i == 0) {
1671             # we have read the header for stdin. since
1672             # we can't seek to the beginning, we return
1673             # the saved fields
1674 0         0 $r->{input_header_row_count}++;
1675 0         0 return $r->{stdin_input_fields};
1676             } else {
1677 0         0 my $row = $input_parser->getline($r->{input_fh});
1678 0 0       0 $r->{input_data_row_count}++ if $row;
1679 0         0 return $row;
1680             }
1681             } elsif ($i == 0 && !$has_header) {
1682             # this is the first line of a file and user
1683             # specifies there is no input header. we save
1684             # the line and return the generated field names
1685             # instead.
1686 4         165 $row0 = $input_parser->getline($r->{input_fh});
1687 4 50       254 return unless $row0;
1688 4         14 return [map { "field$_" } 1..@$row0];
  12         53  
1689             } elsif ($i == 1 && !$has_header) {
1690             # we return the saved first line
1691 4 50       12 $r->{input_data_row_count}++ if $row0;
1692 4         12 return $row0;
1693             }
1694 653         17028 my $res = $input_parser->getline($r->{input_fh});
1695 653 100       23844 if ($res) {
1696 537 100       1191 $r->{input_header_row_count}++ if $i==0;
1697 537 100       1121 $r->{input_data_row_count}++ if $i;
1698             }
1699 653         1883 $res;
1700 138         712 };
1701 138         363 $r->{code_getline} = $code_getline;
1702              
1703 138         218 $i = 0;
1704 138         260 while ($r->{input_row} = $code_getline->()) {
1705 545         733 $i++;
1706 545         867 $r->{input_rownum} = $i;
1707 545 100       1124 $r->{input_data_rownum} = $has_header ? $i-1 : $i;
1708 545 100       983 if ($i == 1) {
1709             # gather the list of fields
1710 138         235 $r->{input_fields} = $r->{input_row};
1711 138 50 0     346 $r->{stdin_input_fields} //= $r->{input_row} if $input_filename eq '-';
1712 138         226 $r->{orig_input_fields} = $r->{input_fields};
1713 138         304 $r->{input_fields_idx} = {};
1714 138         199 for my $j (0 .. $#{ $r->{input_fields} }) {
  138         443  
1715 370         893 $r->{input_fields_idx}{ $r->{input_fields}[$j] } = $j;
1716             }
1717              
1718 138 100       281 if ($on_input_header_row) {
1719 124         436 log_trace "[csvutil] Calling on_input_header_row hook handler ...";
1720 124         545 $on_input_header_row->($r);
1721              
1722 115 100       422 if (delete $r->{wants_skip_file}) {
    100          
1723 3         10 log_trace "[csvutil] Handler wants to skip this file, moving on to the next file";
1724 3         13 next INPUT_FILENAME;
1725             } elsif (delete $r->{wants_skip_files}) {
1726 1         8 log_trace "[csvutil] Handler wants to skip all files, skipping all input files";
1727 1         4 last READ_CSV;
1728             }
1729             }
1730              
1731             # reindex the fields, should the above hook
1732             # handler adds/removes fields. let's save the
1733             # old fields_idx to orig_fields_idx.
1734 125         219 $r->{orig_input_fields_idx} = $r->{input_fields_idx};
1735 125         192 $r->{input_fields_idx} = {};
1736 125         252 for my $j (0 .. $#{ $r->{input_fields} }) {
  125         307  
1737 335         802 $r->{input_fields_idx}{ $r->{input_fields}[$j] } = $j;
1738             }
1739              
1740             } else {
1741             # fill up the elements of row to the number of
1742             # fields, in case the row contains sparse values
1743 407 50 33     928 unless (defined $r->{wants_fill_rows} && !$r->{wants_fill_rows}) {
1744 407 100       528 if (@{ $r->{input_row} } < @{ $r->{input_fields} }) {
  407         640  
  407         813  
1745 3         7 splice @{ $r->{input_row} }, scalar(@{ $r->{input_row} }), 0, (("") x (@{ $r->{input_fields} } - @{ $r->{input_row} }));
  3         6  
  3         4  
  3         7  
  3         8  
1746             }
1747             }
1748              
1749             # generate the hashref version of row if utility
1750             # requires it
1751 407 100       804 if ($r->{wants_input_row_as_hashref}) {
1752 34         71 $r->{input_row_as_hashref} = {};
1753 34         55 for my $j (0 .. $#{ $r->{input_row} }) {
  34         84  
1754             # ignore extraneous data fields
1755 93 50       122 last if $j >= @{ $r->{input_fields} };
  93         171  
1756 93         231 $r->{input_row_as_hashref}{ $r->{input_fields}[$j] } = $r->{input_row}[$j];
1757             }
1758             }
1759              
1760 407 50       804 if ($on_input_data_row) {
1761 407 100       975 log_trace "[csvutil] Calling on_input_data_row hook handler (for first data row) ..." if $r->{input_rownum} <= 2;
1762 407         1241 $on_input_data_row->($r);
1763              
1764 402 100       1405 if (delete $r->{wants_skip_file}) {
    100          
1765 2         7 log_trace "[csvutil] Handler wants to skip this file, moving on to the next file";
1766 2         8 next INPUT_FILENAME;
1767             } elsif (delete $r->{wants_skip_files}) {
1768 2         8 log_trace "[csvutil] Handler wants to skip all files, skipping all input files";
1769 2         9 last READ_CSV;
1770             }
1771             }
1772             }
1773              
1774             } # while getline
1775              
1776             # XXX actually close filehandle except stdin
1777              
1778 116 50       357 if ($after_close_input_file) {
1779 0         0 log_trace "[csvutil] Calling after_close_input_file handler ...";
1780 0         0 $after_close_input_file->($r);
1781 0 0       0 if (delete $r->{wants_skip_files}) {
1782 0         0 log_trace "[csvutil] Handler wants to skip reading all file, skipping";
1783 0         0 last READ_CSV;
1784             }
1785             }
1786             } # for input_filename
1787              
1788 102 100       577 if ($after_close_input_files) {
1789 32         102 log_trace "[csvutil] Calling after_close_input_files handler ...";
1790 32         127 $after_close_input_files->($r);
1791             }
1792              
1793             } # READ_CSV
1794              
1795             # cleanup stash from csv-reading-related keys
1796 105         445 delete $r->{input_filenames};
1797 105         167 delete $r->{input_filenum};
1798 105         175 delete $r->{input_filename};
1799 105         152 delete $r->{input_fh};
1800 105         157 delete $r->{input_rownum};
1801 105         174 delete $r->{input_data_rownum};
1802 105         155 delete $r->{input_row};
1803 105         158 delete $r->{input_row_as_hashref};
1804 105         193 delete $r->{input_fields};
1805 105         216 delete $r->{input_fields_idx};
1806 105         174 delete $r->{orig_input_fields_idx};
1807 105         833 delete $r->{code_getline};
1808 105         158 delete $r->{wants_input_row_as_hashref};
1809              
1810 105 100       222 if ($after_read_input) {
1811 15         64 log_trace "[csvutil] Calling after_read_input handler ...";
1812 15         60 $after_read_input->($r);
1813             }
1814              
1815             # cleanup stash from csv-outputting-related keys
1816 105         224 delete $r->{output_num_of_files};
1817 105         149 delete $r->{output_filenum};
1818 105 100       238 if ($r->{output_fh}) {
1819 79 100       229 if ($r->{output_filename} ne '-') {
1820 2         13 log_info "[csvutil] Closing output file '$r->{output_filename}' ...";
1821 2 50       134 close $r->{output_fh} or die [500, "Can't close output file '$r->{output_filename}': $!"];
1822             }
1823 79         146 delete $r->{output_fh};
1824             }
1825 105 100       244 if ($r->{util_args}{inplace}) {
1826 2   50     15 my $output_filenum = $r->{output_filenum} // 0;
1827 2         4 my $i = -1;
1828 2         4 for my $output_filename (@{ $r->{output_filenames} }) {
  2         7  
1829 2         3 $i++;
1830 2 50       7 last if $i > $output_filenum;
1831 2 50       24 (my $input_filename = $output_filename) =~ s/\.\w{5}\z//
1832             or die [500, "BUG: Can't get original input file '$output_filename'"];
1833 2 100       10 if (length(my $ext = $r->{util_args}{inplace_backup_ext})) {
1834 1         7 my $backup_filename = $input_filename . $ext;
1835 1         7 log_info "[csvutil] Backing up input file '$output_filename' -> '$backup_filename' ...";
1836 1 50       43 rename $input_filename, $backup_filename or die [500, "Can't rename '$input_filename' -> '$backup_filename': $!"];
1837             }
1838 2         14 log_info "[csvutil] Renaming from temporary output file '$output_filename' -> '$input_filename' ...";
1839 2 50       179 rename $output_filename, $input_filename or die [500, "Can't rename back '$output_filename' -> '$input_filename': $!"];
1840             }
1841             }
1842 105         164 delete $r->{output_filenames};
1843 105         153 delete $r->{output_filename};
1844 105         147 delete $r->{output_rownum};
1845 105         136 delete $r->{output_data_rownum};
1846 105         166 delete $r->{code_print};
1847 105         478 delete $r->{code_print_row};
1848 105         403 delete $r->{code_print_header_row};
1849 105         150 delete $r->{has_printed_header};
1850 105         138 delete $r->{wants_switch_to_next_output_file};
1851              
1852 105 100       1654 if ($on_end) {
1853 11         36 log_trace "[csvutil] Calling on_end hook handler ...";
1854 11         49 $on_end->($r);
1855             }
1856              
1857             }; # MAIN_EVAL
1858              
1859 119         255 my $err = $@;
1860 119 100       240 if ($err) {
1861 14 50       45 $err = [500, $err] unless ref $err;
1862 14         49 return $err;
1863             }
1864              
1865             RETURN_RESULT:
1866 105 100       302 if (!$r->{result}) {
    50          
    50          
1867 91         229 $r->{result} = [200];
1868             } elsif (!ref($r->{result})) {
1869 0         0 $r->{result} = [500, "BUG: Result (r->{result}) is set to a non-reference ($r->{result}), probably by one of the handlers"];
1870             } elsif (ref($r->{result}) ne 'ARRAY') {
1871 0         0 $r->{result} = [500, "BUG: Result (r->{result}) is not set to an enveloped result (arrayref) ($r->{result}), probably by one of the handlers"];
1872             }
1873 105         3490 $r->{result};
1874 25         290 };
1875             } # CREATE_CODE
1876              
1877 25         51 my $meta;
1878             CREATE_META: {
1879              
1880 25         545 $meta = {
1881             v => 1.1,
1882             summary => $summary,
1883             description => $description,
1884             args => {},
1885             args_rels => {},
1886             links => $links,
1887             examples => $examples,
1888             tags => $tags,
1889             };
1890              
1891             CREATE_ARGS_PROP: {
1892 25 50       46 if ($add_args) {
  25         89  
1893 25         167 $meta->{args}{$_} = $add_args->{$_} for keys %$add_args;
1894             }
1895              
1896 25 50       76 if ($reads_csv) {
1897 25         111 $meta->{args}{$_} = {%{$argspecs_csv_input{$_}}} for keys %argspecs_csv_input;
  125         557  
1898              
1899 25 100       66 if ($reads_multiple_csv) {
1900 3         6 $meta->{args}{input_filenames} = {%{$argspecopt_input_filenames{input_filenames}}};
  3         23  
1901 3         16 _add_arg_pos($meta->{args}, 'input_filenames', 'slurpy');
1902 3         9 push @$tags, 'reads-multiple-csv';
1903             } else {
1904 22         36 $meta->{args}{input_filename} = {%{$argspecopt_input_filename{input_filename}}};
  22         118  
1905 22         77 _add_arg_pos($meta->{args}, 'input_filename');
1906             }
1907              
1908 25         68 push @$tags, 'reads-csv';
1909             } # if reads_csv
1910              
1911 25 100       82 if ($writes_csv) {
1912 17         66 $meta->{args}{$_} = {%{$argspecs_csv_output{$_}}} for keys %argspecs_csv_output;
  119         499  
1913              
1914 17 50       50 if ($reads_csv) {
1915 17         56 $meta->{args}{$_} = {%{$argspecsopt_inplace{$_}}} for keys %argspecsopt_inplace;
  34         185  
1916 17   50     139 $meta->{args_rels}{'dep_all&'} //= [];
1917 17         34 push @{ $meta->{args_rels}{'dep_all&'} }, ['inplace_backup_ext', ['inplace']];
  17         64  
1918 17   50     79 $meta->{args_rels}{'choose_one&'} //= [];
1919 17         29 push @{ $meta->{args_rels}{'choose_one&'} }, ['inplace', 'output_filename'];
  17         50  
1920 17         28 push @{ $meta->{args_rels}{'choose_one&'} }, ['inplace', 'output_filenames'];
  17         45  
1921             }
1922              
1923 17 50       48 if ($writes_multiple_csv) {
1924 0         0 $meta->{args}{output_filenames} = {%{$argspecopt_output_filenames{output_filenames}}};
  0         0  
1925 0         0 _add_arg_pos($meta->{args}, 'output_filenames', 'slurpy');
1926 0 0       0 if ($reads_csv) {
1927 0   0     0 $meta->{args_rels}{'choose_one&'} //= [];
1928 0         0 push @{ $meta->{args_rels}{'choose_one&'} }, [qw/output_filenames inplace/];
  0         0  
1929             }
1930 0         0 push @$tags, 'writes-multiple-csv';
1931             } else {
1932 17         29 $meta->{args}{output_filename} = {%{$argspecopt_output_filename{output_filename}}};
  17         81  
1933 17         75 _add_arg_pos($meta->{args}, 'output_filename');
1934 17 50       56 if ($reads_csv) {
1935 17   50     47 $meta->{args_rels}{'choose_one&'} //= [];
1936 17         30 push @{ $meta->{args_rels}{'choose_one&'} }, [qw/output_filename inplace/];
  17         88  
1937             }
1938             }
1939              
1940 17         23 $meta->{args}{overwrite} = {%{$argspecopt_overwrite{overwrite}}};
  17         79  
1941 17   50     98 $meta->{args_rels}{'dep_any&'} //= [];
1942 17         54 push @{ $meta->{args_rels}{'dep_any&'} }, ['overwrite', ['output_filename', 'output_filenames']];
  17         67  
1943              
1944 17         51 push @$tags, 'writes-csv';
1945             } # if writes csv
1946              
1947             } # CREATE_ARGS_PROP
1948              
1949             CREATE_ARGS_RELS_PROP: {
1950 25         42 $meta->{args_rels} = {};
  25         81  
1951 25 100       59 if ($add_args_rels) {
1952 3         17 $meta->{args_rels}{$_} = $add_args_rels->{$_} for keys %$add_args_rels;
1953             }
1954             } # CREATE_ARGS_RELS_PROP
1955              
1956 25 50       74 if ($add_meta_props) {
1957 0         0 $meta->{$_} = $add_meta_props->{$_} for keys %$add_meta_props;
1958             }
1959              
1960             } # CREATE_META
1961              
1962             {
1963 25         41 my $package = caller();
  25         42  
  25         96  
1964 1     1   9 no strict 'refs'; ## no critic: TestingAndDebugging::ProhibitNoStrict
  1         3  
  1         336  
1965 25         508 *{"$package\::$name"} = $code;
  25         154  
1966             #use DD; dd $meta;
1967 25         41 ${"$package\::SPEC"}{$name} = $meta;
  25         150  
1968             }
1969              
1970 25         89 1;
1971             }
1972              
1973             1;
1974             # ABSTRACT: CLI utilities related to CSV
1975              
1976             __END__
1977              
1978             =pod
1979              
1980             =encoding UTF-8
1981              
1982             =head1 NAME
1983              
1984             App::CSVUtils - CLI utilities related to CSV
1985              
1986             =head1 VERSION
1987              
1988             This document describes version 1.030 of App::CSVUtils (from Perl distribution App-CSVUtils), released on 2023-07-25.
1989              
1990             =head1 DESCRIPTION
1991              
1992             This distribution contains the following CLI utilities:
1993              
1994             =over
1995              
1996             =item 1. L<csv-add-fields>
1997              
1998             =item 2. L<csv-avg>
1999              
2000             =item 3. L<csv-check-cell-values>
2001              
2002             =item 4. L<csv-check-field-names>
2003              
2004             =item 5. L<csv-check-field-values>
2005              
2006             =item 6. L<csv-check-rows>
2007              
2008             =item 7. L<csv-cmp>
2009              
2010             =item 8. L<csv-concat>
2011              
2012             =item 9. L<csv-convert-to-hash>
2013              
2014             =item 10. L<csv-csv>
2015              
2016             =item 11. L<csv-delete-fields>
2017              
2018             =item 12. L<csv-dump>
2019              
2020             =item 13. L<csv-each-row>
2021              
2022             =item 14. L<csv-fill-cells>
2023              
2024             =item 15. L<csv-fill-template>
2025              
2026             =item 16. L<csv-find-values>
2027              
2028             =item 17. L<csv-freqtable>
2029              
2030             =item 18. L<csv-gen>
2031              
2032             =item 19. L<csv-get-cells>
2033              
2034             =item 20. L<csv-grep>
2035              
2036             =item 21. L<csv-info>
2037              
2038             =item 22. L<csv-intrange>
2039              
2040             =item 23. L<csv-list-field-names>
2041              
2042             =item 24. L<csv-lookup-fields>
2043              
2044             =item 25. L<csv-ltrim>
2045              
2046             =item 26. L<csv-map>
2047              
2048             =item 27. L<csv-munge-field>
2049              
2050             =item 28. L<csv-munge-rows>
2051              
2052             =item 29. L<csv-pick>
2053              
2054             =item 30. L<csv-pick-cells>
2055              
2056             =item 31. L<csv-pick-fields>
2057              
2058             =item 32. L<csv-pick-rows>
2059              
2060             =item 33. L<csv-quote>
2061              
2062             =item 34. L<csv-replace-newline>
2063              
2064             =item 35. L<csv-rtrim>
2065              
2066             =item 36. L<csv-select-fields>
2067              
2068             =item 37. L<csv-select-rows>
2069              
2070             =item 38. L<csv-setop>
2071              
2072             =item 39. L<csv-shuf>
2073              
2074             =item 40. L<csv-shuf-fields>
2075              
2076             =item 41. L<csv-shuf-rows>
2077              
2078             =item 42. L<csv-sort>
2079              
2080             =item 43. L<csv-sort-fields>
2081              
2082             =item 44. L<csv-sort-rows>
2083              
2084             =item 45. L<csv-sorted>
2085              
2086             =item 46. L<csv-sorted-fields>
2087              
2088             =item 47. L<csv-sorted-rows>
2089              
2090             =item 48. L<csv-split>
2091              
2092             =item 49. L<csv-sum>
2093              
2094             =item 50. L<csv-transpose>
2095              
2096             =item 51. L<csv-trim>
2097              
2098             =item 52. L<csv-uniq>
2099              
2100             =item 53. L<csv-unquote>
2101              
2102             =item 54. L<csv2ltsv>
2103              
2104             =item 55. L<csv2paras>
2105              
2106             =item 56. L<csv2td>
2107              
2108             =item 57. L<csv2tsv>
2109              
2110             =item 58. L<csv2vcf>
2111              
2112             =item 59. L<list-csvutils>
2113              
2114             =item 60. L<paras2csv>
2115              
2116             =item 61. L<tsv2csv>
2117              
2118             =back
2119              
2120             =head1 FUNCTIONS
2121              
2122              
2123             =head2 gen_csv_util
2124              
2125             Usage:
2126              
2127             gen_csv_util(%args) -> bool
2128              
2129             Generate a CSV utility.
2130              
2131             This routine is used to generate a CSV utility in the form of a L<Rinci>
2132             function (code and metadata). You can then produce a CLI from the Rinci function
2133             simply using L<Perinci::CmdLine::Gen> or, if you use L<Dist::Zilla>,
2134             L<Dist::Zilla::Plugin::GenPericmdScript> or, if on the command-line,
2135             L<gen-pericmd-script>.
2136              
2137             Using this routine, by providing just one or a few hooks and setting some
2138             parameters like a couple of extra arguments, you will get a complete CLI with
2139             decent POD/manpage, ability to read one or multiple CSV's and write one or
2140             multiple CSV's, some command-line options to customize how the input CSV's
2141             should be parsed and how the output CSV's should be formatted and named. Your
2142             CLI also has tab completion, usage and help message, and other features.
2143              
2144             To create a CSV utility, you specify a C<name> (e.g. C<csv_dump>; must be a valid
2145             unqualified Perl identifier/function name) and optionally C<summary>,
2146             C<description>, and other metadata like C<links> or even C<add_meta_props>. Then
2147             you specify one or more of C<on_*> or C<before_*> or C<after_*> arguments to supply
2148             handlers (coderefs) for your CSV utility at various hook points.
2149              
2150             I<THE HOOKS>
2151              
2152             All code for hooks should accept a single argument C<r>. C<r> is a stash (hashref)
2153             of various data, the keys of which will depend on which hook point being called.
2154             You can also add more keys to store data or for flow control (see hook
2155             documentation below for more details).
2156              
2157             The order of the hooks, in processing chronological order:
2158              
2159             =over
2160              
2161             =item * on_begin
2162              
2163             Called when utility begins, before reading CSV. You can use this hook e.g. to
2164             process arguments, set output filenames (if you allow custom output
2165             filenames).
2166              
2167             =item * before_read_input
2168              
2169             Called before opening any input CSV file. This hook is I<still> called even if
2170             your utility sets C<reads_csv> to false.
2171              
2172             At this point, the C<input_filenames> stash key (as well as other keys like
2173             C<input_filename>, C<input_filenum>, etc) has not been set. You can use this
2174             hook e.g. to set a custom C<input_filenames>.
2175              
2176             =item * before_open_input_files
2177              
2178             Called before an input CSV file is about to be opened, including for stdin
2179             (C<->). You can use this hook e.g. to check/preprocess input file. Flow control
2180             is available by setting C<< $r-E<gt>{wants_skip_files} >> to skip reading all the input
2181             file and go directly to the C<after_read_input> hook.
2182              
2183             =item * before_open_input_file
2184              
2185             Called before an input CSV file is about to be opened, including for stdin
2186             (C<->). For the first file, called after C<before_open_input_file> hook. You can
2187             use this hook e.g. to check/preprocess input file. Flow control is available
2188             by setting C<< $r-E<gt>{wants_skip_file} >> to skip reading a single input file and go
2189             to the next file, or C<< $r-E<gt>{wants_skip_files} >> to skip reading the rest of the
2190             files and go directly to the C<after_read_input> hook.
2191              
2192             =item * on_input_header_row
2193              
2194             Called when receiving header row. Will be called for every input file, and
2195             called even when user specify C<--no-input-header>, in which case the header
2196             row will be the generated C<["field1", "field2", ...]>. You can use this hook
2197             e.g. to add/remove/rearrange fields.
2198              
2199             You can set C<< $r-E<gt>{wants_fill_rows} >> to a defined false if you do not want
2200             C<< $r-E<gt>{input_rows} >> to be filled with empty string elements when it contains
2201             less than the number of fields (in case of sparse values at the end). Normally
2202             you only want to do this when you want to do checking, e.g. in
2203             L<csv-check-rows>.
2204              
2205             =item * on_input_data_row
2206              
2207             Called when receiving each data row. You can use this hook e.g. to modify the
2208             row or print output (for line-by-line transformation or filtering).
2209              
2210             =item * after_close_input_file
2211              
2212             Called after each input file is closed, including for stdin (C<->) (although
2213             for stdin, the handle is not actually closed). Flow control is possible by
2214             setting C<< $r-E<gt>{wants_skip_files} >> to skip reading the rest of the files and go
2215             straight to the C<after_close_input_files> hook.
2216              
2217             =item * after_close_input_files
2218              
2219             Called after the last input file is closed, after the last
2220             C<after_close_input_file> hook, including for stdin (C<->) (although for stdin,
2221             the handle is not actually closed).
2222              
2223             =item * after_read_input
2224              
2225             Called after the last row of the last CSV file is read and the last file is
2226             closed. This hook is I<still> called, if you set C<reads_csv> option to false.
2227             At this point the stash keys related to CSV reading have all been cleared,
2228             including C<input_filenames>, C<input_filename>, C<input_fh>, etc.
2229              
2230             You can use this hook e.g. to print output if you buffer the output.
2231              
2232             =item * on_end
2233              
2234             Called when utility is about to exit. You can use this hook e.g. to return the
2235             final result.
2236              
2237             =back
2238              
2239             I<THE STASH>
2240              
2241             The common keys that C<r> will contain:
2242              
2243             =over
2244              
2245             =item * C<gen_args>, hash. The arguments used to generate the CSV utility.
2246              
2247             =item * C<util_args>, hash. The arguments that your CSV utility accepts. Parsed from
2248             command-line arguments (or configuration files, or environment variables).
2249              
2250             =item * C<name>, str. The name of the CSV utility. Which can also be retrieved via
2251             C<gen_args>.
2252              
2253             =item * C<code_print>, coderef. Routine provided for you to print something. Accepts a
2254             string. Takes care of opening the output files for you.
2255              
2256             =item * C<code_print_row>, coderef. Routine provided for you to print a data row. You
2257             pass the row (either arrayref or hashref). Takes care of opening the output
2258             files for you, as well as printing header row the first time, if needed.
2259              
2260             =item * C<code_print_header_row>, coderef. Routine provided for you to print header
2261             row. You don't need to pass any arguments. Will only print the header row once
2262             per output file if output header is enabled, even if called multiple times.
2263              
2264             =back
2265              
2266             If you are accepting CSV data (C<reads_csv> gen argument set to true), the
2267             following keys will also be available (in C<on_input_header_row> and
2268             C<on_input_data_row> hooks):
2269              
2270             =over
2271              
2272             =item * C<input_parser>, a L<Text::CSV_XS> instance for input parsing.
2273              
2274             =item * C<input_fields>, array of str. Input CSV's field names.
2275              
2276             =item * C<input_fields_idx>, hash with field name as keys and field index (0-based
2277             integer) as values.
2278              
2279             =item * C<input_filenames>, array of str.
2280              
2281             =item * C<input_filename>, str. The name of the current input file being read (C<-> if
2282             reading from stdin).
2283              
2284             =item * C<input_filenum>, uint. The number of the current input file, 1 being the first
2285             file, 2 for the second, and so on.
2286              
2287             =item * C<input_fh>, the handle to the current file being read.
2288              
2289             =item * C<input_rownum>, uint. The number of rows that have been read (reset after each
2290             input file). In C<on_input_header_row> phase, this will be 1 since header row
2291             (including the generated one) is the first row. Then in C<on_input_data_row>
2292             phase (called the first time for a file), it will be 2 for the first data row,
2293             even if physically it is the first row for CSV file that does not have a
2294             header.
2295              
2296             =item * C<input_data_rownum>, uint. The number of data rows that have been read (reset
2297             after each input file). This will be equal to C<input_rownum> less 1 if input
2298             file has header.
2299              
2300             =item * C<input_row>, aos (array of str). The current input CSV row as an arrayref.
2301              
2302             =item * C<input_row_as_hashref>, hos (hash of str). The current input CSV row as a
2303             hashref, with field names as hash keys and field values as hash values. This
2304             will only be calculated if utility wants it. Utility can express so by setting
2305             C<< $r-E<gt>{wants_input_row_as_hashref} >> to true, e.g. in the C<on_begin> hook.
2306              
2307             =item * C<input_header_row_count>, uint. Contains the number of actual header rows that
2308             have been read. If CLI user specifies C<--no-input-header>, this will stay at
2309             zero. Will be reset for each CSV file.
2310              
2311             =item * C<input_data_row_count>, int. Contains the number of actual data rows that have
2312             read. Will be reset for each CSV file.
2313              
2314             =back
2315              
2316             If you are outputting CSV (C<writes_csv> gen argument set to true), the following
2317             keys will be available:
2318              
2319             =over
2320              
2321             =item * C<output_emitter>, a L<Text::CSV_XS> instance for output.
2322              
2323             =item * C<output_fields>, array of str. Should be set to list of output field names. If
2324             unset, will be set to be the same as C<input_fields>.
2325              
2326             =item * C<output_fields_idx>, hash with field names as keys and field indexes (0-based
2327             integer) as values. Normally you do not need to set this manually; you just
2328             need to set C<output_fields> and this hash will be computed automatically for
2329             you just before the first output row is outputted.
2330              
2331             =item * C<output_filenames>, array of str.
2332              
2333             =item * C<output_filename>, str, name of current output file.
2334              
2335             =item * C<output_filenum>, uint, the number of the current output file, 1 being the
2336             first file, 2 for the second, and so on.
2337              
2338             =item * C<output_fh>, handle to the current output file.
2339              
2340             =item * C<output_rownum>, uint. The number of rows that have been outputted (reset
2341             after each output file).
2342              
2343             =item * C<output_data_rownum>, uint. The number of data rows that have been outputted
2344             (reset after each output file). This will be equal to C<input_rownum> less 1 if
2345             input file has header.
2346              
2347             =back
2348              
2349             For other hook-specific keys, see the documentation for associated hook point.
2350              
2351             I<ACCEPTING ADDITIONAL COMMAND-LINE OPTIONS/ARGUMENTS>
2352              
2353             As mentioned above, you will get additional command-line options/arguments in
2354             C<< $r-E<gt>{util_args} >> hashref. Some options/arguments are already added by
2355             C<gen_csv_util>, e.g. C<input_filename> or C<input_filenames> along with
2356             C<input_sep_char>, etc (when your utility declares C<reads_csv>),
2357             C<output_filename> or C<output_filenames> along with C<overwrite>,
2358             C<output_sep_char>, etc (when your utility declares C<writes_csv>).
2359              
2360             If you want to accept additional arguments/options, you specify them in
2361             C<add_args> (hashref, with key being Each option/argument has to be specified
2362             first via C<add_args> (as hashref, with key being argument name and value the
2363             argument specification as defined in L<Rinci::function>)). Some argument
2364             specifications have been defined in L<App::CSVUtils> and can be used. See
2365             existing utilities for examples.
2366              
2367             I<READING CSV DATA>
2368              
2369             To read CSV data, normally your utility would provide handler for the
2370             C<on_input_data_row> hook and sometimes additionally C<on_input_header_row>.
2371              
2372             I<OUTPUTTING STRING OR RETURNING RESULT>
2373              
2374             To output string, usually you call the provided routine C<< $r-E<gt>{code_print} >>. This
2375             routine will open the output files for you.
2376              
2377             You can also return enveloped result directly by setting C<< $r-E<gt>{result} >>.
2378              
2379             I<OUTPUTTING CSV DATA>
2380              
2381             To output CSV data, usually you call the provided routine C<< $r-E<gt>{code_print_row} >>.
2382             This routine accepts a row (arrayref or hashref). This routine will open the
2383             output files for you when needed, as well as print header row automatically.
2384              
2385             You can also buffer rows from input to e.g. C<< $r-E<gt>{output_rows} >>, then call
2386             C<< $r-E<gt>{code_print_row} >> repeatedly in the C<after_read_input> hook to print all the
2387             rows.
2388              
2389             I<READING MULTIPLE CSV FILES>
2390              
2391             To read multiple CSV files, you first specify C<reads_multiple_csv>. Then, you
2392             can supply handler for C<on_input_header_row> and C<on_input_data_row> as usual.
2393             If you want to do something before/after each input file, you can also supply
2394             handler for C<before_open_input_file> or C<after_close_input_file>.
2395              
2396             I<WRITING TO MULTIPLE CSV FILES>
2397              
2398             Similarly, to write to many CSv files, you first specify C<writes_multiple_csv>.
2399             Then, you can supply handler for C<on_input_header_row> and C<on_input_data_row>
2400             as usual. To switch to the next file, set
2401             C<< $r-E<gt>{wants_switch_to_next_output_file} >> to true, in which case the next call to
2402             C<< $r-E<gt>{code_print_row} >> will close the current file and open the next file.
2403              
2404             I<CHANGING THE OUTPUT FIELDS>
2405              
2406             When calling C<< $r-E<gt>{code_print_row} >>, you can output whatever fields you want. By
2407             convention, you can set C<< $r-E<gt>{output_fields} >> and C<< $r-E<gt>{output_fields_idx} >> to
2408             let other handlers know about the output fields. For example, see the
2409             implementation of L<csv-concat>.
2410              
2411             This function is not exported by default, but exportable.
2412              
2413             Arguments ('*' denotes required arguments):
2414              
2415             =over 4
2416              
2417             =item * B<add_args> => I<hash>
2418              
2419             (No description)
2420              
2421             =item * B<add_args_rels> => I<hash>
2422              
2423             (No description)
2424              
2425             =item * B<add_meta_props> => I<hash>
2426              
2427             Add additional Rinci function metadata properties.
2428              
2429             =item * B<after_close_input_file> => I<code>
2430              
2431             (No description)
2432              
2433             =item * B<after_close_input_files> => I<code>
2434              
2435             (No description)
2436              
2437             =item * B<after_read_input> => I<code>
2438              
2439             (No description)
2440              
2441             =item * B<before_open_input_file> => I<code>
2442              
2443             (No description)
2444              
2445             =item * B<before_open_input_files> => I<code>
2446              
2447             (No description)
2448              
2449             =item * B<before_read_input> => I<code>
2450              
2451             (No description)
2452              
2453             =item * B<description> => I<str>
2454              
2455             (No description)
2456              
2457             =item * B<examples> => I<array>
2458              
2459             (No description)
2460              
2461             =item * B<links> => I<array[hash]>
2462              
2463             (No description)
2464              
2465             =item * B<name>* => I<perl::identifier::unqualified_ascii>
2466              
2467             (No description)
2468              
2469             =item * B<on_begin> => I<code>
2470              
2471             (No description)
2472              
2473             =item * B<on_end> => I<code>
2474              
2475             (No description)
2476              
2477             =item * B<on_input_data_row> => I<code>
2478              
2479             (No description)
2480              
2481             =item * B<on_input_header_row> => I<code>
2482              
2483             (No description)
2484              
2485             =item * B<reads_csv> => I<bool> (default: 1)
2486              
2487             Whether utility reads CSV data.
2488              
2489             =item * B<reads_multiple_csv> => I<bool>
2490              
2491             Whether utility accepts CSV data.
2492              
2493             Setting this option to true will implicitly set the C<reads_csv> option to true,
2494             obviously.
2495              
2496             =item * B<summary> => I<str>
2497              
2498             (No description)
2499              
2500             =item * B<writes_csv> => I<bool> (default: 1)
2501              
2502             Whether utility writes CSV data.
2503              
2504             =item * B<writes_multiple_csv> => I<bool>
2505              
2506             Whether utility outputs CSV data.
2507              
2508             Setting this option to true will implicitly set the C<writes_csv> option to true,
2509             obviously.
2510              
2511              
2512             =back
2513              
2514             Return value: (bool)
2515              
2516              
2517             =head2 compile_eval_code
2518              
2519             Usage:
2520              
2521             $coderef = compile_eval_code($str, $label);
2522              
2523             Compile string code C<$str> to coderef in 'main' package, without C<use strict>
2524             or C<use warnings>. Die on compile error.
2525              
2526             =head2 eval_code
2527              
2528             Usage:
2529              
2530             $res = eval_code($coderef, $r, $topic_var_value, $return_topic_var);
2531              
2532             =for Pod::Coverage ^(csvutil)$
2533              
2534             =head1 FAQ
2535              
2536             =head2 My CSV does not have a header?
2537              
2538             Use the C<--no-header> option. Fields will be named C<field1>, C<field2>, and so
2539             on.
2540              
2541             =head2 My data is TSV, not CSV?
2542              
2543             Use the C<--tsv> option.
2544              
2545             =head2 I have a big CSV and the utilities are too slow or eat too much RAM!
2546              
2547             These utilities are not (yet) optimized, patches welcome. If your CSV is very
2548             big, perhaps a C-based solution is what you need.
2549              
2550             =head1 HOMEPAGE
2551              
2552             Please visit the project's homepage at L<https://metacpan.org/release/App-CSVUtils>.
2553              
2554             =head1 SOURCE
2555              
2556             Source repository is at L<https://github.com/perlancar/perl-App-CSVUtils>.
2557              
2558             =head1 SEE ALSO
2559              
2560             =head2 Similar CLI bundles for other format
2561              
2562             L<App::TSVUtils>, L<App::LTSVUtils>, L<App::SerializeUtils>.
2563              
2564             =head2 Other CSV-related utilities
2565              
2566             L<xls2csv> and L<xlsx2csv> from L<Spreadsheet::Read>
2567              
2568             L<import-csv-to-sqlite> from L<App::SQLiteUtils>
2569              
2570             Query CSV with SQL using L<fsql> from L<App::fsql>
2571              
2572             L<csvgrep> from L<csvgrep>
2573              
2574             =head2 Other non-Perl-based CSV utilities
2575              
2576             =head3 Python
2577              
2578             B<csvkit>, L<https://csvkit.readthedocs.io/en/latest/>
2579              
2580             =head1 AUTHOR
2581              
2582             perlancar <perlancar@cpan.org>
2583              
2584             =head1 CONTRIBUTOR
2585              
2586             =for stopwords Adam Hopkins
2587              
2588             Adam Hopkins <violapiratejunky@gmail.com>
2589              
2590             =head1 CONTRIBUTING
2591              
2592              
2593             To contribute, you can send patches by email/via RT, or send pull requests on
2594             GitHub.
2595              
2596             Most of the time, you don't need to build the distribution yourself. You can
2597             simply modify the code, then test via:
2598              
2599             % prove -l
2600              
2601             If you want to build the distribution (e.g. to try to install it locally on your
2602             system), you can install L<Dist::Zilla>,
2603             L<Dist::Zilla::PluginBundle::Author::PERLANCAR>,
2604             L<Pod::Weaver::PluginBundle::Author::PERLANCAR>, and sometimes one or two other
2605             Dist::Zilla- and/or Pod::Weaver plugins. Any additional steps required beyond
2606             that are considered a bug and can be reported to me.
2607              
2608             =head1 COPYRIGHT AND LICENSE
2609              
2610             This software is copyright (c) 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016 by perlancar <perlancar@cpan.org>.
2611              
2612             This is free software; you can redistribute it and/or modify it under
2613             the same terms as the Perl 5 programming language system itself.
2614              
2615             =head1 BUGS
2616              
2617             Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=App-CSVUtils>
2618              
2619             When submitting a bug or request, please include a test-file or a
2620             patch to an existing test-file that illustrates the bug or desired
2621             feature.
2622              
2623             =cut