File Coverage

blib/lib/App/CSVUtils.pm

Criterion	Covered	Total	%
statement	727	868	83.7
branch	182	322	56.5
condition	63	122	51.6
subroutine	111	122	90.9
pod	3	3	100.0
total	1086	1437	75.5

line	stmt	bran	cond	sub	pod	time	code
1							package App::CSVUtils;
2
3	1			1		113841	use 5.010001;
	1					15
4	1			1		5	use strict;
	1					2
	1					19
5	1			1		5	use warnings;
	1					1
	1					22
6	1			1		2261	use Log::ger;
	1					82
	1					5
7
8	1			1		262	use Cwd;
	1					2
	1					63
9	1			1		6	use Exporter qw(import);
	1					3
	1					621
10
11							our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
12							our $DATE = '2023-07-25'; # DATE
13							our $DIST = 'App-CSVUtils'; # DIST
14							our $VERSION = '1.030'; # VERSION
15
16							our @EXPORT_OK = qw(
17							gen_csv_util
18							compile_eval_code
19							eval_code
20							);
21
22							our %SPEC;
23
24							our $sch_req_str_or_code = ['any', of=>['str', 'code*']];
25
26							sub _open_file_read {
27	138			138		196	my $filename = shift;
28
29	138					240	my ($fh, $err);
30	138	50				288	if ($filename eq '-') {
31	0					0	$fh = *STDIN;
32							} else {
33	138	50				5860	open $fh, "<", $filename or do {
34	0					0	$err = [500, "Can't open input filename '$filename': $!"];
35	0					0	goto RETURN;
36							};
37							}
38	138					1944	binmode $fh, ":encoding(utf8)";
39
40	138					5874	RETURN:
41							($fh, $err);
42							}
43
44							sub _open_file_write {
45	2			2		6	my $filename = shift;
46
47	2					5	my ($fh, $err);
48	2	50				7	if ($filename eq '-') {
49	0					0	$fh = *STDOUT;
50							} else {
51	2	50				137	open $fh, ">", $filename or do {
52	0					0	$err = [500, "Can't open output filename '$filename': $!"];
53	0					0	goto RETURN;
54							};
55							}
56	2					31	binmode $fh, ":encoding(utf8)";
57
58	2					84	RETURN:
59							($fh, $err);
60							}
61
62							sub _return_or_write_file {
63	0			0		0	my ($res, $filename, $overwrite) = @_;
64	0	0				0	return $res if !defined($filename);
65
66	0					0	my $fh;
67	0	0				0	if ($filename eq '-') {
68	0					0	$fh = \*STDOUT;
69							} else {
70	0	0				0	if (-f $filename) {
71	0	0				0	if ($overwrite) {
72	0					0	log_info "[csvutil] Overwriting output file $filename";
73							} else {
74	0					0	return [412, "Refusing to ovewrite existing output file '$filename', please select another path or specify --overwrite"];
75							}
76							}
77	0	0				0	open my $fh, ">", $filename or do {
78	0					0	return [500, "Can't open output file '$filename': $!"];
79							};
80	0					0	binmode $fh, ":encoding(utf8)";
81	0					0	print $fh $res->[2];
82	0	0				0	close $fh or warn "Can't write to '$filename': $!";
83	0					0	return [$res->[0], $res->[1]];
84							}
85							}
86
87							sub compile_eval_code {
88	45	100		45	1	148	return $_[0] if ref $_[0] eq 'CODE';
89	43					117	my ($str, $label) = @_;
90	43	50	33			184	defined($str) && length($str) or die [400, "Please specify code ($label)"];
91	43					104	$str = "package main; no strict; no warnings; sub { $str }";
92	43					139	log_trace "[csvutil] Compiling Perl code: $str";
93	1			1		7	my $code = eval $str; ## no critic: BuiltinFunctions::ProhibitStringyEval
	1			1		2
	1			1		26
	1			1		5
	1			1		12
	1			1		62
	1			1		7
	1			1		2
	1			1		26
	1			1		5
	1			1		2
	1			1		57
	1			1		7
	1			1		3
	1			1		47
	1			1		6
	1			1		2
	1			1		45
	1			1		6
	1			1		2
	1			1		38
	1			1		7
	1			1		2
	1			1		42
	1			1		7
	1			1		2
	1			1		37
	1			1		7
	1			1		2
	1			1		54
	1			1		23
	1			1		4
	1			1		30
	1			1		11
	1			1		15
	1			1		70
	1			1		7
	1			1		3
	1			1		37
	1			1		6
	1			1		5
	1			1		59
	1			1		7
	1			1		3
	1			1		50
	1			1		9
	1			1		2
	1			1		55
	1			1		6
	1			1		4
	1			1		29
	1			1		5
	1			1		2
	1			1		59
	1			1		7
	1			1		2
	1			1		29
	1			1		4
	1			1		2
	1			1		67
	1			1		7
	1			1		2
	1			1		25
	1			1		5
	1			1		2
	1			1		55
	1			1		6
	1			1		2
	1			1		24
	1			1		5
	1			1		2
	1			1		43
	1			1		6
	1			1		3
	1			1		23
	1			1		5
	1			1		1
	1			1		45
	1			1		6
	1			1		2
	1			1		23
	1			1		5
	1			1		1
	1			1		82
	1			1		9
	1			1		108
	1					39
	1					6
	1					2
	1					140
	1					8
	1					2
	1					23
	1					5
	1					2
	1					64
	1					18
	1					2
	1					49
	1					7
	1					2
	1					77
	1					7
	1					2
	1					40
	1					10
	1					7
	1					116
	1					12
	1					3
	1					48
	1					6
	1					2
	1					51
	1					7
	1					2
	1					25
	1					5
	1					3
	1					100
	1					8
	1					2
	1					42
	1					6
	1					3
	1					61
	1					7
	1					2
	1					67
	1					7
	1					2
	1					71
	1					9
	1					2
	1					25
	1					4
	1					3
	1					57
	1					6
	1					2
	1					36
	1					7
	1					1
	1					40
	1					10
	1					2
	1					24
	1					6
	1					3
	1					56
	1					7
	1					2
	1					40
	1					7
	1					2
	1					40
	1					7
	1					3
	1					35
	1					6
	1					3
	1					54
	1					7
	1					4
	1					38
	1					6
	1					2
	1					58
	1					6
	1					3
	1					27
	1					7
	1					2
	1					49
	1					7
	1					2
	1					24
	1					4
	1					3
	1					52
	1					7
	1					2
	1					29
	1					6
	1					7
	1					57
	1					7
	1					3
	1					36
	1					5
	1					9
	1					69
	1					8
	1					2
	1					23
	1					5
	1					2
	1					57
	1					8
	1					1
	1					25
	1					5
	1					2
	1					61
	1					7
	1					2
	1					31
	1					5
	1					7
	1					66
	1					7
	1					3
	1					70
	1					10
	1					2
	1					49
	1					12
	1					2
	1					24
	1					14
	1					3
	1					60
	1					7
	1					2
	1					71
	1					8
	1					6
	1					51
	1					8
	1					2
	1					27
	1					5
	1					2
	1					80
	1					8
	1					2
	1					25
	1					6
	1					2
	1					86
	1					8
	1					1
	1					38
	1					5
	1					2
	1					85
	1					7
	1					5
	1					37
	1					6
	1					2
	1					84
	1					8
	1					3
	1					36
	1					7
	1					2
	1					80
	43					3558
94	43	100				191	die [400, "Can't compile code ($label) '$str': $@"] if $@;
95	40					141	$code;
96							}
97
98							sub eval_code {
99	1			1		8	no warnings 'once';
	1					2
	1					7703
100	60			60	1	145	my ($code, $r, $value_for_topic, $return_topic) = @_;
101	60					124	local $_ = $value_for_topic;
102	60					81	local $main::r = $r;
103	60					126	local $main::row = $r->{input_row};
104	60					84	local $main::rownum = $r->{input_rownum};
105	60					100	local $main::data_rownum = $r->{input_data_rownum};
106	60					91	local $main::csv = $r->{input_parser};
107	60					77	local $main::fields_idx = $r->{input_fields_idx};
108	60	100				107	if ($return_topic) {
109	9					181	$code->($_);
110	9					27	$_;
111							} else {
112	51					1065	$code->($_);
113							}
114							}
115
116							sub _get_field_idx {
117	0			0		0	my ($field, $field_idxs) = @_;
118	0	0	0			0	defined($field) && length($field) or die "Please specify at least a field\n";
119	0					0	my $idx = $field_idxs->{$field};
120							die "Unknown field '$field' (known fields include: ".
121	0	0				0	join(", ", map { "'$_'" } sort {$field_idxs->{$a} <=> $field_idxs->{$b}}
	0					0
	0					0
122							keys %$field_idxs).")\n" unless defined $idx;
123	0					0	$idx;
124							}
125
126							sub _get_csv_row {
127	0			0		0	my ($csv, $row, $i, $outputs_header) = @_;
128							#use DD; print " "; dd $row;
129	0	0	0			0	return "" if $i == 1 && !$outputs_header;
130	0	0				0	my $status = $csv->combine(@$row)
131							or die "Error in line $i: ".$csv->error_input."\n";
132	0					0	$csv->string . "\n";
133							}
134
135							sub _instantiate_parser_default {
136	0			0		0	require Text::CSV_XS;
137
138	0					0	Text::CSV_XS->new({binary=>1});
139							}
140
141							sub _instantiate_parser {
142	213			213		2082	require Text::CSV_XS;
143
144	213					13748	my ($args, $prefix) = @_;
145	213		50			438	$prefix //= '';
146
147	213					508	my %tcsv_opts = (binary=>1);
148	213	100	66			1619	if (defined $args->{"${prefix}sep_char"} \|\|
		100	66
149							defined $args->{"${prefix}quote_char"} \|\|
150							defined $args->{"${prefix}escape_char"}) {
151	1	50				7	$tcsv_opts{"sep_char"} = $args->{"${prefix}sep_char"} if defined $args->{"${prefix}sep_char"};
152	1	50				4	$tcsv_opts{"quote_char"} = $args->{"${prefix}quote_char"} if defined $args->{"${prefix}quote_char"};
153	1	50				5	$tcsv_opts{"escape_char"} = $args->{"${prefix}escape_char"} if defined $args->{"${prefix}escape_char"};
154							} elsif ($args->{"${prefix}tsv"}) {
155	1					3	$tcsv_opts{"sep_char"} = "\t";
156	1					3	$tcsv_opts{"quote_char"} = undef;
157	1					2	$tcsv_opts{"escape_char"} = undef;
158							}
159	213	100				491	$tcsv_opts{always_quote} = 1 if $args->{"${prefix}always_quote"};
160	213	50				427	$tcsv_opts{quote_empty} = 1 if $args->{"${prefix}quote_empty"};
161
162	213					895	Text::CSV_XS->new(\%tcsv_opts);
163							}
164
165							sub _instantiate_emitter {
166	94			94		154	my $args = shift;
167	94					203	_instantiate_parser($args, 'output_');
168							}
169
170							sub _complete_field_or_field_list {
171							# return list of known fields of a CSV
172
173	0			0		0	my $which = shift;
174
175	0					0	my %args = @_;
176	0		0			0	my $word = $args{word} // '';
177	0					0	my $cmdline = $args{cmdline};
178	0					0	my $r = $args{r};
179
180							# we are not called from cmdline, bail
181	0	0				0	return undef unless $cmdline; ## no critic: Subroutines::ProhibitExplicitReturnUndef
182
183							# let's parse argv first
184	0					0	my $args;
185							{
186							# this is not activated yet
187	0					0	$r->{read_config} = 1;
	0					0
188
189	0					0	my $res = $cmdline->parse_argv($r);
190							#return undef unless $res->[0] == 200;
191
192	0	0				0	$cmdline->_read_config($r) unless $r->{config};
193	0					0	$args = $res->[2];
194							}
195
196							# user hasn't specified -f, bail
197	0	0	0			0	return {message=>"Please specify input filename first"} unless defined $args && $args->{input_filename};
198
199							# user wants to read CSV from stdin, bail
200	0	0				0	return {message=>"Can't get field list when input is stdin"} if $args->{input_filename} eq '-';
201
202							# can the file be opened?
203	0					0	my $csv_parser = _instantiate_parser(\%args, 'input_');
204	0	0				0	open my($fh), "<encoding(utf8)", $args->{input_filename} or do {
205							#warn "csvutils: Cannot open file '$args->{input_filename}': $!\n";
206	0					0	return [];
207							};
208
209							# can the header row be read?
210	0	0				0	my $row = $csv_parser->getline($fh) or return [];
211
212	0	0	0			0	if (defined $args->{input_header} && !$args->{input_header}) {
213	0					0	$row = [map {"field$_"} 1 .. @$row];
	0					0
214							}
215
216	0	0				0	if ($which =~ /sort/) {
217	0					0	$row = [map {($_,"-$_","+$_","~$_")} @$row];
	0					0
218							}
219
220	0					0	require Complete::Util;
221	0	0				0	if ($which =~ /field_list/) {
222	0					0	return Complete::Util::complete_comma_sep(
223							word => $word,
224							elems => $row,
225							uniq => 1,
226							);
227							} else {
228	0					0	return Complete::Util::complete_array_elem(
229							word => $word,
230							array => $row,
231							);
232							}
233							}
234
235							sub _complete_field {
236	0			0		0	_complete_field_or_field_list('field', @_);
237							}
238
239							sub _complete_field_list {
240	0			0		0	_complete_field_or_field_list('field_list', @_);
241							}
242
243							sub _complete_sort_field_list {
244	0			0		0	_complete_field_or_field_list('sort_field_list', @_);
245							}
246
247							sub _complete_sort_field {
248	0			0		0	_complete_field_or_field_list('sort_field', @_);
249							}
250
251							sub _array2hash {
252	0			0		0	my ($row, $fields) = @_;
253	0					0	my $rowhash = {};
254	0					0	for my $i (0..$#{$fields}) {
	0					0
255	0					0	$rowhash->{ $fields->[$i] } = $row->[$i];
256							}
257	0					0	$rowhash;
258							}
259
260							# check that the first N values of a field are all defined and numeric. if there
261							# are now rows or less than N values, return true.
262							sub _is_numeric_field {
263	0			0		0	require Scalar::Util::Numeric;
264
265	0					0	my ($rows, $field_idx, $num_samples) = @_;
266	0		0			0	$num_samples //= 5;
267
268	0					0	my $is_numeric = 1;
269	0					0	for my $row (@$rows) {
270	0					0	my $val = $row->[$field_idx];
271	0	0				0	return 0 unless defined $val;
272	0	0				0	return 0 unless Scalar::Util::Numeric::isnum($val);
273							}
274	0					0	$is_numeric;
275							}
276
277							# find a single field by name or index (1-based), return index (0-based). die
278							# when requested field does not exist.
279							sub _find_field {
280	17			17		42	my ($fields, $name_or_idx) = @_;
281
282							# search by name first
283	17					23	for my $i (0 .. $#{$fields}) {
	17					54
284	29					47	my $field = $fields->[$i];
285	29	100				100	return $i if $field eq $name_or_idx;
286							}
287
288	2	50				32	if ($name_or_idx eq '0') {
		100
		50
289	0					0	die [400, "Field index 0 is requested, you probably meant 1 for the first field?"];
290							} elsif ($name_or_idx =~ /\A[1-9][0-9]*\z/) {
291	1	50				7	if ($name_or_idx > @$fields) {
292	0					0	die [400, "There are only ".scalar(@$fields)." field(s) but field index $name_or_idx is requested"];
293							} else {
294	1					4	return $name_or_idx-1;
295							}
296							} elsif ($name_or_idx =~ /\A-[1-9][0-9]*\z/) {
297	0	0				0	if (-$name_or_idx > @$fields) {
298	0					0	die [400, "There are only ".scalar(@$fields)." field(s) but field index $name_or_idx is requested"];
299							} else {
300	0					0	return @$fields + $name_or_idx;
301							}
302							}
303
304							# not found
305							die [404, "Unknown field name/index '$name_or_idx' (known fields include: ".
306	1					7	join(", ", map { "'$_'" } @$fields).")"];
	3					17
307							}
308
309							# select one or more fields with options like --include-field, etc
310							sub _select_fields {
311	12			12		35	my ($fields, $field_idxs, $args, $default_select_choice) = @_;
312
313	12					18	my @selected_fields;
314
315							my $select_field_options_used;
316
317	12	100				33	if (defined $args->{include_field_pat}) {
318	4					7	$select_field_options_used++;
319	4					7	for my $field (@$fields) {
320	10	50				59	if ($field =~ $args->{include_field_pat}) {
321	10					25	push @selected_fields, $field;
322							}
323							}
324							}
325	12	100				26	if (defined $args->{exclude_field_pat}) {
326	1					5	$select_field_options_used++;
327	1					3	@selected_fields = grep { $_ !~ $args->{exclude_field_pat} }
	3					13
328							@selected_fields;
329							}
330	12	100				30	if (defined $args->{include_fields}) {
331	8					14	$select_field_options_used++;
332							FIELD:
333	8					14	for my $field (@{ $args->{include_fields} }) {
	8					24
334	13	100				33	unless (defined $field_idxs->{$field}) {
335	4	100				28	return [400, "Unknown field '$field'"] unless $args->{ignore_unknown_fields};
336	2					6	next FIELD;
337							}
338	9	50				23	next if grep { $field eq $_ } @selected_fields;
	3					16
339	9					23	push @selected_fields, $field;
340							}
341							}
342	10	100				33	if (defined $args->{exclude_fields}) {
343	2					5	$select_field_options_used++;
344							FIELD:
345	2					4	for my $field (@{ $args->{exclude_fields} }) {
	2					6
346	2	50				6	unless (defined $field_idxs->{$field}) {
347	0	0				0	return [400, "Unknown field '$field'"] unless $args->{ignore_unknown_fields};
348	0					0	next FIELD;
349							}
350	2					4	@selected_fields = grep { $field ne $_ } @selected_fields;
	6					16
351							}
352							}
353
354	10	50	33			39	if (!$select_field_options_used && $default_select_choice) {
355	0	0				0	if ($default_select_choice eq 'all') {
		0
		0
		0
356	0					0	@selected_fields = @$fields;
357							} elsif ($default_select_choice eq 'first') {
358	0	0				0	@selected_fields = ($fields->[0]) if @$fields;
359							} elsif ($default_select_choice eq 'last') {
360	0	0				0	@selected_fields = ($fields->[-1]) if @$fields;
361							} elsif ($default_select_choice eq 'first-if-only-field') {
362	0	0				0	@selected_fields = ($fields->[0]) if @$fields == 1;
363							}
364							}
365
366	10	100				22	if ($args->{show_selected_fields}) {
367	1					8	return [200, "OK", \@selected_fields];
368							}
369
370							#my %selected_field_idxs;
371							#$selected_field_idxs{$_} = $fields_idx->{$_} for @selected_fields;
372
373	9					14	my @selected_field_idxs_array;
374	9					29	push @selected_field_idxs_array, $field_idxs->{$_} for @selected_fields;
375
376	9					48	[100, "Continue", [\@selected_fields, \@selected_field_idxs_array]];
377							}
378
379							our $xcomp_csvfiles = [filename => {file_ext_filter => qr/^[tc]sv$/i}];
380
381							our %argspecs_csv_input = (
382							input_header => {
383							summary => 'Specify whether input CSV has a header row',
384							'summary.alt.bool.not' => 'Specify that input CSV does not have a header row',
385							schema => 'bool*',
386							default => 1,
387							description => <<'_',
388
389							By default, the first row of the input CSV will be assumed to contain field
390							names (and the second row contains the first data row). When you declare that
391							input CSV does not have header row (`--no-input-header`), the first row of the
392							CSV is assumed to contain the first data row. Fields will be named `field1`,
393							`field2`, and so on.
394
395							_
396							cmdline_aliases => {
397							},
398							tags => ['category:input'],
399							},
400							input_tsv => {
401							summary => "Inform that input file is in TSV (tab-separated) format instead of CSV",
402							schema => 'true*',
403							description => <<'_',
404
405							Overriden by `--input-sep-char`, `--input-quote-char`, `--input-escape-char`
406							options. If one of those options is specified, then `--input-tsv` will be
407							ignored.
408
409							_
410							tags => ['category:input'],
411							},
412							input_sep_char => {
413							summary => 'Specify field separator character in input CSV, will be passed to Text::CSV_XS',
414							schema => ['str*', len=>1],
415							description => <<'_',
416
417							Defaults to `,` (comma). Overrides `--input-tsv` option.
418
419							_
420							tags => ['category:input'],
421							},
422							input_quote_char => {
423							summary => 'Specify field quote character in input CSV, will be passed to Text::CSV_XS',
424							schema => ['str*', len=>1],
425							description => <<'_',
426
427							Defaults to `"` (double quote). Overrides `--input-tsv` option.
428
429							_
430							tags => ['category:input'],
431							},
432							input_escape_char => {
433							summary => 'Specify character to escape value in field in input CSV, will be passed to Text::CSV_XS',
434							schema => ['str*', len=>1],
435							description => <<'_',
436
437							Defaults to `\\` (backslash). Overrides `--input-tsv` option.
438
439							_
440							tags => ['category:input'],
441							},
442							);
443
444							our %argspecs_csv_output = (
445							output_header => {
446							summary => 'Whether output CSV should have a header row',
447							schema => 'bool*',
448							description => <<'_',
449
450							By default, a header row will be output if input CSV has header row. Under
451							`--output-header`, a header row will be output even if input CSV does not have
452							header row (value will be something like "col0,col1,..."). Under
453							`--no-output-header`, header row will not be printed even if input CSV has
454							header row. So this option can be used to unconditionally add or remove header
455							row.
456
457							_
458							tags => ['category:output'],
459							},
460							output_tsv => {
461							summary => "Inform that output file is TSV (tab-separated) format instead of CSV",
462							schema => 'bool*',
463							description => <<'_',
464
465							This is like `--input-tsv` option but for output instead of input.
466
467							Overriden by `--output-sep-char`, `--output-quote-char`, `--output-escape-char`
468							options. If one of those options is specified, then `--output-tsv` will be
469							ignored.
470
471							_
472							tags => ['category:output'],
473							},
474							output_sep_char => {
475							summary => 'Specify field separator character in output CSV, will be passed to Text::CSV_XS',
476							schema => ['str*', len=>1],
477							description => <<'_',
478
479							This is like `--input-sep-char` option but for output instead of input.
480
481							Defaults to `,` (comma). Overrides `--output-tsv` option.
482
483							_
484							tags => ['category:output'],
485							},
486							output_quote_char => {
487							summary => 'Specify field quote character in output CSV, will be passed to Text::CSV_XS',
488							schema => ['str*', len=>1],
489							description => <<'_',
490
491							This is like `--input-quote-char` option but for output instead of input.
492
493							Defaults to `"` (double quote). Overrides `--output-tsv` option.
494
495							_
496							tags => ['category:output'],
497							},
498							output_escape_char => {
499							summary => 'Specify character to escape value in field in output CSV, will be passed to Text::CSV_XS',
500							schema => ['str*', len=>1],
501							description => <<'_',
502
503							This is like `--input-escape-char` option but for output instead of input.
504
505							Defaults to `\\` (backslash). Overrides `--output-tsv` option.
506
507							_
508							tags => ['category:output'],
509							},
510							output_always_quote => {
511							summary => 'Whether to always quote values',
512							schema => 'bool*',
513							default => 0,
514							description => <<'_',
515
516							When set to false (the default), values are quoted only when necessary:
517
518							field1,field2,"field three contains comma (,)",field4
519
520							When set to true, then all values will be quoted:
521
522							"field1","field2","field three contains comma (,)","field4"
523
524							_
525							tags => ['category:output'],
526							},
527							output_quote_empty => {
528							summary => 'Whether to quote empty values',
529							schema => 'bool*',
530							default => 0,
531							description => <<'_',
532
533							When set to false (the default), empty values are not quoted:
534
535							field1,field2,,field4
536
537							When set to true, then empty values will be quoted:
538
539							field1,field2,"",field4
540
541							_
542							tags => ['category:output'],
543							},
544							);
545
546							our %argspecopt_input_filename = (
547							input_filename => {
548							summary => 'Input CSV file',
549							description => <<'_',
550
551							Use `-` to read from stdin.
552
553							Encoding of input file is assumed to be UTF-8.
554
555							_
556							schema => 'filename*',
557							default => '-',
558							'x.completion' => $xcomp_csvfiles,
559							tags => ['category:input'],
560							},
561							);
562
563							our %argspecopt_input_filenames = (
564							input_filenames => {
565							'x.name.is_plural' => 1,
566							'x.name.singular' => 'input_filename',
567							summary => 'Input CSV files',
568							description => <<'_',
569
570							Use `-` to read from stdin.
571
572							Encoding of input file is assumed to be UTF-8.
573
574							_
575							schema => ['array', of=>'filename'],
576							default => ['-'],
577							'x.completion' => $xcomp_csvfiles,
578							tags => ['category:input'],
579							},
580							);
581
582							our %argspecopt_overwrite = (
583							overwrite => {
584							summary => 'Whether to override existing output file',
585							schema => 'bool*',
586							cmdline_aliases=>{O=>{}},
587							tags => ['category:output'],
588							},
589							);
590
591							our %argspecsopt_inplace = (
592							inplace => {
593							summary => 'Output to the same file as input',
594							schema => 'true*',
595							description => <<'_',
596
597							Normally, you output to a different file than input. If you try to output to the
598							same file (`-o INPUT.csv -O`) you will clobber the input file; thus the utility
599							prevents you from doing it. However, with this `--inplace` option, you can
600							output to the same file. Like perl's `-i` option, this will first output to a
601							temporary file in the same directory as the input file then rename to the final
602							file at the end. You cannot specify output file (`-o`) when using this option,
603							but you can specify backup extension with `-b` option.
604
605							Some caveats:
606
607							- if input file is a symbolic link, it will be replaced with a regular file;
608							- renaming (implemented using `rename()`) can fail if input filename is too long;
609							- value specified in `-b` is currently not checked for acceptable characters;
610							- things can also fail if permissions are restrictive;
611
612							_
613							tags => ['category:output'],
614							},
615							inplace_backup_ext => {
616							summary => 'Extension to add for backup of input file',
617							schema => 'str*',
618							default => '',
619							description => <<'_',
620
621							In inplace mode (`--inplace`), if this option is set to a non-empty string, will
622							rename the input file using this extension as a backup. The old existing backup
623							will be overwritten, if any.
624
625							_
626							cmdline_aliases => {b=>{}},
627							tags => ['category:output'],
628							},
629							);
630
631							our %argspecopt_output_filename = (
632							output_filename => {
633							summary => 'Output filename',
634							description => <<'_',
635
636							Use `-` to output to stdout (the default if you don't specify this option).
637
638							Encoding of output file is assumed to be UTF-8.
639
640							_
641							schema => 'filename*',
642							cmdline_aliases=>{o=>{}},
643							tags => ['category:output'],
644							},
645							);
646
647							our %argspecopt_output_filenames = (
648							output_filenames => {
649							summary => 'Output filenames',
650							description => <<'_',
651
652							Use `-` to output to stdout (the default if you don't specify this option).
653
654							Encoding of output file is assumed to be UTF-8.
655
656							_
657							schema => ['array', of=>'filename'],
658							cmdline_aliases=>{o=>{}},
659							tags => ['category:output'],
660							},
661							);
662
663							our %argspecopt_field = (
664							field => {
665							summary => 'Field name',
666							schema => 'str*',
667							cmdline_aliases => { f=>{} },
668							completion => \&_complete_field,
669							},
670							);
671
672							our %argspecopt_field_1 = (
673							field => {
674							summary => 'Field name',
675							schema => 'str*',
676							pos => 1,
677							cmdline_aliases => { f=>{} },
678							completion => \&_complete_field,
679							},
680							);
681
682							our %argspec_field_1 = (
683							field => {
684							summary => 'Field name',
685							schema => 'str*',
686							cmdline_aliases => { f=>{} },
687							req => 1,
688							pos => 1,
689							completion => \&_complete_field,
690							},
691							);
692
693							our %argspec_fields_1plus = (
694							fields => {
695							'x.name.is_plural' => 1,
696							'x.name.singular' => 'field',
697							summary => 'Field names',
698							schema => ['array', of=>['str', min_len=>1], min_len=>1],
699							req => 1,
700							pos => 1,
701							slurpy => 1,
702							cmdline_aliases => {f=>{}},
703							element_completion => \&_complete_field,
704							},
705							);
706
707							# without completion, for adding new field
708							our %argspec_field_1_nocomp = (
709							field => {
710							summary => 'Field name',
711							schema => 'str*',
712							cmdline_aliases => { f=>{} },
713							req => 1,
714							pos => 1,
715							},
716							);
717
718							# without completion, for adding new fields
719							our %argspec_fields_1plus_nocomp = (
720							fields => {
721							'x.name.is_plural' => 1,
722							'x.name.singular' => 'field',
723							summary => 'Field names',
724							'summary.alt.plurality.singular' => 'Field name',
725							schema => ['array', of=>['str', min_len=>1], min_len=>1],
726							cmdline_aliases => { f=>{} },
727							req => 1,
728							pos => 1,
729							slurpy => 1,
730							},
731							);
732
733							our %argspec_fields = (
734							fields => {
735							'x.name.is_plural' => 1,
736							'x.name.singular' => 'field',
737							summary => 'Field names',
738							schema => ['array', of=>['str', min_len=>1], min_len=>1],
739							req => 1,
740							cmdline_aliases => {f=>{}},
741							element_completion => \&_complete_field,
742							},
743							);
744
745							our %argspecopt_fields = (
746							fields => {
747							'x.name.is_plural' => 1,
748							'x.name.singular' => 'field',
749							summary => 'Field names',
750							schema => ['array', of=>['str', min_len=>1], min_len=>1],
751							cmdline_aliases => {f=>{}},
752							element_completion => \&_complete_field,
753							},
754							);
755
756							our %argspecsopt_field_selection = (
757							include_fields => {
758							'x.name.is_plural' => 1,
759							'x.name.singular' => 'include_field',
760							summary => 'Field names to include, takes precedence over --exclude-field-pat',
761							schema => ['array', of=>'str'],
762							cmdline_aliases => {
763							f => {},
764							field => {}, # backward compatibility
765							},
766							element_completion => \&_complete_field,
767							tags => ['category:field-selection'],
768							},
769							include_field_pat => {
770							summary => 'Field regex pattern to select, overidden by --exclude-field-pat',
771							schema => 're*',
772							cmdline_aliases => {
773							field_pat => {}, # backward compatibility
774							include_all_fields => { summary => 'Shortcut for --field-pat=., effectively selecting all fields', is_flag=>1, code => sub { $_[0]{include_field_pat} = '.' } },
775							a => { summary => 'Shortcut for --field-pat=., effectively selecting all fields', is_flag=>1, code => sub { $_[0]{include_field_pat} = '.' } },
776							},
777							tags => ['category:field-selection'],
778							},
779							exclude_fields => {
780							'x.name.is_plural' => 1,
781							'x.name.singular' => 'exclude_field',
782							summary => 'Field names to exclude, takes precedence over --fields',
783							schema => ['array', of=>'str'],
784							cmdline_aliases => {
785							F => {},
786							},
787							element_completion => \&_complete_field,
788							tags => ['category:field-selection'],
789							},
790							exclude_field_pat => {
791							summary => 'Field regex pattern to exclude, takes precedence over --field-pat',
792							schema => 're*',
793							cmdline_aliases => {
794							exclude_all_fields => { summary => 'Shortcut for --exclude-field-pat=., effectively excluding all fields', is_flag=>1, code => sub { $_[0]{exclude_field_pat} = '.' } },
795							A => { summary => 'Shortcut for --exclude-field-pat=., effectively excluding all fields', is_flag=>1, code => sub { $_[0]{exclude_field_pat} = '.' } },
796							},
797							tags => ['category:field-selection'],
798							},
799							ignore_unknown_fields => {
800							summary => 'When unknown fields are specified in --include-field (--field) or --exclude-field options, ignore them instead of throwing an error',
801							schema => 'bool*',
802							},
803							show_selected_fields => {
804							summary => 'Show selected fields and then immediately exit',
805							schema => 'true*',
806							},
807							);
808
809							our %argspec_eval = (
810							eval => {
811							summary => 'Perl code',
812							schema => $sch_req_str_or_code,
813							cmdline_aliases => { e=>{} },
814							req => 1,
815							},
816							);
817
818							our %argspecopt_eval = (
819							eval => {
820							summary => 'Perl code',
821							schema => $sch_req_str_or_code,
822							cmdline_aliases => { e=>{} },
823							},
824							);
825
826							our %argspec_eval_1 = (
827							eval => {
828							summary => 'Perl code',
829							schema => $sch_req_str_or_code,
830							cmdline_aliases => { e=>{} },
831							req => 1,
832							pos => 1,
833							},
834							);
835
836							our %argspec_eval_2 = (
837							eval => {
838							summary => 'Perl code',
839							schema => $sch_req_str_or_code,
840							cmdline_aliases => { e=>{} },
841							req => 1,
842							pos => 2,
843							},
844							);
845
846							our %argspecopt_eval_2 = (
847							eval => {
848							summary => 'Perl code',
849							schema => $sch_req_str_or_code,
850							cmdline_aliases => { e=>{} },
851							pos => 2,
852							},
853							);
854
855							our %argspecsopt_sortsub = (
856							by_sortsub => {
857							schema => 'str*',
858							description => <<'_',
859
860							When sorting rows, usually combined with `--key` because most Sort::Sub routine
861							expects a string to be compared against.
862
863							When sorting fields, the Sort::Sub routine will get the field name as argument.
864
865							_
866							summary => 'Sort using a Sort::Sub routine',
867							'x.completion' => ['sortsub_spec'],
868							},
869							sortsub_args => {
870							summary => 'Arguments to pass to Sort::Sub routine',
871							schema => ['hash', of=>'str'],
872							},
873							);
874
875							our %argspecopt_key = (
876							key => {
877							summary => 'Generate sort keys with this Perl code',
878							description => <<'_',
879
880							If specified, then will compute sort keys using Perl code and sort using the
881							keys. Relevant when sorting using `--by-code` or `--by-sortsub`. If specified,
882							then instead of row when sorting rows, the code (or Sort::Sub routine) will
883							receive these sort keys to sort against.
884
885							The code will receive the row (arrayref, or if -H is specified, hashref) as the
886							argument.
887
888							_
889							schema => $sch_req_str_or_code,
890							cmdline_aliases => {k=>{}},
891							},
892							);
893
894							our %argspecs_sort_rows = (
895							reverse => {
896							schema => ['bool', is=>1],
897							cmdline_aliases => {r=>{}},
898							},
899							ci => {
900							schema => ['bool', is=>1],
901							cmdline_aliases => {i=>{}},
902							},
903							by_fields => {
904							summary => 'Sort by a list of field specifications',
905							'summary.alt.plurality.singular' => 'Add a sort field specification',
906							'x.name.is_plural' => 1,
907							'x.name.singular' => 'by_field',
908							description => <<'_',
909
910							Each field specification is a field name with an optional prefix. `FIELD`
911							(without prefix) means sort asciibetically ascending (smallest to largest),
912							`~FIELD` means sort asciibetically descending (largest to smallest), `+FIELD`
913							means sort numerically ascending, `-FIELD` means sort numerically descending.
914
915							_
916							schema => ['array', of=>'str'],
917							element_completion => \&_complete_sort_field,
918							},
919							by_code => {
920							summary => 'Sort by using Perl code',
921							schema => $sch_req_str_or_code,
922							description => <<'_',
923
924							`$a` and `$b` (or the first and second argument) will contain the two rows to be
925							compared. Which are arrayrefs; or if `--hash` (`-H`) is specified, hashrefs; or
926							if `--key` is specified, whatever the code in `--key` returns.
927
928							_
929							},
930							%argspecopt_key,
931							%argspecsopt_sortsub,
932							);
933
934							our %argspecs_sort_fields = (
935							reverse => {
936							schema => ['bool', is=>1],
937							cmdline_aliases => {r=>{}},
938							},
939							ci => {
940							schema => ['bool', is=>1],
941							cmdline_aliases => {i=>{}},
942							},
943							by_examples => {
944							summary => 'Sort by a list of field names as examples',
945							'summary.alt.plurality.singular' => 'Add a field to sort by example',
946							'x.name.is_plural' => 1,
947							'x.name.singular' => 'by_example',
948							schema => ['array', of=>'str'],
949							element_completion => \&_complete_field,
950							},
951							by_code => {
952							summary => 'Sort fields using Perl code',
953							schema => $sch_req_str_or_code,
954							description => <<'_',
955
956							`$a` and `$b` (or the first and second argument) will contain `[$field_name,
957							$field_idx]`.
958
959							_
960							},
961							%argspecsopt_sortsub,
962							);
963
964							our %argspecopt_with_data_rows = (
965							with_data_rows => {
966							summary => 'Whether to also output data rows',
967							schema => 'bool',
968							},
969							);
970
971							our %argspecopt_hash = (
972							hash => {
973							summary => 'Provide row in $_ as hashref instead of arrayref',
974							schema => ['bool*', is=>1],
975							cmdline_aliases => {H=>{}},
976							},
977							);
978
979							# add a position to specified argument, if possible
980							sub _add_arg_pos {
981	42			42		100	my ($args, $argname, $is_slurpy) = @_;
982
983							# argument already has a position, return
984	42	50				105	return if defined $args->{$argname}{pos};
985
986							# position of slurpy argument
987	42					61	my $slurpy_pos;
988	42					149	for (keys %$args) {
989	509	100				993	next unless $args->{$_}{slurpy};
990	6					19	$slurpy_pos = $args->{$_}{pos};
991	6					13	last;
992							}
993
994							# there is already a slurpy arg, return
995	42	50	66			161	return if $is_slurpy && defined $slurpy_pos;
996
997							# find the lowest position that's not available
998							ARG:
999	42					130	for my $j (0 .. scalar(keys %$args)-1) {
1000	60	100	100			160	last if defined $slurpy_pos && $j >= $slurpy_pos;
1001	56					177	for (keys %$args) {
1002	648	100	100			1381	next ARG if defined $args->{$_}{pos} && $args->{$_}{pos} == $j;
1003							}
1004	38					104	$args->{$argname}{pos} = $j;
1005	38	100				89	$args->{$argname}{slurpy} = 1 if $is_slurpy;
1006	38					104	last;
1007							}
1008							}
1009
1010							sub _randext {
1011	2			2		16	state $charset = [0..9, "A".."Z","a".."z"];
1012	2					6	my $len = shift;
1013	2					3	my $ext = "";
1014	2					5	for (1..$len) { $ext .= $charset->[rand @$charset] }
	10					21
1015	2					8	$ext;
1016							}
1017
1018							$SPEC{gen_csv_util} = {
1019							v => 1.1,
1020							summary => 'Generate a CSV utility',
1021							description => <<'_',
1022
1023							This routine is used to generate a CSV utility in the form of a <pm:Rinci>
1024							function (code and metadata). You can then produce a CLI from the Rinci function
1025							simply using <pm:Perinci::CmdLine::Gen> or, if you use <pm:Dist::Zilla>,
1026							<pm:Dist::Zilla::Plugin::GenPericmdScript> or, if on the command-line,
1027							<prog:gen-pericmd-script>.
1028
1029							Using this routine, by providing just one or a few hooks and setting some
1030							parameters like a couple of extra arguments, you will get a complete CLI with
1031							decent POD/manpage, ability to read one or multiple CSV's and write one or
1032							multiple CSV's, some command-line options to customize how the input CSV's
1033							should be parsed and how the output CSV's should be formatted and named. Your
1034							CLI also has tab completion, usage and help message, and other features.
1035
1036							To create a CSV utility, you specify a `name` (e.g. `csv_dump`; must be a valid
1037							unqualified Perl identifier/function name) and optionally `summary`,
1038							`description`, and other metadata like `links` or even `add_meta_props`. Then
1039							you specify one or more of `on_` or `before_` or `after_*` arguments to supply
1040							handlers (coderefs) for your CSV utility at various hook points.
1041
1042
1043							THE HOOKS
1044
1045							All code for hooks should accept a single argument `r`. `r` is a stash (hashref)
1046							of various data, the keys of which will depend on which hook point being called.
1047							You can also add more keys to store data or for flow control (see hook
1048							documentation below for more details).
1049
1050							The order of the hooks, in processing chronological order:
1051
1052							* on_begin
1053
1054							Called when utility begins, before reading CSV. You can use this hook e.g. to
1055							process arguments, set output filenames (if you allow custom output
1056							filenames).
1057
1058							* before_read_input
1059
1060							Called before opening any input CSV file. This hook is still called even if
1061							your utility sets `reads_csv` to false.
1062
1063							At this point, the `input_filenames` stash key (as well as other keys like
1064							`input_filename`, `input_filenum`, etc) has not been set. You can use this
1065							hook e.g. to set a custom `input_filenames`.
1066
1067							* before_open_input_files
1068
1069							Called before an input CSV file is about to be opened, including for stdin
1070							(`-`). You can use this hook e.g. to check/preprocess input file. Flow control
1071							is available by setting `$r->{wants_skip_files}` to skip reading all the input
1072							file and go directly to the `after_read_input` hook.
1073
1074							* before_open_input_file
1075
1076							Called before an input CSV file is about to be opened, including for stdin
1077							(`-`). For the first file, called after `before_open_input_file` hook. You can
1078							use this hook e.g. to check/preprocess input file. Flow control is available
1079							by setting `$r->{wants_skip_file}` to skip reading a single input file and go
1080							to the next file, or `$r->{wants_skip_files}` to skip reading the rest of the
1081							files and go directly to the `after_read_input` hook.
1082
1083							* on_input_header_row
1084
1085							Called when receiving header row. Will be called for every input file, and
1086							called even when user specify `--no-input-header`, in which case the header
1087							row will be the generated `["field1", "field2", ...]`. You can use this hook
1088							e.g. to add/remove/rearrange fields.
1089
1090							You can set `$r->{wants_fill_rows}` to a defined false if you do not want
1091							`$r->{input_rows}` to be filled with empty string elements when it contains
1092							less than the number of fields (in case of sparse values at the end). Normally
1093							you only want to do this when you want to do checking, e.g. in
1094							<prog:csv-check-rows>.
1095
1096							* on_input_data_row
1097
1098							Called when receiving each data row. You can use this hook e.g. to modify the
1099							row or print output (for line-by-line transformation or filtering).
1100
1101							* after_close_input_file
1102
1103							Called after each input file is closed, including for stdin (`-`) (although
1104							for stdin, the handle is not actually closed). Flow control is possible by
1105							setting `$r->{wants_skip_files}` to skip reading the rest of the files and go
1106							straight to the `after_close_input_files` hook.
1107
1108							* after_close_input_files
1109
1110							Called after the last input file is closed, after the last
1111							`after_close_input_file` hook, including for stdin (`-`) (although for stdin,
1112							the handle is not actually closed).
1113
1114							* after_read_input
1115
1116							Called after the last row of the last CSV file is read and the last file is
1117							closed. This hook is still called, if you set `reads_csv` option to false.
1118							At this point the stash keys related to CSV reading have all been cleared,
1119							including `input_filenames`, `input_filename`, `input_fh`, etc.
1120
1121							You can use this hook e.g. to print output if you buffer the output.
1122
1123							* on_end
1124
1125							Called when utility is about to exit. You can use this hook e.g. to return the
1126							final result.
1127
1128
1129							THE STASH
1130
1131							The common keys that `r` will contain:
1132
1133							- `gen_args`, hash. The arguments used to generate the CSV utility.
1134
1135							- `util_args`, hash. The arguments that your CSV utility accepts. Parsed from
1136							command-line arguments (or configuration files, or environment variables).
1137
1138							- `name`, str. The name of the CSV utility. Which can also be retrieved via
1139							`gen_args`.
1140
1141							- `code_print`, coderef. Routine provided for you to print something. Accepts a
1142							string. Takes care of opening the output files for you.
1143
1144							- `code_print_row`, coderef. Routine provided for you to print a data row. You
1145							pass the row (either arrayref or hashref). Takes care of opening the output
1146							files for you, as well as printing header row the first time, if needed.
1147
1148							- `code_print_header_row`, coderef. Routine provided for you to print header
1149							row. You don't need to pass any arguments. Will only print the header row once
1150							per output file if output header is enabled, even if called multiple times.
1151
1152							If you are accepting CSV data (`reads_csv` gen argument set to true), the
1153							following keys will also be available (in `on_input_header_row` and
1154							`on_input_data_row` hooks):
1155
1156							- `input_parser`, a <pm:Text::CSV_XS> instance for input parsing.
1157
1158							- `input_fields`, array of str. Input CSV's field names.
1159
1160							- `input_fields_idx`, hash with field name as keys and field index (0-based
1161							integer) as values.
1162
1163							- `input_filenames`, array of str.
1164
1165							- `input_filename`, str. The name of the current input file being read (`-` if
1166							reading from stdin).
1167
1168							- `input_filenum`, uint. The number of the current input file, 1 being the first
1169							file, 2 for the second, and so on.
1170
1171							- `input_fh`, the handle to the current file being read.
1172
1173							- `input_rownum`, uint. The number of rows that have been read (reset after each
1174							input file). In `on_input_header_row` phase, this will be 1 since header row
1175							(including the generated one) is the first row. Then in `on_input_data_row`
1176							phase (called the first time for a file), it will be 2 for the first data row,
1177							even if physically it is the first row for CSV file that does not have a
1178							header.
1179
1180							- `input_data_rownum`, uint. The number of data rows that have been read (reset
1181							after each input file). This will be equal to `input_rownum` less 1 if input
1182							file has header.
1183
1184							- `input_row`, aos (array of str). The current input CSV row as an arrayref.
1185
1186							- `input_row_as_hashref`, hos (hash of str). The current input CSV row as a
1187							hashref, with field names as hash keys and field values as hash values. This
1188							will only be calculated if utility wants it. Utility can express so by setting
1189							`$r->{wants_input_row_as_hashref}` to true, e.g. in the `on_begin` hook.
1190
1191							- `input_header_row_count`, uint. Contains the number of actual header rows that
1192							have been read. If CLI user specifies `--no-input-header`, this will stay at
1193							zero. Will be reset for each CSV file.
1194
1195							- `input_data_row_count`, int. Contains the number of actual data rows that have
1196							read. Will be reset for each CSV file.
1197
1198							If you are outputting CSV (`writes_csv` gen argument set to true), the following
1199							keys will be available:
1200
1201							- `output_emitter`, a <pm:Text::CSV_XS> instance for output.
1202
1203							- `output_fields`, array of str. Should be set to list of output field names. If
1204							unset, will be set to be the same as `input_fields`.
1205
1206							- `output_fields_idx`, hash with field names as keys and field indexes (0-based
1207							integer) as values. Normally you do not need to set this manually; you just
1208							need to set `output_fields` and this hash will be computed automatically for
1209							you just before the first output row is outputted.
1210
1211							- `output_filenames`, array of str.
1212
1213							- `output_filename`, str, name of current output file.
1214
1215							- `output_filenum`, uint, the number of the current output file, 1 being the
1216							first file, 2 for the second, and so on.
1217
1218							- `output_fh`, handle to the current output file.
1219
1220							- `output_rownum`, uint. The number of rows that have been outputted (reset
1221							after each output file).
1222
1223							- `output_data_rownum`, uint. The number of data rows that have been outputted
1224							(reset after each output file). This will be equal to `input_rownum` less 1 if
1225							input file has header.
1226
1227							For other hook-specific keys, see the documentation for associated hook point.
1228
1229
1230							ACCEPTING ADDITIONAL COMMAND-LINE OPTIONS/ARGUMENTS
1231
1232							As mentioned above, you will get additional command-line options/arguments in
1233							`$r->{util_args}` hashref. Some options/arguments are already added by
1234							`gen_csv_util`, e.g. `input_filename` or `input_filenames` along with
1235							`input_sep_char`, etc (when your utility declares `reads_csv`),
1236							`output_filename` or `output_filenames` along with `overwrite`,
1237							`output_sep_char`, etc (when your utility declares `writes_csv`).
1238
1239							If you want to accept additional arguments/options, you specify them in
1240							`add_args` (hashref, with key being Each option/argument has to be specified
1241							first via `add_args` (as hashref, with key being argument name and value the
1242							argument specification as defined in <pm:Rinci::function>)). Some argument
1243							specifications have been defined in <pm:App::CSVUtils> and can be used. See
1244							existing utilities for examples.
1245
1246
1247							READING CSV DATA
1248
1249							To read CSV data, normally your utility would provide handler for the
1250							`on_input_data_row` hook and sometimes additionally `on_input_header_row`.
1251
1252
1253							OUTPUTTING STRING OR RETURNING RESULT
1254
1255							To output string, usually you call the provided routine `$r->{code_print}`. This
1256							routine will open the output files for you.
1257
1258							You can also return enveloped result directly by setting `$r->{result}`.
1259
1260
1261							OUTPUTTING CSV DATA
1262
1263							To output CSV data, usually you call the provided routine `$r->{code_print_row}`.
1264							This routine accepts a row (arrayref or hashref). This routine will open the
1265							output files for you when needed, as well as print header row automatically.
1266
1267							You can also buffer rows from input to e.g. `$r->{output_rows}`, then call
1268							`$r->{code_print_row}` repeatedly in the `after_read_input` hook to print all the
1269							rows.
1270
1271
1272							READING MULTIPLE CSV FILES
1273
1274							To read multiple CSV files, you first specify `reads_multiple_csv`. Then, you
1275							can supply handler for `on_input_header_row` and `on_input_data_row` as usual.
1276							If you want to do something before/after each input file, you can also supply
1277							handler for `before_open_input_file` or `after_close_input_file`.
1278
1279
1280							WRITING TO MULTIPLE CSV FILES
1281
1282							Similarly, to write to many CSv files, you first specify `writes_multiple_csv`.
1283							Then, you can supply handler for `on_input_header_row` and `on_input_data_row`
1284							as usual. To switch to the next file, set
1285							`$r->{wants_switch_to_next_output_file}` to true, in which case the next call to
1286							`$r->{code_print_row}` will close the current file and open the next file.
1287
1288
1289							CHANGING THE OUTPUT FIELDS
1290
1291							When calling `$r->{code_print_row}`, you can output whatever fields you want. By
1292							convention, you can set `$r->{output_fields}` and `$r->{output_fields_idx}` to
1293							let other handlers know about the output fields. For example, see the
1294							implementation of <prog:csv-concat>.
1295
1296							_
1297							args => {
1298							name => {
1299							schema => 'perl::identifier::unqualified_ascii*',
1300							req => 1,
1301							tags => ['category:metadata'],
1302							},
1303							summary => {
1304							schema => 'str*',
1305							tags => ['category:metadata'],
1306							},
1307							description => {
1308							schema => 'str*',
1309							tags => ['category:metadata'],
1310							},
1311							links => {
1312							schema => ['array', of=>'hash'], # XXX defhashes
1313							tags => ['category:metadata'],
1314							},
1315							examples => {
1316							schema => ['array*'], # defhashes
1317							tags => ['category:metadata'],
1318							},
1319							add_meta_props => {
1320							summary => 'Add additional Rinci function metadata properties',
1321							schema => ['hash*'],
1322							tags => ['category:metadata'],
1323							},
1324							add_args => {
1325							schema => ['hash*'],
1326							tags => ['category:metadata'],
1327							},
1328							add_args_rels => {
1329							schema => ['hash*'],
1330							tags => ['category:metadata'],
1331							},
1332
1333							reads_csv => {
1334							summary => 'Whether utility reads CSV data',
1335							'summary.alt.bool.not' => 'Specify that utility does not read CSV data',
1336							schema => 'bool*',
1337							default => 1,
1338							},
1339							reads_multiple_csv => {
1340							summary => 'Whether utility accepts CSV data',
1341							schema => 'bool*',
1342							description => <<'_',
1343
1344							Setting this option to true will implicitly set the `reads_csv` option to true,
1345							obviously.
1346
1347							_
1348							},
1349							writes_csv => {
1350							summary => 'Whether utility writes CSV data',
1351							'summary.alt.bool.not' => 'Specify that utility does not write CSV data',
1352							schema => 'bool*',
1353							default => 1,
1354							},
1355							writes_multiple_csv => {
1356							summary => 'Whether utility outputs CSV data',
1357							schema => 'bool*',
1358							description => <<'_',
1359
1360							Setting this option to true will implicitly set the `writes_csv` option to true,
1361							obviously.
1362
1363							_
1364							},
1365
1366							on_begin => {
1367							schema => 'code*',
1368							},
1369							before_read_input => {
1370							schema => 'code*',
1371							},
1372							before_open_input_files => {
1373							schema => 'code*',
1374							},
1375							before_open_input_file => {
1376							schema => 'code*',
1377							},
1378							on_input_header_row => {
1379							schema => 'code*',
1380							},
1381							on_input_data_row => {
1382							schema => 'code*',
1383							},
1384							after_close_input_file => {
1385							schema => 'code*',
1386							},
1387							after_close_input_files => {
1388							schema => 'code*',
1389							},
1390							after_read_input => {
1391							schema => 'code*',
1392							},
1393							on_end => {
1394							schema => 'code*',
1395							},
1396							},
1397							result_naked => 1,
1398							result => {
1399							schema => 'bool*',
1400							},
1401							};
1402							sub gen_csv_util {
1403	25			25	1	204	my %gen_args = @_;
1404
1405	25	50				470	my $name = delete($gen_args{name}) or die "Please specify name";
1406	25		50			89	my $summary = delete($gen_args{summary}) // '(No summary)';
1407	25		100			92	my $description = delete($gen_args{description}) // '(No description)';
1408	25		100			108	my $links = delete($gen_args{links}) // [];
1409	25		100			83	my $examples = delete($gen_args{examples}) // [];
1410	25					46	my $add_meta_props = delete $gen_args{add_meta_props};
1411	25					44	my $add_args = delete $gen_args{add_args};
1412	25					47	my $add_args_rels = delete $gen_args{add_args_rels};
1413	25					39	my $reads_multiple_csv = delete($gen_args{reads_multiple_csv});
1414	25		50			84	my $reads_csv = delete($gen_args{reads_csv}) // 1;
1415	25		50			53	my $tags = [ @{ delete($gen_args{tags}) // [] } ];
	25					90
1416	25	100				62	$reads_csv = 1 if $reads_multiple_csv;
1417	25					36	my $writes_multiple_csv = delete($gen_args{writes_multiple_csv});
1418	25		100			83	my $writes_csv = delete($gen_args{writes_csv}) // 1;
1419	25	50				68	$writes_csv = 1 if $writes_multiple_csv;
1420	25					48	my $on_begin = delete $gen_args{on_begin};
1421	25					39	my $before_read_input = delete $gen_args{before_read_input};
1422	25					43	my $before_open_input_files = delete $gen_args{before_open_input_files};
1423	25					42	my $before_open_input_file = delete $gen_args{before_open_input_file};
1424	25					33	my $on_input_header_row = delete $gen_args{on_input_header_row};
1425	25					39	my $on_input_data_row = delete $gen_args{on_input_data_row};
1426	25					37	my $after_close_input_file = delete $gen_args{after_close_input_file};
1427	25					36	my $after_close_input_files = delete $gen_args{after_close_input_files};
1428	25					40	my $after_read_input = delete $gen_args{after_read_input};
1429	25					47	my $on_end = delete $gen_args{on_end};
1430
1431	25	50				77	scalar(keys %gen_args) and die "Unknown argument(s): ".join(", ", keys %gen_args);
1432
1433	25					41	my $code;
1434							CREATE_CODE: {
1435	25					39	$code = sub {
1436	119			119		302386	my %util_args = @_;
1437
1438	119		100			576	my $has_header = $util_args{input_header} // 1;
1439	119		66			381	my $outputs_header = $util_args{output_header} // $has_header;
1440
1441	119					398	my $r = {
1442							gen_args => \%gen_args,
1443							util_args => \%util_args,
1444							name => $name,
1445							};
1446
1447							# inside the main eval block, we call hook handlers. A handler can
1448							# throw an exception (which can be a string or an enveloped response
1449							# like [500, "some error message"], see Rinci::function). we trap
1450							# the exception so we can return the appropriate enveloped response.
1451							MAIN_EVAL:
1452	119					211	eval {
1453
1454							# do some checking
1455	119	50	33			342	if ($util_args{inplace} && (!$reads_csv \|\| !$writes_csv)) {
			66
1456	0					0	die [412, "--inplace cannot be specified when we do not read & write CSV"];
1457							}
1458
1459	119	100				251	if ($on_begin) {
1460	37					122	log_trace "[csvutil] Calling on_begin hook handler ...";
1461	37					150	$on_begin->($r);
1462							}
1463
1464							my $code_open_file = sub {
1465							# set output filenames, if not yet
1466	211	100		211		414	unless ($r->{output_filenames}) {
1467	79					114	my @output_filenames;
1468	79	100				188	if ($util_args{inplace}) {
		50
1469	2					3	for my $input_filename (@{ $r->{input_filenames} }) {
	2					5
1470	2					4	my $output_filename;
1471	2					3	while (1) {
1472	2					10	$output_filename = $input_filename . "." . _randext(5);
1473	2	50				56	last unless -e $output_filename;
1474							}
1475	2					20	push @output_filenames, $output_filename;
1476							}
1477							} elsif ($writes_multiple_csv) {
1478	0		0			0	@output_filenames = @{ $util_args{output_filenames} // ['-'] };
	0					0
1479							} else {
1480	77		50			340	@output_filenames = ($util_args{output_filename} // '-');
1481							}
1482
1483							CHECK_OUTPUT_FILENAME_SAME_AS_INPUT_FILENAME: {
1484	79					129	my %seen_output_abs_path; # key = output filename
	79					101
1485	79	100	66			255	last unless $reads_csv && $writes_csv;
1486	76					104	for my $input_filename (@{ $r->{input_filenames} }) {
	76					157
1487	78	50				156	next if $input_filename eq '-';
1488	78					2231	my $input_abs_path = Cwd::abs_path($input_filename);
1489	78	50				269	die [500, "Can't get absolute path of input filename '$input_filename'"] unless $input_abs_path;
1490	78					174	for my $output_filename (@output_filenames) {
1491	78	100				269	next if $output_filename eq '-';
1492	2	50				5	next if $seen_output_abs_path{$output_filename};
1493	2					41	my $output_abs_path = Cwd::abs_path($output_filename);
1494	2	50				8	die [500, "Can't get absolute path of output filename '$output_filename'"] unless $output_abs_path;
1495	2	0				8	die [412, "Cannot set output filename to '$output_filename' ".
		50
1496							($output_filename ne $output_abs_path ? "($output_abs_path) ":"").
1497							"because it is the same as input filename and input will be clobbered; use --inplace to avoid clobbering<"]
1498							if $output_abs_path eq $input_abs_path;
1499							}
1500							}
1501							} # CHECK_OUTPUT_FILENAME_SAME_AS_INPUT_FILENAME
1502
1503	79					211	$r->{output_filenames} = \@output_filenames;
1504	79		50			369	$r->{output_num_of_files} //= scalar(@output_filenames);
1505							} # set output filenames
1506
1507							# open the next file, if not yet
1508	211	100	66			890	if (!$r->{output_fh} \|\| $r->{wants_switch_to_next_output_file}) {
1509	79		50			368	$r->{output_filenum} //= 0;
1510	79					107	$r->{output_filenum}++;
1511
1512	79					233	$r->{output_rownum} = 0;
1513	79					142	$r->{output_data_rownum} = 0;
1514
1515							# close the previous file, if any
1516	79	50	33			187	if ($r->{output_fh} && $r->{output_filename} ne '-') {
1517	0					0	log_info "[csvutil] Closing output file '$r->{output_filename}' ...";
1518	0	0				0	close $r->{output_fh} or die [500, "Can't close output file '$r->{output_filename}': $!"];
1519	0					0	delete $r->{has_printed_header};
1520	0					0	delete $r->{wants_switch_to_next_output_file};
1521							}
1522
1523							# we have exhausted all the files, do nothing & return
1524	79	50				113	return if $r->{output_filenum} > @{ $r->{output_filenames} };
	79					267
1525
1526	79					195	$r->{output_filename} = $r->{output_filenames}[ $r->{output_filenum}-1 ];
1527							log_info "[csvutil] [%d/%s] Opening output file %s ...",
1528	79					305	$r->{output_filenum}, $r->{output_num_of_files}, $r->{output_filename};
1529	79	100				287	if ($r->{output_filename} eq '-') {
1530	77					190	$r->{output_fh} = \*STDOUT;
1531							} else {
1532	2	50				21	if (-f $r->{output_filename}) {
1533	0	0				0	if ($r->{util_args}{overwrite}) {
1534	0					0	log_info "[csvutil] Will be overwriting output file %s", $r->{output_filename};
1535							} else {
1536	0					0	die [412, "Refusing to overwrite existing output file '$r->{output_filename}', choose another name or use --overwrite (-O)"];
1537							}
1538							}
1539	2					11	my ($fh, $err) = _open_file_write($r->{output_filename});
1540	2	50				9	die $err if $err;
1541	2					6	$r->{output_fh} = $fh;
1542							}
1543							} # open the next file
1544	119					742	}; # code_open_file
1545
1546							my $code_print = sub {
1547	9			9		15	my $str = shift;
1548	9					23	$code_open_file->();
1549	9					14	print { $r->{output_fh} } $str;
	9					275
1550	119					365	}; # code_print
1551	119					268	$r->{code_print} = $code_print;
1552
1553	119	100				269	if ($writes_csv) {
1554	94					219	my $output_emitter = _instantiate_emitter(\%util_args);
1555	94					11219	$r->{output_emitter} = $output_emitter;
1556	94					206	$r->{has_printed_header} = 0;
1557
1558							my $code_print_header_row = sub {
1559							# set output fields, if not yet
1560	202	100		202		492	unless ($r->{output_fields}) {
1561							# by default, use the
1562	24					55	$r->{output_fields} = $r->{input_fields};
1563							}
1564
1565							# index the output fields, if not yet
1566	202	100				424	unless ($r->{output_fields_idx}) {
1567	75					187	$r->{output_fields_idx} = {};
1568	75					107	for my $j (0 .. $#{ $r->{output_fields} }) {
	75					210
1569	206					430	$r->{output_fields_idx}{ $r->{output_fields}[$j] } = $j;
1570							}
1571							}
1572
1573	202					473	$code_open_file->();
1574
1575							# print header line, if not yet
1576	202	100	100			682	if ($outputs_header && !$r->{has_printed_header}) {
1577	74					110	$r->{has_printed_header}++;
1578	74					1388	$r->{output_emitter}->print($r->{output_fh}, $r->{output_fields});
1579	74					3880	print { $r->{output_fh} } "\n";
	74					969
1580	74					288	$r->{output_rownum}++;
1581							}
1582	94					419	};
1583	94					177	$r->{code_print_header_row} = $code_print_header_row;
1584
1585							my $code_print_row = sub {
1586	202			202		337	my $row = shift;
1587
1588	202					490	$code_print_header_row->();
1589
1590							# print data line
1591	202	50				506	if ($row) {
1592	202	50				504	if (ref $row eq 'HASH') {
1593	0					0	my $row0 = $row;
1594	0					0	$row = [];
1595	0					0	for my $j (0 .. $#{ $r->{output_fields} }) {
	0					0
1596	0		0			0	$row->[$j] = $row0->{ $r->{output_fields}[$j] } // '';
1597							}
1598							}
1599	202					1376	$r->{output_emitter}->print( $r->{output_fh}, $row );
1600	202					4434	print { $r->{output_fh} } "\n";
	202					2113
1601	202					618	$r->{output_rownum}++;
1602	202					1077	$r->{output_data_rownum}++;
1603							}
1604	94					288	}; # code_print_row
1605	94					230	$r->{code_print_row} = $code_print_row;
1606							} # if outputs csv
1607
1608	119	50				269	if ($before_read_input) {
1609	0					0	log_trace "[csvutil] Calling before_read_input handler ...";
1610	0					0	$before_read_input->($r);
1611							}
1612
1613							READ_CSV: {
1614	119	50				178	last unless $reads_csv;
	119					235
1615
1616	119					257	my $input_parser = _instantiate_parser(\%util_args, 'input_');
1617	119					13022	$r->{input_parser} = $input_parser;
1618
1619	119					206	my @input_filenames;
1620	119	100				266	if ($reads_multiple_csv) {
1621	18		50			32	@input_filenames = @{ $util_args{input_filenames} // ['-'] };
	18					74
1622							} else {
1623	101		50			334	@input_filenames = ($util_args{input_filename} // '-');
1624							}
1625	119		50			552	$r->{input_filenames} //= \@input_filenames;
1626
1627							BEFORE_INPUT_FILENAME:
1628	119					221	$r->{input_filenum} = 0;
1629
1630							INPUT_FILENAME:
1631	119					223	for my $input_filename (@input_filenames) {
1632	138					218	$r->{input_filenum}++;
1633	138					269	$r->{input_filename} = $input_filename;
1634
1635	138	100	100			478	if ($r->{input_filenum} == 1 && $before_open_input_files) {
1636	1					6	log_trace "[csvutil] Calling before_open_input_files handler ...";
1637	1					7	$before_open_input_files->($r);
1638	1	50				4	if (delete $r->{wants_skip_files}) {
1639	0					0	log_trace "[csvutil] Handler wants to skip files, skipping all input files";
1640	0					0	last READ_CSV;
1641							}
1642							}
1643
1644	138	50				311	if ($before_open_input_file) {
1645	0					0	log_trace "[csvutil] Calling before_open_input_file handler ...";
1646	0					0	$before_open_input_file->($r);
1647	0	0				0	if (delete $r->{wants_skip_file}) {
		0
1648	0					0	log_trace "[csvutil] Handler wants to skip this file, moving on to the next file";
1649	0					0	next INPUT_FILENAME;
1650							} elsif (delete $r->{wants_skip_files}) {
1651	0					0	log_trace "[csvutil] Handler wants to skip all files, skipping all input files";
1652	0					0	last READ_CSV;
1653							}
1654							}
1655
1656							log_info "[csvutil] [file %d/%d] Reading input file %s ...",
1657	138					495	$r->{input_filenum}, scalar(@input_filenames), $input_filename;
1658	138					470	my ($fh, $err) = _open_file_read($input_filename);
1659	138	50				331	die $err if $err;
1660	138					523	$r->{input_fh} = $r->{input_fhs}[ $r->{input_filenum}-1 ] = $fh;
1661
1662	138					193	my $i;
1663	138					224	$r->{input_header_row_count} = 0;
1664	138					365	$r->{input_data_row_count} = 0;
1665	138					255	$r->{input_fields} = []; # array, field names in order
1666	138					258	$r->{input_field_idxs} = {}; # key=field name, value=index (0-based)
1667	138					207	my $row0;
1668							my $code_getline = sub {
1669	661	50	33	661		3310	if ($r->{stdin_input_fields} && $r->{input_filename} eq '-') {
		100	100
		100	100
1670	0	0				0	if ($i == 0) {
1671							# we have read the header for stdin. since
1672							# we can't seek to the beginning, we return
1673							# the saved fields
1674	0					0	$r->{input_header_row_count}++;
1675	0					0	return $r->{stdin_input_fields};
1676							} else {
1677	0					0	my $row = $input_parser->getline($r->{input_fh});
1678	0	0				0	$r->{input_data_row_count}++ if $row;
1679	0					0	return $row;
1680							}
1681							} elsif ($i == 0 && !$has_header) {
1682							# this is the first line of a file and user
1683							# specifies there is no input header. we save
1684							# the line and return the generated field names
1685							# instead.
1686	4					165	$row0 = $input_parser->getline($r->{input_fh});
1687	4	50				254	return unless $row0;
1688	4					14	return [map { "field$_" } 1..@$row0];
	12					53
1689							} elsif ($i == 1 && !$has_header) {
1690							# we return the saved first line
1691	4	50				12	$r->{input_data_row_count}++ if $row0;
1692	4					12	return $row0;
1693							}
1694	653					17028	my $res = $input_parser->getline($r->{input_fh});
1695	653	100				23844	if ($res) {
1696	537	100				1191	$r->{input_header_row_count}++ if $i==0;
1697	537	100				1121	$r->{input_data_row_count}++ if $i;
1698							}
1699	653					1883	$res;
1700	138					712	};
1701	138					363	$r->{code_getline} = $code_getline;
1702
1703	138					218	$i = 0;
1704	138					260	while ($r->{input_row} = $code_getline->()) {
1705	545					733	$i++;
1706	545					867	$r->{input_rownum} = $i;
1707	545	100				1124	$r->{input_data_rownum} = $has_header ? $i-1 : $i;
1708	545	100				983	if ($i == 1) {
1709							# gather the list of fields
1710	138					235	$r->{input_fields} = $r->{input_row};
1711	138	50	0			346	$r->{stdin_input_fields} //= $r->{input_row} if $input_filename eq '-';
1712	138					226	$r->{orig_input_fields} = $r->{input_fields};
1713	138					304	$r->{input_fields_idx} = {};
1714	138					199	for my $j (0 .. $#{ $r->{input_fields} }) {
	138					443
1715	370					893	$r->{input_fields_idx}{ $r->{input_fields}[$j] } = $j;
1716							}
1717
1718	138	100				281	if ($on_input_header_row) {
1719	124					436	log_trace "[csvutil] Calling on_input_header_row hook handler ...";
1720	124					545	$on_input_header_row->($r);
1721
1722	115	100				422	if (delete $r->{wants_skip_file}) {
		100
1723	3					10	log_trace "[csvutil] Handler wants to skip this file, moving on to the next file";
1724	3					13	next INPUT_FILENAME;
1725							} elsif (delete $r->{wants_skip_files}) {
1726	1					8	log_trace "[csvutil] Handler wants to skip all files, skipping all input files";
1727	1					4	last READ_CSV;
1728							}
1729							}
1730
1731							# reindex the fields, should the above hook
1732							# handler adds/removes fields. let's save the
1733							# old fields_idx to orig_fields_idx.
1734	125					219	$r->{orig_input_fields_idx} = $r->{input_fields_idx};
1735	125					192	$r->{input_fields_idx} = {};
1736	125					252	for my $j (0 .. $#{ $r->{input_fields} }) {
	125					307
1737	335					802	$r->{input_fields_idx}{ $r->{input_fields}[$j] } = $j;
1738							}
1739
1740							} else {
1741							# fill up the elements of row to the number of
1742							# fields, in case the row contains sparse values
1743	407	50	33			928	unless (defined $r->{wants_fill_rows} && !$r->{wants_fill_rows}) {
1744	407	100				528	if (@{ $r->{input_row} } < @{ $r->{input_fields} }) {
	407					640
	407					813
1745	3					7	splice @{ $r->{input_row} }, scalar(@{ $r->{input_row} }), 0, (("") x (@{ $r->{input_fields} } - @{ $r->{input_row} }));
	3					6
	3					4
	3					7
	3					8
1746							}
1747							}
1748
1749							# generate the hashref version of row if utility
1750							# requires it
1751	407	100				804	if ($r->{wants_input_row_as_hashref}) {
1752	34					71	$r->{input_row_as_hashref} = {};
1753	34					55	for my $j (0 .. $#{ $r->{input_row} }) {
	34					84
1754							# ignore extraneous data fields
1755	93	50				122	last if $j >= @{ $r->{input_fields} };
	93					171
1756	93					231	$r->{input_row_as_hashref}{ $r->{input_fields}[$j] } = $r->{input_row}[$j];
1757							}
1758							}
1759
1760	407	50				804	if ($on_input_data_row) {
1761	407	100				975	log_trace "[csvutil] Calling on_input_data_row hook handler (for first data row) ..." if $r->{input_rownum} <= 2;
1762	407					1241	$on_input_data_row->($r);
1763
1764	402	100				1405	if (delete $r->{wants_skip_file}) {
		100
1765	2					7	log_trace "[csvutil] Handler wants to skip this file, moving on to the next file";
1766	2					8	next INPUT_FILENAME;
1767							} elsif (delete $r->{wants_skip_files}) {
1768	2					8	log_trace "[csvutil] Handler wants to skip all files, skipping all input files";
1769	2					9	last READ_CSV;
1770							}
1771							}
1772							}
1773
1774							} # while getline
1775
1776							# XXX actually close filehandle except stdin
1777
1778	116	50				357	if ($after_close_input_file) {
1779	0					0	log_trace "[csvutil] Calling after_close_input_file handler ...";
1780	0					0	$after_close_input_file->($r);
1781	0	0				0	if (delete $r->{wants_skip_files}) {
1782	0					0	log_trace "[csvutil] Handler wants to skip reading all file, skipping";
1783	0					0	last READ_CSV;
1784							}
1785							}
1786							} # for input_filename
1787
1788	102	100				577	if ($after_close_input_files) {
1789	32					102	log_trace "[csvutil] Calling after_close_input_files handler ...";
1790	32					127	$after_close_input_files->($r);
1791							}
1792
1793							} # READ_CSV
1794
1795							# cleanup stash from csv-reading-related keys
1796	105					445	delete $r->{input_filenames};
1797	105					167	delete $r->{input_filenum};
1798	105					175	delete $r->{input_filename};
1799	105					152	delete $r->{input_fh};
1800	105					157	delete $r->{input_rownum};
1801	105					174	delete $r->{input_data_rownum};
1802	105					155	delete $r->{input_row};
1803	105					158	delete $r->{input_row_as_hashref};
1804	105					193	delete $r->{input_fields};
1805	105					216	delete $r->{input_fields_idx};
1806	105					174	delete $r->{orig_input_fields_idx};
1807	105					833	delete $r->{code_getline};
1808	105					158	delete $r->{wants_input_row_as_hashref};
1809
1810	105	100				222	if ($after_read_input) {
1811	15					64	log_trace "[csvutil] Calling after_read_input handler ...";
1812	15					60	$after_read_input->($r);
1813							}
1814
1815							# cleanup stash from csv-outputting-related keys
1816	105					224	delete $r->{output_num_of_files};
1817	105					149	delete $r->{output_filenum};
1818	105	100				238	if ($r->{output_fh}) {
1819	79	100				229	if ($r->{output_filename} ne '-') {
1820	2					13	log_info "[csvutil] Closing output file '$r->{output_filename}' ...";
1821	2	50				134	close $r->{output_fh} or die [500, "Can't close output file '$r->{output_filename}': $!"];
1822							}
1823	79					146	delete $r->{output_fh};
1824							}
1825	105	100				244	if ($r->{util_args}{inplace}) {
1826	2		50			15	my $output_filenum = $r->{output_filenum} // 0;
1827	2					4	my $i = -1;
1828	2					4	for my $output_filename (@{ $r->{output_filenames} }) {
	2					7
1829	2					3	$i++;
1830	2	50				7	last if $i > $output_filenum;
1831	2	50				24	(my $input_filename = $output_filename) =~ s/\.\w{5}\z//
1832							or die [500, "BUG: Can't get original input file '$output_filename'"];
1833	2	100				10	if (length(my $ext = $r->{util_args}{inplace_backup_ext})) {
1834	1					7	my $backup_filename = $input_filename . $ext;
1835	1					7	log_info "[csvutil] Backing up input file '$output_filename' -> '$backup_filename' ...";
1836	1	50				43	rename $input_filename, $backup_filename or die [500, "Can't rename '$input_filename' -> '$backup_filename': $!"];
1837							}
1838	2					14	log_info "[csvutil] Renaming from temporary output file '$output_filename' -> '$input_filename' ...";
1839	2	50				179	rename $output_filename, $input_filename or die [500, "Can't rename back '$output_filename' -> '$input_filename': $!"];
1840							}
1841							}
1842	105					164	delete $r->{output_filenames};
1843	105					153	delete $r->{output_filename};
1844	105					147	delete $r->{output_rownum};
1845	105					136	delete $r->{output_data_rownum};
1846	105					166	delete $r->{code_print};
1847	105					478	delete $r->{code_print_row};
1848	105					403	delete $r->{code_print_header_row};
1849	105					150	delete $r->{has_printed_header};
1850	105					138	delete $r->{wants_switch_to_next_output_file};
1851
1852	105	100				1654	if ($on_end) {
1853	11					36	log_trace "[csvutil] Calling on_end hook handler ...";
1854	11					49	$on_end->($r);
1855							}
1856
1857							}; # MAIN_EVAL
1858
1859	119					255	my $err = $@;
1860	119	100				240	if ($err) {
1861	14	50				45	$err = [500, $err] unless ref $err;
1862	14					49	return $err;
1863							}
1864
1865							RETURN_RESULT:
1866	105	100				302	if (!$r->{result}) {
		50
		50
1867	91					229	$r->{result} = [200];
1868							} elsif (!ref($r->{result})) {
1869	0					0	$r->{result} = [500, "BUG: Result (r->{result}) is set to a non-reference ($r->{result}), probably by one of the handlers"];
1870							} elsif (ref($r->{result}) ne 'ARRAY') {
1871	0					0	$r->{result} = [500, "BUG: Result (r->{result}) is not set to an enveloped result (arrayref) ($r->{result}), probably by one of the handlers"];
1872							}
1873	105					3490	$r->{result};
1874	25					290	};
1875							} # CREATE_CODE
1876
1877	25					51	my $meta;
1878							CREATE_META: {
1879
1880	25					545	$meta = {
1881							v => 1.1,
1882							summary => $summary,
1883							description => $description,
1884							args => {},
1885							args_rels => {},
1886							links => $links,
1887							examples => $examples,
1888							tags => $tags,
1889							};
1890
1891							CREATE_ARGS_PROP: {
1892	25	50				46	if ($add_args) {
	25					89
1893	25					167	$meta->{args}{$_} = $add_args->{$_} for keys %$add_args;
1894							}
1895
1896	25	50				76	if ($reads_csv) {
1897	25					111	$meta->{args}{$_} = {%{$argspecs_csv_input{$_}}} for keys %argspecs_csv_input;
	125					557
1898
1899	25	100				66	if ($reads_multiple_csv) {
1900	3					6	$meta->{args}{input_filenames} = {%{$argspecopt_input_filenames{input_filenames}}};
	3					23
1901	3					16	_add_arg_pos($meta->{args}, 'input_filenames', 'slurpy');
1902	3					9	push @$tags, 'reads-multiple-csv';
1903							} else {
1904	22					36	$meta->{args}{input_filename} = {%{$argspecopt_input_filename{input_filename}}};
	22					118
1905	22					77	_add_arg_pos($meta->{args}, 'input_filename');
1906							}
1907
1908	25					68	push @$tags, 'reads-csv';
1909							} # if reads_csv
1910
1911	25	100				82	if ($writes_csv) {
1912	17					66	$meta->{args}{$_} = {%{$argspecs_csv_output{$_}}} for keys %argspecs_csv_output;
	119					499
1913
1914	17	50				50	if ($reads_csv) {
1915	17					56	$meta->{args}{$_} = {%{$argspecsopt_inplace{$_}}} for keys %argspecsopt_inplace;
	34					185
1916	17		50			139	$meta->{args_rels}{'dep_all&'} //= [];
1917	17					34	push @{ $meta->{args_rels}{'dep_all&'} }, ['inplace_backup_ext', ['inplace']];
	17					64
1918	17		50			79	$meta->{args_rels}{'choose_one&'} //= [];
1919	17					29	push @{ $meta->{args_rels}{'choose_one&'} }, ['inplace', 'output_filename'];
	17					50
1920	17					28	push @{ $meta->{args_rels}{'choose_one&'} }, ['inplace', 'output_filenames'];
	17					45
1921							}
1922
1923	17	50				48	if ($writes_multiple_csv) {
1924	0					0	$meta->{args}{output_filenames} = {%{$argspecopt_output_filenames{output_filenames}}};
	0					0
1925	0					0	_add_arg_pos($meta->{args}, 'output_filenames', 'slurpy');
1926	0	0				0	if ($reads_csv) {
1927	0		0			0	$meta->{args_rels}{'choose_one&'} //= [];
1928	0					0	push @{ $meta->{args_rels}{'choose_one&'} }, [qw/output_filenames inplace/];
	0					0
1929							}
1930	0					0	push @$tags, 'writes-multiple-csv';
1931							} else {
1932	17					29	$meta->{args}{output_filename} = {%{$argspecopt_output_filename{output_filename}}};
	17					81
1933	17					75	_add_arg_pos($meta->{args}, 'output_filename');
1934	17	50				56	if ($reads_csv) {
1935	17		50			47	$meta->{args_rels}{'choose_one&'} //= [];
1936	17					30	push @{ $meta->{args_rels}{'choose_one&'} }, [qw/output_filename inplace/];
	17					88
1937							}
1938							}
1939
1940	17					23	$meta->{args}{overwrite} = {%{$argspecopt_overwrite{overwrite}}};
	17					79
1941	17		50			98	$meta->{args_rels}{'dep_any&'} //= [];
1942	17					54	push @{ $meta->{args_rels}{'dep_any&'} }, ['overwrite', ['output_filename', 'output_filenames']];
	17					67
1943
1944	17					51	push @$tags, 'writes-csv';
1945							} # if writes csv
1946
1947							} # CREATE_ARGS_PROP
1948
1949							CREATE_ARGS_RELS_PROP: {
1950	25					42	$meta->{args_rels} = {};
	25					81
1951	25	100				59	if ($add_args_rels) {
1952	3					17	$meta->{args_rels}{$_} = $add_args_rels->{$_} for keys %$add_args_rels;
1953							}
1954							} # CREATE_ARGS_RELS_PROP
1955
1956	25	50				74	if ($add_meta_props) {
1957	0					0	$meta->{$_} = $add_meta_props->{$_} for keys %$add_meta_props;
1958							}
1959
1960							} # CREATE_META
1961
1962							{
1963	25					41	my $package = caller();
	25					42
	25					96
1964	1			1		9	no strict 'refs'; ## no critic: TestingAndDebugging::ProhibitNoStrict
	1					3
	1					336
1965	25					508	*{"$package\::$name"} = $code;
	25					154
1966							#use DD; dd $meta;
1967	25					41	${"$package\::SPEC"}{$name} = $meta;
	25					150
1968							}
1969
1970	25					89	1;
1971							}
1972
1973							1;
1974							# ABSTRACT: CLI utilities related to CSV
1975
1976							__END__
1977
1978							=pod
1979
1980							=encoding UTF-8
1981
1982							=head1 NAME
1983
1984							App::CSVUtils - CLI utilities related to CSV
1985
1986							=head1 VERSION
1987
1988							This document describes version 1.030 of App::CSVUtils (from Perl distribution App-CSVUtils), released on 2023-07-25.
1989
1990							=head1 DESCRIPTION
1991
1992							This distribution contains the following CLI utilities:
1993
1994							=over
1995
1996							=item 1. L<csv-add-fields>
1997
1998							=item 2. L<csv-avg>
1999
2000							=item 3. L<csv-check-cell-values>
2001
2002							=item 4. L<csv-check-field-names>
2003
2004							=item 5. L<csv-check-field-values>
2005
2006							=item 6. L<csv-check-rows>
2007
2008							=item 7. L<csv-cmp>
2009
2010							=item 8. L<csv-concat>
2011
2012							=item 9. L<csv-convert-to-hash>
2013
2014							=item 10. L<csv-csv>
2015
2016							=item 11. L<csv-delete-fields>
2017
2018							=item 12. L<csv-dump>
2019
2020							=item 13. L<csv-each-row>
2021
2022							=item 14. L<csv-fill-cells>
2023
2024							=item 15. L<csv-fill-template>
2025
2026							=item 16. L<csv-find-values>
2027
2028							=item 17. L<csv-freqtable>
2029
2030							=item 18. L<csv-gen>
2031
2032							=item 19. L<csv-get-cells>
2033
2034							=item 20. L<csv-grep>
2035
2036							=item 21. L<csv-info>
2037
2038							=item 22. L<csv-intrange>
2039
2040							=item 23. L<csv-list-field-names>
2041
2042							=item 24. L<csv-lookup-fields>
2043
2044							=item 25. L<csv-ltrim>
2045
2046							=item 26. L<csv-map>
2047
2048							=item 27. L<csv-munge-field>
2049
2050							=item 28. L<csv-munge-rows>
2051
2052							=item 29. L<csv-pick>
2053
2054							=item 30. L<csv-pick-cells>
2055
2056							=item 31. L<csv-pick-fields>
2057
2058							=item 32. L<csv-pick-rows>
2059
2060							=item 33. L<csv-quote>
2061
2062							=item 34. L<csv-replace-newline>
2063
2064							=item 35. L<csv-rtrim>
2065
2066							=item 36. L<csv-select-fields>
2067
2068							=item 37. L<csv-select-rows>
2069
2070							=item 38. L<csv-setop>
2071
2072							=item 39. L<csv-shuf>
2073
2074							=item 40. L<csv-shuf-fields>
2075
2076							=item 41. L<csv-shuf-rows>
2077
2078							=item 42. L<csv-sort>
2079
2080							=item 43. L<csv-sort-fields>
2081
2082							=item 44. L<csv-sort-rows>
2083
2084							=item 45. L<csv-sorted>
2085
2086							=item 46. L<csv-sorted-fields>
2087
2088							=item 47. L<csv-sorted-rows>
2089
2090							=item 48. L<csv-split>
2091
2092							=item 49. L<csv-sum>
2093
2094							=item 50. L<csv-transpose>
2095
2096							=item 51. L<csv-trim>
2097
2098							=item 52. L<csv-uniq>
2099
2100							=item 53. L<csv-unquote>
2101
2102							=item 54. L<csv2ltsv>
2103
2104							=item 55. L<csv2paras>
2105
2106							=item 56. L<csv2td>
2107
2108							=item 57. L<csv2tsv>
2109
2110							=item 58. L<csv2vcf>
2111
2112							=item 59. L<list-csvutils>
2113
2114							=item 60. L<paras2csv>
2115
2116							=item 61. L<tsv2csv>
2117
2118							=back
2119
2120							=head1 FUNCTIONS
2121
2122
2123							=head2 gen_csv_util
2124
2125							Usage:
2126
2127							gen_csv_util(%args) -> bool
2128
2129							Generate a CSV utility.
2130
2131							This routine is used to generate a CSV utility in the form of a L<Rinci>
2132							function (code and metadata). You can then produce a CLI from the Rinci function
2133							simply using L<Perinci::CmdLine::Gen> or, if you use L<Dist::Zilla>,
2134							L<Dist::Zilla::Plugin::GenPericmdScript> or, if on the command-line,
2135							L<gen-pericmd-script>.
2136
2137							Using this routine, by providing just one or a few hooks and setting some
2138							parameters like a couple of extra arguments, you will get a complete CLI with
2139							decent POD/manpage, ability to read one or multiple CSV's and write one or
2140							multiple CSV's, some command-line options to customize how the input CSV's
2141							should be parsed and how the output CSV's should be formatted and named. Your
2142							CLI also has tab completion, usage and help message, and other features.
2143
2144							To create a CSV utility, you specify a C<name> (e.g. C<csv_dump>; must be a valid
2145							unqualified Perl identifier/function name) and optionally C<summary>,
2146							C<description>, and other metadata like C<links> or even C<add_meta_props>. Then
2147							you specify one or more of C<on_> or C<before_> or C<after_*> arguments to supply
2148							handlers (coderefs) for your CSV utility at various hook points.
2149
2150							I<THE HOOKS>
2151
2152							All code for hooks should accept a single argument C<r>. C<r> is a stash (hashref)
2153							of various data, the keys of which will depend on which hook point being called.
2154							You can also add more keys to store data or for flow control (see hook
2155							documentation below for more details).
2156
2157							The order of the hooks, in processing chronological order:
2158
2159							=over
2160
2161							=item * on_begin
2162
2163							Called when utility begins, before reading CSV. You can use this hook e.g. to
2164							process arguments, set output filenames (if you allow custom output
2165							filenames).
2166
2167							=item * before_read_input
2168
2169							Called before opening any input CSV file. This hook is I<still> called even if
2170							your utility sets C<reads_csv> to false.
2171
2172							At this point, the C<input_filenames> stash key (as well as other keys like
2173							C<input_filename>, C<input_filenum>, etc) has not been set. You can use this
2174							hook e.g. to set a custom C<input_filenames>.
2175
2176							=item * before_open_input_files
2177
2178							Called before an input CSV file is about to be opened, including for stdin
2179							(C<->). You can use this hook e.g. to check/preprocess input file. Flow control
2180							is available by setting C<< $r-E<gt>{wants_skip_files} >> to skip reading all the input
2181							file and go directly to the C<after_read_input> hook.
2182
2183							=item * before_open_input_file
2184
2185							Called before an input CSV file is about to be opened, including for stdin
2186							(C<->). For the first file, called after C<before_open_input_file> hook. You can
2187							use this hook e.g. to check/preprocess input file. Flow control is available
2188							by setting C<< $r-E<gt>{wants_skip_file} >> to skip reading a single input file and go
2189							to the next file, or C<< $r-E<gt>{wants_skip_files} >> to skip reading the rest of the
2190							files and go directly to the C<after_read_input> hook.
2191
2192							=item * on_input_header_row
2193
2194							Called when receiving header row. Will be called for every input file, and
2195							called even when user specify C<--no-input-header>, in which case the header
2196							row will be the generated C<["field1", "field2", ...]>. You can use this hook
2197							e.g. to add/remove/rearrange fields.
2198
2199							You can set C<< $r-E<gt>{wants_fill_rows} >> to a defined false if you do not want
2200							C<< $r-E<gt>{input_rows} >> to be filled with empty string elements when it contains
2201							less than the number of fields (in case of sparse values at the end). Normally
2202							you only want to do this when you want to do checking, e.g. in
2203							L<csv-check-rows>.
2204
2205							=item * on_input_data_row
2206
2207							Called when receiving each data row. You can use this hook e.g. to modify the
2208							row or print output (for line-by-line transformation or filtering).
2209
2210							=item * after_close_input_file
2211
2212							Called after each input file is closed, including for stdin (C<->) (although
2213							for stdin, the handle is not actually closed). Flow control is possible by
2214							setting C<< $r-E<gt>{wants_skip_files} >> to skip reading the rest of the files and go
2215							straight to the C<after_close_input_files> hook.
2216
2217							=item * after_close_input_files
2218
2219							Called after the last input file is closed, after the last
2220							C<after_close_input_file> hook, including for stdin (C<->) (although for stdin,
2221							the handle is not actually closed).
2222
2223							=item * after_read_input
2224
2225							Called after the last row of the last CSV file is read and the last file is
2226							closed. This hook is I<still> called, if you set C<reads_csv> option to false.
2227							At this point the stash keys related to CSV reading have all been cleared,
2228							including C<input_filenames>, C<input_filename>, C<input_fh>, etc.
2229
2230							You can use this hook e.g. to print output if you buffer the output.
2231
2232							=item * on_end
2233
2234							Called when utility is about to exit. You can use this hook e.g. to return the
2235							final result.
2236
2237							=back
2238
2239							I<THE STASH>
2240
2241							The common keys that C<r> will contain:
2242
2243							=over
2244
2245							=item * C<gen_args>, hash. The arguments used to generate the CSV utility.
2246
2247							=item * C<util_args>, hash. The arguments that your CSV utility accepts. Parsed from
2248							command-line arguments (or configuration files, or environment variables).
2249
2250							=item * C<name>, str. The name of the CSV utility. Which can also be retrieved via
2251							C<gen_args>.
2252
2253							=item * C<code_print>, coderef. Routine provided for you to print something. Accepts a
2254							string. Takes care of opening the output files for you.
2255
2256							=item * C<code_print_row>, coderef. Routine provided for you to print a data row. You
2257							pass the row (either arrayref or hashref). Takes care of opening the output
2258							files for you, as well as printing header row the first time, if needed.
2259
2260							=item * C<code_print_header_row>, coderef. Routine provided for you to print header
2261							row. You don't need to pass any arguments. Will only print the header row once
2262							per output file if output header is enabled, even if called multiple times.
2263
2264							=back
2265
2266							If you are accepting CSV data (C<reads_csv> gen argument set to true), the
2267							following keys will also be available (in C<on_input_header_row> and
2268							C<on_input_data_row> hooks):
2269
2270							=over
2271
2272							=item * C<input_parser>, a L<Text::CSV_XS> instance for input parsing.
2273
2274							=item * C<input_fields>, array of str. Input CSV's field names.
2275
2276							=item * C<input_fields_idx>, hash with field name as keys and field index (0-based
2277							integer) as values.
2278
2279							=item * C<input_filenames>, array of str.
2280
2281							=item * C<input_filename>, str. The name of the current input file being read (C<-> if
2282							reading from stdin).
2283
2284							=item * C<input_filenum>, uint. The number of the current input file, 1 being the first
2285							file, 2 for the second, and so on.
2286
2287							=item * C<input_fh>, the handle to the current file being read.
2288
2289							=item * C<input_rownum>, uint. The number of rows that have been read (reset after each
2290							input file). In C<on_input_header_row> phase, this will be 1 since header row
2291							(including the generated one) is the first row. Then in C<on_input_data_row>
2292							phase (called the first time for a file), it will be 2 for the first data row,
2293							even if physically it is the first row for CSV file that does not have a
2294							header.
2295
2296							=item * C<input_data_rownum>, uint. The number of data rows that have been read (reset
2297							after each input file). This will be equal to C<input_rownum> less 1 if input
2298							file has header.
2299
2300							=item * C<input_row>, aos (array of str). The current input CSV row as an arrayref.
2301
2302							=item * C<input_row_as_hashref>, hos (hash of str). The current input CSV row as a
2303							hashref, with field names as hash keys and field values as hash values. This
2304							will only be calculated if utility wants it. Utility can express so by setting
2305							C<< $r-E<gt>{wants_input_row_as_hashref} >> to true, e.g. in the C<on_begin> hook.
2306
2307							=item * C<input_header_row_count>, uint. Contains the number of actual header rows that
2308							have been read. If CLI user specifies C<--no-input-header>, this will stay at
2309							zero. Will be reset for each CSV file.
2310
2311							=item * C<input_data_row_count>, int. Contains the number of actual data rows that have
2312							read. Will be reset for each CSV file.
2313
2314							=back
2315
2316							If you are outputting CSV (C<writes_csv> gen argument set to true), the following
2317							keys will be available:
2318
2319							=over
2320
2321							=item * C<output_emitter>, a L<Text::CSV_XS> instance for output.
2322
2323							=item * C<output_fields>, array of str. Should be set to list of output field names. If
2324							unset, will be set to be the same as C<input_fields>.
2325
2326							=item * C<output_fields_idx>, hash with field names as keys and field indexes (0-based
2327							integer) as values. Normally you do not need to set this manually; you just
2328							need to set C<output_fields> and this hash will be computed automatically for
2329							you just before the first output row is outputted.
2330
2331							=item * C<output_filenames>, array of str.
2332
2333							=item * C<output_filename>, str, name of current output file.
2334
2335							=item * C<output_filenum>, uint, the number of the current output file, 1 being the
2336							first file, 2 for the second, and so on.
2337
2338							=item * C<output_fh>, handle to the current output file.
2339
2340							=item * C<output_rownum>, uint. The number of rows that have been outputted (reset
2341							after each output file).
2342
2343							=item * C<output_data_rownum>, uint. The number of data rows that have been outputted
2344							(reset after each output file). This will be equal to C<input_rownum> less 1 if
2345							input file has header.
2346
2347							=back
2348
2349							For other hook-specific keys, see the documentation for associated hook point.
2350
2351							I<ACCEPTING ADDITIONAL COMMAND-LINE OPTIONS/ARGUMENTS>
2352
2353							As mentioned above, you will get additional command-line options/arguments in
2354							C<< $r-E<gt>{util_args} >> hashref. Some options/arguments are already added by
2355							C<gen_csv_util>, e.g. C<input_filename> or C<input_filenames> along with
2356							C<input_sep_char>, etc (when your utility declares C<reads_csv>),
2357							C<output_filename> or C<output_filenames> along with C<overwrite>,
2358							C<output_sep_char>, etc (when your utility declares C<writes_csv>).
2359
2360							If you want to accept additional arguments/options, you specify them in
2361							C<add_args> (hashref, with key being Each option/argument has to be specified
2362							first via C<add_args> (as hashref, with key being argument name and value the
2363							argument specification as defined in L<Rinci::function>)). Some argument
2364							specifications have been defined in L<App::CSVUtils> and can be used. See
2365							existing utilities for examples.
2366
2367							I<READING CSV DATA>
2368
2369							To read CSV data, normally your utility would provide handler for the
2370							C<on_input_data_row> hook and sometimes additionally C<on_input_header_row>.
2371
2372							I<OUTPUTTING STRING OR RETURNING RESULT>
2373
2374							To output string, usually you call the provided routine C<< $r-E<gt>{code_print} >>. This
2375							routine will open the output files for you.
2376
2377							You can also return enveloped result directly by setting C<< $r-E<gt>{result} >>.
2378
2379							I<OUTPUTTING CSV DATA>
2380
2381							To output CSV data, usually you call the provided routine C<< $r-E<gt>{code_print_row} >>.
2382							This routine accepts a row (arrayref or hashref). This routine will open the
2383							output files for you when needed, as well as print header row automatically.
2384
2385							You can also buffer rows from input to e.g. C<< $r-E<gt>{output_rows} >>, then call
2386							C<< $r-E<gt>{code_print_row} >> repeatedly in the C<after_read_input> hook to print all the
2387							rows.
2388
2389							I<READING MULTIPLE CSV FILES>
2390
2391							To read multiple CSV files, you first specify C<reads_multiple_csv>. Then, you
2392							can supply handler for C<on_input_header_row> and C<on_input_data_row> as usual.
2393							If you want to do something before/after each input file, you can also supply
2394							handler for C<before_open_input_file> or C<after_close_input_file>.
2395
2396							I<WRITING TO MULTIPLE CSV FILES>
2397
2398							Similarly, to write to many CSv files, you first specify C<writes_multiple_csv>.
2399							Then, you can supply handler for C<on_input_header_row> and C<on_input_data_row>
2400							as usual. To switch to the next file, set
2401							C<< $r-E<gt>{wants_switch_to_next_output_file} >> to true, in which case the next call to
2402							C<< $r-E<gt>{code_print_row} >> will close the current file and open the next file.
2403
2404							I<CHANGING THE OUTPUT FIELDS>
2405
2406							When calling C<< $r-E<gt>{code_print_row} >>, you can output whatever fields you want. By
2407							convention, you can set C<< $r-E<gt>{output_fields} >> and C<< $r-E<gt>{output_fields_idx} >> to
2408							let other handlers know about the output fields. For example, see the
2409							implementation of L<csv-concat>.
2410
2411							This function is not exported by default, but exportable.
2412
2413							Arguments ('*' denotes required arguments):
2414
2415							=over 4
2416
2417							=item * B<add_args> => I<hash>
2418
2419							(No description)
2420
2421							=item * B<add_args_rels> => I<hash>
2422
2423							(No description)
2424
2425							=item * B<add_meta_props> => I<hash>
2426
2427							Add additional Rinci function metadata properties.
2428
2429							=item * B<after_close_input_file> => I<code>
2430
2431							(No description)
2432
2433							=item * B<after_close_input_files> => I<code>
2434
2435							(No description)
2436
2437							=item * B<after_read_input> => I<code>
2438
2439							(No description)
2440
2441							=item * B<before_open_input_file> => I<code>
2442
2443							(No description)
2444
2445							=item * B<before_open_input_files> => I<code>
2446
2447							(No description)
2448
2449							=item * B<before_read_input> => I<code>
2450
2451							(No description)
2452
2453							=item * B<description> => I<str>
2454
2455							(No description)
2456
2457							=item * B<examples> => I<array>
2458
2459							(No description)
2460
2461							=item * B<links> => I<array[hash]>
2462
2463							(No description)
2464
2465							=item * B<name>* => I<perl::identifier::unqualified_ascii>
2466
2467							(No description)
2468
2469							=item * B<on_begin> => I<code>
2470
2471							(No description)
2472
2473							=item * B<on_end> => I<code>
2474
2475							(No description)
2476
2477							=item * B<on_input_data_row> => I<code>
2478
2479							(No description)
2480
2481							=item * B<on_input_header_row> => I<code>
2482
2483							(No description)
2484
2485							=item * B<reads_csv> => I<bool> (default: 1)
2486
2487							Whether utility reads CSV data.
2488
2489							=item * B<reads_multiple_csv> => I<bool>
2490
2491							Whether utility accepts CSV data.
2492
2493							Setting this option to true will implicitly set the C<reads_csv> option to true,
2494							obviously.
2495
2496							=item * B<summary> => I<str>
2497
2498							(No description)
2499
2500							=item * B<writes_csv> => I<bool> (default: 1)
2501
2502							Whether utility writes CSV data.
2503
2504							=item * B<writes_multiple_csv> => I<bool>
2505
2506							Whether utility outputs CSV data.
2507
2508							Setting this option to true will implicitly set the C<writes_csv> option to true,
2509							obviously.
2510
2511
2512							=back
2513
2514							Return value: (bool)
2515
2516
2517							=head2 compile_eval_code
2518
2519							Usage:
2520
2521							$coderef = compile_eval_code($str, $label);
2522
2523							Compile string code C<$str> to coderef in 'main' package, without C<use strict>
2524							or C<use warnings>. Die on compile error.
2525
2526							=head2 eval_code
2527
2528							Usage:
2529
2530							$res = eval_code($coderef, $r, $topic_var_value, $return_topic_var);
2531
2532							=for Pod::Coverage ^(csvutil)$
2533
2534							=head1 FAQ
2535
2536							=head2 My CSV does not have a header?
2537
2538							Use the C<--no-header> option. Fields will be named C<field1>, C<field2>, and so
2539							on.
2540
2541							=head2 My data is TSV, not CSV?
2542
2543							Use the C<--tsv> option.
2544
2545							=head2 I have a big CSV and the utilities are too slow or eat too much RAM!
2546
2547							These utilities are not (yet) optimized, patches welcome. If your CSV is very
2548							big, perhaps a C-based solution is what you need.
2549
2550							=head1 HOMEPAGE
2551
2552							Please visit the project's homepage at L<https://metacpan.org/release/App-CSVUtils>.
2553
2554							=head1 SOURCE
2555
2556							Source repository is at L<https://github.com/perlancar/perl-App-CSVUtils>.
2557
2558							=head1 SEE ALSO
2559
2560							=head2 Similar CLI bundles for other format
2561
2562							L<App::TSVUtils>, L<App::LTSVUtils>, L<App::SerializeUtils>.
2563
2564							=head2 Other CSV-related utilities
2565
2566							L<xls2csv> and L<xlsx2csv> from L<Spreadsheet::Read>
2567
2568							L<import-csv-to-sqlite> from L<App::SQLiteUtils>
2569
2570							Query CSV with SQL using L<fsql> from L<App::fsql>
2571
2572							L<csvgrep> from L<csvgrep>
2573
2574							=head2 Other non-Perl-based CSV utilities
2575
2576							=head3 Python
2577
2578							B<csvkit>, L<https://csvkit.readthedocs.io/en/latest/>
2579
2580							=head1 AUTHOR
2581
2582							perlancar <perlancar@cpan.org>
2583
2584							=head1 CONTRIBUTOR
2585
2586							=for stopwords Adam Hopkins
2587
2588							Adam Hopkins <violapiratejunky@gmail.com>
2589
2590							=head1 CONTRIBUTING
2591
2592
2593							To contribute, you can send patches by email/via RT, or send pull requests on
2594							GitHub.
2595
2596							Most of the time, you don't need to build the distribution yourself. You can
2597							simply modify the code, then test via:
2598
2599							% prove -l
2600
2601							If you want to build the distribution (e.g. to try to install it locally on your
2602							system), you can install L<Dist::Zilla>,
2603							L<Dist::Zilla::PluginBundle::Author::PERLANCAR>,
2604							L<Pod::Weaver::PluginBundle::Author::PERLANCAR>, and sometimes one or two other
2605							Dist::Zilla- and/or Pod::Weaver plugins. Any additional steps required beyond
2606							that are considered a bug and can be reported to me.
2607
2608							=head1 COPYRIGHT AND LICENSE
2609
2610							This software is copyright (c) 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016 by perlancar <perlancar@cpan.org>.
2611
2612							This is free software; you can redistribute it and/or modify it under
2613							the same terms as the Perl 5 programming language system itself.
2614
2615							=head1 BUGS
2616
2617							Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=App-CSVUtils>
2618
2619							When submitting a bug or request, please include a test-file or a
2620							patch to an existing test-file that illustrates the bug or desired
2621							feature.
2622
2623							=cut