File Coverage

blib/lib/App/CSVUtils.pm

Criterion	Covered	Total	%
statement	727	868	83.7
branch	182	322	56.5
condition	63	122	51.6
subroutine	111	122	90.9
pod	3	3	100.0
total	1086	1437	75.5

line	stmt	bran	cond	sub	pod	time	code
1							package App::CSVUtils;
2
3	1			1		118959	use 5.010001;
	1					14
4	1			1		6	use strict;
	1					2
	1					29
5	1			1		5	use warnings;
	1					4
	1					23
6	1			1		2302	use Log::ger;
	1					53
	1					5
7
8	1			1		256	use Cwd;
	1					2
	1					70
9	1			1		5	use Exporter qw(import);
	1					2
	1					673
10
11							our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
12							our $DATE = '2023-03-31'; # DATE
13							our $DIST = 'App-CSVUtils'; # DIST
14							our $VERSION = '1.023'; # VERSION
15
16							our @EXPORT_OK = qw(
17							gen_csv_util
18							compile_eval_code
19							eval_code
20							);
21
22							our %SPEC;
23
24							our $sch_req_str_or_code = ['any', of=>['str', 'code*']];
25
26							sub _open_file_read {
27	129			129		201	my $filename = shift;
28
29	129					187	my ($fh, $err);
30	129	50				257	if ($filename eq '-') {
31	0					0	$fh = *STDIN;
32							} else {
33	129	50				4922	open $fh, "<", $filename or do {
34	0					0	$err = [500, "Can't open input filename '$filename': $!"];
35	0					0	goto RETURN;
36							};
37							}
38	129					1658	binmode $fh, ":encoding(utf8)";
39
40	129					5192	RETURN:
41							($fh, $err);
42							}
43
44							sub _open_file_write {
45	2			2		4	my $filename = shift;
46
47	2					4	my ($fh, $err);
48	2	50				8	if ($filename eq '-') {
49	0					0	$fh = *STDOUT;
50							} else {
51	2	50				127	open $fh, ">", $filename or do {
52	0					0	$err = [500, "Can't open output filename '$filename': $!"];
53	0					0	goto RETURN;
54							};
55							}
56	2					41	binmode $fh, ":encoding(utf8)";
57
58	2					83	RETURN:
59							($fh, $err);
60							}
61
62							sub _return_or_write_file {
63	0			0		0	my ($res, $filename, $overwrite) = @_;
64	0	0				0	return $res if !defined($filename);
65
66	0					0	my $fh;
67	0	0				0	if ($filename eq '-') {
68	0					0	$fh = \*STDOUT;
69							} else {
70	0	0				0	if (-f $filename) {
71	0	0				0	if ($overwrite) {
72	0					0	log_info "[csvutil] Overwriting output file $filename";
73							} else {
74	0					0	return [412, "Refusing to ovewrite existing output file '$filename', please select another path or specify --overwrite"];
75							}
76							}
77	0	0				0	open my $fh, ">", $filename or do {
78	0					0	return [500, "Can't open output file '$filename': $!"];
79							};
80	0					0	binmode $fh, ":encoding(utf8)";
81	0					0	print $fh $res->[2];
82	0	0				0	close $fh or warn "Can't write to '$filename': $!";
83	0					0	return [$res->[0], $res->[1]];
84							}
85							}
86
87							sub compile_eval_code {
88	44	100		44	1	114	return $_[0] if ref $_[0] eq 'CODE';
89	43					93	my ($str, $label) = @_;
90	43	50	33			183	defined($str) && length($str) or die [400, "Please specify code ($label)"];
91	43					99	$str = "package main; no strict; no warnings; sub { $str }";
92	43					136	log_trace "[csvutil] Compiling Perl code: $str";
93	1			1		8	my $code = eval $str; ## no critic: BuiltinFunctions::ProhibitStringyEval
	1			1		2
	1			1		39
	1			1		6
	1			1		10
	1			1		73
	1			1		8
	1			1		2
	1			1		40
	1			1		7
	1			1		2
	1			1		43
	1			1		6
	1			1		3
	1			1		53
	1			1		7
	1			1		2
	1			1		56
	1			1		7
	1			1		2
	1			1		35
	1			1		11
	1			1		3
	1			1		52
	1			1		9
	1			1		2
	1			1		23
	1			1		5
	1			1		14
	1			1		60
	1			1		7
	1			1		12
	1			1		42
	1			1		6
	1			1		2
	1			1		72
	1			1		7
	1			1		5
	1			1		24
	1			1		5
	1			1		2
	1			1		50
	1			1		7
	1			1		2
	1			1		22
	1			1		5
	1			1		11
	1			1		51
	1			1		8
	1			1		3
	1			1		35
	1			1		7
	1			1		2
	1			1		39
	1			1		8
	1			1		2
	1			1		29
	1			1		6
	1			1		26
	1			1		53
	1			1		6
	1			1		12
	1			1		79
	1			1		8
	1			1		1
	1			1		66
	1			1		7
	1			1		2
	1			1		24
	1			1		5
	1			1		1
	1			1		43
	1			1		6
	1			1		2
	1			1		23
	1			1		4
	1			1		2
	1			1		47
	1			1		7
	1			1		3
	1			1		33
	1			1		4
	1			1		2
	1			1		82
	1			1		8
	1			1		112
	1					35
	1					7
	1					2
	1					132
	1					8
	1					1
	1					26
	1					5
	1					2
	1					60
	1					6
	1					3
	1					25
	1					5
	1					2
	1					73
	1					11
	1					2
	1					39
	1					6
	1					3
	1					71
	1					7
	1					1
	1					35
	1					6
	1					2
	1					58
	1					8
	1					3
	1					45
	1					6
	1					3
	1					85
	1					6
	1					2
	1					24
	1					4
	1					2
	1					63
	1					7
	1					2
	1					34
	1					6
	1					2
	1					51
	1					21
	1					3
	1					26
	1					5
	1					2
	1					59
	1					7
	1					2
	1					31
	1					6
	1					2
	1					49
	1					20
	1					2
	1					35
	1					6
	1					2
	1					65
	1					18
	1					2
	1					19
	1					5
	1					2
	1					46
	1					9
	1					2
	1					27
	1					5
	1					2
	1					59
	1					12
	1					3
	1					27
	1					5
	1					6
	1					71
	1					7
	1					2
	1					23
	1					5
	1					2
	1					51
	1					6
	1					3
	1					25
	1					5
	1					2
	1					49
	1					9
	1					23
	1					39
	1					7
	1					2
	1					42
	1					7
	1					4
	1					34
	1					5
	1					2
	1					53
	1					6
	1					9
	1					24
	1					6
	1					11
	1					56
	1					9
	1					3
	1					32
	1					16
	1					2
	1					59
	1					7
	1					3
	1					28
	1					6
	1					3
	1					67
	1					7
	1					2
	1					52
	1					10
	1					2
	1					57
	1					7
	1					9
	1					23
	1					5
	1					2
	1					61
	1					7
	1					2
	1					35
	1					6
	1					2
	1					56
	1					7
	1					9
	1					24
	1					5
	1					2
	1					82
	1					7
	1					4
	1					25
	1					6
	1					2
	1					84
	1					7
	1					2
	1					39
	1					6
	1					2
	1					77
	1					9
	1					2
	1					34
	1					6
	1					3
	1					83
	1					8
	1					3
	1					41
	1					6
	1					2
	1					78
	43					4626
94	43	100				184	die [400, "Can't compile code ($label) '$str': $@"] if $@;
95	40					154	$code;
96							}
97
98							sub eval_code {
99	1			1		8	no warnings 'once';
	1					2
	1					7792
100	60			60	1	144	my ($code, $r, $value_for_topic, $return_topic) = @_;
101	60					103	local $_ = $value_for_topic;
102	60					95	local $main::r = $r;
103	60					95	local $main::row = $r->{input_row};
104	60					80	local $main::rownum = $r->{input_rownum};
105	60					78	local $main::data_rownum = $r->{input_data_rownum};
106	60					82	local $main::csv = $r->{input_parser};
107	60					84	local $main::fields_idx = $r->{input_fields_idx};
108	60	100				107	if ($return_topic) {
109	9					184	$code->($_);
110	9					32	$_;
111							} else {
112	51					1021	$code->($_);
113							}
114							}
115
116							sub _get_field_idx {
117	0			0		0	my ($field, $field_idxs) = @_;
118	0	0	0			0	defined($field) && length($field) or die "Please specify at least a field\n";
119	0					0	my $idx = $field_idxs->{$field};
120							die "Unknown field '$field' (known fields include: ".
121	0	0				0	join(", ", map { "'$_'" } sort {$field_idxs->{$a} <=> $field_idxs->{$b}}
	0					0
	0					0
122							keys %$field_idxs).")\n" unless defined $idx;
123	0					0	$idx;
124							}
125
126							sub _get_csv_row {
127	0			0		0	my ($csv, $row, $i, $outputs_header) = @_;
128							#use DD; print " "; dd $row;
129	0	0	0			0	return "" if $i == 1 && !$outputs_header;
130	0	0				0	my $status = $csv->combine(@$row)
131							or die "Error in line $i: ".$csv->error_input."\n";
132	0					0	$csv->string . "\n";
133							}
134
135							sub _instantiate_parser_default {
136	0			0		0	require Text::CSV_XS;
137
138	0					0	Text::CSV_XS->new({binary=>1});
139							}
140
141							sub _instantiate_parser {
142	195			195		1990	require Text::CSV_XS;
143
144	195					12910	my ($args, $prefix) = @_;
145	195		50			396	$prefix //= '';
146
147	195					416	my %tcsv_opts = (binary=>1);
148	195	100	66			1479	if (defined $args->{"${prefix}sep_char"} \|\|
		100	66
149							defined $args->{"${prefix}quote_char"} \|\|
150							defined $args->{"${prefix}escape_char"}) {
151	1	50				11	$tcsv_opts{"sep_char"} = $args->{"${prefix}sep_char"} if defined $args->{"${prefix}sep_char"};
152	1	50				6	$tcsv_opts{"quote_char"} = $args->{"${prefix}quote_char"} if defined $args->{"${prefix}quote_char"};
153	1	50				4	$tcsv_opts{"escape_char"} = $args->{"${prefix}escape_char"} if defined $args->{"${prefix}escape_char"};
154							} elsif ($args->{"${prefix}tsv"}) {
155	1					4	$tcsv_opts{"sep_char"} = "\t";
156	1					2	$tcsv_opts{"quote_char"} = undef;
157	1					4	$tcsv_opts{"escape_char"} = undef;
158							}
159	195	100				438	$tcsv_opts{always_quote} = 1 if $args->{"${prefix}always_quote"};
160	195	50				396	$tcsv_opts{quote_empty} = 1 if $args->{"${prefix}quote_empty"};
161
162	195					790	Text::CSV_XS->new(\%tcsv_opts);
163							}
164
165							sub _instantiate_emitter {
166	85			85		135	my $args = shift;
167	85					176	_instantiate_parser($args, 'output_');
168							}
169
170							sub _complete_field_or_field_list {
171							# return list of known fields of a CSV
172
173	0			0		0	my $which = shift;
174
175	0					0	my %args = @_;
176	0		0			0	my $word = $args{word} // '';
177	0					0	my $cmdline = $args{cmdline};
178	0					0	my $r = $args{r};
179
180							# we are not called from cmdline, bail
181	0	0				0	return undef unless $cmdline; ## no critic: Subroutines::ProhibitExplicitReturnUndef
182
183							# let's parse argv first
184	0					0	my $args;
185							{
186							# this is not activated yet
187	0					0	$r->{read_config} = 1;
	0					0
188
189	0					0	my $res = $cmdline->parse_argv($r);
190							#return undef unless $res->[0] == 200;
191
192	0	0				0	$cmdline->_read_config($r) unless $r->{config};
193	0					0	$args = $res->[2];
194							}
195
196							# user hasn't specified -f, bail
197	0	0	0			0	return {message=>"Please specify input filename first"} unless defined $args && $args->{input_filename};
198
199							# user wants to read CSV from stdin, bail
200	0	0				0	return {message=>"Can't get field list when input is stdin"} if $args->{input_filename} eq '-';
201
202							# can the file be opened?
203	0					0	my $csv_parser = _instantiate_parser(\%args, 'input_');
204	0	0				0	open my($fh), "<encoding(utf8)", $args->{input_filename} or do {
205							#warn "csvutils: Cannot open file '$args->{input_filename}': $!\n";
206	0					0	return [];
207							};
208
209							# can the header row be read?
210	0	0				0	my $row = $csv_parser->getline($fh) or return [];
211
212	0	0	0			0	if (defined $args->{input_header} && !$args->{input_header}) {
213	0					0	$row = [map {"field$_"} 1 .. @$row];
	0					0
214							}
215
216	0	0				0	if ($which =~ /sort/) {
217	0					0	$row = [map {($_,"-$_","+$_","~$_")} @$row];
	0					0
218							}
219
220	0					0	require Complete::Util;
221	0	0				0	if ($which =~ /field_list/) {
222	0					0	return Complete::Util::complete_comma_sep(
223							word => $word,
224							elems => $row,
225							uniq => 1,
226							);
227							} else {
228	0					0	return Complete::Util::complete_array_elem(
229							word => $word,
230							array => $row,
231							);
232							}
233							}
234
235							sub _complete_field {
236	0			0		0	_complete_field_or_field_list('field', @_);
237							}
238
239							sub _complete_field_list {
240	0			0		0	_complete_field_or_field_list('field_list', @_);
241							}
242
243							sub _complete_sort_field_list {
244	0			0		0	_complete_field_or_field_list('sort_field_list', @_);
245							}
246
247							sub _complete_sort_field {
248	0			0		0	_complete_field_or_field_list('sort_field', @_);
249							}
250
251							sub _array2hash {
252	0			0		0	my ($row, $fields) = @_;
253	0					0	my $rowhash = {};
254	0					0	for my $i (0..$#{$fields}) {
	0					0
255	0					0	$rowhash->{ $fields->[$i] } = $row->[$i];
256							}
257	0					0	$rowhash;
258							}
259
260							# check that the first N values of a field are all defined and numeric. if there
261							# are now rows or less than N values, return true.
262							sub _is_numeric_field {
263	0			0		0	require Scalar::Util::Numeric;
264
265	0					0	my ($rows, $field_idx, $num_samples) = @_;
266	0		0			0	$num_samples //= 5;
267
268	0					0	my $is_numeric = 1;
269	0					0	for my $row (@$rows) {
270	0					0	my $val = $row->[$field_idx];
271	0	0				0	return 0 unless defined $val;
272	0	0				0	return 0 unless Scalar::Util::Numeric::isnum($val);
273							}
274	0					0	$is_numeric;
275							}
276
277							# find a single field by name or index (1-based), return index (0-based). die
278							# when requested field does not exist.
279							sub _find_field {
280	17			17		40	my ($fields, $name_or_idx) = @_;
281
282							# search by name first
283	17					23	for my $i (0 .. $#{$fields}) {
	17					49
284	29					47	my $field = $fields->[$i];
285	29	100				100	return $i if $field eq $name_or_idx;
286							}
287
288	2	50				19	if ($name_or_idx eq '0') {
		100
		50
289	0					0	die [400, "Field index 0 is requested, you probably meant 1 for the first field?"];
290							} elsif ($name_or_idx =~ /\A[1-9][0-9]*\z/) {
291	1	50				5	if ($name_or_idx > @$fields) {
292	0					0	die [400, "There are only ".scalar(@$fields)." field(s) but field index $name_or_idx is requested"];
293							} else {
294	1					4	return $name_or_idx-1;
295							}
296							} elsif ($name_or_idx =~ /\A-[1-9][0-9]*\z/) {
297	0	0				0	if (-$name_or_idx > @$fields) {
298	0					0	die [400, "There are only ".scalar(@$fields)." field(s) but field index $name_or_idx is requested"];
299							} else {
300	0					0	return @$fields + $name_or_idx;
301							}
302							}
303
304							# not found
305							die [404, "Unknown field name/index '$name_or_idx' (known fields include: ".
306	1					7	join(", ", map { "'$_'" } @$fields).")"];
	3					20
307							}
308
309							# select one or more fields with options like --include-field, etc
310							sub _select_fields {
311	12			12		28	my ($fields, $field_idxs, $args, $default_select_choice) = @_;
312
313	12					21	my @selected_fields;
314
315							my $select_field_options_used;
316
317	12	100				28	if (defined $args->{include_field_pat}) {
318	4					9	$select_field_options_used++;
319	4					11	for my $field (@$fields) {
320	10	50				54	if ($field =~ $args->{include_field_pat}) {
321	10					24	push @selected_fields, $field;
322							}
323							}
324							}
325	12	100				30	if (defined $args->{exclude_field_pat}) {
326	1					3	$select_field_options_used++;
327	1					12	@selected_fields = grep { $_ !~ $args->{exclude_field_pat} }
	3					14
328							@selected_fields;
329							}
330	12	100				28	if (defined $args->{include_fields}) {
331	8					13	$select_field_options_used++;
332							FIELD:
333	8					26	for my $field (@{ $args->{include_fields} }) {
	8					24
334	13	100				34	unless (defined $field_idxs->{$field}) {
335	4	100				25	return [400, "Unknown field '$field'"] unless $args->{ignore_unknown_fields};
336	2					8	next FIELD;
337							}
338	9	50				41	next if grep { $field eq $_ } @selected_fields;
	3					15
339	9					23	push @selected_fields, $field;
340							}
341							}
342	10	100				26	if (defined $args->{exclude_fields}) {
343	2					4	$select_field_options_used++;
344							FIELD:
345	2					3	for my $field (@{ $args->{exclude_fields} }) {
	2					6
346	2	50				7	unless (defined $field_idxs->{$field}) {
347	0	0				0	return [400, "Unknown field '$field'"] unless $args->{ignore_unknown_fields};
348	0					0	next FIELD;
349							}
350	2					5	@selected_fields = grep { $field ne $_ } @selected_fields;
	6					15
351							}
352							}
353
354	10	50	33			24	if (!$select_field_options_used && $default_select_choice) {
355	0	0				0	if ($default_select_choice eq 'all') {
		0
		0
		0
356	0					0	@selected_fields = @$fields;
357							} elsif ($default_select_choice eq 'first') {
358	0	0				0	@selected_fields = ($fields->[0]) if @$fields;
359							} elsif ($default_select_choice eq 'last') {
360	0	0				0	@selected_fields = ($fields->[-1]) if @$fields;
361							} elsif ($default_select_choice eq 'first-if-only-field') {
362	0	0				0	@selected_fields = ($fields->[0]) if @$fields == 1;
363							}
364							}
365
366	10	100				23	if ($args->{show_selected_fields}) {
367	1					17	return [200, "OK", \@selected_fields];
368							}
369
370							#my %selected_field_idxs;
371							#$selected_field_idxs{$_} = $fields_idx->{$_} for @selected_fields;
372
373	9					12	my @selected_field_idxs_array;
374	9					30	push @selected_field_idxs_array, $field_idxs->{$_} for @selected_fields;
375
376	9					38	[100, "Continue", [\@selected_fields, \@selected_field_idxs_array]];
377							}
378
379							our $xcomp_csvfiles = [filename => {file_ext_filter => qr/^[tc]sv$/i}];
380
381							our %argspecs_csv_input = (
382							input_header => {
383							summary => 'Specify whether input CSV has a header row',
384							'summary.alt.bool.not' => 'Specify that input CSV does not have a header row',
385							schema => 'bool*',
386							default => 1,
387							description => <<'_',
388
389							By default, the first row of the input CSV will be assumed to contain field
390							names (and the second row contains the first data row). When you declare that
391							input CSV does not have header row (`--no-input-header`), the first row of the
392							CSV is assumed to contain the first data row. Fields will be named `field1`,
393							`field2`, and so on.
394
395							_
396							cmdline_aliases => {
397							},
398							tags => ['category:input'],
399							},
400							input_tsv => {
401							summary => "Inform that input file is in TSV (tab-separated) format instead of CSV",
402							schema => 'true*',
403							description => <<'_',
404
405							Overriden by `--input-sep-char`, `--input-quote-char`, `--input-escape-char`
406							options. If one of those options is specified, then `--input-tsv` will be
407							ignored.
408
409							_
410							tags => ['category:input'],
411							},
412							input_sep_char => {
413							summary => 'Specify field separator character in input CSV, will be passed to Text::CSV_XS',
414							schema => ['str*', len=>1],
415							description => <<'_',
416
417							Defaults to `,` (comma). Overrides `--input-tsv` option.
418
419							_
420							tags => ['category:input'],
421							},
422							input_quote_char => {
423							summary => 'Specify field quote character in input CSV, will be passed to Text::CSV_XS',
424							schema => ['str*', len=>1],
425							description => <<'_',
426
427							Defaults to `"` (double quote). Overrides `--input-tsv` option.
428
429							_
430							tags => ['category:input'],
431							},
432							input_escape_char => {
433							summary => 'Specify character to escape value in field in input CSV, will be passed to Text::CSV_XS',
434							schema => ['str*', len=>1],
435							description => <<'_',
436
437							Defaults to `\\` (backslash). Overrides `--input-tsv` option.
438
439							_
440							tags => ['category:input'],
441							},
442							);
443
444							our %argspecs_csv_output = (
445							output_header => {
446							summary => 'Whether output CSV should have a header row',
447							schema => 'bool*',
448							description => <<'_',
449
450							By default, a header row will be output if input CSV has header row. Under
451							`--output-header`, a header row will be output even if input CSV does not have
452							header row (value will be something like "col0,col1,..."). Under
453							`--no-output-header`, header row will not be printed even if input CSV has
454							header row. So this option can be used to unconditionally add or remove header
455							row.
456
457							_
458							tags => ['category:output'],
459							},
460							output_tsv => {
461							summary => "Inform that output file is TSV (tab-separated) format instead of CSV",
462							schema => 'bool*',
463							description => <<'_',
464
465							This is like `--input-tsv` option but for output instead of input.
466
467							Overriden by `--output-sep-char`, `--output-quote-char`, `--output-escape-char`
468							options. If one of those options is specified, then `--output-tsv` will be
469							ignored.
470
471							_
472							tags => ['category:output'],
473							},
474							output_sep_char => {
475							summary => 'Specify field separator character in output CSV, will be passed to Text::CSV_XS',
476							schema => ['str*', len=>1],
477							description => <<'_',
478
479							This is like `--input-sep-char` option but for output instead of input.
480
481							Defaults to `,` (comma). Overrides `--output-tsv` option.
482
483							_
484							tags => ['category:output'],
485							},
486							output_quote_char => {
487							summary => 'Specify field quote character in output CSV, will be passed to Text::CSV_XS',
488							schema => ['str*', len=>1],
489							description => <<'_',
490
491							This is like `--input-quote-char` option but for output instead of input.
492
493							Defaults to `"` (double quote). Overrides `--output-tsv` option.
494
495							_
496							tags => ['category:output'],
497							},
498							output_escape_char => {
499							summary => 'Specify character to escape value in field in output CSV, will be passed to Text::CSV_XS',
500							schema => ['str*', len=>1],
501							description => <<'_',
502
503							This is like `--input-escape-char` option but for output instead of input.
504
505							Defaults to `\\` (backslash). Overrides `--output-tsv` option.
506
507							_
508							tags => ['category:output'],
509							},
510							output_always_quote => {
511							summary => 'Whether to always quote values',
512							schema => 'bool*',
513							default => 0,
514							description => <<'_',
515
516							When set to false (the default), values are quoted only when necessary:
517
518							field1,field2,"field three contains comma (,)",field4
519
520							When set to true, then all values will be quoted:
521
522							"field1","field2","field three contains comma (,)","field4"
523
524							_
525							tags => ['category:output'],
526							},
527							output_quote_empty => {
528							summary => 'Whether to quote empty values',
529							schema => 'bool*',
530							default => 0,
531							description => <<'_',
532
533							When set to false (the default), empty values are not quoted:
534
535							field1,field2,,field4
536
537							When set to true, then empty values will be quoted:
538
539							field1,field2,"",field4
540
541							_
542							tags => ['category:output'],
543							},
544							);
545
546							our %argspecopt_input_filename = (
547							input_filename => {
548							summary => 'Input CSV file',
549							description => <<'_',
550
551							Use `-` to read from stdin.
552
553							Encoding of input file is assumed to be UTF-8.
554
555							_
556							schema => 'filename*',
557							default => '-',
558							'x.completion' => $xcomp_csvfiles,
559							tags => ['category:input'],
560							},
561							);
562
563							our %argspecopt_input_filenames = (
564							input_filenames => {
565							'x.name.is_plural' => 1,
566							'x.name.singular' => 'input_filename',
567							summary => 'Input CSV files',
568							description => <<'_',
569
570							Use `-` to read from stdin.
571
572							Encoding of input file is assumed to be UTF-8.
573
574							_
575							schema => ['array', of=>'filename'],
576							default => ['-'],
577							'x.completion' => $xcomp_csvfiles,
578							tags => ['category:input'],
579							},
580							);
581
582							our %argspecopt_overwrite = (
583							overwrite => {
584							summary => 'Whether to override existing output file',
585							schema => 'bool*',
586							cmdline_aliases=>{O=>{}},
587							tags => ['category:output'],
588							},
589							);
590
591							our %argspecsopt_inplace = (
592							inplace => {
593							summary => 'Output to the same file as input',
594							schema => 'true*',
595							description => <<'_',
596
597							Normally, you output to a different file than input. If you try to output to the
598							same file (`-o INPUT.csv -O`) you will clobber the input file; thus the utility
599							prevents you from doing it. However, with this `--inplace` option, you can
600							output to the same file. Like perl's `-i` option, this will first output to a
601							temporary file in the same directory as the input file then rename to the final
602							file at the end. You cannot specify output file (`-o`) when using this option,
603							but you can specify backup extension with `-b` option.
604
605							Some caveats:
606
607							- if input file is a symbolic link, it will be replaced with a regular file;
608							- renaming (implemented using `rename()`) can fail if input filename is too long;
609							- value specified in `-b` is currently not checked for acceptable characters;
610							- things can also fail if permissions are restrictive;
611
612							_
613							tags => ['category:output'],
614							},
615							inplace_backup_ext => {
616							summary => 'Extension to add for backup of input file',
617							schema => 'str*',
618							default => '',
619							description => <<'_',
620
621							In inplace mode (`--inplace`), if this option is set to a non-empty string, will
622							rename the input file using this extension as a backup. The old existing backup
623							will be overwritten, if any.
624
625							_
626							cmdline_aliases => {b=>{}},
627							tags => ['category:output'],
628							},
629							);
630
631							our %argspecopt_output_filename = (
632							output_filename => {
633							summary => 'Output filename',
634							description => <<'_',
635
636							Use `-` to output to stdout (the default if you don't specify this option).
637
638							Encoding of output file is assumed to be UTF-8.
639
640							_
641							schema => 'filename*',
642							cmdline_aliases=>{o=>{}},
643							tags => ['category:output'],
644							},
645							);
646
647							our %argspecopt_output_filenames = (
648							output_filenames => {
649							summary => 'Output filenames',
650							description => <<'_',
651
652							Use `-` to output to stdout (the default if you don't specify this option).
653
654							Encoding of output file is assumed to be UTF-8.
655
656							_
657							schema => ['array', of=>'filename'],
658							cmdline_aliases=>{o=>{}},
659							tags => ['category:output'],
660							},
661							);
662
663							our %argspecopt_field = (
664							field => {
665							summary => 'Field name',
666							schema => 'str*',
667							cmdline_aliases => { f=>{} },
668							completion => \&_complete_field,
669							},
670							);
671
672							our %argspecopt_field_1 = (
673							field => {
674							summary => 'Field name',
675							schema => 'str*',
676							pos => 1,
677							cmdline_aliases => { f=>{} },
678							completion => \&_complete_field,
679							},
680							);
681
682							our %argspec_field_1 = (
683							field => {
684							summary => 'Field name',
685							schema => 'str*',
686							cmdline_aliases => { f=>{} },
687							req => 1,
688							pos => 1,
689							completion => \&_complete_field,
690							},
691							);
692
693							our %argspec_fields_1plus = (
694							fields => {
695							'x.name.is_plural' => 1,
696							'x.name.singular' => 'field',
697							summary => 'Field names',
698							schema => ['array', of=>['str', min_len=>1], min_len=>1],
699							req => 1,
700							pos => 1,
701							slurpy => 1,
702							cmdline_aliases => {f=>{}},
703							element_completion => \&_complete_field,
704							},
705							);
706
707							# without completion, for adding new field
708							our %argspec_field_1_nocomp = (
709							field => {
710							summary => 'Field name',
711							schema => 'str*',
712							cmdline_aliases => { f=>{} },
713							req => 1,
714							pos => 1,
715							},
716							);
717
718							# without completion, for adding new fields
719							our %argspec_fields_1plus_nocomp = (
720							fields => {
721							'x.name.is_plural' => 1,
722							'x.name.singular' => 'field',
723							summary => 'Field names',
724							'summary.alt.plurality.singular' => 'Field name',
725							schema => ['array', of=>['str', min_len=>1], min_len=>1],
726							cmdline_aliases => { f=>{} },
727							req => 1,
728							pos => 1,
729							slurpy => 1,
730							},
731							);
732
733							our %argspec_fields = (
734							fields => {
735							'x.name.is_plural' => 1,
736							'x.name.singular' => 'field',
737							summary => 'Field names',
738							schema => ['array', of=>['str', min_len=>1], min_len=>1],
739							req => 1,
740							cmdline_aliases => {f=>{}},
741							element_completion => \&_complete_field,
742							},
743							);
744
745							our %argspecopt_fields = (
746							fields => {
747							'x.name.is_plural' => 1,
748							'x.name.singular' => 'field',
749							summary => 'Field names',
750							schema => ['array', of=>['str', min_len=>1], min_len=>1],
751							cmdline_aliases => {f=>{}},
752							element_completion => \&_complete_field,
753							},
754							);
755
756							our %argspecsopt_field_selection = (
757							include_fields => {
758							'x.name.is_plural' => 1,
759							'x.name.singular' => 'include_field',
760							summary => 'Field names to include, takes precedence over --exclude-field-pat',
761							schema => ['array', of=>'str'],
762							cmdline_aliases => {
763							f => {},
764							field => {}, # backward compatibility
765							},
766							element_completion => \&_complete_field,
767							tags => ['category:field-selection'],
768							},
769							include_field_pat => {
770							summary => 'Field regex pattern to select, overidden by --exclude-field-pat',
771							schema => 're*',
772							cmdline_aliases => {
773							field_pat => {}, # backward compatibility
774							include_all_fields => { summary => 'Shortcut for --field-pat=., effectively selecting all fields', is_flag=>1, code => sub { $_[0]{include_field_pat} = '.' } },
775							a => { summary => 'Shortcut for --field-pat=., effectively selecting all fields', is_flag=>1, code => sub { $_[0]{include_field_pat} = '.' } },
776							},
777							tags => ['category:field-selection'],
778							},
779							exclude_fields => {
780							'x.name.is_plural' => 1,
781							'x.name.singular' => 'exclude_field',
782							summary => 'Field names to exclude, takes precedence over --fields',
783							schema => ['array', of=>'str'],
784							cmdline_aliases => {
785							F => {},
786							},
787							element_completion => \&_complete_field,
788							tags => ['category:field-selection'],
789							},
790							exclude_field_pat => {
791							summary => 'Field regex pattern to exclude, takes precedence over --field-pat',
792							schema => 're*',
793							cmdline_aliases => {
794							exclude_all_fields => { summary => 'Shortcut for --exclude-field-pat=., effectively excluding all fields', is_flag=>1, code => sub { $_[0]{exclude_field_pat} = '.' } },
795							A => { summary => 'Shortcut for --exclude-field-pat=., effectively excluding all fields', is_flag=>1, code => sub { $_[0]{exclude_field_pat} = '.' } },
796							},
797							tags => ['category:field-selection'],
798							},
799							ignore_unknown_fields => {
800							summary => 'When unknown fields are specified in --include-field (--field) or --exclude-field options, ignore them instead of throwing an error',
801							schema => 'bool*',
802							},
803							show_selected_fields => {
804							summary => 'Show selected fields and then immediately exit',
805							schema => 'true*',
806							},
807							);
808
809							our %argspec_eval = (
810							eval => {
811							summary => 'Perl code',
812							schema => $sch_req_str_or_code,
813							cmdline_aliases => { e=>{} },
814							req => 1,
815							},
816							);
817
818							our %argspecopt_eval = (
819							eval => {
820							summary => 'Perl code',
821							schema => $sch_req_str_or_code,
822							cmdline_aliases => { e=>{} },
823							},
824							);
825
826							our %argspec_eval_1 = (
827							eval => {
828							summary => 'Perl code',
829							schema => $sch_req_str_or_code,
830							cmdline_aliases => { e=>{} },
831							req => 1,
832							pos => 1,
833							},
834							);
835
836							our %argspec_eval_2 = (
837							eval => {
838							summary => 'Perl code',
839							schema => $sch_req_str_or_code,
840							cmdline_aliases => { e=>{} },
841							req => 1,
842							pos => 2,
843							},
844							);
845
846							our %argspecopt_eval_2 = (
847							eval => {
848							summary => 'Perl code',
849							schema => $sch_req_str_or_code,
850							cmdline_aliases => { e=>{} },
851							pos => 2,
852							},
853							);
854
855							our %argspecsopt_sortsub = (
856							by_sortsub => {
857							schema => 'str*',
858							description => <<'_',
859
860							When sorting rows, usually combined with `--key` because most Sort::Sub routine
861							expects a string to be compared against.
862
863							When sorting fields, the Sort::Sub routine will get the field name as argument.
864
865							_
866							summary => 'Sort using a Sort::Sub routine',
867							'x.completion' => ['sortsub_spec'],
868							},
869							sortsub_args => {
870							summary => 'Arguments to pass to Sort::Sub routine',
871							schema => ['hash', of=>'str'],
872							},
873							);
874
875							our %argspecopt_key = (
876							key => {
877							summary => 'Generate sort keys with this Perl code',
878							description => <<'_',
879
880							If specified, then will compute sort keys using Perl code and sort using the
881							keys. Relevant when sorting using `--by-code` or `--by-sortsub`. If specified,
882							then instead of row when sorting rows, the code (or Sort::Sub routine) will
883							receive these sort keys to sort against.
884
885							The code will receive the row (arrayref, or if -H is specified, hashref) as the
886							argument.
887
888							_
889							schema => $sch_req_str_or_code,
890							cmdline_aliases => {k=>{}},
891							},
892							);
893
894							our %argspecs_sort_rows = (
895							reverse => {
896							schema => ['bool', is=>1],
897							cmdline_aliases => {r=>{}},
898							},
899							ci => {
900							schema => ['bool', is=>1],
901							cmdline_aliases => {i=>{}},
902							},
903							by_fields => {
904							summary => 'Sort by a list of field specifications',
905							'summary.alt.plurality.singular' => 'Add a sort field specification',
906							'x.name.is_plural' => 1,
907							'x.name.singular' => 'by_field',
908							description => <<'_',
909
910							Each field specification is a field name with an optional prefix. `FIELD`
911							(without prefix) means sort asciibetically ascending (smallest to largest),
912							`~FIELD` means sort asciibetically descending (largest to smallest), `+FIELD`
913							means sort numerically ascending, `-FIELD` means sort numerically descending.
914
915							_
916							schema => ['array', of=>'str'],
917							element_completion => \&_complete_sort_field,
918							},
919							by_code => {
920							summary => 'Sort by using Perl code',
921							schema => $sch_req_str_or_code,
922							description => <<'_',
923
924							`$a` and `$b` (or the first and second argument) will contain the two rows to be
925							compared. Which are arrayrefs; or if `--hash` (`-H`) is specified, hashrefs; or
926							if `--key` is specified, whatever the code in `--key` returns.
927
928							_
929							},
930							%argspecopt_key,
931							%argspecsopt_sortsub,
932							);
933
934							our %argspecs_sort_fields = (
935							reverse => {
936							schema => ['bool', is=>1],
937							cmdline_aliases => {r=>{}},
938							},
939							ci => {
940							schema => ['bool', is=>1],
941							cmdline_aliases => {i=>{}},
942							},
943							by_examples => {
944							summary => 'Sort by a list of field names as examples',
945							'summary.alt.plurality.singular' => 'Add a field to sort by example',
946							'x.name.is_plural' => 1,
947							'x.name.singular' => 'by_example',
948							schema => ['array', of=>'str'],
949							element_completion => \&_complete_field,
950							},
951							by_code => {
952							summary => 'Sort fields using Perl code',
953							schema => $sch_req_str_or_code,
954							description => <<'_',
955
956							`$a` and `$b` (or the first and second argument) will contain `[$field_name,
957							$field_idx]`.
958
959							_
960							},
961							%argspecsopt_sortsub,
962							);
963
964							our %argspecopt_with_data_rows = (
965							with_data_rows => {
966							summary => 'Whether to also output data rows',
967							schema => 'bool',
968							},
969							);
970
971							our %argspecopt_hash = (
972							hash => {
973							summary => 'Provide row in $_ as hashref instead of arrayref',
974							schema => ['bool*', is=>1],
975							cmdline_aliases => {H=>{}},
976							},
977							);
978
979							# add a position to specified argument, if possible
980							sub _add_arg_pos {
981	40			40		105	my ($args, $argname, $is_slurpy) = @_;
982
983							# argument already has a position, return
984	40	50				112	return if defined $args->{$argname}{pos};
985
986							# position of slurpy argument
987	40					56	my $slurpy_pos;
988	40					148	for (keys %$args) {
989	497	100				931	next unless $args->{$_}{slurpy};
990	6					13	$slurpy_pos = $args->{$_}{pos};
991	6					13	last;
992							}
993
994							# there is already a slurpy arg, return
995	40	50	66			121	return if $is_slurpy && defined $slurpy_pos;
996
997							# find the lowest position that's not available
998							ARG:
999	40					111	for my $j (0 .. scalar(keys %$args)-1) {
1000	57	100	100			156	last if defined $slurpy_pos && $j >= $slurpy_pos;
1001	53					163	for (keys %$args) {
1002	646	100	100			1352	next ARG if defined $args->{$_}{pos} && $args->{$_}{pos} == $j;
1003							}
1004	36					79	$args->{$argname}{pos} = $j;
1005	36	100				85	$args->{$argname}{slurpy} = 1 if $is_slurpy;
1006	36					78	last;
1007							}
1008							}
1009
1010							sub _randext {
1011	2			2		26	state $charset = [0..9, "A".."Z","a".."z"];
1012	2					5	my $len = shift;
1013	2					6	my $ext = "";
1014	2					5	for (1..$len) { $ext .= $charset->[rand @$charset] }
	10					22
1015	2					7	$ext;
1016							}
1017
1018							$SPEC{gen_csv_util} = {
1019							v => 1.1,
1020							summary => 'Generate a CSV utility',
1021							description => <<'_',
1022
1023							This routine is used to generate a CSV utility in the form of a <pm:Rinci>
1024							function (code and metadata). You can then produce a CLI from the Rinci function
1025							simply using <pm:Perinci::CmdLine::Gen> or, if you use <pm:Dist::Zilla>,
1026							<pm:Dist::Zilla::Plugin::GenPericmdScript> or, if on the command-line,
1027							<prog:gen-pericmd-script>.
1028
1029							Using this routine, by providing just one or a few hooks and setting some
1030							parameters like a couple of extra arguments, you will get a complete CLI with
1031							decent POD/manpage, ability to read one or multiple CSV's and write one or
1032							multiple CSV's, some command-line options to customize how the input CSV's
1033							should be parsed and how the output CSV's should be formatted and named. Your
1034							CLI also has tab completion, usage and help message, and other features.
1035
1036							To create a CSV utility, you specify a `name` (e.g. `csv_dump`; must be a valid
1037							unqualified Perl identifier/function name) and optionally `summary`,
1038							`description`, and other metadata like `links` or even `add_meta_props`. Then
1039							you specify one or more of `on_` or `before_` or `after_*` arguments to supply
1040							handlers (coderefs) for your CSV utility at various hook points.
1041
1042
1043							THE HOOKS
1044
1045							All code for hooks should accept a single argument `r`. `r` is a stash (hashref)
1046							of various data, the keys of which will depend on which hook point being called.
1047							You can also add more keys to store data or for flow control (see hook
1048							documentation below for more details).
1049
1050							The order of the hooks, in processing chronological order:
1051
1052							* on_begin
1053
1054							Called when utility begins, before reading CSV. You can use this hook e.g. to
1055							process arguments, set output filenames (if you allow custom output
1056							filenames).
1057
1058							* before_read_input
1059
1060							Called before opening any input CSV file. This hook is still called even if
1061							your utility sets `reads_csv` to false.
1062
1063							At this point, the `input_filenames` stash key (as well as other keys like
1064							`input_filename`, `input_filenum`, etc) has not been set. You can use this
1065							hook e.g. to set a custom `input_filenames`.
1066
1067							* before_open_input_files
1068
1069							Called before an input CSV file is about to be opened, including for stdin
1070							(`-`). You can use this hook e.g. to check/preprocess input file. Flow control
1071							is available by setting `$r->{wants_skip_files}` to skip reading all the input
1072							file and go directly to the `after_read_input` hook.
1073
1074							* before_open_input_file
1075
1076							Called before an input CSV file is about to be opened, including for stdin
1077							(`-`). For the first file, called after `before_open_input_file` hook. You can
1078							use this hook e.g. to check/preprocess input file. Flow control is available
1079							by setting `$r->{wants_skip_file}` to skip reading a single input file and go
1080							to the next file, or `$r->{wants_skip_files}` to skip reading the rest of the
1081							files and go directly to the `after_read_input` hook.
1082
1083							* on_input_header_row
1084
1085							Called when receiving header row. Will be called for every input file, and
1086							called even when user specify `--no-input-header`, in which case the header
1087							row will be the generated `["field1", "field2", ...]`. You can use this hook
1088							e.g. to add/remove/rearrange fields.
1089
1090							You can set `$r->{wants_fill_rows}` to a defined false if you do not want
1091							`$r->{input_rows}` to be filled with empty string elements when it contains
1092							less than the number of fields (in case of sparse values at the end). Normally
1093							you only want to do this when you want to do checking, e.g. in
1094							<prog:csv-check-rows>.
1095
1096							* on_input_data_row
1097
1098							Called when receiving each data row. You can use this hook e.g. to modify the
1099							row or print output (for line-by-line transformation or filtering).
1100
1101							* after_close_input_file
1102
1103							Called after each input file is closed, including for stdin (`-`) (although
1104							for stdin, the handle is not actually closed). Flow control is possible by
1105							setting `$r->{wants_skip_files}` to skip reading the rest of the files and go
1106							straight to the `after_close_input_files` hook.
1107
1108							* after_close_input_files
1109
1110							Called after the last input file is closed, after the last
1111							`after_close_input_file` hook, including for stdin (`-`) (although for stdin,
1112							the handle is not actually closed).
1113
1114							* after_read_input
1115
1116							Called after the last row of the last CSV file is read and the last file is
1117							closed. This hook is still called, if you set `reads_csv` option to false.
1118							At this point the stash keys related to CSV reading have all been cleared,
1119							including `input_filenames`, `input_filename`, `input_fh`, etc.
1120
1121							You can use this hook e.g. to print output if you buffer the output.
1122
1123							* on_end
1124
1125							Called when utility is about to exit. You can use this hook e.g. to return the
1126							final result.
1127
1128
1129							THE STASH
1130
1131							The common keys that `r` will contain:
1132
1133							- `gen_args`, hash. The arguments used to generate the CSV utility.
1134
1135							- `util_args`, hash. The arguments that your CSV utility accepts. Parsed from
1136							command-line arguments (or configuration files, or environment variables).
1137
1138							- `name`, str. The name of the CSV utility. Which can also be retrieved via
1139							`gen_args`.
1140
1141							- `code_print`, coderef. Routine provided for you to print something. Accepts a
1142							string. Takes care of opening the output files for you.
1143
1144							- `code_print_row`, coderef. Routine provided for you to print a data row. You
1145							pass the row (either arrayref or hashref). Takes care of opening the output
1146							files for you, as well as printing header row the first time, if needed.
1147
1148							- `code_print_header_row`, coderef. Routine provided for you to print header
1149							row. You don't need to pass any arguments. Will only print the header row once
1150							per output file if output header is enabled, even if called multiple times.
1151
1152							If you are accepting CSV data (`reads_csv` gen argument set to true), the
1153							following keys will also be available (in `on_input_header_row` and
1154							`on_input_data_row` hooks):
1155
1156							- `input_parser`, a <pm:Text::CSV_XS> instance for input parsing.
1157
1158							- `input_filenames`, array of str.
1159
1160							- `input_filename`, str. The name of the current input file being read (`-` if
1161							reading from stdin).
1162
1163							- `input_filenum`, uint. The number of the current input file, 1 being the first
1164							file, 2 for the second, and so on.
1165
1166							- `input_fh`, the handle to the current file being read.
1167
1168							- `input_rownum`, uint. The number of rows that have been read (reset after each
1169							input file). In `on_input_header_row` phase, this will be 1 since header row
1170							(including the generated one) is the first row. Then in `on_input_data_row`
1171							phase (called the first time for a file), it will be 2 for the first data row,
1172							even if physically it is the first row for CSV file that does not have a
1173							header.
1174
1175							- `input_data_rownum`, uint. The number of data rows that have been read (reset
1176							after each input file). This will be equal to `input_rownum` less 1 if input
1177							file has header.
1178
1179							- `input_row`, aos (array of str). The current input CSV row as an arrayref.
1180
1181							- `input_row_as_hashref`, hos (hash of str). The current input CSV row as a
1182							hashref, with field names as hash keys and field values as hash values. This
1183							will only be calculated if utility wants it. Utility can express so by setting
1184							`$r->{wants_input_row_as_hashref}` to true, e.g. in the `on_begin` hook.
1185
1186							- `input_header_row_count`, uint. Contains the number of actual header rows that
1187							have been read. If CLI user specifies `--no-input-header`, this will stay at
1188							zero. Will be reset for each CSV file.
1189
1190							- `input_data_row_count`, int. Contains the number of actual data rows that have
1191							read. Will be reset for each CSV file.
1192
1193							If you are outputting CSV (`writes_csv` gen argument set to true), the following
1194							keys will be available:
1195
1196							- `output_emitter`, a <pm:Text::CSV_XS> instance for output.
1197
1198							- `output_filenames`, array of str.
1199
1200							- `output_filename`, str, name of current output file.
1201
1202							- `output_filenum`, uint, the number of the current output file, 1 being the
1203							first file, 2 for the second, and so on.
1204
1205							- `output_fh`, handle to the current output file.
1206
1207							- `output_rownum`, uint. The number of rows that have been outputted (reset
1208							after each output file).
1209
1210							- `output_data_rownum`, uint. The number of data rows that have been outputted
1211							(reset after each output file). This will be equal to `input_rownum` less 1 if
1212							input file has header.
1213
1214							For other hook-specific keys, see the documentation for associated hook point.
1215
1216
1217							ACCEPTING ADDITIONAL COMMAND-LINE OPTIONS/ARGUMENTS
1218
1219							As mentioned above, you will get additional command-line options/arguments in
1220							`$r->{util_args}` hashref. Some options/arguments are already added by
1221							`gen_csv_util`, e.g. `input_filename` or `input_filenames` along with
1222							`input_sep_char`, etc (when your utility declares `reads_csv`),
1223							`output_filename` or `output_filenames` along with `overwrite`,
1224							`output_sep_char`, etc (when your utility declares `writes_csv`).
1225
1226							If you want to accept additional arguments/options, you specify them in
1227							`add_args` (hashref, with key being Each option/argument has to be specified
1228							first via `add_args` (as hashref, with key being argument name and value the
1229							argument specification as defined in <pm:Rinci::function>)). Some argument
1230							specifications have been defined in <pm:App::CSVUtils> and can be used. See
1231							existing utilities for examples.
1232
1233
1234							READING CSV DATA
1235
1236							To read CSV data, normally your utility would provide handler for the
1237							`on_input_data_row` hook and sometimes additionally `on_input_header_row`.
1238
1239
1240							OUTPUTTING STRING OR RETURNING RESULT
1241
1242							To output string, usually you call the provided routine `$r->{code_print}`. This
1243							routine will open the output files for you.
1244
1245							You can also return enveloped result directly by setting `$r->{result}`.
1246
1247
1248							OUTPUTTING CSV DATA
1249
1250							To output CSV data, usually you call the provided routine `$r->{code_print_row}`.
1251							This routine accepts a row (arrayref or hashref). This routine will open the
1252							output files for you when needed, as well as print header row automatically.
1253
1254							You can also buffer rows from input to e.g. `$r->{output_rows}`, then call
1255							`$r->{code_print_row}` repeatedly in the `after_read_input` hook to print all the
1256							rows.
1257
1258
1259							READING MULTIPLE CSV FILES
1260
1261							To read multiple CSV files, you first specify `reads_multiple_csv`. Then, you
1262							can supply handler for `on_input_header_row` and `on_input_data_row` as usual.
1263							If you want to do something before/after each input file, you can also supply
1264							handler for `before_open_input_file` or `after_close_input_file`.
1265
1266
1267							WRITING TO MULTIPLE CSV FILES
1268
1269							Similarly, to write to many CSv files, you first specify `writes_multiple_csv`.
1270							Then, you can supply handler for `on_input_header_row` and `on_input_data_row`
1271							as usual. To switch to the next file, set
1272							`$r->{wants_switch_to_next_output_file}` to true, in which case the next call to
1273							`$r->{code_print_row}` will close the current file and open the next file.
1274
1275
1276							CHANGING THE OUTPUT FIELDS
1277
1278							When calling `$r->{code_print_row}`, you can output whatever fields you want. By
1279							convention, you can set `$r->{output_fields}` and `$r->{output_fields_idx}` to
1280							let other handlers know about the output fields. For example, see the
1281							implementation of <prog:csv-concat>.
1282
1283							_
1284							args => {
1285							name => {
1286							schema => 'perl::identifier::unqualified_ascii*',
1287							req => 1,
1288							tags => ['category:metadata'],
1289							},
1290							summary => {
1291							schema => 'str*',
1292							tags => ['category:metadata'],
1293							},
1294							description => {
1295							schema => 'str*',
1296							tags => ['category:metadata'],
1297							},
1298							links => {
1299							schema => ['array', of=>'hash'], # XXX defhashes
1300							tags => ['category:metadata'],
1301							},
1302							examples => {
1303							schema => ['array*'], # defhashes
1304							tags => ['category:metadata'],
1305							},
1306							add_meta_props => {
1307							summary => 'Add additional Rinci function metadata properties',
1308							schema => ['hash*'],
1309							tags => ['category:metadata'],
1310							},
1311							add_args => {
1312							schema => ['hash*'],
1313							tags => ['category:metadata'],
1314							},
1315							add_args_rels => {
1316							schema => ['hash*'],
1317							tags => ['category:metadata'],
1318							},
1319
1320							reads_csv => {
1321							summary => 'Whether utility reads CSV data',
1322							'summary.alt.bool.not' => 'Specify that utility does not read CSV data',
1323							schema => 'bool*',
1324							default => 1,
1325							},
1326							reads_multiple_csv => {
1327							summary => 'Whether utility accepts CSV data',
1328							schema => 'bool*',
1329							description => <<'_',
1330
1331							Setting this option to true will implicitly set the `reads_csv` option to true,
1332							obviously.
1333
1334							_
1335							},
1336							writes_csv => {
1337							summary => 'Whether utility writes CSV data',
1338							'summary.alt.bool.not' => 'Specify that utility does not write CSV data',
1339							schema => 'bool*',
1340							default => 1,
1341							},
1342							writes_multiple_csv => {
1343							summary => 'Whether utility outputs CSV data',
1344							schema => 'bool*',
1345							description => <<'_',
1346
1347							Setting this option to true will implicitly set the `writes_csv` option to true,
1348							obviously.
1349
1350							_
1351							},
1352
1353							on_begin => {
1354							schema => 'code*',
1355							},
1356							before_read_input => {
1357							schema => 'code*',
1358							},
1359							before_open_input_files => {
1360							schema => 'code*',
1361							},
1362							before_open_input_file => {
1363							schema => 'code*',
1364							},
1365							on_input_header_row => {
1366							schema => 'code*',
1367							},
1368							on_input_data_row => {
1369							schema => 'code*',
1370							},
1371							after_close_input_file => {
1372							schema => 'code*',
1373							},
1374							after_close_input_files => {
1375							schema => 'code*',
1376							},
1377							after_read_input => {
1378							schema => 'code*',
1379							},
1380							on_end => {
1381							schema => 'code*',
1382							},
1383							},
1384							result_naked => 1,
1385							result => {
1386							schema => 'bool*',
1387							},
1388							};
1389							sub gen_csv_util {
1390	24			24	1	195	my %gen_args = @_;
1391
1392	24	50				128	my $name = delete($gen_args{name}) or die "Please specify name";
1393	24		50			88	my $summary = delete($gen_args{summary}) // '(No summary)';
1394	24		100			87	my $description = delete($gen_args{description}) // '(No description)';
1395	24		100			114	my $links = delete($gen_args{links}) // [];
1396	24		100			84	my $examples = delete($gen_args{examples}) // [];
1397	24					42	my $add_meta_props = delete $gen_args{add_meta_props};
1398	24					41	my $add_args = delete $gen_args{add_args};
1399	24					53	my $add_args_rels = delete $gen_args{add_args_rels};
1400	24					39	my $reads_multiple_csv = delete($gen_args{reads_multiple_csv});
1401	24		50			76	my $reads_csv = delete($gen_args{reads_csv}) // 1;
1402	24		50			40	my $tags = [ @{ delete($gen_args{tags}) // [] } ];
	24					77
1403	24	100				69	$reads_csv = 1 if $reads_multiple_csv;
1404	24					44	my $writes_multiple_csv = delete($gen_args{writes_multiple_csv});
1405	24		100			78	my $writes_csv = delete($gen_args{writes_csv}) // 1;
1406	24	50				57	$writes_csv = 1 if $writes_multiple_csv;
1407	24					41	my $on_begin = delete $gen_args{on_begin};
1408	24					36	my $before_read_input = delete $gen_args{before_read_input};
1409	24					37	my $before_open_input_files = delete $gen_args{before_open_input_files};
1410	24					38	my $before_open_input_file = delete $gen_args{before_open_input_file};
1411	24					33	my $on_input_header_row = delete $gen_args{on_input_header_row};
1412	24					35	my $on_input_data_row = delete $gen_args{on_input_data_row};
1413	24					34	my $after_close_input_file = delete $gen_args{after_close_input_file};
1414	24					35	my $after_close_input_files = delete $gen_args{after_close_input_files};
1415	24					48	my $after_read_input = delete $gen_args{after_read_input};
1416	24					36	my $on_end = delete $gen_args{on_end};
1417
1418	24	50				76	scalar(keys %gen_args) and die "Unknown argument(s): ".join(", ", keys %gen_args);
1419
1420	24					30	my $code;
1421							CREATE_CODE: {
1422	24					36	$code = sub {
1423	110			110		280053	my %util_args = @_;
1424
1425	110		100			514	my $has_header = $util_args{input_header} // 1;
1426	110		66			314	my $outputs_header = $util_args{output_header} // $has_header;
1427
1428	110					366	my $r = {
1429							gen_args => \%gen_args,
1430							util_args => \%util_args,
1431							name => $name,
1432							};
1433
1434							# inside the main eval block, we call hook handlers. A handler can
1435							# throw an exception (which can be a string or an enveloped response
1436							# like [500, "some error message"], see Rinci::function). we trap
1437							# the exception so we can return the appropriate enveloped response.
1438							MAIN_EVAL:
1439	110					219	eval {
1440
1441							# do some checking
1442	110	50	33			341	if ($util_args{inplace} && (!$reads_csv \|\| !$writes_csv)) {
			66
1443	0					0	die [412, "--inplace cannot be specified when we do not read & write CSV"];
1444							}
1445
1446	110	100				249	if ($on_begin) {
1447	37					135	log_trace "[csvutil] Calling on_begin hook handler ...";
1448	37					150	$on_begin->($r);
1449							}
1450
1451							my $code_open_file = sub {
1452							# set output filenames, if not yet
1453	184	100		184		373	unless ($r->{output_filenames}) {
1454	70					96	my @output_filenames;
1455	70	100				159	if ($util_args{inplace}) {
		50
1456	2					5	for my $input_filename (@{ $r->{input_filenames} }) {
	2					5
1457	2					2	my $output_filename;
1458	2					3	while (1) {
1459	2					7	$output_filename = $input_filename . "." . _randext(5);
1460	2	50				45	last unless -e $output_filename;
1461							}
1462	2					12	push @output_filenames, $output_filename;
1463							}
1464							} elsif ($writes_multiple_csv) {
1465	0		0			0	@output_filenames = @{ $util_args{output_filenames} // ['-'] };
	0					0
1466							} else {
1467	68		50			278	@output_filenames = ($util_args{output_filename} // '-');
1468							}
1469
1470							CHECK_OUTPUT_FILENAME_SAME_AS_INPUT_FILENAME: {
1471	70					111	my %seen_output_abs_path; # key = output filename
	70					87
1472	70	100	66			242	last unless $reads_csv && $writes_csv;
1473	67					102	for my $input_filename (@{ $r->{input_filenames} }) {
	67					131
1474	78	50				152	next if $input_filename eq '-';
1475	78					2066	my $input_abs_path = Cwd::abs_path($input_filename);
1476	78	50				237	die [500, "Can't get absolute path of input filename '$input_filename'"] unless $input_abs_path;
1477	78					146	for my $output_filename (@output_filenames) {
1478	78	100				268	next if $output_filename eq '-';
1479	2	50				7	next if $seen_output_abs_path{$output_filename};
1480	2					38	my $output_abs_path = Cwd::abs_path($output_filename);
1481	2	50				9	die [500, "Can't get absolute path of output filename '$output_filename'"] unless $output_abs_path;
1482	2	0				8	die [412, "Cannot set output filename to '$output_filename' ".
		50
1483							($output_filename ne $output_abs_path ? "($output_abs_path) ":"").
1484							"because it is the same as input filename and input will be clobbered; use --inplace to avoid clobbering<"]
1485							if $output_abs_path eq $input_abs_path;
1486							}
1487							}
1488							} # CHECK_OUTPUT_FILENAME_SAME_AS_INPUT_FILENAME
1489
1490	70					201	$r->{output_filenames} = \@output_filenames;
1491	70		50			343	$r->{output_num_of_files} //= scalar(@output_filenames);
1492							} # set output filenames
1493
1494							# open the next file, if not yet
1495	184	100	66			759	if (!$r->{output_fh} \|\| $r->{wants_switch_to_next_output_file}) {
1496	70		50			283	$r->{output_filenum} //= 0;
1497	70					103	$r->{output_filenum}++;
1498
1499	70					176	$r->{output_rownum} = 0;
1500	70					118	$r->{output_data_rownum} = 0;
1501
1502							# close the previous file, if any
1503	70	50	33			153	if ($r->{output_fh} && $r->{output_filename} ne '-') {
1504	0					0	log_info "[csvutil] Closing output file '$r->{output_filename}' ...";
1505	0	0				0	close $r->{output_fh} or die [500, "Can't close output file '$r->{output_filename}': $!"];
1506	0					0	delete $r->{has_printed_header};
1507	0					0	delete $r->{wants_switch_to_next_output_file};
1508							}
1509
1510							# we have exhausted all the files, do nothing & return
1511	70	50				99	return if $r->{output_filenum} > @{ $r->{output_filenames} };
	70					191
1512
1513	70					166	$r->{output_filename} = $r->{output_filenames}[ $r->{output_filenum}-1 ];
1514							log_info "[csvutil] [%d/%s] Opening output file %s ...",
1515	70					271	$r->{output_filenum}, $r->{output_num_of_files}, $r->{output_filename};
1516	70	100				273	if ($r->{output_filename} eq '-') {
1517	68					149	$r->{output_fh} = \*STDOUT;
1518							} else {
1519	2	50				20	if (-f $r->{output_filename}) {
1520	0	0				0	if ($r->{util_args}{overwrite}) {
1521	0					0	log_info "[csvutil] Will be overwriting output file %s", $r->{output_filename};
1522							} else {
1523	0					0	die [412, "Refusing to overwrite existing output file '$r->{output_filename}', choose another name or use --overwrite (-O)"];
1524							}
1525							}
1526	2					7	my ($fh, $err) = _open_file_write($r->{output_filename});
1527	2	50				6	die $err if $err;
1528	2					6	$r->{output_fh} = $fh;
1529							}
1530							} # open the next file
1531	110					711	}; # code_open_file
1532
1533							my $code_print = sub {
1534	9			9		17	my $str = shift;
1535	9					21	$code_open_file->();
1536	9					12	print { $r->{output_fh} } $str;
	9					263
1537	110					371	}; # code_print
1538	110					212	$r->{code_print} = $code_print;
1539
1540	110	100				222	if ($writes_csv) {
1541	85					209	my $output_emitter = _instantiate_emitter(\%util_args);
1542	85					9651	$r->{output_emitter} = $output_emitter;
1543	85					172	$r->{has_printed_header} = 0;
1544
1545							my $code_print_header_row = sub {
1546							# set output fields, if not yet
1547	175	100		175		437	unless ($r->{output_fields}) {
1548							# by default, use the
1549	24					59	$r->{output_fields} = $r->{input_fields};
1550							}
1551
1552							# index the output fields, if not yet
1553	175	100				348	unless ($r->{output_fields_idx}) {
1554	66					163	$r->{output_fields_idx} = {};
1555	66					98	for my $j (0 .. $#{ $r->{output_fields} }) {
	66					204
1556	179					373	$r->{output_fields_idx}{ $r->{output_fields}[$j] } = $j;
1557							}
1558							}
1559
1560	175					408	$code_open_file->();
1561
1562							# print header line, if not yet
1563	175	100	100			606	if ($outputs_header && !$r->{has_printed_header}) {
1564	65					107	$r->{has_printed_header}++;
1565	65					1125	$r->{output_emitter}->print($r->{output_fh}, $r->{output_fields});
1566	65					3443	print { $r->{output_fh} } "\n";
	65					786
1567	65					227	$r->{output_rownum}++;
1568							}
1569	85					379	};
1570	85					177	$r->{code_print_header_row} = $code_print_header_row;
1571
1572							my $code_print_row = sub {
1573	175			175		326	my $row = shift;
1574
1575	175					397	$code_print_header_row->();
1576
1577							# print data line
1578	175	50				424	if ($row) {
1579	175	50				416	if (ref $row eq 'HASH') {
1580	0					0	my $row0 = $row;
1581	0					0	$row = [];
1582	0					0	for my $j (0 .. $#{ $r->{output_fields} }) {
	0					0
1583	0		0			0	$row->[$j] = $row0->{ $r->{output_fields}[$j] } // '';
1584							}
1585							}
1586	175					1134	$r->{output_emitter}->print( $r->{output_fh}, $row );
1587	175					3566	print { $r->{output_fh} } "\n";
	175					1727
1588	175					463	$r->{output_rownum}++;
1589	175					932	$r->{output_data_rownum}++;
1590							}
1591	85					280	}; # code_print_row
1592	85					231	$r->{code_print_row} = $code_print_row;
1593							} # if outputs csv
1594
1595	110	50				231	if ($before_read_input) {
1596	0					0	log_trace "[csvutil] Calling before_read_input handler ...";
1597	0					0	$before_read_input->($r);
1598							}
1599
1600							READ_CSV: {
1601	110	50				192	last unless $reads_csv;
	110					208
1602
1603	110					254	my $input_parser = _instantiate_parser(\%util_args, 'input_');
1604	110					11458	$r->{input_parser} = $input_parser;
1605
1606	110					182	my @input_filenames;
1607	110	100				243	if ($reads_multiple_csv) {
1608	18		50			30	@input_filenames = @{ $util_args{input_filenames} // ['-'] };
	18					71
1609							} else {
1610	92		50			322	@input_filenames = ($util_args{input_filename} // '-');
1611							}
1612	110		50			535	$r->{input_filenames} //= \@input_filenames;
1613
1614							BEFORE_INPUT_FILENAME:
1615	110					204	$r->{input_filenum} = 0;
1616
1617							INPUT_FILENAME:
1618	110					210	for my $input_filename (@input_filenames) {
1619	129					238	$r->{input_filenum}++;
1620	129					261	$r->{input_filename} = $input_filename;
1621
1622	129	100	100			453	if ($r->{input_filenum} == 1 && $before_open_input_files) {
1623	1					5	log_trace "[csvutil] Calling before_open_input_files handler ...";
1624	1					5	$before_open_input_files->($r);
1625	1	50				4	if (delete $r->{wants_skip_files}) {
1626	0					0	log_trace "[csvutil] Handler wants to skip files, skipping all input files";
1627	0					0	last READ_CSV;
1628							}
1629							}
1630
1631	129	50				253	if ($before_open_input_file) {
1632	0					0	log_trace "[csvutil] Calling before_open_input_file handler ...";
1633	0					0	$before_open_input_file->($r);
1634	0	0				0	if (delete $r->{wants_skip_file}) {
		0
1635	0					0	log_trace "[csvutil] Handler wants to skip this file, moving on to the next file";
1636	0					0	next INPUT_FILENAME;
1637							} elsif (delete $r->{wants_skip_files}) {
1638	0					0	log_trace "[csvutil] Handler wants to skip all files, skipping all input files";
1639	0					0	last READ_CSV;
1640							}
1641							}
1642
1643							log_info "[csvutil] [file %d/%d] Reading input file %s ...",
1644	129					520	$r->{input_filenum}, scalar(@input_filenames), $input_filename;
1645	129					845	my ($fh, $err) = _open_file_read($input_filename);
1646	129	50				349	die $err if $err;
1647	129					501	$r->{input_fh} = $r->{input_fhs}[ $r->{input_filenum}-1 ] = $fh;
1648
1649	129					232	my $i;
1650	129					245	$r->{input_header_row_count} = 0;
1651	129					319	$r->{input_data_row_count} = 0;
1652	129					244	$r->{input_fields} = []; # array, field names in order
1653	129					262	$r->{input_field_idxs} = {}; # key=field name, value=index (0-based)
1654	129					186	my $row0;
1655							my $code_getline = sub {
1656	562	50	33	562		2971	if ($r->{stdin_input_fields} && $r->{input_filename} eq '-') {
		100	100
		100	100
1657	0	0				0	if ($i == 0) {
1658							# we have read the header for stdin. since
1659							# we can't seek to the beginning, we return
1660							# the saved fields
1661	0					0	$r->{input_header_row_count}++;
1662	0					0	return $r->{stdin_input_fields};
1663							} else {
1664	0					0	my $row = $input_parser->getline($r->{input_fh});
1665	0	0				0	$r->{input_data_row_count}++ if $row;
1666	0					0	return $row;
1667							}
1668							} elsif ($i == 0 && !$has_header) {
1669							# this is the first line of a file and user
1670							# specifies there is no input header. we save
1671							# the line and return the generated field names
1672							# instead.
1673	4					151	$row0 = $input_parser->getline($r->{input_fh});
1674	4	50				236	return unless $row0;
1675	4					12	return [map { "field$_" } 1..@$row0];
	12					51
1676							} elsif ($i == 1 && !$has_header) {
1677							# we return the saved first line
1678	4	50				16	$r->{input_data_row_count}++ if $row0;
1679	4					10	return $row0;
1680							}
1681	554					14497	my $res = $input_parser->getline($r->{input_fh});
1682	554	100				20693	if ($res) {
1683	447	100				944	$r->{input_header_row_count}++ if $i==0;
1684	447	100				989	$r->{input_data_row_count}++ if $i;
1685							}
1686	554					1521	$res;
1687	129					636	};
1688	129					360	$r->{code_getline} = $code_getline;
1689
1690	129					200	$i = 0;
1691	129					263	while ($r->{input_row} = $code_getline->()) {
1692	455					671	$i++;
1693	455					706	$r->{input_rownum} = $i;
1694	455	100				945	$r->{input_data_rownum} = $has_header ? $i-1 : $i;
1695	455	100				799	if ($i == 1) {
1696							# gather the list of fields
1697	129					265	$r->{input_fields} = $r->{input_row};
1698	129	50	0			277	$r->{stdin_input_fields} //= $r->{input_row} if $input_filename eq '-';
1699	129					229	$r->{orig_input_fields} = $r->{input_fields};
1700	129					267	$r->{input_fields_idx} = {};
1701	129					201	for my $j (0 .. $#{ $r->{input_fields} }) {
	129					446
1702	361					862	$r->{input_fields_idx}{ $r->{input_fields}[$j] } = $j;
1703							}
1704
1705	129	100				265	if ($on_input_header_row) {
1706	115					398	log_trace "[csvutil] Calling on_input_header_row hook handler ...";
1707	115					497	$on_input_header_row->($r);
1708
1709	106	100				342	if (delete $r->{wants_skip_file}) {
		100
1710	3					10	log_trace "[csvutil] Handler wants to skip this file, moving on to the next file";
1711	3					13	next INPUT_FILENAME;
1712							} elsif (delete $r->{wants_skip_files}) {
1713	1					6	log_trace "[csvutil] Handler wants to skip all files, skipping all input files";
1714	1					6	last READ_CSV;
1715							}
1716							}
1717
1718							# reindex the fields, should the above hook
1719							# handler adds/removes fields. let's save the
1720							# old fields_idx to orig_fields_idx.
1721	116					216	$r->{orig_input_fields_idx} = $r->{input_fields_idx};
1722	116					177	$r->{input_fields_idx} = {};
1723	116					200	for my $j (0 .. $#{ $r->{input_fields} }) {
	116					302
1724	326					765	$r->{input_fields_idx}{ $r->{input_fields}[$j] } = $j;
1725							}
1726
1727							} else {
1728							# fill up the elements of row to the number of
1729							# fields, in case the row contains sparse values
1730	326	50	33			751	unless (defined $r->{wants_fill_rows} && !$r->{wants_fill_rows}) {
1731	326	100				453	if (@{ $r->{input_row} } < @{ $r->{input_fields} }) {
	326					538
	326					654
1732	3					4	splice @{ $r->{input_row} }, scalar(@{ $r->{input_row} }), 0, (("") x (@{ $r->{input_fields} } - @{ $r->{input_row} }));
	3					6
	3					7
	3					7
	3					10
1733							}
1734							}
1735
1736							# generate the hashref version of row if utility
1737							# requires it
1738	326	100				674	if ($r->{wants_input_row_as_hashref}) {
1739	34					77	$r->{input_row_as_hashref} = {};
1740	34					51	for my $j (0 .. $#{ $r->{input_row} }) {
	34					90
1741							# ignore extraneous data fields
1742	93	50				128	last if $j >= @{ $r->{input_fields} };
	93					159
1743	93					228	$r->{input_row_as_hashref}{ $r->{input_fields}[$j] } = $r->{input_row}[$j];
1744							}
1745							}
1746
1747	326	50				577	if ($on_input_data_row) {
1748	326	100				897	log_trace "[csvutil] Calling on_input_data_row hook handler (for first data row) ..." if $r->{input_rownum} <= 2;
1749	326					992	$on_input_data_row->($r);
1750
1751	321	100				1103	if (delete $r->{wants_skip_file}) {
		100
1752	2					11	log_trace "[csvutil] Handler wants to skip this file, moving on to the next file";
1753	2					9	next INPUT_FILENAME;
1754							} elsif (delete $r->{wants_skip_files}) {
1755	2					8	log_trace "[csvutil] Handler wants to skip all files, skipping all input files";
1756	2					30	last READ_CSV;
1757							}
1758							}
1759							}
1760
1761							} # while getline
1762
1763							# XXX actually close filehandle except stdin
1764
1765	107	50				362	if ($after_close_input_file) {
1766	0					0	log_trace "[csvutil] Calling after_close_input_file handler ...";
1767	0					0	$after_close_input_file->($r);
1768	0	0				0	if (delete $r->{wants_skip_files}) {
1769	0					0	log_trace "[csvutil] Handler wants to skip reading all file, skipping";
1770	0					0	last READ_CSV;
1771							}
1772							}
1773							} # for input_filename
1774
1775	93	100				224	if ($after_close_input_files) {
1776	32					113	log_trace "[csvutil] Calling after_close_input_files handler ...";
1777	32					123	$after_close_input_files->($r);
1778							}
1779
1780							} # READ_CSV
1781
1782							# cleanup stash from csv-reading-related keys
1783	96					408	delete $r->{input_filenames};
1784	96					161	delete $r->{input_filenum};
1785	96					166	delete $r->{input_filename};
1786	96					151	delete $r->{input_fh};
1787	96					138	delete $r->{input_rownum};
1788	96					139	delete $r->{input_data_rownum};
1789	96					129	delete $r->{input_row};
1790	96					145	delete $r->{input_row_as_hashref};
1791	96					136	delete $r->{input_fields};
1792	96					203	delete $r->{input_fields_idx};
1793	96					159	delete $r->{orig_input_fields_idx};
1794	96					740	delete $r->{code_getline};
1795	96					134	delete $r->{wants_input_row_as_hashref};
1796
1797	96	100				179	if ($after_read_input) {
1798	6					25	log_trace "[csvutil] Calling after_read_input handler ...";
1799	6					23	$after_read_input->($r);
1800							}
1801
1802							# cleanup stash from csv-outputting-related keys
1803	96					140	delete $r->{output_num_of_files};
1804	96					141	delete $r->{output_filenum};
1805	96	100				202	if ($r->{output_fh}) {
1806	70	100				196	if ($r->{output_filename} ne '-') {
1807	2					14	log_info "[csvutil] Closing output file '$r->{output_filename}' ...";
1808	2	50				118	close $r->{output_fh} or die [500, "Can't close output file '$r->{output_filename}': $!"];
1809							}
1810	70					117	delete $r->{output_fh};
1811							}
1812	96	100				208	if ($r->{util_args}{inplace}) {
1813	2		50			11	my $output_filenum = $r->{output_filenum} // 0;
1814	2					3	my $i = -1;
1815	2					4	for my $output_filename (@{ $r->{output_filenames} }) {
	2					8
1816	2					4	$i++;
1817	2	50				5	last if $i > $output_filenum;
1818	2	50				21	(my $input_filename = $output_filename) =~ s/\.\w{5}\z//
1819							or die [500, "BUG: Can't get original input file '$output_filename'"];
1820	2	100				9	if (length(my $ext = $r->{util_args}{inplace_backup_ext})) {
1821	1					4	my $backup_filename = $input_filename . $ext;
1822	1					10	log_info "[csvutil] Backing up input file '$output_filename' -> '$backup_filename' ...";
1823	1	50				40	rename $input_filename, $backup_filename or die [500, "Can't rename '$input_filename' -> '$backup_filename': $!"];
1824							}
1825	2					16	log_info "[csvutil] Renaming from temporary output file '$output_filename' -> '$input_filename' ...";
1826	2	50				179	rename $output_filename, $input_filename or die [500, "Can't rename back '$output_filename' -> '$input_filename': $!"];
1827							}
1828							}
1829	96					181	delete $r->{output_filenames};
1830	96					134	delete $r->{output_filename};
1831	96					147	delete $r->{output_rownum};
1832	96					155	delete $r->{output_data_rownum};
1833	96					154	delete $r->{code_print};
1834	96					459	delete $r->{code_print_row};
1835	96					377	delete $r->{code_print_header_row};
1836	96					131	delete $r->{has_printed_header};
1837	96					120	delete $r->{wants_switch_to_next_output_file};
1838
1839	96	100				1479	if ($on_end) {
1840	11					40	log_trace "[csvutil] Calling on_end hook handler ...";
1841	11					41	$on_end->($r);
1842							}
1843
1844							}; # MAIN_EVAL
1845
1846	110					235	my $err = $@;
1847	110	100				214	if ($err) {
1848	14	50				39	$err = [500, $err] unless ref $err;
1849	14					65	return $err;
1850							}
1851
1852							RETURN_RESULT:
1853	96	100				283	if (!$r->{result}) {
		50
		50
1854	82					226	$r->{result} = [200];
1855							} elsif (!ref($r->{result})) {
1856	0					0	$r->{result} = [500, "BUG: Result (r->{result}) is set to a non-reference ($r->{result}), probably by one of the handlers"];
1857							} elsif (ref($r->{result}) ne 'ARRAY') {
1858	0					0	$r->{result} = [500, "BUG: Result (r->{result}) is not set to an enveloped result (arrayref) ($r->{result}), probably by one of the handlers"];
1859							}
1860	96					3125	$r->{result};
1861	24					246	};
1862							} # CREATE_CODE
1863
1864	24					40	my $meta;
1865							CREATE_META: {
1866
1867	24					125	$meta = {
1868							v => 1.1,
1869							summary => $summary,
1870							description => $description,
1871							args => {},
1872							args_rels => {},
1873							links => $links,
1874							examples => $examples,
1875							tags => $tags,
1876							};
1877
1878							CREATE_ARGS_PROP: {
1879	24	50				51	if ($add_args) {
	24					63
1880	24					142	$meta->{args}{$_} = $add_args->{$_} for keys %$add_args;
1881							}
1882
1883	24	50				69	if ($reads_csv) {
1884	24					85	$meta->{args}{$_} = {%{$argspecs_csv_input{$_}}} for keys %argspecs_csv_input;
	120					533
1885
1886	24	100				62	if ($reads_multiple_csv) {
1887	3					10	$meta->{args}{input_filenames} = {%{$argspecopt_input_filenames{input_filenames}}};
	3					31
1888	3					16	_add_arg_pos($meta->{args}, 'input_filenames', 'slurpy');
1889	3					10	push @$tags, 'reads-multiple-csv';
1890							} else {
1891	21					34	$meta->{args}{input_filename} = {%{$argspecopt_input_filename{input_filename}}};
	21					106
1892	21					98	_add_arg_pos($meta->{args}, 'input_filename');
1893							}
1894
1895	24					49	push @$tags, 'reads-csv';
1896							} # if reads_csv
1897
1898	24	100				56	if ($writes_csv) {
1899	16					74	$meta->{args}{$_} = {%{$argspecs_csv_output{$_}}} for keys %argspecs_csv_output;
	112					486
1900
1901	16	50				50	if ($reads_csv) {
1902	16					48	$meta->{args}{$_} = {%{$argspecsopt_inplace{$_}}} for keys %argspecsopt_inplace;
	32					166
1903	16		50			88	$meta->{args_rels}{'dep_all&'} //= [];
1904	16					23	push @{ $meta->{args_rels}{'dep_all&'} }, ['inplace_backup_ext', ['inplace']];
	16					56
1905	16		50			82	$meta->{args_rels}{'choose_one&'} //= [];
1906	16					25	push @{ $meta->{args_rels}{'choose_one&'} }, ['inplace', 'output_filename'];
	16					49
1907	16					21	push @{ $meta->{args_rels}{'choose_one&'} }, ['inplace', 'output_filenames'];
	16					41
1908							}
1909
1910	16	50				45	if ($writes_multiple_csv) {
1911	0					0	$meta->{args}{output_filenames} = {%{$argspecopt_output_filenames{output_filenames}}};
	0					0
1912	0					0	_add_arg_pos($meta->{args}, 'output_filenames', 'slurpy');
1913	0	0				0	if ($reads_csv) {
1914	0		0			0	$meta->{args_rels}{'choose_one&'} //= [];
1915	0					0	push @{ $meta->{args_rels}{'choose_one&'} }, [qw/output_filenames inplace/];
	0					0
1916							}
1917	0					0	push @$tags, 'writes-multiple-csv';
1918							} else {
1919	16					23	$meta->{args}{output_filename} = {%{$argspecopt_output_filename{output_filename}}};
	16					76
1920	16					48	_add_arg_pos($meta->{args}, 'output_filename');
1921	16	50				49	if ($reads_csv) {
1922	16		50			46	$meta->{args_rels}{'choose_one&'} //= [];
1923	16					22	push @{ $meta->{args_rels}{'choose_one&'} }, [qw/output_filename inplace/];
	16					46
1924							}
1925							}
1926
1927	16					27	$meta->{args}{overwrite} = {%{$argspecopt_overwrite{overwrite}}};
	16					84
1928	16		50			86	$meta->{args_rels}{'dep_any&'} //= [];
1929	16					32	push @{ $meta->{args_rels}{'dep_any&'} }, ['overwrite', ['output_filename', 'output_filenames']];
	16					47
1930
1931	16					39	push @$tags, 'writes-csv';
1932							} # if writes csv
1933
1934							} # CREATE_ARGS_PROP
1935
1936							CREATE_ARGS_RELS_PROP: {
1937	24					72	$meta->{args_rels} = {};
	24					78
1938	24	100				54	if ($add_args_rels) {
1939	3					19	$meta->{args_rels}{$_} = $add_args_rels->{$_} for keys %$add_args_rels;
1940							}
1941							} # CREATE_ARGS_RELS_PROP
1942
1943	24	50				54	if ($add_meta_props) {
1944	0					0	$meta->{$_} = $add_meta_props->{$_} for keys %$add_meta_props;
1945							}
1946
1947							} # CREATE_META
1948
1949							{
1950	24					39	my $package = caller();
	24					30
	24					83
1951	1			1		8	no strict 'refs'; ## no critic: TestingAndDebugging::ProhibitNoStrict
	1					2
	1					347
1952	24					502	*{"$package\::$name"} = $code;
	24					146
1953							#use DD; dd $meta;
1954	24					37	${"$package\::SPEC"}{$name} = $meta;
	24					160
1955							}
1956
1957	24					78	1;
1958							}
1959
1960							1;
1961							# ABSTRACT: CLI utilities related to CSV
1962
1963							__END__
1964
1965							=pod
1966
1967							=encoding UTF-8
1968
1969							=head1 NAME
1970
1971							App::CSVUtils - CLI utilities related to CSV
1972
1973							=head1 VERSION
1974
1975							This document describes version 1.023 of App::CSVUtils (from Perl distribution App-CSVUtils), released on 2023-03-31.
1976
1977							=head1 DESCRIPTION
1978
1979							This distribution contains the following CLI utilities:
1980
1981							=over
1982
1983							=item 1. L<csv-add-fields>
1984
1985							=item 2. L<csv-avg>
1986
1987							=item 3. L<csv-check-cell-values>
1988
1989							=item 4. L<csv-check-field-names>
1990
1991							=item 5. L<csv-check-field-values>
1992
1993							=item 6. L<csv-check-rows>
1994
1995							=item 7. L<csv-cmp>
1996
1997							=item 8. L<csv-concat>
1998
1999							=item 9. L<csv-convert-to-hash>
2000
2001							=item 10. L<csv-csv>
2002
2003							=item 11. L<csv-delete-fields>
2004
2005							=item 12. L<csv-dump>
2006
2007							=item 13. L<csv-each-row>
2008
2009							=item 14. L<csv-fill-template>
2010
2011							=item 15. L<csv-find-values>
2012
2013							=item 16. L<csv-freqtable>
2014
2015							=item 17. L<csv-gen>
2016
2017							=item 18. L<csv-get-cells>
2018
2019							=item 19. L<csv-grep>
2020
2021							=item 20. L<csv-info>
2022
2023							=item 21. L<csv-intrange>
2024
2025							=item 22. L<csv-list-field-names>
2026
2027							=item 23. L<csv-lookup-fields>
2028
2029							=item 24. L<csv-ltrim>
2030
2031							=item 25. L<csv-map>
2032
2033							=item 26. L<csv-munge-field>
2034
2035							=item 27. L<csv-munge-rows>
2036
2037							=item 28. L<csv-pick>
2038
2039							=item 29. L<csv-pick-fields>
2040
2041							=item 30. L<csv-pick-rows>
2042
2043							=item 31. L<csv-quote>
2044
2045							=item 32. L<csv-replace-newline>
2046
2047							=item 33. L<csv-rtrim>
2048
2049							=item 34. L<csv-select-fields>
2050
2051							=item 35. L<csv-select-rows>
2052
2053							=item 36. L<csv-setop>
2054
2055							=item 37. L<csv-shuf>
2056
2057							=item 38. L<csv-shuf-fields>
2058
2059							=item 39. L<csv-shuf-rows>
2060
2061							=item 40. L<csv-sort>
2062
2063							=item 41. L<csv-sort-fields>
2064
2065							=item 42. L<csv-sort-rows>
2066
2067							=item 43. L<csv-sorted>
2068
2069							=item 44. L<csv-sorted-fields>
2070
2071							=item 45. L<csv-sorted-rows>
2072
2073							=item 46. L<csv-split>
2074
2075							=item 47. L<csv-sum>
2076
2077							=item 48. L<csv-transpose>
2078
2079							=item 49. L<csv-trim>
2080
2081							=item 50. L<csv-uniq>
2082
2083							=item 51. L<csv-unquote>
2084
2085							=item 52. L<csv2ltsv>
2086
2087							=item 53. L<csv2paras>
2088
2089							=item 54. L<csv2td>
2090
2091							=item 55. L<csv2tsv>
2092
2093							=item 56. L<csv2vcf>
2094
2095							=item 57. L<list-csvutils>
2096
2097							=item 58. L<paras2csv>
2098
2099							=item 59. L<tsv2csv>
2100
2101							=back
2102
2103							=head1 FUNCTIONS
2104
2105
2106							=head2 gen_csv_util
2107
2108							Usage:
2109
2110							gen_csv_util(%args) -> bool
2111
2112							Generate a CSV utility.
2113
2114							This routine is used to generate a CSV utility in the form of a L<Rinci>
2115							function (code and metadata). You can then produce a CLI from the Rinci function
2116							simply using L<Perinci::CmdLine::Gen> or, if you use L<Dist::Zilla>,
2117							L<Dist::Zilla::Plugin::GenPericmdScript> or, if on the command-line,
2118							L<gen-pericmd-script>.
2119
2120							Using this routine, by providing just one or a few hooks and setting some
2121							parameters like a couple of extra arguments, you will get a complete CLI with
2122							decent POD/manpage, ability to read one or multiple CSV's and write one or
2123							multiple CSV's, some command-line options to customize how the input CSV's
2124							should be parsed and how the output CSV's should be formatted and named. Your
2125							CLI also has tab completion, usage and help message, and other features.
2126
2127							To create a CSV utility, you specify a C<name> (e.g. C<csv_dump>; must be a valid
2128							unqualified Perl identifier/function name) and optionally C<summary>,
2129							C<description>, and other metadata like C<links> or even C<add_meta_props>. Then
2130							you specify one or more of C<on_> or C<before_> or C<after_*> arguments to supply
2131							handlers (coderefs) for your CSV utility at various hook points.
2132
2133							I<THE HOOKS>
2134
2135							All code for hooks should accept a single argument C<r>. C<r> is a stash (hashref)
2136							of various data, the keys of which will depend on which hook point being called.
2137							You can also add more keys to store data or for flow control (see hook
2138							documentation below for more details).
2139
2140							The order of the hooks, in processing chronological order:
2141
2142							=over
2143
2144							=item * on_begin
2145
2146							Called when utility begins, before reading CSV. You can use this hook e.g. to
2147							process arguments, set output filenames (if you allow custom output
2148							filenames).
2149
2150							=item * before_read_input
2151
2152							Called before opening any input CSV file. This hook is I<still> called even if
2153							your utility sets C<reads_csv> to false.
2154
2155							At this point, the C<input_filenames> stash key (as well as other keys like
2156							C<input_filename>, C<input_filenum>, etc) has not been set. You can use this
2157							hook e.g. to set a custom C<input_filenames>.
2158
2159							=item * before_open_input_files
2160
2161							Called before an input CSV file is about to be opened, including for stdin
2162							(C<->). You can use this hook e.g. to check/preprocess input file. Flow control
2163							is available by setting C<< $r-E<gt>{wants_skip_files} >> to skip reading all the input
2164							file and go directly to the C<after_read_input> hook.
2165
2166							=item * before_open_input_file
2167
2168							Called before an input CSV file is about to be opened, including for stdin
2169							(C<->). For the first file, called after C<before_open_input_file> hook. You can
2170							use this hook e.g. to check/preprocess input file. Flow control is available
2171							by setting C<< $r-E<gt>{wants_skip_file} >> to skip reading a single input file and go
2172							to the next file, or C<< $r-E<gt>{wants_skip_files} >> to skip reading the rest of the
2173							files and go directly to the C<after_read_input> hook.
2174
2175							=item * on_input_header_row
2176
2177							Called when receiving header row. Will be called for every input file, and
2178							called even when user specify C<--no-input-header>, in which case the header
2179							row will be the generated C<["field1", "field2", ...]>. You can use this hook
2180							e.g. to add/remove/rearrange fields.
2181
2182							You can set C<< $r-E<gt>{wants_fill_rows} >> to a defined false if you do not want
2183							C<< $r-E<gt>{input_rows} >> to be filled with empty string elements when it contains
2184							less than the number of fields (in case of sparse values at the end). Normally
2185							you only want to do this when you want to do checking, e.g. in
2186							L<csv-check-rows>.
2187
2188							=item * on_input_data_row
2189
2190							Called when receiving each data row. You can use this hook e.g. to modify the
2191							row or print output (for line-by-line transformation or filtering).
2192
2193							=item * after_close_input_file
2194
2195							Called after each input file is closed, including for stdin (C<->) (although
2196							for stdin, the handle is not actually closed). Flow control is possible by
2197							setting C<< $r-E<gt>{wants_skip_files} >> to skip reading the rest of the files and go
2198							straight to the C<after_close_input_files> hook.
2199
2200							=item * after_close_input_files
2201
2202							Called after the last input file is closed, after the last
2203							C<after_close_input_file> hook, including for stdin (C<->) (although for stdin,
2204							the handle is not actually closed).
2205
2206							=item * after_read_input
2207
2208							Called after the last row of the last CSV file is read and the last file is
2209							closed. This hook is I<still> called, if you set C<reads_csv> option to false.
2210							At this point the stash keys related to CSV reading have all been cleared,
2211							including C<input_filenames>, C<input_filename>, C<input_fh>, etc.
2212
2213							You can use this hook e.g. to print output if you buffer the output.
2214
2215							=item * on_end
2216
2217							Called when utility is about to exit. You can use this hook e.g. to return the
2218							final result.
2219
2220							=back
2221
2222							I<THE STASH>
2223
2224							The common keys that C<r> will contain:
2225
2226							=over
2227
2228							=item * C<gen_args>, hash. The arguments used to generate the CSV utility.
2229
2230							=item * C<util_args>, hash. The arguments that your CSV utility accepts. Parsed from
2231							command-line arguments (or configuration files, or environment variables).
2232
2233							=item * C<name>, str. The name of the CSV utility. Which can also be retrieved via
2234							C<gen_args>.
2235
2236							=item * C<code_print>, coderef. Routine provided for you to print something. Accepts a
2237							string. Takes care of opening the output files for you.
2238
2239							=item * C<code_print_row>, coderef. Routine provided for you to print a data row. You
2240							pass the row (either arrayref or hashref). Takes care of opening the output
2241							files for you, as well as printing header row the first time, if needed.
2242
2243							=item * C<code_print_header_row>, coderef. Routine provided for you to print header
2244							row. You don't need to pass any arguments. Will only print the header row once
2245							per output file if output header is enabled, even if called multiple times.
2246
2247							=back
2248
2249							If you are accepting CSV data (C<reads_csv> gen argument set to true), the
2250							following keys will also be available (in C<on_input_header_row> and
2251							C<on_input_data_row> hooks):
2252
2253							=over
2254
2255							=item * C<input_parser>, a L<Text::CSV_XS> instance for input parsing.
2256
2257							=item * C<input_filenames>, array of str.
2258
2259							=item * C<input_filename>, str. The name of the current input file being read (C<-> if
2260							reading from stdin).
2261
2262							=item * C<input_filenum>, uint. The number of the current input file, 1 being the first
2263							file, 2 for the second, and so on.
2264
2265							=item * C<input_fh>, the handle to the current file being read.
2266
2267							=item * C<input_rownum>, uint. The number of rows that have been read (reset after each
2268							input file). In C<on_input_header_row> phase, this will be 1 since header row
2269							(including the generated one) is the first row. Then in C<on_input_data_row>
2270							phase (called the first time for a file), it will be 2 for the first data row,
2271							even if physically it is the first row for CSV file that does not have a
2272							header.
2273
2274							=item * C<input_data_rownum>, uint. The number of data rows that have been read (reset
2275							after each input file). This will be equal to C<input_rownum> less 1 if input
2276							file has header.
2277
2278							=item * C<input_row>, aos (array of str). The current input CSV row as an arrayref.
2279
2280							=item * C<input_row_as_hashref>, hos (hash of str). The current input CSV row as a
2281							hashref, with field names as hash keys and field values as hash values. This
2282							will only be calculated if utility wants it. Utility can express so by setting
2283							C<< $r-E<gt>{wants_input_row_as_hashref} >> to true, e.g. in the C<on_begin> hook.
2284
2285							=item * C<input_header_row_count>, uint. Contains the number of actual header rows that
2286							have been read. If CLI user specifies C<--no-input-header>, this will stay at
2287							zero. Will be reset for each CSV file.
2288
2289							=item * C<input_data_row_count>, int. Contains the number of actual data rows that have
2290							read. Will be reset for each CSV file.
2291
2292							=back
2293
2294							If you are outputting CSV (C<writes_csv> gen argument set to true), the following
2295							keys will be available:
2296
2297							=over
2298
2299							=item * C<output_emitter>, a L<Text::CSV_XS> instance for output.
2300
2301							=item * C<output_filenames>, array of str.
2302
2303							=item * C<output_filename>, str, name of current output file.
2304
2305							=item * C<output_filenum>, uint, the number of the current output file, 1 being the
2306							first file, 2 for the second, and so on.
2307
2308							=item * C<output_fh>, handle to the current output file.
2309
2310							=item * C<output_rownum>, uint. The number of rows that have been outputted (reset
2311							after each output file).
2312
2313							=item * C<output_data_rownum>, uint. The number of data rows that have been outputted
2314							(reset after each output file). This will be equal to C<input_rownum> less 1 if
2315							input file has header.
2316
2317							=back
2318
2319							For other hook-specific keys, see the documentation for associated hook point.
2320
2321							I<ACCEPTING ADDITIONAL COMMAND-LINE OPTIONS/ARGUMENTS>
2322
2323							As mentioned above, you will get additional command-line options/arguments in
2324							C<< $r-E<gt>{util_args} >> hashref. Some options/arguments are already added by
2325							C<gen_csv_util>, e.g. C<input_filename> or C<input_filenames> along with
2326							C<input_sep_char>, etc (when your utility declares C<reads_csv>),
2327							C<output_filename> or C<output_filenames> along with C<overwrite>,
2328							C<output_sep_char>, etc (when your utility declares C<writes_csv>).
2329
2330							If you want to accept additional arguments/options, you specify them in
2331							C<add_args> (hashref, with key being Each option/argument has to be specified
2332							first via C<add_args> (as hashref, with key being argument name and value the
2333							argument specification as defined in L<Rinci::function>)). Some argument
2334							specifications have been defined in L<App::CSVUtils> and can be used. See
2335							existing utilities for examples.
2336
2337							I<READING CSV DATA>
2338
2339							To read CSV data, normally your utility would provide handler for the
2340							C<on_input_data_row> hook and sometimes additionally C<on_input_header_row>.
2341
2342							I<OUTPUTTING STRING OR RETURNING RESULT>
2343
2344							To output string, usually you call the provided routine C<< $r-E<gt>{code_print} >>. This
2345							routine will open the output files for you.
2346
2347							You can also return enveloped result directly by setting C<< $r-E<gt>{result} >>.
2348
2349							I<OUTPUTTING CSV DATA>
2350
2351							To output CSV data, usually you call the provided routine C<< $r-E<gt>{code_print_row} >>.
2352							This routine accepts a row (arrayref or hashref). This routine will open the
2353							output files for you when needed, as well as print header row automatically.
2354
2355							You can also buffer rows from input to e.g. C<< $r-E<gt>{output_rows} >>, then call
2356							C<< $r-E<gt>{code_print_row} >> repeatedly in the C<after_read_input> hook to print all the
2357							rows.
2358
2359							I<READING MULTIPLE CSV FILES>
2360
2361							To read multiple CSV files, you first specify C<reads_multiple_csv>. Then, you
2362							can supply handler for C<on_input_header_row> and C<on_input_data_row> as usual.
2363							If you want to do something before/after each input file, you can also supply
2364							handler for C<before_open_input_file> or C<after_close_input_file>.
2365
2366							I<WRITING TO MULTIPLE CSV FILES>
2367
2368							Similarly, to write to many CSv files, you first specify C<writes_multiple_csv>.
2369							Then, you can supply handler for C<on_input_header_row> and C<on_input_data_row>
2370							as usual. To switch to the next file, set
2371							C<< $r-E<gt>{wants_switch_to_next_output_file} >> to true, in which case the next call to
2372							C<< $r-E<gt>{code_print_row} >> will close the current file and open the next file.
2373
2374							I<CHANGING THE OUTPUT FIELDS>
2375
2376							When calling C<< $r-E<gt>{code_print_row} >>, you can output whatever fields you want. By
2377							convention, you can set C<< $r-E<gt>{output_fields} >> and C<< $r-E<gt>{output_fields_idx} >> to
2378							let other handlers know about the output fields. For example, see the
2379							implementation of L<csv-concat>.
2380
2381							This function is not exported by default, but exportable.
2382
2383							Arguments ('*' denotes required arguments):
2384
2385							=over 4
2386
2387							=item * B<add_args> => I<hash>
2388
2389							(No description)
2390
2391							=item * B<add_args_rels> => I<hash>
2392
2393							(No description)
2394
2395							=item * B<add_meta_props> => I<hash>
2396
2397							Add additional Rinci function metadata properties.
2398
2399							=item * B<after_close_input_file> => I<code>
2400
2401							(No description)
2402
2403							=item * B<after_close_input_files> => I<code>
2404
2405							(No description)
2406
2407							=item * B<after_read_input> => I<code>
2408
2409							(No description)
2410
2411							=item * B<before_open_input_file> => I<code>
2412
2413							(No description)
2414
2415							=item * B<before_open_input_files> => I<code>
2416
2417							(No description)
2418
2419							=item * B<before_read_input> => I<code>
2420
2421							(No description)
2422
2423							=item * B<description> => I<str>
2424
2425							(No description)
2426
2427							=item * B<examples> => I<array>
2428
2429							(No description)
2430
2431							=item * B<links> => I<array[hash]>
2432
2433							(No description)
2434
2435							=item * B<name>* => I<perl::identifier::unqualified_ascii>
2436
2437							(No description)
2438
2439							=item * B<on_begin> => I<code>
2440
2441							(No description)
2442
2443							=item * B<on_end> => I<code>
2444
2445							(No description)
2446
2447							=item * B<on_input_data_row> => I<code>
2448
2449							(No description)
2450
2451							=item * B<on_input_header_row> => I<code>
2452
2453							(No description)
2454
2455							=item * B<reads_csv> => I<bool> (default: 1)
2456
2457							Whether utility reads CSV data.
2458
2459							=item * B<reads_multiple_csv> => I<bool>
2460
2461							Whether utility accepts CSV data.
2462
2463							Setting this option to true will implicitly set the C<reads_csv> option to true,
2464							obviously.
2465
2466							=item * B<summary> => I<str>
2467
2468							(No description)
2469
2470							=item * B<writes_csv> => I<bool> (default: 1)
2471
2472							Whether utility writes CSV data.
2473
2474							=item * B<writes_multiple_csv> => I<bool>
2475
2476							Whether utility outputs CSV data.
2477
2478							Setting this option to true will implicitly set the C<writes_csv> option to true,
2479							obviously.
2480
2481
2482							=back
2483
2484							Return value: (bool)
2485
2486
2487							=head2 compile_eval_code
2488
2489							Usage:
2490
2491							$coderef = compile_eval_code($str, $label);
2492
2493							Compile string code C<$str> to coderef in 'main' package, without C<use strict>
2494							or C<use warnings>. Die on compile error.
2495
2496							=head2 eval_code
2497
2498							Usage:
2499
2500							$res = eval_code($coderef, $r, $topic_var_value, $return_topic_var);
2501
2502							=for Pod::Coverage ^(csvutil)$
2503
2504							=head1 FAQ
2505
2506							=head2 My CSV does not have a header?
2507
2508							Use the C<--no-header> option. Fields will be named C<field1>, C<field2>, and so
2509							on.
2510
2511							=head2 My data is TSV, not CSV?
2512
2513							Use the C<--tsv> option.
2514
2515							=head2 I have a big CSV and the utilities are too slow or eat too much RAM!
2516
2517							These utilities are not (yet) optimized, patches welcome. If your CSV is very
2518							big, perhaps a C-based solution is what you need.
2519
2520							=head1 HOMEPAGE
2521
2522							Please visit the project's homepage at L<https://metacpan.org/release/App-CSVUtils>.
2523
2524							=head1 SOURCE
2525
2526							Source repository is at L<https://github.com/perlancar/perl-App-CSVUtils>.
2527
2528							=head1 SEE ALSO
2529
2530							=head2 Similar CLI bundles for other format
2531
2532							L<App::TSVUtils>, L<App::LTSVUtils>, L<App::SerializeUtils>.
2533
2534							=head2 Other CSV-related utilities
2535
2536							L<xls2csv> and L<xlsx2csv> from L<Spreadsheet::Read>
2537
2538							L<import-csv-to-sqlite> from L<App::SQLiteUtils>
2539
2540							Query CSV with SQL using L<fsql> from L<App::fsql>
2541
2542							L<csvgrep> from L<csvgrep>
2543
2544							=head2 Other non-Perl-based CSV utilities
2545
2546							=head3 Python
2547
2548							B<csvkit>, L<https://csvkit.readthedocs.io/en/latest/>
2549
2550							=head1 AUTHOR
2551
2552							perlancar <perlancar@cpan.org>
2553
2554							=head1 CONTRIBUTOR
2555
2556							=for stopwords Adam Hopkins
2557
2558							Adam Hopkins <violapiratejunky@gmail.com>
2559
2560							=head1 CONTRIBUTING
2561
2562
2563							To contribute, you can send patches by email/via RT, or send pull requests on
2564							GitHub.
2565
2566							Most of the time, you don't need to build the distribution yourself. You can
2567							simply modify the code, then test via:
2568
2569							% prove -l
2570
2571							If you want to build the distribution (e.g. to try to install it locally on your
2572							system), you can install L<Dist::Zilla>,
2573							L<Dist::Zilla::PluginBundle::Author::PERLANCAR>,
2574							L<Pod::Weaver::PluginBundle::Author::PERLANCAR>, and sometimes one or two other
2575							Dist::Zilla- and/or Pod::Weaver plugins. Any additional steps required beyond
2576							that are considered a bug and can be reported to me.
2577
2578							=head1 COPYRIGHT AND LICENSE
2579
2580							This software is copyright (c) 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016 by perlancar <perlancar@cpan.org>.
2581
2582							This is free software; you can redistribute it and/or modify it under
2583							the same terms as the Perl 5 programming language system itself.
2584
2585							=head1 BUGS
2586
2587							Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=App-CSVUtils>
2588
2589							When submitting a bug or request, please include a test-file or a
2590							patch to an existing test-file that illustrates the bug or desired
2591							feature.
2592
2593							=cut