File Coverage

blib/lib/App/CSVUtils/csv_add_fields.pm

Criterion	Covered	Total	%
statement	11	11	100.0
branch			n/a
condition			n/a
subroutine	4	4	100.0
pod			n/a
total	15	15	100.0

line	stmt	sub	time	code
1				package App::CSVUtils::csv_add_fields;
2
3	1	1	5104	use 5.010001;
	1		4
4	1	1	5	use strict;
	1		2
	1		23
5	1	1	4	use warnings;
	1		2
	1		71
6
7				our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
8				our $DATE = '2023-04-01'; # DATE
9				our $DIST = 'App-CSVUtils'; # DIST
10				our $VERSION = '1.024'; # VERSION
11
12	1		860	use App::CSVUtils qw(
13				gen_csv_util
14				compile_eval_code
15				eval_code
16	1	1	5	);
	1		3
17
18				gen_csv_util(
19				name => 'csv_add_fields',
20				summary => 'Add one or more fields to CSV file',
21				description => <<'_',
22
23				The new fields by default will be added at the end, unless you specify one of
24				`--after` (to put after a certain field), `--before` (to put before a certain
25				field), or `--at` (to put at specific position, 1 means the first field). The
26				new fields will be clustered together though, you currently cannot set the
27				position of each new field. But you can later reorder fields using
28				<prog:csv-sort-fields>.
29
30				If supplied, your Perl code (`-e`) will be called for each row (excluding the
31				header row) and should return the value for the new fields (either as a list or
32				as an arrayref). `$_` contains the current row (as arrayref, or if you specify
33				`-H`, as a hashref). `$main::row` is available and contains the current row
34				(always as an arrayref). `$main::rownum` contains the row number (2 means the
35				first data row). `$csv` is the <pm:Text::CSV_XS> object. `$main::fields_idx` is
36				also available for additional information.
37
38				If `-e` is not supplied, the new fields will be getting the default value of
39				empty string (`''`).
40
41				_
42				add_args => {
43				%App::CSVUtils::argspec_fields_1plus_nocomp,
44				%App::CSVUtils::argspecopt_eval,
45				%App::CSVUtils::argspecopt_hash,
46				after => {
47				summary => 'Put the new field(s) after specified field',
48				schema => 'str*',
49				completion => \&_complete_field,
50				},
51				before => {
52				summary => 'Put the new field(s) before specified field',
53				schema => 'str*',
54				completion => \&_complete_field,
55				},
56				at => {
57				summary => 'Put the new field(s) at specific position '.
58				'(1 means at the front of all others)',
59				schema => 'posint*',
60				},
61				},
62				add_args_rels => {
63				choose_one => [qw/after before at/],
64				},
65				tags => ['category:munging','adds-fields'],
66
67				examples => [
68				{
69				summary => 'Add a few new blank fields at the end',
70				argv => ['file.csv', 'field4', 'field6', 'field5'],
71				test => 0,
72				'x.doc.show_result' => 0,
73				},
74				{
75				summary => 'Add a few new blank fields after a certain field',
76				argv => ['file.csv', 'field4', 'field6', 'field5', '--after', 'field2'],
77				test => 0,
78				'x.doc.show_result' => 0,
79				},
80				{
81				summary => 'Add a new field and set its value',
82				argv => ['file.csv', 'after_tax', '-e', '$main::row->[5] * 1.11'],
83				test => 0,
84				'x.doc.show_result' => 0,
85				},
86				{
87				summary => 'Add a couple new fields and set their values',
88				argv => ['file.csv', 'tax_rate', 'after_tax', '-e', '(0.11, $main::row->[5] * 1.11)'],
89				test => 0,
90				'x.doc.show_result' => 0,
91				},
92				],
93
94				on_begin => sub {
95				my $r = shift;
96
97				# check arguments
98				if (!defined($r->{util_args}{fields}) \|\| !@{ $r->{util_args}{fields} }) {
99				die [400, "Please specify one or more fields (-f)"];
100				}
101				},
102
103				on_input_header_row => sub {
104				my $r = shift;
105
106				# check that the new fields are not duplicate (against existing fields
107				# and against itself)
108				my %seen;
109				for (@{ $r->{util_args}{fields} }) {
110				unless (length $_) {
111				die [400, "New field name cannot be empty"];
112				}
113				if (defined $r->{input_fields_idx}{$_}) {
114				die [412, "Field '$_' already exists"];
115				}
116				if ($seen{$_}++) {
117				die [412, "Duplicate new field '$_'"];
118				}
119				}
120
121				# determine the position at which to insert the new fields
122				my $new_fields_idx;
123				if (defined $r->{util_args}{at}) {
124				$new_fields_idx = $r->{util_args}{at}-1;
125				} elsif (defined $r->{util_args}{before}) {
126				for (0..$#{ $r->{input_fields} }) {
127				if ($r->{input_fields}[$_] eq $r->{util_args}{before}) {
128				$new_fields_idx = $_;
129				last;
130				}
131				}
132				die [400, "Field '$r->{util_args}{before}' (to add new fields before) not found"]
133				unless defined $new_fields_idx;
134				} elsif (defined $r->{util_args}{after}) {
135				for (0..$#{ $r->{input_fields} }) {
136				if ($r->{input_fields}[$_] eq $r->{util_args}{after}) {
137				$new_fields_idx = $_+1;
138				last;
139				}
140				}
141				die [400, "Field '$r->{util_args}{after}' (to add new fields after) not found"]
142				unless defined $new_fields_idx;
143				} else {
144				$new_fields_idx = @{ $r->{input_fields} };
145				}
146
147				# for printing the header
148				$r->{output_fields} = [@{ $r->{input_fields} }];
149				splice @{ $r->{output_fields} }, $new_fields_idx, 0, @{ $r->{util_args}{fields} };
150
151				$r->{wants_input_row_as_hashref} = 1 if $r->{util_args}{hash};
152
153				# we add the following keys to the stash
154				$r->{code} = compile_eval_code($r->{util_args}{eval} // 'return', 'eval');
155				$r->{new_fields_idx} = $new_fields_idx;
156				},
157
158				on_input_data_row => sub {
159				my $r = shift;
160
161				my @vals;
162				eval { @vals = eval_code($r->{code}, $r, $r->{wants_input_row_as_hashref} ? $r->{input_row_as_hashref} : $r->{input_row}) };
163				die [500, "Error while adding field(s) '".join(",", @{$r->{util_args}{fields}})."' for row #$r->{input_rownum}: $@"]
164				if $@;
165				if (ref $vals[0] eq 'ARRAY') { @vals = @{ $vals[0] } }
166				splice @{ $r->{input_row} }, $r->{new_fields_idx}, 0,
167				(map { $_ // '' } @vals[0 .. $#{$r->{util_args}{fields}}]);
168				$r->{code_print_row}->($r->{input_row});
169				},
170				);
171
172				1;
173				# ABSTRACT: Add one or more fields to CSV file
174
175				__END__
176
177				=pod
178
179				=encoding UTF-8
180
181				=head1 NAME
182
183				App::CSVUtils::csv_add_fields - Add one or more fields to CSV file
184
185				=head1 VERSION
186
187				This document describes version 1.024 of App::CSVUtils::csv_add_fields (from Perl distribution App-CSVUtils), released on 2023-04-01.
188
189				=head1 FUNCTIONS
190
191
192				=head2 csv_add_fields
193
194				Usage:
195
196				csv_add_fields(%args) -> [$status_code, $reason, $payload, \%result_meta]
197
198				Add one or more fields to CSV file.
199
200				Examples:
201
202				=over
203
204				=item * Add a few new blank fields at the end:
205
206				csv_add_fields(
207				input_filename => "file.csv",
208				fields => ["field4", "field6", "field5"]
209				);
210
211				=item * Add a few new blank fields after a certain field:
212
213				csv_add_fields(
214				input_filename => "file.csv",
215				fields => ["field4", "field6", "field5"],
216				after => "field2"
217				);
218
219				=item * Add a new field and set its value:
220
221				csv_add_fields(
222				input_filename => "file.csv",
223				fields => ["after_tax"],
224				eval => "\$main::row->[5] * 1.11"
225				);
226
227				=item * Add a couple new fields and set their values:
228
229				csv_add_fields(
230				input_filename => "file.csv",
231				fields => ["tax_rate", "after_tax"],
232				eval => "(0.11, \$main::row->[5] * 1.11)"
233				);
234
235				=back
236
237				The new fields by default will be added at the end, unless you specify one of
238				C<--after> (to put after a certain field), C<--before> (to put before a certain
239				field), or C<--at> (to put at specific position, 1 means the first field). The
240				new fields will be clustered together though, you currently cannot set the
241				position of each new field. But you can later reorder fields using
242				L<csv-sort-fields>.
243
244				If supplied, your Perl code (C<-e>) will be called for each row (excluding the
245				header row) and should return the value for the new fields (either as a list or
246				as an arrayref). C<$_> contains the current row (as arrayref, or if you specify
247				C<-H>, as a hashref). C<$main::row> is available and contains the current row
248				(always as an arrayref). C<$main::rownum> contains the row number (2 means the
249				first data row). C<$csv> is the L<Text::CSV_XS> object. C<$main::fields_idx> is
250				also available for additional information.
251
252				If C<-e> is not supplied, the new fields will be getting the default value of
253				empty string (C<''>).
254
255				This function is not exported.
256
257				Arguments ('*' denotes required arguments):
258
259				=over 4
260
261				=item * B<after> => I<str>
262
263				Put the new field(s) after specified field.
264
265				=item * B<at> => I<posint>
266
267				Put the new field(s) at specific position (1 means at the front of all others).
268
269				=item * B<before> => I<str>
270
271				Put the new field(s) before specified field.
272
273				=item * B<eval> => I<str\|code>
274
275				Perl code.
276
277				=item * B<fields>* => I<array[str]>
278
279				Field names.
280
281				=item * B<hash> => I<bool>
282
283				Provide row in $_ as hashref instead of arrayref.
284
285				=item * B<inplace> => I<true>
286
287				Output to the same file as input.
288
289				Normally, you output to a different file than input. If you try to output to the
290				same file (C<-o INPUT.csv -O>) you will clobber the input file; thus the utility
291				prevents you from doing it. However, with this C<--inplace> option, you can
292				output to the same file. Like perl's C<-i> option, this will first output to a
293				temporary file in the same directory as the input file then rename to the final
294				file at the end. You cannot specify output file (C<-o>) when using this option,
295				but you can specify backup extension with C<-b> option.
296
297				Some caveats:
298
299				=over
300
301				=item * if input file is a symbolic link, it will be replaced with a regular file;
302
303				=item * renaming (implemented using C<rename()>) can fail if input filename is too long;
304
305				=item * value specified in C<-b> is currently not checked for acceptable characters;
306
307				=item * things can also fail if permissions are restrictive;
308
309				=back
310
311				=item * B<inplace_backup_ext> => I<str> (default: "")
312
313				Extension to add for backup of input file.
314
315				In inplace mode (C<--inplace>), if this option is set to a non-empty string, will
316				rename the input file using this extension as a backup. The old existing backup
317				will be overwritten, if any.
318
319				=item * B<input_escape_char> => I<str>
320
321				Specify character to escape value in field in input CSV, will be passed to Text::CSV_XS.
322
323				Defaults to C<\\> (backslash). Overrides C<--input-tsv> option.
324
325				=item * B<input_filename> => I<filename> (default: "-")
326
327				Input CSV file.
328
329				Use C<-> to read from stdin.
330
331				Encoding of input file is assumed to be UTF-8.
332
333				=item * B<input_header> => I<bool> (default: 1)
334
335				Specify whether input CSV has a header row.
336
337				By default, the first row of the input CSV will be assumed to contain field
338				names (and the second row contains the first data row). When you declare that
339				input CSV does not have header row (C<--no-input-header>), the first row of the
340				CSV is assumed to contain the first data row. Fields will be named C<field1>,
341				C<field2>, and so on.
342
343				=item * B<input_quote_char> => I<str>
344
345				Specify field quote character in input CSV, will be passed to Text::CSV_XS.
346
347				Defaults to C<"> (double quote). Overrides C<--input-tsv> option.
348
349				=item * B<input_sep_char> => I<str>
350
351				Specify field separator character in input CSV, will be passed to Text::CSV_XS.
352
353				Defaults to C<,> (comma). Overrides C<--input-tsv> option.
354
355				=item * B<input_tsv> => I<true>
356
357				Inform that input file is in TSV (tab-separated) format instead of CSV.
358
359				Overriden by C<--input-sep-char>, C<--input-quote-char>, C<--input-escape-char>
360				options. If one of those options is specified, then C<--input-tsv> will be
361				ignored.
362
363				=item * B<output_always_quote> => I<bool> (default: 0)
364
365				Whether to always quote values.
366
367				When set to false (the default), values are quoted only when necessary:
368
369				field1,field2,"field three contains comma (,)",field4
370
371				When set to true, then all values will be quoted:
372
373				"field1","field2","field three contains comma (,)","field4"
374
375				=item * B<output_escape_char> => I<str>
376
377				Specify character to escape value in field in output CSV, will be passed to Text::CSV_XS.
378
379				This is like C<--input-escape-char> option but for output instead of input.
380
381				Defaults to C<\\> (backslash). Overrides C<--output-tsv> option.
382
383				=item * B<output_filename> => I<filename>
384
385				Output filename.
386
387				Use C<-> to output to stdout (the default if you don't specify this option).
388
389				Encoding of output file is assumed to be UTF-8.
390
391				=item * B<output_header> => I<bool>
392
393				Whether output CSV should have a header row.
394
395				By default, a header row will be output I<if> input CSV has header row. Under
396				C<--output-header>, a header row will be output even if input CSV does not have
397				header row (value will be something like "col0,col1,..."). Under
398				C<--no-output-header>, header row will I<not> be printed even if input CSV has
399				header row. So this option can be used to unconditionally add or remove header
400				row.
401
402				=item * B<output_quote_char> => I<str>
403
404				Specify field quote character in output CSV, will be passed to Text::CSV_XS.
405
406				This is like C<--input-quote-char> option but for output instead of input.
407
408				Defaults to C<"> (double quote). Overrides C<--output-tsv> option.
409
410				=item * B<output_quote_empty> => I<bool> (default: 0)
411
412				Whether to quote empty values.
413
414				When set to false (the default), empty values are not quoted:
415
416				field1,field2,,field4
417
418				When set to true, then empty values will be quoted:
419
420				field1,field2,"",field4
421
422				=item * B<output_sep_char> => I<str>
423
424				Specify field separator character in output CSV, will be passed to Text::CSV_XS.
425
426				This is like C<--input-sep-char> option but for output instead of input.
427
428				Defaults to C<,> (comma). Overrides C<--output-tsv> option.
429
430				=item * B<output_tsv> => I<bool>
431
432				Inform that output file is TSV (tab-separated) format instead of CSV.
433
434				This is like C<--input-tsv> option but for output instead of input.
435
436				Overriden by C<--output-sep-char>, C<--output-quote-char>, C<--output-escape-char>
437				options. If one of those options is specified, then C<--output-tsv> will be
438				ignored.
439
440				=item * B<overwrite> => I<bool>
441
442				Whether to override existing output file.
443
444
445				=back
446
447				Returns an enveloped result (an array).
448
449				First element ($status_code) is an integer containing HTTP-like status code
450				(200 means OK, 4xx caller error, 5xx function error). Second element
451				($reason) is a string containing error message, or something like "OK" if status is
452				200. Third element ($payload) is the actual result, but usually not present when enveloped result is an error response ($status_code is not 2xx). Fourth
453				element (%result_meta) is called result metadata and is optional, a hash
454				that contains extra information, much like how HTTP response headers provide additional metadata.
455
456				Return value: (any)
457
458				=head1 HOMEPAGE
459
460				Please visit the project's homepage at L<https://metacpan.org/release/App-CSVUtils>.
461
462				=head1 SOURCE
463
464				Source repository is at L<https://github.com/perlancar/perl-App-CSVUtils>.
465
466				=head1 AUTHOR
467
468				perlancar <perlancar@cpan.org>
469
470				=head1 CONTRIBUTING
471
472
473				To contribute, you can send patches by email/via RT, or send pull requests on
474				GitHub.
475
476				Most of the time, you don't need to build the distribution yourself. You can
477				simply modify the code, then test via:
478
479				% prove -l
480
481				If you want to build the distribution (e.g. to try to install it locally on your
482				system), you can install L<Dist::Zilla>,
483				L<Dist::Zilla::PluginBundle::Author::PERLANCAR>,
484				L<Pod::Weaver::PluginBundle::Author::PERLANCAR>, and sometimes one or two other
485				Dist::Zilla- and/or Pod::Weaver plugins. Any additional steps required beyond
486				that are considered a bug and can be reported to me.
487
488				=head1 COPYRIGHT AND LICENSE
489
490				This software is copyright (c) 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016 by perlancar <perlancar@cpan.org>.
491
492				This is free software; you can redistribute it and/or modify it under
493				the same terms as the Perl 5 programming language system itself.
494
495				=head1 BUGS
496
497				Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=App-CSVUtils>
498
499				When submitting a bug or request, please include a test-file or a
500				patch to an existing test-file that illustrates the bug or desired
501				feature.
502
503				=cut