File Coverage

blib/lib/App/CSVUtils/csv_add_fields.pm
Criterion Covered Total %
statement 11 11 100.0
branch n/a
condition n/a
subroutine 4 4 100.0
pod n/a
total 15 15 100.0


line stmt bran cond sub pod time code
1             package App::CSVUtils::csv_add_fields;
2              
3 1     1   5104 use 5.010001;
  1         4  
4 1     1   5 use strict;
  1         2  
  1         23  
5 1     1   4 use warnings;
  1         2  
  1         71  
6              
7             our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
8             our $DATE = '2023-04-01'; # DATE
9             our $DIST = 'App-CSVUtils'; # DIST
10             our $VERSION = '1.024'; # VERSION
11              
12 1         860 use App::CSVUtils qw(
13             gen_csv_util
14             compile_eval_code
15             eval_code
16 1     1   5 );
  1         3  
17              
18             gen_csv_util(
19             name => 'csv_add_fields',
20             summary => 'Add one or more fields to CSV file',
21             description => <<'_',
22              
23             The new fields by default will be added at the end, unless you specify one of
24             `--after` (to put after a certain field), `--before` (to put before a certain
25             field), or `--at` (to put at specific position, 1 means the first field). The
26             new fields will be clustered together though, you currently cannot set the
27             position of each new field. But you can later reorder fields using
28             <prog:csv-sort-fields>.
29              
30             If supplied, your Perl code (`-e`) will be called for each row (excluding the
31             header row) and should return the value for the new fields (either as a list or
32             as an arrayref). `$_` contains the current row (as arrayref, or if you specify
33             `-H`, as a hashref). `$main::row` is available and contains the current row
34             (always as an arrayref). `$main::rownum` contains the row number (2 means the
35             first data row). `$csv` is the <pm:Text::CSV_XS> object. `$main::fields_idx` is
36             also available for additional information.
37              
38             If `-e` is not supplied, the new fields will be getting the default value of
39             empty string (`''`).
40              
41             _
42             add_args => {
43             %App::CSVUtils::argspec_fields_1plus_nocomp,
44             %App::CSVUtils::argspecopt_eval,
45             %App::CSVUtils::argspecopt_hash,
46             after => {
47             summary => 'Put the new field(s) after specified field',
48             schema => 'str*',
49             completion => \&_complete_field,
50             },
51             before => {
52             summary => 'Put the new field(s) before specified field',
53             schema => 'str*',
54             completion => \&_complete_field,
55             },
56             at => {
57             summary => 'Put the new field(s) at specific position '.
58             '(1 means at the front of all others)',
59             schema => 'posint*',
60             },
61             },
62             add_args_rels => {
63             choose_one => [qw/after before at/],
64             },
65             tags => ['category:munging','adds-fields'],
66              
67             examples => [
68             {
69             summary => 'Add a few new blank fields at the end',
70             argv => ['file.csv', 'field4', 'field6', 'field5'],
71             test => 0,
72             'x.doc.show_result' => 0,
73             },
74             {
75             summary => 'Add a few new blank fields after a certain field',
76             argv => ['file.csv', 'field4', 'field6', 'field5', '--after', 'field2'],
77             test => 0,
78             'x.doc.show_result' => 0,
79             },
80             {
81             summary => 'Add a new field and set its value',
82             argv => ['file.csv', 'after_tax', '-e', '$main::row->[5] * 1.11'],
83             test => 0,
84             'x.doc.show_result' => 0,
85             },
86             {
87             summary => 'Add a couple new fields and set their values',
88             argv => ['file.csv', 'tax_rate', 'after_tax', '-e', '(0.11, $main::row->[5] * 1.11)'],
89             test => 0,
90             'x.doc.show_result' => 0,
91             },
92             ],
93              
94             on_begin => sub {
95             my $r = shift;
96              
97             # check arguments
98             if (!defined($r->{util_args}{fields}) || !@{ $r->{util_args}{fields} }) {
99             die [400, "Please specify one or more fields (-f)"];
100             }
101             },
102              
103             on_input_header_row => sub {
104             my $r = shift;
105              
106             # check that the new fields are not duplicate (against existing fields
107             # and against itself)
108             my %seen;
109             for (@{ $r->{util_args}{fields} }) {
110             unless (length $_) {
111             die [400, "New field name cannot be empty"];
112             }
113             if (defined $r->{input_fields_idx}{$_}) {
114             die [412, "Field '$_' already exists"];
115             }
116             if ($seen{$_}++) {
117             die [412, "Duplicate new field '$_'"];
118             }
119             }
120              
121             # determine the position at which to insert the new fields
122             my $new_fields_idx;
123             if (defined $r->{util_args}{at}) {
124             $new_fields_idx = $r->{util_args}{at}-1;
125             } elsif (defined $r->{util_args}{before}) {
126             for (0..$#{ $r->{input_fields} }) {
127             if ($r->{input_fields}[$_] eq $r->{util_args}{before}) {
128             $new_fields_idx = $_;
129             last;
130             }
131             }
132             die [400, "Field '$r->{util_args}{before}' (to add new fields before) not found"]
133             unless defined $new_fields_idx;
134             } elsif (defined $r->{util_args}{after}) {
135             for (0..$#{ $r->{input_fields} }) {
136             if ($r->{input_fields}[$_] eq $r->{util_args}{after}) {
137             $new_fields_idx = $_+1;
138             last;
139             }
140             }
141             die [400, "Field '$r->{util_args}{after}' (to add new fields after) not found"]
142             unless defined $new_fields_idx;
143             } else {
144             $new_fields_idx = @{ $r->{input_fields} };
145             }
146              
147             # for printing the header
148             $r->{output_fields} = [@{ $r->{input_fields} }];
149             splice @{ $r->{output_fields} }, $new_fields_idx, 0, @{ $r->{util_args}{fields} };
150              
151             $r->{wants_input_row_as_hashref} = 1 if $r->{util_args}{hash};
152              
153             # we add the following keys to the stash
154             $r->{code} = compile_eval_code($r->{util_args}{eval} // 'return', 'eval');
155             $r->{new_fields_idx} = $new_fields_idx;
156             },
157              
158             on_input_data_row => sub {
159             my $r = shift;
160              
161             my @vals;
162             eval { @vals = eval_code($r->{code}, $r, $r->{wants_input_row_as_hashref} ? $r->{input_row_as_hashref} : $r->{input_row}) };
163             die [500, "Error while adding field(s) '".join(",", @{$r->{util_args}{fields}})."' for row #$r->{input_rownum}: $@"]
164             if $@;
165             if (ref $vals[0] eq 'ARRAY') { @vals = @{ $vals[0] } }
166             splice @{ $r->{input_row} }, $r->{new_fields_idx}, 0,
167             (map { $_ // '' } @vals[0 .. $#{$r->{util_args}{fields}}]);
168             $r->{code_print_row}->($r->{input_row});
169             },
170             );
171              
172             1;
173             # ABSTRACT: Add one or more fields to CSV file
174              
175             __END__
176              
177             =pod
178              
179             =encoding UTF-8
180              
181             =head1 NAME
182              
183             App::CSVUtils::csv_add_fields - Add one or more fields to CSV file
184              
185             =head1 VERSION
186              
187             This document describes version 1.024 of App::CSVUtils::csv_add_fields (from Perl distribution App-CSVUtils), released on 2023-04-01.
188              
189             =head1 FUNCTIONS
190              
191              
192             =head2 csv_add_fields
193              
194             Usage:
195              
196             csv_add_fields(%args) -> [$status_code, $reason, $payload, \%result_meta]
197              
198             Add one or more fields to CSV file.
199              
200             Examples:
201              
202             =over
203              
204             =item * Add a few new blank fields at the end:
205              
206             csv_add_fields(
207             input_filename => "file.csv",
208             fields => ["field4", "field6", "field5"]
209             );
210              
211             =item * Add a few new blank fields after a certain field:
212              
213             csv_add_fields(
214             input_filename => "file.csv",
215             fields => ["field4", "field6", "field5"],
216             after => "field2"
217             );
218              
219             =item * Add a new field and set its value:
220              
221             csv_add_fields(
222             input_filename => "file.csv",
223             fields => ["after_tax"],
224             eval => "\$main::row->[5] * 1.11"
225             );
226              
227             =item * Add a couple new fields and set their values:
228              
229             csv_add_fields(
230             input_filename => "file.csv",
231             fields => ["tax_rate", "after_tax"],
232             eval => "(0.11, \$main::row->[5] * 1.11)"
233             );
234              
235             =back
236              
237             The new fields by default will be added at the end, unless you specify one of
238             C<--after> (to put after a certain field), C<--before> (to put before a certain
239             field), or C<--at> (to put at specific position, 1 means the first field). The
240             new fields will be clustered together though, you currently cannot set the
241             position of each new field. But you can later reorder fields using
242             L<csv-sort-fields>.
243              
244             If supplied, your Perl code (C<-e>) will be called for each row (excluding the
245             header row) and should return the value for the new fields (either as a list or
246             as an arrayref). C<$_> contains the current row (as arrayref, or if you specify
247             C<-H>, as a hashref). C<$main::row> is available and contains the current row
248             (always as an arrayref). C<$main::rownum> contains the row number (2 means the
249             first data row). C<$csv> is the L<Text::CSV_XS> object. C<$main::fields_idx> is
250             also available for additional information.
251              
252             If C<-e> is not supplied, the new fields will be getting the default value of
253             empty string (C<''>).
254              
255             This function is not exported.
256              
257             Arguments ('*' denotes required arguments):
258              
259             =over 4
260              
261             =item * B<after> => I<str>
262              
263             Put the new field(s) after specified field.
264              
265             =item * B<at> => I<posint>
266              
267             Put the new field(s) at specific position (1 means at the front of all others).
268              
269             =item * B<before> => I<str>
270              
271             Put the new field(s) before specified field.
272              
273             =item * B<eval> => I<str|code>
274              
275             Perl code.
276              
277             =item * B<fields>* => I<array[str]>
278              
279             Field names.
280              
281             =item * B<hash> => I<bool>
282              
283             Provide row in $_ as hashref instead of arrayref.
284              
285             =item * B<inplace> => I<true>
286              
287             Output to the same file as input.
288              
289             Normally, you output to a different file than input. If you try to output to the
290             same file (C<-o INPUT.csv -O>) you will clobber the input file; thus the utility
291             prevents you from doing it. However, with this C<--inplace> option, you can
292             output to the same file. Like perl's C<-i> option, this will first output to a
293             temporary file in the same directory as the input file then rename to the final
294             file at the end. You cannot specify output file (C<-o>) when using this option,
295             but you can specify backup extension with C<-b> option.
296              
297             Some caveats:
298              
299             =over
300              
301             =item * if input file is a symbolic link, it will be replaced with a regular file;
302              
303             =item * renaming (implemented using C<rename()>) can fail if input filename is too long;
304              
305             =item * value specified in C<-b> is currently not checked for acceptable characters;
306              
307             =item * things can also fail if permissions are restrictive;
308              
309             =back
310              
311             =item * B<inplace_backup_ext> => I<str> (default: "")
312              
313             Extension to add for backup of input file.
314              
315             In inplace mode (C<--inplace>), if this option is set to a non-empty string, will
316             rename the input file using this extension as a backup. The old existing backup
317             will be overwritten, if any.
318              
319             =item * B<input_escape_char> => I<str>
320              
321             Specify character to escape value in field in input CSV, will be passed to Text::CSV_XS.
322              
323             Defaults to C<\\> (backslash). Overrides C<--input-tsv> option.
324              
325             =item * B<input_filename> => I<filename> (default: "-")
326              
327             Input CSV file.
328              
329             Use C<-> to read from stdin.
330              
331             Encoding of input file is assumed to be UTF-8.
332              
333             =item * B<input_header> => I<bool> (default: 1)
334              
335             Specify whether input CSV has a header row.
336              
337             By default, the first row of the input CSV will be assumed to contain field
338             names (and the second row contains the first data row). When you declare that
339             input CSV does not have header row (C<--no-input-header>), the first row of the
340             CSV is assumed to contain the first data row. Fields will be named C<field1>,
341             C<field2>, and so on.
342              
343             =item * B<input_quote_char> => I<str>
344              
345             Specify field quote character in input CSV, will be passed to Text::CSV_XS.
346              
347             Defaults to C<"> (double quote). Overrides C<--input-tsv> option.
348              
349             =item * B<input_sep_char> => I<str>
350              
351             Specify field separator character in input CSV, will be passed to Text::CSV_XS.
352              
353             Defaults to C<,> (comma). Overrides C<--input-tsv> option.
354              
355             =item * B<input_tsv> => I<true>
356              
357             Inform that input file is in TSV (tab-separated) format instead of CSV.
358              
359             Overriden by C<--input-sep-char>, C<--input-quote-char>, C<--input-escape-char>
360             options. If one of those options is specified, then C<--input-tsv> will be
361             ignored.
362              
363             =item * B<output_always_quote> => I<bool> (default: 0)
364              
365             Whether to always quote values.
366              
367             When set to false (the default), values are quoted only when necessary:
368              
369             field1,field2,"field three contains comma (,)",field4
370              
371             When set to true, then all values will be quoted:
372              
373             "field1","field2","field three contains comma (,)","field4"
374              
375             =item * B<output_escape_char> => I<str>
376              
377             Specify character to escape value in field in output CSV, will be passed to Text::CSV_XS.
378              
379             This is like C<--input-escape-char> option but for output instead of input.
380              
381             Defaults to C<\\> (backslash). Overrides C<--output-tsv> option.
382              
383             =item * B<output_filename> => I<filename>
384              
385             Output filename.
386              
387             Use C<-> to output to stdout (the default if you don't specify this option).
388              
389             Encoding of output file is assumed to be UTF-8.
390              
391             =item * B<output_header> => I<bool>
392              
393             Whether output CSV should have a header row.
394              
395             By default, a header row will be output I<if> input CSV has header row. Under
396             C<--output-header>, a header row will be output even if input CSV does not have
397             header row (value will be something like "col0,col1,..."). Under
398             C<--no-output-header>, header row will I<not> be printed even if input CSV has
399             header row. So this option can be used to unconditionally add or remove header
400             row.
401              
402             =item * B<output_quote_char> => I<str>
403              
404             Specify field quote character in output CSV, will be passed to Text::CSV_XS.
405              
406             This is like C<--input-quote-char> option but for output instead of input.
407              
408             Defaults to C<"> (double quote). Overrides C<--output-tsv> option.
409              
410             =item * B<output_quote_empty> => I<bool> (default: 0)
411              
412             Whether to quote empty values.
413              
414             When set to false (the default), empty values are not quoted:
415              
416             field1,field2,,field4
417              
418             When set to true, then empty values will be quoted:
419              
420             field1,field2,"",field4
421              
422             =item * B<output_sep_char> => I<str>
423              
424             Specify field separator character in output CSV, will be passed to Text::CSV_XS.
425              
426             This is like C<--input-sep-char> option but for output instead of input.
427              
428             Defaults to C<,> (comma). Overrides C<--output-tsv> option.
429              
430             =item * B<output_tsv> => I<bool>
431              
432             Inform that output file is TSV (tab-separated) format instead of CSV.
433              
434             This is like C<--input-tsv> option but for output instead of input.
435              
436             Overriden by C<--output-sep-char>, C<--output-quote-char>, C<--output-escape-char>
437             options. If one of those options is specified, then C<--output-tsv> will be
438             ignored.
439              
440             =item * B<overwrite> => I<bool>
441              
442             Whether to override existing output file.
443              
444              
445             =back
446              
447             Returns an enveloped result (an array).
448              
449             First element ($status_code) is an integer containing HTTP-like status code
450             (200 means OK, 4xx caller error, 5xx function error). Second element
451             ($reason) is a string containing error message, or something like "OK" if status is
452             200. Third element ($payload) is the actual result, but usually not present when enveloped result is an error response ($status_code is not 2xx). Fourth
453             element (%result_meta) is called result metadata and is optional, a hash
454             that contains extra information, much like how HTTP response headers provide additional metadata.
455              
456             Return value: (any)
457              
458             =head1 HOMEPAGE
459              
460             Please visit the project's homepage at L<https://metacpan.org/release/App-CSVUtils>.
461              
462             =head1 SOURCE
463              
464             Source repository is at L<https://github.com/perlancar/perl-App-CSVUtils>.
465              
466             =head1 AUTHOR
467              
468             perlancar <perlancar@cpan.org>
469              
470             =head1 CONTRIBUTING
471              
472              
473             To contribute, you can send patches by email/via RT, or send pull requests on
474             GitHub.
475              
476             Most of the time, you don't need to build the distribution yourself. You can
477             simply modify the code, then test via:
478              
479             % prove -l
480              
481             If you want to build the distribution (e.g. to try to install it locally on your
482             system), you can install L<Dist::Zilla>,
483             L<Dist::Zilla::PluginBundle::Author::PERLANCAR>,
484             L<Pod::Weaver::PluginBundle::Author::PERLANCAR>, and sometimes one or two other
485             Dist::Zilla- and/or Pod::Weaver plugins. Any additional steps required beyond
486             that are considered a bug and can be reported to me.
487              
488             =head1 COPYRIGHT AND LICENSE
489              
490             This software is copyright (c) 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016 by perlancar <perlancar@cpan.org>.
491              
492             This is free software; you can redistribute it and/or modify it under
493             the same terms as the Perl 5 programming language system itself.
494              
495             =head1 BUGS
496              
497             Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=App-CSVUtils>
498              
499             When submitting a bug or request, please include a test-file or a
500             patch to an existing test-file that illustrates the bug or desired
501             feature.
502              
503             =cut