File Coverage

blib/lib/App/CSVUtils/csv_munge_rows.pm
Criterion Covered Total %
statement 11 11 100.0
branch n/a
condition n/a
subroutine 4 4 100.0
pod n/a
total 15 15 100.0


line stmt bran cond sub pod time code
1             package App::CSVUtils::csv_munge_rows;
2              
3 1     1   4614 use 5.010001;
  1         4  
4 1     1   13 use strict;
  1         2  
  1         21  
5 1     1   5 use warnings;
  1         6  
  1         70  
6              
7             our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
8             our $DATE = '2023-07-25'; # DATE
9             our $DIST = 'App-CSVUtils'; # DIST
10             our $VERSION = '1.030'; # VERSION
11              
12 1         407 use App::CSVUtils qw(
13             gen_csv_util
14             compile_eval_code
15             eval_code
16 1     1   6 );
  1         3  
17              
18             gen_csv_util( # declares the csv_munge_rows utility: name, docs, extra args, examples and row hooks
19             name => 'csv_munge_rows',
20             summary => 'Modify CSV data rows using Perl code',
21             description => <<'_',
22              
23             Perl code (-e) will be called for each row (excluding the header row) and `$_`
24             will contain the row (arrayref, or hashref if `-H` is specified). The Perl code
25             is expected to modify it.
26              
27             Aside from `$_`, `$main::row` will contain the current row array.
28             `$main::rownum` contains the row number (2 means the first data row).
29             `$main::csv` is the <pm:Text::CSV_XS> object. `$main::fields_idx` is also
30             available for additional information.
31              
32             The modified `$_` will be rendered back to CSV row.
33              
34             You cannot add new fields using this utility. To do so, use
35             <prog:csv-add-fields>. You also cannot delete fields (they just become empty
36             string if you delete the field in the eval code). To delete fields, use
37             <prog:csv-delete-fields>.
38              
39             Note that you can also munge a single field using <prog:csv-munge-field>.
40              
41             _
42             add_args => {
43             %App::CSVUtils::argspec_eval_1,
44             %App::CSVUtils::argspecopt_hash,
45             },
46             tags => ['category:munging', 'modifies-rows'],
47              
48             examples => [
49             {
50             summary => 'Modify two fields in a CSV',
51             argv => ['-He', '$_->{field1} *= 2; $_->{field2} =~ s/foo/bar/', 'file.csv'],
52             test => 0,
53             'x.doc.show_result' => 0,
54             },
55             ],
56             # Header-row hook: prepares the state in the stash $r that on_input_data_row relies on.
57             on_input_header_row => sub {
58             my $r = shift; # stash hashref (first argument of the hook)
59              
60             # stash keys added here: {code}, and {wants_input_row_as_hashref} when the `hash` arg is set
61             $r->{code} = compile_eval_code($r->{util_args}{eval}, 'eval'); # compiled form of the `eval` arg; reused by on_input_data_row
62              
63             $r->{wants_input_row_as_hashref} = 1 if $r->{util_args}{hash}; # flag read by on_input_data_row to pick the hashref row form
64             },
65             # Data-row hook: runs the compiled user code against the row, then prints the resulting row.
66             on_input_data_row => sub {
67             my $r = shift; # stash hashref (first argument of the hook)
68             # NOTE(review): 'return_topic' presumably makes eval_code return the (modified) $_ -- confirm in App::CSVUtils
69             my $topic; eval { $topic = eval_code($r->{code}, $r, $r->{wants_input_row_as_hashref} ? $r->{input_row_as_hashref} : $r->{input_row}, 'return_topic') };
70             die [500, "Error while munging row #$r->{input_rownum}: $@\n"] if $@; # exception from eval_code is rethrown as a [500, message] arrayref naming the row number
71             # hash mode: map the hashref in $topic back onto an arrayref, positioned via input_fields_idx
72             my $newrow;
73             if ($r->{util_args}{hash}) {
74             $newrow = [('') x @{ $r->{input_fields} }]; # one empty string per input field, so keys missing from $topic come out as ''
75             for my $field (keys %$topic) {
76             next unless exists $r->{input_fields_idx}{$field}; # skip keys that are not input fields (i.e. fields created by the user code)
77             $newrow->[$r->{input_fields_idx}{$field}] = $topic->{$field};
78             }
79             } else {
80             $newrow = $topic; # array mode: use the value returned by eval_code as the row, unchanged
81             }
82             $r->{code_print_row}->($newrow); # hand the row to the code_print_row callback stored in the stash
83             },
84             );
85              
86             1;
87             # ABSTRACT: Modify CSV data rows using Perl code
88              
89             __END__
90              
91             =pod
92              
93             =encoding UTF-8
94              
95             =head1 NAME
96              
97             App::CSVUtils::csv_munge_rows - Modify CSV data rows using Perl code
98              
99             =head1 VERSION
100              
101             This document describes version 1.030 of App::CSVUtils::csv_munge_rows (from Perl distribution App-CSVUtils), released on 2023-07-25.
102              
103             =head1 FUNCTIONS
104              
105              
106             =head2 csv_munge_rows
107              
108             Usage:
109              
110             csv_munge_rows(%args) -> [$status_code, $reason, $payload, \%result_meta]
111              
112             Modify CSV data rows using Perl code.
113              
114             Examples:
115              
116             =over
117              
118             =item * Modify two fields in a CSV:
119              
120             csv_munge_rows(
121             input_filename => "file.csv",
122             eval => "\$_->{field1} *= 2; \$_->{field2} =~ s/foo/bar/",
123             hash => 1
124             );
125              
126             =back
127              
128             Perl code (-e) will be called for each row (excluding the header row) and C<$_>
129             will contain the row (arrayref, or hashref if C<-H> is specified). The Perl code
130             is expected to modify it.
131              
132             Aside from C<$_>, C<$main::row> will contain the current row array.
133             C<$main::rownum> contains the row number (2 means the first data row).
134             C<$main::csv> is the L<Text::CSV_XS> object. C<$main::fields_idx> is also
135             available for additional information.
136              
137             The modified C<$_> will be rendered back to CSV row.
138              
139             You cannot add new fields using this utility. To do so, use
140             L<csv-add-fields>. You also cannot delete fields (they just become empty
141             string if you delete the field in the eval code). To delete fields, use
142             L<csv-delete-fields>.
143              
144             Note that you can also munge a single field using L<csv-munge-field>.
145              
146             This function is not exported.
147              
148             Arguments ('*' denotes required arguments):
149              
150             =over 4
151              
152             =item * B<eval>* => I<str|code>
153              
154             Perl code.
155              
156             =item * B<hash> => I<bool>
157              
158             Provide row in $_ as hashref instead of arrayref.
159              
160             =item * B<inplace> => I<true>
161              
162             Output to the same file as input.
163              
164             Normally, you output to a different file than input. If you try to output to the
165             same file (C<-o INPUT.csv -O>) you will clobber the input file; thus the utility
166             prevents you from doing it. However, with this C<--inplace> option, you can
167             output to the same file. Like perl's C<-i> option, this will first output to a
168             temporary file in the same directory as the input file then rename to the final
169             file at the end. You cannot specify output file (C<-o>) when using this option,
170             but you can specify backup extension with C<-b> option.
171              
172             Some caveats:
173              
174             =over
175              
176             =item * if input file is a symbolic link, it will be replaced with a regular file;
177              
178             =item * renaming (implemented using C<rename()>) can fail if input filename is too long;
179              
180             =item * value specified in C<-b> is currently not checked for acceptable characters;
181              
182             =item * things can also fail if permissions are restrictive;
183              
184             =back
185              
186             =item * B<inplace_backup_ext> => I<str> (default: "")
187              
188             Extension to add for backup of input file.
189              
190             In inplace mode (C<--inplace>), if this option is set to a non-empty string, will
191             rename the input file using this extension as a backup. The old existing backup
192             will be overwritten, if any.
193              
194             =item * B<input_escape_char> => I<str>
195              
196             Specify character to escape value in field in input CSV, will be passed to Text::CSV_XS.
197              
198             Defaults to C<\\> (backslash). Overrides C<--input-tsv> option.
199              
200             =item * B<input_filename> => I<filename> (default: "-")
201              
202             Input CSV file.
203              
204             Use C<-> to read from stdin.
205              
206             Encoding of input file is assumed to be UTF-8.
207              
208             =item * B<input_header> => I<bool> (default: 1)
209              
210             Specify whether input CSV has a header row.
211              
212             By default, the first row of the input CSV will be assumed to contain field
213             names (and the second row contains the first data row). When you declare that
214             input CSV does not have header row (C<--no-input-header>), the first row of the
215             CSV is assumed to contain the first data row. Fields will be named C<field1>,
216             C<field2>, and so on.
217              
218             =item * B<input_quote_char> => I<str>
219              
220             Specify field quote character in input CSV, will be passed to Text::CSV_XS.
221              
222             Defaults to C<"> (double quote). Overrides C<--input-tsv> option.
223              
224             =item * B<input_sep_char> => I<str>
225              
226             Specify field separator character in input CSV, will be passed to Text::CSV_XS.
227              
228             Defaults to C<,> (comma). Overrides C<--input-tsv> option.
229              
230             =item * B<input_tsv> => I<true>
231              
232             Inform that input file is in TSV (tab-separated) format instead of CSV.
233              
234             Overridden by C<--input-sep-char>, C<--input-quote-char>, C<--input-escape-char>
235             options. If one of those options is specified, then C<--input-tsv> will be
236             ignored.
237              
238             =item * B<output_always_quote> => I<bool> (default: 0)
239              
240             Whether to always quote values.
241              
242             When set to false (the default), values are quoted only when necessary:
243              
244             field1,field2,"field three contains comma (,)",field4
245              
246             When set to true, then all values will be quoted:
247              
248             "field1","field2","field three contains comma (,)","field4"
249              
250             =item * B<output_escape_char> => I<str>
251              
252             Specify character to escape value in field in output CSV, will be passed to Text::CSV_XS.
253              
254             This is like C<--input-escape-char> option but for output instead of input.
255              
256             Defaults to C<\\> (backslash). Overrides C<--output-tsv> option.
257              
258             =item * B<output_filename> => I<filename>
259              
260             Output filename.
261              
262             Use C<-> to output to stdout (the default if you don't specify this option).
263              
264             Encoding of output file is assumed to be UTF-8.
265              
266             =item * B<output_header> => I<bool>
267              
268             Whether output CSV should have a header row.
269              
270             By default, a header row will be output I<if> input CSV has header row. Under
271             C<--output-header>, a header row will be output even if input CSV does not have
272             header row (value will be something like "field1,field2,..."). Under
273             C<--no-output-header>, header row will I<not> be printed even if input CSV has
274             header row. So this option can be used to unconditionally add or remove header
275             row.
276              
277             =item * B<output_quote_char> => I<str>
278              
279             Specify field quote character in output CSV, will be passed to Text::CSV_XS.
280              
281             This is like C<--input-quote-char> option but for output instead of input.
282              
283             Defaults to C<"> (double quote). Overrides C<--output-tsv> option.
284              
285             =item * B<output_quote_empty> => I<bool> (default: 0)
286              
287             Whether to quote empty values.
288              
289             When set to false (the default), empty values are not quoted:
290              
291             field1,field2,,field4
292              
293             When set to true, then empty values will be quoted:
294              
295             field1,field2,"",field4
296              
297             =item * B<output_sep_char> => I<str>
298              
299             Specify field separator character in output CSV, will be passed to Text::CSV_XS.
300              
301             This is like C<--input-sep-char> option but for output instead of input.
302              
303             Defaults to C<,> (comma). Overrides C<--output-tsv> option.
304              
305             =item * B<output_tsv> => I<bool>
306              
307             Inform that output file is TSV (tab-separated) format instead of CSV.
308              
309             This is like C<--input-tsv> option but for output instead of input.
310              
311             Overridden by C<--output-sep-char>, C<--output-quote-char>, C<--output-escape-char>
312             options. If one of those options is specified, then C<--output-tsv> will be
313             ignored.
314              
315             =item * B<overwrite> => I<bool>
316              
317             Whether to overwrite existing output file.
318              
319              
320             =back
321              
322             Returns an enveloped result (an array).
323              
324             First element ($status_code) is an integer containing HTTP-like status code
325             (200 means OK, 4xx caller error, 5xx function error). Second element
326             ($reason) is a string containing error message, or something like "OK" if status is
327             200. Third element ($payload) is the actual result, but usually not present when enveloped result is an error response ($status_code is not 2xx). Fourth
328             element (%result_meta) is called result metadata and is optional, a hash
329             that contains extra information, much like how HTTP response headers provide additional metadata.
330              
331             Return value: (any)
332              
333             =head1 HOMEPAGE
334              
335             Please visit the project's homepage at L<https://metacpan.org/release/App-CSVUtils>.
336              
337             =head1 SOURCE
338              
339             Source repository is at L<https://github.com/perlancar/perl-App-CSVUtils>.
340              
341             =head1 AUTHOR
342              
343             perlancar <perlancar@cpan.org>
344              
345             =head1 CONTRIBUTING
346              
347              
348             To contribute, you can send patches by email/via RT, or send pull requests on
349             GitHub.
350              
351             Most of the time, you don't need to build the distribution yourself. You can
352             simply modify the code, then test via:
353              
354             % prove -l
355              
356             If you want to build the distribution (e.g. to try to install it locally on your
357             system), you can install L<Dist::Zilla>,
358             L<Dist::Zilla::PluginBundle::Author::PERLANCAR>,
359             L<Pod::Weaver::PluginBundle::Author::PERLANCAR>, and sometimes one or two other
360             Dist::Zilla- and/or Pod::Weaver plugins. Any additional steps required beyond
361             that are considered a bug and can be reported to me.
362              
363             =head1 COPYRIGHT AND LICENSE
364              
365             This software is copyright (c) 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016 by perlancar <perlancar@cpan.org>.
366              
367             This is free software; you can redistribute it and/or modify it under
368             the same terms as the Perl 5 programming language system itself.
369              
370             =head1 BUGS
371              
372             Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=App-CSVUtils>.
373              
374             When submitting a bug or request, please include a test-file or a
375             patch to an existing test-file that illustrates the bug or desired
376             feature.
377              
378             =cut