File Coverage

blib/lib/App/CSVUtils/csv_munge_field.pm
Criterion Covered Total %
statement 11 11 100.0
branch n/a
condition n/a
subroutine 4 4 100.0
pod n/a
total 15 15 100.0


line stmt bran cond sub pod time code
1             package App::CSVUtils::csv_munge_field;
2              
3 1     1   4982 use 5.010001;
  1         4  
4 1     1   6 use strict;
  1         2  
  1         28  
5 1     1   15 use warnings;
  1         2  
  1         68  
6              
7             our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
8             our $DATE = '2023-03-31'; # DATE
9             our $DIST = 'App-CSVUtils'; # DIST
10             our $VERSION = '1.023'; # VERSION
11              
12 1         397 use App::CSVUtils qw(
13             gen_csv_util
14             compile_eval_code
15             eval_code
16 1     1   10 );
  1         2  
17              
18             gen_csv_util(
19             name => 'csv_munge_field',
20             summary => 'Munge a field in every row of CSV file with Perl code',
21             description => <<'_',
22              
23             Perl code (-e) will be called for each row (excluding the header row) and `$_`
24             will contain the value of the field, and the Perl code is expected to modify it.
25             `$main::row` will contain the current row array. `$main::rownum` contains the
26             row number (2 means the first data row). `$main::csv` is the <pm:Text::CSV_XS>
27             object. `$main::fields_idx` is also available for additional information.
28              
29             To munge multiple fields, use <prog:csv-munge-rows>.
30              
31             _
32             add_args => {
33             %App::CSVUtils::argspec_field_1,
34             %App::CSVUtils::argspec_eval_2,
35             },
36             tags => ['category:munging', 'modifies-field'],
37              
38             examples => [
39             {
40             summary => 'Square a number field in CSV',
41             argv => ['file.csv', 'num', '$_ = $_*$_'],
42             test => 0,
43             'x.doc.show_result' => 0,
44             },
45             ],
46              
47             on_input_header_row => sub {
48             my $r = shift;
49              
50             # check that selected field exists in the header
51             my $field_idx = $r->{input_fields_idx}{ $r->{util_args}{field} };
52             die [404, "Field '$r->{util_args}{field}' not found in CSV"]
53             unless defined $field_idx;
54              
55             # we add the following keys to the stash
56             $r->{code} = compile_eval_code($r->{util_args}{eval}, 'eval');
57             $r->{field_idx} = $field_idx;
58             },
59              
60             on_input_data_row => sub {
61             my $r = shift;
62              
63             my $topic;
64             eval { $topic = eval_code($r->{code}, $r, $r->{input_row}[$r->{field_idx}], 'return_topic') };
65             die [500, "Error while munging row ".
66             "#$r->{input_rownum} field '$r->{util_args}{field}': $@\n"] if $@;
67             $r->{input_row}->[ $r->{field_idx} ] = $topic;
68             $r->{code_print_row}->($r->{input_row});
69             },
70             );
71              
72             1;
73             # ABSTRACT: Munge a field in every row of CSV file with Perl code
74              
75             __END__
76              
77             =pod
78              
79             =encoding UTF-8
80              
81             =head1 NAME
82              
83             App::CSVUtils::csv_munge_field - Munge a field in every row of CSV file with Perl code
84              
85             =head1 VERSION
86              
87             This document describes version 1.023 of App::CSVUtils::csv_munge_field (from Perl distribution App-CSVUtils), released on 2023-03-31.
88              
89             =head1 FUNCTIONS
90              
91              
92             =head2 csv_munge_field
93              
94             Usage:
95              
96             csv_munge_field(%args) -> [$status_code, $reason, $payload, \%result_meta]
97              
98             Munge a field in every row of CSV file with Perl code.
99              
100             Examples:
101              
102             =over
103              
104             =item * Square a number field in CSV:
105              
106             csv_munge_field(input_filename => "file.csv", field => "num", eval => "\$_ = \$_*\$_");
107              
108             =back
109              
110             Perl code (-e) will be called for each row (excluding the header row) and C<$_>
111             will contain the value of the field, and the Perl code is expected to modify it.
112             C<$main::row> will contain the current row array. C<$main::rownum> contains the
113             row number (2 means the first data row). C<$main::csv> is the L<Text::CSV_XS>
114             object. C<$main::fields_idx> is also available for additional information.
115              
116             To munge multiple fields, use L<csv-munge-rows>.
117              
118             This function is not exported.
119              
120             Arguments ('*' denotes required arguments):
121              
122             =over 4
123              
124             =item * B<eval>* => I<str|code>
125              
126             Perl code.
127              
128             =item * B<field>* => I<str>
129              
130             Field name.
131              
132             =item * B<inplace> => I<true>
133              
134             Output to the same file as input.
135              
136             Normally, you output to a different file than input. If you try to output to the
137             same file (C<-o INPUT.csv -O>) you will clobber the input file; thus the utility
138             prevents you from doing it. However, with this C<--inplace> option, you can
139             output to the same file. Like perl's C<-i> option, this will first output to a
140             temporary file in the same directory as the input file then rename to the final
141             file at the end. You cannot specify output file (C<-o>) when using this option,
142             but you can specify backup extension with C<-b> option.
143              
144             Some caveats:
145              
146             =over
147              
148             =item * if input file is a symbolic link, it will be replaced with a regular file;
149              
150             =item * renaming (implemented using C<rename()>) can fail if input filename is too long;
151              
152             =item * value specified in C<-b> is currently not checked for acceptable characters;
153              
154             =item * things can also fail if permissions are restrictive;
155              
156             =back
157              
158             =item * B<inplace_backup_ext> => I<str> (default: "")
159              
160             Extension to add for backup of input file.
161              
162             In inplace mode (C<--inplace>), if this option is set to a non-empty string, will
163             rename the input file using this extension as a backup. The old existing backup
164             will be overwritten, if any.
165              
166             =item * B<input_escape_char> => I<str>
167              
168             Specify character to escape value in field in input CSV, will be passed to Text::CSV_XS.
169              
170             Defaults to C<\\> (backslash). Overrides C<--input-tsv> option.
171              
172             =item * B<input_filename> => I<filename> (default: "-")
173              
174             Input CSV file.
175              
176             Use C<-> to read from stdin.
177              
178             Encoding of input file is assumed to be UTF-8.
179              
180             =item * B<input_header> => I<bool> (default: 1)
181              
182             Specify whether input CSV has a header row.
183              
184             By default, the first row of the input CSV will be assumed to contain field
185             names (and the second row contains the first data row). When you declare that
186             input CSV does not have header row (C<--no-input-header>), the first row of the
187             CSV is assumed to contain the first data row. Fields will be named C<field1>,
188             C<field2>, and so on.
189              
190             =item * B<input_quote_char> => I<str>
191              
192             Specify field quote character in input CSV, will be passed to Text::CSV_XS.
193              
194             Defaults to C<"> (double quote). Overrides C<--input-tsv> option.
195              
196             =item * B<input_sep_char> => I<str>
197              
198             Specify field separator character in input CSV, will be passed to Text::CSV_XS.
199              
200             Defaults to C<,> (comma). Overrides C<--input-tsv> option.
201              
202             =item * B<input_tsv> => I<true>
203              
204             Inform that input file is in TSV (tab-separated) format instead of CSV.
205              
206             Overridden by C<--input-sep-char>, C<--input-quote-char>, C<--input-escape-char>
207             options. If one of those options is specified, then C<--input-tsv> will be
208             ignored.
209              
210             =item * B<output_always_quote> => I<bool> (default: 0)
211              
212             Whether to always quote values.
213              
214             When set to false (the default), values are quoted only when necessary:
215              
216             field1,field2,"field three contains comma (,)",field4
217              
218             When set to true, then all values will be quoted:
219              
220             "field1","field2","field three contains comma (,)","field4"
221              
222             =item * B<output_escape_char> => I<str>
223              
224             Specify character to escape value in field in output CSV, will be passed to Text::CSV_XS.
225              
226             This is like C<--input-escape-char> option but for output instead of input.
227              
228             Defaults to C<\\> (backslash). Overrides C<--output-tsv> option.
229              
230             =item * B<output_filename> => I<filename>
231              
232             Output filename.
233              
234             Use C<-> to output to stdout (the default if you don't specify this option).
235              
236             Encoding of output file is assumed to be UTF-8.
237              
238             =item * B<output_header> => I<bool>
239              
240             Whether output CSV should have a header row.
241              
242             By default, a header row will be output I<if> input CSV has header row. Under
243             C<--output-header>, a header row will be output even if input CSV does not have
244             header row (value will be something like "col0,col1,..."). Under
245             C<--no-output-header>, header row will I<not> be printed even if input CSV has
246             header row. So this option can be used to unconditionally add or remove header
247             row.
248              
249             =item * B<output_quote_char> => I<str>
250              
251             Specify field quote character in output CSV, will be passed to Text::CSV_XS.
252              
253             This is like C<--input-quote-char> option but for output instead of input.
254              
255             Defaults to C<"> (double quote). Overrides C<--output-tsv> option.
256              
257             =item * B<output_quote_empty> => I<bool> (default: 0)
258              
259             Whether to quote empty values.
260              
261             When set to false (the default), empty values are not quoted:
262              
263             field1,field2,,field4
264              
265             When set to true, then empty values will be quoted:
266              
267             field1,field2,"",field4
268              
269             =item * B<output_sep_char> => I<str>
270              
271             Specify field separator character in output CSV, will be passed to Text::CSV_XS.
272              
273             This is like C<--input-sep-char> option but for output instead of input.
274              
275             Defaults to C<,> (comma). Overrides C<--output-tsv> option.
276              
277             =item * B<output_tsv> => I<bool>
278              
279             Inform that output file is TSV (tab-separated) format instead of CSV.
280              
281             This is like C<--input-tsv> option but for output instead of input.
282              
283             Overridden by C<--output-sep-char>, C<--output-quote-char>, C<--output-escape-char>
284             options. If one of those options is specified, then C<--output-tsv> will be
285             ignored.
286              
287             =item * B<overwrite> => I<bool>
288              
289             Whether to overwrite existing output file.
290              
291              
292             =back
293              
294             Returns an enveloped result (an array).
295              
296             First element ($status_code) is an integer containing HTTP-like status code
297             (200 means OK, 4xx caller error, 5xx function error). Second element
298             ($reason) is a string containing error message, or something like "OK" if status is
299             200. Third element ($payload) is the actual result, but usually not present when enveloped result is an error response ($status_code is not 2xx). Fourth
300             element (%result_meta) is called result metadata and is optional, a hash
301             that contains extra information, much like how HTTP response headers provide additional metadata.
302              
303             Return value: (any)
304              
305             =head1 HOMEPAGE
306              
307             Please visit the project's homepage at L<https://metacpan.org/release/App-CSVUtils>.
308              
309             =head1 SOURCE
310              
311             Source repository is at L<https://github.com/perlancar/perl-App-CSVUtils>.
312              
313             =head1 AUTHOR
314              
315             perlancar <perlancar@cpan.org>
316              
317             =head1 CONTRIBUTING
318              
319              
320             To contribute, you can send patches by email/via RT, or send pull requests on
321             GitHub.
322              
323             Most of the time, you don't need to build the distribution yourself. You can
324             simply modify the code, then test via:
325              
326             % prove -l
327              
328             If you want to build the distribution (e.g. to try to install it locally on your
329             system), you can install L<Dist::Zilla>,
330             L<Dist::Zilla::PluginBundle::Author::PERLANCAR>,
331             L<Pod::Weaver::PluginBundle::Author::PERLANCAR>, and sometimes one or two other
332             Dist::Zilla- and/or Pod::Weaver plugins. Any additional steps required beyond
333             that are considered a bug and can be reported to me.
334              
335             =head1 COPYRIGHT AND LICENSE
336              
337             This software is copyright (c) 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016 by perlancar <perlancar@cpan.org>.
338              
339             This is free software; you can redistribute it and/or modify it under
340             the same terms as the Perl 5 programming language system itself.
341              
342             =head1 BUGS
343              
344             Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=App-CSVUtils>.
345              
346             When submitting a bug or request, please include a test-file or a
347             patch to an existing test-file that illustrates the bug or desired
348             feature.
349              
350             =cut