File Coverage

blib/lib/App/CSVUtils/csv_munge_field.pm

Criterion	Covered	Total	%
statement	11	11	100.0
branch			n/a
condition			n/a
subroutine	4	4	100.0
pod			n/a
total	15	15	100.0

line	stmt	sub	time	code
1				package App::CSVUtils::csv_munge_field;
2
3	1	1	4982	use 5.010001;
	1		4
4	1	1	6	use strict;
	1		2
	1		28
5	1	1	15	use warnings;
	1		2
	1		68
6
7				our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
8				our $DATE = '2023-03-31'; # DATE
9				our $DIST = 'App-CSVUtils'; # DIST
10				our $VERSION = '1.023'; # VERSION
11
12	1		397	use App::CSVUtils qw(
13				gen_csv_util
14				compile_eval_code
15				eval_code
16	1	1	10	);
	1		2
17
18				gen_csv_util(
				# Declares the 'csv_munge_field' utility: the per-row callbacks below run
				# user-supplied Perl code against one selected field of each data row.
				# NOTE(review): gen_csv_util is imported from App::CSVUtils and not shown
				# here; presumably it generates the csv_munge_field() function documented
				# in the POD below -- confirm against App::CSVUtils.
19				name => 'csv_munge_field',
20				summary => 'Munge a field in every row of CSV file with Perl code',
21				description => <<'_',
22
23				Perl code (-e) will be called for each row (excluding the header row) and `$_`
24				will contain the value of the field, and the Perl code is expected to modify it.
25				`$main::row` will contain the current row array. `$main::rownum` contains the
26				row number (2 means the first data row). `$main::csv` is the <pm:Text::CSV_XS>
27				object. `$main::fields_idx` is also available for additional information.
28
29				To munge multiple fields, use <prog:csv-munge-rows>.
30
31				_
				# Extra arguments for this utility, merged from two shared argument specs
				# exported by App::CSVUtils (these supply the 'field' and 'eval' arguments
				# read by the callbacks below).
				# NOTE(review): the _1/_2 suffixes presumably denote positional indexes,
				# matching the argv order in the example below -- confirm.
32				add_args => {
33				%App::CSVUtils::argspec_field_1,
34				%App::CSVUtils::argspec_eval_2,
35				},
36				tags => ['category:munging', 'modifies-field'],
37
38				examples => [
39				{
40				summary => 'Square a number field in CSV',
41				argv => ['file.csv', 'num', '$_ = $_*$_'],
42				test => 0,
43				'x.doc.show_result' => 0,
44				},
45				],
46
				# Header-row callback. Receives the stash hashref ($r), resolves the
				# requested field name to its column index, and prepares the user code.
				# Dies with an enveloped [404, message] if the field is not in the header.
47				on_input_header_row => sub {
48				my $r = shift;
49
50				# check that selected field exists in the header
51				my $field_idx = $r->{input_fields_idx}{ $r->{util_args}{field} };
52				die [404, "Field '$r->{util_args}{field}' not found in CSV"]
53				unless defined $field_idx;
54
55				# we add the following keys to the stash
				# (both keys are read back by on_input_data_row for every data row)
56				$r->{code} = compile_eval_code($r->{util_args}{eval}, 'eval');
57				$r->{field_idx} = $field_idx;
58				},
59
				# Data-row callback. Passes the selected field's value to the prepared
				# code, stores the result back into the row, then emits the row.
				# Dies with an enveloped [500, message] if the user code throws.
60				on_input_data_row => sub {
61				my $r = shift;
62
63				my $topic;
				# The block eval traps exceptions from the user code so they can be
				# re-raised below with row/field context.
				# NOTE(review): 'return_topic' presumably makes eval_code return the
				# (possibly modified) $_ rather than the code's own return value --
				# confirm against App::CSVUtils::eval_code.
64				eval { $topic = eval_code($r->{code}, $r, $r->{input_row}[$r->{field_idx}], 'return_topic') };
65				die [500, "Error while munging row ".
66				"#$r->{input_rownum} field '$r->{util_args}{field}': $@\n"] if $@;
				# Overwrite the field in place with the munged value, then hand the
				# whole row to the printer coderef held in the stash.
67				$r->{input_row}->[ $r->{field_idx} ] = $topic;
68				$r->{code_print_row}->($r->{input_row});
69				},
70				);
71
72				1;
73				# ABSTRACT: Munge a field in every row of CSV file with Perl code
74
75				__END__
76
77				=pod
78
79				=encoding UTF-8
80
81				=head1 NAME
82
83				App::CSVUtils::csv_munge_field - Munge a field in every row of CSV file with Perl code
84
85				=head1 VERSION
86
87				This document describes version 1.023 of App::CSVUtils::csv_munge_field (from Perl distribution App-CSVUtils), released on 2023-03-31.
88
89				=head1 FUNCTIONS
90
91
92				=head2 csv_munge_field
93
94				Usage:
95
96				csv_munge_field(%args) -> [$status_code, $reason, $payload, \%result_meta]
97
98				Munge a field in every row of CSV file with Perl code.
99
100				Examples:
101
102				=over
103
104				=item * Square a number field in CSV:
105
106				csv_munge_field(input_filename => "file.csv", field => "num", eval => "\$_ = \$_*\$_");
107
108				=back
109
110				Perl code (-e) will be called for each row (excluding the header row) and C<$_>
111				will contain the value of the field, and the Perl code is expected to modify it.
112				C<$main::row> will contain the current row array. C<$main::rownum> contains the
113				row number (2 means the first data row). C<$main::csv> is the L<Text::CSV_XS>
114				object. C<$main::fields_idx> is also available for additional information.
115
116				To munge multiple fields, use L<csv-munge-rows>.
117
118				This function is not exported.
119
120				Arguments ('*' denotes required arguments):
121
122				=over 4
123
124				=item * B<eval>* => I<str\|code>
125
126				Perl code.
127
128				=item * B<field>* => I<str>
129
130				Field name.
131
132				=item * B<inplace> => I<true>
133
134				Output to the same file as input.
135
136				Normally, you output to a different file than input. If you try to output to the
137				same file (C<-o INPUT.csv -O>) you will clobber the input file; thus the utility
138				prevents you from doing it. However, with this C<--inplace> option, you can
139				output to the same file. Like perl's C<-i> option, this will first output to a
140				temporary file in the same directory as the input file then rename to the final
141				file at the end. You cannot specify output file (C<-o>) when using this option,
142				but you can specify backup extension with C<-b> option.
143
144				Some caveats:
145
146				=over
147
148				=item * if input file is a symbolic link, it will be replaced with a regular file;
149
150				=item * renaming (implemented using C<rename()>) can fail if input filename is too long;
151
152				=item * value specified in C<-b> is currently not checked for acceptable characters;
153
154				=item * things can also fail if permissions are restrictive;
155
156				=back
157
158				=item * B<inplace_backup_ext> => I<str> (default: "")
159
160				Extension to add for backup of input file.
161
162				In inplace mode (C<--inplace>), if this option is set to a non-empty string, will
163				rename the input file using this extension as a backup. The old existing backup
164				will be overwritten, if any.
165
166				=item * B<input_escape_char> => I<str>
167
168				Specify character to escape value in field in input CSV, will be passed to Text::CSV_XS.
169
170				Defaults to C<\\> (backslash). Overrides C<--input-tsv> option.
171
172				=item * B<input_filename> => I<filename> (default: "-")
173
174				Input CSV file.
175
176				Use C<-> to read from stdin.
177
178				Encoding of input file is assumed to be UTF-8.
179
180				=item * B<input_header> => I<bool> (default: 1)
181
182				Specify whether input CSV has a header row.
183
184				By default, the first row of the input CSV will be assumed to contain field
185				names (and the second row contains the first data row). When you declare that
186				input CSV does not have header row (C<--no-input-header>), the first row of the
187				CSV is assumed to contain the first data row. Fields will be named C<field1>,
188				C<field2>, and so on.
189
190				=item * B<input_quote_char> => I<str>
191
192				Specify field quote character in input CSV, will be passed to Text::CSV_XS.
193
194				Defaults to C<"> (double quote). Overrides C<--input-tsv> option.
195
196				=item * B<input_sep_char> => I<str>
197
198				Specify field separator character in input CSV, will be passed to Text::CSV_XS.
199
200				Defaults to C<,> (comma). Overrides C<--input-tsv> option.
201
202				=item * B<input_tsv> => I<true>
203
204				Inform that input file is in TSV (tab-separated) format instead of CSV.
205
206				Overridden by C<--input-sep-char>, C<--input-quote-char>, C<--input-escape-char>
207				options. If one of those options is specified, then C<--input-tsv> will be
208				ignored.
209
210				=item * B<output_always_quote> => I<bool> (default: 0)
211
212				Whether to always quote values.
213
214				When set to false (the default), values are quoted only when necessary:
215
216				field1,field2,"field three contains comma (,)",field4
217
218				When set to true, then all values will be quoted:
219
220				"field1","field2","field three contains comma (,)","field4"
221
222				=item * B<output_escape_char> => I<str>
223
224				Specify character to escape value in field in output CSV, will be passed to Text::CSV_XS.
225
226				This is like C<--input-escape-char> option but for output instead of input.
227
228				Defaults to C<\\> (backslash). Overrides C<--output-tsv> option.
229
230				=item * B<output_filename> => I<filename>
231
232				Output filename.
233
234				Use C<-> to output to stdout (the default if you don't specify this option).
235
236				Encoding of output file is assumed to be UTF-8.
237
238				=item * B<output_header> => I<bool>
239
240				Whether output CSV should have a header row.
241
242				By default, a header row will be output I<if> input CSV has header row. Under
243				C<--output-header>, a header row will be output even if input CSV does not have
244				header row (value will be something like "col0,col1,..."). Under
245				C<--no-output-header>, header row will I<not> be printed even if input CSV has
246				header row. So this option can be used to unconditionally add or remove header
247				row.
248
249				=item * B<output_quote_char> => I<str>
250
251				Specify field quote character in output CSV, will be passed to Text::CSV_XS.
252
253				This is like C<--input-quote-char> option but for output instead of input.
254
255				Defaults to C<"> (double quote). Overrides C<--output-tsv> option.
256
257				=item * B<output_quote_empty> => I<bool> (default: 0)
258
259				Whether to quote empty values.
260
261				When set to false (the default), empty values are not quoted:
262
263				field1,field2,,field4
264
265				When set to true, then empty values will be quoted:
266
267				field1,field2,"",field4
268
269				=item * B<output_sep_char> => I<str>
270
271				Specify field separator character in output CSV, will be passed to Text::CSV_XS.
272
273				This is like C<--input-sep-char> option but for output instead of input.
274
275				Defaults to C<,> (comma). Overrides C<--output-tsv> option.
276
277				=item * B<output_tsv> => I<bool>
278
279				Inform that output file is TSV (tab-separated) format instead of CSV.
280
281				This is like C<--input-tsv> option but for output instead of input.
282
283				Overridden by C<--output-sep-char>, C<--output-quote-char>, C<--output-escape-char>
284				options. If one of those options is specified, then C<--output-tsv> will be
285				ignored.
286
287				=item * B<overwrite> => I<bool>
288
289				Whether to overwrite existing output file.
290
291
292				=back
293
294				Returns an enveloped result (an array).
295
296				First element ($status_code) is an integer containing HTTP-like status code
297				(200 means OK, 4xx caller error, 5xx function error). Second element
298				($reason) is a string containing error message, or something like "OK" if status is
299				200. Third element ($payload) is the actual result, but usually not present when enveloped result is an error response ($status_code is not 2xx). Fourth
300				element (%result_meta) is called result metadata and is optional, a hash
301				that contains extra information, much like how HTTP response headers provide additional metadata.
302
303				Return value: (any)
304
305				=head1 HOMEPAGE
306
307				Please visit the project's homepage at L<https://metacpan.org/release/App-CSVUtils>.
308
309				=head1 SOURCE
310
311				Source repository is at L<https://github.com/perlancar/perl-App-CSVUtils>.
312
313				=head1 AUTHOR
314
315				perlancar <perlancar@cpan.org>
316
317				=head1 CONTRIBUTING
318
319
320				To contribute, you can send patches by email/via RT, or send pull requests on
321				GitHub.
322
323				Most of the time, you don't need to build the distribution yourself. You can
324				simply modify the code, then test via:
325
326				% prove -l
327
328				If you want to build the distribution (e.g. to try to install it locally on your
329				system), you can install L<Dist::Zilla>,
330				L<Dist::Zilla::PluginBundle::Author::PERLANCAR>,
331				L<Pod::Weaver::PluginBundle::Author::PERLANCAR>, and sometimes one or two other
332				Dist::Zilla- and/or Pod::Weaver plugins. Any additional steps required beyond
333				that are considered a bug and can be reported to me.
334
335				=head1 COPYRIGHT AND LICENSE
336
337				This software is copyright (c) 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016 by perlancar <perlancar@cpan.org>.
338
339				This is free software; you can redistribute it and/or modify it under
340				the same terms as the Perl 5 programming language system itself.
341
342				=head1 BUGS
343
344				Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=App-CSVUtils>
345
346				When submitting a bug or request, please include a test-file or a
347				patch to an existing test-file that illustrates the bug or desired
348				feature.
349
350				=cut