File Coverage

blib/lib/App/CSVUtils/csv_grep.pm
Criterion Covered Total %
statement 11 11 100.0
branch n/a
condition n/a
subroutine 4 4 100.0
pod n/a
total 15 15 100.0


line stmt bran cond sub pod time code
1             package App::CSVUtils::csv_grep;
2              
3 1     1   6660 use 5.010001;
  1         5  
4 1     1   9 use strict;
  1         2  
  1         23  
5 1     1   10 use warnings;
  1         20  
  1         141  
6              
7             our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
8             our $DATE = '2023-03-31'; # DATE
9             our $DIST = 'App-CSVUtils'; # DIST
10             our $VERSION = '1.023'; # VERSION
11              
12 1         302 use App::CSVUtils qw(
13             gen_csv_util
14             compile_eval_code
15             eval_code
16 1     1   7 );
  1         2  
17              
18             gen_csv_util(
19             name => 'csv_grep',
20             summary => 'Select (only output) row(s) where Perl expression returns true',
21             description => <<'_',
22              
23             This is like Perl's `grep` performed over rows of CSV. In `$_`, your Perl code
24             will find the CSV row as an arrayref (or, if you specify `-H`, as a hashref).
25             `$main::row` is also set to the row (always as arrayref). `$main::rownum`
26             contains the row number (2 means the first data row). `$main::csv` is the
27             <pm:Text::CSV_XS> object. `$main::fields_idx` is also available for additional
28             information.
29              
30             Your code is then free to return true or false based on some criteria. Only rows
31             where Perl expression returns true will be included in the result.
32              
33             _
34             add_args => {
35             %App::CSVUtils::argspecopt_hash,
36             %App::CSVUtils::argspec_eval,
37             },
38             tags => ['category:filtering', 'accepts-code'],
39              
40             examples => [
41             {
42             summary => 'Only show rows where the amount field '.
43             'is divisible by 7',
44             argv => ['-He', '$_->{amount} % 7 == 0', 'file.csv'],
45             test => 0,
46             'x.doc.show_result' => 0,
47             },
48             {
49             summary => 'Only show rows where date is a Wednesday',
50             argv => ['-He', 'BEGIN { use DateTime::Format::Natural; $parser = DateTime::Format::Natural->new } $dt = $parser->parse_datetime($_->{date}); $dt->day_of_week == 3', 'file.csv'],
51             test => 0,
52             'x.doc.show_result' => 0,
53             },
54             ],
55             links => [
56             {url=>'prog:csvgrep'},
57             ],
58              
59             on_input_header_row => sub {
60             my $r = shift;
61              
62             # we add the following keys to the stash
63             $r->{code} = compile_eval_code($r->{util_args}{eval}, 'eval');
64              
65             $r->{wants_input_row_as_hashref} = 1 if $r->{util_args}{hash};
66             },
67              
68             on_input_data_row => sub {
69             my $r = shift;
70              
71             $r->{code_print_row}->($r->{input_row})
72             if eval_code($r->{code}, $r, $r->{wants_input_row_as_hashref} ? $r->{input_row_as_hashref} : $r->{input_row});
73             },
74             );
75              
76             1;
77             # ABSTRACT: Select (only output) row(s) where Perl expression returns true
78              
79             __END__
80              
81             =pod
82              
83             =encoding UTF-8
84              
85             =head1 NAME
86              
87             App::CSVUtils::csv_grep - Select (only output) row(s) where Perl expression returns true
88              
89             =head1 VERSION
90              
91             This document describes version 1.023 of App::CSVUtils::csv_grep (from Perl distribution App-CSVUtils), released on 2023-03-31.
92              
93             =head1 FUNCTIONS
94              
95              
96             =head2 csv_grep
97              
98             Usage:
99              
100             csv_grep(%args) -> [$status_code, $reason, $payload, \%result_meta]
101              
102             Select (only output) row(s) where Perl expression returns true.
103              
104             Examples:
105              
106             =over
107              
108             =item * Only show rows where the amount field is divisible by 7:
109              
110             csv_grep(
111             input_filename => "file.csv",
112             eval => "\$_->{amount} % 7 == 0",
113             hash => 1
114             );
115              
116             =item * Only show rows where date is a Wednesday:
117              
118             csv_grep(
119             input_filename => "file.csv",
120             eval => "BEGIN { use DateTime::Format::Natural; \$parser = DateTime::Format::Natural->new } \$dt = \$parser->parse_datetime(\$_->{date}); \$dt->day_of_week == 3",
121             hash => 1
122             );
123              
124             =back
125              
126             This is like Perl's C<grep> performed over rows of CSV. In C<$_>, your Perl code
127             will find the CSV row as an arrayref (or, if you specify C<-H>, as a hashref).
128             C<$main::row> is also set to the row (always as arrayref). C<$main::rownum>
129             contains the row number (2 means the first data row). C<$main::csv> is the
130             L<Text::CSV_XS> object. C<$main::fields_idx> is also available for additional
131             information.
132              
133             Your code is then free to return true or false based on some criteria. Only rows
134             where Perl expression returns true will be included in the result.
135              
136             This function is not exported.
137              
138             Arguments ('*' denotes required arguments):
139              
140             =over 4
141              
142             =item * B<eval>* => I<str|code>
143              
144             Perl code.
145              
146             =item * B<hash> => I<bool>
147              
148             Provide row in $_ as hashref instead of arrayref.
149              
150             =item * B<inplace> => I<true>
151              
152             Output to the same file as input.
153              
154             Normally, you output to a different file than input. If you try to output to the
155             same file (C<-o INPUT.csv -O>) you will clobber the input file; thus the utility
156             prevents you from doing it. However, with this C<--inplace> option, you can
157             output to the same file. Like perl's C<-i> option, this will first output to a
158             temporary file in the same directory as the input file then rename to the final
159             file at the end. You cannot specify output file (C<-o>) when using this option,
160             but you can specify backup extension with C<-b> option.
161              
162             Some caveats:
163              
164             =over
165              
166             =item * if input file is a symbolic link, it will be replaced with a regular file;
167              
168             =item * renaming (implemented using C<rename()>) can fail if input filename is too long;
169              
170             =item * value specified in C<-b> is currently not checked for acceptable characters;
171              
172             =item * things can also fail if permissions are restrictive;
173              
174             =back
175              
176             =item * B<inplace_backup_ext> => I<str> (default: "")
177              
178             Extension to add for backup of input file.
179              
180             In inplace mode (C<--inplace>), if this option is set to a non-empty string, will
181             rename the input file using this extension as a backup. The old existing backup
182             will be overwritten, if any.
183              
184             =item * B<input_escape_char> => I<str>
185              
186             Specify character to escape value in field in input CSV, will be passed to Text::CSV_XS.
187              
188             Defaults to C<\\> (backslash). Overrides C<--input-tsv> option.
189              
190             =item * B<input_filename> => I<filename> (default: "-")
191              
192             Input CSV file.
193              
194             Use C<-> to read from stdin.
195              
196             Encoding of input file is assumed to be UTF-8.
197              
198             =item * B<input_header> => I<bool> (default: 1)
199              
200             Specify whether input CSV has a header row.
201              
202             By default, the first row of the input CSV will be assumed to contain field
203             names (and the second row contains the first data row). When you declare that
204             input CSV does not have header row (C<--no-input-header>), the first row of the
205             CSV is assumed to contain the first data row. Fields will be named C<field1>,
206             C<field2>, and so on.
207              
208             =item * B<input_quote_char> => I<str>
209              
210             Specify field quote character in input CSV, will be passed to Text::CSV_XS.
211              
212             Defaults to C<"> (double quote). Overrides C<--input-tsv> option.
213              
214             =item * B<input_sep_char> => I<str>
215              
216             Specify field separator character in input CSV, will be passed to Text::CSV_XS.
217              
218             Defaults to C<,> (comma). Overrides C<--input-tsv> option.
219              
220             =item * B<input_tsv> => I<true>
221              
222             Inform that input file is in TSV (tab-separated) format instead of CSV.
223              
224             Overridden by C<--input-sep-char>, C<--input-quote-char>, C<--input-escape-char>
225             options. If one of those options is specified, then C<--input-tsv> will be
226             ignored.
227              
228             =item * B<output_always_quote> => I<bool> (default: 0)
229              
230             Whether to always quote values.
231              
232             When set to false (the default), values are quoted only when necessary:
233              
234             field1,field2,"field three contains comma (,)",field4
235              
236             When set to true, then all values will be quoted:
237              
238             "field1","field2","field three contains comma (,)","field4"
239              
240             =item * B<output_escape_char> => I<str>
241              
242             Specify character to escape value in field in output CSV, will be passed to Text::CSV_XS.
243              
244             This is like C<--input-escape-char> option but for output instead of input.
245              
246             Defaults to C<\\> (backslash). Overrides C<--output-tsv> option.
247              
248             =item * B<output_filename> => I<filename>
249              
250             Output filename.
251              
252             Use C<-> to output to stdout (the default if you don't specify this option).
253              
254             Encoding of output file is assumed to be UTF-8.
255              
256             =item * B<output_header> => I<bool>
257              
258             Whether output CSV should have a header row.
259              
260             By default, a header row will be output I<if> input CSV has header row. Under
261             C<--output-header>, a header row will be output even if input CSV does not have
262             header row (value will be something like "col0,col1,..."). Under
263             C<--no-output-header>, header row will I<not> be printed even if input CSV has
264             header row. So this option can be used to unconditionally add or remove header
265             row.
266              
267             =item * B<output_quote_char> => I<str>
268              
269             Specify field quote character in output CSV, will be passed to Text::CSV_XS.
270              
271             This is like C<--input-quote-char> option but for output instead of input.
272              
273             Defaults to C<"> (double quote). Overrides C<--output-tsv> option.
274              
275             =item * B<output_quote_empty> => I<bool> (default: 0)
276              
277             Whether to quote empty values.
278              
279             When set to false (the default), empty values are not quoted:
280              
281             field1,field2,,field4
282              
283             When set to true, then empty values will be quoted:
284              
285             field1,field2,"",field4
286              
287             =item * B<output_sep_char> => I<str>
288              
289             Specify field separator character in output CSV, will be passed to Text::CSV_XS.
290              
291             This is like C<--input-sep-char> option but for output instead of input.
292              
293             Defaults to C<,> (comma). Overrides C<--output-tsv> option.
294              
295             =item * B<output_tsv> => I<bool>
296              
297             Inform that output file is TSV (tab-separated) format instead of CSV.
298              
299             This is like C<--input-tsv> option but for output instead of input.
300              
301             Overridden by C<--output-sep-char>, C<--output-quote-char>, C<--output-escape-char>
302             options. If one of those options is specified, then C<--output-tsv> will be
303             ignored.
304              
305             =item * B<overwrite> => I<bool>
306              
307             Whether to overwrite existing output file.
308              
309              
310             =back
311              
312             Returns an enveloped result (an array).
313              
314             First element ($status_code) is an integer containing HTTP-like status code
315             (200 means OK, 4xx caller error, 5xx function error). Second element
316             ($reason) is a string containing error message, or something like "OK" if status is
317             200. Third element ($payload) is the actual result, but usually not present when enveloped result is an error response ($status_code is not 2xx). Fourth
318             element (%result_meta) is called result metadata and is optional, a hash
319             that contains extra information, much like how HTTP response headers provide additional metadata.
320              
321             Return value: (any)
322              
323             =head1 HOMEPAGE
324              
325             Please visit the project's homepage at L<https://metacpan.org/release/App-CSVUtils>.
326              
327             =head1 SOURCE
328              
329             Source repository is at L<https://github.com/perlancar/perl-App-CSVUtils>.
330              
331             =head1 AUTHOR
332              
333             perlancar <perlancar@cpan.org>
334              
335             =head1 CONTRIBUTING
336              
337              
338             To contribute, you can send patches by email/via RT, or send pull requests on
339             GitHub.
340              
341             Most of the time, you don't need to build the distribution yourself. You can
342             simply modify the code, then test via:
343              
344             % prove -l
345              
346             If you want to build the distribution (e.g. to try to install it locally on your
347             system), you can install L<Dist::Zilla>,
348             L<Dist::Zilla::PluginBundle::Author::PERLANCAR>,
349             L<Pod::Weaver::PluginBundle::Author::PERLANCAR>, and sometimes one or two other
350             Dist::Zilla- and/or Pod::Weaver plugins. Any additional steps required beyond
351             that are considered a bug and can be reported to me.
352              
353             =head1 COPYRIGHT AND LICENSE
354              
355             This software is copyright (c) 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016 by perlancar <perlancar@cpan.org>.
356              
357             This is free software; you can redistribute it and/or modify it under
358             the same terms as the Perl 5 programming language system itself.
359              
360             =head1 BUGS
361              
362             Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=App-CSVUtils>.
363              
364             When submitting a bug or request, please include a test-file or a
365             patch to an existing test-file that illustrates the bug or desired
366             feature.
367              
368             =cut