File Coverage

blib/lib/App/CSVUtils/csv_select_fields.pm
Criterion Covered Total %
statement 11 11 100.0
branch n/a
condition n/a
subroutine 4 4 100.0
pod n/a
total 15 15 100.0


line stmt bran cond sub pod time code
1             package App::CSVUtils::csv_select_fields;
2              
3 1     1   4490 use 5.010001;
  1         4  
4 1     1   6 use strict;
  1         3  
  1         21  
5 1     1   5 use warnings;
  1         2  
  1         68  
6              
7             our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
8             our $DATE = '2023-03-31'; # DATE
9             our $DIST = 'App-CSVUtils'; # DIST
10             our $VERSION = '1.023'; # VERSION
11              
12 1         432 use App::CSVUtils qw(
13             gen_csv_util
14 1     1   6 );
  1         2  
15              
16             gen_csv_util( # gen_csv_util is imported from App::CSVUtils above; NOTE(review): presumably it generates the csv_select_fields function from this spec -- confirm
17             name => 'csv_select_fields',
18             summary => 'Select (only output) field(s) using a combination of excludes/includes, including by regex',
19             add_args => { # extra arguments for this utility, merged from two argument-spec hashes in App::CSVUtils
20             %App::CSVUtils::argspecsopt_field_selection,
21             %App::CSVUtils::argspecsopt_show_selected_fields,
22             },
23             tags => ['category:filtering'],
24              
25             examples => [ # usage examples; each has test => 0 and hides its result (NOTE(review): presumably because file.csv is a placeholder -- confirm)
26             {
27             summary => 'Select a single field from CSV',
28             argv => ['file.csv', '-f', 'f1'],
29             test => 0,
30             'x.doc.show_result' => 0,
31             },
32             {
33             summary => 'Select several fields from CSV',
34             argv => ['file.csv', '-f', 'f1', '-f', 'f2', '-f', 'f3'],
35             test => 0,
36             'x.doc.show_result' => 0,
37             },
38             {
39             summary => 'Select fields matching regex from CSV',
40             argv => ['file.csv', '--include-field-pat', '/^extra_/'],
41             test => 0,
42             'x.doc.show_result' => 0,
43             },
44             {
45             summary => 'Select all fields except specified from CSV',
46             argv => ['file.csv', '-a', '--exclude-field', 'f1', '--exclude-field', 'f2'], # -a includes every field; the excludes then drop f1 and f2 (using -f here would include them instead)
47             test => 0,
48             'x.doc.show_result' => 0,
49             },
50             {
51             summary => 'Only show what fields would be included, then exit',
52             argv => ['file.csv', '--include-field-pat', '/^extra_/', '--show-selected-fields'],
53             test => 0,
54             'x.doc.show_result' => 0,
55             },
56             ],
57              
58             on_input_data_row => sub {
59             my $r = shift;
60             # Row callback: prints only the selected columns of $r->{input_row} via $r->{code_print_row}.
61             # The field selection is resolved once (while the stash key below is still unset) and cached in the stash
62             unless ($r->{selected_fields_idx_array_sorted}) {
63             my $res = App::CSVUtils::_select_fields($r->{input_fields}, $r->{input_fields_idx}, $r->{util_args});
64             die $res unless $res->[0] == 100; # any status other than 100 is thrown as-is
65             my $selected_fields = $res->[2][0];
66             my $selected_fields_idx_array = $res->[2][1];
67             die [412, "At least one field must be selected"]
68             unless @$selected_fields;
69             $r->{selected_fields_idx_array_sorted} = [sort { $a <=> $b } @$selected_fields_idx_array]; # ascending, so output columns keep their input order
70              
71             # set output fields (header names, in the same order as the row values below)
72             $r->{output_fields} = [];
73             for (@{ $r->{selected_fields_idx_array_sorted} }) {
74             push @{ $r->{output_fields} }, $r->{input_fields}[$_];
75             }
76              
77             if ($r->{util_args}{show_selected_fields}) { # report the selection as the result and skip printing this row
78             $r->{wants_skip_files}++;
79             $r->{result} = [200, "OK", $selected_fields];
80             return;
81             }
82             }
83              
84             my $row = [];
85             for (@{ $r->{selected_fields_idx_array_sorted} }) {
86             push @$row, $r->{input_row}[$_];
87             }
88              
89             $r->{code_print_row}->($row);
90             },
91             );
92              
93             1;
94             # ABSTRACT: Select (only output) field(s) using a combination of excludes/includes, including by regex
95              
96             __END__
97              
98             =pod
99              
100             =encoding UTF-8
101              
102             =head1 NAME
103              
104             App::CSVUtils::csv_select_fields - Select (only output) field(s) using a combination of excludes/includes, including by regex
105              
106             =head1 VERSION
107              
108             This document describes version 1.023 of App::CSVUtils::csv_select_fields (from Perl distribution App-CSVUtils), released on 2023-03-31.
109              
110             =head1 FUNCTIONS
111              
112              
113             =head2 csv_select_fields
114              
115             Usage:
116              
117             csv_select_fields(%args) -> [$status_code, $reason, $payload, \%result_meta]
118              
119             Select (only output) field(s) using a combination of excludesE<sol>includes, including by regex.
120              
121             Examples:
122              
123             =over
124              
125             =item * Select a single field from CSV:
126              
127             csv_select_fields(input_filename => "file.csv", include_fields => ["f1"]);
128              
129             =item * Select several fields from CSV:
130              
131             csv_select_fields(input_filename => "file.csv", include_fields => ["f1", "f2", "f3"]);
132              
133             =item * Select fields matching regex from CSV:
134              
135             csv_select_fields(input_filename => "file.csv", include_field_pat => "/^extra_/");
136              
137             =item * Select all fields except specified from CSV:
138              
139             csv_select_fields(
140             input_filename => "file.csv",
141             exclude_fields => ["f1", "f2"],
142             include_field_pat => ".*"
143             );
144              
145             =item * Only show what fields would be included, then exit:
146              
147             csv_select_fields(
148             input_filename => "file.csv",
149             include_field_pat => "/^extra_/",
150             show_selected_fields => 1
151             );
152              
153             =back
154              
155             (No description)
156              
157             This function is not exported.
158              
159             Arguments ('*' denotes required arguments):
160              
161             =over 4
162              
163             =item * B<exclude_field_pat> => I<re>
164              
165             Field regex pattern to exclude, takes precedence over --field-pat.
166              
167             =item * B<exclude_fields> => I<array[str]>
168              
169             Field names to exclude, takes precedence over --fields.
170              
171             =item * B<ignore_unknown_fields> => I<bool>
172              
173             When unknown fields are specified in --include-field (--field) or --exclude-field options, ignore them instead of throwing an error.
174              
175             =item * B<include_field_pat> => I<re>
176              
177             Field regex pattern to select, overridden by --exclude-field-pat.
178              
179             =item * B<include_fields> => I<array[str]>
180              
181             Field names to include, takes precedence over --exclude-field-pat.
182              
183             =item * B<inplace> => I<true>
184              
185             Output to the same file as input.
186              
187             Normally, you output to a different file than input. If you try to output to the
188             same file (C<-o INPUT.csv -O>) you will clobber the input file; thus the utility
189             prevents you from doing it. However, with this C<--inplace> option, you can
190             output to the same file. Like perl's C<-i> option, this will first output to a
191             temporary file in the same directory as the input file then rename to the final
192             file at the end. You cannot specify output file (C<-o>) when using this option,
193             but you can specify backup extension with C<-b> option.
194              
195             Some caveats:
196              
197             =over
198              
199             =item * if input file is a symbolic link, it will be replaced with a regular file;
200              
201             =item * renaming (implemented using C<rename()>) can fail if input filename is too long;
202              
203             =item * value specified in C<-b> is currently not checked for acceptable characters;
204              
205             =item * things can also fail if permissions are restrictive;
206              
207             =back
208              
209             =item * B<inplace_backup_ext> => I<str> (default: "")
210              
211             Extension to add for backup of input file.
212              
213             In inplace mode (C<--inplace>), if this option is set to a non-empty string, will
214             rename the input file using this extension as a backup. The old existing backup
215             will be overwritten, if any.
216              
217             =item * B<input_escape_char> => I<str>
218              
219             Specify character to escape value in field in input CSV, will be passed to Text::CSV_XS.
220              
221             Defaults to C<\\> (backslash). Overrides C<--input-tsv> option.
222              
223             =item * B<input_filename> => I<filename> (default: "-")
224              
225             Input CSV file.
226              
227             Use C<-> to read from stdin.
228              
229             Encoding of input file is assumed to be UTF-8.
230              
231             =item * B<input_header> => I<bool> (default: 1)
232              
233             Specify whether input CSV has a header row.
234              
235             By default, the first row of the input CSV will be assumed to contain field
236             names (and the second row contains the first data row). When you declare that
237             input CSV does not have header row (C<--no-input-header>), the first row of the
238             CSV is assumed to contain the first data row. Fields will be named C<field1>,
239             C<field2>, and so on.
240              
241             =item * B<input_quote_char> => I<str>
242              
243             Specify field quote character in input CSV, will be passed to Text::CSV_XS.
244              
245             Defaults to C<"> (double quote). Overrides C<--input-tsv> option.
246              
247             =item * B<input_sep_char> => I<str>
248              
249             Specify field separator character in input CSV, will be passed to Text::CSV_XS.
250              
251             Defaults to C<,> (comma). Overrides C<--input-tsv> option.
252              
253             =item * B<input_tsv> => I<true>
254              
255             Inform that input file is in TSV (tab-separated) format instead of CSV.
256              
257             Overridden by C<--input-sep-char>, C<--input-quote-char>, C<--input-escape-char>
258             options. If one of those options is specified, then C<--input-tsv> will be
259             ignored.
260              
261             =item * B<output_always_quote> => I<bool> (default: 0)
262              
263             Whether to always quote values.
264              
265             When set to false (the default), values are quoted only when necessary:
266              
267             field1,field2,"field three contains comma (,)",field4
268              
269             When set to true, then all values will be quoted:
270              
271             "field1","field2","field three contains comma (,)","field4"
272              
273             =item * B<output_escape_char> => I<str>
274              
275             Specify character to escape value in field in output CSV, will be passed to Text::CSV_XS.
276              
277             This is like C<--input-escape-char> option but for output instead of input.
278              
279             Defaults to C<\\> (backslash). Overrides C<--output-tsv> option.
280              
281             =item * B<output_filename> => I<filename>
282              
283             Output filename.
284              
285             Use C<-> to output to stdout (the default if you don't specify this option).
286              
287             Encoding of output file is assumed to be UTF-8.
288              
289             =item * B<output_header> => I<bool>
290              
291             Whether output CSV should have a header row.
292              
293             By default, a header row will be output I<if> input CSV has header row. Under
294             C<--output-header>, a header row will be output even if input CSV does not have
295             header row (value will be something like "col0,col1,..."). Under
296             C<--no-output-header>, header row will I<not> be printed even if input CSV has
297             header row. So this option can be used to unconditionally add or remove header
298             row.
299              
300             =item * B<output_quote_char> => I<str>
301              
302             Specify field quote character in output CSV, will be passed to Text::CSV_XS.
303              
304             This is like C<--input-quote-char> option but for output instead of input.
305              
306             Defaults to C<"> (double quote). Overrides C<--output-tsv> option.
307              
308             =item * B<output_quote_empty> => I<bool> (default: 0)
309              
310             Whether to quote empty values.
311              
312             When set to false (the default), empty values are not quoted:
313              
314             field1,field2,,field4
315              
316             When set to true, then empty values will be quoted:
317              
318             field1,field2,"",field4
319              
320             =item * B<output_sep_char> => I<str>
321              
322             Specify field separator character in output CSV, will be passed to Text::CSV_XS.
323              
324             This is like C<--input-sep-char> option but for output instead of input.
325              
326             Defaults to C<,> (comma). Overrides C<--output-tsv> option.
327              
328             =item * B<output_tsv> => I<bool>
329              
330             Inform that output file is TSV (tab-separated) format instead of CSV.
331              
332             This is like C<--input-tsv> option but for output instead of input.
333              
334             Overridden by C<--output-sep-char>, C<--output-quote-char>, C<--output-escape-char>
335             options. If one of those options is specified, then C<--output-tsv> will be
336             ignored.
337              
338             =item * B<overwrite> => I<bool>
339              
340             Whether to overwrite existing output file.
341              
342             =item * B<show_selected_fields> => I<true>
343              
344             Show selected fields and then immediately exit.
345              
346              
347             =back
348              
349             Returns an enveloped result (an array).
350              
351             First element ($status_code) is an integer containing HTTP-like status code
352             (200 means OK, 4xx caller error, 5xx function error). Second element
353             ($reason) is a string containing error message, or something like "OK" if status is
354             200. Third element ($payload) is the actual result, but usually not present when enveloped result is an error response ($status_code is not 2xx). Fourth
355             element (%result_meta) is called result metadata and is optional, a hash
356             that contains extra information, much like how HTTP response headers provide additional metadata.
357              
358             Return value: (any)
359              
360             =head1 HOMEPAGE
361              
362             Please visit the project's homepage at L<https://metacpan.org/release/App-CSVUtils>.
363              
364             =head1 SOURCE
365              
366             Source repository is at L<https://github.com/perlancar/perl-App-CSVUtils>.
367              
368             =head1 AUTHOR
369              
370             perlancar <perlancar@cpan.org>
371              
372             =head1 CONTRIBUTING
373              
374              
375             To contribute, you can send patches by email/via RT, or send pull requests on
376             GitHub.
377              
378             Most of the time, you don't need to build the distribution yourself. You can
379             simply modify the code, then test via:
380              
381             % prove -l
382              
383             If you want to build the distribution (e.g. to try to install it locally on your
384             system), you can install L<Dist::Zilla>,
385             L<Dist::Zilla::PluginBundle::Author::PERLANCAR>,
386             L<Pod::Weaver::PluginBundle::Author::PERLANCAR>, and sometimes one or two other
387             Dist::Zilla- and/or Pod::Weaver plugins. Any additional steps required beyond
388             that are considered a bug and can be reported to me.
389              
390             =head1 COPYRIGHT AND LICENSE
391              
392             This software is copyright (c) 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016 by perlancar <perlancar@cpan.org>.
393              
394             This is free software; you can redistribute it and/or modify it under
395             the same terms as the Perl 5 programming language system itself.
396              
397             =head1 BUGS
398              
399             Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=App-CSVUtils>
400              
401             When submitting a bug or request, please include a test-file or a
402             patch to an existing test-file that illustrates the bug or desired
403             feature.
404              
405             =cut