File Coverage

blib/lib/App/CSVUtils/csv_sort_fields.pm
Criterion Covered Total %
statement 48 48 100.0
branch 22 24 91.6
condition n/a
subroutine 8 8 100.0
pod 0 2 0.0
total 78 82 95.1


line stmt bran cond sub pod time code
1             package App::CSVUtils::csv_sort_fields;
2              
3 1     1   4407 use 5.010001;
  1         3  
4 1     1   5 use strict;
  1         2  
  1         21  
5 1     1   4 use warnings;
  1         2  
  1         23  
6 1     1   9 use Log::ger;
  1         2  
  1         10  
7              
8             our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
9             our $DATE = '2023-03-31'; # DATE
10             our $DIST = 'App-CSVUtils'; # DIST
11             our $VERSION = '1.023'; # VERSION
12              
13 1         701 use App::CSVUtils qw(
14             gen_csv_util
15             compile_eval_code
16 1     1   340 );
  1         2  
17              
18             sub on_input_header_row {
19 9     9 0 18 my $r = shift;
20              
21 9         44 my $code;
22             my $code_gets_field_with_pos;
23 9 100       35 if ($r->{util_args}{by_code}) {
    100          
    100          
24 1         7 $code_gets_field_with_pos++;
25 1         4 $code = compile_eval_code($r->{util_args}{by_code}, 'by_code');
26             } elsif (defined $r->{util_args}{by_sortsub}) {
27 1         5 require Sort::Sub;
28             $code = Sort::Sub::get_sorter(
29 1         10 $r->{util_args}{by_sortsub}, $r->{util_args}{sortsub_args});
30             } elsif (my $eg = $r->{util_args}{by_examples}) {
31 2         575 require Sort::ByExample;
32 2         16464 $code = Sort::ByExample->cmp($eg);
33             } else {
34 5     13   21 $code = sub { $_[0] cmp $_[1] };
  13         46  
35             }
36              
37             my @sorted_indices = sort {
38 24 100       143 my $field_a = $r->{util_args}{ci} ? lc($r->{input_fields}[$a]) : $r->{input_fields}[$a];
39 24 100       45 my $field_b = $r->{util_args}{ci} ? lc($r->{input_fields}[$b]) : $r->{input_fields}[$b];
40 24 100       53 local $main::a = $code_gets_field_with_pos ? [$field_a, $a] : $field_a;
41 24 100       47 local $main::b = $code_gets_field_with_pos ? [$field_b, $b] : $field_b;
42 24 100       59 ($r->{util_args}{reverse} ? -1:1) * $code->($main::a, $main::b);
43 9         132 } 0..$#{$r->{input_fields}};
  9         60  
44              
45 9         57 $r->{output_fields} = [map {$r->{input_fields}[$_]} @sorted_indices];
  27         85  
46 9         67 $r->{output_fields_idx_array} = \@sorted_indices; # this is a key we add to stash
47             }
48              
49             sub on_input_data_row {
50 15     15 0 23 my $r = shift;
51              
52 15 100       44 if ($main::_CSV_SORTED_FIELDS) {
53 2         554 require Data::Cmp;
54             #use DD; dd $r->{input_fields}; print "\n"; dd $r->{output_fields};
55 2 100       714 if (Data::Cmp::cmp_data($r->{input_fields}, $r->{output_fields})) {
56             # not sorted
57 1 50       79 $r->{result} = [400, "NOT sorted", $r->{util_args}{quiet} ? undef : "Fields are NOT sorted"];
58             } else {
59             # sorted
60 1 50       105 $r->{result} = [200, "Sorted", $r->{util_args}{quiet} ? undef : "Fields are sorted"];
61             }
62 2         6 $r->{wants_skip_files}++;
63 2         5 return;
64             } else {
65 13         25 my $row = [];
66 13         16 for my $j (@{ $r->{output_fields_idx_array} }) {
  13         29  
67 39         83 push @$row, $r->{input_row}[$j];
68             }
69 13         34 $r->{code_print_row}->($row);
70             }
71             }
72              
73             gen_csv_util(
74             name => 'csv_sort_fields',
75             summary => 'Sort CSV fields',
76             description => <<'_',
77              
78             This utility sorts the order of fields in the CSV. Example input CSV:
79              
80             b,c,a
81             1,2,3
82             4,5,6
83              
84             Example output CSV:
85              
86             a,b,c
87             3,1,2
88             6,4,5
89              
90             You can also reverse the sort order (`-r`), sort case-insensitively (`-i`), or
91             provides the ordering example, e.g. `--by-examples-json '["a","c","b"]'`, or use
92             `--by-code` or `--by-sortsub`.
93              
94             _
95              
96             add_args => {
97             %App::CSVUtils::argspecs_sort_fields,
98             },
99             add_args_rels => {
100             choose_one => ['by_examples', 'by_code', 'by_sortsub'],
101             },
102              
103             tags => ['category:sorting'],
104              
105             on_input_header_row => \&on_input_header_row,
106              
107             on_input_data_row => \&on_input_data_row,
108              
109             );
110              
111             1;
112             # ABSTRACT: Sort CSV fields
113              
114             __END__
115              
116             =pod
117              
118             =encoding UTF-8
119              
120             =head1 NAME
121              
122             App::CSVUtils::csv_sort_fields - Sort CSV fields
123              
124             =head1 VERSION
125              
126             This document describes version 1.023 of App::CSVUtils::csv_sort_fields (from Perl distribution App-CSVUtils), released on 2023-03-31.
127              
128             =for Pod::Coverage ^(on|after|before)_.+$
129              
130             =head1 FUNCTIONS
131              
132              
133             =head2 csv_sort_fields
134              
135             Usage:
136              
137             csv_sort_fields(%args) -> [$status_code, $reason, $payload, \%result_meta]
138              
139             Sort CSV fields.
140              
141             This utility sorts the order of fields in the CSV. Example input CSV:
142              
143             b,c,a
144             1,2,3
145             4,5,6
146              
147             Example output CSV:
148              
149             a,b,c
150             3,1,2
151             6,4,5
152              
153             You can also reverse the sort order (C<-r>), sort case-insensitively (C<-i>), or
154             provides the ordering example, e.g. C<--by-examples-json '["a","c","b"]'>, or use
155             C<--by-code> or C<--by-sortsub>.
156              
157             This function is not exported.
158              
159             Arguments ('*' denotes required arguments):
160              
161             =over 4
162              
163             =item * B<by_code> => I<str|code>
164              
165             Sort fields using Perl code.
166              
167             C<$a> and C<$b> (or the first and second argument) will contain C<[$field_name,
168             $field_idx]>.
169              
170             =item * B<by_examples> => I<array[str]>
171              
172             Sort by a list of field names as examples.
173              
174             =item * B<by_sortsub> => I<str>
175              
176             Sort using a Sort::Sub routine.
177              
178             When sorting rows, usually combined with C<--key> because most Sort::Sub routine
179             expects a string to be compared against.
180              
181             When sorting fields, the Sort::Sub routine will get the field name as argument.
182              
183             =item * B<ci> => I<bool>
184              
185             (No description)
186              
187             =item * B<inplace> => I<true>
188              
189             Output to the same file as input.
190              
191             Normally, you output to a different file than input. If you try to output to the
192             same file (C<-o INPUT.csv -O>) you will clobber the input file; thus the utility
193             prevents you from doing it. However, with this C<--inplace> option, you can
194             output to the same file. Like perl's C<-i> option, this will first output to a
195             temporary file in the same directory as the input file then rename to the final
196             file at the end. You cannot specify output file (C<-o>) when using this option,
197             but you can specify backup extension with C<-b> option.
198              
199             Some caveats:
200              
201             =over
202              
203             =item * if input file is a symbolic link, it will be replaced with a regular file;
204              
205             =item * renaming (implemented using C<rename()>) can fail if input filename is too long;
206              
207             =item * value specified in C<-b> is currently not checked for acceptable characters;
208              
209             =item * things can also fail if permissions are restrictive;
210              
211             =back
212              
213             =item * B<inplace_backup_ext> => I<str> (default: "")
214              
215             Extension to add for backup of input file.
216              
217             In inplace mode (C<--inplace>), if this option is set to a non-empty string, will
218             rename the input file using this extension as a backup. The old existing backup
219             will be overwritten, if any.
220              
221             =item * B<input_escape_char> => I<str>
222              
223             Specify character to escape value in field in input CSV, will be passed to Text::CSV_XS.
224              
225             Defaults to C<\\> (backslash). Overrides C<--input-tsv> option.
226              
227             =item * B<input_filename> => I<filename> (default: "-")
228              
229             Input CSV file.
230              
231             Use C<-> to read from stdin.
232              
233             Encoding of input file is assumed to be UTF-8.
234              
235             =item * B<input_header> => I<bool> (default: 1)
236              
237             Specify whether input CSV has a header row.
238              
239             By default, the first row of the input CSV will be assumed to contain field
240             names (and the second row contains the first data row). When you declare that
241             input CSV does not have header row (C<--no-input-header>), the first row of the
242             CSV is assumed to contain the first data row. Fields will be named C<field1>,
243             C<field2>, and so on.
244              
245             =item * B<input_quote_char> => I<str>
246              
247             Specify field quote character in input CSV, will be passed to Text::CSV_XS.
248              
249             Defaults to C<"> (double quote). Overrides C<--input-tsv> option.
250              
251             =item * B<input_sep_char> => I<str>
252              
253             Specify field separator character in input CSV, will be passed to Text::CSV_XS.
254              
255             Defaults to C<,> (comma). Overrides C<--input-tsv> option.
256              
257             =item * B<input_tsv> => I<true>
258              
259             Inform that input file is in TSV (tab-separated) format instead of CSV.
260              
261             Overriden by C<--input-sep-char>, C<--input-quote-char>, C<--input-escape-char>
262             options. If one of those options is specified, then C<--input-tsv> will be
263             ignored.
264              
265             =item * B<output_always_quote> => I<bool> (default: 0)
266              
267             Whether to always quote values.
268              
269             When set to false (the default), values are quoted only when necessary:
270              
271             field1,field2,"field three contains comma (,)",field4
272              
273             When set to true, then all values will be quoted:
274              
275             "field1","field2","field three contains comma (,)","field4"
276              
277             =item * B<output_escape_char> => I<str>
278              
279             Specify character to escape value in field in output CSV, will be passed to Text::CSV_XS.
280              
281             This is like C<--input-escape-char> option but for output instead of input.
282              
283             Defaults to C<\\> (backslash). Overrides C<--output-tsv> option.
284              
285             =item * B<output_filename> => I<filename>
286              
287             Output filename.
288              
289             Use C<-> to output to stdout (the default if you don't specify this option).
290              
291             Encoding of output file is assumed to be UTF-8.
292              
293             =item * B<output_header> => I<bool>
294              
295             Whether output CSV should have a header row.
296              
297             By default, a header row will be output I<if> input CSV has header row. Under
298             C<--output-header>, a header row will be output even if input CSV does not have
299             header row (value will be something like "col0,col1,..."). Under
300             C<--no-output-header>, header row will I<not> be printed even if input CSV has
301             header row. So this option can be used to unconditionally add or remove header
302             row.
303              
304             =item * B<output_quote_char> => I<str>
305              
306             Specify field quote character in output CSV, will be passed to Text::CSV_XS.
307              
308             This is like C<--input-quote-char> option but for output instead of input.
309              
310             Defaults to C<"> (double quote). Overrides C<--output-tsv> option.
311              
312             =item * B<output_quote_empty> => I<bool> (default: 0)
313              
314             Whether to quote empty values.
315              
316             When set to false (the default), empty values are not quoted:
317              
318             field1,field2,,field4
319              
320             When set to true, then empty values will be quoted:
321              
322             field1,field2,"",field4
323              
324             =item * B<output_sep_char> => I<str>
325              
326             Specify field separator character in output CSV, will be passed to Text::CSV_XS.
327              
328             This is like C<--input-sep-char> option but for output instead of input.
329              
330             Defaults to C<,> (comma). Overrides C<--output-tsv> option.
331              
332             =item * B<output_tsv> => I<bool>
333              
334             Inform that output file is TSV (tab-separated) format instead of CSV.
335              
336             This is like C<--input-tsv> option but for output instead of input.
337              
338             Overriden by C<--output-sep-char>, C<--output-quote-char>, C<--output-escape-char>
339             options. If one of those options is specified, then C<--output-tsv> will be
340             ignored.
341              
342             =item * B<overwrite> => I<bool>
343              
344             Whether to override existing output file.
345              
346             =item * B<reverse> => I<bool>
347              
348             (No description)
349              
350             =item * B<sortsub_args> => I<hash>
351              
352             Arguments to pass to Sort::Sub routine.
353              
354              
355             =back
356              
357             Returns an enveloped result (an array).
358              
359             First element ($status_code) is an integer containing HTTP-like status code
360             (200 means OK, 4xx caller error, 5xx function error). Second element
361             ($reason) is a string containing error message, or something like "OK" if status is
362             200. Third element ($payload) is the actual result, but usually not present when enveloped result is an error response ($status_code is not 2xx). Fourth
363             element (%result_meta) is called result metadata and is optional, a hash
364             that contains extra information, much like how HTTP response headers provide additional metadata.
365              
366             Return value: (any)
367              
368             =head1 HOMEPAGE
369              
370             Please visit the project's homepage at L<https://metacpan.org/release/App-CSVUtils>.
371              
372             =head1 SOURCE
373              
374             Source repository is at L<https://github.com/perlancar/perl-App-CSVUtils>.
375              
376             =head1 AUTHOR
377              
378             perlancar <perlancar@cpan.org>
379              
380             =head1 CONTRIBUTING
381              
382              
383             To contribute, you can send patches by email/via RT, or send pull requests on
384             GitHub.
385              
386             Most of the time, you don't need to build the distribution yourself. You can
387             simply modify the code, then test via:
388              
389             % prove -l
390              
391             If you want to build the distribution (e.g. to try to install it locally on your
392             system), you can install L<Dist::Zilla>,
393             L<Dist::Zilla::PluginBundle::Author::PERLANCAR>,
394             L<Pod::Weaver::PluginBundle::Author::PERLANCAR>, and sometimes one or two other
395             Dist::Zilla- and/or Pod::Weaver plugins. Any additional steps required beyond
396             that are considered a bug and can be reported to me.
397              
398             =head1 COPYRIGHT AND LICENSE
399              
400             This software is copyright (c) 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016 by perlancar <perlancar@cpan.org>.
401              
402             This is free software; you can redistribute it and/or modify it under
403             the same terms as the Perl 5 programming language system itself.
404              
405             =head1 BUGS
406              
407             Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=App-CSVUtils>
408              
409             When submitting a bug or request, please include a test-file or a
410             patch to an existing test-file that illustrates the bug or desired
411             feature.
412              
413             =cut