File Coverage

blib/lib/App/CSVUtils/csv_sort_fields.pm

Criterion	Covered	Total	%
statement	48	48	100.0
branch	22	24	91.6
condition			n/a
subroutine	8	8	100.0
pod	0	2	0.0
total	78	82	95.1

line	stmt	bran	sub	pod	time	code
1						package App::CSVUtils::csv_sort_fields;
2
3	1		1		4407	use 5.010001;
	1				3
4	1		1		5	use strict;
	1				2
	1				21
5	1		1		4	use warnings;
	1				2
	1				23
6	1		1		9	use Log::ger;
	1				2
	1				10
7
8						our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
9						our $DATE = '2023-03-31'; # DATE
10						our $DIST = 'App-CSVUtils'; # DIST
11						our $VERSION = '1.023'; # VERSION
12
13	1				701	use App::CSVUtils qw(
14						gen_csv_util
15						compile_eval_code
16	1		1		340	);
	1				2
17
18						sub on_input_header_row {
19	9		9	0	18	my $r = shift;
20
21	9				44	my $code;
22						my $code_gets_field_with_pos;
23	9	100			35	if ($r->{util_args}{by_code}) {
		100
		100
24	1				7	$code_gets_field_with_pos++;
25	1				4	$code = compile_eval_code($r->{util_args}{by_code}, 'by_code');
26						} elsif (defined $r->{util_args}{by_sortsub}) {
27	1				5	require Sort::Sub;
28						$code = Sort::Sub::get_sorter(
29	1				10	$r->{util_args}{by_sortsub}, $r->{util_args}{sortsub_args});
30						} elsif (my $eg = $r->{util_args}{by_examples}) {
31	2				575	require Sort::ByExample;
32	2				16464	$code = Sort::ByExample->cmp($eg);
33						} else {
34	5		13		21	$code = sub { $_[0] cmp $_[1] };
	13				46
35						}
36
37						my @sorted_indices = sort {
38	24	100			143	my $field_a = $r->{util_args}{ci} ? lc($r->{input_fields}[$a]) : $r->{input_fields}[$a];
39	24	100			45	my $field_b = $r->{util_args}{ci} ? lc($r->{input_fields}[$b]) : $r->{input_fields}[$b];
40	24	100			53	local $main::a = $code_gets_field_with_pos ? [$field_a, $a] : $field_a;
41	24	100			47	local $main::b = $code_gets_field_with_pos ? [$field_b, $b] : $field_b;
42	24	100			59	($r->{util_args}{reverse} ? -1:1) * $code->($main::a, $main::b);
43	9				132	} 0..$#{$r->{input_fields}};
	9				60
44
45	9				57	$r->{output_fields} = [map {$r->{input_fields}[$_]} @sorted_indices];
	27				85
46	9				67	$r->{output_fields_idx_array} = \@sorted_indices; # this is a key we add to stash
47						}
48
49						sub on_input_data_row {
50	15		15	0	23	my $r = shift;
51
52	15	100			44	if ($main::_CSV_SORTED_FIELDS) {
53	2				554	require Data::Cmp;
54						#use DD; dd $r->{input_fields}; print "\n"; dd $r->{output_fields};
55	2	100			714	if (Data::Cmp::cmp_data($r->{input_fields}, $r->{output_fields})) {
56						# not sorted
57	1	50			79	$r->{result} = [400, "NOT sorted", $r->{util_args}{quiet} ? undef : "Fields are NOT sorted"];
58						} else {
59						# sorted
60	1	50			105	$r->{result} = [200, "Sorted", $r->{util_args}{quiet} ? undef : "Fields are sorted"];
61						}
62	2				6	$r->{wants_skip_files}++;
63	2				5	return;
64						} else {
65	13				25	my $row = [];
66	13				16	for my $j (@{ $r->{output_fields_idx_array} }) {
	13				29
67	39				83	push @$row, $r->{input_row}[$j];
68						}
69	13				34	$r->{code_print_row}->($row);
70						}
71						}
72
73						gen_csv_util(
74						name => 'csv_sort_fields',
75						summary => 'Sort CSV fields',
76						description => <<'_',
77
78						This utility sorts the order of fields in the CSV. Example input CSV:
79
80						b,c,a
81						1,2,3
82						4,5,6
83
84						Example output CSV:
85
86						a,b,c
87						3,1,2
88						6,4,5
89
90						You can also reverse the sort order (`-r`), sort case-insensitively (`-i`), or
91						provides the ordering example, e.g. `--by-examples-json '["a","c","b"]'`, or use
92						`--by-code` or `--by-sortsub`.
93
94						_
95
96						add_args => {
97						%App::CSVUtils::argspecs_sort_fields,
98						},
99						add_args_rels => {
100						choose_one => ['by_examples', 'by_code', 'by_sortsub'],
101						},
102
103						tags => ['category:sorting'],
104
105						on_input_header_row => \&on_input_header_row,
106
107						on_input_data_row => \&on_input_data_row,
108
109						);
110
111						1;
112						# ABSTRACT: Sort CSV fields
113
114						__END__
115
116						=pod
117
118						=encoding UTF-8
119
120						=head1 NAME
121
122						App::CSVUtils::csv_sort_fields - Sort CSV fields
123
124						=head1 VERSION
125
126						This document describes version 1.023 of App::CSVUtils::csv_sort_fields (from Perl distribution App-CSVUtils), released on 2023-03-31.
127
128						=for Pod::Coverage ^(on\|after\|before)_.+$
129
130						=head1 FUNCTIONS
131
132
133						=head2 csv_sort_fields
134
135						Usage:
136
137						csv_sort_fields(%args) -> [$status_code, $reason, $payload, \%result_meta]
138
139						Sort CSV fields.
140
141						This utility sorts the order of fields in the CSV. Example input CSV:
142
143						b,c,a
144						1,2,3
145						4,5,6
146
147						Example output CSV:
148
149						a,b,c
150						3,1,2
151						6,4,5
152
153						You can also reverse the sort order (C<-r>), sort case-insensitively (C<-i>), or
154						provides the ordering example, e.g. C<--by-examples-json '["a","c","b"]'>, or use
155						C<--by-code> or C<--by-sortsub>.
156
157						This function is not exported.
158
159						Arguments ('*' denotes required arguments):
160
161						=over 4
162
163						=item * B<by_code> => I<str\|code>
164
165						Sort fields using Perl code.
166
167						C<$a> and C<$b> (or the first and second argument) will contain C<[$field_name,
168						$field_idx]>.
169
170						=item * B<by_examples> => I<array[str]>
171
172						Sort by a list of field names as examples.
173
174						=item * B<by_sortsub> => I<str>
175
176						Sort using a Sort::Sub routine.
177
178						When sorting rows, usually combined with C<--key> because most Sort::Sub routine
179						expects a string to be compared against.
180
181						When sorting fields, the Sort::Sub routine will get the field name as argument.
182
183						=item * B<ci> => I<bool>
184
185						(No description)
186
187						=item * B<inplace> => I<true>
188
189						Output to the same file as input.
190
191						Normally, you output to a different file than input. If you try to output to the
192						same file (C<-o INPUT.csv -O>) you will clobber the input file; thus the utility
193						prevents you from doing it. However, with this C<--inplace> option, you can
194						output to the same file. Like perl's C<-i> option, this will first output to a
195						temporary file in the same directory as the input file then rename to the final
196						file at the end. You cannot specify output file (C<-o>) when using this option,
197						but you can specify backup extension with C<-b> option.
198
199						Some caveats:
200
201						=over
202
203						=item * if input file is a symbolic link, it will be replaced with a regular file;
204
205						=item * renaming (implemented using C<rename()>) can fail if input filename is too long;
206
207						=item * value specified in C<-b> is currently not checked for acceptable characters;
208
209						=item * things can also fail if permissions are restrictive;
210
211						=back
212
213						=item * B<inplace_backup_ext> => I<str> (default: "")
214
215						Extension to add for backup of input file.
216
217						In inplace mode (C<--inplace>), if this option is set to a non-empty string, will
218						rename the input file using this extension as a backup. The old existing backup
219						will be overwritten, if any.
220
221						=item * B<input_escape_char> => I<str>
222
223						Specify character to escape value in field in input CSV, will be passed to Text::CSV_XS.
224
225						Defaults to C<\\> (backslash). Overrides C<--input-tsv> option.
226
227						=item * B<input_filename> => I<filename> (default: "-")
228
229						Input CSV file.
230
231						Use C<-> to read from stdin.
232
233						Encoding of input file is assumed to be UTF-8.
234
235						=item * B<input_header> => I<bool> (default: 1)
236
237						Specify whether input CSV has a header row.
238
239						By default, the first row of the input CSV will be assumed to contain field
240						names (and the second row contains the first data row). When you declare that
241						input CSV does not have header row (C<--no-input-header>), the first row of the
242						CSV is assumed to contain the first data row. Fields will be named C<field1>,
243						C<field2>, and so on.
244
245						=item * B<input_quote_char> => I<str>
246
247						Specify field quote character in input CSV, will be passed to Text::CSV_XS.
248
249						Defaults to C<"> (double quote). Overrides C<--input-tsv> option.
250
251						=item * B<input_sep_char> => I<str>
252
253						Specify field separator character in input CSV, will be passed to Text::CSV_XS.
254
255						Defaults to C<,> (comma). Overrides C<--input-tsv> option.
256
257						=item * B<input_tsv> => I<true>
258
259						Inform that input file is in TSV (tab-separated) format instead of CSV.
260
261						Overriden by C<--input-sep-char>, C<--input-quote-char>, C<--input-escape-char>
262						options. If one of those options is specified, then C<--input-tsv> will be
263						ignored.
264
265						=item * B<output_always_quote> => I<bool> (default: 0)
266
267						Whether to always quote values.
268
269						When set to false (the default), values are quoted only when necessary:
270
271						field1,field2,"field three contains comma (,)",field4
272
273						When set to true, then all values will be quoted:
274
275						"field1","field2","field three contains comma (,)","field4"
276
277						=item * B<output_escape_char> => I<str>
278
279						Specify character to escape value in field in output CSV, will be passed to Text::CSV_XS.
280
281						This is like C<--input-escape-char> option but for output instead of input.
282
283						Defaults to C<\\> (backslash). Overrides C<--output-tsv> option.
284
285						=item * B<output_filename> => I<filename>
286
287						Output filename.
288
289						Use C<-> to output to stdout (the default if you don't specify this option).
290
291						Encoding of output file is assumed to be UTF-8.
292
293						=item * B<output_header> => I<bool>
294
295						Whether output CSV should have a header row.
296
297						By default, a header row will be output I<if> input CSV has header row. Under
298						C<--output-header>, a header row will be output even if input CSV does not have
299						header row (value will be something like "col0,col1,..."). Under
300						C<--no-output-header>, header row will I<not> be printed even if input CSV has
301						header row. So this option can be used to unconditionally add or remove header
302						row.
303
304						=item * B<output_quote_char> => I<str>
305
306						Specify field quote character in output CSV, will be passed to Text::CSV_XS.
307
308						This is like C<--input-quote-char> option but for output instead of input.
309
310						Defaults to C<"> (double quote). Overrides C<--output-tsv> option.
311
312						=item * B<output_quote_empty> => I<bool> (default: 0)
313
314						Whether to quote empty values.
315
316						When set to false (the default), empty values are not quoted:
317
318						field1,field2,,field4
319
320						When set to true, then empty values will be quoted:
321
322						field1,field2,"",field4
323
324						=item * B<output_sep_char> => I<str>
325
326						Specify field separator character in output CSV, will be passed to Text::CSV_XS.
327
328						This is like C<--input-sep-char> option but for output instead of input.
329
330						Defaults to C<,> (comma). Overrides C<--output-tsv> option.
331
332						=item * B<output_tsv> => I<bool>
333
334						Inform that output file is TSV (tab-separated) format instead of CSV.
335
336						This is like C<--input-tsv> option but for output instead of input.
337
338						Overriden by C<--output-sep-char>, C<--output-quote-char>, C<--output-escape-char>
339						options. If one of those options is specified, then C<--output-tsv> will be
340						ignored.
341
342						=item * B<overwrite> => I<bool>
343
344						Whether to override existing output file.
345
346						=item * B<reverse> => I<bool>
347
348						(No description)
349
350						=item * B<sortsub_args> => I<hash>
351
352						Arguments to pass to Sort::Sub routine.
353
354
355						=back
356
357						Returns an enveloped result (an array).
358
359						First element ($status_code) is an integer containing HTTP-like status code
360						(200 means OK, 4xx caller error, 5xx function error). Second element
361						($reason) is a string containing error message, or something like "OK" if status is
362						200. Third element ($payload) is the actual result, but usually not present when enveloped result is an error response ($status_code is not 2xx). Fourth
363						element (%result_meta) is called result metadata and is optional, a hash
364						that contains extra information, much like how HTTP response headers provide additional metadata.
365
366						Return value: (any)
367
368						=head1 HOMEPAGE
369
370						Please visit the project's homepage at L<https://metacpan.org/release/App-CSVUtils>.
371
372						=head1 SOURCE
373
374						Source repository is at L<https://github.com/perlancar/perl-App-CSVUtils>.
375
376						=head1 AUTHOR
377
378						perlancar <perlancar@cpan.org>
379
380						=head1 CONTRIBUTING
381
382
383						To contribute, you can send patches by email/via RT, or send pull requests on
384						GitHub.
385
386						Most of the time, you don't need to build the distribution yourself. You can
387						simply modify the code, then test via:
388
389						% prove -l
390
391						If you want to build the distribution (e.g. to try to install it locally on your
392						system), you can install L<Dist::Zilla>,
393						L<Dist::Zilla::PluginBundle::Author::PERLANCAR>,
394						L<Pod::Weaver::PluginBundle::Author::PERLANCAR>, and sometimes one or two other
395						Dist::Zilla- and/or Pod::Weaver plugins. Any additional steps required beyond
396						that are considered a bug and can be reported to me.
397
398						=head1 COPYRIGHT AND LICENSE
399
400						This software is copyright (c) 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016 by perlancar <perlancar@cpan.org>.
401
402						This is free software; you can redistribute it and/or modify it under
403						the same terms as the Perl 5 programming language system itself.
404
405						=head1 BUGS
406
407						Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=App-CSVUtils>
408
409						When submitting a bug or request, please include a test-file or a
410						patch to an existing test-file that illustrates the bug or desired
411						feature.
412
413						=cut