File Coverage

blib/lib/App/CSVUtils/csv_sort_rows.pm

Criterion	Covered	Total	%
statement	96	102	94.1
branch	45	62	72.5
condition	8	9	88.8
subroutine	10	10	100.0
pod	0	3	0.0
total	159	186	85.4

line	stmt	bran	cond	sub	pod	time	code
1							package App::CSVUtils::csv_sort_rows;
2
3	1			1		7417	use 5.010001;
	1					4
4	1			1		6	use strict;
	1					2
	1					26
5	1			1		9	use warnings;
	1					3
	1					23
6	1			1		5	use Log::ger;
	1					2
	1					5
7
8							our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
9							our $DATE = '2023-04-01'; # DATE
10							our $DIST = 'App-CSVUtils'; # DIST
11							our $VERSION = '1.024'; # VERSION
12
13	1					1205	use App::CSVUtils qw(
14							gen_csv_util
15							compile_eval_code
16	1			1		256	);
	1					3
17
18							sub on_input_header_row {
19	14			14	0	29	my $r = shift;
20	14	100				48	$r->{wants_input_row_as_hashref}++ if $r->{util_args}{hash};
21							}
22
23							sub on_input_data_row {
24	42			42	0	76	my $r = shift;
25
26							# keys we add to the stash
27	42		100			118	$r->{input_rows} //= [];
28	42	100				79	if ($r->{wants_input_row_as_hashref}) {
29	9		100			36	$r->{input_rows_as_hashref} //= [];
30							}
31
32	42					58	push @{ $r->{input_rows} }, $r->{input_row};
	42					80
33	42	100				101	if ($r->{wants_input_row_as_hashref}) {
34	9					13	push @{ $r->{input_rows_as_hashref} }, $r->{input_row_as_hashref};
	9					27
35							}
36							}
37
38							sub after_close_input_files {
39	14			14	0	22	my $r = shift;
40
41							# we do the actual sorting here after collecting all the rows
42
43							# whether we should compute keys
44	14					23	my @keys;
45	14	100				36	if ($r->{util_args}{key}) {
46	4					13	my $code_gen_key = compile_eval_code($r->{util_args}{key}, 'key');
47	4	100				9	for my $row (@{ $r->{util_args}{hash} ? $r->{input_rows_as_hashref} : $r->{input_rows} }) {
	4					16
48	12					19	local $_ = $row;
49	12					231	push @keys, $code_gen_key->($row);
50							}
51							}
52
53							# if user doesn't specify any by_* args, try to set a default if we can
54	14	50				53	if (!(grep {defined} ($r->{util_args}{by_code}, $r->{util_args}{by_sortsub}, $r->{util_args}{by_fields}))) {
	42					97
55							# if there is only a single field, use it
56	0	0				0	if (@{ $r->{input_fields} } == 1) {
	0					0
57	0					0	my $field = $r->{input_fields}[0];
58							$r->{util_args}{by_fields} = App::CSVUtils::_is_numeric_field(
59	0	0				0	$r->{input_rows}, 0) ? ["+$field"] : [$field];
60							}
61							}
62
63	14					23	my $sorted_rows;
64	14	100	100			66	if ($r->{util_args}{by_code} \|\| $r->{util_args}{by_sortsub}) {
		50
65
66	6					8	my $code0;
67	6	100				17	if ($r->{util_args}{by_code}) {
		50
68	4					12	$code0 = compile_eval_code($r->{util_args}{by_code}, 'by_code');
69							} elsif (defined $r->{util_args}{by_sortsub}) {
70	2					8	require Sort::Sub;
71							$code0 = Sort::Sub::get_sorter(
72	2					9	$r->{util_args}{by_sortsub}, $r->{util_args}{sortsub_args});
73							}
74
75	6					88	my $sort_indices;
76							my $code;
77	6	100				20	if (@keys) {
		100
78							# compare two sort keys ($a & $b) are indices
79	4					7	$sort_indices++;
80							$code = sub {
81	11			11		22	local $main::a = $keys[$a];
82	11					15	local $main::b = $keys[$b];
83							#log_trace "a=<$main::a> vs b=<$main::b>";
84	11					130	$code0->($main::a, $main::b);
85	4					13	};
86							} elsif ($r->{util_args}{hash}) {
87	1					2	$sort_indices++;
88							$code = sub {
89	3			3		6	local $main::a = $r->{input_rows_as_hashref}[$a];
90	3					4	local $main::b = $r->{input_rows_as_hashref}[$b];
91							#log_trace "a=<%s> vs b=<%s>", $main::a, $main::b;
92	3					64	$code0->($main::a, $main::b);
93	1					4	};
94							} else {
95	1					2	$code = $code0;
96							}
97
98	6	100				13	if ($sort_indices) {
99	5					10	my @sorted_indices = sort { local $main::a=$a; local $main::b=$b; $code->($main::a,$main::b) } 0..$#{$r->{input_rows}};
	14					135
	14					28
	14					21
	5					21
100	5					52	$sorted_rows = [map {$r->{input_rows}[$_]} @sorted_indices];
	15					52
101							} else {
102	1					2	$sorted_rows = [sort { local $main::a=$a; local $main::b=$b; $code->($main::a,$main::b) } @{$r->{input_rows}}];
	3					5
	3					4
	3					58
	1					4
103							}
104
105							} elsif ($r->{util_args}{by_fields}) {
106
107	8					14	my @fields;
108	8					13	my $code_str = "";
109	8					12	for my $field_spec (@{ $r->{util_args}{by_fields} }) {
	8					21
110	9					49	my ($prefix, $field) = $field_spec =~ /\A([+~-]?)(.+)/;
111	9					29	my $field_idx = App::CSVUtils::_find_field($r->{input_fields}, $field);
112	9		50			24	$prefix //= "";
113	9	100				27	if ($prefix eq '+') {
		100
		100
		50
114	2	100				12	$code_str .= ($code_str ? " \|\| " : "") .
115							"(\$a->[$field_idx] <=> \$b->[$field_idx])";
116							} elsif ($prefix eq '-') {
117	2	50				13	$code_str .= ($code_str ? " \|\| " : "") .
118							"(\$b->[$field_idx] <=> \$a->[$field_idx])";
119							} elsif ($prefix eq '') {
120	4	100				9	if ($r->{util_args}{ci}) {
121	1	50				7	$code_str .= ($code_str ? " \|\| " : "") .
122							"(lc(\$a->[$field_idx]) cmp lc(\$b->[$field_idx]))";
123							} else {
124	3	50				13	$code_str .= ($code_str ? " \|\| " : "") .
125							"(\$a->[$field_idx] cmp \$b->[$field_idx])";
126							}
127							} elsif ($prefix eq '~') {
128	1	50				4	if ($r->{util_args}{ci}) {
129	0	0				0	$code_str .= ($code_str ? " \|\| " : "") .
130							"(lc(\$b->[$field_idx]) cmp lc(\$a->[$field_idx]))";
131							} else {
132	1	50				7	$code_str .= ($code_str ? " \|\| " : "") .
133							"(\$b->[$field_idx] cmp \$a->[$field_idx])";
134							}
135							}
136							}
137	8					32	my $code = compile_eval_code($code_str, 'from sort_by_fields');
138	8					17	$sorted_rows = [sort { local $main::a = $a; local $main::b = $b; $code->($main::a, $main::b) } @{$r->{input_rows}}];
	22					38
	22					27
	22					477
	8					32
139
140							} else {
141
142	0					0	die [400, "Please specify by_fields or by_sortsub or by_code"];
143
144							}
145
146	14	100				37	if ($main::_CSV_SORTED_ROWS) {
147	2					10	require Data::Cmp;
148							#use DD; dd $r->{input_rows}; print "\n"; dd $sorted_rows;
149	2	100				13	if (Data::Cmp::cmp_data($r->{input_rows}, $sorted_rows)) {
150							# not sorted
151	1	50				92	$r->{result} = [400, "NOT sorted", $r->{util_args}{quiet} ? undef : "Rows are NOT sorted"];
152							} else {
153							# sorted
154	1	50				299	$r->{result} = [200, "Sorted", $r->{util_args}{quiet} ? undef : "Rows are sorted"];
155							}
156							} else {
157	12					25	for my $row (@$sorted_rows) {
158	36					96	$r->{code_print_row}->($row);
159							}
160							}
161							}
162
163							gen_csv_util(
164							name => 'csv_sort_rows',
165							summary => 'Sort CSV rows',
166							description => <<'_',
167
168							This utility sorts the rows in the CSV. Example input CSV:
169
170							name,age
171							Andy,20
172							Dennis,15
173							Ben,30
174							Jerry,30
175
176							Example output CSV (using `--by-field +age` which means by age numerically and
177							ascending):
178
179							name,age
180							Dennis,15
181							Andy,20
182							Ben,30
183							Jerry,30
184
185							Example output CSV (using `--by-field -age`, which means by age numerically and
186							descending):
187
188							name,age
189							Ben,30
190							Jerry,30
191							Andy,20
192							Dennis,15
193
194							Example output CSV (using `--by-field name`, which means by name ascibetically
195							and ascending):
196
197							name,age
198							Andy,20
199							Ben,30
200							Dennis,15
201							Jerry,30
202
203							Example output CSV (using `--by-field ~name`, which means by name ascibetically
204							and descending):
205
206							name,age
207							Jerry,30
208							Dennis,15
209							Ben,30
210							Andy,20
211
212							Example output CSV (using `--by-field +age --by-field ~name`):
213
214							name,age
215							Dennis,15
216							Andy,20
217							Jerry,30
218							Ben,30
219
220							You can also reverse the sort order (`-r`) or sort case-insensitively (`-i`).
221
222							For more flexibility, instead of `--by-field` you can use `--by-code`:
223
224							Example output `--by-code '$a->[1] <=> $b->[1] \|\| $b->[0] cmp $a->[0]'` (which
225							is equivalent to `--by-field +age --by-field ~name`):
226
227							name,age
228							Dennis,15
229							Andy,20
230							Jerry,30
231							Ben,30
232
233							If you use `--hash`, your code will receive the rows to be compared as hashref,
234							e.g. `--hash --by-code '$a->{age} <=> $b->{age} \|\| $b->{name} cmp $a->{name}'.
235
236							A third alternative is to sort using <pm:Sort::Sub> routines. Example output
237							(using `--by-sortsub 'by_length<r>' --key '$_->[0]'`, which is to say to sort by
238							descending length of name):
239
240							name,age
241							Dennis,15
242							Jerry,30
243							Andy,20
244							Ben,30
245
246							If none of the `--by-*` options are specified, the utility will bail unless
247							there's a default that can be used, e.g. when CSV has a single field then that
248							field will be used.
249
250							_
251
252							add_args => {
253							%App::CSVUtils::argspecopt_hash,
254							%App::CSVUtils::argspecs_sort_rows,
255							},
256
257							tags => ['category:sorting'],
258
259							on_input_header_row => \&App::CSVUtils::csv_sort_rows::on_input_header_row,
260
261							on_input_data_row => \&App::CSVUtils::csv_sort_rows::on_input_data_row,
262
263							after_close_input_files => \&App::CSVUtils::csv_sort_rows::after_close_input_files,
264
265							);
266
267							1;
268							# ABSTRACT: Sort CSV rows
269
270							__END__
271
272							=pod
273
274							=encoding UTF-8
275
276							=head1 NAME
277
278							App::CSVUtils::csv_sort_rows - Sort CSV rows
279
280							=head1 VERSION
281
282							This document describes version 1.024 of App::CSVUtils::csv_sort_rows (from Perl distribution App-CSVUtils), released on 2023-04-01.
283
284							=for Pod::Coverage ^(on\|after\|before)_.+$
285
286							=head1 FUNCTIONS
287
288
289							=head2 csv_sort_rows
290
291							Usage:
292
293							csv_sort_rows(%args) -> [$status_code, $reason, $payload, \%result_meta]
294
295							Sort CSV rows.
296
297							This utility sorts the rows in the CSV. Example input CSV:
298
299							name,age
300							Andy,20
301							Dennis,15
302							Ben,30
303							Jerry,30
304
305							Example output CSV (using C<--by-field +age> which means by age numerically and
306							ascending):
307
308							name,age
309							Dennis,15
310							Andy,20
311							Ben,30
312							Jerry,30
313
314							Example output CSV (using C<--by-field -age>, which means by age numerically and
315							descending):
316
317							name,age
318							Ben,30
319							Jerry,30
320							Andy,20
321							Dennis,15
322
323							Example output CSV (using C<--by-field name>, which means by name ascibetically
324							and ascending):
325
326							name,age
327							Andy,20
328							Ben,30
329							Dennis,15
330							Jerry,30
331
332							Example output CSV (using C<--by-field ~name>, which means by name ascibetically
333							and descending):
334
335							name,age
336							Jerry,30
337							Dennis,15
338							Ben,30
339							Andy,20
340
341							Example output CSV (using C<--by-field +age --by-field ~name>):
342
343							name,age
344							Dennis,15
345							Andy,20
346							Jerry,30
347							Ben,30
348
349							You can also reverse the sort order (C<-r>) or sort case-insensitively (C<-i>).
350
351							For more flexibility, instead of C<--by-field> you can use C<--by-code>:
352
353							Example output C<< --by-code '$a-E<gt>[1] E<lt>=E<gt> $b-E<gt>[1] \|\| $b-E<gt>[0] cmp $a-E<gt>[0]' >> (which
354							is equivalent to C<--by-field +age --by-field ~name>):
355
356							name,age
357							Dennis,15
358							Andy,20
359							Jerry,30
360							Ben,30
361
362							If you use C<--hash>, your code will receive the rows to be compared as hashref,
363							e.g. `--hash --by-code '$a->{age} <=> $b->{age} \|\| $b->{name} cmp $a->{name}'.
364
365							A third alternative is to sort using L<Sort::Sub> routines. Example output
366							(using C<< --by-sortsub 'by_lengthE<lt>rE<gt>' --key '$_-E<gt>[0]' >>, which is to say to sort by
367							descending length of name):
368
369							name,age
370							Dennis,15
371							Jerry,30
372							Andy,20
373							Ben,30
374
375							If none of the C<--by-*> options are specified, the utility will bail unless
376							there's a default that can be used, e.g. when CSV has a single field then that
377							field will be used.
378
379							This function is not exported.
380
381							Arguments ('*' denotes required arguments):
382
383							=over 4
384
385							=item * B<by_code> => I<str\|code>
386
387							Sort by using Perl code.
388
389							C<$a> and C<$b> (or the first and second argument) will contain the two rows to be
390							compared. Which are arrayrefs; or if C<--hash> (C<-H>) is specified, hashrefs; or
391							if C<--key> is specified, whatever the code in C<--key> returns.
392
393							=item * B<by_fields> => I<array[str]>
394
395							Sort by a list of field specifications.
396
397							Each field specification is a field name with an optional prefix. C<FIELD>
398							(without prefix) means sort asciibetically ascending (smallest to largest),
399							C<~FIELD> means sort asciibetically descending (largest to smallest), C<+FIELD>
400							means sort numerically ascending, C<-FIELD> means sort numerically descending.
401
402							=item * B<by_sortsub> => I<str>
403
404							Sort using a Sort::Sub routine.
405
406							When sorting rows, usually combined with C<--key> because most Sort::Sub routine
407							expects a string to be compared against.
408
409							When sorting fields, the Sort::Sub routine will get the field name as argument.
410
411							=item * B<ci> => I<bool>
412
413							(No description)
414
415							=item * B<hash> => I<bool>
416
417							Provide row in $_ as hashref instead of arrayref.
418
419							=item * B<inplace> => I<true>
420
421							Output to the same file as input.
422
423							Normally, you output to a different file than input. If you try to output to the
424							same file (C<-o INPUT.csv -O>) you will clobber the input file; thus the utility
425							prevents you from doing it. However, with this C<--inplace> option, you can
426							output to the same file. Like perl's C<-i> option, this will first output to a
427							temporary file in the same directory as the input file then rename to the final
428							file at the end. You cannot specify output file (C<-o>) when using this option,
429							but you can specify backup extension with C<-b> option.
430
431							Some caveats:
432
433							=over
434
435							=item * if input file is a symbolic link, it will be replaced with a regular file;
436
437							=item * renaming (implemented using C<rename()>) can fail if input filename is too long;
438
439							=item * value specified in C<-b> is currently not checked for acceptable characters;
440
441							=item * things can also fail if permissions are restrictive;
442
443							=back
444
445							=item * B<inplace_backup_ext> => I<str> (default: "")
446
447							Extension to add for backup of input file.
448
449							In inplace mode (C<--inplace>), if this option is set to a non-empty string, will
450							rename the input file using this extension as a backup. The old existing backup
451							will be overwritten, if any.
452
453							=item * B<input_escape_char> => I<str>
454
455							Specify character to escape value in field in input CSV, will be passed to Text::CSV_XS.
456
457							Defaults to C<\\> (backslash). Overrides C<--input-tsv> option.
458
459							=item * B<input_filename> => I<filename> (default: "-")
460
461							Input CSV file.
462
463							Use C<-> to read from stdin.
464
465							Encoding of input file is assumed to be UTF-8.
466
467							=item * B<input_header> => I<bool> (default: 1)
468
469							Specify whether input CSV has a header row.
470
471							By default, the first row of the input CSV will be assumed to contain field
472							names (and the second row contains the first data row). When you declare that
473							input CSV does not have header row (C<--no-input-header>), the first row of the
474							CSV is assumed to contain the first data row. Fields will be named C<field1>,
475							C<field2>, and so on.
476
477							=item * B<input_quote_char> => I<str>
478
479							Specify field quote character in input CSV, will be passed to Text::CSV_XS.
480
481							Defaults to C<"> (double quote). Overrides C<--input-tsv> option.
482
483							=item * B<input_sep_char> => I<str>
484
485							Specify field separator character in input CSV, will be passed to Text::CSV_XS.
486
487							Defaults to C<,> (comma). Overrides C<--input-tsv> option.
488
489							=item * B<input_tsv> => I<true>
490
491							Inform that input file is in TSV (tab-separated) format instead of CSV.
492
493							Overriden by C<--input-sep-char>, C<--input-quote-char>, C<--input-escape-char>
494							options. If one of those options is specified, then C<--input-tsv> will be
495							ignored.
496
497							=item * B<key> => I<str\|code>
498
499							Generate sort keys with this Perl code.
500
501							If specified, then will compute sort keys using Perl code and sort using the
502							keys. Relevant when sorting using C<--by-code> or C<--by-sortsub>. If specified,
503							then instead of row when sorting rows, the code (or Sort::Sub routine) will
504							receive these sort keys to sort against.
505
506							The code will receive the row (arrayref, or if -H is specified, hashref) as the
507							argument.
508
509							=item * B<output_always_quote> => I<bool> (default: 0)
510
511							Whether to always quote values.
512
513							When set to false (the default), values are quoted only when necessary:
514
515							field1,field2,"field three contains comma (,)",field4
516
517							When set to true, then all values will be quoted:
518
519							"field1","field2","field three contains comma (,)","field4"
520
521							=item * B<output_escape_char> => I<str>
522
523							Specify character to escape value in field in output CSV, will be passed to Text::CSV_XS.
524
525							This is like C<--input-escape-char> option but for output instead of input.
526
527							Defaults to C<\\> (backslash). Overrides C<--output-tsv> option.
528
529							=item * B<output_filename> => I<filename>
530
531							Output filename.
532
533							Use C<-> to output to stdout (the default if you don't specify this option).
534
535							Encoding of output file is assumed to be UTF-8.
536
537							=item * B<output_header> => I<bool>
538
539							Whether output CSV should have a header row.
540
541							By default, a header row will be output I<if> input CSV has header row. Under
542							C<--output-header>, a header row will be output even if input CSV does not have
543							header row (value will be something like "col0,col1,..."). Under
544							C<--no-output-header>, header row will I<not> be printed even if input CSV has
545							header row. So this option can be used to unconditionally add or remove header
546							row.
547
548							=item * B<output_quote_char> => I<str>
549
550							Specify field quote character in output CSV, will be passed to Text::CSV_XS.
551
552							This is like C<--input-quote-char> option but for output instead of input.
553
554							Defaults to C<"> (double quote). Overrides C<--output-tsv> option.
555
556							=item * B<output_quote_empty> => I<bool> (default: 0)
557
558							Whether to quote empty values.
559
560							When set to false (the default), empty values are not quoted:
561
562							field1,field2,,field4
563
564							When set to true, then empty values will be quoted:
565
566							field1,field2,"",field4
567
568							=item * B<output_sep_char> => I<str>
569
570							Specify field separator character in output CSV, will be passed to Text::CSV_XS.
571
572							This is like C<--input-sep-char> option but for output instead of input.
573
574							Defaults to C<,> (comma). Overrides C<--output-tsv> option.
575
576							=item * B<output_tsv> => I<bool>
577
578							Inform that output file is TSV (tab-separated) format instead of CSV.
579
580							This is like C<--input-tsv> option but for output instead of input.
581
582							Overriden by C<--output-sep-char>, C<--output-quote-char>, C<--output-escape-char>
583							options. If one of those options is specified, then C<--output-tsv> will be
584							ignored.
585
586							=item * B<overwrite> => I<bool>
587
588							Whether to override existing output file.
589
590							=item * B<reverse> => I<bool>
591
592							(No description)
593
594							=item * B<sortsub_args> => I<hash>
595
596							Arguments to pass to Sort::Sub routine.
597
598
599							=back
600
601							Returns an enveloped result (an array).
602
603							First element ($status_code) is an integer containing HTTP-like status code
604							(200 means OK, 4xx caller error, 5xx function error). Second element
605							($reason) is a string containing error message, or something like "OK" if status is
606							200. Third element ($payload) is the actual result, but usually not present when enveloped result is an error response ($status_code is not 2xx). Fourth
607							element (%result_meta) is called result metadata and is optional, a hash
608							that contains extra information, much like how HTTP response headers provide additional metadata.
609
610							Return value: (any)
611
612							=head1 HOMEPAGE
613
614							Please visit the project's homepage at L<https://metacpan.org/release/App-CSVUtils>.
615
616							=head1 SOURCE
617
618							Source repository is at L<https://github.com/perlancar/perl-App-CSVUtils>.
619
620							=head1 AUTHOR
621
622							perlancar <perlancar@cpan.org>
623
624							=head1 CONTRIBUTING
625
626
627							To contribute, you can send patches by email/via RT, or send pull requests on
628							GitHub.
629
630							Most of the time, you don't need to build the distribution yourself. You can
631							simply modify the code, then test via:
632
633							% prove -l
634
635							If you want to build the distribution (e.g. to try to install it locally on your
636							system), you can install L<Dist::Zilla>,
637							L<Dist::Zilla::PluginBundle::Author::PERLANCAR>,
638							L<Pod::Weaver::PluginBundle::Author::PERLANCAR>, and sometimes one or two other
639							Dist::Zilla- and/or Pod::Weaver plugins. Any additional steps required beyond
640							that are considered a bug and can be reported to me.
641
642							=head1 COPYRIGHT AND LICENSE
643
644							This software is copyright (c) 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016 by perlancar <perlancar@cpan.org>.
645
646							This is free software; you can redistribute it and/or modify it under
647							the same terms as the Perl 5 programming language system itself.
648
649							=head1 BUGS
650
651							Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=App-CSVUtils>
652
653							When submitting a bug or request, please include a test-file or a
654							patch to an existing test-file that illustrates the bug or desired
655							feature.
656
657							=cut