File Coverage

blib/lib/App/csvtool.pm

Criterion	Covered	Total	%
statement	208	208	100.0
branch	21	24	87.5
condition	16	22	72.7
subroutine	43	43	100.0
pod			n/a
total	288	297	96.9

line	stmt	bran	cond	sub	time	code
1						# You may distribute under the terms of either the GNU General Public License
2						# or the Artistic License (the same terms as Perl itself)
3						#
4						# (C) Paul Evans, 2021-2024 -- leonerd@leonerd.org.uk
5
6						package App::csvtool 0.04;
7
8	13			13	3697063	use v5.26;
	13				58
9	13			13	98	use warnings;
	13				105
	13				1000
10	13			13	791	use experimental 'signatures';
	13				2162
	13				105
11
12	13			13	10009	use Commandable 0.11;
	13				2787
	13				946
13
14						=head1 NAME
15
16						C - implements the F core commands
17
18						=head1 DESCRIPTION
19
20						This module provides the main commands for the F wrapper script.
21
22						=head1 COMMANDS
23
24						=cut
25
26						package App::csvtool::cut
27						{
28
29						=head2 cut
30
31						$ csvtool cut -fFIELDS INPUT...
32
33						Extracts the given field column(s).
34
35						=head3 --fields, -f
36
37						A comma-separated list of field indexes (defaults to 1).
38
39						A field index of C will result in an undefined (i.e. empty) field being
40						emitted. This can be used to create spaces and pad out the data.
41
42						=cut
43
44	13			13	122	use constant COMMAND_DESC => "Extract the given field(s) to output";
	13				58
	13				1362
45
46	13				817	use constant COMMAND_OPTS => (
47						{ name => "fields\|f=", description => "Comma-separated list of fields to extract",
48						default => "1" },
49	13			13	88	);
	13				25
50
51	13			13	78	use constant WANT_READER => 1;
	13				36
	13				894
52	13			13	91	use constant WANT_OUTPUT => 1;
	13				35
	13				4545
53
54	3				4	sub run ( $pkg, $opts, $reader, $output )
	3				4
	3				5
55	3			3	15684	{
	3				3
	3				4
56	3				10	my @FIELDS = split m/,/, $opts->{fields};
57
58						# 1-indexed
59	3		66		18	$_ eq "u" \|\| $_-- for @FIELDS;
60
61	3				6	while( my $row = $reader->() ) {
62	6	100			35	$output->( [ map { $_ eq "u" ? undef : $row->[$_] } @FIELDS ] );
	12				25
63						}
64						}
65						}
66
67						package App::csvtool::grep
68						{
69
70						=head2 grep
71
72						$ csvtool grep PATTERN INPUT...
73
74						Filter rows by the given pattern. The pattern is always interpreted as a Perl
75						regular expression.
76
77						=head3 --ignore-case, -i
78
79						Ignore case when matching.
80
81						=head3 --invert-match, -v
82
83						Output only the lines that do not match the filter pattern.
84
85						=cut
86
87	13			13	100	use constant COMMAND_DESC => "Filter rows based on a regexp pattern";
	13				25
	13				1343
88
89	13				1201	use constant COMMAND_OPTS => (
90						{ name => "field\|f=", description => "Field to filter by",
91						default => 1 },
92						{ name => "ignore-case\|i", description => "Match ignoring case" },
93						{ name => "invert-match\|v", description => "Selects only the non-matching rows" },
94	13			13	85	);
	13				28
95
96	13				859	use constant COMMAND_ARGS => (
97						{ name => "pattern", description => "regexp pattern for filtering" },
98	13			13	88	);
	13				26
99
100	13			13	76	use constant WANT_READER => 1;
	13				24
	13				758
101	13			13	88	use constant WANT_OUTPUT => 1;
	13				25
	13				4317
102
103	4				10	sub run ( $pkg, $opts, $pattern, $reader, $output )
	4				65
	4				9
	4				8
104	4			4	27861	{
	4				7
	4				6
105	4				9	my $FIELD = $opts->{field};
106	4		100		22	my $INVERT = $opts->{invert_match} // 0;
107
108	4	100			10	$pattern = "(?i:$pattern)" if $opts->{ignore_case};
109
110						# 1-based
111	4				11	$FIELD--;
112
113	4				78	my $re = qr/$pattern/;
114
115	4				17	while( my $row = $reader->() ) {
116	16	100			216	$output->( $row ) if $INVERT ^ $row->[ $FIELD ] =~ $re;
117						}
118						}
119						}
120
121						package App::csvtool::head
122						{
123
124						=head2 head
125
126						$ csvtool head -nLINES INPUT...
127
128						Output only the first few rows.
129
130						=head3 --lines, -n
131
132						Number of lines to output. If negative, will output all but the final few rows
133						of the given number.
134
135						=cut
136
137	13			13	105	use constant COMMAND_DESC => "Select the first few rows";
	13				55
	13				1085
138
139	13				802	use constant COMMAND_OPTS => (
140						{ name => "lines\|n=i", description => "Number of rows to select",
141						default => 10 },
142	13			13	134	);
	13				92
143
144	13			13	74	use constant WANT_READER => 1;
	13				32
	13				710
145	13			13	95	use constant WANT_OUTPUT => 1;
	13				22
	13				6568
146
147	3				9	sub run ( $pkg, $opts, $reader, $output )
	3				8
	3				7
148	3			3	31297	{
	3				6
	3				6
149	3				12	my $LINES = $opts->{lines};
150
151	3	100			19	if( $LINES > 0 ) {
		50
152	2		66		23	while( $LINES-- > 0 and my $row = $reader->() ) {
153	4				47	$output->( $row );
154						}
155						}
156						elsif( $LINES < 0 ) {
157	1				4	my @ROWS;
158	1		66		11	while( $LINES++ < 0 and my $row = $reader->() ) {
159	1				13	push @ROWS, $row;
160						}
161	1				5	while( my $row = $reader->() ) {
162	4				29	$output->( shift @ROWS );
163	4				14	push @ROWS, $row;
164						}
165						}
166						}
167						}
168
169						package App::csvtool::join
170						{
171
172						=head2 join
173
174						$ csvtool join -fFIELD FILE1 FILE2
175
176						Reads two files and outputs rows joined by a common key.
177
178						The second file is read entirely into memory and indexed by its key field.
179						Then the first file is read a row at a time, and each row has the
180						corresponding data from the second file appended to it when output.
181
182						This is more flexible than the F UNIX tool that inspires it, because
183						C does not need to be sorted in key order in the same way as C.
184						Additionally, rows of C may be emitted zero, one, or more times as
185						required by matches from C.
186
187						=head3 --field1, -1
188
189						The field index in FILE1 to use as the lookup key.
190
191						=head3 --field2, -2
192
193						The field index in FILE2 to use as the storage key.
194
195						=head3 --field, -f
196
197						Use the same field index for both files.
198
199						=cut
200
201	13			13	125	use constant COMMAND_DESC => "Join two files by a common key";
	13				24
	13				1345
202
203	13				890	use constant COMMAND_OPTS => (
204						{ name => "field\|f=", description => "Field of both files to join by" },
205						{ name => "field1\|1=", description => "Field of FILE1 to join by" },
206						{ name => "field2\|2=", description => "Field of FILE2 to join by" },
207	13			13	86	);
	13				25
208
209	13			13	76	use constant WANT_READER => 2;
	13				26
	13				716
210	13			13	73	use constant WANT_OUTPUT => 1;
	13				24
	13				6054
211
212	2				7	sub run ( $pkg, $opts, $reader1, $reader2, $output )
	2				5
	2				4
	2				5
213	2			2	21943	{
	2				2
	2				5
214	2		66		13	my $FIELD1 = $opts->{field1} // $opts->{field}; $FIELD1--;
	2				5
215	2		66		28	my $FIELD2 = $opts->{field2} // $opts->{field}; $FIELD2--;
	2				5
216
217						# Load the joindata from second reader
218	2				5	my %rows_by_key;
219	2				8	while( my $row = $reader2->() ) {
220	6				41	my $key = splice @$row, $FIELD2, 1, ();
221						warn "FILE2 contains duplicate key '$key'\n"
222	6	50			16	if exists $rows_by_key{$key};
223	6				21	$rows_by_key{$key} = $row;
224						}
225
226	2				12	while( my $row = $reader1->() ) {
227	3				33	my $key = $row->[$FIELD1];
228	3		100		7	$output->( [ @$row, @{ $rows_by_key{$key} // [] } ] );
	3				1600
229						}
230						}
231						}
232
233						package App::csvtool::sort
234						{
235
236						=head2 sort
237
238						$ csvtool sort INPUT...
239
240						Sorts the rows according to the given field.
241
242						=head3 --field, -f
243
244						The field index to sort by (defaults to 1).
245
246						=head3 --numerical, -n
247
248						Sorts numerically. If absent, sorting happens alphabetically.
249
250						=head3 --reverse, -r
251
252						Reverses the order of sorting.
253
254						=cut
255
256	13			13	108	use constant COMMAND_DESC => "Sort lexicographically (or numerically) by the given FIELD";
	13				28
	13				1480
257
258	13				1260	use constant COMMAND_OPTS => (
259						{ name => "numerical\|n", description => "Sort numerically" },
260						{ name => "reverse\|r", description => "Reverse order of sorting" },
261						{ name => "field\|f=", description => "Field to key by",
262						default => 1 },
263	13			13	134	);
	13				24
264
265	13			13	8227	use List::UtilsBy qw( sort_by nsort_by );
	13				32757
	13				1658
266
267	13			13	105	use constant WANT_READER => 1;
	13				54
	13				1025
268	13			13	101	use constant WANT_OUTPUT => 1;
	13				28
	13				6854
269
270	5				14	sub run ( $pkg, $opts, $reader, $output )
	5				9
	5				10
271	5			5	65603	{
	5				14
	5				10
272	5				16	my $FIELD = $opts->{field};
273
274						# 1-indexed
275	5				12	$FIELD--;
276
277	5				10	my @rows;
278	5				19	while( my $row = $reader->() ) {
279	20				166	push @rows, $row;
280						}
281
282	5	100			33	if( $opts->{numerical} ) {
283	2			8	17	@rows = nsort_by { $_->[$FIELD] } @rows;
	8				52
284						}
285						else {
286	3			12	26	@rows = sort_by { $_->[$FIELD] } @rows;
	12				81
287						}
288
289	5	100			129	if( $opts->{reverse} ) {
290	2				11	$output->( $_ ) for reverse @rows;
291						}
292						else {
293	3				13	$output->( $_ ) for @rows;
294						}
295						}
296						}
297
298						package App::csvtool::tail
299						{
300
301						=head2 tail
302
303						$ csvtool tail -nLINES INPUT...
304
305						Output only the final few rows.
306
307						=head3 --lines, -n
308
309						Number of lines to output. If negative, will output all but the first few rows
310						of the given number.
311
312						=cut
313
314	13			13	115	use constant COMMAND_DESC => "Select the final few rows";
	13				27
	13				1978
315
316	13				958	use constant COMMAND_OPTS => (
317						{ name => "lines\|n=i", description => "Number of rows to select",
318						default => 10 },
319	13			13	109	);
	13				29
320
321	13			13	218	use constant WANT_READER => 1;
	13				163
	13				881
322	13			13	79	use constant WANT_OUTPUT => 1;
	13				25
	13				6318
323
324	3				9	sub run ( $pkg, $opts, $reader, $output )
	3				4
	3				7
325	3			3	23842	{
	3				5
	3				5
326	3				8	my $LINES = $opts->{lines};
327
328	3	100			15	if( $LINES > 0 ) {
		50
329	2				5	my @ROWS;
330	2				5	while( my $row = $reader->() ) {
331	10	100			65	shift @ROWS if @ROWS >= $LINES;
332	10				28	push @ROWS, $row;
333						}
334	2				16	$output->( $_ ) for @ROWS;
335						}
336						elsif( $LINES < 0 ) {
337	1		66		8	while( $LINES++ < 0 and my $row = $reader->() ) {
338						# discard it
339						}
340	1				15	while( my $row = $reader->() ) {
341	4				28	$output->( $row );
342						}
343						}
344						}
345						}
346
347						package App::csvtool::uniq
348						{
349
350						=head2 uniq
351
352						$ csvtool uniq -fFIELD INPUT...
353
354						Filters rows for unique values of the given field.
355
356						=head3 --field, -f
357
358						The field index to select rows on (defaults to 1).
359
360						=cut
361
362	13			13	101	use constant COMMAND_DESC => "Filter rows for unique values of the given FIELD";
	13				49
	13				1052
363
364	13				793	use constant COMMAND_OPTS => (
365						{ name => "field\|f=", description => "Field to key by",
366						default => 1 },
367	13			13	102	);
	13				25
368
369	13			13	75	use constant WANT_READER => 1;
	13				21
	13				674
370	13			13	70	use constant WANT_OUTPUT => 1;
	13				20
	13				3351
371
372	2				6	sub run ( $pkg, $opts, $reader, $output )
	2				4
	2				3
373	2			2	17177	{
	2				4
	2				2
374	2				5	my $FIELD = $opts->{field};
375
376						# 1-based
377	2				4	$FIELD--;
378
379	2				4	my %seen;
380
381	2				5	while( my $row = $reader->() ) {
382	8	100			67	$output->( $row ) unless $seen{ $row->[$FIELD] }++;
383						}
384						}
385						}
386
387						=head1 AUTHOR
388
389						Paul Evans
390
391						=cut
392
393						0x55AA;