| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# You may distribute under the terms of either the GNU General Public License |
|
2
|
|
|
|
|
|
|
# or the Artistic License (the same terms as Perl itself) |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# (C) Paul Evans, 2024 -- leonerd@leonerd.org.uk |
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
package App::csvtool::Summarizing 0.04; |
|
7
|
|
|
|
|
|
|
|
|
8
|
12
|
|
|
12
|
|
9884
|
use v5.26; |
|
|
12
|
|
|
|
|
46
|
|
|
9
|
12
|
|
|
12
|
|
71
|
use warnings; |
|
|
12
|
|
|
|
|
23
|
|
|
|
12
|
|
|
|
|
788
|
|
|
10
|
12
|
|
|
12
|
|
74
|
use experimental 'signatures'; |
|
|
12
|
|
|
|
|
28
|
|
|
|
12
|
|
|
|
|
88
|
|
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 NAME |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
C<App::csvtool::Summarizing> - summarize tabular data in F<csvtool> |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=cut |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
package App::csvtool::count
{

=head2 count

   $ csvtool count -fFIELD INPUT...

Counts the number of rows that have distinct values for the selected field.

Outputs a new table having only two columns. The first column will be the
distinct values of the selected field that were found in the input, the second
column will be an integer giving the number of rows of the input which had
that value. Rows are output in order of the first time each distinct value was
seen in the input.

Besides the selected key field, all other fields of the input are ignored.

=head3 --field, -f

The field index to use as the counting key (defaults to 1).

=cut

   # Scalar-valued command metadata, declared together
   use constant {
      COMMAND_DESC => "Count the number of rows by the value in FIELD",
      WANT_READER  => 1,
      WANT_OUTPUT  => 1,
   };

   # This one is a list-valued constant (a list of option specs), so it has
   # to stay in the list form of `use constant`
   use constant COMMAND_OPTS => (
      { name => "field|f=", description => "Field to extract",
         default => 1 },
   );

   # run( $pkg, $opts, $reader, $output )
   #
   #   $opts   - hash reference; the 'field' entry is the 1-based index of the
   #             column whose values are counted
   #   $reader - code reference, called with no arguments until it returns a
   #             false value; each true result is used as an array reference
   #             holding one row
   #   $output - code reference, called once per distinct key with a
   #             two-element array reference [ KEY, COUNT ]
   #
   # Consumes the entire input before emitting anything. Rows whose key cell
   # is undefined (including rows too short to have one) are not counted.
   sub run ( $pkg, $opts, $reader, $output )
   {
      # Option value is 1-indexed; array subscripts are 0-indexed
      my $idx = $opts->{field};
      $idx--;

      my %tally;
      my @first_seen;   # distinct keys, in order of first appearance

      while( my $row = $reader->() ) {
         my $value = $row->[$idx];
         next unless defined $value;

         push @first_seen, $value unless exists $tally{$value};
         $tally{$value}++;
      }

      foreach my $value ( @first_seen ) {
         $output->( [ $value, $tally{$value} ] );
      }
   }
}
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=head1 AUTHOR |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
Paul Evans |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=cut |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
0x55AA; |