File Coverage

blib/lib/App/csvtool/Summarizing.pm
Criterion Covered Total %
statement 36 36 100.0
branch 3 4 75.0
condition n/a
subroutine 8 8 100.0
pod n/a
total 47 48 97.9


line stmt bran cond sub pod time code
1             # You may distribute under the terms of either the GNU General Public License
2             # or the Artistic License (the same terms as Perl itself)
3             #
4             # (C) Paul Evans, 2024 -- leonerd@leonerd.org.uk
5              
6             package App::csvtool::Summarizing 0.04;
7              
8 12     12   9884 use v5.26;
  12         46  
9 12     12   71 use warnings;
  12         23  
  12         788  
10 12     12   74 use experimental 'signatures';
  12         28  
  12         88  
11              
12             =head1 NAME
13              
14             C<App::csvtool::Summarizing> - summarize tabular data in F<csvtool>
15              
16             =cut
17              
18             package App::csvtool::count
19             {
20              
21             =head2 count
22              
23             $ csvtool count -fFIELD INPUT...
24              
25             Counts the number of rows that have distinct values for the selected field.
26              
27             Outputs a new table having only two columns. The first column will be the
28             distinct values of the selected field that were found in the input, the second
29             column will be an integer giving the number of rows of the input which had
30             that value. Rows are output in order of the first time each distinct
31             value was seen in the input.
32              
33             Besides the selected key field, all other fields of the input are ignored.
34              
35             =head3 --field, -f
36              
37             The field index to use as the counting key (defaults to 1).
38              
39             =cut
40              
41 12     12   2558 use constant COMMAND_DESC => "Count the number of rows by the value in FIELD";
  12         26  
  12         880  
42              
43 12         875 use constant COMMAND_OPTS => (
44             { name => "field|f=", description => "Field to extract",
45             default => 1 },
46 12     12   71 );
  12         20  
47              
48 12     12   125 use constant WANT_READER => 1;
  12         22  
  12         622  
49 12     12   66 use constant WANT_OUTPUT => 1;
  12         43  
  12         3912  
50              
51 2         5 sub run ( $pkg, $opts, $reader, $output )
  2         4  
  2         3  
52 2     2   23315 {
  2         4  
  2         3  
53 2         7 my $FIELD = $opts->{field};
54              
55             # 1-indexed
56 2         6 $FIELD--;
57              
58 2         4 my @keys; # maintain original first-seen order
59             my %count_for_key;
60              
61 2         8 while( my $row = $reader->() ) {
62 10         64 my $key = $row->[$FIELD];
63 10 50       21 defined $key or next;
64              
65 10 100       27 exists $count_for_key{$key} or push @keys, $key;
66 10         35 $count_for_key{$key}++;
67             }
68              
69 2         12 foreach my $key ( @keys ) {
70 8         34 $output->( [ $key, $count_for_key{$key} ] );
71             }
72             }
73             }
74              
75             =head1 AUTHOR
76              
77             Paul Evans
78              
79             =cut
80              
81             0x55AA;