File Coverage

blib/lib/App/csvtool/Smudge.pm
Criterion Covered Total %
statement 50 58 86.2
branch 5 8 62.5
condition 4 8 50.0
subroutine 11 12 91.6
pod n/a
total 70 86 81.4


line stmt bran cond sub pod time code
1             # You may distribute under the terms of either the GNU General Public License
2             # or the Artistic License (the same terms as Perl itself)
3             #
4             # (C) Paul Evans, 2024 -- leonerd@leonerd.org.uk
5              
6             package App::csvtool::Smudge 0.04;
7              
8 13     13   2999561 use v5.26;
  13         57  
9 13     13   77 use warnings;
  13         25  
  13         2044  
10 13     13   83 use experimental 'signatures';
  13         27  
  13         128  
11              
12             =head1 NAME
13              
14             C - implements the F F command
15              
16             =head1 COMMANDS
17              
18             =cut
19              
20             package App::csvtool::smudge
21             {
22              
23             =head2 smudge
24              
25             $ csvtool smudge -F IDX:FILTER INPUT...
26              
27             Applies smudging filters to columns of the input, generating new data that is
28             output in the same shape. A "smudge" filter is one that operates on numerical
29             data, attempting to reduce the imact of any individual values and smooth out
30             small variations, emitting values that follow general trends. This assumes
31             that successive rows of data represent successive moments in time, containing
32             measurements or readings taken at each instant.
33              
34             Different filters can be applied to individual columns, as specified by the
35             C<--filter> (or C<-F>) argument. Any columns that are not filtered are simply
36             copied as they stand, and thus do not even have to be numeric in nature.
37              
38             =head3 --filter, -F
39              
40             A filter specification to apply to a column of data. Specified as a string
41             giving the column index (starting from 1), and the name of the filter. May be
42             specified multiple times to apply multiple different filters for different
43             columns. C may specify multiple field indexes, separated by commas.
44              
45             =cut
46              
47 13     13   3314 use constant COMMAND_DESC => "Apply smudge filtering to columns of data";
  13         27  
  13         1446  
48              
49 13         916 use constant COMMAND_OPTS => (
50             { name => "filter|F=", description => "filters to apply to each column",
51             multi => 1, },
52 13     13   119 );
  13         39  
53              
54 13     13   82 use constant WANT_READER => 1;
  13         39  
  13         835  
55 13     13   78 use constant WANT_OUTPUT => 1;
  13         157  
  13         849  
56              
57 13     13   111 use Carp;
  13         53  
  13         1459  
58              
59 13     13   86 use List::Util qw( sum );
  13         23  
  13         21723  
60              
61             =head2 FILTERS
62              
63             The following name templates may be used. Names are parametric,
64             and encode information about how the filter acts.
65              
66             =head3 avgI
67              
68             Applies a moving window average over the previous I values.
69              
70             =head3 midI
71              
72             Sorts the previous I values and returns the middle one. To be
73             well-behaved, N should be an odd number.
74              
75             =head3 ravgI
76              
77             Recursive average with weighting of C<2 ** -NNN>.
78              
79             =head3 total
80              
81             Running total of every value seen so far.
82              
83             =cut
84              
85             my @FILTERS = (
86             {
87             name => "avgN",
88             desc => "N-point moving window average",
89             make => sub ( $count ) {
90             my @hist;
91             return sub ( $new ) {
92             push @hist, $new;
93             shift @hist while @hist > $count;
94             return sum(@hist) / @hist;
95             };
96             },
97             },
98             {
99             name => "midN",
100             desc => "Median of N values",
101             make => sub ( $count ) {
102             my @hist;
103             return sub ( $new ) {
104             push @hist, $new;
105             shift @hist while @hist > $count;
106             my @sorted = sort { $a <=> $b } @hist;
107             return $sorted[$#sorted/2];
108             };
109             }
110             },
111             {
112             name => "ravgN",
113             desc => "Recusive average with weighting 2 ** -N",
114             make => sub ( $param ) {
115             my $alpha = 2 ** -$1;
116             my $prev;
117             return sub ( $new ) {
118             return $prev = $new if !defined $prev;
119             # $result = ( $prev * ( 1 - $alpha ) ) + ( $new * $alpha )
120             # = $prev * 1 - $prev * $alpha + $new * $alpha
121             return $prev = $prev + $alpha * ( $new - $prev );
122             };
123             }
124             },
125             {
126             name => "total",
127             desc => "Running total",
128             make => sub ( $ ) {
129             my $total = 0;
130             return sub ( $new ) {
131             $total += $new;
132             return $total;
133             }
134             }
135             },
136             );
137              
138             sub mk_filter ( $name )
139 6     6   12 {
  6         12  
  6         9  
140 6         15 foreach ( @FILTERS ) {
141 12         56 my $re = $_->{name} =~ s/N$/(\\d+)/r;
142 12 100       429 next unless $name =~ qr/^$re$/;
143 6         33 return $_->{make}( $1 );
144             }
145              
146 0         0 croak "Unrecognised filter name $name";
147             }
148              
149             # For Commandable's builtin 'help' support
150             sub commandable_more_help
151             {
152 0     0   0 Commandable::Output->printf( "\n" );
153 0         0 Commandable::Output->print_heading( "FILTERS:" );
154              
155 0         0 Commandable::Output->printf( " Each filter should be specified as IDX(,IDX...):FILTER\n" );
156 0         0 Commandable::Output->printf( "\n" );
157              
158 0         0 foreach ( @FILTERS ) {
159             Commandable::Output->printf( " %s\n",
160 0         0 Commandable::Output->format_note( $_->{name}, 1 ) );
161             Commandable::Output->printf( " %s\n",
162 0         0 $_->{desc} );
163             }
164             }
165              
166 6         14 sub run ( $pkg, $opts, $reader, $output )
  6         11  
  6         10  
167 6     6   44747 {
  6         9  
  6         12  
168 6         10 my @filters;
169 6   100     32 foreach my $spec ( ( $opts->{filter} // [] )->@* ) {
170             # TODO: Accept DD-DD,DD-DD,etc... as indexes
171 5 50       50 my ( $fields, $filter ) = $spec =~ m/^(\d+(?:,\d+)*):(.*)$/ or
172             warn( "Unrecognised --filter spec; expected IDX:FILTER\n" ), next;
173              
174 5         22 foreach my $idx ( split m/,/, $fields ) {
175 6         15 $filters[$idx - 1] = mk_filter( $filter );
176             }
177             }
178              
179 6         30 while( my $row = $reader->() ) {
180 42         366 my @data = @$row;
181              
182             # Skip header lines
183 42 50 33     183 unless( @data and $data[0] =~ m/^#/ ) {
184 42         91 foreach my $idx ( keys @filters ) {
185 44 50 33     215 length $data[$idx] and defined $filters[$idx] and
186             $data[$idx] = $filters[$idx]->( $data[$idx] );
187             }
188             }
189              
190 42         113 $output->( \@data );
191             }
192             }
193             }
194              
195             =head1 AUTHOR
196              
197             Paul Evans
198              
199             =cut
200              
201             0x55AA;