line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#!/usr/bin/perl -w |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# dbcolstats.pm |
5
|
|
|
|
|
|
|
# Copyright (C) 1991-2015 by John Heidemann |
6
|
|
|
|
|
|
|
# $Id: b8f85fa383507a09ebfc72e644fadd6e1d5ceed0 $ |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# This program is distributed under terms of the GNU general |
9
|
|
|
|
|
|
|
# public license, version 2. See the file COPYING |
10
|
|
|
|
|
|
|
# in $dblibdir for details. |
11
|
|
|
|
|
|
|
# |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
package Fsdb::Filter::dbcolstats; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 NAME |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
dbcolstats - compute statistics on a fsdb column |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 SYNOPSIS |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
dbcolstats [-amS] [-c ConfidenceFraction] [-q NumberOfQuantiles] column |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=head1 DESCRIPTION |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
Compute statistics over a COLUMN of data. |
26
|
|
|
|
|
|
|
Records containing non-numeric data are considered null and |
27
|
|
|
|
|
|
|
do not contribute to the stats (with the C<-a> option |
28
|
|
|
|
|
|
|
they are treated as zeros). |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
Confidence intervals are a t-test (+/- (t_{a/2})*s/sqrt(n)) |
31
|
|
|
|
|
|
|
and assume the population takes a normal distribution |
32
|
|
|
|
|
|
|
with a small number of samples (< 100). |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
By default, |
35
|
|
|
|
|
|
|
all statistics are computed as for a population I<sample> (with an ``n-1'' term), |
36
|
|
|
|
|
|
|
not as representing the whole population (using ``n''). |
37
|
|
|
|
|
|
|
Select between them with B<--sample> or B<--nosample>. |
38
|
|
|
|
|
|
|
When you measure the entire population, use the latter option. |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
The output of this program is probably best looked at after |
41
|
|
|
|
|
|
|
reformatting with L<dblistize(1)>. |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
Dbcolstats runs in O(1) memory. Median or quantile requires sorting the |
44
|
|
|
|
|
|
|
data and invokes dbsort. Sorting will run in constant RAM but |
45
|
|
|
|
|
|
|
O(number of records) disk space. If median or quantile is required |
46
|
|
|
|
|
|
|
and the data is already sorted, dbcolstats will run more efficiently with |
47
|
|
|
|
|
|
|
the -S option. |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=head1 OPTIONS |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=over 4 |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
=item B<-a> or B<--include-non-numeric> |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
Compute stats over all records (treat non-numeric records |
57
|
|
|
|
|
|
|
as zero rather than just ignoring them). |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=item B<-c FRACTION> or B<--confidence FRACTION> |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
Specify FRACTION for the confidence interval. |
62
|
|
|
|
|
|
|
Defaults to 0.95 for a 95% confidence factor. |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
=item B<-f FORMAT> or B<--format FORMAT> |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
Specify a L<printf(3)>-style format for output statistics. |
67
|
|
|
|
|
|
|
Defaults to C<%.5g>. |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=item B<-m> or B<--median> |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
Compute median value. (Will sort data if necessary.) |
72
|
|
|
|
|
|
|
(Median is the quantile for N=2.) |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=item B<-q N> or B<--quantile N> |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
Compute quantile (quartile when N is 4), |
77
|
|
|
|
|
|
|
or an arbitrary quantile for other values of N, |
78
|
|
|
|
|
|
|
reporting the scores that fall at each 1/Nth of the way across the population. |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=item B<--sample> |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
Compute I<sample> population statistics |
83
|
|
|
|
|
|
|
(e.g., the sample standard deviation), |
84
|
|
|
|
|
|
|
assuming I<n-1> degrees of freedom. |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=item B<--nosample> |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
Compute I<whole> population statistics |
89
|
|
|
|
|
|
|
(e.g., the population standard deviation). |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
=item B<-S> or B<--pre-sorted> |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
Assume data is already sorted. |
94
|
|
|
|
|
|
|
With one -S, we check and confirm this precondition. |
95
|
|
|
|
|
|
|
When repeated, we skip the check. |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
=item B<--parallelism=N> or C<-j N> |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
Allow sorting to happen in parallel. |
100
|
|
|
|
|
|
|
Defaults on. |
101
|
|
|
|
|
|
|
(Only relevant if using non-pre-sorted data with quantiles.) |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=item B<-F> or B<--fs> or B<--fieldseparator> S |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
Specify the field (column) separator as C<S>. |
106
|
|
|
|
|
|
|
See L<dbfilealter> for valid field separators. |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=item B<-T TmpDir> |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
Where to put temporary data. |
111
|
|
|
|
|
|
|
Only used if median or quantiles are requested. |
112
|
|
|
|
|
|
|
Also uses environment variable TMPDIR, if -T is |
113
|
|
|
|
|
|
|
not specified. |
114
|
|
|
|
|
|
|
Default is /tmp. |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=item B<-k KeyField> |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
Do multi-stats, grouped by each key. |
119
|
|
|
|
|
|
|
Assumes keys are sorted. (Use dbmultistats to guarantee sorting order.) |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=back |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=for comment |
126
|
|
|
|
|
|
|
begin_standard_fsdb_options |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
This module also supports the standard fsdb options: |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=over 4 |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=item B<-d> |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
Enable debugging output. |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=item B<-i> or B<--input> InputSource |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
Read from InputSource, typically a file name, or C<-> for standard input, |
139
|
|
|
|
|
|
|
or (if in Perl) an IO::Handle, Fsdb::IO or Fsdb::BoundedQueue object. |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=item B<-o> or B<--output> OutputDestination |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
Write to OutputDestination, typically a file name, or C<-> for standard output, |
144
|
|
|
|
|
|
|
or (if in Perl) an IO::Handle, Fsdb::IO or Fsdb::BoundedQueue object. |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=item B<--autorun> or B<--noautorun> |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
By default, programs process automatically, |
149
|
|
|
|
|
|
|
but Fsdb::Filter objects in Perl do not run until you invoke |
150
|
|
|
|
|
|
|
the run() method. |
151
|
|
|
|
|
|
|
The C<--(no)autorun> option controls that behavior within Perl. |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=item B<--help> |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
Show help. |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=item B<--man> |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
Show full manual. |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=back |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
=for comment |
164
|
|
|
|
|
|
|
end_standard_fsdb_options |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
=head1 SAMPLE USAGE |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
=head2 Input: |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
#fsdb absdiff |
172
|
|
|
|
|
|
|
0 |
173
|
|
|
|
|
|
|
0.046953 |
174
|
|
|
|
|
|
|
0.072074 |
175
|
|
|
|
|
|
|
0.075413 |
176
|
|
|
|
|
|
|
0.094088 |
177
|
|
|
|
|
|
|
0.096602 |
178
|
|
|
|
|
|
|
# | /home/johnh/BIN/DB/dbrow |
179
|
|
|
|
|
|
|
# | /home/johnh/BIN/DB/dbcol event clock |
180
|
|
|
|
|
|
|
# | dbrowdiff clock |
181
|
|
|
|
|
|
|
# | /home/johnh/BIN/DB/dbcol absdiff |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
=head2 Command: |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
cat data.fsdb | dbcolstats absdiff |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
=head2 Output: |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
#fsdb mean stddev pct_rsd conf_range conf_low conf_high conf_pct sum sum_squared min max n |
190
|
|
|
|
|
|
|
0.064188 0.036194 56.387 0.037989 0.026199 0.10218 0.95 0.38513 0.031271 0 0.096602 6 |
191
|
|
|
|
|
|
|
# | /home/johnh/BIN/DB/dbrow |
192
|
|
|
|
|
|
|
# | /home/johnh/BIN/DB/dbcol event clock |
193
|
|
|
|
|
|
|
# | dbrowdiff clock |
194
|
|
|
|
|
|
|
# | /home/johnh/BIN/DB/dbcol absdiff |
195
|
|
|
|
|
|
|
# | dbcolstats absdiff |
196
|
|
|
|
|
|
|
# 0.95 confidence intervals assume normal distribution and small n. |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=head1 SEE ALSO |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
L<dbmultistats(1)>, handles multiple experiments in a single file. |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
L<dblistize(1)>, to pretty-print the output of dbcolstats. |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
L<dbcolpercentile(1)>, to compute an even more general version of median/quantiles. |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
L<dbcolstatscores(1)>, to compute z-scores or t-scores for each row. |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
L<dbrvstatdiff(1)>, to see if two sample populations are statistically different. |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
L<Fsdb(3)>. |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
=head1 BUGS |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
The algorithms used to compute variance have not been |
215
|
|
|
|
|
|
|
audited to check for numerical stability. |
216
|
|
|
|
|
|
|
(See, for example, the Wikipedia article ``Algorithms for calculating variance''.) |
217
|
|
|
|
|
|
|
Variance may be incorrect when standard deviation |
218
|
|
|
|
|
|
|
is small relative to the mean. |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
The field C<conf_pct> implies percentage, but it's actually |
221
|
|
|
|
|
|
|
reported as a fraction (0.95 means 95%). |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
Because of limits of floating point, statistics on numbers of |
224
|
|
|
|
|
|
|
widely different scales may be incorrect. |
225
|
|
|
|
|
|
|
See the dbcolstats test cases shipped with Fsdb for examples. |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
=head1 CLASS FUNCTIONS |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
=cut |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
@ISA = qw(Fsdb::Filter); |
233
|
|
|
|
|
|
|
($VERSION) = 2.0; |
234
|
|
|
|
|
|
|
|
235
|
1
|
|
|
1
|
|
14155
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
25
|
|
236
|
1
|
|
|
1
|
|
5
|
use Pod::Usage; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
66
|
|
237
|
|
|
|
|
|
|
|
238
|
1
|
|
|
1
|
|
5
|
use Fsdb::IO::Reader; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
15
|
|
239
|
1
|
|
|
1
|
|
2
|
use Fsdb::IO::Writer; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
14
|
|
240
|
1
|
|
|
1
|
|
2
|
use Fsdb::Filter; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
16
|
|
241
|
1
|
|
|
1
|
|
4
|
use Fsdb::Filter::dbpipeline qw(dbpipeline_sink dbsort); |
|
1
|
|
|
|
|
0
|
|
|
1
|
|
|
|
|
39
|
|
242
|
1
|
|
|
1
|
|
4
|
use Fsdb::Support qw($is_numeric_regexp); |
|
1
|
|
|
|
|
0
|
|
|
1
|
|
|
|
|
73
|
|
243
|
1
|
|
|
1
|
|
4
|
use Fsdb::Support::TDistribution qw(t_distribution); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
37
|
|
244
|
1
|
|
|
1
|
|
8
|
use Fsdb::Support::NamedTmpfile; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
1612
|
|
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
=head2 new |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
$filter = new Fsdb::Filter::dbcolstats(@arguments); |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
Create a new dbcolstats object, taking command-line arguments. |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
=cut |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
sub new($@) { |
256
|
0
|
|
|
0
|
1
|
|
my $class = shift @_; |
257
|
0
|
|
|
|
|
|
my $self = $class->SUPER::new(@_); |
258
|
0
|
|
|
|
|
|
bless $self, $class; |
259
|
0
|
|
|
|
|
|
$self->set_defaults; |
260
|
0
|
|
|
|
|
|
$self->parse_options(@_); |
261
|
0
|
|
|
|
|
|
$self->SUPER::post_new(); |
262
|
0
|
|
|
|
|
|
return $self; |
263
|
|
|
|
|
|
|
} |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
=head2 set_defaults |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
$filter->set_defaults(); |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
Internal: set up defaults. |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=cut |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
sub set_defaults($) { |
275
|
0
|
|
|
0
|
1
|
|
my($self) = @_; |
276
|
0
|
|
|
|
|
|
$self->SUPER::set_defaults(); |
277
|
0
|
|
|
|
|
|
$self->{_target_column} = undef; |
278
|
0
|
|
|
|
|
|
$self->{_confidence_fraction} = 0.95; |
279
|
0
|
|
|
|
|
|
$self->{_format} = "%.5g"; |
280
|
0
|
|
|
|
|
|
$self->{_quantile} = undef; |
281
|
0
|
|
|
|
|
|
$self->{_median} = undef; # special case: renames the output field |
282
|
0
|
|
|
|
|
|
$self->{_sample} = 1; |
283
|
0
|
|
|
|
|
|
$self->{_pre_sorted} = 0; |
284
|
0
|
|
|
|
|
|
$self->{_include_non_numeric} = undef; |
285
|
0
|
|
|
|
|
|
$self->{_fscode} = undef; |
286
|
0
|
|
|
|
|
|
$self->{_max_parallelism} = undef; |
287
|
0
|
|
|
|
|
|
$self->{_key_column} = undef; |
288
|
0
|
|
|
|
|
|
$self->set_default_tmpdir; |
289
|
|
|
|
|
|
|
} |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
=head2 parse_options |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
$filter->parse_options(@ARGV); |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
Internal: parse command-line arguments. |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
=cut |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
sub parse_options($@) { |
300
|
0
|
|
|
0
|
1
|
|
my $self = shift @_; |
301
|
|
|
|
|
|
|
|
302
|
0
|
|
|
|
|
|
my(@argv) = @_; |
303
|
|
|
|
|
|
|
$self->get_options( |
304
|
|
|
|
|
|
|
\@argv, |
305
|
0
|
|
|
0
|
|
|
'help|?' => sub { pod2usage(1); }, |
306
|
0
|
|
|
0
|
|
|
'man' => sub { pod2usage(-verbose => 2); }, |
307
|
|
|
|
|
|
|
'a|include-non-numeric!' => \$self->{_include_non_numeric}, |
308
|
|
|
|
|
|
|
'autorun!' => \$self->{_autorun}, |
309
|
|
|
|
|
|
|
'close!' => \$self->{_close}, |
310
|
|
|
|
|
|
|
'c|confidence=f' => \$self->{_confidence_fraction}, |
311
|
|
|
|
|
|
|
'd|debug+' => \$self->{_debug}, |
312
|
|
|
|
|
|
|
'f|format=s' => \$self->{_format}, |
313
|
|
|
|
|
|
|
'F|fs|cs|fieldseparator|columnseparator=s' => \$self->{_fscode}, |
314
|
0
|
|
|
0
|
|
|
'i|input=s' => sub { $self->parse_io_option('input', @_); }, |
315
|
|
|
|
|
|
|
'j|parallelism=i' => \$self->{_max_parallelism}, |
316
|
|
|
|
|
|
|
'k|key=s' => \$self->{_key_column}, |
317
|
|
|
|
|
|
|
'log!' => \$self->{_logprog}, |
318
|
|
|
|
|
|
|
'm|median!' => \$self->{_median}, |
319
|
0
|
|
|
0
|
|
|
'o|output=s' => sub { $self->parse_io_option('output', @_); }, |
320
|
|
|
|
|
|
|
'q|quantile=i' => \$self->{_quantile}, |
321
|
|
|
|
|
|
|
's|sample!' => \$self->{_sample}, |
322
|
|
|
|
|
|
|
'S|pre-sorted+' => \$self->{_pre_sorted}, |
323
|
|
|
|
|
|
|
'T|tmpdir|tempdir=s' => \$self->{_tmpdir}, |
324
|
|
|
|
|
|
|
'saveoutput=s' => \$self->{_save_output}, |
325
|
0
|
0
|
|
|
|
|
) or pod2usage(2); |
326
|
0
|
|
|
|
|
|
$self->parse_target_column(\@argv); |
327
|
|
|
|
|
|
|
} |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
=head2 setup |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
$filter->setup(); |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
Internal: setup, parse headers. |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
=cut |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
sub setup($) { |
339
|
0
|
|
|
0
|
1
|
|
my($self) = @_; |
340
|
|
|
|
|
|
|
|
341
|
0
|
0
|
|
|
|
|
pod2usage(2) if (!defined($self->{_target_column})); |
342
|
|
|
|
|
|
|
|
343
|
0
|
0
|
|
|
|
|
print STDERR "dbcolstats: pre-input setup\n" if ($self->{_debug} > 2); |
344
|
0
|
|
|
|
|
|
$self->finish_io_option('input', -comment_handler => $self->create_delay_comments_sub); |
345
|
0
|
0
|
|
|
|
|
print STDERR "dbcolstats: post-input setup\n" if ($self->{_debug} > 2); |
346
|
0
|
|
|
|
|
|
$self->{_target_coli} = $self->{_in}->col_to_i($self->{_target_column}); |
347
|
|
|
|
|
|
|
croak $self->{_prog} . ": target column " . $self->{_target_column} . " is not in input stream.\n" |
348
|
0
|
0
|
|
|
|
|
if (!defined($self->{_target_coli})); |
349
|
0
|
|
|
|
|
|
$self->{_key_coli} = undef; |
350
|
0
|
0
|
|
|
|
|
if (defined($self->{_key_column})) { |
351
|
0
|
|
|
|
|
|
$self->{_key_coli} = $self->{_in}->col_to_i($self->{_key_column}); |
352
|
|
|
|
|
|
|
croak($self->{_prog} . ": key column " . $self->{_key_column} . " is not in input stream.\n") |
353
|
0
|
0
|
|
|
|
|
if (!defined($self->{_key_coli})); |
354
|
|
|
|
|
|
|
}; |
355
|
0
|
|
|
|
|
|
my $read_fastpath_sub = $self->{_in}->fastpath_sub(); |
356
|
0
|
|
|
|
|
|
$self->{_read_fastpath_sub} = $read_fastpath_sub; |
357
|
|
|
|
|
|
|
|
358
|
0
|
|
|
|
|
|
my(@headers) = (qw(mean stddev pct_rsd conf_range conf_low conf_high |
359
|
|
|
|
|
|
|
conf_pct sum sum_squared min max n)); |
360
|
0
|
0
|
|
|
|
|
push(@headers, "median") if ($self->{_median}); |
361
|
0
|
0
|
|
|
|
|
if ($self->{_quantile}) { |
362
|
0
|
|
|
|
|
|
foreach (1..($self->{_quantile}-1)) { |
363
|
0
|
|
|
|
|
|
push(@headers, "q$_"); |
364
|
|
|
|
|
|
|
}; |
365
|
|
|
|
|
|
|
}; |
366
|
0
|
0
|
|
|
|
|
unshift(@headers, $self->{_key_column}) if (defined($self->{_key_column})); |
367
|
0
|
0
|
|
|
|
|
print STDERR "dbcolstats: pre-output setup\n" if ($self->{_debug} > 2); |
368
|
0
|
|
|
|
|
|
my @output_options = (-cols => \@headers); |
369
|
|
|
|
|
|
|
unshift (@output_options, -fscode => $self->{_fscode}) |
370
|
0
|
0
|
|
|
|
|
if (defined($self->{_fscode})); |
371
|
0
|
|
|
|
|
|
$self->finish_io_option('output', @output_options); |
372
|
0
|
0
|
|
|
|
|
print STDERR "dbcolstats: post-output setup\n" if ($self->{_debug} > 2); |
373
|
|
|
|
|
|
|
|
374
|
0
|
0
|
0
|
|
|
|
if ($self->{_quantile} || $self->{_median}) { |
375
|
|
|
|
|
|
|
croak($self->{_prog} . ": cannot currently do median or quantile with a key column\n") |
376
|
0
|
0
|
|
|
|
|
if (defined($self->{_key_column})); |
377
|
0
|
|
|
|
|
|
$self->{_save_out_filename} = Fsdb::Support::NamedTmpfile::alloc($self->{_tmpdir}); |
378
|
|
|
|
|
|
|
# sorting needed? |
379
|
0
|
|
|
|
|
|
my $save_out; |
380
|
0
|
|
|
|
|
|
my(@writer_args) = (-cols => [qw(data)]); |
381
|
0
|
0
|
|
|
|
|
print STDERR "dbcolstats: pre-saveoutput setup\n" if ($self->{_debug} > 2); |
382
|
0
|
0
|
|
|
|
|
if (!$self->{_pre_sorted}) { |
383
|
0
|
|
|
|
|
|
my $sorter_fred; |
384
|
0
|
|
|
|
|
|
my(@dbsort_args) = qw(-n data); |
385
|
|
|
|
|
|
|
push (@dbsort_args, '--parallelism', $self->{_max_parallelism}) |
386
|
0
|
0
|
|
|
|
|
if (defined($self->{_max_parallelism})); |
387
|
0
|
0
|
|
|
|
|
print STDERR "dbcolstats: doing sorter thread\n" if ($self->{_debug} > 2); |
388
|
|
|
|
|
|
|
($save_out, $sorter_fred) = dbpipeline_sink(\@writer_args, |
389
|
|
|
|
|
|
|
'--output' => $self->{_save_out_filename}, |
390
|
0
|
|
|
|
|
|
dbsort(@dbsort_args)); |
391
|
0
|
|
|
|
|
|
$self->{_sorter_fred} = $sorter_fred; |
392
|
|
|
|
|
|
|
} else { |
393
|
|
|
|
|
|
|
# no, just write it ourselves |
394
|
0
|
|
|
|
|
|
$save_out = new Fsdb::IO::Writer('-file' => $self->{_save_out_filename}, @writer_args); |
395
|
|
|
|
|
|
|
}; |
396
|
0
|
|
|
|
|
|
$self->{_save_out} = $save_out; |
397
|
0
|
0
|
|
|
|
|
print STDERR "dbcolstats: post-saveoutput setup\n" if ($self->{_debug} > 2); |
398
|
|
|
|
|
|
|
} else { |
399
|
0
|
0
|
|
|
|
|
print STDERR "dbcolstats: no saveoutput needed\n" if ($self->{_debug} > 2); |
400
|
0
|
|
|
|
|
|
$self->{_save_out} = undef; |
401
|
|
|
|
|
|
|
}; |
402
|
|
|
|
|
|
|
} |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
=head2 _round_up |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
$i = _round_up($x); |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
Internal: Round up to the next integer. |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
=cut |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
sub _round_up($) { |
413
|
0
|
|
|
0
|
|
|
my($x) = @_; |
414
|
0
|
|
|
|
|
|
my($xi) = int($x); |
415
|
0
|
0
|
|
|
|
|
return ($x > $xi) ? $xi+1 : $xi; |
416
|
|
|
|
|
|
|
} |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
=head2 _compute_quantile |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
($median, $quantile_aref) = $self->_compute_quantile($n, $mean); |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
Internal: Compute quantile from the saved data. |
423
|
|
|
|
|
|
|
Not generalizable. |
424
|
|
|
|
|
|
|
We assume the saved output is closed before we enter. |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
=cut |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
sub _compute_quantile($$$) { |
429
|
0
|
|
|
0
|
|
|
my ($self, $n, $mean) = @_; |
430
|
|
|
|
|
|
|
|
431
|
0
|
0
|
0
|
|
|
|
return if (!($self->{_quantile} || $self->{_median})); |
432
|
0
|
|
|
|
|
|
my $effective_quantile = $self->{_quantile}; |
433
|
0
|
0
|
|
|
|
|
$effective_quantile = 2 if (!defined($effective_quantile)); |
434
|
|
|
|
|
|
|
|
435
|
0
|
|
|
|
|
|
my $median; |
436
|
|
|
|
|
|
|
my @q; |
437
|
0
|
0
|
|
|
|
|
if ($n <= 1) { |
438
|
0
|
|
|
|
|
|
$median = $mean; |
439
|
0
|
|
|
|
|
|
push(@q, ($mean) x $effective_quantile); |
440
|
0
|
|
|
|
|
|
return ($median, \@q); |
441
|
|
|
|
|
|
|
}; |
442
|
|
|
|
|
|
|
|
443
|
0
|
|
|
|
|
|
my $save_in = new Fsdb::IO::Reader(-file => $self->{_save_out_filename}); |
444
|
0
|
0
|
|
|
|
|
$save_in->error && die $self->{_prog} . ": re-read error " . $save_in->error; |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
# To handle the ugly case of having more ntiles than |
447
|
|
|
|
|
|
|
# data, we detect it and replicate the data until we have more |
448
|
|
|
|
|
|
|
# replicated_data than ntiles. |
449
|
0
|
0
|
|
|
|
|
my($replicate_data) = ($n >= $effective_quantile+1) ? 1 : _round_up(($effective_quantile+1.0)/$n); |
450
|
0
|
|
|
|
|
|
my($replicated_n) = $n * $replicate_data; |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
# Also note that the array of quantiles and the number of |
453
|
|
|
|
|
|
|
# data elements read are both 1-based and not 0-based like |
454
|
|
|
|
|
|
|
# most perl stuff. This is to make the math easier. |
455
|
0
|
|
|
|
|
|
my $median_i = _round_up($replicated_n / 2); |
456
|
0
|
|
|
|
|
|
my $ntile_frac = ($replicated_n + 0.0) / ($effective_quantile + 0.0); |
457
|
0
|
|
|
|
|
|
my($x, $last_x, $next_q_i); |
458
|
0
|
|
|
|
|
|
@q = (0); # note that q is primed with 0 (to fill that zero element) |
459
|
0
|
|
|
|
|
|
my($replicates_left) = 0; |
460
|
0
|
|
|
|
|
|
my($i); # note that i counts from 1! |
461
|
0
|
|
|
|
|
|
for ($i = 1; $#q+1 < $effective_quantile; $i++) { |
462
|
0
|
0
|
|
|
|
|
if (--$replicates_left <= 0) { |
463
|
0
|
|
|
|
|
|
my $fref = $save_in->read_rowobj; |
464
|
0
|
0
|
|
|
|
|
die "internal error re-reading data\n" if (ref($fref) ne 'ARRAY'); |
465
|
0
|
|
|
|
|
|
$x = $fref->[0]; |
466
|
0
|
|
|
|
|
|
$replicates_left = $replicate_data; |
467
|
|
|
|
|
|
|
# Verify sorted order (in case the user lied to us |
468
|
|
|
|
|
|
|
# about pre-sorting). |
469
|
0
|
0
|
0
|
|
|
|
if (defined($last_x) && $x < $last_x) { |
470
|
0
|
0
|
|
|
|
|
my($info) = ($self->{_pre_sorted} ? " (internal error in dbsort)" : " (user specified -S for pre-sorted data but it is unsorted)"); |
471
|
0
|
|
|
|
|
|
die $self->{_prog} . ": cannot process data that is out of order between $last_x and $x $info.\n"; |
472
|
|
|
|
|
|
|
}; |
473
|
0
|
|
|
|
|
|
$last_x = $x; |
474
|
|
|
|
|
|
|
}; |
475
|
0
|
0
|
|
|
|
|
if ($i == $median_i) { $median = $x; }; |
|
0
|
|
|
|
|
|
|
476
|
0
|
0
|
|
|
|
|
$next_q_i = (_round_up($ntile_frac * ($#q + 1.0) )) if (!defined($next_q_i)); |
477
|
|
|
|
|
|
|
# print "d: q=$#q nq=$next_q_i i=$i\n"; |
478
|
0
|
0
|
|
|
|
|
if ($i == $next_q_i) { push(@q, $x); $next_q_i = undef; }; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
}; |
480
|
0
|
|
|
|
|
|
return ($median, \@q); |
481
|
|
|
|
|
|
|
}; |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
=head2 run_one_key |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
$filter->run_one_key(); |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
Internal: run over each row, for a given key. |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
=cut |
491
|
|
|
|
|
|
|
sub run_one_key($) { |
492
|
0
|
|
|
0
|
1
|
|
my($self) = @_; |
493
|
|
|
|
|
|
|
|
494
|
0
|
0
|
|
|
|
|
print STDERR "dbcolstats: starting run\n" if ($self->{_debug} > 2); |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
# xxx: should eval all this to factor out constants from runtime |
497
|
0
|
|
|
|
|
|
my($xf) = $self->{_target_coli}; |
498
|
0
|
|
|
|
|
|
my($key_column) = $self->{_key_column}; |
499
|
|
|
|
|
|
|
|
500
|
0
|
|
|
|
|
|
my($n) = 0; |
501
|
0
|
|
|
|
|
|
my($sx) = 0; |
502
|
0
|
|
|
|
|
|
my($sxx) = 0; |
503
|
0
|
|
|
|
|
|
my $min; |
504
|
|
|
|
|
|
|
my $max; |
505
|
0
|
|
|
|
|
|
my $key; |
506
|
0
|
|
|
|
|
|
my $last_key = $self->{_holdover_key}; |
507
|
0
|
|
|
|
|
|
my $holdover_data = $self->{_holdover_data}; |
508
|
0
|
|
|
|
|
|
$self->{_holdover_key} = $self->{_holdover_data} = undef; |
509
|
|
|
|
|
|
|
|
510
|
0
|
|
|
|
|
|
my $fref; |
511
|
|
|
|
|
|
|
my $x; |
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
{ |
514
|
0
|
|
|
|
|
|
my $save_out = $self->{_save_out}; |
|
0
|
|
|
|
|
|
|
515
|
0
|
|
|
|
|
|
my $read_fastpath_sub = $self->{_read_fastpath_sub}; |
516
|
|
|
|
|
|
|
|
517
|
0
|
|
|
|
|
|
my $code = q' |
518
|
|
|
|
|
|
|
while (1) { |
519
|
|
|
|
|
|
|
if (defined($holdover_data)) { |
520
|
|
|
|
|
|
|
$x = $holdover_data; # and key was set earlier |
521
|
|
|
|
|
|
|
$holdover_data = undef; |
522
|
|
|
|
|
|
|
} else { |
523
|
|
|
|
|
|
|
$fref = &{$read_fastpath_sub}(); |
524
|
|
|
|
|
|
|
last if (!defined($fref)); |
525
|
|
|
|
|
|
|
$x = $fref->[' . $xf . q']; |
526
|
|
|
|
|
|
|
'; |
527
|
0
|
0
|
|
|
|
|
if (defined($self->{_key_column})) { |
528
|
|
|
|
|
|
|
$code .= q' |
529
|
0
|
|
|
|
|
|
$key = $fref->[' . $self->{_key_coli} . ']; |
530
|
|
|
|
|
|
|
if (!defined($last_key)) { |
531
|
|
|
|
|
|
|
$last_key = $key; |
532
|
|
|
|
|
|
|
} elsif ($key ne $last_key) { |
533
|
|
|
|
|
|
|
$self->{_holdover_key} = $key; |
534
|
|
|
|
|
|
|
$self->{_holdover_data} = $x; |
535
|
|
|
|
|
|
|
last; |
536
|
|
|
|
|
|
|
}; |
537
|
|
|
|
|
|
|
'; |
538
|
|
|
|
|
|
|
}; |
539
|
0
|
|
|
|
|
|
$code .= q' |
540
|
|
|
|
|
|
|
}; |
541
|
|
|
|
|
|
|
'; |
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
$code .= 'next if ($x !~ /' . $is_numeric_regexp . "/);\n" |
544
|
0
|
0
|
|
|
|
|
if (!$self->{_include_non_numeric}); |
545
|
0
|
|
|
|
|
|
$code .= q' |
546
|
|
|
|
|
|
|
$x += 0.0; # force numeric |
547
|
|
|
|
|
|
|
$n++; |
548
|
|
|
|
|
|
|
$sx += $x; |
549
|
|
|
|
|
|
|
$sxx += $x * $x; |
550
|
|
|
|
|
|
|
'; |
551
|
0
|
0
|
|
|
|
|
$code .= 'print STDERR "dbcolstats: save-out write\n";' . "\n" if ($self->{_debug} > 2); |
552
|
|
|
|
|
|
|
|
553
|
0
|
0
|
0
|
|
|
|
if ($self->{_quantile} || $self->{_median}) { |
554
|
|
|
|
|
|
|
# note that as of perl-5.14 we must force numeric or perl truncates floats to ints :-( |
555
|
0
|
|
|
|
|
|
$code .= q' |
556
|
|
|
|
|
|
|
my(@row); |
557
|
|
|
|
|
|
|
$row[0] = $x + 0; # force numeric, as guaranteed by above |
558
|
|
|
|
|
|
|
$save_out->write_rowobj(\@row); |
559
|
|
|
|
|
|
|
'; |
560
|
|
|
|
|
|
|
}; |
561
|
0
|
0
|
|
|
|
|
$code .= 'print STDERR "dbcolstats: post save-out write\n";' . "\n" if ($self->{_debug} > 2); |
562
|
0
|
|
|
|
|
|
$code .= q' |
563
|
|
|
|
|
|
|
if (!defined($min)) { |
564
|
|
|
|
|
|
|
$min = $max = $x; |
565
|
|
|
|
|
|
|
} else { |
566
|
|
|
|
|
|
|
$min = $x if ($x < $min); |
567
|
|
|
|
|
|
|
$max = $x if ($x > $max); |
568
|
|
|
|
|
|
|
}; |
569
|
|
|
|
|
|
|
};'; |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
# run it |
572
|
0
|
0
|
|
|
|
|
print STDERR "dbcolstats: eval'ing code\n" if ($self->{_debug}); |
573
|
0
|
0
|
|
|
|
|
print $code if ($self->{_debug}); |
574
|
0
|
|
|
|
|
|
eval $code; |
575
|
0
|
0
|
|
|
|
|
$@ and die $self->{_prog} . ": internal error in eval.: $@\n"; |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
# clean up |
578
|
0
|
0
|
0
|
|
|
|
if ($self->{_quantile} || $self->{_median}) { |
579
|
0
|
0
|
|
|
|
|
print STDERR "dbcolstats: closing save-out\n" if ($self->{_debug} > 2); |
580
|
0
|
|
|
|
|
|
$self->{_save_out}->close; |
581
|
0
|
0
|
|
|
|
|
print STDERR "dbcolstats: post closing save-out\n" if ($self->{_debug} > 2); |
582
|
|
|
|
|
|
|
}; |
583
|
|
|
|
|
|
|
} |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
# |
586
|
|
|
|
|
|
|
# Make sure we cleaned up before we do any computation. |
587
|
|
|
|
|
|
|
# |
588
|
0
|
0
|
|
|
|
|
if (defined($self->{_sorter_fred})) { |
589
|
|
|
|
|
|
|
# let sorting finish |
590
|
0
|
0
|
|
|
|
|
print STDERR "dbcolstats: join on sorter thread\n" if ($self->{_debug} > 2); |
591
|
0
|
|
|
|
|
|
$self->{_sorter_fred}->join(); |
592
|
0
|
|
|
|
|
|
$self->{_sorter_fred} = undef; |
593
|
0
|
0
|
|
|
|
|
print STDERR "dbcolstats: post join on sorter thread\n" if ($self->{_debug} > 2); |
594
|
|
|
|
|
|
|
}; |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
# |
597
|
|
|
|
|
|
|
# Compute stats. |
598
|
|
|
|
|
|
|
# |
599
|
0
|
0
|
|
|
|
|
my $mean = ($n == 0 ? "-" : $sx / $n); |
600
|
|
|
|
|
|
|
# stddev = s (not s^2); s approximates sigma, the population standard deviation |
601
|
|
|
|
|
|
|
# Check for special cases: |
602
|
|
|
|
|
|
|
# $n <= 1 => divide by zero |
603
|
|
|
|
|
|
|
# all same data value => can sometimes get very small or negative |
604
|
|
|
|
|
|
|
# stddev (due to rounding error) |
605
|
|
|
|
|
|
|
# for these cases, $stddev = 0 |
606
|
0
|
|
|
|
|
|
my $stddev; |
607
|
0
|
0
|
|
|
|
|
if ($n == 0) { |
608
|
0
|
|
|
|
|
|
$stddev = "-"; |
609
|
|
|
|
|
|
|
} else { |
610
|
|
|
|
|
|
|
$stddev = ($n <= 1 || $max == $min) ? 0 : |
611
|
0
|
0
|
0
|
|
|
|
sqrt(($sxx - $n * $mean * $mean) / ($n - ($self->{_sample} ? 1 : 0))); |
|
|
0
|
|
|
|
|
|
612
|
|
|
|
|
|
|
}; |
613
|
0
|
|
|
|
|
|
my $pct_rsd; |
614
|
0
|
0
|
0
|
|
|
|
if ($stddev eq '-' || $mean eq '-' || $mean == 0) { |
|
|
|
0
|
|
|
|
|
615
|
0
|
|
|
|
|
|
$pct_rsd = "-"; |
616
|
|
|
|
|
|
|
} else { |
617
|
0
|
|
|
|
|
|
$pct_rsd = ($stddev / $mean) * 100; |
618
|
|
|
|
|
|
|
}; |
619
|
|
|
|
|
|
|
# |
620
|
|
|
|
|
|
|
# Confidence intervals from "Probability and Statistics for Engineers", |
621
|
|
|
|
|
|
|
# Second Edition, 1986, Scheaffer and McClave, p. 242. |
622
|
|
|
|
|
|
|
# |
623
|
0
|
|
|
|
|
|
my $conf_half; |
624
|
0
|
0
|
|
|
|
|
if ($n <= 1) { |
625
|
0
|
|
|
|
|
|
$conf_half = "-"; |
626
|
|
|
|
|
|
|
} else { |
627
|
0
|
|
|
|
|
|
my $conf_alpha = (1.0 - $self->{_confidence_fraction}) / 2.0; |
628
|
0
|
|
|
|
|
|
$conf_half = t_distribution($n - 1, $conf_alpha) * $stddev / sqrt($n); |
629
|
|
|
|
|
|
|
}; |
630
|
0
|
0
|
|
|
|
|
my $conf_low = ($conf_half eq '-' ? '-' : $mean - $conf_half); |
631
|
0
|
0
|
|
|
|
|
my $conf_high = ($conf_half eq '-' ? '-' : $mean + $conf_half); |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
# |
634
|
|
|
|
|
|
|
# Compute median/quantile. |
635
|
|
|
|
|
|
|
# |
636
|
0
|
|
|
|
|
|
my($median, $q_aref) = $self->_compute_quantile($n, $mean); |
637
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
# |
639
|
|
|
|
|
|
|
# Output the results. |
640
|
|
|
|
|
|
|
# |
641
|
|
|
|
|
|
|
# xxx: bug work-around: the +0s on conf_pct, min, max are |
642
|
|
|
|
|
|
|
# because perl-5.14.2-191.fc16.x86_64 |
643
|
|
|
|
|
|
|
# truncates the floating-point portion of these values otherwise. |
644
|
|
|
|
|
|
|
# |
645
|
|
|
|
|
|
|
my %out_hash = ( |
646
|
|
|
|
|
|
|
mean => $self->numeric_formatting($mean), |
647
|
|
|
|
|
|
|
stddev => $self->numeric_formatting($stddev), |
648
|
|
|
|
|
|
|
pct_rsd => $self->numeric_formatting($pct_rsd), |
649
|
|
|
|
|
|
|
conf_range => $self->numeric_formatting($conf_half), |
650
|
|
|
|
|
|
|
conf_low => $self->numeric_formatting($conf_low), |
651
|
|
|
|
|
|
|
conf_high => $self->numeric_formatting($conf_high), |
652
|
0
|
0
|
0
|
|
|
|
conf_pct => $self->{_confidence_fraction} + 0, |
|
|
0
|
0
|
|
|
|
|
653
|
|
|
|
|
|
|
sum => $self->numeric_formatting($sx), |
654
|
|
|
|
|
|
|
sum_squared => $self->numeric_formatting($sxx), |
655
|
|
|
|
|
|
|
min => (!defined($min) || $min eq '-' ? $min : $min + 0), |
656
|
|
|
|
|
|
|
max => (!defined($max) || $max eq '-' ? $max : $max + 0), |
657
|
|
|
|
|
|
|
n => $n, |
658
|
|
|
|
|
|
|
); |
659
|
|
|
|
|
|
|
# my $bug_workaround = "xxx: conf_pct : $out_hash{conf_pct}\n"; |
660
|
0
|
0
|
|
|
|
|
$out_hash{median} = $median if ($self->{_median}); |
661
|
0
|
0
|
|
|
|
|
if ($self->{_quantile}) { |
662
|
0
|
|
|
|
|
|
foreach (1..($self->{_quantile}-1)) { |
663
|
0
|
|
|
|
|
|
$out_hash{"q$_"} = $q_aref->[$_]; |
664
|
|
|
|
|
|
|
}; |
665
|
|
|
|
|
|
|
}; |
666
|
0
|
0
|
|
|
|
|
if (defined($key_column)) { |
667
|
0
|
|
|
|
|
|
$out_hash{$key_column} = $last_key; |
668
|
|
|
|
|
|
|
}; |
669
|
|
|
|
|
|
|
|
670
|
0
|
|
|
|
|
|
$self->{_out}->write_row_from_href(\%out_hash); |
671
|
|
|
|
|
|
|
} |
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
=head2 run |
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
$filter->run(); |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
Internal: run over each row, for one or many keys. |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
=cut |
680
|
|
|
|
|
|
|
sub run($) { |
681
|
0
|
|
|
0
|
1
|
|
my($self) = @_; |
682
|
0
|
|
|
|
|
|
$self->{_holdover_key} = $self->{_holdove_data} = undef; |
683
|
0
|
|
|
|
|
|
for (;;) { |
684
|
0
|
|
|
|
|
|
$self->run_one_key(); |
685
|
0
|
0
|
|
|
|
|
last if (!defined($self->{_holdover_key})); |
686
|
|
|
|
|
|
|
}; |
687
|
|
|
|
|
|
|
} |
688
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
=head1 AUTHOR and COPYRIGHT |
690
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
Copyright (C) 1991-2015 by John Heidemann |
692
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
This program is distributed under terms of the GNU general |
694
|
|
|
|
|
|
|
public license, version 2. See the file COPYING |
695
|
|
|
|
|
|
|
with the distribution for details. |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
=cut |
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
1; |