File Coverage

blib/lib/Fsdb/Filter/dbcolstats.pm

Criterion	Covered	Total	%
statement	27	209	12.9
branch	0	128	0.0
condition	0	36	0.0
subroutine	9	21	42.8
pod	6	6	100.0
total	42	400	10.5

line	stmt	bran	cond	sub	pod	time	code
1							#!/usr/bin/perl -w
2
3							#
4							# dbcolstats.pm
5							# Copyright (C) 1991-2015 by John Heidemann
6							# $Id: b8f85fa383507a09ebfc72e644fadd6e1d5ceed0 $
7							#
8							# This program is distributed under terms of the GNU general
9							# public license, version 2. See the file COPYING
10							# in $dblibdir for details.
11							#
12
13							package Fsdb::Filter::dbcolstats;
14
15							=head1 NAME
16
17							dbcolstats - compute statistics on a fsdb column
18
19							=head1 SYNOPSIS
20
21							dbcolstats [-amS] [-c ConfidenceFraction] [-q NumberOfQuantiles] column
22
23							=head1 DESCRIPTION
24
25							Compute statistics over a COLUMN of data.
26							Records containing non-numeric data are considered null
27							and do not contribute to the stats (with the C<-a> option
28							they are treated as zeros).
29
30							Confidence intervals are a t-test (+/- (t_{a/2})*s/sqrt(n))
31							and assume the population takes a normal distribution
32							with a small number of samples (< 100).
33
34							By default,
35							all statistics are computed as for a population I<sample> (with an ``n-1'' term),
36							not as representing the whole population (using ``n'').
37							Select between them with B<--sample> or B<--nosample>.
38							When you measure the entire population, use the latter option.
39
40							The output of this program is probably best looked at after
41							reformatting with L<dblistize(1)>.
42
43							Dbcolstats runs in O(1) memory. Median or quantile requires sorting the
44							data and invokes dbsort. Sorting will run in constant RAM but
45							O(number of records) disk space. If median or quantile is required
46							and the data is already sorted, dbcolstats will run more efficiently with
47							the -S option.
48
49
50							=head1 OPTIONS
51
52							=over 4
53
54							=item B<-a> or B<--include-non-numeric>
55
56							Compute stats over all records (treat non-numeric records
57							as zero rather than just ignoring them).
58
59							=item B<-c FRACTION> or B<--confidence FRACTION>
60
61							Specify FRACTION for the confidence interval.
62							Defaults to 0.95 for a 95% confidence factor.
63
64							=item B<-f FORMAT> or B<--format FORMAT>
65
66							Specify a L<printf(3)>-style format for output statistics.
67							Defaults to C<%.5g>.
68
69							=item B<-m> or B<--median>
70
71							Compute median value. (Will sort data if necessary.)
72							(Median is the quantile for N=2.)
73
74							=item B<-q N> or B<--quantile N>
75
76							Compute quantile (quartile when N is 4),
77							or an arbitrary quantile for other values of N,
78							that is, the scores that fall at each 1/Nth of the way across the population.
79
80							=item B<--sample>
81
82							Compute I<sample> population statistics
83							(e.g., the sample standard deviation),
84							assuming I<n-1> degrees of freedom.
85
86							=item B<--nosample>
87
88							Compute I<whole> population statistics
89							(e.g., the population standard deviation).
90
91							=item B<-S> or B<--pre-sorted>
92
93							Assume data is already sorted.
94							With one -S, we check and confirm this precondition.
95							When repeated, we skip the check.
96							(This flag is ignored if neither median nor quantiles are requested.)
97
98							=item B<--parallelism=N> or C<-j N>
99
100							Allow sorting to happen in parallel.
101							Defaults on.
102							(Only relevant if using non-pre-sorted data with quantiles.)
103
104							=item B<-F> or B<--fs> or B<--fieldseparator> S
105
106							Specify the field (column) separator as C<S>.
107							See L<dbfilealter(1)> for valid field separators.
108
109							=item B<-T TmpDir>
110
111							where to put temporary data.
112							Only used if median or quantiles are requested.
113							Also uses environment variable TMPDIR, if -T is
114							not specified.
115							Default is /tmp.
116
117							=item B<-k KeyField>
118
119							Do multi-stats, grouped by each key.
120							Assumes keys are sorted. (Use dbmultistats to guarantee sorting order.)
121
122							=item B<--output-on-no-input>
123
124							Enables null output (all fields are "-", n is 0)
125							if we get input with a schema but no records.
126							Without this option, just output the schema but no rows.
127							Default: no output if no input.
128
129							=back
130
131
132							=for comment
133							begin_standard_fsdb_options
134
135							This module also supports the standard fsdb options:
136
137							=over 4
138
139							=item B<-d>
140
141							Enable debugging output.
142
143							=item B<-i> or B<--input> InputSource
144
145							Read from InputSource, typically a file name, or C<-> for standard input,
146							or (if in Perl) an IO::Handle, Fsdb::IO or Fsdb::BoundedQueue object.
147
148							=item B<-o> or B<--output> OutputDestination
149
150							Write to OutputDestination, typically a file name, or C<-> for standard output,
151							or (if in Perl) an IO::Handle, Fsdb::IO or Fsdb::BoundedQueue object.
152
153							=item B<--autorun> or B<--noautorun>
154
155							By default, programs process automatically,
156							but Fsdb::Filter objects in Perl do not run until you invoke
157							the run() method.
158							The C<--(no)autorun> option controls that behavior within Perl.
159
160							=item B<--help>
161
162							Show help.
163
164							=item B<--man>
165
166							Show full manual.
167
168							=back
169
170							=for comment
171							end_standard_fsdb_options
172
173
174							=head1 SAMPLE USAGE
175
176							=head2 Input:
177
178							#fsdb absdiff
179							0
180							0.046953
181							0.072074
182							0.075413
183							0.094088
184							0.096602
185							# \| /home/johnh/BIN/DB/dbrow
186							# \| /home/johnh/BIN/DB/dbcol event clock
187							# \| dbrowdiff clock
188							# \| /home/johnh/BIN/DB/dbcol absdiff
189
190							=head2 Command:
191
192							cat data.fsdb \| dbcolstats absdiff
193
194							=head2 Output:
195
196							#fsdb mean stddev pct_rsd conf_range conf_low conf_high conf_pct sum sum_squared min max n
197							0.064188 0.036194 56.387 0.037989 0.026199 0.10218 0.95 0.38513 0.031271 0 0.096602 6
198							# \| /home/johnh/BIN/DB/dbrow
199							# \| /home/johnh/BIN/DB/dbcol event clock
200							# \| dbrowdiff clock
201							# \| /home/johnh/BIN/DB/dbcol absdiff
202							# \| dbcolstats absdiff
203							# 0.95 confidence intervals assume normal distribution and small n.
204
205							=head1 SEE ALSO
206
207							L<dbmultistats(1)>, handles multiple experiments in a single file.
208
209							L<dblistize(1)>, to pretty-print the output of dbcolstats.
210
211							L<dbcolpercentile(1)>, to compute an even more general version of median/quantiles.
212
213							L<dbcolstatscores(1)>, to compute z-scores or t-scores for each row.
214
215							L<dbrvstatdiff(1)>, to see if two sample populations are statistically different.
216
217							L<Fsdb(3)>.
218
219							=head1 BUGS
220
221							The algorithms used to compute variance have not been
222							audited to check for numerical stability.
223							(See F<http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance>.)
224							Variance may be incorrect when standard deviation
225							is small relative to the mean.
226
227							The field C<conf_pct> implies percentage, but it's actually
228							reported as a fraction (0.95 means 95%).
229
230							Because of limits of floating point, statistics on numbers of
231							widely different scales may be incorrect.
232							See the test cases F<dbcolstats_extrema> for examples.
233
234
235							=head1 CLASS FUNCTIONS
236
237							=cut
238
239							@ISA = qw(Fsdb::Filter);
240							($VERSION) = 2.0;
241
242	1			1		6	use strict;
	1					2
	1					26
243	1			1		5	use Pod::Usage;
	1					2
	1					72
244
245	1			1		6	use Fsdb::IO::Reader;
	1					1
	1					17
246	1			1		4	use Fsdb::IO::Writer;
	1					2
	1					14
247	1			1		4	use Fsdb::Filter;
	1					1
	1					22
248	1			1		5	use Fsdb::Filter::dbpipeline qw(dbpipeline_sink dbsort);
	1					2
	1					39
249	1			1		4	use Fsdb::Support qw($is_numeric_regexp);
	1					2
	1					69
250	1			1		5	use Fsdb::Support::TDistribution qw(t_distribution);
	1					2
	1					42
251	1			1		5	use Fsdb::Support::NamedTmpfile;
	1					2
	1					1700
252
253
254							=head2 new
255
256							$filter = new Fsdb::Filter::dbcolstats(@arguments);
257
258							Create a new dbcolstats object, taking command-line arguments.
259
260							=cut
261
262							sub new($@) {
263	0			0	1		my $class = shift @_;
264	0						my $self = $class->SUPER::new(@_);
265	0						bless $self, $class;
266	0						$self->set_defaults;
267	0						$self->parse_options(@_);
268	0						$self->SUPER::post_new();
269	0						return $self;
270							}
271
272
273							=head2 set_defaults
274
275							$filter->set_defaults();
276
277							Internal: set up defaults.
278
279							=cut
280
281							sub set_defaults($) {
282	0			0	1		my($self) = @_;
283	0						$self->SUPER::set_defaults();
284	0						$self->{_target_column} = undef;
285	0						$self->{_confidence_fraction} = 0.95;
286	0						$self->{_format} = "%.5g";
287	0						$self->{_quantile} = undef;
288	0						$self->{_median} = undef; # special case: renames the output field
289	0						$self->{_sample} = 1;
290	0						$self->{_pre_sorted} = 0;
291	0						$self->{_include_non_numeric} = undef;
292	0						$self->{_fscode} = undef;
293	0						$self->{_max_parallelism} = undef;
294	0						$self->{_key_column} = undef;
295	0						$self->{_output_on_no_input} = undef;
296	0						$self->set_default_tmpdir;
297							}
298
299							=head2 parse_options
300
301							$filter->parse_options(@ARGV);
302
303							Internal: parse command-line arguments.
304
305							=cut
306
307							sub parse_options($@) {
308	0			0	1		my $self = shift @_;
309
310	0						my(@argv) = @_;
311							$self->get_options(
312							\@argv,
313	0			0			'help\|?' => sub { pod2usage(1); },
314	0			0			'man' => sub { pod2usage(-verbose => 2); },
315							'a\|include-non-numeric!' => \$self->{_include_non_numeric},
316							'autorun!' => \$self->{_autorun},
317							'close!' => \$self->{_close},
318							'c\|confidence=f' => \$self->{_confidence_fraction},
319							'd\|debug+' => \$self->{_debug},
320							'f\|format=s' => \$self->{_format},
321							'F\|fs\|cs\|fieldseparator\|columnseparator=s' => \$self->{_fscode},
322	0			0			'i\|input=s' => sub { $self->parse_io_option('input', @_); },
323							'j\|parallelism=i' => \$self->{_max_parallelism},
324							'k\|key=s' => \$self->{_key_column},
325							'log!' => \$self->{_logprog},
326							'm\|median!' => \$self->{_median},
327	0			0			'o\|output=s' => sub { $self->parse_io_option('output', @_); },
328							'output-on-no-input!' => \$self->{_output_on_no_input},
329							'q\|quantile=i' => \$self->{_quantile},
330							's\|sample!' => \$self->{_sample},
331							'S\|pre-sorted+' => \$self->{_pre_sorted},
332							'T\|tmpdir\|tempdir=s' => \$self->{_tmpdir},
333							'saveoutput=s' => \$self->{_save_output},
334	0	0					) or pod2usage(2);
335	0						$self->parse_target_column(\@argv);
336							}
337
338							=head2 setup
339
340							$filter->setup();
341
342							Internal: setup, parse headers.
343
344							=cut
345
346
347							sub setup($) {
348	0			0	1		my($self) = @_;
349
350	0	0					pod2usage(2) if (!defined($self->{_target_column}));
351
352	0	0					print STDERR "dbcolstats: pre-input setup\n" if ($self->{_debug} > 2);
353	0						$self->finish_io_option('input', -comment_handler => $self->create_delay_comments_sub);
354	0	0					print STDERR "dbcolstats: post-input setup\n" if ($self->{_debug} > 2);
355	0						$self->{_target_coli} = $self->{_in}->col_to_i($self->{_target_column});
356							croak $self->{_prog} . ": target column " . $self->{_target_column} . " is not in input stream.\n"
357	0	0					if (!defined($self->{_target_coli}));
358	0						$self->{_key_coli} = undef;
359	0	0					if (defined($self->{_key_column})) {
360	0						$self->{_key_coli} = $self->{_in}->col_to_i($self->{_key_column});
361							croak($self->{_prog} . ": key column " . $self->{_key_column} . " is not in input stream.\n")
362	0	0					if (!defined($self->{_key_coli}));
363							};
364	0						my $read_fastpath_sub = $self->{_in}->fastpath_sub();
365	0						$self->{_read_fastpath_sub} = $read_fastpath_sub;
366
367	0						my(@headers) = (qw(mean stddev pct_rsd conf_range conf_low conf_high
368							conf_pct sum sum_squared min max n));
369	0	0					push(@headers, "median") if ($self->{_median});
370	0	0					if ($self->{_quantile}) {
371	0						foreach (1..($self->{_quantile}-1)) {
372	0						push(@headers, "q$_");
373							};
374							};
375	0	0					unshift(@headers, $self->{_key_column}) if (defined($self->{_key_column}));
376	0	0					print STDERR "dbcolstats: pre-output setup\n" if ($self->{_debug} > 2);
377	0						my @output_options = (-cols => \@headers);
378							unshift (@output_options, -fscode => $self->{_fscode})
379	0	0					if (defined($self->{_fscode}));
380	0						$self->finish_io_option('output', @output_options);
381	0	0					print STDERR "dbcolstats: post-output setup\n" if ($self->{_debug} > 2);
382
383	0	0	0				if ($self->{_quantile} \|\| $self->{_median}) {
384							croak($self->{_prog} . ": cannot currently do median or quantile with a key column\n")
385	0	0					if (defined($self->{_key_column}));
386	0						$self->{_save_out_filename} = Fsdb::Support::NamedTmpfile::alloc($self->{_tmpdir});
387							# sorting needed?
388	0						my $save_out;
389	0						my(@writer_args) = (-cols => [qw(data)]);
390	0	0					print STDERR "dbcolstats: pre-saveoutput setup\n" if ($self->{_debug} > 2);
391	0	0					if (!$self->{_pre_sorted}) {
392	0						my $sorter_fred;
393	0						my(@dbsort_args) = qw(-n data);
394							push (@dbsort_args, '--parallelism', $self->{_max_parallelism})
395	0	0					if (defined($self->{_max_parallelism}));
396	0	0					print STDERR "dbcolstats: doing sorter thread\n" if ($self->{_debug} > 2);
397							($save_out, $sorter_fred) = dbpipeline_sink(\@writer_args,
398							'--output' => $self->{_save_out_filename},
399	0						dbsort(@dbsort_args));
400	0						$self->{_sorter_fred} = $sorter_fred;
401							} else {
402							# no, just write it ourselves
403	0						$save_out = new Fsdb::IO::Writer('-file' => $self->{_save_out_filename}, @writer_args);
404							};
405	0						$self->{_save_out} = $save_out;
406	0	0					print STDERR "dbcolstats: post-saveoutput setup\n" if ($self->{_debug} > 2);
407							} else {
408	0	0					print STDERR "dbcolstats: no saveoutput needed\n" if ($self->{_debug} > 2);
409	0						$self->{_save_out} = undef;
410							};
411							}
412
413							=head2 _round_up
414
415							$i = _round_up($x);
416
417							Internal: Round up to the next integer.
418
419							=cut
420
421							sub _round_up($) {
422	0			0			my($x) = @_;
423	0						my($xi) = int($x);
424	0	0					return ($x > $xi) ? $xi+1 : $xi;
425							}
426
427							=head2 _compute_quantile
428
429							($median, $quantile_aref) = $filter->_compute_quantile($n, $mean);
430
431							Internal: Compute quantile from the saved data.
432							Not generalizable.
433							We assume the saved output is closed before we enter.
434
435							=cut
436
437							sub _compute_quantile($$$) {
438	0			0			my ($self, $n, $mean) = @_;
439
440	0	0	0				return if (!($self->{_quantile} \|\| $self->{_median}));
441	0						my $effective_quantile = $self->{_quantile};
442	0	0					$effective_quantile = 2 if (!defined($effective_quantile));
443
444	0						my $median;
445							my @q;
446	0	0					if ($n <= 1) {
447	0						$median = $mean;
448	0						push(@q, ($mean) x $effective_quantile);
449	0						return ($median, \@q);
450							};
451
452	0						my $save_in = new Fsdb::IO::Reader(-file => $self->{_save_out_filename});
453	0	0					$save_in->error && die $self->{_prog} . ": re-read error " . $save_in->error;
454
455							# To handle the ugly case of having more ntiles than
456							# data, we detect it and replicate the data until we have more
457							# replicated_data than ntiles.
458	0	0					my($replicate_data) = ($n >= $effective_quantile+1) ? 1 : _round_up(($effective_quantile+1.0)/$n);
459	0						my($replicated_n) = $n * $replicate_data;
460
461							# Also note that the array of quantiles and the number of
462							# data elements read are both 1-based and not 0-based like
463							# most perl stuff. This is to make the math easier.
464	0						my $median_i = _round_up($replicated_n / 2);
465	0						my $ntile_frac = ($replicated_n + 0.0) / ($effective_quantile + 0.0);
466	0						my($x, $last_x, $next_q_i);
467	0						@q = (0); # note that q is primed with 0 (to fill that zero element)
468	0						my($replicates_left) = 0;
469	0						my($i); # note that i counts from 1!
470	0						for ($i = 1; $#q+1 < $effective_quantile; $i++) {
471	0	0					if (--$replicates_left <= 0) {
472	0						my $fref = $save_in->read_rowobj;
473	0	0					die "internal error re-reading data\n" if (ref($fref) ne 'ARRAY');
474	0						$x = $fref->[0];
475	0						$replicates_left = $replicate_data;
476							# Verify sorted order (in case the user lied to us
477							# about pre-sorting).
478	0	0	0				if (defined($last_x) && $x < $last_x) {
479	0	0					my($info) = ($self->{_pre_sorted} ? " (internal error in dbsort)" : " (user specified -S for pre-sorted data but it is unsorted)");
480	0						die $self->{_prog} . ": cannot process data that is out of order between $last_x and $x $info.\n";
481							};
482	0						$last_x = $x;
483							};
484	0	0					if ($i == $median_i) { $median = $x; };
	0
485	0	0					$next_q_i = (_round_up($ntile_frac * ($#q + 1.0) )) if (!defined($next_q_i));
486							# print "d: q=$#q nq=$next_q_i i=$i\n";
487	0	0					if ($i == $next_q_i) { push(@q, $x); $next_q_i = undef; };
	0
	0
488							};
489	0						return ($median, \@q);
490							};
491
492
493							=head2 run_one_key
494
495							$filter->run_one_key();
496
497							Internal: run over each row, for a given key.
498
499							=cut
500							sub run_one_key($) {
501	0			0	1		my($self) = @_;
502
503	0	0					print STDERR "dbcolstats: starting run\n" if ($self->{_debug} > 2);
504
505							# xxx: should eval all this to factor out constants from runtime
506	0						my($xf) = $self->{_target_coli};
507	0						my($key_column) = $self->{_key_column};
508
509	0						my($n) = 0;
510	0						my($sx) = 0;
511	0						my($sxx) = 0;
512	0						my $min;
513							my $max;
514	0						my $key;
515	0						my $last_key = $self->{_holdover_key};
516	0						my $holdover_data = $self->{_holdover_data};
517	0						$self->{_holdover_key} = $self->{_holdover_data} = undef;
518
519	0						my $fref;
520							my $x;
521
522							{
523	0						my $save_out = $self->{_save_out};
	0
524	0						my $read_fastpath_sub = $self->{_read_fastpath_sub};
525
526	0						my $code = q'
527							while (1) {
528							if (defined($holdover_data)) {
529							$x = $holdover_data; # and key was set earlier
530							$holdover_data = undef;
531							} else {
532							$fref = &{$read_fastpath_sub}();
533							last if (!defined($fref));
534							$x = $fref->[' . $xf . q'];
535							';
536	0	0					if (defined($self->{_key_column})) {
537							$code .= q'
538	0						$key = $fref->[' . $self->{_key_coli} . '];
539							if (!defined($last_key)) {
540							$last_key = $key;
541							} elsif ($key ne $last_key) {
542							$self->{_holdover_key} = $key;
543							$self->{_holdover_data} = $x;
544							last;
545							};
546							';
547							};
548	0						$code .= q'
549							};
550							';
551
552							$code .= 'next if ($x !~ /' . $is_numeric_regexp . "/);\n"
553	0	0					if (!$self->{_include_non_numeric});
554	0						$code .= q'
555							$x += 0.0; # force numeric
556							$n++;
557							$sx += $x;
558							$sxx += $x * $x;
559							';
560	0	0					$code .= 'print STDERR "dbcolstats: save-out write\n";' . "\n" if ($self->{_debug} > 2);
561
562	0	0	0				if ($self->{_quantile} \|\| $self->{_median}) {
563							# note that as of perl-5.14 we must force numeric or perl truncates floats to ints :-(
564	0						$code .= q'
565							my(@row);
566							$row[0] = $x + 0; # force numeric, as guaranteed by above
567							$save_out->write_rowobj(\@row);
568							';
569							};
570	0	0					$code .= 'print STDERR "dbcolstats: post save-out write\n";' . "\n" if ($self->{_debug} > 2);
571	0						$code .= q'
572							if (!defined($min)) {
573							$min = $max = $x;
574							} else {
575							$min = $x if ($x < $min);
576							$max = $x if ($x > $max);
577							};
578							};';
579
580							# run it
581	0	0					print STDERR "dbcolstats: eval'ing code\n" if ($self->{_debug});
582	0	0					print $code if ($self->{_debug});
583	0						eval $code;
584	0	0					$@ and die $self->{_prog} . ": internal error in eval.: $@\n";
585
586							# clean up
587	0	0	0				if ($self->{_quantile} \|\| $self->{_median}) {
588	0	0					print STDERR "dbcolstats: closing save-out\n" if ($self->{_debug} > 2);
589	0						$self->{_save_out}->close;
590	0	0					print STDERR "dbcolstats: post closing save-out\n" if ($self->{_debug} > 2);
591							};
592							}
593
594							#
595							# Make sure we cleaned up before we do any computation.
596							#
597	0	0					if (defined($self->{_sorter_fred})) {
598							# let sorting finish
599	0	0					print STDERR "dbcolstats: join on sorter thread\n" if ($self->{_debug} > 2);
600	0						$self->{_sorter_fred}->join();
601	0						$self->{_sorter_fred} = undef;
602	0	0					print STDERR "dbcolstats: post join on sorter thread\n" if ($self->{_debug} > 2);
603							};
604
605							#
606							# Compute stats.
607							#
608	0	0					my $mean = ($n == 0 ? "-" : $sx / $n);
609							# stddev = s, not s^2, approximates sigma
610							# Check for special cases:
611							# $n <= 1 => divide by zero
612							# all same data value => can sometimes get very small or negative
613							# stddev (due to rounding error)
614							# for these cases, $stddev = 0
615	0						my $stddev;
616	0	0					if ($n == 0) {
617	0						$stddev = "-";
618							} else {
619							$stddev = ($n <= 1 \|\| $max == $min) ? 0 :
620	0	0	0				sqrt(($sxx - $n * $mean * $mean) / ($n - ($self->{_sample} ? 1 : 0)));
		0
621							};
622	0						my $pct_rsd;
623	0	0	0				if ($stddev eq '-' \|\| $mean eq '-' \|\| $mean == 0) {
			0
624	0						$pct_rsd = "-";
625							} else {
626	0						$pct_rsd = ($stddev / $mean) * 100;
627							};
628							#
629							# Confidence intervals from "Probability and Statistics for Engineers",
630							# Second Edition, 1986, Scheaffer and McClave, p. 242.
631							#
632	0						my $conf_half;
633	0	0					if ($n <= 1) {
634	0						$conf_half = "-";
635							} else {
636	0						my $conf_alpha = (1.0 - $self->{_confidence_fraction}) / 2.0;
637	0						$conf_half = t_distribution($n - 1, $conf_alpha) * $stddev / sqrt($n);
638							};
639	0	0					my $conf_low = ($conf_half eq '-' ? '-' : $mean - $conf_half);
640	0	0					my $conf_high = ($conf_half eq '-' ? '-' : $mean + $conf_half);
641
642							#
643							# Compute median/quantile.
644							#
645	0						my($median, $q_aref) = $self->_compute_quantile($n, $mean);
646
647							#
648							# Output the results.
649							#
650							# xxx: bug work-around: the +0s on conf_pct, min, max are
651							# because perl-5.14.2-191.fc16.x86_64
652							# truncates the floating-point portion of these values otherwise.
653							#
654							my %out_hash = (
655							mean => $self->numeric_formatting($mean),
656							stddev => $self->numeric_formatting($stddev),
657							pct_rsd => $self->numeric_formatting($pct_rsd),
658							conf_range => $self->numeric_formatting($conf_half),
659							conf_low => $self->numeric_formatting($conf_low),
660							conf_high => $self->numeric_formatting($conf_high),
661	0	0	0				conf_pct => $self->{_confidence_fraction} + 0,
		0	0
662							sum => $self->numeric_formatting($sx),
663							sum_squared => $self->numeric_formatting($sxx),
664							min => (!defined($min) \|\| $min eq '-' ? $min : $min + 0),
665							max => (!defined($max) \|\| $max eq '-' ? $max : $max + 0),
666							n => $n,
667							);
668							# my $bug_workaround = "xxx: conf_pct : $out_hash{conf_pct}\n";
669	0	0					$out_hash{median} = $median if ($self->{_median});
670	0	0					if ($self->{_quantile}) {
671	0						foreach (1..($self->{_quantile}-1)) {
672	0						$out_hash{"q$_"} = $q_aref->[$_];
673							};
674							};
675	0	0					if (defined($key_column)) {
676	0						$out_hash{$key_column} = $last_key;
677							};
678
679	0	0	0				if ($n > 0 \|\| ($n == 0 && $self->{_output_on_no_input})) {
			0
680	0						$self->{_out}->write_row_from_href(\%out_hash);
681							};
682							}
683
684							=head2 run
685
686							$filter->run();
687
688							Internal: run over each row, for one or many keys.
689
690							=cut
691							sub run($) {
692	0			0	1		my($self) = @_;
693	0						$self->{_holdover_key} = $self->{_holdove_data} = undef;
694	0						for (;;) {
695	0						$self->run_one_key();
696	0	0					last if (!defined($self->{_holdover_key}));
697							};
698							}
699
700							=head1 AUTHOR and COPYRIGHT
701
702							Copyright (C) 1991-2015 by John Heidemann
703
704							This program is distributed under terms of the GNU general
705							public license, version 2. See the file COPYING
706							with the distribution for details.
707
708							=cut
709
710							1;