File Coverage

blib/lib/Dumbbench/Stats.pm

Criterion	Covered	Total	%
statement	56	72	77.7
branch	5	6	83.3
condition	2	2	100.0
subroutine	16	20	80.0
pod	0	15	0.0
total	79	115	68.7

line	stmt	bran	cond	sub	pod	time	code
1							package Dumbbench::Stats;
2	3			3		20	use strict;
	3					7
	3					105
3	3			3		17	use warnings;
	3					4
	3					74
4	3			3		15	use List::Util ();
	3					5
	3					36
5	3			3		1439	use Statistics::CaseResampling ();
	3					1961
	3					122
6
7							use Class::XSAccessor {
8	3					20	constructor => 'new',
9							accessors => [qw/data name/],
10	3			3		19	};
	3					7
11
12							# Note: This is entirely unoptimized. There is a lot of unnecessary
13							# stuff going on. This is to allow the user to modify the data
14							# set in flight. If this comes back to haunt us at some point,
15							# we can still optimize, but at this point, convenience still wins.
16
17							sub sorted_data {
18	0			0	0	0	my $self = shift;
19	0					0	my $sorted = [sort { $a <=> $b } @{$self->data}];
	0					0
	0					0
20	0					0	return $sorted;
21							}
22
23	2			2	0	52	sub first_quartile { Statistics::CaseResampling::first_quartile($_[0]->data) }
24	2			2	0	1307	sub second_quartile { return $_[0]->median }
25	2			2	0	23	sub third_quartile { Statistics::CaseResampling::third_quartile($_[0]->data) }
26
27
28	17			17	0	23	sub n { scalar(@{$_[0]->data}) }
	17					53
29
30							sub sum {
31	17			17	0	21	my $self = shift;
32	17					22	return List::Util::sum(@{$self->data});
	17					72
33							}
34
35							sub min {
36	0			0	0	0	my $self = shift;
37	0					0	return List::Util::min(@{$self->data});
	0					0
38							}
39
40							sub max {
41	0			0	0	0	my $self = shift;
42	0					0	return List::Util::max(@{$self->data});
	0					0
43							}
44
45							sub mean {
46	17			17	0	2482	my $self = shift;
47	17					30	return $self->sum / $self->n;
48							}
49
50	73			73	0	257	sub median { Statistics::CaseResampling::median($_[0]->data) } # O(n)!
51
52							sub median_confidence_limits {
53	0			0	0	0	my $self = shift;
54	0					0	my $nsigma = shift;
55	0					0	my $alpha = Statistics::CaseResampling::nsigma_to_alpha($nsigma);
56							# note: The 1000 here is kind of a lower limit for reasonable accuracy.
57							# But if the data set is small, that's more significant. If the data
58							# set is VERY large, then running much more than 1k resamplings
59							# is VERY expensive. So 1k is probably a reasonable default.
60	0					0	return Statistics::CaseResampling::median_simple_confidence_limits($self->data, 1-$alpha, 1000)
61							}
62
63							sub mad {
64	26			26	0	45	my $self = shift;
65	26					37	my $median = $self->median;
66	26					30	my @val = map {abs($_ - $median)} @{$self->data};
	114					172
	26					42
67	26					81	return ref($self)->new(data => \@val)->median;
68							}
69
70							sub mad_dev {
71	1			1	0	4	my $self = shift;
72	1					2	return $self->mad()*1.4826;
73							}
74
75							sub std_dev {
76	2			2	0	23	my $self = shift;
77	2					4	my $data = $self->data;
78	2					20	my $mean = $self->mean;
79	2					3	my $var = 0;
80	2					42	$var += ($_-$mean)**2 for @$data;
81	2					5	$var /= @$data - 1;
82	2					9	return sqrt($var);
83							}
84
85							sub filter_outliers {
86	14			14	0	1173	my $self = shift;
87	14					39	my %opt = @_;
88	14		100			38	my $var_measure = $opt{variability_measure} \|\| 'mad';
89	14					18	my $n_sigma = $opt{nsigma_outliers};
90
91							# If outlier rejection is turned off...
92	14	100				47	if (not $n_sigma) {
		50
93	1					8	return ($self->data, []);
94							}
95							elsif ($n_sigma < 0) {
96	0					0	Carp::croak("A negative value for the number of 'sigmas' makes no sense");
97							}
98
99	13					26	my $data = $self->data;
100
101	13					20	my $median = $self->median;
102	13					38	my $variability = $self->$var_measure;
103	13					35	my @good;
104							my @outliers;
105	13					23	foreach my $x (@$data) {
106	55	100				90	if (abs($x-$median) <= $variability*$n_sigma) {
107	43					64	push @good, $x;
108							}
109							else {
110	12					20	push @outliers, $x;
111							}
112							}
113
114	13					43	return(\@good, \@outliers);
115							}
116
117
118							1;