File Coverage

blib/lib/Dumbbench/Stats.pm
Criterion Covered Total %
statement 56 72 77.7
branch 5 6 83.3
condition 2 2 100.0
subroutine 16 20 80.0
pod 0 15 0.0
total 79 115 68.7


line stmt bran cond sub pod time code
1             package Dumbbench::Stats;
2 3     3   31 use strict;
  3         7  
  3         132  
3 3     3   16 use warnings;
  3         5  
  3         182  
4 3     3   18 use List::Util ();
  3         6  
  3         104  
5 3     3   1651 use Statistics::CaseResampling ();
  3         2760  
  3         147  
6              
7             use Class::XSAccessor {
8 3         58 constructor => 'new',
9             accessors => [qw/data name/],
10 3     3   21 };
  3         5  
11              
12             # Note: This is entirely unoptimized. There is a lot of unnecessary
13             # stuff going on. This is to allow the user to modify the data
14             # set in flight. If this comes back to haunt us at some point,
15             # we can still optimize, but at this point, convenience still wins.
16              
17             sub sorted_data {
18 0     0 0 0 my $self = shift;
19 0         0 my $sorted = [sort { $a <=> $b } @{$self->data}];
  0         0  
  0         0  
20 0         0 return $sorted;
21             }
22              
23 2     2 0 23 sub first_quartile { Statistics::CaseResampling::first_quartile($_[0]->data) }
24 2     2 0 1476 sub second_quartile { return $_[0]->median }
25 2     2 0 25 sub third_quartile { Statistics::CaseResampling::third_quartile($_[0]->data) }
26              
27              
28 34     34 0 51 sub n { scalar(@{$_[0]->data}) }
  34         106  
29              
30             sub sum {
31 34     34 0 61 my $self = shift;
32 34         38 return List::Util::sum(@{$self->data});
  34         134  
33             }
34              
35             sub min {
36 0     0 0 0 my $self = shift;
37 0         0 return List::Util::min(@{$self->data});
  0         0  
38             }
39              
40             sub max {
41 0     0 0 0 my $self = shift;
42 0         0 return List::Util::max(@{$self->data});
  0         0  
43             }
44              
45             sub mean {
46 34     34 0 226777 my $self = shift;
47 34         112 return $self->sum / $self->n;
48             }
49              
50 158     158 0 513 sub median { Statistics::CaseResampling::median($_[0]->data) } # O(n)!
51              
52             sub median_confidence_limits {
53 0     0 0 0 my $self = shift;
54 0         0 my $nsigma = shift;
55 0         0 my $alpha = Statistics::CaseResampling::nsigma_to_alpha($nsigma);
56             # note: The 1000 here is kind of a lower limit for reasonable accuracy.
57             # But if the data set is small, that's more significant. If the data
58             # set is VERY large, then running much more than 1k resamplings
59             # is VERY expensive. So 1k is probably a reasonable default.
60 0         0 return Statistics::CaseResampling::median_simple_confidence_limits($self->data, 1-$alpha, 1000)
61             }
62              
63             sub mad {
64 60     60 0 71 my $self = shift;
65 60         78 my $median = $self->median;
66 60         69 my @val = map {abs($_ - $median)} @{$self->data};
  479         634  
  60         99  
67 60         195 return ref($self)->new(data => \@val)->median;
68             }
69              
70             sub mad_dev {
71 1     1 0 3 my $self = shift;
72 1         4 return $self->mad()*1.4826;
73             }
74              
75             sub std_dev {
76 2     2 0 60 my $self = shift;
77 2         7 my $data = $self->data;
78 2         6 my $mean = $self->mean;
79 2         4 my $var = 0;
80 2         16 $var += ($_-$mean)**2 for @$data;
81 2         5 $var /= @$data - 1;
82 2         7 return sqrt($var);
83             }
84              
85             sub filter_outliers {
86 31     31 0 1849 my $self = shift;
87 31         85 my %opt = @_;
88 31   100     78 my $var_measure = $opt{variability_measure} || 'mad';
89 31         39 my $n_sigma = $opt{nsigma_outliers};
90              
91             # If outlier rejection is turned off...
92 31 100       90 if (not $n_sigma) {
    50          
93 1         6 return ($self->data, []);
94             }
95             elsif ($n_sigma < 0) {
96 0         0 Carp::croak("A negative value for the number of 'sigmas' makes no sense");
97             }
98              
99 30         55 my $data = $self->data;
100              
101 30         56 my $median = $self->median;
102 30         59 my $variability = $self->$var_measure;
103 30         94 my @good;
104             my @outliers;
105 30         54 foreach my $x (@$data) {
106 244 100       318 if (abs($x-$median) <= $variability*$n_sigma) {
107 219         280 push @good, $x;
108             }
109             else {
110 25         36 push @outliers, $x;
111             }
112             }
113              
114 30         93 return(\@good, \@outliers);
115             }
116              
117              
118             1;