line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Math::StdDev; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
66932
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
27
|
|
4
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
587
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
# perl -MPod::Markdown -e 'Pod::Markdown->new->filter(@ARGV)' lib/Math/StdDev.pm > README.md |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
=head1 NAME |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
Math::StdDev - Pure-perl mean and variance computation supporting running/online calculation (Welford's algorithm) |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 SYNOPSIS |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
#!/usr/bin/perl -w |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
use Math::StdDev; |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
my $d = new Math::StdDev(); |
20
|
|
|
|
|
|
|
$d->Update(2); |
21
|
|
|
|
|
|
|
$d->Update(3); |
22
|
|
|
|
|
|
|
print $d->mean() . "\t" . $d->sampleVariance(); # or $d->variance() |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
or |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
perl -MMath::StdDev -e '$d=new Math::StdDev; $d->Update(10**8+4, 10**8 + 7, 10**8 + 13, 10**8 + 16); print $d->mean() . "\n" . $d->sampleVariance() . "\n"' |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=head1 DESCRIPTION |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
This module impliments Welford's online algorithm (see https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance ) |
32
|
|
|
|
|
|
|
Maybe one day in future the two-pass algo could be included, along with Kahan compensated summation... so much math, so little time... |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=head2 EXPORT |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
None by default. |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head2 Notes |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=head2 new |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
Usage is |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
my $d = new Math::StdDev(); |
47
|
|
|
|
|
|
|
or |
48
|
|
|
|
|
|
|
my $d = new Math::StdDev(1,2,3,4); # Add one or more samples, or a population, right from the start |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=head2 Update |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
Usage is |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
my $d->Update(123); |
56
|
|
|
|
|
|
|
or |
57
|
|
|
|
|
|
|
my $d->Update(@list_of_scalars); |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=head2 mean() |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
Usage is |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
print $d->mean(); |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=head2 variance |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
Usage is |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
print $d->variance(); |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head2 sampleVariance |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
(same as variance, but uses n-1 divisor.) Usage is: |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
print $d->sampleVariance(); |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=cut |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
require Exporter; |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
82
|
|
|
|
|
|
|
our($VERSION)='1.02'; |
83
|
|
|
|
|
|
|
our($UntarError) = ''; |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
our %EXPORT_TAGS = ( 'all' => [ qw( ) ] ); |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
our @EXPORT = qw( ); |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
sub new { |
93
|
2
|
|
|
2
|
1
|
90
|
my $class = shift; |
94
|
2
|
|
|
|
|
5
|
my $this={}; |
95
|
2
|
|
|
|
|
5
|
$this->{count}=0; |
96
|
2
|
|
|
|
|
4
|
$this->{mean}=0; |
97
|
2
|
|
|
|
|
4
|
$this->{M2}=0; |
98
|
|
|
|
|
|
|
|
99
|
2
|
|
|
|
|
4
|
bless $this,$class; |
100
|
2
|
50
|
|
|
|
8
|
if(defined $_[0]) { |
101
|
2
|
|
|
|
|
6
|
$this->Update(@_); # Include anything passed into the new() |
102
|
|
|
|
|
|
|
# Also compute the exact correct result using the 2-pass method, in case they're not going to provide more samples later |
103
|
2
|
|
|
|
|
4
|
my $s=0;$s+=$_ foreach(@_); |
|
2
|
|
|
|
|
8
|
|
104
|
2
|
|
|
|
|
6
|
my $m=$s/(1+$#_); |
105
|
2
|
|
|
|
|
3
|
my $v=0;$v+=($_-$m)**2 foreach(@_); |
|
2
|
|
|
|
|
12
|
|
106
|
2
|
|
|
|
|
4
|
$this->{exactMean}=$m; |
107
|
2
|
|
|
|
|
10
|
$this->{exactVariance}=($v/(1+$#_))**.5; |
108
|
2
|
50
|
|
|
|
9
|
$this->{exactSampleVariance}=($v/($#_))**.5 if($#_>0); |
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
|
111
|
2
|
|
|
|
|
7
|
return $this; |
112
|
|
|
|
|
|
|
} # new |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
# # for a new value newValue, compute the new count, new mean, the new M2. |
116
|
|
|
|
|
|
|
# # mean accumulates the mean of the entire dataset |
117
|
|
|
|
|
|
|
# # M2 aggregates the squared distance from the mean |
118
|
|
|
|
|
|
|
# # count aggregates the number of samples seen so far |
119
|
|
|
|
|
|
|
# def update(existingAggregate, newValue): |
120
|
|
|
|
|
|
|
# (count, mean, M2) = existingAggregate |
121
|
|
|
|
|
|
|
# count += 1 |
122
|
|
|
|
|
|
|
# delta = newValue - mean |
123
|
|
|
|
|
|
|
# mean += delta / count |
124
|
|
|
|
|
|
|
# delta2 = newValue - mean |
125
|
|
|
|
|
|
|
# M2 += delta * delta2 |
126
|
|
|
|
|
|
|
# |
127
|
|
|
|
|
|
|
# return (count, mean, M2) |
128
|
|
|
|
|
|
|
# |
129
|
|
|
|
|
|
|
# # retrieve the mean, variance and sample variance from an aggregate |
130
|
|
|
|
|
|
|
# def finalize(existingAggregate): |
131
|
|
|
|
|
|
|
# (count, mean, M2) = existingAggregate |
132
|
|
|
|
|
|
|
# (mean, variance, sampleVariance) = (mean, M2/count, M2/(count - 1)) |
133
|
|
|
|
|
|
|
# if count < 2: |
134
|
|
|
|
|
|
|
# return float('nan') |
135
|
|
|
|
|
|
|
# else: |
136
|
|
|
|
|
|
|
# return (mean, variance, sampleVariance) |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
sub Update { |
141
|
3
|
|
|
3
|
1
|
8
|
my $this = shift; |
142
|
3
|
|
|
|
|
7
|
while(defined($_[0])) { |
143
|
16
|
|
|
|
|
21
|
my $newValue=shift; |
144
|
16
|
|
|
|
|
25
|
$this->{count}++; |
145
|
16
|
|
|
|
|
26
|
my $delta = $newValue - $this->{mean}; |
146
|
16
|
|
|
|
|
27
|
$this->{mean} += $delta / $this->{count}; |
147
|
16
|
|
|
|
|
22
|
my $delta2 = $newValue - $this->{mean}; |
148
|
16
|
|
|
|
|
34
|
$this->{M2} += $delta * $delta2; |
149
|
|
|
|
|
|
|
} |
150
|
3
|
|
|
|
|
7
|
undef($this->{exactMean}); # switch over to online method instead of two-pass method |
151
|
|
|
|
|
|
|
} # Update |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
sub mean { |
154
|
3
|
|
|
3
|
1
|
13
|
my $this = shift; |
155
|
3
|
50
|
|
|
|
8
|
if($this->{count}<1) { return undef; } |
|
0
|
|
|
|
|
0
|
|
156
|
3
|
100
|
|
|
|
11
|
return $this->{exactMean} if(defined $this->{exactMean}); |
157
|
2
|
|
|
|
|
18
|
return $this->{mean}; |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
sub variance { |
161
|
4
|
|
|
4
|
1
|
9
|
my $this = shift; |
162
|
4
|
50
|
|
|
|
25
|
if($this->{count}<1) { return undef; } |
|
0
|
|
|
|
|
0
|
|
163
|
4
|
100
|
|
|
|
18
|
return $this->{exactVariance} if(defined $this->{exactMean}); |
164
|
2
|
|
|
|
|
12
|
return ($this->{M2}/$this->{count})**0.5; |
165
|
|
|
|
|
|
|
} |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
sub sampleVariance { |
168
|
2
|
|
|
2
|
1
|
4
|
my $this = shift; |
169
|
2
|
50
|
|
|
|
6
|
if($this->{count}<2) { return undef; } |
|
0
|
|
|
|
|
0
|
|
170
|
2
|
50
|
|
|
|
13
|
return $this->{exactSampleVariance} if(defined $this->{exactMean}); |
171
|
0
|
|
|
|
|
|
return ($this->{M2}/($this->{count}-1))**0.5; |
172
|
|
|
|
|
|
|
} |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
1; |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
__END__ |