line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Statistics::Running; |
2
|
|
|
|
|
|
|
|
3
|
3
|
|
|
3
|
|
66210
|
use 5.006; |
|
3
|
|
|
|
|
23
|
|
4
|
3
|
|
|
3
|
|
16
|
use strict; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
75
|
|
5
|
3
|
|
|
3
|
|
27
|
use warnings; |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
102
|
|
6
|
|
|
|
|
|
|
|
7
|
3
|
|
|
3
|
|
1787
|
use Data::Dumper; |
|
3
|
|
|
|
|
20243
|
|
|
3
|
|
|
|
|
305
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our $VERSION = '0.11'; |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
# overload these operators to have special meaning when |
12
|
|
|
|
|
|
|
# operand(s) are Statistics::Running: |
13
|
|
|
|
|
|
|
use overload |
14
|
|
|
|
|
|
|
# add two stats object and adjust summed mean,stdev etc. |
15
|
3
|
|
|
|
|
39
|
'+' => \&concatenate, |
16
|
|
|
|
|
|
|
# check if two stats objects are same wrt mean,stdev,N BUT NOT histogram |
17
|
|
|
|
|
|
|
'==' => \&equals, |
18
|
|
|
|
|
|
|
# convert a stats object into a string, e.g. print $obj."\n"; |
19
|
|
|
|
|
|
|
'""' => \&stringify, |
20
|
3
|
|
|
3
|
|
3376
|
; |
|
3
|
|
|
|
|
3118
|
|
21
|
|
|
|
|
|
|
|
22
|
3
|
|
|
3
|
|
1759
|
use Try::Tiny; |
|
3
|
|
|
|
|
6213
|
|
|
3
|
|
|
|
|
169
|
|
23
|
3
|
|
|
3
|
|
1334
|
use Statistics::Histogram; |
|
3
|
|
|
|
|
107245
|
|
|
3
|
|
|
|
|
405
|
|
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# this is for all numerical equality comparisons |
26
|
3
|
|
|
3
|
|
29
|
use constant SMALL_NUMBER_FOR_EQUALITY => 1E-10; |
|
3
|
|
|
|
|
8
|
|
|
3
|
|
|
|
|
11313
|
|
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# creates an obj. There are no input params |
29
|
|
|
|
|
|
|
sub new { |
30
|
16
|
|
|
16
|
1
|
3364
|
my $class = $_[0]; |
31
|
|
|
|
|
|
|
|
32
|
16
|
|
100
|
|
|
103
|
my $parent = ( caller(1) )[3] || "N/A"; |
33
|
16
|
|
|
|
|
104
|
my $whoami = ( caller(0) )[3]; |
34
|
|
|
|
|
|
|
|
35
|
16
|
|
|
|
|
166
|
my $self = { |
36
|
|
|
|
|
|
|
# these are internal variables to store mean etc. or used to calculate Kurtosis |
37
|
|
|
|
|
|
|
'M1' => 0.0, |
38
|
|
|
|
|
|
|
'M2' => 0.0, |
39
|
|
|
|
|
|
|
'M3' => 0.0, |
40
|
|
|
|
|
|
|
'M4' => 0.0, |
41
|
|
|
|
|
|
|
'MIN' => 0.0, |
42
|
|
|
|
|
|
|
'MAX' => 0.0, |
43
|
|
|
|
|
|
|
'N' => 0, # number of data items inserted |
44
|
|
|
|
|
|
|
# this histogram is updated each time a new data point is pushed in the object |
45
|
|
|
|
|
|
|
# it just holds the number of items in each bin, so it is not too expensive. |
46
|
|
|
|
|
|
|
# with this we get an idea of the Probability Distribution of the pushed data. |
47
|
|
|
|
|
|
|
# Which may or may not be useful to users. |
48
|
|
|
|
|
|
|
# Should you want to avoid this then use Statistics::Running::Tiny |
49
|
|
|
|
|
|
|
'histo' => { |
50
|
|
|
|
|
|
|
'num-bins' => -1, |
51
|
|
|
|
|
|
|
'bins' => { |
52
|
|
|
|
|
|
|
# b: [histo-left-boundary, bin1_right_boundary, bin2_right_boundary, ... binN-1_right_boundary, histo-right-boundary] |
53
|
|
|
|
|
|
|
'b' => [], # length is 'num-bins'+1 |
54
|
|
|
|
|
|
|
# c: contains the counts, its size is equal to the number of bins |
55
|
|
|
|
|
|
|
# the first cell contains counts in the interval [histo-left-boundary, bin1_right_boundary] |
56
|
|
|
|
|
|
|
# the last cell contains counts of [binN-1_right_boundary, histo-right-boundary] |
57
|
|
|
|
|
|
|
'c' => [], # length 'num-bins' |
58
|
|
|
|
|
|
|
}, |
59
|
|
|
|
|
|
|
# cached stringified histogram, it is re-calculated only if data points added |
60
|
|
|
|
|
|
|
# and asked to print histogram |
61
|
|
|
|
|
|
|
'stringified' => undef, |
62
|
|
|
|
|
|
|
# when asked to stringify a hist we actually use a cached string |
63
|
|
|
|
|
|
|
# which needs to be recalculated whenever data is added or hist re-created |
64
|
|
|
|
|
|
|
'needs-recalculate' => 1, |
65
|
|
|
|
|
|
|
}, |
66
|
|
|
|
|
|
|
}; |
67
|
16
|
|
|
|
|
38
|
bless($self, $class); |
68
|
16
|
|
|
|
|
53
|
$self->clear(); |
69
|
16
|
|
|
|
|
58
|
return $self |
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
# return the mean of the data entered so far |
72
|
4
|
|
|
4
|
1
|
38
|
sub mean { return $_[0]->{'M1'} } |
73
|
|
|
|
|
|
|
# returns the histogram bins (can be empty) in our internal format |
74
|
2
|
|
|
2
|
1
|
23
|
sub histogram { return $_[0]->{'histo'} } |
75
|
|
|
|
|
|
|
# if no params, it returns our bins as a hash |
76
|
|
|
|
|
|
|
# otherwise it imports input bins in the form of a hash |
77
|
|
|
|
|
|
|
# and before that it erases previous histogram and forms it according to input, e.g. |
78
|
|
|
|
|
|
|
# sets bin-widths and numbins etc. |
79
|
|
|
|
|
|
|
sub histogram_bins_hash { |
80
|
9
|
|
|
9
|
0
|
27
|
my $self = $_[0]; |
81
|
9
|
|
|
|
|
20
|
my $bins = $_[1]; |
82
|
9
|
100
|
|
|
|
20
|
if( ! defined($bins) ){ |
83
|
|
|
|
|
|
|
# export to a hash |
84
|
6
|
|
|
|
|
17
|
return $self->_bins2hash() |
85
|
|
|
|
|
|
|
} |
86
|
|
|
|
|
|
|
# import from a hash |
87
|
3
|
|
|
|
|
9
|
$self->_hash2bins($bins); |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
# if no params, it returns our bins as a hash of the form returned by Statistics::Descriptive::frequency_distribution() |
90
|
|
|
|
|
|
|
# otherwise it imports input bins in the form of a hash in the form returned by Statistics::Descriptive::frequency_distribution() |
91
|
|
|
|
|
|
|
sub histogram_bins_stathash { |
92
|
0
|
|
|
0
|
0
|
0
|
my $self = $_[0]; |
93
|
0
|
|
|
|
|
0
|
my $bi = $_[1]; |
94
|
0
|
0
|
|
|
|
0
|
if( ! defined($bi) ){ |
95
|
|
|
|
|
|
|
# export to a hash |
96
|
0
|
|
|
|
|
0
|
return $self->_bins2stathash() |
97
|
|
|
|
|
|
|
} |
98
|
|
|
|
|
|
|
# import from a hash |
99
|
0
|
|
|
|
|
0
|
$self->_stathash2bins($bi); |
100
|
|
|
|
|
|
|
} |
101
|
|
|
|
|
|
|
# return a string showing this histogram by calling Statistics::Histogram::print_histogram() |
102
|
|
|
|
|
|
|
# we first convert our hist to stathash format |
103
|
|
|
|
|
|
|
sub histogram_stringify { |
104
|
3
|
|
|
3
|
0
|
9
|
my ($self, @opts) = @_; |
105
|
3
|
50
|
|
|
|
19
|
if( $self->{'histo'}->{'needs-recalculate'} == 1 ){ $self->_histogram_recalculate(@opts) } |
|
3
|
|
|
|
|
11
|
|
106
|
3
|
|
|
|
|
90
|
return $self->{'histo'}->{'stringified'} |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
# we need to recalculate each time a new data is added. |
109
|
|
|
|
|
|
|
# but we do recalculate whenever it is needed, i.e. when we asked to print histogram |
110
|
|
|
|
|
|
|
sub _histogram_recalculate { |
111
|
3
|
|
|
3
|
|
7
|
my ($self, @stringify_opts) = @_; |
112
|
3
|
|
|
|
|
6
|
my $histstr = ""; |
113
|
3
|
100
|
|
|
|
18
|
if( $self->{'histo'}->{'num-bins'} > 0 ){ |
114
|
|
|
|
|
|
|
Try::Tiny::try { |
115
|
|
|
|
|
|
|
$histstr = Statistics::Histogram::print_histogram( |
116
|
|
|
|
|
|
|
'hist' => $self->_bins2stathash(), |
117
|
2
|
|
|
2
|
|
120
|
'x_min' => $self->{'histo'}->{'bins'}->{'b'}->[0], |
118
|
|
|
|
|
|
|
use_linear_axes => 1, |
119
|
|
|
|
|
|
|
@stringify_opts |
120
|
|
|
|
|
|
|
) |
121
|
|
|
|
|
|
|
} Try::Tiny::catch { |
122
|
0
|
|
|
0
|
|
0
|
print STDERR "_histogram_recalculate() : error caught trying to stringify: $_\n"; |
123
|
0
|
|
|
|
|
0
|
$histstr = ""; |
124
|
2
|
|
|
|
|
22
|
}; |
125
|
|
|
|
|
|
|
} |
126
|
3
|
|
|
|
|
546
|
$self->{'histo'}->{'stringified'} = $histstr; |
127
|
3
|
|
|
|
|
9
|
$self->{'histo'}->{'needs-recalculate'} = 0; |
128
|
|
|
|
|
|
|
} |
129
|
|
|
|
|
|
|
# disable histogram logging, all existing histogram data is erased |
130
|
|
|
|
|
|
|
sub histogram_disable { |
131
|
0
|
|
|
0
|
1
|
0
|
my $self = $_[0]; |
132
|
|
|
|
|
|
|
|
133
|
0
|
|
|
|
|
0
|
$self->{'num-bins'} = -1; |
134
|
0
|
|
|
|
|
0
|
$self->{'bins'}->{'b'} = []; |
135
|
0
|
|
|
|
|
0
|
$self->{'bins'}->{'c'} = []; |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
# returns the count in bin specified as 1st input param |
138
|
0
|
|
|
0
|
1
|
0
|
sub histogram_count { return $_[0]->{'histo'}->{'c'}->[$_[1]] } |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
# enables histogram logging |
141
|
|
|
|
|
|
|
# it expects some parameters for creating the histogram in various forms, |
142
|
|
|
|
|
|
|
# e.g. by specifying the number of bins, bin-width and left boundary or |
143
|
|
|
|
|
|
|
# by specifying a HASH or ARRAY of bin specifications for non-uniform bin |
144
|
|
|
|
|
|
|
# sizes. HASH must be of the form 'FROM:TO'->counts |
145
|
|
|
|
|
|
|
# ARRAY of bin boundaries of the form |
146
|
|
|
|
|
|
|
# [histo-left-boundary, bin1_right_boundary, bin2_right_boundary, ... binN-1_right_boundary, histo-right-boundary] |
147
|
|
|
|
|
|
|
# the number of bins is 1 less than the length of this array |
148
|
|
|
|
|
|
|
sub histogram_enable { |
149
|
7
|
|
|
7
|
1
|
48
|
my $self = $_[0]; |
150
|
7
|
|
|
|
|
9
|
my $params = $_[1]; # $_[1] // {} does not work for perl<5.10, ? : requests $_[1] twice, so Cish if( ! defined below... |
151
|
|
|
|
|
|
|
|
152
|
7
|
50
|
|
|
|
17
|
if( ! defined($params) ){ $params = {} } |
|
0
|
|
|
|
|
0
|
|
153
|
|
|
|
|
|
|
|
154
|
7
|
|
|
|
|
14
|
my ($m1, $m2, $m3); |
155
|
7
|
50
|
33
|
|
|
47
|
if( defined($m1=$params->{'bins'}) ){ |
|
|
50
|
33
|
|
|
|
|
156
|
0
|
|
|
|
|
0
|
my $aref = ref($m1); |
157
|
0
|
0
|
|
|
|
0
|
if( $aref eq 'ARRAY' ){ |
|
|
0
|
|
|
|
|
|
158
|
|
|
|
|
|
|
# an array of bin boundaries of the form |
159
|
|
|
|
|
|
|
# [histo-left-boundary, bin1_right_boundary, bin2_right_boundary, ... binN-1_right_boundary, histo-right-boundary] |
160
|
|
|
|
|
|
|
# the number of bins is 1 less than the length of this array |
161
|
0
|
|
|
|
|
0
|
my @mm = @$m1; |
162
|
0
|
|
|
|
|
0
|
$self->{'histo'}->{'num-bins'} = scalar(@mm)-1; |
163
|
0
|
|
|
|
|
0
|
$self->{'histo'}->{'bins'}->{'b'} = [@mm]; |
164
|
0
|
|
|
|
|
0
|
$self->{'histo'}->{'bins'}->{'c'} = (0) x $self->{'histo'}->{'num-bins'}; |
165
|
|
|
|
|
|
|
} elsif( $aref eq 'HASH' ){ |
166
|
|
|
|
|
|
|
# a hashref keyed on bin-intervals in the form FROM:TO->counts |
167
|
0
|
|
|
|
|
0
|
$self->_hash2bins($m1); |
168
|
0
|
|
|
|
|
0
|
} else { die "parameter 'bins' expects either a HASHREF keyed on bin-intervals in the form FROM:TO->counts (and counts can be non-zero if that is a previous histogram), or an ARRAYREF with bin boundaries of the form [histo-left-boundary, bin1_right_boundary, bin2_right_boundary, ... binN-1_right_boundary, histo-right-boundary]. In this case the number of bins is 1 less than the length of the array." } |
169
|
|
|
|
|
|
|
} elsif( defined($m1=$params->{'bin-width'}) |
170
|
|
|
|
|
|
|
&& defined($m2=$params->{'num-bins'}) |
171
|
|
|
|
|
|
|
&& defined($m3=$params->{'left-boundary'}) |
172
|
|
|
|
|
|
|
){ |
173
|
|
|
|
|
|
|
# we re-create our own bins based on num-bins etc. |
174
|
7
|
|
|
|
|
15
|
$self->_histogram_create_bins_from_spec($m1, $m2, $m3) |
175
|
|
|
|
|
|
|
} else { |
176
|
|
|
|
|
|
|
# no params, set all counts OF ALREADY EXISTING histogram to zero |
177
|
0
|
|
|
|
|
0
|
print STDERR "enable_histogram() : failed to enable histogram because no histogram specification was supplied. Try enable_histogram({bin-width=>1, nun-bins=>10, left-boundary=>-5});\n"; |
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
# set existing histogram to zero counts |
181
|
|
|
|
|
|
|
sub histogram_reset { |
182
|
18
|
|
|
18
|
1
|
28
|
my $self = $_[0]; |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
# no params, set all counts OF ALREADY EXISTING histogram to zero |
185
|
18
|
|
|
|
|
51
|
my $m1 = $self->{'histo'}->{'bins'}->{'c'}; |
186
|
18
|
|
|
|
|
65
|
for(my $i=$self->{'histo'}->{'num-bins'};$i-->0;){ $m1->[$i] = 0 } |
|
0
|
|
|
|
|
0
|
|
187
|
|
|
|
|
|
|
} |
188
|
|
|
|
|
|
|
# push Data: a sample and process/update mean and all other stat measures |
189
|
|
|
|
|
|
|
# also insert it in histogram |
190
|
|
|
|
|
|
|
sub add { |
191
|
756
|
|
|
756
|
1
|
2022
|
my $self = $_[0]; |
192
|
756
|
|
|
|
|
944
|
my $x = $_[1]; |
193
|
|
|
|
|
|
|
|
194
|
756
|
|
|
|
|
1058
|
my $aref = ref($x); |
195
|
|
|
|
|
|
|
|
196
|
756
|
100
|
|
|
|
1199
|
if( $aref eq '' ){ |
|
|
50
|
|
|
|
|
|
197
|
|
|
|
|
|
|
# a scalar input |
198
|
753
|
|
|
|
|
1017
|
my ($delta, $delta_n, $delta_n2, $term1); |
199
|
753
|
|
|
|
|
1040
|
my $n1 = $self->{'N'}; |
200
|
753
|
100
|
|
|
|
1135
|
if( $n1 == 0 ){ $self->{'MIN'} = $self->{'MAX'} = $x } |
|
10
|
|
|
|
|
21
|
|
201
|
|
|
|
|
|
|
else { |
202
|
743
|
100
|
|
|
|
1284
|
if( $x < $self->{'MIN'} ){ $self->{'MIN'} = $x } |
|
17
|
|
|
|
|
37
|
|
203
|
743
|
100
|
|
|
|
1267
|
if( $x > $self->{'MAX'} ){ $self->{'MAX'} = $x } |
|
165
|
|
|
|
|
232
|
|
204
|
|
|
|
|
|
|
} |
205
|
753
|
|
|
|
|
982
|
$self->{'N'} += 1; # increment sample size push in |
206
|
753
|
|
|
|
|
1030
|
my $n0 = $self->{'N'}; |
207
|
|
|
|
|
|
|
|
208
|
753
|
|
|
|
|
1044
|
$delta = $x - $self->{'M1'}; |
209
|
753
|
|
|
|
|
1018
|
$delta_n = $delta / $n0; |
210
|
753
|
|
|
|
|
997
|
$delta_n2 = $delta_n * $delta_n; |
211
|
753
|
|
|
|
|
1026
|
$term1 = $delta * $delta_n * $n1; |
212
|
753
|
|
|
|
|
1009
|
$self->{'M1'} += $delta_n; |
213
|
|
|
|
|
|
|
$self->{'M4'} += $term1 * $delta_n2 * ($n0*$n0 - 3*$n0 + 3) |
214
|
|
|
|
|
|
|
+ 6 * $delta_n2 * $self->{'M2'} |
215
|
753
|
|
|
|
|
1578
|
- 4 * $delta_n * $self->{'M3'} |
216
|
|
|
|
|
|
|
; |
217
|
|
|
|
|
|
|
$self->{'M3'} += $term1 * $delta_n * ($n0 - 2) |
218
|
753
|
|
|
|
|
1277
|
- 3 * $delta_n * $self->{'M2'} |
219
|
|
|
|
|
|
|
; |
220
|
753
|
|
|
|
|
1014
|
$self->{'M2'} += $term1; |
221
|
|
|
|
|
|
|
# add data point to the internal histogram |
222
|
753
|
|
|
|
|
1258
|
$self->_histogram_add($x); |
223
|
|
|
|
|
|
|
} elsif( $aref eq 'ARRAY' ){ |
224
|
|
|
|
|
|
|
# an array input |
225
|
3
|
|
|
|
|
9
|
foreach (@$x){ $self->add($_) } |
|
302
|
|
|
|
|
470
|
|
226
|
|
|
|
|
|
|
} else { |
227
|
0
|
|
|
|
|
0
|
die "add(): only ARRAY and SCALAR can be handled (input was type '$aref')." |
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
} |
230
|
|
|
|
|
|
|
# copies input(=src) Running obj into current/self overwriting our data, this is not a clone()! |
231
|
|
|
|
|
|
|
sub copy_from { |
232
|
1
|
|
|
1
|
1
|
5
|
my $self = $_[0]; |
233
|
1
|
|
|
|
|
2
|
my $src = $_[1]; |
234
|
1
|
|
|
|
|
3
|
$self->{'M1'} = $src->M1(); |
235
|
1
|
|
|
|
|
2
|
$self->{'M2'} = $src->M2(); |
236
|
1
|
|
|
|
|
3
|
$self->{'M3'} = $src->M3(); |
237
|
1
|
|
|
|
|
2
|
$self->{'M4'} = $src->M4(); |
238
|
1
|
|
|
|
|
7
|
$self->set_N($src->get_N()); |
239
|
1
|
|
|
|
|
14
|
$self->_histogram_copy_from($src); |
240
|
|
|
|
|
|
|
} |
241
|
|
|
|
|
|
|
# clones current obj into a new Running obj with same values |
242
|
|
|
|
|
|
|
sub clone { |
243
|
1
|
|
|
1
|
1
|
3
|
my $self = $_[0]; |
244
|
1
|
|
|
|
|
3
|
my $newO = Statistics::Running->new(); |
245
|
1
|
|
|
|
|
5
|
$newO->{'M1'} = $self->M1(); |
246
|
1
|
|
|
|
|
3
|
$newO->{'M2'} = $self->M2(); |
247
|
1
|
|
|
|
|
3
|
$newO->{'M3'} = $self->M3(); |
248
|
1
|
|
|
|
|
4
|
$newO->{'M4'} = $self->M4(); |
249
|
1
|
|
|
|
|
3
|
$newO->set_N($self->get_N()); |
250
|
1
|
|
|
|
|
3
|
return $newO |
251
|
|
|
|
|
|
|
} |
252
|
|
|
|
|
|
|
# clears all data entered/calculated including histogram |
253
|
|
|
|
|
|
|
sub clear { |
254
|
18
|
|
|
18
|
1
|
29
|
my $self = $_[0]; |
255
|
18
|
|
|
|
|
94
|
$self->{'M1'} = 0.0; |
256
|
18
|
|
|
|
|
31
|
$self->{'M2'} = 0.0; |
257
|
18
|
|
|
|
|
27
|
$self->{'M3'} = 0.0; |
258
|
18
|
|
|
|
|
29
|
$self->{'M4'} = 0.0; |
259
|
18
|
|
|
|
|
54
|
$self->{'N'} = 0; |
260
|
18
|
|
|
|
|
45
|
$self->histogram_reset(); |
261
|
|
|
|
|
|
|
} |
262
|
4
|
|
|
4
|
1
|
20
|
sub min { return $_[0]->{'MIN'} } |
263
|
4
|
|
|
4
|
1
|
16
|
sub max { return $_[0]->{'MAX'} } |
264
|
|
|
|
|
|
|
# get number of total elements entered so far |
265
|
22
|
|
|
22
|
1
|
68
|
sub get_N { return $_[0]->{'N'} } |
266
|
|
|
|
|
|
|
sub variance { |
267
|
4
|
|
|
4
|
1
|
8
|
my $self = $_[0]; |
268
|
4
|
|
|
|
|
8
|
my $m = $self->{'N'}; |
269
|
4
|
50
|
|
|
|
12
|
if( $m == 1 ){ return 0 } |
|
0
|
|
|
|
|
0
|
|
270
|
4
|
|
|
|
|
28
|
return $self->{'M2'}/($m-1.0) |
271
|
|
|
|
|
|
|
} |
272
|
4
|
|
|
4
|
1
|
13
|
sub standard_deviation { return sqrt($_[0]->variance()) } |
273
|
|
|
|
|
|
|
sub skewness { |
274
|
3
|
|
|
3
|
1
|
6
|
my $self = $_[0]; |
275
|
3
|
|
|
|
|
6
|
my $m = $self->{'M2'}; |
276
|
3
|
100
|
|
|
|
18
|
if( $m == 0 ){ return 0 } |
|
1
|
|
|
|
|
4
|
|
277
|
|
|
|
|
|
|
return sqrt($self->{'N'}) |
278
|
2
|
|
|
|
|
22
|
* $self->{'M3'} / ($m ** 1.5) |
279
|
|
|
|
|
|
|
; |
280
|
|
|
|
|
|
|
} |
281
|
|
|
|
|
|
|
sub kurtosis { |
282
|
4
|
|
|
4
|
1
|
8
|
my $self = $_[0]; |
283
|
4
|
|
|
|
|
10
|
my $m = $self->{'M2'}; |
284
|
4
|
100
|
|
|
|
13
|
if( $m == 0 ){ return 0 } |
|
2
|
|
|
|
|
10
|
|
285
|
|
|
|
|
|
|
return $self->{'N'} |
286
|
2
|
|
|
|
|
13
|
* $self->{'M4'} |
287
|
|
|
|
|
|
|
/ ($m * $m) |
288
|
|
|
|
|
|
|
- 3.0 |
289
|
|
|
|
|
|
|
; |
290
|
|
|
|
|
|
|
} |
291
|
|
|
|
|
|
|
# concatenates another Running obj with current |
292
|
|
|
|
|
|
|
# AND returns a new Running obj with concatenated stats |
293
|
|
|
|
|
|
|
# Current object is not modified. |
294
|
|
|
|
|
|
|
sub concatenate { |
295
|
4
|
|
|
4
|
1
|
24
|
my $self = $_[0]; # us |
296
|
4
|
|
|
|
|
7
|
my $other = $_[1]; # another Running obj |
297
|
|
|
|
|
|
|
|
298
|
4
|
|
|
|
|
17
|
my $combined = Statistics::Running->new(); |
299
|
|
|
|
|
|
|
|
300
|
4
|
|
|
|
|
12
|
my $selfN = $self->get_N(); |
301
|
4
|
|
|
|
|
12
|
my $otherN = $other->get_N(); |
302
|
4
|
|
|
|
|
13
|
my $selfM2 = $self->M2(); |
303
|
4
|
|
|
|
|
29
|
my $otherM2 = $other->M2(); |
304
|
4
|
|
|
|
|
23
|
my $selfM3 = $self->M3(); |
305
|
4
|
|
|
|
|
10
|
my $otherM3 = $other->M3(); |
306
|
|
|
|
|
|
|
|
307
|
4
|
|
|
|
|
7
|
my $combN = $selfN + $otherN; |
308
|
4
|
|
|
|
|
14
|
$combined->set_N($combN); |
309
|
|
|
|
|
|
|
|
310
|
4
|
|
|
|
|
25
|
my $delta = $other->M1() - $self->M1(); |
311
|
4
|
|
|
|
|
8
|
my $delta2 = $delta*$delta; |
312
|
4
|
|
|
|
|
9
|
my $delta3 = $delta*$delta2; |
313
|
4
|
|
|
|
|
6
|
my $delta4 = $delta2*$delta2; |
314
|
|
|
|
|
|
|
|
315
|
4
|
|
|
|
|
11
|
$combined->{'M1'} = ($selfN*$self->M1() + $otherN*$other->M1()) / $combN; |
316
|
|
|
|
|
|
|
|
317
|
4
|
|
|
|
|
14
|
$combined->{'M2'} = $selfM2 + $otherM2 + |
318
|
|
|
|
|
|
|
$delta2 * $selfN * $otherN / $combN; |
319
|
|
|
|
|
|
|
|
320
|
4
|
|
|
|
|
14
|
$combined->{'M3'} = $selfM3 + $otherM3 + |
321
|
|
|
|
|
|
|
$delta3 * $selfN * $otherN * ($selfN - $otherN)/($combN*$combN) + |
322
|
|
|
|
|
|
|
3.0*$delta * ($selfN*$otherM2 - $otherN*$selfM2) / $combN |
323
|
|
|
|
|
|
|
; |
324
|
|
|
|
|
|
|
|
325
|
4
|
|
|
|
|
22
|
$combined->{'M4'} = $self->{'M4'} + $other->{'M4'} |
326
|
|
|
|
|
|
|
+ $delta4*$selfN*$otherN * ($selfN*$selfN - $selfN*$otherN + $otherN*$otherN) / |
327
|
|
|
|
|
|
|
($combN*$combN*$combN) |
328
|
|
|
|
|
|
|
+ 6.0*$delta2 * ($selfN*$selfN*$otherM2 + $otherN*$otherN*$selfM2)/($combN*$combN) + |
329
|
|
|
|
|
|
|
4.0*$delta*($selfN*$otherM3 - $otherN*$selfM3) / $combN |
330
|
|
|
|
|
|
|
; |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
# add the histograms only if structure matches: |
333
|
4
|
100
|
|
|
|
13
|
if( $self->_equals_histograms_structure($other) ){ |
334
|
1
|
|
|
|
|
4
|
$combined->_histogram_copy_from($self); |
335
|
1
|
|
|
|
|
8
|
$combined->_add_histograms($other); |
336
|
|
|
|
|
|
|
} |
337
|
|
|
|
|
|
|
|
338
|
4
|
|
|
|
|
20
|
return $combined; |
339
|
|
|
|
|
|
|
} |
340
|
|
|
|
|
|
|
# appends another Running obj INTO current |
341
|
|
|
|
|
|
|
# histogram data is appended only if histogram specs are the same |
342
|
|
|
|
|
|
|
# current obj (self) IS MODIFIED |
343
|
|
|
|
|
|
|
sub append { |
344
|
0
|
|
|
0
|
1
|
0
|
my $self = $_[0]; # us |
345
|
0
|
|
|
|
|
0
|
my $other = $_[1]; # another Running obj |
346
|
0
|
|
|
|
|
0
|
$self->copy_from($self+$other); |
347
|
|
|
|
|
|
|
} |
348
|
|
|
|
|
|
|
# equality only wrt to stats BUT NOT histogram |
349
|
|
|
|
|
|
|
sub equals { |
350
|
4
|
|
|
4
|
1
|
16
|
my $self = $_[0]; # us |
351
|
4
|
|
|
|
|
5
|
my $other = $_[1]; # another Running obj |
352
|
|
|
|
|
|
|
return |
353
|
4
|
|
66
|
|
|
8
|
$self->get_N() == $other->get_N() && |
354
|
|
|
|
|
|
|
$self->equals_statistics($other) |
355
|
|
|
|
|
|
|
} |
356
|
|
|
|
|
|
|
sub equals_statistics { |
357
|
5
|
|
|
5
|
1
|
15
|
my $self = $_[0]; # us |
358
|
5
|
|
|
|
|
10
|
my $other = $_[1]; # another Running obj |
359
|
|
|
|
|
|
|
return |
360
|
5
|
|
33
|
|
|
8
|
abs($self->M1()-$other->M1()) < Statistics::Running::SMALL_NUMBER_FOR_EQUALITY && |
361
|
|
|
|
|
|
|
abs($self->M2()-$other->M2()) < Statistics::Running::SMALL_NUMBER_FOR_EQUALITY && |
362
|
|
|
|
|
|
|
abs($self->M3()-$other->M3()) < Statistics::Running::SMALL_NUMBER_FOR_EQUALITY && |
363
|
|
|
|
|
|
|
abs($self->M4()-$other->M4()) < Statistics::Running::SMALL_NUMBER_FOR_EQUALITY |
364
|
|
|
|
|
|
|
} |
365
|
|
|
|
|
|
|
# checks if structure is same and then if bin contents (counts) are same |
366
|
|
|
|
|
|
|
# returns 1 if equals |
367
|
|
|
|
|
|
|
# returns 0 if either structure or counts are not the same |
368
|
|
|
|
|
|
|
sub equals_histograms { |
369
|
2
|
|
|
2
|
1
|
12
|
my $self = $_[0]; # us |
370
|
2
|
|
|
|
|
4
|
my $other = $_[1]; # another Running obj |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
# structure is not the same |
373
|
2
|
50
|
|
|
|
7
|
if( $self->_equals_histograms_structure($other) == 0 ){ return 0 } |
|
0
|
|
|
|
|
0
|
|
374
|
|
|
|
|
|
|
|
375
|
2
|
|
|
|
|
6
|
my $selfC = $self->{'histo'}->{'bins'}->{'c'}; |
376
|
2
|
|
|
|
|
4
|
my $otherC = $other->{'histo'}->{'bins'}->{'c'}; |
377
|
2
|
|
|
|
|
4
|
my $i; |
378
|
2
|
|
|
|
|
5
|
for($i=$self->{'histo'}->{'num-bins'};$i-->0;){ |
379
|
10
|
50
|
|
|
|
23
|
if( $selfC->[$i] != $otherC->[$i] ){ return 0 } |
|
0
|
|
|
|
|
0
|
|
380
|
|
|
|
|
|
|
} |
381
|
2
|
|
|
|
|
10
|
return 1 # equal in structure and counts |
382
|
|
|
|
|
|
|
} |
383
|
|
|
|
|
|
|
# adds counts of histograms to us from other |
384
|
|
|
|
|
|
|
# returns 0 if structures do not match |
385
|
|
|
|
|
|
|
# returns 1 if counts added OK |
386
|
|
|
|
|
|
|
sub _add_histograms { |
387
|
1
|
|
|
1
|
|
2
|
my $self = $_[0]; # us |
388
|
1
|
|
|
|
|
2
|
my $other = $_[1]; # another Running obj |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
# structure is not the same |
391
|
1
|
50
|
|
|
|
8
|
if( $self->_equals_histograms_structure($other) == 0 ){ return 0 } |
|
0
|
|
|
|
|
0
|
|
392
|
|
|
|
|
|
|
|
393
|
1
|
|
|
|
|
4
|
my $selfC = $self->{'histo'}->{'bins'}->{'c'}; |
394
|
1
|
|
|
|
|
2
|
my $otherC = $other->{'histo'}->{'bins'}->{'c'}; |
395
|
1
|
|
|
|
|
2
|
my $i; |
396
|
1
|
|
|
|
|
5
|
for($i=$self->{'histo'}->{'num-bins'};$i-->0;){ |
397
|
10
|
|
|
|
|
18
|
$selfC->[$i] += $otherC->[$i]; |
398
|
|
|
|
|
|
|
} |
399
|
1
|
|
|
|
|
2
|
return 1 # counts added |
400
|
|
|
|
|
|
|
} |
401
|
|
|
|
|
|
|
# print object as a string, string concat/printing is overloaded on this method |
402
|
|
|
|
|
|
|
sub stringify { |
403
|
3
|
|
|
3
|
1
|
14
|
my $self = $_[0]; |
404
|
3
|
|
|
|
|
17
|
return "N: ".$self->get_N() |
405
|
|
|
|
|
|
|
.", mean: ".$self->mean() |
406
|
|
|
|
|
|
|
.", range: ".$self->min()." to ".$self->max() |
407
|
|
|
|
|
|
|
.", standard deviation: ".$self->standard_deviation() |
408
|
|
|
|
|
|
|
.", kurtosis: ".$self->kurtosis() |
409
|
|
|
|
|
|
|
.", skewness: ".$self->skewness() |
410
|
|
|
|
|
|
|
.", histogram:\n".$self->histogram_stringify() |
411
|
|
|
|
|
|
|
} |
412
|
|
|
|
|
|
|
# internal methods, no need for anyone to know or use externally |
413
|
6
|
|
|
6
|
0
|
30
|
sub set_N { $_[0]->{'N'} = $_[1] } |
414
|
28
|
|
|
28
|
0
|
112
|
sub M1 { return $_[0]->{'M1'} } |
415
|
20
|
|
|
20
|
0
|
63
|
sub M2 { return $_[0]->{'M2'} } |
416
|
20
|
|
|
20
|
0
|
51
|
sub M3 { return $_[0]->{'M3'} } |
417
|
12
|
|
|
12
|
0
|
46
|
sub M4 { return $_[0]->{'M4'} } |
418
|
|
|
|
|
|
|
# copy src's histogram to us, erasing previous data and histo-format |
419
|
|
|
|
|
|
|
sub _histogram_copy_from { |
420
|
2
|
|
|
2
|
|
6
|
my $self = $_[0]; |
421
|
2
|
|
|
|
|
5
|
my $src = $_[1]; # a src stats object whose histogram we are copying onto us |
422
|
2
|
|
|
|
|
6
|
$self->histogram_bins_hash($src->histogram_bins_hash()); |
423
|
|
|
|
|
|
|
} |
424
|
|
|
|
|
|
|
# given bin-width, num-bins and left-boundary create the bin arrays |
425
|
|
|
|
|
|
|
sub _histogram_create_bins_from_spec { |
426
|
7
|
|
|
7
|
|
19
|
my ($self, $bw, $nb, $lb) = @_; |
427
|
|
|
|
|
|
|
|
428
|
7
|
|
|
|
|
12
|
$self->{'histo'}->{'num-bins'} = $nb; |
429
|
7
|
|
|
|
|
25
|
my @B = (0)x($nb+1); |
430
|
7
|
|
|
|
|
10
|
my ($i); |
431
|
7
|
|
|
|
|
10
|
my $v = $lb; |
432
|
7
|
|
|
|
|
18
|
for($i=0;$i<=$nb;$i++){ |
433
|
63
|
|
|
|
|
82
|
$B[$i] = $v; |
434
|
63
|
|
|
|
|
101
|
$v += $bw; |
435
|
|
|
|
|
|
|
} |
436
|
7
|
|
|
|
|
16
|
$self->{'histo'}->{'bins'}->{'b'} = \@B; |
437
|
7
|
|
|
|
|
27
|
$self->{'histo'}->{'bins'}->{'c'} = [(0)x$nb]; |
438
|
|
|
|
|
|
|
} |
439
|
|
|
|
|
|
|
# add a datapoint to the histogram, this is usually called only via the public add() |
440
|
|
|
|
|
|
|
sub _histogram_add { |
441
|
753
|
|
|
753
|
|
1046
|
my $self = $_[0]; |
442
|
753
|
|
|
|
|
949
|
my $x = $_[1]; # value to add |
443
|
753
|
|
|
|
|
965
|
my ($n, $i); |
444
|
753
|
100
|
|
|
|
1460
|
if( ($n=$self->{'histo'}->{'num-bins'}) <= 0 ){ return } |
|
502
|
|
|
|
|
968
|
|
445
|
251
|
|
|
|
|
431
|
my $B = $self->{'histo'}->{'bins'}->{'b'}; |
446
|
251
|
|
|
|
|
473
|
for($i=0;$i<$n;$i++){ |
447
|
1090
|
100
|
100
|
|
|
3234
|
if( ($x > $B->[$i]) && ($x <= $B->[$i+1]) ){ |
448
|
146
|
|
|
|
|
557
|
$self->{'histo'}->{'bins'}->{'c'}->[$i]++; |
449
|
146
|
|
|
|
|
301
|
$self->{'histo'}->{'needs-recalculate'} = 1; # need to recalc stringify |
450
|
|
|
|
|
|
|
return |
451
|
146
|
|
|
|
|
349
|
} |
452
|
|
|
|
|
|
|
} |
453
|
|
|
|
|
|
|
} |
454
|
|
|
|
|
|
|
# given the bins and bin counts arrays, return a hash in the natural form: |
455
|
|
|
|
|
|
|
# from-bin:to-bin -> count |
456
|
|
|
|
|
|
|
# see also _bins2stathash for returning a hash of the format specified in Statistics::Descriptive |
457
|
|
|
|
|
|
|
sub _bins2hash { |
458
|
6
|
|
|
6
|
|
20
|
my $self = $_[0]; |
459
|
6
|
|
|
|
|
14
|
my %ret = (); |
460
|
6
|
|
|
|
|
9
|
my $B = $self->{'histo'}->{'bins'}->{'b'}; |
461
|
6
|
|
|
|
|
12
|
my $C = $self->{'histo'}->{'bins'}->{'c'}; |
462
|
6
|
|
|
|
|
8
|
my $i; |
463
|
6
|
|
|
|
|
19
|
for($i=$self->{'histo'}->{'num-bins'};$i-->0;){ |
464
|
45
|
|
|
|
|
117
|
$ret{$B->[$i].":".$B->[$i+1]} = $C->[$i] |
465
|
|
|
|
|
|
|
} |
466
|
6
|
|
|
|
|
20
|
return \%ret |
467
|
|
|
|
|
|
|
} |
468
|
|
|
|
|
|
|
# given the bins and bin counts arrays, return a hash with keys |
469
|
|
|
|
|
|
|
# to-bin -> count |
470
|
|
|
|
|
|
|
# whereas count is the count of the bin specified by to-bin and its previous key of the hash |
471
|
|
|
|
|
|
|
sub _bins2stathash { |
472
|
2
|
|
|
2
|
|
4
|
my $self = $_[0]; |
473
|
2
|
|
|
|
|
5
|
my %ret = (); |
474
|
2
|
|
|
|
|
4
|
my $B = $self->{'histo'}->{'bins'}->{'b'}; |
475
|
2
|
|
|
|
|
4
|
my $C = $self->{'histo'}->{'bins'}->{'c'}; |
476
|
2
|
|
|
|
|
3
|
my $i; |
477
|
2
|
|
|
|
|
7
|
for($i=$self->{'histo'}->{'num-bins'}-1;$i-->0;){ |
478
|
18
|
|
|
|
|
57
|
$ret{$B->[$i+1]} = $C->[$i] |
479
|
|
|
|
|
|
|
} |
480
|
2
|
|
|
|
|
16
|
return \%ret |
481
|
|
|
|
|
|
|
} |
482
|
|
|
|
|
|
|
# given a hash with keys |
483
|
|
|
|
|
|
|
# from-bin:to-bin -> count |
484
|
|
|
|
|
|
|
# erase and re-create the bin and counts arrays of histo. |
485
|
|
|
|
|
|
|
# for a way to import Statistics::Descriptive frequency_distribution hash check _stathash2bins() |
486
|
|
|
|
|
|
|
sub _hash2bins { |
487
|
3
|
|
|
3
|
|
7
|
my $self = $_[0]; |
488
|
3
|
|
|
|
|
5
|
my $H = $_[1]; |
489
|
3
|
|
|
|
|
5
|
my @B = (); |
490
|
3
|
|
|
|
|
6
|
my @C = (); |
491
|
3
|
|
|
|
|
22
|
my @K = keys %$H; |
492
|
3
|
|
|
|
|
8
|
$self->{'histo'}->{'num-bins'} = scalar(@K); |
493
|
3
|
|
|
|
|
7
|
my ($acount, $akey); |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
my @X = map { |
496
|
15
|
|
|
|
|
37
|
push(@B, $_->[1]); # left-bin (from) |
497
|
15
|
|
|
|
|
27
|
push(@C, $H->{$_->[0]}); # counts |
498
|
15
|
|
|
|
|
48
|
$_->[2]; # spit out the right-bin (to) |
499
|
|
|
|
|
|
|
} |
500
|
30
|
|
|
|
|
49
|
sort { $a->[1] <=> $b->[1] } |
501
|
3
|
|
|
|
|
11
|
map { [ $_, split(/\:/, $_) ] } |
|
15
|
|
|
|
|
56
|
|
502
|
|
|
|
|
|
|
@K |
503
|
|
|
|
|
|
|
; |
504
|
3
|
|
|
|
|
14
|
push(@B, $X[-1]); |
505
|
3
|
|
|
|
|
8
|
$self->{'histo'}->{'bins'}->{'b'} = \@B; |
506
|
3
|
|
|
|
|
15
|
$self->{'histo'}->{'bins'}->{'c'} = \@C; |
507
|
|
|
|
|
|
|
} |
508
|
|
|
|
|
|
|
# given a hash with keys |
509
|
|
|
|
|
|
|
# to-bin -> count |
510
|
|
|
|
|
|
|
# erase and re-create the bin and counts arrays of histo. |
511
|
|
|
|
|
|
|
# the hash is exactly what Statistics::Descriptive::frequency_distribution() returns |
512
|
|
|
|
|
|
|
# there is only one problem: what is the left-boundary? we will set it to -infinity. |
513
|
|
|
|
|
|
|
sub _stathash2bins { |
514
|
0
|
|
|
0
|
|
0
|
my $self = $_[0]; |
515
|
0
|
|
|
|
|
0
|
my $H = $_[1]; # hashref: exactly what Statistics::Descriptive::frequency_distribution() returns |
516
|
0
|
|
|
|
|
0
|
my @B = (); |
517
|
0
|
|
|
|
|
0
|
my @C = (); |
518
|
0
|
|
|
|
|
0
|
my @K = keys %$H; |
519
|
0
|
|
|
|
|
0
|
$self->{'histo'}->{'num-bins'} = scalar(@K); |
520
|
0
|
|
|
|
|
0
|
my ($acount, $akey); |
521
|
|
|
|
|
|
|
|
522
|
0
|
|
|
|
|
0
|
push(@B, -(~0 >> 1)); # -MAX_INT fuck you. |
523
|
0
|
|
|
|
|
0
|
foreach my $k (sort { $a <=> $b } keys %$H){ |
|
0
|
|
|
|
|
0
|
|
524
|
0
|
|
|
|
|
0
|
push(@B, $k); |
525
|
0
|
|
|
|
|
0
|
push(@C, $H->{$k}); |
526
|
|
|
|
|
|
|
} |
527
|
0
|
|
|
|
|
0
|
$self->{'histo'}->{'bins'}->{'b'} = \@B; |
528
|
0
|
|
|
|
|
0
|
$self->{'histo'}->{'bins'}->{'c'} = \@C; |
529
|
|
|
|
|
|
|
} |
530
|
|
|
|
|
|
|
# compares the structure of the histograms of us and another obj |
531
|
|
|
|
|
|
|
# if histograms have same number of bins and same bin-specs (boundaries) |
532
|
|
|
|
|
|
|
# then histograms are equal and returns 1 |
533
|
|
|
|
|
|
|
# if both histograms contain zero bins (not initialised) then also returns 1 |
534
|
|
|
|
|
|
|
# else, histogram structure differs and returns 0 |
535
|
|
|
|
|
|
|
sub _equals_histograms_structure { |
536
|
7
|
|
|
7
|
|
15
|
my ($self, $other) = @_; |
537
|
|
|
|
|
|
|
|
538
|
7
|
|
|
|
|
14
|
my $NB1 = $self->{'histo'}->{'num-bins'}; |
539
|
7
|
100
|
|
|
|
20
|
if( $NB1 != $other->{'histo'}->{'num-bins'} ){ return 0 } |
|
3
|
|
|
|
|
11
|
|
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
# no bins, so equal! |
542
|
4
|
50
|
|
|
|
9
|
if( $NB1 == -1 ){ return 1 } |
|
0
|
|
|
|
|
0
|
|
543
|
|
|
|
|
|
|
|
544
|
4
|
|
|
|
|
8
|
my $b1 = $self->{'histo'}->{'bins'}->{'b'}; |
545
|
4
|
|
|
|
|
8
|
my $b2 = $other->{'histo'}->{'bins'}->{'b'}; |
546
|
4
|
|
|
|
|
13
|
for(my $i=$NB1+1;$i-->0;){ |
547
|
34
|
50
|
|
|
|
74
|
if( $b1->[$i] != $b2->[$i] ){ return 0 } |
|
0
|
|
|
|
|
0
|
|
548
|
|
|
|
|
|
|
} |
549
|
4
|
|
|
|
|
14
|
return 1 # equal histogram STRUCTURES (not bincounts) |
550
|
|
|
|
|
|
|
} |
551
|
|
|
|
|
|
|
1; |
552
|
|
|
|
|
|
|
__END__ |