File Coverage

blib/lib/Statistics/DEA.pm
Criterion Covered Total %
statement 57 57 100.0
branch 15 24 62.5
condition 3 6 50.0
subroutine 12 12 100.0
pod 7 7 100.0
total 94 106 88.6


line stmt bran cond sub pod time code
1             package Statistics::DEA;
2              
3 3     3   2613 use vars qw($VERSION);
  3         5  
  3         8411  
4              
5             $VERSION = '0.04';
6              
7 3     3   25 use strict;
  3         5  
  3         374  
8             # use warnings; requires 5.6.0
9              
10 3     3   18 use Carp;
  3         9  
  3         15440  
11              
12             =head1 NAME
13              
14             Statistics::DEA - Discontiguous Exponential Averaging
15              
16             =head1 SYNOPSIS
17              
18             use Statistics::DEA;
19              
20             my $dea = Statistics::DEA->new($alpha, $max_gap);
21              
22             while (($data, $time) = some_data_source(...)) {
23             ...
24             $dea->update($data, $time);
25             print $dea->average(), "\n";
26             print $dea->standard_deviation(), "\n";
27             print $dea->completeness($time), "\n";
28             ...
29             }
30              
31             =head1 DESCRIPTION
32              
33             The Statistics::DEA module can be used to compute exponentially
34             decaying averages and standard deviations even when the data has
35             gaps. The algorithm also avoids initial value bias and postgap bias.
36              
37             =head2 new
38              
39             my $dea = Statistics::DEA->new($alpha, $max_gap);
40              
41             Creates a new (potentially discontiguous) exponential average object.
42              
43             The I<$alpha> is the exponential decay of I: from zero (inclusive)
44             to one (exclusive): the lower values cause the effect of data to decay
45             more quickly, the higher values cause the effect of data to decay more
46             slowly . Specifically, weights on older data decay exponentially with a
47             characteristic time of I<-1/log(alpha)> (I being the natural logarithm).
48              
49             The I<$max_gap> is the maximum I
50             considered lost. If the time interval between updates is I
,
51             using a I<$max_gap> of I will cause each update to fill in up to
52             I of any preceeding skipped updates with the current data value.
53             Use a I<$max_gap> of I
to prevent such filling.
54              
55             =head2 update
56              
57             $dea->update($data, $time);
58              
59             Update the average with new data at a particular point in time.
60              
61             The time parameter is how you can indicate gaps in the data;
62             if you don't have gaps in your data, just monotonously increase it,
63             for example C<$time++>.
64              
65             =head2 average
66              
67             my $avg = $dea->average();
68              
69             Return the current average.
70              
71             Functionally equivalent alias avg() is also available.
72              
73             =head2 standard_deviation
74              
75             my $std_dev = $dea->standard_deviation();
76              
77             Return the current standard deviation.
78              
79             Functionally equivalent alias std_dev() is also available.
80              
81             =head2 completeness
82              
83             my $completeness = $dea->completeness($time);
84              
85             Return the current I: how well based the current average
86             and standard deviation are on actual data. Any time intervals between
87             updates greater than I<$max_gap> reduce this value. A series of updates
88             at time intervals of less than I<$max_gap> will gradually increase this
89             value from its initial, minimum, value of 0 to its maximum value of 1.
90              
91             The $time should represent the current time. It must be >= the time of
92             the last update.
93              
94             =head2 alpha
95              
96             my $alpha = $dea->alpha();
97              
98             Return the current exponential decay of data.
99              
100             $dea->alpha($alpha);
101              
102             Set the exponential decay of data.
103              
104             =head2 max_gap
105              
106             my $alpha = $dea->max_gap();
107              
108             Return the current maximum time gap.
109              
110             $dea->max_gap($max_gap);
111              
112             Set the maximum time gap.
113              
114             =head1 AUTHOR
115              
116             Jarkko Hietaniemi
117              
118             =head1 LICENSE
119              
120             This library is free software; you can redistribute it and/or modify
121             it under the same terms as Perl itself.
122              
123             =head1 ACKNOWLEDGEMENT
124              
125             The idea and the code are from the September 1998 Doctor Dobb's
126             Journal Algorithm Alley article "Discontiguous Exponential Averaging"
127             by John C. Gunther, used with permission. JCG also provided valuable
128             feedback for the documentation -- and even fixed a bug in the code
129             without knowing Perl as such. This is just a Perlification of the
130             pseudocode in the article, all errors in transcription are solely mine.
131              
132             =cut
133              
134             sub alpha {
135 6     6 1 99 my $dea = shift;
136 6 100       24 $dea->{alpha} = shift if @_;
137 6         21 $dea->_max_weight();
138 6         28 return $dea->{alpha};
139             }
140              
141             sub max_gap {
142 6     6 1 14 my $dea = shift;
143 6 100       25 $dea->{max_gap} = shift if @_;
144 6         22 $dea->_max_weight();
145 6         15 return $dea->{max_gap};
146             }
147              
148             sub _max_weight { # Internal use only.
149 12     12   21 my $dea = shift;
150 12 100 66     75 return unless defined $dea->{alpha} && defined $dea->{max_gap};
151 9         44 $dea->{max_weight} = 1 - $dea->{alpha} ** $dea->{max_gap};
152             }
153              
154             sub new {
155 3     3 1 100 my $class = shift;
156 3 50       16 croak __PACKAGE__, "::new: need two arguments: alpha, max_gap"
157             unless @_ == 2;
158 3         9 my ($alpha, $max_gap) = @_;
159 3 50 33     35 croak __PACKAGE__, "::new: Not 0 <= alpha $alpha < 1"
160             unless 0 <= $alpha && $alpha < 1;
161 3 50       14 croak __PACKAGE__, "::new: Not max_gap $max_gap > 0"
162             unless $max_gap > 0;
163 3         12 my $dea = bless {}, $class;
164 3         22 $dea->{sum_of_weights} = 0;
165 3         8 $dea->{sum_of_data} = 0;
166 3         9 $dea->{sum_of_squared_data} = 0;
167 3         10 $dea->{previous_time} = -1e38; # about IEEE -Infinity
168 3         14 $dea->alpha($alpha);
169 3         19 $dea->max_gap($max_gap);
170 3         10 return $dea;
171             }
172              
173             sub update {
174 3     3 1 18 my ($dea, $new_data, $time) = @_;
175 3 50       12 croak __PACKAGE__, "::update: Not previous_time $dea->{previous_time} < time $time"
176             unless $dea->{previous_time} < $time;
177 3         28 my $weight_reduction_factor =
178             $dea->{alpha} ** ($time - $dea->{previous_time});
179 3         7 my $new_data_weight_a = 1 - $weight_reduction_factor;
180 3         6 my $new_data_weight_b = $dea->{max_weight};
181 3 50       11 my $new_data_weight =
182             $new_data_weight_a < $new_data_weight_b ?
183             $new_data_weight_a : $new_data_weight_b;
184 3         9 $dea->{sum_of_weights} =
185             $weight_reduction_factor * $dea->{sum_of_weights} +
186             $new_data_weight;
187 3         11 $dea->{sum_of_data} =
188             $weight_reduction_factor * $dea->{sum_of_data} +
189             $new_data_weight * $new_data;
190 3         14 $dea->{sum_of_squared_data} =
191             $weight_reduction_factor * $dea->{sum_of_squared_data} +
192             $new_data_weight * $new_data * $new_data;
193 3         10 $dea->{previous_time} = $time;
194             }
195              
196             sub _average { # Internal use only.
197 4     4   8 my $dea = shift;
198 4         14 return $dea->{sum_of_data} / $dea->{sum_of_weights};
199             }
200              
201             sub average {
202 2     2 1 4 my $dea = shift;
203 2 50       26 croak __PACKAGE__, "::average: Not sum_of_weights > 0"
204             unless $dea->{sum_of_weights} > 0;
205 2         8 return $dea->_average();
206             }
207              
208             *avg = \&average;
209              
210             sub standard_deviation {
211 2     2 1 45 my $dea = shift;
212 2 50       10 croak __PACKAGE__, "::standard_deviation: Not sum_of_weights > 0"
213             unless $dea->{sum_of_weights} > 0;
214 2         6 my $average = $dea->_average();
215 2         24 return sqrt($dea->{sum_of_squared_data} / $dea->{sum_of_weights} -
216             $average * $average);
217             }
218              
219             *std_dev = \&standard_deviation;
220              
221             sub completeness {
222 2     2 1 5 my $dea = shift;
223 2 50       9 croak __PACKAGE__, "::completeness: need one argument: time"
224             unless @_ == 1;
225 2         4 my $time = shift;
226 2 50       8 croak __PACKAGE__, "::completeness: Not previous_time $dea->{previous_time} <= time $time"
227             unless $dea->{previous_time} <= $time;
228             return
229 2         14 $dea->{alpha} ** ($time - $dea->{previous_time}) *
230             $dea->{sum_of_weights};
231             }
232              
233             1;