blib/lib/Statistics/Sequences/Turns.pm | |||
---|---|---|---|
Criterion | Covered | Total | % |
statement | 63 | 68 | 92.6 |
branch | 26 | 36 | 72.2 |
condition | 11 | 14 | 78.5 |
subroutine | 15 | 16 | 93.7 |
pod | 9 | 9 | 100.0 |
total | 124 | 143 | 86.7 |
line | stmt | bran | cond | sub | pod | time | code |
---|---|---|---|---|---|---|---|
1 | package Statistics::Sequences::Turns; | ||||||
2 | 2 | 2 | 27270 | use 5.008008; | |||
2 | 5 | ||||||
3 | 2 | 2 | 8 | use strict; | |||
2 | 2 | ||||||
2 | 36 | ||||||
4 | 2 | 2 | 7 | use warnings; | |||
2 | 9 | ||||||
2 | 50 | ||||||
5 | 2 | 2 | 6 | use Carp 'croak'; | |||
2 | 2 | ||||||
2 | 112 | ||||||
6 | 2 | 2 | 6 | use base qw(Statistics::Sequences); | |||
2 | 3 | ||||||
2 | 873 | ||||||
7 | $Statistics::Sequences::Turns::VERSION = '0.13'; | ||||||
8 | 2 | 2 | 56200 | use Statistics::Zed 0.10; | |||
2 | 12481 | ||||||
2 | 1229 | ||||||
9 | |||||||
10 | =pod | ||||||
11 | |||||||
12 | =head1 NAME | ||||||
13 | |||||||
14 | Statistics::Sequences::Turns - Kendall's turning-points test - of peaks and troughs in a numerical sequence | ||||||
15 | |||||||
16 | =head1 VERSION | ||||||
17 | |||||||
18 | This is documentation for B |
||||||
19 | |||||||
20 | =head1 SYNOPSIS | ||||||
21 | |||||||
22 | use strict; | ||||||
23 | use Statistics::Sequences::Turns 0.13; | ||||||
24 | my $turns = Statistics::Sequences::Turns->new(); | ||||||
25 | $turns->load([2, 0, 8.5, 5, 3, 5.01, 2, 2, 3]); # numbers; or give as "data => $aref" with each stat call | ||||||
26 | my $val = $turns->observed(); # or descriptive methods: expected(), variance(), obsdev() and stdev() | ||||||
27 | $val = $turns->z_value(); # # or in list context get both z- and p-value | ||||||
28 | $val = $turns->p_value(); # as above, assume data are loaded | ||||||
29 | my $href = $turns->stats_hash(values => [qw/observed p_value/], ccorr => 1); # incl. any other stat-method | ||||||
30 | $turns->dump(values => [qw/observed expected p_value/], ccorr => 1, flag => 1, precision_s => 3, precision_p => 7); | ||||||
31 | # prints: observed = 11.000, expected = 10.900, p_value = 0.5700167 | ||||||
32 | |||||||
33 | =head1 DESCRIPTION | ||||||
34 | |||||||
35 | Implements Kendall's (1973) "turning point test" of sudden changes as peaks and troughs in the values of a numerical sequence. It is sometimes described as a test of "cyclicity", and often used as a test of randomness. Kendall (1973) introduced this as a test of ups and downs relative to linear progressions in a sequence (ahead of describing tests based on autocorrelation and Fourier analysis). | ||||||
36 | |||||||
37 | Specifically, for a sequence of numerical data (interval or ordinal) of size I |
||||||
38 | |||||||
39 | With these local fluctuations tested regardless of their spacing and magnitude, the test does not indicate if the changes actually cycle between highs and lows, if they are more or less balanced in magnitude, or if any cycling is periodic; only if oscillation in general is more common than linear progression. | ||||||
40 | |||||||
41 | =head1 METHODS | ||||||
42 | |||||||
43 | =head2 new | ||||||
44 | |||||||
45 | $turns = Statistics::Sequences::Turns->new(); | ||||||
46 | |||||||
47 | Returns a new Turns object. Expects/accepts no arguments but the classname. | ||||||
48 | |||||||
49 | =head2 load | ||||||
50 | |||||||
51 | $turns->load(@data); | ||||||
52 | $turns->load(\@data); | ||||||
53 | $turns->load('foodat' => \@data); # labelled whatever | ||||||
54 | |||||||
55 | Loads data anonymously or by name - see L |
||||||
56 | |||||||
57 | =cut | ||||||
58 | |||||||
59 | sub load { | ||||||
60 | 2 | 2 | 1 | 262 | my $self = shift; | ||
61 | 2 | 10 | $self->SUPER::load(@_); | ||||
62 | 2 | 50 | 292 | croak __PACKAGE__, '::load All data must be numerical for turns statistics' | |||
63 | if !$self->all_numeric( $self->access( index => -1 ) ); | ||||||
64 | 2 | 315 | return 1; | ||||
65 | } | ||||||
66 | |||||||
67 | =head2 add, read, unload | ||||||
68 | |||||||
69 | See L |
||||||
70 | |||||||
71 | =head2 observed | ||||||
72 | |||||||
73 | $v = $turns->observed(); # use anonymously loaded data | ||||||
74 | $v = $turns->observed(name => 'mysequence'); # ... or by "name" given on loading | ||||||
75 | $v = $turns->observed(data => \@data); # ... or just give the data now | ||||||
76 | |||||||
77 | Returns observed number of turns. This is the number of peaks and troughs, starting the count from index 1 of the sequence (a flat array), checking if both its immediate left/right (or past/future) neighbours are lesser than it (a peak) or greater than it (a trough). Wherever the values in successive indices in the sequence are equal, they are treated as a single observation/datum - so the following: | ||||||
78 | |||||||
79 | 0 0 1 1 0 1 1 1 0 1 | ||||||
80 | |||||||
81 | is counted up for turns as | ||||||
82 | |||||||
83 | 0 1 0 1 0 1 | ||||||
84 | * * * * | ||||||
85 | |||||||
86 | This shows four turns - two peaks (0 1 0) and two troughs (1 0 1). | ||||||
87 | |||||||
88 | Returns 0 if the given list of is empty, or the number of its elements is less than 3. | ||||||
89 | |||||||
90 | =cut | ||||||
91 | |||||||
92 | sub observed { | ||||||
93 | 8 | 8 | 1 | 319 | my $self = shift; | ||
94 | 8 | 100 | 17 | my $args = ref $_[0] ? shift : {@_}; | |||
95 | 8 | 16 | my $data = _set_data( $self, $args ); | ||||
96 | 8 | 5 | my $trials = scalar @{$data}; | ||||
8 | 10 | ||||||
97 | 8 | 50 | 33 | 25 | return 0 if not $trials or $trials < 3; | ||
98 | 8 | 11 | my ( $count, $i ) = (0); | ||||
99 | 8 | 17 | for ( $i = 1 ; $i < $trials - 1 ; $i++ ) { | ||||
100 | 276 | 100 | 100 | 909 | if ( ( $data->[ $i - 1 ] > $data->[$i] ) | ||
100 | 100 | ||||||
101 | && ( $data->[ $i + 1 ] > $data->[$i] ) ) | ||||||
102 | { # trough at $i | ||||||
103 | 88 | 104 | $count++; | ||||
104 | } | ||||||
105 | elsif (( $data->[ $i - 1 ] < $data->[$i] ) | ||||||
106 | && ( $data->[ $i + 1 ] < $data->[$i] ) ) | ||||||
107 | { # peak at $i | ||||||
108 | 88 | 120 | $count++; | ||||
109 | } | ||||||
110 | } | ||||||
111 | 8 | 27 | return $count; | ||||
112 | } | ||||||
113 | |||||||
114 | =head2 expected | ||||||
115 | |||||||
116 | $v = $turns->expected(); # use loaded data; or specify by "name" | ||||||
117 | $v = $turns->expected(data => \@data); # use these data | ||||||
118 | $v = $turns->expected(trials => POS_INT); # don't use actual data; calculate from this number of trials | ||||||
119 | |||||||
120 | Returns the expected number of turns, which is set by I |
||||||
121 | |||||||
122 | =for html E[T] = 2 / 3 (N – 2) |
||||||
123 | |||||||
124 | or, equivalently (in some sources), | ||||||
125 | |||||||
126 | =for html E[T] = ( 2N – 4 ) / 3 |
||||||
127 | |||||||
128 | =cut | ||||||
129 | |||||||
130 | sub expected { | ||||||
131 | 7 | 7 | 1 | 398 | my $self = shift; | ||
132 | 7 | 50 | 19 | my $args = ref $_[0] ? shift : {@_}; | |||
133 | my $trials = | ||||||
134 | defined $args->{'trials'} | ||||||
135 | ? $args->{'trials'} | ||||||
136 | 7 | 100 | 13 | : scalar( @{ _set_data( $self, $args ) } ); | |||
3 | 8 | ||||||
137 | 7 | 25 | return 2 / 3 * ( $trials - 2 ); | ||||
138 | |||||||
139 | #return (2 * $trials - 4) / 3; | ||||||
140 | } | ||||||
141 | |||||||
142 | =head2 variance | ||||||
143 | |||||||
144 | $v = $turns->variance(); # use loaded data; or specify by "name" | ||||||
145 | $v = $turns->variance(data => \@data); # use these data | ||||||
146 | $v = $turns->variance(trials => POS_INT); # don't use actual data; calculate from this number of trials | ||||||
147 | |||||||
148 | Returns the expected variance in the number of turns for the given length of data I |
||||||
149 | |||||||
150 | =for html V[T] = (16N – 29 ) / 90 |
||||||
151 | |||||||
152 | =cut | ||||||
153 | |||||||
154 | sub variance { | ||||||
155 | 8 | 8 | 1 | 416 | my $self = shift; | ||
156 | 8 | 50 | 20 | my $args = ref $_[0] ? shift : {@_}; | |||
157 | my $trials = | ||||||
158 | defined $args->{'trials'} | ||||||
159 | ? $args->{'trials'} | ||||||
160 | 8 | 100 | 15 | : scalar( @{ _set_data( $self, $args ) } ); | |||
4 | 9 | ||||||
161 | 8 | 52 | return ( 16 * $trials - 29 ) / 90; | ||||
162 | } | ||||||
163 | |||||||
164 | =head2 obsdev | ||||||
165 | |||||||
166 | $v = $turns->obsdev(); # use data already loaded - anonymously; or specify its "name" | ||||||
167 | $v = $turns->obsdev(data => \@data); # use these data | ||||||
168 | |||||||
169 | Returns the observed deviation from expectation for the loaded/given sequence: observed I |
||||||
170 | |||||||
171 | =cut | ||||||
172 | |||||||
173 | sub obsdev { | ||||||
174 | 1 | 1 | 1 | 200 | return observed(@_) - expected(@_); | ||
175 | } | ||||||
176 | *observed_deviation = \&obsdev; | ||||||
177 | |||||||
178 | =head2 stdev | ||||||
179 | |||||||
180 | $v = $turns->stdev(); # use data already loaded - anonymously; or specify its "name" | ||||||
181 | $v = $turns->stdev(data => \@data); | ||||||
182 | |||||||
183 | Returns square-root of the variance. Aliases C |
||||||
184 | |||||||
185 | =cut | ||||||
186 | |||||||
187 | sub stdev { | ||||||
188 | 2 | 2 | 1 | 434 | return sqrt variance(@_); | ||
189 | } | ||||||
190 | *standard_deviation = \&stdev; | ||||||
191 | *stddev = \&stdev; | ||||||
192 | |||||||
193 | =head2 z_value | ||||||
194 | |||||||
195 | $z = $turns->z_value(ccorr => 1); # use data already loaded - anonymously; or specify its "name" | ||||||
196 | $z = $turns->z_value(data => $aref, ccorr => BOOL); | ||||||
197 | ($z, $p) = $turns->z_value(data => $aref, ccorr => BOOL, tails => 2); # same but wanting an array, get the p-value too | ||||||
198 | |||||||
199 | Returns the deviation ratio, or I -value. |
||||||
200 | |||||||
201 | The data to test can already have been L |
||||||
202 | |||||||
203 | Optional named arguments B -value). |
||||||
204 | |||||||
205 | The method can all be called with "sufficient" data: giving, instead of actual data, the B |
||||||
206 | |||||||
207 | Alias C |
||||||
208 | |||||||
209 | =cut | ||||||
210 | |||||||
211 | sub z_value { | ||||||
212 | 4 | 4 | 1 | 659 | my $self = shift; | ||
213 | 4 | 100 | 13 | my $args = ref $_[0] ? shift : {@_}; | |||
214 | 4 | 9 | my $data = _set_data( $self, $args ); | ||||
215 | my $trials = | ||||||
216 | 4 | 50 | 11 | defined $args->{'trials'} ? $args->{'trials'} : scalar @{$data}; | |||
4 | 5 | ||||||
217 | 4 | 19 | my $zed = Statistics::Zed->new(); | ||||
218 | my ( $zval, $pval ) = $zed->zscore( | ||||||
219 | observed => defined $args->{'observed'} | ||||||
220 | ? $args->{'observed'} | ||||||
221 | : $self->observed($args), | ||||||
222 | expected => $self->expected( trials => $trials ), | ||||||
223 | variance => $self->variance( trials => $trials ), | ||||||
224 | ccorr => defined $args->{'ccorr'} ? $args->{'ccorr'} : 1, | ||||||
225 | tails => $args->{'tails'} || 2, | ||||||
226 | precision_s => $args->{'precision_s'}, | ||||||
227 | 4 | 50 | 50 | 88 | precision_p => $args->{'precision_p'}, | ||
100 | |||||||
228 | ); | ||||||
229 | 4 | 100 | 430 | return wantarray ? ( $zval, $pval ) : $zval; | |||
230 | } | ||||||
231 | *z_score = \&z_value; | ||||||
232 | |||||||
233 | =head2 p_value | ||||||
234 | |||||||
235 | $p = $turns->p_value(); # using loaded data and default args | ||||||
236 | $p = $turns->p_value(ccorr => BOOL, tails => 1|2); # normal-approximation based on loaded data | ||||||
237 | $p = $turns->p_value(data => $aref, ccorr => BOOL, tails => 2); # using given data | ||||||
238 | |||||||
239 | Returns the normal distribution I -value for the deviation ratio (I |
||||||
240 | |||||||
241 | =cut | ||||||
242 | |||||||
243 | sub p_value { | ||||||
244 | 1 | 1 | 1 | 202 | return ( z_value(@_) )[1]; | ||
245 | } | ||||||
246 | |||||||
247 | =head2 stats_hash | ||||||
248 | |||||||
249 | $href = $turns->stats_hash(values => [qw/observed expected variance z_value p_value/], ccorr => 1); | ||||||
250 | |||||||
251 | Returns a hashref for the counts and stats as specified in its "values" argument, and with any options for calculating them. See L |
||||||
252 | |||||||
253 | =head2 dump | ||||||
254 | |||||||
255 | $turns->dump(flag => BOOL, verbose => BOOL, format => 'table|labline|csv'); | ||||||
256 | |||||||
257 | Print test results to STDOUT. See L |
||||||
258 | |||||||
259 | =cut | ||||||
260 | |||||||
261 | sub dump { | ||||||
262 | 0 | 0 | 1 | 0 | my $self = shift; | ||
263 | 0 | 0 | 0 | my $args = ref $_[0] ? $_[0] : {@_}; | |||
264 | 0 | 0 | $args->{'stat'} = 'turns'; | ||||
265 | 0 | 0 | $self->SUPER::dump($args); | ||||
266 | 0 | 0 | return $self; | ||||
267 | } | ||||||
268 | |||||||
269 | sub _set_data { # Get data via Statistics::Date | ||||||
270 | # Remove equivalent successors: e.g., strip 2nd 2 from (3, 2, 2, 7, 2): | ||||||
271 | 19 | 19 | 18 | my $self = shift; | |||
272 | 19 | 50 | 39 | my $args = ref $_[0] ? $_[0] : {@_}; | |||
273 | 19 | 50 | my $data = $self->access($args) | ||||
274 | ; # have been already checked to be numerical if previously load()'ed | ||||||
275 | 19 | 50 | 563 | ref $data or croak __PACKAGE__, '::Data for counting up turns are needed'; | |||
276 | 19 | 21 | my @data_u = (); | ||||
277 | 19 | 23 | for my $i ( 0 .. ( scalar @{$data} - 1 ) ) { | ||||
19 | 42 | ||||||
278 | 731 | 100 | 100 | 2424 | push @data_u, $data->[$i] | ||
279 | if not scalar @data_u | ||||||
280 | or $data->[$i] != $data_u[-1]; | ||||||
281 | } | ||||||
282 | 19 | 40 | return \@data_u; | ||||
283 | } | ||||||
284 | |||||||
285 | __END__ |