blib/lib/Statistics/Sequences.pm | |||
---|---|---|---|
Criterion | Covered | Total | % |
statement | 24 | 123 | 19.5 |
branch | 0 | 68 | 0.0 |
condition | 0 | 7 | 0.0 |
subroutine | 8 | 19 | 42.1 |
pod | 9 | 9 | 100.0 |
total | 41 | 226 | 18.1 |
line | stmt | bran | cond | sub | pod | time | code |
---|---|---|---|---|---|---|---|
1 | package Statistics::Sequences; | ||||||
2 | 2 | 2 | 26669 | use strict; | |||
2 | 4 | ||||||
2 | 51 | ||||||
3 | 2 | 2 | 6 | use warnings FATAL => 'all'; | |||
2 | 2 | ||||||
2 | 71 | ||||||
4 | 2 | 2 | 7 | use Carp qw(croak cluck); | |||
2 | 8 | ||||||
2 | 125 | ||||||
5 | 2 | 2 | 969 | use Statistics::Data 0.09; | |||
2 | 41296 | ||||||
2 | 59 | ||||||
6 | 2 | 2 | 14 | use base qw(Statistics::Data); | |||
2 | 4 | ||||||
2 | 149 | ||||||
7 | 2 | 2 | 8 | use Scalar::Util qw(looks_like_number); | |||
2 | 2 | ||||||
2 | 545 | ||||||
8 | $Statistics::Sequences::VERSION = '0.14'; | ||||||
9 | |||||||
10 | =pod | ||||||
11 | |||||||
12 | =head1 NAME | ||||||
13 | |||||||
14 | Statistics::Sequences - Manage sequences (ordered list of literals) for testing their runs, joins, turns, trinomes, potential energy, etc. | ||||||
15 | |||||||
16 | =head1 VERSION | ||||||
17 | |||||||
18 | This is documentation for Version 0.14 of Statistics::Sequences. | ||||||
19 | |||||||
20 | =head1 SYNOPSIS | ||||||
21 | |||||||
22 | use Statistics::Sequences 0.14; | ||||||
23 | $seq = Statistics::Sequences->new(); | ||||||
24 | my @data = (1, 'a', 'a', 1); # ordered list of literal scalars (numbers, strings), as permitted by specific test | ||||||
25 | $seq->load(\@data); # or @data or dataname => \@data | ||||||
26 | print $seq->observed(stat => 'runs'); # expected, variance, z_value, p_value - assuming sub-module Runs.pm is installed | ||||||
27 | print $seq->test(stat => 'vnomes', length => 2); # - - assuming sub-module Vnomes.pm is installed | ||||||
28 | $seq->dump(stat => 'runs', values => {observed => 1, z_value => 1, p_value => 1}, exact => 1, tails => 1); | ||||||
29 | # see also Statistics::Data for inherited methods | ||||||
30 | |||||||
31 | =head1 DESCRIPTION | ||||||
32 | |||||||
33 | Loading, updating and accessing data as ordered list of literal scalars (numbers, strings) for statistical tests of their sequential structure via L |
||||||
34 | |||||||
35 | To access the tests, L | ||||||
36 | |||||||
37 | Alternatively, L | ||||||
38 | |||||||
39 | =head1 SUBROUTINES/METHODS | ||||||
40 | |||||||
41 | =head2 new | ||||||
42 | |||||||
43 | $seq = Statistics::Sequences->new(); | ||||||
44 | |||||||
45 | Returns a new Statistics::Sequences object (inherited from L |
||||||
46 | |||||||
47 | Sub-packages also have their own new method - so, e.g., L |
||||||
48 | |||||||
49 | use Statistics::Sequences::Runs; | ||||||
50 | $runs = Statistics::Sequences::Runs->new(); | ||||||
51 | |||||||
52 | In this case, data are not automatically shared across packages, and only one test (in this case, the Runs-test) can be accessed through the class-object returned by L |
||||||
53 | |||||||
54 | =head2 load, add, access, unload | ||||||
55 | |||||||
56 | All these operations on the basic data are inherited from L |
||||||
57 | |||||||
58 | B |
||||||
59 | |||||||
60 | =head2 observed, observation | ||||||
61 | |||||||
62 | $v = $seq->observed(stat => 'joins|pot|runs|turns|vnomes', %args); # gets data from cache, with any args needed by the stat | ||||||
63 | $v = $seq->observed(stat => 'joins|pot|runs|turns|vnomes', data => [qw/blah bing blah blah blah/]); # just needs args for partic.stats | ||||||
64 | $v = $seq->observed(stat => 'joins|pot|runs|turns|vnomes', label => 'myLabelledLoadedData'); # just needs args for partic.stats | ||||||
65 | |||||||
66 | Return the observed value of the statistic for the L |
||||||
67 | |||||||
68 | =cut | ||||||
69 | |||||||
70 | 0 | 0 | 1 | sub observed { return _feed( 'observed', @_ ); } | |||
71 | *observation = \&observed; | ||||||
72 | |||||||
73 | =head2 expected, expectation | ||||||
74 | |||||||
75 | $v = $seq->expected(stat => 'joins|pot|runs|turns|vnomes', %args); # gets data from cache, with any args needed by the stat | ||||||
76 | $v = $seq->expected(stat => 'joins|pot|runs|turns|vnomes', data => [qw/blah bing blah blah blah/]); # just needs args for partic.stats | ||||||
77 | |||||||
78 | Return the expected value of the statistic for the L |
||||||
79 | |||||||
80 | =cut | ||||||
81 | |||||||
82 | 0 | 0 | 1 | sub expected { return _feed( 'expected', @_ ); } | |||
83 | *expectation = \&expected; | ||||||
84 | |||||||
85 | =head2 variance | ||||||
86 | |||||||
87 | $seq->variance(stat => 'joins|pot|runs|turns|vnomes', %args); # gets data from cache, with any args needed by the stat | ||||||
88 | $seq->variance(stat => 'joins|pot|runs|turns|vnomes', data => [qw/blah bing blah blah blah/]); # just needs args for partic.stats | ||||||
89 | |||||||
90 | Returns the expected range of deviation in the statistic's observed value for the given number of trials. | ||||||
91 | |||||||
92 | =cut | ||||||
93 | |||||||
94 | 0 | 0 | 1 | sub variance { return _feed( 'variance', @_ ); } | |||
95 | |||||||
96 | =head2 obsdev, observed_deviation | ||||||
97 | |||||||
98 | $v = $seq->obsdev(stat => 'joins|pot|runs|turns|vnomes', %args); # gets data from cache, with any args needed by the stat | ||||||
99 | $v = $seq->obsdev(stat => 'joins|pot|runs|turns|vnomes', data => [qw/blah bing blah blah blah/]); # just needs args for partic.stats | ||||||
100 | |||||||
101 | Returns the deviation of (difference between) observed and expected values of the statistic for the loaded/given sequence (I |
||||||
102 | |||||||
103 | =cut | ||||||
104 | |||||||
105 | sub obsdev { | ||||||
106 | 0 | 0 | 1 | return observed(@_) - expected(@_); | |||
107 | } | ||||||
108 | *observed_deviation = \&obsdev; | ||||||
109 | |||||||
110 | =head2 stdev, standard_deviation | ||||||
111 | |||||||
112 | $v = $seq->stdev(stat => 'joins|pot|runs|turns|vnomes', %args); # gets data from cache, with any args needed by the stat | ||||||
113 | $v = $seq->stdev(stat => 'joins|pot|runs|turns|vnomes', data => [qw/blah bing blah blah blah/]); # just needs args for partic.stats | ||||||
114 | |||||||
115 | Returns square-root of the variance. | ||||||
116 | |||||||
117 | =cut | ||||||
118 | |||||||
119 | sub stdev { | ||||||
120 | 0 | 0 | 1 | return sqrt variance(@_); | |||
121 | } | ||||||
122 | *standard_deviation = \&stdev; | ||||||
123 | |||||||
124 | =head2 z_value, zscore | ||||||
125 | |||||||
126 | $v = $seq->zscore(stat => 'joins|pot|runs|turns|vnomes', %args); # gets data from cache, with any args needed by the stat | ||||||
127 | $v = $seq->zscore(stat => 'joins|pot|runs|turns|vnomes', data => [qw/blah bing blah blah blah/]); # just needs args for partic.stats | ||||||
128 | |||||||
129 | Return the deviation ratio: observed deviation to standard deviation. Use argument B |
||||||
130 | |||||||
131 | =cut | ||||||
132 | |||||||
133 | 0 | 0 | 1 | sub zscore { return _feed( 'zscore', @_ ); } | |||
134 | *z_value = \&zscore; | ||||||
135 | |||||||
136 | =head2 p_value, test | ||||||
137 | |||||||
138 | $p = $seq->test(stat => 'runs'); | ||||||
139 | $p = $seq->test(stat => 'joins'); | ||||||
140 | $p = $seq->test(stat => 'turns'); | ||||||
141 | $p = $seq->test(stat => 'pot', state => 'a value appearing in the data'); | ||||||
142 | $p = $seq->test(stat => 'vnomes', length => 'an integer greater than zero and less than sample-size'); | ||||||
143 | |||||||
144 | Returns the probability of observing so many runs, joins, etc., versus those expected, relative to the expected variance. | ||||||
145 | |||||||
146 | When using a Statistics::Sequences class-object, this method requires naming which test to perform, i.e., runs, joins, pot or vnomes. This is I |
||||||
147 | |||||||
148 | =head3 Common options | ||||||
149 | |||||||
150 | Options common to all the sub-package tests are as follows. | ||||||
151 | |||||||
152 | =over 8 | ||||||
153 | |||||||
154 | =item data => 'I |
||||||
155 | |||||||
156 | Optionally specify the name of the data to be tested. By default, this is not required: the data tested are those that were last loaded, either anonymously, or as returned by one of the L |
||||||
157 | |||||||
158 | @chimps = (qw/banana banana cheese banana cheese banana banana banana/); | ||||||
159 | @mice = (qw/banana cheese cheese cheese cheese cheese cheese cheese/); | ||||||
160 | $seq->load(chimps => \@chimps, mice => \@mice); | ||||||
161 | $p = $seq->test(stat => 'runs', data => 'chimps'); | ||||||
162 | |||||||
163 | =item ccorr => I |
||||||
164 | |||||||
165 | Specify whether or not to perform the continuity-correction on the observed deviation. Default is false. Relevant only for those tests relying on a I |
||||||
166 | |||||||
167 | =item tails => I<1>|I<2> | ||||||
168 | |||||||
169 | Specify whether the I |
||||||
170 | |||||||
171 | =back | ||||||
172 | |||||||
173 | =head3 Test-specific required settings and options | ||||||
174 | |||||||
175 | Some sub-package tests need to have parameters defined in the call to L |
||||||
176 | |||||||
177 | B |
||||||
178 | |||||||
179 | B |
||||||
180 | |||||||
181 | B |
||||||
182 | |||||||
183 | B |
||||||
184 | |||||||
185 | =cut | ||||||
186 | |||||||
187 | 0 | 0 | 1 | sub p_value { return _feed( 'p_value', @_ ); } | |||
188 | *test = \&p_value; | ||||||
189 | |||||||
190 | =head2 stats_hash | ||||||
191 | |||||||
192 | $href = $seq->stats_hash(stat => 'runs', values => {observed => 1, expected => 1, variance => 1, z_value => 1, p_value => 1}); | ||||||
193 | |||||||
194 | Returns a hashref with values for any of the descriptives and probability value relevant to the specified B |
||||||
195 | |||||||
196 | =cut | ||||||
197 | |||||||
198 | sub stats_hash { | ||||||
199 | 0 | 0 | 1 | my $self = shift; | |||
200 | 0 | 0 | my $args = ref $_[0] ? $_[0] : {@_}; | ||||
201 | 0 | my @methods = keys %{ $args->{'values'} }; | |||||
0 | |||||||
202 | 0 | my (%stats_hash) = (); | |||||
203 | 2 | 2 | 9 | no strict 'refs'; | |||
2 | 2 | ||||||
2 | 2131 | ||||||
204 | 0 | foreach my $method (@methods) { | |||||
205 | 0 | 0 | if ( $args->{'values'}->{$method} == 1 ) { | ||||
206 | 0 | eval { $stats_hash{$method} = $self->$method($args); }; | |||||
0 | |||||||
207 | 0 | 0 | croak "Method $method is not defined or correctly called for " | ||||
208 | . __PACKAGE__ | ||||||
209 | if $@; | ||||||
210 | } | ||||||
211 | } | ||||||
212 | 0 | 0 | if ( !scalar keys %stats_hash ) { # get default stats: | ||||
213 | 0 | foreach my $method (qw/observed p_value/) { | |||||
214 | 0 | eval { $stats_hash{$method} = $self->$method($args); }; | |||||
0 | |||||||
215 | 0 | 0 | croak "Method $method is not defined or correctly called for " | ||||
216 | . __PACKAGE__ | ||||||
217 | if $@; | ||||||
218 | } | ||||||
219 | } | ||||||
220 | 0 | return \%stats_hash; | |||||
221 | } | ||||||
222 | |||||||
223 | =head2 dump | ||||||
224 | |||||||
225 | $seq->dump(stat => 'runs|joins|pot ...', values => {}, format => 'string|table', flag => '1|0', precision_s => 'integer', precision_p => 'integer'); | ||||||
226 | |||||||
227 | I |
||||||
228 | |||||||
229 | Print results of the last-conducted test to STDOUT. By default, if no parameters to C |
||||||
230 | |||||||
231 | =over 8 | ||||||
232 | |||||||
233 | =item values => hashref | ||||||
234 | |||||||
235 | Hashref of the statistical parameters to dump. Default is observed value and p-value for the given B |
||||||
236 | |||||||
237 | =item flag => I |
||||||
238 | |||||||
239 | If true, the I -value associated with the I |
||||||
240 | |||||||
241 | If false (default), nothing is appended to the I -value. |
||||||
242 | |||||||
243 | =item format => 'table|labline|csv' | ||||||
244 | |||||||
245 | Default is 'csv', to print the stats hash as a comma-separated string (no newline), e.g., '4.0000,0.8596800". If specifying 'labline', you get something like "observed = 4.0000, p_value = 0.8596800\n". If specifying "table", this is a dump from L |
||||||
246 | |||||||
247 | .-----------+-----------. | ||||||
248 | | observed | p_value | | ||||||
249 | +-----------+-----------+ | ||||||
250 | | 4.0000 | 0.8596800 | | ||||||
251 | '-----------+-----------' | ||||||
252 | |||||||
253 | =item verbose => 1|0 | ||||||
254 | |||||||
255 | If true, includes a title giving the name of the statistic, details about the hypothesis tested (if B |
||||||
256 | |||||||
257 | =item precision_s => 'I |
||||||
258 | |||||||
259 | Precision of the statistic values (observed, expected, variance, z_value). | ||||||
260 | |||||||
261 | =item precision_p => 'I |
||||||
262 | |||||||
263 | Specify rounding of the probability associated with the I |
||||||
264 | |||||||
265 | =back | ||||||
266 | |||||||
267 | =cut | ||||||
268 | |||||||
269 | sub dump { | ||||||
270 | 0 | 0 | 1 | my $self = shift; | |||
271 | 0 | 0 | my $args = ref $_[0] ? $_[0] : {@_}; | ||||
272 | 0 | my $stats_hash = $self->stats_hash($args); | |||||
273 | 0 | 0 | $args->{'format'} ||= 'csv'; | ||||
274 | 0 | my @standard_methods = | |||||
275 | (qw/observed expected variance obsdev stdev z_value p_value/); | ||||||
276 | 0 | my ( $maxlen, @strs, @headers, @wanted_methods ) = (0); | |||||
277 | 0 | foreach my $method (@standard_methods) | |||||
278 | { # set up what has been requested in a meaningful order: | ||||||
279 | 0 | 0 | push( @wanted_methods, $method ) if defined $stats_hash->{$method}; | ||||
280 | } | ||||||
281 | 0 | foreach my $method ( keys %{$stats_hash} ) | |||||
0 | |||||||
282 | { # add any extra "non-standard" methods | ||||||
283 | 0 | 0 | push( @wanted_methods, $method ) if !grep /$method/, @wanted_methods; | ||||
284 | } | ||||||
285 | 0 | foreach my $method (@wanted_methods) { | |||||
286 | 0 | my $val = delete $stats_hash->{$method}; | |||||
287 | 0 | my $len; | |||||
288 | 0 | 0 | if ( $method eq 'p_value' ) { | ||||
289 | 0 | $val = _precisioned( $args->{'precision_p'}, $val ); | |||||
290 | $val .= ( $val < .05 ? ( $val < .01 ? q{**} : q{*} ) : q{} ) | ||||||
291 | 0 | 0 | if $args->{'flag'}; | ||||
0 | |||||||
0 | |||||||
292 | } | ||||||
293 | else { | ||||||
294 | 0 | 0 | if ( ref $val ) { | ||||
0 | |||||||
295 | 0 | 0 | if ( ref $val eq 'HASH' ) { | ||||
296 | 0 | my %vals = %{$val}; | |||||
0 | |||||||
297 | 0 | $val = q{}; | |||||
298 | 0 | 0 | my $delim = $args->{'format'} eq 'table' ? "\n" : q{,}; | ||||
299 | 0 | my ( $str, $this_len ) = (q{}); | |||||
300 | 0 | while ( my ( $k, $v ) = each %vals ) { | |||||
301 | 0 | $str = "'$k' = $v"; | |||||
302 | 0 | $this_len = length($str); | |||||
303 | 0 | 0 | 0 | $len = $this_len | |||
304 | if not defined $len or $this_len > $len; | ||||||
305 | 0 | $val .= $str; | |||||
306 | 0 | $val .= $delim; | |||||
307 | } | ||||||
308 | 0 | 0 | if ( $args->{'format'} ne 'table' ) { | ||||
309 | 0 | chop $val; | |||||
310 | 0 | $val = '(' . $val . ')'; | |||||
311 | } | ||||||
312 | } | ||||||
313 | else { | ||||||
314 | 0 | $val = join q{, }, @{$val}; | |||||
0 | |||||||
315 | } | ||||||
316 | } | ||||||
317 | elsif ( looks_like_number($val) ) { | ||||||
318 | 0 | $val = _precisioned( $args->{'precision_s'}, $val ); | |||||
319 | } | ||||||
320 | } | ||||||
321 | 0 | push @headers, $method; | |||||
322 | 0 | push( @strs, $val ); | |||||
323 | 0 | 0 | $len = length $val if !defined $len; | ||||
324 | 0 | 0 | $maxlen = $len if $len > $maxlen; | ||||
325 | } | ||||||
326 | 0 | 0 | if ( $args->{'format'} eq 'table' ) { | ||||
0 | |||||||
327 | 0 | 0 | $maxlen = 8 if $maxlen < 8; | ||||
328 | my $title = | ||||||
329 | $args->{'verbose'} | ||||||
330 | 0 | 0 | ? ucfirst( $args->{'stat'} ) . " statistics\n" | ||||
331 | : q{}; | ||||||
332 | 0 | 0 | print $title or croak 'Cannot print title for data-table'; | ||||
333 | 0 | my @hh = (); | |||||
334 | 0 | push( @hh, [ $maxlen, $_ ] ) foreach @headers; | |||||
335 | 0 | require Text::SimpleTable; | |||||
336 | 0 | my $tbl = Text::SimpleTable->new(@hh); | |||||
337 | 0 | $tbl->row(@strs); | |||||
338 | 0 | 0 | print $tbl->draw or croak 'Cannot print data-table'; | ||||
339 | } | ||||||
340 | elsif ( $args->{'format'} eq 'labline' ) { | ||||||
341 | 0 | my @hh; | |||||
342 | 0 | for ( my $i = 0 ; $i <= $#strs ; $i++ ) { | |||||
343 | 0 | $hh[$i] = "$headers[$i] = $strs[$i]"; | |||||
344 | } | ||||||
345 | 0 | my $str = join( q{, }, @hh ); | |||||
346 | 0 | 0 | if ( $args->{'verbose'} ) { | ||||
347 | 0 | $str = ucfirst( $args->{'stat'} ) . ': ' . $str; | |||||
348 | } | ||||||
349 | 0 | 0 | print {*STDOUT} $str, "\n" or croak 'Cannot print data-string'; | ||||
0 | |||||||
350 | } | ||||||
351 | else { # csv | ||||||
352 | 0 | 0 | print join( q{,}, @strs ) or croak 'Cannot print data-string'; | ||||
353 | } | ||||||
354 | 0 | return; | |||||
355 | } | ||||||
356 | *print_summary = \&dump; | ||||||
357 | |||||||
358 | =head2 dump_data | ||||||
359 | |||||||
360 | $seq->dump_data(delim => "\n"); | ||||||
361 | |||||||
362 | Prints to STDOUT a space-separated line of the tested data - as dichotomized and put to test. Optionally, give a value for B |
||||||
363 | |||||||
364 | =cut | ||||||
365 | |||||||
366 | # PRIVATMETHODEN | ||||||
367 | |||||||
368 | sub _feed { | ||||||
369 | 0 | 0 | my $method = shift; | ||||
370 | 0 | my $self = shift; | |||||
371 | 0 | 0 | my $args = ref $_[0] ? $_[0] : {@_}; | ||||
372 | 0 | 0 | my $statname = $args->{'stat'} || q{}; | ||||
373 | 0 | my $class = __PACKAGE__ . q{::} . ucfirst($statname); | |||||
374 | 0 | eval "require $class"; | |||||
375 | 0 | 0 | croak __PACKAGE__, | ||||
376 | " error: Requested sequences module '$class' is not valid/available. You might need to install '$class'" | ||||||
377 | if $@; | ||||||
378 | 0 | my ( $val, $nself ) = ( q{}, {} ); | |||||
379 | |||||||
380 | #my $nself = {}; | ||||||
381 | 0 | bless( $nself, $class ); #$nself = $class->new(); | |||||
382 | 0 | $nself->{$_} = $self->{$_} foreach keys %{$self}; | |||||
0 | |||||||
383 | 2 | 2 | 8 | no strict 'refs'; | |||
2 | 2 | ||||||
2 | 232 | ||||||
384 | 0 | eval '$val = $nself->$method($args)' | |||||
385 | ; # but does not trap "deep recursion" if method not defined | ||||||
386 | 0 | 0 | croak __PACKAGE__, " error: Method '$method' is not defined for $class" | ||||
387 | if $@; | ||||||
388 | 0 | $self->{'stat'} = $statname; | |||||
389 | 0 | return $val; | |||||
390 | } | ||||||
391 | |||||||
392 | sub _precisioned { | ||||||
393 | 0 | 0 | 0 | return $_[0] | |||
0 | |||||||
394 | ? sprintf( q{%.} . $_[0] . 'f', $_[1] ) | ||||||
395 | : ( defined $_[1] ? $_[1] : q{} ); # don't lose any zero | ||||||
396 | } | ||||||
397 | |||||||
398 | =head1 BUNDLING | ||||||
399 | |||||||
400 | This module C |
||||||
401 | |||||||
402 | =head1 AUTHOR | ||||||
403 | |||||||
404 | Roderick Garton, C<< |
||||||
405 | |||||||
406 | =head1 SUPPORT | ||||||
407 | |||||||
408 | You can find documentation for this module with the perldoc command. | ||||||
409 | |||||||
410 | perldoc Statistics::Sequences | ||||||
411 | |||||||
412 | You can also look for information at: | ||||||
413 | |||||||
414 | =over 4 | ||||||
415 | |||||||
416 | =item * RT: CPAN's request tracker (report bugs here) | ||||||
417 | |||||||
418 | L |
||||||
419 | |||||||
420 | =item * AnnoCPAN: Annotated CPAN documentation | ||||||
421 | |||||||
422 | L |
||||||
423 | |||||||
424 | =item * CPAN Ratings | ||||||
425 | |||||||
426 | L |
||||||
427 | |||||||
428 | =item * Search CPAN | ||||||
429 | |||||||
430 | L |
||||||
431 | |||||||
432 | =back | ||||||
433 | |||||||
434 | =head1 LICENSE AND COPYRIGHT | ||||||
435 | |||||||
436 | =over 4 | ||||||
437 | |||||||
438 | =item Copyright (c) 2006-2016 Roderick Garton | ||||||
439 | |||||||
440 | This program is free software. It may be used, redistributed and/or modified under the same terms as Perl-5.6.1 (or later) (see L |
||||||
441 | |||||||
442 | =item Disclaimer | ||||||
443 | |||||||
444 | To the maximum extent permitted by applicable law, the author of this module disclaims all warranties, either express or implied, including but not limited to implied warranties of merchantability and fitness for a particular purpose, with regard to the software and the accompanying documentation. | ||||||
445 | |||||||
446 | =back | ||||||
447 | |||||||
448 | =cut | ||||||
449 | |||||||
450 | 1; # end of Statistics::Sequences |