line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Statistics::Zed; |
2
|
8
|
|
|
8
|
|
264202
|
use 5.008008; |
|
8
|
|
|
|
|
33
|
|
|
8
|
|
|
|
|
341
|
|
3
|
8
|
|
|
8
|
|
47
|
use strict; |
|
8
|
|
|
|
|
16
|
|
|
8
|
|
|
|
|
421
|
|
4
|
8
|
|
|
8
|
|
70
|
use warnings FATAL => 'all'; |
|
8
|
|
|
|
|
21
|
|
|
8
|
|
|
|
|
1984
|
|
5
|
8
|
|
|
8
|
|
78
|
use Carp qw(croak); |
|
8
|
|
|
|
|
15
|
|
|
8
|
|
|
|
|
834
|
|
6
|
8
|
|
|
8
|
|
46
|
use base qw(Statistics::Data); |
|
8
|
|
|
|
|
18
|
|
|
8
|
|
|
|
|
28864
|
|
7
|
|
|
|
|
|
|
use Math::Cephes qw(:dists); |
8
|
|
|
|
|
|
|
use Statistics::Lite qw(sum); |
9
|
|
|
|
|
|
|
use String::Util qw(hascontent nocontent); |
10
|
|
|
|
|
|
|
use Scalar::Util qw(looks_like_number); |
11
|
|
|
|
|
|
|
$Statistics::Zed::VERSION = '0.10'; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 NAME |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
Statistics::Zed - Data-handling and calculations for ratio of observed to standard deviation (zscore) |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 VERSION |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
Version 0.10 |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=head1 SYNOPSIS |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
use Statistics::Zed 0.10; |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# new() with optional args: |
26
|
|
|
|
|
|
|
$zed = Statistics::Zed->new( |
27
|
|
|
|
|
|
|
ccorr => 1, |
28
|
|
|
|
|
|
|
tails => 2, |
29
|
|
|
|
|
|
|
precision_s => 3, |
30
|
|
|
|
|
|
|
precision_p => 7, |
31
|
|
|
|
|
|
|
); |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# optionally pre-load one or more values with these names: |
34
|
|
|
|
|
|
|
$zed->load(observed => [5, 6, 3], expected => [2.5, 3, 3], variance => [8, 8, 9]); |
35
|
|
|
|
|
|
|
$zed->add(observed => [3, 6], expected => [2.7, 2.5], variance => [7, 8]); # update loaded arrays |
36
|
|
|
|
|
|
|
$z_value = $zed->score(); # calc z_value from pre-loaded data |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# alternatively, call zscore() - alias score() - with the required args (with arefs or single values): |
39
|
|
|
|
|
|
|
$z_value = $zed->zscore( |
40
|
|
|
|
|
|
|
observed => 5, |
41
|
|
|
|
|
|
|
expected => 2.5, |
42
|
|
|
|
|
|
|
variance => 8, |
43
|
|
|
|
|
|
|
); |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
# as either of above, but call in array context for more results: |
46
|
|
|
|
|
|
|
($z_value, $p_value, $observed_deviation, $standard_deviation) = $zed->zscore(); |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# as either of above but with optional args: |
49
|
|
|
|
|
|
|
$z_value = $zed->zscore(ccorr => 1, precision_s => 3); |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# get the normal distribution p_value only - alias z2p(): |
52
|
|
|
|
|
|
|
$p_value = $zed->p_value(); # using pre-loaded data |
53
|
|
|
|
|
|
|
$p_value = $zed->p_value(observed => 5, expected => 2.5, variance => 8); # from given data |
54
|
|
|
|
|
|
|
$p_value = $zed->p_value(tails => 2, ccorr => 1, precision_p => 5); # same as either with optional args |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
# "inverse phi" (wraps to Math::Cephes::ndtri): |
57
|
|
|
|
|
|
|
$z_value = $zed->p2z(value => $p_value, tails => 1|2); |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=head1 DESCRIPTION |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
Methods are provided to: |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
+ L<calculate a z-score|Statistics::Zed/zscore>: ratio of an observed deviation to a standard deviation, with optional continuity correction |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
+ L<convert z-value to normal p-value|Statistics::Zed/p_value>, and L<convert p-value to normal-equiv z-value|Statistics::Zed/p2z> |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
+ L<load|Statistics::Zed/load>, L<add|Statistics::Zed/add>, save & retrieve observed, expected and variance values to compute z_score across samples |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
+ support z-testing in L<Statistics::Sequences|Statistics::Sequences> and other modules. |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
Optionally, load/add B<observed>, B<expected> and B<variance> values (named as such) and compute a z-score between/after updates. The module uses L<Statistics::Data|Statistics::Data> to cache the observed, expected and variance values, and to provide for the load/add methods, as well as to save/retrieve these values between class calls (not documented here, see L<Statistics::Data|Statistics::Data>). Alternatively, simply call L<zscore|Statistics::Zed/zscore> and L<p_value|Statistics::Zed/p_value>, passing them the values by these labels in a hash (or hashref), with either single numerical values or referenced arrays of the same. Optionally, specify tails, where relevant, and set the precision of the returned z-values and p-values as required. |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=head1 SUBROUTINES/METHODS |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=head2 new |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
$zed = Statistics::Zed->new(); |
78
|
|
|
|
|
|
|
$zed = Statistics::Zed->new(ccorr => NUM, tails => 1|2, precision_s => INT, precision_p => INT); |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
Returns a Statistics::Zed object. Accepts setting of any of the L<OPTIONS|Statistics::Zed/OPTIONS>. |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=cut |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
# Constructor. Builds the object through the parent class, installs the
# default options (two-tailed, no continuity correction), then copies in
# every option the caller supplied, either as a hashref or as a flat
# key => value list. Caller-supplied values override the defaults.
sub new {
    my ( $class, @args ) = @_;
    my $opts = ref $args[0] ? $args[0] : {@args};
    my $self = $class->SUPER::new();

    # Defaults first ...
    $self->{'tails'} = 2;
    $self->{'ccorr'} = 0;

    # ... then overlay whatever was sent (a no-op for an empty option set).
    @{$self}{ keys %{$opts} } = values %{$opts};

    return $self;
}
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=head2 load |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
$zed->load(observed => [NUMs], expected => [NUMs], variance => [NUMs]); # labelled list of each required series |
104
|
|
|
|
|
|
|
$zed->load({ observed => [NUMs], expected => [NUMs], variance => [NUMs] }); # same but as referenced hash |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
Optionally load data for each of B<observed>, B<expected> and B<variance> series as arefs (reference to list of numbers), using C<load> in L<Statistics::Data|Statistics::Data/load>. Returns 1 if successful but croaks if data cannot be loaded; see L<DIAGNOSTICS|Statistics::Zed/DIAGNOSTICS>. |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=cut |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
# Load data through the parent class, then check completeness: either none
# or all three of the 'observed', 'expected' and 'variance' series must be
# accessible afterwards. Croaks when only some of them are; returns 1
# otherwise.
sub load {
    my ( $self, @args ) = @_;
    $self->SUPER::load(@args);

    # Count how many of the three required series can be accessed.
    my $n_found =
      grep { $self->access( label => $_ ) } qw/observed expected variance/;

    if ( $n_found > 0 and $n_found != 3 ) {
        croak
q{Data for deviation ratio are incomplete: Need arefs of data labelled 'observed', 'expected' and 'variance'};
    }
    return 1;
}
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=head2 add |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
$zed->add(observed => [NUMs], expected => [NUMs], variance => [NUMs]); # labelled list of each required series |
128
|
|
|
|
|
|
|
$zed->add({ observed => [NUMs], expected => [NUMs], variance => [NUMs] }); # same but as referenced hash |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
Update any existing, previously loaded data, via C<add> in L<Statistics::Data|Statistics::Data/add>. Returns 1 if successful but croaks if data cannot be added; see L<DIAGNOSTICS|Statistics::Zed/DIAGNOSTICS>. |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=cut |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
# Update previously loaded data through the parent class, then insist that
# each of the 'observed', 'expected' and 'variance' series is accessible.
# Croaks at the first one that is not; returns 1 otherwise.
sub add {
    my ( $self, @args ) = @_;

    # Accept either a hashref or a flat key => value list.
    my $series = ref $args[0] ? $args[0] : {@args};
    $self->SUPER::add($series);

    for my $label (qw/observed expected variance/) {
        next if $self->access( label => $label );
        croak
q{Data for deviation ratio are incomplete: Need arefs of data labelled 'observed', 'expected' and 'variance'};
    }
    return 1;
}
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=head2 zscore |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
$zval = $zed->zscore(); # assuming observed, expected and variance values already loaded/added, as above |
149
|
|
|
|
|
|
|
$zval = $zed->zscore(observed => NUM, expected => NUM, variance => NUM); |
150
|
|
|
|
|
|
|
$zval = $zed->zscore(observed => [NUMs], expected => [NUMs], variance => [NUMs]); |
151
|
|
|
|
|
|
|
($zval, $pval, $obs_dev, $stdev) = $zed->zscore(); # same but array context call for more info |
152
|
|
|
|
|
|
|
$zscore = $zed->zscore(observed => [12], expected => [5], variance => [16], ccorr => 1); # same but with continuity correction |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
Returns the I<z>-value for the values of B<observed>, B<expected> and B<variance> sent to L<load|Statistics::Zed/load> and/or L<add|Statistics::Zed/add>, or as sent in a call to this method itself as a hash (or hashref). If called wanting an array, then the I<z>-value, its probability, the observed deviation and the standard deviation are returned. |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
I<Alias>: score, z_value |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
As described in L<OPTIONS|Statistics::Zed/OPTIONS>, optionally specify a numerical value for L<ccorr|Statistics::Zed/ccorr> for performing the continuity-correction to the observed deviation, and a value of either 1 or 2 to specify the L<tails|Statistics::Zed/tails> for reading off the normal distribution. |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
The basic formula is: |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=for html <p>&nbsp;&nbsp;<i>Z</i> = ( <i>x</i> &ndash; <span style="text-decoration: overline"><i>X</i></span> ) / SD</p> |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
where I<X> is the expected value (mean, etc.). If supplying an array of values for each of the required arguments, then the z-score is based on summing their values, i.e., (sum of observeds less sum of expecteds) divided by square-root of the sum of the variances. |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=cut |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
# Compute the z-value: (observed - expected), continuity-corrected where
# requested, divided by the square root of the variance.
#
# Scalar/void context: returns the z-value (formatted by _precision), or
# the empty string when the standard deviation is zero.
# List context: returns (z-value, p-value, observed deviation, standard
# deviation); the first two are empty strings when no z-value exists.
#
# Side effect: in list context with a usable z-value, the z-value is stored
# under 'value' in the caller's argument hash before the p-value lookup.
sub zscore {
    my ( $self, @args ) = @_;
    my $args = ref $args[0] ? $args[0] : {@args};
    my $sums = _get_descriptives( $self, $args );

    my $obs_dev =
      _ccorr( $self, $args, $sums->{'observed'} - $sums->{'expected'} );
    my $exp_dev = sqrt $sums->{'variance'};

    # Leave the z-value undefined if there is no expected deviation:
    my $z_value = $exp_dev ? $obs_dev / $exp_dev : undef;
    my $have_z  = hascontent($z_value);

    # Only the statistic itself is wanted:
    if ( !wantarray ) {
        return $have_z ? _precision( $self, $args, 's', $z_value ) : q{};
    }

    # List wanted but nothing to report beyond the deviations:
    if ( !$have_z ) {
        return ( q{}, q{}, $obs_dev, $exp_dev );
    }

    # Full list: look up the p-value for the unrounded z-value first.
    $args->{'value'} = $z_value;
    my $p_value = _precision( $self, $args, 'p', $self->z2p($args) );
    return ( _precision( $self, $args, 's', $z_value ),
        $p_value, $obs_dev, $exp_dev );
}

# aliases:
*score   = \&zscore;
*z_value = \&zscore;
*test    = \&zscore;    # legacy
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
=head2 p_value |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
$p_value = $zed->p_value($z); # assumes 2-tailed |
207
|
|
|
|
|
|
|
$p_value = $zed->p_value(value => $z); # assumes 2-tailed |
208
|
|
|
|
|
|
|
$p_value = $zed->p_value(value => $z, tails => 1); |
209
|
|
|
|
|
|
|
$p_value = $zed->p_value(); # assuming observed, expected and variance values already loaded/added, as above |
210
|
|
|
|
|
|
|
$p_value = $zed->p_value(observed => NUM, expected => NUM, variance => NUM); |
211
|
|
|
|
|
|
|
$p_value = $zed->p_value(observed => [NUMs], expected => [NUMs], variance => [NUMs]); |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
I<Alias>: C<pvalue>, C<z2p> |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
Send a I<z>-value, get its associated I<p>-value, 2-tailed by default, or depending on the value of the optional argument B<tails>. If you pass in just one value (unkeyed), it is taken as the z-value. Alternatively, it can be passed the same arguments as for L<zscore|Statistics::Zed/zscore> so that it will calculate the zscore itself but return only the p-value. |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
Uses L<Math::Cephes|Math::Cephes> C<ndtr> normal probability function, which returns 0 if the z-value is greater than or equal to 38. |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
The optional argument B<precision_p> renders the returned p-value to so many decimal places (simply by sprintf). |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
=cut |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
# Return the normal-distribution p-value for a z-value.
#
# Arguments may be a hashref, a key => value list, or a single unkeyed
# value that is taken as the z-value. If no numeric 'value' is given, the
# z-value is obtained from zscore() with the same arguments. Returns the
# empty string when no z-value is available; otherwise the tail
# probability multiplied by the number of tails, formatted by _precision.
sub p_value {
    my ( $self, @args ) = @_;

    my $args;
    if ( ref $args[0] ) {
        $args = $args[0];
    }
    elsif ( scalar(@args) % 2 == 0 ) {
        $args = {@args};
    }
    else {    # odd-sized list: the first item is the z-value itself
        $args = { value => $args[0] };
    }

    my $given = $args->{'value'};
    my $z_value =
      ( hascontent($given) and looks_like_number($given) )
      ? $given
      : $self->zscore($args);
    return q{} if nocontent($z_value);

    # Fold the cumulative probability onto the nearer tail, then scale
    # by the number of tails.
    my $p_value = ndtr($z_value);
    if ( $p_value > .5 ) {
        $p_value = 1 - $p_value;
    }
    $p_value *= _set_tails( $self, $args );
    return _precision( $self, $args, 'p', $p_value );
}
*pvalue = \&p_value;
*z2p    = \&p_value;
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
=head2 p2z |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
$z_value = $zed->p2z($p_value) # the p-value is assumed to be 2-tailed |
250
|
|
|
|
|
|
|
$z_value = $zed->p2z(value => $p_value) # the p-value is assumed to be 2-tailed |
251
|
|
|
|
|
|
|
$z_value = $zed->p2z(value => $p_value, tails => 1) # specify 1-tailed probability |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
Returns the I<z>-value associated with a I<p>-value using the inverse phi function C<ndtri> in L<Math::Cephes|Math::Cephes>. I<The p-value is assumed to be two-tailed>, and so is firstly (before conversion) divided by 2, e.g., .05 becomes .025 so you get I<z> = 1.96. As a one-tailed probability, it is then assumed to be a probability of being I<greater> than a certain amount, i.e., of getting a I<z>-value I<greater> than or equal to that observed. So the inverse phi function is actually given (1 - I<p>-value) to work on. So .055 comes back as 1.598 (speaking of the top-end of the distribution), and .991 comes back as -2.349 (now going from right to left across the distribution). This is not the same as found in inversion methods in common spreadsheet packages but seems to be expected by humans. |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
=cut |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
# Convert a p-value to its z-value via ndtri ("inverse phi").
#
# Arguments may be a hashref, a key => value list, or a single unkeyed
# value that is taken as the p-value. Croaks with 'Cannot compute z-value
# from p-value' if the value has no content or is rejected by
# all_proportions(). Returns undef for p = 0, 0 for p = 1, and otherwise
# ndtri(1 - p), with p halved first when the tails setting is 2.
sub p2z {
    my ( $self, @args ) = @_;
    my $args =
        ref $args[0]               ? $args[0]
      : ( scalar(@args) % 2 == 0 ) ? {@args}
      :                              { value => $args[0] };

    my $p_value = $args->{'value'};
    if ( !hascontent($p_value) or !$self->all_proportions( [$p_value] ) ) {

        # Never concatenate an undefined value into the message: under this
        # file's fatal warnings that would die with an 'uninitialized'
        # error in place of the documented diagnostic.
        croak 'Cannot compute z-value from p-value: '
          . ( defined $p_value ? $p_value : '(undefined)' );
    }

    # Avoid ndtri errors by first accounting for 0 and 1 ...
    my $z_value;
    if ( $p_value == 0 ) {
        $z_value = undef;
    }
    elsif ( $p_value == 1 ) {
        $z_value = 0;
    }
    else {
        # p-value has been given as two-tailed - only use 1 side
        $p_value /= 2 if _set_tails( $self, $args ) == 2;
        $z_value = ndtri( 1 - $p_value );
    }
    return $z_value;
}
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=head2 obsdev |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
$obsdev = $zed->obsdev(); # assuming observed and expected values already loaded/added, as above |
293
|
|
|
|
|
|
|
$obsdev = $zed->obsdev(observed => NUM, expected => NUM); |
294
|
|
|
|
|
|
|
$obsdev = $zed->obsdev(observed => [NUMs], expected => [NUMs]); |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
Returns the observed deviation (only), as would be returned as the third value if calling L<zscore|Statistics::Zed/zscore> in array context. This is simply the (sum of the) observed value(s) less the (sum of the) expected value(s), with the continuity correction applied to this difference if a correcting value is (optionally) also given as an argument, named B<ccorr>; see L<OPTIONS|Statistics::Zed/OPTIONS>. |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
=cut |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
# Return the observed deviation only: the (summed) observed value(s) less
# the (summed) expected value(s), with the continuity correction applied to
# that difference where a 'ccorr' value is in effect.
#
# Accepts a hashref or a key => value list naming 'observed' and
# 'expected'; falls back to pre-loaded data via _get_descriptives.
sub obsdev {
    my ( $self, @args ) = @_;
    my $args = ref $args[0] ? $args[0] : {@args};

    # Variance plays no part in the observed deviation, but the shared
    # helper croaks unless all three series resolve. Hand it a placeholder
    # so the documented call with only 'observed' and 'expected' works; any
    # 'variance' the caller did send still overrides the placeholder.
    my %wanted = ( variance => 0, %{$args} );
    my $sums = _get_descriptives( $self, \%wanted );

    my $raw_dev = $sums->{'observed'} - $sums->{'expected'};
    return _ccorr( $self, $args, $raw_dev );
}
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
=head2 ccorr |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
$zed->ccorr(value => 1); # will be used in all methods, unless they are given a ccorr value to use |
311
|
|
|
|
|
|
|
$val = $zed->ccorr(); # returns any value set in new() or previously here |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
Set the value of the optional B<ccorr> argument to be used for all statistics methods, or, without a B<value>, return the current value. This is 0, the default set in L<new|Statistics::Zed/new>, if it has not been explicitly set there or via this method. To quash any set value, specify B<value> => 0. When sending a value for B<ccorr> to any other method, this value takes precedence over any previously set, but it does not "re-set" the cached value that is set here or in L<new|Statistics::Zed/new>. See L<OPTIONS|Statistics::Zed/OPTIONS> for how this value is used. It is assumed that the value sent is a valid numerical value. |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
=cut |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
# Get or set the cached continuity-correction value.
# With a defined 'value' argument (hashref or key => value list) the value
# is stored on the object and nothing is returned; without one, the
# currently stored value is returned. The value is not validated.
sub ccorr {
    my ( $self, @args ) = @_;
    my $args = ref $args[0] ? $args[0] : {@args};

    # Getter: nothing to store.
    return $self->{'ccorr'} if !defined $args->{'value'};

    # Setter:
    $self->{'ccorr'} = $args->{'value'};
    return;
}
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
=head2 tails |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
$zed->tails(value => 1); # will be used in all methods, unless they are given a tails value to use |
332
|
|
|
|
|
|
|
$val = $zed->tails(); # returns any value set in new() or previously here |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
Set the value of the optional B<tails> argument to be used for all statistics methods, or, without a B<value>, return the current value. The default is 2; and this can be overridden by setting its value in L<new|Statistics::Zed/new>, by this method, or as an explicit argument in any method. When sending a value for B<tails> to any other method, this value takes precedence over any previously set, but it does not "re-set" the cached value that is set here or in L<new|Statistics::Zed/new>. See L<p_value|Statistics::Zed/p_value>, L<p2z|Statistics::Zed/p2z> and L<OPTIONS|Statistics::Zed/OPTIONS> for how this value is used. The value set must be either 1 or 2; a croak is heard otherwise. |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
=cut |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
# Get or set the cached number of tails.
# With a defined 'value' argument (hashref or key => value list) the value
# must look like a number equal to 1 or 2; it is then stored and nothing is
# returned, otherwise the method croaks. Without a defined 'value', the
# currently stored setting is returned.
sub tails {
    my ( $self, @args ) = @_;
    my $args   = ref $args[0] ? $args[0] : {@args};
    my $wanted = $args->{'value'};

    # Getter: nothing to store.
    return $self->{'tails'} if !defined $wanted;

    my $is_valid =
      looks_like_number($wanted) && ( $wanted == 1 || $wanted == 2 );
    if ( !$is_valid ) {
        croak
"Cannot set tails() option: value must be numeric and equal either 1 or 2, not '$wanted'";
    }

    $self->{'tails'} = $wanted;
    return;
}
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
=head2 string |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
$str = $zed->string(); # assuming observed, expected and variance values already loaded/added, as above |
361
|
|
|
|
|
|
|
$str = $zed->string(observed => NUM, expected => NUM, variance => NUM); |
362
|
|
|
|
|
|
|
$str = $zed->string(observed => [NUMs], expected => [NUMs], variance => [NUMs]); |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
Returns a string giving the zscore and p-value. Takes the same arguments as for L<zscore|Statistics::Zed/zscore>, which it calls itself, taking its returned values to make up a string in the form B<Z = 0.141, 1p = 0.44377>. Accepts the optional arguments B<tails>, B<ccorr>, B<precision_s> and B<precision_p>; see L<OPTIONS|Statistics::Zed/OPTIONS>. In the example, B<precision_s> has been specified as 3, B<precision_p> has been set to 5, and B<tails> has been set to 1. |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
=cut |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
# Build a one-line report of the form "Z = <z>, <tails>p = <p>".
# Takes the same arguments as zscore(), which is called in list context to
# obtain the z- and p-values; the tails figure comes from _set_tails.
sub string {
    my ( $self, @args ) = @_;
    my $args = ref $args[0] ? $args[0] : {@args};

    my ( $z_str, $p_str ) = zscore( $self, $args );
    my $n_tails = _set_tails( $self, $args );

    return sprintf 'Z = %s, %sp = %s', $z_str, $n_tails, $p_str;
}
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
=head2 dump |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
$zed->dump(); # assuming observed, expected and variance values already loaded/added, as above |
379
|
|
|
|
|
|
|
$zed->dump(observed => NUM, expected => NUM, variance => NUM); |
380
|
|
|
|
|
|
|
$zed->dump(observed => [NUMs], expected => [NUMs], variance => [NUMs]); |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
Prints to STDOUT a line giving the zscore and p-value, being what would be returned by L<string|Statistics::Zed/string> but with a new-line "\n" character appended. |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
=cut |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
# Print the line built by string(), followed by a newline, to the currently
# selected output handle. Croaks if the print fails; returns 1 otherwise.
sub dump {
    my ( $self, @args ) = @_;
    my $report = string( $self, @args );
    print $report, "\n"
      or croak 'Could not print statistical values';
    return 1;
}
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
## Private methods: |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
# Resolve the 'observed', 'expected' and 'variance' values for a z-test.
#
# For each label, in order of preference:
#   1. nothing usable in $args and pre-loaded data exist: sum the loaded data;
#   2. $args holds a reference (assumed to be an aref): sum its elements;
#   3. $args holds a single number: use it as is;
#   4. otherwise croak, naming the missing label.
# Returns a hashref keyed by the three labels.
sub _get_descriptives {
    my ( $self, $args ) = @_;
    my %desc = ();
    for my $label (qw/observed expected variance/) {
        my $given = $args->{$label};
        if ( nocontent($given)
            and my $data = $self->access( label => $label ) )
        {    # try pre-loaded data
            $desc{$label} = sum( @{$data} );
        }
        elsif ( ref $given ) {    # assume isa aref

            # Sum the values the caller passed in. $data is never assigned
            # on this branch, so it must not be dereferenced here.
            $desc{$label} = sum( @{$given} );
        }
        elsif ( looks_like_number($given) ) {    # assume single value
            $desc{$label} = $given;
        }
        else {
            croak
"Cannot compute z-value: No defined or numerical '$label' value(s)";
        }
    }
    return \%desc;
}
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
# Format $value to a number of decimal places via sprintf.
# The precision is looked up under 'precision_<type>' (type is 's' for the
# statistic, 'p' for the p-value): a true value in $args takes precedence,
# then a true value in $self. With neither, $value is returned untouched.
sub _precision {
    my ( $self, $args, $type, $value ) = @_;
    my $key    = 'precision_' . $type;
    my $places = $args->{$key} || $self->{$key};

    # No (true) precision anywhere: return as is.
    return $value if !$places;

    # tried Number::Format but overflows too easily
    return sprintf q{%.} . $places . 'f', $value;
}
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
# Work out the number of tails to use: a true 'tails' value in $args takes
# precedence, then a true value in $self; with neither, fall back to 2.
# Croaks unless the chosen value equals 1 or 2.
sub _set_tails {
    my ( $self, $args ) = @_;
    my $tails = $args->{'tails'} || $self->{'tails'};

    # what might have been in $self was clobbered by user, perhaps
    return 2 if !$tails;

    if ( !( $tails == 1 || $tails == 2 ) ) {
        croak
"Cannot compute p-value: Argument 'tails' should have value of either 1 or 2, not '$tails'";
    }
    return $tails;
}
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
# Apply the continuity correction to a deviation.
# The correcting value is a defined 'ccorr' in $args, else the one in
# $self. A false deviation or a false correcting value returns the
# deviation unchanged. Otherwise half the correcting value is subtracted
# from the absolute deviation and the original sign is restored.
sub _ccorr {
    my ( $self, $args, $dev ) = @_;

    # Nothing to correct:
    return $dev if !$dev;

    my $unit =
      defined $args->{'ccorr'} ? $args->{'ccorr'} : $self->{'ccorr'};

    # No correction requested:
    return $dev if !$unit;

    my $shrunk = abs($dev) - .5 * $unit;
    return $dev < 0 ? $shrunk * -1 : $shrunk;
}
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
1; |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
__END__ |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
=head1 OPTIONS |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
The following can be set in calls to the above methods, including L<new|Statistics::Zed/new>, where relevant. |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
=head2 ccorr |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
Apply the continuity correction. Default = 0. Otherwise, specify a correcting difference value (not necessarily 1), and the procedure is to calculate the observed difference as its absolute value less half of this correcting value, returning the observed difference with its original sign. To clarify for Germans, this is the Stetigkeitskorrektur. |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
=head2 tails |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
Tails from which to assess the associated I<p>-value (1 or 2). Default = 2. |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
=head2 precision_s |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
Precision of the I<z>-value (the statistic). Default is undefined - you get all decimal values available. |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
=head2 precision_p |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
Precision of the associated I<p>-value. Default is undefined - you get all decimal values available. |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
=head1 Deprecated methods |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
Methods for "series testing" are deprecated. Use L<load|Statistics::Zed/load> and L<add|Statistics::Zed/add> instead to manage keeping a cache of the observed, expected and variance values; the z- and p-methods will look them up, if available. See L<dump_vals|Statistics::Data/dump_vals> in Statistics::Data for dumping series data using the present class object, which uses Statistics::Data as a base. |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
=head1 DIAGNOSTICS |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
=over 4 |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
=item Data for deviation ratio are incomplete |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
Croaked when L<load|Statistics::Zed/load>ing or L<add|Statistics::Zed/add>ing data. As the croak goes on to say, loading and adding (updating) needs arefs of data labelled B<observed>, B<expected> and B<variance>. Also, if any one of them are loaded/updated at one time, it's expected that all three are loaded/updated. For more info about loading data, see L<Statistics::Data|Statistics::Data/load>. |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
=item Cannot compute z-value: No defined or numerical '$_' value(s) |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
Croaked via L<zscore|Statistics::Zed/zscore> if the three required B<observed>, B<expected> and B<variance> values were not defined in the present call (each to a reference to an array of values, or with a single numerical value), or could not be accessed from a previous load/add. See L<access|Statistics::Data/access> in Statistics::Data for any error that might have resulted from a bad load/add. See C<looks_like_number> in L<Scalar::Util|Scalar::Util> for any error that might have resulted from supplying a single value for B<observed>, B<expected> or B<variance>. |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
=item Cannot compute p-value: Argument 'tails' should have value of either 1 or 2, not '$tails' |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
Croaked when calling L<p_value|Statistics::Zed/p_value> directly or via L<zscore|Statistics::Zed/zscore>, or when calling L<p2z|Statistics::Zed/p2z>, and any given value for B<tails> is not appropriate. |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
=item Cannot compute z-value from p-value |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
Croaked by L<p2z|Statistics::Zed/p2z> if its B<value> attribute is not defined, is empty string, is not numeric, or, if numeric, is greater than 1 or less than zero, as per L<all_proportions|Statistics::Data/all_proportions> in Statistics::Data. |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
=item Cannot set tails() option: value must be numeric and equal either 1 or 2, not '$_' |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
Croaked from L<tails|Statistics::Zed/tails> method; self-explanatory. |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
=item Could not print statistical values |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
Croaked by the internal L<dump|Statistics::Zed/dump> method if, for some reason, printing to STDOUT is not available. |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
=back |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
=head1 DEPENDENCIES |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
L<Math::Cephes|Math::Cephes> - C<ndtr> and C<ndtri> normal distribution functions |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
L<Statistics::Lite|Statistics::Lite> - C<sum> method |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
L<String::Util|String::Util> - C<hascontent> and C<nocontent> methods |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
L<Scalar::Util|Scalar::Util> - C<looks_like_number> method |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
L<Statistics::Data|Statistics::Data> - this module uses the latter as a base, for its loading/adding data methods (if required), and a L<p2z|Statistics::Zed/p2z> validity check. |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
=head1 SEE ALSO |
553
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
L<Statistics::Sequences|Statistics::Sequences> : for application of this module. |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
=head1 AUTHOR |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
Roderick Garton, C<< <rgarton at cpan.org> >> |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
Copyright 2006-2014 Roderick Garton. |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under the terms of either: the GNU General Public License as published by the Free Software Foundation; or the Artistic License. See L<perl.org|http://dev.perl.org/licenses/> for more information. |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
=cut |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
# End of Statistics::Zed |
569
|
|
|
|
|
|
|
|