line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Statistics::ANOVA::JT;
|
2
|
|
|
|
|
|
|
|
3
|
4
|
|
|
4
|
|
58901
|
use 5.006;
|
|
4
|
|
|
|
|
12
|
|
|
4
|
|
|
|
|
145
|
|
4
|
4
|
|
|
4
|
|
15
|
use strict;
|
|
4
|
|
|
|
|
4
|
|
|
4
|
|
|
|
|
135
|
|
5
|
4
|
|
|
4
|
|
13
|
use warnings FATAL => 'all';
|
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
139
|
|
6
|
4
|
|
|
4
|
|
15
|
use base qw(Statistics::Data);
|
|
4
|
|
|
|
|
4
|
|
|
4
|
|
|
|
|
2356
|
|
7
|
4
|
|
|
4
|
|
83693
|
use Algorithm::Combinatorics qw(combinations);
|
|
4
|
|
|
|
|
11107
|
|
|
4
|
|
|
|
|
227
|
|
8
|
4
|
|
|
4
|
|
20
|
use List::AllUtils qw(sum0);
|
|
4
|
|
|
|
|
5
|
|
|
4
|
|
|
|
|
131
|
|
9
|
4
|
|
|
4
|
|
1824
|
use Statistics::Data::Rank;
|
|
4
|
|
|
|
|
10755
|
|
|
4
|
|
|
|
|
114
|
|
10
|
4
|
|
|
4
|
|
20
|
use Statistics::Lite qw(count max);
|
|
4
|
|
|
|
|
6
|
|
|
4
|
|
|
|
|
3528
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 NAME
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
Statistics::ANOVA::JT - Jonckheere-Terpstra statistics and test
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 VERSION
|
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
Version 0.01
|
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=cut
|
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
our $VERSION = '0.01';
|
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 SYNOPSIS
|
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
use Statistics::ANOVA::JT;
|
27
|
|
|
|
|
|
|
my $jt = Statistics::ANOVA::JT->new();
|
28
|
|
|
|
|
|
|
$jt->load({1 => [2, 4, 6], 2 => [3, 3, 12], 3 => [5, 7, 11, 16]}); # note ordinal datanames
|
29
|
|
|
|
|
|
|
my $j_value = $jt->observed(); # or expected(), variance()
|
30
|
|
|
|
|
|
|
my ($z_value, $p_value) = $jt->zprob_test(ccorr => 2, tails => 1, correct_ties => 1);
|
31
|
|
|
|
|
|
|
# or without pre-loading:
|
32
|
|
|
|
|
|
|
$j_value = $jt->observed(data => {1 => [2, 4, 6], 2 => [5, 3, 12]});
|
33
|
|
|
|
|
|
|
# or for subset of loaded data:
|
34
|
|
|
|
|
|
|
$j_value = $jt->observed(lab => [1, 3]);
|
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
=head1 DESCRIPTION
|
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
Calculates Jonckheere-Terpstra statistics for sameness (common population) across given orders of independent variables. The statistics are based on a between-groups pooled ranking of the data, like the Kruskal-Wallis test, but, unlike Kruskall-Wallis that returns the same result regardless of order of levels, it takes into account ordinal value of the named data. As ordinal values, numerical intervals between the named values do not matter.
|
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
Data-loading and retrieval are as provided in L, on which the JT object is Cd, so its other methods are available here.
|
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
Return values are tested on installation against published examples: in Hollander and Wolfe (1999), for sample MStat output on L, and for the final I-value in the L example.
|
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=head1 SUBROUTINES/METHODS
|
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=head2 new
|
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
$jt = Statistics::ANOVA::JT->new();
|
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
New object for accessing methods and storing results. This "isa" Statistics::Data object.
|
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=head2 observed
|
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
$val = $jt->observed(); # data pre-loaded
|
55
|
|
|
|
|
|
|
$val = $jt->observed(data => $hashref_of_arefs);
|
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
Returns the statistic I: From between-group rankings of all possible pairwise splits of the data, accumulates I as the sum of I(I - 1)/2 Mann-Whitney I counts.
|
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
Optionally, if the data have not been pre-loaded, send as named argument B.
|
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=cut
|
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
sub observed {
|
64
|
2
|
|
|
2
|
1
|
926
|
my ( $self, %args ) = @_;
|
65
|
2
|
|
|
|
|
7
|
return _calc_j_value( _get_data($self, %args) );
|
66
|
|
|
|
|
|
|
}
|
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=head2 expected
|
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
$val = $jt->expected(); # data pre-loaded
|
71
|
|
|
|
|
|
|
$val = $jt->expected(data => $hashref_of_arefs);
|
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
Returns the expected value of the I statistic for the given data.
|
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=cut
|
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
sub expected {
|
78
|
3
|
|
|
3
|
1
|
1350
|
my ( $self, %args ) = @_;
|
79
|
3
|
|
|
|
|
8
|
return _calc_jexp( _get_data($self, %args) );
|
80
|
|
|
|
|
|
|
}
|
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head2 variance
|
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
$val = $jt->variance(); # data pre-loaded
|
85
|
|
|
|
|
|
|
$val = $jt->variance(data => $hashref_of_arefs);
|
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
Return the variance expected to occur in the I values for the given data.
|
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
By default, the method accounts for and corrects for ties, but if C = 0, the returned value is the usual "null" distribution variance, otherwise with an elaborate correction accounting for the number of tied variables and each of their sizes, as offered by Hollander & Wolfe (1999) Eq 6.19, p. 204.
|
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
=cut
|
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
sub variance {
|
94
|
2
|
|
|
2
|
1
|
823
|
my ( $self, %args ) = @_;
|
95
|
2
|
100
|
66
|
|
|
14
|
if ( defined $args{'correct_ties'} and $args{'correct_ties'} == 0 ) {
|
96
|
1
|
|
|
|
|
3
|
return _calc_jvar_ig_ties( _get_data($self, %args) );
|
97
|
|
|
|
|
|
|
}
|
98
|
|
|
|
|
|
|
else {
|
99
|
1
|
|
|
|
|
3
|
return _calc_jvar_by_ties( _get_data($self, %args) );
|
100
|
|
|
|
|
|
|
}
|
101
|
|
|
|
|
|
|
}
|
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=head2 zprob_test
|
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
$p_val = $jt->zprob_test(); # data pre-loaded
|
106
|
|
|
|
|
|
|
$p_val = $jt->zprob_test(data => $hashref_of_arefs);
|
107
|
|
|
|
|
|
|
($z_val, $p_val) = $jt->zprob_test(); # get z-score too
|
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
Performs a z-test on the data and returns the associated probability; or, if called in array context, the z-value itself and then the probability value.
|
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
Rather than calculating the exact I -value, calculates an expected I value and variance, to provide a normalized I for which the I-value is read off the normal distribution. This is appropriate for "large" samples, e.g., greater-than 3 levels, with more than eight observations per level. Otherwise, read the value returned from C<$jt-Eobserved()> and look it up in a table of I-values, such as in Hollander & Wolfe (1999), p. 649ff.
|
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
Optional arguments include B (as above), and B and B as in L. For example, to continuity correct by reducing the observed I-value by 1 (recommended in some texts), set B => 2 (for half on either side of the expected value; if B => 1, then 0.5 is taken off the observed deviation, and so on). The default is not to continuity correct.
|
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=cut
|
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
sub zprob_test {
|
118
|
3
|
|
|
3
|
1
|
1492
|
my ( $self, %args ) = @_;
|
119
|
3
|
|
|
|
|
14
|
my $href = _get_data($self, %args);
|
120
|
3
|
|
|
|
|
1468
|
require Statistics::Zed;
|
121
|
3
|
|
|
|
|
20816
|
my $zed = Statistics::Zed->new();
|
122
|
3
|
100
|
66
|
|
|
80
|
my ( $z_value, $p_value ) = $zed->z_value(
|
123
|
|
|
|
|
|
|
observed => _calc_j_value($href),
|
124
|
|
|
|
|
|
|
expected => _calc_jexp($href),
|
125
|
|
|
|
|
|
|
variance =>
|
126
|
|
|
|
|
|
|
( defined $args{'correct_ties'} and $args{'correct_ties'} == 0 )
|
127
|
|
|
|
|
|
|
? _calc_jvar_ig_ties($href)
|
128
|
|
|
|
|
|
|
: _calc_jvar_by_ties($href),
|
129
|
|
|
|
|
|
|
%args
|
130
|
|
|
|
|
|
|
);
|
131
|
3
|
50
|
|
|
|
416
|
return wantarray ? ( $z_value, $p_value ) : $p_value;
|
132
|
|
|
|
|
|
|
}
|
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
sub _calc_j_value {
|
135
|
5
|
|
|
5
|
|
7
|
my $data = shift;
|
136
|
5
|
|
|
|
|
26
|
my $rankd = Statistics::Data::Rank->new();
|
137
|
5
|
|
|
|
|
35
|
my $j_value = 0;
|
138
|
5
|
|
|
|
|
6
|
for my $pairs ( combinations( [ keys %{$data} ], 2 ) ) {
|
|
5
|
|
|
|
|
29
|
|
139
|
15
|
|
|
|
|
543
|
my ( $p1, $p2 ) = @{$pairs};
|
|
15
|
|
|
|
|
24
|
|
140
|
15
|
|
|
|
|
64
|
my $ranks_href = $rankd->ranks_between(
|
141
|
|
|
|
|
|
|
data => { $p1 => $data->{$p1}, $p2 => $data->{$p2} } );
|
142
|
15
|
|
|
|
|
39
|
my %counts = (
|
143
|
15
|
|
|
|
|
51
|
$p1 => count( @{ $data->{$p1} } ),
|
144
|
15
|
|
|
|
|
1725
|
$p2 => count( @{ $data->{$p2} } )
|
145
|
|
|
|
|
|
|
);
|
146
|
15
|
|
|
|
|
45
|
my $nprod = $counts{$p1} * $counts{$p2};
|
147
|
15
|
|
|
|
|
16
|
my @us = ();
|
148
|
15
|
|
|
|
|
20
|
for ( $p1, $p2 ) {
|
149
|
30
|
|
|
|
|
24
|
my $n = $counts{$_};
|
150
|
30
|
|
|
|
|
86
|
push @us,
|
151
|
|
|
|
|
|
|
$nprod +
|
152
|
|
|
|
|
|
|
( ( $n * ( $n + 1 ) ) / 2 ) -
|
153
|
30
|
|
|
|
|
39
|
sum0( @{ $ranks_href->{$_} } );
|
154
|
|
|
|
|
|
|
}
|
155
|
15
|
|
|
|
|
33
|
$j_value += max(@us);
|
156
|
|
|
|
|
|
|
}
|
157
|
5
|
|
|
|
|
70
|
return $j_value;
|
158
|
|
|
|
|
|
|
}
|
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
sub _calc_jexp {
|
161
|
6
|
|
|
6
|
|
9
|
my ($data) = @_;
|
162
|
6
|
|
|
|
|
10
|
my ( $nj, $sum_n ) = ( 0, 0 );
|
163
|
6
|
|
|
|
|
5
|
for ( keys %{$data} ) {
|
|
6
|
|
|
|
|
16
|
|
164
|
18
|
|
|
|
|
29
|
my $n = count( @{ $data->{$_} } );
|
|
18
|
|
|
|
|
39
|
|
165
|
18
|
|
|
|
|
31
|
$sum_n += $n;
|
166
|
18
|
|
|
|
|
23
|
$nj += $n**2;
|
167
|
|
|
|
|
|
|
}
|
168
|
6
|
|
|
|
|
40
|
return ( $sum_n**2 - $nj ) / 4;
|
169
|
|
|
|
|
|
|
}
|
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
sub _calc_jvar_ig_ties {
|
172
|
2
|
|
|
2
|
|
2
|
my $data = shift;
|
173
|
2
|
|
|
|
|
3
|
my ( $sum_n, $v ) = ( 0, 0 );
|
174
|
2
|
|
|
|
|
2
|
for ( keys %{$data} ) {
|
|
2
|
|
|
|
|
5
|
|
175
|
6
|
|
|
|
|
5
|
my $n = count( @{ $data->{$_} } );
|
|
6
|
|
|
|
|
14
|
|
176
|
6
|
|
|
|
|
9
|
$sum_n += $n;
|
177
|
6
|
|
|
|
|
11
|
$v += $n**2 * ( 2 * $n + 3 );
|
178
|
|
|
|
|
|
|
}
|
179
|
2
|
|
|
|
|
8
|
return ( $sum_n**2 * ( 2 * $sum_n + 3 ) - $v ) / 72;
|
180
|
|
|
|
|
|
|
}
|
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
sub _calc_jvar_by_ties {
|
183
|
3
|
|
|
3
|
|
4
|
my $data = shift;
|
184
|
3
|
|
|
|
|
4
|
my $sum_n = 0;
|
185
|
3
|
|
|
|
|
5
|
my @ns = ();
|
186
|
3
|
|
|
|
|
4
|
for ( keys %{$data} ) {
|
|
3
|
|
|
|
|
8
|
|
187
|
9
|
|
|
|
|
9
|
my $n = count( @{ $data->{$_} } );
|
|
9
|
|
|
|
|
14
|
|
188
|
9
|
|
|
|
|
18
|
$sum_n += $n;
|
189
|
9
|
|
|
|
|
8
|
push @ns, $n;
|
190
|
|
|
|
|
|
|
}
|
191
|
3
|
|
|
|
|
4
|
my $ng = scalar( keys( %{$data} ) );
|
|
3
|
|
|
|
|
5
|
|
192
|
3
|
|
|
|
|
13
|
my $rankd = Statistics::Data::Rank->new();
|
193
|
3
|
|
|
|
|
25
|
my ( $uranks_href, $xtied, $gn ) =
|
194
|
|
|
|
|
|
|
$rankd->ranks_between( data => $data );
|
195
|
3
|
|
|
|
|
705
|
my $a2 = sum0( map { $_ * ( $_ - 1 ) * ( 2 * $_ + 5 ) } @ns );
|
|
9
|
|
|
|
|
29
|
|
196
|
3
|
|
|
|
|
5
|
my $a3 = sum0( map { $_ * ( $_ - 1 ) * ( 2 * $_ + 5 ) } @{$xtied} );
|
|
32
|
|
|
|
|
35
|
|
|
3
|
|
|
|
|
6
|
|
197
|
3
|
|
|
|
|
5
|
my $b2 = sum0( map { $_ * ( $_ - 1 ) * ( $_ - 2 ) } @ns );
|
|
9
|
|
|
|
|
14
|
|
198
|
3
|
|
|
|
|
5
|
my $b3 = sum0( map { $_ * ( $_ - 1 ) * ( $_ - 2 ) } @{$xtied} );
|
|
32
|
|
|
|
|
37
|
|
|
3
|
|
|
|
|
5
|
|
199
|
3
|
|
|
|
|
7
|
my $c2 = sum0( map { $_ * ( $_ - 1 ) } @ns );
|
|
9
|
|
|
|
|
15
|
|
200
|
3
|
|
|
|
|
5
|
my $c3 = sum0( map { $_ * ( $_ - 1 ) } @{$xtied} );
|
|
32
|
|
|
|
|
45
|
|
|
3
|
|
|
|
|
3
|
|
201
|
3
|
|
|
|
|
37
|
return ( 1 / 72 * ( $gn * ( $gn - 1 ) * ( 2 * $gn + 5 ) - $a2 - $a3 ) ) +
|
202
|
|
|
|
|
|
|
( 1 / ( 36 * $gn * ( $gn - 1 ) * ( $gn - 2 ) ) * $b2 * $b3 ) +
|
203
|
|
|
|
|
|
|
( 1 / ( 8 * $gn * ( $gn - 1 ) ) * $c2 * $c3 );
|
204
|
|
|
|
|
|
|
}
|
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
sub _get_data {
|
207
|
10
|
|
|
10
|
|
14
|
my ($self, %args) = @_;
|
208
|
10
|
|
|
|
|
9
|
my $hoa;
|
209
|
10
|
100
|
|
|
|
23
|
if (ref $args{'data'}) {
|
210
|
2
|
|
|
|
|
4
|
$hoa = delete $args{'data'};
|
211
|
|
|
|
|
|
|
}
|
212
|
|
|
|
|
|
|
else {
|
213
|
8
|
|
|
|
|
32
|
$hoa = $self->get_hoa_by_lab(%args);
|
214
|
|
|
|
|
|
|
}
|
215
|
10
|
|
|
|
|
315
|
return $hoa;
|
216
|
|
|
|
|
|
|
}
|
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
=head1 REFERENCES
|
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
Hollander, M., & Wolfe, D. A. (1999). I. New York, NY, US: Wiley.
|
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
=head1 DEPENDENCIES
|
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
L : used as a C for caching and retrieving data.
|
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
L : used to implement between-sample ranking.
|
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
L : for z-testing with optional continuity correction and tailing.
|
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
L : provides the C algorithm to provide all possible pairs of data-names to loop thru in calculating the observed I value.
|
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
L : provides the handy sum0() function
|
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
=head1 BUGS
|
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
Please report any bugs or feature requests to C, or through
|
237
|
|
|
|
|
|
|
the web interface at L. I will be notified, and then you'll
|
238
|
|
|
|
|
|
|
automatically be notified of progress on your bug as I make changes.
|
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=head1 SUPPORT
|
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command.
|
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
perldoc Statistics::ANOVA::JT
|
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
You can also look for information at:
|
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
=over 4
|
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker (report bugs here)
|
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
L
|
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation
|
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
L
|
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
=item * CPAN Ratings
|
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
L
|
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
=item * Search CPAN
|
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
L
|
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
=back
|
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=head1 AUTHOR
|
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
Roderick Garton, C<< >>
|
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT
|
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
Copyright 2015 Roderick Garton.
|
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it
|
277
|
|
|
|
|
|
|
under the terms of either: the GNU General Public License as published
|
278
|
|
|
|
|
|
|
by the Free Software Foundation; or the Artistic License.
|
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
See L for more information.
|
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
=cut
|
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
1; # End of Statistics::ANOVA::JT
|