| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Statistics::MVA::HotellingTwoSample; |
|
2
|
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
26219
|
use base Statistics::MVA; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
1045
|
|
|
4
|
1
|
|
|
1
|
|
131028
|
use warnings; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
36
|
|
|
5
|
1
|
|
|
1
|
|
5
|
use strict; |
|
|
1
|
|
|
|
|
8
|
|
|
|
1
|
|
|
|
|
36
|
|
|
6
|
1
|
|
|
1
|
|
6
|
use Carp; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
81
|
|
|
7
|
1
|
|
|
1
|
|
8103
|
use Statistics::Distributions qw/fprob/; |
|
|
1
|
|
|
|
|
9621
|
|
|
|
1
|
|
|
|
|
121
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 NAME |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
Statistics::MVA::HotellingTwoSample - Two-Sample Hotelling's T-Square Test Statistic. |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=cut |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 VERSION |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
This document describes Statistics::MVA::HotellingTwoSample version 0.0.2 |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=cut |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
use Statistics::MVA::HotellingTwoSample; |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# we have two groups of data each with 4 variables and 9 observations. |
|
26
|
|
|
|
|
|
|
my $data_x = [ |
|
27
|
|
|
|
|
|
|
[qw/ 292 222 52 57/], |
|
28
|
|
|
|
|
|
|
[qw/ 100 227 51 45/], |
|
29
|
|
|
|
|
|
|
[qw/ 272 218 49 36/], |
|
30
|
|
|
|
|
|
|
[qw/ 101 221 17 47/], |
|
31
|
|
|
|
|
|
|
[qw/ 181 208 12 35/], |
|
32
|
|
|
|
|
|
|
[qw/ 111 118 51 54/], |
|
33
|
|
|
|
|
|
|
[qw/ 288 321 51 49/], |
|
34
|
|
|
|
|
|
|
[qw/ 286 219 52 45/], |
|
35
|
|
|
|
|
|
|
[qw/ 262 225 47 44/], |
|
36
|
|
|
|
|
|
|
]; |
|
37
|
|
|
|
|
|
|
my $data_y = [ |
|
38
|
|
|
|
|
|
|
[qw/ 286 107 29 62/], |
|
39
|
|
|
|
|
|
|
[qw/ 311 122 29 63/], |
|
40
|
|
|
|
|
|
|
[qw/ 272 131 52 86/], |
|
41
|
|
|
|
|
|
|
[qw/ 182 88 23 69/], |
|
42
|
|
|
|
|
|
|
[qw/ 211 118 61 57/], |
|
43
|
|
|
|
|
|
|
[qw/ 323 127 51 79/], |
|
44
|
|
|
|
|
|
|
[qw/ 385 332 70 63/], |
|
45
|
|
|
|
|
|
|
[qw/ 373 127 85 60/], |
|
46
|
|
|
|
|
|
|
[qw/ 408 95 57 71/], |
|
47
|
|
|
|
|
|
|
]; |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
# Create a Statistics::MVA::HotellingTwoSample object and pass the data as two Lists-of-Lists within an anonymous array. |
|
50
|
|
|
|
|
|
|
my $mva = Statistics::MVA::HotellingTwoSample->new([ $data_x, $data_y ]); |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# Generate results and print a report to STDOUT by calling hotelling_two_sample in void context. |
|
53
|
|
|
|
|
|
|
$mva->hotelling_two_sample; |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# Call hotelling_two_sample in LIST-context to access the results directly. |
|
56
|
|
|
|
|
|
|
my ($T2, $F, $pval, $df1, $df2) = $mva->hotelling_two_sample; |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=cut |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
Hotelling's T-square statistics is a generalisation of Student's t statistic that is used for multivariate hypothesis |
|
63
|
|
|
|
|
|
|
testing. See http://en.wikipedia.org/wiki/Hotelling%27s_T-square_distribution. |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=cut |
|
66
|
|
|
|
|
|
|
|
|
67
|
1
|
|
|
1
|
|
15
|
use version; our $VERSION = qv('0.0.2'); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
14
|
|
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
sub hotelling_two_sample { |
|
70
|
|
|
|
|
|
|
|
|
71
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
|
72
|
|
|
|
|
|
|
|
|
73
|
0
|
|
|
|
|
|
my $k = $self->[1]; |
|
74
|
0
|
0
|
|
|
|
|
croak qq{\nThis is a two-sample test - you must give two samples} if $k != 2; |
|
75
|
|
|
|
|
|
|
|
|
76
|
0
|
|
|
|
|
|
my $n_x = $self->[0][0][1]; |
|
77
|
0
|
|
|
|
|
|
my $n_y = $self->[0][1][1]; |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
#y just variable number - again will need to check its equal for sample 2 - Statistics::MVA already checked p is same for all matrices... |
|
81
|
0
|
|
|
|
|
|
my $p = $self->[2]; |
|
82
|
|
|
|
|
|
|
#print qq{\ntesting $n_x, $n_y and $p}; |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
#y Just averages of for each variable - naughty changed interface to MVA |
|
85
|
|
|
|
|
|
|
#my @bar_x = &_bar($self->[3][0]); |
|
86
|
|
|
|
|
|
|
#my @bar_y = &_bar($self->[3][1]); |
|
87
|
0
|
|
|
|
|
|
my @bar_x = &_bar($self->[0][0][2]); |
|
88
|
0
|
|
|
|
|
|
my @bar_y = &_bar($self->[0][1][2]); |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
#y covariance matrices - this is already done!!! as part of MVA object creation |
|
91
|
0
|
|
|
|
|
|
my $V_x = $self->[0][0][0]; |
|
92
|
0
|
|
|
|
|
|
my $V_y = $self->[0][1][0]; |
|
93
|
|
|
|
|
|
|
|
|
94
|
0
|
|
|
|
|
|
my $V_x_adj = $V_x->shadow; |
|
95
|
0
|
|
|
|
|
|
my $V_y_adj = $V_y->shadow; |
|
96
|
0
|
|
|
|
|
|
my $V_p = $V_x->shadow; |
|
97
|
|
|
|
|
|
|
|
|
98
|
0
|
|
|
|
|
|
$V_x_adj->multiply_scalar($V_x,$n_x-1); |
|
99
|
0
|
|
|
|
|
|
$V_y_adj->multiply_scalar($V_y,$n_y-1); |
|
100
|
0
|
|
|
|
|
|
$V_p->add($V_x_adj,$V_y_adj); |
|
101
|
|
|
|
|
|
|
|
|
102
|
0
|
|
|
|
|
|
my $divisor = 1 / ($n_x + $n_y - 2); |
|
103
|
0
|
|
|
|
|
|
$V_p->multiply_scalar($V_p,$divisor); |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
#y subtract means... |
|
106
|
0
|
|
|
|
|
|
my @bar_diff = map { $bar_x[$_] - $bar_y[$_] } (0..$#bar_x); |
|
|
0
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
|
|
108
|
0
|
|
|
|
|
|
my $bar_diff_mat = Math::MatrixReal->new_from_cols([[@bar_diff]]); |
|
109
|
|
|
|
|
|
|
|
|
110
|
0
|
|
|
|
|
|
my $dim; |
|
111
|
|
|
|
|
|
|
my $x; |
|
112
|
0
|
|
|
|
|
|
my $B_matreal; |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
#/ using matrixreal - solve equation 'a %*% x = b' for 'x', - 'b' can be either a vector or a matrix. |
|
115
|
0
|
|
|
|
|
|
my $LR = $V_p->decompose_LR(); |
|
116
|
0
|
0
|
|
|
|
|
if (!(($dim,$x,$B_matreal) = $LR->solve_LR($bar_diff_mat))) { croak qq{\nsystem has no solution} } |
|
|
0
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
#print qq{\n\nhere is the matrixreal solution\n}, $x; |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
#/ using Cephes |
|
120
|
|
|
|
|
|
|
#my $M = Math::Cephes::Matrix->new($V_p->[0]); |
|
121
|
|
|
|
|
|
|
#my $B = [@bar_diff]; |
|
122
|
|
|
|
|
|
|
#my $solution = $M->simq($B); |
|
123
|
|
|
|
|
|
|
#print qq{\nhere is the cephes solution @{$solution}\n}; |
|
124
|
|
|
|
|
|
|
|
|
125
|
0
|
|
|
|
|
|
my $crossprod = ~$bar_diff_mat * $x; |
|
126
|
0
|
|
|
|
|
|
my $df1 = $p; |
|
127
|
0
|
|
|
|
|
|
my $df2 = $n_x + $n_y-$p-1; |
|
128
|
0
|
|
|
|
|
|
my $other = $n_x + $n_y - 2; |
|
129
|
|
|
|
|
|
|
|
|
130
|
0
|
|
|
|
|
|
my $T2 = ($crossprod->[0][0][0] * $df2 * (($n_x * $n_y)/($n_x+$n_y)) * $other * $p) / (($n_x+$n_y-2) * $p * $df2); |
|
131
|
0
|
|
|
|
|
|
my $F = ( $df2 * $T2 ) / ($other * $df1); |
|
132
|
0
|
|
|
|
|
|
my $pval = &fprob($df1,$df2,$F); |
|
133
|
0
|
0
|
|
|
|
|
$pval = $pval < 1e-8 ? q{< 1e-8} : $pval; |
|
134
|
|
|
|
|
|
|
|
|
135
|
0
|
0
|
|
|
|
|
if ( !wantarray ) { print qq{\nT^2 = $T2\nF = $F \ndf1 = $df1\ndf2 = $df2 \np.value = $pval}; } |
|
|
0
|
|
|
|
|
|
|
|
136
|
0
|
|
|
|
|
|
else { return ( $T2, $F, $pval, $df1, $df2 ) } |
|
137
|
0
|
|
|
|
|
|
return; |
|
138
|
|
|
|
|
|
|
} |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
sub _bar { |
|
141
|
0
|
|
|
0
|
|
|
my $lol = shift; |
|
142
|
0
|
|
|
|
|
|
my $rows = scalar @{$lol}; |
|
|
0
|
|
|
|
|
|
|
|
143
|
0
|
|
|
|
|
|
my $cols = scalar @{$lol->[0]}; |
|
|
0
|
|
|
|
|
|
|
|
144
|
0
|
|
|
|
|
|
my @bar; |
|
145
|
0
|
|
|
|
|
|
for my $col (0..$cols-1) { |
|
146
|
0
|
|
|
|
|
|
my $sum = 0; |
|
147
|
0
|
|
|
|
|
|
for my $row (0..$rows-1) { |
|
148
|
0
|
|
|
|
|
|
$sum += $lol->[$row][$col]; |
|
149
|
|
|
|
|
|
|
} |
|
150
|
0
|
|
|
|
|
|
push @bar, $sum/$rows; |
|
151
|
|
|
|
|
|
|
} |
|
152
|
0
|
|
|
|
|
|
$, = q{, }; |
|
153
|
0
|
|
|
|
|
|
return @bar; |
|
154
|
|
|
|
|
|
|
} |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
1; # Magic true value required at end of module |
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
__END__ |