line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#!/usr/bin/perl -w |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package Statistics::Gtest; |
4
|
|
|
|
|
|
|
|
5
|
4
|
|
|
4
|
|
131638
|
use strict; |
|
4
|
|
|
|
|
10
|
|
|
4
|
|
|
|
|
169
|
|
6
|
4
|
|
|
4
|
|
21
|
use vars qw($VERSION); |
|
4
|
|
|
|
|
9
|
|
|
4
|
|
|
|
|
211
|
|
7
|
4
|
|
|
4
|
|
24
|
use Carp; |
|
4
|
|
|
|
|
13
|
|
|
4
|
|
|
|
|
481
|
|
8
|
4
|
|
|
4
|
|
3793
|
use IO::File; |
|
4
|
|
|
|
|
52907
|
|
|
4
|
|
|
|
|
10604
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
$VERSION = '0.07'; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
my $self; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
sub new { |
15
|
10
|
|
|
10
|
0
|
8350
|
my ($type, $data) = @_; |
16
|
10
|
|
|
|
|
197
|
my $datahandle; |
17
|
10
|
50
|
|
|
|
41
|
if (! $data) { |
18
|
0
|
|
|
|
|
0
|
_error(1); |
19
|
|
|
|
|
|
|
} |
20
|
10
|
50
|
|
|
|
44
|
if (@_ eq 3) { |
21
|
0
|
|
|
|
|
0
|
_error(2); |
22
|
|
|
|
|
|
|
} |
23
|
10
|
50
|
|
|
|
43
|
if ($datahandle = _getHandle($data)) { |
24
|
10
|
|
|
|
|
25
|
$self = {}; |
25
|
10
|
|
|
|
|
112
|
_initialize($datahandle); |
26
|
10
|
|
|
|
|
32
|
bless($self, $type); |
27
|
10
|
|
|
|
|
34
|
$self; |
28
|
|
|
|
|
|
|
} |
29
|
|
|
|
|
|
|
else { |
30
|
0
|
|
|
|
|
0
|
_error(3); |
31
|
|
|
|
|
|
|
} |
32
|
|
|
|
|
|
|
} |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
sub getG { |
35
|
10
|
|
|
10
|
1
|
22
|
my $self = shift; |
36
|
10
|
|
|
|
|
29
|
$self->{'G'} = $self->getRawG() / $self->getQ(); |
37
|
10
|
|
|
|
|
51
|
$self->{'G'}; |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
sub getRawG { |
41
|
20
|
|
|
20
|
1
|
38
|
my $self = shift; |
42
|
20
|
|
|
|
|
56
|
$self->{'logsum'} = _sumCellOp(); |
43
|
20
|
|
|
|
|
52
|
$self->{'G'} = 2 * $self->{'logsum'}; |
44
|
20
|
|
|
|
|
77
|
2 * $self->{'logsum'}; |
45
|
|
|
|
|
|
|
} |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
sub getQ { |
48
|
20
|
|
|
20
|
1
|
1611
|
my $self = shift; |
49
|
20
|
|
|
|
|
56
|
$self->{'q'} = _williamsC(); |
50
|
20
|
|
|
|
|
92
|
$self->{'q'}; |
51
|
|
|
|
|
|
|
} |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
sub setExpected { |
54
|
3
|
|
|
3
|
1
|
6
|
my ($self, $expData) = @_; |
55
|
3
|
|
|
|
|
7
|
my $datahandle; |
56
|
|
|
|
|
|
|
my @expect; |
57
|
3
|
|
|
|
|
4
|
my $expectTotal = 0; |
58
|
3
|
50
|
|
|
|
13
|
if (@_ eq 3) { |
59
|
0
|
|
|
|
|
0
|
_error(2); |
60
|
|
|
|
|
|
|
} |
61
|
3
|
50
|
|
|
|
12
|
if ($datahandle = _getHandle($expData)) { |
62
|
3
|
|
|
|
|
7
|
foreach my $row_ref (@{$datahandle}) { |
|
3
|
|
|
|
|
9
|
|
63
|
3
|
50
|
|
|
|
10
|
if (!ref $row_ref) { |
64
|
0
|
|
|
|
|
0
|
_error(4); |
65
|
|
|
|
|
|
|
} |
66
|
3
|
|
|
|
|
5
|
my @row = @{$row_ref}; |
|
3
|
|
|
|
|
11
|
|
67
|
3
|
|
|
|
|
7
|
foreach my $cell (@row) { |
68
|
12
|
|
|
|
|
22
|
_checkNumValidity($cell); |
69
|
12
|
|
|
|
|
21
|
push(@expect, $cell); |
70
|
12
|
|
|
|
|
20
|
$expectTotal += $cell; |
71
|
|
|
|
|
|
|
} |
72
|
3
|
|
|
|
|
7
|
$self->{'expected'} = \@expect; |
73
|
|
|
|
|
|
|
# Expected values no longer intrinsic to observed data, so set |
74
|
|
|
|
|
|
|
# flag to 0. |
75
|
3
|
|
|
|
|
15
|
$self->{'intrinsic'} = 0; |
76
|
|
|
|
|
|
|
} |
77
|
|
|
|
|
|
|
} |
78
|
|
|
|
|
|
|
else { |
79
|
0
|
|
|
|
|
0
|
_error(3); |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
# Data sanity checks |
82
|
3
|
50
|
|
|
|
20
|
if ($expectTotal != $self->{'sumtotal'}) { |
83
|
0
|
|
|
|
|
0
|
warn "Warning: Total of expected values ($expectTotal) does not ", |
84
|
|
|
|
|
|
|
"equal total of observed values (",$self->{'sumtotal'},").\nThis ", |
85
|
|
|
|
|
|
|
"will invalidate the test unless the discrepancy is very minor ", |
86
|
|
|
|
|
|
|
"\n(e.g., the result of rounding error).\n"; |
87
|
|
|
|
|
|
|
} |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
sub getObserved { |
91
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
92
|
0
|
|
|
|
|
0
|
my $exp = _formatData($self->{'observed'}); |
93
|
0
|
|
|
|
|
0
|
$exp; |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
sub getExpected { |
97
|
6
|
|
|
6
|
1
|
16
|
my $self = shift; |
98
|
6
|
|
|
|
|
29
|
my $exp = _formatData($self->{'expected'}); |
99
|
6
|
|
|
|
|
38
|
$exp; |
100
|
|
|
|
|
|
|
} |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
sub getDF { |
103
|
10
|
|
|
10
|
1
|
23671
|
my $self = shift; |
104
|
10
|
|
|
|
|
97
|
$self->{'df'}; |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
sub setDF { |
108
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
109
|
0
|
|
|
|
|
0
|
$self->{'df'} = shift; |
110
|
|
|
|
|
|
|
# Degrees of freedom set externally. |
111
|
0
|
|
|
|
|
0
|
$self->{'df_extern'} = 1; |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
sub getDFstate { |
115
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
116
|
0
|
|
|
|
|
0
|
$self->{'df_extern'}; |
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
sub getHypothesis { |
120
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
121
|
0
|
|
|
|
|
0
|
$self->{'intrinsic'}; |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
sub getRow { |
125
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
126
|
0
|
|
|
|
|
0
|
my $idx = shift; |
127
|
0
|
0
|
|
|
|
0
|
if ($idx > ($self->{'nrows'} - 1)) { |
128
|
0
|
|
|
|
|
0
|
return; |
129
|
|
|
|
|
|
|
} |
130
|
0
|
|
|
|
|
0
|
my @row = (); |
131
|
0
|
|
|
|
|
0
|
my $skip = $self->{'ncols'}; |
132
|
0
|
|
|
|
|
0
|
my $offset = $idx * $skip; |
133
|
0
|
|
|
|
|
0
|
my $final = $offset + $skip; |
134
|
0
|
|
|
|
|
0
|
for my $cell ($offset .. $final - 1) { |
135
|
0
|
|
|
|
|
0
|
push(@row, $self->{'observed'}->[$cell]); |
136
|
|
|
|
|
|
|
} |
137
|
0
|
|
|
|
|
0
|
\@row; |
138
|
|
|
|
|
|
|
} |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
sub getCol { |
141
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
142
|
0
|
|
|
|
|
0
|
my $idx = shift; |
143
|
0
|
0
|
|
|
|
0
|
if ($idx > ($self->{'ncols'} - 1)) { |
144
|
0
|
|
|
|
|
0
|
return; |
145
|
|
|
|
|
|
|
} |
146
|
0
|
|
|
|
|
0
|
my @col = (); |
147
|
0
|
|
|
|
|
0
|
my $skip = $self->{'ncols'}; |
148
|
0
|
|
|
|
|
0
|
my $max = $skip * $self->{'nrows'}; |
149
|
0
|
|
|
|
|
0
|
for (my $cell = $idx; $cell < $max; $cell += $skip) { |
150
|
0
|
|
|
|
|
0
|
push(@col,$self->{'observed'}->[$cell]); |
151
|
|
|
|
|
|
|
} |
152
|
0
|
|
|
|
|
0
|
\@col; |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
sub rowSum { |
156
|
20
|
|
|
20
|
1
|
45
|
my $self = shift; |
157
|
20
|
|
|
|
|
38
|
my $idx = shift; |
158
|
20
|
|
|
|
|
127
|
$self->{'rowsums'}->[$idx]; |
159
|
|
|
|
|
|
|
} |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
sub colSum { |
162
|
20
|
|
|
20
|
1
|
38
|
my $self = shift; |
163
|
20
|
|
|
|
|
31
|
my $idx = shift; |
164
|
20
|
|
|
|
|
105
|
$self->{'colsums'}->[$idx]; |
165
|
|
|
|
|
|
|
} |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
sub getRowNum { |
168
|
10
|
|
|
10
|
1
|
30
|
my $self = shift; |
169
|
10
|
|
|
|
|
53
|
$self->{'nrows'}; |
170
|
|
|
|
|
|
|
} |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
sub getColNum { |
173
|
10
|
|
|
10
|
1
|
23
|
my $self = shift; |
174
|
10
|
|
|
|
|
60
|
$self->{'ncols'}; |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
sub getSumTotal { |
178
|
10
|
|
|
10
|
1
|
24
|
my $self = shift; |
179
|
10
|
|
|
|
|
56
|
$self->{'sumtotal'}; |
180
|
|
|
|
|
|
|
} |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
# private methods |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
# Do all the initialization work: verify the data and write it into the |
185
|
|
|
|
|
|
|
# object; calculate the row and column sums and grand total; determine |
186
|
|
|
|
|
|
|
# the format of the data table (rows x columns); calculate the likely |
187
|
|
|
|
|
|
|
# degrees of freedom and generate default expected values. Set state |
188
|
|
|
|
|
|
|
# flags indicating that the expected values are 'intrinsic' (generated |
189
|
|
|
|
|
|
|
# from the observed data), and that the degrees of freedom are also |
190
|
|
|
|
|
|
|
# based on table characteristics. |
191
|
|
|
|
|
|
|
sub _initialize { |
192
|
10
|
|
|
10
|
|
20
|
my $datahandle = shift; |
193
|
10
|
|
|
|
|
20
|
my @datastruct = (); |
194
|
10
|
|
|
|
|
16
|
my @rowsums = (); |
195
|
10
|
|
|
|
|
18
|
my @sumcols = (); |
196
|
10
|
|
|
|
|
17
|
my ($ncols, $nrows, $sumtotal); |
197
|
10
|
|
|
|
|
43
|
my $totalN = 0; |
198
|
10
|
|
|
|
|
33
|
$self->{'colsums'} = (); |
199
|
|
|
|
|
|
|
|
200
|
10
|
|
|
|
|
24
|
foreach my $row_ref (@{$datahandle}) { |
|
10
|
|
|
|
|
25
|
|
201
|
17
|
50
|
|
|
|
53
|
if (!ref $row_ref) { |
202
|
0
|
|
|
|
|
0
|
_error(4); |
203
|
|
|
|
|
|
|
} |
204
|
17
|
|
|
|
|
23
|
my @row = @{$row_ref}; |
|
17
|
|
|
|
|
58
|
|
205
|
17
|
|
|
|
|
27
|
$ncols = scalar @row; |
206
|
17
|
|
|
|
|
22
|
my $i = 0; |
207
|
17
|
|
|
|
|
30
|
foreach my $cell (@row) { |
208
|
46
|
|
|
|
|
87
|
_checkNumValidity($cell); |
209
|
46
|
|
|
|
|
72
|
push(@datastruct, $cell); |
210
|
46
|
|
|
|
|
80
|
$sumtotal += $cell; |
211
|
46
|
|
|
|
|
96
|
$self->{'colsums'}->[$i] += $cell; |
212
|
46
|
|
|
|
|
70
|
$i++; |
213
|
46
|
|
|
|
|
96
|
$totalN++; |
214
|
|
|
|
|
|
|
} |
215
|
17
|
|
|
|
|
27
|
$nrows++; |
216
|
17
|
|
|
|
|
46
|
push(@rowsums, _rowsum(\@row)); |
217
|
|
|
|
|
|
|
} |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
# Data sanity checks; make sure data exist and are internally |
220
|
|
|
|
|
|
|
# consistent. |
221
|
10
|
50
|
|
|
|
34
|
if ($totalN == 0) { |
222
|
0
|
|
|
|
|
0
|
_error(5); |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
10
|
50
|
|
|
|
34
|
if ($totalN != ($nrows * $ncols)) { |
226
|
0
|
|
|
|
|
0
|
_error(6); |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
|
229
|
10
|
50
|
33
|
|
|
82
|
if (($ncols == 0) || ($nrows == 0)) { |
230
|
0
|
|
|
|
|
0
|
_error(7); |
231
|
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
|
|
233
|
10
|
|
|
|
|
30
|
my $tSum = 0; |
234
|
10
|
|
|
|
|
27
|
foreach my $r (@rowsums) { |
235
|
17
|
|
|
|
|
32
|
$tSum += $r; |
236
|
|
|
|
|
|
|
} |
237
|
10
|
50
|
|
|
|
47
|
if ($sumtotal != $tSum) { |
238
|
0
|
|
|
|
|
0
|
_error(6); |
239
|
|
|
|
|
|
|
} |
240
|
|
|
|
|
|
|
|
241
|
10
|
|
|
|
|
14
|
$tSum = 0; |
242
|
10
|
|
|
|
|
17
|
foreach my $r (@{$self->{'colsums'}}) { |
|
10
|
|
|
|
|
39
|
|
243
|
32
|
|
|
|
|
47
|
$tSum += $r; |
244
|
|
|
|
|
|
|
} |
245
|
10
|
50
|
|
|
|
39
|
if ($sumtotal != $tSum) { |
246
|
0
|
|
|
|
|
0
|
_error(6); |
247
|
|
|
|
|
|
|
} |
248
|
|
|
|
|
|
|
|
249
|
10
|
|
|
|
|
26
|
$self->{'observed'} = \@datastruct; |
250
|
10
|
|
|
|
|
25
|
$self->{'rowsums'} = \@rowsums; |
251
|
10
|
|
|
|
|
20
|
$self->{'nrows'} = $nrows; |
252
|
10
|
|
|
|
|
21
|
$self->{'ncols'} = $ncols; |
253
|
10
|
|
|
|
|
22
|
$self->{'sumtotal'} = $sumtotal; |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
# tabletype: 0 = 2-way, 1 = single column, 2 = single row |
256
|
10
|
100
|
100
|
|
|
56
|
if (($ncols == 1) || ($nrows == 1)) { |
257
|
6
|
100
|
|
|
|
28
|
if ($ncols == 1) { |
258
|
1
|
|
|
|
|
3
|
$self->{'df'} = $nrows - 1; |
259
|
1
|
|
|
|
|
4
|
$self->{'tabletype'} = 1; |
260
|
|
|
|
|
|
|
} |
261
|
6
|
100
|
|
|
|
21
|
if ($nrows == 1) { |
262
|
5
|
|
|
|
|
14
|
$self->{'df'} = $ncols - 1; |
263
|
5
|
|
|
|
|
18
|
$self->{'tabletype'} = 2; |
264
|
|
|
|
|
|
|
} |
265
|
|
|
|
|
|
|
} |
266
|
|
|
|
|
|
|
else { |
267
|
4
|
|
|
|
|
14
|
$self->{'df'} = ($nrows - 1) * ($ncols - 1); |
268
|
4
|
|
|
|
|
13
|
$self->{'tabletype'} = 0; |
269
|
|
|
|
|
|
|
} |
270
|
10
|
|
|
|
|
35
|
$self->{'expected'}= _expected(\@datastruct); |
271
|
10
|
|
|
|
|
20
|
$self->{'intrinsic'} = 1; |
272
|
10
|
|
|
|
|
41
|
$self->{'df_extern'} = 0; |
273
|
|
|
|
|
|
|
} |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
# Calculate and return William's correction, q. Calculation varies |
276
|
|
|
|
|
|
|
# depending on table format (1-way vs. 2-way). |
277
|
|
|
|
|
|
|
sub _williamsC { |
278
|
20
|
|
|
20
|
|
41
|
my $sumtotal = $self->{'sumtotal'}; |
279
|
20
|
|
|
|
|
43
|
my $rowsums = $self->{'rowsums'}; |
280
|
20
|
|
|
|
|
39
|
my $colsums = $self->{'colsums'}; |
281
|
20
|
100
|
|
|
|
94
|
if ($self->{'tabletype'} == 1) { |
|
|
100
|
|
|
|
|
|
282
|
2
|
|
|
|
|
12
|
$self->{'q'} = |
283
|
|
|
|
|
|
|
1 + ($self->{'nrows'} + 1) / (6 * $sumtotal); |
284
|
|
|
|
|
|
|
} |
285
|
|
|
|
|
|
|
elsif ($self->{'tabletype'} == 2) { |
286
|
10
|
|
|
|
|
56
|
$self->{'q'} = |
287
|
|
|
|
|
|
|
1 + ($self->{'ncols'} + 1) / (6 * $sumtotal); |
288
|
|
|
|
|
|
|
} |
289
|
|
|
|
|
|
|
else { |
290
|
8
|
|
|
|
|
21
|
my $recipRows = 0; |
291
|
8
|
|
|
|
|
13
|
my $recipCols = 0; |
292
|
8
|
|
|
|
|
16
|
foreach my $rval (@{$rowsums}) { |
|
8
|
|
|
|
|
19
|
|
293
|
16
|
|
|
|
|
43
|
$recipRows += (1 / $rval); |
294
|
|
|
|
|
|
|
} |
295
|
8
|
|
|
|
|
16
|
foreach my $cval (@{$colsums}) { |
|
8
|
|
|
|
|
18
|
|
296
|
22
|
|
|
|
|
85
|
$recipCols += (1 / $cval); |
297
|
|
|
|
|
|
|
} |
298
|
8
|
|
|
|
|
26
|
my $denom = 6 * $sumtotal * $self->{'df'}; |
299
|
8
|
|
|
|
|
49
|
my $num = ($sumtotal * $recipRows - 1) * ($sumtotal * $recipCols - 1); |
300
|
8
|
|
|
|
|
39
|
$self->{'q'} = 1 + $num / $denom; |
301
|
|
|
|
|
|
|
} |
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
# Generate the default expected values, based on a hypothesis of |
305
|
|
|
|
|
|
|
# independence (no association) between factors. |
306
|
|
|
|
|
|
|
# Return ref to expected array. |
307
|
|
|
|
|
|
|
sub _expected { |
308
|
10
|
|
|
10
|
|
17
|
my $data = shift; |
309
|
10
|
|
|
|
|
19
|
my @expected = (); |
310
|
10
|
|
|
|
|
19
|
my $rows = $self->{'nrows'}; |
311
|
10
|
|
|
|
|
20
|
my $cols = $self->{'ncols'}; |
312
|
10
|
|
|
|
|
14
|
my $sumtotal = $self->{'sumtotal'}; |
313
|
10
|
|
|
|
|
35
|
for my $row (0 .. $rows-1) { |
314
|
17
|
|
|
|
|
46
|
my $marginalRow = $self->{'rowsums'}->[$row] / $sumtotal; |
315
|
17
|
|
|
|
|
19
|
my @a; |
316
|
17
|
|
|
|
|
32
|
for my $col (0 .. $cols-1) { |
317
|
46
|
|
|
|
|
72
|
my $marginalCol = $self->{'colsums'}->[$col] / $sumtotal; |
318
|
46
|
|
|
|
|
114
|
push(@expected, ($marginalRow * $marginalCol * $sumtotal)); |
319
|
|
|
|
|
|
|
} |
320
|
|
|
|
|
|
|
} |
321
|
10
|
|
|
|
|
38
|
\@expected; |
322
|
|
|
|
|
|
|
} |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
# Calculate the log-likelihood statistic. |
325
|
|
|
|
|
|
|
# Return sum of Obs * ln(Obs/Exp) for all cells. |
326
|
|
|
|
|
|
|
sub _sumCellOp { |
327
|
20
|
|
|
20
|
|
59
|
my $logsum = 0; |
328
|
20
|
|
|
|
|
59
|
my $ncells = $self->{'nrows'} * $self->{'ncols'}; |
329
|
20
|
|
|
|
|
35
|
my $o = $self->{'observed'}; |
330
|
20
|
|
|
|
|
34
|
my $e = $self->{'expected'}; |
331
|
20
|
|
|
|
|
57
|
for my $cell (0 .. $ncells-1) { |
332
|
92
|
|
|
|
|
320
|
$logsum += $o->[$cell] * log($o->[$cell] / $e->[$cell]); |
333
|
|
|
|
|
|
|
} |
334
|
20
|
|
|
|
|
63
|
$logsum; |
335
|
|
|
|
|
|
|
} |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
# Return sum of the cells in this row. |
338
|
|
|
|
|
|
|
sub _rowsum { |
339
|
17
|
|
|
17
|
|
24
|
my $row = shift; |
340
|
17
|
|
|
|
|
23
|
my $total = 0; |
341
|
17
|
|
|
|
|
43
|
foreach my $val (@{$row}) { |
|
17
|
|
|
|
|
29
|
|
342
|
46
|
|
|
|
|
61
|
$total += $val; |
343
|
|
|
|
|
|
|
} |
344
|
17
|
|
|
|
|
61
|
$total; |
345
|
|
|
|
|
|
|
} |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
# Order the data array back into the format originally input. |
348
|
|
|
|
|
|
|
sub _formatData { |
349
|
6
|
|
|
6
|
|
9
|
my $ref = shift; |
350
|
6
|
|
|
|
|
13
|
my @data; |
351
|
6
|
|
|
|
|
10
|
my $len = scalar @{$ref}; |
|
6
|
|
|
|
|
13
|
|
352
|
6
|
|
|
|
|
22
|
my $rlen = $len / $self->{'nrows'}; |
353
|
6
|
|
|
|
|
14
|
my $idx = $rlen - 1; |
354
|
6
|
|
|
|
|
31
|
for (my $r=0; $r<$len; $r+=$rlen) { |
355
|
9
|
|
|
|
|
25
|
my $row = [ @{$ref}[$r ... $idx] ]; |
|
9
|
|
|
|
|
34
|
|
356
|
9
|
100
|
|
|
|
40
|
return $row if ($rlen eq $len); |
357
|
6
|
|
|
|
|
11
|
push(@data, $row); |
358
|
6
|
|
|
|
|
18
|
$idx += $rlen; |
359
|
|
|
|
|
|
|
} |
360
|
3
|
|
|
|
|
11
|
\@data; |
361
|
|
|
|
|
|
|
} |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
# Check to make sure the values are numbers, greater than 0. |
364
|
|
|
|
|
|
|
# Throw error if any problems are found. |
365
|
|
|
|
|
|
|
sub _checkNumValidity { |
366
|
58
|
|
|
58
|
|
82
|
my $value = shift; |
367
|
58
|
|
|
|
|
67
|
my $eString = "Invalid data."; |
368
|
58
|
50
|
|
|
|
261
|
if ($value !~ /^\d+\.?\d*$/) { |
369
|
0
|
|
|
|
|
0
|
_error(8); |
370
|
|
|
|
|
|
|
} |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
# Very low cell counts (< 5) aren't great, either. Maybe a |
373
|
|
|
|
|
|
|
# warning is in order. |
374
|
58
|
50
|
|
|
|
131
|
if ($value < 5) { |
375
|
0
|
|
|
|
|
0
|
warn "Warning: Very small cell frequency (freq. < 5) found.\n", |
376
|
|
|
|
|
|
|
"This will reduce the reliability of the test.\n"; |
377
|
|
|
|
|
|
|
} |
378
|
58
|
50
|
|
|
|
133
|
if ($value == 0) { |
379
|
0
|
|
|
|
|
0
|
_error(9); |
380
|
|
|
|
|
|
|
} |
381
|
|
|
|
|
|
|
} |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
# Take a filehandle, return an array reference. |
384
|
|
|
|
|
|
|
sub _fileread { |
385
|
6
|
|
|
6
|
|
13
|
my $data = shift; |
386
|
6
|
|
|
|
|
8
|
my @a; |
387
|
6
|
|
|
|
|
292
|
while (<$data>) { |
388
|
9
|
|
|
|
|
47
|
my @row = split(/\s+/,$_); |
389
|
9
|
|
|
|
|
64
|
push(@a, \@row); |
390
|
|
|
|
|
|
|
} |
391
|
6
|
|
|
|
|
122
|
\@a; |
392
|
|
|
|
|
|
|
} |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
# Try to take whatever input format is thrown at us, and turn it into |
395
|
|
|
|
|
|
|
# an array reference, or just bail out completely if we can't figure |
396
|
|
|
|
|
|
|
# it out. |
397
|
|
|
|
|
|
|
sub _getHandle { |
398
|
13
|
|
|
13
|
|
27
|
my $data = shift; |
399
|
13
|
100
|
|
|
|
60
|
if (ref $data) { |
400
|
7
|
100
|
|
|
|
45
|
if ($data =~ /\AARRAY/) { |
401
|
3
|
100
|
|
|
|
11
|
if (ref $data->[0]) { |
402
|
2
|
|
|
|
|
7
|
return $data; |
403
|
|
|
|
|
|
|
} |
404
|
|
|
|
|
|
|
else { |
405
|
1
|
|
|
|
|
4
|
return [ $data ]; |
406
|
|
|
|
|
|
|
} |
407
|
|
|
|
|
|
|
} |
408
|
4
|
100
|
|
|
|
22
|
if ($data =~ /\AGLOB/) { |
409
|
2
|
|
|
|
|
6
|
return _fileread($data); |
410
|
|
|
|
|
|
|
} |
411
|
2
|
50
|
|
|
|
12
|
if ($data =~ /\AIO::File=GLOB/) { |
412
|
2
|
|
|
|
|
8
|
return _fileread($data); |
413
|
|
|
|
|
|
|
} |
414
|
|
|
|
|
|
|
else { |
415
|
0
|
|
|
|
|
0
|
_error(10); |
416
|
|
|
|
|
|
|
} |
417
|
|
|
|
|
|
|
} |
418
|
|
|
|
|
|
|
else { |
419
|
6
|
|
|
|
|
57
|
my $fh = new IO::File; |
420
|
6
|
100
|
|
|
|
278
|
if ($fh->open("< $data")) { |
421
|
2
|
|
|
|
|
156
|
return _fileread($fh); |
422
|
|
|
|
|
|
|
} |
423
|
|
|
|
|
|
|
else { |
424
|
4
|
|
|
|
|
140
|
my @a = split(/\s+/,$data); |
425
|
4
|
|
|
|
|
27
|
return [ \@a ]; |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
} |
428
|
0
|
|
|
|
|
|
undef $fh; |
429
|
|
|
|
|
|
|
} |
430
|
0
|
|
|
|
|
|
0; |
431
|
|
|
|
|
|
|
} |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
# Keep all the error strings in one place. |
434
|
|
|
|
|
|
|
sub _error { |
435
|
0
|
|
|
0
|
|
|
my ($code) = shift; |
436
|
0
|
|
|
|
|
|
my $errorbar = "\n!-----------------!\n"; |
437
|
0
|
|
|
|
|
|
my @e = ( |
438
|
|
|
|
|
|
|
"", # Normal execution |
439
|
|
|
|
|
|
|
"Input error: Must pass in a filename for a file containing the data.\n\tEx: \$g = new Statistics::Gtest(\$datafile);", |
440
|
|
|
|
|
|
|
"Input error: Can't use array or hash as data; pass array ref.", |
441
|
|
|
|
|
|
|
"Input error: Invalid data format.", |
442
|
|
|
|
|
|
|
"Input error: Data must be structured as array of array refs, with each array ref |
443
|
|
|
|
|
|
|
pointing to one row of data.", |
444
|
|
|
|
|
|
|
"Data inconsistency: No data found in table.", |
445
|
|
|
|
|
|
|
"Data inconsistency: sum of table values does not match total.", |
446
|
|
|
|
|
|
|
"Data inconsistency: could not read row or column.", |
447
|
|
|
|
|
|
|
"Data invalid: All data must be positive integers.", |
448
|
|
|
|
|
|
|
"Data invalid: Found a zero frequency value. Can't continue.", |
449
|
|
|
|
|
|
|
"Input error: Reference to invalid data type; must be reference to array.", |
450
|
|
|
|
|
|
|
); |
451
|
|
|
|
|
|
|
|
452
|
0
|
|
|
|
|
|
print $errorbar; |
453
|
0
|
|
|
|
|
|
print "\n Error",Carp::shortmess(); |
454
|
0
|
|
|
|
|
|
print " ", $e[$code], "\n"; |
455
|
0
|
|
|
|
|
|
print Carp::longmess(" Stack trace:\n"); |
456
|
0
|
|
|
|
|
|
print $errorbar; |
457
|
0
|
|
|
|
|
|
exit $code; |
458
|
|
|
|
|
|
|
} |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
1; |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
__END__ |