line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
|
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# GENERATED WITH PDLA::PP! Don't modify! |
4
|
|
|
|
|
|
|
# |
5
|
|
|
|
|
|
|
package PDLA::Stats::Basic; |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
@EXPORT_OK = qw( binomial_test rtable which_id PDLA::PP stdv PDLA::PP stdv_unbiased PDLA::PP var PDLA::PP var_unbiased PDLA::PP se PDLA::PP ss PDLA::PP skew PDLA::PP skew_unbiased PDLA::PP kurt PDLA::PP kurt_unbiased PDLA::PP cov PDLA::PP cov_table PDLA::PP corr PDLA::PP corr_table PDLA::PP t_corr PDLA::PP n_pair PDLA::PP corr_dev PDLA::PP t_test PDLA::PP t_test_nev PDLA::PP t_test_paired ); |
8
|
|
|
|
|
|
|
%EXPORT_TAGS = (Func=>[@EXPORT_OK]); |
9
|
|
|
|
|
|
|
|
10
|
5
|
|
|
5
|
|
297597
|
use PDLA::Core; |
|
5
|
|
|
|
|
86823
|
|
|
5
|
|
|
|
|
39
|
|
11
|
5
|
|
|
5
|
|
1354
|
use PDLA::Exporter; |
|
5
|
|
|
|
|
7
|
|
|
5
|
|
|
|
|
27
|
|
12
|
5
|
|
|
5
|
|
103
|
use DynaLoader; |
|
5
|
|
|
|
|
9
|
|
|
5
|
|
|
|
|
244
|
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
@ISA = ( 'PDLA::Exporter','DynaLoader' ); |
18
|
|
|
|
|
|
|
push @PDLA::Core::PP, __PACKAGE__; |
19
|
|
|
|
|
|
|
bootstrap PDLA::Stats::Basic ; |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
|
25
|
5
|
|
|
5
|
|
717
|
use PDLA::LiteF; |
|
5
|
|
|
|
|
960
|
|
|
5
|
|
|
|
|
29
|
|
26
|
5
|
|
|
5
|
|
172345
|
use PDLA::NiceSlice; |
|
5
|
|
|
|
|
19
|
|
|
5
|
|
|
|
|
43
|
|
27
|
5
|
|
|
5
|
|
86215
|
use Carp; |
|
5
|
|
|
|
|
12
|
|
|
5
|
|
|
|
|
9480
|
|
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
$PDLA::onlinedoc->scan(__FILE__) if $PDLA::onlinedoc; |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
eval { require PDLA::GSL::CDF; }; |
32
|
|
|
|
|
|
|
my $CDF = 1 if !$@; |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=head1 NAME |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
PDLA::Stats::Basic -- basic statistics and related utilities such as standard deviation, Pearson correlation, and t-tests. |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=head1 DESCRIPTION |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
The terms FUNCTIONS and METHODS are arbitrarily used to refer to methods that are threadable and methods that are NOT threadable, respectively. |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
Does not have mean or median function here. see SEE ALSO. |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=head1 SYNOPSIS |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
use PDLA::LiteF; |
47
|
|
|
|
|
|
|
use PDLA::NiceSlice; |
48
|
|
|
|
|
|
|
use PDLA::Stats::Basic; |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
my $stdv = $data->stdv; |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
or |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
my $stdv = stdv( $data ); |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=cut |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
=head1 FUNCTIONS |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=cut |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=head2 stdv |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=for sig |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
Signature: (a(n); float+ [o]b()) |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=for ref |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
Sample standard deviation. |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
=cut |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=for bad |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
stdv processes bad values. |
93
|
|
|
|
|
|
|
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles. |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=cut |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
*stdv = \&PDLA::stdv; |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=head2 stdv_unbiased |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=for sig |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
Signature: (a(n); float+ [o]b()) |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=for ref |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
Unbiased estimate of population standard deviation. |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=cut |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=for bad |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
stdv_unbiased processes bad values. |
127
|
|
|
|
|
|
|
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles. |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=cut |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
*stdv_unbiased = \&PDLA::stdv_unbiased; |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=head2 var |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
=for sig |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
Signature: (a(n); float+ [o]b()) |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=for ref |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
Sample variance. |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=cut |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
=for bad |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
var processes bad values. |
161
|
|
|
|
|
|
|
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles. |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=cut |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
*var = \&PDLA::var; |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=head2 var_unbiased |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=for sig |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
Signature: (a(n); float+ [o]b()) |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
=for ref |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
Unbiased estimate of population variance. |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=cut |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
=for bad |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
var_unbiased processes bad values. |
195
|
|
|
|
|
|
|
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles. |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=cut |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
*var_unbiased = \&PDLA::var_unbiased; |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=head2 se |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
=for sig |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
Signature: (a(n); float+ [o]b()) |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
=for ref |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
Standard error of the mean. Useful for calculating confidence intervals. |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
=for usage |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
# 95% confidence interval for samples with large N |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
$ci_95_upper = $data->average + 1.96 * $data->se; |
228
|
|
|
|
|
|
|
$ci_95_lower = $data->average - 1.96 * $data->se; |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
=for bad |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
se processes bad values. |
236
|
|
|
|
|
|
|
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles. |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
=cut |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
*se = \&PDLA::se; |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
=head2 ss |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
=for sig |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
Signature: (a(n); float+ [o]b()) |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=for ref |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
Sum of squared deviations from the mean. |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
=cut |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
=for bad |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
ss processes bad values. |
270
|
|
|
|
|
|
|
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles. |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
=cut |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
*ss = \&PDLA::ss; |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
=head2 skew |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
=for sig |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
Signature: (a(n); float+ [o]b()) |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
=for ref |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
Sample skewness, measure of asymmetry in data. skewness == 0 for normal distribution. |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
=cut |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
=for bad |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
skew processes bad values. |
304
|
|
|
|
|
|
|
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles. |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
=cut |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
*skew = \&PDLA::skew; |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
=head2 skew_unbiased |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
=for sig |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
Signature: (a(n); float+ [o]b()) |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
=for ref |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
Unbiased estimate of population skewness. This is the number in GNumeric Descriptive Statistics. |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
=cut |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
=for bad |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
skew_unbiased processes bad values. |
338
|
|
|
|
|
|
|
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles. |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
=cut |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
*skew_unbiased = \&PDLA::skew_unbiased; |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
=head2 kurt |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
=for sig |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
Signature: (a(n); float+ [o]b()) |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
=for ref |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
Sample kurtosis, measure of "peakedness" of data. kurtosis == 0 for normal distribution. |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
=cut |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
=for bad |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
kurt processes bad values. |
372
|
|
|
|
|
|
|
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles. |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
=cut |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
*kurt = \&PDLA::kurt; |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
=head2 kurt_unbiased |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
=for sig |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
Signature: (a(n); float+ [o]b()) |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
=for ref |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
Unbiased estimate of population kurtosis. This is the number in GNumeric Descriptive Statistics. |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
=cut |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=for bad |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
kurt_unbiased processes bad values. |
406
|
|
|
|
|
|
|
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles. |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
=cut |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
*kurt_unbiased = \&PDLA::kurt_unbiased; |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
=head2 cov |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
=for sig |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
Signature: (a(n); b(n); float+ [o]c()) |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
=for ref |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
Sample covariance. see B for ways to call |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
=cut |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
=for bad |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
cov processes bad values. |
440
|
|
|
|
|
|
|
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles. |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
=cut |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
*cov = \&PDLA::cov; |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
=head2 cov_table |
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
=for sig |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
Signature: (a(n,m); float+ [o]c(m,m)) |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
=for ref |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
Square covariance table. Gives the same result as threading using B but it calculates only half the square, hence much faster. And it is easier to use with higher dimension pdls. |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
=for usage |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
Usage: |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
# 5 obs x 3 var, 2 such data tables |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
perldl> $a = random 5, 3, 2 |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
perldl> p $cov = $a->cov_table |
477
|
|
|
|
|
|
|
[ |
478
|
|
|
|
|
|
|
[ |
479
|
|
|
|
|
|
|
[ 8.9636438 -1.8624472 -1.2416588] |
480
|
|
|
|
|
|
|
[-1.8624472 14.341514 -1.4245366] |
481
|
|
|
|
|
|
|
[-1.2416588 -1.4245366 9.8690655] |
482
|
|
|
|
|
|
|
] |
483
|
|
|
|
|
|
|
[ |
484
|
|
|
|
|
|
|
[ 10.32644 -0.31311789 -0.95643674] |
485
|
|
|
|
|
|
|
[-0.31311789 15.051779 -7.2759577] |
486
|
|
|
|
|
|
|
[-0.95643674 -7.2759577 5.4465141] |
487
|
|
|
|
|
|
|
] |
488
|
|
|
|
|
|
|
] |
489
|
|
|
|
|
|
|
# diagonal elements of the cov table are the variances |
490
|
|
|
|
|
|
|
perldl> p $a->var |
491
|
|
|
|
|
|
|
[ |
492
|
|
|
|
|
|
|
[ 8.9636438 14.341514 9.8690655] |
493
|
|
|
|
|
|
|
[ 10.32644 15.051779 5.4465141] |
494
|
|
|
|
|
|
|
] |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
for the same cov matrix table using B, |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
perldl> p $a->dummy(2)->cov($a->dummy(1)) |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
=for bad |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
cov_table processes bad values. |
506
|
|
|
|
|
|
|
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles. |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
=cut |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
*cov_table = \&PDLA::cov_table; |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
=head2 corr |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
=for sig |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
Signature: (a(n); b(n); float+ [o]c()) |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
=for ref |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
Pearson correlation coefficient. r = cov(X,Y) / (stdv(X) * stdv(Y)). |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
=for usage |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
Usage: |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
perldl> $a = random 5, 3 |
539
|
|
|
|
|
|
|
perldl> $b = sequence 5,3 |
540
|
|
|
|
|
|
|
perldl> p $a->corr($b) |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
[0.20934208 0.30949881 0.26713007] |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
for square corr table |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
perldl> p $a->corr($a->dummy(1)) |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
[ |
549
|
|
|
|
|
|
|
[ 1 -0.41995259 -0.029301192] |
550
|
|
|
|
|
|
|
[ -0.41995259 1 -0.61927619] |
551
|
|
|
|
|
|
|
[-0.029301192 -0.61927619 1] |
552
|
|
|
|
|
|
|
] |
553
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
but it is easier and faster to use B. |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
=cut |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
=for bad |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
corr processes bad values. |
562
|
|
|
|
|
|
|
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles. |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
=cut |
566
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
*corr = \&PDLA::corr; |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
=head2 corr_table |
579
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
=for sig |
581
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
Signature: (a(n,m); float+ [o]c(m,m)) |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
=for ref |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
Square Pearson correlation table. Gives the same result as threading using B but it calculates only half the square, hence much faster. And it is easier to use with higher dimension pdls. |
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
=for usage |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
Usage: |
593
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
# 5 obs x 3 var, 2 such data tables |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
perldl> $a = random 5, 3, 2 |
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
perldl> p $a->corr_table |
599
|
|
|
|
|
|
|
[ |
600
|
|
|
|
|
|
|
[ |
601
|
|
|
|
|
|
|
[ 1 -0.69835951 -0.18549048] |
602
|
|
|
|
|
|
|
[-0.69835951 1 0.72481605] |
603
|
|
|
|
|
|
|
[-0.18549048 0.72481605 1] |
604
|
|
|
|
|
|
|
] |
605
|
|
|
|
|
|
|
[ |
606
|
|
|
|
|
|
|
[ 1 0.82722569 -0.71779883] |
607
|
|
|
|
|
|
|
[ 0.82722569 1 -0.63938828] |
608
|
|
|
|
|
|
|
[-0.71779883 -0.63938828 1] |
609
|
|
|
|
|
|
|
] |
610
|
|
|
|
|
|
|
] |
611
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
for the same result using B, |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
perldl> p $a->dummy(2)->corr($a->dummy(1)) |
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
This is also how to use B and B with such a table. |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
|
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
=for bad |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
corr_table processes bad values. |
624
|
|
|
|
|
|
|
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles. |
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
=cut |
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
*corr_table = \&PDLA::corr_table; |
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
=head2 t_corr |
641
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
=for sig |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
Signature: (r(); n(); [o]t()) |
645
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
=for usage |
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
$corr = $data->corr( $data->dummy(1) ); |
651
|
|
|
|
|
|
|
$n = $data->n_pair( $data->dummy(1) ); |
652
|
|
|
|
|
|
|
$t_corr = $corr->t_corr( $n ); |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
use PDLA::GSL::CDF; |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
$p_2tail = 2 * (1 - gsl_cdf_tdist_P( $t_corr->abs, $n-2 )); |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
=for ref |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
t significance test for Pearson correlations. |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
=cut |
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
=for bad |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
t_corr processes bad values. |
668
|
|
|
|
|
|
|
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles. |
669
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
=cut |
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
*t_corr = \&PDLA::t_corr; |
679
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
=head2 n_pair |
685
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
=for sig |
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
Signature: (a(n); b(n); indx [o]c()) |
689
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
=for ref |
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
Returns the number of good pairs between 2 lists. Useful with B (esp. when bad values are involved) |
695
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
=cut |
697
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
=for bad |
700
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
n_pair processes bad values. |
702
|
|
|
|
|
|
|
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles. |
703
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
=cut |
706
|
|
|
|
|
|
|
|
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
*n_pair = \&PDLA::n_pair; |
713
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
|
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
=head2 corr_dev |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
=for sig |
721
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
Signature: (a(n); b(n); float+ [o]c()) |
723
|
|
|
|
|
|
|
|
724
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
=for usage |
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
$corr = $a->dev_m->corr_dev($b->dev_m); |
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
=for ref |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
Calculates correlations from B vals. Seems faster than doing B from original vals when data pdl is big |
733
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
=cut |
735
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
=for bad |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
corr_dev processes bad values. |
740
|
|
|
|
|
|
|
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles. |
741
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
=cut |
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
*corr_dev = \&PDLA::corr_dev; |
751
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
=head2 t_test |
757
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
=for sig |
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
Signature: (a(n); b(m); float+ [o]t(); [o]d()) |
761
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
=for usage |
765
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
my ($t, $df) = t_test( $pdl1, $pdl2 ); |
767
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
use PDLA::GSL::CDF; |
769
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
my $p_2tail = 2 * (1 - gsl_cdf_tdist_P( $t->abs, $df )); |
771
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
=for ref |
773
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
Independent sample t-test, assuming equal var. |
775
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
=cut |
777
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
=for bad |
780
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
t_test processes bad values. |
782
|
|
|
|
|
|
|
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles. |
783
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
=cut |
786
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
|
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
|
792
|
|
|
|
|
|
|
*t_test = \&PDLA::t_test; |
793
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
|
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
=head2 t_test_nev |
799
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
=for sig |
801
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
Signature: (a(n); b(m); float+ [o]t(); [o]d()) |
803
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
=for ref |
807
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
Independent sample t-test, NOT assuming equal var. ie Welch two sample t test. Df follows Welch-Satterthwaite equation instead of Satterthwaite (1946, as cited by Hays, 1994, 5th ed.). It matches GNumeric, which matches R. |
809
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
=for usage |
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
my ($t, $df) = $pdl1->t_test( $pdl2 ); |
813
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
=cut |
815
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
|
817
|
|
|
|
|
|
|
=for bad |
818
|
|
|
|
|
|
|
|
819
|
|
|
|
|
|
|
t_test_nev processes bad values. |
820
|
|
|
|
|
|
|
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles. |
821
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
|
823
|
|
|
|
|
|
|
=cut |
824
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
*t_test_nev = \&PDLA::t_test_nev; |
831
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
|
833
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
|
836
|
|
|
|
|
|
|
=head2 t_test_paired |
837
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
=for sig |
839
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
Signature: (a(n); b(n); float+ [o]t(); [o]d()) |
841
|
|
|
|
|
|
|
|
842
|
|
|
|
|
|
|
|
843
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
=for ref |
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
Paired sample t-test. |
847
|
|
|
|
|
|
|
|
848
|
|
|
|
|
|
|
=cut |
849
|
|
|
|
|
|
|
|
850
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
=for bad |
852
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
t_test_paired processes bad values. |
854
|
|
|
|
|
|
|
It will set the bad-value flag of all output piddles if the flag is set for any of the input piddles. |
855
|
|
|
|
|
|
|
|
856
|
|
|
|
|
|
|
|
857
|
|
|
|
|
|
|
=cut |
858
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
|
860
|
|
|
|
|
|
|
|
861
|
|
|
|
|
|
|
|
862
|
|
|
|
|
|
|
|
863
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
*t_test_paired = \&PDLA::t_test_paired; |
865
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
|
869
|
|
|
|
|
|
|
=head2 binomial_test |
870
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
=for Sig |
872
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
Signature: (x(); n(); p_expected(); [o]p()) |
874
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
=for ref |
876
|
|
|
|
|
|
|
|
877
|
|
|
|
|
|
|
Binomial test. One-tailed significance test for two-outcome distribution. Given the number of successes, the number of trials, and the expected probability of success, returns the probability of getting this many or more successes. |
878
|
|
|
|
|
|
|
|
879
|
|
|
|
|
|
|
This function does NOT currently support bad value in the number of successes. |
880
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
=for usage |
882
|
|
|
|
|
|
|
|
883
|
|
|
|
|
|
|
Usage: |
884
|
|
|
|
|
|
|
|
885
|
|
|
|
|
|
|
# assume a fair coin, ie. 0.5 probablity of getting heads |
886
|
|
|
|
|
|
|
# test whether getting 8 heads out of 10 coin flips is unusual |
887
|
|
|
|
|
|
|
|
888
|
|
|
|
|
|
|
my $p = binomial_test( 8, 10, 0.5 ); # 0.0107421875. Yes it is unusual. |
889
|
|
|
|
|
|
|
|
890
|
|
|
|
|
|
|
=cut |
891
|
|
|
|
|
|
|
|
892
|
|
|
|
|
|
|
*binomial_test = \&PDLA::binomial_test; |
893
|
|
|
|
|
|
|
sub PDLA::binomial_test { |
894
|
0
|
|
|
0
|
0
|
0
|
my ($x, $n, $P) = @_; |
895
|
|
|
|
|
|
|
|
896
|
0
|
0
|
|
|
|
0
|
carp 'Please install PDLA::GSL::CDF.' unless $CDF; |
897
|
0
|
0
|
|
|
|
0
|
carp 'This function does NOT currently support bad value in the number of successes.' if $x->badflag(); |
898
|
|
|
|
|
|
|
|
899
|
0
|
|
|
|
|
0
|
my $pdlx = pdl($x); |
900
|
0
|
|
|
|
|
0
|
$pdlx->badflag(1); |
901
|
0
|
|
|
|
|
0
|
$pdlx = $pdlx->setvaltobad(0); |
902
|
|
|
|
|
|
|
|
903
|
0
|
|
|
|
|
0
|
my $p = 1 - PDLA::GSL::CDF::gsl_cdf_binomial_P( $pdlx - 1, $P, $n ); |
904
|
0
|
|
|
|
|
0
|
$p = $p->setbadtoval(1); |
905
|
0
|
|
|
|
|
0
|
$p->badflag(0); |
906
|
|
|
|
|
|
|
|
907
|
0
|
|
|
|
|
0
|
return $p; |
908
|
|
|
|
|
|
|
} |
909
|
|
|
|
|
|
|
|
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
=head1 METHODS |
912
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
=head2 rtable |
914
|
|
|
|
|
|
|
|
915
|
|
|
|
|
|
|
=for ref |
916
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
Reads either file or file handle*. Returns observation x variable pdl and var and obs ids if specified. Ids in perl @ ref to allow for non-numeric ids. Other non-numeric entries are treated as missing, which are filled with $opt{MISSN} then set to BAD*. Can specify num of data rows to read from top but not arbitrary range. |
918
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
*If passed handle, it will not be closed here. |
920
|
|
|
|
|
|
|
|
921
|
|
|
|
|
|
|
*PDLA::Bad::setvaltobad only works consistently with the default TYPE double before PDLA-2.4.4_04. |
922
|
|
|
|
|
|
|
|
923
|
|
|
|
|
|
|
=for options |
924
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
Default options (case insensitive): |
926
|
|
|
|
|
|
|
|
927
|
|
|
|
|
|
|
V => 1, # verbose. prints simple status |
928
|
|
|
|
|
|
|
TYPE => double, |
929
|
|
|
|
|
|
|
C_ID => 1, # boolean. file has col id. |
930
|
|
|
|
|
|
|
R_ID => 1, # boolean. file has row id. |
931
|
|
|
|
|
|
|
R_VAR => 0, # boolean. set to 1 if var in rows |
932
|
|
|
|
|
|
|
SEP => "\t", # can take regex qr// |
933
|
|
|
|
|
|
|
MISSN => -999, # this value treated as missing and set to BAD |
934
|
|
|
|
|
|
|
NROW => '', # set to read specified num of data rows |
935
|
|
|
|
|
|
|
|
936
|
|
|
|
|
|
|
=for usage |
937
|
|
|
|
|
|
|
|
938
|
|
|
|
|
|
|
Usage: |
939
|
|
|
|
|
|
|
|
940
|
|
|
|
|
|
|
Sample file diet.txt: |
941
|
|
|
|
|
|
|
|
942
|
|
|
|
|
|
|
uid height weight diet |
943
|
|
|
|
|
|
|
akw 72 320 1 |
944
|
|
|
|
|
|
|
bcm 68 268 1 |
945
|
|
|
|
|
|
|
clq 67 180 2 |
946
|
|
|
|
|
|
|
dwm 70 200 2 |
947
|
|
|
|
|
|
|
|
948
|
|
|
|
|
|
|
($data, $idv, $ido) = rtable 'diet.txt'; |
949
|
|
|
|
|
|
|
|
950
|
|
|
|
|
|
|
# By default prints out data info and @$idv index and element |
951
|
|
|
|
|
|
|
|
952
|
|
|
|
|
|
|
reading diet.txt for data and id... OK. |
953
|
|
|
|
|
|
|
data table as PDLA dim o x v: PDLA: Double D [4,3] |
954
|
|
|
|
|
|
|
0 height |
955
|
|
|
|
|
|
|
1 weight |
956
|
|
|
|
|
|
|
2 diet |
957
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
Another way of using it, |
959
|
|
|
|
|
|
|
|
960
|
|
|
|
|
|
|
$data = rtable( \*STDIN, {TYPE=>long} ); |
961
|
|
|
|
|
|
|
|
962
|
|
|
|
|
|
|
=cut |
963
|
|
|
|
|
|
|
|
964
|
|
|
|
|
|
|
sub rtable { |
965
|
|
|
|
|
|
|
# returns obs x var data matrix and var and obs ids |
966
|
3
|
|
|
3
|
1
|
28082
|
my ($src, $opt) = @_; |
967
|
|
|
|
|
|
|
|
968
|
3
|
|
|
|
|
6
|
my $fh_in; |
969
|
3
|
50
|
33
|
|
|
31
|
if ($src =~ /STDIN/ or ref $src eq 'GLOB') { $fh_in = $src } |
|
3
|
|
|
|
|
9
|
|
970
|
0
|
0
|
|
|
|
0
|
else { open $fh_in, $src or croak "$!" } |
971
|
|
|
|
|
|
|
|
972
|
3
|
|
|
|
|
16
|
my %opt = ( V => 1, |
973
|
|
|
|
|
|
|
TYPE => double, |
974
|
|
|
|
|
|
|
C_ID => 1, |
975
|
|
|
|
|
|
|
R_ID => 1, |
976
|
|
|
|
|
|
|
R_VAR => 0, |
977
|
|
|
|
|
|
|
SEP => "\t", |
978
|
|
|
|
|
|
|
MISSN => -999, |
979
|
|
|
|
|
|
|
NROW => '', |
980
|
|
|
|
|
|
|
); |
981
|
3
|
|
33
|
|
|
62
|
$opt and $opt{uc $_} = $opt->{$_} for (keys %$opt); |
982
|
3
|
50
|
|
|
|
11
|
$opt{V} and print STDERR "reading $src for data and id... "; |
983
|
|
|
|
|
|
|
|
984
|
3
|
|
|
|
|
8
|
local $PDLA::undefval = $opt{MISSN}; |
985
|
|
|
|
|
|
|
|
986
|
3
|
|
|
|
|
7
|
my $id_c = []; # match declaration of $id_r for return purpose |
987
|
3
|
50
|
|
|
|
7
|
if ($opt{C_ID}) { |
988
|
3
|
|
|
|
|
17
|
chomp( $id_c = <$fh_in> ); |
989
|
3
|
|
|
|
|
52
|
my @entries = split $opt{SEP}, $id_c; |
990
|
3
|
50
|
|
|
|
12
|
$opt{R_ID} and shift @entries; |
991
|
3
|
|
|
|
|
9
|
$id_c = \@entries; |
992
|
|
|
|
|
|
|
} |
993
|
|
|
|
|
|
|
|
994
|
3
|
|
|
|
|
20
|
my ($c_row, $id_r, $data, @data) = (0, [], PDLA->null, ); |
995
|
3
|
|
|
|
|
70
|
while (<$fh_in>) { |
996
|
132
|
|
|
|
|
170
|
chomp; |
997
|
132
|
|
|
|
|
354
|
my @entries = split /$opt{SEP}/, $_, -1; |
998
|
|
|
|
|
|
|
|
999
|
132
|
50
|
|
|
|
287
|
$opt{R_ID} and push @$id_r, shift @entries; |
1000
|
|
|
|
|
|
|
|
1001
|
|
|
|
|
|
|
# rudimentary check for numeric entry |
1002
|
132
|
100
|
66
|
|
|
172
|
for (@entries) { $_ = $opt{MISSN} unless defined $_ and /\d\b/ } |
|
669
|
|
|
|
|
1858
|
|
1003
|
|
|
|
|
|
|
|
1004
|
132
|
|
|
|
|
259
|
push @data, pdl( $opt{TYPE}, \@entries ); |
1005
|
132
|
|
|
|
|
2509
|
$c_row ++; |
1006
|
|
|
|
|
|
|
last |
1007
|
132
|
50
|
33
|
|
|
483
|
if $opt{NROW} and $c_row == $opt{NROW}; |
1008
|
|
|
|
|
|
|
} |
1009
|
|
|
|
|
|
|
# not explicitly closing $fh_in here in case it's passed from outside |
1010
|
|
|
|
|
|
|
# $fh_in will close by going out of scope if opened here. |
1011
|
|
|
|
|
|
|
|
1012
|
3
|
|
|
|
|
27
|
$data = pdl $opt{TYPE}, @data; |
1013
|
3
|
|
|
|
|
205
|
@data = (); |
1014
|
|
|
|
|
|
|
# rid of last col unless there is data there |
1015
|
|
|
|
|
|
|
$data = $data(0:$data->getdim(0)-2, )->sever |
1016
|
3
|
100
|
|
|
|
23
|
unless ( nelem $data(-1, )->where($data(-1, ) != $opt{MISSN}) ); |
1017
|
|
|
|
|
|
|
|
1018
|
3
|
|
|
|
|
614
|
my ($idv, $ido) = ($id_r, $id_c); |
1019
|
|
|
|
|
|
|
# var in columns instead of rows |
1020
|
3
|
50
|
|
|
|
23
|
$opt{R_VAR} == 0 |
1021
|
|
|
|
|
|
|
and ($data, $idv, $ido) = ($data->inplace->transpose, $id_c, $id_r); |
1022
|
|
|
|
|
|
|
|
1023
|
3
|
50
|
|
|
|
109
|
if ($opt{V}) { |
1024
|
0
|
|
|
|
|
0
|
print STDERR "OK.\ndata table as PDLA dim o x v: " . $data->info . "\n"; |
1025
|
0
|
|
0
|
|
|
0
|
$idv and print STDERR "$_\t$$idv[$_]\n" for (0..$#$idv); |
1026
|
|
|
|
|
|
|
} |
1027
|
|
|
|
|
|
|
|
1028
|
3
|
|
|
|
|
65
|
$data = $data->setvaltobad( $opt{MISSN} ); |
1029
|
3
|
|
|
|
|
20
|
$data->check_badflag; |
1030
|
3
|
50
|
|
|
|
200
|
return wantarray? (@$idv? ($data, $idv, $ido) : ($data, $ido)) : $data; |
|
|
50
|
|
|
|
|
|
1031
|
|
|
|
|
|
|
} |
1032
|
|
|
|
|
|
|
|
1033
|
|
|
|
|
|
|
=head2 group_by |
1034
|
|
|
|
|
|
|
|
1035
|
|
|
|
|
|
|
Returns pdl reshaped according to the specified factor variable. Most useful when used in conjunction with other threading calculations such as average, stdv, etc. When the factor variable contains unequal number of cases in each level, the returned pdl is padded with bad values to fit the level with the most number of cases. This allows the subsequent calculation (average, stdv, etc) to return the correct results for each level. |
1036
|
|
|
|
|
|
|
|
1037
|
|
|
|
|
|
|
Usage: |
1038
|
|
|
|
|
|
|
|
1039
|
|
|
|
|
|
|
# simple case with 1d pdl and equal number of n in each level of the factor |
1040
|
|
|
|
|
|
|
|
1041
|
|
|
|
|
|
|
pdl> p $a = sequence 10 |
1042
|
|
|
|
|
|
|
[0 1 2 3 4 5 6 7 8 9] |
1043
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
pdl> p $factor = $a > 4 |
1045
|
|
|
|
|
|
|
[0 0 0 0 0 1 1 1 1 1] |
1046
|
|
|
|
|
|
|
|
1047
|
|
|
|
|
|
|
pdl> p $a->group_by( $factor )->average |
1048
|
|
|
|
|
|
|
[2 7] |
1049
|
|
|
|
|
|
|
|
1050
|
|
|
|
|
|
|
# more complex case with threading and unequal number of n across levels in the factor |
1051
|
|
|
|
|
|
|
|
1052
|
|
|
|
|
|
|
pdl> p $a = sequence 10,2 |
1053
|
|
|
|
|
|
|
[ |
1054
|
|
|
|
|
|
|
[ 0 1 2 3 4 5 6 7 8 9] |
1055
|
|
|
|
|
|
|
[10 11 12 13 14 15 16 17 18 19] |
1056
|
|
|
|
|
|
|
] |
1057
|
|
|
|
|
|
|
|
1058
|
|
|
|
|
|
|
pdl> p $factor = qsort $a( ,0) % 3 |
1059
|
|
|
|
|
|
|
[ |
1060
|
|
|
|
|
|
|
[0 0 0 0 1 1 1 2 2 2] |
1061
|
|
|
|
|
|
|
] |
1062
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
pdl> p $a->group_by( $factor ) |
1064
|
|
|
|
|
|
|
[ |
1065
|
|
|
|
|
|
|
[ |
1066
|
|
|
|
|
|
|
[ 0 1 2 3] |
1067
|
|
|
|
|
|
|
[10 11 12 13] |
1068
|
|
|
|
|
|
|
] |
1069
|
|
|
|
|
|
|
[ |
1070
|
|
|
|
|
|
|
[ 4 5 6 BAD] |
1071
|
|
|
|
|
|
|
[ 14 15 16 BAD] |
1072
|
|
|
|
|
|
|
] |
1073
|
|
|
|
|
|
|
[ |
1074
|
|
|
|
|
|
|
[ 7 8 9 BAD] |
1075
|
|
|
|
|
|
|
[ 17 18 19 BAD] |
1076
|
|
|
|
|
|
|
] |
1077
|
|
|
|
|
|
|
] |
1078
|
|
|
|
|
|
|
ARRAY(0xa2a4e40) |
1079
|
|
|
|
|
|
|
|
1080
|
|
|
|
|
|
|
# group_by supports perl factors, multiple factors |
1081
|
|
|
|
|
|
|
# returns factor labels in addition to pdl in array context |
1082
|
|
|
|
|
|
|
|
1083
|
|
|
|
|
|
|
pdl> p $a = sequence 12 |
1084
|
|
|
|
|
|
|
[0 1 2 3 4 5 6 7 8 9 10 11] |
1085
|
|
|
|
|
|
|
|
1086
|
|
|
|
|
|
|
pdl> $odd_even = [qw( e o e o e o e o e o e o )] |
1087
|
|
|
|
|
|
|
|
1088
|
|
|
|
|
|
|
pdl> $magnitude = [qw( l l l l l l h h h h h h )] |
1089
|
|
|
|
|
|
|
|
1090
|
|
|
|
|
|
|
pdl> ($a_grouped, $label) = $a->group_by( $odd_even, $magnitude ) |
1091
|
|
|
|
|
|
|
|
1092
|
|
|
|
|
|
|
pdl> p $a_grouped |
1093
|
|
|
|
|
|
|
[ |
1094
|
|
|
|
|
|
|
[ |
1095
|
|
|
|
|
|
|
[0 2 4] |
1096
|
|
|
|
|
|
|
[1 3 5] |
1097
|
|
|
|
|
|
|
] |
1098
|
|
|
|
|
|
|
[ |
1099
|
|
|
|
|
|
|
[ 6 8 10] |
1100
|
|
|
|
|
|
|
[ 7 9 11] |
1101
|
|
|
|
|
|
|
] |
1102
|
|
|
|
|
|
|
] |
1103
|
|
|
|
|
|
|
|
1104
|
|
|
|
|
|
|
pdl> p Dumper $label |
1105
|
|
|
|
|
|
|
$VAR1 = [ |
1106
|
|
|
|
|
|
|
[ |
1107
|
|
|
|
|
|
|
'e_l', |
1108
|
|
|
|
|
|
|
'o_l' |
1109
|
|
|
|
|
|
|
], |
1110
|
|
|
|
|
|
|
[ |
1111
|
|
|
|
|
|
|
'e_h', |
1112
|
|
|
|
|
|
|
'o_h' |
1113
|
|
|
|
|
|
|
] |
1114
|
|
|
|
|
|
|
]; |
1115
|
|
|
|
|
|
|
|
1116
|
|
|
|
|
|
|
|
1117
|
|
|
|
|
|
|
=cut |
1118
|
|
|
|
|
|
|
|
1119
|
|
|
|
|
|
|
*group_by = \&PDLA::group_by; |
1120
|
|
|
|
|
|
|
sub PDLA::group_by { |
1121
|
3
|
|
|
3
|
0
|
7188
|
my $p = shift; |
1122
|
3
|
|
|
|
|
7
|
my @factors = @_; |
1123
|
|
|
|
|
|
|
|
1124
|
3
|
100
|
|
|
|
10
|
if ( @factors == 1 ) { |
1125
|
2
|
|
|
|
|
3
|
my $factor = $factors[0]; |
1126
|
2
|
|
|
|
|
2
|
my $label; |
1127
|
2
|
50
|
|
|
|
6
|
if (ref $factor eq 'ARRAY') { |
1128
|
0
|
|
|
|
|
0
|
$label = _ordered_uniq($factor); |
1129
|
0
|
|
|
|
|
0
|
$factor = _array_to_pdl($factor); |
1130
|
|
|
|
|
|
|
} else { |
1131
|
2
|
|
|
|
|
7
|
my $perl_factor = [$factor->list]; |
1132
|
2
|
|
|
|
|
41
|
$label = _ordered_uniq($perl_factor); |
1133
|
|
|
|
|
|
|
} |
1134
|
|
|
|
|
|
|
|
1135
|
2
|
|
|
|
|
6
|
my $p_reshaped = _group_by_single_factor( $p, $factor ); |
1136
|
|
|
|
|
|
|
|
1137
|
2
|
100
|
|
|
|
25
|
return wantarray? ($p_reshaped, $label) : $p_reshaped; |
1138
|
|
|
|
|
|
|
} |
1139
|
|
|
|
|
|
|
|
1140
|
|
|
|
|
|
|
# make sure all are arrays instead of pdls |
1141
|
1
|
50
|
|
|
|
2
|
@factors = map { ref($_) eq 'PDLA'? [$_->list] : $_ } @factors; |
|
2
|
|
|
|
|
8
|
|
1142
|
|
|
|
|
|
|
|
1143
|
1
|
|
|
|
|
2
|
my (@cells); |
1144
|
1
|
|
|
|
|
2
|
for my $ele (0 .. $#{$factors[0]}) { |
|
1
|
|
|
|
|
3
|
|
1145
|
10
|
|
|
|
|
12
|
my $c = join '_', map { $_->[$ele] } @factors; |
|
20
|
|
|
|
|
30
|
|
1146
|
10
|
|
|
|
|
17
|
push @cells, $c; |
1147
|
|
|
|
|
|
|
} |
1148
|
|
|
|
|
|
|
# get uniq cell labels (ref List::MoreUtils::uniq) |
1149
|
1
|
|
|
|
|
2
|
my %seen; |
1150
|
1
|
|
|
|
|
2
|
my @uniq_cells = grep {! $seen{$_}++ } @cells; |
|
10
|
|
|
|
|
19
|
|
1151
|
|
|
|
|
|
|
|
1152
|
1
|
|
|
|
|
3
|
my $flat_factor = _array_to_pdl( \@cells ); |
1153
|
|
|
|
|
|
|
|
1154
|
1
|
|
|
|
|
2
|
my $p_reshaped = _group_by_single_factor( $p, $flat_factor ); |
1155
|
|
|
|
|
|
|
|
1156
|
|
|
|
|
|
|
# get levels of each factor and reshape accordingly |
1157
|
1
|
|
|
|
|
2
|
my @levels; |
1158
|
1
|
|
|
|
|
2
|
for (@factors) { |
1159
|
2
|
|
|
|
|
4
|
my %uniq; |
1160
|
2
|
|
|
|
|
7
|
@uniq{ @$_ } = (); |
1161
|
2
|
|
|
|
|
6
|
push @levels, scalar keys %uniq; |
1162
|
|
|
|
|
|
|
} |
1163
|
|
|
|
|
|
|
|
1164
|
1
|
|
|
|
|
5
|
$p_reshaped = $p_reshaped->reshape( $p_reshaped->dim(0), @levels )->sever; |
1165
|
|
|
|
|
|
|
|
1166
|
|
|
|
|
|
|
# make labels for the returned data structure matching pdl structure |
1167
|
1
|
|
|
|
|
40
|
my @labels; |
1168
|
1
|
50
|
|
|
|
4
|
if (wantarray) { |
1169
|
1
|
|
|
|
|
3
|
for my $ifactor (0 .. $#levels) { |
1170
|
2
|
|
|
|
|
3
|
my @factor_label; |
1171
|
2
|
|
|
|
|
3
|
for my $ilevel (0 .. $levels[$ifactor]-1) { |
1172
|
4
|
|
|
|
|
6
|
my $i = $ifactor * $levels[$ifactor] + $ilevel; |
1173
|
4
|
|
|
|
|
7
|
push @factor_label, $uniq_cells[$i]; |
1174
|
|
|
|
|
|
|
} |
1175
|
2
|
|
|
|
|
4
|
push @labels, \@factor_label; |
1176
|
|
|
|
|
|
|
} |
1177
|
|
|
|
|
|
|
} |
1178
|
|
|
|
|
|
|
|
1179
|
1
|
50
|
|
|
|
8
|
return wantarray? ($p_reshaped, \@labels) : $p_reshaped; |
1180
|
|
|
|
|
|
|
} |
1181
|
|
|
|
|
|
|
|
1182
|
|
|
|
|
|
|
# get uniq cell labels (ref List::MoreUtils::uniq) |
1183
|
|
|
|
|
|
|
sub _ordered_uniq { |
1184
|
2
|
|
|
2
|
|
4
|
my $arr = shift; |
1185
|
|
|
|
|
|
|
|
1186
|
2
|
|
|
|
|
3
|
my %seen; |
1187
|
2
|
|
|
|
|
4
|
my @uniq = grep { ! $seen{$_}++ } @$arr; |
|
20
|
|
|
|
|
52
|
|
1188
|
|
|
|
|
|
|
|
1189
|
2
|
|
|
|
|
7
|
return \@uniq; |
1190
|
|
|
|
|
|
|
} |
1191
|
|
|
|
|
|
|
|
1192
|
|
|
|
|
|
|
sub _group_by_single_factor { |
1193
|
3
|
|
|
3
|
|
4
|
my $p = shift; |
1194
|
3
|
|
|
|
|
4
|
my $factor = shift; |
1195
|
|
|
|
|
|
|
|
1196
|
3
|
|
|
|
|
8
|
$factor = $factor->squeeze; |
1197
|
3
|
50
|
|
|
|
115
|
die "Currently support only 1d factor pdl." |
1198
|
|
|
|
|
|
|
if $factor->ndims > 1; |
1199
|
|
|
|
|
|
|
|
1200
|
3
|
50
|
|
|
|
9
|
die "Data pdl and factor pdl do not match!" |
1201
|
|
|
|
|
|
|
unless $factor->dim(0) == $p->dim(0); |
1202
|
|
|
|
|
|
|
|
1203
|
|
|
|
|
|
|
# get active dim that will be split according to factor and dims to thread over |
1204
|
3
|
|
|
|
|
6
|
my @p_threaddims = $p->dims; |
1205
|
3
|
|
|
|
|
50
|
my $p_dim0 = shift @p_threaddims; |
1206
|
|
|
|
|
|
|
|
1207
|
3
|
|
|
|
|
8
|
my $uniq = $factor->uniq; |
1208
|
|
|
|
|
|
|
|
1209
|
3
|
|
|
|
|
631
|
my @uniq_ns; |
1210
|
3
|
|
|
|
|
8
|
for ($uniq->list) { |
1211
|
9
|
|
|
|
|
272
|
push @uniq_ns, which( $factor == $_ )->nelem; |
1212
|
|
|
|
|
|
|
} |
1213
|
|
|
|
|
|
|
|
1214
|
|
|
|
|
|
|
# get number of n's in each group, find the biggest, fit output pdl to this |
1215
|
3
|
|
|
|
|
79
|
my $uniq_ns = pdl \@uniq_ns; |
1216
|
3
|
|
|
|
|
56
|
my $max = pdl(\@uniq_ns)->max; |
1217
|
|
|
|
|
|
|
|
1218
|
3
|
|
|
|
|
149
|
my $badvalue = int($p->max + 1); |
1219
|
3
|
|
|
|
|
99
|
my $p_tmp = ones($max, @p_threaddims, $uniq->nelem) * $badvalue; |
1220
|
3
|
|
|
|
|
202
|
for (0 .. $#uniq_ns) { |
1221
|
9
|
|
|
|
|
1164
|
my $i = which $factor == $uniq($_); |
1222
|
9
|
|
|
|
|
398
|
$p_tmp->dice_axis(-1,$_)->squeeze->(0:$uniq_ns[$_]-1, ) .= $p($i, ); |
1223
|
|
|
|
|
|
|
} |
1224
|
|
|
|
|
|
|
|
1225
|
3
|
|
|
|
|
477
|
$p_tmp->badflag(1); |
1226
|
3
|
|
|
|
|
33
|
return $p_tmp->setvaltobad($badvalue); |
1227
|
|
|
|
|
|
|
} |
1228
|
|
|
|
|
|
|
|
1229
|
|
|
|
|
|
|
=head2 which_id |
1230
|
|
|
|
|
|
|
|
1231
|
|
|
|
|
|
|
=for ref |
1232
|
|
|
|
|
|
|
|
1233
|
|
|
|
|
|
|
Lookup specified var (obs) ids in $idv ($ido) (see B) and return indices in $idv ($ido) as pdl if found. The indices are ordered by the specified subset. Useful for selecting data by var (obs) id. |
1234
|
|
|
|
|
|
|
|
1235
|
|
|
|
|
|
|
=for usage |
1236
|
|
|
|
|
|
|
|
1237
|
|
|
|
|
|
|
my $ind = which_id $ido, ['smith', 'summers', 'tesla']; |
1238
|
|
|
|
|
|
|
|
1239
|
|
|
|
|
|
|
my $data_subset = $data( $ind, ); |
1240
|
|
|
|
|
|
|
|
1241
|
|
|
|
|
|
|
# take advantage of perl pattern matching |
1242
|
|
|
|
|
|
|
# e.g. use data from people whose last name starts with s |
1243
|
|
|
|
|
|
|
|
1244
|
|
|
|
|
|
|
my $i = which_id $ido, [ grep { /^s/ } @$ido ]; |
1245
|
|
|
|
|
|
|
|
1246
|
|
|
|
|
|
|
my $data_s = $data($i, ); |
1247
|
|
|
|
|
|
|
|
1248
|
|
|
|
|
|
|
=cut |
1249
|
|
|
|
|
|
|
|
1250
|
|
|
|
|
|
|
sub which_id { |
1251
|
38
|
|
|
38
|
1
|
70
|
my ($id, $id_s) = @_; |
1252
|
|
|
|
|
|
|
|
1253
|
38
|
|
|
|
|
44
|
my %ind; |
1254
|
38
|
|
|
|
|
130
|
@ind{ @$id } = ( 0 .. $#$id ); |
1255
|
|
|
|
|
|
|
|
1256
|
38
|
|
|
|
|
54
|
my @ind_select; |
1257
|
38
|
|
|
|
|
68
|
for (@$id_s) { |
1258
|
65
|
50
|
|
|
|
164
|
defined( $ind{$_} ) and push @ind_select, $ind{$_}; |
1259
|
|
|
|
|
|
|
} |
1260
|
38
|
|
|
|
|
75
|
return pdl @ind_select; |
1261
|
|
|
|
|
|
|
} |
1262
|
|
|
|
|
|
|
|
1263
|
|
|
|
|
|
|
sub _array_to_pdl { |
1264
|
133
|
|
|
133
|
|
3142
|
my ($var_ref) = @_; |
1265
|
|
|
|
|
|
|
|
1266
|
133
|
|
|
|
|
175
|
my (%level, $l); |
1267
|
133
|
|
|
|
|
169
|
$l = 0; |
1268
|
133
|
|
|
|
|
234
|
for (@$var_ref) { |
1269
|
4684
|
100
|
66
|
|
|
15006
|
if (defined($_) and $_ ne '' and $_ ne 'BAD') { |
|
|
|
100
|
|
|
|
|
1270
|
|
|
|
|
|
|
$level{$_} = $l ++ |
1271
|
4676
|
100
|
|
|
|
8183
|
if !exists $level{$_}; |
1272
|
|
|
|
|
|
|
} |
1273
|
|
|
|
|
|
|
} |
1274
|
|
|
|
|
|
|
|
1275
|
133
|
100
|
100
|
|
|
239
|
my $pdl = pdl( map { (defined($_) and $_ ne '' and $_ ne 'BAD')? $level{$_} : -1 } @$var_ref ); |
|
4684
|
|
|
|
|
15824
|
|
1276
|
133
|
|
|
|
|
4366
|
$pdl = $pdl->setvaltobad(-1); |
1277
|
133
|
|
|
|
|
456
|
$pdl->check_badflag; |
1278
|
|
|
|
|
|
|
|
1279
|
133
|
100
|
|
|
|
6134
|
return wantarray? ($pdl, \%level) : $pdl; |
1280
|
|
|
|
|
|
|
} |
1281
|
|
|
|
|
|
|
|
1282
|
|
|
|
|
|
|
|
1283
|
|
|
|
|
|
|
=head1 SEE ALSO |
1284
|
|
|
|
|
|
|
|
1285
|
|
|
|
|
|
|
PDLA::Basic (hist for frequency counts) |
1286
|
|
|
|
|
|
|
|
1287
|
|
|
|
|
|
|
PDLA::Ufunc (sum, avg, median, min, max, etc.) |
1288
|
|
|
|
|
|
|
|
1289
|
|
|
|
|
|
|
PDLA::GSL::CDF (various cumulative distribution functions) |
1290
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
=head1 REFERENCES |
1292
|
|
|
|
|
|
|
|
1293
|
|
|
|
|
|
|
Hays, W.L. (1994). Statistics (5th ed.). Fort Worth, TX: Harcourt Brace College Publishers. |
1294
|
|
|
|
|
|
|
|
1295
|
|
|
|
|
|
|
=head1 AUTHOR |
1296
|
|
|
|
|
|
|
|
1297
|
|
|
|
|
|
|
Copyright (C) 2009 Maggie J. Xiong |
1298
|
|
|
|
|
|
|
|
1299
|
|
|
|
|
|
|
All rights reserved. There is no warranty. You are allowed to redistribute this software / documentation as described in the file COPYING in the PDLA distribution. |
1300
|
|
|
|
|
|
|
|
1301
|
|
|
|
|
|
|
=cut |
1302
|
|
|
|
|
|
|
|
1303
|
|
|
|
|
|
|
|
1304
|
|
|
|
|
|
|
|
1305
|
|
|
|
|
|
|
; |
1306
|
|
|
|
|
|
|
|
1307
|
|
|
|
|
|
|
|
1308
|
|
|
|
|
|
|
|
1309
|
|
|
|
|
|
|
# Exit with OK status |
1310
|
|
|
|
|
|
|
|
1311
|
|
|
|
|
|
|
1; |
1312
|
|
|
|
|
|
|
|
1313
|
|
|
|
|
|
|
|