| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# |
|
2
|
|
|
|
|
|
|
# GENERATED WITH PDL::PP from lib/PDL/Stats/GLM.pd! Don't modify! |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
package PDL::Stats::GLM; |
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
our @EXPORT_OK = qw(ols_t ols ols_rptd anova anova_rptd anova_design_matrix dummy_code effect_code effect_code_w interaction_code r2_change logistic pca pca_sorti plot_means plot_residuals plot_screes fill_m fill_rand dev_m stddz sse mse rmse pred_logistic d0 dm dvrs ); |
|
7
|
|
|
|
|
|
|
our %EXPORT_TAGS = (Func=>\@EXPORT_OK); |
|
8
|
|
|
|
|
|
|
|
|
9
|
1
|
|
|
1
|
|
1545
|
use PDL::Core; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
7
|
|
|
10
|
1
|
|
|
1
|
|
432
|
use PDL::Exporter; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
20
|
|
|
11
|
1
|
|
|
1
|
|
31
|
use DynaLoader; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
863
|
|
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
our @ISA = ( 'PDL::Exporter','DynaLoader' ); |
|
16
|
|
|
|
|
|
|
push @PDL::Core::PP, __PACKAGE__; |
|
17
|
|
|
|
|
|
|
bootstrap PDL::Stats::GLM ; |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
#line 14 "lib/PDL/Stats/GLM.pd" |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
use strict; |
|
29
|
|
|
|
|
|
|
use warnings; |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
use Carp; |
|
32
|
|
|
|
|
|
|
use PDL::LiteF; |
|
33
|
|
|
|
|
|
|
use PDL::MatrixOps; |
|
34
|
|
|
|
|
|
|
use PDL::Stats::Basic; |
|
35
|
|
|
|
|
|
|
use PDL::Stats::Kmeans; |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
eval { require PDL::Core; require PDL::GSL::CDF; }; |
|
38
|
|
|
|
|
|
|
my $CDF = 1 if !$@; |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=encoding utf8 |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=head1 NAME |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
PDL::Stats::GLM -- general and generalized linear modelling methods such as ANOVA, linear regression, PCA, and logistic regression. |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
use PDL::LiteF; |
|
49
|
|
|
|
|
|
|
use PDL::Stats::GLM; |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# do a multiple linear regression and plot the residuals |
|
52
|
|
|
|
|
|
|
my $y = pdl( 8, 7, 7, 0, 2, 5, 0 ); |
|
53
|
|
|
|
|
|
|
my $x = pdl( [ 0, 1, 2, 3, 4, 5, 6 ], # linear component |
|
54
|
|
|
|
|
|
|
[ 0, 1, 4, 9, 16, 25, 36 ] ); # quadratic component |
|
55
|
|
|
|
|
|
|
my %m = $y->ols( $x, {plot=>1} ); |
|
56
|
|
|
|
|
|
|
print "$_\t$m{$_}\n" for sort keys %m; |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
For more about general linear modelling, see |
|
61
|
|
|
|
|
|
|
L. |
|
62
|
|
|
|
|
|
|
For an unbelievably thorough text on experimental design |
|
63
|
|
|
|
|
|
|
and analysis, including linear modelling, see L
|
|
64
|
|
|
|
|
|
|
Course in Design and Analysis of Experiments, Gary |
|
65
|
|
|
|
|
|
|
W. Oehlert|http://users.stat.umn.edu/~gary/book/fcdae.pdf>. |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
The terms FUNCTIONS and METHODS are arbitrarily used to refer to |
|
68
|
|
|
|
|
|
|
methods that are broadcastable and methods that are NOT broadcastable, |
|
69
|
|
|
|
|
|
|
respectively. FUNCTIONS support bad values. |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
P-values, where appropriate, are provided if PDL::GSL::CDF is installed. |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=cut |
|
74
|
|
|
|
|
|
|
#line 75 "lib/PDL/Stats/GLM.pm" |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=head1 FUNCTIONS |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=cut |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head2 fill_m |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=for sig |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
Signature: (a(n); [o]b(n)) |
|
91
|
|
|
|
|
|
|
Types: (float double) |
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=for ref |
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
Replaces bad values with sample mean. Mean is set to 0 if all obs are |
|
96
|
|
|
|
|
|
|
bad. |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
=for usage |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
pdl> p $data |
|
101
|
|
|
|
|
|
|
[ |
|
102
|
|
|
|
|
|
|
[ 5 BAD 2 BAD] |
|
103
|
|
|
|
|
|
|
[ 7 3 7 BAD] |
|
104
|
|
|
|
|
|
|
] |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
pdl> p $data->fill_m |
|
107
|
|
|
|
|
|
|
[ |
|
108
|
|
|
|
|
|
|
[ 5 3.5 2 3.5] |
|
109
|
|
|
|
|
|
|
[ 7 3 7 5.66667] |
|
110
|
|
|
|
|
|
|
] |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=pod |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=for bad |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
The output pdl badflag is cleared. |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=cut |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
*fill_m = \&PDL::fill_m; |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
=head2 fill_rand |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=for sig |
|
136
|
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
Signature: (a(n); [o]b(n)) |
|
138
|
|
|
|
|
|
|
Types: (sbyte byte short ushort long ulong indx ulonglong longlong |
|
139
|
|
|
|
|
|
|
float double ldouble) |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=for ref |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
Replaces bad values with random sample (with replacement) of good |
|
144
|
|
|
|
|
|
|
observations from the same variable. |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=for usage |
|
147
|
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
pdl> p $data |
|
149
|
|
|
|
|
|
|
[ |
|
150
|
|
|
|
|
|
|
[ 5 BAD 2 BAD] |
|
151
|
|
|
|
|
|
|
[ 7 3 7 BAD] |
|
152
|
|
|
|
|
|
|
] |
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
pdl> p $data->fill_rand |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
[ |
|
157
|
|
|
|
|
|
|
[5 2 2 5] |
|
158
|
|
|
|
|
|
|
[7 3 7 7] |
|
159
|
|
|
|
|
|
|
] |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=pod |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
=for bad |
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
The output pdl badflag is cleared. |
|
168
|
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
=cut |
|
170
|
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
*fill_rand = \&PDL::fill_rand; |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
=head2 dev_m |
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
=for sig |
|
184
|
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
Signature: (a(n); [o]b(n)) |
|
186
|
|
|
|
|
|
|
Types: (float double) |
|
187
|
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=for usage |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
$b = dev_m($a); |
|
191
|
|
|
|
|
|
|
dev_m($a, $b); # all arguments given |
|
192
|
|
|
|
|
|
|
$b = $a->dev_m; # method call |
|
193
|
|
|
|
|
|
|
$a->dev_m($b); |
|
194
|
|
|
|
|
|
|
$a->inplace->dev_m; # can be used inplace |
|
195
|
|
|
|
|
|
|
dev_m($a->inplace); |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
=for ref |
|
198
|
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
Replaces values with deviations from the mean. |
|
200
|
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
=pod |
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
204
|
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
=for bad |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
C processes bad values. |
|
208
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
209
|
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
=cut |
|
211
|
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
*dev_m = \&PDL::dev_m; |
|
216
|
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
=head2 stddz |
|
223
|
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=for sig |
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
Signature: (a(n); [o]b(n)) |
|
227
|
|
|
|
|
|
|
Types: (float double) |
|
228
|
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
=for usage |
|
230
|
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
$b = stddz($a); |
|
232
|
|
|
|
|
|
|
stddz($a, $b); # all arguments given |
|
233
|
|
|
|
|
|
|
$b = $a->stddz; # method call |
|
234
|
|
|
|
|
|
|
$a->stddz($b); |
|
235
|
|
|
|
|
|
|
$a->inplace->stddz; # can be used inplace |
|
236
|
|
|
|
|
|
|
stddz($a->inplace); |
|
237
|
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
=for ref |
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
Standardize ie replace values with z_scores based on sample standard deviation from the mean (replace with 0s if stdv==0). |
|
241
|
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
=pod |
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
245
|
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
=for bad |
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
C processes bad values. |
|
249
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
250
|
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
=cut |
|
252
|
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
*stddz = \&PDL::stddz; |
|
257
|
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
=head2 sse |
|
264
|
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
=for sig |
|
266
|
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
Signature: (a(n); b(n); [o]c()) |
|
268
|
|
|
|
|
|
|
Types: (float double) |
|
269
|
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
=for usage |
|
271
|
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
$c = sse($a, $b); |
|
273
|
|
|
|
|
|
|
sse($a, $b, $c); # all arguments given |
|
274
|
|
|
|
|
|
|
$c = $a->sse($b); # method call |
|
275
|
|
|
|
|
|
|
$a->sse($b, $c); |
|
276
|
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
=for ref |
|
278
|
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
Sum of squared errors between actual and predicted values. |
|
280
|
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=pod |
|
282
|
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
284
|
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
=for bad |
|
286
|
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
C processes bad values. |
|
288
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
289
|
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=cut |
|
291
|
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
*sse = \&PDL::sse; |
|
296
|
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
=head2 mse |
|
303
|
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
=for sig |
|
305
|
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
Signature: (a(n); b(n); [o]c()) |
|
307
|
|
|
|
|
|
|
Types: (float double) |
|
308
|
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
=for usage |
|
310
|
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
$c = mse($a, $b); |
|
312
|
|
|
|
|
|
|
mse($a, $b, $c); # all arguments given |
|
313
|
|
|
|
|
|
|
$c = $a->mse($b); # method call |
|
314
|
|
|
|
|
|
|
$a->mse($b, $c); |
|
315
|
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
=for ref |
|
317
|
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
Mean of squared errors between actual and predicted values, ie variance around predicted value. |
|
319
|
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
=pod |
|
321
|
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
323
|
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
=for bad |
|
325
|
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
C processes bad values. |
|
327
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
328
|
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
=cut |
|
330
|
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
*mse = \&PDL::mse; |
|
335
|
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
=head2 rmse |
|
342
|
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
=for sig |
|
344
|
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
Signature: (a(n); b(n); [o]c()) |
|
346
|
|
|
|
|
|
|
Types: (float double) |
|
347
|
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
=for usage |
|
349
|
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
$c = rmse($a, $b); |
|
351
|
|
|
|
|
|
|
rmse($a, $b, $c); # all arguments given |
|
352
|
|
|
|
|
|
|
$c = $a->rmse($b); # method call |
|
353
|
|
|
|
|
|
|
$a->rmse($b, $c); |
|
354
|
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
=for ref |
|
356
|
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
Root mean squared error, ie stdv around predicted value. |
|
358
|
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
=pod |
|
360
|
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
=for bad |
|
364
|
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
C processes bad values. |
|
366
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
367
|
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
=cut |
|
369
|
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
*rmse = \&PDL::rmse; |
|
374
|
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
=head2 pred_logistic |
|
381
|
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
=for sig |
|
383
|
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
Signature: (a(n,m); b(m); [o]c(n)) |
|
385
|
|
|
|
|
|
|
Types: (float double) |
|
386
|
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
=for ref |
|
388
|
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
Calculates predicted prob value for logistic regression. |
|
390
|
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
=for usage |
|
392
|
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
# glue constant then apply coeff returned by the logistic method |
|
394
|
|
|
|
|
|
|
$pred = $x->glue(1,ones($x->dim(0)))->pred_logistic( $m{b} ); |
|
395
|
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
=pod |
|
397
|
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
399
|
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
=for bad |
|
401
|
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
C processes bad values. |
|
403
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
404
|
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
=cut |
|
406
|
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
*pred_logistic = \&PDL::pred_logistic; |
|
411
|
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
=head2 d0 |
|
418
|
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
=for sig |
|
420
|
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
Signature: (a(n); [o]c()) |
|
422
|
|
|
|
|
|
|
Types: (float double) |
|
423
|
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
=for usage |
|
425
|
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
$c = d0($a); |
|
427
|
|
|
|
|
|
|
d0($a, $c); # all arguments given |
|
428
|
|
|
|
|
|
|
$c = $a->d0; # method call |
|
429
|
|
|
|
|
|
|
$a->d0($c); |
|
430
|
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
=for ref |
|
432
|
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
Null deviance for logistic regression. |
|
434
|
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
=pod |
|
436
|
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
438
|
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
=for bad |
|
440
|
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
C processes bad values. |
|
442
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
443
|
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
=cut |
|
445
|
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
*d0 = \&PDL::d0; |
|
450
|
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
=head2 dm |
|
457
|
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
=for sig |
|
459
|
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
Signature: (a(n); b(n); [o]c()) |
|
461
|
|
|
|
|
|
|
Types: (float double) |
|
462
|
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
=for ref |
|
464
|
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
Model deviance for logistic regression. |
|
466
|
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
=for usage |
|
468
|
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
my $dm = $y->dm( $y_pred ); |
|
470
|
|
|
|
|
|
|
# null deviance |
|
471
|
|
|
|
|
|
|
my $d0 = $y->dm( ones($y->nelem) * $y->avg ); |
|
472
|
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
=pod |
|
474
|
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
476
|
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
=for bad |
|
478
|
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
C processes bad values. |
|
480
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
481
|
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
=cut |
|
483
|
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
*dm = \&PDL::dm; |
|
488
|
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
=head2 dvrs |
|
495
|
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
=for sig |
|
497
|
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
Signature: (a(); b(); [o]c()) |
|
499
|
|
|
|
|
|
|
Types: (float double) |
|
500
|
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
=for usage |
|
502
|
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
$c = dvrs($a, $b); |
|
504
|
|
|
|
|
|
|
dvrs($a, $b, $c); # all arguments given |
|
505
|
|
|
|
|
|
|
$c = $a->dvrs($b); # method call |
|
506
|
|
|
|
|
|
|
$a->dvrs($b, $c); |
|
507
|
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
=for ref |
|
509
|
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
Deviance residual for logistic regression. |
|
511
|
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
=pod |
|
513
|
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
Broadcasts over its inputs. |
|
515
|
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
=for bad |
|
517
|
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
C processes bad values. |
|
519
|
|
|
|
|
|
|
It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays. |
|
520
|
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
=cut |
|
522
|
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
*dvrs = \&PDL::dvrs; |
|
527
|
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
#line 359 "lib/PDL/Stats/GLM.pd" |
|
533
|
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
#line 360 "lib/PDL/Stats/GLM.pd" |
|
535
|
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
=head2 ols_t |
|
537
|
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
=for ref |
|
539
|
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
Broadcasted version of ordinary least squares regression (B). The |
|
541
|
|
|
|
|
|
|
price of broadcasting was losing significance tests for coefficients |
|
542
|
|
|
|
|
|
|
(but see B). The fitting function was shamelessly copied then |
|
543
|
|
|
|
|
|
|
modified from PDL::Fit::Linfit. Intercept is FIRST of coeff if CONST => 1. |
|
544
|
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
ols_t does not handle bad values. consider B or B |
|
546
|
|
|
|
|
|
|
if there are bad values. |
|
547
|
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
=for options |
|
549
|
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
Default options (case insensitive): |
|
551
|
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
CONST => 1, |
|
553
|
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
=for usage |
|
555
|
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
Usage: |
|
557
|
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
# DV, 2 person's ratings for top-10 box office movies |
|
559
|
|
|
|
|
|
|
# ascending sorted by box office numbers |
|
560
|
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
pdl> p $y = pdl '1 1 2 4 4 4 4 5 5 5; 1 2 2 2 3 3 3 3 5 5' |
|
562
|
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
# model with 2 IVs, a linear and a quadratic trend component |
|
564
|
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
pdl> $x = cat sequence(10), sequence(10)**2 |
|
566
|
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
# suppose our novice modeler thinks this creates 3 different models |
|
568
|
|
|
|
|
|
|
# for predicting movie ratings |
|
569
|
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
pdl> p $x = cat $x, $x * 2, $x * 3 |
|
571
|
|
|
|
|
|
|
[ |
|
572
|
|
|
|
|
|
|
[ |
|
573
|
|
|
|
|
|
|
[ 0 1 2 3 4 5 6 7 8 9] |
|
574
|
|
|
|
|
|
|
[ 0 1 4 9 16 25 36 49 64 81] |
|
575
|
|
|
|
|
|
|
] |
|
576
|
|
|
|
|
|
|
[ |
|
577
|
|
|
|
|
|
|
[ 0 2 4 6 8 10 12 14 16 18] |
|
578
|
|
|
|
|
|
|
[ 0 2 8 18 32 50 72 98 128 162] |
|
579
|
|
|
|
|
|
|
] |
|
580
|
|
|
|
|
|
|
[ |
|
581
|
|
|
|
|
|
|
[ 0 3 6 9 12 15 18 21 24 27] |
|
582
|
|
|
|
|
|
|
[ 0 3 12 27 48 75 108 147 192 243] |
|
583
|
|
|
|
|
|
|
] |
|
584
|
|
|
|
|
|
|
] |
|
585
|
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
pdl> p $x->info |
|
587
|
|
|
|
|
|
|
PDL: Double D [10,2,3] |
|
588
|
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
# insert a dummy dim between IV and the dim (model) to be broadcasted |
|
590
|
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
pdl> %m = $y->ols_t( $x->dummy(2) ) |
|
592
|
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
pdl> p "$_\t@{[$m{$_} =~ /^\n*(.*?)\n*\z/s]}\n" for sort keys %m |
|
594
|
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
# 2 persons' ratings, each fitted with 3 "different" models |
|
596
|
|
|
|
|
|
|
F [ |
|
597
|
|
|
|
|
|
|
[ 38.314159 25.087209] |
|
598
|
|
|
|
|
|
|
[ 38.314159 25.087209] |
|
599
|
|
|
|
|
|
|
[ 38.314159 25.087209] |
|
600
|
|
|
|
|
|
|
] |
|
601
|
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
# df is the same across dv and iv models |
|
603
|
|
|
|
|
|
|
F_df [2 7] |
|
604
|
|
|
|
|
|
|
F_p [ |
|
605
|
|
|
|
|
|
|
[0.00016967051 0.00064215074] |
|
606
|
|
|
|
|
|
|
[0.00016967051 0.00064215074] |
|
607
|
|
|
|
|
|
|
[0.00016967051 0.00064215074] |
|
608
|
|
|
|
|
|
|
] |
|
609
|
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
R2 [ |
|
611
|
|
|
|
|
|
|
[ 0.9162963 0.87756762] |
|
612
|
|
|
|
|
|
|
[ 0.9162963 0.87756762] |
|
613
|
|
|
|
|
|
|
[ 0.9162963 0.87756762] |
|
614
|
|
|
|
|
|
|
] |
|
615
|
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
b [ # constant linear quadratic |
|
617
|
|
|
|
|
|
|
[ |
|
618
|
|
|
|
|
|
|
[ 0.66363636 0.99015152 -0.056818182] # person 1 |
|
619
|
|
|
|
|
|
|
[ 1.4 0.18939394 0.022727273] # person 2 |
|
620
|
|
|
|
|
|
|
] |
|
621
|
|
|
|
|
|
|
[ |
|
622
|
|
|
|
|
|
|
[ 0.66363636 0.49507576 -0.028409091] |
|
623
|
|
|
|
|
|
|
[ 1.4 0.09469697 0.011363636] |
|
624
|
|
|
|
|
|
|
] |
|
625
|
|
|
|
|
|
|
[ |
|
626
|
|
|
|
|
|
|
[ 0.66363636 0.33005051 -0.018939394] |
|
627
|
|
|
|
|
|
|
[ 1.4 0.063131313 0.0075757576] |
|
628
|
|
|
|
|
|
|
] |
|
629
|
|
|
|
|
|
|
] |
|
630
|
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
# our novice modeler realizes at this point that |
|
632
|
|
|
|
|
|
|
# the 3 models only differ in the scaling of the IV coefficients |
|
633
|
|
|
|
|
|
|
ss_model [ |
|
634
|
|
|
|
|
|
|
[ 20.616667 13.075758] |
|
635
|
|
|
|
|
|
|
[ 20.616667 13.075758] |
|
636
|
|
|
|
|
|
|
[ 20.616667 13.075758] |
|
637
|
|
|
|
|
|
|
] |
|
638
|
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
ss_residual [ |
|
640
|
|
|
|
|
|
|
[ 1.8833333 1.8242424] |
|
641
|
|
|
|
|
|
|
[ 1.8833333 1.8242424] |
|
642
|
|
|
|
|
|
|
[ 1.8833333 1.8242424] |
|
643
|
|
|
|
|
|
|
] |
|
644
|
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
ss_total [22.5 14.9] |
|
646
|
|
|
|
|
|
|
y_pred [ |
|
647
|
|
|
|
|
|
|
[ |
|
648
|
|
|
|
|
|
|
[0.66363636 1.5969697 2.4166667 3.1227273 ... 4.9727273] |
|
649
|
|
|
|
|
|
|
... |
|
650
|
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
=cut |
|
652
|
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
*ols_t = \&PDL::ols_t; |
|
654
|
|
|
|
|
|
|
sub PDL::ols_t { |
|
655
|
|
|
|
|
|
|
_ols_common(1, @_); |
|
656
|
|
|
|
|
|
|
} |
|
657
|
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
sub _ols_common { |
|
659
|
|
|
|
|
|
|
my ($broadcasted, $y, $ivs, $opt) = @_; |
|
660
|
|
|
|
|
|
|
($y, $ivs) = _ols_prep_inputs(@_); |
|
661
|
|
|
|
|
|
|
_ols_main($broadcasted, $y, $ivs, $opt); |
|
662
|
|
|
|
|
|
|
} |
|
663
|
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
sub _ols_prep_inputs { |
|
665
|
|
|
|
|
|
|
# y [n], ivs [n x attr] pdl |
|
666
|
|
|
|
|
|
|
my ($broadcasted, $y, $ivs, $opt) = @_; |
|
667
|
|
|
|
|
|
|
my %opt = ( |
|
668
|
|
|
|
|
|
|
CONST => 1, |
|
669
|
|
|
|
|
|
|
PLOT => 0, |
|
670
|
|
|
|
|
|
|
WIN => undef, # for plotting |
|
671
|
|
|
|
|
|
|
); |
|
672
|
|
|
|
|
|
|
if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt; } |
|
673
|
|
|
|
|
|
|
if (!$broadcasted) { |
|
674
|
|
|
|
|
|
|
$y = $y->squeeze; |
|
675
|
|
|
|
|
|
|
$y->getndims > 1 and |
|
676
|
|
|
|
|
|
|
croak "use ols_t for broadcasted version"; |
|
677
|
|
|
|
|
|
|
} |
|
678
|
|
|
|
|
|
|
$ivs = $ivs->dummy(1) if $ivs->getndims == 1; |
|
679
|
|
|
|
|
|
|
($y, $ivs) = _rm_bad_value( $y, $ivs ) if !$broadcasted; |
|
680
|
|
|
|
|
|
|
# set up ivs and const as ivs |
|
681
|
|
|
|
|
|
|
$opt{CONST} and |
|
682
|
|
|
|
|
|
|
$ivs = ones($ivs->dim(0))->glue( 1, $ivs ); |
|
683
|
|
|
|
|
|
|
($y, $ivs); |
|
684
|
|
|
|
|
|
|
} |
|
685
|
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
sub _ols_main { |
|
687
|
|
|
|
|
|
|
my ($broadcasted, $y, $ivs, $opt) = @_; |
|
688
|
|
|
|
|
|
|
my %opt = ( |
|
689
|
|
|
|
|
|
|
CONST => 1, |
|
690
|
|
|
|
|
|
|
PLOT => 0, |
|
691
|
|
|
|
|
|
|
WIN => undef, # for plotting |
|
692
|
|
|
|
|
|
|
); |
|
693
|
|
|
|
|
|
|
if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt; } |
|
694
|
|
|
|
|
|
|
my $C = inv( $ivs x $ivs->t ); |
|
695
|
|
|
|
|
|
|
# Internally normalise data |
|
696
|
|
|
|
|
|
|
# (double) it or ushort y and sequence iv won't work right |
|
697
|
|
|
|
|
|
|
my $ymean = $y->abs->avgover->double; |
|
698
|
|
|
|
|
|
|
$ymean->where( $ymean==0 ) .= 1; |
|
699
|
|
|
|
|
|
|
my $divisor = $broadcasted ? $ymean->dummy(0) : $ymean; |
|
700
|
|
|
|
|
|
|
my $y2 = $y / $divisor; |
|
701
|
|
|
|
|
|
|
my $Y = $ivs x $y2->dummy(0); |
|
702
|
|
|
|
|
|
|
# Do the fit |
|
703
|
|
|
|
|
|
|
# Fitted coefficients vector |
|
704
|
|
|
|
|
|
|
my $coeff = PDL::squeeze( $C x $Y ); |
|
705
|
|
|
|
|
|
|
$coeff = $coeff->dummy(0) |
|
706
|
|
|
|
|
|
|
if $broadcasted and $coeff->getndims == 1 and $y->getndims > 1; |
|
707
|
|
|
|
|
|
|
$coeff *= $divisor; # Un-normalise |
|
708
|
|
|
|
|
|
|
# ***$coeff x $ivs looks nice but produces nan on successive tries*** |
|
709
|
|
|
|
|
|
|
my $y_pred = sumover( ($broadcasted ? $coeff->dummy(1) : $coeff) * $ivs->transpose ); |
|
710
|
|
|
|
|
|
|
$opt{PLOT} and $y->plot_residuals( $y_pred, \%opt ); |
|
711
|
|
|
|
|
|
|
return $coeff unless wantarray; |
|
712
|
|
|
|
|
|
|
my %ret = (y_pred => $y_pred); |
|
713
|
|
|
|
|
|
|
$ret{ss_total} = $opt{CONST} ? $y->ss : sumover( $y ** 2 ); |
|
714
|
|
|
|
|
|
|
$ret{ss_residual} = $y->sse( $ret{y_pred} ); |
|
715
|
|
|
|
|
|
|
$ret{ss_model} = $ret{ss_total} - $ret{ss_residual}; |
|
716
|
|
|
|
|
|
|
$ret{R2} = $ret{ss_model} / $ret{ss_total}; |
|
717
|
|
|
|
|
|
|
my $n_var = $opt{CONST} ? $ivs->dim(1) - 1 : $ivs->dim(1); |
|
718
|
|
|
|
|
|
|
$ret{F_df} = pdl( $n_var, my $df1 = $y->dim(0) - $ivs->dim(1) ); |
|
719
|
|
|
|
|
|
|
$ret{F} = $ret{ss_model} / $n_var |
|
720
|
|
|
|
|
|
|
/ ($ret{ss_residual} / $df1); |
|
721
|
|
|
|
|
|
|
$ret{F_p} = 1 - $ret{F}->gsl_cdf_fdist_P( $n_var, $df1 ) |
|
722
|
|
|
|
|
|
|
if $CDF; |
|
723
|
|
|
|
|
|
|
if (!$broadcasted) { |
|
724
|
|
|
|
|
|
|
my $se_b = ones( $coeff->dims? $coeff->dims : 1 ); |
|
725
|
|
|
|
|
|
|
$opt{CONST} and |
|
726
|
|
|
|
|
|
|
$se_b->slice(0) .= sqrt( $ret{ss_residual} / $df1 * $C->slice(0,0) ); |
|
727
|
|
|
|
|
|
|
# get the se for bs by successively regressing each iv by the rest ivs |
|
728
|
|
|
|
|
|
|
if ($ivs->dim(1) > 1) { |
|
729
|
|
|
|
|
|
|
my @coords = $opt{CONST} ? 1..$n_var : 0..$n_var-1; |
|
730
|
|
|
|
|
|
|
my $ones = !$opt{CONST} ? undef : ones($ivs->dim(0)); |
|
731
|
|
|
|
|
|
|
for my $k (@coords) { |
|
732
|
|
|
|
|
|
|
my $G = $ivs->dice_axis(1, [grep $_ != $k, @coords]); |
|
733
|
|
|
|
|
|
|
$G = $ones->glue( 1, $G ) if $opt{CONST}; |
|
734
|
|
|
|
|
|
|
my $b_G = $ivs->slice(':',$k)->ols( $G, {CONST=>0,PLOT=>0} ); |
|
735
|
|
|
|
|
|
|
my $ss_res_k = $ivs->slice(':',$k)->squeeze->sse( sumover($b_G * $G->transpose) ); |
|
736
|
|
|
|
|
|
|
$se_b->slice($k) .= sqrt( $ret{ss_residual} / $df1 / $ss_res_k ); |
|
737
|
|
|
|
|
|
|
} |
|
738
|
|
|
|
|
|
|
} |
|
739
|
|
|
|
|
|
|
else { |
|
740
|
|
|
|
|
|
|
$se_b->slice(0) |
|
741
|
|
|
|
|
|
|
.= sqrt( $ret{ss_residual} / $df1 / sum( $ivs->slice(':',0)**2 ) ); |
|
742
|
|
|
|
|
|
|
} |
|
743
|
|
|
|
|
|
|
$ret{b_se} = $se_b; |
|
744
|
|
|
|
|
|
|
$ret{b_t} = $coeff / $ret{b_se}; |
|
745
|
|
|
|
|
|
|
$ret{b_p} = 2 * ( 1 - $ret{b_t}->abs->gsl_cdf_tdist_P( $df1 ) ) |
|
746
|
|
|
|
|
|
|
if $CDF; |
|
747
|
|
|
|
|
|
|
} |
|
748
|
|
|
|
|
|
|
for (keys %ret) { ref $ret{$_} eq 'PDL' and $ret{$_} = $ret{$_}->squeeze }; |
|
749
|
|
|
|
|
|
|
$ret{b} = $coeff; |
|
750
|
|
|
|
|
|
|
return %ret; |
|
751
|
|
|
|
|
|
|
} |
|
752
|
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
=head2 r2_change |
|
754
|
|
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
=for ref |
|
756
|
|
|
|
|
|
|
|
|
757
|
|
|
|
|
|
|
Significance test for the incremental change in R2 when new variable(s) |
|
758
|
|
|
|
|
|
|
are added to an ols regression model. |
|
759
|
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
Returns the change stats as well as stats for both models. Based on |
|
761
|
|
|
|
|
|
|
L. (One way to make up for the lack of significance tests for |
|
762
|
|
|
|
|
|
|
coeffs in ols_t). |
|
763
|
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
=for options |
|
765
|
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
Default options (case insensitive): |
|
767
|
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
CONST => 1, |
|
769
|
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
=for usage |
|
771
|
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
Usage: |
|
773
|
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
# suppose these are two persons' ratings for top 10 box office movies |
|
775
|
|
|
|
|
|
|
# ascending sorted by box office |
|
776
|
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
pdl> p $y = qsort ceil(random(10, 2) * 5) |
|
778
|
|
|
|
|
|
|
[ |
|
779
|
|
|
|
|
|
|
[1 1 2 2 2 3 4 4 4 4] |
|
780
|
|
|
|
|
|
|
[1 2 2 3 3 3 4 4 5 5] |
|
781
|
|
|
|
|
|
|
] |
|
782
|
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
# first IV is a simple linear trend |
|
784
|
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
pdl> p $x1 = sequence 10 |
|
786
|
|
|
|
|
|
|
[0 1 2 3 4 5 6 7 8 9] |
|
787
|
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
# the modeler wonders if adding a quadratic trend improves the fit |
|
789
|
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
pdl> p $x2 = sequence(10) ** 2 |
|
791
|
|
|
|
|
|
|
[0 1 4 9 16 25 36 49 64 81] |
|
792
|
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
# two difference models are given in two pdls |
|
794
|
|
|
|
|
|
|
# each as would be pass on to ols_t |
|
795
|
|
|
|
|
|
|
# the 1st model includes only linear trend |
|
796
|
|
|
|
|
|
|
# the 2nd model includes linear and quadratic trends |
|
797
|
|
|
|
|
|
|
# when necessary use dummy dim so both models have the same ndims |
|
798
|
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
pdl> %c = $y->r2_change( $x1->dummy(1), cat($x1, $x2) ) |
|
800
|
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
pdl> p "$_\t$c{$_}\n" for sort keys %c |
|
802
|
|
|
|
|
|
|
# person 1 person 2 |
|
803
|
|
|
|
|
|
|
F_change [0.72164948 0.071283096] |
|
804
|
|
|
|
|
|
|
# df same for both persons |
|
805
|
|
|
|
|
|
|
F_df [1 7] |
|
806
|
|
|
|
|
|
|
F_p [0.42370145 0.79717232] |
|
807
|
|
|
|
|
|
|
R2_change [0.0085966043 0.00048562549] |
|
808
|
|
|
|
|
|
|
model0 HASH(0x8c10828) |
|
809
|
|
|
|
|
|
|
model1 HASH(0x8c135c8) |
|
810
|
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
# the answer here is no. |
|
812
|
|
|
|
|
|
|
|
|
813
|
|
|
|
|
|
|
=cut |
|
814
|
|
|
|
|
|
|
|
|
815
|
|
|
|
|
|
|
*r2_change = \&PDL::r2_change; |
|
816
|
|
|
|
|
|
|
sub PDL::r2_change { |
|
817
|
|
|
|
|
|
|
my ($self, $ivs0, $ivs1, $opt) = @_; |
|
818
|
|
|
|
|
|
|
$ivs0->getndims == 1 and $ivs0 = $ivs0->dummy(1); |
|
819
|
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
my %ret; |
|
821
|
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
$ret{model0} = { $self->ols_t( $ivs0, $opt ) }; |
|
823
|
|
|
|
|
|
|
$ret{model1} = { $self->ols_t( $ivs1, $opt ) }; |
|
824
|
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
$ret{R2_change} = $ret{model1}->{R2} - $ret{model0}->{R2}; |
|
826
|
|
|
|
|
|
|
$ret{F_df} |
|
827
|
|
|
|
|
|
|
= pdl(my $df0 = $ivs1->dim(1) - $ivs0->dim(1), |
|
828
|
|
|
|
|
|
|
my $df1 = $ret{model1}->{F_df}->slice('(1)') ); |
|
829
|
|
|
|
|
|
|
$ret{F_change} |
|
830
|
|
|
|
|
|
|
= $ret{R2_change} * $df1 |
|
831
|
|
|
|
|
|
|
/ ( (1-$ret{model1}->{R2}) * $df0 ); |
|
832
|
|
|
|
|
|
|
$ret{F_p} = 1 - $ret{F_change}->gsl_cdf_fdist_P( $df0, $df1 ) |
|
833
|
|
|
|
|
|
|
if $CDF; |
|
834
|
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
for (keys %ret) { ref $ret{$_} eq 'PDL' and $ret{$_} = $ret{$_}->squeeze }; |
|
836
|
|
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
%ret; |
|
838
|
|
|
|
|
|
|
} |
|
839
|
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
=head1 METHODS |
|
841
|
|
|
|
|
|
|
|
|
842
|
|
|
|
|
|
|
=head2 anova |
|
843
|
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
=for ref |
|
845
|
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
Analysis of variance. Uses type III sum of squares for unbalanced data. |
|
847
|
|
|
|
|
|
|
|
|
848
|
|
|
|
|
|
|
Dependent variable should be a 1D pdl. Independent variables can be |
|
849
|
|
|
|
|
|
|
passed as 1D perl array ref or 1D pdl. |
|
850
|
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
Will only calculate p-value (C) if there are more samples than the |
|
852
|
|
|
|
|
|
|
product of categories of all the IVs. |
|
853
|
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
Supports bad value (by ignoring missing or BAD values in dependent and |
|
855
|
|
|
|
|
|
|
independent variables list-wise). |
|
856
|
|
|
|
|
|
|
|
|
857
|
|
|
|
|
|
|
For more on ANOVA, see L. |
|
858
|
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
=for options |
|
860
|
|
|
|
|
|
|
|
|
861
|
|
|
|
|
|
|
Default options (case insensitive): |
|
862
|
|
|
|
|
|
|
|
|
863
|
|
|
|
|
|
|
V => 1, # carps if bad value in variables |
|
864
|
|
|
|
|
|
|
IVNM => [], # auto filled as ['IV_0', 'IV_1', ... ] |
|
865
|
|
|
|
|
|
|
PLOT => 0, # plots highest order effect |
|
866
|
|
|
|
|
|
|
# can set plot_means options here |
|
867
|
|
|
|
|
|
|
WIN => undef, # for plotting |
|
868
|
|
|
|
|
|
|
|
|
869
|
|
|
|
|
|
|
=for usage |
|
870
|
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
Usage: |
|
872
|
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
# suppose this is ratings for 12 apples |
|
874
|
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
pdl> p $y = qsort ceil( random(12)*5 ) |
|
876
|
|
|
|
|
|
|
[1 1 2 2 2 3 3 4 4 4 5 5] |
|
877
|
|
|
|
|
|
|
|
|
878
|
|
|
|
|
|
|
# IV for types of apple |
|
879
|
|
|
|
|
|
|
|
|
880
|
|
|
|
|
|
|
pdl> p $a = sequence(12) % 3 + 1 |
|
881
|
|
|
|
|
|
|
[1 2 3 1 2 3 1 2 3 1 2 3] |
|
882
|
|
|
|
|
|
|
|
|
883
|
|
|
|
|
|
|
# IV for whether we baked the apple |
|
884
|
|
|
|
|
|
|
|
|
885
|
|
|
|
|
|
|
pdl> @b = qw( y y y y y y n n n n n n ) |
|
886
|
|
|
|
|
|
|
|
|
887
|
|
|
|
|
|
|
pdl> %m = $y->anova( $a, \@b, { IVNM=>['apple', 'bake'] } ) |
|
888
|
|
|
|
|
|
|
|
|
889
|
|
|
|
|
|
|
pdl> p "$_\t@{[$m{$_} =~ /^\n*(.*?)\n*\z/s]}\n" for sort keys %m |
|
890
|
|
|
|
|
|
|
F 2.46666666666667 |
|
891
|
|
|
|
|
|
|
F_df [5 6] |
|
892
|
|
|
|
|
|
|
F_p 0.151168719948632 |
|
893
|
|
|
|
|
|
|
ms_model 3.08333333333333 |
|
894
|
|
|
|
|
|
|
ms_residual 1.25 |
|
895
|
|
|
|
|
|
|
ss_model 15.4166666666667 |
|
896
|
|
|
|
|
|
|
ss_residual 7.5 |
|
897
|
|
|
|
|
|
|
ss_total 22.9166666666667 |
|
898
|
|
|
|
|
|
|
| apple | F 0.466666666666667 |
|
899
|
|
|
|
|
|
|
| apple | F_p 0.648078345471096 |
|
900
|
|
|
|
|
|
|
| apple | df 2 |
|
901
|
|
|
|
|
|
|
| apple | m [2.75 3 3.5] |
|
902
|
|
|
|
|
|
|
| apple | ms 0.583333333333334 |
|
903
|
|
|
|
|
|
|
| apple | se [0.85391256 0.81649658 0.64549722] |
|
904
|
|
|
|
|
|
|
| apple | ss 1.16666666666667 |
|
905
|
|
|
|
|
|
|
| apple || err df 6 |
|
906
|
|
|
|
|
|
|
| apple ~ bake | F 0.0666666666666671 |
|
907
|
|
|
|
|
|
|
| apple ~ bake | F_p 0.936190104380701 |
|
908
|
|
|
|
|
|
|
| apple ~ bake | df 2 |
|
909
|
|
|
|
|
|
|
| apple ~ bake | m [ |
|
910
|
|
|
|
|
|
|
[1.5 2 2.5] |
|
911
|
|
|
|
|
|
|
[ 4 4 4.5] |
|
912
|
|
|
|
|
|
|
] |
|
913
|
|
|
|
|
|
|
| apple ~ bake | ms 0.0833333333333339 |
|
914
|
|
|
|
|
|
|
| apple ~ bake | se [ |
|
915
|
|
|
|
|
|
|
[0.5 1 0.5] |
|
916
|
|
|
|
|
|
|
[ 1 1 0.5] |
|
917
|
|
|
|
|
|
|
] |
|
918
|
|
|
|
|
|
|
| apple ~ bake | ss 0.166666666666668 |
|
919
|
|
|
|
|
|
|
| apple ~ bake || err df 6 |
|
920
|
|
|
|
|
|
|
| bake | F 11.2666666666667 |
|
921
|
|
|
|
|
|
|
| bake | F_p 0.015294126084452 |
|
922
|
|
|
|
|
|
|
| bake | df 1 |
|
923
|
|
|
|
|
|
|
| bake | m [2 4.1666667] |
|
924
|
|
|
|
|
|
|
| bake | ms 14.0833333333333 |
|
925
|
|
|
|
|
|
|
| bake | se [0.36514837 0.40138649] |
|
926
|
|
|
|
|
|
|
| bake | ss 14.0833333333333 |
|
927
|
|
|
|
|
|
|
| bake || err df 6 |
|
928
|
|
|
|
|
|
|
|
|
929
|
|
|
|
|
|
|
This is implemented as a call to L, with an C |
|
930
|
|
|
|
|
|
|
subjects vector. |
|
931
|
|
|
|
|
|
|
|
|
932
|
|
|
|
|
|
|
=cut |
|
933
|
|
|
|
|
|
|
|
|
934
|
|
|
|
|
|
|
*anova = \&PDL::anova; |
|
935
|
|
|
|
|
|
|
sub PDL::anova { |
|
936
|
|
|
|
|
|
|
my ($y, @args) = @_; |
|
937
|
|
|
|
|
|
|
anova_rptd($y, undef, @args); |
|
938
|
|
|
|
|
|
|
} |
|
939
|
|
|
|
|
|
|
|
|
940
|
|
|
|
|
|
|
sub _interactions { |
|
941
|
|
|
|
|
|
|
my ($ivs_ref, $idv) = @_; |
|
942
|
|
|
|
|
|
|
my (@inter, @idv_inter); |
|
943
|
|
|
|
|
|
|
for my $nway ( 2 .. @$ivs_ref ) { |
|
944
|
|
|
|
|
|
|
my $iter_idv = _combinations( $nway, [0..$#$ivs_ref] ); |
|
945
|
|
|
|
|
|
|
while ( my @v = &$iter_idv() ) { |
|
946
|
|
|
|
|
|
|
push @inter, interaction_code(@$ivs_ref[@v]); |
|
947
|
|
|
|
|
|
|
push @idv_inter, join ' ~ ', @$idv[@v]; |
|
948
|
|
|
|
|
|
|
} |
|
949
|
|
|
|
|
|
|
} |
|
950
|
|
|
|
|
|
|
(\@inter, \@idv_inter); |
|
951
|
|
|
|
|
|
|
} |
|
952
|
|
|
|
|
|
|
|
|
953
|
|
|
|
|
|
|
# now prepare for cell mean |
|
954
|
|
|
|
|
|
|
sub _interactions_cm { |
|
955
|
|
|
|
|
|
|
my ($ivs_ref, $pdl_ivs_raw) = @_; |
|
956
|
|
|
|
|
|
|
my ($dim0, @inter_cm, @inter_cmo) = $ivs_ref->[0]->dim(0); |
|
957
|
|
|
|
|
|
|
for my $nway ( 2 .. @$ivs_ref ) { |
|
958
|
|
|
|
|
|
|
my $iter_idv = _combinations( $nway, [0..$#$ivs_ref] ); |
|
959
|
|
|
|
|
|
|
while ( my @v = &$iter_idv() ) { |
|
960
|
|
|
|
|
|
|
my @i_cm; |
|
961
|
|
|
|
|
|
|
for my $o ( 0 .. $dim0 - 1 ) { |
|
962
|
|
|
|
|
|
|
push @i_cm, join '', map $_->slice("($o)"), @$pdl_ivs_raw[@v]; |
|
963
|
|
|
|
|
|
|
} |
|
964
|
|
|
|
|
|
|
my ($inter, $map) = effect_code( \@i_cm ); |
|
965
|
|
|
|
|
|
|
push @inter_cm, $inter; |
|
966
|
|
|
|
|
|
|
# get the order to put means in correct multi dim pdl pos |
|
967
|
|
|
|
|
|
|
# this is order in var_e dim(1) |
|
968
|
|
|
|
|
|
|
my @levels = sort { $map->{$a} <=> $map->{$b} } keys %$map; |
|
969
|
|
|
|
|
|
|
# this is order needed for cell mean |
|
970
|
|
|
|
|
|
|
my @i_cmo = sort { reverse($levels[$a]) cmp reverse($levels[$b]) } |
|
971
|
|
|
|
|
|
|
0 .. $#levels; |
|
972
|
|
|
|
|
|
|
push @inter_cmo, pdl @i_cmo; |
|
973
|
|
|
|
|
|
|
} |
|
974
|
|
|
|
|
|
|
} |
|
975
|
|
|
|
|
|
|
(\@inter_cmo, \@inter_cm); |
|
976
|
|
|
|
|
|
|
} |
|
977
|
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
sub _cell_means { |
|
979
|
|
|
|
|
|
|
my ($data, $ivs_cm_ref, $i_cmo_ref, $idv, $pdl_ivs_raw) = @_; |
|
980
|
|
|
|
|
|
|
my %ind_id; |
|
981
|
|
|
|
|
|
|
@ind_id{ @$idv } = 0..$#$idv; |
|
982
|
|
|
|
|
|
|
my %cm; |
|
983
|
|
|
|
|
|
|
my $i = 0; |
|
984
|
|
|
|
|
|
|
for (@$ivs_cm_ref) { |
|
985
|
|
|
|
|
|
|
confess "_cell_means passed empty ivs_cm_ref ndarray at pos $i" if $_->isempty; |
|
986
|
|
|
|
|
|
|
my $last = zeroes $_->dim(0); |
|
987
|
|
|
|
|
|
|
my $i_neg = which $_->slice(':',0) == -1; |
|
988
|
|
|
|
|
|
|
$last->slice($i_neg) .= 1; |
|
989
|
|
|
|
|
|
|
$_->where($_ == -1) .= 0; |
|
990
|
|
|
|
|
|
|
$_ = $_->glue(1, $last); |
|
991
|
|
|
|
|
|
|
my @v = split ' ~ ', $idv->[$i]; |
|
992
|
|
|
|
|
|
|
my @shape = map $pdl_ivs_raw->[$_]->uniq->nelem, @ind_id{@v}; |
|
993
|
|
|
|
|
|
|
my ($m, $ss) = $data->centroid( $_ ); |
|
994
|
|
|
|
|
|
|
$m = $m->slice($i_cmo_ref->[$i])->sever; |
|
995
|
|
|
|
|
|
|
$ss = $ss->slice($i_cmo_ref->[$i])->sever; |
|
996
|
|
|
|
|
|
|
$m = $m->reshape(@shape); |
|
997
|
|
|
|
|
|
|
my $se = sqrt( ($ss/($_->sumover - 1)) / $_->sumover )->reshape(@shape); |
|
998
|
|
|
|
|
|
|
$cm{ "| $idv->[$i] | m" } = $m; |
|
999
|
|
|
|
|
|
|
$cm{ "| $idv->[$i] | se" } = $se; |
|
1000
|
|
|
|
|
|
|
$i++; |
|
1001
|
|
|
|
|
|
|
} |
|
1002
|
|
|
|
|
|
|
\%cm; |
|
1003
|
|
|
|
|
|
|
} |
|
1004
|
|
|
|
|
|
|
|
|
1005
|
|
|
|
|
|
|
# http://www.perlmonks.org/?node_id=371228 |
|
1006
|
|
|
|
|
|
|
sub _combinations { |
|
1007
|
|
|
|
|
|
|
my ($num, $arr) = @_; |
|
1008
|
|
|
|
|
|
|
return sub { return } |
|
1009
|
|
|
|
|
|
|
if $num == 0 or $num > @$arr; |
|
1010
|
|
|
|
|
|
|
my @pick; |
|
1011
|
|
|
|
|
|
|
return sub { |
|
1012
|
|
|
|
|
|
|
return @$arr[ @pick = ( 0 .. $num - 1 ) ] |
|
1013
|
|
|
|
|
|
|
unless @pick; |
|
1014
|
|
|
|
|
|
|
my $i = $#pick; |
|
1015
|
|
|
|
|
|
|
$i-- until $i < 0 or $pick[$i]++ < @$arr - $num + $i; |
|
1016
|
|
|
|
|
|
|
return if $i < 0; |
|
1017
|
|
|
|
|
|
|
@pick[$i .. $#pick] = $pick[$i] .. $#$arr; |
|
1018
|
|
|
|
|
|
|
return @$arr[@pick]; |
|
1019
|
|
|
|
|
|
|
}; |
|
1020
|
|
|
|
|
|
|
} |
|
1021
|
|
|
|
|
|
|
|
|
1022
|
|
|
|
|
|
|
=head2 anova_rptd |
|
1023
|
|
|
|
|
|
|
|
|
1024
|
|
|
|
|
|
|
=for ref |
|
1025
|
|
|
|
|
|
|
|
|
1026
|
|
|
|
|
|
|
Repeated measures and mixed model anova. Uses type III sum of squares. |
|
1027
|
|
|
|
|
|
|
|
|
1028
|
|
|
|
|
|
|
The standard error (se) for the means are based on the relevant mean |
|
1029
|
|
|
|
|
|
|
squared error from the anova, ie it is pooled across levels of the effect. |
|
1030
|
|
|
|
|
|
|
Will only calculate p-value (C) if there are more samples than the |
|
1031
|
|
|
|
|
|
|
product of categories of all the IVs. |
|
1032
|
|
|
|
|
|
|
|
|
1033
|
|
|
|
|
|
|
Uses L, so supports bad values. |
|
1034
|
|
|
|
|
|
|
|
|
1035
|
|
|
|
|
|
|
For more on repeated measures ANOVA, see |
|
1036
|
|
|
|
|
|
|
L, |
|
1037
|
|
|
|
|
|
|
and for mixed models, see |
|
1038
|
|
|
|
|
|
|
L. |
|
1039
|
|
|
|
|
|
|
|
|
1040
|
|
|
|
|
|
|
=for options |
|
1041
|
|
|
|
|
|
|
|
|
1042
|
|
|
|
|
|
|
Default options (case insensitive): |
|
1043
|
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
V => 1, # carps if bad value in dv |
|
1045
|
|
|
|
|
|
|
IVNM => [], # auto filled as ['IV_0', 'IV_1', ... ] |
|
1046
|
|
|
|
|
|
|
BTWN => [], # indices of between-subject IVs (matches IVNM indices) |
|
1047
|
|
|
|
|
|
|
PLOT => 0, # plots highest order effect |
|
1048
|
|
|
|
|
|
|
# see plot_means() for more options |
|
1049
|
|
|
|
|
|
|
WIN => undef, # for plotting |
|
1050
|
|
|
|
|
|
|
|
|
1051
|
|
|
|
|
|
|
=for usage |
|
1052
|
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
Usage: |
|
1054
|
|
|
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
Some fictional data: recall_w_beer_and_wings.txt |
|
1056
|
|
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
Subject Beer Wings Recall |
|
1058
|
|
|
|
|
|
|
Alex 1 1 8 |
|
1059
|
|
|
|
|
|
|
Alex 1 2 9 |
|
1060
|
|
|
|
|
|
|
Alex 1 3 12 |
|
1061
|
|
|
|
|
|
|
Alex 2 1 7 |
|
1062
|
|
|
|
|
|
|
Alex 2 2 9 |
|
1063
|
|
|
|
|
|
|
Alex 2 3 12 |
|
1064
|
|
|
|
|
|
|
Brian 1 1 12 |
|
1065
|
|
|
|
|
|
|
Brian 1 2 13 |
|
1066
|
|
|
|
|
|
|
Brian 1 3 14 |
|
1067
|
|
|
|
|
|
|
Brian 2 1 9 |
|
1068
|
|
|
|
|
|
|
Brian 2 2 8 |
|
1069
|
|
|
|
|
|
|
Brian 2 3 14 |
|
1070
|
|
|
|
|
|
|
... |
|
1071
|
|
|
|
|
|
|
|
|
1072
|
|
|
|
|
|
|
# rtable allows text only in 1st row and col |
|
1073
|
|
|
|
|
|
|
my ($data, $idv, $subj) = rtable 'recall_w_beer_and_wings.txt'; |
|
1074
|
|
|
|
|
|
|
|
|
1075
|
|
|
|
|
|
|
my ($b, $w, $dv) = $data->dog; |
|
1076
|
|
|
|
|
|
|
# subj and IVs can be 1d pdl or @ ref |
|
1077
|
|
|
|
|
|
|
# subj must be the first argument |
|
1078
|
|
|
|
|
|
|
my %m = $dv->anova_rptd( $subj, $b, $w, {ivnm=>['Beer', 'Wings']} ); |
|
1079
|
|
|
|
|
|
|
|
|
1080
|
|
|
|
|
|
|
print "$_\t@{[$m{$_} =~ /^\n*(.*?)\n*\z/s]}\n" for sort keys %m |
|
1081
|
|
|
|
|
|
|
|
|
1082
|
|
|
|
|
|
|
ss_residual 19.0833333333333 |
|
1083
|
|
|
|
|
|
|
ss_subject 24.8333333333333 |
|
1084
|
|
|
|
|
|
|
ss_total 133.833333333333 |
|
1085
|
|
|
|
|
|
|
| Beer | F 9.39130434782609 |
|
1086
|
|
|
|
|
|
|
| Beer | F_p 0.0547977008378944 |
|
1087
|
|
|
|
|
|
|
| Beer | df 1 |
|
1088
|
|
|
|
|
|
|
| Beer | m [10.916667 8.9166667] |
|
1089
|
|
|
|
|
|
|
| Beer | ms 24 |
|
1090
|
|
|
|
|
|
|
| Beer | se [0.4614791 0.4614791] |
|
1091
|
|
|
|
|
|
|
| Beer | ss 24 |
|
1092
|
|
|
|
|
|
|
| Beer || err df 3 |
|
1093
|
|
|
|
|
|
|
| Beer || err ms 2.55555555555556 |
|
1094
|
|
|
|
|
|
|
| Beer || err ss 7.66666666666667 |
|
1095
|
|
|
|
|
|
|
| Beer ~ Wings | F 0.510917030567687 |
|
1096
|
|
|
|
|
|
|
| Beer ~ Wings | F_p 0.623881438624431 |
|
1097
|
|
|
|
|
|
|
| Beer ~ Wings | df 2 |
|
1098
|
|
|
|
|
|
|
| Beer ~ Wings | m [ |
|
1099
|
|
|
|
|
|
|
[ 10 7] |
|
1100
|
|
|
|
|
|
|
[ 10.5 9.25] |
|
1101
|
|
|
|
|
|
|
[12.25 10.5] |
|
1102
|
|
|
|
|
|
|
] |
|
1103
|
|
|
|
|
|
|
| Beer ~ Wings | ms 1.625 |
|
1104
|
|
|
|
|
|
|
| Beer ~ Wings | se [ |
|
1105
|
|
|
|
|
|
|
[0.89170561 0.89170561] |
|
1106
|
|
|
|
|
|
|
[0.89170561 0.89170561] |
|
1107
|
|
|
|
|
|
|
[0.89170561 0.89170561] |
|
1108
|
|
|
|
|
|
|
] |
|
1109
|
|
|
|
|
|
|
| Beer ~ Wings | ss 3.25000000000001 |
|
1110
|
|
|
|
|
|
|
| Beer ~ Wings || err df 6 |
|
1111
|
|
|
|
|
|
|
| Beer ~ Wings || err ms 3.18055555555555 |
|
1112
|
|
|
|
|
|
|
| Beer ~ Wings || err ss 19.0833333333333 |
|
1113
|
|
|
|
|
|
|
| Wings | F 4.52851711026616 |
|
1114
|
|
|
|
|
|
|
| Wings | F_p 0.0632754786153548 |
|
1115
|
|
|
|
|
|
|
| Wings | df 2 |
|
1116
|
|
|
|
|
|
|
| Wings | m [8.5 9.875 11.375] |
|
1117
|
|
|
|
|
|
|
| Wings | ms 16.5416666666667 |
|
1118
|
|
|
|
|
|
|
| Wings | se [0.67571978 0.67571978 0.67571978] |
|
1119
|
|
|
|
|
|
|
| Wings | ss 33.0833333333333 |
|
1120
|
|
|
|
|
|
|
| Wings || err df 6 |
|
1121
|
|
|
|
|
|
|
| Wings || err ms 3.65277777777778 |
|
1122
|
|
|
|
|
|
|
| Wings || err ss 21.9166666666667 |
|
1123
|
|
|
|
|
|
|
|
|
1124
|
|
|
|
|
|
|
For mixed model anova, ie when there are between-subject IVs |
|
1125
|
|
|
|
|
|
|
involved, feed the IVs as above, but specify in BTWN which IVs are |
|
1126
|
|
|
|
|
|
|
between-subject. For example, if we had added age as a between-subject |
|
1127
|
|
|
|
|
|
|
IV in the above example, we would do |
|
1128
|
|
|
|
|
|
|
|
|
1129
|
|
|
|
|
|
|
my %m = $dv->anova_rptd( $subj, $age, $b, $w, |
|
1130
|
|
|
|
|
|
|
{ ivnm=>['Age', 'Beer', 'Wings'], btwn=>[0] }); |
|
1131
|
|
|
|
|
|
|
|
|
1132
|
|
|
|
|
|
|
=cut |
|
1133
|
|
|
|
|
|
|
|
|
1134
|
|
|
|
|
|
|
*anova_rptd = \&PDL::anova_rptd; |
|
1135
|
|
|
|
|
|
|
sub PDL::anova_rptd { |
|
1136
|
|
|
|
|
|
|
my $opt = ref($_[-1]) eq 'HASH' ? pop @_ : undef; |
|
1137
|
|
|
|
|
|
|
my ($y, $subj, @ivs_raw) = @_; |
|
1138
|
|
|
|
|
|
|
confess "Expected 1-D data, instead: ", $y->info if $y->ndims != 1; |
|
1139
|
|
|
|
|
|
|
croak "Mismatched number of elements in DV and IV. Are you passing IVs the old-and-abandoned way?" |
|
1140
|
|
|
|
|
|
|
if (ref $ivs_raw[0] eq 'ARRAY') and (@{ $ivs_raw[0] } != $y->nelem); |
|
1141
|
|
|
|
|
|
|
for (@ivs_raw) { |
|
1142
|
|
|
|
|
|
|
croak "too many dims in IV!" if ref $_ eq 'PDL' and $_->squeeze->ndims > 1 |
|
1143
|
|
|
|
|
|
|
} |
|
1144
|
|
|
|
|
|
|
my %opt = ( |
|
1145
|
|
|
|
|
|
|
V => 1, # carps if bad value in dv |
|
1146
|
|
|
|
|
|
|
IVNM => [], # auto filled as ['IV_0', 'IV_1', ... ] |
|
1147
|
|
|
|
|
|
|
( !defined($subj) ? () : ( |
|
1148
|
|
|
|
|
|
|
BTWN => [], # indices of between-subject IVs (matches IVNM indices) |
|
1149
|
|
|
|
|
|
|
)), |
|
1150
|
|
|
|
|
|
|
PLOT => 0, # plots highest order effect |
|
1151
|
|
|
|
|
|
|
WIN => undef, # for plotting |
|
1152
|
|
|
|
|
|
|
); |
|
1153
|
|
|
|
|
|
|
if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt; } |
|
1154
|
|
|
|
|
|
|
$opt{IVNM} = [ map { "IV_$_" } 0 .. $#ivs_raw ] |
|
1155
|
|
|
|
|
|
|
if !$opt{IVNM} or !@{ $opt{IVNM} }; |
|
1156
|
|
|
|
|
|
|
(my ($dsgn, $idv), $y, my ($sj, $ivs_ref_filtrd, $pdl_ivs_raw, $ivs_ref, $err_ref)) = $y->anova_design_matrix($subj, @ivs_raw, \%opt); |
|
1157
|
|
|
|
|
|
|
confess "anova_rptd: got singular matrix for X' x X" |
|
1158
|
|
|
|
|
|
|
if any +($dsgn->t x $dsgn)->det == 0; |
|
1159
|
|
|
|
|
|
|
my $b_full = _ols_main(1, $y, $dsgn->t, {CONST=>0}); |
|
1160
|
|
|
|
|
|
|
|
|
1161
|
|
|
|
|
|
|
my %ret = (ss_total => $y->ss, |
|
1162
|
|
|
|
|
|
|
ss_residual => $y->sse( sumover( $b_full * $dsgn ) )); |
|
1163
|
|
|
|
|
|
|
|
|
1164
|
|
|
|
|
|
|
if (defined $subj) { |
|
1165
|
|
|
|
|
|
|
my @full = (@$ivs_ref, @$err_ref); |
|
1166
|
|
|
|
|
|
|
my $has_btwn = @{ $opt{BTWN} }; |
|
1167
|
|
|
|
|
|
|
my @is_btwn; $is_btwn[$_] = 1 for @{ $opt{BTWN} }; |
|
1168
|
|
|
|
|
|
|
my @within_inds = 0 .. $#ivs_raw; |
|
1169
|
|
|
|
|
|
|
@within_inds = grep !$is_btwn[$_], @within_inds if $has_btwn; |
|
1170
|
|
|
|
|
|
|
my $within_df = pdl(map $_->dim(1), @full[@within_inds])->prodover->sclr; |
|
1171
|
|
|
|
|
|
|
EFFECT: for my $k (0 .. $#full) { |
|
1172
|
|
|
|
|
|
|
my $e = ($k > $#$ivs_ref)? '| err' : ''; |
|
1173
|
|
|
|
|
|
|
my $i = ($k > $#$ivs_ref)? $k - @$ivs_ref : $k; |
|
1174
|
|
|
|
|
|
|
my $i_pref = $k == $#full ? undef : "| $idv->[$i] |"; |
|
1175
|
|
|
|
|
|
|
|
|
1176
|
|
|
|
|
|
|
if (!defined $full[$k]) { # ss_residual as error |
|
1177
|
|
|
|
|
|
|
$ret{ "$i_pref$e ss" } = $ret{ss_residual}; |
|
1178
|
|
|
|
|
|
|
# highest ord inter for purely within design, (p-1)*(q-1)*(n-1) |
|
1179
|
|
|
|
|
|
|
my $factor = (ref $full[-1] ? $full[-1] : $err_ref->[$full[-1]])->dim(1); |
|
1180
|
|
|
|
|
|
|
my $df = $ret{ "$i_pref$e df" } = $factor * $within_df; |
|
1181
|
|
|
|
|
|
|
die "${i_pref}residual df = 0" if $df <= 0; |
|
1182
|
|
|
|
|
|
|
$ret{ "$i_pref$e ms" } = $ret{ "$i_pref$e ss" } / $df; |
|
1183
|
|
|
|
|
|
|
} elsif (ref $full[$k]) { # unique error term |
|
1184
|
|
|
|
|
|
|
next EFFECT |
|
1185
|
|
|
|
|
|
|
unless my @G = grep $_ != $k && defined $full[$_], 0 .. $#full; |
|
1186
|
|
|
|
|
|
|
my $G = ones($y->dim(0))->glue(1, grep ref $_, @full[@G]); |
|
1187
|
|
|
|
|
|
|
my $b_G = $y->ols_t( $G, {CONST=>0} ); |
|
1188
|
|
|
|
|
|
|
my $ss = $ret{$k == $#full ? 'ss_subject' : "$i_pref$e ss"} |
|
1189
|
|
|
|
|
|
|
= $y->sse(sumover($b_G * $G->transpose)) - $ret{ss_residual}; |
|
1190
|
|
|
|
|
|
|
if ($k != $#full) { |
|
1191
|
|
|
|
|
|
|
my $df = $ret{"$i_pref$e df"} = $full[$k]->dim(1); |
|
1192
|
|
|
|
|
|
|
die "residual df = 0" if $df <= 0; |
|
1193
|
|
|
|
|
|
|
$ret{"$i_pref$e ms"} = $ss / $df; |
|
1194
|
|
|
|
|
|
|
} |
|
1195
|
|
|
|
|
|
|
} else { # repeating error term |
|
1196
|
|
|
|
|
|
|
my $ss = $ret{$k == $#full ? 'ss_subject' : "$i_pref$e ss"} |
|
1197
|
|
|
|
|
|
|
= $ret{"| $idv->[$full[$k]] |$e ss"}; |
|
1198
|
|
|
|
|
|
|
if ($k != $#full) { |
|
1199
|
|
|
|
|
|
|
my $df = $ret{"$i_pref$e df"} = $ret{"| $idv->[$full[$k]] |$e df"}; |
|
1200
|
|
|
|
|
|
|
die "residual df = 0" if $df <= 0; |
|
1201
|
|
|
|
|
|
|
$ret{"$i_pref$e ms"} = $ss / $df; |
|
1202
|
|
|
|
|
|
|
} |
|
1203
|
|
|
|
|
|
|
} |
|
1204
|
|
|
|
|
|
|
} |
|
1205
|
|
|
|
|
|
|
} else { |
|
1206
|
|
|
|
|
|
|
$ret{ss_model} = $ret{ss_total} - $ret{ss_residual}; |
|
1207
|
|
|
|
|
|
|
$ret{F_df} = pdl(my $F_df0 = $dsgn->dim(0) - 1, my $df1 = $y->nelem - $dsgn->dim(0)); |
|
1208
|
|
|
|
|
|
|
$ret{ms_model} = $ret{ss_model} / $F_df0; |
|
1209
|
|
|
|
|
|
|
$ret{ms_residual} = $ret{ss_residual} / $df1; |
|
1210
|
|
|
|
|
|
|
$ret{F} = $ret{ms_model} / $ret{ms_residual}; |
|
1211
|
|
|
|
|
|
|
$ret{F_p} = 1 - $ret{F}->gsl_cdf_fdist_P( $F_df0, $df1 ) |
|
1212
|
|
|
|
|
|
|
if $CDF and $df1 > 0; |
|
1213
|
|
|
|
|
|
|
|
|
1214
|
|
|
|
|
|
|
# get IV ss from $ivs_ref instead of $dsgn pdl |
|
1215
|
|
|
|
|
|
|
my $ones = ones($y->dim(0)); |
|
1216
|
|
|
|
|
|
|
for my $k (0 .. $#$ivs_ref) { |
|
1217
|
|
|
|
|
|
|
my $G = $ones->glue(1, @$ivs_ref[grep $_ != $k, 0 .. $#$ivs_ref]); |
|
1218
|
|
|
|
|
|
|
my $b_G = $y->ols_t( $G, {CONST=>0} ); |
|
1219
|
|
|
|
|
|
|
$ret{ "| $idv->[$k] | ss" } |
|
1220
|
|
|
|
|
|
|
= $y->sse( sumover($b_G * $G->transpose) ) - $ret{ss_residual}; |
|
1221
|
|
|
|
|
|
|
my $df0 = $ret{ "| $idv->[$k] | df" } = $ivs_ref->[$k]->dim(1); |
|
1222
|
|
|
|
|
|
|
$ret{ "| $idv->[$k] || err df" } = $df1; |
|
1223
|
|
|
|
|
|
|
die "residual df = 0" if $df1 <= 0; |
|
1224
|
|
|
|
|
|
|
$ret{ "| $idv->[$k] | ms" } = $ret{ "| $idv->[$k] | ss" } / $df0; |
|
1225
|
|
|
|
|
|
|
} |
|
1226
|
|
|
|
|
|
|
} |
|
1227
|
|
|
|
|
|
|
# have all iv, inter, and error effects. get F and F_p |
|
1228
|
|
|
|
|
|
|
for (0 .. $#$ivs_ref) { |
|
1229
|
|
|
|
|
|
|
my $ms_residual = defined $subj ? $ret{ "| $idv->[$_] || err ms" } : $ret{ms_residual}; |
|
1230
|
|
|
|
|
|
|
my ($df0, $df1) = @ret{"| $idv->[$_] | df" , "| $idv->[$_] || err df"}; |
|
1231
|
|
|
|
|
|
|
my $F = $ret{ "| $idv->[$_] | F" } = $ret{ "| $idv->[$_] | ms" } / $ms_residual; |
|
1232
|
|
|
|
|
|
|
$ret{ "| $idv->[$_] | F_p" } = 1 - $F->gsl_cdf_fdist_P($df0, $df1) |
|
1233
|
|
|
|
|
|
|
if $CDF and $df1 > 0; |
|
1234
|
|
|
|
|
|
|
} |
|
1235
|
|
|
|
|
|
|
|
|
1236
|
|
|
|
|
|
|
for (keys %ret) {ref $ret{$_} eq 'PDL' and $ret{$_} = $ret{$_}->squeeze}; |
|
1237
|
|
|
|
|
|
|
|
|
1238
|
|
|
|
|
|
|
my ($inter_cmo_ref, $inter_cm_ref) |
|
1239
|
|
|
|
|
|
|
= _interactions_cm($ivs_ref_filtrd, $pdl_ivs_raw); |
|
1240
|
|
|
|
|
|
|
# append inter info to cell means effects |
|
1241
|
|
|
|
|
|
|
my $ivs_cm_ref = [@$ivs_ref_filtrd, @$inter_cm_ref]; |
|
1242
|
|
|
|
|
|
|
my @i_cmo_ref = map pdl(values %{ (effect_code($_))[1] })->qsort, @$pdl_ivs_raw; |
|
1243
|
|
|
|
|
|
|
#line 1068 "lib/PDL/Stats/GLM.pd" |
|
1244
|
|
|
|
|
|
|
push @i_cmo_ref, @$inter_cmo_ref; |
|
1245
|
|
|
|
|
|
|
|
|
1246
|
|
|
|
|
|
|
my $cm_ref |
|
1247
|
|
|
|
|
|
|
= _cell_means( $y, $ivs_cm_ref, \@i_cmo_ref, $idv, $pdl_ivs_raw ); |
|
1248
|
|
|
|
|
|
|
if (defined $subj) { |
|
1249
|
|
|
|
|
|
|
my @ls = map { $_->uniq->nelem } @$pdl_ivs_raw; |
|
1250
|
|
|
|
|
|
|
$cm_ref |
|
1251
|
|
|
|
|
|
|
= _fix_rptd_se( $cm_ref, \%ret, $opt{IVNM}, \@ls, $sj->uniq->nelem ); |
|
1252
|
|
|
|
|
|
|
} |
|
1253
|
|
|
|
|
|
|
# integrate mean and se into %ret |
|
1254
|
|
|
|
|
|
|
@ret{ keys %$cm_ref } = values %$cm_ref; |
|
1255
|
|
|
|
|
|
|
|
|
1256
|
|
|
|
|
|
|
my $highest = join(' ~ ', @{ $opt{IVNM} }); |
|
1257
|
|
|
|
|
|
|
$cm_ref->{"| $highest | m"}->plot_means( $cm_ref->{"| $highest | se"}, |
|
1258
|
|
|
|
|
|
|
{ %opt, IVNM=>$idv } ) |
|
1259
|
|
|
|
|
|
|
if $opt{PLOT}; |
|
1260
|
|
|
|
|
|
|
|
|
1261
|
|
|
|
|
|
|
%ret; |
|
1262
|
|
|
|
|
|
|
} |
|
1263
|
|
|
|
|
|
|
|
|
1264
|
|
|
|
|
|
|
=head2 anova_design_matrix |
|
1265
|
|
|
|
|
|
|
|
|
1266
|
|
|
|
|
|
|
=for ref |
|
1267
|
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
Effect-coded design matrix for anova, including repeated-measures and |
|
1269
|
|
|
|
|
|
|
mixed-model. The C for use in linear regression i.e. C. |
|
1270
|
|
|
|
|
|
|
|
|
1271
|
|
|
|
|
|
|
Added in 0.854. See L for more. |
|
1272
|
|
|
|
|
|
|
|
|
1273
|
|
|
|
|
|
|
Supports bad value in the dependent and independent variables. It |
|
1274
|
|
|
|
|
|
|
automatically removes bad data listwise, i.e. remove a subject's data if |
|
1275
|
|
|
|
|
|
|
there is any cell missing for the subject. |
|
1276
|
|
|
|
|
|
|
|
|
1277
|
|
|
|
|
|
|
=for options |
|
1278
|
|
|
|
|
|
|
|
|
1279
|
|
|
|
|
|
|
Default options (case insensitive): |
|
1280
|
|
|
|
|
|
|
|
|
1281
|
|
|
|
|
|
|
V => 1, # carps if bad value in dv |
|
1282
|
|
|
|
|
|
|
IVNM => [], # auto filled as ['IV_0', 'IV_1', ... ] |
|
1283
|
|
|
|
|
|
|
BTWN => [], # indices of between-subject IVs (matches IVNM indices) |
|
1284
|
|
|
|
|
|
|
|
|
1285
|
|
|
|
|
|
|
=for usage |
|
1286
|
|
|
|
|
|
|
|
|
1287
|
|
|
|
|
|
|
$matrix = $dv->anova_design_matrix(undef, $b, $w, {ivnm=>[qw(b w)]}); |
|
1288
|
|
|
|
|
|
|
$matrix = $dv->anova_design_matrix( |
|
1289
|
|
|
|
|
|
|
$subj, $b, $w, {ivnm=>[qw(b w)]}); # repeated-measures |
|
1290
|
|
|
|
|
|
|
$matrix = $dv->anova_design_matrix( |
|
1291
|
|
|
|
|
|
|
$subj, $b, $w, {ivnm=>[qw(b w)], btwn=>['b']}); # mixed-model |
|
1292
|
|
|
|
|
|
|
($matrix, $ivnm_ref) = $dv->anova_design_matrix( |
|
1293
|
|
|
|
|
|
|
$subj, $b, $w, {ivnm=>[qw(b w)], btwn=>['b']}); # list context also names |
|
1294
|
|
|
|
|
|
|
|
|
1295
|
|
|
|
|
|
|
=cut |
|
1296
|
|
|
|
|
|
|
|
|
1297
|
|
|
|
|
|
|
*anova_design_matrix = \&PDL::anova_design_matrix; |
|
1298
|
|
|
|
|
|
|
sub PDL::anova_design_matrix { |
|
1299
|
|
|
|
|
|
|
my $opt = ref($_[-1]) eq 'HASH' ? pop @_ : undef; |
|
1300
|
|
|
|
|
|
|
my ($y, $subj, @ivs_raw) = @_; |
|
1301
|
|
|
|
|
|
|
confess "No IVs: did you omit 'undef' for anova?" if !@ivs_raw; |
|
1302
|
|
|
|
|
|
|
confess "Expected 1-D data, instead: ", $y->info if $y->ndims != 1; |
|
1303
|
|
|
|
|
|
|
for (@ivs_raw) { |
|
1304
|
|
|
|
|
|
|
croak "too many dims in IV!" if ref $_ eq 'PDL' and $_->squeeze->ndims > 1; |
|
1305
|
|
|
|
|
|
|
} |
|
1306
|
|
|
|
|
|
|
my %opt = ( |
|
1307
|
|
|
|
|
|
|
V => 1, # carps if bad value in dv |
|
1308
|
|
|
|
|
|
|
IVNM => [], # auto filled as ['IV_0', 'IV_1', ... ] |
|
1309
|
|
|
|
|
|
|
( !defined($subj) ? () : ( |
|
1310
|
|
|
|
|
|
|
BTWN => [], # indices of between-subject IVs (matches IVNM indices) |
|
1311
|
|
|
|
|
|
|
)), |
|
1312
|
|
|
|
|
|
|
); |
|
1313
|
|
|
|
|
|
|
if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt; } |
|
1314
|
|
|
|
|
|
|
$opt{IVNM} = [ map { "IV_$_" } 0 .. $#ivs_raw ] |
|
1315
|
|
|
|
|
|
|
if !$opt{IVNM} or !@{ $opt{IVNM} }; |
|
1316
|
|
|
|
|
|
|
my @idv_orig = @{ $opt{IVNM} }; |
|
1317
|
|
|
|
|
|
|
my @pdl_ivs_raw = map scalar PDL::Stats::Basic::code_ivs($_), @ivs_raw; |
|
1318
|
|
|
|
|
|
|
my $pdl_ivs = pdl(\@pdl_ivs_raw); |
|
1319
|
|
|
|
|
|
|
# explicit set badflag because pdl() removes badflag |
|
1320
|
|
|
|
|
|
|
$pdl_ivs->badflag( scalar grep $_->badflag, @pdl_ivs_raw ); |
|
1321
|
|
|
|
|
|
|
my $sj; |
|
1322
|
|
|
|
|
|
|
if (defined($subj)) { |
|
1323
|
|
|
|
|
|
|
# delete bad data listwise ie remove subj if any cell missing |
|
1324
|
|
|
|
|
|
|
$sj = PDL::Stats::Basic::code_ivs($subj); |
|
1325
|
|
|
|
|
|
|
my $ibad = which( $y->isbad | nbadover($pdl_ivs->transpose) ); |
|
1326
|
|
|
|
|
|
|
my $sj_bad = $sj->slice($ibad)->uniq; |
|
1327
|
|
|
|
|
|
|
if ($sj_bad->nelem) { |
|
1328
|
|
|
|
|
|
|
warn $sj_bad->nelem . " subjects with missing data removed\n" |
|
1329
|
|
|
|
|
|
|
if $opt{V}; |
|
1330
|
|
|
|
|
|
|
$sj = $sj->setvaltobad($_) |
|
1331
|
|
|
|
|
|
|
for (list $sj_bad); |
|
1332
|
|
|
|
|
|
|
my $igood = which $sj->isgood; |
|
1333
|
|
|
|
|
|
|
for ($y, $sj, @pdl_ivs_raw) { |
|
1334
|
|
|
|
|
|
|
$_ = $_->slice( $igood )->sever; |
|
1335
|
|
|
|
|
|
|
$_->badflag(0); |
|
1336
|
|
|
|
|
|
|
} |
|
1337
|
|
|
|
|
|
|
} |
|
1338
|
|
|
|
|
|
|
} else { |
|
1339
|
|
|
|
|
|
|
($y, $pdl_ivs) = _rm_bad_value( $y, $pdl_ivs ); |
|
1340
|
|
|
|
|
|
|
if ($opt{V} and $y->nelem < $pdl_ivs_raw[0]->nelem) { |
|
1341
|
|
|
|
|
|
|
warn sprintf "%d subjects with missing data removed\n", $pdl_ivs_raw[0]->nelem - $y->nelem; |
|
1342
|
|
|
|
|
|
|
} |
|
1343
|
|
|
|
|
|
|
@pdl_ivs_raw = $pdl_ivs->dog; |
|
1344
|
|
|
|
|
|
|
} |
|
1345
|
|
|
|
|
|
|
my @ivs_ref_fltrd = map scalar effect_code($_), @pdl_ivs_raw; |
|
1346
|
|
|
|
|
|
|
my ($ivs_inter_ref, $idv_inter) = _interactions(\@ivs_ref_fltrd, \@idv_orig); |
|
1347
|
|
|
|
|
|
|
# append inter info to main effects |
|
1348
|
|
|
|
|
|
|
my $ivs_ref = [@ivs_ref_fltrd, @$ivs_inter_ref]; |
|
1349
|
|
|
|
|
|
|
my @idv = (@idv_orig, @$idv_inter); |
|
1350
|
|
|
|
|
|
|
# matches $ivs_ref, with an extra last pdl for subj effect |
|
1351
|
|
|
|
|
|
|
my $err_ref = !defined($subj) ? [] : |
|
1352
|
|
|
|
|
|
|
_add_errors( $sj, $ivs_ref, \@idv, \@pdl_ivs_raw, $opt{BTWN} ); |
|
1353
|
|
|
|
|
|
|
for (grep ref $err_ref->[$_], 0..$#$err_ref) { |
|
1354
|
|
|
|
|
|
|
my ($null_row_ids, $non_null_row_ids) = $err_ref->[$_]->zcover->which_both; |
|
1355
|
|
|
|
|
|
|
confess "got null columns $null_row_ids in error entry #$_ ($idv[$_])" |
|
1356
|
|
|
|
|
|
|
if !$null_row_ids->isempty; |
|
1357
|
|
|
|
|
|
|
} |
|
1358
|
|
|
|
|
|
|
my $dsgn = PDL::glue(1, ones($y->dim(0)), @$ivs_ref, (grep ref($_), @$err_ref))->t; |
|
1359
|
|
|
|
|
|
|
!wantarray ? $dsgn : ($dsgn, \@idv, $y, $sj, \@ivs_ref_fltrd, \@pdl_ivs_raw, $ivs_ref, $err_ref); |
|
1360
|
|
|
|
|
|
|
} |
|
1361
|
|
|
|
|
|
|
|
|
1362
|
|
|
|
|
|
|
# code (btwn group) subjects. Rutherford (2011) pp 208-209 |
|
1363
|
|
|
|
|
|
|
sub _code_btwn { |
|
1364
|
|
|
|
|
|
|
my ($subj, $btwn) = @_; |
|
1365
|
|
|
|
|
|
|
my (@grp, %grp_s); |
|
1366
|
|
|
|
|
|
|
for my $n (0 .. $subj->nelem - 1) { |
|
1367
|
|
|
|
|
|
|
# construct string to code group membership |
|
1368
|
|
|
|
|
|
|
# something not treated as BAD by code_ivs to start off marking group membership |
|
1369
|
|
|
|
|
|
|
# if no $btwn, everyone ends up in the same grp |
|
1370
|
|
|
|
|
|
|
my $s = '_' . join '', map $_->slice($n), @$btwn; |
|
1371
|
|
|
|
|
|
|
push @grp, $s; # group membership |
|
1372
|
|
|
|
|
|
|
$s .= $subj->slice($n); # keep track of total uniq subj |
|
1373
|
|
|
|
|
|
|
$grp_s{$s} = 1; |
|
1374
|
|
|
|
|
|
|
} |
|
1375
|
|
|
|
|
|
|
my $grp = PDL::Stats::Kmeans::iv_cluster \@grp; |
|
1376
|
|
|
|
|
|
|
my $spdl = zeroes $subj->dim(0), keys(%grp_s) - $grp->dim(1); |
|
1377
|
|
|
|
|
|
|
my $d1 = 0; |
|
1378
|
|
|
|
|
|
|
for my $g (0 .. $grp->dim(1)-1) { |
|
1379
|
|
|
|
|
|
|
my $col_inds = which $grp->slice(':',$g); |
|
1380
|
|
|
|
|
|
|
my $gsub = $subj->slice( $col_inds )->effect_code; |
|
1381
|
|
|
|
|
|
|
my ($nobs, $nsub) = $gsub->dims; |
|
1382
|
|
|
|
|
|
|
$spdl->slice($col_inds, [$d1,$d1+$nsub-1]) .= $gsub; |
|
1383
|
|
|
|
|
|
|
$d1 += $nsub; |
|
1384
|
|
|
|
|
|
|
} |
|
1385
|
|
|
|
|
|
|
$spdl; |
|
1386
|
|
|
|
|
|
|
} |
|
1387
|
|
|
|
|
|
|
|
|
1388
|
|
|
|
|
|
|
sub _add_errors { |
|
1389
|
|
|
|
|
|
|
my ($subj, $ivs_ref, $idv, $raw_ivs, $btwn) = @_; |
|
1390
|
|
|
|
|
|
|
my $spdl = _code_btwn($subj, [@$raw_ivs[@$btwn]]); |
|
1391
|
|
|
|
|
|
|
# if btwn factor involved, or highest order inter for within factors |
|
1392
|
|
|
|
|
|
|
# elem is undef, so that |
|
1393
|
|
|
|
|
|
|
# @errors ind matches @$ivs_ref, with an extra elem at the end for subj |
|
1394
|
|
|
|
|
|
|
# mark btwn factors for error terms |
|
1395
|
|
|
|
|
|
|
# same error term for B(wn) and A(btwn) x B(wn) (Rutherford, p205) |
|
1396
|
|
|
|
|
|
|
my %is_btwn = map +($_=>1), @$idv[ @$btwn ]; |
|
1397
|
|
|
|
|
|
|
my $has_btwn = keys %is_btwn; |
|
1398
|
|
|
|
|
|
|
my %idv2indx = map +($idv->[$_]=>$_), 0..$#$idv; |
|
1399
|
|
|
|
|
|
|
my $ie_subj; |
|
1400
|
|
|
|
|
|
|
my @errors = map { |
|
1401
|
|
|
|
|
|
|
my @fs = split ' ~ ', $idv->[$_]; |
|
1402
|
|
|
|
|
|
|
# separate bw and wn factors |
|
1403
|
|
|
|
|
|
|
# if only bw, error is bw x subj |
|
1404
|
|
|
|
|
|
|
# if only wn or wn and bw, error is wn x subj |
|
1405
|
|
|
|
|
|
|
my @bw = !$has_btwn ? () : grep $is_btwn{$_}, @fs; |
|
1406
|
|
|
|
|
|
|
my @wn = !$has_btwn ? @fs : grep !$is_btwn{$_}, @fs; |
|
1407
|
|
|
|
|
|
|
$ie_subj = $_ if !defined($ie_subj) and !@wn; |
|
1408
|
|
|
|
|
|
|
my $err = join ' ~ ', @wn ? @wn : @bw; |
|
1409
|
|
|
|
|
|
|
# highest order inter of within factors, use ss_residual as error |
|
1410
|
|
|
|
|
|
|
if ( @wn == @$raw_ivs - @$btwn ) { undef } |
|
1411
|
|
|
|
|
|
|
# repeating btwn factors use ss_subject as error |
|
1412
|
|
|
|
|
|
|
elsif (!@wn and $_ > $ie_subj) { $ie_subj } |
|
1413
|
|
|
|
|
|
|
# repeating error term |
|
1414
|
|
|
|
|
|
|
elsif ($_ > $idv2indx{$err}) { $idv2indx{$err} } |
|
1415
|
|
|
|
|
|
|
elsif (@wn) { interaction_code($ivs_ref->[$_], $spdl) } |
|
1416
|
|
|
|
|
|
|
else { $spdl } |
|
1417
|
|
|
|
|
|
|
} 0 .. $#$ivs_ref; |
|
1418
|
|
|
|
|
|
|
push @errors, $has_btwn ? $ie_subj : $spdl; |
|
1419
|
|
|
|
|
|
|
\@errors; |
|
1420
|
|
|
|
|
|
|
} |
|
1421
|
|
|
|
|
|
|
|
|
1422
|
|
|
|
|
|
|
sub _fix_rptd_se { |
|
1423
|
|
|
|
|
|
|
# if ivnm lvls_ref for within ss only this can work for mixed design |
|
1424
|
|
|
|
|
|
|
my ($cm_ref, $ret, $ivnm, $lvls_ref, $n) = @_; |
|
1425
|
|
|
|
|
|
|
my @se = map /^\| (.+?) \| se$/ ? $1 : (), keys %$cm_ref; |
|
1426
|
|
|
|
|
|
|
my @n_obs = map { |
|
1427
|
|
|
|
|
|
|
my @ivs = split / ~ /, $_; |
|
1428
|
|
|
|
|
|
|
my $i_ivs = which_id $ivnm, \@ivs; |
|
1429
|
|
|
|
|
|
|
my $icollapsed = setops pdl(0 .. $#$ivnm), 'XOR', $i_ivs; |
|
1430
|
|
|
|
|
|
|
my $collapsed = $icollapsed->nelem? |
|
1431
|
|
|
|
|
|
|
pdl( @$lvls_ref[(list $icollapsed)] )->prodover |
|
1432
|
|
|
|
|
|
|
: 1 |
|
1433
|
|
|
|
|
|
|
; |
|
1434
|
|
|
|
|
|
|
$n * $collapsed; |
|
1435
|
|
|
|
|
|
|
} @se; |
|
1436
|
|
|
|
|
|
|
for my $i (0 .. $#se) { |
|
1437
|
|
|
|
|
|
|
$cm_ref->{"| $se[$i] | se"} |
|
1438
|
|
|
|
|
|
|
.= sqrt( $ret->{"| $se[$i] || err ms"} / $n_obs[$i] ); |
|
1439
|
|
|
|
|
|
|
} |
|
1440
|
|
|
|
|
|
|
$cm_ref; |
|
1441
|
|
|
|
|
|
|
} |
|
1442
|
|
|
|
|
|
|
|
|
1443
|
|
|
|
|
|
|
=head2 dummy_code |
|
1444
|
|
|
|
|
|
|
|
|
1445
|
|
|
|
|
|
|
=for ref |
|
1446
|
|
|
|
|
|
|
|
|
1447
|
|
|
|
|
|
|
Dummy coding of nominal variable (perl @ ref or 1d pdl) for use in regression. |
|
1448
|
|
|
|
|
|
|
|
|
1449
|
|
|
|
|
|
|
Supports BAD value (missing or 'BAD' values result in the corresponding pdl elements being marked as BAD). |
|
1450
|
|
|
|
|
|
|
|
|
1451
|
|
|
|
|
|
|
=for usage |
|
1452
|
|
|
|
|
|
|
|
|
1453
|
|
|
|
|
|
|
pdl> @a = qw(a a a b b b c c c) |
|
1454
|
|
|
|
|
|
|
pdl> p $a = dummy_code(\@a) |
|
1455
|
|
|
|
|
|
|
[ |
|
1456
|
|
|
|
|
|
|
[1 1 1 0 0 0 0 0 0] |
|
1457
|
|
|
|
|
|
|
[0 0 0 1 1 1 0 0 0] |
|
1458
|
|
|
|
|
|
|
] |
|
1459
|
|
|
|
|
|
|
|
|
1460
|
|
|
|
|
|
|
=cut |
|
1461
|
|
|
|
|
|
|
|
|
1462
|
|
|
|
|
|
|
*dummy_code = \&PDL::dummy_code; |
|
1463
|
|
|
|
|
|
|
sub PDL::dummy_code { |
|
1464
|
|
|
|
|
|
|
my ($var_ref) = @_; |
|
1465
|
|
|
|
|
|
|
my $var_e = effect_code( $var_ref ); |
|
1466
|
|
|
|
|
|
|
$var_e->where( $var_e == -1 ) .= 0; |
|
1467
|
|
|
|
|
|
|
$var_e; |
|
1468
|
|
|
|
|
|
|
} |
|
1469
|
|
|
|
|
|
|
|
|
1470
|
|
|
|
|
|
|
=head2 effect_code |
|
1471
|
|
|
|
|
|
|
|
|
1472
|
|
|
|
|
|
|
=for ref |
|
1473
|
|
|
|
|
|
|
|
|
1474
|
|
|
|
|
|
|
Unweighted effect coding of nominal variable (perl @ ref or 1d pdl) for |
|
1475
|
|
|
|
|
|
|
use in regression. returns in @ context coded pdl and % ref to level - |
|
1476
|
|
|
|
|
|
|
pdl->dim(1) index. |
|
1477
|
|
|
|
|
|
|
|
|
1478
|
|
|
|
|
|
|
Supports BAD value (missing or 'BAD' values result in the corresponding |
|
1479
|
|
|
|
|
|
|
pdl elements being marked as BAD). |
|
1480
|
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
=for usage |
|
1482
|
|
|
|
|
|
|
|
|
1483
|
|
|
|
|
|
|
my @var = qw( a a a b b b c c c ); |
|
1484
|
|
|
|
|
|
|
my ($var_e, $map) = effect_code( \@var ); |
|
1485
|
|
|
|
|
|
|
|
|
1486
|
|
|
|
|
|
|
print $var_e . $var_e->info . "\n"; |
|
1487
|
|
|
|
|
|
|
|
|
1488
|
|
|
|
|
|
|
[ |
|
1489
|
|
|
|
|
|
|
[ 1 1 1 0 0 0 -1 -1 -1] |
|
1490
|
|
|
|
|
|
|
[ 0 0 0 1 1 1 -1 -1 -1] |
|
1491
|
|
|
|
|
|
|
] |
|
1492
|
|
|
|
|
|
|
PDL: Double D [9,2] |
|
1493
|
|
|
|
|
|
|
|
|
1494
|
|
|
|
|
|
|
print "$_\t$map->{$_}\n" for sort keys %$map |
|
1495
|
|
|
|
|
|
|
a 0 |
|
1496
|
|
|
|
|
|
|
b 1 |
|
1497
|
|
|
|
|
|
|
c 2 |
|
1498
|
|
|
|
|
|
|
|
|
1499
|
|
|
|
|
|
|
=cut |
|
1500
|
|
|
|
|
|
|
|
|
1501
|
|
|
|
|
|
|
*effect_code = \&PDL::effect_code; |
|
1502
|
|
|
|
|
|
|
sub PDL::effect_code { |
|
1503
|
|
|
|
|
|
|
my ($var_ref) = @_; |
|
1504
|
|
|
|
|
|
|
my ($var, $map_ref) = PDL::Stats::Basic::code_ivs( $var_ref ); |
|
1505
|
|
|
|
|
|
|
my $var_max = $var->max; |
|
1506
|
|
|
|
|
|
|
confess "effect_code called with only one unique value" if $var_max < 1; |
|
1507
|
|
|
|
|
|
|
my $var_e = yvals( float, $var->nelem, $var_max ) == $var; |
|
1508
|
|
|
|
|
|
|
$var_e->slice(which( $var == $var_max ), ) .= -1; |
|
1509
|
|
|
|
|
|
|
$var_e = $var_e->setbadif( $var->isbad ) if $var->badflag; |
|
1510
|
|
|
|
|
|
|
wantarray ? ($var_e, $map_ref) : $var_e; |
|
1511
|
|
|
|
|
|
|
} |
|
1512
|
|
|
|
|
|
|
|
|
1513
|
|
|
|
|
|
|
=head2 effect_code_w |
|
1514
|
|
|
|
|
|
|
|
|
1515
|
|
|
|
|
|
|
=for ref |
|
1516
|
|
|
|
|
|
|
|
|
1517
|
|
|
|
|
|
|
Weighted effect code for nominal variable. returns in @ context coded pdl and % ref to level - pdl->dim(1) index. |
|
1518
|
|
|
|
|
|
|
|
|
1519
|
|
|
|
|
|
|
Supports BAD value (missing or 'BAD' values result in the corresponding pdl elements being marked as BAD). |
|
1520
|
|
|
|
|
|
|
|
|
1521
|
|
|
|
|
|
|
=for usage |
|
1522
|
|
|
|
|
|
|
|
|
1523
|
|
|
|
|
|
|
pdl> @a = qw( a a b b b c c ) |
|
1524
|
|
|
|
|
|
|
pdl> p $a = effect_code_w(\@a) |
|
1525
|
|
|
|
|
|
|
[ |
|
1526
|
|
|
|
|
|
|
[ 1 1 0 0 0 -1 -1] |
|
1527
|
|
|
|
|
|
|
[ 0 0 1 1 1 -1.5 -1.5] |
|
1528
|
|
|
|
|
|
|
] |
|
1529
|
|
|
|
|
|
|
|
|
1530
|
|
|
|
|
|
|
=cut |
|
1531
|
|
|
|
|
|
|
|
|
1532
|
|
|
|
|
|
|
*effect_code_w = \&PDL::effect_code_w; |
|
1533
|
|
|
|
|
|
|
sub PDL::effect_code_w { |
|
1534
|
|
|
|
|
|
|
my ($var_ref) = @_; |
|
1535
|
|
|
|
|
|
|
my ($var_e, $map_ref) = effect_code( $var_ref ); |
|
1536
|
|
|
|
|
|
|
return wantarray ? ($var_e, $map_ref) : $var_e if $var_e->sum == 0; |
|
1537
|
|
|
|
|
|
|
my $pos = $var_e == 1; |
|
1538
|
|
|
|
|
|
|
my $neg = $var_e == -1; |
|
1539
|
|
|
|
|
|
|
my $w = $pos->sumover / $neg->sumover; |
|
1540
|
|
|
|
|
|
|
my $neg_ind = $neg->whichND; |
|
1541
|
|
|
|
|
|
|
$var_e->indexND($neg_ind) *= $w->slice($neg_ind->slice('(1)')); |
|
1542
|
|
|
|
|
|
|
wantarray ? ($var_e, $map_ref) : $var_e; |
|
1543
|
|
|
|
|
|
|
} |
|
1544
|
|
|
|
|
|
|
|
|
1545
|
|
|
|
|
|
|
=head2 interaction_code |
|
1546
|
|
|
|
|
|
|
|
|
1547
|
|
|
|
|
|
|
Returns the coded interaction term for effect-coded variables. |
|
1548
|
|
|
|
|
|
|
|
|
1549
|
|
|
|
|
|
|
Supports BAD value (missing or 'BAD' values result in the corresponding |
|
1550
|
|
|
|
|
|
|
pdl elements being marked as BAD). |
|
1551
|
|
|
|
|
|
|
|
|
1552
|
|
|
|
|
|
|
=for usage |
|
1553
|
|
|
|
|
|
|
|
|
1554
|
|
|
|
|
|
|
pdl> $a = sequence(6) > 2 |
|
1555
|
|
|
|
|
|
|
pdl> p $a = $a->effect_code |
|
1556
|
|
|
|
|
|
|
[ |
|
1557
|
|
|
|
|
|
|
[ 1 1 1 -1 -1 -1] |
|
1558
|
|
|
|
|
|
|
] |
|
1559
|
|
|
|
|
|
|
|
|
1560
|
|
|
|
|
|
|
pdl> $b = pdl( qw( 0 1 2 0 1 2 ) ) |
|
1561
|
|
|
|
|
|
|
pdl> p $b = $b->effect_code |
|
1562
|
|
|
|
|
|
|
[ |
|
1563
|
|
|
|
|
|
|
[ 1 0 -1 1 0 -1] |
|
1564
|
|
|
|
|
|
|
[ 0 1 -1 0 1 -1] |
|
1565
|
|
|
|
|
|
|
] |
|
1566
|
|
|
|
|
|
|
|
|
1567
|
|
|
|
|
|
|
pdl> p $ab = interaction_code( $a, $b ) |
|
1568
|
|
|
|
|
|
|
[ |
|
1569
|
|
|
|
|
|
|
[ 1 0 -1 -1 -0 1] |
|
1570
|
|
|
|
|
|
|
[ 0 1 -1 -0 -1 1] |
|
1571
|
|
|
|
|
|
|
] |
|
1572
|
|
|
|
|
|
|
|
|
1573
|
|
|
|
|
|
|
=cut |
|
1574
|
|
|
|
|
|
|
|
|
1575
|
|
|
|
|
|
|
*interaction_code = \&PDL::interaction_code; |
|
1576
|
|
|
|
|
|
|
sub PDL::interaction_code { |
|
1577
|
|
|
|
|
|
|
my $i = ones( $_[0]->dim(0), 1 ); |
|
1578
|
|
|
|
|
|
|
$i = ($i * $_->dummy(1))->clump(1,2) for @_; |
|
1579
|
|
|
|
|
|
|
$i; |
|
1580
|
|
|
|
|
|
|
} |
|
1581
|
|
|
|
|
|
|
|
|
1582
|
|
|
|
|
|
|
=head2 ols |
|
1583
|
|
|
|
|
|
|
|
|
1584
|
|
|
|
|
|
|
=for ref |
|
1585
|
|
|
|
|
|
|
|
|
1586
|
|
|
|
|
|
|
Ordinary least squares regression, aka linear regression. |
|
1587
|
|
|
|
|
|
|
|
|
1588
|
|
|
|
|
|
|
Unlike B, ols is not broadcastable, but it can handle bad value (by |
|
1589
|
|
|
|
|
|
|
ignoring observations with bad value in dependent or independent variables |
|
1590
|
|
|
|
|
|
|
list-wise) and returns the full model in list context with various stats. |
|
1591
|
|
|
|
|
|
|
|
|
1592
|
|
|
|
|
|
|
IVs ($x) should be pdl dims $y->nelem or $y->nelem x n_iv. Do not supply |
|
1593
|
|
|
|
|
|
|
the constant vector in $x. Intercept is automatically added and returned |
|
1594
|
|
|
|
|
|
|
as FIRST of the coeffs if CONST=>1. Returns full model in list context |
|
1595
|
|
|
|
|
|
|
and coeff in scalar context. |
|
1596
|
|
|
|
|
|
|
|
|
1597
|
|
|
|
|
|
|
For more on multiple linear regression see |
|
1598
|
|
|
|
|
|
|
L. |
|
1599
|
|
|
|
|
|
|
|
|
1600
|
|
|
|
|
|
|
=for options |
|
1601
|
|
|
|
|
|
|
|
|
1602
|
|
|
|
|
|
|
Default options (case insensitive): |
|
1603
|
|
|
|
|
|
|
|
|
1604
|
|
|
|
|
|
|
CONST => 1, |
|
1605
|
|
|
|
|
|
|
PLOT => 0, # see plot_residuals() for plot options |
|
1606
|
|
|
|
|
|
|
WIN => undef, # for plotting |
|
1607
|
|
|
|
|
|
|
|
|
1608
|
|
|
|
|
|
|
=for usage |
|
1609
|
|
|
|
|
|
|
|
|
1610
|
|
|
|
|
|
|
Usage: |
|
1611
|
|
|
|
|
|
|
|
|
1612
|
|
|
|
|
|
|
# suppose this is a person's ratings for top 10 box office movies |
|
1613
|
|
|
|
|
|
|
# ascending sorted by box office |
|
1614
|
|
|
|
|
|
|
|
|
1615
|
|
|
|
|
|
|
pdl> $y = pdl '[1 1 2 2 2 2 4 4 5 5]' |
|
1616
|
|
|
|
|
|
|
|
|
1617
|
|
|
|
|
|
|
# construct IV with linear and quadratic component |
|
1618
|
|
|
|
|
|
|
|
|
1619
|
|
|
|
|
|
|
pdl> p $x = cat sequence(10), sequence(10)**2 |
|
1620
|
|
|
|
|
|
|
[ |
|
1621
|
|
|
|
|
|
|
[ 0 1 2 3 4 5 6 7 8 9] |
|
1622
|
|
|
|
|
|
|
[ 0 1 4 9 16 25 36 49 64 81] |
|
1623
|
|
|
|
|
|
|
] |
|
1624
|
|
|
|
|
|
|
|
|
1625
|
|
|
|
|
|
|
pdl> %m = $y->ols( $x ) |
|
1626
|
|
|
|
|
|
|
|
|
1627
|
|
|
|
|
|
|
pdl> p "$_\t@{[$m{$_} =~ /^\n*(.*?)\n*\z/s]}\n" for sort keys %m |
|
1628
|
|
|
|
|
|
|
|
|
1629
|
|
|
|
|
|
|
F 40.4225352112676 |
|
1630
|
|
|
|
|
|
|
F_df [2 7] |
|
1631
|
|
|
|
|
|
|
F_p 0.000142834216344756 |
|
1632
|
|
|
|
|
|
|
R2 0.920314253647587 |
|
1633
|
|
|
|
|
|
|
|
|
1634
|
|
|
|
|
|
|
# coeff constant linear quadratic |
|
1635
|
|
|
|
|
|
|
b [0.981818 0.212121 0.030303] |
|
1636
|
|
|
|
|
|
|
b_p [0.039910 0.328001 0.203034] |
|
1637
|
|
|
|
|
|
|
b_se [0.389875 0.201746 0.021579] |
|
1638
|
|
|
|
|
|
|
b_t [2.518284 1.051422 1.404218] |
|
1639
|
|
|
|
|
|
|
ss_model 19.8787878787879 |
|
1640
|
|
|
|
|
|
|
ss_residual 1.72121212121212 |
|
1641
|
|
|
|
|
|
|
ss_total 21.6 |
|
1642
|
|
|
|
|
|
|
y_pred [0.98181818 1.2242424 1.5272727 ... 4.6181818 5.3454545] |
|
1643
|
|
|
|
|
|
|
|
|
1644
|
|
|
|
|
|
|
=cut |
|
1645
|
|
|
|
|
|
|
|
|
1646
|
|
|
|
|
|
|
*ols = \&PDL::ols; |
|
1647
|
|
|
|
|
|
|
sub PDL::ols { |
|
1648
|
|
|
|
|
|
|
_ols_common(0, @_); |
|
1649
|
|
|
|
|
|
|
} |
|
1650
|
|
|
|
|
|
|
|
|
1651
|
|
|
|
|
|
|
# ivs = [nobs x nivs] so can `dog` retval |
|
1652
|
|
|
|
|
|
|
sub _rm_bad_value { |
|
1653
|
|
|
|
|
|
|
my ($y, $ivs) = @_; |
|
1654
|
|
|
|
|
|
|
return ($y, $ivs, undef) if !$y->check_badflag and !$ivs->check_badflag; |
|
1655
|
|
|
|
|
|
|
my $idx = which($y->isgood & (nbadover ($ivs->transpose)==0)); |
|
1656
|
|
|
|
|
|
|
$_ = $_->slice($idx)->sever for $y, $ivs; |
|
1657
|
|
|
|
|
|
|
$_->badflag(0) for $y, $ivs; |
|
1658
|
|
|
|
|
|
|
($y, $ivs, $idx); |
|
1659
|
|
|
|
|
|
|
} |
|
1660
|
|
|
|
|
|
|
|
|
1661
|
|
|
|
|
|
|
=head2 ols_rptd |
|
1662
|
|
|
|
|
|
|
|
|
1663
|
|
|
|
|
|
|
=for ref |
|
1664
|
|
|
|
|
|
|
|
|
1665
|
|
|
|
|
|
|
Repeated measures linear regression. |
|
1666
|
|
|
|
|
|
|
Handles purely within-subject design for now. |
|
1667
|
|
|
|
|
|
|
|
|
1668
|
|
|
|
|
|
|
(Lorch & Myers, 1990; Van den Noortgate & Onghena, 2006). |
|
1669
|
|
|
|
|
|
|
See F for an example using the Lorch and Myers data. |
|
1670
|
|
|
|
|
|
|
|
|
1671
|
|
|
|
|
|
|
=for usage |
|
1672
|
|
|
|
|
|
|
|
|
1673
|
|
|
|
|
|
|
Usage: |
|
1674
|
|
|
|
|
|
|
|
|
1675
|
|
|
|
|
|
|
# This is the example from Lorch and Myers (1990), |
|
1676
|
|
|
|
|
|
|
# a study on how characteristics of sentences affected reading time |
|
1677
|
|
|
|
|
|
|
# Three within-subject IVs: |
|
1678
|
|
|
|
|
|
|
# SP -- serial position of sentence |
|
1679
|
|
|
|
|
|
|
# WORDS -- number of words in sentence |
|
1680
|
|
|
|
|
|
|
# NEW -- number of new arguments in sentence |
|
1681
|
|
|
|
|
|
|
|
|
1682
|
|
|
|
|
|
|
# $subj can be 1D pdl or @ ref and must be the first argument |
|
1683
|
|
|
|
|
|
|
# IV can be 1D @ ref or pdl |
|
1684
|
|
|
|
|
|
|
# 1D @ ref is effect coded internally into pdl |
|
1685
|
|
|
|
|
|
|
# pdl is left as is |
|
1686
|
|
|
|
|
|
|
|
|
1687
|
|
|
|
|
|
|
my %r = $rt->ols_rptd( $subj, $sp, $words, $new ); |
|
1688
|
|
|
|
|
|
|
|
|
1689
|
|
|
|
|
|
|
print "$_\t$r{$_}\n" for sort keys %r; |
|
1690
|
|
|
|
|
|
|
|
|
1691
|
|
|
|
|
|
|
ss_residual 58.3754646504336 |
|
1692
|
|
|
|
|
|
|
ss_subject 51.8590337714286 |
|
1693
|
|
|
|
|
|
|
ss_total 405.188241771429 |
|
1694
|
|
|
|
|
|
|
# SP WORDS NEW |
|
1695
|
|
|
|
|
|
|
F [ 7.208473 61.354153 1.0243311] |
|
1696
|
|
|
|
|
|
|
F_p [0.025006181 2.619081e-05 0.33792837] |
|
1697
|
|
|
|
|
|
|
coeff [0.33337285 0.45858933 0.15162986] |
|
1698
|
|
|
|
|
|
|
df [1 1 1] |
|
1699
|
|
|
|
|
|
|
df_err [9 9 9] |
|
1700
|
|
|
|
|
|
|
ms [ 18.450705 73.813294 0.57026483] |
|
1701
|
|
|
|
|
|
|
ms_err [ 2.5595857 1.2030692 0.55671923] |
|
1702
|
|
|
|
|
|
|
ss [ 18.450705 73.813294 0.57026483] |
|
1703
|
|
|
|
|
|
|
ss_err [ 23.036272 10.827623 5.0104731] |
|
1704
|
|
|
|
|
|
|
|
|
1705
|
|
|
|
|
|
|
=cut |
|
1706
|
|
|
|
|
|
|
|
|
1707
|
|
|
|
|
|
|
*ols_rptd = \&PDL::ols_rptd; |
|
1708
|
|
|
|
|
|
|
sub PDL::ols_rptd { |
|
1709
|
|
|
|
|
|
|
my ($y, $subj, @ivs_raw) = @_; |
|
1710
|
|
|
|
|
|
|
|
|
1711
|
|
|
|
|
|
|
$y = $y->squeeze; |
|
1712
|
|
|
|
|
|
|
$y->getndims > 1 and |
|
1713
|
|
|
|
|
|
|
croak "ols_rptd does not support broadcasting"; |
|
1714
|
|
|
|
|
|
|
|
|
1715
|
|
|
|
|
|
|
my @ivs = map { (ref $_ eq 'PDL' and $_->ndims > 1)? $_ |
|
1716
|
|
|
|
|
|
|
: ref $_ eq 'PDL' ? $_->dummy(1) |
|
1717
|
|
|
|
|
|
|
: scalar effect_code($_) |
|
1718
|
|
|
|
|
|
|
; |
|
1719
|
|
|
|
|
|
|
} @ivs_raw; |
|
1720
|
|
|
|
|
|
|
|
|
1721
|
|
|
|
|
|
|
my %r; |
|
1722
|
|
|
|
|
|
|
|
|
1723
|
|
|
|
|
|
|
$r{ss_total} = $y->ss; |
|
1724
|
|
|
|
|
|
|
|
|
1725
|
|
|
|
|
|
|
# STEP 1: subj |
|
1726
|
|
|
|
|
|
|
my $s = effect_code $subj; |
|
1727
|
|
|
|
|
|
|
my $b_s = $y->ols_t($s); |
|
1728
|
|
|
|
|
|
|
my $pred = sumover($b_s->slice('1:-1') * $s->transpose) + $b_s->slice(0); |
|
1729
|
|
|
|
|
|
|
$r{ss_subject} = $r{ss_total} - $y->sse( $pred ); |
|
1730
|
|
|
|
|
|
|
|
|
1731
|
|
|
|
|
|
|
# STEP 2: add predictor variables |
|
1732
|
|
|
|
|
|
|
my $iv_p = $s->glue(1, @ivs); |
|
1733
|
|
|
|
|
|
|
my $b_p = $y->ols_t($iv_p); |
|
1734
|
|
|
|
|
|
|
|
|
1735
|
|
|
|
|
|
|
# only care about coeff for predictor vars. no subj or const coeff |
|
1736
|
|
|
|
|
|
|
$r{coeff} = $b_p->slice([-@ivs,-1])->sever; |
|
1737
|
|
|
|
|
|
|
|
|
1738
|
|
|
|
|
|
|
# get total sse for this step |
|
1739
|
|
|
|
|
|
|
$pred = sumover($b_p->slice('1:-1') * $iv_p->transpose) + $b_p->slice(0); |
|
1740
|
|
|
|
|
|
|
my $ss_pe = $y->sse( $pred ); |
|
1741
|
|
|
|
|
|
|
|
|
1742
|
|
|
|
|
|
|
# get predictor ss by successively reducing the model |
|
1743
|
|
|
|
|
|
|
$r{ss} = zeroes scalar(@ivs); |
|
1744
|
|
|
|
|
|
|
for my $i (0 .. $#ivs) { |
|
1745
|
|
|
|
|
|
|
my $iv = $s->glue(1, @ivs[ grep $_ != $i, 0..$#ivs ]); |
|
1746
|
|
|
|
|
|
|
my $b = $y->ols_t($iv); |
|
1747
|
|
|
|
|
|
|
$pred = sumover($b->slice('1:-1') * $iv->transpose) + $b->slice(0); |
|
1748
|
|
|
|
|
|
|
$r{ss}->slice($i) .= $y->sse($pred) - $ss_pe; |
|
1749
|
|
|
|
|
|
|
} |
|
1750
|
|
|
|
|
|
|
|
|
1751
|
|
|
|
|
|
|
# STEP 3: get predictor x subj interaction as error term |
|
1752
|
|
|
|
|
|
|
my $iv_e = PDL::glue 1, map interaction_code( $s, $_ ), @ivs; |
|
1753
|
|
|
|
|
|
|
|
|
1754
|
|
|
|
|
|
|
# get total sse for this step. full model now. |
|
1755
|
|
|
|
|
|
|
my $b_f = $y->ols_t( $iv_p->glue(1,$iv_e) ); |
|
1756
|
|
|
|
|
|
|
$pred = sumover($b_f->slice('1:-1') * $iv_p->glue(1,$iv_e)->transpose) + $b_f->slice(0); |
|
1757
|
|
|
|
|
|
|
$r{ss_residual} = $y->sse( $pred ); |
|
1758
|
|
|
|
|
|
|
|
|
1759
|
|
|
|
|
|
|
# get predictor x subj ss by successively reducing the error term |
|
1760
|
|
|
|
|
|
|
$r{ss_err} = zeroes scalar(@ivs); |
|
1761
|
|
|
|
|
|
|
for my $i (0 .. $#ivs) { |
|
1762
|
|
|
|
|
|
|
my $iv = $iv_p->glue(1, map interaction_code($s, $_), |
|
1763
|
|
|
|
|
|
|
@ivs[grep $_ != $i, 0..$#ivs]); |
|
1764
|
|
|
|
|
|
|
my $b = $y->ols_t($iv); |
|
1765
|
|
|
|
|
|
|
my $pred = sumover($b->slice('1:-1') * $iv->transpose) + $b->slice(0); |
|
1766
|
|
|
|
|
|
|
$r{ss_err}->slice($i) .= $y->sse($pred) - $r{ss_residual}; |
|
1767
|
|
|
|
|
|
|
} |
|
1768
|
|
|
|
|
|
|
|
|
1769
|
|
|
|
|
|
|
# Finally, get MS, F, etc |
|
1770
|
|
|
|
|
|
|
|
|
1771
|
|
|
|
|
|
|
$r{df} = pdl( map $_->squeeze->ndims, @ivs ); |
|
1772
|
|
|
|
|
|
|
$r{ms} = $r{ss} / $r{df}; |
|
1773
|
|
|
|
|
|
|
|
|
1774
|
|
|
|
|
|
|
$r{df_err} = $s->dim(1) * $r{df}; |
|
1775
|
|
|
|
|
|
|
$r{ms_err} = $r{ss_err} / $r{df_err}; |
|
1776
|
|
|
|
|
|
|
|
|
1777
|
|
|
|
|
|
|
$r{F} = $r{ms} / $r{ms_err}; |
|
1778
|
|
|
|
|
|
|
|
|
1779
|
|
|
|
|
|
|
$r{F_p} = 1 - $r{F}->gsl_cdf_fdist_P( $r{df}, $r{df_err} ) |
|
1780
|
|
|
|
|
|
|
if $CDF; |
|
1781
|
|
|
|
|
|
|
|
|
1782
|
|
|
|
|
|
|
%r; |
|
1783
|
|
|
|
|
|
|
} |
|
1784
|
|
|
|
|
|
|
|
|
1785
|
|
|
|
|
|
|
=head2 logistic |
|
1786
|
|
|
|
|
|
|
|
|
1787
|
|
|
|
|
|
|
=for ref |
|
1788
|
|
|
|
|
|
|
|
|
1789
|
|
|
|
|
|
|
Logistic regression with maximum likelihood estimation using L. |
|
1790
|
|
|
|
|
|
|
|
|
1791
|
|
|
|
|
|
|
IVs ($x) should be pdl dims $y->nelem or $y->nelem x n_iv. Do not supply |
|
1792
|
|
|
|
|
|
|
the constant vector in $x. It is included in the model and returned as |
|
1793
|
|
|
|
|
|
|
LAST of coeff. Returns full model in list context and coeff in scalar |
|
1794
|
|
|
|
|
|
|
context. |
|
1795
|
|
|
|
|
|
|
|
|
1796
|
|
|
|
|
|
|
The significance tests are likelihood ratio tests (-2LL deviance) |
|
1797
|
|
|
|
|
|
|
tests. IV significance is tested by comparing deviances between the |
|
1798
|
|
|
|
|
|
|
reduced model (ie with the IV in question removed) and the full model. |
|
1799
|
|
|
|
|
|
|
|
|
1800
|
|
|
|
|
|
|
***NOTE: the results here are qualitatively similar to but not identical |
|
1801
|
|
|
|
|
|
|
with results from R, because different algorithms are used for the |
|
1802
|
|
|
|
|
|
|
nonlinear parameter fit. Use with discretion*** |
|
1803
|
|
|
|
|
|
|
|
|
1804
|
|
|
|
|
|
|
=for options |
|
1805
|
|
|
|
|
|
|
|
|
1806
|
|
|
|
|
|
|
Default options (case insensitive): |
|
1807
|
|
|
|
|
|
|
|
|
1808
|
|
|
|
|
|
|
INITP => zeroes( $x->dim(1) + 1 ), # n_iv + 1 |
|
1809
|
|
|
|
|
|
|
MAXIT => 1000, |
|
1810
|
|
|
|
|
|
|
EPS => 1e-7, |
|
1811
|
|
|
|
|
|
|
|
|
1812
|
|
|
|
|
|
|
=for usage |
|
1813
|
|
|
|
|
|
|
|
|
1814
|
|
|
|
|
|
|
Usage: |
|
1815
|
|
|
|
|
|
|
|
|
1816
|
|
|
|
|
|
|
# suppose this is whether a person had rented 10 movies |
|
1817
|
|
|
|
|
|
|
|
|
1818
|
|
|
|
|
|
|
pdl> p $y = ushort( random(10)*2 ) |
|
1819
|
|
|
|
|
|
|
[0 0 0 1 1 0 0 1 1 1] |
|
1820
|
|
|
|
|
|
|
|
|
1821
|
|
|
|
|
|
|
# IV 1 is box office ranking |
|
1822
|
|
|
|
|
|
|
|
|
1823
|
|
|
|
|
|
|
pdl> p $x1 = sequence(10) |
|
1824
|
|
|
|
|
|
|
[0 1 2 3 4 5 6 7 8 9] |
|
1825
|
|
|
|
|
|
|
|
|
1826
|
|
|
|
|
|
|
# IV 2 is whether the movie is action- or chick-flick |
|
1827
|
|
|
|
|
|
|
|
|
1828
|
|
|
|
|
|
|
pdl> p $x2 = sequence(10) % 2 |
|
1829
|
|
|
|
|
|
|
[0 1 0 1 0 1 0 1 0 1] |
|
1830
|
|
|
|
|
|
|
|
|
1831
|
|
|
|
|
|
|
# concatenate the IVs together |
|
1832
|
|
|
|
|
|
|
|
|
1833
|
|
|
|
|
|
|
pdl> p $x = cat $x1, $x2 |
|
1834
|
|
|
|
|
|
|
[ |
|
1835
|
|
|
|
|
|
|
[0 1 2 3 4 5 6 7 8 9] |
|
1836
|
|
|
|
|
|
|
[0 1 0 1 0 1 0 1 0 1] |
|
1837
|
|
|
|
|
|
|
] |
|
1838
|
|
|
|
|
|
|
|
|
1839
|
|
|
|
|
|
|
pdl> %m = $y->logistic( $x ) |
|
1840
|
|
|
|
|
|
|
|
|
1841
|
|
|
|
|
|
|
pdl> p "$_\t$m{$_}\n" for sort keys %m |
|
1842
|
|
|
|
|
|
|
|
|
1843
|
|
|
|
|
|
|
D0 13.8629436111989 |
|
1844
|
|
|
|
|
|
|
Dm 9.8627829791575 |
|
1845
|
|
|
|
|
|
|
Dm_chisq 4.00016063204141 |
|
1846
|
|
|
|
|
|
|
Dm_df 2 |
|
1847
|
|
|
|
|
|
|
Dm_p 0.135324414081692 |
|
1848
|
|
|
|
|
|
|
# ranking genre constant |
|
1849
|
|
|
|
|
|
|
b [0.41127706 0.53876358 -2.1201285] |
|
1850
|
|
|
|
|
|
|
b_chisq [ 3.5974504 0.16835559 2.8577151] |
|
1851
|
|
|
|
|
|
|
b_p [0.057868258 0.6815774 0.090936587] |
|
1852
|
|
|
|
|
|
|
iter 12 |
|
1853
|
|
|
|
|
|
|
y_pred [0.10715577 0.23683909 ... 0.76316091 0.89284423] |
|
1854
|
|
|
|
|
|
|
|
|
1855
|
|
|
|
|
|
|
# to get the covariance out, supply a true value for the COV option: |
|
1856
|
|
|
|
|
|
|
pdl> %m = $y->logistic( $x, {COV=>1} ) |
|
1857
|
|
|
|
|
|
|
pdl> p $m{cov}; |
|
1858
|
|
|
|
|
|
|
|
|
1859
|
|
|
|
|
|
|
=cut |
|
1860
|
|
|
|
|
|
|
|
|
1861
|
|
|
|
|
|
|
*logistic = \&PDL::logistic; |
|
1862
|
|
|
|
|
|
|
sub PDL::logistic { |
|
1863
|
|
|
|
|
|
|
require PDL::Fit::LM; |
|
1864
|
|
|
|
|
|
|
my ( $self, $ivs, $opt ) = @_; |
|
1865
|
|
|
|
|
|
|
$self = $self->squeeze; |
|
1866
|
|
|
|
|
|
|
# make compatible w multiple var cases |
|
1867
|
|
|
|
|
|
|
$ivs->getndims == 1 and $ivs = $ivs->dummy(1); |
|
1868
|
|
|
|
|
|
|
$self->dim(0) != $ivs->dim(0) and |
|
1869
|
|
|
|
|
|
|
carp "mismatched n btwn DV and IV!"; |
|
1870
|
|
|
|
|
|
|
my %opt = ( |
|
1871
|
|
|
|
|
|
|
INITP => zeroes( $ivs->dim(1) + 1 ), # n_ivs + 1 |
|
1872
|
|
|
|
|
|
|
MAXIT => 1000, |
|
1873
|
|
|
|
|
|
|
EPS => 1e-7, |
|
1874
|
|
|
|
|
|
|
); |
|
1875
|
|
|
|
|
|
|
if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt; } |
|
1876
|
|
|
|
|
|
|
# not using it atm |
|
1877
|
|
|
|
|
|
|
$opt{WT} = 1; |
|
1878
|
|
|
|
|
|
|
# Use lmfit. Fourth input argument is reference to user-defined |
|
1879
|
|
|
|
|
|
|
# copy INITP so we have the original value when needed |
|
1880
|
|
|
|
|
|
|
my ($yfit,$coeff,$cov,$iter) |
|
1881
|
|
|
|
|
|
|
= PDL::Fit::LM::lmfit($ivs, $self, $opt{WT}, \&_logistic, $opt{INITP}->copy, |
|
1882
|
|
|
|
|
|
|
{ Maxiter=>$opt{MAXIT}, Eps=>$opt{EPS} } ); |
|
1883
|
|
|
|
|
|
|
# apparently at least coeff is child of some pdl |
|
1884
|
|
|
|
|
|
|
# which is changed in later lmfit calls |
|
1885
|
|
|
|
|
|
|
$yfit = $yfit->copy; |
|
1886
|
|
|
|
|
|
|
$coeff = $coeff->copy; |
|
1887
|
|
|
|
|
|
|
return $coeff unless wantarray; |
|
1888
|
|
|
|
|
|
|
my %ret; |
|
1889
|
|
|
|
|
|
|
my $n0 = $self->where($self == 0)->nelem; |
|
1890
|
|
|
|
|
|
|
my $n1 = $self->nelem - $n0; |
|
1891
|
|
|
|
|
|
|
$ret{cov} = $cov if $opt{COV}; |
|
1892
|
|
|
|
|
|
|
$ret{D0} = -2*($n0 * log($n0 / $self->nelem) + $n1 * log($n1 / $self->nelem)); |
|
1893
|
|
|
|
|
|
|
$ret{Dm} = sum( $self->dvrs( $yfit ) ** 2 ); |
|
1894
|
|
|
|
|
|
|
$ret{Dm_chisq} = $ret{D0} - $ret{Dm}; |
|
1895
|
|
|
|
|
|
|
$ret{Dm_df} = $ivs->dim(1); |
|
1896
|
|
|
|
|
|
|
$ret{Dm_p} |
|
1897
|
|
|
|
|
|
|
= 1 - PDL::GSL::CDF::gsl_cdf_chisq_P( $ret{Dm_chisq}, $ret{Dm_df} ) |
|
1898
|
|
|
|
|
|
|
if $CDF; |
|
1899
|
|
|
|
|
|
|
my $coeff_chisq = zeroes $opt{INITP}->nelem; |
|
1900
|
|
|
|
|
|
|
if ( $ivs->dim(1) > 1 ) { |
|
1901
|
|
|
|
|
|
|
for my $k (0 .. $ivs->dim(1)-1) { |
|
1902
|
|
|
|
|
|
|
my @G = grep { $_ != $k } (0 .. $ivs->dim(1)-1); |
|
1903
|
|
|
|
|
|
|
my $G = $ivs->dice_axis(1, \@G); |
|
1904
|
|
|
|
|
|
|
my $init = $opt{INITP}->dice([ @G, $opt{INITP}->dim(0)-1 ])->copy; |
|
1905
|
|
|
|
|
|
|
my $y_G |
|
1906
|
|
|
|
|
|
|
= PDL::Fit::LM::lmfit( $G, $self, $opt{WT}, \&_logistic, $init, |
|
1907
|
|
|
|
|
|
|
{ Maxiter=>$opt{MAXIT}, Eps=>$opt{EPS} } ); |
|
1908
|
|
|
|
|
|
|
$coeff_chisq->slice($k) .= $self->dm( $y_G ) - $ret{Dm}; |
|
1909
|
|
|
|
|
|
|
} |
|
1910
|
|
|
|
|
|
|
} |
|
1911
|
|
|
|
|
|
|
else { |
|
1912
|
|
|
|
|
|
|
# d0 is, by definition, the deviance with only intercept |
|
1913
|
|
|
|
|
|
|
$coeff_chisq->slice(0) .= $ret{D0} - $ret{Dm}; |
|
1914
|
|
|
|
|
|
|
} |
|
1915
|
|
|
|
|
|
|
my $y_c |
|
1916
|
|
|
|
|
|
|
= PDL::Fit::LM::lmfit( $ivs, $self, $opt{WT}, \&_logistic_no_intercept, $opt{INITP}->slice('0:-2')->sever, |
|
1917
|
|
|
|
|
|
|
{ Maxiter=>$opt{MAXIT}, Eps=>$opt{EPS} } ); |
|
1918
|
|
|
|
|
|
|
$coeff_chisq->slice(-1) .= $self->dm( $y_c ) - $ret{Dm}; |
|
1919
|
|
|
|
|
|
|
$ret{b} = $coeff; |
|
1920
|
|
|
|
|
|
|
$ret{b_chisq} = $coeff_chisq; |
|
1921
|
|
|
|
|
|
|
$ret{b_p} = 1 - $ret{b_chisq}->gsl_cdf_chisq_P( 1 ) |
|
1922
|
|
|
|
|
|
|
if $CDF; |
|
1923
|
|
|
|
|
|
|
$ret{y_pred} = $yfit; |
|
1924
|
|
|
|
|
|
|
$ret{iter} = $iter; |
|
1925
|
|
|
|
|
|
|
for (keys %ret) { ref $ret{$_} eq 'PDL' and $ret{$_} = $ret{$_}->squeeze }; |
|
1926
|
|
|
|
|
|
|
%ret; |
|
1927
|
|
|
|
|
|
|
} |
|
1928
|
|
|
|
|
|
|
|
|
1929
|
|
|
|
|
|
|
sub _logistic { |
|
1930
|
|
|
|
|
|
|
my ($x,$par,$ym,$dyda) = @_; |
|
1931
|
|
|
|
|
|
|
# $b and $c are fit parameters slope and intercept |
|
1932
|
|
|
|
|
|
|
my $b = $par->slice([0,$x->dim(1) - 1])->sever; |
|
1933
|
|
|
|
|
|
|
my $c = $par->slice(-1)->sever; |
|
1934
|
|
|
|
|
|
|
# Write function with dependent variable $ym, |
|
1935
|
|
|
|
|
|
|
# independent variable $x, and fit parameters as specified above. |
|
1936
|
|
|
|
|
|
|
# Use the .= (dot equals) assignment operator to express the equality |
|
1937
|
|
|
|
|
|
|
# (not just a plain equals) |
|
1938
|
|
|
|
|
|
|
$ym .= 1 / ( 1 + exp( -1 * (sumover($b * $x->transpose) + $c) ) ); |
|
1939
|
|
|
|
|
|
|
my @dy = map $dyda->slice(",($_)"), 0 .. $par->dim(0)-1; |
|
1940
|
|
|
|
|
|
|
# Partial derivative of the function with respect to each slope |
|
1941
|
|
|
|
|
|
|
# fit parameter ($b in this case). Again, note .= assignment |
|
1942
|
|
|
|
|
|
|
# operator (not just "equals") |
|
1943
|
|
|
|
|
|
|
$dy[$_] .= $x->slice(':',$_) * $ym * (1 - $ym) |
|
1944
|
|
|
|
|
|
|
for (0 .. $b->dim(0)-1); |
|
1945
|
|
|
|
|
|
|
# Partial derivative of the function re intercept par |
|
1946
|
|
|
|
|
|
|
$dy[-1] .= $ym * (1 - $ym); |
|
1947
|
|
|
|
|
|
|
} |
|
1948
|
|
|
|
|
|
|
|
|
1949
|
|
|
|
|
|
|
sub _logistic_no_intercept { |
|
1950
|
|
|
|
|
|
|
my ($x,$par,$ym,$dyda) = @_; |
|
1951
|
|
|
|
|
|
|
my $b = $par->slice([0,$x->dim(1) - 1])->sever; |
|
1952
|
|
|
|
|
|
|
# Write function with dependent variable $ym, |
|
1953
|
|
|
|
|
|
|
# independent variable $x, and fit parameters as specified above. |
|
1954
|
|
|
|
|
|
|
# Use the .= (dot equals) assignment operator to express the equality |
|
1955
|
|
|
|
|
|
|
# (not just a plain equals) |
|
1956
|
|
|
|
|
|
|
$ym .= 1 / ( 1 + exp( -1 * sumover($b * $x->transpose) ) ); |
|
1957
|
|
|
|
|
|
|
my (@dy) = map {$dyda -> slice(",($_)") } (0 .. $par->dim(0)-1); |
|
1958
|
|
|
|
|
|
|
# Partial derivative of the function with respect to each slope |
|
1959
|
|
|
|
|
|
|
# fit parameter ($b in this case). Again, note .= assignment |
|
1960
|
|
|
|
|
|
|
# operator (not just "equals") |
|
1961
|
|
|
|
|
|
|
$dy[$_] .= $x->slice(':',$_) * $ym * (1 - $ym) |
|
1962
|
|
|
|
|
|
|
for 0 .. $b->dim(0)-1; |
|
1963
|
|
|
|
|
|
|
} |
|
1964
|
|
|
|
|
|
|
|
|
1965
|
|
|
|
|
|
|
=head2 pca |
|
1966
|
|
|
|
|
|
|
|
|
1967
|
|
|
|
|
|
|
=for ref |
|
1968
|
|
|
|
|
|
|
|
|
1969
|
|
|
|
|
|
|
Principal component analysis. Based on corr instead of cov. |
|
1970
|
|
|
|
|
|
|
|
|
1971
|
|
|
|
|
|
|
Bad values are ignored pair-wise. OK when bad values are few but otherwise |
|
1972
|
|
|
|
|
|
|
probably should fill_m etc before pca). Uses L. |
|
1973
|
|
|
|
|
|
|
|
|
1974
|
|
|
|
|
|
|
=for options |
|
1975
|
|
|
|
|
|
|
|
|
1976
|
|
|
|
|
|
|
Default options (case insensitive): |
|
1977
|
|
|
|
|
|
|
|
|
1978
|
|
|
|
|
|
|
CORR => 1, # boolean. use correlation or covariance |
|
1979
|
|
|
|
|
|
|
PLOT => 0, # calls plot_screes by default |
|
1980
|
|
|
|
|
|
|
# can set plot_screes options here |
|
1981
|
|
|
|
|
|
|
WIN => undef, # for plotting |
|
1982
|
|
|
|
|
|
|
|
|
1983
|
|
|
|
|
|
|
=for usage |
|
1984
|
|
|
|
|
|
|
|
|
1985
|
|
|
|
|
|
|
Usage: |
|
1986
|
|
|
|
|
|
|
|
|
1987
|
|
|
|
|
|
|
my $d = qsort random 10, 5; # 10 obs on 5 variables |
|
1988
|
|
|
|
|
|
|
my %r = $d->pca( \%opt ); |
|
1989
|
|
|
|
|
|
|
print "$_\t$r{$_}\n" for (keys %r); |
|
1990
|
|
|
|
|
|
|
|
|
1991
|
|
|
|
|
|
|
eigenvalue # variance accounted for by each component |
|
1992
|
|
|
|
|
|
|
[4.70192 0.199604 0.0471421 0.0372981 0.0140346] |
|
1993
|
|
|
|
|
|
|
|
|
1994
|
|
|
|
|
|
|
eigenvector # dim var x comp. weights for mapping variables to component |
|
1995
|
|
|
|
|
|
|
[ |
|
1996
|
|
|
|
|
|
|
[ -0.451251 -0.440696 -0.457628 -0.451491 -0.434618] |
|
1997
|
|
|
|
|
|
|
[ -0.274551 0.582455 0.131494 0.255261 -0.709168] |
|
1998
|
|
|
|
|
|
|
[ 0.43282 0.500662 -0.139209 -0.735144 -0.0467834] |
|
1999
|
|
|
|
|
|
|
[ 0.693634 -0.428171 0.125114 0.128145 -0.550879] |
|
2000
|
|
|
|
|
|
|
[ 0.229202 0.180393 -0.859217 0.4173 0.0503155] |
|
2001
|
|
|
|
|
|
|
] |
|
2002
|
|
|
|
|
|
|
|
|
2003
|
|
|
|
|
|
|
loadings # dim var x comp. correlation between variable and component |
|
2004
|
|
|
|
|
|
|
[ |
|
2005
|
|
|
|
|
|
|
[ -0.978489 -0.955601 -0.992316 -0.97901 -0.942421] |
|
2006
|
|
|
|
|
|
|
[ -0.122661 0.260224 0.0587476 0.114043 -0.316836] |
|
2007
|
|
|
|
|
|
|
[ 0.0939749 0.108705 -0.0302253 -0.159616 -0.0101577] |
|
2008
|
|
|
|
|
|
|
[ 0.13396 -0.0826915 0.0241629 0.0247483 -0.10639] |
|
2009
|
|
|
|
|
|
|
[ 0.027153 0.0213708 -0.101789 0.0494365 0.00596076] |
|
2010
|
|
|
|
|
|
|
] |
|
2011
|
|
|
|
|
|
|
|
|
2012
|
|
|
|
|
|
|
pct_var # percent variance accounted for by each component |
|
2013
|
|
|
|
|
|
|
[0.940384 0.0399209 0.00942842 0.00745963 0.00280691] |
|
2014
|
|
|
|
|
|
|
|
|
2015
|
|
|
|
|
|
|
Plot scores along the first two components, |
|
2016
|
|
|
|
|
|
|
|
|
2017
|
|
|
|
|
|
|
$d->plot_scores( $r{eigenvector} ); |
|
2018
|
|
|
|
|
|
|
|
|
2019
|
|
|
|
|
|
|
=cut |
|
2020
|
|
|
|
|
|
|
|
|
2021
|
|
|
|
|
|
|
*pca = \&PDL::pca; |
|
2022
|
|
|
|
|
|
|
sub PDL::pca { |
|
2023
|
|
|
|
|
|
|
my ($self, $opt) = @_; |
|
2024
|
|
|
|
|
|
|
my %opt = ( |
|
2025
|
|
|
|
|
|
|
CORR => 1, |
|
2026
|
|
|
|
|
|
|
PLOT => 0, |
|
2027
|
|
|
|
|
|
|
WIN => undef, # for plotting |
|
2028
|
|
|
|
|
|
|
); |
|
2029
|
|
|
|
|
|
|
if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt; } |
|
2030
|
|
|
|
|
|
|
my $var_var = $opt{CORR}? $self->corr_table : $self->cov_table; |
|
2031
|
|
|
|
|
|
|
# value is axis pdl and score is var x axis |
|
2032
|
|
|
|
|
|
|
my ($eigvec, $eigval) = $var_var->eigens_sym; |
|
2033
|
|
|
|
|
|
|
$eigvec = $eigvec->transpose; # compatibility with PDL::Slatec::eigsys |
|
2034
|
|
|
|
|
|
|
# ind is sticky point for broadcasting |
|
2035
|
|
|
|
|
|
|
my $ind_sorted = $eigval->qsorti->slice('-1:0'); |
|
2036
|
|
|
|
|
|
|
$eigvec = $eigvec->slice(':',$ind_sorted)->sever; |
|
2037
|
|
|
|
|
|
|
$eigval = $eigval->slice($ind_sorted)->sever; |
|
2038
|
|
|
|
|
|
|
# var x axis |
|
2039
|
|
|
|
|
|
|
my $var = $eigval / $eigval->sum->sclr; |
|
2040
|
|
|
|
|
|
|
my $loadings; |
|
2041
|
|
|
|
|
|
|
if ($opt{CORR}) { |
|
2042
|
|
|
|
|
|
|
$loadings = $eigvec * sqrt( $eigval->transpose ); |
|
2043
|
|
|
|
|
|
|
} |
|
2044
|
|
|
|
|
|
|
else { |
|
2045
|
|
|
|
|
|
|
my $scores = $eigvec x $self->dev_m; |
|
2046
|
|
|
|
|
|
|
$loadings = $self->corr( $scores->dummy(1) ); |
|
2047
|
|
|
|
|
|
|
} |
|
2048
|
|
|
|
|
|
|
$var->plot_screes(\%opt) |
|
2049
|
|
|
|
|
|
|
if $opt{PLOT}; |
|
2050
|
|
|
|
|
|
|
( eigenvalue=>$eigval, eigenvector=>$eigvec, |
|
2051
|
|
|
|
|
|
|
pct_var=>$var, loadings=>$loadings ); |
|
2052
|
|
|
|
|
|
|
} |
|
2053
|
|
|
|
|
|
|
|
|
2054
|
|
|
|
|
|
|
=head2 pca_sorti |
|
2055
|
|
|
|
|
|
|
|
|
2056
|
|
|
|
|
|
|
Determine by which vars a component is best represented. Descending sort |
|
2057
|
|
|
|
|
|
|
vars by size of association with that component. Returns sorted var and |
|
2058
|
|
|
|
|
|
|
relevant component indices. |
|
2059
|
|
|
|
|
|
|
|
|
2060
|
|
|
|
|
|
|
=for options |
|
2061
|
|
|
|
|
|
|
|
|
2062
|
|
|
|
|
|
|
Default options (case insensitive): |
|
2063
|
|
|
|
|
|
|
|
|
2064
|
|
|
|
|
|
|
NCOMP => 10, # maximum number of components to consider |
|
2065
|
|
|
|
|
|
|
|
|
2066
|
|
|
|
|
|
|
=for usage |
|
2067
|
|
|
|
|
|
|
|
|
2068
|
|
|
|
|
|
|
Usage: |
|
2069
|
|
|
|
|
|
|
|
|
2070
|
|
|
|
|
|
|
# let's see if we replicated the Osgood et al. (1957) study |
|
2071
|
|
|
|
|
|
|
pdl> ($data, $idv, $ido) = rtable 'osgood_exp.csv', {v=>0} |
|
2072
|
|
|
|
|
|
|
|
|
2073
|
|
|
|
|
|
|
# select a subset of var to do pca |
|
2074
|
|
|
|
|
|
|
pdl> $ind = which_id $idv, [qw( ACTIVE BASS BRIGHT CALM FAST GOOD HAPPY HARD LARGE HEAVY )] |
|
2075
|
|
|
|
|
|
|
pdl> $data = $data( ,$ind)->sever |
|
2076
|
|
|
|
|
|
|
pdl> @$idv = @$idv[list $ind] |
|
2077
|
|
|
|
|
|
|
|
|
2078
|
|
|
|
|
|
|
pdl> %m = $data->pca |
|
2079
|
|
|
|
|
|
|
|
|
2080
|
|
|
|
|
|
|
pdl> ($iv, $ic) = $m{loadings}->pca_sorti() |
|
2081
|
|
|
|
|
|
|
|
|
2082
|
|
|
|
|
|
|
pdl> p "$idv->[$_]\t" . $m{loadings}->($_,$ic)->flat . "\n" for (list $iv) |
|
2083
|
|
|
|
|
|
|
|
|
2084
|
|
|
|
|
|
|
# COMP0 COMP1 COMP2 COMP3 |
|
2085
|
|
|
|
|
|
|
HAPPY [0.860191 0.364911 0.174372 -0.10484] |
|
2086
|
|
|
|
|
|
|
GOOD [0.848694 0.303652 0.198378 -0.115177] |
|
2087
|
|
|
|
|
|
|
CALM [0.821177 -0.130542 0.396215 -0.125368] |
|
2088
|
|
|
|
|
|
|
BRIGHT [0.78303 0.232808 -0.0534081 -0.0528796] |
|
2089
|
|
|
|
|
|
|
HEAVY [-0.623036 0.454826 0.50447 0.073007] |
|
2090
|
|
|
|
|
|
|
HARD [-0.679179 0.0505568 0.384467 0.165608] |
|
2091
|
|
|
|
|
|
|
ACTIVE [-0.161098 0.760778 -0.44893 -0.0888592] |
|
2092
|
|
|
|
|
|
|
FAST [-0.196042 0.71479 -0.471355 0.00460276] |
|
2093
|
|
|
|
|
|
|
LARGE [-0.241994 0.594644 0.634703 -0.00618055] |
|
2094
|
|
|
|
|
|
|
BASS [-0.621213 -0.124918 0.0605367 -0.765184] |
|
2095
|
|
|
|
|
|
|
|
|
2096
|
|
|
|
|
|
|
=cut |
|
2097
|
|
|
|
|
|
|
|
|
2098
|
|
|
|
|
|
|
*pca_sorti = \&PDL::pca_sorti; |
|
2099
|
|
|
|
|
|
|
sub PDL::pca_sorti { |
|
2100
|
|
|
|
|
|
|
# $self is pdl (var x component) |
|
2101
|
|
|
|
|
|
|
my ($self, $opt) = @_; |
|
2102
|
|
|
|
|
|
|
my %opt = ( |
|
2103
|
|
|
|
|
|
|
NCOMP => 10, # maximum number of components to consider |
|
2104
|
|
|
|
|
|
|
); |
|
2105
|
|
|
|
|
|
|
if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt; } |
|
2106
|
|
|
|
|
|
|
my $ncomp = pdl($opt{NCOMP}, $self->dim(1))->min; |
|
2107
|
|
|
|
|
|
|
$self = $self->dice_axis( 1, pdl(0..$ncomp-1) ); |
|
2108
|
|
|
|
|
|
|
my $icomp = $self->transpose->abs->maximum_ind; |
|
2109
|
|
|
|
|
|
|
# sort between comp |
|
2110
|
|
|
|
|
|
|
my $ivar_sort = $icomp->qsorti; |
|
2111
|
|
|
|
|
|
|
$self = $self->slice($ivar_sort)->sever; |
|
2112
|
|
|
|
|
|
|
# sort within comp |
|
2113
|
|
|
|
|
|
|
my $ic = $icomp->slice($ivar_sort)->iv_cluster; |
|
2114
|
|
|
|
|
|
|
for my $comp (0 .. $ic->dim(1)-1) { |
|
2115
|
|
|
|
|
|
|
my $i = $self->slice(which($ic->slice(':',$comp)), "($comp)")->qsorti->slice('-1:0'); |
|
2116
|
|
|
|
|
|
|
$ivar_sort->slice(which $ic->slice(':',$comp)) |
|
2117
|
|
|
|
|
|
|
.= $ivar_sort->slice(which $ic->slice(':',$comp))->slice($i); |
|
2118
|
|
|
|
|
|
|
} |
|
2119
|
|
|
|
|
|
|
wantarray ? ($ivar_sort, pdl(0 .. $ic->dim(1)-1)) : $ivar_sort; |
|
2120
|
|
|
|
|
|
|
} |
|
2121
|
|
|
|
|
|
|
|
|
2122
|
|
|
|
|
|
|
=head2 plot_means |
|
2123
|
|
|
|
|
|
|
|
|
2124
|
|
|
|
|
|
|
Plots means anova style. Can handle up to 4-way interactions (ie 4D pdl). |
|
2125
|
|
|
|
|
|
|
|
|
2126
|
|
|
|
|
|
|
=for options |
|
2127
|
|
|
|
|
|
|
|
|
2128
|
|
|
|
|
|
|
Default options (case insensitive): |
|
2129
|
|
|
|
|
|
|
|
|
2130
|
|
|
|
|
|
|
IVNM => ['IV_0', 'IV_1', 'IV_2', 'IV_3'], |
|
2131
|
|
|
|
|
|
|
DVNM => 'DV', |
|
2132
|
|
|
|
|
|
|
AUTO => 1, # auto set dims to be on x-axis, line, panel |
|
2133
|
|
|
|
|
|
|
# if set 0, dim 0 goes on x-axis, dim 1 as lines |
|
2134
|
|
|
|
|
|
|
# dim 2+ as panels |
|
2135
|
|
|
|
|
|
|
# see PDL::Graphics::Simple for next option |
|
2136
|
|
|
|
|
|
|
WIN => undef, # pgswin object. not closed here if passed |
|
2137
|
|
|
|
|
|
|
# allows comparing multiple lines in same plot |
|
2138
|
|
|
|
|
|
|
SIZE => 640, # individual square panel size in pixels |
|
2139
|
|
|
|
|
|
|
|
|
2140
|
|
|
|
|
|
|
=for usage |
|
2141
|
|
|
|
|
|
|
|
|
2142
|
|
|
|
|
|
|
Usage: |
|
2143
|
|
|
|
|
|
|
|
|
2144
|
|
|
|
|
|
|
# see anova for mean / se pdl structure |
|
2145
|
|
|
|
|
|
|
$mean->plot_means( $se, {IVNM=>['apple', 'bake']} ); |
|
2146
|
|
|
|
|
|
|
|
|
2147
|
|
|
|
|
|
|
Or like this: |
|
2148
|
|
|
|
|
|
|
|
|
2149
|
|
|
|
|
|
|
$m{'| apple ~ bake | m'}->plot_means; |
|
2150
|
|
|
|
|
|
|
|
|
2151
|
|
|
|
|
|
|
=cut |
|
2152
|
|
|
|
|
|
|
|
|
2153
|
|
|
|
|
|
|
*plot_means = \&PDL::plot_means; |
|
2154
|
|
|
|
|
|
|
sub PDL::plot_means { |
|
2155
|
|
|
|
|
|
|
require PDL::Graphics::Simple; |
|
2156
|
|
|
|
|
|
|
my $opt = ref($_[-1]) eq 'HASH' ? pop @_ : undef; |
|
2157
|
|
|
|
|
|
|
my ($self, $se) = @_; |
|
2158
|
|
|
|
|
|
|
$self = $self->squeeze; |
|
2159
|
|
|
|
|
|
|
if ($self->ndims > 4) { |
|
2160
|
|
|
|
|
|
|
carp "Data is > 4D. No plot here."; |
|
2161
|
|
|
|
|
|
|
return; |
|
2162
|
|
|
|
|
|
|
} |
|
2163
|
|
|
|
|
|
|
my %opt = ( |
|
2164
|
|
|
|
|
|
|
IVNM => ['IV_0', 'IV_1', 'IV_2', 'IV_3'], |
|
2165
|
|
|
|
|
|
|
DVNM => 'DV', |
|
2166
|
|
|
|
|
|
|
AUTO => 1, # auto set vars to be on X axis, line, panel |
|
2167
|
|
|
|
|
|
|
WIN => undef, # PDL::Graphics::Simple object |
|
2168
|
|
|
|
|
|
|
SIZE => 640, # individual square panel size in pixels |
|
2169
|
|
|
|
|
|
|
); |
|
2170
|
|
|
|
|
|
|
if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt } |
|
2171
|
|
|
|
|
|
|
# decide which vars to plot as x axis, lines, panels |
|
2172
|
|
|
|
|
|
|
# put var w most levels on x axis |
|
2173
|
|
|
|
|
|
|
# put var w least levels on diff panels |
|
2174
|
|
|
|
|
|
|
my @iD = 0..3; |
|
2175
|
|
|
|
|
|
|
my @dims = (1, 1, 1, 1); |
|
2176
|
|
|
|
|
|
|
# splice ARRAY,OFFSET,LENGTH,LIST |
|
2177
|
|
|
|
|
|
|
splice @dims, 0, $self->ndims, $self->dims; |
|
2178
|
|
|
|
|
|
|
$self = $self->reshape(@dims)->sever; |
|
2179
|
|
|
|
|
|
|
$se = $se->reshape(@dims)->sever |
|
2180
|
|
|
|
|
|
|
if defined $se; |
|
2181
|
|
|
|
|
|
|
@iD = reverse list qsorti pdl @dims |
|
2182
|
|
|
|
|
|
|
if $opt{AUTO}; |
|
2183
|
|
|
|
|
|
|
# $iD[0] on x axis |
|
2184
|
|
|
|
|
|
|
# $iD[1] as separate lines |
|
2185
|
|
|
|
|
|
|
my $nx = $self->dim($iD[2]); # n xpanels |
|
2186
|
|
|
|
|
|
|
my $ny = $self->dim($iD[3]); # n ypanels |
|
2187
|
|
|
|
|
|
|
my $w = $opt{WIN} || PDL::Graphics::Simple::pgswin( |
|
2188
|
|
|
|
|
|
|
size=>[$opt{SIZE}*$nx, $opt{SIZE}*$ny,'px']); |
|
2189
|
|
|
|
|
|
|
my $seq0 = sequence(my $dim0 = $self->dim($iD[0])); |
|
2190
|
|
|
|
|
|
|
my ($pcount, @plots) = 0; |
|
2191
|
|
|
|
|
|
|
for my $y (0..$ny-1) { |
|
2192
|
|
|
|
|
|
|
for my $x (0..$nx-1) { |
|
2193
|
|
|
|
|
|
|
my $key_prefix = "$opt{IVNM}[$iD[0]]|"; |
|
2194
|
|
|
|
|
|
|
$key_prefix .= $opt{IVNM}[$iD[2]] . "=$x|" if $nx > 1; |
|
2195
|
|
|
|
|
|
|
$key_prefix .= $opt{IVNM}[$iD[3]] . "=$y|" if $ny > 1; |
|
2196
|
|
|
|
|
|
|
for (0 .. $self->dim($iD[1]) - 1) { |
|
2197
|
|
|
|
|
|
|
my $ke = "$key_prefix$opt{IVNM}[$iD[1]]=$_"; |
|
2198
|
|
|
|
|
|
|
my $ydiced = $self->dice_axis($iD[3],$y)->dice_axis($iD[2],$x)->dice_axis($iD[1],$_)->squeeze; |
|
2199
|
|
|
|
|
|
|
push @plots, with=>'lines', ke=>"$ke mean", style=>$pcount, |
|
2200
|
|
|
|
|
|
|
$seq0+$pcount*0.05, $ydiced; |
|
2201
|
|
|
|
|
|
|
push @plots, with=>'errorbars', ke=>"$ke error", style=>$pcount, |
|
2202
|
|
|
|
|
|
|
$seq0+$pcount*0.05, $ydiced, |
|
2203
|
|
|
|
|
|
|
$se->dice_axis($iD[3],$y)->dice_axis($iD[2],$x) |
|
2204
|
|
|
|
|
|
|
->dice_axis($iD[1],$_)->squeeze |
|
2205
|
|
|
|
|
|
|
if defined($se); |
|
2206
|
|
|
|
|
|
|
$pcount++; |
|
2207
|
|
|
|
|
|
|
} |
|
2208
|
|
|
|
|
|
|
} |
|
2209
|
|
|
|
|
|
|
} |
|
2210
|
|
|
|
|
|
|
my ($ymin, $ymax) = pdl($self, !defined $se ? () : ($self+$se, $self-$se))->minmax; |
|
2211
|
|
|
|
|
|
|
$w->plot(@plots, |
|
2212
|
|
|
|
|
|
|
{ xlabel=>$opt{IVNM}[$iD[0]], ylabel=>$opt{DVNM}, |
|
2213
|
|
|
|
|
|
|
xrange=>[-0.05,$dim0-1+$pcount*0.05], yrange=>[$ymin-0.05,$ymax+0.05] } |
|
2214
|
|
|
|
|
|
|
); |
|
2215
|
|
|
|
|
|
|
$w; |
|
2216
|
|
|
|
|
|
|
} |
|
2217
|
|
|
|
|
|
|
|
|
2218
|
|
|
|
|
|
|
=head2 plot_residuals |
|
2219
|
|
|
|
|
|
|
|
|
2220
|
|
|
|
|
|
|
Plots residuals against predicted values. |
|
2221
|
|
|
|
|
|
|
|
|
2222
|
|
|
|
|
|
|
=for usage |
|
2223
|
|
|
|
|
|
|
|
|
2224
|
|
|
|
|
|
|
Usage: |
|
2225
|
|
|
|
|
|
|
|
|
2226
|
|
|
|
|
|
|
use PDL::Graphics::Simple; |
|
2227
|
|
|
|
|
|
|
$w = pgswin(); |
|
2228
|
|
|
|
|
|
|
$y->plot_residuals( $y_pred, { win=>$w } ); |
|
2229
|
|
|
|
|
|
|
|
|
2230
|
|
|
|
|
|
|
=for options |
|
2231
|
|
|
|
|
|
|
|
|
2232
|
|
|
|
|
|
|
Default options (case insensitive): |
|
2233
|
|
|
|
|
|
|
|
|
2234
|
|
|
|
|
|
|
# see PDL::Graphics::Simple for more info |
|
2235
|
|
|
|
|
|
|
WIN => undef, # pgswin object. not closed here if passed |
|
2236
|
|
|
|
|
|
|
# allows comparing multiple lines in same plot |
|
2237
|
|
|
|
|
|
|
SIZE => 640, # plot size in pixels |
|
2238
|
|
|
|
|
|
|
COLOR => 1, |
|
2239
|
|
|
|
|
|
|
|
|
2240
|
|
|
|
|
|
|
=cut |
|
2241
|
|
|
|
|
|
|
|
|
2242
|
|
|
|
|
|
|
*plot_residuals = \&PDL::plot_residuals; |
|
2243
|
|
|
|
|
|
|
sub PDL::plot_residuals { |
|
2244
|
|
|
|
|
|
|
require PDL::Graphics::Simple; |
|
2245
|
|
|
|
|
|
|
my $opt = ref($_[-1]) eq 'HASH' ? pop @_ : undef; |
|
2246
|
|
|
|
|
|
|
my ($y, $y_pred) = @_; |
|
2247
|
|
|
|
|
|
|
my %opt = ( |
|
2248
|
|
|
|
|
|
|
# see PDL::Graphics::Simple for next options |
|
2249
|
|
|
|
|
|
|
WIN => undef, # pgswin object. not closed here if passed |
|
2250
|
|
|
|
|
|
|
# allows comparing multiple lines in same plot |
|
2251
|
|
|
|
|
|
|
SIZE => 640, # plot size in pixels |
|
2252
|
|
|
|
|
|
|
COLOR => 1, |
|
2253
|
|
|
|
|
|
|
); |
|
2254
|
|
|
|
|
|
|
if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt; } |
|
2255
|
|
|
|
|
|
|
my $residuals = $y - $y_pred; |
|
2256
|
|
|
|
|
|
|
my $win = $opt{WIN} || PDL::Graphics::Simple::pgswin(size=>[@opt{qw(SIZE SIZE)}, 'px']); |
|
2257
|
|
|
|
|
|
|
$win->plot( |
|
2258
|
|
|
|
|
|
|
with=>'points', style=>$opt{COLOR}, $y_pred, $residuals, |
|
2259
|
|
|
|
|
|
|
with=>'lines', style=>$opt{COLOR}, pdl($y_pred->minmax), pdl(0,0), # 0-line |
|
2260
|
|
|
|
|
|
|
{xlabel=>'predicted value', ylabel=>'residuals'}, |
|
2261
|
|
|
|
|
|
|
); |
|
2262
|
|
|
|
|
|
|
} |
|
2263
|
|
|
|
|
|
|
|
|
2264
|
|
|
|
|
|
|
=head2 plot_scores |
|
2265
|
|
|
|
|
|
|
|
|
2266
|
|
|
|
|
|
|
Plots standardized original and PCA transformed scores against two components. (Thank you, Bob MacCallum, for the documentation suggestion that led to this function.) |
|
2267
|
|
|
|
|
|
|
|
|
2268
|
|
|
|
|
|
|
=for options |
|
2269
|
|
|
|
|
|
|
|
|
2270
|
|
|
|
|
|
|
Default options (case insensitive): |
|
2271
|
|
|
|
|
|
|
|
|
2272
|
|
|
|
|
|
|
CORR => 1, # boolean. PCA was based on correlation or covariance |
|
2273
|
|
|
|
|
|
|
COMP => [0,1], # indices to components to plot |
|
2274
|
|
|
|
|
|
|
# see PDL::Graphics::Simple for next options |
|
2275
|
|
|
|
|
|
|
WIN => undef, # pgswin object. not closed here if passed |
|
2276
|
|
|
|
|
|
|
# allows comparing multiple lines in same plot |
|
2277
|
|
|
|
|
|
|
SIZE => 640, # plot size in pixels |
|
2278
|
|
|
|
|
|
|
COLOR => [1,2], # color for original and rotated scores |
|
2279
|
|
|
|
|
|
|
|
|
2280
|
|
|
|
|
|
|
=for usage |
|
2281
|
|
|
|
|
|
|
|
|
2282
|
|
|
|
|
|
|
Usage: |
|
2283
|
|
|
|
|
|
|
|
|
2284
|
|
|
|
|
|
|
my %p = $data->pca(); |
|
2285
|
|
|
|
|
|
|
$data->plot_scores( $p{eigenvector}, \%opt ); |
|
2286
|
|
|
|
|
|
|
|
|
2287
|
|
|
|
|
|
|
=cut |
|
2288
|
|
|
|
|
|
|
|
|
2289
|
|
|
|
|
|
|
*plot_scores = \&PDL::plot_scores; |
|
2290
|
|
|
|
|
|
|
sub PDL::plot_scores { |
|
2291
|
|
|
|
|
|
|
require PDL::Graphics::Simple; |
|
2292
|
|
|
|
|
|
|
my $opt = ref($_[-1]) eq 'HASH' ? pop @_ : undef; |
|
2293
|
|
|
|
|
|
|
my ($self, $eigvec) = @_; |
|
2294
|
|
|
|
|
|
|
my %opt = ( |
|
2295
|
|
|
|
|
|
|
CORR => 1, # boolean. PCA was based on correlation or covariance |
|
2296
|
|
|
|
|
|
|
COMP => [0,1], # indices to components to plot |
|
2297
|
|
|
|
|
|
|
# see PDL::Graphics::Simple for next options |
|
2298
|
|
|
|
|
|
|
WIN => undef, # pgswin object. not closed here if passed |
|
2299
|
|
|
|
|
|
|
# allows comparing multiple lines in same plot |
|
2300
|
|
|
|
|
|
|
SIZE => 640, # plot size in pixels |
|
2301
|
|
|
|
|
|
|
COLOR => [1,2], # color for original and transformed scoress |
|
2302
|
|
|
|
|
|
|
); |
|
2303
|
|
|
|
|
|
|
if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt } |
|
2304
|
|
|
|
|
|
|
my $i = pdl $opt{COMP}; |
|
2305
|
|
|
|
|
|
|
my $z = $opt{CORR} ? $self->stddz : $self->dev_m; |
|
2306
|
|
|
|
|
|
|
# transformed normed values |
|
2307
|
|
|
|
|
|
|
my $scores = sumover($eigvec->slice(':',$i) * $z->transpose->dummy(1))->transpose; |
|
2308
|
|
|
|
|
|
|
$z = $z->slice(':',$i)->sever; |
|
2309
|
|
|
|
|
|
|
my $win = $opt{WIN} || PDL::Graphics::Simple::pgswin(size=>[@opt{qw(SIZE SIZE)}, 'px']); |
|
2310
|
|
|
|
|
|
|
$win->plot( |
|
2311
|
|
|
|
|
|
|
with=>'points', style=>$opt{COLOR}[0], ke=>'original', $z->slice(',(0)'), $z->slice(',(1)'), |
|
2312
|
|
|
|
|
|
|
with=>'points', style=>$opt{COLOR}[1], ke=>'transformed', $scores->slice(',(0)'), $scores->slice(',(1)'), |
|
2313
|
|
|
|
|
|
|
{xlabel=>"Component $opt{COMP}[0]", ylabel=>"Component $opt{COMP}[1]"}, |
|
2314
|
|
|
|
|
|
|
); |
|
2315
|
|
|
|
|
|
|
} |
|
2316
|
|
|
|
|
|
|
|
|
2317
|
|
|
|
|
|
|
=head2 plot_screes |
|
2318
|
|
|
|
|
|
|
|
|
2319
|
|
|
|
|
|
|
Scree plot. Plots proportion of variance accounted for by PCA components. |
|
2320
|
|
|
|
|
|
|
|
|
2321
|
|
|
|
|
|
|
=for options |
|
2322
|
|
|
|
|
|
|
|
|
2323
|
|
|
|
|
|
|
Default options (case insensitive): |
|
2324
|
|
|
|
|
|
|
|
|
2325
|
|
|
|
|
|
|
NCOMP => 20, # max number of components to plot |
|
2326
|
|
|
|
|
|
|
CUT => 0, # set to plot cutoff line after this many components |
|
2327
|
|
|
|
|
|
|
# undef to plot suggested cutoff line for NCOMP comps |
|
2328
|
|
|
|
|
|
|
# see PDL::Graphics::Simple for next options |
|
2329
|
|
|
|
|
|
|
WIN => undef, # pgswin object. not closed here if passed |
|
2330
|
|
|
|
|
|
|
# allows comparing multiple lines in same plot |
|
2331
|
|
|
|
|
|
|
SIZE => 640, # plot size in pixels |
|
2332
|
|
|
|
|
|
|
|
|
2333
|
|
|
|
|
|
|
=for usage |
|
2334
|
|
|
|
|
|
|
|
|
2335
|
|
|
|
|
|
|
Usage: |
|
2336
|
|
|
|
|
|
|
|
|
2337
|
|
|
|
|
|
|
# variance should be in descending order |
|
2338
|
|
|
|
|
|
|
$d = qsort random 10, 5; # 10 obs on 5 variables |
|
2339
|
|
|
|
|
|
|
%pca = $d->pca( \%opt ); |
|
2340
|
|
|
|
|
|
|
$pca{pct_var}->plot_screes( {ncomp=>16, win=>$win=PDL::Graphics::Simple::pgswin()} ); |
|
2341
|
|
|
|
|
|
|
|
|
2342
|
|
|
|
|
|
|
Or, because NCOMP is used so often, it is allowed a shortcut, |
|
2343
|
|
|
|
|
|
|
|
|
2344
|
|
|
|
|
|
|
$pca{pct_var}->plot_screes( 16 ); |
|
2345
|
|
|
|
|
|
|
|
|
2346
|
|
|
|
|
|
|
=cut |
|
2347
|
|
|
|
|
|
|
|
|
2348
|
|
|
|
|
|
|
*plot_scree = \&PDL::plot_screes; # here for now for compatibility |
|
2349
|
|
|
|
|
|
|
*plot_screes = \&PDL::plot_screes; |
|
2350
|
|
|
|
|
|
|
sub PDL::plot_screes { |
|
2351
|
|
|
|
|
|
|
require PDL::Graphics::Simple; |
|
2352
|
|
|
|
|
|
|
my $opt = ref($_[-1]) eq 'HASH' ? pop @_ : undef; |
|
2353
|
|
|
|
|
|
|
my ($self, $ncomp) = @_; |
|
2354
|
|
|
|
|
|
|
my %opt = ( |
|
2355
|
|
|
|
|
|
|
NCOMP => 20, # max number of components to plot |
|
2356
|
|
|
|
|
|
|
CUT => 0, # set to plot cutoff line after this many components |
|
2357
|
|
|
|
|
|
|
# undef to plot suggested cutoff line for NCOMP comps |
|
2358
|
|
|
|
|
|
|
# see PDL::Graphics::Simple for next options |
|
2359
|
|
|
|
|
|
|
WIN => undef, # pgswin object. not closed here if passed |
|
2360
|
|
|
|
|
|
|
# allows comparing multiple lines in same plot |
|
2361
|
|
|
|
|
|
|
SIZE => 640, # plot size in pixels |
|
2362
|
|
|
|
|
|
|
); |
|
2363
|
|
|
|
|
|
|
if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt; } |
|
2364
|
|
|
|
|
|
|
$opt{NCOMP} = $ncomp |
|
2365
|
|
|
|
|
|
|
if $ncomp; |
|
2366
|
|
|
|
|
|
|
# re-use $ncomp below |
|
2367
|
|
|
|
|
|
|
$ncomp = ($self->dim(0) < $opt{NCOMP})? $self->dim(0) : $opt{NCOMP}; |
|
2368
|
|
|
|
|
|
|
my $self_comp = $self->slice([0,$ncomp-1]); |
|
2369
|
|
|
|
|
|
|
$opt{CUT} = PDL::Stats::Kmeans::_scree_ind $self_comp |
|
2370
|
|
|
|
|
|
|
if !defined $opt{CUT}; |
|
2371
|
|
|
|
|
|
|
my $win = $opt{WIN} || |
|
2372
|
|
|
|
|
|
|
PDL::Graphics::Simple::pgswin(size=>[@opt{qw(SIZE SIZE)}, 'px']); |
|
2373
|
|
|
|
|
|
|
$win->plot( |
|
2374
|
|
|
|
|
|
|
with=>'lines', ke=>'scree', sequence($ncomp), $self_comp, |
|
2375
|
|
|
|
|
|
|
!$opt{CUT} ? () : (with=>'lines', ke=>'cut', pdl($opt{CUT}-.5, $opt{CUT}-.5), pdl(-.05, $self->max->sclr+.05)), |
|
2376
|
|
|
|
|
|
|
{xlabel=>'Component', ylabel=>'Proportion of Variance Accounted for', |
|
2377
|
|
|
|
|
|
|
xrange=>[-0.05,$ncomp-0.95], yrange=>[0,1], le=>'tr'}, |
|
2378
|
|
|
|
|
|
|
); |
|
2379
|
|
|
|
|
|
|
} |
|
2380
|
|
|
|
|
|
|
|
|
2381
|
|
|
|
|
|
|
=head2 plot_stripchart |
|
2382
|
|
|
|
|
|
|
|
|
2383
|
|
|
|
|
|
|
Stripchart plot. Plots ANOVA-style data, categorised against given IVs. |
|
2384
|
|
|
|
|
|
|
|
|
2385
|
|
|
|
|
|
|
=for options |
|
2386
|
|
|
|
|
|
|
|
|
2387
|
|
|
|
|
|
|
Default options (case insensitive): |
|
2388
|
|
|
|
|
|
|
|
|
2389
|
|
|
|
|
|
|
IVNM => [], # auto filled as ['IV_0', 'IV_1', ... ] |
|
2390
|
|
|
|
|
|
|
DVNM => 'DV', |
|
2391
|
|
|
|
|
|
|
# see PDL::Graphics::Simple for next options |
|
2392
|
|
|
|
|
|
|
WIN => undef, # pgswin object. not closed here if passed |
|
2393
|
|
|
|
|
|
|
|
|
2394
|
|
|
|
|
|
|
=for usage |
|
2395
|
|
|
|
|
|
|
|
|
2396
|
|
|
|
|
|
|
Usage: |
|
2397
|
|
|
|
|
|
|
|
|
2398
|
|
|
|
|
|
|
%m = $y->plot_stripchart( $a, \@b, { IVNM=>[qw(apple bake)] } ); |
|
2399
|
|
|
|
|
|
|
|
|
2400
|
|
|
|
|
|
|
=cut |
|
2401
|
|
|
|
|
|
|
|
|
2402
|
|
|
|
|
|
|
my $CHART_GAP = 0.1; |
|
2403
|
|
|
|
|
|
|
*plot_stripchart = \&PDL::plot_stripchart; |
|
2404
|
|
|
|
|
|
|
sub PDL::plot_stripchart { |
|
2405
|
|
|
|
|
|
|
require PDL::Graphics::Simple; |
|
2406
|
|
|
|
|
|
|
my $opt = ref($_[-1]) eq 'HASH' ? pop @_ : undef; |
|
2407
|
|
|
|
|
|
|
my ($y, @ivs_raw) = @_; |
|
2408
|
|
|
|
|
|
|
my %opt = ( |
|
2409
|
|
|
|
|
|
|
IVNM => [], # auto filled as ['IV_0', 'IV_1', ... ] |
|
2410
|
|
|
|
|
|
|
DVNM => 'DV', |
|
2411
|
|
|
|
|
|
|
WIN => undef, # pgswin object. not closed here if passed |
|
2412
|
|
|
|
|
|
|
); |
|
2413
|
|
|
|
|
|
|
if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt; } |
|
2414
|
|
|
|
|
|
|
$opt{IVNM} = [ map { "IV_$_" } 0 .. $#ivs_raw ] |
|
2415
|
|
|
|
|
|
|
if !$opt{IVNM} or !@{ $opt{IVNM} }; |
|
2416
|
|
|
|
|
|
|
my $w = $opt{WIN} || PDL::Graphics::Simple::pgswin(); |
|
2417
|
|
|
|
|
|
|
my @codes = map [code_ivs($_)], @ivs_raw; |
|
2418
|
|
|
|
|
|
|
my @levels = map { |
|
2419
|
|
|
|
|
|
|
my $map = $_->[1]; |
|
2420
|
|
|
|
|
|
|
[sort {$map->{$a} <=> $map->{$b}} keys %$map]; |
|
2421
|
|
|
|
|
|
|
} @codes; |
|
2422
|
|
|
|
|
|
|
my $xjitter = $y->random * $CHART_GAP; |
|
2423
|
|
|
|
|
|
|
my ($pcount, @plots) = 0; |
|
2424
|
|
|
|
|
|
|
push @plots, with=>'points', ke=>"all data", $xjitter+$pcount, $y; |
|
2425
|
|
|
|
|
|
|
$pcount++; |
|
2426
|
|
|
|
|
|
|
for my $i (0..$#ivs_raw) { |
|
2427
|
|
|
|
|
|
|
my $levs = $levels[$i]; |
|
2428
|
|
|
|
|
|
|
my $name = $opt{IVNM}[$i]; |
|
2429
|
|
|
|
|
|
|
my $coded = $codes[$i][0]; |
|
2430
|
|
|
|
|
|
|
for my $j (0..$#$levs) { |
|
2431
|
|
|
|
|
|
|
my $inds = which($coded == $j); |
|
2432
|
|
|
|
|
|
|
push @plots, with=>'points', ke=>"$name=$levs->[$j]", |
|
2433
|
|
|
|
|
|
|
$xjitter->slice($inds)+$pcount+$j*$CHART_GAP, $y->slice($inds); |
|
2434
|
|
|
|
|
|
|
} |
|
2435
|
|
|
|
|
|
|
$pcount++; |
|
2436
|
|
|
|
|
|
|
} |
|
2437
|
|
|
|
|
|
|
my ($ymin, $ymax) = $y->minmax; |
|
2438
|
|
|
|
|
|
|
my $xmax = $pcount-1 + $CHART_GAP*($#{$levels[-1]} + 2); |
|
2439
|
|
|
|
|
|
|
$w->plot(@plots, |
|
2440
|
|
|
|
|
|
|
{ xlabel=>'IV', ylabel=>$opt{DVNM}, |
|
2441
|
|
|
|
|
|
|
xrange=>[-1,$xmax], yrange=>[$ymin-$CHART_GAP,$ymax+$CHART_GAP] } |
|
2442
|
|
|
|
|
|
|
); |
|
2443
|
|
|
|
|
|
|
$w; |
|
2444
|
|
|
|
|
|
|
} |
|
2445
|
|
|
|
|
|
|
|
|
2446
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
2447
|
|
|
|
|
|
|
|
|
2448
|
|
|
|
|
|
|
L |
|
2449
|
|
|
|
|
|
|
|
|
2450
|
|
|
|
|
|
|
L |
|
2451
|
|
|
|
|
|
|
|
|
2452
|
|
|
|
|
|
|
=head1 REFERENCES |
|
2453
|
|
|
|
|
|
|
|
|
2454
|
|
|
|
|
|
|
Cohen, J., Cohen, P., West, S.G., & Aiken, L.S. (2003). Applied |
|
2455
|
|
|
|
|
|
|
Multiple Regression/correlation Analysis for the Behavioral Sciences |
|
2456
|
|
|
|
|
|
|
(3rd ed.). Mahwah, NJ: Lawrence Erlbaum Associates Publishers. |
|
2457
|
|
|
|
|
|
|
|
|
2458
|
|
|
|
|
|
|
Hosmer, D.W., & Lemeshow, S. (2000). Applied Logistic Regression (2nd |
|
2459
|
|
|
|
|
|
|
ed.). New York, NY: Wiley-Interscience. |
|
2460
|
|
|
|
|
|
|
|
|
2461
|
|
|
|
|
|
|
Lorch, R.F., & Myers, J.L. (1990). Regression analyses of repeated |
|
2462
|
|
|
|
|
|
|
measures data in cognitive research. Journal of Experimental Psychology: |
|
2463
|
|
|
|
|
|
|
Learning, Memory, & Cognition, 16, 149-157. |
|
2464
|
|
|
|
|
|
|
|
|
2465
|
|
|
|
|
|
|
Osgood C.E., Suci, G.J., & Tannenbaum, P.H. (1957). The Measurement of |
|
2466
|
|
|
|
|
|
|
Meaning. Champaign, IL: University of Illinois Press. |
|
2467
|
|
|
|
|
|
|
|
|
2468
|
|
|
|
|
|
|
Rutherford, A. (2011). ANOVA and ANCOVA: A GLM Approach (2nd ed.). Wiley. |
|
2469
|
|
|
|
|
|
|
|
|
2470
|
|
|
|
|
|
|
Shlens, J. (2009). A Tutorial on Principal Component Analysis. Retrieved |
|
2471
|
|
|
|
|
|
|
April 10, 2011 from http://citeseerx.ist.psu.edu/ |
|
2472
|
|
|
|
|
|
|
|
|
2473
|
|
|
|
|
|
|
The GLM procedure: unbalanced ANOVA for two-way design with |
|
2474
|
|
|
|
|
|
|
interaction. (2008). SAS/STAT(R) 9.2 User's Guide. Retrieved June 18, |
|
2475
|
|
|
|
|
|
|
2009 from http://support.sas.com/ |
|
2476
|
|
|
|
|
|
|
|
|
2477
|
|
|
|
|
|
|
Van den Noortgate, W., & Onghena, P. (2006). Analysing repeated |
|
2478
|
|
|
|
|
|
|
measures data in cognitive research: A comment on regression coefficient |
|
2479
|
|
|
|
|
|
|
analyses. European Journal of Cognitive Psychology, 18, 937-952. |
|
2480
|
|
|
|
|
|
|
|
|
2481
|
|
|
|
|
|
|
=head1 AUTHOR |
|
2482
|
|
|
|
|
|
|
|
|
2483
|
|
|
|
|
|
|
Copyright (C) 2009 Maggie J. Xiong |
|
2484
|
|
|
|
|
|
|
|
|
2485
|
|
|
|
|
|
|
All rights reserved. There is no warranty. You are allowed to redistribute |
|
2486
|
|
|
|
|
|
|
this software / documentation as described in the file COPYING in the |
|
2487
|
|
|
|
|
|
|
PDL distribution. |
|
2488
|
|
|
|
|
|
|
|
|
2489
|
|
|
|
|
|
|
=cut |
|
2490
|
|
|
|
|
|
|
#line 2491 "lib/PDL/Stats/GLM.pm" |
|
2491
|
|
|
|
|
|
|
|
|
2492
|
|
|
|
|
|
|
# Exit with OK status |
|
2493
|
|
|
|
|
|
|
|
|
2494
|
|
|
|
|
|
|
1; |