File Coverage

blib/lib/PDL/Stats/GLM.pm
Criterion Covered Total %
statement 9 9 100.0
branch n/a
condition n/a
subroutine 3 3 100.0
pod n/a
total 12 12 100.0


line stmt bran cond sub pod time code
1             #
2             # GENERATED WITH PDL::PP from lib/PDL/Stats/GLM.pd! Don't modify!
3             #
4             package PDL::Stats::GLM;
5              
6             our @EXPORT_OK = qw(ols_t ols ols_rptd anova anova_rptd anova_design_matrix dummy_code effect_code effect_code_w interaction_code r2_change logistic pca pca_sorti plot_means plot_residuals plot_screes fill_m fill_rand dev_m stddz sse mse rmse pred_logistic d0 dm dvrs );
7             our %EXPORT_TAGS = (Func=>\@EXPORT_OK);
8              
9 1     1   1545 use PDL::Core;
  1         2  
  1         7  
10 1     1   432 use PDL::Exporter;
  1         2  
  1         20  
11 1     1   31 use DynaLoader;
  1         2  
  1         863  
12              
13              
14            
15             our @ISA = ( 'PDL::Exporter','DynaLoader' );
16             push @PDL::Core::PP, __PACKAGE__;
17             bootstrap PDL::Stats::GLM ;
18              
19              
20              
21              
22              
23              
24              
25              
26             #line 14 "lib/PDL/Stats/GLM.pd"
27              
28             use strict;
29             use warnings;
30              
31             use Carp;
32             use PDL::LiteF;
33             use PDL::MatrixOps;
34             use PDL::Stats::Basic;
35             use PDL::Stats::Kmeans;
36              
37             eval { require PDL::Core; require PDL::GSL::CDF; };
38             my $CDF = 1 if !$@;
39              
40             =encoding utf8
41              
42             =head1 NAME
43              
44             PDL::Stats::GLM -- general and generalized linear modelling methods such as ANOVA, linear regression, PCA, and logistic regression.
45              
46             =head1 SYNOPSIS
47              
48             use PDL::LiteF;
49             use PDL::Stats::GLM;
50              
51             # do a multiple linear regression and plot the residuals
52             my $y = pdl( 8, 7, 7, 0, 2, 5, 0 );
53             my $x = pdl( [ 0, 1, 2, 3, 4, 5, 6 ], # linear component
54             [ 0, 1, 4, 9, 16, 25, 36 ] ); # quadratic component
55             my %m = $y->ols( $x, {plot=>1} );
56             print "$_\t$m{$_}\n" for sort keys %m;
57              
58             =head1 DESCRIPTION
59              
60             For more about general linear modelling, see
61             L.
62             For an unbelievably thorough text on experimental design
63             and analysis, including linear modelling, see L
64             Course in Design and Analysis of Experiments, Gary
65             W. Oehlert|http://users.stat.umn.edu/~gary/book/fcdae.pdf>.
66              
67             The terms FUNCTIONS and METHODS are arbitrarily used to refer to
68             methods that are broadcastable and methods that are NOT broadcastable,
69             respectively. FUNCTIONS support bad values.
70              
71             P-values, where appropriate, are provided if PDL::GSL::CDF is installed.
72              
73             =cut
74             #line 75 "lib/PDL/Stats/GLM.pm"
75              
76              
77             =head1 FUNCTIONS
78              
79             =cut
80              
81              
82              
83              
84              
85              
86             =head2 fill_m
87              
88             =for sig
89              
90             Signature: (a(n); [o]b(n))
91             Types: (float double)
92              
93             =for ref
94              
95             Replaces bad values with sample mean. Mean is set to 0 if all obs are
96             bad.
97              
98             =for usage
99              
100             pdl> p $data
101             [
102             [ 5 BAD 2 BAD]
103             [ 7 3 7 BAD]
104             ]
105              
106             pdl> p $data->fill_m
107             [
108             [ 5 3.5 2 3.5]
109             [ 7 3 7 5.66667]
110             ]
111            
112              
113             =pod
114              
115             Broadcasts over its inputs.
116              
117             =for bad
118              
119             The output pdl badflag is cleared.
120              
121             =cut
122              
123              
124              
125              
126             *fill_m = \&PDL::fill_m;
127              
128              
129              
130              
131              
132              
133             =head2 fill_rand
134              
135             =for sig
136              
137             Signature: (a(n); [o]b(n))
138             Types: (sbyte byte short ushort long ulong indx ulonglong longlong
139             float double ldouble)
140              
141             =for ref
142              
143             Replaces bad values with random sample (with replacement) of good
144             observations from the same variable.
145              
146             =for usage
147              
148             pdl> p $data
149             [
150             [ 5 BAD 2 BAD]
151             [ 7 3 7 BAD]
152             ]
153              
154             pdl> p $data->fill_rand
155              
156             [
157             [5 2 2 5]
158             [7 3 7 7]
159             ]
160              
161             =pod
162              
163             Broadcasts over its inputs.
164              
165             =for bad
166              
167             The output pdl badflag is cleared.
168              
169             =cut
170              
171              
172              
173              
174             *fill_rand = \&PDL::fill_rand;
175              
176              
177              
178              
179              
180              
181             =head2 dev_m
182              
183             =for sig
184              
185             Signature: (a(n); [o]b(n))
186             Types: (float double)
187              
188             =for usage
189              
190             $b = dev_m($a);
191             dev_m($a, $b); # all arguments given
192             $b = $a->dev_m; # method call
193             $a->dev_m($b);
194             $a->inplace->dev_m; # can be used inplace
195             dev_m($a->inplace);
196              
197             =for ref
198              
199             Replaces values with deviations from the mean.
200              
201             =pod
202              
203             Broadcasts over its inputs.
204              
205             =for bad
206              
207             C processes bad values.
208             It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays.
209              
210             =cut
211              
212              
213              
214              
215             *dev_m = \&PDL::dev_m;
216              
217              
218              
219              
220              
221              
222             =head2 stddz
223              
224             =for sig
225              
226             Signature: (a(n); [o]b(n))
227             Types: (float double)
228              
229             =for usage
230              
231             $b = stddz($a);
232             stddz($a, $b); # all arguments given
233             $b = $a->stddz; # method call
234             $a->stddz($b);
235             $a->inplace->stddz; # can be used inplace
236             stddz($a->inplace);
237              
238             =for ref
239              
240             Standardize ie replace values with z_scores based on sample standard deviation from the mean (replace with 0s if stdv==0).
241              
242             =pod
243              
244             Broadcasts over its inputs.
245              
246             =for bad
247              
248             C processes bad values.
249             It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays.
250              
251             =cut
252              
253              
254              
255              
256             *stddz = \&PDL::stddz;
257              
258              
259              
260              
261              
262              
263             =head2 sse
264              
265             =for sig
266              
267             Signature: (a(n); b(n); [o]c())
268             Types: (float double)
269              
270             =for usage
271              
272             $c = sse($a, $b);
273             sse($a, $b, $c); # all arguments given
274             $c = $a->sse($b); # method call
275             $a->sse($b, $c);
276              
277             =for ref
278              
279             Sum of squared errors between actual and predicted values.
280              
281             =pod
282              
283             Broadcasts over its inputs.
284              
285             =for bad
286              
287             C processes bad values.
288             It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays.
289              
290             =cut
291              
292              
293              
294              
295             *sse = \&PDL::sse;
296              
297              
298              
299              
300              
301              
302             =head2 mse
303              
304             =for sig
305              
306             Signature: (a(n); b(n); [o]c())
307             Types: (float double)
308              
309             =for usage
310              
311             $c = mse($a, $b);
312             mse($a, $b, $c); # all arguments given
313             $c = $a->mse($b); # method call
314             $a->mse($b, $c);
315              
316             =for ref
317              
318             Mean of squared errors between actual and predicted values, ie variance around predicted value.
319              
320             =pod
321              
322             Broadcasts over its inputs.
323              
324             =for bad
325              
326             C processes bad values.
327             It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays.
328              
329             =cut
330              
331              
332              
333              
334             *mse = \&PDL::mse;
335              
336              
337              
338              
339              
340              
341             =head2 rmse
342              
343             =for sig
344              
345             Signature: (a(n); b(n); [o]c())
346             Types: (float double)
347              
348             =for usage
349              
350             $c = rmse($a, $b);
351             rmse($a, $b, $c); # all arguments given
352             $c = $a->rmse($b); # method call
353             $a->rmse($b, $c);
354              
355             =for ref
356              
357             Root mean squared error, ie stdv around predicted value.
358              
359             =pod
360              
361             Broadcasts over its inputs.
362              
363             =for bad
364              
365             C processes bad values.
366             It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays.
367              
368             =cut
369              
370              
371              
372              
373             *rmse = \&PDL::rmse;
374              
375              
376              
377              
378              
379              
380             =head2 pred_logistic
381              
382             =for sig
383              
384             Signature: (a(n,m); b(m); [o]c(n))
385             Types: (float double)
386              
387             =for ref
388              
389             Calculates predicted prob value for logistic regression.
390              
391             =for usage
392              
393             # glue constant then apply coeff returned by the logistic method
394             $pred = $x->glue(1,ones($x->dim(0)))->pred_logistic( $m{b} );
395              
396             =pod
397              
398             Broadcasts over its inputs.
399              
400             =for bad
401              
402             C processes bad values.
403             It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays.
404              
405             =cut
406              
407              
408              
409              
410             *pred_logistic = \&PDL::pred_logistic;
411              
412              
413              
414              
415              
416              
417             =head2 d0
418              
419             =for sig
420              
421             Signature: (a(n); [o]c())
422             Types: (float double)
423              
424             =for usage
425              
426             $c = d0($a);
427             d0($a, $c); # all arguments given
428             $c = $a->d0; # method call
429             $a->d0($c);
430              
431             =for ref
432              
433             Null deviance for logistic regression.
434              
435             =pod
436              
437             Broadcasts over its inputs.
438              
439             =for bad
440              
441             C processes bad values.
442             It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays.
443              
444             =cut
445              
446              
447              
448              
449             *d0 = \&PDL::d0;
450              
451              
452              
453              
454              
455              
456             =head2 dm
457              
458             =for sig
459              
460             Signature: (a(n); b(n); [o]c())
461             Types: (float double)
462              
463             =for ref
464              
465             Model deviance for logistic regression.
466              
467             =for usage
468              
469             my $dm = $y->dm( $y_pred );
470             # null deviance
471             my $d0 = $y->dm( ones($y->nelem) * $y->avg );
472              
473             =pod
474              
475             Broadcasts over its inputs.
476              
477             =for bad
478              
479             C processes bad values.
480             It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays.
481              
482             =cut
483              
484              
485              
486              
487             *dm = \&PDL::dm;
488              
489              
490              
491              
492              
493              
494             =head2 dvrs
495              
496             =for sig
497              
498             Signature: (a(); b(); [o]c())
499             Types: (float double)
500              
501             =for usage
502              
503             $c = dvrs($a, $b);
504             dvrs($a, $b, $c); # all arguments given
505             $c = $a->dvrs($b); # method call
506             $a->dvrs($b, $c);
507              
508             =for ref
509              
510             Deviance residual for logistic regression.
511              
512             =pod
513              
514             Broadcasts over its inputs.
515              
516             =for bad
517              
518             C processes bad values.
519             It will set the bad-value flag of all output ndarrays if the flag is set for any of the input ndarrays.
520              
521             =cut
522              
523              
524              
525              
526             *dvrs = \&PDL::dvrs;
527              
528              
529              
530              
531              
532             #line 359 "lib/PDL/Stats/GLM.pd"
533              
534             #line 360 "lib/PDL/Stats/GLM.pd"
535              
536             =head2 ols_t
537              
538             =for ref
539              
540             Broadcasted version of ordinary least squares regression (B). The
541             price of broadcasting was losing significance tests for coefficients
542             (but see B). The fitting function was shamelessly copied then
543             modified from PDL::Fit::Linfit. Intercept is FIRST of coeff if CONST => 1.
544              
545             ols_t does not handle bad values. consider B or B
546             if there are bad values.
547              
548             =for options
549              
550             Default options (case insensitive):
551              
552             CONST => 1,
553              
554             =for usage
555              
556             Usage:
557              
558             # DV, 2 person's ratings for top-10 box office movies
559             # ascending sorted by box office numbers
560              
561             pdl> p $y = pdl '1 1 2 4 4 4 4 5 5 5; 1 2 2 2 3 3 3 3 5 5'
562              
563             # model with 2 IVs, a linear and a quadratic trend component
564              
565             pdl> $x = cat sequence(10), sequence(10)**2
566              
567             # suppose our novice modeler thinks this creates 3 different models
568             # for predicting movie ratings
569              
570             pdl> p $x = cat $x, $x * 2, $x * 3
571             [
572             [
573             [ 0 1 2 3 4 5 6 7 8 9]
574             [ 0 1 4 9 16 25 36 49 64 81]
575             ]
576             [
577             [ 0 2 4 6 8 10 12 14 16 18]
578             [ 0 2 8 18 32 50 72 98 128 162]
579             ]
580             [
581             [ 0 3 6 9 12 15 18 21 24 27]
582             [ 0 3 12 27 48 75 108 147 192 243]
583             ]
584             ]
585              
586             pdl> p $x->info
587             PDL: Double D [10,2,3]
588              
589             # insert a dummy dim between IV and the dim (model) to be broadcasted
590              
591             pdl> %m = $y->ols_t( $x->dummy(2) )
592              
593             pdl> p "$_\t@{[$m{$_} =~ /^\n*(.*?)\n*\z/s]}\n" for sort keys %m
594              
595             # 2 persons' ratings, each fitted with 3 "different" models
596             F [
597             [ 38.314159 25.087209]
598             [ 38.314159 25.087209]
599             [ 38.314159 25.087209]
600             ]
601              
602             # df is the same across dv and iv models
603             F_df [2 7]
604             F_p [
605             [0.00016967051 0.00064215074]
606             [0.00016967051 0.00064215074]
607             [0.00016967051 0.00064215074]
608             ]
609              
610             R2 [
611             [ 0.9162963 0.87756762]
612             [ 0.9162963 0.87756762]
613             [ 0.9162963 0.87756762]
614             ]
615              
616             b [ # constant linear quadratic
617             [
618             [ 0.66363636 0.99015152 -0.056818182] # person 1
619             [ 1.4 0.18939394 0.022727273] # person 2
620             ]
621             [
622             [ 0.66363636 0.49507576 -0.028409091]
623             [ 1.4 0.09469697 0.011363636]
624             ]
625             [
626             [ 0.66363636 0.33005051 -0.018939394]
627             [ 1.4 0.063131313 0.0075757576]
628             ]
629             ]
630              
631             # our novice modeler realizes at this point that
632             # the 3 models only differ in the scaling of the IV coefficients
633             ss_model [
634             [ 20.616667 13.075758]
635             [ 20.616667 13.075758]
636             [ 20.616667 13.075758]
637             ]
638              
639             ss_residual [
640             [ 1.8833333 1.8242424]
641             [ 1.8833333 1.8242424]
642             [ 1.8833333 1.8242424]
643             ]
644              
645             ss_total [22.5 14.9]
646             y_pred [
647             [
648             [0.66363636 1.5969697 2.4166667 3.1227273 ... 4.9727273]
649             ...
650              
651             =cut
652              
653             *ols_t = \&PDL::ols_t;
654             sub PDL::ols_t {
655             _ols_common(1, @_);
656             }
657              
658             sub _ols_common {
659             my ($broadcasted, $y, $ivs, $opt) = @_;
660             ($y, $ivs) = _ols_prep_inputs(@_);
661             _ols_main($broadcasted, $y, $ivs, $opt);
662             }
663              
664             sub _ols_prep_inputs {
665             # y [n], ivs [n x attr] pdl
666             my ($broadcasted, $y, $ivs, $opt) = @_;
667             my %opt = (
668             CONST => 1,
669             PLOT => 0,
670             WIN => undef, # for plotting
671             );
672             if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt; }
673             if (!$broadcasted) {
674             $y = $y->squeeze;
675             $y->getndims > 1 and
676             croak "use ols_t for broadcasted version";
677             }
678             $ivs = $ivs->dummy(1) if $ivs->getndims == 1;
679             ($y, $ivs) = _rm_bad_value( $y, $ivs ) if !$broadcasted;
680             # set up ivs and const as ivs
681             $opt{CONST} and
682             $ivs = ones($ivs->dim(0))->glue( 1, $ivs );
683             ($y, $ivs);
684             }
685              
686             sub _ols_main {
687             my ($broadcasted, $y, $ivs, $opt) = @_;
688             my %opt = (
689             CONST => 1,
690             PLOT => 0,
691             WIN => undef, # for plotting
692             );
693             if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt; }
694             my $C = inv( $ivs x $ivs->t );
695             # Internally normalise data
696             # (double) it or ushort y and sequence iv won't work right
697             my $ymean = $y->abs->avgover->double;
698             $ymean->where( $ymean==0 ) .= 1;
699             my $divisor = $broadcasted ? $ymean->dummy(0) : $ymean;
700             my $y2 = $y / $divisor;
701             my $Y = $ivs x $y2->dummy(0);
702             # Do the fit
703             # Fitted coefficients vector
704             my $coeff = PDL::squeeze( $C x $Y );
705             $coeff = $coeff->dummy(0)
706             if $broadcasted and $coeff->getndims == 1 and $y->getndims > 1;
707             $coeff *= $divisor; # Un-normalise
708             # ***$coeff x $ivs looks nice but produces nan on successive tries***
709             my $y_pred = sumover( ($broadcasted ? $coeff->dummy(1) : $coeff) * $ivs->transpose );
710             $opt{PLOT} and $y->plot_residuals( $y_pred, \%opt );
711             return $coeff unless wantarray;
712             my %ret = (y_pred => $y_pred);
713             $ret{ss_total} = $opt{CONST} ? $y->ss : sumover( $y ** 2 );
714             $ret{ss_residual} = $y->sse( $ret{y_pred} );
715             $ret{ss_model} = $ret{ss_total} - $ret{ss_residual};
716             $ret{R2} = $ret{ss_model} / $ret{ss_total};
717             my $n_var = $opt{CONST} ? $ivs->dim(1) - 1 : $ivs->dim(1);
718             $ret{F_df} = pdl( $n_var, my $df1 = $y->dim(0) - $ivs->dim(1) );
719             $ret{F} = $ret{ss_model} / $n_var
720             / ($ret{ss_residual} / $df1);
721             $ret{F_p} = 1 - $ret{F}->gsl_cdf_fdist_P( $n_var, $df1 )
722             if $CDF;
723             if (!$broadcasted) {
724             my $se_b = ones( $coeff->dims? $coeff->dims : 1 );
725             $opt{CONST} and
726             $se_b->slice(0) .= sqrt( $ret{ss_residual} / $df1 * $C->slice(0,0) );
727             # get the se for bs by successively regressing each iv by the rest ivs
728             if ($ivs->dim(1) > 1) {
729             my @coords = $opt{CONST} ? 1..$n_var : 0..$n_var-1;
730             my $ones = !$opt{CONST} ? undef : ones($ivs->dim(0));
731             for my $k (@coords) {
732             my $G = $ivs->dice_axis(1, [grep $_ != $k, @coords]);
733             $G = $ones->glue( 1, $G ) if $opt{CONST};
734             my $b_G = $ivs->slice(':',$k)->ols( $G, {CONST=>0,PLOT=>0} );
735             my $ss_res_k = $ivs->slice(':',$k)->squeeze->sse( sumover($b_G * $G->transpose) );
736             $se_b->slice($k) .= sqrt( $ret{ss_residual} / $df1 / $ss_res_k );
737             }
738             }
739             else {
740             $se_b->slice(0)
741             .= sqrt( $ret{ss_residual} / $df1 / sum( $ivs->slice(':',0)**2 ) );
742             }
743             $ret{b_se} = $se_b;
744             $ret{b_t} = $coeff / $ret{b_se};
745             $ret{b_p} = 2 * ( 1 - $ret{b_t}->abs->gsl_cdf_tdist_P( $df1 ) )
746             if $CDF;
747             }
748             for (keys %ret) { ref $ret{$_} eq 'PDL' and $ret{$_} = $ret{$_}->squeeze };
749             $ret{b} = $coeff;
750             return %ret;
751             }
752              
753             =head2 r2_change
754              
755             =for ref
756              
757             Significance test for the incremental change in R2 when new variable(s)
758             are added to an ols regression model.
759              
760             Returns the change stats as well as stats for both models. Based on
761             L. (One way to make up for the lack of significance tests for
762             coeffs in ols_t).
763              
764             =for options
765              
766             Default options (case insensitive):
767              
768             CONST => 1,
769              
770             =for usage
771              
772             Usage:
773              
774             # suppose these are two persons' ratings for top 10 box office movies
775             # ascending sorted by box office
776              
777             pdl> p $y = qsort ceil(random(10, 2) * 5)
778             [
779             [1 1 2 2 2 3 4 4 4 4]
780             [1 2 2 3 3 3 4 4 5 5]
781             ]
782              
783             # first IV is a simple linear trend
784              
785             pdl> p $x1 = sequence 10
786             [0 1 2 3 4 5 6 7 8 9]
787              
788             # the modeler wonders if adding a quadratic trend improves the fit
789              
790             pdl> p $x2 = sequence(10) ** 2
791             [0 1 4 9 16 25 36 49 64 81]
792              
793             # two difference models are given in two pdls
794             # each as would be pass on to ols_t
795             # the 1st model includes only linear trend
796             # the 2nd model includes linear and quadratic trends
797             # when necessary use dummy dim so both models have the same ndims
798              
799             pdl> %c = $y->r2_change( $x1->dummy(1), cat($x1, $x2) )
800              
801             pdl> p "$_\t$c{$_}\n" for sort keys %c
802             # person 1 person 2
803             F_change [0.72164948 0.071283096]
804             # df same for both persons
805             F_df [1 7]
806             F_p [0.42370145 0.79717232]
807             R2_change [0.0085966043 0.00048562549]
808             model0 HASH(0x8c10828)
809             model1 HASH(0x8c135c8)
810              
811             # the answer here is no.
812              
813             =cut
814              
815             *r2_change = \&PDL::r2_change;
816             sub PDL::r2_change {
817             my ($self, $ivs0, $ivs1, $opt) = @_;
818             $ivs0->getndims == 1 and $ivs0 = $ivs0->dummy(1);
819              
820             my %ret;
821              
822             $ret{model0} = { $self->ols_t( $ivs0, $opt ) };
823             $ret{model1} = { $self->ols_t( $ivs1, $opt ) };
824              
825             $ret{R2_change} = $ret{model1}->{R2} - $ret{model0}->{R2};
826             $ret{F_df}
827             = pdl(my $df0 = $ivs1->dim(1) - $ivs0->dim(1),
828             my $df1 = $ret{model1}->{F_df}->slice('(1)') );
829             $ret{F_change}
830             = $ret{R2_change} * $df1
831             / ( (1-$ret{model1}->{R2}) * $df0 );
832             $ret{F_p} = 1 - $ret{F_change}->gsl_cdf_fdist_P( $df0, $df1 )
833             if $CDF;
834              
835             for (keys %ret) { ref $ret{$_} eq 'PDL' and $ret{$_} = $ret{$_}->squeeze };
836              
837             %ret;
838             }
839              
840             =head1 METHODS
841              
842             =head2 anova
843              
844             =for ref
845              
846             Analysis of variance. Uses type III sum of squares for unbalanced data.
847              
848             Dependent variable should be a 1D pdl. Independent variables can be
849             passed as 1D perl array ref or 1D pdl.
850              
851             Will only calculate p-value (C) if there are more samples than the
852             product of categories of all the IVs.
853              
854             Supports bad value (by ignoring missing or BAD values in dependent and
855             independent variables list-wise).
856              
857             For more on ANOVA, see L.
858              
859             =for options
860              
861             Default options (case insensitive):
862              
863             V => 1, # carps if bad value in variables
864             IVNM => [], # auto filled as ['IV_0', 'IV_1', ... ]
865             PLOT => 0, # plots highest order effect
866             # can set plot_means options here
867             WIN => undef, # for plotting
868              
869             =for usage
870              
871             Usage:
872              
873             # suppose this is ratings for 12 apples
874              
875             pdl> p $y = qsort ceil( random(12)*5 )
876             [1 1 2 2 2 3 3 4 4 4 5 5]
877              
878             # IV for types of apple
879              
880             pdl> p $a = sequence(12) % 3 + 1
881             [1 2 3 1 2 3 1 2 3 1 2 3]
882              
883             # IV for whether we baked the apple
884              
885             pdl> @b = qw( y y y y y y n n n n n n )
886              
887             pdl> %m = $y->anova( $a, \@b, { IVNM=>['apple', 'bake'] } )
888              
889             pdl> p "$_\t@{[$m{$_} =~ /^\n*(.*?)\n*\z/s]}\n" for sort keys %m
890             F 2.46666666666667
891             F_df [5 6]
892             F_p 0.151168719948632
893             ms_model 3.08333333333333
894             ms_residual 1.25
895             ss_model 15.4166666666667
896             ss_residual 7.5
897             ss_total 22.9166666666667
898             | apple | F 0.466666666666667
899             | apple | F_p 0.648078345471096
900             | apple | df 2
901             | apple | m [2.75 3 3.5]
902             | apple | ms 0.583333333333334
903             | apple | se [0.85391256 0.81649658 0.64549722]
904             | apple | ss 1.16666666666667
905             | apple || err df 6
906             | apple ~ bake | F 0.0666666666666671
907             | apple ~ bake | F_p 0.936190104380701
908             | apple ~ bake | df 2
909             | apple ~ bake | m [
910             [1.5 2 2.5]
911             [ 4 4 4.5]
912             ]
913             | apple ~ bake | ms 0.0833333333333339
914             | apple ~ bake | se [
915             [0.5 1 0.5]
916             [ 1 1 0.5]
917             ]
918             | apple ~ bake | ss 0.166666666666668
919             | apple ~ bake || err df 6
920             | bake | F 11.2666666666667
921             | bake | F_p 0.015294126084452
922             | bake | df 1
923             | bake | m [2 4.1666667]
924             | bake | ms 14.0833333333333
925             | bake | se [0.36514837 0.40138649]
926             | bake | ss 14.0833333333333
927             | bake || err df 6
928              
929             This is implemented as a call to L, with an C
930             subjects vector.
931              
932             =cut
933              
934             *anova = \&PDL::anova;
935             sub PDL::anova {
936             my ($y, @args) = @_;
937             anova_rptd($y, undef, @args);
938             }
939              
940             sub _interactions {
941             my ($ivs_ref, $idv) = @_;
942             my (@inter, @idv_inter);
943             for my $nway ( 2 .. @$ivs_ref ) {
944             my $iter_idv = _combinations( $nway, [0..$#$ivs_ref] );
945             while ( my @v = &$iter_idv() ) {
946             push @inter, interaction_code(@$ivs_ref[@v]);
947             push @idv_inter, join ' ~ ', @$idv[@v];
948             }
949             }
950             (\@inter, \@idv_inter);
951             }
952              
953             # now prepare for cell mean
954             sub _interactions_cm {
955             my ($ivs_ref, $pdl_ivs_raw) = @_;
956             my ($dim0, @inter_cm, @inter_cmo) = $ivs_ref->[0]->dim(0);
957             for my $nway ( 2 .. @$ivs_ref ) {
958             my $iter_idv = _combinations( $nway, [0..$#$ivs_ref] );
959             while ( my @v = &$iter_idv() ) {
960             my @i_cm;
961             for my $o ( 0 .. $dim0 - 1 ) {
962             push @i_cm, join '', map $_->slice("($o)"), @$pdl_ivs_raw[@v];
963             }
964             my ($inter, $map) = effect_code( \@i_cm );
965             push @inter_cm, $inter;
966             # get the order to put means in correct multi dim pdl pos
967             # this is order in var_e dim(1)
968             my @levels = sort { $map->{$a} <=> $map->{$b} } keys %$map;
969             # this is order needed for cell mean
970             my @i_cmo = sort { reverse($levels[$a]) cmp reverse($levels[$b]) }
971             0 .. $#levels;
972             push @inter_cmo, pdl @i_cmo;
973             }
974             }
975             (\@inter_cmo, \@inter_cm);
976             }
977              
978             sub _cell_means {
979             my ($data, $ivs_cm_ref, $i_cmo_ref, $idv, $pdl_ivs_raw) = @_;
980             my %ind_id;
981             @ind_id{ @$idv } = 0..$#$idv;
982             my %cm;
983             my $i = 0;
984             for (@$ivs_cm_ref) {
985             confess "_cell_means passed empty ivs_cm_ref ndarray at pos $i" if $_->isempty;
986             my $last = zeroes $_->dim(0);
987             my $i_neg = which $_->slice(':',0) == -1;
988             $last->slice($i_neg) .= 1;
989             $_->where($_ == -1) .= 0;
990             $_ = $_->glue(1, $last);
991             my @v = split ' ~ ', $idv->[$i];
992             my @shape = map $pdl_ivs_raw->[$_]->uniq->nelem, @ind_id{@v};
993             my ($m, $ss) = $data->centroid( $_ );
994             $m = $m->slice($i_cmo_ref->[$i])->sever;
995             $ss = $ss->slice($i_cmo_ref->[$i])->sever;
996             $m = $m->reshape(@shape);
997             my $se = sqrt( ($ss/($_->sumover - 1)) / $_->sumover )->reshape(@shape);
998             $cm{ "| $idv->[$i] | m" } = $m;
999             $cm{ "| $idv->[$i] | se" } = $se;
1000             $i++;
1001             }
1002             \%cm;
1003             }
1004              
1005             # http://www.perlmonks.org/?node_id=371228
1006             sub _combinations {
1007             my ($num, $arr) = @_;
1008             return sub { return }
1009             if $num == 0 or $num > @$arr;
1010             my @pick;
1011             return sub {
1012             return @$arr[ @pick = ( 0 .. $num - 1 ) ]
1013             unless @pick;
1014             my $i = $#pick;
1015             $i-- until $i < 0 or $pick[$i]++ < @$arr - $num + $i;
1016             return if $i < 0;
1017             @pick[$i .. $#pick] = $pick[$i] .. $#$arr;
1018             return @$arr[@pick];
1019             };
1020             }
1021              
1022             =head2 anova_rptd
1023              
1024             =for ref
1025              
1026             Repeated measures and mixed model anova. Uses type III sum of squares.
1027              
1028             The standard error (se) for the means are based on the relevant mean
1029             squared error from the anova, ie it is pooled across levels of the effect.
1030             Will only calculate p-value (C) if there are more samples than the
1031             product of categories of all the IVs.
1032              
1033             Uses L, so supports bad values.
1034              
1035             For more on repeated measures ANOVA, see
1036             L,
1037             and for mixed models, see
1038             L.
1039              
1040             =for options
1041              
1042             Default options (case insensitive):
1043              
1044             V => 1, # carps if bad value in dv
1045             IVNM => [], # auto filled as ['IV_0', 'IV_1', ... ]
1046             BTWN => [], # indices of between-subject IVs (matches IVNM indices)
1047             PLOT => 0, # plots highest order effect
1048             # see plot_means() for more options
1049             WIN => undef, # for plotting
1050              
1051             =for usage
1052              
1053             Usage:
1054              
1055             Some fictional data: recall_w_beer_and_wings.txt
1056              
1057             Subject Beer Wings Recall
1058             Alex 1 1 8
1059             Alex 1 2 9
1060             Alex 1 3 12
1061             Alex 2 1 7
1062             Alex 2 2 9
1063             Alex 2 3 12
1064             Brian 1 1 12
1065             Brian 1 2 13
1066             Brian 1 3 14
1067             Brian 2 1 9
1068             Brian 2 2 8
1069             Brian 2 3 14
1070             ...
1071              
1072             # rtable allows text only in 1st row and col
1073             my ($data, $idv, $subj) = rtable 'recall_w_beer_and_wings.txt';
1074              
1075             my ($b, $w, $dv) = $data->dog;
1076             # subj and IVs can be 1d pdl or @ ref
1077             # subj must be the first argument
1078             my %m = $dv->anova_rptd( $subj, $b, $w, {ivnm=>['Beer', 'Wings']} );
1079              
1080             print "$_\t@{[$m{$_} =~ /^\n*(.*?)\n*\z/s]}\n" for sort keys %m
1081              
1082             ss_residual 19.0833333333333
1083             ss_subject 24.8333333333333
1084             ss_total 133.833333333333
1085             | Beer | F 9.39130434782609
1086             | Beer | F_p 0.0547977008378944
1087             | Beer | df 1
1088             | Beer | m [10.916667 8.9166667]
1089             | Beer | ms 24
1090             | Beer | se [0.4614791 0.4614791]
1091             | Beer | ss 24
1092             | Beer || err df 3
1093             | Beer || err ms 2.55555555555556
1094             | Beer || err ss 7.66666666666667
1095             | Beer ~ Wings | F 0.510917030567687
1096             | Beer ~ Wings | F_p 0.623881438624431
1097             | Beer ~ Wings | df 2
1098             | Beer ~ Wings | m [
1099             [ 10 7]
1100             [ 10.5 9.25]
1101             [12.25 10.5]
1102             ]
1103             | Beer ~ Wings | ms 1.625
1104             | Beer ~ Wings | se [
1105             [0.89170561 0.89170561]
1106             [0.89170561 0.89170561]
1107             [0.89170561 0.89170561]
1108             ]
1109             | Beer ~ Wings | ss 3.25000000000001
1110             | Beer ~ Wings || err df 6
1111             | Beer ~ Wings || err ms 3.18055555555555
1112             | Beer ~ Wings || err ss 19.0833333333333
1113             | Wings | F 4.52851711026616
1114             | Wings | F_p 0.0632754786153548
1115             | Wings | df 2
1116             | Wings | m [8.5 9.875 11.375]
1117             | Wings | ms 16.5416666666667
1118             | Wings | se [0.67571978 0.67571978 0.67571978]
1119             | Wings | ss 33.0833333333333
1120             | Wings || err df 6
1121             | Wings || err ms 3.65277777777778
1122             | Wings || err ss 21.9166666666667
1123              
1124             For mixed model anova, ie when there are between-subject IVs
1125             involved, feed the IVs as above, but specify in BTWN which IVs are
1126             between-subject. For example, if we had added age as a between-subject
1127             IV in the above example, we would do
1128              
1129             my %m = $dv->anova_rptd( $subj, $age, $b, $w,
1130             { ivnm=>['Age', 'Beer', 'Wings'], btwn=>[0] });
1131              
1132             =cut
1133              
1134             *anova_rptd = \&PDL::anova_rptd;
1135             sub PDL::anova_rptd {
1136             my $opt = ref($_[-1]) eq 'HASH' ? pop @_ : undef;
1137             my ($y, $subj, @ivs_raw) = @_;
1138             confess "Expected 1-D data, instead: ", $y->info if $y->ndims != 1;
1139             croak "Mismatched number of elements in DV and IV. Are you passing IVs the old-and-abandoned way?"
1140             if (ref $ivs_raw[0] eq 'ARRAY') and (@{ $ivs_raw[0] } != $y->nelem);
1141             for (@ivs_raw) {
1142             croak "too many dims in IV!" if ref $_ eq 'PDL' and $_->squeeze->ndims > 1
1143             }
1144             my %opt = (
1145             V => 1, # carps if bad value in dv
1146             IVNM => [], # auto filled as ['IV_0', 'IV_1', ... ]
1147             ( !defined($subj) ? () : (
1148             BTWN => [], # indices of between-subject IVs (matches IVNM indices)
1149             )),
1150             PLOT => 0, # plots highest order effect
1151             WIN => undef, # for plotting
1152             );
1153             if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt; }
1154             $opt{IVNM} = [ map { "IV_$_" } 0 .. $#ivs_raw ]
1155             if !$opt{IVNM} or !@{ $opt{IVNM} };
1156             (my ($dsgn, $idv), $y, my ($sj, $ivs_ref_filtrd, $pdl_ivs_raw, $ivs_ref, $err_ref)) = $y->anova_design_matrix($subj, @ivs_raw, \%opt);
1157             confess "anova_rptd: got singular matrix for X' x X"
1158             if any +($dsgn->t x $dsgn)->det == 0;
1159             my $b_full = _ols_main(1, $y, $dsgn->t, {CONST=>0});
1160              
1161             my %ret = (ss_total => $y->ss,
1162             ss_residual => $y->sse( sumover( $b_full * $dsgn ) ));
1163              
1164             if (defined $subj) {
1165             my @full = (@$ivs_ref, @$err_ref);
1166             my $has_btwn = @{ $opt{BTWN} };
1167             my @is_btwn; $is_btwn[$_] = 1 for @{ $opt{BTWN} };
1168             my @within_inds = 0 .. $#ivs_raw;
1169             @within_inds = grep !$is_btwn[$_], @within_inds if $has_btwn;
1170             my $within_df = pdl(map $_->dim(1), @full[@within_inds])->prodover->sclr;
1171             EFFECT: for my $k (0 .. $#full) {
1172             my $e = ($k > $#$ivs_ref)? '| err' : '';
1173             my $i = ($k > $#$ivs_ref)? $k - @$ivs_ref : $k;
1174             my $i_pref = $k == $#full ? undef : "| $idv->[$i] |";
1175              
1176             if (!defined $full[$k]) { # ss_residual as error
1177             $ret{ "$i_pref$e ss" } = $ret{ss_residual};
1178             # highest ord inter for purely within design, (p-1)*(q-1)*(n-1)
1179             my $factor = (ref $full[-1] ? $full[-1] : $err_ref->[$full[-1]])->dim(1);
1180             my $df = $ret{ "$i_pref$e df" } = $factor * $within_df;
1181             die "${i_pref}residual df = 0" if $df <= 0;
1182             $ret{ "$i_pref$e ms" } = $ret{ "$i_pref$e ss" } / $df;
1183             } elsif (ref $full[$k]) { # unique error term
1184             next EFFECT
1185             unless my @G = grep $_ != $k && defined $full[$_], 0 .. $#full;
1186             my $G = ones($y->dim(0))->glue(1, grep ref $_, @full[@G]);
1187             my $b_G = $y->ols_t( $G, {CONST=>0} );
1188             my $ss = $ret{$k == $#full ? 'ss_subject' : "$i_pref$e ss"}
1189             = $y->sse(sumover($b_G * $G->transpose)) - $ret{ss_residual};
1190             if ($k != $#full) {
1191             my $df = $ret{"$i_pref$e df"} = $full[$k]->dim(1);
1192             die "residual df = 0" if $df <= 0;
1193             $ret{"$i_pref$e ms"} = $ss / $df;
1194             }
1195             } else { # repeating error term
1196             my $ss = $ret{$k == $#full ? 'ss_subject' : "$i_pref$e ss"}
1197             = $ret{"| $idv->[$full[$k]] |$e ss"};
1198             if ($k != $#full) {
1199             my $df = $ret{"$i_pref$e df"} = $ret{"| $idv->[$full[$k]] |$e df"};
1200             die "residual df = 0" if $df <= 0;
1201             $ret{"$i_pref$e ms"} = $ss / $df;
1202             }
1203             }
1204             }
1205             } else {
1206             $ret{ss_model} = $ret{ss_total} - $ret{ss_residual};
1207             $ret{F_df} = pdl(my $F_df0 = $dsgn->dim(0) - 1, my $df1 = $y->nelem - $dsgn->dim(0));
1208             $ret{ms_model} = $ret{ss_model} / $F_df0;
1209             $ret{ms_residual} = $ret{ss_residual} / $df1;
1210             $ret{F} = $ret{ms_model} / $ret{ms_residual};
1211             $ret{F_p} = 1 - $ret{F}->gsl_cdf_fdist_P( $F_df0, $df1 )
1212             if $CDF and $df1 > 0;
1213              
1214             # get IV ss from $ivs_ref instead of $dsgn pdl
1215             my $ones = ones($y->dim(0));
1216             for my $k (0 .. $#$ivs_ref) {
1217             my $G = $ones->glue(1, @$ivs_ref[grep $_ != $k, 0 .. $#$ivs_ref]);
1218             my $b_G = $y->ols_t( $G, {CONST=>0} );
1219             $ret{ "| $idv->[$k] | ss" }
1220             = $y->sse( sumover($b_G * $G->transpose) ) - $ret{ss_residual};
1221             my $df0 = $ret{ "| $idv->[$k] | df" } = $ivs_ref->[$k]->dim(1);
1222             $ret{ "| $idv->[$k] || err df" } = $df1;
1223             die "residual df = 0" if $df1 <= 0;
1224             $ret{ "| $idv->[$k] | ms" } = $ret{ "| $idv->[$k] | ss" } / $df0;
1225             }
1226             }
1227             # have all iv, inter, and error effects. get F and F_p
1228             for (0 .. $#$ivs_ref) {
1229             my $ms_residual = defined $subj ? $ret{ "| $idv->[$_] || err ms" } : $ret{ms_residual};
1230             my ($df0, $df1) = @ret{"| $idv->[$_] | df" , "| $idv->[$_] || err df"};
1231             my $F = $ret{ "| $idv->[$_] | F" } = $ret{ "| $idv->[$_] | ms" } / $ms_residual;
1232             $ret{ "| $idv->[$_] | F_p" } = 1 - $F->gsl_cdf_fdist_P($df0, $df1)
1233             if $CDF and $df1 > 0;
1234             }
1235              
1236             for (keys %ret) {ref $ret{$_} eq 'PDL' and $ret{$_} = $ret{$_}->squeeze};
1237              
1238             my ($inter_cmo_ref, $inter_cm_ref)
1239             = _interactions_cm($ivs_ref_filtrd, $pdl_ivs_raw);
1240             # append inter info to cell means effects
1241             my $ivs_cm_ref = [@$ivs_ref_filtrd, @$inter_cm_ref];
1242             my @i_cmo_ref = map pdl(values %{ (effect_code($_))[1] })->qsort, @$pdl_ivs_raw;
1243             #line 1068 "lib/PDL/Stats/GLM.pd"
1244             push @i_cmo_ref, @$inter_cmo_ref;
1245              
1246             my $cm_ref
1247             = _cell_means( $y, $ivs_cm_ref, \@i_cmo_ref, $idv, $pdl_ivs_raw );
1248             if (defined $subj) {
1249             my @ls = map { $_->uniq->nelem } @$pdl_ivs_raw;
1250             $cm_ref
1251             = _fix_rptd_se( $cm_ref, \%ret, $opt{IVNM}, \@ls, $sj->uniq->nelem );
1252             }
1253             # integrate mean and se into %ret
1254             @ret{ keys %$cm_ref } = values %$cm_ref;
1255              
1256             my $highest = join(' ~ ', @{ $opt{IVNM} });
1257             $cm_ref->{"| $highest | m"}->plot_means( $cm_ref->{"| $highest | se"},
1258             { %opt, IVNM=>$idv } )
1259             if $opt{PLOT};
1260              
1261             %ret;
1262             }
1263              
1264             =head2 anova_design_matrix
1265              
1266             =for ref
1267              
1268             Effect-coded design matrix for anova, including repeated-measures and
1269             mixed-model. The C for use in linear regression i.e. C.
1270              
1271             Added in 0.854. See L for more.
1272              
1273             Supports bad value in the dependent and independent variables. It
1274             automatically removes bad data listwise, i.e. remove a subject's data if
1275             there is any cell missing for the subject.
1276              
1277             =for options
1278              
1279             Default options (case insensitive):
1280              
1281             V => 1, # carps if bad value in dv
1282             IVNM => [], # auto filled as ['IV_0', 'IV_1', ... ]
1283             BTWN => [], # indices of between-subject IVs (matches IVNM indices)
1284              
1285             =for usage
1286              
1287             $matrix = $dv->anova_design_matrix(undef, $b, $w, {ivnm=>[qw(b w)]});
1288             $matrix = $dv->anova_design_matrix(
1289             $subj, $b, $w, {ivnm=>[qw(b w)]}); # repeated-measures
1290             $matrix = $dv->anova_design_matrix(
1291             $subj, $b, $w, {ivnm=>[qw(b w)], btwn=>['b']}); # mixed-model
1292             ($matrix, $ivnm_ref) = $dv->anova_design_matrix(
1293             $subj, $b, $w, {ivnm=>[qw(b w)], btwn=>['b']}); # list context also names
1294              
1295             =cut
1296              
1297             *anova_design_matrix = \&PDL::anova_design_matrix;
1298             sub PDL::anova_design_matrix {
1299             my $opt = ref($_[-1]) eq 'HASH' ? pop @_ : undef;
1300             my ($y, $subj, @ivs_raw) = @_;
1301             confess "No IVs: did you omit 'undef' for anova?" if !@ivs_raw;
1302             confess "Expected 1-D data, instead: ", $y->info if $y->ndims != 1;
1303             for (@ivs_raw) {
1304             croak "too many dims in IV!" if ref $_ eq 'PDL' and $_->squeeze->ndims > 1;
1305             }
1306             my %opt = (
1307             V => 1, # carps if bad value in dv
1308             IVNM => [], # auto filled as ['IV_0', 'IV_1', ... ]
1309             ( !defined($subj) ? () : (
1310             BTWN => [], # indices of between-subject IVs (matches IVNM indices)
1311             )),
1312             );
1313             if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt; }
1314             $opt{IVNM} = [ map { "IV_$_" } 0 .. $#ivs_raw ]
1315             if !$opt{IVNM} or !@{ $opt{IVNM} };
1316             my @idv_orig = @{ $opt{IVNM} };
1317             my @pdl_ivs_raw = map scalar PDL::Stats::Basic::code_ivs($_), @ivs_raw;
1318             my $pdl_ivs = pdl(\@pdl_ivs_raw);
1319             # explicit set badflag because pdl() removes badflag
1320             $pdl_ivs->badflag( scalar grep $_->badflag, @pdl_ivs_raw );
1321             my $sj;
1322             if (defined($subj)) {
1323             # delete bad data listwise ie remove subj if any cell missing
1324             $sj = PDL::Stats::Basic::code_ivs($subj);
1325             my $ibad = which( $y->isbad | nbadover($pdl_ivs->transpose) );
1326             my $sj_bad = $sj->slice($ibad)->uniq;
1327             if ($sj_bad->nelem) {
1328             warn $sj_bad->nelem . " subjects with missing data removed\n"
1329             if $opt{V};
1330             $sj = $sj->setvaltobad($_)
1331             for (list $sj_bad);
1332             my $igood = which $sj->isgood;
1333             for ($y, $sj, @pdl_ivs_raw) {
1334             $_ = $_->slice( $igood )->sever;
1335             $_->badflag(0);
1336             }
1337             }
1338             } else {
1339             ($y, $pdl_ivs) = _rm_bad_value( $y, $pdl_ivs );
1340             if ($opt{V} and $y->nelem < $pdl_ivs_raw[0]->nelem) {
1341             warn sprintf "%d subjects with missing data removed\n", $pdl_ivs_raw[0]->nelem - $y->nelem;
1342             }
1343             @pdl_ivs_raw = $pdl_ivs->dog;
1344             }
1345             my @ivs_ref_fltrd = map scalar effect_code($_), @pdl_ivs_raw;
1346             my ($ivs_inter_ref, $idv_inter) = _interactions(\@ivs_ref_fltrd, \@idv_orig);
1347             # append inter info to main effects
1348             my $ivs_ref = [@ivs_ref_fltrd, @$ivs_inter_ref];
1349             my @idv = (@idv_orig, @$idv_inter);
1350             # matches $ivs_ref, with an extra last pdl for subj effect
1351             my $err_ref = !defined($subj) ? [] :
1352             _add_errors( $sj, $ivs_ref, \@idv, \@pdl_ivs_raw, $opt{BTWN} );
1353             for (grep ref $err_ref->[$_], 0..$#$err_ref) {
1354             my ($null_row_ids, $non_null_row_ids) = $err_ref->[$_]->zcover->which_both;
1355             confess "got null columns $null_row_ids in error entry #$_ ($idv[$_])"
1356             if !$null_row_ids->isempty;
1357             }
1358             my $dsgn = PDL::glue(1, ones($y->dim(0)), @$ivs_ref, (grep ref($_), @$err_ref))->t;
1359             !wantarray ? $dsgn : ($dsgn, \@idv, $y, $sj, \@ivs_ref_fltrd, \@pdl_ivs_raw, $ivs_ref, $err_ref);
1360             }
1361              
1362             # code (btwn group) subjects. Rutherford (2011) pp 208-209
1363             sub _code_btwn {
1364             my ($subj, $btwn) = @_;
1365             my (@grp, %grp_s);
1366             for my $n (0 .. $subj->nelem - 1) {
1367             # construct string to code group membership
1368             # something not treated as BAD by code_ivs to start off marking group membership
1369             # if no $btwn, everyone ends up in the same grp
1370             my $s = '_' . join '', map $_->slice($n), @$btwn;
1371             push @grp, $s; # group membership
1372             $s .= $subj->slice($n); # keep track of total uniq subj
1373             $grp_s{$s} = 1;
1374             }
1375             my $grp = PDL::Stats::Kmeans::iv_cluster \@grp;
1376             my $spdl = zeroes $subj->dim(0), keys(%grp_s) - $grp->dim(1);
1377             my $d1 = 0;
1378             for my $g (0 .. $grp->dim(1)-1) {
1379             my $col_inds = which $grp->slice(':',$g);
1380             my $gsub = $subj->slice( $col_inds )->effect_code;
1381             my ($nobs, $nsub) = $gsub->dims;
1382             $spdl->slice($col_inds, [$d1,$d1+$nsub-1]) .= $gsub;
1383             $d1 += $nsub;
1384             }
1385             $spdl;
1386             }
1387              
1388             sub _add_errors {
1389             my ($subj, $ivs_ref, $idv, $raw_ivs, $btwn) = @_;
1390             my $spdl = _code_btwn($subj, [@$raw_ivs[@$btwn]]);
1391             # if btwn factor involved, or highest order inter for within factors
1392             # elem is undef, so that
1393             # @errors ind matches @$ivs_ref, with an extra elem at the end for subj
1394             # mark btwn factors for error terms
1395             # same error term for B(wn) and A(btwn) x B(wn) (Rutherford, p205)
1396             my %is_btwn = map +($_=>1), @$idv[ @$btwn ];
1397             my $has_btwn = keys %is_btwn;
1398             my %idv2indx = map +($idv->[$_]=>$_), 0..$#$idv;
1399             my $ie_subj;
1400             my @errors = map {
1401             my @fs = split ' ~ ', $idv->[$_];
1402             # separate bw and wn factors
1403             # if only bw, error is bw x subj
1404             # if only wn or wn and bw, error is wn x subj
1405             my @bw = !$has_btwn ? () : grep $is_btwn{$_}, @fs;
1406             my @wn = !$has_btwn ? @fs : grep !$is_btwn{$_}, @fs;
1407             $ie_subj = $_ if !defined($ie_subj) and !@wn;
1408             my $err = join ' ~ ', @wn ? @wn : @bw;
1409             # highest order inter of within factors, use ss_residual as error
1410             if ( @wn == @$raw_ivs - @$btwn ) { undef }
1411             # repeating btwn factors use ss_subject as error
1412             elsif (!@wn and $_ > $ie_subj) { $ie_subj }
1413             # repeating error term
1414             elsif ($_ > $idv2indx{$err}) { $idv2indx{$err} }
1415             elsif (@wn) { interaction_code($ivs_ref->[$_], $spdl) }
1416             else { $spdl }
1417             } 0 .. $#$ivs_ref;
1418             push @errors, $has_btwn ? $ie_subj : $spdl;
1419             \@errors;
1420             }
1421              
1422             sub _fix_rptd_se {
1423             # if ivnm lvls_ref for within ss only this can work for mixed design
1424             my ($cm_ref, $ret, $ivnm, $lvls_ref, $n) = @_;
1425             my @se = map /^\| (.+?) \| se$/ ? $1 : (), keys %$cm_ref;
1426             my @n_obs = map {
1427             my @ivs = split / ~ /, $_;
1428             my $i_ivs = which_id $ivnm, \@ivs;
1429             my $icollapsed = setops pdl(0 .. $#$ivnm), 'XOR', $i_ivs;
1430             my $collapsed = $icollapsed->nelem?
1431             pdl( @$lvls_ref[(list $icollapsed)] )->prodover
1432             : 1
1433             ;
1434             $n * $collapsed;
1435             } @se;
1436             for my $i (0 .. $#se) {
1437             $cm_ref->{"| $se[$i] | se"}
1438             .= sqrt( $ret->{"| $se[$i] || err ms"} / $n_obs[$i] );
1439             }
1440             $cm_ref;
1441             }
1442              
1443             =head2 dummy_code
1444              
1445             =for ref
1446              
1447             Dummy coding of nominal variable (perl @ ref or 1d pdl) for use in regression.
1448              
1449             Supports BAD value (missing or 'BAD' values result in the corresponding pdl elements being marked as BAD).
1450              
1451             =for usage
1452              
1453             pdl> @a = qw(a a a b b b c c c)
1454             pdl> p $a = dummy_code(\@a)
1455             [
1456             [1 1 1 0 0 0 0 0 0]
1457             [0 0 0 1 1 1 0 0 0]
1458             ]
1459              
1460             =cut
1461              
1462             *dummy_code = \&PDL::dummy_code;
1463             sub PDL::dummy_code {
1464             my ($var_ref) = @_;
1465             my $var_e = effect_code( $var_ref );
1466             $var_e->where( $var_e == -1 ) .= 0;
1467             $var_e;
1468             }
1469              
1470             =head2 effect_code
1471              
1472             =for ref
1473              
1474             Unweighted effect coding of nominal variable (perl @ ref or 1d pdl) for
1475             use in regression. returns in @ context coded pdl and % ref to level -
1476             pdl->dim(1) index.
1477              
1478             Supports BAD value (missing or 'BAD' values result in the corresponding
1479             pdl elements being marked as BAD).
1480              
1481             =for usage
1482              
1483             my @var = qw( a a a b b b c c c );
1484             my ($var_e, $map) = effect_code( \@var );
1485              
1486             print $var_e . $var_e->info . "\n";
1487              
1488             [
1489             [ 1 1 1 0 0 0 -1 -1 -1]
1490             [ 0 0 0 1 1 1 -1 -1 -1]
1491             ]
1492             PDL: Double D [9,2]
1493              
1494             print "$_\t$map->{$_}\n" for sort keys %$map
1495             a 0
1496             b 1
1497             c 2
1498              
1499             =cut
1500              
1501             *effect_code = \&PDL::effect_code;
1502             sub PDL::effect_code {
1503             my ($var_ref) = @_;
1504             my ($var, $map_ref) = PDL::Stats::Basic::code_ivs( $var_ref );
1505             my $var_max = $var->max;
1506             confess "effect_code called with only one unique value" if $var_max < 1;
1507             my $var_e = yvals( float, $var->nelem, $var_max ) == $var;
1508             $var_e->slice(which( $var == $var_max ), ) .= -1;
1509             $var_e = $var_e->setbadif( $var->isbad ) if $var->badflag;
1510             wantarray ? ($var_e, $map_ref) : $var_e;
1511             }
1512              
1513             =head2 effect_code_w
1514              
1515             =for ref
1516              
1517             Weighted effect code for nominal variable. returns in @ context coded pdl and % ref to level - pdl->dim(1) index.
1518              
1519             Supports BAD value (missing or 'BAD' values result in the corresponding pdl elements being marked as BAD).
1520              
1521             =for usage
1522              
1523             pdl> @a = qw( a a b b b c c )
1524             pdl> p $a = effect_code_w(\@a)
1525             [
1526             [ 1 1 0 0 0 -1 -1]
1527             [ 0 0 1 1 1 -1.5 -1.5]
1528             ]
1529              
1530             =cut
1531              
1532             *effect_code_w = \&PDL::effect_code_w;
1533             sub PDL::effect_code_w {
1534             my ($var_ref) = @_;
1535             my ($var_e, $map_ref) = effect_code( $var_ref );
1536             return wantarray ? ($var_e, $map_ref) : $var_e if $var_e->sum == 0;
1537             my $pos = $var_e == 1;
1538             my $neg = $var_e == -1;
1539             my $w = $pos->sumover / $neg->sumover;
1540             my $neg_ind = $neg->whichND;
1541             $var_e->indexND($neg_ind) *= $w->slice($neg_ind->slice('(1)'));
1542             wantarray ? ($var_e, $map_ref) : $var_e;
1543             }
1544              
1545             =head2 interaction_code
1546              
1547             Returns the coded interaction term for effect-coded variables.
1548              
1549             Supports BAD value (missing or 'BAD' values result in the corresponding
1550             pdl elements being marked as BAD).
1551              
1552             =for usage
1553              
1554             pdl> $a = sequence(6) > 2
1555             pdl> p $a = $a->effect_code
1556             [
1557             [ 1 1 1 -1 -1 -1]
1558             ]
1559              
1560             pdl> $b = pdl( qw( 0 1 2 0 1 2 ) )
1561             pdl> p $b = $b->effect_code
1562             [
1563             [ 1 0 -1 1 0 -1]
1564             [ 0 1 -1 0 1 -1]
1565             ]
1566              
1567             pdl> p $ab = interaction_code( $a, $b )
1568             [
1569             [ 1 0 -1 -1 -0 1]
1570             [ 0 1 -1 -0 -1 1]
1571             ]
1572              
1573             =cut
1574              
1575             *interaction_code = \&PDL::interaction_code;
1576             sub PDL::interaction_code {
1577             my $i = ones( $_[0]->dim(0), 1 );
1578             $i = ($i * $_->dummy(1))->clump(1,2) for @_;
1579             $i;
1580             }
1581              
1582             =head2 ols
1583              
1584             =for ref
1585              
1586             Ordinary least squares regression, aka linear regression.
1587              
1588             Unlike B, ols is not broadcastable, but it can handle bad value (by
1589             ignoring observations with bad value in dependent or independent variables
1590             list-wise) and returns the full model in list context with various stats.
1591              
1592             IVs ($x) should be pdl dims $y->nelem or $y->nelem x n_iv. Do not supply
1593             the constant vector in $x. Intercept is automatically added and returned
1594             as FIRST of the coeffs if CONST=>1. Returns full model in list context
1595             and coeff in scalar context.
1596              
1597             For more on multiple linear regression see
1598             L.
1599              
1600             =for options
1601              
1602             Default options (case insensitive):
1603              
1604             CONST => 1,
1605             PLOT => 0, # see plot_residuals() for plot options
1606             WIN => undef, # for plotting
1607              
1608             =for usage
1609              
1610             Usage:
1611              
1612             # suppose this is a person's ratings for top 10 box office movies
1613             # ascending sorted by box office
1614              
1615             pdl> $y = pdl '[1 1 2 2 2 2 4 4 5 5]'
1616              
1617             # construct IV with linear and quadratic component
1618              
1619             pdl> p $x = cat sequence(10), sequence(10)**2
1620             [
1621             [ 0 1 2 3 4 5 6 7 8 9]
1622             [ 0 1 4 9 16 25 36 49 64 81]
1623             ]
1624              
1625             pdl> %m = $y->ols( $x )
1626              
1627             pdl> p "$_\t@{[$m{$_} =~ /^\n*(.*?)\n*\z/s]}\n" for sort keys %m
1628              
1629             F 40.4225352112676
1630             F_df [2 7]
1631             F_p 0.000142834216344756
1632             R2 0.920314253647587
1633              
1634             # coeff constant linear quadratic
1635             b [0.981818 0.212121 0.030303]
1636             b_p [0.039910 0.328001 0.203034]
1637             b_se [0.389875 0.201746 0.021579]
1638             b_t [2.518284 1.051422 1.404218]
1639             ss_model 19.8787878787879
1640             ss_residual 1.72121212121212
1641             ss_total 21.6
1642             y_pred [0.98181818 1.2242424 1.5272727 ... 4.6181818 5.3454545]
1643              
1644             =cut
1645              
1646             *ols = \&PDL::ols;
1647             sub PDL::ols {
1648             _ols_common(0, @_);
1649             }
1650              
1651             # ivs = [nobs x nivs] so can `dog` retval
1652             sub _rm_bad_value {
1653             my ($y, $ivs) = @_;
1654             return ($y, $ivs, undef) if !$y->check_badflag and !$ivs->check_badflag;
1655             my $idx = which($y->isgood & (nbadover ($ivs->transpose)==0));
1656             $_ = $_->slice($idx)->sever for $y, $ivs;
1657             $_->badflag(0) for $y, $ivs;
1658             ($y, $ivs, $idx);
1659             }
1660              
1661             =head2 ols_rptd
1662              
1663             =for ref
1664              
1665             Repeated measures linear regression.
1666             Handles purely within-subject design for now.
1667              
1668             (Lorch & Myers, 1990; Van den Noortgate & Onghena, 2006).
1669             See F for an example using the Lorch and Myers data.
1670              
1671             =for usage
1672              
1673             Usage:
1674              
1675             # This is the example from Lorch and Myers (1990),
1676             # a study on how characteristics of sentences affected reading time
1677             # Three within-subject IVs:
1678             # SP -- serial position of sentence
1679             # WORDS -- number of words in sentence
1680             # NEW -- number of new arguments in sentence
1681              
1682             # $subj can be 1D pdl or @ ref and must be the first argument
1683             # IV can be 1D @ ref or pdl
1684             # 1D @ ref is effect coded internally into pdl
1685             # pdl is left as is
1686              
1687             my %r = $rt->ols_rptd( $subj, $sp, $words, $new );
1688              
1689             print "$_\t$r{$_}\n" for sort keys %r;
1690              
1691             ss_residual 58.3754646504336
1692             ss_subject 51.8590337714286
1693             ss_total 405.188241771429
1694             # SP WORDS NEW
1695             F [ 7.208473 61.354153 1.0243311]
1696             F_p [0.025006181 2.619081e-05 0.33792837]
1697             coeff [0.33337285 0.45858933 0.15162986]
1698             df [1 1 1]
1699             df_err [9 9 9]
1700             ms [ 18.450705 73.813294 0.57026483]
1701             ms_err [ 2.5595857 1.2030692 0.55671923]
1702             ss [ 18.450705 73.813294 0.57026483]
1703             ss_err [ 23.036272 10.827623 5.0104731]
1704              
1705             =cut
1706              
1707             *ols_rptd = \&PDL::ols_rptd;
1708             sub PDL::ols_rptd {
1709             my ($y, $subj, @ivs_raw) = @_;
1710              
1711             $y = $y->squeeze;
1712             $y->getndims > 1 and
1713             croak "ols_rptd does not support broadcasting";
1714              
1715             my @ivs = map { (ref $_ eq 'PDL' and $_->ndims > 1)? $_
1716             : ref $_ eq 'PDL' ? $_->dummy(1)
1717             : scalar effect_code($_)
1718             ;
1719             } @ivs_raw;
1720              
1721             my %r;
1722              
1723             $r{ss_total} = $y->ss;
1724              
1725             # STEP 1: subj
1726             my $s = effect_code $subj;
1727             my $b_s = $y->ols_t($s);
1728             my $pred = sumover($b_s->slice('1:-1') * $s->transpose) + $b_s->slice(0);
1729             $r{ss_subject} = $r{ss_total} - $y->sse( $pred );
1730              
1731             # STEP 2: add predictor variables
1732             my $iv_p = $s->glue(1, @ivs);
1733             my $b_p = $y->ols_t($iv_p);
1734              
1735             # only care about coeff for predictor vars. no subj or const coeff
1736             $r{coeff} = $b_p->slice([-@ivs,-1])->sever;
1737              
1738             # get total sse for this step
1739             $pred = sumover($b_p->slice('1:-1') * $iv_p->transpose) + $b_p->slice(0);
1740             my $ss_pe = $y->sse( $pred );
1741              
1742             # get predictor ss by successively reducing the model
1743             $r{ss} = zeroes scalar(@ivs);
1744             for my $i (0 .. $#ivs) {
1745             my $iv = $s->glue(1, @ivs[ grep $_ != $i, 0..$#ivs ]);
1746             my $b = $y->ols_t($iv);
1747             $pred = sumover($b->slice('1:-1') * $iv->transpose) + $b->slice(0);
1748             $r{ss}->slice($i) .= $y->sse($pred) - $ss_pe;
1749             }
1750              
1751             # STEP 3: get predictor x subj interaction as error term
1752             my $iv_e = PDL::glue 1, map interaction_code( $s, $_ ), @ivs;
1753              
1754             # get total sse for this step. full model now.
1755             my $b_f = $y->ols_t( $iv_p->glue(1,$iv_e) );
1756             $pred = sumover($b_f->slice('1:-1') * $iv_p->glue(1,$iv_e)->transpose) + $b_f->slice(0);
1757             $r{ss_residual} = $y->sse( $pred );
1758              
1759             # get predictor x subj ss by successively reducing the error term
1760             $r{ss_err} = zeroes scalar(@ivs);
1761             for my $i (0 .. $#ivs) {
1762             my $iv = $iv_p->glue(1, map interaction_code($s, $_),
1763             @ivs[grep $_ != $i, 0..$#ivs]);
1764             my $b = $y->ols_t($iv);
1765             my $pred = sumover($b->slice('1:-1') * $iv->transpose) + $b->slice(0);
1766             $r{ss_err}->slice($i) .= $y->sse($pred) - $r{ss_residual};
1767             }
1768              
1769             # Finally, get MS, F, etc
1770              
1771             $r{df} = pdl( map $_->squeeze->ndims, @ivs );
1772             $r{ms} = $r{ss} / $r{df};
1773              
1774             $r{df_err} = $s->dim(1) * $r{df};
1775             $r{ms_err} = $r{ss_err} / $r{df_err};
1776              
1777             $r{F} = $r{ms} / $r{ms_err};
1778              
1779             $r{F_p} = 1 - $r{F}->gsl_cdf_fdist_P( $r{df}, $r{df_err} )
1780             if $CDF;
1781              
1782             %r;
1783             }
1784              
1785             =head2 logistic
1786              
1787             =for ref
1788              
1789             Logistic regression with maximum likelihood estimation using L.
1790              
1791             IVs ($x) should be pdl dims $y->nelem or $y->nelem x n_iv. Do not supply
1792             the constant vector in $x. It is included in the model and returned as
1793             LAST of coeff. Returns full model in list context and coeff in scalar
1794             context.
1795              
1796             The significance tests are likelihood ratio tests (-2LL deviance)
1797             tests. IV significance is tested by comparing deviances between the
1798             reduced model (ie with the IV in question removed) and the full model.
1799              
1800             ***NOTE: the results here are qualitatively similar to but not identical
1801             with results from R, because different algorithms are used for the
1802             nonlinear parameter fit. Use with discretion***
1803              
1804             =for options
1805              
1806             Default options (case insensitive):
1807              
1808             INITP => zeroes( $x->dim(1) + 1 ), # n_iv + 1
1809             MAXIT => 1000,
1810             EPS => 1e-7,
1811              
1812             =for usage
1813              
1814             Usage:
1815              
1816             # suppose this is whether a person had rented 10 movies
1817              
1818             pdl> p $y = ushort( random(10)*2 )
1819             [0 0 0 1 1 0 0 1 1 1]
1820              
1821             # IV 1 is box office ranking
1822              
1823             pdl> p $x1 = sequence(10)
1824             [0 1 2 3 4 5 6 7 8 9]
1825              
1826             # IV 2 is whether the movie is action- or chick-flick
1827              
1828             pdl> p $x2 = sequence(10) % 2
1829             [0 1 0 1 0 1 0 1 0 1]
1830              
1831             # concatenate the IVs together
1832              
1833             pdl> p $x = cat $x1, $x2
1834             [
1835             [0 1 2 3 4 5 6 7 8 9]
1836             [0 1 0 1 0 1 0 1 0 1]
1837             ]
1838              
1839             pdl> %m = $y->logistic( $x )
1840              
1841             pdl> p "$_\t$m{$_}\n" for sort keys %m
1842              
1843             D0 13.8629436111989
1844             Dm 9.8627829791575
1845             Dm_chisq 4.00016063204141
1846             Dm_df 2
1847             Dm_p 0.135324414081692
1848             # ranking genre constant
1849             b [0.41127706 0.53876358 -2.1201285]
1850             b_chisq [ 3.5974504 0.16835559 2.8577151]
1851             b_p [0.057868258 0.6815774 0.090936587]
1852             iter 12
1853             y_pred [0.10715577 0.23683909 ... 0.76316091 0.89284423]
1854              
1855             # to get the covariance out, supply a true value for the COV option:
1856             pdl> %m = $y->logistic( $x, {COV=>1} )
1857             pdl> p $m{cov};
1858              
1859             =cut
1860              
1861             *logistic = \&PDL::logistic;
1862             sub PDL::logistic {
1863             require PDL::Fit::LM;
1864             my ( $self, $ivs, $opt ) = @_;
1865             $self = $self->squeeze;
1866             # make compatible w multiple var cases
1867             $ivs->getndims == 1 and $ivs = $ivs->dummy(1);
1868             $self->dim(0) != $ivs->dim(0) and
1869             carp "mismatched n btwn DV and IV!";
1870             my %opt = (
1871             INITP => zeroes( $ivs->dim(1) + 1 ), # n_ivs + 1
1872             MAXIT => 1000,
1873             EPS => 1e-7,
1874             );
1875             if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt; }
1876             # not using it atm
1877             $opt{WT} = 1;
1878             # Use lmfit. Fourth input argument is reference to user-defined
1879             # copy INITP so we have the original value when needed
1880             my ($yfit,$coeff,$cov,$iter)
1881             = PDL::Fit::LM::lmfit($ivs, $self, $opt{WT}, \&_logistic, $opt{INITP}->copy,
1882             { Maxiter=>$opt{MAXIT}, Eps=>$opt{EPS} } );
1883             # apparently at least coeff is child of some pdl
1884             # which is changed in later lmfit calls
1885             $yfit = $yfit->copy;
1886             $coeff = $coeff->copy;
1887             return $coeff unless wantarray;
1888             my %ret;
1889             my $n0 = $self->where($self == 0)->nelem;
1890             my $n1 = $self->nelem - $n0;
1891             $ret{cov} = $cov if $opt{COV};
1892             $ret{D0} = -2*($n0 * log($n0 / $self->nelem) + $n1 * log($n1 / $self->nelem));
1893             $ret{Dm} = sum( $self->dvrs( $yfit ) ** 2 );
1894             $ret{Dm_chisq} = $ret{D0} - $ret{Dm};
1895             $ret{Dm_df} = $ivs->dim(1);
1896             $ret{Dm_p}
1897             = 1 - PDL::GSL::CDF::gsl_cdf_chisq_P( $ret{Dm_chisq}, $ret{Dm_df} )
1898             if $CDF;
1899             my $coeff_chisq = zeroes $opt{INITP}->nelem;
1900             if ( $ivs->dim(1) > 1 ) {
1901             for my $k (0 .. $ivs->dim(1)-1) {
1902             my @G = grep { $_ != $k } (0 .. $ivs->dim(1)-1);
1903             my $G = $ivs->dice_axis(1, \@G);
1904             my $init = $opt{INITP}->dice([ @G, $opt{INITP}->dim(0)-1 ])->copy;
1905             my $y_G
1906             = PDL::Fit::LM::lmfit( $G, $self, $opt{WT}, \&_logistic, $init,
1907             { Maxiter=>$opt{MAXIT}, Eps=>$opt{EPS} } );
1908             $coeff_chisq->slice($k) .= $self->dm( $y_G ) - $ret{Dm};
1909             }
1910             }
1911             else {
1912             # d0 is, by definition, the deviance with only intercept
1913             $coeff_chisq->slice(0) .= $ret{D0} - $ret{Dm};
1914             }
1915             my $y_c
1916             = PDL::Fit::LM::lmfit( $ivs, $self, $opt{WT}, \&_logistic_no_intercept, $opt{INITP}->slice('0:-2')->sever,
1917             { Maxiter=>$opt{MAXIT}, Eps=>$opt{EPS} } );
1918             $coeff_chisq->slice(-1) .= $self->dm( $y_c ) - $ret{Dm};
1919             $ret{b} = $coeff;
1920             $ret{b_chisq} = $coeff_chisq;
1921             $ret{b_p} = 1 - $ret{b_chisq}->gsl_cdf_chisq_P( 1 )
1922             if $CDF;
1923             $ret{y_pred} = $yfit;
1924             $ret{iter} = $iter;
1925             for (keys %ret) { ref $ret{$_} eq 'PDL' and $ret{$_} = $ret{$_}->squeeze };
1926             %ret;
1927             }
1928              
1929             sub _logistic {
1930             my ($x,$par,$ym,$dyda) = @_;
1931             # $b and $c are fit parameters slope and intercept
1932             my $b = $par->slice([0,$x->dim(1) - 1])->sever;
1933             my $c = $par->slice(-1)->sever;
1934             # Write function with dependent variable $ym,
1935             # independent variable $x, and fit parameters as specified above.
1936             # Use the .= (dot equals) assignment operator to express the equality
1937             # (not just a plain equals)
1938             $ym .= 1 / ( 1 + exp( -1 * (sumover($b * $x->transpose) + $c) ) );
1939             my @dy = map $dyda->slice(",($_)"), 0 .. $par->dim(0)-1;
1940             # Partial derivative of the function with respect to each slope
1941             # fit parameter ($b in this case). Again, note .= assignment
1942             # operator (not just "equals")
1943             $dy[$_] .= $x->slice(':',$_) * $ym * (1 - $ym)
1944             for (0 .. $b->dim(0)-1);
1945             # Partial derivative of the function re intercept par
1946             $dy[-1] .= $ym * (1 - $ym);
1947             }
1948              
1949             sub _logistic_no_intercept {
1950             my ($x,$par,$ym,$dyda) = @_;
1951             my $b = $par->slice([0,$x->dim(1) - 1])->sever;
1952             # Write function with dependent variable $ym,
1953             # independent variable $x, and fit parameters as specified above.
1954             # Use the .= (dot equals) assignment operator to express the equality
1955             # (not just a plain equals)
1956             $ym .= 1 / ( 1 + exp( -1 * sumover($b * $x->transpose) ) );
1957             my (@dy) = map {$dyda -> slice(",($_)") } (0 .. $par->dim(0)-1);
1958             # Partial derivative of the function with respect to each slope
1959             # fit parameter ($b in this case). Again, note .= assignment
1960             # operator (not just "equals")
1961             $dy[$_] .= $x->slice(':',$_) * $ym * (1 - $ym)
1962             for 0 .. $b->dim(0)-1;
1963             }
1964              
1965             =head2 pca
1966              
1967             =for ref
1968              
1969             Principal component analysis. Based on corr instead of cov.
1970              
1971             Bad values are ignored pair-wise. OK when bad values are few but otherwise
1972             probably should fill_m etc before pca). Uses L.
1973              
1974             =for options
1975              
1976             Default options (case insensitive):
1977              
1978             CORR => 1, # boolean. use correlation or covariance
1979             PLOT => 0, # calls plot_screes by default
1980             # can set plot_screes options here
1981             WIN => undef, # for plotting
1982              
1983             =for usage
1984              
1985             Usage:
1986              
1987             my $d = qsort random 10, 5; # 10 obs on 5 variables
1988             my %r = $d->pca( \%opt );
1989             print "$_\t$r{$_}\n" for (keys %r);
1990              
1991             eigenvalue # variance accounted for by each component
1992             [4.70192 0.199604 0.0471421 0.0372981 0.0140346]
1993              
1994             eigenvector # dim var x comp. weights for mapping variables to component
1995             [
1996             [ -0.451251 -0.440696 -0.457628 -0.451491 -0.434618]
1997             [ -0.274551 0.582455 0.131494 0.255261 -0.709168]
1998             [ 0.43282 0.500662 -0.139209 -0.735144 -0.0467834]
1999             [ 0.693634 -0.428171 0.125114 0.128145 -0.550879]
2000             [ 0.229202 0.180393 -0.859217 0.4173 0.0503155]
2001             ]
2002              
2003             loadings # dim var x comp. correlation between variable and component
2004             [
2005             [ -0.978489 -0.955601 -0.992316 -0.97901 -0.942421]
2006             [ -0.122661 0.260224 0.0587476 0.114043 -0.316836]
2007             [ 0.0939749 0.108705 -0.0302253 -0.159616 -0.0101577]
2008             [ 0.13396 -0.0826915 0.0241629 0.0247483 -0.10639]
2009             [ 0.027153 0.0213708 -0.101789 0.0494365 0.00596076]
2010             ]
2011              
2012             pct_var # percent variance accounted for by each component
2013             [0.940384 0.0399209 0.00942842 0.00745963 0.00280691]
2014              
2015             Plot scores along the first two components,
2016              
2017             $d->plot_scores( $r{eigenvector} );
2018              
2019             =cut
2020              
2021             *pca = \&PDL::pca;
2022             sub PDL::pca {
2023             my ($self, $opt) = @_;
2024             my %opt = (
2025             CORR => 1,
2026             PLOT => 0,
2027             WIN => undef, # for plotting
2028             );
2029             if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt; }
2030             my $var_var = $opt{CORR}? $self->corr_table : $self->cov_table;
2031             # value is axis pdl and score is var x axis
2032             my ($eigvec, $eigval) = $var_var->eigens_sym;
2033             $eigvec = $eigvec->transpose; # compatibility with PDL::Slatec::eigsys
2034             # ind is sticky point for broadcasting
2035             my $ind_sorted = $eigval->qsorti->slice('-1:0');
2036             $eigvec = $eigvec->slice(':',$ind_sorted)->sever;
2037             $eigval = $eigval->slice($ind_sorted)->sever;
2038             # var x axis
2039             my $var = $eigval / $eigval->sum->sclr;
2040             my $loadings;
2041             if ($opt{CORR}) {
2042             $loadings = $eigvec * sqrt( $eigval->transpose );
2043             }
2044             else {
2045             my $scores = $eigvec x $self->dev_m;
2046             $loadings = $self->corr( $scores->dummy(1) );
2047             }
2048             $var->plot_screes(\%opt)
2049             if $opt{PLOT};
2050             ( eigenvalue=>$eigval, eigenvector=>$eigvec,
2051             pct_var=>$var, loadings=>$loadings );
2052             }
2053              
2054             =head2 pca_sorti
2055              
2056             Determine by which vars a component is best represented. Descending sort
2057             vars by size of association with that component. Returns sorted var and
2058             relevant component indices.
2059              
2060             =for options
2061              
2062             Default options (case insensitive):
2063              
2064             NCOMP => 10, # maximum number of components to consider
2065              
2066             =for usage
2067              
2068             Usage:
2069              
2070             # let's see if we replicated the Osgood et al. (1957) study
2071             pdl> ($data, $idv, $ido) = rtable 'osgood_exp.csv', {v=>0}
2072              
2073             # select a subset of var to do pca
2074             pdl> $ind = which_id $idv, [qw( ACTIVE BASS BRIGHT CALM FAST GOOD HAPPY HARD LARGE HEAVY )]
2075             pdl> $data = $data( ,$ind)->sever
2076             pdl> @$idv = @$idv[list $ind]
2077              
2078             pdl> %m = $data->pca
2079              
2080             pdl> ($iv, $ic) = $m{loadings}->pca_sorti()
2081              
2082             pdl> p "$idv->[$_]\t" . $m{loadings}->($_,$ic)->flat . "\n" for (list $iv)
2083              
2084             # COMP0 COMP1 COMP2 COMP3
2085             HAPPY [0.860191 0.364911 0.174372 -0.10484]
2086             GOOD [0.848694 0.303652 0.198378 -0.115177]
2087             CALM [0.821177 -0.130542 0.396215 -0.125368]
2088             BRIGHT [0.78303 0.232808 -0.0534081 -0.0528796]
2089             HEAVY [-0.623036 0.454826 0.50447 0.073007]
2090             HARD [-0.679179 0.0505568 0.384467 0.165608]
2091             ACTIVE [-0.161098 0.760778 -0.44893 -0.0888592]
2092             FAST [-0.196042 0.71479 -0.471355 0.00460276]
2093             LARGE [-0.241994 0.594644 0.634703 -0.00618055]
2094             BASS [-0.621213 -0.124918 0.0605367 -0.765184]
2095              
2096             =cut
2097              
2098             *pca_sorti = \&PDL::pca_sorti;
2099             sub PDL::pca_sorti {
2100             # $self is pdl (var x component)
2101             my ($self, $opt) = @_;
2102             my %opt = (
2103             NCOMP => 10, # maximum number of components to consider
2104             );
2105             if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt; }
2106             my $ncomp = pdl($opt{NCOMP}, $self->dim(1))->min;
2107             $self = $self->dice_axis( 1, pdl(0..$ncomp-1) );
2108             my $icomp = $self->transpose->abs->maximum_ind;
2109             # sort between comp
2110             my $ivar_sort = $icomp->qsorti;
2111             $self = $self->slice($ivar_sort)->sever;
2112             # sort within comp
2113             my $ic = $icomp->slice($ivar_sort)->iv_cluster;
2114             for my $comp (0 .. $ic->dim(1)-1) {
2115             my $i = $self->slice(which($ic->slice(':',$comp)), "($comp)")->qsorti->slice('-1:0');
2116             $ivar_sort->slice(which $ic->slice(':',$comp))
2117             .= $ivar_sort->slice(which $ic->slice(':',$comp))->slice($i);
2118             }
2119             wantarray ? ($ivar_sort, pdl(0 .. $ic->dim(1)-1)) : $ivar_sort;
2120             }
2121              
2122             =head2 plot_means
2123              
2124             Plots means anova style. Can handle up to 4-way interactions (ie 4D pdl).
2125              
2126             =for options
2127              
2128             Default options (case insensitive):
2129              
2130             IVNM => ['IV_0', 'IV_1', 'IV_2', 'IV_3'],
2131             DVNM => 'DV',
2132             AUTO => 1, # auto set dims to be on x-axis, line, panel
2133             # if set 0, dim 0 goes on x-axis, dim 1 as lines
2134             # dim 2+ as panels
2135             # see PDL::Graphics::Simple for next option
2136             WIN => undef, # pgswin object. not closed here if passed
2137             # allows comparing multiple lines in same plot
2138             SIZE => 640, # individual square panel size in pixels
2139              
2140             =for usage
2141              
2142             Usage:
2143              
2144             # see anova for mean / se pdl structure
2145             $mean->plot_means( $se, {IVNM=>['apple', 'bake']} );
2146              
2147             Or like this:
2148              
2149             $m{'| apple ~ bake | m'}->plot_means;
2150              
2151             =cut
2152              
2153             *plot_means = \&PDL::plot_means;
2154             sub PDL::plot_means {
2155             require PDL::Graphics::Simple;
2156             my $opt = ref($_[-1]) eq 'HASH' ? pop @_ : undef;
2157             my ($self, $se) = @_;
2158             $self = $self->squeeze;
2159             if ($self->ndims > 4) {
2160             carp "Data is > 4D. No plot here.";
2161             return;
2162             }
2163             my %opt = (
2164             IVNM => ['IV_0', 'IV_1', 'IV_2', 'IV_3'],
2165             DVNM => 'DV',
2166             AUTO => 1, # auto set vars to be on X axis, line, panel
2167             WIN => undef, # PDL::Graphics::Simple object
2168             SIZE => 640, # individual square panel size in pixels
2169             );
2170             if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt }
2171             # decide which vars to plot as x axis, lines, panels
2172             # put var w most levels on x axis
2173             # put var w least levels on diff panels
2174             my @iD = 0..3;
2175             my @dims = (1, 1, 1, 1);
2176             # splice ARRAY,OFFSET,LENGTH,LIST
2177             splice @dims, 0, $self->ndims, $self->dims;
2178             $self = $self->reshape(@dims)->sever;
2179             $se = $se->reshape(@dims)->sever
2180             if defined $se;
2181             @iD = reverse list qsorti pdl @dims
2182             if $opt{AUTO};
2183             # $iD[0] on x axis
2184             # $iD[1] as separate lines
2185             my $nx = $self->dim($iD[2]); # n xpanels
2186             my $ny = $self->dim($iD[3]); # n ypanels
2187             my $w = $opt{WIN} || PDL::Graphics::Simple::pgswin(
2188             size=>[$opt{SIZE}*$nx, $opt{SIZE}*$ny,'px']);
2189             my $seq0 = sequence(my $dim0 = $self->dim($iD[0]));
2190             my ($pcount, @plots) = 0;
2191             for my $y (0..$ny-1) {
2192             for my $x (0..$nx-1) {
2193             my $key_prefix = "$opt{IVNM}[$iD[0]]|";
2194             $key_prefix .= $opt{IVNM}[$iD[2]] . "=$x|" if $nx > 1;
2195             $key_prefix .= $opt{IVNM}[$iD[3]] . "=$y|" if $ny > 1;
2196             for (0 .. $self->dim($iD[1]) - 1) {
2197             my $ke = "$key_prefix$opt{IVNM}[$iD[1]]=$_";
2198             my $ydiced = $self->dice_axis($iD[3],$y)->dice_axis($iD[2],$x)->dice_axis($iD[1],$_)->squeeze;
2199             push @plots, with=>'lines', ke=>"$ke mean", style=>$pcount,
2200             $seq0+$pcount*0.05, $ydiced;
2201             push @plots, with=>'errorbars', ke=>"$ke error", style=>$pcount,
2202             $seq0+$pcount*0.05, $ydiced,
2203             $se->dice_axis($iD[3],$y)->dice_axis($iD[2],$x)
2204             ->dice_axis($iD[1],$_)->squeeze
2205             if defined($se);
2206             $pcount++;
2207             }
2208             }
2209             }
2210             my ($ymin, $ymax) = pdl($self, !defined $se ? () : ($self+$se, $self-$se))->minmax;
2211             $w->plot(@plots,
2212             { xlabel=>$opt{IVNM}[$iD[0]], ylabel=>$opt{DVNM},
2213             xrange=>[-0.05,$dim0-1+$pcount*0.05], yrange=>[$ymin-0.05,$ymax+0.05] }
2214             );
2215             $w;
2216             }
2217              
2218             =head2 plot_residuals
2219              
2220             Plots residuals against predicted values.
2221              
2222             =for usage
2223              
2224             Usage:
2225              
2226             use PDL::Graphics::Simple;
2227             $w = pgswin();
2228             $y->plot_residuals( $y_pred, { win=>$w } );
2229              
2230             =for options
2231              
2232             Default options (case insensitive):
2233              
2234             # see PDL::Graphics::Simple for more info
2235             WIN => undef, # pgswin object. not closed here if passed
2236             # allows comparing multiple lines in same plot
2237             SIZE => 640, # plot size in pixels
2238             COLOR => 1,
2239              
2240             =cut
2241              
2242             *plot_residuals = \&PDL::plot_residuals;
2243             sub PDL::plot_residuals {
2244             require PDL::Graphics::Simple;
2245             my $opt = ref($_[-1]) eq 'HASH' ? pop @_ : undef;
2246             my ($y, $y_pred) = @_;
2247             my %opt = (
2248             # see PDL::Graphics::Simple for next options
2249             WIN => undef, # pgswin object. not closed here if passed
2250             # allows comparing multiple lines in same plot
2251             SIZE => 640, # plot size in pixels
2252             COLOR => 1,
2253             );
2254             if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt; }
2255             my $residuals = $y - $y_pred;
2256             my $win = $opt{WIN} || PDL::Graphics::Simple::pgswin(size=>[@opt{qw(SIZE SIZE)}, 'px']);
2257             $win->plot(
2258             with=>'points', style=>$opt{COLOR}, $y_pred, $residuals,
2259             with=>'lines', style=>$opt{COLOR}, pdl($y_pred->minmax), pdl(0,0), # 0-line
2260             {xlabel=>'predicted value', ylabel=>'residuals'},
2261             );
2262             }
2263              
2264             =head2 plot_scores
2265              
2266             Plots standardized original and PCA transformed scores against two components. (Thank you, Bob MacCallum, for the documentation suggestion that led to this function.)
2267              
2268             =for options
2269              
2270             Default options (case insensitive):
2271              
2272             CORR => 1, # boolean. PCA was based on correlation or covariance
2273             COMP => [0,1], # indices to components to plot
2274             # see PDL::Graphics::Simple for next options
2275             WIN => undef, # pgswin object. not closed here if passed
2276             # allows comparing multiple lines in same plot
2277             SIZE => 640, # plot size in pixels
2278             COLOR => [1,2], # color for original and rotated scores
2279              
2280             =for usage
2281              
2282             Usage:
2283              
2284             my %p = $data->pca();
2285             $data->plot_scores( $p{eigenvector}, \%opt );
2286              
2287             =cut
2288              
2289             *plot_scores = \&PDL::plot_scores;
2290             sub PDL::plot_scores {
2291             require PDL::Graphics::Simple;
2292             my $opt = ref($_[-1]) eq 'HASH' ? pop @_ : undef;
2293             my ($self, $eigvec) = @_;
2294             my %opt = (
2295             CORR => 1, # boolean. PCA was based on correlation or covariance
2296             COMP => [0,1], # indices to components to plot
2297             # see PDL::Graphics::Simple for next options
2298             WIN => undef, # pgswin object. not closed here if passed
2299             # allows comparing multiple lines in same plot
2300             SIZE => 640, # plot size in pixels
2301             COLOR => [1,2], # color for original and transformed scoress
2302             );
2303             if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt }
2304             my $i = pdl $opt{COMP};
2305             my $z = $opt{CORR} ? $self->stddz : $self->dev_m;
2306             # transformed normed values
2307             my $scores = sumover($eigvec->slice(':',$i) * $z->transpose->dummy(1))->transpose;
2308             $z = $z->slice(':',$i)->sever;
2309             my $win = $opt{WIN} || PDL::Graphics::Simple::pgswin(size=>[@opt{qw(SIZE SIZE)}, 'px']);
2310             $win->plot(
2311             with=>'points', style=>$opt{COLOR}[0], ke=>'original', $z->slice(',(0)'), $z->slice(',(1)'),
2312             with=>'points', style=>$opt{COLOR}[1], ke=>'transformed', $scores->slice(',(0)'), $scores->slice(',(1)'),
2313             {xlabel=>"Component $opt{COMP}[0]", ylabel=>"Component $opt{COMP}[1]"},
2314             );
2315             }
2316              
2317             =head2 plot_screes
2318              
2319             Scree plot. Plots proportion of variance accounted for by PCA components.
2320              
2321             =for options
2322              
2323             Default options (case insensitive):
2324              
2325             NCOMP => 20, # max number of components to plot
2326             CUT => 0, # set to plot cutoff line after this many components
2327             # undef to plot suggested cutoff line for NCOMP comps
2328             # see PDL::Graphics::Simple for next options
2329             WIN => undef, # pgswin object. not closed here if passed
2330             # allows comparing multiple lines in same plot
2331             SIZE => 640, # plot size in pixels
2332              
2333             =for usage
2334              
2335             Usage:
2336              
2337             # variance should be in descending order
2338             $d = qsort random 10, 5; # 10 obs on 5 variables
2339             %pca = $d->pca( \%opt );
2340             $pca{pct_var}->plot_screes( {ncomp=>16, win=>$win=PDL::Graphics::Simple::pgswin()} );
2341              
2342             Or, because NCOMP is used so often, it is allowed a shortcut,
2343              
2344             $pca{pct_var}->plot_screes( 16 );
2345              
2346             =cut
2347              
2348             *plot_scree = \&PDL::plot_screes; # here for now for compatibility
2349             *plot_screes = \&PDL::plot_screes;
2350             sub PDL::plot_screes {
2351             require PDL::Graphics::Simple;
2352             my $opt = ref($_[-1]) eq 'HASH' ? pop @_ : undef;
2353             my ($self, $ncomp) = @_;
2354             my %opt = (
2355             NCOMP => 20, # max number of components to plot
2356             CUT => 0, # set to plot cutoff line after this many components
2357             # undef to plot suggested cutoff line for NCOMP comps
2358             # see PDL::Graphics::Simple for next options
2359             WIN => undef, # pgswin object. not closed here if passed
2360             # allows comparing multiple lines in same plot
2361             SIZE => 640, # plot size in pixels
2362             );
2363             if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt; }
2364             $opt{NCOMP} = $ncomp
2365             if $ncomp;
2366             # re-use $ncomp below
2367             $ncomp = ($self->dim(0) < $opt{NCOMP})? $self->dim(0) : $opt{NCOMP};
2368             my $self_comp = $self->slice([0,$ncomp-1]);
2369             $opt{CUT} = PDL::Stats::Kmeans::_scree_ind $self_comp
2370             if !defined $opt{CUT};
2371             my $win = $opt{WIN} ||
2372             PDL::Graphics::Simple::pgswin(size=>[@opt{qw(SIZE SIZE)}, 'px']);
2373             $win->plot(
2374             with=>'lines', ke=>'scree', sequence($ncomp), $self_comp,
2375             !$opt{CUT} ? () : (with=>'lines', ke=>'cut', pdl($opt{CUT}-.5, $opt{CUT}-.5), pdl(-.05, $self->max->sclr+.05)),
2376             {xlabel=>'Component', ylabel=>'Proportion of Variance Accounted for',
2377             xrange=>[-0.05,$ncomp-0.95], yrange=>[0,1], le=>'tr'},
2378             );
2379             }
2380              
2381             =head2 plot_stripchart
2382              
2383             Stripchart plot. Plots ANOVA-style data, categorised against given IVs.
2384              
2385             =for options
2386              
2387             Default options (case insensitive):
2388              
2389             IVNM => [], # auto filled as ['IV_0', 'IV_1', ... ]
2390             DVNM => 'DV',
2391             # see PDL::Graphics::Simple for next options
2392             WIN => undef, # pgswin object. not closed here if passed
2393              
2394             =for usage
2395              
2396             Usage:
2397              
2398             %m = $y->plot_stripchart( $a, \@b, { IVNM=>[qw(apple bake)] } );
2399              
2400             =cut
2401              
2402             my $CHART_GAP = 0.1;
2403             *plot_stripchart = \&PDL::plot_stripchart;
2404             sub PDL::plot_stripchart {
2405             require PDL::Graphics::Simple;
2406             my $opt = ref($_[-1]) eq 'HASH' ? pop @_ : undef;
2407             my ($y, @ivs_raw) = @_;
2408             my %opt = (
2409             IVNM => [], # auto filled as ['IV_0', 'IV_1', ... ]
2410             DVNM => 'DV',
2411             WIN => undef, # pgswin object. not closed here if passed
2412             );
2413             if ($opt) { $opt{uc $_} = $opt->{$_} for keys %$opt; }
2414             $opt{IVNM} = [ map { "IV_$_" } 0 .. $#ivs_raw ]
2415             if !$opt{IVNM} or !@{ $opt{IVNM} };
2416             my $w = $opt{WIN} || PDL::Graphics::Simple::pgswin();
2417             my @codes = map [code_ivs($_)], @ivs_raw;
2418             my @levels = map {
2419             my $map = $_->[1];
2420             [sort {$map->{$a} <=> $map->{$b}} keys %$map];
2421             } @codes;
2422             my $xjitter = $y->random * $CHART_GAP;
2423             my ($pcount, @plots) = 0;
2424             push @plots, with=>'points', ke=>"all data", $xjitter+$pcount, $y;
2425             $pcount++;
2426             for my $i (0..$#ivs_raw) {
2427             my $levs = $levels[$i];
2428             my $name = $opt{IVNM}[$i];
2429             my $coded = $codes[$i][0];
2430             for my $j (0..$#$levs) {
2431             my $inds = which($coded == $j);
2432             push @plots, with=>'points', ke=>"$name=$levs->[$j]",
2433             $xjitter->slice($inds)+$pcount+$j*$CHART_GAP, $y->slice($inds);
2434             }
2435             $pcount++;
2436             }
2437             my ($ymin, $ymax) = $y->minmax;
2438             my $xmax = $pcount-1 + $CHART_GAP*($#{$levels[-1]} + 2);
2439             $w->plot(@plots,
2440             { xlabel=>'IV', ylabel=>$opt{DVNM},
2441             xrange=>[-1,$xmax], yrange=>[$ymin-$CHART_GAP,$ymax+$CHART_GAP] }
2442             );
2443             $w;
2444             }
2445              
2446             =head1 SEE ALSO
2447              
2448             L
2449              
2450             L
2451              
2452             =head1 REFERENCES
2453              
2454             Cohen, J., Cohen, P., West, S.G., & Aiken, L.S. (2003). Applied
2455             Multiple Regression/correlation Analysis for the Behavioral Sciences
2456             (3rd ed.). Mahwah, NJ: Lawrence Erlbaum Associates Publishers.
2457              
2458             Hosmer, D.W., & Lemeshow, S. (2000). Applied Logistic Regression (2nd
2459             ed.). New York, NY: Wiley-Interscience.
2460              
2461             Lorch, R.F., & Myers, J.L. (1990). Regression analyses of repeated
2462             measures data in cognitive research. Journal of Experimental Psychology:
2463             Learning, Memory, & Cognition, 16, 149-157.
2464              
2465             Osgood C.E., Suci, G.J., & Tannenbaum, P.H. (1957). The Measurement of
2466             Meaning. Champaign, IL: University of Illinois Press.
2467              
2468             Rutherford, A. (2011). ANOVA and ANCOVA: A GLM Approach (2nd ed.). Wiley.
2469              
2470             Shlens, J. (2009). A Tutorial on Principal Component Analysis. Retrieved
2471             April 10, 2011 from http://citeseerx.ist.psu.edu/
2472              
2473             The GLM procedure: unbalanced ANOVA for two-way design with
2474             interaction. (2008). SAS/STAT(R) 9.2 User's Guide. Retrieved June 18,
2475             2009 from http://support.sas.com/
2476              
2477             Van den Noortgate, W., & Onghena, P. (2006). Analysing repeated
2478             measures data in cognitive research: A comment on regression coefficient
2479             analyses. European Journal of Cognitive Psychology, 18, 937-952.
2480              
2481             =head1 AUTHOR
2482              
2483             Copyright (C) 2009 Maggie J. Xiong
2484              
2485             All rights reserved. There is no warranty. You are allowed to redistribute
2486             this software / documentation as described in the file COPYING in the
2487             PDL distribution.
2488              
2489             =cut
2490             #line 2491 "lib/PDL/Stats/GLM.pm"
2491              
2492             # Exit with OK status
2493              
2494             1;