line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Statistics::Regression; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
$VERSION = '0.53'; |
4
|
|
|
|
|
|
|
my $DATE = "2007/07/07"; |
5
|
|
|
|
|
|
|
my $MNAME= "$0::Statistics::Regression"; |
6
|
|
|
|
|
|
|
|
7
|
1
|
|
|
1
|
|
41902
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
134
|
|
8
|
1
|
|
|
1
|
|
7
|
use warnings FATAL => qw{ uninitialized }; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
53
|
|
9
|
|
|
|
|
|
|
|
10
|
1
|
|
|
1
|
|
7
|
use Carp; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
149
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
################################################################ |
13
|
|
|
|
|
|
|
=pod |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 NAME |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
Regression.pm - weighted linear regression package (line+plane fitting) |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 SYNOPSIS |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
use Statistics::Regression; |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# Create regression object |
25
|
|
|
|
|
|
|
my $reg = Statistics::Regression->new( "sample regression", [ "const", "someX", "someY" ] ); |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# Add data points |
28
|
|
|
|
|
|
|
$reg->include( 2.0, [ 1.0, 3.0, -1.0 ] ); |
29
|
|
|
|
|
|
|
$reg->include( 1.0, [ 1.0, 5.0, 2.0 ] ); |
30
|
|
|
|
|
|
|
$reg->include( 20.0, [ 1.0, 31.0, 0.0 ] ); |
31
|
|
|
|
|
|
|
$reg->include( 15.0, [ 1.0, 11.0, 2.0 ] ); |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
or |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
my %d; |
36
|
|
|
|
|
|
|
$d{const} = 1.0; $d{someX}= 5.0; $d{someY}= 2.0; $d{ignored}="anything else"; |
37
|
|
|
|
|
|
|
$reg->include( 3.0, \%d ); # names are picked off the Regression specification |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
Please note that *you* must provide the constant if you want one. |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# Finally, print the result |
42
|
|
|
|
|
|
|
$reg->print(); |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
This prints the following: |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
**************************************************************** |
47
|
|
|
|
|
|
|
Regression 'sample regression' |
48
|
|
|
|
|
|
|
**************************************************************** |
49
|
|
|
|
|
|
|
Name Theta StdErr T-stat |
50
|
|
|
|
|
|
|
[0='const'] 0.2950 6.0512 0.05 |
51
|
|
|
|
|
|
|
[1='someX'] 0.6723 0.3278 2.05 |
52
|
|
|
|
|
|
|
[2='someY'] 1.0688 2.7954 0.38 |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
R^2= 0.808, N= 4 |
55
|
|
|
|
|
|
|
**************************************************************** |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
The hash input method has the advantage that you can now just |
60
|
|
|
|
|
|
|
fill the observation hashes with all your variables, and use the |
61
|
|
|
|
|
|
|
same code to run regression, changing the regression |
62
|
|
|
|
|
|
|
specification at one and only one spot (the new() invokation). |
63
|
|
|
|
|
|
|
You do not need to change the inputs in the include() statement. |
64
|
|
|
|
|
|
|
For example, |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
my @obs; ## a global variable. observations are like: %oneobs= %{$obs[1]}; |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
sub run_regression { |
69
|
|
|
|
|
|
|
my $reg = Statistics::Regression->new( $_[0], $_[2] ); |
70
|
|
|
|
|
|
|
foreach my $obshashptr (@obs) { $reg->include( $_[1], $_[3] ); } |
71
|
|
|
|
|
|
|
$reg->print(); |
72
|
|
|
|
|
|
|
} |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
run_regression("bivariate regression", $obshashptr->{someY}, [ "const", "someX" ] ); |
75
|
|
|
|
|
|
|
run_regression("trivariate regression", $obshashptr->{someY}, [ "const", "someX", "someZ" ] ); |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
Of course, you can use the subroutines to do the printing work yourself: |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
my @theta = $reg->theta(); |
82
|
|
|
|
|
|
|
my @se = $reg->standarderrors(); |
83
|
|
|
|
|
|
|
my $rsq = $reg->rsq(); |
84
|
|
|
|
|
|
|
my $adjrsq = $reg->adjrsq(); |
85
|
|
|
|
|
|
|
my $ybar = $reg->ybar(); ## the average of the y vector |
86
|
|
|
|
|
|
|
my $sst = $reg->sst(); ## the sum-squares-total |
87
|
|
|
|
|
|
|
my $sigmasq= $reg->sigmasq(); ## the variance of the residual |
88
|
|
|
|
|
|
|
my $k = $reg->k(); ## the number of variables |
89
|
|
|
|
|
|
|
my $n = $reg->n(); ## the number of observations |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
In addition, there are some other helper routines, and a |
92
|
|
|
|
|
|
|
subroutine linearcombination_variance(). If you don't know what |
93
|
|
|
|
|
|
|
this is, don't use it. |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=head1 BACKGROUND WARNING |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
You should have an understanding of OLS regressions if you want |
99
|
|
|
|
|
|
|
to use this package. You can get this from an introductory |
100
|
|
|
|
|
|
|
college econometrics class and/or from most intermediate college |
101
|
|
|
|
|
|
|
statistics classes. If you do not have this background |
102
|
|
|
|
|
|
|
knowledge, then this package will remain a mystery to you. |
103
|
|
|
|
|
|
|
There is no support for this package--please don't expect any. |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=head1 DESCRIPTION |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
Regression.pm is a multivariate linear regression package. That |
109
|
|
|
|
|
|
|
is, it estimates the c coefficients for a line-fit of the type |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
y= c(0)*x(0) + c(1)*x1 + c(2)*x2 + ... + c(k)*xk |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
given a data set of N observations, each with k independent x |
114
|
|
|
|
|
|
|
variables and one y variable. Naturally, N must be greater than |
115
|
|
|
|
|
|
|
k---and preferably considerably greater. Any reasonable |
116
|
|
|
|
|
|
|
undergraduate statistics book will explain what a regression is. |
117
|
|
|
|
|
|
|
Most of the time, the user will provide a constant ('1') as x(0) |
118
|
|
|
|
|
|
|
for each observation in order to allow the regression package to |
119
|
|
|
|
|
|
|
fit an intercept. |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=head1 ALGORITHM |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=head2 Original Algorithm (ALGOL-60): |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
W. M. Gentleman, University of Waterloo, "Basic |
127
|
|
|
|
|
|
|
Description For Large, Sparse Or Weighted Linear Least |
128
|
|
|
|
|
|
|
Squares Problems (Algorithm AS 75)," Applied Statistics |
129
|
|
|
|
|
|
|
(1974) Vol 23; No. 3 |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
Gentleman's algorithm is I statistical standard. Insertion |
132
|
|
|
|
|
|
|
of a new observation can be done one observation at any time |
133
|
|
|
|
|
|
|
(WITH A WEIGHT!), and still only takes a low quadratic time. |
134
|
|
|
|
|
|
|
The storage space requirement is of quadratic order (in the |
135
|
|
|
|
|
|
|
indep variables). A practically infinite number of observations |
136
|
|
|
|
|
|
|
can easily be processed! |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=head2 Internal Data Structures |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
R=Rbar is an upperright triangular matrix, kept in normalized |
141
|
|
|
|
|
|
|
form with implicit 1's on the diagonal. D is a diagonal scaling |
142
|
|
|
|
|
|
|
matrix. These correspond to "standard Regression usage" as |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
X' X = R' D R |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
A backsubsitution routine (in thetacov) allows to invert the R |
147
|
|
|
|
|
|
|
matrix (the inverse is upper-right triangular, too!). Call this |
148
|
|
|
|
|
|
|
matrix H, that is H=R^(-1). |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
(X' X)^(-1) = [(R' D^(1/2)') (D^(1/2) R)]^(-1) |
151
|
|
|
|
|
|
|
= [ R^-1 D^(-1/2) ] [ R^-1 D^(-1/2) ]' |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
=head1 BUGS/PROBLEMS |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
None known. |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
=over 4 |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=item Perl Problem |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
Unfortunately, perl is unaware of IEEE number representations. |
163
|
|
|
|
|
|
|
This makes it a pain to test whether an observation contains any |
164
|
|
|
|
|
|
|
missing variables (coded as 'NaN' in Regression.pm). |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=back |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=for comment |
169
|
|
|
|
|
|
|
pod2html -noindex -title "perl weighted least squares regression package" Regression.pm > Regression.html |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
=head1 VERSION and RECENT CHANGES |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
2007/04/04: Added Coefficient Standard Errors |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
2007/07/01: Added self-test use (if invoked as perl Regression.pm) |
177
|
|
|
|
|
|
|
at the end. cleaned up some print sprintf. |
178
|
|
|
|
|
|
|
changed syntax on new() to eliminate passing K. |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
2007/07/07: allowed passing hash with names to include(). |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
=head1 AUTHOR |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
Naturally, Gentleman invented this algorithm. It was adaptated |
186
|
|
|
|
|
|
|
by Ivo Welch. Alan Miller (alan\@dmsmelb.mel.dms.CSIRO.AU) |
187
|
|
|
|
|
|
|
pointed out nicer ways to compute the R^2. Ivan Tubert-Brohman |
188
|
|
|
|
|
|
|
helped wrap the module as as a standard CPAN distribution. |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
=head1 LICENSE |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
This module is released for free public use under a GPL license. |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
(C) Ivo Welch, 2001,2004, 2007. |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
=cut |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
################################################################ |
200
|
|
|
|
|
|
|
#### let's start with handling of missing data ("nan" or "NaN") |
201
|
|
|
|
|
|
|
################################################################ |
202
|
1
|
|
|
1
|
|
6
|
use constant TINY => 1e-8; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
4534
|
|
203
|
|
|
|
|
|
|
my $nan= "NaN"; |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
sub isNaN { |
206
|
16
|
50
|
|
16
|
0
|
80
|
if ($_[0] !~ /[0-9nan]/) { confess "$MNAME:isNaN: definitely not a number in NaN: '$_[0]'"; } |
|
0
|
|
|
|
|
0
|
|
207
|
16
|
|
33
|
|
|
85
|
return ($_[0]=~ /NaN/i) || ($_[0] != $_[0]); |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
################################################################ |
212
|
|
|
|
|
|
|
### my $reg = Statistics::Regression->new($regname, \@var_names) |
213
|
|
|
|
|
|
|
### |
214
|
|
|
|
|
|
|
### Receives the number of variables on each observations (i.e., |
215
|
|
|
|
|
|
|
### an integer) and returns the blessed data structure as a |
216
|
|
|
|
|
|
|
### Statistics::Regression object. Also takes an optional name |
217
|
|
|
|
|
|
|
### for this regression to remember, as well as a reference to a |
218
|
|
|
|
|
|
|
### k*1 array of names for the X coefficients. |
219
|
|
|
|
|
|
|
### |
220
|
|
|
|
|
|
|
### I have now made it mandatory to give some names. |
221
|
|
|
|
|
|
|
### |
222
|
|
|
|
|
|
|
################################################################ |
223
|
|
|
|
|
|
|
sub new { |
224
|
1
|
50
|
|
1
|
0
|
13
|
my $classname= shift; (!ref($classname)) or confess "$MNAME:new: bad class call to new ($classname).\n"; |
|
1
|
|
|
|
|
3
|
|
225
|
1
|
|
50
|
|
|
7
|
my $regname= shift || "no-name"; |
226
|
1
|
|
|
|
|
2
|
my $xnameptr= shift; |
227
|
|
|
|
|
|
|
|
228
|
1
|
50
|
|
|
|
3
|
(defined($regname)) or confess "$MNAME:new: bad name in for regression. no undef allowed.\n"; |
229
|
1
|
50
|
|
|
|
4
|
(!ref($regname)) or confess "$MNAME:new: bad name in for regression.\n"; |
230
|
1
|
50
|
|
|
|
2
|
(defined($xnameptr)) or confess "$MNAME:new: You must provide variable names, because this tells me the number of columns. no undef allowed.\n"; |
231
|
1
|
50
|
|
|
|
5
|
(ref($xnameptr) eq "ARRAY") or confess "$MNAME:new: bad xnames for regression. Must be pointer.\n"; |
232
|
|
|
|
|
|
|
|
233
|
1
|
|
|
|
|
3
|
my $K= (@{$xnameptr}); |
|
1
|
|
|
|
|
2
|
|
234
|
|
|
|
|
|
|
|
235
|
1
|
50
|
|
|
|
4
|
if (!defined($K)) { confess "$MNAME:new: cannot determine the number of variables"; } |
|
0
|
|
|
|
|
0
|
|
236
|
1
|
50
|
|
|
|
3
|
if ($K<=1) { confess "$MNAME:new: Cannot run a regression without at least two variables."; } |
|
0
|
|
|
|
|
0
|
|
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
sub zerovec { |
239
|
3
|
|
|
3
|
0
|
4
|
my @rv; |
240
|
3
|
|
|
|
|
8
|
for (my $i=0; $i<=$_[0]; ++$i) { $rv[$i]=0; } |
|
16
|
|
|
|
|
52
|
|
241
|
3
|
|
|
|
|
30
|
return \@rv; |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
|
244
|
1
|
|
|
|
|
5
|
bless { |
245
|
|
|
|
|
|
|
k => $K, |
246
|
|
|
|
|
|
|
regname => $regname, |
247
|
|
|
|
|
|
|
xnames => $xnameptr, |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
# constantly updated |
250
|
|
|
|
|
|
|
n => 0, |
251
|
|
|
|
|
|
|
sse => 0, |
252
|
|
|
|
|
|
|
syy => 0, |
253
|
|
|
|
|
|
|
sy => 0, |
254
|
|
|
|
|
|
|
wghtn => 0, |
255
|
|
|
|
|
|
|
d => zerovec($K), |
256
|
|
|
|
|
|
|
thetabar => zerovec($K), |
257
|
|
|
|
|
|
|
rbarsize => ($K+1)*$K/2+1, |
258
|
|
|
|
|
|
|
rbar => zerovec(($K+1)*$K/2+1), |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
# other constants |
261
|
|
|
|
|
|
|
neverabort => 0, |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
# computed on demand |
264
|
|
|
|
|
|
|
theta => undef, |
265
|
|
|
|
|
|
|
sigmasq => undef, |
266
|
|
|
|
|
|
|
rsq => undef, |
267
|
|
|
|
|
|
|
adjrsq => undef |
268
|
|
|
|
|
|
|
}, $classname; |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
################################################################ |
273
|
|
|
|
|
|
|
### $reg->include( $y, [ $x1, $x2, $x3 ... $xk ], $weight ); |
274
|
|
|
|
|
|
|
### |
275
|
|
|
|
|
|
|
### Add one new observation. The weight is optional. Note that |
276
|
|
|
|
|
|
|
### inclusion with a weight of -1 can be used to delete an |
277
|
|
|
|
|
|
|
### observation. |
278
|
|
|
|
|
|
|
### |
279
|
|
|
|
|
|
|
### The error checking and transfer of arguments is clutzy, but |
280
|
|
|
|
|
|
|
### works. if I had POSIX assured, I could do better number |
281
|
|
|
|
|
|
|
### checking. right now, I don't do any. |
282
|
|
|
|
|
|
|
### |
283
|
|
|
|
|
|
|
### Returns the number of observations so far included. |
284
|
|
|
|
|
|
|
################################################################ |
285
|
|
|
|
|
|
|
sub include { |
286
|
4
|
|
|
4
|
0
|
20
|
my $this = shift; |
287
|
4
|
|
|
|
|
5
|
my $yelement= shift; |
288
|
4
|
|
|
|
|
6
|
my $xin= shift; |
289
|
4
|
|
50
|
|
|
17
|
my $weight= shift || 1.0; |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
# modest input checking; |
292
|
4
|
50
|
|
|
|
9
|
(ref($this)) or confess "$MNAME:include: bad class call to include.\n"; |
293
|
4
|
50
|
|
|
|
8
|
(defined($yelement)) or confess "$MNAME:include: bad call for y to include. no undef allowed.\n"; |
294
|
4
|
50
|
|
|
|
20
|
(!ref($yelement)) or confess "$MNAME:include: bad call for y to include. need scalar.\n"; |
295
|
4
|
50
|
|
|
|
10
|
(defined($xin)) or confess "$MNAME:include: bad call for x to include. no undef allowed.\n"; |
296
|
4
|
50
|
|
|
|
9
|
(ref($xin)) or confess "$MNAME:include: bad call for x to include. need reference.\n"; |
297
|
4
|
50
|
|
|
|
7
|
(!ref($weight)) or confess "$MNAME:include: bad call for weight to include. need scalar.\n"; |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
# omit observations with missing observations; |
301
|
4
|
50
|
|
|
|
7
|
(defined($yelement)) or confess "$MNAME:include: you must give a y value (predictor)."; |
302
|
4
|
50
|
|
|
|
8
|
(isNaN($yelement)) and return $this->{n}; # ignore this observation; |
303
|
|
|
|
|
|
|
## should check for number, not string |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
# check and transfer the X vector |
307
|
4
|
|
|
|
|
5
|
my @xrow; |
308
|
4
|
50
|
|
|
|
10
|
if (ref($xin) eq "ARRAY") { @xrow= @{$xin}; } |
|
4
|
|
|
|
|
4
|
|
|
4
|
|
|
|
|
9
|
|
309
|
|
|
|
|
|
|
else { |
310
|
0
|
|
|
|
|
0
|
my $xctr=0; |
311
|
0
|
|
|
|
|
0
|
foreach my $nm (@{$this->{xnames}}) { |
|
0
|
|
|
|
|
0
|
|
312
|
0
|
0
|
|
|
|
0
|
(defined($xin->{$nm})) or confess "$MNAME:include: Variable '$nm' needs to be set in hash.\n"; |
313
|
0
|
|
|
|
|
0
|
$xrow[$xctr]= $xin->{$nm}; |
314
|
0
|
|
|
|
|
0
|
++$xctr; |
315
|
|
|
|
|
|
|
} |
316
|
|
|
|
|
|
|
} |
317
|
|
|
|
|
|
|
|
318
|
4
|
|
|
|
|
5
|
my @xcopy; |
319
|
4
|
|
|
|
|
38
|
for (my $i=1; $i<=$this->{k}; ++$i) { |
320
|
12
|
50
|
|
|
|
24
|
(defined($xrow[$i-1])) |
321
|
|
|
|
|
|
|
or confess "$MNAME:include: Internal Error: at N=".($this->{n}).", the x[".($i-1)."] is undef. use NaN for missing."; |
322
|
12
|
50
|
|
|
|
19
|
(isNaN($xrow[$i-1])) and return $this->{n}; |
323
|
12
|
|
|
|
|
42
|
$xcopy[$i]= $xrow[$i-1]; |
324
|
|
|
|
|
|
|
## should check for number, not string |
325
|
|
|
|
|
|
|
} |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
################ now comes the real routine |
328
|
|
|
|
|
|
|
|
329
|
4
|
|
|
|
|
8
|
$this->{syy}+= ($weight*($yelement*$yelement)); |
330
|
4
|
|
|
|
|
5
|
$this->{sy}+= ($weight*($yelement)); |
331
|
4
|
50
|
|
|
|
8
|
if ($weight>=0.0) { ++$this->{n}; } else { --$this->{n}; } |
|
4
|
|
|
|
|
5
|
|
|
0
|
|
|
|
|
0
|
|
332
|
|
|
|
|
|
|
|
333
|
4
|
|
|
|
|
5
|
$this->{wghtn}+= $weight; |
334
|
|
|
|
|
|
|
|
335
|
4
|
|
|
|
|
16
|
for (my $i=1; $i<=$this->{k};++$i) { |
336
|
11
|
100
|
|
|
|
19
|
if ($weight==0.0) { return $this->{n}; } |
|
2
|
|
|
|
|
8
|
|
337
|
9
|
50
|
|
|
|
22
|
if (abs($xcopy[$i])>(TINY)) { |
338
|
9
|
|
|
|
|
12
|
my $xi=$xcopy[$i]; |
339
|
|
|
|
|
|
|
|
340
|
9
|
|
|
|
|
11
|
my $di=$this->{d}->[$i]; |
341
|
9
|
|
|
|
|
21
|
my $dprimei=$di+$weight*($xi*$xi); |
342
|
9
|
|
|
|
|
12
|
my $cbar= $di/$dprimei; |
343
|
9
|
|
|
|
|
12
|
my $sbar= $weight*$xi/$dprimei; |
344
|
9
|
|
|
|
|
10
|
$weight*=($cbar); |
345
|
9
|
|
|
|
|
13
|
$this->{d}->[$i]=$dprimei; |
346
|
9
|
|
|
|
|
21
|
my $nextr=int( (($i-1)*( (2.0*$this->{k}-$i))/2.0+1) ); |
347
|
9
|
50
|
|
|
|
18
|
if (!($nextr<=$this->{rbarsize}) ) { confess "$MNAME:include: Internal Error 2"; } |
|
0
|
|
|
|
|
0
|
|
348
|
9
|
|
|
|
|
16
|
my $xk; |
349
|
9
|
|
|
|
|
17
|
for (my $kc=$i+1;$kc<=$this->{k};++$kc) { |
350
|
11
|
|
|
|
|
10
|
$xk=$xcopy[$kc]; $xcopy[$kc]=$xk-$xi*$this->{rbar}->[$nextr]; |
|
11
|
|
|
|
|
18
|
|
351
|
11
|
|
|
|
|
19
|
$this->{rbar}->[$nextr]= $cbar * $this->{rbar}->[$nextr]+$sbar*$xk; |
352
|
11
|
|
|
|
|
23
|
++$nextr; |
353
|
|
|
|
|
|
|
} |
354
|
9
|
|
|
|
|
8
|
$xk=$yelement; $yelement-= $xi*$this->{thetabar}->[$i]; |
|
9
|
|
|
|
|
13
|
|
355
|
9
|
|
|
|
|
29
|
$this->{thetabar}->[$i]= $cbar*$this->{thetabar}->[$i]+$sbar*$xk; |
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
} |
358
|
2
|
|
|
|
|
4
|
$this->{sse}+=$weight*($yelement*$yelement); |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
# indicate that Theta is garbage now |
361
|
2
|
|
|
|
|
3
|
$this->{theta}= undef; |
362
|
2
|
|
|
|
|
12
|
$this->{sigmasq}= undef; $this->{rsq}= undef; $this->{adjrsq}= undef; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
4
|
|
363
|
|
|
|
|
|
|
|
364
|
2
|
|
|
|
|
6
|
return $this->{n}; |
365
|
|
|
|
|
|
|
} |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
################################################################ |
369
|
|
|
|
|
|
|
### |
370
|
|
|
|
|
|
|
### $reg->rsq(), $reg->adjrsq(), $reg->sigmasq(), $reg->ybar(), |
371
|
|
|
|
|
|
|
### $reg->sst(), $reg->k(), $reg->n() |
372
|
|
|
|
|
|
|
### |
373
|
|
|
|
|
|
|
### These methods provide common auxiliary information. rsq, |
374
|
|
|
|
|
|
|
### adjrsq, sigmasq, sst, and ybar have not been checked but are |
375
|
|
|
|
|
|
|
### likely correct. The results are stored for later usage, |
376
|
|
|
|
|
|
|
### although this is somewhat unnecessary because the |
377
|
|
|
|
|
|
|
### computation is so simple anyway. |
378
|
|
|
|
|
|
|
################################################################ |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
sub rsq { |
381
|
1
|
|
|
1
|
0
|
7
|
my $this= shift; |
382
|
1
|
|
|
|
|
5
|
return $this->{rsq}= 1.0- $this->{sse} / $this->sst(); |
383
|
|
|
|
|
|
|
} |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
sub adjrsq { |
386
|
0
|
|
|
0
|
0
|
0
|
my $this= shift; |
387
|
0
|
|
|
|
|
0
|
return $this->{adjrsq}= 1.0- (1.0- $this->rsq())*($this->{n}-1)/($this->{n} - $this->{k}); |
388
|
|
|
|
|
|
|
} |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
sub sigmasq { |
391
|
0
|
|
|
0
|
0
|
0
|
my $this= shift; |
392
|
0
|
0
|
|
|
|
0
|
return $this->{sigmasq}= ($this->{n}<=$this->{k}) ? "Inf" : ($this->{sse}/($this->{n} - $this->{k})); |
393
|
|
|
|
|
|
|
} |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
sub ybar { |
396
|
1
|
|
|
1
|
0
|
2
|
my $this= shift; |
397
|
1
|
|
|
|
|
22
|
return $this->{ybar}= $this->{sy}/$this->{wghtn}; |
398
|
|
|
|
|
|
|
} |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
sub sst { |
401
|
1
|
|
|
1
|
0
|
2
|
my $this= shift; |
402
|
1
|
|
|
|
|
5
|
return $this->{sst}= ($this->{syy} - $this->{wghtn}*($this->ybar())**2); |
403
|
|
|
|
|
|
|
} |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
sub k { |
406
|
0
|
|
|
0
|
0
|
0
|
my $this= shift; |
407
|
0
|
|
|
|
|
0
|
return $this->{k}; |
408
|
|
|
|
|
|
|
} |
409
|
|
|
|
|
|
|
sub n { |
410
|
0
|
|
|
0
|
0
|
0
|
my $this= shift; |
411
|
0
|
|
|
|
|
0
|
return $this->{n}; |
412
|
|
|
|
|
|
|
} |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
################################################################ |
417
|
|
|
|
|
|
|
### $reg->print() [no arguments!] |
418
|
|
|
|
|
|
|
### |
419
|
|
|
|
|
|
|
### prints the estimated coefficients, and R^2 and N. For an |
420
|
|
|
|
|
|
|
### example see the Synopsis. |
421
|
|
|
|
|
|
|
################################################################ |
422
|
|
|
|
|
|
|
sub print { |
423
|
0
|
|
|
0
|
0
|
0
|
my $this= shift; |
424
|
|
|
|
|
|
|
|
425
|
0
|
|
|
|
|
0
|
print "****************************************************************\n"; |
426
|
0
|
|
|
|
|
0
|
print "Regression '$this->{regname}'\n"; |
427
|
0
|
|
|
|
|
0
|
print "****************************************************************\n"; |
428
|
|
|
|
|
|
|
|
429
|
0
|
|
|
|
|
0
|
my $theta= $this->theta(); |
430
|
0
|
|
|
|
|
0
|
my @standarderrors= $this->standarderrors(); |
431
|
|
|
|
|
|
|
|
432
|
0
|
|
|
|
|
0
|
printf "%-15s\t%12s\t%12s\t%7s\n", "Name", "Theta", "StdErr", "T-stat"; |
433
|
0
|
|
|
|
|
0
|
for (my $i=0; $i< $this->k(); ++$i) { |
434
|
0
|
0
|
|
|
|
0
|
my $name= "[$i".(defined($this->{xnames}->[$i]) ? "='$this->{xnames}->[$i]'":"")."]"; |
435
|
0
|
|
|
|
|
0
|
printf "%-15s\t", $name; |
436
|
0
|
|
|
|
|
0
|
printf "%12.4f\t", $theta->[$i]; |
437
|
0
|
|
|
|
|
0
|
printf "%12.4f\t", $standarderrors[$i]; |
438
|
0
|
|
|
|
|
0
|
printf "%7.2f", ($theta->[$i]/$standarderrors[$i]); |
439
|
0
|
|
|
|
|
0
|
printf "\n"; |
440
|
|
|
|
|
|
|
} |
441
|
|
|
|
|
|
|
|
442
|
0
|
|
|
|
|
0
|
print "\nR^2= ".sprintf("%.3f", $this->rsq()).", N= ".$this->n().", K= ".$this->k()."\n"; |
443
|
0
|
|
|
|
|
0
|
print "****************************************************************\n"; |
444
|
|
|
|
|
|
|
} |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
################################################################ |
449
|
|
|
|
|
|
|
### $theta = $reg->theta or @theta = $reg->theta |
450
|
|
|
|
|
|
|
### |
451
|
|
|
|
|
|
|
### This is the work horse. It estimates and returns the vector |
452
|
|
|
|
|
|
|
### of coefficients. In scalar context returns an array |
453
|
|
|
|
|
|
|
### reference; in list context it returns the list of |
454
|
|
|
|
|
|
|
### coefficients. |
455
|
|
|
|
|
|
|
################################################################ |
456
|
|
|
|
|
|
|
sub theta { |
457
|
1
|
|
|
1
|
0
|
8
|
my $this= shift; |
458
|
|
|
|
|
|
|
|
459
|
1
|
50
|
|
|
|
4
|
if (defined($this->{theta})) { |
460
|
0
|
0
|
|
|
|
0
|
return wantarray ? @{$this->{theta}} : $this->{theta}; |
|
0
|
|
|
|
|
0
|
|
461
|
|
|
|
|
|
|
} |
462
|
|
|
|
|
|
|
|
463
|
1
|
50
|
|
|
|
5
|
if ($this->{n} < $this->{k}) { return; } |
|
0
|
|
|
|
|
0
|
|
464
|
1
|
|
|
|
|
6
|
for (my $i=($this->{k}); $i>=1; --$i) { |
465
|
3
|
|
|
|
|
9
|
$this->{theta}->[$i]= $this->{thetabar}->[$i]; |
466
|
3
|
|
|
|
|
8
|
my $nextr= int (($i-1)*((2.0*$this->{k}-$i))/2.0+1); |
467
|
3
|
50
|
|
|
|
9
|
if (!($nextr<=$this->{rbarsize})) { confess "$MNAME:theta: Internal Error 3"; } |
|
0
|
|
|
|
|
0
|
|
468
|
3
|
|
|
|
|
25
|
for (my $kc=$i+1;$kc<=$this->{k};++$kc) { |
469
|
3
|
|
|
|
|
8
|
$this->{theta}->[$i]-=($this->{rbar}->[$nextr]*$this->{theta}->[$kc]); |
470
|
3
|
|
|
|
|
12
|
++$nextr; |
471
|
|
|
|
|
|
|
} |
472
|
|
|
|
|
|
|
} |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
|
475
|
1
|
|
|
|
|
3
|
my $ref = $this->{theta}; shift(@$ref); # we are counting from 0 |
|
1
|
|
|
|
|
2
|
|
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
# if in a scalar context, otherwise please return the array directly |
478
|
1
|
50
|
|
|
|
5
|
wantarray ? @{$this->{theta}} : $this->{theta}; |
|
1
|
|
|
|
|
6
|
|
479
|
|
|
|
|
|
|
} |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
################################################################ |
482
|
|
|
|
|
|
|
### @se= $reg->standarderrors() |
483
|
|
|
|
|
|
|
### |
484
|
|
|
|
|
|
|
### This is the most difficult routine. Take it on faith. |
485
|
|
|
|
|
|
|
### |
486
|
|
|
|
|
|
|
### R=Rbar is an upperright triangular matrix, kept in normalized |
487
|
|
|
|
|
|
|
### form with implicit 1's on the diagonal. D is a diagonal scaling |
488
|
|
|
|
|
|
|
### matrix. These correspond to "standard Regression usage" as |
489
|
|
|
|
|
|
|
### |
490
|
|
|
|
|
|
|
### X' X = R' D R |
491
|
|
|
|
|
|
|
### |
492
|
|
|
|
|
|
|
### A backsubsitution routine (in thetacov) allows to invert the R |
493
|
|
|
|
|
|
|
### matrix (the inverse is upper-right triangular, too!). Call this |
494
|
|
|
|
|
|
|
### matrix H, that is H=R^(-1). |
495
|
|
|
|
|
|
|
### |
496
|
|
|
|
|
|
|
### (X' X)^(-1) = [(R' D^(1/2)') (D^(1/2) R)]^(-1) |
497
|
|
|
|
|
|
|
### = [ R^-1 D^(-1/2) ] [ R^-1 D^(-1/2) ]' |
498
|
|
|
|
|
|
|
### |
499
|
|
|
|
|
|
|
### Let's work this for our example, where |
500
|
|
|
|
|
|
|
### |
501
|
|
|
|
|
|
|
### $reg->include( 2.0, [ 1.0, 3.0, -1.0 ] ); |
502
|
|
|
|
|
|
|
### $reg->include( 1.0, [ 1.0, 5.0, 2.0 ] ); |
503
|
|
|
|
|
|
|
### $reg->include( 20.0, [ 1.0, 31.0, 0.0 ] ); |
504
|
|
|
|
|
|
|
### $reg->include( 15.0, [ 1.0, 11.0, 2.0 ] ); |
505
|
|
|
|
|
|
|
### |
506
|
|
|
|
|
|
|
### For debuggin, the X'X matrix for our example is |
507
|
|
|
|
|
|
|
### 4, 50, 3 |
508
|
|
|
|
|
|
|
### 50 1116 29 |
509
|
|
|
|
|
|
|
### 3 29 9 |
510
|
|
|
|
|
|
|
### |
511
|
|
|
|
|
|
|
### Its inverse is |
512
|
|
|
|
|
|
|
### 0.70967 -0.027992 -0.146360 |
513
|
|
|
|
|
|
|
### -0.02799 0.002082 0.002622 |
514
|
|
|
|
|
|
|
### -0.14636 0.002622 0.151450 |
515
|
|
|
|
|
|
|
### |
516
|
|
|
|
|
|
|
### Internally, this is kept as follows |
517
|
|
|
|
|
|
|
### |
518
|
|
|
|
|
|
|
### R is 1, 0, 0 |
519
|
|
|
|
|
|
|
### 12.5 1 0 |
520
|
|
|
|
|
|
|
### 0.75 -0.0173 1 |
521
|
|
|
|
|
|
|
### |
522
|
|
|
|
|
|
|
### d is the diagonal(4,491,6.603) matrix, which as 1/sqrt becomes dhi= 0.5, 0.04513, 0.3892 |
523
|
|
|
|
|
|
|
### |
524
|
|
|
|
|
|
|
### R * d * R' is indeed the X' X matrix. |
525
|
|
|
|
|
|
|
### |
526
|
|
|
|
|
|
|
### The inverse of R is |
527
|
|
|
|
|
|
|
### |
528
|
|
|
|
|
|
|
### 1, 0, 0 |
529
|
|
|
|
|
|
|
### -12.5 1 0 |
530
|
|
|
|
|
|
|
### -0.9664 0.01731 1 |
531
|
|
|
|
|
|
|
### |
532
|
|
|
|
|
|
|
### in R, t(solve(R) %*% dhi) %*% t( t(solve(R) %*% dhi) ) is the correct inverse. |
533
|
|
|
|
|
|
|
### |
534
|
|
|
|
|
|
|
### The routine has a debug switch which makes it come out very verbose. |
535
|
|
|
|
|
|
|
################################################################ |
536
|
|
|
|
|
|
|
my $debug=0; |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
sub standarderrors { |
539
|
0
|
|
|
0
|
0
|
|
my $this= shift; |
540
|
0
|
|
|
|
|
|
our $K= $this->{k}; # convenience |
541
|
|
|
|
|
|
|
|
542
|
0
|
|
|
|
|
|
our @u; |
543
|
|
|
|
|
|
|
sub ui { |
544
|
0
|
0
|
|
0
|
0
|
|
if ($debug) { |
545
|
0
|
0
|
0
|
|
|
|
($_[0]<1)||($_[0]>$K) and confess "$MNAME:standarderrors: bad index 0 $_[0]\n"; |
546
|
0
|
0
|
0
|
|
|
|
($_[1]<1)||($_[1]>$K) and confess "$MNAME:standarderrors: bad index 1 $_[0]\n"; |
547
|
|
|
|
|
|
|
} |
548
|
0
|
|
|
|
|
|
return (($K*($_[0]-1))+($_[1]-1)); |
549
|
|
|
|
|
|
|
} |
550
|
|
|
|
|
|
|
sub giveuclear { |
551
|
0
|
|
|
0
|
0
|
|
for (my $i=0; $i<($K**2); ++$i) { $u[$i]=0.0; } |
|
0
|
|
|
|
|
|
|
552
|
0
|
0
|
|
|
|
|
return (wantarray) ? @u : \@u; |
553
|
|
|
|
|
|
|
} |
554
|
|
|
|
|
|
|
|
555
|
0
|
|
|
0
|
0
|
|
sub u { return $u[ui($_[0], $_[1])]; } |
556
|
0
|
|
|
0
|
0
|
|
sub setu { return $u[ui($_[0], $_[1])]= $_[2]; } |
557
|
0
|
|
|
0
|
0
|
|
sub add2u { return $u[ui($_[0], $_[1])]+= $_[2]; } |
558
|
0
|
|
|
0
|
0
|
|
sub mult2u { return $u[ui($_[0], $_[1])]*= $_[2]; } |
559
|
|
|
|
|
|
|
|
560
|
0
|
0
|
|
|
|
|
(defined($K)) or confess "$MNAME:standarderrors: Internal Error: I forgot the number of variables.\n"; |
561
|
0
|
0
|
|
|
|
|
if ($debug) { |
562
|
0
|
|
|
|
|
|
print "The Start Matrix is:\n"; |
563
|
0
|
|
|
|
|
|
for (my $i=1; $i<=$K; ++$i) { |
564
|
0
|
|
|
|
|
|
print "[$i]:\t"; |
565
|
0
|
|
|
|
|
|
for (my $j=1; $j<=$K; ++$j) { |
566
|
0
|
|
|
|
|
|
print $this->rbr($i, $j)."\t"; |
567
|
|
|
|
|
|
|
} |
568
|
0
|
|
|
|
|
|
print "\n"; |
569
|
|
|
|
|
|
|
} |
570
|
0
|
|
|
|
|
|
print "The Start d vector is:\n"; |
571
|
0
|
|
|
|
|
|
for (my $i=1; $i<=$K; ++$i) { |
572
|
0
|
|
|
|
|
|
print "".$this->{d}[$i]."\t"; |
573
|
|
|
|
|
|
|
} |
574
|
0
|
|
|
|
|
|
print "\n"; |
575
|
|
|
|
|
|
|
} |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
sub rbrindex { |
578
|
0
|
0
|
|
0
|
0
|
|
return ($_[0] == $_[1]) ? -9 : |
|
|
0
|
|
|
|
|
|
579
|
|
|
|
|
|
|
($_[0]>$_[1]) ? -8 : |
580
|
|
|
|
|
|
|
((($_[0]-1.0)* (2.0*$K-$_[0])/2.0+1.0) + $_[1] - 1 - $_[0] ); } |
581
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
# now a real member routine; |
583
|
|
|
|
|
|
|
sub rbr { |
584
|
0
|
|
|
0
|
0
|
|
my $this= shift; |
585
|
0
|
0
|
|
|
|
|
return ($_[0] == $_[1]) ? 1 : ( ($_[0]>$_[1]) ? 0 : ($this->{rbar}[rbrindex($_[0],$_[1])])); |
|
|
0
|
|
|
|
|
|
586
|
|
|
|
|
|
|
} |
587
|
|
|
|
|
|
|
|
588
|
0
|
|
|
|
|
|
my $u= giveuclear(); |
589
|
|
|
|
|
|
|
|
590
|
0
|
|
|
|
|
|
for (my $j=$K; $j>=1; --$j) { |
591
|
0
|
|
|
|
|
|
setu($j,$j, 1.0/($this->rbr($j,$j))); |
592
|
0
|
|
|
|
|
|
for (my $k=$j-1; $k>=1; --$k) { |
593
|
0
|
|
|
|
|
|
setu($k,$j,0); |
594
|
0
|
|
|
|
|
|
for (my $i=$k+1; $i<=$j; ++$i) { add2u($k,$j, $this->rbr($k,$i)*u($i,$j)); } |
|
0
|
|
|
|
|
|
|
595
|
0
|
|
|
|
|
|
mult2u($k,$j, (-1.0)/$this->rbr($k,$k)); |
596
|
|
|
|
|
|
|
} |
597
|
|
|
|
|
|
|
} |
598
|
|
|
|
|
|
|
|
599
|
0
|
0
|
|
|
|
|
if ($debug) { |
600
|
0
|
|
|
|
|
|
print "The Inverse Matrix of R is:\n"; |
601
|
0
|
|
|
|
|
|
for (my $i=1; $i<=$K; ++$i) { |
602
|
0
|
|
|
|
|
|
print "[$i]:\t"; |
603
|
0
|
|
|
|
|
|
for (my $j=1; $j<=$K; ++$j) { |
604
|
0
|
|
|
|
|
|
print $u[ui($i,$j)]."\t"; |
605
|
|
|
|
|
|
|
} |
606
|
0
|
|
|
|
|
|
print "\n"; |
607
|
|
|
|
|
|
|
} |
608
|
|
|
|
|
|
|
} |
609
|
|
|
|
|
|
|
|
610
|
0
|
|
|
|
|
|
for (my $i=1;$i<=$K;++$i) { |
611
|
0
|
|
|
|
|
|
for (my $j=1;$j<=$K;++$j) { |
612
|
0
|
0
|
|
|
|
|
if (abs($this->{d}[$j])
|
613
|
0
|
|
|
|
|
|
mult2u($i,$j, sqrt(1.0/TINY)); |
614
|
0
|
0
|
|
|
|
|
if (abs($this->{d}[$j])==0.0) { |
615
|
0
|
0
|
|
|
|
|
if ($this->{neverabort}) { |
616
|
0
|
|
|
|
|
|
for (my $i=0; $i<($K**2); ++$i) { $u[$i]= "NaN"; } |
|
0
|
|
|
|
|
|
|
617
|
0
|
|
|
|
|
|
return undef; |
618
|
|
|
|
|
|
|
} |
619
|
0
|
|
|
|
|
|
else { confess "$MNAME:standarderrors: I cannot compute the theta-covariance matrix for variable $j ".($this->{d}[$j])."\n"; } |
620
|
|
|
|
|
|
|
} |
621
|
|
|
|
|
|
|
} |
622
|
0
|
|
|
|
|
|
else { mult2u($i,$j, sqrt(1.0/$this->{d}[$j])); } |
623
|
|
|
|
|
|
|
} |
624
|
|
|
|
|
|
|
} |
625
|
|
|
|
|
|
|
|
626
|
0
|
0
|
|
|
|
|
if ($debug) { |
627
|
0
|
|
|
|
|
|
print "The Inverse Matrix of R multipled by D^(-1/2) is:\n"; |
628
|
0
|
|
|
|
|
|
for (my $i=1; $i<=$K; ++$i) { |
629
|
0
|
|
|
|
|
|
print "[$i]:\t"; |
630
|
0
|
|
|
|
|
|
for (my $j=1; $j<=$K; ++$j) { |
631
|
0
|
|
|
|
|
|
print $u[ui($i,$j)]."\t"; |
632
|
|
|
|
|
|
|
} |
633
|
0
|
|
|
|
|
|
print "\n"; |
634
|
|
|
|
|
|
|
} |
635
|
|
|
|
|
|
|
} |
636
|
|
|
|
|
|
|
|
637
|
0
|
0
|
|
|
|
|
$this->{sigmasq}= ($this->{n}<=$K) ? "Inf" : ($this->{sse}/($this->{n} - $K)); |
638
|
0
|
|
|
|
|
|
my @xpxinv; |
639
|
0
|
|
|
|
|
|
for (my $i=1;$i<=$K; ++$i) { |
640
|
0
|
|
|
|
|
|
for (my $j=$i;$j<=$K;++$j) { |
641
|
0
|
|
|
|
|
|
my $indexij= ui($i,$j); |
642
|
0
|
|
|
|
|
|
$xpxinv[$indexij]= 0.0; |
643
|
0
|
|
|
|
|
|
for (my $k=1;$k<=$K;++$k) { |
644
|
0
|
|
|
|
|
|
$xpxinv[$indexij] += $u[ui($i,$k)]*$u[ui($j,$k)]; |
645
|
|
|
|
|
|
|
} |
646
|
0
|
|
|
|
|
|
$xpxinv[ui($j,$i)]= $xpxinv[$indexij]; # this is symmetric |
647
|
|
|
|
|
|
|
} |
648
|
|
|
|
|
|
|
} |
649
|
|
|
|
|
|
|
|
650
|
0
|
0
|
|
|
|
|
if ($debug) { |
651
|
0
|
|
|
|
|
|
print "The full inverse matrix of X'X is:\n"; |
652
|
0
|
|
|
|
|
|
for (my $i=1; $i<=$K; ++$i) { |
653
|
0
|
|
|
|
|
|
print "[$i]:\t"; |
654
|
0
|
|
|
|
|
|
for (my $j=1; $j<=$K; ++$j) { |
655
|
0
|
|
|
|
|
|
print $xpxinv[ui($i,$j)]."\t"; |
656
|
|
|
|
|
|
|
} |
657
|
0
|
|
|
|
|
|
print "\n"; |
658
|
|
|
|
|
|
|
} |
659
|
0
|
|
|
|
|
|
print "The sigma^2 is ".$this->{sigmasq}."\n"; |
660
|
|
|
|
|
|
|
} |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
## 99% of the usage here will be to print the diagonal elements of sqrt ( (X' X) sigma^2 ) |
663
|
|
|
|
|
|
|
## so, let's make this our first returned object; |
664
|
|
|
|
|
|
|
|
665
|
0
|
|
|
|
|
|
my @secoefs; |
666
|
0
|
|
|
|
|
|
for (my $i=1; $i<=$K; ++$i) { |
667
|
0
|
|
|
|
|
|
$secoefs[$i-1]= sqrt($xpxinv[ui($i,$i)] * $this->{sigmasq}); |
668
|
|
|
|
|
|
|
} |
669
|
0
|
0
|
|
|
|
|
if ($debug) { for (my $i=0; $i<$K; ++$i) { print " $secoefs[$i] "; } print "\n"; } |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
# the following are clever return methods; if the user goes over the secoefs, |
672
|
|
|
|
|
|
|
# almost surely an error will result, because he will run into xpxinv. For special |
673
|
|
|
|
|
|
|
# usage, however, xpxinv may still be useful. |
674
|
|
|
|
|
|
|
|
675
|
0
|
|
|
|
|
|
return ( @secoefs, \@xpxinv, $this->sigmasq ); |
676
|
|
|
|
|
|
|
} |
677
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
################################ |
680
|
|
|
|
|
|
|
################################
## linearcombination_variance( @weights ) -- variance of the linear
## combination w'theta of the estimated coefficients:
##     Var(w'theta) = sigma^2 * w' (X'X)^{-1} w
## Expects exactly K weights (one per regression variable); confesses
## otherwise.  Invokes standarderrors() internally, which recomputes
## (X'X)^{-1} and sigma^2.
sub linearcombination_variance {
	my $this = shift;
	our $K = $this->{k};   # package-level on purpose: the ui() index helper reads $K

	my @weights = @_;

	unless (scalar(@weights) == $K) {
		confess "$MNAME:linearcombination_variance: ".
			"Sorry, you must give a vector of length $K, not ".scalar(@weights)."\n";
	}

	# standarderrors() returns ( se_1..se_K, \@xpxinv, sigmasq );
	# pick the covariance pieces off the end of that list.
	my @allback = $this->standarderrors();
	my $xpxinv  = $allback[ $this->{k} ];
	my $sigmasq = $allback[ $this->{k} + 1 ];

	# quadratic form w' (X'X)^{-1} w over the full K x K matrix
	my $quadform = 0;
	for my $i (1 .. $K) {
		for my $j (1 .. $K) {
			$quadform += $weights[$i-1] * $weights[$j-1] * $xpxinv->[ ui($i,$j) ];
		}
	}
	return $quadform * $sigmasq;
}
703
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
################################################################ |
706
|
|
|
|
|
|
|
### sub dump() was used internally for debugging. |
707
|
|
|
|
|
|
|
################################################################ |
708
|
|
|
|
|
|
|
sub dump {
	## Debugging aid: pretty-print every field of the regression object,
	## expanding array and hash references one level deep.
	my $this= $_[0];
	print "****************************************************************\n";
	print "Regression '$this->{regname}'\n";
	print "****************************************************************\n";

	## Helper: print one field.  Invoked below as $this->print1val($key,$key),
	## so $_[0] is the object, $_[1] a display label and $_[2] the hash key.
	## NOTE(review): a named sub nested inside another sub is still compiled
	## once at package level in Perl; harmless here because it closes over
	## no lexicals of dump().
	sub print1val {
		no strict;  # loose access while dumping arbitrary, possibly undef fields
		print "$_[1]($_[2])=\t". ((defined($_[0]->{ $_[2] }) ? $_[0]->{ $_[2] } : "intentionally undef"));

		my $ref=$_[0]->{ $_[2] };

		if (ref($ref) eq 'ARRAY') {
			my $arrayref= $ref;
			## NOTE(review): inside this double-quoted string "+1" is literal
			## text, so this prints e.g. "3+1 elements", not "4 elements".
			print " $#$arrayref+1 elements:\n";
			if ($#$arrayref>30) {
				# long arrays: compact form, all elements on one line
				print "\t";
				for(my $i=0; $i<$#$arrayref+1; ++$i) { print "$i='$arrayref->[$i]';"; }
				print "\n";
			}
			else {
				# short arrays: one element per line
				for(my $i=0; $i<$#$arrayref+1; ++$i) { print "\t$i=\t'$arrayref->[$i]'\n"; }
			}
		}
		elsif (ref($ref) eq 'HASH') {
			my $hashref= $ref;
			print " ".scalar(keys(%$hashref))." elements\n";
			# each() on the inner hash only; does not disturb the %$this iterator
			while (my ($key, $val) = each(%$hashref)) {
				print "\t'$key'=>'$val';\n";
			}
		}
		else {
			# plain scalar: its value was already printed above
			print " [was scalar]\n"; }
	}

	## Walk all object fields ($val is unused; print1val re-reads by key).
	while (my ($key, $val) = each(%$this)) {
		$this->print1val($key, $key);
	}
	print "****************************************************************\n";
}
747
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
################################################################ |
749
|
|
|
|
|
|
|
### The Test Program. Invoke as "perl Regression.pm". |
750
|
|
|
|
|
|
|
################################################################ |
751
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
if ($0 eq "Regression.pm") {

	## Self-test: runs only when this module file is executed directly
	## ("perl Regression.pm"), never when it is use'd.

	# Build a three-variable regression; the caller must supply the
	# constant regressor explicitly.
	my $reg = Statistics::Regression->new( "sample regression", [ "const", "someX", "someY" ] );

	# Observations may be given as array refs, in specification order ...
	foreach my $obs ( [  2.0, [ 1.0,  3.0, -1.0 ] ],
			  [  1.0, [ 1.0,  5.0,  2.0 ] ],
			  [ 20.0, [ 1.0, 31.0,  0.0 ] ] ) {
		$reg->include( @$obs );
	}

	# ... or as a hash ref, from which the named variables are picked off
	# (extra keys such as "ignored" are simply skipped).
	my %observation = ( const => 1.0, someX => 11.0, someY => 2.0, ignored => "ignored" );
	$reg->include( 15.0, \%observation );

	# Report the fitted coefficients and regression statistics.
	$reg->print();
}
770
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
1; |