line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#!/usr/bin/perl -w |
2
|
|
|
|
|
|
|
package Statistics::PCA; |
3
|
|
|
|
|
|
|
#package Statistics::PCA; |
4
|
1
|
|
|
1
|
|
63302
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
40
|
|
5
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
105
|
|
6
|
1
|
|
|
1
|
|
5
|
use Carp; |
|
1
|
|
|
|
|
7
|
|
|
1
|
|
|
|
|
96
|
|
7
|
1
|
|
|
1
|
|
1471
|
use Math::Cephes::Matrix qw(mat); |
|
1
|
|
|
|
|
13023
|
|
|
1
|
|
|
|
|
84
|
|
8
|
1
|
|
|
1
|
|
11
|
use List::Util qw(sum); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
129
|
|
9
|
1
|
|
|
1
|
|
2044
|
use Math::MatrixReal; |
|
1
|
|
|
|
|
56907
|
|
|
1
|
|
|
|
|
70
|
|
10
|
1
|
|
|
1
|
|
1353
|
use Text::SimpleTable; |
|
1
|
|
|
|
|
3348
|
|
|
1
|
|
|
|
|
39
|
|
11
|
1
|
|
|
1
|
|
8
|
use Math::Cephes qw(:utils); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
462
|
|
12
|
1
|
|
|
1
|
|
1682
|
use Contextual::Return; |
|
1
|
|
|
|
|
24514
|
|
|
1
|
|
|
|
|
9
|
|
13
|
|
|
|
|
|
|
=head1 NAME |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
Statistics::PCA - A simple Perl implementation of Principal Component Analysis. |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=cut |
18
|
|
|
|
|
|
|
=head1 VERSION |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
This document describes Statistics::PCA version 0.0.1 |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=cut |
23
|
|
|
|
|
|
|
=head1 SYNOPSIS |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
use Statistics::PCA; |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# Create new Statistics::PCA object. |
28
|
|
|
|
|
|
|
my $pca = Statistics::PCA->new; |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# Var1 Var2 Var3 Var4... |
31
|
|
|
|
|
|
|
my @Obs1 = (qw/ 32 26 51 12 /); |
32
|
|
|
|
|
|
|
my @Obs2 = (qw/ 17 13 34 35 /); |
33
|
|
|
|
|
|
|
my @Obs3 = (qw/ 10 94 83 45 /); |
34
|
|
|
|
|
|
|
my @Obs4 = (qw/ 3 72 72 67 /); |
35
|
|
|
|
|
|
|
my @Obs5 = (qw/ 10 63 35 34 /); |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
# Load data. Data is loaded as a LIST-of-LISTS (LoL) pointed to by a named argument 'data'. Requires argument for format (see METHODS). |
38
|
|
|
|
|
|
|
$pca->load_data ( { format => 'table', data => [ \@Obs1, \@Obs2, \@Obs3, \@Obs4, \@Obs5 ], } ) ; |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
# Perform the PCA analysis. Takes optional argument 'eigen' (see METHODS). |
41
|
|
|
|
|
|
|
#$pca->pca( { eigen => 'C' } ); |
42
|
|
|
|
|
|
|
$pca->pca(); |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
# Access results. The return value of this method is context-dependent (see METHODS). To print a report to STDOUT call in VOID-context. |
45
|
|
|
|
|
|
|
$pca->results(); |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=cut |
48
|
|
|
|
|
|
|
=head1 DESCRIPTION |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
Principal component analysis (PCA) transforms higher-dimensional data consisting of a number of possibly correlated variables into a smaller number of |
51
|
|
|
|
|
|
|
uncorrelated variables termed principal components (PCs). The higher the ranking of the PCs the greater the amount of |
52
|
|
|
|
|
|
|
variability that the PC accounts for. This PCA procedure involves the calculation of the eigenvalue decomposition using either the Math::Cephes::Matrix or |
53
|
|
|
|
|
|
|
Math::MatrixReal modules (see METHODS) from a data covariance matrix after mean centering the data. See |
54
|
|
|
|
|
|
|
http://en.wikipedia.org/wiki/Principal_component_analysis for more details. |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=cut |
57
|
|
|
|
|
|
|
=head1 METHODS |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=cut |
60
|
1
|
|
|
1
|
|
3308
|
use version; our $VERSION = qv('0.0.1'); |
|
1
|
|
|
|
|
2927
|
|
|
1
|
|
|
|
|
10
|
|
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
#y////////////////////////////////////////////// CONSTRUCTOR AND DATA LOADING ///////////////////////////////////////// |
64
|
|
|
|
|
|
|
#=fs CONSTRUCTOR AND DATA LOADING |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
#sub diag { |
67
|
|
|
|
|
|
|
# my $self = shift; |
68
|
|
|
|
|
|
|
# print Dumper $self; |
69
|
|
|
|
|
|
|
# return; |
70
|
|
|
|
|
|
|
#} |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=head2 new |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
Create a new Statistics::PCA object. |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
my $pca = Statistics::PCA->new; |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=cut |
79
|
|
|
|
|
|
|
sub new { |
80
|
0
|
|
|
0
|
1
|
|
my $class = shift; |
81
|
0
|
|
|
|
|
|
my $self = {}; |
82
|
0
|
|
|
|
|
|
bless $self, $class; |
83
|
0
|
|
|
|
|
|
return $self; |
84
|
|
|
|
|
|
|
} |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
#/ this will work for a matrix of values where ALL data is complete - i.e. |
87
|
|
|
|
|
|
|
=head2 load_data |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
Used for loading data into object. Data is fed as a reference to a LoL within an anonymous hash using the |
90
|
|
|
|
|
|
|
named argument 'data'. Data may be entered in one of two forms specified by the obligatory named argument 'format'. |
91
|
|
|
|
|
|
|
Data may either be entered in standard 'table' fashion (with rows corresponding to observations and columns corresponding |
92
|
|
|
|
|
|
|
to variables). Thus to enter the following table of data: |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
Var1 Var2 Var3 Var4 |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
Obs1 32 26 51 12 |
97
|
|
|
|
|
|
|
Obs2 17 13 34 35 |
98
|
|
|
|
|
|
|
Obs3 10 94 83 45 |
99
|
|
|
|
|
|
|
Obs4 3 72 72 67 |
100
|
|
|
|
|
|
|
Obs5 10 63 35 34 ... |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
The data is passed as an LoL with the with each nested ARRAY reference corresponding to a row of observations in the |
103
|
|
|
|
|
|
|
data table and the 'format' argument value 'table' as follows: |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
# Var1 Var2 Var3 Var4 ... |
106
|
|
|
|
|
|
|
my $data = [ |
107
|
|
|
|
|
|
|
[qw/ 32 26 51 12 /], # Obs1 |
108
|
|
|
|
|
|
|
[qw/ 17 13 34 35 /], # Obs2 |
109
|
|
|
|
|
|
|
[qw/ 10 94 83 45 /], # Obs3 |
110
|
|
|
|
|
|
|
[qw/ 3 72 72 67 /], # Obs4 |
111
|
|
|
|
|
|
|
[qw/ 10 63 35 34 /], # Obs5 ... |
112
|
|
|
|
|
|
|
]; |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
$pca->load_data ( { format => 'table', data => $data, } ); |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
Alternatively you may enter the data in a variable-centric fashion where each nested ARRAY reference corresponds to a |
117
|
|
|
|
|
|
|
single variable within the data (i.e. the transpose of the above table-fashion). To pass the above data in this fashion |
118
|
|
|
|
|
|
|
use the 'format' argument with value 'variable' as follows: |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
# Obs1 Obs2 Obs3 Obs4 Obs5 ... |
121
|
|
|
|
|
|
|
my $transpose = [ |
122
|
|
|
|
|
|
|
[qw/ 32 17 10 3 10 /], # Var1 |
123
|
|
|
|
|
|
|
[qw/ 26 13 94 72 63 /], # Var2 |
124
|
|
|
|
|
|
|
[qw/ 51 34 83 72 35 /], # Var3 |
125
|
|
|
|
|
|
|
[qw/ 12 35 45 67 34 /], # Var4 ... |
126
|
|
|
|
|
|
|
]; |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
$pca->load_data ( { format => 'variable', data => $transpose, } ) ; |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=cut |
131
|
|
|
|
|
|
|
sub load_data { |
132
|
|
|
|
|
|
|
#my ( $self, $data ) = @_; |
133
|
0
|
|
|
0
|
0
|
|
my ( $self, $h_ref ) = @_; |
134
|
0
|
0
|
0
|
|
|
|
croak qq{\nArguments must be passed as HASH reference.} if ( ( $h_ref ) && ( ref $h_ref ne q{HASH} ) ); |
135
|
|
|
|
|
|
|
|
136
|
0
|
|
|
|
|
|
my $data_dirty = $h_ref->{data}; |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
#y clean it now to stop strange internal references |
139
|
0
|
|
|
|
|
|
my $data = _deep_copy_references($data_dirty); |
140
|
|
|
|
|
|
|
|
141
|
0
|
0
|
|
|
|
|
croak qq{\nYou must specify a data format} if ( !exists $h_ref->{format} ); |
142
|
0
|
|
|
|
|
|
my $format_val = $h_ref->{format}; |
143
|
|
|
|
|
|
|
|
144
|
0
|
|
|
0
|
|
|
my %formating = ( table => sub { $self->_transpose($data); }, |
145
|
|
|
|
|
|
|
#y no point in have direct method its so short just put it here |
146
|
|
|
|
|
|
|
#variable => sub { $self->_direct }, # just do the calculations directly and put the data in |
147
|
|
|
|
|
|
|
variable => sub { |
148
|
0
|
|
|
0
|
|
|
$self->{summaries}{var_num} = scalar ( @{$data} ); |
|
0
|
|
|
|
|
|
|
149
|
0
|
|
|
|
|
|
$self->{summaries}{var_length} = scalar ( @{$data->[0]} ); |
|
0
|
|
|
|
|
|
|
150
|
0
|
|
|
|
|
|
$self->{data}{transpose_temp} = $data; return; }, |
|
0
|
|
|
|
|
|
|
151
|
0
|
|
|
|
|
|
); |
152
|
|
|
|
|
|
|
|
153
|
0
|
|
|
|
|
|
my $format = $formating{$format_val}; |
154
|
0
|
0
|
|
|
|
|
croak qq{\nYou must pass a recognised option: \"table\", \"variable\"} if ( !defined $format ); |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
#y &{$cd}() = &$cd() = &$cd = $cd->(); |
157
|
0
|
|
|
|
|
|
$format->(); |
158
|
|
|
|
|
|
|
|
159
|
0
|
|
|
|
|
|
$self->_data_checks; |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
#y adjust flag |
162
|
0
|
|
|
|
|
|
$self->{flags}{data_loaded} = 1; |
163
|
|
|
|
|
|
|
|
164
|
0
|
|
|
|
|
|
return; |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
} |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
sub _direct { |
169
|
0
|
|
|
0
|
|
|
my ( $self, $a_ref ) = @_; |
170
|
|
|
|
|
|
|
|
171
|
0
|
|
|
|
|
|
my $var_num = scalar ( @{$a_ref} ); |
|
0
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
|
173
|
0
|
|
|
|
|
|
my $var_length = scalar ( @{$a_ref->[0]} ); |
|
0
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
|
175
|
0
|
|
|
|
|
|
$self->{data}{transpose_temp} = $a_ref; |
176
|
0
|
|
|
|
|
|
$self->{summaries}{var_num} = $var_num; |
177
|
0
|
|
|
|
|
|
$self->{summaries}{var_length} = $var_length; |
178
|
|
|
|
|
|
|
|
179
|
0
|
|
|
|
|
|
return; |
180
|
|
|
|
|
|
|
} |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
sub _transpose { |
183
|
|
|
|
|
|
|
|
184
|
0
|
|
|
0
|
|
|
my ( $self, $a_ref ) = @_; |
185
|
|
|
|
|
|
|
|
186
|
0
|
|
|
|
|
|
my $var_length = scalar ( @{$a_ref} ); |
|
0
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
|
188
|
0
|
|
|
|
|
|
my $done = []; |
189
|
0
|
|
|
|
|
|
for my $col ( 0..$#{$a_ref->[0]} ) { |
|
0
|
|
|
|
|
|
|
190
|
0
|
|
|
|
|
|
push @{$done}, [ map { $_->[$col] } @{$a_ref} ]; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
} |
192
|
|
|
|
|
|
|
|
193
|
0
|
|
|
|
|
|
my $var_num = scalar ( @{$done} ); |
|
0
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
|
195
|
0
|
|
|
|
|
|
$self->{data}{transpose_temp} = $done; |
196
|
0
|
|
|
|
|
|
$self->{summaries}{var_num} = $var_num; |
197
|
0
|
|
|
|
|
|
$self->{summaries}{var_length} = $var_length; |
198
|
|
|
|
|
|
|
|
199
|
0
|
|
|
|
|
|
return; |
200
|
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
sub _data_checks { |
203
|
|
|
|
|
|
|
|
204
|
0
|
|
|
0
|
|
|
my $self = shift; |
205
|
|
|
|
|
|
|
|
206
|
0
|
|
|
|
|
|
my $data_a_ref = $self->{data}{transpose_temp}; |
207
|
|
|
|
|
|
|
|
208
|
0
|
|
|
|
|
|
my $rows = $self->{summaries}{var_num}; |
209
|
0
|
0
|
0
|
|
|
|
croak qq{\nI need some data - there are too few rows in your data.\n} if ( !$rows || $rows == 1 ); |
210
|
|
|
|
|
|
|
|
211
|
0
|
|
|
|
|
|
my $cols = $self->{summaries}{var_length}; |
212
|
0
|
0
|
0
|
|
|
|
croak qq{\nI need some data - there are too few columns in your data.\n} if ( !$cols || $cols == 1 ); |
213
|
|
|
|
|
|
|
|
214
|
0
|
|
|
|
|
|
for my $row (@{$data_a_ref}) { |
|
0
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
|
216
|
0
|
0
|
|
|
|
|
croak qq{\n\nData set must be passed as ARRAY references.\n} if ( ref $row ne q{ARRAY} ); |
217
|
0
|
0
|
|
|
|
|
croak qq{\n\nAll rows must have the same number of columns.\n} if ( scalar( @{$row} ) != $cols ); |
|
0
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
#/ all fine and dandy. |
222
|
0
|
|
|
|
|
|
print qq{\nData has $rows variables and $cols observations. Passing data to object.}; |
223
|
0
|
|
|
|
|
|
$self->{data}{transpose} = $data_a_ref; |
224
|
0
|
|
|
|
|
|
delete $self->{data}{transpose_temp}; |
225
|
|
|
|
|
|
|
|
226
|
0
|
|
|
|
|
|
return; |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
#=fe |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
#y/////////////////////////////////////////////////////// ANALYSIS //////////////////////////////////////////////////// |
233
|
|
|
|
|
|
|
#=fs ANALYSIS |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
=head2 pca |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
To perform the PCA analysis. This method takes the optional named argument 'eigen' that takes the values 'M' or 'C' to |
238
|
|
|
|
|
|
|
calculate the eigenvalue decomposition using either the Math::MatrixReal or Math::Cephes::Matrix modules respectively |
239
|
|
|
|
|
|
|
(defaults to 'M' without argument). |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
$pca->pca(); |
242
|
|
|
|
|
|
|
$pca->pca( { eigen => 'M' } ); |
243
|
|
|
|
|
|
|
$pca->pca( { eigen => 'C' } ); |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
=cut |
246
|
|
|
|
|
|
|
sub pca { |
247
|
|
|
|
|
|
|
|
248
|
0
|
|
|
0
|
1
|
|
my ( $self, $h_ref ) = @_; |
249
|
0
|
0
|
0
|
|
|
|
croak qq{\nArguments must be passed as HASH reference.} if ( ( $h_ref ) && ( ref $h_ref ne q{HASH} ) ); |
250
|
|
|
|
|
|
|
|
251
|
0
|
0
|
|
|
|
|
exists $h_ref->{eigen} || print qq{\nUsing default option of Math::MatrixReal to calculate eigen values.}; |
252
|
|
|
|
|
|
|
|
253
|
0
|
0
|
|
|
|
|
my $eigen = exists $h_ref->{eigen} ? $h_ref->{eigen} : q{M}; |
254
|
|
|
|
|
|
|
|
255
|
0
|
0
|
|
|
|
|
croak qq{\nI don\'t recognise that value for the \'eigen\' option - requires \'M\' or \'C\' (defaults to \'M\' without option).} |
256
|
|
|
|
|
|
|
if ( $eigen !~ /\A[MC]\z/xms ); |
257
|
|
|
|
|
|
|
|
258
|
0
|
|
|
|
|
|
$self->_calculate_averages; |
259
|
0
|
|
|
|
|
|
$self->_calculate_adjustment; |
260
|
0
|
|
|
|
|
|
$self->_calculate_CVs; |
261
|
|
|
|
|
|
|
|
262
|
0
|
0
|
|
|
|
|
if ( $eigen eq q{M} ) { $self->_calculate_eigens_matrixreal; } |
|
0
|
0
|
|
|
|
|
|
263
|
|
|
|
|
|
|
# overkill here |
264
|
0
|
|
|
|
|
|
elsif ( $eigen eq q{C} ) { $self->_calculate_eigens_cephes; } |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
#y re-orders eigenvalues and eigenvectors according to eigenvalue - thus everything from here is in correct order |
267
|
0
|
|
|
|
|
|
$self->_rank_eigenvalues; |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
#y we have ranked data - should put in new positions? so now we do the calculations |
270
|
0
|
|
|
|
|
|
$self->_calculate_components; |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
#y generates the prcomp eigenvectors calculation - returns it as an object and also stores the raw data as self->{self}{eigen} |
273
|
0
|
|
|
|
|
|
$self->_transform; |
274
|
|
|
|
|
|
|
|
275
|
0
|
|
|
|
|
|
return; |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
sub _calculate_averages { |
279
|
0
|
|
|
0
|
|
|
my $self = shift; |
280
|
0
|
|
|
|
|
|
my $new_data = $self->{data}{transpose}; |
281
|
|
|
|
|
|
|
|
282
|
0
|
|
|
|
|
|
my $totals_ref = []; |
283
|
|
|
|
|
|
|
|
284
|
0
|
|
|
|
|
|
for my $row ( 0..($self->{summaries}{var_num}-1) ) { |
285
|
|
|
|
|
|
|
|
286
|
0
|
|
|
|
|
|
my $sum = sum @{$new_data->[$row]}; |
|
0
|
|
|
|
|
|
|
287
|
0
|
|
|
|
|
|
my $length = scalar ( @{$new_data->[$row]} ); |
|
0
|
|
|
|
|
|
|
288
|
0
|
|
|
|
|
|
my $average = $sum / $length; |
289
|
|
|
|
|
|
|
|
290
|
0
|
|
|
|
|
|
push @{$totals_ref}, { sum => $sum, length => $length, average => $average}; |
|
0
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
} |
293
|
|
|
|
|
|
|
|
294
|
0
|
|
|
|
|
|
$self->{summaries}{totals} = $totals_ref; |
295
|
0
|
|
|
|
|
|
return; |
296
|
|
|
|
|
|
|
} |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
sub _calculate_adjustment { |
299
|
0
|
|
|
0
|
|
|
my $self = shift; |
300
|
|
|
|
|
|
|
|
301
|
0
|
|
|
|
|
|
my $trans = $self->{data}{transpose}; |
302
|
|
|
|
|
|
|
|
303
|
0
|
|
|
|
|
|
my $totals = $self->{summaries}{totals}; |
304
|
|
|
|
|
|
|
|
305
|
0
|
|
|
|
|
|
my $adjust = []; |
306
|
|
|
|
|
|
|
|
307
|
0
|
|
|
|
|
|
for my $row ( 0..($self->{summaries}{var_num}-1) ) { |
308
|
|
|
|
|
|
|
|
309
|
0
|
|
|
|
|
|
@{$adjust->[$row]} = map { $_ - $totals->[$row]{average} } @{$trans->[$row]}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
} |
312
|
|
|
|
|
|
|
|
313
|
0
|
|
|
|
|
|
$self->{data}{adjusted} = $adjust; |
314
|
|
|
|
|
|
|
} |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
sub _calculate_CVs { |
317
|
0
|
|
|
0
|
|
|
my $self = shift; |
318
|
|
|
|
|
|
|
|
319
|
0
|
|
|
|
|
|
my $adjusted = $self->{data}{adjusted}; |
320
|
0
|
|
|
|
|
|
my $var_num = $self->{summaries}{var_num}; |
321
|
0
|
|
|
|
|
|
my $length = $self->{summaries}{var_length}; |
322
|
0
|
|
|
|
|
|
my $sum = 0; |
323
|
0
|
|
|
|
|
|
my $covariance_matrix_ref = []; |
324
|
|
|
|
|
|
|
|
325
|
0
|
|
|
|
|
|
for my $row ( 0..($var_num-1) ) { |
326
|
|
|
|
|
|
|
|
327
|
0
|
|
|
|
|
|
for my $col ( 0..($var_num-1) ) { |
328
|
|
|
|
|
|
|
|
329
|
0
|
|
|
|
|
|
my $sum = 0; |
330
|
0
|
|
|
|
|
|
for my $iteration (0..$#{$adjusted->[0]}) { |
|
0
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
|
332
|
0
|
|
|
|
|
|
my $val = $adjusted->[$col][$iteration] * $adjusted->[$row][$iteration]; |
333
|
|
|
|
|
|
|
|
334
|
0
|
|
|
|
|
|
$sum += $val; |
335
|
|
|
|
|
|
|
} |
336
|
|
|
|
|
|
|
|
337
|
0
|
|
|
|
|
|
my $cv = $sum / ($length-1); |
338
|
|
|
|
|
|
|
|
339
|
0
|
|
|
|
|
|
$covariance_matrix_ref->[$col][$row] = $cv; |
340
|
|
|
|
|
|
|
} |
341
|
|
|
|
|
|
|
} |
342
|
|
|
|
|
|
|
|
343
|
0
|
|
|
|
|
|
$self->{summaries}{covariate_matrix} = $covariance_matrix_ref; |
344
|
0
|
|
|
|
|
|
return; |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
} |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
sub _calculate_eigens_matrixreal { |
349
|
0
|
|
|
0
|
|
|
my $self = shift; |
350
|
|
|
|
|
|
|
|
351
|
0
|
|
|
|
|
|
my $covariance_matrix_ref = $self->{summaries}{covariate_matrix}; |
352
|
|
|
|
|
|
|
|
353
|
0
|
|
|
|
|
|
my $covariance_matrix_perl = Math::MatrixReal->new_from_cols ( $covariance_matrix_ref ) ; |
354
|
0
|
|
|
|
|
|
my ($eigen_val_perl, $eigen_vec_perl) = $covariance_matrix_perl->sym_diagonalize(); |
355
|
0
|
|
|
|
|
|
my $eigen_vec_perl_T = ~$eigen_vec_perl; |
356
|
0
|
|
|
|
|
|
my $eigen_val_perl_T = ~$eigen_val_perl; |
357
|
|
|
|
|
|
|
|
358
|
0
|
|
|
|
|
|
my $overall_alt = []; |
359
|
0
|
|
|
|
|
|
@{$overall_alt} = map { +{ solution => $_+1, eigenvalue => $eigen_val_perl_T->[0][0][$_], eigenvector => $eigen_vec_perl_T->[0][$_] } } (0..$#{$eigen_val_perl_T->[0][0]}); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
|
361
|
0
|
|
|
|
|
|
$self->{summaries}{eigen}{raw} = $overall_alt; |
362
|
|
|
|
|
|
|
|
363
|
0
|
|
|
|
|
|
return; |
364
|
|
|
|
|
|
|
} |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
sub _calculate_eigens_cephes { |
367
|
0
|
|
|
0
|
|
|
my $self = shift; |
368
|
|
|
|
|
|
|
|
369
|
0
|
|
|
|
|
|
my $covariance_matrix_ref = $self->{summaries}{covariate_matrix}; |
370
|
0
|
|
|
|
|
|
my $covariance_matrix = mat ( $covariance_matrix_ref ) ; |
371
|
|
|
|
|
|
|
|
372
|
0
|
|
|
|
|
|
my ($eigen_val, $eigen_vec) = $covariance_matrix->eigens(); |
373
|
0
|
|
|
|
|
|
my $eigen_vec_ref = $eigen_vec->coef; |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
#print Dumper $eigen_val, $eigen_vec_ref; |
376
|
|
|
|
|
|
|
#print Dumper $eigen_val_perl_T->[0][0], $eigen_vec_perl_T->[0]; |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
#y we don´t need it but we will force perl to intepret {} as anon HASH and not BATCH with '+' |
379
|
0
|
|
|
|
|
|
my $overall = []; |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
#@{$overall} = map { +{ solution => $_, eigenvalue => $eigen_val->[$_], eigenvector => $eigen_vec_ref->[$_] } } (0..$#{$eigen_val}); |
382
|
0
|
|
|
|
|
|
@{$overall} = map { +{ solution => $_+1, eigenvalue => $eigen_val->[$_], eigenvector => $eigen_vec_ref->[$_] } } (0..$#{$eigen_val}); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
|
384
|
0
|
|
|
|
|
|
$self->{summaries}{eigen}{raw} = $overall; |
385
|
0
|
|
|
|
|
|
return; |
386
|
|
|
|
|
|
|
} |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
sub _deep_copy_references { |
389
|
0
|
|
|
0
|
|
|
my $ref = shift; |
390
|
0
|
0
|
|
|
|
|
if (!ref $ref) { $ref; } |
|
0
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
391
|
|
|
|
|
|
|
#y/ this check for a_refs in which case we will access the whole thing as @{$a_ref} |
392
|
|
|
|
|
|
|
elsif (ref $ref eq q{ARRAY} ) { |
393
|
0
|
|
|
|
|
|
[ map { _deep_copy_references($_) } @{$ref} ]; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
} |
395
|
|
|
|
|
|
|
#y/ this checks for hash refs - in which case it will be handled by fully derferencing: %{$ref} |
396
|
|
|
|
|
|
|
elsif (ref $ref eq q{HASH} ) { |
397
|
|
|
|
|
|
|
#y intepreter forced to read this as an anon HASH and not BATCH by prepending + |
398
|
0
|
|
|
|
|
|
+ { map { $_ => _deep_copy_references($ref->{$_}) } (keys %{$ref}) }; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
} |
400
|
0
|
|
|
|
|
|
else { die "what type is $_?" } |
401
|
|
|
|
|
|
|
} |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
sub _rank_eigenvalues { |
404
|
0
|
|
|
0
|
|
|
my $self = shift; |
405
|
0
|
|
|
|
|
|
my $overall = $self->{summaries}{eigen}{raw}; |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
#/ deep copy to stop fuss! |
408
|
0
|
|
|
|
|
|
my $overall_clean = _deep_copy_references($overall); |
409
|
|
|
|
|
|
|
|
410
|
0
|
|
|
|
|
|
my $overall_sorted = []; |
411
|
|
|
|
|
|
|
#/ Can't use "my $a" in sort comparison at The_PCA_method.pl line 255 - cos $a was declared as lexical and $a/$b are globals... |
412
|
0
|
|
|
|
|
|
@{$overall_sorted} = sort { $b->{eigenvalue} <=> $a->{eigenvalue} } @{$overall_clean}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
|
414
|
0
|
|
|
|
|
|
$self->{summaries}{eigen}{sorted} = $overall_sorted; |
415
|
|
|
|
|
|
|
|
416
|
0
|
|
|
|
|
|
$self->_add_rank; |
417
|
0
|
|
|
|
|
|
return; |
418
|
|
|
|
|
|
|
} |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
sub _add_rank { |
421
|
0
|
|
|
0
|
|
|
my $self = shift; |
422
|
0
|
|
|
|
|
|
my $overall_sorted = $self->{summaries}{eigen}{sorted}; |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
#@{$overall_sorted} = sort { $b->{eigenvalue} <=> $a->{eigenvalue} } map { my $temp = $_; $overall_sorted->[$temp]{rank} = $temp+1 } (0..$#{$overall_clean}); |
425
|
|
|
|
|
|
|
|
426
|
0
|
|
|
|
|
|
for my $pos ( (0..$#{$overall_sorted}) ) { |
|
0
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
#my $overall_sorted->[$pos]{rank} = $pos; |
428
|
0
|
|
|
|
|
|
$overall_sorted->[$pos]{PC} = $pos+1; |
429
|
|
|
|
|
|
|
} |
430
|
0
|
|
|
|
|
|
return; |
431
|
|
|
|
|
|
|
} |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
sub _calculate_components { |
434
|
0
|
|
|
0
|
|
|
my $self = shift; |
435
|
0
|
|
|
|
|
|
my $sorted_eigen = $self->{summaries}{eigen}{sorted}; |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
# we will calculate stdev - this is EITHER stdev of the transformed data OR more generally the stdev of the eigenvalue of the solution. |
438
|
|
|
|
|
|
|
|
439
|
0
|
|
|
|
|
|
my $total_variance = sum map { $_->{eigenvalue} } @{$sorted_eigen}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
|
441
|
0
|
|
|
|
|
|
my $cumulative_variance = 0; |
442
|
|
|
|
|
|
|
|
443
|
0
|
|
|
|
|
|
for my $hash_ref (@{$sorted_eigen}) { |
|
0
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
# use this twice to unpack it |
446
|
0
|
|
|
|
|
|
my $variance_aka_eigenvalue = $hash_ref->{eigenvalue}; |
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
#my $stdev = sqrt($variance_aka_eigenvalue); |
449
|
0
|
|
|
|
|
|
$hash_ref->{stdev} = sqrt($variance_aka_eigenvalue); |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
# use this twice so put it in variable |
452
|
|
|
|
|
|
|
#$hash_ref->{proportion_of_variance} = ( $variance_aka_eigenvalue / $total_variance ); |
453
|
0
|
|
|
|
|
|
my $proportion_of_variance = ($variance_aka_eigenvalue / $total_variance); |
454
|
0
|
|
|
|
|
|
$hash_ref->{proportion_of_variance} = $proportion_of_variance; |
455
|
|
|
|
|
|
|
|
456
|
0
|
|
|
|
|
|
$cumulative_variance += $proportion_of_variance; |
457
|
0
|
|
|
|
|
|
$hash_ref->{cumulative_variance} = $cumulative_variance; |
458
|
|
|
|
|
|
|
} |
459
|
|
|
|
|
|
|
|
460
|
0
|
|
|
|
|
|
$self->{summaries}{total_variance} = $total_variance; |
461
|
|
|
|
|
|
|
|
462
|
0
|
|
|
|
|
|
return; |
463
|
|
|
|
|
|
|
} |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
sub _create_row_matrix_of_eigenvectors { |
466
|
|
|
|
|
|
|
|
467
|
0
|
|
|
0
|
|
|
my $self = shift; |
468
|
|
|
|
|
|
|
|
469
|
0
|
|
|
|
|
|
my $sorted_eigen = $self->{summaries}{eigen}{sorted}; |
470
|
0
|
|
|
|
|
|
my $eigen_vectors = []; |
471
|
|
|
|
|
|
|
|
472
|
0
|
|
|
|
|
|
@{$eigen_vectors} = map { $_->{eigenvector} } @{$sorted_eigen}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
#y we turn it into a matrix object - should use _from_rows? |
475
|
0
|
|
|
|
|
|
my $eigen_matrix_object = Math::MatrixReal->new_from_cols( $eigen_vectors ); |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
#y we take the transpose which will be multiplied by the row_adjusted_matrix_object - i.e. transpose |
478
|
0
|
|
|
|
|
|
my $row_eigen_matrix_object = ~$eigen_matrix_object; |
479
|
|
|
|
|
|
|
|
480
|
0
|
|
|
|
|
|
my $eigen_vectors_copy = _deep_copy_references ($eigen_vectors); |
481
|
0
|
|
|
|
|
|
$self->{pca}{eigenvectors} = $eigen_vectors_copy; |
482
|
|
|
|
|
|
|
|
483
|
0
|
|
|
|
|
|
return $row_eigen_matrix_object; |
484
|
|
|
|
|
|
|
} |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
sub _create_row_matrix_of_adjusted { |
487
|
0
|
|
|
0
|
|
|
my $self = shift; |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
# unpack adjusted data |
490
|
0
|
|
|
|
|
|
my $adjusted = $self->{data}{adjusted}; |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
#my $adjusted_data_m = Math::MatrixReal->new_from_cols( $pca->{data}{adjusted} ); |
493
|
0
|
|
|
|
|
|
my $adjusted_data_matrix_object = Math::MatrixReal->new_from_cols( $adjusted ); |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
#y take transpose |
496
|
0
|
|
|
|
|
|
my $row_adjusted_data_matrix = ~$adjusted_data_matrix_object; |
497
|
|
|
|
|
|
|
|
498
|
0
|
|
|
|
|
|
return $row_adjusted_data_matrix; |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
} |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
sub _transform { |
503
|
|
|
|
|
|
|
|
504
|
0
|
|
|
0
|
|
|
my $self = shift; |
505
|
0
|
|
|
|
|
|
my $row_mat_Eigen_object = $self->_create_row_matrix_of_eigenvectors; |
506
|
0
|
|
|
|
|
|
my $row_mat_Adjust_object = $self->_create_row_matrix_of_adjusted; |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
#y this is the actual pca output - but needs to be transposed |
509
|
0
|
|
|
|
|
|
my $product_matrix = $row_mat_Eigen_object->multiply($row_mat_Adjust_object); |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
#y/ code from MatrixReal: map { $this->[0][$_] = [ @$empty ] } ( 0 .. $rows-1); |
512
|
|
|
|
|
|
|
#y/ i.e. all matrix data is put into ->[0] |
513
|
0
|
|
|
|
|
|
$self->{pca}{transform} = $product_matrix->[0]; |
514
|
|
|
|
|
|
|
|
515
|
0
|
|
|
|
|
|
return; |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
} |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
#=fe |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
#y/////////////////////////////////////////////////////// RESULTS ///////////////////////////////////////////////////// |
523
|
|
|
|
|
|
|
#=fs RESULTS |
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
=head2 results |
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
Used to access the results of the PCA analysis. This method is context-dependent and will return a variety of different |
528
|
|
|
|
|
|
|
values depending on whether it is called in VOID or LIST context and the arguments its passed. |
529
|
|
|
|
|
|
|
In VOID-context it prints a formated table of the computed results to STDOUT. |
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
$pca->results; |
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
In LIST context this method takes an obligatory argument that determines its return values. To return an ordered list |
534
|
|
|
|
|
|
|
(ordered by PC ranking) of the proportions of total variance of each PC pass 'proportion' to the method. |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
my @list = $pca->results('proportion'); |
537
|
|
|
|
|
|
|
print qq{\nOrdered list of individual proportions of variance: @list}; |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
To return an ordered list of the cumulative variance of the PCs pass argument 'cumulative'. |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
@list = $pca->results('cumulative'); |
542
|
|
|
|
|
|
|
print qq{\nOrdered list of cumulative variance of the PCs: @list}; |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
To return an ordered list of the individual standard deviations of the PCs pass argument 'stdev'. |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
@list = $pca->results('stdev'); |
547
|
|
|
|
|
|
|
print qq{\nOrdered list of individual standard deviations of the PCs: @list}; |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
To return an ordered list of the individual eigenvalues of the PCs pass argument 'eigenvalue'. |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
@list = $pca->results('eigenvalue'); |
552
|
|
|
|
|
|
|
print qq{\nOrdered list of individual eigenvalues of the PCs: @list}; |
553
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
To return an ordered list of ARRAY references containing the eigenvectors of the PCs pass argument 'eigenvector'. |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
# Returns an ordered list of array references containing the eigenvectors for the components |
557
|
|
|
|
|
|
|
@list = $pca->results('eigenvector'); |
558
|
|
|
|
|
|
|
use Data::Dumper; |
559
|
|
|
|
|
|
|
print Dumper \@list; |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
To return an ordered list of ARRAY references containing more detailed information about each PC use the 'full' |
562
|
|
|
|
|
|
|
argument. Each nested ARRAY reference consists of an ordered list of: PC rank, PC stdev, PC proportion of variance, |
563
|
|
|
|
|
|
|
PC cumulative_variance, PC eigenvalue and a further nested ARRAY reference containing the PC eigenvector. |
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
@list = $pca->results('full'); |
566
|
|
|
|
|
|
|
for my $i (@list) { |
567
|
|
|
|
|
|
|
print qq{\nPC rank: $i->[0]} |
568
|
|
|
|
|
|
|
. qq{\nPC stdev $i->[1]} |
569
|
|
|
|
|
|
|
. qq{\nPC proportion of variance $i->[2]} |
570
|
|
|
|
|
|
|
. qq{\nPC cumulative variance $i->[3]} |
571
|
|
|
|
|
|
|
. qq{\nPC eigenvalue $i->[4]} |
572
|
|
|
|
|
|
|
} |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
To return an ordered LoL of the transformed data for each of the PCs pass 'transformed' to the method. |
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
@list = $pca->results('transformed'); |
577
|
|
|
|
|
|
|
print qq{\nThe transformed data for 'the' principal component (first PC): @{$list[0]} }; |
578
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
=cut |
580
|
|
|
|
|
|
|
sub results { |
581
|
0
|
|
|
0
|
1
|
|
my ( $self, $arg ) = @_; |
582
|
|
|
|
|
|
|
return ( |
583
|
0
|
|
|
0
|
|
|
VOID { $self->_print } |
584
|
|
|
|
|
|
|
# either have specific methods for type of return! or simply have this and an arguement |
585
|
0
|
|
|
0
|
|
|
LIST { $self->_results_in_list($arg) } |
586
|
|
|
|
|
|
|
#y total variance is popintless without individual and if you´ve got individual total is trivial so leave these returns are they are. |
587
|
|
|
|
|
|
|
# nao faz sentido - nao eh uma teste |
588
|
|
|
|
|
|
|
#BOOL { $F > $standard_F ? 1 : undef; } |
589
|
0
|
|
|
|
|
|
); |
590
|
|
|
|
|
|
|
} |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
sub _print { |
593
|
|
|
|
|
|
|
|
594
|
0
|
|
|
0
|
|
|
my $self = shift; |
595
|
0
|
|
|
|
|
|
print qq{\n=======================\nRESULTS OF PCA ANALYSIS\n=======================\n}; |
596
|
|
|
|
|
|
|
|
597
|
0
|
|
|
|
|
|
$self->print_total_variance; |
598
|
0
|
|
|
|
|
|
$self->print_variance; |
599
|
0
|
|
|
|
|
|
$self->print_eigenvectors; |
600
|
0
|
|
|
|
|
|
$self->print_transform; |
601
|
|
|
|
|
|
|
|
602
|
0
|
|
|
|
|
|
return; |
603
|
|
|
|
|
|
|
} |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
#/ should really get the vectors directly from summaries and not re-enter them in the object!!! - especially now that you can just use rank for PC number |
606
|
|
|
|
|
|
|
sub print_eigenvectors { |
607
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
608
|
0
|
|
|
|
|
|
$self->_print_private(q{eigenvectors}); |
609
|
|
|
|
|
|
|
} |
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
sub print_transform { |
612
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
613
|
0
|
|
|
|
|
|
$self->_print_private(q{transform}); |
614
|
|
|
|
|
|
|
} |
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
sub print_total_variance { |
617
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
618
|
0
|
|
|
|
|
|
print qq{\nTotal Variance = }, sprintf (q{%.8f}, $self->{summaries}{total_variance}), qq{\n}; |
619
|
0
|
|
|
|
|
|
return; |
620
|
|
|
|
|
|
|
} |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
sub print_variance { |
623
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
624
|
|
|
|
|
|
|
|
625
|
0
|
|
|
|
|
|
my $sorted_eigen = $self->{summaries}{eigen}{sorted}; |
626
|
|
|
|
|
|
|
|
627
|
0
|
|
|
|
|
|
print qq{\nTable of Standard Deviations and Variances:\n}; |
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
# just create a first columnm that´s empty for names of rows |
630
|
0
|
|
|
|
|
|
my @config_full = ( [22, q{}] ); |
631
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
# column calculations... |
633
|
|
|
|
|
|
|
#/ really ought to get PC name from rank attribute |
634
|
|
|
|
|
|
|
#my @config = map { [ 12, q{PC_}.$_ ] } ( 1..(scalar (@{$sorted_eigen})) ); |
635
|
|
|
|
|
|
|
#my @config = map { [ 12, q{PC_}.$_->{rank} ] } ( 0..(scalar ($#{$sorted_eigen})) ); |
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
#r/ PC is rank - i.e. it is the 'principal' compoenent |
638
|
0
|
|
|
|
|
|
my @config = map { [ 12, q{PC_}.$_->{PC} ] } (@{$sorted_eigen}); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
# make the actual configuring array |
641
|
0
|
|
|
|
|
|
push @config_full, @config; |
642
|
|
|
|
|
|
|
|
643
|
0
|
|
|
|
|
|
my $table = Text::SimpleTable->new(@config_full); |
644
|
|
|
|
|
|
|
|
645
|
0
|
|
|
|
|
|
my @row1 = (); # no need to initialise, but whatever... |
646
|
0
|
|
|
|
|
|
for my $hash_ref (@{$sorted_eigen}) { push @row1, sprintf (q{%.8f}, $hash_ref->{stdev}); } |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
647
|
0
|
|
|
|
|
|
$table->row( q{Standard Deviation}, @row1 ); |
648
|
0
|
|
|
|
|
|
$table->hr; |
649
|
|
|
|
|
|
|
|
650
|
0
|
|
|
|
|
|
my @row2 = (); # no need to initialise, but whatever... |
651
|
0
|
|
|
|
|
|
for my $hash_ref (@{$sorted_eigen}) { push @row2, sprintf (q{%.8f}, $hash_ref->{proportion_of_variance}); } |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
652
|
0
|
|
|
|
|
|
$table->row( q{Proportion of Variance}, @row2 ); |
653
|
0
|
|
|
|
|
|
$table->hr; |
654
|
|
|
|
|
|
|
|
655
|
0
|
|
|
|
|
|
my @row3 = (); # no need to initialise, but whatever... |
656
|
0
|
|
|
|
|
|
for my $hash_ref (@{$sorted_eigen}) { push @row3, sprintf (q{%.8f}, $hash_ref->{cumulative_variance}); } |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
657
|
0
|
|
|
|
|
|
$table->row( q{Cumulative Variance}, @row3 ); |
658
|
|
|
|
|
|
|
|
659
|
0
|
|
|
|
|
|
print $table->draw; |
660
|
0
|
|
|
|
|
|
return; |
661
|
|
|
|
|
|
|
} |
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
sub _print_private { |
664
|
0
|
|
|
0
|
|
|
my ( $self, $arg ) = @_; |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
#/ twat used numeric == |
667
|
0
|
0
|
|
|
|
|
$arg eq q{eigenvectors} and print qq{\nTable of vectors:\n}; |
668
|
0
|
0
|
|
|
|
|
$arg eq q{transform} and print qq{\nTable of Transformed data:\n}; |
669
|
|
|
|
|
|
|
|
670
|
0
|
|
|
|
|
|
my $blah = $self->{pca}{$arg}; |
671
|
|
|
|
|
|
|
|
672
|
0
|
|
|
|
|
|
my @config_full = ( [5, q{}] ); |
673
|
|
|
|
|
|
|
#y column calculations... |
674
|
0
|
|
|
|
|
|
my @config = map { [ 12, q{PC_}.$_ ] } ( 1..(scalar (@{$blah})) ); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
675
|
0
|
|
|
|
|
|
push @config_full, @config; |
676
|
|
|
|
|
|
|
|
677
|
0
|
|
|
|
|
|
my $t2 = Text::SimpleTable->new(@config_full); |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
#y all have same component number so who gives a shit |
680
|
0
|
|
|
|
|
|
for my $row (0..$#{$blah->[0]}) { |
|
0
|
|
|
|
|
|
|
681
|
0
|
|
|
|
|
|
my @data; |
682
|
0
|
|
|
|
|
|
for my $col (0..$#{$blah}) { |
|
0
|
|
|
|
|
|
|
683
|
0
|
|
|
|
|
|
push @data, sprintf (q{%.8f}, $blah->[$col][$row] ); |
684
|
|
|
|
|
|
|
} |
685
|
0
|
|
|
|
|
|
$t2->row( $row+1, @data ); |
686
|
|
|
|
|
|
|
} |
687
|
0
|
|
|
|
|
|
print $t2->draw; |
688
|
0
|
|
|
|
|
|
return; |
689
|
|
|
|
|
|
|
} |
690
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
sub _results_in_list { |
692
|
0
|
|
|
0
|
|
|
my ( $self, $arg ) = @_; |
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
#/ twat need to make ALL of sorted an array and dereference each for key using map |
695
|
|
|
|
|
|
|
#y really need to re-write the first ref of these due to code re-usage |
696
|
0
|
|
|
0
|
|
|
my %options = ( cumulative => sub { ( map { $_->{cumulative_variance} } @{$self->{summaries}{eigen}{sorted}} ) }, |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
697
|
0
|
|
|
0
|
|
|
proportion => sub { ( map { $_->{proportion_of_variance} } @{$self->{summaries}{eigen}{sorted}} ) }, |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
698
|
0
|
|
|
0
|
|
|
stdev => sub { ( map { $_->{stdev} } @{$self->{summaries}{eigen}{sorted}} ) }, |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
699
|
0
|
|
|
0
|
|
|
eigenvalue => sub { ( map { $_->{eigenvalue} } @{$self->{summaries}{eigen}{sorted}} ) }, |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
#y these are already in the form of array refs |
701
|
|
|
|
|
|
|
#eigenvector => sub { ( map { $_->{eigenvector} } @{$self->{summaries}{eigen}{sorted}} ) }, |
702
|
0
|
|
|
0
|
|
|
eigenvector => sub { ( @{$self->{pca}{eigenvectors}} ) }, |
|
0
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
#y put it into ordered list |
704
|
|
|
|
|
|
|
# to convert this one we need to convert the whole thing to a numeric iterator and dereference each accordingly - not worth it |
705
|
0
|
|
|
|
|
|
full => sub { ( map { [ $_->{PC}, $_->{stdev}, $_->{proportion_of_variance}, |
|
0
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
$_->{cumulative_variance}, $_->{eigenvalue}, $_->{eigenvector}, ] |
707
|
0
|
|
|
0
|
|
|
} @{$self->{summaries}{eigen}{sorted}} ) }, |
708
|
0
|
|
|
0
|
|
|
transformed => sub { ( @{$self->{pca}{transform}} ) }, |
|
0
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
|
710
|
0
|
|
|
|
|
|
); |
711
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
#/ either use exists on the key value - OR - assign it to a variable and check the variable for defindness... e.g. my $setting =.... |
713
|
|
|
|
|
|
|
#croak qq{\nYou must pass a recognised option: \"cumulative\", \"proportion\"} if ( !exists |
714
|
0
|
|
|
|
|
|
my $setting = $options{$arg}; |
715
|
0
|
0
|
|
|
|
|
croak qq{\nYou must pass a recognised option: \"cumulative\", \"proportion\", \"stdev\", \"eigenvalue\"...} if ( !defined $setting ); |
716
|
|
|
|
|
|
|
|
717
|
|
|
|
|
|
|
#y &{$cd}(); |
718
|
|
|
|
|
|
|
#y &$cd(); |
719
|
|
|
|
|
|
|
#y &$cd; |
720
|
|
|
|
|
|
|
#y $cd->(); |
721
|
|
|
|
|
|
|
|
722
|
0
|
|
|
|
|
|
$setting->(); |
723
|
|
|
|
|
|
|
# return; |
724
|
|
|
|
|
|
|
} |
725
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
#=fe |
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
1; # Magic true value required at end of module |
730
|
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
__END__ |