File Coverage

blib/lib/Statistics/PCA.pm
Criterion Covered Total %
statement 30 329 9.1
branch 0 38 0.0
condition 0 12 0.0
subroutine 10 47 21.2
pod 3 8 37.5
total 43 434 9.9


line stmt bran cond sub pod time code
1             #!/usr/bin/perl -w
2             package Statistics::PCA;
3             #package Statistics::PCA;
4 1     1   63302 use strict;
  1         2  
  1         40  
5 1     1   5 use warnings;
  1         2  
  1         105  
6 1     1   5 use Carp;
  1         7  
  1         96  
7 1     1   1471 use Math::Cephes::Matrix qw(mat);
  1         13023  
  1         84  
8 1     1   11 use List::Util qw(sum);
  1         2  
  1         129  
9 1     1   2044 use Math::MatrixReal;
  1         56907  
  1         70  
10 1     1   1353 use Text::SimpleTable;
  1         3348  
  1         39  
11 1     1   8 use Math::Cephes qw(:utils);
  1         2  
  1         462  
12 1     1   1682 use Contextual::Return;
  1         24514  
  1         9  
13             =head1 NAME
14              
15             Statistics::PCA - A simple Perl implementation of Principal Component Analysis.
16              
17             =cut
18             =head1 VERSION
19              
20             This document describes Statistics::PCA version 0.0.1
21              
22             =cut
23             =head1 SYNOPSIS
24              
25             use Statistics::PCA;
26              
27             # Create new Statistics::PCA object.
28             my $pca = Statistics::PCA->new;
29              
30             # Var1 Var2 Var3 Var4...
31             my @Obs1 = (qw/ 32 26 51 12 /);
32             my @Obs2 = (qw/ 17 13 34 35 /);
33             my @Obs3 = (qw/ 10 94 83 45 /);
34             my @Obs4 = (qw/ 3 72 72 67 /);
35             my @Obs5 = (qw/ 10 63 35 34 /);
36              
37             # Load data. Data is loaded as a LIST-of-LISTS (LoL) pointed to by a named argument 'data'. Requires argument for format (see METHODS).
38             $pca->load_data ( { format => 'table', data => [ \@Obs1, \@Obs2, \@Obs3, \@Obs4, \@Obs5 ], } ) ;
39              
40             # Perform the PCA analysis. Takes optional argument 'eigen' (see METHODS).
41             #$pca->pca( { eigen => 'C' } );
42             $pca->pca();
43              
44             # Access results. The return value of this method is context-dependent (see METHODS). To print a report to STDOUT call in VOID-context.
45             $pca->results();
46            
47             =cut
48             =head1 DESCRIPTION
49              
50             Principal component analysis (PCA) transforms higher-dimensional data consisting of a number of possibly correlated variables into a smaller number of
51             uncorrelated variables termed principal components (PCs). The higher the ranking of the PCs the greater the amount of
52             variability that the PC accounts for. This PCA procedure involves the calculation of the eigenvalue decomposition using either the Math::Cephes::Matrix or
53             Math::MatrixReal modules (see METHODS) from a data covariance matrix after mean centering the data. See
54             http://en.wikipedia.org/wiki/Principal_component_analysis for more details.
55              
56             =cut
57             =head1 METHODS
58              
59             =cut
60 1     1   3308 use version; our $VERSION = qv('0.0.1');
  1         2927  
  1         10  
61              
62              
63             #y////////////////////////////////////////////// CONSTRUCTOR AND DATA LOADING /////////////////////////////////////////
64             #=fs CONSTRUCTOR AND DATA LOADING
65              
66             #sub diag {
67             # my $self = shift;
68             # print Dumper $self;
69             # return;
70             #}
71              
72             =head2 new
73              
74             Create a new Statistics::PCA object.
75              
76             my $pca = Statistics::PCA->new;
77              
78             =cut
79             sub new {
80 0     0 1   my $class = shift;
81 0           my $self = {};
82 0           bless $self, $class;
83 0           return $self;
84             }
85            
86             #/ this will work for a matrix of values where ALL data is complete - i.e.
87             =head2 load_data
88              
89             Used for loading data into object. Data is fed as a reference to a LoL within an anonymous hash using the
90             named argument 'data'. Data may be entered in one of two forms specified by the obligatory named argument 'format'.
91             Data may either be entered in standard 'table' fashion (with rows corresponding to observations and columns corresponding
92             to variables). Thus to enter the following table of data:
93              
94             Var1 Var2 Var3 Var4
95              
96             Obs1 32 26 51 12
97             Obs2 17 13 34 35
98             Obs3 10 94 83 45
99             Obs4 3 72 72 67
100             Obs5 10 63 35 34 ...
101              
102             The data is passed as an LoL with the with each nested ARRAY reference corresponding to a row of observations in the
103             data table and the 'format' argument value 'table' as follows:
104              
105             # Var1 Var2 Var3 Var4 ...
106             my $data = [
107             [qw/ 32 26 51 12 /], # Obs1
108             [qw/ 17 13 34 35 /], # Obs2
109             [qw/ 10 94 83 45 /], # Obs3
110             [qw/ 3 72 72 67 /], # Obs4
111             [qw/ 10 63 35 34 /], # Obs5 ...
112             ];
113              
114             $pca->load_data ( { format => 'table', data => $data, } );
115              
116             Alternatively you may enter the data in a variable-centric fashion where each nested ARRAY reference corresponds to a
117             single variable within the data (i.e. the transpose of the above table-fashion). To pass the above data in this fashion
118             use the 'format' argument with value 'variable' as follows:
119              
120             # Obs1 Obs2 Obs3 Obs4 Obs5 ...
121             my $transpose = [
122             [qw/ 32 17 10 3 10 /], # Var1
123             [qw/ 26 13 94 72 63 /], # Var2
124             [qw/ 51 34 83 72 35 /], # Var3
125             [qw/ 12 35 45 67 34 /], # Var4 ...
126             ];
127              
128             $pca->load_data ( { format => 'variable', data => $transpose, } ) ;
129              
130             =cut
131             sub load_data {
132             #my ( $self, $data ) = @_;
133 0     0 0   my ( $self, $h_ref ) = @_;
134 0 0 0       croak qq{\nArguments must be passed as HASH reference.} if ( ( $h_ref ) && ( ref $h_ref ne q{HASH} ) );
135            
136 0           my $data_dirty = $h_ref->{data};
137            
138             #y clean it now to stop strange internal references
139 0           my $data = _deep_copy_references($data_dirty);
140              
141 0 0         croak qq{\nYou must specify a data format} if ( !exists $h_ref->{format} );
142 0           my $format_val = $h_ref->{format};
143              
144 0     0     my %formating = ( table => sub { $self->_transpose($data); },
145             #y no point in have direct method its so short just put it here
146             #variable => sub { $self->_direct }, # just do the calculations directly and put the data in
147             variable => sub {
148 0     0     $self->{summaries}{var_num} = scalar ( @{$data} );
  0            
149 0           $self->{summaries}{var_length} = scalar ( @{$data->[0]} );
  0            
150 0           $self->{data}{transpose_temp} = $data; return; },
  0            
151 0           );
152              
153 0           my $format = $formating{$format_val};
154 0 0         croak qq{\nYou must pass a recognised option: \"table\", \"variable\"} if ( !defined $format );
155              
156             #y &{$cd}() = &$cd() = &$cd = $cd->();
157 0           $format->();
158              
159 0           $self->_data_checks;
160              
161             #y adjust flag
162 0           $self->{flags}{data_loaded} = 1;
163              
164 0           return;
165              
166             }
167              
168             sub _direct {
169 0     0     my ( $self, $a_ref ) = @_;
170              
171 0           my $var_num = scalar ( @{$a_ref} );
  0            
172            
173 0           my $var_length = scalar ( @{$a_ref->[0]} );
  0            
174              
175 0           $self->{data}{transpose_temp} = $a_ref;
176 0           $self->{summaries}{var_num} = $var_num;
177 0           $self->{summaries}{var_length} = $var_length;
178              
179 0           return;
180             }
181              
182             sub _transpose {
183            
184 0     0     my ( $self, $a_ref ) = @_;
185              
186 0           my $var_length = scalar ( @{$a_ref} );
  0            
187              
188 0           my $done = [];
189 0           for my $col ( 0..$#{$a_ref->[0]} ) {
  0            
190 0           push @{$done}, [ map { $_->[$col] } @{$a_ref} ];
  0            
  0            
  0            
191             }
192              
193 0           my $var_num = scalar ( @{$done} );
  0            
194            
195 0           $self->{data}{transpose_temp} = $done;
196 0           $self->{summaries}{var_num} = $var_num;
197 0           $self->{summaries}{var_length} = $var_length;
198            
199 0           return;
200             }
201              
202             sub _data_checks {
203              
204 0     0     my $self = shift;
205              
206 0           my $data_a_ref = $self->{data}{transpose_temp};
207              
208 0           my $rows = $self->{summaries}{var_num};
209 0 0 0       croak qq{\nI need some data - there are too few rows in your data.\n} if ( !$rows || $rows == 1 );
210              
211 0           my $cols = $self->{summaries}{var_length};
212 0 0 0       croak qq{\nI need some data - there are too few columns in your data.\n} if ( !$cols || $cols == 1 );
213              
214 0           for my $row (@{$data_a_ref}) {
  0            
215              
216 0 0         croak qq{\n\nData set must be passed as ARRAY references.\n} if ( ref $row ne q{ARRAY} );
217 0 0         croak qq{\n\nAll rows must have the same number of columns.\n} if ( scalar( @{$row} ) != $cols );
  0            
218              
219             }
220            
221             #/ all fine and dandy.
222 0           print qq{\nData has $rows variables and $cols observations. Passing data to object.};
223 0           $self->{data}{transpose} = $data_a_ref;
224 0           delete $self->{data}{transpose_temp};
225              
226 0           return;
227             }
228              
229             #=fe
230              
231              
232             #y/////////////////////////////////////////////////////// ANALYSIS ////////////////////////////////////////////////////
233             #=fs ANALYSIS
234              
235             =head2 pca
236              
237             To perform the PCA analysis. This method takes the optional named argument 'eigen' that takes the values 'M' or 'C' to
238             calculate the eigenvalue decomposition using either the Math::MatrixReal or Math::Cephes::Matrix modules respectively
239             (defaults to 'M' without argument).
240              
241             $pca->pca();
242             $pca->pca( { eigen => 'M' } );
243             $pca->pca( { eigen => 'C' } );
244              
245             =cut
246             sub pca {
247            
248 0     0 1   my ( $self, $h_ref ) = @_;
249 0 0 0       croak qq{\nArguments must be passed as HASH reference.} if ( ( $h_ref ) && ( ref $h_ref ne q{HASH} ) );
250            
251 0 0         exists $h_ref->{eigen} || print qq{\nUsing default option of Math::MatrixReal to calculate eigen values.};
252            
253 0 0         my $eigen = exists $h_ref->{eigen} ? $h_ref->{eigen} : q{M};
254              
255 0 0         croak qq{\nI don\'t recognise that value for the \'eigen\' option - requires \'M\' or \'C\' (defaults to \'M\' without option).}
256             if ( $eigen !~ /\A[MC]\z/xms );
257              
258 0           $self->_calculate_averages;
259 0           $self->_calculate_adjustment;
260 0           $self->_calculate_CVs;
261              
262 0 0         if ( $eigen eq q{M} ) { $self->_calculate_eigens_matrixreal; }
  0 0          
263             # overkill here
264 0           elsif ( $eigen eq q{C} ) { $self->_calculate_eigens_cephes; }
265              
266             #y re-orders eigenvalues and eigenvectors according to eigenvalue - thus everything from here is in correct order
267 0           $self->_rank_eigenvalues;
268              
269             #y we have ranked data - should put in new positions? so now we do the calculations
270 0           $self->_calculate_components;
271              
272             #y generates the prcomp eigenvectors calculation - returns it as an object and also stores the raw data as self->{self}{eigen}
273 0           $self->_transform;
274              
275 0           return;
276             }
277              
278             sub _calculate_averages {
279 0     0     my $self = shift;
280 0           my $new_data = $self->{data}{transpose};
281              
282 0           my $totals_ref = [];
283              
284 0           for my $row ( 0..($self->{summaries}{var_num}-1) ) {
285              
286 0           my $sum = sum @{$new_data->[$row]};
  0            
287 0           my $length = scalar ( @{$new_data->[$row]} );
  0            
288 0           my $average = $sum / $length;
289              
290 0           push @{$totals_ref}, { sum => $sum, length => $length, average => $average};
  0            
291            
292             }
293              
294 0           $self->{summaries}{totals} = $totals_ref;
295 0           return;
296             }
297              
298             sub _calculate_adjustment {
299 0     0     my $self = shift;
300            
301 0           my $trans = $self->{data}{transpose};
302              
303 0           my $totals = $self->{summaries}{totals};
304              
305 0           my $adjust = [];
306              
307 0           for my $row ( 0..($self->{summaries}{var_num}-1) ) {
308              
309 0           @{$adjust->[$row]} = map { $_ - $totals->[$row]{average} } @{$trans->[$row]};
  0            
  0            
  0            
310              
311             }
312              
313 0           $self->{data}{adjusted} = $adjust;
314             }
315              
316             sub _calculate_CVs {
317 0     0     my $self = shift;
318              
319 0           my $adjusted = $self->{data}{adjusted};
320 0           my $var_num = $self->{summaries}{var_num};
321 0           my $length = $self->{summaries}{var_length};
322 0           my $sum = 0;
323 0           my $covariance_matrix_ref = [];
324              
325 0           for my $row ( 0..($var_num-1) ) {
326              
327 0           for my $col ( 0..($var_num-1) ) {
328            
329 0           my $sum = 0;
330 0           for my $iteration (0..$#{$adjusted->[0]}) {
  0            
331              
332 0           my $val = $adjusted->[$col][$iteration] * $adjusted->[$row][$iteration];
333            
334 0           $sum += $val;
335             }
336            
337 0           my $cv = $sum / ($length-1);
338              
339 0           $covariance_matrix_ref->[$col][$row] = $cv;
340             }
341             }
342              
343 0           $self->{summaries}{covariate_matrix} = $covariance_matrix_ref;
344 0           return;
345              
346             }
347              
348             sub _calculate_eigens_matrixreal {
349 0     0     my $self = shift;
350              
351 0           my $covariance_matrix_ref = $self->{summaries}{covariate_matrix};
352              
353 0           my $covariance_matrix_perl = Math::MatrixReal->new_from_cols ( $covariance_matrix_ref ) ;
354 0           my ($eigen_val_perl, $eigen_vec_perl) = $covariance_matrix_perl->sym_diagonalize();
355 0           my $eigen_vec_perl_T = ~$eigen_vec_perl;
356 0           my $eigen_val_perl_T = ~$eigen_val_perl;
357              
358 0           my $overall_alt = [];
359 0           @{$overall_alt} = map { +{ solution => $_+1, eigenvalue => $eigen_val_perl_T->[0][0][$_], eigenvector => $eigen_vec_perl_T->[0][$_] } } (0..$#{$eigen_val_perl_T->[0][0]});
  0            
  0            
  0            
360            
361 0           $self->{summaries}{eigen}{raw} = $overall_alt;
362            
363 0           return;
364             }
365              
366             sub _calculate_eigens_cephes {
367 0     0     my $self = shift;
368            
369 0           my $covariance_matrix_ref = $self->{summaries}{covariate_matrix};
370 0           my $covariance_matrix = mat ( $covariance_matrix_ref ) ;
371              
372 0           my ($eigen_val, $eigen_vec) = $covariance_matrix->eigens();
373 0           my $eigen_vec_ref = $eigen_vec->coef;
374              
375             #print Dumper $eigen_val, $eigen_vec_ref;
376             #print Dumper $eigen_val_perl_T->[0][0], $eigen_vec_perl_T->[0];
377              
378             #y we don´t need it but we will force perl to intepret {} as anon HASH and not BATCH with '+'
379 0           my $overall = [];
380              
381             #@{$overall} = map { +{ solution => $_, eigenvalue => $eigen_val->[$_], eigenvector => $eigen_vec_ref->[$_] } } (0..$#{$eigen_val});
382 0           @{$overall} = map { +{ solution => $_+1, eigenvalue => $eigen_val->[$_], eigenvector => $eigen_vec_ref->[$_] } } (0..$#{$eigen_val});
  0            
  0            
  0            
383              
384 0           $self->{summaries}{eigen}{raw} = $overall;
385 0           return;
386             }
387              
388             sub _deep_copy_references {
389 0     0     my $ref = shift;
390 0 0         if (!ref $ref) { $ref; }
  0 0          
    0          
391             #y/ this check for a_refs in which case we will access the whole thing as @{$a_ref}
392             elsif (ref $ref eq q{ARRAY} ) {
393 0           [ map { _deep_copy_references($_) } @{$ref} ];
  0            
  0            
394             }
395             #y/ this checks for hash refs - in which case it will be handled by fully derferencing: %{$ref}
396             elsif (ref $ref eq q{HASH} ) {
397             #y intepreter forced to read this as an anon HASH and not BATCH by prepending +
398 0           + { map { $_ => _deep_copy_references($ref->{$_}) } (keys %{$ref}) };
  0            
  0            
399             }
400 0           else { die "what type is $_?" }
401             }
402              
403             sub _rank_eigenvalues {
404 0     0     my $self = shift;
405 0           my $overall = $self->{summaries}{eigen}{raw};
406              
407             #/ deep copy to stop fuss!
408 0           my $overall_clean = _deep_copy_references($overall);
409              
410 0           my $overall_sorted = [];
411             #/ Can't use "my $a" in sort comparison at The_PCA_method.pl line 255 - cos $a was declared as lexical and $a/$b are globals...
412 0           @{$overall_sorted} = sort { $b->{eigenvalue} <=> $a->{eigenvalue} } @{$overall_clean};
  0            
  0            
  0            
413              
414 0           $self->{summaries}{eigen}{sorted} = $overall_sorted;
415              
416 0           $self->_add_rank;
417 0           return;
418             }
419              
420             sub _add_rank {
421 0     0     my $self = shift;
422 0           my $overall_sorted = $self->{summaries}{eigen}{sorted};
423              
424             #@{$overall_sorted} = sort { $b->{eigenvalue} <=> $a->{eigenvalue} } map { my $temp = $_; $overall_sorted->[$temp]{rank} = $temp+1 } (0..$#{$overall_clean});
425              
426 0           for my $pos ( (0..$#{$overall_sorted}) ) {
  0            
427             #my $overall_sorted->[$pos]{rank} = $pos;
428 0           $overall_sorted->[$pos]{PC} = $pos+1;
429             }
430 0           return;
431             }
432              
433             sub _calculate_components {
434 0     0     my $self = shift;
435 0           my $sorted_eigen = $self->{summaries}{eigen}{sorted};
436              
437             # we will calculate stdev - this is EITHER stdev of the transformed data OR more generally the stdev of the eigenvalue of the solution.
438              
439 0           my $total_variance = sum map { $_->{eigenvalue} } @{$sorted_eigen};
  0            
  0            
440              
441 0           my $cumulative_variance = 0;
442              
443 0           for my $hash_ref (@{$sorted_eigen}) {
  0            
444            
445             # use this twice to unpack it
446 0           my $variance_aka_eigenvalue = $hash_ref->{eigenvalue};
447            
448             #my $stdev = sqrt($variance_aka_eigenvalue);
449 0           $hash_ref->{stdev} = sqrt($variance_aka_eigenvalue);
450            
451             # use this twice so put it in variable
452             #$hash_ref->{proportion_of_variance} = ( $variance_aka_eigenvalue / $total_variance );
453 0           my $proportion_of_variance = ($variance_aka_eigenvalue / $total_variance);
454 0           $hash_ref->{proportion_of_variance} = $proportion_of_variance;
455              
456 0           $cumulative_variance += $proportion_of_variance;
457 0           $hash_ref->{cumulative_variance} = $cumulative_variance;
458             }
459            
460 0           $self->{summaries}{total_variance} = $total_variance;
461            
462 0           return;
463             }
464              
465             sub _create_row_matrix_of_eigenvectors {
466              
467 0     0     my $self = shift;
468            
469 0           my $sorted_eigen = $self->{summaries}{eigen}{sorted};
470 0           my $eigen_vectors = [];
471            
472 0           @{$eigen_vectors} = map { $_->{eigenvector} } @{$sorted_eigen};
  0            
  0            
  0            
473            
474             #y we turn it into a matrix object - should use _from_rows?
475 0           my $eigen_matrix_object = Math::MatrixReal->new_from_cols( $eigen_vectors );
476            
477             #y we take the transpose which will be multiplied by the row_adjusted_matrix_object - i.e. transpose
478 0           my $row_eigen_matrix_object = ~$eigen_matrix_object;
479            
480 0           my $eigen_vectors_copy = _deep_copy_references ($eigen_vectors);
481 0           $self->{pca}{eigenvectors} = $eigen_vectors_copy;
482              
483 0           return $row_eigen_matrix_object;
484             }
485              
486             sub _create_row_matrix_of_adjusted {
487 0     0     my $self = shift;
488              
489             # unpack adjusted data
490 0           my $adjusted = $self->{data}{adjusted};
491              
492             #my $adjusted_data_m = Math::MatrixReal->new_from_cols( $pca->{data}{adjusted} );
493 0           my $adjusted_data_matrix_object = Math::MatrixReal->new_from_cols( $adjusted );
494              
495             #y take transpose
496 0           my $row_adjusted_data_matrix = ~$adjusted_data_matrix_object;
497              
498 0           return $row_adjusted_data_matrix;
499              
500             }
501              
502             sub _transform {
503              
504 0     0     my $self = shift;
505 0           my $row_mat_Eigen_object = $self->_create_row_matrix_of_eigenvectors;
506 0           my $row_mat_Adjust_object = $self->_create_row_matrix_of_adjusted;
507              
508             #y this is the actual pca output - but needs to be transposed
509 0           my $product_matrix = $row_mat_Eigen_object->multiply($row_mat_Adjust_object);
510            
511             #y/ code from MatrixReal: map { $this->[0][$_] = [ @$empty ] } ( 0 .. $rows-1);
512             #y/ i.e. all matrix data is put into ->[0]
513 0           $self->{pca}{transform} = $product_matrix->[0];
514              
515 0           return;
516              
517             }
518              
519             #=fe
520              
521              
522             #y/////////////////////////////////////////////////////// RESULTS /////////////////////////////////////////////////////
523             #=fs RESULTS
524              
525             =head2 results
526              
527             Used to access the results of the PCA analysis. This method is context-dependent and will return a variety of different
528             values depending on whether it is called in VOID or LIST context and the arguments its passed.
529             In VOID-context it prints a formated table of the computed results to STDOUT.
530              
531             $pca->results;
532              
533             In LIST context this method takes an obligatory argument that determines its return values. To return an ordered list
534             (ordered by PC ranking) of the proportions of total variance of each PC pass 'proportion' to the method.
535              
536             my @list = $pca->results('proportion');
537             print qq{\nOrdered list of individual proportions of variance: @list};
538              
539             To return an ordered list of the cumulative variance of the PCs pass argument 'cumulative'.
540              
541             @list = $pca->results('cumulative');
542             print qq{\nOrdered list of cumulative variance of the PCs: @list};
543              
544             To return an ordered list of the individual standard deviations of the PCs pass argument 'stdev'.
545              
546             @list = $pca->results('stdev');
547             print qq{\nOrdered list of individual standard deviations of the PCs: @list};
548              
549             To return an ordered list of the individual eigenvalues of the PCs pass argument 'eigenvalue'.
550              
551             @list = $pca->results('eigenvalue');
552             print qq{\nOrdered list of individual eigenvalues of the PCs: @list};
553              
554             To return an ordered list of ARRAY references containing the eigenvectors of the PCs pass argument 'eigenvector'.
555              
556             # Returns an ordered list of array references containing the eigenvectors for the components
557             @list = $pca->results('eigenvector');
558             use Data::Dumper;
559             print Dumper \@list;
560              
561             To return an ordered list of ARRAY references containing more detailed information about each PC use the 'full'
562             argument. Each nested ARRAY reference consists of an ordered list of: PC rank, PC stdev, PC proportion of variance,
563             PC cumulative_variance, PC eigenvalue and a further nested ARRAY reference containing the PC eigenvector.
564            
565             @list = $pca->results('full');
566             for my $i (@list) {
567             print qq{\nPC rank: $i->[0]}
568             . qq{\nPC stdev $i->[1]}
569             . qq{\nPC proportion of variance $i->[2]}
570             . qq{\nPC cumulative variance $i->[3]}
571             . qq{\nPC eigenvalue $i->[4]}
572             }
573              
574             To return an ordered LoL of the transformed data for each of the PCs pass 'transformed' to the method.
575              
576             @list = $pca->results('transformed');
577             print qq{\nThe transformed data for 'the' principal component (first PC): @{$list[0]} };
578              
579             =cut
580             sub results {
581 0     0 1   my ( $self, $arg ) = @_;
582             return (
583 0     0     VOID { $self->_print }
584             # either have specific methods for type of return! or simply have this and an arguement
585 0     0     LIST { $self->_results_in_list($arg) }
586             #y total variance is popintless without individual and if you´ve got individual total is trivial so leave these returns are they are.
587             # nao faz sentido - nao eh uma teste
588             #BOOL { $F > $standard_F ? 1 : undef; }
589 0           );
590             }
591              
592             sub _print {
593              
594 0     0     my $self = shift;
595 0           print qq{\n=======================\nRESULTS OF PCA ANALYSIS\n=======================\n};
596              
597 0           $self->print_total_variance;
598 0           $self->print_variance;
599 0           $self->print_eigenvectors;
600 0           $self->print_transform;
601            
602 0           return;
603             }
604              
605             #/ should really get the vectors directly from summaries and not re-enter them in the object!!! - especially now that you can just use rank for PC number
606             sub print_eigenvectors {
607 0     0 0   my $self = shift;
608 0           $self->_print_private(q{eigenvectors});
609             }
610              
611             sub print_transform {
612 0     0 0   my $self = shift;
613 0           $self->_print_private(q{transform});
614             }
615              
616             sub print_total_variance {
617 0     0 0   my $self = shift;
618 0           print qq{\nTotal Variance = }, sprintf (q{%.8f}, $self->{summaries}{total_variance}), qq{\n};
619 0           return;
620             }
621              
622             sub print_variance {
623 0     0 0   my $self = shift;
624            
625 0           my $sorted_eigen = $self->{summaries}{eigen}{sorted};
626            
627 0           print qq{\nTable of Standard Deviations and Variances:\n};
628              
629             # just create a first columnm that´s empty for names of rows
630 0           my @config_full = ( [22, q{}] );
631              
632             # column calculations...
633             #/ really ought to get PC name from rank attribute
634             #my @config = map { [ 12, q{PC_}.$_ ] } ( 1..(scalar (@{$sorted_eigen})) );
635             #my @config = map { [ 12, q{PC_}.$_->{rank} ] } ( 0..(scalar ($#{$sorted_eigen})) );
636              
637             #r/ PC is rank - i.e. it is the 'principal' compoenent
638 0           my @config = map { [ 12, q{PC_}.$_->{PC} ] } (@{$sorted_eigen});
  0            
  0            
639              
640             # make the actual configuring array
641 0           push @config_full, @config;
642            
643 0           my $table = Text::SimpleTable->new(@config_full);
644              
645 0           my @row1 = (); # no need to initialise, but whatever...
646 0           for my $hash_ref (@{$sorted_eigen}) { push @row1, sprintf (q{%.8f}, $hash_ref->{stdev}); }
  0            
  0            
647 0           $table->row( q{Standard Deviation}, @row1 );
648 0           $table->hr;
649              
650 0           my @row2 = (); # no need to initialise, but whatever...
651 0           for my $hash_ref (@{$sorted_eigen}) { push @row2, sprintf (q{%.8f}, $hash_ref->{proportion_of_variance}); }
  0            
  0            
652 0           $table->row( q{Proportion of Variance}, @row2 );
653 0           $table->hr;
654              
655 0           my @row3 = (); # no need to initialise, but whatever...
656 0           for my $hash_ref (@{$sorted_eigen}) { push @row3, sprintf (q{%.8f}, $hash_ref->{cumulative_variance}); }
  0            
  0            
657 0           $table->row( q{Cumulative Variance}, @row3 );
658            
659 0           print $table->draw;
660 0           return;
661             }
662              
663             sub _print_private {
664 0     0     my ( $self, $arg ) = @_;
665              
666             #/ twat used numeric ==
667 0 0         $arg eq q{eigenvectors} and print qq{\nTable of vectors:\n};
668 0 0         $arg eq q{transform} and print qq{\nTable of Transformed data:\n};
669              
670 0           my $blah = $self->{pca}{$arg};
671              
672 0           my @config_full = ( [5, q{}] );
673             #y column calculations...
674 0           my @config = map { [ 12, q{PC_}.$_ ] } ( 1..(scalar (@{$blah})) );
  0            
  0            
675 0           push @config_full, @config;
676            
677 0           my $t2 = Text::SimpleTable->new(@config_full);
678              
679             #y all have same component number so who gives a shit
680 0           for my $row (0..$#{$blah->[0]}) {
  0            
681 0           my @data;
682 0           for my $col (0..$#{$blah}) {
  0            
683 0           push @data, sprintf (q{%.8f}, $blah->[$col][$row] );
684             }
685 0           $t2->row( $row+1, @data );
686             }
687 0           print $t2->draw;
688 0           return;
689             }
690              
691             sub _results_in_list {
692 0     0     my ( $self, $arg ) = @_;
693              
694             #/ twat need to make ALL of sorted an array and dereference each for key using map
695             #y really need to re-write the first ref of these due to code re-usage
696 0     0     my %options = ( cumulative => sub { ( map { $_->{cumulative_variance} } @{$self->{summaries}{eigen}{sorted}} ) },
  0            
  0            
697 0     0     proportion => sub { ( map { $_->{proportion_of_variance} } @{$self->{summaries}{eigen}{sorted}} ) },
  0            
  0            
698 0     0     stdev => sub { ( map { $_->{stdev} } @{$self->{summaries}{eigen}{sorted}} ) },
  0            
  0            
699 0     0     eigenvalue => sub { ( map { $_->{eigenvalue} } @{$self->{summaries}{eigen}{sorted}} ) },
  0            
  0            
700             #y these are already in the form of array refs
701             #eigenvector => sub { ( map { $_->{eigenvector} } @{$self->{summaries}{eigen}{sorted}} ) },
702 0     0     eigenvector => sub { ( @{$self->{pca}{eigenvectors}} ) },
  0            
703             #y put it into ordered list
704             # to convert this one we need to convert the whole thing to a numeric iterator and dereference each accordingly - not worth it
705 0           full => sub { ( map { [ $_->{PC}, $_->{stdev}, $_->{proportion_of_variance},
  0            
706             $_->{cumulative_variance}, $_->{eigenvalue}, $_->{eigenvector}, ]
707 0     0     } @{$self->{summaries}{eigen}{sorted}} ) },
708 0     0     transformed => sub { ( @{$self->{pca}{transform}} ) },
  0            
709            
710 0           );
711              
712             #/ either use exists on the key value - OR - assign it to a variable and check the variable for defindness... e.g. my $setting =....
713             #croak qq{\nYou must pass a recognised option: \"cumulative\", \"proportion\"} if ( !exists
714 0           my $setting = $options{$arg};
715 0 0         croak qq{\nYou must pass a recognised option: \"cumulative\", \"proportion\", \"stdev\", \"eigenvalue\"...} if ( !defined $setting );
716              
717             #y &{$cd}();
718             #y &$cd();
719             #y &$cd;
720             #y $cd->();
721              
722 0           $setting->();
723             # return;
724             }
725              
726             #=fe
727              
728              
729             1; # Magic true value required at end of module
730              
731             __END__