File Coverage

blib/lib/Statistics/PCA.pm

Criterion	Covered	Total	%
statement	30	329	9.1
branch	0	38	0.0
condition	0	12	0.0
subroutine	10	47	21.2
pod	3	8	37.5
total	43	434	9.9

line	stmt	bran	cond	sub	pod	time	code
1							#!/usr/bin/perl -w
2							package Statistics::PCA;
3							#package Statistics::PCA;
4	1			1		63302	use strict;
	1					2
	1					40
5	1			1		5	use warnings;
	1					2
	1					105
6	1			1		5	use Carp;
	1					7
	1					96
7	1			1		1471	use Math::Cephes::Matrix qw(mat);
	1					13023
	1					84
8	1			1		11	use List::Util qw(sum);
	1					2
	1					129
9	1			1		2044	use Math::MatrixReal;
	1					56907
	1					70
10	1			1		1353	use Text::SimpleTable;
	1					3348
	1					39
11	1			1		8	use Math::Cephes qw(:utils);
	1					2
	1					462
12	1			1		1682	use Contextual::Return;
	1					24514
	1					9
13							=head1 NAME
14
15							Statistics::PCA - A simple Perl implementation of Principal Component Analysis.
16
17							=cut
18							=head1 VERSION
19
20							This document describes Statistics::PCA version 0.0.1
21
22							=cut
23							=head1 SYNOPSIS
24
25							use Statistics::PCA;
26
27							# Create new Statistics::PCA object.
28							my $pca = Statistics::PCA->new;
29
30							# Var1 Var2 Var3 Var4...
31							my @Obs1 = (qw/ 32 26 51 12 /);
32							my @Obs2 = (qw/ 17 13 34 35 /);
33							my @Obs3 = (qw/ 10 94 83 45 /);
34							my @Obs4 = (qw/ 3 72 72 67 /);
35							my @Obs5 = (qw/ 10 63 35 34 /);
36
37							# Load data. Data is loaded as a LIST-of-LISTS (LoL) pointed to by a named argument 'data'. Requires argument for format (see METHODS).
38							$pca->load_data ( { format => 'table', data => [ \@Obs1, \@Obs2, \@Obs3, \@Obs4, \@Obs5 ], } ) ;
39
40							# Perform the PCA analysis. Takes optional argument 'eigen' (see METHODS).
41							#$pca->pca( { eigen => 'C' } );
42							$pca->pca();
43
44							# Access results. The return value of this method is context-dependent (see METHODS). To print a report to STDOUT call in VOID-context.
45							$pca->results();
46
47							=cut
48							=head1 DESCRIPTION
49
50							Principal component analysis (PCA) transforms higher-dimensional data consisting of a number of possibly correlated variables into a smaller number of
51							uncorrelated variables termed principal components (PCs). The higher the ranking of the PCs the greater the amount of
52							variability that the PC accounts for. This PCA procedure involves the calculation of the eigenvalue decomposition using either the Math::Cephes::Matrix or
53							Math::MatrixReal modules (see METHODS) from a data covariance matrix after mean centering the data. See
54							http://en.wikipedia.org/wiki/Principal_component_analysis for more details.
55
56							=cut
57							=head1 METHODS
58
59							=cut
60	1			1		3308	use version; our $VERSION = qv('0.0.1');
	1					2927
	1					10
61
62
63							#y////////////////////////////////////////////// CONSTRUCTOR AND DATA LOADING /////////////////////////////////////////
64							#=fs CONSTRUCTOR AND DATA LOADING
65
66							#sub diag {
67							# my $self = shift;
68							# print Dumper $self;
69							# return;
70							#}
71
72							=head2 new
73
74							Create a new Statistics::PCA object.
75
76							my $pca = Statistics::PCA->new;
77
78							=cut
79							sub new {
80	0			0	1		my $class = shift;
81	0						my $self = {};
82	0						bless $self, $class;
83	0						return $self;
84							}
85
86							#/ this will work for a matrix of values where ALL data is complete - i.e.
87							=head2 load_data
88
89							Used for loading data into object. Data is fed as a reference to a LoL within an anonymous hash using the
90							named argument 'data'. Data may be entered in one of two forms specified by the obligatory named argument 'format'.
91							Data may either be entered in standard 'table' fashion (with rows corresponding to observations and columns corresponding
92							to variables). Thus to enter the following table of data:
93
94							Var1 Var2 Var3 Var4
95
96							Obs1 32 26 51 12
97							Obs2 17 13 34 35
98							Obs3 10 94 83 45
99							Obs4 3 72 72 67
100							Obs5 10 63 35 34 ...
101
102							The data is passed as an LoL with the with each nested ARRAY reference corresponding to a row of observations in the
103							data table and the 'format' argument value 'table' as follows:
104
105							# Var1 Var2 Var3 Var4 ...
106							my $data = [
107							[qw/ 32 26 51 12 /], # Obs1
108							[qw/ 17 13 34 35 /], # Obs2
109							[qw/ 10 94 83 45 /], # Obs3
110							[qw/ 3 72 72 67 /], # Obs4
111							[qw/ 10 63 35 34 /], # Obs5 ...
112							];
113
114							$pca->load_data ( { format => 'table', data => $data, } );
115
116							Alternatively you may enter the data in a variable-centric fashion where each nested ARRAY reference corresponds to a
117							single variable within the data (i.e. the transpose of the above table-fashion). To pass the above data in this fashion
118							use the 'format' argument with value 'variable' as follows:
119
120							# Obs1 Obs2 Obs3 Obs4 Obs5 ...
121							my $transpose = [
122							[qw/ 32 17 10 3 10 /], # Var1
123							[qw/ 26 13 94 72 63 /], # Var2
124							[qw/ 51 34 83 72 35 /], # Var3
125							[qw/ 12 35 45 67 34 /], # Var4 ...
126							];
127
128							$pca->load_data ( { format => 'variable', data => $transpose, } ) ;
129
130							=cut
131							sub load_data {
132							#my ( $self, $data ) = @_;
133	0			0	0		my ( $self, $h_ref ) = @_;
134	0	0	0				croak qq{\nArguments must be passed as HASH reference.} if ( ( $h_ref ) && ( ref $h_ref ne q{HASH} ) );
135
136	0						my $data_dirty = $h_ref->{data};
137
138							#y clean it now to stop strange internal references
139	0						my $data = _deep_copy_references($data_dirty);
140
141	0	0					croak qq{\nYou must specify a data format} if ( !exists $h_ref->{format} );
142	0						my $format_val = $h_ref->{format};
143
144	0			0			my %formating = ( table => sub { $self->_transpose($data); },
145							#y no point in have direct method its so short just put it here
146							#variable => sub { $self->_direct }, # just do the calculations directly and put the data in
147							variable => sub {
148	0			0			$self->{summaries}{var_num} = scalar ( @{$data} );
	0
149	0						$self->{summaries}{var_length} = scalar ( @{$data->[0]} );
	0
150	0						$self->{data}{transpose_temp} = $data; return; },
	0
151	0						);
152
153	0						my $format = $formating{$format_val};
154	0	0					croak qq{\nYou must pass a recognised option: \"table\", \"variable\"} if ( !defined $format );
155
156							#y &{$cd}() = &$cd() = &$cd = $cd->();
157	0						$format->();
158
159	0						$self->_data_checks;
160
161							#y adjust flag
162	0						$self->{flags}{data_loaded} = 1;
163
164	0						return;
165
166							}
167
168							sub _direct {
169	0			0			my ( $self, $a_ref ) = @_;
170
171	0						my $var_num = scalar ( @{$a_ref} );
	0
172
173	0						my $var_length = scalar ( @{$a_ref->[0]} );
	0
174
175	0						$self->{data}{transpose_temp} = $a_ref;
176	0						$self->{summaries}{var_num} = $var_num;
177	0						$self->{summaries}{var_length} = $var_length;
178
179	0						return;
180							}
181
182							sub _transpose {
183
184	0			0			my ( $self, $a_ref ) = @_;
185
186	0						my $var_length = scalar ( @{$a_ref} );
	0
187
188	0						my $done = [];
189	0						for my $col ( 0..$#{$a_ref->[0]} ) {
	0
190	0						push @{$done}, [ map { $_->[$col] } @{$a_ref} ];
	0
	0
	0
191							}
192
193	0						my $var_num = scalar ( @{$done} );
	0
194
195	0						$self->{data}{transpose_temp} = $done;
196	0						$self->{summaries}{var_num} = $var_num;
197	0						$self->{summaries}{var_length} = $var_length;
198
199	0						return;
200							}
201
202							sub _data_checks {
203
204	0			0			my $self = shift;
205
206	0						my $data_a_ref = $self->{data}{transpose_temp};
207
208	0						my $rows = $self->{summaries}{var_num};
209	0	0	0				croak qq{\nI need some data - there are too few rows in your data.\n} if ( !$rows \|\| $rows == 1 );
210
211	0						my $cols = $self->{summaries}{var_length};
212	0	0	0				croak qq{\nI need some data - there are too few columns in your data.\n} if ( !$cols \|\| $cols == 1 );
213
214	0						for my $row (@{$data_a_ref}) {
	0
215
216	0	0					croak qq{\n\nData set must be passed as ARRAY references.\n} if ( ref $row ne q{ARRAY} );
217	0	0					croak qq{\n\nAll rows must have the same number of columns.\n} if ( scalar( @{$row} ) != $cols );
	0
218
219							}
220
221							#/ all fine and dandy.
222	0						print qq{\nData has $rows variables and $cols observations. Passing data to object.};
223	0						$self->{data}{transpose} = $data_a_ref;
224	0						delete $self->{data}{transpose_temp};
225
226	0						return;
227							}
228
229							#=fe
230
231
232							#y/////////////////////////////////////////////////////// ANALYSIS ////////////////////////////////////////////////////
233							#=fs ANALYSIS
234
235							=head2 pca
236
237							To perform the PCA analysis. This method takes the optional named argument 'eigen' that takes the values 'M' or 'C' to
238							calculate the eigenvalue decomposition using either the Math::MatrixReal or Math::Cephes::Matrix modules respectively
239							(defaults to 'M' without argument).
240
241							$pca->pca();
242							$pca->pca( { eigen => 'M' } );
243							$pca->pca( { eigen => 'C' } );
244
245							=cut
246							sub pca {
247
248	0			0	1		my ( $self, $h_ref ) = @_;
249	0	0	0				croak qq{\nArguments must be passed as HASH reference.} if ( ( $h_ref ) && ( ref $h_ref ne q{HASH} ) );
250
251	0	0					exists $h_ref->{eigen} \|\| print qq{\nUsing default option of Math::MatrixReal to calculate eigen values.};
252
253	0	0					my $eigen = exists $h_ref->{eigen} ? $h_ref->{eigen} : q{M};
254
255	0	0					croak qq{\nI don\'t recognise that value for the \'eigen\' option - requires \'M\' or \'C\' (defaults to \'M\' without option).}
256							if ( $eigen !~ /\A[MC]\z/xms );
257
258	0						$self->_calculate_averages;
259	0						$self->_calculate_adjustment;
260	0						$self->_calculate_CVs;
261
262	0	0					if ( $eigen eq q{M} ) { $self->_calculate_eigens_matrixreal; }
	0	0
263							# overkill here
264	0						elsif ( $eigen eq q{C} ) { $self->_calculate_eigens_cephes; }
265
266							#y re-orders eigenvalues and eigenvectors according to eigenvalue - thus everything from here is in correct order
267	0						$self->_rank_eigenvalues;
268
269							#y we have ranked data - should put in new positions? so now we do the calculations
270	0						$self->_calculate_components;
271
272							#y generates the prcomp eigenvectors calculation - returns it as an object and also stores the raw data as self->{self}{eigen}
273	0						$self->_transform;
274
275	0						return;
276							}
277
278							sub _calculate_averages {
279	0			0			my $self = shift;
280	0						my $new_data = $self->{data}{transpose};
281
282	0						my $totals_ref = [];
283
284	0						for my $row ( 0..($self->{summaries}{var_num}-1) ) {
285
286	0						my $sum = sum @{$new_data->[$row]};
	0
287	0						my $length = scalar ( @{$new_data->[$row]} );
	0
288	0						my $average = $sum / $length;
289
290	0						push @{$totals_ref}, { sum => $sum, length => $length, average => $average};
	0
291
292							}
293
294	0						$self->{summaries}{totals} = $totals_ref;
295	0						return;
296							}
297
298							sub _calculate_adjustment {
299	0			0			my $self = shift;
300
301	0						my $trans = $self->{data}{transpose};
302
303	0						my $totals = $self->{summaries}{totals};
304
305	0						my $adjust = [];
306
307	0						for my $row ( 0..($self->{summaries}{var_num}-1) ) {
308
309	0						@{$adjust->[$row]} = map { $_ - $totals->[$row]{average} } @{$trans->[$row]};
	0
	0
	0
310
311							}
312
313	0						$self->{data}{adjusted} = $adjust;
314							}
315
316							sub _calculate_CVs {
317	0			0			my $self = shift;
318
319	0						my $adjusted = $self->{data}{adjusted};
320	0						my $var_num = $self->{summaries}{var_num};
321	0						my $length = $self->{summaries}{var_length};
322	0						my $sum = 0;
323	0						my $covariance_matrix_ref = [];
324
325	0						for my $row ( 0..($var_num-1) ) {
326
327	0						for my $col ( 0..($var_num-1) ) {
328
329	0						my $sum = 0;
330	0						for my $iteration (0..$#{$adjusted->[0]}) {
	0
331
332	0						my $val = $adjusted->[$col][$iteration] * $adjusted->[$row][$iteration];
333
334	0						$sum += $val;
335							}
336
337	0						my $cv = $sum / ($length-1);
338
339	0						$covariance_matrix_ref->[$col][$row] = $cv;
340							}
341							}
342
343	0						$self->{summaries}{covariate_matrix} = $covariance_matrix_ref;
344	0						return;
345
346							}
347
348							sub _calculate_eigens_matrixreal {
349	0			0			my $self = shift;
350
351	0						my $covariance_matrix_ref = $self->{summaries}{covariate_matrix};
352
353	0						my $covariance_matrix_perl = Math::MatrixReal->new_from_cols ( $covariance_matrix_ref ) ;
354	0						my ($eigen_val_perl, $eigen_vec_perl) = $covariance_matrix_perl->sym_diagonalize();
355	0						my $eigen_vec_perl_T = ~$eigen_vec_perl;
356	0						my $eigen_val_perl_T = ~$eigen_val_perl;
357
358	0						my $overall_alt = [];
359	0						@{$overall_alt} = map { +{ solution => $_+1, eigenvalue => $eigen_val_perl_T->[0][0][$_], eigenvector => $eigen_vec_perl_T->[0][$_] } } (0..$#{$eigen_val_perl_T->[0][0]});
	0
	0
	0
360
361	0						$self->{summaries}{eigen}{raw} = $overall_alt;
362
363	0						return;
364							}
365
366							sub _calculate_eigens_cephes {
367	0			0			my $self = shift;
368
369	0						my $covariance_matrix_ref = $self->{summaries}{covariate_matrix};
370	0						my $covariance_matrix = mat ( $covariance_matrix_ref ) ;
371
372	0						my ($eigen_val, $eigen_vec) = $covariance_matrix->eigens();
373	0						my $eigen_vec_ref = $eigen_vec->coef;
374
375							#print Dumper $eigen_val, $eigen_vec_ref;
376							#print Dumper $eigen_val_perl_T->[0][0], $eigen_vec_perl_T->[0];
377
378							#y we don´t need it but we will force perl to intepret {} as anon HASH and not BATCH with '+'
379	0						my $overall = [];
380
381							#@{$overall} = map { +{ solution => $_, eigenvalue => $eigen_val->[$_], eigenvector => $eigen_vec_ref->[$_] } } (0..$#{$eigen_val});
382	0						@{$overall} = map { +{ solution => $_+1, eigenvalue => $eigen_val->[$_], eigenvector => $eigen_vec_ref->[$_] } } (0..$#{$eigen_val});
	0
	0
	0
383
384	0						$self->{summaries}{eigen}{raw} = $overall;
385	0						return;
386							}
387
388							sub _deep_copy_references {
389	0			0			my $ref = shift;
390	0	0					if (!ref $ref) { $ref; }
	0	0
		0
391							#y/ this check for a_refs in which case we will access the whole thing as @{$a_ref}
392							elsif (ref $ref eq q{ARRAY} ) {
393	0						[ map { _deep_copy_references($_) } @{$ref} ];
	0
	0
394							}
395							#y/ this checks for hash refs - in which case it will be handled by fully derferencing: %{$ref}
396							elsif (ref $ref eq q{HASH} ) {
397							#y intepreter forced to read this as an anon HASH and not BATCH by prepending +
398	0						+ { map { $_ => _deep_copy_references($ref->{$_}) } (keys %{$ref}) };
	0
	0
399							}
400	0						else { die "what type is $_?" }
401							}
402
403							sub _rank_eigenvalues {
404	0			0			my $self = shift;
405	0						my $overall = $self->{summaries}{eigen}{raw};
406
407							#/ deep copy to stop fuss!
408	0						my $overall_clean = _deep_copy_references($overall);
409
410	0						my $overall_sorted = [];
411							#/ Can't use "my $a" in sort comparison at The_PCA_method.pl line 255 - cos $a was declared as lexical and $a/$b are globals...
412	0						@{$overall_sorted} = sort { $b->{eigenvalue} <=> $a->{eigenvalue} } @{$overall_clean};
	0
	0
	0
413
414	0						$self->{summaries}{eigen}{sorted} = $overall_sorted;
415
416	0						$self->_add_rank;
417	0						return;
418							}
419
420							sub _add_rank {
421	0			0			my $self = shift;
422	0						my $overall_sorted = $self->{summaries}{eigen}{sorted};
423
424							#@{$overall_sorted} = sort { $b->{eigenvalue} <=> $a->{eigenvalue} } map { my $temp = $_; $overall_sorted->[$temp]{rank} = $temp+1 } (0..$#{$overall_clean});
425
426	0						for my $pos ( (0..$#{$overall_sorted}) ) {
	0
427							#my $overall_sorted->[$pos]{rank} = $pos;
428	0						$overall_sorted->[$pos]{PC} = $pos+1;
429							}
430	0						return;
431							}
432
433							sub _calculate_components {
434	0			0			my $self = shift;
435	0						my $sorted_eigen = $self->{summaries}{eigen}{sorted};
436
437							# we will calculate stdev - this is EITHER stdev of the transformed data OR more generally the stdev of the eigenvalue of the solution.
438
439	0						my $total_variance = sum map { $_->{eigenvalue} } @{$sorted_eigen};
	0
	0
440
441	0						my $cumulative_variance = 0;
442
443	0						for my $hash_ref (@{$sorted_eigen}) {
	0
444
445							# use this twice to unpack it
446	0						my $variance_aka_eigenvalue = $hash_ref->{eigenvalue};
447
448							#my $stdev = sqrt($variance_aka_eigenvalue);
449	0						$hash_ref->{stdev} = sqrt($variance_aka_eigenvalue);
450
451							# use this twice so put it in variable
452							#$hash_ref->{proportion_of_variance} = ( $variance_aka_eigenvalue / $total_variance );
453	0						my $proportion_of_variance = ($variance_aka_eigenvalue / $total_variance);
454	0						$hash_ref->{proportion_of_variance} = $proportion_of_variance;
455
456	0						$cumulative_variance += $proportion_of_variance;
457	0						$hash_ref->{cumulative_variance} = $cumulative_variance;
458							}
459
460	0						$self->{summaries}{total_variance} = $total_variance;
461
462	0						return;
463							}
464
465							sub _create_row_matrix_of_eigenvectors {
466
467	0			0			my $self = shift;
468
469	0						my $sorted_eigen = $self->{summaries}{eigen}{sorted};
470	0						my $eigen_vectors = [];
471
472	0						@{$eigen_vectors} = map { $_->{eigenvector} } @{$sorted_eigen};
	0
	0
	0
473
474							#y we turn it into a matrix object - should use _from_rows?
475	0						my $eigen_matrix_object = Math::MatrixReal->new_from_cols( $eigen_vectors );
476
477							#y we take the transpose which will be multiplied by the row_adjusted_matrix_object - i.e. transpose
478	0						my $row_eigen_matrix_object = ~$eigen_matrix_object;
479
480	0						my $eigen_vectors_copy = _deep_copy_references ($eigen_vectors);
481	0						$self->{pca}{eigenvectors} = $eigen_vectors_copy;
482
483	0						return $row_eigen_matrix_object;
484							}
485
486							sub _create_row_matrix_of_adjusted {
487	0			0			my $self = shift;
488
489							# unpack adjusted data
490	0						my $adjusted = $self->{data}{adjusted};
491
492							#my $adjusted_data_m = Math::MatrixReal->new_from_cols( $pca->{data}{adjusted} );
493	0						my $adjusted_data_matrix_object = Math::MatrixReal->new_from_cols( $adjusted );
494
495							#y take transpose
496	0						my $row_adjusted_data_matrix = ~$adjusted_data_matrix_object;
497
498	0						return $row_adjusted_data_matrix;
499
500							}
501
502							sub _transform {
503
504	0			0			my $self = shift;
505	0						my $row_mat_Eigen_object = $self->_create_row_matrix_of_eigenvectors;
506	0						my $row_mat_Adjust_object = $self->_create_row_matrix_of_adjusted;
507
508							#y this is the actual pca output - but needs to be transposed
509	0						my $product_matrix = $row_mat_Eigen_object->multiply($row_mat_Adjust_object);
510
511							#y/ code from MatrixReal: map { $this->[0][$_] = [ @$empty ] } ( 0 .. $rows-1);
512							#y/ i.e. all matrix data is put into ->[0]
513	0						$self->{pca}{transform} = $product_matrix->[0];
514
515	0						return;
516
517							}
518
519							#=fe
520
521
522							#y/////////////////////////////////////////////////////// RESULTS /////////////////////////////////////////////////////
523							#=fs RESULTS
524
525							=head2 results
526
527							Used to access the results of the PCA analysis. This method is context-dependent and will return a variety of different
528							values depending on whether it is called in VOID or LIST context and the arguments its passed.
529							In VOID-context it prints a formated table of the computed results to STDOUT.
530
531							$pca->results;
532
533							In LIST context this method takes an obligatory argument that determines its return values. To return an ordered list
534							(ordered by PC ranking) of the proportions of total variance of each PC pass 'proportion' to the method.
535
536							my @list = $pca->results('proportion');
537							print qq{\nOrdered list of individual proportions of variance: @list};
538
539							To return an ordered list of the cumulative variance of the PCs pass argument 'cumulative'.
540
541							@list = $pca->results('cumulative');
542							print qq{\nOrdered list of cumulative variance of the PCs: @list};
543
544							To return an ordered list of the individual standard deviations of the PCs pass argument 'stdev'.
545
546							@list = $pca->results('stdev');
547							print qq{\nOrdered list of individual standard deviations of the PCs: @list};
548
549							To return an ordered list of the individual eigenvalues of the PCs pass argument 'eigenvalue'.
550
551							@list = $pca->results('eigenvalue');
552							print qq{\nOrdered list of individual eigenvalues of the PCs: @list};
553
554							To return an ordered list of ARRAY references containing the eigenvectors of the PCs pass argument 'eigenvector'.
555
556							# Returns an ordered list of array references containing the eigenvectors for the components
557							@list = $pca->results('eigenvector');
558							use Data::Dumper;
559							print Dumper \@list;
560
561							To return an ordered list of ARRAY references containing more detailed information about each PC use the 'full'
562							argument. Each nested ARRAY reference consists of an ordered list of: PC rank, PC stdev, PC proportion of variance,
563							PC cumulative_variance, PC eigenvalue and a further nested ARRAY reference containing the PC eigenvector.
564
565							@list = $pca->results('full');
566							for my $i (@list) {
567							print qq{\nPC rank: $i->[0]}
568							. qq{\nPC stdev $i->[1]}
569							. qq{\nPC proportion of variance $i->[2]}
570							. qq{\nPC cumulative variance $i->[3]}
571							. qq{\nPC eigenvalue $i->[4]}
572							}
573
574							To return an ordered LoL of the transformed data for each of the PCs pass 'transformed' to the method.
575
576							@list = $pca->results('transformed');
577							print qq{\nThe transformed data for 'the' principal component (first PC): @{$list[0]} };
578
579							=cut
580							sub results {
581	0			0	1		my ( $self, $arg ) = @_;
582							return (
583	0			0			VOID { $self->_print }
584							# either have specific methods for type of return! or simply have this and an arguement
585	0			0			LIST { $self->_results_in_list($arg) }
586							#y total variance is popintless without individual and if you´ve got individual total is trivial so leave these returns are they are.
587							# nao faz sentido - nao eh uma teste
588							#BOOL { $F > $standard_F ? 1 : undef; }
589	0						);
590							}
591
592							sub _print {
593
594	0			0			my $self = shift;
595	0						print qq{\n=======================\nRESULTS OF PCA ANALYSIS\n=======================\n};
596
597	0						$self->print_total_variance;
598	0						$self->print_variance;
599	0						$self->print_eigenvectors;
600	0						$self->print_transform;
601
602	0						return;
603							}
604
605							#/ should really get the vectors directly from summaries and not re-enter them in the object!!! - especially now that you can just use rank for PC number
606							sub print_eigenvectors {
607	0			0	0		my $self = shift;
608	0						$self->_print_private(q{eigenvectors});
609							}
610
611							sub print_transform {
612	0			0	0		my $self = shift;
613	0						$self->_print_private(q{transform});
614							}
615
616							sub print_total_variance {
617	0			0	0		my $self = shift;
618	0						print qq{\nTotal Variance = }, sprintf (q{%.8f}, $self->{summaries}{total_variance}), qq{\n};
619	0						return;
620							}
621
622							sub print_variance {
623	0			0	0		my $self = shift;
624
625	0						my $sorted_eigen = $self->{summaries}{eigen}{sorted};
626
627	0						print qq{\nTable of Standard Deviations and Variances:\n};
628
629							# just create a first columnm that´s empty for names of rows
630	0						my @config_full = ( [22, q{}] );
631
632							# column calculations...
633							#/ really ought to get PC name from rank attribute
634							#my @config = map { [ 12, q{PC_}.$_ ] } ( 1..(scalar (@{$sorted_eigen})) );
635							#my @config = map { [ 12, q{PC_}.$_->{rank} ] } ( 0..(scalar ($#{$sorted_eigen})) );
636
637							#r/ PC is rank - i.e. it is the 'principal' compoenent
638	0						my @config = map { [ 12, q{PC_}.$_->{PC} ] } (@{$sorted_eigen});
	0
	0
639
640							# make the actual configuring array
641	0						push @config_full, @config;
642
643	0						my $table = Text::SimpleTable->new(@config_full);
644
645	0						my @row1 = (); # no need to initialise, but whatever...
646	0						for my $hash_ref (@{$sorted_eigen}) { push @row1, sprintf (q{%.8f}, $hash_ref->{stdev}); }
	0
	0
647	0						$table->row( q{Standard Deviation}, @row1 );
648	0						$table->hr;
649
650	0						my @row2 = (); # no need to initialise, but whatever...
651	0						for my $hash_ref (@{$sorted_eigen}) { push @row2, sprintf (q{%.8f}, $hash_ref->{proportion_of_variance}); }
	0
	0
652	0						$table->row( q{Proportion of Variance}, @row2 );
653	0						$table->hr;
654
655	0						my @row3 = (); # no need to initialise, but whatever...
656	0						for my $hash_ref (@{$sorted_eigen}) { push @row3, sprintf (q{%.8f}, $hash_ref->{cumulative_variance}); }
	0
	0
657	0						$table->row( q{Cumulative Variance}, @row3 );
658
659	0						print $table->draw;
660	0						return;
661							}
662
663							sub _print_private {
664	0			0			my ( $self, $arg ) = @_;
665
666							#/ twat used numeric ==
667	0	0					$arg eq q{eigenvectors} and print qq{\nTable of vectors:\n};
668	0	0					$arg eq q{transform} and print qq{\nTable of Transformed data:\n};
669
670	0						my $blah = $self->{pca}{$arg};
671
672	0						my @config_full = ( [5, q{}] );
673							#y column calculations...
674	0						my @config = map { [ 12, q{PC_}.$_ ] } ( 1..(scalar (@{$blah})) );
	0
	0
675	0						push @config_full, @config;
676
677	0						my $t2 = Text::SimpleTable->new(@config_full);
678
679							#y all have same component number so who gives a shit
680	0						for my $row (0..$#{$blah->[0]}) {
	0
681	0						my @data;
682	0						for my $col (0..$#{$blah}) {
	0
683	0						push @data, sprintf (q{%.8f}, $blah->[$col][$row] );
684							}
685	0						$t2->row( $row+1, @data );
686							}
687	0						print $t2->draw;
688	0						return;
689							}
690
691							sub _results_in_list {
692	0			0			my ( $self, $arg ) = @_;
693
694							#/ twat need to make ALL of sorted an array and dereference each for key using map
695							#y really need to re-write the first ref of these due to code re-usage
696	0			0			my %options = ( cumulative => sub { ( map { $_->{cumulative_variance} } @{$self->{summaries}{eigen}{sorted}} ) },
	0
	0
697	0			0			proportion => sub { ( map { $_->{proportion_of_variance} } @{$self->{summaries}{eigen}{sorted}} ) },
	0
	0
698	0			0			stdev => sub { ( map { $_->{stdev} } @{$self->{summaries}{eigen}{sorted}} ) },
	0
	0
699	0			0			eigenvalue => sub { ( map { $_->{eigenvalue} } @{$self->{summaries}{eigen}{sorted}} ) },
	0
	0
700							#y these are already in the form of array refs
701							#eigenvector => sub { ( map { $_->{eigenvector} } @{$self->{summaries}{eigen}{sorted}} ) },
702	0			0			eigenvector => sub { ( @{$self->{pca}{eigenvectors}} ) },
	0
703							#y put it into ordered list
704							# to convert this one we need to convert the whole thing to a numeric iterator and dereference each accordingly - not worth it
705	0						full => sub { ( map { [ $_->{PC}, $_->{stdev}, $_->{proportion_of_variance},
	0
706							$_->{cumulative_variance}, $_->{eigenvalue}, $_->{eigenvector}, ]
707	0			0			} @{$self->{summaries}{eigen}{sorted}} ) },
708	0			0			transformed => sub { ( @{$self->{pca}{transform}} ) },
	0
709
710	0						);
711
712							#/ either use exists on the key value - OR - assign it to a variable and check the variable for defindness... e.g. my $setting =....
713							#croak qq{\nYou must pass a recognised option: \"cumulative\", \"proportion\"} if ( !exists
714	0						my $setting = $options{$arg};
715	0	0					croak qq{\nYou must pass a recognised option: \"cumulative\", \"proportion\", \"stdev\", \"eigenvalue\"...} if ( !defined $setting );
716
717							#y &{$cd}();
718							#y &$cd();
719							#y &$cd;
720							#y $cd->();
721
722	0						$setting->();
723							# return;
724							}
725
726							#=fe
727
728
729							1; # Magic true value required at end of module
730
731							__END__