File Coverage

blib/lib/Statistics/MVA/MultipleRegression.pm
Criterion Covered Total %
statement 21 71 29.5
branch 0 10 0.0
condition n/a
subroutine 7 8 87.5
pod 0 1 0.0
total 28 90 31.1


line stmt bran cond sub pod time code
1             package Statistics::MVA::MultipleRegression;
2              
3 1     1   22052 use warnings;
  1         2  
  1         34  
4 1     1   5 use strict;
  1         1  
  1         31  
5 1     1   4 use Carp;
  1         5  
  1         98  
6             # re-implement with Cepehes?
7 1     1   903 use Statistics::MVA;
  1         55579  
  1         30  
8 1     1   10 use Math::MatrixReal;
  1         2  
  1         43  
9 1     1   5 use List::Util qw/sum/;
  1         2  
  1         93  
10              
11             require Exporter;
12             our @ISA = qw(Exporter);
13             our @EXPORT = qw(linear_regression);
14              
15             =head1 NAME
16              
17             Statistics::MVA::MultipleRegression - Simple Least Squares Linear Multiple Regression Module.
18              
19             =cut
20             =head1 VERSION
21              
22             This document describes Statistics::MVA::MultipleRegression version 0.0.1
23              
24             =cut
25             =head1 SYNOPSIS
26              
27             my $lol = [
28             [qw/745 36 66/],
29             [qw/895 37 68/],
30             [qw/442 47 64/],
31             [qw/440 32 53/],
32             [qw/1598 1 101/],
33             ];
34              
35             use Statistics::MVA::MultipleRegression;
36              
37             # Call exported routine on List-of-List of data (each nested list - row - corresponds to a set of observations. In void-context it prints a report to STDOUT.
38             linear_regression($lol);
39              
40             # In LIST-context the routine returns an ARRAY reference of the coefficients and R^2.
41             my ($Array_ref_of_coefficients, $R_sq) = linear_regression($d);
42              
43             =cut
44             =head1 DESCRIPTION
45              
46             The general purpose of multiple regression is to gain information about the relationship between several independent
47             variables (x_i) and a dependent variable (y). The procedure involves fitting an equation of the form:
48            
49             y = b_o + x_1 * b_1 + x_2 * b_2 +... ... x_n * b_n
50              
51             This is a minimal implementation of least squares procedure that accepts a List-of-Lists in which each nested list
52             corresponds to a single set of observations. The single exported routine returns the coefficients (b_i) and R^2.
53              
54             =cut
55              
56 1     1   4 use version; our $VERSION = qv('0.0.1');
  1         2  
  1         4  
57              
58              
59             #/ at this moment this is a minimal version that just calculates the coefficients. i will expand it there is interest.
60              
61             sub linear_regression {
62 0     0 0   my $lol = shift;
63              
64 0 0         croak qq{\nAll data must be a matrix of numbers} if &Statistics::MVA::CVMat::_check($lol);
65              
66 0           $lol = &Statistics::MVA::CVMat::transpose($lol);
67              
68 0 0         croak qq{\nYou have no dependent variables - if you want to calculate the mean of Y do not use this module.} if (scalar @{$lol} == 1);
  0            
69 0 0         carp qq{\nYou have one dependent variables - why use a multivariate procedure?} if (scalar @{$lol} == 2);
  0            
70              
71 0           my $y = [@{$lol->[0]}];
  0            
72 0           my $n = scalar @{$y};
  0            
73 0           my $y_mean = (sum @{$y}) / $n;
  0            
74 0           my $x = [@{$lol}[1..$#{$lol}]];
  0            
  0            
75              
76 0           my $y_mat = Math::MatrixReal->new_from_cols([$lol->[0]]);
77              
78             #y can either build a vector of 1´s using x - i.e. 1 x scalar $lol1->[0] and combine it with everything after ->[0]
79             #y or can simply in place modify the whole of ->[0] in place now that it´s been copied to a matrix
80              
81             # in place modification
82 0           for my $i (0..$#{$lol->[0]}) { $lol->[0][$i] = 1 }
  0            
  0            
83              
84 0           my $x_mat = Math::MatrixReal->new_from_cols($lol);
85 0           my $b_mat = $y_mat->shadow;
86              
87 0           $b_mat = (((~$x_mat * $x_mat)**-1) * ~$x_mat) * $y_mat ;
88              
89             # constant is $Bs[0] then coefficients are @Bs[1..$#Bs]
90 0           my @Bs = map { $b_mat->[0][$_][0] } (0..$#{$b_mat->[0]});
  0            
  0            
91              
92 0           my $ss_y = sum map { ($_-$y_mean)**2 } @{$y};
  0            
  0            
93              
94 0           my @Bs_copy = @Bs;
95 0           my $con = shift @Bs_copy;
96              
97 0           my $y_prime = [];
98              
99             #for my $r (0..$#{$x->[0]}) {
100 0           for my $r (0..$#{$y}) {
  0            
101 0           my $val = sum $con, map { $Bs_copy[$_] * $x->[$_][$r] } (0..$#Bs_copy);
  0            
102 0           push @{$y_prime }, $val;
  0            
103             }
104              
105             #my $ss_error = sum map { ($y->[$_]-$y_prime->[$_])**2 } (0..$#{$y});
106             # can use ss_y_prime / ss_y or (ss_y - ss_error) / ss_y
107 0           my $ss_y_prime = sum map { ($_-$y_mean)**2 } @{$y_prime};
  0            
  0            
108              
109 0           my $R2 = $ss_y_prime/$ss_y;
110              
111 0 0         if (!wantarray) {
112 0           print qq{\nThe coefficients are: };
113 0           for (0..$#Bs) {
114 0 0         my $punct = $_ == $#Bs ? q{.} : q{, };
115 0           print qq{B[$_] = $Bs[$_]$punct};
116             }
117 0           print qq{\nR^2 is $R2};
118 0           return;
119             }
120 0           else { return ([@Bs], $R2); }
121             }
122              
123             1; # Magic true value required at end of module
124             __END__