|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Statistics::MVA::BayesianDiscrimination;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
49066
 | 
 use warnings;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
    | 
| 
4
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
4
 | 
 use strict;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
    | 
| 
5
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
4
 | 
 use Carp;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
81
 | 
    | 
| 
6
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
1195
 | 
 use Statistics::MVA;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
44870
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
37
 | 
    | 
| 
7
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
953
 | 
 use Math::Cephes qw/:explog/;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8168
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
369
 | 
    | 
| 
8
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
10
 | 
 use List::Util qw/sum/;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
74
 | 
    | 
| 
9
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
1009
 | 
 use Text::SimpleTable;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2197
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
72
 | 
    | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #=fs pod stuff  | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Statistics::MVA::BayesianDiscrimination - Two-Sample Linear Discrimination Analysis with Posterior Probability Calculation.  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 VERSION  | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This document describes Statistics::MVA::BayesianDiscrimination version 0.0.2  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DESCRIPTION  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Discriminant analysis is a procedure for classifying a set of observations each with k variables into predefined classes such as to allow the  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 determination of the class of new observations based upon the values of the k variables for these new observations. Group membership based on linear combinations of the variables. From the set of observations where group membership is know the procedure constructs a set of linear functions, termed  | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 discriminant functions, such that:   | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     L = B[0] + B[1] * x1 + B[2] * x2 +... ... + B[n] * x_n  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Where B[0] is a constant, B[n's] are discriminant coefficients and x's are the input variables. These discriminant functions  | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (there is one for each group - consequently as this module only analyses data for two groups atm it generates two such  | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 discriminant functions.  | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Before proceeding with the analysis you should: (1) Perform Bartlett´s test to see if the covariance matrices of the data  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 are homogenous for the populations used (see L. If they are not homogenous you should use Quadratic Discrimination analysis.  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (2) test for equality of the group means using Hotelling's T^2 (see L or MANOVA. If  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the groups do not differ significantly it is extremely unlikely that discrimination analysis with generate any useful  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 discrimination rules. (3) Specify the prior probabilities. This module allows you to do this in several ways - see  | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L.  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This class automatically generates the discrimination coefficients at part of object construction. You can then either  | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 use the C | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 observation. Both of these methods are context dependent - see L. See  | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 http://en.wikipedia.org/wiki/Linear_discriminant_analysis for further details.  | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # we have two groups of data each with 3 variables and 10 observations - example data from http://www.stat.psu.edu/online/courses/stat505/data/insect.txt  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $data_X = [  | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         [qw/ 191 131 53/],  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         [qw/ 185 134 50/],  | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         [qw/ 200 137 52/],  | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         [qw/ 173 127 50/],  | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         [qw/ 171 128 49/],  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         [qw/ 160 118 47/],  | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         [qw/ 188 134 54/],  | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         [qw/ 186 129 51/],  | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         [qw/ 174 131 52/],  | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         [qw/ 163 115 47/],  | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ];  | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $data_Y = [  | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         [qw/ 186 107 49/],  | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         [qw/ 211 122 49/],  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         [qw/ 201 144 47/],  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         [qw/ 242 131 54/],  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         [qw/ 184 108 43/],  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         [qw/ 211 118 51/],  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         [qw/ 217 122 49/],  | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         [qw/ 223 127 51/],  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         [qw/ 208 125 50/],  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         [qw/ 199 124 46/],  | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ];  | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     use Statistics::MVA::BayesianDiscrimination;  | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Pass the data as a list of the two LISTS-of-LISTS above (termed X and Y). The module by default assumes equal prior probabilities.  | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #my $bld = Statistics::MVA::BayesianDiscrimination->new($data_X,$data_Y);  | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Pass the data but telling the module to calculate the prior probabilities as the ratio of observations for the two groups (e.g. P(X) X_obs_num / Total_obs.  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #my $bld = Statistics::MVA::BayesianDiscrimination->new({priors => 1 },$data_X,$data_Y);  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Pass the data but directly specifying the values of prior probability for X and Y to use as an anonymous array.  | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #my $bld = Statistics::MVA::BayesianDiscrimination->new({priors => [ 0.25, 0.75 ] },$ins_a,$ins_b);  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Print values for coefficients to STDOUT.  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $bld->output;  | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Pass the values as an ARRAY reference by calling in LIST context - see L.  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my ($prior_x, $constant_x, $matrix_x, $prior_y, $constant_y, $matrix_y) = $bld->output;  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Perform discriminantion analyis for a specific observation and print result to STDOUT.  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $bld->discriminate([qw/184 114 59/]);  | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Call in LIST context to obtain results directly - see L.  | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my ($val_x, $p_x, $post_p_x, $val_y, $p_y, $post_p_y, $type) = $bld->discriminate([qw/194 124 49/]);  | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 METHODS  | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 new  | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Creates a new Statistics::MVA::BayesianDiscrimination. This accepts two references for List-of-Lists of values  | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 corresponding to the two groups of data - termed X and Y. Within each List-of-Lists each nested array corresponds to a single set of observations.   | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 It also accepts an optional HASH reference of options preceding these values. The constructor automatically generates  | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the discrimination coefficients that are accessed using the C | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Pass data as ARRAY references.  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $bld = Statistics::MVA::BayesianDiscrimination->new($data_X,$data_Y);  | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Passing optional HASH reference of options.  | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $bld = Statistics::MVA::BayesianDiscrimination->new({priors => 1 },$data_X,$data_Y);  | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 output  | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Context-dependent method for accessing results of discrimination analysis. In void context it prints the coefficients to STDOUT.  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $bld->output;  | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 In LIST-context it returns a list of the relevant data accessed as follows:  | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my ($prior_x, $constant_x, $matrix_x, $prior_y, $constant_y, $matrix_y) = $bld->output;  | 
| 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print qq{\nPrior probability of X = $prior_x and Y = $prior_y.};   | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print qq{\nConstants for discrimination function for X = $constant_x and Y = $constant_y.};  | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print qq{\nCoefficients for discrimination function X = @{$matrix_x}.};  | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print qq{\nCoefficients for discrimination function Y = @{$matrix_y}.};  | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 discriminate  | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Method for classification of a new observation. Pass it an ARRAY reference of SCALAR values appropriate for the original  | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 data-sets passed to the constructor. In void context it prints a report to STDOUT:  | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $bld->discriminate([qw/123 34 325/];  | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 In LIST-context it returns a list of the relevant data as follows:  | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my ($val_x, $p_x, $post_p_x, $val_y, $p_y, $post_p_y, $type) = $bld->discriminate([qw/123 34 325/];  | 
| 
149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print qq{\nLinear score function for X = $val_x and Y = $val_y - the new observation is of type \x27$type\x27.};  | 
| 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print qq{\nThe prior probability that the new observation is of type X = $p_x and the posterior probability = $post_p_x};  | 
| 
152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print qq{\nThe prior probability that the new observation is of type X = $p_y and the posterior probability = $post_p_y};       | 
| 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 OPTIONS  | 
| 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 priors  | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Pass within an anonymous HASH preceding the two data references during object construction:  | 
| 
162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $bld = Statistics::MVA::BayesianDiscrimination->new({priors => option_value },$data_X,$data_Y);  | 
| 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Passing '0' causes the module to assume equal prior probabilities for the two groups (prior_x = prior_y = 0.5). Passing  | 
| 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 '1' causes the module to generate priors depending on the ratios of the two data-sets e.g. X has 15 observations and Y  | 
| 
167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 has 27 observations gives prior_x = 15 / (15 + 27). Alternatively you may specify the values to use by passing an anonymous ARRAY reference of length 2  | 
| 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 where the first value is prior_x and the second is prior_y. There are currently no checks on priors directly passed so  | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ensure that prior_x + prior_y = 1 if you supply you own.  | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Use prior_x = prior_y = 0.5.  | 
| 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $bld = Statistics::MVA::BayesianDiscrimination->new({priors => 0 },$data_X,$data_Y);  | 
| 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Generate priors depending on rations of observation numbers.  | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $bld = Statistics::MVA::BayesianDiscrimination->new({priors => 1 },$data_X,$data_Y);  | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Specify your own priors.  | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $bld = Statistics::MVA::BayesianDiscrimination->new({priors => [$prior_x, $prior_y] },$data_X,$data_Y);  | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #=fe  | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
184
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
6
 | 
 use version; our $VERSION = qv('0.0.2');  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #/ with these new modules object creation directly checks data and performs analysis. storing just the essential results. can access directly or print them  | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new {   | 
| 
189
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
     my $class = shift;  | 
| 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #y grab an options hash if there is one  | 
| 
191
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $options_ref = shift if ( ref $_[0] eq q{HASH} );  | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #y the rest is data  | 
| 
193
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my @data = @_;  | 
| 
194
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $p_x;  | 
| 
195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $p_y;  | 
| 
196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
197
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $self = [];  | 
| 
198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
199
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $k = scalar @data;  | 
| 
200
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     croak qq{\nThis only accepts two groups of data} if $k != 2;  | 
| 
201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #y if there´s an options hash with priors check it - otherwise default...  | 
| 
202
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if ( defined $options_ref ) { ($p_x, $p_y) = &_priors($options_ref, @data) }  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
204
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         print qq{\nDefaulting to priors of 0.5 for both x and y.};  | 
| 
205
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $p_x = 0.5;  | 
| 
206
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $p_y = 0.5;   | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #y check they have good matrix format - need to check compatibility too  | 
| 
209
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     croak qq{\nbad data} if &Statistics::MVA::CVMat::_check($data[0]);  | 
| 
210
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     croak qq{\nbad data} if &Statistics::MVA::CVMat::_check($data[1]);  | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #y data is passed in table format - i.e. nested arrays (rows) correspond to observations and not variables  | 
| 
212
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $lol1 = &Statistics::MVA::CVMat::transpose($data[0]);  | 
| 
213
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $lol2 = &Statistics::MVA::CVMat::transpose($data[1]);  | 
| 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #y get variable number  | 
| 
215
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $p = scalar @{$lol1};  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
216
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     croak qq{\nThese data sets are not compatible} if ($p != scalar @{$lol2});  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #y combine the data for overall means calculations - if using single data with names could use adjust in MVA  | 
| 
218
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $full = [];  | 
| 
219
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $full = &_combine($lol1,$lol2, $p);  | 
| 
220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #y copy vars and we will do in place mean calculation - no need to deep copy references here  | 
| 
221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #my $first = &_deep_copy($lol1);  | 
| 
222
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $first = [@{$lol1}];  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
223
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $second = [@{$lol2}];  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #y in place calculation of the means  | 
| 
225
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     &_means($full,$p);  | 
| 
226
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     &_means($first,$p);  | 
| 
227
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     &_means($second,$p);  | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #y in place subtraction of means from raw data  | 
| 
229
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     &_subtract($lol1, $full);  | 
| 
230
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     &_subtract($lol2, $full);  | 
| 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #y for MVA cv_calculations the data needs to be in table format - this is wastefull as it just transpose - thus is unecessary transposes - IN PLACE - should modify this?!?  | 
| 
232
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $lol1 = &Statistics::MVA::CVMat::transpose($lol1);  | 
| 
233
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $lol2 = &Statistics::MVA::CVMat::transpose($lol2);  | 
| 
234
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $mva;  | 
| 
235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #y if we have a twat who likes this annoying method  | 
| 
236
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
     if (defined $options_ref && defined $options_ref->{cv} && $options_ref->{cv} == 0) { $mva = &_twats($lol1, $lol2); }  | 
| 
 
 | 
0
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         #y create MVA object - these are defaults parameters so don´t need to pass them...  | 
| 
239
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $mva = Statistics::MVA->new([ $lol1, $lol2 ], {standardise => 0, divisor => 1});  | 
| 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #r CV matrix data - loose last [0] for realmatrix object  | 
| 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #print Dumper $mva->[0][0][0][0];  | 
| 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #print Dumper $mva->[0][1][0][0];  | 
| 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #y create empty matrix of same dimensions  | 
| 
245
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $c = $mva->[0][0][0]->shadow;  | 
| 
246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #/ its 2x not divided by priors?!? - only constant takes into account priors  | 
| 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #$mva->[0][0][0]->multiply_scalar($mva->[0][0][0],$p_x);  | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #$mva->[0][1][0]->multiply_scalar($mva->[0][1][0],$p_y);  | 
| 
250
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $mva->[0][0][0]->multiply_scalar($mva->[0][0][0],0.5);  | 
| 
251
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $mva->[0][1][0]->multiply_scalar($mva->[0][1][0],0.5);  | 
| 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
253
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $c->add($mva->[0][0][0],$mva->[0][1][0]);   | 
| 
254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #y no need for transpose just build from rows...  | 
| 
256
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $averages_a = Math::MatrixReal->new_from_rows([$first]);  | 
| 
257
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $averages_b = Math::MatrixReal->new_from_rows([$second]);  | 
| 
258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #y calculate b_0 - constant discrimination coefficients  | 
| 
259
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $constant_x = 0.5 * $averages_a * $c->inverse * ~$averages_a;  | 
| 
260
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $constant_y = 0.5 * $averages_b * $c->inverse * ~$averages_b;  | 
| 
261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #/ you must add the priors to the constant - only the constant takes into account the priors.  | 
| 
263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #$constant_x->[0][0][0] = $constant_x->[0][0][0] - log($p_x);  | 
| 
264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #$constant_y->[0][0][0] = $constant_y->[0][0][0] - log($p_y);  | 
| 
265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #y now should be -0.5...  | 
| 
266
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $constant_x->[0][0][0] = -$constant_x->[0][0][0] + log($p_x);  | 
| 
267
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $constant_y->[0][0][0] = -$constant_y->[0][0][0] + log($p_y);  | 
| 
268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #y calculate the rest of the discrimination coeficients  | 
| 
270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #~$averages_a * $c->inverse * $averages_a;  | 
| 
271
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $matrix_x = $averages_a * $c->inverse;  | 
| 
272
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $matrix_y = $averages_b * $c->inverse;  | 
| 
273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #y feed the objectvv- this is only done for persistance - otherwise we loose the data...  | 
| 
275
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $self->[0][0][0] = $constant_x;  | 
| 
276
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $self->[0][0][1] = $matrix_x;  | 
| 
277
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $self->[0][1][0] = $constant_y;  | 
| 
278
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $self->[0][1][1] = $matrix_y;  | 
| 
279
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $self->[1][0] = $p_x;  | 
| 
280
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $self->[1][1] = $p_y;  | 
| 
281
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $self->[2] = $p;  | 
| 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
283
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     bless $self, $class;  | 
| 
284
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return $self;  | 
| 
285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _priors {  | 
| 
288
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
     my ($options_ref, @data) = @_;  | 
| 
289
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $p_x;  | 
| 
290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $p_y;  | 
| 
291
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if ($options_ref->{priors} ==  1 ) {  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
292
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $n_x = scalar @{$data[0]};  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
293
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $n_y = scalar @{$data[1]};  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
294
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $n_total = $n_x + $n_y;  | 
| 
295
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $p_x = sprintf (q{%.5f}, $n_x / $n_total);  | 
| 
296
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $p_y = sprintf (q{%.5f}, $n_y / $n_total);  | 
| 
297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif (ref $options_ref->{priors} eq q{ARRAY} ) {  | 
| 
299
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my @priors = @{$options_ref->{priors}};  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
300
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         croak qq{\nIf passing an ARRAY ref of priors it must have length two.} if ( scalar @priors != 2 );  | 
| 
301
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
         croak qq{\nThe priors must sum to 1.} if ( $priors[0] + $priors[1] < 0.95 || $priors[0] + $priors[1] > 1.05 );  | 
| 
302
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $p_x = $priors[0];  | 
| 
303
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $p_y = $priors[1];  | 
| 
304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif ($options_ref->{priors} == 0) {  | 
| 
306
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $p_x = 0.5;  | 
| 
307
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $p_y = 0.5;   | 
| 
308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
309
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else { croak qq{\nI do not recognise that value for the priors option} }  | 
| 
310
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return ($p_x, $p_y);  | 
| 
311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _subtract {  | 
| 
314
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
     my ($lol, $full) = @_;  | 
| 
315
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for my $var (0..$#{$lol}) {   | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
316
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         for my $cell (0..$#{$lol->[$var]}) {  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
317
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $lol->[$var][$cell] = $lol->[$var][$cell] - $full->[$var];  | 
| 
318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _combine{  | 
| 
323
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
     my ($lol1, $lol2, $p) = @_;  | 
| 
324
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $full = [];  | 
| 
325
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for my $i (0..$p-1) {  | 
| 
326
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         push @{$full->[$i]}, @{$lol1->[$i]}, @{$lol2->[$i]};  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
328
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return $full;  | 
| 
329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _means {  | 
| 
332
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
     my ($full, $p) = @_;  | 
| 
333
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for my $i (0..$p-1) {  | 
| 
334
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $sum = sum @{$full->[$i]};  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
335
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $n = scalar @{$full->[$i]};  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
336
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $full->[$i] = $sum/$n;  | 
| 
337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
338
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _twats {  | 
| 
341
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
     my ($lol1, $lol2) = @_;  | 
| 
342
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $lol1_mat = Math::MatrixReal->new_from_rows($lol1);  | 
| 
343
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $n_1 = scalar @{$lol1};  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
344
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $lol1_mat = (~$lol1_mat * $lol1_mat) / $n_1;  | 
| 
345
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $lol2_mat = Math::MatrixReal->new_from_rows($lol2);  | 
| 
346
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $n_2 = scalar @{$lol2};  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
347
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $lol2_mat = (~$lol2_mat * $lol2_mat) / $n_2;  | 
| 
348
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $mva = [];  | 
| 
349
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $mva->[0][0][0] = $lol1_mat;  | 
| 
350
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $mva->[0][1][0] = $lol2_mat;  | 
| 
351
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print $mva->[0][0][0];  | 
| 
352
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print $mva->[0][1][0];  | 
| 
353
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return $mva;  | 
| 
354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub output {  | 
| 
357
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
     my $self = shift;  | 
| 
358
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $constant_x = $self->[0][0][0];  | 
| 
359
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $constant_y = $self->[0][1][0];  | 
| 
360
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $matrix_x = $self->[0][0][1];  | 
| 
361
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $matrix_y = $self->[0][1][1];  | 
| 
362
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $p_x = $self->[1][0];   | 
| 
363
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $p_y = $self->[1][1];   | 
| 
364
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $p_var_num = $self->[2];   | 
| 
365
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if (!wantarray) {  | 
| 
366
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         print qq{\nTwo-groups with $p_var_num variables each and prior probabilities of p(x) = $p_x and p(y) = $p_y.\n\n};  | 
| 
367
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my @config = ( [17, q{}], [15, q{X}], [15, q{Y}] );  | 
| 
368
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $tbl = Text::SimpleTable->new(@config);  | 
| 
369
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $tbl->row( qq{Constant B[0]}, sprintf(q{%.5f}, $constant_x->[0][0][0]), sprintf(q{%.5f}, $constant_y->[0][0][0]) );  | 
| 
370
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $tbl->hr;  | 
| 
371
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         for my $row (0..$#{$matrix_x->[0][0]}) {   | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
372
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             my $coeff = $row+1;  | 
| 
373
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $tbl->row( qq{Coefficient B[$coeff]}, sprintf(q{%.5f}, $matrix_x->[0][0][$row]), sprintf(q{%.5f}, $matrix_y->[0][0][$row]) );  | 
| 
374
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $tbl->hr if $row != $#{$matrix_x->[0][0]};  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
376
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         print $tbl->draw;  | 
| 
377
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         return;  | 
| 
378
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
379
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else { return ( $p_x, $constant_x->[0][0][0], $matrix_x->[0][0], $p_y, $constant_y->[0][0][0], $matrix_y->[0][0] ); }  | 
| 
380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
382
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub discriminate {  | 
| 
383
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
     my ($self, $ex) = @_;  | 
| 
384
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $context = wantarray;  | 
| 
385
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $constant_x = $self->[0][0][0];  | 
| 
386
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $constant_y = $self->[0][1][0];  | 
| 
387
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $matrix_x = $self->[0][0][1];  | 
| 
388
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $matrix_y = $self->[0][1][1];  | 
| 
389
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $p_x = $self->[1][0];   | 
| 
390
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $p_y = $self->[1][1];   | 
| 
391
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $p_var_num = $self->[2];   | 
| 
392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
393
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     croak qq{\nData must be passed as an ARRAY ref} if (ref $ex ne q{ARRAY});  | 
| 
394
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     croak qq{\nThis example has the wrong variable number} if scalar @{$ex} != $p_var_num;  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
396
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $example = Math::MatrixReal->new_from_cols([$ex]);  | 
| 
397
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $val_x = ( $matrix_x * $example ) + $constant_x;#; + log($p_x);  | 
| 
398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
399
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #r when adding log(p) its the linear score function and not simply the linear discrimination result  | 
| 
400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #$val_x = $val_x->[0][0][0];#+log($p_x);  | 
| 
401
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $val_x = $val_x->[0][0][0];  | 
| 
402
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $val_y = ( $matrix_y * $example ) + $constant_y;#; + log($p_x);  | 
| 
403
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #$val_y = $val_y->[0][0][0];#+log($p_y);  | 
| 
404
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $val_y = $val_y->[0][0][0];  | 
| 
405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
406
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $type    = $val_x > $val_y ? q{X}  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
407
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 : $val_y > $val_x ? q{Y}   | 
| 
408
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 :                   undef;  | 
| 
409
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
410
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $post_p_x = exp($val_x) / (exp($val_x) + exp($val_y));  | 
| 
411
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $post_p_y = exp($val_y) / (exp($val_x) + exp($val_y));  | 
| 
412
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
413
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if (!$context) {  | 
| 
414
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         print qq{\nLinear score function for x = $val_x\nLinear score function for y = $val_y\n};  | 
| 
415
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         if ( defined $type  ) { print qq{\nExample (@{$ex}) is a \x27$type\x27.} }   | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
416
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else { print qq{\nCannot distinguish which group the example belongs to.} }  | 
| 
417
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         print qq{\n\nPrior probability of being an x = $p_x\nPosterior probability of being an x = $post_p_x\n};  | 
| 
418
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         print qq{\nPrior probability of being a y = $p_y\nPosterior probability of being a y = $post_p_y\n};  | 
| 
419
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         return;  | 
| 
420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
421
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
422
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         return ($val_x, $p_x, $post_p_x, $val_y, $p_y, $post_p_y, $type);   | 
| 
423
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
424
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
425
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
426
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1; # Magic true value required at end of module  | 
| 
427
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __END__  |