File Coverage

blib/lib/AI/ConfusionMatrix.pm
Criterion Covered Total %
statement 85 85 100.0
branch 15 18 83.3
condition n/a
subroutine 10 10 100.0
pod 2 3 66.6
total 112 116 96.5


line stmt bran cond sub pod time code
1             package AI::ConfusionMatrix;
2             $AI::ConfusionMatrix::VERSION = '0.009';
3 2     2   106746 use strict;
  2         12  
  2         43  
4 2     2   7 use warnings;
  2         4  
  2         58  
5 2     2   25 use Carp;
  2         4  
  2         119  
6 2     2   10 use Exporter 'import';
  2         3  
  2         81  
7             our @EXPORT= qw (getConfusionMatrix makeConfusionMatrix);
8 2     2   10 use strict;
  2         3  
  2         42  
9 2     2   1095 use Tie::File;
  2         28615  
  2         1569  
10              
11             # ABSTRACT: Make a confusion matrix
12              
13             sub makeConfusionMatrix {
14 2     2 1 3523 my ($matrix, $file, $delem) = @_;
15 2 100       6 unless(defined $delem) {
16 1         3 $delem = ',';
17             }
18              
19 2 50       6 carp ('First argument must be a hash reference') if ref($matrix) ne 'HASH';
20              
21 2         5 my %cmData = genConfusionMatrixData($matrix);
22 2 50       14 tie my @array, 'Tie::File', $file or carp "$!";
23 2         226 my @columns = @{$cmData{columns}};
  2         7  
24 2         8 map {$array[0] .= $delem . $_} join $delem, (@columns, 'TOTAL', 'TP', 'FP', 'FN', 'SENS', 'ACC');
  2         10  
25 2         844 my $n = 1;
26 2         3 my @expected = sort keys %{$matrix};
  2         8  
27 2         5 for my $expected (@expected) {
28 6         18 $array[$n] = $expected;
29 6         1411 my $lastIndex = 0;
30 6         9 my $index;
31 6         7 for my $predicted (sort keys %{$matrix->{$expected}}) {
  6         18  
32             # Calculate the index of the label in the array of columns
33 14         24 $index = _findIndex($predicted, \@columns);
34             # Print some of the delimiter to get to the column of the next value predicted
35 14         57 $array[$n] .= $delem x ($index - $lastIndex) . $matrix->{$expected}{$predicted};
36 14         3010 $lastIndex = $index;
37             }
38              
39             # Get to the columns of the stats
40 6         24 $array[$n] .= $delem x (scalar(@columns) - $lastIndex + 1);
41             $array[$n] .= join $delem, (
42             $cmData{stats}{$expected}{'total'},
43             $cmData{stats}{$expected}{'tp'},
44             $cmData{stats}{$expected}{'fp'},
45             $cmData{stats}{$expected}{'fn'},
46             sprintf('%.2f%%', $cmData{stats}{$expected}{'sensitivity'}),
47 6         1334 sprintf('%.2f%%', $cmData{stats}{$expected}{'acc'})
48             );
49 6         1266 ++$n;
50             }
51             # Print the TOTAL row to the csv file
52 2         8 $array[$n] = 'TOTAL' . $delem;
53 2         478 map {$array[$n] .= $cmData{totals}{$_} . $delem} (sort keys %{$cmData{totals}})[0 .. $#columns];
  10         1896  
  2         11  
54             $array[$n] .= join $delem, (
55             $cmData{totals}{'total'},
56             $cmData{totals}{'tp'},
57             $cmData{totals}{'fp'},
58             $cmData{totals}{'fn'},
59             sprintf('%.2f%%', $cmData{totals}{'sensitivity'}),
60 2         487 sprintf('%.2f%%', $cmData{totals}{'acc'})
61             );
62              
63 2         434 untie @array;
64             }
65              
66             sub getConfusionMatrix {
67 1     1 1 565 my ($matrix) = @_;
68              
69 1 50       4 carp ('First argument must be a hash reference') if ref($matrix) ne 'HASH';
70 1         3 return genConfusionMatrixData($matrix);
71             }
72              
73             sub genConfusionMatrixData {
74 3     3 0 5 my $matrix = shift;
75 3         4 my @expected = sort keys %{$matrix};
  3         14  
76 3         9 my %stats;
77             my %totals;
78 3         0 my @columns;
79 3         5 for my $expected (@expected) {
80 9         18 $stats{$expected}{'fn'} = 0;
81 9         11 $stats{$expected}{'tp'} = 0;
82             # Ensure that the False Positive counter is defined to be able to compute the total later
83 9 100       17 unless(defined $stats{$expected}{'fp'}) {
84 6         8 $stats{$expected}{'fp'} = 0;
85             }
86 9         10 for my $predicted (keys %{$matrix->{$expected}}) {
  9         17  
87 21         32 $stats{$expected}{'total'} += $matrix->{$expected}->{$predicted};
88 21 100       43 $stats{$expected}{'tp'} += $matrix->{$expected}->{$predicted} if $expected == $predicted;
89 21 100       34 if ($expected != $predicted) {
90 12         26 $stats{$expected}{'fn'} += $matrix->{$expected}->{$predicted};
91 12         19 $stats{$predicted}{'fp'} += $matrix->{$expected}->{$predicted};
92             }
93 21         23 $totals{$predicted} += $matrix->{$expected}->{$predicted};
94             # Add the label to the array of columns if it does not contain it already
95 21 100       28 push @columns, $predicted unless _findIndex($predicted, \@columns);
96             }
97              
98 9         21 $stats{$expected}{'acc'} = ($stats{$expected}{'tp'} * 100) / $stats{$expected}{'total'};
99             }
100              
101 3         5 for my $expected (@expected) {
102 9         13 $totals{'total'} += $stats{$expected}{'total'};
103 9         11 $totals{'tp'} += $stats{$expected}{'tp'};
104 9         11 $totals{'fn'} += $stats{$expected}{'fn'};
105 9         10 $totals{'fp'} += $stats{$expected}{'fp'};
106 9         19 $stats{$expected}{'sensitivity'} = ($stats{$expected}{'tp'} * 100) / ($stats{$expected}{'tp'} + $stats{$expected}{'fp'});
107             }
108              
109 3         5 $totals{'acc'} = ($totals{'tp'} * 100) / $totals{'total'};
110 3         7 $totals{'sensitivity'} = ($totals{'tp'} * 100) / ($totals{'tp'} + $totals{'fp'});
111              
112             return (
113 3         19 columns => [sort @columns],
114             stats => \%stats,
115             totals => \%totals
116             );
117             }
118              
119             sub _findIndex {
120 35     35   54 my ($string, $array) = @_;
121 35         55 for (0 .. @$array - 1) {
122 87 100       87 return $_ + 1 if ($string eq @{$array}[$_]);
  87         168  
123             }
124             }
125              
126             =head1 NAME
127              
128             AI::ConfusionMatrix - make a confusion matrix
129              
130             =head1 SYNOPSIS
131              
132             my %matrix;
133              
134             # Loop over your predictions
135             # [...]
136              
137             $matrix{$expected}{$predicted} += 1;
138              
139             # [...]
140              
141             makeConfusionMatrix(\%matrix, 'output.csv');
142              
143              
144             =head1 DESCRIPTION
145              
146             This module prints a L<confusion matrix|https://en.wikipedia.org/wiki/Confusion_matrix> from a hash reference. This module tries to be generic enough to be used within a lot of machine learning projects.
147              
148             =head3 Functions:
149              
150             =head4 C<makeConfusionMatrix($hash_ref, $file [, $delimiter ])>
151              
152             This function makes a confusion matrix from C<$hash_ref> and writes it to C<$file>. C<$file> can be a filename or a file handle opened with the C<w+> mode. If C<$delimiter> is present, it is used as a custom separator for the fields in the confusion matrix.
153              
154             Examples:
155              
156             makeConfusionMatrix(\%matrix, 'output.csv');
157             makeConfusionMatrix(\%matrix, 'output.csv', ';');
158             makeConfusionMatrix(\%matrix, *$fh);
159              
160             The hash reference must look like this :
161              
162             $VAR1 = {
163             'value_expected1' => {
164             'value_predicted1' => number_of_predictions
165             },
166             'value_expected2' => {
167             'value_predicted1' => number_of_predictions,
168             'value_predicted2' => number_of_predictions
169             },
170             'value_expected3' => {
171             'value_predicted3' => number_of_predictions
172             }
173             };
174              
175             The output will be in CSV. Here is an example:
176              
177             ,1974,1978,2002,2003,2005,TOTAL,TP,FP,FN,SENS,ACC
178             1974,3,1,,,2,6,3,4,3,42.86%,50.00%
179             1978,1,5,,,,6,5,4,1,55.56%,83.33%
180             2002,2,2,8,,,12,8,1,4,88.89%,66.67%
181             2003,1,,,7,2,10,7,0,3,100.00%,70.00%
182             2005,,1,1,,6,8,6,4,2,60.00%,75.00%
183             TOTAL,7,9,9,7,10,42,29,13,13,69.05%,69.05%
184              
185             Prettified:
186              
187             | | 1974 | 1978 | 2002 | 2003 | 2005 | TOTAL | TP | FP | FN | SENS | ACC |
188             |-------|------|------|------|------|------|-------|----|----|----|---------|--------|
189             | 1974 | 3 | 1 | | | 2 | 6 | 3 | 4 | 3 | 42.86% | 50.00% |
190             | 1978 | 1 | 5 | | | | 6 | 5 | 4 | 1 | 55.56% | 83.33% |
191             | 2002 | 2 | 2 | 8 | | | 12 | 8 | 1 | 4 | 88.89% | 66.67% |
192             | 2003 | 1 | | | 7 | 2 | 10 | 7 | 0 | 3 | 100.00% | 70.00% |
193             | 2005 | | 1 | 1 | | 6 | 8 | 6 | 4 | 2 | 60.00% | 75.00% |
194             | TOTAL | 7 | 9 | 9 | 7 | 10 | 42 | 29 | 13 | 13 | 69.05% | 69.05% |
195              
196             =over
197              
198             =item TP:
199              
200             True Positive
201              
202             =item FP:
203              
204             False Positive
205              
206             =item FN:
207              
208             False Negative
209              
210             =item SENS
211              
212             Sensitivity. Number of true positives divided by the number of positives.
213              
214             =item ACC:
215              
216             Accuracy
217              
218             =back
219              
220             =head4 C<getConfusionMatrix($hash_ref)>
221              
222             Get the data used to compute the table above.
223              
224             Example:
225              
226             my %cm = getConfusionMatrix(\%matrix);
227              
228             =head1 AUTHOR
229              
230             Vincent Lequertier
231              
232             =head1 LICENSE
233              
234             This library is free software; you can redistribute it and/or modify
235             it under the same terms as Perl itself.
236              
237             =cut
238              
239             1;
240              
241             # vim: set ts=4 sw=4 tw=0 fdm=marker :
242