line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package AI::ConfusionMatrix;
$AI::ConfusionMatrix::VERSION = '0.010';

# Pragmas first, then the modules this package relies on.
# (strict was previously loaded twice; once is enough.)
use strict;
use warnings;

use Carp;
use Exporter 'import';
use Tie::File;

# Both public functions are exported by default.
our @EXPORT = qw(getConfusionMatrix makeConfusionMatrix);
|
2
|
|
|
|
|
33373
|
|
|
2
|
|
|
|
|
1413
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
# ABSTRACT: Make a confusion matrix |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# Write the confusion matrix described by $matrix to $file as delimited text.
#
# Parameters:
#   $matrix - hash reference of the form { expected => { predicted => count } }
#   $file   - file name or file handle, passed straight to Tie::File
#   $delem  - field delimiter; defaults to ','
#
# Output layout: a header line (empty cell, the sorted predicted labels, then
# TOTAL, TP, FP, FN, SENS, ACC), one line per expected label in sorted order,
# and a final TOTAL line. Cells with no prediction count are left empty.
#
# Croaks when $matrix is not a hash reference. When the file cannot be tied
# a warning is emitted with the system error and nothing is written.
# Returns the result of untie on success, nothing on tie failure.
sub makeConfusionMatrix {
    my ($matrix, $file, $delem) = @_;
    $delem = ',' unless defined $delem;

    # reftype (rather than ref) also accepts blessed hashes.
    require Scalar::Util;
    my $type = Scalar::Util::reftype($matrix);
    croak('First argument must be a hash reference')
        unless defined $type && $type eq 'HASH';

    my %cmData  = genConfusionMatrixData($matrix);
    my @columns = @{ $cmData{columns} };

    # The six statistic cells that end every data line, in header order.
    my $stat_cells = sub {
        my ($data) = @_;
        return (
            $data->{'total'},
            $data->{'tp'},
            $data->{'fp'},
            $data->{'fn'},
            sprintf('%.2f%%', $data->{'sensitivity'}),
            sprintf('%.2f%%', $data->{'acc'}),
        );
    };

    # Build every line in memory first: appending to a Tie::File element
    # piecemeal would re-read and rewrite the file on each append.
    my @lines;

    # The leading '' produces the empty top-left cell above the row labels.
    push @lines, join $delem, '', @columns, 'TOTAL', 'TP', 'FP', 'FN', 'SENS', 'ACC';

    for my $expected (sort keys %{$matrix}) {
        my $row = $matrix->{$expected};
        push @lines, join $delem,
            $expected,
            (map { exists $row->{$_} ? $row->{$_} : '' } @columns),
            $stat_cells->($cmData{stats}{$expected});
    }

    push @lines, join $delem,
        'TOTAL',
        @{ $cmData{totals} }{@columns},
        $stat_cells->($cmData{totals});

    # Each element of the tied array is one line of the output file.
    tie(my @output_array, 'Tie::File', $file) or do {
        carp "$!";
        return;
    };

    # List assignment clears the previous file content before writing.
    @output_array = @lines;

    return untie @output_array;
}
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
# Return the data behind the confusion matrix without writing any file.
#
# Parameters:
#   $matrix - hash reference of the form { expected => { predicted => count } }
#
# Returns whatever genConfusionMatrixData returns for $matrix: a key/value
# list with the keys 'columns', 'stats' and 'totals'.
#
# Croaks when $matrix is not a hash reference.
sub getConfusionMatrix {
    my ($matrix) = @_;

    # reftype (rather than ref) also accepts blessed hashes.
    require Scalar::Util;
    my $type = Scalar::Util::reftype($matrix);
    croak('First argument must be a hash reference')
        unless defined $type && $type eq 'HASH';

    return genConfusionMatrixData($matrix);
}
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# Compute per-label and overall statistics for a confusion matrix.
#
# Parameters:
#   $matrix - hash reference of the form { expected => { predicted => count } }
#
# Returns a key/value list:
#   columns - array reference of every predicted label, sorted as strings
#   stats   - hash reference keyed by label; each expected label has
#             'total', 'tp', 'fp', 'fn', 'acc' and 'sensitivity'
#             (a label that is only ever predicted has just 'fp')
#   totals  - hash reference holding the per-column sums keyed by predicted
#             label, plus 'total', 'tp', 'fp', 'fn', 'acc' and 'sensitivity'
#
# 'acc' is tp * 100 / total and 'sensitivity' is tp * 100 / (tp + fp).
# A percentage whose denominator is zero is reported as 0 instead of
# raising "Illegal division by zero".
sub genConfusionMatrixData {
    my $matrix   = shift;
    my @expected = sort keys %{$matrix};

    my %stats;
    my %totals = (total => 0, tp => 0, fp => 0, fn => 0);
    my %seen_column;

    my $percentage = sub {
        my ($part, $whole) = @_;
        return $whole ? ($part * 100) / $whole : 0;
    };

    for my $expected (@expected) {
        my $row = ($stats{$expected} ||= {});
        $row->{'total'} = 0;
        $row->{'tp'}    = 0;
        $row->{'fn'}    = 0;

        # 'fp' may already hold counts gathered while an earlier row
        # predicted this label, so it is only initialised when missing.
        $row->{'fp'} = 0 unless defined $row->{'fp'};

        for my $predicted (keys %{ $matrix->{$expected} }) {
            my $count = $matrix->{$expected}{$predicted};

            $row->{'total'}     += $count;
            $totals{$predicted} += $count;
            $seen_column{$predicted} = 1;

            if ($expected eq $predicted) {
                $row->{'tp'} += $count;
            }
            else {
                # A miss is a false negative for the expected label and a
                # false positive for the label that was predicted instead.
                $row->{'fn'}               += $count;
                $stats{$predicted}{'fp'}   += $count;
            }
        }

        $row->{'acc'} = $percentage->($row->{'tp'}, $row->{'total'});
    }

    # Second pass: a label's 'fp' is only final once every row has been read.
    for my $expected (@expected) {
        my $row = $stats{$expected};
        $totals{$_} += $row->{$_} for qw(total tp fn fp);
        $row->{'sensitivity'}
            = $percentage->($row->{'tp'}, $row->{'tp'} + $row->{'fp'});
    }

    $totals{'acc'} = $percentage->($totals{'tp'}, $totals{'total'});
    $totals{'sensitivity'}
        = $percentage->($totals{'tp'}, $totals{'tp'} + $totals{'fp'});

    return (
        columns => [sort keys %seen_column],
        stats   => \%stats,
        totals  => \%totals
    );
}
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
# Locate $string in the array referenced by $array.
#
# Returns the 1-based position of the first element that is string-equal
# to $string, or 0 when no element matches. Positions start at 1 so the
# result doubles as a boolean "found" flag.
sub _findIndex {
    my ($string, $array) = @_;

    for my $i (0 .. $#{$array}) {
        return $i + 1 if $string eq $array->[$i];
    }

    # Explicit miss value: falling off the end of a loop would leave the
    # sub's return value unspecified.
    return 0;
}
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=head1 NAME |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
AI::ConfusionMatrix - make a confusion matrix |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=head1 SYNOPSIS |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
my %matrix; |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
# Loop over your predictions |
139
|
|
|
|
|
|
|
# [...] |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
$matrix{$expected}{$predicted} += 1; |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
# [...] |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
makeConfusionMatrix(\%matrix, 'output.csv'); |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
=head1 DESCRIPTION |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
This module prints a L<confusion matrix|https://en.wikipedia.org/wiki/Confusion_matrix> from a hash reference. This module tries to be generic enough to be used within a lot of machine learning projects. |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=head3 Functions: |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
=head4 C<makeConfusionMatrix($hash_ref, $file [, $delimiter])> |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
This function makes a confusion matrix from C<$hash_ref> and writes it to C<$file>. C<$file> can be a filename or a file handle opened in read/write mode (for example C<+E<lt>>). If C<$delimiter> is present, it is used as a custom separator for the fields in the confusion matrix. |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
Examples: |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
makeConfusionMatrix(\%matrix, 'output.csv'); |
161
|
|
|
|
|
|
|
makeConfusionMatrix(\%matrix, 'output.csv', ';'); |
162
|
|
|
|
|
|
|
makeConfusionMatrix(\%matrix, *$fh); |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
The hash reference must look like this : |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
$VAR1 = { |
167
|
|
|
|
|
|
|
'value_expected1' => { |
168
|
|
|
|
|
|
|
'value_predicted1' => number_of_predictions |
169
|
|
|
|
|
|
|
}, |
170
|
|
|
|
|
|
|
'value_expected2' => { |
171
|
|
|
|
|
|
|
'value_predicted1' => number_of_predictions, |
172
|
|
|
|
|
|
|
'value_predicted2' => number_of_predictions |
173
|
|
|
|
|
|
|
}, |
174
|
|
|
|
|
|
|
'value_expected3' => { |
175
|
|
|
|
|
|
|
'value_predicted3' => number_of_predictions |
176
|
|
|
|
|
|
|
} |
177
|
|
|
|
|
|
|
}; |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
The output will be in CSV. Here is an example: |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
,1974,1978,2002,2003,2005,TOTAL,TP,FP,FN,SENS,ACC |
182
|
|
|
|
|
|
|
1974,3,1,,,2,6,3,4,3,42.86%,50.00% |
183
|
|
|
|
|
|
|
1978,1,5,,,,6,5,4,1,55.56%,83.33% |
184
|
|
|
|
|
|
|
2002,2,2,8,,,12,8,1,4,88.89%,66.67% |
185
|
|
|
|
|
|
|
2003,1,,,7,2,10,7,0,3,100.00%,70.00% |
186
|
|
|
|
|
|
|
2005,,1,1,,6,8,6,4,2,60.00%,75.00% |
187
|
|
|
|
|
|
|
TOTAL,7,9,9,7,10,42,29,13,13,69.05%,69.05% |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
Prettified: |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
| | 1974 | 1978 | 2002 | 2003 | 2005 | TOTAL | TP | FP | FN | SENS | ACC | |
192
|
|
|
|
|
|
|
|-------|------|------|------|------|------|-------|----|----|----|---------|--------| |
193
|
|
|
|
|
|
|
| 1974 | 3 | 1 | | | 2 | 6 | 3 | 4 | 3 | 42.86% | 50.00% | |
194
|
|
|
|
|
|
|
| 1978 | 1 | 5 | | | | 6 | 5 | 4 | 1 | 55.56% | 83.33% | |
195
|
|
|
|
|
|
|
| 2002 | 2 | 2 | 8 | | | 12 | 8 | 1 | 4 | 88.89% | 66.67% | |
196
|
|
|
|
|
|
|
| 2003 | 1 | | | 7 | 2 | 10 | 7 | 0 | 3 | 100.00% | 70.00% | |
197
|
|
|
|
|
|
|
| 2005 | | 1 | 1 | | 6 | 8 | 6 | 4 | 2 | 60.00% | 75.00% | |
198
|
|
|
|
|
|
|
| TOTAL | 7 | 9 | 9 | 7 | 10 | 42 | 29 | 13 | 13 | 69.05% | 69.05% | |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=over |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
=item TP: |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
True Positive |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
=item FP: |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
False Positive |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
=item FN: |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
False Negative |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
=item SENS |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
Sensitivity. As computed by this module: the number of true positives divided by the number of times the label was predicted, TP / (TP + FP). |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
=item ACC: |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
Accuracy. As computed by this module: the number of true positives divided by the row total, TP / TOTAL. |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
=back |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=head4 C<getConfusionMatrix($hash_ref)> |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
Get the data used to compute the table above. |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
Example: |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
my %cm = getConfusionMatrix(\%matrix); |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
=head1 AUTHOR |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
Vincent Lequertier |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
=head1 LICENSE |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
239
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
=cut |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
1; |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
# vim: set ts=4 sw=4 tw=0 fdm=marker : |
246
|
|
|
|
|
|
|
|