File Coverage

blib/lib/AI/ConfusionMatrix.pm

Criterion	Covered	Total	%
statement	85	85	100.0
branch	15	18	83.3
condition			n/a
subroutine	10	10	100.0
pod	2	3	66.6
total	112	116	96.5

line	stmt	bran	sub	pod	time	code
1						package AI::ConfusionMatrix;
2						$AI::ConfusionMatrix::VERSION = '0.009';
3	2		2		106746	use strict;
	2				12
	2				43
4	2		2		7	use warnings;
	2				4
	2				58
5	2		2		25	use Carp;
	2				4
	2				119
6	2		2		10	use Exporter 'import';
	2				3
	2				81
7						our @EXPORT= qw (getConfusionMatrix makeConfusionMatrix);
8	2		2		10	use strict;
	2				3
	2				42
9	2		2		1095	use Tie::File;
	2				28615
	2				1569
10
11						# ABSTRACT: Make a confusion matrix
12
13						sub makeConfusionMatrix {
14	2		2	1	3523	my ($matrix, $file, $delem) = @_;
15	2	100			6	unless(defined $delem) {
16	1				3	$delem = ',';
17						}
18
19	2	50			6	carp ('First argument must be a hash reference') if ref($matrix) ne 'HASH';
20
21	2				5	my %cmData = genConfusionMatrixData($matrix);
22	2	50			14	tie my @array, 'Tie::File', $file or carp "$!";
23	2				226	my @columns = @{$cmData{columns}};
	2				7
24	2				8	map {$array[0] .= $delem . $_} join $delem, (@columns, 'TOTAL', 'TP', 'FP', 'FN', 'SENS', 'ACC');
	2				10
25	2				844	my $n = 1;
26	2				3	my @expected = sort keys %{$matrix};
	2				8
27	2				5	for my $expected (@expected) {
28	6				18	$array[$n] = $expected;
29	6				1411	my $lastIndex = 0;
30	6				9	my $index;
31	6				7	for my $predicted (sort keys %{$matrix->{$expected}}) {
	6				18
32						# Calculate the index of the label in the array of columns
33	14				24	$index = _findIndex($predicted, \@columns);
34						# Print some of the delimiter to get to the column of the next value predicted
35	14				57	$array[$n] .= $delem x ($index - $lastIndex) . $matrix->{$expected}{$predicted};
36	14				3010	$lastIndex = $index;
37						}
38
39						# Get to the columns of the stats
40	6				24	$array[$n] .= $delem x (scalar(@columns) - $lastIndex + 1);
41						$array[$n] .= join $delem, (
42						$cmData{stats}{$expected}{'total'},
43						$cmData{stats}{$expected}{'tp'},
44						$cmData{stats}{$expected}{'fp'},
45						$cmData{stats}{$expected}{'fn'},
46						sprintf('%.2f%%', $cmData{stats}{$expected}{'sensitivity'}),
47	6				1334	sprintf('%.2f%%', $cmData{stats}{$expected}{'acc'})
48						);
49	6				1266	++$n;
50						}
51						# Print the TOTAL row to the csv file
52	2				8	$array[$n] = 'TOTAL' . $delem;
53	2				478	map {$array[$n] .= $cmData{totals}{$_} . $delem} (sort keys %{$cmData{totals}})[0 .. $#columns];
	10				1896
	2				11
54						$array[$n] .= join $delem, (
55						$cmData{totals}{'total'},
56						$cmData{totals}{'tp'},
57						$cmData{totals}{'fp'},
58						$cmData{totals}{'fn'},
59						sprintf('%.2f%%', $cmData{totals}{'sensitivity'}),
60	2				487	sprintf('%.2f%%', $cmData{totals}{'acc'})
61						);
62
63	2				434	untie @array;
64						}
65
66						sub getConfusionMatrix {
67	1		1	1	565	my ($matrix) = @_;
68
69	1	50			4	carp ('First argument must be a hash reference') if ref($matrix) ne 'HASH';
70	1				3	return genConfusionMatrixData($matrix);
71						}
72
73						sub genConfusionMatrixData {
74	3		3	0	5	my $matrix = shift;
75	3				4	my @expected = sort keys %{$matrix};
	3				14
76	3				9	my %stats;
77						my %totals;
78	3				0	my @columns;
79	3				5	for my $expected (@expected) {
80	9				18	$stats{$expected}{'fn'} = 0;
81	9				11	$stats{$expected}{'tp'} = 0;
82						# Ensure that the False Positive counter is defined to be able to compute the total later
83	9	100			17	unless(defined $stats{$expected}{'fp'}) {
84	6				8	$stats{$expected}{'fp'} = 0;
85						}
86	9				10	for my $predicted (keys %{$matrix->{$expected}}) {
	9				17
87	21				32	$stats{$expected}{'total'} += $matrix->{$expected}->{$predicted};
88	21	100			43	$stats{$expected}{'tp'} += $matrix->{$expected}->{$predicted} if $expected == $predicted;
89	21	100			34	if ($expected != $predicted) {
90	12				26	$stats{$expected}{'fn'} += $matrix->{$expected}->{$predicted};
91	12				19	$stats{$predicted}{'fp'} += $matrix->{$expected}->{$predicted};
92						}
93	21				23	$totals{$predicted} += $matrix->{$expected}->{$predicted};
94						# Add the label to the array of columns if it does not contain it already
95	21	100			28	push @columns, $predicted unless _findIndex($predicted, \@columns);
96						}
97
98	9				21	$stats{$expected}{'acc'} = ($stats{$expected}{'tp'} * 100) / $stats{$expected}{'total'};
99						}
100
101	3				5	for my $expected (@expected) {
102	9				13	$totals{'total'} += $stats{$expected}{'total'};
103	9				11	$totals{'tp'} += $stats{$expected}{'tp'};
104	9				11	$totals{'fn'} += $stats{$expected}{'fn'};
105	9				10	$totals{'fp'} += $stats{$expected}{'fp'};
106	9				19	$stats{$expected}{'sensitivity'} = ($stats{$expected}{'tp'} * 100) / ($stats{$expected}{'tp'} + $stats{$expected}{'fp'});
107						}
108
109	3				5	$totals{'acc'} = ($totals{'tp'} * 100) / $totals{'total'};
110	3				7	$totals{'sensitivity'} = ($totals{'tp'} * 100) / ($totals{'tp'} + $totals{'fp'});
111
112						return (
113	3				19	columns => [sort @columns],
114						stats => \%stats,
115						totals => \%totals
116						);
117						}
118
119						sub _findIndex {
120	35		35		54	my ($string, $array) = @_;
121	35				55	for (0 .. @$array - 1) {
122	87	100			87	return $_ + 1 if ($string eq @{$array}[$_]);
	87				168
123						}
124						}
125
126						=head1 NAME
127
128						AI::ConfusionMatrix - make a confusion matrix
129
130						=head1 SYNOPSIS
131
132						my %matrix;
133
134						# Loop over your predictions
135						# [...]
136
137						$matrix{$expected}{$predicted} += 1;
138
139						# [...]
140
141						makeConfusionMatrix(\%matrix, 'output.csv');
142
143
144						=head1 DESCRIPTION
145
146						This module prints a L<confusion matrix|https://en.wikipedia.org/wiki/Confusion_matrix> from a hash reference. This module tries to be generic enough to be used within a lot of machine learning projects.
147
148						=head3 Functions:
149
150						=head4 C<makeConfusionMatrix($hash_ref, $file [, $delimiter ])>
151
152						This function makes a confusion matrix from C<$hash_ref> and writes it to C<$file>. C<$file> can be a filename or a file handle opened with the C<w+> mode. If C<$delimiter> is present, it is used as a custom separator for the fields in the confusion matrix; otherwise a comma is used.
153
154						Examples:
155
156						makeConfusionMatrix(\%matrix, 'output.csv');
157						makeConfusionMatrix(\%matrix, 'output.csv', ';');
158						makeConfusionMatrix(\%matrix, *$fh);
159
160						The hash reference must look like this:
161
162						$VAR1 = {
163						'value_expected1' => {
164						'value_predicted1' => number_of_predictions
165						},
166						'value_expected2' => {
167						'value_predicted1' => number_of_predictions,
168						'value_predicted2' => number_of_predictions
169						},
170						'value_expected3' => {
171						'value_predicted3' => number_of_predictions
172						}
173						};
174
175						The output will be in CSV. Here is an example:
176
177						,1974,1978,2002,2003,2005,TOTAL,TP,FP,FN,SENS,ACC
178						1974,3,1,,,2,6,3,4,3,42.86%,50.00%
179						1978,1,5,,,,6,5,4,1,55.56%,83.33%
180						2002,2,2,8,,,12,8,1,4,88.89%,66.67%
181						2003,1,,,7,2,10,7,0,3,100.00%,70.00%
182						2005,,1,1,,6,8,6,4,2,60.00%,75.00%
183						TOTAL,7,9,9,7,10,42,29,13,13,69.05%,69.05%
184
185						Prettified:
186
187						|       | 1974 | 1978 | 2002 | 2003 | 2005 | TOTAL | TP | FP | FN | SENS    | ACC    |
188						|-------|------|------|------|------|------|-------|----|----|----|---------|--------|
189						| 1974  | 3    | 1    |      |      | 2    | 6     | 3  | 4  | 3  | 42.86%  | 50.00% |
190						| 1978  | 1    | 5    |      |      |      | 6     | 5  | 4  | 1  | 55.56%  | 83.33% |
191						| 2002  | 2    | 2    | 8    |      |      | 12    | 8  | 1  | 4  | 88.89%  | 66.67% |
192						| 2003  | 1    |      |      | 7    | 2    | 10    | 7  | 0  | 3  | 100.00% | 70.00% |
193						| 2005  |      | 1    | 1    |      | 6    | 8     | 6  | 4  | 2  | 60.00%  | 75.00% |
194						| TOTAL | 7    | 9    | 9    | 7    | 10   | 42    | 29 | 13 | 13 | 69.05%  | 69.05% |
195
196						=over
197
198						=item TP:
199
200						True Positive
201
202						=item FP:
203
204						False Positive
205
206						=item FN:
207
208						False Negative
209
210						=item SENS
211
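212						Sensitivity. Number of true positives divided by the number of predicted positives, i.e. TP / (TP + FP), expressed as a percentage. (Note: this ratio is more commonly called precision.)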
213
214						=item ACC:
215
216						Accuracy
217
218						=back
219
220						=head4 C<getConfusionMatrix($hash_ref)>
221
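222						Get the data used to compute the table above. The data is returned as a hash with the keys C<columns> (sorted array reference of the predicted labels), C<stats> (per-label statistics) and C<totals> (column totals and overall statistics).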
223
224						Example:
225
226						my %cm = getConfusionMatrix(\%matrix);
227
228						=head1 AUTHOR
229
230						Vincent Lequertier
231
232						=head1 LICENSE
233
234						This library is free software; you can redistribute it and/or modify
235						it under the same terms as Perl itself.
236
237						=cut
238
239						1;
240
241						# vim: set ts=4 sw=4 tw=0 fdm=marker :
242