File Coverage

blib/lib/AI/ConfusionMatrix.pm

Criterion	Covered	Total	%
statement	85	85	100.0
branch	15	18	83.3
condition			n/a
subroutine	10	10	100.0
pod	2	3	66.6
total	112	116	96.5

line	stmt	bran	sub	pod	time	code
1						package AI::ConfusionMatrix;
2						$AI::ConfusionMatrix::VERSION = '0.010';
3	2		2		118334	use strict;
	2				12
	2				77
4	2		2		9	use warnings;
	2				3
	2				47
5	2		2		10	use Carp;
	2				9
	2				112
6	2		2		11	use Exporter 'import';
	2				2
	2				88
7						our @EXPORT= qw (getConfusionMatrix makeConfusionMatrix);
8	2		2		10	use strict;
	2				4
	2				39
9	2		2		1276	use Tie::File;
	2				33373
	2				1413
10
11						# ABSTRACT: Make a confusion matrix
12
13						sub makeConfusionMatrix {
14	4		4	1	10336	my ($matrix, $file, $delem) = @_;
15	4	100			14	unless(defined $delem) {
16	2				4	$delem = ',';
17						}
18
19	4	50			14	carp ('First argument must be a hash reference') if ref($matrix) ne 'HASH';
20
21	4				10	my %cmData = genConfusionMatrixData($matrix);
22						# This ties @output_array to the output file. Each output_array item represents a line in the output file
23	4	50			26	tie my @output_array, 'Tie::File', $file or carp "$!";
24						# Empty the file
25	4				435	@output_array = ();
26
27	4				317	my @columns = @{$cmData{columns}};
	4				12
28	4				20	map {$output_array[0] .= $delem . $_} join $delem, (@columns, 'TOTAL', 'TP', 'FP', 'FN', 'SENS', 'ACC');
	4				17
29	4				1749	my $line = 1;
30	4				8	my @expected = sort keys %{$matrix};
	4				18
31	4				12	for my $expected (@expected) {
32	12				38	$output_array[$line] = $expected;
33	12				3395	my $lastIndex = 0;
34	12				13	my $index;
35	12				15	for my $predicted (sort keys %{$matrix->{$expected}}) {
	12				44
36						# Calculate the index of the label in the output_array of columns
37	28				71	$index = _findIndex($predicted, \@columns);
38						# Print some of the delimiter to get to the column of the next value predicted
39	28				122	$output_array[$line] .= $delem x ($index - $lastIndex) . $matrix->{$expected}{$predicted};
40	28				8389	$lastIndex = $index;
41						}
42
43						# Get to the columns of the stats
44	12				53	$output_array[$line] .= $delem x (scalar(@columns) - $lastIndex + 1);
45						$output_array[$line] .= join $delem, (
46						$cmData{stats}{$expected}{'total'},
47						$cmData{stats}{$expected}{'tp'},
48						$cmData{stats}{$expected}{'fp'},
49						$cmData{stats}{$expected}{'fn'},
50						sprintf('%.2f%%', $cmData{stats}{$expected}{'sensitivity'}),
51	12				3380	sprintf('%.2f%%', $cmData{stats}{$expected}{'acc'})
52						);
53	12				2968	++$line;
54						}
55						# Print the TOTAL row to the csv file
56	4				18	$output_array[$line] = 'TOTAL' . $delem;
57	4				1256	map {$output_array[$line] .= $cmData{totals}{$_} . $delem} (@columns);
	18				5313
58						$output_array[$line] .= join $delem, (
59						$cmData{totals}{'total'},
60						$cmData{totals}{'tp'},
61						$cmData{totals}{'fp'},
62						$cmData{totals}{'fn'},
63						sprintf('%.2f%%', $cmData{totals}{'sensitivity'}),
64	4				1279	sprintf('%.2f%%', $cmData{totals}{'acc'})
65						);
66
67	4				1061	untie @output_array;
68						}
69
70						sub getConfusionMatrix {
71	1		1	1	591	my ($matrix) = @_;
72
73	1	50			4	carp ('First argument must be a hash reference') if ref($matrix) ne 'HASH';
74	1				4	return genConfusionMatrixData($matrix);
75						}
76
77						sub genConfusionMatrixData {
78	5		5	0	9	my $matrix = shift;
79	5				7	my @expected = sort keys %{$matrix};
	5				27
80	5				15	my %stats;
81						my %totals;
82	5				0	my @columns;
83	5				10	for my $expected (@expected) {
84	15				30	$stats{$expected}{'fn'} = 0;
85	15				21	$stats{$expected}{'tp'} = 0;
86						# Ensure that the False Positive counter is defined to be able to compute the total later
87	15	100			27	unless(defined $stats{$expected}{'fp'}) {
88	10				18	$stats{$expected}{'fp'} = 0;
89						}
90	15				20	for my $predicted (keys %{$matrix->{$expected}}) {
	15				33
91	35				55	$stats{$expected}{'total'} += $matrix->{$expected}->{$predicted};
92	35	100			63	$stats{$expected}{'tp'} += $matrix->{$expected}->{$predicted} if $expected eq $predicted;
93	35	100			60	if ($expected ne $predicted) {
94	20				32	$stats{$expected}{'fn'} += $matrix->{$expected}->{$predicted};
95	20				28	$stats{$predicted}{'fp'} += $matrix->{$expected}->{$predicted};
96						}
97	35				44	$totals{$predicted} += $matrix->{$expected}->{$predicted};
98						# Add the label to the array of columns if it does not contain it already
99	35	100			52	push @columns, $predicted unless _findIndex($predicted, \@columns);
100						}
101
102	15				42	$stats{$expected}{'acc'} = ($stats{$expected}{'tp'} * 100) / $stats{$expected}{'total'};
103						}
104
105	5				11	for my $expected (@expected) {
106	15				22	$totals{'total'} += $stats{$expected}{'total'};
107	15				17	$totals{'tp'} += $stats{$expected}{'tp'};
108	15				22	$totals{'fn'} += $stats{$expected}{'fn'};
109	15				29	$totals{'fp'} += $stats{$expected}{'fp'};
110	15				32	$stats{$expected}{'sensitivity'} = ($stats{$expected}{'tp'} * 100) / ($stats{$expected}{'tp'} + $stats{$expected}{'fp'});
111						}
112
113	5				14	$totals{'acc'} = ($totals{'tp'} * 100) / $totals{'total'};
114	5				11	$totals{'sensitivity'} = ($totals{'tp'} * 100) / ($totals{'tp'} + $totals{'fp'});
115
116						return (
117	5				37	columns => [sort @columns],
118						stats => \%stats,
119						totals => \%totals
120						);
121						}
122
123						sub _findIndex {
124	63		63		96	my ($string, $array) = @_;
125	63				123	for (0 .. @$array - 1) {
126	144	100			166	return $_ + 1 if ($string eq @{$array}[$_]);
	144				311
127						}
128						}
129
130						=head1 NAME
131
132						AI::ConfusionMatrix - make a confusion matrix
133
134						=head1 SYNOPSIS
135
136						my %matrix;
137
138						# Loop over your predictions
139						# [...]
140
141						$matrix{$expected}{$predicted} += 1;
142
143						# [...]
144
145						makeConfusionMatrix(\%matrix, 'output.csv');
146
147
148						=head1 DESCRIPTION
149
150						This module prints a L<confusion matrix|https://en.wikipedia.org/wiki/Confusion_matrix> from a hash reference. This module tries to be generic enough to be used within a lot of machine learning projects.
151
152						=head3 Functions:
153
154						=head4 C<makeConfusionMatrix($hash_ref, $file [, $delimiter])>
155
156						This function makes a confusion matrix from C<$hash_ref> and writes it to C<$file>. C<$file> can be a filename or a file handle opened with the C<w+> mode. If C<$delimiter> is present, it is used as a custom separator for the fields in the confusion matrix.
157
158						Examples:
159
160						makeConfusionMatrix(\%matrix, 'output.csv');
161						makeConfusionMatrix(\%matrix, 'output.csv', ';');
162						makeConfusionMatrix(\%matrix, *$fh);
163
164						The hash reference must look like this:
165
166						$VAR1 = {
167						'value_expected1' => {
168						'value_predicted1' => number_of_predictions
169						},
170						'value_expected2' => {
171						'value_predicted1' => number_of_predictions,
172						'value_predicted2' => number_of_predictions
173						},
174						'value_expected3' => {
175						'value_predicted3' => number_of_predictions
176						}
177						};
178
179						The output will be in CSV. Here is an example:
180
181						,1974,1978,2002,2003,2005,TOTAL,TP,FP,FN,SENS,ACC
182						1974,3,1,,,2,6,3,4,3,42.86%,50.00%
183						1978,1,5,,,,6,5,4,1,55.56%,83.33%
184						2002,2,2,8,,,12,8,1,4,88.89%,66.67%
185						2003,1,,,7,2,10,7,0,3,100.00%,70.00%
186						2005,,1,1,,6,8,6,4,2,60.00%,75.00%
187						TOTAL,7,9,9,7,10,42,29,13,13,69.05%,69.05%
188
189						Prettified:
190
191						| | 1974 | 1978 | 2002 | 2003 | 2005 | TOTAL | TP | FP | FN | SENS | ACC |
192						|-------|------|------|------|------|------|-------|----|----|----|---------|--------|
193						| 1974 | 3 | 1 | | | 2 | 6 | 3 | 4 | 3 | 42.86% | 50.00% |
194						| 1978 | 1 | 5 | | | | 6 | 5 | 4 | 1 | 55.56% | 83.33% |
195						| 2002 | 2 | 2 | 8 | | | 12 | 8 | 1 | 4 | 88.89% | 66.67% |
196						| 2003 | 1 | | | 7 | 2 | 10 | 7 | 0 | 3 | 100.00% | 70.00% |
197						| 2005 | | 1 | 1 | | 6 | 8 | 6 | 4 | 2 | 60.00% | 75.00% |
198						| TOTAL | 7 | 9 | 9 | 7 | 10 | 42 | 29 | 13 | 13 | 69.05% | 69.05% |
199
200						=over
201
202						=item TP:
203
204						True Positive
205
206						=item FP:
207
208						False Positive
209
210						=item FN:
211
212						False Negative
213
214						=item SENS
215
216						Sensitivity. As computed by this module: the number of true positives divided by the sum of true positives and false positives (TP / (TP + FP)), expressed as a percentage.
217
218						=item ACC:
219
220						Accuracy
221
222						=back
223
224						=head4 C<getConfusionMatrix($hash_ref)>
225
226						Get the data used to compute the table above.
227
228						Example:
229
230						my %cm = getConfusionMatrix(\%matrix);
231
232						=head1 AUTHOR
233
234						Vincent Lequertier
235
236						=head1 LICENSE
237
238						This library is free software; you can redistribute it and/or modify
239						it under the same terms as Perl itself.
240
241						=cut
242
243						1;
244
245						# vim: set ts=4 sw=4 tw=0 fdm=marker :
246