File Coverage

blib/lib/Statistics/Contingency.pm

Criterion	Covered	Total	%
statement	83	93	89.2
branch	28	36	77.7
condition	18	20	90.0
subroutine	21	25	84.0
pod	15	15	100.0
total	165	189	87.3

line	stmt	bran	cond	sub	pod	time	code
1	1			1		5526	use strict;
	1					2
	1					76
2
3							package Statistics::Contingency;
4							{
5							$Statistics::Contingency::VERSION = '0.09';
6							}
7
8							# Correct=Y Correct=N
9							# +-----------+-----------+
10							# Assigned=Y \| a \| b \|
11							# +-----------+-----------+
12							# Assigned=N \| c \| d \|
13							# +-----------+-----------+
14
15							# accuracy = (a+d)/(a+b+c+d)
16							# precision = a/(a+b)
17							# recall = a/(a+c)
18							# F1 = 2a/(2a + b + c)
19
20							# Edge cases:
21							# precision(0,0,+,d) = 0
22							# precision(a,0,c,d) = 1
23							# precision(0,+,c,d) = 0
24							# recall(a,b,0,d) = 1
25							# recall(0,b,+,d) = 0
26							# F1(a,0,0,d) = 1
27							# F1(0,+++,d) = 0
28
29	1			1		1024	use Params::Validate qw(:all);
	1					10178
	1					1698
30
31							sub new {
32	8			8	1	347	my $package = shift;
33	8					188	my $self = bless { validate @_,
34							{
35							verbose => { type => SCALAR, default => 0 },
36							categories => { type => ARRAYREF\|HASHREF },
37							}
38							}, $package;
39
40	8					80	$self->{$_} = 0 foreach qw(a b c d);
41	8					17	my $c = delete $self->{categories};
42	8	50				32	$self->{categories} = { map {($_ => {a=>0, b=>0, c=>0, d=>0})}
	32					161
43							UNIVERSAL::isa($c, 'HASH') ? keys(%$c) : @$c
44							};
45	8					33	return $self;
46							}
47
48							sub set_entries {
49	1			1	1	6	my $self = shift;
50	1					2	@{ $self }{'a', 'b', 'c', 'd'} = @_;
	1					5
51							}
52
53							sub add_result {
54	8			8	1	74	my ($self, $assigned, $correct, $name) = @_;
55	8					14	my $cats_table = $self->{categories};
56
57							# Hashify
58	8					15	foreach ($assigned, $correct) {
59	16	50				34	$_ = {$_ => 1}, next unless ref $_;
60	16	50				42	next if UNIVERSAL::isa($_, 'HASH'); # Leave alone
61	16	50				52	$_ = { map {($_ => 1)} @$_ }, next if UNIVERSAL::isa($_, 'ARRAY');
	19					61
62	0					0	die "Unknown type '$_' for category list";
63							}
64
65							# Add to the micro/macro tables
66	8					52	foreach my $cat (keys %$cats_table) {
67	32	100	100			97	$cats_table->{$cat}{a}++, $self->{a}++ if $assigned->{$cat} and $correct->{$cat};
68	32	100	100			92	$cats_table->{$cat}{b}++, $self->{b}++ if $assigned->{$cat} and !$correct->{$cat};
69	32	100	100			115	$cats_table->{$cat}{c}++, $self->{c}++ if !$assigned->{$cat} and $correct->{$cat};
70	32	100	66			119	$cats_table->{$cat}{d}++, $self->{d}++ if !$assigned->{$cat} and !$correct->{$cat};
71							}
72
73	8	50				24	if ($self->{verbose}) {
74	0					0	print "$name: assigned=(@{[ keys %$assigned ]}) correct=(@{[ keys %$correct ]})\n";
	0					0
	0					0
75							}
76
77							# Clear any cached results
78	8					14	delete $self->{macro};
79
80	8					31	$self->{hypotheses}++;
81							}
82
83							sub _invert {
84	69			69		86	my ($self, $x, $y) = @_;
85	69	100				203	return 1 unless $y;
86	33	100				607	return 0 unless $x;
87	11					53	return 1 / (1 + $y/$x);
88							}
89
90							sub _accuracy {
91	16			16		18	my $h = $_[1];
92	16	50				56	return 1 unless grep $h->{$_}, qw(a b c d);
93	16					67	return +($h->{a} + $h->{d}) / ($h->{a} + $h->{b} + $h->{c} + $h->{d});
94							}
95
96							sub _error {
97	18			18		20	my $h = $_[1];
98	18	50				61	return 0 unless grep $h->{$_}, qw(a b c d);
99	18					106	return +($h->{b} + $h->{c}) / ($h->{a} + $h->{b} + $h->{c} + $h->{d});
100							}
101
102							sub _precision {
103	24			24		28	my ($self, $h) = @_;
104	24	100	100			92	return 0 if $h->{c} and !$h->{a} and !$h->{b};
			100
105	21					54	return $self->_invert($h->{a}, $h->{b});
106							}
107
108							sub _recall {
109	25			25		30	my ($self, $h) = @_;
110	25					55	return $self->_invert($h->{a}, $h->{c});
111							}
112
113							sub _F1 {
114	23			23		28	my ($self, $h) = @_;
115	23					62	return $self->_invert(2 * $h->{a}, $h->{b} + $h->{c});
116							}
117
118							# Fills in precision, recall, etc. for each category, and computes their averages
119							sub _macro_stats {
120	10			10		12	my $self = shift;
121	10	100				34	return $self->{macro} if $self->{macro};
122
123	4					10	my @metrics = qw(precision recall F1 accuracy error);
124
125	4					6	my $cats = $self->{categories};
126	4	50				9	die "No category information has been recorded"
127							unless keys %$cats;
128
129	4					5	my %results;
130	4					12	while (my ($cat, $scores) = each %$cats) {
131	16					19	foreach my $metric (@metrics) {
132	80					112	my $method = "_$metric";
133	80					162	$results{$metric} += ($scores->{$metric} = $self->$method($scores));
134							}
135							}
136	4					6	foreach (@metrics) {
137	20					33	$results{$_} /= keys %$cats;
138							}
139	4					32	$self->{macro} = \%results;
140							}
141
142	0			0	1	0	sub micro_accuracy { $_[0]->_accuracy( $_[0]) }
143	2			2	1	5	sub micro_error { $_[0]->_error( $_[0]) }
144	8			8	1	23	sub micro_precision { $_[0]->_precision($_[0]) }
145	9			9	1	39	sub micro_recall { $_[0]->_recall( $_[0]) }
146	7			7	1	19	sub micro_F1 { $_[0]->_F1( $_[0]) }
147
148	0			0	1	0	sub macro_accuracy { shift()->_macro_stats->{accuracy} }
149	0			0	1	0	sub macro_error { shift()->_macro_stats->{error} }
150	3			3	1	6	sub macro_precision { shift()->_macro_stats->{precision} }
151	4			4	1	9	sub macro_recall { shift()->_macro_stats->{recall} }
152	3			3	1	7	sub macro_F1 { shift()->_macro_stats->{F1} }
153
154							sub category_stats {
155	0			0	1	0	my $self = shift;
156	0					0	$self->_macro_stats;
157
158	0					0	return $self->{categories};
159							}
160
161							sub stats_table {
162	2			2	1	9	my $self = shift;
163	2		50			9	my $figs = shift \|\| 3;
164
165	2					7	my @data = map $self->_sig_figs($_, $figs),
166							(
167							$self->macro_recall,
168							$self->macro_precision,
169							$self->macro_F1,
170							$self->micro_recall,
171							$self->micro_precision,
172							$self->micro_F1,
173							$self->micro_error,
174							);
175
176	2					5	my $m = 0; # Max length of @data items
177	2					4	for (@data) {
178	14	100				28	$m = length() if length() > $m;
179							}
180	2					6	my $s = ' ' x ($m - 4);
181
182	2					6	my $out = "+" . ("-" x (10 + 7*$m)) . "+\n";
183	2					7	$out .= "\| $s maR $s maP$s maF1 $s miR $s miP$s miF1 $s Err \|\n";
184	2					6	$out .= "\| %${m}s %${m}s %${m}s %${m}s %${m}s %${m}s %${m}s \|\n";
185	2					5	$out .= "+" . ("-" x (10 + 7*$m)) . "+\n";
186
187	2					27	return sprintf($out, @data);
188							}
189
190							sub _sig_figs {
191	14			14		18	my ($self, $number, $figs) = @_;
192	14	100				41	my $after_point = $figs - int ($number != 0 ? log($number)/log(10) : 0);
193	14					74	return sprintf "%.${after_point}f", $number;
194							}
195
196							1;
197
198							__END__