| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
1
|
|
|
1
|
|
5526
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
76
|
|
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package Statistics::Contingency; |
|
4
|
|
|
|
|
|
|
{ |
|
5
|
|
|
|
|
|
|
$Statistics::Contingency::VERSION = '0.09'; |
|
6
|
|
|
|
|
|
|
} |
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
# Correct=Y Correct=N |
|
9
|
|
|
|
|
|
|
# +-----------+-----------+ |
|
10
|
|
|
|
|
|
|
# Assigned=Y | a | b | |
|
11
|
|
|
|
|
|
|
# +-----------+-----------+ |
|
12
|
|
|
|
|
|
|
# Assigned=N | c | d | |
|
13
|
|
|
|
|
|
|
# +-----------+-----------+ |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
# accuracy = (a+d)/(a+b+c+d) |
|
16
|
|
|
|
|
|
|
# precision = a/(a+b) |
|
17
|
|
|
|
|
|
|
# recall = a/(a+c) |
|
18
|
|
|
|
|
|
|
# F1 = 2a/(2a + b + c) |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# Edge cases: |
|
21
|
|
|
|
|
|
|
# precision(0,0,+,d) = 0 |
|
22
|
|
|
|
|
|
|
# precision(a,0,c,d) = 1 |
|
23
|
|
|
|
|
|
|
# precision(0,+,c,d) = 0 |
|
24
|
|
|
|
|
|
|
# recall(a,b,0,d) = 1 |
|
25
|
|
|
|
|
|
|
# recall(0,b,+,d) = 0 |
|
26
|
|
|
|
|
|
|
# F1(a,0,0,d) = 1 |
|
27
|
|
|
|
|
|
|
# F1(0,b,c,d) = 0 when b + c > 0 |
|
28
|
|
|
|
|
|
|
|
|
29
|
1
|
|
|
1
|
|
1024
|
use Params::Validate qw(:all); |
|
|
1
|
|
|
|
|
10178
|
|
|
|
1
|
|
|
|
|
1698
|
|
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# Constructor.  Named arguments are checked with Params::Validate:
#   verbose    - scalar, defaults to 0
#   categories - array ref of category names, or hash ref whose keys are the
#                category names
# Returns a blessed hash whose overall counts (a, b, c, d) are zero and whose
# 'categories' entry maps every category name to its own zeroed a/b/c/d table.
sub new {
  my $class = shift;

  my %args = validate(@_, {
    verbose    => { type => SCALAR, default => 0 },
    categories => { type => ARRAYREF|HASHREF },
  });

  # The caller's category list is replaced by a per-category count table.
  my $given = delete $args{categories};
  my @names = UNIVERSAL::isa($given, 'HASH') ? keys %$given : @$given;

  my %per_category;
  $per_category{$_} = { a => 0, b => 0, c => 0, d => 0 } for @names;

  my $self = bless { %args, a => 0, b => 0, c => 0, d => 0 }, $class;
  $self->{categories} = \%per_category;
  return $self;
}
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# Overwrites the overall counts a, b, c and d with the four values supplied,
# in that order.  Per-category tables are not touched.
sub set_entries {
  my ($self, @counts) = @_;
  @$self{qw(a b c d)} = @counts;
}
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
# Records one classification outcome.
#   $assigned - categories the classifier chose: a single name, an array ref
#               of names, or a hash ref keyed by name
#   $correct  - the true categories, in any of the same three forms
#   $name     - label used only in the verbose trace line
# For every known category exactly one of the a/b/c/d cells is incremented,
# both in that category's table and in the overall counts.  Any cached macro
# statistics are dropped and the hypothesis counter is bumped.
# Dies if either category list is a reference that is neither hash nor array.
sub add_result {
  my ($self, $assigned, $correct, $name) = @_;

  # Normalise a category list into a hash ref keyed by category name.
  my $as_set = sub {
    my ($cats) = @_;
    return +{ $cats => 1 } unless ref $cats;
    return $cats if UNIVERSAL::isa($cats, 'HASH');   # already a set
    return +{ map { ($_ => 1) } @$cats } if UNIVERSAL::isa($cats, 'ARRAY');
    die "Unknown type '$cats' for category list";
  };
  $assigned = $as_set->($assigned);
  $correct  = $as_set->($correct);

  # Pick the contingency cell for each category and count it twice:
  # once per category (macro) and once overall (micro).
  my $table = $self->{categories};
  for my $cat (keys %$table) {
    my $cell = $assigned->{$cat}
             ? ($correct->{$cat} ? 'a' : 'b')
             : ($correct->{$cat} ? 'c' : 'd');
    $table->{$cat}{$cell}++;
    $self->{$cell}++;
  }

  print "$name: assigned=(@{[ keys %$assigned ]}) correct=(@{[ keys %$correct ]})\n"
    if $self->{verbose};

  # Clear any cached results
  delete $self->{macro};

  $self->{hypotheses}++;
}
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
# Shared ratio helper behind precision, recall and F1.
# Returns 1 when $y is false, otherwise 0 when $x is false, otherwise
# 1 / (1 + $y/$x).  The invocant is ignored.
sub _invert {
  my (undef, $hits, $misses) = @_;

  if (!$misses) {
    return 1;
  }
  if (!$hits) {
    return 0;
  }
  return 1 / (1 + $misses / $hits);
}
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# Accuracy of one contingency table: (a + d) / (a + b + c + d).
# The table is the second argument, a hash ref with keys a, b, c, d.
# A table in which every cell is false yields 1.
sub _accuracy {
  my (undef, $counts) = @_;
  my ($yy, $yn, $ny, $nn) = @$counts{qw(a b c d)};

  return 1 unless $yy || $yn || $ny || $nn;
  return +($yy + $nn) / ($yy + $yn + $ny + $nn);
}
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
# Error rate of one contingency table: (b + c) / (a + b + c + d).
# The table is the second argument, a hash ref with keys a, b, c, d.
# A table in which every cell is false yields 0.
sub _error {
  my (undef, $counts) = @_;
  my ($yy, $yn, $ny, $nn) = @$counts{qw(a b c d)};

  return 0 unless $yy || $yn || $ny || $nn;
  return +($yn + $ny) / ($yy + $yn + $ny + $nn);
}
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
# Precision of one contingency table (hash ref with keys a, b, c).
# Returns 0 when c is true while a and b are both false; every other case is
# delegated to _invert(a, b).
sub _precision {
  my ($self, $counts) = @_;
  my ($yy, $yn, $ny) = @$counts{qw(a b c)};

  return 0 if $ny && !($yy || $yn);
  return $self->_invert($yy, $yn);
}
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
# Recall of one contingency table (hash ref with keys a and c), computed as
# _invert(a, c).
sub _recall {
  my ($self, $counts) = @_;
  my ($yy, $ny) = @$counts{qw(a c)};
  return $self->_invert($yy, $ny);
}
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
# F1 of one contingency table (hash ref with keys a, b, c), computed as
# _invert(2a, b + c).
sub _F1 {
  my ($self, $counts) = @_;
  my ($yy, $yn, $ny) = @$counts{qw(a b c)};
  return $self->_invert(2 * $yy, $yn + $ny);
}
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
# Fills in precision, recall, etc. for each category, and computes their averages |
|
119
|
|
|
|
|
|
|
# Computes precision, recall, F1, accuracy and error for every category,
# stores each score in that category's table, and returns a hash ref holding
# the per-metric average over all categories.  The result is cached in
# $self->{macro} and returned directly while the cache is present.
# Dies when the object has no categories.
sub _macro_stats {
  my ($self) = @_;

  my $cached = $self->{macro};
  return $cached if $cached;

  my $per_category = $self->{categories};
  my @names = keys %$per_category;
  die "No category information has been recorded"
    unless @names;

  my @metrics = qw(precision recall F1 accuracy error);

  my %average;
  for my $name (@names) {
    my $scores = $per_category->{$name};
    for my $metric (@metrics) {
      my $method = "_$metric";             # e.g. "_precision"
      my $value  = $self->$method($scores);
      $scores->{$metric}  = $value;
      $average{$metric}  += $value;
    }
  }
  $average{$_} /= @names for @metrics;

  return $self->{macro} = \%average;
}
|
141
|
|
|
|
|
|
|
|
|
142
|
0
|
|
|
0
|
1
|
0
|
# Accuracy taken from the object's own overall a/b/c/d counts.
sub micro_accuracy {
  my ($self) = @_;
  return $self->_accuracy($self);
}
|
143
|
2
|
|
|
2
|
1
|
5
|
# Error rate taken from the object's own overall a/b/c/d counts.
sub micro_error {
  my ($self) = @_;
  return $self->_error($self);
}
|
144
|
8
|
|
|
8
|
1
|
23
|
# Precision taken from the object's own overall a/b/c/d counts.
sub micro_precision {
  my ($self) = @_;
  return $self->_precision($self);
}
|
145
|
9
|
|
|
9
|
1
|
39
|
# Recall taken from the object's own overall a/b/c/d counts.
sub micro_recall {
  my ($self) = @_;
  return $self->_recall($self);
}
|
146
|
7
|
|
|
7
|
1
|
19
|
# F1 taken from the object's own overall a/b/c/d counts.
sub micro_F1 {
  my ($self) = @_;
  return $self->_F1($self);
}
|
147
|
|
|
|
|
|
|
|
|
148
|
0
|
|
|
0
|
1
|
0
|
# Accuracy averaged over categories, read from the _macro_stats result.
sub macro_accuracy {
  my ($self) = @_;
  my $stats = $self->_macro_stats;
  return $stats->{accuracy};
}
|
149
|
0
|
|
|
0
|
1
|
0
|
# Error rate averaged over categories, read from the _macro_stats result.
sub macro_error {
  my ($self) = @_;
  my $stats = $self->_macro_stats;
  return $stats->{error};
}
|
150
|
3
|
|
|
3
|
1
|
6
|
# Precision averaged over categories, read from the _macro_stats result.
sub macro_precision {
  my ($self) = @_;
  my $stats = $self->_macro_stats;
  return $stats->{precision};
}
|
151
|
4
|
|
|
4
|
1
|
9
|
# Recall averaged over categories, read from the _macro_stats result.
sub macro_recall {
  my ($self) = @_;
  my $stats = $self->_macro_stats;
  return $stats->{recall};
}
|
152
|
3
|
|
|
3
|
1
|
7
|
# F1 averaged over categories, read from the _macro_stats result.
sub macro_F1 {
  my ($self) = @_;
  my $stats = $self->_macro_stats;
  return $stats->{F1};
}
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# Returns the per-category table (hash ref keyed by category name).
# _macro_stats is invoked first so that each category's entry also carries
# its computed scores.
sub category_stats {
  my ($self) = @_;

  $self->_macro_stats;

  return $self->{categories};
}
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
# Renders the seven summary figures as a four-line text table.
#   $figs - passed to _sig_figs for every value; a false value means 3
# Columns, left to right: macro recall, macro precision, macro F1,
# micro recall, micro precision, micro F1, micro error.
# Returns the table as a single string.
sub stats_table {
  my ($self, $figs) = @_;
  $figs ||= 3;

  my @raw = map { $self->$_ } qw(
    macro_recall macro_precision macro_F1
    micro_recall micro_precision micro_F1
    micro_error
  );
  my @data = map { $self->_sig_figs($_, $figs) } @raw;

  # Widest formatted value; every column is padded to this width.
  my ($m) = sort { $b <=> $a } 0, map { length } @data;

  # Extra padding so the four-character column labels line up.
  my $s = ' ' x ($m - 4);

  # NOTE(review): the spacing inside the two row strings below is reproduced
  # as found; the rows come out narrower than the border, so spaces may have
  # been lost before this code was captured -- confirm against upstream.
  my $rule = "+" . ("-" x (10 + 7*$m)) . "+\n";
  my $out  = $rule;
  $out .= "| $s maR $s maP$s maF1 $s miR $s miP$s miF1 $s Err |\n";
  $out .= "| %${m}s %${m}s %${m}s %${m}s %${m}s %${m}s %${m}s |\n";
  $out .= $rule;

  return sprintf($out, @data);
}
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
# Formats $number in fixed-point notation with a number of decimal places
# derived from its order of magnitude:
#   decimals = $figs - int(log10(|$number|))      (0 is treated as magnitude 0)
# The invocant is ignored.  Returns the formatted string.
#
# Two inputs used to misbehave and are now handled:
#   * negative numbers made log() die; the magnitude now uses abs()
#   * numbers whose magnitude exceeds $figs produced a negative precision,
#     which sprintf does not accept; the precision is now clamped at 0
# Results for every input that already worked are unchanged.
sub _sig_figs {
  my ($self, $number, $figs) = @_;

  # int() truncates toward zero, so values in (0.1, 1) have magnitude 0.
  my $magnitude = $number != 0 ? int(log(abs $number) / log(10)) : 0;

  my $after_point = $figs - $magnitude;
  $after_point = 0 if $after_point < 0;

  return sprintf "%.${after_point}f", $number;
}
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
1; |
|
197
|
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
__END__ |