package AI::Calibrate;

use 5.008008;
use strict;
use warnings;

use vars qw($VERSION);
$VERSION = "1.5";

require Exporter;

our @ISA = qw(Exporter);

# This allows declaration:
#   use AI::Calibrate ':all';
# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
# will save memory.
our %EXPORT_TAGS = (
    'all' => [
        qw(
            calibrate
            score_prob
            print_mapping
        )
    ]
);

our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
our @EXPORT = qw( );

use constant DEBUG => 0;

# Structure slot names
use constant SCORE => 0;
use constant PROB  => 1;

=head1 NAME

AI::Calibrate - Perl module for producing probabilities from classifier scores

=head1 SYNOPSIS

  use AI::Calibrate ':all';
  ... train a classifier ...
  ... test classifier on $points ...
  $calibrated = calibrate($points);

=head1 DESCRIPTION

Classifiers usually return some sort of an instance score with their
classifications.  These scores can be used as probabilities in various
calculations, but first they need to be I<calibrated>.  Naive Bayes, for
example, is a very useful classifier, but the scores it produces are usually
"bunched" around 0 and 1, making these scores poor probability estimates.
Support vector machines have a similar problem.  Both classifier types should
be calibrated before their scores are used as probability estimates.

This module calibrates classifier scores using a method called the Pool
Adjacent Violators (PAV) algorithm.  After you train a classifier, you take a
(usually separate) set of test instances and run them through the classifier,
collecting the scores assigned to each.  You then supply this set of instances
to the calibrate function defined here, and it will return a set of ranges
mapping from a score range to a probability estimate.

For example, assume you have the following set of instance results from your
classifier.  Each result is of the form C<[ASSIGNED_SCORE, TRUE_CLASS]>:

 my $points = [
          [.9, 1],
          [.8, 1],
          [.7, 0],
          [.6, 1],
          [.55, 1],
          [.5, 1],
          [.45, 0],
          [.4, 1],
          [.35, 1],
          [.3, 0],
          [.27, 1],
          [.2, 0],
          [.18, 0],
          [.1, 1],
          [.02, 0]
         ];

If you then call calibrate($points), it will return this structure:

 [
   [.9,  1   ],
   [.7,  3/4 ],
   [.45, 2/3 ],
   [.3,  1/2 ],
   [.2,  1/3 ],
   [.02, 0   ]
 ]

This means that, given a SCORE produced by the classifier, you can map the
SCORE onto a probability like this:

          SCORE >= .9     prob = 1
   .9   > SCORE >= .7     prob = 3/4
   .7   > SCORE >= .45    prob = 2/3
   .45  > SCORE >= .3     prob = 1/2
   .3   > SCORE >= .2     prob = 1/3
   .2   > SCORE           prob = 0

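The same lookup can be done in code with the B<score_prob> function described
below.  A minimal sketch (the score 0.62 is just an illustrative value):

  use AI::Calibrate ':all';

  my $calibrated = calibrate($points);        # $points as above
  my $prob = score_prob($calibrated, 0.62);   # falls in the .7 > SCORE >= .45 range
  print "Score 0.62 -> estimated probability $prob\n";   # prints the 2/3 estimate
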
For a realistic example of classifier calibration, see the test file
t/AI-Calibrate-NB.t, which uses the AI::NaiveBayes1 module to train a Naive
Bayes classifier then calibrates it using this module.

=cut

=head1 FUNCTIONS

=over 4

=item B<calibrate>

This is the main calibration function.  The calling form is:

  my $calibrated = calibrate( $data, $sorted);

$data looks like: C<[ [score, class], [score, class], [score, class]...]>
Each score is a number.  Each class is either 0 (negative class) or 1
(positive class).

$sorted is boolean (0 by default) indicating whether the data are already
sorted by score.  Unless this is set to 1, calibrate() will sort the data
itself.

Calibrate returns a reference to an ordered list of references:

  [ [score, prob], [score, prob], [score, prob] ... ]

Scores will be in descending numerical order.  See the DESCRIPTION section for
how this structure is interpreted.  You can pass this structure to the
B<score_prob> function, along with a new score, to get a probability.

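If the test scores are already sorted in decreasing order, the second argument
lets calibrate() skip its internal sort.  A small sketch with made-up data:

  # Data already sorted by decreasing score, so pass $sorted = 1
  my $presorted  = [ [.95, 1], [.80, 1], [.60, 0], [.35, 1], [.10, 0] ];
  my $calibrated = calibrate($presorted, 1);
  print_mapping($calibrated);    # print_mapping is described below
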
=cut

sub calibrate {
    my($data, $sorted) = @_;

    if (DEBUG) {
        print "Original data:\n";
        for my $pair (@$data) {
            my($score, $prob) = @$pair;
            print "($score, $prob)\n";
        }
    }

    # Copy the data over so PAV can clobber the PROB field
    my $new_data = [ map([@$_], @$data) ];

    # If not already sorted, sort data decreasing by score
    if (!$sorted) {
        $new_data = [ sort { $b->[SCORE] <=> $a->[SCORE] } @$new_data ];
    }

    PAV($new_data);

    if (DEBUG) {
        print("After PAV, vector is:\n");
        print_vector($new_data);
    }

    my(@result);
    my($last_prob, $last_score);

    push(@$new_data, [-1e10, 0]);

    for my $pair (@$new_data) {
        print "Seeing @$pair\n" if DEBUG;
        my($score, $prob) = @$pair;
        if (defined($last_prob) and $prob < $last_prob) {
            print("Pushing [$last_score, $last_prob]\n") if DEBUG;
            push(@result, [$last_score, $last_prob]);
        }
        $last_prob = $prob;
        $last_score = $score;
    }

    return \@result;
}

sub PAV {
    my ( $result ) = @_;

    for ( my $i = 0; $i < @$result - 1; $i++ ) {
        # Adjacent violation: probability rises from element $i to $i + 1
        if ( $result->[$i][PROB] < $result->[ $i + 1 ][PROB] ) {
            # Pool the pair by replacing both probabilities with their average
            $result->[$i][PROB] =
              ( $result->[$i][PROB] + $result->[ $i + 1 ][PROB] ) / 2;
            $result->[ $i + 1 ][PROB] = $result->[$i][PROB];
            print "Averaging elements $i and ", $i + 1, "\n" if DEBUG;

            # Pooling can create new violations earlier in the sequence;
            # sweep backwards, flattening until the prefix is non-increasing
            for ( my $j = $i - 1; $j >= 0; $j-- ) {
                if ( $result->[$j][PROB] < $result->[ $i + 1 ][PROB] ) {
                    my $d = ( $i + 1 ) - $j + 1;
                    flatten( $result, $j, $d );
                }
                else {
                    last;
                }
            }
        }
    }
}

sub print_vector {
    my($vec) = @_;
    for my $pair (@$vec) {
        print join(", ", @$pair), "\n";
    }
}

sub flatten {
    my ( $vec, $start, $len ) = @_;
    if (DEBUG) {
        print "Flatten called on vec, $start, $len\n";
        print "Vector before: \n";
        print_vector($vec);
    }

    my $sum = 0;
    for my $i ( $start .. $start + $len - 1 ) {
        $sum += $vec->[$i][PROB];
    }

    my $avg = $sum / $len;
    print "Sum = $sum, avg = $avg\n" if DEBUG;
    for my $i ( $start .. $start + $len - 1 ) {
        $vec->[$i][PROB] = $avg;
    }
    if (DEBUG) {
        print "Vector after: \n";
        print_vector($vec);
    }
}

=item B<score_prob>

This is a simple utility function that takes the structure returned by
B<calibrate>, along with a new score, and returns the probability estimate.
Example calling form:

  $p = score_prob($calibrated, $score);

Once you have a trained, calibrated classifier, you could imagine using it
like this:

 $calibrated = calibrate( $calibration_set );
 print "Input instances, one per line:\n";
 while (<>) {
    chomp;
    my(@fields) = split;
    my $score = classifier(@fields);
    my $prob = score_prob($calibrated, $score);
    print "Estimated probability: $prob\n";
 }

=cut

sub score_prob {
    my($calibrated, $score) = @_;

    my $last_prob = 1.0;

    for my $tuple (@$calibrated) {
        my($bound, $prob) = @$tuple;
        return $prob if $score >= $bound;
        $last_prob = $prob;
    }
    # If we drop off the end, probability estimate is zero
    return 0;
}

=item B<print_mapping>

This is a simple utility function that takes the structure returned by
B<calibrate> and prints out a simple list of lines describing the mapping
created.

Example calling form:

  print_mapping($calibrated);

Sample output:

  1.00 > SCORE >= 1.00    prob = 1.000
  1.00 > SCORE >= 0.71    prob = 0.667
  0.71 > SCORE >= 0.39    prob = 0.000
  0.39 > SCORE >= 0.00    prob = 0.000

These ranges are not necessarily compressed/optimized, as this sample output
shows.

=back

=cut

sub print_mapping {
    my($calibrated) = @_;
    my $last_bound = 1.0;
    for my $tuple (@$calibrated) {
        my($bound, $prob) = @$tuple;
        printf("%0.3f > SCORE >= %0.3f    prob = %0.3f\n",
               $last_bound, $bound, $prob);
        $last_bound = $bound;
    }
    if ($last_bound != 0) {
        printf("%0.3f > SCORE >= %0.3f    prob = %0.3f\n",
               $last_bound, 0, 0);
    }
}

=head1 DETAILS

The PAV algorithm is conceptually straightforward.  Given a set of training
cases ordered by the scores assigned by the classifier, it first assigns a
probability of one to each positive instance and a probability of zero to each
negative instance, and puts each instance in its own group.  It then looks, at
each iteration, for adjacent violators: adjacent groups whose probabilities
locally increase rather than decrease.  When it finds such groups, it pools
them and replaces their probability estimates with the average of the group's
values.  It continues this process of averaging and replacement until the
entire sequence is monotonically decreasing.  The result is a sequence of
instances, each of which has a score and an associated probability estimate,
which can then be used to map scores into probability estimates.

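As a small worked trace of these steps (made-up data, not taken from the
module's documentation), consider four instances sorted by score:

  scores:          .9    .7    .6    .4
  true classes:     1     0     1     0
  initial probs:    1     0     1     0

The .7/.6 pair violates monotonicity (a 0 is followed by a 1), so the two are
pooled and averaged:

  pooled probs:     1    .5    .5     0

The sequence is now non-increasing, so calibrate() would return
C<[ [.9, 1], [.6, 1/2] ]>; scores below .6 map to a probability of 0.
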
For further information on the PAV algorithm, you can read the section in my
paper referenced below.

=head1 EXPORT

This module exports three functions: calibrate, score_prob and print_mapping.

=head1 BUGS

None known.  This implementation is straightforward but inefficient (its time
is O(n^2) in the length of the data series).  A linear time algorithm is
known, and in a later version of this module I'll probably implement it.

=head1 SEE ALSO

The AI::NaiveBayes1 perl module.

My paper "PAV and the ROC Convex Hull" has a good discussion of the PAV
algorithm, including examples:
L

If you want to read more about the general issue of classifier calibration,
here are some good papers, which are freely available on the web:

I<"Transforming classifier scores into accurate multiclass probability estimates">
by Bianca Zadrozny and Charles Elkan

I<"Predicting Good Probabilities With Supervised Learning">
by A. Niculescu-Mizil and R. Caruana

=head1 AUTHOR

Tom Fawcett, E<lt>tom.fawcett@gmail.comE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2008-2012 by Tom Fawcett

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.8 or,
at your option, any later version of Perl 5 you may have available.

=cut

1;