line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package AI::Categorizer::Learner::Boolean; |
2
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
12
|
use strict; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
66
|
|
4
|
2
|
|
|
2
|
|
10
|
use AI::Categorizer::Learner; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
52
|
|
5
|
2
|
|
|
2
|
|
10
|
use base qw(AI::Categorizer::Learner); |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
157
|
|
6
|
2
|
|
|
2
|
|
9
|
use Params::Validate qw(:types); |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
350
|
|
7
|
2
|
|
|
2
|
|
11
|
use AI::Categorizer::Util qw(random_elements); |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
1119
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
__PACKAGE__->valid_params |
10
|
|
|
|
|
|
|
( |
11
|
|
|
|
|
|
|
max_instances => {type => SCALAR, default => 0}, |
12
|
|
|
|
|
|
|
threshold => {type => SCALAR, default => 0.5}, |
13
|
|
|
|
|
|
|
); |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
sub create_model { |
16
|
2
|
|
|
2
|
0
|
3
|
my $self = shift; |
17
|
2
|
|
100
|
|
|
13
|
my $m = $self->{model} ||= {}; |
18
|
2
|
|
|
|
|
4
|
my $mi = $self->{max_instances}; |
19
|
|
|
|
|
|
|
|
20
|
2
|
|
|
|
|
13
|
foreach my $cat ($self->knowledge_set->categories) { |
21
|
4
|
|
|
|
|
5
|
my (@p, @n); |
22
|
4
|
|
|
|
|
11
|
foreach my $doc ($self->knowledge_set->documents) { |
23
|
16
|
100
|
|
|
|
44
|
if ($doc->is_in_category($cat)) { |
24
|
8
|
|
|
|
|
18
|
push @p, $doc; |
25
|
|
|
|
|
|
|
} else { |
26
|
8
|
|
|
|
|
39
|
push @n, $doc; |
27
|
|
|
|
|
|
|
} |
28
|
|
|
|
|
|
|
} |
29
|
4
|
50
|
33
|
|
|
106
|
if ($mi and @p + @n > $mi) { |
30
|
|
|
|
|
|
|
# Get rid of random instances from training set, preserving |
31
|
|
|
|
|
|
|
# current positive/negative ratio |
32
|
0
|
|
|
|
|
0
|
my $ratio = $mi / (@p + @n); |
33
|
0
|
|
|
|
|
0
|
@p = random_elements(\@p, @p * $ratio); |
34
|
0
|
|
|
|
|
0
|
@n = random_elements(\@n, @n * $ratio); |
35
|
|
|
|
|
|
|
|
36
|
0
|
0
|
|
|
|
0
|
warn "Limiting to ". @p ." positives and ". @n ." negatives\n" if $self->verbose; |
37
|
|
|
|
|
|
|
} |
38
|
|
|
|
|
|
|
|
39
|
4
|
50
|
|
|
|
24
|
warn "Creating model for ", $cat->name, "\n" if $self->verbose; |
40
|
4
|
|
|
|
|
15
|
$m->{learners}{ $cat->name } = $self->create_boolean_model(\@p, \@n, $cat); |
41
|
|
|
|
|
|
|
} |
42
|
|
|
|
|
|
|
} |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
sub create_boolean_model; # Abstract method |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
sub get_scores { |
47
|
16
|
|
|
16
|
0
|
28
|
my ($self, $doc) = @_; |
48
|
16
|
|
|
|
|
28
|
my $m = $self->{model}; |
49
|
16
|
|
|
|
|
23
|
my %scores; |
50
|
16
|
|
|
|
|
18
|
foreach my $cat (keys %{$m->{learners}}) { |
|
16
|
|
|
|
|
50
|
|
51
|
32
|
|
|
|
|
257
|
$scores{$cat} = $self->get_boolean_score($doc, $m->{learners}{$cat}); |
52
|
|
|
|
|
|
|
} |
53
|
16
|
|
|
|
|
208
|
return (\%scores, $self->{threshold}); |
54
|
|
|
|
|
|
|
} |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
sub get_boolean_score; # Abstract method |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
sub threshold { |
59
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
60
|
0
|
0
|
|
|
|
0
|
$self->{threshold} = shift if @_; |
61
|
0
|
|
|
|
|
0
|
return $self->{threshold}; |
62
|
|
|
|
|
|
|
} |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
sub categories { |
65
|
4
|
|
|
4
|
0
|
9
|
my $self = shift; |
66
|
4
|
|
|
|
|
7
|
return map AI::Categorizer::Category->by_name( name => $_ ), keys %{ $self->{model}{learners} }; |
|
4
|
|
|
|
|
26
|
|
67
|
|
|
|
|
|
|
} |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
1; |
70
|
|
|
|
|
|
|
__END__ |