line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package AI::Categorizer::Learner::Rocchio; |
2
|
|
|
|
|
|
|
$VERSION = '0.01'; |
3
|
|
|
|
|
|
|
|
4
|
1
|
|
|
1
|
|
2278
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
38
|
|
5
|
1
|
|
|
1
|
|
5
|
use Params::Validate qw(:types); |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
173
|
|
6
|
1
|
|
|
1
|
|
5
|
use AI::Categorizer::FeatureVector; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
26
|
|
7
|
1
|
|
|
1
|
|
795
|
use AI::Categorizer::Learner::Boolean; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
30
|
|
8
|
1
|
|
|
1
|
|
6
|
use base qw(AI::Categorizer::Learner::Boolean); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
360
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
# Declare the constructor parameters this learner accepts, all plain scalars:
#
#   positive_setting - read by create_boolean_model as the weight given to a
#                      category's positive documents (default 16)
#   negative_setting - read by create_boolean_model as the weight given to a
#                      category's negative documents (default 4)
#   threshold        - default 0.1; not read anywhere in this file, presumably
#                      consumed by the parent Boolean learner (TODO confirm)
__PACKAGE__->valid_params(
    positive_setting => { type => SCALAR, default => 16 },
    negative_setting => { type => SCALAR, default => 4 },
    threshold        => { type => SCALAR, default => 0.1 },
);
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# Train the learner from its knowledge set.
#
# Steps, in order:
#   1. call normalize on the feature vector of every document in the
#      knowledge set;
#   2. store the knowledge set's features(undef) vector under
#      $self->{model}{all_features}, where create_boolean_model reads it;
#   3. delegate to the parent class's create_model, forwarding any remaining
#      arguments;
#   4. remove the knowledge_set entry from the object.
#
# Returns whatever was stored under $self->{knowledge_set} (the result of
# the final delete).
sub create_model {
    my $self = shift;

    for my $document ($self->knowledge_set->documents) {
        my $vector = $document->features;

        # Called only for its effect on the vector; the result is discarded.
        $vector->normalize;
    }

    $self->{model}{all_features} = $self->knowledge_set->features(undef);

    # @_ no longer contains $self, so only the caller's extra arguments
    # are passed along.
    $self->SUPER::create_model(@_);

    # The knowledge set is not kept on the object once the model exists.
    return delete $self->{knowledge_set};
}
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# Build the Rocchio profile vector for one category.
#
# Arguments:
#   $self      - the learner; reads $self->{positive_setting} (beta),
#                $self->{negative_setting} (gamma) and
#                $self->{model}{all_features}
#   $positives - array ref of the documents belonging to the category
#   $negatives - array ref of the documents not belonging to it
#   $cat       - category object; its features(undef) vector is combined
#                with the collection-wide vector (presumably it is the sum
#                of the positive documents' features - TODO confirm)
#
# Returns the result of calling normalize on the combined profile:
#
#   profile = -(gamma/|neg|) * all_features
#             + (beta/|pos| + gamma/|neg|) * category_features
#
# An empty positive or negative set contributes a weight of zero instead of
# dividing by zero, so categories with no positive documents, or with no
# negative documents, no longer abort training with
# "Illegal division by zero".
#
# NOTE(review): when both sets are empty the profile is all zeros; whether
# normalize copes with a zero-length vector depends on the feature vector
# implementation - confirm.
sub create_boolean_model {
    my ($self, $positives, $negatives, $cat) = @_;

    my $posdocnum = @$positives;
    my $negdocnum = @$negatives;

    my $beta  = $self->{positive_setting};
    my $gamma = $self->{negative_setting};

    # Per-document weights; an empty set has no centroid, hence weight 0.
    my $pos_weight = $posdocnum ? $beta  / $posdocnum : 0;
    my $neg_weight = $negdocnum ? $gamma / $negdocnum : 0;

    # Work on clones so the cached collection vector and the category's own
    # vector are left untouched.
    my $profile = $self->{model}{all_features}->clone->scale(-$neg_weight);
    my $f = $cat->features(undef)->clone->scale($pos_weight + $neg_weight);
    $profile->add($f);

    return $profile->normalize;
}
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
# Score a document against one category profile.
#
# Arguments:
#   $self    - the learner (not used here)
#   $newdoc  - document object providing a features method
#   $profile - profile vector for the category
#
# Returns the result of dot($profile) called on the value returned by
# normalize on the document's feature vector.
#
# NOTE(review): normalize is called on the document's own feature vector,
# not on a copy, so it presumably rescales $newdoc's features in place -
# confirm that callers expect this.
sub get_boolean_score {
    my $self    = shift;
    my $newdoc  = shift;
    my $profile = shift;

    my $doc_vector = $newdoc->features->normalize;

    return $doc_vector->dot($profile);
}
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
1; |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
|