File Coverage

blib/lib/AI/Categorizer/Learner/Rocchio.pm
Criterion Covered Total %
statement 32 32 100.0
branch n/a
condition n/a
subroutine 8 8 100.0
pod 2 3 66.6
total 42 43 97.6


line stmt bran cond sub pod time code
1             package AI::Categorizer::Learner::Rocchio;
2             $VERSION = '0.01';
3              
4 1     1   2278 use strict;
  1         2  
  1         38  
5 1     1   5 use Params::Validate qw(:types);
  1         3  
  1         173  
6 1     1   5 use AI::Categorizer::FeatureVector;
  1         3  
  1         26  
7 1     1   795 use AI::Categorizer::Learner::Boolean;
  1         2  
  1         30  
8 1     1   6 use base qw(AI::Categorizer::Learner::Boolean);
  1         1  
  1         360  
9              
10             __PACKAGE__->valid_params
11             (
12             positive_setting => {type => SCALAR, default => 16 },
13             negative_setting => {type => SCALAR, default => 4 },
14             threshold => {type => SCALAR, default => 0.1},
15             );
16              
17             sub create_model {
18 1     1 0 2 my $self = shift;
19 1         9 foreach my $doc ($self->knowledge_set->documents) {
20 4         23 $doc->features->normalize;
21             }
22            
23 1         5 $self->{model}{all_features} = $self->knowledge_set->features(undef);
24 1         10 $self->SUPER::create_model(@_);
25 1         4 delete $self->{knowledge_set};
26             }
27              
28             sub create_boolean_model {
29 2     2 1 5 my ($self, $positives, $negatives, $cat) = @_;
30 2         4 my $posdocnum = @$positives;
31 2         3 my $negdocnum = @$negatives;
32            
33 2         4 my $beta = $self->{positive_setting};
34 2         2 my $gamma = $self->{negative_setting};
35            
36 2         9 my $profile = $self->{model}{all_features}->clone->scale(-$gamma/$negdocnum);
37 2         11 my $f = $cat->features(undef)->clone->scale( $beta/$posdocnum + $gamma/$negdocnum );
38 2         9 $profile->add($f);
39              
40 2         6 return $profile->normalize;
41             }
42              
43             sub get_boolean_score {
44 16     16 1 28 my ($self, $newdoc, $profile) = @_;
45 16         57 return $newdoc->features->normalize->dot($profile);
46             }
47              
48             1;
49              
50              
51              
52              
53              
54              
55              
56