File Coverage

blib/lib/AI/Categorizer/FeatureSelector.pm
Criterion Covered Total %
statement 21 39 53.8
branch 0 14 0.0
condition n/a
subroutine 7 10 70.0
pod 1 3 33.3
total 29 66 43.9


line stmt bran cond sub pod time code
1             package AI::Categorizer::FeatureSelector;
2              
3 6     6   28 use strict;
  6         11  
  6         155  
4 6     6   29 use Class::Container;
  6         8  
  6         125  
5 6     6   29 use base qw(Class::Container);
  6         10  
  6         407  
6              
7 6     6   30 use Params::Validate qw(:types);
  6         10  
  6         1057  
8 6     6   33 use AI::Categorizer::FeatureVector;
  6         10  
  6         113  
9 6     6   26 use AI::Categorizer::Util;
  6         10  
  6         236  
10 6     6   27 use Carp qw(croak);
  6         8  
  6         2665  
11              
12             __PACKAGE__->valid_params
13             (
14             features_kept => {
15             type => SCALAR,
16             default => 0.2,
17             },
18             verbose => {
19             type => SCALAR,
20             default => 0,
21             },
22             );
23              
24             sub verbose {
25 0     0 1   my $self = shift;
26 0 0         $self->{verbose} = shift if @_;
27 0           return $self->{verbose};
28             }
29              
30             sub reduce_features {
31             # Takes a feature vector whose weights are "feature scores", and
32             # chops to the highest n features. n is specified by the
33             # 'features_kept' parameter. If it's zero, all features are kept.
34             # If it's between 0 and 1, we multiply by the present number of
35             # features. If it's greater than 1, we treat it as the number of
36             # features to use.
37              
38 0     0 0   my ($self, $f, %args) = @_;
39 0 0         my $kept = defined $args{features_kept} ? $args{features_kept} : $self->{features_kept};
40 0 0         return $f unless $kept;
41              
42 0 0         my $num_kept = ($kept < 1 ?
43             $f->length * $kept :
44             $kept);
45              
46 0 0         print "Trimming features - # features = " . $f->length . "\n" if $self->verbose;
47            
48             # This is algorithmic overkill, but the sort seems fast enough. Will revisit later.
49 0           my $features = $f->as_hash;
50 0           my @new_features = (sort {$features->{$b} <=> $features->{$a}} keys %$features)
  0            
51             [0 .. $num_kept-1];
52              
53 0           my $result = $f->intersection( \@new_features );
54 0 0         print "Finished trimming features - # features = " . $result->length . "\n" if $self->verbose;
55 0           return $result;
56             }
57              
58             # Abstract methods
59             sub rank_features;
60             sub scan_features;
61              
62             sub select_features {
63 0     0 0   my ($self, %args) = @_;
64            
65 0 0         die "No knowledge_set parameter provided to select_features()"
66             unless $args{knowledge_set};
67              
68 0           my $f = $self->rank_features( knowledge_set => $args{knowledge_set} );
69 0           return $self->reduce_features( $f, features_kept => $args{features_kept} );
70             }
71              
72              
73             1;
74              
75             __END__