| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package AI::Categorizer::FeatureSelector; |
|
2
|
|
|
|
|
|
|
|
|
3
|
6
|
|
|
6
|
|
28
|
use strict; |
|
|
6
|
|
|
|
|
11
|
|
|
|
6
|
|
|
|
|
155
|
|
|
4
|
6
|
|
|
6
|
|
29
|
use Class::Container; |
|
|
6
|
|
|
|
|
8
|
|
|
|
6
|
|
|
|
|
125
|
|
|
5
|
6
|
|
|
6
|
|
29
|
use base qw(Class::Container); |
|
|
6
|
|
|
|
|
10
|
|
|
|
6
|
|
|
|
|
407
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
6
|
|
|
6
|
|
30
|
use Params::Validate qw(:types); |
|
|
6
|
|
|
|
|
10
|
|
|
|
6
|
|
|
|
|
1057
|
|
|
8
|
6
|
|
|
6
|
|
33
|
use AI::Categorizer::FeatureVector; |
|
|
6
|
|
|
|
|
10
|
|
|
|
6
|
|
|
|
|
113
|
|
|
9
|
6
|
|
|
6
|
|
26
|
use AI::Categorizer::Util; |
|
|
6
|
|
|
|
|
10
|
|
|
|
6
|
|
|
|
|
236
|
|
|
10
|
6
|
|
|
6
|
|
27
|
use Carp qw(croak); |
|
|
6
|
|
|
|
|
8
|
|
|
|
6
|
|
|
|
|
2665
|
|
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# Constructor parameters accepted by this class, declared through
# Class::Container.  Both are plain scalars:
#   features_kept - how many features reduce_features() retains (0 keeps all,
#                   a value below 1 is a fraction, otherwise an absolute count)
#   verbose       - when true, progress messages are printed
__PACKAGE__->valid_params(
    features_kept => { type => SCALAR, default => 0.2 },
    verbose       => { type => SCALAR, default => 0 },
);
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# Combined getter/setter for the 'verbose' flag.
#
# With an argument, stores it (even if undef) as the new setting.
# Always returns the current setting.
sub verbose {
    my ($self, @value) = @_;

    if (@value) {
        $self->{verbose} = $value[0];
    }

    return $self->{verbose};
}
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# Trim a feature vector, whose weights are "feature scores", down to its
# highest-scoring features.
#
# Arguments:
#   $f    - feature vector object; it must provide length(), as_hash()
#           (returning a hash reference of name => score) and intersection().
#   %args - optional 'features_kept' that overrides the object's own
#           'features_kept' setting when defined.
#
# 'features_kept' is interpreted as follows:
#   * false (e.g. 0) - keep everything; $f itself is returned untouched.
#   * less than 1    - a fraction of the present number of features.
#   * 1 or more      - the number of features to keep.
#
# Returns whatever $f->intersection() yields for the surviving feature names.
sub reduce_features {
    my ($self, $f, %args) = @_;

    my $kept = defined $args{features_kept}
        ? $args{features_kept}
        : $self->{features_kept};
    return $f unless $kept;

    my $num_kept = $kept < 1 ? $f->length * $kept : $kept;

    print "Trimming features - # features = " . $f->length . "\n" if $self->verbose;

    # A full sort is algorithmic overkill for picking the top n, but it has
    # been fast enough in practice.
    my $features = $f->as_hash;
    my @ranked   = sort { $features->{$b} <=> $features->{$a} } keys %$features;

    # Never ask for more names than exist: slicing past the end of the list
    # would pad it with undef entries that then get handed to intersection().
    $num_kept = @ranked if $num_kept > @ranked;
    my @new_features = @ranked[0 .. $num_kept - 1];

    my $result = $f->intersection( \@new_features );
    print "Finished trimming features - # features = " . $result->length . "\n" if $self->verbose;
    return $result;
}
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
# Abstract methods.  Concrete feature selectors are expected to override
# both; the versions here only exist to report a missing override from the
# caller's point of view instead of dying with "Undefined subroutine".

# Called by select_features() with a 'knowledge_set' argument; its return
# value is passed on to reduce_features().
sub rank_features {
    my $self  = shift;
    my $class = ref($self) || $self || __PACKAGE__;
    croak("$class must implement rank_features()");
}

# Not called anywhere in this file; left for subclasses to define.
sub scan_features {
    my $self  = shift;
    my $class = ref($self) || $self || __PACKAGE__;
    croak("$class must implement scan_features()");
}
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
# Rank the features of a knowledge set and keep only the best ones.
#
# Named arguments:
#   knowledge_set - required; handed to rank_features().
#   features_kept - optional; forwarded to reduce_features(), which falls
#                   back to the object's own setting when it is undefined.
#
# Dies if no knowledge_set is supplied.  Returns the value produced by
# reduce_features().
sub select_features {
    my $self = shift;
    my %args = @_;

    unless ($args{knowledge_set}) {
        die "No knowledge_set parameter provided to select_features()";
    }

    my $ranked = $self->rank_features( knowledge_set => $args{knowledge_set} );

    return $self->reduce_features( $ranked, features_kept => $args{features_kept} );
}
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
1; |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
__END__ |