File Coverage

blib/lib/AI/Categorizer.pm
Criterion Covered Total %
statement 40 85 47.0
branch 1 14 7.1
condition n/a
subroutine 14 24 58.3
pod 11 11 100.0
total 66 134 49.2


line stmt bran cond sub pod time code
1             package AI::Categorizer;
2             $VERSION = '0.09';
3              
4 11     11   149445 use strict;
  11         31  
  11         444  
5 11     11   17726 use Class::Container;
  11         183800  
  11         370  
6 11     11   117 use base qw(Class::Container);
  11         27  
  11         1434  
7 11     11   60 use Params::Validate qw(:types);
  11         22  
  11         1801  
8 11     11   116 use File::Spec;
  11         21  
  11         239  
9 11     11   7034 use AI::Categorizer::Learner;
  11         29  
  11         264  
10 11     11   5543 use AI::Categorizer::Document;
  11         29  
  11         293  
11 11     11   5699 use AI::Categorizer::Category;
  11         25  
  11         238  
12 11     11   5270 use AI::Categorizer::Collection;
  11         26  
  11         236  
13 11     11   5565 use AI::Categorizer::Hypothesis;
  11         24  
  11         248  
14 11     11   6235 use AI::Categorizer::KnowledgeSet;
  11         35  
  11         15355  
15              
16              
17             __PACKAGE__->valid_params
18             (
19             progress_file => { type => SCALAR, default => 'save' },
20             knowledge_set => { isa => 'AI::Categorizer::KnowledgeSet' },
21             learner => { isa => 'AI::Categorizer::Learner' },
22             verbose => { type => BOOLEAN, default => 0 },
23             training_set => { type => SCALAR, optional => 1 },
24             test_set => { type => SCALAR, optional => 1 },
25             data_root => { type => SCALAR, optional => 1 },
26             );
27              
28             __PACKAGE__->contained_objects
29             (
30             knowledge_set => { class => 'AI::Categorizer::KnowledgeSet' },
31             learner => { class => 'AI::Categorizer::Learner::NaiveBayes' },
32             experiment => { class => 'AI::Categorizer::Experiment',
33             delayed => 1 },
34             collection => { class => 'AI::Categorizer::Collection::Files',
35             delayed => 1 },
36             );
37              
38             sub new {
39 10     10 1 949 my $package = shift;
40 10         36 my %args = @_;
41 10         18 my %defaults;
42 10 50       38 if (exists $args{data_root}) {
43 0         0 $defaults{training_set} = File::Spec->catfile($args{data_root}, 'training');
44 0         0 $defaults{test_set} = File::Spec->catfile($args{data_root}, 'test');
45 0         0 $defaults{category_file} = File::Spec->catfile($args{data_root}, 'cats.txt');
46 0         0 delete $args{data_root};
47             }
48              
49 10         73 return $package->SUPER::new(%defaults, %args);
50             }
51              
52             #sub dump_parameters {
53             # my $p = shift()->SUPER::dump_parameters;
54             # delete $p->{stopwords} if $p->{stopword_file};
55             # return $p;
56             #}
57              
58 55     55 1 13066 sub knowledge_set { shift->{knowledge_set} }
59 9     9 1 238 sub learner { shift->{learner} }
60              
61             # Combines several methods in one sub
62             sub run_experiment {
63 0     0 1   my $self = shift;
64 0           $self->scan_features;
65 0           $self->read_training_set;
66 0           $self->train;
67 0           $self->evaluate_test_set;
68 0           print $self->stats_table;
69             }
70              
71             sub scan_features {
72 0     0 1   my $self = shift;
73 0 0         return unless $self->knowledge_set->scan_first;
74 0           $self->knowledge_set->scan_features( path => $self->{training_set} );
75 0           $self->knowledge_set->save_features( "$self->{progress_file}-01-features" );
76             }
77              
78             sub read_training_set {
79 0     0 1   my $self = shift;
80 0 0         $self->knowledge_set->restore_features( "$self->{progress_file}-01-features" )
81             if -e "$self->{progress_file}-01-features";
82 0           $self->knowledge_set->read( path => $self->{training_set} );
83 0           $self->_save_progress( '02', 'knowledge_set' );
84 0           return $self->knowledge_set;
85             }
86              
87             sub train {
88 0     0 1   my $self = shift;
89 0           $self->_load_progress( '02', 'knowledge_set' );
90 0           $self->learner->train( knowledge_set => $self->{knowledge_set} );
91 0           $self->_save_progress( '03', 'learner' );
92 0           return $self->learner;
93             }
94              
95             sub evaluate_test_set {
96 0     0 1   my $self = shift;
97 0           $self->_load_progress( '03', 'learner' );
98 0           my $c = $self->create_delayed_object('collection', path => $self->{test_set} );
99 0           $self->{experiment} = $self->learner->categorize_collection( collection => $c );
100 0           $self->_save_progress( '04', 'experiment' );
101 0           return $self->{experiment};
102             }
103              
104             sub stats_table {
105 0     0 1   my $self = shift;
106 0           $self->_load_progress( '04', 'experiment' );
107 0           return $self->{experiment}->stats_table;
108             }
109              
110             sub progress_file {
111 0     0 1   shift->{progress_file};
112             }
113              
114             sub verbose {
115 0     0 1   shift->{verbose};
116             }
117              
118             sub _save_progress {
119 0     0     my ($self, $stage, $node) = @_;
120 0 0         return unless $self->{progress_file};
121 0           my $file = "$self->{progress_file}-$stage-$node";
122 0 0         warn "Saving to $file\n" if $self->{verbose};
123 0           $self->{$node}->save_state($file);
124             }
125              
126             sub _load_progress {
127 0     0     my ($self, $stage, $node) = @_;
128 0 0         return unless $self->{progress_file};
129 0           my $file = "$self->{progress_file}-$stage-$node";
130 0 0         warn "Loading $file\n" if $self->{verbose};
131 0           $self->{$node} = $self->contained_class($node)->restore_state($file);
132             }
133              
134             1;
135             __END__