File Coverage

blib/lib/AI/Categorizer/Collection.pm
Criterion Covered Total %
statement 18 42 42.8
branch 3 12 25.0
condition 1 3 33.3
subroutine 5 6 83.3
pod 2 2 100.0
total 29 65 44.6


line stmt bran cond sub pod time code
1             package AI::Categorizer::Collection;
2 11     11   47 use strict;
  11         15  
  11         333  
3              
4 11     11   49 use Params::Validate qw(:types);
  11         14  
  11         1381  
5 11     11   53 use Class::Container;
  11         17  
  11         214  
6 11     11   48 use base qw(Class::Container);
  11         14  
  11         5487  
7             __PACKAGE__->valid_params
8             (
9             verbose => {type => SCALAR, default => 0},
10             stopword_file => { type => SCALAR, optional => 1 },
11             category_hash => { type => HASHREF, default => {} },
12             category_file => { type => SCALAR, optional => 1 },
13             );
14              
15             __PACKAGE__->contained_objects
16             (
17             document => { class => 'AI::Categorizer::Document::Text',
18             delayed => 1 },
19             );
20              
21             sub new {
22 7     7 1 36 my ($class, %args) = @_;
23            
24             # Optimize so every document doesn't have to convert the stopword list to a hash
25 7 50 33     46 if ($args{stopwords} and UNIVERSAL::isa($args{stopwords}, 'ARRAY')) {
26 0         0 $args{stopwords} = { map {+$_ => 1} @{ $args{stopwords} } };
  0         0  
  0         0  
27             }
28            
29 7         72 my $self = $class->SUPER::new(%args);
30              
31 7 50       2757 if ($self->{category_file}) {
32 0         0 local *FH;
33 0 0       0 open FH, $self->{category_file} or die "Can't open $self->{category_file}: $!";
34 0         0 while (<FH>) {
35 0         0 my ($doc, @cats) = split;
36 0         0 $self->{category_hash}{$doc} = \@cats;
37             }
38 0         0 close FH;
39             }
40 7 50       44 if (exists $self->{stopword_file}) {
41 0         0 my %stopwords;
42 0         0 local *FH;
43 0 0       0 open FH, "< $self->{stopword_file}" or die "$self->{stopword_file}: $!";
44 0         0 while (<FH>) {
45 0         0 chomp;
46 0         0 $stopwords{$_} = 1;
47             }
48 0         0 close FH;
49              
50 0         0 $self->delayed_object_params('document', stopwords => \%stopwords);
51             }
52              
53 7         31 return $self;
54             }
55              
56             # This should usually be replaced in subclasses with a faster version that doesn't
57             # need to create actual documents each time through
58             sub count_documents {
59 0     0 1   my $self = shift;
60 0 0         return $self->{document_count} if exists $self->{document_count};
61              
62 0           $self->rewind;
63 0           my $count = 0;
64 0           $count++ while $self->next;
65 0           $self->rewind;
66              
67 0           return $self->{document_count} = $count;
68             }
69              
70             # Abstract methods
71             sub next;
72             sub rewind;
73              
74             1;
75             __END__