File Coverage

blib/lib/Algorithm/NaiveBayes/Model/Discrete.pm
Criterion Covered Total %
statement 38 38 100.0
branch 1 2 50.0
condition 3 4 75.0
subroutine 6 6 100.0
pod 0 3 0.0
total 48 53 90.5


line stmt bran cond sub pod time code
1             package Algorithm::NaiveBayes::Model::Discrete;
2              
3 1     1   6 use strict;
  1         2  
  1         53  
4 1     1   5 use base qw(Algorithm::NaiveBayes);
  1         2  
  1         107  
5 1     1   482 use Algorithm::NaiveBayes::Util qw(rescale);
  1         3  
  1         602  
6              
7             sub do_add_instance {
8 200     200 0 220 my ($self, $attributes, $labels, $data) = @_;
9            
10 200         247 foreach my $label ( @$labels ) {
11 200   100     420 my $mylabel = $data->{labels}{$label} ||= {};
12 200         197 $mylabel->{count}++;
13 200         472 while (my ($attr, $value) = each %$attributes) {
14 400         1559 $mylabel->{attrs}{$attr}{$value}++;
15             }
16             }
17             }
18              
19             sub do_train {
20 1     1 0 2 my ($self, $training_data) = @_;
21 1         2 my $m = {};
22            
23 1         16 my $instances = $self->instances;
24 1         3 my $labels = $training_data->{labels};
25 1         2 my $probs = $m->{probs} = {};
26            
27             # Calculate the log-probabilities for each category
28 1         5 foreach my $label ($self->labels) {
29 2         20 $m->{prior_probs}{$label} = log($labels->{$label}{count} / $instances);
30            
31 2         6 my $denominator = log($labels->{$label}{count});
32 2         3 while (my ($attribute, $values) = each %{ $labels->{$label}{attrs} }) {
  6         30  
33 4         17 while (my ($value, $count) = each %$values) {
34 8         44 $probs->{$attribute}{$label}{$value} = log($count) - $denominator;
35             }
36             }
37             }
38            
39 1         6 return $m;
40             }
41              
42             sub do_predict {
43 1     1 0 2 my ($self, $m, $newattrs) = @_;
44            
45             # Note that we're using the log(prob) here. That's why we add instead of multiply.
46            
47 1         1 my %scores = %{$m->{prior_probs}};
  1         5  
48 1         5 while (my ($feature, $value) = each %$newattrs) {
49 2 50       6 next unless exists $m->{probs}{$feature}; # Ignore totally unseen features
50 2         3 while (my ($label, $values) = each %{$m->{probs}{$feature}}) {
  6         22  
51 4   50     17 $scores{$label} += ($values->{$value} || 0);
52             }
53             }
54            
55 1         5 rescale \%scores;
56 1         4 return \%scores;
57             }
58              
59             1;