| line | stmt | bran | cond | sub | pod | time | code |
|------|------|------|------|-----|-----|------|------|
| 1    |      |      |      |     |     |      | package Algorithm::NaiveBayes::Model::Discrete; |
| 2    |      |      |      |     |     |      |  |
| 3    | 1    |      |      | 1   |     | 6    | use strict; |
|      | 1    |      |      |     |     | 2    |  |
|      | 1    |      |      |     |     | 53   |  |
| 4    | 1    |      |      | 1   |     | 5    | use base qw(Algorithm::NaiveBayes); |
|      | 1    |      |      |     |     | 2    |  |
|      | 1    |      |      |     |     | 107  |  |
| 5    | 1    |      |      | 1   |     | 482  | use Algorithm::NaiveBayes::Util qw(rescale); |
|      | 1    |      |      |     |     | 3    |  |
|      | 1    |      |      |     |     | 602  |  |
| 6    |      |      |      |     |     |      |  |
| 7    |      |      |      |     |     |      | sub do_add_instance { |
| 8    | 200  |      |      | 200 | 0   | 220  |   my ($self, $attributes, $labels, $data) = @_; |
| 9    |      |      |      |     |     |      |  |
| 10   | 200  |      |      |     |     | 247  |   foreach my $label ( @$labels ) { |
| 11   | 200  |      | 100  |     |     | 420  |     my $mylabel = $data->{labels}{$label} \|\|= {}; |
| 12   | 200  |      |      |     |     | 197  |     $mylabel->{count}++; |
| 13   | 200  |      |      |     |     | 472  |     while (my ($attr, $value) = each %$attributes) { |
| 14   | 400  |      |      |     |     | 1559 |       $mylabel->{attrs}{$attr}{$value}++; |
| 15   |      |      |      |     |     |      |     } |
| 16   |      |      |      |     |     |      |   } |
| 17   |      |      |      |     |     |      | } |
| 18   |      |      |      |     |     |      |  |
| 19   |      |      |      |     |     |      | sub do_train { |
| 20   | 1    |      |      | 1   | 0   | 2    |   my ($self, $training_data) = @_; |
| 21   | 1    |      |      |     |     | 2    |   my $m = {}; |
| 22   |      |      |      |     |     |      |  |
| 23   | 1    |      |      |     |     | 16   |   my $instances = $self->instances; |
| 24   | 1    |      |      |     |     | 3    |   my $labels = $training_data->{labels}; |
| 25   | 1    |      |      |     |     | 2    |   my $probs = $m->{probs} = {}; |
| 26   |      |      |      |     |     |      |  |
| 27   |      |      |      |     |     |      |   # Calculate the log-probabilities for each category |
| 28   | 1    |      |      |     |     | 5    |   foreach my $label ($self->labels) { |
| 29   | 2    |      |      |     |     | 20   |     $m->{prior_probs}{$label} = log($labels->{$label}{count} / $instances); |
| 30   |      |      |      |     |     |      |  |
| 31   | 2    |      |      |     |     | 6    |     my $denominator = log($labels->{$label}{count}); |
| 32   | 2    |      |      |     |     | 3    |     while (my ($attribute, $values) = each %{ $labels->{$label}{attrs} }) { |
|      | 6    |      |      |     |     | 30   |  |
| 33   | 4    |      |      |     |     | 17   |       while (my ($value, $count) = each %$values) { |
| 34   | 8    |      |      |     |     | 44   |         $probs->{$attribute}{$label}{$value} = log($count) - $denominator; |
| 35   |      |      |      |     |     |      |       } |
| 36   |      |      |      |     |     |      |     } |
| 37   |      |      |      |     |     |      |   } |
| 38   |      |      |      |     |     |      |  |
| 39   | 1    |      |      |     |     | 6    |   return $m; |
| 40   |      |      |      |     |     |      | } |
| 41   |      |      |      |     |     |      |  |
| 42   |      |      |      |     |     |      | sub do_predict { |
| 43   | 1    |      |      | 1   | 0   | 2    |   my ($self, $m, $newattrs) = @_; |
| 44   |      |      |      |     |     |      |  |
| 45   |      |      |      |     |     |      |   # Note that we're using the log(prob) here.  That's why we add instead of multiply. |
| 46   |      |      |      |     |     |      |  |
| 47   | 1    |      |      |     |     | 1    |   my %scores = %{$m->{prior_probs}}; |
|      | 1    |      |      |     |     | 5    |  |
| 48   | 1    |      |      |     |     | 5    |   while (my ($feature, $value) = each %$newattrs) { |
| 49   | 2    | 50   |      |     |     | 6    |     next unless exists $m->{probs}{$feature};  # Ignore totally unseen features |
| 50   | 2    |      |      |     |     | 3    |     while (my ($label, $values) = each %{$m->{probs}{$feature}}) { |
|      | 6    |      |      |     |     | 22   |  |
| 51   | 4    |      | 50   |     |     | 17   |       $scores{$label} += ($values->{$value} \|\| 0); |
| 52   |      |      |      |     |     |      |     } |
| 53   |      |      |      |     |     |      |   } |
| 54   |      |      |      |     |     |      |  |
| 55   | 1    |      |      |     |     | 5    |   rescale \%scores; |
| 56   | 1    |      |      |     |     | 4    |   return \%scores; |
| 57   |      |      |      |     |     |      | } |
| 58   |      |      |      |     |     |      |  |
| 59   |      |      |      |     |     |      | 1; |
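
For context, the three `do_*` subroutines covered above are the model hooks that the Algorithm::NaiveBayes front end calls from its public `add_instance`, `train`, and `predict` methods. The sketch below shows roughly how this Discrete model gets exercised end to end; the constructor option used here to select the model (`model_type => 'Discrete'`) and the fruit data are assumptions for illustration, so check the distribution's documentation before relying on them.

```perl
use strict;
use warnings;
use Algorithm::NaiveBayes;

# Assumption: 'model_type' is the constructor option that selects
# Algorithm::NaiveBayes::Model::Discrete -- verify against the module's POD.
my $nb = Algorithm::NaiveBayes->new(model_type => 'Discrete');

# Each training instance is a set of discrete attribute/value pairs plus one
# or more labels; do_add_instance() tallies counts per label, attribute, value.
$nb->add_instance(attributes => { color => 'red',    shape => 'round' },
                  label      => 'apple');
$nb->add_instance(attributes => { color => 'yellow', shape => 'long'  },
                  label      => 'banana');
$nb->add_instance(attributes => { color => 'red',    shape => 'long'  },
                  label      => ['apple', 'banana']);   # multiple labels are allowed

# do_train() turns the raw counts into log prior and conditional probabilities.
$nb->train;

# do_predict() sums the stored log-probabilities for the attribute values it has
# seen and returns a label => score hash reference (higher means more likely).
my $scores = $nb->predict(attributes => { color => 'red', shape => 'round' });

for my $label (sort { $scores->{$b} <=> $scores->{$a} } keys %$scores) {
    printf "%-8s %.4f\n", $label, $scores->{$label};
}
```

Because `do_train` stores log-probabilities, `do_predict` only has to add per-attribute contributions onto each label's log prior (the comment at source line 45 makes the same point), and `rescale` from Algorithm::NaiveBayes::Util then converts the accumulated log scores back into ordinary relative scores.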