| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Algorithm::NaiveBayes::Model::Discrete; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 1 |  |  | 1 |  | 6 | use strict; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 53 |  | 
| 4 | 1 |  |  | 1 |  | 5 | use base qw(Algorithm::NaiveBayes); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 107 |  | 
| 5 | 1 |  |  | 1 |  | 482 | use Algorithm::NaiveBayes::Util qw(rescale); | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 602 |  | 
| 6 |  |  |  |  |  |  |  | 
| 7 |  |  |  |  |  |  | sub do_add_instance { | 
| 8 | 200 |  |  | 200 | 0 | 220 | my ($self, $attributes, $labels, $data) = @_; | 
| 9 |  |  |  |  |  |  |  | 
| 10 | 200 |  |  |  |  | 247 | foreach my $label ( @$labels ) { | 
| 11 | 200 |  | 100 |  |  | 420 | my $mylabel = $data->{labels}{$label} ||= {}; | 
| 12 | 200 |  |  |  |  | 197 | $mylabel->{count}++; | 
| 13 | 200 |  |  |  |  | 472 | while (my ($attr, $value) = each %$attributes) { | 
| 14 | 400 |  |  |  |  | 1559 | $mylabel->{attrs}{$attr}{$value}++; | 
| 15 |  |  |  |  |  |  | } | 
| 16 |  |  |  |  |  |  | } | 
| 17 |  |  |  |  |  |  | } | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | sub do_train { | 
| 20 | 1 |  |  | 1 | 0 | 2 | my ($self, $training_data) = @_; | 
| 21 | 1 |  |  |  |  | 2 | my $m = {}; | 
| 22 |  |  |  |  |  |  |  | 
| 23 | 1 |  |  |  |  | 16 | my $instances = $self->instances; | 
| 24 | 1 |  |  |  |  | 3 | my $labels = $training_data->{labels}; | 
| 25 | 1 |  |  |  |  | 2 | my $probs = $m->{probs} = {}; | 
| 26 |  |  |  |  |  |  |  | 
| 27 |  |  |  |  |  |  | # Calculate the log-probabilities for each category | 
| 28 | 1 |  |  |  |  | 5 | foreach my $label ($self->labels) { | 
| 29 | 2 |  |  |  |  | 20 | $m->{prior_probs}{$label} = log($labels->{$label}{count} / $instances); | 
| 30 |  |  |  |  |  |  |  | 
| 31 | 2 |  |  |  |  | 6 | my $denominator = log($labels->{$label}{count}); | 
| 32 | 2 |  |  |  |  | 3 | while (my ($attribute, $values) = each %{ $labels->{$label}{attrs} }) { | 
|  | 6 |  |  |  |  | 30 |  | 
| 33 | 4 |  |  |  |  | 17 | while (my ($value, $count) = each %$values) { | 
| 34 | 8 |  |  |  |  | 44 | $probs->{$attribute}{$label}{$value} = log($count) - $denominator; | 
| 35 |  |  |  |  |  |  | } | 
| 36 |  |  |  |  |  |  | } | 
| 37 |  |  |  |  |  |  | } | 
| 38 |  |  |  |  |  |  |  | 
| 39 | 1 |  |  |  |  | 6 | return $m; | 
| 40 |  |  |  |  |  |  | } | 
| 41 |  |  |  |  |  |  |  | 
| 42 |  |  |  |  |  |  | sub do_predict { | 
| 43 | 1 |  |  | 1 | 0 | 2 | my ($self, $m, $newattrs) = @_; | 
| 44 |  |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  | # Note that we're using the log(prob) here.  That's why we add instead of multiply. | 
| 46 |  |  |  |  |  |  |  | 
| 47 | 1 |  |  |  |  | 1 | my %scores = %{$m->{prior_probs}}; | 
|  | 1 |  |  |  |  | 5 |  | 
| 48 | 1 |  |  |  |  | 5 | while (my ($feature, $value) = each %$newattrs) { | 
| 49 | 2 | 50 |  |  |  | 6 | next unless exists $m->{probs}{$feature};  # Ignore totally unseen features | 
| 50 | 2 |  |  |  |  | 3 | while (my ($label, $values) = each %{$m->{probs}{$feature}}) { | 
|  | 6 |  |  |  |  | 22 |  | 
| 51 | 4 |  | 50 |  |  | 17 | $scores{$label} += ($values->{$value} || 0); | 
| 52 |  |  |  |  |  |  | } | 
| 53 |  |  |  |  |  |  | } | 
| 54 |  |  |  |  |  |  |  | 
| 55 | 1 |  |  |  |  | 5 | rescale \%scores; | 
| 56 | 1 |  |  |  |  | 4 | return \%scores; | 
| 57 |  |  |  |  |  |  | } | 
| 58 |  |  |  |  |  |  |  | 
| 59 |  |  |  |  |  |  | 1; |