| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Algorithm::AM::Batch; | 
| 2 | 4 |  |  | 4 |  | 47719 | use strict; | 
|  | 4 |  |  |  |  | 27 |  | 
|  | 4 |  |  |  |  | 94 |  | 
| 3 | 4 |  |  | 4 |  | 16 | use warnings; | 
|  | 4 |  |  |  |  | 6 |  | 
|  | 4 |  |  |  |  | 137 |  | 
| 4 |  |  |  |  |  |  | our $VERSION = '3.11'; | 
| 5 |  |  |  |  |  |  | # ABSTRACT: Classify items in batch mode | 
| 6 | 4 |  |  | 4 |  | 18 | use feature 'state'; | 
|  | 4 |  |  |  |  | 7 |  | 
|  | 4 |  |  |  |  | 323 |  | 
| 7 | 4 |  |  | 4 |  | 18 | use Carp; | 
|  | 4 |  |  |  |  | 6 |  | 
|  | 4 |  |  |  |  | 229 |  | 
| 8 | 4 |  |  | 4 |  | 825 | use Log::Any qw($log); | 
|  | 4 |  |  |  |  | 22510 |  | 
|  | 4 |  |  |  |  | 15 |  | 
| 9 |  |  |  |  |  |  | our @CARP_NOT = qw(Algorithm::AM::Batch); | 
| 10 |  |  |  |  |  |  |  | 
| 11 |  |  |  |  |  |  | # Place this accessor here so that Class::Tiny doesn't generate | 
| 12 |  |  |  |  |  |  | # a getter/setter pair. | 
| 13 |  |  |  |  |  |  | sub test_set { | 
| 14 | 38 |  |  | 38 | 1 | 446 | my ($self) = @_; | 
| 15 | 38 |  |  |  |  | 110 | return $self->{test_set}; | 
| 16 |  |  |  |  |  |  | } | 
| 17 |  |  |  |  |  |  |  | 
| 18 | 4 |  |  |  |  | 27 | use Class::Tiny qw( | 
| 19 |  |  |  |  |  |  | training_set | 
| 20 |  |  |  |  |  |  |  | 
| 21 |  |  |  |  |  |  | exclude_nulls | 
| 22 |  |  |  |  |  |  | exclude_given | 
| 23 |  |  |  |  |  |  | linear | 
| 24 |  |  |  |  |  |  | probability | 
| 25 |  |  |  |  |  |  | repeat | 
| 26 |  |  |  |  |  |  | max_training_items | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | begin_hook | 
| 29 |  |  |  |  |  |  | begin_test_hook | 
| 30 |  |  |  |  |  |  | begin_repeat_hook | 
| 31 |  |  |  |  |  |  | training_item_hook | 
| 32 |  |  |  |  |  |  | end_repeat_hook | 
| 33 |  |  |  |  |  |  | end_test_hook | 
| 34 |  |  |  |  |  |  | end_hook | 
| 35 |  |  |  |  |  |  |  | 
| 36 |  |  |  |  |  |  | test_set | 
| 37 |  |  |  |  |  |  | ), { | 
| 38 |  |  |  |  |  |  | exclude_nulls     => 1, | 
| 39 |  |  |  |  |  |  | exclude_given    => 1, | 
| 40 |  |  |  |  |  |  | linear      => 0, | 
| 41 |  |  |  |  |  |  | probability => 1, | 
| 42 |  |  |  |  |  |  | repeat      => 1, | 
| 43 | 4 |  |  | 4 |  | 6925 | }; | 
|  | 4 |  |  |  |  | 5264 |  | 
| 44 |  |  |  |  |  |  |  | 
| 45 | 4 |  |  | 4 |  | 6746 | use Algorithm::AM; | 
|  | 4 |  |  |  |  | 12 |  | 
|  | 4 |  |  |  |  | 19 |  | 
| 46 | 4 |  |  | 4 |  | 27 | use Algorithm::AM::Result; | 
|  | 4 |  |  |  |  | 8 |  | 
|  | 4 |  |  |  |  | 82 |  | 
| 47 | 4 |  |  | 4 |  | 17 | use Algorithm::AM::BigInt 'bigcmp'; | 
|  | 4 |  |  |  |  | 6 |  | 
|  | 4 |  |  |  |  | 119 |  | 
| 48 | 4 |  |  | 4 |  | 23 | use Algorithm::AM::DataSet; | 
|  | 4 |  |  |  |  | 8 |  | 
|  | 4 |  |  |  |  | 112 |  | 
| 49 | 4 |  |  | 4 |  | 24 | use Import::Into; | 
|  | 4 |  |  |  |  | 7 |  | 
|  | 4 |  |  |  |  | 4502 |  | 
| 50 |  |  |  |  |  |  | # Use Import::Into to export classes into caller | 
| 51 |  |  |  |  |  |  | sub import { | 
| 52 | 4 |  |  | 4 |  | 39 | my $target = caller; | 
| 53 | 4 |  |  |  |  | 24 | Algorithm::AM::BigInt->import::into($target, 'bigcmp'); | 
| 54 | 4 |  |  |  |  | 943 | Algorithm::AM::DataSet->import::into($target, 'dataset_from_file'); | 
| 55 | 4 |  |  |  |  | 667 | Algorithm::AM::DataSet::Item->import::into($target, 'new_item'); | 
| 56 | 4 |  |  |  |  | 2713 | return; | 
| 57 |  |  |  |  |  |  | } | 
| 58 |  |  |  |  |  |  |  | 
| 59 |  |  |  |  |  |  | sub BUILD { | 
| 60 | 14 |  |  | 14 | 0 | 2308 | my ($self, $args) = @_; | 
| 61 |  |  |  |  |  |  |  | 
| 62 |  |  |  |  |  |  | # check for invalid arguments | 
| 63 | 14 |  |  |  |  | 24 | my $class = ref $self; | 
| 64 | 14 |  |  |  |  | 33 | my %valid_attrs = map {$_ => 1} | 
|  | 210 |  |  |  |  | 697 |  | 
| 65 |  |  |  |  |  |  | Class::Tiny->get_all_attributes_for($class); | 
| 66 | 14 |  |  |  |  | 57 | my @invalids = grep {!$valid_attrs{$_}} sort keys %$args; | 
|  | 40 |  |  |  |  | 69 |  | 
| 67 | 14 | 100 |  |  |  | 35 | if(@invalids){ | 
| 68 | 1 |  |  |  |  | 10 | croak "Invalid attributes for $class: " . join ' ', | 
| 69 |  |  |  |  |  |  | sort @invalids; | 
| 70 |  |  |  |  |  |  | } | 
| 71 |  |  |  |  |  |  |  | 
| 72 | 13 | 100 |  |  |  | 30 | if(!exists $args->{training_set}){ | 
| 73 | 1 |  |  |  |  | 13 | croak "Missing required parameter 'training_set'"; | 
| 74 |  |  |  |  |  |  | } | 
| 75 | 12 | 100 | 66 |  |  | 90 | if(!(ref $args) || !$args->{training_set}->isa( | 
| 76 |  |  |  |  |  |  | 'Algorithm::AM::DataSet')){ | 
| 77 | 1 |  |  |  |  | 31 | croak 'Parameter training_set should be an ' . | 
| 78 |  |  |  |  |  |  | 'Algorithm::AM::DataSet'; | 
| 79 |  |  |  |  |  |  | } | 
| 80 | 11 |  |  |  |  | 26 | for(qw( | 
| 81 |  |  |  |  |  |  | begin_hook | 
| 82 |  |  |  |  |  |  | begin_test_hook | 
| 83 |  |  |  |  |  |  | begin_repeat_hook | 
| 84 |  |  |  |  |  |  | training_item_hook | 
| 85 |  |  |  |  |  |  | end_repeat_hook | 
| 86 |  |  |  |  |  |  | end_test_hook | 
| 87 |  |  |  |  |  |  | end_hook | 
| 88 |  |  |  |  |  |  | )){ | 
| 89 | 77 | 50 | 66 |  |  | 155 | if(exists $args->{$_} and 'CODE' ne ref $args->{$_}){ | 
| 90 | 0 |  |  |  |  | 0 | croak "Input $_ should be a subroutine"; | 
| 91 |  |  |  |  |  |  | } | 
| 92 |  |  |  |  |  |  | } | 
| 93 |  |  |  |  |  |  |  | 
| 94 | 11 |  |  |  |  | 28 | return; | 
| 95 |  |  |  |  |  |  | } | 
| 96 |  |  |  |  |  |  |  | 
| 97 |  |  |  |  |  |  | sub classify_all { | 
| 98 | 7 |  |  | 7 | 1 | 38 | my ($self, $test_set) = @_; | 
| 99 |  |  |  |  |  |  |  | 
| 100 | 7 | 100 | 100 |  |  | 35 | if(!$test_set || 'Algorithm::AM::DataSet' ne ref $test_set){ | 
| 101 | 2 |  |  |  |  | 15 | croak q[Must provide a DataSet to classify_all]; | 
| 102 |  |  |  |  |  |  | } | 
| 103 | 5 | 100 |  |  |  | 130 | if($self->training_set->cardinality != $test_set->cardinality){ | 
| 104 | 1 |  |  |  |  | 15 | croak 'Training and test sets do not have the same ' . | 
| 105 |  |  |  |  |  |  | 'cardinality (' . $self->training_set->cardinality . | 
| 106 |  |  |  |  |  |  | ' and ' . $test_set->cardinality . ')'; | 
| 107 |  |  |  |  |  |  | } | 
| 108 | 4 |  |  |  |  | 16 | $self->_set_test_set($test_set); | 
| 109 |  |  |  |  |  |  |  | 
| 110 | 4 | 100 |  |  |  | 64 | if($self->begin_hook){ | 
| 111 | 1 |  |  |  |  | 18 | $self->begin_hook->($self); | 
| 112 |  |  |  |  |  |  | } | 
| 113 |  |  |  |  |  |  |  | 
| 114 |  |  |  |  |  |  | # save the result objects from all items, all iterations, here | 
| 115 | 4 |  |  |  |  | 1793 | my @all_results; | 
| 116 |  |  |  |  |  |  |  | 
| 117 | 4 |  |  |  |  | 16 | foreach my $item_number (0 .. $test_set->size - 1) { | 
| 118 | 178 | 50 |  |  |  | 565 | if($log->is_debug){ | 
| 119 | 0 |  |  |  |  | 0 | $log->debug('Test items left: ' . | 
| 120 |  |  |  |  |  |  | $test_set->size + 1 - $item_number); | 
| 121 |  |  |  |  |  |  | } | 
| 122 | 178 |  |  |  |  | 1527 | my $test_item = $test_set->get_item($item_number); | 
| 123 |  |  |  |  |  |  | # store the results just for this item | 
| 124 | 178 |  |  |  |  | 258 | my @item_results; | 
| 125 |  |  |  |  |  |  |  | 
| 126 | 178 | 100 |  |  |  | 2613 | if($self->begin_test_hook){ | 
| 127 | 2 |  |  |  |  | 39 | $self->begin_test_hook->($self, $test_item); | 
| 128 |  |  |  |  |  |  | } | 
| 129 |  |  |  |  |  |  |  | 
| 130 | 178 | 50 |  |  |  | 2614 | if($log->is_debug){ | 
| 131 | 0 |  |  |  |  | 0 | my ( $sec, $min, $hour ) = localtime(); | 
| 132 | 0 |  |  |  |  | 0 | $log->info( | 
| 133 |  |  |  |  |  |  | sprintf( "Time: %2s:%02s:%02s\n", $hour, $min, $sec) . | 
| 134 |  |  |  |  |  |  | $test_item->comment . "\n" . | 
| 135 |  |  |  |  |  |  | sprintf( "0/$self->{repeat}  %2s:%02s:%02s", | 
| 136 |  |  |  |  |  |  | $hour, $min, $sec ) ); | 
| 137 |  |  |  |  |  |  | } | 
| 138 |  |  |  |  |  |  |  | 
| 139 | 178 |  |  |  |  | 1115 | my $iteration = 1; | 
| 140 | 178 |  |  |  |  | 2307 | while ( $iteration <= $self->repeat ) { | 
| 141 | 182 | 100 |  |  |  | 3190 | if($self->begin_repeat_hook){ | 
| 142 | 4 |  |  |  |  | 106 | $self->begin_repeat_hook->( | 
| 143 |  |  |  |  |  |  | $self, $test_item, $iteration); | 
| 144 |  |  |  |  |  |  | } | 
| 145 |  |  |  |  |  |  |  | 
| 146 |  |  |  |  |  |  | # this sets excluded_items | 
| 147 | 182 |  |  |  |  | 4368 | my ($training_set, $excluded_items) = $self->_make_training_set( | 
| 148 |  |  |  |  |  |  | $test_item, $iteration); | 
| 149 |  |  |  |  |  |  |  | 
| 150 |  |  |  |  |  |  | # classify the item with the given training set and | 
| 151 |  |  |  |  |  |  | # configuration | 
| 152 | 182 |  |  |  |  | 2494 | my $am = Algorithm::AM->new( | 
| 153 |  |  |  |  |  |  | training_set => $training_set, | 
| 154 |  |  |  |  |  |  | exclude_nulls => $self->exclude_nulls, | 
| 155 |  |  |  |  |  |  | exclude_given => $self->exclude_given, | 
| 156 |  |  |  |  |  |  | linear => $self->linear, | 
| 157 |  |  |  |  |  |  | ); | 
| 158 | 182 |  |  |  |  | 1171 | my $result = $am->classify($test_item); | 
| 159 |  |  |  |  |  |  |  | 
| 160 | 182 | 50 |  |  |  | 656 | _log_result($result) | 
| 161 |  |  |  |  |  |  | if($log->is_info); | 
| 162 |  |  |  |  |  |  |  | 
| 163 | 182 | 50 |  |  |  | 1713 | if($log->is_info){ | 
| 164 | 0 |  |  |  |  | 0 | my ( $sec, $min, $hour ) = localtime(); | 
| 165 | 0 |  |  |  |  | 0 | $log->info( | 
| 166 |  |  |  |  |  |  | sprintf( | 
| 167 |  |  |  |  |  |  | $iteration . '/' . $self->repeat . | 
| 168 |  |  |  |  |  |  | '  %2s:%02s:%02s', | 
| 169 |  |  |  |  |  |  | $hour, $min, $sec | 
| 170 |  |  |  |  |  |  | ) | 
| 171 |  |  |  |  |  |  | ); | 
| 172 |  |  |  |  |  |  | } | 
| 173 |  |  |  |  |  |  |  | 
| 174 | 182 | 100 |  |  |  | 3723 | if($self->end_repeat_hook){ | 
| 175 |  |  |  |  |  |  | # pass in self, test item, data, and result | 
| 176 | 5 |  |  |  |  | 86 | $self->end_repeat_hook->($self, $test_item, | 
| 177 |  |  |  |  |  |  | $iteration, $excluded_items, $result); | 
| 178 |  |  |  |  |  |  | } | 
| 179 | 182 |  |  |  |  | 9706 | push @item_results, $result; | 
| 180 | 182 |  |  |  |  | 777 | $iteration++; | 
| 181 |  |  |  |  |  |  | } | 
| 182 |  |  |  |  |  |  |  | 
| 183 | 178 | 100 |  |  |  | 8798 | if($self->end_test_hook){ | 
| 184 | 175 |  |  |  |  | 2974 | $self->end_test_hook->($self, $test_item, @item_results); | 
| 185 |  |  |  |  |  |  | } | 
| 186 |  |  |  |  |  |  |  | 
| 187 | 178 |  |  |  |  | 7166 | push @all_results, @item_results; | 
| 188 |  |  |  |  |  |  | } | 
| 189 |  |  |  |  |  |  |  | 
| 190 | 4 | 50 |  |  |  | 15 | if($log->is_info){ | 
| 191 | 0 |  |  |  |  | 0 | my ( $sec, $min, $hour ) = localtime(); | 
| 192 | 0 |  |  |  |  | 0 | $log->info( | 
| 193 |  |  |  |  |  |  | sprintf( "Time: %2s:%02s:%02s", $hour, $min, $sec ) ); | 
| 194 |  |  |  |  |  |  | } | 
| 195 |  |  |  |  |  |  |  | 
| 196 | 4 | 100 |  |  |  | 100 | if($self->end_hook){ | 
| 197 | 1 |  |  |  |  | 20 | $self->end_hook->($self, @all_results); | 
| 198 |  |  |  |  |  |  | } | 
| 199 | 4 |  |  |  |  | 3775 | $self->_set_test_set(undef); | 
| 200 | 4 |  |  |  |  | 18 | return @all_results; | 
| 201 |  |  |  |  |  |  | } | 
| 202 |  |  |  |  |  |  |  | 
| 203 |  |  |  |  |  |  | # log the summary printouts from the input result object | 
| 204 |  |  |  |  |  |  | sub _log_result { | 
| 205 | 0 |  |  | 0 |  | 0 | my ($result) = @_; | 
| 206 |  |  |  |  |  |  |  | 
| 207 | 0 |  |  |  |  | 0 | $log->info(${$result->statistical_summary}); | 
|  | 0 |  |  |  |  | 0 |  | 
| 208 |  |  |  |  |  |  |  | 
| 209 | 0 |  |  |  |  | 0 | $log->info(${$result->analogical_set_summary()}); | 
|  | 0 |  |  |  |  | 0 |  | 
| 210 |  |  |  |  |  |  |  | 
| 211 | 0 | 0 |  |  |  | 0 | if($log->is_debug){ | 
|  |  | 0 |  |  |  |  |  | 
| 212 | 0 |  |  |  |  | 0 | $log->debug(${ $result->gang_summary(1) }); | 
|  | 0 |  |  |  |  | 0 |  | 
| 213 |  |  |  |  |  |  | }elsif($log->is_info){ | 
| 214 | 0 |  |  |  |  | 0 | $log->info(${ $result->gang_summary(0) }) | 
|  | 0 |  |  |  |  | 0 |  | 
| 215 |  |  |  |  |  |  | } | 
| 216 | 0 |  |  |  |  | 0 | return; | 
| 217 |  |  |  |  |  |  | } | 
| 218 |  |  |  |  |  |  |  | 
| 219 |  |  |  |  |  |  | # create the training set for this iteration, calling training_item_hook and | 
| 220 |  |  |  |  |  |  | # updating excluded_items along the way | 
| 221 |  |  |  |  |  |  | sub _make_training_set { | 
| 222 | 182 |  |  | 182 |  | 391 | my ($self, $test_item, $iteration) = @_; | 
| 223 | 182 |  |  |  |  | 338 | my $training_set; | 
| 224 |  |  |  |  |  |  |  | 
| 225 |  |  |  |  |  |  | # $self->_set_excluded_items([]); | 
| 226 |  |  |  |  |  |  | my @excluded_items; | 
| 227 |  |  |  |  |  |  | # Cap the amount of considered data if specified | 
| 228 | 182 | 100 |  |  |  | 2600 | my $max = defined $self->max_training_items ? | 
| 229 |  |  |  |  |  |  | int($self->max_training_items) : | 
| 230 |  |  |  |  |  |  | $self->training_set->size; | 
| 231 |  |  |  |  |  |  |  | 
| 232 |  |  |  |  |  |  | # use the original DataSet object if there are no settings | 
| 233 |  |  |  |  |  |  | # that would trim items from it | 
| 234 | 182 | 100 | 66 |  |  | 2687 | if(!$self->training_item_hook && | 
|  |  |  | 66 |  |  |  |  | 
| 235 |  |  |  |  |  |  | ($self->probability == 1) && | 
| 236 |  |  |  |  |  |  | $max >= $self->training_set->size){ | 
| 237 | 177 |  |  |  |  | 2270 | $training_set = $self->training_set; | 
| 238 |  |  |  |  |  |  | }else{ | 
| 239 |  |  |  |  |  |  | # otherwise, make a new set with just the selected | 
| 240 |  |  |  |  |  |  | # items | 
| 241 | 5 |  |  |  |  | 96 | $training_set = Algorithm::AM::DataSet->new( | 
| 242 |  |  |  |  |  |  | cardinality => $self->training_set->cardinality); | 
| 243 |  |  |  |  |  |  |  | 
| 244 |  |  |  |  |  |  | # don't try to add more items than we have! | 
| 245 | 5 | 100 |  |  |  | 75 | my $num_items = ($max > $self->training_set->size) ? | 
| 246 |  |  |  |  |  |  | $self->training_set->size : | 
| 247 |  |  |  |  |  |  | $max; | 
| 248 | 5 |  |  |  |  | 19 | for my $data_index ( 0 .. $num_items - 1 ) { | 
| 249 | 25 |  |  |  |  | 366 | my $training_item = | 
| 250 |  |  |  |  |  |  | $self->training_set->get_item($data_index); | 
| 251 |  |  |  |  |  |  | # skip this data item if the training_item_hook returns false | 
| 252 | 25 | 100 | 66 |  |  | 325 | if($self->training_item_hook && | 
| 253 |  |  |  |  |  |  | !$self->training_item_hook->($self, | 
| 254 |  |  |  |  |  |  | $test_item, $iteration, $training_item) | 
| 255 |  |  |  |  |  |  | ){ | 
| 256 | 5 |  |  |  |  | 98 | push @excluded_items, $training_item; | 
| 257 | 5 |  |  |  |  | 9 | next; | 
| 258 |  |  |  |  |  |  | } | 
| 259 |  |  |  |  |  |  | # skip this data item with probability $self->{probability} | 
| 260 | 20 | 50 | 33 |  |  | 24592 | if($self->probability != 1 && | 
| 261 |  |  |  |  |  |  | rand() > $self->probability){ | 
| 262 | 0 |  |  |  |  | 0 | push @excluded_items, $training_item; | 
| 263 | 0 |  |  |  |  | 0 | next; | 
| 264 |  |  |  |  |  |  | } | 
| 265 | 20 |  |  |  |  | 155 | $training_set->add_item($training_item); | 
| 266 |  |  |  |  |  |  | } | 
| 267 |  |  |  |  |  |  | } | 
| 268 |  |  |  |  |  |  | # $self->_set_excluded_items(\@excluded_items); | 
| 269 | 182 |  |  |  |  | 1017 | return ($training_set, \@excluded_items); | 
| 270 |  |  |  |  |  |  | } | 
| 271 |  |  |  |  |  |  |  | 
| 272 |  |  |  |  |  |  | sub _set_test_set { | 
| 273 | 8 |  |  | 8 |  | 20 | my ($self, $test_set) = @_; | 
| 274 | 8 |  |  |  |  | 16 | $self->{test_set} = $test_set; | 
| 275 | 8 |  |  |  |  | 11 | return; | 
| 276 |  |  |  |  |  |  | } | 
| 277 |  |  |  |  |  |  |  | 
| 278 |  |  |  |  |  |  | 1; | 
| 279 |  |  |  |  |  |  |  | 
| 280 |  |  |  |  |  |  | __END__ |