| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | # encapsulate information about a single classification result | 
| 2 |  |  |  |  |  |  | package Algorithm::AM::Result; | 
| 3 | 10 |  |  | 10 |  | 59 | use strict; | 
|  | 10 |  |  |  |  | 20 |  | 
|  | 10 |  |  |  |  | 255 |  | 
| 4 | 10 |  |  | 10 |  | 50 | use warnings; | 
|  | 10 |  |  |  |  | 19 |  | 
|  | 10 |  |  |  |  | 383 |  | 
| 5 |  |  |  |  |  |  | our $VERSION = '3.11'; | 
| 6 |  |  |  |  |  |  | # ABSTRACT: Store results of an AM classification | 
| 7 | 10 |  |  | 10 |  | 3490 | use Text::Table; | 
|  | 10 |  |  |  |  | 145735 |  | 
|  | 10 |  |  |  |  | 1112 |  | 
| 8 |  |  |  |  |  |  |  | 
| 9 |  |  |  |  |  |  | #pod =head2 SYNOPSIS | 
| 10 |  |  |  |  |  |  | #pod | 
| 11 |  |  |  |  |  |  | #pod   use Algorithm::AM; | 
| 12 |  |  |  |  |  |  | #pod | 
| 13 |  |  |  |  |  |  | #pod   my $am = Algorithm::AM->new('finnverb', -commas => 'no'); | 
| 14 |  |  |  |  |  |  | #pod   my ($result) = $am->classify; | 
| 15 |  |  |  |  |  |  | #pod   print @{ $result->winners }; | 
| 16 |  |  |  |  |  |  | #pod   print $result->statistical_summary; | 
| 17 |  |  |  |  |  |  | #pod | 
| 18 |  |  |  |  |  |  | #pod =head2 DESCRIPTION | 
| 19 |  |  |  |  |  |  | #pod | 
| 20 |  |  |  |  |  |  | #pod This package encapsulates all of the classification information | 
| 21 |  |  |  |  |  |  | #pod generated by L, including the assigned class, | 
| 22 |  |  |  |  |  |  | #pod score to each class, gang effects, analogical sets, | 
| 23 |  |  |  |  |  |  | #pod and timing information. It also provides several methods for | 
| 24 |  |  |  |  |  |  | #pod generating printable reports with this information. | 
| 25 |  |  |  |  |  |  | #pod | 
| 26 |  |  |  |  |  |  | #pod Note that the words 'score' and 'point' are used here to represent | 
| 27 |  |  |  |  |  |  | #pod whatever count is assigned by analogical modeling during | 
| 28 |  |  |  |  |  |  | #pod classification. This can be either pointers or occurrences. For an | 
| 29 |  |  |  |  |  |  | #pod explanation of this, see L. | 
| 30 |  |  |  |  |  |  | #pod | 
| 31 |  |  |  |  |  |  | #pod All of the scores returned by the methods here are scalars with | 
| 32 |  |  |  |  |  |  | #pod special PV and NV values. You should excercise caution when doing | 
| 33 |  |  |  |  |  |  | #pod calculations with them. See L for more | 
| 34 |  |  |  |  |  |  | #pod information. | 
| 35 |  |  |  |  |  |  | #pod | 
| 36 |  |  |  |  |  |  | #pod =cut | 
| 37 |  |  |  |  |  |  |  | 
| 38 |  |  |  |  |  |  | ## TODO: variables consider exporting someday | 
| 39 |  |  |  |  |  |  | ## @itemcontextchain | 
| 40 |  |  |  |  |  |  | ## %itemcontextchainhead | 
| 41 |  |  |  |  |  |  | ## %context_to_class | 
| 42 |  |  |  |  |  |  | ## %contextsize | 
| 43 |  |  |  |  |  |  | use Class::Tiny qw( | 
| 44 |  |  |  |  |  |  | exclude_nulls | 
| 45 |  |  |  |  |  |  | given_excluded | 
| 46 |  |  |  |  |  |  | cardinality | 
| 47 |  |  |  |  |  |  | test_in_train | 
| 48 |  |  |  |  |  |  | test_item | 
| 49 |  |  |  |  |  |  | count_method | 
| 50 |  |  |  |  |  |  |  | 
| 51 |  |  |  |  |  |  | start_time | 
| 52 |  |  |  |  |  |  | end_time | 
| 53 |  |  |  |  |  |  |  | 
| 54 |  |  |  |  |  |  | training_set | 
| 55 |  |  |  |  |  |  |  | 
| 56 |  |  |  |  |  |  | scores | 
| 57 |  |  |  |  |  |  | high_score | 
| 58 |  |  |  |  |  |  | total_points | 
| 59 |  |  |  |  |  |  | winners | 
| 60 |  |  |  |  |  |  | is_tie | 
| 61 |  |  |  |  |  |  | result | 
| 62 |  |  |  |  |  |  |  | 
| 63 |  |  |  |  |  |  | scores_normalized | 
| 64 |  |  |  |  |  |  | ), { | 
| 65 |  |  |  |  |  |  | 'scores_normalized' => sub { | 
| 66 | 1 |  |  |  |  | 905 | my ($self) = @_; | 
| 67 | 1 |  |  |  |  | 16 | my $total_points = $self->total_points; | 
| 68 | 1 |  |  |  |  | 18 | my $scores = $self->scores; | 
| 69 | 1 |  |  |  |  | 6 | my $normalized = {}; | 
| 70 | 1 |  |  |  |  | 4 | for my $class (keys %$scores){ | 
| 71 | 2 |  |  |  |  | 5 | $normalized->{$class} = $scores->{$class} / $total_points | 
| 72 |  |  |  |  |  |  | } | 
| 73 | 1 |  |  |  |  | 8 | return $normalized; | 
| 74 |  |  |  |  |  |  | }, | 
| 75 | 10 |  |  | 10 |  | 92 | }; | 
|  | 10 |  |  |  |  | 51 |  | 
|  | 10 |  |  |  |  | 144 |  | 
| 76 | 10 |  |  | 10 |  | 16532 | use Carp 'croak'; | 
|  | 10 |  |  |  |  | 27 |  | 
|  | 10 |  |  |  |  | 494 |  | 
| 77 | 10 |  |  | 10 |  | 3448 | use Algorithm::AM::BigInt 'bigcmp'; | 
|  | 10 |  |  |  |  | 54 |  | 
|  | 10 |  |  |  |  | 19115 |  | 
| 78 |  |  |  |  |  |  |  | 
| 79 |  |  |  |  |  |  | # For printing percentages in reports | 
| 80 |  |  |  |  |  |  | my $percentage_format = '%.3f'; | 
| 81 |  |  |  |  |  |  |  | 
| 82 |  |  |  |  |  |  | #pod =head1 REPORT METHODS | 
| 83 |  |  |  |  |  |  | #pod | 
| 84 |  |  |  |  |  |  | #pod The methods below return human eye-friendly reports about the | 
| 85 |  |  |  |  |  |  | #pod classification. The return value is a reference, so it must be | 
| 86 |  |  |  |  |  |  | #pod dereferenced for printing like so: | 
| 87 |  |  |  |  |  |  | #pod | 
| 88 |  |  |  |  |  |  | #pod  print ${ $result->statistical_summary }; | 
| 89 |  |  |  |  |  |  | #pod | 
| 90 |  |  |  |  |  |  | #pod =head2 C | 
| 91 |  |  |  |  |  |  | #pod | 
| 92 |  |  |  |  |  |  | #pod Returns a scalar (string) ref containing information about the | 
| 93 |  |  |  |  |  |  | #pod configuration at the time of classification. Information from the | 
| 94 |  |  |  |  |  |  | #pod following accessors is included: | 
| 95 |  |  |  |  |  |  | #pod | 
| 96 |  |  |  |  |  |  | #pod     exclude_nulls | 
| 97 |  |  |  |  |  |  | #pod     given_excluded | 
| 98 |  |  |  |  |  |  | #pod     cardinality | 
| 99 |  |  |  |  |  |  | #pod     test_in_train | 
| 100 |  |  |  |  |  |  | #pod     test_item | 
| 101 |  |  |  |  |  |  | #pod     count_method | 
| 102 |  |  |  |  |  |  | #pod | 
| 103 |  |  |  |  |  |  | #pod =cut | 
| 104 |  |  |  |  |  |  | sub config_info { | 
| 105 | 2 |  |  | 2 | 1 | 1780 | my ($self) = @_; | 
| 106 | 2 |  |  |  |  | 5 | my @headers = ('Option', 'Setting'); | 
| 107 |  |  |  |  |  |  | my @rows = ( | 
| 108 | 2 | 100 |  |  |  | 4 | [ "Given context", (join ' ', @{$self->test_item->features}) . | 
|  | 2 | 100 |  |  |  | 44 |  | 
|  |  | 100 |  |  |  |  |  | 
| 109 |  |  |  |  |  |  | ', ' . $self->test_item->comment], | 
| 110 |  |  |  |  |  |  | [ "Nulls", ($self->exclude_nulls ? 'exclude' : 'include')], | 
| 111 |  |  |  |  |  |  | [ "Gang",  $self->count_method], | 
| 112 |  |  |  |  |  |  | [ "Test item in training set", ($self->test_in_train ? 'yes' : 'no')], | 
| 113 |  |  |  |  |  |  | [ "Test item excluded", ($self->given_excluded ? 'yes' : 'no')], | 
| 114 |  |  |  |  |  |  | [ "Size of training set", $self->training_set->size ], | 
| 115 |  |  |  |  |  |  | [ "Number of active features", $self->cardinality ], | 
| 116 |  |  |  |  |  |  | ); | 
| 117 | 2 |  |  |  |  | 16 | my @table = _make_table(\@headers, \@rows); | 
| 118 | 2 |  |  |  |  | 458 | my $info = join '', @table; | 
| 119 | 2 |  |  |  |  | 11 | return \$info; | 
| 120 |  |  |  |  |  |  | } | 
| 121 |  |  |  |  |  |  |  | 
| 122 |  |  |  |  |  |  | # input several variables from AM's guts (sum, pointers, | 
| 123 |  |  |  |  |  |  | # itemcontextchainhead and itemcontextchain). Calculate the | 
| 124 |  |  |  |  |  |  | # prediction statistics, and | 
| 125 |  |  |  |  |  |  | # store information needed for computing analogical sets. | 
| 126 |  |  |  |  |  |  | # Set result to tie/correct/incorrect and also is_tie if | 
| 127 |  |  |  |  |  |  | # expected class is provided, and high_score, scores, winners, and | 
| 128 |  |  |  |  |  |  | # total_points. | 
| 129 |  |  |  |  |  |  | sub _process_stats { | 
| 130 | 193 |  |  | 193 |  | 587 | my ($self, $sum, $pointers, | 
| 131 |  |  |  |  |  |  | $itemcontextchainhead, $itemcontextchain, $context_to_class, | 
| 132 |  |  |  |  |  |  | $gang, $active_feats, $contextsize) = @_; | 
| 133 | 193 |  |  |  |  | 387 | my $total_points = $pointers->{grandtotal}; | 
| 134 | 193 |  |  |  |  | 324 | my $max = ''; | 
| 135 | 193 |  |  |  |  | 329 | my @winners; | 
| 136 |  |  |  |  |  |  | my %scores; | 
| 137 |  |  |  |  |  |  |  | 
| 138 |  |  |  |  |  |  | # iterate all possible classes and store the ones that have a | 
| 139 |  |  |  |  |  |  | # non-zero score. Store the high-scorers, as well. | 
| 140 |  |  |  |  |  |  | # 1) find which one(s) has the highest score (the prediction) and | 
| 141 |  |  |  |  |  |  | # 2) print out the ones with scores (probability of prediction) | 
| 142 | 193 |  |  |  |  | 2838 | for my $class_index (1 .. $self->training_set->num_classes) { | 
| 143 | 559 |  |  |  |  | 672 | my $class_score; | 
| 144 |  |  |  |  |  |  | # skip classes with no score | 
| 145 | 559 | 100 |  |  |  | 1149 | next unless $class_score = $sum->[$class_index]; | 
| 146 |  |  |  |  |  |  |  | 
| 147 | 355 |  |  |  |  | 4870 | my $class = $self->training_set->_class_for_index($class_index); | 
| 148 | 355 |  |  |  |  | 938 | $scores{$class} = $class_score; | 
| 149 |  |  |  |  |  |  |  | 
| 150 |  |  |  |  |  |  | # check if the class has the highest score, or ties for it | 
| 151 | 355 |  |  |  |  | 479 | do { | 
| 152 | 355 |  |  |  |  | 892 | my $cmp = bigcmp($class_score, $max); | 
| 153 | 355 | 100 |  |  |  | 891 | if ($cmp > 0){ | 
|  |  | 100 |  |  |  |  |  | 
| 154 | 274 |  |  |  |  | 739 | @winners = ($class); | 
| 155 | 274 |  |  |  |  | 510 | $max = $class_score; | 
| 156 |  |  |  |  |  |  | }elsif($cmp == 0){ | 
| 157 | 2 |  |  |  |  | 7 | push @winners, $class; | 
| 158 |  |  |  |  |  |  | } | 
| 159 |  |  |  |  |  |  | }; | 
| 160 |  |  |  |  |  |  | } | 
| 161 |  |  |  |  |  |  |  | 
| 162 |  |  |  |  |  |  | # set result to tie/correct/incorrect after comparing | 
| 163 |  |  |  |  |  |  | # expected/actual class labels. Only do this if the expected | 
| 164 |  |  |  |  |  |  | # class label is known. | 
| 165 | 193 | 100 |  |  |  | 2846 | if(my $expected = $self->test_item->class){ | 
| 166 | 191 | 100 | 100 |  |  | 732 | if(exists $scores{$expected} && | 
| 167 |  |  |  |  |  |  | bigcmp($scores{$expected}, $max) == 0){ | 
| 168 | 178 | 100 |  |  |  | 408 | if(@winners > 1){ | 
| 169 | 2 |  |  |  |  | 33 | $self->result('tie'); | 
| 170 |  |  |  |  |  |  | }else{ | 
| 171 | 176 |  |  |  |  | 2550 | $self->result('correct'); | 
| 172 |  |  |  |  |  |  | } | 
| 173 |  |  |  |  |  |  | }else{ | 
| 174 | 13 |  |  |  |  | 180 | $self->result('incorrect'); | 
| 175 |  |  |  |  |  |  | } | 
| 176 |  |  |  |  |  |  | } | 
| 177 | 193 | 100 |  |  |  | 1203 | if(@winners > 1){ | 
| 178 | 2 |  |  |  |  | 29 | $self->is_tie(1); | 
| 179 |  |  |  |  |  |  | } | 
| 180 | 193 |  |  |  |  | 3096 | $self->high_score($max); | 
| 181 | 193 |  |  |  |  | 3406 | $self->scores(\%scores); | 
| 182 | 193 |  |  |  |  | 3402 | $self->winners(\@winners); | 
| 183 | 193 |  |  |  |  | 3169 | $self->total_points($total_points); | 
| 184 | 193 |  |  |  |  | 1020 | $self->{pointers} = $pointers; | 
| 185 | 193 |  |  |  |  | 600 | $self->{itemcontextchainhead} = $itemcontextchainhead; | 
| 186 | 193 |  |  |  |  | 431 | $self->{itemcontextchain} = $itemcontextchain; | 
| 187 | 193 |  |  |  |  | 395 | $self->{context_to_class} = $context_to_class; | 
| 188 | 193 |  |  |  |  | 311 | $self->{gang} = $gang; | 
| 189 | 193 |  |  |  |  | 401 | $self->{active_feats} = $active_feats; | 
| 190 | 193 |  |  |  |  | 376 | $self->{contextsize} = $contextsize; | 
| 191 | 193 |  |  |  |  | 494 | return; | 
| 192 |  |  |  |  |  |  | } | 
| 193 |  |  |  |  |  |  |  | 
| 194 |  |  |  |  |  |  | #pod =head2 C | 
| 195 |  |  |  |  |  |  | #pod | 
| 196 |  |  |  |  |  |  | #pod Returns a scalar reference (string) containing a statistical summary | 
| 197 |  |  |  |  |  |  | #pod of the classification results. The summary includes all possible | 
| 198 |  |  |  |  |  |  | #pod predicted classes with their scores and percentage scores and the | 
| 199 |  |  |  |  |  |  | #pod total score for all classes. Whether the predicted class | 
| 200 |  |  |  |  |  |  | #pod is correct/incorrect/a tie of some sort is also included, if the | 
| 201 |  |  |  |  |  |  | #pod test item had a known class. | 
| 202 |  |  |  |  |  |  | #pod | 
| 203 |  |  |  |  |  |  | #pod =cut | 
| 204 |  |  |  |  |  |  | sub statistical_summary { | 
| 205 | 4 |  |  | 4 | 1 | 1809 | my ($self) = @_; | 
| 206 | 4 |  |  |  |  | 5 | my %scores = %{$self->scores}; | 
|  | 4 |  |  |  |  | 85 |  | 
| 207 | 4 |  |  |  |  | 82 | my $total_points = $self->total_points; | 
| 208 |  |  |  |  |  |  |  | 
| 209 |  |  |  |  |  |  | # Make a table with information about predictions for different | 
| 210 |  |  |  |  |  |  | # classes. Each row contains a class name, the score, | 
| 211 |  |  |  |  |  |  | # and the percentage predicted. | 
| 212 | 4 |  |  |  |  | 16 | my @rows; | 
| 213 | 4 |  |  |  |  | 15 | for my $class (sort keys %scores){ | 
| 214 |  |  |  |  |  |  | push @rows, [ $class, $scores{$class}, | 
| 215 |  |  |  |  |  |  | sprintf($percentage_format, | 
| 216 | 8 |  |  |  |  | 61 | 100 * $scores{$class} / $total_points) ]; | 
| 217 |  |  |  |  |  |  | } | 
| 218 |  |  |  |  |  |  | # add a Total row | 
| 219 | 4 |  |  |  |  | 11 | push @rows, [ 'Total', $total_points ]; | 
| 220 |  |  |  |  |  |  |  | 
| 221 | 4 |  |  |  |  | 11 | my @table = _make_table(['Class', 'Score', 'Percentage'], | 
| 222 |  |  |  |  |  |  | \@rows); | 
| 223 |  |  |  |  |  |  | # copy the rule from the first row into the second to last row | 
| 224 |  |  |  |  |  |  | # to separate the Total row | 
| 225 | 4 |  |  |  |  | 817 | splice(@table, $#table - 1, 0, $table[0]); | 
| 226 |  |  |  |  |  |  |  | 
| 227 | 4 |  |  |  |  | 9 | my $info = "Statistical Summary\n"; | 
| 228 | 4 |  |  |  |  | 16 | $info .= join '', @table; | 
| 229 |  |  |  |  |  |  | # the predicted class (the one with the highest score) | 
| 230 |  |  |  |  |  |  | # and the result (correct/incorrect/tie). | 
| 231 | 4 | 100 |  |  |  | 93 | if ( defined (my $expected = $self->test_item->class) ) { | 
| 232 | 3 |  |  |  |  | 8 | $info .= "Expected class: $expected\n"; | 
| 233 | 3 |  |  |  |  | 21 | my $result = $self->result; | 
| 234 | 3 | 100 |  |  |  | 16 | if ( $result eq 'correct') { | 
|  |  | 100 |  |  |  |  |  | 
| 235 | 1 |  |  |  |  | 3 | $info .= "Correct class predicted.\n"; | 
| 236 |  |  |  |  |  |  | }elsif($result eq 'tie'){ | 
| 237 | 1 |  |  |  |  | 3 | $info .= "Prediction is a tie.\n"; | 
| 238 |  |  |  |  |  |  | }else { | 
| 239 | 1 |  |  |  |  | 3 | $info .= "Incorrect class predicted.\n"; | 
| 240 |  |  |  |  |  |  | } | 
| 241 |  |  |  |  |  |  | }else{ | 
| 242 | 1 |  |  |  |  | 4 | $info .= "Expected class unknown\n"; | 
| 243 |  |  |  |  |  |  | } | 
| 244 | 4 |  |  |  |  | 21 | return \$info; | 
| 245 |  |  |  |  |  |  | } | 
| 246 |  |  |  |  |  |  |  | 
| 247 |  |  |  |  |  |  | #TODO: the keys for this set don't seem to make any sense. | 
| 248 |  |  |  |  |  |  | sub analogical_set { | 
| 249 | 3 |  |  | 3 | 1 | 4542 | my ($self) = @_; | 
| 250 | 3 | 50 |  |  |  | 14 | if(!exists $self->{_analogical_set}){ | 
| 251 | 3 |  |  |  |  | 13 | $self->_calculate_analogical_set; | 
| 252 |  |  |  |  |  |  | } | 
| 253 |  |  |  |  |  |  | # make a safe copy | 
| 254 | 3 |  |  |  |  | 6 | my %set = %{$self->{_analogical_set}}; | 
|  | 3 |  |  |  |  | 13 |  | 
| 255 | 3 |  |  |  |  | 9 | return \%set; | 
| 256 |  |  |  |  |  |  | } | 
| 257 |  |  |  |  |  |  |  | 
| 258 |  |  |  |  |  |  | #pod =head2 C | 
| 259 |  |  |  |  |  |  | #pod | 
| 260 |  |  |  |  |  |  | #pod Returns a scalar reference (string) containing the analogical set, | 
| 261 |  |  |  |  |  |  | #pod meaning all items that contributed to the predicted class, along | 
| 262 |  |  |  |  |  |  | #pod with the amount contributed by each item (score and | 
| 263 |  |  |  |  |  |  | #pod percentage overall). Items are ordered by appearance in the data | 
| 264 |  |  |  |  |  |  | #pod set. | 
| 265 |  |  |  |  |  |  | #pod | 
| 266 |  |  |  |  |  |  | #pod =cut | 
| 267 |  |  |  |  |  |  | sub analogical_set_summary { | 
| 268 | 1 |  |  | 1 | 1 | 1164 | my ($self) = @_; | 
| 269 | 1 |  |  |  |  | 5 | my $set = $self->analogical_set; | 
| 270 | 1 |  |  |  |  | 17 | my $total_points = $self->total_points; | 
| 271 |  |  |  |  |  |  |  | 
| 272 |  |  |  |  |  |  | # Make a table for the analogical set. Each row contains an | 
| 273 |  |  |  |  |  |  | # item with its class, comment, score, and the percentage | 
| 274 |  |  |  |  |  |  | # of total score contributed. | 
| 275 | 1 |  |  |  |  | 4 | my @rows; | 
| 276 | 1 |  |  |  |  | 6 | foreach my $id (sort keys %$set){ | 
| 277 | 4 |  |  |  |  | 7 | my $entry = $set->{$id}; | 
| 278 | 4 |  |  |  |  | 5 | my $score = $entry->{score}; | 
| 279 |  |  |  |  |  |  | push @rows, [ | 
| 280 |  |  |  |  |  |  | $entry->{item}->class, | 
| 281 |  |  |  |  |  |  | $entry->{item}->comment, | 
| 282 | 4 |  |  |  |  | 10 | $score, | 
| 283 |  |  |  |  |  |  | sprintf($percentage_format, 100 * $score / $total_points) | 
| 284 |  |  |  |  |  |  | ]; | 
| 285 |  |  |  |  |  |  | } | 
| 286 | 1 |  |  |  |  | 6 | my @table = _make_table( | 
| 287 |  |  |  |  |  |  | ['Class', 'Item', 'Score', 'Percentage'], \@rows); | 
| 288 | 1 |  |  |  |  | 250 | my $info = "Analogical Set\nTotal Frequency = $total_points\n"; | 
| 289 | 1 |  |  |  |  | 5 | $info .= join '', @table; | 
| 290 | 1 |  |  |  |  | 6 | return \$info; | 
| 291 |  |  |  |  |  |  | } | 
| 292 |  |  |  |  |  |  |  | 
| 293 |  |  |  |  |  |  | # calculate and store analogical effects in $self->{_analogical_set} | 
| 294 |  |  |  |  |  |  | sub _calculate_analogical_set { | 
| 295 | 3 |  |  | 3 |  | 6 | my ($self) = @_; | 
| 296 | 3 |  |  |  |  | 73 | my $train = $self->training_set; | 
| 297 | 3 |  |  |  |  | 15 | my %set; | 
| 298 | 3 |  |  |  |  | 5 | foreach my $context ( keys %{$self->{pointers}} ) { | 
|  | 3 |  |  |  |  | 12 |  | 
| 299 |  |  |  |  |  |  | next unless | 
| 300 | 11 | 100 |  |  |  | 23 | exists $self->{itemcontextchainhead}->{$context}; | 
| 301 | 8 |  |  |  |  | 20 | for ( | 
| 302 |  |  |  |  |  |  | my $index = $self->{itemcontextchainhead}->{$context}; | 
| 303 |  |  |  |  |  |  | defined $index; | 
| 304 |  |  |  |  |  |  | $index = $self->{itemcontextchain}->[$index] | 
| 305 |  |  |  |  |  |  | ) | 
| 306 |  |  |  |  |  |  | { | 
| 307 | 10 |  |  |  |  | 22 | my $item = $train->get_item($index); | 
| 308 |  |  |  |  |  |  | $set{$item->id} = { | 
| 309 |  |  |  |  |  |  | item => $item, | 
| 310 | 10 |  |  |  |  | 33 | score => $self->{pointers}->{$context} | 
| 311 |  |  |  |  |  |  | }; | 
| 312 |  |  |  |  |  |  | } | 
| 313 |  |  |  |  |  |  | } | 
| 314 | 3 |  |  |  |  | 9 | $self->{_analogical_set} = \%set; | 
| 315 | 3 |  |  |  |  | 6 | return; | 
| 316 |  |  |  |  |  |  | } | 
| 317 |  |  |  |  |  |  |  | 
| 318 |  |  |  |  |  |  | sub gang_effects { | 
| 319 | 4 |  |  | 4 | 1 | 627 | my ($self) = @_; | 
| 320 | 4 | 100 |  |  |  | 16 | if(!$self->{_gang_effects}){ | 
| 321 | 3 |  |  |  |  | 11 | $self->_calculate_gangs; | 
| 322 |  |  |  |  |  |  | } | 
| 323 | 4 |  |  |  |  | 11 | return $self->{_gang_effects}; | 
| 324 |  |  |  |  |  |  | } | 
| 325 |  |  |  |  |  |  |  | 
| 326 |  |  |  |  |  |  | #pod =head2 C | 
| 327 |  |  |  |  |  |  | #pod | 
| 328 |  |  |  |  |  |  | #pod Returns a scalar reference (string) containing the gang effects on the | 
| 329 |  |  |  |  |  |  | #pod final class prediction. | 
| 330 |  |  |  |  |  |  | #pod | 
| 331 |  |  |  |  |  |  | #pod A single boolean parameter can be provided to turn on list printing, | 
| 332 |  |  |  |  |  |  | #pod meaning gang items items are printed. This is false (off) by default. | 
| 333 |  |  |  |  |  |  | #pod | 
| 334 |  |  |  |  |  |  | #pod =cut | 
| 335 |  |  |  |  |  |  | sub gang_summary { | 
| 336 | 3 |  |  | 3 | 1 | 2105 | my ($self, $print_list) = @_; | 
| 337 | 3 |  |  |  |  | 71 | my $test_item = $self->test_item; | 
| 338 |  |  |  |  |  |  |  | 
| 339 | 3 |  |  |  |  | 19 | my $gangs = $self->gang_effects; | 
| 340 |  |  |  |  |  |  |  | 
| 341 |  |  |  |  |  |  | # Make a table for the gangs with these rows: | 
| 342 |  |  |  |  |  |  | #   Percentage | 
| 343 |  |  |  |  |  |  | #   Score | 
| 344 |  |  |  |  |  |  | #   Num | 
| 345 |  |  |  |  |  |  | #   Class | 
| 346 |  |  |  |  |  |  | #   Features | 
| 347 |  |  |  |  |  |  | #   item comment | 
| 348 | 3 |  |  |  |  | 5 | my @rows; | 
| 349 |  |  |  |  |  |  | # first row is a header with test item for easy reference | 
| 350 |  |  |  |  |  |  | push @rows, [ | 
| 351 |  |  |  |  |  |  | 'Context', | 
| 352 |  |  |  |  |  |  | undef, | 
| 353 |  |  |  |  |  |  | undef, | 
| 354 |  |  |  |  |  |  | undef, | 
| 355 | 3 |  |  |  |  | 6 | @{$test_item->features}, | 
|  | 3 |  |  |  |  | 10 |  | 
| 356 |  |  |  |  |  |  | ]; | 
| 357 |  |  |  |  |  |  |  | 
| 358 |  |  |  |  |  |  | # store the number of rows added for each gang | 
| 359 |  |  |  |  |  |  | # will help with printing later | 
| 360 | 3 |  |  |  |  | 5 | my @gang_rows; | 
| 361 | 3 |  |  |  |  | 5 | my $current_row = -1; | 
| 362 |  |  |  |  |  |  | # add information for each gang; sort by order of highest to | 
| 363 |  |  |  |  |  |  | # lowest effect | 
| 364 | 3 |  |  |  |  | 7 | foreach my $gang (@$gangs){ | 
| 365 | 10 |  |  |  |  | 12 | $current_row++; | 
| 366 | 10 |  |  |  |  | 14 | $gang_rows[$current_row]++; | 
| 367 | 10 |  |  |  |  | 15 | my $features = $gang->{features}; | 
| 368 |  |  |  |  |  |  | # add the gang supracontext, effect and score | 
| 369 |  |  |  |  |  |  | push @rows, [ | 
| 370 |  |  |  |  |  |  | sprintf($percentage_format, 100 * $gang->{effect}), | 
| 371 |  |  |  |  |  |  | $gang->{score}, | 
| 372 |  |  |  |  |  |  | undef, | 
| 373 |  |  |  |  |  |  | undef, | 
| 374 |  |  |  |  |  |  | # print undefined feature slots as asterisks | 
| 375 | 10 | 100 |  |  |  | 61 | map {length($_) ? $_ : '*'} @$features | 
|  | 58 |  |  |  |  | 110 |  | 
| 376 |  |  |  |  |  |  | ]; | 
| 377 |  |  |  |  |  |  | # add each class in the gang, along with the total number | 
| 378 |  |  |  |  |  |  | # and effect of the gang items supporting it | 
| 379 | 10 |  |  |  |  | 19 | for my $class (sort keys %{ $gang->{class} }){ | 
|  | 10 |  |  |  |  | 30 |  | 
| 380 | 12 |  |  |  |  | 16 | $gang_rows[$current_row]++; | 
| 381 |  |  |  |  |  |  | push @rows, [ | 
| 382 |  |  |  |  |  |  | sprintf($percentage_format, | 
| 383 |  |  |  |  |  |  | 100 * $gang->{class}->{$class}->{effect}), | 
| 384 |  |  |  |  |  |  | $gang->{class}->{$class}->{score}, | 
| 385 | 12 |  |  |  |  | 42 | scalar @{ $gang->{data}->{$class} }, | 
|  | 12 |  |  |  |  | 33 |  | 
| 386 |  |  |  |  |  |  | $class, | 
| 387 |  |  |  |  |  |  | undef | 
| 388 |  |  |  |  |  |  | ]; | 
| 389 | 12 | 100 |  |  |  | 26 | if($print_list){ | 
| 390 |  |  |  |  |  |  | # add the list of items in the given context | 
| 391 | 4 |  |  |  |  | 6 | for my $item (@{ $gang->{data}->{$class} }){ | 
|  | 4 |  |  |  |  | 7 |  | 
| 392 | 4 |  |  |  |  | 5 | $gang_rows[$current_row]++; | 
| 393 |  |  |  |  |  |  | push @rows, [ | 
| 394 |  |  |  |  |  |  | undef, | 
| 395 |  |  |  |  |  |  | undef, | 
| 396 |  |  |  |  |  |  | undef, | 
| 397 |  |  |  |  |  |  | undef, | 
| 398 | 4 |  |  |  |  | 6 | @{ $item->features }, | 
|  | 4 |  |  |  |  | 8 |  | 
| 399 |  |  |  |  |  |  | $item->comment, | 
| 400 |  |  |  |  |  |  | ]; | 
| 401 |  |  |  |  |  |  | } | 
| 402 |  |  |  |  |  |  | } | 
| 403 |  |  |  |  |  |  | } | 
| 404 |  |  |  |  |  |  | } | 
| 405 |  |  |  |  |  |  |  | 
| 406 |  |  |  |  |  |  | # construct the table from the rows | 
| 407 |  |  |  |  |  |  | my @headers = ( | 
| 408 |  |  |  |  |  |  | \'| ', | 
| 409 |  |  |  |  |  |  | 'Percentage' => \' | ', | 
| 410 |  |  |  |  |  |  | 'Score' => \' | ', | 
| 411 |  |  |  |  |  |  | 'Num Items' => \' | ', | 
| 412 |  |  |  |  |  |  | 'Class' => \' | ', | 
| 413 | 3 |  |  |  |  | 8 | ('' => \' ') x @{$test_item->features} | 
|  | 3 |  |  |  |  | 7 |  | 
| 414 |  |  |  |  |  |  | ); | 
| 415 | 3 |  |  |  |  | 6 | pop @headers; | 
| 416 | 3 | 100 |  |  |  | 8 | if($print_list){ | 
| 417 | 1 |  |  |  |  | 3 | push @headers, \' | ', 'Item Comment'; | 
| 418 |  |  |  |  |  |  | } | 
| 419 | 3 |  |  |  |  | 3 | push @headers, \' |'; | 
| 420 | 3 |  |  |  |  | 5 | my @rule = qw(- +); | 
| 421 | 3 |  |  |  |  | 17 | my $table = Text::Table->new(@headers); | 
| 422 | 3 |  |  |  |  | 14139 | $table->load(@rows); | 
| 423 |  |  |  |  |  |  | # main header | 
| 424 | 3 |  |  |  |  | 2636 | $current_row = 0; | 
| 425 | 3 |  |  |  |  | 8 | my $return = $table->rule(@rule) . | 
| 426 |  |  |  |  |  |  | $table->title . | 
| 427 |  |  |  |  |  |  | $table->body($current_row) . | 
| 428 |  |  |  |  |  |  | $table->rule(@rule); | 
| 429 | 3 |  |  |  |  | 140404 | $current_row++; | 
| 430 |  |  |  |  |  |  | # add info with a header for each gang | 
| 431 | 3 |  |  |  |  | 11 | for my $num (@gang_rows){ | 
| 432 |  |  |  |  |  |  | # a row of '*' separates each gang | 
| 433 | 10 |  |  |  |  | 23 | $return .= $table->rule('*','*') . | 
| 434 |  |  |  |  |  |  | $table->body($current_row) . | 
| 435 |  |  |  |  |  |  | $table->rule(@rule); | 
| 436 | 10 |  |  |  |  | 4706 | $current_row++; | 
| 437 | 10 |  |  |  |  | 48 | for(1 .. $num - 1){ | 
| 438 | 16 |  |  |  |  | 34 | $return .= $table->body($current_row); | 
| 439 | 16 |  |  |  |  | 1042 | $current_row++; | 
| 440 |  |  |  |  |  |  | } | 
| 441 |  |  |  |  |  |  | } | 
| 442 | 3 |  |  |  |  | 11 | $return .= $table->rule(@rule); | 
| 443 | 3 |  |  |  |  | 749 | return \$return; | 
| 444 |  |  |  |  |  |  | } | 
| 445 |  |  |  |  |  |  |  | 
| 446 |  |  |  |  |  |  | sub _calculate_gangs { | 
| 447 | 3 |  |  | 3 |  | 6 | my ($self) = @_; | 
| 448 | 3 |  |  |  |  | 85 | my $train = $self->training_set; | 
| 449 | 3 |  |  |  |  | 55 | my $total_points = $self->total_points; | 
| 450 | 3 |  |  |  |  | 18 | my $raw_gang = $self->{gang}; | 
| 451 | 3 |  |  |  |  | 6 | my @gangs; | 
| 452 |  |  |  |  |  |  |  | 
| 453 | 3 |  |  |  |  | 5 | foreach my $context (keys %{$raw_gang}) | 
|  | 3 |  |  |  |  | 14 |  | 
| 454 |  |  |  |  |  |  | { | 
| 455 | 10 |  |  |  |  | 15 | my $gang = {}; | 
| 456 | 10 |  |  |  |  | 24 | my @features = $self->_unpack_supracontext($context); | 
| 457 |  |  |  |  |  |  | # for now, store gangs by the supracontext printout | 
| 458 | 10 | 100 |  |  |  | 23 | my $key = join ' ', map {length($_) ? $_ : '-'} @features; | 
|  | 58 |  |  |  |  | 129 |  | 
| 459 | 10 |  |  |  |  | 29 | $gang->{score} = $raw_gang->{$context}; | 
| 460 | 10 |  |  |  |  | 26 | $gang->{effect} = $raw_gang->{$context} / $total_points; | 
| 461 | 10 |  |  |  |  | 15 | $gang->{features} = \@features; | 
| 462 |  |  |  |  |  |  |  | 
| 463 | 10 |  |  |  |  | 20 | my $p = $self->{pointers}->{$context}; | 
| 464 |  |  |  |  |  |  | # if the supracontext is homogenous | 
| 465 | 10 | 100 |  |  |  | 24 | if ( my $class_index = $self->{context_to_class}->{$context} ) { | 
| 466 |  |  |  |  |  |  | # store a 'homogenous' key that indicates this, besides | 
| 467 |  |  |  |  |  |  | # indicating the unanimous class prediction. | 
| 468 | 8 |  |  |  |  | 25 | my $class = $train->_class_for_index($class_index); | 
| 469 | 8 |  |  |  |  | 13 | $gang->{homogenous} = $class; | 
| 470 | 8 |  |  |  |  | 13 | my @data; | 
| 471 | 8 |  |  |  |  | 23 | for ( | 
| 472 |  |  |  |  |  |  | my $index = $self->{itemcontextchainhead}->{$context}; | 
| 473 |  |  |  |  |  |  | defined $index; | 
| 474 |  |  |  |  |  |  | $index = $self->{itemcontextchain}->[$index] | 
| 475 |  |  |  |  |  |  | ) | 
| 476 |  |  |  |  |  |  | { | 
| 477 | 9 |  |  |  |  | 24 | push @data, $train->get_item($index); | 
| 478 |  |  |  |  |  |  | } | 
| 479 | 8 |  |  |  |  | 19 | $gang->{data}->{$class} = \@data; | 
| 480 | 8 |  |  |  |  | 15 | $gang->{size} = scalar @data; | 
| 481 | 8 |  |  |  |  | 19 | $gang->{class}->{$class}->{score} = $p; | 
| 482 |  |  |  |  |  |  | $gang->{class}->{$class}->{effect} = | 
| 483 | 8 |  |  |  |  | 15 | $gang->{effect}; | 
| 484 |  |  |  |  |  |  | } | 
| 485 |  |  |  |  |  |  | # for heterogenous supracontexts we have to store data for | 
| 486 |  |  |  |  |  |  | # each class | 
| 487 |  |  |  |  |  |  | else { | 
| 488 | 2 |  |  |  |  | 5 | $gang->{homogenous} = 0; | 
| 489 |  |  |  |  |  |  | # first loop through the data and sort by class, also | 
| 490 |  |  |  |  |  |  | # finding the total gang size | 
| 491 | 2 |  |  |  |  | 4 | my $size = 0; | 
| 492 | 2 |  |  |  |  | 2 | my %data; | 
| 493 | 2 |  |  |  |  | 9 | for ( | 
| 494 |  |  |  |  |  |  | my $index = $self->{itemcontextchainhead}->{$context}; | 
| 495 |  |  |  |  |  |  | defined $index; | 
| 496 |  |  |  |  |  |  | $index = $self->{itemcontextchain}->[$index] | 
| 497 |  |  |  |  |  |  | ) | 
| 498 |  |  |  |  |  |  | { | 
| 499 | 4 |  |  |  |  | 13 | my $item = $train->get_item($index); | 
| 500 | 4 |  |  |  |  | 6 | push @{ $data{$item->class} }, $item; | 
|  | 4 |  |  |  |  | 8 |  | 
| 501 | 4 |  |  |  |  | 13 | $size++; | 
| 502 |  |  |  |  |  |  | } | 
| 503 | 2 |  |  |  |  | 5 | $gang->{data} = \%data; | 
| 504 | 2 |  |  |  |  | 3 | $gang->{size} = $size; | 
| 505 |  |  |  |  |  |  |  | 
| 506 |  |  |  |  |  |  | # then store aggregate statistics for each class | 
| 507 | 2 |  |  |  |  | 7 | for my $class (keys %data){ | 
| 508 | 4 |  |  |  |  | 9 | $gang->{class}->{$class}->{score} = $p; | 
| 509 |  |  |  |  |  |  | $gang->{class}->{$class}->{effect} = | 
| 510 |  |  |  |  |  |  | # score*num_data/total | 
| 511 | 4 |  |  |  |  | 6 | @{ $data{$class} } * $p / $total_points; | 
|  | 4 |  |  |  |  | 12 |  | 
| 512 |  |  |  |  |  |  | } | 
| 513 |  |  |  |  |  |  | } | 
| 514 | 10 |  |  |  |  | 20 | push @gangs, $gang; | 
| 515 |  |  |  |  |  |  | } | 
| 516 |  |  |  |  |  |  |  | 
| 517 |  |  |  |  |  |  | # sort by score and then alphabetically by class labels | 
| 518 |  |  |  |  |  |  | @gangs = sort{ | 
| 519 | 3 |  |  |  |  | 17 | bigcmp($b->{score}, $a->{score}) || | 
| 520 | 0 |  |  |  |  | 0 | (join '', sort keys %{ $b->{class} }) | 
| 521 |  |  |  |  |  |  | cmp | 
| 522 | 11 | 50 |  |  |  | 29 | (join '', sort keys %{ $a->{class} })} @gangs; | 
|  | 0 |  |  |  |  | 0 |  | 
| 523 | 3 |  |  |  |  | 9 | $self->{_gang_effects} = \@gangs; | 
| 524 | 3 |  |  |  |  | 6 | return; | 
| 525 |  |  |  |  |  |  | } | 
| 526 |  |  |  |  |  |  |  | 
| 527 |  |  |  |  |  |  | # Unpack and return the supracontext features. | 
| 528 |  |  |  |  |  |  | # Blank entries mean the variable may be anything, e.g. | 
| 529 |  |  |  |  |  |  | # ('a' 'b' '') means a supracontext containing items | 
| 530 |  |  |  |  |  |  | # wich have ('a' 'b' whatever) as variable values. | 
| 531 |  |  |  |  |  |  | sub _unpack_supracontext { | 
| 532 | 10 |  |  | 10 |  | 22 | my ($self, $context) = @_; | 
| 533 | 10 |  |  |  |  | 36 | my @context_list = unpack "S!4", $context; | 
| 534 | 10 |  |  |  |  | 16 | my @alist = @{$self->{active_feats}}; | 
|  | 10 |  |  |  |  | 23 |  | 
| 535 | 10 |  |  |  |  | 13 | my (@features) = @{ $self->test_item->features }; | 
|  | 10 |  |  |  |  | 183 |  | 
| 536 | 10 |  |  |  |  | 146 | my $exclude_nulls = $self->exclude_nulls; | 
| 537 | 10 |  |  |  |  | 44 | my $j = 1; | 
| 538 | 10 |  |  |  |  | 21 | foreach my $a (reverse @alist) { | 
| 539 | 40 |  |  |  |  | 51 | my $partial_context = pop @context_list; | 
| 540 | 40 |  |  |  |  | 868 | for ( ; $a ; --$a ) { | 
| 541 | 46 | 50 |  |  |  | 64 | if($exclude_nulls){ | 
| 542 | 46 |  |  |  |  | 83 | ++$j while !defined $features[ -$j ]; | 
| 543 |  |  |  |  |  |  | } | 
| 544 | 46 | 100 |  |  |  | 79 | $features[ -$j ] = '' if $partial_context & 1; | 
| 545 | 46 |  |  |  |  | 71 | $partial_context >>= 1; | 
| 546 | 46 |  |  |  |  | 73 | ++$j; | 
| 547 |  |  |  |  |  |  | } | 
| 548 |  |  |  |  |  |  | } | 
| 549 | 10 |  |  |  |  | 39 | return @features; | 
| 550 |  |  |  |  |  |  | } | 
| 551 |  |  |  |  |  |  |  | 
| 552 |  |  |  |  |  |  | # mostly by Ovid: | 
| 553 |  |  |  |  |  |  | # http://use.perl.org/use.perl.org/_Ovid/journal/36762.html | 
| 554 |  |  |  |  |  |  | # Return table rows with a nice header and column separators | 
| 555 |  |  |  |  |  |  | sub _make_table { | 
| 556 | 7 |  |  | 7 |  | 12 | my ( $headers, $rows ) = @_; | 
| 557 |  |  |  |  |  |  |  | 
| 558 | 7 |  |  |  |  | 17 | my @rule      = qw(- +); | 
| 559 | 7 |  |  |  |  | 9 | my @headers   = \'| '; | 
| 560 | 7 |  |  |  |  | 15 | push @headers => map { $_ => \' | ' } @$headers; | 
|  | 20 |  |  |  |  | 36 |  | 
| 561 | 7 |  |  |  |  | 11 | pop  @headers; | 
| 562 | 7 |  |  |  |  | 9 | push @headers => \' |'; | 
| 563 |  |  |  |  |  |  |  | 
| 564 | 7 | 50 | 33 |  |  | 36 | unless ('ARRAY' eq ref $rows | 
|  |  |  | 33 |  |  |  |  | 
| 565 |  |  |  |  |  |  | && 'ARRAY' eq ref $rows->[0] | 
| 566 | 7 |  |  |  |  | 17 | && @$headers == @{ $rows->[0] }) { | 
| 567 | 0 |  |  |  |  | 0 | croak( | 
| 568 |  |  |  |  |  |  | "make_table() rows must be an AoA with rows being same size as headers" | 
| 569 |  |  |  |  |  |  | ); | 
| 570 |  |  |  |  |  |  | } | 
| 571 | 7 |  |  |  |  | 23 | my $table = Text::Table->new(@headers); | 
| 572 | 7 |  |  |  |  | 11763 | $table->rule(@rule); | 
| 573 | 7 |  |  |  |  | 14279 | $table->body_rule(@rule); | 
| 574 | 7 |  |  |  |  | 1124 | $table->load(@$rows); | 
| 575 |  |  |  |  |  |  |  | 
| 576 |  |  |  |  |  |  | return $table->rule(@rule), | 
| 577 |  |  |  |  |  |  | $table->title, | 
| 578 |  |  |  |  |  |  | $table->rule(@rule), | 
| 579 | 7 |  |  |  |  | 1907 | map({ $table->body($_) } 0 .. @$rows), | 
|  | 37 |  |  |  |  | 53114 |  | 
| 580 |  |  |  |  |  |  | $table->rule(@rule); | 
| 581 |  |  |  |  |  |  | } | 
| 582 |  |  |  |  |  |  |  | 
| 583 |  |  |  |  |  |  | 1; | 
| 584 |  |  |  |  |  |  |  | 
| 585 |  |  |  |  |  |  | __END__ |