line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# encapsulate information about a single classification result |
2
|
|
|
|
|
|
|
package Algorithm::AM::Result; |
3
|
10
|
|
|
10
|
|
30
|
use strict; |
|
10
|
|
|
|
|
10
|
|
|
10
|
|
|
|
|
236
|
|
4
|
10
|
|
|
10
|
|
30
|
use warnings; |
|
10
|
|
|
|
|
10
|
|
|
10
|
|
|
|
|
287
|
|
5
|
|
|
|
|
|
|
our $VERSION = '3.10'; |
6
|
|
|
|
|
|
|
# ABSTRACT: Store results of an AM classification |
7
|
10
|
|
|
10
|
|
5464
|
use Text::Table; |
|
10
|
|
|
|
|
111345
|
|
|
10
|
|
|
|
|
995
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
#pod =head2 SYNOPSIS |
10
|
|
|
|
|
|
|
#pod |
11
|
|
|
|
|
|
|
#pod use Algorithm::AM; |
12
|
|
|
|
|
|
|
#pod |
13
|
|
|
|
|
|
|
#pod my $am = Algorithm::AM->new('finnverb', -commas => 'no'); |
14
|
|
|
|
|
|
|
#pod my ($result) = $am->classify; |
15
|
|
|
|
|
|
|
#pod print @{ $result->winners }; |
16
|
|
|
|
|
|
|
#pod print $result->statistical_summary; |
17
|
|
|
|
|
|
|
#pod |
18
|
|
|
|
|
|
|
#pod =head2 DESCRIPTION |
19
|
|
|
|
|
|
|
#pod |
20
|
|
|
|
|
|
|
#pod This package encapsulates all of the classification information |
21
|
|
|
|
|
|
|
#pod generated by L, including the assigned class, |
22
|
|
|
|
|
|
|
#pod score to each class, gang effects, analogical sets, |
23
|
|
|
|
|
|
|
#pod and timing information. It also provides several methods for |
24
|
|
|
|
|
|
|
#pod generating printable reports with this information. |
25
|
|
|
|
|
|
|
#pod |
26
|
|
|
|
|
|
|
#pod Note that the words 'score' and 'point' are used here to represent |
27
|
|
|
|
|
|
|
#pod whatever count is assigned by analogical modeling during |
28
|
|
|
|
|
|
|
#pod classification. This can be either pointers or occurrences. For an |
29
|
|
|
|
|
|
|
#pod explanation of this, see L. |
30
|
|
|
|
|
|
|
#pod |
31
|
|
|
|
|
|
|
#pod All of the scores returned by the methods here are scalars with |
32
|
|
|
|
|
|
|
#pod special PV and NV values. You should excercise caution when doing |
33
|
|
|
|
|
|
|
#pod calculations with them. See L for more |
34
|
|
|
|
|
|
|
#pod information. |
35
|
|
|
|
|
|
|
#pod |
36
|
|
|
|
|
|
|
#pod =cut |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
## TODO: variables consider exporting someday |
39
|
|
|
|
|
|
|
## @itemcontextchain |
40
|
|
|
|
|
|
|
## %itemcontextchainhead |
41
|
|
|
|
|
|
|
## %context_to_class |
42
|
|
|
|
|
|
|
## %contextsize |
43
|
|
|
|
|
|
|
use Class::Tiny qw( |
44
|
|
|
|
|
|
|
exclude_nulls |
45
|
|
|
|
|
|
|
given_excluded |
46
|
|
|
|
|
|
|
cardinality |
47
|
|
|
|
|
|
|
test_in_train |
48
|
|
|
|
|
|
|
test_item |
49
|
|
|
|
|
|
|
count_method |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
start_time |
52
|
|
|
|
|
|
|
end_time |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
training_set |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
scores |
57
|
|
|
|
|
|
|
high_score |
58
|
|
|
|
|
|
|
total_points |
59
|
|
|
|
|
|
|
winners |
60
|
|
|
|
|
|
|
is_tie |
61
|
|
|
|
|
|
|
result |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
scores_normalized |
64
|
|
|
|
|
|
|
), { |
65
|
|
|
|
|
|
|
'scores_normalized' => sub { |
66
|
1
|
|
|
|
|
633
|
my ($self) = @_; |
67
|
1
|
|
|
|
|
13
|
my $total_points = $self->total_points; |
68
|
1
|
|
|
|
|
16
|
my $scores = $self->scores; |
69
|
1
|
|
|
|
|
4
|
my $normalized = {}; |
70
|
1
|
|
|
|
|
3
|
for my $class (keys %$scores){ |
71
|
2
|
|
|
|
|
4
|
$normalized->{$class} = $scores->{$class} / $total_points |
72
|
|
|
|
|
|
|
} |
73
|
1
|
|
|
|
|
14
|
return $normalized; |
74
|
|
|
|
|
|
|
}, |
75
|
10
|
|
|
10
|
|
49
|
}; |
|
10
|
|
|
|
|
10
|
|
|
10
|
|
|
|
|
103
|
|
76
|
10
|
|
|
10
|
|
12896
|
use Carp 'croak'; |
|
10
|
|
|
|
|
12
|
|
|
10
|
|
|
|
|
337
|
|
77
|
10
|
|
|
10
|
|
2965
|
use Algorithm::AM::BigInt 'bigcmp'; |
|
10
|
|
|
|
|
13
|
|
|
10
|
|
|
|
|
15573
|
|
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
# For printing percentages in reports |
80
|
|
|
|
|
|
|
my $percentage_format = '%.3f'; |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
#pod =head1 REPORT METHODS |
83
|
|
|
|
|
|
|
#pod |
84
|
|
|
|
|
|
|
#pod The methods below return human eye-friendly reports about the |
85
|
|
|
|
|
|
|
#pod classification. The return value is a reference, so it must be |
86
|
|
|
|
|
|
|
#pod dereferenced for printing like so: |
87
|
|
|
|
|
|
|
#pod |
88
|
|
|
|
|
|
|
#pod print ${ $result->statistical_summary }; |
89
|
|
|
|
|
|
|
#pod |
90
|
|
|
|
|
|
|
#pod =head2 C |
91
|
|
|
|
|
|
|
#pod |
92
|
|
|
|
|
|
|
#pod Returns a scalar (string) ref containing information about the |
93
|
|
|
|
|
|
|
#pod configuration at the time of classification. Information from the |
94
|
|
|
|
|
|
|
#pod following accessors is included: |
95
|
|
|
|
|
|
|
#pod |
96
|
|
|
|
|
|
|
#pod exclude_nulls |
97
|
|
|
|
|
|
|
#pod given_excluded |
98
|
|
|
|
|
|
|
#pod cardinality |
99
|
|
|
|
|
|
|
#pod test_in_train |
100
|
|
|
|
|
|
|
#pod test_item |
101
|
|
|
|
|
|
|
#pod count_method |
102
|
|
|
|
|
|
|
#pod |
103
|
|
|
|
|
|
|
#pod =cut |
104
|
|
|
|
|
|
|
sub config_info { |
105
|
2
|
|
|
2
|
1
|
831
|
my ($self) = @_; |
106
|
2
|
|
|
|
|
4
|
my @headers = ('Option', 'Setting'); |
107
|
|
|
|
|
|
|
my @rows = ( |
108
|
2
|
100
|
|
|
|
2
|
[ "Given context", (join ' ', @{$self->test_item->features}) . |
|
2
|
100
|
|
|
|
42
|
|
|
|
100
|
|
|
|
|
|
109
|
|
|
|
|
|
|
', ' . $self->test_item->comment], |
110
|
|
|
|
|
|
|
[ "Nulls", ($self->exclude_nulls ? 'exclude' : 'include')], |
111
|
|
|
|
|
|
|
[ "Gang", $self->count_method], |
112
|
|
|
|
|
|
|
[ "Test item in training set", ($self->test_in_train ? 'yes' : 'no')], |
113
|
|
|
|
|
|
|
[ "Test item excluded", ($self->given_excluded ? 'yes' : 'no')], |
114
|
|
|
|
|
|
|
[ "Size of training set", $self->training_set->size ], |
115
|
|
|
|
|
|
|
[ "Number of active features", $self->cardinality ], |
116
|
|
|
|
|
|
|
); |
117
|
2
|
|
|
|
|
17
|
my @table = _make_table(\@headers, \@rows); |
118
|
2
|
|
|
|
|
289
|
my $info = join '', @table; |
119
|
2
|
|
|
|
|
8
|
return \$info; |
120
|
|
|
|
|
|
|
} |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
# input several variables from AM's guts (sum, pointers, |
123
|
|
|
|
|
|
|
# itemcontextchainhead and itemcontextchain). Calculate the |
124
|
|
|
|
|
|
|
# prediction statistics, and |
125
|
|
|
|
|
|
|
# store information needed for computing analogical sets. |
126
|
|
|
|
|
|
|
# Set result to tie/correct/incorrect and also is_tie if |
127
|
|
|
|
|
|
|
# expected class is provided, and high_score, scores, winners, and |
128
|
|
|
|
|
|
|
# total_points. |
129
|
|
|
|
|
|
|
sub _process_stats { |
130
|
193
|
|
|
193
|
|
228
|
my ($self, $sum, $pointers, |
131
|
|
|
|
|
|
|
$itemcontextchainhead, $itemcontextchain, $context_to_class, |
132
|
|
|
|
|
|
|
$gang, $active_feats, $contextsize) = @_; |
133
|
193
|
|
|
|
|
215
|
my $total_points = $pointers->{grandtotal}; |
134
|
193
|
|
|
|
|
166
|
my $max = ''; |
135
|
193
|
|
|
|
|
141
|
my @winners; |
136
|
|
|
|
|
|
|
my %scores; |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
# iterate all possible classes and store the ones that have a |
139
|
|
|
|
|
|
|
# non-zero score. Store the high-scorers, as well. |
140
|
|
|
|
|
|
|
# 1) find which one(s) has the highest score (the prediction) and |
141
|
|
|
|
|
|
|
# 2) print out the ones with scores (probability of prediction) |
142
|
193
|
|
|
|
|
2522
|
for my $class_index (1 .. $self->training_set->num_classes) { |
143
|
559
|
|
|
|
|
353
|
my $class_score; |
144
|
|
|
|
|
|
|
# skip classes with no score |
145
|
559
|
100
|
|
|
|
970
|
next unless $class_score = $sum->[$class_index]; |
146
|
|
|
|
|
|
|
|
147
|
355
|
|
|
|
|
4426
|
my $class = $self->training_set->_class_for_index($class_index); |
148
|
355
|
|
|
|
|
499
|
$scores{$class} = $class_score; |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
# check if the class has the highest score, or ties for it |
151
|
355
|
|
|
|
|
264
|
do { |
152
|
355
|
|
|
|
|
575
|
my $cmp = bigcmp($class_score, $max); |
153
|
355
|
100
|
|
|
|
560
|
if ($cmp > 0){ |
|
|
100
|
|
|
|
|
|
154
|
274
|
|
|
|
|
370
|
@winners = ($class); |
155
|
274
|
|
|
|
|
337
|
$max = $class_score; |
156
|
|
|
|
|
|
|
}elsif($cmp == 0){ |
157
|
2
|
|
|
|
|
3
|
push @winners, $class; |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
}; |
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
# set result to tie/correct/incorrect after comparing |
163
|
|
|
|
|
|
|
# expected/actual class labels. Only do this if the expected |
164
|
|
|
|
|
|
|
# class label is known. |
165
|
193
|
100
|
|
|
|
2593
|
if(my $expected = $self->test_item->class){ |
166
|
191
|
100
|
100
|
|
|
559
|
if(exists $scores{$expected} && |
167
|
|
|
|
|
|
|
bigcmp($scores{$expected}, $max) == 0){ |
168
|
178
|
100
|
|
|
|
227
|
if(@winners > 1){ |
169
|
2
|
|
|
|
|
26
|
$self->result('tie'); |
170
|
|
|
|
|
|
|
}else{ |
171
|
176
|
|
|
|
|
2272
|
$self->result('correct'); |
172
|
|
|
|
|
|
|
} |
173
|
|
|
|
|
|
|
}else{ |
174
|
13
|
|
|
|
|
173
|
$self->result('incorrect'); |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
} |
177
|
193
|
100
|
|
|
|
864
|
if(@winners > 1){ |
178
|
2
|
|
|
|
|
26
|
$self->is_tie(1); |
179
|
|
|
|
|
|
|
} |
180
|
193
|
|
|
|
|
2429
|
$self->high_score($max); |
181
|
193
|
|
|
|
|
2822
|
$self->scores(\%scores); |
182
|
193
|
|
|
|
|
2802
|
$self->winners(\@winners); |
183
|
193
|
|
|
|
|
2834
|
$self->total_points($total_points); |
184
|
193
|
|
|
|
|
664
|
$self->{pointers} = $pointers; |
185
|
193
|
|
|
|
|
363
|
$self->{itemcontextchainhead} = $itemcontextchainhead; |
186
|
193
|
|
|
|
|
169
|
$self->{itemcontextchain} = $itemcontextchain; |
187
|
193
|
|
|
|
|
159
|
$self->{context_to_class} = $context_to_class; |
188
|
193
|
|
|
|
|
186
|
$self->{gang} = $gang; |
189
|
193
|
|
|
|
|
167
|
$self->{active_feats} = $active_feats; |
190
|
193
|
|
|
|
|
143
|
$self->{contextsize} = $contextsize; |
191
|
193
|
|
|
|
|
336
|
return; |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
#pod =head2 C |
195
|
|
|
|
|
|
|
#pod |
196
|
|
|
|
|
|
|
#pod Returns a scalar reference (string) containing a statistical summary |
197
|
|
|
|
|
|
|
#pod of the classification results. The summary includes all possible |
198
|
|
|
|
|
|
|
#pod predicted classes with their scores and percentage scores and the |
199
|
|
|
|
|
|
|
#pod total score for all classes. Whether the predicted class |
200
|
|
|
|
|
|
|
#pod is correct/incorrect/a tie of some sort is also included, if the |
201
|
|
|
|
|
|
|
#pod test item had a known class. |
202
|
|
|
|
|
|
|
#pod |
203
|
|
|
|
|
|
|
#pod =cut |
204
|
|
|
|
|
|
|
sub statistical_summary { |
205
|
4
|
|
|
4
|
1
|
825
|
my ($self) = @_; |
206
|
4
|
|
|
|
|
3
|
my %scores = %{$self->scores}; |
|
4
|
|
|
|
|
66
|
|
207
|
4
|
|
|
|
|
77
|
my $total_points = $self->total_points; |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
# Make a table with information about predictions for different |
210
|
|
|
|
|
|
|
# classes. Each row contains a class name, the score, |
211
|
|
|
|
|
|
|
# and the percentage predicted. |
212
|
4
|
|
|
|
|
14
|
my @rows; |
213
|
4
|
|
|
|
|
12
|
for my $class (sort keys %scores){ |
214
|
|
|
|
|
|
|
push @rows, [ $class, $scores{$class}, |
215
|
|
|
|
|
|
|
sprintf($percentage_format, |
216
|
8
|
|
|
|
|
53
|
100 * $scores{$class} / $total_points) ]; |
217
|
|
|
|
|
|
|
} |
218
|
|
|
|
|
|
|
# add a Total row |
219
|
4
|
|
|
|
|
6
|
push @rows, [ 'Total', $total_points ]; |
220
|
|
|
|
|
|
|
|
221
|
4
|
|
|
|
|
10
|
my @table = _make_table(['Class', 'Score', 'Percentage'], |
222
|
|
|
|
|
|
|
\@rows); |
223
|
|
|
|
|
|
|
# copy the rule from the first row into the second to last row |
224
|
|
|
|
|
|
|
# to separate the Total row |
225
|
4
|
|
|
|
|
485
|
splice(@table, $#table - 1, 0, $table[0]); |
226
|
|
|
|
|
|
|
|
227
|
4
|
|
|
|
|
5
|
my $info = "Statistical Summary\n"; |
228
|
4
|
|
|
|
|
9
|
$info .= join '', @table; |
229
|
|
|
|
|
|
|
# the predicted class (the one with the highest score) |
230
|
|
|
|
|
|
|
# and the result (correct/incorrect/tie). |
231
|
4
|
100
|
|
|
|
84
|
if ( defined (my $expected = $self->test_item->class) ) { |
232
|
3
|
|
|
|
|
7
|
$info .= "Expected class: $expected\n"; |
233
|
3
|
|
|
|
|
18
|
my $result = $self->result; |
234
|
3
|
100
|
|
|
|
13
|
if ( $result eq 'correct') { |
|
|
100
|
|
|
|
|
|
235
|
1
|
|
|
|
|
1
|
$info .= "Correct class predicted.\n"; |
236
|
|
|
|
|
|
|
}elsif($result eq 'tie'){ |
237
|
1
|
|
|
|
|
2
|
$info .= "Prediction is a tie.\n"; |
238
|
|
|
|
|
|
|
}else { |
239
|
1
|
|
|
|
|
2
|
$info .= "Incorrect class predicted.\n"; |
240
|
|
|
|
|
|
|
} |
241
|
|
|
|
|
|
|
}else{ |
242
|
1
|
|
|
|
|
3
|
$info .= "Expected class unknown\n"; |
243
|
|
|
|
|
|
|
} |
244
|
4
|
|
|
|
|
18
|
return \$info; |
245
|
|
|
|
|
|
|
} |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
#TODO: the keys for this set don't seem to make any sense. |
248
|
|
|
|
|
|
|
sub analogical_set { |
249
|
3
|
|
|
3
|
1
|
2253
|
my ($self) = @_; |
250
|
3
|
50
|
|
|
|
10
|
if(!exists $self->{_analogical_set}){ |
251
|
3
|
|
|
|
|
7
|
$self->_calculate_analogical_set; |
252
|
|
|
|
|
|
|
} |
253
|
|
|
|
|
|
|
# make a safe copy |
254
|
3
|
|
|
|
|
4
|
my %set = %{$self->{_analogical_set}}; |
|
3
|
|
|
|
|
9
|
|
255
|
3
|
|
|
|
|
6
|
return \%set; |
256
|
|
|
|
|
|
|
} |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
#pod =head2 C |
259
|
|
|
|
|
|
|
#pod |
260
|
|
|
|
|
|
|
#pod Returns a scalar reference (string) containing the analogical set, |
261
|
|
|
|
|
|
|
#pod meaning all items that contributed to the predicted class, along |
262
|
|
|
|
|
|
|
#pod with the amount contributed by each item (score and |
263
|
|
|
|
|
|
|
#pod percentage overall). Items are ordered by appearance in the data |
264
|
|
|
|
|
|
|
#pod set. |
265
|
|
|
|
|
|
|
#pod |
266
|
|
|
|
|
|
|
#pod =cut |
267
|
|
|
|
|
|
|
sub analogical_set_summary { |
268
|
1
|
|
|
1
|
1
|
510
|
my ($self) = @_; |
269
|
1
|
|
|
|
|
3
|
my $set = $self->analogical_set; |
270
|
1
|
|
|
|
|
22
|
my $total_points = $self->total_points; |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
# Make a table for the analogical set. Each row contains an |
273
|
|
|
|
|
|
|
# item with its class, comment, score, and the percentage |
274
|
|
|
|
|
|
|
# of total score contributed. |
275
|
1
|
|
|
|
|
4
|
my @rows; |
276
|
1
|
|
|
|
|
5
|
foreach my $id (sort keys %$set){ |
277
|
4
|
|
|
|
|
3
|
my $entry = $set->{$id}; |
278
|
4
|
|
|
|
|
4
|
my $score = $entry->{score}; |
279
|
|
|
|
|
|
|
push @rows, [ |
280
|
|
|
|
|
|
|
$entry->{item}->class, |
281
|
|
|
|
|
|
|
$entry->{item}->comment, |
282
|
4
|
|
|
|
|
8
|
$score, |
283
|
|
|
|
|
|
|
sprintf($percentage_format, 100 * $score / $total_points) |
284
|
|
|
|
|
|
|
]; |
285
|
|
|
|
|
|
|
} |
286
|
1
|
|
|
|
|
4
|
my @table = _make_table( |
287
|
|
|
|
|
|
|
['Class', 'Item', 'Score', 'Percentage'], \@rows); |
288
|
1
|
|
|
|
|
143
|
my $info = "Analogical Set\nTotal Frequency = $total_points\n"; |
289
|
1
|
|
|
|
|
3
|
$info .= join '', @table; |
290
|
1
|
|
|
|
|
5
|
return \$info; |
291
|
|
|
|
|
|
|
} |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
# calculate and store analogical effects in $self->{_analogical_set} |
294
|
|
|
|
|
|
|
sub _calculate_analogical_set { |
295
|
3
|
|
|
3
|
|
5
|
my ($self) = @_; |
296
|
3
|
|
|
|
|
56
|
my $train = $self->training_set; |
297
|
3
|
|
|
|
|
12
|
my %set; |
298
|
3
|
|
|
|
|
5
|
foreach my $context ( keys %{$self->{pointers}} ) { |
|
3
|
|
|
|
|
11
|
|
299
|
|
|
|
|
|
|
next unless |
300
|
11
|
100
|
|
|
|
18
|
exists $self->{itemcontextchainhead}->{$context}; |
301
|
8
|
|
|
|
|
21
|
for ( |
302
|
|
|
|
|
|
|
my $index = $self->{itemcontextchainhead}->{$context}; |
303
|
|
|
|
|
|
|
defined $index; |
304
|
|
|
|
|
|
|
$index = $self->{itemcontextchain}->[$index] |
305
|
|
|
|
|
|
|
) |
306
|
|
|
|
|
|
|
{ |
307
|
10
|
|
|
|
|
19
|
my $item = $train->get_item($index); |
308
|
|
|
|
|
|
|
$set{$item->id} = { |
309
|
|
|
|
|
|
|
item => $item, |
310
|
10
|
|
|
|
|
32
|
score => $self->{pointers}->{$context} |
311
|
|
|
|
|
|
|
}; |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
} |
314
|
3
|
|
|
|
|
6
|
$self->{_analogical_set} = \%set; |
315
|
3
|
|
|
|
|
5
|
return; |
316
|
|
|
|
|
|
|
} |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
sub gang_effects { |
319
|
4
|
|
|
4
|
1
|
306
|
my ($self) = @_; |
320
|
4
|
100
|
|
|
|
10
|
if(!$self->{_gang_effects}){ |
321
|
3
|
|
|
|
|
6
|
$self->_calculate_gangs; |
322
|
|
|
|
|
|
|
} |
323
|
4
|
|
|
|
|
8
|
return $self->{_gang_effects}; |
324
|
|
|
|
|
|
|
} |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
#pod =head2 C |
327
|
|
|
|
|
|
|
#pod |
328
|
|
|
|
|
|
|
#pod Returns a scalar reference (string) containing the gang effects on the |
329
|
|
|
|
|
|
|
#pod final class prediction. |
330
|
|
|
|
|
|
|
#pod |
331
|
|
|
|
|
|
|
#pod A single boolean parameter can be provided to turn on list printing, |
332
|
|
|
|
|
|
|
#pod meaning gang items items are printed. This is false (off) by default. |
333
|
|
|
|
|
|
|
#pod |
334
|
|
|
|
|
|
|
#pod =cut |
335
|
|
|
|
|
|
|
sub gang_summary { |
336
|
3
|
|
|
3
|
1
|
855
|
my ($self, $print_list) = @_; |
337
|
3
|
|
|
|
|
52
|
my $test_item = $self->test_item; |
338
|
|
|
|
|
|
|
|
339
|
3
|
|
|
|
|
13
|
my $gangs = $self->gang_effects; |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
# Make a table for the gangs with these rows: |
342
|
|
|
|
|
|
|
# Percentage |
343
|
|
|
|
|
|
|
# Score |
344
|
|
|
|
|
|
|
# Num |
345
|
|
|
|
|
|
|
# Class |
346
|
|
|
|
|
|
|
# Features |
347
|
|
|
|
|
|
|
# item comment |
348
|
3
|
|
|
|
|
1
|
my @rows; |
349
|
|
|
|
|
|
|
# first row is a header with test item for easy reference |
350
|
|
|
|
|
|
|
push @rows, [ |
351
|
|
|
|
|
|
|
'Context', |
352
|
|
|
|
|
|
|
undef, |
353
|
|
|
|
|
|
|
undef, |
354
|
|
|
|
|
|
|
undef, |
355
|
3
|
|
|
|
|
4
|
@{$test_item->features}, |
|
3
|
|
|
|
|
6
|
|
356
|
|
|
|
|
|
|
]; |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
# store the number of rows added for each gang |
359
|
|
|
|
|
|
|
# will help with printing later |
360
|
3
|
|
|
|
|
4
|
my @gang_rows; |
361
|
3
|
|
|
|
|
2
|
my $current_row = -1; |
362
|
|
|
|
|
|
|
# add information for each gang; sort by order of highest to |
363
|
|
|
|
|
|
|
# lowest effect |
364
|
3
|
|
|
|
|
4
|
foreach my $gang (@$gangs){ |
365
|
10
|
|
|
|
|
5
|
$current_row++; |
366
|
10
|
|
|
|
|
10
|
$gang_rows[$current_row]++; |
367
|
10
|
|
|
|
|
8
|
my $features = $gang->{features}; |
368
|
|
|
|
|
|
|
# add the gang supracontext, effect and score |
369
|
|
|
|
|
|
|
push @rows, [ |
370
|
|
|
|
|
|
|
sprintf($percentage_format, 100 * $gang->{effect}), |
371
|
|
|
|
|
|
|
$gang->{score}, |
372
|
|
|
|
|
|
|
undef, |
373
|
|
|
|
|
|
|
undef, |
374
|
|
|
|
|
|
|
# print undefined feature slots as asterisks |
375
|
10
|
100
|
|
|
|
48
|
map {length($_) ? $_ : '*'} @$features |
|
58
|
|
|
|
|
77
|
|
376
|
|
|
|
|
|
|
]; |
377
|
|
|
|
|
|
|
# add each class in the gang, along with the total number |
378
|
|
|
|
|
|
|
# and effect of the gang items supporting it |
379
|
10
|
|
|
|
|
9
|
for my $class (sort keys %{ $gang->{class} }){ |
|
10
|
|
|
|
|
23
|
|
380
|
12
|
|
|
|
|
8
|
$gang_rows[$current_row]++; |
381
|
|
|
|
|
|
|
push @rows, [ |
382
|
|
|
|
|
|
|
sprintf($percentage_format, |
383
|
|
|
|
|
|
|
100 * $gang->{class}->{$class}->{effect}), |
384
|
|
|
|
|
|
|
$gang->{class}->{$class}->{score}, |
385
|
12
|
|
|
|
|
36
|
scalar @{ $gang->{data}->{$class} }, |
|
12
|
|
|
|
|
21
|
|
386
|
|
|
|
|
|
|
$class, |
387
|
|
|
|
|
|
|
undef |
388
|
|
|
|
|
|
|
]; |
389
|
12
|
100
|
|
|
|
24
|
if($print_list){ |
390
|
|
|
|
|
|
|
# add the list of items in the given context |
391
|
4
|
|
|
|
|
3
|
for my $item (@{ $gang->{data}->{$class} }){ |
|
4
|
|
|
|
|
5
|
|
392
|
4
|
|
|
|
|
4
|
$gang_rows[$current_row]++; |
393
|
|
|
|
|
|
|
push @rows, [ |
394
|
|
|
|
|
|
|
undef, |
395
|
|
|
|
|
|
|
undef, |
396
|
|
|
|
|
|
|
undef, |
397
|
|
|
|
|
|
|
undef, |
398
|
4
|
|
|
|
|
3
|
@{ $item->features }, |
|
4
|
|
|
|
|
7
|
|
399
|
|
|
|
|
|
|
$item->comment, |
400
|
|
|
|
|
|
|
]; |
401
|
|
|
|
|
|
|
} |
402
|
|
|
|
|
|
|
} |
403
|
|
|
|
|
|
|
} |
404
|
|
|
|
|
|
|
} |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
# construct the table from the rows |
407
|
|
|
|
|
|
|
my @headers = ( |
408
|
|
|
|
|
|
|
\'| ', |
409
|
|
|
|
|
|
|
'Percentage' => \' | ', |
410
|
|
|
|
|
|
|
'Score' => \' | ', |
411
|
|
|
|
|
|
|
'Num Items' => \' | ', |
412
|
|
|
|
|
|
|
'Class' => \' | ', |
413
|
3
|
|
|
|
|
3
|
('' => \' ') x @{$test_item->features} |
|
3
|
|
|
|
|
6
|
|
414
|
|
|
|
|
|
|
); |
415
|
3
|
|
|
|
|
4
|
pop @headers; |
416
|
3
|
100
|
|
|
|
6
|
if($print_list){ |
417
|
1
|
|
|
|
|
6
|
push @headers, \' | ', 'Item Comment'; |
418
|
|
|
|
|
|
|
} |
419
|
3
|
|
|
|
|
3
|
push @headers, \' |'; |
420
|
3
|
|
|
|
|
3
|
my @rule = qw(- +); |
421
|
3
|
|
|
|
|
10
|
my $table = Text::Table->new(@headers); |
422
|
3
|
|
|
|
|
8390
|
$table->load(@rows); |
423
|
|
|
|
|
|
|
# main header |
424
|
3
|
|
|
|
|
1419
|
$current_row = 0; |
425
|
3
|
|
|
|
|
7
|
my $return = $table->rule(@rule) . |
426
|
|
|
|
|
|
|
$table->title . |
427
|
|
|
|
|
|
|
$table->body($current_row) . |
428
|
|
|
|
|
|
|
$table->rule(@rule); |
429
|
3
|
|
|
|
|
79252
|
$current_row++; |
430
|
|
|
|
|
|
|
# add info with a header for each gang |
431
|
3
|
|
|
|
|
5
|
for my $num (@gang_rows){ |
432
|
|
|
|
|
|
|
# a row of '*' separates each gang |
433
|
10
|
|
|
|
|
18
|
$return .= $table->rule('*','*') . |
434
|
|
|
|
|
|
|
$table->body($current_row) . |
435
|
|
|
|
|
|
|
$table->rule(@rule); |
436
|
10
|
|
|
|
|
2797
|
$current_row++; |
437
|
10
|
|
|
|
|
18
|
for(1 .. $num - 1){ |
438
|
16
|
|
|
|
|
27
|
$return .= $table->body($current_row); |
439
|
16
|
|
|
|
|
575
|
$current_row++; |
440
|
|
|
|
|
|
|
} |
441
|
|
|
|
|
|
|
} |
442
|
3
|
|
|
|
|
7
|
$return .= $table->rule(@rule); |
443
|
3
|
|
|
|
|
430
|
return \$return; |
444
|
|
|
|
|
|
|
} |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
sub _calculate_gangs { |
447
|
3
|
|
|
3
|
|
3
|
my ($self) = @_; |
448
|
3
|
|
|
|
|
47
|
my $train = $self->training_set; |
449
|
3
|
|
|
|
|
44
|
my $total_points = $self->total_points; |
450
|
3
|
|
|
|
|
12
|
my $raw_gang = $self->{gang}; |
451
|
3
|
|
|
|
|
3
|
my @gangs; |
452
|
|
|
|
|
|
|
|
453
|
3
|
|
|
|
|
4
|
foreach my $context (keys %{$raw_gang}) |
|
3
|
|
|
|
|
9
|
|
454
|
|
|
|
|
|
|
{ |
455
|
10
|
|
|
|
|
9
|
my $gang = {}; |
456
|
10
|
|
|
|
|
15
|
my @features = $self->_unpack_supracontext($context); |
457
|
|
|
|
|
|
|
# for now, store gangs by the supracontext printout |
458
|
10
|
100
|
|
|
|
18
|
my $key = join ' ', map {length($_) ? $_ : '-'} @features; |
|
58
|
|
|
|
|
83
|
|
459
|
10
|
|
|
|
|
19
|
$gang->{score} = $raw_gang->{$context}; |
460
|
10
|
|
|
|
|
14
|
$gang->{effect} = $raw_gang->{$context} / $total_points; |
461
|
10
|
|
|
|
|
12
|
$gang->{features} = \@features; |
462
|
|
|
|
|
|
|
|
463
|
10
|
|
|
|
|
9
|
my $p = $self->{pointers}->{$context}; |
464
|
|
|
|
|
|
|
# if the supracontext is homogenous |
465
|
10
|
100
|
|
|
|
14
|
if ( my $class_index = $self->{context_to_class}->{$context} ) { |
466
|
|
|
|
|
|
|
# store a 'homogenous' key that indicates this, besides |
467
|
|
|
|
|
|
|
# indicating the unanimous class prediction. |
468
|
8
|
|
|
|
|
60
|
my $class = $train->_class_for_index($class_index); |
469
|
8
|
|
|
|
|
9
|
$gang->{homogenous} = $class; |
470
|
8
|
|
|
|
|
7
|
my @data; |
471
|
8
|
|
|
|
|
18
|
for ( |
472
|
|
|
|
|
|
|
my $index = $self->{itemcontextchainhead}->{$context}; |
473
|
|
|
|
|
|
|
defined $index; |
474
|
|
|
|
|
|
|
$index = $self->{itemcontextchain}->[$index] |
475
|
|
|
|
|
|
|
) |
476
|
|
|
|
|
|
|
{ |
477
|
9
|
|
|
|
|
14
|
push @data, $train->get_item($index); |
478
|
|
|
|
|
|
|
} |
479
|
8
|
|
|
|
|
15
|
$gang->{data}->{$class} = \@data; |
480
|
8
|
|
|
|
|
9
|
$gang->{size} = scalar @data; |
481
|
8
|
|
|
|
|
13
|
$gang->{class}->{$class}->{score} = $p; |
482
|
|
|
|
|
|
|
$gang->{class}->{$class}->{effect} = |
483
|
8
|
|
|
|
|
10
|
$gang->{effect}; |
484
|
|
|
|
|
|
|
} |
485
|
|
|
|
|
|
|
# for heterogenous supracontexts we have to store data for |
486
|
|
|
|
|
|
|
# each class |
487
|
|
|
|
|
|
|
else { |
488
|
2
|
|
|
|
|
3
|
$gang->{homogenous} = 0; |
489
|
|
|
|
|
|
|
# first loop through the data and sort by class, also |
490
|
|
|
|
|
|
|
# finding the total gang size |
491
|
2
|
|
|
|
|
2
|
my $size = 0; |
492
|
2
|
|
|
|
|
2
|
my %data; |
493
|
2
|
|
|
|
|
7
|
for ( |
494
|
|
|
|
|
|
|
my $index = $self->{itemcontextchainhead}->{$context}; |
495
|
|
|
|
|
|
|
defined $index; |
496
|
|
|
|
|
|
|
$index = $self->{itemcontextchain}->[$index] |
497
|
|
|
|
|
|
|
) |
498
|
|
|
|
|
|
|
{ |
499
|
4
|
|
|
|
|
19
|
my $item = $train->get_item($index); |
500
|
4
|
|
|
|
|
3
|
push @{ $data{$item->class} }, $item; |
|
4
|
|
|
|
|
8
|
|
501
|
4
|
|
|
|
|
9
|
$size++; |
502
|
|
|
|
|
|
|
} |
503
|
2
|
|
|
|
|
4
|
$gang->{data} = \%data; |
504
|
2
|
|
|
|
|
2
|
$gang->{size} = $size; |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
# then store aggregate statistics for each class |
507
|
2
|
|
|
|
|
7
|
for my $class (keys %data){ |
508
|
4
|
|
|
|
|
7
|
$gang->{class}->{$class}->{score} = $p; |
509
|
|
|
|
|
|
|
$gang->{class}->{$class}->{effect} = |
510
|
|
|
|
|
|
|
# score*num_data/total |
511
|
4
|
|
|
|
|
4
|
@{ $data{$class} } * $p / $total_points; |
|
4
|
|
|
|
|
9
|
|
512
|
|
|
|
|
|
|
} |
513
|
|
|
|
|
|
|
} |
514
|
10
|
|
|
|
|
14
|
push @gangs, $gang; |
515
|
|
|
|
|
|
|
} |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
# sort by score and then alphabetically by class labels |
518
|
|
|
|
|
|
|
@gangs = sort{ |
519
|
3
|
|
|
|
|
10
|
bigcmp($b->{score}, $a->{score}) || |
520
|
0
|
|
|
|
|
0
|
(join '', sort keys %{ $b->{class} }) |
521
|
|
|
|
|
|
|
cmp |
522
|
11
|
50
|
|
|
|
18
|
(join '', sort keys %{ $a->{class} })} @gangs; |
|
0
|
|
|
|
|
0
|
|
523
|
3
|
|
|
|
|
5
|
$self->{_gang_effects} = \@gangs; |
524
|
3
|
|
|
|
|
5
|
return; |
525
|
|
|
|
|
|
|
} |
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
# Unpack and return the supracontext features. |
528
|
|
|
|
|
|
|
# Blank entries mean the variable may be anything, e.g. |
529
|
|
|
|
|
|
|
# ('a' 'b' '') means a supracontext containing items |
530
|
|
|
|
|
|
|
# wich have ('a' 'b' whatever) as variable values. |
531
|
|
|
|
|
|
|
sub _unpack_supracontext { |
532
|
10
|
|
|
10
|
|
10
|
my ($self, $context) = @_; |
533
|
10
|
|
|
|
|
28
|
my @context_list = unpack "S!4", $context; |
534
|
10
|
|
|
|
|
8
|
my @alist = @{$self->{active_feats}}; |
|
10
|
|
|
|
|
12
|
|
535
|
10
|
|
|
|
|
9
|
my (@features) = @{ $self->test_item->features }; |
|
10
|
|
|
|
|
149
|
|
536
|
10
|
|
|
|
|
132
|
my $exclude_nulls = $self->exclude_nulls; |
537
|
10
|
|
|
|
|
28
|
my $j = 1; |
538
|
10
|
|
|
|
|
12
|
foreach my $a (reverse @alist) { |
539
|
40
|
|
|
|
|
27
|
my $partial_context = pop @context_list; |
540
|
40
|
|
|
|
|
78
|
for ( ; $a ; --$a ) { |
541
|
46
|
50
|
|
|
|
47
|
if($exclude_nulls){ |
542
|
46
|
|
|
|
|
66
|
++$j while !defined $features[ -$j ]; |
543
|
|
|
|
|
|
|
} |
544
|
46
|
100
|
|
|
|
97
|
$features[ -$j ] = '' if $partial_context & 1; |
545
|
46
|
|
|
|
|
27
|
$partial_context >>= 1; |
546
|
46
|
|
|
|
|
64
|
++$j; |
547
|
|
|
|
|
|
|
} |
548
|
|
|
|
|
|
|
} |
549
|
10
|
|
|
|
|
29
|
return @features; |
550
|
|
|
|
|
|
|
} |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
# mostly by Ovid: |
553
|
|
|
|
|
|
|
# http://use.perl.org/use.perl.org/_Ovid/journal/36762.html |
554
|
|
|
|
|
|
|
# Return table rows with a nice header and column separators |
555
|
|
|
|
|
|
|
sub _make_table { |
556
|
7
|
|
|
7
|
|
6
|
my ( $headers, $rows ) = @_; |
557
|
|
|
|
|
|
|
|
558
|
7
|
|
|
|
|
9
|
my @rule = qw(- +); |
559
|
7
|
|
|
|
|
7
|
my @headers = \'| '; |
560
|
7
|
|
|
|
|
8
|
push @headers => map { $_ => \' | ' } @$headers; |
|
20
|
|
|
|
|
23
|
|
561
|
7
|
|
|
|
|
7
|
pop @headers; |
562
|
7
|
|
|
|
|
8
|
push @headers => \' |'; |
563
|
|
|
|
|
|
|
|
564
|
7
|
50
|
33
|
|
|
34
|
unless ('ARRAY' eq ref $rows |
|
|
|
33
|
|
|
|
|
565
|
|
|
|
|
|
|
&& 'ARRAY' eq ref $rows->[0] |
566
|
7
|
|
|
|
|
37
|
&& @$headers == @{ $rows->[0] }) { |
567
|
0
|
|
|
|
|
0
|
croak( |
568
|
|
|
|
|
|
|
"make_table() rows must be an AoA with rows being same size as headers" |
569
|
|
|
|
|
|
|
); |
570
|
|
|
|
|
|
|
} |
571
|
7
|
|
|
|
|
20
|
my $table = Text::Table->new(@headers); |
572
|
7
|
|
|
|
|
7091
|
$table->rule(@rule); |
573
|
7
|
|
|
|
|
8491
|
$table->body_rule(@rule); |
574
|
7
|
|
|
|
|
663
|
$table->load(@$rows); |
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
return $table->rule(@rule), |
577
|
|
|
|
|
|
|
$table->title, |
578
|
|
|
|
|
|
|
$table->rule(@rule), |
579
|
7
|
|
|
|
|
1018
|
map({ $table->body($_) } 0 .. @$rows), |
|
37
|
|
|
|
|
30336
|
|
580
|
|
|
|
|
|
|
$table->rule(@rule); |
581
|
|
|
|
|
|
|
} |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
1; |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
__END__ |