| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Lingua::Word::Parser; |
|
2
|
|
|
|
|
|
|
our $AUTHORITY = 'cpan:GENE'; |
|
3
|
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
# ABSTRACT: Parse a word into scored known and unknown parts |
|
5
|
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
641
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
26
|
|
|
7
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
34
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our $VERSION = '0.0805'; |
|
10
|
|
|
|
|
|
|
|
|
11
|
1
|
|
|
1
|
|
388
|
use Bit::Vector; |
|
|
1
|
|
|
|
|
827
|
|
|
|
1
|
|
|
|
|
37
|
|
|
12
|
1
|
|
|
1
|
|
1253
|
use DBI; |
|
|
1
|
|
|
|
|
15260
|
|
|
|
1
|
|
|
|
|
66
|
|
|
13
|
1
|
|
|
1
|
|
526
|
use List::PowerSet qw( powerset_lazy ); |
|
|
1
|
|
|
|
|
415
|
|
|
|
1
|
|
|
|
|
53
|
|
|
14
|
1
|
|
|
1
|
|
409
|
use IO::File; |
|
|
1
|
|
|
|
|
7701
|
|
|
|
1
|
|
|
|
|
109
|
|
|
15
|
|
|
|
|
|
|
|
|
16
|
1
|
|
|
1
|
|
651
|
use Memoize; |
|
|
1
|
|
|
|
|
2167
|
|
|
|
1
|
|
|
|
|
1999
|
|
|
17
|
|
|
|
|
|
|
memoize('_does_not_overlap'); |
|
18
|
|
|
|
|
|
|
memoize('power'); |
|
19
|
|
|
|
|
|
|
memoize('_reconstruct'); |
|
20
|
|
|
|
|
|
|
memoize('_grouping'); |
|
21
|
|
|
|
|
|
|
memoize('score'); |
|
22
|
|
|
|
|
|
|
memoize('score_parts'); |
|
23
|
|
|
|
|
|
|
memoize('_rle'); |
|
24
|
|
|
|
|
|
|
memoize('_or_together'); |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# Constructor.
#
# Takes named arguments (file, dbhost, dbtype, dbname, dbuser, dbpass, lex,
# word).  dbhost falls back to 'localhost' and dbtype to 'mysql' when not
# given (or false).  The result containers (known, masks, combos, score)
# start out empty.  The full argument list is forwarded to _init() before
# the blessed object is returned.
sub new {
    my ( $class, %args ) = @_;

    my %object = (
        # Lexicon sources and connection settings.
        file   => $args{file},
        dbhost => $args{dbhost} || 'localhost',
        dbtype => $args{dbtype} || 'mysql',
        dbname => $args{dbname},
        dbuser => $args{dbuser},
        dbpass => $args{dbpass},
        lex    => $args{lex},
        # The word under analysis.
        word   => $args{word},
        # Containers filled in by knowns(), power() and score().
        known  => {},
        masks  => {},
        combos => [],
        score  => {},
    );

    my $self = bless \%object, $class;
    $self->_init(%args);

    return $self;
}
|
49
|
|
|
|
|
|
|
# Post-construction setup.
#
# Records the length of the word in $self->{wlen}, then loads the lexicon:
# from $self->{file} when that is set and the file exists, otherwise from
# the database when $self->{dbname} is set.  The named arguments are
# accepted but not consulted here.
sub _init {
    my $self = shift;
    my %args = @_;

    # Remember how long the word is; the masks are built to this width.
    $self->{wlen} = length $self->{word};

    # Populate the lexicon from whichever source was configured.
    my $source = $self->{file};
    if ( $source && -e $source ) {
        $self->_fetch_lex;
    }
    elsif ( $self->{dbname} ) {
        $self->_db_fetch;
    }
}
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# Load the lexicon from the file named by $self->{file}.
#
# Each non-blank line holds a regular expression, whitespace, and then the
# definition (the remainder of the line).  Entries are stored as
#   $self->{lex}{$regex_text} = { defn => $definition, re => qr/$regex_text/ }
#
# Returns $self->{lex}.  Dies if the file cannot be opened.
sub _fetch_lex {
    my $self = shift;

    # Pass the mode separately so that the file name is never parsed for
    # mode characters (a name with leading/trailing '<', '>' or '|' would
    # otherwise be misinterpreted).
    my $fh = IO::File->new( $self->{file}, '<' )
        or die "Can't read file: '$self->{file}': $!";

    # Read line by line rather than slurping the whole file into a list.
    while ( defined( my $line = $fh->getline ) ) {
        chomp $line;

        # Split into the pattern and the rest of the line.  The special
        # ' ' pattern ignores leading whitespace, so an indented entry is
        # still parsed instead of yielding an empty pattern.
        my ( $re, $defn ) = split ' ', $line, 2;

        # Skip blank lines: an empty pattern would match at every
        # position of the word and produce meaningless knowns.
        next unless defined $re && length $re;

        # Add the entry to the lexicon.  The pattern is deliberately
        # compiled as a regular expression, not a literal string.
        $self->{lex}{$re} = { defn => $defn, re => qr/$re/ };
    }
    $fh->close;

    return $self->{lex};
}
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
# Load the lexicon from a database.
#
# Connects with the object's dbtype/dbname/dbhost/dbuser/dbpass settings,
# reads every (affix, definition) row of the "fragment" table and stores
#   $self->{lex}{$affix} = { re => qr/$affix/, defn => $definition }
# Dies with a descriptive message if any database step fails.
sub _db_fetch {
    my $self = shift;

    my $dsn = sprintf 'DBI:%s:%s;%s',
        $self->{dbtype}, $self->{dbname}, $self->{dbhost};

    my %attr = ( RaiseError => 1, AutoCommit => 1 );
    my $dbh = DBI->connect( $dsn, $self->{dbuser}, $self->{dbpass}, \%attr )
      or die "Unable to connect to $self->{dbname}: $DBI::errstr\n";

    my $sql = 'SELECT affix, definition FROM fragment';

    my $sth = $dbh->prepare($sql);
    $sth->execute or die "Unable to execute '$sql': $DBI::errstr\n";

    # One lexicon entry per row: the affix is both the key and the pattern.
    while ( my ( $part, $defn ) = $sth->fetchrow_array ) {
        $self->{lex}{$part} = { re => qr/$part/, defn => $defn };
    }
    # An error string left behind means the loop ended on a failure
    # rather than on the end of the result set.
    die "Fetch terminated early: $DBI::errstr\n" if $DBI::errstr;

    $sth->finish or die "Unable to finish '$sql': $DBI::errstr\n";

    $dbh->disconnect or die "Unable to disconnect from $self->{dbname}: $DBI::errstr\n";
}
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
# Find every place where a lexicon pattern matches the word.
#
# For each match a record is stored in $self->{known} under a sequential
# integer id:
#   part - the matched text
#   span - [first index, last index] of the match within the word
#   defn - the definition of the lexicon entry that matched
#   mask - a string of 0s and 1s, with 1s over the matched positions
# $self->{masks} maps each mask string to the id that produced it last.
#
# Returns $self->{known}.
sub knowns {
    my $self = shift;

    # Next identifier to hand out for the known and masks tables.
    my $next_id = 0;

    for my $entry ( values %{ $self->{lex} } ) {
        while ( $self->{word} =~ /$entry->{re}/g ) {
            # Offsets of the start and of the end (exclusive) of the match.
            my ( $start, $end ) = ( $-[0], $+[0] );
            my $width = $end - $start;

            # Bitmask over the word: zeros before, ones across the match
            # (at least one), zeros after.
            my $mask = join '',
                '0' x $start,
                '1' x ( $width || 1 ),
                '0' x ( $self->{wlen} - $end );

            $self->{known}{$next_id} = {
                part => substr( $self->{word}, $start, $width ),
                span => [ $start, $end - 1 ],
                defn => $entry->{defn},
                mask => $mask,
            };

            # Remember which id this mask belongs to.
            $self->{masks}{$mask} = $next_id++;
        }
    }

    return $self->{known};
}
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# Build the "non-overlapping powerset" of the known masks.
#
# Walks every subset of the mask strings in $self->{masks}.  A subset is
# appended to $self->{combos} when it holds exactly one mask, or when it
# holds several and no two of them overlap.  The empty subset is never
# saved.
#
# Returns $self->{combos}.
sub power {
    my $self = shift;

    # Lazy generator over all subsets of the (sorted) masks.
    my $next_subset = powerset_lazy(sort keys %{ $self->{masks} });

    SUBSET: while ( my $subset = $next_subset->() ) {
        my $last = $#$subset;

        # Nothing to record for the empty subset.
        next SUBSET if $last < 0;

        # Check every pair once; a single overlap rejects the subset.
        # A one-element subset has no pairs and falls straight through.
        for my $left ( 0 .. $last - 1 ) {
            for my $right ( $left + 1 .. $last ) {
                next SUBSET
                    unless $self->_does_not_overlap( $subset->[$left], $subset->[$right] );
            }
        }

        push @{ $self->{combos} }, $subset;
    }

    return $self->{combos};
}
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
# Produce human-readable scores for every combination.
#
# Calls score_parts() with the given separators and, for each combined
# mask it reports, appends one record per element to $self->{score}{$mask}:
#   score       - "K:U chunks / K:U chars" built from the element's counters
#   familiarity - "%.2f chunks / %.2f chars" from _familiarity() of the mask
#   partition   - the element's partition strings joined with ', '
#   definition  - the element's definitions joined with ', '
#
# Returns $self->{score}.
sub score {
    my ( $self, $open_separator, $close_separator ) = @_;

    my $parts = $self->score_parts( $open_separator, $close_separator );

    for my $mask ( keys %$parts ) {
        # The familiarity depends on the mask only, so format it once.
        my ( $chunk_ratio, $char_ratio ) = @{ $self->_familiarity($mask) };
        my $familiarity = sprintf "%.2f chunks / %.2f chars", $chunk_ratio, $char_ratio;

        for my $element ( @{ $parts->{$mask} } ) {
            my $tally = $element->{score};

            push @{ $self->{score}{$mask} }, {
                score => sprintf(
                    "%d:%d chunks / %d:%d chars",
                    map { $tally->{$_} } qw( knowns unknowns knownc unknownc )
                ),
                familiarity => $familiarity,
                partition   => join( ', ', @{ $element->{partition} } ),
                definition  => join( ', ', @{ $element->{definition} } ),
            };
        }
    }

    return $self->{score};
}
|
232
|
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
# Measure how much of a mask is "known".
#
# The mask is a string of 0s and 1s.  It is cut into maximal runs of 0s
# and runs of everything else; a run without any 0 counts as known.
#
# Returns an array reference of two ratios:
#   [ known runs / all runs, known characters / mask length ]
# An undefined or empty mask yields [ 0, 0 ] instead of dividing by zero.
sub _familiarity {
    my ( $self, $mask ) = @_;

    # Nothing to measure; both denominators below would be zero.
    return [ 0, 0 ] unless defined $mask && length $mask;

    # Keeping the delimiter in the split returns the 0-runs as well as the
    # runs between them; drop the empty leading field split can produce.
    my @chunks = grep { $_ ne "" } split /(0+)/, $mask;

    # Runs made up of 1s only, and the number of characters they cover.
    my @known_chunks = grep { !/0/ } @chunks;
    my $known_chars  = 0;
    $known_chars += length $_ for @known_chunks;

    return [ @known_chunks / @chunks, $known_chars / length($mask) ];
}
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
# Score every combination found by power().
#
# For each combination in $self->{combos} a record is appended to
# $self->{score_parts}{$union}, where $union is the OR of the combination's
# masks:
#   score       - hash of counters (knowns, unknowns, knownc, unknownc)
#                 summed over the run-length groups of each mask
#   partition   - array ref of the word marked up once per mask
#   definition  - array ref of the definitions of the knowns whose mask
#                 appears in the combination
#   familiarity - the _familiarity() ratios of $union
#
# A third argument (a line terminator) is accepted but not used.
#
# Returns $self->{score_parts}.
sub score_parts {
    my $self = shift;
    my ( $open_separator, $close_separator, $line_terminator ) = @_;

    for my $combo ( @{ $self->{combos} } ) {
        my $union = $self->_or_together(@$combo);

        # Known vs unknown chunks, and known vs unknown characters.
        my %count = (
            knowns   => 0,
            unknowns => 0,
            knownc   => 0,
            unknownc => 0,
        );

        for my $mask ( reverse sort @$combo ) {
            # Run-length encode the mask, then tally its groups.
            my $encoded = _rle($mask);
            my %tally;
            @tally{qw( knowns unknowns knownc unknownc )} = _grouping($encoded);

            $count{$_} += $tally{$_} for keys %tally;
        }

        my ( $strings, $ordered ) =
            _reconstruct( $self->{word}, $combo, $open_separator, $close_separator );

        # Collect the definition of every known that carries one of the masks.
        my @definitions;
        for my $mask (@$ordered) {
            push @definitions,
                map  { $_->{defn} }
                grep { $_->{mask} eq $mask }
                values %{ $self->{known} };
        }

        push @{ $self->{score_parts}{$union} }, {
            score       => \%count,
            partition   => $strings,
            definition  => \@definitions,
            familiarity => $self->_familiarity($union),
        };
    }

    return $self->{score_parts};
}
|
303
|
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
# Tally the groups of a run-length encoded mask such as "k2u3k1".
#
# Every "k<digits>" group is a known run and every "u<digits>" group an
# unknown run; the digits give the number of characters in the run.
#
# Returns the list
#   ( known runs, unknown runs, known characters, unknown characters )
sub _grouping {
    my $scored = shift;

    my %runs  = ( k => 0, u => 0 );
    my %chars = ( k => 0, u => 0 );

    # Walk the groups in order, bucketing each by its leading letter.
    while ( $scored =~ /([ku])(\d+)/g ) {
        my ( $type, $size ) = ( $1, $2 );
        $runs{$type}++;
        $chars{$type} += $size;
    }

    return $runs{k}, $runs{u}, $chars{k}, $chars{u};
}
|
321
|
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
# Run-length encode a mask string.
#
# Each 1 becomes "k" and each 0 becomes "u"; every run of identical
# characters is then replaced by the character followed by the length of
# the run, e.g. "1100011" becomes "k2u3k2".
#
# Returns the encoded string.
sub _rle {
    my $scored = shift;

    # "Un-digitize": letters instead of digits, so that the run lengths
    # appended below cannot be confused with the mask itself.
    $scored =~ tr/10/ku/;

    # Replace each run with its character and its length.
    $scored =~ s/((.)\2*)/$2 . length($1)/ge;

    return $scored;
}
|
331
|
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
# Decide whether two masks are free of overlap.
#
# Both masks are turned into bit vectors of $self->{wlen} bits.  Their OR
# and their XOR are equal exactly when no position is set in both.
#
# Returns the binary string of the OR when the two are equal, otherwise 0.
sub _does_not_overlap {
    my ( $self, $mask, $check ) = @_;

    my $width = $self->{wlen};

    # One vector for the first mask, two working copies of the second.
    my $first      = Bit::Vector->new_Bin( $width, $mask );
    my $union      = Bit::Vector->new_Bin( $width, $check );
    my $difference = Bit::Vector->new_Bin( $width, $check );

    # Combine the first mask into each working copy.
    $union->Or( $first, $union );
    $difference->Xor( $first, $difference );

    # A bit set in both masks survives the OR but not the XOR.
    return 0 unless $difference->equal($union);

    return $union->to_Bin;
}
|
350
|
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
# OR a list of masks together.
#
# Each mask is read as a bit vector of $self->{wlen} bits and merged into
# an accumulator that starts out as all zeros.
#
# Returns the binary string of the accumulated vector.
sub _or_together {
    my ( $self, @masks ) = @_;

    my $width = $self->{wlen};

    # Start from an all-zero vector of the word's width.
    my $union = Bit::Vector->new_Bin( $width, ( 0 x $width ) );

    # Fold every mask into the union.
    $union->Or( $union, Bit::Vector->new_Bin( $width, $_ ) ) for @masks;

    return $union->to_Bin;
}
|
371
|
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
# Mark up the word once per mask.
#
# Arguments: the word, an array ref of mask strings, and optional opening
# and closing separators (defaulting to '<' and '>').  The masks are
# processed in reverse sorted order.  For each mask the word is copied
# character by character, with the opening separator inserted where a run
# of 1s begins and the closing separator where it ends.
#
# Returns two array refs: the marked-up strings, and the masks in the
# order in which they were processed.
sub _reconstruct {
    my ( $word, $masks, $open_separator, $close_separator ) = @_;

    $open_separator  = '<' unless defined $open_separator;
    $close_separator = '>' unless defined $close_separator;

    my ( @strings, @ordered );

    for my $mask ( reverse sort @$masks ) {
        my $inside = 0;     # Are we currently within a run of 1s?
        my $marked = '';

        for my $pos ( 0 .. length($mask) - 1 ) {
            my $known = substr( $mask, $pos, 1 ) ? 1 : 0;

            # Emit a separator on each transition between known and unknown.
            if ( $known && !$inside ) {
                $marked .= $open_separator;
            }
            elsif ( !$known && $inside ) {
                $marked .= $close_separator;
            }

            $marked .= substr( $word, $pos, 1 );
            $inside = $known;
        }

        # Close a run that extends to the end of the mask.
        $marked .= $close_separator if $inside;

        push @strings, $marked;
        push @ordered, $mask;
    }

    return \@strings, \@ordered;
}
|
405
|
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
1; |
|
407
|
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
__END__ |