line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Lingua::Word::Parser; |
2
|
|
|
|
|
|
|
our $AUTHORITY = 'cpan:GENE'; |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
# ABSTRACT: Parse a word into scored known and unknown parts |
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
898
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
36
|
|
7
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
50
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our $VERSION = '0.0806'; |
10
|
|
|
|
|
|
|
|
11
|
1
|
|
|
1
|
|
498
|
use Bit::Vector; |
|
1
|
|
|
|
|
1085
|
|
|
1
|
|
|
|
|
63
|
|
12
|
1
|
|
|
1
|
|
1672
|
use DBI; |
|
1
|
|
|
|
|
18943
|
|
|
1
|
|
|
|
|
106
|
|
13
|
1
|
|
|
1
|
|
751
|
use List::PowerSet qw( powerset_lazy ); |
|
1
|
|
|
|
|
540
|
|
|
1
|
|
|
|
|
68
|
|
14
|
1
|
|
|
1
|
|
535
|
use IO::File; |
|
1
|
|
|
|
|
9663
|
|
|
1
|
|
|
|
|
138
|
|
15
|
|
|
|
|
|
|
|
16
|
1
|
|
|
1
|
|
878
|
use Memoize; |
|
1
|
|
|
|
|
2617
|
|
|
1
|
|
|
|
|
2491
|
|
17
|
|
|
|
|
|
|
memoize('_does_not_overlap'); |
18
|
|
|
|
|
|
|
memoize('power'); |
19
|
|
|
|
|
|
|
memoize('_reconstruct'); |
20
|
|
|
|
|
|
|
memoize('_grouping'); |
21
|
|
|
|
|
|
|
memoize('score'); |
22
|
|
|
|
|
|
|
memoize('score_parts'); |
23
|
|
|
|
|
|
|
memoize('_rle'); |
24
|
|
|
|
|
|
|
memoize('_or_together'); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
sub new { |
29
|
2
|
|
|
2
|
1
|
1480
|
my $class = shift; |
30
|
2
|
|
|
|
|
7
|
my %args = @_; |
31
|
|
|
|
|
|
|
my $self = { |
32
|
|
|
|
|
|
|
file => $args{file}, |
33
|
|
|
|
|
|
|
dbhost => $args{dbhost} || 'localhost', |
34
|
|
|
|
|
|
|
dbtype => $args{dbtype} || 'mysql', |
35
|
|
|
|
|
|
|
dbname => $args{dbname}, |
36
|
|
|
|
|
|
|
dbuser => $args{dbuser}, |
37
|
|
|
|
|
|
|
dbpass => $args{dbpass}, |
38
|
|
|
|
|
|
|
lex => $args{lex}, |
39
|
|
|
|
|
|
|
word => $args{word}, |
40
|
2
|
|
50
|
|
|
34
|
known => {}, |
|
|
|
50
|
|
|
|
|
41
|
|
|
|
|
|
|
masks => {}, |
42
|
|
|
|
|
|
|
combos => [], |
43
|
|
|
|
|
|
|
score => {}, |
44
|
|
|
|
|
|
|
}; |
45
|
2
|
|
|
|
|
5
|
bless $self, $class; |
46
|
2
|
|
|
|
|
9
|
$self->_init(%args); |
47
|
2
|
|
|
|
|
7
|
return $self; |
48
|
|
|
|
|
|
|
} |
49
|
|
|
|
|
|
|
sub _init { |
50
|
2
|
|
|
2
|
|
22
|
my ($self, %args) = @_; |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# Set the length of our word. |
53
|
2
|
|
|
|
|
8
|
$self->{wlen} = length $self->{word}; |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# Set lex if given data. |
56
|
2
|
100
|
66
|
|
|
55
|
if ( $self->{file} && -e $self->{file} ) { |
|
|
50
|
|
|
|
|
|
57
|
1
|
|
|
|
|
6
|
$self->_fetch_lex; |
58
|
|
|
|
|
|
|
} |
59
|
|
|
|
|
|
|
elsif( $self->{dbname} ) |
60
|
|
|
|
|
|
|
{ |
61
|
0
|
|
|
|
|
0
|
$self->_db_fetch; |
62
|
|
|
|
|
|
|
} |
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
sub _fetch_lex { |
66
|
1
|
|
|
1
|
|
2
|
my $self = shift; |
67
|
|
|
|
|
|
|
|
68
|
1
|
|
|
|
|
3
|
my $i = 0; |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
# Open the given file for reading... |
71
|
1
|
|
|
|
|
11
|
my $fh = IO::File->new; |
72
|
1
|
50
|
|
|
|
72
|
$fh->open( "< $self->{file}" ) or die "Can't read file: '$self->{file}'"; |
73
|
1
|
|
|
|
|
116
|
for ( <$fh> ) { |
74
|
9
|
|
|
|
|
21
|
$i++; |
75
|
|
|
|
|
|
|
# Split space-separated entries. |
76
|
9
|
|
|
|
|
13
|
chomp; |
77
|
9
|
|
|
|
|
39
|
my ($re, $defn) = split /\s+/, $_, 2; |
78
|
|
|
|
|
|
|
# Add the entry to the lexicon. |
79
|
9
|
|
|
|
|
135
|
$self->{lex}{$i} = { defn => $defn, re => qr/$re/ }; |
80
|
|
|
|
|
|
|
} |
81
|
1
|
|
|
|
|
16
|
$fh->close; |
82
|
|
|
|
|
|
|
|
83
|
1
|
|
|
|
|
28
|
return $self->{lex}; |
84
|
|
|
|
|
|
|
} |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
sub _db_fetch { |
87
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
88
|
|
|
|
|
|
|
|
89
|
0
|
|
|
|
|
0
|
my $dsn = "DBI:$self->{dbtype}:$self->{dbname};$self->{dbhost}"; |
90
|
|
|
|
|
|
|
|
91
|
0
|
0
|
|
|
|
0
|
my $dbh = DBI->connect( $dsn, $self->{dbuser}, $self->{dbpass}, { RaiseError => 1, AutoCommit => 1 } ) |
92
|
|
|
|
|
|
|
or die "Unable to connect to $self->{dbname}: $DBI::errstr\n"; |
93
|
|
|
|
|
|
|
|
94
|
0
|
|
|
|
|
0
|
my $sql = 'SELECT id, affix, definition FROM fragment'; |
95
|
|
|
|
|
|
|
|
96
|
0
|
|
|
|
|
0
|
my $sth = $dbh->prepare($sql); |
97
|
0
|
0
|
|
|
|
0
|
$sth->execute or die "Unable to execute '$sql': $DBI::errstr\n"; |
98
|
|
|
|
|
|
|
|
99
|
0
|
|
|
|
|
0
|
while( my @row = $sth->fetchrow_array ) { |
100
|
0
|
|
|
|
|
0
|
my ($id, $part, $defn) = @row; |
101
|
0
|
|
|
|
|
0
|
$self->{lex}{$id} = { re => qr/$part/, defn => $defn }; |
102
|
|
|
|
|
|
|
} |
103
|
0
|
0
|
|
|
|
0
|
die "Fetch terminated early: $DBI::errstr\n" if $DBI::errstr; |
104
|
|
|
|
|
|
|
|
105
|
0
|
0
|
|
|
|
0
|
$sth->finish or die "Unable to finish '$sql': $DBI::errstr\n"; |
106
|
|
|
|
|
|
|
|
107
|
0
|
0
|
|
|
|
0
|
$dbh->disconnect or die "Unable to disconnect from $self->{dbname}: $DBI::errstr\n"; |
108
|
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
sub knowns { |
112
|
1
|
|
|
1
|
1
|
1113
|
my $self = shift; |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
# The identifier for the known and masks lists. |
115
|
1
|
|
|
|
|
3
|
my $id = 0; |
116
|
|
|
|
|
|
|
|
117
|
1
|
|
|
|
|
3
|
for my $i (values %{ $self->{lex} }) { |
|
1
|
|
|
|
|
6
|
|
118
|
9
|
|
|
|
|
46
|
while ($self->{word} =~ /$i->{re}/g) { |
119
|
|
|
|
|
|
|
# Match positions. |
120
|
10
|
|
|
|
|
34
|
my ($m, $n) = ($-[0], $+[0]); |
121
|
|
|
|
|
|
|
# Get matched word-part. |
122
|
10
|
|
|
|
|
27
|
my $part = substr $self->{word}, $m, $n - $m; |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
# Create the part-of-word bitmask. |
125
|
10
|
|
|
|
|
20
|
my $mask = 0 x $m; # Before known |
126
|
10
|
|
50
|
|
|
26
|
$mask .= 1 x (($n - $m) || 1); # Known part |
127
|
10
|
|
|
|
|
17
|
$mask .= 0 x ($self->{wlen} - $n); # After known |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# Output our progress. |
130
|
|
|
|
|
|
|
# warn sprintf "%s %s - %s, %s (%d %d), %s\n", |
131
|
|
|
|
|
|
|
# $mask, |
132
|
|
|
|
|
|
|
# $i->{re}, |
133
|
|
|
|
|
|
|
# substr($self->{word}, 0, $m), |
134
|
|
|
|
|
|
|
# $part, |
135
|
|
|
|
|
|
|
# $m, |
136
|
|
|
|
|
|
|
# $n - 1, |
137
|
|
|
|
|
|
|
# substr($self->{word}, $n), |
138
|
|
|
|
|
|
|
# ; |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
# Save the known as a member of a list keyed by starting position. |
141
|
|
|
|
|
|
|
$self->{known}{$id} = { |
142
|
|
|
|
|
|
|
part => $part, |
143
|
|
|
|
|
|
|
span => [$m, $n - 1], |
144
|
|
|
|
|
|
|
defn => $i->{defn}, |
145
|
10
|
|
|
|
|
56
|
mask => $mask, |
146
|
|
|
|
|
|
|
}; |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
# Save the relationship between mask and id. |
149
|
10
|
|
|
|
|
57
|
$self->{masks}{$mask} = $id++; |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
} |
152
|
|
|
|
|
|
|
|
153
|
1
|
|
|
|
|
5
|
return $self->{known}; |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
sub power { |
158
|
|
|
|
|
|
|
my $self = shift; |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
# Get a new powerset generator. |
161
|
|
|
|
|
|
|
my $power = powerset_lazy(sort keys %{ $self->{masks} }); |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
# Consider each member of the powerset.. to save or skip? |
164
|
|
|
|
|
|
|
while (my $collection = $power->()) { |
165
|
|
|
|
|
|
|
# warn "C: @$collection\n"; |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
# Save this collection if it has only one item. |
168
|
|
|
|
|
|
|
if (1 == @$collection) { |
169
|
|
|
|
|
|
|
# warn "\t\tE: only 1 mask\n"; |
170
|
|
|
|
|
|
|
push @{ $self->{combos} }, $collection; |
171
|
|
|
|
|
|
|
next; |
172
|
|
|
|
|
|
|
} |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
# Compare each mask against the others. |
175
|
|
|
|
|
|
|
LOOP: for my $i (0 .. @$collection - 1) { |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
# Set the comparison mask. |
178
|
|
|
|
|
|
|
my $compare = $collection->[$i]; |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
for my $j ($i + 1 .. @$collection - 1) { |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
# Set the current mask. |
183
|
|
|
|
|
|
|
my $mask = $collection->[$j]; |
184
|
|
|
|
|
|
|
# warn "\tP:$compare v $mask\n"; |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
# Skip this collection if an overlap is found. |
187
|
|
|
|
|
|
|
if (not $self->_does_not_overlap($compare, $mask)) { |
188
|
|
|
|
|
|
|
# warn "\t\tO:$compare v $mask\n"; |
189
|
|
|
|
|
|
|
last LOOP; |
190
|
|
|
|
|
|
|
} |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
# Save this collection if we made it to the last pair. |
193
|
|
|
|
|
|
|
if ($i == @$collection - 2 && $j == @$collection - 1) { |
194
|
|
|
|
|
|
|
# warn "\t\tE:$compare v $mask\n"; |
195
|
|
|
|
|
|
|
push @{ $self->{combos} }, $collection; |
196
|
|
|
|
|
|
|
} |
197
|
|
|
|
|
|
|
} |
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
# Hand back the "non-overlapping powerset." |
202
|
|
|
|
|
|
|
return $self->{combos}; |
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
sub score { |
207
|
|
|
|
|
|
|
my $self = shift; |
208
|
|
|
|
|
|
|
my ( $open_separator, $close_separator ) = @_; |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
my $parts = $self->score_parts( $open_separator, $close_separator ); |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
for my $mask ( keys %$parts ) { |
213
|
|
|
|
|
|
|
my $familiarity = sprintf "%.2f chunks / %.2f chars", @{ $self->_familiarity($mask) }; |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
for my $element ( @{ $parts->{$mask} } ) { |
216
|
|
|
|
|
|
|
my $score = sprintf "%d:%d chunks / %d:%d chars", |
217
|
|
|
|
|
|
|
$element->{score}{knowns}, $element->{score}{unknowns}, |
218
|
|
|
|
|
|
|
$element->{score}{knownc}, $element->{score}{unknownc}; |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
my $part = join ', ', @{ $element->{partition} }; |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
my $defn = join ', ', @{ $element->{definition} }; |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
push @{ $self->{score}{$mask} }, { |
225
|
|
|
|
|
|
|
score => $score, |
226
|
|
|
|
|
|
|
familiarity => $familiarity, |
227
|
|
|
|
|
|
|
partition => $part, |
228
|
|
|
|
|
|
|
definition => $defn, |
229
|
|
|
|
|
|
|
}; |
230
|
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
return $self->{score}; |
234
|
|
|
|
|
|
|
} |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
sub _familiarity { |
237
|
609
|
|
|
609
|
|
1203
|
my ( $self, $mask ) = @_; |
238
|
|
|
|
|
|
|
|
239
|
609
|
|
|
|
|
3298
|
my @chunks = grep { $_ ne "" } split /(0+)/, $mask; |
|
2826
|
|
|
|
|
6580
|
|
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
# Figure out how many chars are only 1s and |
242
|
|
|
|
|
|
|
# Figure out how many chunks are made up of 1s: |
243
|
609
|
|
|
|
|
1169
|
my $char_1s = 0; |
244
|
609
|
|
|
|
|
838
|
my $chunk_1s = 0; |
245
|
609
|
|
|
|
|
993
|
for my $chunk (@chunks) { |
246
|
2577
|
100
|
|
|
|
4846
|
$char_1s += $chunk =~ /0/ ? 0 : length($chunk); |
247
|
2577
|
100
|
|
|
|
4562
|
$chunk_1s += $chunk =~ /0/ ? 0 : 1; |
248
|
|
|
|
|
|
|
} |
249
|
|
|
|
|
|
|
|
250
|
609
|
|
|
|
|
4427
|
return [ $chunk_1s / @chunks, $char_1s / length($mask) ]; |
251
|
|
|
|
|
|
|
} |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
sub score_parts { |
255
|
|
|
|
|
|
|
my $self = shift; |
256
|
|
|
|
|
|
|
my ( $open_separator, $close_separator, $line_terminator ) = @_; |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
$line_terminator = '' unless defined $line_terminator; |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
# Visit each combination... |
261
|
|
|
|
|
|
|
my $i = 0; |
262
|
|
|
|
|
|
|
for my $c (@{ $self->{combos} }) { |
263
|
|
|
|
|
|
|
$i++; |
264
|
|
|
|
|
|
|
my $together = $self->_or_together(@$c); |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
# Breakdown knowns vs unknowns and knowncharacters vs unknowncharacters. |
267
|
|
|
|
|
|
|
my %count = ( |
268
|
|
|
|
|
|
|
knowns => 0, |
269
|
|
|
|
|
|
|
unknowns => 0, |
270
|
|
|
|
|
|
|
knownc => 0, |
271
|
|
|
|
|
|
|
unknownc => 0, |
272
|
|
|
|
|
|
|
); |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
for my $x ( reverse sort @$c ) { |
275
|
|
|
|
|
|
|
# Run-length encode an "un-digitized" string. |
276
|
|
|
|
|
|
|
my $y = _rle($x); |
277
|
|
|
|
|
|
|
my ( $knowns, $unknowns, $knownc, $unknownc ) = _grouping($y); |
278
|
|
|
|
|
|
|
# Accumulate the counters! |
279
|
|
|
|
|
|
|
$count{knowns} += $knowns; |
280
|
|
|
|
|
|
|
$count{unknowns} += $unknowns; |
281
|
|
|
|
|
|
|
$count{knownc} += $knownc; |
282
|
|
|
|
|
|
|
$count{unknownc} += $unknownc; |
283
|
|
|
|
|
|
|
} |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
my ( $s, $m ) = _reconstruct( $self->{word}, $c, $open_separator, $close_separator ); |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
my $defn = []; |
288
|
|
|
|
|
|
|
for my $i ( @$m ) |
289
|
|
|
|
|
|
|
{ |
290
|
|
|
|
|
|
|
for my $j ( keys %{ $self->{known} } ) |
291
|
|
|
|
|
|
|
{ |
292
|
|
|
|
|
|
|
push @$defn, $self->{known}{$j}{defn} if $self->{known}{$j}{mask} eq $i; |
293
|
|
|
|
|
|
|
} |
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
push @{ $self->{score_parts}{$together} }, { |
297
|
|
|
|
|
|
|
score => \%count, |
298
|
|
|
|
|
|
|
partition => $s, |
299
|
|
|
|
|
|
|
definition => $defn, |
300
|
|
|
|
|
|
|
familiarity => $self->_familiarity($together), |
301
|
|
|
|
|
|
|
}; |
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
return $self->{score_parts}; |
305
|
|
|
|
|
|
|
} |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
sub _grouping { |
308
|
|
|
|
|
|
|
my $scored = shift; |
309
|
|
|
|
|
|
|
my @groups = $scored =~ /([ku]\d+)/g; |
310
|
|
|
|
|
|
|
my ( $knowns, $unknowns ) = ( 0, 0 ); |
311
|
|
|
|
|
|
|
my ( $knownc, $unknownc ) = ( 0, 0 ); |
312
|
|
|
|
|
|
|
for ( @groups ) { |
313
|
|
|
|
|
|
|
if ( /k(\d+)/ ) { |
314
|
|
|
|
|
|
|
$knowns++; |
315
|
|
|
|
|
|
|
$knownc += $1; |
316
|
|
|
|
|
|
|
} |
317
|
|
|
|
|
|
|
if ( /u(\d+)/ ) { |
318
|
|
|
|
|
|
|
$unknowns++; |
319
|
|
|
|
|
|
|
$unknownc += $1; |
320
|
|
|
|
|
|
|
} |
321
|
|
|
|
|
|
|
} |
322
|
|
|
|
|
|
|
return $knowns, $unknowns, $knownc, $unknownc; |
323
|
|
|
|
|
|
|
} |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
sub _rle { |
326
|
|
|
|
|
|
|
my $scored = shift; |
327
|
|
|
|
|
|
|
# Run-length encode an "un-digitized" string. |
328
|
|
|
|
|
|
|
$scored =~ s/1/k/g; # Undigitize |
329
|
|
|
|
|
|
|
$scored =~ s/0/u/g; # " |
330
|
|
|
|
|
|
|
# Count contiguous chars. |
331
|
|
|
|
|
|
|
$scored =~ s/(.)\1*/$1 . length(substr($scored, $-[0], $+[0]-$-[0]))/ge; |
332
|
|
|
|
|
|
|
return $scored; |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
sub _does_not_overlap { |
336
|
|
|
|
|
|
|
my $self = shift; |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
# Get our masks to check. |
339
|
|
|
|
|
|
|
my ($mask, $check) = @_; |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
# Create the bitstrings to compare. |
342
|
|
|
|
|
|
|
my $bitmask = Bit::Vector->new_Bin($self->{wlen}, $mask); |
343
|
|
|
|
|
|
|
my $orclone = Bit::Vector->new_Bin($self->{wlen}, $check); |
344
|
|
|
|
|
|
|
my $xorclone = Bit::Vector->new_Bin($self->{wlen}, $check); |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
# Compute or and xor for the strings. |
347
|
|
|
|
|
|
|
$orclone->Or($bitmask, $orclone); |
348
|
|
|
|
|
|
|
$xorclone->Xor($bitmask, $xorclone); |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
# Return the "or & xor equivalent sibling." |
351
|
|
|
|
|
|
|
return $xorclone->equal($orclone) ? $orclone->to_Bin : 0; |
352
|
|
|
|
|
|
|
} |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
sub _or_together { |
355
|
|
|
|
|
|
|
my $self = shift; |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
# Get our masks to score. |
358
|
|
|
|
|
|
|
my @masks = @_; |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
# Initialize the bitmask to return, to zero. |
361
|
|
|
|
|
|
|
my $result = Bit::Vector->new_Bin($self->{wlen}, (0 x $self->{wlen})); |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
for my $mask (@masks) { |
364
|
|
|
|
|
|
|
# Create the bitstrings to compare. |
365
|
|
|
|
|
|
|
my $bitmask = Bit::Vector->new_Bin($self->{wlen}, $mask); |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
# Get the union of the bit strings. |
368
|
|
|
|
|
|
|
$result->Or($result, $bitmask); |
369
|
|
|
|
|
|
|
} |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
# Return the "or sum." |
372
|
|
|
|
|
|
|
return $result->to_Bin; |
373
|
|
|
|
|
|
|
} |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
sub _reconstruct { |
376
|
|
|
|
|
|
|
my ( $word, $masks, $open_separator, $close_separator ) = @_; |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
$open_separator = '<' unless defined $open_separator; |
379
|
|
|
|
|
|
|
$close_separator = '>' unless defined $close_separator; |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
my $strings = []; |
382
|
|
|
|
|
|
|
my $my_masks = []; |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
for my $mask (reverse sort @$masks) { |
385
|
|
|
|
|
|
|
my $i = 0; |
386
|
|
|
|
|
|
|
my $last = 0; |
387
|
|
|
|
|
|
|
my $string = ''; |
388
|
|
|
|
|
|
|
for my $m ( split //, $mask ) { |
389
|
|
|
|
|
|
|
if ( $m ) { |
390
|
|
|
|
|
|
|
$string .= $open_separator unless $last; |
391
|
|
|
|
|
|
|
$string .= substr( $word, $i, 1 ); |
392
|
|
|
|
|
|
|
$last = 1; |
393
|
|
|
|
|
|
|
} |
394
|
|
|
|
|
|
|
else { |
395
|
|
|
|
|
|
|
$string .= $close_separator if $last; |
396
|
|
|
|
|
|
|
$string .= substr( $word, $i, 1 ); |
397
|
|
|
|
|
|
|
$last = 0; |
398
|
|
|
|
|
|
|
} |
399
|
|
|
|
|
|
|
$i++; |
400
|
|
|
|
|
|
|
} |
401
|
|
|
|
|
|
|
$string .= $close_separator if $last; |
402
|
|
|
|
|
|
|
push @$strings, $string; |
403
|
|
|
|
|
|
|
push @$my_masks, $mask; |
404
|
|
|
|
|
|
|
} |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
return $strings, $my_masks; |
407
|
|
|
|
|
|
|
} |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
1; |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
__END__ |