line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Lingua::Word::Parser; |
2
|
|
|
|
|
|
|
our $AUTHORITY = 'cpan:GENE'; |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
# ABSTRACT: Parse a word into scored known and unknown parts |
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
641
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
26
|
|
7
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
34
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our $VERSION = '0.0805'; |
10
|
|
|
|
|
|
|
|
11
|
1
|
|
|
1
|
|
388
|
use Bit::Vector; |
|
1
|
|
|
|
|
827
|
|
|
1
|
|
|
|
|
37
|
|
12
|
1
|
|
|
1
|
|
1253
|
use DBI; |
|
1
|
|
|
|
|
15260
|
|
|
1
|
|
|
|
|
66
|
|
13
|
1
|
|
|
1
|
|
526
|
use List::PowerSet qw( powerset_lazy ); |
|
1
|
|
|
|
|
415
|
|
|
1
|
|
|
|
|
53
|
|
14
|
1
|
|
|
1
|
|
409
|
use IO::File; |
|
1
|
|
|
|
|
7701
|
|
|
1
|
|
|
|
|
109
|
|
15
|
|
|
|
|
|
|
|
16
|
1
|
|
|
1
|
|
651
|
use Memoize; |
|
1
|
|
|
|
|
2167
|
|
|
1
|
|
|
|
|
1999
|
|
17
|
|
|
|
|
|
|
memoize('_does_not_overlap'); |
18
|
|
|
|
|
|
|
memoize('power'); |
19
|
|
|
|
|
|
|
memoize('_reconstruct'); |
20
|
|
|
|
|
|
|
memoize('_grouping'); |
21
|
|
|
|
|
|
|
memoize('score'); |
22
|
|
|
|
|
|
|
memoize('score_parts'); |
23
|
|
|
|
|
|
|
memoize('_rle'); |
24
|
|
|
|
|
|
|
memoize('_or_together'); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
sub new { |
29
|
2
|
|
|
2
|
1
|
1012
|
my $class = shift; |
30
|
2
|
|
|
|
|
6
|
my %args = @_; |
31
|
|
|
|
|
|
|
my $self = { |
32
|
|
|
|
|
|
|
file => $args{file}, |
33
|
|
|
|
|
|
|
dbhost => $args{dbhost} || 'localhost', |
34
|
|
|
|
|
|
|
dbtype => $args{dbtype} || 'mysql', |
35
|
|
|
|
|
|
|
dbname => $args{dbname}, |
36
|
|
|
|
|
|
|
dbuser => $args{dbuser}, |
37
|
|
|
|
|
|
|
dbpass => $args{dbpass}, |
38
|
|
|
|
|
|
|
lex => $args{lex}, |
39
|
|
|
|
|
|
|
word => $args{word}, |
40
|
2
|
|
50
|
|
|
21
|
known => {}, |
|
|
|
50
|
|
|
|
|
41
|
|
|
|
|
|
|
masks => {}, |
42
|
|
|
|
|
|
|
combos => [], |
43
|
|
|
|
|
|
|
score => {}, |
44
|
|
|
|
|
|
|
}; |
45
|
2
|
|
|
|
|
4
|
bless $self, $class; |
46
|
2
|
|
|
|
|
7
|
$self->_init(%args); |
47
|
2
|
|
|
|
|
11
|
return $self; |
48
|
|
|
|
|
|
|
} |
49
|
|
|
|
|
|
|
sub _init { |
50
|
2
|
|
|
2
|
|
18
|
my ($self, %args) = @_; |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# Set the length of our word. |
53
|
2
|
|
|
|
|
7
|
$self->{wlen} = length $self->{word}; |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# Set lex if given data. |
56
|
2
|
100
|
66
|
|
|
46
|
if ( $self->{file} && -e $self->{file} ) { |
|
|
50
|
|
|
|
|
|
57
|
1
|
|
|
|
|
6
|
$self->_fetch_lex; |
58
|
|
|
|
|
|
|
} |
59
|
|
|
|
|
|
|
elsif( $self->{dbname} ) |
60
|
|
|
|
|
|
|
{ |
61
|
0
|
|
|
|
|
0
|
$self->_db_fetch; |
62
|
|
|
|
|
|
|
} |
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
sub _fetch_lex { |
66
|
1
|
|
|
1
|
|
2
|
my $self = shift; |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# Open the given file for reading... |
69
|
1
|
|
|
|
|
9
|
my $fh = IO::File->new(); |
70
|
1
|
50
|
|
|
|
48
|
$fh->open( "< $self->{file}" ) or die "Can't read file: '$self->{file}'"; |
71
|
1
|
|
|
|
|
95
|
for ( <$fh> ) { |
72
|
|
|
|
|
|
|
# Split space-separated entries. |
73
|
9
|
|
|
|
|
15
|
chomp; |
74
|
9
|
|
|
|
|
37
|
my ($re, $defn) = split /\s+/, $_, 2; |
75
|
|
|
|
|
|
|
# Add the entry to the lexicon. |
76
|
9
|
|
|
|
|
103
|
$self->{lex}{$re} = { defn => $defn, re => qr/$re/ }; |
77
|
|
|
|
|
|
|
} |
78
|
1
|
|
|
|
|
19
|
$fh->close; |
79
|
|
|
|
|
|
|
|
80
|
1
|
|
|
|
|
21
|
return $self->{lex}; |
81
|
|
|
|
|
|
|
} |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
sub _db_fetch { |
84
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
85
|
|
|
|
|
|
|
|
86
|
0
|
|
|
|
|
0
|
my $dsn = "DBI:$self->{dbtype}:$self->{dbname};$self->{dbhost}"; |
87
|
|
|
|
|
|
|
|
88
|
0
|
0
|
|
|
|
0
|
my $dbh = DBI->connect( $dsn, $self->{dbuser}, $self->{dbpass}, { RaiseError => 1, AutoCommit => 1 } ) |
89
|
|
|
|
|
|
|
or die "Unable to connect to $self->{dbname}: $DBI::errstr\n"; |
90
|
|
|
|
|
|
|
|
91
|
0
|
|
|
|
|
0
|
my $sql = 'SELECT affix, definition FROM fragment'; |
92
|
|
|
|
|
|
|
|
93
|
0
|
|
|
|
|
0
|
my $sth = $dbh->prepare($sql); |
94
|
0
|
0
|
|
|
|
0
|
$sth->execute or die "Unable to execute '$sql': $DBI::errstr\n"; |
95
|
|
|
|
|
|
|
|
96
|
0
|
|
|
|
|
0
|
while( my @row = $sth->fetchrow_array ) { |
97
|
0
|
|
|
|
|
0
|
my $part = $row[0]; |
98
|
0
|
|
|
|
|
0
|
$self->{lex}{$part} = { re => qr/$part/, defn => $row[1] }; |
99
|
|
|
|
|
|
|
} |
100
|
0
|
0
|
|
|
|
0
|
die "Fetch terminated early: $DBI::errstr\n" if $DBI::errstr; |
101
|
|
|
|
|
|
|
|
102
|
0
|
0
|
|
|
|
0
|
$sth->finish or die "Unable to finish '$sql': $DBI::errstr\n"; |
103
|
|
|
|
|
|
|
|
104
|
0
|
0
|
|
|
|
0
|
$dbh->disconnect or die "Unable to disconnect from $self->{dbname}: $DBI::errstr\n"; |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
sub knowns { |
109
|
1
|
|
|
1
|
1
|
560
|
my $self = shift; |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
# The identifier for the known and masks lists. |
112
|
1
|
|
|
|
|
2
|
my $id = 0; |
113
|
|
|
|
|
|
|
|
114
|
1
|
|
|
|
|
2
|
for my $i (values %{ $self->{lex} }) { |
|
1
|
|
|
|
|
3
|
|
115
|
9
|
|
|
|
|
37
|
while ($self->{word} =~ /$i->{re}/g) { |
116
|
|
|
|
|
|
|
# Match positions. |
117
|
10
|
|
|
|
|
27
|
my ($m, $n) = ($-[0], $+[0]); |
118
|
|
|
|
|
|
|
# Get matched word-part. |
119
|
10
|
|
|
|
|
30
|
my $part = substr $self->{word}, $m, $n - $m; |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
# Create the part-of-word bitmask. |
122
|
10
|
|
|
|
|
18
|
my $mask = 0 x $m; # Before known |
123
|
10
|
|
50
|
|
|
20
|
$mask .= 1 x (($n - $m) || 1); # Known part |
124
|
10
|
|
|
|
|
17
|
$mask .= 0 x ($self->{wlen} - $n); # After known |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
# Output our progress. |
127
|
|
|
|
|
|
|
# warn sprintf "%s %s - %s, %s (%d %d), %s\n", |
128
|
|
|
|
|
|
|
# $mask, |
129
|
|
|
|
|
|
|
# $i->{re}, |
130
|
|
|
|
|
|
|
# substr($self->{word}, 0, $m), |
131
|
|
|
|
|
|
|
# $part, |
132
|
|
|
|
|
|
|
# $m, |
133
|
|
|
|
|
|
|
# $n - 1, |
134
|
|
|
|
|
|
|
# substr($self->{word}, $n), |
135
|
|
|
|
|
|
|
# ; |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
# Save the known as a member of a list keyed by starting position. |
138
|
|
|
|
|
|
|
$self->{known}{$id} = { |
139
|
|
|
|
|
|
|
part => $part, |
140
|
|
|
|
|
|
|
span => [$m, $n - 1], |
141
|
|
|
|
|
|
|
defn => $i->{defn}, |
142
|
10
|
|
|
|
|
44
|
mask => $mask, |
143
|
|
|
|
|
|
|
}; |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
# Save the relationship between mask and id. |
146
|
10
|
|
|
|
|
45
|
$self->{masks}{$mask} = $id++; |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
|
150
|
1
|
|
|
|
|
4
|
return $self->{known}; |
151
|
|
|
|
|
|
|
} |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
sub power { |
155
|
|
|
|
|
|
|
my $self = shift; |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
# Get a new powerset generator. |
158
|
|
|
|
|
|
|
my $power = powerset_lazy(sort keys %{ $self->{masks} }); |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
# Consider each member of the powerset.. to save or skip? |
161
|
|
|
|
|
|
|
while (my $collection = $power->()) { |
162
|
|
|
|
|
|
|
# warn "C: @$collection\n"; |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
# Save this collection if it has only one item. |
165
|
|
|
|
|
|
|
if (1 == @$collection) { |
166
|
|
|
|
|
|
|
# warn "\t\tE: only 1 mask\n"; |
167
|
|
|
|
|
|
|
push @{ $self->{combos} }, $collection; |
168
|
|
|
|
|
|
|
next; |
169
|
|
|
|
|
|
|
} |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
# Compare each mask against the others. |
172
|
|
|
|
|
|
|
LOOP: for my $i (0 .. @$collection - 1) { |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
# Set the comparison mask. |
175
|
|
|
|
|
|
|
my $compare = $collection->[$i]; |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
for my $j ($i + 1 .. @$collection - 1) { |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
# Set the current mask. |
180
|
|
|
|
|
|
|
my $mask = $collection->[$j]; |
181
|
|
|
|
|
|
|
# warn "\tP:$compare v $mask\n"; |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
# Skip this collection if an overlap is found. |
184
|
|
|
|
|
|
|
if (not $self->_does_not_overlap($compare, $mask)) { |
185
|
|
|
|
|
|
|
# warn "\t\tO:$compare v $mask\n"; |
186
|
|
|
|
|
|
|
last LOOP; |
187
|
|
|
|
|
|
|
} |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
# Save this collection if we made it to the last pair. |
190
|
|
|
|
|
|
|
if ($i == @$collection - 2 && $j == @$collection - 1) { |
191
|
|
|
|
|
|
|
# warn "\t\tE:$compare v $mask\n"; |
192
|
|
|
|
|
|
|
push @{ $self->{combos} }, $collection; |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
} |
196
|
|
|
|
|
|
|
} |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
# Hand back the "non-overlapping powerset." |
199
|
|
|
|
|
|
|
return $self->{combos}; |
200
|
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
sub score { |
204
|
|
|
|
|
|
|
my $self = shift; |
205
|
|
|
|
|
|
|
my ( $open_separator, $close_separator ) = @_; |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
my $parts = $self->score_parts( $open_separator, $close_separator ); |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
for my $mask ( keys %$parts ) { |
210
|
|
|
|
|
|
|
my $familiarity = sprintf "%.2f chunks / %.2f chars", @{ $self->_familiarity($mask) }; |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
for my $element ( @{ $parts->{$mask} } ) { |
213
|
|
|
|
|
|
|
my $score = sprintf "%d:%d chunks / %d:%d chars", |
214
|
|
|
|
|
|
|
$element->{score}{knowns}, $element->{score}{unknowns}, |
215
|
|
|
|
|
|
|
$element->{score}{knownc}, $element->{score}{unknownc}; |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
my $part = join ', ', @{ $element->{partition} }; |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
my $defn = join ', ', @{ $element->{definition} }; |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
push @{ $self->{score}{$mask} }, { |
222
|
|
|
|
|
|
|
score => $score, |
223
|
|
|
|
|
|
|
familiarity => $familiarity, |
224
|
|
|
|
|
|
|
partition => $part, |
225
|
|
|
|
|
|
|
definition => $defn, |
226
|
|
|
|
|
|
|
}; |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
return $self->{score}; |
231
|
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
sub _familiarity { |
234
|
609
|
|
|
609
|
|
881
|
my ( $self, $mask ) = @_; |
235
|
|
|
|
|
|
|
|
236
|
609
|
|
|
|
|
2589
|
my @chunks = grep { $_ ne "" } split /(0+)/, $mask; |
|
2826
|
|
|
|
|
4892
|
|
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
# Figure out how many chars are only 1s and |
239
|
|
|
|
|
|
|
# Figure out how many chunks are made up of 1s: |
240
|
609
|
|
|
|
|
973
|
my $char_1s = 0; |
241
|
609
|
|
|
|
|
688
|
my $chunk_1s = 0; |
242
|
609
|
|
|
|
|
787
|
for my $chunk (@chunks) { |
243
|
2577
|
100
|
|
|
|
3916
|
$char_1s += $chunk =~ /0/ ? 0 : length($chunk); |
244
|
2577
|
100
|
|
|
|
4026
|
$chunk_1s += $chunk =~ /0/ ? 0 : 1; |
245
|
|
|
|
|
|
|
} |
246
|
|
|
|
|
|
|
|
247
|
609
|
|
|
|
|
3383
|
return [ $chunk_1s / @chunks, $char_1s / length($mask) ]; |
248
|
|
|
|
|
|
|
} |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
sub score_parts { |
252
|
|
|
|
|
|
|
my $self = shift; |
253
|
|
|
|
|
|
|
my ( $open_separator, $close_separator, $line_terminator ) = @_; |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
$line_terminator = '' unless defined $line_terminator; |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
# Visit each combination... |
258
|
|
|
|
|
|
|
my $i = 0; |
259
|
|
|
|
|
|
|
for my $c (@{ $self->{combos} }) { |
260
|
|
|
|
|
|
|
$i++; |
261
|
|
|
|
|
|
|
my $together = $self->_or_together(@$c); |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
# Breakdown knowns vs unknowns and knowncharacters vs unknowncharacters. |
264
|
|
|
|
|
|
|
my %count = ( |
265
|
|
|
|
|
|
|
knowns => 0, |
266
|
|
|
|
|
|
|
unknowns => 0, |
267
|
|
|
|
|
|
|
knownc => 0, |
268
|
|
|
|
|
|
|
unknownc => 0, |
269
|
|
|
|
|
|
|
); |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
for my $x ( reverse sort @$c ) { |
272
|
|
|
|
|
|
|
# Run-length encode an "un-digitized" string. |
273
|
|
|
|
|
|
|
my $y = _rle($x); |
274
|
|
|
|
|
|
|
my ( $knowns, $unknowns, $knownc, $unknownc ) = _grouping($y); |
275
|
|
|
|
|
|
|
# Accumulate the counters! |
276
|
|
|
|
|
|
|
$count{knowns} += $knowns; |
277
|
|
|
|
|
|
|
$count{unknowns} += $unknowns; |
278
|
|
|
|
|
|
|
$count{knownc} += $knownc; |
279
|
|
|
|
|
|
|
$count{unknownc} += $unknownc; |
280
|
|
|
|
|
|
|
} |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
my ( $s, $m ) = _reconstruct( $self->{word}, $c, $open_separator, $close_separator ); |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
my $defn = []; |
285
|
|
|
|
|
|
|
for my $i ( @$m ) |
286
|
|
|
|
|
|
|
{ |
287
|
|
|
|
|
|
|
for my $j ( keys %{ $self->{known} } ) |
288
|
|
|
|
|
|
|
{ |
289
|
|
|
|
|
|
|
push @$defn, $self->{known}{$j}{defn} if $self->{known}{$j}{mask} eq $i; |
290
|
|
|
|
|
|
|
} |
291
|
|
|
|
|
|
|
} |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
push @{ $self->{score_parts}{$together} }, { |
294
|
|
|
|
|
|
|
score => \%count, |
295
|
|
|
|
|
|
|
partition => $s, |
296
|
|
|
|
|
|
|
definition => $defn, |
297
|
|
|
|
|
|
|
familiarity => $self->_familiarity($together), |
298
|
|
|
|
|
|
|
}; |
299
|
|
|
|
|
|
|
} |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
return $self->{score_parts}; |
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
sub _grouping { |
305
|
|
|
|
|
|
|
my $scored = shift; |
306
|
|
|
|
|
|
|
my @groups = $scored =~ /([ku]\d+)/g; |
307
|
|
|
|
|
|
|
my ( $knowns, $unknowns ) = ( 0, 0 ); |
308
|
|
|
|
|
|
|
my ( $knownc, $unknownc ) = ( 0, 0 ); |
309
|
|
|
|
|
|
|
for ( @groups ) { |
310
|
|
|
|
|
|
|
if ( /k(\d+)/ ) { |
311
|
|
|
|
|
|
|
$knowns++; |
312
|
|
|
|
|
|
|
$knownc += $1; |
313
|
|
|
|
|
|
|
} |
314
|
|
|
|
|
|
|
if ( /u(\d+)/ ) { |
315
|
|
|
|
|
|
|
$unknowns++; |
316
|
|
|
|
|
|
|
$unknownc += $1; |
317
|
|
|
|
|
|
|
} |
318
|
|
|
|
|
|
|
} |
319
|
|
|
|
|
|
|
return $knowns, $unknowns, $knownc, $unknownc; |
320
|
|
|
|
|
|
|
} |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
sub _rle { |
323
|
|
|
|
|
|
|
my $scored = shift; |
324
|
|
|
|
|
|
|
# Run-length encode an "un-digitized" string. |
325
|
|
|
|
|
|
|
$scored =~ s/1/k/g; # Undigitize |
326
|
|
|
|
|
|
|
$scored =~ s/0/u/g; # " |
327
|
|
|
|
|
|
|
# Count contiguous chars. |
328
|
|
|
|
|
|
|
$scored =~ s/(.)\1*/$1 . length(substr($scored, $-[0], $+[0]-$-[0]))/ge; |
329
|
|
|
|
|
|
|
return $scored; |
330
|
|
|
|
|
|
|
} |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
sub _does_not_overlap { |
333
|
|
|
|
|
|
|
my $self = shift; |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
# Get our masks to check. |
336
|
|
|
|
|
|
|
my ($mask, $check) = @_; |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
# Create the bitstrings to compare. |
339
|
|
|
|
|
|
|
my $bitmask = Bit::Vector->new_Bin($self->{wlen}, $mask); |
340
|
|
|
|
|
|
|
my $orclone = Bit::Vector->new_Bin($self->{wlen}, $check); |
341
|
|
|
|
|
|
|
my $xorclone = Bit::Vector->new_Bin($self->{wlen}, $check); |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
# Compute or and xor for the strings. |
344
|
|
|
|
|
|
|
$orclone->Or($bitmask, $orclone); |
345
|
|
|
|
|
|
|
$xorclone->Xor($bitmask, $xorclone); |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
# Return the "or & xor equivalent sibling." |
348
|
|
|
|
|
|
|
return $xorclone->equal($orclone) ? $orclone->to_Bin : 0; |
349
|
|
|
|
|
|
|
} |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
sub _or_together { |
352
|
|
|
|
|
|
|
my $self = shift; |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
# Get our masks to score. |
355
|
|
|
|
|
|
|
my @masks = @_; |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
# Initialize the bitmask to return, to zero. |
358
|
|
|
|
|
|
|
my $result = Bit::Vector->new_Bin($self->{wlen}, (0 x $self->{wlen})); |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
for my $mask (@masks) { |
361
|
|
|
|
|
|
|
# Create the bitstrings to compare. |
362
|
|
|
|
|
|
|
my $bitmask = Bit::Vector->new_Bin($self->{wlen}, $mask); |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
# Get the union of the bit strings. |
365
|
|
|
|
|
|
|
$result->Or($result, $bitmask); |
366
|
|
|
|
|
|
|
} |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
# Return the "or sum." |
369
|
|
|
|
|
|
|
return $result->to_Bin; |
370
|
|
|
|
|
|
|
} |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
sub _reconstruct { |
373
|
|
|
|
|
|
|
my ( $word, $masks, $open_separator, $close_separator ) = @_; |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
$open_separator = '<' unless defined $open_separator; |
376
|
|
|
|
|
|
|
$close_separator = '>' unless defined $close_separator; |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
my $strings = []; |
379
|
|
|
|
|
|
|
my $my_masks = []; |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
for my $mask (reverse sort @$masks) { |
382
|
|
|
|
|
|
|
my $i = 0; |
383
|
|
|
|
|
|
|
my $last = 0; |
384
|
|
|
|
|
|
|
my $string = ''; |
385
|
|
|
|
|
|
|
for my $m ( split //, $mask ) { |
386
|
|
|
|
|
|
|
if ( $m ) { |
387
|
|
|
|
|
|
|
$string .= $open_separator unless $last; |
388
|
|
|
|
|
|
|
$string .= substr( $word, $i, 1 ); |
389
|
|
|
|
|
|
|
$last = 1; |
390
|
|
|
|
|
|
|
} |
391
|
|
|
|
|
|
|
else { |
392
|
|
|
|
|
|
|
$string .= $close_separator if $last; |
393
|
|
|
|
|
|
|
$string .= substr( $word, $i, 1 ); |
394
|
|
|
|
|
|
|
$last = 0; |
395
|
|
|
|
|
|
|
} |
396
|
|
|
|
|
|
|
$i++; |
397
|
|
|
|
|
|
|
} |
398
|
|
|
|
|
|
|
$string .= $close_separator if $last; |
399
|
|
|
|
|
|
|
push @$strings, $string; |
400
|
|
|
|
|
|
|
push @$my_masks, $mask; |
401
|
|
|
|
|
|
|
} |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
return $strings, $my_masks; |
404
|
|
|
|
|
|
|
} |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
1; |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
__END__ |