line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Lingua::TokenParse; |
2
|
|
|
|
|
|
|
$Lingua::TokenParse::VERSION = '0.1602'; |
3
|
|
|
|
|
|
|
our $AUTHORITY = 'cpan:GENE'; |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
# ABSTRACT: DEPRECATED in favor of Lingua::Word::Parser |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
$VERSION = '0.1602'; |
8
|
|
|
|
|
|
|
|
9
|
1
|
|
|
1
|
|
775
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
29
|
|
10
|
1
|
|
|
1
|
|
8
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
27
|
|
11
|
1
|
|
|
1
|
|
8
|
use Carp qw(croak); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
55
|
|
12
|
1
|
|
|
1
|
|
613
|
use Storable qw(retrieve store); |
|
1
|
|
|
|
|
3155
|
|
|
1
|
|
|
|
|
61
|
|
13
|
1
|
|
|
1
|
|
428
|
use Math::BaseCalc (); |
|
1
|
|
|
|
|
1505
|
|
|
1
|
|
|
|
|
2256
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
sub new { |
16
|
2
|
|
|
2
|
1
|
546
|
my $proto = shift; |
17
|
2
|
|
33
|
|
|
11
|
my $class = ref $proto || $proto; |
18
|
2
|
|
|
|
|
17
|
my $self = { |
19
|
|
|
|
|
|
|
verbose => 0, |
20
|
|
|
|
|
|
|
# The word to parse! |
21
|
|
|
|
|
|
|
word => undef, |
22
|
|
|
|
|
|
|
# We need to use this. |
23
|
|
|
|
|
|
|
word_length => 0, |
24
|
|
|
|
|
|
|
# Known tokens. |
25
|
|
|
|
|
|
|
lexicon => {}, |
26
|
|
|
|
|
|
|
# Local lexicon cache file name. |
27
|
|
|
|
|
|
|
lexicon_file => '', # ?: 'lexicon-' . time(), |
28
|
|
|
|
|
|
|
# All word parts. |
29
|
|
|
|
|
|
|
parts => [], |
30
|
|
|
|
|
|
|
# All possible parts combinations. |
31
|
|
|
|
|
|
|
combinations => [], |
32
|
|
|
|
|
|
|
# Scored list of the known parts combinations. |
33
|
|
|
|
|
|
|
knowns => {}, |
34
|
|
|
|
|
|
|
# Definitions of the known and unknown fragments in knowns. |
35
|
|
|
|
|
|
|
definitions => {}, |
36
|
|
|
|
|
|
|
# Fragment definition separator. |
37
|
|
|
|
|
|
|
separator => ' + ', |
38
|
|
|
|
|
|
|
# Known-but-not-defined definition output string. |
39
|
|
|
|
|
|
|
not_defined => '.', |
40
|
|
|
|
|
|
|
# Unknown definition output string. |
41
|
|
|
|
|
|
|
unknown => '?', |
42
|
|
|
|
|
|
|
# Known trimming regexp rules. |
43
|
|
|
|
|
|
|
constraints => [], |
44
|
|
|
|
|
|
|
@_, # slurp anything else and override defaults. |
45
|
|
|
|
|
|
|
}; |
46
|
2
|
|
|
|
|
4
|
bless $self, $class; |
47
|
2
|
|
|
|
|
6
|
$self->_init(); |
48
|
2
|
|
|
|
|
6
|
return $self; |
49
|
|
|
|
|
|
|
} |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
sub _init { |
52
|
2
|
|
|
2
|
|
3
|
my $self = shift; |
53
|
2
|
50
|
|
|
|
8
|
warn "Entering _init()\n" if $self->{verbose}; |
54
|
2
|
100
|
|
|
|
7
|
$self->word( $self->{word} ) if $self->{word}; |
55
|
|
|
|
|
|
|
# Retrieve our lexicon cache if a filename was set. |
56
|
2
|
|
|
|
|
5
|
$self->lexicon_cache; |
57
|
|
|
|
|
|
|
} |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
sub DESTROY { |
60
|
2
|
|
|
2
|
|
739
|
my $self = shift; |
61
|
|
|
|
|
|
|
# Cache our lexicon if a filename has been given. |
62
|
|
|
|
|
|
|
$self->lexicon_cache( $self->{lexicon_file} ) |
63
|
2
|
100
|
|
|
|
11
|
if $self->{lexicon_file}; |
64
|
|
|
|
|
|
|
} |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
sub verbose { |
67
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
68
|
0
|
0
|
|
|
|
0
|
$self->{verbose} = shift if @_; |
69
|
0
|
|
|
|
|
0
|
return $self->{verbose}; |
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
sub word { |
73
|
|
|
|
|
|
|
# WORD: This method is the only place where word_length is set. |
74
|
1
|
|
|
1
|
1
|
1
|
my $self = shift; |
75
|
1
|
50
|
|
|
|
3
|
warn "Entering word()\n" if $self->{verbose}; |
76
|
1
|
50
|
|
|
|
4
|
if( @_ ) { |
77
|
1
|
|
|
|
|
1
|
$self->{word} = shift; |
78
|
1
|
|
|
|
|
3
|
$self->{word_length} = length $self->{word}; |
79
|
|
|
|
|
|
|
printf "\tword = %s\n\tlength = %d\n", |
80
|
|
|
|
|
|
|
$self->{word}, $self->{word_length} |
81
|
1
|
50
|
|
|
|
3
|
if $self->{verbose}; |
82
|
|
|
|
|
|
|
} |
83
|
1
|
|
|
|
|
1
|
return $self->{word}; |
84
|
|
|
|
|
|
|
} |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
sub lexicon { |
87
|
964
|
|
|
964
|
1
|
2075
|
my $self = shift; |
88
|
964
|
100
|
|
|
|
1585
|
if( @_ ) { |
89
|
2
|
0
|
33
|
|
|
17
|
$self->{lexicon} = @_ == 1 && ref $_[0] eq 'HASH' |
|
|
50
|
|
|
|
|
|
90
|
|
|
|
|
|
|
? shift |
91
|
|
|
|
|
|
|
: @_ % 2 == 0 |
92
|
|
|
|
|
|
|
? { @_ } |
93
|
|
|
|
|
|
|
: {}; |
94
|
|
|
|
|
|
|
} |
95
|
964
|
|
|
|
|
1641
|
return $self->{lexicon}; |
96
|
|
|
|
|
|
|
} |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
sub parts { |
99
|
2
|
|
|
2
|
1
|
360
|
my $self = shift; |
100
|
2
|
100
|
|
|
|
7
|
$self->{parts} = shift if @_; |
101
|
2
|
|
|
|
|
30
|
return $self->{parts}; |
102
|
|
|
|
|
|
|
} |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
sub combinations { |
105
|
195
|
|
|
195
|
1
|
288
|
my $self = shift; |
106
|
195
|
100
|
|
|
|
341
|
$self->{combinations} = shift if @_; |
107
|
195
|
|
|
|
|
493
|
return $self->{combinations}; |
108
|
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
sub knowns { |
111
|
3
|
|
|
3
|
1
|
622
|
my $self = shift; |
112
|
3
|
100
|
|
|
|
9
|
$self->{knowns} = shift if @_; |
113
|
3
|
|
|
|
|
19
|
return $self->{knowns}; |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
sub definitions { |
117
|
2
|
|
|
2
|
1
|
4
|
my $self = shift; |
118
|
2
|
100
|
|
|
|
7
|
$self->{definitions} = shift if @_; |
119
|
2
|
|
|
|
|
6
|
return $self->{definitions}; |
120
|
|
|
|
|
|
|
} |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
sub separator { |
123
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
124
|
0
|
0
|
|
|
|
0
|
$self->{separator} = shift if @_; |
125
|
0
|
|
|
|
|
0
|
return $self->{separator}; |
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
sub not_defined { |
129
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
130
|
0
|
0
|
|
|
|
0
|
$self->{not_defined} = shift if @_; |
131
|
0
|
|
|
|
|
0
|
return $self->{not_defined}; |
132
|
|
|
|
|
|
|
} |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
sub unknown { |
135
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
136
|
0
|
0
|
|
|
|
0
|
$self->{unknown} = shift if @_; |
137
|
0
|
|
|
|
|
0
|
return $self->{unknown}; |
138
|
|
|
|
|
|
|
} |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
sub constraints { |
141
|
46
|
|
|
46
|
1
|
418
|
my $self = shift; |
142
|
46
|
100
|
|
|
|
81
|
$self->{constraints} = shift if @_; |
143
|
46
|
|
|
|
|
82
|
return $self->{constraints}; |
144
|
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
sub parse { |
147
|
1
|
|
|
1
|
1
|
4
|
my $self = shift; |
148
|
1
|
50
|
|
|
|
4
|
warn "Entering parse()\n" if $self->{verbose}; |
149
|
1
|
50
|
|
|
|
2
|
$self->word( shift ) if @_; |
150
|
1
|
50
|
|
|
|
3
|
croak 'No word provided.' unless defined $self->{word}; |
151
|
1
|
50
|
|
|
|
1
|
croak 'No lexicon defined.' unless keys %{ $self->{lexicon} }; |
|
1
|
|
|
|
|
6
|
|
152
|
|
|
|
|
|
|
# Reset our data structures. |
153
|
1
|
|
|
|
|
4
|
$self->parts([]); |
154
|
1
|
|
|
|
|
3
|
$self->definitions({}); |
155
|
1
|
|
|
|
|
3
|
$self->combinations([]); |
156
|
1
|
|
|
|
|
2
|
$self->knowns({}); |
157
|
|
|
|
|
|
|
# Build new ones based on the word. |
158
|
1
|
|
|
|
|
3
|
$self->build_parts; |
159
|
1
|
|
|
|
|
2
|
$self->build_definitions; |
160
|
1
|
|
|
|
|
3
|
$self->build_combinations; |
161
|
1
|
|
|
|
|
15
|
$self->build_knowns; |
162
|
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
sub build_parts { |
165
|
1
|
|
|
1
|
1
|
1
|
my $self = shift; |
166
|
1
|
50
|
|
|
|
3
|
warn "Entering build_parts()\n" if $self->{verbose}; |
167
|
|
|
|
|
|
|
|
168
|
1
|
|
|
|
|
4
|
for my $i (0 .. $self->{word_length} - 1) { |
169
|
9
|
|
|
|
|
16
|
for my $j (1 .. $self->{word_length} - $i) { |
170
|
45
|
|
|
|
|
73
|
my $part = substr $self->{word}, $i, $j; |
171
|
45
|
|
|
|
|
105
|
push @{ $self->{parts} }, $part |
172
|
45
|
|
|
|
|
146
|
unless grep { $part =~ /$_/ } |
173
|
45
|
50
|
|
|
|
59
|
@{ $self->constraints }; |
|
45
|
|
|
|
|
68
|
|
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
|
177
|
1
|
50
|
|
|
|
3
|
if($self->{verbose}) { |
178
|
|
|
|
|
|
|
# XXX This is ugly. |
179
|
0
|
|
|
|
|
0
|
my $last = 0; |
180
|
0
|
|
|
|
|
0
|
for my $part (@{ $self->{parts} }) { |
|
0
|
|
|
|
|
0
|
|
181
|
0
|
0
|
|
|
|
0
|
print '', |
|
|
0
|
|
|
|
|
|
182
|
|
|
|
|
|
|
($last ? $last > length( $part ) ? "\n\t" : ', ' : "\t"), |
183
|
|
|
|
|
|
|
$part; |
184
|
0
|
|
|
|
|
0
|
$last = length $part; |
185
|
|
|
|
|
|
|
} |
186
|
0
|
0
|
|
|
|
0
|
print "\n" if @{ $self->{parts} }; |
|
0
|
|
|
|
|
0
|
|
187
|
|
|
|
|
|
|
} |
188
|
1
|
|
|
|
|
2
|
return $self->{parts}; |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
# Save a known combination entry => definition table. |
192
|
|
|
|
|
|
|
sub build_definitions { |
193
|
1
|
|
|
1
|
1
|
2
|
my $self = shift; |
194
|
1
|
50
|
|
|
|
3
|
warn "Entering build_definitions()\n" if $self->{verbose}; |
195
|
1
|
|
|
|
|
1
|
for my $part (@{ $self->{parts} }) { |
|
1
|
|
|
|
|
3
|
|
196
|
|
|
|
|
|
|
$self->{definitions}{$part} = $self->{lexicon}{$part} |
197
|
45
|
100
|
|
|
|
83
|
if $self->{lexicon}{$part}; |
198
|
|
|
|
|
|
|
} |
199
|
0
|
|
|
|
|
0
|
warn "\t", join( "\n\t", sort keys %{ $self->definitions } ), "\n" |
200
|
1
|
50
|
|
|
|
25
|
if $self->{verbose}; |
201
|
1
|
|
|
|
|
5
|
return $self->{definitions}; |
202
|
|
|
|
|
|
|
} |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
sub build_combinations { |
205
|
1
|
|
|
1
|
1
|
2
|
my $self = shift; |
206
|
1
|
50
|
|
|
|
2
|
warn "Entering build_combinations()\n" if $self->{verbose}; |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
# field size for binary iteration (digits of precision) |
209
|
1
|
|
|
|
|
2
|
my $y = $self->{word_length} - 1; |
210
|
|
|
|
|
|
|
# total number of zero-based combinations |
211
|
1
|
|
|
|
|
9
|
my $z = 2 ** $y - 1; |
212
|
|
|
|
|
|
|
# field size for the count |
213
|
1
|
|
|
|
|
3
|
my $lz = length $z; |
214
|
|
|
|
|
|
|
# field size for a combination |
215
|
1
|
|
|
|
|
2
|
my $m = $self->{word_length} + $y; |
216
|
|
|
|
|
|
|
warn sprintf |
217
|
|
|
|
|
|
|
"\tTotal combinations: %d\n\tConstrained combinations:\n", |
218
|
|
|
|
|
|
|
$z + 1 |
219
|
1
|
50
|
|
|
|
2
|
if $self->{verbose}; |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
# Truth is a single partition character: the lowly dot. |
222
|
1
|
|
|
|
|
8
|
my $c = Math::BaseCalc->new( digits => [ 0, '.' ] ); |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
# Build a word part combination for each iteration. |
225
|
1
|
|
|
|
|
46
|
for my $n ( 0 .. $z ) { |
226
|
|
|
|
|
|
|
# Iterate in base two. |
227
|
256
|
|
|
|
|
587
|
my $i = $c->to_base( $n ); |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
# Get the binary digits as an array. |
230
|
256
|
|
|
|
|
7306
|
my @i = split //, sprintf( '%0'.$y.'s', $i ); |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
# Join the character and digit arrays into a partitioned word. |
233
|
256
|
|
|
|
|
408
|
my $t = ''; |
234
|
|
|
|
|
|
|
# ..by stepping over the characters and peeling off a digit. |
235
|
256
|
|
|
|
|
777
|
for( split //, $self->{word} ) { |
236
|
|
|
|
|
|
|
# Zero values become ''. Haha! Truth prevails. |
237
|
2304
|
|
100
|
|
|
5575
|
$t .= $_ . (shift( @i ) || ''); |
238
|
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
|
240
|
256
|
100
|
|
|
|
435
|
unless( grep { $t =~ /$_/ } @{ $self->{constraints} } ) { |
|
256
|
|
|
|
|
1165
|
|
|
256
|
|
|
|
|
467
|
|
241
|
|
|
|
|
|
|
# Preach it. |
242
|
|
|
|
|
|
|
printf "\t%".$lz.'d) %0'.$y.'s => %'.$m."s\n", $n, $i, $t |
243
|
192
|
50
|
|
|
|
376
|
if $self->{verbose}; |
244
|
192
|
|
|
|
|
235
|
push @{ $self->combinations }, $t; |
|
192
|
|
|
|
|
293
|
|
245
|
|
|
|
|
|
|
} |
246
|
|
|
|
|
|
|
} |
247
|
|
|
|
|
|
|
|
248
|
1
|
|
|
|
|
10
|
return $self->{combinations}; |
249
|
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
sub build_knowns { |
252
|
1
|
|
|
1
|
1
|
3
|
my $self = shift; |
253
|
1
|
50
|
|
|
|
2
|
return unless scalar keys %{ $self->{lexicon} }; |
|
1
|
|
|
|
|
5
|
|
254
|
1
|
50
|
|
|
|
4
|
warn "Entering build_knowns()\n" if $self->{verbose}; |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
# Save the familiarity value for each "raw" combination. |
257
|
1
|
|
|
|
|
2
|
for my $combo (@{ $self->{combinations} }) { |
|
1
|
|
|
|
|
3
|
|
258
|
|
|
|
|
|
|
# Skip combinations that have already been seen. |
259
|
192
|
50
|
|
|
|
371
|
next if exists $self->{knowns}{$combo}; |
260
|
|
|
|
|
|
|
|
261
|
192
|
|
|
|
|
296
|
my ($sum, $frag_sum, $char_sum) = (0, 0, 0); |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
# Get the bits of the combination. |
264
|
192
|
|
|
|
|
425
|
my @chunks = split /\./, $combo; |
265
|
192
|
|
|
|
|
301
|
for (@chunks) { |
266
|
|
|
|
|
|
|
# XXX Uh.. Magically handle hyphens in lexicon entries. |
267
|
960
|
|
|
|
|
1449
|
($_, my $combo_seen) = _hyphenate($_, $self->lexicon, 0); |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
# Sum the combination familiarity values. |
270
|
960
|
100
|
|
|
|
1871
|
if ($combo_seen) { |
271
|
108
|
|
|
|
|
128
|
$frag_sum++; |
272
|
108
|
|
|
|
|
171
|
$char_sum += length; |
273
|
|
|
|
|
|
|
} |
274
|
|
|
|
|
|
|
} |
275
|
|
|
|
|
|
|
# XXX Huh? Why? Can $_ change or something? |
276
|
|
|
|
|
|
|
# Stick our combination back together. |
277
|
192
|
|
|
|
|
354
|
$combo = join '.', @chunks; |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
# Save this combination and its familiarity ratios. |
280
|
192
|
|
|
|
|
309
|
my $x = $frag_sum / @chunks; |
281
|
192
|
|
|
|
|
281
|
my $y = $char_sum / $self->{word_length}; |
282
|
192
|
50
|
|
|
|
328
|
warn "\t$combo: [$x, $y]\n" if $self->{verbose}; |
283
|
192
|
100
|
66
|
|
|
471
|
if( $x || $y ) { |
284
|
85
|
|
|
|
|
297
|
$self->{knowns}{$combo} = [ $x, $y ]; |
285
|
|
|
|
|
|
|
} |
286
|
|
|
|
|
|
|
else { |
287
|
107
|
|
|
|
|
230
|
delete $self->{knowns}{$combo}; |
288
|
|
|
|
|
|
|
} |
289
|
|
|
|
|
|
|
} |
290
|
|
|
|
|
|
|
|
291
|
1
|
|
|
|
|
3
|
return $self->{knowns}; |
292
|
|
|
|
|
|
|
} |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
# Reduce the number of known combinations by concatenating adjacent |
295
|
|
|
|
|
|
|
# unknowns (and then removing any duplicates produced). |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
#sub learn { |
298
|
|
|
|
|
|
|
# my ($self, %args) = @_; |
299
|
|
|
|
|
|
|
# Get the list of (partially) unknown stem combinations. |
300
|
|
|
|
|
|
|
# Loop through each looking in %args or prompting for a definition. |
301
|
|
|
|
|
|
|
#} |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
# Update the given string with its actual lexicon value and increment |
304
|
|
|
|
|
|
|
# the seen flag. |
305
|
|
|
|
|
|
|
sub _hyphenate { |
306
|
960
|
|
|
960
|
|
1514
|
my ($string, $lexicon, $combo_seen) = @_; |
307
|
|
|
|
|
|
|
|
308
|
960
|
100
|
|
|
|
2442
|
if (exists $lexicon->{$string}) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
309
|
108
|
50
|
|
|
|
195
|
$combo_seen++ if defined $combo_seen; |
310
|
|
|
|
|
|
|
} |
311
|
|
|
|
|
|
|
elsif (exists $lexicon->{"-$string"}) { |
312
|
0
|
0
|
|
|
|
0
|
$combo_seen++ if defined $combo_seen; |
313
|
0
|
|
|
|
|
0
|
$string = "-$string"; |
314
|
|
|
|
|
|
|
} |
315
|
|
|
|
|
|
|
elsif (exists $lexicon->{"$string-"}) { |
316
|
0
|
0
|
|
|
|
0
|
$combo_seen++ if defined $combo_seen; |
317
|
0
|
|
|
|
|
0
|
$string = "$string-"; |
318
|
|
|
|
|
|
|
} |
319
|
|
|
|
|
|
|
|
320
|
960
|
50
|
|
|
|
2097
|
return wantarray ? ($string, $combo_seen) : $string; |
321
|
|
|
|
|
|
|
} |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
sub output_knowns { |
324
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
325
|
0
|
|
|
|
|
0
|
my @out = (); |
326
|
0
|
|
|
|
|
0
|
my $header = <<HEADER; |
327
|
|
|
|
|
|
|
Combination [frag familiarity, char familiarity] |
328
|
|
|
|
|
|
|
Fragment definitions |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
HEADER |
331
|
|
|
|
|
|
|
|
332
|
0
|
|
|
|
|
0
|
for my $known ( |
333
|
|
|
|
|
|
|
reverse sort { |
334
|
|
|
|
|
|
|
$self->{knowns}{$a}[0] <=> $self->{knowns}{$b}[0] || |
335
|
0
|
0
|
|
|
|
0
|
$self->{knowns}{$a}[1] <=> $self->{knowns}{$b}[1] |
336
|
0
|
|
|
|
|
0
|
} keys %{ $self->{knowns} } |
337
|
|
|
|
|
|
|
) { |
338
|
0
|
|
|
|
|
0
|
my @definition; |
339
|
0
|
|
|
|
|
0
|
for my $chunk (split /\./, $known) { |
340
|
|
|
|
|
|
|
push @definition, |
341
|
|
|
|
|
|
|
defined $self->{definitions}{$chunk} |
342
|
|
|
|
|
|
|
? $self->{definitions}{$chunk} |
343
|
|
|
|
|
|
|
? $self->{definitions}{$chunk} |
344
|
|
|
|
|
|
|
: $self->{not_defined} |
345
|
0
|
0
|
|
|
|
0
|
: $self->{unknown}; |
|
|
0
|
|
|
|
|
|
346
|
|
|
|
|
|
|
} |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
push @out, sprintf qq/%s [%s]\n%s/, |
349
|
|
|
|
|
|
|
$known, |
350
|
0
|
|
|
|
|
0
|
join (', ', map { sprintf '%0.2f', $_ } |
351
|
0
|
|
|
|
|
0
|
@{ $self->{knowns}{$known} }), |
352
|
0
|
|
|
|
|
0
|
join ($self->{separator}, @definition); |
353
|
|
|
|
|
|
|
} |
354
|
|
|
|
|
|
|
|
355
|
0
|
0
|
|
|
|
0
|
return wantarray ? @out : $header . join "\n\n", @out; |
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
# Naive, no locking read/write. If you run a production environment, |
359
|
|
|
|
|
|
|
# you know what to do. |
360
|
|
|
|
|
|
|
sub lexicon_cache { |
361
|
5
|
|
|
5
|
1
|
444
|
my( $self, $file, $value ) = @_; |
362
|
5
|
50
|
|
|
|
13
|
warn "Entering lexicon_cache()\n" if $self->{verbose}; |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
# Set the file and the lexicon_file attribute if we are told to. |
365
|
5
|
100
|
100
|
|
|
25
|
if( $file && $file eq 'lexicon_file' && $value ) { |
|
|
|
66
|
|
|
|
|
366
|
1
|
|
|
|
|
3
|
$self->{lexicon_file} = $value; |
367
|
1
|
|
|
|
|
3
|
$file = $value; |
368
|
|
|
|
|
|
|
} |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
# If there is no file try to use the lexicon_file. |
371
|
5
|
|
66
|
|
|
16
|
$file ||= $self->{lexicon_file}; |
372
|
|
|
|
|
|
|
# Otherwise, bail out! |
373
|
|
|
|
|
|
|
warn( "No lexicon cache file set\n" ) and return |
374
|
5
|
50
|
0
|
|
|
12
|
if $self->{verbose} && !$file; |
|
|
|
33
|
|
|
|
|
375
|
|
|
|
|
|
|
|
376
|
5
|
100
|
|
|
|
16
|
if( $file ) { |
377
|
|
|
|
|
|
|
# Store 'em if you got 'em. |
378
|
3
|
100
|
|
|
|
3
|
if( keys %{ $self->{lexicon} } ) { |
|
3
|
|
|
|
|
11
|
|
379
|
2
|
50
|
|
|
|
15
|
warn "store( $self->{lexicon}, $file )\n" if $self->{verbose}; |
380
|
2
|
|
|
|
|
9
|
store( $self->{lexicon}, $file ); |
381
|
|
|
|
|
|
|
} |
382
|
|
|
|
|
|
|
# ..Retrieve 'em if not. |
383
|
|
|
|
|
|
|
else { |
384
|
1
|
50
|
33
|
|
|
5
|
warn "retrieve( $file )\n" if $self->{verbose} && -e $file; |
385
|
1
|
50
|
|
|
|
22
|
$self->lexicon( retrieve( $file ) ) if -e $file; |
386
|
|
|
|
|
|
|
} |
387
|
|
|
|
|
|
|
} |
388
|
|
|
|
|
|
|
} |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
1; |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
__END__ |