line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# $Id: TokenParse.pm,v 1.20 2004/08/08 01:20:36 gene Exp $ |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package Lingua::TokenParse; |
4
|
|
|
|
|
|
|
$VERSION = '0.1601'; |
5
|
1
|
|
|
1
|
|
45583
|
use strict; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
93
|
|
6
|
1
|
|
|
1
|
|
7
|
use warnings; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
242
|
|
7
|
1
|
|
|
1
|
|
7
|
use Carp; |
|
1
|
|
|
|
|
26
|
|
|
1
|
|
|
|
|
267
|
|
8
|
1
|
|
|
1
|
|
7675
|
use Storable; |
|
1
|
|
|
|
|
4399
|
|
|
1
|
|
|
|
|
71
|
|
9
|
1
|
|
|
1
|
|
15722
|
use Math::BaseCalc; |
|
1
|
|
|
|
|
2179
|
|
|
1
|
|
|
|
|
2700
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
sub new { |
12
|
2
|
|
|
2
|
1
|
14
|
my $proto = shift; |
13
|
2
|
|
33
|
|
|
12
|
my $class = ref $proto || $proto; |
14
|
2
|
|
|
|
|
31
|
my $self = { |
15
|
|
|
|
|
|
|
verbose => 0, |
16
|
|
|
|
|
|
|
# The word to parse! |
17
|
|
|
|
|
|
|
word => undef, |
18
|
|
|
|
|
|
|
# We need to use this. |
19
|
|
|
|
|
|
|
word_length => 0, |
20
|
|
|
|
|
|
|
# Known tokens. |
21
|
|
|
|
|
|
|
lexicon => {}, |
22
|
|
|
|
|
|
|
# Local lexicon cache file name. |
23
|
|
|
|
|
|
|
lexicon_file => '', # ?: 'lexicon-' . time(), |
24
|
|
|
|
|
|
|
# All word parts. |
25
|
|
|
|
|
|
|
parts => [], |
26
|
|
|
|
|
|
|
# All possible parts combinations. |
27
|
|
|
|
|
|
|
combinations => [], |
28
|
|
|
|
|
|
|
# Scored list of the known parts combinations. |
29
|
|
|
|
|
|
|
knowns => {}, |
30
|
|
|
|
|
|
|
# Definitions of the known and unknown fragments in knowns. |
31
|
|
|
|
|
|
|
definitions => {}, |
32
|
|
|
|
|
|
|
# Fragment definition separator. |
33
|
|
|
|
|
|
|
separator => ' + ', |
34
|
|
|
|
|
|
|
# Known-but-not-defined definition output string. |
35
|
|
|
|
|
|
|
not_defined => '.', |
36
|
|
|
|
|
|
|
# Unknown definition output string. |
37
|
|
|
|
|
|
|
unknown => '?', |
38
|
|
|
|
|
|
|
# Known trimming regexp rules. |
39
|
|
|
|
|
|
|
constraints => [], |
40
|
|
|
|
|
|
|
@_, # slurp anything else and override defaults. |
41
|
|
|
|
|
|
|
}; |
42
|
2
|
|
|
|
|
8
|
bless $self, $class; |
43
|
2
|
|
|
|
|
8
|
$self->_init(); |
44
|
2
|
|
|
|
|
4
|
return $self; |
45
|
|
|
|
|
|
|
} |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
sub _init { |
48
|
2
|
|
|
2
|
|
3
|
my $self = shift; |
49
|
2
|
50
|
|
|
|
11
|
warn "Entering _init()\n" if $self->{verbose}; |
50
|
2
|
100
|
|
|
|
10
|
$self->word( $self->{word} ) if $self->{word}; |
51
|
|
|
|
|
|
|
# Retrieve our lexicon cache if a filename was set. |
52
|
2
|
|
|
|
|
8
|
$self->lexicon_cache; |
53
|
|
|
|
|
|
|
} |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
sub DESTROY { |
56
|
2
|
|
|
2
|
|
437
|
my $self = shift; |
57
|
|
|
|
|
|
|
# Cache our lexicon if a filename has been given. |
58
|
2
|
100
|
|
|
|
15
|
$self->lexicon_cache( $self->{lexicon_file} ) |
59
|
|
|
|
|
|
|
if $self->{lexicon_file}; |
60
|
|
|
|
|
|
|
} |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
sub verbose { |
63
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
64
|
0
|
0
|
|
|
|
0
|
$self->{verbose} = shift if @_; |
65
|
0
|
|
|
|
|
0
|
return $self->{verbose}; |
66
|
|
|
|
|
|
|
} |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
sub word { |
69
|
|
|
|
|
|
|
# WORD: This method is the only place where word_length is set. |
70
|
1
|
|
|
1
|
1
|
2
|
my $self = shift; |
71
|
1
|
50
|
|
|
|
4
|
warn "Entering word()\n" if $self->{verbose}; |
72
|
1
|
50
|
|
|
|
3
|
if( @_ ) { |
73
|
1
|
|
|
|
|
2
|
$self->{word} = shift; |
74
|
1
|
|
|
|
|
3
|
$self->{word_length} = length $self->{word}; |
75
|
1
|
50
|
|
|
|
3
|
printf "\tword = %s\n\tlength = %d\n", |
76
|
|
|
|
|
|
|
$self->{word}, $self->{word_length} |
77
|
|
|
|
|
|
|
if $self->{verbose}; |
78
|
|
|
|
|
|
|
} |
79
|
1
|
|
|
|
|
1
|
return $self->{word}; |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
sub lexicon { |
83
|
964
|
|
|
964
|
1
|
2088
|
my $self = shift; |
84
|
964
|
100
|
|
|
|
1598
|
if( @_ ) { |
85
|
2
|
0
|
33
|
|
|
19
|
$self->{lexicon} = @_ == 1 && ref $_[0] eq 'HASH' |
|
|
50
|
|
|
|
|
|
86
|
|
|
|
|
|
|
? shift |
87
|
|
|
|
|
|
|
: @_ % 2 == 0 |
88
|
|
|
|
|
|
|
? { @_ } |
89
|
|
|
|
|
|
|
: {}; |
90
|
|
|
|
|
|
|
} |
91
|
964
|
|
|
|
|
2046
|
return $self->{lexicon}; |
92
|
|
|
|
|
|
|
} |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
sub parts { |
95
|
2
|
|
|
2
|
1
|
706
|
my $self = shift; |
96
|
2
|
100
|
|
|
|
8
|
$self->{parts} = shift if @_; |
97
|
2
|
|
|
|
|
7
|
return $self->{parts}; |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
sub combinations { |
101
|
195
|
|
|
195
|
1
|
254
|
my $self = shift; |
102
|
195
|
100
|
|
|
|
319
|
$self->{combinations} = shift if @_; |
103
|
195
|
|
|
|
|
564
|
return $self->{combinations}; |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
sub knowns { |
107
|
3
|
|
|
3
|
1
|
1005
|
my $self = shift; |
108
|
3
|
100
|
|
|
|
10
|
$self->{knowns} = shift if @_; |
109
|
3
|
|
|
|
|
24
|
return $self->{knowns}; |
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
sub definitions { |
113
|
2
|
|
|
2
|
1
|
5
|
my $self = shift; |
114
|
2
|
100
|
|
|
|
7
|
$self->{definitions} = shift if @_; |
115
|
2
|
|
|
|
|
7
|
return $self->{definitions}; |
116
|
|
|
|
|
|
|
} |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
sub separator { |
119
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
120
|
0
|
0
|
|
|
|
0
|
$self->{separator} = shift if @_; |
121
|
0
|
|
|
|
|
0
|
return $self->{separator}; |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
sub not_defined { |
125
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
126
|
0
|
0
|
|
|
|
0
|
$self->{not_defined} = shift if @_; |
127
|
0
|
|
|
|
|
0
|
return $self->{not_defined}; |
128
|
|
|
|
|
|
|
} |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
sub unknown { |
131
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
132
|
0
|
0
|
|
|
|
0
|
$self->{unknown} = shift if @_; |
133
|
0
|
|
|
|
|
0
|
return $self->{unknown}; |
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
sub constraints { |
137
|
46
|
|
|
46
|
1
|
46
|
my $self = shift; |
138
|
46
|
100
|
|
|
|
85
|
$self->{constraints} = shift if @_; |
139
|
46
|
|
|
|
|
79
|
return $self->{constraints}; |
140
|
|
|
|
|
|
|
} |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
sub parse { |
143
|
1
|
|
|
1
|
1
|
45
|
my $self = shift; |
144
|
1
|
50
|
|
|
|
24
|
warn "Entering parse()\n" if $self->{verbose}; |
145
|
1
|
50
|
|
|
|
4
|
$self->word( shift ) if @_; |
146
|
1
|
50
|
|
|
|
3
|
croak 'No word provided.' unless defined $self->{word}; |
147
|
1
|
50
|
|
|
|
1
|
croak 'No lexicon defined.' unless keys %{ $self->{lexicon} }; |
|
1
|
|
|
|
|
5
|
|
148
|
|
|
|
|
|
|
# Reset our data structures. |
149
|
1
|
|
|
|
|
3
|
$self->parts([]); |
150
|
1
|
|
|
|
|
5
|
$self->definitions({}); |
151
|
1
|
|
|
|
|
2
|
$self->combinations([]); |
152
|
1
|
|
|
|
|
2
|
$self->knowns({}); |
153
|
|
|
|
|
|
|
# Build new ones based on the word. |
154
|
1
|
|
|
|
|
4
|
$self->build_parts; |
155
|
1
|
|
|
|
|
10
|
$self->build_definitions; |
156
|
1
|
|
|
|
|
5
|
$self->build_combinations; |
157
|
1
|
|
|
|
|
5
|
$self->build_knowns; |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
sub build_parts { |
161
|
1
|
|
|
1
|
1
|
1
|
my $self = shift; |
162
|
1
|
50
|
|
|
|
6
|
warn "Entering build_parts()\n" if $self->{verbose}; |
163
|
|
|
|
|
|
|
|
164
|
1
|
|
|
|
|
3
|
for my $i (0 .. $self->{word_length} - 1) { |
165
|
9
|
|
|
|
|
16
|
for my $j (1 .. $self->{word_length} - $i) { |
166
|
45
|
|
|
|
|
64
|
my $part = substr $self->{word}, $i, $j; |
167
|
45
|
|
|
|
|
121
|
push @{ $self->{parts} }, $part |
|
45
|
|
|
|
|
178
|
|
168
|
45
|
|
|
|
|
72
|
unless grep { $part =~ /$_/ } |
169
|
45
|
50
|
|
|
|
54
|
@{ $self->constraints }; |
170
|
|
|
|
|
|
|
} |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
|
173
|
1
|
50
|
|
|
|
4
|
if($self->{verbose}) { |
174
|
|
|
|
|
|
|
# XXX This is ugly. |
175
|
0
|
|
|
|
|
0
|
my $last = 0; |
176
|
0
|
|
|
|
|
0
|
for my $part (@{ $self->{parts} }) { |
|
0
|
|
|
|
|
0
|
|
177
|
0
|
0
|
|
|
|
0
|
print '', |
|
|
0
|
|
|
|
|
|
178
|
|
|
|
|
|
|
($last ? $last > length( $part ) ? "\n\t" : ', ' : "\t"), |
179
|
|
|
|
|
|
|
$part; |
180
|
0
|
|
|
|
|
0
|
$last = length $part; |
181
|
|
|
|
|
|
|
} |
182
|
0
|
0
|
|
|
|
0
|
print "\n" if @{ $self->{parts} }; |
|
0
|
|
|
|
|
0
|
|
183
|
|
|
|
|
|
|
} |
184
|
1
|
|
|
|
|
3
|
return $self->{parts}; |
185
|
|
|
|
|
|
|
} |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
# Save a known combination entry => definition table. |
188
|
|
|
|
|
|
|
sub build_definitions { |
189
|
1
|
|
|
1
|
1
|
2
|
my $self = shift; |
190
|
1
|
50
|
|
|
|
5
|
warn "Entering build_definitions()\n" if $self->{verbose}; |
191
|
1
|
|
|
|
|
1
|
for my $part (@{ $self->{parts} }) { |
|
1
|
|
|
|
|
3
|
|
192
|
45
|
100
|
|
|
|
110
|
$self->{definitions}{$part} = $self->{lexicon}{$part} |
193
|
|
|
|
|
|
|
if $self->{lexicon}{$part}; |
194
|
|
|
|
|
|
|
} |
195
|
1
|
50
|
|
|
|
8
|
warn "\t", join( "\n\t", sort keys %{ $self->definitions } ), "\n" |
|
0
|
|
|
|
|
0
|
|
196
|
|
|
|
|
|
|
if $self->{verbose}; |
197
|
1
|
|
|
|
|
3
|
return $self->{definitions}; |
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
sub build_combinations { |
201
|
1
|
|
|
1
|
1
|
2
|
my $self = shift; |
202
|
1
|
50
|
|
|
|
4
|
warn "Entering build_combinations()\n" if $self->{verbose}; |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
# field size for binary iteration (digits of precision) |
205
|
1
|
|
|
|
|
4
|
my $y = $self->{word_length} - 1; |
206
|
|
|
|
|
|
|
# total number of zero-based combinations |
207
|
1
|
|
|
|
|
3
|
my $z = 2 ** $y - 1; |
208
|
|
|
|
|
|
|
# field size for the count |
209
|
1
|
|
|
|
|
3
|
my $lz = length $z; |
210
|
|
|
|
|
|
|
# field size for a combination |
211
|
1
|
|
|
|
|
2
|
my $m = $self->{word_length} + $y; |
212
|
1
|
50
|
|
|
|
4
|
warn sprintf |
213
|
|
|
|
|
|
|
"\tTotal combinations: %d\n\tConstrained combinations:\n", |
214
|
|
|
|
|
|
|
$z + 1 |
215
|
|
|
|
|
|
|
if $self->{verbose}; |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
# Truth is a single partition character: the lowly dot. |
218
|
1
|
|
|
|
|
20
|
my $c = Math::BaseCalc->new( digits => [ 0, '.' ] ); |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
# Build a word part combination for each iteration. |
221
|
1
|
|
|
|
|
49
|
for my $n ( 0 .. $z ) { |
222
|
|
|
|
|
|
|
# Iterate in base two. |
223
|
256
|
|
|
|
|
619
|
my $i = $c->to_base( $n ); |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
# Get the binary digits as an array. |
226
|
256
|
|
|
|
|
5279
|
my @i = split //, sprintf( '%0'.$y.'s', $i ); |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
# Join the character and digit arrays into a partitioned word. |
229
|
256
|
|
|
|
|
355
|
my $t = ''; |
230
|
|
|
|
|
|
|
# ..by stepping over the characters and peeling off a digit. |
231
|
256
|
|
|
|
|
788
|
for( split //, $self->{word} ) { |
232
|
|
|
|
|
|
|
# Zero values become ''. Haha! Truth prevails. |
233
|
2304
|
|
100
|
|
|
5558
|
$t .= $_ . (shift( @i ) || ''); |
234
|
|
|
|
|
|
|
} |
235
|
|
|
|
|
|
|
|
236
|
256
|
100
|
|
|
|
1470
|
unless( grep { $t =~ /$_/ } @{ $self->{constraints} } ) { |
|
256
|
|
|
|
|
1161
|
|
|
256
|
|
|
|
|
430
|
|
237
|
|
|
|
|
|
|
# Preach it. |
238
|
192
|
50
|
|
|
|
463
|
printf "\t%".$lz.'d) %0'.$y.'s => %'.$m."s\n", $n, $i, $t |
239
|
|
|
|
|
|
|
if $self->{verbose}; |
240
|
192
|
|
|
|
|
166
|
push @{ $self->combinations }, $t; |
|
192
|
|
|
|
|
332
|
|
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
|
244
|
1
|
|
|
|
|
18
|
return $self->{combinations}; |
245
|
|
|
|
|
|
|
} |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
sub build_knowns { |
248
|
1
|
|
|
1
|
1
|
1
|
my $self = shift; |
249
|
1
|
50
|
|
|
|
2
|
return unless scalar keys %{ $self->{lexicon} }; |
|
1
|
|
|
|
|
6
|
|
250
|
1
|
50
|
|
|
|
3
|
warn "Entering build_knowns()\n" if $self->{verbose}; |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
# Save the familiarity value for each "raw" combination. |
253
|
1
|
|
|
|
|
2
|
for my $combo (@{ $self->{combinations} }) { |
|
1
|
|
|
|
|
4
|
|
254
|
|
|
|
|
|
|
# Skip combinations that have already been seen. |
255
|
192
|
50
|
|
|
|
435
|
next if exists $self->{knowns}{$combo}; |
256
|
|
|
|
|
|
|
|
257
|
192
|
|
|
|
|
234
|
my ($sum, $frag_sum, $char_sum) = (0, 0, 0); |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
# Get the bits of the combination. |
260
|
192
|
|
|
|
|
585
|
my @chunks = split /\./, $combo; |
261
|
192
|
|
|
|
|
284
|
for (@chunks) { |
262
|
|
|
|
|
|
|
# XXX Uh.. Magically handle hyphens in lexicon entries. |
263
|
960
|
|
|
|
|
1704
|
($_, my $combo_seen) = _hyphenate($_, $self->lexicon, 0); |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
# Sum the combination familiarity values. |
266
|
960
|
100
|
|
|
|
2299
|
if ($combo_seen) { |
267
|
108
|
|
|
|
|
5299
|
$frag_sum++; |
268
|
108
|
|
|
|
|
171
|
$char_sum += length; |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
} |
271
|
|
|
|
|
|
|
# XXX Huh? Why? Can $_ change or something? |
272
|
|
|
|
|
|
|
# Stick our combination back together. |
273
|
192
|
|
|
|
|
363
|
$combo = join '.', @chunks; |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
# Save this combination and its familiarity ratios. |
276
|
192
|
|
|
|
|
266
|
my $x = $frag_sum / @chunks; |
277
|
192
|
|
|
|
|
250
|
my $y = $char_sum / $self->{word_length}; |
278
|
192
|
50
|
|
|
|
355
|
warn "\t$combo: [$x, $y]\n" if $self->{verbose}; |
279
|
192
|
100
|
66
|
|
|
531
|
if( $x || $y ) { |
280
|
85
|
|
|
|
|
453
|
$self->{knowns}{$combo} = [ $x, $y ]; |
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
else { |
283
|
107
|
|
|
|
|
269
|
delete $self->{knowns}{$combo}; |
284
|
|
|
|
|
|
|
} |
285
|
|
|
|
|
|
|
} |
286
|
|
|
|
|
|
|
|
287
|
1
|
|
|
|
|
8
|
return $self->{knowns}; |
288
|
|
|
|
|
|
|
} |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
# Reduce the number of known combinations by concatinating adjacent |
291
|
|
|
|
|
|
|
# unknowns (and then removing any duplicates produced). |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
sub learn { |
294
|
0
|
|
|
0
|
0
|
0
|
my ($self, %args) = @_; |
295
|
|
|
|
|
|
|
# Get the list of (partially) unknown stem combinations. |
296
|
|
|
|
|
|
|
# Loop through each looking in %args or prompting for a definition. |
297
|
|
|
|
|
|
|
} |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
# Update the given string with its actual lexicon value and increment |
300
|
|
|
|
|
|
|
# the seen flag. |
301
|
|
|
|
|
|
|
sub _hyphenate { |
302
|
960
|
|
|
960
|
|
1263
|
my ($string, $lexicon, $combo_seen) = @_; |
303
|
|
|
|
|
|
|
|
304
|
960
|
100
|
|
|
|
2960
|
if (exists $lexicon->{$string}) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
305
|
108
|
50
|
|
|
|
190
|
$combo_seen++ if defined $combo_seen; |
306
|
|
|
|
|
|
|
} |
307
|
|
|
|
|
|
|
elsif (exists $lexicon->{"-$string"}) { |
308
|
0
|
0
|
|
|
|
0
|
$combo_seen++ if defined $combo_seen; |
309
|
0
|
|
|
|
|
0
|
$string = "-$string"; |
310
|
|
|
|
|
|
|
} |
311
|
|
|
|
|
|
|
elsif (exists $lexicon->{"$string-"}) { |
312
|
0
|
0
|
|
|
|
0
|
$combo_seen++ if defined $combo_seen; |
313
|
0
|
|
|
|
|
0
|
$string = "$string-"; |
314
|
|
|
|
|
|
|
} |
315
|
|
|
|
|
|
|
|
316
|
960
|
50
|
|
|
|
2651
|
return wantarray ? ($string, $combo_seen) : $string; |
317
|
|
|
|
|
|
|
} |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
sub output_knowns { |
320
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
321
|
0
|
|
|
|
|
0
|
my @out = (); |
322
|
0
|
|
|
|
|
0
|
my $header = < |
323
|
|
|
|
|
|
|
Combination [frag familiarity, char familiarity] |
324
|
|
|
|
|
|
|
Fragment definitions |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
HEADER |
327
|
|
|
|
|
|
|
|
328
|
0
|
0
|
|
|
|
0
|
for my $known ( |
|
0
|
|
|
|
|
0
|
|
329
|
|
|
|
|
|
|
reverse sort { |
330
|
0
|
|
|
|
|
0
|
$self->{knowns}{$a}[0] <=> $self->{knowns}{$b}[0] || |
331
|
|
|
|
|
|
|
$self->{knowns}{$a}[1] <=> $self->{knowns}{$b}[1] |
332
|
|
|
|
|
|
|
} keys %{ $self->{knowns} } |
333
|
|
|
|
|
|
|
) { |
334
|
0
|
|
|
|
|
0
|
my @definition; |
335
|
0
|
|
|
|
|
0
|
for my $chunk (split /\./, $known) { |
336
|
0
|
0
|
|
|
|
0
|
push @definition, |
|
|
0
|
|
|
|
|
|
337
|
|
|
|
|
|
|
defined $self->{definitions}{$chunk} |
338
|
|
|
|
|
|
|
? $self->{definitions}{$chunk} |
339
|
|
|
|
|
|
|
? $self->{definitions}{$chunk} |
340
|
|
|
|
|
|
|
: $self->{not_defined} |
341
|
|
|
|
|
|
|
: $self->{unknown}; |
342
|
|
|
|
|
|
|
} |
343
|
|
|
|
|
|
|
|
344
|
0
|
|
|
|
|
0
|
push @out, sprintf qq/%s [%s]\n%s/, |
345
|
|
|
|
|
|
|
$known, |
346
|
0
|
|
|
|
|
0
|
join (', ', map { sprintf '%0.2f', $_ } |
347
|
0
|
|
|
|
|
0
|
@{ $self->{knowns}{$known} }), |
348
|
|
|
|
|
|
|
join ($self->{separator}, @definition); |
349
|
|
|
|
|
|
|
} |
350
|
|
|
|
|
|
|
|
351
|
0
|
0
|
|
|
|
0
|
return wantarray ? @out : $header . join "\n\n", @out; |
352
|
|
|
|
|
|
|
} |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
# Naive, no locking read/write. If you run a production environment, |
355
|
|
|
|
|
|
|
# you know what to do. |
356
|
|
|
|
|
|
|
sub lexicon_cache { |
357
|
5
|
|
|
5
|
1
|
475
|
my( $self, $file, $value ) = @_; |
358
|
5
|
50
|
|
|
|
15
|
warn "Entering lexicon_cache()\n" if $self->{verbose}; |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
# Set the file and the lexicon_file attribute if we are told to. |
361
|
5
|
100
|
100
|
|
|
29
|
if( $file && $file eq 'lexicon_file' && $value ) { |
|
|
|
66
|
|
|
|
|
362
|
1
|
|
|
|
|
3
|
$self->{lexicon_file} = $value; |
363
|
1
|
|
|
|
|
2
|
$file = $value; |
364
|
|
|
|
|
|
|
} |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
# If there is no file try to use the lexicon_file. |
367
|
5
|
|
66
|
|
|
21
|
$file ||= $self->{lexicon_file}; |
368
|
|
|
|
|
|
|
# Otherwise, bail out! |
369
|
5
|
50
|
0
|
|
|
18
|
warn( "No lexicon cache file set\n" ) and return |
|
|
|
33
|
|
|
|
|
370
|
|
|
|
|
|
|
if $self->{verbose} && !$file; |
371
|
|
|
|
|
|
|
|
372
|
5
|
100
|
|
|
|
15
|
if( $file ) { |
373
|
|
|
|
|
|
|
# Store 'em if you got 'em. |
374
|
3
|
100
|
|
|
|
5
|
if( keys %{ $self->{lexicon} } ) { |
|
3
|
|
|
|
|
10
|
|
375
|
2
|
50
|
|
|
|
7
|
warn "store( $self->{lexicon}, $file )\n" if $self->{verbose}; |
376
|
2
|
|
|
|
|
9
|
store( $self->{lexicon}, $file ); |
377
|
|
|
|
|
|
|
} |
378
|
|
|
|
|
|
|
# ..Retrieve 'em if not. |
379
|
|
|
|
|
|
|
else { |
380
|
1
|
50
|
33
|
|
|
5
|
warn "retrieve( $file )\n" if $self->{verbose} && -e $file; |
381
|
1
|
50
|
|
|
|
24
|
$self->lexicon( retrieve( $file ) ) if -e $file; |
382
|
|
|
|
|
|
|
} |
383
|
|
|
|
|
|
|
} |
384
|
|
|
|
|
|
|
} |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
1; |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
__END__ |