line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Locale::Maketext::Fuzzy; |
2
|
|
|
|
|
|
|
$Locale::Maketext::Fuzzy::VERSION = '0.11'; |
3
|
|
|
|
|
|
|
|
4
|
1
|
|
|
1
|
|
30338
|
use 5.005; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
54
|
|
5
|
1
|
|
|
1
|
|
6
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
36
|
|
6
|
1
|
|
|
1
|
|
1153
|
use Locale::Maketext; |
|
1
|
|
|
|
|
13260
|
|
|
1
|
|
|
|
|
38
|
|
7
|
1
|
|
|
1
|
|
12
|
use base 'Locale::Maketext'; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
162
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
sub override_maketext { |
10
|
6
|
|
|
6
|
1
|
1074
|
my ( $class, $flag ) = @_; |
11
|
6
|
50
|
|
|
|
17
|
$class = ref($class) if ref($class); |
12
|
|
|
|
|
|
|
|
13
|
1
|
|
|
1
|
|
6
|
no strict 'refs'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
994
|
|
14
|
|
|
|
|
|
|
|
15
|
6
|
100
|
|
|
|
20
|
if ($flag) { |
|
|
100
|
|
|
|
|
|
16
|
2
|
|
|
|
|
3
|
*{"$class\::maketext"} = \&maketext_fuzzy; |
|
2
|
|
|
|
|
12
|
|
17
|
|
|
|
|
|
|
} |
18
|
|
|
|
|
|
|
elsif ( @_ >= 2 ) { |
19
|
2
|
|
|
|
|
3
|
delete ${"$class\::"}{maketext}; |
|
2
|
|
|
|
|
8
|
|
20
|
|
|
|
|
|
|
} |
21
|
|
|
|
|
|
|
|
22
|
6
|
100
|
|
|
|
7
|
return ( defined &{"$class\::maketext"} ? 1 : 0 ); |
|
6
|
|
|
|
|
40
|
|
23
|
|
|
|
|
|
|
} |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# Global cache of entries and their regexified forms |
26
|
|
|
|
|
|
|
my %regex_cache; |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
sub maketext_fuzzy { |
29
|
7
|
|
|
7
|
1
|
4568
|
my ( $handle, $phrase ) = splice( @_, 0, 2 ); |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# An array of all lexicon hashrefs |
32
|
7
|
|
|
|
|
7
|
my @lexicons = @{ $handle->_lex_refs }; |
|
7
|
|
|
|
|
31
|
|
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
# Try exact match if possible at all. |
35
|
7
|
|
|
|
|
118
|
foreach my $lex (@lexicons) { |
36
|
7
|
100
|
|
|
|
33
|
return $handle->SUPER::maketext( $phrase, @_ ) |
37
|
|
|
|
|
|
|
if exists $lex->{$phrase}; |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
# Keys are matched entries; values are arrayrefs of extracted params |
41
|
5
|
|
|
|
|
8
|
my %candidate; |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
# Fuzzy match phase 1 -- extract all candidates |
44
|
5
|
|
|
|
|
7
|
foreach my $lex (@lexicons) { |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# We're not interested in non-bracketed entries, so ignore them |
47
|
5
|
|
|
|
|
6
|
foreach my $entry ( grep /(?:(?
|
|
5
|
|
|
|
|
31
|
|
48
|
|
|
|
|
|
|
# Skip entries which are _only_ brackets and whitespace. |
49
|
|
|
|
|
|
|
# The most value they could add is rearrangement, and that |
50
|
|
|
|
|
|
|
# is almost certainly incorrect. |
51
|
10
|
50
|
|
|
|
49
|
next if $entry =~ /^\s*(\[[^]]+\]\s*)+$/; |
52
|
|
|
|
|
|
|
|
53
|
10
|
|
100
|
|
|
28
|
my $re = ( $regex_cache{$entry} ||= [ _regexify($entry) ] ); |
54
|
10
|
100
|
|
|
|
76
|
my @vars = ( $phrase =~ $re->[0] ) or next; |
55
|
7
|
|
|
|
|
15
|
$candidate{$entry} ||= |
56
|
7
|
50
|
33
|
|
|
20
|
( @{ $re->[1] } ? [ @vars[ @{ $re->[1] } ] ] : \@vars ); |
|
7
|
|
|
|
|
35
|
|
57
|
|
|
|
|
|
|
} |
58
|
|
|
|
|
|
|
} |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
# Fail early if we cannot find anything that matches |
61
|
5
|
100
|
|
|
|
17
|
return $phrase unless %candidate; |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# Fuzzy match phase 2 -- select the best candidate |
64
|
3
|
50
|
|
|
|
10
|
$phrase = ( |
65
|
|
|
|
|
|
|
sort { |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
# For now, we just use a very crude heuristic: "Longer is better" |
68
|
4
|
|
|
|
|
12
|
length($b) <=> length($a) |
69
|
|
|
|
|
|
|
or $b cmp $a |
70
|
|
|
|
|
|
|
} keys %candidate |
71
|
|
|
|
|
|
|
)[0]; |
72
|
|
|
|
|
|
|
|
73
|
4
|
|
|
|
|
5
|
return $handle->SUPER::maketext( $phrase, @{ $candidate{$phrase} }, @_ ); |
|
4
|
|
|
|
|
17
|
|
74
|
|
|
|
|
|
|
} |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
sub _regexify { |
77
|
2
|
|
|
2
|
|
6
|
my $text = quotemeta(shift); |
78
|
2
|
|
|
|
|
2
|
my @ords; |
79
|
|
|
|
|
|
|
|
80
|
2
|
|
|
|
|
27
|
$text =~ s{ |
81
|
|
|
|
|
|
|
( # capture into $1... |
82
|
|
|
|
|
|
|
(?
|
83
|
|
|
|
|
|
|
) # (to be restored back) |
84
|
|
|
|
|
|
|
\\\[ # opening bracket |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
( # capture into $2... |
87
|
|
|
|
|
|
|
(?: # any numbers of |
88
|
|
|
|
|
|
|
[^~\]] # ordinary non-] characters |
89
|
|
|
|
|
|
|
| # or |
90
|
|
|
|
|
|
|
~\\?. # escaped characters |
91
|
|
|
|
|
|
|
)* |
92
|
|
|
|
|
|
|
) |
93
|
|
|
|
|
|
|
\\\] # closing bracket |
94
|
|
|
|
|
|
|
}{ |
95
|
3
|
|
|
|
|
8
|
$1._paramify($2, \@ords) |
96
|
|
|
|
|
|
|
}egx; |
97
|
|
|
|
|
|
|
|
98
|
2
|
|
|
|
|
5
|
$text =~ s/\Q.*?\E$/.*/; |
99
|
2
|
|
|
|
|
81
|
return qr/^$text$/, \@ords; |
100
|
|
|
|
|
|
|
} |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
sub _paramify { |
103
|
3
|
|
|
3
|
|
5
|
my ( $text, $ordref ) = @_; |
104
|
3
|
|
|
|
|
5
|
my $out = '(.*?)'; |
105
|
3
|
|
|
|
|
7
|
my @choices = split( /\\,/, $text ); |
106
|
|
|
|
|
|
|
|
107
|
3
|
50
|
|
|
|
13
|
if ( $choices[0] =~ /^(?:\w+|\\#|\\\*)$/ ) { |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# Do away with the function name |
110
|
3
|
100
|
|
|
|
9
|
shift @choices unless $choices[0] =~ /^_(?:\d+|\\\*)$/; |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
# Build an alternate regex to weed out vars |
113
|
2
|
|
|
|
|
22
|
$out .= '(?:' . join( |
114
|
|
|
|
|
|
|
'|', |
115
|
|
|
|
|
|
|
sort { |
116
|
|
|
|
|
|
|
length($b) <=> length($a) # longest first |
117
|
|
|
|
|
|
|
} map { |
118
|
3
|
|
|
|
|
5
|
/^_(?:(\d+)|\\\*)$/ |
119
|
5
|
100
|
|
|
|
17
|
? do { |
120
|
3
|
50
|
|
|
|
8
|
push @{$ordref}, ( $1 - 1 ) if defined $1; |
|
3
|
|
|
|
|
7
|
|
121
|
3
|
|
|
|
|
7
|
''; |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
: $_ # turn _1, _2, _*... into '' |
124
|
|
|
|
|
|
|
} @choices |
125
|
|
|
|
|
|
|
) . ')'; |
126
|
|
|
|
|
|
|
|
127
|
3
|
|
|
|
|
7
|
$out =~ s/\Q(?:)\E$//; |
128
|
|
|
|
|
|
|
} |
129
|
|
|
|
|
|
|
|
130
|
3
|
|
|
|
|
15
|
return $out; |
131
|
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
1; |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=head1 NAME |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
Locale::Maketext::Fuzzy - Maketext from already interpolated strings |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=head1 SYNOPSIS |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
package MyApp::L10N; |
142
|
|
|
|
|
|
|
use base 'Locale::Maketext::Fuzzy'; # instead of Locale::Maketext |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
package MyApp::L10N::de; |
145
|
|
|
|
|
|
|
use base 'MyApp::L10N'; |
146
|
|
|
|
|
|
|
our %Lexicon = ( |
147
|
|
|
|
|
|
|
# Exact match should always be preferred if possible |
148
|
|
|
|
|
|
|
"0 camels were released." |
149
|
|
|
|
|
|
|
=> "Exact match", |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
# Fuzzy match candidate |
152
|
|
|
|
|
|
|
"[quant,_1,camel was,camels were] released." |
153
|
|
|
|
|
|
|
=> "[quant,_1,Kamel wurde,Kamele wurden] freigegeben.", |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
# This could also match fuzzily, but is less preferred |
156
|
|
|
|
|
|
|
"[_2] released[_1]" |
157
|
|
|
|
|
|
|
=> "[_1][_2] ist frei[_1]", |
158
|
|
|
|
|
|
|
); |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
package main; |
161
|
|
|
|
|
|
|
my $lh = MyApp::L10N->get_handle('de'); |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
# All ->maketext calls below will become ->maketext_fuzzy instead |
164
|
|
|
|
|
|
|
$lh->override_maketext(1); |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
# This prints "Exact match" |
167
|
|
|
|
|
|
|
print $lh->maketext('0 camels were released.'); |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
# "1 Kamel wurde freigegeben." -- quant() gets 1 |
170
|
|
|
|
|
|
|
print $lh->maketext('1 camel was released.'); |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
# "2 Kamele wurden freigegeben." -- quant() gets 2 |
173
|
|
|
|
|
|
|
print $lh->maketext('2 camels were released.'); |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
# "3 Kamele wurden freigegeben." -- parameters are ignored |
176
|
|
|
|
|
|
|
print $lh->maketext('3 released.'); |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
# "4 Kamele wurden freigegeben." -- normal usage |
179
|
|
|
|
|
|
|
print $lh->maketext('[*,_1,camel was,camels were] released.', 4); |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
# "!Perl ist frei!" -- matches the broader one |
182
|
|
|
|
|
|
|
# Note that the sequence ([_2] before [_1]) is preserved |
183
|
|
|
|
|
|
|
print $lh->maketext('Perl released!'); |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
=head1 DESCRIPTION |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
This module is a subclass of C, with additional |
188
|
|
|
|
|
|
|
support for localizing messages that already contains interpolated |
189
|
|
|
|
|
|
|
variables. |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
This is most useful when the messages are returned by external sources |
192
|
|
|
|
|
|
|
-- for example, to match C against |
193
|
|
|
|
|
|
|
C<[_1]: command not found>. |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
Of course, this module is also useful if you're simply too lazy |
196
|
|
|
|
|
|
|
to use the |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
$lh->maketext("[quant,_1,file,files] deleted.", $count); |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
syntax, but wish to write |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
$lh->maketext_fuzzy("$count files deleted"); |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
instead, and have the correct plural form figured out automatically. |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
If C seems too long to type for you, this module |
207
|
|
|
|
|
|
|
also provides a C method to turn I C |
208
|
|
|
|
|
|
|
calls into C calls. |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
=head1 METHODS |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
=head2 $lh->maketext_fuzzy(I[, I]); |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
That method takes exactly the same arguments as the C method |
215
|
|
|
|
|
|
|
of C. |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
If I is found in lexicons, it is applied in the same way as |
218
|
|
|
|
|
|
|
C. Otherwise, it looks at all lexicon entries that could |
219
|
|
|
|
|
|
|
possibly yield I, by turning C<[...]> sequences into C<(.*?)> and |
220
|
|
|
|
|
|
|
match the resulting regular expression against I. |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
Once it finds all candidate entries, the longest one replaces the |
223
|
|
|
|
|
|
|
I for the real C call. Variables matched by its bracket |
224
|
|
|
|
|
|
|
sequences (C<$1>, C<$2>...) are placed before I; the order |
225
|
|
|
|
|
|
|
of variables in the matched entry are correctly preserved. |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
For example, if the matched entry in C<%Lexicon> is C, |
228
|
|
|
|
|
|
|
this call: |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
$fh->maketext_fuzzy("Test string", "param"); |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
is equivalent to this: |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
$fh->maketext("Test [_1]", "string", "param"); |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
However, most of the time you won't need to supply I to |
237
|
|
|
|
|
|
|
a C call, since all parameters are already interpolated |
238
|
|
|
|
|
|
|
into the string. |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=head2 $lh->override_maketext([I]); |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
If I is true, this accessor method turns C<$lh-Emaketext> |
243
|
|
|
|
|
|
|
into an alias for C<$lh-Emaketext_fuzzy>, so all consecutive |
244
|
|
|
|
|
|
|
C calls in the C<$lh>'s packages are automatically fuzzy. |
245
|
|
|
|
|
|
|
A false I restores the original behaviour. If the flag is not |
246
|
|
|
|
|
|
|
specified, returns the current status of override; the default is |
247
|
|
|
|
|
|
|
0 (no overriding). |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
Note that this call only modifies the symbol table of the I
|
250
|
|
|
|
|
|
|
class> that C<$lh> belongs to, so other languages are not affected. |
251
|
|
|
|
|
|
|
If you want to override all language handles in a certain application, |
252
|
|
|
|
|
|
|
try this: |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
MyApp::L10N->override_maketext(1); |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
=head1 CAVEATS |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
=over 4 |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=item * |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
The "longer is better" heuristic to determine the best match is |
263
|
|
|
|
|
|
|
reasonably good, but could certainly be improved. |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
=item * |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
Currently, C<"[quant,_1,file] deleted"> won't match C<"3 files deleted">; |
268
|
|
|
|
|
|
|
you'll have to write C<"[quant,_1,file,files] deleted"> instead, or |
269
|
|
|
|
|
|
|
simply use C<"[_1] file deleted"> as the lexicon key and put the correct |
270
|
|
|
|
|
|
|
plural form handling into the corresponding value. |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=item * |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
When used in combination with C's C |
275
|
|
|
|
|
|
|
backend, all keys would be iterated over each time a fuzzy match is |
276
|
|
|
|
|
|
|
performed, and may cause serious speed penalty. Patches welcome. |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=back |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
=head1 SEE ALSO |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
L, L |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
=head1 HISTORY |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
This particular module was written to facilitate an I |
287
|
|
|
|
|
|
|
layer for Slashcode's I provider, based on |
288
|
|
|
|
|
|
|
C and C. It would work like this: |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
Input | from the [% story.dept %] dept. |
291
|
|
|
|
|
|
|
Output| [%|loc( story.dept )%]from the [_1] dept.[%END%] |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
Now, this layer suffers from the same linguistic problems as an |
294
|
|
|
|
|
|
|
ordinary C or C framework does -- what if we want |
295
|
|
|
|
|
|
|
to make ordinals from C<[% story.dept %]> (i.e. C), |
296
|
|
|
|
|
|
|
or expand the C to C / C? |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
The same problem occurred in RT's web interface, where it had to |
299
|
|
|
|
|
|
|
localize messages returned by external modules, which may already |
300
|
|
|
|
|
|
|
contain interpolated variables, e.g. C<"Successfully deleted 7 |
301
|
|
|
|
|
|
|
ticket(s) in 'c:\temp'.">. |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
Since I didn't have the time to refactor C and C, |
304
|
|
|
|
|
|
|
I devised a C method to pre-process their messages into one |
305
|
|
|
|
|
|
|
of the I, then applied the matched string to C. |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
Afterwards, I realized that instead of preparing a set of candidate |
308
|
|
|
|
|
|
|
strings, I could actually match against the original I |
309
|
|
|
|
|
|
|
(i.e. PO files via C). This is how |
310
|
|
|
|
|
|
|
C was born. |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
=head1 AUTHORS |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
Audrey Tang Ecpan@audreyt.orgE |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
=head1 CC0 1.0 Universal |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
To the extent possible under law, 唐鳳 has waived all copyright and related |
319
|
|
|
|
|
|
|
or neighboring rights to Locale-Maketext-Fuzzy. |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
This work is published from Taiwan. |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
L |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
=cut |