line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Lingua::Preferred; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
1137
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
43
|
|
4
|
1
|
|
|
1
|
|
9
|
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
227
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
require Exporter; |
7
|
|
|
|
|
|
|
require AutoLoader; |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
# Use Log::TraceMessages if installed. |
10
|
|
|
|
|
|
|
BEGIN { |
11
|
1
|
|
|
1
|
|
2
|
eval { require Log::TraceMessages }; |
|
1
|
|
|
|
|
602
|
|
12
|
1
|
50
|
|
|
|
7
|
if ($@) { |
13
|
1
|
|
|
277
|
|
7
|
*t = sub {}; |
|
277
|
|
|
|
|
311
|
|
14
|
1
|
|
|
150
|
|
1197
|
*d = sub { '' }; |
|
150
|
|
|
|
|
390
|
|
15
|
|
|
|
|
|
|
} |
16
|
|
|
|
|
|
|
else { |
17
|
0
|
|
|
|
|
0
|
*t = \&Log::TraceMessages::t; |
18
|
0
|
|
|
|
|
0
|
*d = \&Log::TraceMessages::d; |
19
|
0
|
|
|
|
|
0
|
Log::TraceMessages::check_argv(); |
20
|
|
|
|
|
|
|
} |
21
|
|
|
|
|
|
|
} |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
@ISA = qw(Exporter AutoLoader); |
24
|
|
|
|
|
|
|
@EXPORT = qw(); @EXPORT_OK = qw(which_lang acceptable_lang); |
25
|
|
|
|
|
|
|
$VERSION = '0.2.4'; |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
=pod |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=head1 NAME |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
Lingua::Preferred - Perl extension to choose a language |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=head1 SYNOPSIS |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
use Lingua::Preferred qw(which_lang acceptable_lang); |
36
|
|
|
|
|
|
|
my @wanted = qw(en de fr it de_CH); |
37
|
|
|
|
|
|
|
my @available = qw(fr it de); |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
my $which = which_lang(\@wanted, \@available); |
40
|
|
|
|
|
|
|
print "language $which is the best of those available\n"; |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
foreach (qw(en_US fr nl de_DE)) { |
43
|
|
|
|
|
|
|
print "language $_ is acceptable\n" |
44
|
|
|
|
|
|
|
if acceptable_lang(\@wanted, $_); |
45
|
|
|
|
|
|
|
} |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=head1 DESCRIPTION |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
Often human-readable information is available in more than one |
50
|
|
|
|
|
|
|
language. Which should you use? This module provides a way for the |
51
|
|
|
|
|
|
|
user to specify possible languages in order of preference, and then to |
52
|
|
|
|
|
|
|
pick the best language of those available. Different 'dialects' given |
53
|
|
|
|
|
|
|
by the 'territory' part of the language specifier (such as en, en_GB, |
54
|
|
|
|
|
|
|
and en_US) are also supported. |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
The routine C<which_lang()> picks the best language from a list of |
57
|
|
|
|
|
|
|
alternatives. The arguments are: |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=over |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=item |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
a reference to a list of preferred languages (first is best). Here, a |
64
|
|
|
|
|
|
|
language is a string like C<'en'> or C<'fr_CA'>. (C<'fr_*'> can also |
65
|
|
|
|
|
|
|
be given - see below.) C<'C'> (named for the Unix 'C' locale) matches |
66
|
|
|
|
|
|
|
any language. |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=item |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
a reference to non-empty list of available languages. Here, a |
71
|
|
|
|
|
|
|
language can be like C<'en'>, C<'en_CA'>, or C<undef> meaning 'unknown'. |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=back |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
The return code is which language to use. This will always be an |
76
|
|
|
|
|
|
|
element of the available languages list. |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
The cleverness of this module (if you can call it that) comes from |
79
|
|
|
|
|
|
|
inferring implicit language preferences based on the explicit list |
80
|
|
|
|
|
|
|
passed in. For example, if you say that en is acceptable, then en_IE |
81
|
|
|
|
|
|
|
and en_DK will presumably be acceptable too (but not as good as just |
82
|
|
|
|
|
|
|
plain en). If you give your language as en_US, then en is almost as |
83
|
|
|
|
|
|
|
good, with the other dialects of en following soon afterwards. |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
If there is a tie between two choices, as when two dialects of the |
86
|
|
|
|
|
|
|
same language are available and neither is explicitly preferred, or |
87
|
|
|
|
|
|
|
when none of the available languages appears in the userE<39>s list, |
88
|
|
|
|
|
|
|
then the choice appearing earlier in the available list is preferred. |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
Sometimes, the automatic inferring of related dialects is not what you |
91
|
|
|
|
|
|
|
want, because a language dialect may be very different to the 'main' |
92
|
|
|
|
|
|
|
language, for example Swiss German or some forms of English. For this |
93
|
|
|
|
|
|
|
case, the special form 'XX_*' is available. If you dislike Mexican |
94
|
|
|
|
|
|
|
Spanish (as a completely arbitrary example), then C<[ 'es', 'es_*', |
95
|
|
|
|
|
|
|
'es_MX' ]> would rank this dialect below any other dialect of es (but |
96
|
|
|
|
|
|
|
still acceptable). You donE<39>t have to explicitly list every other |
97
|
|
|
|
|
|
|
dialect of Spanish before es_MX. |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
So for example, supposing C<@avail> contains the languages available: |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=over |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=item |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
You know English and prefer US English: |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
$which = which_lang([ 'en_US' ], \@avail); |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=item |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
You know English and German, German/Germany is preferred: |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
$which = which_lang([ 'en', 'de_DE' ], \@avail); |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=item |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
You know English and German, but preferably not Swiss German: |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
$which = which_lang([ 'en', 'de', 'de_*', 'de_CH' ], \@avail); |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
Here any dialect of German (eg de_DE, de_AT) is preferable to de_CH. |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=cut |
124
|
|
|
|
|
|
|
sub which_lang( $$ ) { |
125
|
39
|
50
|
|
39
|
0
|
5148
|
die 'usage: which_lang(listref of preferred langs, listref of available)' |
126
|
|
|
|
|
|
|
if @_ != 2; |
127
|
39
|
|
|
|
|
54
|
my ($pref, $avail) = @_; |
128
|
39
|
|
|
|
|
76
|
t '$pref=' . d $pref; |
129
|
39
|
|
|
|
|
64
|
t '$avail=' . d $avail; |
130
|
|
|
|
|
|
|
|
131
|
39
|
|
|
|
|
41
|
my (%explicit, %implicit); |
132
|
39
|
|
|
|
|
43
|
my $pos = 0; |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
# This seems like the best way to make block-nested subroutines |
135
|
|
|
|
|
|
|
my $add_explicit = sub { |
136
|
63
|
|
|
63
|
|
76
|
my $l = shift; |
137
|
63
|
50
|
|
|
|
128
|
die "preferred language $l listed twice" |
138
|
|
|
|
|
|
|
if defined $explicit{$l}; |
139
|
63
|
100
|
|
|
|
108
|
if (delete $implicit{$l}) { t "moved implicit $l to explicit" } |
|
6
|
|
|
|
|
16
|
|
140
|
57
|
|
|
|
|
133
|
else { t "adding explicit $l" } |
141
|
63
|
|
|
|
|
142
|
$explicit{$l} = $pos++; |
142
|
39
|
|
|
|
|
171
|
}; |
143
|
|
|
|
|
|
|
my $add_implicit = sub { |
144
|
64
|
|
|
64
|
|
105
|
my $l = shift; |
145
|
64
|
100
|
|
|
|
120
|
if (defined $explicit{$l}) { |
146
|
14
|
|
|
|
|
34
|
t "$l already explict, not adding implicitly"; |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
else { |
149
|
50
|
100
|
|
|
|
4833
|
if (defined $implicit{$l}) { t "replacing implicit $l" } |
|
4
|
|
|
|
|
9
|
|
150
|
46
|
|
|
|
|
103
|
else { t "adding implicit $l" } |
151
|
50
|
|
|
|
|
155
|
$implicit{$l} = $pos++ |
152
|
|
|
|
|
|
|
} |
153
|
39
|
|
|
|
|
115
|
}; |
154
|
|
|
|
|
|
|
|
155
|
39
|
|
|
|
|
75
|
foreach (@$pref) { |
156
|
63
|
|
|
|
|
107
|
$add_explicit->($_); |
157
|
|
|
|
|
|
|
|
158
|
63
|
100
|
|
|
|
284
|
if ($_ eq 'C') { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
159
|
|
|
|
|
|
|
# Doesn't imply anything - C already matches every |
160
|
|
|
|
|
|
|
# possible language. |
161
|
|
|
|
|
|
|
# |
162
|
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
elsif (/^[a-z][a-z]$/) { |
164
|
|
|
|
|
|
|
# 'en' implies any dialect of 'en' also |
165
|
30
|
|
|
|
|
70
|
$add_implicit->($_ . '_*'); |
166
|
|
|
|
|
|
|
} |
167
|
|
|
|
|
|
|
elsif (/^([a-z][a-z])_([A-Z][A-Z])(?:\@.*)?$/) { # ignore @whatever |
168
|
|
|
|
|
|
|
# 'en_GB' implies 'en', and secondly any other dialect |
169
|
17
|
|
|
|
|
31
|
$add_implicit->($1); |
170
|
17
|
|
|
|
|
46
|
$add_implicit->($1 . '_*'); |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
elsif (/^([a-z][a-z])_\*$/) { |
173
|
|
|
|
|
|
|
# 'en_*' doesn't imply anything - it shouldn't be used |
174
|
|
|
|
|
|
|
# except in odd cases. |
175
|
|
|
|
|
|
|
# |
176
|
|
|
|
|
|
|
} |
177
|
0
|
|
|
|
|
0
|
else { die "bad language '$_'" } # FIXME support 'English' etc |
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
|
180
|
39
|
|
|
|
|
187
|
my %ranking = reverse (%explicit, %implicit); |
181
|
39
|
50
|
|
|
|
82
|
if ($Log::TraceMessages::On) { |
182
|
0
|
|
|
|
|
0
|
t 'ranking:'; |
183
|
0
|
|
|
|
|
0
|
foreach (sort { $a <=> $b } keys %ranking) { |
|
0
|
|
|
|
|
0
|
|
184
|
0
|
|
|
|
|
0
|
t "$_\t$ranking{$_}"; |
185
|
|
|
|
|
|
|
} |
186
|
|
|
|
|
|
|
} |
187
|
|
|
|
|
|
|
|
188
|
39
|
|
|
|
|
115
|
my @langs = @ranking{sort { $a <=> $b } keys %ranking}; |
|
93
|
|
|
|
|
193
|
|
189
|
39
|
|
|
|
|
5808
|
my %avail; |
190
|
39
|
|
|
|
|
66
|
foreach (@$avail) { |
191
|
62
|
100
|
|
|
|
114
|
next if not defined; |
192
|
56
|
50
|
|
|
|
175
|
$avail{$_}++ && die "available language $_ listed twice"; |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
|
195
|
39
|
|
|
|
|
96
|
while (defined (my $lang = shift @langs)) { |
196
|
72
|
100
|
|
|
|
1259
|
if ($lang eq 'C') { |
|
|
100
|
|
|
|
|
|
197
|
|
|
|
|
|
|
# Match first available language. |
198
|
5
|
|
|
|
|
52
|
return $avail->[0]; |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
elsif ($lang =~ /^([a-z][a-z])_\*$/) { |
201
|
|
|
|
|
|
|
# Any dialect of $1 (but not standard). Work through all |
202
|
|
|
|
|
|
|
# of @$avail in order trying to find a match. (So there |
203
|
|
|
|
|
|
|
# is a slight bias towards languages appearing earlier in |
204
|
|
|
|
|
|
|
# @$avail.) |
205
|
|
|
|
|
|
|
# |
206
|
23
|
|
|
|
|
50
|
my $base_lang = $1; |
207
|
23
|
|
|
|
|
32
|
AVAIL: foreach (@$avail) { |
208
|
31
|
100
|
|
|
|
72
|
next if not defined; |
209
|
26
|
100
|
|
|
|
233
|
if (/^\Q$base_lang\E_/) { |
210
|
|
|
|
|
|
|
# Well, it matched... but maybe this dialect was |
211
|
|
|
|
|
|
|
# explicitly specified with a lower priority. |
212
|
|
|
|
|
|
|
# |
213
|
11
|
|
|
|
|
15
|
foreach my $lower_lang (@langs) { |
214
|
9
|
100
|
|
|
|
78
|
next AVAIL if (/^\Q$lower_lang\E$/); |
215
|
|
|
|
|
|
|
} |
216
|
|
|
|
|
|
|
|
217
|
7
|
|
|
|
|
94
|
return $_; |
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
} |
221
|
|
|
|
|
|
|
else { |
222
|
|
|
|
|
|
|
# Exact match |
223
|
44
|
100
|
|
|
|
313
|
return $lang if $avail{$lang}; |
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
} |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
# Couldn't find anything - pick first available language. |
228
|
12
|
|
|
|
|
142
|
return $avail->[0]; |
229
|
|
|
|
|
|
|
} |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
=pod |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
Whereas C<which_lang()> picks the best language from a list of |
234
|
|
|
|
|
|
|
alternatives, C<acceptable_lang()> answers whether a single |
235
|
|
|
|
|
|
|
language is included (explicitly or implicitly) in the list of wanted |
236
|
|
|
|
|
|
|
languages. It adds the implicit dialects in the same way. |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
=cut |
239
|
|
|
|
|
|
|
sub acceptable_lang( $$ ) { |
240
|
36
|
50
|
|
36
|
0
|
1252
|
die 'usage: acceptable_lang(listref of wanted langs, lang)' |
241
|
|
|
|
|
|
|
if @_ != 2; |
242
|
36
|
|
|
|
|
49
|
my ($pref, $l) = @_; |
243
|
36
|
|
|
|
|
61
|
t '$pref=' . d $pref; |
244
|
36
|
|
|
|
|
87
|
t '$l=' . d $l; |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
# We just need to ignore the dialects and compare the main part. |
247
|
36
|
|
|
|
|
70
|
my @pref = @$pref; # copy |
248
|
36
|
|
|
|
|
88
|
$l =~ s/_.+//; |
249
|
36
|
|
|
|
|
62
|
foreach (@pref) { |
250
|
49
|
|
|
|
|
85
|
s/_.+//; |
251
|
49
|
100
|
|
|
|
141
|
return 1 if $l eq $_; |
252
|
|
|
|
|
|
|
} |
253
|
14
|
|
|
|
|
38
|
return 0; |
254
|
|
|
|
|
|
|
} |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
=pod |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
=head1 AUTHOR |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
Ed Avis, ed@membled.com |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
=head1 SEE ALSO |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
perl(1). |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
=cut |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
1; |
269
|
|
|
|
|
|
|
__END__ |