line |
stmt |
bran |
cond |
sub |
time |
code |
1
|
|
|
|
|
|
/* locale.c |
2
|
|
|
|
|
|
* |
3
|
|
|
|
|
|
* Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, |
4
|
|
|
|
|
|
* 2002, 2003, 2005, 2006, 2007, 2008 by Larry Wall and others |
5
|
|
|
|
|
|
* |
6
|
|
|
|
|
|
* You may distribute under the terms of either the GNU General Public |
7
|
|
|
|
|
|
* License or the Artistic License, as specified in the README file. |
8
|
|
|
|
|
|
* |
9
|
|
|
|
|
|
*/ |
10
|
|
|
|
|
|
|
11
|
|
|
|
|
|
/* |
12
|
|
|
|
|
|
* A Elbereth Gilthoniel, |
13
|
|
|
|
|
|
* silivren penna míriel |
14
|
|
|
|
|
|
* o menel aglar elenath! |
15
|
|
|
|
|
|
* Na-chaered palan-díriel |
16
|
|
|
|
|
|
* o galadhremmin ennorath, |
17
|
|
|
|
|
|
* Fanuilos, le linnathon |
18
|
|
|
|
|
|
* nef aear, si nef aearon! |
19
|
|
|
|
|
|
* |
20
|
|
|
|
|
|
* [p.238 of _The Lord of the Rings_, II/i: "Many Meetings"] |
21
|
|
|
|
|
|
*/ |
22
|
|
|
|
|
|
|
23
|
|
|
|
|
|
/* utility functions for handling locale-specific stuff like what |
24
|
|
|
|
|
|
* character represents the decimal point. |
25
|
|
|
|
|
|
*/ |
26
|
|
|
|
|
|
|
27
|
|
|
|
|
|
#include "EXTERN.h" |
28
|
|
|
|
|
|
#define PERL_IN_LOCALE_C |
29
|
|
|
|
|
|
#include "perl.h" |
30
|
|
|
|
|
|
|
31
|
|
|
|
|
|
#ifdef I_LANGINFO |
32
|
|
|
|
|
|
# include |
33
|
|
|
|
|
|
#endif |
34
|
|
|
|
|
|
|
35
|
|
|
|
|
|
#include "reentr.h" |
36
|
|
|
|
|
|
|
37
|
|
|
|
|
|
#ifdef USE_LOCALE |
38
|
|
|
|
|
|
|
39
|
|
|
|
|
|
/* |
40
|
|
|
|
|
|
* Standardize the locale name from a string returned by 'setlocale'. |
41
|
|
|
|
|
|
* |
42
|
|
|
|
|
|
* The typical return value of setlocale() is either |
43
|
|
|
|
|
|
* (1) "xx_YY" if the first argument of setlocale() is not LC_ALL |
44
|
|
|
|
|
|
* (2) "xa_YY xb_YY ..." if the first argument of setlocale() is LC_ALL |
45
|
|
|
|
|
|
* (the space-separated values represent the various sublocales, |
46
|
|
|
|
|
|
* in some unspecified order). This is not handled by this function. |
47
|
|
|
|
|
|
* |
48
|
|
|
|
|
|
* In some platforms it has a form like "LC_SOMETHING=Lang_Country.866\n", |
49
|
|
|
|
|
|
* which is harmful for further use of the string in setlocale(). This |
50
|
|
|
|
|
|
* function removes the trailing new line and everything up through the '=' |
51
|
|
|
|
|
|
* |
52
|
|
|
|
|
|
*/ |
53
|
|
|
|
|
|
STATIC char * |
54
|
73174
|
|
|
|
|
S_stdize_locale(pTHX_ char *locs) |
55
|
|
|
|
|
|
{ |
56
|
73174
|
|
|
|
|
const char * const s = strchr(locs, '='); |
57
|
|
|
|
|
|
bool okay = TRUE; |
58
|
|
|
|
|
|
|
59
|
|
|
|
|
|
PERL_ARGS_ASSERT_STDIZE_LOCALE; |
60
|
|
|
|
|
|
|
61
|
73174
|
50
|
|
|
|
if (s) { |
62
|
0
|
|
|
|
|
const char * const t = strchr(s, '.'); |
63
|
|
|
|
|
|
okay = FALSE; |
64
|
0
|
0
|
|
|
|
if (t) { |
65
|
0
|
|
|
|
|
const char * const u = strchr(t, '\n'); |
66
|
0
|
0
|
|
|
|
if (u && (u[1] == 0)) { |
|
|
0
|
|
|
|
|
67
|
0
|
|
|
|
|
const STRLEN len = u - s; |
68
|
0
|
|
|
|
|
Move(s + 1, locs, len, char); |
69
|
0
|
|
|
|
|
locs[len] = 0; |
70
|
|
|
|
|
|
okay = TRUE; |
71
|
|
|
|
|
|
} |
72
|
|
|
|
|
|
} |
73
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
75
|
73174
|
50
|
|
|
|
if (!okay) |
76
|
0
|
|
|
|
|
Perl_croak(aTHX_ "Can't fix broken locale name \"%s\"", locs); |
77
|
|
|
|
|
|
|
78
|
73174
|
|
|
|
|
return locs; |
79
|
|
|
|
|
|
} |
80
|
|
|
|
|
|
|
81
|
|
|
|
|
|
#endif |
82
|
|
|
|
|
|
|
83
|
|
|
|
|
|
void |
84
|
48710
|
|
|
|
|
Perl_set_numeric_radix(pTHX) |
85
|
|
|
|
|
|
{ |
86
|
|
|
|
|
|
#ifdef USE_LOCALE_NUMERIC |
87
|
|
|
|
|
|
dVAR; |
88
|
|
|
|
|
|
# ifdef HAS_LOCALECONV |
89
|
48710
|
|
|
|
|
const struct lconv* const lc = localeconv(); |
90
|
|
|
|
|
|
|
91
|
48710
|
50
|
|
|
|
if (lc && lc->decimal_point) { |
|
|
50
|
|
|
|
|
92
|
48710
|
50
|
|
|
|
if (lc->decimal_point[0] == '.' && lc->decimal_point[1] == 0) { |
|
|
50
|
|
|
|
|
93
|
48710
|
|
|
|
|
SvREFCNT_dec(PL_numeric_radix_sv); |
94
|
48710
|
|
|
|
|
PL_numeric_radix_sv = NULL; |
95
|
|
|
|
|
|
} |
96
|
|
|
|
|
|
else { |
97
|
0
|
0
|
|
|
|
if (PL_numeric_radix_sv) |
98
|
0
|
|
|
|
|
sv_setpv(PL_numeric_radix_sv, lc->decimal_point); |
99
|
|
|
|
|
|
else |
100
|
0
|
|
|
|
|
PL_numeric_radix_sv = newSVpv(lc->decimal_point, 0); |
101
|
0
|
0
|
|
|
|
if (! is_ascii_string((U8 *) lc->decimal_point, 0) |
102
|
0
|
0
|
|
|
|
&& is_utf8_string((U8 *) lc->decimal_point, 0) |
103
|
0
|
0
|
|
|
|
&& is_cur_LC_category_utf8(LC_NUMERIC)) |
104
|
|
|
|
|
|
{ |
105
|
0
|
|
|
|
|
SvUTF8_on(PL_numeric_radix_sv); |
106
|
|
|
|
|
|
} |
107
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
else |
110
|
0
|
|
|
|
|
PL_numeric_radix_sv = NULL; |
111
|
|
|
|
|
|
# endif /* HAS_LOCALECONV */ |
112
|
|
|
|
|
|
#endif /* USE_LOCALE_NUMERIC */ |
113
|
48710
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
115
|
|
|
|
|
|
/* |
116
|
|
|
|
|
|
* Set up for a new numeric locale. |
117
|
|
|
|
|
|
*/ |
118
|
|
|
|
|
|
void |
119
|
24446
|
|
|
|
|
Perl_new_numeric(pTHX_ const char *newnum) |
120
|
|
|
|
|
|
{ |
121
|
|
|
|
|
|
#ifdef USE_LOCALE_NUMERIC |
122
|
|
|
|
|
|
char *save_newnum; |
123
|
|
|
|
|
|
dVAR; |
124
|
|
|
|
|
|
|
125
|
24446
|
50
|
|
|
|
if (! newnum) { |
126
|
0
|
|
|
|
|
Safefree(PL_numeric_name); |
127
|
0
|
|
|
|
|
PL_numeric_name = NULL; |
128
|
0
|
|
|
|
|
PL_numeric_standard = TRUE; |
129
|
0
|
|
|
|
|
PL_numeric_local = TRUE; |
130
|
24446
|
|
|
|
|
return; |
131
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
133
|
24446
|
|
|
|
|
save_newnum = stdize_locale(savepv(newnum)); |
134
|
24446
|
100
|
|
|
|
if (! PL_numeric_name || strNE(PL_numeric_name, save_newnum)) { |
|
|
100
|
|
|
|
|
135
|
24414
|
|
|
|
|
Safefree(PL_numeric_name); |
136
|
24414
|
|
|
|
|
PL_numeric_name = save_newnum; |
137
|
24142
|
100
|
|
|
|
PL_numeric_standard = ((*save_newnum == 'C' && save_newnum[1] == '\0') |
138
|
36401
|
100
|
|
|
|
|| strEQ(save_newnum, "POSIX")); |
|
|
50
|
|
|
|
|
139
|
24414
|
|
|
|
|
PL_numeric_local = TRUE; |
140
|
24414
|
|
|
|
|
set_numeric_radix(); |
141
|
|
|
|
|
|
} |
142
|
|
|
|
|
|
else { |
143
|
32
|
|
|
|
|
Safefree(save_newnum); |
144
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
146
|
|
|
|
|
|
#endif /* USE_LOCALE_NUMERIC */ |
147
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
149
|
|
|
|
|
|
void |
150
|
24356
|
|
|
|
|
Perl_set_numeric_standard(pTHX) |
151
|
|
|
|
|
|
{ |
152
|
|
|
|
|
|
#ifdef USE_LOCALE_NUMERIC |
153
|
|
|
|
|
|
dVAR; |
154
|
|
|
|
|
|
|
155
|
24356
|
100
|
|
|
|
if (! PL_numeric_standard) { |
156
|
24296
|
|
|
|
|
setlocale(LC_NUMERIC, "C"); |
157
|
24296
|
|
|
|
|
PL_numeric_standard = TRUE; |
158
|
24296
|
|
|
|
|
PL_numeric_local = FALSE; |
159
|
24296
|
|
|
|
|
set_numeric_radix(); |
160
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
162
|
|
|
|
|
|
#endif /* USE_LOCALE_NUMERIC */ |
163
|
24356
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
165
|
|
|
|
|
|
void |
166
|
0
|
|
|
|
|
Perl_set_numeric_local(pTHX) |
167
|
|
|
|
|
|
{ |
168
|
|
|
|
|
|
#ifdef USE_LOCALE_NUMERIC |
169
|
|
|
|
|
|
dVAR; |
170
|
|
|
|
|
|
|
171
|
0
|
0
|
|
|
|
if (! PL_numeric_local) { |
172
|
0
|
|
|
|
|
setlocale(LC_NUMERIC, PL_numeric_name); |
173
|
0
|
|
|
|
|
PL_numeric_standard = FALSE; |
174
|
0
|
|
|
|
|
PL_numeric_local = TRUE; |
175
|
0
|
|
|
|
|
set_numeric_radix(); |
176
|
|
|
|
|
|
} |
177
|
|
|
|
|
|
|
178
|
|
|
|
|
|
#endif /* USE_LOCALE_NUMERIC */ |
179
|
0
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
181
|
|
|
|
|
|
/* |
182
|
|
|
|
|
|
* Set up for a new ctype locale. |
183
|
|
|
|
|
|
*/ |
184
|
|
|
|
|
|
void |
185
|
24412
|
|
|
|
|
Perl_new_ctype(pTHX_ const char *newctype) |
186
|
|
|
|
|
|
{ |
187
|
|
|
|
|
|
#ifdef USE_LOCALE_CTYPE |
188
|
|
|
|
|
|
dVAR; |
189
|
|
|
|
|
|
int i; |
190
|
|
|
|
|
|
|
191
|
|
|
|
|
|
PERL_ARGS_ASSERT_NEW_CTYPE; |
192
|
|
|
|
|
|
|
193
|
6273884
|
100
|
|
|
|
for (i = 0; i < 256; i++) { |
194
|
6249472
|
50
|
|
|
|
if (isUPPER_LC(i)) |
|
|
100
|
|
|
|
|
195
|
635132
|
50
|
|
|
|
PL_fold_locale[i] = toLOWER_LC(i); |
196
|
5614340
|
50
|
|
|
|
else if (isLOWER_LC(i)) |
|
|
100
|
|
|
|
|
197
|
635174
|
50
|
|
|
|
PL_fold_locale[i] = toUPPER_LC(i); |
198
|
|
|
|
|
|
else |
199
|
4979166
|
|
|
|
|
PL_fold_locale[i] = i; |
200
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
202
|
|
|
|
|
|
#endif /* USE_LOCALE_CTYPE */ |
203
|
|
|
|
|
|
PERL_ARGS_ASSERT_NEW_CTYPE; |
204
|
|
|
|
|
|
PERL_UNUSED_ARG(newctype); |
205
|
|
|
|
|
|
PERL_UNUSED_CONTEXT; |
206
|
24412
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
208
|
|
|
|
|
|
/* |
209
|
|
|
|
|
|
* Set up for a new collation locale. |
210
|
|
|
|
|
|
*/ |
211
|
|
|
|
|
|
void |
212
|
24410
|
|
|
|
|
Perl_new_collate(pTHX_ const char *newcoll) |
213
|
|
|
|
|
|
{ |
214
|
|
|
|
|
|
#ifdef USE_LOCALE_COLLATE |
215
|
|
|
|
|
|
dVAR; |
216
|
|
|
|
|
|
|
217
|
24410
|
50
|
|
|
|
if (! newcoll) { |
218
|
0
|
0
|
|
|
|
if (PL_collation_name) { |
219
|
0
|
|
|
|
|
++PL_collation_ix; |
220
|
0
|
|
|
|
|
Safefree(PL_collation_name); |
221
|
0
|
|
|
|
|
PL_collation_name = NULL; |
222
|
|
|
|
|
|
} |
223
|
0
|
|
|
|
|
PL_collation_standard = TRUE; |
224
|
0
|
|
|
|
|
PL_collxfrm_base = 0; |
225
|
0
|
|
|
|
|
PL_collxfrm_mult = 2; |
226
|
24410
|
|
|
|
|
return; |
227
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
229
|
24410
|
100
|
|
|
|
if (! PL_collation_name || strNE(PL_collation_name, newcoll)) { |
|
|
100
|
|
|
|
|
230
|
24382
|
|
|
|
|
++PL_collation_ix; |
231
|
24382
|
|
|
|
|
Safefree(PL_collation_name); |
232
|
24382
|
|
|
|
|
PL_collation_name = stdize_locale(savepv(newcoll)); |
233
|
24082
|
50
|
|
|
|
PL_collation_standard = ((*newcoll == 'C' && newcoll[1] == '\0') |
234
|
36363
|
100
|
|
|
|
|| strEQ(newcoll, "POSIX")); |
|
|
50
|
|
|
|
|
235
|
|
|
|
|
|
|
236
|
|
|
|
|
|
{ |
237
|
|
|
|
|
|
/* 2: at most so many chars ('a', 'b'). */ |
238
|
|
|
|
|
|
/* 50: surely no system expands a char more. */ |
239
|
|
|
|
|
|
#define XFRMBUFSIZE (2 * 50) |
240
|
|
|
|
|
|
char xbuf[XFRMBUFSIZE]; |
241
|
24382
|
|
|
|
|
const Size_t fa = strxfrm(xbuf, "a", XFRMBUFSIZE); |
242
|
24382
|
|
|
|
|
const Size_t fb = strxfrm(xbuf, "ab", XFRMBUFSIZE); |
243
|
24382
|
|
|
|
|
const SSize_t mult = fb - fa; |
244
|
24382
|
50
|
|
|
|
if (mult < 1 && !(fa == 0 && fb == 0)) |
|
|
0
|
|
|
|
|
245
|
0
|
|
|
|
|
Perl_croak(aTHX_ "panic: strxfrm() gets absurd - a => %"UVuf", ab => %"UVuf, |
246
|
|
|
|
|
|
(UV) fa, (UV) fb); |
247
|
24382
|
100
|
|
|
|
PL_collxfrm_base = (fa > (Size_t)mult) ? (fa - mult) : 0; |
248
|
24382
|
|
|
|
|
PL_collxfrm_mult = mult; |
249
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
} |
251
|
|
|
|
|
|
|
252
|
|
|
|
|
|
#endif /* USE_LOCALE_COLLATE */ |
253
|
|
|
|
|
|
} |
254
|
|
|
|
|
|
|
255
|
|
|
|
|
|
/* |
256
|
|
|
|
|
|
* Initialize locale awareness. |
257
|
|
|
|
|
|
*/ |
258
|
|
|
|
|
|
int |
259
|
24346
|
|
|
|
|
Perl_init_i18nl10n(pTHX_ int printwarn) |
260
|
|
|
|
|
|
{ |
261
|
|
|
|
|
|
int ok = 1; |
262
|
|
|
|
|
|
/* returns |
263
|
|
|
|
|
|
* 1 = set ok or not applicable, |
264
|
|
|
|
|
|
* 0 = fallback to C locale, |
265
|
|
|
|
|
|
* -1 = fallback to C locale failed |
266
|
|
|
|
|
|
*/ |
267
|
|
|
|
|
|
|
268
|
|
|
|
|
|
#if defined(USE_LOCALE) |
269
|
|
|
|
|
|
dVAR; |
270
|
|
|
|
|
|
|
271
|
|
|
|
|
|
#ifdef USE_LOCALE_CTYPE |
272
|
|
|
|
|
|
char *curctype = NULL; |
273
|
|
|
|
|
|
#endif /* USE_LOCALE_CTYPE */ |
274
|
|
|
|
|
|
#ifdef USE_LOCALE_COLLATE |
275
|
|
|
|
|
|
char *curcoll = NULL; |
276
|
|
|
|
|
|
#endif /* USE_LOCALE_COLLATE */ |
277
|
|
|
|
|
|
#ifdef USE_LOCALE_NUMERIC |
278
|
|
|
|
|
|
char *curnum = NULL; |
279
|
|
|
|
|
|
#endif /* USE_LOCALE_NUMERIC */ |
280
|
|
|
|
|
|
#ifdef __GLIBC__ |
281
|
24346
|
|
|
|
|
char * const language = PerlEnv_getenv("LANGUAGE"); |
282
|
|
|
|
|
|
#endif |
283
|
|
|
|
|
|
/* NULL uses the existing already set up locale */ |
284
|
24346
|
|
|
|
|
const char * const setlocale_init = (PerlEnv_getenv("PERL_SKIP_LOCALE_INIT")) |
285
|
|
|
|
|
|
? NULL |
286
|
24346
|
50
|
|
|
|
: ""; |
287
|
24346
|
|
|
|
|
char * const lc_all = PerlEnv_getenv("LC_ALL"); |
288
|
24346
|
|
|
|
|
char * const lang = PerlEnv_getenv("LANG"); |
289
|
|
|
|
|
|
bool setlocale_failure = FALSE; |
290
|
|
|
|
|
|
|
291
|
|
|
|
|
|
#ifdef LOCALE_ENVIRON_REQUIRED |
292
|
|
|
|
|
|
|
293
|
|
|
|
|
|
/* |
294
|
|
|
|
|
|
* Ultrix setlocale(..., "") fails if there are no environment |
295
|
|
|
|
|
|
* variables from which to get a locale name. |
296
|
|
|
|
|
|
*/ |
297
|
|
|
|
|
|
|
298
|
|
|
|
|
|
bool done = FALSE; |
299
|
|
|
|
|
|
|
300
|
|
|
|
|
|
# ifdef LC_ALL |
301
|
|
|
|
|
|
if (lang) { |
302
|
|
|
|
|
|
if (setlocale(LC_ALL, setlocale_init)) |
303
|
|
|
|
|
|
done = TRUE; |
304
|
|
|
|
|
|
else |
305
|
|
|
|
|
|
setlocale_failure = TRUE; |
306
|
|
|
|
|
|
} |
307
|
|
|
|
|
|
if (!setlocale_failure) { |
308
|
|
|
|
|
|
# ifdef USE_LOCALE_CTYPE |
309
|
|
|
|
|
|
Safefree(curctype); |
310
|
|
|
|
|
|
if (! (curctype = |
311
|
|
|
|
|
|
setlocale(LC_CTYPE, |
312
|
|
|
|
|
|
(!done && (lang || PerlEnv_getenv("LC_CTYPE"))) |
313
|
|
|
|
|
|
? setlocale_init : NULL))) |
314
|
|
|
|
|
|
setlocale_failure = TRUE; |
315
|
|
|
|
|
|
else |
316
|
|
|
|
|
|
curctype = savepv(curctype); |
317
|
|
|
|
|
|
# endif /* USE_LOCALE_CTYPE */ |
318
|
|
|
|
|
|
# ifdef USE_LOCALE_COLLATE |
319
|
|
|
|
|
|
Safefree(curcoll); |
320
|
|
|
|
|
|
if (! (curcoll = |
321
|
|
|
|
|
|
setlocale(LC_COLLATE, |
322
|
|
|
|
|
|
(!done && (lang || PerlEnv_getenv("LC_COLLATE"))) |
323
|
|
|
|
|
|
? setlocale_init : NULL))) |
324
|
|
|
|
|
|
setlocale_failure = TRUE; |
325
|
|
|
|
|
|
else |
326
|
|
|
|
|
|
curcoll = savepv(curcoll); |
327
|
|
|
|
|
|
# endif /* USE_LOCALE_COLLATE */ |
328
|
|
|
|
|
|
# ifdef USE_LOCALE_NUMERIC |
329
|
|
|
|
|
|
Safefree(curnum); |
330
|
|
|
|
|
|
if (! (curnum = |
331
|
|
|
|
|
|
setlocale(LC_NUMERIC, |
332
|
|
|
|
|
|
(!done && (lang || PerlEnv_getenv("LC_NUMERIC"))) |
333
|
|
|
|
|
|
? setlocale_init : NULL))) |
334
|
|
|
|
|
|
setlocale_failure = TRUE; |
335
|
|
|
|
|
|
else |
336
|
|
|
|
|
|
curnum = savepv(curnum); |
337
|
|
|
|
|
|
# endif /* USE_LOCALE_NUMERIC */ |
338
|
|
|
|
|
|
} |
339
|
|
|
|
|
|
|
340
|
|
|
|
|
|
# endif /* LC_ALL */ |
341
|
|
|
|
|
|
|
342
|
|
|
|
|
|
#endif /* !LOCALE_ENVIRON_REQUIRED */ |
343
|
|
|
|
|
|
|
344
|
|
|
|
|
|
#ifdef LC_ALL |
345
|
24346
|
50
|
|
|
|
if (! setlocale(LC_ALL, setlocale_init)) |
346
|
|
|
|
|
|
setlocale_failure = TRUE; |
347
|
|
|
|
|
|
#endif /* LC_ALL */ |
348
|
|
|
|
|
|
|
349
|
24346
|
50
|
|
|
|
if (!setlocale_failure) { |
350
|
|
|
|
|
|
#ifdef USE_LOCALE_CTYPE |
351
|
24346
|
|
|
|
|
Safefree(curctype); |
352
|
24346
|
50
|
|
|
|
if (! (curctype = setlocale(LC_CTYPE, setlocale_init))) |
353
|
|
|
|
|
|
setlocale_failure = TRUE; |
354
|
|
|
|
|
|
else |
355
|
24346
|
|
|
|
|
curctype = savepv(curctype); |
356
|
|
|
|
|
|
#endif /* USE_LOCALE_CTYPE */ |
357
|
|
|
|
|
|
#ifdef USE_LOCALE_COLLATE |
358
|
24346
|
|
|
|
|
Safefree(curcoll); |
359
|
24346
|
50
|
|
|
|
if (! (curcoll = setlocale(LC_COLLATE, setlocale_init))) |
360
|
|
|
|
|
|
setlocale_failure = TRUE; |
361
|
|
|
|
|
|
else |
362
|
24346
|
|
|
|
|
curcoll = savepv(curcoll); |
363
|
|
|
|
|
|
#endif /* USE_LOCALE_COLLATE */ |
364
|
|
|
|
|
|
#ifdef USE_LOCALE_NUMERIC |
365
|
24346
|
|
|
|
|
Safefree(curnum); |
366
|
24346
|
50
|
|
|
|
if (! (curnum = setlocale(LC_NUMERIC, setlocale_init))) |
367
|
|
|
|
|
|
setlocale_failure = TRUE; |
368
|
|
|
|
|
|
else |
369
|
24346
|
|
|
|
|
curnum = savepv(curnum); |
370
|
|
|
|
|
|
#endif /* USE_LOCALE_NUMERIC */ |
371
|
|
|
|
|
|
} |
372
|
|
|
|
|
|
|
373
|
24346
|
50
|
|
|
|
if (setlocale_failure) { |
374
|
|
|
|
|
|
char *p; |
375
|
0
|
0
|
|
|
|
const bool locwarn = (printwarn > 1 || |
|
|
0
|
|
|
|
|
376
|
0
|
0
|
|
|
|
(printwarn && |
377
|
0
|
0
|
|
|
|
(!(p = PerlEnv_getenv("PERL_BADLANG")) || atoi(p)))); |
378
|
|
|
|
|
|
|
379
|
0
|
0
|
|
|
|
if (locwarn) { |
380
|
|
|
|
|
|
#ifdef LC_ALL |
381
|
|
|
|
|
|
|
382
|
0
|
0
|
|
|
|
PerlIO_printf(Perl_error_log, |
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
383
|
|
|
|
|
|
"perl: warning: Setting locale failed.\n"); |
384
|
|
|
|
|
|
|
385
|
|
|
|
|
|
#else /* !LC_ALL */ |
386
|
|
|
|
|
|
|
387
|
|
|
|
|
|
PerlIO_printf(Perl_error_log, |
388
|
|
|
|
|
|
"perl: warning: Setting locale failed for the categories:\n\t"); |
389
|
|
|
|
|
|
#ifdef USE_LOCALE_CTYPE |
390
|
|
|
|
|
|
if (! curctype) |
391
|
|
|
|
|
|
PerlIO_printf(Perl_error_log, "LC_CTYPE "); |
392
|
|
|
|
|
|
#endif /* USE_LOCALE_CTYPE */ |
393
|
|
|
|
|
|
#ifdef USE_LOCALE_COLLATE |
394
|
|
|
|
|
|
if (! curcoll) |
395
|
|
|
|
|
|
PerlIO_printf(Perl_error_log, "LC_COLLATE "); |
396
|
|
|
|
|
|
#endif /* USE_LOCALE_COLLATE */ |
397
|
|
|
|
|
|
#ifdef USE_LOCALE_NUMERIC |
398
|
|
|
|
|
|
if (! curnum) |
399
|
|
|
|
|
|
PerlIO_printf(Perl_error_log, "LC_NUMERIC "); |
400
|
|
|
|
|
|
#endif /* USE_LOCALE_NUMERIC */ |
401
|
|
|
|
|
|
PerlIO_printf(Perl_error_log, "\n"); |
402
|
|
|
|
|
|
|
403
|
|
|
|
|
|
#endif /* LC_ALL */ |
404
|
|
|
|
|
|
|
405
|
0
|
0
|
|
|
|
PerlIO_printf(Perl_error_log, |
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
406
|
|
|
|
|
|
"perl: warning: Please check that your locale settings:\n"); |
407
|
|
|
|
|
|
|
408
|
|
|
|
|
|
#ifdef __GLIBC__ |
409
|
0
|
0
|
|
|
|
PerlIO_printf(Perl_error_log, |
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
410
|
|
|
|
|
|
"\tLANGUAGE = %c%s%c,\n", |
411
|
|
|
|
|
|
language ? '"' : '(', |
412
|
|
|
|
|
|
language ? language : "unset", |
413
|
|
|
|
|
|
language ? '"' : ')'); |
414
|
|
|
|
|
|
#endif |
415
|
|
|
|
|
|
|
416
|
0
|
0
|
|
|
|
PerlIO_printf(Perl_error_log, |
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
417
|
|
|
|
|
|
"\tLC_ALL = %c%s%c,\n", |
418
|
|
|
|
|
|
lc_all ? '"' : '(', |
419
|
|
|
|
|
|
lc_all ? lc_all : "unset", |
420
|
|
|
|
|
|
lc_all ? '"' : ')'); |
421
|
|
|
|
|
|
|
422
|
|
|
|
|
|
#if defined(USE_ENVIRON_ARRAY) |
423
|
|
|
|
|
|
{ |
424
|
|
|
|
|
|
char **e; |
425
|
0
|
0
|
|
|
|
for (e = environ; *e; e++) { |
426
|
0
|
0
|
|
|
|
if (strnEQ(*e, "LC_", 3) |
427
|
0
|
0
|
|
|
|
&& strnNE(*e, "LC_ALL=", 7) |
428
|
0
|
0
|
|
|
|
&& (p = strchr(*e, '='))) |
429
|
0
|
0
|
|
|
|
PerlIO_printf(Perl_error_log, "\t%.*s = \"%s\",\n", |
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
430
|
0
|
|
|
|
|
(int)(p - *e), *e, p + 1); |
431
|
|
|
|
|
|
} |
432
|
|
|
|
|
|
} |
433
|
|
|
|
|
|
#else |
434
|
|
|
|
|
|
PerlIO_printf(Perl_error_log, |
435
|
|
|
|
|
|
"\t(possibly more locale environment variables)\n"); |
436
|
|
|
|
|
|
#endif |
437
|
|
|
|
|
|
|
438
|
0
|
0
|
|
|
|
PerlIO_printf(Perl_error_log, |
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
439
|
|
|
|
|
|
"\tLANG = %c%s%c\n", |
440
|
|
|
|
|
|
lang ? '"' : '(', |
441
|
|
|
|
|
|
lang ? lang : "unset", |
442
|
|
|
|
|
|
lang ? '"' : ')'); |
443
|
|
|
|
|
|
|
444
|
0
|
0
|
|
|
|
PerlIO_printf(Perl_error_log, |
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
445
|
|
|
|
|
|
" are supported and installed on your system.\n"); |
446
|
|
|
|
|
|
} |
447
|
|
|
|
|
|
|
448
|
|
|
|
|
|
#ifdef LC_ALL |
449
|
|
|
|
|
|
|
450
|
0
|
0
|
|
|
|
if (setlocale(LC_ALL, "C")) { |
451
|
0
|
0
|
|
|
|
if (locwarn) |
452
|
0
|
0
|
|
|
|
PerlIO_printf(Perl_error_log, |
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
453
|
|
|
|
|
|
"perl: warning: Falling back to the standard locale (\"C\").\n"); |
454
|
|
|
|
|
|
ok = 0; |
455
|
|
|
|
|
|
} |
456
|
|
|
|
|
|
else { |
457
|
0
|
0
|
|
|
|
if (locwarn) |
458
|
0
|
0
|
|
|
|
PerlIO_printf(Perl_error_log, |
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
459
|
|
|
|
|
|
"perl: warning: Failed to fall back to the standard locale (\"C\").\n"); |
460
|
|
|
|
|
|
ok = -1; |
461
|
|
|
|
|
|
} |
462
|
|
|
|
|
|
|
463
|
|
|
|
|
|
#else /* ! LC_ALL */ |
464
|
|
|
|
|
|
|
465
|
|
|
|
|
|
if (0 |
466
|
|
|
|
|
|
#ifdef USE_LOCALE_CTYPE |
467
|
|
|
|
|
|
|| !(curctype || setlocale(LC_CTYPE, "C")) |
468
|
|
|
|
|
|
#endif /* USE_LOCALE_CTYPE */ |
469
|
|
|
|
|
|
#ifdef USE_LOCALE_COLLATE |
470
|
|
|
|
|
|
|| !(curcoll || setlocale(LC_COLLATE, "C")) |
471
|
|
|
|
|
|
#endif /* USE_LOCALE_COLLATE */ |
472
|
|
|
|
|
|
#ifdef USE_LOCALE_NUMERIC |
473
|
|
|
|
|
|
|| !(curnum || setlocale(LC_NUMERIC, "C")) |
474
|
|
|
|
|
|
#endif /* USE_LOCALE_NUMERIC */ |
475
|
|
|
|
|
|
) |
476
|
|
|
|
|
|
{ |
477
|
|
|
|
|
|
if (locwarn) |
478
|
|
|
|
|
|
PerlIO_printf(Perl_error_log, |
479
|
|
|
|
|
|
"perl: warning: Cannot fall back to the standard locale (\"C\").\n"); |
480
|
|
|
|
|
|
ok = -1; |
481
|
|
|
|
|
|
} |
482
|
|
|
|
|
|
|
483
|
|
|
|
|
|
#endif /* ! LC_ALL */ |
484
|
|
|
|
|
|
|
485
|
|
|
|
|
|
#ifdef USE_LOCALE_CTYPE |
486
|
0
|
|
|
|
|
Safefree(curctype); |
487
|
0
|
|
|
|
|
curctype = savepv(setlocale(LC_CTYPE, NULL)); |
488
|
|
|
|
|
|
#endif /* USE_LOCALE_CTYPE */ |
489
|
|
|
|
|
|
#ifdef USE_LOCALE_COLLATE |
490
|
0
|
|
|
|
|
Safefree(curcoll); |
491
|
0
|
|
|
|
|
curcoll = savepv(setlocale(LC_COLLATE, NULL)); |
492
|
|
|
|
|
|
#endif /* USE_LOCALE_COLLATE */ |
493
|
|
|
|
|
|
#ifdef USE_LOCALE_NUMERIC |
494
|
0
|
|
|
|
|
Safefree(curnum); |
495
|
0
|
|
|
|
|
curnum = savepv(setlocale(LC_NUMERIC, NULL)); |
496
|
|
|
|
|
|
#endif /* USE_LOCALE_NUMERIC */ |
497
|
|
|
|
|
|
} |
498
|
|
|
|
|
|
else { |
499
|
|
|
|
|
|
|
500
|
|
|
|
|
|
#ifdef USE_LOCALE_CTYPE |
501
|
24346
|
|
|
|
|
new_ctype(curctype); |
502
|
|
|
|
|
|
#endif /* USE_LOCALE_CTYPE */ |
503
|
|
|
|
|
|
|
504
|
|
|
|
|
|
#ifdef USE_LOCALE_COLLATE |
505
|
24346
|
|
|
|
|
new_collate(curcoll); |
506
|
|
|
|
|
|
#endif /* USE_LOCALE_COLLATE */ |
507
|
|
|
|
|
|
|
508
|
|
|
|
|
|
#ifdef USE_LOCALE_NUMERIC |
509
|
24346
|
|
|
|
|
new_numeric(curnum); |
510
|
|
|
|
|
|
#endif /* USE_LOCALE_NUMERIC */ |
511
|
|
|
|
|
|
|
512
|
|
|
|
|
|
} |
513
|
|
|
|
|
|
|
514
|
|
|
|
|
|
#if defined(USE_PERLIO) && defined(USE_LOCALE_CTYPE) |
515
|
|
|
|
|
|
{ |
516
|
|
|
|
|
|
/* Set PL_utf8locale to TRUE if using PerlIO _and_ |
517
|
|
|
|
|
|
the current LC_CTYPE locale is UTF-8. |
518
|
|
|
|
|
|
If PL_utf8locale and PL_unicode (set by -C or by $ENV{PERL_UNICODE}) |
519
|
|
|
|
|
|
are true, perl.c:S_parse_body() will turn on the PerlIO :utf8 layer |
520
|
|
|
|
|
|
on STDIN, STDOUT, STDERR, _and_ the default open discipline. |
521
|
|
|
|
|
|
*/ |
522
|
24346
|
|
|
|
|
PL_utf8locale = is_cur_LC_category_utf8(LC_CTYPE); |
523
|
|
|
|
|
|
} |
524
|
|
|
|
|
|
/* Set PL_unicode to $ENV{PERL_UNICODE} if using PerlIO. |
525
|
|
|
|
|
|
This is an alternative to using the -C command line switch |
526
|
|
|
|
|
|
(the -C if present will override this). */ |
527
|
|
|
|
|
|
{ |
528
|
24346
|
|
|
|
|
const char *p = PerlEnv_getenv("PERL_UNICODE"); |
529
|
24346
|
100
|
|
|
|
PL_unicode = p ? parse_unicode_opts(&p) : 0; |
530
|
24346
|
50
|
|
|
|
if (PL_unicode & PERL_UNICODE_UTF8CACHEASSERT_FLAG) |
531
|
0
|
|
|
|
|
PL_utf8cache = -1; |
532
|
|
|
|
|
|
} |
533
|
|
|
|
|
|
#endif |
534
|
|
|
|
|
|
|
535
|
|
|
|
|
|
#ifdef USE_LOCALE_CTYPE |
536
|
24346
|
|
|
|
|
Safefree(curctype); |
537
|
|
|
|
|
|
#endif /* USE_LOCALE_CTYPE */ |
538
|
|
|
|
|
|
#ifdef USE_LOCALE_COLLATE |
539
|
24346
|
|
|
|
|
Safefree(curcoll); |
540
|
|
|
|
|
|
#endif /* USE_LOCALE_COLLATE */ |
541
|
|
|
|
|
|
#ifdef USE_LOCALE_NUMERIC |
542
|
24346
|
|
|
|
|
Safefree(curnum); |
543
|
|
|
|
|
|
#endif /* USE_LOCALE_NUMERIC */ |
544
|
|
|
|
|
|
|
545
|
|
|
|
|
|
#endif /* USE_LOCALE */ |
546
|
|
|
|
|
|
|
547
|
24346
|
|
|
|
|
return ok; |
548
|
|
|
|
|
|
} |
549
|
|
|
|
|
|
|
550
|
|
|
|
|
|
#ifdef USE_LOCALE_COLLATE |
551
|
|
|
|
|
|
|
552
|
|
|
|
|
|
/* |
553
|
|
|
|
|
|
* mem_collxfrm() is a bit like strxfrm() but with two important |
554
|
|
|
|
|
|
* differences. First, it handles embedded NULs. Second, it allocates |
555
|
|
|
|
|
|
* a bit more memory than needed for the transformed data itself. |
556
|
|
|
|
|
|
* The real transformed data begins at offset sizeof(collationix). |
557
|
|
|
|
|
|
* Please see sv_collxfrm() to see how this is used. |
558
|
|
|
|
|
|
*/ |
559
|
|
|
|
|
|
|
560
|
|
|
|
|
|
char * |
561
|
0
|
|
|
|
|
Perl_mem_collxfrm(pTHX_ const char *s, STRLEN len, STRLEN *xlen) |
562
|
|
|
|
|
|
{ |
563
|
|
|
|
|
|
dVAR; |
564
|
|
|
|
|
|
char *xbuf; |
565
|
|
|
|
|
|
STRLEN xAlloc, xin, xout; /* xalloc is a reserved word in VC */ |
566
|
|
|
|
|
|
|
567
|
|
|
|
|
|
PERL_ARGS_ASSERT_MEM_COLLXFRM; |
568
|
|
|
|
|
|
|
569
|
|
|
|
|
|
/* the first sizeof(collationix) bytes are used by sv_collxfrm(). */ |
570
|
|
|
|
|
|
/* the +1 is for the terminating NUL. */ |
571
|
|
|
|
|
|
|
572
|
0
|
|
|
|
|
xAlloc = sizeof(PL_collation_ix) + PL_collxfrm_base + (PL_collxfrm_mult * len) + 1; |
573
|
0
|
|
|
|
|
Newx(xbuf, xAlloc, char); |
574
|
0
|
0
|
|
|
|
if (! xbuf) |
575
|
|
|
|
|
|
goto bad; |
576
|
|
|
|
|
|
|
577
|
0
|
|
|
|
|
*(U32*)xbuf = PL_collation_ix; |
578
|
|
|
|
|
|
xout = sizeof(PL_collation_ix); |
579
|
0
|
0
|
|
|
|
for (xin = 0; xin < len; ) { |
580
|
|
|
|
|
|
Size_t xused; |
581
|
|
|
|
|
|
|
582
|
|
|
|
|
|
for (;;) { |
583
|
0
|
|
|
|
|
xused = strxfrm(xbuf + xout, s + xin, xAlloc - xout); |
584
|
0
|
0
|
|
|
|
if (xused >= PERL_INT_MAX) |
585
|
|
|
|
|
|
goto bad; |
586
|
0
|
0
|
|
|
|
if ((STRLEN)xused < xAlloc - xout) |
587
|
|
|
|
|
|
break; |
588
|
0
|
|
|
|
|
xAlloc = (2 * xAlloc) + 1; |
589
|
0
|
|
|
|
|
Renew(xbuf, xAlloc, char); |
590
|
0
|
0
|
|
|
|
if (! xbuf) |
591
|
|
|
|
|
|
goto bad; |
592
|
|
|
|
|
|
} |
593
|
|
|
|
|
|
|
594
|
0
|
|
|
|
|
xin += strlen(s + xin) + 1; |
595
|
0
|
|
|
|
|
xout += xused; |
596
|
|
|
|
|
|
|
597
|
|
|
|
|
|
/* Embedded NULs are understood but silently skipped |
598
|
|
|
|
|
|
* because they make no sense in locale collation. */ |
599
|
|
|
|
|
|
} |
600
|
|
|
|
|
|
|
601
|
0
|
|
|
|
|
xbuf[xout] = '\0'; |
602
|
0
|
|
|
|
|
*xlen = xout - sizeof(PL_collation_ix); |
603
|
0
|
|
|
|
|
return xbuf; |
604
|
|
|
|
|
|
|
605
|
|
|
|
|
|
bad: |
606
|
0
|
|
|
|
|
Safefree(xbuf); |
607
|
0
|
|
|
|
|
*xlen = 0; |
608
|
0
|
|
|
|
|
return NULL; |
609
|
|
|
|
|
|
} |
610
|
|
|
|
|
|
|
611
|
|
|
|
|
|
#endif /* USE_LOCALE_COLLATE */ |
612
|
|
|
|
|
|
|
613
|
|
|
|
|
|
#ifdef USE_LOCALE |
614
|
|
|
|
|
|
|
615
|
|
|
|
|
|
STATIC bool |
616
|
24346
|
|
|
|
|
S_is_cur_LC_category_utf8(pTHX_ int category) |
617
|
|
|
|
|
|
{ |
618
|
|
|
|
|
|
/* Returns TRUE if the current locale for 'category' is UTF-8; FALSE |
619
|
|
|
|
|
|
* otherwise. 'category' may not be LC_ALL. If the platform doesn't have |
620
|
|
|
|
|
|
* nl_langinfo(), this employs a heuristic, which hence could give the |
621
|
|
|
|
|
|
* wrong result. It errs on the side of not being a UTF-8 locale. */ |
622
|
|
|
|
|
|
|
623
|
|
|
|
|
|
char *save_input_locale = NULL; |
624
|
|
|
|
|
|
STRLEN final_pos; |
625
|
|
|
|
|
|
|
626
|
|
|
|
|
|
#ifdef LC_ALL |
627
|
|
|
|
|
|
assert(category != LC_ALL); |
628
|
|
|
|
|
|
#endif |
629
|
|
|
|
|
|
|
630
|
|
|
|
|
|
/* First dispose of the trivial cases */ |
631
|
24346
|
|
|
|
|
save_input_locale = stdize_locale(setlocale(category, NULL)); |
632
|
24346
|
50
|
|
|
|
if (! save_input_locale) { |
633
|
|
|
|
|
|
return FALSE; /* XXX maybe should croak */ |
634
|
|
|
|
|
|
} |
635
|
24346
|
100
|
|
|
|
if ((*save_input_locale == 'C' && save_input_locale[1] == '\0') |
|
|
50
|
|
|
|
|
636
|
24296
|
50
|
|
|
|
|| strEQ(save_input_locale, "POSIX")) |
637
|
|
|
|
|
|
{ |
638
|
|
|
|
|
|
return FALSE; |
639
|
|
|
|
|
|
} |
640
|
|
|
|
|
|
|
641
|
24296
|
|
|
|
|
save_input_locale = savepv(save_input_locale); |
642
|
|
|
|
|
|
|
643
|
|
|
|
|
|
#if defined(HAS_NL_LANGINFO) && defined(CODESET) && defined(USE_LOCALE_CTYPE) |
644
|
|
|
|
|
|
|
645
|
|
|
|
|
|
{ /* Next try nl_langinfo if available */ |
646
|
|
|
|
|
|
|
647
|
|
|
|
|
|
char *save_ctype_locale = NULL; |
648
|
|
|
|
|
|
char *codeset = NULL; |
649
|
|
|
|
|
|
|
650
|
24296
|
50
|
|
|
|
if (category != LC_CTYPE) { /* nl_langinfo works only on LC_CTYPE */ |
651
|
|
|
|
|
|
|
652
|
|
|
|
|
|
/* Get the current LC_CTYPE locale */ |
653
|
0
|
|
|
|
|
save_ctype_locale = stdize_locale(savepv(setlocale(LC_CTYPE, NULL))); |
654
|
0
|
0
|
|
|
|
if (! save_ctype_locale) { |
655
|
|
|
|
|
|
goto cant_use_nllanginfo; |
656
|
|
|
|
|
|
} |
657
|
|
|
|
|
|
|
658
|
|
|
|
|
|
/* If LC_CTYPE and the desired category use the same locale, this |
659
|
|
|
|
|
|
* means that finding the value for LC_CTYPE is the same as finding |
660
|
|
|
|
|
|
* the value for the desired category. Otherwise, switch LC_CTYPE |
661
|
|
|
|
|
|
* to the desired category's locale */ |
662
|
0
|
0
|
|
|
|
if (strEQ(save_ctype_locale, save_input_locale)) { |
663
|
0
|
|
|
|
|
Safefree(save_ctype_locale); |
664
|
|
|
|
|
|
save_ctype_locale = NULL; |
665
|
|
|
|
|
|
} |
666
|
0
|
0
|
|
|
|
else if (! setlocale(LC_CTYPE, save_input_locale)) { |
667
|
0
|
|
|
|
|
Safefree(save_ctype_locale); |
668
|
0
|
|
|
|
|
goto cant_use_nllanginfo; |
669
|
|
|
|
|
|
} |
670
|
|
|
|
|
|
} |
671
|
|
|
|
|
|
|
672
|
|
|
|
|
|
/* Here the current LC_CTYPE is set to the locale of the category whose |
673
|
|
|
|
|
|
* information is desired. This means that nl_langinfo() should give |
674
|
|
|
|
|
|
* the correct results */ |
675
|
24296
|
|
|
|
|
codeset = savepv(nl_langinfo(CODESET)); |
676
|
24296
|
50
|
|
|
|
if (codeset) { |
677
|
|
|
|
|
|
bool is_utf8; |
678
|
|
|
|
|
|
|
679
|
|
|
|
|
|
/* If we switched LC_CTYPE, switch back */ |
680
|
24296
|
50
|
|
|
|
if (save_ctype_locale) { |
681
|
0
|
|
|
|
|
setlocale(LC_CTYPE, save_ctype_locale); |
682
|
0
|
|
|
|
|
Safefree(save_ctype_locale); |
683
|
|
|
|
|
|
} |
684
|
|
|
|
|
|
|
685
|
36264
|
|
|
|
|
is_utf8 = foldEQ(codeset, STR_WITH_LEN("UTF-8")) |
686
|
24296
|
50
|
|
|
|
|| foldEQ(codeset, STR_WITH_LEN("UTF8")); |
|
|
0
|
|
|
|
|
687
|
|
|
|
|
|
|
688
|
24296
|
|
|
|
|
Safefree(codeset); |
689
|
24296
|
|
|
|
|
Safefree(save_input_locale); |
690
|
24296
|
|
|
|
|
return is_utf8; |
691
|
|
|
|
|
|
} |
692
|
|
|
|
|
|
|
693
|
|
|
|
|
|
} |
694
|
|
|
|
|
|
cant_use_nllanginfo: |
695
|
|
|
|
|
|
|
696
|
|
|
|
|
|
#endif /* HAS_NL_LANGINFO etc */ |
697
|
|
|
|
|
|
|
698
|
|
|
|
|
|
/* nl_langinfo not available or failed somehow. Look at the locale name to |
699
|
|
|
|
|
|
* see if it matches qr/UTF -? 8 /ix */ |
700
|
|
|
|
|
|
|
701
|
0
|
|
|
|
|
final_pos = strlen(save_input_locale) - 1; |
702
|
0
|
0
|
|
|
|
if (final_pos >= 3) { |
703
|
|
|
|
|
|
char *name = save_input_locale; |
704
|
|
|
|
|
|
|
705
|
|
|
|
|
|
/* Find next 'U' or 'u' and look from there */ |
706
|
0
|
0
|
|
|
|
while ((name += strcspn(name, "Uu") + 1) |
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
707
|
0
|
|
|
|
|
<= save_input_locale + final_pos - 2) |
708
|
|
|
|
|
|
{ |
709
|
0
|
0
|
|
|
|
if (toFOLD(*(name)) != 't' |
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
710
|
0
|
0
|
|
|
|
|| toFOLD(*(name + 1)) != 'f') |
|
|
0
|
|
|
|
|
711
|
|
|
|
|
|
{ |
712
|
0
|
|
|
|
|
continue; |
713
|
|
|
|
|
|
} |
714
|
0
|
|
|
|
|
name += 2; |
715
|
0
|
0
|
|
|
|
if (*(name) == '-') { |
716
|
0
|
0
|
|
|
|
if ((name > save_input_locale + final_pos - 1)) { |
717
|
|
|
|
|
|
break; |
718
|
|
|
|
|
|
} |
719
|
0
|
|
|
|
|
name++; |
720
|
|
|
|
|
|
} |
721
|
0
|
0
|
|
|
|
if (*(name) == '8') { |
722
|
0
|
|
|
|
|
Safefree(save_input_locale); |
723
|
0
|
|
|
|
|
return TRUE; |
724
|
|
|
|
|
|
} |
725
|
|
|
|
|
|
} |
726
|
|
|
|
|
|
} |
727
|
|
|
|
|
|
|
728
|
|
|
|
|
|
#ifdef WIN32 |
729
|
|
|
|
|
|
/* http://msdn.microsoft.com/en-us/library/windows/desktop/dd317756.aspx */ |
730
|
|
|
|
|
|
if (final_pos >= 4 |
731
|
|
|
|
|
|
&& *(save_input_locale + final_pos - 0) == '1' |
732
|
|
|
|
|
|
&& *(save_input_locale + final_pos - 1) == '0' |
733
|
|
|
|
|
|
&& *(save_input_locale + final_pos - 2) == '0' |
734
|
|
|
|
|
|
&& *(save_input_locale + final_pos - 3) == '5' |
735
|
|
|
|
|
|
&& *(save_input_locale + final_pos - 4) == '6') |
736
|
|
|
|
|
|
{ |
737
|
|
|
|
|
|
Safefree(save_input_locale); |
738
|
|
|
|
|
|
return TRUE; |
739
|
|
|
|
|
|
} |
740
|
|
|
|
|
|
#endif |
741
|
|
|
|
|
|
|
742
|
|
|
|
|
|
/* Other common encodings are the ISO 8859 series, which aren't UTF-8 */ |
743
|
0
|
0
|
|
|
|
if (instr(save_input_locale, "8859")) { |
744
|
0
|
|
|
|
|
Safefree(save_input_locale); |
745
|
0
|
|
|
|
|
return FALSE; |
746
|
|
|
|
|
|
} |
747
|
|
|
|
|
|
|
748
|
|
|
|
|
|
#ifdef HAS_LOCALECONV |
749
|
|
|
|
|
|
|
750
|
|
|
|
|
|
# ifdef USE_LOCALE_MONETARY |
751
|
|
|
|
|
|
|
752
|
|
|
|
|
|
/* Here, there is nothing in the locale name to indicate whether the locale |
753
|
|
|
|
|
|
* is UTF-8 or not. This "name", the return of setlocale(), is actually |
754
|
|
|
|
|
|
* defined to be opaque, so we can't really rely on the absence of various |
755
|
|
|
|
|
|
* substrings in the name to indicate its UTF-8ness. Look at the locale's |
756
|
|
|
|
|
|
* currency symbol. Often that will be in the native script, and if the |
757
|
|
|
|
|
|
* symbol isn't in UTF-8, we know that the locale isn't. If it is |
758
|
|
|
|
|
|
* non-ASCII UTF-8, we infer that the locale is too. |
759
|
|
|
|
|
|
* To do this, like above for LC_CTYPE, we first set LC_MONETARY to the |
760
|
|
|
|
|
|
* locale of the desired category, if it isn't that locale already */ |
761
|
|
|
|
|
|
|
762
|
|
|
|
|
|
{ |
763
|
|
|
|
|
|
char *save_monetary_locale = NULL; |
764
|
|
|
|
|
|
bool illegal_utf8 = FALSE; |
765
|
|
|
|
|
|
bool only_ascii = FALSE; |
766
|
0
|
|
|
|
|
const struct lconv* const lc = localeconv(); |
767
|
|
|
|
|
|
|
768
|
0
|
0
|
|
|
|
if (category != LC_MONETARY) { |
769
|
|
|
|
|
|
|
770
|
0
|
|
|
|
|
save_monetary_locale = stdize_locale(savepv(setlocale(LC_MONETARY, |
771
|
|
|
|
|
|
NULL))); |
772
|
0
|
0
|
|
|
|
if (! save_monetary_locale) { |
773
|
|
|
|
|
|
goto cant_use_monetary; |
774
|
|
|
|
|
|
} |
775
|
|
|
|
|
|
|
776
|
0
|
0
|
|
|
|
if (strNE(save_monetary_locale, save_input_locale)) { |
777
|
0
|
0
|
|
|
|
if (! setlocale(LC_MONETARY, save_input_locale)) { |
778
|
0
|
|
|
|
|
Safefree(save_monetary_locale); |
779
|
0
|
|
|
|
|
goto cant_use_monetary; |
780
|
|
|
|
|
|
} |
781
|
|
|
|
|
|
} |
782
|
|
|
|
|
|
} |
783
|
|
|
|
|
|
|
784
|
|
|
|
|
|
/* Here the current LC_MONETARY is set to the locale of the category |
785
|
|
|
|
|
|
* whose information is desired. */ |
786
|
|
|
|
|
|
|
787
|
0
|
0
|
|
|
|
if (lc && lc->currency_symbol) { |
|
|
0
|
|
|
|
|
788
|
0
|
0
|
|
|
|
if (! is_utf8_string((U8 *) lc->currency_symbol, 0)) { |
789
|
|
|
|
|
|
illegal_utf8 = TRUE; |
790
|
|
|
|
|
|
} |
791
|
0
|
0
|
|
|
|
else if (is_ascii_string((U8 *) lc->currency_symbol, 0)) { |
792
|
|
|
|
|
|
only_ascii = TRUE; |
793
|
|
|
|
|
|
} |
794
|
|
|
|
|
|
} |
795
|
|
|
|
|
|
|
796
|
|
|
|
|
|
/* If we changed it, restore LC_MONETARY to its original locale */ |
797
|
0
|
0
|
|
|
|
if (save_monetary_locale) { |
798
|
0
|
|
|
|
|
setlocale(LC_MONETARY, save_monetary_locale); |
799
|
0
|
|
|
|
|
Safefree(save_monetary_locale); |
800
|
|
|
|
|
|
} |
801
|
|
|
|
|
|
|
802
|
0
|
|
|
|
|
Safefree(save_input_locale); |
803
|
|
|
|
|
|
|
804
|
|
|
|
|
|
/* It isn't a UTF-8 locale if the symbol is not legal UTF-8; otherwise |
805
|
|
|
|
|
|
* assume the locale is UTF-8 if and only if the symbol is non-ascii |
806
|
|
|
|
|
|
* UTF-8. (We can't really tell if the locale is UTF-8 or not if the |
807
|
|
|
|
|
|
* symbol is just a '$', so we err on the side of it not being UTF-8) |
808
|
|
|
|
|
|
* */ |
809
|
0
|
|
|
|
|
return (illegal_utf8) |
810
|
|
|
|
|
|
? FALSE |
811
|
0
|
0
|
|
|
|
: ! only_ascii; |
812
|
|
|
|
|
|
|
813
|
|
|
|
|
|
} |
814
|
|
|
|
|
|
cant_use_monetary: |
815
|
|
|
|
|
|
|
816
|
|
|
|
|
|
# endif /* USE_LOCALE_MONETARY */ |
817
|
|
|
|
|
|
#endif /* HAS_LOCALECONV */ |
818
|
|
|
|
|
|
|
819
|
|
|
|
|
|
#if 0 && defined(HAS_STRERROR) && defined(USE_LOCALE_MESSAGES) |
820
|
|
|
|
|
|
|
821
|
|
|
|
|
|
/* This code is ifdefd out because it was found to not be necessary in testing |
822
|
|
|
|
|
|
* on our dromedary test machine, which has over 700 locales. There, looking |
823
|
|
|
|
|
|
* at just the currency symbol gave essentially the same results as doing this |
824
|
|
|
|
|
|
* extra work. Executing this also caused segfaults in miniperl. I left it in |
825
|
|
|
|
|
|
* so as to avoid rewriting it if real-world experience indicates that |
826
|
|
|
|
|
|
* dromedary is an outlier. Essentially, instead of returning abpve if we |
827
|
|
|
|
|
|
* haven't found illegal utf8, we continue on and examine all the strerror() |
828
|
|
|
|
|
|
* messages on the platform for utf8ness. If all are ASCII, we still don't |
829
|
|
|
|
|
|
* know the answer; but otherwise we have a pretty good indication of the |
830
|
|
|
|
|
|
* utf8ness. The reason this doesn't necessarily help much is that the |
831
|
|
|
|
|
|
* messages may not have been translated into the locale. The currency symbol |
832
|
|
|
|
|
|
* is much more likely to have been translated. The code below would need to |
833
|
|
|
|
|
|
* be altered somewhat to just be a continuation of testing the currency |
834
|
|
|
|
|
|
* symbol. */ |
835
|
|
|
|
|
|
int e; |
836
|
|
|
|
|
|
unsigned int failures = 0, non_ascii = 0; |
837
|
|
|
|
|
|
char *save_messages_locale = NULL; |
838
|
|
|
|
|
|
|
839
|
|
|
|
|
|
/* Like above for LC_CTYPE, we set LC_MESSAGES to the locale of the |
840
|
|
|
|
|
|
* desired category, if it isn't that locale already */ |
841
|
|
|
|
|
|
|
842
|
|
|
|
|
|
if (category != LC_MESSAGES) { |
843
|
|
|
|
|
|
|
844
|
|
|
|
|
|
save_messages_locale = stdize_locale(savepv(setlocale(LC_MESSAGES, |
845
|
|
|
|
|
|
NULL))); |
846
|
|
|
|
|
|
if (! save_messages_locale) { |
847
|
|
|
|
|
|
goto cant_use_messages; |
848
|
|
|
|
|
|
} |
849
|
|
|
|
|
|
|
850
|
|
|
|
|
|
if (strEQ(save_messages_locale, save_input_locale)) { |
851
|
|
|
|
|
|
Safefree(save_input_locale); |
852
|
|
|
|
|
|
} |
853
|
|
|
|
|
|
else if (! setlocale(LC_MESSAGES, save_input_locale)) { |
854
|
|
|
|
|
|
Safefree(save_messages_locale); |
855
|
|
|
|
|
|
goto cant_use_messages; |
856
|
|
|
|
|
|
} |
857
|
|
|
|
|
|
} |
858
|
|
|
|
|
|
|
859
|
|
|
|
|
|
/* Here the current LC_MESSAGES is set to the locale of the category |
860
|
|
|
|
|
|
* whose information is desired. Look through all the messages */ |
861
|
|
|
|
|
|
|
862
|
|
|
|
|
|
for (e = 0; |
863
|
|
|
|
|
|
#ifdef HAS_SYS_ERRLIST |
864
|
|
|
|
|
|
e <= sys_nerr |
865
|
|
|
|
|
|
#endif |
866
|
|
|
|
|
|
; e++) |
867
|
|
|
|
|
|
{ |
868
|
|
|
|
|
|
const U8* const errmsg = (U8 *) Strerror(e) ; |
869
|
|
|
|
|
|
if (!errmsg) |
870
|
|
|
|
|
|
break; |
871
|
|
|
|
|
|
if (! is_utf8_string(errmsg, 0)) { |
872
|
|
|
|
|
|
failures++; |
873
|
|
|
|
|
|
break; |
874
|
|
|
|
|
|
} |
875
|
|
|
|
|
|
else if (! is_ascii_string(errmsg, 0)) { |
876
|
|
|
|
|
|
non_ascii++; |
877
|
|
|
|
|
|
} |
878
|
|
|
|
|
|
} |
879
|
|
|
|
|
|
|
880
|
|
|
|
|
|
/* And, if we changed it, restore LC_MESSAGES to its original locale */ |
881
|
|
|
|
|
|
if (save_messages_locale) { |
882
|
|
|
|
|
|
setlocale(LC_MESSAGES, save_messages_locale); |
883
|
|
|
|
|
|
Safefree(save_messages_locale); |
884
|
|
|
|
|
|
} |
885
|
|
|
|
|
|
|
886
|
|
|
|
|
|
/* Any non-UTF-8 message means not a UTF-8 locale; if all are valid, |
887
|
|
|
|
|
|
* any non-ascii means it is one; otherwise we assume it isn't */ |
888
|
|
|
|
|
|
return (failures) ? FALSE : non_ascii; |
889
|
|
|
|
|
|
|
890
|
|
|
|
|
|
} |
891
|
|
|
|
|
|
cant_use_messages: |
892
|
|
|
|
|
|
|
893
|
|
|
|
|
|
#endif |
894
|
|
|
|
|
|
|
895
|
0
|
|
|
|
|
Safefree(save_input_locale); |
896
|
12353
|
|
|
|
|
return FALSE; |
897
|
|
|
|
|
|
} |
898
|
|
|
|
|
|
|
899
|
|
|
|
|
|
#endif |
900
|
|
|
|
|
|
|
901
|
|
|
|
|
|
/* |
902
|
|
|
|
|
|
* Local variables: |
903
|
|
|
|
|
|
* c-indentation-style: bsd |
904
|
|
|
|
|
|
* c-basic-offset: 4 |
905
|
|
|
|
|
|
* indent-tabs-mode: nil |
906
|
|
|
|
|
|
* End: |
907
|
|
|
|
|
|
* |
908
|
|
|
|
|
|
* ex: set ts=8 sts=4 sw=4 et: |
909
|
|
|
|
|
|
*/ |