| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package ExtUtils::MakeMaker::Locale; |
|
2
|
|
|
|
|
|
|
|
|
3
|
52
|
|
|
52
|
|
364
|
use strict; |
|
|
52
|
|
|
|
|
125
|
|
|
|
52
|
|
|
|
|
1647
|
|
|
4
|
52
|
|
|
52
|
|
286
|
use warnings; |
|
|
52
|
|
|
|
|
121
|
|
|
|
52
|
|
|
|
|
3395
|
|
|
5
|
|
|
|
|
|
|
our $VERSION = "7.70"; |
|
6
|
|
|
|
|
|
|
$VERSION =~ tr/_//d; |
|
7
|
|
|
|
|
|
|
|
|
8
|
52
|
|
|
52
|
|
334
|
use base 'Exporter'; |
|
|
52
|
|
|
|
|
103
|
|
|
|
52
|
|
|
|
|
8590
|
|
|
9
|
|
|
|
|
|
|
our @EXPORT_OK = qw( |
|
10
|
|
|
|
|
|
|
decode_argv env |
|
11
|
|
|
|
|
|
|
$ENCODING_LOCALE $ENCODING_LOCALE_FS |
|
12
|
|
|
|
|
|
|
$ENCODING_CONSOLE_IN $ENCODING_CONSOLE_OUT |
|
13
|
|
|
|
|
|
|
); |
|
14
|
|
|
|
|
|
|
|
|
15
|
52
|
|
|
52
|
|
29495
|
use Encode (); |
|
|
52
|
|
|
|
|
796538
|
|
|
|
52
|
|
|
|
|
1693
|
|
|
16
|
52
|
|
|
52
|
|
369
|
use Encode::Alias (); |
|
|
52
|
|
|
|
|
123
|
|
|
|
52
|
|
|
|
|
44928
|
|
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
our $ENCODING_LOCALE; |
|
19
|
|
|
|
|
|
|
our $ENCODING_LOCALE_FS; |
|
20
|
|
|
|
|
|
|
our $ENCODING_CONSOLE_IN; |
|
21
|
|
|
|
|
|
|
our $ENCODING_CONSOLE_OUT; |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# Constant sub (empty prototype) that always returns 0; the "if DEBUG" guard on the warn in _flush_aliases is therefore never true. |
sub DEBUG () { 0 } |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# _init()
#
# Fills in whichever of the package variables $ENCODING_LOCALE,
# $ENCODING_LOCALE_FS, $ENCODING_CONSOLE_IN and $ENCODING_CONSOLE_OUT are
# still false; values that are already true are kept (except that the
# "cp65000"/"cp65001" names are rewritten, see below).
#
# Dies if the resulting $ENCODING_LOCALE is not known to Encode (after trying
# to load Encode::HanExtra for "gb18030").
sub _init {
    if ($^O eq "MSWin32") {
        if (!$ENCODING_LOCALE) {
            # Query the code page through GetConsoleCP.  Any failure in this
            # eval is deliberately ignored; later fallbacks take over.
            eval {
                if (!defined &GetConsoleCP) {
                    require Win32;
                    # manually "import" it since Win32->import refuses
                    *GetConsoleCP = sub { &Win32::GetConsoleCP }
                        if defined &Win32::GetConsoleCP;
                }
                if (!defined &GetConsoleCP) {
                    # Win32.pm did not provide it: bind the kernel32 entry
                    # point through Win32::API instead.
                    require Win32::API;
                    Win32::API->Import('kernel32', 'int GetConsoleCP()');
                }
                if (defined &GetConsoleCP) {
                    my $codepage = GetConsoleCP();
                    if ($codepage) {
                        $ENCODING_LOCALE = "cp$codepage";
                    }
                }
            };
        }

        if (!$ENCODING_CONSOLE_IN) {
            # only test one since set together
            if (!defined &GetInputCP) {
                eval {
                    require Win32;
                    eval {
                        local $SIG{__WARN__} = sub {} if ( "$]" < 5.014 ); # suppress deprecation warning for inherited AUTOLOAD of Win32::GetConsoleCP()
                        # Called only for its side effect; the result is
                        # discarded (presumably this makes the function
                        # defined before the checks below -- confirm).
                        Win32::GetConsoleCP();
                    };
                    # manually "import" it since Win32->import refuses
                    *GetInputCP = sub { &Win32::GetConsoleCP }
                        if defined &Win32::GetConsoleCP;
                    *GetOutputCP = sub { &Win32::GetConsoleOutputCP }
                        if defined &Win32::GetConsoleOutputCP;
                };
                if (!defined &GetInputCP) {
                    eval {
                        # try Win32::Console module for codepage to use
                        require Win32::Console;
                        *GetInputCP = sub { &Win32::Console::InputCP }
                            if defined &Win32::Console::InputCP;
                        *GetOutputCP = sub { &Win32::Console::OutputCP }
                            if defined &Win32::Console::OutputCP;
                    };
                }
                if (!defined &GetInputCP) {
                    # final fallback: parse the output of the "chcp" command.
                    # Yields the captured digits, or nothing when the output
                    # does not match.
                    *GetInputCP = *GetOutputCP = sub {
                        # another fallback that could work is:
                        # reg query HKLM\System\CurrentControlSet\Control\Nls\CodePage /v ACP
                        my $chcp_output = qx(chcp) || '';
                        return $chcp_output =~ /^Active code page: (\d+)/
                            ? $1 : ();
                    };
                }
            }
            my $input_cp = GetInputCP();
            if ($input_cp) {
                $ENCODING_CONSOLE_IN = "cp$input_cp";
            }
            my $output_cp = GetOutputCP();
            if ($output_cp) {
                $ENCODING_CONSOLE_OUT = "cp$output_cp";
            }
        }
    }

    if (!$ENCODING_LOCALE) {
        # Ask the locale system for its codeset; errors (e.g. I18N::Langinfo
        # not being loadable) are ignored.
        eval {
            require I18N::Langinfo;
            $ENCODING_LOCALE = I18N::Langinfo::langinfo(I18N::Langinfo::CODESET());

            if ($ENCODING_LOCALE eq "646") {
                # Workaround of Encode < v2.25.  The "646" encoding alias was
                # introduced in Encode-2.25, but we don't want to require that version
                # quite yet.  Should avoid the CPAN testers failure reported from
                # openbsd-4.7/perl-5.10.0 combo.
                $ENCODING_LOCALE = "ascii";
            }
            elsif ($^O eq "hpux" && $ENCODING_LOCALE eq "roman8") {
                # https://rt.cpan.org/Ticket/Display.html?id=66373
                $ENCODING_LOCALE = "hp-roman8";
            }
        };
        $ENCODING_LOCALE ||= $ENCODING_CONSOLE_IN;
    }

    # Workaround of Encode < v2.71 for "cp65000" and "cp65001"
    # The "cp65000" and "cp65001" aliases were added in [Encode v2.71](https://github.com/dankogai/p5-encode/commit/7874bd95aa10967a3b5dbae333d16bcd703ac6c6)
    # via commit <https://github.com/dankogai/p5-encode/commit/84b9c1101d5251d37e226f80d1c6781718779047>.
    # This will avoid test failures for Win32 machines using the UTF-7 or UTF-8 code pages.
    my %utf_name_for = (
        cp65000 => 'UTF-7',
        cp65001 => 'utf-8-strict',
    );
    if ($ENCODING_LOCALE && exists $utf_name_for{ lc($ENCODING_LOCALE) }) {
        $ENCODING_LOCALE = $utf_name_for{ lc($ENCODING_LOCALE) };
    }

    if ($^O eq "darwin") {
        $ENCODING_LOCALE_FS ||= "UTF-8";
    }

    # final fallback: each variable defaults to the one resolved before it
    $ENCODING_LOCALE      ||= ($^O eq "MSWin32") ? "cp1252" : "UTF-8";
    $ENCODING_LOCALE_FS   ||= $ENCODING_LOCALE;
    $ENCODING_CONSOLE_IN  ||= $ENCODING_LOCALE;
    $ENCODING_CONSOLE_OUT ||= $ENCODING_CONSOLE_IN;

    if (!Encode::find_encoding($ENCODING_LOCALE)) {
        my $decodable = 0;
        if (lc($ENCODING_LOCALE) eq "gb18030") {
            eval { require Encode::HanExtra; };
            die "Need Encode::HanExtra to be installed to support locale codeset ($ENCODING_LOCALE), stopped"
                if $@;
            $decodable = 1 if Encode::find_encoding($ENCODING_LOCALE);
        }
        die "The locale codeset ($ENCODING_LOCALE) isn't one that perl can decode, stopped"
            if !$decodable;
    }

    # use Data::Dump; ddx $ENCODING_LOCALE, $ENCODING_LOCALE_FS, $ENCODING_CONSOLE_IN, $ENCODING_CONSOLE_OUT;
}
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
# Resolve the encoding variables once, when the module is loaded.
_init();

# Register a code-ref alias with Encode::Alias.  The callback takes a name,
# upper-cases it and returns the value of this package's $ENCODING_<NAME>
# variable (undef when no such variable is set), so that e.g. "locale" maps
# to $ENCODING_LOCALE.
Encode::Alias::define_alias(
    sub {
        my $requested = shift;
        no strict 'refs';       # the variable is looked up by name
        no warnings 'once';
        return ${ "ENCODING_" . uc($requested) };
    },
    "locale",
);
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
# _flush_aliases()
#
# Removes from %Encode::Alias::Alias every key whose upper-cased name
# corresponds to a defined $ENCODING_<NAME> variable in this package.
# Keys are processed in sorted order.
sub _flush_aliases {
    no strict 'refs';    # $ENCODING_<NAME> is looked up by name
    my @stale = grep { defined ${ "ENCODING_" . uc($_) } }
                sort keys %Encode::Alias::Alias;
    for my $alias (@stale) {
        delete $Encode::Alias::Alias{$alias};
        warn "Flushed alias cache for $alias" if DEBUG;
    }
}
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
# reinit( [$locale_encoding [, $fs_encoding]] )
#
# Overwrites the package encoding variables and re-runs the detection:
# $ENCODING_LOCALE, $ENCODING_CONSOLE_IN and $ENCODING_CONSOLE_OUT are set to
# the first argument, $ENCODING_LOCALE_FS to the second.  Missing arguments
# leave the variables undefined, so _init() determines them again.
# Afterwards the matching entries in Encode's alias cache are flushed.
sub reinit {
    my ($locale, $locale_fs) = @_;

    $ENCODING_LOCALE      = $locale;
    $ENCODING_LOCALE_FS   = $locale_fs;
    $ENCODING_CONSOLE_IN  = $ENCODING_LOCALE;
    $ENCODING_CONSOLE_OUT = $ENCODING_LOCALE;

    _init();
    _flush_aliases();
}
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
# decode_argv( [CHECK] )
#
# Decodes every element of @ARGV in place using the "locale" encoding.
# The optional first argument is handed to Encode::decode() as its CHECK
# value.  Must be called in void context; dies otherwise, because nothing
# is returned.
sub decode_argv {
    die "decode_argv() decodes \@ARGV in place and returns nothing; call it in void context, stopped"
        if defined wantarray;

    # Take CHECK out of @_ explicitly.  Encode::decode() has the prototype
    # ($$;$), so writing @_ in its third slot would be evaluated in scalar
    # context and pass the *number* of arguments instead of the CHECK value.
    my ($check) = @_;

    for my $arg (@ARGV) {    # $arg aliases the @ARGV element
        $arg = Encode::decode(locale => $arg, $check);
    }
}
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
# env( $uni_key )
# env( $uni_key => $uni_value )
#
# Reads and optionally updates an environment variable, encoding the key and
# the new value with the "locale" encoding.  When a second argument is given,
# a defined value is stored and undef deletes the variable.  Unless called in
# void context, returns the value the variable had *before* any update,
# decoded with the "locale" encoding.
sub env {
    my $key_bytes = Encode::encode(locale => shift);
    my $previous  = $ENV{$key_bytes};

    if (@_) {
        my $new_value = shift;
        if (!defined $new_value) {
            delete $ENV{$key_bytes};
        }
        else {
            $ENV{$key_bytes} = Encode::encode(locale => $new_value);
        }
    }

    return Encode::decode(locale => $previous) if defined wantarray;
}
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
1; |
|
188
|
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
__END__ |
|
190
|
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
=head1 NAME |
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
ExtUtils::MakeMaker::Locale - bundled Encode::Locale |
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
use Encode::Locale; |
|
198
|
|
|
|
|
|
|
use Encode; |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
$string = decode(locale => $bytes); |
|
201
|
|
|
|
|
|
|
$bytes = encode(locale => $string); |
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
if (-t) { |
|
204
|
|
|
|
|
|
|
binmode(STDIN, ":encoding(console_in)"); |
|
205
|
|
|
|
|
|
|
binmode(STDOUT, ":encoding(console_out)"); |
|
206
|
|
|
|
|
|
|
binmode(STDERR, ":encoding(console_out)"); |
|
207
|
|
|
|
|
|
|
} |
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
# Processing file names passed in as arguments |
|
210
|
|
|
|
|
|
|
my $uni_filename = decode(locale => $ARGV[0]); |
|
211
|
|
|
|
|
|
|
open(my $fh, "<", encode(locale_fs => $uni_filename)) |
|
212
|
|
|
|
|
|
|
|| die "Can't open '$uni_filename': $!"; |
|
213
|
|
|
|
|
|
|
binmode($fh, ":encoding(locale)"); |
|
214
|
|
|
|
|
|
|
... |
|
215
|
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
217
|
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
In many applications it's wise to let Perl use Unicode for the strings it |
|
219
|
|
|
|
|
|
|
processes. Most of the interfaces Perl has to the outside world are still byte |
|
220
|
|
|
|
|
|
|
based. Programs therefore need to decode byte strings that enter the program |
|
221
|
|
|
|
|
|
|
from the outside and encode them again on the way out. |
|
222
|
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
The POSIX locale system is used to specify both the language conventions |
|
224
|
|
|
|
|
|
|
requested by the user and the preferred character set to consume and |
|
225
|
|
|
|
|
|
|
output. The C<Encode::Locale> module looks up the charset and encoding (called |
|
226
|
|
|
|
|
|
|
a CODESET in the locale jargon) and arranges for the L<Encode> module to know |
|
227
|
|
|
|
|
|
|
this encoding under the name "locale". It means bytes obtained from the |
|
228
|
|
|
|
|
|
|
environment can be converted to Unicode strings by calling C<< |
|
229
|
|
|
|
|
|
|
Encode::decode(locale => $bytes) >> and converted back again with C<<  |
|
230
|
|
|
|
|
|
|
Encode::encode(locale => $string) >>. |
|
231
|
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
Where file system interfaces pass file names in and out of the program we also |
|
233
|
|
|
|
|
|
|
need care. The trend is for operating systems to use a fixed file encoding |
|
234
|
|
|
|
|
|
|
that doesn't actually depend on the locale; and this module determines the most |
|
235
|
|
|
|
|
|
|
appropriate encoding for file names. The L<Encode> module will know this |
|
236
|
|
|
|
|
|
|
encoding under the name "locale_fs". For traditional Unix systems this will |
|
237
|
|
|
|
|
|
|
be an alias to the same encoding as "locale". |
|
238
|
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
For programs running in a terminal window (called a "Console" on some systems) |
|
240
|
|
|
|
|
|
|
the "locale" encoding is usually a good choice for what to expect as input and |
|
241
|
|
|
|
|
|
|
output.  Some systems allow us to query the encoding set for the terminal and |
|
242
|
|
|
|
|
|
|
C<Encode::Locale> will do that if available and make these encodings known |
|
243
|
|
|
|
|
|
|
under the C<Encode> aliases "console_in" and "console_out". For systems where |
|
244
|
|
|
|
|
|
|
we can't determine the terminal encoding these will be aliased as the same |
|
245
|
|
|
|
|
|
|
encoding as "locale". The advice is to use "console_in" for input known to |
|
246
|
|
|
|
|
|
|
come from the terminal and "console_out" for output to the terminal. |
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
In addition to arranging for various Encode aliases the following functions and |
|
249
|
|
|
|
|
|
|
variables are provided: |
|
250
|
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
=over |
|
252
|
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
=item decode_argv( ) |
|
254
|
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
=item decode_argv( Encode::FB_CROAK ) |
|
256
|
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
This will decode the command line arguments to perl (the C<@ARGV> array) in-place. |
|
258
|
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
The function will by default replace characters that can't be decoded by |
|
260
|
|
|
|
|
|
|
"\x{FFFD}", the Unicode replacement character. |
|
261
|
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
Any argument provided is passed as CHECK to the underlying Encode::decode() call. |
|
263
|
|
|
|
|
|
|
Pass the value C<Encode::FB_CROAK> to have the decoding croak if not all the |
|
264
|
|
|
|
|
|
|
command line arguments can be decoded. See L<Encode/"Handling Malformed Data"> |
|
265
|
|
|
|
|
|
|
for details on other options for CHECK. |
|
266
|
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
=item env( $uni_key ) |
|
268
|
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
=item env( $uni_key => $uni_value ) |
|
270
|
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
Interface to get/set environment variables. Returns the current value as a |
|
272
|
|
|
|
|
|
|
Unicode string. The $uni_key and $uni_value arguments are expected to be |
|
273
|
|
|
|
|
|
|
Unicode strings as well. Passing C<undef> as $uni_value deletes the |
|
274
|
|
|
|
|
|
|
environment variable named $uni_key. |
|
275
|
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
The returned value will have the characters that can't be decoded replaced by |
|
277
|
|
|
|
|
|
|
"\x{FFFD}", the Unicode replacement character. |
|
278
|
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
There is no interface to request alternative CHECK behavior as for |
|
280
|
|
|
|
|
|
|
decode_argv(). If you need that you need to call encode/decode yourself. |
|
281
|
|
|
|
|
|
|
For example: |
|
282
|
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
my $key = Encode::encode(locale => $uni_key, Encode::FB_CROAK); |
|
284
|
|
|
|
|
|
|
my $uni_value = Encode::decode(locale => $ENV{$key}, Encode::FB_CROAK); |
|
285
|
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
=item reinit( ) |
|
287
|
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
=item reinit( $encoding ) |
|
289
|
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
Reinitialize the encodings from the locale. You want to call this function if |
|
291
|
|
|
|
|
|
|
you changed anything in the environment that might influence the locale. |
|
292
|
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
This function will croak if the determined encoding isn't recognized by |
|
294
|
|
|
|
|
|
|
the Encode module. |
|
295
|
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
With an argument, force the $ENCODING_... variables to be set to the given value. |
|
297
|
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
=item $ENCODING_LOCALE |
|
299
|
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
The encoding name determined to be suitable for the current locale. |
|
301
|
|
|
|
|
|
|
L<Encode> knows this encoding as "locale". |
|
302
|
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
=item $ENCODING_LOCALE_FS |
|
304
|
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
The encoding name determined to be suitable for file system interfaces |
|
306
|
|
|
|
|
|
|
involving file names. |
|
307
|
|
|
|
|
|
|
L<Encode> knows this encoding as "locale_fs". |
|
308
|
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
=item $ENCODING_CONSOLE_IN |
|
310
|
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
=item $ENCODING_CONSOLE_OUT |
|
312
|
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
The encodings to be used for reading input from and writing output to a console. |
|
314
|
|
|
|
|
|
|
L<Encode> knows these encodings as "console_in" and "console_out". |
|
315
|
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
=back |
|
317
|
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
=head1 NOTES |
|
319
|
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
This table summarizes the mapping of the encodings set up |
|
321
|
|
|
|
|
|
|
by the C<Encode::Locale> module: |
|
322
|
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
Encode | | | |
|
324
|
|
|
|
|
|
|
Alias | Windows | Mac OS X | POSIX |
|
325
|
|
|
|
|
|
|
------------+---------+--------------+------------ |
|
326
|
|
|
|
|
|
|
locale | ANSI | nl_langinfo | nl_langinfo |
|
327
|
|
|
|
|
|
|
locale_fs | ANSI | UTF-8 | nl_langinfo |
|
328
|
|
|
|
|
|
|
console_in | OEM | nl_langinfo | nl_langinfo |
|
329
|
|
|
|
|
|
|
console_out | OEM | nl_langinfo | nl_langinfo |
|
330
|
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=head2 Windows |
|
332
|
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
Windows has basically 2 sets of APIs. A wide API (based on passing UTF-16 |
|
334
|
|
|
|
|
|
|
strings) and a byte based API based on a character set called ANSI.  The |
|
335
|
|
|
|
|
|
|
regular Perl interfaces to the OS currently only use the ANSI APIs. |
|
336
|
|
|
|
|
|
|
Unfortunately ANSI is not a single character set. |
|
337
|
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
The encoding that corresponds to ANSI varies between different editions of |
|
339
|
|
|
|
|
|
|
Windows. For many western editions of Windows ANSI corresponds to CP-1252 |
|
340
|
|
|
|
|
|
|
which is a character set similar to ISO-8859-1. Conceptually the ANSI |
|
341
|
|
|
|
|
|
|
character set is a similar concept to the POSIX locale CODESET so this module |
|
342
|
|
|
|
|
|
|
figures out what the ANSI code page is and makes this available as |
|
343
|
|
|
|
|
|
|
$ENCODING_LOCALE and the "locale" Encoding alias. |
|
344
|
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
Windows systems also operate with another byte based character set. |
|
346
|
|
|
|
|
|
|
It's called the OEM code page. This is the encoding that the Console |
|
347
|
|
|
|
|
|
|
takes as input and output. It's common for the OEM code page to |
|
348
|
|
|
|
|
|
|
differ from the ANSI code page. |
|
349
|
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
=head2 Mac OS X |
|
351
|
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
On Mac OS X the file system encoding is always UTF-8 while the locale |
|
353
|
|
|
|
|
|
|
can otherwise be set up as normal for POSIX systems. |
|
354
|
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
File names on Mac OS X will at the OS-level be converted to |
|
356
|
|
|
|
|
|
|
NFD-form. A file created by passing a NFC-filename will come |
|
357
|
|
|
|
|
|
|
in NFD-form from readdir(). See L<Unicode::Normalize> for details |
|
358
|
|
|
|
|
|
|
of NFD/NFC. |
|
359
|
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
Actually, Apple does not follow the Unicode NFD standard since not all |
|
361
|
|
|
|
|
|
|
character ranges are decomposed. The claim is that this avoids problems with |
|
362
|
|
|
|
|
|
|
round trip conversions from old Mac text encodings. See L<Encode::UTF8Mac> for |
|
363
|
|
|
|
|
|
|
details. |
|
364
|
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
=head2 POSIX (Linux and other Unixes) |
|
366
|
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
File systems might vary in what encoding is to be used for |
|
368
|
|
|
|
|
|
|
filenames. Since this module has no way to actually figure out |
|
369
|
|
|
|
|
|
|
what is correct, it goes with the best guess which is to |
|
370
|
|
|
|
|
|
|
assume filenames are encoded according to the current locale. |
|
371
|
|
|
|
|
|
|
Users are advised to always specify UTF-8 as the locale charset. |
|
372
|
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
374
|
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
L<I18N::Langinfo>, L<Encode>, L<Term::Encoding> |
|
376
|
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
=head1 AUTHOR |
|
378
|
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
Copyright 2010 Gisle Aas <gisle@aas.no>. |
|
380
|
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or |
|
382
|
|
|
|
|
|
|
modify it under the same terms as Perl itself. |
|
383
|
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
=cut |