line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package ExtUtils::MakeMaker::Locale; |
2
|
|
|
|
|
|
|
|
3
|
52
|
|
|
52
|
|
364
|
use strict; |
|
52
|
|
|
|
|
125
|
|
|
52
|
|
|
|
|
1647
|
|
4
|
52
|
|
|
52
|
|
286
|
use warnings; |
|
52
|
|
|
|
|
121
|
|
|
52
|
|
|
|
|
3395
|
|
5
|
|
|
|
|
|
|
our $VERSION = "7.70"; |
6
|
|
|
|
|
|
|
$VERSION =~ tr/_//d; |
7
|
|
|
|
|
|
|
|
8
|
52
|
|
|
52
|
|
334
|
use base 'Exporter'; |
|
52
|
|
|
|
|
103
|
|
|
52
|
|
|
|
|
8590
|
|
9
|
|
|
|
|
|
|
our @EXPORT_OK = qw( |
10
|
|
|
|
|
|
|
decode_argv env |
11
|
|
|
|
|
|
|
$ENCODING_LOCALE $ENCODING_LOCALE_FS |
12
|
|
|
|
|
|
|
$ENCODING_CONSOLE_IN $ENCODING_CONSOLE_OUT |
13
|
|
|
|
|
|
|
); |
14
|
|
|
|
|
|
|
|
15
|
52
|
|
|
52
|
|
29495
|
use Encode (); |
|
52
|
|
|
|
|
796538
|
|
|
52
|
|
|
|
|
1693
|
|
16
|
52
|
|
|
52
|
|
369
|
use Encode::Alias (); |
|
52
|
|
|
|
|
123
|
|
|
52
|
|
|
|
|
44928
|
|
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
our $ENCODING_LOCALE; |
19
|
|
|
|
|
|
|
our $ENCODING_LOCALE_FS; |
20
|
|
|
|
|
|
|
our $ENCODING_CONSOLE_IN; |
21
|
|
|
|
|
|
|
our $ENCODING_CONSOLE_OUT; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
sub DEBUG () { 0 } |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
sub _init { |
26
|
104
|
50
|
|
104
|
|
598
|
if ($^O eq "MSWin32") { |
27
|
0
|
0
|
|
|
|
0
|
unless ($ENCODING_LOCALE) { |
28
|
|
|
|
|
|
|
# Try to obtain what the Windows ANSI code page is |
29
|
0
|
|
|
|
|
0
|
eval { |
30
|
0
|
0
|
|
|
|
0
|
unless (defined &GetConsoleCP) { |
31
|
0
|
|
|
|
|
0
|
require Win32; |
32
|
|
|
|
|
|
|
# manually "import" it since Win32->import refuses |
33
|
0
|
0
|
|
0
|
|
0
|
*GetConsoleCP = sub { &Win32::GetConsoleCP } if defined &Win32::GetConsoleCP; |
|
0
|
|
|
|
|
0
|
|
34
|
|
|
|
|
|
|
} |
35
|
0
|
0
|
|
|
|
0
|
unless (defined &GetConsoleCP) { |
36
|
0
|
|
|
|
|
0
|
require Win32::API; |
37
|
0
|
|
|
|
|
0
|
Win32::API->Import('kernel32', 'int GetConsoleCP()'); |
38
|
|
|
|
|
|
|
} |
39
|
0
|
0
|
|
|
|
0
|
if (defined &GetConsoleCP) { |
40
|
0
|
|
|
|
|
0
|
my $cp = GetConsoleCP(); |
41
|
0
|
0
|
|
|
|
0
|
$ENCODING_LOCALE = "cp$cp" if $cp; |
42
|
|
|
|
|
|
|
} |
43
|
|
|
|
|
|
|
}; |
44
|
|
|
|
|
|
|
} |
45
|
|
|
|
|
|
|
|
46
|
0
|
0
|
|
|
|
0
|
unless ($ENCODING_CONSOLE_IN) { |
47
|
|
|
|
|
|
|
# only test one since set together |
48
|
0
|
0
|
|
|
|
0
|
unless (defined &GetInputCP) { |
49
|
0
|
|
|
|
|
0
|
eval { |
50
|
0
|
|
|
|
|
0
|
require Win32; |
51
|
0
|
|
|
|
|
0
|
eval { |
52
|
0
|
0
|
|
0
|
|
0
|
local $SIG{__WARN__} = sub {} if ( "$]" < 5.014 ); # suppress deprecation warning for inherited AUTOLOAD of Win32::GetConsoleCP() |
53
|
0
|
|
|
|
|
0
|
Win32::GetConsoleCP(); |
54
|
|
|
|
|
|
|
}; |
55
|
|
|
|
|
|
|
# manually "import" it since Win32->import refuses |
56
|
0
|
0
|
|
0
|
|
0
|
*GetInputCP = sub { &Win32::GetConsoleCP } if defined &Win32::GetConsoleCP; |
|
0
|
|
|
|
|
0
|
|
57
|
0
|
0
|
|
0
|
|
0
|
*GetOutputCP = sub { &Win32::GetConsoleOutputCP } if defined &Win32::GetConsoleOutputCP; |
|
0
|
|
|
|
|
0
|
|
58
|
|
|
|
|
|
|
}; |
59
|
0
|
0
|
|
|
|
0
|
unless (defined &GetInputCP) { |
60
|
0
|
|
|
|
|
0
|
eval { |
61
|
|
|
|
|
|
|
# try Win32::Console module for codepage to use |
62
|
0
|
|
|
|
|
0
|
require Win32::Console; |
63
|
0
|
|
|
0
|
|
0
|
*GetInputCP = sub { &Win32::Console::InputCP } |
64
|
0
|
0
|
|
|
|
0
|
if defined &Win32::Console::InputCP; |
65
|
0
|
|
|
0
|
|
0
|
*GetOutputCP = sub { &Win32::Console::OutputCP } |
66
|
0
|
0
|
|
|
|
0
|
if defined &Win32::Console::OutputCP; |
67
|
|
|
|
|
|
|
}; |
68
|
|
|
|
|
|
|
} |
69
|
0
|
0
|
|
|
|
0
|
unless (defined &GetInputCP) { |
70
|
|
|
|
|
|
|
# final fallback |
71
|
|
|
|
|
|
|
*GetInputCP = *GetOutputCP = sub { |
72
|
|
|
|
|
|
|
# another fallback that could work is: |
73
|
|
|
|
|
|
|
# reg query HKLM\System\CurrentControlSet\Control\Nls\CodePage /v ACP |
74
|
0
|
0
|
0
|
0
|
|
0
|
((qx(chcp) || '') =~ /^Active code page: (\d+)/) |
75
|
|
|
|
|
|
|
? $1 : (); |
76
|
0
|
|
|
|
|
0
|
}; |
77
|
|
|
|
|
|
|
} |
78
|
|
|
|
|
|
|
} |
79
|
0
|
|
|
|
|
0
|
my $cp = GetInputCP(); |
80
|
0
|
0
|
|
|
|
0
|
$ENCODING_CONSOLE_IN = "cp$cp" if $cp; |
81
|
0
|
|
|
|
|
0
|
$cp = GetOutputCP(); |
82
|
0
|
0
|
|
|
|
0
|
$ENCODING_CONSOLE_OUT = "cp$cp" if $cp; |
83
|
|
|
|
|
|
|
} |
84
|
|
|
|
|
|
|
} |
85
|
|
|
|
|
|
|
|
86
|
104
|
100
|
|
|
|
343
|
unless ($ENCODING_LOCALE) { |
87
|
52
|
|
|
|
|
129
|
eval { |
88
|
52
|
|
|
|
|
24596
|
require I18N::Langinfo; |
89
|
52
|
|
|
|
|
33791
|
$ENCODING_LOCALE = I18N::Langinfo::langinfo(I18N::Langinfo::CODESET()); |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
# Workaround of Encode < v2.25. The "646" encoding alias was |
92
|
|
|
|
|
|
|
# introduced in Encode-2.25, but we don't want to require that version |
93
|
|
|
|
|
|
|
# quite yet. Should avoid the CPAN testers failure reported from |
94
|
|
|
|
|
|
|
# openbsd-4.7/perl-5.10.0 combo. |
95
|
52
|
50
|
|
|
|
224
|
$ENCODING_LOCALE = "ascii" if $ENCODING_LOCALE eq "646"; |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# https://rt.cpan.org/Ticket/Display.html?id=66373 |
98
|
52
|
50
|
33
|
|
|
256
|
$ENCODING_LOCALE = "hp-roman8" if $^O eq "hpux" && $ENCODING_LOCALE eq "roman8"; |
99
|
|
|
|
|
|
|
}; |
100
|
52
|
|
33
|
|
|
176
|
$ENCODING_LOCALE ||= $ENCODING_CONSOLE_IN; |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# Workaround of Encode < v2.71 for "cp65000" and "cp65001" |
104
|
|
|
|
|
|
|
# The "cp65000" and "cp65001" aliases were added in [Encode v2.71](https://github.com/dankogai/p5-encode/commit/7874bd95aa10967a3b5dbae333d16bcd703ac6c6) |
105
|
|
|
|
|
|
|
# via commit <https://github.com/dankogai/p5-encode/commit/84b9c1101d5251d37e226f80d1c6781718779047>. |
106
|
|
|
|
|
|
|
# This will avoid test failures for Win32 machines using the UTF-7 or UTF-8 code pages. |
107
|
104
|
50
|
33
|
|
|
630
|
$ENCODING_LOCALE = 'UTF-7' if $ENCODING_LOCALE && lc($ENCODING_LOCALE) eq "cp65000"; |
108
|
104
|
50
|
33
|
|
|
559
|
$ENCODING_LOCALE = 'utf-8-strict' if $ENCODING_LOCALE && lc($ENCODING_LOCALE) eq "cp65001"; |
109
|
|
|
|
|
|
|
|
110
|
104
|
50
|
|
|
|
268
|
if ($^O eq "darwin") { |
111
|
0
|
|
0
|
|
|
0
|
$ENCODING_LOCALE_FS ||= "UTF-8"; |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
# final fallback |
115
|
104
|
0
|
33
|
|
|
247
|
$ENCODING_LOCALE ||= $^O eq "MSWin32" ? "cp1252" : "UTF-8"; |
116
|
104
|
|
33
|
|
|
494
|
$ENCODING_LOCALE_FS ||= $ENCODING_LOCALE; |
117
|
104
|
|
66
|
|
|
390
|
$ENCODING_CONSOLE_IN ||= $ENCODING_LOCALE; |
118
|
104
|
|
66
|
|
|
386
|
$ENCODING_CONSOLE_OUT ||= $ENCODING_CONSOLE_IN; |
119
|
|
|
|
|
|
|
|
120
|
104
|
50
|
|
|
|
367
|
unless (Encode::find_encoding($ENCODING_LOCALE)) { |
121
|
0
|
|
|
|
|
0
|
my $foundit; |
122
|
0
|
0
|
|
|
|
0
|
if (lc($ENCODING_LOCALE) eq "gb18030") { |
123
|
0
|
|
|
|
|
0
|
eval { |
124
|
0
|
|
|
|
|
0
|
require Encode::HanExtra; |
125
|
|
|
|
|
|
|
}; |
126
|
0
|
0
|
|
|
|
0
|
if ($@) { |
127
|
0
|
|
|
|
|
0
|
die "Need Encode::HanExtra to be installed to support locale codeset ($ENCODING_LOCALE), stopped"; |
128
|
|
|
|
|
|
|
} |
129
|
0
|
0
|
|
|
|
0
|
$foundit++ if Encode::find_encoding($ENCODING_LOCALE); |
130
|
|
|
|
|
|
|
} |
131
|
0
|
0
|
|
|
|
0
|
die "The locale codeset ($ENCODING_LOCALE) isn't one that perl can decode, stopped" |
132
|
|
|
|
|
|
|
unless $foundit; |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
# use Data::Dump; ddx $ENCODING_LOCALE, $ENCODING_LOCALE_FS, $ENCODING_CONSOLE_IN, $ENCODING_CONSOLE_OUT; |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
_init(); |
140
|
|
|
|
|
|
|
Encode::Alias::define_alias(sub { |
141
|
52
|
|
|
52
|
|
528
|
no strict 'refs'; |
|
52
|
|
|
|
|
109
|
|
|
52
|
|
|
|
|
2038
|
|
142
|
52
|
|
|
52
|
|
305
|
no warnings 'once'; |
|
52
|
|
|
|
|
158
|
|
|
52
|
|
|
|
|
3921
|
|
143
|
|
|
|
|
|
|
return ${"ENCODING_" . uc(shift)}; |
144
|
|
|
|
|
|
|
}, "locale"); |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
sub _flush_aliases { |
147
|
52
|
|
|
52
|
|
366
|
no strict 'refs'; |
|
52
|
|
|
|
|
104
|
|
|
52
|
|
|
|
|
17430
|
|
148
|
52
|
|
|
52
|
|
745
|
for my $a (sort keys %Encode::Alias::Alias) { |
149
|
208
|
100
|
|
|
|
321
|
if (defined ${"ENCODING_" . uc($a)}) { |
|
208
|
|
|
|
|
703
|
|
150
|
52
|
|
|
|
|
197
|
delete $Encode::Alias::Alias{$a}; |
151
|
52
|
|
|
|
|
174
|
warn "Flushed alias cache for $a" if DEBUG; |
152
|
|
|
|
|
|
|
} |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
sub reinit { |
157
|
52
|
|
|
52
|
1
|
143
|
$ENCODING_LOCALE = shift; |
158
|
52
|
|
|
|
|
113
|
$ENCODING_LOCALE_FS = shift; |
159
|
52
|
|
|
|
|
95
|
$ENCODING_CONSOLE_IN = $ENCODING_LOCALE; |
160
|
52
|
|
|
|
|
147
|
$ENCODING_CONSOLE_OUT = $ENCODING_LOCALE; |
161
|
52
|
|
|
|
|
1237
|
_init(); |
162
|
52
|
|
|
|
|
6555
|
_flush_aliases(); |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
sub decode_argv { |
166
|
0
|
0
|
|
0
|
1
|
|
die if defined wantarray; |
167
|
0
|
|
|
|
|
|
for (@ARGV) { |
168
|
0
|
|
|
|
|
|
$_ = Encode::decode(locale => $_, @_); |
169
|
|
|
|
|
|
|
} |
170
|
|
|
|
|
|
|
} |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
sub env { |
173
|
0
|
|
|
0
|
1
|
|
my $k = Encode::encode(locale => shift); |
174
|
0
|
|
|
|
|
|
my $old = $ENV{$k}; |
175
|
0
|
0
|
|
|
|
|
if (@_) { |
176
|
0
|
|
|
|
|
|
my $v = shift; |
177
|
0
|
0
|
|
|
|
|
if (defined $v) { |
178
|
0
|
|
|
|
|
|
$ENV{$k} = Encode::encode(locale => $v); |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
else { |
181
|
0
|
|
|
|
|
|
delete $ENV{$k}; |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
} |
184
|
0
|
0
|
|
|
|
|
return Encode::decode(locale => $old) if defined wantarray; |
185
|
|
|
|
|
|
|
} |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
1; |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
__END__ |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
=head1 NAME |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
ExtUtils::MakeMaker::Locale - bundled Encode::Locale |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=head1 SYNOPSIS |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
use Encode::Locale; |
198
|
|
|
|
|
|
|
use Encode; |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
$string = decode(locale => $bytes); |
201
|
|
|
|
|
|
|
$bytes = encode(locale => $string); |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
if (-t) { |
204
|
|
|
|
|
|
|
binmode(STDIN, ":encoding(console_in)"); |
205
|
|
|
|
|
|
|
binmode(STDOUT, ":encoding(console_out)"); |
206
|
|
|
|
|
|
|
binmode(STDERR, ":encoding(console_out)"); |
207
|
|
|
|
|
|
|
} |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
# Processing file names passed in as arguments |
210
|
|
|
|
|
|
|
my $uni_filename = decode(locale => $ARGV[0]); |
211
|
|
|
|
|
|
|
open(my $fh, "<", encode(locale_fs => $uni_filename)) |
212
|
|
|
|
|
|
|
|| die "Can't open '$uni_filename': $!"; |
213
|
|
|
|
|
|
|
binmode($fh, ":encoding(locale)"); |
214
|
|
|
|
|
|
|
... |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
=head1 DESCRIPTION |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
In many applications it's wise to let Perl use Unicode for the strings it |
219
|
|
|
|
|
|
|
processes. Most of the interfaces Perl has to the outside world are still byte |
220
|
|
|
|
|
|
|
based. Programs therefore need to decode byte strings that enter the program |
221
|
|
|
|
|
|
|
from the outside and encode them again on the way out. |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
The POSIX locale system is used to specify both the language conventions |
224
|
|
|
|
|
|
|
requested by the user and the preferred character set to consume and |
225
|
|
|
|
|
|
|
output. The C<Encode::Locale> module looks up the charset and encoding (called |
226
|
|
|
|
|
|
|
a CODESET in the locale jargon) and arranges for the L<Encode> module to know |
227
|
|
|
|
|
|
|
this encoding under the name "locale". It means bytes obtained from the |
228
|
|
|
|
|
|
|
environment can be converted to Unicode strings by calling C<< |
229
|
|
|
|
|
|
|
Encode::decode(locale => $bytes) >> and converted back again with C<< |
230
|
|
|
|
|
|
|
Encode::encode(locale => $string) >>. |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
Where file system interfaces pass file names in and out of the program we also |
233
|
|
|
|
|
|
|
need care. The trend is for operating systems to use a fixed file encoding |
234
|
|
|
|
|
|
|
that doesn't actually depend on the locale; and this module determines the most |
235
|
|
|
|
|
|
|
appropriate encoding for file names. The L<Encode> module will know this |
236
|
|
|
|
|
|
|
encoding under the name "locale_fs". For traditional Unix systems this will |
237
|
|
|
|
|
|
|
be an alias to the same encoding as "locale". |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
For programs running in a terminal window (called a "Console" on some systems) |
240
|
|
|
|
|
|
|
the "locale" encoding is usually a good choice for what to expect as input and |
241
|
|
|
|
|
|
|
output. Some systems allow us to query the encoding set for the terminal and |
242
|
|
|
|
|
|
|
C<Encode::Locale> will do that if available and make these encodings known |
243
|
|
|
|
|
|
|
under the C<Encode> aliases "console_in" and "console_out". For systems where |
244
|
|
|
|
|
|
|
we can't determine the terminal encoding these will be aliased as the same |
245
|
|
|
|
|
|
|
encoding as "locale". The advice is to use "console_in" for input known to |
246
|
|
|
|
|
|
|
come from the terminal and "console_out" for output to the terminal. |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
In addition to arranging for various Encode aliases the following functions and |
249
|
|
|
|
|
|
|
variables are provided: |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
=over |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
=item decode_argv( ) |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
=item decode_argv( Encode::FB_CROAK ) |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
This will decode the command line arguments to perl (the C<@ARGV> array) in-place. |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
The function will by default replace characters that can't be decoded by |
260
|
|
|
|
|
|
|
"\x{FFFD}", the Unicode replacement character. |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
Any argument provided is passed as CHECK to underlying Encode::decode() call. |
263
|
|
|
|
|
|
|
Pass the value C<Encode::FB_CROAK> to have the decoding croak if not all the |
264
|
|
|
|
|
|
|
command line arguments can be decoded. See L<Encode/"Handling Malformed Data"> |
265
|
|
|
|
|
|
|
for details on other options for CHECK. |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
=item env( $uni_key ) |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
=item env( $uni_key => $uni_value ) |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
Interface to get/set environment variables. Returns the current value as a |
272
|
|
|
|
|
|
|
Unicode string. The $uni_key and $uni_value arguments are expected to be |
273
|
|
|
|
|
|
|
Unicode strings as well. Passing C<undef> as $uni_value deletes the |
274
|
|
|
|
|
|
|
environment variable named $uni_key. |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
The returned value will have the characters that can't be decoded replaced by |
277
|
|
|
|
|
|
|
"\x{FFFD}", the Unicode replacement character. |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
There is no interface to request alternative CHECK behavior as for |
280
|
|
|
|
|
|
|
decode_argv(). If you need that you need to call encode/decode yourself. |
281
|
|
|
|
|
|
|
For example: |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
my $key = Encode::encode(locale => $uni_key, Encode::FB_CROAK); |
284
|
|
|
|
|
|
|
my $uni_value = Encode::decode(locale => $ENV{$key}, Encode::FB_CROAK); |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
=item reinit( ) |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
=item reinit( $encoding ) |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
Reinitialize the encodings from the locale. You want to call this function if |
291
|
|
|
|
|
|
|
you changed anything in the environment that might influence the locale. |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
This function will croak if the determined encoding isn't recognized by |
294
|
|
|
|
|
|
|
the Encode module. |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
With an argument, force the $ENCODING_... variables to be set to the given value. |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
=item $ENCODING_LOCALE |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
The encoding name determined to be suitable for the current locale. |
301
|
|
|
|
|
|
|
L<Encode> knows this encoding as "locale". |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
=item $ENCODING_LOCALE_FS |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
The encoding name determined to be suitable for file system interfaces |
306
|
|
|
|
|
|
|
involving file names. |
307
|
|
|
|
|
|
|
L<Encode> knows this encoding as "locale_fs". |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
=item $ENCODING_CONSOLE_IN |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
=item $ENCODING_CONSOLE_OUT |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
The encodings to be used for reading input from and writing output to a console. |
314
|
|
|
|
|
|
|
L<Encode> knows these encodings as "console_in" and "console_out". |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
=back |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
=head1 NOTES |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
This table summarizes the mapping of the encodings set up |
321
|
|
|
|
|
|
|
by the C<Encode::Locale> module: |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
Encode | | | |
324
|
|
|
|
|
|
|
Alias | Windows | Mac OS X | POSIX |
325
|
|
|
|
|
|
|
------------+---------+--------------+------------ |
326
|
|
|
|
|
|
|
locale | ANSI | nl_langinfo | nl_langinfo |
327
|
|
|
|
|
|
|
locale_fs | ANSI | UTF-8 | nl_langinfo |
328
|
|
|
|
|
|
|
console_in | OEM | nl_langinfo | nl_langinfo |
329
|
|
|
|
|
|
|
console_out | OEM | nl_langinfo | nl_langinfo |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=head2 Windows |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
Windows has basically 2 sets of APIs. A wide API (based on passing UTF-16 |
334
|
|
|
|
|
|
|
strings) and a byte based API based on a character set called ANSI. The |
335
|
|
|
|
|
|
|
regular Perl interfaces to the OS currently only use the ANSI APIs. |
336
|
|
|
|
|
|
|
Unfortunately ANSI is not a single character set. |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
The encoding that corresponds to ANSI varies between different editions of |
339
|
|
|
|
|
|
|
Windows. For many western editions of Windows ANSI corresponds to CP-1252 |
340
|
|
|
|
|
|
|
which is a character set similar to ISO-8859-1. Conceptually the ANSI |
341
|
|
|
|
|
|
|
character set is a similar concept to the POSIX locale CODESET so this module |
342
|
|
|
|
|
|
|
figures out what the ANSI code page is and makes this available as |
343
|
|
|
|
|
|
|
$ENCODING_LOCALE and the "locale" Encoding alias. |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
Windows systems also operate with another byte based character set. |
346
|
|
|
|
|
|
|
It's called the OEM code page. This is the encoding that the Console |
347
|
|
|
|
|
|
|
takes as input and output. It's common for the OEM code page to |
348
|
|
|
|
|
|
|
differ from the ANSI code page. |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
=head2 Mac OS X |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
On Mac OS X the file system encoding is always UTF-8 while the locale |
353
|
|
|
|
|
|
|
can otherwise be set up as normal for POSIX systems. |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
File names on Mac OS X will at the OS-level be converted to |
356
|
|
|
|
|
|
|
NFD-form. A file created by passing a NFC-filename will come |
357
|
|
|
|
|
|
|
in NFD-form from readdir(). See L<Unicode::Normalize> for details |
358
|
|
|
|
|
|
|
of NFD/NFC. |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
Actually, Apple does not follow the Unicode NFD standard since not all |
361
|
|
|
|
|
|
|
character ranges are decomposed. The claim is that this avoids problems with |
362
|
|
|
|
|
|
|
round trip conversions from old Mac text encodings. See L<Encode::UTF8Mac> for |
363
|
|
|
|
|
|
|
details. |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
=head2 POSIX (Linux and other Unixes) |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
File systems might vary in what encoding is to be used for |
368
|
|
|
|
|
|
|
filenames. Since this module has no way to actually figure out |
369
|
|
|
|
|
|
|
what is correct it goes with the best guess which is to |
370
|
|
|
|
|
|
|
assume filenames are encoded according to the current locale. |
371
|
|
|
|
|
|
|
Users are advised to always specify UTF-8 as the locale charset. |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
=head1 SEE ALSO |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
L<I18N::Langinfo>, L<Encode>, L<Term::Encoding> |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
=head1 AUTHOR |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
Copyright 2010 Gisle Aas <gisle@aas.no>. |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or |
382
|
|
|
|
|
|
|
modify it under the same terms as Perl itself. |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
=cut |