line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Encode::Alias;

use strict;
use warnings;

# RCS-keyword version idiom; must stay on a single line.
our $VERSION = do { my @r = ( q$Revision: 2.23 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };

# Compile-time debug switch: true when the PERL_ENCODE_DEBUG environment
# variable holds a true value.
use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};

use Exporter 'import';

# Public, encouraged API is exported by default
our @EXPORT = qw( define_alias find_alias );

our (
    @Alias,    # ordered matching list
    %Alias,    # cached known aliases
);
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# find_alias( $class, $find )
#
# Resolve the encoding name $find through the alias table and return the
# value cached for it in %Alias: the resolved encoding on success, undef
# when nothing matched.  $class is only used in the DEBUG trace.
#
# On a cache miss the lookup proceeds in two stages:
#
#   1. Every ( alias => value ) pair of @Alias is tried front to back.
#        - qr// alias   : when $find matches, value is string-eval'ed
#                         (so it can refer to $1, $2, ...).
#        - code alias   : called with $find; its return value is used.
#        - plain string : compared with $find case-insensitively; value
#                         is used as is.
#      A defined result that is not equal to $find is taken directly when
#      it is a reference, otherwise it is handed to Encode::find_encoding.
#      The first true outcome is cached and ends the scan.
#   2. If that found nothing, $find is compared case-insensitively with
#      the keys of %Encode::Encoding and %Encode::ExtModule (RT #7835).
#
# Both hits and misses are cached, so a repeated lookup of the same
# spelling does not rescan the table.
sub find_alias {
    my $class = shift;
    my $find  = shift;
    unless ( exists $Alias{$find} ) {

        # Recursion guard: while this lookup is in progress, a nested
        # find_alias call for the same name sees the key and returns undef
        # instead of scanning again.
        $Alias{$find} = undef;
        for ( my $i = 0 ; $i < @Alias ; $i += 2 ) {
            my $alias = $Alias[$i];
            my $val   = $Alias[ $i + 1 ];
            my $new;
            if ( ref($alias) eq 'Regexp' && $find =~ $alias ) {
                DEBUG and warn "eval $val";

                # NOTE: string eval of the value registered through
                # define_alias; never register untrusted text as a value.
                # A failing eval leaves $new undef and is reported only
                # under DEBUG.
                $new = eval $val;
                DEBUG and $@ and warn "$val, $@";
            }
            elsif ( ref($alias) eq 'CODE' ) {
                DEBUG and warn "$alias", "->", "($find)";
                $new = $alias->($find);
            }
            elsif ( lc($find) eq lc($alias) ) {
                $new = $val;
            }
            if ( defined($new) ) {
                next if $new eq $find;    # avoid (direct) recursion on bugs
                DEBUG and warn "$alias, $new";
                my $enc =
                  ( ref($new) ) ? $new : Encode::find_encoding($new);
                if ($enc) {
                    $Alias{$find} = $enc;
                    last;
                }
            }
        }

        # case insensitive search when canonical is not in all lowercase
        # RT ticket #7835
        unless ( $Alias{$find} ) {
            my $lcfind = lc($find);
            for my $name ( keys %Encode::Encoding, keys %Encode::ExtModule )
            {
                $lcfind eq lc($name) or next;
                $Alias{$find} = Encode::find_encoding($name);
                DEBUG and warn "$find => $name";

                # Stop at the first name that resolves.  Scanning on could
                # only repeat the lookup or overwrite the result with a
                # later key, in an order that depends on hash ordering.
                last if $Alias{$find};
            }
        }
    }
    if (DEBUG) {
        my $name;
        if ( my $e = $Alias{$find} ) {
            $name = $e->name;
        }
        else {
            $name = "";
        }
        warn "find_alias($class, $find)->name = $name";
    }
    return $Alias{$find};
}
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
# define_alias( ALIAS => NAME, ... )
#
# Register one or more alias pairs.  ALIAS may be a plain string, a qr//
# object or a code reference; NAME is stored untouched and interpreted
# later by find_alias.  Each pair is unshifted onto @Alias, so the most
# recently defined alias takes precedence.
#
# Cached lookups in %Alias that the new alias could affect are discarded
# so that the new definition can override them:
#   - qr// alias   : every cached key matching the pattern
#   - code alias   : every cached key for which the code returns true
#                    (the code is therefore called once per cached key)
#   - plain string : every cached key equal to the alias, ignoring case
#
# A pair whose ALIAS is undef is skipped silently, except under DEBUG
# where it croaks.
sub define_alias {
    while (@_) {
        my $alias = shift;
        my $name  = shift;
        unshift( @Alias, $alias => $name )    # newer one has precedence
          if defined $alias;
        if ( ref($alias) ) {

            # clear %Alias cache to allow overrides
            my @a = keys %Alias;
            for my $k (@a) {
                if ( ref($alias) eq 'Regexp' && $k =~ $alias ) {
                    DEBUG and warn "delete \$Alias\{$k\}";
                    delete $Alias{$k};
                }
                elsif ( ref($alias) eq 'CODE' && $alias->($k) ) {
                    DEBUG and warn "delete \$Alias\{$k\}";
                    delete $Alias{$k};
                }
            }
        }
        elsif ( defined $alias ) {

            # find_alias compares plain-string aliases with lc() on both
            # sides, so a cached entry that differs from $alias only in
            # case (including a cached miss) is affected by this
            # definition as well and has to go.
            my $lcalias = lc($alias);
            for my $k ( grep { lc($_) eq $lcalias } keys %Alias ) {
                DEBUG and warn "delete \$Alias\{$k\}";
                delete $Alias{$k};
            }
        }
        elsif (DEBUG) {
            require Carp;
            Carp::croak("undef \$alias");
        }
    }
}
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
# HACK: Encode must be loaded only after define_alias has been declared,
# because Encode calls define_alias.
use Encode ();

# Allow latin-1 style names as well.
# The array index is the N of "latinN"; the element is the part number
# of the corresponding iso-8859-* name:
#   latin1..latin4 -> 1..4, latin5 -> 9, latin6 -> 10,
#   latin7..latin10 -> 13..16
our @Latin2iso = ( 0 .. 4, 9, 10, 13 .. 16 );

# Allow winlatin1 style names as well.
# Maps the part after "win" (lower-cased) to a code page number.
our %Winlatin2cp = (
    latin2     => 1250,
    cyrillic   => 1251,
    latin1     => 1252,
    greek      => 1253,
    turkish    => 1254,
    hebrew     => 1255,
    arabic     => 1256,
    baltic     => 1257,
    vietnamese => 1258,
);

init_aliases();
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
# undef_aliases()
#
# Forget every alias: empties both the ordered alias table (@Alias) and
# the lookup cache (%Alias).
sub undef_aliases {

    # Assigning the empty list to both containers clears them in one go.
    ( @Alias, %Alias ) = ();
}
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
# init_aliases()
#
# Reset the alias table and register the built-in aliases.
#
# The pairs are collected into one list and handed to define_alias in a
# single call.  define_alias unshifts each pair in turn, so pairs listed
# LATER here are tried FIRST by find_alias; the order below is therefore
# significant.
sub init_aliases {
    undef_aliases();

    my @builtin = (

        # Try all-lower-case version should all else fail
        qr/^(.*)$/ => '"\L$1"',

        # UTF/UCS stuff
        qr/^(unicode-1-1-)?UTF-?7$/i => '"UTF-7"',
        qr/^UCS-?2-?LE$/i            => '"UCS-2LE"',
        qr/^UCS-?2-?(BE)?$/i         => '"UCS-2BE"',
        qr/^UCS-?4-?(BE|LE|)?$/i     => 'uc("UTF-32$1")',
        qr/^iso-10646-1$/i           => '"UCS-2BE"',
        qr/^UTF-?(16|32)-?BE$/i      => '"UTF-$1BE"',
        qr/^UTF-?(16|32)-?LE$/i      => '"UTF-$1LE"',
        qr/^UTF-?(16|32)$/i          => '"UTF-$1"',

        # ASCII
        qr/^(?:US-?)ascii$/i                 => '"ascii"',
        'C'                                  => 'ascii',
        qr/\b(?:ISO[-_]?)?646(?:[-_]?US)?$/i => '"ascii"',

        # Allow variants of iso-8859-1 etc.
        qr/\biso[-_]?(\d+)[-_](\d+)$/i => '"iso-$1-$2"',

        # At least HP-UX has these.
        qr/\biso8859(\d+)$/i => '"iso-8859-$1"',

        # More HP stuff.
        qr/\b(?:hp-)?(arabic|greek|hebrew|kana|roman|thai|turkish)8$/i =>
          '"${1}8"',

        # The Official name of ASCII.
        qr/\bANSI[-_]?X3\.4[-_]?1968$/i => '"ascii"',

        # This is a font issue, not an encoding issue.
        # (The currency symbol of the Latin 1 upper half
        #  has been redefined as the euro symbol.)
        qr/^(.+)\@euro$/i => '"$1"',

        qr/\b(?:iso[-_]?)?latin[-_]?(\d+)$/i =>
'defined $Encode::Alias::Latin2iso[$1] ? "iso-8859-$Encode::Alias::Latin2iso[$1]" : undef',

        qr/\bwin(latin[12]|cyrillic|baltic|greek|turkish|
                 hebrew|arabic|baltic|vietnamese)$/ix =>
          '"cp" . $Encode::Alias::Winlatin2cp{lc($1)}',

        # Common names for non-latin preferred MIME names
        'ascii'    => 'US-ascii',
        'cyrillic' => 'iso-8859-5',
        'arabic'   => 'iso-8859-6',
        'greek'    => 'iso-8859-7',
        'hebrew'   => 'iso-8859-8',
        'thai'     => 'iso-8859-11',

        # RT #20781
        qr/\btis-?620\b/i => '"iso-8859-11"',

        # At least AIX has IBM-NNN (surprisingly...) instead of cpNNN.
        # And Microsoft has their own naming (again, surprisingly).
        # And windows-* is registered in IANA!
        qr/\b(?:cp|ibm|ms|windows)[-_ ]?(\d{2,4})$/i => '"cp$1"',

        # Sometimes seen with a leading zero.
        # qr/\bcp037\b/i => '"cp37"',

        # Mac Mappings
        # predefined in *.ucm; unneeded
        # qr/\bmacIcelandic$/i => '"macIceland"',
        qr/^(?:x[_-])?mac[_-](.*)$/i => '"mac$1"',

        # http://rt.cpan.org/Ticket/Display.html?id=36326
        qr/^macintosh$/i => '"MacRoman"',

        # https://rt.cpan.org/Ticket/Display.html?id=78125
        qr/^macce$/i => '"MacCentralEurRoman"',

        # Ououououou. gone.  They are different!
        # qr/\bmacRomanian$/i => '"macRumanian"',

        # Standardize on the dashed versions.
        qr/\bkoi8[\s\-_]*([ru])$/i => '"koi8-$1"',
    );

    # The CJK aliases are registered only when not running on EBCDIC.
    if ( !$Encode::ON_EBCDIC ) {
        push @builtin, (

            # for Encode::CN
            qr/\beuc.*cn$/i => '"euc-cn"',
            qr/\bcn.*euc$/i => '"euc-cn"',

            # qr/\bGB[- ]?(\d+)$/i => '"euc-cn"'
            # CP936 doesn't have vendor-addon for GBK, so they're identical.
            qr/^gbk$/i => '"cp936"',

            # This fixes gb2312 vs. euc-cn confusion, practically
            qr/\bGB[-_ ]?2312(?!-?raw)/i => '"euc-cn"',

            # for Encode::JP
            qr/\bjis$/i         => '"7bit-jis"',
            qr/\beuc.*jp$/i     => '"euc-jp"',
            qr/\bjp.*euc$/i     => '"euc-jp"',
            qr/\bujis$/i        => '"euc-jp"',
            qr/\bshift.*jis$/i  => '"shiftjis"',
            qr/\bsjis$/i        => '"shiftjis"',
            qr/\bwindows-31j$/i => '"cp932"',

            # for Encode::KR
            qr/\beuc.*kr$/i => '"euc-kr"',
            qr/\bkr.*euc$/i => '"euc-kr"',

            # This fixes ksc5601 vs. euc-kr confusion, practically
            qr/(?:x-)?uhc$/i         => '"cp949"',
            qr/(?:x-)?windows-949$/i => '"cp949"',
            qr/\bks_c_5601-1987$/i   => '"cp949"',

            # for Encode::TW
            qr/\bbig-?5$/i              => '"big5-eten"',
            qr/\bbig5-?et(?:en)?$/i     => '"big5-eten"',
            qr/\btca[-_]?big5$/i        => '"big5-eten"',
            qr/\bbig5-?hk(?:scs)?$/i    => '"big5-hkscs"',
            qr/\bhk(?:scs)?[-_]?big5$/i => '"big5-hkscs"',
        );
    }

    push @builtin, (

        # https://github.com/dankogai/p5-encode/issues/37
        qr/cp65000/i => '"UTF-7"',
        qr/cp65001/i => '"utf-8-strict"',

        # utf8 is blessed :)
        qr/\bUTF-8$/i => '"utf-8-strict"',

        # At last, Map white space and _ to '-'
        qr/^(\S+)[\s_]+(.*)$/i => '"$1-$2"',
    );

    define_alias(@builtin);
}
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
1; |
277
|
|
|
|
|
|
|
__END__ |