line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
2
|
|
|
|
|
|
|
# Unicode::Japanese |
3
|
|
|
|
|
|
|
# Unicode::Japanese::PurePerl |
4
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
5
|
|
|
|
|
|
|
# $Id: Japanese_stub.pm 41491 2008-02-15 07:21:13Z hio $ |
6
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
7
|
|
|
|
|
|
|
package Unicode::Japanese::PurePerl; |
8
|
|
|
|
|
|
|
package Unicode::Japanese; |
9
|
|
|
|
|
|
|
|
10
|
60
|
|
|
60
|
|
552078
|
use strict; |
|
60
|
|
|
|
|
133
|
|
|
60
|
|
|
|
|
2125
|
|
11
|
49
|
|
|
49
|
|
325
|
use vars qw($VERSION $XS_VERSION $PurePerl $xs_loaderror); |
|
49
|
|
|
|
|
96
|
|
|
49
|
|
|
|
|
8247
|
|
12
|
|
|
|
|
|
|
$VERSION = '0.49'; |
13
|
|
|
|
|
|
|
$XS_VERSION = $VERSION; |
14
|
|
|
|
|
|
|
$VERSION = eval $VERSION; |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
# `use bytes' and `use Encode' if we are on perl-5.8.0 or later. |
17
|
|
|
|
|
|
|
if( $] >= 5.008 ) |
18
|
|
|
|
|
|
|
{ |
19
|
|
|
|
|
|
|
my $evalerr; |
20
|
|
|
|
|
|
|
{ |
21
|
|
|
|
|
|
|
local($SIG{__DIE__}) = 'DEFAULT'; |
22
|
|
|
|
|
|
|
local($@); |
23
|
29
|
|
|
29
|
|
43253
|
eval 'use bytes;use Encode;'; |
|
29
|
|
|
29
|
|
1096
|
|
|
29
|
|
|
|
|
236
|
|
|
29
|
|
|
|
|
32773
|
|
|
29
|
|
|
|
|
462670
|
|
|
29
|
|
|
|
|
2700
|
|
24
|
|
|
|
|
|
|
$evalerr = $@; |
25
|
|
|
|
|
|
|
} |
26
|
|
|
|
|
|
|
$evalerr and CORE::die($evalerr); |
27
|
|
|
|
|
|
|
} |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
30
|
|
|
|
|
|
|
# import |
31
|
|
|
|
|
|
|
# |
32
|
|
|
|
|
|
|
sub import |
33
|
|
|
|
|
|
|
{ |
34
|
29
|
|
|
29
|
|
869
|
my $pkg = shift; |
35
|
29
|
|
|
|
|
102
|
my ($callerpkg) = caller; |
36
|
29
|
|
|
|
|
156
|
my %exp = |
37
|
|
|
|
|
|
|
( |
38
|
|
|
|
|
|
|
'&unijp' => \&unijp, |
39
|
|
|
|
|
|
|
); |
40
|
29
|
|
|
|
|
71
|
my @na; |
41
|
29
|
50
|
|
|
|
154
|
my @add = (grep{$_ eq ':all'} @_) ? keys %exp : (); |
|
5
|
|
|
|
|
33
|
|
42
|
29
|
|
|
|
|
104
|
foreach(@_, @add) |
43
|
|
|
|
|
|
|
{ |
44
|
5
|
50
|
|
|
|
96
|
$_ eq 'PurePerl' and $PurePerl=1, next; |
45
|
5
|
100
|
66
|
|
|
57
|
if( $exp{$_} || $exp{'&'.$_} ) |
|
|
50
|
|
|
|
|
|
46
|
|
|
|
|
|
|
{ |
47
|
48
|
|
|
48
|
|
294
|
no strict 'refs'; |
|
48
|
|
|
|
|
119
|
|
|
48
|
|
|
|
|
22822
|
|
48
|
2
|
|
|
|
|
14
|
(my $name = $_) =~ s/^\W//; |
49
|
2
|
|
33
|
|
|
10
|
my $obj = $exp{$_} || $exp{'&'.$_}; |
50
|
2
|
|
|
|
|
8
|
*{$callerpkg.'::'.$name} = $obj; |
|
2
|
|
|
|
|
14
|
|
51
|
|
|
|
|
|
|
}elsif( $_ eq 'no_I18N_Japanese' ) |
52
|
|
|
|
|
|
|
{ |
53
|
4
|
|
|
|
|
12
|
$^H &= ~0x0f00_0000; |
54
|
|
|
|
|
|
|
package Unicode::Japanese::PurePerl; |
55
|
4
|
|
|
|
|
21
|
$^H &= ~0x0f00_0000; |
56
|
|
|
|
|
|
|
package Unicode::Japanese; |
57
|
4
|
|
|
|
|
20
|
next; |
58
|
|
|
|
|
|
|
} |
59
|
2
|
|
|
|
|
4
|
push(@na,$_); |
60
|
|
|
|
|
|
|
} |
61
|
29
|
100
|
|
|
|
36669
|
if( @na ) |
62
|
|
|
|
|
|
|
{ |
63
|
|
|
|
|
|
|
#use Carp; |
64
|
|
|
|
|
|
|
#croak("invalid parameter (".join(',',@na).")"); |
65
|
|
|
|
|
|
|
} |
66
|
|
|
|
|
|
|
} |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
69
|
|
|
|
|
|
|
# DESTROY |
70
|
|
|
|
|
|
|
# |
71
|
|
|
|
|
|
|
sub DESTROY |
72
|
1
|
|
|
1
|
|
6
|
{ |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
76
|
|
|
|
|
|
|
# load_xs. |
77
|
|
|
|
|
|
|
# loading xs-subs. |
78
|
|
|
|
|
|
|
# this method is called from new (through new=>_init_table=>load_xs) |
79
|
|
|
|
|
|
|
# |
80
|
|
|
|
|
|
|
sub load_xs |
81
|
|
|
|
|
|
|
{ |
82
|
|
|
|
|
|
|
#print STDERR "load_xs\n"; |
83
|
28
|
50
|
|
28
|
0
|
120
|
if( $PurePerl ) |
84
|
|
|
|
|
|
|
{ |
85
|
|
|
|
|
|
|
#print STDERR "PurePerl mode\n"; |
86
|
1
|
|
|
|
|
4
|
$xs_loaderror = 'disabled'; |
87
|
1
|
|
|
|
|
6
|
return; |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
#print STDERR "XS mode\n"; |
90
|
|
|
|
|
|
|
|
91
|
28
|
|
|
|
|
61
|
my $use_xs; |
92
|
28
|
|
|
28
|
|
254
|
LoadXS: |
|
28
|
|
|
28
|
|
140
|
|
|
28
|
|
|
|
|
1389
|
|
|
28
|
|
|
|
|
165
|
|
|
28
|
|
|
|
|
56
|
|
|
28
|
|
|
|
|
2381
|
|
|
28
|
|
|
|
|
2555
|
|
93
|
|
|
|
|
|
|
{ |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
#print STDERR "* * bootstrap...\n"; |
96
|
28
|
|
|
|
|
58
|
eval q |
97
|
|
|
|
|
|
|
{ |
98
|
|
|
|
|
|
|
use strict; |
99
|
|
|
|
|
|
|
require DynaLoader; |
100
|
|
|
|
|
|
|
use vars qw(@ISA); |
101
|
|
|
|
|
|
|
@ISA = qw(DynaLoader); |
102
|
|
|
|
|
|
|
local($SIG{__DIE__}) = 'DEFAULT'; |
103
|
|
|
|
|
|
|
Unicode::Japanese->bootstrap($XS_VERSION); |
104
|
|
|
|
|
|
|
}; |
105
|
|
|
|
|
|
|
#print STDERR "* * the trial has been done.\n"; |
106
|
|
|
|
|
|
|
#undef @ISA; |
107
|
28
|
50
|
|
|
|
294
|
if( $@ ) |
108
|
|
|
|
|
|
|
{ |
109
|
|
|
|
|
|
|
#print STDERR "failed.\n"; |
110
|
|
|
|
|
|
|
#print STDERR "$@\n"; |
111
|
1
|
|
|
|
|
4
|
$use_xs = 0; |
112
|
1
|
|
|
|
|
10
|
$xs_loaderror = $@; |
113
|
1
|
|
|
|
|
2
|
undef $@; |
114
|
1
|
|
|
|
|
14
|
last LoadXS; |
115
|
|
|
|
|
|
|
} |
116
|
|
|
|
|
|
|
#print STDERR "succeeded.\n"; |
117
|
28
|
|
|
|
|
78
|
$use_xs = 1; |
118
|
|
|
|
|
|
|
eval q |
119
|
28
|
|
|
134
|
|
2440
|
{ |
|
2304
|
|
|
|
|
426914
|
|
120
|
|
|
|
|
|
|
#print STDERR "overriding _s2u,_u2s\n"; |
121
|
|
|
|
|
|
|
do_memmap(); |
122
|
|
|
|
|
|
|
#print STDERR "memmap done\n"; |
123
|
|
|
|
|
|
|
END{ do_memunmap(); } |
124
|
|
|
|
|
|
|
#print STDERR "binding xsubs has been done.\n"; |
125
|
|
|
|
|
|
|
}; |
126
|
28
|
50
|
|
|
|
204
|
if( $@ ) |
127
|
|
|
|
|
|
|
{ |
128
|
|
|
|
|
|
|
#print STDERR "error in the last part of operation to load XS.\n"; |
129
|
1
|
|
|
|
|
6
|
$xs_loaderror = $@; |
130
|
1
|
|
|
|
|
2
|
CORE::die($@); |
131
|
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
#print STDERR "done.\n"; |
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
|
136
|
28
|
50
|
|
|
|
126
|
if( $@ ) |
137
|
|
|
|
|
|
|
{ |
138
|
1
|
|
|
|
|
5
|
$xs_loaderror = $@; |
139
|
1
|
|
|
|
|
3
|
CORE::die("Cannot load Unicode::Japanese of neither XS nor PurePerl side\n$@"); |
140
|
|
|
|
|
|
|
} |
141
|
28
|
50
|
|
|
|
185
|
if( !$use_xs ) |
142
|
|
|
|
|
|
|
{ |
143
|
|
|
|
|
|
|
#print STDERR "no xs.\n"; |
144
|
|
|
|
|
|
|
eval q |
145
|
1
|
|
|
|
|
116
|
{ |
146
|
|
|
|
|
|
|
sub do_memmap($){} |
147
|
|
|
|
|
|
|
sub do_memunmap($){} |
148
|
|
|
|
|
|
|
}; |
149
|
|
|
|
|
|
|
} |
150
|
28
|
50
|
|
|
|
242
|
$xs_loaderror = '' if( !defined($xs_loaderror) ); |
151
|
|
|
|
|
|
|
#print STDERR "load_xs done.\n"; |
152
|
|
|
|
|
|
|
} |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
155
|
|
|
|
|
|
|
# Unicode::Japanese->new(); |
156
|
|
|
|
|
|
|
# cache for char conversion. |
157
|
|
|
|
|
|
|
# 2bytes. |
158
|
|
|
|
|
|
|
# JIS C 6226-1979 \e$@ |
159
|
|
|
|
|
|
|
# JIS X 0208-1983 \e$B |
160
|
|
|
|
|
|
|
# JIS X 0208-1990 \e&@\e$B |
161
|
|
|
|
|
|
|
# JIS X 0212-1990 \e$(D |
162
|
|
|
|
|
|
|
# 1byte. |
163
|
|
|
|
|
|
|
# JIS ROMAN \e(J |
164
|
|
|
|
|
|
|
# JIS ROMAN \e(H |
165
|
|
|
|
|
|
|
# ASCII \e(B |
166
|
|
|
|
|
|
|
# JIS KANA \e(I |
167
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
168
|
|
|
|
|
|
|
# $unijp = Unicode::Japanese->new([$str,[$icode]]); |
169
|
|
|
|
|
|
|
# |
170
|
|
|
|
|
|
|
sub new |
171
|
|
|
|
|
|
|
{ |
172
|
247
|
|
|
247
|
1
|
66458
|
my $pkg = shift; |
173
|
247
|
|
|
|
|
632
|
my $this = {}; |
174
|
|
|
|
|
|
|
|
175
|
247
|
50
|
|
|
|
652
|
if( defined($pkg) ) |
176
|
|
|
|
|
|
|
{ |
177
|
247
|
|
|
|
|
916
|
bless $this, $pkg; |
178
|
247
|
|
|
|
|
1229
|
$this->_init_table; |
179
|
|
|
|
|
|
|
}else |
180
|
|
|
|
|
|
|
{ |
181
|
1
|
|
|
|
|
2
|
bless $this; |
182
|
1
|
|
|
|
|
146
|
$this->_init_table; |
183
|
|
|
|
|
|
|
} |
184
|
|
|
|
|
|
|
|
185
|
247
|
100
|
|
|
|
7333
|
@_ and $this->set(@_); |
186
|
|
|
|
|
|
|
|
187
|
247
|
|
|
|
|
3394
|
$this; |
188
|
|
|
|
|
|
|
} |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
192
|
|
|
|
|
|
|
# _got_undefined_subroutine |
193
|
|
|
|
|
|
|
# die with message 'undefiend subroutine'. |
194
|
|
|
|
|
|
|
# |
195
|
|
|
|
|
|
|
sub _got_undefined_subroutine |
196
|
|
|
|
|
|
|
{ |
197
|
1
|
|
|
1
|
|
7
|
my $subname = pop; |
198
|
1
|
|
|
|
|
6
|
CORE::die "Undefined subroutine \&$subname got called.\n"; |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
202
|
|
|
|
|
|
|
# AUTOLOAD |
203
|
|
|
|
|
|
|
# AUTOLOAD of Unicode::Japanese. |
204
|
|
|
|
|
|
|
# imports PurePerl methods. |
205
|
|
|
|
|
|
|
# |
206
|
|
|
|
|
|
|
AUTOLOAD |
207
|
|
|
|
|
|
|
{ |
208
|
|
|
|
|
|
|
# load pure perl subs. |
209
|
46
|
|
|
46
|
|
334
|
use vars qw($AUTOLOAD); |
|
46
|
|
|
|
|
105
|
|
|
46
|
|
|
|
|
2104
|
|
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
#print "AUTOLOAD... $AUTOLOAD\n"; |
212
|
|
|
|
|
|
|
|
213
|
186
|
100
|
|
186
|
|
7095
|
if(!defined($Unicode::Japanese::xs_loaderror) ) |
214
|
|
|
|
|
|
|
{ |
215
|
28
|
|
|
|
|
291
|
Unicode::Japanese::PurePerl::_init_table(); |
216
|
28
|
100
|
|
|
|
152
|
if( defined(&$AUTOLOAD) ) |
217
|
|
|
|
|
|
|
{ |
218
|
42
|
|
|
42
|
|
543
|
no strict 'refs'; |
|
42
|
|
|
|
|
85
|
|
|
42
|
|
|
|
|
5899
|
|
219
|
2
|
|
|
|
|
21
|
goto &$AUTOLOAD; |
220
|
|
|
|
|
|
|
} |
221
|
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
|
|
223
|
185
|
50
|
|
|
|
300
|
my ($pkg, $subname) = do{ |
224
|
185
|
|
|
|
|
589
|
local($1, $2); |
225
|
185
|
|
|
|
|
1683
|
$AUTOLOAD =~ /^(.*)::(\w+)$/ |
226
|
|
|
|
|
|
|
} or got_undefined_subroutine($AUTOLOAD); |
227
|
|
|
|
|
|
|
|
228
|
185
|
|
|
|
|
429
|
my $pppkg = $pkg . '::PurePerl'; |
229
|
185
|
|
|
|
|
390
|
my $ppsubname = $pkg . '::PurePerl::' . $subname; |
230
|
185
|
100
|
|
|
|
1077
|
if( !defined(&$ppsubname) ) |
231
|
|
|
|
|
|
|
{ |
232
|
155
|
|
|
|
|
262
|
my $save = $@; |
233
|
155
|
|
|
|
|
335
|
my @BAK = @_; |
234
|
155
|
|
|
|
|
505
|
$pppkg->_loadsub($ppsubname); |
235
|
155
|
|
|
|
|
225
|
$@ = $save; |
236
|
155
|
|
|
|
|
507
|
@_ = @BAK; |
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
|
239
|
185
|
|
|
|
|
621
|
my $sub = \&$ppsubname; |
240
|
|
|
|
|
|
|
{ |
241
|
31
|
|
|
31
|
|
216
|
no strict 'refs'; |
|
31
|
|
|
|
|
74
|
|
|
31
|
|
|
|
|
2120
|
|
|
185
|
|
|
|
|
296
|
|
242
|
185
|
|
|
|
|
552
|
*$AUTOLOAD = $sub; # copy. |
243
|
|
|
|
|
|
|
} |
244
|
185
|
|
|
|
|
5391
|
goto &$sub; |
245
|
|
|
|
|
|
|
} |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
248
|
|
|
|
|
|
|
# Unicode::Japanese::PurePerl |
249
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
250
|
|
|
|
|
|
|
package Unicode::Japanese::PurePerl; |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
|
253
|
31
|
|
|
31
|
|
154
|
use strict; |
|
31
|
|
|
|
|
102
|
|
|
31
|
|
|
|
|
1192
|
|
254
|
31
|
|
|
31
|
|
150
|
use vars qw(%CHARCODE %ESC %RE @CHARSET_LIST); |
|
31
|
|
|
|
|
52
|
|
|
31
|
|
|
|
|
2452
|
|
255
|
|
|
|
|
|
|
|
256
|
31
|
|
|
31
|
|
161
|
use vars qw(@J2S @S2J @S2E @E2S @U2T %T2U %S2U %U2S %SA2U1 %U2SA1 %SA2U2 %U2SA2); |
|
31
|
|
|
|
|
72
|
|
|
31
|
|
|
|
|
15434
|
|
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
%CHARCODE = ( |
259
|
|
|
|
|
|
|
UNDEF_EUC => "\xa2\xae", |
260
|
|
|
|
|
|
|
UNDEF_SJIS => "\x81\xac", |
261
|
|
|
|
|
|
|
UNDEF_JIS => "\xa2\xf7", |
262
|
|
|
|
|
|
|
UNDEF_UNICODE => "\x20\x20", |
263
|
|
|
|
|
|
|
); |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
%ESC = ( |
266
|
|
|
|
|
|
|
JIS_0208 => "\e\$B", |
267
|
|
|
|
|
|
|
JIS_0212 => "\e\$(D", |
268
|
|
|
|
|
|
|
ASC => "\e\(B", |
269
|
|
|
|
|
|
|
KANA => "\e\(I", |
270
|
|
|
|
|
|
|
E_JSKY_START => "\e\$", |
271
|
|
|
|
|
|
|
E_JSKY_END => "\x0f", |
272
|
|
|
|
|
|
|
); |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
%RE = |
275
|
|
|
|
|
|
|
( |
276
|
|
|
|
|
|
|
ASCII => '[\x00-\x7f]', |
277
|
|
|
|
|
|
|
EUC_0212 => '\x8f[\xa1-\xfe][\xa1-\xfe]', |
278
|
|
|
|
|
|
|
EUC_C => '[\xa1-\xfe][\xa1-\xfe]', |
279
|
|
|
|
|
|
|
EUC_KANA => '\x8e[\xa1-\xdf]', |
280
|
|
|
|
|
|
|
JIS_0208 => '\e\$\@|\e\$B|\e&\@\e\$B', |
281
|
|
|
|
|
|
|
JIS_0212 => "\e" . '\$\(D', |
282
|
|
|
|
|
|
|
JIS_ASC => "\e" . '\([BJ]', |
283
|
|
|
|
|
|
|
JIS_KANA => "\e" . '\(I', |
284
|
|
|
|
|
|
|
SJIS_DBCS => '[\x81-\x9f\xe0-\xef\xfa-\xfc][\x40-\x7e\x80-\xfc]', |
285
|
|
|
|
|
|
|
SJIS_KANA => '[\xa1-\xdf]', |
286
|
|
|
|
|
|
|
UTF8 => '[\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3}|[\xf8-\xfb][\x80-\xbf]{4}|[\xfc-\xfd][\x80-\xbf]{5}', |
287
|
|
|
|
|
|
|
BOM2_BE => '\xfe\xff', |
288
|
|
|
|
|
|
|
BOM2_LE => '\xff\xfe', |
289
|
|
|
|
|
|
|
BOM4_BE => '\x00\x00\xfe\xff', |
290
|
|
|
|
|
|
|
BOM4_LE => '\xff\xfe\x00\x00', |
291
|
|
|
|
|
|
|
UTF32_BE => '\x00[\x00-\x10][\x00-\xff]{2}', |
292
|
|
|
|
|
|
|
UTF32_LE => '[\x00-\xff]{2}[\x00-\x10]\x00', |
293
|
|
|
|
|
|
|
E_IMODEv1 => '\xf8[\x9f-\xfc]|\xf9[\x40-\x49\x50-\x52\x55-\x57\x5b-\x5e\x72-\x7e\x80-\xb0]', |
294
|
|
|
|
|
|
|
E_IMODEv2 => '\xf9[\xb1-\xfc]', |
295
|
|
|
|
|
|
|
E_IMODE => '\xf8[\x9f-\xfc]|\xf9[\x40-\x49\x50-\x52\x55-\x57\x5b-\x5e\x72-\x7e\x80-\xfc]', |
296
|
|
|
|
|
|
|
E_JSKY1 => '[EFGOPQ]', |
297
|
|
|
|
|
|
|
E_JSKY1v1 => '[EFG]', |
298
|
|
|
|
|
|
|
E_JSKY1v2 => '[OPQ]', |
299
|
|
|
|
|
|
|
E_JSKY2 => '[\!-z]', |
300
|
|
|
|
|
|
|
E_DOTI => '\xf0[\x40-\x7e\x80-\xfc]|\xf1[\x40-\x7e\x80-\xd6]|\xf2[\x40-\x7e\x80-\xab\xb0-\xd5\xdf-\xfc]|\xf3[\x40-\x7e\x80-\xfa]|\xf4[\x40-\x4f\x80\x84-\x8a\x8c-\x8e\x90\x94-\x96\x98-\x9c\xa0-\xa4\xa8-\xaf\xb4\xb5\xbc-\xbe\xc4\xc5\xc8\xcc]', |
301
|
|
|
|
|
|
|
E_JIS_AU => '[\x75-\x7b][\x21-\x7e]', |
302
|
|
|
|
|
|
|
E_SJIS_AU => '[\xf3\xf4\xf6\xf7][\x40-\xfc]', |
303
|
|
|
|
|
|
|
E_ICON_AU_START => '
304
|
|
|
|
|
|
|
E_ICON_AU_END => '">', |
305
|
|
|
|
|
|
|
E_JSKY_START => quotemeta($ESC{E_JSKY_START}), |
306
|
|
|
|
|
|
|
E_JSKY_END => '(?:'.quotemeta($ESC{E_JSKY_END}).'|\z)', |
307
|
|
|
|
|
|
|
E_JSKYv1_UTF8 => '\xee(?:\x80[\x81-\xbf]|\x81[\x80-\x9a]|\x84[\x81-\xbf]|\x85[\x80-\x9a]|\x88[\x81-\xbf]|\x89[\x80-\x9a])', |
308
|
|
|
|
|
|
|
E_JSKYv2_UTF8 => '\xee(?:\x8c[\x81-\xbf]|\x8d[\x80-\x8d]|\x90[\x81-\xbf]|\x91[\x80-\x8c]|\x94[\x81-\xb7])', |
309
|
|
|
|
|
|
|
); |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
$]<5.005 and $RE{E_JSKY_END} =~ s/\\z/\$/; |
312
|
|
|
|
|
|
|
$RE{E_JSKY} = $RE{E_JSKY_START} |
313
|
|
|
|
|
|
|
. $RE{E_JSKY1} . $RE{E_JSKY2} . '+' |
314
|
|
|
|
|
|
|
. $RE{E_JSKY_END}; |
315
|
|
|
|
|
|
|
$RE{E_JSKYv1} = $RE{E_JSKY_START} |
316
|
|
|
|
|
|
|
. $RE{E_JSKY1v1} . $RE{E_JSKY2} . '+' |
317
|
|
|
|
|
|
|
. $RE{E_JSKY_END}; |
318
|
|
|
|
|
|
|
$RE{E_JSKYv2} = $RE{E_JSKY_START} |
319
|
|
|
|
|
|
|
. $RE{E_JSKY1v2} . $RE{E_JSKY2} . '+' |
320
|
|
|
|
|
|
|
. $RE{E_JSKY_END}; |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
@CHARSET_LIST = qw( |
323
|
|
|
|
|
|
|
utf8 |
324
|
|
|
|
|
|
|
ucs2 |
325
|
|
|
|
|
|
|
ucs4 |
326
|
|
|
|
|
|
|
utf16 |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
sjis |
329
|
|
|
|
|
|
|
sjis-imode |
330
|
|
|
|
|
|
|
sjis-doti |
331
|
|
|
|
|
|
|
sjis-jsky |
332
|
|
|
|
|
|
|
sjis-icon-au |
333
|
|
|
|
|
|
|
cp932 |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
jis |
336
|
|
|
|
|
|
|
jis-jsky |
337
|
|
|
|
|
|
|
jis-au |
338
|
|
|
|
|
|
|
jis-icon-au |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
euc |
341
|
|
|
|
|
|
|
euc-jp |
342
|
|
|
|
|
|
|
euc-icon-au |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
utf8-jsky |
345
|
|
|
|
|
|
|
utf8-icon-au |
346
|
|
|
|
|
|
|
utf8-imode |
347
|
|
|
|
|
|
|
); |
348
|
|
|
|
|
|
|
|
349
|
29
|
|
|
29
|
|
532
|
use vars qw($s2u_table $u2s_table); |
|
29
|
|
|
|
|
57
|
|
|
29
|
|
|
|
|
1715
|
|
350
|
29
|
|
|
29
|
|
138
|
use vars qw($ei2u1 $ei2u2 $ed2u $ej2u1 $ej2u2 $ea2u1 $ea2u2 $ea2u1s $ea2u2s); |
|
29
|
|
|
|
|
50
|
|
|
29
|
|
|
|
|
3618
|
|
351
|
29
|
|
|
29
|
|
154
|
use vars qw($eu2i1 $eu2i2 $eu2d $eu2j1 $eu2j2 $eu2a1 $eu2a2 $eu2a1s $eu2a2s); |
|
29
|
|
|
|
|
59
|
|
|
29
|
|
|
|
|
4062
|
|
352
|
|
|
|
|
|
|
|
353
|
29
|
|
|
29
|
|
150
|
use vars qw(%_h2zNum %_z2hNum %_h2zAlpha %_z2hAlpha %_h2zSym %_z2hSym %_h2zKanaK %_z2hKanaK %_h2zKanaD %_z2hKanaD %_hira2kata %_kata2hira); |
|
29
|
|
|
|
|
46
|
|
|
29
|
|
|
|
|
3988
|
|
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
|
357
|
29
|
|
|
29
|
|
158
|
use vars qw($PID $FH $TABLE $HEADLEN $PROGLEN); |
|
29
|
|
|
|
|
123
|
|
|
29
|
|
|
|
|
2467
|
|
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
360
|
|
|
|
|
|
|
# AUTOLOAD |
361
|
|
|
|
|
|
|
# AUTOLOAD of Unicode::Japanese::PurePerl. |
362
|
|
|
|
|
|
|
# load PurePerl methods from embedded data. |
363
|
|
|
|
|
|
|
# |
364
|
|
|
|
|
|
|
AUTOLOAD |
365
|
|
|
|
|
|
|
{ |
366
|
29
|
|
|
29
|
|
156
|
use strict; |
|
29
|
|
|
|
|
54
|
|
|
29
|
|
|
|
|
1241
|
|
367
|
29
|
|
|
29
|
|
154
|
use vars qw($AUTOLOAD); |
|
29
|
|
|
|
|
54
|
|
|
29
|
|
|
|
|
8825
|
|
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
#print "AUTOLOAD... $AUTOLOAD\n"; |
370
|
|
|
|
|
|
|
|
371
|
67
|
|
|
67
|
|
4931
|
my $save = $@; |
372
|
67
|
|
|
|
|
183
|
my @BAK = @_; |
373
|
|
|
|
|
|
|
|
374
|
67
|
50
|
|
|
|
103
|
my ($pkg, $subname) = do{ |
375
|
67
|
|
|
|
|
213
|
local($1, $2); |
376
|
67
|
|
|
|
|
634
|
$AUTOLOAD =~ /^(.*)::(\w+)$/ |
377
|
|
|
|
|
|
|
} or got_undefined_subroutine($AUTOLOAD); |
378
|
|
|
|
|
|
|
|
379
|
67
|
|
|
|
|
245
|
$pkg->_loadsub($AUTOLOAD); |
380
|
|
|
|
|
|
|
|
381
|
67
|
|
|
|
|
112
|
$@ = $save; |
382
|
67
|
|
|
|
|
233
|
@_ = @BAK; |
383
|
67
|
|
|
|
|
3319
|
goto &$AUTOLOAD; |
384
|
|
|
|
|
|
|
} |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
sub _loadsub |
387
|
|
|
|
|
|
|
{ |
388
|
221
|
|
|
221
|
|
338
|
my $pkg = shift; |
389
|
221
|
|
|
|
|
301
|
my $fullsubname = shift; |
390
|
|
|
|
|
|
|
#print "subs..\n",join("\n",keys %$TABLE,''); |
391
|
29
|
|
|
29
|
|
169
|
use vars qw($AUTOLOAD); |
|
29
|
|
|
|
|
52
|
|
|
29
|
|
|
|
|
7535
|
|
392
|
|
|
|
|
|
|
|
393
|
221
|
|
|
|
|
555
|
local($1, $2); |
394
|
221
|
50
|
|
|
|
1522
|
my ($subpkg,$subname) = $fullsubname =~ /^(.*)::(\w+)$/ |
395
|
|
|
|
|
|
|
or got_undefined_subroutine($fullsubname); |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
# check |
398
|
221
|
50
|
|
|
|
946
|
if(!defined($TABLE->{$subname}{offset})) |
399
|
|
|
|
|
|
|
{ |
400
|
1
|
|
|
|
|
2
|
_init_table(); |
401
|
1
|
0
|
|
|
|
5
|
if( !defined($TABLE->{$subname}{offset}) ) |
402
|
|
|
|
|
|
|
{ |
403
|
0
|
0
|
|
|
|
0
|
if( $subname eq 'DESTROY' ) |
404
|
|
|
|
|
|
|
{ |
405
|
0
|
|
|
1
|
|
0
|
my $sub = sub{}; |
|
0
|
|
|
|
|
0
|
|
406
|
|
|
|
|
|
|
{ |
407
|
29
|
|
|
29
|
|
174
|
no strict 'refs'; |
|
29
|
|
|
|
|
58
|
|
|
29
|
|
|
|
|
16988
|
|
|
0
|
|
|
|
|
0
|
|
408
|
0
|
|
|
|
|
0
|
*$fullsubname = $sub; |
409
|
|
|
|
|
|
|
} |
410
|
0
|
|
|
|
|
0
|
return $sub; |
411
|
|
|
|
|
|
|
} |
412
|
|
|
|
|
|
|
|
413
|
0
|
|
|
|
|
0
|
CORE::die "Undefined subroutine \&$fullsubname got called.\n"; |
414
|
|
|
|
|
|
|
} |
415
|
|
|
|
|
|
|
} |
416
|
220
|
50
|
|
|
|
661
|
if($TABLE->{$subname}{offset} == -1) |
417
|
|
|
|
|
|
|
{ |
418
|
0
|
|
|
|
|
0
|
CORE::die "\&$fullsubname is getting loaded twice. There must be a problem in AUTOLOAD.\n"; |
419
|
|
|
|
|
|
|
} |
420
|
|
|
|
|
|
|
|
421
|
220
|
|
|
|
|
527
|
_check_and_update_fh(); |
422
|
220
|
50
|
|
|
|
3045
|
seek($FH, $PROGLEN + $HEADLEN + $TABLE->{$subname}{offset}, 0) |
423
|
|
|
|
|
|
|
or die "Can't seek $subname. [$!]\n"; |
424
|
|
|
|
|
|
|
|
425
|
220
|
|
|
|
|
325
|
my $sub; |
426
|
220
|
50
|
|
|
|
3746
|
read($FH, $sub, $TABLE->{$subname}{'length'}) |
427
|
|
|
|
|
|
|
or die "Can't read $subname. [$!]\n"; |
428
|
|
|
|
|
|
|
|
429
|
220
|
50
|
|
|
|
648
|
if( $]>=5.008 ) |
430
|
|
|
|
|
|
|
{ |
431
|
220
|
|
|
|
|
905
|
$sub = 'use bytes;'.$sub; |
432
|
|
|
|
|
|
|
} |
433
|
|
|
|
|
|
|
|
434
|
220
|
100
|
100
|
132
|
|
16302
|
CORE::eval(($sub=~/(.*)/s)[0]); |
|
80
|
100
|
100
|
92
|
|
359
|
|
|
80
|
100
|
66
|
139
|
|
526
|
|
|
77
|
100
|
66
|
173
|
|
1197
|
|
|
68
|
100
|
33
|
185
|
|
33823
|
|
|
62
|
100
|
33
|
199
|
|
1074
|
|
|
90
|
100
|
66
|
184
|
|
7531
|
|
|
89
|
100
|
33
|
116
|
|
819
|
|
|
87
|
100
|
33
|
54
|
|
556
|
|
|
74
|
100
|
66
|
35
|
|
650
|
|
|
58
|
100
|
33
|
31
|
|
569
|
|
|
1841
|
100
|
66
|
36
|
|
4572
|
|
|
1909
|
100
|
33
|
14
|
|
54874
|
|
|
2275
|
50
|
0
|
47
|
|
17822
|
|
|
319
|
50
|
0
|
23
|
|
11312
|
|
|
323
|
100
|
66
|
9
|
|
10193
|
|
|
296
|
100
|
66
|
59
|
|
26450
|
|
|
316
|
100
|
33
|
51
|
|
8868
|
|
|
92
|
100
|
33
|
1
|
|
21116
|
|
|
122
|
100
|
33
|
1
|
|
1535
|
|
|
62
|
100
|
0
|
1
|
|
588
|
|
|
60
|
100
|
33
|
1
|
|
533
|
|
|
76
|
100
|
66
|
|
|
23875
|
|
|
98
|
100
|
33
|
|
|
1579
|
|
|
68
|
100
|
33
|
|
|
24738
|
|
|
95
|
50
|
33
|
|
|
1607
|
|
|
35
|
100
|
0
|
|
|
2302
|
|
|
61
|
100
|
33
|
|
|
625
|
|
|
81
|
100
|
66
|
|
|
37864
|
|
|
96
|
100
|
33
|
|
|
1313
|
|
|
73
|
50
|
33
|
|
|
23053
|
|
|
100
|
100
|
33
|
|
|
1877
|
|
|
63
|
100
|
66
|
|
|
825
|
|
|
59
|
100
|
33
|
|
|
833
|
|
|
95
|
100
|
66
|
|
|
22628
|
|
|
102
|
100
|
|
|
|
2108
|
|
|
141
|
100
|
|
|
|
23306
|
|
|
158
|
100
|
|
|
|
1346
|
|
|
122
|
50
|
|
|
|
6465
|
|
|
79
|
100
|
|
|
|
985
|
|
|
174
|
100
|
|
|
|
36606
|
|
|
112
|
50
|
|
|
|
1693
|
|
|
104
|
50
|
|
|
|
21619
|
|
|
67
|
100
|
|
|
|
1135
|
|
|
42
|
100
|
|
|
|
588
|
|
|
31
|
100
|
|
|
|
55
|
|
|
31
|
100
|
|
|
|
153
|
|
|
36
|
100
|
|
|
|
161
|
|
|
33
|
100
|
|
|
|
94
|
|
|
29
|
50
|
|
|
|
57
|
|
|
80
|
50
|
|
|
|
286
|
|
|
78
|
100
|
|
|
|
1356
|
|
|
78
|
100
|
|
|
|
1087
|
|
|
64
|
100
|
|
|
|
122
|
|
|
88
|
100
|
|
|
|
263
|
|
|
76
|
50
|
|
|
|
183
|
|
|
18
|
50
|
|
|
|
41
|
|
|
76
|
100
|
|
|
|
413
|
|
|
53
|
50
|
|
|
|
86
|
|
|
122
|
50
|
|
|
|
183
|
|
|
64
|
50
|
|
|
|
417
|
|
|
64
|
50
|
|
|
|
2189
|
|
|
64
|
100
|
|
|
|
571
|
|
|
104
|
50
|
|
|
|
185
|
|
|
104
|
100
|
|
|
|
166
|
|
|
104
|
50
|
|
|
|
461
|
|
|
52
|
50
|
|
|
|
87
|
|
|
162
|
100
|
|
|
|
559
|
|
|
59
|
50
|
|
|
|
1012
|
|
|
110
|
50
|
|
|
|
1188
|
|
|
105
|
50
|
|
|
|
295
|
|
|
157
|
100
|
|
|
|
596
|
|
|
105
|
50
|
|
|
|
341
|
|
|
104
|
50
|
|
|
|
2386
|
|
|
112
|
100
|
|
|
|
254
|
|
|
60
|
50
|
|
|
|
273
|
|
|
72
|
100
|
|
|
|
349
|
|
|
21
|
100
|
|
|
|
53
|
|
|
68
|
50
|
|
|
|
169
|
|
|
56
|
50
|
|
|
|
189
|
|
|
16
|
50
|
|
|
|
240
|
|
|
16
|
100
|
|
|
|
33
|
|
|
20
|
50
|
|
|
|
1918
|
|
|
66
|
50
|
|
|
|
1502
|
|
|
32
|
100
|
|
|
|
54
|
|
|
84
|
100
|
|
|
|
284
|
|
|
70
|
0
|
|
|
|
914
|
|
|
66
|
0
|
|
|
|
1004
|
|
|
73
|
0
|
|
|
|
133
|
|
|
68
|
0
|
|
|
|
98
|
|
|
68
|
50
|
|
|
|
297
|
|
|
36
|
50
|
|
|
|
282
|
|
|
86
|
50
|
|
|
|
935
|
|
|
23
|
50
|
|
|
|
395
|
|
|
58
|
50
|
|
|
|
139
|
|
|
6
|
50
|
|
|
|
129
|
|
|
58
|
50
|
|
|
|
149
|
|
|
4
|
50
|
|
|
|
116
|
|
|
4
|
100
|
|
|
|
10
|
|
|
4
|
100
|
|
|
|
11
|
|
|
4
|
0
|
|
|
|
111
|
|
|
4
|
0
|
|
|
|
10
|
|
|
4
|
0
|
|
|
|
47
|
|
|
54
|
0
|
|
|
|
228
|
|
|
56
|
50
|
|
|
|
931
|
|
|
58
|
50
|
|
|
|
1138
|
|
|
99
|
50
|
|
|
|
316
|
|
|
97
|
50
|
|
|
|
160
|
|
|
97
|
50
|
|
|
|
343
|
|
|
6
|
50
|
|
|
|
10
|
|
|
99
|
50
|
|
|
|
207
|
|
|
9
|
50
|
|
|
|
41
|
|
|
99
|
50
|
|
|
|
134
|
|
|
6
|
100
|
|
|
|
17
|
|
|
6
|
0
|
|
|
|
15
|
|
|
6
|
0
|
|
|
|
23
|
|
|
6
|
0
|
|
|
|
19
|
|
|
2
|
0
|
|
|
|
6
|
|
|
2
|
50
|
|
|
|
5
|
|
|
97
|
50
|
|
|
|
668
|
|
|
47
|
50
|
|
|
|
846
|
|
|
99
|
50
|
|
|
|
820
|
|
|
35
|
50
|
|
|
|
71
|
|
|
35
|
50
|
|
|
|
58
|
|
|
35
|
50
|
|
|
|
115
|
|
|
0
|
50
|
|
|
|
0
|
|
|
35
|
50
|
|
|
|
94
|
|
|
0
|
100
|
|
|
|
0
|
|
|
35
|
0
|
|
|
|
94
|
|
|
2
|
0
|
|
|
|
69
|
|
|
35
|
0
|
|
|
|
180
|
|
|
10
|
0
|
|
|
|
126
|
|
|
10
|
50
|
|
|
|
137
|
|
|
10
|
50
|
|
|
|
306
|
|
|
6
|
50
|
|
|
|
12
|
|
|
6
|
50
|
|
|
|
16
|
|
|
6
|
50
|
|
|
|
18
|
|
|
25
|
50
|
|
|
|
122
|
|
|
31
|
50
|
|
|
|
481
|
|
|
26
|
50
|
|
|
|
127
|
|
|
33
|
0
|
|
|
|
77
|
|
|
29
|
50
|
|
|
|
53
|
|
|
33
|
50
|
|
|
|
94
|
|
|
6
|
50
|
|
|
|
139
|
|
|
35
|
50
|
|
|
|
214
|
|
|
6
|
50
|
|
|
|
214
|
|
|
33
|
50
|
|
|
|
82
|
|
|
7
|
50
|
|
|
|
19
|
|
|
33
|
100
|
|
|
|
68
|
|
|
0
|
100
|
|
|
|
0
|
|
|
6
|
100
|
|
|
|
15
|
|
|
0
|
100
|
|
|
|
0
|
|
|
6
|
100
|
|
|
|
29
|
|
|
1
|
50
|
|
|
|
5
|
|
|
6
|
50
|
|
|
|
12
|
|
|
27
|
50
|
|
|
|
140
|
|
|
33
|
50
|
|
|
|
553
|
|
|
33
|
100
|
|
|
|
287
|
|
|
36
|
100
|
|
|
|
207
|
|
|
36
|
50
|
|
|
|
253
|
|
|
33
|
100
|
|
|
|
100
|
|
|
3
|
100
|
|
|
|
5
|
|
|
33
|
50
|
|
|
|
85
|
|
|
1
|
50
|
|
|
|
3
|
|
|
33
|
50
|
|
|
|
103
|
|
|
1
|
100
|
|
|
|
3
|
|
|
33
|
50
|
|
|
|
60
|
|
|
1
|
50
|
|
|
|
4
|
|
|
3
|
100
|
|
|
|
7
|
|
|
0
|
100
|
|
|
|
0
|
|
|
3
|
50
|
|
|
|
5
|
|
|
3
|
100
|
|
|
|
88
|
|
|
3
|
100
|
|
|
|
75
|
|
|
33
|
50
|
|
|
|
231
|
|
|
38
|
50
|
|
|
|
598
|
|
|
38
|
50
|
|
|
|
198
|
|
|
35
|
50
|
|
|
|
102
|
|
|
27
|
100
|
|
|
|
40
|
|
|
35
|
100
|
|
|
|
103
|
|
|
1
|
50
|
|
|
|
5
|
|
|
35
|
100
|
|
|
|
87
|
|
|
1
|
100
|
|
|
|
8
|
|
|
35
|
50
|
|
|
|
69
|
|
|
1
|
100
|
|
|
|
5
|
|
|
35
|
100
|
|
|
|
61
|
|
|
8
|
50
|
|
|
|
170
|
|
|
8
|
100
|
|
|
|
101
|
|
|
8
|
50
|
|
|
|
240
|
|
|
0
|
0
|
|
|
|
0
|
|
|
0
|
0
|
|
|
|
0
|
|
|
0
|
0
|
|
|
|
0
|
|
|
27
|
0
|
|
|
|
125
|
|
|
27
|
0
|
|
|
|
641
|
|
|
27
|
0
|
|
|
|
155
|
|
|
27
|
0
|
|
|
|
120
|
|
|
30
|
0
|
|
|
|
59
|
|
|
30
|
0
|
|
|
|
54
|
|
|
30
|
0
|
|
|
|
90
|
|
|
0
|
0
|
|
|
|
0
|
|
|
30
|
50
|
|
|
|
59
|
|
|
0
|
|
|
|
|
0
|
|
|
30
|
|
|
|
|
76
|
|
|
1
|
|
|
|
|
4
|
|
|
30
|
|
|
|
|
50
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
30
|
|
|
|
|
138
|
|
|
30
|
|
|
|
|
625
|
|
|
30
|
|
|
|
|
139
|
|
|
30
|
|
|
|
|
148
|
|
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
11
|
|
|
0
|
|
|
|
|
0
|
|
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
16
|
|
|
4
|
|
|
|
|
38
|
|
|
4
|
|
|
|
|
133
|
|
|
41
|
|
|
|
|
83
|
|
|
41
|
|
|
|
|
82
|
|
|
41
|
|
|
|
|
127
|
|
|
0
|
|
|
|
|
0
|
|
|
41
|
|
|
|
|
70
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
41
|
|
|
|
|
210
|
|
|
41
|
|
|
|
|
2094
|
|
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
7
|
|
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
5
|
|
|
36
|
|
|
|
|
120
|
|
|
36
|
|
|
|
|
118
|
|
|
36
|
|
|
|
|
281
|
|
|
41
|
|
|
|
|
212
|
|
|
17
|
|
|
|
|
24
|
|
|
17
|
|
|
|
|
24
|
|
|
17
|
|
|
|
|
18
|
|
|
17
|
|
|
|
|
26
|
|
|
17
|
|
|
|
|
45
|
|
|
17
|
|
|
|
|
357
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
1
|
|
|
|
|
28
|
|
|
1
|
|
|
|
|
30
|
|
|
0
|
|
|
|
|
0
|
|
|
1
|
|
|
|
|
31
|
|
|
1
|
|
|
|
|
29
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
1
|
|
|
|
|
29
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
1
|
|
|
|
|
29
|
|
|
1
|
|
|
|
|
41
|
|
|
0
|
|
|
|
|
0
|
|
|
1
|
|
|
|
|
17
|
|
|
1
|
|
|
|
|
12
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
1
|
|
|
|
|
31
|
|
|
1
|
|
|
|
|
31
|
|
|
0
|
|
|
|
|
0
|
|
|
1
|
|
|
|
|
10
|
|
|
1
|
|
|
|
|
12
|
|
|
0
|
|
|
|
|
0
|
|
|
1
|
|
|
|
|
29
|
|
|
1
|
|
|
|
|
29
|
|
|
0
|
|
|
|
|
0
|
|
|
1
|
|
|
|
|
13
|
|
|
1
|
|
|
|
|
13
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
17
|
|
|
|
|
49
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
17
|
|
|
|
|
65
|
|
|
6
|
|
|
|
|
14
|
|
|
6
|
|
|
|
|
213
|
|
|
51
|
|
|
|
|
19314
|
|
|
51
|
|
|
|
|
1444
|
|
|
51
|
|
|
|
|
36312
|
|
|
51
|
|
|
|
|
1393
|
|
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
32
|
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
38
|
|
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
45
|
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
41
|
|
435
|
220
|
50
|
|
|
|
71502
|
if ($@) |
436
|
|
|
|
|
|
|
{ |
437
|
0
|
|
|
|
|
0
|
CORE::die $@; |
438
|
|
|
|
|
|
|
} |
439
|
220
|
|
|
|
|
396
|
$DB::sub = $fullsubname; # Now debugger knows where we are. |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
# evaled |
442
|
220
|
|
|
|
|
1168
|
$TABLE->{$subname}{offset} = -1; |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
} |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
447
|
|
|
|
|
|
|
# Unicode::Japanese::PurePerl->new() |
448
|
|
|
|
|
|
|
# |
449
|
|
|
|
|
|
|
sub new |
450
|
|
|
|
|
|
|
{ |
451
|
461
|
|
|
462
|
|
6459
|
goto &Unicode::Japanese::new; |
452
|
|
|
|
|
|
|
} |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
455
|
|
|
|
|
|
|
# DESTROY |
456
|
|
|
|
|
|
|
# |
457
|
|
|
|
|
|
|
sub DESTROY |
458
|
368
|
|
|
371
|
|
524
|
{ |
459
|
|
|
|
|
|
|
} |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
462
|
|
|
|
|
|
|
# gensym |
463
|
|
|
|
|
|
|
# |
464
|
|
|
|
|
|
|
sub gensym { |
465
|
|
|
|
|
|
|
package Unicode::Japanese::Symbol; |
466
|
29
|
|
|
29
|
|
195
|
no strict; |
|
29
|
|
|
|
|
57
|
|
|
29
|
|
|
|
|
35814
|
|
467
|
395
|
|
|
2127
|
|
1659
|
$genpkg = "Unicode::Japanese::Symbol::"; |
468
|
141
|
|
|
|
|
1985
|
$genseq = 0; |
469
|
389
|
|
|
|
|
99586
|
my $name = "GEN" . $genseq++; |
470
|
312
|
|
|
|
|
964
|
my $ref = \*{$genpkg . $name}; |
|
2150
|
|
|
|
|
414726
|
|
471
|
2151
|
|
|
|
|
4063
|
delete $$genpkg{$name}; |
472
|
2153
|
|
|
|
|
2954
|
$ref; |
473
|
|
|
|
|
|
|
} |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
sub _check_and_update_fh { |
476
|
2635
|
100
|
|
3006
|
|
6066
|
_open_fh() if not ($PID == $$); |
477
|
|
|
|
|
|
|
} |
478
|
|
|
|
|
|
|
sub _open_fh { |
479
|
2138
|
|
|
306
|
|
7137
|
my $file = "Unicode/Japanese.pm"; |
480
|
388
|
|
|
|
|
982
|
$PID = $$; |
481
|
|
|
|
|
|
|
OPEN: |
482
|
|
|
|
|
|
|
{ |
483
|
2136
|
100
|
|
|
|
4039
|
if( $INC{$file} ) |
|
376
|
|
|
|
|
888
|
|
484
|
|
|
|
|
|
|
{ |
485
|
1983
|
100
|
|
|
|
5174
|
open($FH,$INC{$file}) || CORE::die("could not open file [$INC{$file}] for input : $!"); |
486
|
293
|
|
|
|
|
1344
|
last OPEN; |
487
|
|
|
|
|
|
|
} |
488
|
2118
|
|
|
|
|
5279
|
foreach my $path (@INC) |
489
|
|
|
|
|
|
|
{ |
490
|
2118
|
|
|
|
|
11195
|
my $mypath = $path; |
491
|
2106
|
|
|
|
|
4773
|
$mypath =~ s#/$##; |
492
|
272
|
100
|
|
|
|
734
|
if (-f "$mypath/$file") |
493
|
|
|
|
|
|
|
{ |
494
|
262
|
100
|
|
|
|
1204
|
open($FH,"$mypath/$file") || CORE::die("could not open file [$INC{$file}] for input : $!"); |
495
|
262
|
|
|
|
|
1282
|
last OPEN; |
496
|
|
|
|
|
|
|
} |
497
|
|
|
|
|
|
|
} |
498
|
2098
|
|
|
|
|
4312
|
CORE::die "Can't find Japanese.pm in \@INC\n"; |
499
|
|
|
|
|
|
|
} |
500
|
438
|
|
|
|
|
1859
|
binmode($FH); |
501
|
|
|
|
|
|
|
} |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
504
|
|
|
|
|
|
|
# _init_table |
505
|
|
|
|
|
|
|
# |
506
|
|
|
|
|
|
|
sub _init_table { |
507
|
|
|
|
|
|
|
|
508
|
592
|
100
|
|
528
|
|
2239
|
if(!defined($HEADLEN)) |
509
|
|
|
|
|
|
|
{ |
510
|
1898
|
|
|
|
|
3670
|
$FH = gensym; |
511
|
|
|
|
|
|
|
|
512
|
2062
|
|
|
|
|
5014
|
_open_fh(); |
513
|
|
|
|
|
|
|
|
514
|
280
|
|
|
|
|
1072
|
local($/) = "\n"; |
515
|
280
|
|
|
|
|
1645
|
my $line; |
516
|
281
|
|
|
|
|
8240
|
while(defined($line = <$FH>)) |
517
|
|
|
|
|
|
|
{ |
518
|
47101
|
100
|
|
|
|
162516
|
last if($line =~ m/^__DATA__/); |
519
|
|
|
|
|
|
|
} |
520
|
281
|
|
|
|
|
671
|
$PROGLEN = tell($FH); |
521
|
|
|
|
|
|
|
|
522
|
894
|
100
|
|
|
|
4097
|
read($FH, $HEADLEN, 4) |
523
|
|
|
|
|
|
|
or die "Can't read the table. [$!]\n"; |
524
|
373
|
|
|
|
|
1275
|
$HEADLEN = unpack('N', $HEADLEN); |
525
|
365
|
100
|
|
|
|
1360
|
read($FH, $TABLE, $HEADLEN) |
526
|
|
|
|
|
|
|
or die "Can't seek the table. [$!]\n"; |
527
|
280
|
|
|
|
|
3135
|
$TABLE =~ /(.*)/s; |
528
|
374
|
|
|
|
|
47716
|
$TABLE = eval(($TABLE=~/(.*)/s)[0]); |
529
|
364
|
100
|
|
|
|
1530
|
if($@) |
530
|
|
|
|
|
|
|
{ |
531
|
337
|
|
|
|
|
510
|
die "Internal Error. [$@]\n"; |
532
|
|
|
|
|
|
|
} |
533
|
281
|
100
|
|
|
|
697
|
if(!defined($TABLE)) |
534
|
|
|
|
|
|
|
{ |
535
|
105
|
|
|
|
|
521
|
die "Internal Error.\n"; |
536
|
|
|
|
|
|
|
} |
537
|
363
|
|
|
|
|
680
|
$HEADLEN += 4; |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
# load xs. |
540
|
114
|
|
|
|
|
595
|
Unicode::Japanese::load_xs(); |
541
|
|
|
|
|
|
|
} |
542
|
|
|
|
|
|
|
} |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
545
|
|
|
|
|
|
|
# _getFile |
546
|
|
|
|
|
|
|
# load embedded file data. |
547
|
|
|
|
|
|
|
# |
548
|
|
|
|
|
|
|
sub _getFile { |
549
|
635
|
|
|
348
|
|
1501
|
my $this = shift; |
550
|
|
|
|
|
|
|
|
551
|
805
|
|
|
|
|
3142
|
my $file = shift; |
552
|
|
|
|
|
|
|
|
553
|
885
|
100
|
|
|
|
3619
|
exists($TABLE->{$file}) |
554
|
|
|
|
|
|
|
or die "no such file [$file]\n"; |
555
|
|
|
|
|
|
|
|
556
|
624
|
|
|
|
|
1757
|
_check_and_update_fh(); |
557
|
|
|
|
|
|
|
#my $offset16 = $TABLE->{$file}{offset} % 16; |
558
|
|
|
|
|
|
|
#print STDERR "_getFile($file, $TABLE->{$file}{offset}, $TABLE->{$file}{'length'}, $offset16)\n"; |
559
|
876
|
100
|
|
|
|
13025
|
seek($FH, $PROGLEN + $HEADLEN + $TABLE->{$file}{offset}, 0) |
560
|
|
|
|
|
|
|
or die "Can't seek $file. [$!]\n"; |
561
|
|
|
|
|
|
|
|
562
|
872
|
|
|
|
|
7993
|
my $data; |
563
|
561
|
100
|
|
|
|
7082
|
read($FH, $data, $TABLE->{$file}{'length'}) |
564
|
|
|
|
|
|
|
or die "Can't read $file. [$!]\n"; |
565
|
|
|
|
|
|
|
|
566
|
547
|
|
|
|
|
13469
|
$data; |
567
|
|
|
|
|
|
|
} |
568
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
570
|
|
|
|
|
|
|
# use_I18N_Japanese |
571
|
|
|
|
|
|
|
# copied from I18N::Japanese in jperl-5.5.3 |
572
|
|
|
|
|
|
|
# |
573
|
|
|
|
|
|
|
sub use_I18N_Japanese |
574
|
|
|
|
|
|
|
{ |
575
|
306
|
|
|
7
|
|
7983
|
shift; |
576
|
11
|
100
|
|
|
|
166
|
if( @_ ) |
577
|
|
|
|
|
|
|
{ |
578
|
13
|
|
|
|
|
211
|
my $bits = 0; |
579
|
4
|
|
|
|
|
120
|
foreach( @_ ) |
580
|
|
|
|
|
|
|
{ |
581
|
5
|
100
|
|
|
|
59
|
$bits |= 0x1000000 if $_ eq 're'; |
582
|
4
|
100
|
|
|
|
54
|
$bits |= 0x2000000 if $_ eq 'tr'; |
583
|
53
|
100
|
|
|
|
1416
|
$bits |= 0x4000000 if $_ eq 'format'; |
584
|
0
|
100
|
|
|
|
0
|
$bits |= 0x8000000 if $_ eq 'string'; |
585
|
|
|
|
|
|
|
} |
586
|
0
|
|
|
|
|
0
|
$^H |= $bits; |
587
|
|
|
|
|
|
|
}else |
588
|
|
|
|
|
|
|
{ |
589
|
0
|
|
|
|
|
0
|
$^H |= 0x0f00_0000; |
590
|
|
|
|
|
|
|
} |
591
|
|
|
|
|
|
|
} |
592
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
594
|
|
|
|
|
|
|
# no_I18N_Japanese |
595
|
|
|
|
|
|
|
# copied from I18N::Japanese in jperl-5.5.3 |
596
|
|
|
|
|
|
|
# |
597
|
|
|
|
|
|
|
sub no_I18N_Japanese |
598
|
|
|
|
|
|
|
{ |
599
|
9
|
|
|
52
|
|
267
|
shift; |
600
|
3
|
100
|
|
|
|
81
|
if( @_ ) |
601
|
|
|
|
|
|
|
{ |
602
|
2
|
|
|
|
|
44
|
my $bits = 0; |
603
|
1
|
|
|
|
|
15
|
foreach( @_ ) |
604
|
|
|
|
|
|
|
{ |
605
|
1
|
100
|
|
|
|
30
|
$bits |= 0x1000000 if $_ eq 're'; |
606
|
8
|
100
|
|
|
|
214
|
$bits |= 0x2000000 if $_ eq 'tr'; |
607
|
1
|
100
|
|
|
|
17
|
$bits |= 0x4000000 if $_ eq 'format'; |
608
|
1
|
100
|
|
|
|
34
|
$bits |= 0x8000000 if $_ eq 'string'; |
609
|
|
|
|
|
|
|
} |
610
|
0
|
|
|
|
|
0
|
$^H &= ~$bits; |
611
|
|
|
|
|
|
|
}else |
612
|
|
|
|
|
|
|
{ |
613
|
0
|
|
|
|
|
0
|
$^H &= ~0x0f00_0000; |
614
|
|
|
|
|
|
|
} |
615
|
|
|
|
|
|
|
} |
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
1; |
618
|
|
|
|
|
|
|
|
619
|
|
|
|
|
|
|
=encoding utf-8 |
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
=head1 NAME |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
Unicode::Japanese - Convert encoding of japanese text |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
=head1 SYNOPSIS |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
use Unicode::Japanese; |
629
|
|
|
|
|
|
|
use Unicode::Japanese qw(unijp); |
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
# convert utf8 -> sjis |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
print Unicode::Japanese->new($str)->sjis; |
634
|
|
|
|
|
|
|
print unijp($str)->sjis; # same as above. |
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
# convert sjis -> utf8 |
637
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
print Unicode::Japanese->new($str,'sjis')->get; |
639
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
# convert sjis (imode_EMOJI) -> utf8 |
641
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
print Unicode::Japanese->new($str,'sjis-imode')->get; |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
# convert zenkaku (utf8) -> hankaku (utf8) |
645
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
print Unicode::Japanese->new($str)->z2h->get; |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
=head1 DESCRIPTION |
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
The Unicode::Japanese module converts encoding of japanese text from one |
651
|
|
|
|
|
|
|
encoding to another. |
652
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
=head2 FEATURES |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
=over 2 |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
=item * |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
An instance of Unicode::Japanese internally holds a string in UTF-8. |
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
=item * |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
This module is implemented in two ways: XS and pure perl. If efficiency is |
670
|
|
|
|
|
|
|
important for you, you should build and install the XS module. If you don't want |
671
|
|
|
|
|
|
|
to, or if you can't build the XS module, you may use the pure perl module |
672
|
|
|
|
|
|
|
instead. In that case, only you have to do is to copy Japanese.pm into somewhere |
673
|
|
|
|
|
|
|
in @INC. |
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
=item * |
677
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
This module can convert characters from zenkaku (full-width) form to hankaku |
681
|
|
|
|
|
|
|
(half-width) form, and vice versa. Conversion between hiragana (one of two sets |
682
|
|
|
|
|
|
|
of japanese phonetical alphabet) and katakana (another set of japanese |
683
|
|
|
|
|
|
|
phonetical alphabet) is also supported. |
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
=item * |
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
This module has mapping tables for emoji (graphic characters) defined by various |
691
|
|
|
|
|
|
|
japanese mobile phones; DoCoMo i-mode, ASTEL dot-i and J-PHONE J-Sky. Those |
692
|
|
|
|
|
|
|
letters are mapped on Unicode Private Use Area so unicode strings it outputs are |
693
|
|
|
|
|
|
|
still valid even if they contain emoji, and you can safely pass them to other |
694
|
|
|
|
|
|
|
softwares that can handle Unicode. |
695
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
=item * |
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
This module can map some emoji from one set to another. Different mobile phones |
702
|
|
|
|
|
|
|
define different sets of emoji, so mapping each other is not always |
703
|
|
|
|
|
|
|
possible. But since some emoji exist in two or more sets with similar |
704
|
|
|
|
|
|
|
appearance, this module considers those emoji to be the same. |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
|
707
|
|
|
|
|
|
|
=item * |
708
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
This module uses the mapping table for MS-CP932 instead of the standard |
712
|
|
|
|
|
|
|
Shift_JIS. The Shift_JIS encoding used by MS-Windows (MS-SJIS/MS-CP932) slightly |
713
|
|
|
|
|
|
|
differs from the standard. |
714
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
=item * |
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
When the module converts strings from Unicode to Shift_JIS, EUC-JP or |
721
|
|
|
|
|
|
|
ISO-2022-JP, unicode letters which can't be represented in those encodings will |
722
|
|
|
|
|
|
|
be encoded in "dddd;" form (decimal character reference). Note, however, that |
723
|
|
|
|
|
|
|
letters in Unicode Private Use Area will be replaced with '?' mark ('QUESTION |
724
|
|
|
|
|
|
|
MARK'; U+003F) instead of being encoded. In addition, encoding to character sets |
725
|
|
|
|
|
|
|
for mobile phones makes every unrepresentable letters being '?' mark. |
726
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
=item * |
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
On perl-5.8.0 or later, this module handles the UTF-8 flag: the method utf8() |
733
|
|
|
|
|
|
|
returns UTF-8 I string, and the method getu() returns UTF-8 I |
734
|
|
|
|
|
|
|
string. |
735
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
Currently the method get() returns UTF-8 I string but this behavior may be |
738
|
|
|
|
|
|
|
changed in the future. |
739
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
Methods like sjis(), jis(), utf8(), and such like return I string. new(), |
742
|
|
|
|
|
|
|
set(), getcode() methods just ignore the UTF-8 flag of strings they take. |
743
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
=back |
746
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
=head1 REQUIREMENT |
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
=over 4 |
750
|
|
|
|
|
|
|
|
751
|
|
|
|
|
|
|
=item * |
752
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
perl 5.10.x, 5.8.x, etc. (5.004 and later) |
756
|
|
|
|
|
|
|
|
757
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
=item * |
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
(optional) |
763
|
|
|
|
|
|
|
C Compiler. |
764
|
|
|
|
|
|
|
This module supports both XS and Pure Perl. |
765
|
|
|
|
|
|
|
If you have no C Compilers, |
766
|
|
|
|
|
|
|
Unicode::Japanese will be installed as Pure Perl module. |
767
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
|
769
|
|
|
|
|
|
|
=item * |
770
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
(optional) |
774
|
|
|
|
|
|
|
Test.pm and Test::More for testing. |
775
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
=back |
778
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
No other modules are required at run time. |
780
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
=head1 METHODS |
783
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
=over 4 |
785
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
=item $s = Unicode::Japanese->new($str [, $icode [, $encode]]) |
787
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
Create a new instance of Unicode::Japanese. |
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
Any given parameters will be internally passed to the method L(). |
792
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
=item $s = unijp($str [, $icode [, $encode]]) |
795
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
Same as Unicode::Jananese->new(...). |
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
=item $s->set($str [, $icode [, $encode]]) |
800
|
|
|
|
|
|
|
X |
801
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
=over 2 |
803
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
=item $str: string |
805
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
=item $icode: optional character encoding (default: 'utf8') |
807
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
=item $encode: optional binary encoding (default: no binary encodings are assumed) |
809
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
=back |
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
Store a string into the instance. |
813
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
|
815
|
|
|
|
|
|
|
Possible character encodings are: |
816
|
|
|
|
|
|
|
|
817
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
auto |
819
|
|
|
|
|
|
|
utf8 ucs2 ucs4 |
820
|
|
|
|
|
|
|
utf16-be utf16-le utf16 |
821
|
|
|
|
|
|
|
utf32-be utf32-le utf32 |
822
|
|
|
|
|
|
|
sjis cp932 euc euc-jp jis |
823
|
|
|
|
|
|
|
sjis-imode sjis-imode1 sjis-imode2 |
824
|
|
|
|
|
|
|
utf8-imode utf8-imode1 utf8-imode2 |
825
|
|
|
|
|
|
|
sjis-doti sjis-doti1 |
826
|
|
|
|
|
|
|
sjis-jsky sjis-jsky1 sjis-jsky2 |
827
|
|
|
|
|
|
|
jis-jsky jis-jsky1 jis-jsky2 |
828
|
|
|
|
|
|
|
utf8-jsky utf8-jsky1 utf8-jsky2 |
829
|
|
|
|
|
|
|
sjis-au sjis-au1 sjis-au2 |
830
|
|
|
|
|
|
|
jis-au jis-au1 jis-au2 |
831
|
|
|
|
|
|
|
sjis-icon-au sjis-icon-au1 sjis-icon-au2 |
832
|
|
|
|
|
|
|
euc-icon-au euc-icon-au1 euc-icon-au2 |
833
|
|
|
|
|
|
|
jis-icon-au jis-icon-au1 jis-icon-au2 |
834
|
|
|
|
|
|
|
utf8-icon-au utf8-icon-au1 utf8-icon-au2 |
835
|
|
|
|
|
|
|
ascii binary |
836
|
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
(see also L.) |
838
|
|
|
|
|
|
|
|
839
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
If you want the Unicode::Japanese detect the character encoding of string, you |
841
|
|
|
|
|
|
|
must explicitly specify 'auto' as the second argument. In that case, the given |
842
|
|
|
|
|
|
|
string will be passed to the method getcode() to guess the encoding. |
843
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
|
845
|
|
|
|
|
|
|
For binary encodings, only 'base64' is currently supported. If you specify |
846
|
|
|
|
|
|
|
'base64' as the third argument, the given string will be decoded using Base64 |
847
|
|
|
|
|
|
|
decoder. |
848
|
|
|
|
|
|
|
|
849
|
|
|
|
|
|
|
|
850
|
|
|
|
|
|
|
Specify 'binary' as the second argument if you want your string to be stored |
851
|
|
|
|
|
|
|
without modification. |
852
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
When you specify 'sjis-imode' or 'sjis-doti' as the character encoding, any |
855
|
|
|
|
|
|
|
occurences of 'dddd;' (decimal character reference) in the string will be |
856
|
|
|
|
|
|
|
interpreted and decoded as code point of emoji, just like emoji implanted into |
857
|
|
|
|
|
|
|
the string in binary form. |
858
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
|
860
|
|
|
|
|
|
|
Since encoded forms of strings in various encodings are not clearly distinctive |
861
|
|
|
|
|
|
|
to each other, it is not always certainly possible to detect what encoding is |
862
|
|
|
|
|
|
|
used for a given string. |
863
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
When a given string is possibly interpreted as both Shift_JIS and UTF-8 string, |
866
|
|
|
|
|
|
|
this module considers such a string to be encoded in Shift_JIS. And if the |
867
|
|
|
|
|
|
|
encoding is not distinguishable between 'sjis-au' and 'sjis-doti', this module |
868
|
|
|
|
|
|
|
considers it 'sjis-au'. |
869
|
|
|
|
|
|
|
|
870
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
=item $str = $s->get |
872
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
=over 2 |
874
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
=item $str: string (UTF-8) |
876
|
|
|
|
|
|
|
|
877
|
|
|
|
|
|
|
=back |
878
|
|
|
|
|
|
|
|
879
|
|
|
|
|
|
|
Get the internal string in UTF-8. |
880
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
This method currently returns a byte string (whose UTF-8 flag is turned off), |
883
|
|
|
|
|
|
|
but this behavior may be changed in the future. |
884
|
|
|
|
|
|
|
|
885
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
If you absolutely want a byte string, you should use the method utf8() |
887
|
|
|
|
|
|
|
instead. And if you want a character string (whose UTF-8 flag is turned on), you |
888
|
|
|
|
|
|
|
have to use the method getu(). |
889
|
|
|
|
|
|
|
|
890
|
|
|
|
|
|
|
|
891
|
|
|
|
|
|
|
=item $str = $s->getu |
892
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
=over 2 |
894
|
|
|
|
|
|
|
|
895
|
|
|
|
|
|
|
=item $str: string (UTF-8) |
896
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
=back |
898
|
|
|
|
|
|
|
|
899
|
|
|
|
|
|
|
Get the internal string in UTF-8. |
900
|
|
|
|
|
|
|
|
901
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
On perl-5.8.0 or later, this method returns a character string with its UTF-8 |
903
|
|
|
|
|
|
|
flag turned on. |
904
|
|
|
|
|
|
|
|
905
|
|
|
|
|
|
|
|
906
|
|
|
|
|
|
|
=item $code = $s->getcode($str) |
907
|
|
|
|
|
|
|
|
908
|
|
|
|
|
|
|
=over 2 |
909
|
|
|
|
|
|
|
|
910
|
|
|
|
|
|
|
=item $str: string |
911
|
|
|
|
|
|
|
|
912
|
|
|
|
|
|
|
=item $code: name of character encoding |
913
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
=back |
915
|
|
|
|
|
|
|
|
916
|
|
|
|
|
|
|
Detect the character encoding of given string. |
917
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
Note that this method, exceptionaly, doesn't deal with the internal string of an |
920
|
|
|
|
|
|
|
instance. |
921
|
|
|
|
|
|
|
|
922
|
|
|
|
|
|
|
|
923
|
|
|
|
|
|
|
To guess the encoding, the following algorithm is used: |
924
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
|
926
|
|
|
|
|
|
|
(For pure perl implementation) |
927
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
|
929
|
|
|
|
|
|
|
=over 4 |
930
|
|
|
|
|
|
|
|
931
|
|
|
|
|
|
|
=item 1 |
932
|
|
|
|
|
|
|
|
933
|
|
|
|
|
|
|
|
934
|
|
|
|
|
|
|
|
935
|
|
|
|
|
|
|
If the string has an UTF-32 BOM, its encoding is 'utf32'. |
936
|
|
|
|
|
|
|
|
937
|
|
|
|
|
|
|
|
938
|
|
|
|
|
|
|
=item 2 |
939
|
|
|
|
|
|
|
|
940
|
|
|
|
|
|
|
|
941
|
|
|
|
|
|
|
|
942
|
|
|
|
|
|
|
If it has an UTF-16 BOM, its encoding is 'utf16'. |
943
|
|
|
|
|
|
|
|
944
|
|
|
|
|
|
|
|
945
|
|
|
|
|
|
|
=item 3 |
946
|
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
|
948
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
If it is valid for UTF-32BE, its encoding is 'utf32-be'. |
950
|
|
|
|
|
|
|
|
951
|
|
|
|
|
|
|
|
952
|
|
|
|
|
|
|
=item 4 |
953
|
|
|
|
|
|
|
|
954
|
|
|
|
|
|
|
|
955
|
|
|
|
|
|
|
|
956
|
|
|
|
|
|
|
If it is valid for UTF-32LE, its encoding is 'utf32-le'. |
957
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
|
959
|
|
|
|
|
|
|
=item 5 |
960
|
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
|
962
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
If it contains no ESC characters or bytes whose eighth bit is on, its encoding |
964
|
|
|
|
|
|
|
is 'ascii'. Every ASCII control characters (0x00-0x1F and 0x7F) except ESC |
965
|
|
|
|
|
|
|
(0x1B) are considered to be in the range of 'ascii'. |
966
|
|
|
|
|
|
|
|
967
|
|
|
|
|
|
|
|
968
|
|
|
|
|
|
|
=item 6 |
969
|
|
|
|
|
|
|
|
970
|
|
|
|
|
|
|
|
971
|
|
|
|
|
|
|
|
972
|
|
|
|
|
|
|
If it contains escape sequences of ISO-2022-JP, its encoding is 'jis'. |
973
|
|
|
|
|
|
|
|
974
|
|
|
|
|
|
|
|
975
|
|
|
|
|
|
|
=item 7 |
976
|
|
|
|
|
|
|
|
977
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
|
979
|
|
|
|
|
|
|
If it contains any emoji defined for J-PHONE, its encoding is 'sjis-jsky'. |
980
|
|
|
|
|
|
|
|
981
|
|
|
|
|
|
|
|
982
|
|
|
|
|
|
|
=item 8 |
983
|
|
|
|
|
|
|
|
984
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
|
986
|
|
|
|
|
|
|
If it is valid for EUC-JP, its encoding is 'euc'. |
987
|
|
|
|
|
|
|
|
988
|
|
|
|
|
|
|
|
989
|
|
|
|
|
|
|
=item 9 |
990
|
|
|
|
|
|
|
|
991
|
|
|
|
|
|
|
|
992
|
|
|
|
|
|
|
|
993
|
|
|
|
|
|
|
If it is valid for Shift_JIS, its encoding is 'sjis'. |
994
|
|
|
|
|
|
|
|
995
|
|
|
|
|
|
|
|
996
|
|
|
|
|
|
|
=item 10 |
997
|
|
|
|
|
|
|
|
998
|
|
|
|
|
|
|
|
999
|
|
|
|
|
|
|
|
1000
|
|
|
|
|
|
|
If it contains any emoji defined for au, and everything else is valid for |
1001
|
|
|
|
|
|
|
Shift_JIS, its encoding is 'sjis-au'. |
1002
|
|
|
|
|
|
|
|
1003
|
|
|
|
|
|
|
|
1004
|
|
|
|
|
|
|
=item 11 |
1005
|
|
|
|
|
|
|
|
1006
|
|
|
|
|
|
|
|
1007
|
|
|
|
|
|
|
|
1008
|
|
|
|
|
|
|
If it contains any emoji defined for i-mode, and everything else is valid for |
1009
|
|
|
|
|
|
|
Shift_JIS, its encoding is 'sjis-imode'. |
1010
|
|
|
|
|
|
|
|
1011
|
|
|
|
|
|
|
|
1012
|
|
|
|
|
|
|
=item 12 |
1013
|
|
|
|
|
|
|
|
1014
|
|
|
|
|
|
|
|
1015
|
|
|
|
|
|
|
|
1016
|
|
|
|
|
|
|
If it contains any emoji defined for dot-i, and everything else is valid for |
1017
|
|
|
|
|
|
|
Shift_JIS, its encoding is 'sjis-doti'. |
1018
|
|
|
|
|
|
|
|
1019
|
|
|
|
|
|
|
|
1020
|
|
|
|
|
|
|
=item 13 |
1021
|
|
|
|
|
|
|
|
1022
|
|
|
|
|
|
|
|
1023
|
|
|
|
|
|
|
|
1024
|
|
|
|
|
|
|
If it is valid for UTF-8, its encoding is 'utf8'. |
1025
|
|
|
|
|
|
|
|
1026
|
|
|
|
|
|
|
|
1027
|
|
|
|
|
|
|
=item 14 |
1028
|
|
|
|
|
|
|
|
1029
|
|
|
|
|
|
|
|
1030
|
|
|
|
|
|
|
|
1031
|
|
|
|
|
|
|
If no conditions above are fulfilled, its encoding is 'unknown'. |
1032
|
|
|
|
|
|
|
|
1033
|
|
|
|
|
|
|
|
1034
|
|
|
|
|
|
|
=back |
1035
|
|
|
|
|
|
|
|
1036
|
|
|
|
|
|
|
(For XS implementation) |
1037
|
|
|
|
|
|
|
|
1038
|
|
|
|
|
|
|
|
1039
|
|
|
|
|
|
|
=over 4 |
1040
|
|
|
|
|
|
|
|
1041
|
|
|
|
|
|
|
=item 1 |
1042
|
|
|
|
|
|
|
|
1043
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
|
1045
|
|
|
|
|
|
|
If the string has an UTF-32 BOM, its encoding is 'utf32'. |
1046
|
|
|
|
|
|
|
|
1047
|
|
|
|
|
|
|
|
1048
|
|
|
|
|
|
|
=item 2 |
1049
|
|
|
|
|
|
|
|
1050
|
|
|
|
|
|
|
|
1051
|
|
|
|
|
|
|
|
1052
|
|
|
|
|
|
|
If it has an UTF-16 BOM, its encoding is 'utf16'. |
1053
|
|
|
|
|
|
|
|
1054
|
|
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
=item 3 |
1056
|
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
|
1058
|
|
|
|
|
|
|
|
1059
|
|
|
|
|
|
|
Find all possible encodings that might have been applied to the string from the |
1060
|
|
|
|
|
|
|
following: |
1061
|
|
|
|
|
|
|
|
1062
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
ascii / euc / sjis / jis / utf8 / utf32-be / utf32-le / sjis-jsky / |
1064
|
|
|
|
|
|
|
sjis-imode / sjis-au / sjis-doti |
1065
|
|
|
|
|
|
|
|
1066
|
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
|
=item 4 |
1068
|
|
|
|
|
|
|
|
1069
|
|
|
|
|
|
|
|
1070
|
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
If any encodings have been found possible, this module picks out one encoding |
1072
|
|
|
|
|
|
|
having the highest priority among them. The priority order is as follows: |
1073
|
|
|
|
|
|
|
|
1074
|
|
|
|
|
|
|
|
1075
|
|
|
|
|
|
|
utf32-be / utf32-le / ascii / jis / euc / sjis / sjis-jsky / sjis-imode / |
1076
|
|
|
|
|
|
|
sjis-au / sjis-doti / utf8 |
1077
|
|
|
|
|
|
|
|
1078
|
|
|
|
|
|
|
|
1079
|
|
|
|
|
|
|
=item 5 |
1080
|
|
|
|
|
|
|
|
1081
|
|
|
|
|
|
|
|
1082
|
|
|
|
|
|
|
|
1083
|
|
|
|
|
|
|
If no conditions above are fulfilled, its encoding is 'unknown'. |
1084
|
|
|
|
|
|
|
|
1085
|
|
|
|
|
|
|
|
1086
|
|
|
|
|
|
|
=back |
1087
|
|
|
|
|
|
|
|
1088
|
|
|
|
|
|
|
Pay attention to the following pitfalls in the above algorithm: |
1089
|
|
|
|
|
|
|
|
1090
|
|
|
|
|
|
|
|
1091
|
|
|
|
|
|
|
=over 2 |
1092
|
|
|
|
|
|
|
|
1093
|
|
|
|
|
|
|
=item * |
1094
|
|
|
|
|
|
|
|
1095
|
|
|
|
|
|
|
|
1096
|
|
|
|
|
|
|
|
1097
|
|
|
|
|
|
|
UTF-8 strings might be accidentally considered to be encoded in Shift_JIS. |
1098
|
|
|
|
|
|
|
|
1099
|
|
|
|
|
|
|
|
1100
|
|
|
|
|
|
|
=item * |
1101
|
|
|
|
|
|
|
|
1102
|
|
|
|
|
|
|
|
1103
|
|
|
|
|
|
|
|
1104
|
|
|
|
|
|
|
UCS-2 strings (sequence of raw UCS-2 letters in big-endian; each letters has |
1105
|
|
|
|
|
|
|
always 2 bytes) can't be detected because they look like nothing but sequences |
1106
|
|
|
|
|
|
|
of random bytes whose length is an even number. |
1107
|
|
|
|
|
|
|
|
1108
|
|
|
|
|
|
|
|
1109
|
|
|
|
|
|
|
=item * |
1110
|
|
|
|
|
|
|
|
1111
|
|
|
|
|
|
|
|
1112
|
|
|
|
|
|
|
|
1113
|
|
|
|
|
|
|
UTF-16 strings must have BOM to be detected. |
1114
|
|
|
|
|
|
|
|
1115
|
|
|
|
|
|
|
|
1116
|
|
|
|
|
|
|
=item * |
1117
|
|
|
|
|
|
|
|
1118
|
|
|
|
|
|
|
|
1119
|
|
|
|
|
|
|
|
1120
|
|
|
|
|
|
|
Emoji are only be recognized if they are implanted into the string in binary |
1121
|
|
|
|
|
|
|
form. If they are described in 'dddd;' form, they aren't considered to be |
1122
|
|
|
|
|
|
|
emoji. |
1123
|
|
|
|
|
|
|
|
1124
|
|
|
|
|
|
|
|
1125
|
|
|
|
|
|
|
=back |
1126
|
|
|
|
|
|
|
|
1127
|
|
|
|
|
|
|
Since the XS and pure perl implementations use different algorithms to guess |
1128
|
|
|
|
|
|
|
encoding, they may guess differently for the same string. Especially, the pure |
1129
|
|
|
|
|
|
|
perl implementation finds Shift_JIS strings containing ESC character (0x1B) to |
1130
|
|
|
|
|
|
|
be actually encoded in Shift_JIS but XS implementation doesn't. This is because |
1131
|
|
|
|
|
|
|
such strings can hardly be distinguished from 'sjis-jsky'. In addition, EUC-JP |
1132
|
|
|
|
|
|
|
strings containing ESC character are also rejected for the same reason. |
1133
|
|
|
|
|
|
|
|
1134
|
|
|
|
|
|
|
|
1135
|
|
|
|
|
|
|
=item $code = $s->getcodelist($str) |
1136
|
|
|
|
|
|
|
|
1137
|
|
|
|
|
|
|
=over 2 |
1138
|
|
|
|
|
|
|
|
1139
|
|
|
|
|
|
|
=item $str: string |
1140
|
|
|
|
|
|
|
|
1141
|
|
|
|
|
|
|
=item $code: name of character encodings |
1142
|
|
|
|
|
|
|
|
1143
|
|
|
|
|
|
|
=back |
1144
|
|
|
|
|
|
|
|
1145
|
|
|
|
|
|
|
Detect the character encoding of given string. |
1146
|
|
|
|
|
|
|
|
1147
|
|
|
|
|
|
|
|
1148
|
|
|
|
|
|
|
Unlike the method getcode(), getcodelist() returns a list of possible encodings. |
1149
|
|
|
|
|
|
|
|
1150
|
|
|
|
|
|
|
|
1151
|
|
|
|
|
|
|
=item $str = $s->conv($ocode, $encode) |
1152
|
|
|
|
|
|
|
|
1153
|
|
|
|
|
|
|
=over 2 |
1154
|
|
|
|
|
|
|
|
1155
|
|
|
|
|
|
|
=item $ocode: character encoding (possible encodings are:) |
1156
|
|
|
|
|
|
|
|
1157
|
|
|
|
|
|
|
utf8 ucs2 ucs4 utf16 |
1158
|
|
|
|
|
|
|
sjis cp932 euc euc-jp jis |
1159
|
|
|
|
|
|
|
sjis-imode sjis-imode1 sjis-imode2 |
1160
|
|
|
|
|
|
|
utf8-imode utf8-imode1 utf8-imode2 |
1161
|
|
|
|
|
|
|
sjis-doti sjis-doti1 |
1162
|
|
|
|
|
|
|
sjis-jsky sjis-jsky1 sjis-jsky2 |
1163
|
|
|
|
|
|
|
jis-jsky jis-jsky1 jis-jsky2 |
1164
|
|
|
|
|
|
|
utf8-jsky utf8-jsky1 utf8-jsky2 |
1165
|
|
|
|
|
|
|
sjis-au sjis-au1 sjis-au2 |
1166
|
|
|
|
|
|
|
jis-au jis-au1 jis-au2 |
1167
|
|
|
|
|
|
|
sjis-icon-au sjis-icon-au1 sjis-icon-au2 |
1168
|
|
|
|
|
|
|
euc-icon-au euc-icon-au1 euc-icon-au2 |
1169
|
|
|
|
|
|
|
jis-icon-au jis-icon-au1 jis-icon-au2 |
1170
|
|
|
|
|
|
|
utf8-icon-au utf8-icon-au1 utf8-icon-au2 |
1171
|
|
|
|
|
|
|
binary |
1172
|
|
|
|
|
|
|
|
1173
|
|
|
|
|
|
|
(see also L.) |
1174
|
|
|
|
|
|
|
|
1175
|
|
|
|
|
|
|
|
1176
|
|
|
|
|
|
|
Some encodings for mobile phones have a trailing digit like 'sjis-au2'. Those |
1177
|
|
|
|
|
|
|
digits represent the version number of encodings. Such encodings have a variant |
1178
|
|
|
|
|
|
|
with no trailing digits, like 'sjis-au', which is the same as the latest version |
1179
|
|
|
|
|
|
|
among its variants. |
1180
|
|
|
|
|
|
|
|
1181
|
|
|
|
|
|
|
|
1182
|
|
|
|
|
|
|
=item $encode: optional binary encoding |
1183
|
|
|
|
|
|
|
|
1184
|
|
|
|
|
|
|
=item $str: string |
1185
|
|
|
|
|
|
|
|
1186
|
|
|
|
|
|
|
=back |
1187
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
Get the internal string of instance with encoding it using a given character |
1189
|
|
|
|
|
|
|
encoding method. |
1190
|
|
|
|
|
|
|
|
1191
|
|
|
|
|
|
|
|
1192
|
|
|
|
|
|
|
If you want the resulting string to be encoded in Base64, specify 'base64' as |
1193
|
|
|
|
|
|
|
the second argument. |
1194
|
|
|
|
|
|
|
|
1195
|
|
|
|
|
|
|
|
1196
|
|
|
|
|
|
|
On perl-5.8.0 or later, the UTF-8 flag of resulting string is turned off even if |
1197
|
|
|
|
|
|
|
you specify 'utf8' to the first argument. |
1198
|
|
|
|
|
|
|
|
1199
|
|
|
|
|
|
|
|
1200
|
|
|
|
|
|
|
=item $s->tag2bin |
1201
|
|
|
|
|
|
|
|
1202
|
|
|
|
|
|
|
Interpret decimal character references (dddd;) in the instance, and replaces |
1203
|
|
|
|
|
|
|
them with single characters they represent. |
1204
|
|
|
|
|
|
|
|
1205
|
|
|
|
|
|
|
|
1206
|
|
|
|
|
|
|
=item $s->z2h |
1207
|
|
|
|
|
|
|
|
1208
|
|
|
|
|
|
|
Replace zenkaku (full-width) letters in the instance with hankaku (half-width) |
1209
|
|
|
|
|
|
|
letters. |
1210
|
|
|
|
|
|
|
|
1211
|
|
|
|
|
|
|
|
1212
|
|
|
|
|
|
|
=item $s->h2z |
1213
|
|
|
|
|
|
|
|
1214
|
|
|
|
|
|
|
Replace hankaku (half-width) letters in the instance with zenkaku (full-width) |
1215
|
|
|
|
|
|
|
letters. |
1216
|
|
|
|
|
|
|
|
1217
|
|
|
|
|
|
|
|
1218
|
|
|
|
|
|
|
=item $s->hira2kata |
1219
|
|
|
|
|
|
|
|
1220
|
|
|
|
|
|
|
Replace any hiragana in the instance with katakana. |
1221
|
|
|
|
|
|
|
|
1222
|
|
|
|
|
|
|
|
1223
|
|
|
|
|
|
|
=item $s->kata2hira |
1224
|
|
|
|
|
|
|
|
1225
|
|
|
|
|
|
|
Replace any katakana in the instance with hiragana. |
1226
|
|
|
|
|
|
|
|
1227
|
|
|
|
|
|
|
|
1228
|
|
|
|
|
|
|
=item $str = $s->jis |
1229
|
|
|
|
|
|
|
|
1230
|
|
|
|
|
|
|
$str: byte string in ISO-2022-JP |
1231
|
|
|
|
|
|
|
|
1232
|
|
|
|
|
|
|
|
1233
|
|
|
|
|
|
|
Get the internal string of instance with encoding it in ISO-2022-JP. |
1234
|
|
|
|
|
|
|
|
1235
|
|
|
|
|
|
|
|
1236
|
|
|
|
|
|
|
=item $str = $s->euc |
1237
|
|
|
|
|
|
|
|
1238
|
|
|
|
|
|
|
$str: byte string in EUC-JP |
1239
|
|
|
|
|
|
|
|
1240
|
|
|
|
|
|
|
|
1241
|
|
|
|
|
|
|
Get the internal string of instance with encoding it in EUC-JP. |
1242
|
|
|
|
|
|
|
|
1243
|
|
|
|
|
|
|
|
1244
|
|
|
|
|
|
|
=item $str = $s->utf8 |
1245
|
|
|
|
|
|
|
|
1246
|
|
|
|
|
|
|
$str: byte string in UTF-8 |
1247
|
|
|
|
|
|
|
|
1248
|
|
|
|
|
|
|
|
1249
|
|
|
|
|
|
|
Get the internal UTF-8 string of instance. |
1250
|
|
|
|
|
|
|
|
1251
|
|
|
|
|
|
|
|
1252
|
|
|
|
|
|
|
On perl-5.8.0 or later, the UTF-8 flag of resulting string is turned off. |
1253
|
|
|
|
|
|
|
|
1254
|
|
|
|
|
|
|
|
1255
|
|
|
|
|
|
|
=item $str = $s->ucs2 |
1256
|
|
|
|
|
|
|
|
1257
|
|
|
|
|
|
|
$str: byte string in UCS-2 |
1258
|
|
|
|
|
|
|
|
1259
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
Get the internal string of instance as a sequence of raw UCS-2 letters in |
1261
|
|
|
|
|
|
|
big-endian. Note that this is different from UTF-16BE as raw UCS-2 sequence has |
1262
|
|
|
|
|
|
|
no concept of surrogate pair. |
1263
|
|
|
|
|
|
|
|
1264
|
|
|
|
|
|
|
|
1265
|
|
|
|
|
|
|
=item $str = $s->ucs4 |
1266
|
|
|
|
|
|
|
|
1267
|
|
|
|
|
|
|
$str: byte string in UCS-4 |
1268
|
|
|
|
|
|
|
|
1269
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
Get the internal string of instance as a sequence of raw UCS-4 letters in |
1271
|
|
|
|
|
|
|
big-endian. This is practically the same as UTF-32BE. |
1272
|
|
|
|
|
|
|
|
1273
|
|
|
|
|
|
|
|
1274
|
|
|
|
|
|
|
=item $str = $s->utf16 |
1275
|
|
|
|
|
|
|
|
1276
|
|
|
|
|
|
|
$str: byte string in UTF-16 |
1277
|
|
|
|
|
|
|
|
1278
|
|
|
|
|
|
|
|
1279
|
|
|
|
|
|
|
Get the insternal string of instance with encoding it in UTF-16 in big-endian |
1280
|
|
|
|
|
|
|
with no BOM prepended. |
1281
|
|
|
|
|
|
|
|
1282
|
|
|
|
|
|
|
|
1283
|
|
|
|
|
|
|
=item $str = $s->sjis |
1284
|
|
|
|
|
|
|
|
1285
|
|
|
|
|
|
|
$str: byte string in Shift_JIS |
1286
|
|
|
|
|
|
|
|
1287
|
|
|
|
|
|
|
|
1288
|
|
|
|
|
|
|
Get the internal string of instance with encoding it in Shift_JIS (MS-SJIS / |
1289
|
|
|
|
|
|
|
MS-CP932). |
1290
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
|
1292
|
|
|
|
|
|
|
=item $str = $s->sjis_imode |
1293
|
|
|
|
|
|
|
|
1294
|
|
|
|
|
|
|
$str: byte string in 'sjis-imode' |
1295
|
|
|
|
|
|
|
|
1296
|
|
|
|
|
|
|
|
1297
|
|
|
|
|
|
|
Get the internal string of instance with encoding it in 'sjis-imode'. |
1298
|
|
|
|
|
|
|
|
1299
|
|
|
|
|
|
|
|
1300
|
|
|
|
|
|
|
=item $str = $s->sjis_imode1 |
1301
|
|
|
|
|
|
|
|
1302
|
|
|
|
|
|
|
$str: byte string in 'sjis-imode1' |
1303
|
|
|
|
|
|
|
|
1304
|
|
|
|
|
|
|
|
1305
|
|
|
|
|
|
|
Get the internal string of instance with encoding it in 'sjis-imode1'. |
1306
|
|
|
|
|
|
|
|
1307
|
|
|
|
|
|
|
|
1308
|
|
|
|
|
|
|
=item $str = $s->sjis_imode2 |
1309
|
|
|
|
|
|
|
|
1310
|
|
|
|
|
|
|
$str: byte string in 'sjis-imode2' |
1311
|
|
|
|
|
|
|
|
1312
|
|
|
|
|
|
|
|
1313
|
|
|
|
|
|
|
Get the internal string of instance with encoding it in 'sjis-imode2'. |
1314
|
|
|
|
|
|
|
|
1315
|
|
|
|
|
|
|
|
1316
|
|
|
|
|
|
|
=item $str = $s->sjis_doti |
1317
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
$str: byte string in 'sjis-doti' |
1319
|
|
|
|
|
|
|
|
1320
|
|
|
|
|
|
|
|
1321
|
|
|
|
|
|
|
Get the internal string of instance with encoding it in 'sjis-doti'. |
1322
|
|
|
|
|
|
|
|
1323
|
|
|
|
|
|
|
|
1324
|
|
|
|
|
|
|
=item $str = $s->sjis_jsky |
1325
|
|
|
|
|
|
|
|
1326
|
|
|
|
|
|
|
$str: byte string in 'sjis-jsky' |
1327
|
|
|
|
|
|
|
|
1328
|
|
|
|
|
|
|
|
1329
|
|
|
|
|
|
|
Get the internal string of instance with encoding it in 'sjis-jsky'. |
1330
|
|
|
|
|
|
|
|
1331
|
|
|
|
|
|
|
|
1332
|
|
|
|
|
|
|
=item $str = $s->sjis_jsky1 |
1333
|
|
|
|
|
|
|
|
1334
|
|
|
|
|
|
|
$str: byte string in 'sjis-jsky1' |
1335
|
|
|
|
|
|
|
|
1336
|
|
|
|
|
|
|
|
1337
|
|
|
|
|
|
|
Get the internal string of instance with encoding it in 'sjis-jsky1'. |
1338
|
|
|
|
|
|
|
|
1339
|
|
|
|
|
|
|
|
1340
|
|
|
|
|
|
|
=item $str = $s->sjis_jsky |
1341
|
|
|
|
|
|
|
|
1342
|
|
|
|
|
|
|
$str: byte string in 'sjis-jsky' |
1343
|
|
|
|
|
|
|
|
1344
|
|
|
|
|
|
|
|
1345
|
|
|
|
|
|
|
Get the internal string of instance with encoding it in 'sjis-jsky'. |
1346
|
|
|
|
|
|
|
|
1347
|
|
|
|
|
|
|
|
1348
|
|
|
|
|
|
|
=item $str = $s->sjis_icon_au |
1349
|
|
|
|
|
|
|
|
1350
|
|
|
|
|
|
|
$str: byte string in 'sjis-icon-au' |
1351
|
|
|
|
|
|
|
|
1352
|
|
|
|
|
|
|
|
1353
|
|
|
|
|
|
|
Get the internal string of instance with encoding it in 'sjis-icon-au'. |
1354
|
|
|
|
|
|
|
|
1355
|
|
|
|
|
|
|
|
1356
|
|
|
|
|
|
|
=item $str_arrayref = $s->strcut($len) |
1357
|
|
|
|
|
|
|
|
1358
|
|
|
|
|
|
|
=over 2 |
1359
|
|
|
|
|
|
|
|
1360
|
|
|
|
|
|
|
=item $len: maximum length of each chunks (in number of |
1361
|
|
|
|
|
|
|
full-width characters) |
1362
|
|
|
|
|
|
|
|
1363
|
|
|
|
|
|
|
=item $str_arrayref: reference to array of strings |
1364
|
|
|
|
|
|
|
|
1365
|
|
|
|
|
|
|
=back |
1366
|
|
|
|
|
|
|
|
1367
|
|
|
|
|
|
|
Split the internal string of instance into chunks of a given length. |
1368
|
|
|
|
|
|
|
|
1369
|
|
|
|
|
|
|
|
1370
|
|
|
|
|
|
|
On perl-5.8.0 or later, UTF-8 flags of each chunks are turned on. |
1371
|
|
|
|
|
|
|
|
1372
|
|
|
|
|
|
|
|
1373
|
|
|
|
|
|
|
=item $len = $s->strlen |
1374
|
|
|
|
|
|
|
|
1375
|
|
|
|
|
|
|
$len: character width of the internal string |
1376
|
|
|
|
|
|
|
|
1377
|
|
|
|
|
|
|
|
1378
|
|
|
|
|
|
|
Calculate the character width of the internal string. Half-width characters have |
1379
|
|
|
|
|
|
|
width of one unit, and full-width characters have width of two units. |
1380
|
|
|
|
|
|
|
|
1381
|
|
|
|
|
|
|
|
1382
|
|
|
|
|
|
|
=item $s->join_csv(@values); |
1383
|
|
|
|
|
|
|
|
1384
|
|
|
|
|
|
|
@values: array of strings |
1385
|
|
|
|
|
|
|
|
1386
|
|
|
|
|
|
|
|
1387
|
|
|
|
|
|
|
Build a line of CSV from the arguments, and store it into the instance. The |
1388
|
|
|
|
|
|
|
resulting line has a trailing line break ("\n"). |
1389
|
|
|
|
|
|
|
|
1390
|
|
|
|
|
|
|
|
1391
|
|
|
|
|
|
|
=item @values = $s->split_csv; |
1392
|
|
|
|
|
|
|
|
1393
|
|
|
|
|
|
|
@values: array of strings |
1394
|
|
|
|
|
|
|
|
1395
|
|
|
|
|
|
|
|
1396
|
|
|
|
|
|
|
Parse a line of CSV in the instance and return each columns. The line will be |
1397
|
|
|
|
|
|
|
chomp()ed before getting parsed. |
1398
|
|
|
|
|
|
|
|
1399
|
|
|
|
|
|
|
|
1400
|
|
|
|
|
|
|
If the internal string was decoded from 'binary' encoding (see methods new() and |
1401
|
|
|
|
|
|
|
set()), the UTF-8 flags of the resulting array of strings are turned |
1402
|
|
|
|
|
|
|
off. Otherwise the flags are turned on. |
1403
|
|
|
|
|
|
|
|
1404
|
|
|
|
|
|
|
|
1405
|
|
|
|
|
|
|
=back |
1406
|
|
|
|
|
|
|
|
1407
|
|
|
|
|
|
|
=head1 SUPPORTED ENCODINGS |
1408
|
|
|
|
|
|
|
|
1409
|
|
|
|
|
|
|
+---------------+----+-----+-------+ |
1410
|
|
|
|
|
|
|
|encoding | in | out | guess | |
1411
|
|
|
|
|
|
|
+---------------+----+-----+-------+ |
1412
|
|
|
|
|
|
|
|auto : OK : -- | ----- | |
1413
|
|
|
|
|
|
|
+---------------+----+-----+-------+ |
1414
|
|
|
|
|
|
|
|utf8 : OK : OK | OK | |
1415
|
|
|
|
|
|
|
|ucs2 : OK : OK | ----- | |
1416
|
|
|
|
|
|
|
|ucs4 : OK : OK | ----- | |
1417
|
|
|
|
|
|
|
|utf16-be : OK : -- | ----- | |
1418
|
|
|
|
|
|
|
|utf16-le : OK : -- | ----- | |
1419
|
|
|
|
|
|
|
|utf16 : OK : OK | OK(#) | |
1420
|
|
|
|
|
|
|
|utf32-be : OK : -- | OK | |
1421
|
|
|
|
|
|
|
|utf32-le : OK : -- | OK | |
1422
|
|
|
|
|
|
|
|utf32 : OK : -- | OK(#) | |
1423
|
|
|
|
|
|
|
+---------------+----+-----+-------+ |
1424
|
|
|
|
|
|
|
|sjis : OK : OK | OK | |
1425
|
|
|
|
|
|
|
|cp932 : OK : OK | ----- | |
1426
|
|
|
|
|
|
|
|euc : OK : OK | OK | |
1427
|
|
|
|
|
|
|
|euc-jp : OK : OK | ----- | |
1428
|
|
|
|
|
|
|
|jis : OK : OK | OK | |
1429
|
|
|
|
|
|
|
+---------------+----+-----+-------+ |
1430
|
|
|
|
|
|
|
|sjis-imode : OK : OK | OK | |
1431
|
|
|
|
|
|
|
|sjis-imode1 : OK : OK | ----- | |
1432
|
|
|
|
|
|
|
|sjis-imode2 : OK : OK | ----- | |
1433
|
|
|
|
|
|
|
|utf8-imode : OK : OK | ----- | |
1434
|
|
|
|
|
|
|
|utf8-imode1 : OK : OK | ----- | |
1435
|
|
|
|
|
|
|
|utf8-imode2 : OK : OK | ----- | |
1436
|
|
|
|
|
|
|
+---------------+----+-----+-------+ |
1437
|
|
|
|
|
|
|
|sjis-doti : OK : OK | OK | |
1438
|
|
|
|
|
|
|
|sjis-doti1 : OK : OK | ----- | |
1439
|
|
|
|
|
|
|
+---------------+----+-----+-------+ |
1440
|
|
|
|
|
|
|
|sjis-jsky : OK : OK | OK | |
1441
|
|
|
|
|
|
|
|sjis-jsky1 : OK : OK | ----- | |
1442
|
|
|
|
|
|
|
|sjis-jsky2 : OK : OK | ----- | |
1443
|
|
|
|
|
|
|
|jis-jsky : OK : OK | ----- | |
1444
|
|
|
|
|
|
|
|jis-jsky1 : OK : OK | ----- | |
1445
|
|
|
|
|
|
|
|jis-jsky2 : OK : OK | ----- | |
1446
|
|
|
|
|
|
|
|utf8-jsky : OK : OK | ----- | |
1447
|
|
|
|
|
|
|
|utf8-jsky1 : OK : OK | ----- | |
1448
|
|
|
|
|
|
|
|utf8-jsky2 : OK : OK | ----- | |
1449
|
|
|
|
|
|
|
+---------------+----+-----+-------+ |
1450
|
|
|
|
|
|
|
|sjis-au : OK : OK | OK | |
1451
|
|
|
|
|
|
|
|sjis-au1 : OK : OK | ----- | |
1452
|
|
|
|
|
|
|
|sjis-au2 : OK : OK | ----- | |
1453
|
|
|
|
|
|
|
|jis-au : OK : OK | ----- | |
1454
|
|
|
|
|
|
|
|jis-au1 : OK : OK | ----- | |
1455
|
|
|
|
|
|
|
|jis-au2 : OK : OK | ----- | |
1456
|
|
|
|
|
|
|
|sjis-icon-au : OK : OK | ----- | |
1457
|
|
|
|
|
|
|
|sjis-icon-au1 : OK : OK | ----- | |
1458
|
|
|
|
|
|
|
|sjis-icon-au2 : OK : OK | ----- | |
1459
|
|
|
|
|
|
|
|euc-icon-au : OK : OK | ----- | |
1460
|
|
|
|
|
|
|
|euc-icon-au1 : OK : OK | ----- | |
1461
|
|
|
|
|
|
|
|euc-icon-au2 : OK : OK | ----- | |
1462
|
|
|
|
|
|
|
|jis-icon-au : OK : OK | ----- | |
1463
|
|
|
|
|
|
|
|jis-icon-au1 : OK : OK | ----- | |
1464
|
|
|
|
|
|
|
|jis-icon-au2 : OK : OK | ----- | |
1465
|
|
|
|
|
|
|
|utf8-icon-au : OK : OK | ----- | |
1466
|
|
|
|
|
|
|
|utf8-icon-au1 : OK : OK | ----- | |
1467
|
|
|
|
|
|
|
|utf8-icon-au2 : OK : OK | ----- | |
1468
|
|
|
|
|
|
|
+---------------+----+-----+-------+ |
1469
|
|
|
|
|
|
|
|ascii : OK : -- | OK | |
1470
|
|
|
|
|
|
|
|binary : OK : OK | ----- | |
1471
|
|
|
|
|
|
|
+---------------+----+-----+-------+ |
1472
|
|
|
|
|
|
|
(#): guessed when it has bom. |
1473
|
|
|
|
|
|
|
|
1474
|
|
|
|
|
|
|
=head2 GUESSING ORDER |
1475
|
|
|
|
|
|
|
|
1476
|
|
|
|
|
|
|
1. utf32 (#) |
1477
|
|
|
|
|
|
|
2. utf16 (#) |
1478
|
|
|
|
|
|
|
3. utf32-be |
1479
|
|
|
|
|
|
|
4. utf32-le |
1480
|
|
|
|
|
|
|
5. ascii |
1481
|
|
|
|
|
|
|
6. jis |
1482
|
|
|
|
|
|
|
7. sjis-jsky (pp) |
1483
|
|
|
|
|
|
|
8. euc |
1484
|
|
|
|
|
|
|
9. sjis |
1485
|
|
|
|
|
|
|
10. sjis-jsky (xs) |
1486
|
|
|
|
|
|
|
11. sjis-au |
1487
|
|
|
|
|
|
|
12. sjis-imode |
1488
|
|
|
|
|
|
|
13. sjis-doti |
1489
|
|
|
|
|
|
|
14. utf8 |
1490
|
|
|
|
|
|
|
15. unknown |
1491
|
|
|
|
|
|
|
|
1492
|
|
|
|
|
|
|
=head1 DESCRIPTION OF UNICODE MAPPING |
1493
|
|
|
|
|
|
|
|
1494
|
|
|
|
|
|
|
Transcoding between Unicode encodings and other ones is performed as below: |
1495
|
|
|
|
|
|
|
|
1496
|
|
|
|
|
|
|
|
1497
|
|
|
|
|
|
|
=over 2 |
1498
|
|
|
|
|
|
|
|
1499
|
|
|
|
|
|
|
=item Shift_JIS |
1500
|
|
|
|
|
|
|
|
1501
|
|
|
|
|
|
|
This module uses the mapping table of MS-CP932. |
1502
|
|
|
|
|
|
|
|
1503
|
|
|
|
|
|
|
|
1504
|
|
|
|
|
|
|
L<< ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT >> |
1505
|
|
|
|
|
|
|
|
1506
|
|
|
|
|
|
|
|
1507
|
|
|
|
|
|
|
When the module tries to convert Unicode string to Shift_JIS, it represents most |
1508
|
|
|
|
|
|
|
letters which isn't available in Shift_JIS as decimal character reference |
1509
|
|
|
|
|
|
|
('dddd;'). There is one exception to this: every graphic characters for mobile |
1510
|
|
|
|
|
|
|
phones are replaced with '?' mark. |
1511
|
|
|
|
|
|
|
|
1512
|
|
|
|
|
|
|
|
1513
|
|
|
|
|
|
|
For variants of Shift_JIS defined for mobile phones, every unrepresentable |
1514
|
|
|
|
|
|
|
characters are replaced with '?' mark unlike the plain Shift_JIS. |
1515
|
|
|
|
|
|
|
|
1516
|
|
|
|
|
|
|
|
1517
|
|
|
|
|
|
|
=item EUC-JP/ISO-2022-JP |
1518
|
|
|
|
|
|
|
|
1519
|
|
|
|
|
|
|
This module doesn't directly convert Unicode string from/to EUC-JP or |
1520
|
|
|
|
|
|
|
ISO-2022-JP: it once converts from/to Shift_JIS and then do the rest |
1521
|
|
|
|
|
|
|
translation. So characters which aren't available in the Shift_JIS can not be |
1522
|
|
|
|
|
|
|
properly translated. |
1523
|
|
|
|
|
|
|
|
1524
|
|
|
|
|
|
|
|
1525
|
|
|
|
|
|
|
=item DoCoMo i-mode |
1526
|
|
|
|
|
|
|
|
1527
|
|
|
|
|
|
|
This module maps emoji in the range of F800 - F9FF to U+0FF800 - U+0FF9FF. |
1528
|
|
|
|
|
|
|
|
1529
|
|
|
|
|
|
|
|
1530
|
|
|
|
|
|
|
=item ASTEL dot-i |
1531
|
|
|
|
|
|
|
|
1532
|
|
|
|
|
|
|
This module maps emoji in the range of F000 - F4FF to U+0FF000 - U+0FF4FF. |
1533
|
|
|
|
|
|
|
|
1534
|
|
|
|
|
|
|
|
1535
|
|
|
|
|
|
|
=item J-PHONE J-SKY |
1536
|
|
|
|
|
|
|
|
1537
|
|
|
|
|
|
|
The encoding method defined by J-SKY is as follows: first an escape sequence |
1538
|
|
|
|
|
|
|
"\e\$" comes to indicate the beginning of emoji, then the first byte of an emoji |
1539
|
|
|
|
|
|
|
comes next, then the second bytes of at least one emoji comes next, then "\x0f" |
1540
|
|
|
|
|
|
|
comes last to indicate the end of emoji. If a string contains a series of emoji |
1541
|
|
|
|
|
|
|
whose first bytes are identical, such sequence can be compressed by cascading |
1542
|
|
|
|
|
|
|
second bytes of them to the single first byte. |
1543
|
|
|
|
|
|
|
|
1544
|
|
|
|
|
|
|
|
1545
|
|
|
|
|
|
|
This module considers a pair of those first and second bytes to be one letter, |
1546
|
|
|
|
|
|
|
and map them from 4500 - 47FF to U+0FFB00 - U+0FFDFF. |
1547
|
|
|
|
|
|
|
|
1548
|
|
|
|
|
|
|
|
1549
|
|
|
|
|
|
|
When the module encodes J-SKY emoji, it performs the compression automatically. |
1550
|
|
|
|
|
|
|
|
1551
|
|
|
|
|
|
|
|
1552
|
|
|
|
|
|
|
=item AU |
1553
|
|
|
|
|
|
|
|
1554
|
|
|
|
|
|
|
This module maps AU emoji to U+0FF500 - U+0FF6FF. |
1555
|
|
|
|
|
|
|
|
1556
|
|
|
|
|
|
|
|
1557
|
|
|
|
|
|
|
=back |
1558
|
|
|
|
|
|
|
|
1559
|
|
|
|
|
|
|
=head1 PurePerl mode |
1560
|
|
|
|
|
|
|
|
1561
|
|
|
|
|
|
|
use Unicode::Japanese qw(PurePerl); |
1562
|
|
|
|
|
|
|
|
1563
|
|
|
|
|
|
|
If you want to explicitly take the pure perl implementation, pass |
1564
|
|
|
|
|
|
|
C<'PurePerl'> to the argument of the C |
1565
|
|
|
|
|
|
|
|
1566
|
|
|
|
|
|
|
|
1567
|
|
|
|
|
|
|
=head1 BUGS |
1568
|
|
|
|
|
|
|
|
1569
|
|
|
|
|
|
|
Please report bugs and requests to C or |
1570
|
|
|
|
|
|
|
L. If you |
1571
|
|
|
|
|
|
|
report them to the web interface, any progress to your report will be |
1572
|
|
|
|
|
|
|
automatically sent back to you. |
1573
|
|
|
|
|
|
|
|
1574
|
|
|
|
|
|
|
|
1575
|
|
|
|
|
|
|
=over 2 |
1576
|
|
|
|
|
|
|
|
1577
|
|
|
|
|
|
|
=item * |
1578
|
|
|
|
|
|
|
|
1579
|
|
|
|
|
|
|
|
1580
|
|
|
|
|
|
|
|
1581
|
|
|
|
|
|
|
This module doesn't directly convert Unicode string from/to EUC-JP or |
1582
|
|
|
|
|
|
|
ISO-2022-JP: it once converts from/to Shift_JIS and then do the rest |
1583
|
|
|
|
|
|
|
translation. So characters which aren't available in the Shift_JIS can not be |
1584
|
|
|
|
|
|
|
properly translated. |
1585
|
|
|
|
|
|
|
|
1586
|
|
|
|
|
|
|
|
1587
|
|
|
|
|
|
|
=item * |
1588
|
|
|
|
|
|
|
|
1589
|
|
|
|
|
|
|
|
1590
|
|
|
|
|
|
|
|
1591
|
|
|
|
|
|
|
The XS implementation of getcode() fails to detect the encoding when the given |
1592
|
|
|
|
|
|
|
string contains \e while its encoding is EUC-JP or Shift_JIS. |
1593
|
|
|
|
|
|
|
|
1594
|
|
|
|
|
|
|
|
1595
|
|
|
|
|
|
|
=item * |
1596
|
|
|
|
|
|
|
|
1597
|
|
|
|
|
|
|
|
1598
|
|
|
|
|
|
|
|
1599
|
|
|
|
|
|
|
Japanese.pm is composed of textual perl script and binary character conversion |
1600
|
|
|
|
|
|
|
table. If you transfer it on FTP using ASCII mode, the file will collapse. |
1601
|
|
|
|
|
|
|
|
1602
|
|
|
|
|
|
|
|
1603
|
|
|
|
|
|
|
=back |
1604
|
|
|
|
|
|
|
|
1605
|
|
|
|
|
|
|
=head1 SUPPORT |
1606
|
|
|
|
|
|
|
|
1607
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
1608
|
|
|
|
|
|
|
|
1609
|
|
|
|
|
|
|
|
1610
|
|
|
|
|
|
|
perldoc Unicode::Japanese |
1611
|
|
|
|
|
|
|
|
1612
|
|
|
|
|
|
|
You can find more information at: |
1613
|
|
|
|
|
|
|
|
1614
|
|
|
|
|
|
|
|
1615
|
|
|
|
|
|
|
=over 4 |
1616
|
|
|
|
|
|
|
|
1617
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
1618
|
|
|
|
|
|
|
|
1619
|
|
|
|
|
|
|
L |
1620
|
|
|
|
|
|
|
|
1621
|
|
|
|
|
|
|
|
1622
|
|
|
|
|
|
|
=item * CPAN Ratings |
1623
|
|
|
|
|
|
|
|
1624
|
|
|
|
|
|
|
L |
1625
|
|
|
|
|
|
|
|
1626
|
|
|
|
|
|
|
|
1627
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker |
1628
|
|
|
|
|
|
|
|
1629
|
|
|
|
|
|
|
L |
1630
|
|
|
|
|
|
|
|
1631
|
|
|
|
|
|
|
|
1632
|
|
|
|
|
|
|
=item * Search CPAN |
1633
|
|
|
|
|
|
|
|
1634
|
|
|
|
|
|
|
L |
1635
|
|
|
|
|
|
|
|
1636
|
|
|
|
|
|
|
|
1637
|
|
|
|
|
|
|
=back |
1638
|
|
|
|
|
|
|
|
1639
|
|
|
|
|
|
|
=head1 CREDITS |
1640
|
|
|
|
|
|
|
|
1641
|
|
|
|
|
|
|
Thanks very much to: |
1642
|
|
|
|
|
|
|
|
1643
|
|
|
|
|
|
|
|
1644
|
|
|
|
|
|
|
NAKAYAMA Nao |
1645
|
|
|
|
|
|
|
|
1646
|
|
|
|
|
|
|
|
1647
|
|
|
|
|
|
|
SUGIURA Tatsuki & Debian JP Project |
1648
|
|
|
|
|
|
|
|
1649
|
|
|
|
|
|
|
|
1650
|
|
|
|
|
|
|
=head1 COPYRIGHT & LICENSE |
1651
|
|
|
|
|
|
|
|
1652
|
|
|
|
|
|
|
Copyright 2001-2008 |
1653
|
|
|
|
|
|
|
SANO Taku (SAWATARI Mikage) and YAMASHINA Hio, |
1654
|
|
|
|
|
|
|
all rights reserved. |
1655
|
|
|
|
|
|
|
|
1656
|
|
|
|
|
|
|
|
1657
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
1658
|
|
|
|
|
|
|
under the same terms as Perl itself. |
1659
|
|
|
|
|
|
|
|
1660
|
|
|
|
|
|
|
|
1661
|
|
|
|
|
|
|
|
1662
|
|
|
|
|
|
|
=cut |
1663
|
|
|
|
|
|
|
|
1664
|
|
|
|
|
|
|
|
1665
|
|
|
|
|
|
|
|
1666
|
|
|
|
|
|
|
__DATA__ |