line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#-*- perl -*- |
2
|
|
|
|
|
|
|
#-*- coding: us-ascii -*- |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
package Encode::ISO2022; |
5
|
|
|
|
|
|
|
|
6
|
2
|
|
|
2
|
|
26536
|
use 5.007003; |
|
2
|
|
|
|
|
7
|
|
|
2
|
|
|
|
|
75
|
|
7
|
2
|
|
|
2
|
|
10
|
use strict; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
74
|
|
8
|
2
|
|
|
2
|
|
10
|
use warnings; |
|
2
|
|
|
|
|
7
|
|
|
2
|
|
|
|
|
68
|
|
9
|
2
|
|
|
2
|
|
10
|
use base qw(Encode::Encoding); |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
2068
|
|
10
|
|
|
|
|
|
|
our $VERSION = '0.04'; |
11
|
|
|
|
|
|
|
|
12
|
2
|
|
|
2
|
|
25532
|
use Carp qw(carp croak); |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
168
|
|
13
|
2
|
|
|
2
|
|
11
|
use XSLoader; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
7396
|
|
14
|
|
|
|
|
|
|
XSLoader::load(__PACKAGE__, $VERSION); |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
my $err_encode_nomap = '"\x{%*v04X}" does not map to %s'; |
17
|
|
|
|
|
|
|
my $err_decode_nomap = '%s "\x%*v02X" does not map to Unicode'; |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
my $DIE_ON_ERR = Encode::DIE_ON_ERR(); |
20
|
|
|
|
|
|
|
my $FB_QUIET = Encode::FB_QUIET(); |
21
|
|
|
|
|
|
|
my $HTMLCREF = Encode::HTMLCREF(); |
22
|
|
|
|
|
|
|
my $LEAVE_SRC = Encode::LEAVE_SRC(); |
23
|
|
|
|
|
|
|
my $PERLQQ = Encode::PERLQQ(); |
24
|
|
|
|
|
|
|
my $RETURN_ON_ERR = Encode::RETURN_ON_ERR(); |
25
|
|
|
|
|
|
|
my $WARN_ON_ERR = Encode::WARN_ON_ERR(); |
26
|
|
|
|
|
|
|
my $XMLCREF = Encode::XMLCREF(); |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# Constructor |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
sub Define { |
31
|
1
|
|
|
1
|
0
|
4
|
my $pkg = shift; |
32
|
1
|
|
|
|
|
32
|
my %opts = @_; |
33
|
|
|
|
|
|
|
|
34
|
1
|
|
|
|
|
3
|
my $Name = $opts{Name}; |
35
|
1
|
50
|
|
|
|
10
|
croak 'No name defined' unless $Name; |
36
|
|
|
|
|
|
|
|
37
|
1
|
50
|
|
|
|
3
|
my @CCS = @{$opts{CCS} || []}; |
|
1
|
|
|
|
|
9
|
|
38
|
1
|
50
|
|
|
|
6
|
croak 'No CCS defined' unless @CCS; |
39
|
1
|
|
|
|
|
3
|
my @ccs; |
40
|
1
|
|
|
|
|
3
|
foreach my $ccs (@CCS) { |
41
|
10
|
|
|
|
|
11
|
my $encoding; |
42
|
10
|
50
|
|
|
|
33
|
if (ref $ccs->{encoding}) { |
|
|
50
|
|
|
|
|
|
43
|
0
|
|
|
|
|
0
|
$encoding = $ccs->{encoding}; |
44
|
|
|
|
|
|
|
} elsif ($ccs->{encoding}) { |
45
|
10
|
|
|
|
|
32
|
$encoding = Encode::find_encoding($ccs->{encoding}); |
46
|
|
|
|
|
|
|
} |
47
|
10
|
50
|
0
|
|
|
139
|
croak sprintf 'Unknown encoding "%s"', ($ccs->{encoding} || '') |
48
|
|
|
|
|
|
|
unless $encoding; |
49
|
10
|
|
|
|
|
77
|
push @ccs, { %$ccs, encoding => $encoding }; |
50
|
|
|
|
|
|
|
} |
51
|
|
|
|
|
|
|
|
52
|
1
|
|
50
|
|
|
11
|
my $self = bless { |
53
|
|
|
|
|
|
|
CCS => [@ccs], |
54
|
|
|
|
|
|
|
LineInit => $opts{LineInit}, |
55
|
|
|
|
|
|
|
Name => $Name, |
56
|
|
|
|
|
|
|
SubChar => ($opts{SubChar} || '?') |
57
|
|
|
|
|
|
|
} => $pkg; |
58
|
|
|
|
|
|
|
|
59
|
1
|
50
|
|
|
|
11
|
Encode::define_alias($opts{Alias} => "\"$Name\"") if $opts{Alias}; |
60
|
1
|
|
|
|
|
21
|
$Encode::Encoding{$Name} = $self; |
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# decode method |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
sub decode { |
66
|
1
|
|
|
1
|
1
|
2157
|
my ($self, $str, $chk) = @_; |
67
|
|
|
|
|
|
|
|
68
|
1
|
|
|
|
|
2
|
my $chk_sub; |
69
|
1
|
|
|
|
|
2
|
my $utf8 = ''; |
70
|
1
|
|
|
|
|
2
|
my $errChar; |
71
|
|
|
|
|
|
|
|
72
|
1
|
50
|
|
|
|
5
|
if (ref $chk eq 'CODE') { |
73
|
0
|
|
|
|
|
0
|
$chk_sub = $chk; |
74
|
0
|
|
|
|
|
0
|
$chk = $PERLQQ | $LEAVE_SRC; |
75
|
|
|
|
|
|
|
} |
76
|
|
|
|
|
|
|
|
77
|
1
|
|
|
|
|
9
|
$self->init_state(1); |
78
|
|
|
|
|
|
|
|
79
|
1
|
|
|
|
|
4
|
pos($str) = 0; |
80
|
1
|
|
|
|
|
2
|
my $chunk = ''; |
81
|
|
|
|
|
|
|
CHUNKS: |
82
|
1
|
|
|
|
|
11
|
while ( |
83
|
|
|
|
|
|
|
$str =~ m{ |
84
|
|
|
|
|
|
|
\G |
85
|
|
|
|
|
|
|
( |
86
|
|
|
|
|
|
|
( # designation (FIXME) |
87
|
|
|
|
|
|
|
\e\x24?[\x28-\x2B\x2D-\x2F][\x20-\x2F]*[\x40-\x7E] | |
88
|
|
|
|
|
|
|
\e\x24[\x40-\x42] | |
89
|
|
|
|
|
|
|
) | |
90
|
|
|
|
|
|
|
( # locking shift |
91
|
|
|
|
|
|
|
\x0E|\x0F|\e[\x6E\x6F\x7C\x7D\x7E] |
92
|
|
|
|
|
|
|
) | |
93
|
|
|
|
|
|
|
) |
94
|
|
|
|
|
|
|
( |
95
|
|
|
|
|
|
|
( # single shift 2 |
96
|
|
|
|
|
|
|
\x8E|\e\x4E |
97
|
|
|
|
|
|
|
) | |
98
|
|
|
|
|
|
|
( # single shift 3 |
99
|
|
|
|
|
|
|
\x8F|\e\x4F |
100
|
|
|
|
|
|
|
) | |
101
|
|
|
|
|
|
|
) |
102
|
|
|
|
|
|
|
( |
103
|
|
|
|
|
|
|
[^\x0E\x0F\e\x8E\x8F]* |
104
|
|
|
|
|
|
|
) |
105
|
|
|
|
|
|
|
}gcx |
106
|
|
|
|
|
|
|
) { |
107
|
17166
|
|
|
|
|
61040
|
my ($func, $g_seq, $ls, $ss, $ss2, $ss3, $chunk) = |
108
|
|
|
|
|
|
|
($1, $2, $3, $4, $5, $6, $7); |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
# process designation and invokation. |
111
|
17166
|
|
|
|
|
16635
|
my $errSeq; |
112
|
17166
|
100
|
|
|
|
25478
|
if ($g_seq) { |
|
|
50
|
|
|
|
|
|
113
|
17152
|
50
|
|
|
|
40111
|
unless (defined $self->designate_dec($g_seq)) { |
114
|
0
|
|
|
|
|
0
|
$errSeq = $g_seq; |
115
|
|
|
|
|
|
|
} |
116
|
|
|
|
|
|
|
} elsif ($ls) { |
117
|
0
|
0
|
|
|
|
0
|
unless (defined $self->invoke_dec($ls)) { |
118
|
0
|
|
|
|
|
0
|
$errSeq = $ls; |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
} |
121
|
17166
|
50
|
|
|
|
36503
|
if ($errSeq) { |
122
|
0
|
0
|
|
|
|
0
|
if ($chk & $DIE_ON_ERR) { |
123
|
0
|
|
|
|
|
0
|
croak sprintf $err_decode_nomap, $self->name, '\x', $errSeq; |
124
|
|
|
|
|
|
|
} |
125
|
0
|
0
|
|
|
|
0
|
if ($chk & $WARN_ON_ERR) { |
126
|
0
|
|
|
|
|
0
|
carp sprintf $err_decode_nomap, $self->name, '\x', $errSeq; |
127
|
|
|
|
|
|
|
} |
128
|
0
|
0
|
|
|
|
0
|
if ($chk & $RETURN_ON_ERR) { |
129
|
0
|
|
|
|
|
0
|
pos($str) -= length($errSeq) + length($chunk); |
130
|
0
|
|
|
|
|
0
|
last; # CHUNKS |
131
|
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
|
133
|
0
|
0
|
|
|
|
0
|
if ($chk_sub) { |
|
|
0
|
|
|
|
|
|
134
|
0
|
|
|
|
|
0
|
$utf8 .= join '', map { |
135
|
0
|
|
|
|
|
0
|
$chk_sub->(ord $_) |
136
|
|
|
|
|
|
|
} split(//, $errSeq . $chunk); |
137
|
|
|
|
|
|
|
} elsif ($chk & $PERLQQ) { |
138
|
0
|
|
|
|
|
0
|
$utf8 .= sprintf '\x%*v02X', '\x', $errSeq . $chunk; |
139
|
|
|
|
|
|
|
} else { |
140
|
0
|
|
|
|
|
0
|
$utf8 .= "\x{FFFD}" x length($chunk); |
141
|
|
|
|
|
|
|
} |
142
|
|
|
|
|
|
|
|
143
|
0
|
|
|
|
|
0
|
next; # CHUNKS |
144
|
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
# process encoded elements |
147
|
17166
|
|
|
|
|
30557
|
while (length $chunk) { |
148
|
17173
|
|
|
|
|
17670
|
my ($conv, $bytes); |
149
|
|
|
|
|
|
|
|
150
|
17173
|
|
|
|
|
32719
|
($conv, $bytes) = $self->_decode($chunk, $ss); |
151
|
17173
|
50
|
|
|
|
36786
|
if (defined $conv) { |
152
|
17173
|
|
|
|
|
19386
|
$utf8 .= $conv; |
153
|
|
|
|
|
|
|
|
154
|
17173
|
100
|
66
|
|
|
52129
|
if ($conv =~ /[\r\n]/ and $self->{LineInit}) { |
155
|
1076
|
|
|
|
|
2538
|
$self->init_state(1); |
156
|
|
|
|
|
|
|
} |
157
|
17173
|
|
|
|
|
118160
|
next; |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
|
160
|
0
|
|
0
|
|
|
0
|
$errChar = substr($chunk, 0, $bytes || 1); |
161
|
|
|
|
|
|
|
|
162
|
0
|
0
|
|
|
|
0
|
if ($chk & $DIE_ON_ERR) { |
163
|
0
|
|
|
|
|
0
|
croak sprintf $err_decode_nomap, $self->name, '\x', $errChar; |
164
|
|
|
|
|
|
|
} |
165
|
0
|
0
|
|
|
|
0
|
if ($chk & $WARN_ON_ERR) { |
166
|
0
|
|
|
|
|
0
|
carp sprintf $err_decode_nomap, $self->name, '\x', $errChar; |
167
|
|
|
|
|
|
|
} |
168
|
0
|
0
|
|
|
|
0
|
if ($chk & $RETURN_ON_ERR) { |
169
|
0
|
|
|
|
|
0
|
last CHUNKS; |
170
|
|
|
|
|
|
|
} |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
# Maybe erroneous designation: Force invoking CL and retry. |
173
|
0
|
0
|
|
|
|
0
|
if ($errChar =~ /^[\x00-\x1F]/) { |
174
|
0
|
|
|
|
|
0
|
my @ccs = grep { $_->{cl} } @{$self->{CCS}}; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
175
|
0
|
0
|
|
|
|
0
|
if (@ccs) { |
176
|
0
|
|
|
|
|
0
|
$self->designate($ccs[0]); |
177
|
0
|
|
|
|
|
0
|
next; |
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
|
181
|
0
|
|
|
|
|
0
|
substr($chunk, 0, length $errChar) = ''; |
182
|
|
|
|
|
|
|
|
183
|
0
|
0
|
|
|
|
0
|
if ($chk_sub) { |
|
|
0
|
|
|
|
|
|
184
|
0
|
|
|
|
|
0
|
$utf8 .= join '', map { |
185
|
0
|
|
|
|
|
0
|
$chk_sub->(ord $_) |
186
|
|
|
|
|
|
|
} split(//, $errChar); |
187
|
|
|
|
|
|
|
} elsif ($chk & $PERLQQ) { |
188
|
0
|
|
|
|
|
0
|
$utf8 .= sprintf '\x%*v02X', '\x', $errChar; |
189
|
|
|
|
|
|
|
} else { |
190
|
0
|
|
|
|
|
0
|
$utf8 .= "\x{FFFD}"; |
191
|
|
|
|
|
|
|
} |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
} # CHUNKS |
194
|
1
|
|
|
|
|
6
|
pos($str) -= length($chunk); |
195
|
1
|
50
|
|
|
|
10
|
$_[1] = substr($str, pos $str) unless $chk & $LEAVE_SRC; |
196
|
|
|
|
|
|
|
|
197
|
1
|
|
|
|
|
154
|
return $utf8; |
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
sub _decode { |
201
|
17173
|
|
|
17173
|
|
24723
|
my ($self, $chunk, $ss) = @_; |
202
|
|
|
|
|
|
|
|
203
|
17173
|
|
|
|
|
15592
|
my @ccs; |
204
|
|
|
|
|
|
|
my $conv; |
205
|
0
|
|
|
|
|
0
|
my $errLen; |
206
|
|
|
|
|
|
|
|
207
|
17173
|
100
|
|
|
|
24000
|
if ($ss) { |
208
|
160
|
100
|
100
|
|
|
423
|
@ccs = grep { |
209
|
16
|
|
|
|
|
27
|
$_->{_designated_to} and |
210
|
|
|
|
|
|
|
$_->{ss} and $_->{ss} eq $ss |
211
|
16
|
|
|
|
|
15
|
} @{$self->{CCS}}; |
212
|
|
|
|
|
|
|
} else { |
213
|
171570
|
100
|
33
|
|
|
811148
|
@ccs = grep { |
214
|
17157
|
|
|
|
|
29995
|
$_->{_invoked_to} or |
215
|
|
|
|
|
|
|
not ($_->{g} or $_->{g_init} or $_->{ls} or $_->{ss}) |
216
|
17157
|
|
|
|
|
16134
|
} @{$self->{CCS}}; |
217
|
|
|
|
|
|
|
} |
218
|
|
|
|
|
|
|
|
219
|
17173
|
|
|
|
|
27334
|
foreach my $ccs (@ccs) { |
220
|
17173
|
|
100
|
|
|
43446
|
my $bytes = $ccs->{bytes} || 1; |
221
|
17173
|
50
|
|
|
|
36442
|
my $range = |
|
|
100
|
|
|
|
|
|
222
|
|
|
|
|
|
|
$ccs->{range} ? $ccs->{range} : $ccs->{gr} ? '\xA0-\xFF' : undef; |
223
|
17173
|
|
|
|
|
19631
|
my $residue = ''; |
224
|
|
|
|
|
|
|
|
225
|
17173
|
100
|
|
|
|
28428
|
if ($range) { |
226
|
10906
|
50
|
|
|
|
77528
|
if ($chunk =~ /^[^$range]/) { |
|
|
50
|
|
|
|
|
|
227
|
0
|
|
|
|
|
0
|
next; |
228
|
|
|
|
|
|
|
} elsif ($chunk =~ s/([^$range].*)$//s) { |
229
|
0
|
|
|
|
|
0
|
$residue = $1; |
230
|
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
|
|
233
|
17173
|
100
|
|
|
|
31036
|
if ($ss) { |
234
|
16
|
50
|
|
|
|
28
|
if ($bytes <= length $chunk) { |
235
|
16
|
|
|
|
|
30
|
$residue = substr($chunk, $bytes) . $residue; |
236
|
16
|
|
|
|
|
25
|
$chunk = substr($chunk, 0, $bytes); |
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
|
240
|
17173
|
50
|
|
|
|
27367
|
if ($ccs->{gr}) { |
241
|
0
|
|
|
|
|
0
|
$chunk =~ tr/\x20-\x7F\xA0-\xFF/\xA0-\xFF\x20-\x7F/; |
242
|
0
|
|
|
|
|
0
|
$conv = $ccs->{encoding}->decode($chunk, $FB_QUIET); |
243
|
0
|
|
|
|
|
0
|
$chunk =~ tr/\x20-\x7F\xA0-\xFF/\xA0-\xFF\x20-\x7F/; |
244
|
|
|
|
|
|
|
} else { |
245
|
17173
|
|
|
|
|
61803
|
$conv = $ccs->{encoding}->decode($chunk, $FB_QUIET); |
246
|
|
|
|
|
|
|
} |
247
|
|
|
|
|
|
|
|
248
|
17173
|
50
|
66
|
|
|
77656
|
if ($range and $chunk =~ /^([$range]{1,$bytes})/) { |
249
|
0
|
|
|
|
|
0
|
my $len = length $1; |
250
|
0
|
0
|
0
|
|
|
0
|
if (not defined $errLen or $len < $errLen) { |
251
|
0
|
|
|
|
|
0
|
$errLen = $len; |
252
|
|
|
|
|
|
|
} |
253
|
|
|
|
|
|
|
} |
254
|
|
|
|
|
|
|
|
255
|
17173
|
|
|
|
|
20252
|
$chunk .= $residue; |
256
|
|
|
|
|
|
|
|
257
|
17173
|
50
|
|
|
|
48781
|
if ($conv =~ /./os) { # length() on utf8 string is slow |
258
|
17173
|
|
|
|
|
23703
|
$_[1] = $chunk; |
259
|
17173
|
|
|
|
|
18419
|
$_[2] = undef; |
260
|
17173
|
|
|
|
|
53354
|
return $conv; |
261
|
|
|
|
|
|
|
} |
262
|
|
|
|
|
|
|
} |
263
|
0
|
|
|
|
|
0
|
$_[2] = undef; |
264
|
0
|
|
|
|
|
0
|
return (undef, $errLen); |
265
|
|
|
|
|
|
|
} |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
sub designate_dec { |
268
|
17152
|
|
|
17152
|
0
|
21368
|
my ($self, $g_seq) = @_; |
269
|
|
|
|
|
|
|
|
270
|
171520
|
50
|
|
|
|
629664
|
my $ccs = (grep { |
271
|
17152
|
|
|
|
|
36286
|
$_->{g_seq} and $_->{g_seq} eq $g_seq |
272
|
17152
|
|
|
|
|
16798
|
} @{$self->{CCS}})[0]; |
273
|
17152
|
50
|
|
|
|
34696
|
return undef unless $ccs; |
274
|
|
|
|
|
|
|
|
275
|
17152
|
|
|
|
|
35157
|
return $self->designate($ccs); |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
sub invoke_dec { |
279
|
0
|
|
|
0
|
0
|
0
|
my ($self, $ls) = @_; |
280
|
|
|
|
|
|
|
|
281
|
0
|
0
|
0
|
|
|
0
|
my $ccs = (grep { |
282
|
0
|
|
|
|
|
0
|
$_->{_designated_to} and |
283
|
|
|
|
|
|
|
$_->{ls} and $_->{ls} eq $ls |
284
|
0
|
|
|
|
|
0
|
} @{$self->{CCS}})[0]; |
285
|
0
|
0
|
|
|
|
0
|
return undef unless $ccs; |
286
|
|
|
|
|
|
|
|
287
|
0
|
|
|
|
|
0
|
return $self->invoke($ccs); |
288
|
|
|
|
|
|
|
} |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
# encode method |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
sub encode { |
293
|
1
|
|
|
1
|
1
|
9578
|
my ($self, $utf8, $chk) = @_; |
294
|
|
|
|
|
|
|
|
295
|
1
|
|
|
|
|
3
|
my $chk_sub; |
296
|
1
|
|
|
|
|
2
|
my $str = ''; |
297
|
1
|
|
|
|
|
2
|
my $errChar; |
298
|
|
|
|
|
|
|
my $subChar; |
299
|
|
|
|
|
|
|
|
300
|
1
|
50
|
|
|
|
7
|
if (ref $chk eq 'CODE') { |
301
|
0
|
|
|
|
|
0
|
$chk_sub = $chk; |
302
|
0
|
|
|
|
|
0
|
$chk = $PERLQQ | $LEAVE_SRC; |
303
|
|
|
|
|
|
|
} |
304
|
|
|
|
|
|
|
|
305
|
1
|
|
|
|
|
7
|
$self->init_state(1); |
306
|
|
|
|
|
|
|
|
307
|
1
|
|
|
|
|
8
|
while ($utf8 =~ /./os) { # length() on utf8 string is slow. |
308
|
17173
|
|
|
|
|
24670
|
my $conv; |
309
|
|
|
|
|
|
|
|
310
|
17173
|
|
|
|
|
53191
|
$conv = $self->_encode($utf8); |
311
|
17173
|
50
|
|
|
|
78120
|
if (defined $conv) { |
312
|
17173
|
|
|
|
|
21625
|
$str .= $conv; |
313
|
|
|
|
|
|
|
|
314
|
17173
|
100
|
66
|
|
|
74960
|
if ($conv =~ /[\r\n]/ and $self->{LineInit}) { |
315
|
1076
|
|
|
|
|
3400
|
$self->init_state(1); |
316
|
|
|
|
|
|
|
} |
317
|
17173
|
|
|
|
|
267438
|
next; |
318
|
|
|
|
|
|
|
} |
319
|
|
|
|
|
|
|
|
320
|
0
|
|
|
|
|
0
|
$errChar = substr($utf8, 0, 1); |
321
|
0
|
0
|
|
|
|
0
|
if ($chk & $DIE_ON_ERR) { |
322
|
0
|
|
|
|
|
0
|
croak sprintf $err_encode_nomap, '}\x{', $errChar, $self->name; |
323
|
|
|
|
|
|
|
} |
324
|
0
|
0
|
|
|
|
0
|
if ($chk & $WARN_ON_ERR) { |
325
|
0
|
|
|
|
|
0
|
carp sprintf $err_encode_nomap, '}\x{', $errChar, $self->name; |
326
|
|
|
|
|
|
|
} |
327
|
0
|
0
|
|
|
|
0
|
if ($chk & $RETURN_ON_ERR) { |
328
|
0
|
|
|
|
|
0
|
last; |
329
|
|
|
|
|
|
|
} |
330
|
|
|
|
|
|
|
|
331
|
0
|
|
|
|
|
0
|
substr($utf8, 0, 1) = ''; |
332
|
|
|
|
|
|
|
|
333
|
0
|
0
|
|
|
|
0
|
if ($chk_sub) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
334
|
0
|
|
|
|
|
0
|
$subChar = $chk_sub->(ord $errChar); |
335
|
0
|
0
|
|
|
|
0
|
$subChar = Encode::decode_utf8($subChar) |
336
|
|
|
|
|
|
|
unless Encode::is_utf8($subChar); |
337
|
|
|
|
|
|
|
} elsif ($chk & $PERLQQ) { |
338
|
0
|
|
|
|
|
0
|
$subChar = sprintf '\x{%04X}', ord $errChar; |
339
|
|
|
|
|
|
|
} elsif ($chk & $XMLCREF) { |
340
|
0
|
|
|
|
|
0
|
$subChar = sprintf '%X;', ord $errChar; |
341
|
|
|
|
|
|
|
} elsif ($chk & $HTMLCREF) { |
342
|
0
|
|
|
|
|
0
|
$subChar = sprintf '%d;', ord $errChar; |
343
|
|
|
|
|
|
|
} else { |
344
|
0
|
|
0
|
|
|
0
|
$subChar = $self->{SubChar} || '?'; |
345
|
|
|
|
|
|
|
} |
346
|
0
|
|
|
|
|
0
|
$conv = $self->_encode($subChar); |
347
|
0
|
0
|
|
|
|
0
|
if (defined $conv) { |
348
|
0
|
|
|
|
|
0
|
$str .= $conv; |
349
|
|
|
|
|
|
|
} |
350
|
|
|
|
|
|
|
} |
351
|
1
|
50
|
|
|
|
6
|
$_[1] = $utf8 unless $chk & $LEAVE_SRC; |
352
|
|
|
|
|
|
|
|
353
|
1
|
50
|
|
|
|
6
|
if (length $str) { |
354
|
1
|
|
|
|
|
5
|
$str .= $self->init_state(); |
355
|
|
|
|
|
|
|
} |
356
|
1
|
|
|
|
|
197
|
return $str; |
357
|
|
|
|
|
|
|
} |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
sub _encode { |
360
|
17173
|
|
|
17173
|
|
271164
|
my ($self, $utf8) = @_; |
361
|
|
|
|
|
|
|
|
362
|
17173
|
|
|
|
|
33894
|
foreach my $ccs (@{$self->{CCS}}) { |
|
17173
|
|
|
|
|
42006
|
|
363
|
63509
|
100
|
|
|
|
225942
|
next if $ccs->{dec_only}; |
364
|
|
|
|
|
|
|
|
365
|
56574
|
|
|
|
|
70721
|
my $conv; |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
# CCS with single-shift should encode runs as short as possible. |
368
|
|
|
|
|
|
|
# By now we support mapping from Unicode sequence up to 2 characters. |
369
|
56574
|
100
|
|
|
|
143823
|
if (defined $ccs->{ss}) { # empty value is allowed |
370
|
5871
|
|
50
|
|
|
22512
|
my $bytes = $ccs->{bytes} || 1; |
371
|
5871
|
|
|
|
|
407149
|
my $mc = substr($utf8, 0, 2); |
372
|
5871
|
|
|
|
|
46782
|
$conv = $ccs->{encoding}->encode($mc, $FB_QUIET); |
373
|
5871
|
100
|
|
|
|
20234
|
if ($bytes < length $conv) { |
|
|
100
|
|
|
|
|
|
374
|
13
|
|
|
|
|
27
|
$mc = substr($utf8, 0, 1); |
375
|
13
|
|
|
|
|
45
|
$conv = $ccs->{encoding}->encode($mc, $FB_QUIET); |
376
|
13
|
50
|
|
|
|
29
|
if (length $conv) { |
377
|
13
|
|
|
|
|
42
|
substr($utf8, 0, 1) = ''; |
378
|
|
|
|
|
|
|
} |
379
|
|
|
|
|
|
|
} elsif (length $conv == $bytes) { |
380
|
3
|
|
|
|
|
14
|
substr($utf8, 0, 2) = ''; |
381
|
3
|
|
|
|
|
140
|
$utf8 = $mc . $utf8; |
382
|
|
|
|
|
|
|
} else { |
383
|
5855
|
|
|
|
|
12875
|
undef $conv; |
384
|
|
|
|
|
|
|
} |
385
|
|
|
|
|
|
|
} else { |
386
|
50703
|
|
|
|
|
1079012
|
$conv = $ccs->{encoding}->encode($utf8, $FB_QUIET); |
387
|
|
|
|
|
|
|
} |
388
|
56574
|
100
|
100
|
|
|
400157
|
if (defined $conv and length $conv) { |
389
|
17173
|
|
|
|
|
347481
|
$_[1] = $utf8; |
390
|
17173
|
|
|
|
|
73262
|
return $self->designate($ccs) . $self->invoke($ccs, $conv); |
391
|
|
|
|
|
|
|
} |
392
|
|
|
|
|
|
|
} |
393
|
0
|
|
|
|
|
0
|
return undef; |
394
|
|
|
|
|
|
|
} |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
sub init_state { |
397
|
2155
|
|
|
2155
|
0
|
4075
|
my ($self, $reset) = @_; |
398
|
|
|
|
|
|
|
|
399
|
2155
|
100
|
|
|
|
4690
|
if ($reset) { |
400
|
2154
|
|
|
|
|
2344
|
foreach my $ccs (@{$self->{CCS}}) { |
|
2154
|
|
|
|
|
8232
|
|
401
|
21540
|
|
|
|
|
33061
|
delete $ccs->{_designated_to}; |
402
|
21540
|
|
|
|
|
40073
|
delete $ccs->{_invoked_to}; |
403
|
|
|
|
|
|
|
} |
404
|
2154
|
|
|
|
|
15377
|
delete $self->{_state}; |
405
|
|
|
|
|
|
|
} |
406
|
|
|
|
|
|
|
|
407
|
2155
|
|
|
|
|
3049
|
my $ret = ''; |
408
|
2155
|
|
|
|
|
3116
|
foreach my $ccs (grep { $_->{g_init} } @{$self->{CCS}}) { |
|
21550
|
|
|
|
|
46198
|
|
|
2155
|
|
|
|
|
4530
|
|
409
|
2155
|
|
|
|
|
4830
|
$ret .= $self->designate($ccs); |
410
|
|
|
|
|
|
|
} |
411
|
2155
|
|
|
|
|
3992
|
return $ret; |
412
|
|
|
|
|
|
|
} |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
sub designate { |
415
|
36480
|
|
|
36480
|
0
|
53953
|
my ($self, $ccs) = @_; |
416
|
|
|
|
|
|
|
|
417
|
36480
|
|
66
|
|
|
122167
|
my $g = $ccs->{g} || $ccs->{g_init}; |
418
|
36480
|
50
|
|
|
|
79560
|
croak sprintf 'Cannot designate %s', $ccs->{encoding}->name |
419
|
|
|
|
|
|
|
unless $g; |
420
|
36480
|
|
|
|
|
66371
|
my $g_seq = $ccs->{g_seq}; |
421
|
|
|
|
|
|
|
|
422
|
36480
|
|
|
|
|
53250
|
my @ccs; |
423
|
36480
|
50
|
|
|
|
60838
|
if ($g_seq) { # explicit designation |
424
|
364800
|
50
|
|
|
|
1856992
|
@ccs = grep { |
425
|
36480
|
|
|
|
|
84773
|
$_->{g_seq} and $_->{g_seq} eq $g_seq |
426
|
36480
|
|
|
|
|
47855
|
} @{$self->{CCS}}; |
427
|
|
|
|
|
|
|
} else { # static designation |
428
|
0
|
0
|
0
|
|
|
0
|
@ccs = grep { |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
429
|
0
|
|
|
|
|
0
|
not $_->{g_seq} and |
430
|
|
|
|
|
|
|
($_->{g} and $_->{g} eq $g or $_->{g_init} and $_->{g_init} eq $g) |
431
|
0
|
|
|
|
|
0
|
} @{$self->{CCS}}; |
432
|
|
|
|
|
|
|
} |
433
|
|
|
|
|
|
|
# Already designated: do nothing |
434
|
36480
|
|
66
|
|
|
169428
|
return '' |
435
|
|
|
|
|
|
|
unless grep { |
436
|
36480
|
100
|
|
|
|
54567
|
not ($_->{_designated_to} and $_->{_designated_to} eq $g) |
437
|
|
|
|
|
|
|
} @ccs; |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
# modify designation |
440
|
36458
|
100
|
|
|
|
37832
|
foreach my $ccs (@{$self->{_state}->{$g} || []}) { |
|
36458
|
|
|
|
|
139979
|
|
441
|
34296
|
|
|
|
|
81557
|
delete $ccs->{_designated_to}; |
442
|
34296
|
|
|
|
|
97321
|
delete $ccs->{_invoked_to}; |
443
|
|
|
|
|
|
|
} |
444
|
36458
|
|
|
|
|
150979
|
my %invoked = (gr => [], gl => []); |
445
|
36458
|
|
|
|
|
56142
|
foreach my $ccs (@ccs) { |
446
|
36458
|
|
|
|
|
89796
|
$ccs->{_designated_to} = $g; |
447
|
36458
|
100
|
66
|
|
|
203125
|
unless ($ccs->{ls} or $ccs->{ss}) { |
448
|
36450
|
50
|
|
|
|
77901
|
my $i = $ccs->{gr} ? 'gr' : 'gl'; |
449
|
|
|
|
|
|
|
|
450
|
36450
|
|
|
|
|
57891
|
$ccs->{_invoked_to} = $i; |
451
|
36450
|
|
|
|
|
54762
|
push @{$invoked{$i}}, $ccs; |
|
36450
|
|
|
|
|
175476
|
|
452
|
|
|
|
|
|
|
} |
453
|
|
|
|
|
|
|
} |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
# modify invokation |
456
|
36458
|
|
|
|
|
139024
|
foreach my $i (qw/gr gl/) { |
457
|
72916
|
50
|
|
|
|
68227
|
next unless @{$invoked{$i} || []}; |
|
72916
|
100
|
|
|
|
339657
|
|
458
|
|
|
|
|
|
|
|
459
|
36450
|
100
|
|
|
|
45081
|
foreach my $ccs (@{$self->{_state}->{$i} || []}) { |
|
36450
|
|
|
|
|
159247
|
|
460
|
34296
|
|
|
|
|
110832
|
delete $ccs->{_invoked_to}; |
461
|
|
|
|
|
|
|
} |
462
|
36450
|
|
|
|
|
138020
|
$self->{_state}->{$i} = $invoked{$i}; |
463
|
|
|
|
|
|
|
} |
464
|
|
|
|
|
|
|
|
465
|
36458
|
|
|
|
|
103300
|
$self->{_state}->{$g} = [@ccs]; |
466
|
36458
|
|
50
|
|
|
247406
|
return $g_seq || ''; |
467
|
|
|
|
|
|
|
} |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
sub invoke { |
470
|
17173
|
|
|
17173
|
0
|
57766
|
my ($self, $ccs, $str) = @_; |
471
|
17173
|
50
|
|
|
|
51101
|
$str = '' unless defined $str; |
472
|
|
|
|
|
|
|
|
473
|
17173
|
50
|
|
|
|
38018
|
my $i = $ccs->{gr} ? 'gr' : 'gl'; |
474
|
|
|
|
|
|
|
|
475
|
17173
|
50
|
|
|
|
75590
|
if ($i eq 'gr') { |
476
|
0
|
|
|
|
|
0
|
$str =~ tr/\x20-\x7F/\xA0-\xFF/; |
477
|
|
|
|
|
|
|
} |
478
|
|
|
|
|
|
|
|
479
|
17173
|
100
|
|
|
|
79552
|
if ($ccs->{ss}) { |
|
|
50
|
|
|
|
|
|
480
|
16
|
|
|
|
|
21
|
my $out = ''; |
481
|
16
|
|
|
|
|
37
|
while (length $str) { |
482
|
16
|
|
50
|
|
|
98
|
$out .= $ccs->{ss} . substr($str, 0, ($ccs->{bytes} || 1), ''); |
483
|
|
|
|
|
|
|
} |
484
|
16
|
|
|
|
|
379
|
return $out; |
485
|
|
|
|
|
|
|
} elsif ($ccs->{ls}) { |
486
|
0
|
|
|
|
|
0
|
my $ls = $ccs->{ls}; |
487
|
0
|
|
|
|
|
0
|
my $g_seq = $ccs->{g_seq}; |
488
|
0
|
|
0
|
|
|
0
|
my $g = $ccs->{g} || $ccs->{g_init}; |
489
|
|
|
|
|
|
|
|
490
|
0
|
|
|
|
|
0
|
my @ccs; |
491
|
0
|
0
|
|
|
|
0
|
if ($g_seq) { |
492
|
0
|
0
|
0
|
|
|
0
|
@ccs = grep { |
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
493
|
0
|
|
|
|
|
0
|
$_->{g_seq} and $_->{g_seq} eq $g_seq and |
494
|
|
|
|
|
|
|
$_->{ls} and $_->{ls} eq $ls and |
495
|
|
|
|
|
|
|
($_->{gr} ? 'gr' : 'gl') eq $i |
496
|
0
|
|
|
|
|
0
|
} @{$self->{CCS}}; |
497
|
|
|
|
|
|
|
} else { |
498
|
0
|
0
|
0
|
|
|
0
|
@ccs = grep { |
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
499
|
0
|
|
|
|
|
0
|
not $_->{g_seq} and ($_->{g} || $_->{g_init}) eq $g and |
500
|
|
|
|
|
|
|
$_->{ls} and $_->{ls} eq $ls and |
501
|
|
|
|
|
|
|
($_->{gr} ? 'gr' : 'gl') eq $i |
502
|
0
|
|
|
|
|
0
|
} @{$self->{CCS}}; |
503
|
|
|
|
|
|
|
} |
504
|
|
|
|
|
|
|
# Already invoked: add nothing |
505
|
0
|
|
0
|
|
|
0
|
return $str |
506
|
|
|
|
|
|
|
unless grep { |
507
|
0
|
0
|
|
|
|
0
|
not ($_->{_invoked_to} and $_->{_invoked_to} eq $i) |
508
|
|
|
|
|
|
|
} @ccs; |
509
|
|
|
|
|
|
|
|
510
|
0
|
0
|
|
|
|
0
|
foreach my $ccs (@{$self->{_state}->{$i} || []}) { |
|
0
|
|
|
|
|
0
|
|
511
|
0
|
|
|
|
|
0
|
delete $ccs->{_invoked_to}; |
512
|
|
|
|
|
|
|
} |
513
|
0
|
|
|
|
|
0
|
foreach my $ccs (@ccs) { |
514
|
0
|
|
|
|
|
0
|
$ccs->{_invoked_to} = $i; |
515
|
|
|
|
|
|
|
} |
516
|
|
|
|
|
|
|
|
517
|
0
|
|
|
|
|
0
|
$self->{_state}->{$i} = [@ccs]; |
518
|
0
|
|
|
|
|
0
|
return $ccs->{ls} . $str; |
519
|
|
|
|
|
|
|
} else { |
520
|
17157
|
|
|
|
|
130895
|
return $str; |
521
|
|
|
|
|
|
|
} |
522
|
|
|
|
|
|
|
} |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
# renew method |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
sub renew { |
527
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
528
|
|
|
|
|
|
|
|
529
|
0
|
|
|
|
|
|
my $clone = bless { map { _renew($_) } %$self } => ref($self); |
|
0
|
|
|
|
|
|
|
530
|
0
|
|
|
|
|
|
$clone->{renewed}++; |
531
|
0
|
|
|
|
|
|
return $clone; |
532
|
|
|
|
|
|
|
} |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
sub _renew { |
535
|
0
|
|
|
0
|
|
|
my $item = shift; |
536
|
|
|
|
|
|
|
|
537
|
0
|
0
|
0
|
|
|
|
if (ref $item eq 'HASH') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
538
|
0
|
|
|
|
|
|
return { map { _renew($_) } %$item }; |
|
0
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
} elsif (ref $item eq 'ARRAY') { |
540
|
0
|
|
|
|
|
|
return [ map { _renew($_) } @$item ]; |
|
0
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
} elsif (ref $item and $item->can("renew")) { |
542
|
0
|
|
|
|
|
|
return $item->renew; |
543
|
|
|
|
|
|
|
} else { |
544
|
0
|
|
|
|
|
|
return $item; |
545
|
|
|
|
|
|
|
} |
546
|
|
|
|
|
|
|
} |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
# Miscelaneous |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
sub mime_name { |
551
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
552
|
0
|
0
|
|
|
|
|
return undef if $self->{Name} =~ /^x/i; |
553
|
0
|
|
|
|
|
|
return uc($self->{Name}); |
554
|
|
|
|
|
|
|
} |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
1; |
557
|
|
|
|
|
|
|
__END__ |