| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#-*- perl -*- |
|
2
|
|
|
|
|
|
|
#-*- coding: us-ascii -*- |
|
3
|
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
package Encode::ShiftJIS2004; |
|
5
|
|
|
|
|
|
|
|
|
6
|
2
|
|
|
2
|
|
21393
|
use strict; |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
117
|
|
|
7
|
2
|
|
|
2
|
|
13
|
use warnings; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
177
|
|
|
8
|
2
|
|
|
2
|
|
12
|
use base qw(Encode::Encoding); |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
1402
|
|
|
9
|
|
|
|
|
|
|
our $VERSION = '0.03'; |
|
10
|
|
|
|
|
|
|
|
|
11
|
2
|
|
|
2
|
|
10404
|
use Carp qw(carp croak); |
|
|
2
|
|
|
|
|
6
|
|
|
|
2
|
|
|
|
|
172
|
|
|
12
|
2
|
|
|
2
|
|
445
|
use Encode::JISX0213::CCS; |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
3340
|
|
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
my $err_encode_nomap = '"\x{%*v04X}" does not map to %s'; |
|
15
|
|
|
|
|
|
|
my $err_decode_nomap = '%s "\x%*v02X" does not map to Unicode'; |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
my $DIE_ON_ERR = Encode::DIE_ON_ERR(); |
|
18
|
|
|
|
|
|
|
my $FB_QUIET = Encode::FB_QUIET(); |
|
19
|
|
|
|
|
|
|
my $HTMLCREF = Encode::HTMLCREF(); |
|
20
|
|
|
|
|
|
|
my $LEAVE_SRC = Encode::LEAVE_SRC(); |
|
21
|
|
|
|
|
|
|
my $PERLQQ = Encode::PERLQQ(); |
|
22
|
|
|
|
|
|
|
my $RETURN_ON_ERR = Encode::RETURN_ON_ERR(); |
|
23
|
|
|
|
|
|
|
my $WARN_ON_ERR = Encode::WARN_ON_ERR(); |
|
24
|
|
|
|
|
|
|
my $XMLCREF = Encode::XMLCREF(); |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
my $name = 'shift_jis-2004'; |
|
27
|
|
|
|
|
|
|
Encode::define_alias(qr/\bshift.*jis.*2004$/, "\"$name\""); |
|
28
|
|
|
|
|
|
|
$Encode::Encoding{$name} = bless { |
|
29
|
|
|
|
|
|
|
Name => $name, |
|
30
|
|
|
|
|
|
|
encoding => $Encode::Encoding{'jis-x-0213-annex1'}, |
|
31
|
|
|
|
|
|
|
} => __PACKAGE__; |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# Workaround for encengine.c which cannot correctly map Unicode sequence |
|
34
|
|
|
|
|
|
|
# with multiple characters. |
|
35
|
|
|
|
|
|
|
my %composed = ( |
|
36
|
|
|
|
|
|
|
"\x{304B}\x{309A}" => "\x82\xF5", |
|
37
|
|
|
|
|
|
|
"\x{304D}\x{309A}" => "\x82\xF6", |
|
38
|
|
|
|
|
|
|
"\x{304F}\x{309A}" => "\x82\xF7", |
|
39
|
|
|
|
|
|
|
"\x{3051}\x{309A}" => "\x82\xF8", |
|
40
|
|
|
|
|
|
|
"\x{3053}\x{309A}" => "\x82\xF9", |
|
41
|
|
|
|
|
|
|
"\x{30AB}\x{309A}" => "\x83\x97", |
|
42
|
|
|
|
|
|
|
"\x{30AD}\x{309A}" => "\x83\x98", |
|
43
|
|
|
|
|
|
|
"\x{30AF}\x{309A}" => "\x83\x99", |
|
44
|
|
|
|
|
|
|
"\x{30B1}\x{309A}" => "\x83\x9A", |
|
45
|
|
|
|
|
|
|
"\x{30B3}\x{309A}" => "\x83\x9B", |
|
46
|
|
|
|
|
|
|
"\x{30BB}\x{309A}" => "\x83\x9C", |
|
47
|
|
|
|
|
|
|
"\x{30C4}\x{309A}" => "\x83\x9D", |
|
48
|
|
|
|
|
|
|
"\x{30C8}\x{309A}" => "\x83\x9E", |
|
49
|
|
|
|
|
|
|
"\x{31F7}\x{309A}" => "\x83\xF6", |
|
50
|
|
|
|
|
|
|
"\x{00E6}\x{0300}" => "\x86\x63", |
|
51
|
|
|
|
|
|
|
"\x{0254}\x{0300}" => "\x86\x67", |
|
52
|
|
|
|
|
|
|
"\x{0254}\x{0301}" => "\x86\x68", |
|
53
|
|
|
|
|
|
|
"\x{028C}\x{0300}" => "\x86\x69", |
|
54
|
|
|
|
|
|
|
"\x{028C}\x{0301}" => "\x86\x6A", |
|
55
|
|
|
|
|
|
|
"\x{0259}\x{0300}" => "\x86\x6B", |
|
56
|
|
|
|
|
|
|
"\x{0259}\x{0301}" => "\x86\x6C", |
|
57
|
|
|
|
|
|
|
"\x{025A}\x{0300}" => "\x86\x6D", |
|
58
|
|
|
|
|
|
|
"\x{025A}\x{0301}" => "\x86\x6E", |
|
59
|
|
|
|
|
|
|
"\x{0301}" => "\x86\x79", |
|
60
|
|
|
|
|
|
|
"\x{0300}" => "\x86\x7B", |
|
61
|
|
|
|
|
|
|
"\x{02E5}" => "\x86\x80", |
|
62
|
|
|
|
|
|
|
"\x{02E9}" => "\x86\x84", |
|
63
|
|
|
|
|
|
|
"\x{02E9}\x{02E5}" => "\x86\x85", |
|
64
|
|
|
|
|
|
|
"\x{02E5}\x{02E9}" => "\x86\x86", |
|
65
|
|
|
|
|
|
|
); |
|
66
|
|
|
|
|
|
|
my $composed_re = join '|', reverse sort keys %composed; |
|
67
|
|
|
|
|
|
|
my $regexp = qr{\A (.*?) ($composed_re | \z)}osx; |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
# substitution cacharcter for multibyte. |
|
70
|
|
|
|
|
|
|
my $subChar = "\x81\xAC"; # GETA MARK |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
sub encode { |
|
73
|
2
|
|
|
2
|
1
|
112528
|
my ($self, $utf8, $chk) = @_; |
|
74
|
2
|
|
100
|
|
|
13
|
$chk ||= 0; |
|
75
|
|
|
|
|
|
|
|
|
76
|
2
|
|
|
|
|
3
|
my $chk_sub; |
|
77
|
2
|
50
|
|
|
|
9
|
if (ref $chk eq 'CODE') { |
|
78
|
0
|
|
|
|
|
0
|
$chk_sub = $chk; |
|
79
|
0
|
|
|
|
|
0
|
$chk = $PERLQQ | $LEAVE_SRC; |
|
80
|
|
|
|
|
|
|
} |
|
81
|
|
|
|
|
|
|
|
|
82
|
2
|
|
|
|
|
3
|
my $str = ''; |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
CHUNKS: |
|
85
|
2
|
|
|
|
|
12
|
while ($utf8 =~ /./os) { |
|
86
|
2
|
|
|
|
|
477
|
while ($utf8 =~ s/$regexp//) { |
|
87
|
33
|
|
|
|
|
108
|
my ($chunk, $mc) = ($1, $2); |
|
88
|
33
|
100
|
100
|
|
|
122
|
last CHUNKS unless $chunk =~ /./os or $mc =~ /./os; |
|
89
|
|
|
|
|
|
|
|
|
90
|
31
|
100
|
|
|
|
55
|
if ($chunk =~ /./os) { |
|
91
|
11
|
|
|
|
|
1686
|
$str .= $self->{encoding}->encode($chunk, $FB_QUIET); |
|
92
|
|
|
|
|
|
|
} |
|
93
|
31
|
50
|
|
|
|
55
|
if ($chunk =~ /./os) { |
|
94
|
0
|
|
|
|
|
0
|
$utf8 = $chunk . $mc . $utf8; |
|
95
|
0
|
|
|
|
|
0
|
last; |
|
96
|
|
|
|
|
|
|
} |
|
97
|
|
|
|
|
|
|
|
|
98
|
31
|
100
|
|
|
|
80
|
if ($mc =~ /./os) { |
|
99
|
29
|
|
|
|
|
3109
|
$str .= $composed{$mc}; |
|
100
|
|
|
|
|
|
|
} |
|
101
|
|
|
|
|
|
|
} |
|
102
|
|
|
|
|
|
|
|
|
103
|
0
|
|
|
|
|
0
|
my $errChar = substr($utf8, 0, 1); |
|
104
|
0
|
0
|
|
|
|
0
|
if ($chk & $DIE_ON_ERR) { |
|
105
|
0
|
|
|
|
|
0
|
croak sprintf $err_encode_nomap, '}\x{', $errChar, $self->{Name}; |
|
106
|
|
|
|
|
|
|
} |
|
107
|
0
|
0
|
|
|
|
0
|
if ($chk & $WARN_ON_ERR) { |
|
108
|
0
|
|
|
|
|
0
|
carp sprintf $err_encode_nomap, '}\x{', $errChar, $self->{Name}; |
|
109
|
|
|
|
|
|
|
} |
|
110
|
0
|
0
|
|
|
|
0
|
if ($chk & $RETURN_ON_ERR) { |
|
111
|
0
|
|
|
|
|
0
|
last; |
|
112
|
|
|
|
|
|
|
} |
|
113
|
|
|
|
|
|
|
# PERLQQ won't be suported to avoid ambiguity of "\x5C". |
|
114
|
0
|
0
|
|
|
|
0
|
if ($chk_sub) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
115
|
0
|
|
|
|
|
0
|
$str .= $chk_sub->(ord $errChar); |
|
116
|
|
|
|
|
|
|
} elsif ($chk & $XMLCREF) { |
|
117
|
0
|
|
|
|
|
0
|
$str .= sprintf '%04X;', ord $errChar; |
|
118
|
|
|
|
|
|
|
} elsif ($chk & $HTMLCREF) { |
|
119
|
0
|
|
|
|
|
0
|
$str .= sprintf '%d;', ord $errChar; |
|
120
|
|
|
|
|
|
|
} else { |
|
121
|
0
|
|
|
|
|
0
|
$str .= $subChar; |
|
122
|
|
|
|
|
|
|
} |
|
123
|
0
|
|
|
|
|
0
|
substr($utf8, 0, 1) = ''; |
|
124
|
|
|
|
|
|
|
} # CHUNKS |
|
125
|
|
|
|
|
|
|
|
|
126
|
2
|
50
|
|
|
|
15
|
$_[1] = $utf8 unless $chk & $LEAVE_SRC; |
|
127
|
2
|
|
|
|
|
12
|
return $str; |
|
128
|
|
|
|
|
|
|
} |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
sub decode { |
|
131
|
2
|
|
|
2
|
1
|
711
|
my ($self, $str, $chk) = @_; |
|
132
|
|
|
|
|
|
|
|
|
133
|
2
|
|
|
|
|
4
|
my $chk_sub; |
|
134
|
2
|
50
|
|
|
|
8
|
if (ref $chk eq 'CODE') { |
|
135
|
0
|
|
|
|
|
0
|
$chk_sub = $chk; |
|
136
|
0
|
|
|
|
|
0
|
$chk = $PERLQQ | $LEAVE_SRC; |
|
137
|
|
|
|
|
|
|
} |
|
138
|
|
|
|
|
|
|
|
|
139
|
2
|
|
|
|
|
4
|
my $utf8 = ''; |
|
140
|
|
|
|
|
|
|
|
|
141
|
2
|
|
|
|
|
7
|
while (length $str) { |
|
142
|
2
|
|
|
|
|
1077
|
$utf8 .= $self->{encoding}->decode($str, $FB_QUIET); |
|
143
|
2
|
50
|
|
|
|
21
|
last unless length $str; |
|
144
|
|
|
|
|
|
|
|
|
145
|
0
|
|
|
|
|
0
|
my $errChar; |
|
146
|
0
|
0
|
|
|
|
0
|
if ($str =~ /^([\x81-\x9F\xE0-\xFC][\x40-\x7E\x80-\xFC])/) { |
|
147
|
0
|
|
|
|
|
0
|
$errChar = $1; |
|
148
|
|
|
|
|
|
|
} else { |
|
149
|
0
|
|
|
|
|
0
|
$errChar = substr($str, 0, 1); |
|
150
|
|
|
|
|
|
|
} |
|
151
|
0
|
0
|
|
|
|
0
|
if ($chk & $DIE_ON_ERR) { |
|
152
|
0
|
|
|
|
|
0
|
croak sprintf $err_decode_nomap, $self->{Name}, '\x', $errChar; |
|
153
|
|
|
|
|
|
|
} |
|
154
|
0
|
0
|
|
|
|
0
|
if ($chk & $WARN_ON_ERR) { |
|
155
|
0
|
|
|
|
|
0
|
carp sprintf $err_decode_nomap, $self->{Name}, '\x', $errChar; |
|
156
|
|
|
|
|
|
|
} |
|
157
|
0
|
0
|
|
|
|
0
|
if ($chk & $RETURN_ON_ERR) { |
|
158
|
0
|
|
|
|
|
0
|
last; |
|
159
|
|
|
|
|
|
|
} |
|
160
|
0
|
|
|
|
|
0
|
substr($str, 0, length $errChar) = ''; |
|
161
|
|
|
|
|
|
|
|
|
162
|
0
|
0
|
|
|
|
0
|
if ($chk_sub) { |
|
|
|
0
|
|
|
|
|
|
|
163
|
0
|
|
|
|
|
0
|
$utf8 .= join '', map { $chk_sub->(ord $_) } split //, $errChar; |
|
|
0
|
|
|
|
|
0
|
|
|
164
|
|
|
|
|
|
|
} elsif ($chk & $PERLQQ) { |
|
165
|
0
|
|
|
|
|
0
|
$utf8 .= sprintf '\x%*v02X', '\x', $errChar; |
|
166
|
|
|
|
|
|
|
} else { |
|
167
|
0
|
|
|
|
|
0
|
$utf8 .= '\x{FFFD}'; |
|
168
|
|
|
|
|
|
|
} |
|
169
|
|
|
|
|
|
|
} |
|
170
|
2
|
50
|
|
|
|
10
|
$_[1] = $str unless $chk & $LEAVE_SRC; |
|
171
|
2
|
|
|
|
|
9
|
return $utf8; |
|
172
|
|
|
|
|
|
|
} |
|
173
|
|
|
|
|
|
|
|
|
174
|
0
|
|
|
0
|
1
|
|
sub mime_name { uc(shift->{Name}) } |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
1; |
|
177
|
|
|
|
|
|
|
__END__ |