line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#-*- perl -*- |
2
|
|
|
|
|
|
|
#-*- coding: us-ascii -*- |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
package Encode::ShiftJIS2004; |
5
|
|
|
|
|
|
|
|
6
|
2
|
|
|
2
|
|
25689
|
use strict; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
83
|
|
7
|
2
|
|
|
2
|
|
10
|
use warnings; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
60
|
|
8
|
2
|
|
|
2
|
|
10
|
use base qw(Encode::Encoding); |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
1290
|
|
9
|
|
|
|
|
|
|
our $VERSION = '0.03'; |
10
|
|
|
|
|
|
|
|
11
|
2
|
|
|
2
|
|
12943
|
use Carp qw(carp croak); |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
145
|
|
12
|
2
|
|
|
2
|
|
597
|
use Encode::JISX0213::CCS; |
|
2
|
|
|
|
|
6
|
|
|
2
|
|
|
|
|
2891
|
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
my $err_encode_nomap = '"\x{%*v04X}" does not map to %s'; |
15
|
|
|
|
|
|
|
my $err_decode_nomap = '%s "\x%*v02X" does not map to Unicode'; |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
my $DIE_ON_ERR = Encode::DIE_ON_ERR(); |
18
|
|
|
|
|
|
|
my $FB_QUIET = Encode::FB_QUIET(); |
19
|
|
|
|
|
|
|
my $HTMLCREF = Encode::HTMLCREF(); |
20
|
|
|
|
|
|
|
my $LEAVE_SRC = Encode::LEAVE_SRC(); |
21
|
|
|
|
|
|
|
my $PERLQQ = Encode::PERLQQ(); |
22
|
|
|
|
|
|
|
my $RETURN_ON_ERR = Encode::RETURN_ON_ERR(); |
23
|
|
|
|
|
|
|
my $WARN_ON_ERR = Encode::WARN_ON_ERR(); |
24
|
|
|
|
|
|
|
my $XMLCREF = Encode::XMLCREF(); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
my $name = 'shift_jis-2004'; |
27
|
|
|
|
|
|
|
Encode::define_alias(qr/\bshift.*jis.*2004$/, "\"$name\""); |
28
|
|
|
|
|
|
|
$Encode::Encoding{$name} = bless { |
29
|
|
|
|
|
|
|
Name => $name, |
30
|
|
|
|
|
|
|
encoding => $Encode::Encoding{'jis-x-0213-annex1'}, |
31
|
|
|
|
|
|
|
} => __PACKAGE__; |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# Workaround for encengine.c which cannot correctly map Unicode sequence |
34
|
|
|
|
|
|
|
# with multiple characters. |
35
|
|
|
|
|
|
|
my %composed = ( |
36
|
|
|
|
|
|
|
"\x{304B}\x{309A}" => "\x82\xF5", |
37
|
|
|
|
|
|
|
"\x{304D}\x{309A}" => "\x82\xF6", |
38
|
|
|
|
|
|
|
"\x{304F}\x{309A}" => "\x82\xF7", |
39
|
|
|
|
|
|
|
"\x{3051}\x{309A}" => "\x82\xF8", |
40
|
|
|
|
|
|
|
"\x{3053}\x{309A}" => "\x82\xF9", |
41
|
|
|
|
|
|
|
"\x{30AB}\x{309A}" => "\x83\x97", |
42
|
|
|
|
|
|
|
"\x{30AD}\x{309A}" => "\x83\x98", |
43
|
|
|
|
|
|
|
"\x{30AF}\x{309A}" => "\x83\x99", |
44
|
|
|
|
|
|
|
"\x{30B1}\x{309A}" => "\x83\x9A", |
45
|
|
|
|
|
|
|
"\x{30B3}\x{309A}" => "\x83\x9B", |
46
|
|
|
|
|
|
|
"\x{30BB}\x{309A}" => "\x83\x9C", |
47
|
|
|
|
|
|
|
"\x{30C4}\x{309A}" => "\x83\x9D", |
48
|
|
|
|
|
|
|
"\x{30C8}\x{309A}" => "\x83\x9E", |
49
|
|
|
|
|
|
|
"\x{31F7}\x{309A}" => "\x83\xF6", |
50
|
|
|
|
|
|
|
"\x{00E6}\x{0300}" => "\x86\x63", |
51
|
|
|
|
|
|
|
"\x{0254}\x{0300}" => "\x86\x67", |
52
|
|
|
|
|
|
|
"\x{0254}\x{0301}" => "\x86\x68", |
53
|
|
|
|
|
|
|
"\x{028C}\x{0300}" => "\x86\x69", |
54
|
|
|
|
|
|
|
"\x{028C}\x{0301}" => "\x86\x6A", |
55
|
|
|
|
|
|
|
"\x{0259}\x{0300}" => "\x86\x6B", |
56
|
|
|
|
|
|
|
"\x{0259}\x{0301}" => "\x86\x6C", |
57
|
|
|
|
|
|
|
"\x{025A}\x{0300}" => "\x86\x6D", |
58
|
|
|
|
|
|
|
"\x{025A}\x{0301}" => "\x86\x6E", |
59
|
|
|
|
|
|
|
"\x{0301}" => "\x86\x79", |
60
|
|
|
|
|
|
|
"\x{0300}" => "\x86\x7B", |
61
|
|
|
|
|
|
|
"\x{02E5}" => "\x86\x80", |
62
|
|
|
|
|
|
|
"\x{02E9}" => "\x86\x84", |
63
|
|
|
|
|
|
|
"\x{02E9}\x{02E5}" => "\x86\x85", |
64
|
|
|
|
|
|
|
"\x{02E5}\x{02E9}" => "\x86\x86", |
65
|
|
|
|
|
|
|
); |
66
|
|
|
|
|
|
|
my $composed_re = join '|', reverse sort keys %composed; |
67
|
|
|
|
|
|
|
my $regexp = qr{\A (.*?) ($composed_re | \z)}osx; |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
# substitution cacharcter for multibyte. |
70
|
|
|
|
|
|
|
my $subChar = "\x81\xAC"; # GETA MARK |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
sub encode { |
73
|
2
|
|
|
2
|
1
|
172398
|
my ($self, $utf8, $chk) = @_; |
74
|
2
|
|
100
|
|
|
15
|
$chk ||= 0; |
75
|
|
|
|
|
|
|
|
76
|
2
|
|
|
|
|
3
|
my $chk_sub; |
77
|
2
|
50
|
|
|
|
9
|
if (ref $chk eq 'CODE') { |
78
|
0
|
|
|
|
|
0
|
$chk_sub = $chk; |
79
|
0
|
|
|
|
|
0
|
$chk = $PERLQQ | $LEAVE_SRC; |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
|
82
|
2
|
|
|
|
|
5
|
my $str = ''; |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
CHUNKS: |
85
|
2
|
|
|
|
|
12
|
while ($utf8 =~ /./os) { |
86
|
2
|
|
|
|
|
653
|
while ($utf8 =~ s/$regexp//) { |
87
|
33
|
|
|
|
|
127
|
my ($chunk, $mc) = ($1, $2); |
88
|
33
|
100
|
100
|
|
|
139
|
last CHUNKS unless $chunk =~ /./os or $mc =~ /./os; |
89
|
|
|
|
|
|
|
|
90
|
31
|
100
|
|
|
|
87
|
if ($chunk =~ /./os) { |
91
|
11
|
|
|
|
|
2259
|
$str .= $self->{encoding}->encode($chunk, $FB_QUIET); |
92
|
|
|
|
|
|
|
} |
93
|
31
|
50
|
|
|
|
67
|
if ($chunk =~ /./os) { |
94
|
0
|
|
|
|
|
0
|
$utf8 = $chunk . $mc . $utf8; |
95
|
0
|
|
|
|
|
0
|
last; |
96
|
|
|
|
|
|
|
} |
97
|
|
|
|
|
|
|
|
98
|
31
|
100
|
|
|
|
95
|
if ($mc =~ /./os) { |
99
|
29
|
|
|
|
|
4042
|
$str .= $composed{$mc}; |
100
|
|
|
|
|
|
|
} |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
|
103
|
0
|
|
|
|
|
0
|
my $errChar = substr($utf8, 0, 1); |
104
|
0
|
0
|
|
|
|
0
|
if ($chk & $DIE_ON_ERR) { |
105
|
0
|
|
|
|
|
0
|
croak sprintf $err_encode_nomap, '}\x{', $errChar, $self->{Name}; |
106
|
|
|
|
|
|
|
} |
107
|
0
|
0
|
|
|
|
0
|
if ($chk & $WARN_ON_ERR) { |
108
|
0
|
|
|
|
|
0
|
carp sprintf $err_encode_nomap, '}\x{', $errChar, $self->{Name}; |
109
|
|
|
|
|
|
|
} |
110
|
0
|
0
|
|
|
|
0
|
if ($chk & $RETURN_ON_ERR) { |
111
|
0
|
|
|
|
|
0
|
last; |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
# PERLQQ won't be suported to avoid ambiguity of "\x5C". |
114
|
0
|
0
|
|
|
|
0
|
if ($chk_sub) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
115
|
0
|
|
|
|
|
0
|
$str .= $chk_sub->(ord $errChar); |
116
|
|
|
|
|
|
|
} elsif ($chk & $XMLCREF) { |
117
|
0
|
|
|
|
|
0
|
$str .= sprintf '%04X;', ord $errChar; |
118
|
|
|
|
|
|
|
} elsif ($chk & $HTMLCREF) { |
119
|
0
|
|
|
|
|
0
|
$str .= sprintf '%d;', ord $errChar; |
120
|
|
|
|
|
|
|
} else { |
121
|
0
|
|
|
|
|
0
|
$str .= $subChar; |
122
|
|
|
|
|
|
|
} |
123
|
0
|
|
|
|
|
0
|
substr($utf8, 0, 1) = ''; |
124
|
|
|
|
|
|
|
} # CHUNKS |
125
|
|
|
|
|
|
|
|
126
|
2
|
50
|
|
|
|
15
|
$_[1] = $utf8 unless $chk & $LEAVE_SRC; |
127
|
2
|
|
|
|
|
13
|
return $str; |
128
|
|
|
|
|
|
|
} |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
sub decode { |
131
|
2
|
|
|
2
|
1
|
664
|
my ($self, $str, $chk) = @_; |
132
|
|
|
|
|
|
|
|
133
|
2
|
|
|
|
|
4
|
my $chk_sub; |
134
|
2
|
50
|
|
|
|
9
|
if (ref $chk eq 'CODE') { |
135
|
0
|
|
|
|
|
0
|
$chk_sub = $chk; |
136
|
0
|
|
|
|
|
0
|
$chk = $PERLQQ | $LEAVE_SRC; |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
2
|
|
|
|
|
8
|
my $utf8 = ''; |
140
|
|
|
|
|
|
|
|
141
|
2
|
|
|
|
|
8
|
while (length $str) { |
142
|
2
|
|
|
|
|
1250
|
$utf8 .= $self->{encoding}->decode($str, $FB_QUIET); |
143
|
2
|
50
|
|
|
|
14
|
last unless length $str; |
144
|
|
|
|
|
|
|
|
145
|
0
|
|
|
|
|
0
|
my $errChar; |
146
|
0
|
0
|
|
|
|
0
|
if ($str =~ /^([\x81-\x9F\xE0-\xFC][\x40-\x7E\x80-\xFC])/) { |
147
|
0
|
|
|
|
|
0
|
$errChar = $1; |
148
|
|
|
|
|
|
|
} else { |
149
|
0
|
|
|
|
|
0
|
$errChar = substr($str, 0, 1); |
150
|
|
|
|
|
|
|
} |
151
|
0
|
0
|
|
|
|
0
|
if ($chk & $DIE_ON_ERR) { |
152
|
0
|
|
|
|
|
0
|
croak sprintf $err_decode_nomap, $self->{Name}, '\x', $errChar; |
153
|
|
|
|
|
|
|
} |
154
|
0
|
0
|
|
|
|
0
|
if ($chk & $WARN_ON_ERR) { |
155
|
0
|
|
|
|
|
0
|
carp sprintf $err_decode_nomap, $self->{Name}, '\x', $errChar; |
156
|
|
|
|
|
|
|
} |
157
|
0
|
0
|
|
|
|
0
|
if ($chk & $RETURN_ON_ERR) { |
158
|
0
|
|
|
|
|
0
|
last; |
159
|
|
|
|
|
|
|
} |
160
|
0
|
|
|
|
|
0
|
substr($str, 0, length $errChar) = ''; |
161
|
|
|
|
|
|
|
|
162
|
0
|
0
|
|
|
|
0
|
if ($chk_sub) { |
|
|
0
|
|
|
|
|
|
163
|
0
|
|
|
|
|
0
|
$utf8 .= join '', map { $chk_sub->(ord $_) } split //, $errChar; |
|
0
|
|
|
|
|
0
|
|
164
|
|
|
|
|
|
|
} elsif ($chk & $PERLQQ) { |
165
|
0
|
|
|
|
|
0
|
$utf8 .= sprintf '\x%*v02X', '\x', $errChar; |
166
|
|
|
|
|
|
|
} else { |
167
|
0
|
|
|
|
|
0
|
$utf8 .= '\x{FFFD}'; |
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
} |
170
|
2
|
50
|
|
|
|
9
|
$_[1] = $str unless $chk & $LEAVE_SRC; |
171
|
2
|
|
|
|
|
7
|
return $utf8; |
172
|
|
|
|
|
|
|
} |
173
|
|
|
|
|
|
|
|
174
|
0
|
|
|
0
|
1
|
|
sub mime_name { uc(shift->{Name}) } |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
1; |
177
|
|
|
|
|
|
|
__END__ |