line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Text::Shinobi; |
2
|
6
|
|
|
6
|
|
76383
|
use 5.010001; |
|
6
|
|
|
|
|
15
|
|
3
|
6
|
|
|
6
|
|
572
|
use utf8; |
|
6
|
|
|
|
|
14
|
|
|
6
|
|
|
|
|
26
|
|
4
|
6
|
|
|
6
|
|
122
|
use strict; |
|
6
|
|
|
|
|
9
|
|
|
6
|
|
|
|
|
129
|
|
5
|
6
|
|
|
6
|
|
19
|
use warnings; |
|
6
|
|
|
|
|
5
|
|
|
6
|
|
|
|
|
292
|
|
6
|
|
|
|
|
|
|
our $VERSION = "0.01"; |
7
|
|
|
|
|
|
|
|
8
|
6
|
|
|
6
|
|
22
|
use Exporter 'import'; |
|
6
|
|
|
|
|
6
|
|
|
6
|
|
|
|
|
200
|
|
9
|
6
|
|
|
6
|
|
3291
|
use Unicode::Normalize qw/NFD NFC/; |
|
6
|
|
|
|
|
974011
|
|
|
6
|
|
|
|
|
882
|
|
10
|
6
|
|
|
6
|
|
3716
|
use Lingua::JA::Regular::Unicode; |
|
6
|
|
|
|
|
62899
|
|
|
6
|
|
|
|
|
677
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
our @EXPORT_OK = qw/shinobi/; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
# Bit flags stored in the `flag` field of every substitution-table entry.
# DUO and MONO describe the shape of the replacement (two code points vs. a
# single code point); the remaining bits describe where it can be represented
# or displayed.
use constant DUO     => 1 << 0;
use constant MONO    => 1 << 1;
use constant JIS     => 1 << 2;     # JIS X 0208 + JIS X 0212 OR JIS X 0213
use constant UTF8MB3 => 1 << 3;     # as utf-8 encoding
use constant Y2016   => 1 << 10;    # almost viewable in 2016 (Mac10.11, Win10, iOS9, Android5)

# Mask tested against each entry's flags when encoding.
our $ENCODE = Y2016;    # this version's default mask
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# Substitution table: an array of hashes, one per (kana, replacement) pair.
#   char - the hiragana being replaced
#   code - the replacement string
#   flag - bitmask built from DUO / MONO / JIS / UTF8MB3 / Y2016
# All MONO entries come first, followed by all DUO entries; both groups are
# in iroha order.
our $map = do {
    my @iroha = qw/
        い ろ は に ほ へ と
        ち り ぬ る を わ か
        よ た れ そ つ ね な
        ら む う ゐ の お く
        や ま け ふ こ え て
        あ さ き ゆ め み し
        ゑ ひ も せ す ん
    /;

    # Single-code-point replacements as [ char, code, flag ].
    # Kana without such an entry: ろ は り か ら お く て き ひ も せ す ん
    my @mono = (
        [ 'い', "\x{682C}",  MONO | UTF8MB3 | JIS | Y2016 ],
        [ 'に', "\x{92AB}",  MONO | UTF8MB3 | JIS | Y2016 ],
        [ 'ほ', "\x{23D0A}", MONO ],
        [ 'へ', "\x{2021C}", MONO ],
        [ 'と', "\x{28246}", MONO ],
        [ 'ち', "\x{68C8}",  MONO | UTF8MB3 | JIS | Y2016 ],
        [ 'ぬ', "\x{57E5}",  MONO | UTF8MB3 | Y2016 ],
        [ 'る', "\x{9306}",  MONO | UTF8MB3 | JIS | Y2016 ],
        [ 'を', "\x{6E05}",  MONO | UTF8MB3 | JIS | Y2016 ],
        [ 'わ', "\x{5029}",  MONO | UTF8MB3 | JIS | Y2016 ],
        [ 'よ', "\x{6A2A}",  MONO | UTF8MB3 | JIS | Y2016 ],
        [ 'た', "\x{71BF}",  MONO | UTF8MB3 | JIS | Y2016 ],
        [ 'れ', "\x{58B4}",  MONO | UTF8MB3 | Y2016 ],
        [ 'そ', "\x{9404}",  MONO | UTF8MB3 | JIS | Y2016 ],
        [ 'つ', "\x{6F62}",  MONO | UTF8MB3 | JIS | Y2016 ],
        [ 'ね', "\x{50D9}",  MONO | UTF8MB3 | JIS | Y2016 ],
        [ 'な', "\x{28287}", MONO ],
        [ 'む', "\x{7103}",  MONO | UTF8MB3 | JIS | Y2016 ],
        [ 'う', "\x{212FD}", MONO | JIS | Y2016 ],
        [ 'ゐ', "\x{4932}",  MONO | UTF8MB3 | Y2016 ],
        [ 'の', "\x{6D7E}",  MONO | UTF8MB3 | Y2016 ],
        [ 'や', "\x{67CF}",  MONO | UTF8MB3 | JIS | Y2016 ],
        [ 'ま', "\x{241E2}", MONO | Y2016 ],
        [ 'け', "\x{2129A}", MONO ],
        [ 'ふ', "\x{9251}",  MONO | UTF8MB3 | JIS | Y2016 ],
        [ 'こ', "\x{6CCA}",  MONO | UTF8MB3 | JIS | Y2016 ],
        [ 'え', "\x{4F2F}",  MONO | UTF8MB3 | JIS | Y2016 ],
        [ 'あ', "\x{23638}", MONO | JIS | Y2016 ],
        [ 'さ', "\x{3DF5}",  MONO | UTF8MB3 | Y2016 ],
        [ 'ゆ', "\x{28B46}", MONO | Y2016 ],
        [ 'め', "\x{6F76}",  MONO | UTF8MB3 | Y2016 ],
        [ 'み', "\x{20381}", MONO | JIS | Y2016 ],
        [ 'し', "\x{28282}", MONO | JIS | Y2016 ],
        [ 'ゑ', "\x{6A74}",  MONO | UTF8MB3 | Y2016 ],
    );

    # Two-code-point replacements form a 7x7 grid laid over the iroha order:
    # the column (index % 7) selects the first code point, the row
    # (int(index / 7)) selects the second one. The last row has six cells.
    my @first  = ("\x{2F4A}", "\x{2F55}", "\x{2F1F}", "\x{2FA6}", "\x{6C35}", "\x{4EBB}", "\x{2F9D}");
    my @second = ("\x{2F8A}", "\x{2ED8}", "\x{2EE9}", "\x{2F9A}", "\x{2F69}", "\x{9ED2}", "\x{7D2B}");

    my @duo = map {
        [
            $iroha[$_],
            $first[ $_ % 7 ] . $second[ int( $_ / 7 ) ],
            DUO | UTF8MB3 | JIS | Y2016,
        ]
    } 0 .. $#iroha;

    [ map { +{ char => $_->[0], code => $_->[1], flag => $_->[2] } } @mono, @duo ];
};
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
# Lookup structures derived once from $map at load time:
#   $encode    - hiragana => array of its table entries, in table order
#   $decode    - replacement string => hiragana
#   $decode_re - regex capturing any known replacement string; alternatives
#                are listed in reverse table order
my ($encode, $decode) = ({}, {});

for my $entry (@{$map}) {
    # push autovivifies the per-kana list on first use
    push @{ $encode->{ $entry->{char} } }, $entry;
    $decode->{ $entry->{code} } = $entry->{char};
}

my $decode_re = do {
    my $alternation = join '|', map { $_->{code} } reverse @{$map};
    qr/($alternation)/;
};
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
# Encode a single character.
#
# Looks the character up in the encode table and returns the `code` of the
# first entry (in table order) whose flags share at least one bit with the
# current $ENCODE mask. Characters with no table entry, or with no entry
# matching the mask, are returned unchanged.
sub _encode {
    my ($char) = @_;

    my $candidates = $encode->{$char};
    return $char unless defined $candidates;

    for my $entry (@{$candidates}) {
        return $entry->{code} if $entry->{flag} & $ENCODE;
    }

    return $char;
}
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
# Normalize Japanese text into the full-size hiragana that the encode table
# is keyed on.
#
# Takes one string (undef is treated as "") and returns a normalized copy:
#   1. half-width katakana is widened to full-width katakana,
#   2. every character of the Hiragana/Katakana blocks is NFD-decomposed,
#      one character at a time,
#   3. katakana is converted to hiragana,
#   4. small kana are replaced by their full-size hiragana counterparts.
sub normalize {
    my $text = shift // "";

    # Widen before decomposing: half-width kana are outside the blocks matched
    # below, and katakana_h2z presumably returns precomposed voiced kana for a
    # half-width "kana + voicing mark" pair (TODO confirm against the library).
    # Doing this first guarantees those results are decomposed as well.
    $text = katakana_h2z($text);

    # decomposition for 濁点 (voicing marks)
    $text =~ s/(\p{InHiragana}|\p{InKatakana})/NFD($1)/ge;

    # katakana to hiragana
    $text = katakana2hiragana($text);

    # upper ぁぃぅぇぉっゃゅょゎゕゖ
    $text =~ tr[\x{3041}\x{3043}\x{3045}\x{3047}\x{3049}\x{3063}\x{3083}\x{3085}\x{3087}\x{308E}\x{3095}\x{3096}]
               [\x{3042}\x{3044}\x{3046}\x{3048}\x{304A}\x{3064}\x{3084}\x{3086}\x{3088}\x{308F}\x{304B}\x{3051}];

    # upper ㇾㇷㇶㇸㇲㇹㇱㇼㇳㇰㇿㇻㇺㇵㇽㇴ -> れふひへすほしりとくろらむはるぬ
    # These small katakana (U+31F0..U+31FF) are apparently left alone by the
    # katakana-to-hiragana step, so they are mapped straight to hiragana here;
    # mapping them to full-size katakana would leave them outside the
    # hiragana-keyed encode table.
    $text =~ tr[\x{31FE}\x{31F7}\x{31F6}\x{31F8}\x{31F2}\x{31F9}\x{31F1}\x{31FC}\x{31F3}\x{31F0}\x{31FF}\x{31FB}\x{31FA}\x{31F5}\x{31FD}\x{31F4}]
               [\x{308C}\x{3075}\x{3072}\x{3078}\x{3059}\x{307B}\x{3057}\x{308A}\x{3068}\x{304F}\x{308D}\x{3089}\x{3080}\x{306F}\x{308B}\x{306C}];

    return $text;
}
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
# Class method: Text::Shinobi->encode($text)
#
# Normalizes $text (undef is treated as "") and replaces each character by
# its table substitution under the current $ENCODE mask. Characters without
# a matching substitution are kept as they are. Returns the encoded string.
sub encode {
    my ($class, $text) = @_;

    my $normalized = normalize($text // "");

    return join '', map { _encode($_) } split //, $normalized;
}
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
# Class method: Text::Shinobi->decode($text)
#
# Replaces every replacement string known to the table by its hiragana, then
# NFC-normalizes each run of Hiragana-block characters. undef is treated as
# "". Returns the decoded string.
sub decode {
    my ($class, $text) = @_;
    $text //= "";

    # $decode_re only matches keys of $decode, so the lookup always succeeds
    $text =~ s{$decode_re}{$decode->{$1}}g;
    $text =~ s{(\p{InHiragana}+)}{NFC($1)}ge;

    return $text;
}
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
# Exportable function form of Text::Shinobi->encode: shinobi($text) returns
# the encoded string. All arguments are forwarded unchanged.
sub shinobi {
    return Text::Shinobi->encode(@_);
}
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
1; |
188
|
|
|
|
|
|
|
__END__ |