line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Lingua::JA::Kana; |
2
|
2
|
|
|
2
|
|
46164
|
use warnings; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
61
|
|
3
|
2
|
|
|
2
|
|
10
|
use strict; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
59
|
|
4
|
2
|
|
|
2
|
|
1941
|
use utf8; |
|
2
|
|
|
|
|
24
|
|
|
2
|
|
|
|
|
10
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
our $VERSION = sprintf "%d.%02d", q$Revision: 0.7 $ =~ /(\d+)/g; |
7
|
|
|
|
|
|
|
|
8
|
2
|
|
|
2
|
|
144
|
use re (); |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
51
|
|
9
|
|
|
|
|
|
|
require Exporter; |
10
|
2
|
|
|
2
|
|
11
|
use base qw/Exporter/; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
7695
|
|
11
|
|
|
|
|
|
|
# Functions exported into the caller's namespace by default (via Exporter).
our @EXPORT = (
    qw( hira2kata hiragana2katakana ),       # hiragana -> katakana
    qw( kata2hira katakana2hiragana ),       # katakana -> hiragana
    qw( romaji2hiragana romaji2katakana ),   # romaji   -> kana
    qw( kana2romaji ),                       # kana     -> romaji
    qw( hankaku2zenkaku zenkaku2hankaku ),   # half-width <-> full-width
);
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# 1 when the optional Regexp::Assemble module could be loaded, 0 otherwise.
# A block eval with a trailing true value is used instead of a string eval
# followed by a look at $@: the result then depends only on whether the
# require completed, not on the state of the global $@ afterwards.
our $USE_REGEXP_ASSEMBLE = eval { require Regexp::Assemble; 1 } ? 1 : 0;
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# Case-insensitive character classes used by the romaji <-> kana passes.
# Note the absence of n and m from the consonant class.
our ( $Re_Vowels, $Re_Consonants ) = (
    qr/[aeiou]/i,
    qr/[bcdfghjklpqrstvwxyz]/i,
);
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# Katakana => romaji table, one group per kana row.  Keys may be one or two
# characters long (digraphs such as キャ are listed as a unit).
our %Kata2Hepburn = (

    # vowels, full-size and small
    qw( ア a    イ i    ウ u    エ e    オ o ),
    qw( ァ xa   ィ xi   ゥ xu   ェ xe   ォ xo ),

    # k / g rows
    qw( カ ka   キ ki   ク ku   ケ ke   コ ko ),
    qw( ガ ga   ギ gi   グ gu   ゲ ge   ゴ go ),
    qw( キャ kya   キュ kyu   キョ kyo ),
    qw( ギャ gya   ギュ gyu   ギョ gyo ),

    # s / z rows
    qw( サ sa   シ shi  ス su   セ se   ソ so ),
    qw( ザ za   ジ ji   ズ zu   ゼ ze   ゾ zo ),
    qw( シャ sha   シュ shu   ショ sho ),
    qw( ジャ ja    ジュ ju    ジョ jo ),

    # t / d rows
    qw( タ ta   チ chi  ツ tsu  テ te   ト to ),
    qw( ティ ti   トゥ tu ),
    qw( ダ da   ディ di  ドゥ du  デ de   ド do ),
    qw( ヂ dhi  ヅ dhu ),
    qw( チャ cha   チュ chu   チェ che   チョ cho ),
    qw( ヂャ dha   ヂュ dhu   ヂェ dhe   ヂョ dho ),

    # n row
    qw( ナ na   ニ ni   ヌ nu   ネ ne   ノ no ),
    qw( ニャ nya   ニュ nyu   ニョ nyo ),

    # h / b / p / f rows
    qw( ハ ha   ヒ hi   フ fu   ヘ he   ホ ho ),
    qw( ヒャ hya   ヒュ hyu   ヒョ hyo ),
    qw( バ ba   ビ bi   ブ bu   ベ be   ボ bo ),
    qw( ビャ bya   ビュ byu   ビョ byo ),
    qw( パ pa   ピ pi   プ pu   ペ pe   ポ po ),
    qw( ピャ pya   ピュ pyu   ピョ pyo ),
    qw( ファ fa   フィ fi   フェ fe   フォ fo ),

    # m row
    qw( マ ma   ミ mi   ム mu   メ me   モ mo ),
    qw( ミャ mya   ミュ myu   ミョ myo ),

    # y row, full-size and small
    qw( ヤ ya   ユ yu   イェ ye   ヨ yo ),
    qw( ャ xya  ュ xyu  ョ xyo ),

    # r row
    qw( ラ ra   リ ri   ル ru   レ re   ロ ro ),
    qw( リャ rya   リュ ryu   リョ ryo ),

    # w / v rows
    qw( ワ wa   ヰ wi   ヱ we   ヲ wo ),
    qw( ウァ wa   ウィ wi   ウェ we   ウォ wo ),
    qw( ヴァ va   ヴィ vi   ヴ vu   ヴェ ve   ヴォ vo ),

    # syllabic n
    qw( ン n ),
);
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# Kana => romaji table covering both scripts: every katakana entry of
# %Kata2Hepburn plus the same entry keyed by its hiragana spelling.
our %Kana2Hepburn = %Kata2Hepburn;
for my $katakana ( keys %Kata2Hepburn ) {
    $Kana2Hepburn{ katakana2hiragana($katakana) } = $Kata2Hepburn{$katakana};
}
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
# Regex matching any single key of %Kana2Hepburn.
#
# When Regexp::Assemble is available it builds the pattern from the keys.
# Otherwise a plain alternation is used; the keys are sorted longest-first
# because Perl takes the first alternative that matches, and in unsorted
# hash order a one-character kana (キ) could come before a digraph that
# starts with it (キャ), so the digraph would never be matched as a unit.
our $Re_Kana2Hepburn = do {
    if ($USE_REGEXP_ASSEMBLE) {
        my $ra = Regexp::Assemble->new();
        $ra->add($_) for keys %Kana2Hepburn;
        $ra->re;
    }
    else {
        my $str = join '|',
            sort { length($b) <=> length($a) } keys %Kana2Hepburn;
        qr/(?:$str)/;
    }
};
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
# Romaji => katakana table.  Several spellings may map to the same kana
# (shi/si, ji/zi, sha/sya, fu/hu, ra/la, ...).  Keys are lower case.
#
# The dy- row is spelled dya/dyu/dye/dyo; it must not reuse the ty- keys,
# otherwise tyu/tye/tyo would be overwritten and map to ヂュ/ヂェ/ヂョ.
#
# NOTE(review): "dhu" is listed twice (ヅ, then ヂュ); the later entry wins,
# so dhu => ヂュ.  Left unchanged - confirm which mapping is intended.
our %Romaji2Kata = (

    # vowels, full-size and small
    qw( a ア    i イ    u ウ    e エ    o オ ),
    qw( xa ァ   xi ィ   xu ゥ   xe ェ   xo ォ ),

    # k / g rows
    qw( ka カ   ki キ   ku ク   ke ケ   ko コ ),
    qw( ga ガ   gi ギ   gu グ   ge ゲ   go ゴ ),
    qw( kya キャ   kyu キュ   kyo キョ ),
    qw( gya ギャ   gyu ギュ   gyo ギョ ),

    # s / z rows
    qw( sa サ   shi シ  su ス   se セ   so ソ ),
    qw( si シ ),
    qw( za ザ   ji ジ   zu ズ   ze ゼ   zo ゾ ),
    qw( zi ジ ),
    qw( sha シャ   shu シュ   sho ショ ),
    qw( ja ジャ    ju ジュ    jo ジョ ),
    qw( sya シャ   syu シュ   syo ショ ),

    # t / d rows
    qw( ta タ   chi チ  tsu ツ  te テ   to ト ),
    qw( xtu ッ ),
    qw( ti ティ   tu トゥ ),
    qw( da ダ   di ディ  du ドゥ  de デ   do ド ),
    qw( dhi ヂ  dhu ヅ ),
    qw( cha チャ   chu チュ   che チェ   cho チョ ),
    qw( tya チャ   tyu チュ   tye チェ   tyo チョ ),
    qw( dha ヂャ   dhu ヂュ   dhe ヂェ   dho ヂョ ),
    qw( dya ヂャ   dyu ヂュ   dye ヂェ   dyo ヂョ ),

    # n row
    qw( na ナ   ni ニ   nu ヌ   ne ネ   no ノ ),
    qw( nya ニャ   nyu ニュ   nyo ニョ ),

    # h / b / p / f rows
    qw( ha ハ   hi ヒ   fu フ   he ヘ   ho ホ ),
    qw( hu フ ),
    qw( hya ヒャ   hyu ヒュ   hyo ヒョ ),
    qw( ba バ   bi ビ   bu ブ   be ベ   bo ボ ),
    qw( bya ビャ   byu ビュ   byo ビョ ),
    qw( pa パ   pi ピ   pu プ   pe ペ   po ポ ),
    qw( pya ピャ   pyu ピュ   pyo ピョ ),
    qw( fa ファ   fi フィ   fe フェ   fo フォ ),

    # m row
    qw( ma マ   mi ミ   mu ム   me メ   mo モ ),
    qw( mya ミャ   myu ミュ   myo ミョ ),

    # y row, full-size and small
    qw( ya ヤ   yu ユ   ye イェ   yo ヨ ),
    qw( xya ャ  xyu ュ  xyo ョ ),

    # r / l rows (l is treated as r)
    qw( ra ラ   ri リ   ru ル   re レ   ro ロ ),
    qw( rya リャ   ryu リュ   ryo リョ ),
    qw( la ラ   li リ   lu ル   le レ   lo ロ ),

    # w / v rows
    qw( wa ワ   wo ヲ ),
    qw( wi ウィ   we ウェ ),
    qw( va ヴァ   vi ヴィ   vu ヴ   ve ヴェ   vo ヴォ ),
);
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
# Case-insensitive regex matching any key of %Romaji2Kata.
#
# With Regexp::Assemble the assembled pattern is unwrapped to its bare
# source and recompiled with /i.  Without it, a plain alternation of the
# keys, longest first, is compiled instead.
our $Re_Romaji2Kata = do {
    my $source;
    if ( !$USE_REGEXP_ASSEMBLE ) {
        my @longest_first =
            sort { length($b) <=> length($a) } keys %Romaji2Kata;
        $source = '(?:' . join( '|', @longest_first ) . ')';
    }
    else {
        my $assembler = Regexp::Assemble->new();
        $assembler->add($_) for keys %Romaji2Kata;
        my $assembled = $assembler->re;
        if ( $] >= 5.009005 ) {

            # re::regexp_pattern returns (pattern, modifiers); keep the pattern.
            ($source) = re::regexp_pattern($assembled);
        }
        else {

            # Older perls: strip the leading '(?-xism:' and the trailing ')'.
            $source = substr( "$assembled", 8, -1 );
        }
    }
    qr/$source/i;    # (re)compile with i
};
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# The table and regex that kana2romaji() actually uses; initialised as
# copies of the Hepburn table and its matching regex.
our ( $Re_Kana2Romaji, %Kana2Romaji ) = ( $Re_Kana2Hepburn, %Kana2Hepburn );
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
# katakana2hiragana($text)
#
# Returns a copy of $text in which the katakana ァ..ン and ヴ are replaced
# by the corresponding hiragana ぁ..ん and ゔ.  All other characters are
# left as they are.
sub katakana2hiragana {
    my ($text) = @_;
    ( my $hiragana = $text ) =~ tr/ァ-ンヴ/ぁ-んゔ/;
    return $hiragana;
}
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
# hiragana2katakana($text)
#
# Returns a copy of $text in which the hiragana ぁ..ん and ゔ are replaced
# by the corresponding katakana ァ..ン and ヴ.  All other characters are
# left as they are.
sub hiragana2katakana {
    my ($text) = @_;
    ( my $katakana = $text ) =~ tr/ぁ-んゔ/ァ-ンヴ/;
    return $katakana;
}
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
# Short aliases for the two script converters.
{
    # Each glob is mentioned only once here, which would otherwise
    # trigger a "used only once" warning.
    no warnings 'once';
    *hira2kata = \&hiragana2katakana;
    *kata2hira = \&katakana2hiragana;
}
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
# romaji2katakana($text)
#
# Converts romaji in $text to katakana in three passes and returns the
# result.  Text that matches no table entry is left as it is.
sub romaji2katakana {
    my ($text) = @_;

    # Pass 1: a doubled consonant becomes a sokuon plus one consonant
    # (tta -> ッta).
    $text =~ s{($Re_Consonants)\1}{ッ$1}gi;

    # Pass 2: replace every romaji syllable found in %Romaji2Kata; the
    # lookup is done on the lower-cased match.
    $text =~ s{($Re_Romaji2Kata)}{ $Romaji2Kata{ lc $1 } || $1 }gie;

    # Pass 3: an m or n still standing directly after a katakana
    # becomes ン.
    $text =~ s{([ァ-ン])[mn]}{${1}ン}gi;

    return $text;
}
178
|
|
|
|
|
|
|
|
179
|
2
|
|
|
2
|
1
|
227
|
# romaji2hiragana($text)
#
# Converts romaji to hiragana by going through katakana first.
sub romaji2hiragana {
    my ($romaji) = @_;
    my $katakana = romaji2katakana($romaji);
    return katakana2hiragana($katakana);
}
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
# kana2romaji($text)
#
# Converts kana in $text to romaji in three passes and returns the result.
# Text that matches no table entry is left as it is.
sub kana2romaji {
    my ($text) = @_;

    # Pass 1: replace every kana found in %Kana2Romaji.
    $text =~ s{($Re_Kana2Romaji)}{ $Kana2Romaji{$1} || $1 }ge;

    # Pass 2: a sokuon doubles the consonant that follows it (ッta -> tta).
    $text =~ s{[っッ]($Re_Consonants)}{$1$1}g;

    # Pass 3: a long-vowel mark repeats the preceding vowel (oー -> oo).
    $text =~ s{($Re_Vowels)ー}{$1$1}g;

    return $text;
}
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
# Ad-hoc demo: runs only when this file is executed directly as a script.
# Diagnostics go to STDERR via warn, conversions to STDOUT one per line.
if ( $0 eq __FILE__ ) {
    warn $USE_REGEXP_ASSEMBLE;
    binmode STDOUT, ':utf8';
    local $\ = "\n";    # every print ends with a newline

    warn $Re_Romaji2Kata;
    print romaji2katakana($_) for "Dan Kogai", "shimbashi", "konnichiwa";
    print romaji2hiragana($_) for "Dan Kogai", "shimbashi";

    warn $Re_Kana2Romaji;
    print kana2romaji($_) for "ダンコガイ", "マイッタ", "シンバシ";

    # Regression inputs: RT#39590 (ryoukai) and RT#45402 (virama).
    print romaji2hiragana($_) for "ryoukai", "virama";
}
210
|
|
|
|
|
|
|
|
211
|
2
|
|
|
2
|
|
54439
|
use Encode; |
|
2
|
|
|
|
|
29138
|
|
|
2
|
|
|
|
|
239
|
|
212
|
2
|
|
|
2
|
|
1848
|
use Encode::JP::H2Z; |
|
2
|
|
|
|
|
7713
|
|
|
2
|
|
|
|
|
542
|
|
213
|
|
|
|
|
|
|
# Encoding object shared by the width converters, which work on EUC-JP
# octets through Encode::JP::H2Z.
my $eucjp = Encode::find_encoding('eucjp');

# hankaku2zenkaku($text)
#
# Encodes $text to EUC-JP, applies Encode::JP::H2Z::h2z to the octets in
# place, and returns the result decoded back to a character string.
sub hankaku2zenkaku {
    my ($text) = @_;
    my $octets = $eucjp->encode($text);
    Encode::JP::H2Z::h2z( \$octets );
    return $eucjp->decode($octets);
}
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
# zenkaku2hankaku($text)
#
# Encodes $text to EUC-JP, applies Encode::JP::H2Z::z2h to the octets in
# place, and returns the result decoded back to a character string.
sub zenkaku2hankaku {
    my ($text) = @_;
    my $octets = $eucjp->encode($text);
    Encode::JP::H2Z::z2h( \$octets );
    return $eucjp->decode($octets);
}
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
1; # End of Lingua::JA::Kana |
228
|
|
|
|
|
|
|
__END__ |