| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Lingua::JA::Moji; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 24 |  |  | 24 |  | 1341509 | use warnings; | 
|  | 24 |  |  |  |  | 253 |  | 
|  | 24 |  |  |  |  | 797 |  | 
| 4 | 24 |  |  | 24 |  | 170 | use strict; | 
|  | 24 |  |  |  |  | 51 |  | 
|  | 24 |  |  |  |  | 524 |  | 
| 5 | 24 |  |  | 24 |  | 8287 | use utf8; | 
|  | 24 |  |  |  |  | 215 |  | 
|  | 24 |  |  |  |  | 122 |  | 
| 6 |  |  |  |  |  |  |  | 
| 7 |  |  |  |  |  |  | require Exporter; | 
| 8 |  |  |  |  |  |  | our @ISA = qw(Exporter); | 
| 9 |  |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  | our $VERSION = '0.57'; | 
| 11 |  |  |  |  |  |  |  | 
| 12 | 24 |  |  | 24 |  | 1725 | use Carp 'croak'; | 
|  | 24 |  |  |  |  | 42 |  | 
|  | 24 |  |  |  |  | 1365 |  | 
| 13 | 24 |  |  | 24 |  | 10643 | use Convert::Moji qw/make_regex length_one unambiguous/; | 
|  | 24 |  |  |  |  | 45348 |  | 
|  | 24 |  |  |  |  | 1504 |  | 
| 14 | 24 |  |  | 24 |  | 11460 | use JSON::Parse 'json_file_to_perl'; | 
|  | 24 |  |  |  |  | 24642 |  | 
|  | 24 |  |  |  |  | 41597 |  | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | our @EXPORT_OK = qw/ | 
| 17 |  |  |  |  |  |  | bad_kanji | 
| 18 |  |  |  |  |  |  | cleanup_kana | 
| 19 |  |  |  |  |  |  | hangul2kana | 
| 20 |  |  |  |  |  |  | hentai2kana | 
| 21 |  |  |  |  |  |  | hentai2kanji | 
| 22 |  |  |  |  |  |  | kana2hentai | 
| 23 |  |  |  |  |  |  | kanji2hentai | 
| 24 |  |  |  |  |  |  | katakana2square | 
| 25 |  |  |  |  |  |  | nigori_first | 
| 26 |  |  |  |  |  |  | smallize_kana | 
| 27 |  |  |  |  |  |  | square2katakana | 
| 28 |  |  |  |  |  |  | InHankakuKatakana | 
| 29 |  |  |  |  |  |  | InKana | 
| 30 |  |  |  |  |  |  | InWideAscii | 
| 31 |  |  |  |  |  |  | ascii2wide | 
| 32 |  |  |  |  |  |  | bracketed2kanji | 
| 33 |  |  |  |  |  |  | braille2kana | 
| 34 |  |  |  |  |  |  | circled2kana | 
| 35 |  |  |  |  |  |  | circled2kanji | 
| 36 |  |  |  |  |  |  | cyrillic2katakana | 
| 37 |  |  |  |  |  |  | hira2kata | 
| 38 |  |  |  |  |  |  | hw2katakana | 
| 39 |  |  |  |  |  |  | is_hiragana | 
| 40 |  |  |  |  |  |  | is_kana | 
| 41 |  |  |  |  |  |  | is_romaji | 
| 42 |  |  |  |  |  |  | is_romaji_semistrict | 
| 43 |  |  |  |  |  |  | is_romaji_strict | 
| 44 |  |  |  |  |  |  | is_voiced | 
| 45 |  |  |  |  |  |  | kana2braille | 
| 46 |  |  |  |  |  |  | kana2circled | 
| 47 |  |  |  |  |  |  | kana2cyrillic | 
| 48 |  |  |  |  |  |  | kana2hangul | 
| 49 |  |  |  |  |  |  | kana2hw | 
| 50 |  |  |  |  |  |  | kana2katakana | 
| 51 |  |  |  |  |  |  | kana2morse | 
| 52 |  |  |  |  |  |  | kana2romaji | 
| 53 |  |  |  |  |  |  | kana_to_large | 
| 54 |  |  |  |  |  |  | kanji2bracketed | 
| 55 |  |  |  |  |  |  | kanji2circled | 
| 56 |  |  |  |  |  |  | kata2hira | 
| 57 |  |  |  |  |  |  | katakana2hw | 
| 58 |  |  |  |  |  |  | katakana2syllable | 
| 59 |  |  |  |  |  |  | morse2kana | 
| 60 |  |  |  |  |  |  | new2old_kanji | 
| 61 |  |  |  |  |  |  | normalize_romaji | 
| 62 |  |  |  |  |  |  | old2new_kanji | 
| 63 |  |  |  |  |  |  | romaji2hiragana | 
| 64 |  |  |  |  |  |  | romaji2kana | 
| 65 |  |  |  |  |  |  | romaji_styles | 
| 66 |  |  |  |  |  |  | romaji_vowel_styles | 
| 67 |  |  |  |  |  |  | wide2ascii | 
| 68 |  |  |  |  |  |  | yurei_moji | 
| 69 |  |  |  |  |  |  | join_sound_marks | 
| 70 |  |  |  |  |  |  | split_sound_marks | 
| 71 |  |  |  |  |  |  | /; | 
| 72 |  |  |  |  |  |  |  | 
| 73 |  |  |  |  |  |  | our %EXPORT_TAGS = ( | 
| 74 |  |  |  |  |  |  | 'all' => \@EXPORT_OK, | 
| 75 |  |  |  |  |  |  | ); | 
| 76 |  |  |  |  |  |  |  | 
| 77 |  |  |  |  |  |  | # Load a specified convertor from the shared directory. | 
| 78 |  |  |  |  |  |  |  | 
| 79 |  |  |  |  |  |  | sub load_convertor | 
| 80 |  |  |  |  |  |  | { | 
| 81 | 21 |  |  | 21 | 0 | 132 | my ($in, $out) = @_; | 
| 82 | 21 |  |  |  |  | 69 | my $filename = $in."2".$out; | 
| 83 | 21 |  |  |  |  | 69 | my $file = getdistfile ($filename); | 
| 84 | 21 | 100 | 66 |  |  | 592 | if (! $file || ! -f $file) { | 
| 85 | 1 |  |  |  |  | 221 | croak "Could not find distribution file '$filename'"; | 
| 86 |  |  |  |  |  |  | } | 
| 87 | 20 |  |  |  |  | 136 | my $convertor = Convert::Moji::load_convertor ($file); | 
| 88 | 20 |  |  |  |  | 137092 | return $convertor; | 
| 89 |  |  |  |  |  |  | } | 
| 90 |  |  |  |  |  |  |  | 
| 91 |  |  |  |  |  |  | sub add_boilerplate | 
| 92 |  |  |  |  |  |  | { | 
| 93 | 16 |  |  | 16 | 0 | 41 | my ($code, $name) = @_; | 
| 94 | 16 |  |  |  |  | 57 | $code =< | 
| 95 |  |  |  |  |  |  | sub convert_$name | 
| 96 |  |  |  |  |  |  | { | 
| 97 |  |  |  |  |  |  | my (\$conv,\$input,\$convert_type) = \@_; | 
| 98 |  |  |  |  |  |  | $code | 
| 99 |  |  |  |  |  |  | return \$input; | 
| 100 |  |  |  |  |  |  | } | 
| 101 |  |  |  |  |  |  | EOSUB | 
| 102 | 16 |  |  |  |  | 68 | $code .= "\\\&".__PACKAGE__."::convert_$name;"; | 
| 103 | 16 |  |  |  |  | 45 | return $code; | 
| 104 |  |  |  |  |  |  | } | 
| 105 |  |  |  |  |  |  |  | 
| 106 |  |  |  |  |  |  | sub ambiguous_reverse | 
| 107 |  |  |  |  |  |  | { | 
| 108 | 0 |  |  | 0 | 0 | 0 | my ($table) = @_; | 
| 109 | 0 |  |  |  |  | 0 | my %inverted; | 
| 110 | 0 |  |  |  |  | 0 | for (keys %$table) { | 
| 111 | 0 |  |  |  |  | 0 | my $val = $table->{$_}; | 
| 112 | 0 |  |  |  |  | 0 | push @{$inverted{$val}}, $_; | 
|  | 0 |  |  |  |  | 0 |  | 
| 113 |  |  |  |  |  |  | } | 
| 114 | 0 |  |  |  |  | 0 | return \%inverted; | 
| 115 |  |  |  |  |  |  | } | 
| 116 |  |  |  |  |  |  |  | 
| 117 |  |  |  |  |  |  | # Callback | 
| 118 |  |  |  |  |  |  |  | 
| 119 |  |  |  |  |  |  | sub split_match | 
| 120 |  |  |  |  |  |  | { | 
| 121 | 0 |  |  | 0 | 0 | 0 | my ($conv, $input, $convert_type) = @_; | 
| 122 | 0 | 0 |  |  |  | 0 | if (!$convert_type) { | 
| 123 | 0 |  |  |  |  | 0 | $convert_type = "all"; | 
| 124 |  |  |  |  |  |  | } | 
| 125 | 0 |  |  |  |  | 0 | my @input = split '', $input; | 
| 126 | 0 |  |  |  |  | 0 | my @output; | 
| 127 | 0 |  |  |  |  | 0 | for (@input) { | 
| 128 | 0 |  |  |  |  | 0 | my $in = $conv->{out2in}->{$_}; | 
| 129 |  |  |  |  |  |  | # No conversion defined. | 
| 130 | 0 | 0 |  |  |  | 0 | if (! $in) { | 
| 131 | 0 |  |  |  |  | 0 | push @output, $_; | 
| 132 | 0 |  |  |  |  | 0 | next; | 
| 133 |  |  |  |  |  |  | } | 
| 134 |  |  |  |  |  |  | # Unambigous case | 
| 135 | 0 | 0 |  |  |  | 0 | if (@{$in} == 1) { | 
|  | 0 |  |  |  |  | 0 |  | 
| 136 | 0 |  |  |  |  | 0 | push @output, $in->[0]; | 
| 137 | 0 |  |  |  |  | 0 | next; | 
| 138 |  |  |  |  |  |  | } | 
| 139 | 0 | 0 |  |  |  | 0 | if ($convert_type eq 'all') { | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 140 | 0 |  |  |  |  | 0 | push @output, $in; | 
| 141 |  |  |  |  |  |  | } | 
| 142 |  |  |  |  |  |  | elsif ($convert_type eq 'first') { | 
| 143 | 0 |  |  |  |  | 0 | push @output, $in->[0]; | 
| 144 |  |  |  |  |  |  | } | 
| 145 |  |  |  |  |  |  | elsif ($convert_type eq 'random') { | 
| 146 | 0 |  |  |  |  | 0 | my $pos = int rand @$in; | 
| 147 | 0 |  |  |  |  | 0 | push @output, $in->[$pos]; | 
| 148 |  |  |  |  |  |  | } | 
| 149 |  |  |  |  |  |  | } | 
| 150 | 0 |  |  |  |  | 0 | return \@output; | 
| 151 |  |  |  |  |  |  | } | 
| 152 |  |  |  |  |  |  |  | 
| 153 |  |  |  |  |  |  | sub make_convertors | 
| 154 |  |  |  |  |  |  | { | 
| 155 | 8 |  |  | 8 | 0 | 25 | my ($in, $out, $table) = @_; | 
| 156 | 8 |  |  |  |  | 18 | my $conv = {}; | 
| 157 | 8 | 50 |  |  |  | 25 | if (!$table) { | 
| 158 | 8 |  |  |  |  | 28 | $table = load_convertor ($in, $out); | 
| 159 |  |  |  |  |  |  | } | 
| 160 | 8 |  |  |  |  | 50 | $conv->{in2out} = $table; | 
| 161 | 8 |  |  |  |  | 18 | my @keys = keys %{$table}; | 
|  | 8 |  |  |  |  | 113 |  | 
| 162 | 8 |  |  |  |  | 17 | my @values = values %{$table}; | 
|  | 8 |  |  |  |  | 121 |  | 
| 163 | 8 |  |  |  |  | 18 | my $sub_in2out; | 
| 164 |  |  |  |  |  |  | my $sub_out2in; | 
| 165 | 8 | 50 |  |  |  | 36 | if (length_one (@keys)) { | 
| 166 | 8 |  |  |  |  | 1113 | my $lhs = join '', @keys; | 
| 167 |  |  |  |  |  |  |  | 
| 168 |  |  |  |  |  |  | # Improvement: one way tr/// for the ambiguous case lhs/rhs only. | 
| 169 |  |  |  |  |  |  |  | 
| 170 | 8 | 100 | 66 |  |  | 32 | if (length_one (@values) && unambiguous ($table)) { | 
| 171 |  |  |  |  |  |  | # can use tr///; | 
| 172 | 2 |  |  |  |  | 502 | my $rhs = join '', @values; | 
| 173 | 2 |  |  |  |  | 8 | $sub_in2out = "\$input =~ tr/$lhs/$rhs/;"; | 
| 174 | 2 |  |  |  |  | 7 | $sub_out2in = "\$input =~ tr/$rhs/$lhs/;"; | 
| 175 |  |  |  |  |  |  | } | 
| 176 |  |  |  |  |  |  | else { | 
| 177 | 6 |  |  |  |  | 116 | $sub_in2out = "\$input =~ s/([$lhs])/\$conv->{in2out}->{\$1}/eg;"; | 
| 178 | 6 |  |  |  |  | 34 | my $rhs = make_regex (@values); | 
| 179 | 6 | 50 |  |  |  | 2953 | if (unambiguous($conv->{in2out})) { | 
| 180 | 6 |  |  |  |  | 2056 | my %out2in_table = reverse %{$conv->{in2out}}; | 
|  | 6 |  |  |  |  | 172 |  | 
| 181 | 6 |  |  |  |  | 25 | $conv->{out2in} = \%out2in_table; | 
| 182 | 6 |  |  |  |  | 30 | $sub_out2in = "\$input =~ s/($rhs)/\$conv->{out2in}->{\$1}/eg;"; | 
| 183 |  |  |  |  |  |  | } | 
| 184 |  |  |  |  |  |  | else { | 
| 185 | 0 |  |  |  |  | 0 | $conv->{out2in} = ambiguous_reverse ($conv->{in2out}); | 
| 186 | 0 |  |  |  |  | 0 | $sub_out2in = "\$input = \$conv->split_match (\$input, \$convert_type);"; | 
| 187 |  |  |  |  |  |  | } | 
| 188 |  |  |  |  |  |  | } | 
| 189 |  |  |  |  |  |  | } | 
| 190 |  |  |  |  |  |  | else { | 
| 191 | 0 |  |  |  |  | 0 | my $lhs = make_regex (@keys); | 
| 192 | 0 |  |  |  |  | 0 | $sub_in2out = "\$input =~ s/($lhs)/\$conv->{in2out}->{\$1}/eg;"; | 
| 193 | 0 |  |  |  |  | 0 | my $rhs = make_regex (@values); | 
| 194 | 0 | 0 |  |  |  | 0 | if (unambiguous($conv->{in2out})) { | 
| 195 | 0 |  |  |  |  | 0 | my %out2in_table = reverse %{$conv->{in2out}}; | 
|  | 0 |  |  |  |  | 0 |  | 
| 196 | 0 |  |  |  |  | 0 | $conv->{out2in} = \%out2in_table; | 
| 197 | 0 |  |  |  |  | 0 | $sub_out2in = "    \$input =~ s/($rhs)/\$conv->{out2in}->{\$1}/eg;"; | 
| 198 |  |  |  |  |  |  | } | 
| 199 |  |  |  |  |  |  | } | 
| 200 | 8 |  |  |  |  | 37 | $sub_in2out = add_boilerplate ($sub_in2out, "${in}2$out"); | 
| 201 | 8 |  |  | 1 | 0 | 1348 | my $sub1 = eval $sub_in2out; | 
|  | 1 |  |  | 1 | 0 | 3 |  | 
|  | 1 |  |  | 1 | 0 | 21 |  | 
|  | 1 |  |  | 1 | 0 | 3 |  | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 17 |  | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 9 |  | 
|  | 3 |  |  |  |  | 12 |  | 
|  | 1 |  |  |  |  | 5 |  | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 4 |  | 
|  | 7 |  |  |  |  | 21 |  | 
|  | 1 |  |  |  |  | 4 |  | 
| 202 | 8 |  |  |  |  | 31 | $conv->{in2out_sub} = $sub1; | 
| 203 | 8 | 50 |  |  |  | 32 | if ($sub_out2in) { | 
| 204 | 8 |  |  |  |  | 35 | $sub_out2in = add_boilerplate ($sub_out2in, "${out}2$in"); | 
| 205 | 8 |  |  | 1 | 0 | 2595 | my $sub2 = eval $sub_out2in; | 
|  | 1 |  |  | 1 | 0 | 3 |  | 
|  | 1 |  |  | 8 | 0 | 10 |  | 
|  | 1 |  |  | 7 | 0 | 4 |  | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 20 |  | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 8 |  |  |  |  | 26 |  | 
|  | 8 |  |  |  |  | 63 |  | 
|  | 33 |  |  |  |  | 134 |  | 
|  | 8 |  |  |  |  | 40 |  | 
|  | 7 |  |  |  |  | 14 |  | 
|  | 7 |  |  |  |  | 29 |  | 
|  | 7 |  |  |  |  | 18 |  | 
|  | 7 |  |  |  |  | 19 |  | 
| 206 | 8 | 50 |  |  |  | 51 | if ($@) { | 
| 207 | 0 |  |  |  |  | 0 | print "Errors are ",$@,"\n"; | 
| 208 | 0 |  |  |  |  | 0 | print "\$sub2 = ",$sub2,"\n"; | 
| 209 |  |  |  |  |  |  | } | 
| 210 | 8 |  |  |  |  | 23 | $conv->{out2in_sub} = $sub2; | 
| 211 |  |  |  |  |  |  | } | 
| 212 | 8 |  |  |  |  | 16 | bless $conv; | 
| 213 | 8 |  |  |  |  | 81 | return $conv; | 
| 214 |  |  |  |  |  |  | } | 
| 215 |  |  |  |  |  |  |  | 
| 216 |  |  |  |  |  |  | sub convert | 
| 217 |  |  |  |  |  |  | { | 
| 218 | 4 |  |  | 4 | 0 | 11 | my ($conv, $input) = @_; | 
| 219 | 4 |  |  |  |  | 9 | return &{$conv->{in2out_sub}}($conv, $input); | 
|  | 4 |  |  |  |  | 97 |  | 
| 220 |  |  |  |  |  |  | } | 
| 221 |  |  |  |  |  |  |  | 
| 222 |  |  |  |  |  |  | sub invert | 
| 223 |  |  |  |  |  |  | { | 
| 224 | 17 |  |  | 17 | 0 | 39 | my ($conv, $input, $convert_type) = @_; | 
| 225 | 17 |  |  |  |  | 26 | return &{$conv->{out2in_sub}}($conv, $input, $convert_type); | 
|  | 17 |  |  |  |  | 361 |  | 
| 226 |  |  |  |  |  |  | } | 
| 227 |  |  |  |  |  |  |  | 
| 228 |  |  |  |  |  |  |  | 
| 229 |  |  |  |  |  |  | # Kana ordered by consonant. Adds two bogus gyous, a "q" gyou for | 
| 230 |  |  |  |  |  |  | # small vowels and an "xy" gyou for youon (ya, yu, yo), to the usual | 
| 231 |  |  |  |  |  |  | # ones. | 
| 232 |  |  |  |  |  |  |  | 
| 233 |  |  |  |  |  |  | my @gyou = ( | 
| 234 |  |  |  |  |  |  | a => [qw/ア イ ウ エ オ/], | 
| 235 |  |  |  |  |  |  | # Not a real gyou. | 
| 236 |  |  |  |  |  |  | q => [qw/ァ ィ ゥ ェ ォ/], | 
| 237 |  |  |  |  |  |  | k => [qw/カ キ ク ケ コ/], | 
| 238 |  |  |  |  |  |  | g => [qw/ガ ギ グ ゲ ゴ/], | 
| 239 |  |  |  |  |  |  | s => [qw/サ シ ス セ ソ/], | 
| 240 |  |  |  |  |  |  | z => [qw/ザ ジ ズ ゼ ゾ/], | 
| 241 |  |  |  |  |  |  | t => [qw/タ チ ツ テ ト/], | 
| 242 |  |  |  |  |  |  | d => [qw/ダ ヂ ヅ デ ド/], | 
| 243 |  |  |  |  |  |  | n => [qw/ナ ニ ヌ ネ ノ/], | 
| 244 |  |  |  |  |  |  | h => [qw/ハ ヒ フ ヘ ホ/], | 
| 245 |  |  |  |  |  |  | b => [qw/バ ビ ブ ベ ボ/], | 
| 246 |  |  |  |  |  |  | p => [qw/パ ピ プ ペ ポ/], | 
| 247 |  |  |  |  |  |  | m => [qw/マ ミ ム メ モ/], | 
| 248 |  |  |  |  |  |  | y => [qw/ヤ    ユ    ヨ/], | 
| 249 |  |  |  |  |  |  | xy => [qw/ャ    ュ    ョ/], | 
| 250 |  |  |  |  |  |  | r => [qw/ラ リ ル レ ロ/], | 
| 251 |  |  |  |  |  |  | w => [qw/ワ ヰ    ヱ ヲ/], | 
| 252 |  |  |  |  |  |  | v => [qw/ヴ/], | 
| 253 |  |  |  |  |  |  | ); | 
| 254 |  |  |  |  |  |  |  | 
| 255 |  |  |  |  |  |  | my %gyou = @gyou; | 
| 256 |  |  |  |  |  |  |  | 
| 257 |  |  |  |  |  |  | # Kana => consonant mapping. | 
| 258 |  |  |  |  |  |  |  | 
| 259 |  |  |  |  |  |  | my %siin; | 
| 260 |  |  |  |  |  |  |  | 
| 261 |  |  |  |  |  |  | for my $consonant (keys %gyou) { | 
| 262 |  |  |  |  |  |  | for my $kana (@{$gyou{$consonant}}) { | 
| 263 |  |  |  |  |  |  | if ($consonant eq 'a') { | 
| 264 |  |  |  |  |  |  | $siin{$kana} = ''; | 
| 265 |  |  |  |  |  |  | } | 
| 266 |  |  |  |  |  |  | else { | 
| 267 |  |  |  |  |  |  | $siin{$kana} = $consonant; | 
| 268 |  |  |  |  |  |  | } | 
| 269 |  |  |  |  |  |  | } | 
| 270 |  |  |  |  |  |  | } | 
| 271 |  |  |  |  |  |  |  | 
| 272 |  |  |  |  |  |  | # Vowel => kana mapping. | 
| 273 |  |  |  |  |  |  |  | 
| 274 |  |  |  |  |  |  | my %dan = (a => [qw/ア カ ガ サ ザ タ ダ ナ ハ バ パ マ ヤ ラ ワ ャ ァ/], | 
| 275 |  |  |  |  |  |  | i => [qw/イ キ ギ シ ジ チ ヂ ニ ヒ ビ ピ ミ リ ヰ ィ/], | 
| 276 |  |  |  |  |  |  | u => [qw/ウ ク グ ス ズ ツ ヅ ヌ フ ブ プ ム ユ ル ュ ゥ ヴ/], | 
| 277 |  |  |  |  |  |  | e => [qw/エ ケ ゲ セ ゼ テ デ ネ ヘ ベ ペ メ レ ヱ ェ/], | 
| 278 |  |  |  |  |  |  | o => [qw/オ コ ゴ ソ ゾ ト ド ノ ホ ボ ポ モ ヨ ロ ヲ ョ ォ/]); | 
| 279 |  |  |  |  |  |  |  | 
| 280 |  |  |  |  |  |  | # Kana => vowel mapping | 
| 281 |  |  |  |  |  |  |  | 
| 282 |  |  |  |  |  |  | my %boin; | 
| 283 |  |  |  |  |  |  |  | 
| 284 |  |  |  |  |  |  | # List of kana with a certain vowel. | 
| 285 |  |  |  |  |  |  |  | 
| 286 |  |  |  |  |  |  | my %vowelclass; | 
| 287 |  |  |  |  |  |  |  | 
| 288 |  |  |  |  |  |  | for my $vowel (keys %dan) { | 
| 289 |  |  |  |  |  |  | my @kana_list = @{$dan{$vowel}}; | 
| 290 |  |  |  |  |  |  | for my $kana (@kana_list) { | 
| 291 |  |  |  |  |  |  | $boin{$kana} = $vowel; | 
| 292 |  |  |  |  |  |  | } | 
| 293 |  |  |  |  |  |  | $vowelclass{$vowel} = join '', @kana_list; | 
| 294 |  |  |  |  |  |  | } | 
| 295 |  |  |  |  |  |  |  | 
| 296 |  |  |  |  |  |  | # Kana gyou which can be preceded by a sokuon (small tsu). | 
| 297 |  |  |  |  |  |  |  | 
| 298 |  |  |  |  |  |  | # Added d to the list for ウッド | 
| 299 |  |  |  |  |  |  | # Added z for "badge" etc. | 
| 300 |  |  |  |  |  |  | # Added g for ドッグ etc. | 
| 301 |  |  |  |  |  |  |  | 
| 302 |  |  |  |  |  |  | #my @takes_sokuon_gyou = qw/s t k p d z g/; | 
| 303 |  |  |  |  |  |  | #my @takes_sokuon = (map {@{$gyou{$_}}} @takes_sokuon_gyou); | 
| 304 |  |  |  |  |  |  | #my $takes_sokuon = join '', @takes_sokuon; | 
| 305 |  |  |  |  |  |  | #die @takes_sokuon; | 
| 306 |  |  |  |  |  |  | my $takes_sokuon = 'サシスセソタチツテトカキクケコパピプペポダヂヅデドザジズゼゾガギグゲゴ'; | 
| 307 |  |  |  |  |  |  |  | 
| 308 |  |  |  |  |  |  | # Any kana except ん | 
| 309 |  |  |  |  |  |  |  | 
| 310 |  |  |  |  |  |  | #my@b4s;push@b4s,@{$gyou{$_}}for sort keys%gyou;@b4s=grep!/ん/,@b4s;die join'',@b4s; | 
| 311 |  |  |  |  |  |  |  | 
| 312 |  |  |  |  |  |  | my $before_sokuon = 'ヤユヨナニヌネノャュョガギグゲゴダヂヅデドカキクケコヴラリルレロワヰヱヲバビブベボタチツテトアイウエオパピプペポサシスセソァィゥェォマミムメモハヒフヘホザジズゼゾ'; | 
| 313 |  |  |  |  |  |  |  | 
| 314 |  |  |  |  |  |  | # N | 
| 315 |  |  |  |  |  |  |  | 
| 316 |  |  |  |  |  |  | # Kana gyou which need an apostrophe when preceded by an "n" kana. | 
| 317 |  |  |  |  |  |  |  | 
| 318 |  |  |  |  |  |  | my $need_apostrophe = join '', (map {@{$gyou{$_}}} qw/a y/); | 
| 319 |  |  |  |  |  |  |  | 
| 320 |  |  |  |  |  |  | # Gyou which turn an "n" into an "m" in some kinds of romanization | 
| 321 |  |  |  |  |  |  |  | 
| 322 |  |  |  |  |  |  | my $need_m = join '', (map {@{$gyou{$_}}} qw/p b m/); | 
| 323 |  |  |  |  |  |  |  | 
| 324 |  |  |  |  |  |  | # YOUON | 
| 325 |  |  |  |  |  |  |  | 
| 326 |  |  |  |  |  |  | # Small ya, yu, yo. | 
| 327 |  |  |  |  |  |  |  | 
| 328 |  |  |  |  |  |  | my $youon = join '', (@{$gyou{xy}}); | 
| 329 |  |  |  |  |  |  | my %youon = qw/a ャ u ュ o ョ ou ョ/; | 
| 330 |  |  |  |  |  |  |  | 
| 331 |  |  |  |  |  |  | # HEPBURN | 
| 332 |  |  |  |  |  |  |  | 
| 333 |  |  |  |  |  |  | # Hepburn irregular romanization | 
| 334 |  |  |  |  |  |  |  | 
| 335 |  |  |  |  |  |  | my %hepburn = qw/シ sh ツ ts チ ch ジ j ヅ z ヂ j フ f/; | 
| 336 |  |  |  |  |  |  |  | 
| 337 |  |  |  |  |  |  | # Hepburn map from vowel to list of kana with that vowel. | 
| 338 |  |  |  |  |  |  |  | 
| 339 |  |  |  |  |  |  | my %hep_vowel = (i => 'シチジヂ', u => 'ヅツフ'); | 
| 340 |  |  |  |  |  |  | my $hep_list = join '', keys %hepburn; | 
| 341 |  |  |  |  |  |  |  | 
| 342 |  |  |  |  |  |  | # Hepburn irregular romanization of ッチ as "tch". | 
| 343 |  |  |  |  |  |  |  | 
| 344 |  |  |  |  |  |  | my %hepburn_sokuon = qw/チ t/; | 
| 345 |  |  |  |  |  |  | my $hep_sok_list = join '', keys %hepburn_sokuon; | 
| 346 |  |  |  |  |  |  |  | 
| 347 |  |  |  |  |  |  | # Hepburn variants for the youon case. | 
| 348 |  |  |  |  |  |  |  | 
| 349 |  |  |  |  |  |  | my %hepburn_youon = qw/シ sh チ ch ジ j ヂ j/; | 
| 350 |  |  |  |  |  |  | my $is_hepburn_youon = join '', keys %hepburn_youon; | 
| 351 |  |  |  |  |  |  |  | 
| 352 |  |  |  |  |  |  | # Kunrei romanization | 
| 353 |  |  |  |  |  |  |  | 
| 354 |  |  |  |  |  |  | my %kunrei = qw/ヅ z ヂ z/; | 
| 355 |  |  |  |  |  |  |  | 
| 356 |  |  |  |  |  |  | my $kun_list = join '', keys %kunrei; | 
| 357 |  |  |  |  |  |  |  | 
| 358 |  |  |  |  |  |  | my %kunrei_youon = qw/ヂ z/; | 
| 359 |  |  |  |  |  |  | my $is_kunrei_youon = join '', keys %kunrei_youon; | 
| 360 |  |  |  |  |  |  |  | 
| 361 |  |  |  |  |  |  | # LONG VOWELS | 
| 362 |  |  |  |  |  |  |  | 
| 363 |  |  |  |  |  |  | # Long vowels, another bugbear of Japanese romanization. | 
| 364 |  |  |  |  |  |  |  | 
| 365 |  |  |  |  |  |  | my @aiueo = qw/a i u e o ou/; | 
| 366 |  |  |  |  |  |  |  | 
| 367 |  |  |  |  |  |  | # Various ways to display the long vowels. | 
| 368 |  |  |  |  |  |  |  | 
| 369 |  |  |  |  |  |  | my %chouonhyouki; | 
| 370 |  |  |  |  |  |  | @{$chouonhyouki{circumflex}}{@aiueo} = qw/â  î  û  ê  ô  ô/; | 
| 371 |  |  |  |  |  |  | @{$chouonhyouki{macron}}{@aiueo}     = qw/ā  ii  ū  ē  ō  ō/; | 
| 372 |  |  |  |  |  |  | @{$chouonhyouki{wapuro}}{@aiueo}     = qw/aa ii uu ee oo ou/; | 
| 373 |  |  |  |  |  |  | @{$chouonhyouki{passport}}{@aiueo}   = qw/a  i  u  e  oh oh/; | 
| 374 |  |  |  |  |  |  | @{$chouonhyouki{none}}{@aiueo}       = qw/a  ii  u  e  o  o/; | 
| 375 |  |  |  |  |  |  |  | 
| 376 | 24 |  |  | 24 |  | 191 | my $vowel_re = qr/[aeiouâêîôûāēōū]/i; | 
|  | 24 |  |  |  |  | 48 |  | 
|  | 24 |  |  |  |  | 339 |  | 
| 377 |  |  |  |  |  |  | my $no_u_vowel_re = qr/[aeioâêîôāēō]/i; | 
| 378 |  |  |  |  |  |  | my $u_re = qr/[uūû]/i; | 
| 379 |  |  |  |  |  |  |  | 
| 380 |  |  |  |  |  |  | sub kana2romaji | 
| 381 |  |  |  |  |  |  | { | 
| 382 | 185 |  |  | 185 | 1 | 7547 | my ($input, $options) = @_; | 
| 383 | 185 |  |  |  |  | 427 | $input = kana2katakana ($input); | 
| 384 | 185 | 100 |  |  |  | 424 | if (! $options) { | 
| 385 | 9 |  |  |  |  | 50 | $options = {}; | 
| 386 |  |  |  |  |  |  | } | 
| 387 |  |  |  |  |  |  | # Parse the options | 
| 388 | 185 |  |  |  |  | 546 | my $kunrei; | 
| 389 |  |  |  |  |  |  | my $hepburn; | 
| 390 | 185 |  |  |  |  | 0 | my $passport; | 
| 391 | 185 |  |  |  |  | 0 | my $common; | 
| 392 | 185 | 100 |  |  |  | 435 | if ($options->{style}) { | 
| 393 | 26 |  |  |  |  | 53 | my $style = $options->{style}; | 
| 394 | 26 | 100 |  |  |  | 59 | if ($style eq 'kunrei') { | 
| 395 | 1 |  |  |  |  | 2 | $kunrei   = 1; | 
| 396 |  |  |  |  |  |  | } | 
| 397 | 26 | 50 |  |  |  | 56 | if ($style eq 'passport') { | 
| 398 | 0 |  |  |  |  | 0 | $passport = 1; | 
| 399 |  |  |  |  |  |  | } | 
| 400 | 26 | 100 |  |  |  | 55 | if ($style eq 'hepburn') { | 
| 401 | 18 |  |  |  |  | 27 | $hepburn  = 1; | 
| 402 |  |  |  |  |  |  | } | 
| 403 | 26 | 100 |  |  |  | 53 | if ($style eq 'common') { | 
| 404 | 6 |  |  |  |  | 11 | $hepburn  = 1; | 
| 405 | 6 |  |  |  |  | 10 | $common = 1; | 
| 406 |  |  |  |  |  |  | } | 
| 407 | 26 | 50 | 66 |  |  | 156 | if (!$kunrei && !$passport && !$hepburn && $style ne "nihon" && | 
|  |  |  | 66 |  |  |  |  | 
|  |  |  | 33 |  |  |  |  | 
| 408 |  |  |  |  |  |  | $style ne 'nippon') { | 
| 409 | 0 |  |  |  |  | 0 | croak "Unknown romanization style '$options->{style}'"; | 
| 410 |  |  |  |  |  |  | } | 
| 411 |  |  |  |  |  |  | } | 
| 412 | 185 |  |  |  |  | 235 | my $wapuro; | 
| 413 | 185 | 100 |  |  |  | 357 | if ($options->{wapuro}) { | 
| 414 | 158 |  |  |  |  | 222 | $wapuro = 1; | 
| 415 |  |  |  |  |  |  | } | 
| 416 | 185 |  |  |  |  | 259 | my $use_m = 0; | 
| 417 | 185 | 100 | 66 |  |  | 644 | if ($hepburn || $passport) { | 
| 418 | 24 |  |  |  |  | 38 | $use_m = 1; | 
| 419 |  |  |  |  |  |  | } | 
| 420 | 185 | 100 |  |  |  | 352 | if (defined $options->{use_m}) { | 
| 421 |  |  |  |  |  |  | $use_m = $options->{use_m} | 
| 422 | 2 |  |  |  |  | 4 | } | 
| 423 | 185 |  |  |  |  | 270 | my $ve_type = 'circumflex'; # type of vowel extension to use. | 
| 424 | 185 | 100 |  |  |  | 337 | if ($hepburn) { | 
| 425 | 24 |  |  |  |  | 34 | $ve_type = 'macron'; | 
| 426 |  |  |  |  |  |  | } | 
| 427 | 185 | 100 |  |  |  | 327 | if ($wapuro) { | 
| 428 | 158 |  |  |  |  | 237 | $ve_type = 'wapuro'; | 
| 429 |  |  |  |  |  |  | } | 
| 430 | 185 | 50 |  |  |  | 321 | if ($passport) { | 
| 431 | 0 |  |  |  |  | 0 | $hepburn = 1; | 
| 432 | 0 |  |  |  |  | 0 | $ve_type = 'passport'; | 
| 433 | 0 |  |  |  |  | 0 | $use_m = 1; | 
| 434 |  |  |  |  |  |  | } | 
| 435 | 185 | 100 |  |  |  | 351 | if ($options->{ve_type}) { | 
| 436 | 10 |  |  |  |  | 67 | $ve_type = $options->{ve_type}; | 
| 437 |  |  |  |  |  |  | } | 
| 438 | 185 | 50 |  |  |  | 400 | if (! $chouonhyouki{$ve_type}) { | 
| 439 | 0 |  |  |  |  | 0 | print STDERR "Warning: unrecognized long vowel type '$ve_type'\n"; | 
| 440 | 0 |  |  |  |  | 0 | $ve_type = 'circumflex'; | 
| 441 |  |  |  |  |  |  | } | 
| 442 | 185 |  |  |  |  | 244 | my $wo; | 
| 443 | 185 | 100 |  |  |  | 328 | if ($options->{wo}) { | 
| 444 | 1 |  |  |  |  | 3 | $wo = 1; | 
| 445 |  |  |  |  |  |  | } | 
| 446 |  |  |  |  |  |  | # Start of conversion | 
| 447 |  |  |  |  |  |  |  | 
| 448 |  |  |  |  |  |  | # 撥音 (ん) | 
| 449 | 185 |  |  |  |  | 870 | $input =~ s/ン(?=[$need_apostrophe])/n\'/g; | 
| 450 | 185 | 100 |  |  |  | 381 | if ($use_m) { | 
| 451 | 22 |  |  |  |  | 122 | $input =~ s/ン(?=[$need_m])/m/g; | 
| 452 |  |  |  |  |  |  | } | 
| 453 | 185 |  |  |  |  | 387 | $input =~ s/ン/n/g; | 
| 454 |  |  |  |  |  |  | # 促音 (っ) | 
| 455 | 185 | 100 |  |  |  | 320 | if ($hepburn) { | 
| 456 | 24 |  |  |  |  | 106 | $input =~ s/ッ([$hep_sok_list])/$hepburn_sokuon{$1}$1/g; | 
| 457 |  |  |  |  |  |  | } | 
| 458 | 185 |  |  |  |  | 742 | $input =~ s/ッ([$takes_sokuon])/$siin{$1}$1/g; | 
| 459 | 185 | 100 |  |  |  | 432 | if ($ve_type eq 'wapuro') { | 
| 460 | 167 |  |  |  |  | 267 | $input =~ s/ー/-/g; | 
| 461 |  |  |  |  |  |  | } | 
| 462 | 185 | 100 |  |  |  | 382 | if ($ve_type eq 'none') { | 
| 463 | 1 |  |  |  |  | 7 | $input =~ s/ー//g; | 
| 464 |  |  |  |  |  |  | } | 
| 465 |  |  |  |  |  |  | # 長音 (ー) | 
| 466 | 185 |  |  |  |  | 346 | for my $vowel (@aiueo) { | 
| 467 | 1110 |  |  |  |  | 2466 | my $ve = $chouonhyouki{$ve_type}->{$vowel}; | 
| 468 | 1110 |  |  |  |  | 1482 | my $vowelclass; | 
| 469 |  |  |  |  |  |  | my $vowel_kana; | 
| 470 | 1110 | 100 |  |  |  | 1938 | if ($vowel eq 'ou') { | 
| 471 | 185 |  |  |  |  | 289 | $vowelclass = $vowelclass{o}; | 
| 472 | 185 |  |  |  |  | 258 | $vowel_kana = 'ウ'; | 
| 473 |  |  |  |  |  |  | } | 
| 474 |  |  |  |  |  |  | else { | 
| 475 | 925 |  |  |  |  | 1297 | $vowelclass = $vowelclass{$vowel}; | 
| 476 | 925 |  |  |  |  | 1441 | $vowel_kana = $dan{$vowel}->[0]; | 
| 477 |  |  |  |  |  |  | } | 
| 478 |  |  |  |  |  |  | # 長音 (ー) + 拗音 (きょ) | 
| 479 | 1110 |  |  |  |  | 1528 | my $y = $youon{$vowel}; | 
| 480 | 1110 | 100 |  |  |  | 1949 | if ($y) { | 
| 481 | 740 | 100 |  |  |  | 1233 | if ($hepburn) { | 
| 482 | 96 |  |  |  |  | 1407 | $input =~ s/([$is_hepburn_youon])${y}[ー$vowel_kana]/$hepburn_youon{$1}$ve/g; | 
| 483 |  |  |  |  |  |  | } | 
| 484 | 740 |  |  |  |  | 12938 | $input =~ s/([$vowelclass{i}])${y}[ー$vowel_kana]/$siin{$1}y$ve/g; | 
| 485 |  |  |  |  |  |  | } | 
| 486 | 1110 | 100 | 100 |  |  | 2656 | if ($hepburn && $hep_vowel{$vowel}) { | 
| 487 | 48 |  |  |  |  | 582 | $input =~ s/([$hep_vowel{$vowel}])[ー$vowel_kana]/$hepburn{$1}$ve/g; | 
| 488 |  |  |  |  |  |  | } | 
| 489 | 1110 |  |  |  |  | 11293 | $input =~ s/${vowel_kana}[ー$vowel_kana]/$ve/g; | 
| 490 | 1110 |  |  |  |  | 16239 | $input =~ s/([$vowelclass])[ー$vowel_kana]/$siin{$1}$ve/g; | 
| 491 |  |  |  |  |  |  | } | 
| 492 |  |  |  |  |  |  | # 拗音 (きょ) | 
| 493 | 185 | 100 |  |  |  | 548 | if ($hepburn) { | 
|  |  | 100 |  |  |  |  |  | 
| 494 | 24 |  |  |  |  | 138 | $input =~ s/([$is_hepburn_youon])([$youon])/$hepburn_youon{$1}$boin{$2}/g; | 
| 495 |  |  |  |  |  |  | } | 
| 496 |  |  |  |  |  |  | elsif ($kunrei) { | 
| 497 | 1 |  |  |  |  | 15 | $input =~ s/([$is_kunrei_youon])([$youon])/$kunrei_youon{$1}y$boin{$2}/g; | 
| 498 |  |  |  |  |  |  | } | 
| 499 | 185 |  |  |  |  | 945 | $input =~ s/([$vowelclass{i}])([$youon])/$siin{$1}y$boin{$2}/g; | 
| 500 |  |  |  |  |  |  | # その他 | 
| 501 | 185 | 100 |  |  |  | 390 | if ($wo) { | 
| 502 | 1 |  |  |  |  | 6 | $input =~ s/ヲ/wo/g; | 
| 503 | 1 |  |  |  |  | 5 | $input =~ s/([アイウエオ])/$boin{$1}/g; | 
| 504 |  |  |  |  |  |  | } | 
| 505 |  |  |  |  |  |  | else { | 
| 506 | 184 |  |  |  |  | 620 | $input =~ s/([アイウエオヲ])/$boin{$1}/g; | 
| 507 |  |  |  |  |  |  | } | 
| 508 | 185 |  |  |  |  | 824 | $input =~ s/([ァィゥェォ])/q$boin{$1}/g; | 
| 509 | 185 |  |  |  |  | 427 | $input =~ s/ヮ/xwa/g; | 
| 510 | 185 | 100 |  |  |  | 427 | if ($hepburn) { | 
|  |  | 100 |  |  |  |  |  | 
| 511 | 24 |  |  |  |  | 163 | $input =~ s/([$hep_list])/$hepburn{$1}$boin{$1}/g; | 
| 512 |  |  |  |  |  |  | } | 
| 513 |  |  |  |  |  |  | elsif ($kunrei) { | 
| 514 | 1 |  |  |  |  | 10 | $input =~ s/([$kun_list])/$kunrei{$1}$boin{$1}/g; | 
| 515 |  |  |  |  |  |  | } | 
| 516 | 185 |  |  |  |  | 1193 | $input =~ s/([カ-ヂツ-ヱヴ])/$siin{$1}$boin{$1}/g; | 
| 517 | 185 |  |  |  |  | 1285 | $input =~ s/q($vowel_re)/x$1/g; | 
| 518 | 185 | 100 |  |  |  | 453 | if ($common) { | 
| 519 |  |  |  |  |  |  | # Convert kana + small vowel into thingumibob, if there is a | 
| 520 |  |  |  |  |  |  | # consonant before. | 
| 521 | 6 |  |  |  |  | 225 | $input =~ s/([^\Waiueo])$vowel_re[x]($vowel_re)/$1$2/; | 
| 522 |  |  |  |  |  |  | # Convert u + small kana into w + vowel | 
| 523 | 6 |  |  |  |  | 135 | $input =~ s/($vowel_re|\b)ux([iue])/$1w$2/i; | 
| 524 |  |  |  |  |  |  | } | 
| 525 | 185 |  |  |  |  | 995 | return $input; | 
| 526 |  |  |  |  |  |  | } | 
| 527 |  |  |  |  |  |  |  | 
| 528 |  |  |  |  |  |  | sub romaji2hiragana | 
| 529 |  |  |  |  |  |  | { | 
| 530 | 53 |  |  | 53 | 1 | 1860 | my ($input, $options) = @_; | 
| 531 | 53 | 100 |  |  |  | 125 | if (! $options) { | 
| 532 | 50 |  |  |  |  | 97 | $options = {}; | 
| 533 |  |  |  |  |  |  | } | 
| 534 | 53 |  |  |  |  | 192 | my $katakana = romaji2kana ($input, {wapuro => 1, %$options}); | 
| 535 | 53 |  |  |  |  | 158 | return kata2hira ($katakana); | 
| 536 |  |  |  |  |  |  | } | 
| 537 |  |  |  |  |  |  |  | 
| 538 |  |  |  |  |  |  | sub romaji_styles | 
| 539 |  |  |  |  |  |  | { | 
| 540 | 1 |  |  | 1 | 1 | 4 | my ($check) = @_; | 
| 541 | 1 |  |  |  |  | 13 | my @styles = ( | 
| 542 |  |  |  |  |  |  | { | 
| 543 |  |  |  |  |  |  | abbrev    => "hepburn", | 
| 544 |  |  |  |  |  |  | full_name => "Hepburn", | 
| 545 |  |  |  |  |  |  | }, { | 
| 546 |  |  |  |  |  |  | abbrev    => 'nihon', | 
| 547 |  |  |  |  |  |  | full_name => 'Nihon-shiki', | 
| 548 |  |  |  |  |  |  | }, { | 
| 549 |  |  |  |  |  |  | abbrev    => 'kunrei', | 
| 550 |  |  |  |  |  |  | full_name => 'Kunrei-shiki', | 
| 551 |  |  |  |  |  |  | }, { | 
| 552 |  |  |  |  |  |  | abbrev => 'common', | 
| 553 |  |  |  |  |  |  | full_name => 'common', | 
| 554 |  |  |  |  |  |  | }); | 
| 555 | 1 | 50 |  |  |  | 4 | if (! defined ($check)) { | 
| 556 | 0 |  |  |  |  | 0 | return (@styles); | 
| 557 |  |  |  |  |  |  | } | 
| 558 |  |  |  |  |  |  | else { | 
| 559 | 1 |  |  |  |  | 3 | for my $style (@styles) { | 
| 560 | 2 | 100 |  |  |  | 8 | if ($check eq $style->{abbrev}) { | 
| 561 | 1 |  |  |  |  | 6 | return 1; | 
| 562 |  |  |  |  |  |  | } | 
| 563 |  |  |  |  |  |  | } | 
| 564 | 0 |  |  |  |  | 0 | return; | 
| 565 |  |  |  |  |  |  | } | 
| 566 |  |  |  |  |  |  | } | 
| 567 |  |  |  |  |  |  |  | 
| 568 |  |  |  |  |  |  | my %styles = ( | 
| 569 |  |  |  |  |  |  | macron => 1, | 
| 570 |  |  |  |  |  |  | circumflex => 1, | 
| 571 |  |  |  |  |  |  | wapuro => 1, | 
| 572 |  |  |  |  |  |  | passport => 1, | 
| 573 |  |  |  |  |  |  | none => 1, | 
| 574 |  |  |  |  |  |  | ); | 
| 575 |  |  |  |  |  |  |  | 
| 576 |  |  |  |  |  |  | # Check whether this vowel style is allowed. | 
| 577 |  |  |  |  |  |  |  | 
| 578 |  |  |  |  |  |  | sub romaji_vowel_styles | 
| 579 |  |  |  |  |  |  | { | 
| 580 | 1 |  |  | 1 | 1 | 3 | my ($check) = @_; | 
| 581 | 1 | 50 |  |  |  | 4 | if (! defined ($check)) { | 
| 582 | 1 |  |  |  |  | 6 | return [keys %styles]; | 
| 583 |  |  |  |  |  |  | } | 
| 584 |  |  |  |  |  |  | else { | 
| 585 | 0 |  |  |  |  | 0 | return $styles{$check}; | 
| 586 |  |  |  |  |  |  | } | 
| 587 |  |  |  |  |  |  | } | 
| 588 |  |  |  |  |  |  |  | 
| 589 |  |  |  |  |  |  | my $romaji2katakana; | 
| 590 |  |  |  |  |  |  | my $romaji_regex; | 
| 591 |  |  |  |  |  |  |  | 
| 592 |  |  |  |  |  |  | my %longvowels; | 
| 593 |  |  |  |  |  |  | @longvowels{qw/â  î  û  ê  ô/}  = qw/aー iー uー eー oー/; | 
| 594 |  |  |  |  |  |  | @longvowels{qw/ā  ī  ū  ē  ō/}  = qw/aー iー uー eー oー/; | 
| 595 |  |  |  |  |  |  | my $longvowels = join '|', sort {length($a)<=>length($b)} keys %longvowels; | 
| 596 |  |  |  |  |  |  |  | 
| 597 |  |  |  |  |  |  | sub romaji2kana | 
| 598 |  |  |  |  |  |  | { | 
| 599 | 363 |  |  | 363 | 1 | 3050 | my ($input, $options) = @_; | 
| 600 | 363 | 100 |  |  |  | 824 | if (! defined $romaji2katakana) { | 
| 601 | 8 |  |  |  |  | 32 | $romaji2katakana = load_convertor ('romaji', 'katakana'); | 
| 602 | 8 |  |  |  |  | 400 | $romaji_regex = make_regex (keys %$romaji2katakana); | 
| 603 |  |  |  |  |  |  | } | 
| 604 |  |  |  |  |  |  | # Set to true if we want long o to be オウ rather than オー | 
| 605 | 363 |  |  |  |  | 13199 | my $wapuro; | 
| 606 |  |  |  |  |  |  | # Set to true if we want gumma to be ぐっま and onnna to be おんな. | 
| 607 |  |  |  |  |  |  | my $ime; | 
| 608 | 363 | 100 |  |  |  | 808 | if ($options) { | 
| 609 | 269 |  |  |  |  | 439 | $wapuro = $options->{wapuro}; | 
| 610 | 269 |  |  |  |  | 485 | $ime = $options->{ime}; | 
| 611 |  |  |  |  |  |  | } | 
| 612 |  |  |  |  |  |  |  | 
| 613 | 363 | 50 |  |  |  | 735 | if (! defined $input) { | 
| 614 | 0 |  |  |  |  | 0 | return; | 
| 615 |  |  |  |  |  |  | } | 
| 616 | 363 |  |  |  |  | 800 | $input = lc $input; | 
| 617 |  |  |  |  |  |  | # Deal with long vowels | 
| 618 | 363 | 100 |  |  |  | 12082 | if ($wapuro) { | 
| 619 | 269 |  |  |  |  | 616 | $input =~ s/[âā]/aa/g; | 
| 620 | 269 |  |  |  |  | 447 | $input =~ s/[îī]/ii/g; | 
| 621 | 269 |  |  |  |  | 379 | $input =~ s/[êē]/ee/g; | 
| 622 | 269 |  |  |  |  | 376 | $input =~ s/[ûū]/uu/g; | 
| 623 | 269 |  |  |  |  | 408 | $input =~ s/[ôō]/ou/g; | 
| 624 |  |  |  |  |  |  | } | 
| 625 |  |  |  |  |  |  | else { | 
| 626 | 94 |  |  |  |  | 635 | $input =~ s/($longvowels)/$longvowels{$1}/g; | 
| 627 |  |  |  |  |  |  | # Doubled vowels to chouon | 
| 628 | 94 |  |  |  |  | 295 | $input =~ s/([aiueo])\1/$1ー/g; | 
| 629 |  |  |  |  |  |  | } | 
| 630 |  |  |  |  |  |  | # Deal with double consonants | 
| 631 |  |  |  |  |  |  | # danna -> だんな | 
| 632 | 363 | 100 |  |  |  | 623 | if ($ime) { | 
| 633 |  |  |  |  |  |  | # IME romaji rules: | 
| 634 |  |  |  |  |  |  | # Allow double n for ん: | 
| 635 |  |  |  |  |  |  | # gunnma -> グンマ, dannna -> ダンナ | 
| 636 | 3 |  |  |  |  | 22 | $input =~ s/n{1,2}(?=[nm][aiueo])/ン/g; | 
| 637 |  |  |  |  |  |  | # Substitute sokuon for mm + vowel: | 
| 638 |  |  |  |  |  |  | # gumma -> グッマ | 
| 639 | 3 |  |  |  |  | 18 | $input =~ s/m(?=[nm][aiueo])/ッ/g; | 
| 640 |  |  |  |  |  |  | } | 
| 641 |  |  |  |  |  |  | else { | 
| 642 |  |  |  |  |  |  | # Usual romaji rules: Don't allow double n for ん, change | 
| 643 |  |  |  |  |  |  | # gumma to グンマ. | 
| 644 | 360 |  |  |  |  | 597 | $input =~ s/[nm](?=[nm][aiueo])/ン/g; | 
| 645 |  |  |  |  |  |  | } | 
| 646 |  |  |  |  |  |  | # shimbun -> しんぶん | 
| 647 | 363 |  |  |  |  | 695 | $input =~ s/m(?=[pb]y?[aiueo])/ン/g; | 
| 648 |  |  |  |  |  |  | # tcha -> っちゃ | 
| 649 | 363 |  |  |  |  | 720 | $input =~ s/t(?=ch[aiueo])/ッ/g; | 
| 650 |  |  |  |  |  |  | # ccha -> っちゃ | 
| 651 | 363 |  |  |  |  | 531 | $input =~ s/c(?=ch[aiueo])/ッ/g; | 
| 652 |  |  |  |  |  |  | # kkya -> っきゃ etc. | 
| 653 | 363 |  |  |  |  | 985 | $input =~ s/([kstfhmrgzdbpjqvwy])(?=\1y?[aiueo])/ッ/g; | 
| 654 |  |  |  |  |  |  | # kkya -> っきゃ etc. | 
| 655 | 363 |  |  |  |  | 622 | $input =~ s/ttsu/ッツ/g; | 
| 656 |  |  |  |  |  |  | # xtsu -> っ | 
| 657 | 363 |  |  |  |  | 486 | $input =~ s/xtsu/ッ/g; | 
| 658 |  |  |  |  |  |  | # ssha -> っしゃ | 
| 659 | 363 |  |  |  |  | 608 | $input =~ s/([s])(?=\1h[aiueo])/ッ/g; | 
| 660 |  |  |  |  |  |  | # Passport romaji, | 
| 661 |  |  |  |  |  |  | # oh{consonant} -> oo | 
| 662 | 363 | 100 |  |  |  | 637 | if (! $ime) { | 
| 663 |  |  |  |  |  |  | # IMEs do not recognize passport romaji. | 
| 664 | 360 | 100 |  |  |  | 620 | if ($wapuro) { | 
| 665 | 266 |  |  |  |  | 409 | $input =~ s/oh(?=[ksthmrgzdbp])/オウ/g; | 
| 666 |  |  |  |  |  |  | } | 
| 667 |  |  |  |  |  |  | else { | 
| 668 | 94 |  |  |  |  | 160 | $input =~ s/oh(?=[ksthmrgzdbp])/オー/g; | 
| 669 |  |  |  |  |  |  | } | 
| 670 |  |  |  |  |  |  | } | 
| 671 |  |  |  |  |  |  | # All the special cases have been dealt with, now substitute all | 
| 672 |  |  |  |  |  |  | # the kana. | 
| 673 | 363 |  |  |  |  | 8589 | $input =~ s/($romaji_regex)/$romaji2katakana->{$1}/g; | 
| 674 | 363 |  |  |  |  | 1558 | return $input; | 
| 675 |  |  |  |  |  |  | } | 
| 676 |  |  |  |  |  |  |  | 
| 677 |  |  |  |  |  |  | sub is_voiced | 
| 678 |  |  |  |  |  |  | { | 
| 679 | 2 |  |  | 2 | 1 | 6 | my ($sound) = @_; | 
| 680 | 2 | 50 |  |  |  | 5 | if (is_kana ($sound)) { | 
|  |  | 0 |  |  |  |  |  | 
| 681 | 2 |  |  |  |  | 7 | $sound = kana2romaji ($sound); | 
| 682 |  |  |  |  |  |  | } | 
| 683 |  |  |  |  |  |  | elsif (my $romaji = is_romaji ($sound)) { | 
| 684 |  |  |  |  |  |  | # Normalize to nihon shiki so that we don't have to worry | 
| 685 |  |  |  |  |  |  | # about ch, j, ts, etc. at the start of the sound. | 
| 686 | 0 |  |  |  |  | 0 | $sound = $romaji; | 
| 687 |  |  |  |  |  |  | } | 
| 688 | 2 | 100 |  |  |  | 8 | if ($sound =~ /^[aiueogzbpmnry]/) { | 
| 689 | 1 |  |  |  |  | 5 | return 1; | 
| 690 |  |  |  |  |  |  | } | 
| 691 |  |  |  |  |  |  | else { | 
| 692 | 1 |  |  |  |  | 4 | return undef; | 
| 693 |  |  |  |  |  |  | } | 
| 694 |  |  |  |  |  |  | } | 
| 695 |  |  |  |  |  |  |  | 
| 696 |  |  |  |  |  |  | sub is_romaji | 
| 697 |  |  |  |  |  |  | { | 
| 698 | 218 |  |  | 218 | 1 | 4218 | my ($romaji) = @_; | 
| 699 | 218 | 50 |  |  |  | 588 | if (length ($romaji) == 0) { | 
| 700 | 0 |  |  |  |  | 0 | return undef; | 
| 701 |  |  |  |  |  |  | } | 
| 702 |  |  |  |  |  |  | # Test that $romaji contains only characters which may be | 
| 703 |  |  |  |  |  |  | # romanized Japanese. | 
| 704 | 218 | 100 |  |  |  | 910 | if ($romaji =~ /[^\sa-zāīūēōâîûêô'-]|^-/i) { | 
| 705 | 4 |  |  |  |  | 15 | return undef; | 
| 706 |  |  |  |  |  |  | } | 
| 707 | 214 |  |  |  |  | 673 | my $kana = romaji2kana ($romaji, {wapuro => 1}); | 
| 708 | 214 | 100 |  |  |  | 1172 | if ($kana =~ /^[ア-ンッー\s]+$/) { | 
| 709 | 143 |  |  |  |  | 532 | return kana2romaji ($kana, {wapuro => 1}); | 
| 710 |  |  |  |  |  |  | } | 
| 711 | 71 |  |  |  |  | 190 | return undef; | 
| 712 |  |  |  |  |  |  | } | 
| 713 |  |  |  |  |  |  |  | 
| 714 |  |  |  |  |  |  |  | 
| 715 |  |  |  |  |  |  | sub is_romaji_semistrict | 
| 716 |  |  |  |  |  |  | { | 
| 717 | 94 |  |  | 94 | 1 | 201 | my ($romaji) = @_; | 
| 718 | 94 | 100 |  |  |  | 204 | if (! is_romaji ($romaji)) { | 
| 719 | 34 |  |  |  |  | 133 | return undef; | 
| 720 |  |  |  |  |  |  | } | 
| 721 | 60 | 100 |  |  |  | 812 | if ($romaji =~ / | 
| 722 |  |  |  |  |  |  | # Don't allow small vowels, small tsu, or fya, | 
| 723 |  |  |  |  |  |  | # fye etc. | 
| 724 |  |  |  |  |  |  | (fy|l|x|v)y?($vowel_re|ts?u|wa|ka|ke) | 
| 725 |  |  |  |  |  |  | | | 
| 726 |  |  |  |  |  |  | # Don't allow hyi, hye, yi, ye. | 
| 727 |  |  |  |  |  |  | [zh]?y[ieêîē] | 
| 728 |  |  |  |  |  |  | | | 
| 729 |  |  |  |  |  |  | # Don't allow tye | 
| 730 |  |  |  |  |  |  | ty[eêē] | 
| 731 |  |  |  |  |  |  | | | 
| 732 |  |  |  |  |  |  | # Don't allow wh-, kw-, gw-, dh-, etc. | 
| 733 |  |  |  |  |  |  | (wh|kw|gw|dh|thy)$vowel_re | 
| 734 |  |  |  |  |  |  | | | 
| 735 |  |  |  |  |  |  | # Don't allow "t'i" | 
| 736 |  |  |  |  |  |  | [dt]'(i|y?$u_re) | 
| 737 |  |  |  |  |  |  | | | 
| 738 |  |  |  |  |  |  | # Don't allow dwu, twu | 
| 739 |  |  |  |  |  |  | [dt](w$u_re) | 
| 740 |  |  |  |  |  |  | | | 
| 741 |  |  |  |  |  |  | hwy$u_re | 
| 742 |  |  |  |  |  |  | | | 
| 743 |  |  |  |  |  |  | # Don't allow "wi" or "we". | 
| 744 |  |  |  |  |  |  | w(i|e) | 
| 745 |  |  |  |  |  |  | | | 
| 746 |  |  |  |  |  |  | # Don't allow some non-Japanese double consonants. | 
| 747 |  |  |  |  |  |  | (?:rr|yy) | 
| 748 |  |  |  |  |  |  | | | 
| 749 |  |  |  |  |  |  | # Don't allow 'thi' | 
| 750 |  |  |  |  |  |  | thi | 
| 751 |  |  |  |  |  |  | /ix) { | 
| 752 | 51 |  |  |  |  | 328 | return undef; | 
| 753 |  |  |  |  |  |  | } | 
| 754 | 9 |  |  |  |  | 53 | return 1; | 
| 755 |  |  |  |  |  |  | } | 
| 756 |  |  |  |  |  |  |  | 
| 757 |  |  |  |  |  |  | sub is_romaji_strict | 
| 758 |  |  |  |  |  |  | { | 
| 759 | 107 |  |  | 107 | 1 | 40084 | my ($romaji) = @_; | 
| 760 | 107 |  |  |  |  | 236 | my $canonical = is_romaji ($romaji); | 
| 761 | 107 | 100 |  |  |  | 238 | if (! $canonical) { | 
| 762 | 34 |  |  |  |  | 151 | return undef; | 
| 763 |  |  |  |  |  |  | } | 
| 764 | 73 |  |  |  |  | 146 | my $kana = romaji2kana ($romaji); | 
| 765 | 73 | 100 |  |  |  | 312 | if ($kana =~ m! | 
| 766 |  |  |  |  |  |  | # Don't allow tanggono | 
| 767 |  |  |  |  |  |  | ンッ | 
| 768 |  |  |  |  |  |  | | | 
| 769 |  |  |  |  |  |  | # Don't allow "nmichi". | 
| 770 |  |  |  |  |  |  | ^ン | 
| 771 |  |  |  |  |  |  | | | 
| 772 |  |  |  |  |  |  | # Don't allow ffun etc. | 
| 773 |  |  |  |  |  |  | ^ッ | 
| 774 |  |  |  |  |  |  | !x) { | 
| 775 | 6 |  |  |  |  | 32 | return undef; | 
| 776 |  |  |  |  |  |  | } | 
| 777 | 67 | 100 |  |  |  | 1249 | if ($romaji =~ m! | 
| 778 |  |  |  |  |  |  | (fy|l|x|v)y?($vowel_re|ts?u|wa|ka|ke) | 
| 779 |  |  |  |  |  |  | | | 
| 780 |  |  |  |  |  |  | # Don't allow hyi, hye, yi, ye. | 
| 781 |  |  |  |  |  |  | [zh]?y[ieêîē] | 
| 782 |  |  |  |  |  |  | | | 
| 783 |  |  |  |  |  |  | # Don't allow tye | 
| 784 |  |  |  |  |  |  | ty[eêē] | 
| 785 |  |  |  |  |  |  | | | 
| 786 |  |  |  |  |  |  | # Don't allow wh-, kw-, gw-, dh-, etc. | 
| 787 |  |  |  |  |  |  | (wh|kw|gw|dh|thy)$vowel_re | 
| 788 |  |  |  |  |  |  | | | 
| 789 |  |  |  |  |  |  | # Don't allow tsa, tsi, tse, tso, fa, fe, fi, fo. | 
| 790 |  |  |  |  |  |  | (ts|f)$no_u_vowel_re | 
| 791 |  |  |  |  |  |  | | | 
| 792 |  |  |  |  |  |  | # Don't allow "t'i" | 
| 793 |  |  |  |  |  |  | [dt]'(i|y?$u_re) | 
| 794 |  |  |  |  |  |  | | | 
| 795 |  |  |  |  |  |  | # Don't allow dwu, twu | 
| 796 |  |  |  |  |  |  | [dt](w$u_re) | 
| 797 |  |  |  |  |  |  | | | 
| 798 |  |  |  |  |  |  | hwy$u_re | 
| 799 |  |  |  |  |  |  | | | 
| 800 |  |  |  |  |  |  | # Don't allow "wi" or "we". | 
| 801 |  |  |  |  |  |  | w(i|e) | 
| 802 |  |  |  |  |  |  | | | 
| 803 |  |  |  |  |  |  | # Don't allow 'je', 'che', 'she' | 
| 804 |  |  |  |  |  |  | (?:[cs]h|j)e | 
| 805 |  |  |  |  |  |  | | | 
| 806 |  |  |  |  |  |  | # Don't allow some non-Japanese double consonants. | 
| 807 |  |  |  |  |  |  | (?:rr|yy) | 
| 808 |  |  |  |  |  |  | | | 
| 809 |  |  |  |  |  |  | # Don't allow 'thi'/'thu' | 
| 810 |  |  |  |  |  |  | th[iu] | 
| 811 |  |  |  |  |  |  | | | 
| 812 |  |  |  |  |  |  | # Don't allow 'johann' | 
| 813 |  |  |  |  |  |  | nn$ | 
| 814 |  |  |  |  |  |  | | | 
| 815 |  |  |  |  |  |  | # Don't allow 'ridzuan' etc. | 
| 816 |  |  |  |  |  |  | dz | 
| 817 |  |  |  |  |  |  | | | 
| 818 |  |  |  |  |  |  | # Qs are out. | 
| 819 |  |  |  |  |  |  | q | 
| 820 |  |  |  |  |  |  | | | 
| 821 |  |  |  |  |  |  | # Double ws, hs, etc. are out | 
| 822 |  |  |  |  |  |  | ww|hh|bb | 
| 823 |  |  |  |  |  |  | | | 
| 824 |  |  |  |  |  |  | # This is allowed by IMEs as "ちゃ" etc. | 
| 825 |  |  |  |  |  |  | cy | 
| 826 |  |  |  |  |  |  | !ix) { | 
| 827 | 64 |  |  |  |  | 415 | return undef; | 
| 828 |  |  |  |  |  |  | } | 
| 829 | 3 |  |  |  |  | 34 | return $canonical; | 
| 830 |  |  |  |  |  |  | } | 
| 831 |  |  |  |  |  |  |  | 
| 832 |  |  |  |  |  |  | sub hira2kata | 
| 833 |  |  |  |  |  |  | { | 
| 834 | 200 |  |  | 200 | 1 | 1533 | my (@input) = @_; | 
| 835 | 200 | 50 |  |  |  | 470 | if (!@input) { | 
| 836 | 0 |  |  |  |  | 0 | return; | 
| 837 |  |  |  |  |  |  | } | 
| 838 | 200 |  |  |  |  | 434 | for (@input) { | 
| 839 | 200 | 50 |  |  |  | 373 | if ($_) { | 
| 840 | 200 |  |  |  |  | 967 | tr/ぁ-んゔ/ァ-ンヴ/; | 
| 841 |  |  |  |  |  |  | } | 
| 842 |  |  |  |  |  |  | } | 
| 843 | 200 | 50 |  |  |  | 768 | return wantarray ? @input : "@input"; | 
| 844 |  |  |  |  |  |  | } | 
| 845 |  |  |  |  |  |  |  | 
| 846 |  |  |  |  |  |  | sub kata2hira | 
| 847 |  |  |  |  |  |  | { | 
| 848 | 57 |  |  | 57 | 1 | 175 | my (@input) = @_; | 
| 849 | 57 |  |  |  |  | 120 | for (@input) {tr/ァ-ンヴ/ぁ-んゔ/} | 
|  | 57 |  |  |  |  | 288 |  | 
| 850 | 57 | 50 |  |  |  | 391 | return wantarray ? @input : "@input"; | 
| 851 |  |  |  |  |  |  | } | 
| 852 |  |  |  |  |  |  |  | 
| 853 |  |  |  |  |  |  | # Make the list of dakuon stuff. | 
| 854 |  |  |  |  |  |  |  | 
| 855 |  |  |  |  |  |  | sub make_dak_list | 
| 856 |  |  |  |  |  |  | { | 
| 857 | 0 |  |  | 0 | 0 | 0 | my @dak_list; | 
| 858 | 0 |  |  |  |  | 0 | for (@_) { | 
| 859 | 0 |  |  |  |  | 0 | push @dak_list, @{$gyou{$_}}; | 
|  | 0 |  |  |  |  | 0 |  | 
| 860 | 0 |  |  |  |  | 0 | push @dak_list, hira2kata (@{$gyou{$_}}); | 
|  | 0 |  |  |  |  | 0 |  | 
| 861 |  |  |  |  |  |  | } | 
| 862 | 0 |  |  |  |  | 0 | return @dak_list; | 
| 863 |  |  |  |  |  |  | } | 
| 864 |  |  |  |  |  |  |  | 
| 865 |  |  |  |  |  |  | sub load_kana2hw2 | 
| 866 |  |  |  |  |  |  | { | 
| 867 | 3 |  |  | 3 | 0 | 19 | my $conv = Convert::Moji->new (["oneway", "tr", "あ-ん", "ア-ン"], | 
| 868 |  |  |  |  |  |  | ["file", | 
| 869 |  |  |  |  |  |  | getdistfile ("katakana2hw_katakana")]); | 
| 870 | 3 |  |  |  |  | 17201 | return $conv; | 
| 871 |  |  |  |  |  |  | } | 
| 872 |  |  |  |  |  |  |  | 
| 873 |  |  |  |  |  |  | my $kata2hw; | 
| 874 |  |  |  |  |  |  |  | 
| 875 |  |  |  |  |  |  | sub make_kata2hw | 
| 876 |  |  |  |  |  |  | { | 
| 877 | 1 | 50 |  | 1 | 0 | 4 | if (!$kata2hw) { | 
| 878 | 0 |  |  |  |  | 0 | $kata2hw = make_convertors ('katakana','hw_katakana'); | 
| 879 |  |  |  |  |  |  | } | 
| 880 |  |  |  |  |  |  | } | 
| 881 |  |  |  |  |  |  |  | 
| 882 |  |  |  |  |  |  | my $kana2hw; | 
| 883 |  |  |  |  |  |  |  | 
| 884 |  |  |  |  |  |  | sub kana2hw | 
| 885 |  |  |  |  |  |  | { | 
| 886 | 3 |  |  | 3 | 1 | 973 | my ($input) = @_; | 
| 887 | 3 | 50 |  |  |  | 14 | if (! $kana2hw) { | 
| 888 | 3 |  |  |  |  | 11 | $kana2hw = load_kana2hw2 (); | 
| 889 |  |  |  |  |  |  | } | 
| 890 | 3 |  |  |  |  | 17 | return $kana2hw->convert ($input); | 
| 891 |  |  |  |  |  |  | } | 
| 892 |  |  |  |  |  |  |  | 
| 893 |  |  |  |  |  |  | sub katakana2hw | 
| 894 |  |  |  |  |  |  | { | 
| 895 | 1 |  |  | 1 | 1 | 546 | my ($input) = @_; | 
| 896 | 1 |  |  |  |  | 6 | make_kata2hw (); | 
| 897 | 1 |  |  |  |  | 5 | return $kata2hw->convert ($input); | 
| 898 |  |  |  |  |  |  | } | 
| 899 |  |  |  |  |  |  |  | 
| 900 |  |  |  |  |  |  | sub hw2katakana | 
| 901 |  |  |  |  |  |  | { | 
| 902 | 8 |  |  | 8 | 1 | 3798 | my ($input) = @_; | 
| 903 | 8 | 100 |  |  |  | 30 | if (!$kata2hw) { | 
| 904 | 5 |  |  |  |  | 23 | $kata2hw = make_convertors ('katakana','hw_katakana'); | 
| 905 |  |  |  |  |  |  | } | 
| 906 | 8 |  |  |  |  | 41 | return $kata2hw->invert ($input); | 
| 907 |  |  |  |  |  |  | } | 
| 908 |  |  |  |  |  |  |  | 
| 909 |  |  |  |  |  |  | sub InHankakuKatakana | 
| 910 |  |  |  |  |  |  | { | 
| 911 | 12 |  |  | 12 | 1 | 3035 | return <<'END'; | 
| 912 |  |  |  |  |  |  | +utf8::Katakana | 
| 913 |  |  |  |  |  |  | &utf8::InHalfwidthAndFullwidthForms | 
| 914 |  |  |  |  |  |  | END | 
| 915 |  |  |  |  |  |  | } | 
| 916 |  |  |  |  |  |  |  | 
| 917 |  |  |  |  |  |  | # The two lists in wide2ascii and ascii2wide have exactly the same | 
| 918 |  |  |  |  |  |  | # length. | 
| 919 |  |  |  |  |  |  | # | 
| 920 |  |  |  |  |  |  | # The warnings produced by Perl versions later than 22 are bugs in | 
| 921 |  |  |  |  |  |  | # Perl: | 
| 922 |  |  |  |  |  |  | # | 
| 923 |  |  |  |  |  |  | # https://rt.perl.org/Public/Bug/Display.html?id=125493 | 
| 924 |  |  |  |  |  |  | # | 
| 925 |  |  |  |  |  |  | # To save problems for users, switch off warnings in these routines. | 
| 926 |  |  |  |  |  |  | # | 
| 927 |  |  |  |  |  |  | # I have no idea what command to use to switch off just the | 
| 928 |  |  |  |  |  |  | # "Replacement list is longer than search list" warning and leave the | 
| 929 |  |  |  |  |  |  | # others intact. | 
| 930 |  |  |  |  |  |  |  | 
| 931 | 24 |  |  | 24 |  | 725873 | no warnings 'misc'; | 
|  | 24 |  |  |  |  | 62 |  | 
|  | 24 |  |  |  |  | 2394 |  | 
| 932 |  |  |  |  |  |  |  | 
| 933 |  |  |  |  |  |  | sub wide2ascii | 
| 934 |  |  |  |  |  |  | { | 
| 935 | 4 |  |  | 4 | 1 | 1576 | my ($input) = @_; | 
| 936 | 4 |  |  |  |  | 174 | $input =~ tr/\x{3000}\x{FF01}-\x{FF5E}/ -~/; | 
| 937 | 4 |  |  |  |  | 18 | return $input; | 
| 938 |  |  |  |  |  |  | } | 
| 939 |  |  |  |  |  |  |  | 
| 940 |  |  |  |  |  |  | sub ascii2wide | 
| 941 |  |  |  |  |  |  | { | 
| 942 | 2 |  |  | 2 | 1 | 13907 | my ($input) = @_; | 
| 943 | 2 |  |  |  |  | 67 | $input =~ tr/ -~/\x{3000}\x{FF01}-\x{FF5E}/; | 
| 944 | 2 |  |  |  |  | 7 | return $input; | 
| 945 |  |  |  |  |  |  | } | 
| 946 |  |  |  |  |  |  |  | 
| 947 | 24 |  |  | 24 |  | 65702 | use warnings; | 
|  | 24 |  |  |  |  | 50 |  | 
|  | 24 |  |  |  |  | 2971 |  | 
| 948 |  |  |  |  |  |  |  | 
| 949 |  |  |  |  |  |  | sub InWideAscii | 
| 950 |  |  |  |  |  |  | { | 
| 951 | 4 |  |  | 4 | 1 | 857 | return <<'END'; | 
| 952 |  |  |  |  |  |  | FF01 FF5E | 
| 953 |  |  |  |  |  |  | 3000 | 
| 954 |  |  |  |  |  |  | END | 
| 955 |  |  |  |  |  |  | } | 
| 956 |  |  |  |  |  |  |  | 
| 957 |  |  |  |  |  |  | my $kana2morse; | 
| 958 |  |  |  |  |  |  |  | 
| 959 |  |  |  |  |  |  | sub load_kana2morse | 
| 960 |  |  |  |  |  |  | { | 
| 961 | 2 | 100 |  | 2 | 0 | 7 | if (!$kana2morse) { | 
| 962 | 1 |  |  |  |  | 5 | $kana2morse = make_convertors ('katakana', 'morse'); | 
| 963 |  |  |  |  |  |  | } | 
| 964 |  |  |  |  |  |  | } | 
| 965 |  |  |  |  |  |  |  | 
| 966 |  |  |  |  |  |  | sub kana2morse | 
| 967 |  |  |  |  |  |  | { | 
| 968 | 1 |  |  | 1 | 1 | 495 | my ($input) = @_; | 
| 969 | 1 |  |  |  |  | 4 | load_kana2morse; | 
| 970 | 1 |  |  |  |  | 3 | $input = hira2kata ($input); | 
| 971 | 1 |  |  |  |  | 9 | $input =~ tr/ァィゥェォャュョッ/アイウエオヤユヨツ/; | 
| 972 | 1 |  |  |  |  | 4 | $input = split_sound_marks ($input); | 
| 973 | 1 |  |  |  |  | 6 | $input = join ' ', (split '', $input); | 
| 974 | 1 |  |  |  |  | 5 | $input = $kana2morse->convert ($input); | 
| 975 | 1 |  |  |  |  | 5 | return $input; | 
| 976 |  |  |  |  |  |  | } | 
| 977 |  |  |  |  |  |  |  | 
| 978 |  |  |  |  |  |  |  | 
| 979 |  |  |  |  |  |  | sub getdistfile | 
| 980 |  |  |  |  |  |  | { | 
| 981 | 31 |  |  | 31 | 0 | 79 | my ($filename) = @_; | 
| 982 | 31 |  |  |  |  | 74 | my $dir = __FILE__; | 
| 983 | 31 |  |  |  |  | 199 | $dir =~ s!\.pm$!/!; | 
| 984 | 31 |  |  |  |  | 98 | my $file = "$dir$filename.txt"; | 
| 985 | 31 |  |  |  |  | 155 | return $file; | 
| 986 |  |  |  |  |  |  | } | 
| 987 |  |  |  |  |  |  |  | 
| 988 |  |  |  |  |  |  | sub morse2kana | 
| 989 |  |  |  |  |  |  | { | 
| 990 | 1 |  |  | 1 | 1 | 450 | my ($input) = @_; | 
| 991 | 1 |  |  |  |  | 3 | load_kana2morse; | 
| 992 | 1 |  |  |  |  | 7 | my @input = split ' ',$input; | 
| 993 | 1 |  |  |  |  | 3 | for (@input) { | 
| 994 | 7 |  |  |  |  | 15 | $_ = $kana2morse->invert ($_); | 
| 995 |  |  |  |  |  |  | } | 
| 996 | 1 |  |  |  |  | 4 | $input = join '', @input; | 
| 997 | 1 |  |  |  |  | 4 | $input = join_sound_marks ($input); | 
| 998 | 1 |  |  |  |  | 3 | return $input; | 
| 999 |  |  |  |  |  |  | } | 
| 1000 |  |  |  |  |  |  |  | 
| 1001 |  |  |  |  |  |  | my $kana2braille; | 
| 1002 |  |  |  |  |  |  |  | 
| 1003 |  |  |  |  |  |  | sub load_kana2braille | 
| 1004 |  |  |  |  |  |  | { | 
| 1005 | 2 | 100 |  | 2 | 0 | 7 | if (!$kana2braille) { | 
| 1006 | 1 |  |  |  |  | 3 | $kana2braille = make_convertors ('katakana', 'braille'); | 
| 1007 |  |  |  |  |  |  | } | 
| 1008 |  |  |  |  |  |  | } | 
| 1009 |  |  |  |  |  |  |  | 
| 1010 |  |  |  |  |  |  | my %nippon2kana; | 
| 1011 |  |  |  |  |  |  |  | 
| 1012 |  |  |  |  |  |  | for my $k (keys %gyou) { | 
| 1013 |  |  |  |  |  |  | for my $ar (@{$gyou{$k}}) { | 
| 1014 |  |  |  |  |  |  | my $vowel = $boin{$ar}; | 
| 1015 |  |  |  |  |  |  | my $nippon = $k.$vowel; | 
| 1016 |  |  |  |  |  |  | $nippon2kana{$nippon} = $ar; | 
| 1017 |  |  |  |  |  |  | } | 
| 1018 |  |  |  |  |  |  | } | 
| 1019 |  |  |  |  |  |  |  | 
| 1020 |  |  |  |  |  |  | sub is_kana | 
| 1021 |  |  |  |  |  |  | { | 
| 1022 | 7 |  |  | 7 | 1 | 283 | my ($may_be_kana) = @_; | 
| 1023 | 7 | 100 |  |  |  | 56 | if ($may_be_kana =~ /^[あ-んア-ン]+$/) { | 
| 1024 | 5 |  |  |  |  | 17 | return 1; | 
| 1025 |  |  |  |  |  |  | } | 
| 1026 | 2 |  |  |  |  | 11 | return; | 
| 1027 |  |  |  |  |  |  | } | 
| 1028 |  |  |  |  |  |  |  | 
| 1029 |  |  |  |  |  |  | sub is_hiragana | 
| 1030 |  |  |  |  |  |  | { | 
| 1031 | 2 |  |  | 2 | 1 | 4 | my ($may_be_kana) = @_; | 
| 1032 | 2 | 100 |  |  |  | 15 | if ($may_be_kana =~ /^[あ-ん]+$/) { | 
| 1033 | 1 |  |  |  |  | 4 | return 1; | 
| 1034 |  |  |  |  |  |  | } | 
| 1035 | 1 |  |  |  |  | 15 | return; | 
| 1036 |  |  |  |  |  |  | } | 
| 1037 |  |  |  |  |  |  |  | 
| 1038 |  |  |  |  |  |  | my %daku2not = (qw/ | 
| 1039 |  |  |  |  |  |  | が か | 
| 1040 |  |  |  |  |  |  | ぎ き | 
| 1041 |  |  |  |  |  |  | ぐ く | 
| 1042 |  |  |  |  |  |  | げ け | 
| 1043 |  |  |  |  |  |  | ご こ | 
| 1044 |  |  |  |  |  |  | だ た | 
| 1045 |  |  |  |  |  |  | ぢ ち | 
| 1046 |  |  |  |  |  |  | づ つ | 
| 1047 |  |  |  |  |  |  | で て | 
| 1048 |  |  |  |  |  |  | ど と | 
| 1049 |  |  |  |  |  |  | ざ さ | 
| 1050 |  |  |  |  |  |  | じ し | 
| 1051 |  |  |  |  |  |  | ず す | 
| 1052 |  |  |  |  |  |  | ぜ せ | 
| 1053 |  |  |  |  |  |  | ぞ そ | 
| 1054 |  |  |  |  |  |  | ば は | 
| 1055 |  |  |  |  |  |  | び ひ | 
| 1056 |  |  |  |  |  |  | ぶ ふ | 
| 1057 |  |  |  |  |  |  | べ へ | 
| 1058 |  |  |  |  |  |  | ぼ ほ | 
| 1059 |  |  |  |  |  |  | ガ カ | 
| 1060 |  |  |  |  |  |  | ギ キ | 
| 1061 |  |  |  |  |  |  | グ ク | 
| 1062 |  |  |  |  |  |  | ゲ ケ | 
| 1063 |  |  |  |  |  |  | ゴ コ | 
| 1064 |  |  |  |  |  |  | ダ タ | 
| 1065 |  |  |  |  |  |  | ヂ チ | 
| 1066 |  |  |  |  |  |  | ヅ ツ | 
| 1067 |  |  |  |  |  |  | デ テ | 
| 1068 |  |  |  |  |  |  | ド ト | 
| 1069 |  |  |  |  |  |  | ザ サ | 
| 1070 |  |  |  |  |  |  | ジ シ | 
| 1071 |  |  |  |  |  |  | ズ ス | 
| 1072 |  |  |  |  |  |  | ゼ セ | 
| 1073 |  |  |  |  |  |  | ゾ ソ | 
| 1074 |  |  |  |  |  |  | バ ハ | 
| 1075 |  |  |  |  |  |  | ビ ヒ | 
| 1076 |  |  |  |  |  |  | ブ フ | 
| 1077 |  |  |  |  |  |  | ベ ヘ | 
| 1078 |  |  |  |  |  |  | ボ ホ | 
| 1079 |  |  |  |  |  |  | /); | 
| 1080 |  |  |  |  |  |  |  | 
| 1081 |  |  |  |  |  |  | my %not2daku = reverse %daku2not; | 
| 1082 |  |  |  |  |  |  |  | 
| 1083 |  |  |  |  |  |  | my $daku = qr![がぎぐげごだぢづでどざじずぜぞばびぶべぼガギグゲゴダヂヅデドザジズゼゾバビブベボ]!; | 
| 1084 |  |  |  |  |  |  |  | 
| 1085 |  |  |  |  |  |  | my $nodaku = qr![かきくけこたしつてとさしすせそはひふへほカキクケコタシツテトサシスセソハヒフヘホ]!; | 
| 1086 |  |  |  |  |  |  |  | 
| 1087 |  |  |  |  |  |  | my %handaku2not = (qw! | 
| 1088 |  |  |  |  |  |  | ぱ は | 
| 1089 |  |  |  |  |  |  | ぴ ひ | 
| 1090 |  |  |  |  |  |  | ぷ ふ | 
| 1091 |  |  |  |  |  |  | ぺ へ | 
| 1092 |  |  |  |  |  |  | ぽ ほ | 
| 1093 |  |  |  |  |  |  | パ ハ | 
| 1094 |  |  |  |  |  |  | ピ ヒ | 
| 1095 |  |  |  |  |  |  | プ フ | 
| 1096 |  |  |  |  |  |  | ペ ヘ | 
| 1097 |  |  |  |  |  |  | ポ ホ | 
| 1098 |  |  |  |  |  |  | !); | 
| 1099 |  |  |  |  |  |  |  | 
| 1100 |  |  |  |  |  |  | my %not2handaku = reverse %handaku2not; | 
| 1101 |  |  |  |  |  |  |  | 
| 1102 |  |  |  |  |  |  | my $handaku = qr![ぱぴぷぺぽパピプペポ]!; | 
| 1103 |  |  |  |  |  |  |  | 
| 1104 |  |  |  |  |  |  | my $nohandaku = qr![はひふへほハヒフヘホ]!; | 
| 1105 |  |  |  |  |  |  |  | 
| 1106 |  |  |  |  |  |  | sub join_sound_marks | 
| 1107 |  |  |  |  |  |  | { | 
| 1108 | 202 |  |  | 202 | 1 | 13456 | my ($input) = @_; | 
| 1109 | 202 |  |  |  |  | 1409 | $input =~ s!($nohandaku)(゚|゜)!$not2handaku{$1}!g; | 
| 1110 | 202 |  |  |  |  | 1187 | $input =~ s!($nodaku)(゙|゛)!$not2daku{$1}!g; | 
| 1111 |  |  |  |  |  |  | # Remove strays. | 
| 1112 | 202 |  |  |  |  | 545 | $input =~ s![゙゛゚゜]!!g; | 
| 1113 | 202 |  |  |  |  | 493 | return $input; | 
| 1114 |  |  |  |  |  |  | } | 
| 1115 |  |  |  |  |  |  |  | 
| 1116 |  |  |  |  |  |  | sub split_sound_marks | 
| 1117 |  |  |  |  |  |  | { | 
| 1118 | 7 |  |  | 7 | 1 | 33 | my ($input) = @_; | 
| 1119 | 7 |  |  |  |  | 103 | $input =~ s!($handaku)!$handaku2not{$1}゜!g; | 
| 1120 | 7 |  |  |  |  | 92 | $input =~ s!($daku)!$daku2not{$1}゛!g; | 
| 1121 | 7 |  |  |  |  | 39 | return $input; | 
| 1122 |  |  |  |  |  |  | } | 
| 1123 |  |  |  |  |  |  |  | 
| 1124 |  |  |  |  |  |  | sub kana2katakana | 
| 1125 |  |  |  |  |  |  | { | 
| 1126 | 197 |  |  | 197 | 1 | 582 | my ($input) = @_; | 
| 1127 | 197 |  |  |  |  | 394 | $input = join_sound_marks ($input); | 
| 1128 | 197 |  |  |  |  | 495 | $input = hira2kata($input); | 
| 1129 | 197 | 100 |  |  |  | 753 | if ($input =~ /\p{InHankakuKatakana}/) { | 
| 1130 | 2 |  |  |  |  | 12 | $input = hw2katakana($input); | 
| 1131 |  |  |  |  |  |  | } | 
| 1132 | 197 |  |  |  |  | 7676 | return $input; | 
| 1133 |  |  |  |  |  |  | } | 
| 1134 |  |  |  |  |  |  |  | 
| 1135 |  |  |  |  |  |  | sub kana2braille | 
| 1136 |  |  |  |  |  |  | { | 
| 1137 | 1 |  |  | 1 | 1 | 263 | my ($input) = @_; | 
| 1138 | 1 |  |  |  |  | 4 | load_kana2braille; | 
| 1139 | 1 |  |  |  |  | 3 | $input = kana2katakana ($input); | 
| 1140 | 1 |  |  |  |  | 4 | $input = split_sound_marks ($input); | 
| 1141 | 1 |  |  |  |  | 7 | $input =~ s/([キシチヒ])゛([ャュョ])/'⠘'.$nippon2kana{$siin{$1}.$boin{$2}}/eg; | 
|  | 0 |  |  |  |  | 0 |  | 
| 1142 | 1 |  |  |  |  | 3 | $input =~ s/(ヒ)゜([ャュョ])/'⠨'.$nippon2kana{$siin{$1}.$boin{$2}}/eg; | 
|  | 0 |  |  |  |  | 0 |  | 
| 1143 | 1 |  |  |  |  | 7 | $input =~ s/([キシチニヒミリ])([ャュョ])/'⠈'.$nippon2kana{$siin{$1}.$boin{$2}}/eg; | 
|  | 1 |  |  |  |  | 8 |  | 
| 1144 | 1 |  |  |  |  | 28 | $input =~ s/([$vowelclass{o}])ウ/$1ー/g; | 
| 1145 | 1 |  |  |  |  | 4 | $input = $kana2braille->convert ($input); | 
| 1146 | 1 |  |  |  |  | 9 | $input =~ s/(.)([⠐⠠])/$2$1/g; | 
| 1147 | 1 |  |  |  |  | 4 | return $input; | 
| 1148 |  |  |  |  |  |  | } | 
| 1149 |  |  |  |  |  |  |  | 
| 1150 |  |  |  |  |  |  | sub braille2kana | 
| 1151 |  |  |  |  |  |  | { | 
| 1152 | 1 |  |  | 1 | 1 | 300 | my ($input) = @_; | 
| 1153 | 1 |  |  |  |  | 4 | load_kana2braille; | 
| 1154 | 1 |  |  |  |  | 11 | $input =~ s/([⠐⠠])(.)/$2$1/g; | 
| 1155 | 1 |  |  |  |  | 4 | $input = $kana2braille->invert ($input); | 
| 1156 | 1 |  |  |  |  | 3 | $input =~ s/⠘(.)/$nippon2kana{$siin{$1}.'i'}.'゛'.$youon{$boin{$1}}/eg; | 
|  | 0 |  |  |  |  | 0 |  | 
| 1157 | 1 |  |  |  |  | 3 | $input =~ s/⠨(.)/$nippon2kana{$siin{$1}.'i'}.'゜'.$youon{$boin{$1}}/eg; | 
|  | 0 |  |  |  |  | 0 |  | 
| 1158 | 1 |  |  |  |  | 5 | $input =~ s/⠈(.)/$nippon2kana{$siin{$1}.'i'}.$youon{$boin{$1}}/eg; | 
|  | 1 |  |  |  |  | 7 |  | 
| 1159 | 1 |  |  |  |  | 4 | $input = join_sound_marks ($input); | 
| 1160 | 1 |  |  |  |  | 3 | return $input; | 
| 1161 |  |  |  |  |  |  | } | 
| 1162 |  |  |  |  |  |  |  | 
| 1163 |  |  |  |  |  |  | my $circled_conv; | 
| 1164 |  |  |  |  |  |  |  | 
| 1165 |  |  |  |  |  |  | sub load_circled_conv | 
| 1166 |  |  |  |  |  |  | { | 
| 1167 | 2 | 100 |  | 2 | 0 | 7 | if (!$circled_conv) { | 
| 1168 | 1 |  |  |  |  | 3 | $circled_conv = make_convertors ("katakana", "circled"); | 
| 1169 |  |  |  |  |  |  | } | 
| 1170 |  |  |  |  |  |  | } | 
| 1171 |  |  |  |  |  |  |  | 
| 1172 |  |  |  |  |  |  | sub kana2circled | 
| 1173 |  |  |  |  |  |  | { | 
| 1174 | 1 |  |  | 1 | 1 | 3 | my ($input) = @_; | 
| 1175 | 1 |  |  |  |  | 4 | $input = kana2katakana($input); | 
| 1176 | 1 |  |  |  |  | 8 | $input = split_sound_marks ($input); | 
| 1177 | 1 |  |  |  |  | 5 | load_circled_conv; | 
| 1178 | 1 |  |  |  |  | 3 | $input = $circled_conv->convert ($input); | 
| 1179 | 1 |  |  |  |  | 4 | return $input; | 
| 1180 |  |  |  |  |  |  | } | 
| 1181 |  |  |  |  |  |  |  | 
| 1182 |  |  |  |  |  |  | sub circled2kana | 
| 1183 |  |  |  |  |  |  | { | 
| 1184 | 1 |  |  | 1 | 1 | 280 | my ($input) = @_; | 
| 1185 | 1 |  |  |  |  | 4 | load_circled_conv; | 
| 1186 | 1 |  |  |  |  | 3 | $input = $circled_conv->invert ($input); | 
| 1187 | 1 |  |  |  |  | 4 | $input = join_sound_marks ($input); | 
| 1188 | 1 |  |  |  |  | 4 | return $input; | 
| 1189 |  |  |  |  |  |  | } | 
| 1190 |  |  |  |  |  |  |  | 
| 1191 |  |  |  |  |  |  | sub normalize_romaji | 
| 1192 |  |  |  |  |  |  | { | 
| 1193 | 2 |  |  | 2 | 1 | 2773 | my ($romaji) = @_; | 
| 1194 | 2 |  |  |  |  | 12 | my $kana = romaji2kana ($romaji, {wapuro => 1}); | 
| 1195 | 2 |  |  |  |  | 10 | $kana =~ s/[っッ]/xtu/g; | 
| 1196 | 2 |  |  |  |  | 10 | my $romaji_out = kana2romaji ($kana, {ve_type => 'wapuro'}); | 
| 1197 |  |  |  |  |  |  | } | 
| 1198 |  |  |  |  |  |  |  | 
| 1199 |  |  |  |  |  |  | my $new2old_kanji; | 
| 1200 |  |  |  |  |  |  |  | 
| 1201 |  |  |  |  |  |  | sub load_new2old_kanji | 
| 1202 |  |  |  |  |  |  | { | 
| 1203 | 1 |  |  | 1 | 0 | 5 | $new2old_kanji = Convert::Moji->new ( | 
| 1204 |  |  |  |  |  |  | ['file', getdistfile ('new_kanji2old_kanji')], | 
| 1205 |  |  |  |  |  |  | ); | 
| 1206 |  |  |  |  |  |  | } | 
| 1207 |  |  |  |  |  |  |  | 
| 1208 |  |  |  |  |  |  | sub new2old_kanji | 
| 1209 |  |  |  |  |  |  | { | 
| 1210 | 1 |  |  | 1 | 1 | 554 | my ($new_kanji) = @_; | 
| 1211 | 1 | 50 |  |  |  | 5 | if (! $new2old_kanji) { | 
| 1212 | 0 |  |  |  |  | 0 | load_new2old_kanji (); | 
| 1213 |  |  |  |  |  |  | } | 
| 1214 | 1 |  |  |  |  | 5 | my $old_kanji = $new2old_kanji->convert ($new_kanji); | 
| 1215 | 1 |  |  |  |  | 118 | return $old_kanji; | 
| 1216 |  |  |  |  |  |  | } | 
| 1217 |  |  |  |  |  |  |  | 
| 1218 |  |  |  |  |  |  | sub old2new_kanji | 
| 1219 |  |  |  |  |  |  | { | 
| 1220 | 1 |  |  | 1 | 1 | 578 | my ($old_kanji) = @_; | 
| 1221 | 1 | 50 |  |  |  | 5 | if (! $new2old_kanji) { | 
| 1222 | 1 |  |  |  |  | 5 | load_new2old_kanji (); | 
| 1223 |  |  |  |  |  |  | } | 
| 1224 | 1 |  |  |  |  | 16954 | my $new_kanji = $new2old_kanji->invert ($old_kanji); | 
| 1225 | 1 |  |  |  |  | 178 | return $new_kanji; | 
| 1226 |  |  |  |  |  |  | } | 
| 1227 |  |  |  |  |  |  |  | 
| 1228 |  |  |  |  |  |  | my $katakana2cyrillic; | 
| 1229 |  |  |  |  |  |  |  | 
| 1230 |  |  |  |  |  |  | sub load_katakana2cyrillic | 
| 1231 |  |  |  |  |  |  | { | 
| 1232 | 1 |  |  | 1 | 0 | 4 | $katakana2cyrillic = Convert::Moji->new (['file', getdistfile ('katakana2cyrillic')]); | 
| 1233 |  |  |  |  |  |  | } | 
| 1234 |  |  |  |  |  |  |  | 
| 1235 |  |  |  |  |  |  | sub kana2cyrillic | 
| 1236 |  |  |  |  |  |  | { | 
| 1237 | 4 |  |  | 4 | 1 | 1042 | my ($kana) = @_; | 
| 1238 | 4 |  |  |  |  | 14 | my $katakana = kana2katakana ($kana); | 
| 1239 | 4 |  |  |  |  | 25 | $katakana =~ s/ン([アイウエオヤユヨ])/ンъ$1/g; | 
| 1240 | 4 | 100 |  |  |  | 11 | if (! $katakana2cyrillic) { | 
| 1241 | 1 |  |  |  |  | 4 | load_katakana2cyrillic (); | 
| 1242 |  |  |  |  |  |  | } | 
| 1243 | 4 |  |  |  |  | 13771 | my $cyrillic = $katakana2cyrillic->convert ($katakana); | 
| 1244 | 4 |  |  |  |  | 431 | $cyrillic =~ s/н([пбм])/м$1/g; | 
| 1245 | 4 |  |  |  |  | 11 | return $cyrillic; | 
| 1246 |  |  |  |  |  |  | } | 
| 1247 |  |  |  |  |  |  |  | 
| 1248 |  |  |  |  |  |  | sub cyrillic2katakana | 
| 1249 |  |  |  |  |  |  | { | 
| 1250 | 6 |  |  | 6 | 1 | 1389 | my ($cyrillic) = @_; | 
| 1251 |  |  |  |  |  |  | # Convert the Cyrillic letters to lower case versions of the | 
| 1252 |  |  |  |  |  |  | # letters. This table of conversions was made from the one in | 
| 1253 |  |  |  |  |  |  | # Wikipedia at | 
| 1254 |  |  |  |  |  |  | # using Emacs, the revision being | 
| 1255 |  |  |  |  |  |  | # . | 
| 1256 |  |  |  |  |  |  | # I do not know if it covers the alphabets perfectly. | 
| 1257 | 6 |  |  |  |  | 41 | $cyrillic =~ tr/АБВГДЕЖЗИЙIКЛМНОПРСТУФХЦЧШЩЬЮЯ/абвгдежзийiклмнопрстуфхцчшщьюя/; | 
| 1258 | 6 | 50 |  |  |  | 17 | if (! $katakana2cyrillic) { | 
| 1259 | 0 |  |  |  |  | 0 | load_katakana2cyrillic (); | 
| 1260 |  |  |  |  |  |  | } | 
| 1261 | 6 |  |  |  |  | 15 | my $katakana = $katakana2cyrillic->invert ($cyrillic); | 
| 1262 | 6 |  |  |  |  | 432 | $katakana =~ s/м/ン/g; | 
| 1263 | 6 |  |  |  |  | 20 | $katakana =~ s/ンъ([アイウエオヤユヨ])/ン$1/g; | 
| 1264 | 6 |  |  |  |  | 13 | return $katakana; | 
| 1265 |  |  |  |  |  |  | } | 
| 1266 |  |  |  |  |  |  |  | 
| 1267 |  |  |  |  |  |  | my $first2hangul; | 
| 1268 |  |  |  |  |  |  | my $rest2hangul; | 
| 1269 |  |  |  |  |  |  |  | 
| 1270 |  |  |  |  |  |  | my $first2hangul_re; | 
| 1271 |  |  |  |  |  |  | my $rest2hangul_re; | 
| 1272 |  |  |  |  |  |  |  | 
| 1273 |  |  |  |  |  |  | sub load_kana2hangul | 
| 1274 |  |  |  |  |  |  | { | 
| 1275 | 2 |  |  | 2 | 0 | 4 | $first2hangul = load_convertor ('first', 'hangul'); | 
| 1276 | 2 |  |  |  |  | 5 | $rest2hangul = load_convertor ('rest', 'hangul'); | 
| 1277 | 2 |  |  |  |  | 25 | $first2hangul_re = '\b' . make_regex (keys %$first2hangul); | 
| 1278 | 2 |  |  |  |  | 965 | $rest2hangul_re = make_regex (keys %$rest2hangul); | 
| 1279 |  |  |  |  |  |  | } | 
| 1280 |  |  |  |  |  |  |  | 
| 1281 |  |  |  |  |  |  | sub kana2hangul | 
| 1282 |  |  |  |  |  |  | { | 
| 1283 | 2 |  |  | 2 | 1 | 10856 | my ($kana) = @_; | 
| 1284 | 2 |  |  |  |  | 5 | my $katakana = kana2katakana ($kana); | 
| 1285 | 2 | 100 |  |  |  | 5 | if (! $first2hangul) { | 
| 1286 | 1 |  |  |  |  | 3 | load_kana2hangul (); | 
| 1287 |  |  |  |  |  |  | } | 
| 1288 | 2 |  |  |  |  | 698 | $katakana =~ s/($first2hangul_re)/$first2hangul->{$1}/g; | 
| 1289 | 2 |  |  |  |  | 278 | $katakana =~ s/($rest2hangul_re)/$rest2hangul->{$1}/g; | 
| 1290 | 2 |  |  |  |  | 15 | return $katakana; | 
| 1291 |  |  |  |  |  |  | } | 
| 1292 |  |  |  |  |  |  |  | 
| 1293 |  |  |  |  |  |  | my $firsth2k_re; | 
| 1294 |  |  |  |  |  |  | my $resth2k_re; | 
| 1295 |  |  |  |  |  |  | my $firsth2k; | 
| 1296 |  |  |  |  |  |  | my $resth2k; | 
| 1297 |  |  |  |  |  |  |  | 
| 1298 |  |  |  |  |  |  | sub load_hangul2kana | 
| 1299 |  |  |  |  |  |  | { | 
| 1300 | 1 |  |  | 1 | 0 | 3 | load_kana2hangul (); | 
| 1301 | 1 |  |  |  |  | 498 | $firsth2k = { reverse %$first2hangul }; | 
| 1302 | 1 |  |  |  |  | 59 | $resth2k = { reverse %$rest2hangul }; | 
| 1303 | 1 |  |  |  |  | 22 | $firsth2k_re = '\b' . make_regex (keys %$firsth2k); | 
| 1304 | 1 |  |  |  |  | 220 | $resth2k_re = make_regex (keys %$resth2k); | 
| 1305 |  |  |  |  |  |  | } | 
| 1306 |  |  |  |  |  |  |  | 
| 1307 |  |  |  |  |  |  | sub hangul2kana | 
| 1308 |  |  |  |  |  |  | { | 
| 1309 | 1 |  |  | 1 | 0 | 2 | my ($hangul) = @_; | 
| 1310 | 1 | 50 |  |  |  | 5 | if (! $firsth2k) { | 
| 1311 | 1 |  |  |  |  | 3 | load_hangul2kana (); | 
| 1312 |  |  |  |  |  |  | } | 
| 1313 | 1 |  |  |  |  | 257 | $hangul =~ s/($firsth2k_re)/$firsth2k->{$1}/; | 
| 1314 | 1 |  |  |  |  | 32 | $hangul =~ s/($resth2k_re)/$resth2k->{$1}/; | 
| 1315 | 1 |  |  |  |  | 5 | return $hangul; | 
| 1316 |  |  |  |  |  |  | } | 
| 1317 |  |  |  |  |  |  |  | 
| 1318 |  |  |  |  |  |  | my %small2large = qw! | 
| 1319 |  |  |  |  |  |  | ゃ や | 
| 1320 |  |  |  |  |  |  | ゅ ゆ | 
| 1321 |  |  |  |  |  |  | ょ よ | 
| 1322 |  |  |  |  |  |  | ぁ あ | 
| 1323 |  |  |  |  |  |  | ぃ い | 
| 1324 |  |  |  |  |  |  | ぅ う | 
| 1325 |  |  |  |  |  |  | ぇ え | 
| 1326 |  |  |  |  |  |  | ぉ お | 
| 1327 |  |  |  |  |  |  | っ つ | 
| 1328 |  |  |  |  |  |  | ゎ わ | 
| 1329 |  |  |  |  |  |  | !; | 
| 1330 |  |  |  |  |  |  |  | 
| 1331 |  |  |  |  |  |  | sub kana_to_large | 
| 1332 |  |  |  |  |  |  | { | 
| 1333 | 2 |  |  | 2 | 1 | 1314 | my ($kana) = @_; | 
| 1334 | 2 |  |  |  |  | 24 | $kana =~ tr/ゃゅょぁぃぅぇぉっゎ/やゆよあいうえおつわ/; | 
| 1335 | 2 |  |  |  |  | 17 | $kana =~ tr/ャュョァィゥェォッヮ/ヤユヨアイウエオツワ/; | 
| 1336 |  |  |  |  |  |  | # Katakana phonetic extensions. | 
| 1337 | 2 |  |  |  |  | 16 | $kana =~ tr/ㇰㇱㇲㇳㇴㇵㇶㇷㇸㇹㇺㇻㇼㇽㇾㇿ/クシストヌハヒフヘホムラリルレロ/; | 
| 1338 | 2 |  |  |  |  | 8 | return $kana; | 
| 1339 |  |  |  |  |  |  | } | 
| 1340 |  |  |  |  |  |  |  | 
| 1341 |  |  |  |  |  |  | my $circled2kanji; | 
| 1342 |  |  |  |  |  |  |  | 
| 1343 |  |  |  |  |  |  | sub load_circled2kanji | 
| 1344 |  |  |  |  |  |  | { | 
| 1345 | 4 | 100 |  | 4 | 0 | 14 | if (! $circled2kanji) { | 
| 1346 | 1 |  |  |  |  | 5 | $circled2kanji = | 
| 1347 |  |  |  |  |  |  | Convert::Moji->new (["file", | 
| 1348 |  |  |  |  |  |  | getdistfile ('circled2kanji')]); | 
| 1349 |  |  |  |  |  |  | } | 
| 1350 | 4 | 50 |  |  |  | 13284 | if (! $circled2kanji) { | 
| 1351 | 0 |  |  |  |  | 0 | die "ERROR"; | 
| 1352 |  |  |  |  |  |  | } | 
| 1353 |  |  |  |  |  |  | } | 
| 1354 |  |  |  |  |  |  |  | 
| 1355 |  |  |  |  |  |  | sub circled2kanji | 
| 1356 |  |  |  |  |  |  | { | 
| 1357 | 2 |  |  | 2 | 1 | 670 | my ($input) = @_; | 
| 1358 | 2 |  |  |  |  | 8 | load_circled2kanji (); | 
| 1359 | 2 |  |  |  |  | 10 | return $circled2kanji->convert ($input); | 
| 1360 |  |  |  |  |  |  | } | 
| 1361 |  |  |  |  |  |  |  | 
| 1362 |  |  |  |  |  |  | sub kanji2circled | 
| 1363 |  |  |  |  |  |  | { | 
| 1364 | 2 |  |  | 2 | 1 | 1509 | my ($input) = @_; | 
| 1365 | 2 |  |  |  |  | 9 | load_circled2kanji (); | 
| 1366 | 2 |  |  |  |  | 9 | return $circled2kanji->invert ($input); | 
| 1367 |  |  |  |  |  |  | } | 
| 1368 |  |  |  |  |  |  |  | 
| 1369 |  |  |  |  |  |  | my $bracketed2kanji; | 
| 1370 |  |  |  |  |  |  |  | 
| 1371 |  |  |  |  |  |  | sub load_bracketed2kanji | 
| 1372 |  |  |  |  |  |  | { | 
| 1373 | 2 | 100 |  | 2 | 0 | 8 | if (! $bracketed2kanji) { | 
| 1374 | 1 |  |  |  |  | 6 | $bracketed2kanji = | 
| 1375 |  |  |  |  |  |  | Convert::Moji->new (["file", | 
| 1376 |  |  |  |  |  |  | getdistfile ('bracketed2kanji')]); | 
| 1377 |  |  |  |  |  |  | } | 
| 1378 |  |  |  |  |  |  | } | 
| 1379 |  |  |  |  |  |  |  | 
| 1380 |  |  |  |  |  |  | sub bracketed2kanji | 
| 1381 |  |  |  |  |  |  | { | 
| 1382 | 1 |  |  | 1 | 1 | 645 | my ($input) = @_; | 
| 1383 | 1 |  |  |  |  | 5 | load_bracketed2kanji (); | 
| 1384 | 1 |  |  |  |  | 830 | return $bracketed2kanji->convert ($input); | 
| 1385 |  |  |  |  |  |  | } | 
| 1386 |  |  |  |  |  |  |  | 
| 1387 |  |  |  |  |  |  | sub kanji2bracketed | 
| 1388 |  |  |  |  |  |  | { | 
| 1389 | 1 |  |  | 1 | 1 | 619 | my ($input) = @_; | 
| 1390 | 1 |  |  |  |  | 4 | load_bracketed2kanji (); | 
| 1391 | 1 |  |  |  |  | 4 | return $bracketed2kanji->invert ($input); | 
| 1392 |  |  |  |  |  |  | } | 
| 1393 |  |  |  |  |  |  |  | 
| 1394 |  |  |  |  |  |  | sub InKana | 
| 1395 |  |  |  |  |  |  | { | 
| 1396 | 3 |  |  | 3 | 1 | 2337 | return <<"END"; | 
| 1397 |  |  |  |  |  |  | +utf8::Katakana | 
| 1398 |  |  |  |  |  |  | +utf8::InKatakana | 
| 1399 |  |  |  |  |  |  | +utf8::InHiragana | 
| 1400 |  |  |  |  |  |  | FF9E\tFF9F | 
| 1401 |  |  |  |  |  |  | FF70 | 
| 1402 |  |  |  |  |  |  | -utf8::IsCn | 
| 1403 |  |  |  |  |  |  | -30FB | 
| 1404 |  |  |  |  |  |  | END | 
| 1405 |  |  |  |  |  |  | # Explanation of the above gibberish: The funny hex is for dakuten | 
| 1406 |  |  |  |  |  |  | # and handakuten half width. The "Katakana" catches halfwidth | 
| 1407 |  |  |  |  |  |  | # katakana, and the "InKatakana" catches the chouon mark. "IsCn" | 
| 1408 |  |  |  |  |  |  | # means "other, not assigned", so we remove this to prevent | 
| 1409 |  |  |  |  |  |  | # matching non-kana characters floating around near to real | 
| 1410 |  |  |  |  |  |  | # ones. 30FB is "Katakana middle dot", which is not kana as far as | 
| 1411 |  |  |  |  |  |  | # I know, so that's also removed. | 
| 1412 |  |  |  |  |  |  | } | 
| 1413 |  |  |  |  |  |  |  | 
| 1414 |  |  |  |  |  |  | # お | 
| 1415 |  |  |  |  |  |  |  | 
| 1416 |  |  |  |  |  |  | # Match zero or one sokuons, one full-sized kana character, then zero | 
| 1417 |  |  |  |  |  |  | # or one each of small kana, chouon, and syllabic n, in that order. | 
| 1418 |  |  |  |  |  |  |  | 
| 1419 |  |  |  |  |  |  | my $kana2syllable_re = qr/ッ?[アイウエオ-モヤユヨ-ヴ](?:[ャュョァィゥェォ])?ー?ン?/; | 
| 1420 |  |  |  |  |  |  |  | 
| 1421 |  |  |  |  |  |  | sub katakana2syllable | 
| 1422 |  |  |  |  |  |  | { | 
| 1423 | 4 |  |  | 4 | 1 | 14773 | my ($kana) = @_; | 
| 1424 | 4 |  |  |  |  | 9 | my @pieces; | 
| 1425 | 4 |  |  |  |  | 85 | while ($kana =~ /($kana2syllable_re)/g) { | 
| 1426 | 22 |  |  |  |  | 185 | push @pieces, $1; | 
| 1427 |  |  |  |  |  |  | } | 
| 1428 | 4 |  |  |  |  | 15 | return \@pieces; | 
| 1429 |  |  |  |  |  |  | } | 
| 1430 |  |  |  |  |  |  |  | 
| 1431 |  |  |  |  |  |  | my $square2katakana; | 
| 1432 |  |  |  |  |  |  |  | 
| 1433 |  |  |  |  |  |  | sub load_square2katakana | 
| 1434 |  |  |  |  |  |  | { | 
| 1435 | 2 | 100 |  | 2 | 0 | 7 | if (! $square2katakana) { | 
| 1436 | 1 |  |  |  |  | 4 | $square2katakana = | 
| 1437 |  |  |  |  |  |  | Convert::Moji->new (["file", | 
| 1438 |  |  |  |  |  |  | getdistfile ('square-katakana')]); | 
| 1439 |  |  |  |  |  |  | } | 
| 1440 |  |  |  |  |  |  | } | 
| 1441 |  |  |  |  |  |  |  | 
| 1442 |  |  |  |  |  |  | sub square2katakana | 
| 1443 |  |  |  |  |  |  | { | 
| 1444 | 1 |  |  | 1 | 1 | 93 | load_square2katakana (); | 
| 1445 | 1 |  |  |  |  | 14510 | return $square2katakana->convert (@_); | 
| 1446 |  |  |  |  |  |  | } | 
| 1447 |  |  |  |  |  |  |  | 
| 1448 |  |  |  |  |  |  | sub katakana2square | 
| 1449 |  |  |  |  |  |  | { | 
| 1450 | 1 |  |  | 1 | 1 | 1022 | load_square2katakana (); | 
| 1451 | 1 |  |  |  |  | 6 | return $square2katakana->invert (@_); | 
| 1452 |  |  |  |  |  |  | } | 
| 1453 |  |  |  |  |  |  |  | 
| 1454 |  |  |  |  |  |  | sub nigori_first | 
| 1455 |  |  |  |  |  |  | { | 
| 1456 | 1 |  |  | 1 | 1 | 648 | my ($list) = @_; | 
| 1457 | 1 |  |  |  |  | 3 | my @nigori; | 
| 1458 | 1 |  |  |  |  | 3 | for my $kana (@$list) { | 
| 1459 | 4 |  |  |  |  | 12 | my ($first, $remaining) = split //, $kana, 2; | 
| 1460 | 4 |  |  |  |  | 13 | my $nf = $not2daku{$first}; | 
| 1461 | 4 | 100 |  |  |  | 9 | if ($nf) { | 
| 1462 | 3 |  |  |  |  | 7 | push @nigori, $nf.$remaining; | 
| 1463 |  |  |  |  |  |  | } | 
| 1464 | 4 |  |  |  |  | 8 | my $hf = $not2handaku{$first}; | 
| 1465 | 4 | 100 |  |  |  | 10 | if ($hf) { | 
| 1466 | 1 |  |  |  |  | 3 | push @nigori, $hf.$remaining; | 
| 1467 |  |  |  |  |  |  | } | 
| 1468 |  |  |  |  |  |  | } | 
| 1469 | 1 | 50 |  |  |  | 4 | if (@nigori) { | 
| 1470 | 1 |  |  |  |  | 5 | push @$list, @nigori; | 
| 1471 |  |  |  |  |  |  | } | 
| 1472 |  |  |  |  |  |  | } | 
| 1473 |  |  |  |  |  |  |  | 
| 1474 |  |  |  |  |  |  | # Hentaigana (Unicode 10.0) related | 
| 1475 |  |  |  |  |  |  |  | 
| 1476 |  |  |  |  |  |  | my $hentai_file = __FILE__; | 
| 1477 |  |  |  |  |  |  | $hentai_file =~ s!\.pm$!/!; | 
| 1478 |  |  |  |  |  |  | $hentai_file .= "hentaigana.json"; | 
| 1479 |  |  |  |  |  |  | # Hentai to hiragana (one to one) | 
| 1480 |  |  |  |  |  |  | my %hen2hi; | 
| 1481 |  |  |  |  |  |  | # Hiragana to hentai (one to many) | 
| 1482 |  |  |  |  |  |  | my %hi2hen; | 
| 1483 |  |  |  |  |  |  | # Hentaigana to kanji | 
| 1484 |  |  |  |  |  |  | my %hen2k; | 
| 1485 |  |  |  |  |  |  | # Kanji to hentaigana | 
| 1486 |  |  |  |  |  |  | my %k2hen; | 
| 1487 |  |  |  |  |  |  | my $k2hen_re; | 
| 1488 |  |  |  |  |  |  | # Hentai to hiragana/kanji regex (recycled for the kanji case). | 
| 1489 |  |  |  |  |  |  | my $hen_re; | 
| 1490 |  |  |  |  |  |  | # Hiragana to hentai regex | 
| 1491 |  |  |  |  |  |  | my $hi2hen_re; | 
| 1492 |  |  |  |  |  |  | # Hentai data | 
| 1493 |  |  |  |  |  |  | my $hendat; | 
| 1494 |  |  |  |  |  |  |  | 
| 1495 |  |  |  |  |  |  | sub load_hentai | 
| 1496 |  |  |  |  |  |  | { | 
| 1497 | 1 |  |  | 1 | 0 | 4 | $hendat = json_file_to_perl ($hentai_file); | 
| 1498 | 1 |  |  |  |  | 686 | for my $h (@$hendat) { | 
| 1499 | 285 |  |  |  |  | 326 | my $hi = $h->{hi}; | 
| 1500 | 285 |  |  |  |  | 380 | my $hen = chr ($h->{u}); | 
| 1501 | 285 |  |  |  |  | 476 | $hen2hi{$hen} = $hi; | 
| 1502 | 285 |  |  |  |  | 337 | for my $hiragana (@$hi) { | 
| 1503 | 298 |  |  |  |  | 296 | push @{$hi2hen{$hiragana}}, $hen; | 
|  | 298 |  |  |  |  | 592 |  | 
| 1504 |  |  |  |  |  |  | } | 
| 1505 | 285 |  |  |  |  | 438 | $hen2k{$hen} = $h->{ka}; | 
| 1506 | 285 |  |  |  |  | 269 | push @{$k2hen{$h->{ka}}}, $hen; | 
|  | 285 |  |  |  |  | 636 |  | 
| 1507 |  |  |  |  |  |  | } | 
| 1508 | 1 |  |  |  |  | 44 | $hen_re = make_regex (keys %hen2hi); | 
| 1509 | 1 |  |  |  |  | 721 | $hi2hen_re = make_regex (keys %hi2hen); | 
| 1510 | 1 |  |  |  |  | 143 | $k2hen_re = make_regex (keys %k2hen); | 
| 1511 |  |  |  |  |  |  | } | 
| 1512 |  |  |  |  |  |  |  | 
| 1513 |  |  |  |  |  |  | sub hentai2kana | 
| 1514 |  |  |  |  |  |  | { | 
| 1515 | 1 |  |  | 1 | 1 | 10653 | my ($text) = @_; | 
| 1516 | 1 | 50 |  |  |  | 4 | if (! $hendat) { | 
| 1517 | 1 |  |  |  |  | 2 | load_hentai (); | 
| 1518 |  |  |  |  |  |  | } | 
| 1519 | 1 |  |  |  |  | 604 | $text =~ s/$hen_re/join ('・', @{$hen2hi{$1}})/ge; | 
|  | 4 |  |  |  |  | 8 |  | 
|  | 4 |  |  |  |  | 12 |  | 
| 1520 | 1 |  |  |  |  | 7 | return $text; | 
| 1521 |  |  |  |  |  |  | } | 
| 1522 |  |  |  |  |  |  |  | 
| 1523 |  |  |  |  |  |  | sub kana2hentai | 
| 1524 |  |  |  |  |  |  | { | 
| 1525 | 2 |  |  | 2 | 1 | 6 | my ($text) = @_; | 
| 1526 | 2 | 50 |  |  |  | 6 | if (! $hendat) { | 
| 1527 | 0 |  |  |  |  | 0 | load_hentai (); | 
| 1528 |  |  |  |  |  |  | } | 
| 1529 |  |  |  |  |  |  | # Make it all-hiragana. | 
| 1530 | 2 |  |  |  |  | 5 | $text = split_sound_marks ($text); | 
| 1531 | 2 |  |  |  |  | 6 | $text = kata2hira ($text); | 
| 1532 | 2 |  |  |  |  | 25 | $text =~ s/$hi2hen_re/join ('・', @{$hi2hen{$1}})/ge; | 
|  | 8 |  |  |  |  | 11 |  | 
|  | 8 |  |  |  |  | 29 |  | 
| 1533 | 2 |  |  |  |  | 10 | return $text; | 
| 1534 |  |  |  |  |  |  | # what to do? | 
| 1535 |  |  |  |  |  |  | } | 
| 1536 |  |  |  |  |  |  |  | 
| 1537 |  |  |  |  |  |  | sub hentai2kanji | 
| 1538 |  |  |  |  |  |  | { | 
| 1539 | 1 |  |  | 1 | 1 | 3 | my ($text) = @_; | 
| 1540 | 1 | 50 |  |  |  | 3 | if (! $hendat) { | 
| 1541 | 0 |  |  |  |  | 0 | load_hentai (); | 
| 1542 |  |  |  |  |  |  | } | 
| 1543 |  |  |  |  |  |  | # This uses the same regex as the kanji case. | 
| 1544 | 1 |  |  |  |  | 73 | $text =~ s/$hen_re/$hen2k{$1}/g; | 
| 1545 | 1 |  |  |  |  | 6 | return $text; | 
| 1546 |  |  |  |  |  |  | } | 
| 1547 |  |  |  |  |  |  |  | 
| 1548 |  |  |  |  |  |  | sub kanji2hentai | 
| 1549 |  |  |  |  |  |  | { | 
| 1550 | 1 |  |  | 1 | 1 | 14 | my ($text) = @_; | 
| 1551 | 1 | 50 |  |  |  | 4 | if (! $hendat) { | 
| 1552 | 0 |  |  |  |  | 0 | load_hentai (); | 
| 1553 |  |  |  |  |  |  | } | 
| 1554 | 1 |  |  |  |  | 81 | $text =~ s/$k2hen_re/join ('・', @{$k2hen{$1}})/ge; | 
|  | 4 |  |  |  |  | 7 |  | 
|  | 4 |  |  |  |  | 14 |  | 
| 1555 | 1 |  |  |  |  | 5 | return $text; | 
| 1556 |  |  |  |  |  |  | } | 
| 1557 |  |  |  |  |  |  |  | 
| 1558 |  |  |  |  |  |  | my %yayuyo = (qw/ | 
| 1559 |  |  |  |  |  |  | ヤ ャ | 
| 1560 |  |  |  |  |  |  | ユ ュ | 
| 1561 |  |  |  |  |  |  | ヨ ョ | 
| 1562 |  |  |  |  |  |  | /); | 
| 1563 |  |  |  |  |  |  |  | 
| 1564 |  |  |  |  |  |  | my %l2s = qw!ア ァ イ ィ ウ ゥ エ ェ オ ォ!; | 
| 1565 |  |  |  |  |  |  |  | 
| 1566 |  |  |  |  |  |  | sub smallize_kana | 
| 1567 |  |  |  |  |  |  | { | 
| 1568 | 10 |  |  | 10 | 1 | 12699 | my ($kana) = @_; | 
| 1569 | 10 |  |  |  |  | 18 | my $orig = $kana; | 
| 1570 | 10 |  |  |  |  | 83 | $kana =~ s/([キギシジチヂニヒビピミリ])([ヤユヨ])/$1$yayuyo{$2}/g; | 
| 1571 |  |  |  |  |  |  | # Don't make "ツル" into "ッル". | 
| 1572 | 10 |  |  |  |  | 167 | $kana =~ s/([$before_sokuon])ツ([$takes_sokuon])/$1ッ$2/g; | 
| 1573 | 10 |  |  |  |  | 41 | $kana =~ s/フ([アイエオ])/フ$l2s{$1}/g; | 
| 1574 | 10 | 100 |  |  |  | 33 | if ($kana ne $orig) { | 
| 1575 | 7 |  |  |  |  | 42 | return $kana; | 
| 1576 |  |  |  |  |  |  | } | 
| 1577 | 3 |  |  |  |  | 18 | return undef; | 
| 1578 |  |  |  |  |  |  | } | 
| 1579 |  |  |  |  |  |  |  | 
| 1580 |  |  |  |  |  |  | sub cleanup_kana | 
| 1581 |  |  |  |  |  |  | { | 
| 1582 | 3 |  |  | 3 | 1 | 810 | my ($kana) = @_; | 
| 1583 | 3 | 50 |  |  |  | 10 | if (! $kana) { | 
| 1584 | 0 |  |  |  |  | 0 | return $kana; | 
| 1585 |  |  |  |  |  |  | } | 
| 1586 | 3 | 100 |  |  |  | 27 | if ($kana =~ /[\x{ff01}-\x{ff5e}]/) { | 
|  |  | 50 |  |  |  |  |  | 
| 1587 | 1 |  |  |  |  | 8 | $kana = wide2ascii ($kana); | 
| 1588 | 1 |  |  |  |  | 4 | $kana = romaji2kana ($kana); | 
| 1589 |  |  |  |  |  |  | } | 
| 1590 |  |  |  |  |  |  | elsif ($kana =~ /[a-zâîûêôôāūēō]/i) { | 
| 1591 | 0 |  |  |  |  | 0 | $kana = romaji2kana ($kana); | 
| 1592 |  |  |  |  |  |  | } | 
| 1593 |  |  |  |  |  |  | # This calls join_sound_marks, so that call is not necessary. | 
| 1594 | 3 |  |  |  |  | 10 | $kana = kana2katakana ($kana); | 
| 1595 |  |  |  |  |  |  | # Translate kanjis into katakana where a "naive user" has inserted | 
| 1596 |  |  |  |  |  |  | # kanji not kana.  Because the following expression is visually | 
| 1597 |  |  |  |  |  |  | # confusing, note that the LHS are all kanji, and the RHS are all | 
| 1598 |  |  |  |  |  |  | # kana/chouon | 
| 1599 | 3 |  |  |  |  | 27 | $kana =~ tr/囗口八力二一/ロロハカニー/; | 
| 1600 |  |  |  |  |  |  | # Turn silly small youon kana into big ones | 
| 1601 | 3 |  |  |  |  | 19 | $kana =~ s/([^きぎしじちぢにひびぴみり]|^)([ゃゅょ])/$1$small2large{$2}/g; | 
| 1602 | 3 |  |  |  |  | 16 | return $kana; | 
| 1603 |  |  |  |  |  |  | } | 
| 1604 |  |  |  |  |  |  |  | 
| 1605 |  |  |  |  |  |  | sub load_kanji | 
| 1606 |  |  |  |  |  |  | { | 
| 1607 | 2 |  |  | 2 | 0 | 6 | my ($file) = @_; | 
| 1608 | 2 |  |  |  |  | 6 | my $bkfile = getdistfile ($file); | 
| 1609 | 2 | 50 |  |  |  | 85 | open my $in, "<:encoding(utf8)", $bkfile | 
| 1610 |  |  |  |  |  |  | or die "Error opening '$bkfile': $!"; | 
| 1611 | 2 |  |  |  |  | 114 | my @bk; | 
| 1612 | 2 |  |  |  |  | 39 | while (<$in>) { | 
| 1613 | 20 |  |  |  |  | 84 | while (/(\p{InCJKUnifiedIdeographs})/g) { | 
| 1614 | 268 |  |  |  |  | 829 | push @bk, $1; | 
| 1615 |  |  |  |  |  |  | } | 
| 1616 |  |  |  |  |  |  | } | 
| 1617 | 2 | 50 |  |  |  | 30 | close $in or die $!; | 
| 1618 | 2 |  |  |  |  | 69 | return @bk; | 
| 1619 |  |  |  |  |  |  | } | 
| 1620 |  |  |  |  |  |  |  | 
| 1621 |  |  |  |  |  |  | sub yurei_moji | 
| 1622 |  |  |  |  |  |  | { | 
| 1623 | 1 |  |  | 1 | 1 | 1154 | return load_kanji ('yurei-moji') | 
| 1624 |  |  |  |  |  |  | } | 
| 1625 |  |  |  |  |  |  |  | 
| 1626 |  |  |  |  |  |  | sub bad_kanji | 
| 1627 |  |  |  |  |  |  | { | 
| 1628 | 1 |  |  | 1 | 1 | 5 | return load_kanji ('bad-kanji'); | 
| 1629 |  |  |  |  |  |  | } | 
| 1630 |  |  |  |  |  |  |  | 
| 1631 |  |  |  |  |  |  | 1; |