| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Text::Shinobi; | 
| 2 | 6 |  |  | 6 |  | 76383 | use 5.010001; | 
|  | 6 |  |  |  |  | 15 |  | 
| 3 | 6 |  |  | 6 |  | 572 | use utf8; | 
|  | 6 |  |  |  |  | 14 |  | 
|  | 6 |  |  |  |  | 26 |  | 
| 4 | 6 |  |  | 6 |  | 122 | use strict; | 
|  | 6 |  |  |  |  | 9 |  | 
|  | 6 |  |  |  |  | 129 |  | 
| 5 | 6 |  |  | 6 |  | 19 | use warnings; | 
|  | 6 |  |  |  |  | 5 |  | 
|  | 6 |  |  |  |  | 292 |  | 
| 6 |  |  |  |  |  |  | our $VERSION = "0.01"; | 
| 7 |  |  |  |  |  |  |  | 
| 8 | 6 |  |  | 6 |  | 22 | use Exporter 'import'; | 
|  | 6 |  |  |  |  | 6 |  | 
|  | 6 |  |  |  |  | 200 |  | 
| 9 | 6 |  |  | 6 |  | 3291 | use Unicode::Normalize qw/NFD NFC/; | 
|  | 6 |  |  |  |  | 974011 |  | 
|  | 6 |  |  |  |  | 882 |  | 
| 10 | 6 |  |  | 6 |  | 3716 | use Lingua::JA::Regular::Unicode; | 
|  | 6 |  |  |  |  | 62899 |  | 
|  | 6 |  |  |  |  | 677 |  | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | our @EXPORT_OK = qw/shinobi/; | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | use constant { | 
| 15 | 6 |  |  |  |  | 7126 | DUO     => 1 <<  0, | 
| 16 |  |  |  |  |  |  | MONO    => 1 <<  1, | 
| 17 |  |  |  |  |  |  | JIS     => 1 <<  2, # JIS X 0208 + JIS X 0212 OR JIS X 0213 | 
| 18 |  |  |  |  |  |  | UTF8MB3 => 1 <<  3, # fits in at most 3 bytes of UTF-8 (code point within the BMP) | 
| 19 |  |  |  |  |  |  | Y2016   => 1 << 10, # almost viewable in 2016 (Mac10.11, Win10, iOS9, Android5) | 
| 20 | 6 |  |  | 6 |  | 39 | }; | 
|  | 6 |  |  |  |  | 7 |  | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  | our $ENCODE = Y2016; # this version's default mask | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | our $map = [ | 
| 25 |  |  |  |  |  |  | { char => 'い', code => "\x{682C}",         flag => MONO | UTF8MB3 | JIS | Y2016 }, | 
| 26 |  |  |  |  |  |  | #          ろ | 
| 27 |  |  |  |  |  |  | #          は | 
| 28 |  |  |  |  |  |  | { char => 'に', code => "\x{92AB}",         flag => MONO | UTF8MB3 | JIS | Y2016 }, | 
| 29 |  |  |  |  |  |  | { char => 'ほ', code => "\x{23D0A}",        flag => MONO }, | 
| 30 |  |  |  |  |  |  | { char => 'へ', code => "\x{2021C}",        flag => MONO }, | 
| 31 |  |  |  |  |  |  | { char => 'と', code => "\x{28246}",        flag => MONO }, | 
| 32 |  |  |  |  |  |  | { char => 'ち', code => "\x{68C8}",         flag => MONO | UTF8MB3 | JIS | Y2016 }, | 
| 33 |  |  |  |  |  |  | #          り | 
| 34 |  |  |  |  |  |  | { char => 'ぬ', code => "\x{57E5}",         flag => MONO | UTF8MB3 |       Y2016 }, | 
| 35 |  |  |  |  |  |  | { char => 'る', code => "\x{9306}",         flag => MONO | UTF8MB3 | JIS | Y2016 }, | 
| 36 |  |  |  |  |  |  | { char => 'を', code => "\x{6E05}",         flag => MONO | UTF8MB3 | JIS | Y2016 }, | 
| 37 |  |  |  |  |  |  | { char => 'わ', code => "\x{5029}",         flag => MONO | UTF8MB3 | JIS | Y2016 }, | 
| 38 |  |  |  |  |  |  | #          か | 
| 39 |  |  |  |  |  |  | { char => 'よ', code => "\x{6A2A}",         flag => MONO | UTF8MB3 | JIS | Y2016 }, | 
| 40 |  |  |  |  |  |  | { char => 'た', code => "\x{71BF}",         flag => MONO | UTF8MB3 | JIS | Y2016 }, | 
| 41 |  |  |  |  |  |  | { char => 'れ', code => "\x{58B4}",         flag => MONO | UTF8MB3 |       Y2016 }, | 
| 42 |  |  |  |  |  |  | { char => 'そ', code => "\x{9404}",         flag => MONO | UTF8MB3 | JIS | Y2016 }, | 
| 43 |  |  |  |  |  |  | { char => 'つ', code => "\x{6F62}",         flag => MONO | UTF8MB3 | JIS | Y2016 }, | 
| 44 |  |  |  |  |  |  | { char => 'ね', code => "\x{50D9}",         flag => MONO | UTF8MB3 | JIS | Y2016 }, | 
| 45 |  |  |  |  |  |  | { char => 'な', code => "\x{28287}",        flag => MONO }, | 
| 46 |  |  |  |  |  |  | #          ら | 
| 47 |  |  |  |  |  |  | { char => 'む', code => "\x{7103}",         flag => MONO | UTF8MB3 | JIS | Y2016 }, | 
| 48 |  |  |  |  |  |  | { char => 'う', code => "\x{212FD}",        flag => MONO |           JIS | Y2016 }, | 
| 49 |  |  |  |  |  |  | { char => 'ゐ', code => "\x{4932}",         flag => MONO | UTF8MB3 |       Y2016 }, | 
| 50 |  |  |  |  |  |  | { char => 'の', code => "\x{6D7E}",         flag => MONO | UTF8MB3 |       Y2016 }, | 
| 51 |  |  |  |  |  |  | #          お | 
| 52 |  |  |  |  |  |  | #          く | 
| 53 |  |  |  |  |  |  | { char => 'や', code => "\x{67CF}",         flag => MONO | UTF8MB3 | JIS | Y2016 }, | 
| 54 |  |  |  |  |  |  | { char => 'ま', code => "\x{241E2}",        flag => MONO |                 Y2016 }, | 
| 55 |  |  |  |  |  |  | { char => 'け', code => "\x{2129A}",        flag => MONO }, | 
| 56 |  |  |  |  |  |  | { char => 'ふ', code => "\x{9251}",         flag => MONO | UTF8MB3 | JIS | Y2016 }, | 
| 57 |  |  |  |  |  |  | { char => 'こ', code => "\x{6CCA}",         flag => MONO | UTF8MB3 | JIS | Y2016 }, | 
| 58 |  |  |  |  |  |  | { char => 'え', code => "\x{4F2F}",         flag => MONO | UTF8MB3 | JIS | Y2016 }, | 
| 59 |  |  |  |  |  |  | #          て | 
| 60 |  |  |  |  |  |  | { char => 'あ', code => "\x{23638}",        flag => MONO |           JIS | Y2016 }, | 
| 61 |  |  |  |  |  |  | { char => 'さ', code => "\x{3DF5}",         flag => MONO | UTF8MB3 |       Y2016 }, | 
| 62 |  |  |  |  |  |  | #          き | 
| 63 |  |  |  |  |  |  | { char => 'ゆ', code => "\x{28B46}",        flag => MONO |                 Y2016 }, | 
| 64 |  |  |  |  |  |  | { char => 'め', code => "\x{6F76}",         flag => MONO | UTF8MB3 |       Y2016 }, | 
| 65 |  |  |  |  |  |  | { char => 'み', code => "\x{20381}",        flag => MONO |           JIS | Y2016 }, | 
| 66 |  |  |  |  |  |  | { char => 'し', code => "\x{28282}",        flag => MONO |           JIS | Y2016 }, | 
| 67 |  |  |  |  |  |  | { char => 'ゑ', code => "\x{6A74}",         flag => MONO | UTF8MB3 |       Y2016 }, | 
| 68 |  |  |  |  |  |  | #          ひ | 
| 69 |  |  |  |  |  |  | #          も | 
| 70 |  |  |  |  |  |  | #          せ | 
| 71 |  |  |  |  |  |  | #          す | 
| 72 |  |  |  |  |  |  | #          ん | 
| 73 |  |  |  |  |  |  |  | 
| 74 |  |  |  |  |  |  | { char => 'い', code => "\x{2F4A}\x{2F8A}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 75 |  |  |  |  |  |  | { char => 'ろ', code => "\x{2F55}\x{2F8A}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 76 |  |  |  |  |  |  | { char => 'は', code => "\x{2F1F}\x{2F8A}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 77 |  |  |  |  |  |  | { char => 'に', code => "\x{2FA6}\x{2F8A}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 78 |  |  |  |  |  |  | { char => 'ほ', code => "\x{6C35}\x{2F8A}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 79 |  |  |  |  |  |  | { char => 'へ', code => "\x{4EBB}\x{2F8A}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 80 |  |  |  |  |  |  | { char => 'と', code => "\x{2F9D}\x{2F8A}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 81 |  |  |  |  |  |  | { char => 'ち', code => "\x{2F4A}\x{2ED8}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 82 |  |  |  |  |  |  | { char => 'り', code => "\x{2F55}\x{2ED8}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 83 |  |  |  |  |  |  | { char => 'ぬ', code => "\x{2F1F}\x{2ED8}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 84 |  |  |  |  |  |  | { char => 'る', code => "\x{2FA6}\x{2ED8}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 85 |  |  |  |  |  |  | { char => 'を', code => "\x{6C35}\x{2ED8}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 86 |  |  |  |  |  |  | { char => 'わ', code => "\x{4EBB}\x{2ED8}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 87 |  |  |  |  |  |  | { char => 'か', code => "\x{2F9D}\x{2ED8}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 88 |  |  |  |  |  |  | { char => 'よ', code => "\x{2F4A}\x{2EE9}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 89 |  |  |  |  |  |  | { char => 'た', code => "\x{2F55}\x{2EE9}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 90 |  |  |  |  |  |  | { char => 'れ', code => "\x{2F1F}\x{2EE9}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 91 |  |  |  |  |  |  | { char => 'そ', code => "\x{2FA6}\x{2EE9}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 92 |  |  |  |  |  |  | { char => 'つ', code => "\x{6C35}\x{2EE9}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 93 |  |  |  |  |  |  | { char => 'ね', code => "\x{4EBB}\x{2EE9}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 94 |  |  |  |  |  |  | { char => 'な', code => "\x{2F9D}\x{2EE9}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 95 |  |  |  |  |  |  | { char => 'ら', code => "\x{2F4A}\x{2F9A}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 96 |  |  |  |  |  |  | { char => 'む', code => "\x{2F55}\x{2F9A}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 97 |  |  |  |  |  |  | { char => 'う', code => "\x{2F1F}\x{2F9A}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 98 |  |  |  |  |  |  | { char => 'ゐ', code => "\x{2FA6}\x{2F9A}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 99 |  |  |  |  |  |  | { char => 'の', code => "\x{6C35}\x{2F9A}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 100 |  |  |  |  |  |  | { char => 'お', code => "\x{4EBB}\x{2F9A}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 101 |  |  |  |  |  |  | { char => 'く', code => "\x{2F9D}\x{2F9A}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 102 |  |  |  |  |  |  | { char => 'や', code => "\x{2F4A}\x{2F69}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 103 |  |  |  |  |  |  | { char => 'ま', code => "\x{2F55}\x{2F69}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 104 |  |  |  |  |  |  | { char => 'け', code => "\x{2F1F}\x{2F69}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 105 |  |  |  |  |  |  | { char => 'ふ', code => "\x{2FA6}\x{2F69}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 106 |  |  |  |  |  |  | { char => 'こ', code => "\x{6C35}\x{2F69}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 107 |  |  |  |  |  |  | { char => 'え', code => "\x{4EBB}\x{2F69}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 108 |  |  |  |  |  |  | { char => 'て', code => "\x{2F9D}\x{2F69}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 109 |  |  |  |  |  |  | { char => 'あ', code => "\x{2F4A}\x{9ED2}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 110 |  |  |  |  |  |  | { char => 'さ', code => "\x{2F55}\x{9ED2}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 111 |  |  |  |  |  |  | { char => 'き', code => "\x{2F1F}\x{9ED2}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 112 |  |  |  |  |  |  | { char => 'ゆ', code => "\x{2FA6}\x{9ED2}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 113 |  |  |  |  |  |  | { char => 'め', code => "\x{6C35}\x{9ED2}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 114 |  |  |  |  |  |  | { char => 'み', code => "\x{4EBB}\x{9ED2}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 115 |  |  |  |  |  |  | { char => 'し', code => "\x{2F9D}\x{9ED2}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 116 |  |  |  |  |  |  | { char => 'ゑ', code => "\x{2F4A}\x{7D2B}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 117 |  |  |  |  |  |  | { char => 'ひ', code => "\x{2F55}\x{7D2B}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 118 |  |  |  |  |  |  | { char => 'も', code => "\x{2F1F}\x{7D2B}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 119 |  |  |  |  |  |  | { char => 'せ', code => "\x{2FA6}\x{7D2B}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 120 |  |  |  |  |  |  | { char => 'す', code => "\x{6C35}\x{7D2B}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 121 |  |  |  |  |  |  | { char => 'ん', code => "\x{4EBB}\x{7D2B}", flag => DUO |  UTF8MB3 | JIS | Y2016 }, | 
| 122 |  |  |  |  |  |  | ]; | 
| 123 |  |  |  |  |  |  |  | 
| 124 |  |  |  |  |  |  | my $encode = {}; | 
| 125 |  |  |  |  |  |  | my $decode = {}; | 
| 126 |  |  |  |  |  |  | my $decode_re = join '|', map { $_->{code} } reverse @$map; | 
| 127 |  |  |  |  |  |  | $decode_re = qr/($decode_re)/; | 
| 128 |  |  |  |  |  |  |  | 
| 129 |  |  |  |  |  |  | for my $v (@$map) { | 
| 130 |  |  |  |  |  |  | my $list = $encode->{ $v->{char} } ||= []; | 
| 131 |  |  |  |  |  |  | push @$list, $v; | 
| 132 |  |  |  |  |  |  |  | 
| 133 |  |  |  |  |  |  | $decode->{$v->{code}} = $v->{char}; | 
| 134 |  |  |  |  |  |  | } | 
| 135 |  |  |  |  |  |  |  | 
| 136 |  |  |  |  |  |  | sub _encode { | 
| 137 | 314 |  |  | 314 |  | 268 | my $char = shift; | 
| 138 | 314 |  | 100 |  |  | 582 | my $list = $encode->{$char} // []; | 
| 139 | 314 |  |  |  |  | 280 | for my $v (@$list) { | 
| 140 | 225 | 100 |  |  |  | 309 | if ($v->{flag} & $ENCODE) { | 
| 141 | 168 |  |  |  |  | 129 | $char = $v->{code}; | 
| 142 | 168 |  |  |  |  | 118 | last; | 
| 143 |  |  |  |  |  |  | } | 
| 144 |  |  |  |  |  |  | } | 
| 145 |  |  |  |  |  |  |  | 
| 146 | 314 |  |  |  |  | 428 | $char; | 
| 147 |  |  |  |  |  |  | } | 
| 148 |  |  |  |  |  |  |  | 
| 149 |  |  |  |  |  |  | sub normalize { | 
| 150 | 12 |  | 50 | 12 | 0 | 29 | my $text = shift // ""; | 
| 151 |  |  |  |  |  |  |  | 
| 152 |  |  |  |  |  |  | # decompose kana so that voiced-sound marks (濁点, dakuten/handakuten) become separate combining characters | 
| 153 | 12 |  |  |  |  | 76 | $text =~ s/(\p{InHiragana}|\p{InKatakana})/NFD($1)/ge; | 
|  | 185 |  |  |  |  | 2564 |  | 
| 154 |  |  |  |  |  |  |  | 
| 155 |  |  |  |  |  |  | # katakana to hiragana | 
| 156 | 12 |  |  |  |  | 251 | $text = katakana2hiragana(katakana_h2z($text)); | 
| 157 |  |  |  |  |  |  |  | 
| 158 |  |  |  |  |  |  | # map small kana to their full-size forms: ぁぃぅぇぉっゃゅょゎゕゖㇾㇷㇶㇸㇲㇹㇱㇼㇳㇰㇿㇻㇺㇵㇽㇴ | 
| 159 | 12 |  |  |  |  | 522 | $text =~ tr[\x{3041}\x{3043}\x{3045}\x{3047}\x{3049}\x{3063}\x{3083}\x{3085}\x{3087}\x{308E}\x{3095}\x{3096}\x{31FE}\x{31F7}\x{31F6}\x{31F8}\x{31F2}\x{31F9}\x{31F1}\x{31FC}\x{31F3}\x{31F0}\x{31FF}\x{31FB}\x{31FA}\x{31F5}\x{31FD}\x{31F4}] | 
| 160 |  |  |  |  |  |  | [\x{3042}\x{3044}\x{3046}\x{3048}\x{304A}\x{3064}\x{3084}\x{3086}\x{3088}\x{308F}\x{304B}\x{3051}\x{30EC}\x{30D5}\x{30D2}\x{30D8}\x{30B9}\x{30DB}\x{30B7}\x{30EA}\x{30C8}\x{30AF}\x{30ED}\x{30E9}\x{30E0}\x{30CF}\x{30EB}\x{30CC}]; | 
| 161 |  |  |  |  |  |  |  | 
| 162 | 12 |  |  |  |  | 23 | $text; | 
| 163 |  |  |  |  |  |  | } | 
| 164 |  |  |  |  |  |  |  | 
| 165 |  |  |  |  |  |  | sub encode { | 
| 166 | 10 |  |  | 10 | 1 | 3867 | my $class = shift; | 
| 167 | 10 |  | 50 |  |  | 24 | my $text = shift // ""; | 
| 168 |  |  |  |  |  |  |  | 
| 169 | 10 |  |  |  |  | 20 | $text = normalize($text); | 
| 170 | 10 |  |  |  |  | 29 | $text =~ s{(.)}{_encode($1)}ge; | 
|  | 314 |  |  |  |  | 270 |  | 
| 171 | 10 |  |  |  |  | 52 | $text; | 
| 172 |  |  |  |  |  |  | } | 
| 173 |  |  |  |  |  |  |  | 
| 174 |  |  |  |  |  |  | sub decode { | 
| 175 | 4 |  |  | 4 | 1 | 19 | my $class = shift; | 
| 176 | 4 |  | 50 |  |  | 10 | my $text = shift // ""; | 
| 177 |  |  |  |  |  |  |  | 
| 178 | 4 |  |  |  |  | 87 | $text =~ s/$decode_re/$decode->{$1}/ge; | 
|  | 147 |  |  |  |  | 325 |  | 
| 179 | 4 |  |  |  |  | 20 | $text =~ s/(\p{InHiragana}+)/NFC($1)/ge; | 
|  | 145 |  |  |  |  | 2436 |  | 
| 180 | 4 |  |  |  |  | 160 | $text; | 
| 181 |  |  |  |  |  |  | } | 
| 182 |  |  |  |  |  |  |  | 
| 183 |  |  |  |  |  |  | sub shinobi { | 
| 184 | 4 |  |  | 4 | 1 | 22 | Text::Shinobi->encode(@_); | 
| 185 |  |  |  |  |  |  | } | 
| 186 |  |  |  |  |  |  |  | 
| 187 |  |  |  |  |  |  | 1; | 
| 188 |  |  |  |  |  |  | __END__ |