package Lingua::YI::Romanize;
use utf8;

use strict;
use warnings;

our $VERSION = '0.02';

use Unicode::Normalize;

# Conversion tables. They are package variables and are filled in by the
# BEGIN block at the end of the code.
our $normalize_combinings;    # [ sequence, single code point ] pairs
our $yivo2latn;               # [ Hebrew, Latin romanization, IPA ] triples
our $vowels;                  # [ Hebrew, Latin ] pairs for vowel signs
our $consonants;              # [ Hebrew, Latin ] pairs for consonants
our $consonants_2;            # [ Hebrew, Latin ] pairs: t, d, s, z, l, n

# Hebrew letter yod. It has no entry in $yivo2latn because its value depends
# on its neighbours; see _romanize().
my $YOD = "\x{05D9}";

# Constructor.
#
#   Lingua::YI::Romanize->new;                 # empty object
#   Lingua::YI::Romanize->new(key => $value);  # attributes from a list
#   Lingua::YI::Romanize->new(\%attributes);   # shallow copy of a hash ref
#
# May also be called on an existing object, in which case the new object is
# blessed into that object's class.
sub new {
    my $class = shift;

    my %attributes;
    if (@_ > 1) {
        %attributes = @_;
    }
    elsif (@_ == 1) {
        %attributes = %{ $_[0] };
    }

    # uncoverable condition false
    return bless \%attributes, ref $class || $class;
}

# Builds a regex alternation ("a|b|c") from the characters found in the
# first column of a conversion table.
#
# Every character is escaped with quotemeta so that the result can be
# interpolated into a pattern safely. An empty (or missing) table yields
# '(?!)', a pattern that never matches; a plain empty string would match
# at every position.
sub _gen_charclass {
    my $chartable = shift;

    my @characters = map { split //, $_->[0] } @{ $chartable || [] };
    return '(?!)' unless @characters;

    return join '|', map { quotemeta $_ } @characters;
}

# Applies every rule of $normalize_combinings, in table order, to the given
# string and returns the result. The order matters: e.g. PE with a point is
# replaced before the rule for a bare PE is tried.
sub _normalize {
    my $string = shift;

    for my $rule (@{$normalize_combinings}) {
        my ($sequence, $replacement) = @{$rule};
        $string =~ s/\Q$sequence\E/$replacement/g;
    }

    return $string;
}

# Shared implementation of yivo2latn() and yivo2ipa().
#
#   $text    text in Hebrew script (undef is treated as the empty string)
#   $glide   output for consonantal yod: 'y' (Latin) or 'j' (IPA)
#   $column  index of the output column in the rows of $yivo2latn
#
# Returns the converted string. Characters without a table entry are passed
# through unchanged.
sub _romanize {
    my ($text, $glide, $column) = @_;
    return '' unless defined $text;

    # NFC splits the precomposed presentation forms into base letter plus
    # point; _normalize() then folds the sequences used by the tables back
    # into single code points.
    my $string = _normalize(NFC($text));

    my $vowel = _gen_charclass($vowels);

    # HEBREW LETTER YOD (U+05D9):
    #   y (IPA j) before or after a vowel; this also covers the palatals
    #   of Slavic origin, where yod follows t, d, s, z, l or n and
    #   precedes a vowel;
    #   i in every other position, in particular between consonants.
    $string =~ s/$YOD(?=$vowel)/$glide/g;
    $string =~ s/(?<=$vowel)$YOD/$glide/g;
    $string =~ s/$YOD/i/g;

    # The table is applied row by row, so the multi-letter rows at its top
    # (dzh, zh, tsh) are consumed before their component letters.
    for my $rule (@{$yivo2latn}) {
        my $hebrew = $rule->[0];
        my $output = $rule->[$column];
        $string =~ s/\Q$hebrew\E/$output/g;
    }

    return $string;
}

# Returns the Latin-script romanization of $text.
sub yivo2latn {
    my ($self, $text) = @_;
    return _romanize($text, 'y', 1);
}

# Returns the IPA transcription of $text.
sub yivo2ipa {
    my ($self, $text) = @_;
    return _romanize($text, 'j', 2);
}

BEGIN {
    $normalize_combinings = [
        ["\x{05D0}\x{05B7}","\x{FB2E}"], # HEBREW LETTER ALEF WITH PATAH
        ["\x{05D0}\x{05B8}","\x{FB2F}"], # HEBREW LETTER ALEF WITH QAMATS
        ["\x{05D1}\x{05BC}","\x{FB31}"], # HEBREW LETTER BET WITH DAGESH
        ["\x{05D1}\x{05BF}","\x{FB4C}"], # HEBREW LETTER BET WITH RAFE
        ["\x{05D5}\x{05BC}","\x{FB35}"], # HEBREW LETTER VAV WITH DAGESH
        ["\x{05D5}\x{05D5}","\x{05F0}"], # HEBREW LIGATURE YIDDISH DOUBLE VAV
        ["\x{05D5}\x{05D9}","\x{05F1}"], # HEBREW LIGATURE YIDDISH VAV YOD
        ["\x{05D9}\x{05B4}","\x{FB1D}"], # HEBREW LETTER YOD WITH HIRIQ
        ["\x{05D9}\x{05D9}","\x{05F2}"], # HEBREW LIGATURE YIDDISH DOUBLE YOD
        ["\x{05F2}\x{05B7}","\x{FB1F}"], # HEBREW LIGATURE YIDDISH YOD YOD PATAH
        ["\x{05DB}\x{05BC}","\x{FB3B}"], # HEBREW LETTER KAF WITH DAGESH
        ["\x{05E4}\x{05BC}","\x{FB44}"], # HEBREW LETTER PE WITH DAGESH
        ["\x{05E4}\x{05BF}","\x{FB4E}"], # HEBREW LETTER PE WITH RAFE
        ["\x{05E9}\x{05C2}","\x{FB2B}"], # HEBREW LETTER SHIN WITH SIN DOT
        ["\x{05EA}\x{05BC}","\x{FB4A}"], # HEBREW LETTER TAV WITH DAGESH
        ["\x{FB20}","\x{05E2}"],         # HEBREW LETTER ALTERNATIVE AYIN
        # A PE that is still bare at this point becomes U+05BF (HEBREW POINT
        # RAFE), which the tables below map to 'f'.
        # NOTE(review): a stray rafe in the input therefore also yields 'f'.
        ["\x{05E4}","\x{05BF}"],         # HEBREW LETTER PE
    ];

    # Columns: Hebrew, Latin romanization, IPA.
    $yivo2latn = [
        ["\x{05D3}\x{05D6}\x{05E9}", 'dzh', 'd͡ʒ'],
        ["\x{05D6}\x{05E9}",         'zh',  'ʒ'  ],
        ["\x{05D8}\x{05E9}",         'tsh', 't͡ʃ'],
        ["\x{05D0}",                 '',    ''   ],
        ["\x{FB2E}",                 'a',   'a'  ],
        ["\x{FB2F}",                 'o',   'ɔ'  ],
        ["\x{05D1}",                 'b',   'b'  ],
        ["\x{FB31}",                 'b',   'b'  ],
        ["\x{FB4C}",                 'v',   'v'  ],
        ["\x{05D2}",                 'g',   'ɡ'  ],
        ["\x{05D3}",                 'd',   'd'  ],
        ["\x{05D4}",                 'h',   'h'  ],
        ["\x{05D5}",                 'u',   'ʊ'  ],
        ["\x{FB35}",                 'u',   'ʊ'  ],
        # TODO: "\x{05D5}" HEBREW LETTER VAV followed by
        # "\x{05B9}" HEBREW POINT HOLAM (khoylem) is not handled yet:
        # |וֹ|(none)|(none)|ɔ, ɔj|(o,oj)|Non-YIVO alternative to אָ and וי.
        ["\x{05F0}",                 'v',   'v'  ],
        ["\x{05F1}",                 'oy',  'ɔj' ],
        ["\x{05D6}",                 'z',   'z'  ],
        ["\x{05D7}",                 'kh',  'x'  ],
        ["\x{05D8}",                 't',   't'  ],
        # "\x{05D9}" HEBREW LETTER YOD (yud) has no row here: it is y/j or i
        # depending on context and is resolved in _romanize() before this
        # table is applied.
        ["\x{FB1D}",                 'i',   'i'  ],
        ["\x{05F2}",                 'ey',  'ɛj' ],
        ["\x{FB1F}",                 'ay',  'aj' ],
        ["\x{FB3B}",                 'k',   'k'  ],
        ["\x{05DB}",                 'kh',  'x'  ],
        ["\x{05DA}",                 'kh',  'x'  ],
        ["\x{05DC}",                 'l',   'l'  ], # TODO: ʎ
        ["\x{05DE}",                 'm',   'm'  ],
        ["\x{05DD}",                 'm',   'm'  ],
        ["\x{05E0}",                 'n',   'n'  ],
        ["\x{05DF}",                 'n',   'n'  ], # TODO: ŋ, m
        ["\x{05E1}",                 's',   's'  ],
        ["\x{05E2}",                 'e',   'ɛ'  ], # TODO: ə
        ["\x{FB44}",                 'p',   'p'  ],
        ["\x{FB4E}",                 'f',   'f'  ],
        ["\x{05E3}",                 'f',   'f'  ],
        ["\x{05BF}",                 'f',   'f'  ],
        ["\x{05E6}",                 'ts',  'ts' ],
        ["\x{05E5}",                 'ts',  'ts' ],
        ["\x{05E7}",                 'k',   'k'  ],
        ["\x{05E8}",                 'r',   'ʀ'  ],
        ["\x{05E9}",                 'sh',  'ʃ'  ],
        ["\x{FB2B}",                 's',   's'  ],
        ["\x{FB4A}",                 't',   't'  ],
        ["\x{05EA}",                 's',   's'  ],
    ];

    # Vowel signs; a yod next to one of these is read as a glide.
    $vowels = [
        ["\x{FB2E}",'a'],
        ["\x{FB2F}",'o'],
        ["\x{05D5}",'u'],
        ["\x{FB35}",'u'],
        ["\x{05F1}",'oy'],
        # "\x{05D9}" HEBREW LETTER YOD is deliberately absent, see above.
        ["\x{FB1D}",'i'],
        ["\x{05F2}",'ey'],
        ["\x{FB1F}",'ay'],
        ["\x{05E2}",'e'],
    ];

    # Not read by the code in this file any more; kept with unchanged
    # contents because it is a package variable.
    $consonants = [
        ["\x{05D1}",'b'],
        ["\x{FB4C}",'v'],
        ["\x{05D2}",'g'],
        ["\x{05D3}",'d'],
        ["\x{05D4}",'h'],
        ["\x{05F0}",'v'],
        ["\x{05D6}",'z'],
        ["\x{05D7}",'kh'],
        ["\x{05D8}",'t'],
        ["\x{FB3B}",'k'],
        ["\x{05DB}",'kh'],
        ["\x{05DA}",'kh'],
        ["\x{05DC}",'l'],
        ["\x{05DE}",'m'],
        ["\x{05DD}",'m'],
        ["\x{05E0}",'n'],
        ["\x{05DF}",'n'],
        ["\x{05E1}",'s'],
        ["\x{FB44}",'p'],
        ["\x{FB4E}",'f'],
        ["\x{05E3}",'f'],
        ["\x{05BF}",'f'],
        ["\x{05E6}",'ts'],
        ["\x{05E5}",'ts'],
        ["\x{05E7}",'k'],
        ["\x{05E8}",'r'],
        ["\x{05E9}",'sh'],
        ["\x{FB2B}",'s'],
        ["\x{FB4A}",'t'],
        ["\x{05EA}",'s'],
    ];

    # Consonants that form palatals with a following yod plus vowel.
    # Not read by the code in this file any more (the general "yod before
    # a vowel" rule covers them); kept because it is a package variable.
    $consonants_2 = [
        ["\x{05D3}",'d'],
        ["\x{05D6}",'z'],
        ["\x{05D8}",'t'],
        ["\x{05DC}",'l'],
        ["\x{05E0}",'n'],
        ["\x{05E1}",'s'],
        #["\x{FB4A}",'t'],
    ];
}

1;

__END__