line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Lingua::EN::Alphabet::Deseret; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
641
|
use 5.005; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
36
|
|
4
|
1
|
|
|
1
|
|
4
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
25
|
|
5
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
1
|
|
|
|
|
5
|
|
|
1
|
|
|
|
|
21
|
|
6
|
1
|
|
|
1
|
|
1004
|
use utf8; |
|
1
|
|
|
|
|
11
|
|
|
1
|
|
|
|
|
4
|
|
7
|
1
|
|
|
1
|
|
1650
|
use Lingua::EN::Phoneme; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
# Module version number.
our $VERSION = 0.01;

# Shared pronunciation lookup object; _transliterate_word_raw() calls its
# phoneme() method to obtain the phonemes for a word.
# Constructed with an ordinary class-method call rather than the
# indirect-object form ("new Class()"), which Perl can misparse.
our $lep = Lingua::EN::Phoneme->new();
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# Map from phoneme names (as looked up by _transliterate_word_raw(), with
# stress digits removed) to Deseret letters.  The names are listed in code
# point order starting at U+10428, so each name simply receives the next
# code point.
# NOTE(review): "_a" looks like a placeholder that only keeps the later
# names aligned with their code points -- confirm.
our %correspondence;
{
    # The counter lives inside this block so that it does not stay in
    # scope for the rest of the file.
    my $code_point = 0x10428;

    # The parentheses make Perl parse the braces as a block, not a hash.
    %correspondence = map { ( $_ => chr( $code_point++ ) ) } qw(
        IY EY _a AO OW UW
        IH EH AE AA AH UH

        AY AW W Y HH
        P B T D CH JH K G
        F V TH DH S Z SH ZH
        R L M N NG

        OY
    );
}

# fixups
# ER gets no code point of its own: it is written as the letter assigned
# to IY (U+10428) followed by the letter assigned to R (U+10449).
$correspondence{'ER'} = chr(0x10428) . chr(0x10449);
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# Transliterate a single Latin-alphabet word into Deseret letters.
#
# Parameters:
#   $word - the word whose pronunciation is looked up via $lep->phoneme().
#
# Returns the Deseret spelling, with its case following $word: first
# letter capitalised when $word starts with a capital followed by a
# lowercase letter, fully uppercased when $word starts with a capital
# otherwise.  Returns undef when no pronunciation is available.
#
# Warns (without dying) for every phoneme that has no entry in
# %correspondence; such phonemes contribute nothing to the result.
sub _transliterate_word_raw {
    my ($word) = @_;

    # Discard undefined entries so that a failed lookup counts as "no
    # pronunciation" whether it comes back as an empty list or as a lone
    # undef.  NOTE(review): which of the two phoneme() returns for an
    # unknown word is not visible from this file -- confirm.
    my @pronunciation = grep { defined } $lep->phoneme($word);

    # A scalar undef (rather than a bare return) is kept so that callers
    # see the same value as before in every context.
    return undef unless @pronunciation;

    my $result = '';

    for my $phoneme (@pronunciation) {
        # Work on a copy; the stress digits are irrelevant here.
        ( my $name = $phoneme ) =~ s/[0-9]//g;

        my $letter = $correspondence{$name};

        warn "CMU phoneset $name does not appear in correspondence"
            unless $letter;

        # Appending an undefined letter would only add a second,
        # "uninitialized value" warning on top of the one above.
        $result .= $letter if defined $letter;
    }

    # The letter pair for Y + UW (U+10437 U+1042D) is written with the
    # single letter U+1044F instead.
    $result =~ s/\x{10437}\x{1042D}/\x{1044F}/ig;

    if ($word =~ /^[A-Z][a-z]/) {
        # titlecase
        $result = ucfirst $result;
    } elsif ($word =~ /^[A-Z]/) {
        # uppercase
        $result = uc $result;
    }

    return $result;
}
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
# Word-level helper behind transliterate().
#
# Parameters:
#   $word - a single Latin-alphabet word.
#
# Returns whatever _transliterate_word_raw() produced for $word when that
# value is true; otherwise returns $word converted to uppercase.
sub _transliterate_word {
    my ($word) = @_;

    my $deseret = _transliterate_word_raw($word);

    if ($deseret) {
        return $deseret;
    }

    # No usable spelling: hand the Latin word back in uppercase.
    return uc $word;
}
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# Transliterate every word of a string with _transliterate_word_raw().
#
# Parameters:
#   $sentence - text to convert; each maximal run of ASCII letters is
#               treated as one word, everything else is left untouched.
#
# Returns the converted text.  A word for which
# _transliterate_word_raw() returns undef is replaced by the empty string.
sub transliterate_raw {
    my ($sentence) = @_;

    $sentence =~ s{([A-Za-z]+)}{
        my $deseret = _transliterate_word_raw($1);

        # Substituting undef would produce the same empty text but also an
        # "uninitialized value" warning for every such word.  The ternary
        # is used instead of "//" because the file declares "use 5.005".
        defined $deseret ? $deseret : '';
    }eg;

    return $sentence;
}
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# Transliterate every word of a string with _transliterate_word().
#
# Parameters:
#   $sentence - text to convert.
#
# Returns the text with each maximal run of ASCII letters replaced by the
# result of _transliterate_word() for that run; all other characters are
# left as they are.
sub transliterate {
    my ($sentence) = @_;

    # One capture group: a maximal run of ASCII letters, i.e. one word.
    my $word_run = qr/([A-Za-z]+)/;

    $sentence =~ s{$word_run}{ _transliterate_word($1) }eg;

    return $sentence;
}
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
1; |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=encoding utf-8 |
84
|
|
|
|
|
|
|
=head1 NAME |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
Lingua::EN::Alphabet::Deseret - transliterate English from the Latin alphabet to the Deseret alphabet |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=head1 AUTHOR |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
Thomas Thurman |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=head1 SYNOPSIS |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
use Lingua::EN::Alphabet::Deseret; |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
print Lingua::EN::Alphabet::Deseret::transliterate("badger"); |
97
|
|
|
|
|
|
|
# prints "𐐺𐐰𐐾𐐨𐑉" |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
=head1 DESCRIPTION |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
The Deseret alphabet was designed by the University of Deseret (now the |
102
|
|
|
|
|
|
|
University of Utah) in the mid-1800s as a replacement for the Latin alphabet |
103
|
|
|
|
|
|
|
for representing English. |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
Its ISO 15924 code is "Dsrt" (numeric code 250). |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
This module transliterates English text from the Latin alphabet into the |
108
|
|
|
|
|
|
|
Deseret alphabet. |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
𐐜𐐲 𐐔𐐯𐑅𐐨𐑉𐐯𐐻 𐐰𐑊𐑁𐐲𐐺𐐯𐐻 𐐶𐐱𐑆 𐐼𐐮𐑆𐐴𐑌𐐼 𐐺𐐴 𐑄𐐲 𐐧𐑌𐐲𐑂𐐨𐑉𐑅𐐲𐐻𐐨 𐐲𐑂 𐐔𐐯𐑅𐐨𐑉𐐯𐐻 (𐑌𐐵 𐑄𐐲 |
111
|
|
|
|
|
|
|
𐐧𐑌𐐲𐑂𐐨𐑉𐑅𐐲𐐻𐐨 𐐲𐑂 𐐧𐐻𐐫) 𐐮𐑌 𐑄𐐲 𐑋𐐮𐐼-1800𐐯𐑅 𐐰𐑆 𐐲 𐑉𐐮𐐹𐑊𐐩𐑅𐑋𐐲𐑌𐐻 𐑁𐐫𐑉 𐑄𐐲 𐐢𐐰𐐻𐐲𐑌 𐐰𐑊𐑁𐐲𐐺𐐯𐐻 |
112
|
|
|
|
|
|
|
𐑁𐐫𐑉 𐑉𐐯𐐹𐑉𐐮𐑆𐐯𐑌𐐻𐐮𐑍 𐐆𐑍𐑀𐑊𐐮𐑇. |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
𐐆𐐻𐑅 ISO 15924 𐐿𐐬𐐼 𐐮𐑆 "Dsrt" 250. |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
𐐜𐐮𐑅 𐑋𐐱𐐾𐐭𐑊 𐐻𐑉𐐰𐑌𐑅𐑊𐐮𐐻𐐨𐑉𐐩𐐻𐑅 𐐆𐑍𐑀𐑊𐐮𐑇 𐐻𐐯𐐿𐑅𐐻 𐑁𐑉𐐲𐑋 𐑄𐐲 𐐢𐐰𐐻𐐲𐑌 𐐰𐑊𐑁𐐲𐐺𐐯𐐻 𐐮𐑌𐐻𐐭 𐑄𐐲 |
117
|
|
|
|
|
|
|
𐐔𐐯𐑅𐐨𐑉𐐯𐐻 𐐰𐑊𐑁𐐲𐐺𐐯𐐻. |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=head1 METHODS |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=head2 transliterate($latin) |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
Returns the transliteration of the given word into the Deseret alphabet. |
124
|
|
|
|
|
|
|
If the word is not in the dictionary, returns $latin in uppercase. |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
𐐡𐐮𐐻𐐨𐑉𐑌𐑆 𐑄𐐲 𐐻𐑉𐐰𐑌𐑅𐑊𐐮𐐻𐐨𐑉𐐩𐑇𐐲𐑌 𐐲𐑂 𐑄𐐲 𐑀𐐮𐑂𐐲𐑌 𐐶𐐨𐑉𐐼 𐐮𐑌𐐻𐐭 𐑄𐐲 𐐔𐐯𐑅𐐨𐑉𐐯𐐻 𐐰𐑊𐑁𐐲𐐺𐐯𐐻. |
127
|
|
|
|
|
|
|
𐐆𐑁 𐑄𐐲 𐐶𐐨𐑉𐐼 𐐮𐑆 𐑌𐐱𐐻 𐐮𐑌 𐑄𐐲 𐐼𐐮𐐿𐑇𐐲𐑌𐐯𐑉𐐨, 𐑉𐐮𐐻𐐨𐑉𐑌𐑆 $latin 𐐮𐑌 𐐲𐐹𐐨𐑉𐐿𐐩𐑅. |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
=head2 transliterate_raw($latin) |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
Similar, but returns undef for unknown words. |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
𐐝𐐮𐑋𐐲𐑊𐐨𐑉, 𐐺𐐲𐐻 𐑉𐐮𐐻𐐨𐑉𐑌𐑆 undef 𐑁𐐫𐑉 𐐲𐑌𐑌𐐬𐑌 𐐶𐐨𐑉𐐼𐑆. |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=head1 FONTS |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
You will need a Deseret Unicode font to use this module. |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
𐐧 𐐶𐐮𐑊 𐑌𐐨𐐼 𐐲 𐐔𐐯𐑅𐐨𐑉𐐯𐐻 𐐧𐑌𐐨𐐿𐐬𐐼 𐑁𐐱𐑌𐐻 𐐻𐐭 𐑏𐑅 𐑄𐐮𐑅 𐑋𐐱𐐾𐐭𐑊. |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=head1 BUGS |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
The dictionary is quite small. |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
One of the vowels ("𐐂") cannot ever be produced because cmudict does not |
146
|
|
|
|
|
|
|
mark it as a distinct vowel. If you think some |
147
|
|
|
|
|
|
|
of the mappings I have made are incorrect, please let me know. |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
𐐜𐐲 𐐼𐐮𐐿𐑇𐐲𐑌𐐯𐑉𐐨 𐐮𐑆 𐐿𐐶𐐴𐐻 𐑅𐑋𐐫𐑊. |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
𐐎𐐲𐑌 𐐲𐑂 𐑄𐐲 𐑂𐐵𐐲𐑊𐑆 ("𐐂") 𐐿𐐰𐑌𐐱𐐻 𐐯𐑂𐐨𐑉 𐐺𐐨 𐐹𐑉𐐲𐐼𐐭𐑅𐐻 𐐺𐐮𐐿𐐫𐑆 cmudict 𐐼𐐲𐑆 𐑌𐐱𐐻 |
152
|
|
|
|
|
|
|
𐑋𐐱𐑉𐐿 𐐮𐐻 𐐰𐑆 𐐲 𐐼𐐮𐑅𐐻𐐮𐑍𐐿𐐻 𐑂𐐵𐐲𐑊. 𐐆𐑁 𐑏 𐑃𐐮𐑍𐐿 𐑅𐐲𐑋 |
153
|
|
|
|
|
|
|
𐐲𐑂 𐑄𐐲 𐑋𐐰𐐹𐐮𐑍𐑆 𐐌 𐐸𐐰𐑂 𐑋𐐩𐐼 𐐱𐑉 𐐮𐑌𐐿𐐨𐑉𐐯𐐿𐐻, 𐐹𐑊𐐨𐑆 𐑊𐐯𐐻 𐑋𐐨 𐑌𐐬. |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=head1 COPYRIGHT |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
This Perl module is copyright (C) Thomas Thurman, 2009. |
158
|
|
|
|
|
|
|
This is free software, and can be used/modified under the same terms as |
159
|
|
|
|
|
|
|
Perl itself. |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
𐐜𐐮𐑅 𐐑𐐨𐑉𐑊 𐑋𐐱𐐾𐐭𐑊 𐐮𐑆 𐐿𐐱𐐹𐐨𐑉𐐴𐐻 (C) 𐐓𐐱𐑋𐐲𐑅 𐐛𐐨𐑉𐑋𐐲𐑌, 2009. |
162
|
|
|
|
|
|
|
𐐜𐐮𐑅 𐐮𐑆 𐑁𐑉𐐨 𐑅𐐫𐑁𐐻𐐶𐐯𐑉, 𐐲𐑌𐐼 𐐿𐐰𐑌 𐐺𐐨 𐑏𐑆𐐼/𐑋𐐱𐐼𐐲𐑁𐐴𐐼 𐐲𐑌𐐼𐐨𐑉 𐑄𐐲 𐑅𐐩𐑋 𐐻𐐨𐑉𐑋𐑆 𐐰𐑆 |
163
|
|
|
|
|
|
|
𐐑𐐨𐑉𐑊 𐐮𐐻𐑅𐐯𐑊𐑁. |