File Coverage

blib/lib/Lingua/EN/Alphabet/Deseret.pm

Criterion	Covered	Total	%
statement	13	15	86.6
branch			n/a
condition			n/a
subroutine	5	5	100.0
pod			n/a
total	18	20	90.0

line	stmt	sub	time	code
1				package Lingua::EN::Alphabet::Deseret;
2
3	1	1	641	use 5.005;
	1		4
	1		36
4	1	1	4	use strict;
	1		1
	1		25
5	1	1	4	use warnings;
	1		5
	1		21
6	1	1	1004	use utf8;
	1		11
	1		4
7	1	1	1650	use Lingua::EN::Phoneme;
	0
	0
8				our $VERSION = 0.01;
9
10				our $lep = new Lingua::EN::Phoneme();
11
12				my $i=66600;
13				our %correspondence = map { $_ => chr($i++) } qw(
14
15				IY EY _a AO OW UW
16				IH EH AE AA AH UH
17
18				AY AW W Y HH
19				P B T D CH JH K G
20				F V TH DH S Z SH ZH
21				R L M N NG
22
23				OY
24				);
25
26				# fixups
27				$correspondence{'ER'} = chr(66600).chr(66633);
28
29				sub _transliterate_word_raw {
30				my ($word) = @_;
31
32				my @pronunciation = $lep->phoneme($word);
33
34				return undef unless @pronunciation;
35
36				my $result = '';
37
38				for (@pronunciation) {
39				s/[0-9]//g; # don't care about stress
40				warn "CMU phoneset $_ does not appear in correspondence"
41				unless $correspondence{$_};
42				$result .= $correspondence{$_};
43				}
44
45				$result =~ s/\x{10437}\x{1042D}/\x{1044F}/ig;
46
47				if ($word =~ /^[A-Z][a-z]/) {
48				# titlecase
49				$result = ucfirst $result;
50				} elsif ($word =~ /^[A-Z]/) {
51				# uppercase
52				$result = uc $result;
53				}
54
55				return $result;
56				}
57
58				sub _transliterate_word {
59				my ($word) = @_;
60				my $result = _transliterate_word_raw($word);
61				return uc $word unless $result;
62				return $result;
63				}
64
65				sub transliterate_raw {
66				my ($sentence) = @_;
67
68				$sentence =~ s/([A-Za-z]+)/_transliterate_word_raw($1)/eg;
69
70				return $sentence;
71				}
72
73				sub transliterate {
74				my ($sentence) = @_;
75
76				$sentence =~ s/([A-Za-z]+)/_transliterate_word($1)/eg;
77
78				return $sentence;
79				}
80
81				1;
82
83				=encoding utf-8
84				=head1 NAME
85
86				Lingua::EN::Alphabet::Deseret - transliterate the Latin to Deseret alphabets
87
88				=head1 AUTHOR
89
90				Thomas Thurman
91
92				=head1 SYNOPSIS
93
94				use Lingua::EN::Alphabet::Deseret;
95
96				print Lingua::EN::Alphabet::Deseret::transliterate("badger");
97				# prints "𐐺𐐰𐐾𐐨𐑉"
98
99				=head1 DESCRIPTION
100
101				The Deseret alphabet was designed by the University of Deseret (now the
102				University of Utah) in the mid-1800s as a replacement for the Latin alphabet
103				for representing English.
104
105				Its ISO 15924 code is "Dsrt" 250.
106
107				This module transliterates English text from the Latin alphabet into the
108				Deseret alphabet.
109
110				𐐜𐐲 𐐔𐐯𐑅𐐨𐑉𐐯𐐻 𐐰𐑊𐑁𐐲𐐺𐐯𐐻 𐐶𐐱𐑆 𐐼𐐮𐑆𐐴𐑌𐐼 𐐺𐐴 𐑄𐐲 𐐧𐑌𐐲𐑂𐐨𐑉𐑅𐐲𐐻𐐨 𐐲𐑂 𐐔𐐯𐑅𐐨𐑉𐐯𐐻 (𐑌𐐵 𐑄𐐲
111				𐐧𐑌𐐲𐑂𐐨𐑉𐑅𐐲𐐻𐐨 𐐲𐑂 𐐧𐐻𐐫) 𐐮𐑌 𐑄𐐲 𐑋𐐮𐐼-1800𐐯𐑅 𐐰𐑆 𐐲 𐑉𐐮𐐹𐑊𐐩𐑅𐑋𐐲𐑌𐐻 𐑁𐐫𐑉 𐑄𐐲 𐐢𐐰𐐻𐐲𐑌 𐐰𐑊𐑁𐐲𐐺𐐯𐐻
112				𐑁𐐫𐑉 𐑉𐐯𐐹𐑉𐐮𐑆𐐯𐑌𐐻𐐮𐑍 𐐆𐑍𐑀𐑊𐐮𐑇.
113
114				𐐆𐐻𐑅 ISO 15924 𐐿𐐬𐐼 𐐮𐑆 "Dsrt" 250.
115
116				𐐜𐐮𐑅 𐑋𐐱𐐾𐐭𐑊 𐐻𐑉𐐰𐑌𐑅𐑊𐐮𐐻𐐨𐑉𐐩𐐻𐑅 𐐆𐑍𐑀𐑊𐐮𐑇 𐐻𐐯𐐿𐑅𐐻 𐑁𐑉𐐲𐑋 𐑄𐐲 𐐢𐐰𐐻𐐲𐑌 𐐰𐑊𐑁𐐲𐐺𐐯𐐻 𐐮𐑌𐐻𐐭 𐑄𐐲
117				𐐔𐐯𐑅𐐨𐑉𐐯𐐻 𐐰𐑊𐑁𐐲𐐺𐐯𐐻.
118
119				=head1 METHODS
120
121				=head2 transliterate($latin)
122
123				Returns the transliteration of the given word into the Deseret alphabet.
124				If the word is not in the dictionary, returns $latin in uppercase.
125
126				𐐡𐐮𐐻𐐨𐑉𐑌𐑆 𐑄𐐲 𐐻𐑉𐐰𐑌𐑅𐑊𐐮𐐻𐐨𐑉𐐩𐑇𐐲𐑌 𐐲𐑂 𐑄𐐲 𐑀𐐮𐑂𐐲𐑌 𐐶𐐨𐑉𐐼 𐐮𐑌𐐻𐐭 𐑄𐐲 𐐔𐐯𐑅𐐨𐑉𐐯𐐻 𐐰𐑊𐑁𐐲𐐺𐐯𐐻.
127				𐐆𐑁 𐑄𐐲 𐐶𐐨𐑉𐐼 𐐮𐑆 𐑌𐐱𐐻 𐐮𐑌 𐑄𐐲 𐐼𐐮𐐿𐑇𐐲𐑌𐐯𐑉𐐨, 𐑉𐐮𐐻𐐨𐑉𐑌𐑆 $latin 𐐮𐑌 𐐲𐐹𐐨𐑉𐐿𐐩𐑅.
128
129				=head2 transliterate_raw($latin)
130
131				Similar, but returns undef for unknown words.
132
133				𐐝𐐮𐑋𐐲𐑊𐐨𐑉, 𐐺𐐲𐐻 𐑉𐐮𐐻𐐨𐑉𐑌𐑆 undef 𐑁𐐫𐑉 𐐲𐑌𐑌𐐬𐑌 𐐶𐐨𐑉𐐼𐑆.
134
135				=head1 FONTS
136
137				You will need a Deseret Unicode font to use this module.
138
139				𐐧 𐐶𐐮𐑊 𐑌𐐨𐐼 𐐲 𐐔𐐯𐑅𐐨𐑉𐐯𐐻 𐐧𐑌𐐨𐐿𐐬𐐼 𐑁𐐱𐑌𐐻 𐐻𐐭 𐑏𐑅 𐑄𐐮𐑅 𐑋𐐱𐐾𐐭𐑊.
140
141				=head1 BUGS
142
143				The dictionary is quite small.
144
145				One of the vowels ("𐐂") cannot ever be produced because cmudict does not
146				mark it as a distinct vowel. If you think some
147				of the mappings I have made are incorrect, please let me know.
148
149				𐐜𐐲 𐐼𐐮𐐿𐑇𐐲𐑌𐐯𐑉𐐨 𐐮𐑆 𐐿𐐶𐐴𐐻 𐑅𐑋𐐫𐑊.
150
151				𐐎𐐲𐑌 𐐲𐑂 𐑄𐐲 𐑂𐐵𐐲𐑊𐑆 ("𐐂") 𐐿𐐰𐑌𐐱𐐻 𐐯𐑂𐐨𐑉 𐐺𐐨 𐐹𐑉𐐲𐐼𐐭𐑅𐐻 𐐺𐐮𐐿𐐫𐑆 cmudict 𐐼𐐲𐑆 𐑌𐐱𐐻
152				𐑋𐐱𐑉𐐿 𐐮𐐻 𐐰𐑆 𐐲 𐐼𐐮𐑅𐐻𐐮𐑍𐐿𐐻 𐑂𐐵𐐲𐑊. 𐐆𐑁 𐑏 𐑃𐐮𐑍𐐿 𐑅𐐲𐑋
153				𐐲𐑂 𐑄𐐲 𐑋𐐰𐐹𐐮𐑍𐑆 𐐌 𐐸𐐰𐑂 𐑋𐐩𐐼 𐐱𐑉 𐐮𐑌𐐿𐐨𐑉𐐯𐐿𐐻, 𐐹𐑊𐐨𐑆 𐑊𐐯𐐻 𐑋𐐨 𐑌𐐬.
154
155				=head1 COPYRIGHT
156
157				This Perl module is copyright (C) Thomas Thurman, 2009.
158				This is free software, and can be used/modified under the same terms as
159				Perl itself.
160
161				𐐜𐐮𐑅 𐐑𐐨𐑉𐑊 𐑋𐐱𐐾𐐭𐑊 𐐮𐑆 𐐿𐐱𐐹𐐨𐑉𐐴𐐻 (C) 𐐓𐐱𐑋𐐲𐑅 𐐛𐐨𐑉𐑋𐐲𐑌, 2009.
162				𐐜𐐮𐑅 𐐮𐑆 𐑁𐑉𐐨 𐑅𐐫𐑁𐐻𐐶𐐯𐑉, 𐐲𐑌𐐼 𐐿𐐰𐑌 𐐺𐐨 𐑏𐑆𐐼/𐑋𐐱𐐼𐐲𐑁𐐴𐐼 𐐲𐑌𐐼𐐨𐑉 𐑄𐐲 𐑅𐐩𐑋 𐐻𐐨𐑉𐑋𐑆 𐐰𐑆
163				𐐑𐐨𐑉𐑊 𐐮𐐻𐑅𐐯𐑊𐑁.