line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Lingua::FI::Transcribe; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
749
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
43
|
|
4
|
|
|
|
|
|
|
|
5
|
1
|
|
|
1
|
|
6
|
use vars qw($VERSION); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
68
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
$VERSION = 0.03; |
8
|
|
|
|
|
|
|
|
9
|
1
|
|
|
1
|
|
1051
|
use Lingua::FI::Hyphenate qw(tavuta); |
|
1
|
|
|
|
|
546
|
|
|
1
|
|
|
|
|
460
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
{
    # Finnish letter (or letter pair) => rough "English" spelling.
    # Long vowels and diphthongs are listed as two-letter keys; they take
    # precedence over their single-letter parts (see $finnish_re below).
    my %english_for = (
        'a'  => 'ah',
        'aa' => 'ahh',
        'ai' => 'igh',
        'au' => 'ow',
        'b'  => 'b',
        'c'  => 'k',
        'd'  => 'd',
        'e'  => 'eh',
        'ee' => 'ehh',
        'ei' => 'ey',
        'f'  => 'f',
        'g'  => 'g',
        'h'  => 'hh',
        'i'  => 'ee',
        'j'  => 'y',
        'k'  => 'k',
        'l'  => 'l',
        'm'  => 'm',
        'n'  => 'n',
        'ng' => 'nng',
        'nk' => 'ng',
        'o'  => 'aw',
        'oi' => 'oy',
        'oo' => 'aww',
        'ou' => 'ow',
        'p'  => 'p',
        'q'  => 'q',
        'r'  => 'rr',
        's'  => 's',
        't'  => 't',
        'u'  => 'oo',
        'v'  => 'v',
        'w'  => 'v',
        'x'  => 'ks',
        'y'  => 'ew',
        # Was a second 'y' key, which silently overwrote the entry above;
        # the long vowel follows the aa/ee/oo/öö pattern.
        'yy' => 'eww',
        'z'  => 'ts',
        'å'  => 'aw',
        'ä'  => 'a',
        'ö'  => 'ur',
        'öö' => 'urr',
    );

    # Alternation of every key, longest first, so that two-letter keys
    # such as "aa" or "ng" are tried before their single-letter prefixes.
    # Built once when the module is loaded instead of on every call.
    my $finnish_re = do {
        my $alternation = join "|",
            sort { length($b) <=> length($a) || $a cmp $b }
            keys %english_for;
        qr/($alternation)/;
    };

    # A run of letters that is handed to the hyphenator as one word.
    # Uppercase letters are part of the run but have no table entry,
    # so they pass through untranscribed.
    my $word_re =
        qr/([aeiouyäåöAEIOUYÅÄÖbcdfghjklmnpqrstvwxzBCDFGHJKLMNPQRSTVWXZ]+)/;

    # Hyphenate one word with tavuta(), transcribe each syllable, and
    # join the syllables with "-".
    my $transcribe_word = sub {
        # Copy the argument: the caller passes $1, and @_ aliases it, so
        # any later regex match could change what $_[0] refers to.
        my ($word) = @_;
        my @syllables = tavuta($word);
        for my $syllable (@syllables) {
            $syllable =~ s/$finnish_re/$english_for{$1}/g;
        }
        return join("-", @syllables);
    };

    # Lingua::FI::Transcribe->English(@strings)
    #
    # Class method.  Returns a transcribed copy of each argument: every
    # run of letters is hyphenated and transcribed, everything else
    # (spaces, punctuation, digits) is copied unchanged.  The arguments
    # themselves are not modified.
    #
    # In list context returns one result per argument; in scalar context
    # returns the result for the first argument only.
    sub English {
        shift;    # drop the class

        my @transcribed;
        for my $text (@_) {
            (my $copy = $text) =~ s/$word_re/$transcribe_word->($1)/eg;
            push @transcribed, $copy;
        }

        return wantarray ? @transcribed : $transcribed[0];
    }
}
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=pod |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=head1 NAME |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
Lingua::FI::Transcribe - Finnish transcription |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=head1 SYNOPSIS |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
use Lingua::FI::Transcribe; |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
print Lingua::FI::Transcribe->English("sauna"), "\n"; |
88
|
|
|
|
|
|
|
print Lingua::FI::Transcribe->English("sisu"), "\n"; |
89
|
|
|
|
|
|
|
print Lingua::FI::Transcribe->English("olut"), "\n"; |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
print Lingua::FI::Transcribe->English("jarkko hietaniemi"), "\n"; |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
# The results being |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
sow-nah |
96
|
|
|
|
|
|
|
see-soo |
97
|
|
|
|
|
|
|
aw-loot |
98
|
|
|
|
|
|
|
yahrrk-kaw hheeeh-tah-neeeh-mee |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=head1 DESCRIPTION |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
With this module you can get a rough approximation of Finnish |
103
|
|
|
|
|
|
|
pronunciation by I<transcribing> Finnish into something |
104
|
|
|
|
|
|
|
(awful mess, usually) that sounds somewhat similar to Finnish |
105
|
|
|
|
|
|
|
if read aloud (with a straight face). In addition to transcribing |
106
|
|
|
|
|
|
|
the sounds the module also hyphenates the word so that you get more |
107
|
|
|
|
|
|
|
hints as to the correct rhythm. (The stress is always on the first |
108
|
|
|
|
|
|
|
syllable.) |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
However, currently only transcription into English is implemented. |
111
|
|
|
|
|
|
|
Contributions from speakers of other languages gladly accepted. |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
One more time: the approximation is very rough. I disclaim |
114
|
|
|
|
|
|
|
any responsibility if after ordering a beer in a Finnish pub |
115
|
|
|
|
|
|
|
the bartender looks at you funny and hands you an umbrella. |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=head2 About the English transcription |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
Note that the transcription of Finnish to "English" is very rough: |
120
|
|
|
|
|
|
|
it is basically a very simple substitution of one or more letters of |
121
|
|
|
|
|
|
|
Finnish to one or more letters of "English". The highly irregular |
122
|
|
|
|
|
|
|
pronunciation of English doesn't help things. The vowels are the |
123
|
|
|
|
|
|
|
hardest part to get right. In principle the basic vowels |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
a e i o u |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
are simple: just use the simple vowel sounds you can find |
128
|
|
|
|
|
|
|
in the English words |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
pun pet pit pot put |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
but consider how "pun" and "put" have different vowels, and when |
133
|
|
|
|
|
|
|
Finnish diphthongs like "au" are introduced, the above simple rule |
134
|
|
|
|
|
|
|
breaks down horribly. (That particular Finnish diphthong is |
135
|
|
|
|
|
|
|
pronounced like the English "ow" in "how", in case you are |
136
|
|
|
|
|
|
|
wondering.) |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=head1 ABOUT FINNISH |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
Finnish is a highly phonemic and phonetic language-- what this means |
141
|
|
|
|
|
|
|
is that the correlation between graphemes/letters and phonemes/sounds |
142
|
|
|
|
|
|
|
is really strong: all you can see you can hear, all you can hear you |
143
|
|
|
|
|
|
|
can see. One letter corresponds to one sound, and there are no silent |
144
|
|
|
|
|
|
|
letters. Since Finnish is a natural language, this is of course an |
145
|
|
|
|
|
|
|
oversimplification, there are nuances and exceptions to the above |
146
|
|
|
|
|
|
|
ideal. More information about Finnish pronunciation can be found from |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
http://www.cs.tut.fi/~jkorpela/finnish.pronunciation.html |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
and sound examples from |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
http://www.helsinki-hs.net/thisishelsinki/kieli.html |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
=head1 LIMITATIONS |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
Only English transcription has been implemented. |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
Only lowercase letters are transcribed. |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
Only Latin-1 (ISO 8859-1) is supported as the encoding. |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=head1 AUTHOR |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
Jarkko Hietaniemi |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
Copyright 2001 Jarkko Hietaniemi |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
171
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=cut |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
1; |