line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#!/usr/bin/perl -w |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package Lingua::Phonology::Symbols; |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 NAME |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
Lingua::Phonology::Symbols - a module for associating symbols with |
8
|
|
|
|
|
|
|
segment prototypes. |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
=head1 SYNOPSIS |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
use Lingua::Phonology; |
13
|
|
|
|
|
|
|
$phono = new Lingua::Phonology; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
# Load the default features |
16
|
|
|
|
|
|
|
$phono->features->loadfile; |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
# Load the default symbols |
19
|
|
|
|
|
|
|
$symbols = $phono->symbols; |
20
|
|
|
|
|
|
|
$symbols->loadfile; |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# Make a test segment |
23
|
|
|
|
|
|
|
$segment = $phono->segment; |
24
|
|
|
|
|
|
|
$segment->labial(1); |
25
|
|
|
|
|
|
|
$segment->voice(1); |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# Find the symbol matching the segment |
28
|
|
|
|
|
|
|
print $symbols->spell($segment); # Should print 'b' |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head1 DESCRIPTION |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
When using Lingua::Phonology, you usually manipulate Segment objects that have |
33
|
|
|
|
|
|
|
various feature values that specify the phonetic qualities of the segment. |
34
|
|
|
|
|
|
|
However, it is difficult to print those feature values, and a list of feature |
35
|
|
|
|
|
|
|
values can be difficult to interpret anyway. This is where Symbols comes in--it |
36
|
|
|
|
|
|
|
provides a way to take a Segment object and get a phonetic symbol representing |
37
|
|
|
|
|
|
|
the properties of that segment. |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
In Symbols, you may use L<"add_symbol">() to define text symbols that correlate to |
40
|
|
|
|
|
|
|
"prototypes", which are special Segment objects that represent the ideal |
41
|
|
|
|
|
|
|
segment for each symbol. After you have defined your symbols and prototypes, |
42
|
|
|
|
|
|
|
you may use L<"spell">() to find which prototype is the most similar to a segment |
43
|
|
|
|
|
|
|
in question, and get the symbol for that prototype. |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
As of v0.2, Symbols also includes diacritics. A diacritic is a special symbol |
46
|
|
|
|
|
|
|
that begins or ends with a '*', and which is used to modify other symbols. If |
47
|
|
|
|
|
|
|
the best symbol match for a segment you are trying to spell is an imperfect |
48
|
|
|
|
|
|
|
match, Symbols will then attempt to use diacritics to indicate exactly how the |
49
|
|
|
|
|
|
|
segment is pronounced. For compatibility reasons, however, this feature is off |
50
|
|
|
|
|
|
|
by default. It can be turned on with L<"set_diacritics">. |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
You will probably want to read the L<"add_symbol">, L<"spell">, and L<"loadfile"> |
53
|
|
|
|
|
|
|
sections, because these describe the most widely-used functions and the |
54
|
|
|
|
|
|
|
algorithm used to score potential matches. If you're not getting the results |
55
|
|
|
|
|
|
|
you expect, you probably need to examine the way your prototype definitions are |
56
|
|
|
|
|
|
|
interacting with that algorithm. |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=cut |
59
|
|
|
|
|
|
|
|
60
|
1
|
|
|
1
|
|
36929
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
43
|
|
61
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
38
|
|
62
|
1
|
|
|
1
|
|
7
|
use warnings::register; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
191
|
|
63
|
1
|
|
|
1
|
|
7
|
use Carp; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
104
|
|
64
|
1
|
|
|
1
|
|
925
|
use Lingua::Phonology::Common; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
use Lingua::Phonology::Segment; |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
our $VERSION = 0.3; |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
# Report a diagnostic through _err(), but only when warnings are enabled for
# this package (see "use warnings::register"). The ($) prototype is what lets
# call sites write `err "message"` and `return err $@ if $@` without
# parentheses. Yields whatever _err() returns, or the false value from
# warnings::enabled() when warnings are switched off.
sub err ($) {
    return warnings::enabled() && _err($_[0]);
}
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
# Generate the accessor methods for the boolean flags stored on the object.
# %flags maps the public method name to the hash key holding the flag. For
# each entry NAME three methods are installed:
#   NAME([value])  - returns the flag; with an argument, first stores 1 or 0
#                    according to the truth of that argument
#   set_NAME()     - stores 1 (and returns it)
#   no_NAME()      - stores 0 and returns 1
my %flags = (
    auto_reindex => 'AUTOINDEX',
    diacritics   => 'USEDCR',
);
for my $name (keys %flags) {
    my $slot = $flags{$name};

    # Symbolic glob assignment is how the methods get their names
    no strict 'refs';

    *{$name} = sub {
        my ($self, @args) = @_;
        if (@args) {
            $self->{$slot} = $args[0] ? 1 : 0;
        }
        return $self->{$slot};
    };
    *{"set_$name"} = sub { $_[0]->{$slot} = 1 };
    *{"no_$name"}  = sub { $_[0]->{$slot} = 0; 1; };
}
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# Constructor. Takes one argument, the feature set the prototypes will be
# defined against. If _is_features() rejects the argument, carps and returns
# undef instead of an object.
sub new {
    my ($proto, $features) = @_;
    my $class = ref($proto) || $proto;

    if (not _is_features($features)) {
        carp "No feature set or bad featureset given for new Symbols object";
        return undef;
    }

    my $self = bless {
        FEATURES  => $features,    # the Features object
        SYMBOLS   => {},           # symbol => prototype
        DIACRITS  => {},           # diacritic => prototype
        USEDCR    => 0,            # use diacritics when spelling? (off by default)
        AUTOINDEX => 1,            # reindex automatically? (on by default)
        REINDEX   => 0,            # is the index currently out of date?
        INDEX     => {},           # feature => value => list of symbols
        VALINDEX  => {},           # symbol => hash of its feature values
        DCRINDEX  => [],           # diacritics ordered by number of features
    }, $class;

    return $self;
}
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
# Add one or more new symbols (the older name symbol() is kept below as an alias) |
122
|
|
|
|
|
|
|
# Add symbols given as a list of symbol => prototype pairs. Each pair is
# validated with _check_symbol(); pairs that fail are skipped. A valid pair
# replaces any definition already stored under the same symbol. The index is
# always marked stale. Returns 1 when every pair was accepted, otherwise an
# empty list (undef in scalar context).
sub add_symbol {
    my ($self, %proto_for) = @_;
    my $failed = 0;

    for my $symbol (keys %proto_for) {
        my $proto = $proto_for{$symbol};

        if (not $self->_check_symbol($symbol, $proto)) {
            $failed = 1;
            next;
        }

        # Remove any earlier definition, then store the new one
        $self->drop_symbol($symbol);
        $self->_add_symbol($symbol, $proto);
    }

    $self->{REINDEX} = 1;

    return if $failed;
    return 1;
}
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# Make symbol() synonymous with add_symbol() |
148
|
|
|
|
|
|
|
*symbol = \&add_symbol; |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
# Private: validate a prototype before it is stored under $symbol. The
# prototype must satisfy _is_seg() and must report the same feature set as
# this object; otherwise the problem is reported via err() and err()'s value
# is returned. On success the prototype is pointed at this symbol set (so it
# spells itself with these symbols) and 1 is returned.
sub _check_symbol {
    my ($self, $symbol, $proto) = @_;

    return err("Prototype for '$symbol' is not a Lingua::Phonology::Segment")
        if not _is_seg($proto);

    return err("Prototype for '$symbol' has wrong feature set")
        if $self->features ne $proto->featureset;

    $proto->symbolset($self);
    return 1;
}
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
# Private: store a prototype under $symbol. Symbols with a leading or trailing
# '*' next to at least one other non-space character are diacritics and go in
# DIACRITS; everything else (including a lone '*') goes in SYMBOLS. Returns
# the stored prototype.
sub _add_symbol {
    my ($self, $symbol, $proto) = @_;

    my $table = $symbol =~ /(^\*\S+)|(\S+\*$)/ ? 'DIACRITS' : 'SYMBOLS';
    return ($self->{$table}->{$symbol} = $proto);
}
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
# Delete each named symbol. A name is removed from SYMBOLS first; only if
# nothing true was removed there is it also deleted from DIACRITS. Unknown
# names are silently ignored. Always marks the index stale and returns 1.
sub drop_symbol {
    my ($self, @symbols) = @_;

    for my $symbol (@symbols) {
        next if delete $self->{SYMBOLS}->{$symbol};
        delete $self->{DIACRITS}->{$symbol};
    }

    return ($self->{REINDEX} = 1);
}
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
# Redefine existing symbols, given as a list of symbol => prototype pairs.
# Unlike add_symbol(), every symbol must already be defined; an unknown symbol
# is reported via err() and skipped, as is any pair rejected by
# _check_symbol(). The index is always marked stale. Returns 1 when every pair
# was applied, otherwise an empty list (undef in scalar context).
#
# Diacritics are kept in DIACRITS rather than SYMBOLS (see _add_symbol), so
# both tables are consulted when checking that the symbol exists. Checking
# SYMBOLS alone made it impossible to change any diacritic.
sub change_symbol {
    my ($self, %proto_for) = @_;
    my $err = 0;

    SYMBOL: for my $symbol (keys %proto_for) {
        my $known = exists $self->{SYMBOLS}->{$symbol}
                 || exists $self->{DIACRITS}->{$symbol};
        if (not $known) {
            err("No symbol $symbol defined");
            $err = 1;
            next SYMBOL;
        }

        $self->_check_symbol($symbol, $proto_for{$symbol}) or do {
            $err = 1;
            next SYMBOL;
        };

        $self->_add_symbol($symbol, $proto_for{$symbol});
    }

    $self->{REINDEX} = 1;

    return $err ? () : 1;
}
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
# Rebuild the lookup tables used by score() and score_diacrit():
#   INDEX    - feature => value => [symbols whose prototype has that value]
#   VALINDEX - symbol or diacritic => hash of its prototype's feature values
#   DCRINDEX - diacritic names, those with the most features first
# Clears the REINDEX flag and returns 1.
#
# VALINDEX is rebuilt from scratch along with INDEX, so symbols that have been
# dropped no longer linger in it. Each diacritic's feature values are fetched
# once and reused for sorting rather than being recomputed in the comparator.
sub reindex {
    my $self = shift;
    $self->{REINDEX}  = 0;
    $self->{INDEX}    = {};
    $self->{VALINDEX} = {};

    # Index regular symbols by feature => value
    for my $symbol (keys %{ $self->{SYMBOLS} }) {
        my %feat = $self->{SYMBOLS}->{$symbol}->all_values;
        $self->{VALINDEX}->{$symbol} = \%feat;

        for my $feature (keys %feat) {
            # An undef value is deliberately indexed under the key ''
            no warnings 'uninitialized';
            push @{ $self->{INDEX}->{$feature}->{ $feat{$feature} } }, $symbol;
        }
    }

    # Record diacritic feature values, remembering how many each one has
    my %count_of;
    for my $dcr (keys %{ $self->{DIACRITS} }) {
        my %feat = $self->{DIACRITS}->{$dcr}->all_values;
        $self->{VALINDEX}->{$dcr} = \%feat;
        $count_of{$dcr} = keys %feat;
    }

    # Diacritics with more features come first
    $self->{DCRINDEX} = [
        sort { $count_of{$b} <=> $count_of{$a} } keys %{ $self->{DIACRITS} }
    ];

    return 1;
}
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
# Load symbol definitions. With no argument, the default definitions are read
# from this module's DATA section; otherwise $file is parsed with
# _parse_from_file(). If the file does not yield a parse, falls back to the
# deprecated old_loadfile() format. Returns the result of _load_from_struct()
# (or of old_loadfile() / err() on those paths).
sub loadfile {
    my ($self, $file) = @_;

    my $parse;

    # Loading default symbols
    if (not defined $file) {
        my $start  = tell DATA;
        my $string = join '', <DATA>;

        # Rewind straight away so that DATA can be read again by a later
        # call, even when parsing fails below
        seek DATA, $start, 0;

        eval { $parse = _parse_from_string($string, 'symbols') };
        return err($@) if $@;
    }

    # Loading an actual file
    else {
        eval { $parse = _parse_from_file($file, 'symbols') };
        if (!$parse) {
            return $self->old_loadfile($file);
        }
    }

    return $self->_load_from_struct($parse);
}
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
# Deprecated loader for the old line-based format. Opens $file via
# _to_handle(), emits a "Deprecated method" diagnostic, then reads lines of
# the form "symbol<TAB>feature-definitions". Everything after a '#' is a
# comment. Each definition is either "name=value", or a bare name optionally
# prefixed by '*', '+' or '-' (a bare name gets the value 1, a prefixed name
# gets its prefix character as value). Each line becomes one prototype added
# through symbol(). Marks the index stale and returns 1; returns err()'s
# value if the file cannot be opened.
#
# Lines are read into a lexical rather than the global $_, which may be
# aliased to the caller's own data (e.g. inside the caller's for loop), and
# regex captures are copied before being handed to other methods.
sub old_loadfile {
    my ($self, $file) = @_;

    eval { $file = _to_handle($file, '<') };
    return err($@) if $@;
    err("Deprecated method");

    while (my $line = <$file>) {
        $line =~ s/#.*$//;    # Remove comments
        next unless $line =~ /^\s*(\S*)\t+(.*)/;    # General line format
        my ($symbol, $spec) = ($1, $2);

        my $proto = Lingua::Phonology::Segment->new( $self->features );
        for my $def (split /\s+/, $spec) {
            if ($def =~ /(\S+)=(\S+)/) {    # Feature defs like coronal=1
                my ($name, $value) = ($1, $2);
                $proto->value($name, $value);
            }
            elsif ($def =~ /([*+-])?(\S+)/) {    # Like +feature or feature
                my ($name, $value) = ($2, $1 ? $1 : 1);
                $proto->value($name, $value);
            }
        }
        $self->symbol($symbol => $proto);
    }

    close $file;

    return ($self->{REINDEX} = 1);
}
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
# Private: turn a parsed structure into prototypes. $parse maps each symbol to
# a hash whose {feature} entry maps feature names to { value => ... }. One
# Segment is built per symbol from those values and registered via symbol().
# Marks the index stale.
sub _load_from_struct {
    my ($self, $parse) = @_;

    while ( my ($sym, $def) = each %$parse ) {
        my %value_of =
            map { $_ => $def->{feature}->{$_}->{value} } keys %{ $def->{feature} };
        my $proto = Lingua::Phonology::Segment->new($self->{FEATURES}, \%value_of);
        $self->symbol($sym => $proto);
    }

    $self->{REINDEX} = 1;
}
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
# Private: serialize every symbol and diacritic through _string_from_struct().
# The structure passed on has the shape
#   { symbols => { symbol => { NAME => { feature => { FEAT => { value => V } } } } } }
# with undefined feature values written as '*'. Serialization runs inside an
# eval, so a failure there yields the eval's empty result instead of dying.
sub _to_str {
    my $self = shift;

    my %struct;
    for my $table ($self->{SYMBOLS}, $self->{DIACRITS}) {
        for my $sym (keys %$table) {
            my %value_of = $table->{$sym}->all_values;
            for my $feature (keys %value_of) {
                my $value = defined $value_of{$feature} ? $value_of{$feature} : '*';
                $struct{$sym}->{feature}->{$feature} = { value => $value };
            }
        }
    }

    return eval { _string_from_struct({ symbols => { symbol => \%struct } }) };
}
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
# Spell each segment given: the best-scoring symbol according to score(), or
# '_?_' when score() yields nothing true. Returns the list of spellings in
# list context and their concatenation in scalar context. If any argument
# fails _is_seg(), reports "Bad argument to spell()" via err() and returns
# err()'s value immediately.
sub spell {
    my ($self, @segments) = @_;

    my @spelling;
    for my $seg (@segments) {
        return err("Bad argument to spell()") unless _is_seg($seg);

        # Scalar context makes score() hand back only the winning symbol
        my $best = $self->score($seg);
        push @spelling, ($best ? $best : '_?_');
    }

    return @spelling if wantarray;
    return join '', @spelling;
}
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
# Score one segment against every indexed prototype. For each indexed
# feature/value pair, the symbols listed under it gain a point when the
# segment has the same value (compared as strings) and lose a point otherwise.
# In list context returns the symbol => score hash. In scalar context returns
# the symbol holding the highest positive score (with diacritics applied by
# score_diacrit() when the USEDCR flag is on), or a false value if no symbol
# scored above zero. Reindexes first when the index is stale and automatic
# reindexing is enabled.
sub score {
    my ($self, $comp) = @_;

    # Reindex if necessary
    $self->reindex if $self->{REINDEX} and $self->{AUTOINDEX};

    my %have  = $comp->all_values;    # the segment's own feature values
    my %tally = ();                   # symbol => running score

    for my $feature (keys %{ $self->{INDEX} }) {
        while (my ($value, $symbols) = each %{ $self->{INDEX}->{$feature} }) {
            # Undefined values are compared as '' on purpose
            no warnings 'uninitialized';

            # An indexed value of '' stands for undef. Ask the segment
            # directly in that case, since $feature may be a node that has
            # defined children.
            $have{$feature} = $comp->$feature if $value eq '';

            my $delta = $value eq $have{$feature} ? 1 : -1;
            $tally{$_} += $delta for @$symbols;
        }
    }

    # Slot each positive scorer at the array index equal to its score, so the
    # last element is a top scorer (ties: whichever came out of the hash last)
    my @ranked = ();
    while (my ($sym, $points) = each %tally) {
        $ranked[$points] = $sym if $points > 0;
    }

    # Get a diacritic spelling if wanted
    my $top = @ranked ? $#ranked : 0;
    if ($self->{USEDCR}) {
        $ranked[$top] = score_diacrit($self, $ranked[$top], %have);
    }

    return wantarray ? %tally : $ranked[$top];
}
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
# Decorate a base symbol with diacritics. $symbol is the winning base symbol
# and %comp holds the feature values of the segment being spelled. The
# features on which the segment and the base prototype differ are collected,
# then the diacritics in DCRINDEX are tried in order. A diacritic applies only
# if every one of its features matches the outstanding difference. Applying it
# removes its features from the difference. A diacritic written '*x' is
# appended to the symbol; any other has its trailing '*' stripped and is
# prepended. Returns the decorated symbol, or '' when no base symbol is given.
sub score_diacrit {
    my ($self, $symbol, %comp) = @_;

    # Don't try to diacriticize completely unmatched segments
    return '' if not $symbol;

    # Undefined values are compared as '' throughout
    no warnings 'uninitialized';

    # Difference between the segment and the base prototype, over the
    # features of both; the value kept is always the segment's
    my $base = $self->{VALINDEX}->{$symbol} ||= {};
    my %disc = ();
    for my $feature (keys %comp, keys %$base) {
        $disc{$feature} = $comp{$feature}
            if $comp{$feature} ne $base->{$feature};
    }

    DIACRIT: for my $name (@{ $self->{DCRINDEX} }) {
        # Quit if there's no more discrepancy
        last DIACRIT if not keys %disc;

        # A diacritic must not disagree with the difference anywhere
        my %want = %{ $self->{VALINDEX}->{$name} };
        for my $feature (keys %want) {
            if (defined $want{$feature}) {
                # Defined features compare normally
                next DIACRIT if $want{$feature} ne $disc{$feature};
            }
            else {
                # Undefined features must be present in the difference with
                # an undefined value; a merely missing key does not count
                next DIACRIT
                    if !exists $disc{$feature} || defined $disc{$feature};
            }
        }

        # This diacritic is used, so its features are accounted for
        delete $disc{$_} for keys %want;

        # Work on a copy so the stored name keeps its asterisk
        my $mark = $name;
        if ($mark =~ s/^\*//) {
            $symbol = $symbol . $mark;
        }
        else {
            $mark =~ s/\*$//;
            $symbol = $mark . $symbol;
        }
    }

    return $symbol;
}
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
# Return the prototype stored for $symbol, whether it is a regular symbol or
# a diacritic. If nothing is stored under that name, reports "No such symbol"
# via err() and returns err()'s value. Because the caller receives the
# prototype itself and may modify it, the index is marked stale on success.
#
# Both tables are consulted rather than guessing the table from the shape of
# the name. Guessing sent the lone '*' symbol (which _add_symbol stores as a
# regular symbol) to the diacritics table, where it could never be found.
sub prototype {
    my ($self, $symbol) = @_;

    my $proto = $self->{SYMBOLS}->{$symbol} || $self->{DIACRITS}->{$symbol};

    return err("No such symbol '$symbol'") if (not $proto);
    $self->{REINDEX} = 1;
    return $proto;
}
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
# Make new segments. With no arguments, returns a blank Segment built on this
# object's feature set and attached to this symbol set. With symbol names,
# returns a duplicate of each symbol's prototype, in the order given (only
# the first one in scalar context). If any symbol has no prototype, returns
# nothing at all.
sub segment {
    my ($self, @symbols) = @_;

    # If you're not given a symbol, return a blank segment
    if (not @symbols) {
        my $blank = Lingua::Phonology::Segment->new( $self->features );
        $blank->symbolset($self);
        return $blank;
    }

    # Otherwise copy the prototype of every requested symbol
    my @segments;
    for my $symbol (@symbols) {
        my $proto = $self->prototype($symbol);
        return unless $proto;
        push @segments, $proto->duplicate;
    }

    return wantarray ? @segments : $segments[0];
}
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
# Get or set the feature set. Without an argument, returns the current one.
# With an argument that passes _is_features(), stores it and returns it; with
# any other argument, carps "Bad argument to features()", leaves the object
# unchanged and returns carp's value.
sub features {
    my ($self, @args) = @_;

    if (@args) {
        my $candidate = $args[0];
        return carp "Bad argument to features()" unless _is_features($candidate);
        $self->{FEATURES} = $candidate;
    }

    return $self->{FEATURES};
}
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
1; |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
=head1 METHODS |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
=head2 new |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
$symbol = Lingua::Phonology::Symbols->new($features); |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
Creates a new Symbols object. This method takes one argument, a Features |
510
|
|
|
|
|
|
|
object that provides the feature set for the prototypes in this object. |
511
|
|
|
|
|
|
|
This will carp if you don't provide an appropriate object. |
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
This method is called automatically when you make a C<new |
|
514
|
|
|
|
|
|
|
Lingua::Phonology>. |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
=head2 add_symbol |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
$symbol->add_symbol( 'b' => $b ); |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
Adds one or more symbols to the current object. The argument to symbol must be |
521
|
|
|
|
|
|
|
a hash. The keys of this hash are the text symbols that will be returned, and |
522
|
|
|
|
|
|
|
the values should be Lingua::Phonology::Segment objects that act as the |
523
|
|
|
|
|
|
|
prototypes for each symbol. See L<"spell"> for explanation of how these symbols |
524
|
|
|
|
|
|
|
and prototypes are used. |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
Symbols can generally be any text string. However, strings beginning or ending |
527
|
|
|
|
|
|
|
with '*' are interpreted specially, as diacritics. The position of the asterisk |
528
|
|
|
|
|
|
|
indicates where the base symbol goes, and the rest is interpreted as the |
529
|
|
|
|
|
|
|
diacritic. Diacritic prototypes are also treated differently from regular |
530
|
|
|
|
|
|
|
prototypes--see the L<"spell"> section for details. For example, you could use a |
531
|
|
|
|
|
|
|
tilde '~' following a symbol to indicate nasality with the following call to |
532
|
|
|
|
|
|
|
symbol: |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
# Assume $nasal is an appropriate prototype |
535
|
|
|
|
|
|
|
$symbols->add_symbol('*~' => $nasal); |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
Note that '*' by itself is still a valid, non-diacritic symbol. However, '**' |
538
|
|
|
|
|
|
|
will be interpreted as a diacritic consisting of a symbol followed by a single |
539
|
|
|
|
|
|
|
asterisk. |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
If you attempt to pass in a Lingua::Phonology::Segment object associated with a |
542
|
|
|
|
|
|
|
feature set other than the one defined for the current object, C<add_symbol()> |
543
|
|
|
|
|
|
|
will skip to the next symbol and emit a warning. |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
This method returns true if all of the attempted symbol additions succeeded, |
546
|
|
|
|
|
|
|
and false otherwise. |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
=head2 symbol (deprecated) |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
Synonymous with C<add_symbol()>. This method is deprecated, and only exists |
551
|
|
|
|
|
|
|
because of a poor naming choice in earlier versions of the module. |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
=head2 drop_symbol |
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
$symbols->drop_symbol('x'); |
556
|
|
|
|
|
|
|
|
557
|
|
|
|
|
|
|
Deletes a symbol from the current object. Nothing happens if you try to |
558
|
|
|
|
|
|
|
delete a symbol which doesn't currently exist. |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
=head2 change_symbol |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
$symbols->change_symbol( 'b' => $b ); |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
Acts exactly the same as C<add_symbol()>, but first checks to make sure that |
565
|
|
|
|
|
|
|
there already exists a symbol with the key given. Otherwise, it brings |
566
|
|
|
|
|
|
|
up an error. |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
The method C<add_symbol()> can also be used to redefine existing symbols, but |
569
|
|
|
|
|
|
|
it first drops any existing symbol. In the present implementation this makes no |
570
|
|
|
|
|
|
|
difference, so this method really only exists to aid readability and allow for |
571
|
|
|
|
|
|
|
future expansion. |
572
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
As with C<add_symbol()>, this method returns true if all of the attempted |
574
|
|
|
|
|
|
|
changes succeeded, otherwise false. |
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
=head2 features |
577
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
$features = $symbols->features(); |
579
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
Returns the Features object associated with the current object, or sets the |
581
|
|
|
|
|
|
|
object if provided with a Lingua::Phonology::Features object as an argument. |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
=head2 prototype |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
$proto = $symbols->prototype('b'); |
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
Takes one argument, a text string indicating a symbol in the current set. |
588
|
|
|
|
|
|
|
Returns the prototype associated with that symbol, or carps if no |
589
|
|
|
|
|
|
|
such symbol is defined. You can then make changes to the prototype object, |
590
|
|
|
|
|
|
|
which will be reflected in subsequent calls to spell(). |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
=head2 segment |
593
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
# Get one segment |
595
|
|
|
|
|
|
|
$b = $symbols->segment('b'); |
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
# Get several segments |
598
|
|
|
|
|
|
|
@word = $symbols->segment('b', 'a', 'n'); |
599
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
Takes one or more arguments, each a symbol, and returns a new Segment object with the |
601
|
|
|
|
|
|
|
feature values of the prototype for that symbol. Unlike L<"prototype">, which |
602
|
|
|
|
|
|
|
returns the prototype itself, this method returns a completely new object which |
603
|
|
|
|
|
|
|
can be modified without affecting the values of the prototype. If you supply a |
604
|
|
|
|
|
|
|
list of symbols, you'll get back a list of segments in the same order. This is |
605
|
|
|
|
|
|
|
generally the easiest way to make new segments with some features already set. |
606
|
|
|
|
|
|
|
Example: |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
The segments returned from this method will be associated with the |
609
|
|
|
|
|
|
|
Lingua::Phonology::Features object defined by C<features()> and the current |
610
|
|
|
|
|
|
|
Lingua::Phonology::Symbols object. |
611
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
=head2 reindex |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
$symbols->reindex(); |
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
This function recompiles the internal index that Lingua::Phonology::Symbols |
617
|
|
|
|
|
|
|
uses to speed up C<spell>ing. It should generally be unnecessary to call this |
618
|
|
|
|
|
|
|
function, as Lingua::Phonology::Symbols does its best to figure out when |
619
|
|
|
|
|
|
|
reindexing is necessary without any user input. You may call this function by |
620
|
|
|
|
|
|
|
hand to ensure reindexing at a particular time, or if auto reindexing is off. |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
=head2 auto_reindex |
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
# Get the current state of auto-reindexing |
625
|
|
|
|
|
|
|
$auto_reindex = $symbols->auto_reindex(); |
626
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
# Set the auto-reindexing flag |
628
|
|
|
|
|
|
|
$symbols->auto_reindex(0); |
629
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
Returns true if automatic reindexing is currently turned on, false otherwise. |
631
|
|
|
|
|
|
|
If called with an argument, sets auto reindexing to the truth or falsehood of |
632
|
|
|
|
|
|
|
that argument. Auto reindexing is on by default. |
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
=head2 set_auto_reindex |
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
$symbols->set_auto_reindex(); |
637
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
Turns automatic reindexing (back) on. Same as C<< auto_reindex(1) >>. Auto |
639
|
|
|
|
|
|
|
reindexing is on by default, so this is only necessary after a call to |
640
|
|
|
|
|
|
|
C<no_auto_reindex()>. See L<"INDEXING">. |
641
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
=head2 no_auto_reindex |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
$symbols->no_auto_reindex(); |
645
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
Turns automatic reindexing off. Same as C<< auto_reindex(0) >>. See |
647
|
|
|
|
|
|
|
L<"INDEXING">. |
648
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
=head2 diacritics |
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
# Get the current diacritic flag |
652
|
|
|
|
|
|
|
$symbols->diacritics(); |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
# Set the diacritics flag |
655
|
|
|
|
|
|
|
$symbols->diacritics(1); |
656
|
|
|
|
|
|
|
|
657
|
|
|
|
|
|
|
Returns true if diacritics are currently on, otherwise false. You may also pass |
658
|
|
|
|
|
|
|
this method an argument to turn diacritics on or off, e.g. C<< |
659
|
|
|
|
|
|
|
$symbols->diacritics(1) >>. Diacritics are off by default. |
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
=head2 set_diacritics |
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
$symbols->set_diacritics(); |
664
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
Turns diacritics on. Same as C<< diacritics(1) >>. |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
=head2 no_diacritics |
668
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
$symbols->no_diacritics(); |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
Turns diacritics off. Same as C<< diacritics(0) >>. |
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
=head2 spell |
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
print $symbols->spell($seg); |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
Takes any number of Lingua::Phonology::Segment objects as arguments. For each |
678
|
|
|
|
|
|
|
object, returns a text string indicating the best match of prototype with the |
679
|
|
|
|
|
|
|
Segment given. In a scalar context, returns a string consisting of a |
680
|
|
|
|
|
|
|
concatenation of all of the symbols. |
681
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
The Segment object given will be compared against every prototype currently |
683
|
|
|
|
|
|
|
defined, and scored according to the following algorithm: |
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
=over 4 |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
=item * |
688
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
Score one point for every feature whose value is the same for both the |
690
|
|
|
|
|
|
|
prototype and the comparison segments, whether that value is defined or not. |
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
=item * |
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
Lose one point for every feature that is defined for the prototype segment and |
695
|
|
|
|
|
|
|
which the comparison segment disagrees with. |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
=item * |
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
Score zero points for each feature defined on the comparison segment but not |
700
|
|
|
|
|
|
|
defined for the prototype. |
701
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
=back |
703
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
Comparison segments may always be more defined than the prototypes, so |
705
|
|
|
|
|
|
|
there is no consequence if the comparison segment is defined for features |
706
|
|
|
|
|
|
|
that the prototype isn't defined for. |
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
Note that this algorithm is slightly different from the one used in previous |
709
|
|
|
|
|
|
|
versions. In my informal tests, about 95% of the segments come out the same, |
710
|
|
|
|
|
|
|
but there is some discrepancy. My subjective impression is that the results |
711
|
|
|
|
|
|
|
given by the new algorithm are better (more intuitive) than those from the |
712
|
|
|
|
|
|
|
previous algorithm. |
713
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
The 'winning' prototype is the one that scores the highest by the preceding |
715
|
|
|
|
|
|
|
algorithm. If more than one prototype scores the same, it's unpredictable which |
716
|
|
|
|
|
|
|
symbol will be returned, since it will depend on the order in which the |
717
|
|
|
|
|
|
|
prototypes came out of the internal hash. |
718
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
If C<diacritics> is on, diacritic formation happens after the best-matching |
720
|
|
|
|
|
|
|
symbol is chosen. A list of the features for which the comparison segment and |
721
|
|
|
|
|
|
|
symbol prototypes do not agree is compiled, and diacritics are selected that |
722
|
|
|
|
|
|
|
match against those features. If there are diacritics that specify more than |
723
|
|
|
|
|
|
|
one feature, or multiple diacritics specifying the same feature, then this |
724
|
|
|
|
|
|
|
method will attempt to minimize the number of diacritics used. The diacritic |
725
|
|
|
|
|
|
|
symbols will be concatenated with the base symbol, the base symbol taking the |
726
|
|
|
|
|
|
|
place of the asterisk in the symbol definition. For example, if a segment |
727
|
|
|
|
|
|
|
matched the base symbol 'a' and the diacritic '*~', the resulting symbol would |
728
|
|
|
|
|
|
|
be 'a~'. If multiple diacritics are matched, there is no way to predict the |
729
|
|
|
|
|
|
|
order in which they will be added, except that diacritics specifying multiple |
730
|
|
|
|
|
|
|
features will appear closer to the base. |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
If no prototype scores at least 1 point by this algorithm, the string '_?_' |
733
|
|
|
|
|
|
|
will be returned. This indicates that no suitable matches were found. No |
734
|
|
|
|
|
|
|
diacritic matching is done in this case. |
735
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
Beware of testing a Segment object that is associated with a different feature |
737
|
|
|
|
|
|
|
set than the ones used by the prototypes. This will almost certainly cause |
738
|
|
|
|
|
|
|
errors and bizarre results. |
739
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
=head2 score |
741
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
%score = $symbols->score($seg); |
743
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
Takes a Segment argument and compares it against the defined symbols, just like |
745
|
|
|
|
|
|
|
spell(). It normally returns a hash with the available symbols as the keys and |
746
|
|
|
|
|
|
|
the score for each symbol as the value. In a scalar context, returns the |
747
|
|
|
|
|
|
|
winning symbol just like spell(). Useful for debugging and determining why the |
748
|
|
|
|
|
|
|
program thinks that [a] is better described as [d] (as happened to the author |
749
|
|
|
|
|
|
|
during testing). Unfortunately, score() can only be used to test one segment at |
750
|
|
|
|
|
|
|
a time, rather than a list of segments. |
751
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
=head2 loadfile |
753
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
# Load symbol definitions from a file |
755
|
|
|
|
|
|
|
$symbols->loadfile('phono.xml'); |
756
|
|
|
|
|
|
|
|
757
|
|
|
|
|
|
|
# Load default symbols |
758
|
|
|
|
|
|
|
$symbols->loadfile(); |
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
Takes one argument, a file name, and loads prototype segment definitions |
761
|
|
|
|
|
|
|
from that file. If no file name is given, loads the default symbol set. |
762
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
Files should be in the XML format described in |
764
|
|
|
|
|
|
|
L<Lingua::Phonology::FileFormatPOD>. If the filename given does not parse |
765
|
|
|
|
|
|
|
correctly, this method will fall back on C<old_loadfile>, just in case this |
766
|
|
|
|
|
|
|
is an old script using the deprecated custom file format. In this case, you |
767
|
|
|
|
|
|
|
will get a warning. To avoid the warning, change the method call, or better yet |
768
|
|
|
|
|
|
|
change your file over to the XML format. |
769
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
=head2 old_loadfile (deprecated) |
771
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
# Load a file |
773
|
|
|
|
|
|
|
$symbols->old_loadfile('symbols.txt'); |
774
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
This method is deprecated. Use C<loadfile> instead. |
776
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
Takes one argument, a file name. Reads that file according to the format |
778
|
|
|
|
|
|
|
described below and adds the symbols defined there to the current symbols |
779
|
|
|
|
|
|
|
object. This method does NOT load default features when called without any |
780
|
|
|
|
|
|
|
arguments. |
781
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
Lines in the file should match the regular expression /^\s*(\S+)\t+(.*)/. |
783
|
|
|
|
|
|
|
The first parenthesized sub-expression will be taken as the symbol, and the |
784
|
|
|
|
|
|
|
second sub-expression as the feature definitions for the prototype. Feature |
785
|
|
|
|
|
|
|
definitions are separated by spaces, and should be in one of three formats: |
786
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
=over 4 |
788
|
|
|
|
|
|
|
|
789
|
|
|
|
|
|
|
=item * |
790
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
B<feature>: The preferred way to set a privative value is simply to write the |
792
|
|
|
|
|
|
|
name of the feature unadorned. Since privatives are either true or undef, this |
793
|
|
|
|
|
|
|
is sufficient to declare the existence of a privative. E.g., since both |
794
|
|
|
|
|
|
|
[labial] and [voice] are privatives in the default feature set, the following |
795
|
|
|
|
|
|
|
line suffices to define the symbol 'b' (though you may want more specificity): |
796
|
|
|
|
|
|
|
|
797
|
|
|
|
|
|
|
b labial voice |
798
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
=item * |
800
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
B<[+-*]feature>: The characters before the feature correspond to setting the |
802
|
|
|
|
|
|
|
value to true, false, and undef, respectively. This is the preferred way to set |
803
|
|
|
|
|
|
|
binary features, and the only way to assert that a feature of any type must be |
804
|
|
|
|
|
|
|
undef. For example, the symbol 'd`' for a voiced retroflex stop can be defined |
805
|
|
|
|
|
|
|
with the following line: |
806
|
|
|
|
|
|
|
|
807
|
|
|
|
|
|
|
d` -anterior -distributed voice |
808
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
=item * |
810
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
B<feature=value>: Whatever precedes the equals sign is the feature name; |
812
|
|
|
|
|
|
|
whatever follows is the value. This is the preferred way to set scalar values, |
813
|
|
|
|
|
|
|
and the only way to set scalar values to anything other than undef, 0, or 1. |
814
|
|
|
|
|
|
|
|
815
|
|
|
|
|
|
|
=back |
816
|
|
|
|
|
|
|
|
817
|
|
|
|
|
|
|
Feature definitions may work if you use them other than as recommended, |
818
|
|
|
|
|
|
|
but the recommended forms are provided for maximum readability. To be |
819
|
|
|
|
|
|
|
exact, however, the following are synonymous: |
820
|
|
|
|
|
|
|
|
821
|
|
|
|
|
|
|
# Synonymous one way |
822
|
|
|
|
|
|
|
labial |
823
|
|
|
|
|
|
|
+labial |
824
|
|
|
|
|
|
|
labial=1 |
825
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
# Synonymous in a different way |
827
|
|
|
|
|
|
|
-labial # only if 'labial' is binary |
828
|
|
|
|
|
|
|
labial=0 |
829
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
Since this behavior is partly dependent on the implementation of text and |
831
|
|
|
|
|
|
|
number forms in the Features module, the synonymity of these forms is not |
832
|
|
|
|
|
|
|
guaranteed to remain constant in the future. However, every effort will be |
833
|
|
|
|
|
|
|
made to guarantee that the I<recommended> forms won't change their |
834
|
|
|
|
|
|
|
behavior. |
835
|
|
|
|
|
|
|
|
836
|
|
|
|
|
|
|
You may begin comments with '#'--anything between the first '#' on a line and |
837
|
|
|
|
|
|
|
the end of that line will be ignored. Consequently, '#' cannot be used as a |
838
|
|
|
|
|
|
|
symbol in a loaded file (though it is a valid symbol elsewhere, and can be |
839
|
|
|
|
|
|
|
assigned via C<symbol>). |
840
|
|
|
|
|
|
|
|
841
|
|
|
|
|
|
|
As with C<symbol>, symbol definitions beginning or ending with '*' will be |
842
|
|
|
|
|
|
|
interpreted as diacritics. Diacritic symbols may be defined in exactly the same |
843
|
|
|
|
|
|
|
way as regular symbols. Thus, to define a tilde as a diacritic for nasality, |
844
|
|
|
|
|
|
|
you might use the following simple line: |
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
*~ nasal |
847
|
|
|
|
|
|
|
|
848
|
|
|
|
|
|
|
You should only define terminal (non-node) features in your segment |
849
|
|
|
|
|
|
|
definitions. The loadfile method is unable to deal with features that |
850
|
|
|
|
|
|
|
are nodes, and will generate errors if you try to assign to a node. |
851
|
|
|
|
|
|
|
|
852
|
|
|
|
|
|
|
If you don't give a file name, then the default symbol set is loaded. This |
853
|
|
|
|
|
|
|
is described in L<"THE DEFAULT SYMBOL SET">. |
854
|
|
|
|
|
|
|
|
855
|
|
|
|
|
|
|
=head1 INDEXING |
856
|
|
|
|
|
|
|
|
857
|
|
|
|
|
|
|
This section endeavors to explain the purpose of indexing in |
858
|
|
|
|
|
|
|
Lingua::Phonology::Symbols, and how you can control it. |
859
|
|
|
|
|
|
|
|
860
|
|
|
|
|
|
|
As of v0.2, this module uses an efficient hash comparison algorithm that |
861
|
|
|
|
|
|
|
greatly speeds up calls to C<spell> and C<score>. This algorithm works by |
862
|
|
|
|
|
|
|
compiling an index of the features and values that prototype segments have, |
863
|
|
|
|
|
|
|
then only comparing against those prototypes that have some chance of winning. |
864
|
|
|
|
|
|
|
Indexing itself is a somewhat costly procedure, but fortunately, it only needs |
865
|
|
|
|
|
|
|
to be done once. Unfortunately, it needs to be done again any time that the |
866
|
|
|
|
|
|
|
list of symbols or the prototypes for those symbols is changed. |
867
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
Fortunately again, Lingua::Phonology::Symbols will take care of this for you. |
869
|
|
|
|
|
|
|
Whenever a method is called that might require reindexing, an internal flag on |
870
|
|
|
|
|
|
|
the object is set. The next time that you ask this module to C<spell> |
871
|
|
|
|
|
|
|
something, it will first reindex, then proceed to spelling. The methods that |
872
|
|
|
|
|
|
|
will trigger reindexing are C
|
873
|
|
|
|
|
|
|
prototype>. This reindexing is done "just in time", and isn't done more than is |
874
|
|
|
|
|
|
|
necessary. |
875
|
|
|
|
|
|
|
|
876
|
|
|
|
|
|
|
Unfortunately, not all calls to those methods actually warrant reindexing, so |
877
|
|
|
|
|
|
|
if you call those methods a lot, you might want to have manual control over |
878
|
|
|
|
|
|
|
when the hash is reindexed. To do this, you can use the method |
879
|
|
|
|
|
|
|
C, which will disable automatic reindexing. You then will have |
880
|
|
|
|
|
|
|
to call C<reindex> yourself whenever it's warranted. If you get tired of this |
881
|
|
|
|
|
|
|
and want reindexing back, you can call C. |
882
|
|
|
|
|
|
|
|
883
|
|
|
|
|
|
|
The author of this module has never felt the need to work with auto reindexing |
884
|
|
|
|
|
|
|
off, for what it's worth. |
885
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
=head1 THE DEFAULT SYMBOL SET |
887
|
|
|
|
|
|
|
|
888
|
|
|
|
|
|
|
Currently, Lingua::Phonology::Symbols comes with a set of symbols that can |
889
|
|
|
|
|
|
|
be loaded by calling loadfile with no arguments, like so: |
890
|
|
|
|
|
|
|
|
891
|
|
|
|
|
|
|
$symbols->loadfile; |
892
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
The symbol set thus loaded is based on the X-SAMPA system for encoding the IPA |
894
|
|
|
|
|
|
|
into ASCII. You can read more about X-SAMPA at |
895
|
|
|
|
|
|
|
L<http://www.phon.ucl.ac.uk/home/sampa/x-sampa.htm>. The default does not |
896
|
|
|
|
|
|
|
contain all of the symbols in X-SAMPA, but it does contain a lot of them, plus |
897
|
|
|
|
|
|
|
a few extra symbols for IPA characters not covered in X-SAMPA. These symbols are: |
898
|
|
|
|
|
|
|
|
899
|
|
|
|
|
|
|
# Consonants |
900
|
|
|
|
|
|
|
# Labials |
901
|
|
|
|
|
|
|
p voiceless labial stop |
902
|
|
|
|
|
|
|
b voiced labial stop |
903
|
|
|
|
|
|
|
f voiceless labiodental fricative |
904
|
|
|
|
|
|
|
v voiced labiodental fricative |
905
|
|
|
|
|
|
|
m labial nasal |
906
|
|
|
|
|
|
|
|
907
|
|
|
|
|
|
|
# Dentals |
908
|
|
|
|
|
|
|
t voiceless dental stop |
909
|
|
|
|
|
|
|
d voiced dental stop |
910
|
|
|
|
|
|
|
T voiceless dental fricative |
911
|
|
|
|
|
|
|
D voiced dental fricative |
912
|
|
|
|
|
|
|
s voiceless alveolar fricative |
913
|
|
|
|
|
|
|
z voiced alveolar fricative |
914
|
|
|
|
|
|
|
n alveolar nasal |
915
|
|
|
|
|
|
|
l alveolar lateral |
916
|
|
|
|
|
|
|
r alveolar rhotic |
917
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
# Postalveolars |
919
|
|
|
|
|
|
|
tS voiceless postalveolar stop |
920
|
|
|
|
|
|
|
dZ voiced postalveolar stop |
921
|
|
|
|
|
|
|
S voiceless postalveolar fricative |
922
|
|
|
|
|
|
|
Z voiced postalveolar fricative |
923
|
|
|
|
|
|
|
|
924
|
|
|
|
|
|
|
# Retroflex |
925
|
|
|
|
|
|
|
t` voiceless retroflex stop |
926
|
|
|
|
|
|
|
d` voiced retroflex stop |
927
|
|
|
|
|
|
|
s` voiceless retroflex fricative |
928
|
|
|
|
|
|
|
z` voiced retroflex fricative |
929
|
|
|
|
|
|
|
n` retroflex nasal |
930
|
|
|
|
|
|
|
l` retroflex lateral |
931
|
|
|
|
|
|
|
r` retroflex rhotic |
932
|
|
|
|
|
|
|
|
933
|
|
|
|
|
|
|
# Palatal |
934
|
|
|
|
|
|
|
c voiceless palatal stop |
935
|
|
|
|
|
|
|
d\ voiced palatal stop |
936
|
|
|
|
|
|
|
C voiceless palatal fricative |
937
|
|
|
|
|
|
|
j\ voiced palatal fricative |
938
|
|
|
|
|
|
|
J palatal nasal |
939
|
|
|
|
|
|
|
L palatal lateral |
940
|
|
|
|
|
|
|
|
941
|
|
|
|
|
|
|
# Velar |
942
|
|
|
|
|
|
|
k voiceless velar stop |
943
|
|
|
|
|
|
|
g voiced velar stop |
944
|
|
|
|
|
|
|
x voiceless velar fricative |
945
|
|
|
|
|
|
|
G voiced velar fricative |
946
|
|
|
|
|
|
|
N velar nasal |
947
|
|
|
|
|
|
|
|
948
|
|
|
|
|
|
|
# Uvular |
949
|
|
|
|
|
|
|
q voiceless uvular stop |
950
|
|
|
|
|
|
|
G\ voiced uvular stop |
951
|
|
|
|
|
|
|
X voiceless uvular fricative |
952
|
|
|
|
|
|
|
R voiced uvular fricative |
953
|
|
|
|
|
|
|
N\ uvular nasal |
954
|
|
|
|
|
|
|
R\ uvular rhotic |
955
|
|
|
|
|
|
|
|
956
|
|
|
|
|
|
|
# Pharyngeal |
957
|
|
|
|
|
|
|
q\ voiceless pharyngeal stop |
958
|
|
|
|
|
|
|
X\ voiceless pharyngeal fricative |
959
|
|
|
|
|
|
|
?\ voiced pharyngeal fricative |
960
|
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
# Glottal |
962
|
|
|
|
|
|
|
? voiceless glottal stop |
963
|
|
|
|
|
|
|
h voiceless glottal fricative |
964
|
|
|
|
|
|
|
h\ voiced glottal fricative |
965
|
|
|
|
|
|
|
|
966
|
|
|
|
|
|
|
# Vowels |
967
|
|
|
|
|
|
|
# High Front Vowels |
968
|
|
|
|
|
|
|
i high front tense |
969
|
|
|
|
|
|
|
I high front |
970
|
|
|
|
|
|
|
y high front rounded tense |
971
|
|
|
|
|
|
|
Y high front rounded |
972
|
|
|
|
|
|
|
j high front semivowel |
973
|
|
|
|
|
|
|
H high front rounded semivowel |
974
|
|
|
|
|
|
|
|
975
|
|
|
|
|
|
|
# High Back Vowels |
976
|
|
|
|
|
|
|
u high back rounded tense |
977
|
|
|
|
|
|
|
U high back rounded |
978
|
|
|
|
|
|
|
M high back unrounded |
979
|
|
|
|
|
|
|
w high back rounded semivowel |
980
|
|
|
|
|
|
|
|
981
|
|
|
|
|
|
|
# High Central Vowels |
982
|
|
|
|
|
|
|
1 high central |
983
|
|
|
|
|
|
|
} high central rounded |
984
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
# Mid Front Vowels |
986
|
|
|
|
|
|
|
e mid front tense |
987
|
|
|
|
|
|
|
E mid front |
988
|
|
|
|
|
|
|
2 mid front rounded tense |
989
|
|
|
|
|
|
|
9 mid front rounded |
990
|
|
|
|
|
|
|
|
991
|
|
|
|
|
|
|
# Mid Back Vowels |
992
|
|
|
|
|
|
|
o mid back rounded tense |
993
|
|
|
|
|
|
|
O mid back rounded |
994
|
|
|
|
|
|
|
W mid back unrounded tense |
995
|
|
|
|
|
|
|
V mid back unrounded |
996
|
|
|
|
|
|
|
|
997
|
|
|
|
|
|
|
# Mid Central Vowels |
998
|
|
|
|
|
|
|
@ mid central |
999
|
|
|
|
|
|
|
8 mid central rounded |
1000
|
|
|
|
|
|
|
|
1001
|
|
|
|
|
|
|
# Low Vowels |
1002
|
|
|
|
|
|
|
a low |
1003
|
|
|
|
|
|
|
Q low rounded |
1004
|
|
|
|
|
|
|
|
1005
|
|
|
|
|
|
|
# Diacritics |
1006
|
|
|
|
|
|
|
~ nasal |
1007
|
|
|
|
|
|
|
_l lateral |
1008
|
|
|
|
|
|
|
_v voiced |
1009
|
|
|
|
|
|
|
_0 voiceless |
1010
|
|
|
|
|
|
|
_h aspirated (spread) |
1011
|
|
|
|
|
|
|
_~ creaky voice (constricted) |
1012
|
|
|
|
|
|
|
_w labialized |
1013
|
|
|
|
|
|
|
_d laminalized |
1014
|
|
|
|
|
|
|
_G velarized |
1015
|
|
|
|
|
|
|
_? pharyngealized |
1016
|
|
|
|
|
|
|
|
1017
|
|
|
|
|
|
|
The symbols are defined with the following XML structure, which you can use as |
1018
|
|
|
|
|
|
|
a model if you need to write your own symbols definition: |
1019
|
|
|
|
|
|
|
|
1020
|
|
|
|
|
|
|
|
1021
|
|
|
|
|
|
|
|
1022
|
|
|
|
|
|
|
|
1023
|
|
|
|
|
|
|
|
1024
|
|
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
|
1026
|
|
|
|
|
|
|
|
1027
|
|
|
|
|
|
|
|
1028
|
|
|
|
|
|
|
|
1029
|
|
|
|
|
|
|
|
1030
|
|
|
|
|
|
|
|
1031
|
|
|
|
|
|
|
|
1032
|
|
|
|
|
|
|
|
1033
|
|
|
|
|
|
|
|
1034
|
|
|
|
|
|
|
|
1035
|
|
|
|
|
|
|
|
1036
|
|
|
|
|
|
|
|
1037
|
|
|
|
|
|
|
|
1038
|
|
|
|
|
|
|
|
1039
|
|
|
|
|
|
|
|
1040
|
|
|
|
|
|
|
|
1041
|
|
|
|
|
|
|
|
1042
|
|
|
|
|
|
|
|
1043
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
|
1045
|
|
|
|
|
|
|
|
1046
|
|
|
|
|
|
|
|
1047
|
|
|
|
|
|
|
|
1048
|
|
|
|
|
|
|
|
1049
|
|
|
|
|
|
|
|
1050
|
|
|
|
|
|
|
|
1051
|
|
|
|
|
|
|
|
1052
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
|
1054
|
|
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
|
1056
|
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
|
1058
|
|
|
|
|
|
|
|
1059
|
|
|
|
|
|
|
|
1060
|
|
|
|
|
|
|
|
1061
|
|
|
|
|
|
|
|
1062
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
|
1064
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
|
1066
|
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
|
|
1068
|
|
|
|
|
|
|
|
1069
|
|
|
|
|
|
|
|
1070
|
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
|
1072
|
|
|
|
|
|
|
|
1073
|
|
|
|
|
|
|
|
1074
|
|
|
|
|
|
|
|
1075
|
|
|
|
|
|
|
|
1076
|
|
|
|
|
|
|
|
1077
|
|
|
|
|
|
|
|
1078
|
|
|
|
|
|
|
|
1079
|
|
|
|
|
|
|
|
1080
|
|
|
|
|
|
|
|
1081
|
|
|
|
|
|
|
|
1082
|
|
|
|
|
|
|
|
1083
|
|
|
|
|
|
|
|
1084
|
|
|
|
|
|
|
|
1085
|
|
|
|
|
|
|
|
1086
|
|
|
|
|
|
|
|
1087
|
|
|
|
|
|
|
|
1088
|
|
|
|
|
|
|
|
1089
|
|
|
|
|
|
|
|
1090
|
|
|
|
|
|
|
|
1091
|
|
|
|
|
|
|
|
1092
|
|
|
|
|
|
|
|
1093
|
|
|
|
|
|
|
|
1094
|
|
|
|
|
|
|
|
1095
|
|
|
|
|
|
|
|
1096
|
|
|
|
|
|
|
|
1097
|
|
|
|
|
|
|
|
1098
|
|
|
|
|
|
|
|
1099
|
|
|
|
|
|
|
|
1100
|
|
|
|
|
|
|
|
1101
|
|
|
|
|
|
|
|
1102
|
|
|
|
|
|
|
|
1103
|
|
|
|
|
|
|
|
1104
|
|
|
|
|
|
|
|
1105
|
|
|
|
|
|
|
|
1106
|
|
|
|
|
|
|
|
1107
|
|
|
|
|
|
|
|
1108
|
|
|
|
|
|
|
|
1109
|
|
|
|
|
|
|
|
1110
|
|
|
|
|
|
|
|
1111
|
|
|
|
|
|
|
|
1112
|
|
|
|
|
|
|
|
1113
|
|
|
|
|
|
|
|
1114
|
|
|
|
|
|
|
|
1115
|
|
|
|
|
|
|
|
1116
|
|
|
|
|
|
|
|
1117
|
|
|
|
|
|
|
|
1118
|
|
|
|
|
|
|
|
1119
|
|
|
|
|
|
|
|
1120
|
|
|
|
|
|
|
|
1121
|
|
|
|
|
|
|
|
1122
|
|
|
|
|
|
|
|
1123
|
|
|
|
|
|
|
|
1124
|
|
|
|
|
|
|
|
1125
|
|
|
|
|
|
|
|
1126
|
|
|
|
|
|
|
|
1127
|
|
|
|
|
|
|
|
1128
|
|
|
|
|
|
|
|
1129
|
|
|
|
|
|
|
|
1130
|
|
|
|
|
|
|
|
1131
|
|
|
|
|
|
|
|
1132
|
|
|
|
|
|
|
|
1133
|
|
|
|
|
|
|
|
1134
|
|
|
|
|
|
|
|
1135
|
|
|
|
|
|
|
|
1136
|
|
|
|
|
|
|
|
1137
|
|
|
|
|
|
|
|
1138
|
|
|
|
|
|
|
|
1139
|
|
|
|
|
|
|
|
1140
|
|
|
|
|
|
|
|
1141
|
|
|
|
|
|
|
|
1142
|
|
|
|
|
|
|
|
1143
|
|
|
|
|
|
|
|
1144
|
|
|
|
|
|
|
|
1145
|
|
|
|
|
|
|
|
1146
|
|
|
|
|
|
|
|
1147
|
|
|
|
|
|
|
|
1148
|
|
|
|
|
|
|
|
1149
|
|
|
|
|
|
|
|
1150
|
|
|
|
|
|
|
|
1151
|
|
|
|
|
|
|
|
1152
|
|
|
|
|
|
|
|
1153
|
|
|
|
|
|
|
|
1154
|
|
|
|
|
|
|
|
1155
|
|
|
|
|
|
|
|
1156
|
|
|
|
|
|
|
|
1157
|
|
|
|
|
|
|
|
1158
|
|
|
|
|
|
|
|
1159
|
|
|
|
|
|
|
|
1160
|
|
|
|
|
|
|
|
1161
|
|
|
|
|
|
|
|
1162
|
|
|
|
|
|
|
|
1163
|
|
|
|
|
|
|
|
1164
|
|
|
|
|
|
|
|
1165
|
|
|
|
|
|
|
|
1166
|
|
|
|
|
|
|
|
1167
|
|
|
|
|
|
|
|
1168
|
|
|
|
|
|
|
|
1169
|
|
|
|
|
|
|
|
1170
|
|
|
|
|
|
|
|
1171
|
|
|
|
|
|
|
|
1172
|
|
|
|
|
|
|
|
1173
|
|
|
|
|
|
|
|
1174
|
|
|
|
|
|
|
|
1175
|
|
|
|
|
|
|
|
1176
|
|
|
|
|
|
|
|
1177
|
|
|
|
|
|
|
|
1178
|
|
|
|
|
|
|
|
1179
|
|
|
|
|
|
|
|
1180
|
|
|
|
|
|
|
|
1181
|
|
|
|
|
|
|
|
1182
|
|
|
|
|
|
|
|
1183
|
|
|
|
|
|
|
|
1184
|
|
|
|
|
|
|
|
1185
|
|
|
|
|
|
|
|
1186
|
|
|
|
|
|
|
|
1187
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
|
1189
|
|
|
|
|
|
|
|
1190
|
|
|
|
|
|
|
|
1191
|
|
|
|
|
|
|
|
1192
|
|
|
|
|
|
|
|
1193
|
|
|
|
|
|
|
|
1194
|
|
|
|
|
|
|
|
1195
|
|
|
|
|
|
|
|
1196
|
|
|
|
|
|
|
|
1197
|
|
|
|
|
|
|
|
1198
|
|
|
|
|
|
|
|
1199
|
|
|
|
|
|
|
|
1200
|
|
|
|
|
|
|
|
1201
|
|
|
|
|
|
|
|
1202
|
|
|
|
|
|
|
|
1203
|
|
|
|
|
|
|
|
1204
|
|
|
|
|
|
|
|
1205
|
|
|
|
|
|
|
|
1206
|
|
|
|
|
|
|
|
1207
|
|
|
|
|
|
|
|
1208
|
|
|
|
|
|
|
|
1209
|
|
|
|
|
|
|
|
1210
|
|
|
|
|
|
|
|
1211
|
|
|
|
|
|
|
|
1212
|
|
|
|
|
|
|
|
1213
|
|
|
|
|
|
|
|
1214
|
|
|
|
|
|
|
|
1215
|
|
|
|
|
|
|
|
1216
|
|
|
|
|
|
|
|
1217
|
|
|
|
|
|
|
|
1218
|
|
|
|
|
|
|
|
1219
|
|
|
|
|
|
|
|
1220
|
|
|
|
|
|
|
|
1221
|
|
|
|
|
|
|
|
1222
|
|
|
|
|
|
|
|
1223
|
|
|
|
|
|
|
|
1224
|
|
|
|
|
|
|
|
1225
|
|
|
|
|
|
|
|
1226
|
|
|
|
|
|
|
|
1227
|
|
|
|
|
|
|
|
1228
|
|
|
|
|
|
|
|
1229
|
|
|
|
|
|
|
|
1230
|
|
|
|
|
|
|
|
1231
|
|
|
|
|
|
|
|
1232
|
|
|
|
|
|
|
|
1233
|
|
|
|
|
|
|
|
1234
|
|
|
|
|
|
|
|
1235
|
|
|
|
|
|
|
|
1236
|
|
|
|
|
|
|
|
1237
|
|
|
|
|
|
|
|
1238
|
|
|
|
|
|
|
|
1239
|
|
|
|
|
|
|
|
1240
|
|
|
|
|
|
|
|
1241
|
|
|
|
|
|
|
|
1242
|
|
|
|
|
|
|
|
1243
|
|
|
|
|
|
|
|
1244
|
|
|
|
|
|
|
|
1245
|
|
|
|
|
|
|
|
1246
|
|
|
|
|
|
|
|
1247
|
|
|
|
|
|
|
|
1248
|
|
|
|
|
|
|
|
1249
|
|
|
|
|
|
|
|
1250
|
|
|
|
|
|
|
|
1251
|
|
|
|
|
|
|
|
1252
|
|
|
|
|
|
|
|
1253
|
|
|
|
|
|
|
|
1254
|
|
|
|
|
|
|
|
1255
|
|
|
|
|
|
|
|
1256
|
|
|
|
|
|
|
|
1257
|
|
|
|
|
|
|
|
1258
|
|
|
|
|
|
|
|
1259
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
|
1261
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
|
1263
|
|
|
|
|
|
|
|
1264
|
|
|
|
|
|
|
|
1265
|
|
|
|
|
|
|
|
1266
|
|
|
|
|
|
|
|
1267
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
|
1269
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
|
1271
|
|
|
|
|
|
|
|
1272
|
|
|
|
|
|
|
|
1273
|
|
|
|
|
|
|
|
1274
|
|
|
|
|
|
|
|
1275
|
|
|
|
|
|
|
|
1276
|
|
|
|
|
|
|
|
1277
|
|
|
|
|
|
|
|
1278
|
|
|
|
|
|
|
|
1279
|
|
|
|
|
|
|
|
1280
|
|
|
|
|
|
|
|
1281
|
|
|
|
|
|
|
|
1282
|
|
|
|
|
|
|
|
1283
|
|
|
|
|
|
|
|
1284
|
|
|
|
|
|
|
|
1285
|
|
|
|
|
|
|
|
1286
|
|
|
|
|
|
|
|
1287
|
|
|
|
|
|
|
|
1288
|
|
|
|
|
|
|
|
1289
|
|
|
|
|
|
|
|
1290
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
|
1292
|
|
|
|
|
|
|
|
1293
|
|
|
|
|
|
|
|
1294
|
|
|
|
|
|
|
|
1295
|
|
|
|
|
|
|
|
1296
|
|
|
|
|
|
|
|
1297
|
|
|
|
|
|
|
|
1298
|
|
|
|
|
|
|
|
1299
|
|
|
|
|
|
|
|
1300
|
|
|
|
|
|
|
|
1301
|
|
|
|
|
|
|
|
1302
|
|
|
|
|
|
|
|
1303
|
|
|
|
|
|
|
|
1304
|
|
|
|
|
|
|
|
1305
|
|
|
|
|
|
|
|
1306
|
|
|
|
|
|
|
|
1307
|
|
|
|
|
|
|
|
1308
|
|
|
|
|
|
|
|
1309
|
|
|
|
|
|
|
|
1310
|
|
|
|
|
|
|
|
1311
|
|
|
|
|
|
|
|
1312
|
|
|
|
|
|
|
|
1313
|
|
|
|
|
|
|
|
1314
|
|
|
|
|
|
|
|
1315
|
|
|
|
|
|
|
|
1316
|
|
|
|
|
|
|
|
1317
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
|
1319
|
|
|
|
|
|
|
|
1320
|
|
|
|
|
|
|
|
1321
|
|
|
|
|
|
|
|
1322
|
|
|
|
|
|
|
|
1323
|
|
|
|
|
|
|
|
1324
|
|
|
|
|
|
|
|
1325
|
|
|
|
|
|
|
|
1326
|
|
|
|
|
|
|
|
1327
|
|
|
|
|
|
|
|
1328
|
|
|
|
|
|
|
|
1329
|
|
|
|
|
|
|
|
1330
|
|
|
|
|
|
|
|
1331
|
|
|
|
|
|
|
|
1332
|
|
|
|
|
|
|
|
1333
|
|
|
|
|
|
|
|
1334
|
|
|
|
|
|
|
|
1335
|
|
|
|
|
|
|
|
1336
|
|
|
|
|
|
|
|
1337
|
|
|
|
|
|
|
|
1338
|
|
|
|
|
|
|
|
1339
|
|
|
|
|
|
|
|
1340
|
|
|
|
|
|
|
|
1341
|
|
|
|
|
|
|
|
1342
|
|
|
|
|
|
|
|
1343
|
|
|
|
|
|
|
|
1344
|
|
|
|
|
|
|
|
1345
|
|
|
|
|
|
|
|
1346
|
|
|
|
|
|
|
|
1347
|
|
|
|
|
|
|
|
1348
|
|
|
|
|
|
|
|
1349
|
|
|
|
|
|
|
|
1350
|
|
|
|
|
|
|
|
1351
|
|
|
|
|
|
|
|
1352
|
|
|
|
|
|
|
|
1353
|
|
|
|
|
|
|
|
1354
|
|
|
|
|
|
|
|
1355
|
|
|
|
|
|
|
|
1356
|
|
|
|
|
|
|
|
1357
|
|
|
|
|
|
|
|
1358
|
|
|
|
|
|
|
|
1359
|
|
|
|
|
|
|
|
1360
|
|
|
|
|
|
|
|
1361
|
|
|
|
|
|
|
|
1362
|
|
|
|
|
|
|
|
1363
|
|
|
|
|
|
|
|
1364
|
|
|
|
|
|
|
|
1365
|
|
|
|
|
|
|
|
1366
|
|
|
|
|
|
|
|
1367
|
|
|
|
|
|
|
|
1368
|
|
|
|
|
|
|
|
1369
|
|
|
|
|
|
|
|
1370
|
|
|
|
|
|
|
|
1371
|
|
|
|
|
|
|
|
1372
|
|
|
|
|
|
|
|
1373
|
|
|
|
|
|
|
|
1374
|
|
|
|
|
|
|
|
1375
|
|
|
|
|
|
|
|
1376
|
|
|
|
|
|
|
|
1377
|
|
|
|
|
|
|
|
1378
|
|
|
|
|
|
|
|
1379
|
|
|
|
|
|
|
|
1380
|
|
|
|
|
|
|
|
1381
|
|
|
|
|
|
|
|
1382
|
|
|
|
|
|
|
|
1383
|
|
|
|
|
|
|
|
1384
|
|
|
|
|
|
|
|
1385
|
|
|
|
|
|
|
|
1386
|
|
|
|
|
|
|
|
1387
|
|
|
|
|
|
|
|
1388
|
|
|
|
|
|
|
|
1389
|
|
|
|
|
|
|
|
1390
|
|
|
|
|
|
|
|
1391
|
|
|
|
|
|
|
|
1392
|
|
|
|
|
|
|
|
1393
|
|
|
|
|
|
|
|
1394
|
|
|
|
|
|
|
|
1395
|
|
|
|
|
|
|
|
1396
|
|
|
|
|
|
|
|
1397
|
|
|
|
|
|
|
|
1398
|
|
|
|
|
|
|
|
1399
|
|
|
|
|
|
|
|
1400
|
|
|
|
|
|
|
|
1401
|
|
|
|
|
|
|
|
1402
|
|
|
|
|
|
|
|
1403
|
|
|
|
|
|
|
|
1404
|
|
|
|
|
|
|
|
1405
|
|
|
|
|
|
|
|
1406
|
|
|
|
|
|
|
|
1407
|
|
|
|
|
|
|
|
1408
|
|
|
|
|
|
|
|
1409
|
|
|
|
|
|
|
|
1410
|
|
|
|
|
|
|
|
1411
|
|
|
|
|
|
|
|
1412
|
|
|
|
|
|
|
|
1413
|
|
|
|
|
|
|
|
1414
|
|
|
|
|
|
|
|
1415
|
|
|
|
|
|
|
|
1416
|
|
|
|
|
|
|
|
1417
|
|
|
|
|
|
|
|
1418
|
|
|
|
|
|
|
|
1419
|
|
|
|
|
|
|
|
1420
|
|
|
|
|
|
|
|
1421
|
|
|
|
|
|
|
|
1422
|
|
|
|
|
|
|
|
1423
|
|
|
|
|
|
|
|
1424
|
|
|
|
|
|
|
|
1425
|
|
|
|
|
|
|
|
1426
|
|
|
|
|
|
|
|
1427
|
|
|
|
|
|
|
|
1428
|
|
|
|
|
|
|
|
1429
|
|
|
|
|
|
|
|
1430
|
|
|
|
|
|
|
|
1431
|
|
|
|
|
|
|
|
1432
|
|
|
|
|
|
|
|
1433
|
|
|
|
|
|
|
|
1434
|
|
|
|
|
|
|
|
1435
|
|
|
|
|
|
|
|
1436
|
|
|
|
|
|
|
|
1437
|
|
|
|
|
|
|
|
1438
|
|
|
|
|
|
|
|
1439
|
|
|
|
|
|
|
|
1440
|
|
|
|
|
|
|
|
1441
|
|
|
|
|
|
|
|
1442
|
|
|
|
|
|
|
|
1443
|
|
|
|
|
|
|
|
1444
|
|
|
|
|
|
|
|
1445
|
|
|
|
|
|
|
|
1446
|
|
|
|
|
|
|
|
1447
|
|
|
|
|
|
|
|
1448
|
|
|
|
|
|
|
|
1449
|
|
|
|
|
|
|
|
1450
|
|
|
|
|
|
|
|
1451
|
|
|
|
|
|
|
|
1452
|
|
|
|
|
|
|
|
1453
|
|
|
|
|
|
|
|
1454
|
|
|
|
|
|
|
|
1455
|
|
|
|
|
|
|
|
1456
|
|
|
|
|
|
|
|
1457
|
|
|
|
|
|
|
|
1458
|
|
|
|
|
|
|
|
1459
|
|
|
|
|
|
|
|
1460
|
|
|
|
|
|
|
|
1461
|
|
|
|
|
|
|
|
1462
|
|
|
|
|
|
|
|
1463
|
|
|
|
|
|
|
|
1464
|
|
|
|
|
|
|
|
1465
|
|
|
|
|
|
|
|
1466
|
|
|
|
|
|
|
|
1467
|
|
|
|
|
|
|
|
1468
|
|
|
|
|
|
|
|
1469
|
|
|
|
|
|
|
|
1470
|
|
|
|
|
|
|
|
1471
|
|
|
|
|
|
|
|
1472
|
|
|
|
|
|
|
|
1473
|
|
|
|
|
|
|
|
1474
|
|
|
|
|
|
|
|
1475
|
|
|
|
|
|
|
|
1476
|
|
|
|
|
|
|
|
1477
|
|
|
|
|
|
|
|
1478
|
|
|
|
|
|
|
|
1479
|
|
|
|
|
|
|
|
1480
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
|
1482
|
|
|
|
|
|
|
|
1483
|
|
|
|
|
|
|
|
1484
|
|
|
|
|
|
|
|
1485
|
|
|
|
|
|
|
|
1486
|
|
|
|
|
|
|
|
1487
|
|
|
|
|
|
|
|
1488
|
|
|
|
|
|
|
|
1489
|
|
|
|
|
|
|
|
1490
|
|
|
|
|
|
|
|
1491
|
|
|
|
|
|
|
|
1492
|
|
|
|
|
|
|
|
1493
|
|
|
|
|
|
|
|
1494
|
|
|
|
|
|
|
|
1495
|
|
|
|
|
|
|
|
1496
|
|
|
|
|
|
|
|
1497
|
|
|
|
|
|
|
|
1498
|
|
|
|
|
|
|
|
1499
|
|
|
|
|
|
|
|
1500
|
|
|
|
|
|
|
|
1501
|
|
|
|
|
|
|
|
1502
|
|
|
|
|
|
|
|
1503
|
|
|
|
|
|
|
|
1504
|
|
|
|
|
|
|
|
1505
|
|
|
|
|
|
|
|
1506
|
|
|
|
|
|
|
|
1507
|
|
|
|
|
|
|
|
1508
|
|
|
|
|
|
|
|
1509
|
|
|
|
|
|
|
|
1510
|
|
|
|
|
|
|
|
1511
|
|
|
|
|
|
|
|
1512
|
|
|
|
|
|
|
|
1513
|
|
|
|
|
|
|
|
1514
|
|
|
|
|
|
|
|
1515
|
|
|
|
|
|
|
|
1516
|
|
|
|
|
|
|
|
1517
|
|
|
|
|
|
|
|
1518
|
|
|
|
|
|
|
|
1519
|
|
|
|
|
|
|
|
1520
|
|
|
|
|
|
|
|
1521
|
|
|
|
|
|
|
|
1522
|
|
|
|
|
|
|
|
1523
|
|
|
|
|
|
|
|
1524
|
|
|
|
|
|
|
|
1525
|
|
|
|
|
|
|
|
1526
|
|
|
|
|
|
|
|
1527
|
|
|
|
|
|
|
|
1528
|
|
|
|
|
|
|
|
1529
|
|
|
|
|
|
|
|
1530
|
|
|
|
|
|
|
|
1531
|
|
|
|
|
|
|
|
1532
|
|
|
|
|
|
|
|
1533
|
|
|
|
|
|
|
|
1534
|
|
|
|
|
|
|
|
1535
|
|
|
|
|
|
|
|
1536
|
|
|
|
|
|
|
|
1537
|
|
|
|
|
|
|
|
1538
|
|
|
|
|
|
|
|
1539
|
|
|
|
|
|
|
|
1540
|
|
|
|
|
|
|
|
1541
|
|
|
|
|
|
|
These symbols depend upon the default feature set. If you aren't using the |
1542
|
|
|
|
|
|
|
default feature set, you're on your own. If you've modified the default |
1543
|
|
|
|
|
|
|
feature set, these may still work, though you'll probably have to tweak |
1544
|
|
|
|
|
|
|
them. YMMV. |
1545
|
|
|
|
|
|
|
|
1546
|
|
|
|
|
|
|
=head1 SEE ALSO |
1547
|
|
|
|
|
|
|
|
1548
|
|
|
|
|
|
|
Lingua::Phonology, Lingua::Phonology::Features |
1549
|
|
|
|
|
|
|
|
1550
|
|
|
|
|
|
|
=head1 AUTHOR |
1551
|
|
|
|
|
|
|
|
1552
|
|
|
|
|
|
|
Jesse S. Bangs > |
1553
|
|
|
|
|
|
|
|
1554
|
|
|
|
|
|
|
=head1 LICENSE |
1555
|
|
|
|
|
|
|
|
1556
|
|
|
|
|
|
|
This module is free software. You can distribute and/or modify it under the |
1557
|
|
|
|
|
|
|
same terms as Perl itself. |
1558
|
|
|
|
|
|
|
|
1559
|
|
|
|
|
|
|
=cut |
1560
|
|
|
|
|
|
|
|
1561
|
|
|
|
|
|
|
__DATA__ |