File Coverage

blib/lib/Lingua/Phonology/Symbols.pm
Criterion Covered Total %
statement 13 15 86.6
branch n/a
condition n/a
subroutine 5 5 100.0
pod n/a
total 18 20 90.0


line stmt bran cond sub pod time code
1             #!/usr/bin/perl -w
2              
3             package Lingua::Phonology::Symbols;
4              
5             =head1 NAME
6              
7             Lingua::Phonology::Symbols - a module for associating symbols with
8             segment prototypes.
9              
10             =head1 SYNOPSIS
11              
12             use Lingua::Phonology;
13             $phono = new Lingua::Phonology;
14              
15             # Load the default features
16             $phono->features->loadfile;
17              
18             # Load the default symbols
19             $symbols = $phono->symbols;
20             $symbols->loadfile;
21              
22             # Make a test segment
23             $segment = $phono->segment;
24             $segment->labial(1);
25             $segment->voice(1);
26              
27             # Find the symbol matching the segment
28             print $symbols->spell($segment); # Should print 'b'
29              
30             =head1 DESCRIPTION
31              
32             When using Lingua::Phonology, you usually manipulate Segment objects that have
33             various feature values that specify the phonetic qualities of the segment.
34             However, it is difficult to print those feature values, and a list of feature
35             values can be difficult to interpret anyway. This is where Symbols comes in--it
36             provides a way to take a Segment object and get a phonetic symbol representing
37             the properties of that segment.
38              
39             In Symbols, you may use L<"add_symbol"> to define text symbols that correlate to
40             "prototypes", which are special Segment objects that represent the ideal
41             segment for each symbol. After you have defined your symbols and prototypes,
42             you may use L<"spell"> to find which prototype is the most similar to a segment
43             in question, and get the symbol for that prototype.
44              
45             As of v0.2, Symbols also includes diacritics. A diacritic is a special symbol
46             that begins or ends with a '*', and which is used to modify other symbols. If
47             the best symbol match for a segment you are trying to spell is an imperfect
48             match, Symbols will then attempt to use diacritics to indicate exactly how the
49             segment is pronounced. For compatibility reasons, however, this feature is off
50             by default. It can be turned on with L<"set_diacritics">.
51              
52             You will probably want to read the L<"add_symbol">, L<"spell">, and L<"loadfile">
53             sections, because these describe the most widely-used functions and the
54             algorithm used to score potential matches. If you're not getting the results
55             you expect, you probably need to examine the way your prototype definitions are
56             interacting with that algorithm.
57              
58             =cut
59              
60 1     1   36929 use strict;
  1         2  
  1         43  
61 1     1   6 use warnings;
  1         2  
  1         38  
62 1     1   7 use warnings::register;
  1         1  
  1         191  
63 1     1   7 use Carp;
  1         3  
  1         104  
64 1     1   925 use Lingua::Phonology::Common;
  0            
  0            
65             use Lingua::Phonology::Segment;
66              
67             our $VERSION = 0.3;
68              
# Report a problem through _err(), but only when the calling code has
# warnings enabled for this package (see "use warnings::register").
# When warnings are disabled, the false result of warnings::enabled() is
# returned and _err() is not called.
sub err ($) {
    my $enabled = warnings::enabled();
    return $enabled unless $enabled;
    return _err($_[0]);
}
70              
# Generate the boolean-flag methods.  %flags maps a method name to the
# object hash key that stores the flag.  For each NAME => KEY pair three
# subs are installed into this package:
#   NAME([$bool])  - with an argument, store 1 or 0 according to its truth;
#                    always returns the stored flag
#   set_NAME()     - store 1 (and return it)
#   no_NAME()      - store 0, return 1
my %flags = (
    auto_reindex => 'AUTOINDEX',
    diacritics   => 'USEDCR'
);
for my $sub (keys %flags) {
    my $key = $flags{$sub};

    # Symbolic glob assignment is how the subs get their names
    no strict 'refs';

    *{$sub} = sub {
        my ($self, @args) = @_;
        $self->{$key} = ($args[0] ? 1 : 0) if @args;
        return $self->{$key};
    };
    *{"set_$sub"} = sub { $_[0]->{$key} = 1 };
    *{"no_$sub"}  = sub { $_[0]->{$key} = 0; 1; };
}
94              
# Constructor.  Takes the class (or an existing object, whose class is
# reused) and a features object.  If _is_features() rejects the argument,
# carps and returns undef; otherwise returns the new blessed object.
sub new {
    my ($proto, $features) = @_;
    my $class = ref($proto) || $proto;

    if (not _is_features($features)) {
        carp "No feature set or bad featureset given for new Symbols object";
        return undef;
    }

    return bless {
        FEATURES  => $features, # a Features object
        SYMBOLS   => {},        # the hash of symbol => prototype
        DIACRITS  => {},        # hash of diacritic => prototype
        USEDCR    => 0,         # whether or not to use diacritics (off by default)
        AUTOINDEX => 1,         # whether or not to autoindex (on by default)
        REINDEX   => 0,         # whether reindexing is currently necessary
        INDEX     => {},        # index of symbols by feature
        VALINDEX  => {},        # index of features by symbol
        DCRINDEX  => []         # index of diacritics by number of keys
    }, $class;
}
120              
# Add one or more symbols, given as a list of symbol => prototype pairs.
# Each pair is validated with _check_symbol(); a pair that fails is skipped.
# A valid pair first drops any existing symbol of the same name and is then
# stored.  The object is always flagged for reindexing.
# Returns 1 if every pair was accepted, otherwise an empty list.
sub add_symbol {
    my ($self, %proto_for) = @_;
    my $failed = 0;

    for my $symbol (keys %proto_for) {
        my $proto = $proto_for{$symbol};

        if ($self->_check_symbol($symbol, $proto)) {
            # Drop pre-existing symbols, then add the new one
            $self->drop_symbol($symbol);
            $self->_add_symbol($symbol, $proto);
        }
        else {
            $failed = 1;
        }
    }

    $self->{REINDEX} = 1;

    return () if $failed;
    return 1;
}

# symbol() is kept as a synonym for add_symbol()
*symbol = \&add_symbol;
149              
# Private: validate a prototype before it is stored under $symbol.
# Fails (via err()) when $ref does not pass _is_seg() or when its
# featureset differs from this object's features.  On success the
# prototype's symbolset is pointed at this object and 1 is returned.
sub _check_symbol {
    my ($self, $symbol, $ref) = @_;

    return err("Prototype for '$symbol' is not a Lingua::Phonology::Segment")
        if not _is_seg($ref);

    return err("Prototype for '$symbol' has wrong feature set")
        if $self->features ne $ref->featureset;

    # Success--spell the proto w/ this symbolset
    $ref->symbolset($self);
    return 1;
}
166              
# Private: store a prototype under $symbol.  A symbol made of a '*' plus at
# least one non-space character before or after it is a diacritic and goes
# in DIACRITS; everything else (including a bare '*') goes in SYMBOLS.
sub _add_symbol {
    my ($self, $symbol, $ref) = @_;

    my $table = ($symbol =~ /(^\*\S+)|(\S+\*$)/) ? 'DIACRITS' : 'SYMBOLS';
    $self->{$table}->{$symbol} = $ref;
}
181              
# Remove each named symbol from SYMBOLS or, failing that, from DIACRITS.
# Unknown names are silently ignored.  Flags the object for reindexing
# and returns 1.
sub drop_symbol {
    my ($self, @names) = @_;

    for my $name (@names) {
        delete $self->{SYMBOLS}->{$name}
            or delete $self->{DIACRITS}->{$name};
    }

    return $self->{REINDEX} = 1;
}
189              
# Redefine existing symbols, given as a list of symbol => prototype pairs.
# A pair is rejected (with an err() message) when no symbol or diacritic of
# that name is currently defined, or when _check_symbol() refuses the
# prototype.  Diacritics live in DIACRITS rather than SYMBOLS, so both
# tables are consulted when checking for existence.
# The object is always flagged for reindexing.
# Returns 1 if every pair was accepted, otherwise an empty list.
sub change_symbol {
    my ($self, %hash) = @_;
    my $err = 0;

    SYMBOL: for my $symbol (keys %hash) {
        my $known = exists $self->{SYMBOLS}->{$symbol}
                 || exists $self->{DIACRITS}->{$symbol};
        if (not $known) {
            err("No symbol $symbol defined");
            $err = 1;
            next SYMBOL;
        }

        if (not $self->_check_symbol($symbol, $hash{$symbol})) {
            $err = 1;
            next SYMBOL;
        }

        # _add_symbol() overwrites the entry in whichever table it belongs to
        $self->_add_symbol($symbol, $hash{$symbol});
    }

    $self->{REINDEX} = 1;

    return $err ? () : 1;
}
215              
# Rebuild the lookup tables used for scoring:
#   INDEX    - feature => value => [symbols whose prototype has that value]
#              (an undefined value is filed under the empty-string key)
#   VALINDEX - symbol or diacritic => hash of its prototype's all_values
#   DCRINDEX - diacritic names, those with the most features first
# All three are rebuilt from scratch so that symbols dropped since the last
# reindex leave no stale entries behind.  Clears the REINDEX flag and
# returns 1.
sub reindex {
    my $self = shift;

    $self->{REINDEX}  = 0;
    $self->{INDEX}    = {};
    $self->{VALINDEX} = {};

    # Index symbols by feature => value
    for my $symbol (keys %{$self->{SYMBOLS}}) {
        my %feat = $self->{SYMBOLS}->{$symbol}->all_values;
        $self->{VALINDEX}->{$symbol} = \%feat;

        for my $feature (keys %feat) {
            no warnings 'uninitialized'; # Avoid the warning when the value is undef
            push @{$self->{INDEX}->{$feature}->{$feat{$feature}}}, $symbol;
        }
    }

    # Record diacritic values first, so the sort below can count keys
    # without calling all_values again for every comparison
    for my $dcr (keys %{$self->{DIACRITS}}) {
        my %feat = $self->{DIACRITS}->{$dcr}->all_values;
        $self->{VALINDEX}->{$dcr} = \%feat;
    }

    # Sort diacritics by number of keys, largest first
    my $values_of = $self->{VALINDEX};
    $self->{DCRINDEX} = [
        sort { keys(%{$values_of->{$b}}) <=> keys(%{$values_of->{$a}}) }
        keys %{$self->{DIACRITS}}
    ];

    return 1;
}
251              
# Load symbol definitions.
#   $file undefined - read the default definitions from this module's DATA
#                     handle and parse them with _parse_from_string().  The
#                     handle is rewound to where it started before the parse
#                     result is examined, so a later call can read it again
#                     even when parsing fails.  A parse error is reported
#                     through err().
#   $file given     - parse it with _parse_from_file(); if that yields
#                     nothing, hand the file to old_loadfile() instead.
# On success the parsed structure is passed to _load_from_struct().
sub loadfile {
    my ($self, $file) = @_;

    my $parse;

    # Loading default symbols
    if (not defined $file) {
        my $start  = tell DATA;
        my $string = join '', <DATA>;
        seek DATA, $start, 0;

        eval { $parse = _parse_from_string($string, 'symbols') };
        return err($@) if $@;
    }

    # Loading an actual file
    else {
        # Errors are deliberately ignored here: an unparseable file is
        # retried with the old format
        eval { $parse = _parse_from_file($file, 'symbols') };
        return $self->old_loadfile($file) if not $parse;
    }

    return $self->_load_from_struct($parse);
}
276              
# Deprecated loader for the old line-oriented symbol file format.
# $file is turned into a read handle by _to_handle(); failure is reported
# through err().  A "Deprecated method" message is always emitted.
# Each line has '#' comments removed and must look like
#     SYMBOL<tab(s)>feature definitions separated by whitespace
# where a definition is either name=value, or a name optionally prefixed
# with '*', '+' or '-' (the prefix is used as the value, default 1).
# Every such line defines one prototype, registered via symbol().
# Lines are read into a lexical so the caller's $_ is left untouched.
sub old_loadfile {
    my ($self, $file) = @_;

    eval { $file = _to_handle($file, '<') };
    return err($@) if $@;
    err("Deprecated method");

    while (my $line = <$file>) {
        $line =~ s/#.*$//;                          # Remove comments
        next unless $line =~ /^\s*(\S*)\t+(.*)/;    # General line format
        my ($symbol, $spec) = ($1, $2);

        my $proto = Lingua::Phonology::Segment->new( $self->features );
        for my $def (split(/\s+/, $spec)) {
            # Captures are copied before calling value(), so they cannot
            # be disturbed by pattern matches made inside that method
            if ($def =~ /(\S+)=(\S+)/) {            # Feature defs like coronal=1
                my ($name, $value) = ($1, $2);
                $proto->value($name, $value);
            }
            elsif ($def =~ /([*+-])?(\S+)/) {       # Feature defs like +feature or feature
                my ($sign, $name) = ($1, $2);
                $proto->value($name, $sign ? $sign : 1);
            }
        }
        $self->symbol($symbol => $proto);
    }

    close $file;

    $self->{REINDEX} = 1;
}
308              
# Private: build prototypes from a parsed structure of the form
#     { SYMBOL => { feature => { NAME => { value => VALUE }, ... } }, ... }
# For every symbol a new Lingua::Phonology::Segment is created from this
# object's FEATURES and a NAME => VALUE hash, then registered via symbol().
# Flags the object for reindexing.
sub _load_from_struct {
    my ($self, $parse) = @_;

    while ( my ($sym, $val) = each %$parse ) {
        my %value_of =
            map { $_ => $val->{feature}->{$_}->{value} } keys %{$val->{feature}};

        # Direct method call instead of the indirect "new Class(...)" form
        my $proto = Lingua::Phonology::Segment->new($self->{FEATURES}, \%value_of);
        $self->symbol($sym => $proto);
    }
    $self->{REINDEX} = 1;
}
319              
# Private: serialise every symbol and diacritic.  Builds
#     { SYMBOL => { feature => { NAME => { value => VALUE } } } }
# from each prototype's all_values (undefined values are written as '*')
# and passes it, wrapped as { symbols => { symbol => ... } }, to
# _string_from_struct().  The call is made inside eval, so a failure there
# does not propagate as an exception.
sub _to_str {
    my $self = shift;

    my $href = {};
    for my $table ($self->{SYMBOLS}, $self->{DIACRITS}) {
        for my $sym (keys %$table) {
            my %h = $table->{$sym}->all_values;
            for my $feature (keys %h) {
                my $value = defined $h{$feature} ? $h{$feature} : '*';
                $href->{$sym}->{feature}->{$feature} = { value => $value };
            }
        }
    }

    return eval { _string_from_struct({ symbols => { symbol => $href } }) };
}
336              
# Spell each segment given as an argument.  For every segment the winning
# symbol from score() is collected, or '_?_' when score() yields nothing
# true.  An argument rejected by _is_seg() aborts with an err() message.
# Returns the list of symbols in list context, or their concatenation in
# scalar context.
sub spell {
    my ($self, @segments) = @_;

    my @symbols;
    for my $comp (@segments) {
        return err("Bad argument to spell()") unless _is_seg($comp);

        # Scalar context makes score() return only the winner
        my $winner = $self->score($comp);
        push @symbols, ($winner ? $winner : '_?_');
    }

    return wantarray ? @symbols : join('', @symbols);
}
350            
# Score a segment against every indexed symbol.
# Reindexes first when the REINDEX and AUTOINDEX flags are both set.
# For each feature => value bucket in INDEX, every symbol in the bucket
# gains a point if the segment has the same value for that feature and
# loses a point otherwise.
# In list context returns the symbol => score hash.  In scalar context
# returns the symbol holding the highest positive score, passed through
# score_diacrit() first when the USEDCR flag is set.
sub score {
    my ($self, $comp) = @_;

    # Reindex if necessary
    $self->reindex if $self->{REINDEX} and $self->{AUTOINDEX};

    # Prepare data containers
    my %comp   = $comp->all_values;
    my %scores = ();
    my @scores = ();    # $scores[N] is a symbol that scored N

    for my $feature (keys %{$self->{INDEX}}) {
        my $bucket_for = $self->{INDEX}->{$feature};
        while (my ($val, $list) = each %$bucket_for) {
            # Avoid all sorts of harmless warnings
            no warnings 'uninitialized';

            # Special case: when $val is '' (which is equiv w/ undef), ask
            # the segment itself for the feature, in case $feature is a
            # node w/ defined children
            $comp{$feature} = $comp->$feature if $val eq '';

            my $delta = ($val eq $comp{$feature}) ? 1 : -1;
            $scores{$_} += $delta for @$list;
        }
    }

    # Build @scores; only positive scores are eligible to win
    while (my ($sym, $score) = each %scores) {
        $scores[$score] = $sym if $score > 0;
    }

    # The last slot holds the highest score (slot 0 when nothing scored)
    my $best = @scores ? $#scores : 0;

    # Get a diacritic spelling if wanted
    if ($self->{USEDCR}) {
        $scores[$best] = score_diacrit($self, $scores[$best], %comp);
    }

    return wantarray ? %scores : $scores[$best];
}
396              
# Decorate a base symbol with diacritics.
# Arguments: the object, the base symbol, and the segment's feature values
# as a flat hash.  Returns '' for a false base symbol.
# The "discrepancy" is every feature whose value differs between the
# segment and the base symbol's VALINDEX entry.  Diacritics are tried in
# DCRINDEX order; one is applied only if each of its features matches the
# discrepancy, and the features it covers are then removed so that no other
# diacritic can claim them.  A diacritic starting with '*' is appended to
# the symbol, any other is prepended, with the '*' removed.
sub score_diacrit {
    my ($self, $symbol, %comp) = @_;

    # Don't try to diacriticize completely unmatched segments
    return '' if not $symbol;

    # Avoid warnings
    no warnings 'uninitialized';

    # Values of the base symbol's prototype (created empty if absent)
    my $base = $self->{VALINDEX}->{$symbol} ||= {};

    # Build hash of discrepancy over the features of both sides
    my %disc = ();
    for my $feature (keys %comp, keys %$base) {
        $disc{$feature} = $comp{$feature}
            if $comp{$feature} ne $base->{$feature};
    }

    DIACRIT: for my $entry (@{$self->{DCRINDEX}}) {
        # Quit if there's no more discrepancy
        last if not keys %disc;

        my $dcr = $entry; # Work on a copy: s/// below must not edit DCRINDEX

        # Diacrits mustn't disagree w/ comp segs at all
        my %proto = %{$self->{VALINDEX}->{$dcr}};
        for my $feature (keys %proto) {
            # Defined features compare normally
            if (defined $proto{$feature}) {
                next DIACRIT if $proto{$feature} ne $disc{$feature};
            }

            # Undefined features must be specifically mentioned in the
            # discrepancy hash (i.e. can't be simply missing keys)
            else {
                next DIACRIT
                    unless exists $disc{$feature} and not defined $disc{$feature};
            }
        }

        # Agreement on all features: claim them so nobody else matches them
        delete @disc{keys %proto};

        # Attach to the end or the beginning, chopping the '*'
        if ($dcr =~ s/^\*//) {
            $symbol .= $dcr;
        }
        else {
            $dcr =~ s/\*$//;
            $symbol = $dcr . $symbol;
        }
    }

    return $symbol;
}
453              
# Return the prototype stored for $symbol, or report "No such symbol" via
# err() when there is none.
# Both tables are consulted, SYMBOLS first and then DIACRITS (the same order
# drop_symbol() uses), so the lookup cannot disagree with where
# _add_symbol() filed the entry.  In particular a bare '*' is stored as a
# regular symbol and is found here.
# A successful lookup sets the REINDEX flag, because the caller receives the
# stored prototype itself and may modify it.
sub prototype {
    my $self = shift;
    my $symbol = shift;

    my $proto = $self->{SYMBOLS}->{$symbol} || $self->{DIACRITS}->{$symbol};

    return err("No such symbol '$symbol'") if (not $proto);
    $self->{REINDEX} = 1;
    return $proto;
}
470              
# Make new segments.
# Without arguments: returns a new Lingua::Phonology::Segment built on this
# object's features, with its symbolset set to this object.
# With symbols: looks up each prototype and collects a duplicate of it.
# Returns nothing as soon as a lookup fails.  Otherwise returns all the
# duplicates in list context, or the first one in scalar context.
sub segment {
    my ($self, @symbols) = @_;

    # If you're not given a symbol, return a blank segment
    if (not @symbols) {
        my $blank = Lingua::Phonology::Segment->new( $self->features );
        $blank->symbolset($self);
        return $blank;
    }

    # Otherwise, copy the prototype of every symbol
    my @segments;
    for my $symbol (@symbols) {
        my $proto = $self->prototype($symbol);
        return if not $proto;
        push @segments, $proto->duplicate;
    }
    return wantarray ? @segments : $segments[0];
}
490              
# Get or set the features object.
# Without an argument, returns the stored FEATURES value.  With one, the
# argument must pass _is_features(): if it does not, carps and returns
# carp's result; otherwise it is stored and returned.
sub features {
    my ($self, @args) = @_;

    return $self->{FEATURES} unless @args;

    my $candidate = $args[0];
    return carp "Bad argument to features()" unless _is_features($candidate);

    $self->{FEATURES} = $candidate;
    return $self->{FEATURES};
}
500              
501             1;
502              
503             =head1 METHODS
504              
505             =head2 new
506              
507             $symbol = Lingua::Phonology::Symbols->new($features);
508              
509             Creates a new Symbols object. This method takes one argument, a Features
510             object that provides the feature set for the prototypes in this object.
511             This will carp if you don't provide an appropriate object.
512              
513             This method is called automatically when you make a C<new
514             Lingua::Phonology>.
515              
516             =head2 add_symbol
517              
518             $symbol->add_symbol( 'b' => $b );
519              
520             Adds one or more symbols to the current object. The argument to symbol must be
521             a hash. The keys of this hash are the text symbols that will be returned, and
522             the values should be Lingua::Phonology::Segment objects that act as the
523             prototypes for each symbol. See L<"spell"> for explanation of how these symbols
524             and prototypes are used.
525              
526             Symbols can generally be any text string. However, strings beginning or ending
527             with '*' are interpreted specially, as diacritics. The position of the asterisk
528             indicates where the base symbol goes, and the rest is interpreted as the
529             diacritic. Diacritic prototypes are also treated differently from regular
530             prototypes--see the L<"spell"> section for details. For example, you could use a
531             tilde '~' following a symbol to indicate nasality with the following call to
532             symbol:
533              
534             # Assume $nasal is an appropriate prototype
535             $symbols->add_symbol('*~' => $nasal);
536              
537             Note that '*' by itself is still a valid, non-diacritic symbol. However, '**'
538             will be interpreted as a diacritic consisting of a symbol followed by a single
539             asterisk.
540              
541             If you attempt to pass in a Lingua::Phonology::Segment object associated with a
542             feature set other than the one defined for the current object, C<add_symbol>
543             will skip to the next symbol and emit a warning.
544              
545             This method returns true if all of the attempted symbol additions succeeded,
546             and false otherwise.
547              
548             =head2 symbol (deprecated)
549              
550             Synonymous with C<add_symbol>. This method is deprecated, and only exists
551             because of a poor naming choice in earlier versions of the module.
552              
553             =head2 drop_symbol
554              
555             $symbols->drop_symbol('x');
556              
557             Deletes a symbol from the current object. Nothing happens if you try to
558             delete a symbol which doesn't currently exist.
559              
560             =head2 change_symbol
561              
562             $symbols->change_symbol( 'b' => $b );
563              
564             Acts exactly the same as C<add_symbol>, but first checks to make sure that
565             there already exists a symbol with the key given. Otherwise, it brings
566             up an error.
567              
568             The method C<add_symbol> can also be used to redefine existing symbols, but
569             it first drops any existing symbol. In the present implementation this makes no
570             difference, so this method really only exists to aid readability and allow for
571             future expansion.
572              
573             As with C<add_symbol>, this method returns true if all of the attempted
574             changes succeeded, otherwise false.
575              
576             =head2 features
577              
578             $features = $symbols->features();
579              
580             Returns the Features object associated with the current object, or sets the
581             object if provided with a Lingua::Phonology::Features object as an argument.
582              
583             =head2 prototype
584              
585             $proto = $symbols->prototype('b');
586              
587             Takes one argument, a text string indicating a symbol in the current set.
588             Returns the prototype associated with that symbol, or carps if no
589             such symbol is defined. You can then make changes to the prototype object,
590             which will be reflected in subsequent calls to spell().
591              
592             =head2 segment
593              
594             # Get one segment
595             $b = $symbols->segment('b');
596            
597             # Get several segments
598             @word = $symbols->segment('b', 'a', 'n');
599              
600             Takes one or more arguments, each a symbol, and returns a new Segment object with the
601             feature values of the prototype for that symbol. Unlike L<"prototype">, which
602             returns the prototype itself, this method returns a completely new object which
603             can be modified without affecting the values of the prototype. If you supply a
604             list of symbols, you'll get back a list of segments in the same order. This is
605             generally the easiest way to make new segments with some features already set.
606             Example:
607              
608             The segments returned from this method will be associated with the
609             Lingua::Phonology::Features object defined by C<features> and the current
610             Lingua::Phonology::Symbols object.
611              
612             =head2 reindex
613              
614             $symbols->reindex();
615              
616             This function recompiles the internal index that Lingua::Phonology::Symbols
617             uses to speed up C<spell>ing. It should generally be unnecessary to call this
618             function, as Lingua::Phonology::Symbols does its best to figure out when
619             reindexing is necessary without any user input. You may call this function by
620             hand to ensure reindexing at a particular time, or if auto reindexing is off.
621              
622             =head2 auto_reindex
623              
624             # Get the current state of auto-reindexing
625             $auto_reindex = $symbols->auto_reindex();
626              
627             # Set the auto-reindexing flag
628             $symbols->auto_reindex(0);
629              
630             Returns true if automatic reindexing is currently turned on, false otherwise.
631             If called with an argument, sets auto reindexing to the truth or falsehood of
632             that argument. Auto reindexing is on by default.
633              
634             =head2 set_auto_reindex
635              
636             $symbols->set_auto_reindex();
637              
638             Turns automatic reindexing (back) on. Same as C<< auto_reindex(1) >>. Auto
639             reindexing is on by default, so this is only necessary after a call to
640             C<no_auto_reindex>. See L<"INDEXING">.
641              
642             =head2 no_auto_reindex
643              
644             $symbols->no_auto_reindex();
645              
646             Turns automatic reindexing off. Same as C<< auto_reindex(0) >>. See
647             L<"INDEXING">.
648              
649             =head2 diacritics
650              
651             # Get the current diacritic flag
652             $symbols->diacritics();
653              
654             # Set the diacritics flag
655             $symbols->diacritics(1);
656              
657             Returns true if diacritics are currently on, otherwise false. You may also pass
658             this method an argument to turn diacritics on or off, e.g. C<<
659             $symbols->diacritics(1) >>. Diacritics are off by default.
660              
661             =head2 set_diacritics
662              
663             $symbols->set_diacritics();
664              
665             Turns diacritics on. Same as C<< diacritics(1) >>.
666              
667             =head2 no_diacritics
668              
669             $symbols->no_diacritics();
670              
671             Turns diacritics off. Same as C<< diacritics(0) >>.
672              
673             =head2 spell
674              
675             print $symbols->spell($seg);
676              
677             Takes any number of Lingua::Phonology::Segment objects as arguments. For each
678             object, returns a text string indicating the best match of prototype with the
679             Segment given. In a scalar context, returns a string consisting of a
680             concatenation of all of the symbols.
681              
682             The Segment object given will be compared against every prototype currently
683             defined, and scored according to the following algorithm:
684              
685             =over 4
686              
687             =item *
688              
689             Score one point for every feature whose value is the same for both the
690             prototype and the comparison segments, whether that value is defined or not.
691              
692             =item *
693              
694             Lose one point for every feature that is defined for the prototype segment and
695             which the comparison segment disagrees with.
696              
697             =item *
698              
699             Score zero points for each feature defined on the comparison segment but not
700             defined for the prototype.
701              
702             =back
703              
704             Comparison segments may always be more defined than the prototypes, so
705             there is no consequence if the comparison segment is defined for features
706             that the prototype isn't defined for.
707              
708             Note that this algorithm is slightly different from the one used in previous
709             versions. In my informal tests, about 95% of the segments come out the same,
710             but there is some discrepancy. My subjective impression is that the results
711             given by the new algorithm are better (more intuitive) than those from the
712             previous algorithm.
713              
714             The 'winning' prototype is the one that scores the highest by the preceding
715             algorithm. If more than one prototype scores the same, it's unpredictable which
716             symbol will be returned, since it will depend on the order in which the
717             prototypes came out of the internal hash.
718              
719             If C<diacritics> is on, diacritic formation happens after the best-matching
720             symbol is chosen. A list of the features for which the comparison segment and
721             symbol prototypes do not agree is compiled, and diacritics are selected that
722             match against those features. If there are diacritics that specify more than
723             one feature, or multiple diacritics specifying the same feature, then this
724             method will attempt to minimize the number of diacritics used. The diacritic
725             symbols will be concatenated with the base symbol, the base symbol taking the
726             place of the asterisk in the symbol definition. For example, if a segment
727             matched the base symbol 'a' and the diacritic '*~', the resulting symbol would
728             be 'a~'. If multiple diacritics are matched, there is no way to predict the
729             order in which they will be added, except that diacritics specifying multiple
730             features will appear closer to the base.
731              
732             If no prototype scores at least 1 point by this algorithm, the string '_?_'
733             will be returned. This indicates that no suitable matches were found. No
734             diacritic matching is done in this case.
735              
736             Beware of testing a Segment object that is associated with a different feature
737             set than the ones used by the prototypes. This will almost certainly cause
738             errors and bizarre results.
739              
740             =head2 score
741              
742             %score = $symbols->score($seg);
743              
744             Takes a Segment argument and compares it against the defined symbols, just like
745             spell(). It normally returns a hash with the available symbols as the keys and
746             the score for each symbol as the value. In a scalar context, returns the
747             winning symbol just like spell(). Useful for debugging and determining why the
748             program thinks that [a] is better described as [d] (as happened to the author
749             during testing). Unfortunately, score() can only be used to test one segment at
750             a time, rather than a list of segments.
751              
752             =head2 loadfile
753              
754             # Load symbol definitions from a file
755             $symbols->loadfile('phono.xml');
756              
757             # Load default symbols
758             $symbols->loadfile();
759              
760             Takes one argument, a file name, and loads prototype segment definitions
761             from that file. If no file name is given, loads the default symbol set.
762              
763             Files should be in the XML format described in
764             L<Lingua::Phonology::FileFormatPOD>. If the filename given does not parse
765             correctly, this method will fall back on C<old_loadfile>, just in case this
766             is an old script using the deprecated custom file format. In this case, you
767             will get a warning. To avoid the warning, change the method call, or better yet
768             change your file over to the XML format.
769              
770             =head2 old_loadfile (deprecated)
771              
772             # Load a file
773             $symbols->old_loadfile('symbols.txt');
774              
775             This method is deprecated. Use C<loadfile> instead.
776              
777             Takes one argument, a file name. Reads that file according to the format
778             described below and adds the symbols defined there to the current symbols
779             object. This method does NOT load default features when called without any
780             arguments.
781              
782             Lines in the file should match the regular expression /^\s*(\S+)\t+(.*)/.
783             The first parenthesized sub-expression will be taken as the symbol, and the
784             second sub-expression as the feature definitions for the prototype. Feature
785             definitions are separated by spaces, and should be in one of three formats:
786              
787             =over 4
788              
789             =item *
790              
791             B<feature>: The preferred way to set a privative value is simply to write the
792             name of the feature unadorned. Since privatives are either true or undef, this
793             is sufficient to declare the existence of a privative. E.g., since both
794             [labial] and [voice] are privatives in the default feature set, the following
795             line suffices to define the symbol 'b' (though you may want more specificity):
796              
797             b labial voice
798              
799             =item *
800              
801             B<[+-*]feature>: The characters before the feature correspond to setting the
802             value to true, false, and undef, respectively. This is the preferred way to set
803             binary features, and the only way to assert that a feature of any type must be
804             undef. For example, the symbol 'd`' for a voiced retroflex stop can be defined
805             with the following line:
806              
807             d` -anterior -distributed voice
808              
809             =item *
810              
811             B<feature=value>: Whatever precedes the equals sign is the feature name;
812             whatever follows is the value. This is the preferred way to set scalar values,
813             and the only way to set scalar values to anything other than undef, 0, or 1.
814              
815             =back
816              
817             Feature definitions may work if you use them other than as recommended,
818             but the recommended forms are provided for maximum readability. To be
819             exact, however, the following are synonymous:
820              
821             # Synonymous one way
822             labial
823             +labial
824             labial=1
825              
826             # Synonymous in a different way
827             -labial # only if 'labial' is binary
828             labial=0
829              
830             Since this behavior is partly dependent on the implementation of text and
831             number forms in the Features module, the synonymity of these forms is not
832             guaranteed to remain constant in the future. However, every effort will be
833             made to guarantee that the I<recommended> forms won't change their
834             behavior.
835              
836             You may begin comments with '#'--anything between the first '#' on a line and
837             the end of that line will be ignored. Consequently, '#' cannot be used as a
838             symbol in a loaded file (though it is a valid symbol elsewhere, and can be
839             assigned via C<symbol>).
840              
841             As with C<symbol>, symbol definitions beginning or ending with '*' will be
842             interpreted as diacritics. Diacritic symbols may be defined in exactly the same
843             way as regular symbols. Thus, to define a tilde as a diacritic for nasality,
844             you might use the following simple line:
845              
846             *~ nasal
847              
848             You should only define terminal (non-node) features in your segment
849             definitions. The loadfile method is unable to deal with features that
850             are nodes, and will generate errors if you try to assign to a node.
851              
852             If you don't give a file name, then the default symbol set is loaded. This
853             is described in L<"THE DEFAULT SYMBOL SET">.
854              
855             =head1 INDEXING
856              
857             This section endeavors to explain the purpose of indexing in
858             Lingua::Phonology::Symbols, and how you can control it.
859              
860             As of v0.2, this module uses an efficient hash comparison algorithm that
861             greatly speeds up calls to C<spell> and C<score>. This algorithm works by
862             compiling an index of the features and values that prototype segments have,
863             then only comparing against those prototypes that have some chance of winning.
864             Indexing itself is a somewhat costly procedure, but fortunately, it only needs
865             to be done once. Unfortunately, it needs to be done again any time that the
866             list of symbols or the prototypes for those symbols is changed.
867              
868             Fortunately again, Lingua::Phonology::Symbols will take care of this for you.
869             Whenever a method is called that might require reindexing, an internal flag on
870             the object is set. The next time that you ask this module to C<spell>
871             something, it will first reindex, then proceed to spelling. The methods that
872             will trigger reindexing are C<symbol>, C<drop_symbol>, C<change_symbol>,
873             C<loadfile>, and C<prototype>. This reindexing is done "just in time", and isn't done more than is
874             necessary.
875              
876             Unfortunately, not all calls to those methods actually warrant reindexing, so
877             if you call those methods a lot, you might want to have manual control over
878             when the hash is reindexed. To do this, you can use the method
879             C<no_auto_reindex>, which will disable automatic reindexing. You then will have
880             to call C<reindex> yourself whenever it's warranted. If you get tired of this
881             and want reindexing back, you can call C<set_auto_reindex>.
882              
883             The author of this module has never felt the need to work with auto reindexing
884             off, for what it's worth.
885              
886             =head1 THE DEFAULT SYMBOL SET
887              
888             Currently, Lingua::Phonology::Symbols comes with a set of symbols that can
889             be loaded by calling loadfile with no arguments, like so:
890              
891             $symbols->loadfile;
892              
893             The symbol set thus loaded is based on the X-SAMPA system for encoding the IPA
894             into ASCII. You can read more about X-SAMPA at
895             L<http://www.phon.ucl.ac.uk/home/sampa/x-sampa.htm>. The default does not
896             contain all of the symbols in X-SAMPA, but it does contain a lot of them, plus
897             a few extra symbols for IPA characters not covered in X-SAMPA. These symbols are:
898              
899             # Consonants
900             # Labials
901             p voiceless labial stop
902             b voiced labial stop
903             f voiceless labiodental fricative
904             v voiced labiodental fricative
905             m labial nasal
906              
907             # Dentals
908             t voiceless dental stop
909             d voiced dental stop
910             T voiceless dental fricative
911             D voiced dental fricative
912             s voiceless alveolar fricative
913             z voiced alveolar fricative
914             n alveolar nasal
915             l alveolar lateral
916             r alveolar rhotic
917              
918             # Postalveolars
919             tS voiceless postalveolar stop
920             dZ voiced postalveolar stop
921             S voiceless postalveolar fricative
922             Z voiced postalveolar fricative
923              
924             # Retroflex
925             t` voiceless retroflex stop
926             d` voiced retroflex stop
927             s` voiceless retroflex fricative
928             z` voiced retroflex fricative
929             n` retroflex nasal
930             l` retroflex lateral
931             r` retroflex rhotic
932              
933             # Palatal
934             c voiceless palatal stop
935             d\ voiced palatal stop
936             C voiceless palatal fricative
937             j\ voiced palatal fricative
938             J palatal nasal
939             L palatal lateral
940              
941             # Velar
942             k voiceless velar stop
943             g voiced velar stop
944             x voiceless velar fricative
945             G voiced velar fricative
946             N velar nasal
947              
948             # Uvular
949             q voiceless uvular stop
950             G\ voiced uvular stop
951             X voiceless uvular fricative
952             R voiced uvular fricative
953             N\ uvular nasal
954             R\ uvular rhotic
955              
956             # Pharyngeal
957             q\ voiceless pharyngeal stop
958             X\ voiceless pharyngeal fricative
959             ?\ voiced pharyngeal fricative
960              
961             # Glottal
962             ? voiceless glottal stop
963             h voiceless glottal fricative
964             h\ voiced glottal fricative
965              
966             # Vowels
967             # High Front Vowels
968             i high front tense
969             I high front
970             y high front rounded tense
971             Y high front rounded
972             j high front semivowel
973             H high front rounded semivowel
974              
975             # High Back Vowels
976             u high back rounded tense
977             U high back rounded
978             M high back unrounded
979             w high back rounded semivowel
980              
981             # High Central Vowels
982             1 high central
983             } high central rounded
984              
985             # Mid Front Vowels
986             e mid front tense
987             E mid front
988             2 mid front rounded tense
989             9 mid front rounded
990              
991             # Mid Back Vowels
992             o mid back rounded tense
993             O mid back rounded
994             W mid back unrounded tense
995             V mid back unrounded
996              
997             # Mid Central Vowels
998             @ mid central
999             8 mid central rounded
1000              
1001             # Low Vowels
1002             a low
1003             Q low rounded
1004              
1005             # Diacritics
1006             ~ nasal
1007             _l lateral
1008             _v voiced
1009             _0 voiceless
1010             _h aspirated (spread)
1011             _~ creaky voice (constricted)
1012             _w labialized
1013             _d laminalized
1014             _G velarized
1015             _? pharyngealized
1016              
1017             The symbols are defined with the following XML structure, which you can use as
1018             a model if you need to write your own symbols definition:
1019              
1020            
1021            
1022            
1023            
1024              
1025            
1026            
1027            
1028            
1029            
1030            
1031            
1032            
1033            
1034            
1035            
1036            
1037            
1038            
1039            
1040            
1041            
1042            
1043            
1044            
1045            
1046            
1047            
1048            
1049            
1050              
1051            
1052            
1053            
1054            
1055            
1056            
1057            
1058            
1059            
1060            
1061            
1062            
1063            
1064            
1065            
1066            
1067            
1068            
1069            
1070            
1071            
1072            
1073            
1074            
1075            
1076            
1077            
1078            
1079            
1080            
1081            
1082            
1083            
1084            
1085            
1086            
1087            
1088            
1089            
1090            
1091            
1092            
1093            
1094            
1095            
1096            
1097            
1098            
1099            
1100              
1101            
1102            
1103            
1104            
1105            
1106            
1107            
1108            
1109            
1110            
1111            
1112            
1113            
1114            
1115            
1116            
1117            
1118            
1119            
1120            
1121            
1122            
1123            
1124              
1125            
1126            
1127            
1128            
1129            
1130            
1131            
1132            
1133            
1134            
1135            
1136            
1137            
1138            
1139            
1140            
1141            
1142            
1143            
1144            
1145            
1146            
1147            
1148            
1149            
1150            
1151            
1152            
1153            
1154            
1155            
1156            
1157            
1158            
1159            
1160            
1161            
1162            
1163            
1164            
1165            
1166            
1167            
1168              
1169            
1170            
1171            
1172            
1173            
1174            
1175            
1176            
1177            
1178            
1179            
1180            
1181            
1182            
1183            
1184            
1185            
1186            
1187            
1188            
1189            
1190            
1191            
1192            
1193            
1194            
1195            
1196            
1197            
1198            
1199            
1200            
1201            
1202            
1203            
1204            
1205            
1206              
1207            
1208            
1209            
1210            
1211            
1212            
1213            
1214            
1215            
1216            
1217            
1218            
1219            
1220            
1221            
1222            
1223            
1224            
1225            
1226            
1227            
1228            
1229            
1230            
1231            
1232              
1233            
1234            
1235            
1236            
1237            
1238            
1239            
1240            
1241            
1242            
1243            
1244            
1245            
1246            
1247            
1248            
1249            
1250            
1251            
1252            
1253            
1254            
1255            
1256            
1257            
1258            
1259            
1260            
1261            
1262            
1263            
1264            
1265            
1266            
1267            
1268            
1269              
1270            
1271            
1272            
1273            
1274            
1275            
1276            
1277            
1278            
1279            
1280            
1281            
1282            
1283            
1284              
1285            
1286            
1287            
1288            
1289            
1290            
1291            
1292            
1293            
1294            
1295            
1296            
1297            
1298            
1299              
1300              
1301            
1302            
1303            
1304            
1305            
1306            
1307            
1308            
1309            
1310            
1311            
1312            
1313            
1314            
1315            
1316            
1317            
1318            
1319            
1320            
1321            
1322            
1323            
1324            
1325            
1326            
1327            
1328            
1329            
1330            
1331            
1332            
1333            
1334            
1335            
1336            
1337            
1338            
1339            
1340            
1341            
1342            
1343            
1344            
1345            
1346            
1347            
1348            
1349            
1350            
1351            
1352            
1353            
1354              
1355            
1356            
1357            
1358            
1359            
1360            
1361            
1362            
1363            
1364            
1365            
1366            
1367            
1368            
1369            
1370            
1371            
1372            
1373            
1374            
1375            
1376            
1377            
1378            
1379            
1380            
1381            
1382            
1383            
1384            
1385            
1386            
1387            
1388            
1389            
1390            
1391              
1392            
1393            
1394            
1395            
1396            
1397            
1398            
1399            
1400            
1401            
1402            
1403            
1404            
1405            
1406              
1407            
1408            
1409            
1410            
1411            
1412            
1413            
1414            
1415            
1416            
1417            
1418            
1419            
1420            
1421            
1422            
1423            
1424            
1425            
1426            
1427            
1428            
1429            
1430            
1431            
1432            
1433            
1434            
1435            
1436            
1437            
1438            
1439            
1440              
1441            
1442            
1443            
1444            
1445            
1446            
1447            
1448            
1449            
1450            
1451            
1452            
1453            
1454            
1455            
1456            
1457            
1458            
1459            
1460            
1461            
1462            
1463            
1464            
1465            
1466            
1467            
1468            
1469            
1470            
1471            
1472            
1473            
1474              
1475            
1476            
1477            
1478            
1479            
1480            
1481            
1482            
1483            
1484            
1485            
1486            
1487            
1488            
1489              
1490            
1491            
1492            
1493            
1494            
1495            
1496            
1497            
1498            
1499            
1500            
1501            
1502            
1503            
1504              
1505            
1506            
1507            
1508            
1509            
1510            
1511            
1512            
1513            
1514            
1515            
1516            
1517            
1518            
1519            
1520            
1521            
1522            
1523            
1524            
1525            
1526            
1527            
1528            
1529            
1530            
1531            
1532            
1533            
1534            
1535            
1536              
1537            
1538            
1539              
1540              
1541             These symbols depend upon the default feature set. If you aren't using the
1542             default feature set, you're on your own. If you've modified the default
1543             feature set, these may still work, though you'll probably have to tweak
1544             them. YMMV.
1545              
1546             =head1 SEE ALSO
1547              
1548             Lingua::Phonology, Lingua::Phonology::Features
1549              
1550             =head1 AUTHOR
1551              
1552             Jesse S. Bangs <F<jaspax@cpan.org>>
1553              
1554             =head1 LICENSE
1555              
1556             This module is free software. You can distribute and/or modify it under the
1557             same terms as Perl itself.
1558              
1559             =cut
1560              
1561             __DATA__