File Coverage

blib/lib/Lingua/LO/Romanize/Word.pm
Criterion Covered Total %
statement 10 12 83.3
branch n/a
condition n/a
subroutine 4 4 100.0
pod n/a
total 14 16 87.5


line stmt bran cond sub pod time code
1             package Lingua::LO::Romanize::Word;
2              
3 1     1   11541 use strict;
  1         3  
  1         49  
4 1     1   6 use utf8;
  1         3  
  1         9  
5              
6 1     1   24 use Moose;
  1         2  
  1         9  
7 1     1   7808 use MooseX::AttributeHelpers;
  0            
  0            
8              
9             use Lingua::LO::Romanize::Types;
10             use Lingua::LO::Romanize::Word;
11              
12             =encoding utf-8
13              
14             =head1 NAME
15              
16             Lingua::LO::Romanize::Word - Class for words, used by Lingua::LO::Romanize.
17              
18             =head1 VERSION
19              
20             Version 0.10
21              
22             =cut
23              
24             our $VERSION = '0.10';
25              
26             has 'word_str' => (
27             is => 'ro',
28             isa => 'Str',
29             required => 1,
30             );
31              
32              
33             has 'syllables' => (
34             metaclass => 'Collection::Array',
35             coerce => 1,
36             is => 'ro',
37             isa => 'Lingua::LO::Romanize::Types::SyllableArr',
38             init_arg => undef,
39             builder => '_build_syllables',
40             lazy => 1,
41             provides => {
42             elements => 'all_syllables',
43             },
44             );
45              
46             has 'hyphen' => (
47             is => 'rw',
48             isa => 'Bool',
49             default => 0,
50             );
51              
52             # private builder method for syllables
53             # parsing out lao syllables, lao numbers and unrecognized characters
54              
55             sub _build_syllables {
56             my $self = shift;
57             my $word = $self->word_str;
58             my @syllables;
59            
60             while ($word) {
61             if ($word =~ s/^[^ກ-ໝ]+//s) {
62             push @syllables, $&;
63             } elsif ($word =~ s/^[໐-à»™]+//s) {
64             push @syllables, $&;
65             } elsif ($word =~ s/^ໆ//) {
66             if (scalar(@syllables)) {
67             my $prev_syllable = $syllables[-1];
68             push @syllables, $prev_syllable;
69             }
70             } elsif ($word =~ s/^ຯ//) { #... or perhaps it should be together with a lao syllable
71             push @syllables, $&;
72             } elsif (my $syllable = _find_lao_syllable($word)) {
73             $word =~ s/^$syllable// or $word =~ s/^.//; #just so that we don't loop forever
74             push @syllables, $syllable;
75             } else { #just so we don't loop forever
76             $word =~ s/.//;
77             }
78             }
79             \@syllables;
80             }
81              
82             # private sub routine to find a lao syllable from a string and return the syllable
83             sub _find_lao_syllable {
84             my $word = shift;
85             my $syllable;
86             return
87             unless $word =~ /^[ເ-ໄ]?([ກຂຄງຈສຊຍຽດຕຖທນບປຜຝພຟມຢຣລຼວຫອຮໜໝ])/;
88            
89             my $consonant = $1;
90              
91             if ($word =~ /^[ເ-ໄ]?$consonant[ເ-ໄ]?([ວຣລຼ])/) { # ວ, ຣ, or ລ (also ຼ) can be used in combination with another consonant
92             my $extra = $1;
93             unless ($extra eq 'ວ' && $word =~ /^$consonant(?:ວ)[^ະັາິີຶືຸູະັົອໍວຽຍຳ]/) {
94             $consonant .= $extra;
95             }
96             }
97            
98             my $vowels = '';
99            
100             if ($consonant =~ /^ຫ$/ && $word =~ /^ຫ[ເ-ໄ]?([ຍນມ])/) {
101             my $extra = $1;
102             #fetch the surounding vowels and tone mark if any
103             $word =~ /^$consonant([ເ-ໄ])?$extra([ະັາິີຶືຸູະັົອໍວຽຍຳ່້໊໋]*)/;
104             $consonant .= $extra;
105             $vowels .= $1 if $1;
106             $vowels .= $2 if $2;
107             } else {
108             #fetch the surounding vowels and tone mark if any
109             $word =~ /^([ເ-ໄ])?$consonant([ະັາິີຶືຸູະັົອໍວຽຍຳ່້໊໋]*)/;
110            
111             $vowels .= $1 if $1;
112             $vowels .= $2 if $2;
113             }
114            
115             my $tone;
116             if ($vowels =~ s/([່-໋])//) {
117             $tone = $1;
118             }
119            
120             #find first vowel
121             if ($vowels =~ /^(?:ໍາ|ຳ)/) { #'sala am' is always the end of a syllable
122             my $found = $&;
123             $syllable = $consonant;
124             $found =~ s/^ໍ// and $syllable .= 'ໍ';
125             $syllable .= $tone if defined ($tone);
126             $syllable .= $found;
127             return $syllable;
128             } elsif ($vowels =~ /^(?:ເັຍະ|ເຶອະ|ເັຽະ)/ || $vowels =~ /^(?:ເາະ|ົວະ|ເັຽ|ເັຍ|ເືອ|ເິະ|ເົາ)/) {
129             # trying to match the largest vowel first, then go to shorter (less characters)
130             # doing 4 and 3 character vowels
131             my $found = $&;
132             $found =~ /^ເ/ and $syllable = 'ເ';
133             $syllable .= $consonant;
134             $found =~ /^ເ?([ົັືິຶ])/ and $syllable .= $1;
135             $syllable .= $tone if defined ($tone);
136             if ($found =~ /(ຍະ|ອະ|ຽະ|າະ|ວະ)$/ || $found =~/([ຽຍອະາ])$/) {
137             $syllable .= $1;
138             }
139             } elsif ($vowels =~ /^(?:ເະ|ເັ|ແະ|ແັ|ໂະ|ັອ|ັວ|ົວ|ັຽ|ັຍ|ເິ|ເຍ|ເຽ|ເີ|ເື|ີວ|ິວ)/) {
140             # doing 2 character vowels
141             my $found = $&;
142             $found =~ /^([ເແໂ])/ and $syllable .= $1;
143             $syllable .= $consonant;
144             $found =~ /^[ເແ]?([ັົິີື])/ and $syllable .= $1;
145             $syllable .= $tone if defined ($tone);
146             $found =~ /([ະອວຽຍ])$/ and $syllable .= $1;
147             } elsif ($vowels =~ /^[ະັາິີຶືຸູເແົໂໍອວຽຍໄໃ]/) {
148             # doing single character vowels
149             my $found = $&;
150             if ($found =~ /^([ເ-ໄ])$/) {
151             $syllable = $1 . $consonant;
152             $syllable .= $tone if defined ($tone);
153             } elsif ($found =~ /^([ັິີຶືຸູົໍ])$/) {
154             $syllable = $consonant;
155             $syllable .= $found;
156             $syllable .= $tone if defined ($tone);
157             } else {
158             $syllable = $consonant;
159             $syllable .= $tone if defined ($tone);
160             $syllable .= $found;
161             }
162             } else { #lonely constant, just return it (with possible tone)
163             $syllable = $consonant;
164             $syllable .= $tone if defined ($tone);
165             return $syllable;
166             }
167            
168             my $regexp = qr{$syllable([ກງຍຽດນບມຣວ]໌?)(?:([^່້໊໋ະັາິີຶືຸູະັົໍຳ])(.?)|$)};
169            
170             # checking for a possible closing consonant
171             if ($word =~ /^$regexp/) {
172             my $last_consonant = $1;
173             my $possible_vowel = $2 if $2;
174             my $continued_vowel = $3 if defined $3;
175             if (!(defined $2) || # end of string
176             $possible_vowel =~ /[^ຽວອຍ]/ || #for sure a consonant
177             (defined $continued_vowel && $continued_vowel =~/[ະັາິີຶືຸູະັົອໍວຽຍຳ່້໊໋]/)) {#post vowels and tones
178             $syllable .= $last_consonant;
179             }
180             }
181             return $syllable;
182             }
183              
184             =head1 SYNOPSIS
185              
186             L<Lingua::LO::Romanize::Word> is used by L<Lingua::LO::Romanize> to divide a string to a collection of words. It is recommended to use L<Lingua::LO::Romanize> instead of this class directly (even if it is possible).
187              
188             use Lingua::LO::Romanize::Word;
189              
190             my $foo = Lingua::LO::Romanize::Word->new(word_str => 'ພາສາລາວ');
191              
192             my $bar = $foo->romanize; # $bar will hold the string 'phasalao'
193             $foo->hyphen(1); # set hyphenation between syllables
194             $bar = $foo->romanize; # $bar will hold the string 'pha-sa-lao'
195             $bar = $foo->word_str; # $bar will hold the string 'ພາສາລາວ'
196            
197             my $syllables_array_ref = $foo->all_syllables; # will return an array reference to all syllables;
198              
199             For more information, please see L<Lingua::LO::Romanize>
200              
201             =head1 METHODS
202              
203             =head2 new
204              
205             Creates a new L<Lingua::LO::Romanize::Word> object, a word_str is required.
206              
207             =head2 hyphen
208              
209             If set to 1 (TRUE), the syllables will be hyphenated when romanized is called. Default is 0 (FALSE), not hyphenated.
210              
211             =head2 romanize
212              
213             Romanize the 'word' and return the romanized string accourding to the BGN/PCGN standard.
214              
215             =head2 word_str
216              
217             Returns the word as the original string.
218              
219             =head2 all_syllables
220              
221             Returns an array reference to all L<Lingua::LO::Romanize::Syllable>
222              
223             =cut
224              
225             sub romanize {
226             my $self = shift;
227            
228             my @romanized_arr;
229             my $romanized_str;
230            
231             foreach my $syllable ($self->all_syllables) {
232             push @romanized_arr, $syllable->romanize;
233             }
234            
235             my $join_str = '';
236            
237             $join_str = '-' if $self->hyphen;
238            
239             $romanized_str = join $join_str, @romanized_arr;
240            
241             $romanized_str =~ s/^-//;
242            
243             $romanized_str =~ s/--/-/g if $self->hyphen;
244            
245             return $romanized_str;
246             }
247              
248             =head1 AUTHOR
249              
250             Joakim Lagerqvist, C<< <jokke at cpan.org> >>
251              
252             =head1 BUGS
253              
254             Please report any bugs or feature requests to C<bug-lingua-lo-romanize at rt.cpan.org>, or through
255             the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Lingua-LO-Romanize>. I will be notified, and then you'll
256             automatically be notified of progress on your bug as I make changes.
257              
258             =head1 SEE ALSO
259              
260             L<Lingua::LO::Romanize>
261              
262             =head1 COPYRIGHT & LICENSE
263              
264             Copyright 2009 Joakim Lagerqvist, all rights reserved.
265              
266             This program is free software; you can redistribute it and/or modify it
267             under the same terms as Perl itself.
268              
269              
270             =cut
271              
272             no Moose;
273             __PACKAGE__->meta->make_immutable;
274              
275             1; # End of Lingua::LO::Romanize::Word