File Coverage

blib/lib/Lingua/PT/Hyphenate.pm

Criterion	Covered	Total	%
statement	34	34	100.0
branch	6	6	100.0
condition			n/a
subroutine	6	6	100.0
pod	2	2	100.0
total	48	48	100.0

line	stmt	bran	sub	pod	time	code
1						package Lingua::PT::Hyphenate;
2
3	4		4		99658	use 5.006;
	4				17
	4				170
4	4		4		24	use strict;
	4				7
	4				174
5	4		4		28	use warnings;
	4				11
	4				2909
6
7						require Exporter;
8
9						our @ISA = qw(Exporter);
10
11						our %EXPORT_TAGS = ( 'all' => [ qw(
12						hyphenate
13						) ] );
14
15						our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
16
17						our @EXPORT = qw(
18						hyphenate
19						);
20
21						our $VERSION = '1.05';
22
23						=head1 NAME
24
25						Lingua::PT::Hyphenate - Separates Portuguese words into syllables
26
27						=head1 SYNOPSIS
28
29						use Lingua::PT::Hyphenate;
30
31						@syllables = hyphenate("teste"); # @syllables now holds ('tes', 'te')
32
33						# or
34
35						$word = Lingua::PT::Hyphenate->new("teste");
36						@syllables = $word->hyphenate;
37
38						=head1 DESCRIPTION
39
40						Separates Portuguese words into syllables.
41
42						=cut
43
44						my ($vowel,$consonant,$letter,$oc_fr);
45						my ($ditongo,$ditongos,@regex);
46
47						BEGIN {
48
49	4		4		21	$vowel = qr/[aeiou��]/i;
50	4				14	$consonant = qr/[zxcvbnmsdfghjlqrtp��]/i;
51	4				21	$letter = qr/[aeiou��zxcvbnmsdfghjlqrtp��]/i;
52	4				32	$oc_fr = qr/[ctpgdbfv]/i;
53
54	4				34	my @ditongos = qw(ia ua uo ai ei oi ou ai ae au ao �i ei am$
55						ui oi �i ou �i �e �o iu eu en �e ui em$);
56
57	4				22	$ditongo = join "\|", @ditongos;
58	4				458	$ditongo = qr/$ditongo/i;
59
60	4				36	$ditongos = join "\|", map { /(.)(.*)/ ; "$1(?=$2)" } @ditongos;
	108				366
	108				313
61	4				593	$ditongos = qr/$ditongos/i;
62
63						=head1 ALGORITHM
64
65						The algorithm has several steps, but all of them consist of marking
66						points of the word that either are to be separated or that are not
67						allowed to be
68						separated.
69
70						After all those main steps are fulfilled, the marks for non-separation
71						are removed and the word is finally split at the other marks and
72						returned as an array.
73
74						=cut
75
76	4				5757	@regex = (
77						[ qr/[gq]u(?=$vowel)/i, '.' ],
78						[ qr/$letter(?=${consonant}s)/i, '.' ],
79						[ qr/[cln](?=h)/i, '.' ],
80						[ qr/(?<=$consonant)$oc_fr(?=[lr])/i, '.' ],
81						[ qr/^sub(?=$consonant)/i, '\|' ],
82						[ qr/(?<=$consonant)$consonant(?=$consonant)/i, '\|' ],
83						[ qr/$ditongo(?=$ditongo)/i, '\|' ],
84						[ qr/$vowel(?=$ditongo)/i, '\|' ],
85						[ qr/$ditongos/i, '.' ],
86						[ qr/$vowel(?=$vowel)/i, '\|' ],
87						[ qr/$oc_fr(?=[lr])/i, '.' ],
88						[ qr/${letter}\.?$consonant(?=${consonant}\.?$letter)/i, '\|' ],
89						[ qr/$vowel(?=${consonant}\.?$letter)/i, '\|' ],
90						);
91
92						}
93
94						=head1 METHODS
95
96						=head2 new
97
98						Creates a new Lingua::PT::Hyphenate object.
99
100						$word = Lingua::PT::Hyphenate->new("palavra");
101						# "palavra" is Portuguese for "word"
102
103						If you're doing this many times, it would probably be better for you
104						to use the hyphenate function directly (that is, creating a new object
105						for each word in a long text doesn't seem so bright if you're not
106						going to use it later on).
107
108						=cut
109
110						sub new {
111	181		181	1	218571	my ($self, $word) = @_;
112	181				840	bless \$word, $self;
113						}
114
115						=head2 hyphenate
116
117						Separates a Portuguese word into syllables.
118
119						my @syllables = hyphenate('palavra');
120						# @syllables now holds ('pa', 'la', 'vra')
121
122						# or, if you created an object
123						my @syllables = $word->hyphenate;
124
125						=cut
126
127						sub hyphenate {
128	372	100	372	1	257526	$_[0] \|\| return ();
129
130	352				488	my $word;
131	352	100			863	if (ref($_[0]) eq 'Lingua::PT::Hyphenate') {
132	181				219	my $self = shift;
133	181				313	$word = $$self;
134						}
135						else {
136	171				252	$word = shift;
137						}
138
139	352	100			3340	$word =~ /^$letter+$/ \|\| return ();
140
141	332				793	for my $regex (@regex) {
142	4316				27187	$word =~ s/$$regex[0]/${&}$$regex[1]/g;
143						}
144
145	332				935	$word =~ y/.//d;
146
147	332				1881	split '\\|', $word;
148						}
149
150						1;
151						__END__