|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Lingua::PT::Hyphenate;
  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
3
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
179885
 | 
 use 5.006;
  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
    | 
| 
4
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
19
 | 
 use strict;
  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
87
 | 
    | 
| 
5
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
18
 | 
 use warnings;
  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1778
 | 
    | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 require Exporter;
  | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our @ISA = qw(Exporter);
  | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our %EXPORT_TAGS = ( 'all' => [ qw(
  | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	hyphenate
  | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ) ] );
  | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our @EXPORT = qw(
  | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	hyphenate
  | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 );
  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $VERSION = '1.06';
  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME
  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Lingua::PT::Hyphenate - Separates Portuguese words in syllables
  | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS
  | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   use Lingua::PT::Hyphenate;
  | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   @syllables = hyphenate("teste")   # @syllables now hold ('tes', 'te')
  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # or
  | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $word = new Lingua::PT::Hyphenate;
  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   @syllables = $word->hyphenate;
  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DESCRIPTION
  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Separates Portuguese words into syllables.
  | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut
  | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my ($vowel,$consonant,$letter,$oc_fr);
  | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my ($ditongo,$ditongos,@regex);
  | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 BEGIN {
  | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
49
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
26
 | 
   $vowel     = qr/[aeiouãâáàêéíóõôúÃÁÉÍÓÕÔÊÂÚ]/i;
  | 
| 
50
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
   $consonant = qr/[zxcvbnmsdfghjlqrtpçÇ]/i;
  | 
| 
51
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
   $letter    = qr/[aeiouãâáàêéíóõôúÃÁÂÉÊÍÓÕÔÚzxcvbnmsdfghjlqrtpçÇ]/i;
  | 
| 
52
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
   $oc_fr     = qr/[ctpgdbfv]/i;
  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
54
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
   my @ditongos = qw(ia ua uo ai ei oi ou ai ae au ao éi ei am$
  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ui oi ói ou ãi ãe ão iu eu en õe ui em$);
  | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
57
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
   $ditongo = join "|", @ditongos;
  | 
| 
58
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
279
 | 
   $ditongo = qr/$ditongo/i;
  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
60
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
   $ditongos = join "|", map { /(.)(.*)/ ; "$1(?=$2)" } @ditongos;
  | 
| 
 
 | 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
176
 | 
    | 
| 
 
 | 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
216
 | 
    | 
| 
61
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
269
 | 
   $ditongos = qr/$ditongos/i;
  | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 ALGORITHM
  | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The algorithm has several steps, but all of them consist on marking
  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 points of the word that either are to be separated or that are not
  | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 allowed to be
  | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 separated.
  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 After all those main steps are fulfilled, the marks for non-separation
  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 are removed and the word is finally splitted by the other marks and
  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 returned as an array.
  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut
  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
76
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2377
 | 
   @regex = (
  | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     [ qr/[gq]u(?=$vowel)/i,                                  '.' ],
  | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     [ qr/$letter(?=${consonant}s)/i,                         '.' ],
  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     [ qr/[cln](?=h)/i,                                       '.' ],
  | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     [ qr/(?<=$consonant)$oc_fr(?=[lr])/i,                    '.' ],
  | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     [ qr/^sub(?=$consonant)/i,                               '|' ],
  | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     [ qr/(?<=$consonant)$consonant(?=$consonant)/i,          '|' ],
  | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     [ qr/$ditongo(?=$ditongo)/i,                             '|' ],
  | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     [ qr/$vowel(?=$ditongo)/i,                               '|' ],
  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     [ qr/$ditongos/i,                                        '.' ],
  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     [ qr/$vowel(?=$vowel)/i,                                 '|' ],
  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     [ qr/$oc_fr(?=[lr])/i,                                   '.' ],
  | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     [ qr/${letter}\.?$consonant(?=${consonant}\.?$letter)/i, '|' ],
  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     [ qr/$vowel(?=${consonant}\.?$letter)/i,                 '|' ],
  | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   );
  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }
  | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 METHODS
  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 new
  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Creates a new C object.
  | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $word = Lingua::PT::Hyphenate->new("palavra");
  | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # "palavra" is Portuguese for "word"
  | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If you're doing this lots of time, it would probably be better for you
  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to use the C function directly (that is, creating a new object
  | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 for each word in a long text doesn't seem so bright if you're not
  | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 going to use it later on).
  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut
  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new {
  | 
| 
111
 | 
181
 | 
 
 | 
 
 | 
  
181
  
 | 
  
1
  
 | 
204336
 | 
   my ($self, $word) = @_;
  | 
| 
112
 | 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
403
 | 
   bless \$word, $self;
  | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }
  | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 hyphenate
  | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Separates a Portuguese in syllables.
  | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my @syllables = hyphenate('palavra');
  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # @syllables now hold ('pa', 'la', 'vra')
  | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # or, if you created an object
  | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my @syllables = $word->hyphenate
  | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut
  | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub hyphenate {
  | 
| 
128
 | 
372
 | 
  
100
  
 | 
 
 | 
  
372
  
 | 
  
1
  
 | 
213121
 | 
   $_[0] || return ();
  | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
130
 | 
352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
420
 | 
   my $word;
  | 
| 
131
 | 
352
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
739
 | 
   if (ref($_[0]) eq 'Lingua::PT::Hyphenate') {
  | 
| 
132
 | 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
242
 | 
     my $self = shift;
  | 
| 
133
 | 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
289
 | 
     $word = $$self;
  | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }
  | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else {
  | 
| 
136
 | 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
596
 | 
     $word = shift;
  | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }
  | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
139
 | 
352
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2353
 | 
   $word =~ /^$letter+$/ || return ();
  | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
141
 | 
332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
589
 | 
   for my $regex (@regex) {
  | 
| 
142
 | 
4316
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17896
 | 
     $word =~ s/$$regex[0]/${&}$$regex[1]/g;
  | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }
  | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
145
 | 
332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
685
 | 
   $word =~ y/.//d;
  | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
147
 | 
332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1281
 | 
   split '\|', $word;
  | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }
  | 
| 
149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
  | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;
  | 
| 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __END__
  |