File Coverage

blib/lib/TeX/Hyphen.pm
Criterion Covered Total %
statement 144 155 92.9
branch 50 68 73.5
condition 14 15 93.3
subroutine 13 14 92.8
pod 0 5 0.0
total 221 257 85.9


line stmt bran cond sub pod time code
1              
2             package TeX::Hyphen;
3              
4             =head1 NAME
5              
6             TeX::Hyphen -- hyphenate words using TeX's patterns
7              
8             =head1 SYNOPSIS
9              
10             use TeX::Hyphen;
11             my $hyp = new TeX::Hyphen 'file' => 'hyphen.tex',
12             'style' => 'czech', leftmin => 2,
13             rightmin => 2;
14              
15             my $hyp = new TeX::Hyphen 'file' => 'hyphen.tex',
16             'style' => 'utf8';
17              
18             # my $hyp = new TeX::Hyphen "hyphen.tex";
19              
20             my $word = "representation";
21             my @points = $hyp->hyphenate($word);
22             print $hyp->visualize($word), "\n";
23              
24             =head1 DESCRIPTION
25              
26             Constructor new() creates a new Hyphen object and loads the file with
27             patterns into memory. Then you can ask it for hyphenation of a word by
28             calling a method of this object. If no file is specified, the default
29             Donald E. Knuth's F, that is included in this module, is
30             used instead.
31              
32             =head2 Arguments to constructor
33              
34             You can pass arguments to the new() call as hash, possible options are
35              
36             =over 4
37              
38             =item file
39              
40             Name of the file with the patters. It will be loaded and the resulting
41             object will be able to hyphenate according to patterns in that file.
42              
43             For convenience and backward compatibility, the file name can also be
44             specified as the first (odd) parameter to new().
45              
46             =item style
47              
48             Various languages use special shortcuts to specify the patterns.
49             Instead of doing the full TeX expansion, we use Perl code to parse the
50             patterns. The style option loads TeX::Hyphen::name_of_the_style module
51             and uses the parsing functions found in it.
52              
53             Currently, the default czech (which also works for English alright),
54             german, and utf8 are available. See the TeX::Hyphen::czech man page
55             for more information, especially if you want to support other
56             languages/styles.
57              
58             =item leftmin
59              
60             The minimum starting substring which will not be hyphenated. This
61             overrides the default specified in the style file.
62              
63             =item rightmin
64              
65             The minimum ending substring which will not be hyphenated. This
66             overrides the default specified in the style file.
67              
68             =back
69              
70             =head2 Methods that are supported
71              
72             Method hyphenate() returns list of places where the word can be
73             divided, so
74              
75             $hyp->visualize('representation')
76              
77             returns list (3, 5, 8, 10).
78              
79             Method visualize() can be used to show these points, so
80              
81             $hyp->visualize('representation')
82            
83             should return C, at least for English patterns.
84              
85             Variables I<$TeX::Hyphen::LEFTMIN> and I<$TeX::Hyphen::RIGHTMIN> can
86             be used to restrict minimal starting and ending substring where it is
87             not possible to hyphenate. They both default to 2 but should be
88             changed to match the paratemers used to generate the patterns.
89              
90             Variable I<$TeX::Hyphen::DEBUG> can be set to see some statistics and
91             processing.
92              
93             The file with hyphenation patterns may contain C<\'> and C<\v> accents,
94             used in the Czech (and other) languages.
95              
96             =cut
97              
98 2     2   7099 use strict;
  2         4  
  2         54  
99 2     2   6 use vars qw( $VERSION $DEBUG $LEFTMIN $RIGHTMIN $errstr );
  2         2  
  2         2421  
100              
101             $VERSION = '1.17';
102 0     0 0 0 sub Version () { $VERSION; }
103              
104             $DEBUG ||= 0;
105              
106             # To protect beginning and end of the word from hyphenation
107             $LEFTMIN = 2;
108             $RIGHTMIN = 2;
109              
110             my (@DATA, $DATA_LOADED);
111              
112             # #############################################################
113             # Constructor. Parameter specifies file with patterns.
114             # File is searched for \patterns{ ... } and \hyphenation{ ... }
115             # sections and these are used.
116             #
117             sub new {
118 8     8 0 2175 my $class = shift;
119 8         11 my ($file, %opts);
120 8 100       30 if (scalar(@_) % 2) {
121 3         6 $file = shift;
122 3         9 %opts = @_;
123             } else {
124 5         11 %opts = @_;
125 5         8 $file = $opts{'file'};
126             }
127 8         24 local *FILE;
128 8 100       18 if (not defined $file) {
129 5 100       14 if (not defined $DATA_LOADED) {
130 2         3061 @DATA = ;
131 2         144 $DATA_LOADED = 1;
132             }
133             } else {
134 3 50       127 open FILE, $file or do {
135 0         0 $errstr = "Error opening file `$file': $!";
136 0         0 return;
137             };
138             }
139 8         16 my $self = {};
140 8         20 bless $self, $class;
141              
142 8         34 local ($/) = "\n";
143 8         7 my ($tag, $value);
144 8         9 my $hyphen = {};
145 8         9 my $beginhyphen = {};
146 8         9 my $endhyphen = {};
147 8         11 my $bothhyphen = {};
148 8         8 my $exception = {};
149              
150 8         9 my ($process_patterns, $process_hyphenation);
151 8         9 my ($leftmin, $rightmin) = ($LEFTMIN, $RIGHTMIN);
152 8 100       27 if (not defined $opts{'style'}) {
153 7         18 $opts{'style'} = 'czech'; # for backward compatibility
154             }
155 8 50       17 if (defined $opts{'style'}) {
156 8 100       22 if ($opts{'style'} eq 'utf8') {
157 1         6 binmode(FILE,':utf8');
158             }
159 2     2   1041 eval qq!use ${class}::$opts{'style'}!;
  2     1   3  
  2     1   32  
  1     1   8  
  1     1   1  
  1     1   14  
  1     1   5  
  1         2  
  1         10  
  1         8  
  1         1  
  1         24  
  1         7  
  1         1  
  1         12  
  1         8  
  1         1  
  1         15  
  1         513  
  1         1  
  1         17  
  8         612  
160 8 50       22 if (not $@) {
161 8         360 eval "\$process_patterns = \\&${class}::$opts{'style'}::process_patterns";
162 8         307 eval "\$process_hyphenation = \\&${class}::$opts{'style'}::process_hyphenation";
163 8         287 eval "\$leftmin = \$${class}::$opts{'style'}::LEFTMIN";
164 8         257 eval "\$rightmin = \$${class}::$opts{'style'}::RIGHTMIN";
165             } else {
166 0         0 $errstr = "Error loading style module $class::$opts{'style'}: $@";
167 0         0 return;
168             }
169             }
170 8 50       36 $leftmin = $opts{leftmin} if exists $opts{leftmin};
171 8 100       18 $rightmin = $opts{rightmin} if exists $opts{rightmin};
172              
173 8         9 my ($in_patterns, $in_hyphenation) = (0, 0);
174 8         8 my $i = 0;
175 8   100     87 while ((defined $file and defined($_ = ))
      100        
      66        
176             or (not defined $file and defined($_ = $DATA[$i++]))) {
177 22347         17960 s/\%.*$//; # comment out
178 22347         14630 chomp;
179 22347 100       19226 if ($in_patterns) {
    100          
    100          
    100          
180 22254         28996 $in_patterns = $process_patterns->($_,
181             $bothhyphen, $beginhyphen,
182             $endhyphen, $hyphen);
183             } elsif ($in_hyphenation) {
184 75         112 $in_hyphenation = $process_hyphenation->($_, $exception);
185             } elsif (/\\patterns\{/) { # find the \patterns section
186 8         34 $in_patterns = 1;
187             } elsif (/\\hyphenation\{/) {
188 5         21 $in_hyphenation = 1;
189             }
190             }
191 8 100       57 close FILE if defined $file;
192 8         64 $self->{hyphen} = $hyphen;
193 8         13 $self->{begin} = $beginhyphen;
194 8         13 $self->{end} = $endhyphen;
195 8         11 $self->{both} = $bothhyphen;
196 8         15 $self->{exception} = $exception;
197 8 0       28 print STDERR 'Statistics for ', (defined $file ? $file : 'hyphen.tex'),
    50          
198             ': all ' , scalar %$hyphen,
199             ' (', scalar keys %$hyphen,
200             '), exception ', scalar %$exception,
201             ' (', scalar keys %$exception,
202             "),\n\tbegin ", scalar %$beginhyphen,
203             ' (', scalar keys %$beginhyphen,
204             '), end ', scalar %$endhyphen,
205             ' (', scalar keys %$endhyphen,
206             '), both ', scalar %$bothhyphen,
207             ' (', scalar keys %$bothhyphen, ")\n" if $DEBUG;
208            
209 8         60 $self->{exact} = { %$exception };
210 8         17 $self->{leftmin} = $leftmin;
211 8         15 $self->{rightmin} = $rightmin;
212 8         72 $self;
213             }
214              
215             # ############################################
216             # For given word finds places for hyphenation.
217             # Returns an array specifying the places.
218             #
219             sub hyphenate {
220 1390     1390 0 5550 my ($self, $word) = (shift, shift);
221              
222 1390 50       1627 print STDERR "Hyphenate `$word'\n" if $DEBUG;
223            
224 1390         1223 my $exact = $self->{exact};
225 1390 100       1774 if (defined(my $res = $exact->{$word})) {
226 3 50       6 print STDERR "Exact match $res\n" if $DEBUG;
227 3         7 return $self->make_result_list($res);
228             }
229              
230 1387         970 my $hyphen = $self->{hyphen};
231 1387         815 my $beginhyphen = $self->{begin};
232 1387         876 my $endhyphen = $self->{end};
233 1387         837 my $bothhyphen = $self->{both};
234              
235 1387         918 my $totallength = length $word;
236 1387         1946 my @result = (0) x ($totallength + 1);
237              
238             # walk the word
239 1387         1186 my $rightstop = $totallength - $self->{rightmin};
240 1387         763 my $pos;
241 1387         1906 for ($pos = 0; $pos <= $rightstop; $pos++) {
242             # length of the rest of the word
243 4471         2998 my $restlength = $totallength - $pos;
244             # length of a substring
245 4471         2381 my $length;
246 4471         5473 for ($length = 1; $length <= $restlength; $length++) {
247 20257         15045 my $substr = substr $word, $pos, $length;
248 20257         10964 my $value;
249             my $j;
250 0         0 my $letter;
251 20257 100       27579 if (defined($value = $hyphen->{$substr})) {
252 2430         1503 $j = $pos;
253 2430 50       2571 print STDERR "$j: $substr: $value\n" if $DEBUG > 2;
254 2430         4615 while ($value =~ /(.)/gs) {
255 7052 100       10514 $result[$j] = $1 if ($1 > $result[$j]);
256 7052         9915 $j++;
257             }
258             }
259 20257 100 100     32375 if (($pos == 0) and
260             defined($value = $beginhyphen->{$substr})) {
261 236         168 $j = 0;
262 236 50       317 print STDERR "$j: .$substr: $value\n" if $DEBUG > 2;
263 236         494 while ($value =~ /(.)/gs) {
264 815 100       1298 $result[$j] = $1 if ($1 > $result[$j]);
265 815         1200 $j++;
266             }
267             }
268 20257 100 100     47767 if (($restlength == $length) and
269             defined($value = $endhyphen->{$substr})) {
270 222         155 $j = $pos;
271 222 50       270 print STDERR "$j: $substr.: $value\n" if $DEBUG > 2;
272 222         435 while ($value =~ /(.)/gs) {
273 477 100       750 $result[$j] = $1 if ($1 > $result[$j]);
274 477         1027 $j++;
275             }
276             }
277             }
278             }
279 1387         838 my $value;
280             my $letter;
281 1387 50       1644 if (defined($value = $bothhyphen->{$word})) {
282 0         0 my $j = 0;
283 0 0       0 print STDERR "$j: .$word.: $value\n" if $DEBUG > 2;
284 0         0 while ($value =~ /(.)/gs) {
285 0 0       0 $result[$j] = $1 if ($1 > $result[$j]);
286 0         0 $j++;
287             }
288             }
289              
290 1387         2192 my $result = join '', @result;
291             ### substr($result, 0, $self->{leftmin} + 1) = '0' x ($self->{leftmin} + 1);
292 1387         1571 substr($result, 0, $self->{leftmin}) = '0' x $self->{leftmin};
293 1387         1238 substr($result, -$self->{rightmin}) = '0' x $self->{rightmin};
294              
295 1387 50       1625 print STDERR "Result: $result\n" if $DEBUG;
296 1387         1598 return $self->make_result_list($result);
297             }
298              
299             # ####################
300             #
301             #
302             sub make_result_list {
303 1390     1390 0 1210 my ($self, $result) = @_;
304 1390         1094 my @result = ();
305 1390         908 my $i = 0;
306 1390         2759 while ($result =~ /(.)/g) {
307 7280 100       9488 push @result, $i if (int($1) % 2);
308 7280         10022 $i++;
309             }
310 1390         2832 @result;
311             }
312              
313             # #########################################
314             # For a word show the result of hyphenation
315             #
316             sub visualize {
317 17     17 0 728 my ($self, $word) = (shift, shift);
318 17         14 my $number = 0;
319 17         13 my $pos;
320 17         25 for $pos ($self->hyphenate($word)) {
321 29         33 substr($word, $pos + $number, 0) = "-";
322 29         27 $number++;
323             }
324 17         39 $word;
325             }
326              
327             =head1 VERSION
328              
329             1.17
330              
331             =head1 AVAILABLE FROM
332              
333             http://www.adelton.com/perl/TeX-Hyphen/
334              
335             =head1 AUTHOR
336              
337             (c) 1997--2016 Jan Pazdziora.
338              
339             All rights reserved. This package is free software; you can
340             redistribute it and/or modify it under the same terms as Perl itself.
341              
342             Contact the author at jpx dash perl at adelton dot com.
343              
344             =head1 SEE ALSO
345              
346             perl(1), TeX::Hyphen::czech.
347              
348             =cut
349              
350             1;
351              
352             __DATA__