line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# -*- cperl; cperl-indent-level: 4 -*- |
2
|
|
|
|
|
|
|
# Copyright (C) 2009-2021, Roland van Ipenburg |
3
|
|
|
|
|
|
|
package HTML::Hyphenate v1.1.8; |
4
|
11
|
|
|
11
|
|
1078103
|
use Moose; |
|
11
|
|
|
|
|
5252803
|
|
|
11
|
|
|
|
|
93
|
|
5
|
11
|
|
|
11
|
|
82646
|
use utf8; |
|
11
|
|
|
|
|
27
|
|
|
11
|
|
|
|
|
86
|
|
6
|
11
|
|
|
11
|
|
591
|
use 5.016000; |
|
11
|
|
|
|
|
39
|
|
7
|
|
|
|
|
|
|
|
8
|
11
|
|
|
11
|
|
9767
|
use charnames (); |
|
11
|
|
|
|
|
350768
|
|
|
11
|
|
|
|
|
343
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
#use Log::Log4perl qw(:resurrect :easy get_logger); |
11
|
11
|
|
|
11
|
|
5508
|
use Set::Scalar; |
|
11
|
|
|
|
|
114937
|
|
|
11
|
|
|
|
|
562
|
|
12
|
11
|
|
|
11
|
|
8640
|
use TeX::Hyphen; |
|
11
|
|
|
|
|
20125
|
|
|
11
|
|
|
|
|
422
|
|
13
|
11
|
|
|
11
|
|
6525
|
use TeX::Hyphen::Pattern 0.100; |
|
11
|
|
|
|
|
1400608
|
|
|
11
|
|
|
|
|
481
|
|
14
|
11
|
|
|
11
|
|
6737
|
use HTML::Hyphenate::DOM; |
|
11
|
|
|
|
|
50
|
|
|
11
|
|
|
|
|
406
|
|
15
|
|
|
|
|
|
|
|
16
|
11
|
|
|
11
|
|
140
|
use Readonly; |
|
11
|
|
|
|
|
40
|
|
|
11
|
|
|
|
|
26092
|
|
17
|
|
|
|
|
|
|
## no critic qw(ProhibitCallsToUnexportedSubs) |
18
|
|
|
|
|
|
|
Readonly::Scalar my $EMPTY => q{}; |
19
|
|
|
|
|
|
|
Readonly::Scalar my $DOT => q{.}; |
20
|
|
|
|
|
|
|
Readonly::Scalar my $SOFT_HYPHEN => charnames::string_vianame(q{SOFT HYPHEN}); |
21
|
|
|
|
|
|
|
Readonly::Scalar my $CLASS_JOINER => q{, .}; # for CSS classnames |
22
|
|
|
|
|
|
|
Readonly::Scalar my $ONE_LEVEL_UP => -1; |
23
|
|
|
|
|
|
|
Readonly::Scalar my $DOCTYPE => q{<!DOCTYPE html>}; |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
Readonly::Hash my %DEFAULT => ( |
26
|
|
|
|
|
|
|
'MIN_LENGTH' => 10, |
27
|
|
|
|
|
|
|
'MIN_PRE' => 2, |
28
|
|
|
|
|
|
|
'MIN_POST' => 2, |
29
|
|
|
|
|
|
|
'LANG' => q{en_us}, |
30
|
|
|
|
|
|
|
'INCLUDED' => 1, |
31
|
|
|
|
|
|
|
); |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# HTML %Text attributes <http://www.w3.org/TR/REC-html40/index/attributes.html> |
34
|
|
|
|
|
|
|
# HTML5 text attributes <https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes> |
35
|
|
|
|
|
|
|
my $text_attr = |
36
|
|
|
|
|
|
|
Set::Scalar->new(qw/abbr alt label list placeholder standby summary title/); |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
## no critic qw(ProhibitCallsToUnexportedSubs) |
39
|
|
|
|
|
|
|
Readonly::Hash my %LOG => ( |
40
|
|
|
|
|
|
|
'TRAVERSE' => q{Traversing HTML element '%s'}, |
41
|
|
|
|
|
|
|
'LANGUAGE_SET' => q{Language changed to '%s'}, |
42
|
|
|
|
|
|
|
'PATTERN_FILE' => q{Using pattern file '%s'}, |
43
|
|
|
|
|
|
|
'TEXT_NODE' => q{Text node value '%s'}, |
44
|
|
|
|
|
|
|
'HYPHEN_TEXT' => q{Hyphenating text '%s'}, |
45
|
|
|
|
|
|
|
'HYPHEN_WORD' => q{Hyphenating word '%s' to '%s'}, |
46
|
|
|
|
|
|
|
'LOOKING_UP' => q{Looking up for %d class(es)}, |
47
|
|
|
|
|
|
|
'HTML_METHOD' => q{Using HTML passed to method '%s'}, |
48
|
|
|
|
|
|
|
'HTML_PROPERTY' => q{Using HTML property '%s'}, |
49
|
|
|
|
|
|
|
'NOT_HYPHEN' => q{No pattern found for '%s'}, |
50
|
|
|
|
|
|
|
'REGISTER' => q{Registering TeX::Hyphen object for label '%s'}, |
51
|
|
|
|
|
|
|
'NO_CLASSES' => q{No classes defined, so not check for them}, |
52
|
|
|
|
|
|
|
); |
53
|
|
|
|
|
|
|
## use critic |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
## no critic qw(ProhibitCommentedOutCode) |
56
|
|
|
|
|
|
|
###l4p Log::Log4perl->easy_init( { 'level' => $DEBUG, 'utf8' => 1 } ); |
57
|
|
|
|
|
|
|
###l4p my $log = get_logger(); |
58
|
|
|
|
|
|
|
## use critic |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
## no critic qw(ProhibitHashBarewords ProhibitCallsToUnexportedSubs ProhibitCallsToUndeclaredSubs) |
61
|
|
|
|
|
|
|
has html => ( is => 'rw', isa => 'Str' ); |
62
|
|
|
|
|
|
|
after 'html' => sub { |
63
|
|
|
|
|
|
|
my ( $self, $html ) = @_; |
64
|
|
|
|
|
|
|
if ( defined $html ) { |
65
|
|
|
|
|
|
|
## no critic qw(ProhibitUnusedCapture) |
66
|
|
|
|
|
|
|
if ( $self->html =~ m{^(?<doctype>\s*\Q$DOCTYPE\E)(?<html>.*)}gismx ) { |
67
|
|
|
|
|
|
|
## use critic |
68
|
|
|
|
|
|
|
$self->html( ${+}{html} ); |
69
|
|
|
|
|
|
|
$self->_doctype( ${+}{doctype} ); |
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
else { |
72
|
|
|
|
|
|
|
$self->_doctype($EMPTY); |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
} |
75
|
|
|
|
|
|
|
}; |
76
|
|
|
|
|
|
|
has style => ( is => 'rw', isa => 'Str' ); |
77
|
|
|
|
|
|
|
has min_length => |
78
|
|
|
|
|
|
|
( is => 'rw', isa => 'Int', default => $DEFAULT{'MIN_LENGTH'} ); |
79
|
|
|
|
|
|
|
has min_pre => ( is => 'rw', isa => 'Int', default => $DEFAULT{'MIN_PRE'} ); |
80
|
|
|
|
|
|
|
has min_post => ( is => 'rw', isa => 'Int', default => $DEFAULT{'MIN_POST'} ); |
81
|
|
|
|
|
|
|
has default_lang => ( is => 'rw', isa => 'Str', default => $DEFAULT{'LANG'} ); |
82
|
|
|
|
|
|
|
has default_included => |
83
|
|
|
|
|
|
|
( is => 'rw', isa => 'Int', default => $DEFAULT{'INCLUDED'} ); |
84
|
|
|
|
|
|
|
has classes_included => |
85
|
|
|
|
|
|
|
( is => 'rw', isa => 'ArrayRef', default => sub { [] } ); |
86
|
|
|
|
|
|
|
has classes_excluded => |
87
|
|
|
|
|
|
|
( is => 'rw', isa => 'ArrayRef', default => sub { [] } ); |
88
|
|
|
|
|
|
|
after 'classes_included' => sub { |
89
|
|
|
|
|
|
|
my ( $self, $ar ) = @_; |
90
|
|
|
|
|
|
|
if ( defined $ar ) { |
91
|
|
|
|
|
|
|
$self->_classes( |
92
|
|
|
|
|
|
|
( scalar $self->classes_excluded + scalar $self->classes_included ) |
93
|
|
|
|
|
|
|
> 0 ); |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
}; |
97
|
|
|
|
|
|
|
after 'classes_excluded' => sub { |
98
|
|
|
|
|
|
|
my ( $self, $ar ) = @_; |
99
|
|
|
|
|
|
|
if ( defined $ar ) { |
100
|
|
|
|
|
|
|
$self->_classes( |
101
|
|
|
|
|
|
|
( scalar $self->classes_excluded + scalar $self->classes_included ) |
102
|
|
|
|
|
|
|
> 0 ); |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
}; |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
has _hyphenators => ( is => 'rw', isa => 'HashRef', default => sub { {} } ); |
107
|
|
|
|
|
|
|
has _lang => ( is => 'rw', isa => 'Str' ); |
108
|
|
|
|
|
|
|
has _doctype => ( is => 'rw', isa => 'Str' ); |
109
|
|
|
|
|
|
|
has _dom => ( is => 'rw', isa => 'HTML::Hyphenate::DOM' ); |
110
|
|
|
|
|
|
|
has _scope_is_root => ( is => 'rw', isa => 'Bool', default => sub { 0 } ); |
111
|
|
|
|
|
|
|
has _classes => ( is => 'rw', isa => 'Bool', default => sub { 0 } ); |
112
|
|
|
|
|
|
|
## use critic |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
## no critic qw(ProhibitCallsToUnexportedSubs) |
115
|
|
|
|
|
|
|
Readonly::Scalar my $LANG => q{lang}; |
116
|
|
|
|
|
|
|
Readonly::Scalar my $HTML => q{html}; |
117
|
|
|
|
|
|
|
Readonly::Scalar my $TEXT => q{text}; |
118
|
|
|
|
|
|
|
Readonly::Scalar my $TAG => q{tag}; |
119
|
|
|
|
|
|
|
Readonly::Scalar my $RAW => q{raw}; |
120
|
|
|
|
|
|
|
Readonly::Scalar my $PRE => q{pre}; |
121
|
|
|
|
|
|
|
Readonly::Scalar my $CLASS => q{class}; |
122
|
|
|
|
|
|
|
## no critic qw(RequireDotMatchAnything RequireExtendedFormatting RequireLineBoundaryMatching) |
123
|
|
|
|
|
|
|
Readonly::Scalar my $NONSPACE => qr{\S+}; |
124
|
|
|
|
|
|
|
## use critic |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
sub hyphenated { |
127
|
133
|
|
|
133
|
1
|
110810
|
my ( $self, $html ) = @_; |
128
|
133
|
100
|
|
|
|
427
|
if ( defined $html ) { |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
###l4p $log->debug( sprintf $LOG{'HTML_METHOD'}, $html ); |
131
|
132
|
|
|
|
|
746
|
$self->html($html); |
132
|
|
|
|
|
|
|
} |
133
|
|
|
|
|
|
|
else { |
134
|
|
|
|
|
|
|
###l4p $log->debug( sprintf $LOG{'HTML_PROPERTY'}, $self->html ); |
135
|
|
|
|
|
|
|
} |
136
|
133
|
|
|
|
|
814
|
$self->_reset_dom; |
137
|
133
|
|
|
|
|
3296
|
$self->_dom->parse( $self->html ); |
138
|
133
|
|
|
|
|
39308
|
$self->_traverse_dom( $self->_dom->root ); |
139
|
133
|
|
|
|
|
507
|
return $self->_clean_html(); |
140
|
|
|
|
|
|
|
} |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
sub register_tex_hyphen { |
143
|
5
|
|
|
5
|
1
|
398596
|
my ( $self, $label, $tex ) = @_; |
144
|
5
|
100
|
100
|
|
|
109
|
if ( |
|
|
|
100
|
|
|
|
|
145
|
|
|
|
|
|
|
defined $label |
146
|
|
|
|
|
|
|
## no critic qw(ProhibitCallsToUndeclaredSubs) |
147
|
|
|
|
|
|
|
&& blessed $tex |
148
|
|
|
|
|
|
|
## use critic |
149
|
|
|
|
|
|
|
&& $tex->isa('TeX::Hyphen') |
150
|
|
|
|
|
|
|
) |
151
|
|
|
|
|
|
|
{ |
152
|
2
|
|
|
|
|
129
|
my $cache = $self->_hyphenators; |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
###l4p $log->debug( sprintf $LOG{'REGISTER'}, $label ); |
155
|
2
|
|
|
|
|
7
|
${$cache}{$label} = $tex; |
|
2
|
|
|
|
|
8
|
|
156
|
2
|
|
|
|
|
55
|
$self->_hyphenators($cache); |
157
|
|
|
|
|
|
|
} |
158
|
5
|
|
|
|
|
26
|
return; |
159
|
|
|
|
|
|
|
} |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
sub _traverse_dom { |
162
|
600
|
|
|
600
|
|
4252
|
my ( $self, $node ) = @_; |
163
|
600
|
100
|
|
|
|
1488
|
if ( $self->_hyphenable($node) ) { |
164
|
543
|
|
|
|
|
1624
|
my $type = $node->type; |
165
|
543
|
100
|
100
|
|
|
6429
|
if ( $TAG eq $type ) { |
|
|
100
|
|
|
|
|
|
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
###l4p $log->debug( sprintf $LOG{'TRAVERSE'}, $node->tag ); |
168
|
189
|
|
|
|
|
593
|
$self->_configure_lang($node); |
169
|
189
|
|
|
|
|
401
|
while ( my ( $k, $v ) = each %{ $node->attr } ) { |
|
329
|
|
|
|
|
3813
|
|
170
|
140
|
100
|
100
|
|
|
4581
|
if ( $text_attr->has($k) |
171
|
|
|
|
|
|
|
&& length $v >= $self->min_length ) |
172
|
|
|
|
|
|
|
{ |
173
|
10
|
|
|
|
|
29
|
$node->attr( $k, $self->_hyphen($v) ); |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
} |
177
|
|
|
|
|
|
|
elsif ( $TEXT eq $type || $RAW eq $type ) { |
178
|
229
|
|
|
|
|
739
|
my $string = $node->to_string; |
179
|
|
|
|
|
|
|
###l4p $log->trace( sprintf $LOG{'TEXT_NODE'}, $string ); |
180
|
229
|
100
|
100
|
|
|
12822
|
if ( |
181
|
|
|
|
|
|
|
length $string >= $self->min_length |
182
|
|
|
|
|
|
|
## no critic qw(RequireDotMatchAnything RequireLineBoundaryMatching) |
183
|
|
|
|
|
|
|
&& $string =~ m{$NONSPACE}x |
184
|
|
|
|
|
|
|
) |
185
|
|
|
|
|
|
|
## use critic |
186
|
|
|
|
|
|
|
{ |
187
|
148
|
|
|
|
|
616
|
$self->_configure_lang($node); |
188
|
148
|
|
|
|
|
537
|
my $hyphened = $self->_hyphen($string); |
189
|
148
|
|
|
|
|
927
|
$node->replace($hyphened); |
190
|
|
|
|
|
|
|
} |
191
|
229
|
|
|
|
|
29940
|
return; |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
} |
194
|
371
|
|
|
|
|
3893
|
for my $child ( $node->child_nodes->each ) { |
195
|
467
|
|
|
|
|
30685
|
$self->_traverse_dom($child); |
196
|
|
|
|
|
|
|
} |
197
|
371
|
|
|
|
|
2431
|
return; |
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
sub _clean_html { |
201
|
133
|
|
|
133
|
|
303
|
my ($self) = @_; |
202
|
133
|
|
|
|
|
4486
|
my $html = $self->_dom->to_string(); |
203
|
133
|
|
|
|
|
14158
|
$self->_reset_dom; |
204
|
133
|
100
|
|
|
|
3788
|
if ( $EMPTY ne $self->_doctype ) { |
205
|
3
|
|
|
|
|
64
|
$html = $self->_doctype . $html; |
206
|
|
|
|
|
|
|
} |
207
|
133
|
|
|
|
|
567
|
return $html; |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
sub _hyphen { |
211
|
158
|
|
|
158
|
|
392
|
my ( $self, $text ) = @_; |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
###l4p $log->debug( sprintf $LOG{'HYPHEN_TEXT'}, $text ); |
214
|
158
|
|
|
|
|
341
|
$text =~ s/(\w{@{[$self->min_length]},})/$self->_hyphen_word($1)/xsmeg; |
|
180
|
|
|
|
|
652
|
|
|
158
|
|
|
|
|
3986
|
|
215
|
158
|
|
|
|
|
560
|
return $text; |
216
|
|
|
|
|
|
|
} |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
sub _hyphen_word { |
219
|
180
|
|
|
180
|
|
616
|
my ( $self, $word ) = @_; |
220
|
180
|
100
|
|
|
|
4840
|
if ( defined $self->_hyphenators->{ $self->_lang } ) { |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
###l4p $log->debug( sprintf $LOG{'HYPHEN_WORD'}, |
223
|
|
|
|
|
|
|
###l4p $word, $self->_hyphenators->{ $self->_lang }->visualize($word) ); |
224
|
178
|
|
|
|
|
333
|
my $number = 0; |
225
|
178
|
|
|
|
|
4707
|
foreach |
226
|
|
|
|
|
|
|
my $pos ( $self->_hyphenators->{ $self->_lang }->hyphenate($word) ) |
227
|
|
|
|
|
|
|
{ |
228
|
1189
|
|
|
|
|
415570
|
substr $word, $pos + $number, 0, $SOFT_HYPHEN; |
229
|
1189
|
|
|
|
|
1810
|
$number += length $SOFT_HYPHEN; |
230
|
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
|
else { |
233
|
|
|
|
|
|
|
###l4p $log->warn( sprintf $LOG{'NOT_HYPHEN'}, $self->_lang ); |
234
|
|
|
|
|
|
|
} |
235
|
180
|
|
|
|
|
11240
|
return $word; |
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
## no critic qw(RequireArgUnpacking) |
239
|
|
|
|
|
|
|
sub __lang_attr { |
240
|
674
|
100
|
|
674
|
|
11546
|
if ( $_[0] ) { |
241
|
573
|
|
100
|
|
|
3232
|
return $_[0]->attr($LANG) || $_[0]->attr(qq{xml:$LANG}); |
242
|
|
|
|
|
|
|
## use critic |
243
|
|
|
|
|
|
|
} |
244
|
|
|
|
|
|
|
else { |
245
|
101
|
|
|
|
|
220
|
return; |
246
|
|
|
|
|
|
|
} |
247
|
|
|
|
|
|
|
} |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
sub _configure_lang { |
250
|
337
|
|
|
337
|
|
706
|
my ( $self, $element ) = @_; |
251
|
337
|
|
|
|
|
806
|
my $lang = __lang_attr($element); |
252
|
337
|
100
|
|
|
|
9990
|
if ( defined $lang ) { |
253
|
104
|
|
|
|
|
308
|
$self->_scope_is_root( $HTML eq $element->tag ); |
254
|
|
|
|
|
|
|
} |
255
|
337
|
100
|
|
|
|
804
|
if ( !defined $lang ) { |
256
|
233
|
|
|
|
|
562
|
$lang = __lang_attr( $element->parent ); |
257
|
233
|
100
|
|
|
|
5764
|
if ( defined $lang ) { |
258
|
113
|
|
|
|
|
326
|
$self->_scope_is_root( $HTML eq $element->parent->tag ); |
259
|
|
|
|
|
|
|
} |
260
|
|
|
|
|
|
|
} |
261
|
337
|
100
|
|
|
|
1224
|
if ( !defined $lang ) { |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
# If the scope was already set by the root element we don't have to |
264
|
|
|
|
|
|
|
# check if it has gone out of scope because we never leave the root |
265
|
|
|
|
|
|
|
# scope: |
266
|
120
|
100
|
|
|
|
3713
|
if ( !$self->_scope_is_root ) { |
267
|
104
|
|
|
|
|
436
|
my $recent = $element->ancestors(qq{[$LANG]})->first(); |
268
|
104
|
|
100
|
|
|
26947
|
$self->_scope_is_root( $recent && $HTML eq $recent->tag ); |
269
|
104
|
|
|
|
|
233
|
$lang = __lang_attr($recent); |
270
|
|
|
|
|
|
|
} |
271
|
|
|
|
|
|
|
else { |
272
|
16
|
|
|
|
|
315
|
$lang = $self->_lang; |
273
|
|
|
|
|
|
|
} |
274
|
|
|
|
|
|
|
} |
275
|
337
|
100
|
|
|
|
904
|
if ( !defined $lang ) { |
276
|
101
|
|
|
|
|
2766
|
$lang = $self->default_lang; |
277
|
|
|
|
|
|
|
} |
278
|
337
|
100
|
100
|
|
|
8468
|
if ( !defined $self->_lang || $lang ne $self->_lang ) { |
279
|
103
|
|
|
|
|
2433
|
$self->_lang($lang); |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
###l4p $log->debug( sprintf $LOG{'LANGUAGE_SET'}, $lang ); |
282
|
103
|
100
|
|
|
|
2585
|
if ( !exists $self->_hyphenators->{$lang} ) { |
283
|
76
|
|
|
|
|
253
|
$self->_add_tex_hyphen_to_cache(); |
284
|
|
|
|
|
|
|
} |
285
|
|
|
|
|
|
|
} |
286
|
337
|
|
|
|
|
32900
|
return; |
287
|
|
|
|
|
|
|
} |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
sub _add_tex_hyphen_to_cache { |
290
|
76
|
|
|
76
|
|
173
|
my ($self) = @_; |
291
|
76
|
|
|
|
|
762
|
my $thp = TeX::Hyphen::Pattern->new(); |
292
|
76
|
|
|
|
|
125026
|
$thp->label( $self->_lang ); |
293
|
76
|
|
|
|
|
3264
|
my $cache = $self->_hyphenators; |
294
|
76
|
100
|
|
|
|
456
|
if ( my $file = $thp->filename ) { |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
###l4p $log->debug( sprintf $LOG{'PATTERN_FILE'}, $file ); |
297
|
74
|
|
|
|
|
24848595
|
${$cache}{ $self->_lang } = TeX::Hyphen->new( |
|
74
|
|
|
|
|
16703500
|
|
298
|
|
|
|
|
|
|
q{file} => $file, |
299
|
|
|
|
|
|
|
q{leftmin} => $self->min_pre, |
300
|
|
|
|
|
|
|
q{rightmin} => $self->min_post, |
301
|
|
|
|
|
|
|
); |
302
|
74
|
|
|
|
|
2244
|
$self->_hyphenators($cache); |
303
|
|
|
|
|
|
|
} |
304
|
76
|
|
|
|
|
611980
|
return; |
305
|
|
|
|
|
|
|
} |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
sub _hyphenable_by_class { |
308
|
118
|
|
|
118
|
|
247
|
my ( $self, $node ) = @_; |
309
|
118
|
|
|
|
|
183
|
my $included_level = $ONE_LEVEL_UP; |
310
|
118
|
|
|
|
|
181
|
my $excluded_level = $ONE_LEVEL_UP; |
311
|
118
|
100
|
|
|
|
3275
|
$self->default_included && $excluded_level--; |
312
|
118
|
100
|
|
|
|
3139
|
$self->default_included || $included_level--; |
313
|
|
|
|
|
|
|
|
314
|
118
|
|
|
|
|
427
|
$included_level = |
315
|
|
|
|
|
|
|
$self->_get_nearest_ancestor_level_by_classname( $node, |
316
|
|
|
|
|
|
|
$self->classes_included, $included_level ); |
317
|
118
|
|
|
|
|
7792
|
$excluded_level = |
318
|
|
|
|
|
|
|
$self->_get_nearest_ancestor_level_by_classname( $node, |
319
|
|
|
|
|
|
|
$self->classes_excluded, $excluded_level ); |
320
|
118
|
100
|
|
|
|
7413
|
if ( $included_level == $excluded_level ) { |
321
|
4
|
|
|
|
|
150
|
return $self->default_included; |
322
|
|
|
|
|
|
|
} |
323
|
114
|
|
|
|
|
629
|
return !( $excluded_level > $included_level ); |
324
|
|
|
|
|
|
|
} |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
sub __parent_is_pre { |
327
|
600
|
|
|
600
|
|
1024
|
my ($node) = @_; |
328
|
600
|
|
|
|
|
1494
|
my $parent = $node->parent; |
329
|
600
|
|
100
|
|
|
26441
|
return defined $parent |
330
|
|
|
|
|
|
|
&& ( ( $parent->tag || $EMPTY ) eq $PRE ); |
331
|
|
|
|
|
|
|
} |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
sub _hyphenable { |
334
|
600
|
|
|
600
|
|
1044
|
my ( $self, $node ) = @_; |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
###l4p $self->_classes || $log->debug( $LOG{'NO_CLASSES'} ); |
337
|
600
|
|
100
|
|
|
1244
|
return !( __parent_is_pre($node) |
338
|
|
|
|
|
|
|
|| ( $self->_classes && !$self->_hyphenable_by_class($node) ) ); |
339
|
|
|
|
|
|
|
} |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
sub _get_nearest_ancestor_level_by_classname { |
342
|
236
|
|
|
236
|
|
1763
|
my ( $self, $node, $ar_classnames, $level ) = @_; |
343
|
236
|
|
|
|
|
356
|
my $classnames = Set::Scalar->new( @{$ar_classnames} ); |
|
236
|
|
|
|
|
816
|
|
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
###l4p $log->debug( sprintf $LOG{'LOOKING_UP'}, $classnames->size ); |
346
|
236
|
100
|
100
|
|
|
17641
|
if ( !$classnames->is_empty |
347
|
|
|
|
|
|
|
&& ( $node->ancestors->size ) ) |
348
|
|
|
|
|
|
|
{ |
349
|
144
|
|
|
|
|
14511
|
my $selector = $DOT . join $CLASS_JOINER, $classnames->members; |
350
|
144
|
|
|
|
|
1270
|
my $nearest = $node->ancestors($selector)->first; |
351
|
144
|
100
|
|
|
|
51952
|
if ($nearest) { |
352
|
64
|
|
|
|
|
398
|
return $nearest->ancestors->size; |
353
|
|
|
|
|
|
|
} |
354
|
|
|
|
|
|
|
} |
355
|
172
|
|
|
|
|
4587
|
return $level; |
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
sub _reset_dom { |
359
|
266
|
|
|
266
|
|
613
|
my ($self) = @_; |
360
|
266
|
|
|
|
|
972
|
my $dom = HTML::Hyphenate::DOM->new(); |
361
|
266
|
|
|
|
|
10277
|
$self->_dom($dom); |
362
|
266
|
|
|
|
|
439
|
return; |
363
|
|
|
|
|
|
|
} |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
1; |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
__END__ |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
=encoding utf8 |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
=for stopwords Ipenburg Readonly merchantability Mojolicious Bitbucket |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
=head1 NAME |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
HTML::Hyphenate - insert soft hyphens into HTML |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
=head1 VERSION |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
This document describes HTML::Hyphenate version C<v1.1.8>. |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
=head1 SYNOPSIS |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
use HTML::Hyphenate; |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
$hyphenator = new HTML::Hyphenate(); |
386
|
|
|
|
|
|
|
$html_with_soft_hyphens = $hyphenator->hyphenated($html); |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
$hyphenator->html($html); |
389
|
|
|
|
|
|
|
$hyphenator->style($style); # czech or german |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
$hyphenator->min_length(10); |
392
|
|
|
|
|
|
|
$hyphenator->min_pre(2); |
393
|
|
|
|
|
|
|
$hyphenator->min_post(2); |
394
|
|
|
|
|
|
|
$hyphenator->default_lang('en-us'); |
395
|
|
|
|
|
|
|
$hyphenator->default_included(1); |
396
|
|
|
|
|
|
|
$hyphenator->classes_included(['shy']); |
397
|
|
|
|
|
|
|
$hyphenator->classes_excluded(['noshy']); |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
=head1 DESCRIPTION |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
Most HTML rendering engines used in web browsers don't figure out by |
402
|
|
|
|
|
|
|
themselves how to hyphenate words when needed, but we can tell them how they |
403
|
|
|
|
|
|
|
might do it by inserting soft hyphens into the words. |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
=head1 SUBROUTINES/METHODS |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
=over 4 |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
=item HTML::Hyphenate-E<gt>new() |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
Constructs a new HTML::Hyphenate object. |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
=item $hyphenator-E<gt>hyphenated() |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
Returns the HTML including the soft hyphens. |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
=item $hyphenator->html(); |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
Gets or sets the HTML to hyphenate. |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=item $hyphenator->style(); |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
Gets or sets the style to use for pattern usages in |
424
|
|
|
|
|
|
|
L<TeX::Hyphen|TeX::Hyphen>. Can be C<czech> or C<german>. |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
=item $hyphenator->min_length(); |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
Gets or sets the minimum word length required for having soft hyphens |
429
|
|
|
|
|
|
|
inserted. Defaults to 10 characters. |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
=item $hyphenator->min_pre(2); |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
Gets or sets the minimum amount of characters in a word preserved before the |
434
|
|
|
|
|
|
|
first soft hyphen. Defaults to 2 characters. |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
=item $hyphenator->min_post(2); |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
Gets or sets the minimum amount of characters in a word preserved after the |
439
|
|
|
|
|
|
|
last soft hyphen. Defaults to 2 characters. |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
=item $hyphenator->default_lang('en-us'); |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
Gets or sets the default pattern to use when no language can be derived from |
444
|
|
|
|
|
|
|
the HTML. |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
=item $hyphenator->default_included(); |
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
Gets or sets if soft hyphens should be included in the whole tree by default. |
449
|
|
|
|
|
|
|
This can be used to insert soft hyphens only in parts of the HTML having |
450
|
|
|
|
|
|
|
specific class names. |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
=item $hyphenator->classes_included(); |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
Gets or sets a reference to an array of class names that will have soft |
455
|
|
|
|
|
|
|
hyphens inserted. |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=item $hyphenator->classes_excluded(); |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
Gets or sets a reference to an array of class names that will not have soft |
460
|
|
|
|
|
|
|
hyphens inserted. |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
=item $hyphenator->register_tex_hyphen(C<lang>, C<TeX::Hyphen>) |
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
Registers a TeX::Hyphen object to handle the language defined by C<lang>. |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
=back |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
=head1 CONFIGURATION AND ENVIRONMENT |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
The output is generated by L<Mojo::DOM|Mojo::DOM> so the environment variable |
471
|
|
|
|
|
|
|
C<MOJO_DOM_CSS_DEBUG> can be set to debug it's CSS selection process. |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
=head1 DEPENDENCIES |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
=over 4 |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
=item * Perl 5.16 |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
=item * L<Moose|Moose> |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
=item * L<Mojolicious|Mojolicious> for L<Mojo::Dom|Mojo::Dom> |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
=item * L<Readonly|Readonly> |
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
=item * L<Set::Scalar|Set::Scalar> |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
=item * L<TeX::Hyphen|TeX::Hyphen> |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
=item * L<TeX::Hyphen::Pattern|TeX::Hyphen::Pattern> |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
=back |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
=head1 INCOMPATIBILITIES |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
This module has the same limits as TeX::Hyphen, TeX::Hyphen::Pattern and |
496
|
|
|
|
|
|
|
Mojo::DOM. Tests might fail if the patterns used for them are updated and |
497
|
|
|
|
|
|
|
change the test result. |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
=head1 DIAGNOSTICS |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
This module uses Log::Log4perl for logging when it's resurrected. |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
=over 4 |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
=item * It warns when a language encountered in the HTML is not supported by |
506
|
|
|
|
|
|
|
TeX::Hyphen::Pattern |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
=back |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
=head1 BUGS AND LIMITATIONS |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
=over 4 |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
=item * Perfect hyphenation can be more complicated than just inserting a |
515
|
|
|
|
|
|
|
hyphen somewhere in a word, and sometimes requires semantics to get it right. |
516
|
|
|
|
|
|
|
For example C<cafeetje> should be hyphenated as C<cafe-tje> and not |
517
|
|
|
|
|
|
|
C<cafee-tje> and C<buurtje> can be hyphenated as C<buur-tje> or C<buurt-je>, |
518
|
|
|
|
|
|
|
depending on it's meaning. While HTML could provide a bit more context - |
519
|
|
|
|
|
|
|
mainly the language being used - than plain text to handle these issues, the |
520
|
|
|
|
|
|
|
initial purpose of this module is to make it possible for HTML rendering |
521
|
|
|
|
|
|
|
engines that support soft hyphens to be able to break long words over multiple |
522
|
|
|
|
|
|
|
lines to avoid unwanted overflow. |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
=item * The hyphenation doesn't get better than TeX::Hyphenate and it's |
525
|
|
|
|
|
|
|
hyphenation patterns provide. |
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
=item * The round trip from HTML source via Mojo::DOM to HTML source might |
528
|
|
|
|
|
|
|
introduce changes to the source, for example accented characters might be |
529
|
|
|
|
|
|
|
transformed to HTML encoded entity equivalents or Boolean attributes are |
530
|
|
|
|
|
|
|
converted to a different notation. |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
=back |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
Please report any bugs or feature requests at |
535
|
|
|
|
|
|
|
L<Bitbucket|https://bitbucket.org/rolandvanipenburg/html-hyphenate/issues>. |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
=head1 AUTHOR |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
Roland van Ipenburg, E<lt>roland@rolandvanipenburg.comE<gt> |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
Copyright (C) 2009-2021, Roland van Ipenburg |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
546
|
|
|
|
|
|
|
it under the same terms as Perl itself, either Perl version 5.14.0 or, |
547
|
|
|
|
|
|
|
at your option, any later version of Perl 5 you may have available. |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
=head1 DISCLAIMER OF WARRANTY |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY |
552
|
|
|
|
|
|
|
FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN |
553
|
|
|
|
|
|
|
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES |
554
|
|
|
|
|
|
|
PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER |
555
|
|
|
|
|
|
|
EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
556
|
|
|
|
|
|
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE |
557
|
|
|
|
|
|
|
ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH |
558
|
|
|
|
|
|
|
YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL |
559
|
|
|
|
|
|
|
NECESSARY SERVICING, REPAIR, OR CORRECTION. |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING |
562
|
|
|
|
|
|
|
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR |
563
|
|
|
|
|
|
|
REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENSE, BE |
564
|
|
|
|
|
|
|
LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL, |
565
|
|
|
|
|
|
|
OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE |
566
|
|
|
|
|
|
|
THE SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING |
567
|
|
|
|
|
|
|
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A |
568
|
|
|
|
|
|
|
FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF |
569
|
|
|
|
|
|
|
SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF |
570
|
|
|
|
|
|
|
SUCH DAMAGES. |
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
=cut |