| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# -*- cperl; cperl-indent-level: 4 -*- |
|
2
|
|
|
|
|
|
|
# Copyright (C) 2009-2021, Roland van Ipenburg |
|
3
|
|
|
|
|
|
|
package HTML::Hyphenate v1.1.9; |
|
4
|
11
|
|
|
11
|
|
898039
|
use Moose; |
|
|
11
|
|
|
|
|
4437866
|
|
|
|
11
|
|
|
|
|
70
|
|
|
5
|
11
|
|
|
11
|
|
69435
|
use utf8; |
|
|
11
|
|
|
|
|
23
|
|
|
|
11
|
|
|
|
|
75
|
|
|
6
|
11
|
|
|
11
|
|
488
|
use 5.016000; |
|
|
11
|
|
|
|
|
34
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
11
|
|
|
11
|
|
8099
|
use charnames (); |
|
|
11
|
|
|
|
|
293942
|
|
|
|
11
|
|
|
|
|
280
|
|
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
#use Log::Log4perl qw(:resurrect :easy get_logger); |
|
11
|
11
|
|
|
11
|
|
4664
|
use Set::Scalar; |
|
|
11
|
|
|
|
|
94408
|
|
|
|
11
|
|
|
|
|
442
|
|
|
12
|
11
|
|
|
11
|
|
6088
|
use TeX::Hyphen; |
|
|
11
|
|
|
|
|
15427
|
|
|
|
11
|
|
|
|
|
349
|
|
|
13
|
11
|
|
|
11
|
|
4913
|
use TeX::Hyphen::Pattern 0.100; |
|
|
11
|
|
|
|
|
1160949
|
|
|
|
11
|
|
|
|
|
437
|
|
|
14
|
11
|
|
|
11
|
|
5575
|
use HTML::Hyphenate::DOM; |
|
|
11
|
|
|
|
|
45
|
|
|
|
11
|
|
|
|
|
359
|
|
|
15
|
|
|
|
|
|
|
|
|
16
|
11
|
|
|
11
|
|
121
|
use Readonly; |
|
|
11
|
|
|
|
|
29
|
|
|
|
11
|
|
|
|
|
21997
|
|
|
17
|
|
|
|
|
|
|
## no critic qw(ProhibitCallsToUnexportedSubs) |
|
18
|
|
|
|
|
|
|
Readonly::Scalar my $EMPTY => q{}; |
|
19
|
|
|
|
|
|
|
Readonly::Scalar my $DOT => q{.}; |
|
20
|
|
|
|
|
|
|
Readonly::Scalar my $SOFT_HYPHEN => charnames::string_vianame(q{SOFT HYPHEN}); |
|
21
|
|
|
|
|
|
|
Readonly::Scalar my $CLASS_JOINER => q{, .}; # for CSS classnames |
|
22
|
|
|
|
|
|
|
Readonly::Scalar my $ONE_LEVEL_UP => -1; |
|
23
|
|
|
|
|
|
|
Readonly::Scalar my $DOCTYPE => q{<!DOCTYPE html>}; |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
Readonly::Hash my %DEFAULT => ( |
|
26
|
|
|
|
|
|
|
'MIN_LENGTH' => 10, |
|
27
|
|
|
|
|
|
|
'MIN_PRE' => 2, |
|
28
|
|
|
|
|
|
|
'MIN_POST' => 2, |
|
29
|
|
|
|
|
|
|
'LANG' => q{en_us}, |
|
30
|
|
|
|
|
|
|
'INCLUDED' => 1, |
|
31
|
|
|
|
|
|
|
); |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# HTML %Text attributes <http://www.w3.org/TR/REC-html40/index/attributes.html> |
|
34
|
|
|
|
|
|
|
# HTML5 text attributes <https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes> |
|
35
|
|
|
|
|
|
|
my $text_attr = |
|
36
|
|
|
|
|
|
|
Set::Scalar->new(qw/abbr alt label list placeholder standby summary title/); |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
## no critic qw(ProhibitCallsToUnexportedSubs) |
|
39
|
|
|
|
|
|
|
Readonly::Hash my %LOG => ( |
|
40
|
|
|
|
|
|
|
'TRAVERSE' => q{Traversing HTML element '%s'}, |
|
41
|
|
|
|
|
|
|
'LANGUAGE_SET' => q{Language changed to '%s'}, |
|
42
|
|
|
|
|
|
|
'PATTERN_FILE' => q{Using pattern file '%s'}, |
|
43
|
|
|
|
|
|
|
'TEXT_NODE' => q{Text node value '%s'}, |
|
44
|
|
|
|
|
|
|
'HYPHEN_TEXT' => q{Hyphenating text '%s'}, |
|
45
|
|
|
|
|
|
|
'HYPHEN_WORD' => q{Hyphenating word '%s' to '%s'}, |
|
46
|
|
|
|
|
|
|
'LOOKING_UP' => q{Looking up for %d class(es)}, |
|
47
|
|
|
|
|
|
|
'HTML_METHOD' => q{Using HTML passed to method '%s'}, |
|
48
|
|
|
|
|
|
|
'HTML_PROPERTY' => q{Using HTML property '%s'}, |
|
49
|
|
|
|
|
|
|
'NOT_HYPHEN' => q{No pattern found for '%s'}, |
|
50
|
|
|
|
|
|
|
'REGISTER' => q{Registering TeX::Hyphen object for label '%s'}, |
|
51
|
|
|
|
|
|
|
'NO_CLASSES' => q{No classes defined, so not check for them}, |
|
52
|
|
|
|
|
|
|
); |
|
53
|
|
|
|
|
|
|
## use critic |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
## no critic qw(ProhibitCommentedOutCode) |
|
56
|
|
|
|
|
|
|
###l4p Log::Log4perl->easy_init( { 'level' => $DEBUG, 'utf8' => 1 } ); |
|
57
|
|
|
|
|
|
|
###l4p my $log = get_logger(); |
|
58
|
|
|
|
|
|
|
## use critic |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
## no critic qw(ProhibitHashBarewords ProhibitCallsToUnexportedSubs ProhibitCallsToUndeclaredSubs) |
|
61
|
|
|
|
|
|
|
# The HTML that is going to be hyphenated.
has html => ( is => 'rw', isa => 'Str' );

# Runs after every call of the C<html> accessor. Nothing happens on a plain
# read (no value passed). On a write a leading HTML5 doctype is split off and
# stored in C<_doctype>, and the remainder is stored back in C<html>; when
# there is no leading doctype C<_doctype> becomes the empty string.
after 'html' => sub {
    my ( $self, $html ) = @_;
    return if !defined $html;

    # The match is anchored with \A and not with ^: because of the /m flag a
    # ^ also matches right after every newline, so a doctype found on a later
    # line matched too and everything in front of it was dropped from the
    # document. Only leading whitespace may precede the doctype now.
    ## no critic qw(ProhibitUnusedCapture)
    if ( $html =~ m{\A(?<doctype>\s*\Q$DOCTYPE\E)(?<html>.*)}ismx ) {
        ## use critic
        # Copy the captures first; storing the remainder re-enters this hook.
        my $doctype   = $+{doctype};
        my $remainder = $+{html};

        # The nested hook run resets _doctype to the empty string, so the
        # real doctype has to be stored after the remainder.
        $self->html($remainder);
        $self->_doctype($doctype);
    }
    else {
        $self->_doctype($EMPTY);
    }
    return;
};
|
76
|
|
|
|
|
|
|
# Documented as the style to use for the patterns in TeX::Hyphen.
has style => (
    is  => 'rw',
    isa => 'Str',
);

# Minimum length a word, text node or text attribute must have before soft
# hyphens are inserted.
has min_length => (
    is      => 'rw',
    isa     => 'Int',
    default => $DEFAULT{'MIN_LENGTH'},
);

# Passed to TeX::Hyphen as C<leftmin>.
has min_pre => (
    is      => 'rw',
    isa     => 'Int',
    default => $DEFAULT{'MIN_PRE'},
);

# Passed to TeX::Hyphen as C<rightmin>.
has min_post => (
    is      => 'rw',
    isa     => 'Int',
    default => $DEFAULT{'MIN_POST'},
);

# Pattern label used when no language can be derived from the HTML.
has default_lang => (
    is      => 'rw',
    isa     => 'Str',
    default => $DEFAULT{'LANG'},
);

# Whether content is hyphenated when the class lists don't decide it.
has default_included => (
    is      => 'rw',
    isa     => 'Int',
    default => $DEFAULT{'INCLUDED'},
);

# Class names marking parts of the tree that should get soft hyphens.
has classes_included => (
    is      => 'rw',
    isa     => 'ArrayRef',
    default => sub { [] },
);

# Class names marking parts of the tree that should not get soft hyphens.
has classes_excluded => (
    is      => 'rw',
    isa     => 'ArrayRef',
    default => sub { [] },
);
|
88
|
|
|
|
|
|
|
# Runs after each call of the C<classes_included> or C<classes_excluded>
# accessor. When a new list was passed in (a write, not a read) the private
# C<_classes> flag is refreshed: it is true only when at least one class name
# is configured in either list.
#
# The lists are array references, so they are dereferenced to count their
# elements. Adding the references themselves adds their numeric addresses,
# which is always greater than zero and made the flag true even for two empty
# lists.
after [qw(classes_included classes_excluded)] => sub {
    my ( $self, $ar ) = @_;
    if ( defined $ar ) {
        my $configured =
          @{ $self->classes_excluded } + @{ $self->classes_included };
        $self->_classes( $configured > 0 );
    }
    return;
};
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
# Hyphenator objects keyed by language label.
has _hyphenators => (
    is      => 'rw',
    isa     => 'HashRef',
    default => sub { {} },
);

# Label of the language currently in effect while traversing.
has _lang => (
    is  => 'rw',
    isa => 'Str',
);

# Doctype split off from the HTML, or the empty string.
has _doctype => (
    is  => 'rw',
    isa => 'Str',
);

# DOM object holding the parsed HTML.
has _dom => (
    is  => 'rw',
    isa => 'HTML::Hyphenate::DOM',
);

# True when the language in effect was set by the root C<html> element.
has _scope_is_root => (
    is      => 'rw',
    isa     => 'Bool',
    default => sub { 0 },
);

# True when class names have to be checked to decide what to hyphenate.
has _classes => (
    is      => 'rw',
    isa     => 'Bool',
    default => sub { 0 },
);
|
112
|
|
|
|
|
|
|
## use critic |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
## no critic qw(ProhibitCallsToUnexportedSubs) |
|
115
|
|
|
|
|
|
|
Readonly::Scalar my $LANG => q{lang}; |
|
116
|
|
|
|
|
|
|
Readonly::Scalar my $HTML => q{html}; |
|
117
|
|
|
|
|
|
|
Readonly::Scalar my $TEXT => q{text}; |
|
118
|
|
|
|
|
|
|
Readonly::Scalar my $TAG => q{tag}; |
|
119
|
|
|
|
|
|
|
Readonly::Scalar my $RAW => q{raw}; |
|
120
|
|
|
|
|
|
|
Readonly::Scalar my $PRE => q{pre}; |
|
121
|
|
|
|
|
|
|
Readonly::Scalar my $CLASS => q{class}; |
|
122
|
|
|
|
|
|
|
## no critic qw(RequireDotMatchAnything RequireExtendedFormatting RequireLineBoundaryMatching) |
|
123
|
|
|
|
|
|
|
Readonly::Scalar my $NONSPACE => qr{\S+}; |
|
124
|
|
|
|
|
|
|
## use critic |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
# Hyphenates a piece of HTML.
#
# Takes the HTML to hyphenate as optional argument; without it the HTML
# already stored in the C<html> attribute is used. Returns the HTML with the
# soft hyphens inserted.
sub hyphenated {
    my ( $self, $html ) = @_;
    if ( defined $html ) {

        ###l4p $log->debug( sprintf $LOG{'HTML_METHOD'}, $html );
        $self->html($html);
    }
    else {
        ###l4p $log->debug( sprintf $LOG{'HTML_PROPERTY'}, $self->html );
    }

    # Start every run outside of any root language scope. _configure_lang
    # reuses the previous language while this flag is set, so without the
    # reset a document lacking a lang attribute inherited the root language
    # of the document hyphenated before it instead of the default language.
    $self->_scope_is_root(0);

    $self->_reset_dom;
    $self->_dom->parse( $self->html );
    $self->_traverse_dom( $self->_dom->root );
    return $self->_clean_html();
}
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# Registers a TeX::Hyphen object as the hyphenator for a language label.
#
# Takes the label and the object. The call is silently ignored when the label
# is undefined or the second argument isn't a TeX::Hyphen object. Returns
# nothing.
sub register_tex_hyphen {
    my ( $self, $label, $tex ) = @_;
    return if !defined $label;
    ## no critic qw(ProhibitCallsToUndeclaredSubs)
    return if !blessed($tex);
    ## use critic
    return if !$tex->isa('TeX::Hyphen');

    ###l4p $log->debug( sprintf $LOG{'REGISTER'}, $label );
    my $hyphenators = $self->_hyphenators;
    $hyphenators->{$label} = $tex;
    $self->_hyphenators($hyphenators);
    return;
}
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
# Walks the DOM depth first, starting at the given node, and inserts soft
# hyphens where allowed.
#
# For a hyphenable tag node the language is configured and the values of its
# text attributes are hyphenated. For a hyphenable text or raw node the
# language is configured and the node is replaced by its hyphenated text;
# such a node is never descended into. Every other node only has its child
# nodes visited. Returns nothing.
sub _traverse_dom {
    my ( $self, $node ) = @_;
    if ( $self->_hyphenable($node) ) {
        my $type = $node->type;
        if ( $TAG eq $type ) {

            ###l4p $log->debug( sprintf $LOG{'TRAVERSE'}, $node->tag );
            $self->_configure_lang($node);

            # Iterate over a snapshot of the names so the loop doesn't depend
            # on the state of the hash iterator while values are replaced.
            my $attributes = $node->attr;
            for my $name ( keys %{$attributes} ) {
                my $value = $attributes->{$name};

                # An undefined value would only trigger an uninitialized
                # warning in the length comparison below.
                next if !defined $value;
                if ( $text_attr->has($name)
                    && length $value >= $self->min_length )
                {
                    $node->attr( $name, $self->_hyphen($value) );
                }
            }
        }
        elsif ( $TEXT eq $type || $RAW eq $type ) {

            # NOTE(review): raw nodes presumably include the content of
            # script and style elements - confirm those should be hyphenated.
            my $string = $node->to_string;
            ###l4p $log->trace( sprintf $LOG{'TEXT_NODE'}, $string );
            if (
                length $string >= $self->min_length
                ## no critic qw(RequireDotMatchAnything RequireLineBoundaryMatching)
                && $string =~ m{$NONSPACE}x
              )
              ## use critic
            {
                $self->_configure_lang($node);
                my $hyphened = $self->_hyphen($string);
                $node->replace($hyphened);
            }
            return;
        }
    }

    # Children are visited even when the node itself is not hyphenable; each
    # child is checked on its own.
    for my $child ( $node->child_nodes->each ) {
        $self->_traverse_dom($child);
    }
    return;
}
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
# Serializes the DOM to a string, replaces the DOM with a fresh one and puts
# a previously split off doctype back in front. Returns the resulting HTML.
sub _clean_html {
    my ($self) = @_;
    my $rendered = $self->_dom->to_string();
    $self->_reset_dom;
    my $doctype = $self->_doctype;
    return $EMPTY ne $doctype ? $doctype . $rendered : $rendered;
}
|
209
|
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
# Hyphenates all words in a text that are at least C<min_length> word
# characters long by handing each of them to C<_hyphen_word>. Returns the
# text with those words replaced; shorter words are left as they are.
sub _hyphen {
    my ( $self, $text ) = @_;

    ###l4p $log->debug( sprintf $LOG{'HYPHEN_TEXT'}, $text );
    my $min_length = $self->min_length;
    $text =~ s/(\w{$min_length,})/$self->_hyphen_word($1)/xsmeg;
    return $text;
}
|
217
|
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
# Inserts a soft hyphen at every position the hyphenator for the current
# language reports for the word. Returns the word unchanged when no
# hyphenator is available for that language.
sub _hyphen_word {
    my ( $self, $word ) = @_;
    my $hyphenator = $self->_hyphenators->{ $self->_lang };
    if ( !defined $hyphenator ) {

        ###l4p $log->warn( sprintf $LOG{'NOT_HYPHEN'}, $self->_lang );
        return $word;
    }

    ###l4p $log->debug( sprintf $LOG{'HYPHEN_WORD'},
    ###l4p $word, $self->_hyphenators->{ $self->_lang }->visualize($word) );
    # The positions refer to the original word, so each one is shifted by
    # the length of what was inserted before it.
    my $inserted = 0;
    for my $position ( $hyphenator->hyphenate($word) ) {
        substr $word, $position + $inserted, 0, $SOFT_HYPHEN;
        $inserted += length $SOFT_HYPHEN;
    }
    return $word;
}
|
237
|
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
# Returns the value of the C<lang> attribute of a node, or of its C<xml:lang>
# attribute when C<lang> has no true value. Returns nothing when called
# without a node.
sub __lang_attr {
    my ($node) = @_;
    return if !$node;
    return $node->attr($LANG) || $node->attr(qq{xml:$LANG});
}
|
248
|
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
# Determines the language that applies to an element and makes it the
# current language, loading a hyphenator for it when there is none yet.
#
# The language is taken from, in this order: the element itself, its parent,
# the language already in effect when that was set by the root element, the
# nearest ancestor having a C<lang> attribute, and finally C<default_lang>.
# Along the way C<_scope_is_root> records whether the language came from the
# C<html> element. Returns nothing.
sub _configure_lang {
    my ( $self, $element ) = @_;
    my $lang = __lang_attr($element);
    if ( defined $lang ) {
        $self->_scope_is_root( $HTML eq $element->tag );
    }
    else {
        my $parent = $element->parent;
        $lang = __lang_attr($parent);
        if ( defined $lang ) {
            $self->_scope_is_root( $HTML eq $parent->tag );
        }
        elsif ( $self->_scope_is_root ) {

            # If the scope was already set by the root element we don't have
            # to check if it has gone out of scope because we never leave the
            # root scope:
            $lang = $self->_lang;
        }
        else {
            my $recent = $element->ancestors(qq{[$LANG]})->first();
            $self->_scope_is_root( $recent && $HTML eq $recent->tag );
            $lang = __lang_attr($recent);
        }
    }
    $lang //= $self->default_lang;

    my $current = $self->_lang;
    if ( !defined $current || $lang ne $current ) {
        $self->_lang($lang);

        ###l4p $log->debug( sprintf $LOG{'LANGUAGE_SET'}, $lang );
        if ( !exists $self->_hyphenators->{$lang} ) {
            $self->_add_tex_hyphen_to_cache();
        }
    }
    return;
}
|
288
|
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
# Creates a TeX::Hyphen object for the current language and stores it in the
# hyphenator cache under that language's label.
#
# The pattern file is looked up through TeX::Hyphen::Pattern using the
# current language as label. Nothing is cached when that yields no file.
# Returns nothing.
sub _add_tex_hyphen_to_cache {
    my ($self) = @_;
    my $thp = TeX::Hyphen::Pattern->new();
    $thp->label( $self->_lang );
    my $cache = $self->_hyphenators;
    if ( my $file = $thp->filename ) {

        ###l4p $log->debug( sprintf $LOG{'PATTERN_FILE'}, $file );
        # The style attribute was never handed to TeX::Hyphen, so setting it
        # had no effect. It is only passed on when set, which leaves the
        # style selection to TeX::Hyphen in all other cases.
        my $style = $self->style;
        ${$cache}{ $self->_lang } = TeX::Hyphen->new(
            q{file}     => $file,
            q{leftmin}  => $self->min_pre,
            q{rightmin} => $self->min_post,
            ( defined $style ? ( q{style} => $style ) : () ),
        );
        $self->_hyphenators($cache);
    }
    return;
}
|
306
|
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
# Decides from the configured class names whether a node may be hyphenated.
#
# The levels of the nearest ancestor having an included class and of the
# nearest ancestor having an excluded class are compared; the deeper one
# wins. A list without a matching ancestor gets a fallback level above the
# tree, where the list opposing the default gets the lower fallback so the
# default wins when neither list matches. Equal levels return the value of
# C<default_included>.
sub _hyphenable_by_class {
    my ( $self, $node ) = @_;
    my $included_by_default = $self->default_included;
    my ( $included_fallback, $excluded_fallback ) =
      $included_by_default
      ? ( $ONE_LEVEL_UP, $ONE_LEVEL_UP - 1 )
      : ( $ONE_LEVEL_UP - 1, $ONE_LEVEL_UP );

    my $included_level =
      $self->_get_nearest_ancestor_level_by_classname( $node,
        $self->classes_included, $included_fallback );
    my $excluded_level =
      $self->_get_nearest_ancestor_level_by_classname( $node,
        $self->classes_excluded, $excluded_fallback );

    return $self->default_included if $included_level == $excluded_level;
    return $excluded_level <= $included_level;
}
|
325
|
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
# Tells whether the direct parent of a node is a C<pre> element. Returns
# false for a node without a parent.
sub __parent_is_pre {
    my ($node) = @_;
    my $parent = $node->parent;
    return !!0 if !defined $parent;

    # A parent without a tag name is compared as the empty string.
    my $tag = $parent->tag || $EMPTY;
    return $tag eq $PRE;
}
|
332
|
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
# Tells whether a node may be hyphenated: its parent must not be a C<pre>
# element and, when class names are in use, the class names must allow it.
sub _hyphenable {
    my ( $self, $node ) = @_;

    ###l4p $self->_classes || $log->debug( $LOG{'NO_CLASSES'} );
    return !!0 if __parent_is_pre($node);
    return !!1 if !$self->_classes;
    return !!$self->_hyphenable_by_class($node);
}
|
340
|
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
# Finds the nearest ancestor of a node that has one of the given class names
# and returns the number of ancestors that ancestor has itself.
#
# Takes the node, a reference to an array of class names and the level to
# fall back on. The fallback level is returned when the list is empty, the
# node has no ancestors or no ancestor has one of the class names.
sub _get_nearest_ancestor_level_by_classname {
    my ( $self, $node, $ar_classnames, $level ) = @_;
    my $classnames = Set::Scalar->new( @{$ar_classnames} );

    ###l4p $log->debug( sprintf $LOG{'LOOKING_UP'}, $classnames->size );
    return $level if $classnames->is_empty;
    return $level if !$node->ancestors->size;

    # Build a selector list with one class selector per class name.
    my $selector = $DOT . join $CLASS_JOINER, $classnames->members;
    my $nearest  = $node->ancestors($selector)->first;
    return $nearest ? $nearest->ancestors->size : $level;
}
|
357
|
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
# Replaces the DOM held by the object with a new, empty
# HTML::Hyphenate::DOM object. Returns nothing.
sub _reset_dom {
    my ($self) = @_;
    $self->_dom( HTML::Hyphenate::DOM->new() );
    return;
}
|
364
|
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
1; |
|
366
|
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
__END__ |
|
368
|
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
=encoding utf8 |
|
370
|
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
=for stopwords Ipenburg Readonly merchantability Mojolicious Bitbucket |
|
372
|
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
=head1 NAME |
|
374
|
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
HTML::Hyphenate - insert soft hyphens into HTML |
|
376
|
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
=head1 VERSION |
|
378
|
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
This document describes HTML::Hyphenate version C<v1.1.9>. |
|
380
|
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
382
|
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
use HTML::Hyphenate; |
|
384
|
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
$hyphenator = HTML::Hyphenate->new(); |
|
386
|
|
|
|
|
|
|
$html_with_soft_hyphens = $hyphenator->hyphenated($html); |
|
387
|
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
$hyphenator->html($html); |
|
389
|
|
|
|
|
|
|
$hyphenator->style($style); # czech or german |
|
390
|
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
$hyphenator->min_length(10); |
|
392
|
|
|
|
|
|
|
$hyphenator->min_pre(2); |
|
393
|
|
|
|
|
|
|
$hyphenator->min_post(2); |
|
394
|
|
|
|
|
|
|
$hyphenator->default_lang('en-us'); |
|
395
|
|
|
|
|
|
|
$hyphenator->default_included(1); |
|
396
|
|
|
|
|
|
|
$hyphenator->classes_included(['shy']); |
|
397
|
|
|
|
|
|
|
$hyphenator->classes_excluded(['noshy']); |
|
398
|
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
400
|
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
Most HTML rendering engines used in web browsers don't figure out by |
|
402
|
|
|
|
|
|
|
themselves how to hyphenate words when needed, but we can tell them how they |
|
403
|
|
|
|
|
|
|
might do it by inserting soft hyphens into the words. |
|
404
|
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
=head1 SUBROUTINES/METHODS |
|
406
|
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
=over 4 |
|
408
|
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
=item HTML::Hyphenate-E<gt>new() |
|
410
|
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
Constructs a new HTML::Hyphenate object. |
|
412
|
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
=item $hyphenator-E<gt>hyphenated() |
|
414
|
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
Returns the HTML including the soft hyphens. |
|
416
|
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
=item $hyphenator->html(); |
|
418
|
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
Gets or sets the HTML to hyphenate. |
|
420
|
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=item $hyphenator->style(); |
|
422
|
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
Gets or sets the style to use for pattern usages in |
|
424
|
|
|
|
|
|
|
L<TeX::Hyphen|TeX::Hyphen>. Can be C<czech> or C<german>. |
|
425
|
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
=item $hyphenator->min_length(); |
|
427
|
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
Gets or sets the minimum word length required for having soft hyphens |
|
429
|
|
|
|
|
|
|
inserted. Defaults to 10 characters. |
|
430
|
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
=item $hyphenator->min_pre(2); |
|
432
|
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
Gets or sets the minimum amount of characters in a word preserved before the |
|
434
|
|
|
|
|
|
|
first soft hyphen. Defaults to 2 characters. |
|
435
|
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
=item $hyphenator->min_post(2); |
|
437
|
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
Gets or sets the minimum amount of characters in a word preserved after the |
|
439
|
|
|
|
|
|
|
last soft hyphen. Defaults to 2 characters. |
|
440
|
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
=item $hyphenator->default_lang('en-us'); |
|
442
|
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
Gets or sets the default pattern to use when no language can be derived from |
|
444
|
|
|
|
|
|
|
the HTML. |
|
445
|
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
=item $hyphenator->default_included(); |
|
447
|
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
Gets or sets if soft hyphens should be included in the whole tree by default. |
|
449
|
|
|
|
|
|
|
This can be used to insert soft hyphens only in parts of the HTML having |
|
450
|
|
|
|
|
|
|
specific class names. |
|
451
|
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
=item $hyphenator->classes_included(); |
|
453
|
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
Gets or sets a reference to an array of class names that will have soft |
|
455
|
|
|
|
|
|
|
hyphens inserted. |
|
456
|
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=item $hyphenator->classes_excluded(); |
|
458
|
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
Gets or sets a reference to an array of class names that will not have soft |
|
460
|
|
|
|
|
|
|
hyphens inserted. |
|
461
|
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
=item $hyphenator->register_tex_hyphen(C<lang>, C<TeX::Hyphen>) |
|
463
|
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
Registers a TeX::Hyphen object to handle the language defined by C<lang>. |
|
465
|
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
=back |
|
467
|
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
=head1 CONFIGURATION AND ENVIRONMENT |
|
469
|
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
The output is generated by L<Mojo::DOM|Mojo::DOM> so the environment variable |
|
471
|
|
|
|
|
|
|
C<MOJO_DOM_CSS_DEBUG> can be set to debug its CSS selection process. |
|
472
|
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
=head1 DEPENDENCIES |
|
474
|
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
=over 4 |
|
476
|
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
=item * Perl 5.16 |
|
478
|
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
=item * L<Moose|Moose> |
|
480
|
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
=item * L<Mojolicious|Mojolicious> for L<Mojo::DOM|Mojo::DOM> |
|
482
|
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
=item * L<Readonly|Readonly> |
|
484
|
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
=item * L<Set::Scalar|Set::Scalar> |
|
486
|
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
=item * L<TeX::Hyphen|TeX::Hyphen> |
|
488
|
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
=item * L<TeX::Hyphen::Pattern|TeX::Hyphen::Pattern> |
|
490
|
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
=back |
|
492
|
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
=head1 INCOMPATIBILITIES |
|
494
|
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
This module has the same limits as TeX::Hyphen, TeX::Hyphen::Pattern and |
|
496
|
|
|
|
|
|
|
Mojo::DOM. Tests might fail if the patterns used for them are updated and |
|
497
|
|
|
|
|
|
|
change the test result. |
|
498
|
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
=head1 DIAGNOSTICS |
|
500
|
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
This module uses Log::Log4perl for logging when it's resurrected. |
|
502
|
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
=over 4 |
|
504
|
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
=item * It warns when a language encountered in the HTML is not supported by |
|
506
|
|
|
|
|
|
|
TeX::Hyphen::Pattern |
|
507
|
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
=back |
|
509
|
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
=head1 BUGS AND LIMITATIONS |
|
511
|
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
=over 4 |
|
513
|
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
=item * Perfect hyphenation can be more complicated than just inserting a |
|
515
|
|
|
|
|
|
|
hyphen somewhere in a word, and sometimes requires semantics to get it right. |
|
516
|
|
|
|
|
|
|
For example C<cafeetje> should be hyphenated as C<cafe-tje> and not |
|
517
|
|
|
|
|
|
|
C<cafee-tje> and C<buurtje> can be hyphenated as C<buur-tje> or C<buurt-je>, |
|
518
|
|
|
|
|
|
|
depending on its meaning. While HTML could provide a bit more context - |
|
519
|
|
|
|
|
|
|
mainly the language being used - than plain text to handle these issues, the |
|
520
|
|
|
|
|
|
|
initial purpose of this module is to make it possible for HTML rendering |
|
521
|
|
|
|
|
|
|
engines that support soft hyphens to be able to break long words over multiple |
|
522
|
|
|
|
|
|
|
lines to avoid unwanted overflow. |
|
523
|
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
=item * The hyphenation doesn't get better than TeX::Hyphen and its |
|
525
|
|
|
|
|
|
|
hyphenation patterns provide. |
|
526
|
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
=item * The round trip from HTML source via Mojo::DOM to HTML source might |
|
528
|
|
|
|
|
|
|
introduce changes to the source, for example accented characters might be |
|
529
|
|
|
|
|
|
|
transformed to HTML encoded entity equivalents or Boolean attributes are |
|
530
|
|
|
|
|
|
|
converted to a different notation. |
|
531
|
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
=back |
|
533
|
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
Please report any bugs or feature requests at |
|
535
|
|
|
|
|
|
|
L<Bitbucket|https://bitbucket.org/rolandvanipenburg/html-hyphenate/issues>. |
|
536
|
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
=head1 AUTHOR |
|
538
|
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
Roland van Ipenburg, E<lt>roland@rolandvanipenburg.comE<gt> |
|
540
|
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
|
542
|
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
Copyright (C) 2009-2021, Roland van Ipenburg |
|
544
|
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
|
546
|
|
|
|
|
|
|
it under the same terms as Perl itself, either Perl version 5.14.0 or, |
|
547
|
|
|
|
|
|
|
at your option, any later version of Perl 5 you may have available. |
|
548
|
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
=head1 DISCLAIMER OF WARRANTY |
|
550
|
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY |
|
552
|
|
|
|
|
|
|
FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN |
|
553
|
|
|
|
|
|
|
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES |
|
554
|
|
|
|
|
|
|
PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER |
|
555
|
|
|
|
|
|
|
EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
|
556
|
|
|
|
|
|
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE |
|
557
|
|
|
|
|
|
|
ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH |
|
558
|
|
|
|
|
|
|
YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL |
|
559
|
|
|
|
|
|
|
NECESSARY SERVICING, REPAIR, OR CORRECTION. |
|
560
|
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING |
|
562
|
|
|
|
|
|
|
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR |
|
563
|
|
|
|
|
|
|
REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENSE, BE |
|
564
|
|
|
|
|
|
|
LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL, |
|
565
|
|
|
|
|
|
|
OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE |
|
566
|
|
|
|
|
|
|
THE SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING |
|
567
|
|
|
|
|
|
|
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A |
|
568
|
|
|
|
|
|
|
FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF |
|
569
|
|
|
|
|
|
|
SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF |
|
570
|
|
|
|
|
|
|
SUCH DAMAGES. |
|
571
|
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
=cut |