line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package PDF::Builder::Content::Hyphenate_basic; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
11573
|
use base 'PDF::Builder::Content::Text'; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
105
|
|
4
|
|
|
|
|
|
|
|
5
|
1
|
|
|
1
|
|
7
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
22
|
|
6
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
1454
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
our $VERSION = '3.025'; # VERSION |
9
|
|
|
|
|
|
|
our $LAST_UPDATE = '3.025'; # manually update whenever code is changed |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 NAME |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
PDF::Builder::Content::Hyphenate_basic - Simple hyphenation capability |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 SYNOPSIS |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
These are internal routines that are somewhat experimental, and may (or may |
18
|
|
|
|
|
|
|
not) be extended in the future. They are called from various Content routines |
19
|
|
|
|
|
|
|
that take long strings of text and split them into fixed-length lines. |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
Words are split to fill the line most completely, without regard to widows and |
22
|
|
|
|
|
|
|
orphans, long runs of hyphens at the right edge, "rivers" of space flowing |
23
|
|
|
|
|
|
|
through a paragraph, and other problems. Also, only simple splitting is done |
24
|
|
|
|
|
|
|
(not actually I), on a simple, language-independent basis. No dictionary |
25
|
|
|
|
|
|
|
or rules-based splitting is currently done. |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
This functionality may well be replaced by "hooks" to call language-specific |
28
|
|
|
|
|
|
|
word-splitting rules, as well as worrying about the appearance of the results |
29
|
|
|
|
|
|
|
(such as Knuth-Plass). |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=cut |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# Main entry. Returns array of left portion of word (and -) to stick on end of |
34
|
|
|
|
|
|
|
# sentence (may be empty) and remaining (right) portion of word to go on next |
35
|
|
|
|
|
|
|
# line (usually not empty). |
36
|
|
|
|
|
|
|
sub splitWord { |
37
|
0
|
|
|
0
|
0
|
|
my ($self, $word, $width, %opts) = @_; |
38
|
|
|
|
|
|
|
# copy dashed option names to preferred undashed names |
39
|
0
|
0
|
0
|
|
|
|
if (defined $opts{'-spHH'} && !defined $opts{'spHH'}) { $opts{'spHH'} = delete($opts{'-spHH'}); } |
|
0
|
|
|
|
|
|
|
40
|
0
|
0
|
0
|
|
|
|
if (defined $opts{'-spOP'} && !defined $opts{'spOP'}) { $opts{'spOP'} = delete($opts{'-spOP'}); } |
|
0
|
|
|
|
|
|
|
41
|
0
|
0
|
0
|
|
|
|
if (defined $opts{'-spDR'} && !defined $opts{'spDR'}) { $opts{'spDR'} = delete($opts{'-spDR'}); } |
|
0
|
|
|
|
|
|
|
42
|
0
|
0
|
0
|
|
|
|
if (defined $opts{'-spLR'} && !defined $opts{'spLR'}) { $opts{'spLR'} = delete($opts{'-spLR'}); } |
|
0
|
|
|
|
|
|
|
43
|
0
|
0
|
0
|
|
|
|
if (defined $opts{'-spCC'} && !defined $opts{'spCC'}) { $opts{'spCC'} = delete($opts{'-spCC'}); } |
|
0
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
|
45
|
0
|
|
|
|
|
|
my ($leftWord, $rightWord, @splitLoc, @chars, $i, $j, $len); |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
# various settings, some of which may be language-specific |
48
|
0
|
|
|
|
|
|
my $minBegin = 2; # minimum 2 characters before split (English rules) |
49
|
0
|
0
|
|
|
|
|
if (defined $opts{'min_prefix'}) { $minBegin = $opts{'min_prefix'}; } |
|
0
|
|
|
|
|
|
|
50
|
0
|
|
|
|
|
|
my $minEnd = 3; # minimum 3 characters to next line (English rules) |
51
|
0
|
0
|
|
|
|
|
if (defined $opts{'min_suffix'}) { $minEnd = $opts{'min_suffix'}; } |
|
0
|
|
|
|
|
|
|
52
|
0
|
|
|
|
|
|
my $hyphen = '-'; |
53
|
|
|
|
|
|
|
#my $hyphen = "\xAD"; # add a hyphen at split, unless splitting at - |
54
|
|
|
|
|
|
|
# or other dash character |
55
|
|
|
|
|
|
|
# NOTE: PDF-1.7 14.8.2.2.3 suggests using a soft hyphen (\AD) when splitting |
56
|
|
|
|
|
|
|
# a word at the end of the line, so that when text is extracted for |
57
|
|
|
|
|
|
|
# a screen reader, etc., the closed-up word can have the "visible" |
58
|
|
|
|
|
|
|
# hyphen removed. PDF readers should render as -. |
59
|
0
|
|
|
|
|
|
my @suppressHyphen = ( # ASCII/Latin-1/UTF-8 ordinals to NOT add - after |
60
|
|
|
|
|
|
|
# - en-dash em-dash / |
61
|
|
|
|
|
|
|
45, 8211, 8212, 47, |
62
|
|
|
|
|
|
|
); |
63
|
0
|
0
|
|
|
|
|
my $splitHardH = defined($opts{'spHH'})? $opts{'spHH'}: 1; # 1=OK to split on hard (explicit) hyphen U+002D |
64
|
0
|
0
|
|
|
|
|
my $otherPunc = defined($opts{'spOP'})? $opts{'spOP'}: 1; # 1=OK to split after most punctuation |
65
|
0
|
0
|
|
|
|
|
my $digitRun = defined($opts{'spDR'})? $opts{'spDR'}: 1; # 1=OK to split after run of digit(s) |
66
|
0
|
0
|
|
|
|
|
my $letterRun = defined($opts{'spLR'})? $opts{'spLR'}: 1; # 1=OK to split after run of ASCII letter(s) |
67
|
0
|
0
|
|
|
|
|
my $camelCase = defined($opts{'spCC'})? $opts{'spCC'}: 1; # 1=OK to split camelCase on ASCII lc-to-UC transition |
68
|
0
|
0
|
|
|
|
|
my $splitReqBlnk = defined($opts{'spRB'})? $opts{'spRB'}: 0; # 1=OK to split on required blank (NBSP) -- desperation move |
69
|
0
|
0
|
|
|
|
|
my $splitAnywhere = defined($opts{'spFS'})? $opts{'spFS'}: 0; # 1=OK to split to fit available space -- super desperation move |
70
|
0
|
0
|
|
|
|
|
if ($splitAnywhere) { |
71
|
|
|
|
|
|
|
# if requesting to split within a certain length, suppress all other flags |
72
|
0
|
|
|
|
|
|
$splitHardH = $otherPunc = $digitRun = $letterRun = $camelCase = |
73
|
|
|
|
|
|
|
$splitReqBlnk = 0; |
74
|
|
|
|
|
|
|
} |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# note that we are ignoring U+2010 "hyphen" and U+2011 "non-splitting |
77
|
|
|
|
|
|
|
# hyphen". The first is probably rare enough to not be worth the bother, |
78
|
|
|
|
|
|
|
# and the second won't be split at anyway. |
79
|
|
|
|
|
|
|
|
80
|
0
|
|
|
|
|
|
$leftWord = ''; # default return values |
81
|
0
|
|
|
|
|
|
$rightWord = $word; |
82
|
|
|
|
|
|
|
|
83
|
0
|
|
|
|
|
|
@splitLoc = (); # no known OK splits yet |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# highest priority for splits: hard and soft hyphens |
86
|
|
|
|
|
|
|
# remove SHYs, remember any break points |
87
|
0
|
|
|
|
|
|
($word, @splitLoc) = _removeSHY($word); |
88
|
|
|
|
|
|
|
# remember any break points due to hard coded hyphens |
89
|
0
|
|
|
|
|
|
@chars = split //, $word; |
90
|
0
|
|
|
|
|
|
for ($i=0; $i
|
91
|
0
|
0
|
0
|
|
|
|
if ($chars[$i] eq '-' && $splitHardH) { push @splitLoc, $i; } |
|
0
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
# note that unlike SHY, - is not removed |
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# If nothing in @splitLoc, proceed to find other splits. If @splitLoc |
96
|
|
|
|
|
|
|
# has at least one entry, could make it the top priority and split there, |
97
|
|
|
|
|
|
|
# and not look at other possible splits. Or, keep adding to @splitLoc |
98
|
|
|
|
|
|
|
# (equal priority for all possible splits). Mix and match is OK |
99
|
|
|
|
|
|
|
# (grouping criteria, as hard and soft hyphens were done together). |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
#if (!@splitLoc) { |
102
|
0
|
0
|
|
|
|
|
if ($otherPunc) { |
103
|
|
|
|
|
|
|
# look for other punctuation to split after. |
104
|
|
|
|
|
|
|
# don't split on ' or " or other quotes (<, <<, etc.) |
105
|
|
|
|
|
|
|
# !%&)]*+/,.:;<>?^_~ and curly right brace ASCII OK for now |
106
|
|
|
|
|
|
|
# en-dash, em-dash should ideally be split after, whether they are |
107
|
|
|
|
|
|
|
# free floating or embedded between words. |
108
|
0
|
|
|
|
|
|
my @ASCII_punct = ( '!', '.', '?', ',', '%', '&', ':', ';', |
109
|
|
|
|
|
|
|
'<', '>', ')', ']', chr(125), '_', '~', |
110
|
|
|
|
|
|
|
'^', '+', '*', '/', ); |
111
|
|
|
|
|
|
|
# en-dash em-dash |
112
|
0
|
|
|
|
|
|
my @UTF8_punct = ( 8211, 8212, ); |
113
|
|
|
|
|
|
|
# remember not to split if next char is - |
114
|
|
|
|
|
|
|
# (defer split to after hard hyphen - [if allowed]). |
115
|
0
|
|
|
|
|
|
for ($i=0; $i
|
116
|
0
|
|
|
|
|
|
foreach (@ASCII_punct) { |
117
|
0
|
0
|
0
|
|
|
|
if ($chars[$i] eq $_ && $chars[$i+1] ne '-') { |
118
|
0
|
|
|
|
|
|
push @splitLoc, $i; |
119
|
0
|
|
|
|
|
|
last; |
120
|
|
|
|
|
|
|
} |
121
|
|
|
|
|
|
|
} |
122
|
0
|
|
|
|
|
|
foreach (@UTF8_punct) { |
123
|
0
|
0
|
0
|
|
|
|
if (ord($chars[$i]) == $_ && $chars[$i+1] ne '-') { |
124
|
0
|
|
|
|
|
|
push @splitLoc, $i; |
125
|
0
|
|
|
|
|
|
last; |
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
} |
129
|
|
|
|
|
|
|
} |
130
|
|
|
|
|
|
|
#} |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
# group digit runs and camelCase together at same priority |
133
|
|
|
|
|
|
|
#if (!@splitLoc) { |
134
|
0
|
0
|
|
|
|
|
if ($digitRun) { |
135
|
|
|
|
|
|
|
# look for a run of digits to split after. |
136
|
|
|
|
|
|
|
# that is, any digit NOT followed by another digit. |
137
|
|
|
|
|
|
|
# remember not to split if next char is - |
138
|
|
|
|
|
|
|
# (defer split to after hard hyphen - [if allowed]). |
139
|
0
|
|
|
|
|
|
for ($i=0; $i
|
140
|
0
|
0
|
0
|
|
|
|
if ($chars[$i] ge '0' && $chars[$i] le '9' && |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
141
|
|
|
|
|
|
|
!($chars[$i+1] ge '0' && $chars[$i+1] le '9' || |
142
|
|
|
|
|
|
|
$chars[$i+1] eq '-')) { |
143
|
0
|
|
|
|
|
|
push @splitLoc, $i; |
144
|
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
} |
146
|
|
|
|
|
|
|
} |
147
|
|
|
|
|
|
|
|
148
|
0
|
0
|
|
|
|
|
if ($letterRun) { |
149
|
|
|
|
|
|
|
# look for a run of letters (ASCII) to split after. |
150
|
|
|
|
|
|
|
# that is, any letter NOT followed by another letter. |
151
|
|
|
|
|
|
|
# remember not to split if next char is - |
152
|
|
|
|
|
|
|
# (defer split to after hard hyphen - [if allowed]). |
153
|
0
|
|
|
|
|
|
for ($i=0; $i
|
154
|
0
|
0
|
0
|
|
|
|
if (($chars[$i] ge 'a' && $chars[$i] le 'z' || |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
155
|
|
|
|
|
|
|
$chars[$i] ge 'A' && $chars[$i] le 'Z' ) && |
156
|
|
|
|
|
|
|
!($chars[$i+1] ge 'a' && $chars[$i+1] le 'z' || |
157
|
|
|
|
|
|
|
$chars[$i+1] ge 'A' && $chars[$i+1] le 'Z' || |
158
|
|
|
|
|
|
|
$chars[$i+1] eq '-') ) { |
159
|
0
|
|
|
|
|
|
push @splitLoc, $i; |
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
|
164
|
0
|
0
|
|
|
|
|
if ($camelCase) { |
165
|
|
|
|
|
|
|
# look for camelCase to split on lowercase to |
166
|
|
|
|
|
|
|
# uppercase transitions. just ASCII letters for now. |
167
|
|
|
|
|
|
|
# Note that this will split names like McIlroy -> Mc-Ilroy |
168
|
|
|
|
|
|
|
# and MacDonald -> Mac-Donald. |
169
|
0
|
|
|
|
|
|
for ($i=0; $i
|
170
|
0
|
0
|
0
|
|
|
|
if ($chars[$i] ge 'a' && $chars[$i] le 'z' && |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
171
|
|
|
|
|
|
|
$chars[$i+1] ge 'A' && $chars[$i+1] le 'Z') { |
172
|
0
|
|
|
|
|
|
push @splitLoc, $i; |
173
|
|
|
|
|
|
|
} |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
#} |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
#if (!@splitLoc) { |
179
|
|
|
|
|
|
|
# look for real English word split locations |
180
|
|
|
|
|
|
|
# TBD |
181
|
|
|
|
|
|
|
#} |
182
|
|
|
|
|
|
|
|
183
|
0
|
0
|
0
|
|
|
|
if (!@splitLoc && $splitReqBlnk) { |
184
|
|
|
|
|
|
|
# remember any break points due to desperation split at NBSP |
185
|
0
|
|
|
|
|
|
@chars = split //, $word; |
186
|
0
|
|
|
|
|
|
for ($i=0; $i
|
187
|
0
|
0
|
|
|
|
|
if ($chars[$i] eq "\xA0") { push @splitLoc, $i; } |
|
0
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
# note that NBSP converted to regular space (x20). we will need |
189
|
|
|
|
|
|
|
# to overwrite the split one with the hyphen |
190
|
|
|
|
|
|
|
} |
191
|
|
|
|
|
|
|
} |
192
|
|
|
|
|
|
|
|
193
|
0
|
0
|
0
|
|
|
|
if (!@splitLoc && $splitAnywhere) { |
194
|
|
|
|
|
|
|
# remember any break point due to desperation split at available length |
195
|
0
|
|
|
|
|
|
@chars = split //, $word; |
196
|
0
|
|
|
|
|
|
my $trial = ''; |
197
|
0
|
|
|
|
|
|
for ($i=0; $i
|
198
|
0
|
|
|
|
|
|
$trial .= $chars[$i]; |
199
|
0
|
0
|
|
|
|
|
if ($self->advancewidth("$trial$hyphen") > $width) { last; } |
|
0
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
# nothing fit? force one letter, even though it overflows |
202
|
0
|
0
|
|
|
|
|
if ($i == 0) { $i = 1; } |
|
0
|
|
|
|
|
|
|
203
|
0
|
|
|
|
|
|
push @splitLoc, $i-1; |
204
|
|
|
|
|
|
|
# disable minimum prefix and suffix for this |
205
|
0
|
|
|
|
|
|
$minBegin = $minEnd = 1; |
206
|
|
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
# sort final @splitLoc, remove any split points violating "min" settings |
209
|
|
|
|
|
|
|
# set $leftWord and $rightWord if find successful split |
210
|
0
|
0
|
|
|
|
|
if (@splitLoc) { |
211
|
0
|
|
|
|
|
|
@splitLoc = sort { $a <=> $b } @splitLoc; |
|
0
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
# unnecessary to have unique values |
213
|
0
|
|
|
|
|
|
$len = length($word); |
214
|
0
|
|
|
|
|
|
$j = -1; |
215
|
0
|
|
|
|
|
|
for ($i=0; $i
|
216
|
0
|
0
|
|
|
|
|
if ($splitLoc[$i] >= $minBegin-1) { last; } |
|
0
|
|
|
|
|
|
|
217
|
0
|
|
|
|
|
|
$j = $i; |
218
|
|
|
|
|
|
|
} |
219
|
0
|
0
|
|
|
|
|
if ($j >= 0) { splice(@splitLoc, 0, $j+1); } # remove j+1 els |
|
0
|
|
|
|
|
|
|
220
|
0
|
|
|
|
|
|
$j = -1; |
221
|
0
|
|
|
|
|
|
for ($i=$#splitLoc; $i>=0; $i--) { |
222
|
0
|
0
|
|
|
|
|
if ($splitLoc[$i] < $len-$minEnd) { last; } |
|
0
|
|
|
|
|
|
|
223
|
0
|
|
|
|
|
|
$j = $i; |
224
|
|
|
|
|
|
|
} |
225
|
0
|
0
|
|
|
|
|
if ($j >= 0) { splice(@splitLoc, $j); } # remove els >= j-th |
|
0
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
# scan R to L through @splitLoc to try splitting there |
228
|
|
|
|
|
|
|
# TBD estimate starting position in @splitLoc by dividing $width by |
229
|
|
|
|
|
|
|
# 1em to get approximate split location; pick highest @splitLoc |
230
|
|
|
|
|
|
|
# element that does not exceed it, and move right (probably) or left |
231
|
|
|
|
|
|
|
# to get proper split point. |
232
|
0
|
|
|
|
|
|
while (@splitLoc) { |
233
|
0
|
|
|
|
|
|
$j = pop @splitLoc; # proposed split rightmost on list |
234
|
0
|
|
|
|
|
|
my $trial = substr($word, 0, $j+1); |
235
|
|
|
|
|
|
|
# this is the left fragment at the end of the line. make sure |
236
|
|
|
|
|
|
|
# there is room for the space before it, the hyphen (if added), |
237
|
|
|
|
|
|
|
# and any letter doubling (e.g., in German or Dutch) |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
# does the left fragment already end in -, etc.? |
240
|
|
|
|
|
|
|
# if it does, don't add a $hyphen. |
241
|
0
|
|
|
|
|
|
my $h = $hyphen; |
242
|
0
|
|
|
|
|
|
$i = ord(substr($trial, -1, 1)); # last character in left fragment |
243
|
0
|
|
|
|
|
|
foreach (@suppressHyphen) { |
244
|
0
|
0
|
|
|
|
|
if ($i == $_) { $h = ''; last; } |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
} |
246
|
|
|
|
|
|
|
# left fragment ends in a space (used to be an NBSP)? |
247
|
|
|
|
|
|
|
# remove space, and no hyphen |
248
|
0
|
0
|
|
|
|
|
if ($i eq ' ') { |
249
|
0
|
|
|
|
|
|
chop($trial); |
250
|
0
|
|
|
|
|
|
$h = ''; |
251
|
|
|
|
|
|
|
} |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
# $width should already count the trailing space in the existing |
254
|
|
|
|
|
|
|
# line, or full width if empty |
255
|
0
|
|
|
|
|
|
$len = $self->advancewidth("$trial$h", %opts); |
256
|
0
|
0
|
|
|
|
|
if ($len > $width) { next; } |
|
0
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
# any letter doubling needed? |
259
|
0
|
|
|
|
|
|
$leftWord = $trial.$h; |
260
|
0
|
|
|
|
|
|
$rightWord = substr($word, $j+1); |
261
|
0
|
|
|
|
|
|
last; |
262
|
|
|
|
|
|
|
} |
263
|
|
|
|
|
|
|
# if fell through because no fragment was short enough, $leftWord and |
264
|
|
|
|
|
|
|
# $rightWord were never reassigned, and effect is to leave the entire |
265
|
|
|
|
|
|
|
# word for the next line. |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
# if 0 elements in @splitLoc, $leftWord and $rightWord already defaulted |
268
|
|
|
|
|
|
|
|
269
|
0
|
|
|
|
|
|
return ($leftWord, $rightWord); |
270
|
|
|
|
|
|
|
} |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
# remove soft hyphens (SHYs) from a word. assume is always #173 (good for |
273
|
|
|
|
|
|
|
# Latin-1, CP-1252, UTF-8; might not work for some encodings) TBD might want |
274
|
|
|
|
|
|
|
# to pass in current encoding, or what SHY value is. |
275
|
|
|
|
|
|
|
# return list of break points where SHYs were removed |
276
|
|
|
|
|
|
|
sub _removeSHY { |
277
|
0
|
|
|
0
|
|
|
my ($word) = @_; |
278
|
|
|
|
|
|
|
|
279
|
0
|
|
|
|
|
|
my @SHYs = (); |
280
|
0
|
|
|
|
|
|
my $i = 0; |
281
|
|
|
|
|
|
|
|
282
|
0
|
|
|
|
|
|
my @chars = split //, $word; |
283
|
0
|
|
|
|
|
|
my $out = ''; |
284
|
0
|
|
|
|
|
|
foreach (@chars) { |
285
|
0
|
0
|
|
|
|
|
if (ord($_) == 173) { |
286
|
|
|
|
|
|
|
# it's a SHY, so remove from word, add to list |
287
|
0
|
|
|
|
|
|
push @SHYs, ($i - 1); |
288
|
0
|
|
|
|
|
|
next; |
289
|
|
|
|
|
|
|
} |
290
|
0
|
|
|
|
|
|
$out .= $_; |
291
|
0
|
|
|
|
|
|
$i++; |
292
|
|
|
|
|
|
|
} |
293
|
0
|
|
|
|
|
|
return ($out, @SHYs); |
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
1; |