| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Text::WideChar::Util; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY |
|
4
|
|
|
|
|
|
|
our $DATE = '2021-04-14'; # DATE |
|
5
|
|
|
|
|
|
|
our $DIST = 'Text-WideChar-Util'; # DIST |
|
6
|
|
|
|
|
|
|
our $VERSION = '0.172'; # VERSION |
|
7
|
|
|
|
|
|
|
|
|
8
|
2
|
|
|
2
|
|
168574
|
use 5.010001; |
|
|
2
|
|
|
|
|
31
|
|
|
9
|
2
|
|
|
2
|
|
1093
|
use locale; |
|
|
2
|
|
|
|
|
1326
|
|
|
|
2
|
|
|
|
|
11
|
|
|
10
|
2
|
|
|
2
|
|
85
|
use strict; |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
44
|
|
|
11
|
2
|
|
|
2
|
|
11
|
use utf8; |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
14
|
|
|
12
|
2
|
|
|
2
|
|
41
|
use warnings; |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
63
|
|
|
13
|
|
|
|
|
|
|
|
|
14
|
2
|
|
|
2
|
|
998
|
use Unicode::GCString; |
|
|
2
|
|
|
|
|
61177
|
|
|
|
2
|
|
|
|
|
1337
|
|
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
require Exporter; |
|
17
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
|
18
|
|
|
|
|
|
|
our @EXPORT_OK = qw( |
|
19
|
|
|
|
|
|
|
mbpad |
|
20
|
|
|
|
|
|
|
pad |
|
21
|
|
|
|
|
|
|
mbswidth |
|
22
|
|
|
|
|
|
|
mbswidth_height |
|
23
|
|
|
|
|
|
|
length_height |
|
24
|
|
|
|
|
|
|
mbtrunc |
|
25
|
|
|
|
|
|
|
trunc |
|
26
|
|
|
|
|
|
|
mbwrap |
|
27
|
|
|
|
|
|
|
wrap |
|
28
|
|
|
|
|
|
|
); |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# Return the visual width of a string, in terminal columns, as reported by
# Unicode::GCString's columns() method.
#
#   $text - string to measure
sub mbswidth {
    my ($text) = @_;
    my $gcstr = Unicode::GCString->new($text);
    return $gcstr->columns;
}
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
# Measure a (possibly multi-line) text block in visual columns.
#
#   $text - text to measure; lines are separated by "\n" or "\r\n"
#
# Returns [$width, $height]: the mbswidth() of the widest line and the number
# of lines. An empty string yields [0, 0]; every newline adds one to the
# height, so "a\n" is reported as two lines.
sub mbswidth_height {
    my ($text) = @_;

    my ($widest, $height) = (0, 0);
    for my $piece (split /(\r?\n)/, $text) {
        # The capturing split also hands back the line terminators themselves.
        if ($piece =~ /\n/) {
            $height++;
            next;
        }
        $height ||= 1;    # any line content means there is at least one line
        my $cols = mbswidth($piece);
        $widest = $cols if $cols > $widest;
    }
    return [$widest, $height];
}
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
# Measure a (possibly multi-line) text block in characters.
#
#   $text - text to measure; lines are separated by "\n" or "\r\n"
#
# Returns [$length, $height]: the length() of the longest line and the number
# of lines. An empty string yields [0, 0]; every newline adds one to the
# height, so "a\n" is reported as two lines.
sub length_height {
    my ($text) = @_;

    my ($longest, $height) = (0, 0);
    for my $piece (split /(\r?\n)/, $text) {
        # The capturing split also hands back the line terminators themselves.
        if ($piece =~ /\n/) {
            $height++;
            next;
        }
        $height ||= 1;    # any line content means there is at least one line
        my $chars = length($piece);
        $longest = $chars if $chars > $longest;
    }
    return [$longest, $height];
}
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# Compute the width of an indent string, expanding tabs to tab stops.
#
#   $is_mb     - true: non-tab characters are measured with mbswidth();
#                false: every non-tab character counts as 1
#   $indent    - the indent string
#   $tab_width - distance between tab stops (callers must pass a non-zero value)
#
# Returns the column reached after the indent.
sub _get_indent_width {
    my ($is_mb, $indent, $tab_width) = @_;

    my $col = 0;
    foreach my $ch (split //, $indent) {
        if ($ch ne "\t") {
            $col += $is_mb ? mbswidth($ch) : 1;
            next;
        }
        # a tab jumps to the next tab stop
        $col = (int($col / $tab_width) + 1) * $tab_width;
    }
    return $col;
}
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
# 3002 = IDEOGRAPHIC FULL STOP |
|
81
|
|
|
|
|
|
|
# ff0c = FULLWIDTH COMMA |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
# Matches exactly one CJK character. Every entry is its own alternative: the
# listed Unicode blocks, the Hiragana/Katakana/Hangul properties, U+30FC
# (the prolonged sound mark), U+3002 (ideographic full stop) and U+FF0C
# (fullwidth comma). Previously the last entries were written back to back
# without "|", which turned them into a single five-character sequence that
# practically never matched.
#
# CJK_Unified_Ideographs_Extension_C stays disabled, as in the original
# (NOTE(review): presumably because older perls do not know that block name).
our $re_cjk = qr/(?:
                     \p{Block=CJK_Compatibility}
                 |   \p{Block=CJK_Compatibility_Forms}
                 |   \p{Block=CJK_Compatibility_Ideographs}
                 |   \p{Block=CJK_Compatibility_Ideographs_Supplement}
                 |   \p{Block=CJK_Radicals_Supplement}
                 |   \p{Block=CJK_Strokes}
                 |   \p{Block=CJK_Symbols_And_Punctuation}
                 |   \p{Block=CJK_Unified_Ideographs}
                 |   \p{Block=CJK_Unified_Ideographs_Extension_A}
                 |   \p{Block=CJK_Unified_Ideographs_Extension_B}
                 #|  \p{Block=CJK_Unified_Ideographs_Extension_C}
                 |   \p{Hiragana}
                 |   \p{Katakana}
                 |   \p{Hangul}
                 |   \x{30fc}
                 |   [\x{3002}\x{ff0c}]
                 )/x;
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
34
|
|
|
98
|
|
|
|
|
|
|
# Bracketed character class matching one CJK character (same membership as
# the CJK alternation pattern, but usable where a single class is wanted).
#
# The members are joined with no separator on purpose: under a single /x,
# whitespace inside a bracketed character class is NOT ignored, so spelling
# the class across several indented lines made it match spaces and newlines
# as "CJK" as well.
our $re_cjk_class = do {
    my $members = join '', qw(
        \p{Block=CJK_Compatibility}
        \p{Block=CJK_Compatibility_Forms}
        \p{Block=CJK_Compatibility_Ideographs}
        \p{Block=CJK_Compatibility_Ideographs_Supplement}
        \p{Block=CJK_Radicals_Supplement}
        \p{Block=CJK_Strokes}
        \p{Block=CJK_Symbols_And_Punctuation}
        \p{Block=CJK_Unified_Ideographs}
        \p{Block=CJK_Unified_Ideographs_Extension_A}
        \p{Block=CJK_Unified_Ideographs_Extension_B}
        \p{Hiragana}
        \p{Katakana}
        \p{Hangul}
        \x{30fc}
        \x{3002}
        \x{ff0c}
    );
    qr/[$members]/;
};
|
113
|
|
|
|
|
|
|
# Negated bracketed character class: matches one character that is NOT CJK
# (the complement of the CJK class membership listed below).
#
# The members are joined with no separator on purpose: under a single /x,
# whitespace inside a bracketed character class is NOT ignored, so spelling
# the class across several indented lines made the negated class refuse to
# match spaces and newlines.
our $re_cjk_negclass = do {
    my $members = join '', qw(
        \p{Block=CJK_Compatibility}
        \p{Block=CJK_Compatibility_Forms}
        \p{Block=CJK_Compatibility_Ideographs}
        \p{Block=CJK_Compatibility_Ideographs_Supplement}
        \p{Block=CJK_Radicals_Supplement}
        \p{Block=CJK_Strokes}
        \p{Block=CJK_Symbols_And_Punctuation}
        \p{Block=CJK_Unified_Ideographs}
        \p{Block=CJK_Unified_Ideographs_Extension_A}
        \p{Block=CJK_Unified_Ideographs_Extension_B}
        \p{Hiragana}
        \p{Katakana}
        \p{Hangul}
        \x{30fc}
        \x{3002}
        \x{ff0c}
    );
    qr/[^$members]/;
};
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# Wrap text to a given width. Shared implementation behind wrap() and mbwrap().
#
# Arguments:
#   $is_mb - true: word widths are measured with mbswidth() (visual columns);
#            false: with length()
#   $text  - text to wrap; paragraphs are separated by one or more blank lines
#   $width - maximum line width (default 80)
#   $opts  - optional hashref:
#              tab_width            tab stop distance used when measuring
#                                   indents (default 8, must be > 0)
#              flindent             first-line indent string (default: taken
#                                   from the paragraph's first line)
#              slindent             subsequent-line indent string (default:
#                                   taken from the paragraph's second line)
#              keep_trailing_space  keep the space in front of a line break
#                                   (default 0)
#              return_stats         if true, return
#                                   [$wrapped, {max_word_width=>..., min_word_width=>...}]
#
# Returns the wrapped text (or the arrayref described above).
# Dies if tab_width is not positive or if the subsequent-line indent is not
# narrower than $width.
#
# NOTE(review): a non-CJK word made of characters wider than the usable line
# width (width minus subsequent indent) cannot make progress in the
# "truncate long word" loop below; this case is not handled here.
sub _wrap {
    my ($is_mb, $text, $width, $opts) = @_;
    $width //= 80;
    $opts  //= {};

    # our algorithm: split into paragraphs, then process each paragraph. at the
    # start of paragraph, determine indents (either from %opts, or deduced from
    # text, like in Emacs) then push first-line indent. proceed to push words,
    # while adding subsequent-line indent at the start of each line.

    my $tw = $opts->{tab_width} // 8;
    die "Please specify a positive tab width" unless $tw > 0;
    my $optfli  = $opts->{flindent};
    my $optfliw = defined $optfli ? _get_indent_width($is_mb, $optfli, $tw) : undef;
    my $optsli  = $opts->{slindent};
    my $optsliw = defined $optsli ? _get_indent_width($is_mb, $optsli, $tw) : undef;
    my $optkts  = $opts->{keep_trailing_space} // 0;
    my @res;

    # the capturing split keeps the paragraph breaks, so @para alternates
    # (paragraph text, break, paragraph text, break, ...)
    my @para = split /(\n(?:[ \t]*\n)+)/, $text;
    #say "D:para=[",join(", ", @para),"]";

    my ($maxww, $minww);

  PARA:
    while (my ($ptext, $pbreak) = splice @para, 0, 2) {
        my $x = 0;                # current column on the output line
        my $line_has_word = 0;

        # determine indents
        my ($fli, $sli, $fliw, $sliw);
        if (defined $optfli) {
            $fli  = $optfli;
            $fliw = $optfliw;
        } else {
            # XXX emacs can also treat ' #' as indent, e.g. when wrapping
            # multi-line perl comment.
            ($fli) = $ptext =~ /\A([ \t]*)\S/;
            if (defined $fli) {
                $fliw = _get_indent_width($is_mb, $fli, $tw);
            } else {
                $fli  = "";
                $fliw = 0;
            }
        }
        if (defined $optsli) {
            $sli  = $optsli;
            $sliw = $optsliw;
        } else {
            # indent of the second line: spaces and tabs only (a literal "+"
            # used to be accepted here by mistake and was then emitted twice,
            # once as indent and once as a word)
            ($sli) = $ptext =~ /\A[^\n]*\S[\n]([ \t]*)\S/;
            if (defined $sli) {
                $sliw = _get_indent_width($is_mb, $sli, $tw);
            } else {
                $sli  = "";
                $sliw = 0;
            }
        }
        die "Subsequent indent must be less than width" if $sliw >= $width;

        push @res, $fli;
        $x += $fliw;

        my @words0; # (WORD1, WORD1_IS_CJK?, WS_AFTER?, WORD2, WORD2_IS_CJK?, WS_AFTER?, ...)
        # we differentiate/split between CJK "word" (cluster of CJK letters,
        # really) and non-CJK word, e.g. "我很爱你my可爱的and beautiful,
        # beautiful wife" is split to ["我很爱你", "my", "可爱的", "and",
        # "beautiful,", "beautiful", "wife"]. we do this because CJK word can be
        # line-broken on a per-letter basis, as they don't separate words with
        # whitespaces.
        while ($ptext =~ /(?: ($re_cjk+)|(\S+) ) (\s*)/gox) {
            my $ws_after = $3 ? 1:0;
            if ($1) {
                push @words0, $1, 1, $ws_after;
            } else {
                my $ptext2 = $2;
                while ($ptext2 =~ /($re_cjk_class+)|
                                   ($re_cjk_negclass+)/gox) {
                    if ($1) {
                        push @words0, $1, 1, 0;
                    } else {
                        push @words0, $2, 0, 0;
                    }
                }
                # only the last fragment of the chunk is followed by whitespace
                $words0[-1] = $ws_after;
            }
        }

        # process each word
        my $prev_ws_after;
        while (@words0) {
            my ($word0, $is_cjk, $ws_after) = splice @words0, 0, 3;
            my @words;
            my @wordsw;
            while (1) {
                my $wordw = $is_mb ? mbswidth($word0) : length($word0);

                # long cjk word is not truncated here because it will be
                # line-broken later when wrapping.
                if ($wordw <= $width-$sliw || $is_cjk) {
                    push @words , $word0;
                    push @wordsw, $wordw;
                    last;
                }
                # truncate long word
                if ($is_mb) {
                    my $res = mbtrunc($word0, $width-$sliw, 1);
                    push @words , $res->[0];
                    push @wordsw, $res->[1];
                    $word0 = substr($word0, length($res->[0]));
                    #say "D:truncated long word (mb): $text -> $res->[0] & $res->[1], word0=$word0";
                } else {
                    my $w2 = substr($word0, 0, $width-$sliw);
                    push @words , $w2;
                    push @wordsw, $width-$sliw;
                    $word0 = substr($word0, $width-$sliw);
                    #say "D:truncated long word: $w2, ".($width-$sliw).", word0=$word0";
                }
            }

            for my $word (@words) {
                my $wordw = shift @wordsw;
                #say "D:x=$x word=$word is_cjk=$is_cjk ws_after=$ws_after wordw=$wordw line_has_word=$line_has_word width=$width";

                $maxww = $wordw if !defined($maxww) || $maxww < $wordw;
                $minww = $wordw if !defined($minww) || $minww > $wordw;

                my $x_after_word = $x + ($line_has_word ? 1:0) + $wordw;
                if ($x_after_word <= $width) {
                    # the addition of word hasn't exceeded column width
                    if ($line_has_word) {
                        if ($prev_ws_after) {
                            push @res, " ";
                            $x++;
                        }
                    }
                    push @res, $word;
                    $x += $wordw;
                } else {
                    while (1) {
                        if ($is_cjk) {
                            # CJK word can be broken
                            my $room = $width - $x; # columns left on this line
                            my $res;
                            if ($prev_ws_after && $room >= 1) {
                                $res = mbtrunc($word, $room - 1, 1);
                                push @res, " ", $res->[0];
                            } else {
                                # either no separating space is wanted, or the
                                # line has no room for it. never hand a
                                # negative width to mbtrunc(), it dies on that.
                                push @res, " " if $prev_ws_after && $optkts;
                                $res = mbtrunc($word, ($room > 0 ? $room : 0), 1);
                                push @res, $res->[0];
                            }
                            my $word2 = substr($word, length($res->[0]));
                            #say "D:truncated CJK word: $word -> $res->[0] & $res->[1], remaining=$word2";
                            $prev_ws_after = 0;
                            $word = $word2;
                            $wordw = mbswidth($word);
                        }

                        # move the word to the next line
                        push @res, " " if $prev_ws_after && $optkts;
                        push @res, "\n", $sli;

                        if ($sliw + $wordw <= $width) {
                            push @res, $word;
                            $x = $sliw + $wordw;
                            last;
                        } else {
                            # still too long, truncate again
                            $x = $sliw;
                        }
                    }
                }
                $line_has_word++;
            }
            $prev_ws_after = $ws_after;
        }

        if (defined $pbreak) {
            push @res, $pbreak;
        } else {
            # last paragraph: keep a final newline if the input had one
            push @res, "\n" if $ptext =~ /\n[ \t]*\z/;
        }
    }

    if ($opts->{return_stats}) {
        return [join("", @res), {
            max_word_width => $maxww,
            min_word_width => $minww,
        }];
    } else {
        return join("", @res);
    }
}
|
322
|
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
# Wrap text, measuring words by visual width. Takes ($text, $width, \%opts)
# and forwards them to _wrap() with the multibyte flag switched on.
sub mbwrap {
    return _wrap(1, @_);
}
|
326
|
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
# Wrap text, measuring words by character count. Takes ($text, $width, \%opts)
# and forwards them to _wrap() with the multibyte flag switched off.
sub wrap {
    return _wrap(0, @_);
}
|
330
|
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
# Pad (and optionally truncate) text to a given width. Shared implementation
# behind pad() and mbpad().
#
# Arguments:
#   $is_mb    - true: measure with mbswidth() (visual columns); false: length()
#   $text     - text to pad
#   $width    - target width
#   $which    - where to add padding; only the first character is looked at:
#               'l' pads on the left, 'c' centers, anything else (default 'r')
#               pads on the right
#   $padchar  - padding string, repeated once per missing column (default " ")
#               (NOTE(review): assumed to be one column wide -- confirm)
#   $is_trunc - if true, text wider than $width is cut down to $width
#
# Returns the padded text. Text that is already wider than $width is returned
# unchanged unless $is_trunc is set. After truncation, any leftover column is
# filled on the right regardless of $which.
sub _pad {
    my ($is_mb, $text, $width, $which, $padchar, $is_trunc) = @_;
    if ($which) {
        $which = substr($which, 0, 1);
    } else {
        $which = "r";
    }
    $padchar //= " ";

    my $w = $is_mb ? mbswidth($text) : length($text);
    if ($is_trunc && $w > $width) {
        # truncate with the same measure used for $w above; this used to call
        # mbtrunc() unconditionally, so pad() cut by visual columns although it
        # measures by characters
        my $res = _trunc($is_mb, $text, $width, 1);
        $text = $res->[0] . ($padchar x ($width-$res->[1]));
    } else {
        # a negative repeat count yields the empty string, so over-long text
        # passes through untouched
        if ($which eq 'l') {
            $text = ($padchar x ($width-$w)) . $text;
        } elsif ($which eq 'c') {
            my $n = int(($width-$w)/2);
            $text = ($padchar x $n) . $text . ($padchar x ($width-$w-$n));
        } else {
            $text .= ($padchar x ($width-$w));
        }
    }
    return $text;
}
|
356
|
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
# Pad text measured by visual width. Takes
# ($text, $width, $which, $padchar, $is_trunc) and forwards them to _pad()
# with the multibyte flag switched on.
sub mbpad {
    return _pad(1, @_);
}
|
360
|
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
# Pad text measured by character count. Takes
# ($text, $width, $which, $padchar, $is_trunc) and forwards them to _pad()
# with the multibyte flag switched off.
sub pad {
    return _pad(0, @_);
}
|
364
|
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
# Truncate text so that it is no wider than $width. Shared implementation
# behind trunc() and mbtrunc().
#
# Arguments:
#   $is_mb        - true: measure with mbswidth(); false: measure with length()
#   $text         - text to truncate
#   $width        - maximum width; must not be negative (dies otherwise)
#   $return_width - (undocumented) if true, return
#                   [truncated_text, its width, its length in characters]
#                   instead of just the truncated text
sub _trunc {
    my ($is_mb, $text, $width, $return_width) = @_;

    my $measure = sub { $is_mb ? mbswidth($_[0]) : length($_[0]) };

    my $full_w = $measure->($text);
    $width >= 0 or die "Invalid argument, width must not be negative";
    if ($full_w <= $width) {
        # already fits, nothing to cut
        return $text unless $return_width;
        return [$text, $full_w, length($text)];
    }

    # Binary cutting: try to keep a leading slice of the remaining text; the
    # slice size is halved (rounding up) on every round. Once the slice size
    # has reached 1, a few more single-character rounds are run before
    # stopping.
    my @kept;
    my $kept_w     = 0;    # total width of the pieces in @kept
    my $kept_chars = 0;    # total number of characters in @kept
    my $step       = int($full_w / 2) || 1;
    my $ones_seen  = 0;
    for (;;) {
        my $head   = substr($text, 0, $step);
        my $tail   = $step > length($text) ? "" : substr($text, $step);
        my $head_w = $measure->($head);
        #say "D:left=$head, right=$tail, wl=$head_w";
        if ($kept_w + $head_w <= $width) {
            # the slice fits: keep it and continue with what follows it
            push @kept, $head;
            $kept_w     += $head_w;
            $kept_chars += length($head);
            $text = $tail;
        } else {
            # too wide: retry inside this slice with a smaller step
            $text = $head;
        }
        $step = int(($step + 1) / 2);
        #say "D:l=$step";
        next unless $step == 1;
        last if $ones_seen > 1;
        $ones_seen++;
    }

    my $truncated = join("", @kept);
    return $return_width ? [$truncated, $kept_w, $kept_chars] : $truncated;
}
|
408
|
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
# Truncate text measured by visual width. Takes ($text, $width, $return_width)
# and forwards them to _trunc() with the multibyte flag switched on.
sub mbtrunc {
    return _trunc(1, @_);
}
|
412
|
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
# Truncate text measured by character count. Takes
# ($text, $width, $return_width) and forwards them to _trunc() with the
# multibyte flag switched off.
sub trunc {
    return _trunc(0, @_);
}
|
416
|
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
1; |
|
418
|
|
|
|
|
|
|
# ABSTRACT: Routines for text containing wide characters |
|
419
|
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
__END__ |