line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Text::WideChar::Util; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY |
4
|
|
|
|
|
|
|
our $DATE = '2021-04-14'; # DATE |
5
|
|
|
|
|
|
|
our $DIST = 'Text-WideChar-Util'; # DIST |
6
|
|
|
|
|
|
|
our $VERSION = '0.172'; # VERSION |
7
|
|
|
|
|
|
|
|
8
|
2
|
|
|
2
|
|
168574
|
use 5.010001; |
|
2
|
|
|
|
|
31
|
|
9
|
2
|
|
|
2
|
|
1093
|
use locale; |
|
2
|
|
|
|
|
1326
|
|
|
2
|
|
|
|
|
11
|
|
10
|
2
|
|
|
2
|
|
85
|
use strict; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
44
|
|
11
|
2
|
|
|
2
|
|
11
|
use utf8; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
14
|
|
12
|
2
|
|
|
2
|
|
41
|
use warnings; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
63
|
|
13
|
|
|
|
|
|
|
|
14
|
2
|
|
|
2
|
|
998
|
use Unicode::GCString; |
|
2
|
|
|
|
|
61177
|
|
|
2
|
|
|
|
|
1337
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
require Exporter; |
17
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
18
|
|
|
|
|
|
|
our @EXPORT_OK = qw( |
19
|
|
|
|
|
|
|
mbpad |
20
|
|
|
|
|
|
|
pad |
21
|
|
|
|
|
|
|
mbswidth |
22
|
|
|
|
|
|
|
mbswidth_height |
23
|
|
|
|
|
|
|
length_height |
24
|
|
|
|
|
|
|
mbtrunc |
25
|
|
|
|
|
|
|
trunc |
26
|
|
|
|
|
|
|
mbwrap |
27
|
|
|
|
|
|
|
wrap |
28
|
|
|
|
|
|
|
); |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# Return the visual width of a string, in terminal columns, as reported by
# Unicode::GCString's columns() method.
sub mbswidth {
    my $gcstring = Unicode::GCString->new($_[0]);
    return $gcstring->columns;
}
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
# Return [WIDTH, HEIGHT] for a (possibly multi-line) text, where WIDTH is the
# visual width (mbswidth) of the widest line and HEIGHT is the number of lines.
# Every newline (LF or CRLF) starts a new line, so a trailing newline adds one
# to the height. An empty text yields [0, 0].
sub mbswidth_height {
    my ($text) = @_;

    my ($widest, $rows) = (0, 0);
  PIECE:
    for my $piece (split /(\r?\n)/, $text) {
        # the separators are returned too, thanks to the capturing group
        if ($piece =~ /\n/) {
            $rows++;
            next PIECE;
        }
        $rows ||= 1;    # the first piece of text opens line number one
        my $cols = mbswidth($piece);
        $widest = $cols if $cols > $widest;
    }
    return [$widest, $rows];
}
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
# Return [LENGTH, HEIGHT] for a (possibly multi-line) text, where LENGTH is the
# length() in characters of the longest line and HEIGHT is the number of lines.
# Every newline (LF or CRLF) starts a new line, so a trailing newline adds one
# to the height. An empty text yields [0, 0].
sub length_height {
    my ($text) = @_;

    my ($longest, $rows) = (0, 0);
  PIECE:
    for my $piece (split /(\r?\n)/, $text) {
        # the separators are returned too, thanks to the capturing group
        if ($piece =~ /\n/) {
            $rows++;
            next PIECE;
        }
        $rows ||= 1;    # the first piece of text opens line number one
        my $chars = length $piece;
        $longest = $chars if $chars > $longest;
    }
    return [$longest, $rows];
}
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# Compute the width, in columns, of an indent string.
#
# Arguments:
#   $is_mb     - true to measure non-tab characters with mbswidth(), false to
#                count each of them as one column
#   $indent    - the indent string (spaces, tabs, or anything else)
#   $tab_width - distance between tab stops; the caller must pass a positive
#                value
#
# A tab advances the running width to the next multiple of $tab_width.
sub _get_indent_width {
    my ($is_mb, $indent, $tab_width) = @_;

    my $col = 0;
    for my $ch (split //, $indent) {
        if ($ch ne "\t") {
            $col += $is_mb ? mbswidth($ch) : 1;
            next;
        }
        # go to the next tab stop
        $col = $tab_width * (int($col / $tab_width) + 1);
    }
    return $col;
}
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
# Characters that the wrapping code treats as "CJK", i.e. text that may be
# line-broken between any two characters because words are not separated by
# whitespace.
#
# 3002 = IDEOGRAPHIC FULL STOP
# ff0c = FULLWIDTH COMMA
# 30fc = KATAKANA-HIRAGANA PROLONGED SOUND MARK
#
# The member list is kept in one string and interpolated into all three
# patterns so they cannot drift apart. It is deliberately joined WITHOUT
# whitespace: under a single /x, whitespace inside a bracketed character class
# is still literal, so a multi-line [...] would silently add space and newline
# to the class (and remove them from the negated class).
# (CJK_Unified_Ideographs_Extension_C is intentionally not listed.)
my $cjk_class_members = join "", qw(
    \p{Block=CJK_Compatibility}
    \p{Block=CJK_Compatibility_Forms}
    \p{Block=CJK_Compatibility_Ideographs}
    \p{Block=CJK_Compatibility_Ideographs_Supplement}
    \p{Block=CJK_Radicals_Supplement}
    \p{Block=CJK_Strokes}
    \p{Block=CJK_Symbols_And_Punctuation}
    \p{Block=CJK_Unified_Ideographs}
    \p{Block=CJK_Unified_Ideographs_Extension_A}
    \p{Block=CJK_Unified_Ideographs_Extension_B}
    \p{Hiragana}
    \p{Katakana}
    \p{Hangul}
    \x{30fc}
    \x{3002}
    \x{ff0c}
);

# matches exactly one CJK character (character-class form)
our $re_cjk_class = qr/[$cjk_class_members]/;

# matches exactly one character that is NOT a CJK character
our $re_cjk_negclass = qr/[^$cjk_class_members]/;

# matches exactly one CJK character (group form, safe to quantify as
# $re_cjk+). Defined via the class so that Hiragana, Katakana, Hangul, U+30FC,
# U+3002 and U+FF0C each match on their own, instead of only as one fixed
# five-character sequence.
our $re_cjk = qr/(?:$re_cjk_class)/;
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# Wrap text to a given width; shared implementation of wrap() and mbwrap().
#
# Arguments:
#   $is_mb - true to measure words with mbswidth() (visual columns), false to
#            measure them with length() (characters)
#   $text  - the text to wrap; paragraphs are separated by blank lines and the
#            separators are copied to the output unchanged
#   $width - maximum line width, defaults to 80
#   $opts  - optional hashref:
#              tab_width           - distance between tab stops (default 8,
#                                    must be positive)
#              flindent            - indent string for the first line of each
#                                    paragraph (default: deduced from the text)
#              slindent            - indent string for the subsequent lines
#                                    (default: deduced from the text)
#              keep_trailing_space - if true, keep the separating space at the
#                                    end of a line when the next word is moved
#                                    to the following line
#              return_stats        - if true, return [TEXT, \%stats] instead
#                                    of TEXT
#
# Returns the wrapped text, or (with return_stats) an arrayref of the wrapped
# text and a hashref with max_word_width and min_word_width.
#
# Dies if tab_width is not positive, or if the subsequent-line indent is not
# narrower than $width.
sub _wrap {
    my ($is_mb, $text, $width, $opts) = @_;
    $width //= 80;
    $opts  //= {};

    # our algorithm: split into paragraphs, then process each paragraph. at the
    # start of paragraph, determine indents (either from %opts, or deduced from
    # text, like in Emacs) then push first-line indent. proceed to push words,
    # while adding subsequent-line indent at the start of each line.

    my $tw = $opts->{tab_width} // 8;
    die "Please specify a positive tab width" unless $tw > 0;
    my $optfli  = $opts->{flindent};
    my $optfliw = defined $optfli ? _get_indent_width($is_mb, $optfli, $tw) : undef;
    my $optsli  = $opts->{slindent};
    my $optsliw = defined $optsli ? _get_indent_width($is_mb, $optsli, $tw) : undef;
    my $optkts  = $opts->{keep_trailing_space} // 0;
    my @res;

    # odd elements are the paragraph separators (captured by the split)
    my @para = split /(\n(?:[ \t]*\n)+)/, $text;

    my ($maxww, $minww);

  PARA:
    while (my ($ptext, $pbreak) = splice @para, 0, 2) {
        my $x = 0;                # current column
        my $line_has_word = 0;    # note: never reset, so it really means
                                  # "paragraph already has a word"

        # determine indents
        my ($fli, $sli, $fliw, $sliw);
        if (defined $optfli) {
            $fli  = $optfli;
            $fliw = $optfliw;
        } else {
            # XXX emacs can also treat ' #' as indent, e.g. when wrapping
            # multi-line perl comment.
            ($fli) = $ptext =~ /\A([ \t]*)\S/;
            if (defined $fli) {
                $fliw = _get_indent_width($is_mb, $fli, $tw);
            } else {
                $fli  = "";
                $fliw = 0;
            }
        }
        if (defined $optsli) {
            $sli  = $optsli;
            $sliw = $optsliw;
        } else {
            # indent of the second line; only spaces and tabs count as
            # indentation (a leading "+" is text, not indent)
            ($sli) = $ptext =~ /\A[^\n]*\S[\n]([ \t]*)\S/;
            if (defined $sli) {
                $sliw = _get_indent_width($is_mb, $sli, $tw);
            } else {
                $sli  = "";
                $sliw = 0;
            }
        }
        die "Subsequent indent must be less than width" if $sliw >= $width;

        push @res, $fli;
        $x += $fliw;

        my @words0; # (WORD1, WORD1_IS_CJK?, WS_AFTER?, WORD2, WORD2_IS_CJK?, WS_AFTER?, ...)
        # we differentiate/split between CJK "word" (cluster of CJK letters,
        # really) and non-CJK word, e.g. "我很爱你my可爱的and beautiful,
        # beautiful wife" is split to ["我很爱你", "my", "可爱的", "and",
        # "beautiful,", "beautiful", "wife"]. we do this because CJK word can be
        # line-broken on a per-letter basis, as they don't separate words with
        # whitespaces.
        while ($ptext =~ /(?: ($re_cjk+)|(\S+) ) (\s*)/gox) {
            my $ws_after = $3 ? 1:0;
            if ($1) {
                push @words0, $1, 1, $ws_after;
            } else {
                my $ptext2 = $2;
                while ($ptext2 =~ /($re_cjk_class+)|
                                   ($re_cjk_negclass+)/gox) {
                    if ($1) {
                        push @words0, $1, 1, 0;
                    } else {
                        push @words0, $2, 0, 0;
                    }
                }
                # only the last piece of the whitespace-delimited word is
                # followed by the whitespace
                $words0[-1] = $ws_after;
            }
        }

        # process each word
        my $prev_ws_after;
        while (@words0) {
            my ($word0, $is_cjk, $ws_after) = splice @words0, 0, 3;
            my @words;
            my @wordsw;
            while (1) {
                my $wordw = $is_mb ? mbswidth($word0) : length($word0);

                # long cjk word is not truncated here because it will be
                # line-broken later when wrapping.
                if ($wordw <= $width-$sliw || $is_cjk) {
                    push @words , $word0;
                    push @wordsw, $wordw;
                    last;
                }
                # truncate long word
                if ($is_mb) {
                    my $res = mbtrunc($word0, $width-$sliw, 1);
                    my ($piece, $piecew) = @{$res}[0, 1];
                    if (!length $piece) {
                        # not even the first character fits in a full line
                        # (e.g. a double-width character and a one-column
                        # line). take it anyway, otherwise $word0 would never
                        # shrink and this loop would never end.
                        $piece  = substr($word0, 0, 1);
                        $piecew = mbswidth($piece);
                    }
                    push @words , $piece;
                    push @wordsw, $piecew;
                    $word0 = substr($word0, length($piece));
                } else {
                    my $w2 = substr($word0, 0, $width-$sliw);
                    push @words , $w2;
                    push @wordsw, $width-$sliw;
                    $word0 = substr($word0, $width-$sliw);
                }
                # only reachable after a forced piece consumed the whole word
                last unless length $word0;
            }

            for my $word (@words) {
                my $wordw = shift @wordsw;

                $maxww = $wordw if !defined($maxww) || $maxww < $wordw;
                $minww = $wordw if !defined($minww) || $minww > $wordw;

                my $x_after_word = $x + ($line_has_word ? 1:0) + $wordw;
                if ($x_after_word <= $width) {
                    # the addition of word hasn't exceeded column width
                    if ($line_has_word) {
                        if ($prev_ws_after) {
                            push @res, " ";
                            $x++;
                        }
                    }
                    push @res, $word;
                    $x += $wordw;
                } else {
                    # true when a new line would not offer more room than the
                    # current one, so a CJK word that cannot place even one
                    # character must be force-fed a character to make progress
                    my $must_progress =
                        (!$line_has_word && $x <= $sliw) ? 1 : 0;
                    while (1) {
                        if ($is_cjk) {
                            # CJK word can be broken: fill the rest of the
                            # current line with as much of it as fits
                            my $room = $width - $x - ($prev_ws_after ? 1 : 0);
                            my $head = "";
                            if ($room >= 0) {
                                $head = mbtrunc($word, $room, 1)->[0];
                                push @res, " " if $prev_ws_after;
                            } elsif ($prev_ws_after && $optkts) {
                                # line is already full: there is no room even
                                # for the separator (mbtrunc would die on a
                                # negative width), so the space only survives
                                # as trailing space when asked for
                                push @res, " ";
                            }
                            if (!length($head) && $must_progress && length($word)) {
                                # even an empty line cannot hold one
                                # character; overflow rather than loop forever
                                $head = substr($word, 0, 1);
                            }
                            push @res, $head;
                            $prev_ws_after = 0;
                            $word  = substr($word, length($head));
                            $wordw = mbswidth($word);
                        }

                        # move the word to the next line
                        push @res, " " if $prev_ws_after && $optkts;
                        push @res, "\n", $sli;

                        # a non-CJK word cannot be broken any further here, so
                        # it is always placed (it normally fits, because long
                        # words were cut to $width-$sliw above)
                        if ($sliw + $wordw <= $width || !$is_cjk) {
                            push @res, $word;
                            $x = $sliw + $wordw;
                            last;
                        } else {
                            # still too long, truncate again
                            $x = $sliw;
                            $must_progress = 1;
                        }
                    }
                }
                $line_has_word++;
            }
            $prev_ws_after = $ws_after;
        }

        if (defined $pbreak) {
            push @res, $pbreak;
        } else {
            # last paragraph: preserve a final newline if the input had one
            push @res, "\n" if $ptext =~ /\n[ \t]*\z/;
        }
    }

    if ($opts->{return_stats}) {
        return [join("", @res), {
            max_word_width => $maxww,
            min_word_width => $minww,
        }];
    } else {
        return join("", @res);
    }
}
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
# Wrap text measuring words by visual width (wide characters count as two
# columns). Takes ($text, $width, \%opts); see _wrap() for the details.
sub mbwrap {
    return _wrap(1, @_);
}
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
# Wrap text measuring words by character count (length()). Takes
# ($text, $width, \%opts); see _wrap() for the details.
sub wrap {
    return _wrap(0, @_);
}
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
# Pad (and optionally truncate) text to a given width; shared implementation
# of pad() and mbpad().
#
# Arguments:
#   $is_mb    - true to measure text with mbswidth() (visual columns), false
#               to measure it with length() (characters)
#   $text     - the text to pad
#   $width    - the target width
#   $which    - where to pad; only the first character is looked at: "l" pads
#               on the left, "c" pads on both sides, anything else (default
#               "r") pads on the right
#   $padchar  - padding string, default a single space; it is counted as one
#               column per repetition
#   $is_trunc - if true, text wider than $width is truncated to $width
#
# Returns the padded text. Text that is already wider than $width is returned
# unchanged unless $is_trunc is set.
sub _pad {
    my ($is_mb, $text, $width, $which, $padchar, $is_trunc) = @_;
    $which = $which ? substr($which, 0, 1) : "r";
    $padchar //= " ";

    my $w = $is_mb ? mbswidth($text) : length($text);
    if ($is_trunc && $w > $width) {
        # truncate with the same measure that was used for $w, so that pad()
        # cuts by characters and mbpad() cuts by columns
        my $res = _trunc($is_mb, $text, $width, 1);
        # the cut can end up narrower than $width (a wide character that does
        # not fit); the gap is always filled on the right
        return $res->[0] . ($padchar x ($width-$res->[1]));
    }

    # nothing to add; also avoids a negative repeat count, which warns
    my $fill = $width - $w;
    return $text if $fill <= 0;

    if ($which eq 'l') {
        return ($padchar x $fill) . $text;
    } elsif ($which eq 'c') {
        # when $fill is odd, the extra padding goes to the right
        my $n = int($fill/2);
        return ($padchar x $n) . $text . ($padchar x ($fill-$n));
    }
    return $text . ($padchar x $fill);
}
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
# Pad text measuring it by visual width (wide characters count as two
# columns). Takes ($text, $width, $which, $padchar, $is_trunc); see _pad().
sub mbpad {
    return _pad(1, @_);
}
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
# Pad text measuring it by character count (length()). Takes
# ($text, $width, $which, $padchar, $is_trunc); see _pad().
sub pad {
    return _pad(0, @_);
}
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
# Truncate text to at most $width; shared implementation of trunc() and
# mbtrunc().
#
# Arguments:
#   $is_mb        - true to measure text with mbswidth() (visual columns),
#                   false to measure it with length() (characters)
#   $text         - the text to truncate
#   $width        - maximum width of the result, must not be negative
#   $return_width - (undocumented) if true, return
#                   [truncated_text, width, length_in_chars] instead of just
#                   the truncated text
#
# Dies if $width is negative.
sub _trunc {
    my ($is_mb, $text, $width, $return_width) = @_;

    my $measure = sub { $is_mb ? mbswidth($_[0]) : length($_[0]) };

    my $full_width = $measure->($text);
    $width >= 0 or die "Invalid argument, width must not be negative";

    # already short enough: hand the text back untouched
    if ($full_width <= $width) {
        return $text unless $return_width;
        return [$text, $full_width, length($text)];
    }

    # perform binary cutting: repeatedly try to append the next $step
    # characters to the result; keep them when they fit, otherwise retry on
    # that same chunk with a step half the size.
    my @kept;               # accepted chunks, in order
    my $kept_width  = 0;    # total width of text in @kept
    my $kept_chars  = 0;    # total number of characters in @kept
    my $step        = int($full_width / 2) || 1;
    my $unit_rounds = 0;    # how many times the step has settled on 1
    while (1) {
        my $head = substr($text, 0, $step);
        my $tail = $step > length($text) ? "" : substr($text, $step);
        my $head_width = $measure->($head);
        if ($kept_width + $head_width <= $width) {
            push @kept, $head;
            $kept_width += $head_width;
            $kept_chars += length($head);
            $text = $tail;
        } else {
            # too wide: narrow the search to this chunk
            $text = $head;
        }
        $step = int(($step + 1) / 2);
        next if $step != 1;
        # a step of one is tried a couple more times before giving up
        last if $unit_rounds > 1;
        $unit_rounds++;
    }

    my $truncated = join("", @kept);
    return $return_width ? [$truncated, $kept_width, $kept_chars] : $truncated;
}
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
# Truncate text measuring it by visual width (wide characters count as two
# columns). Takes ($text, $width, $return_width); see _trunc().
sub mbtrunc {
    return _trunc(1, @_);
}
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
# Truncate text measuring it by character count (length()). Takes
# ($text, $width, $return_width); see _trunc().
sub trunc {
    return _trunc(0, @_);
}
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
1; |
418
|
|
|
|
|
|
|
# ABSTRACT: Routines for text containing wide characters |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
__END__ |