| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Text::WideChar::Util; | 
| 2 |  |  |  |  |  |  |  | 
| 3 |  |  |  |  |  |  | our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY | 
| 4 |  |  |  |  |  |  | our $DATE = '2021-04-14'; # DATE | 
| 5 |  |  |  |  |  |  | our $DIST = 'Text-WideChar-Util'; # DIST | 
| 6 |  |  |  |  |  |  | our $VERSION = '0.172'; # VERSION | 
| 7 |  |  |  |  |  |  |  | 
| 8 | 2 |  |  | 2 |  | 168574 | use 5.010001; | 
|  | 2 |  |  |  |  | 31 |  | 
| 9 | 2 |  |  | 2 |  | 1093 | use locale; | 
|  | 2 |  |  |  |  | 1326 |  | 
|  | 2 |  |  |  |  | 11 |  | 
| 10 | 2 |  |  | 2 |  | 85 | use strict; | 
|  | 2 |  |  |  |  | 5 |  | 
|  | 2 |  |  |  |  | 44 |  | 
| 11 | 2 |  |  | 2 |  | 11 | use utf8; | 
|  | 2 |  |  |  |  | 5 |  | 
|  | 2 |  |  |  |  | 14 |  | 
| 12 | 2 |  |  | 2 |  | 41 | use warnings; | 
|  | 2 |  |  |  |  | 5 |  | 
|  | 2 |  |  |  |  | 63 |  | 
| 13 |  |  |  |  |  |  |  | 
| 14 | 2 |  |  | 2 |  | 998 | use Unicode::GCString; | 
|  | 2 |  |  |  |  | 61177 |  | 
|  | 2 |  |  |  |  | 1337 |  | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | require Exporter; | 
| 17 |  |  |  |  |  |  | our @ISA       = qw(Exporter); | 
| 18 |  |  |  |  |  |  | our @EXPORT_OK = qw( | 
| 19 |  |  |  |  |  |  | mbpad | 
| 20 |  |  |  |  |  |  | pad | 
| 21 |  |  |  |  |  |  | mbswidth | 
| 22 |  |  |  |  |  |  | mbswidth_height | 
| 23 |  |  |  |  |  |  | length_height | 
| 24 |  |  |  |  |  |  | mbtrunc | 
| 25 |  |  |  |  |  |  | trunc | 
| 26 |  |  |  |  |  |  | mbwrap | 
| 27 |  |  |  |  |  |  | wrap | 
| 28 |  |  |  |  |  |  | ); | 
| 29 |  |  |  |  |  |  |  | 
| 30 |  |  |  |  |  |  | sub mbswidth { | 
| 31 | 244 |  |  | 244 | 1 | 876 | Unicode::GCString->new($_[0])->columns; | 
| 32 |  |  |  |  |  |  | } | 
| 33 |  |  |  |  |  |  |  | 
| 34 |  |  |  |  |  |  | sub mbswidth_height { | 
| 35 | 0 |  |  | 0 | 1 | 0 | my $text = shift; | 
| 36 | 0 |  |  |  |  | 0 | my $num_lines = 0; | 
| 37 | 0 |  |  |  |  | 0 | my $len = 0; | 
| 38 | 0 |  |  |  |  | 0 | for my $e (split /(\r?\n)/, $text) { | 
| 39 | 0 | 0 |  |  |  | 0 | if ($e =~ /\n/) { | 
| 40 | 0 |  |  |  |  | 0 | $num_lines++; | 
| 41 | 0 |  |  |  |  | 0 | next; | 
| 42 |  |  |  |  |  |  | } | 
| 43 | 0 | 0 |  |  |  | 0 | $num_lines = 1 if $num_lines == 0; | 
| 44 | 0 |  |  |  |  | 0 | my $l = mbswidth($e); | 
| 45 | 0 | 0 |  |  |  | 0 | $len = $l if $len < $l; | 
| 46 |  |  |  |  |  |  | } | 
| 47 | 0 |  |  |  |  | 0 | [$len, $num_lines]; | 
| 48 |  |  |  |  |  |  | } | 
| 49 |  |  |  |  |  |  |  | 
| 50 |  |  |  |  |  |  | sub length_height { | 
| 51 | 0 |  |  | 0 | 1 | 0 | my $text = shift; | 
| 52 | 0 |  |  |  |  | 0 | my $num_lines = 0; | 
| 53 | 0 |  |  |  |  | 0 | my $len = 0; | 
| 54 | 0 |  |  |  |  | 0 | for my $e (split /(\r?\n)/, $text) { | 
| 55 | 0 | 0 |  |  |  | 0 | if ($e =~ /\n/) { | 
| 56 | 0 |  |  |  |  | 0 | $num_lines++; | 
| 57 | 0 |  |  |  |  | 0 | next; | 
| 58 |  |  |  |  |  |  | } | 
| 59 | 0 | 0 |  |  |  | 0 | $num_lines = 1 if $num_lines == 0; | 
| 60 | 0 |  |  |  |  | 0 | my $l = length($e); | 
| 61 | 0 | 0 |  |  |  | 0 | $len = $l if $len < $l; | 
| 62 |  |  |  |  |  |  | } | 
| 63 | 0 |  |  |  |  | 0 | [$len, $num_lines]; | 
| 64 |  |  |  |  |  |  | } | 
| 65 |  |  |  |  |  |  |  | 
| 66 |  |  |  |  |  |  | sub _get_indent_width { | 
| 67 | 54 |  |  | 54 |  | 146 | my ($is_mb, $indent, $tab_width) = @_; | 
| 68 | 54 |  |  |  |  | 94 | my $w = 0; | 
| 69 | 54 |  |  |  |  | 163 | for (split //, $indent) { | 
| 70 | 83 | 100 |  |  |  | 224 | if ($_ eq "\t") { | 
| 71 |  |  |  |  |  |  | # go to the next tab | 
| 72 | 9 |  |  |  |  | 36 | $w = $tab_width * (int($w/$tab_width) + 1); | 
| 73 |  |  |  |  |  |  | } else { | 
| 74 | 74 | 100 |  |  |  | 156 | $w += $is_mb ? mbswidth($_) : 1; | 
| 75 |  |  |  |  |  |  | } | 
| 76 |  |  |  |  |  |  | } | 
| 77 | 54 |  |  |  |  | 187 | $w; | 
| 78 |  |  |  |  |  |  | } | 
| 79 |  |  |  |  |  |  |  | 
| 80 |  |  |  |  |  |  | # 3002 = IDEOGRAPHIC FULL STOP | 
| 81 |  |  |  |  |  |  | # ff0c = FULLWIDTH COMMA | 
| 82 |  |  |  |  |  |  |  | 
| 83 |  |  |  |  |  |  | our $re_cjk = qr/(?: | 
| 84 |  |  |  |  |  |  | \p{Block=CJK_Compatibility} | 
| 85 |  |  |  |  |  |  | |   \p{Block=CJK_Compatibility_Forms} | 
| 86 |  |  |  |  |  |  | |   \p{Block=CJK_Compatibility_Ideographs} | 
| 87 |  |  |  |  |  |  | |   \p{Block=CJK_Compatibility_Ideographs_Supplement} | 
| 88 |  |  |  |  |  |  | |   \p{Block=CJK_Radicals_Supplement} | 
| 89 |  |  |  |  |  |  | |   \p{Block=CJK_Strokes} | 
| 90 |  |  |  |  |  |  | |   \p{Block=CJK_Symbols_And_Punctuation} | 
| 91 |  |  |  |  |  |  | |   \p{Block=CJK_Unified_Ideographs} | 
| 92 |  |  |  |  |  |  | |   \p{Block=CJK_Unified_Ideographs_Extension_A} | 
| 93 |  |  |  |  |  |  | |   \p{Block=CJK_Unified_Ideographs_Extension_B} | 
| 94 |  |  |  |  |  |  | |   \p{Hiragana}\p{Katakana}\p{Hangul}\x{30fc} | 
| 95 |  |  |  |  |  |  | #|   \p{Block=CJK_Unified_Ideographs_Extension_C} | 
| 96 |  |  |  |  |  |  | [\x{3002}\x{ff0c}] | 
| 97 | 2 |  |  | 2 |  | 20 | )/x; | 
|  | 2 |  |  |  |  | 5 |  | 
|  | 2 |  |  |  |  | 34 |  | 
| 98 |  |  |  |  |  |  | our $re_cjk_class = qr/[ | 
| 99 |  |  |  |  |  |  | \p{Block=CJK_Compatibility} | 
| 100 |  |  |  |  |  |  | \p{Block=CJK_Compatibility_Forms} | 
| 101 |  |  |  |  |  |  | \p{Block=CJK_Compatibility_Ideographs} | 
| 102 |  |  |  |  |  |  | \p{Block=CJK_Compatibility_Ideographs_Supplement} | 
| 103 |  |  |  |  |  |  | \p{Block=CJK_Radicals_Supplement} | 
| 104 |  |  |  |  |  |  | \p{Block=CJK_Strokes} | 
| 105 |  |  |  |  |  |  | \p{Block=CJK_Symbols_And_Punctuation} | 
| 106 |  |  |  |  |  |  | \p{Block=CJK_Unified_Ideographs} | 
| 107 |  |  |  |  |  |  | \p{Block=CJK_Unified_Ideographs_Extension_A} | 
| 108 |  |  |  |  |  |  | \p{Block=CJK_Unified_Ideographs_Extension_B} | 
| 109 |  |  |  |  |  |  | \p{Hiragana}\p{Katakana}\p{Hangul}\x{30fc} | 
| 110 |  |  |  |  |  |  | \x{3002} | 
| 111 |  |  |  |  |  |  | \x{ff0c} | 
| 112 |  |  |  |  |  |  | ]/x; | 
| 113 |  |  |  |  |  |  | our $re_cjk_negclass = qr/[^ | 
| 114 |  |  |  |  |  |  | \p{Block=CJK_Compatibility} | 
| 115 |  |  |  |  |  |  | \p{Block=CJK_Compatibility_Forms} | 
| 116 |  |  |  |  |  |  | \p{Block=CJK_Compatibility_Ideographs} | 
| 117 |  |  |  |  |  |  | \p{Block=CJK_Compatibility_Ideographs_Supplement} | 
| 118 |  |  |  |  |  |  | \p{Block=CJK_Radicals_Supplement} | 
| 119 |  |  |  |  |  |  | \p{Block=CJK_Strokes} | 
| 120 |  |  |  |  |  |  | \p{Block=CJK_Symbols_And_Punctuation} | 
| 121 |  |  |  |  |  |  | \p{Block=CJK_Unified_Ideographs} | 
| 122 |  |  |  |  |  |  | \p{Block=CJK_Unified_Ideographs_Extension_A} | 
| 123 |  |  |  |  |  |  | \p{Block=CJK_Unified_Ideographs_Extension_B} | 
| 124 |  |  |  |  |  |  | \p{Hiragana}\p{Katakana}\p{Hangul}\x{30fc} | 
| 125 |  |  |  |  |  |  | \x{3002} | 
| 126 |  |  |  |  |  |  | \x{ff0c} | 
| 127 |  |  |  |  |  |  | ]/x; | 
| 128 |  |  |  |  |  |  |  | 
| 129 |  |  |  |  |  |  | sub _wrap { | 
| 130 | 36 |  |  | 36 |  | 111 | my ($is_mb, $text, $width, $opts) = @_; | 
| 131 | 36 |  | 50 |  |  | 94 | $width //= 80; | 
| 132 | 36 |  | 100 |  |  | 131 | $opts  //= {}; | 
| 133 |  |  |  |  |  |  |  | 
| 134 |  |  |  |  |  |  | # our algorithm: split into paragraphs, then process each paragraph. at the | 
| 135 |  |  |  |  |  |  | # start of paragraph, determine indents (either from %opts, or deduced from | 
| 136 |  |  |  |  |  |  | # text, like in Emacs) then push first-line indent. proceed to push words, | 
| 137 |  |  |  |  |  |  | # while adding subsequent-line indent at the start of each line. | 
| 138 |  |  |  |  |  |  |  | 
| 139 | 36 |  | 50 |  |  | 136 | my $tw = $opts->{tab_width} // 8; | 
| 140 | 36 | 50 |  |  |  | 107 | die "Please specify a positive tab width" unless $tw > 0; | 
| 141 | 36 |  |  |  |  | 61 | my $optfli  = $opts->{flindent}; | 
| 142 | 36 | 100 |  |  |  | 94 | my $optfliw = defined $optfli ? _get_indent_width($is_mb, $optfli, $tw) : undef; | 
| 143 | 36 |  |  |  |  | 64 | my $optsli  = $opts->{slindent}; | 
| 144 | 36 | 100 |  |  |  | 70 | my $optsliw = defined $optsli ? _get_indent_width($is_mb, $optsli, $tw) : undef; | 
| 145 | 36 |  | 100 |  |  | 114 | my $optkts  = $opts->{keep_trailing_space} // 0; | 
| 146 | 36 |  |  |  |  | 54 | my @res; | 
| 147 |  |  |  |  |  |  |  | 
| 148 | 36 |  |  |  |  | 208 | my @para = split /(\n(?:[ \t]*\n)+)/, $text; | 
| 149 |  |  |  |  |  |  | #say "D:para=[",join(", ", @para),"]"; | 
| 150 |  |  |  |  |  |  |  | 
| 151 | 36 |  |  |  |  | 69 | my ($maxww, $minww); | 
| 152 |  |  |  |  |  |  |  | 
| 153 |  |  |  |  |  |  | PARA: | 
| 154 | 36 |  |  |  |  | 119 | while (my ($ptext, $pbreak) = splice @para, 0, 2) { | 
| 155 | 42 |  |  |  |  | 63 | my $x = 0; | 
| 156 | 42 |  |  |  |  | 62 | my $y = 0; | 
| 157 | 42 |  |  |  |  | 57 | my $line_has_word = 0; | 
| 158 |  |  |  |  |  |  |  | 
| 159 |  |  |  |  |  |  | # determine indents | 
| 160 | 42 |  |  |  |  | 68 | my ($fli, $sli, $fliw, $sliw); | 
| 161 | 42 | 100 |  |  |  | 80 | if (defined $optfli) { | 
| 162 | 12 |  |  |  |  | 20 | $fli  = $optfli; | 
| 163 | 12 |  |  |  |  | 20 | $fliw = $optfliw; | 
| 164 |  |  |  |  |  |  | } else { | 
| 165 |  |  |  |  |  |  | # XXX emacs can also treat ' #' as indent, e.g. when wrapping | 
| 166 |  |  |  |  |  |  | # multi-line perl comment. | 
| 167 | 30 |  |  |  |  | 236 | ($fli) = $ptext =~ /\A([ \t]*)\S/; | 
| 168 | 30 | 50 |  |  |  | 93 | if (defined $fli) { | 
| 169 | 30 |  |  |  |  | 74 | $fliw = _get_indent_width($is_mb, $fli, $tw); | 
| 170 |  |  |  |  |  |  | } else { | 
| 171 | 0 |  |  |  |  | 0 | $fli  = ""; | 
| 172 | 0 |  |  |  |  | 0 | $fliw = 0; | 
| 173 |  |  |  |  |  |  | } | 
| 174 |  |  |  |  |  |  | } | 
| 175 | 42 | 100 |  |  |  | 85 | if (defined $optsli) { | 
| 176 | 5 |  |  |  |  | 10 | $sli  = $optsli; | 
| 177 | 5 |  |  |  |  | 9 | $sliw = $optsliw; | 
| 178 |  |  |  |  |  |  | } else { | 
| 179 | 37 |  |  |  |  | 101 | ($sli) = $ptext =~ /\A[^\n]*\S[\n]([ \t+]*)\S/; | 
| 180 | 37 | 100 |  |  |  | 77 | if (defined $sli) { | 
| 181 | 9 |  |  |  |  | 22 | $sliw = _get_indent_width($is_mb, $sli, $tw); | 
| 182 |  |  |  |  |  |  | } else { | 
| 183 | 28 |  |  |  |  | 48 | $sli  = ""; | 
| 184 | 28 |  |  |  |  | 47 | $sliw = 0; | 
| 185 |  |  |  |  |  |  | } | 
| 186 |  |  |  |  |  |  | } | 
| 187 | 42 | 50 |  |  |  | 102 | die "Subsequent indent must be less than width" if $sliw >= $width; | 
| 188 |  |  |  |  |  |  |  | 
| 189 | 42 |  |  |  |  | 95 | push @res, $fli; | 
| 190 | 42 |  |  |  |  | 77 | $x += $fliw; | 
| 191 |  |  |  |  |  |  |  | 
| 192 | 42 |  |  |  |  | 54 | my @words0; # (WORD1, WORD1_IS_CJK?, WS_AFTER?, WORD2, WORD2_IS_CJK?, WS_AFTER?, ...) | 
| 193 |  |  |  |  |  |  | # we differentiate/split between CJK "word" (cluster of CJK letters, | 
| 194 |  |  |  |  |  |  | # really) and non-CJK word, e.g. "我很爱你my可爱的and beautiful, | 
| 195 |  |  |  |  |  |  | # beautiful wife" is split to ["我很爱你", "my", "可爱的", "and", | 
| 196 |  |  |  |  |  |  | # "beautiful,", "beautiful", "wife"]. we do this because CJK word can be | 
| 197 |  |  |  |  |  |  | # line-broken on a per-letter basis, as they don't separate words with | 
| 198 |  |  |  |  |  |  | # whitespaces. | 
| 199 | 42 |  |  |  |  | 810 | while ($ptext =~ /(?: ($re_cjk+)|(\S+) ) (\s*)/gox) { | 
| 200 | 410 | 100 |  |  |  | 2613 | my $ws_after = $3 ? 1:0; | 
| 201 | 410 | 100 |  |  |  | 736 | if ($1) { | 
| 202 | 3 |  |  |  |  | 22 | push @words0, $1, 1, $ws_after; | 
| 203 |  |  |  |  |  |  | } else { | 
| 204 | 407 |  |  |  |  | 809 | my $ptext2 = $2; | 
| 205 | 407 |  |  |  |  | 1341 | while ($ptext2 =~ /($re_cjk_class+)| | 
| 206 |  |  |  |  |  |  | ($re_cjk_negclass+)/gox) { | 
| 207 | 412 | 100 |  |  |  | 3656 | if ($1) { | 
| 208 | 3 |  |  |  |  | 39 | push @words0, $1, 1, 0; | 
| 209 |  |  |  |  |  |  | } else { | 
| 210 | 409 |  |  |  |  | 2150 | push @words0, $2, 0, 0; | 
| 211 |  |  |  |  |  |  | } | 
| 212 |  |  |  |  |  |  | } | 
| 213 | 407 |  |  |  |  | 3900 | $words0[-1] = $ws_after; | 
| 214 |  |  |  |  |  |  | } | 
| 215 |  |  |  |  |  |  | } | 
| 216 |  |  |  |  |  |  |  | 
| 217 |  |  |  |  |  |  | # process each word | 
| 218 | 42 |  |  |  |  | 78 | my $prev_ws_after; | 
| 219 | 42 |  |  |  |  | 90 | while (@words0) { | 
| 220 | 415 |  |  |  |  | 829 | my ($word0, $is_cjk, $ws_after) = splice @words0, 0, 3; | 
| 221 | 415 |  |  |  |  | 632 | my @words; | 
| 222 |  |  |  |  |  |  | my @wordsw; | 
| 223 | 415 |  |  |  |  | 544 | while (1) { | 
| 224 | 431 | 100 |  |  |  | 739 | my $wordw = $is_mb ? mbswidth($word0) : length($word0); | 
| 225 |  |  |  |  |  |  |  | 
| 226 |  |  |  |  |  |  | # long cjk word is not truncated here because it will be | 
| 227 |  |  |  |  |  |  | # line-broken later when wrapping. | 
| 228 | 431 | 100 | 100 |  |  | 1422 | if ($wordw <= $width-$sliw || $is_cjk) { | 
| 229 | 415 |  |  |  |  | 698 | push @words , $word0; | 
| 230 | 415 |  |  |  |  | 574 | push @wordsw, $wordw; | 
| 231 | 415 |  |  |  |  | 786 | last; | 
| 232 |  |  |  |  |  |  | } | 
| 233 |  |  |  |  |  |  | # truncate long word | 
| 234 | 16 | 100 |  |  |  | 34 | if ($is_mb) { | 
| 235 | 7 |  |  |  |  | 19 | my $res = mbtrunc($word0, $width-$sliw, 1); | 
| 236 | 7 |  |  |  |  | 15 | push @words , $res->[0]; | 
| 237 | 7 |  |  |  |  | 13 | push @wordsw, $res->[1]; | 
| 238 | 7 |  |  |  |  | 21 | $word0 = substr($word0, length($res->[0])); | 
| 239 |  |  |  |  |  |  | #say "D:truncated long word (mb): $text -> $res->[0] & $res->[1], word0=$word0"; | 
| 240 |  |  |  |  |  |  | } else { | 
| 241 | 9 |  |  |  |  | 19 | my $w2 = substr($word0, 0, $width-$sliw); | 
| 242 | 9 |  |  |  |  | 15 | push @words , $w2; | 
| 243 | 9 |  |  |  |  | 13 | push @wordsw, $width-$sliw; | 
| 244 | 9 |  |  |  |  | 20 | $word0 = substr($word0, $width-$sliw); | 
| 245 |  |  |  |  |  |  | #say "D:truncated long word: $w2, ".($width-$sliw).", word0=$word0"; | 
| 246 |  |  |  |  |  |  | } | 
| 247 |  |  |  |  |  |  | } | 
| 248 |  |  |  |  |  |  |  | 
| 249 | 415 |  |  |  |  | 640 | for my $word (@words) { | 
| 250 | 431 |  |  |  |  | 611 | my $wordw = shift @wordsw; | 
| 251 |  |  |  |  |  |  | #say "D:x=$x word=$word is_cjk=$is_cjk ws_after=$ws_after wordw=$wordw line_has_word=$line_has_word width=$width"; | 
| 252 |  |  |  |  |  |  |  | 
| 253 | 431 | 100 | 100 |  |  | 1356 | $maxww = $wordw if !defined($maxww) || $maxww < $wordw; | 
| 254 | 431 | 100 | 100 |  |  | 1156 | $minww = $wordw if !defined($minww) || $minww > $wordw; | 
| 255 |  |  |  |  |  |  |  | 
| 256 | 431 | 100 |  |  |  | 801 | my $x_after_word = $x + ($line_has_word ? 1:0) + $wordw; | 
| 257 | 431 | 100 |  |  |  | 702 | if ($x_after_word <= $width) { | 
| 258 |  |  |  |  |  |  | # the addition of word hasn't exceeded column width | 
| 259 | 357 | 100 |  |  |  | 661 | if ($line_has_word) { | 
| 260 | 319 | 100 |  |  |  | 521 | if ($prev_ws_after) { | 
| 261 | 317 |  |  |  |  | 473 | push @res, " "; | 
| 262 | 317 |  |  |  |  | 420 | $x++; | 
| 263 |  |  |  |  |  |  | } | 
| 264 |  |  |  |  |  |  | } | 
| 265 | 357 |  |  |  |  | 541 | push @res, $word; | 
| 266 | 357 |  |  |  |  | 532 | $x += $wordw; | 
| 267 |  |  |  |  |  |  | } else { | 
| 268 | 74 |  |  |  |  | 102 | while (1) { | 
| 269 | 87 | 100 |  |  |  | 167 | if ($is_cjk) { | 
| 270 |  |  |  |  |  |  | # CJK word can be broken | 
| 271 | 18 |  |  |  |  | 19 | my $res; | 
| 272 | 18 | 100 |  |  |  | 40 | if ($prev_ws_after) { | 
| 273 | 2 |  |  |  |  | 6 | $res = mbtrunc($word, $width - $x - 1, 1); | 
| 274 | 2 |  |  |  |  | 7 | push @res, " ", $res->[0]; | 
| 275 |  |  |  |  |  |  | } else { | 
| 276 | 16 |  |  |  |  | 35 | $res = mbtrunc($word, $width - $x, 1); | 
| 277 | 16 |  |  |  |  | 34 | push @res, $res->[0]; | 
| 278 |  |  |  |  |  |  | } | 
| 279 | 18 |  |  |  |  | 56 | my $word2 = substr($word, length($res->[0])); | 
| 280 |  |  |  |  |  |  | #say "D:truncated CJK word: $word -> $res->[0] & $res->[1], remaining=$word2"; | 
| 281 | 18 |  |  |  |  | 59 | $prev_ws_after = 0; | 
| 282 | 18 |  |  |  |  | 33 | $word = $word2; | 
| 283 | 18 |  |  |  |  | 36 | $wordw = mbswidth($word); | 
| 284 |  |  |  |  |  |  | } | 
| 285 |  |  |  |  |  |  |  | 
| 286 |  |  |  |  |  |  | # move the word to the next line | 
| 287 | 87 | 100 | 100 |  |  | 532 | push @res, " " if $prev_ws_after && $optkts; | 
| 288 | 87 |  |  |  |  | 197 | push @res, "\n", $sli; | 
| 289 | 87 |  |  |  |  | 149 | $y++; | 
| 290 |  |  |  |  |  |  |  | 
| 291 | 87 | 100 |  |  |  | 160 | if ($sliw + $wordw <= $width) { | 
| 292 | 74 |  |  |  |  | 123 | push @res, $word; | 
| 293 | 74 |  |  |  |  | 99 | $x = $sliw + $wordw; | 
| 294 | 74 |  |  |  |  | 134 | last; | 
| 295 |  |  |  |  |  |  | } else { | 
| 296 |  |  |  |  |  |  | # still too long, truncate again | 
| 297 | 13 |  |  |  |  | 25 | $x = $sliw; | 
| 298 |  |  |  |  |  |  | } | 
| 299 |  |  |  |  |  |  | } | 
| 300 |  |  |  |  |  |  | } | 
| 301 | 431 |  |  |  |  | 767 | $line_has_word++; | 
| 302 |  |  |  |  |  |  | } | 
| 303 | 415 |  |  |  |  | 912 | $prev_ws_after = $ws_after; | 
| 304 |  |  |  |  |  |  | } | 
| 305 |  |  |  |  |  |  |  | 
| 306 | 42 | 100 |  |  |  | 89 | if (defined $pbreak) { | 
| 307 | 7 |  |  |  |  | 30 | push @res, $pbreak; | 
| 308 |  |  |  |  |  |  | } else { | 
| 309 | 35 | 100 |  |  |  | 287 | push @res, "\n" if $ptext =~ /\n[ \t]*\z/; | 
| 310 |  |  |  |  |  |  | } | 
| 311 |  |  |  |  |  |  | } | 
| 312 |  |  |  |  |  |  |  | 
| 313 | 36 | 100 |  |  |  | 96 | if ($opts->{return_stats}) { | 
| 314 | 1 |  |  |  |  | 15 | return [join("", @res), { | 
| 315 |  |  |  |  |  |  | max_word_width => $maxww, | 
| 316 |  |  |  |  |  |  | min_word_width => $minww, | 
| 317 |  |  |  |  |  |  | }]; | 
| 318 |  |  |  |  |  |  | } else { | 
| 319 | 35 |  |  |  |  | 426 | return join("", @res); | 
| 320 |  |  |  |  |  |  | } | 
| 321 |  |  |  |  |  |  | } | 
| 322 |  |  |  |  |  |  |  | 
| 323 |  |  |  |  |  |  | sub mbwrap { | 
| 324 | 7 |  |  | 7 | 1 | 8158 | _wrap(1, @_); | 
| 325 |  |  |  |  |  |  | } | 
| 326 |  |  |  |  |  |  |  | 
| 327 |  |  |  |  |  |  | sub wrap { | 
| 328 | 29 |  |  | 29 | 1 | 18285 | _wrap(0, @_); | 
| 329 |  |  |  |  |  |  | } | 
| 330 |  |  |  |  |  |  |  | 
| 331 |  |  |  |  |  |  | sub _pad { | 
| 332 | 0 |  |  | 0 |  | 0 | my ($is_mb, $text, $width, $which, $padchar, $is_trunc) = @_; | 
| 333 | 0 | 0 |  |  |  | 0 | if ($which) { | 
| 334 | 0 |  |  |  |  | 0 | $which = substr($which, 0, 1); | 
| 335 |  |  |  |  |  |  | } else { | 
| 336 | 0 |  |  |  |  | 0 | $which = "r"; | 
| 337 |  |  |  |  |  |  | } | 
| 338 | 0 |  | 0 |  |  | 0 | $padchar //= " "; | 
| 339 |  |  |  |  |  |  |  | 
| 340 | 0 | 0 |  |  |  | 0 | my $w = $is_mb ? mbswidth($text) : length($text); | 
| 341 | 0 | 0 | 0 |  |  | 0 | if ($is_trunc && $w > $width) { | 
| 342 | 0 |  |  |  |  | 0 | my $res = mbtrunc($text, $width, 1); | 
| 343 | 0 |  |  |  |  | 0 | $text = $res->[0] . ($padchar x ($width-$res->[1])); | 
| 344 |  |  |  |  |  |  | } else { | 
| 345 | 0 | 0 |  |  |  | 0 | if ($which eq 'l') { | 
|  |  | 0 |  |  |  |  |  | 
| 346 | 0 |  |  |  |  | 0 | $text = ($padchar x ($width-$w)) . $text; | 
| 347 |  |  |  |  |  |  | } elsif ($which eq 'c') { | 
| 348 | 0 |  |  |  |  | 0 | my $n = int(($width-$w)/2); | 
| 349 | 0 |  |  |  |  | 0 | $text = ($padchar x $n) . $text . ($padchar x ($width-$w-$n)); | 
| 350 |  |  |  |  |  |  | } else { | 
| 351 | 0 |  |  |  |  | 0 | $text .= ($padchar x ($width-$w)); | 
| 352 |  |  |  |  |  |  | } | 
| 353 |  |  |  |  |  |  | } | 
| 354 | 0 |  |  |  |  | 0 | $text; | 
| 355 |  |  |  |  |  |  | } | 
| 356 |  |  |  |  |  |  |  | 
| 357 |  |  |  |  |  |  | sub mbpad { | 
| 358 | 0 |  |  | 0 | 1 | 0 | _pad(1, @_); | 
| 359 |  |  |  |  |  |  | } | 
| 360 |  |  |  |  |  |  |  | 
| 361 |  |  |  |  |  |  | sub pad { | 
| 362 | 0 |  |  | 0 | 1 | 0 | _pad(0, @_); | 
| 363 |  |  |  |  |  |  | } | 
| 364 |  |  |  |  |  |  |  | 
| 365 |  |  |  |  |  |  | sub _trunc { | 
| 366 | 25 |  |  | 25 |  | 56 | my ($is_mb, $text, $width, $return_width) = @_; | 
| 367 |  |  |  |  |  |  |  | 
| 368 |  |  |  |  |  |  | # return_width (undocumented): if set to 1, will return [truncated_text, | 
| 369 |  |  |  |  |  |  | # visual width, length(chars) up to truncation point] | 
| 370 |  |  |  |  |  |  |  | 
| 371 | 25 | 50 |  |  |  | 57 | my $w = $is_mb ? mbswidth($text) : length($text); | 
| 372 | 25 | 50 |  |  |  | 446 | die "Invalid argument, width must not be negative" unless $width >= 0; | 
| 373 | 25 | 50 |  |  |  | 53 | if ($w <= $width) { | 
| 374 | 0 | 0 |  |  |  | 0 | return $return_width ? [$text, $w, length($text)] : $text; | 
| 375 |  |  |  |  |  |  | } | 
| 376 |  |  |  |  |  |  |  | 
| 377 | 25 |  |  |  |  | 42 | my $c = 0; | 
| 378 |  |  |  |  |  |  |  | 
| 379 |  |  |  |  |  |  | # perform binary cutting | 
| 380 | 25 |  |  |  |  | 31 | my @res; | 
| 381 | 25 |  |  |  |  | 80 | my $wres = 0; # total width of text in @res | 
| 382 | 25 | 50 |  |  |  | 84 | my $l = int($w/2); $l = 1 if $l == 0; | 
|  | 25 |  |  |  |  | 56 |  | 
| 383 | 25 |  |  |  |  | 36 | my $end = 0; | 
| 384 | 25 |  |  |  |  | 33 | while (1) { | 
| 385 | 167 |  |  |  |  | 368 | my $left  = substr($text, 0, $l); | 
| 386 | 167 | 50 |  |  |  | 374 | my $right = $l > length($text) ? "" : substr($text, $l); | 
| 387 | 167 | 50 |  |  |  | 329 | my $wl = $is_mb ? mbswidth($left) : length($left); | 
| 388 |  |  |  |  |  |  | #say "D:left=$left, right=$right, wl=$wl"; | 
| 389 | 167 | 100 |  |  |  | 2502 | if ($wres + $wl > $width) { | 
| 390 | 120 |  |  |  |  | 200 | $text = $left; | 
| 391 |  |  |  |  |  |  | } else { | 
| 392 | 47 |  |  |  |  | 89 | push @res, $left; | 
| 393 | 47 |  |  |  |  | 69 | $wres += $wl; | 
| 394 | 47 |  |  |  |  | 78 | $c += length($left); | 
| 395 | 47 |  |  |  |  | 86 | $text = $right; | 
| 396 |  |  |  |  |  |  | } | 
| 397 | 167 |  |  |  |  | 289 | $l = int(($l+1)/2); | 
| 398 |  |  |  |  |  |  | #say "D:l=$l"; | 
| 399 | 167 | 100 | 100 |  |  | 457 | last if $l==1 && $end>1; | 
| 400 | 142 | 100 |  |  |  | 300 | $end++ if $l==1; | 
| 401 |  |  |  |  |  |  | } | 
| 402 | 25 | 50 |  |  |  | 48 | if ($return_width) { | 
| 403 | 25 |  |  |  |  | 121 | return [join("", @res), $wres, $c]; | 
| 404 |  |  |  |  |  |  | } else { | 
| 405 | 0 |  |  |  |  | 0 | return join("", @res); | 
| 406 |  |  |  |  |  |  | } | 
| 407 |  |  |  |  |  |  | } | 
| 408 |  |  |  |  |  |  |  | 
| 409 |  |  |  |  |  |  | sub mbtrunc { | 
| 410 | 25 |  |  | 25 | 1 | 56 | _trunc(1, @_); | 
| 411 |  |  |  |  |  |  | } | 
| 412 |  |  |  |  |  |  |  | 
| 413 |  |  |  |  |  |  | sub trunc { | 
| 414 | 0 |  |  | 0 | 1 |  | _trunc(0, @_); | 
| 415 |  |  |  |  |  |  | } | 
| 416 |  |  |  |  |  |  |  | 
| 417 |  |  |  |  |  |  | 1; | 
| 418 |  |  |  |  |  |  | # ABSTRACT: Routines for text containing wide characters | 
| 419 |  |  |  |  |  |  |  | 
| 420 |  |  |  |  |  |  | __END__ |