| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Text::WideChar::Util; | 
| 2 |  |  |  |  |  |  |  | 
| 3 |  |  |  |  |  |  | our $DATE = '2019-05-17'; # DATE | 
| 4 |  |  |  |  |  |  | our $VERSION = '0.170'; # VERSION | 
| 5 |  |  |  |  |  |  |  | 
| 6 | 2 |  |  | 2 |  | 120944 | use 5.010001; | 
|  | 2 |  |  |  |  | 21 |  | 
| 7 | 2 |  |  | 2 |  | 804 | use locale; | 
|  | 2 |  |  |  |  | 937 |  | 
|  | 2 |  |  |  |  | 10 |  | 
| 8 | 2 |  |  | 2 |  | 62 | use strict; | 
|  | 2 |  |  |  |  | 3 |  | 
|  | 2 |  |  |  |  | 34 |  | 
| 9 | 2 |  |  | 2 |  | 10 | use utf8; | 
|  | 2 |  |  |  |  | 3 |  | 
|  | 2 |  |  |  |  | 10 |  | 
| 10 | 2 |  |  | 2 |  | 35 | use warnings; | 
|  | 2 |  |  |  |  | 3 |  | 
|  | 2 |  |  |  |  | 46 |  | 
| 11 |  |  |  |  |  |  |  | 
| 12 | 2 |  |  | 2 |  | 701 | use Unicode::GCString; | 
|  | 2 |  |  |  |  | 44667 |  | 
|  | 2 |  |  |  |  | 992 |  | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | require Exporter; | 
| 15 |  |  |  |  |  |  | our @ISA       = qw(Exporter); | 
| 16 |  |  |  |  |  |  | our @EXPORT_OK = qw( | 
| 17 |  |  |  |  |  |  | mbpad | 
| 18 |  |  |  |  |  |  | pad | 
| 19 |  |  |  |  |  |  | mbswidth | 
| 20 |  |  |  |  |  |  | mbswidth_height | 
| 21 |  |  |  |  |  |  | length_height | 
| 22 |  |  |  |  |  |  | mbtrunc | 
| 23 |  |  |  |  |  |  | trunc | 
| 24 |  |  |  |  |  |  | mbwrap | 
| 25 |  |  |  |  |  |  | wrap | 
| 26 |  |  |  |  |  |  | ); | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | sub mbswidth { | 
| 29 | 244 |  |  | 244 | 1 | 465 | Unicode::GCString->new($_[0])->columns; | 
| 30 |  |  |  |  |  |  | } | 
| 31 |  |  |  |  |  |  |  | 
| 32 |  |  |  |  |  |  | sub mbswidth_height { | 
| 33 | 0 |  |  | 0 | 1 | 0 | my $text = shift; | 
| 34 | 0 |  |  |  |  | 0 | my $num_lines = 0; | 
| 35 | 0 |  |  |  |  | 0 | my $len = 0; | 
| 36 | 0 |  |  |  |  | 0 | for my $e (split /(\r?\n)/, $text) { | 
| 37 | 0 | 0 |  |  |  | 0 | if ($e =~ /\n/) { | 
| 38 | 0 |  |  |  |  | 0 | $num_lines++; | 
| 39 | 0 |  |  |  |  | 0 | next; | 
| 40 |  |  |  |  |  |  | } | 
| 41 | 0 | 0 |  |  |  | 0 | $num_lines = 1 if $num_lines == 0; | 
| 42 | 0 |  |  |  |  | 0 | my $l = mbswidth($e); | 
| 43 | 0 | 0 |  |  |  | 0 | $len = $l if $len < $l; | 
| 44 |  |  |  |  |  |  | } | 
| 45 | 0 |  |  |  |  | 0 | [$len, $num_lines]; | 
| 46 |  |  |  |  |  |  | } | 
| 47 |  |  |  |  |  |  |  | 
| 48 |  |  |  |  |  |  | sub length_height { | 
| 49 | 0 |  |  | 0 | 1 | 0 | my $text = shift; | 
| 50 | 0 |  |  |  |  | 0 | my $num_lines = 0; | 
| 51 | 0 |  |  |  |  | 0 | my $len = 0; | 
| 52 | 0 |  |  |  |  | 0 | for my $e (split /(\r?\n)/, $text) { | 
| 53 | 0 | 0 |  |  |  | 0 | if ($e =~ /\n/) { | 
| 54 | 0 |  |  |  |  | 0 | $num_lines++; | 
| 55 | 0 |  |  |  |  | 0 | next; | 
| 56 |  |  |  |  |  |  | } | 
| 57 | 0 | 0 |  |  |  | 0 | $num_lines = 1 if $num_lines == 0; | 
| 58 | 0 |  |  |  |  | 0 | my $l = length($e); | 
| 59 | 0 | 0 |  |  |  | 0 | $len = $l if $len < $l; | 
| 60 |  |  |  |  |  |  | } | 
| 61 | 0 |  |  |  |  | 0 | [$len, $num_lines]; | 
| 62 |  |  |  |  |  |  | } | 
| 63 |  |  |  |  |  |  |  | 
| 64 |  |  |  |  |  |  | sub _get_indent_width { | 
| 65 | 54 |  |  | 54 |  | 123 | my ($is_mb, $indent, $tab_width) = @_; | 
| 66 | 54 |  |  |  |  | 74 | my $w = 0; | 
| 67 | 54 |  |  |  |  | 130 | for (split //, $indent) { | 
| 68 | 83 | 100 |  |  |  | 189 | if ($_ eq "\t") { | 
| 69 |  |  |  |  |  |  | # go to the next tab | 
| 70 | 9 |  |  |  |  | 28 | $w = $tab_width * (int($w/$tab_width) + 1); | 
| 71 |  |  |  |  |  |  | } else { | 
| 72 | 74 | 100 |  |  |  | 134 | $w += $is_mb ? mbswidth($_) : 1; | 
| 73 |  |  |  |  |  |  | } | 
| 74 |  |  |  |  |  |  | } | 
| 75 | 54 |  |  |  |  | 159 | $w; | 
| 76 |  |  |  |  |  |  | } | 
| 77 |  |  |  |  |  |  |  | 
| 78 |  |  |  |  |  |  | # 3002 = IDEOGRAPHIC FULL STOP | 
| 79 |  |  |  |  |  |  | # ff0c = FULLWIDTH COMMA | 
| 80 |  |  |  |  |  |  |  | 
| 81 |  |  |  |  |  |  | our $re_cjk = qr/(?: | 
| 82 |  |  |  |  |  |  | \p{Block=CJK_Compatibility} | 
| 83 |  |  |  |  |  |  | |   \p{Block=CJK_Compatibility_Forms} | 
| 84 |  |  |  |  |  |  | |   \p{Block=CJK_Compatibility_Ideographs} | 
| 85 |  |  |  |  |  |  | |   \p{Block=CJK_Compatibility_Ideographs_Supplement} | 
| 86 |  |  |  |  |  |  | |   \p{Block=CJK_Radicals_Supplement} | 
| 87 |  |  |  |  |  |  | |   \p{Block=CJK_Strokes} | 
| 88 |  |  |  |  |  |  | |   \p{Block=CJK_Symbols_And_Punctuation} | 
| 89 |  |  |  |  |  |  | |   \p{Block=CJK_Unified_Ideographs} | 
| 90 |  |  |  |  |  |  | |   \p{Block=CJK_Unified_Ideographs_Extension_A} | 
| 91 |  |  |  |  |  |  | |   \p{Block=CJK_Unified_Ideographs_Extension_B} | 
| 92 |  |  |  |  |  |  | |   \p{Hiragana}\p{Katakana}\p{Hangul}\x{30fc} | 
| 93 |  |  |  |  |  |  | #|   \p{Block=CJK_Unified_Ideographs_Extension_C} | 
| 94 |  |  |  |  |  |  | [\x{3002}\x{ff0c}] | 
| 95 | 2 |  |  | 2 |  | 16 | )/x; | 
|  | 2 |  |  |  |  | 3 |  | 
|  | 2 |  |  |  |  | 24 |  | 
| 96 |  |  |  |  |  |  | our $re_cjk_class = qr/[ | 
| 97 |  |  |  |  |  |  | \p{Block=CJK_Compatibility} | 
| 98 |  |  |  |  |  |  | \p{Block=CJK_Compatibility_Forms} | 
| 99 |  |  |  |  |  |  | \p{Block=CJK_Compatibility_Ideographs} | 
| 100 |  |  |  |  |  |  | \p{Block=CJK_Compatibility_Ideographs_Supplement} | 
| 101 |  |  |  |  |  |  | \p{Block=CJK_Radicals_Supplement} | 
| 102 |  |  |  |  |  |  | \p{Block=CJK_Strokes} | 
| 103 |  |  |  |  |  |  | \p{Block=CJK_Symbols_And_Punctuation} | 
| 104 |  |  |  |  |  |  | \p{Block=CJK_Unified_Ideographs} | 
| 105 |  |  |  |  |  |  | \p{Block=CJK_Unified_Ideographs_Extension_A} | 
| 106 |  |  |  |  |  |  | \p{Block=CJK_Unified_Ideographs_Extension_B} | 
| 107 |  |  |  |  |  |  | \p{Hiragana}\p{Katakana}\p{Hangul}\x{30fc} | 
| 108 |  |  |  |  |  |  | \x{3002} | 
| 109 |  |  |  |  |  |  | \x{ff0c} | 
| 110 |  |  |  |  |  |  | ]/x; | 
| 111 |  |  |  |  |  |  | our $re_cjk_negclass = qr/[^ | 
| 112 |  |  |  |  |  |  | \p{Block=CJK_Compatibility} | 
| 113 |  |  |  |  |  |  | \p{Block=CJK_Compatibility_Forms} | 
| 114 |  |  |  |  |  |  | \p{Block=CJK_Compatibility_Ideographs} | 
| 115 |  |  |  |  |  |  | \p{Block=CJK_Compatibility_Ideographs_Supplement} | 
| 116 |  |  |  |  |  |  | \p{Block=CJK_Radicals_Supplement} | 
| 117 |  |  |  |  |  |  | \p{Block=CJK_Strokes} | 
| 118 |  |  |  |  |  |  | \p{Block=CJK_Symbols_And_Punctuation} | 
| 119 |  |  |  |  |  |  | \p{Block=CJK_Unified_Ideographs} | 
| 120 |  |  |  |  |  |  | \p{Block=CJK_Unified_Ideographs_Extension_A} | 
| 121 |  |  |  |  |  |  | \p{Block=CJK_Unified_Ideographs_Extension_B} | 
| 122 |  |  |  |  |  |  | \p{Hiragana}\p{Katakana}\p{Hangul}\x{30fc} | 
| 123 |  |  |  |  |  |  | \x{3002} | 
| 124 |  |  |  |  |  |  | \x{ff0c} | 
| 125 |  |  |  |  |  |  | ]/x; | 
| 126 |  |  |  |  |  |  |  | 
| 127 |  |  |  |  |  |  | sub _wrap { | 
| 128 | 36 |  |  | 36 |  | 81 | my ($is_mb, $text, $width, $opts) = @_; | 
| 129 | 36 |  | 50 |  |  | 84 | $width //= 80; | 
| 130 | 36 |  | 100 |  |  | 105 | $opts  //= {}; | 
| 131 |  |  |  |  |  |  |  | 
| 132 |  |  |  |  |  |  | # our algorithm: split into paragraphs, then process each paragraph. at the | 
| 133 |  |  |  |  |  |  | # start of paragraph, determine indents (either from %opts, or deduced from | 
| 134 |  |  |  |  |  |  | # text, like in Emacs) then push first-line indent. proceed to push words, | 
| 135 |  |  |  |  |  |  | # while adding subsequent-line indent at the start of each line. | 
| 136 |  |  |  |  |  |  |  | 
| 137 | 36 |  | 50 |  |  | 112 | my $tw = $opts->{tab_width} // 8; | 
| 138 | 36 | 50 |  |  |  | 98 | die "Please specify a positive tab width" unless $tw > 0; | 
| 139 | 36 |  |  |  |  | 56 | my $optfli  = $opts->{flindent}; | 
| 140 | 36 | 100 |  |  |  | 84 | my $optfliw = defined $optfli ? _get_indent_width($is_mb, $optfli, $tw) : undef; | 
| 141 | 36 |  |  |  |  | 48 | my $optsli  = $opts->{slindent}; | 
| 142 | 36 | 100 |  |  |  | 60 | my $optsliw = defined $optsli ? _get_indent_width($is_mb, $optsli, $tw) : undef; | 
| 143 | 36 |  | 100 |  |  | 92 | my $optkts  = $opts->{keep_trailing_space} // 0; | 
| 144 | 36 |  |  |  |  | 45 | my @res; | 
| 145 |  |  |  |  |  |  |  | 
| 146 | 36 |  |  |  |  | 163 | my @para = split /(\n(?:[ \t]*\n)+)/, $text; | 
| 147 |  |  |  |  |  |  | #say "D:para=[",join(", ", @para),"]"; | 
| 148 |  |  |  |  |  |  |  | 
| 149 | 36 |  |  |  |  | 60 | my ($maxww, $minww); | 
| 150 |  |  |  |  |  |  |  | 
| 151 |  |  |  |  |  |  | PARA: | 
| 152 | 36 |  |  |  |  | 94 | while (my ($ptext, $pbreak) = splice @para, 0, 2) { | 
| 153 | 42 |  |  |  |  | 59 | my $x = 0; | 
| 154 | 42 |  |  |  |  | 72 | my $y = 0; | 
| 155 | 42 |  |  |  |  | 51 | my $line_has_word = 0; | 
| 156 |  |  |  |  |  |  |  | 
| 157 |  |  |  |  |  |  | # determine indents | 
| 158 | 42 |  |  |  |  | 58 | my ($fli, $sli, $fliw, $sliw); | 
| 159 | 42 | 100 |  |  |  | 66 | if (defined $optfli) { | 
| 160 | 12 |  |  |  |  | 13 | $fli  = $optfli; | 
| 161 | 12 |  |  |  |  | 19 | $fliw = $optfliw; | 
| 162 |  |  |  |  |  |  | } else { | 
| 163 |  |  |  |  |  |  | # XXX emacs can also treat ' #' as indent, e.g. when wrapping | 
| 164 |  |  |  |  |  |  | # multi-line perl comment. | 
| 165 | 30 |  |  |  |  | 196 | ($fli) = $ptext =~ /\A([ \t]*)\S/; | 
| 166 | 30 | 50 |  |  |  | 74 | if (defined $fli) { | 
| 167 | 30 |  |  |  |  | 66 | $fliw = _get_indent_width($is_mb, $fli, $tw); | 
| 168 |  |  |  |  |  |  | } else { | 
| 169 | 0 |  |  |  |  | 0 | $fli  = ""; | 
| 170 | 0 |  |  |  |  | 0 | $fliw = 0; | 
| 171 |  |  |  |  |  |  | } | 
| 172 |  |  |  |  |  |  | } | 
| 173 | 42 | 100 |  |  |  | 78 | if (defined $optsli) { | 
| 174 | 5 |  |  |  |  | 5 | $sli  = $optsli; | 
| 175 | 5 |  |  |  |  | 7 | $sliw = $optsliw; | 
| 176 |  |  |  |  |  |  | } else { | 
| 177 | 37 |  |  |  |  | 83 | ($sli) = $ptext =~ /\A[^\n]*\S[\n]([ \t+]*)\S/; | 
| 178 | 37 | 100 |  |  |  | 67 | if (defined $sli) { | 
| 179 | 9 |  |  |  |  | 18 | $sliw = _get_indent_width($is_mb, $sli, $tw); | 
| 180 |  |  |  |  |  |  | } else { | 
| 181 | 28 |  |  |  |  | 44 | $sli  = ""; | 
| 182 | 28 |  |  |  |  | 35 | $sliw = 0; | 
| 183 |  |  |  |  |  |  | } | 
| 184 |  |  |  |  |  |  | } | 
| 185 | 42 | 50 |  |  |  | 84 | die "Subsequent indent must be less than width" if $sliw >= $width; | 
| 186 |  |  |  |  |  |  |  | 
| 187 | 42 |  |  |  |  | 74 | push @res, $fli; | 
| 188 | 42 |  |  |  |  | 65 | $x += $fliw; | 
| 189 |  |  |  |  |  |  |  | 
| 190 | 42 |  |  |  |  | 53 | my @words0; # (WORD1, WORD1_IS_CJK?, WS_AFTER?, WORD2, WORD2_IS_CJK?, WS_AFTER?, ...) | 
| 191 |  |  |  |  |  |  | # we differentiate/split between CJK "word" (cluster of CJK letters, | 
| 192 |  |  |  |  |  |  | # really) and non-CJK word, e.g. "我很爱你my可爱的and beautiful, | 
| 193 |  |  |  |  |  |  | # beautiful wife" is split to ["我很爱你", "my", "可爱的", "and", | 
| 194 |  |  |  |  |  |  | # "beautiful,", "beautiful", "wife"]. we do this because CJK word can be | 
| 195 |  |  |  |  |  |  | # line-broken on a per-letter basis, as they don't separate words with | 
| 196 |  |  |  |  |  |  | # whitespaces. | 
| 197 | 42 |  |  |  |  | 673 | while ($ptext =~ /(?: ($re_cjk+)|(\S+) ) (\s*)/gox) { | 
| 198 | 410 | 100 |  |  |  | 2180 | my $ws_after = $3 ? 1:0; | 
| 199 | 410 | 100 |  |  |  | 648 | if ($1) { | 
| 200 | 3 |  |  |  |  | 24 | push @words0, $1, 1, $ws_after; | 
| 201 |  |  |  |  |  |  | } else { | 
| 202 | 407 |  |  |  |  | 725 | my $ptext2 = $2; | 
| 203 | 407 |  |  |  |  | 1105 | while ($ptext2 =~ /($re_cjk_class+)| | 
| 204 |  |  |  |  |  |  | ($re_cjk_negclass+)/gox) { | 
| 205 | 412 | 100 |  |  |  | 3064 | if ($1) { | 
| 206 | 3 |  |  |  |  | 29 | push @words0, $1, 1, 0; | 
| 207 |  |  |  |  |  |  | } else { | 
| 208 | 409 |  |  |  |  | 1753 | push @words0, $2, 0, 0; | 
| 209 |  |  |  |  |  |  | } | 
| 210 |  |  |  |  |  |  | } | 
| 211 | 407 |  |  |  |  | 3114 | $words0[-1] = $ws_after; | 
| 212 |  |  |  |  |  |  | } | 
| 213 |  |  |  |  |  |  | } | 
| 214 |  |  |  |  |  |  |  | 
| 215 |  |  |  |  |  |  | # process each word | 
| 216 | 42 |  |  |  |  | 60 | my $prev_ws_after; | 
| 217 | 42 |  |  |  |  | 75 | while (@words0) { | 
| 218 | 415 |  |  |  |  | 732 | my ($word0, $is_cjk, $ws_after) = splice @words0, 0, 3; | 
| 219 | 415 |  |  |  |  | 582 | my @words; | 
| 220 |  |  |  |  |  |  | my @wordsw; | 
| 221 | 415 |  |  |  |  | 465 | while (1) { | 
| 222 | 431 | 100 |  |  |  | 654 | my $wordw = $is_mb ? mbswidth($word0) : length($word0); | 
| 223 |  |  |  |  |  |  |  | 
| 224 |  |  |  |  |  |  | # long cjk word is not truncated here because it will be | 
| 225 |  |  |  |  |  |  | # line-broken later when wrapping. | 
| 226 | 431 | 100 | 100 |  |  | 1192 | if ($wordw <= $width-$sliw || $is_cjk) { | 
| 227 | 415 |  |  |  |  | 568 | push @words , $word0; | 
| 228 | 415 |  |  |  |  | 542 | push @wordsw, $wordw; | 
| 229 | 415 |  |  |  |  | 557 | last; | 
| 230 |  |  |  |  |  |  | } | 
| 231 |  |  |  |  |  |  | # truncate long word | 
| 232 | 16 | 100 |  |  |  | 26 | if ($is_mb) { | 
| 233 | 7 |  |  |  |  | 18 | my $res = mbtrunc($word0, $width-$sliw, 1); | 
| 234 | 7 |  |  |  |  | 13 | push @words , $res->[0]; | 
| 235 | 7 |  |  |  |  | 11 | push @wordsw, $res->[1]; | 
| 236 | 7 |  |  |  |  | 16 | $word0 = substr($word0, length($res->[0])); | 
| 237 |  |  |  |  |  |  | #say "D:truncated long word (mb): $text -> $res->[0] & $res->[1], word0=$word0"; | 
| 238 |  |  |  |  |  |  | } else { | 
| 239 | 9 |  |  |  |  | 40 | my $w2 = substr($word0, 0, $width-$sliw); | 
| 240 | 9 |  |  |  |  | 15 | push @words , $w2; | 
| 241 | 9 |  |  |  |  | 12 | push @wordsw, $width-$sliw; | 
| 242 | 9 |  |  |  |  | 17 | $word0 = substr($word0, $width-$sliw); | 
| 243 |  |  |  |  |  |  | #say "D:truncated long word: $w2, ".($width-$sliw).", word0=$word0"; | 
| 244 |  |  |  |  |  |  | } | 
| 245 |  |  |  |  |  |  | } | 
| 246 |  |  |  |  |  |  |  | 
| 247 | 415 |  |  |  |  | 583 | for my $word (@words) { | 
| 248 | 431 |  |  |  |  | 518 | my $wordw = shift @wordsw; | 
| 249 |  |  |  |  |  |  | #say "D:x=$x word=$word is_cjk=$is_cjk ws_after=$ws_after wordw=$wordw line_has_word=$line_has_word width=$width"; | 
| 250 |  |  |  |  |  |  |  | 
| 251 | 431 | 100 | 100 |  |  | 1088 | $maxww = $wordw if !defined($maxww) || $maxww < $wordw; | 
| 252 | 431 | 100 | 100 |  |  | 954 | $minww = $wordw if !defined($minww) || $minww > $wordw; | 
| 253 |  |  |  |  |  |  |  | 
| 254 | 431 | 100 |  |  |  | 976 | my $x_after_word = $x + ($line_has_word ? 1:0) + $wordw; | 
| 255 | 431 | 100 |  |  |  | 611 | if ($x_after_word <= $width) { | 
| 256 |  |  |  |  |  |  | # the addition of word hasn't exceeded column width | 
| 257 | 357 | 100 |  |  |  | 552 | if ($line_has_word) { | 
| 258 | 319 | 100 |  |  |  | 454 | if ($prev_ws_after) { | 
| 259 | 317 |  |  |  |  | 404 | push @res, " "; | 
| 260 | 317 |  |  |  |  | 384 | $x++; | 
| 261 |  |  |  |  |  |  | } | 
| 262 |  |  |  |  |  |  | } | 
| 263 | 357 |  |  |  |  | 472 | push @res, $word; | 
| 264 | 357 |  |  |  |  | 425 | $x += $wordw; | 
| 265 |  |  |  |  |  |  | } else { | 
| 266 | 74 |  |  |  |  | 122 | while (1) { | 
| 267 | 87 | 100 |  |  |  | 131 | if ($is_cjk) { | 
| 268 |  |  |  |  |  |  | # CJK word can be broken | 
| 269 | 18 |  |  |  |  | 22 | my $res; | 
| 270 | 18 | 100 |  |  |  | 30 | if ($prev_ws_after) { | 
| 271 | 2 |  |  |  |  | 5 | $res = mbtrunc($word, $width - $x - 1, 1); | 
| 272 | 2 |  |  |  |  | 6 | push @res, " ", $res->[0]; | 
| 273 |  |  |  |  |  |  | } else { | 
| 274 | 16 |  |  |  |  | 32 | $res = mbtrunc($word, $width - $x, 1); | 
| 275 | 16 |  |  |  |  | 31 | push @res, $res->[0]; | 
| 276 |  |  |  |  |  |  | } | 
| 277 | 18 |  |  |  |  | 50 | my $word2 = substr($word, length($res->[0])); | 
| 278 |  |  |  |  |  |  | #say "D:truncated CJK word: $word -> $res->[0] & $res->[1], remaining=$word2"; | 
| 279 | 18 |  |  |  |  | 23 | $prev_ws_after = 0; | 
| 280 | 18 |  |  |  |  | 25 | $word = $word2; | 
| 281 | 18 |  |  |  |  | 31 | $wordw = mbswidth($word); | 
| 282 |  |  |  |  |  |  | } | 
| 283 |  |  |  |  |  |  |  | 
| 284 |  |  |  |  |  |  | # move the word to the next line | 
| 285 | 87 | 100 | 100 |  |  | 440 | push @res, " " if $prev_ws_after && $optkts; | 
| 286 | 87 |  |  |  |  | 163 | push @res, "\n", $sli; | 
| 287 | 87 |  |  |  |  | 131 | $y++; | 
| 288 |  |  |  |  |  |  |  | 
| 289 | 87 | 100 |  |  |  | 137 | if ($sliw + $wordw <= $width) { | 
| 290 | 74 |  |  |  |  | 106 | push @res, $word; | 
| 291 | 74 |  |  |  |  | 87 | $x = $sliw + $wordw; | 
| 292 | 74 |  |  |  |  | 110 | last; | 
| 293 |  |  |  |  |  |  | } else { | 
| 294 |  |  |  |  |  |  | # still too long, truncate again | 
| 295 | 13 |  |  |  |  | 22 | $x = $sliw; | 
| 296 |  |  |  |  |  |  | } | 
| 297 |  |  |  |  |  |  | } | 
| 298 |  |  |  |  |  |  | } | 
| 299 | 431 |  |  |  |  | 654 | $line_has_word++; | 
| 300 |  |  |  |  |  |  | } | 
| 301 | 415 |  |  |  |  | 785 | $prev_ws_after = $ws_after; | 
| 302 |  |  |  |  |  |  | } | 
| 303 |  |  |  |  |  |  |  | 
| 304 | 42 | 100 |  |  |  | 77 | if (defined $pbreak) { | 
| 305 | 7 |  |  |  |  | 26 | push @res, $pbreak; | 
| 306 |  |  |  |  |  |  | } else { | 
| 307 | 35 | 100 |  |  |  | 210 | push @res, "\n" if $ptext =~ /\n[ \t]*\z/; | 
| 308 |  |  |  |  |  |  | } | 
| 309 |  |  |  |  |  |  | } | 
| 310 |  |  |  |  |  |  |  | 
| 311 | 36 | 100 |  |  |  | 80 | if ($opts->{return_stats}) { | 
| 312 | 1 |  |  |  |  | 13 | return [join("", @res), { | 
| 313 |  |  |  |  |  |  | max_word_width => $maxww, | 
| 314 |  |  |  |  |  |  | min_word_width => $minww, | 
| 315 |  |  |  |  |  |  | }]; | 
| 316 |  |  |  |  |  |  | } else { | 
| 317 | 35 |  |  |  |  | 332 | return join("", @res); | 
| 318 |  |  |  |  |  |  | } | 
| 319 |  |  |  |  |  |  | } | 
| 320 |  |  |  |  |  |  |  | 
| 321 |  |  |  |  |  |  | sub mbwrap { | 
| 322 | 7 |  |  | 7 | 1 | 6654 | _wrap(1, @_); | 
| 323 |  |  |  |  |  |  | } | 
| 324 |  |  |  |  |  |  |  | 
| 325 |  |  |  |  |  |  | sub wrap { | 
| 326 | 29 |  |  | 29 | 1 | 13988 | _wrap(0, @_); | 
| 327 |  |  |  |  |  |  | } | 
| 328 |  |  |  |  |  |  |  | 
| 329 |  |  |  |  |  |  | sub _pad { | 
| 330 | 0 |  |  | 0 |  | 0 | my ($is_mb, $text, $width, $which, $padchar, $is_trunc) = @_; | 
| 331 | 0 | 0 |  |  |  | 0 | if ($which) { | 
| 332 | 0 |  |  |  |  | 0 | $which = substr($which, 0, 1); | 
| 333 |  |  |  |  |  |  | } else { | 
| 334 | 0 |  |  |  |  | 0 | $which = "r"; | 
| 335 |  |  |  |  |  |  | } | 
| 336 | 0 |  | 0 |  |  | 0 | $padchar //= " "; | 
| 337 |  |  |  |  |  |  |  | 
| 338 | 0 | 0 |  |  |  | 0 | my $w = $is_mb ? mbswidth($text) : length($text); | 
| 339 | 0 | 0 | 0 |  |  | 0 | if ($is_trunc && $w > $width) { | 
| 340 | 0 |  |  |  |  | 0 | my $res = mbtrunc($text, $width, 1); | 
| 341 | 0 |  |  |  |  | 0 | $text = $res->[0] . ($padchar x ($width-$res->[1])); | 
| 342 |  |  |  |  |  |  | } else { | 
| 343 | 0 | 0 |  |  |  | 0 | if ($which eq 'l') { | 
|  |  | 0 |  |  |  |  |  | 
| 344 | 0 |  |  |  |  | 0 | $text = ($padchar x ($width-$w)) . $text; | 
| 345 |  |  |  |  |  |  | } elsif ($which eq 'c') { | 
| 346 | 0 |  |  |  |  | 0 | my $n = int(($width-$w)/2); | 
| 347 | 0 |  |  |  |  | 0 | $text = ($padchar x $n) . $text . ($padchar x ($width-$w-$n)); | 
| 348 |  |  |  |  |  |  | } else { | 
| 349 | 0 |  |  |  |  | 0 | $text .= ($padchar x ($width-$w)); | 
| 350 |  |  |  |  |  |  | } | 
| 351 |  |  |  |  |  |  | } | 
| 352 | 0 |  |  |  |  | 0 | $text; | 
| 353 |  |  |  |  |  |  | } | 
| 354 |  |  |  |  |  |  |  | 
| 355 |  |  |  |  |  |  | sub mbpad { | 
| 356 | 0 |  |  | 0 | 1 | 0 | _pad(1, @_); | 
| 357 |  |  |  |  |  |  | } | 
| 358 |  |  |  |  |  |  |  | 
| 359 |  |  |  |  |  |  | sub pad { | 
| 360 | 0 |  |  | 0 | 1 | 0 | _pad(0, @_); | 
| 361 |  |  |  |  |  |  | } | 
| 362 |  |  |  |  |  |  |  | 
| 363 |  |  |  |  |  |  | sub _trunc { | 
| 364 | 25 |  |  | 25 |  | 53 | my ($is_mb, $text, $width, $return_width) = @_; | 
| 365 |  |  |  |  |  |  |  | 
| 366 |  |  |  |  |  |  | # return_width (undocumented): if set to 1, will return [truncated_text, | 
| 367 |  |  |  |  |  |  | # visual width, length(chars) up to truncation point] | 
| 368 |  |  |  |  |  |  |  | 
| 369 | 25 | 50 |  |  |  | 62 | my $w = $is_mb ? mbswidth($text) : length($text); | 
| 370 | 25 | 50 |  |  |  | 358 | die "Invalid argument, width must not be negative" unless $width >= 0; | 
| 371 | 25 | 50 |  |  |  | 49 | if ($w <= $width) { | 
| 372 | 0 | 0 |  |  |  | 0 | return $return_width ? [$text, $w, length($text)] : $text; | 
| 373 |  |  |  |  |  |  | } | 
| 374 |  |  |  |  |  |  |  | 
| 375 | 25 |  |  |  |  | 28 | my $c = 0; | 
| 376 |  |  |  |  |  |  |  | 
| 377 |  |  |  |  |  |  | # perform binary cutting | 
| 378 | 25 |  |  |  |  | 32 | my @res; | 
| 379 | 25 |  |  |  |  | 28 | my $wres = 0; # total width of text in @res | 
| 380 | 25 | 50 |  |  |  | 47 | my $l = int($w/2); $l = 1 if $l == 0; | 
|  | 25 |  |  |  |  | 69 |  | 
| 381 | 25 |  |  |  |  | 42 | my $end = 0; | 
| 382 | 25 |  |  |  |  | 29 | while (1) { | 
| 383 | 167 |  |  |  |  | 328 | my $left  = substr($text, 0, $l); | 
| 384 | 167 | 50 |  |  |  | 330 | my $right = $l > length($text) ? "" : substr($text, $l); | 
| 385 | 167 | 50 |  |  |  | 269 | my $wl = $is_mb ? mbswidth($left) : length($left); | 
| 386 |  |  |  |  |  |  | #say "D:left=$left, right=$right, wl=$wl"; | 
| 387 | 167 | 100 |  |  |  | 2040 | if ($wres + $wl > $width) { | 
| 388 | 120 |  |  |  |  | 176 | $text = $left; | 
| 389 |  |  |  |  |  |  | } else { | 
| 390 | 47 |  |  |  |  | 77 | push @res, $left; | 
| 391 | 47 |  |  |  |  | 57 | $wres += $wl; | 
| 392 | 47 |  |  |  |  | 71 | $c += length($left); | 
| 393 | 47 |  |  |  |  | 68 | $text = $right; | 
| 394 |  |  |  |  |  |  | } | 
| 395 | 167 |  |  |  |  | 256 | $l = int(($l+1)/2); | 
| 396 |  |  |  |  |  |  | #say "D:l=$l"; | 
| 397 | 167 | 100 | 100 |  |  | 360 | last if $l==1 && $end>1; | 
| 398 | 142 | 100 |  |  |  | 287 | $end++ if $l==1; | 
| 399 |  |  |  |  |  |  | } | 
| 400 | 25 | 50 |  |  |  | 45 | if ($return_width) { | 
| 401 | 25 |  |  |  |  | 92 | return [join("", @res), $wres, $c]; | 
| 402 |  |  |  |  |  |  | } else { | 
| 403 | 0 |  |  |  |  | 0 | return join("", @res); | 
| 404 |  |  |  |  |  |  | } | 
| 405 |  |  |  |  |  |  | } | 
| 406 |  |  |  |  |  |  |  | 
| 407 |  |  |  |  |  |  | sub mbtrunc { | 
| 408 | 25 |  |  | 25 | 1 | 38 | _trunc(1, @_); | 
| 409 |  |  |  |  |  |  | } | 
| 410 |  |  |  |  |  |  |  | 
| 411 |  |  |  |  |  |  | sub trunc { | 
| 412 | 0 |  |  | 0 | 1 |  | _trunc(0, @_); | 
| 413 |  |  |  |  |  |  | } | 
| 414 |  |  |  |  |  |  |  | 
| 415 |  |  |  |  |  |  | 1; | 
| 416 |  |  |  |  |  |  | # ABSTRACT: Routines for text containing wide characters | 
| 417 |  |  |  |  |  |  |  | 
| 418 |  |  |  |  |  |  | __END__ |