| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Text::WrapI18N; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
require Exporter; |
|
4
|
1
|
|
|
1
|
|
26501
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
40
|
|
|
5
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
125
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
|
8
|
|
|
|
|
|
|
our @EXPORT = qw(wrap); |
|
9
|
|
|
|
|
|
|
our @EXPORT_OK = qw($columns $separator); |
|
10
|
|
|
|
|
|
|
our %EXPORT_TAGS = ('all' => [ @EXPORT, @EXPORT_OK ]); |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
our $VERSION = '0.06'; |
|
13
|
|
|
|
|
|
|
|
|
14
|
1
|
|
|
1
|
|
6
|
use vars qw($columns $break $tabstop $separator $huge $unexpand $charmap); |
|
|
1
|
|
|
|
|
7
|
|
|
|
1
|
|
|
|
|
233
|
|
|
15
|
1
|
|
|
1
|
|
1220
|
use Text::CharWidth qw(mbswidth mblen); |
|
|
1
|
|
|
|
|
4835
|
|
|
|
1
|
|
|
|
|
181
|
|
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
BEGIN { |
|
18
|
1
|
|
|
1
|
|
2
|
$columns = 76; |
|
19
|
|
|
|
|
|
|
# $break, $separator, $huge, and $unexpand are not supported yet. |
|
20
|
1
|
|
|
|
|
2
|
$break = '\s'; |
|
21
|
1
|
|
|
|
|
1
|
$tabstop = 8; |
|
22
|
1
|
|
|
|
|
3
|
$separator = "\n"; |
|
23
|
1
|
|
|
|
|
2
|
$huge = 'wrap'; |
|
24
|
1
|
|
|
|
|
2
|
$unexpand = 1; |
|
25
|
1
|
|
|
|
|
1452
|
undef $charmap; |
|
26
|
|
|
|
|
|
|
} |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
sub wrap { |
|
29
|
2
|
|
|
2
|
0
|
15
|
my $top1=shift; |
|
30
|
2
|
|
|
|
|
9
|
my $top2=shift; |
|
31
|
2
|
|
|
|
|
4
|
my $text=shift; |
|
32
|
|
|
|
|
|
|
|
|
33
|
2
|
|
|
|
|
5
|
$text = $top1 . $text; |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# $out already-formatted text for output including current line |
|
36
|
|
|
|
|
|
|
# $len visible width of the current line without the current word |
|
37
|
|
|
|
|
|
|
# $word the current word which might be sent to the next line |
|
38
|
|
|
|
|
|
|
# $wlen visible width of the current word |
|
39
|
|
|
|
|
|
|
# $c the current character |
|
40
|
|
|
|
|
|
|
# $b whether to allow line-breaking after the current character |
|
41
|
|
|
|
|
|
|
# $cont_lf true when LF (line feed) characters appear continuously |
|
42
|
|
|
|
|
|
|
# $w visible width of the current character |
|
43
|
|
|
|
|
|
|
|
|
44
|
2
|
|
|
|
|
4
|
my $out = ''; |
|
45
|
2
|
|
|
|
|
5
|
my $len = 0; |
|
46
|
2
|
|
|
|
|
3
|
my $word = ''; |
|
47
|
2
|
|
|
|
|
2
|
my $wlen = 0; |
|
48
|
2
|
|
|
|
|
4
|
my $cont_lf = 0; |
|
49
|
2
|
|
|
|
|
3
|
my ($c, $w, $b); |
|
50
|
2
|
|
|
|
|
5
|
$text =~ s/\n+$/\n/; |
|
51
|
2
|
|
|
|
|
2
|
while(1) { |
|
52
|
28
|
100
|
|
|
|
61
|
if (length($text) == 0) { |
|
53
|
2
|
|
|
|
|
52
|
return $out . $word; |
|
54
|
|
|
|
|
|
|
} |
|
55
|
26
|
|
|
|
|
42
|
($c, $text, $w, $b) = _extract($text); |
|
56
|
26
|
50
|
|
|
|
80
|
if ($c eq "\n") { |
|
|
|
50
|
|
|
|
|
|
|
57
|
0
|
|
|
|
|
0
|
$out .= $word . $separator; |
|
58
|
0
|
0
|
|
|
|
0
|
if (length($text) == 0) {return $out;} |
|
|
0
|
|
|
|
|
0
|
|
|
59
|
0
|
|
|
|
|
0
|
$len = 0; |
|
60
|
0
|
|
|
|
|
0
|
$text = $top2 . $text; |
|
61
|
0
|
|
|
|
|
0
|
$word = '' ; $wlen = 0; |
|
|
0
|
|
|
|
|
0
|
|
|
62
|
0
|
|
|
|
|
0
|
next; |
|
63
|
|
|
|
|
|
|
} elsif ($w == -1) { |
|
64
|
|
|
|
|
|
|
# all control characters other than LF are ignored |
|
65
|
0
|
|
|
|
|
0
|
next; |
|
66
|
|
|
|
|
|
|
} |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# when the current line have enough room |
|
69
|
|
|
|
|
|
|
# for the curren character |
|
70
|
|
|
|
|
|
|
|
|
71
|
26
|
100
|
|
|
|
67
|
if ($len + $wlen + $w <= $columns) { |
|
72
|
25
|
100
|
66
|
|
|
109
|
if ($c eq ' ' || $b) { |
|
73
|
2
|
|
|
|
|
5
|
$out .= $word . $c; |
|
74
|
2
|
|
|
|
|
14
|
$len += $wlen + $w; |
|
75
|
2
|
|
|
|
|
5
|
$word = ''; $wlen = 0; |
|
|
2
|
|
|
|
|
4
|
|
|
76
|
|
|
|
|
|
|
} else { |
|
77
|
23
|
|
|
|
|
34
|
$word .= $c; $wlen += $w; |
|
|
23
|
|
|
|
|
23
|
|
|
78
|
|
|
|
|
|
|
} |
|
79
|
25
|
|
|
|
|
29
|
next; |
|
80
|
|
|
|
|
|
|
} |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# when the current line overflows with the |
|
83
|
|
|
|
|
|
|
# current character |
|
84
|
|
|
|
|
|
|
|
|
85
|
1
|
50
|
|
|
|
4
|
if ($c eq ' ') { |
|
|
|
0
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
# the line ends by space |
|
87
|
1
|
|
|
|
|
3
|
$out .= $word . $separator; |
|
88
|
1
|
|
|
|
|
2
|
$len = 0; |
|
89
|
1
|
|
|
|
|
2
|
$text = $top2 . $text; |
|
90
|
1
|
|
|
|
|
1
|
$word = ''; $wlen = 0; |
|
|
1
|
|
|
|
|
2
|
|
|
91
|
|
|
|
|
|
|
} elsif ($wlen + $w <= $columns) { |
|
92
|
|
|
|
|
|
|
# the current word is sent to next line |
|
93
|
0
|
|
|
|
|
0
|
$out .= $separator; |
|
94
|
0
|
|
|
|
|
0
|
$len = 0; |
|
95
|
0
|
|
|
|
|
0
|
$text = $top2 . $word . $c . $text; |
|
96
|
0
|
|
|
|
|
0
|
$word = ''; $wlen = 0; |
|
|
0
|
|
|
|
|
0
|
|
|
97
|
|
|
|
|
|
|
} else { |
|
98
|
|
|
|
|
|
|
# the current word is too long to fit a line |
|
99
|
0
|
|
|
|
|
0
|
$out .= $word . $separator; |
|
100
|
0
|
|
|
|
|
0
|
$len = 0; |
|
101
|
0
|
|
|
|
|
0
|
$text = $top2 . $c . $text; |
|
102
|
0
|
|
|
|
|
0
|
$word = ''; $wlen = 0; |
|
|
0
|
|
|
|
|
0
|
|
|
103
|
|
|
|
|
|
|
} |
|
104
|
|
|
|
|
|
|
} |
|
105
|
|
|
|
|
|
|
} |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
# Extract one character from the beginning from the given string. |
|
109
|
|
|
|
|
|
|
# Supports multibyte encodings such as UTF-8, EUC-JP, EUC-KR, |
|
110
|
|
|
|
|
|
|
# GB2312, and Big5. |
|
111
|
|
|
|
|
|
|
# |
|
112
|
|
|
|
|
|
|
# return value: (character, rest string, width, line breakable) |
|
113
|
|
|
|
|
|
|
# character: a character. This may consist from multiple bytes. |
|
114
|
|
|
|
|
|
|
# rest string: given string without the extracted character. |
|
115
|
|
|
|
|
|
|
# width: number of columns which the character occupies on screen. |
|
116
|
|
|
|
|
|
|
# line breakable: true if the character allows line break after it. |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
sub _extract { |
|
119
|
26
|
|
|
26
|
|
31
|
my $string=shift; |
|
120
|
26
|
|
|
|
|
28
|
my ($l, $c, $r, $w, $b, $u); |
|
121
|
|
|
|
|
|
|
|
|
122
|
26
|
50
|
|
|
|
45
|
if (length($string) == 0) { |
|
123
|
0
|
|
|
|
|
0
|
return ('', '', 0, 0); |
|
124
|
|
|
|
|
|
|
} |
|
125
|
26
|
|
|
|
|
64
|
$l = mblen($string); |
|
126
|
26
|
50
|
33
|
|
|
100
|
if ($l == 0 || $l == -1) { |
|
127
|
0
|
|
|
|
|
0
|
return ('?', substr($string,1), 1, 0); |
|
128
|
|
|
|
|
|
|
} |
|
129
|
26
|
|
|
|
|
109
|
$c = substr($string, 0, $l); |
|
130
|
26
|
|
|
|
|
39
|
$r = substr($string, $l); |
|
131
|
26
|
|
|
|
|
61
|
$w = mbswidth($c); |
|
132
|
|
|
|
|
|
|
|
|
133
|
26
|
100
|
|
|
|
46
|
if (!defined($charmap)) { |
|
134
|
1
|
|
|
|
|
19919
|
$charmap = `/usr/bin/locale charmap`; |
|
135
|
|
|
|
|
|
|
} |
|
136
|
|
|
|
|
|
|
|
|
137
|
26
|
50
|
|
|
|
221
|
if ($charmap =~ /UTF.8/i) { |
|
|
|
50
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
# UTF-8 |
|
139
|
0
|
0
|
|
|
|
0
|
if ($l == 3) { |
|
|
|
0
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
# U+0800 - U+FFFF |
|
141
|
0
|
|
|
|
|
0
|
$u = (ord(substr($c,0,1))&0x0f) * 0x1000 |
|
142
|
|
|
|
|
|
|
+ (ord(substr($c,1,1))&0x3f) * 0x40 |
|
143
|
|
|
|
|
|
|
+ (ord(substr($c,2,1))&0x3f); |
|
144
|
0
|
|
|
|
|
0
|
$b = _isCJ($u); |
|
145
|
|
|
|
|
|
|
} elsif ($l == 4) { |
|
146
|
|
|
|
|
|
|
# U+10000 - U+10FFFF |
|
147
|
0
|
|
|
|
|
0
|
$u = (ord(substr($c,0,1))&7) * 0x40000 |
|
148
|
|
|
|
|
|
|
+ (ord(substr($c,1,1))&0x3f) * 0x1000 |
|
149
|
|
|
|
|
|
|
+ (ord(substr($c,2,1))&0x3f) * 0x40 |
|
150
|
|
|
|
|
|
|
+ (ord(substr($c,3,1))&0x3f); |
|
151
|
0
|
|
|
|
|
0
|
$b = _isCJ($u); |
|
152
|
|
|
|
|
|
|
} else { |
|
153
|
0
|
|
|
|
|
0
|
$b = 0; |
|
154
|
|
|
|
|
|
|
} |
|
155
|
|
|
|
|
|
|
} elsif ($charmap =~ /(^EUC)|(^GB)|(^BIG)/i) { |
|
156
|
|
|
|
|
|
|
# East Asian legacy encodings |
|
157
|
|
|
|
|
|
|
# (EUC-JP, EUC-KR, GB2312, Big5, Big5HKSCS, and so on) |
|
158
|
|
|
|
|
|
|
|
|
159
|
0
|
0
|
|
|
|
0
|
if (ord(substr($c,0,1)) >= 0x80) {$b = 1;} else {$b = 0;} |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
160
|
|
|
|
|
|
|
} else { |
|
161
|
26
|
|
|
|
|
39
|
$b = 0; |
|
162
|
|
|
|
|
|
|
} |
|
163
|
26
|
|
|
|
|
117
|
return ($c, $r, $w, $b); |
|
164
|
|
|
|
|
|
|
} |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
# Returns 1 for Chinese and Japanese characters. This means that |
|
167
|
|
|
|
|
|
|
# these characters allow line wrapping after this character even |
|
168
|
|
|
|
|
|
|
# without whitespaces because these languages don't use whitespaces |
|
169
|
|
|
|
|
|
|
# between words. |
|
170
|
|
|
|
|
|
|
# |
|
171
|
|
|
|
|
|
|
# Character must be given in UCS-4 codepoint value. |
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
sub _isCJ { |
|
174
|
0
|
|
|
0
|
|
|
my $u=shift; |
|
175
|
|
|
|
|
|
|
|
|
176
|
0
|
0
|
0
|
|
|
|
if ($u >= 0x3000 && $u <= 0x312f) { |
|
177
|
0
|
0
|
0
|
|
|
|
if ($u == 0x300a || $u == 0x300c || $u == 0x300e || |
|
|
0
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
178
|
|
|
|
|
|
|
$u == 0x3010 || $u == 0x3014 || $u == 0x3016 || |
|
179
|
|
|
|
|
|
|
$u == 0x3018 || $u == 0x301a) {return 0;} |
|
180
|
0
|
|
|
|
|
|
return 1; |
|
181
|
|
|
|
|
|
|
} # CJK punctuations, Hiragana, Katakana, Bopomofo |
|
182
|
0
|
0
|
0
|
|
|
|
if ($u >= 0x31a0 && $u <= 0x31bf) {return 1;} # Bopomofo |
|
|
0
|
|
|
|
|
|
|
|
183
|
0
|
0
|
0
|
|
|
|
if ($u >= 0x31f0 && $u <= 0x31ff) {return 1;} # Katakana extension |
|
|
0
|
|
|
|
|
|
|
|
184
|
0
|
0
|
0
|
|
|
|
if ($u >= 0x3400 && $u <= 0x9fff) {return 1;} # Han Ideogram |
|
|
0
|
|
|
|
|
|
|
|
185
|
0
|
0
|
0
|
|
|
|
if ($u >= 0xf900 && $u <= 0xfaff) {return 1;} # Han Ideogram |
|
|
0
|
|
|
|
|
|
|
|
186
|
0
|
0
|
0
|
|
|
|
if ($u >= 0x20000 && $u <= 0x2ffff) {return 1;} # Han Ideogram |
|
|
0
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
|
|
188
|
0
|
|
|
|
|
|
return 0; |
|
189
|
|
|
|
|
|
|
} |
|
190
|
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
1; |
|
192
|
|
|
|
|
|
|
__END__ |