line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Text::WrapI18N; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
require Exporter; |
4
|
1
|
|
|
1
|
|
26501
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
40
|
|
5
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
125
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
8
|
|
|
|
|
|
|
our @EXPORT = qw(wrap); |
9
|
|
|
|
|
|
|
our @EXPORT_OK = qw($columns $separator); |
10
|
|
|
|
|
|
|
our %EXPORT_TAGS = ('all' => [ @EXPORT, @EXPORT_OK ]); |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
our $VERSION = '0.06'; |
13
|
|
|
|
|
|
|
|
14
|
1
|
|
|
1
|
|
6
|
use vars qw($columns $break $tabstop $separator $huge $unexpand $charmap); |
|
1
|
|
|
|
|
7
|
|
|
1
|
|
|
|
|
233
|
|
15
|
1
|
|
|
1
|
|
1220
|
use Text::CharWidth qw(mbswidth mblen); |
|
1
|
|
|
|
|
4835
|
|
|
1
|
|
|
|
|
181
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
BEGIN { |
18
|
1
|
|
|
1
|
|
2
|
$columns = 76; |
19
|
|
|
|
|
|
|
# $break, $separator, $huge, and $unexpand are not supported yet. |
20
|
1
|
|
|
|
|
2
|
$break = '\s'; |
21
|
1
|
|
|
|
|
1
|
$tabstop = 8; |
22
|
1
|
|
|
|
|
3
|
$separator = "\n"; |
23
|
1
|
|
|
|
|
2
|
$huge = 'wrap'; |
24
|
1
|
|
|
|
|
2
|
$unexpand = 1; |
25
|
1
|
|
|
|
|
1452
|
undef $charmap; |
26
|
|
|
|
|
|
|
} |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
sub wrap { |
29
|
2
|
|
|
2
|
0
|
15
|
my $top1=shift; |
30
|
2
|
|
|
|
|
9
|
my $top2=shift; |
31
|
2
|
|
|
|
|
4
|
my $text=shift; |
32
|
|
|
|
|
|
|
|
33
|
2
|
|
|
|
|
5
|
$text = $top1 . $text; |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# $out already-formatted text for output including current line |
36
|
|
|
|
|
|
|
# $len visible width of the current line without the current word |
37
|
|
|
|
|
|
|
# $word the current word which might be sent to the next line |
38
|
|
|
|
|
|
|
# $wlen visible width of the current word |
39
|
|
|
|
|
|
|
# $c the current character |
40
|
|
|
|
|
|
|
# $b whether to allow line-breaking after the current character |
41
|
|
|
|
|
|
|
# $cont_lf true when LF (line feed) characters appear continuously |
42
|
|
|
|
|
|
|
# $w visible width of the current character |
43
|
|
|
|
|
|
|
|
44
|
2
|
|
|
|
|
4
|
my $out = ''; |
45
|
2
|
|
|
|
|
5
|
my $len = 0; |
46
|
2
|
|
|
|
|
3
|
my $word = ''; |
47
|
2
|
|
|
|
|
2
|
my $wlen = 0; |
48
|
2
|
|
|
|
|
4
|
my $cont_lf = 0; |
49
|
2
|
|
|
|
|
3
|
my ($c, $w, $b); |
50
|
2
|
|
|
|
|
5
|
$text =~ s/\n+$/\n/; |
51
|
2
|
|
|
|
|
2
|
while(1) { |
52
|
28
|
100
|
|
|
|
61
|
if (length($text) == 0) { |
53
|
2
|
|
|
|
|
52
|
return $out . $word; |
54
|
|
|
|
|
|
|
} |
55
|
26
|
|
|
|
|
42
|
($c, $text, $w, $b) = _extract($text); |
56
|
26
|
50
|
|
|
|
80
|
if ($c eq "\n") { |
|
|
50
|
|
|
|
|
|
57
|
0
|
|
|
|
|
0
|
$out .= $word . $separator; |
58
|
0
|
0
|
|
|
|
0
|
if (length($text) == 0) {return $out;} |
|
0
|
|
|
|
|
0
|
|
59
|
0
|
|
|
|
|
0
|
$len = 0; |
60
|
0
|
|
|
|
|
0
|
$text = $top2 . $text; |
61
|
0
|
|
|
|
|
0
|
$word = '' ; $wlen = 0; |
|
0
|
|
|
|
|
0
|
|
62
|
0
|
|
|
|
|
0
|
next; |
63
|
|
|
|
|
|
|
} elsif ($w == -1) { |
64
|
|
|
|
|
|
|
# all control characters other than LF are ignored |
65
|
0
|
|
|
|
|
0
|
next; |
66
|
|
|
|
|
|
|
} |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# when the current line have enough room |
69
|
|
|
|
|
|
|
# for the curren character |
70
|
|
|
|
|
|
|
|
71
|
26
|
100
|
|
|
|
67
|
if ($len + $wlen + $w <= $columns) { |
72
|
25
|
100
|
66
|
|
|
109
|
if ($c eq ' ' || $b) { |
73
|
2
|
|
|
|
|
5
|
$out .= $word . $c; |
74
|
2
|
|
|
|
|
14
|
$len += $wlen + $w; |
75
|
2
|
|
|
|
|
5
|
$word = ''; $wlen = 0; |
|
2
|
|
|
|
|
4
|
|
76
|
|
|
|
|
|
|
} else { |
77
|
23
|
|
|
|
|
34
|
$word .= $c; $wlen += $w; |
|
23
|
|
|
|
|
23
|
|
78
|
|
|
|
|
|
|
} |
79
|
25
|
|
|
|
|
29
|
next; |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# when the current line overflows with the |
83
|
|
|
|
|
|
|
# current character |
84
|
|
|
|
|
|
|
|
85
|
1
|
50
|
|
|
|
4
|
if ($c eq ' ') { |
|
|
0
|
|
|
|
|
|
86
|
|
|
|
|
|
|
# the line ends by space |
87
|
1
|
|
|
|
|
3
|
$out .= $word . $separator; |
88
|
1
|
|
|
|
|
2
|
$len = 0; |
89
|
1
|
|
|
|
|
2
|
$text = $top2 . $text; |
90
|
1
|
|
|
|
|
1
|
$word = ''; $wlen = 0; |
|
1
|
|
|
|
|
2
|
|
91
|
|
|
|
|
|
|
} elsif ($wlen + $w <= $columns) { |
92
|
|
|
|
|
|
|
# the current word is sent to next line |
93
|
0
|
|
|
|
|
0
|
$out .= $separator; |
94
|
0
|
|
|
|
|
0
|
$len = 0; |
95
|
0
|
|
|
|
|
0
|
$text = $top2 . $word . $c . $text; |
96
|
0
|
|
|
|
|
0
|
$word = ''; $wlen = 0; |
|
0
|
|
|
|
|
0
|
|
97
|
|
|
|
|
|
|
} else { |
98
|
|
|
|
|
|
|
# the current word is too long to fit a line |
99
|
0
|
|
|
|
|
0
|
$out .= $word . $separator; |
100
|
0
|
|
|
|
|
0
|
$len = 0; |
101
|
0
|
|
|
|
|
0
|
$text = $top2 . $c . $text; |
102
|
0
|
|
|
|
|
0
|
$word = ''; $wlen = 0; |
|
0
|
|
|
|
|
0
|
|
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
# Extract one character from the beginning from the given string. |
109
|
|
|
|
|
|
|
# Supports multibyte encodings such as UTF-8, EUC-JP, EUC-KR, |
110
|
|
|
|
|
|
|
# GB2312, and Big5. |
111
|
|
|
|
|
|
|
# |
112
|
|
|
|
|
|
|
# return value: (character, rest string, width, line breakable) |
113
|
|
|
|
|
|
|
# character: a character. This may consist from multiple bytes. |
114
|
|
|
|
|
|
|
# rest string: given string without the extracted character. |
115
|
|
|
|
|
|
|
# width: number of columns which the character occupies on screen. |
116
|
|
|
|
|
|
|
# line breakable: true if the character allows line break after it. |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
sub _extract { |
119
|
26
|
|
|
26
|
|
31
|
my $string=shift; |
120
|
26
|
|
|
|
|
28
|
my ($l, $c, $r, $w, $b, $u); |
121
|
|
|
|
|
|
|
|
122
|
26
|
50
|
|
|
|
45
|
if (length($string) == 0) { |
123
|
0
|
|
|
|
|
0
|
return ('', '', 0, 0); |
124
|
|
|
|
|
|
|
} |
125
|
26
|
|
|
|
|
64
|
$l = mblen($string); |
126
|
26
|
50
|
33
|
|
|
100
|
if ($l == 0 || $l == -1) { |
127
|
0
|
|
|
|
|
0
|
return ('?', substr($string,1), 1, 0); |
128
|
|
|
|
|
|
|
} |
129
|
26
|
|
|
|
|
109
|
$c = substr($string, 0, $l); |
130
|
26
|
|
|
|
|
39
|
$r = substr($string, $l); |
131
|
26
|
|
|
|
|
61
|
$w = mbswidth($c); |
132
|
|
|
|
|
|
|
|
133
|
26
|
100
|
|
|
|
46
|
if (!defined($charmap)) { |
134
|
1
|
|
|
|
|
19919
|
$charmap = `/usr/bin/locale charmap`; |
135
|
|
|
|
|
|
|
} |
136
|
|
|
|
|
|
|
|
137
|
26
|
50
|
|
|
|
221
|
if ($charmap =~ /UTF.8/i) { |
|
|
50
|
|
|
|
|
|
138
|
|
|
|
|
|
|
# UTF-8 |
139
|
0
|
0
|
|
|
|
0
|
if ($l == 3) { |
|
|
0
|
|
|
|
|
|
140
|
|
|
|
|
|
|
# U+0800 - U+FFFF |
141
|
0
|
|
|
|
|
0
|
$u = (ord(substr($c,0,1))&0x0f) * 0x1000 |
142
|
|
|
|
|
|
|
+ (ord(substr($c,1,1))&0x3f) * 0x40 |
143
|
|
|
|
|
|
|
+ (ord(substr($c,2,1))&0x3f); |
144
|
0
|
|
|
|
|
0
|
$b = _isCJ($u); |
145
|
|
|
|
|
|
|
} elsif ($l == 4) { |
146
|
|
|
|
|
|
|
# U+10000 - U+10FFFF |
147
|
0
|
|
|
|
|
0
|
$u = (ord(substr($c,0,1))&7) * 0x40000 |
148
|
|
|
|
|
|
|
+ (ord(substr($c,1,1))&0x3f) * 0x1000 |
149
|
|
|
|
|
|
|
+ (ord(substr($c,2,1))&0x3f) * 0x40 |
150
|
|
|
|
|
|
|
+ (ord(substr($c,3,1))&0x3f); |
151
|
0
|
|
|
|
|
0
|
$b = _isCJ($u); |
152
|
|
|
|
|
|
|
} else { |
153
|
0
|
|
|
|
|
0
|
$b = 0; |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
} elsif ($charmap =~ /(^EUC)|(^GB)|(^BIG)/i) { |
156
|
|
|
|
|
|
|
# East Asian legacy encodings |
157
|
|
|
|
|
|
|
# (EUC-JP, EUC-KR, GB2312, Big5, Big5HKSCS, and so on) |
158
|
|
|
|
|
|
|
|
159
|
0
|
0
|
|
|
|
0
|
if (ord(substr($c,0,1)) >= 0x80) {$b = 1;} else {$b = 0;} |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
160
|
|
|
|
|
|
|
} else { |
161
|
26
|
|
|
|
|
39
|
$b = 0; |
162
|
|
|
|
|
|
|
} |
163
|
26
|
|
|
|
|
117
|
return ($c, $r, $w, $b); |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
# Returns 1 for Chinese and Japanese characters. This means that |
167
|
|
|
|
|
|
|
# these characters allow line wrapping after this character even |
168
|
|
|
|
|
|
|
# without whitespaces because these languages don't use whitespaces |
169
|
|
|
|
|
|
|
# between words. |
170
|
|
|
|
|
|
|
# |
171
|
|
|
|
|
|
|
# Character must be given in UCS-4 codepoint value. |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
sub _isCJ { |
174
|
0
|
|
|
0
|
|
|
my $u=shift; |
175
|
|
|
|
|
|
|
|
176
|
0
|
0
|
0
|
|
|
|
if ($u >= 0x3000 && $u <= 0x312f) { |
177
|
0
|
0
|
0
|
|
|
|
if ($u == 0x300a || $u == 0x300c || $u == 0x300e || |
|
0
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
178
|
|
|
|
|
|
|
$u == 0x3010 || $u == 0x3014 || $u == 0x3016 || |
179
|
|
|
|
|
|
|
$u == 0x3018 || $u == 0x301a) {return 0;} |
180
|
0
|
|
|
|
|
|
return 1; |
181
|
|
|
|
|
|
|
} # CJK punctuations, Hiragana, Katakana, Bopomofo |
182
|
0
|
0
|
0
|
|
|
|
if ($u >= 0x31a0 && $u <= 0x31bf) {return 1;} # Bopomofo |
|
0
|
|
|
|
|
|
|
183
|
0
|
0
|
0
|
|
|
|
if ($u >= 0x31f0 && $u <= 0x31ff) {return 1;} # Katakana extension |
|
0
|
|
|
|
|
|
|
184
|
0
|
0
|
0
|
|
|
|
if ($u >= 0x3400 && $u <= 0x9fff) {return 1;} # Han Ideogram |
|
0
|
|
|
|
|
|
|
185
|
0
|
0
|
0
|
|
|
|
if ($u >= 0xf900 && $u <= 0xfaff) {return 1;} # Han Ideogram |
|
0
|
|
|
|
|
|
|
186
|
0
|
0
|
0
|
|
|
|
if ($u >= 0x20000 && $u <= 0x2ffff) {return 1;} # Han Ideogram |
|
0
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
|
188
|
0
|
|
|
|
|
|
return 0; |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
1; |
192
|
|
|
|
|
|
|
__END__ |