line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Unicode::String; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# Copyright 1997-1999, Gisle Aas. |
4
|
|
|
|
|
|
|
|
5
|
5
|
|
|
5
|
|
9813
|
use strict; |
|
5
|
|
|
|
|
9
|
|
|
5
|
|
|
|
|
221
|
|
6
|
5
|
|
|
5
|
|
28
|
use vars qw($VERSION @ISA @EXPORT_OK $UTF7_OPTIONAL_DIRECT_CHARS); |
|
5
|
|
|
|
|
9
|
|
|
5
|
|
|
|
|
420
|
|
7
|
5
|
|
|
5
|
|
25
|
use Carp; |
|
5
|
|
|
|
|
13
|
|
|
5
|
|
|
|
|
1282
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
require Exporter; |
10
|
|
|
|
|
|
|
require DynaLoader; |
11
|
|
|
|
|
|
|
@ISA = qw(Exporter DynaLoader); |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
@EXPORT_OK = qw( |
14
|
|
|
|
|
|
|
utf16 utf16le utf16be ucs2 |
15
|
|
|
|
|
|
|
utf8 |
16
|
|
|
|
|
|
|
utf7 |
17
|
|
|
|
|
|
|
ucs4 utf32 utf32be utf32le |
18
|
|
|
|
|
|
|
latin1 |
19
|
|
|
|
|
|
|
uchr uhex |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
byteswap2 byteswap4 |
22
|
|
|
|
|
|
|
); |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
$VERSION = '2.09'; # $Id: String.pm,v 1.35 2005/10/26 08:13:10 gisle Exp $ |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
$UTF7_OPTIONAL_DIRECT_CHARS ||= 1; |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
bootstrap Unicode::String $VERSION; |
29
|
|
|
|
|
|
|
|
30
|
5
|
|
|
|
|
77
|
use overload '""' => \&as_string, |
31
|
|
|
|
|
|
|
'bool' => \&as_bool, |
32
|
|
|
|
|
|
|
'0+' => \&as_num, |
33
|
|
|
|
|
|
|
'.=' => \&append, |
34
|
|
|
|
|
|
|
'.' => \&concat, |
35
|
|
|
|
|
|
|
'x' => \&repeat, |
36
|
|
|
|
|
|
|
'=' => \©, |
37
|
5
|
|
|
5
|
|
10459
|
'fallback' => 1; |
|
5
|
|
|
|
|
6236
|
|
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
my %stringify = ( |
40
|
|
|
|
|
|
|
unicode => \&utf16, |
41
|
|
|
|
|
|
|
utf16 => \&utf16, |
42
|
|
|
|
|
|
|
utf16be => \&utf16, |
43
|
|
|
|
|
|
|
utf16le => \&utf16le, |
44
|
|
|
|
|
|
|
ucs2 => \&utf16, |
45
|
|
|
|
|
|
|
utf8 => \&utf8, |
46
|
|
|
|
|
|
|
utf7 => \&utf7, |
47
|
|
|
|
|
|
|
ucs4 => \&ucs4, |
48
|
|
|
|
|
|
|
utf32 => \&ucs4, |
49
|
|
|
|
|
|
|
utf32be => \&ucs4, |
50
|
|
|
|
|
|
|
utf32le => \&utf32le, |
51
|
|
|
|
|
|
|
latin1 => \&latin1, |
52
|
|
|
|
|
|
|
'hex' => \&hex, |
53
|
|
|
|
|
|
|
); |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
my $stringify_as = \&utf8; |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
# some aliases |
58
|
|
|
|
|
|
|
*ucs2 = \&utf16; |
59
|
|
|
|
|
|
|
*utf16be = \&utf16; |
60
|
|
|
|
|
|
|
*utf32 = \&ucs4; |
61
|
|
|
|
|
|
|
*utf32be = \&ucs4; |
62
|
|
|
|
|
|
|
*uhex = \&hex; |
63
|
|
|
|
|
|
|
*uchr = \&chr; |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
sub new |
66
|
|
|
|
|
|
|
{ |
67
|
|
|
|
|
|
|
#_dump_arg("new", @_); |
68
|
54
|
|
|
54
|
1
|
2670
|
my $class = shift; |
69
|
54
|
|
|
|
|
52
|
my $str; |
70
|
54
|
|
|
|
|
136
|
my $self = bless \$str, $class; |
71
|
54
|
100
|
|
|
|
133
|
&$stringify_as($self, shift) if @_; |
72
|
54
|
|
|
|
|
251
|
$self; |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
sub repeat |
77
|
|
|
|
|
|
|
{ |
78
|
1
|
|
|
1
|
1
|
9
|
my($self, $count) = @_; |
79
|
1
|
|
|
|
|
3
|
my $class = ref($self); |
80
|
1
|
|
|
|
|
3
|
my $str = $$self x $count; |
81
|
1
|
|
|
|
|
3
|
bless \$str, $class; |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
sub _dump_arg |
86
|
|
|
|
|
|
|
{ |
87
|
0
|
|
|
0
|
|
0
|
my $func = shift; |
88
|
0
|
|
|
|
|
0
|
print "$func("; |
89
|
0
|
0
|
|
|
|
0
|
print join(",", map { if (defined $_) { |
|
0
|
|
|
|
|
0
|
|
90
|
0
|
|
|
|
|
0
|
my $x = overload::StrVal($_); |
91
|
0
|
|
|
|
|
0
|
$x =~ s/\n/\\n/g; |
92
|
0
|
0
|
|
|
|
0
|
$x = '""' unless length $x; |
93
|
0
|
|
|
|
|
0
|
$x; |
94
|
|
|
|
|
|
|
} else { |
95
|
0
|
|
|
|
|
0
|
"undef" |
96
|
|
|
|
|
|
|
} |
97
|
|
|
|
|
|
|
} @_); |
98
|
0
|
|
|
|
|
0
|
print ")\n"; |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
sub concat |
103
|
|
|
|
|
|
|
{ |
104
|
|
|
|
|
|
|
#_dump_arg("concat", @_); |
105
|
4
|
|
|
4
|
1
|
34
|
my($self, $other, $reversed) = @_; |
106
|
4
|
|
|
|
|
6
|
my $class = ref($self); |
107
|
4
|
100
|
|
|
|
17
|
unless (UNIVERSAL::isa($other, 'Unicode::String')) { |
108
|
2
|
|
|
|
|
6
|
$other = Unicode::String->new($other); |
109
|
|
|
|
|
|
|
} |
110
|
4
|
100
|
|
|
|
10
|
my $str = $reversed ? $$other . $$self : $$self . $$other; |
111
|
4
|
|
|
|
|
19
|
bless \$str, $class; |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
sub append |
116
|
|
|
|
|
|
|
{ |
117
|
|
|
|
|
|
|
#_dump_arg("append", @_); |
118
|
20
|
|
|
20
|
1
|
68
|
my($self, $other) = @_; |
119
|
20
|
50
|
|
|
|
110
|
unless (UNIVERSAL::isa($other, 'Unicode::String')) { |
120
|
0
|
|
|
|
|
0
|
$other = Unicode::String->new($other); |
121
|
|
|
|
|
|
|
} |
122
|
20
|
|
|
|
|
118
|
$$self .= $$other; |
123
|
20
|
|
|
|
|
223
|
$self; |
124
|
|
|
|
|
|
|
} |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
sub copy |
128
|
|
|
|
|
|
|
{ |
129
|
1
|
|
|
1
|
1
|
24
|
my($self) = @_; |
130
|
1
|
|
|
|
|
2
|
my $class = ref($self); |
131
|
1
|
|
|
|
|
2
|
my $copy = $$self; |
132
|
1
|
|
|
|
|
5
|
bless \$copy, $class; |
133
|
|
|
|
|
|
|
} |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
sub as_string |
137
|
|
|
|
|
|
|
{ |
138
|
|
|
|
|
|
|
#_dump_arg("as_string", @_); |
139
|
3
|
|
|
3
|
1
|
27
|
&$stringify_as($_[0]); |
140
|
|
|
|
|
|
|
} |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
sub as_bool |
144
|
|
|
|
|
|
|
{ |
145
|
|
|
|
|
|
|
# This is different from perl's normal behaviour by not letting |
146
|
|
|
|
|
|
|
# a U+0030 ("0") be false. |
147
|
3
|
|
|
3
|
1
|
22
|
my $self = shift; |
148
|
3
|
100
|
|
|
|
34
|
$$self ? 1 : ""; |
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
sub as_num |
153
|
|
|
|
|
|
|
{ |
154
|
|
|
|
|
|
|
# Should be able to use the numeric property from Unidata |
155
|
|
|
|
|
|
|
# in order to parse a large number of numbers. Currently we |
156
|
|
|
|
|
|
|
# only convert it to a plain string and let perl's normal |
157
|
|
|
|
|
|
|
# num-converter do the job. |
158
|
1
|
|
|
1
|
1
|
17
|
my $self = shift; |
159
|
1
|
|
|
|
|
3
|
my $str = $self->utf8; |
160
|
1
|
|
|
|
|
3
|
$str + 0; |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
sub stringify_as |
165
|
|
|
|
|
|
|
{ |
166
|
3
|
|
|
3
|
1
|
23
|
my $class; |
167
|
3
|
50
|
|
|
|
9
|
if (@_ > 1) { |
168
|
0
|
|
|
|
|
0
|
$class = shift; |
169
|
0
|
0
|
|
|
|
0
|
$class = ref($class) if ref($class); |
170
|
|
|
|
|
|
|
} else { |
171
|
3
|
|
|
|
|
5
|
$class = "Unicode::String"; |
172
|
|
|
|
|
|
|
} |
173
|
3
|
|
|
|
|
5
|
my $old = $stringify_as; |
174
|
3
|
50
|
|
|
|
9
|
if (@_) { |
175
|
3
|
|
|
|
|
4
|
my $as = shift; |
176
|
3
|
100
|
|
|
|
242
|
croak("Don't know how to stringify as '$as'") |
177
|
|
|
|
|
|
|
unless exists $stringify{$as}; |
178
|
2
|
|
|
|
|
5
|
$stringify_as = $stringify{$as}; |
179
|
|
|
|
|
|
|
} |
180
|
2
|
|
|
|
|
5
|
$old; |
181
|
|
|
|
|
|
|
} |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
sub utf16 |
185
|
|
|
|
|
|
|
{ |
186
|
78
|
|
|
78
|
0
|
716
|
my $self = shift; |
187
|
78
|
100
|
|
|
|
212
|
unless (ref $self) { |
188
|
29
|
|
|
|
|
92
|
my $u = new Unicode::String; |
189
|
29
|
|
|
|
|
75
|
$u->utf16($self); |
190
|
29
|
|
|
|
|
165
|
return $u; |
191
|
|
|
|
|
|
|
} |
192
|
49
|
|
|
|
|
67
|
my $old = $$self; |
193
|
49
|
100
|
|
|
|
97
|
if (@_) { |
194
|
31
|
|
|
|
|
50
|
$$self = shift; |
195
|
31
|
100
|
|
|
|
123
|
if ((length($$self) % 2) != 0) { |
196
|
1
|
50
|
|
|
|
97
|
warn "Uneven UTF16 data" if $^W; |
197
|
1
|
|
|
|
|
193
|
$$self .= "\0"; |
198
|
|
|
|
|
|
|
} |
199
|
31
|
100
|
|
|
|
90
|
if ($$self =~ /^\xFF\xFE/) { |
200
|
|
|
|
|
|
|
# the string needs byte swapping |
201
|
2
|
|
|
|
|
50
|
$$self = byteswap2($$self); |
202
|
|
|
|
|
|
|
} |
203
|
|
|
|
|
|
|
} |
204
|
49
|
|
|
|
|
121
|
$old; |
205
|
|
|
|
|
|
|
} |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
sub utf16le |
209
|
|
|
|
|
|
|
{ |
210
|
3
|
|
|
3
|
1
|
6
|
my $self = shift; |
211
|
3
|
100
|
|
|
|
14
|
unless (ref $self) { |
212
|
2
|
|
|
|
|
8
|
my $u = new Unicode::String; |
213
|
2
|
|
|
|
|
12
|
$u->utf16(byteswap2($self)); |
214
|
2
|
|
|
|
|
8
|
return $u; |
215
|
|
|
|
|
|
|
} |
216
|
1
|
|
|
|
|
13
|
my $old = byteswap2($$self); |
217
|
1
|
50
|
|
|
|
3
|
if (@_) { |
218
|
0
|
|
|
|
|
0
|
$self->utf16(byteswap2(shift)); |
219
|
|
|
|
|
|
|
} |
220
|
1
|
|
|
|
|
4
|
$old; |
221
|
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
sub utf32le |
225
|
|
|
|
|
|
|
{ |
226
|
2
|
|
|
2
|
1
|
4
|
my $self = shift; |
227
|
2
|
100
|
|
|
|
9
|
unless (ref $self) { |
228
|
1
|
|
|
|
|
4
|
my $u = new Unicode::String; |
229
|
1
|
|
|
|
|
6
|
$u->ucs4(byteswap4($self)); |
230
|
1
|
|
|
|
|
4
|
return $u; |
231
|
|
|
|
|
|
|
} |
232
|
1
|
|
|
|
|
12
|
my $old = byteswap4($self->ucs4); |
233
|
1
|
50
|
|
|
|
9
|
if (@_) { |
234
|
0
|
|
|
|
|
0
|
$self->ucs4(byteswap4(shift)); |
235
|
|
|
|
|
|
|
} |
236
|
1
|
|
|
|
|
4
|
$old; |
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
sub utf7 # rfc1642 |
241
|
|
|
|
|
|
|
{ |
242
|
32
|
|
|
32
|
1
|
2104
|
my $self = shift; |
243
|
32
|
100
|
|
|
|
148
|
unless (ref $self) { |
244
|
|
|
|
|
|
|
# act as ctor |
245
|
10
|
|
|
|
|
26
|
my $u = new Unicode::String; |
246
|
10
|
|
|
|
|
32
|
$u->utf7($self); |
247
|
10
|
|
|
|
|
35
|
return $u; |
248
|
|
|
|
|
|
|
} |
249
|
22
|
|
|
|
|
24
|
my $old; |
250
|
22
|
100
|
|
|
|
66
|
if (defined wantarray) { |
251
|
|
|
|
|
|
|
# encode into $old |
252
|
12
|
|
|
|
|
17
|
$old = ""; |
253
|
12
|
|
|
|
|
35
|
pos($$self) = 0; |
254
|
12
|
|
|
|
|
21
|
my $len = length($$self); |
255
|
12
|
|
|
|
|
30
|
while (pos($$self) < $len) { |
256
|
42
|
100
|
100
|
|
|
531
|
if (($UTF7_OPTIONAL_DIRECT_CHARS && |
|
|
50
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
257
|
|
|
|
|
|
|
$$self =~ /\G((?:\0[A-Za-z0-9\'\(\)\,\-\.\/\:\?\!\"\#\$\%\&\*\;\<\=\>\@\[\]\^\_\`\{\|\}\s])+)/gc) |
258
|
|
|
|
|
|
|
|| $$self =~ /\G((?:\0[A-Za-z0-9\'\(\)\,\-\.\/\:\?\s])+)/gc) |
259
|
|
|
|
|
|
|
{ |
260
|
|
|
|
|
|
|
#print "Plain ", utf16($1)->latin1, "\n"; |
261
|
19
|
|
|
|
|
41
|
$old .= utf16($1)->latin1; |
262
|
|
|
|
|
|
|
} |
263
|
|
|
|
|
|
|
elsif (($UTF7_OPTIONAL_DIRECT_CHARS && |
264
|
|
|
|
|
|
|
$$self =~ /\G((?:[^\0].|\0[^A-Za-z0-9\'\(\)\,\-\.\/\:\?\!\"\#\$\%\&\*\;\<\=\>\@\[\]\^\_\`\{\|\}\s])+)/gsc) |
265
|
|
|
|
|
|
|
|| $$self =~ /\G((?:[^\0].|\0[^A-Za-z0-9\'\(\)\,\-\.\/\:\?\s])+)/gsc) |
266
|
|
|
|
|
|
|
{ |
267
|
|
|
|
|
|
|
#print "Unplain ", utf16($1)->hex, "\n"; |
268
|
23
|
100
|
|
|
|
175
|
if ($1 eq "\0+") { |
269
|
2
|
|
|
|
|
6
|
$old .= "+-"; |
270
|
|
|
|
|
|
|
} else { |
271
|
21
|
|
|
|
|
1102
|
require MIME::Base64; |
272
|
21
|
|
|
|
|
933
|
my $base64 = MIME::Base64::encode($1, ''); |
273
|
21
|
|
|
|
|
79
|
$base64 =~ s/=+$//; |
274
|
21
|
|
|
|
|
81
|
$old .= "+$base64-"; |
275
|
|
|
|
|
|
|
# XXX should we determine when the final "-" is |
276
|
|
|
|
|
|
|
# unnecessary? depends on next char not being part |
277
|
|
|
|
|
|
|
# of the base64 char set. |
278
|
|
|
|
|
|
|
} |
279
|
|
|
|
|
|
|
} else { |
280
|
0
|
|
|
|
|
0
|
die "This should not happen, pos=" . pos($$self) . |
281
|
|
|
|
|
|
|
": " . $self->hex . "\n"; |
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
} |
284
|
|
|
|
|
|
|
} |
285
|
|
|
|
|
|
|
|
286
|
22
|
100
|
|
|
|
45
|
if (@_) { |
287
|
|
|
|
|
|
|
# decode |
288
|
11
|
|
|
|
|
21
|
my $len = length($_[0]); |
289
|
11
|
|
|
|
|
14
|
$$self = ""; |
290
|
11
|
|
|
|
|
31
|
pos($_[0]) = 0; |
291
|
11
|
|
|
|
|
32
|
while (pos($_[0]) < $len) { |
292
|
40
|
100
|
|
|
|
213
|
if ($_[0] =~ /\G([^+]+)/gc) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
293
|
18
|
|
|
|
|
152
|
$self->append(latin1($1)); |
294
|
|
|
|
|
|
|
} elsif ($_[0] =~ /\G\+-/gc) { |
295
|
2
|
|
|
|
|
7
|
$$self .= "\0+"; |
296
|
|
|
|
|
|
|
} elsif ($_[0] =~ /\G\+([A-Za-z0-9+\/]+)-?/gc) { |
297
|
20
|
|
|
|
|
76
|
my $base64 = $1; |
298
|
20
|
|
|
|
|
30
|
my $pad = length($base64) % 4; |
299
|
20
|
100
|
|
|
|
73
|
$base64 .= "=" x (4 - $pad) if $pad; |
300
|
20
|
|
|
|
|
93
|
require MIME::Base64; |
301
|
20
|
|
|
|
|
213
|
$$self .= MIME::Base64::decode($base64); |
302
|
20
|
50
|
|
|
|
84
|
if ((length($$self) % 2) != 0) { |
303
|
0
|
0
|
|
|
|
0
|
warn "Uneven UTF7 base64-data" if $^W; |
304
|
0
|
|
|
|
|
0
|
chop($$self); # correct it |
305
|
|
|
|
|
|
|
} |
306
|
|
|
|
|
|
|
} elsif ($_[0] =~ /\G\+/gc) { |
307
|
0
|
0
|
|
|
|
0
|
warn "Bad UTF7 data escape" if $^W; |
308
|
0
|
|
|
|
|
0
|
$$self .= "\0+"; |
309
|
|
|
|
|
|
|
} else { |
310
|
0
|
|
|
|
|
0
|
die "This should not happen " . pos($_[0]); |
311
|
|
|
|
|
|
|
} |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
} |
314
|
22
|
|
|
|
|
196
|
$old; |
315
|
|
|
|
|
|
|
} |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
sub hex |
319
|
|
|
|
|
|
|
{ |
320
|
27
|
|
|
27
|
1
|
1376
|
my $self = shift; |
321
|
27
|
50
|
|
|
|
94
|
unless (ref $self) { |
322
|
0
|
|
|
|
|
0
|
my $u = new Unicode::String; |
323
|
0
|
|
|
|
|
0
|
$u->hex($self); |
324
|
0
|
|
|
|
|
0
|
return $u; |
325
|
|
|
|
|
|
|
} |
326
|
27
|
|
|
|
|
63
|
my $old; |
327
|
27
|
50
|
33
|
|
|
232
|
if (defined($$self) && defined wantarray) { |
328
|
27
|
|
|
|
|
256
|
$old = unpack("H*", $$self); |
329
|
27
|
|
|
|
|
324
|
$old =~ s/(....)/U+$1 /g; |
330
|
27
|
|
|
|
|
155
|
$old =~ s/\s+$//; |
331
|
|
|
|
|
|
|
} |
332
|
27
|
50
|
|
|
|
73
|
if (@_) { |
333
|
0
|
|
|
|
|
0
|
my $new = shift; |
334
|
0
|
|
|
|
|
0
|
$new =~ tr/0-9A-Fa-f//cd; # leave only hex chars |
335
|
0
|
0
|
|
|
|
0
|
croak("Hex string length must be multiple of four") |
336
|
|
|
|
|
|
|
unless (length($new) % 4) == 0; |
337
|
0
|
|
|
|
|
0
|
$$self = pack("H*", $new); |
338
|
|
|
|
|
|
|
} |
339
|
27
|
|
|
|
|
240
|
$old; |
340
|
|
|
|
|
|
|
} |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
sub length |
344
|
|
|
|
|
|
|
{ |
345
|
6
|
|
|
6
|
1
|
2640
|
my $self = shift; |
346
|
6
|
|
|
|
|
114
|
int(length($$self) / 2); |
347
|
|
|
|
|
|
|
} |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
sub byteswap |
350
|
|
|
|
|
|
|
{ |
351
|
1
|
|
|
1
|
1
|
84
|
my $self = shift; |
352
|
1
|
|
|
|
|
87
|
byteswap2($$self); |
353
|
1
|
|
|
|
|
2
|
$self; |
354
|
|
|
|
|
|
|
} |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
sub unpack |
357
|
|
|
|
|
|
|
{ |
358
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
359
|
0
|
|
|
|
|
0
|
unpack("n*", $$self) |
360
|
|
|
|
|
|
|
} |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
sub pack |
364
|
|
|
|
|
|
|
{ |
365
|
6
|
|
|
6
|
1
|
105
|
my $self = shift; |
366
|
6
|
|
|
|
|
27
|
$$self = pack("n*", @_); |
367
|
6
|
|
|
|
|
142
|
$self; |
368
|
|
|
|
|
|
|
} |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
sub ord |
372
|
|
|
|
|
|
|
{ |
373
|
12
|
|
|
12
|
1
|
85
|
my $self = shift; |
374
|
12
|
50
|
|
|
|
29
|
return () unless defined $$self; |
375
|
|
|
|
|
|
|
|
376
|
12
|
|
|
|
|
15
|
my $array = wantarray; |
377
|
12
|
|
|
|
|
14
|
my @ret; |
378
|
|
|
|
|
|
|
my @chars; |
379
|
12
|
100
|
|
|
|
23
|
if ($array) { |
380
|
4
|
|
|
|
|
23
|
@chars = CORE::unpack("n*", $$self); |
381
|
|
|
|
|
|
|
} else { |
382
|
8
|
|
|
|
|
23
|
@chars = CORE::unpack("n2", $$self); |
383
|
|
|
|
|
|
|
} |
384
|
|
|
|
|
|
|
|
385
|
12
|
|
|
|
|
31
|
while (@chars) { |
386
|
20
|
|
|
|
|
29
|
my $first = shift(@chars); |
387
|
20
|
100
|
100
|
|
|
70
|
if ($first >= 0xD800 && $first <= 0xDFFF) { # surrogate |
388
|
6
|
|
|
|
|
8
|
my $second = shift(@chars); |
389
|
|
|
|
|
|
|
#print "F=$first S=$second\n"; |
390
|
6
|
100
|
100
|
|
|
43
|
if ($first >= 0xDC00 || $second < 0xDC00 || $second > 0xDFFF) { |
|
|
|
66
|
|
|
|
|
391
|
2
|
|
|
|
|
436
|
carp(sprintf("Bad surrogate pair (U+%04x U+%04x)", |
392
|
|
|
|
|
|
|
$first, $second)); |
393
|
2
|
|
|
|
|
74
|
unshift(@chars, $second); |
394
|
2
|
|
|
|
|
7
|
next; |
395
|
|
|
|
|
|
|
} |
396
|
4
|
|
|
|
|
10
|
push(@ret, ($first-0xD800)*0x400 + ($second-0xDC00) + 0x10000); |
397
|
|
|
|
|
|
|
} else { |
398
|
14
|
|
|
|
|
21
|
push(@ret, $first); |
399
|
|
|
|
|
|
|
} |
400
|
18
|
100
|
|
|
|
49
|
last unless $array; |
401
|
|
|
|
|
|
|
} |
402
|
12
|
100
|
|
|
|
49
|
$array ? @ret : $ret[0]; |
403
|
|
|
|
|
|
|
} |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
sub name |
407
|
|
|
|
|
|
|
{ |
408
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
409
|
0
|
|
|
|
|
0
|
require Unicode::CharName; |
410
|
0
|
0
|
|
|
|
0
|
if (wantarray) { |
411
|
0
|
|
|
|
|
0
|
return map { Unicode::CharName::uname($_) } $self->ord; |
|
0
|
|
|
|
|
0
|
|
412
|
|
|
|
|
|
|
} else { |
413
|
0
|
|
|
|
|
0
|
return Unicode::CharName::uname(scalar($self->ord)); |
414
|
|
|
|
|
|
|
} |
415
|
|
|
|
|
|
|
} |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
sub chr |
419
|
|
|
|
|
|
|
{ |
420
|
8
|
|
|
8
|
1
|
51
|
my($self,$val) = @_; |
421
|
8
|
100
|
|
|
|
21
|
unless (ref $self) { |
422
|
|
|
|
|
|
|
# act as ctor |
423
|
4
|
|
|
|
|
19
|
my $u = new Unicode::String; |
424
|
4
|
|
|
|
|
32
|
return $u->uchr($self); |
425
|
|
|
|
|
|
|
} |
426
|
4
|
100
|
|
|
|
13
|
if ($val > 0xFFFF) { |
427
|
|
|
|
|
|
|
# must be represented by a surrogate pair |
428
|
2
|
50
|
|
|
|
7
|
return undef if $val > 0x10FFFF; # Unicode limit |
429
|
2
|
|
|
|
|
12
|
$val -= 0x10000; |
430
|
2
|
|
|
|
|
7
|
my $h = int($val / 0x400) + 0xD800; |
431
|
2
|
|
|
|
|
5
|
my $l = ($val % 0x400) + 0xDC00; |
432
|
2
|
|
|
|
|
13
|
$$self = CORE::pack("n2", $h, $l); |
433
|
|
|
|
|
|
|
} else { |
434
|
2
|
|
|
|
|
95
|
$$self = CORE::pack("n", $val); |
435
|
|
|
|
|
|
|
} |
436
|
4
|
|
|
|
|
15
|
$self; |
437
|
|
|
|
|
|
|
} |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
sub substr |
441
|
|
|
|
|
|
|
{ |
442
|
6
|
|
|
6
|
1
|
42
|
my($self, $offset, $length, $substitute) = @_; |
443
|
6
|
|
100
|
|
|
15
|
$offset ||= 0; |
444
|
6
|
|
|
|
|
7
|
$offset *= 2; |
445
|
6
|
|
|
|
|
6
|
my $substr; |
446
|
6
|
50
|
|
|
|
10
|
if (defined $substitute) { |
447
|
0
|
0
|
|
|
|
0
|
unless (UNIVERSAL::isa($substitute, 'Unicode::String')) { |
448
|
0
|
|
|
|
|
0
|
$substitute = Unicode::String->new($substitute); |
449
|
|
|
|
|
|
|
} |
450
|
0
|
0
|
|
|
|
0
|
if (defined $length) { |
451
|
0
|
|
|
|
|
0
|
$substr = substr($$self, $offset, $length*2) = $$substitute; |
452
|
|
|
|
|
|
|
} else { |
453
|
0
|
|
|
|
|
0
|
$substr = substr($$self, $offset) = $$substitute; |
454
|
|
|
|
|
|
|
} |
455
|
|
|
|
|
|
|
} else { |
456
|
6
|
100
|
|
|
|
8
|
if (defined $length) { |
457
|
4
|
|
|
|
|
8
|
$substr = substr($$self, $offset, $length*2); |
458
|
|
|
|
|
|
|
} else { |
459
|
2
|
|
|
|
|
4
|
$substr = substr($$self, $offset); |
460
|
|
|
|
|
|
|
} |
461
|
|
|
|
|
|
|
} |
462
|
6
|
|
|
|
|
28
|
bless \$substr, ref($self); |
463
|
|
|
|
|
|
|
} |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
sub index |
467
|
|
|
|
|
|
|
{ |
468
|
5
|
|
|
5
|
1
|
30
|
my($self, $other, $pos) = @_; |
469
|
5
|
|
100
|
|
|
17
|
$pos ||= 0; |
470
|
5
|
|
|
|
|
5
|
$pos *= 2; |
471
|
5
|
50
|
|
|
|
10
|
$other = Unicode::String->new($other) unless ref($other); |
472
|
5
|
|
100
|
|
|
34
|
$pos++ while ($pos = index($$self, $$other, $pos)) > 0 && ($pos%2) != 0; |
473
|
5
|
100
|
|
|
|
11
|
$pos /= 2 if $pos > 0; |
474
|
5
|
|
|
|
|
16
|
$pos; |
475
|
|
|
|
|
|
|
} |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
sub rindex |
479
|
|
|
|
|
|
|
{ |
480
|
0
|
|
|
0
|
0
|
|
my($self, $other, $pos) = @_; |
481
|
0
|
|
0
|
|
|
|
$pos ||= 0; |
482
|
0
|
|
|
|
|
|
die "NYI"; |
483
|
|
|
|
|
|
|
} |
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
sub chop |
487
|
|
|
|
|
|
|
{ |
488
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
489
|
0
|
0
|
|
|
|
|
if (CORE::length $$self) { |
490
|
0
|
|
|
|
|
|
my $chop = chop($$self); |
491
|
0
|
|
|
|
|
|
$chop = chop($$self) . $chop; |
492
|
0
|
|
|
|
|
|
return bless \$chop, ref($self); |
493
|
|
|
|
|
|
|
} |
494
|
0
|
|
|
|
|
|
undef; |
495
|
|
|
|
|
|
|
} |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
# XXX: Ideas to be implemented |
499
|
|
|
|
|
|
|
sub scan; |
500
|
|
|
|
|
|
|
sub reverse; |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
sub lc; |
503
|
|
|
|
|
|
|
sub lcfirst; |
504
|
|
|
|
|
|
|
sub uc; |
505
|
|
|
|
|
|
|
sub ucfirst; |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
sub split; |
508
|
|
|
|
|
|
|
sub sprintf; |
509
|
|
|
|
|
|
|
sub study; |
510
|
|
|
|
|
|
|
sub tr; |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
1; |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
__END__ |