| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Unicode::String; | 
| 2 |  |  |  |  |  |  |  | 
| 3 |  |  |  |  |  |  | # Copyright 1997-1999, Gisle Aas. | 
| 4 |  |  |  |  |  |  |  | 
| 5 | 5 |  |  | 5 |  | 4786 | use strict; | 
|  | 5 |  |  |  |  | 5 |  | 
|  | 5 |  |  |  |  | 124 |  | 
| 6 | 5 |  |  | 5 |  | 15 | use vars qw($VERSION @ISA @EXPORT_OK $UTF7_OPTIONAL_DIRECT_CHARS); | 
|  | 5 |  |  |  |  | 6 |  | 
|  | 5 |  |  |  |  | 259 |  | 
| 7 | 5 |  |  | 5 |  | 15 | use Carp; | 
|  | 5 |  |  |  |  | 6 |  | 
|  | 5 |  |  |  |  | 680 |  | 
| 8 |  |  |  |  |  |  |  | 
| 9 |  |  |  |  |  |  | require Exporter; | 
| 10 |  |  |  |  |  |  | require DynaLoader; | 
| 11 |  |  |  |  |  |  | @ISA = qw(Exporter DynaLoader); | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  | @EXPORT_OK = qw( | 
| 14 |  |  |  |  |  |  | utf16 utf16le utf16be ucs2 | 
| 15 |  |  |  |  |  |  | utf8 | 
| 16 |  |  |  |  |  |  | utf7 | 
| 17 |  |  |  |  |  |  | ucs4 utf32 utf32be utf32le | 
| 18 |  |  |  |  |  |  | latin1 | 
| 19 |  |  |  |  |  |  | uchr uhex | 
| 20 |  |  |  |  |  |  |  | 
| 21 |  |  |  |  |  |  | byteswap2 byteswap4 | 
| 22 |  |  |  |  |  |  | ); | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | $VERSION = '2.10'; | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | $UTF7_OPTIONAL_DIRECT_CHARS ||= 1; | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | bootstrap Unicode::String $VERSION; | 
| 29 |  |  |  |  |  |  |  | 
| 30 | 5 |  |  |  |  | 39 | use overload '""'   => \&as_string, | 
| 31 |  |  |  |  |  |  | 'bool' => \&as_bool, | 
| 32 |  |  |  |  |  |  | '0+'   => \&as_num, | 
| 33 |  |  |  |  |  |  | '.='   => \&append, | 
| 34 |  |  |  |  |  |  | '.'    => \&concat, | 
| 35 |  |  |  |  |  |  | 'x'    => \&repeat, | 
| 36 |  |  |  |  |  |  | '='    => \&copy, | 
| 37 | 5 |  |  | 5 |  | 4462 | 'fallback' => 1; | 
|  | 5 |  |  |  |  | 3570 |  | 
| 38 |  |  |  |  |  |  |  | 
| 39 |  |  |  |  |  |  | my %stringify = ( | 
| 40 |  |  |  |  |  |  | unicode => \&utf16, | 
| 41 |  |  |  |  |  |  | utf16   => \&utf16, | 
| 42 |  |  |  |  |  |  | utf16be => \&utf16, | 
| 43 |  |  |  |  |  |  | utf16le => \&utf16le, | 
| 44 |  |  |  |  |  |  | ucs2    => \&utf16, | 
| 45 |  |  |  |  |  |  | utf8    => \&utf8, | 
| 46 |  |  |  |  |  |  | utf7    => \&utf7, | 
| 47 |  |  |  |  |  |  | ucs4    => \&ucs4, | 
| 48 |  |  |  |  |  |  | utf32   => \&ucs4, | 
| 49 |  |  |  |  |  |  | utf32be => \&ucs4, | 
| 50 |  |  |  |  |  |  | utf32le => \&utf32le, | 
| 51 |  |  |  |  |  |  | latin1  => \&latin1, | 
| 52 |  |  |  |  |  |  | 'hex'    => \&hex, | 
| 53 |  |  |  |  |  |  | ); | 
| 54 |  |  |  |  |  |  |  | 
| 55 |  |  |  |  |  |  | my $stringify_as = \&utf8; | 
| 56 |  |  |  |  |  |  |  | 
| 57 |  |  |  |  |  |  | # some aliases | 
| 58 |  |  |  |  |  |  | *ucs2 = \&utf16; | 
| 59 |  |  |  |  |  |  | *utf16be = \&utf16; | 
| 60 |  |  |  |  |  |  | *utf32 = \&ucs4; | 
| 61 |  |  |  |  |  |  | *utf32be = \&ucs4; | 
| 62 |  |  |  |  |  |  | *uhex = \&hex; | 
| 63 |  |  |  |  |  |  | *uchr = \&chr; | 
| 64 |  |  |  |  |  |  |  | 
| 65 |  |  |  |  |  |  | sub new | 
| 66 |  |  |  |  |  |  | { | 
| 67 |  |  |  |  |  |  | #_dump_arg("new", @_); | 
| 68 | 54 |  |  | 54 | 1 | 796 | my $class = shift; | 
| 69 | 54 |  |  |  |  | 38 | my $str; | 
| 70 | 54 |  |  |  |  | 58 | my $self = bless \$str, $class; | 
| 71 | 54 | 100 |  |  |  | 83 | &$stringify_as($self, shift) if @_; | 
| 72 | 54 |  |  |  |  | 59 | $self; | 
| 73 |  |  |  |  |  |  | } | 
| 74 |  |  |  |  |  |  |  | 
| 75 |  |  |  |  |  |  |  | 
| 76 |  |  |  |  |  |  | sub repeat | 
| 77 |  |  |  |  |  |  | { | 
| 78 | 1 |  |  | 1 | 1 | 6 | my($self, $count) = @_; | 
| 79 | 1 |  |  |  |  | 1 | my $class = ref($self); | 
| 80 | 1 |  |  |  |  | 3 | my $str = $$self x $count; | 
| 81 | 1 |  |  |  |  | 2 | bless \$str, $class; | 
| 82 |  |  |  |  |  |  | } | 
| 83 |  |  |  |  |  |  |  | 
| 84 |  |  |  |  |  |  |  | 
| 85 |  |  |  |  |  |  | sub _dump_arg | 
| 86 |  |  |  |  |  |  | { | 
| 87 | 0 |  |  | 0 |  | 0 | my $func = shift; | 
| 88 | 0 |  |  |  |  | 0 | print "$func("; | 
| 89 | 0 | 0 |  |  |  | 0 | print join(",", map { if (defined $_) { | 
|  | 0 |  |  |  |  | 0 |  | 
| 90 | 0 |  |  |  |  | 0 | my $x = overload::StrVal($_); | 
| 91 | 0 |  |  |  |  | 0 | $x =~ s/\n/\\n/g; | 
| 92 | 0 | 0 |  |  |  | 0 | $x = '""' unless length $x; | 
| 93 | 0 |  |  |  |  | 0 | $x; | 
| 94 |  |  |  |  |  |  | } else { | 
| 95 | 0 |  |  |  |  | 0 | "undef" | 
| 96 |  |  |  |  |  |  | } | 
| 97 |  |  |  |  |  |  | } @_); | 
| 98 | 0 |  |  |  |  | 0 | print ")\n"; | 
| 99 |  |  |  |  |  |  | } | 
| 100 |  |  |  |  |  |  |  | 
| 101 |  |  |  |  |  |  |  | 
| 102 |  |  |  |  |  |  | sub concat | 
| 103 |  |  |  |  |  |  | { | 
| 104 |  |  |  |  |  |  | #_dump_arg("concat", @_); | 
| 105 | 4 |  |  | 4 | 1 | 21 | my($self, $other, $reversed) = @_; | 
| 106 | 4 |  |  |  |  | 5 | my $class = ref($self); | 
| 107 | 4 | 100 |  |  |  | 10 | unless (UNIVERSAL::isa($other, 'Unicode::String')) { | 
| 108 | 2 |  |  |  |  | 4 | $other = Unicode::String->new($other); | 
| 109 |  |  |  |  |  |  | } | 
| 110 | 4 | 100 |  |  |  | 9 | my $str = $reversed ? $$other . $$self : $$self . $$other; | 
| 111 | 4 |  |  |  |  | 9 | bless \$str, $class; | 
| 112 |  |  |  |  |  |  | } | 
| 113 |  |  |  |  |  |  |  | 
| 114 |  |  |  |  |  |  |  | 
| 115 |  |  |  |  |  |  | sub append | 
| 116 |  |  |  |  |  |  | { | 
| 117 |  |  |  |  |  |  | #_dump_arg("append", @_); | 
| 118 | 20 |  |  | 20 | 1 | 56 | my($self, $other) = @_; | 
| 119 | 20 | 50 |  |  |  | 43 | unless (UNIVERSAL::isa($other, 'Unicode::String')) { | 
| 120 | 0 |  |  |  |  | 0 | $other = Unicode::String->new($other); | 
| 121 |  |  |  |  |  |  | } | 
| 122 | 20 |  |  |  |  | 66 | $$self .= $$other; | 
| 123 | 20 |  |  |  |  | 31 | $self; | 
| 124 |  |  |  |  |  |  | } | 
| 125 |  |  |  |  |  |  |  | 
| 126 |  |  |  |  |  |  |  | 
| 127 |  |  |  |  |  |  | sub copy | 
| 128 |  |  |  |  |  |  | { | 
| 129 | 1 |  |  | 1 | 1 | 14 | my($self) = @_; | 
| 130 | 1 |  |  |  |  | 2 | my $class = ref($self); | 
| 131 | 1 |  |  |  |  | 2 | my $copy = $$self; | 
| 132 | 1 |  |  |  |  | 2 | bless \$copy, $class; | 
| 133 |  |  |  |  |  |  | } | 
| 134 |  |  |  |  |  |  |  | 
| 135 |  |  |  |  |  |  |  | 
| 136 |  |  |  |  |  |  | sub as_string | 
| 137 |  |  |  |  |  |  | { | 
| 138 |  |  |  |  |  |  | #_dump_arg("as_string", @_); | 
| 139 | 3 |  |  | 3 | 1 | 16 | &$stringify_as($_[0]); | 
| 140 |  |  |  |  |  |  | } | 
| 141 |  |  |  |  |  |  |  | 
| 142 |  |  |  |  |  |  |  | 
| 143 |  |  |  |  |  |  | sub as_bool | 
| 144 |  |  |  |  |  |  | { | 
| 145 |  |  |  |  |  |  | # This is different from perl's normal behaviour by not letting | 
| 146 |  |  |  |  |  |  | # a U+0030  ("0") be false. | 
| 147 | 3 |  |  | 3 | 1 | 12 | my $self = shift; | 
| 148 | 3 | 100 |  |  |  | 16 | $$self ? 1 : ""; | 
| 149 |  |  |  |  |  |  | } | 
| 150 |  |  |  |  |  |  |  | 
| 151 |  |  |  |  |  |  |  | 
| 152 |  |  |  |  |  |  | sub as_num | 
| 153 |  |  |  |  |  |  | { | 
| 154 |  |  |  |  |  |  | # Should be able to use the numeric property from Unidata | 
| 155 |  |  |  |  |  |  | # in order to parse a large number of numbers.  Currently we | 
| 156 |  |  |  |  |  |  | # only convert it to a plain string and let perl's normal | 
| 157 |  |  |  |  |  |  | # num-converter do the job. | 
| 158 | 1 |  |  | 1 | 1 | 10 | my $self = shift; | 
| 159 | 1 |  |  |  |  | 2 | my $str = $self->utf8; | 
| 160 | 1 |  |  |  |  | 2 | $str + 0; | 
| 161 |  |  |  |  |  |  | } | 
| 162 |  |  |  |  |  |  |  | 
| 163 |  |  |  |  |  |  |  | 
| 164 |  |  |  |  |  |  | sub stringify_as | 
| 165 |  |  |  |  |  |  | { | 
| 166 | 3 |  |  | 3 | 1 | 21 | my $class; | 
| 167 | 3 | 50 |  |  |  | 6 | if (@_ > 1) { | 
| 168 | 0 |  |  |  |  | 0 | $class = shift; | 
| 169 | 0 | 0 |  |  |  | 0 | $class = ref($class) if ref($class); | 
| 170 |  |  |  |  |  |  | } else { | 
| 171 | 3 |  |  |  |  | 3 | $class = "Unicode::String"; | 
| 172 |  |  |  |  |  |  | } | 
| 173 | 3 |  |  |  |  | 3 | my $old = $stringify_as; | 
| 174 | 3 | 50 |  |  |  | 5 | if (@_) { | 
| 175 | 3 |  |  |  |  | 2 | my $as = shift; | 
| 176 |  |  |  |  |  |  | croak("Don't know how to stringify as '$as'") | 
| 177 | 3 | 100 |  |  |  | 156 | unless exists $stringify{$as}; | 
| 178 | 2 |  |  |  |  | 4 | $stringify_as = $stringify{$as}; | 
| 179 |  |  |  |  |  |  | } | 
| 180 | 2 |  |  |  |  | 2 | $old; | 
| 181 |  |  |  |  |  |  | } | 
| 182 |  |  |  |  |  |  |  | 
| 183 |  |  |  |  |  |  |  | 
| 184 |  |  |  |  |  |  | sub utf16 | 
| 185 |  |  |  |  |  |  | { | 
| 186 | 78 |  |  | 78 | 0 | 255 | my $self = shift; | 
| 187 | 78 | 100 |  |  |  | 105 | unless (ref $self) { | 
| 188 | 29 |  |  |  |  | 37 | my $u = new Unicode::String; | 
| 189 | 29 |  |  |  |  | 41 | $u->utf16($self); | 
| 190 | 29 |  |  |  |  | 91 | return $u; | 
| 191 |  |  |  |  |  |  | } | 
| 192 | 49 |  |  |  |  | 45 | my $old = $$self; | 
| 193 | 49 | 100 |  |  |  | 63 | if (@_) { | 
| 194 | 31 |  |  |  |  | 31 | $$self = shift; | 
| 195 | 31 | 100 |  |  |  | 50 | if ((length($$self) % 2) != 0) { | 
| 196 | 1 | 50 |  |  |  | 17 | warn "Uneven UTF16 data" if $^W; | 
| 197 | 1 |  |  |  |  | 82 | $$self .= "\0"; | 
| 198 |  |  |  |  |  |  | } | 
| 199 | 31 | 100 |  |  |  | 54 | if ($$self =~ /^\xFF\xFE/) { | 
| 200 |  |  |  |  |  |  | # the string needs byte swapping | 
| 201 | 2 |  |  |  |  | 5 | $$self = byteswap2($$self); | 
| 202 |  |  |  |  |  |  | } | 
| 203 |  |  |  |  |  |  | } | 
| 204 | 49 |  |  |  |  | 61 | $old; | 
| 205 |  |  |  |  |  |  | } | 
| 206 |  |  |  |  |  |  |  | 
| 207 |  |  |  |  |  |  |  | 
| 208 |  |  |  |  |  |  | sub utf16le | 
| 209 |  |  |  |  |  |  | { | 
| 210 | 3 |  |  | 3 | 1 | 5 | my $self = shift; | 
| 211 | 3 | 100 |  |  |  | 7 | unless (ref $self) { | 
| 212 | 2 |  |  |  |  | 4 | my $u = new Unicode::String; | 
| 213 | 2 |  |  |  |  | 7 | $u->utf16(byteswap2($self)); | 
| 214 | 2 |  |  |  |  | 3 | return $u; | 
| 215 |  |  |  |  |  |  | } | 
| 216 | 1 |  |  |  |  | 3 | my $old = byteswap2($$self); | 
| 217 | 1 | 50 |  |  |  | 6 | if (@_) { | 
| 218 | 0 |  |  |  |  | 0 | $self->utf16(byteswap2(shift)); | 
| 219 |  |  |  |  |  |  | } | 
| 220 | 1 |  |  |  |  | 3 | $old; | 
| 221 |  |  |  |  |  |  | } | 
| 222 |  |  |  |  |  |  |  | 
| 223 |  |  |  |  |  |  |  | 
| 224 |  |  |  |  |  |  | sub utf32le | 
| 225 |  |  |  |  |  |  | { | 
| 226 | 2 |  |  | 2 | 1 | 4 | my $self = shift; | 
| 227 | 2 | 100 |  |  |  | 5 | unless (ref $self) { | 
| 228 | 1 |  |  |  |  | 4 | my $u = new Unicode::String; | 
| 229 | 1 |  |  |  |  | 5 | $u->ucs4(byteswap4($self)); | 
| 230 | 1 |  |  |  |  | 2 | return $u; | 
| 231 |  |  |  |  |  |  | } | 
| 232 | 1 |  |  |  |  | 8 | my $old = byteswap4($self->ucs4); | 
| 233 | 1 | 50 |  |  |  | 5 | if (@_) { | 
| 234 | 0 |  |  |  |  | 0 | $self->ucs4(byteswap4(shift)); | 
| 235 |  |  |  |  |  |  | } | 
| 236 | 1 |  |  |  |  | 3 | $old; | 
| 237 |  |  |  |  |  |  | } | 
| 238 |  |  |  |  |  |  |  | 
| 239 |  |  |  |  |  |  |  | 
| 240 |  |  |  |  |  |  | sub utf7   # rfc1642 | 
| 241 |  |  |  |  |  |  | { | 
| 242 | 32 |  |  | 32 | 1 | 547 | my $self = shift; | 
| 243 | 32 | 100 |  |  |  | 46 | unless (ref $self) { | 
| 244 |  |  |  |  |  |  | # act as ctor | 
| 245 | 10 |  |  |  |  | 18 | my $u = new Unicode::String; | 
| 246 | 10 |  |  |  |  | 22 | $u->utf7($self); | 
| 247 | 10 |  |  |  |  | 18 | return $u; | 
| 248 |  |  |  |  |  |  | } | 
| 249 | 22 |  |  |  |  | 16 | my $old; | 
| 250 | 22 | 100 |  |  |  | 51 | if (defined wantarray) { | 
| 251 |  |  |  |  |  |  | # encode into $old | 
| 252 | 12 |  |  |  |  | 8 | $old = ""; | 
| 253 | 12 |  |  |  |  | 22 | pos($$self) = 0; | 
| 254 | 12 |  |  |  |  | 14 | my $len = length($$self); | 
| 255 | 12 |  |  |  |  | 18 | while (pos($$self) < $len) { | 
| 256 | 42 | 100 | 100 |  |  | 308 | if (($UTF7_OPTIONAL_DIRECT_CHARS && | 
|  |  | 50 | 100 |  |  |  |  | 
|  |  |  | 66 |  |  |  |  | 
|  |  |  | 66 |  |  |  |  | 
| 257 |  |  |  |  |  |  | $$self =~ /\G((?:\0[A-Za-z0-9\'\(\)\,\-\.\/\:\?\!\"\#\$\%\&\*\;\<\=\>\@\[\]\^\_\`\{\|\}\s])+)/gc) | 
| 258 |  |  |  |  |  |  | || $$self =~ /\G((?:\0[A-Za-z0-9\'\(\)\,\-\.\/\:\?\s])+)/gc) | 
| 259 |  |  |  |  |  |  | { | 
| 260 |  |  |  |  |  |  | #print "Plain ", utf16($1)->latin1, "\n"; | 
| 261 | 19 |  |  |  |  | 25 | $old .= utf16($1)->latin1; | 
| 262 |  |  |  |  |  |  | } | 
| 263 |  |  |  |  |  |  | elsif (($UTF7_OPTIONAL_DIRECT_CHARS && | 
| 264 |  |  |  |  |  |  | $$self =~ /\G((?:[^\0].|\0[^A-Za-z0-9\'\(\)\,\-\.\/\:\?\!\"\#\$\%\&\*\;\<\=\>\@\[\]\^\_\`\{\|\}\s])+)/gsc) | 
| 265 |  |  |  |  |  |  | || $$self =~ /\G((?:[^\0].|\0[^A-Za-z0-9\'\(\)\,\-\.\/\:\?\s])+)/gsc) | 
| 266 |  |  |  |  |  |  | { | 
| 267 |  |  |  |  |  |  | #print "Unplain ", utf16($1)->hex, "\n"; | 
| 268 | 23 | 100 |  |  |  | 37 | if ($1 eq "\0+") { | 
| 269 | 2 |  |  |  |  | 4 | $old .= "+-"; | 
| 270 |  |  |  |  |  |  | } else { | 
| 271 | 21 |  |  |  |  | 535 | require MIME::Base64; | 
| 272 | 21 |  |  |  |  | 567 | my $base64 = MIME::Base64::encode($1, ''); | 
| 273 | 21 |  |  |  |  | 40 | $base64 =~ s/=+$//; | 
| 274 | 21 |  |  |  |  | 48 | $old .= "+$base64-"; | 
| 275 |  |  |  |  |  |  | # XXX should we determine when the final "-" is | 
| 276 |  |  |  |  |  |  | # unnecessary? depends on next char not being part | 
| 277 |  |  |  |  |  |  | # of the base64 char set. | 
| 278 |  |  |  |  |  |  | } | 
| 279 |  |  |  |  |  |  | } else { | 
| 280 | 0 |  |  |  |  | 0 | die "This should not happen, pos=" . pos($$self) . | 
| 281 |  |  |  |  |  |  | ":  "  . $self->hex . "\n"; | 
| 282 |  |  |  |  |  |  | } | 
| 283 |  |  |  |  |  |  | } | 
| 284 |  |  |  |  |  |  | } | 
| 285 |  |  |  |  |  |  |  | 
| 286 | 22 | 100 |  |  |  | 30 | if (@_) { | 
| 287 |  |  |  |  |  |  | # decode | 
| 288 | 11 |  |  |  |  | 10 | my $len = length($_[0]); | 
| 289 | 11 |  |  |  |  | 11 | $$self = ""; | 
| 290 | 11 |  |  |  |  | 14 | pos($_[0]) = 0; | 
| 291 | 11 |  |  |  |  | 19 | while (pos($_[0]) < $len) { | 
| 292 | 40 | 100 |  |  |  | 117 | if ($_[0] =~ /\G([^+]+)/gc) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 293 | 18 |  |  |  |  | 60 | $self->append(latin1($1)); | 
| 294 |  |  |  |  |  |  | } elsif ($_[0] =~ /\G\+-/gc) { | 
| 295 | 2 |  |  |  |  | 4 | $$self .= "\0+"; | 
| 296 |  |  |  |  |  |  | } elsif ($_[0] =~ /\G\+([A-Za-z0-9+\/]+)-?/gc) { | 
| 297 | 20 |  |  |  |  | 22 | my $base64 = $1; | 
| 298 | 20 |  |  |  |  | 20 | my $pad = length($base64) % 4; | 
| 299 | 20 | 100 |  |  |  | 32 | $base64 .= "=" x (4 - $pad) if $pad; | 
| 300 | 20 |  |  |  |  | 50 | require MIME::Base64; | 
| 301 | 20 |  |  |  |  | 36 | $$self .= MIME::Base64::decode($base64); | 
| 302 | 20 | 50 |  |  |  | 45 | if ((length($$self) % 2) != 0) { | 
| 303 | 0 | 0 |  |  |  | 0 | warn "Uneven UTF7 base64-data" if $^W; | 
| 304 | 0 |  |  |  |  | 0 | chop($$self); # correct it | 
| 305 |  |  |  |  |  |  | } | 
| 306 |  |  |  |  |  |  | } elsif ($_[0] =~ /\G\+/gc) { | 
| 307 | 0 | 0 |  |  |  | 0 | warn "Bad UTF7 data escape" if $^W; | 
| 308 | 0 |  |  |  |  | 0 | $$self .= "\0+"; | 
| 309 |  |  |  |  |  |  | } else { | 
| 310 | 0 |  |  |  |  | 0 | die "This should not happen " . pos($_[0]); | 
| 311 |  |  |  |  |  |  | } | 
| 312 |  |  |  |  |  |  | } | 
| 313 |  |  |  |  |  |  | } | 
| 314 | 22 |  |  |  |  | 37 | $old; | 
| 315 |  |  |  |  |  |  | } | 
| 316 |  |  |  |  |  |  |  | 
| 317 |  |  |  |  |  |  |  | 
| 318 |  |  |  |  |  |  | sub hex | 
| 319 |  |  |  |  |  |  | { | 
| 320 | 27 |  |  | 27 | 1 | 330 | my $self = shift; | 
| 321 | 27 | 50 |  |  |  | 51 | unless (ref $self) { | 
| 322 | 0 |  |  |  |  | 0 | my $u = new Unicode::String; | 
| 323 | 0 |  |  |  |  | 0 | $u->hex($self); | 
| 324 | 0 |  |  |  |  | 0 | return $u; | 
| 325 |  |  |  |  |  |  | } | 
| 326 | 27 |  |  |  |  | 19 | my $old; | 
| 327 | 27 | 50 | 33 |  |  | 134 | if (defined($$self) && defined wantarray) { | 
| 328 | 27 |  |  |  |  | 53 | $old = unpack("H*", $$self); | 
| 329 | 27 |  |  |  |  | 188 | $old =~ s/(....)/U+$1 /g; | 
| 330 | 27 |  |  |  |  | 94 | $old =~ s/\s+$//; | 
| 331 |  |  |  |  |  |  | } | 
| 332 | 27 | 50 |  |  |  | 45 | if (@_) { | 
| 333 | 0 |  |  |  |  | 0 | my $new = shift; | 
| 334 | 0 |  |  |  |  | 0 | $new =~ tr/0-9A-Fa-f//cd;  # leave only hex chars | 
| 335 | 0 | 0 |  |  |  | 0 | croak("Hex string length must be multiple of four") | 
| 336 |  |  |  |  |  |  | unless (length($new) % 4) == 0; | 
| 337 | 0 |  |  |  |  | 0 | $$self = pack("H*", $new); | 
| 338 |  |  |  |  |  |  | } | 
| 339 | 27 |  |  |  |  | 141 | $old; | 
| 340 |  |  |  |  |  |  | } | 
| 341 |  |  |  |  |  |  |  | 
| 342 |  |  |  |  |  |  |  | 
| 343 |  |  |  |  |  |  | sub length | 
| 344 |  |  |  |  |  |  | { | 
| 345 | 6 |  |  | 6 | 1 | 1699 | my $self = shift; | 
| 346 | 6 |  |  |  |  | 56 | int(length($$self) / 2); | 
| 347 |  |  |  |  |  |  | } | 
| 348 |  |  |  |  |  |  |  | 
| 349 |  |  |  |  |  |  | sub byteswap | 
| 350 |  |  |  |  |  |  | { | 
| 351 | 1 |  |  | 1 | 1 | 62 | my $self = shift; | 
| 352 | 1 |  |  |  |  | 42 | byteswap2($$self); | 
| 353 | 1 |  |  |  |  | 1 | $self; | 
| 354 |  |  |  |  |  |  | } | 
| 355 |  |  |  |  |  |  |  | 
| 356 |  |  |  |  |  |  | sub unpack | 
| 357 |  |  |  |  |  |  | { | 
| 358 | 0 |  |  | 0 | 1 | 0 | my $self = shift; | 
| 359 | 0 |  |  |  |  | 0 | unpack("n*", $$self) | 
| 360 |  |  |  |  |  |  | } | 
| 361 |  |  |  |  |  |  |  | 
| 362 |  |  |  |  |  |  |  | 
| 363 |  |  |  |  |  |  | sub pack | 
| 364 |  |  |  |  |  |  | { | 
| 365 | 6 |  |  | 6 | 1 | 36 | my $self = shift; | 
| 366 | 6 |  |  |  |  | 16 | $$self = pack("n*", @_); | 
| 367 | 6 |  |  |  |  | 6 | $self; | 
| 368 |  |  |  |  |  |  | } | 
| 369 |  |  |  |  |  |  |  | 
| 370 |  |  |  |  |  |  |  | 
| 371 |  |  |  |  |  |  | sub ord | 
| 372 |  |  |  |  |  |  | { | 
| 373 | 12 |  |  | 12 | 1 | 53 | my $self = shift; | 
| 374 | 12 | 50 |  |  |  | 16 | return () unless defined $$self; | 
| 375 |  |  |  |  |  |  |  | 
| 376 | 12 |  |  |  |  | 11 | my $array = wantarray; | 
| 377 | 12 |  |  |  |  | 10 | my @ret; | 
| 378 |  |  |  |  |  |  | my @chars; | 
| 379 | 12 | 100 |  |  |  | 13 | if ($array) { | 
| 380 | 4 |  |  |  |  | 7 | @chars = CORE::unpack("n*", $$self); | 
| 381 |  |  |  |  |  |  | } else { | 
| 382 | 8 |  |  |  |  | 11 | @chars = CORE::unpack("n2", $$self); | 
| 383 |  |  |  |  |  |  | } | 
| 384 |  |  |  |  |  |  |  | 
| 385 | 12 |  |  |  |  | 19 | while (@chars) { | 
| 386 | 20 |  |  |  |  | 15 | my $first = shift(@chars); | 
| 387 | 20 | 100 | 100 |  |  | 43 | if ($first >= 0xD800 && $first <= 0xDFFF) { 	# surrogate | 
| 388 | 6 |  |  |  |  | 5 | my $second = shift(@chars); | 
| 389 |  |  |  |  |  |  | #print "F=$first S=$second\n"; | 
| 390 | 6 | 100 | 100 |  |  | 28 | if ($first >= 0xDC00 || $second < 0xDC00 || $second > 0xDFFF) { | 
|  |  |  | 66 |  |  |  |  | 
| 391 | 2 |  |  |  |  | 209 | carp(sprintf("Bad surrogate pair (U+%04x U+%04x)", | 
| 392 |  |  |  |  |  |  | $first, $second)); | 
| 393 | 2 |  |  |  |  | 57 | unshift(@chars, $second); | 
| 394 | 2 |  |  |  |  | 4 | next; | 
| 395 |  |  |  |  |  |  | } | 
| 396 | 4 |  |  |  |  | 7 | push(@ret, ($first-0xD800)*0x400 + ($second-0xDC00) + 0x10000); | 
| 397 |  |  |  |  |  |  | } else { | 
| 398 | 14 |  |  |  |  | 11 | push(@ret, $first); | 
| 399 |  |  |  |  |  |  | } | 
| 400 | 18 | 100 |  |  |  | 27 | last unless $array; | 
| 401 |  |  |  |  |  |  | } | 
| 402 | 12 | 100 |  |  |  | 28 | $array ? @ret : $ret[0]; | 
| 403 |  |  |  |  |  |  | } | 
| 404 |  |  |  |  |  |  |  | 
| 405 |  |  |  |  |  |  |  | 
| 406 |  |  |  |  |  |  | sub name | 
| 407 |  |  |  |  |  |  | { | 
| 408 | 0 |  |  | 0 | 1 | 0 | my $self = shift; | 
| 409 | 0 |  |  |  |  | 0 | require Unicode::CharName; | 
| 410 | 0 | 0 |  |  |  | 0 | if (wantarray) { | 
| 411 | 0 |  |  |  |  | 0 | return map { Unicode::CharName::uname($_) } $self->ord; | 
|  | 0 |  |  |  |  | 0 |  | 
| 412 |  |  |  |  |  |  | } else { | 
| 413 | 0 |  |  |  |  | 0 | return Unicode::CharName::uname(scalar($self->ord)); | 
| 414 |  |  |  |  |  |  | } | 
| 415 |  |  |  |  |  |  | } | 
| 416 |  |  |  |  |  |  |  | 
| 417 |  |  |  |  |  |  |  | 
| 418 |  |  |  |  |  |  | sub chr | 
| 419 |  |  |  |  |  |  | { | 
| 420 | 8 |  |  | 8 | 1 | 81 | my($self,$val) = @_; | 
| 421 | 8 | 100 |  |  |  | 12 | unless (ref $self) { | 
| 422 |  |  |  |  |  |  | # act as ctor | 
| 423 | 4 |  |  |  |  | 8 | my $u = new Unicode::String; | 
| 424 | 4 |  |  |  |  | 8 | return $u->uchr($self); | 
| 425 |  |  |  |  |  |  | } | 
| 426 | 4 | 100 |  |  |  | 6 | if ($val > 0xFFFF) { | 
| 427 |  |  |  |  |  |  | # must be represented by a surrogate pair | 
| 428 | 2 | 50 |  |  |  | 4 | return undef if $val > 0x10FFFF;  # Unicode limit | 
| 429 | 2 |  |  |  |  | 2 | $val -= 0x10000; | 
| 430 | 2 |  |  |  |  | 5 | my $h = int($val / 0x400) + 0xD800; | 
| 431 | 2 |  |  |  |  | 2 | my $l = ($val % 0x400) + 0xDC00; | 
| 432 | 2 |  |  |  |  | 4 | $$self = CORE::pack("n2", $h, $l); | 
| 433 |  |  |  |  |  |  | } else { | 
| 434 | 2 |  |  |  |  | 40 | $$self = CORE::pack("n", $val); | 
| 435 |  |  |  |  |  |  | } | 
| 436 | 4 |  |  |  |  | 13 | $self; | 
| 437 |  |  |  |  |  |  | } | 
| 438 |  |  |  |  |  |  |  | 
| 439 |  |  |  |  |  |  |  | 
| 440 |  |  |  |  |  |  | sub substr | 
| 441 |  |  |  |  |  |  | { | 
| 442 | 6 |  |  | 6 | 1 | 27 | my($self, $offset, $length, $substitute) = @_; | 
| 443 | 6 |  | 100 |  |  | 12 | $offset ||= 0; | 
| 444 | 6 |  |  |  |  | 4 | $offset *= 2; | 
| 445 | 6 |  |  |  |  | 7 | my $substr; | 
| 446 | 6 | 50 |  |  |  | 7 | if (defined $substitute) { | 
| 447 | 0 | 0 |  |  |  | 0 | unless (UNIVERSAL::isa($substitute, 'Unicode::String')) { | 
| 448 | 0 |  |  |  |  | 0 | $substitute = Unicode::String->new($substitute); | 
| 449 |  |  |  |  |  |  | } | 
| 450 | 0 | 0 |  |  |  | 0 | if (defined $length) { | 
| 451 | 0 |  |  |  |  | 0 | $substr = substr($$self, $offset, $length*2) = $$substitute; | 
| 452 |  |  |  |  |  |  | } else { | 
| 453 | 0 |  |  |  |  | 0 | $substr = substr($$self, $offset) = $$substitute; | 
| 454 |  |  |  |  |  |  | } | 
| 455 |  |  |  |  |  |  | } else { | 
| 456 | 6 | 100 |  |  |  | 7 | if (defined $length) { | 
| 457 | 4 |  |  |  |  | 6 | $substr = substr($$self, $offset, $length*2); | 
| 458 |  |  |  |  |  |  | } else { | 
| 459 | 2 |  |  |  |  | 4 | $substr = substr($$self, $offset); | 
| 460 |  |  |  |  |  |  | } | 
| 461 |  |  |  |  |  |  | } | 
| 462 | 6 |  |  |  |  | 14 | bless \$substr, ref($self); | 
| 463 |  |  |  |  |  |  | } | 
| 464 |  |  |  |  |  |  |  | 
| 465 |  |  |  |  |  |  |  | 
| 466 |  |  |  |  |  |  | sub index | 
| 467 |  |  |  |  |  |  | { | 
| 468 | 5 |  |  | 5 | 1 | 16 | my($self, $other, $pos) = @_; | 
| 469 | 5 |  | 100 |  |  | 11 | $pos ||= 0; | 
| 470 | 5 |  |  |  |  | 5 | $pos *= 2; | 
| 471 | 5 | 50 |  |  |  | 7 | $other = Unicode::String->new($other) unless ref($other); | 
| 472 | 5 |  | 100 |  |  | 21 | $pos++ while ($pos = index($$self, $$other, $pos)) > 0 && ($pos%2) != 0; | 
| 473 | 5 | 100 |  |  |  | 8 | $pos /= 2 if $pos > 0; | 
| 474 | 5 |  |  |  |  | 5 | $pos; | 
| 475 |  |  |  |  |  |  | } | 
| 476 |  |  |  |  |  |  |  | 
| 477 |  |  |  |  |  |  |  | 
| 478 |  |  |  |  |  |  | sub rindex | 
| 479 |  |  |  |  |  |  | { | 
| 480 | 0 |  |  | 0 | 0 |  | my($self, $other, $pos) = @_; | 
| 481 | 0 |  | 0 |  |  |  | $pos ||= 0; | 
| 482 | 0 |  |  |  |  |  | die "NYI"; | 
| 483 |  |  |  |  |  |  | } | 
| 484 |  |  |  |  |  |  |  | 
| 485 |  |  |  |  |  |  |  | 
| 486 |  |  |  |  |  |  | sub chop | 
| 487 |  |  |  |  |  |  | { | 
| 488 | 0 |  |  | 0 | 1 |  | my $self = shift; | 
| 489 | 0 | 0 |  |  |  |  | if (CORE::length $$self) { | 
| 490 | 0 |  |  |  |  |  | my $chop = chop($$self); | 
| 491 | 0 |  |  |  |  |  | $chop = chop($$self) . $chop; | 
| 492 | 0 |  |  |  |  |  | return bless \$chop, ref($self); | 
| 493 |  |  |  |  |  |  | } | 
| 494 | 0 |  |  |  |  |  | undef; | 
| 495 |  |  |  |  |  |  | } | 
| 496 |  |  |  |  |  |  |  | 
| 497 |  |  |  |  |  |  |  | 
| 498 |  |  |  |  |  |  | # XXX: Ideas to be implemented | 
| 499 |  |  |  |  |  |  | sub scan; | 
| 500 |  |  |  |  |  |  | sub reverse; | 
| 501 |  |  |  |  |  |  |  | 
| 502 |  |  |  |  |  |  | sub lc; | 
| 503 |  |  |  |  |  |  | sub lcfirst; | 
| 504 |  |  |  |  |  |  | sub uc; | 
| 505 |  |  |  |  |  |  | sub ucfirst; | 
| 506 |  |  |  |  |  |  |  | 
| 507 |  |  |  |  |  |  | sub split; | 
| 508 |  |  |  |  |  |  | sub sprintf; | 
| 509 |  |  |  |  |  |  | sub study; | 
| 510 |  |  |  |  |  |  | sub tr; | 
| 511 |  |  |  |  |  |  |  | 
| 512 |  |  |  |  |  |  |  | 
| 513 |  |  |  |  |  |  | 1; | 
| 514 |  |  |  |  |  |  |  | 
| 515 |  |  |  |  |  |  | __END__ |