line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Encode::DoubleEncodedUTF8;

use strict;
use warnings;
use base qw( Encode::Encoding );
use Encode 2.12 ();    # loaded without imports; called fully qualified below

our $VERSION = '0.05';

# Register this class with Encode under the encoding name 'utf-8-de'
# (Define is inherited from Encode::Encoding).
__PACKAGE__->Define('utf-8-de');
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
# One character in the range U+0080..U+00FF as it looks in UTF-8: a lead
# byte of C2 or C3 followed by a single continuation byte.  When UTF-8 octets
# are mistaken for Latin-1 and encoded to UTF-8 a second time, every original
# octet >= 0x80 turns into exactly one such pair.
my $latin1_as_utf8 = "[\xC2\xC3][\x80-\xBF]";

# (Taken from Test::utf8 module)
# A Regexp string to match valid UTF8 bytes
# this info comes from page 78 of "The Unicode Standard 4.0"
# published by the Unicode Consortium
#
# Must be used under /x.  The \x{f0} row carries three continuation bytes
# (F0 90..BF 80..BF 80..BF); with only two it would accept a truncated
# sequence and never match a complete 4-byte character.
my $valid_utf8_regexp = <<'.' ;
        [\x{00}-\x{7f}]
      | [\x{c2}-\x{df}][\x{80}-\x{bf}]
      |         \x{e0} [\x{a0}-\x{bf}][\x{80}-\x{bf}]
      | [\x{e1}-\x{ec}][\x{80}-\x{bf}][\x{80}-\x{bf}]
      |         \x{ed} [\x{80}-\x{9f}][\x{80}-\x{bf}]
      | [\x{ee}-\x{ef}][\x{80}-\x{bf}][\x{80}-\x{bf}]
      |         \x{f0} [\x{90}-\x{bf}][\x{80}-\x{bf}][\x{80}-\x{bf}]
      | [\x{f1}-\x{f3}][\x{80}-\x{bf}][\x{80}-\x{bf}][\x{80}-\x{bf}]
      |         \x{f4} [\x{80}-\x{8f}][\x{80}-\x{bf}][\x{80}-\x{bf}]
.

# decode($obj, $buf, $chk)
#
# Decode the octets in $buf as UTF-8 after undoing any doubly encoded
# stretches found in it.
#
#   $obj - the encoding object (not used here)
#   $buf - octet string to decode
#   $chk - Encode CHECK value; when it is a true plain value without the
#          LEAVE_SRC bit, the caller's buffer is emptied in place
#
# Returns whatever Encode::decode_utf8 returns for the repaired octets.
sub decode {
    my ($obj, $buf, $chk) = @_;

    # Hand each maximal run of two or more candidate pairs to the checker in
    # one piece, so characters of any length (2, 3 or 4 bytes) stay aligned
    # instead of being cut at a fixed group size.
    $buf =~ s{((?:$latin1_as_utf8){2,})}{ _check_utf8_bytes($1) }eg
        if defined $buf;

    # this is what in-place edit means: the whole source counts as consumed.
    # A code reference or a CHECK value with LEAVE_SRC set asks for the
    # source to be left alone.
    $_[1] = '' if $chk && !ref $chk && !( $chk & Encode::LEAVE_SRC() );

    return Encode::decode_utf8($buf);
}

# _check_utf8_bytes($bytes)
#
# $bytes is a run of two-byte pairs as matched by $latin1_as_utf8.  Each
# pair is collapsed to the single octet it stands for, and the collapsed
# octets are then read from left to right: wherever they spell a well-formed
# multi-byte UTF-8 character, that character replaces its pairs; any other
# position keeps its original pair untouched.  A run with nothing to repair
# therefore comes back unchanged, and the result never contains a stray
# collapsed octet.
sub _check_utf8_bytes {
    my $bytes = shift;

    # Collapse pair i of $bytes into octet i of $candidate.
    my $candidate = '';
    while ( $bytes =~ /\G(.)(.)/sg ) {
        $candidate .= chr( ( ( ord($1) << 6 ) & 0xff ) | ord($2) );
    }

    my $repaired = '';
    while ( $candidate =~ m{ \G (?: ( (?![\x00-\x7f]) (?: $valid_utf8_regexp ) ) | . ) }xgs ) {
        $repaired .= defined $1
            ? $1                                # a whole multi-byte character
            : substr( $bytes, 2 * $-[0], 2 );   # octet $-[0] came from this pair
    }

    # Keep an unpaired trailing byte, if there is one.
    return $repaired . substr( $bytes, 2 * length($candidate) );
}
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
# encode(...)
#
# Encoding is deliberately unsupported for 'utf-8-de'; every call dies with
# an error reported from the caller's point of view.
sub encode {
    # Load Carp at run time and call it fully qualified, so that carp, croak
    # and confess are not imported into this class and exposed as methods.
    require Carp;
    Carp::croak("utf-8-de doesn't support encode() ... Why do you want to do that?");
}
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
1; |
56
|
|
|
|
|
|
|
__END__ |