line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package CCCP::Encode; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
33191
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
41
|
|
4
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
31
|
|
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
1204
|
use Encode; |
|
1
|
|
|
|
|
18765
|
|
|
1
|
|
|
|
|
107
|
|
7
|
1
|
|
|
1
|
|
1553
|
use Text::Unidecode; |
|
1
|
|
|
|
|
4686
|
|
|
1
|
|
|
|
|
899
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our $VERSION = '0.03'; |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
$CCCP::Encode::ToText = 0; |
12
|
|
|
|
|
|
|
$CCCP::Encode::Entities = 'xml'; |
13
|
|
|
|
|
|
|
$CCCP::Encode::CharMap = {}; |
14
|
|
|
|
|
|
|
$CCCP::Encode::Regexp = '[^\p{Cyrillic}|\p{IsLatin}|\p{InBasic_Latin}]'; |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
my $xml_entities = { |
17
|
|
|
|
|
|
|
'&' => '&', |
18
|
|
|
|
|
|
|
'"' => '"', |
19
|
|
|
|
|
|
|
"'" => ''', |
20
|
|
|
|
|
|
|
'>' => '>', |
21
|
|
|
|
|
|
|
'<' => '<' |
22
|
|
|
|
|
|
|
}; |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
my $f = find_encoding('utf-8'); |
25
|
|
|
|
|
|
|
my $t = undef; |
26
|
|
|
|
|
|
|
__err_msg("Unknown encoding 'utf-8'") unless defined $f; |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
sub utf2cyrillic { |
29
|
0
|
|
|
0
|
1
|
|
my ( $class, $str, $to ) = @_; |
30
|
|
|
|
|
|
|
|
31
|
0
|
0
|
0
|
|
|
|
if ($CCCP::Encode::ToText and not UNIVERSAL::isa($CCCP::Encode::CharMap,'HASH') ) { |
32
|
0
|
|
|
|
|
|
__err_msg("\$CCCP::Encode::CharMap must be hash ref"); |
33
|
|
|
|
|
|
|
}; |
34
|
|
|
|
|
|
|
|
35
|
0
|
0
|
|
|
|
|
return undef unless defined $str; |
36
|
|
|
|
|
|
|
|
37
|
0
|
0
|
|
|
|
|
__err_msg("missing 'to' argument") unless $to; |
38
|
0
|
0
|
|
|
|
|
return $str if ($to =~ /^utf/i); |
39
|
|
|
|
|
|
|
|
40
|
0
|
0
|
0
|
|
|
|
$t = find_encoding($to) unless ($t and $t->name eq $to); |
41
|
0
|
0
|
|
|
|
|
__err_msg("Unknown encoding '$to'") unless defined $t; |
42
|
|
|
|
|
|
|
|
43
|
0
|
|
|
|
|
|
Encode::_utf8_off($str); |
44
|
|
|
|
|
|
|
|
45
|
0
|
0
|
|
|
|
|
unless ($CCCP::Encode::ToText) { |
46
|
|
|
|
|
|
|
# decode with html entities |
47
|
0
|
|
0
|
|
|
|
$CCCP::Encode::Entities ||= 'xml'; |
48
|
0
|
0
|
|
|
|
|
my $str = $t->encode($f->decode($str), $CCCP::Encode::Entities eq 'xml' ? Encode::FB_XMLCREF : Encode::FB_HTMLCREF); |
49
|
0
|
0
|
|
|
|
|
if ($CCCP::Encode::Entities eq 'xml') { |
50
|
0
|
|
|
|
|
|
$str =~ s/('|"|<|>|&(?!#x))/$xml_entities->{$1}/geo; |
|
0
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
}; |
52
|
0
|
|
|
|
|
|
return $str; |
53
|
|
|
|
|
|
|
} else { |
54
|
|
|
|
|
|
|
# decode in text mode |
55
|
0
|
0
|
|
|
|
|
($str = $f->decode($str)) =~ s/($CCCP::Encode::Regexp)/exists $CCCP::Encode::CharMap->{$1} ? $CCCP::Encode::CharMap->{$1} : unidecode($1)/sexg; |
|
0
|
|
|
|
|
|
|
56
|
0
|
|
|
|
|
|
return $t->encode($str); |
57
|
|
|
|
|
|
|
}; |
58
|
|
|
|
|
|
|
} |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
sub __err_msg { |
61
|
0
|
|
|
0
|
|
|
my ($str) = @_; |
62
|
|
|
|
|
|
|
|
63
|
0
|
|
|
|
|
|
require Carp; |
64
|
0
|
|
|
|
|
|
Carp::croak(__PACKAGE__.": ".$str); |
65
|
|
|
|
|
|
|
} |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
1; |
68
|
|
|
|
|
|
|
__END__ |