line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HTML::Entities::Latin2; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
41690
|
use 5.006; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
43
|
|
4
|
1
|
|
|
1
|
|
5
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
39
|
|
5
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
7
|
|
|
1
|
|
|
|
|
48
|
|
6
|
1
|
|
|
1
|
|
6
|
use vars qw(*encode_entities); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
2116
|
|
7
|
|
|
|
|
|
|
require Exporter; |
8
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
9
|
|
|
|
|
|
|
our @EXPORT_OK = qw(encode encode_entities); |
10
|
|
|
|
|
|
|
our $VERSION = '0.04'; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# Entities for the ASCII characters that callers may ask encode() to escape.
# Keyed by the literal character; each row holds, in order:
#   [0] decimal entity, [1] hex entity, [2] named entity,
#   [3] the character itself, [4] its Unicode description.
# The column order matches the scheme indices used by encode().
# NOTE(review): the entity spellings were restored from a listing in which
# they had been decoded to literal characters -- confirm against the
# pristine module source.
my %ascii_entities = (
    '"'  => ['&#34;', '&#x22;', '&quot;', "\x{0022}", 'QUOTATION MARK'],
    '&'  => ['&#38;', '&#x26;', '&amp;',  "\x{0026}", 'AMPERSAND'],
    '\'' => ['&#39;', '&#x27;', '&apos;', "\x{0027}", 'APOSTROPHE'],
    '<'  => ['&#60;', '&#x3C;', '&lt;',   "\x{003C}", 'LESS-THAN SIGN'],
    '>'  => ['&#62;', '&#x3E;', '&gt;',   "\x{003E}", 'GREATER-THAN SIGN'],
);
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# ISO-8859-2 (Latin-2) upper half: one row per octet from chr(160) to
# chr(255), so the row for octet $c lives at index ($c - 160).
# Each row holds, in order:
#   [0] decimal entity, [1] hex entity, [2] named entity,
#   [3] the Unicode character, [4] its description.
# The column order matches the scheme indices used by encode().
# NOTE(review): the entity spellings were restored from a listing in which
# they had been decoded to literal characters -- confirm against the
# pristine module source. Rows 0xB0, 0xB4, 0xB7, 0xD5 and 0xFF were
# corrected so that every column of a row names the same code point.
my @char_map = (
    # chr(160) to chr(255)
    ['&#160;', '&#xA0;',  '&nbsp;',   "\x{00A0}", 'NO-BREAK SPACE'],
    ['&#260;', '&#x104;', '&Aogon;',  "\x{0104}", 'LATIN CAPITAL LETTER A WITH OGONEK'],
    ['&#728;', '&#x2D8;', '&breve;',  "\x{02D8}", 'BREVE'],
    ['&#321;', '&#x141;', '&Lstrok;', "\x{0141}", 'LATIN CAPITAL LETTER L WITH STROKE'],
    ['&#164;', '&#xA4;',  '&curren;', "\x{00A4}", 'CURRENCY SIGN'],
    ['&#317;', '&#x13D;', '&Lcaron;', "\x{013D}", 'LATIN CAPITAL LETTER L WITH CARON'],
    ['&#346;', '&#x15A;', '&Sacute;', "\x{015A}", 'LATIN CAPITAL LETTER S WITH ACUTE'],
    ['&#167;', '&#xA7;',  '&sect;',   "\x{00A7}", 'SECTION SIGN'],
    ['&#168;', '&#xA8;',  '&uml;',    "\x{00A8}", 'DIAERESIS'],
    ['&#352;', '&#x160;', '&Scaron;', "\x{0160}", 'LATIN CAPITAL LETTER S WITH CARON'],
    ['&#350;', '&#x15E;', '&Scedil;', "\x{015E}", 'LATIN CAPITAL LETTER S WITH CEDILLA'],
    ['&#356;', '&#x164;', '&Tcaron;', "\x{0164}", 'LATIN CAPITAL LETTER T WITH CARON'],
    ['&#377;', '&#x179;', '&Zacute;', "\x{0179}", 'LATIN CAPITAL LETTER Z WITH ACUTE'],
    ['&#173;', '&#xAD;',  '&shy;',    "\x{00AD}", 'SOFT HYPHEN'],
    ['&#381;', '&#x17D;', '&Zcaron;', "\x{017D}", 'LATIN CAPITAL LETTER Z WITH CARON'],
    ['&#379;', '&#x17B;', '&Zdot;',   "\x{017B}", 'LATIN CAPITAL LETTER Z WITH DOT ABOVE'],
    ['&#176;', '&#xB0;',  '&deg;',    "\x{00B0}", 'DEGREE SIGN'],
    ['&#261;', '&#x105;', '&aogon;',  "\x{0105}", 'LATIN SMALL LETTER A WITH OGONEK'],
    ['&#731;', '&#x2DB;', '&ogon;',   "\x{02DB}", 'OGONEK'],
    ['&#322;', '&#x142;', '&lstrok;', "\x{0142}", 'LATIN SMALL LETTER L WITH STROKE'],
    ['&#180;', '&#xB4;',  '&acute;',  "\x{00B4}", 'ACUTE ACCENT'],
    ['&#318;', '&#x13E;', '&lcaron;', "\x{013E}", 'LATIN SMALL LETTER L WITH CARON'],
    ['&#347;', '&#x15B;', '&sacute;', "\x{015B}", 'LATIN SMALL LETTER S WITH ACUTE'],
    ['&#711;', '&#x2C7;', '&caron;',  "\x{02C7}", 'CARON'],
    ['&#184;', '&#xB8;',  '&cedil;',  "\x{00B8}", 'CEDILLA'],
    ['&#353;', '&#x161;', '&scaron;', "\x{0161}", 'LATIN SMALL LETTER S WITH CARON'],
    ['&#351;', '&#x15F;', '&scedil;', "\x{015F}", 'LATIN SMALL LETTER S WITH CEDILLA'],
    ['&#357;', '&#x165;', '&tcaron;', "\x{0165}", 'LATIN SMALL LETTER T WITH CARON'],
    ['&#378;', '&#x17A;', '&zacute;', "\x{017A}", 'LATIN SMALL LETTER Z WITH ACUTE'],
    ['&#733;', '&#x2DD;', '&dblac;',  "\x{02DD}", 'DOUBLE ACUTE ACCENT'],
    ['&#382;', '&#x17E;', '&zcaron;', "\x{017E}", 'LATIN SMALL LETTER Z WITH CARON'],
    ['&#380;', '&#x17C;', '&zdot;',   "\x{017C}", 'LATIN SMALL LETTER Z WITH DOT ABOVE'],
    ['&#340;', '&#x154;', '&Racute;', "\x{0154}", 'LATIN CAPITAL LETTER R WITH ACUTE'],
    ['&#193;', '&#xC1;',  '&Aacute;', "\x{00C1}", 'LATIN CAPITAL LETTER A WITH ACUTE'],
    ['&#194;', '&#xC2;',  '&Acirc;',  "\x{00C2}", 'LATIN CAPITAL LETTER A WITH CIRCUMFLEX'],
    ['&#258;', '&#x102;', '&Abreve;', "\x{0102}", 'LATIN CAPITAL LETTER A WITH BREVE'],
    ['&#196;', '&#xC4;',  '&Auml;',   "\x{00C4}", 'LATIN CAPITAL LETTER A WITH UMLAUT'],
    ['&#313;', '&#x139;', '&Lacute;', "\x{0139}", 'LATIN CAPITAL LETTER L WITH ACUTE'],
    ['&#262;', '&#x106;', '&Cacute;', "\x{0106}", 'LATIN CAPITAL LETTER C WITH ACUTE'],
    ['&#199;', '&#xC7;',  '&Ccedil;', "\x{00C7}", 'LATIN CAPITAL LETTER C WITH CEDILLA'],
    ['&#268;', '&#x10C;', '&Ccaron;', "\x{010C}", 'LATIN CAPITAL LETTER C WITH CARON'],
    ['&#201;', '&#xC9;',  '&Eacute;', "\x{00C9}", 'LATIN CAPITAL LETTER E WITH ACUTE'],
    ['&#280;', '&#x118;', '&Eogon;',  "\x{0118}", 'LATIN CAPITAL LETTER E WITH OGONEK'],
    ['&#203;', '&#xCB;',  '&Euml;',   "\x{00CB}", 'LATIN CAPITAL LETTER E WITH UMLAUT'],
    ['&#282;', '&#x11A;', '&Ecaron;', "\x{011A}", 'LATIN CAPITAL LETTER E WITH CARON'],
    ['&#205;', '&#xCD;',  '&Iacute;', "\x{00CD}", 'LATIN CAPITAL LETTER I WITH ACUTE'],
    ['&#206;', '&#xCE;',  '&Icirc;',  "\x{00CE}", 'LATIN CAPITAL LETTER I WITH CIRCUMFLEX'],
    ['&#270;', '&#x10E;', '&Dcaron;', "\x{010E}", 'LATIN CAPITAL LETTER D WITH CARON'],
    ['&#272;', '&#x110;', '&Dstrok;', "\x{0110}", 'LATIN CAPITAL LETTER D WITH STROKE'],
    ['&#323;', '&#x143;', '&Nacute;', "\x{0143}", 'LATIN CAPITAL LETTER N WITH ACUTE'],
    ['&#327;', '&#x147;', '&Ncaron;', "\x{0147}", 'LATIN CAPITAL LETTER N WITH CARON'],
    ['&#211;', '&#xD3;',  '&Oacute;', "\x{00D3}", 'LATIN CAPITAL LETTER O WITH ACUTE'],
    ['&#212;', '&#xD4;',  '&Ocirc;',  "\x{00D4}", 'LATIN CAPITAL LETTER O WITH CIRCUMFLEX'],
    ['&#336;', '&#x150;', '&Odblac;', "\x{0150}", 'LATIN CAPITAL LETTER O WITH DOUBLE ACUTE'],
    ['&#214;', '&#xD6;',  '&Ouml;',   "\x{00D6}", 'LATIN CAPITAL LETTER O WITH UMLAUT'],
    ['&#215;', '&#xD7;',  '&times;',  "\x{00D7}", 'MULTIPLICATION SIGN'],
    ['&#344;', '&#x158;', '&Rcaron;', "\x{0158}", 'LATIN CAPITAL LETTER R WITH CARON'],
    ['&#366;', '&#x16E;', '&Uring;',  "\x{016E}", 'LATIN CAPITAL LETTER U WITH RING ABOVE'],
    ['&#218;', '&#xDA;',  '&Uacute;', "\x{00DA}", 'LATIN CAPITAL LETTER U WITH ACUTE'],
    ['&#368;', '&#x170;', '&Udblac;', "\x{0170}", 'LATIN CAPITAL LETTER U WITH DOUBLE ACUTE'],
    ['&#220;', '&#xDC;',  '&Uuml;',   "\x{00DC}", 'LATIN CAPITAL LETTER U WITH UMLAUT'],
    ['&#221;', '&#xDD;',  '&Yacute;', "\x{00DD}", 'LATIN CAPITAL LETTER Y WITH ACUTE'],
    ['&#354;', '&#x162;', '&Tcedil;', "\x{0162}", 'LATIN CAPITAL LETTER T WITH CEDILLA'],
    ['&#223;', '&#xDF;',  '&szlig;',  "\x{00DF}", 'LATIN SMALL LETTER SHARP S'],
    ['&#341;', '&#x155;', '&racute;', "\x{0155}", 'LATIN SMALL LETTER R WITH ACUTE'],
    ['&#225;', '&#xE1;',  '&aacute;', "\x{00E1}", 'LATIN SMALL LETTER A WITH ACUTE'],
    ['&#226;', '&#xE2;',  '&acirc;',  "\x{00E2}", 'LATIN SMALL LETTER A WITH CIRCUMFLEX'],
    ['&#259;', '&#x103;', '&abreve;', "\x{0103}", 'LATIN SMALL LETTER A WITH BREVE'],
    ['&#228;', '&#xE4;',  '&auml;',   "\x{00E4}", 'LATIN SMALL LETTER A WITH UMLAUT'],
    ['&#314;', '&#x13A;', '&lacute;', "\x{013A}", 'LATIN SMALL LETTER L WITH ACUTE'],
    ['&#263;', '&#x107;', '&cacute;', "\x{0107}", 'LATIN SMALL LETTER C WITH ACUTE'],
    ['&#231;', '&#xE7;',  '&ccedil;', "\x{00E7}", 'LATIN SMALL LETTER C WITH CEDILLA'],
    ['&#269;', '&#x10D;', '&ccaron;', "\x{010D}", 'LATIN SMALL LETTER C WITH CARON'],
    ['&#233;', '&#xE9;',  '&eacute;', "\x{00E9}", 'LATIN SMALL LETTER E WITH ACUTE'],
    ['&#281;', '&#x119;', '&eogon;',  "\x{0119}", 'LATIN SMALL LETTER E WITH OGONEK'],
    ['&#235;', '&#xEB;',  '&euml;',   "\x{00EB}", 'LATIN SMALL LETTER E WITH UMLAUT'],
    ['&#283;', '&#x11B;', '&ecaron;', "\x{011B}", 'LATIN SMALL LETTER E WITH CARON'],
    ['&#237;', '&#xED;',  '&iacute;', "\x{00ED}", 'LATIN SMALL LETTER I WITH ACUTE'],
    ['&#238;', '&#xEE;',  '&icirc;',  "\x{00EE}", 'LATIN SMALL LETTER I WITH CIRCUMFLEX'],
    ['&#271;', '&#x10F;', '&dcaron;', "\x{010F}", 'LATIN SMALL LETTER D WITH CARON'],
    ['&#273;', '&#x111;', '&dstrok;', "\x{0111}", 'LATIN SMALL LETTER D WITH STROKE'],
    ['&#324;', '&#x144;', '&nacute;', "\x{0144}", 'LATIN SMALL LETTER N WITH ACUTE'],
    ['&#328;', '&#x148;', '&ncaron;', "\x{0148}", 'LATIN SMALL LETTER N WITH CARON'],
    ['&#243;', '&#xF3;',  '&oacute;', "\x{00F3}", 'LATIN SMALL LETTER O WITH ACUTE'],
    ['&#244;', '&#xF4;',  '&ocirc;',  "\x{00F4}", 'LATIN SMALL LETTER O WITH CIRCUMFLEX'],
    ['&#337;', '&#x151;', '&odblac;', "\x{0151}", 'LATIN SMALL LETTER O WITH DOUBLE ACUTE'],
    ['&#246;', '&#xF6;',  '&ouml;',   "\x{00F6}", 'LATIN SMALL LETTER O WITH UMLAUT'],
    ['&#247;', '&#xF7;',  '&divide;', "\x{00F7}", 'DIVISION SIGN'],
    ['&#345;', '&#x159;', '&rcaron;', "\x{0159}", 'LATIN SMALL LETTER R WITH CARON'],
    ['&#367;', '&#x16F;', '&uring;',  "\x{016F}", 'LATIN SMALL LETTER U WITH RING ABOVE'],
    ['&#250;', '&#xFA;',  '&uacute;', "\x{00FA}", 'LATIN SMALL LETTER U WITH ACUTE'],
    ['&#369;', '&#x171;', '&udblac;', "\x{0171}", 'LATIN SMALL LETTER U WITH DOUBLE ACUTE'],
    ['&#252;', '&#xFC;',  '&uuml;',   "\x{00FC}", 'LATIN SMALL LETTER U WITH UMLAUT'],
    ['&#253;', '&#xFD;',  '&yacute;', "\x{00FD}", 'LATIN SMALL LETTER Y WITH ACUTE'],
    ['&#355;', '&#x163;', '&tcedil;', "\x{0163}", 'LATIN SMALL LETTER T WITH CEDILLA'],
    ['&#729;', '&#x2D9;', '&dot;',    "\x{02D9}", 'DOT ABOVE'],
);
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
# Encode a string of ISO-8859-2 (Latin-2) octets as HTML entities.
#
# Arguments:
#   $source_str  - the octets to encode; undef is treated as the empty string.
#   $scheme_name - which column of the entity tables to emit, matched
#                  case-insensitively: 'decimal', 'number' or 'numeric' (0),
#                  'hex' (1), 'name' or 'named' (2), 'utf8' (3),
#                  'description' (4). Missing or unrecognised names fall back
#                  to decimal entities.
#   $unsafe      - optional string of ASCII characters that must be encoded
#                  too; only characters that have a row in %ascii_entities
#                  are affected, any others are ignored.
#
# Returns the encoded string. Octets 0-127 pass through unchanged unless they
# were listed in $unsafe; octets 160-255 are replaced via @char_map. Octets
# 128-159 have no row in @char_map: each one triggers a warning and is left
# out of the result.
sub encode {
    my ($source_str, $scheme_name, $unsafe) = @_;

    return '' unless defined $source_str;

    # Column index inside each table row, keyed by the public scheme name.
    my %column_for = (
        decimal     => 0,
        number      => 0,
        numeric     => 0,
        'hex'       => 1,
        name        => 2,
        named       => 2,
        utf8        => 3,
        description => 4,
    );

    # Guard the lc() so that omitting the scheme does not raise an
    # "uninitialized value" warning before the default is applied.
    my $scheme = defined $scheme_name ? $column_for{ lc $scheme_name } : undef;
    $scheme = 0 unless defined $scheme;    # defaults to decimal/numeric entities

    # Map of character code => table row for the ASCII characters to escape.
    my %unsafe;
    if (defined $unsafe && length $unsafe) {
        foreach my $char (split //, $unsafe) {
            $unsafe{ ord $char } = $ascii_entities{$char}
                if defined $ascii_entities{$char};
        }
    }

    my $encoded = '';
    foreach my $char_val (unpack 'C*', $source_str) {
        if ($char_val < 128) {    # ASCII character, DEL (127) included
            $encoded .= defined $unsafe{$char_val}
                ? $unsafe{$char_val}->[$scheme]
                : chr $char_val;
        }
        elsif ($char_val >= 160) {
            $encoded .= $char_map[$char_val - 160]->[$scheme];
        }
        else {
            warn 'character not in Latin-2 map, character code: '.$char_val;
        }
    }

    return $encoded;
}

# encode_entities() is an alternative name for encode().
*encode_entities = \&encode;
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
1; |
157
|
|
|
|
|
|
|
__END__ |