line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# HTML Encoder |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# Encode the special characters contained in a data structure to HTML code. |
4
|
|
|
|
|
|
|
# |
5
|
|
|
|
|
|
|
# Copyright 2003 Fabiano Reese Righetti |
6
|
|
|
|
|
|
|
# All rights reserved. |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# This program is free software; you can redistribute it and/or |
9
|
|
|
|
|
|
|
# modify it under the terms of the GNU General Public License as |
10
|
|
|
|
|
|
|
# published by the Free Software Foundation; either version 2 of the |
11
|
|
|
|
|
|
|
# License, or (at your option) any later version. |
12
|
|
|
|
|
|
|
# This program is distributed in the hope that it will be useful, |
13
|
|
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
14
|
|
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15
|
|
|
|
|
|
|
# General Public License for more details. |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
package HTML::Encoder; |
18
|
|
|
|
|
|
|
require 5.005; |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 NAME |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
HTML::Encoder - Encode special characters to HTML code. |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 SYNOPSIS |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
use HTML::Encoder; |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
my $HE = new HTML::Encoder( |
29
|
|
|
|
|
|
|
Extended_A => 1, |
30
|
|
|
|
|
|
|
Extended_B => 1, |
31
|
|
|
|
|
|
|
Latin_1 => 1, |
32
|
|
|
|
|
|
|
); |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
$HE->encode($ref); |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
=head1 DESCRIPTION |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
This module implements an algorithm for encoding the special characters |
39
|
|
|
|
|
|
|
contained in a data structure to HTML code. |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=head1 METHODS |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=over 4 |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=cut |
46
|
|
|
|
|
|
|
|
47
|
1
|
|
|
1
|
|
59542
|
use vars qw($VERSION %Entities); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
68
|
|
48
|
|
|
|
|
|
|
|
49
|
1
|
|
|
1
|
|
5
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
35
|
|
50
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
1
|
|
|
|
|
7
|
|
|
1
|
|
|
|
|
1488
|
|
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# Compile-time initialisation of the module version and of %Entities.
#
# %Entities maps a group name (the same names accepted as options by the
# constructor) to a table of { character => HTML entity name }.  The
# entity name is stored without the leading '&' and trailing ';'.
#
# NOTE: every group name must appear only once.  A hash built from a list
# keeps just the last value for a repeated key, so the two former
# "Punctuation" tables are merged into a single one here; previously the
# bull/hellip/prime/Prime/oline/frasl entries were silently discarded.
BEGIN
{
    our $VERSION = '0.00_04';
    our %Entities = (
        Latin_1 => {
            chr(0x00a0) => 'nbsp',     # NO-BREAK SPACE
            chr(0x00a1) => 'iexcl',    # INVERTED EXCLAMATION MARK
            chr(0x00a2) => 'cent',     # CENT SIGN
            chr(0x00a3) => 'pound',    # POUND SIGN
            chr(0x00a4) => 'curren',   # CURRENCY SIGN
            chr(0x00a5) => 'yen',      # YEN SIGN
            chr(0x00a6) => 'brvbar',   # BROKEN BAR
            chr(0x00a7) => 'sect',     # SECTION SIGN
            chr(0x00a8) => 'uml',      # DIAERESIS
            chr(0x00a9) => 'copy',     # COPYRIGHT SIGN
            chr(0x00aa) => 'ordf',     # FEMININE ORDINAL INDICATOR
            chr(0x00ab) => 'laquo',    # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
            chr(0x00ac) => 'not',      # NOT SIGN
            chr(0x00ad) => 'shy',      # SOFT HYPHEN
            chr(0x00ae) => 'reg',      # REGISTERED SIGN
            chr(0x00af) => 'macr',     # MACRON
            chr(0x00b0) => 'deg',      # DEGREE SIGN
            chr(0x00b1) => 'plusmn',   # PLUS-MINUS SIGN
            chr(0x00b2) => 'sup2',     # SUPERSCRIPT TWO
            chr(0x00b3) => 'sup3',     # SUPERSCRIPT THREE
            chr(0x00b4) => 'acute',    # ACUTE ACCENT
            chr(0x00b5) => 'micro',    # MICRO SIGN
            chr(0x00b6) => 'para',     # PILCROW SIGN
            chr(0x00b7) => 'middot',   # MIDDLE DOT
            chr(0x00b8) => 'cedil',    # CEDILLA
            chr(0x00b9) => 'sup1',     # SUPERSCRIPT ONE
            chr(0x00ba) => 'ordm',     # MASCULINE ORDINAL INDICATOR
            chr(0x00bb) => 'raquo',    # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
            chr(0x00bc) => 'frac14',   # VULGAR FRACTION ONE QUARTER
            chr(0x00bd) => 'frac12',   # VULGAR FRACTION ONE HALF
            chr(0x00be) => 'frac34',   # VULGAR FRACTION THREE QUARTERS
            chr(0x00bf) => 'iquest',   # INVERTED QUESTION MARK
            chr(0x00c0) => 'Agrave',   # LATIN CAPITAL LETTER A WITH GRAVE
            chr(0x00c1) => 'Aacute',   # LATIN CAPITAL LETTER A WITH ACUTE
            chr(0x00c2) => 'Acirc',    # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
            chr(0x00c3) => 'Atilde',   # LATIN CAPITAL LETTER A WITH TILDE
            chr(0x00c4) => 'Auml',     # LATIN CAPITAL LETTER A WITH DIAERESIS
            chr(0x00c5) => 'Aring',    # LATIN CAPITAL LETTER A WITH RING ABOVE
            chr(0x00c6) => 'AElig',    # LATIN CAPITAL LETTER AE
            chr(0x00c7) => 'Ccedil',   # LATIN CAPITAL LETTER C WITH CEDILLA
            chr(0x00c8) => 'Egrave',   # LATIN CAPITAL LETTER E WITH GRAVE
            chr(0x00c9) => 'Eacute',   # LATIN CAPITAL LETTER E WITH ACUTE
            chr(0x00ca) => 'Ecirc',    # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
            chr(0x00cb) => 'Euml',     # LATIN CAPITAL LETTER E WITH DIAERESIS
            chr(0x00cc) => 'Igrave',   # LATIN CAPITAL LETTER I WITH GRAVE
            chr(0x00cd) => 'Iacute',   # LATIN CAPITAL LETTER I WITH ACUTE
            chr(0x00ce) => 'Icirc',    # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
            chr(0x00cf) => 'Iuml',     # LATIN CAPITAL LETTER I WITH DIAERESIS
            chr(0x00d0) => 'ETH',      # LATIN CAPITAL LETTER ETH (Icelandic)
            chr(0x00d1) => 'Ntilde',   # LATIN CAPITAL LETTER N WITH TILDE
            chr(0x00d2) => 'Ograve',   # LATIN CAPITAL LETTER O WITH GRAVE
            chr(0x00d3) => 'Oacute',   # LATIN CAPITAL LETTER O WITH ACUTE
            chr(0x00d4) => 'Ocirc',    # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
            chr(0x00d5) => 'Otilde',   # LATIN CAPITAL LETTER O WITH TILDE
            chr(0x00d6) => 'Ouml',     # LATIN CAPITAL LETTER O WITH DIAERESIS
            chr(0x00d7) => 'times',    # MULTIPLICATION SIGN
            chr(0x00d8) => 'Oslash',   # LATIN CAPITAL LETTER O WITH STROKE
            chr(0x00d9) => 'Ugrave',   # LATIN CAPITAL LETTER U WITH GRAVE
            chr(0x00da) => 'Uacute',   # LATIN CAPITAL LETTER U WITH ACUTE
            chr(0x00db) => 'Ucirc',    # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
            chr(0x00dc) => 'Uuml',     # LATIN CAPITAL LETTER U WITH DIAERESIS
            chr(0x00dd) => 'Yacute',   # LATIN CAPITAL LETTER Y WITH ACUTE
            chr(0x00de) => 'THORN',    # LATIN CAPITAL LETTER THORN (Icelandic)
            chr(0x00df) => 'szlig',    # LATIN SMALL LETTER SHARP S (German)
            chr(0x00e0) => 'agrave',   # LATIN SMALL LETTER A WITH GRAVE
            chr(0x00e1) => 'aacute',   # LATIN SMALL LETTER A WITH ACUTE
            chr(0x00e2) => 'acirc',    # LATIN SMALL LETTER A WITH CIRCUMFLEX
            chr(0x00e3) => 'atilde',   # LATIN SMALL LETTER A WITH TILDE
            chr(0x00e4) => 'auml',     # LATIN SMALL LETTER A WITH DIAERESIS
            chr(0x00e5) => 'aring',    # LATIN SMALL LETTER A WITH RING ABOVE
            chr(0x00e6) => 'aelig',    # LATIN SMALL LETTER AE
            chr(0x00e7) => 'ccedil',   # LATIN SMALL LETTER C WITH CEDILLA
            chr(0x00e8) => 'egrave',   # LATIN SMALL LETTER E WITH GRAVE
            chr(0x00e9) => 'eacute',   # LATIN SMALL LETTER E WITH ACUTE
            chr(0x00ea) => 'ecirc',    # LATIN SMALL LETTER E WITH CIRCUMFLEX
            chr(0x00eb) => 'euml',     # LATIN SMALL LETTER E WITH DIAERESIS
            chr(0x00ec) => 'igrave',   # LATIN SMALL LETTER I WITH GRAVE
            chr(0x00ed) => 'iacute',   # LATIN SMALL LETTER I WITH ACUTE
            chr(0x00ee) => 'icirc',    # LATIN SMALL LETTER I WITH CIRCUMFLEX
            chr(0x00ef) => 'iuml',     # LATIN SMALL LETTER I WITH DIAERESIS
            chr(0x00f0) => 'eth',      # LATIN SMALL LETTER ETH (Icelandic)
            chr(0x00f1) => 'ntilde',   # LATIN SMALL LETTER N WITH TILDE
            chr(0x00f2) => 'ograve',   # LATIN SMALL LETTER O WITH GRAVE
            chr(0x00f3) => 'oacute',   # LATIN SMALL LETTER O WITH ACUTE
            chr(0x00f4) => 'ocirc',    # LATIN SMALL LETTER O WITH CIRCUMFLEX
            chr(0x00f5) => 'otilde',   # LATIN SMALL LETTER O WITH TILDE
            chr(0x00f6) => 'ouml',     # LATIN SMALL LETTER O WITH DIAERESIS
            chr(0x00f7) => 'divide',   # DIVISION SIGN
            chr(0x00f8) => 'oslash',   # LATIN SMALL LETTER O WITH STROKE
            chr(0x00f9) => 'ugrave',   # LATIN SMALL LETTER U WITH GRAVE
            chr(0x00fa) => 'uacute',   # LATIN SMALL LETTER U WITH ACUTE
            chr(0x00fb) => 'ucirc',    # LATIN SMALL LETTER U WITH CIRCUMFLEX
            chr(0x00fc) => 'uuml',     # LATIN SMALL LETTER U WITH DIAERESIS
            chr(0x00fd) => 'yacute',   # LATIN SMALL LETTER Y WITH ACUTE
            chr(0x00fe) => 'thorn',    # LATIN SMALL LETTER THORN (Icelandic)
            chr(0x00ff) => 'yuml',     # LATIN SMALL LETTER Y WITH DIAERESIS
        },

        Extended_B => {
            chr(0x0391) => 'Alpha',    # GREEK CAPITAL LETTER ALPHA
            chr(0x0392) => 'Beta',     # GREEK CAPITAL LETTER BETA
            chr(0x0393) => 'Gamma',    # GREEK CAPITAL LETTER GAMMA
            chr(0x0394) => 'Delta',    # GREEK CAPITAL LETTER DELTA
            chr(0x0395) => 'Epsilon',  # GREEK CAPITAL LETTER EPSILON
            chr(0x0396) => 'Zeta',     # GREEK CAPITAL LETTER ZETA
            chr(0x0397) => 'Eta',      # GREEK CAPITAL LETTER ETA
            chr(0x0398) => 'Theta',    # GREEK CAPITAL LETTER THETA
            chr(0x0399) => 'Iota',     # GREEK CAPITAL LETTER IOTA
            chr(0x039a) => 'Kappa',    # GREEK CAPITAL LETTER KAPPA
            chr(0x039b) => 'Lambda',   # GREEK CAPITAL LETTER LAMBDA
            chr(0x039c) => 'Mu',       # GREEK CAPITAL LETTER MU
            chr(0x039d) => 'Nu',       # GREEK CAPITAL LETTER NU
            chr(0x039e) => 'Xi',       # GREEK CAPITAL LETTER XI
            chr(0x039f) => 'Omicron',  # GREEK CAPITAL LETTER OMICRON
            chr(0x03a0) => 'Pi',       # GREEK CAPITAL LETTER PI
            chr(0x03a1) => 'Rho',      # GREEK CAPITAL LETTER RHO
            chr(0x03a3) => 'Sigma',    # GREEK CAPITAL LETTER SIGMA
            chr(0x03a4) => 'Tau',      # GREEK CAPITAL LETTER TAU
            chr(0x03a5) => 'Upsilon',  # GREEK CAPITAL LETTER UPSILON
            chr(0x03a6) => 'Phi',      # GREEK CAPITAL LETTER PHI
            chr(0x03a7) => 'Chi',      # GREEK CAPITAL LETTER CHI
            chr(0x03a8) => 'Psi',      # GREEK CAPITAL LETTER PSI
            chr(0x03a9) => 'Omega',    # GREEK CAPITAL LETTER OMEGA
            chr(0x03b1) => 'alpha',    # GREEK SMALL LETTER ALPHA
            chr(0x03b2) => 'beta',     # GREEK SMALL LETTER BETA
            chr(0x03b3) => 'gamma',    # GREEK SMALL LETTER GAMMA
            chr(0x03b4) => 'delta',    # GREEK SMALL LETTER DELTA
            chr(0x03b5) => 'epsilon',  # GREEK SMALL LETTER EPSILON
            chr(0x03b6) => 'zeta',     # GREEK SMALL LETTER ZETA
            chr(0x03b7) => 'eta',      # GREEK SMALL LETTER ETA
            chr(0x03b8) => 'theta',    # GREEK SMALL LETTER THETA
            chr(0x03b9) => 'iota',     # GREEK SMALL LETTER IOTA
            chr(0x03ba) => 'kappa',    # GREEK SMALL LETTER KAPPA
            chr(0x03bb) => 'lambda',   # GREEK SMALL LETTER LAMBDA
            chr(0x03bc) => 'mu',       # GREEK SMALL LETTER MU
            chr(0x03bd) => 'nu',       # GREEK SMALL LETTER NU
            chr(0x03be) => 'xi',       # GREEK SMALL LETTER XI
            chr(0x03bf) => 'omicron',  # GREEK SMALL LETTER OMICRON
            chr(0x03c0) => 'pi',       # GREEK SMALL LETTER PI
            chr(0x03c1) => 'rho',      # GREEK SMALL LETTER RHO
            chr(0x03c2) => 'sigmaf',   # GREEK SMALL LETTER FINAL SIGMA
            chr(0x03c3) => 'sigma',    # GREEK SMALL LETTER SIGMA
            chr(0x03c4) => 'tau',      # GREEK SMALL LETTER TAU
            chr(0x03c5) => 'upsilon',  # GREEK SMALL LETTER UPSILON
            chr(0x03c6) => 'phi',      # GREEK SMALL LETTER PHI
            chr(0x03c7) => 'chi',      # GREEK SMALL LETTER CHI
            chr(0x03c8) => 'psi',      # GREEK SMALL LETTER PSI
            chr(0x03c9) => 'omega',    # GREEK SMALL LETTER OMEGA
            chr(0x03d1) => 'thetasym', # GREEK SMALL LETTER THETA SYMBOL
            chr(0x03d2) => 'upsih',    # GREEK UPSILON WITH HOOK SYMBOL
            chr(0x03d6) => 'piv',      # GREEK PI SYMBOL
        },

        # Single merged table: general punctuation, spaces, directional
        # marks, quotation marks and the euro sign.
        Punctuation => {
            chr(0x2002) => 'ensp',     # EN SPACE
            chr(0x2003) => 'emsp',     # EM SPACE
            chr(0x2009) => 'thinsp',   # THIN SPACE
            chr(0x200c) => 'zwnj',     # ZERO WIDTH NON-JOINER
            chr(0x200d) => 'zwj',      # ZERO WIDTH JOINER
            chr(0x200e) => 'lrm',      # LEFT-TO-RIGHT MARK
            chr(0x200f) => 'rlm',      # RIGHT-TO-LEFT MARK
            chr(0x2013) => 'ndash',    # EN DASH
            chr(0x2014) => 'mdash',    # EM DASH
            chr(0x2018) => 'lsquo',    # LEFT SINGLE QUOTATION MARK
            chr(0x2019) => 'rsquo',    # RIGHT SINGLE QUOTATION MARK
            chr(0x201a) => 'sbquo',    # SINGLE LOW-9 QUOTATION MARK
            chr(0x201c) => 'ldquo',    # LEFT DOUBLE QUOTATION MARK
            chr(0x201d) => 'rdquo',    # RIGHT DOUBLE QUOTATION MARK
            chr(0x201e) => 'bdquo',    # DOUBLE LOW-9 QUOTATION MARK
            chr(0x2020) => 'dagger',   # DAGGER
            chr(0x2021) => 'Dagger',   # DOUBLE DAGGER
            chr(0x2022) => 'bull',     # BULLET = BLACK SMALL CIRCLE
            chr(0x2026) => 'hellip',   # HORIZONTAL ELLIPSIS = THREE DOT LEADER
            chr(0x2030) => 'permil',   # PER MILLE SIGN
            chr(0x2032) => 'prime',    # PRIME = MINUTES = FEET
            chr(0x2033) => 'Prime',    # DOUBLE PRIME = SECONDS = INCHES
            chr(0x2039) => 'lsaquo',   # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
            chr(0x203a) => 'rsaquo',   # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
            chr(0x203e) => 'oline',    # OVERLINE = SPACING OVERSCORE
            chr(0x2044) => 'frasl',    # FRACTION SLASH
            chr(0x20ac) => 'euro',     # EURO SIGN
        },

        Letterlike => {
            chr(0x2111) => 'image',    # BLACKLETTER CAPITAL I = IMAGINARY PART
            chr(0x211c) => 'real',     # BLACKLETTER CAPITAL R = REAL PART SYMBOL
            chr(0x2122) => 'trade',    # TRADE MARK SIGN
            chr(0x2135) => 'alefsym',  # ALEF SYMBOL = FIRST TRANSFINITE CARDINAL
        },

        Arrows => {
            chr(0x2190) => 'larr',     # LEFTWARDS ARROW
            chr(0x2191) => 'uarr',     # UPWARDS ARROW
            chr(0x2192) => 'rarr',     # RIGHTWARDS ARROW
            chr(0x2193) => 'darr',     # DOWNWARDS ARROW
            chr(0x2194) => 'harr',     # LEFT RIGHT ARROW
            chr(0x21d0) => 'lArr',     # LEFTWARDS DOUBLE ARROW
            chr(0x21d1) => 'uArr',     # UPWARDS DOUBLE ARROW
            chr(0x21d2) => 'rArr',     # RIGHTWARDS DOUBLE ARROW
            chr(0x21d3) => 'dArr',     # DOWNWARDS DOUBLE ARROW
            chr(0x21d4) => 'hArr',     # LEFT RIGHT DOUBLE ARROW
        },

        Mathematical => {
            chr(0x2200) => 'forall',   # FOR ALL
            chr(0x2202) => 'part',     # PARTIAL DIFFERENTIAL
            chr(0x2203) => 'exist',    # THERE EXISTS
            chr(0x2205) => 'empty',    # EMPTY SET = NULL SET = DIAMETER
            chr(0x2207) => 'nabla',    # NABLA = BACKWARD DIFFERENCE
            chr(0x2208) => 'isin',     # ELEMENT OF
            chr(0x2209) => 'notin',    # NOT AN ELEMENT OF
            chr(0x220b) => 'ni',       # CONTAINS AS MEMBER
            chr(0x220f) => 'prod',     # N-ARY PRODUCT = PRODUCT SIGN
            chr(0x2211) => 'sum',      # N-ARY SUMMATION
            chr(0x2212) => 'minus',    # MINUS SIGN
            chr(0x2217) => 'lowast',   # ASTERISK OPERATOR
            chr(0x221a) => 'radic',    # SQUARE ROOT = RADICAL SIGN
            chr(0x221d) => 'prop',     # PROPORTIONAL TO
            chr(0x221e) => 'infin',    # INFINITY
            chr(0x2220) => 'ang',      # ANGLE
            chr(0x2227) => 'and',      # LOGICAL AND = WEDGE
            chr(0x2228) => 'or',       # LOGICAL OR = VEE
            chr(0x2229) => 'cap',      # INTERSECTION = CAP
            chr(0x222a) => 'cup',      # UNION = CUP
            chr(0x222b) => 'int',      # INTEGRAL
            chr(0x2234) => 'there4',   # THEREFORE
            chr(0x223c) => 'sim',      # TILDE OPERATOR = VARIES WITH = SIMILAR TO
            chr(0x2245) => 'cong',     # APPROXIMATELY EQUAL TO
            chr(0x2248) => 'asymp',    # ALMOST EQUAL TO = ASYMPTOTIC TO
            chr(0x2260) => 'ne',       # NOT EQUAL TO
            chr(0x2261) => 'equiv',    # IDENTICAL TO
            chr(0x2264) => 'le',       # LESS-THAN OR EQUAL TO
            chr(0x2265) => 'ge',       # GREATER-THAN OR EQUAL TO
            chr(0x2282) => 'sub',      # SUBSET OF
            chr(0x2283) => 'sup',      # SUPERSET OF
            chr(0x2284) => 'nsub',     # NOT A SUBSET OF
            chr(0x2286) => 'sube',     # SUBSET OF OR EQUAL TO
            chr(0x2287) => 'supe',     # SUPERSET OF OR EQUAL TO
            chr(0x2295) => 'oplus',    # CIRCLED PLUS = DIRECT SUM
            chr(0x2297) => 'otimes',   # CIRCLED TIMES = VECTOR PRODUCT
            chr(0x22a5) => 'perp',     # UP TACK = ORTHOGONAL TO = PERPENDICULAR
            chr(0x22c5) => 'sdot',     # DOT OPERATOR
        },

        Technical => {
            chr(0x2308) => 'lceil',    # LEFT CEILING = APL UPSTILE
            chr(0x2309) => 'rceil',    # RIGHT CEILING
            chr(0x230a) => 'lfloor',   # LEFT FLOOR = APL DOWNSTILE
            chr(0x230b) => 'rfloor',   # RIGHT FLOOR
            chr(0x2329) => 'lang',     # LEFT-POINTING ANGLE BRACKET = BRA
            chr(0x232a) => 'rang',     # RIGHT-POINTING ANGLE BRACKET = KET
        },

        Geometric => {
            chr(0x25ca) => 'loz',      # LOZENGE
        },

        Miscellaneous => {
            chr(0x2660) => 'spades',   # BLACK SPADE SUIT
            chr(0x2663) => 'clubs',    # BLACK CLUB SUIT = SHAMROCK
            chr(0x2665) => 'hearts',   # BLACK HEART SUIT = VALENTINE
            chr(0x2666) => 'diams',    # BLACK DIAMOND SUIT
        },

        Controls => {
            chr(0x0022) => 'quot',     # QUOTATION MARK = APL QUOTE
            chr(0x0026) => 'amp',      # AMPERSAND
            chr(0x003c) => 'lt',       # LESS-THAN SIGN
            chr(0x003e) => 'gt',       # GREATER-THAN SIGN
        },

        Extended_A => {
            chr(0x0152) => 'OElig',    # LATIN CAPITAL LIGATURE OE
            chr(0x0153) => 'oelig',    # LATIN SMALL LIGATURE OE
            chr(0x0160) => 'Scaron',   # LATIN CAPITAL LETTER S WITH CARON
            chr(0x0161) => 'scaron',   # LATIN SMALL LETTER S WITH CARON
            chr(0x0178) => 'Yuml',     # LATIN CAPITAL LETTER Y WITH DIAERESIS
        },

        Modifier => {
            chr(0x02c6) => 'circ',     # MODIFIER LETTER CIRCUMFLEX ACCENT
            chr(0x02dc) => 'tilde',    # SMALL TILDE
        },
    );
}
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
=item B<new> |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
The constructor method. |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
my $HE = new HTML::Encoder( |
340
|
|
|
|
|
|
|
Arrows => 0, |
341
|
|
|
|
|
|
|
Controls => 0, |
342
|
|
|
|
|
|
|
Extended_A => 1, |
343
|
|
|
|
|
|
|
Extended_B => 1, |
344
|
|
|
|
|
|
|
Geometric => 0, |
345
|
|
|
|
|
|
|
Latin_1 => 1, |
346
|
|
|
|
|
|
|
Letterlike => 0, |
347
|
|
|
|
|
|
|
Mathematical => 0, |
348
|
|
|
|
|
|
|
Miscellaneous => 0, |
349
|
|
|
|
|
|
|
Modifier => 0, |
350
|
|
|
|
|
|
|
Punctuation => 0, |
351
|
|
|
|
|
|
|
Technical => 0, |
352
|
|
|
|
|
|
|
); |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
or |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
my $HE = new HTML::Encoder(); # By default only the Latin_1 entities are encoded. |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
=cut |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
# Constructor.
#
# Accepts a flat list of (group name => boolean) pairs selecting which
# entity groups of %Entities the object should encode; every group
# defaults to off.  When no group ends up enabled, Latin_1 is switched on.
# For each enabled group the object also stores 'Heads_<group>': the
# group's characters joined with '|'.
#
# May be called on the class name or on an existing instance; returns the
# new blessed hash reference.
sub new
{
    my ($proto, @options) = @_;
    my $class = ref($proto) || $proto;

    my @group_names = qw(
        Arrows Controls Extended_A Extended_B Geometric Latin_1
        Letterlike Mathematical Miscellaneous Modifier Punctuation
        Technical
    );

    # Caller-supplied pairs come last so they override the defaults.
    my $self = { (map { $_ => 0 } @group_names), @options };

    my @enabled = grep { $self->{$_} } keys %Entities;

    # Set default HTML codes.
    unless (@enabled) {
        $self->{Latin_1} = 1;
        @enabled = ('Latin_1');
    }

    for my $group (@enabled) {
        $self->{ 'Heads_' . $group } = join '|', keys %{ $Entities{$group} };
    }

    return bless $self, $class;
}
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
=item B<encode> |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
Parses a data structure, searching for special characters to |
402
|
|
|
|
|
|
|
convert to HTML code. |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
$HE->encode($ref); |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
=cut |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
# encode($ref)
#
# Walks the data structure referenced by $ref (array refs, hash refs and
# scalar refs, nested to any depth) and rewrites every plain string it
# contains IN PLACE, replacing each character that belongs to an enabled
# entity group with "&name;".  Anything that is not one of those three
# reference kinds (code refs, blessed objects, a plain non-reference
# argument, ...) is left untouched.  Returns nothing.
#
# All enabled groups are merged into one lookup table and applied in a
# single substitution pass.  Doing one pass per group, in hash order,
# could re-encode the '&' of an entity produced by an earlier pass when
# the Controls group is enabled (yielding e.g. "&amp;nbsp;").
sub encode
{
    my $self = shift;
    my $ref  = shift;

    # character => entity name, for every group switched on in the object
    my %name_of;
    for my $group (keys %Entities) {
        next unless $self->{$group};
        my $table = $Entities{$group};
        @name_of{ keys %{$table} } = values %{$table};
    }
    return unless %name_of;

    my $alternation = join '|', map { quotemeta } keys %name_of;
    my $pattern     = qr/($alternation)/;

    _encode_node($ref, $pattern, \%name_of, {});
    return;
}

# Recursive worker for encode(): encodes the string behind a scalar
# reference, or descends into the elements of an array/hash reference.
# $seen records every reference already handled so that cyclic structures
# terminate and shared substructures are not encoded twice.
sub _encode_node
{
    my ($node, $pattern, $name_of, $seen) = @_;

    my $type = ref $node;
    return unless $type eq 'ARRAY' or $type eq 'HASH' or $type eq 'SCALAR';
    return if $seen->{$node}++;

    if ($type eq 'SCALAR') {
        # Undefined values have nothing to encode (and would only warn).
        ${$node} =~ s/$pattern/&$name_of->{$1};/g if defined ${$node};
        return;
    }

    # References to the container's slots, so plain strings can be
    # modified in place.
    my @slots = $type eq 'ARRAY'
        ? map { \$node->[$_] } 0 .. $#{$node}
        : map { \$node->{$_} } keys %{$node};

    for my $slot (@slots) {
        my $kind = ref ${$slot};
        my $next = ($kind eq 'ARRAY' or $kind eq 'HASH' or $kind eq 'SCALAR')
            ? ${$slot}    # nested structure: descend into it
            : $slot;      # plain value: handled through the slot reference
        _encode_node($next, $pattern, $name_of, $seen);
    }
    return;
}
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
1; |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
__END__ |