line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package MKDoc::XML::Decode::XHTML; |
2
|
7
|
|
|
7
|
|
37
|
use warnings; |
|
7
|
|
|
|
|
12
|
|
|
7
|
|
|
|
|
326
|
|
3
|
7
|
|
|
7
|
|
31
|
use strict; |
|
7
|
|
|
|
|
100
|
|
|
7
|
|
|
|
|
11587
|
|
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
# Portions (c) International Organization for Standardization 1986: |
7
|
|
|
|
|
|
|
# Permission to copy in any form is granted for use with conforming SGML |
8
|
|
|
|
|
|
|
# systems and applications as defined in ISO 8879, provided this notice is |
9
|
|
|
|
|
|
|
# included in all copies. |
10
|
|
|
|
|
|
|
# %ENTITY_2_CHAR maps a named character entity (the text between '&' and
# ';', e.g. 'eacute') to the character returned by chr() for its code point.
# The table is written as name/code-point pairs grouped by character block
# and converted to characters in a single pass at load time.
#
# The five entities quot (34), amp (38), apos (39), lt (60) and gt (62) are
# not part of this table.
our %ENTITY_2_CHAR = do {
    my %code_point_of = (

        # Latin1 characters
        qw(
            nbsp    160   iexcl   161   cent    162   pound   163
            curren  164   yen     165   brvbar  166   sect    167
            uml     168   copy    169   ordf    170   laquo   171
            not     172   shy     173   reg     174   macr    175
            deg     176   plusmn  177   sup2    178   sup3    179
            acute   180   micro   181   para    182   middot  183
            cedil   184   sup1    185   ordm    186   raquo   187
            frac14  188   frac12  189   frac34  190   iquest  191
            Agrave  192   Aacute  193   Acirc   194   Atilde  195
            Auml    196   Aring   197   AElig   198   Ccedil  199
            Egrave  200   Eacute  201   Ecirc   202   Euml    203
            Igrave  204   Iacute  205   Icirc   206   Iuml    207
            ETH     208   Ntilde  209   Ograve  210   Oacute  211
            Ocirc   212   Otilde  213   Ouml    214   times   215
            Oslash  216   Ugrave  217   Uacute  218   Ucirc   219
            Uuml    220   Yacute  221   THORN   222   szlig   223
            agrave  224   aacute  225   acirc   226   atilde  227
            auml    228   aring   229   aelig   230   ccedil  231
            egrave  232   eacute  233   ecirc   234   euml    235
            igrave  236   iacute  237   icirc   238   iuml    239
            eth     240   ntilde  241   ograve  242   oacute  243
            ocirc   244   otilde  245   ouml    246   divide  247
            oslash  248   ugrave  249   uacute  250   ucirc   251
            uuml    252   yacute  253   thorn   254   yuml    255
        ),

        # Latin Extended-A
        qw(
            OElig   338   oelig   339   Scaron  352   scaron  353
            Yuml    376
        ),

        # Spacing Modifier Letters
        qw(
            circ    710   tilde   732
        ),

        # General Punctuation
        # * lsaquo is proposed but not yet ISO standardized
        # * rsaquo is proposed but not yet ISO standardized
        qw(
            ensp    8194  emsp    8195  thinsp  8201  zwnj    8204
            zwj     8205  lrm     8206  rlm     8207  ndash   8211
            mdash   8212  lsquo   8216  rsquo   8217  sbquo   8218
            ldquo   8220  rdquo   8221  bdquo   8222  dagger  8224
            Dagger  8225  permil  8240  lsaquo  8249  rsaquo  8250
            euro    8364
        ),

        # Mathematical, Greek and Symbolic characters for HTML

        # Latin Extended-B
        qw(
            fnof    402
        ),

        # Greek
        # * there is no Sigmaf, and no U+03A2 character either
        qw(
            Alpha    913  Beta     914  Gamma    915  Delta    916
            Epsilon  917  Zeta     918  Eta      919  Theta    920
            Iota     921  Kappa    922  Lambda   923  Mu       924
            Nu       925  Xi       926  Omicron  927  Pi       928
            Rho      929  Sigma    931  Tau      932  Upsilon  933
            Phi      934  Chi      935  Psi      936  Omega    937
            alpha    945  beta     946  gamma    947  delta    948
            epsilon  949  zeta     950  eta      951  theta    952
            iota     953  kappa    954  lambda   955  mu       956
            nu       957  xi       958  omicron  959  pi       960
            rho      961  sigmaf   962  sigma    963  tau      964
            upsilon  965  phi      966  chi      967  psi      968
            omega    969  thetasym 977  upsih    978  piv      982
        ),

        # General Punctuation
        # * bullet is NOT the same as bullet operator, U+2219
        qw(
            bull    8226  hellip  8230  prime   8242  Prime   8243
            oline   8254  frasl   8260
        ),

        # Letterlike Symbols
        # * alef symbol is NOT the same as hebrew letter alef, U+05D0,
        #   although the same glyph could be used to depict both characters
        qw(
            weierp  8472  image   8465  real    8476  trade   8482
            alefsym 8501
        ),

        # Arrows
        # * Unicode does not say that lArr is the same as the 'is implied
        #   by' arrow but also does not have any other character for that
        #   function, so lArr can be used for 'is implied by' as ISOtech
        #   suggests
        # * Unicode does not say rArr is the 'implies' character but does not
        #   have another character with this function, so rArr can be used
        #   for 'implies' as ISOtech suggests
        qw(
            larr    8592  uarr    8593  rarr    8594  darr    8595
            harr    8596  crarr   8629  lArr    8656  uArr    8657
            rArr    8658  dArr    8659  hArr    8660
        ),

        # Mathematical Operators
        # * should there be a more memorable name than 'ni'?
        # * prod is NOT the same character as U+03A0 'greek capital letter
        #   pi' though the same glyph might be used for both
        # * sum is NOT the same character as U+03A3 'greek capital letter
        #   sigma' though the same glyph might be used for both
        # * sim: tilde operator is NOT the same character as the tilde,
        #   U+007E, although the same glyph might be used to represent both
        # * note that nsup, 'not a superset of, U+2283' is not covered by the
        #   Symbol font encoding and is not included. Should it be, for
        #   symmetry? It is in ISOamsn
        # * sdot: dot operator is NOT the same character as U+00B7 middle dot
        qw(
            forall  8704  part    8706  exist   8707  empty   8709
            nabla   8711  isin    8712  notin   8713  ni      8715
            prod    8719  sum     8721  minus   8722  lowast  8727
            radic   8730  prop    8733  infin   8734  ang     8736
            and     8743  or      8744  cap     8745  cup     8746
            int     8747  there4  8756  sim     8764  cong    8773
            asymp   8776  ne      8800  equiv   8801  le      8804
            ge      8805  sub     8834  sup     8835  nsub    8836
            sube    8838  supe    8839  oplus   8853  otimes  8855
            perp    8869  sdot    8901
        ),

        # Miscellaneous Technical
        # * lang is NOT the same character as U+003C 'less than' or U+2039
        #   'single left-pointing angle quotation mark'
        # * rang is NOT the same character as U+003E 'greater than' or
        #   U+203A 'single right-pointing angle quotation mark'
        qw(
            lceil   8968  rceil   8969  lfloor  8970  rfloor  8971
            lang    9001  rang    9002
        ),

        # Geometric Shapes
        qw(
            loz     9674
        ),

        # Miscellaneous Symbols
        # * black here seems to mean filled as opposed to hollow
        qw(
            spades  9824  clubs   9827  hearts  9829  diams   9830
        ),
    );

    # Turn every code point into the corresponding character.
    map { $_ => chr($code_point_of{$_}) } keys %code_point_of;
};
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
# process ($class, $stuff)
# ------------------------
# Class method. Looks up the entity name $stuff (without the leading '&' and
# trailing ';') in %ENTITY_2_CHAR and returns the mapped character, or undef
# when the name is not in the table.
#
# Emits a warning, but still carries on, when not called with exactly two
# arguments (the invocant plus the entity name).
sub process
{
    (@_ == 2) or warn "MKDoc::XML::Decode::XHTML::process() should be called with two arguments";
    my ($class, $stuff) = @_;

    # A missing entity name cannot match anything; answer undef directly
    # rather than doing a hash lookup on an undefined key, which would add
    # an "uninitialized value" warning on top of the one above.
    return defined $stuff ? $ENTITY_2_CHAR{$stuff} : undef;
}
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
1; |