line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::Phylo::NeXML::Entities; |
2
|
51
|
|
|
51
|
|
282
|
use strict; |
|
51
|
|
|
|
|
109
|
|
|
51
|
|
|
|
|
1394
|
|
3
|
51
|
|
|
51
|
|
243
|
use base 'Exporter'; |
|
51
|
|
|
|
|
92
|
|
|
51
|
|
|
|
|
69020
|
|
4
|
|
|
|
|
|
|
our @EXPORT_OK = qw'encode_entities decode_entities'; |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
my %entity2char = ( |
7
|
|
|
|
|
|
|
# Some normal chars that have special meaning in SGML context |
8
|
|
|
|
|
|
|
'&' => '&', # ampersand |
9
|
|
|
|
|
|
|
'>' => '>', # greater than |
10
|
|
|
|
|
|
|
'<' => '<', # less than |
11
|
|
|
|
|
|
|
'"' => '"', # double quote |
12
|
|
|
|
|
|
|
''' => "'", # single quote |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
# PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML |
15
|
|
|
|
|
|
|
'Æ' => chr(198), # capital AE diphthong (ligature) |
16
|
|
|
|
|
|
|
'Á' => chr(193), # capital A, acute accent |
17
|
|
|
|
|
|
|
'Â' => chr(194), # capital A, circumflex accent |
18
|
|
|
|
|
|
|
'À' => chr(192), # capital A, grave accent |
19
|
|
|
|
|
|
|
'Å' => chr(197), # capital A, ring |
20
|
|
|
|
|
|
|
'Ã' => chr(195), # capital A, tilde |
21
|
|
|
|
|
|
|
'Ä' => chr(196), # capital A, dieresis or umlaut mark |
22
|
|
|
|
|
|
|
'Ç' => chr(199), # capital C, cedilla |
23
|
|
|
|
|
|
|
'Ð' => chr(208), # capital Eth, Icelandic |
24
|
|
|
|
|
|
|
'É' => chr(201), # capital E, acute accent |
25
|
|
|
|
|
|
|
'Ê' => chr(202), # capital E, circumflex accent |
26
|
|
|
|
|
|
|
'È' => chr(200), # capital E, grave accent |
27
|
|
|
|
|
|
|
'Ë' => chr(203), # capital E, dieresis or umlaut mark |
28
|
|
|
|
|
|
|
'Í' => chr(205), # capital I, acute accent |
29
|
|
|
|
|
|
|
'Î' => chr(206), # capital I, circumflex accent |
30
|
|
|
|
|
|
|
'Ì' => chr(204), # capital I, grave accent |
31
|
|
|
|
|
|
|
'Ï' => chr(207), # capital I, dieresis or umlaut mark |
32
|
|
|
|
|
|
|
'Ñ' => chr(209), # capital N, tilde |
33
|
|
|
|
|
|
|
'Ó' => chr(211), # capital O, acute accent |
34
|
|
|
|
|
|
|
'Ô' => chr(212), # capital O, circumflex accent |
35
|
|
|
|
|
|
|
'Ò' => chr(210), # capital O, grave accent |
36
|
|
|
|
|
|
|
'Ø' => chr(216), # capital O, slash |
37
|
|
|
|
|
|
|
'Õ' => chr(213), # capital O, tilde |
38
|
|
|
|
|
|
|
'Ö' => chr(214), # capital O, dieresis or umlaut mark |
39
|
|
|
|
|
|
|
'Þ' => chr(222), # capital THORN, Icelandic |
40
|
|
|
|
|
|
|
'Ú' => chr(218), # capital U, acute accent |
41
|
|
|
|
|
|
|
'Û' => chr(219), # capital U, circumflex accent |
42
|
|
|
|
|
|
|
'Ù' => chr(217), # capital U, grave accent |
43
|
|
|
|
|
|
|
'Ü' => chr(220), # capital U, dieresis or umlaut mark |
44
|
|
|
|
|
|
|
'Ý' => chr(221), # capital Y, acute accent |
45
|
|
|
|
|
|
|
'á' => chr(225), # small a, acute accent |
46
|
|
|
|
|
|
|
'â' => chr(226), # small a, circumflex accent |
47
|
|
|
|
|
|
|
'æ' => chr(230), # small ae diphthong (ligature) |
48
|
|
|
|
|
|
|
'à' => chr(224), # small a, grave accent |
49
|
|
|
|
|
|
|
'å' => chr(229), # small a, ring |
50
|
|
|
|
|
|
|
'ã' => chr(227), # small a, tilde |
51
|
|
|
|
|
|
|
'ä' => chr(228), # small a, dieresis or umlaut mark |
52
|
|
|
|
|
|
|
'ç' => chr(231), # small c, cedilla |
53
|
|
|
|
|
|
|
'é' => chr(233), # small e, acute accent |
54
|
|
|
|
|
|
|
'ê' => chr(234), # small e, circumflex accent |
55
|
|
|
|
|
|
|
'è' => chr(232), # small e, grave accent |
56
|
|
|
|
|
|
|
'ð' => chr(240), # small eth, Icelandic |
57
|
|
|
|
|
|
|
'ë' => chr(235), # small e, dieresis or umlaut mark |
58
|
|
|
|
|
|
|
'í' => chr(237), # small i, acute accent |
59
|
|
|
|
|
|
|
'î' => chr(238), # small i, circumflex accent |
60
|
|
|
|
|
|
|
'ì' => chr(236), # small i, grave accent |
61
|
|
|
|
|
|
|
'ï' => chr(239), # small i, dieresis or umlaut mark |
62
|
|
|
|
|
|
|
'ñ' => chr(241), # small n, tilde |
63
|
|
|
|
|
|
|
'ó' => chr(243), # small o, acute accent |
64
|
|
|
|
|
|
|
'ô' => chr(244), # small o, circumflex accent |
65
|
|
|
|
|
|
|
'ò' => chr(242), # small o, grave accent |
66
|
|
|
|
|
|
|
'ø' => chr(248), # small o, slash |
67
|
|
|
|
|
|
|
'õ' => chr(245), # small o, tilde |
68
|
|
|
|
|
|
|
'ö' => chr(246), # small o, dieresis or umlaut mark |
69
|
|
|
|
|
|
|
'ß' => chr(223), # small sharp s, German (sz ligature) |
70
|
|
|
|
|
|
|
'þ' => chr(254), # small thorn, Icelandic |
71
|
|
|
|
|
|
|
'ú' => chr(250), # small u, acute accent |
72
|
|
|
|
|
|
|
'û' => chr(251), # small u, circumflex accent |
73
|
|
|
|
|
|
|
'ù' => chr(249), # small u, grave accent |
74
|
|
|
|
|
|
|
'ü' => chr(252), # small u, dieresis or umlaut mark |
75
|
|
|
|
|
|
|
'ý' => chr(253), # small y, acute accent |
76
|
|
|
|
|
|
|
'ÿ' => chr(255), # small y, dieresis or umlaut mark |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
# Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96) |
79
|
|
|
|
|
|
|
'©' => chr(169), # copyright sign |
80
|
|
|
|
|
|
|
'®' => chr(174), # registered sign |
81
|
|
|
|
|
|
|
' ' => chr(160), # non breaking space |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
# Additional ISO-8859/1 entities listed in rfc1866 (section 14) |
84
|
|
|
|
|
|
|
'¡' => chr(161), |
85
|
|
|
|
|
|
|
'¢' => chr(162), |
86
|
|
|
|
|
|
|
'£' => chr(163), |
87
|
|
|
|
|
|
|
'¤' => chr(164), |
88
|
|
|
|
|
|
|
'¥' => chr(165), |
89
|
|
|
|
|
|
|
'¦' => chr(166), |
90
|
|
|
|
|
|
|
'§' => chr(167), |
91
|
|
|
|
|
|
|
'¨' => chr(168), |
92
|
|
|
|
|
|
|
'ª' => chr(170), |
93
|
|
|
|
|
|
|
'«' => chr(171), |
94
|
|
|
|
|
|
|
'¬' => chr(172), |
95
|
|
|
|
|
|
|
'' => chr(173), |
96
|
|
|
|
|
|
|
'¯' => chr(175), |
97
|
|
|
|
|
|
|
'°' => chr(176), |
98
|
|
|
|
|
|
|
'±' => chr(177), |
99
|
|
|
|
|
|
|
'¹' => chr(185), |
100
|
|
|
|
|
|
|
'²' => chr(178), |
101
|
|
|
|
|
|
|
'³' => chr(179), |
102
|
|
|
|
|
|
|
'´' => chr(180), |
103
|
|
|
|
|
|
|
'µ' => chr(181), |
104
|
|
|
|
|
|
|
'¶' => chr(182), |
105
|
|
|
|
|
|
|
'·' => chr(183), |
106
|
|
|
|
|
|
|
'¸' => chr(184), |
107
|
|
|
|
|
|
|
'º' => chr(186), |
108
|
|
|
|
|
|
|
'»' => chr(187), |
109
|
|
|
|
|
|
|
'¼' => chr(188), |
110
|
|
|
|
|
|
|
'½' => chr(189), |
111
|
|
|
|
|
|
|
'¾' => chr(190), |
112
|
|
|
|
|
|
|
'¿' => chr(191), |
113
|
|
|
|
|
|
|
'×' => chr(215), |
114
|
|
|
|
|
|
|
'÷' => chr(247), |
115
|
|
|
|
|
|
|
'Œ' => chr(338), |
116
|
|
|
|
|
|
|
'œ' => chr(339), |
117
|
|
|
|
|
|
|
'Š' => chr(352), |
118
|
|
|
|
|
|
|
'š' => chr(353), |
119
|
|
|
|
|
|
|
'Ÿ' => chr(376), |
120
|
|
|
|
|
|
|
'ƒ' => chr(402), |
121
|
|
|
|
|
|
|
'ˆ' => chr(710), |
122
|
|
|
|
|
|
|
'˜' => chr(732), |
123
|
|
|
|
|
|
|
'Α' => chr(913), |
124
|
|
|
|
|
|
|
'Β' => chr(914), |
125
|
|
|
|
|
|
|
'Γ' => chr(915), |
126
|
|
|
|
|
|
|
'Δ' => chr(916), |
127
|
|
|
|
|
|
|
'Ε' => chr(917), |
128
|
|
|
|
|
|
|
'Ζ' => chr(918), |
129
|
|
|
|
|
|
|
'Η' => chr(919), |
130
|
|
|
|
|
|
|
'Θ' => chr(920), |
131
|
|
|
|
|
|
|
'Ι' => chr(921), |
132
|
|
|
|
|
|
|
'Κ' => chr(922), |
133
|
|
|
|
|
|
|
'Λ' => chr(923), |
134
|
|
|
|
|
|
|
'Μ' => chr(924), |
135
|
|
|
|
|
|
|
'Ν' => chr(925), |
136
|
|
|
|
|
|
|
'Ξ' => chr(926), |
137
|
|
|
|
|
|
|
'Ο' => chr(927), |
138
|
|
|
|
|
|
|
'Π' => chr(928), |
139
|
|
|
|
|
|
|
'Ρ' => chr(929), |
140
|
|
|
|
|
|
|
'Σ' => chr(931), |
141
|
|
|
|
|
|
|
'Τ' => chr(932), |
142
|
|
|
|
|
|
|
'Υ' => chr(933), |
143
|
|
|
|
|
|
|
'Φ' => chr(934), |
144
|
|
|
|
|
|
|
'Χ' => chr(935), |
145
|
|
|
|
|
|
|
'Ψ' => chr(936), |
146
|
|
|
|
|
|
|
'Ω' => chr(937), |
147
|
|
|
|
|
|
|
'α' => chr(945), |
148
|
|
|
|
|
|
|
'β' => chr(946), |
149
|
|
|
|
|
|
|
'γ' => chr(947), |
150
|
|
|
|
|
|
|
'δ' => chr(948), |
151
|
|
|
|
|
|
|
'ε' => chr(949), |
152
|
|
|
|
|
|
|
'ζ' => chr(950), |
153
|
|
|
|
|
|
|
'η' => chr(951), |
154
|
|
|
|
|
|
|
'θ' => chr(952), |
155
|
|
|
|
|
|
|
'ι' => chr(953), |
156
|
|
|
|
|
|
|
'κ' => chr(954), |
157
|
|
|
|
|
|
|
'λ' => chr(955), |
158
|
|
|
|
|
|
|
'μ' => chr(956), |
159
|
|
|
|
|
|
|
'ν' => chr(957), |
160
|
|
|
|
|
|
|
'ξ' => chr(958), |
161
|
|
|
|
|
|
|
'ο' => chr(959), |
162
|
|
|
|
|
|
|
'π' => chr(960), |
163
|
|
|
|
|
|
|
'ρ' => chr(961), |
164
|
|
|
|
|
|
|
'ς' => chr(962), |
165
|
|
|
|
|
|
|
'σ' => chr(963), |
166
|
|
|
|
|
|
|
'τ' => chr(964), |
167
|
|
|
|
|
|
|
'υ' => chr(965), |
168
|
|
|
|
|
|
|
'φ' => chr(966), |
169
|
|
|
|
|
|
|
'χ' => chr(967), |
170
|
|
|
|
|
|
|
'ψ' => chr(968), |
171
|
|
|
|
|
|
|
'ω' => chr(969), |
172
|
|
|
|
|
|
|
'ϑ' => chr(977), |
173
|
|
|
|
|
|
|
'ϒ' => chr(978), |
174
|
|
|
|
|
|
|
'ϖ' => chr(982), |
175
|
|
|
|
|
|
|
' ' => chr(8194), |
176
|
|
|
|
|
|
|
' ' => chr(8195), |
177
|
|
|
|
|
|
|
' ' => chr(8201), |
178
|
|
|
|
|
|
|
'' => chr(8204), |
179
|
|
|
|
|
|
|
'' => chr(8205), |
180
|
|
|
|
|
|
|
'' => chr(8206), |
181
|
|
|
|
|
|
|
'' => chr(8207), |
182
|
|
|
|
|
|
|
'–' => chr(8211), |
183
|
|
|
|
|
|
|
'—' => chr(8212), |
184
|
|
|
|
|
|
|
'‘' => chr(8216), |
185
|
|
|
|
|
|
|
'’' => chr(8217), |
186
|
|
|
|
|
|
|
'‚' => chr(8218), |
187
|
|
|
|
|
|
|
'“' => chr(8220), |
188
|
|
|
|
|
|
|
'”' => chr(8221), |
189
|
|
|
|
|
|
|
'„' => chr(8222), |
190
|
|
|
|
|
|
|
'†' => chr(8224), |
191
|
|
|
|
|
|
|
'‡' => chr(8225), |
192
|
|
|
|
|
|
|
'•' => chr(8226), |
193
|
|
|
|
|
|
|
'…' => chr(8230), |
194
|
|
|
|
|
|
|
'‰' => chr(8240), |
195
|
|
|
|
|
|
|
'′' => chr(8242), |
196
|
|
|
|
|
|
|
'″' => chr(8243), |
197
|
|
|
|
|
|
|
'‹' => chr(8249), |
198
|
|
|
|
|
|
|
'›' => chr(8250), |
199
|
|
|
|
|
|
|
'‾' => chr(8254), |
200
|
|
|
|
|
|
|
'⁄' => chr(8260), |
201
|
|
|
|
|
|
|
'€' => chr(8364), |
202
|
|
|
|
|
|
|
'ℑ' => chr(8465), |
203
|
|
|
|
|
|
|
'℘' => chr(8472), |
204
|
|
|
|
|
|
|
'ℜ' => chr(8476), |
205
|
|
|
|
|
|
|
'™' => chr(8482), |
206
|
|
|
|
|
|
|
'ℵ' => chr(8501), |
207
|
|
|
|
|
|
|
'←' => chr(8592), |
208
|
|
|
|
|
|
|
'↑' => chr(8593), |
209
|
|
|
|
|
|
|
'→' => chr(8594), |
210
|
|
|
|
|
|
|
'↓' => chr(8595), |
211
|
|
|
|
|
|
|
'↔' => chr(8596), |
212
|
|
|
|
|
|
|
'↵' => chr(8629), |
213
|
|
|
|
|
|
|
'⇐' => chr(8656), |
214
|
|
|
|
|
|
|
'⇑' => chr(8657), |
215
|
|
|
|
|
|
|
'⇒' => chr(8658), |
216
|
|
|
|
|
|
|
'⇓' => chr(8659), |
217
|
|
|
|
|
|
|
'⇔' => chr(8660), |
218
|
|
|
|
|
|
|
'∀' => chr(8704), |
219
|
|
|
|
|
|
|
'∂' => chr(8706), |
220
|
|
|
|
|
|
|
'∃' => chr(8707), |
221
|
|
|
|
|
|
|
'∅' => chr(8709), |
222
|
|
|
|
|
|
|
'∇' => chr(8711), |
223
|
|
|
|
|
|
|
'∈' => chr(8712), |
224
|
|
|
|
|
|
|
'∉' => chr(8713), |
225
|
|
|
|
|
|
|
'∋' => chr(8715), |
226
|
|
|
|
|
|
|
'∏' => chr(8719), |
227
|
|
|
|
|
|
|
'∑' => chr(8721), |
228
|
|
|
|
|
|
|
'−' => chr(8722), |
229
|
|
|
|
|
|
|
'∗' => chr(8727), |
230
|
|
|
|
|
|
|
'√' => chr(8730), |
231
|
|
|
|
|
|
|
'∝' => chr(8733), |
232
|
|
|
|
|
|
|
'∞' => chr(8734), |
233
|
|
|
|
|
|
|
'∠' => chr(8736), |
234
|
|
|
|
|
|
|
'∧' => chr(8743), |
235
|
|
|
|
|
|
|
'∨' => chr(8744), |
236
|
|
|
|
|
|
|
'∩' => chr(8745), |
237
|
|
|
|
|
|
|
'∪' => chr(8746), |
238
|
|
|
|
|
|
|
'∫' => chr(8747), |
239
|
|
|
|
|
|
|
'∴' => chr(8756), |
240
|
|
|
|
|
|
|
'∼' => chr(8764), |
241
|
|
|
|
|
|
|
'≅' => chr(8773), |
242
|
|
|
|
|
|
|
'≈' => chr(8776), |
243
|
|
|
|
|
|
|
'≠' => chr(8800), |
244
|
|
|
|
|
|
|
'≡' => chr(8801), |
245
|
|
|
|
|
|
|
'≤' => chr(8804), |
246
|
|
|
|
|
|
|
'≥' => chr(8805), |
247
|
|
|
|
|
|
|
'⊂' => chr(8834), |
248
|
|
|
|
|
|
|
'⊃' => chr(8835), |
249
|
|
|
|
|
|
|
'⊄' => chr(8836), |
250
|
|
|
|
|
|
|
'⊆' => chr(8838), |
251
|
|
|
|
|
|
|
'⊇' => chr(8839), |
252
|
|
|
|
|
|
|
'⊕' => chr(8853), |
253
|
|
|
|
|
|
|
'⊗' => chr(8855), |
254
|
|
|
|
|
|
|
'⊥' => chr(8869), |
255
|
|
|
|
|
|
|
'⋅' => chr(8901), |
256
|
|
|
|
|
|
|
'⌈' => chr(8968), |
257
|
|
|
|
|
|
|
'⌉' => chr(8969), |
258
|
|
|
|
|
|
|
'⌊' => chr(8970), |
259
|
|
|
|
|
|
|
'⌋' => chr(8971), |
260
|
|
|
|
|
|
|
'〈' => chr(9001), |
261
|
|
|
|
|
|
|
'〉' => chr(9002), |
262
|
|
|
|
|
|
|
'◊' => chr(9674), |
263
|
|
|
|
|
|
|
'♠' => chr(9824), |
264
|
|
|
|
|
|
|
'♣' => chr(9827), |
265
|
|
|
|
|
|
|
'♥' => chr(9829), |
266
|
|
|
|
|
|
|
'♦' => chr(9830), |
267
|
|
|
|
|
|
|
); |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
# Make the opposite mapping |
270
|
|
|
|
|
|
|
my %char2entity = map { $entity2char{$_} => $_ } keys %entity2char; |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
# Fill in missing entities |
273
|
|
|
|
|
|
|
#for (0 .. 255) { |
274
|
|
|
|
|
|
|
# next if exists $char2entity{chr($_)}; |
275
|
|
|
|
|
|
|
# $char2entity{chr($_)} = "$_;"; |
276
|
|
|
|
|
|
|
#} |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
sub encode_entities { |
279
|
0
|
|
|
0
|
1
|
|
my ( $string, $chars ) = @_; |
280
|
0
|
|
|
|
|
|
my %escape; |
281
|
0
|
0
|
|
|
|
|
if ( $chars ) { |
282
|
0
|
|
|
|
|
|
%escape = map { $_ => 1 } split //, $chars; |
|
0
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
} |
284
|
|
|
|
|
|
|
else { |
285
|
0
|
|
|
|
|
|
%escape = map { $_ => 1 } keys %char2entity; |
|
0
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
} |
287
|
0
|
|
|
|
|
|
my @string = split //, $string; |
288
|
0
|
|
|
|
|
|
for my $i ( 0 .. $#string ) { |
289
|
0
|
|
|
|
|
|
my $c = $string[$i]; |
290
|
0
|
0
|
0
|
|
|
|
if ( $escape{$c} and $c ne '&' and $c ne ';' ) { |
|
|
0
|
0
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
291
|
0
|
|
|
|
|
|
$string[$i] = $char2entity{$c}; |
292
|
|
|
|
|
|
|
} |
293
|
|
|
|
|
|
|
elsif ( $escape{$c} and $c eq '&' ) { |
294
|
0
|
|
|
|
|
|
my $maybe_entity = ''; |
295
|
0
|
|
|
|
|
|
FIND_SEMI: for my $j ( $i .. $#string ) { |
296
|
0
|
|
|
|
|
|
$maybe_entity .= $string[$j]; |
297
|
0
|
0
|
|
|
|
|
last FIND_SEMI if $string[$j] eq ';'; |
298
|
|
|
|
|
|
|
} |
299
|
0
|
0
|
|
|
|
|
if ( not exists $entity2char{$maybe_entity} ) { |
300
|
0
|
|
|
|
|
|
$string[$i] = $char2entity{$c}; |
301
|
|
|
|
|
|
|
} |
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
elsif( $escape{$c} and $c eq ';' ) { |
304
|
0
|
|
|
|
|
|
my $maybe_entity = ''; |
305
|
0
|
|
|
|
|
|
FIND_AMP: for ( my $j = $i; $j >= 0; $j-- ) { |
306
|
0
|
|
|
|
|
|
$maybe_entity = $string[$j] . $maybe_entity; |
307
|
0
|
0
|
|
|
|
|
last FIND_SEMI if $string[$j] eq '&'; |
308
|
|
|
|
|
|
|
} |
309
|
0
|
0
|
|
|
|
|
if ( not exists $entity2char{$maybe_entity} ) { |
310
|
0
|
|
|
|
|
|
$string[$i] = $char2entity{$c}; |
311
|
|
|
|
|
|
|
} |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
} |
314
|
0
|
|
|
|
|
|
return join '', @string; |
315
|
|
|
|
|
|
|
} |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
sub decode_entities { |
318
|
0
|
|
|
0
|
1
|
|
my @results; |
319
|
0
|
|
|
|
|
|
for my $string ( @_ ) { |
320
|
0
|
|
|
|
|
|
my @string = split //, $string; |
321
|
0
|
|
|
|
|
|
for my $i ( 0 .. $#string ) { |
322
|
0
|
|
|
|
|
|
my $c = $string[$i]; |
323
|
0
|
0
|
|
|
|
|
if ( $c eq '&' ) { |
324
|
0
|
|
|
|
|
|
my $maybe_entity = ''; |
325
|
0
|
|
|
|
|
|
my $length = 0; |
326
|
0
|
|
|
|
|
|
FIND_SEMI: for my $j ( $i .. $#string ) { |
327
|
0
|
|
|
|
|
|
$maybe_entity .= $string[$j]; |
328
|
0
|
0
|
|
|
|
|
last FIND_SEMI if $string[$j] eq ';'; |
329
|
0
|
|
|
|
|
|
$length++; |
330
|
|
|
|
|
|
|
} |
331
|
0
|
0
|
|
|
|
|
if ( exists $entity2char{$maybe_entity} ) { |
332
|
0
|
|
|
|
|
|
$string[$i] = $entity2char{$maybe_entity}; |
333
|
0
|
|
|
|
|
|
splice( @string, $i + 1, $length ); |
334
|
|
|
|
|
|
|
} |
335
|
|
|
|
|
|
|
} |
336
|
|
|
|
|
|
|
} |
337
|
0
|
|
|
|
|
|
push @results, join '', @string; |
338
|
|
|
|
|
|
|
} |
339
|
0
|
0
|
|
|
|
|
return wantarray ? @results : $results[0]; |
340
|
|
|
|
|
|
|
} |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
1; |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
__END__ |