line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::Phylo::NeXML::Entities; |
2
|
51
|
|
|
51
|
|
282
|
use strict; |
|
51
|
|
|
|
|
84
|
|
|
51
|
|
|
|
|
1186
|
|
3
|
51
|
|
|
51
|
|
252
|
use warnings; |
|
51
|
|
|
|
|
96
|
|
|
51
|
|
|
|
|
1060
|
|
4
|
51
|
|
|
51
|
|
213
|
use base 'Exporter'; |
|
51
|
|
|
|
|
87
|
|
|
51
|
|
|
|
|
66100
|
|
5
|
|
|
|
|
|
|
our @EXPORT_OK = qw'encode_entities decode_entities'; |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
# Lookup table from a complete named entity (including the leading '&' and
# the trailing ';') to the character it stands for. The keys carry the
# delimiters because encode_entities() and decode_entities() look up the raw
# '&...;' substring they cut out of the text.
my %entity2char = (

    # Some normal chars that have special meaning in SGML context
    '&amp;'  => '&',    # ampersand
    '&gt;'   => '>',    # greater than
    '&lt;'   => '<',    # less than
    '&quot;' => '"',    # double quote
    '&apos;' => "'",    # single quote

    # PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML
    '&AElig;'  => chr(198),    # capital AE diphthong (ligature)
    '&Aacute;' => chr(193),    # capital A, acute accent
    '&Acirc;'  => chr(194),    # capital A, circumflex accent
    '&Agrave;' => chr(192),    # capital A, grave accent
    '&Aring;'  => chr(197),    # capital A, ring
    '&Atilde;' => chr(195),    # capital A, tilde
    '&Auml;'   => chr(196),    # capital A, dieresis or umlaut mark
    '&Ccedil;' => chr(199),    # capital C, cedilla
    '&ETH;'    => chr(208),    # capital Eth, Icelandic
    '&Eacute;' => chr(201),    # capital E, acute accent
    '&Ecirc;'  => chr(202),    # capital E, circumflex accent
    '&Egrave;' => chr(200),    # capital E, grave accent
    '&Euml;'   => chr(203),    # capital E, dieresis or umlaut mark
    '&Iacute;' => chr(205),    # capital I, acute accent
    '&Icirc;'  => chr(206),    # capital I, circumflex accent
    '&Igrave;' => chr(204),    # capital I, grave accent
    '&Iuml;'   => chr(207),    # capital I, dieresis or umlaut mark
    '&Ntilde;' => chr(209),    # capital N, tilde
    '&Oacute;' => chr(211),    # capital O, acute accent
    '&Ocirc;'  => chr(212),    # capital O, circumflex accent
    '&Ograve;' => chr(210),    # capital O, grave accent
    '&Oslash;' => chr(216),    # capital O, slash
    '&Otilde;' => chr(213),    # capital O, tilde
    '&Ouml;'   => chr(214),    # capital O, dieresis or umlaut mark
    '&THORN;'  => chr(222),    # capital THORN, Icelandic
    '&Uacute;' => chr(218),    # capital U, acute accent
    '&Ucirc;'  => chr(219),    # capital U, circumflex accent
    '&Ugrave;' => chr(217),    # capital U, grave accent
    '&Uuml;'   => chr(220),    # capital U, dieresis or umlaut mark
    '&Yacute;' => chr(221),    # capital Y, acute accent
    '&aacute;' => chr(225),    # small a, acute accent
    '&acirc;'  => chr(226),    # small a, circumflex accent
    '&aelig;'  => chr(230),    # small ae diphthong (ligature)
    '&agrave;' => chr(224),    # small a, grave accent
    '&aring;'  => chr(229),    # small a, ring
    '&atilde;' => chr(227),    # small a, tilde
    '&auml;'   => chr(228),    # small a, dieresis or umlaut mark
    '&ccedil;' => chr(231),    # small c, cedilla
    '&eacute;' => chr(233),    # small e, acute accent
    '&ecirc;'  => chr(234),    # small e, circumflex accent
    '&egrave;' => chr(232),    # small e, grave accent
    '&eth;'    => chr(240),    # small eth, Icelandic
    '&euml;'   => chr(235),    # small e, dieresis or umlaut mark
    '&iacute;' => chr(237),    # small i, acute accent
    '&icirc;'  => chr(238),    # small i, circumflex accent
    '&igrave;' => chr(236),    # small i, grave accent
    '&iuml;'   => chr(239),    # small i, dieresis or umlaut mark
    '&ntilde;' => chr(241),    # small n, tilde
    '&oacute;' => chr(243),    # small o, acute accent
    '&ocirc;'  => chr(244),    # small o, circumflex accent
    '&ograve;' => chr(242),    # small o, grave accent
    '&oslash;' => chr(248),    # small o, slash
    '&otilde;' => chr(245),    # small o, tilde
    '&ouml;'   => chr(246),    # small o, dieresis or umlaut mark
    '&szlig;'  => chr(223),    # small sharp s, German (sz ligature)
    '&thorn;'  => chr(254),    # small thorn, Icelandic
    '&uacute;' => chr(250),    # small u, acute accent
    '&ucirc;'  => chr(251),    # small u, circumflex accent
    '&ugrave;' => chr(249),    # small u, grave accent
    '&uuml;'   => chr(252),    # small u, dieresis or umlaut mark
    '&yacute;' => chr(253),    # small y, acute accent
    '&yuml;'   => chr(255),    # small y, dieresis or umlaut mark

    # Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96)
    '&copy;' => chr(169),      # copyright sign
    '&reg;'  => chr(174),      # registered sign
    '&nbsp;' => chr(160),      # non breaking space

    # Additional ISO-8859/1 entities listed in rfc1866 (section 14)
    '&iexcl;'    => chr(161),
    '&cent;'     => chr(162),
    '&pound;'    => chr(163),
    '&curren;'   => chr(164),
    '&yen;'      => chr(165),
    '&brvbar;'   => chr(166),
    '&sect;'     => chr(167),
    '&uml;'      => chr(168),
    '&ordf;'     => chr(170),
    '&laquo;'    => chr(171),
    '&not;'      => chr(172),
    '&shy;'      => chr(173),
    '&macr;'     => chr(175),
    '&deg;'      => chr(176),
    '&plusmn;'   => chr(177),
    '&sup1;'     => chr(185),
    '&sup2;'     => chr(178),
    '&sup3;'     => chr(179),
    '&acute;'    => chr(180),
    '&micro;'    => chr(181),
    '&para;'     => chr(182),
    '&middot;'   => chr(183),
    '&cedil;'    => chr(184),
    '&ordm;'     => chr(186),
    '&raquo;'    => chr(187),
    '&frac14;'   => chr(188),
    '&frac12;'   => chr(189),
    '&frac34;'   => chr(190),
    '&iquest;'   => chr(191),
    '&times;'    => chr(215),
    '&divide;'   => chr(247),
    '&OElig;'    => chr(338),
    '&oelig;'    => chr(339),
    '&Scaron;'   => chr(352),
    '&scaron;'   => chr(353),
    '&Yuml;'     => chr(376),
    '&fnof;'     => chr(402),
    '&circ;'     => chr(710),
    '&tilde;'    => chr(732),
    '&Alpha;'    => chr(913),
    '&Beta;'     => chr(914),
    '&Gamma;'    => chr(915),
    '&Delta;'    => chr(916),
    '&Epsilon;'  => chr(917),
    '&Zeta;'     => chr(918),
    '&Eta;'      => chr(919),
    '&Theta;'    => chr(920),
    '&Iota;'     => chr(921),
    '&Kappa;'    => chr(922),
    '&Lambda;'   => chr(923),
    '&Mu;'       => chr(924),
    '&Nu;'       => chr(925),
    '&Xi;'       => chr(926),
    '&Omicron;'  => chr(927),
    '&Pi;'       => chr(928),
    '&Rho;'      => chr(929),
    '&Sigma;'    => chr(931),
    '&Tau;'      => chr(932),
    '&Upsilon;'  => chr(933),
    '&Phi;'      => chr(934),
    '&Chi;'      => chr(935),
    '&Psi;'      => chr(936),
    '&Omega;'    => chr(937),
    '&alpha;'    => chr(945),
    '&beta;'     => chr(946),
    '&gamma;'    => chr(947),
    '&delta;'    => chr(948),
    '&epsilon;'  => chr(949),
    '&zeta;'     => chr(950),
    '&eta;'      => chr(951),
    '&theta;'    => chr(952),
    '&iota;'     => chr(953),
    '&kappa;'    => chr(954),
    '&lambda;'   => chr(955),
    '&mu;'       => chr(956),
    '&nu;'       => chr(957),
    '&xi;'       => chr(958),
    '&omicron;'  => chr(959),
    '&pi;'       => chr(960),
    '&rho;'      => chr(961),
    '&sigmaf;'   => chr(962),
    '&sigma;'    => chr(963),
    '&tau;'      => chr(964),
    '&upsilon;'  => chr(965),
    '&phi;'      => chr(966),
    '&chi;'      => chr(967),
    '&psi;'      => chr(968),
    '&omega;'    => chr(969),
    '&thetasym;' => chr(977),
    '&upsih;'    => chr(978),
    '&piv;'      => chr(982),
    '&ensp;'     => chr(8194),
    '&emsp;'     => chr(8195),
    '&thinsp;'   => chr(8201),
    '&zwnj;'     => chr(8204),
    '&zwj;'      => chr(8205),
    '&lrm;'      => chr(8206),
    '&rlm;'      => chr(8207),
    '&ndash;'    => chr(8211),
    '&mdash;'    => chr(8212),
    '&lsquo;'    => chr(8216),
    '&rsquo;'    => chr(8217),
    '&sbquo;'    => chr(8218),
    '&ldquo;'    => chr(8220),
    '&rdquo;'    => chr(8221),
    '&bdquo;'    => chr(8222),
    '&dagger;'   => chr(8224),
    '&Dagger;'   => chr(8225),
    '&bull;'     => chr(8226),
    '&hellip;'   => chr(8230),
    '&permil;'   => chr(8240),
    '&prime;'    => chr(8242),
    '&Prime;'    => chr(8243),
    '&lsaquo;'   => chr(8249),
    '&rsaquo;'   => chr(8250),
    '&oline;'    => chr(8254),
    '&frasl;'    => chr(8260),
    '&euro;'     => chr(8364),
    '&image;'    => chr(8465),
    '&weierp;'   => chr(8472),
    '&real;'     => chr(8476),
    '&trade;'    => chr(8482),
    '&alefsym;'  => chr(8501),
    '&larr;'     => chr(8592),
    '&uarr;'     => chr(8593),
    '&rarr;'     => chr(8594),
    '&darr;'     => chr(8595),
    '&harr;'     => chr(8596),
    '&crarr;'    => chr(8629),
    '&lArr;'     => chr(8656),
    '&uArr;'     => chr(8657),
    '&rArr;'     => chr(8658),
    '&dArr;'     => chr(8659),
    '&hArr;'     => chr(8660),
    '&forall;'   => chr(8704),
    '&part;'     => chr(8706),
    '&exist;'    => chr(8707),
    '&empty;'    => chr(8709),
    '&nabla;'    => chr(8711),
    '&isin;'     => chr(8712),
    '&notin;'    => chr(8713),
    '&ni;'       => chr(8715),
    '&prod;'     => chr(8719),
    '&sum;'      => chr(8721),
    '&minus;'    => chr(8722),
    '&lowast;'   => chr(8727),
    '&radic;'    => chr(8730),
    '&prop;'     => chr(8733),
    '&infin;'    => chr(8734),
    '&ang;'      => chr(8736),
    '&and;'      => chr(8743),
    '&or;'       => chr(8744),
    '&cap;'      => chr(8745),
    '&cup;'      => chr(8746),
    '&int;'      => chr(8747),
    '&there4;'   => chr(8756),
    '&sim;'      => chr(8764),
    '&cong;'     => chr(8773),
    '&asymp;'    => chr(8776),
    '&ne;'       => chr(8800),
    '&equiv;'    => chr(8801),
    '&le;'       => chr(8804),
    '&ge;'       => chr(8805),
    '&sub;'      => chr(8834),
    '&sup;'      => chr(8835),
    '&nsub;'     => chr(8836),
    '&sube;'     => chr(8838),
    '&supe;'     => chr(8839),
    '&oplus;'    => chr(8853),
    '&otimes;'   => chr(8855),
    '&perp;'     => chr(8869),
    '&sdot;'     => chr(8901),
    '&lceil;'    => chr(8968),
    '&rceil;'    => chr(8969),
    '&lfloor;'   => chr(8970),
    '&rfloor;'   => chr(8971),
    '&lang;'     => chr(9001),
    '&rang;'     => chr(9002),
    '&loz;'      => chr(9674),
    '&spades;'   => chr(9824),
    '&clubs;'    => chr(9827),
    '&hearts;'   => chr(9829),
    '&diams;'    => chr(9830),
);

# Make the opposite mapping (character => '&name;'). Every character occurs
# exactly once as a value above, so inverting the hash loses nothing.
my %char2entity = reverse %entity2char;
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
# Fill in missing entities |
274
|
|
|
|
|
|
|
#for (0 .. 255) { |
275
|
|
|
|
|
|
|
# next if exists $char2entity{chr($_)}; |
276
|
|
|
|
|
|
|
# $char2entity{chr($_)} = "&#$_;"; |
277
|
|
|
|
|
|
|
#} |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
# Encodes characters of a string as XML entities.
#
# Args    : $string - the text to encode
#           $chars  - optional string listing the characters to encode; when
#                     it is undefined or empty, every character that has an
#                     entry in %char2entity is encoded
# Returns : the encoded string ('' when $string is undefined)
# Notes   : A '&' or ';' that already delimits an entity known to
#           %entity2char (for instance the two delimiters of '&lt;') is left
#           alone, so text that is already encoded is not encoded twice.
#           A requested character without an entry in %char2entity is
#           written as a decimal character reference ('&#NNN;') instead of
#           being silently dropped.
sub encode_entities {
    my ( $string, $chars ) = @_;
    return '' if not defined $string;

    # Test definedness and length rather than truth so that a $chars of '0'
    # is honoured as "encode the character 0".
    my @wanted =
      ( defined $chars and length $chars )
      ? split( //, $chars )
      : keys %char2entity;
    my %escape = map { ( $_ => 1 ) } @wanted;

    my @string = split //, $string;
  CHAR: for my $i ( 0 .. $#string ) {
        my $c = $string[$i];
        next CHAR if not $escape{$c};

        if ( $c eq '&' ) {

            # Look ahead up to and including the next ';'. Elements to the
            # right of $i have not been rewritten yet, so this inspects the
            # original text.
            my $maybe_entity = '';
          FIND_SEMI: for my $j ( $i .. $#string ) {
                $maybe_entity .= $string[$j];
                last FIND_SEMI if $string[$j] eq ';';
            }
            next CHAR if exists $entity2char{$maybe_entity};
        }
        elsif ( $c eq ';' ) {

            # Look back to the nearest '&' that was left as is. Elements that
            # were already rewritten hold whole entities, never a bare '&'.
            my $maybe_entity = '';
          FIND_AMP: for ( my $j = $i ; $j >= 0 ; $j-- ) {
                $maybe_entity = $string[$j] . $maybe_entity;
                last FIND_AMP if $string[$j] eq '&';
            }
            next CHAR if exists $entity2char{$maybe_entity};
        }

        $string[$i] =
          exists $char2entity{$c}
          ? $char2entity{$c}
          : sprintf( '&#%d;', ord $c );
    }
    return join '', @string;
}
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
# Decodes named entities into the characters they stand for.
#
# Args    : one or more encoded strings; the arguments themselves are not
#           modified
# Returns : in list context the decoded strings, in argument order; in scalar
#           context the first decoded string. An undefined argument decodes
#           to ''.
# Notes   : Only entities present in %entity2char are replaced; any other
#           '&...;' sequence, including numeric character references, is
#           copied through unchanged. Replacement text is not scanned again,
#           so '&amp;lt;' decodes to '&lt;' and not to '<'.
sub decode_entities {
    my @results;
    for my $string (@_) {
        my $decoded = defined $string ? $string : '';

        # A candidate runs from a '&' to the next ';' with no further '&' in
        # between; the lookup decides whether it is really an entity. One
        # substitution pass avoids splicing an array while indexing over its
        # original length, which read past the end of the shortened array.
        $decoded =~ s{(&[^&;]*;)}{
            exists $entity2char{$1} ? $entity2char{$1} : $1
        }ge;
        push @results, $decoded;
    }
    return wantarray ? @results : $results[0];
}
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
1; |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
__END__ |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
=head1 NAME |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
Bio::Phylo::NeXML::Entities - Functions for dealing with XML entities |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
=head1 DESCRIPTION |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
This package provides subroutines for dealing with characters that need to be |
354
|
|
|
|
|
|
|
encoded as XML entities, and decoded in other formats. For example: C<&> needs |
355
|
|
|
|
|
|
|
to be encoded as C<&amp;> in XML. The subroutines have the same signatures and |
356
|
|
|
|
|
|
|
the same names as those in the commonly-used module L<HTML::Entities>. They are |
357
|
|
|
|
|
|
|
re-implemented here to avoid introducing dependencies. |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
=head1 SUBROUTINES |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
The following subroutines are utility functions that can be imported using: |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
use Bio::Phylo::NeXML::Entities '/entities/'; |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
=over |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
=item encode_entities |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
Encodes problematic characters as XML entities |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
Type : Utility function |
372
|
|
|
|
|
|
|
Title : encode_entities |
373
|
|
|
|
|
|
|
Usage : my $encoded = encode_entities('string with & or >','>&') |
374
|
|
|
|
|
|
|
Function: Encodes entities in first argument string |
375
|
|
|
|
|
|
|
Returns : Modified string |
376
|
|
|
|
|
|
|
Args : Required, first argument: a string to encode |
377
|
|
|
|
|
|
|
Optional, second argument: a string that specifies |
378
|
|
|
|
|
|
|
which characters to encode |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
=item decode_entities |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
Decodes XML entities into the characters they code for |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
Type : Utility function |
385
|
|
|
|
|
|
|
Title : decode_entities |
386
|
|
|
|
|
|
|
Usage : my $decoded = decode_entities('string with &amp; or &gt;') |
387
|
|
|
|
|
|
|
Function: decodes encoded entities in argument string(s) |
388
|
|
|
|
|
|
|
Returns : List of decoded strings (in scalar context, the first decoded string) |
389
|
|
|
|
|
|
|
Args : One or more encoded strings |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
=back |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
=head1 SEE ALSO |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo> |
396
|
|
|
|
|
|
|
for any user or developer questions and discussions. |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
=over |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
=item L<Bio::Phylo::Manual> |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com>. |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
=back |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
=head1 CITATION |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
If you use Bio::Phylo in published research, please cite it: |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen> |
411
|
|
|
|
|
|
|
and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl. |
412
|
|
|
|
|
|
|
I<BMC Bioinformatics> B<12>:63. |
413
|
|
|
|
|
|
|
L<http://dx.doi.org/10.1186/1471-2105-12-63> |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
=cut |
418
|
|
|
|
|
|
|
|