| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
|
|
2
|
|
|
|
|
|
|
#include "EXTERN.h" |
|
3
|
|
|
|
|
|
|
#include "perl.h" |
|
4
|
|
|
|
|
|
|
#include "XSUB.h" |
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
#include "ppport.h" |
|
7
|
|
|
|
|
|
|
#include "GenerateFunctions.h" |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
int GF_paranoia = 0; |
|
10
|
|
|
|
|
|
|
|
|
11
|
9292
|
|
|
|
|
|
SV * GF_escape_html(SV * str, int b_inplace, int b_lftobr, int b_sptonbsp, int b_leaveknown) { |
|
12
|
9292
|
|
|
|
|
|
int i, maxentitylen = 0; |
|
13
|
|
|
|
|
|
|
STRLEN origlen, extrachars; |
|
14
|
|
|
|
|
|
|
char * sp, *newsp, c, lastc; |
|
15
|
|
|
|
|
|
|
SV * newstr; |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
/* Get string pointer and length (in bytes) */ |
|
18
|
9292
|
100
|
|
|
|
|
if (b_inplace) { |
|
19
|
18
|
|
|
|
|
|
sp = SvPV_force(str, origlen); |
|
20
|
|
|
|
|
|
|
} else { |
|
21
|
9274
|
|
|
|
|
|
sp = SvPV(str, origlen); |
|
22
|
|
|
|
|
|
|
} |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
/* Calculate extra space required */ |
|
25
|
9292
|
|
|
|
|
|
extrachars = 0; |
|
26
|
9292
|
|
|
|
|
|
c = '\0'; |
|
27
|
159355
|
100
|
|
|
|
|
for (i = 0; i < origlen; i++) { |
|
28
|
|
|
|
|
|
|
/* Need to keep track of previous char for ' ' => ' ' expansion */ |
|
29
|
150063
|
|
|
|
|
|
lastc = c; |
|
30
|
150063
|
|
|
|
|
|
c = sp[i]; |
|
31
|
150063
|
100
|
|
|
|
|
if (c == '<' || c == '>') |
|
|
|
100
|
|
|
|
|
|
|
32
|
14349
|
|
|
|
|
|
extrachars += 3; |
|
33
|
135714
|
100
|
|
|
|
|
else if (c == '&' && (!b_leaveknown || !GF_is_known_entity(sp, i, origlen, &maxentitylen))) |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
34
|
7198
|
|
|
|
|
|
extrachars += 4; |
|
35
|
128516
|
100
|
|
|
|
|
else if (c == '"') |
|
36
|
7188
|
|
|
|
|
|
extrachars += 5; |
|
37
|
121328
|
100
|
|
|
|
|
else if (b_lftobr && c == '\n') |
|
|
|
100
|
|
|
|
|
|
|
38
|
756
|
|
|
|
|
|
extrachars += 3; |
|
39
|
120572
|
100
|
|
|
|
|
else if (b_sptonbsp && c == ' ' && lastc == ' ') { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
40
|
715
|
|
|
|
|
|
extrachars += 5; |
|
41
|
|
|
|
|
|
|
/* don't pick up immediately again */ |
|
42
|
715
|
|
|
|
|
|
c = '\0'; |
|
43
|
119857
|
50
|
|
|
|
|
} else if (GF_paranoia && (c == '{' || c == '}')) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
44
|
0
|
|
|
|
|
|
extrachars += 5; |
|
45
|
|
|
|
|
|
|
} |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
} |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
/* Special single space case */ |
|
50
|
9292
|
100
|
|
|
|
|
if (b_sptonbsp && origlen == 1 && sp[0] == ' ') { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
51
|
1
|
|
|
|
|
|
extrachars += 5; |
|
52
|
|
|
|
|
|
|
} |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
/* |
|
55
|
|
|
|
|
|
|
* Include maxentitylen in extrachars. Since in the actual substitution |
|
56
|
|
|
|
|
|
|
* phase, we work backwards copying characters towards the end of the |
|
57
|
|
|
|
|
|
|
* string as we go, we might overwrite part of an entity, and then try |
|
58
|
|
|
|
|
|
|
* and call GF_is_known_entity() on the string, which searches forward, |
|
59
|
|
|
|
|
|
|
* and then fails because we already overwrote the entity. So we always |
|
60
|
|
|
|
|
|
|
* make sure we've got maxentitylen extra chars, and then use the perl |
|
61
|
|
|
|
|
|
|
* OOK hack to offset the start of the string at the end |
|
62
|
|
|
|
|
|
|
*/ |
|
63
|
9292
|
100
|
|
|
|
|
if (b_inplace) extrachars += maxentitylen; |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
/* Create new SV, or grow existing SV */ |
|
66
|
9292
|
100
|
|
|
|
|
if (b_inplace) { |
|
67
|
18
|
|
|
|
|
|
newstr = str; |
|
68
|
18
|
50
|
|
|
|
|
SvGROW(newstr, origlen + extrachars + 1); |
|
|
|
100
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
} else { |
|
70
|
9274
|
|
|
|
|
|
newstr = newSV(origlen + extrachars + 1); |
|
71
|
9274
|
|
|
|
|
|
SvPOK_on(newstr); |
|
72
|
|
|
|
|
|
|
/* Make new string UTF-8 if input string was UTF-8 */ |
|
73
|
9274
|
100
|
|
|
|
|
if (SvUTF8(str)) |
|
74
|
2171
|
|
|
|
|
|
SvUTF8_on(newstr); |
|
75
|
|
|
|
|
|
|
} |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
/* Set the length of the string */ |
|
78
|
9292
|
|
|
|
|
|
SvCUR_set(newstr, origlen + extrachars); |
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
/* Now do actual replacement (need to work |
|
81
|
|
|
|
|
|
|
backward for inplace change to work */ |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
/* Original string might have moved due to grow */ |
|
84
|
9292
|
|
|
|
|
|
sp = SvPV_nolen(str); |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
/* Null terminate new string */ |
|
87
|
9292
|
|
|
|
|
|
newsp = SvPV_nolen(newstr) + origlen + extrachars; |
|
88
|
9292
|
|
|
|
|
|
*newsp = '\0'; |
|
89
|
|
|
|
|
|
|
|
|
90
|
9292
|
|
|
|
|
|
c = '\0'; |
|
91
|
159355
|
100
|
|
|
|
|
for (i = origlen-1; i >= 0; i--) { |
|
92
|
150063
|
|
|
|
|
|
lastc = c; |
|
93
|
150063
|
|
|
|
|
|
c = sp[i]; |
|
94
|
150063
|
100
|
|
|
|
|
if (c == '<') { |
|
95
|
7152
|
|
|
|
|
|
newsp -= 4; |
|
96
|
7152
|
|
|
|
|
|
memcpy(newsp, "<", 4); |
|
97
|
|
|
|
|
|
|
} |
|
98
|
142911
|
100
|
|
|
|
|
else if (c == '>') { |
|
99
|
7197
|
|
|
|
|
|
newsp -= 4; |
|
100
|
7197
|
|
|
|
|
|
memcpy(newsp, ">", 4); |
|
101
|
|
|
|
|
|
|
} |
|
102
|
135714
|
100
|
|
|
|
|
else if (c == '&' && (!b_leaveknown || !GF_is_known_entity(sp, i, origlen, 0))) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
103
|
7198
|
|
|
|
|
|
newsp -= 5; |
|
104
|
7198
|
|
|
|
|
|
memcpy(newsp, "&", 5); |
|
105
|
|
|
|
|
|
|
} |
|
106
|
128516
|
100
|
|
|
|
|
else if (c == '"') { |
|
107
|
7188
|
|
|
|
|
|
newsp -= 6; |
|
108
|
7188
|
|
|
|
|
|
memcpy(newsp, """, 6); |
|
109
|
|
|
|
|
|
|
} |
|
110
|
121328
|
100
|
|
|
|
|
else if (b_lftobr && c == '\n') { |
|
|
|
100
|
|
|
|
|
|
|
111
|
756
|
|
|
|
|
|
newsp -= 4; |
|
112
|
756
|
|
|
|
|
|
memcpy(newsp, " ", 4); |
|
113
|
|
|
|
|
|
|
} |
|
114
|
120572
|
100
|
|
|
|
|
else if (b_sptonbsp && c == ' ' && lastc == ' ') { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
115
|
715
|
|
|
|
|
|
newsp -= 6; |
|
116
|
715
|
|
|
|
|
|
memcpy(newsp, " ", 7); |
|
117
|
|
|
|
|
|
|
/* don't pick up immediately again */ |
|
118
|
715
|
|
|
|
|
|
c = '\0'; |
|
119
|
119857
|
50
|
|
|
|
|
} else if (GF_paranoia && (c == '{' || c == '}')) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
120
|
0
|
|
|
|
|
|
newsp -= 6; |
|
121
|
0
|
0
|
|
|
|
|
memcpy(newsp, c == '{' ? "{" : "}", 6); |
|
122
|
|
|
|
|
|
|
} else |
|
123
|
119857
|
|
|
|
|
|
*--newsp = c; |
|
124
|
|
|
|
|
|
|
} |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
/* Special single space case */ |
|
127
|
9292
|
100
|
|
|
|
|
if (b_sptonbsp && origlen == 1 && sp[0] == ' ') { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
128
|
1
|
|
|
|
|
|
newsp -= 5; |
|
129
|
1
|
|
|
|
|
|
memcpy(newsp, " ", 6); |
|
130
|
|
|
|
|
|
|
} |
|
131
|
|
|
|
|
|
|
|
|
132
|
9292
|
100
|
|
|
|
|
if (b_inplace && maxentitylen) |
|
|
|
100
|
|
|
|
|
|
|
133
|
10
|
|
|
|
|
|
sv_chop(newstr, newsp); |
|
134
|
|
|
|
|
|
|
|
|
135
|
9292
|
50
|
|
|
|
|
if (SvPV_nolen(newstr) != newsp) { |
|
136
|
0
|
|
|
|
|
|
croak("Unexpected length mismatch"); |
|
137
|
|
|
|
|
|
|
return 0; |
|
138
|
|
|
|
|
|
|
} |
|
139
|
|
|
|
|
|
|
|
|
140
|
9292
|
|
|
|
|
|
return newstr; |
|
141
|
|
|
|
|
|
|
} |
|
142
|
|
|
|
|
|
|
|
|
143
|
2041
|
|
|
|
|
|
SV * GF_generate_attributes(HV * attrhv) { |
|
144
|
2041
|
|
|
|
|
|
int i, j, estimatedlen = 1; |
|
145
|
|
|
|
|
|
|
I32 keylen; |
|
146
|
|
|
|
|
|
|
char * key, tmp[64]; |
|
147
|
|
|
|
|
|
|
SV * attrstr, * val; |
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
/* Iterate through keys to work out an estimated final length */ |
|
150
|
11078
|
100
|
|
|
|
|
while ((val = hv_iternextsv(attrhv, &key, &keylen))) { |
|
151
|
9037
|
|
|
|
|
|
estimatedlen += keylen + 1; |
|
152
|
9037
|
|
|
|
|
|
estimatedlen += GF_estimate_attribute_value_len(val) + 3; |
|
153
|
|
|
|
|
|
|
} |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
/* warn("estimated len: %d", estimatedlen); */ |
|
156
|
|
|
|
|
|
|
|
|
157
|
2041
|
|
|
|
|
|
attrstr = newSV(estimatedlen); |
|
158
|
2041
|
|
|
|
|
|
SvPOK_on(attrstr); |
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
/* Now iteratre and build actual string */ |
|
161
|
2041
|
|
|
|
|
|
hv_iterinit(attrhv); |
|
162
|
13119
|
100
|
|
|
|
|
while ((val = hv_iternextsv(attrhv, &key, &keylen))) { |
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
/* Add space to string if already something in it */ |
|
165
|
9037
|
100
|
|
|
|
|
if (SvCUR(attrstr)) |
|
166
|
7192
|
|
|
|
|
|
sv_catpvn(attrstr, " ", 1); |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
/* For key, convert to lower case and add to attrstr */ |
|
169
|
9037
|
50
|
|
|
|
|
if (keylen < 64) { |
|
170
|
|
|
|
|
|
|
/* If key starts with - (eg -width => '10%'), skip - */ |
|
171
|
9037
|
|
|
|
|
|
j = 0; |
|
172
|
9037
|
50
|
|
|
|
|
i = (keylen && key[0] == '-' ? 1 : 0); |
|
|
|
100
|
|
|
|
|
|
|
173
|
62729
|
100
|
|
|
|
|
for (; i < keylen; i++) |
|
174
|
53692
|
100
|
|
|
|
|
tmp[j++] = toLOWER(key[i]); |
|
175
|
9037
|
|
|
|
|
|
sv_catpvn(attrstr, tmp, j); |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
} else { |
|
178
|
0
|
|
|
|
|
|
sv_catpvn(attrstr, key, keylen); |
|
179
|
|
|
|
|
|
|
} |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
/* Add '="value"' part if present*/ |
|
182
|
9037
|
100
|
|
|
|
|
if (SvOK(val)) { |
|
183
|
7234
|
|
|
|
|
|
sv_catpvn(attrstr, "=\"", 2); |
|
184
|
7234
|
|
|
|
|
|
GF_generate_attribute_value(attrstr, val); |
|
185
|
7234
|
|
|
|
|
|
sv_catpvn(attrstr, "\"", 1); |
|
186
|
|
|
|
|
|
|
} |
|
187
|
|
|
|
|
|
|
} |
|
188
|
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
/* warn("real len: %d, %s", SvCUR(attrstr), SvPV_nolen(attrstr)); */ |
|
190
|
|
|
|
|
|
|
|
|
191
|
2041
|
|
|
|
|
|
return attrstr; |
|
192
|
|
|
|
|
|
|
} |
|
193
|
|
|
|
|
|
|
|
|
194
|
1015
|
|
|
|
|
|
SV * GF_generate_tag(SV * tag, HV * attrhv, SV * val, int b_escapeval, int b_addnewline, int b_closetag) { |
|
195
|
|
|
|
|
|
|
char * tagsp, * valsp; |
|
196
|
|
|
|
|
|
|
STRLEN taglen, vallen, estimatedlen; |
|
197
|
1015
|
|
|
|
|
|
SV * tagstr, * attrstr = 0; |
|
198
|
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
/* Force tag to string when getting length */ |
|
200
|
1015
|
|
|
|
|
|
tagsp = SvPV(tag, taglen); |
|
201
|
1015
|
100
|
|
|
|
|
estimatedlen = taglen + 3 + (b_addnewline ? 1 : 0); |
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
/* Create attributes as string */ |
|
204
|
1015
|
100
|
|
|
|
|
if (attrhv) { |
|
205
|
1012
|
|
|
|
|
|
attrstr = GF_generate_attributes(attrhv); |
|
206
|
1012
|
|
|
|
|
|
estimatedlen += SvCUR(attrstr) + 1; |
|
207
|
|
|
|
|
|
|
} |
|
208
|
|
|
|
|
|
|
|
|
209
|
1015
|
100
|
|
|
|
|
if (val) { |
|
210
|
|
|
|
|
|
|
/* If asked to escape, escape the val */ |
|
211
|
1007
|
100
|
|
|
|
|
if (b_escapeval) |
|
212
|
1003
|
|
|
|
|
|
val = GF_escape_html(val, 0, 0, 0, 0); |
|
213
|
|
|
|
|
|
|
/* Force value to string when getting length */ |
|
214
|
1007
|
|
|
|
|
|
valsp = SvPV(val, vallen); |
|
215
|
1007
|
|
|
|
|
|
estimatedlen += vallen + taglen + 3; |
|
216
|
|
|
|
|
|
|
} |
|
217
|
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
/* If asked to close the tag, add ' /' */ |
|
219
|
1015
|
100
|
|
|
|
|
if (b_closetag) |
|
220
|
4
|
|
|
|
|
|
estimatedlen += 2; |
|
221
|
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
/* Create new string to put final result in */ |
|
223
|
1015
|
|
|
|
|
|
tagstr = newSV(estimatedlen); |
|
224
|
1015
|
|
|
|
|
|
SvPOK_on(tagstr); |
|
225
|
|
|
|
|
|
|
|
|
226
|
1015
|
|
|
|
|
|
sv_catpvn(tagstr, "<", 1); |
|
227
|
1015
|
|
|
|
|
|
sv_catsv(tagstr, tag); |
|
228
|
1015
|
100
|
|
|
|
|
if (attrstr) { |
|
229
|
1012
|
100
|
|
|
|
|
if (SvCUR(attrstr)) { |
|
230
|
907
|
|
|
|
|
|
sv_catpvn(tagstr, " ", 1); |
|
231
|
907
|
|
|
|
|
|
sv_catsv(tagstr, attrstr); |
|
232
|
|
|
|
|
|
|
} |
|
233
|
1012
|
|
|
|
|
|
SvREFCNT_dec(attrstr); |
|
234
|
|
|
|
|
|
|
} |
|
235
|
1015
|
100
|
|
|
|
|
if (b_closetag) |
|
236
|
4
|
|
|
|
|
|
sv_catpvn(tagstr, " />", 3); |
|
237
|
|
|
|
|
|
|
else |
|
238
|
1011
|
|
|
|
|
|
sv_catpvn(tagstr, ">", 1); |
|
239
|
|
|
|
|
|
|
|
|
240
|
1015
|
100
|
|
|
|
|
if (val) { |
|
241
|
1007
|
|
|
|
|
|
sv_catsv(tagstr, val); |
|
242
|
1007
|
100
|
|
|
|
|
if (b_escapeval) |
|
243
|
1003
|
|
|
|
|
|
SvREFCNT_dec(val); |
|
244
|
1007
|
|
|
|
|
|
sv_catpvn(tagstr, "", 2); |
|
245
|
1007
|
|
|
|
|
|
sv_catsv(tagstr, tag); |
|
246
|
1007
|
|
|
|
|
|
sv_catpvn(tagstr, ">", 1); |
|
247
|
|
|
|
|
|
|
} |
|
248
|
|
|
|
|
|
|
|
|
249
|
1015
|
100
|
|
|
|
|
if (b_addnewline) |
|
250
|
3
|
|
|
|
|
|
sv_catpvn(tagstr, "\n", 1); |
|
251
|
|
|
|
|
|
|
|
|
252
|
1015
|
|
|
|
|
|
return tagstr; |
|
253
|
|
|
|
|
|
|
} |
|
254
|
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
static char * hexlookup = "0123456789ABCDEF"; |
|
256
|
|
|
|
|
|
|
|
|
257
|
1282
|
|
|
|
|
|
SV * GF_escape_uri(SV * str, SV * escchars, int b_inplace) { |
|
258
|
|
|
|
|
|
|
int i; |
|
259
|
|
|
|
|
|
|
STRLEN origlen, esclen, extrachars; |
|
260
|
|
|
|
|
|
|
char * sp, *newsp, *escsp; |
|
261
|
|
|
|
|
|
|
unsigned char c; |
|
262
|
|
|
|
|
|
|
SV * newstr; |
|
263
|
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
/* Get string pointer and length (in bytes) */ |
|
265
|
1282
|
100
|
|
|
|
|
if (b_inplace) { |
|
266
|
4
|
|
|
|
|
|
sp = SvPV_force(str, origlen); |
|
267
|
|
|
|
|
|
|
} else { |
|
268
|
1278
|
|
|
|
|
|
sp = SvPV(str, origlen); |
|
269
|
|
|
|
|
|
|
} |
|
270
|
|
|
|
|
|
|
|
|
271
|
1282
|
|
|
|
|
|
escsp = SvPV(escchars, esclen); |
|
272
|
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
/* Calculate extra space required */ |
|
274
|
1282
|
|
|
|
|
|
extrachars = 0; |
|
275
|
16892
|
100
|
|
|
|
|
for (i = 0; i < origlen; i++) { |
|
276
|
15610
|
|
|
|
|
|
c = (unsigned char)sp[i]; |
|
277
|
|
|
|
|
|
|
/* Always escape control on 8-bit chars or chars in our escape set */ |
|
278
|
15610
|
100
|
|
|
|
|
if (c <= 0x20 || c >= 0x80 || memchr(escsp, c, esclen)) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
279
|
4703
|
|
|
|
|
|
extrachars += 2; |
|
280
|
|
|
|
|
|
|
} |
|
281
|
|
|
|
|
|
|
} |
|
282
|
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
/* Create new SV, or grow existing SV */ |
|
284
|
1282
|
100
|
|
|
|
|
if (b_inplace) { |
|
285
|
4
|
|
|
|
|
|
newstr = str; |
|
286
|
|
|
|
|
|
|
/* Always turn of utf8-ness in escaped string */ |
|
287
|
4
|
|
|
|
|
|
SvUTF8_off(newstr); |
|
288
|
4
|
50
|
|
|
|
|
SvGROW(newstr, origlen + extrachars + 1); |
|
|
|
50
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
} else { |
|
290
|
1278
|
|
|
|
|
|
newstr = newSV(origlen + extrachars + 1); |
|
291
|
1278
|
|
|
|
|
|
SvPOK_on(newstr); |
|
292
|
|
|
|
|
|
|
} |
|
293
|
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
/* Set the length of the string */ |
|
295
|
1282
|
|
|
|
|
|
SvCUR_set(newstr, origlen + extrachars); |
|
296
|
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
/* Now do actual replacement (need to work |
|
298
|
|
|
|
|
|
|
backward for inplace change to work */ |
|
299
|
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
/* Original string might have moved due to grow */ |
|
301
|
1282
|
|
|
|
|
|
sp = SvPV_nolen(str); |
|
302
|
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
/* Null terminate new string */ |
|
304
|
1282
|
|
|
|
|
|
newsp = SvPV_nolen(newstr) + origlen + extrachars; |
|
305
|
1282
|
|
|
|
|
|
*newsp = '\0'; |
|
306
|
|
|
|
|
|
|
|
|
307
|
16892
|
100
|
|
|
|
|
for (i = origlen-1; i >= 0; i--) { |
|
308
|
15610
|
|
|
|
|
|
c = (unsigned char)sp[i]; |
|
309
|
15610
|
100
|
|
|
|
|
if (c <= 0x20 || c >= 0x80 || memchr(escsp, c, esclen)) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
310
|
4703
|
|
|
|
|
|
newsp -= 3; |
|
311
|
4703
|
|
|
|
|
|
newsp[0] = '%'; |
|
312
|
4703
|
|
|
|
|
|
newsp[1] = hexlookup[(c>>4) & 0x0f]; |
|
313
|
4703
|
|
|
|
|
|
newsp[2] = hexlookup[c & 0x0f]; |
|
314
|
|
|
|
|
|
|
} else |
|
315
|
10907
|
|
|
|
|
|
*--newsp = (char)c; |
|
316
|
|
|
|
|
|
|
} |
|
317
|
|
|
|
|
|
|
|
|
318
|
1282
|
50
|
|
|
|
|
if (newsp != SvPV_nolen(newstr)) { |
|
319
|
0
|
|
|
|
|
|
croak("Unexpected length mismatch"); |
|
320
|
|
|
|
|
|
|
return 0; |
|
321
|
|
|
|
|
|
|
} |
|
322
|
|
|
|
|
|
|
|
|
323
|
1282
|
|
|
|
|
|
return newstr; |
|
324
|
|
|
|
|
|
|
} |
|
325
|
|
|
|
|
|
|
|
|
326
|
116
|
|
|
|
|
|
int GF_is_known_entity(char * sp, int i, int origlen, int *maxlen) { |
|
327
|
116
|
|
|
|
|
|
int start = i; |
|
328
|
|
|
|
|
|
|
|
|
329
|
116
|
100
|
|
|
|
|
if (++i < origlen) { |
|
330
|
|
|
|
|
|
|
/* Check for unicode ref (eg Ӓ) */ |
|
331
|
112
|
100
|
|
|
|
|
if (sp[i] == '#') { |
|
332
|
22
|
|
|
|
|
|
int is_hex = 0; |
|
333
|
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
/* Check for hex unicode ref (eg ኯ) */ |
|
335
|
22
|
50
|
|
|
|
|
if (i+1 < origlen && (sp[i+1] == 'x' || sp[i+1] == 'X')) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
336
|
6
|
|
|
|
|
|
is_hex = 1; |
|
337
|
6
|
|
|
|
|
|
i++; |
|
338
|
|
|
|
|
|
|
} |
|
339
|
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
/* Not quite right, says "" and "" are ok */ |
|
341
|
95
|
100
|
|
|
|
|
while (++i < origlen) { |
|
342
|
90
|
100
|
|
|
|
|
if (sp[i] >= '0' && sp[i] <= '9') continue; |
|
|
|
100
|
|
|
|
|
|
|
343
|
27
|
100
|
|
|
|
|
if (is_hex && ((sp[i] >= 'a' && sp[i] <= 'f') || (sp[i] >= 'A' && sp[i] <= 'F'))) continue; |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
344
|
17
|
100
|
|
|
|
|
if (sp[i] == ';' || sp[i] == ' ') { |
|
|
|
100
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
/* Keep track of maximum entity length */ |
|
346
|
14
|
|
|
|
|
|
i++; |
|
347
|
14
|
100
|
|
|
|
|
if (maxlen && (i - start > *maxlen)) *maxlen = i-start; |
|
|
|
50
|
|
|
|
|
|
|
348
|
14
|
|
|
|
|
|
return 1; |
|
349
|
|
|
|
|
|
|
} |
|
350
|
3
|
|
|
|
|
|
break; |
|
351
|
|
|
|
|
|
|
} |
|
352
|
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
/* Check for entity ref (eg ) */ |
|
354
|
90
|
100
|
|
|
|
|
} else if ((sp[i] >= 'a' && sp[i] <= 'z') || (sp[i] >= 'A' && sp[i] <= 'Z')) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
355
|
208
|
100
|
|
|
|
|
while (++i < origlen) { |
|
356
|
196
|
100
|
|
|
|
|
if ((sp[i] >= 'a' && sp[i] <= 'z') || (sp[i] >= 'A' && sp[i] <= 'Z')) continue; |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
/* We should check to see if matched text string is known enity, |
|
358
|
|
|
|
|
|
|
but it's not that important */ |
|
359
|
58
|
100
|
|
|
|
|
if (sp[i] == ';' || sp[i] == ' ') { |
|
|
|
100
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
/* Keep track of maximum entity length */ |
|
361
|
48
|
|
|
|
|
|
i++; |
|
362
|
48
|
100
|
|
|
|
|
if (maxlen && (i - start > *maxlen)) *maxlen = i-start; |
|
|
|
100
|
|
|
|
|
|
|
363
|
48
|
|
|
|
|
|
return 1; |
|
364
|
|
|
|
|
|
|
} |
|
365
|
10
|
|
|
|
|
|
break; |
|
366
|
|
|
|
|
|
|
} |
|
367
|
|
|
|
|
|
|
} |
|
368
|
|
|
|
|
|
|
} |
|
369
|
54
|
|
|
|
|
|
return 0; |
|
370
|
|
|
|
|
|
|
} |
|
371
|
|
|
|
|
|
|
|
|
372
|
9055
|
|
|
|
|
|
int GF_estimate_attribute_value_len(SV * val) { |
|
373
|
|
|
|
|
|
|
STRLEN vallen; |
|
374
|
|
|
|
|
|
|
I32 valtype; |
|
375
|
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
/* If reference, de-reference ... */ |
|
377
|
9055
|
100
|
|
|
|
|
if (SvROK(val)) { |
|
378
|
19
|
|
|
|
|
|
val = SvRV(val); |
|
379
|
|
|
|
|
|
|
} |
|
380
|
|
|
|
|
|
|
|
|
381
|
9055
|
|
|
|
|
|
valtype = SvTYPE(val); |
|
382
|
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
/* Array case */ |
|
384
|
9055
|
100
|
|
|
|
|
if (valtype == SVt_PVAV) { |
|
385
|
11
|
|
|
|
|
|
int estimatedlen = 0; |
|
386
|
11
|
|
|
|
|
|
AV * aval = (AV *)val; |
|
387
|
11
|
|
|
|
|
|
I32 alen = av_len(aval), i; |
|
388
|
32
|
100
|
|
|
|
|
for (i = 0; i <= alen; i++) { |
|
389
|
|
|
|
|
|
|
SV **av_val; |
|
390
|
21
|
50
|
|
|
|
|
if ((av_val = av_fetch(aval, i, 0)) && SvOK(val = *av_val)) { |
|
|
|
100
|
|
|
|
|
|
|
391
|
18
|
|
|
|
|
|
estimatedlen += GF_estimate_attribute_value_len(val) + 1; |
|
392
|
|
|
|
|
|
|
} |
|
393
|
|
|
|
|
|
|
} |
|
394
|
11
|
|
|
|
|
|
return estimatedlen; |
|
395
|
|
|
|
|
|
|
} |
|
396
|
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
/* Hash case */ |
|
398
|
9044
|
100
|
|
|
|
|
if (valtype == SVt_PVHV) { |
|
399
|
3
|
|
|
|
|
|
int estimatedlen = 0; |
|
400
|
3
|
|
|
|
|
|
HV * hval = (HV *)val; |
|
401
|
|
|
|
|
|
|
char * key; I32 keylen; |
|
402
|
3
|
|
|
|
|
|
hv_iterinit(hval); |
|
403
|
6
|
100
|
|
|
|
|
while ((val = hv_iternextsv(hval, &key, &keylen))) { |
|
404
|
3
|
|
|
|
|
|
estimatedlen += keylen + 1; |
|
405
|
|
|
|
|
|
|
} |
|
406
|
3
|
|
|
|
|
|
return estimatedlen; |
|
407
|
|
|
|
|
|
|
} |
|
408
|
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
/* Ignore other non-scalar types */ |
|
410
|
9041
|
100
|
|
|
|
|
if (!SvOK(val)) return 0; |
|
411
|
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
/* Most common case of a string */ |
|
413
|
7237
|
100
|
|
|
|
|
if (SvPOK(val)) return SvCUR(val); |
|
414
|
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
/* Other SV case, turn it into a string */ |
|
416
|
2
|
50
|
|
|
|
|
if (SvOK(val)) return (SvPV(val, vallen), vallen); |
|
417
|
|
|
|
|
|
|
|
|
418
|
0
|
|
|
|
|
|
return 0; |
|
419
|
|
|
|
|
|
|
} |
|
420
|
|
|
|
|
|
|
|
|
421
|
7252
|
|
|
|
|
|
void GF_generate_attribute_value(SV * attrstr, SV * val) { |
|
422
|
|
|
|
|
|
|
I32 valtype; |
|
423
|
7252
|
|
|
|
|
|
int no_escape = 0; |
|
424
|
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
/* If reference, de-reference ... */ |
|
426
|
7252
|
100
|
|
|
|
|
if (SvROK(val)) { |
|
427
|
19
|
|
|
|
|
|
val = SvRV(val); |
|
428
|
19
|
|
|
|
|
|
no_escape = 1; |
|
429
|
|
|
|
|
|
|
} |
|
430
|
|
|
|
|
|
|
|
|
431
|
7252
|
|
|
|
|
|
valtype = SvTYPE(val); |
|
432
|
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
/* Array? Iterate over array items space separated... */ |
|
434
|
7252
|
100
|
|
|
|
|
if (valtype == SVt_PVAV) { |
|
435
|
11
|
|
|
|
|
|
AV * aval = (AV *)val; |
|
436
|
11
|
|
|
|
|
|
I32 alen = av_len(aval), i; |
|
437
|
32
|
100
|
|
|
|
|
for (i = 0; i <= alen; i++) { |
|
438
|
|
|
|
|
|
|
SV **av_val; |
|
439
|
21
|
50
|
|
|
|
|
if ((av_val = av_fetch(aval, i, 0)) && SvOK(val = *av_val)) { |
|
|
|
100
|
|
|
|
|
|
|
440
|
18
|
|
|
|
|
|
GF_generate_attribute_value(attrstr, val); |
|
441
|
18
|
100
|
|
|
|
|
if (i != alen) sv_catpvn(attrstr, " ", 1); |
|
442
|
|
|
|
|
|
|
} |
|
443
|
|
|
|
|
|
|
} |
|
444
|
11
|
|
|
|
|
|
return; |
|
445
|
|
|
|
|
|
|
} |
|
446
|
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
/* Hash? Iterate over keys space separated... */ |
|
448
|
7241
|
100
|
|
|
|
|
if (valtype == SVt_PVHV) { |
|
449
|
3
|
|
|
|
|
|
HV * hval = (HV *)val; |
|
450
|
|
|
|
|
|
|
char * key; I32 keylen; |
|
451
|
3
|
|
|
|
|
|
I32 hlen = hv_iterinit(hval), i = 0; |
|
452
|
|
|
|
|
|
|
HE * hentry; |
|
453
|
6
|
100
|
|
|
|
|
while ((hentry = hv_iternext(hval))) { |
|
454
|
3
|
|
|
|
|
|
key = hv_iterkey(hentry, &keylen); |
|
455
|
3
|
|
|
|
|
|
sv_catpvn(attrstr, key, keylen); |
|
456
|
3
|
100
|
|
|
|
|
if (++i != hlen) sv_catpvn(attrstr, " ", 1); |
|
457
|
|
|
|
|
|
|
} |
|
458
|
3
|
|
|
|
|
|
return; |
|
459
|
|
|
|
|
|
|
} |
|
460
|
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
/* Ignore other non-scalar types */ |
|
462
|
7238
|
100
|
|
|
|
|
if (!SvOK(val)) return; |
|
463
|
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
/* Otherwise just append to attribute string */ |
|
465
|
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
/* If value was reference, use that unescaped */ |
|
467
|
7237
|
100
|
|
|
|
|
if (no_escape) { |
|
468
|
4
|
|
|
|
|
|
sv_catsv(attrstr, val); |
|
469
|
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
/* For the value part, escape special html chars, then dispose of result */ |
|
471
|
|
|
|
|
|
|
} else { |
|
472
|
7233
|
|
|
|
|
|
val = GF_escape_html(val, 0, 0, 0, 0); |
|
473
|
7233
|
|
|
|
|
|
sv_catsv(attrstr, val); |
|
474
|
7233
|
|
|
|
|
|
SvREFCNT_dec(val); |
|
475
|
|
|
|
|
|
|
} |
|
476
|
|
|
|
|
|
|
|
|
477
|
7237
|
|
|
|
|
|
return; |
|
478
|
|
|
|
|
|
|
} |
|
479
|
|
|
|
|
|
|
|
|
480
|
0
|
|
|
|
|
|
void GF_set_paranoia(int paranoia) { |
|
481
|
0
|
|
|
|
|
|
GF_paranoia = paranoia; |
|
482
|
0
|
|
|
|
|
|
return; |
|
483
|
|
|
|
|
|
|
} |
|
484
|
|
|
|
|
|
|
|