line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#include "EXTERN.h" |
2
|
|
|
|
|
|
|
#include "perl.h" |
3
|
|
|
|
|
|
|
#include "XSUB.h" |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
#define NEED_newSVpvn_flags |
6
|
|
|
|
|
|
|
#define NEED_sv_2pv_flags |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
#include "ppport.h" |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
/* Byte classification table, indexed by unsigned byte value.
 *
 * A nonzero entry marks a character to escape; a zero entry marks a byte
 * that needs no escaping.  The flagged bytes are:
 *
 *   0x22 "   0x26 &   0x27 '   0x3c <   0x3e >   0x60 `   0x7b {   0x7d }
 *
 * The table does not care whether the input uses Perl's single-byte
 * (Latin-1) or multi-byte (UTF-8) encoding: every byte >= 0x80 is safe
 * in both, which is why the upper half of the table is all zeros.
 */
static const char unsafe[256] = {
    /* low nibble: 0 1 2 3 4 5 6 7   8 9 a b c d e f */
    /* 0x0_ */     0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    /* 0x1_ */     0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    /* 0x2_ */     0,0,1,0,0,0,1,1,  0,0,0,0,0,0,0,0,  /* "  &  ' */
    /* 0x3_ */     0,0,0,0,0,0,0,0,  0,0,0,0,1,0,1,0,  /* <  >    */
    /* 0x4_ */     0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    /* 0x5_ */     0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    /* 0x6_ */     1,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  /* `       */
    /* 0x7_ */     0,0,0,0,0,0,0,0,  0,0,0,1,0,1,0,0,  /* {  }    */
    /* 0x8_ */     0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    /* 0x9_ */     0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    /* 0xa_ */     0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    /* 0xb_ */     0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    /* 0xc_ */     0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    /* 0xd_ */     0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    /* 0xe_ */     0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    /* 0xf_ */     0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
};
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
/* This is essentially a version of standard strcspn() that (a) handles |
38
|
|
|
|
|
|
|
* arbitrary memory buffers, possibly containing \0 bytes, and (b) knows at |
39
|
|
|
|
|
|
|
* compile-time which characters to detect, rather than having to build an |
40
|
|
|
|
|
|
|
* internal data structure representing them on every call. */ |
41
|
|
|
|
|
|
|
static size_t safe_character_span(const char *start, const char *end) { |
42
|
|
|
|
|
|
|
const char *cur = start; |
43
|
11
|
50
|
|
|
|
|
while(cur != end) { |
44
|
11
|
|
|
|
|
|
unsigned char c = (unsigned char) *cur; |
45
|
11
|
100
|
|
|
|
|
if(unsafe[c]) { |
46
|
|
|
|
|
|
|
break; |
47
|
|
|
|
|
|
|
} |
48
|
4
|
|
|
|
|
|
cur++; |
49
|
|
|
|
|
|
|
} |
50
|
7
|
|
|
|
|
|
return cur - start; |
51
|
|
|
|
|
|
|
} |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
static void /* doesn't care about raw-ness */ |
54
|
5
|
|
|
|
|
|
tx_sv_cat_with_escape_html_force(pTHX_ SV* const dest, SV* const src) { |
55
|
|
|
|
|
|
|
STRLEN len; |
56
|
5
|
50
|
|
|
|
|
const char* cur = SvPV_const(src, len); |
57
|
5
|
|
|
|
|
|
const char* const end = cur + len; |
58
|
5
|
|
|
|
|
|
STRLEN const dest_cur = SvCUR(dest); |
59
|
|
|
|
|
|
|
char* d; |
60
|
|
|
|
|
|
|
|
61
|
5
|
50
|
|
|
|
|
(void)SvGROW(dest, dest_cur + ( len * ( sizeof(""") - 1) ) + 1); |
|
|
100
|
|
|
|
|
|
62
|
5
|
50
|
|
|
|
|
if(!SvUTF8(dest) && SvUTF8(src)) { |
|
|
50
|
|
|
|
|
|
63
|
0
|
|
|
|
|
|
sv_utf8_upgrade(dest); |
64
|
|
|
|
|
|
|
} |
65
|
|
|
|
|
|
|
|
66
|
5
|
|
|
|
|
|
d = SvPVX(dest) + dest_cur; |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
#define CopyToken(token, to) STMT_START { \ |
69
|
|
|
|
|
|
|
Copy(token "", to, sizeof(token)-1, char); \ |
70
|
|
|
|
|
|
|
to += sizeof(token)-1; \ |
71
|
|
|
|
|
|
|
} STMT_END |
72
|
|
|
|
|
|
|
|
73
|
17
|
100
|
|
|
|
|
while(cur != end) { |
74
|
|
|
|
|
|
|
size_t span = safe_character_span(cur, end); |
75
|
|
|
|
|
|
|
Copy(cur, d, span, char); |
76
|
|
|
|
|
|
|
cur += span; |
77
|
7
|
|
|
|
|
|
d += span; |
78
|
7
|
50
|
|
|
|
|
if(cur != end) { |
79
|
7
|
|
|
|
|
|
const char c = *(cur++); |
80
|
7
|
50
|
|
|
|
|
if(c == '&') { |
81
|
0
|
|
|
|
|
|
CopyToken("&", d); |
82
|
|
|
|
|
|
|
} |
83
|
7
|
100
|
|
|
|
|
else if(c == '<') { |
84
|
1
|
|
|
|
|
|
CopyToken("<", d); |
85
|
|
|
|
|
|
|
} |
86
|
6
|
100
|
|
|
|
|
else if(c == '>') { |
87
|
2
|
|
|
|
|
|
CopyToken(">", d); |
88
|
|
|
|
|
|
|
} |
89
|
4
|
50
|
|
|
|
|
else if(c == '"') { |
90
|
0
|
|
|
|
|
|
CopyToken(""", d); |
91
|
|
|
|
|
|
|
} |
92
|
4
|
100
|
|
|
|
|
else if(c == '`') { |
93
|
1
|
|
|
|
|
|
CopyToken("`", d); |
94
|
|
|
|
|
|
|
} |
95
|
3
|
100
|
|
|
|
|
else if(c == '{') { |
96
|
1
|
|
|
|
|
|
CopyToken("{", d); |
97
|
|
|
|
|
|
|
} |
98
|
2
|
100
|
|
|
|
|
else if(c == '}') { |
99
|
1
|
|
|
|
|
|
CopyToken("}", d); |
100
|
|
|
|
|
|
|
} |
101
|
|
|
|
|
|
|
else { /* c == '\'' */ |
102
|
|
|
|
|
|
|
/* XXX: Internet Explorer (at least version 8) doesn't support ' in title */ |
103
|
|
|
|
|
|
|
/* CopyToken("'", d); */ |
104
|
12
|
|
|
|
|
|
CopyToken("'", d); |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
} |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
#undef CopyToken |
110
|
|
|
|
|
|
|
|
111
|
5
|
|
|
|
|
|
SvCUR_set(dest, d - SvPVX(dest)); |
112
|
5
|
|
|
|
|
|
*SvEND(dest) = '\0'; |
113
|
5
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
static SV* |
116
|
5
|
|
|
|
|
|
tx_escape_html(pTHX_ SV* const str) { |
117
|
5
|
50
|
|
|
|
|
SvGETMAGIC(str); |
118
|
5
|
50
|
|
|
|
|
if(!( !SvOK(str) )) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
119
|
5
|
|
|
|
|
|
SV* const dest = newSVpvs_flags("", SVs_TEMP); |
120
|
5
|
|
|
|
|
|
tx_sv_cat_with_escape_html_force(aTHX_ dest, str); |
121
|
5
|
|
|
|
|
|
return dest; |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
else { |
124
|
|
|
|
|
|
|
return str; |
125
|
|
|
|
|
|
|
} |
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
MODULE = HTML::Escape PACKAGE = HTML::Escape |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
PROTOTYPES: DISABLE |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
void
escape_html(SV* str)
CODE:
{
    /* Perl-visible entry point: escape_html(str).
     * Stores the SV returned by tx_escape_html() directly in stack slot
     * ST(0).  That is either a new SV holding the escaped text or, when
     * `str` is not defined, `str` itself. */
    ST(0) = tx_escape_html(aTHX_ str);
}
138
|
|
|
|
|
|
|
|