| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#include |
|
2
|
|
|
|
|
|
|
#include |
|
3
|
|
|
|
|
|
|
#include |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
#include "houdini.h" |
|
6
|
|
|
|
|
|
|
#include "html_unescape.h" |
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
static inline void |
|
9
|
2
|
|
|
|
|
|
gh_buf_put_utf8(gh_buf *ob, int c) |
|
10
|
|
|
|
|
|
|
{ |
|
11
|
|
|
|
|
|
|
unsigned char unichar[4]; |
|
12
|
|
|
|
|
|
|
|
|
13
|
2
|
50
|
|
|
|
|
if (c < 0x80) { |
|
14
|
2
|
|
|
|
|
|
gh_buf_putc(ob, c); |
|
15
|
|
|
|
|
|
|
} |
|
16
|
0
|
0
|
|
|
|
|
else if (c < 0x800) { |
|
17
|
0
|
|
|
|
|
|
unichar[0] = 192 + (c / 64); |
|
18
|
0
|
|
|
|
|
|
unichar[1] = 128 + (c % 64); |
|
19
|
0
|
|
|
|
|
|
gh_buf_put(ob, unichar, 2); |
|
20
|
|
|
|
|
|
|
} |
|
21
|
0
|
0
|
|
|
|
|
else if (c - 0xd800u < 0x800) { |
|
22
|
0
|
|
|
|
|
|
gh_buf_putc(ob, '?'); |
|
23
|
|
|
|
|
|
|
} |
|
24
|
0
|
0
|
|
|
|
|
else if (c < 0x10000) { |
|
25
|
0
|
|
|
|
|
|
unichar[0] = 224 + (c / 4096); |
|
26
|
0
|
|
|
|
|
|
unichar[1] = 128 + (c / 64) % 64; |
|
27
|
0
|
|
|
|
|
|
unichar[2] = 128 + (c % 64); |
|
28
|
0
|
|
|
|
|
|
gh_buf_put(ob, unichar, 3); |
|
29
|
|
|
|
|
|
|
} |
|
30
|
0
|
0
|
|
|
|
|
else if (c < 0x110000) { |
|
31
|
0
|
|
|
|
|
|
unichar[0] = 240 + (c / 262144); |
|
32
|
0
|
|
|
|
|
|
unichar[1] = 128 + (c / 4096) % 64; |
|
33
|
0
|
|
|
|
|
|
unichar[2] = 128 + (c / 64) % 64; |
|
34
|
0
|
|
|
|
|
|
unichar[3] = 128 + (c % 64); |
|
35
|
0
|
|
|
|
|
|
gh_buf_put(ob, unichar, 4); |
|
36
|
|
|
|
|
|
|
} |
|
37
|
|
|
|
|
|
|
else { |
|
38
|
0
|
|
|
|
|
|
gh_buf_putc(ob, '?'); |
|
39
|
|
|
|
|
|
|
} |
|
40
|
2
|
|
|
|
|
|
} |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
static size_t |
|
43
|
16
|
|
|
|
|
|
unescape_ent(gh_buf *ob, const uint8_t *src, size_t size) |
|
44
|
|
|
|
|
|
|
{ |
|
45
|
|
|
|
|
|
|
size_t i = 0; |
|
46
|
|
|
|
|
|
|
|
|
47
|
16
|
100
|
|
|
|
|
if (size > 3 && src[0] == '#') { |
|
|
|
100
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
int codepoint = 0; |
|
49
|
|
|
|
|
|
|
|
|
50
|
2
|
50
|
|
|
|
|
if (_isdigit(src[1])) { |
|
51
|
6
|
50
|
|
|
|
|
for (i = 1; i < size && _isdigit(src[i]); ++i) |
|
|
|
100
|
|
|
|
|
|
|
52
|
4
|
|
|
|
|
|
codepoint = (codepoint * 10) + (src[i] - '0'); |
|
53
|
|
|
|
|
|
|
} |
|
54
|
|
|
|
|
|
|
|
|
55
|
0
|
0
|
|
|
|
|
else if (src[1] == 'x' || src[1] == 'X') { |
|
56
|
0
|
0
|
|
|
|
|
for (i = 2; i < size && _isxdigit(src[i]); ++i) |
|
|
|
0
|
|
|
|
|
|
|
57
|
0
|
|
|
|
|
|
codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9); |
|
58
|
|
|
|
|
|
|
} |
|
59
|
|
|
|
|
|
|
|
|
60
|
2
|
50
|
|
|
|
|
if (i < size && src[i] == ';' && codepoint) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
61
|
2
|
|
|
|
|
|
gh_buf_put_utf8(ob, codepoint); |
|
62
|
2
|
|
|
|
|
|
return i + 1; |
|
63
|
|
|
|
|
|
|
} |
|
64
|
|
|
|
|
|
|
} |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
else { |
|
67
|
14
|
100
|
|
|
|
|
if (size > MAX_WORD_LENGTH) |
|
68
|
|
|
|
|
|
|
size = MAX_WORD_LENGTH; |
|
69
|
|
|
|
|
|
|
|
|
70
|
22
|
50
|
|
|
|
|
for (i = MIN_WORD_LENGTH; i < size; ++i) { |
|
71
|
22
|
50
|
|
|
|
|
if (src[i] == ' ') |
|
72
|
|
|
|
|
|
|
break; |
|
73
|
|
|
|
|
|
|
|
|
74
|
22
|
100
|
|
|
|
|
if (src[i] == ';') { |
|
75
|
14
|
|
|
|
|
|
const struct html_ent *entity = find_entity((char *)src, i); |
|
76
|
|
|
|
|
|
|
|
|
77
|
14
|
50
|
|
|
|
|
if (entity != NULL) { |
|
78
|
14
|
|
|
|
|
|
gh_buf_put(ob, entity->utf8, entity->utf8_len); |
|
79
|
14
|
|
|
|
|
|
return i + 1; |
|
80
|
|
|
|
|
|
|
} |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
break; |
|
83
|
|
|
|
|
|
|
} |
|
84
|
|
|
|
|
|
|
} |
|
85
|
|
|
|
|
|
|
} |
|
86
|
|
|
|
|
|
|
|
|
87
|
0
|
|
|
|
|
|
gh_buf_putc(ob, '&'); |
|
88
|
0
|
|
|
|
|
|
return 0; |
|
89
|
|
|
|
|
|
|
} |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
int |
|
92
|
6
|
|
|
|
|
|
houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size) |
|
93
|
|
|
|
|
|
|
{ |
|
94
|
|
|
|
|
|
|
size_t i = 0, org; |
|
95
|
|
|
|
|
|
|
|
|
96
|
22
|
100
|
|
|
|
|
while (i < size) { |
|
97
|
|
|
|
|
|
|
org = i; |
|
98
|
78
|
100
|
|
|
|
|
while (i < size && src[i] != '&') |
|
|
|
100
|
|
|
|
|
|
|
99
|
59
|
|
|
|
|
|
i++; |
|
100
|
|
|
|
|
|
|
|
|
101
|
19
|
100
|
|
|
|
|
if (likely(i > org)) { |
|
102
|
12
|
100
|
|
|
|
|
if (unlikely(org == 0)) { |
|
103
|
3
|
50
|
|
|
|
|
if (i >= size) |
|
104
|
|
|
|
|
|
|
return 0; |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
gh_buf_grow(ob, HOUDINI_UNESCAPED_SIZE(size)); |
|
107
|
|
|
|
|
|
|
} |
|
108
|
|
|
|
|
|
|
|
|
109
|
9
|
|
|
|
|
|
gh_buf_put(ob, src + org, i - org); |
|
110
|
|
|
|
|
|
|
} |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
/* escaping */ |
|
113
|
16
|
50
|
|
|
|
|
if (i >= size) |
|
114
|
|
|
|
|
|
|
break; |
|
115
|
|
|
|
|
|
|
|
|
116
|
16
|
|
|
|
|
|
i++; |
|
117
|
16
|
|
|
|
|
|
i += unescape_ent(ob, src + i, size - i); |
|
118
|
|
|
|
|
|
|
} |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
return 1; |
|
121
|
|
|
|
|
|
|
} |
|
122
|
|
|
|
|
|
|
|