line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
typedef struct { |
2
|
|
|
|
|
|
|
TidyDoc tdoc; |
3
|
|
|
|
|
|
|
int n_mallocs; |
4
|
|
|
|
|
|
|
} |
5
|
|
|
|
|
|
|
html_valid_t; |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
/* Status codes returned by the html_valid_* routines in this file. */
typedef enum html_valid_status {
    /* Success. */
    html_valid_ok,
    /* Malloc or calloc failed. */
    html_valid_memory_failure,
    /* An upstream error from the library. */
    html_valid_tidy_error,
    /* Internal state is not as expected, for example the TidyDoc
       has not been initialized. */
    html_valid_inconsistency,
    /* The option name is not known to the tidy library. */
    html_valid_unknown_option,
    /* The tidy library reported an option type which is not handled
       here. */
    html_valid_bad_option_type,
    /* The value supplied for an option was undefined. */
    html_valid_undefined_option,
    /* An integer option was given a value which does not look like a
       number. */
    html_valid_non_numerical_option,
} html_valid_status_t;
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
static html_valid_status_t |
22
|
4
|
|
|
|
|
|
html_valid_create (html_valid_t * htv) |
23
|
|
|
|
|
|
|
{ |
24
|
4
|
|
|
|
|
|
htv->tdoc = tidyCreate (); |
25
|
4
|
|
|
|
|
|
htv->n_mallocs++; |
26
|
4
|
|
|
|
|
|
return html_valid_ok; |
27
|
|
|
|
|
|
|
} |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
/* Call html_valid_<x>, where "x" is the function name without its
   "html_valid_" prefix followed by the argument list, and return the
   resulting status from the enclosing function unless it is
   html_valid_ok. The enclosing function must therefore itself return
   html_valid_status_t. */

#define CALL(x) { \
        html_valid_status_t call_status_ = \
            html_valid_ ## x; \
        if (call_status_ != html_valid_ok) { \
            return call_status_; \
        } \
    }
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
/* Evaluate the tidy library call "x". A negative result is reported
   with warn () and makes the enclosing function return
   html_valid_tidy_error; any other result is ignored. */

#define CALL_TIDY(x) { \
        int tidy_rc_ = (x); \
        if (tidy_rc_ < 0) { \
            warn ("Error %d from tidy library", tidy_rc_); \
            return html_valid_tidy_error; \
        } \
    }
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
/* Make the enclosing function return html_valid_inconsistency, after
   a warning, if the validator "htv" has no tidy document. The
   argument is parenthesized so that an expression such as "& obj"
   expands correctly. */

#define CHECK_INIT(htv) { \
        if (! (htv)->tdoc) { \
            warn ("Uninitialized TidyDoc"); \
            return html_valid_inconsistency; \
        } \
    }
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
static html_valid_status_t |
55
|
3
|
|
|
|
|
|
html_valid_run (html_valid_t * htv, SV * html, |
56
|
|
|
|
|
|
|
SV ** output_ptr, SV ** errors_ptr) |
57
|
|
|
|
|
|
|
{ |
58
|
|
|
|
|
|
|
const char * html_string; |
59
|
|
|
|
|
|
|
STRLEN html_length; |
60
|
|
|
|
|
|
|
SV * output; |
61
|
|
|
|
|
|
|
SV * errors; |
62
|
|
|
|
|
|
|
|
63
|
3
|
|
|
|
|
|
TidyBuffer tidy_output = {0}; |
64
|
3
|
|
|
|
|
|
TidyBuffer tidy_errbuf = {0}; |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
/* First set these up sanely in case the stuff hits the fan. */ |
67
|
|
|
|
|
|
|
|
68
|
3
|
|
|
|
|
|
* output_ptr = & PL_sv_undef; |
69
|
3
|
|
|
|
|
|
* errors_ptr = & PL_sv_undef; |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
/* Work around bug where allocator sometimes does not get set. */ |
72
|
|
|
|
|
|
|
|
73
|
3
|
|
|
|
|
|
CopyAllocator (htv->tdoc, & tidy_output); |
74
|
3
|
|
|
|
|
|
CopyAllocator (htv->tdoc, & tidy_errbuf); |
75
|
|
|
|
|
|
|
|
76
|
3
|
50
|
|
|
|
|
html_string = SvPV (html, html_length); |
77
|
3
|
50
|
|
|
|
|
CALL_TIDY (tidySetErrorBuffer (htv->tdoc, & tidy_errbuf)); |
78
|
3
|
|
|
|
|
|
htv->n_mallocs++; |
79
|
3
|
50
|
|
|
|
|
CALL_TIDY (tidyParseString (htv->tdoc, html_string)); |
80
|
3
|
50
|
|
|
|
|
CALL_TIDY (tidyCleanAndRepair (htv->tdoc)); |
81
|
3
|
50
|
|
|
|
|
CALL_TIDY (tidyRunDiagnostics (htv->tdoc)); |
82
|
3
|
50
|
|
|
|
|
CALL_TIDY (tidySaveBuffer (htv->tdoc, & tidy_output)); |
83
|
3
|
|
|
|
|
|
htv->n_mallocs++; |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
/* Copy the contents of the buffers into the Perl scalars. */ |
86
|
|
|
|
|
|
|
|
87
|
3
|
|
|
|
|
|
output = newSVpv ((char *) tidy_output.bp, tidy_output.size); |
88
|
3
|
|
|
|
|
|
errors = newSVpv ((char *) tidy_errbuf.bp, tidy_errbuf.size); |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
/* HTML Tidy randomly segfaults here due to "allocator" not being |
91
|
|
|
|
|
|
|
set in some cases, hence the above CopyAllocator fix. */ |
92
|
|
|
|
|
|
|
|
93
|
3
|
|
|
|
|
|
tidyBufFree (& tidy_output); |
94
|
3
|
|
|
|
|
|
htv->n_mallocs--; |
95
|
3
|
|
|
|
|
|
tidyBufFree (& tidy_errbuf); |
96
|
3
|
|
|
|
|
|
htv->n_mallocs--; |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
/* These are not our mallocs, they are Perl's mallocs, so we don't |
99
|
|
|
|
|
|
|
increase htv->n_mallocs for these. After we return them, we no |
100
|
|
|
|
|
|
|
longer take care of these. */ |
101
|
|
|
|
|
|
|
|
102
|
3
|
|
|
|
|
|
* output_ptr = output; |
103
|
3
|
|
|
|
|
|
* errors_ptr = errors; |
104
|
3
|
|
|
|
|
|
return html_valid_ok; |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
static html_valid_status_t |
108
|
1
|
|
|
|
|
|
html_valid_set_string_option (html_valid_t * htv, const char * coption, |
109
|
|
|
|
|
|
|
TidyOptionId ti, SV * value) |
110
|
|
|
|
|
|
|
{ |
111
|
|
|
|
|
|
|
const char * cvalue; |
112
|
|
|
|
|
|
|
STRLEN cvalue_length; |
113
|
1
|
50
|
|
|
|
|
if (! SvOK (value)) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
114
|
0
|
|
|
|
|
|
warn ("cannot set option '%s' to undefined value", |
115
|
|
|
|
|
|
|
coption); |
116
|
0
|
|
|
|
|
|
return html_valid_undefined_option; |
117
|
|
|
|
|
|
|
} |
118
|
1
|
50
|
|
|
|
|
cvalue = SvPV (value, cvalue_length); |
119
|
1
|
50
|
|
|
|
|
if (! tidyOptSetValue (htv->tdoc, ti, cvalue)) { |
120
|
0
|
|
|
|
|
|
warn ("Setting option %d to %s failed", ti, cvalue); |
121
|
0
|
|
|
|
|
|
return html_valid_tidy_error; |
122
|
|
|
|
|
|
|
} |
123
|
1
|
|
|
|
|
|
return html_valid_ok; |
124
|
|
|
|
|
|
|
} |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
static html_valid_status_t |
127
|
2
|
|
|
|
|
|
html_valid_set_number_option (html_valid_t * htv, const char * coption, |
128
|
|
|
|
|
|
|
TidyOptionId ti, SV * value) |
129
|
|
|
|
|
|
|
{ |
130
|
|
|
|
|
|
|
int cvalue; |
131
|
2
|
50
|
|
|
|
|
if (! SvOK (value)) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
132
|
0
|
|
|
|
|
|
warn ("cannot set option '%s' to undefined value", |
133
|
|
|
|
|
|
|
coption); |
134
|
0
|
|
|
|
|
|
return html_valid_undefined_option; |
135
|
|
|
|
|
|
|
} |
136
|
2
|
50
|
|
|
|
|
if (! looks_like_number (value)) { |
137
|
0
|
0
|
|
|
|
|
warn ("option %s expects a numerical value, but you supplied %s", |
138
|
0
|
|
|
|
|
|
coption, SvPV_nolen (value)); |
139
|
0
|
|
|
|
|
|
return html_valid_non_numerical_option; |
140
|
|
|
|
|
|
|
} |
141
|
2
|
50
|
|
|
|
|
cvalue = SvIV (value); |
142
|
2
|
50
|
|
|
|
|
if (! tidyOptSetInt (htv->tdoc, ti, cvalue)) { |
143
|
0
|
|
|
|
|
|
warn ("Setting option %d to %d failed", ti, cvalue); |
144
|
0
|
|
|
|
|
|
return html_valid_tidy_error; |
145
|
|
|
|
|
|
|
} |
146
|
2
|
|
|
|
|
|
return html_valid_ok; |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
static html_valid_status_t |
150
|
6
|
|
|
|
|
|
html_valid_set_option (html_valid_t * htv, SV * option, SV * value) |
151
|
|
|
|
|
|
|
{ |
152
|
|
|
|
|
|
|
TidyOption to; |
153
|
|
|
|
|
|
|
TidyOptionType tot; |
154
|
|
|
|
|
|
|
TidyOptionId ti; |
155
|
|
|
|
|
|
|
const char * coption; |
156
|
|
|
|
|
|
|
STRLEN coption_length; |
157
|
6
|
50
|
|
|
|
|
CHECK_INIT (htv); |
158
|
6
|
50
|
|
|
|
|
coption = SvPV (option, coption_length); |
159
|
6
|
|
|
|
|
|
to = tidyGetOptionByName(htv->tdoc, coption); |
160
|
6
|
50
|
|
|
|
|
if (to == 0) { |
161
|
0
|
|
|
|
|
|
warn ("unknown option %s", coption); |
162
|
0
|
|
|
|
|
|
return html_valid_unknown_option; |
163
|
|
|
|
|
|
|
} |
164
|
6
|
|
|
|
|
|
ti = tidyOptGetId (to); |
165
|
6
|
|
|
|
|
|
tot = tidyOptGetType (to); |
166
|
6
|
|
|
|
|
|
switch (tot) { |
167
|
|
|
|
|
|
|
case TidyString: |
168
|
1
|
50
|
|
|
|
|
CALL (set_string_option (htv, coption, ti, value)); |
169
|
1
|
|
|
|
|
|
break; |
170
|
|
|
|
|
|
|
case TidyInteger: |
171
|
2
|
50
|
|
|
|
|
CALL (set_number_option (htv, coption, ti, value)); |
172
|
2
|
|
|
|
|
|
break; |
173
|
|
|
|
|
|
|
case TidyBoolean: |
174
|
3
|
50
|
|
|
|
|
tidyOptSetBool (htv->tdoc, ti, SvTRUE (value)); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
175
|
3
|
|
|
|
|
|
break; |
176
|
|
|
|
|
|
|
default: |
177
|
0
|
|
|
|
|
|
fprintf (stderr, "%s:%d: bad option type %d from tidy library.\n", |
178
|
|
|
|
|
|
|
__FILE__, __LINE__, tot); |
179
|
0
|
|
|
|
|
|
return html_valid_bad_option_type; |
180
|
|
|
|
|
|
|
} |
181
|
6
|
|
|
|
|
|
return html_valid_ok; |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
static html_valid_status_t |
185
|
4
|
|
|
|
|
|
html_valid_destroy (html_valid_t * htv) |
186
|
|
|
|
|
|
|
{ |
187
|
4
|
|
|
|
|
|
tidyRelease (htv->tdoc); |
188
|
4
|
|
|
|
|
|
htv->tdoc = 0; |
189
|
4
|
|
|
|
|
|
htv->n_mallocs--; |
190
|
4
|
|
|
|
|
|
return html_valid_ok; |
191
|
|
|
|
|
|
|
} |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
static html_valid_status_t |
194
|
2
|
|
|
|
|
|
html_valid_tag_information (HV * hv) |
195
|
2
|
|
|
|
|
|
{ |
196
|
|
|
|
|
|
|
int i; |
197
|
|
|
|
|
|
|
// n_html_tags is defined in html-tidy5.h as part of the "extra" |
198
|
|
|
|
|
|
|
// material. |
199
|
2
|
|
|
|
|
|
html_valid_tag_t tags[n_html_tags]; |
200
|
2
|
|
|
|
|
|
TagInformation (tags); |
201
|
308
|
100
|
|
|
|
|
for (i = 0; i < n_html_tags; i++) { |
202
|
|
|
|
|
|
|
int name_len; |
203
|
|
|
|
|
|
|
AV * constants; |
204
|
|
|
|
|
|
|
SV * constants_ref; |
205
|
306
|
|
|
|
|
|
constants = newAV (); |
206
|
|
|
|
|
|
|
// Store the ID for reverse lookup of attributes. |
207
|
306
|
|
|
|
|
|
av_push (constants, newSVuv (i)); |
208
|
306
|
|
|
|
|
|
av_push (constants, newSVuv (tags[i].versions)); |
209
|
306
|
|
|
|
|
|
av_push (constants, newSVuv (tags[i].model)); |
210
|
|
|
|
|
|
|
|
211
|
306
|
|
|
|
|
|
constants_ref = newRV_inc ((SV *) constants); |
212
|
306
|
|
|
|
|
|
name_len = strlen (tags[i].name); |
213
|
|
|
|
|
|
|
/* |
214
|
|
|
|
|
|
|
fprintf (stderr, "Storing %s (%d) into hash.\n", |
215
|
|
|
|
|
|
|
tags[i].name, name_len); |
216
|
|
|
|
|
|
|
*/ |
217
|
306
|
|
|
|
|
|
(void) hv_store (hv, tags[i].name, name_len, constants_ref, 0 /* no hash value */); |
218
|
|
|
|
|
|
|
} |
219
|
2
|
|
|
|
|
|
return html_valid_ok; |
220
|
|
|
|
|
|
|
} |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
html_valid_status_t |
223
|
4
|
|
|
|
|
|
html_valid_tag_attr (AV * av, unsigned int tag_id, unsigned int version) |
224
|
4
|
|
|
|
|
|
{ |
225
|
4
|
|
|
|
|
|
const char * yes_no[n_attributes]; |
226
|
|
|
|
|
|
|
int i; |
227
|
|
|
|
|
|
|
int j; |
228
|
|
|
|
|
|
|
int n_attr; |
229
|
4
|
|
|
|
|
|
TagAttributes (tag_id, version, yes_no, & n_attr); |
230
|
4
|
50
|
|
|
|
|
if (av_len (av) != -1) { |
231
|
0
|
|
|
|
|
|
fprintf (stderr, "%s:%d: unexpected non-empty array with %d elements", |
232
|
0
|
|
|
|
|
|
__FILE__, __LINE__, (int) (av_len (av) + 1)); |
233
|
0
|
|
|
|
|
|
return html_valid_ok; |
234
|
|
|
|
|
|
|
} |
235
|
4
|
50
|
|
|
|
|
if (n_attr == 0) { |
236
|
0
|
|
|
|
|
|
return html_valid_ok; |
237
|
|
|
|
|
|
|
} |
238
|
4
|
|
|
|
|
|
j = 0; |
239
|
1388
|
100
|
|
|
|
|
for (i = 0; i < n_attributes; i++) { |
240
|
1384
|
100
|
|
|
|
|
if (yes_no[i]) { |
241
|
|
|
|
|
|
|
SV * attribute; |
242
|
401
|
|
|
|
|
|
attribute = newSVpv (yes_no[i], strlen (yes_no[i])); |
243
|
401
|
|
|
|
|
|
av_push (av, attribute); |
244
|
|
|
|
|
|
|
// fprintf (stderr, "Adding %d, %s\n", j, yes_no[i]); |
245
|
401
|
|
|
|
|
|
j++; |
246
|
|
|
|
|
|
|
} |
247
|
|
|
|
|
|
|
} |
248
|
4
|
50
|
|
|
|
|
if (j != n_attr) { |
249
|
0
|
|
|
|
|
|
fprintf (stderr, "%s:%d: inconsistency between expected number of attributes %d and stored number %d\n", |
250
|
|
|
|
|
|
|
__FILE__, __LINE__, n_attr, j); |
251
|
|
|
|
|
|
|
} |
252
|
4
|
|
|
|
|
|
return html_valid_ok; |
253
|
|
|
|
|
|
|
} |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
html_valid_status_t |
256
|
1
|
|
|
|
|
|
html_valid_all_attributes (AV * av) |
257
|
1
|
|
|
|
|
|
{ |
258
|
1
|
|
|
|
|
|
const char * yes_no[n_attributes]; |
259
|
|
|
|
|
|
|
int i; |
260
|
1
|
|
|
|
|
|
TagAllAttributes (yes_no); |
261
|
1
|
50
|
|
|
|
|
if (av_len (av) != -1) { |
262
|
0
|
|
|
|
|
|
fprintf (stderr, "%s:%d: unexpected non-empty array with %d elements", |
263
|
0
|
|
|
|
|
|
__FILE__, __LINE__, (int) (av_len (av) + 1)); |
264
|
0
|
|
|
|
|
|
return html_valid_ok; |
265
|
|
|
|
|
|
|
} |
266
|
347
|
100
|
|
|
|
|
for (i = 0; i < n_attributes; i++) { |
267
|
|
|
|
|
|
|
SV * attribute; |
268
|
346
|
|
|
|
|
|
attribute = newSVpv (yes_no[i], strlen (yes_no[i])); |
269
|
346
|
|
|
|
|
|
av_push (av, attribute); |
270
|
|
|
|
|
|
|
} |
271
|
1
|
|
|
|
|
|
return html_valid_ok; |
272
|
|
|
|
|
|
|
} |
273
|
|
|
|
|
|
|
|