| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
typedef struct { |
|
2
|
|
|
|
|
|
|
TidyDoc tdoc; |
|
3
|
|
|
|
|
|
|
int n_mallocs; |
|
4
|
|
|
|
|
|
|
} |
|
5
|
|
|
|
|
|
|
html_valid_t; |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
/* Status codes returned by the html_valid_* functions.  The order of
   the enumerators (and therefore their numeric values) is unchanged. */
typedef enum html_valid_status {
    /* The operation succeeded. */
    html_valid_ok,
    /* Malloc or calloc failed. */
    html_valid_memory_failure,
    /* An upstream error from the library. */
    html_valid_tidy_error,
    /* Internal state was not as expected, for example the tidy
       document was not initialized. */
    html_valid_inconsistency,
    /* The option name is not known to the tidy library. */
    html_valid_unknown_option,
    /* The tidy library reported an option type which is not string,
       integer or boolean. */
    html_valid_bad_option_type,
    /* An undefined value was supplied for an option. */
    html_valid_undefined_option,
    /* A non-numerical value was supplied for an integer option. */
    html_valid_non_numerical_option
} html_valid_status_t;
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
static html_valid_status_t |
|
22
|
4
|
|
|
|
|
|
html_valid_create (html_valid_t * htv) |
|
23
|
|
|
|
|
|
|
{ |
|
24
|
4
|
|
|
|
|
|
htv->tdoc = tidyCreate (); |
|
25
|
4
|
|
|
|
|
|
htv->n_mallocs++; |
|
26
|
4
|
|
|
|
|
|
return html_valid_ok; |
|
27
|
|
|
|
|
|
|
} |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
/* Call the function html_valid_<x> and, if it does not return
   html_valid_ok, return its status from the enclosing function.

   X is the function name without the "html_valid_" prefix, followed
   by its argument list, e.g. CALL (set_option (htv, option, value));

   The body is wrapped in do { } while (0) so that the macro behaves
   as a single statement (safe inside an unbraced if/else), and the
   temporary has a macro-specific name so that an argument called
   "status" at the call site is not captured by it.  The invocation
   must be followed by a semicolon. */

#define CALL(x) do { \
        html_valid_status_t call_status_ = \
            html_valid_ ## x; \
        if (call_status_ != html_valid_ok) { \
            return call_status_; \
        } \
    } while (0)
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
/* Evaluate the tidy library call X, which yields an int.  If the
   result is negative, print a warning containing the result and
   return html_valid_tidy_error from the enclosing function.
   Non-negative results are ignored.

   X is parenthesised and evaluated exactly once.  The body is wrapped
   in do { } while (0) so the macro is a single statement; the
   invocation must be followed by a semicolon. */

#define CALL_TIDY(x) do { \
        int tidy_rc_ = (x); \
        if (tidy_rc_ < 0) { \
            warn ("Error %d from tidy library", tidy_rc_); \
            return html_valid_tidy_error; \
        } \
    } while (0)
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
/* Guard for functions which need a live tidy document.  If
   HTV->tdoc is null, print a warning and return
   html_valid_inconsistency from the enclosing function.

   HTV is parenthesised so that any pointer expression may be passed.
   The body is wrapped in do { } while (0) so the macro is a single
   statement; the invocation must be followed by a semicolon. */

#define CHECK_INIT(htv) do { \
        if (! (htv)->tdoc) { \
            warn ("Uninitialized TidyDoc"); \
            return html_valid_inconsistency; \
        } \
    } while (0)
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
static html_valid_status_t |
|
55
|
3
|
|
|
|
|
|
html_valid_run (html_valid_t * htv, SV * html, |
|
56
|
|
|
|
|
|
|
SV ** output_ptr, SV ** errors_ptr) |
|
57
|
|
|
|
|
|
|
{ |
|
58
|
|
|
|
|
|
|
const char * html_string; |
|
59
|
|
|
|
|
|
|
STRLEN html_length; |
|
60
|
|
|
|
|
|
|
SV * output; |
|
61
|
|
|
|
|
|
|
SV * errors; |
|
62
|
|
|
|
|
|
|
|
|
63
|
3
|
|
|
|
|
|
TidyBuffer tidy_output = {0}; |
|
64
|
3
|
|
|
|
|
|
TidyBuffer tidy_errbuf = {0}; |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
/* First set these up sanely in case the stuff hits the fan. */ |
|
67
|
|
|
|
|
|
|
|
|
68
|
3
|
|
|
|
|
|
* output_ptr = & PL_sv_undef; |
|
69
|
3
|
|
|
|
|
|
* errors_ptr = & PL_sv_undef; |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
/* Work around bug where allocator sometimes does not get set. */ |
|
72
|
|
|
|
|
|
|
|
|
73
|
3
|
|
|
|
|
|
CopyAllocator (htv->tdoc, & tidy_output); |
|
74
|
3
|
|
|
|
|
|
CopyAllocator (htv->tdoc, & tidy_errbuf); |
|
75
|
|
|
|
|
|
|
|
|
76
|
3
|
50
|
|
|
|
|
html_string = SvPV (html, html_length); |
|
77
|
3
|
50
|
|
|
|
|
CALL_TIDY (tidySetErrorBuffer (htv->tdoc, & tidy_errbuf)); |
|
78
|
3
|
|
|
|
|
|
htv->n_mallocs++; |
|
79
|
3
|
50
|
|
|
|
|
CALL_TIDY (tidyParseString (htv->tdoc, html_string)); |
|
80
|
3
|
50
|
|
|
|
|
CALL_TIDY (tidyCleanAndRepair (htv->tdoc)); |
|
81
|
3
|
50
|
|
|
|
|
CALL_TIDY (tidyRunDiagnostics (htv->tdoc)); |
|
82
|
3
|
50
|
|
|
|
|
CALL_TIDY (tidySaveBuffer (htv->tdoc, & tidy_output)); |
|
83
|
3
|
|
|
|
|
|
htv->n_mallocs++; |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
/* Copy the contents of the buffers into the Perl scalars. */ |
|
86
|
|
|
|
|
|
|
|
|
87
|
3
|
|
|
|
|
|
output = newSVpv ((char *) tidy_output.bp, tidy_output.size); |
|
88
|
3
|
|
|
|
|
|
errors = newSVpv ((char *) tidy_errbuf.bp, tidy_errbuf.size); |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
/* HTML Tidy randomly segfaults here due to "allocator" not being |
|
91
|
|
|
|
|
|
|
set in some cases, hence the above CopyAllocator fix. */ |
|
92
|
|
|
|
|
|
|
|
|
93
|
3
|
|
|
|
|
|
tidyBufFree (& tidy_output); |
|
94
|
3
|
|
|
|
|
|
htv->n_mallocs--; |
|
95
|
3
|
|
|
|
|
|
tidyBufFree (& tidy_errbuf); |
|
96
|
3
|
|
|
|
|
|
htv->n_mallocs--; |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
/* These are not our mallocs, they are Perl's mallocs, so we don't |
|
99
|
|
|
|
|
|
|
increase htv->n_mallocs for these. After we return them, we no |
|
100
|
|
|
|
|
|
|
longer take care of these. */ |
|
101
|
|
|
|
|
|
|
|
|
102
|
3
|
|
|
|
|
|
* output_ptr = output; |
|
103
|
3
|
|
|
|
|
|
* errors_ptr = errors; |
|
104
|
3
|
|
|
|
|
|
return html_valid_ok; |
|
105
|
|
|
|
|
|
|
} |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
static html_valid_status_t |
|
108
|
1
|
|
|
|
|
|
html_valid_set_string_option (html_valid_t * htv, const char * coption, |
|
109
|
|
|
|
|
|
|
TidyOptionId ti, SV * value) |
|
110
|
|
|
|
|
|
|
{ |
|
111
|
|
|
|
|
|
|
const char * cvalue; |
|
112
|
|
|
|
|
|
|
STRLEN cvalue_length; |
|
113
|
1
|
50
|
|
|
|
|
if (! SvOK (value)) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
114
|
0
|
|
|
|
|
|
warn ("cannot set option '%s' to undefined value", |
|
115
|
|
|
|
|
|
|
coption); |
|
116
|
0
|
|
|
|
|
|
return html_valid_undefined_option; |
|
117
|
|
|
|
|
|
|
} |
|
118
|
1
|
50
|
|
|
|
|
cvalue = SvPV (value, cvalue_length); |
|
119
|
1
|
50
|
|
|
|
|
if (! tidyOptSetValue (htv->tdoc, ti, cvalue)) { |
|
120
|
0
|
|
|
|
|
|
warn ("Setting option %d to %s failed", ti, cvalue); |
|
121
|
0
|
|
|
|
|
|
return html_valid_tidy_error; |
|
122
|
|
|
|
|
|
|
} |
|
123
|
1
|
|
|
|
|
|
return html_valid_ok; |
|
124
|
|
|
|
|
|
|
} |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
static html_valid_status_t |
|
127
|
2
|
|
|
|
|
|
html_valid_set_number_option (html_valid_t * htv, const char * coption, |
|
128
|
|
|
|
|
|
|
TidyOptionId ti, SV * value) |
|
129
|
|
|
|
|
|
|
{ |
|
130
|
|
|
|
|
|
|
int cvalue; |
|
131
|
2
|
50
|
|
|
|
|
if (! SvOK (value)) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
132
|
0
|
|
|
|
|
|
warn ("cannot set option '%s' to undefined value", |
|
133
|
|
|
|
|
|
|
coption); |
|
134
|
0
|
|
|
|
|
|
return html_valid_undefined_option; |
|
135
|
|
|
|
|
|
|
} |
|
136
|
2
|
50
|
|
|
|
|
if (! looks_like_number (value)) { |
|
137
|
0
|
0
|
|
|
|
|
warn ("option %s expects a numerical value, but you supplied %s", |
|
138
|
0
|
|
|
|
|
|
coption, SvPV_nolen (value)); |
|
139
|
0
|
|
|
|
|
|
return html_valid_non_numerical_option; |
|
140
|
|
|
|
|
|
|
} |
|
141
|
2
|
50
|
|
|
|
|
cvalue = SvIV (value); |
|
142
|
2
|
50
|
|
|
|
|
if (! tidyOptSetInt (htv->tdoc, ti, cvalue)) { |
|
143
|
0
|
|
|
|
|
|
warn ("Setting option %d to %d failed", ti, cvalue); |
|
144
|
0
|
|
|
|
|
|
return html_valid_tidy_error; |
|
145
|
|
|
|
|
|
|
} |
|
146
|
2
|
|
|
|
|
|
return html_valid_ok; |
|
147
|
|
|
|
|
|
|
} |
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
static html_valid_status_t |
|
150
|
6
|
|
|
|
|
|
html_valid_set_option (html_valid_t * htv, SV * option, SV * value) |
|
151
|
|
|
|
|
|
|
{ |
|
152
|
|
|
|
|
|
|
TidyOption to; |
|
153
|
|
|
|
|
|
|
TidyOptionType tot; |
|
154
|
|
|
|
|
|
|
TidyOptionId ti; |
|
155
|
|
|
|
|
|
|
const char * coption; |
|
156
|
|
|
|
|
|
|
STRLEN coption_length; |
|
157
|
6
|
50
|
|
|
|
|
CHECK_INIT (htv); |
|
158
|
6
|
50
|
|
|
|
|
coption = SvPV (option, coption_length); |
|
159
|
6
|
|
|
|
|
|
to = tidyGetOptionByName(htv->tdoc, coption); |
|
160
|
6
|
50
|
|
|
|
|
if (to == 0) { |
|
161
|
0
|
|
|
|
|
|
warn ("unknown option %s", coption); |
|
162
|
0
|
|
|
|
|
|
return html_valid_unknown_option; |
|
163
|
|
|
|
|
|
|
} |
|
164
|
6
|
|
|
|
|
|
ti = tidyOptGetId (to); |
|
165
|
6
|
|
|
|
|
|
tot = tidyOptGetType (to); |
|
166
|
6
|
|
|
|
|
|
switch (tot) { |
|
167
|
|
|
|
|
|
|
case TidyString: |
|
168
|
1
|
50
|
|
|
|
|
CALL (set_string_option (htv, coption, ti, value)); |
|
169
|
1
|
|
|
|
|
|
break; |
|
170
|
|
|
|
|
|
|
case TidyInteger: |
|
171
|
2
|
50
|
|
|
|
|
CALL (set_number_option (htv, coption, ti, value)); |
|
172
|
2
|
|
|
|
|
|
break; |
|
173
|
|
|
|
|
|
|
case TidyBoolean: |
|
174
|
3
|
50
|
|
|
|
|
tidyOptSetBool (htv->tdoc, ti, SvTRUE (value)); |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
175
|
3
|
|
|
|
|
|
break; |
|
176
|
|
|
|
|
|
|
default: |
|
177
|
0
|
|
|
|
|
|
fprintf (stderr, "%s:%d: bad option type %d from tidy library.\n", |
|
178
|
|
|
|
|
|
|
__FILE__, __LINE__, tot); |
|
179
|
0
|
|
|
|
|
|
return html_valid_bad_option_type; |
|
180
|
|
|
|
|
|
|
} |
|
181
|
6
|
|
|
|
|
|
return html_valid_ok; |
|
182
|
|
|
|
|
|
|
} |
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
static html_valid_status_t |
|
185
|
4
|
|
|
|
|
|
html_valid_destroy (html_valid_t * htv) |
|
186
|
|
|
|
|
|
|
{ |
|
187
|
4
|
|
|
|
|
|
tidyRelease (htv->tdoc); |
|
188
|
4
|
|
|
|
|
|
htv->tdoc = 0; |
|
189
|
4
|
|
|
|
|
|
htv->n_mallocs--; |
|
190
|
4
|
|
|
|
|
|
return html_valid_ok; |
|
191
|
|
|
|
|
|
|
} |
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
static html_valid_status_t |
|
194
|
2
|
|
|
|
|
|
html_valid_tag_information (HV * hv) |
|
195
|
2
|
|
|
|
|
|
{ |
|
196
|
|
|
|
|
|
|
int i; |
|
197
|
|
|
|
|
|
|
// n_html_tags is defined in html-tidy5.h as part of the "extra" |
|
198
|
|
|
|
|
|
|
// material. |
|
199
|
2
|
|
|
|
|
|
html_valid_tag_t tags[n_html_tags]; |
|
200
|
2
|
|
|
|
|
|
TagInformation (tags); |
|
201
|
308
|
100
|
|
|
|
|
for (i = 0; i < n_html_tags; i++) { |
|
202
|
|
|
|
|
|
|
int name_len; |
|
203
|
|
|
|
|
|
|
AV * constants; |
|
204
|
|
|
|
|
|
|
SV * constants_ref; |
|
205
|
306
|
|
|
|
|
|
constants = newAV (); |
|
206
|
|
|
|
|
|
|
// Store the ID for reverse lookup of attributes. |
|
207
|
306
|
|
|
|
|
|
av_push (constants, newSVuv (i)); |
|
208
|
306
|
|
|
|
|
|
av_push (constants, newSVuv (tags[i].versions)); |
|
209
|
306
|
|
|
|
|
|
av_push (constants, newSVuv (tags[i].model)); |
|
210
|
|
|
|
|
|
|
|
|
211
|
306
|
|
|
|
|
|
constants_ref = newRV_inc ((SV *) constants); |
|
212
|
306
|
|
|
|
|
|
name_len = strlen (tags[i].name); |
|
213
|
|
|
|
|
|
|
/* |
|
214
|
|
|
|
|
|
|
fprintf (stderr, "Storing %s (%d) into hash.\n", |
|
215
|
|
|
|
|
|
|
tags[i].name, name_len); |
|
216
|
|
|
|
|
|
|
*/ |
|
217
|
306
|
|
|
|
|
|
(void) hv_store (hv, tags[i].name, name_len, constants_ref, 0 /* no hash value */); |
|
218
|
|
|
|
|
|
|
} |
|
219
|
2
|
|
|
|
|
|
return html_valid_ok; |
|
220
|
|
|
|
|
|
|
} |
|
221
|
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
html_valid_status_t |
|
223
|
4
|
|
|
|
|
|
html_valid_tag_attr (AV * av, unsigned int tag_id, unsigned int version) |
|
224
|
4
|
|
|
|
|
|
{ |
|
225
|
4
|
|
|
|
|
|
const char * yes_no[n_attributes]; |
|
226
|
|
|
|
|
|
|
int i; |
|
227
|
|
|
|
|
|
|
int j; |
|
228
|
|
|
|
|
|
|
int n_attr; |
|
229
|
4
|
|
|
|
|
|
TagAttributes (tag_id, version, yes_no, & n_attr); |
|
230
|
4
|
50
|
|
|
|
|
if (av_len (av) != -1) { |
|
231
|
0
|
|
|
|
|
|
fprintf (stderr, "%s:%d: unexpected non-empty array with %d elements", |
|
232
|
0
|
|
|
|
|
|
__FILE__, __LINE__, (int) (av_len (av) + 1)); |
|
233
|
0
|
|
|
|
|
|
return html_valid_ok; |
|
234
|
|
|
|
|
|
|
} |
|
235
|
4
|
50
|
|
|
|
|
if (n_attr == 0) { |
|
236
|
0
|
|
|
|
|
|
return html_valid_ok; |
|
237
|
|
|
|
|
|
|
} |
|
238
|
4
|
|
|
|
|
|
j = 0; |
|
239
|
1388
|
100
|
|
|
|
|
for (i = 0; i < n_attributes; i++) { |
|
240
|
1384
|
100
|
|
|
|
|
if (yes_no[i]) { |
|
241
|
|
|
|
|
|
|
SV * attribute; |
|
242
|
401
|
|
|
|
|
|
attribute = newSVpv (yes_no[i], strlen (yes_no[i])); |
|
243
|
401
|
|
|
|
|
|
av_push (av, attribute); |
|
244
|
|
|
|
|
|
|
// fprintf (stderr, "Adding %d, %s\n", j, yes_no[i]); |
|
245
|
401
|
|
|
|
|
|
j++; |
|
246
|
|
|
|
|
|
|
} |
|
247
|
|
|
|
|
|
|
} |
|
248
|
4
|
50
|
|
|
|
|
if (j != n_attr) { |
|
249
|
0
|
|
|
|
|
|
fprintf (stderr, "%s:%d: inconsistency between expected number of attributes %d and stored number %d\n", |
|
250
|
|
|
|
|
|
|
__FILE__, __LINE__, n_attr, j); |
|
251
|
|
|
|
|
|
|
} |
|
252
|
4
|
|
|
|
|
|
return html_valid_ok; |
|
253
|
|
|
|
|
|
|
} |
|
254
|
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
html_valid_status_t |
|
256
|
1
|
|
|
|
|
|
html_valid_all_attributes (AV * av) |
|
257
|
1
|
|
|
|
|
|
{ |
|
258
|
1
|
|
|
|
|
|
const char * yes_no[n_attributes]; |
|
259
|
|
|
|
|
|
|
int i; |
|
260
|
1
|
|
|
|
|
|
TagAllAttributes (yes_no); |
|
261
|
1
|
50
|
|
|
|
|
if (av_len (av) != -1) { |
|
262
|
0
|
|
|
|
|
|
fprintf (stderr, "%s:%d: unexpected non-empty array with %d elements", |
|
263
|
0
|
|
|
|
|
|
__FILE__, __LINE__, (int) (av_len (av) + 1)); |
|
264
|
0
|
|
|
|
|
|
return html_valid_ok; |
|
265
|
|
|
|
|
|
|
} |
|
266
|
347
|
100
|
|
|
|
|
for (i = 0; i < n_attributes; i++) { |
|
267
|
|
|
|
|
|
|
SV * attribute; |
|
268
|
346
|
|
|
|
|
|
attribute = newSVpv (yes_no[i], strlen (yes_no[i])); |
|
269
|
346
|
|
|
|
|
|
av_push (av, attribute); |
|
270
|
|
|
|
|
|
|
} |
|
271
|
1
|
|
|
|
|
|
return html_valid_ok; |
|
272
|
|
|
|
|
|
|
} |
|
273
|
|
|
|
|
|
|
|