line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# This code can be redistributed and modified under the terms of the GNU |
2
|
|
|
|
|
|
|
# General Public License as published by the Free Software Foundation, either |
3
|
|
|
|
|
|
|
# version 3 of the License, or (at your option) any later version. |
4
|
|
|
|
|
|
|
# See the "COPYING" file for details. |
5
|
|
|
|
|
|
|
package HTML::Blitz::Parser; |
6
|
11
|
|
|
11
|
|
81
|
use HTML::Blitz::pragma; |
|
11
|
|
|
|
|
22
|
|
|
11
|
|
|
|
|
74
|
|
7
|
11
|
|
|
11
|
|
9004
|
use HTML::Blitz::ParseError (); |
|
11
|
|
|
|
|
31
|
|
|
11
|
|
|
|
|
415
|
|
8
|
11
|
|
|
|
|
2618
|
use HTML::Blitz::TokenType qw( |
9
|
|
|
|
|
|
|
TT_TAG_OPEN |
10
|
|
|
|
|
|
|
TT_TAG_CLOSE |
11
|
|
|
|
|
|
|
TT_TEXT |
12
|
|
|
|
|
|
|
TT_COMMENT |
13
|
|
|
|
|
|
|
TT_DOCTYPE |
14
|
11
|
|
|
11
|
|
4507
|
); |
|
11
|
|
|
|
|
27
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
our $VERSION = '0.09'; |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
method _fail( |
19
|
|
|
|
|
|
|
$msg, |
20
|
8
|
|
|
|
|
21
|
:$pos = pos(${$self->{src_ref}}), |
21
|
|
|
|
|
|
|
:$width = 1, |
22
|
|
|
|
|
|
|
:$alt_msg = undef, |
23
|
|
|
|
|
|
|
:$alt_pos = undef, |
24
|
|
|
|
|
|
|
:$alt_width = 1, |
25
|
28
|
50
|
66
|
28
|
|
87
|
) { |
|
28
|
50
|
|
|
|
158
|
|
|
28
|
100
|
|
|
|
45
|
|
|
28
|
100
|
|
|
|
97
|
|
|
28
|
100
|
|
|
|
96
|
|
|
28
|
50
|
|
|
|
88
|
|
|
28
|
|
|
|
|
53
|
|
|
28
|
|
|
|
|
46
|
|
|
28
|
|
|
|
|
63
|
|
|
28
|
|
|
|
|
80
|
|
|
28
|
|
|
|
|
36
|
|
26
|
|
|
|
|
|
|
die HTML::Blitz::ParseError->new( |
27
|
|
|
|
|
|
|
src_name => $self->{src_name}, |
28
|
|
|
|
|
|
|
src_ref => $self->{src_ref}, |
29
|
28
|
|
|
|
|
161
|
msg => $msg, |
30
|
|
|
|
|
|
|
pos => $pos, |
31
|
|
|
|
|
|
|
width => $width, |
32
|
|
|
|
|
|
|
alt_msg => $alt_msg, |
33
|
|
|
|
|
|
|
alt_pos => $alt_pos, |
34
|
|
|
|
|
|
|
alt_width => $alt_width, |
35
|
|
|
|
|
|
|
) |
36
|
|
|
|
|
|
|
} |
37
|
|
|
|
|
|
|
|
38
|
11
|
50
|
|
11
|
0
|
32
|
method throw_for($token, $msg) { |
|
11
|
50
|
|
|
|
25
|
|
|
11
|
|
|
|
|
18
|
|
|
11
|
|
|
|
|
20
|
|
|
11
|
|
|
|
|
19
|
|
39
|
11
|
|
|
|
|
20
|
my $type = $token->{type}; |
40
|
|
|
|
|
|
|
$self->_fail( |
41
|
|
|
|
|
|
|
$msg, |
42
|
|
|
|
|
|
|
pos => $token->{pos}, |
43
|
|
|
|
|
|
|
$type eq TT_TAG_OPEN || $type eq TT_TAG_CLOSE |
44
|
|
|
|
|
|
|
? (width => 1 + ($type eq TT_TAG_CLOSE) + length $token->{name}) |
45
|
11
|
100
|
66
|
|
|
63
|
: (), |
46
|
|
|
|
|
|
|
); |
47
|
|
|
|
|
|
|
} |
48
|
|
|
|
|
|
|
|
49
|
276
|
50
|
|
276
|
0
|
622
|
method new($class: $src_name, $src) { |
|
276
|
50
|
|
|
|
676
|
|
|
276
|
|
|
|
|
407
|
|
|
276
|
|
|
|
|
518
|
|
|
276
|
|
|
|
|
682
|
|
50
|
276
|
|
|
|
|
1012
|
my $self = bless { |
51
|
|
|
|
|
|
|
src_name => $src_name, |
52
|
|
|
|
|
|
|
src_ref => \$src, |
53
|
|
|
|
|
|
|
tag_stack => [], |
54
|
|
|
|
|
|
|
in_foreign_elem => 0, |
55
|
|
|
|
|
|
|
}, $class; |
56
|
|
|
|
|
|
|
|
57
|
276
|
|
|
|
|
869
|
$src =~ s/\r\n?/\n/g; # normalize newlines |
58
|
276
|
50
|
|
|
|
1257
|
$src =~ /([\x{d800}-\x{dfff}])/ |
59
|
|
|
|
|
|
|
and $self->_fail(sprintf("surrogate codepoint U+%04X in input", ord $1), pos => $-[1]); |
60
|
276
|
50
|
|
|
|
22287
|
$src =~ /( |
61
|
|
|
|
|
|
|
[\x{fdd0}-\x{fdef}] |
62
|
|
|
|
|
|
|
| [\x{fffe}\x{ffff}] |
63
|
|
|
|
|
|
|
| [\x{1fffe}\x{1ffff}] |
64
|
|
|
|
|
|
|
| [\x{2fffe}\x{2ffff}] |
65
|
|
|
|
|
|
|
| [\x{3fffe}\x{3ffff}] |
66
|
|
|
|
|
|
|
| [\x{4fffe}\x{4ffff}] |
67
|
|
|
|
|
|
|
| [\x{5fffe}\x{5ffff}] |
68
|
|
|
|
|
|
|
| [\x{6fffe}\x{6ffff}] |
69
|
|
|
|
|
|
|
| [\x{7fffe}\x{7ffff}] |
70
|
|
|
|
|
|
|
| [\x{8fffe}\x{8ffff}] |
71
|
|
|
|
|
|
|
| [\x{9fffe}\x{9ffff}] |
72
|
|
|
|
|
|
|
| [\x{afffe}\x{affff}] |
73
|
|
|
|
|
|
|
| [\x{bfffe}\x{bffff}] |
74
|
|
|
|
|
|
|
| [\x{cfffe}\x{cffff}] |
75
|
|
|
|
|
|
|
| [\x{dfffe}\x{dffff}] |
76
|
|
|
|
|
|
|
| [\x{efffe}\x{effff}] |
77
|
|
|
|
|
|
|
| [\x{ffffe}\x{fffff}] |
78
|
|
|
|
|
|
|
| [\x{10fffe}\x{10ffff}] |
79
|
|
|
|
|
|
|
)/x and $self->_fail(sprintf("non-character codepoint U+%04X in input", ord $1), pos => $-[1]); |
80
|
276
|
50
|
|
|
|
1021
|
$src =~ /((?![ \t\n\f])[\x00-\x1f\x7f-\x9f])/ |
81
|
|
|
|
|
|
|
and $self->_fail(sprintf("control character U+%04X in input", ord $1), pos => $-[1]); |
82
|
|
|
|
|
|
|
|
83
|
276
|
|
|
|
|
917
|
pos($src) = 0; |
84
|
|
|
|
|
|
|
|
85
|
276
|
|
|
|
|
888
|
$self |
86
|
|
|
|
|
|
|
} |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
my %entities; |
89
|
|
|
|
|
|
|
{ |
90
|
|
|
|
|
|
|
while (my $line = readline DATA) { |
91
|
|
|
|
|
|
|
chomp $line; |
92
|
|
|
|
|
|
|
my ($name, $value) = $line =~ /^(\w+) (\d+(?:,\d+)*)\z/a |
93
|
|
|
|
|
|
|
or die "Internal error: malformed entitiy definition '$line'"; |
94
|
|
|
|
|
|
|
$value =~ s/(\d+),?/chr $1/aeg; |
95
|
|
|
|
|
|
|
$entities{$name} = $value; |
96
|
|
|
|
|
|
|
} |
97
|
|
|
|
|
|
|
close DATA; |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
my %void_tags = map +($_ => 1), qw( |
101
|
|
|
|
|
|
|
area |
102
|
|
|
|
|
|
|
base br |
103
|
|
|
|
|
|
|
col |
104
|
|
|
|
|
|
|
embed |
105
|
|
|
|
|
|
|
hr |
106
|
|
|
|
|
|
|
img input |
107
|
|
|
|
|
|
|
link |
108
|
|
|
|
|
|
|
meta |
109
|
|
|
|
|
|
|
source |
110
|
|
|
|
|
|
|
track |
111
|
|
|
|
|
|
|
wbr |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
basefont bgsound |
114
|
|
|
|
|
|
|
frame |
115
|
|
|
|
|
|
|
keygen |
116
|
|
|
|
|
|
|
param |
117
|
|
|
|
|
|
|
); |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
my %foreign_tags = map +($_ => 1), qw( |
120
|
|
|
|
|
|
|
math |
121
|
|
|
|
|
|
|
svg |
122
|
|
|
|
|
|
|
); |
123
|
|
|
|
|
|
|
|
124
|
3640
|
50
|
|
3640
|
|
6712
|
method _consume_entity_maybe($chunk) { |
|
3640
|
50
|
|
|
|
6151
|
|
|
3640
|
|
|
|
|
5243
|
|
|
3640
|
|
|
|
|
7999
|
|
|
3640
|
|
|
|
|
4539
|
|
125
|
3640
|
100
|
|
|
|
17561
|
$chunk eq '&' or return $chunk; |
126
|
|
|
|
|
|
|
|
127
|
163
|
|
|
|
|
256
|
my $src_ref = $self->{src_ref}; |
128
|
163
|
|
|
|
|
205
|
my $char; |
129
|
|
|
|
|
|
|
|
130
|
163
|
100
|
|
|
|
376
|
if ($$src_ref =~ /\G#/gc) { |
131
|
30
|
100
|
|
|
|
67
|
if ($$src_ref =~ /\G[xX]/gc) { |
132
|
18
|
50
|
|
|
|
47
|
$$src_ref =~ /\G([[:xdigit:]]+)/gc |
133
|
|
|
|
|
|
|
or $self->_fail("missing hex digits after ''"); |
134
|
18
|
|
|
|
|
53
|
$char = chr hex $1; |
135
|
|
|
|
|
|
|
} else { |
136
|
12
|
50
|
|
|
|
36
|
$$src_ref =~ /\G(\d+)/agc |
137
|
|
|
|
|
|
|
or $self->_fail("missing digits after ''"); |
138
|
12
|
|
|
|
|
48
|
$char = chr $1; |
139
|
|
|
|
|
|
|
} |
140
|
|
|
|
|
|
|
} else { |
141
|
133
|
50
|
|
|
|
403
|
$$src_ref =~ /\G(\w+)/agc |
142
|
|
|
|
|
|
|
or $self->_fail("missing character name after '&'"); |
143
|
133
|
|
33
|
|
|
427
|
$char = $entities{$1} |
144
|
|
|
|
|
|
|
// $self->_fail("invalid character reference '$1' after '&'", pos => $-[1], width => length $1 ); |
145
|
|
|
|
|
|
|
} |
146
|
163
|
50
|
|
|
|
483
|
$$src_ref =~ /\G;/gc |
147
|
|
|
|
|
|
|
or $self->_fail("missing ';' after character reference"); |
148
|
|
|
|
|
|
|
|
149
|
163
|
|
|
|
|
624
|
$char |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
|
152
|
2172
|
50
|
|
2172
|
0
|
4147
|
method current_tag() { |
|
2172
|
50
|
|
|
|
3856
|
|
|
2172
|
|
|
|
|
3071
|
|
|
2172
|
|
|
|
|
2683
|
|
153
|
2172
|
|
|
|
|
3053
|
my $tag_stack = $self->{tag_stack}; |
154
|
2172
|
100
|
|
|
|
6275
|
@$tag_stack ? $tag_stack->[-1][0] : '' |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
|
157
|
5653
|
50
|
|
5653
|
0
|
11152
|
method parse() { |
|
5653
|
50
|
|
|
|
9511
|
|
|
5653
|
|
|
|
|
7562
|
|
|
5653
|
|
|
|
|
6783
|
|
158
|
5653
|
|
|
|
|
8238
|
my $src_ref = $self->{src_ref}; |
159
|
5653
|
|
|
|
|
7694
|
my $tag_stack = $self->{tag_stack}; |
160
|
|
|
|
|
|
|
|
161
|
5653
|
100
|
|
|
|
10937
|
my $cur_tag = @$tag_stack ? $tag_stack->[-1][0] : ''; |
162
|
|
|
|
|
|
|
|
163
|
5653
|
100
|
|
|
|
14691
|
if ($$src_ref =~ /\G\z/) { |
164
|
238
|
50
|
|
|
|
515
|
length $cur_tag |
165
|
|
|
|
|
|
|
and $self->_fail("unclosed '<$cur_tag>' tag", pos => $tag_stack->[-1][1], width => 1 + length($cur_tag)); |
166
|
238
|
|
|
|
|
757
|
return undef; |
167
|
|
|
|
|
|
|
} |
168
|
|
|
|
|
|
|
|
169
|
5415
|
|
|
|
|
8696
|
my $pos = pos $$src_ref; |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
{ |
172
|
5415
|
|
|
|
|
6912
|
my $text = ''; |
|
5415
|
|
|
|
|
7223
|
|
173
|
|
|
|
|
|
|
|
174
|
5415
|
100
|
|
|
|
13480
|
if ($cur_tag eq 'script') { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
175
|
202
|
50
|
|
1
|
|
736
|
my $err = fun () { $self->_fail("unclosed '<$cur_tag>' tag", pos => $tag_stack->[-1][1], width => 1 + length($cur_tag)) }; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
8
|
|
176
|
|
|
|
|
|
|
SCRIPT_DATA: { |
177
|
202
|
50
|
|
|
|
322
|
$$src_ref =~ m{ ( ) | < (/?) script [ \t\n\f/>] }xaaigc or $err->(); |
|
12
|
|
|
|
|
83
|
|
182
|
11
|
|
|
|
|
29
|
$match_start = $-[0]; |
183
|
11
|
100
|
|
|
|
35
|
if ($1) { |
184
|
2
|
|
|
|
|
7
|
redo SCRIPT_DATA; |
185
|
|
|
|
|
|
|
} |
186
|
9
|
100
|
|
|
|
30
|
if (!$2) { |
187
|
6
|
50
|
|
|
|
23
|
$$src_ref =~ m{ (-->) | ] }xaaigc or $err->(); |
188
|
6
|
100
|
|
|
|
19
|
if ($1) { |
189
|
3
|
|
|
|
|
8
|
redo SCRIPT_DATA; |
190
|
|
|
|
|
|
|
} |
191
|
3
|
|
|
|
|
5
|
redo SCRIPT_DATA_ESCAPED; |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
} |
195
|
201
|
|
|
|
|
489
|
pos($$src_ref) = $match_start; |
196
|
|
|
|
|
|
|
} |
197
|
201
|
|
|
|
|
1023
|
$text = substr $$src_ref, $pos, pos($$src_ref) - $pos; |
198
|
|
|
|
|
|
|
} elsif ($cur_tag eq 'style') { |
199
|
140
|
100
|
|
|
|
557
|
if ($$src_ref =~ m{\G ( (?: (?! (?aai: style ) [ \t\n\f/>] ) . )+ ) }xsgc) { |
200
|
70
|
|
|
|
|
170
|
$text = $1; |
201
|
|
|
|
|
|
|
} |
202
|
|
|
|
|
|
|
} elsif ($cur_tag eq 'title') { |
203
|
188
|
|
|
|
|
821
|
while ($$src_ref =~ m{\G ( (?: (?! (?aai: title ) [ \t\n\f/>] ) [^&] )+ | & ) }xgc) { |
204
|
94
|
|
|
|
|
230
|
$text .= $self->_consume_entity_maybe($1); |
205
|
|
|
|
|
|
|
} |
206
|
|
|
|
|
|
|
} elsif ($cur_tag eq 'textarea') { |
207
|
0
|
|
|
|
|
0
|
while ($$src_ref =~ m{\G ( (?: (?! (?aai: textarea ) [ \t\n\f/>] ) [^&] )+ | & ) }xgc) { |
208
|
0
|
|
|
|
|
0
|
$text .= $self->_consume_entity_maybe($1); |
209
|
|
|
|
|
|
|
} |
210
|
|
|
|
|
|
|
} else { |
211
|
4885
|
|
|
|
|
13943
|
while ($$src_ref =~ /\G ( [^<&]+ | & )/xgc) { |
212
|
2441
|
|
|
|
|
4817
|
$text .= $self->_consume_entity_maybe($1); |
213
|
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
} |
215
|
|
|
|
|
|
|
|
216
|
5414
|
100
|
|
|
|
11666
|
if (length $text) { |
217
|
|
|
|
|
|
|
return { |
218
|
2415
|
|
|
|
|
10999
|
type => TT_TEXT, |
219
|
|
|
|
|
|
|
pos => $pos, |
220
|
|
|
|
|
|
|
content => $text, |
221
|
|
|
|
|
|
|
}; |
222
|
|
|
|
|
|
|
} |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
2999
|
50
|
|
|
|
8006
|
if ($$src_ref =~ /\G
|
226
|
2999
|
100
|
|
|
|
6327
|
if ($$src_ref =~ /\G!/gc) { |
227
|
59
|
100
|
|
|
|
159
|
if ($$src_ref =~ /\G--/gc) { |
228
|
47
|
100
|
|
|
|
119
|
if ($$src_ref =~ /\G(-?>)/) { |
229
|
2
|
|
|
|
|
10
|
$self->_fail("improperly closed comment", width => length($1)); |
230
|
|
|
|
|
|
|
} |
231
|
45
|
100
|
|
|
|
206
|
$$src_ref =~ /\G(.*?)(?|--!?>)/sgc |
232
|
|
|
|
|
|
|
or $self->_fail("unterminated comment", pos => $pos, width => 4); |
233
|
44
|
|
|
|
|
152
|
my ($text, $closer) = ($1, $2); |
234
|
44
|
100
|
|
|
|
109
|
if ($closer eq '') { |
245
|
1
|
|
|
|
|
7
|
$self->_fail( |
246
|
|
|
|
|
|
|
"improperly closed comment (should be '-->')", |
247
|
|
|
|
|
|
|
pos => $-[2], |
248
|
|
|
|
|
|
|
width => length($closer), |
249
|
|
|
|
|
|
|
alt_msg => "comment starting here", |
250
|
|
|
|
|
|
|
alt_pos => $pos, |
251
|
|
|
|
|
|
|
alt_width => 4, |
252
|
|
|
|
|
|
|
); |
253
|
|
|
|
|
|
|
} |
254
|
42
|
100
|
|
|
|
78
|
if ($closer eq '') { |
255
|
2
|
|
|
|
|
4
|
$text .= '
|
256
|
2
|
|
|
|
|
5
|
$closer = '-->'; |
257
|
|
|
|
|
|
|
} |
258
|
|
|
|
|
|
|
return { |
259
|
42
|
|
|
|
|
195
|
type => TT_COMMENT, |
260
|
|
|
|
|
|
|
pos => $pos, |
261
|
|
|
|
|
|
|
content => $text, |
262
|
|
|
|
|
|
|
}; |
263
|
|
|
|
|
|
|
} |
264
|
|
|
|
|
|
|
|
265
|
12
|
100
|
|
|
|
42
|
if ($$src_ref =~ /\Gdoctype/aaigc) { |
266
|
10
|
50
|
|
|
|
30
|
$$src_ref =~ /\G[ \t\n\f]+/gc |
267
|
|
|
|
|
|
|
or $self->_fail("missing whitespace after '
|
268
|
10
|
50
|
|
|
|
37
|
$$src_ref =~ /\Ghtml/aaigc |
269
|
|
|
|
|
|
|
or $self->_fail("invalid non-html doctype"); |
270
|
10
|
50
|
|
|
|
69
|
$$src_ref =~ /\G[ \t\n\f]*>/gc |
271
|
|
|
|
|
|
|
or $self->_fail("missing '>' after '
|
272
|
|
|
|
|
|
|
return { |
273
|
10
|
|
|
|
|
72
|
type => TT_DOCTYPE, |
274
|
|
|
|
|
|
|
pos => $pos, |
275
|
|
|
|
|
|
|
}; |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
|
278
|
2
|
100
|
66
|
|
|
13
|
if ($self->{in_foreign_elem} && $$src_ref =~ /\G\[CDATA\[/gc) { |
279
|
1
|
|
|
|
|
5
|
my $text_start = $+[0]; |
280
|
1
|
50
|
|
|
|
6
|
$$src_ref =~ /\]\]>/gc or $self->_fail("missing ']]>' after '
|
281
|
1
|
|
|
|
|
3
|
my $text_end = $-[0]; |
282
|
|
|
|
|
|
|
return { |
283
|
1
|
|
|
|
|
8
|
type => TT_TEXT, |
284
|
|
|
|
|
|
|
pos => $text_start, |
285
|
|
|
|
|
|
|
content => substr($$src_ref, $text_start, $text_end - $text_start), |
286
|
|
|
|
|
|
|
}; |
287
|
|
|
|
|
|
|
} |
288
|
|
|
|
|
|
|
|
289
|
1
|
|
|
|
|
5
|
$self->_fail("invalid declaration (should be '--' or 'DOCTYPE')"); |
290
|
|
|
|
|
|
|
} |
291
|
|
|
|
|
|
|
|
292
|
2940
|
|
|
|
|
5284
|
my $closing = $$src_ref =~ m{\G/}gc; |
293
|
|
|
|
|
|
|
|
294
|
2940
|
50
|
|
|
|
7546
|
$$src_ref =~ m{\G([a-zA-Z][^\s/>[:cntrl:]]*)}gc |
295
|
|
|
|
|
|
|
or $self->_fail("invalid tag name"); |
296
|
2940
|
|
|
|
|
7914
|
(my $name = $1) =~ tr/A-Z/a-z/; |
297
|
|
|
|
|
|
|
|
298
|
2940
|
|
|
|
|
5999
|
$$src_ref =~ /\G[ \t\n\f]+/gc; |
299
|
|
|
|
|
|
|
|
300
|
2940
|
|
|
|
|
4807
|
my (%attrs, %attr_pos); |
301
|
2940
|
|
|
|
|
6918
|
while ($$src_ref =~ m{\G([^\s/>="'<[:cntrl:]]+)}gc) { |
302
|
1110
|
|
|
|
|
3291
|
my $apos = $-[1]; |
303
|
1110
|
100
|
|
|
|
2559
|
if ($closing) { |
304
|
1
|
|
|
|
|
9
|
$self->_fail("invalid attribute in end tag '$name>'", pos => $apos, width => length $1); |
305
|
|
|
|
|
|
|
} |
306
|
1109
|
|
|
|
|
2197
|
(my $attr_name = $1) =~ tr/A-Z/a-z/; |
307
|
|
|
|
|
|
|
|
308
|
1109
|
100
|
|
|
|
2240
|
if (exists $attrs{$attr_name}) { |
309
|
1
|
|
|
|
|
10
|
$self->_fail("duplicate attribute '$attr_name' in '<$name>' tag", pos => $apos, width => length($attr_name), alt_msg => "first defined here", alt_pos => $attr_pos{$attr_name}, alt_width => length($attr_name)); |
310
|
|
|
|
|
|
|
} |
311
|
|
|
|
|
|
|
|
312
|
1108
|
|
|
|
|
2203
|
$$src_ref =~ /\G[ \t\n\f]+/gc; |
313
|
|
|
|
|
|
|
|
314
|
1108
|
|
|
|
|
1669
|
my $attr_value = ''; |
315
|
1108
|
100
|
|
|
|
2819
|
if ($$src_ref =~ /\G=[ \t\n\f]*/gc) { |
316
|
1101
|
100
|
|
|
|
3534
|
if ($$src_ref =~ /\G"/gc) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
317
|
341
|
|
|
|
|
915
|
my $qpos = $-[0]; |
318
|
341
|
|
|
|
|
609
|
my $text = ''; |
319
|
341
|
|
|
|
|
971
|
while ($$src_ref =~ /\G ( [^"&]+ | & ) /xgc) { |
320
|
340
|
|
|
|
|
685
|
$text .= $self->_consume_entity_maybe($1); |
321
|
|
|
|
|
|
|
} |
322
|
341
|
50
|
|
|
|
947
|
$$src_ref =~ /\G"/gc |
323
|
|
|
|
|
|
|
or $self->_fail(q{missing '"' after attribute value}, alt_msg => 'starting here', alt_pos => $qpos); |
324
|
341
|
50
|
|
|
|
863
|
$$src_ref =~ m{\G[^ \t\n\f/>]} |
325
|
|
|
|
|
|
|
and $self->_fail('missing whitespace after attribute value'); |
326
|
341
|
|
|
|
|
583
|
$attr_value = $text; |
327
|
|
|
|
|
|
|
} elsif ($$src_ref =~ /\G'/gc) { |
328
|
8
|
|
|
|
|
44
|
my $qpos = $-[0]; |
329
|
8
|
|
|
|
|
17
|
my $text = ''; |
330
|
8
|
|
|
|
|
32
|
while ($$src_ref =~ /\G ( [^'&]+ | & ) /xgc) { |
331
|
14
|
|
|
|
|
80
|
$text .= $self->_consume_entity_maybe($1); |
332
|
|
|
|
|
|
|
} |
333
|
8
|
100
|
|
|
|
33
|
$$src_ref =~ /\G'/gc |
334
|
|
|
|
|
|
|
or $self->_fail(q{missing "'" after attribute value}, alt_msg => 'starting here', alt_pos => $qpos); |
335
|
7
|
100
|
|
|
|
28
|
$$src_ref =~ m{\G[^ \t\n\f/>]} |
336
|
|
|
|
|
|
|
and $self->_fail('missing whitespace after attribute value'); |
337
|
6
|
|
|
|
|
12
|
$attr_value = $text; |
338
|
|
|
|
|
|
|
} elsif ($$src_ref =~ /\G ( [^ \t\n\f&>"'<=`]+ | & )/xgc) { |
339
|
751
|
|
|
|
|
1134
|
my $text = ''; |
340
|
751
|
|
|
|
|
921
|
do { |
341
|
751
|
|
|
|
|
1620
|
$text .= $self->_consume_entity_maybe($1); |
342
|
|
|
|
|
|
|
} while $$src_ref =~ /\G ( [^ \t\n\f&>"'<=`]+ | & )/xgc; |
343
|
751
|
|
|
|
|
1421
|
$attr_value = $text; |
344
|
|
|
|
|
|
|
} else { |
345
|
1
|
|
|
|
|
4
|
$self->_fail("missing attribute value after '='"); |
346
|
|
|
|
|
|
|
} |
347
|
|
|
|
|
|
|
} |
348
|
|
|
|
|
|
|
|
349
|
1105
|
|
|
|
|
2522
|
$attrs{$attr_name} = $attr_value; |
350
|
1105
|
|
|
|
|
1844
|
$attr_pos{$attr_name} = $apos; |
351
|
|
|
|
|
|
|
|
352
|
1105
|
|
|
|
|
3453
|
$$src_ref =~ /\G[ \t\n\f]+/gc; |
353
|
|
|
|
|
|
|
} |
354
|
|
|
|
|
|
|
|
355
|
2935
|
100
|
|
|
|
8201
|
$$src_ref =~ m{\G(/?)>}gc |
356
|
|
|
|
|
|
|
or $self->_fail("missing '>' at end of tag", alt_msg => 'starting here', alt_pos => $pos, alt_width => 1 + $closing + length($name)); |
357
|
2933
|
|
|
|
|
5480
|
my $is_self_closing = length $1; |
358
|
|
|
|
|
|
|
|
359
|
2933
|
100
|
|
|
|
5242
|
if ($closing) { |
360
|
1354
|
50
|
|
|
|
2447
|
$is_self_closing and $self->_fail("invalid '/' at end of closing tag '$name>'", pos => $-[1]); |
361
|
1354
|
100
|
|
|
|
2476
|
@$tag_stack |
362
|
|
|
|
|
|
|
or $self->_fail("closing tag '$name>' has no corresponding open tag", pos => $pos, width => 1 + 1 + length($name)); |
363
|
|
|
|
|
|
|
|
364
|
1352
|
50
|
|
|
|
2492
|
$cur_tag eq $name |
365
|
|
|
|
|
|
|
or $self->_fail("closing tag '$name>' does not match current open tag '<$cur_tag>'", pos => $pos, width => 1 + 1 + length($name), alt_msg => 'starting here', alt_pos => $tag_stack->[-1][1], alt_width => 1 + length($cur_tag)); |
366
|
|
|
|
|
|
|
|
367
|
1352
|
100
|
|
|
|
3001
|
if ($foreign_tags{$cur_tag}) { |
368
|
2
|
|
|
|
|
4
|
$self->{in_foreign_elem}--; |
369
|
|
|
|
|
|
|
} |
370
|
1352
|
|
|
|
|
2047
|
pop @$tag_stack; |
371
|
|
|
|
|
|
|
return { |
372
|
1352
|
|
|
|
|
7103
|
type => TT_TAG_CLOSE, |
373
|
|
|
|
|
|
|
pos => $pos, |
374
|
|
|
|
|
|
|
name => $name, |
375
|
|
|
|
|
|
|
}; |
376
|
|
|
|
|
|
|
} |
377
|
|
|
|
|
|
|
|
378
|
1579
|
|
|
|
|
2860
|
my $is_void = $void_tags{$name}; |
379
|
1579
|
100
|
100
|
|
|
3168
|
if ($is_self_closing && !$is_void && !$foreign_tags{$name} && !$self->{in_foreign_elem}) { |
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
380
|
1
|
|
|
|
|
9
|
$self->_fail("invalid '/' at end of non-void tag '<$name>'", pos => $-[1], alt_msg => 'starting here', alt_pos => $pos, alt_width => 1 + length($name)); |
381
|
|
|
|
|
|
|
} |
382
|
1578
|
|
100
|
|
|
5750
|
$is_self_closing ||= $is_void; |
383
|
|
|
|
|
|
|
|
384
|
1578
|
100
|
|
|
|
2859
|
if (!$is_self_closing) { |
385
|
1383
|
|
|
|
|
1841
|
push @{$self->{tag_stack}}, [$name, $pos]; |
|
1383
|
|
|
|
|
3871
|
|
386
|
1383
|
100
|
|
|
|
3254
|
if ($foreign_tags{$name}) { |
387
|
2
|
|
|
|
|
4
|
$self->{in_foreign_elem}++; |
388
|
|
|
|
|
|
|
} |
389
|
|
|
|
|
|
|
} |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
return { |
392
|
1578
|
|
|
|
|
9804
|
type => TT_TAG_OPEN, |
393
|
|
|
|
|
|
|
pos => $pos, |
394
|
|
|
|
|
|
|
name => $name, |
395
|
|
|
|
|
|
|
attrs => \%attrs, |
396
|
|
|
|
|
|
|
is_void => $is_void, |
397
|
|
|
|
|
|
|
is_self_closing => $is_self_closing, |
398
|
|
|
|
|
|
|
}; |
399
|
|
|
|
|
|
|
} |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
# uncoverable statement |
402
|
0
|
|
|
|
|
|
die "Internal error: unparsable input '${\substr $$src_ref, pos($$src_ref), 10}'..."; |
|
0
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
} |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
1 |
406
|
|
|
|
|
|
|
__DATA__ |