| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package HTML::TreeBuilder; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# ABSTRACT: Parser that builds a HTML syntax tree |
|
4
|
|
|
|
|
|
|
|
|
5
|
20
|
|
|
20
|
|
263963
|
use 5.008; |
|
|
20
|
|
|
|
|
67
|
|
|
6
|
20
|
|
|
20
|
|
95
|
use warnings; |
|
|
20
|
|
|
|
|
39
|
|
|
|
20
|
|
|
|
|
489
|
|
|
7
|
20
|
|
|
20
|
|
92
|
use strict; |
|
|
20
|
|
|
|
|
47
|
|
|
|
20
|
|
|
|
|
360
|
|
|
8
|
20
|
|
|
20
|
|
7480
|
use integer; # vroom vroom! |
|
|
20
|
|
|
|
|
270
|
|
|
|
20
|
|
|
|
|
82
|
|
|
9
|
20
|
|
|
20
|
|
474
|
use Carp (); |
|
|
20
|
|
|
|
|
41
|
|
|
|
20
|
|
|
|
|
299
|
|
|
10
|
20
|
|
|
20
|
|
86
|
use Scalar::Util qw(openhandle); |
|
|
20
|
|
|
|
|
35
|
|
|
|
20
|
|
|
|
|
3106
|
|
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
our $VERSION = '5.910'; # TRIAL VERSION from OurPkgVersion |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
#--------------------------------------------------------------------------- |
|
15
|
|
|
|
|
|
|
# Make a 'DEBUG' constant... |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
our $DEBUG; # Must be set BEFORE loading this file |
|
18
|
|
|
|
|
|
|
BEGIN { |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# We used to have things like |
|
21
|
|
|
|
|
|
|
# print $indent, "lalala" if $Debug; |
|
22
|
|
|
|
|
|
|
# But there were an awful lot of having to evaluate $Debug's value. |
|
23
|
|
|
|
|
|
|
# If we make that depend on a constant, like so: |
|
24
|
|
|
|
|
|
|
# sub DEBUG () { 1 } # or whatever value. |
|
25
|
|
|
|
|
|
|
# ... |
|
26
|
|
|
|
|
|
|
# print $indent, "lalala" if DEBUG; |
|
27
|
|
|
|
|
|
|
# Which at compile-time (thru the miracle of constant folding) turns into: |
|
28
|
|
|
|
|
|
|
# print $indent, "lalala"; |
|
29
|
|
|
|
|
|
|
# or, if DEBUG is a constant with a true value, then that print statement |
|
30
|
|
|
|
|
|
|
# is simply optimized away, and doesn't appear in the target code at all. |
|
31
|
|
|
|
|
|
|
# If you don't believe me, run: |
|
32
|
|
|
|
|
|
|
# perl -MO=Deparse,-uHTML::TreeBuilder -e 'BEGIN { \ |
|
33
|
|
|
|
|
|
|
# $HTML::TreeBuilder::DEBUG = 4} use HTML::TreeBuilder' |
|
34
|
|
|
|
|
|
|
# and see for yourself (substituting whatever value you want for $DEBUG |
|
35
|
|
|
|
|
|
|
# there). |
|
36
|
|
|
|
|
|
|
## no critic |
|
37
|
20
|
50
|
|
20
|
|
232
|
if ( defined &DEBUG ) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# Already been defined! Do nothing. |
|
40
|
|
|
|
|
|
|
} |
|
41
|
|
|
|
|
|
|
elsif ( $] < 5.00404 ) { |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
# Grudgingly accommodate ancient (pre-constant) versions. |
|
44
|
0
|
|
|
|
|
0
|
eval 'sub DEBUG { $Debug } '; |
|
45
|
|
|
|
|
|
|
} |
|
46
|
|
|
|
|
|
|
elsif ( !$DEBUG ) { |
|
47
|
20
|
|
|
|
|
979
|
eval 'sub DEBUG () {0}'; # Make it a constant. |
|
48
|
|
|
|
|
|
|
} |
|
49
|
|
|
|
|
|
|
elsif ( $DEBUG =~ m<^\d+$>s ) { |
|
50
|
0
|
|
|
|
|
0
|
eval 'sub DEBUG () { ' . $DEBUG . ' }'; # Make THAT a constant. |
|
51
|
|
|
|
|
|
|
} |
|
52
|
|
|
|
|
|
|
else { # WTF? |
|
53
|
0
|
|
|
|
|
0
|
warn "Non-numeric value \"$DEBUG\" in \$HTML::Element::DEBUG"; |
|
54
|
0
|
|
|
|
|
0
|
eval 'sub DEBUG () { $DEBUG }'; # I guess. |
|
55
|
|
|
|
|
|
|
} |
|
56
|
|
|
|
|
|
|
## use critic |
|
57
|
|
|
|
|
|
|
} |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
#--------------------------------------------------------------------------- |
|
60
|
|
|
|
|
|
|
|
|
61
|
20
|
|
|
20
|
|
7624
|
use HTML::Entities (); |
|
|
20
|
|
|
|
|
97165
|
|
|
|
20
|
|
|
|
|
671
|
|
|
62
|
20
|
|
|
20
|
|
7579
|
use HTML::Tagset 3.02 (); |
|
|
20
|
|
|
|
|
20179
|
|
|
|
20
|
|
|
|
|
491
|
|
|
63
|
|
|
|
|
|
|
|
|
64
|
20
|
|
|
20
|
|
13364
|
use HTML::Element (); |
|
|
20
|
|
|
|
|
53
|
|
|
|
20
|
|
|
|
|
545
|
|
|
65
|
20
|
|
|
20
|
|
113
|
use HTML::Parser 3.46 (); |
|
|
20
|
|
|
|
|
393
|
|
|
|
20
|
|
|
|
|
23659
|
|
|
66
|
|
|
|
|
|
|
our @ISA = qw(HTML::Element HTML::Parser); |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# This looks schizoid, I know. |
|
69
|
|
|
|
|
|
|
# It's not that we ARE an element AND a parser. |
|
70
|
|
|
|
|
|
|
# We ARE an element, but one that knows how to handle signals |
|
71
|
|
|
|
|
|
|
# (method calls) from Parser in order to elaborate its subtree. |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# Legacy aliases: |
|
74
|
|
|
|
|
|
|
*HTML::TreeBuilder::isKnown = \%HTML::Tagset::isKnown; |
|
75
|
|
|
|
|
|
|
*HTML::TreeBuilder::canTighten = \%HTML::Tagset::canTighten; |
|
76
|
|
|
|
|
|
|
*HTML::TreeBuilder::isHeadElement = \%HTML::Tagset::isHeadElement; |
|
77
|
|
|
|
|
|
|
*HTML::TreeBuilder::isBodyElement = \%HTML::Tagset::isBodyElement; |
|
78
|
|
|
|
|
|
|
*HTML::TreeBuilder::isPhraseMarkup = \%HTML::Tagset::isPhraseMarkup; |
|
79
|
|
|
|
|
|
|
*HTML::TreeBuilder::isHeadOrBodyElement = \%HTML::Tagset::isHeadOrBodyElement; |
|
80
|
|
|
|
|
|
|
*HTML::TreeBuilder::isList = \%HTML::Tagset::isList; |
|
81
|
|
|
|
|
|
|
*HTML::TreeBuilder::isTableElement = \%HTML::Tagset::isTableElement; |
|
82
|
|
|
|
|
|
|
*HTML::TreeBuilder::isFormElement = \%HTML::Tagset::isFormElement; |
|
83
|
|
|
|
|
|
|
*HTML::TreeBuilder::p_closure_barriers = \@HTML::Tagset::p_closure_barriers; |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
#========================================================================== |
|
86
|
|
|
|
|
|
|
# Two little shortcut constructors: |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
sub new_from_file { # or from a FH |
|
89
|
17
|
|
|
17
|
1
|
11852
|
my $class = shift; |
|
90
|
17
|
50
|
|
|
|
66
|
Carp::croak("new_from_file takes an odd number of arguments") |
|
91
|
|
|
|
|
|
|
unless @_ % 2; |
|
92
|
17
|
50
|
|
|
|
48
|
Carp::croak("new_from_file is a class method only") |
|
93
|
|
|
|
|
|
|
if ref $class; |
|
94
|
17
|
|
|
|
|
36
|
my $file = shift; |
|
95
|
17
|
|
|
|
|
56
|
my $new = $class->new(@_); |
|
96
|
17
|
100
|
|
|
|
48
|
defined $new->parse_file( $file ) |
|
97
|
|
|
|
|
|
|
or Carp::croak("unable to parse file: $!"); |
|
98
|
16
|
|
|
|
|
55
|
return $new; |
|
99
|
|
|
|
|
|
|
} |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
sub new_from_content { # from any number of scalars |
|
102
|
8
|
|
|
8
|
1
|
1664
|
my $class = shift; |
|
103
|
8
|
50
|
|
|
|
34
|
Carp::croak("new_from_content is a class method only") |
|
104
|
|
|
|
|
|
|
if ref $class; |
|
105
|
8
|
|
|
|
|
31
|
my $new = $class->new(); |
|
106
|
8
|
|
|
|
|
25
|
foreach my $whunk (@_) { |
|
107
|
9
|
100
|
|
|
|
40
|
if ( ref($whunk) eq 'SCALAR' ) { |
|
108
|
2
|
|
|
|
|
18
|
$new->parse($$whunk); |
|
109
|
|
|
|
|
|
|
} |
|
110
|
|
|
|
|
|
|
else { |
|
111
|
7
|
|
|
|
|
77
|
$new->parse($whunk); |
|
112
|
|
|
|
|
|
|
} |
|
113
|
9
|
50
|
|
|
|
37
|
last if $new->{'_stunted'}; # might as well check that. |
|
114
|
|
|
|
|
|
|
} |
|
115
|
8
|
|
|
|
|
30
|
$new->eof(); |
|
116
|
8
|
|
|
|
|
29
|
return $new; |
|
117
|
|
|
|
|
|
|
} |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
sub new_from_string { # from a single scalar (plus options) |
|
120
|
16
|
|
|
16
|
1
|
7050
|
my $class = shift; |
|
121
|
16
|
50
|
|
|
|
58
|
Carp::croak("new_from_string takes an odd number of arguments") |
|
122
|
|
|
|
|
|
|
unless @_ % 2; |
|
123
|
16
|
50
|
|
|
|
46
|
Carp::croak("new_from_string is a class method only") |
|
124
|
|
|
|
|
|
|
if ref $class; |
|
125
|
16
|
|
|
|
|
32
|
my $string = shift; |
|
126
|
16
|
|
|
|
|
52
|
my $new = $class->new(@_); |
|
127
|
16
|
|
|
|
|
49
|
$new->parse_content($string); |
|
128
|
16
|
|
|
|
|
39
|
return $new; |
|
129
|
|
|
|
|
|
|
} |
|
130
|
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
sub new_from_url { # should accept anything that LWP does. |
|
132
|
4
|
|
|
4
|
1
|
14
|
undef our $lwp_response; |
|
133
|
4
|
|
|
|
|
9
|
my $class = shift; |
|
134
|
4
|
50
|
|
|
|
19
|
Carp::croak("new_from_url takes an odd number of arguments") |
|
135
|
|
|
|
|
|
|
unless @_ % 2; |
|
136
|
4
|
50
|
|
|
|
13
|
Carp::croak("new_from_url is a class method only") |
|
137
|
|
|
|
|
|
|
if ref $class; |
|
138
|
4
|
|
|
|
|
9
|
my $url = shift; |
|
139
|
|
|
|
|
|
|
|
|
140
|
4
|
|
|
|
|
27
|
require LWP::UserAgent; |
|
141
|
|
|
|
|
|
|
# RECOMMEND PREREQ: LWP::UserAgent 5.815 |
|
142
|
4
|
|
|
|
|
62
|
LWP::UserAgent->VERSION( 5.815 ); # HTTP::Headers content_is_html method |
|
143
|
4
|
|
|
|
|
24
|
$lwp_response = LWP::UserAgent->new->get( $url ); |
|
144
|
|
|
|
|
|
|
|
|
145
|
4
|
100
|
|
|
|
32475
|
Carp::croak("GET failed on $url: " . $lwp_response->status_line) |
|
146
|
|
|
|
|
|
|
unless $lwp_response->is_success; |
|
147
|
3
|
100
|
|
|
|
37
|
Carp::croak("$url returned " . $lwp_response->content_type . " not HTML") |
|
148
|
|
|
|
|
|
|
unless $lwp_response->content_is_html; |
|
149
|
|
|
|
|
|
|
|
|
150
|
2
|
|
|
|
|
70
|
my $new = $class->new_from_http($lwp_response, @_); |
|
151
|
|
|
|
|
|
|
|
|
152
|
2
|
|
|
|
|
26
|
undef $lwp_response; # Processed successfully |
|
153
|
2
|
|
|
|
|
9
|
return $new; |
|
154
|
|
|
|
|
|
|
} |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
sub new_from_http { # from a HTTP::Message (or subclass) |
|
157
|
3
|
|
|
3
|
1
|
6
|
my $class = shift; |
|
158
|
3
|
50
|
|
|
|
11
|
Carp::croak("new_from_http takes an odd number of arguments") |
|
159
|
|
|
|
|
|
|
unless @_ % 2; |
|
160
|
3
|
50
|
|
|
|
9
|
Carp::croak("new_from_http is a class method only") |
|
161
|
|
|
|
|
|
|
if ref $class; |
|
162
|
3
|
|
|
|
|
7
|
my $message = shift; |
|
163
|
3
|
|
|
|
|
12
|
my $new = $class->new(@_); |
|
164
|
|
|
|
|
|
|
|
|
165
|
3
|
|
|
|
|
5
|
my $cref; |
|
166
|
|
|
|
|
|
|
|
|
167
|
3
|
|
|
|
|
8
|
my %opt = @_; |
|
168
|
3
|
50
|
|
|
|
10
|
if (defined $opt{encoding}) { |
|
169
|
|
|
|
|
|
|
# User-specified charset: |
|
170
|
0
|
|
0
|
|
|
0
|
my $charset = ($opt{encoding} || 'none'); |
|
171
|
0
|
|
|
|
|
0
|
$charset =~ s/:BOM\z//; |
|
172
|
0
|
|
|
|
|
0
|
$cref = $message->decoded_content(ref => 1, charset => $charset); |
|
173
|
|
|
|
|
|
|
} else { |
|
174
|
|
|
|
|
|
|
# Auto-detect charset: |
|
175
|
3
|
|
50
|
|
|
14
|
my $charset = $message->content_charset || 'cp1252'; |
|
176
|
3
|
|
|
|
|
2871
|
$cref = $message->decoded_content(ref => 1, charset => $charset); |
|
177
|
3
|
50
|
|
|
|
336
|
if ($charset eq 'none') { |
|
178
|
0
|
|
|
|
|
0
|
$charset = ''; |
|
179
|
|
|
|
|
|
|
} else { |
|
180
|
3
|
|
|
|
|
18
|
require Encode; |
|
181
|
3
|
50
|
|
|
|
11
|
if (my $encoding = Encode::find_encoding($charset)) { |
|
182
|
3
|
|
|
|
|
60
|
$charset = $encoding->name; # canonical name |
|
183
|
3
|
50
|
|
|
|
13
|
$charset .= ':BOM' if $$cref =~ /^\x{FeFF}/; |
|
184
|
|
|
|
|
|
|
} else { |
|
185
|
0
|
|
|
|
|
0
|
undef $charset; # Encode doesn't recognize it |
|
186
|
|
|
|
|
|
|
} |
|
187
|
|
|
|
|
|
|
} |
|
188
|
3
|
|
|
|
|
8
|
$new->{_encoding} = $charset; |
|
189
|
|
|
|
|
|
|
} # end else auto-detect charset |
|
190
|
|
|
|
|
|
|
|
|
191
|
3
|
|
|
|
|
24
|
$new->parse( $$cref ); |
|
192
|
3
|
|
|
|
|
13
|
$new->eof; |
|
193
|
3
|
|
|
|
|
11
|
return $new; |
|
194
|
|
|
|
|
|
|
} |
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
# TODO: document more fully? |
|
197
|
|
|
|
|
|
|
sub parse_content { # from any number of scalars |
|
198
|
20
|
|
|
20
|
1
|
53
|
my $tree = shift; |
|
199
|
20
|
|
|
|
|
36
|
my $retval; |
|
200
|
20
|
|
|
|
|
46
|
foreach my $whunk (@_) { |
|
201
|
21
|
100
|
|
|
|
58
|
if ( ref($whunk) eq 'SCALAR' ) { |
|
202
|
3
|
|
|
|
|
24
|
$retval = $tree->parse($$whunk); |
|
203
|
|
|
|
|
|
|
} |
|
204
|
|
|
|
|
|
|
else { |
|
205
|
18
|
|
|
|
|
176
|
$retval = $tree->parse($whunk); |
|
206
|
|
|
|
|
|
|
} |
|
207
|
21
|
50
|
|
|
|
73
|
last if $tree->{'_stunted'}; # might as well check that. |
|
208
|
|
|
|
|
|
|
} |
|
209
|
20
|
|
|
|
|
69
|
$tree->eof(); |
|
210
|
20
|
|
|
|
|
46
|
return $retval; |
|
211
|
|
|
|
|
|
|
} |
|
212
|
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
|
214
|
|
|
|
|
|
|
sub parse_file { |
|
215
|
20
|
|
|
20
|
1
|
55
|
my ($self, $file) = @_; |
|
216
|
|
|
|
|
|
|
|
|
217
|
20
|
50
|
|
|
|
51
|
Carp::croak("parse_file requires file argument") unless defined $file; |
|
218
|
|
|
|
|
|
|
|
|
219
|
20
|
|
|
|
|
58
|
my $fh = openhandle($file); |
|
220
|
20
|
100
|
|
|
|
52
|
unless (defined $fh) { |
|
221
|
17
|
|
|
|
|
35
|
my $encoding = $self->{_encoding}; |
|
222
|
|
|
|
|
|
|
|
|
223
|
17
|
100
|
|
|
|
47
|
if (not defined $encoding) { |
|
224
|
11
|
|
|
|
|
2151
|
require IO::HTML; |
|
225
|
|
|
|
|
|
|
|
|
226
|
11
|
|
|
|
|
28822
|
{ local $@; |
|
|
11
|
|
|
|
|
20
|
|
|
227
|
11
|
|
|
|
|
21
|
eval { |
|
228
|
11
|
|
|
|
|
41
|
($fh, $encoding, my $bom) = |
|
229
|
|
|
|
|
|
|
IO::HTML::file_and_encoding($file); |
|
230
|
10
|
100
|
|
|
|
5113
|
$encoding .= ':BOM' if $bom; |
|
231
|
|
|
|
|
|
|
}; |
|
232
|
|
|
|
|
|
|
} # end local $@ |
|
233
|
11
|
|
|
|
|
311
|
$self->{_encoding} = $encoding; |
|
234
|
|
|
|
|
|
|
} # end if auto encoding |
|
235
|
|
|
|
|
|
|
else { |
|
236
|
6
|
|
|
|
|
13
|
$encoding =~ s/:BOM$//; |
|
237
|
6
|
100
|
|
|
|
168
|
open($fh, (length($encoding) ? "<:encoding($encoding):crlf" |
|
|
|
50
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
: "<:raw"), $file) |
|
239
|
|
|
|
|
|
|
or undef $fh; |
|
240
|
|
|
|
|
|
|
} |
|
241
|
|
|
|
|
|
|
|
|
242
|
17
|
100
|
|
|
|
857
|
return undef unless defined $fh; |
|
243
|
|
|
|
|
|
|
} # end unless filehandle was passed in |
|
244
|
|
|
|
|
|
|
|
|
245
|
19
|
|
|
|
|
107
|
$self->SUPER::parse_file($fh); |
|
246
|
|
|
|
|
|
|
} |
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
#--------------------------------------------------------------------------- |
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
sub new { # constructor! |
|
251
|
379
|
|
|
379
|
1
|
517955
|
my $class = shift; |
|
252
|
379
|
|
33
|
|
|
12870
|
$class = ref($class) || $class; |
|
253
|
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
# Initialize HTML::Element part |
|
255
|
379
|
|
|
|
|
12157
|
my $self = $class->element_class->new('html'); |
|
256
|
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
{ |
|
258
|
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
# A hack for certain strange versions of Parser: |
|
260
|
379
|
|
|
|
|
11759
|
my $other_self = HTML::Parser->new(); |
|
|
379
|
|
|
|
|
12495
|
|
|
261
|
379
|
|
|
|
|
264707
|
%$self = ( %$self, %$other_self ); # copy fields |
|
262
|
|
|
|
|
|
|
# Yes, multiple inheritance is messy. Kids, don't try this at home. |
|
263
|
379
|
|
|
|
|
23807
|
bless $other_self, "HTML::TreeBuilder::_hideyhole"; |
|
264
|
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
# whack it out of the HTML::Parser class, to avoid the destructor |
|
266
|
|
|
|
|
|
|
} |
|
267
|
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
# The root of the tree is special, as it has these funny attributes, |
|
269
|
|
|
|
|
|
|
# and gets reblessed into this class. |
|
270
|
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
# Initialize parser settings |
|
272
|
379
|
|
|
|
|
11920
|
$self->{'_implicit_tags'} = 1; |
|
273
|
379
|
|
|
|
|
11727
|
$self->{'_implicit_body_p_tag'} = 0; |
|
274
|
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
# If true, trying to insert text, or any of %isPhraseMarkup right |
|
276
|
|
|
|
|
|
|
# under 'body' will implicate a 'p'. If false, will just go there. |
|
277
|
|
|
|
|
|
|
|
|
278
|
379
|
|
|
|
|
11821
|
$self->{'_tighten'} = 1; |
|
279
|
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
# whether ignorable WS in this tree should be deleted |
|
281
|
|
|
|
|
|
|
|
|
282
|
379
|
|
|
|
|
11985
|
$self->{'_implicit'} = 1; # to delete, once we find a real open-"html" tag |
|
283
|
|
|
|
|
|
|
|
|
284
|
379
|
|
|
|
|
11926
|
$self->{'_ignore_unknown'} = 1; |
|
285
|
379
|
|
|
|
|
11820
|
$self->{'_ignore_text'} = 0; |
|
286
|
379
|
|
|
|
|
11712
|
$self->{'_warn'} = 0; |
|
287
|
379
|
|
|
|
|
11681
|
$self->{'_no_space_compacting'} = 0; |
|
288
|
379
|
|
|
|
|
11693
|
$self->{'_self_closed_tags'} = 0; |
|
289
|
379
|
|
|
|
|
11627
|
$self->{'_store_comments'} = 0; |
|
290
|
379
|
|
|
|
|
11649
|
$self->{'_store_declarations'} = 1; |
|
291
|
379
|
|
|
|
|
11970
|
$self->{'_store_pis'} = 0; |
|
292
|
379
|
|
|
|
|
11718
|
$self->{'_p_strict'} = 0; |
|
293
|
379
|
|
|
|
|
11598
|
$HTML::Element::encoded_content = $self->{'_no_expand_entities'} = 0; |
|
294
|
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
# rebless to our class |
|
296
|
379
|
|
|
|
|
11706
|
bless $self, $class; |
|
297
|
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
# Parse attributes passed in as arguments |
|
299
|
379
|
100
|
|
|
|
12077
|
if (@_) { |
|
300
|
17
|
50
|
|
|
|
53
|
Carp::croak("new must be passed key => value pairs") if @_ % 2; |
|
301
|
|
|
|
|
|
|
|
|
302
|
17
|
|
|
|
|
55
|
my %attr = @_; |
|
303
|
17
|
|
|
|
|
80
|
my $allowed = $self->_is_attr; |
|
304
|
|
|
|
|
|
|
|
|
305
|
17
|
|
|
|
|
71
|
while (my ($attr, $value) = each %attr ) { |
|
306
|
28
|
50
|
|
|
|
77
|
if ($allowed->{$attr}) { |
|
307
|
28
|
|
|
|
|
496
|
$self->$attr($value); |
|
308
|
|
|
|
|
|
|
} else { |
|
309
|
0
|
|
|
|
|
0
|
Carp::carp("Ignoring unknown attribute $attr"); |
|
310
|
|
|
|
|
|
|
} |
|
311
|
|
|
|
|
|
|
} # end while each $attr |
|
312
|
|
|
|
|
|
|
} # end if attributes passed to new |
|
313
|
|
|
|
|
|
|
|
|
314
|
379
|
|
|
|
|
12032
|
$self->{'_element_count'} = 1; |
|
315
|
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
# undocumented, informal, and maybe not exactly correct |
|
317
|
|
|
|
|
|
|
|
|
318
|
379
|
|
|
|
|
12320
|
$self->{'_head'} = $self->insert_element( 'head', 1 ); |
|
319
|
379
|
|
|
|
|
11732
|
$self->{'_pos'} = undef; # pull it back up |
|
320
|
379
|
|
|
|
|
12008
|
$self->{'_body'} = $self->insert_element( 'body', 1 ); |
|
321
|
379
|
|
|
|
|
11711
|
$self->{'_pos'} = undef; # pull it back up again |
|
322
|
|
|
|
|
|
|
|
|
323
|
379
|
|
|
|
|
22980
|
return $self; |
|
324
|
|
|
|
|
|
|
} |
|
325
|
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
#========================================================================== |
|
327
|
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
sub _elem # universal accessor... |
|
329
|
|
|
|
|
|
|
{ |
|
330
|
68
|
|
|
68
|
|
5544
|
my ( $self, $elem, $val ) = @_; |
|
331
|
68
|
|
|
|
|
5564
|
my $old = $self->{$elem}; |
|
332
|
68
|
50
|
|
|
|
5619
|
$self->{$elem} = $val if defined $val; |
|
333
|
68
|
|
|
|
|
16347
|
return $old; |
|
334
|
|
|
|
|
|
|
} |
|
335
|
|
|
|
|
|
|
|
|
336
|
0
|
|
|
|
|
0
|
BEGIN { |
|
337
|
20
|
|
|
20
|
|
114
|
my @attributes = qw( |
|
338
|
|
|
|
|
|
|
implicit_tags |
|
339
|
|
|
|
|
|
|
implicit_body_p_tag |
|
340
|
|
|
|
|
|
|
p_strict |
|
341
|
|
|
|
|
|
|
no_space_compacting |
|
342
|
|
|
|
|
|
|
ignore_unknown |
|
343
|
|
|
|
|
|
|
ignore_text |
|
344
|
|
|
|
|
|
|
self_closed_tags |
|
345
|
|
|
|
|
|
|
store_comments |
|
346
|
|
|
|
|
|
|
store_declarations |
|
347
|
|
|
|
|
|
|
store_pis |
|
348
|
|
|
|
|
|
|
warn |
|
349
|
|
|
|
|
|
|
); |
|
350
|
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
# Create accessor methods: |
|
352
|
20
|
|
|
|
|
47
|
my $code = join('', map { "sub $_ { shift->_elem( '_$_', \@_ ); }\n" } |
|
|
220
|
|
|
|
|
623
|
|
|
353
|
|
|
|
|
|
|
@attributes); |
|
354
|
20
|
|
|
|
|
52
|
my $err; |
|
355
|
|
|
|
|
|
|
{ |
|
356
|
20
|
|
|
|
|
33
|
local $@; |
|
|
20
|
|
|
|
|
38
|
|
|
357
|
20
|
50
|
0
|
2
|
1
|
4202
|
$err = $@ || "UNKNOWN ERROR" unless eval "$code 1"; ## no critic |
|
|
2
|
|
|
2
|
1
|
38
|
|
|
|
2
|
|
|
36
|
1
|
8
|
|
|
|
36
|
|
|
11
|
1
|
60581
|
|
|
|
11
|
|
|
5
|
1
|
244
|
|
|
|
5
|
|
|
0
|
1
|
19
|
|
|
|
0
|
|
|
1
|
1
|
0
|
|
|
|
1
|
|
|
1
|
1
|
6
|
|
|
|
1
|
|
|
1
|
1
|
47
|
|
|
|
1
|
|
|
0
|
1
|
411
|
|
|
|
0
|
|
|
2
|
1
|
0
|
|
|
|
2
|
|
|
|
|
8
|
|
|
358
|
|
|
|
|
|
|
} |
|
359
|
20
|
50
|
|
|
|
68
|
die "$code$err" if $err; |
|
360
|
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
# Record names of class attributes: |
|
362
|
20
|
|
|
|
|
49
|
my %is_attr = map { $_ => 1 } (@attributes, qw( |
|
|
280
|
|
|
|
|
85297
|
|
|
363
|
|
|
|
|
|
|
encoding |
|
364
|
|
|
|
|
|
|
ignore_ignorable_whitespace |
|
365
|
|
|
|
|
|
|
no_expand_entities |
|
366
|
|
|
|
|
|
|
)); |
|
367
|
|
|
|
|
|
|
|
|
368
|
17
|
|
|
17
|
|
39
|
sub _is_attr { return \%is_attr } |
|
369
|
|
|
|
|
|
|
} |
|
370
|
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
# Custom accessors: |
|
372
|
|
|
|
|
|
|
sub ignore_ignorable_whitespace { |
|
373
|
5
|
|
|
5
|
1
|
13
|
shift->_elem( '_tighten', @_ ); # internal name is different |
|
374
|
|
|
|
|
|
|
} |
|
375
|
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
sub no_expand_entities { |
|
377
|
2
|
|
|
2
|
1
|
6
|
my $self = shift; |
|
378
|
2
|
|
|
|
|
8
|
my $return = $self->_elem( '_no_expand_entities', @_ ); |
|
379
|
2
|
|
|
|
|
5
|
$HTML::Element::encoded_content = $self->{_no_expand_entities}; |
|
380
|
2
|
|
|
|
|
9
|
$return; |
|
381
|
|
|
|
|
|
|
} |
|
382
|
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
#========================================================================== |
|
384
|
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
sub warning { |
|
386
|
3
|
|
|
3
|
1
|
6
|
my $self = shift; |
|
387
|
3
|
50
|
|
|
|
12
|
CORE::warn("HTML::Parse: $_[0]\n") if $self->{'_warn'}; |
|
388
|
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
# should maybe say HTML::TreeBuilder instead |
|
390
|
|
|
|
|
|
|
} |
|
391
|
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
#========================================================================== |
|
393
|
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
{ |
|
395
|
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
# To avoid having to rebuild these lists constantly... |
|
397
|
|
|
|
|
|
|
my $_Closed_by_structurals = [qw(p h1 h2 h3 h4 h5 h6 pre textarea)]; |
|
398
|
|
|
|
|
|
|
my $indent; |
|
399
|
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
sub start { |
|
401
|
1252
|
50
|
|
1252
|
1
|
16007
|
return if $_[0]{'_stunted'}; |
|
402
|
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
# Accept a signal from HTML::Parser for start-tags. |
|
404
|
1252
|
|
|
|
|
6879
|
my ( $self, $tag, $attr ) = @_; |
|
405
|
|
|
|
|
|
|
|
|
406
|
1252
|
|
100
|
|
|
7447
|
my $self_closed = ($self->{'_self_closed_tags'} and |
|
407
|
|
|
|
|
|
|
$_[4] =~ m!/[\n\r\f\t ]*>\z!); |
|
408
|
1252
|
100
|
|
|
|
7001
|
delete $attr->{'/'} if $self_closed; |
|
409
|
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
# Parser passes more, actually: |
|
411
|
|
|
|
|
|
|
# $self->start($tag, $attr, $attrseq, $origtext) |
|
412
|
|
|
|
|
|
|
# But we can merrily ignore $attrseq and $origtext. |
|
413
|
|
|
|
|
|
|
|
|
414
|
1252
|
50
|
|
|
|
7321
|
if ( $tag eq 'x-html' ) { |
|
415
|
0
|
|
|
|
|
0
|
print "Ignoring open-x-html tag.\n" if DEBUG; |
|
416
|
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
# inserted by some lame code-generators. |
|
418
|
0
|
|
|
|
|
0
|
return; # bypass tweaking. |
|
419
|
|
|
|
|
|
|
} |
|
420
|
|
|
|
|
|
|
|
|
421
|
1252
|
|
|
|
|
6559
|
$tag =~ s{/$}{}s; # So turns into . Silently forgive. |
|
422
|
|
|
|
|
|
|
|
|
423
|
1252
|
50
|
|
|
|
8661
|
unless ( $tag =~ m/^[-_a-zA-Z0-9:%]+$/s ) { |
|
424
|
0
|
|
|
|
|
0
|
DEBUG and print "Start-tag name $tag is no good. Skipping.\n"; |
|
425
|
0
|
|
|
|
|
0
|
return; |
|
426
|
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
# This avoids having Element's new() throw an exception. |
|
428
|
|
|
|
|
|
|
} |
|
429
|
|
|
|
|
|
|
|
|
430
|
1252
|
|
66
|
|
|
8260
|
my $ptag = ( my $pos = $self->{'_pos'} || $self )->{'_tag'}; |
|
431
|
1252
|
|
|
|
|
6004
|
my $already_inserted; |
|
432
|
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
#my($indent); |
|
434
|
1252
|
|
|
|
|
5918
|
if (DEBUG) { |
|
435
|
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
# optimization -- don't figure out indenting unless we're in debug mode |
|
437
|
|
|
|
|
|
|
my @lineage = $pos->lineage; |
|
438
|
|
|
|
|
|
|
$indent = ' ' x ( 1 + @lineage ); |
|
439
|
|
|
|
|
|
|
print $indent, "Proposing a new \U$tag\E under ", |
|
440
|
|
|
|
|
|
|
join( '/', map $_->{'_tag'}, reverse( $pos, @lineage ) ) |
|
441
|
|
|
|
|
|
|
|| 'Root', |
|
442
|
|
|
|
|
|
|
".\n"; |
|
443
|
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
#} else { |
|
445
|
|
|
|
|
|
|
# $indent = ' '; |
|
446
|
|
|
|
|
|
|
} |
|
447
|
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
#print $indent, "POS: $pos ($ptag)\n" if DEBUG > 2; |
|
449
|
|
|
|
|
|
|
# $attr = {%$attr}; |
|
450
|
|
|
|
|
|
|
|
|
451
|
1252
|
|
|
|
|
7708
|
foreach my $k ( keys %$attr ) { |
|
452
|
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
# Make sure some stooge doesn't have "". |
|
454
|
|
|
|
|
|
|
# That happens every few million Web pages. |
|
455
|
597
|
50
|
33
|
|
|
2815
|
$attr->{ ' ' . $k } = delete $attr->{$k} |
|
456
|
|
|
|
|
|
|
if length $k and substr( $k, 0, 1 ) eq '_'; |
|
457
|
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
# Looks bad, but is fine for round-tripping. |
|
459
|
|
|
|
|
|
|
} |
|
460
|
|
|
|
|
|
|
|
|
461
|
1252
|
|
|
|
|
6969
|
my $e = $self->element_class->new( $tag, %$attr ); |
|
462
|
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
# Make a new element object. |
|
464
|
|
|
|
|
|
|
# (Only rarely do we end up just throwing it away later in this call.) |
|
465
|
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
# Some prep -- custom messiness for those damned tables, and strict P's. |
|
467
|
1252
|
100
|
|
|
|
7805
|
if ( $self->{'_implicit_tags'} ) { # wallawallawalla! |
|
468
|
|
|
|
|
|
|
|
|
469
|
1214
|
100
|
|
|
|
7238
|
unless ( $HTML::TreeBuilder::isTableElement{$tag} ) { |
|
470
|
1036
|
50
|
|
|
|
7126
|
if ( $ptag eq 'table' ) { |
|
|
|
50
|
|
|
|
|
|
|
471
|
0
|
|
|
|
|
0
|
print $indent, |
|
472
|
|
|
|
|
|
|
" * Phrasal \U$tag\E right under TABLE makes implicit TR and TD\n" |
|
473
|
|
|
|
|
|
|
if DEBUG > 1; |
|
474
|
0
|
|
|
|
|
0
|
$self->insert_element( 'tr', 1 ); |
|
475
|
0
|
|
|
|
|
0
|
$pos = $self->insert_element( 'td', 1 ) |
|
476
|
|
|
|
|
|
|
; # yes, needs updating |
|
477
|
|
|
|
|
|
|
} |
|
478
|
|
|
|
|
|
|
elsif ( $ptag eq 'tr' ) { |
|
479
|
0
|
|
|
|
|
0
|
print $indent, |
|
480
|
|
|
|
|
|
|
" * Phrasal \U$tag\E right under TR makes an implicit TD\n" |
|
481
|
|
|
|
|
|
|
if DEBUG > 1; |
|
482
|
0
|
|
|
|
|
0
|
$pos = $self->insert_element( 'td', 1 ) |
|
483
|
|
|
|
|
|
|
; # yes, needs updating |
|
484
|
|
|
|
|
|
|
} |
|
485
|
1036
|
|
|
|
|
10209
|
$ptag = $pos->{'_tag'}; # yes, needs updating |
|
486
|
|
|
|
|
|
|
} |
|
487
|
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
# end of table-implication block. |
|
489
|
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
# Now maybe do a little dance to enforce P-strictness. |
|
491
|
|
|
|
|
|
|
# This seems like it should be integrated with the big |
|
492
|
|
|
|
|
|
|
# "ALL HOPE..." block, further below, but that doesn't |
|
493
|
|
|
|
|
|
|
# seem feasible. |
|
494
|
1214
|
0
|
33
|
|
|
11250
|
if ( $self->{'_p_strict'} |
|
|
|
|
0
|
|
|
|
|
|
495
|
|
|
|
|
|
|
and $HTML::TreeBuilder::isKnown{$tag} |
|
496
|
|
|
|
|
|
|
and not $HTML::Tagset::is_Possible_Strict_P_Content{$tag} ) |
|
497
|
|
|
|
|
|
|
{ |
|
498
|
0
|
|
|
|
|
0
|
my $here = $pos; |
|
499
|
0
|
|
|
|
|
0
|
my $here_tag = $ptag; |
|
500
|
0
|
|
|
|
|
0
|
while (1) { |
|
501
|
0
|
0
|
|
|
|
0
|
if ( $here_tag eq 'p' ) { |
|
502
|
0
|
|
|
|
|
0
|
print $indent, " * Inserting $tag closes strict P.\n" |
|
503
|
|
|
|
|
|
|
if DEBUG > 1; |
|
504
|
0
|
|
|
|
|
0
|
$self->end( \q{p} ); |
|
505
|
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
# NB: same as \'q', but less confusing to emacs cperl-mode |
|
507
|
0
|
|
|
|
|
0
|
last; |
|
508
|
|
|
|
|
|
|
} |
|
509
|
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
#print("Lasting from $here_tag\n"), |
|
511
|
|
|
|
|
|
|
last |
|
512
|
|
|
|
|
|
|
if $HTML::TreeBuilder::isKnown{$here_tag} |
|
513
|
|
|
|
|
|
|
and |
|
514
|
|
|
|
|
|
|
not $HTML::Tagset::is_Possible_Strict_P_Content{ |
|
515
|
0
|
0
|
0
|
|
|
0
|
$here_tag}; |
|
516
|
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
# Don't keep looking up the tree if we see something that can't |
|
518
|
|
|
|
|
|
|
# be strict-P content. |
|
519
|
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
$here_tag |
|
521
|
0
|
|
0
|
|
|
0
|
= ( $here = $here->{'_parent'} || last )->{'_tag'}; |
|
522
|
|
|
|
|
|
|
} # end while |
|
523
|
|
|
|
|
|
|
$ptag = ( $pos = $self->{'_pos'} || $self ) |
|
524
|
0
|
|
0
|
|
|
0
|
->{'_tag'}; # better update! |
|
525
|
|
|
|
|
|
|
} |
|
526
|
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
# end of strict-p block. |
|
528
|
|
|
|
|
|
|
} |
|
529
|
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
# And now, get busy... |
|
531
|
|
|
|
|
|
|
#---------------------------------------------------------------------- |
|
532
|
1252
|
100
|
|
|
|
9073
|
if ( !$self->{'_implicit_tags'} ) { # bimskalabim |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
# do nothing |
|
534
|
38
|
|
|
|
|
59
|
print $indent, " * _implicit_tags is off. doing nothing\n" |
|
535
|
|
|
|
|
|
|
if DEBUG > 1; |
|
536
|
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
#---------------------------------------------------------------------- |
|
538
|
|
|
|
|
|
|
} |
|
539
|
|
|
|
|
|
|
elsif ( $HTML::TreeBuilder::isHeadOrBodyElement{$tag} ) { |
|
540
|
2
|
100
|
|
|
|
10
|
if ( $pos->is_inside('body') ) { # all is well |
|
|
|
50
|
|
|
|
|
|
|
541
|
1
|
|
|
|
|
2
|
print $indent, |
|
542
|
|
|
|
|
|
|
" * ambilocal element \U$tag\E is fine under BODY.\n" |
|
543
|
|
|
|
|
|
|
if DEBUG > 1; |
|
544
|
|
|
|
|
|
|
} |
|
545
|
|
|
|
|
|
|
elsif ( $pos->is_inside('head') ) { |
|
546
|
1
|
|
|
|
|
2
|
print $indent, |
|
547
|
|
|
|
|
|
|
" * ambilocal element \U$tag\E is fine under HEAD.\n" |
|
548
|
|
|
|
|
|
|
if DEBUG > 1; |
|
549
|
|
|
|
|
|
|
} |
|
550
|
|
|
|
|
|
|
else { |
|
551
|
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
# In neither head nor body! mmmmm... put under head? |
|
553
|
|
|
|
|
|
|
|
|
554
|
0
|
0
|
|
|
|
0
|
if ( $ptag eq 'html' ) { # expected case |
|
555
|
|
|
|
|
|
|
# TODO?? : would there ever be a case where _head would be |
|
556
|
|
|
|
|
|
|
# absent from a tree that would ever be accessed at this |
|
557
|
|
|
|
|
|
|
# point? |
|
558
|
0
|
0
|
|
|
|
0
|
die "Where'd my head go?" unless ref $self->{'_head'}; |
|
559
|
0
|
0
|
|
|
|
0
|
if ( $self->{'_head'}{'_implicit'} ) { |
|
560
|
0
|
|
|
|
|
0
|
print $indent, |
|
561
|
|
|
|
|
|
|
" * ambilocal element \U$tag\E makes an implicit HEAD.\n" |
|
562
|
|
|
|
|
|
|
if DEBUG > 1; |
|
563
|
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
# or rather, points us at it. |
|
565
|
|
|
|
|
|
|
$self->{'_pos'} |
|
566
|
0
|
|
|
|
|
0
|
= $self->{'_head'}; # to insert under... |
|
567
|
|
|
|
|
|
|
} |
|
568
|
|
|
|
|
|
|
else { |
|
569
|
0
|
|
|
|
|
0
|
$self->warning( |
|
570
|
|
|
|
|
|
|
"Ambilocal element <$tag> not under HEAD or BODY!?" |
|
571
|
|
|
|
|
|
|
); |
|
572
|
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
# Put it under HEAD by default, I guess |
|
574
|
|
|
|
|
|
|
$self->{'_pos'} |
|
575
|
0
|
|
|
|
|
0
|
= $self->{'_head'}; # to insert under... |
|
576
|
|
|
|
|
|
|
} |
|
577
|
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
} |
|
579
|
|
|
|
|
|
|
else { |
|
580
|
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
# Neither under head nor body, nor right under html... pass thru? |
|
582
|
0
|
|
|
|
|
0
|
$self->warning( |
|
583
|
|
|
|
|
|
|
"Ambilocal element <$tag> neither under head nor body, nor right under html!?" |
|
584
|
|
|
|
|
|
|
); |
|
585
|
|
|
|
|
|
|
} |
|
586
|
|
|
|
|
|
|
} |
|
587
|
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
#---------------------------------------------------------------------- |
|
589
|
|
|
|
|
|
|
} |
|
590
|
|
|
|
|
|
|
elsif ( $HTML::TreeBuilder::isBodyElement{$tag} ) { |
|
591
|
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
# Ensure that we are within |
|
593
|
943
|
100
|
66
|
|
|
7840
|
if ( $ptag eq 'body' ) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
# We're good. |
|
596
|
|
|
|
|
|
|
} |
|
597
|
|
|
|
|
|
|
elsif ( |
|
598
|
|
|
|
|
|
|
$HTML::TreeBuilder::isBodyElement{$ptag} # glarg |
|
599
|
|
|
|
|
|
|
and not $HTML::TreeBuilder::isHeadOrBodyElement{$ptag} |
|
600
|
|
|
|
|
|
|
) |
|
601
|
|
|
|
|
|
|
{ |
|
602
|
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
# Special case: Save ourselves a call to is_inside further down. |
|
604
|
|
|
|
|
|
|
# If our $ptag is an isBodyElement element (but not an |
|
605
|
|
|
|
|
|
|
# isHeadOrBodyElement element), then we must be under body! |
|
606
|
258
|
|
|
|
|
385
|
print $indent, " * Inferring that $ptag is under BODY.\n", |
|
607
|
|
|
|
|
|
|
if DEBUG > 3; |
|
608
|
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
# I think this and the test for 'body' trap everything |
|
610
|
|
|
|
|
|
|
# bodyworthy, except the case where the parent element is |
|
611
|
|
|
|
|
|
|
# under an unknown element that's a descendant of body. |
|
612
|
|
|
|
|
|
|
} |
|
613
|
|
|
|
|
|
|
elsif ( $pos->is_inside('head') ) { |
|
614
|
169
|
|
|
|
|
349
|
print $indent, |
|
615
|
|
|
|
|
|
|
" * body-element \U$tag\E minimizes HEAD, makes implicit BODY.\n" |
|
616
|
|
|
|
|
|
|
if DEBUG > 1; |
|
617
|
|
|
|
|
|
|
$ptag = ( |
|
618
|
|
|
|
|
|
|
$pos = $self->{'_pos'} |
|
619
|
|
|
|
|
|
|
= $self->{'_body'} # yes, needs updating |
|
620
|
|
|
|
|
|
|
|| die "Where'd my body go?" |
|
621
|
169
|
|
50
|
|
|
593
|
)->{'_tag'}; # yes, needs updating |
|
622
|
|
|
|
|
|
|
} |
|
623
|
|
|
|
|
|
|
elsif ( !$pos->is_inside('body') ) { |
|
624
|
104
|
|
|
|
|
4336
|
print $indent, |
|
625
|
|
|
|
|
|
|
" * body-element \U$tag\E makes implicit BODY.\n" |
|
626
|
|
|
|
|
|
|
if DEBUG > 1; |
|
627
|
|
|
|
|
|
|
$ptag = ( |
|
628
|
|
|
|
|
|
|
$pos = $self->{'_pos'} |
|
629
|
|
|
|
|
|
|
= $self->{'_body'} # yes, needs updating |
|
630
|
|
|
|
|
|
|
|| die "Where'd my body go?" |
|
631
|
104
|
|
50
|
|
|
8681
|
)->{'_tag'}; # yes, needs updating |
|
632
|
|
|
|
|
|
|
} |
|
633
|
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
# else we ARE under body, so okay. |
|
635
|
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
# Handle implicit endings and insert based on and position |
|
637
|
|
|
|
|
|
|
# ... ALL HOPE ABANDON ALL YE WHO ENTER HERE ... |
|
638
|
943
|
100
|
100
|
|
|
20273
|
if ( $tag eq 'p' |
|
|
|
100
|
100
|
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
|
50
|
66
|
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
|
100
|
33
|
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
639
|
|
|
|
|
|
|
or $tag eq 'h1' |
|
640
|
|
|
|
|
|
|
or $tag eq 'h2' |
|
641
|
|
|
|
|
|
|
or $tag eq 'h3' |
|
642
|
|
|
|
|
|
|
or $tag eq 'h4' |
|
643
|
|
|
|
|
|
|
or $tag eq 'h5' |
|
644
|
|
|
|
|
|
|
or $tag eq 'h6' |
|
645
|
|
|
|
|
|
|
or $tag eq 'form' |
|
646
|
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
# Hm, should |
|
648
|
|
|
|
|
|
|
) |
|
649
|
|
|
|
|
|
|
{ |
|
650
|
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
# Can't have , or |
|
652
|
307
|
|
|
|
|
914
|
$self->end( |
|
653
|
|
|
|
|
|
|
$_Closed_by_structurals, |
|
654
|
|
|
|
|
|
|
@HTML::TreeBuilder::p_closure_barriers |
|
655
|
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
# used to be just li! |
|
657
|
|
|
|
|
|
|
); |
|
658
|
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
} |
|
660
|
|
|
|
|
|
|
elsif ( $tag eq 'ol' or $tag eq 'ul' or $tag eq 'dl' ) { |
|
661
|
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
# Can't have lists inside -- in the unlikely |
|
663
|
|
|
|
|
|
|
# event anyone tries to put them there! |
|
664
|
9
|
50
|
33
|
|
|
114
|
if ( $ptag eq 'h1' |
|
|
|
|
33
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
665
|
|
|
|
|
|
|
or $ptag eq 'h2' |
|
666
|
|
|
|
|
|
|
or $ptag eq 'h3' |
|
667
|
|
|
|
|
|
|
or $ptag eq 'h4' |
|
668
|
|
|
|
|
|
|
or $ptag eq 'h5' |
|
669
|
|
|
|
|
|
|
or $ptag eq 'h6' ) |
|
670
|
|
|
|
|
|
|
{ |
|
671
|
0
|
|
|
|
|
0
|
$self->end( \$ptag ); |
|
672
|
|
|
|
|
|
|
} |
|
673
|
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
# TODO: Maybe keep closing up the tree until |
|
675
|
|
|
|
|
|
|
# the ptag isn't any of the above? |
|
676
|
|
|
|
|
|
|
# But anyone that says
|
|
677
|
|
|
|
|
|
|
# deserves what they get anyway. |
|
678
|
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
} |
|
680
|
|
|
|
|
|
|
elsif ( $tag eq 'li' ) { # list item |
|
681
|
|
|
|
|
|
|
# Get under a list tag, one way or another |
|
682
|
12
|
50
|
66
|
|
|
47
|
unless ( |
|
683
|
|
|
|
|
|
|
exists $HTML::TreeBuilder::isList{$ptag} |
|
684
|
|
|
|
|
|
|
or $self->end( \q{*}, keys %HTML::TreeBuilder::isList ) #' |
|
685
|
|
|
|
|
|
|
) |
|
686
|
|
|
|
|
|
|
{ |
|
687
|
0
|
|
|
|
|
0
|
print $indent, |
|
688
|
|
|
|
|
|
|
" * inserting implicit UL for lack of containing ", |
|
689
|
|
|
|
|
|
|
join( '|', keys %HTML::TreeBuilder::isList ), ".\n" |
|
690
|
|
|
|
|
|
|
if DEBUG > 1; |
|
691
|
0
|
|
|
|
|
0
|
$self->insert_element( 'ul', 1 ); |
|
692
|
|
|
|
|
|
|
} |
|
693
|
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
} |
|
695
|
|
|
|
|
|
|
elsif ( $tag eq 'dt' or $tag eq 'dd' ) { |
|
696
|
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
# Get under a DL, one way or another |
|
698
|
0
|
0
|
0
|
|
|
0
|
unless ( $ptag eq 'dl' or $self->end( \q{*}, 'dl' ) ) { #' |
|
699
|
0
|
|
|
|
|
0
|
print $indent, |
|
700
|
|
|
|
|
|
|
" * inserting implicit DL for lack of containing DL.\n" |
|
701
|
|
|
|
|
|
|
if DEBUG > 1; |
|
702
|
0
|
|
|
|
|
0
|
$self->insert_element( 'dl', 1 ); |
|
703
|
|
|
|
|
|
|
} |
|
704
|
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
} |
|
706
|
|
|
|
|
|
|
elsif ( $HTML::TreeBuilder::isFormElement{$tag} ) { |
|
707
|
1
|
50
|
33
|
|
|
5
|
if ($self->{ |
|
708
|
|
|
|
|
|
|
'_ignore_formies_outside_form'} # TODO: document this |
|
709
|
|
|
|
|
|
|
and not $pos->is_inside('form') |
|
710
|
|
|
|
|
|
|
) |
|
711
|
|
|
|
|
|
|
{ |
|
712
|
0
|
|
|
|
|
0
|
print $indent, |
|
713
|
|
|
|
|
|
|
" * ignoring \U$tag\E because not in a FORM.\n" |
|
714
|
|
|
|
|
|
|
if DEBUG > 1; |
|
715
|
0
|
|
|
|
|
0
|
return; # bypass tweaking. |
|
716
|
|
|
|
|
|
|
} |
|
717
|
1
|
50
|
|
|
|
4
|
if ( $tag eq 'option' ) { |
|
718
|
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
# return unless $ptag eq 'select'; |
|
720
|
0
|
|
|
|
|
0
|
$self->end( \q{option} ); |
|
721
|
0
|
|
0
|
|
|
0
|
$ptag = ( $self->{'_pos'} || $self )->{'_tag'}; |
|
722
|
0
|
0
|
0
|
|
|
0
|
unless ( $ptag eq 'select' or $ptag eq 'optgroup' ) { |
|
723
|
0
|
|
|
|
|
0
|
print $indent, |
|
724
|
|
|
|
|
|
|
" * \U$tag\E makes an implicit SELECT.\n" |
|
725
|
|
|
|
|
|
|
if DEBUG > 1; |
|
726
|
0
|
|
|
|
|
0
|
$pos = $self->insert_element( 'select', 1 ); |
|
727
|
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
# but not a very useful select -- has no 'name' attribute! |
|
729
|
|
|
|
|
|
|
# is $pos's value used after this? |
|
730
|
|
|
|
|
|
|
} |
|
731
|
|
|
|
|
|
|
} |
|
732
|
|
|
|
|
|
|
} |
|
733
|
|
|
|
|
|
|
elsif ( $HTML::TreeBuilder::isTableElement{$tag} ) { |
|
734
|
178
|
100
|
|
|
|
473
|
if ( !$pos->is_inside('table') ) { |
|
735
|
3
|
|
|
|
|
6
|
print $indent, " * \U$tag\E makes an implicit TABLE\n" |
|
736
|
|
|
|
|
|
|
if DEBUG > 1; |
|
737
|
3
|
|
|
|
|
9
|
$self->insert_element( 'table', 1 ); |
|
738
|
|
|
|
|
|
|
} |
|
739
|
|
|
|
|
|
|
|
|
740
|
178
|
100
|
100
|
|
|
606
|
if ( $tag eq 'td' or $tag eq 'th' ) { |
|
741
|
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
# Get under a tr one way or another |
|
743
|
111
|
100
|
100
|
|
|
330
|
unless ( |
|
744
|
|
|
|
|
|
|
$ptag eq 'tr' # either under a tr |
|
745
|
|
|
|
|
|
|
or $self->end( \q{*}, 'tr', |
|
746
|
|
|
|
|
|
|
'table' ) #or we can get under one |
|
747
|
|
|
|
|
|
|
) |
|
748
|
|
|
|
|
|
|
{ |
|
749
|
2
|
|
|
|
|
5
|
print $indent, |
|
750
|
|
|
|
|
|
|
" * \U$tag\E under \U$ptag\E makes an implicit TR\n" |
|
751
|
|
|
|
|
|
|
if DEBUG > 1; |
|
752
|
2
|
|
|
|
|
7
|
$self->insert_element( 'tr', 1 ); |
|
753
|
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
# presumably pos's value isn't used after this. |
|
755
|
|
|
|
|
|
|
} |
|
756
|
|
|
|
|
|
|
} |
|
757
|
|
|
|
|
|
|
else { |
|
758
|
67
|
|
|
|
|
185
|
$self->end( \$tag, 'table' ); #' |
|
759
|
|
|
|
|
|
|
} |
|
760
|
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
# Hmm, I guess this is right. To work it out: |
|
762
|
|
|
|
|
|
|
# tr closes any open tr (limited at a table) |
|
763
|
|
|
|
|
|
|
# thead closes any open thead (limited at a table) |
|
764
|
|
|
|
|
|
|
# tbody closes any open tbody (limited at a table) |
|
765
|
|
|
|
|
|
|
# tfoot closes any open tfoot (limited at a table) |
|
766
|
|
|
|
|
|
|
# colgroup closes any open colgroup (limited at a table) |
|
767
|
|
|
|
|
|
|
# col can try, but will always fail, at the enclosing table, |
|
768
|
|
|
|
|
|
|
# as col is empty, and therefore never open! |
|
769
|
|
|
|
|
|
|
# But! |
|
770
|
|
|
|
|
|
|
# td closes any open td OR th (limited at a table) |
|
771
|
|
|
|
|
|
|
# th closes any open th OR td (limited at a table) |
|
772
|
|
|
|
|
|
|
# ...implementable as "close to a tr, or make a tr" |
|
773
|
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
} |
|
775
|
|
|
|
|
|
|
elsif ( $HTML::TreeBuilder::isPhraseMarkup{$tag} ) { |
|
776
|
387
|
100
|
66
|
|
|
7335
|
if ( $ptag eq 'body' and $self->{'_implicit_body_p_tag'} ) { |
|
777
|
9
|
|
|
|
|
1369
|
print |
|
778
|
|
|
|
|
|
|
" * Phrasal \U$tag\E right under BODY makes an implicit P\n" |
|
779
|
|
|
|
|
|
|
if DEBUG > 1; |
|
780
|
9
|
|
|
|
|
1371
|
$pos = $self->insert_element( 'p', 1 ); |
|
781
|
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
# is $pos's value used after this? |
|
783
|
|
|
|
|
|
|
} |
|
784
|
|
|
|
|
|
|
} |
|
785
|
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
# End of implicit endings logic |
|
787
|
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
# End of "elsif ($HTML::TreeBuilder::isBodyElement{$tag}" |
|
789
|
|
|
|
|
|
|
#---------------------------------------------------------------------- |
|
790
|
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
} |
|
792
|
|
|
|
|
|
|
elsif ( $HTML::TreeBuilder::isHeadElement{$tag} ) { |
|
793
|
204
|
100
|
|
|
|
585
|
if ( $pos->is_inside('body') ) { |
|
|
|
100
|
|
|
|
|
|
|
794
|
3
|
|
|
|
|
6
|
print $indent, " * head element \U$tag\E found inside BODY!\n" |
|
795
|
|
|
|
|
|
|
if DEBUG; |
|
796
|
3
|
|
|
|
|
16
|
$self->warning("Header element <$tag> in body"); # [sic] |
|
797
|
|
|
|
|
|
|
} |
|
798
|
|
|
|
|
|
|
elsif ( !$pos->is_inside('head') ) { |
|
799
|
167
|
|
|
|
|
300
|
print $indent, |
|
800
|
|
|
|
|
|
|
" * head element \U$tag\E makes an implicit HEAD.\n" |
|
801
|
|
|
|
|
|
|
if DEBUG > 1; |
|
802
|
|
|
|
|
|
|
} |
|
803
|
|
|
|
|
|
|
else { |
|
804
|
34
|
|
|
|
|
62
|
print $indent, |
|
805
|
|
|
|
|
|
|
" * head element \U$tag\E goes inside existing HEAD.\n" |
|
806
|
|
|
|
|
|
|
if DEBUG > 1; |
|
807
|
|
|
|
|
|
|
} |
|
808
|
204
|
|
50
|
|
|
648
|
$self->{'_pos'} = $self->{'_head'} || die "Where'd my head go?"; |
|
809
|
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
#---------------------------------------------------------------------- |
|
811
|
|
|
|
|
|
|
} |
|
812
|
|
|
|
|
|
|
elsif ( $tag eq 'html' ) { |
|
813
|
21
|
50
|
|
|
|
61
|
if ( delete $self->{'_implicit'} ) { # first time here |
|
814
|
21
|
|
|
|
|
33
|
print $indent, " * good! found the real HTML element!\n" |
|
815
|
|
|
|
|
|
|
if DEBUG > 1; |
|
816
|
|
|
|
|
|
|
} |
|
817
|
|
|
|
|
|
|
else { |
|
818
|
0
|
|
|
|
|
0
|
print $indent, " * Found a second HTML element\n" |
|
819
|
|
|
|
|
|
|
if DEBUG; |
|
820
|
0
|
|
|
|
|
0
|
$self->warning("Found a nested element"); |
|
821
|
|
|
|
|
|
|
} |
|
822
|
|
|
|
|
|
|
|
|
823
|
|
|
|
|
|
|
# in either case, migrate attributes to the real element |
|
824
|
21
|
|
|
|
|
56
|
for ( keys %$attr ) { |
|
825
|
6
|
|
|
|
|
27
|
$self->attr( $_, $attr->{$_} ); |
|
826
|
|
|
|
|
|
|
} |
|
827
|
21
|
|
|
|
|
39
|
$self->{'_pos'} = undef; |
|
828
|
21
|
|
|
|
|
160
|
return $self; # bypass tweaking. |
|
829
|
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
#---------------------------------------------------------------------- |
|
831
|
|
|
|
|
|
|
} |
|
832
|
|
|
|
|
|
|
elsif ( $tag eq 'head' ) { |
|
833
|
22
|
|
50
|
|
|
67
|
my $head = $self->{'_head'} || die "Where'd my head go?"; |
|
834
|
22
|
50
|
|
|
|
65
|
if ( delete $head->{'_implicit'} ) { # first time here |
|
835
|
22
|
|
|
|
|
38
|
print $indent, " * good! found the real HEAD element!\n" |
|
836
|
|
|
|
|
|
|
if DEBUG > 1; |
|
837
|
|
|
|
|
|
|
} |
|
838
|
|
|
|
|
|
|
else { # been here before |
|
839
|
0
|
|
|
|
|
0
|
print $indent, " * Found a second HEAD element\n" |
|
840
|
|
|
|
|
|
|
if DEBUG; |
|
841
|
0
|
|
|
|
|
0
|
$self->warning("Found a second element"); |
|
842
|
|
|
|
|
|
|
} |
|
843
|
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
# in either case, migrate attributes to the real element |
|
845
|
22
|
|
|
|
|
60
|
for ( keys %$attr ) { |
|
846
|
0
|
|
|
|
|
0
|
$head->attr( $_, $attr->{$_} ); |
|
847
|
|
|
|
|
|
|
} |
|
848
|
22
|
|
|
|
|
138
|
return $self->{'_pos'} = $head; # bypass tweaking. |
|
849
|
|
|
|
|
|
|
|
|
850
|
|
|
|
|
|
|
#---------------------------------------------------------------------- |
|
851
|
|
|
|
|
|
|
} |
|
852
|
|
|
|
|
|
|
elsif ( $tag eq 'body' ) { |
|
853
|
22
|
|
50
|
|
|
77
|
my $body = $self->{'_body'} || die "Where'd my body go?"; |
|
854
|
22
|
50
|
|
|
|
66
|
if ( delete $body->{'_implicit'} ) { # first time here |
|
855
|
22
|
|
|
|
|
37
|
print $indent, " * good! found the real BODY element!\n" |
|
856
|
|
|
|
|
|
|
if DEBUG > 1; |
|
857
|
|
|
|
|
|
|
} |
|
858
|
|
|
|
|
|
|
else { # been here before |
|
859
|
0
|
|
|
|
|
0
|
print $indent, " * Found a second BODY element\n" |
|
860
|
|
|
|
|
|
|
if DEBUG; |
|
861
|
0
|
|
|
|
|
0
|
$self->warning("Found a second element"); |
|
862
|
|
|
|
|
|
|
} |
|
863
|
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
# in either case, migrate attributes to the real element |
|
865
|
22
|
|
|
|
|
62
|
for ( keys %$attr ) { |
|
866
|
0
|
|
|
|
|
0
|
$body->attr( $_, $attr->{$_} ); |
|
867
|
|
|
|
|
|
|
} |
|
868
|
22
|
|
|
|
|
164
|
return $self->{'_pos'} = $body; # bypass tweaking. |
|
869
|
|
|
|
|
|
|
|
|
870
|
|
|
|
|
|
|
#---------------------------------------------------------------------- |
|
871
|
|
|
|
|
|
|
} |
|
872
|
|
|
|
|
|
|
elsif ( $tag eq 'frameset' ) { |
|
873
|
0
|
0
|
0
|
|
|
0
|
if (!( $self->{'_frameset_seen'}++ ) # first frameset seen |
|
|
|
|
0
|
|
|
|
|
|
874
|
|
|
|
|
|
|
and !$self->{'_noframes_seen'} |
|
875
|
|
|
|
|
|
|
|
|
876
|
|
|
|
|
|
|
# otherwise it'll be under the noframes already |
|
877
|
|
|
|
|
|
|
and !$self->is_inside('body') |
|
878
|
|
|
|
|
|
|
) |
|
879
|
|
|
|
|
|
|
{ |
|
880
|
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
# The following is a bit of a hack. We don't use the normal |
|
882
|
|
|
|
|
|
|
# insert_element because 1) we don't want it as _pos, but instead |
|
883
|
|
|
|
|
|
|
# right under $self, and 2), more importantly, that we don't want |
|
884
|
|
|
|
|
|
|
# this inserted at the /end/ of $self's content_list, but instead |
|
885
|
|
|
|
|
|
|
# in the middle of it, specifically right before the body element. |
|
886
|
|
|
|
|
|
|
# |
|
887
|
0
|
|
0
|
|
|
0
|
my $c = $self->{'_content'} || die "Contentless root?"; |
|
888
|
0
|
|
0
|
|
|
0
|
my $body = $self->{'_body'} || die "Where'd my BODY go?"; |
|
889
|
0
|
|
|
|
|
0
|
for ( my $i = 0; $i < @$c; ++$i ) { |
|
890
|
0
|
0
|
|
|
|
0
|
if ( $c->[$i] eq $body ) { |
|
891
|
0
|
|
|
|
|
0
|
splice( @$c, $i, 0, $self->{'_pos'} = $pos = $e ); |
|
892
|
0
|
|
|
|
|
0
|
HTML::Element::_weaken($e->{'_parent'} = $self); |
|
893
|
0
|
|
|
|
|
0
|
$already_inserted = 1; |
|
894
|
0
|
|
|
|
|
0
|
print $indent, |
|
895
|
|
|
|
|
|
|
" * inserting 'frameset' right before BODY.\n" |
|
896
|
|
|
|
|
|
|
if DEBUG > 1; |
|
897
|
0
|
|
|
|
|
0
|
last; |
|
898
|
|
|
|
|
|
|
} |
|
899
|
|
|
|
|
|
|
} |
|
900
|
0
|
0
|
|
|
|
0
|
die "BODY not found in children of root?" |
|
901
|
|
|
|
|
|
|
unless $already_inserted; |
|
902
|
|
|
|
|
|
|
} |
|
903
|
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
} |
|
905
|
|
|
|
|
|
|
elsif ( $tag eq 'frame' ) { |
|
906
|
|
|
|
|
|
|
|
|
907
|
|
|
|
|
|
|
# Okay, fine, pass thru. |
|
908
|
|
|
|
|
|
|
# Should probably enforce that these should be under a frameset. |
|
909
|
|
|
|
|
|
|
# But hey. Ditto for enforcing that 'noframes' should be under |
|
910
|
|
|
|
|
|
|
# a 'frameset', as the DTDs say. |
|
911
|
|
|
|
|
|
|
|
|
912
|
|
|
|
|
|
|
} |
|
913
|
|
|
|
|
|
|
elsif ( $tag eq 'noframes' ) { |
|
914
|
|
|
|
|
|
|
|
|
915
|
|
|
|
|
|
|
# This basically assumes there'll be exactly one 'noframes' element |
|
916
|
|
|
|
|
|
|
# per document. At least, only the first one gets to have the |
|
917
|
|
|
|
|
|
|
# body under it. And if there are no noframes elements, then |
|
918
|
|
|
|
|
|
|
# the body pretty much stays where it is. Is that ever a problem? |
|
919
|
0
|
0
|
|
|
|
0
|
if ( $self->{'_noframes_seen'}++ ) { |
|
920
|
0
|
|
|
|
|
0
|
print $indent, " * ANOTHER noframes element?\n" if DEBUG; |
|
921
|
|
|
|
|
|
|
} |
|
922
|
|
|
|
|
|
|
else { |
|
923
|
0
|
0
|
|
|
|
0
|
if ( $pos->is_inside('body') ) { |
|
924
|
0
|
|
|
|
|
0
|
print $indent, " * 'noframes' inside 'body'. Odd!\n" |
|
925
|
|
|
|
|
|
|
if DEBUG; |
|
926
|
|
|
|
|
|
|
|
|
927
|
|
|
|
|
|
|
# In that odd case, we /can't/ make body a child of 'noframes', |
|
928
|
|
|
|
|
|
|
# because it's an ancestor of the 'noframes'! |
|
929
|
|
|
|
|
|
|
} |
|
930
|
|
|
|
|
|
|
else { |
|
931
|
0
|
|
0
|
|
|
0
|
$e->push_content( $self->{'_body'} |
|
932
|
|
|
|
|
|
|
|| die "Where'd my body go?" ); |
|
933
|
0
|
|
|
|
|
0
|
print $indent, " * Moving body to be under noframes.\n" |
|
934
|
|
|
|
|
|
|
if DEBUG; |
|
935
|
|
|
|
|
|
|
} |
|
936
|
|
|
|
|
|
|
} |
|
937
|
|
|
|
|
|
|
|
|
938
|
|
|
|
|
|
|
#---------------------------------------------------------------------- |
|
939
|
|
|
|
|
|
|
} |
|
940
|
|
|
|
|
|
|
else { |
|
941
|
|
|
|
|
|
|
|
|
942
|
|
|
|
|
|
|
# unknown tag |
|
943
|
0
|
0
|
|
|
|
0
|
if ( $self->{'_ignore_unknown'} ) { |
|
944
|
0
|
|
|
|
|
0
|
print $indent, " * Ignoring unknown tag \U$tag\E\n" if DEBUG; |
|
945
|
0
|
|
|
|
|
0
|
$self->warning("Skipping unknown tag $tag"); |
|
946
|
0
|
|
|
|
|
0
|
return; |
|
947
|
|
|
|
|
|
|
} |
|
948
|
|
|
|
|
|
|
else { |
|
949
|
0
|
|
|
|
|
0
|
print $indent, " * Accepting unknown tag \U$tag\E\n" |
|
950
|
|
|
|
|
|
|
if DEBUG; |
|
951
|
|
|
|
|
|
|
} |
|
952
|
|
|
|
|
|
|
} |
|
953
|
|
|
|
|
|
|
|
|
954
|
|
|
|
|
|
|
#---------------------------------------------------------------------- |
|
955
|
|
|
|
|
|
|
# End of mumbo-jumbo |
|
956
|
|
|
|
|
|
|
|
|
957
|
|
|
|
|
|
|
print $indent, "(Attaching ", $e->{'_tag'}, " under ", |
|
958
|
1187
|
|
|
|
|
5905
|
( $self->{'_pos'} || $self )->{'_tag'}, ")\n" |
|
959
|
|
|
|
|
|
|
|
|
960
|
|
|
|
|
|
|
# because if _pos isn't defined, it goes under self |
|
961
|
|
|
|
|
|
|
if DEBUG; |
|
962
|
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
# The following if-clause is to delete /some/ ignorable whitespace |
|
964
|
|
|
|
|
|
|
# nodes, as we're making the tree. |
|
965
|
|
|
|
|
|
|
# This'd be a node we'd catch later anyway, but we might as well |
|
966
|
|
|
|
|
|
|
# nip it in the bud now. |
|
967
|
|
|
|
|
|
|
# This doesn't catch /all/ deletable WS-nodes, so we do have to call |
|
968
|
|
|
|
|
|
|
# the tightener later to catch the rest. |
|
969
|
|
|
|
|
|
|
|
|
970
|
1187
|
100
|
66
|
|
|
9127
|
if ( $self->{'_tighten'} and !$self->{'_ignore_text'} ) |
|
971
|
|
|
|
|
|
|
{ # if tightenable |
|
972
|
1178
|
|
|
|
|
6092
|
my ( $sibs, $par ); |
|
973
|
1178
|
100
|
66
|
|
|
18215
|
if (( $sibs = ( $par = $self->{'_pos'} || $self )->{'_content'} ) |
|
|
|
|
66
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
974
|
|
|
|
|
|
|
and @$sibs # parent already has content |
|
975
|
|
|
|
|
|
|
and ! |
|
976
|
|
|
|
|
|
|
ref( $sibs->[-1] ) # and the last one there is a text node |
|
977
|
|
|
|
|
|
|
and $sibs->[-1] !~ m<[^\n\r\f\t ]>s # and it's all whitespace |
|
978
|
|
|
|
|
|
|
|
|
979
|
|
|
|
|
|
|
and ( # one of these has to be eligible... |
|
980
|
|
|
|
|
|
|
$HTML::TreeBuilder::canTighten{$tag} |
|
981
|
|
|
|
|
|
|
or (( @$sibs == 1 ) |
|
982
|
|
|
|
|
|
|
? # WS is leftmost -- so parent matters |
|
983
|
|
|
|
|
|
|
$HTML::TreeBuilder::canTighten{ $par->{'_tag'} } |
|
984
|
|
|
|
|
|
|
: # WS is after another node -- it matters |
|
985
|
|
|
|
|
|
|
( ref $sibs->[-2] |
|
986
|
|
|
|
|
|
|
and |
|
987
|
|
|
|
|
|
|
$HTML::TreeBuilder::canTighten{ $sibs->[-2] |
|
988
|
|
|
|
|
|
|
{'_tag'} } |
|
989
|
|
|
|
|
|
|
) |
|
990
|
|
|
|
|
|
|
) |
|
991
|
|
|
|
|
|
|
) |
|
992
|
|
|
|
|
|
|
|
|
993
|
|
|
|
|
|
|
and !$par->is_inside( 'pre', 'xmp', 'textarea', 'plaintext' ) |
|
994
|
|
|
|
|
|
|
|
|
995
|
|
|
|
|
|
|
# we're clear |
|
996
|
|
|
|
|
|
|
) |
|
997
|
|
|
|
|
|
|
{ |
|
998
|
131
|
|
|
|
|
224
|
pop @$sibs; |
|
999
|
131
|
|
|
|
|
216
|
print $indent, "Popping a preceding all-WS node\n" if DEBUG; |
|
1000
|
|
|
|
|
|
|
} |
|
1001
|
|
|
|
|
|
|
} |
|
1002
|
|
|
|
|
|
|
|
|
1003
|
1187
|
50
|
|
|
|
6967
|
unless ($already_inserted) { |
|
1004
|
1187
|
100
|
|
|
|
6564
|
if ($self_closed) { $self->pos->push_content($e) } |
|
|
8
|
|
|
|
|
24
|
|
|
1005
|
1179
|
|
|
|
|
7228
|
else { $self->insert_element($e) } |
|
1006
|
|
|
|
|
|
|
} |
|
1007
|
|
|
|
|
|
|
|
|
1008
|
1187
|
|
|
|
|
6183
|
if (DEBUG) { |
|
1009
|
|
|
|
|
|
|
if ( $self->{'_pos'} ) { |
|
1010
|
|
|
|
|
|
|
print $indent, "(Current lineage of pos: \U$tag\E under ", |
|
1011
|
|
|
|
|
|
|
join( |
|
1012
|
|
|
|
|
|
|
'/', |
|
1013
|
|
|
|
|
|
|
reverse( |
|
1014
|
|
|
|
|
|
|
|
|
1015
|
|
|
|
|
|
|
# $self->{'_pos'}{'_tag'}, # don't list myself! |
|
1016
|
|
|
|
|
|
|
$self->{'_pos'}->lineage_tag_names |
|
1017
|
|
|
|
|
|
|
) |
|
1018
|
|
|
|
|
|
|
), |
|
1019
|
|
|
|
|
|
|
".)\n"; |
|
1020
|
|
|
|
|
|
|
} |
|
1021
|
|
|
|
|
|
|
else { |
|
1022
|
|
|
|
|
|
|
print $indent, "(Pos points nowhere!?)\n"; |
|
1023
|
|
|
|
|
|
|
} |
|
1024
|
|
|
|
|
|
|
} |
|
1025
|
|
|
|
|
|
|
|
|
1026
|
1187
|
100
|
50
|
|
|
8452
|
unless ( ( $self->{'_pos'} || '' ) eq $e ) { |
|
1027
|
|
|
|
|
|
|
|
|
1028
|
|
|
|
|
|
|
# if it's an empty element -- i.e., if it didn't change the _pos |
|
1029
|
51
|
|
|
|
|
1548
|
&{ $self->{"_tweak_$tag"} |
|
1030
|
51
|
50
|
33
|
|
|
3219
|
|| $self->{'_tweak_*'} |
|
1031
|
|
|
|
|
|
|
|| return $e }( map $_, $e, $tag, $self ) |
|
1032
|
|
|
|
|
|
|
; # make a list so the user can't clobber |
|
1033
|
|
|
|
|
|
|
} |
|
1034
|
|
|
|
|
|
|
|
|
1035
|
1136
|
|
|
|
|
12770
|
return $e; |
|
1036
|
|
|
|
|
|
|
} |
|
1037
|
|
|
|
|
|
|
} |
|
1038
|
|
|
|
|
|
|
|
|
1039
|
|
|
|
|
|
|
#========================================================================== |
|
1040
|
|
|
|
|
|
|
|
|
1041
|
|
|
|
|
|
|
{ |
|
1042
|
|
|
|
|
|
|
my $indent; |
|
1043
|
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
sub end { |
|
1045
|
1714
|
50
|
|
1714
|
1
|
18274
|
return if $_[0]{'_stunted'}; |
|
1046
|
|
|
|
|
|
|
|
|
1047
|
|
|
|
|
|
|
# Either: Acccept an end-tag signal from HTML::Parser |
|
1048
|
|
|
|
|
|
|
# Or: Method for closing currently open elements in some fairly complex |
|
1049
|
|
|
|
|
|
|
# way, as used by other methods in this class. |
|
1050
|
1714
|
|
|
|
|
18619
|
my ( $self, $tag, @stop ) = @_; |
|
1051
|
1714
|
50
|
|
|
|
18082
|
if ( $tag eq 'x-html' ) { |
|
1052
|
0
|
|
|
|
|
0
|
print "Ignoring close-x-html tag.\n" if DEBUG; |
|
1053
|
|
|
|
|
|
|
|
|
1054
|
|
|
|
|
|
|
# inserted by some lame code-generators. |
|
1055
|
0
|
|
|
|
|
0
|
return; |
|
1056
|
|
|
|
|
|
|
} |
|
1057
|
|
|
|
|
|
|
|
|
1058
|
1714
|
50
|
66
|
|
|
21789
|
unless ( ref($tag) or $tag =~ m/^[-_a-zA-Z0-9:%]+$/s ) { |
|
1059
|
0
|
|
|
|
|
0
|
DEBUG and print "End-tag name $tag is no good. Skipping.\n"; |
|
1060
|
0
|
|
|
|
|
0
|
return; |
|
1061
|
|
|
|
|
|
|
|
|
1062
|
|
|
|
|
|
|
# This avoids having Element's new() throw an exception. |
|
1063
|
|
|
|
|
|
|
} |
|
1064
|
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
# This method accepts two calling formats: |
|
1066
|
|
|
|
|
|
|
# 1) from Parser: $self->end('tag_name', 'origtext') |
|
1067
|
|
|
|
|
|
|
# in which case we shouldn't mistake origtext as a blocker tag |
|
1068
|
|
|
|
|
|
|
# 2) from myself: $self->end(\q{tagname1}, 'blk1', ... ) |
|
1069
|
|
|
|
|
|
|
# from myself: $self->end(['tagname1', 'tagname2'], 'blk1', ... ) |
|
1070
|
|
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
# End the specified tag, but don't move above any of the blocker tags. |
|
1072
|
|
|
|
|
|
|
# The tag can also be a reference to an array. Terminate the first |
|
1073
|
|
|
|
|
|
|
# tag found. |
|
1074
|
|
|
|
|
|
|
|
|
1075
|
1714
|
|
66
|
|
|
18873
|
my $ptag = ( my $p = $self->{'_pos'} || $self )->{'_tag'}; |
|
1076
|
|
|
|
|
|
|
|
|
1077
|
|
|
|
|
|
|
# $p and $ptag are sort-of scratch |
|
1078
|
|
|
|
|
|
|
|
|
1079
|
1714
|
100
|
|
|
|
17449
|
if ( ref($tag) ) { |
|
1080
|
|
|
|
|
|
|
|
|
1081
|
|
|
|
|
|
|
# First param is a ref of one sort or another -- |
|
1082
|
|
|
|
|
|
|
# THE CALL IS COMING FROM INSIDE THE HOUSE! |
|
1083
|
415
|
100
|
|
|
|
973
|
$tag = $$tag if ref($tag) eq 'SCALAR'; |
|
1084
|
|
|
|
|
|
|
|
|
1085
|
|
|
|
|
|
|
# otherwise it's an arrayref. |
|
1086
|
|
|
|
|
|
|
} |
|
1087
|
|
|
|
|
|
|
else { |
|
1088
|
|
|
|
|
|
|
|
|
1089
|
|
|
|
|
|
|
# the call came from Parser -- just ignore origtext |
|
1090
|
|
|
|
|
|
|
# except in a table ignore unmatched table tags RT #59980 |
|
1091
|
1299
|
100
|
|
|
|
31119
|
@stop = $tag =~ /^t[hdr]\z/ ? 'table' : (); |
|
1092
|
|
|
|
|
|
|
} |
|
1093
|
|
|
|
|
|
|
|
|
1094
|
|
|
|
|
|
|
#my($indent); |
|
1095
|
1714
|
|
|
|
|
16308
|
if (DEBUG) { |
|
1096
|
|
|
|
|
|
|
|
|
1097
|
|
|
|
|
|
|
# optimization -- don't figure out depth unless we're in debug mode |
|
1098
|
|
|
|
|
|
|
my @lineage_tags = $p->lineage_tag_names; |
|
1099
|
|
|
|
|
|
|
$indent = ' ' x ( 1 + @lineage_tags ); |
|
1100
|
|
|
|
|
|
|
|
|
1101
|
|
|
|
|
|
|
# now announce ourselves |
|
1102
|
|
|
|
|
|
|
print $indent, "Ending ", |
|
1103
|
|
|
|
|
|
|
ref($tag) ? ( '[', join( ' ', @$tag ), ']' ) : "\U$tag\E", |
|
1104
|
|
|
|
|
|
|
scalar(@stop) |
|
1105
|
|
|
|
|
|
|
? ( " no higher than [", join( ' ', @stop ), "]" ) |
|
1106
|
|
|
|
|
|
|
: (), ".\n"; |
|
1107
|
|
|
|
|
|
|
|
|
1108
|
|
|
|
|
|
|
print $indent, " (Current lineage: ", join( '/', @lineage_tags ), |
|
1109
|
|
|
|
|
|
|
".)\n" |
|
1110
|
|
|
|
|
|
|
if DEBUG > 1; |
|
1111
|
|
|
|
|
|
|
|
|
1112
|
|
|
|
|
|
|
if ( DEBUG > 3 ) { |
|
1113
|
|
|
|
|
|
|
|
|
1114
|
|
|
|
|
|
|
#my( |
|
1115
|
|
|
|
|
|
|
# $package, $filename, $line, $subroutine, |
|
1116
|
|
|
|
|
|
|
# $hasargs, $wantarray, $evaltext, $is_require) = caller; |
|
1117
|
|
|
|
|
|
|
print $indent, |
|
1118
|
|
|
|
|
|
|
" (Called from ", ( caller(1) )[3], ' line ', |
|
1119
|
|
|
|
|
|
|
( caller(1) )[2], |
|
1120
|
|
|
|
|
|
|
")\n"; |
|
1121
|
|
|
|
|
|
|
} |
|
1122
|
|
|
|
|
|
|
|
|
1123
|
|
|
|
|
|
|
#} else { |
|
1124
|
|
|
|
|
|
|
# $indent = ' '; |
|
1125
|
|
|
|
|
|
|
} |
|
1126
|
|
|
|
|
|
|
|
|
1127
|
|
|
|
|
|
|
# End of if DEBUG |
|
1128
|
|
|
|
|
|
|
|
|
1129
|
|
|
|
|
|
|
# Now actually do it |
|
1130
|
1714
|
|
|
|
|
16430
|
my @to_close; |
|
1131
|
1714
|
100
|
|
|
|
18135
|
if ( $tag eq '*' ) { |
|
|
|
100
|
|
|
|
|
|
|
1132
|
|
|
|
|
|
|
|
|
1133
|
|
|
|
|
|
|
# Special -- close everything up to (but not including) the first |
|
1134
|
|
|
|
|
|
|
# limiting tag, or return if none found. Somewhat of a special case. |
|
1135
|
|
|
|
|
|
|
PARENT: |
|
1136
|
41
|
|
|
|
|
93
|
while ( defined $p ) { |
|
1137
|
80
|
|
|
|
|
128
|
$ptag = $p->{'_tag'}; |
|
1138
|
80
|
|
|
|
|
106
|
print $indent, " (Looking at $ptag.)\n" if DEBUG > 2; |
|
1139
|
80
|
|
|
|
|
141
|
for (@stop) { |
|
1140
|
131
|
100
|
|
|
|
314
|
if ( $ptag eq $_ ) { |
|
1141
|
41
|
|
|
|
|
62
|
print $indent, |
|
1142
|
|
|
|
|
|
|
" (Hit a $_; closing everything up to here.)\n" |
|
1143
|
|
|
|
|
|
|
if DEBUG > 2; |
|
1144
|
41
|
|
|
|
|
78
|
last PARENT; |
|
1145
|
|
|
|
|
|
|
} |
|
1146
|
|
|
|
|
|
|
} |
|
1147
|
39
|
|
|
|
|
67
|
push @to_close, $p; |
|
1148
|
39
|
|
|
|
|
67
|
$p = $p->{'_parent'}; # no match so far? keep moving up |
|
1149
|
|
|
|
|
|
|
print $indent, |
|
1150
|
39
|
|
|
|
|
83
|
" (Moving on up to ", $p ? $p->{'_tag'} : 'nil', ")\n" |
|
1151
|
|
|
|
|
|
|
if DEBUG > 1; |
|
1152
|
|
|
|
|
|
|
} |
|
1153
|
41
|
50
|
|
|
|
95
|
unless ( defined $p ) { # We never found what we were looking for. |
|
1154
|
0
|
|
|
|
|
0
|
print $indent, " (We never found a limit.)\n" if DEBUG > 1; |
|
1155
|
0
|
|
|
|
|
0
|
return; |
|
1156
|
|
|
|
|
|
|
} |
|
1157
|
|
|
|
|
|
|
|
|
1158
|
|
|
|
|
|
|
#print |
|
1159
|
|
|
|
|
|
|
# $indent, |
|
1160
|
|
|
|
|
|
|
# " (To close: ", join('/', map $_->tag, @to_close), ".)\n" |
|
1161
|
|
|
|
|
|
|
# if DEBUG > 4; |
|
1162
|
|
|
|
|
|
|
|
|
1163
|
|
|
|
|
|
|
# Otherwise update pos and fall thru. |
|
1164
|
41
|
|
|
|
|
74
|
$self->{'_pos'} = $p; |
|
1165
|
|
|
|
|
|
|
} |
|
1166
|
|
|
|
|
|
|
elsif ( ref $tag ) { |
|
1167
|
|
|
|
|
|
|
|
|
1168
|
|
|
|
|
|
|
# Close the first of any of the matching tags, giving up if you hit |
|
1169
|
|
|
|
|
|
|
# any of the stop-tags. |
|
1170
|
|
|
|
|
|
|
PARENT: |
|
1171
|
307
|
|
|
|
|
757
|
while ( defined $p ) { |
|
1172
|
584
|
|
|
|
|
991
|
$ptag = $p->{'_tag'}; |
|
1173
|
584
|
|
|
|
|
785
|
print $indent, " (Looking at $ptag.)\n" if DEBUG > 2; |
|
1174
|
584
|
|
|
|
|
1096
|
for (@$tag) { |
|
1175
|
5014
|
100
|
|
|
|
10511
|
if ( $ptag eq $_ ) { |
|
1176
|
31
|
|
|
|
|
42
|
print $indent, " (Closing $_.)\n" if DEBUG > 2; |
|
1177
|
31
|
|
|
|
|
62
|
last PARENT; |
|
1178
|
|
|
|
|
|
|
} |
|
1179
|
|
|
|
|
|
|
} |
|
1180
|
553
|
|
|
|
|
985
|
for (@stop) { |
|
1181
|
8295
|
50
|
|
|
|
16900
|
if ( $ptag eq $_ ) { |
|
1182
|
0
|
|
|
|
|
0
|
print $indent, |
|
1183
|
|
|
|
|
|
|
" (Hit a limiting $_ -- bailing out.)\n" |
|
1184
|
|
|
|
|
|
|
if DEBUG > 1; |
|
1185
|
0
|
|
|
|
|
0
|
return; # so it was all for naught |
|
1186
|
|
|
|
|
|
|
} |
|
1187
|
|
|
|
|
|
|
} |
|
1188
|
553
|
|
|
|
|
922
|
push @to_close, $p; |
|
1189
|
553
|
|
|
|
|
1300
|
$p = $p->{'_parent'}; |
|
1190
|
|
|
|
|
|
|
} |
|
1191
|
307
|
100
|
|
|
|
1180
|
return unless defined $p; # We went off the top of the tree. |
|
1192
|
|
|
|
|
|
|
# Otherwise specified element was found; set pos to its parent. |
|
1193
|
31
|
|
|
|
|
65
|
push @to_close, $p; |
|
1194
|
31
|
|
|
|
|
64
|
$self->{'_pos'} = $p->{'_parent'}; |
|
1195
|
|
|
|
|
|
|
} |
|
1196
|
|
|
|
|
|
|
else { |
|
1197
|
|
|
|
|
|
|
|
|
1198
|
|
|
|
|
|
|
# Close the first of the specified tag, giving up if you hit |
|
1199
|
|
|
|
|
|
|
# any of the stop-tags. |
|
1200
|
1366
|
|
|
|
|
16988
|
while ( defined $p ) { |
|
1201
|
1876
|
|
|
|
|
32053
|
$ptag = $p->{'_tag'}; |
|
1202
|
1876
|
|
|
|
|
31446
|
print $indent, " (Looking at $ptag.)\n" if DEBUG > 2; |
|
1203
|
1876
|
100
|
|
|
|
33525
|
if ( $ptag eq $tag ) { |
|
1204
|
1324
|
|
|
|
|
14532
|
print $indent, " (Closing $tag.)\n" if DEBUG > 2; |
|
1205
|
1324
|
|
|
|
|
27417
|
last; |
|
1206
|
|
|
|
|
|
|
} |
|
1207
|
552
|
|
|
|
|
17839
|
for (@stop) { |
|
1208
|
67
|
100
|
|
|
|
167
|
if ( $ptag eq $_ ) { |
|
1209
|
32
|
|
|
|
|
41
|
print $indent, |
|
1210
|
|
|
|
|
|
|
" (Hit a limiting $_ -- bailing out.)\n" |
|
1211
|
|
|
|
|
|
|
if DEBUG > 1; |
|
1212
|
32
|
|
|
|
|
94
|
return; # so it was all for naught |
|
1213
|
|
|
|
|
|
|
} |
|
1214
|
|
|
|
|
|
|
} |
|
1215
|
520
|
|
|
|
|
17715
|
push @to_close, $p; |
|
1216
|
520
|
|
|
|
|
19113
|
$p = $p->{'_parent'}; |
|
1217
|
|
|
|
|
|
|
} |
|
1218
|
1334
|
100
|
|
|
|
18382
|
return unless defined $p; # We went off the top of the tree. |
|
1219
|
|
|
|
|
|
|
# Otherwise specified element was found; set pos to its parent. |
|
1220
|
1324
|
|
|
|
|
14740
|
push @to_close, $p; |
|
1221
|
1324
|
|
|
|
|
27324
|
$self->{'_pos'} = $p->{'_parent'}; |
|
1222
|
|
|
|
|
|
|
} |
|
1223
|
|
|
|
|
|
|
|
|
1224
|
1396
|
100
|
100
|
|
|
17696
|
$self->{'_pos'} = undef if $self eq ( $self->{'_pos'} || '' ); |
|
1225
|
|
|
|
|
|
|
print $indent, "(Pos now points to ", |
|
1226
|
1396
|
|
|
|
|
14626
|
$self->{'_pos'} ? $self->{'_pos'}{'_tag'} : '???', ".)\n" |
|
1227
|
|
|
|
|
|
|
if DEBUG > 1; |
|
1228
|
|
|
|
|
|
|
|
|
1229
|
|
|
|
|
|
|
### EXPENSIVE, because has to check that it's not under a pre |
|
1230
|
|
|
|
|
|
|
### or a CDATA-parent. That's one more method call per end()! |
|
1231
|
|
|
|
|
|
|
### Might as well just do this at the end of the tree-parse, I guess, |
|
1232
|
|
|
|
|
|
|
### at which point we'd be parsing top-down, and just not traversing |
|
1233
|
|
|
|
|
|
|
### under pre's or CDATA-parents. |
|
1234
|
|
|
|
|
|
|
## |
|
1235
|
|
|
|
|
|
|
## Take this opportunity to nix any terminal whitespace nodes. |
|
1236
|
|
|
|
|
|
|
## TODO: consider whether this (plus the logic in start(), above) |
|
1237
|
|
|
|
|
|
|
## would ever leave any WS nodes in the tree. |
|
1238
|
|
|
|
|
|
|
## If not, then there's no reason to have eof() call |
|
1239
|
|
|
|
|
|
|
## delete_ignorable_whitespace on the tree, is there? |
|
1240
|
|
|
|
|
|
|
## |
|
1241
|
|
|
|
|
|
|
#if(@to_close and $self->{'_tighten'} and !$self->{'_ignore_text'} and |
|
1242
|
|
|
|
|
|
|
# ! $to_close[-1]->is_inside('pre', keys %HTML::Tagset::isCDATA_Parent) |
|
1243
|
|
|
|
|
|
|
#) { # if tightenable |
|
1244
|
|
|
|
|
|
|
# my($children, $e_tag); |
|
1245
|
|
|
|
|
|
|
# foreach my $e (reverse @to_close) { # going top-down |
|
1246
|
|
|
|
|
|
|
# last if 'pre' eq ($e_tag = $e->{'_tag'}) or |
|
1247
|
|
|
|
|
|
|
# $HTML::Tagset::isCDATA_Parent{$e_tag}; |
|
1248
|
|
|
|
|
|
|
# |
|
1249
|
|
|
|
|
|
|
# if( |
|
1250
|
|
|
|
|
|
|
# $children = $e->{'_content'} |
|
1251
|
|
|
|
|
|
|
# and @$children # has children |
|
1252
|
|
|
|
|
|
|
# and !ref($children->[-1]) |
|
1253
|
|
|
|
|
|
|
# and $children->[-1] =~ m<^\s+$>s # last node is all-WS |
|
1254
|
|
|
|
|
|
|
# and |
|
1255
|
|
|
|
|
|
|
# ( |
|
1256
|
|
|
|
|
|
|
# # has a tightenable parent: |
|
1257
|
|
|
|
|
|
|
# $HTML::TreeBuilder::canTighten{ $e_tag } |
|
1258
|
|
|
|
|
|
|
# or |
|
1259
|
|
|
|
|
|
|
# ( # has a tightenable left sibling: |
|
1260
|
|
|
|
|
|
|
# @$children > 1 and |
|
1261
|
|
|
|
|
|
|
# ref($children->[-2]) |
|
1262
|
|
|
|
|
|
|
# and $HTML::TreeBuilder::canTighten{ $children->[-2]{'_tag'} } |
|
1263
|
|
|
|
|
|
|
# ) |
|
1264
|
|
|
|
|
|
|
# ) |
|
1265
|
|
|
|
|
|
|
# ) { |
|
1266
|
|
|
|
|
|
|
# pop @$children; |
|
1267
|
|
|
|
|
|
|
# #print $indent, "Popping a terminal WS node from ", $e->{'_tag'}, |
|
1268
|
|
|
|
|
|
|
# # " (", $e->address, ") while exiting.\n" if DEBUG; |
|
1269
|
|
|
|
|
|
|
# } |
|
1270
|
|
|
|
|
|
|
# } |
|
1271
|
|
|
|
|
|
|
#} |
|
1272
|
|
|
|
|
|
|
|
|
1273
|
1396
|
|
|
|
|
14932
|
foreach my $e (@to_close) { |
|
1274
|
|
|
|
|
|
|
|
|
1275
|
|
|
|
|
|
|
# Call the applicable callback, if any |
|
1276
|
1885
|
|
|
|
|
28117
|
$ptag = $e->{'_tag'}; |
|
1277
|
1885
|
|
|
|
|
29966
|
&{ $self->{"_tweak_$ptag"} |
|
1278
|
1885
|
50
|
33
|
|
|
58609
|
|| $self->{'_tweak_*'} |
|
1279
|
|
|
|
|
|
|
|| next }( map $_, $e, $ptag, $self ); |
|
1280
|
0
|
|
|
|
|
0
|
print $indent, "Back from tweaking.\n" if DEBUG; |
|
1281
|
|
|
|
|
|
|
last |
|
1282
|
0
|
0
|
|
|
|
0
|
if $self->{ '_stunted' |
|
1283
|
|
|
|
|
|
|
}; # in case one of the handlers called stunt |
|
1284
|
|
|
|
|
|
|
} |
|
1285
|
1396
|
|
|
|
|
30331
|
return @to_close; |
|
1286
|
|
|
|
|
|
|
} |
|
1287
|
|
|
|
|
|
|
} |
|
1288
|
|
|
|
|
|
|
|
|
1289
|
|
|
|
|
|
|
#========================================================================== |
|
1290
|
|
|
|
|
|
|
{ |
|
1291
|
|
|
|
|
|
|
my ( $indent, $nugget ); |
|
1292
|
|
|
|
|
|
|
|
|
1293
|
|
|
|
|
|
|
sub text { |
|
1294
|
2587
|
50
|
|
2587
|
1
|
93193
|
return if $_[0]{'_stunted'}; |
|
1295
|
|
|
|
|
|
|
|
|
1296
|
|
|
|
|
|
|
# Accept a "here's a text token" signal from HTML::Parser. |
|
1297
|
2587
|
|
|
|
|
16780
|
my ( $self, $text, $is_cdata ) = @_; |
|
1298
|
|
|
|
|
|
|
|
|
1299
|
|
|
|
|
|
|
# the >3.0 versions of Parser may pass a cdata node. |
|
1300
|
|
|
|
|
|
|
# Thanks to Gisle Aas for pointing this out. |
|
1301
|
|
|
|
|
|
|
|
|
1302
|
2587
|
50
|
|
|
|
17186
|
return unless length $text; # I guess that's always right |
|
1303
|
|
|
|
|
|
|
|
|
1304
|
2587
|
|
|
|
|
15308
|
my $ignore_text = $self->{'_ignore_text'}; |
|
1305
|
2587
|
|
|
|
|
15180
|
my $no_space_compacting = $self->{'_no_space_compacting'}; |
|
1306
|
2587
|
|
|
|
|
15061
|
my $no_expand_entities = $self->{'_no_expand_entities'}; |
|
1307
|
2587
|
|
66
|
|
|
17801
|
my $pos = $self->{'_pos'} || $self; |
|
1308
|
|
|
|
|
|
|
|
|
1309
|
|
|
|
|
|
|
HTML::Entities::decode($text) |
|
1310
|
|
|
|
|
|
|
unless $ignore_text |
|
1311
|
|
|
|
|
|
|
|| $is_cdata |
|
1312
|
2587
|
100
|
33
|
|
|
35009
|
|| $HTML::Tagset::isCDATA_Parent{ $pos->{'_tag'} } |
|
|
|
|
33
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
1313
|
|
|
|
|
|
|
|| $no_expand_entities; |
|
1314
|
|
|
|
|
|
|
|
|
1315
|
|
|
|
|
|
|
#my($indent, $nugget); |
|
1316
|
2587
|
|
|
|
|
14960
|
if (DEBUG) { |
|
1317
|
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
# optimization -- don't figure out depth unless we're in debug mode |
|
1319
|
|
|
|
|
|
|
my @lineage_tags = $pos->lineage_tag_names; |
|
1320
|
|
|
|
|
|
|
$indent = ' ' x ( 1 + @lineage_tags ); |
|
1321
|
|
|
|
|
|
|
|
|
1322
|
|
|
|
|
|
|
$nugget |
|
1323
|
|
|
|
|
|
|
= ( length($text) <= 25 ) |
|
1324
|
|
|
|
|
|
|
? $text |
|
1325
|
|
|
|
|
|
|
: ( substr( $text, 0, 25 ) . '...' ); |
|
1326
|
|
|
|
|
|
|
$nugget =~ s<([\x00-\x1F])> |
|
1327
|
|
|
|
|
|
|
<'\\x'.(unpack("H2",$1))>eg; |
|
1328
|
|
|
|
|
|
|
print $indent, "Proposing a new text node ($nugget) under ", |
|
1329
|
|
|
|
|
|
|
join( '/', reverse( $pos->{'_tag'}, @lineage_tags ) ) |
|
1330
|
|
|
|
|
|
|
|| 'Root', |
|
1331
|
|
|
|
|
|
|
".\n"; |
|
1332
|
|
|
|
|
|
|
|
|
1333
|
|
|
|
|
|
|
#} else { |
|
1334
|
|
|
|
|
|
|
# $indent = ' '; |
|
1335
|
|
|
|
|
|
|
} |
|
1336
|
|
|
|
|
|
|
|
|
1337
|
2587
|
|
|
|
|
15087
|
my $ptag; |
|
1338
|
2587
|
50
|
33
|
|
|
20861
|
if ($HTML::Tagset::isCDATA_Parent{ $ptag = $pos->{'_tag'} } |
|
1339
|
|
|
|
|
|
|
|
|
1340
|
|
|
|
|
|
|
#or $pos->is_inside('pre') |
|
1341
|
|
|
|
|
|
|
or $pos->is_inside( 'pre', 'textarea' ) |
|
1342
|
|
|
|
|
|
|
) |
|
1343
|
|
|
|
|
|
|
{ |
|
1344
|
0
|
0
|
|
|
|
0
|
return if $ignore_text; |
|
1345
|
0
|
|
|
|
|
0
|
$pos->push_content($text); |
|
1346
|
|
|
|
|
|
|
} |
|
1347
|
|
|
|
|
|
|
else { |
|
1348
|
|
|
|
|
|
|
|
|
1349
|
|
|
|
|
|
|
# return unless $text =~ /\S/; # This is sometimes wrong |
|
1350
|
|
|
|
|
|
|
|
|
1351
|
2587
|
100
|
100
|
|
|
30332
|
if ( !$self->{'_implicit_tags'} || $text !~ /[^\n\r\f\t ]/ ) { |
|
|
|
50
|
33
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
1352
|
|
|
|
|
|
|
|
|
1353
|
|
|
|
|
|
|
# don't change anything |
|
1354
|
|
|
|
|
|
|
} |
|
1355
|
|
|
|
|
|
|
elsif ( $ptag eq 'head' or $ptag eq 'noframes' ) { |
|
1356
|
0
|
0
|
|
|
|
0
|
if ( $self->{'_implicit_body_p_tag'} ) { |
|
1357
|
0
|
|
|
|
|
0
|
print $indent, |
|
1358
|
|
|
|
|
|
|
" * Text node under \U$ptag\E closes \U$ptag\E, implicates BODY and P.\n" |
|
1359
|
|
|
|
|
|
|
if DEBUG > 1; |
|
1360
|
0
|
|
|
|
|
0
|
$self->end( \$ptag ); |
|
1361
|
|
|
|
|
|
|
$pos = $self->{'_body'} |
|
1362
|
|
|
|
|
|
|
? ( $self->{'_pos'} |
|
1363
|
0
|
0
|
|
|
|
0
|
= $self->{'_body'} ) # expected case |
|
1364
|
|
|
|
|
|
|
: $self->insert_element( 'body', 1 ); |
|
1365
|
0
|
|
|
|
|
0
|
$pos = $self->insert_element( 'p', 1 ); |
|
1366
|
|
|
|
|
|
|
} |
|
1367
|
|
|
|
|
|
|
else { |
|
1368
|
0
|
|
|
|
|
0
|
print $indent, |
|
1369
|
|
|
|
|
|
|
" * Text node under \U$ptag\E closes, implicates BODY.\n" |
|
1370
|
|
|
|
|
|
|
if DEBUG > 1; |
|
1371
|
0
|
|
|
|
|
0
|
$self->end( \$ptag ); |
|
1372
|
|
|
|
|
|
|
$pos = $self->{'_body'} |
|
1373
|
|
|
|
|
|
|
? ( $self->{'_pos'} |
|
1374
|
0
|
0
|
|
|
|
0
|
= $self->{'_body'} ) # expected case |
|
1375
|
|
|
|
|
|
|
: $self->insert_element( 'body', 1 ); |
|
1376
|
|
|
|
|
|
|
} |
|
1377
|
|
|
|
|
|
|
} |
|
1378
|
|
|
|
|
|
|
elsif ( $ptag eq 'html' ) { |
|
1379
|
54
|
100
|
|
|
|
5695
|
if ( $self->{'_implicit_body_p_tag'} ) { |
|
1380
|
9
|
|
|
|
|
1389
|
print $indent, |
|
1381
|
|
|
|
|
|
|
" * Text node under HTML implicates BODY and P.\n" |
|
1382
|
|
|
|
|
|
|
if DEBUG > 1; |
|
1383
|
|
|
|
|
|
|
$pos = $self->{'_body'} |
|
1384
|
|
|
|
|
|
|
? ( $self->{'_pos'} |
|
1385
|
9
|
50
|
|
|
|
1378
|
= $self->{'_body'} ) # expected case |
|
1386
|
|
|
|
|
|
|
: $self->insert_element( 'body', 1 ); |
|
1387
|
9
|
|
|
|
|
1373
|
$pos = $self->insert_element( 'p', 1 ); |
|
1388
|
|
|
|
|
|
|
} |
|
1389
|
|
|
|
|
|
|
else { |
|
1390
|
45
|
|
|
|
|
4317
|
print $indent, |
|
1391
|
|
|
|
|
|
|
" * Text node under HTML implicates BODY.\n" |
|
1392
|
|
|
|
|
|
|
if DEBUG > 1; |
|
1393
|
|
|
|
|
|
|
$pos = $self->{'_body'} |
|
1394
|
|
|
|
|
|
|
? ( $self->{'_pos'} |
|
1395
|
45
|
50
|
|
|
|
12686
|
= $self->{'_body'} ) # expected case |
|
1396
|
|
|
|
|
|
|
: $self->insert_element( 'body', 1 ); |
|
1397
|
|
|
|
|
|
|
|
|
1398
|
|
|
|
|
|
|
#print "POS is $pos, ", $pos->{'_tag'}, "\n"; |
|
1399
|
|
|
|
|
|
|
} |
|
1400
|
|
|
|
|
|
|
} |
|
1401
|
|
|
|
|
|
|
elsif ( $ptag eq 'body' ) { |
|
1402
|
400
|
50
|
|
|
|
9760
|
if ( $self->{'_implicit_body_p_tag'} ) { |
|
1403
|
0
|
|
|
|
|
0
|
print $indent, " * Text node under BODY implicates P.\n" |
|
1404
|
|
|
|
|
|
|
if DEBUG > 1; |
|
1405
|
0
|
|
|
|
|
0
|
$pos = $self->insert_element( 'p', 1 ); |
|
1406
|
|
|
|
|
|
|
} |
|
1407
|
|
|
|
|
|
|
} |
|
1408
|
|
|
|
|
|
|
elsif ( $ptag eq 'table' ) { |
|
1409
|
8
|
|
|
|
|
12
|
print $indent, |
|
1410
|
|
|
|
|
|
|
" * Text node under TABLE implicates TR and TD.\n" |
|
1411
|
|
|
|
|
|
|
if DEBUG > 1; |
|
1412
|
8
|
|
|
|
|
28
|
$self->insert_element( 'tr', 1 ); |
|
1413
|
8
|
|
|
|
|
21
|
$pos = $self->insert_element( 'td', 1 ); |
|
1414
|
|
|
|
|
|
|
|
|
1415
|
|
|
|
|
|
|
# double whammy! |
|
1416
|
|
|
|
|
|
|
} |
|
1417
|
|
|
|
|
|
|
elsif ( $ptag eq 'tr' ) { |
|
1418
|
3
|
|
|
|
|
7
|
print $indent, " * Text node under TR implicates TD.\n" |
|
1419
|
|
|
|
|
|
|
if DEBUG > 1; |
|
1420
|
3
|
|
|
|
|
8
|
$pos = $self->insert_element( 'td', 1 ); |
|
1421
|
|
|
|
|
|
|
} |
|
1422
|
|
|
|
|
|
|
|
|
1423
|
|
|
|
|
|
|
# elsif ( |
|
1424
|
|
|
|
|
|
|
# # $ptag eq 'li' || |
|
1425
|
|
|
|
|
|
|
# # $ptag eq 'dd' || |
|
1426
|
|
|
|
|
|
|
# $ptag eq 'form') { |
|
1427
|
|
|
|
|
|
|
# $pos = $self->insert_element('p', 1); |
|
1428
|
|
|
|
|
|
|
#} |
|
1429
|
|
|
|
|
|
|
|
|
1430
|
|
|
|
|
|
|
# Whatever we've done above should have had the side |
|
1431
|
|
|
|
|
|
|
# effect of updating $self->{'_pos'} |
|
1432
|
|
|
|
|
|
|
|
|
1433
|
|
|
|
|
|
|
#print "POS is now $pos, ", $pos->{'_tag'}, "\n"; |
|
1434
|
|
|
|
|
|
|
|
|
1435
|
2587
|
50
|
|
|
|
17024
|
return if $ignore_text; |
|
1436
|
2587
|
100
|
|
|
|
21919
|
$text =~ s/[\n\r\f\t ]+/ /g # canonical space |
|
1437
|
|
|
|
|
|
|
unless $no_space_compacting; |
|
1438
|
|
|
|
|
|
|
|
|
1439
|
|
|
|
|
|
|
print $indent, " (Attaching text node ($nugget) under ", |
|
1440
|
|
|
|
|
|
|
|
|
1441
|
|
|
|
|
|
|
# was: $self->{'_pos'} ? $self->{'_pos'}{'_tag'} : $self->{'_tag'}, |
|
1442
|
2587
|
|
|
|
|
15537
|
$pos->{'_tag'}, ").\n" |
|
1443
|
|
|
|
|
|
|
if DEBUG > 1; |
|
1444
|
|
|
|
|
|
|
|
|
1445
|
2587
|
|
|
|
|
17650
|
$pos->push_content($text); |
|
1446
|
|
|
|
|
|
|
} |
|
1447
|
|
|
|
|
|
|
|
|
1448
|
2587
|
50
|
|
|
|
43748
|
&{ $self->{'_tweak_~text'} || return }( $text, $pos, |
|
1449
|
2587
|
|
|
|
|
16509
|
$pos->{'_tag'} . '' ); |
|
1450
|
|
|
|
|
|
|
|
|
1451
|
|
|
|
|
|
|
# Note that this is very exceptional -- it doesn't fall back to |
|
1452
|
|
|
|
|
|
|
# _tweak_*, and it gives its tweak different arguments. |
|
1453
|
0
|
|
|
|
|
0
|
return; |
|
1454
|
|
|
|
|
|
|
} |
|
1455
|
|
|
|
|
|
|
} |
|
1456
|
|
|
|
|
|
|
|
|
1457
|
|
|
|
|
|
|
#========================================================================== |
|
1458
|
|
|
|
|
|
|
|
|
1459
|
|
|
|
|
|
|
# TODO: test whether comment(), declaration(), and process(), do the right |
|
1460
|
|
|
|
|
|
|
# thing as far as tightening and whatnot. |
|
1461
|
|
|
|
|
|
|
# Also, currently, doctypes and comments that appear before head or body |
|
1462
|
|
|
|
|
|
|
# show up in the tree in the wrong place. Something should be done about |
|
1463
|
|
|
|
|
|
|
# this. Tricky. Maybe this whole business of pre-making the body and |
|
1464
|
|
|
|
|
|
|
# whatnot is wrong. |
|
1465
|
|
|
|
|
|
|
|
|
1466
|
|
|
|
|
|
|
sub comment { |
|
1467
|
195
|
50
|
|
195
|
1
|
1948
|
return if $_[0]{'_stunted'}; |
|
1468
|
|
|
|
|
|
|
|
|
1469
|
|
|
|
|
|
|
# Accept a "here's a comment" signal from HTML::Parser. |
|
1470
|
|
|
|
|
|
|
|
|
1471
|
195
|
|
|
|
|
414
|
my ( $self, $text ) = @_; |
|
1472
|
195
|
|
66
|
|
|
583
|
my $pos = $self->{'_pos'} || $self; |
|
1473
|
|
|
|
|
|
|
return |
|
1474
|
|
|
|
|
|
|
unless $self->{'_store_comments'} |
|
1475
|
195
|
100
|
66
|
|
|
1207
|
|| $HTML::Tagset::isCDATA_Parent{ $pos->{'_tag'} }; |
|
1476
|
|
|
|
|
|
|
|
|
1477
|
1
|
|
|
|
|
1
|
if (DEBUG) { |
|
1478
|
|
|
|
|
|
|
my @lineage_tags = $pos->lineage_tag_names; |
|
1479
|
|
|
|
|
|
|
my $indent = ' ' x ( 1 + @lineage_tags ); |
|
1480
|
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
my $nugget |
|
1482
|
|
|
|
|
|
|
= ( length($text) <= 25 ) |
|
1483
|
|
|
|
|
|
|
? $text |
|
1484
|
|
|
|
|
|
|
: ( substr( $text, 0, 25 ) . '...' ); |
|
1485
|
|
|
|
|
|
|
$nugget =~ s<([\x00-\x1F])> |
|
1486
|
|
|
|
|
|
|
<'\\x'.(unpack("H2",$1))>eg; |
|
1487
|
|
|
|
|
|
|
print $indent, "Proposing a Comment ($nugget) under ", |
|
1488
|
|
|
|
|
|
|
join( '/', reverse( $pos->{'_tag'}, @lineage_tags ) ) || 'Root', |
|
1489
|
|
|
|
|
|
|
".\n"; |
|
1490
|
|
|
|
|
|
|
} |
|
1491
|
|
|
|
|
|
|
|
|
1492
|
1
|
|
|
|
|
4
|
( my $e = $self->element_class->new('~comment') )->{'text'} = $text; |
|
1493
|
1
|
|
|
|
|
4
|
$pos->push_content($e); |
|
1494
|
1
|
|
|
|
|
2
|
++( $self->{'_element_count'} ); |
|
1495
|
|
|
|
|
|
|
|
|
1496
|
1
|
|
|
|
|
4
|
&{ $self->{'_tweak_~comment'} |
|
1497
|
1
|
50
|
33
|
|
|
7
|
|| $self->{'_tweak_*'} |
|
1498
|
|
|
|
|
|
|
|| return $e }( map $_, $e, '~comment', $self ); |
|
1499
|
|
|
|
|
|
|
|
|
1500
|
0
|
|
|
|
|
0
|
return $e; |
|
1501
|
|
|
|
|
|
|
} |
|
1502
|
|
|
|
|
|
|
|
|
1503
|
|
|
|
|
|
|
sub declaration { |
|
1504
|
17
|
50
|
|
17
|
1
|
1158
|
return if $_[0]{'_stunted'}; |
|
1505
|
|
|
|
|
|
|
|
|
1506
|
|
|
|
|
|
|
# Accept a "here's a markup declaration" signal from HTML::Parser. |
|
1507
|
|
|
|
|
|
|
|
|
1508
|
17
|
|
|
|
|
77
|
my ( $self, $text ) = @_; |
|
1509
|
17
|
|
33
|
|
|
80
|
my $pos = $self->{'_pos'} || $self; |
|
1510
|
|
|
|
|
|
|
|
|
1511
|
17
|
|
|
|
|
28
|
if (DEBUG) { |
|
1512
|
|
|
|
|
|
|
my @lineage_tags = $pos->lineage_tag_names; |
|
1513
|
|
|
|
|
|
|
my $indent = ' ' x ( 1 + @lineage_tags ); |
|
1514
|
|
|
|
|
|
|
|
|
1515
|
|
|
|
|
|
|
my $nugget |
|
1516
|
|
|
|
|
|
|
= ( length($text) <= 25 ) |
|
1517
|
|
|
|
|
|
|
? $text |
|
1518
|
|
|
|
|
|
|
: ( substr( $text, 0, 25 ) . '...' ); |
|
1519
|
|
|
|
|
|
|
$nugget =~ s<([\x00-\x1F])> |
|
1520
|
|
|
|
|
|
|
<'\\x'.(unpack("H2",$1))>eg; |
|
1521
|
|
|
|
|
|
|
print $indent, "Proposing a Declaration ($nugget) under ", |
|
1522
|
|
|
|
|
|
|
join( '/', reverse( $pos->{'_tag'}, @lineage_tags ) ) || 'Root', |
|
1523
|
|
|
|
|
|
|
".\n"; |
|
1524
|
|
|
|
|
|
|
} |
|
1525
|
17
|
|
|
|
|
47
|
( my $e = $self->element_class->new('~declaration') )->{'text'} = $text; |
|
1526
|
|
|
|
|
|
|
|
|
1527
|
17
|
|
|
|
|
39
|
$self->{_decl} = $e; |
|
1528
|
17
|
|
|
|
|
106
|
return $e; |
|
1529
|
|
|
|
|
|
|
} |
|
1530
|
|
|
|
|
|
|
|
|
1531
|
|
|
|
|
|
|
#========================================================================== |
|
1532
|
|
|
|
|
|
|
|
|
1533
|
|
|
|
|
|
|
sub process { |
|
1534
|
2
|
50
|
|
2
|
1
|
138
|
return if $_[0]{'_stunted'}; |
|
1535
|
|
|
|
|
|
|
|
|
1536
|
|
|
|
|
|
|
# Accept a "here's a PI" signal from HTML::Parser. |
|
1537
|
|
|
|
|
|
|
|
|
1538
|
2
|
50
|
|
|
|
17
|
return unless $_[0]->{'_store_pis'}; |
|
1539
|
0
|
|
|
|
|
0
|
my ( $self, $text ) = @_; |
|
1540
|
0
|
|
0
|
|
|
0
|
my $pos = $self->{'_pos'} || $self; |
|
1541
|
|
|
|
|
|
|
|
|
1542
|
0
|
|
|
|
|
0
|
if (DEBUG) { |
|
1543
|
|
|
|
|
|
|
my @lineage_tags = $pos->lineage_tag_names; |
|
1544
|
|
|
|
|
|
|
my $indent = ' ' x ( 1 + @lineage_tags ); |
|
1545
|
|
|
|
|
|
|
|
|
1546
|
|
|
|
|
|
|
my $nugget |
|
1547
|
|
|
|
|
|
|
= ( length($text) <= 25 ) |
|
1548
|
|
|
|
|
|
|
? $text |
|
1549
|
|
|
|
|
|
|
: ( substr( $text, 0, 25 ) . '...' ); |
|
1550
|
|
|
|
|
|
|
$nugget =~ s<([\x00-\x1F])> |
|
1551
|
|
|
|
|
|
|
<'\\x'.(unpack("H2",$1))>eg; |
|
1552
|
|
|
|
|
|
|
print $indent, "Proposing a PI ($nugget) under ", |
|
1553
|
|
|
|
|
|
|
join( '/', reverse( $pos->{'_tag'}, @lineage_tags ) ) || 'Root', |
|
1554
|
|
|
|
|
|
|
".\n"; |
|
1555
|
|
|
|
|
|
|
} |
|
1556
|
0
|
|
|
|
|
0
|
( my $e = $self->element_class->new('~pi') )->{'text'} = $text; |
|
1557
|
0
|
|
|
|
|
0
|
$pos->push_content($e); |
|
1558
|
0
|
|
|
|
|
0
|
++( $self->{'_element_count'} ); |
|
1559
|
|
|
|
|
|
|
|
|
1560
|
0
|
0
|
0
|
|
|
0
|
&{ $self->{'_tweak_~pi'} || $self->{'_tweak_*'} || return $e }( map $_, |
|
|
0
|
|
|
|
|
0
|
|
|
1561
|
|
|
|
|
|
|
$e, '~pi', $self ); |
|
1562
|
|
|
|
|
|
|
|
|
1563
|
0
|
|
|
|
|
0
|
return $e; |
|
1564
|
|
|
|
|
|
|
} |
|
1565
|
|
|
|
|
|
|
|
|
1566
|
|
|
|
|
|
|
#========================================================================== |
|
1567
|
|
|
|
|
|
|
|
|
1568
|
|
|
|
|
|
|
#When you call $tree->parse_file($filename), and the |
|
1569
|
|
|
|
|
|
|
#tree's ignore_ignorable_whitespace attribute is on (as it is |
|
1570
|
|
|
|
|
|
|
#by default), HTML::TreeBuilder's logic will manage to avoid |
|
1571
|
|
|
|
|
|
|
#creating some, but not all, nodes that represent ignorable |
|
1572
|
|
|
|
|
|
|
#whitespace. However, at the end of its parse, it traverses the |
|
1573
|
|
|
|
|
|
|
#tree and deletes any that it missed. (It does this with an |
|
1574
|
|
|
|
|
|
|
#around-method around HTML::Parser's eof method.) |
|
1575
|
|
|
|
|
|
|
# |
|
1576
|
|
|
|
|
|
|
#However, with $tree->parse($content), the cleanup-traversal step |
|
1577
|
|
|
|
|
|
|
#doesn't happen automatically -- so when you're done parsing all |
|
1578
|
|
|
|
|
|
|
#content for a document (regardless of whether $content is the only |
|
1579
|
|
|
|
|
|
|
#bit, or whether it's just another chunk of content you're parsing into |
|
1580
|
|
|
|
|
|
|
#the tree), call $tree->eof() to signal that you're at the end of the |
|
1581
|
|
|
|
|
|
|
#text you're inputting to the tree. Besides properly cleaning any bits |
|
1582
|
|
|
|
|
|
|
#of ignorable whitespace from the tree, this will also ensure that |
|
1583
|
|
|
|
|
|
|
#HTML::Parser's internal buffer is flushed. |
|
1584
|
|
|
|
|
|
|
|
|
1585
|
|
|
|
|
|
|
sub eof { |
|
1586
|
|
|
|
|
|
|
|
|
1587
|
|
|
|
|
|
|
# Accept an "end-of-file" signal from HTML::Parser, or thrown by the user. |
|
1588
|
|
|
|
|
|
|
|
|
1589
|
373
|
50
|
|
373
|
1
|
24207
|
return if $_[0]->{'_done'}; # we've already been here |
|
1590
|
|
|
|
|
|
|
|
|
1591
|
373
|
50
|
|
|
|
11906
|
return $_[0]->SUPER::eof() if $_[0]->{'_stunted'}; |
|
1592
|
|
|
|
|
|
|
|
|
1593
|
373
|
|
|
|
|
11746
|
my $x = $_[0]; |
|
1594
|
373
|
|
|
|
|
11587
|
print "EOF received.\n" if DEBUG; |
|
1595
|
373
|
|
|
|
|
11560
|
my (@rv); |
|
1596
|
373
|
50
|
|
|
|
11901
|
if (wantarray) { |
|
1597
|
|
|
|
|
|
|
|
|
1598
|
|
|
|
|
|
|
# I don't think this makes any difference for this particular |
|
1599
|
|
|
|
|
|
|
# method, but let's be scrupulous, for once. |
|
1600
|
0
|
|
|
|
|
0
|
@rv = $x->SUPER::eof(); |
|
1601
|
|
|
|
|
|
|
} |
|
1602
|
|
|
|
|
|
|
else { |
|
1603
|
373
|
|
|
|
|
18397
|
$rv[0] = $x->SUPER::eof(); |
|
1604
|
|
|
|
|
|
|
} |
|
1605
|
|
|
|
|
|
|
|
|
1606
|
373
|
100
|
66
|
|
|
13467
|
$x->end('html') unless $x eq ( $x->{'_pos'} || $x ); |
|
1607
|
|
|
|
|
|
|
|
|
1608
|
|
|
|
|
|
|
# That SHOULD close everything, and will run the appropriate tweaks. |
|
1609
|
|
|
|
|
|
|
# We /could/ be running under some insane mode such that there's more |
|
1610
|
|
|
|
|
|
|
# than one HTML element, but really, that's just insane to do anyhow. |
|
1611
|
|
|
|
|
|
|
|
|
1612
|
373
|
100
|
|
|
|
12124
|
unless ( $x->{'_implicit_tags'} ) { |
|
1613
|
|
|
|
|
|
|
|
|
1614
|
|
|
|
|
|
|
# delete those silly implicit head and body in case we put |
|
1615
|
|
|
|
|
|
|
# them there in implicit tags mode |
|
1616
|
9
|
|
|
|
|
22
|
foreach my $node ( $x->{'_head'}, $x->{'_body'} ) { |
|
1617
|
|
|
|
|
|
|
$node->replace_with_content |
|
1618
|
|
|
|
|
|
|
if defined $node |
|
1619
|
|
|
|
|
|
|
and ref $node |
|
1620
|
|
|
|
|
|
|
and $node->{'_implicit'} |
|
1621
|
18
|
50
|
33
|
|
|
167
|
and $node->{'_parent'}; |
|
|
|
|
33
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
1622
|
|
|
|
|
|
|
|
|
1623
|
|
|
|
|
|
|
# I think they should be empty anyhow, since the only |
|
1624
|
|
|
|
|
|
|
# logic that'd insert under them can apply only, I think, |
|
1625
|
|
|
|
|
|
|
# in the case where _implicit_tags is on |
|
1626
|
|
|
|
|
|
|
} |
|
1627
|
|
|
|
|
|
|
|
|
1628
|
|
|
|
|
|
|
# this may still leave an implicit 'html' at the top, but there's |
|
1629
|
|
|
|
|
|
|
# nothing we can do about that, is there? |
|
1630
|
|
|
|
|
|
|
} |
|
1631
|
|
|
|
|
|
|
|
|
1632
|
|
|
|
|
|
|
$x->delete_ignorable_whitespace() |
|
1633
|
|
|
|
|
|
|
|
|
1634
|
|
|
|
|
|
|
# this's why we trap this -- an after-method |
|
1635
|
373
|
100
|
66
|
|
|
13757
|
if $x->{'_tighten'} and !$x->{'_ignore_text'}; |
|
1636
|
373
|
|
|
|
|
11808
|
$x->{'_done'} = 1; |
|
1637
|
|
|
|
|
|
|
|
|
1638
|
373
|
50
|
|
|
|
11946
|
return @rv if wantarray; |
|
1639
|
373
|
|
|
|
|
23805
|
return $rv[0]; |
|
1640
|
|
|
|
|
|
|
} |
|
1641
|
|
|
|
|
|
|
|
|
1642
|
|
|
|
|
|
|
#========================================================================== |
|
1643
|
|
|
|
|
|
|
|
|
1644
|
|
|
|
|
|
|
# TODO: document |
|
1645
|
|
|
|
|
|
|
|
|
1646
|
|
|
|
|
|
|
sub stunt { |
|
1647
|
0
|
|
|
0
|
1
|
0
|
my $self = $_[0]; |
|
1648
|
0
|
|
|
|
|
0
|
print "Stunting the tree.\n" if DEBUG; |
|
1649
|
0
|
|
|
|
|
0
|
$self->{'_done'} = 1; |
|
1650
|
|
|
|
|
|
|
|
|
1651
|
0
|
0
|
|
|
|
0
|
if ( $HTML::Parser::VERSION < 3 ) { |
|
1652
|
|
|
|
|
|
|
|
|
1653
|
|
|
|
|
|
|
#This is a MEAN MEAN HACK. And it works most of the time! |
|
1654
|
0
|
|
|
|
|
0
|
$self->{'_buf'} = ''; |
|
1655
|
0
|
|
|
|
|
0
|
my $fh = *HTML::Parser::F{IO}; |
|
1656
|
|
|
|
|
|
|
|
|
1657
|
|
|
|
|
|
|
# the local'd FH used by parse_file loop |
|
1658
|
0
|
0
|
|
|
|
0
|
if ( defined $fh ) { |
|
1659
|
0
|
|
|
|
|
0
|
print "Closing Parser's filehandle $fh\n" if DEBUG; |
|
1660
|
0
|
|
|
|
|
0
|
close($fh); |
|
1661
|
|
|
|
|
|
|
} |
|
1662
|
|
|
|
|
|
|
|
|
1663
|
|
|
|
|
|
|
# But if they called $tree->parse_file($filehandle) |
|
1664
|
|
|
|
|
|
|
# or $tree->parse_file(*IO), then there will be no *HTML::Parser::F{IO} |
|
1665
|
|
|
|
|
|
|
# to close. Ahwell. Not a problem for most users these days. |
|
1666
|
|
|
|
|
|
|
|
|
1667
|
|
|
|
|
|
|
} |
|
1668
|
|
|
|
|
|
|
else { |
|
1669
|
0
|
|
|
|
|
0
|
$self->SUPER::eof(); |
|
1670
|
|
|
|
|
|
|
|
|
1671
|
|
|
|
|
|
|
# Under 3+ versions, calling eof from inside a parse will abort the |
|
1672
|
|
|
|
|
|
|
# parse / parse_file |
|
1673
|
|
|
|
|
|
|
} |
|
1674
|
|
|
|
|
|
|
|
|
1675
|
|
|
|
|
|
|
# In the off chance that the above didn't work, we'll throw |
|
1676
|
|
|
|
|
|
|
# this flag to make any future events be no-ops. |
|
1677
|
0
|
|
|
|
|
0
|
$self->stunted(1); |
|
1678
|
0
|
|
|
|
|
0
|
return; |
|
1679
|
|
|
|
|
|
|
} |
|
1680
|
|
|
|
|
|
|
|
|
1681
|
|
|
|
|
|
|
# TODO: document |
|
1682
|
0
|
|
|
0
|
1
|
0
|
sub stunted { shift->_elem( '_stunted', @_ ); } |
|
1683
|
0
|
|
|
0
|
1
|
0
|
sub done { shift->_elem( '_done', @_ ); } |
|
1684
|
|
|
|
|
|
|
|
|
1685
|
|
|
|
|
|
|
#========================================================================== |
|
1686
|
|
|
|
|
|
|
|
|
1687
|
|
|
|
|
|
|
sub delete { |
|
1688
|
|
|
|
|
|
|
|
|
1689
|
|
|
|
|
|
|
# Override Element's delete method. |
|
1690
|
|
|
|
|
|
|
# This does most, if not all, of what Element's delete does anyway. |
|
1691
|
|
|
|
|
|
|
# Deletes content, including content in some special attributes. |
|
1692
|
|
|
|
|
|
|
# But doesn't empty out the hash. |
|
1693
|
|
|
|
|
|
|
|
|
1694
|
270
|
|
|
270
|
1
|
90470
|
$_[0]->{'_element_count'} = 1; # never hurts to be scrupulously correct |
|
1695
|
|
|
|
|
|
|
|
|
1696
|
270
|
|
|
|
|
4095
|
delete @{ $_[0] }{ '_body', '_head', '_pos' }; |
|
|
270
|
|
|
|
|
8212
|
|
|
1697
|
270
|
|
|
|
|
4053
|
for ( |
|
1698
|
270
|
50
|
|
|
|
8288
|
@{ delete( $_[0]->{'_content'} ) || [] }, # all/any content |
|
1699
|
|
|
|
|
|
|
|
|
1700
|
|
|
|
|
|
|
# delete @{$_[0]}{'_body', '_head', '_pos'} |
|
1701
|
|
|
|
|
|
|
# ...and these, in case these elements don't appear in the |
|
1702
|
|
|
|
|
|
|
# content, which is possible. If they did appear (as they |
|
1703
|
|
|
|
|
|
|
# usually do), then calling $_->delete on them again is harmless. |
|
1704
|
|
|
|
|
|
|
# I don't think that's such a hot idea now. Thru creative reattachment, |
|
1705
|
|
|
|
|
|
|
# those could actually now point to elements in OTHER trees (which we do |
|
1706
|
|
|
|
|
|
|
# NOT want to delete!). |
|
1707
|
|
|
|
|
|
|
## Reasoned out: |
|
1708
|
|
|
|
|
|
|
# If these point to elements not in the content list of any element in this |
|
1709
|
|
|
|
|
|
|
# tree, but not in the content list of any element in any OTHER tree, then |
|
1710
|
|
|
|
|
|
|
# just deleting these will make their refcounts hit zero. |
|
1711
|
|
|
|
|
|
|
# If these point to elements in the content lists of elements in THIS tree, |
|
1712
|
|
|
|
|
|
|
# then we'll get to deleting them when we delete from the top. |
|
1713
|
|
|
|
|
|
|
# If these point to elements in the content lists of elements in SOME OTHER |
|
1714
|
|
|
|
|
|
|
# tree, then they're not to be deleted. |
|
1715
|
|
|
|
|
|
|
) |
|
1716
|
|
|
|
|
|
|
{ |
|
1717
|
540
|
50
|
33
|
|
|
7840
|
$_->delete |
|
|
|
|
33
|
|
|
|
|
|
1718
|
|
|
|
|
|
|
if defined $_ and ref $_ # Make sure it's an object. |
|
1719
|
|
|
|
|
|
|
and $_ ne $_[0]; # And avoid hitting myself, just in case! |
|
1720
|
|
|
|
|
|
|
} |
|
1721
|
|
|
|
|
|
|
|
|
1722
|
270
|
0
|
33
|
|
|
4545
|
$_[0]->detach if $_[0]->{'_parent'} and $_[0]->{'_parent'}{'_content'}; |
|
1723
|
|
|
|
|
|
|
|
|
1724
|
|
|
|
|
|
|
# An 'html' element having a parent is quite unlikely. |
|
1725
|
|
|
|
|
|
|
|
|
1726
|
270
|
|
|
|
|
17306
|
return; |
|
1727
|
|
|
|
|
|
|
} |
|
1728
|
|
|
|
|
|
|
|
|
1729
|
|
|
|
|
|
|
sub tighten_up { # legacy |
|
1730
|
0
|
|
|
0
|
1
|
0
|
shift->delete_ignorable_whitespace(@_); |
|
1731
|
|
|
|
|
|
|
} |
|
1732
|
|
|
|
|
|
|
|
|
1733
|
|
|
|
|
|
|
sub elementify { |
|
1734
|
|
|
|
|
|
|
|
|
1735
|
|
|
|
|
|
|
# Rebless this object down into the normal element class. |
|
1736
|
27
|
|
|
27
|
1
|
4295
|
my $self = $_[0]; |
|
1737
|
27
|
|
|
|
|
4337
|
my $to_class = $self->element_class; |
|
1738
|
27
|
|
|
|
|
8874
|
delete @{$self}{ |
|
1739
|
|
|
|
|
|
|
grep { |
|
1740
|
27
|
|
|
|
|
4412
|
; |
|
1741
|
621
|
100
|
33
|
|
|
206618
|
length $_ and substr( $_, 0, 1 ) eq '_' |
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
1742
|
|
|
|
|
|
|
|
|
1743
|
|
|
|
|
|
|
# The private attributes that we'll retain: |
|
1744
|
|
|
|
|
|
|
and $_ ne '_tag' |
|
1745
|
|
|
|
|
|
|
and $_ ne '_parent' |
|
1746
|
|
|
|
|
|
|
and $_ ne '_content' |
|
1747
|
|
|
|
|
|
|
and $_ ne '_implicit' |
|
1748
|
|
|
|
|
|
|
and $_ ne '_pos' |
|
1749
|
|
|
|
|
|
|
and $_ ne '_element_class' |
|
1750
|
|
|
|
|
|
|
and $_ ne '_encoding' |
|
1751
|
|
|
|
|
|
|
} keys %$self |
|
1752
|
|
|
|
|
|
|
}; |
|
1753
|
27
|
|
|
|
|
8696
|
bless $self, $to_class; # Returns the same object we were fed |
|
1754
|
|
|
|
|
|
|
} |
|
1755
|
|
|
|
|
|
|
|
|
1756
|
|
|
|
|
|
|
sub element_class { |
|
1757
|
4637
|
100
|
|
4637
|
1
|
67216
|
return 'HTML::Element' if not ref $_[0]; |
|
1758
|
4258
|
|
50
|
|
|
89971
|
return $_[0]->{_element_class} || 'HTML::Element'; |
|
1759
|
|
|
|
|
|
|
} |
|
1760
|
|
|
|
|
|
|
|
|
1761
|
|
|
|
|
|
|
#-------------------------------------------------------------------------- |
|
1762
|
|
|
|
|
|
|
|
|
1763
|
|
|
|
|
|
|
sub guts { |
|
1764
|
0
|
|
|
0
|
1
|
|
my @out; |
|
1765
|
0
|
|
|
|
|
|
my @stack = ( $_[0] ); |
|
1766
|
0
|
|
|
|
|
|
my $destructive = $_[1]; |
|
1767
|
0
|
|
|
|
|
|
my $this; |
|
1768
|
0
|
|
|
|
|
|
while (@stack) { |
|
1769
|
0
|
|
|
|
|
|
$this = shift @stack; |
|
1770
|
0
|
0
|
|
|
|
|
if ( !ref $this ) { |
|
|
|
0
|
|
|
|
|
|
|
1771
|
0
|
|
|
|
|
|
push @out, $this; # yes, it can include text nodes |
|
1772
|
|
|
|
|
|
|
} |
|
1773
|
|
|
|
|
|
|
elsif ( !$this->{'_implicit'} ) { |
|
1774
|
0
|
|
|
|
|
|
push @out, $this; |
|
1775
|
0
|
0
|
|
|
|
|
delete $this->{'_parent'} if $destructive; |
|
1776
|
|
|
|
|
|
|
} |
|
1777
|
|
|
|
|
|
|
else { |
|
1778
|
|
|
|
|
|
|
|
|
1779
|
|
|
|
|
|
|
# it's an implicit node. Delete it and recurse |
|
1780
|
0
|
0
|
|
|
|
|
delete $this->{'_parent'} if $destructive; |
|
1781
|
|
|
|
|
|
|
unshift @stack, |
|
1782
|
|
|
|
|
|
|
@{ |
|
1783
|
0
|
|
|
|
|
|
( $destructive |
|
1784
|
|
|
|
|
|
|
? delete( $this->{'_content'} ) |
|
1785
|
0
|
0
|
|
|
|
|
: $this->{'_content'} |
|
|
|
0
|
|
|
|
|
|
|
1786
|
|
|
|
|
|
|
) |
|
1787
|
|
|
|
|
|
|
|| [] |
|
1788
|
|
|
|
|
|
|
}; |
|
1789
|
|
|
|
|
|
|
} |
|
1790
|
|
|
|
|
|
|
} |
|
1791
|
|
|
|
|
|
|
|
|
1792
|
|
|
|
|
|
|
# Doesn't call a real $root->delete on the (when implicit) root, |
|
1793
|
|
|
|
|
|
|
# but I don't think it needs to. |
|
1794
|
|
|
|
|
|
|
|
|
1795
|
0
|
0
|
|
|
|
|
return @out if wantarray; # one simple normal case. |
|
1796
|
0
|
0
|
|
|
|
|
return unless @out; |
|
1797
|
0
|
0
|
0
|
|
|
|
return $out[0] if @out == 1 and ref( $out[0] ); |
|
1798
|
0
|
|
|
|
|
|
my $x = HTML::Element->new( 'div', '_implicit' => 1 ); |
|
1799
|
0
|
|
|
|
|
|
$x->push_content(@out); |
|
1800
|
0
|
|
|
|
|
|
return $x; |
|
1801
|
|
|
|
|
|
|
} |
|
1802
|
|
|
|
|
|
|
|
|
1803
|
0
|
|
|
0
|
1
|
|
sub disembowel { $_[0]->guts(1) } |
|
1804
|
|
|
|
|
|
|
|
|
1805
|
|
|
|
|
|
|
#-------------------------------------------------------------------------- |
|
1806
|
|
|
|
|
|
|
1; |
|
1807
|
|
|
|
|
|
|
|
|
1808
|
|
|
|
|
|
|
__END__ |