|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package HTML::TreeBuilder;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # ABSTRACT: Parser that builds a HTML syntax tree  | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
5
 | 
16
 | 
 
 | 
 
 | 
  
16
  
 | 
 
 | 
659682
 | 
 use warnings;  | 
| 
 
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34
 | 
    | 
| 
 
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
508
 | 
    | 
| 
6
 | 
16
 | 
 
 | 
 
 | 
  
16
  
 | 
 
 | 
74
 | 
 use strict;  | 
| 
 
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
    | 
| 
 
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
263
 | 
    | 
| 
7
 | 
16
 | 
 
 | 
 
 | 
  
16
  
 | 
 
 | 
4434
 | 
 use integer;    # vroom vroom!  | 
| 
 
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
191
 | 
    | 
| 
 
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
66
 | 
    | 
| 
8
 | 
16
 | 
 
 | 
 
 | 
  
16
  
 | 
 
 | 
404
 | 
 use Carp ();  | 
| 
 
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
    | 
| 
 
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1879
 | 
    | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $VERSION = '5.07'; # VERSION from OurPkgVersion  | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #---------------------------------------------------------------------------  | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Make a 'DEBUG' constant...  | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $DEBUG; # Must be set BEFORE loading this file  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 BEGIN {  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # We used to have things like  | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #  print $indent, "lalala" if $Debug;  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # But there were an awful lot of having to evaluate $Debug's value.  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # If we make that depend on a constant, like so:  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #   sub DEBUG () { 1 } # or whatever value.  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #   ...  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #   print $indent, "lalala" if DEBUG;  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Which at compile-time (thru the miracle of constant folding) turns into:  | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #   print $indent, "lalala";  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # or, if DEBUG is a constant with a true value, then that print statement  | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # is simply optimized away, and doesn't appear in the target code at all.  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # If you don't believe me, run:  | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #    perl -MO=Deparse,-uHTML::TreeBuilder -e 'BEGIN { \  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #      $HTML::TreeBuilder::DEBUG = 4}  use HTML::TreeBuilder'  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # and see for yourself (substituting whatever value you want for $DEBUG  | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # there).  | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## no critic  | 
| 
35
 | 
16
 | 
  
 50
  
 | 
 
 | 
  
16
  
 | 
 
 | 
173
 | 
     if ( defined &DEBUG ) {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Already been defined!  Do nothing.  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif ( $] < 5.00404 ) {  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Grudgingly accomodate ancient (pre-constant) versions.  | 
| 
42
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         eval 'sub DEBUG { $Debug } ';  | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif ( !$DEBUG ) {  | 
| 
45
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
792
 | 
         eval 'sub DEBUG () {0}';    # Make it a constant.  | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif ( $DEBUG =~ m<^\d+$>s ) {  | 
| 
48
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         eval 'sub DEBUG () { ' . $DEBUG . ' }';    # Make THAT a constant.  | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {                                         # WTF?  | 
| 
51
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         warn "Non-numeric value \"$DEBUG\" in \$HTML::Element::DEBUG";  | 
| 
52
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         eval 'sub DEBUG () { $DEBUG }';            # I guess.  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## use critic  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #---------------------------------------------------------------------------  | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
59
 | 
16
 | 
 
 | 
 
 | 
  
16
  
 | 
 
 | 
4434
 | 
 use HTML::Entities ();  | 
| 
 
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
70682
 | 
    | 
| 
 
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
564
 | 
    | 
| 
60
 | 
16
 | 
 
 | 
 
 | 
  
16
  
 | 
 
 | 
4408
 | 
 use HTML::Tagset 3.02 ();  | 
| 
 
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16051
 | 
    | 
| 
 
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
421
 | 
    | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
62
 | 
16
 | 
 
 | 
 
 | 
  
16
  
 | 
 
 | 
9353
 | 
 use HTML::Element ();  | 
| 
 
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
46
 | 
    | 
| 
 
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
495
 | 
    | 
| 
63
 | 
16
 | 
 
 | 
 
 | 
  
16
  
 | 
 
 | 
103
 | 
 use HTML::Parser 3.46 ();  | 
| 
 
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
374
 | 
    | 
| 
 
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
86598
 | 
    | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our @ISA = qw(HTML::Element HTML::Parser);  | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # This looks schizoid, I know.  | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # It's not that we ARE an element AND a parser.  | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # We ARE an element, but one that knows how to handle signals  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  (method calls) from Parser in order to elaborate its subtree.  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Legacy aliases:  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 *HTML::TreeBuilder::isKnown             = \%HTML::Tagset::isKnown;  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 *HTML::TreeBuilder::canTighten          = \%HTML::Tagset::canTighten;  | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 *HTML::TreeBuilder::isHeadElement       = \%HTML::Tagset::isHeadElement;  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 *HTML::TreeBuilder::isBodyElement       = \%HTML::Tagset::isBodyElement;  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 *HTML::TreeBuilder::isPhraseMarkup      = \%HTML::Tagset::isPhraseMarkup;  | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 *HTML::TreeBuilder::isHeadOrBodyElement = \%HTML::Tagset::isHeadOrBodyElement;  | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 *HTML::TreeBuilder::isList              = \%HTML::Tagset::isList;  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 *HTML::TreeBuilder::isTableElement      = \%HTML::Tagset::isTableElement;  | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 *HTML::TreeBuilder::isFormElement       = \%HTML::Tagset::isFormElement;  | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 *HTML::TreeBuilder::p_closure_barriers  = \@HTML::Tagset::p_closure_barriers;  | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #==========================================================================  | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Two little shortcut constructors:  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new_from_file {    # or from a FH  | 
| 
87
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
  
1
  
 | 
8
 | 
     my $class = shift;  | 
| 
88
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
12
 | 
     Carp::croak("new_from_file takes only one argument")  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         unless @_ == 1;  | 
| 
90
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
10
 | 
     Carp::croak("new_from_file is a class method only")  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         if ref $class;  | 
| 
92
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
     my $new = $class->new();  | 
| 
93
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
23
 | 
     defined $new->parse_file( $_[0] )  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         or Carp::croak("unable to parse file: $!");  | 
| 
95
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
     return $new;  | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new_from_content {    # from any number of scalars  | 
| 
99
 | 
8
 | 
 
 | 
 
 | 
  
8
  
 | 
  
1
  
 | 
2005
 | 
     my $class = shift;  | 
| 
100
 | 
8
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
30
 | 
     Carp::croak("new_from_content is a class method only")  | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         if ref $class;  | 
| 
102
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
     my $new = $class->new();  | 
| 
103
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
     foreach my $whunk (@_) {  | 
| 
104
 | 
9
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
28
 | 
         if ( ref($whunk) eq 'SCALAR' ) {  | 
| 
105
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
             $new->parse($$whunk);  | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
108
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
71
 | 
             $new->parse($whunk);  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
110
 | 
9
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
60
 | 
         last if $new->{'_stunted'};    # might as well check that.  | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
112
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
     $new->eof();  | 
| 
113
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
     return $new;  | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new_from_url {                     # should accept anything that LWP does.  | 
| 
117
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
14
 | 
     undef our $lwp_response;  | 
| 
118
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     my $class = shift;  | 
| 
119
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
17
 | 
     Carp::croak("new_from_url takes only one argument")  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         unless @_ == 1;  | 
| 
121
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
12
 | 
     Carp::croak("new_from_url is a class method only")  | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         if ref $class;  | 
| 
123
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     my $url = shift;  | 
| 
124
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
     my $new = $class->new();  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
126
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
     require LWP::UserAgent;  | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # RECOMMEND PREREQ: LWP::UserAgent 5.815  | 
| 
128
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
79
 | 
     LWP::UserAgent->VERSION( 5.815 ); # HTTP::Headers content_is_html method  | 
| 
129
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
54
 | 
     $lwp_response = LWP::UserAgent->new->get( $url );  | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
131
 | 
4
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
33516
 | 
     Carp::croak("GET failed on $url: " . $lwp_response->status_line)  | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           unless $lwp_response->is_success;  | 
| 
133
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
34
 | 
     Carp::croak("$url returned " . $lwp_response->content_type . " not HTML")  | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           unless $lwp_response->content_is_html;  | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
136
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
61
 | 
     $new->parse( $lwp_response->decoded_content );  | 
| 
137
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     $new->eof;  | 
| 
138
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
44
 | 
     undef $lwp_response;        # Processed successfully  | 
| 
139
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
     return $new;  | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # TODO: document more fully?  | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub parse_content {    # from any number of scalars  | 
| 
144
 | 
9
 | 
 
 | 
 
 | 
  
9
  
 | 
  
1
  
 | 
34
 | 
     my $tree = shift;  | 
| 
145
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
     my $retval;  | 
| 
146
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
     foreach my $whunk (@_) {  | 
| 
147
 | 
10
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
25
 | 
         if ( ref($whunk) eq 'SCALAR' ) {  | 
| 
148
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
             $retval = $tree->parse($$whunk);  | 
| 
149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
151
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
97
 | 
             $retval = $tree->parse($whunk);  | 
| 
152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
153
 | 
10
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
30
 | 
         last if $tree->{'_stunted'};    # might as well check that.  | 
| 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
155
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
     $tree->eof();  | 
| 
156
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
     return $retval;  | 
| 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #---------------------------------------------------------------------------  | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new {                               # constructor!  | 
| 
162
 | 
291
 | 
 
 | 
 
 | 
  
291
  
 | 
  
1
  
 | 
91675
 | 
     my $class = shift;  | 
| 
163
 | 
291
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
1028
 | 
     $class = ref($class) || $class;  | 
| 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Initialize HTML::Element part  | 
| 
166
 | 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
641
 | 
     my $self = $class->element_class->new('html');  | 
| 
167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # A hack for certain strange versions of Parser:  | 
| 
171
 | 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
390
 | 
         my $other_self = HTML::Parser->new();  | 
| 
 
 | 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
801
 | 
    | 
| 
172
 | 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12538
 | 
         %$self = ( %$self, %$other_self );    # copy fields  | 
| 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            # Yes, multiple inheritance is messy.  Kids, don't try this at home.  | 
| 
174
 | 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
904
 | 
         bless $other_self, "HTML::TreeBuilder::_hideyhole";  | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # whack it out of the HTML::Parser class, to avoid the destructor  | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # The root of the tree is special, as it has these funny attributes,  | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # and gets reblessed into this class.  | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Initialize parser settings  | 
| 
183
 | 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
518
 | 
     $self->{'_implicit_tags'}       = 1;  | 
| 
184
 | 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
414
 | 
     $self->{'_implicit_body_p_tag'} = 0;  | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # If true, trying to insert text, or any of %isPhraseMarkup right  | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #  under 'body' will implicate a 'p'.  If false, will just go there.  | 
| 
188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
189
 | 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
390
 | 
     $self->{'_tighten'} = 1;  | 
| 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # whether ignorable WS in this tree should be deleted  | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
193
 | 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
384
 | 
     $self->{'_implicit'} = 1; # to delete, once we find a real open-"html" tag  | 
| 
194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
195
 | 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
660
 | 
     $self->{'_ignore_unknown'}      = 1;  | 
| 
196
 | 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
404
 | 
     $self->{'_ignore_text'}         = 0;  | 
| 
197
 | 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
377
 | 
     $self->{'_warn'}                = 0;  | 
| 
198
 | 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
441
 | 
     $self->{'_no_space_compacting'} = 0;  | 
| 
199
 | 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
404
 | 
     $self->{'_store_comments'}      = 0;  | 
| 
200
 | 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
394
 | 
     $self->{'_store_declarations'}  = 1;  | 
| 
201
 | 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
408
 | 
     $self->{'_store_pis'}           = 0;  | 
| 
202
 | 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
407
 | 
     $self->{'_p_strict'}            = 0;  | 
| 
203
 | 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
585
 | 
     $self->{'_no_expand_entities'}  = 0;  | 
| 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Parse attributes passed in as arguments  | 
| 
206
 | 
291
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
555
 | 
     if (@_) {  | 
| 
207
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
         my %attr = @_;  | 
| 
208
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
         for ( keys %attr ) {  | 
| 
209
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
             $self->{"_$_"} = $attr{$_};  | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
213
 | 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
418
 | 
     $HTML::Element::encoded_content = $self->{'_no_expand_entities'};  | 
| 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # rebless to our class  | 
| 
216
 | 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
408
 | 
     bless $self, $class;  | 
| 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
218
 | 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
424
 | 
     $self->{'_element_count'} = 1;  | 
| 
219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # undocumented, informal, and maybe not exactly correct  | 
| 
221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
222
 | 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
706
 | 
     $self->{'_head'} = $self->insert_element( 'head', 1 );  | 
| 
223
 | 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
460
 | 
     $self->{'_pos'}  = undef;                                # pull it back up  | 
| 
224
 | 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
526
 | 
     $self->{'_body'} = $self->insert_element( 'body', 1 );  | 
| 
225
 | 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
462
 | 
     $self->{'_pos'} = undef;    # pull it back up again  | 
| 
226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
227
 | 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
547
 | 
     return $self;  | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #==========================================================================  | 
| 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _elem                       # universal accessor...  | 
| 
233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
234
 | 
21
 | 
 
 | 
 
 | 
  
21
  
 | 
 
 | 
40
 | 
     my ( $self, $elem, $val ) = @_;  | 
| 
235
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
     my $old = $self->{$elem};  | 
| 
236
 | 
21
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
49
 | 
     $self->{$elem} = $val if defined $val;  | 
| 
237
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
     return $old;  | 
| 
238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # accessors....  | 
| 
241
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
  
1
  
 | 
36
 | 
 sub implicit_tags       { shift->_elem( '_implicit_tags',       @_ ); }  | 
| 
242
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
20
 | 
 sub implicit_body_p_tag { shift->_elem( '_implicit_body_p_tag', @_ ); }  | 
| 
243
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
 sub p_strict            { shift->_elem( '_p_strict',            @_ ); }  | 
| 
244
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
  
1
  
 | 
17
 | 
 sub no_space_compacting { shift->_elem( '_no_space_compacting', @_ ); }  | 
| 
245
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
 sub ignore_unknown      { shift->_elem( '_ignore_unknown',      @_ ); }  | 
| 
246
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
 sub ignore_text         { shift->_elem( '_ignore_text',         @_ ); }  | 
| 
247
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
  
1
  
 | 
19
 | 
 sub ignore_ignorable_whitespace { shift->_elem( '_tighten',            @_ ); }  | 
| 
248
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
6
 | 
 sub store_comments              { shift->_elem( '_store_comments',     @_ ); }  | 
| 
249
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
373
 | 
 sub store_declarations          { shift->_elem( '_store_declarations', @_ ); }  | 
| 
250
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
 sub store_pis                   { shift->_elem( '_store_pis',          @_ ); }  | 
| 
251
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
 sub warn                        { shift->_elem( '_warn',               @_ ); }  | 
| 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub no_expand_entities {  | 
| 
254
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     shift->_elem( '_no_expand_entities', @_ );  | 
| 
255
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $HTML::Element::encoded_content = @_;  | 
| 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #==========================================================================  | 
| 
259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub warning {  | 
| 
261
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
1
 | 
     my $self = shift;  | 
| 
262
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
     CORE::warn("HTML::Parse: $_[0]\n") if $self->{'_warn'};  | 
| 
263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # should maybe say HTML::TreeBuilder instead  | 
| 
265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #==========================================================================  | 
| 
268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # To avoid having to rebuild these lists constantly...  | 
| 
272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $_Closed_by_structurals = [qw(p h1 h2 h3 h4 h5 h6 pre textarea)];  | 
| 
273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $indent;  | 
| 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sub start {  | 
| 
276
 | 
1005
 | 
  
 50
  
 | 
 
 | 
  
1005
  
 | 
  
1
  
 | 
3190
 | 
         return if $_[0]{'_stunted'};  | 
| 
277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Accept a signal from HTML::Parser for start-tags.  | 
| 
279
 | 
1005
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1859
 | 
         my ( $self, $tag, $attr ) = @_;  | 
| 
280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Parser passes more, actually:  | 
| 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         #   $self->start($tag, $attr, $attrseq, $origtext)  | 
| 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # But we can merrily ignore $attrseq and $origtext.  | 
| 
284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
285
 | 
1005
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
1803
 | 
         if ( $tag eq 'x-html' ) {  | 
| 
286
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             print "Ignoring open-x-html tag.\n" if DEBUG;  | 
| 
287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # inserted by some lame code-generators.  | 
| 
289
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             return;    # bypass tweaking.  | 
| 
290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
292
 | 
1005
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1628
 | 
         $tag =~ s{/$}{}s;    # So  turns into .  Silently forgive.  | 
| 
293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
294
 | 
1005
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
2913
 | 
         unless ( $tag =~ m/^[-_a-zA-Z0-9:%]+$/s ) {  | 
| 
295
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             DEBUG and print "Start-tag name $tag is no good.  Skipping.\n";  | 
| 
296
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             return;  | 
| 
297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # This avoids having Element's new() throw an exception.  | 
| 
299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
301
 | 
1005
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
2342
 | 
         my $ptag = ( my $pos = $self->{'_pos'} || $self )->{'_tag'};  | 
| 
302
 | 
1005
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1244
 | 
         my $already_inserted;  | 
| 
303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         #my($indent);  | 
| 
305
 | 
1005
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1087
 | 
         if (DEBUG) {  | 
| 
306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        # optimization -- don't figure out indenting unless we're in debug mode  | 
| 
308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             my @lineage = $pos->lineage;  | 
| 
309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $indent = '  ' x ( 1 + @lineage );  | 
| 
310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             print $indent, "Proposing a new \U$tag\E under ",  | 
| 
311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 join( '/', map $_->{'_tag'}, reverse( $pos, @lineage ) )  | 
| 
312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 || 'Root',  | 
| 
313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ".\n";  | 
| 
314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             #} else {  | 
| 
316
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             #  $indent = ' ';  | 
| 
317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         #print $indent, "POS: $pos ($ptag)\n" if DEBUG > 2;  | 
| 
320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # $attr = {%$attr};  | 
| 
321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
322
 | 
1005
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2362
 | 
         foreach my $k ( keys %$attr ) {  | 
| 
323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # Make sure some stooge doesn't have "".  | 
| 
325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # That happens every few million Web pages.  | 
| 
326
 | 
503
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
1531
 | 
             $attr->{ ' ' . $k } = delete $attr->{$k}  | 
| 
327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 if length $k and substr( $k, 0, 1 ) eq '_';  | 
| 
328
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # Looks bad, but is fine for round-tripping.  | 
| 
330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
332
 | 
1005
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1836
 | 
         my $e = $self->element_class->new( $tag, %$attr );  | 
| 
333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Make a new element object.  | 
| 
335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # (Only rarely do we end up just throwing it away later in this call.)  | 
| 
336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       # Some prep -- custom messiness for those damned tables, and strict P's.  | 
| 
338
 | 
1005
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2006
 | 
         if ( $self->{'_implicit_tags'} ) {    # wallawallawalla!  | 
| 
339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
340
 | 
983
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1880
 | 
             unless ( $HTML::TreeBuilder::isTableElement{$tag} ) {  | 
| 
341
 | 
829
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
1673
 | 
                 if ( $ptag eq 'table' ) {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
342
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     print $indent,  | 
| 
343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         " * Phrasal \U$tag\E right under TABLE makes implicit TR and TD\n"  | 
| 
344
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         if DEBUG > 1;  | 
| 
345
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     $self->insert_element( 'tr', 1 );  | 
| 
346
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     $pos = $self->insert_element( 'td', 1 )  | 
| 
347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         ;                     # yes, needs updating  | 
| 
348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 elsif ( $ptag eq 'tr' ) {  | 
| 
350
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     print $indent,  | 
| 
351
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         " * Phrasal \U$tag\E right under TR makes an implicit TD\n"  | 
| 
352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         if DEBUG > 1;  | 
| 
353
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     $pos = $self->insert_element( 'td', 1 )  | 
| 
354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         ;                     # yes, needs updating  | 
| 
355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
356
 | 
829
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1151
 | 
                 $ptag = $pos->{'_tag'};       # yes, needs updating  | 
| 
357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
358
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # end of table-implication block.  | 
| 
360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # Now maybe do a little dance to enforce P-strictness.  | 
| 
362
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # This seems like it should be integrated with the big  | 
| 
363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # "ALL HOPE..." block, further below, but that doesn't  | 
| 
364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # seem feasable.  | 
| 
365
 | 
983
 | 
  
  0
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
1793
 | 
             if (    $self->{'_p_strict'}  | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
366
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 and $HTML::TreeBuilder::isKnown{$tag}  | 
| 
367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 and not $HTML::Tagset::is_Possible_Strict_P_Content{$tag} )  | 
| 
368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             {  | 
| 
369
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 my $here     = $pos;  | 
| 
370
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 my $here_tag = $ptag;  | 
| 
371
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 while (1) {  | 
| 
372
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     if ( $here_tag eq 'p' ) {  | 
| 
373
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         print $indent, " * Inserting $tag closes strict P.\n"  | 
| 
374
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             if DEBUG > 1;  | 
| 
375
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         $self->end( \q{p} );  | 
| 
376
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # NB: same as \'q', but less confusing to emacs cperl-mode  | 
| 
378
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         last;  | 
| 
379
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     #print("Lasting from $here_tag\n"),  | 
| 
382
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     last  | 
| 
383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         if $HTML::TreeBuilder::isKnown{$here_tag}  | 
| 
384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             and  | 
| 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             not $HTML::Tagset::is_Possible_Strict_P_Content{  | 
| 
386
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
                                 $here_tag};  | 
| 
387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                # Don't keep looking up the tree if we see something that can't  | 
| 
389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                #  be strict-P content.  | 
| 
390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $here_tag  | 
| 
392
 | 
  
0
  
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
                         = ( $here = $here->{'_parent'} || last )->{'_tag'};  | 
| 
393
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }    # end while  | 
| 
394
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $ptag = ( $pos = $self->{'_pos'} || $self )  | 
| 
395
 | 
  
0
  
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
                     ->{'_tag'};    # better update!  | 
| 
396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # end of strict-p block.  | 
| 
399
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        # And now, get busy...  | 
| 
402
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        #----------------------------------------------------------------------  | 
| 
403
 | 
1005
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2778
 | 
         if ( !$self->{'_implicit_tags'} ) {    # bimskalabim  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                                # do nothing  | 
| 
405
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
             print $indent, " * _implicit_tags is off.  doing nothing\n"  | 
| 
406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 if DEBUG > 1;  | 
| 
407
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
408
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        #----------------------------------------------------------------------  | 
| 
409
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ( $HTML::TreeBuilder::isHeadOrBodyElement{$tag} ) {  | 
| 
411
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
3
 | 
             if ( $pos->is_inside('body') ) {    # all is well  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
412
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 print $indent,  | 
| 
413
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     " * ambilocal element \U$tag\E is fine under BODY.\n"  | 
| 
414
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     if DEBUG > 1;  | 
| 
415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             elsif ( $pos->is_inside('head') ) {  | 
| 
417
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
                 print $indent,  | 
| 
418
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     " * ambilocal element \U$tag\E is fine under HEAD.\n"  | 
| 
419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     if DEBUG > 1;  | 
| 
420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
421
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
423
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # In neither head nor body!  mmmmm... put under head?  | 
| 
424
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
425
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 if ( $ptag eq 'html' ) {    # expected case  | 
| 
426
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                      # TODO?? : would there ever be a case where _head would be  | 
| 
427
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                      #  absent from a tree that would ever be accessed at this  | 
| 
428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                      #  point?  | 
| 
429
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     die "Where'd my head go?" unless ref $self->{'_head'};  | 
| 
430
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     if ( $self->{'_head'}{'_implicit'} ) {  | 
| 
431
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         print $indent,  | 
| 
432
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             " * ambilocal element \U$tag\E makes an implicit HEAD.\n"  | 
| 
433
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             if DEBUG > 1;  | 
| 
434
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         # or rather, points us at it.  | 
| 
436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         $self->{'_pos'}  | 
| 
437
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                             = $self->{'_head'};    # to insert under...  | 
| 
438
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
439
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     else {  | 
| 
440
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         $self->warning(  | 
| 
441
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             "Ambilocal element <$tag> not under HEAD or BODY!?"  | 
| 
442
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         );  | 
| 
443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
444
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         # Put it under HEAD by default, I guess  | 
| 
445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         $self->{'_pos'}  | 
| 
446
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                             = $self->{'_head'};    # to insert under...  | 
| 
447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
449
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 else {  | 
| 
451
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              # Neither under head nor body, nor right under html... pass thru?  | 
| 
453
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     $self->warning(  | 
| 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         "Ambilocal element <$tag> neither under head nor body, nor right under html!?"  | 
| 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     );  | 
| 
456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
457
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
458
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
459
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        #----------------------------------------------------------------------  | 
| 
460
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
461
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ( $HTML::TreeBuilder::isBodyElement{$tag} ) {  | 
| 
462
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
463
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # Ensure that we are within   | 
| 
464
 | 
799
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
2113
 | 
             if ( $ptag eq 'body' ) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
465
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
466
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # We're good.  | 
| 
467
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             elsif (  | 
| 
469
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $HTML::TreeBuilder::isBodyElement{$ptag}    # glarg  | 
| 
470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 and not $HTML::TreeBuilder::isHeadOrBodyElement{$ptag}  | 
| 
471
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 )  | 
| 
472
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             {  | 
| 
473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
474
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               # Special case: Save ourselves a call to is_inside further down.  | 
| 
475
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               # If our $ptag is an isBodyElement element (but not an  | 
| 
476
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               # isHeadOrBodyElement element), then we must be under body!  | 
| 
477
 | 
206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
251
 | 
                 print $indent, " * Inferring that $ptag is under BODY.\n",  | 
| 
478
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     if DEBUG > 3;  | 
| 
479
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
480
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # I think this and the test for 'body' trap everything  | 
| 
481
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # bodyworthy, except the case where the parent element is  | 
| 
482
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # under an unknown element that's a descendant of body.  | 
| 
483
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
484
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             elsif ( $pos->is_inside('head') ) {  | 
| 
485
 | 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
204
 | 
                 print $indent,  | 
| 
486
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     " * body-element \U$tag\E minimizes HEAD, makes implicit BODY.\n"  | 
| 
487
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     if DEBUG > 1;  | 
| 
488
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $ptag = (  | 
| 
489
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $pos = $self->{'_pos'}  | 
| 
490
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         = $self->{'_body'}    # yes, needs updating  | 
| 
491
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         || die "Where'd my body go?"  | 
| 
492
 | 
164
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
394
 | 
                 )->{'_tag'};                  # yes, needs updating  | 
| 
493
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
494
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             elsif ( !$pos->is_inside('body') ) {  | 
| 
495
 | 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
90
 | 
                 print $indent,  | 
| 
496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     " * body-element \U$tag\E makes implicit BODY.\n"  | 
| 
497
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     if DEBUG > 1;  | 
| 
498
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $ptag = (  | 
| 
499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $pos = $self->{'_pos'}  | 
| 
500
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         = $self->{'_body'}    # yes, needs updating  | 
| 
501
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         || die "Where'd my body go?"  | 
| 
502
 | 
77
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
197
 | 
                 )->{'_tag'};                  # yes, needs updating  | 
| 
503
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
504
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # else we ARE under body, so okay.  | 
| 
506
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
507
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # Handle implicit endings and insert based on  and position  | 
| 
508
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # ... ALL HOPE ABANDON ALL YE WHO ENTER HERE ...  | 
| 
509
 | 
799
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
7840
 | 
             if (   $tag eq 'p'  | 
| 
 
 | 
 
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
510
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 or $tag eq 'h1'  | 
| 
511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 or $tag eq 'h2'  | 
| 
512
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 or $tag eq 'h3'  | 
| 
513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 or $tag eq 'h4'  | 
| 
514
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 or $tag eq 'h5'  | 
| 
515
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 or $tag eq 'h6'  | 
| 
516
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 or $tag eq 'form'  | 
| 
517
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
518
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # Hm, should  | 
| 
519
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 )  | 
| 
520
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             {  | 
| 
521
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
522
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # Can't have  ,  or   | 
| 
523
 | 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
483
 | 
                 $self->end(  | 
| 
524
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $_Closed_by_structurals,  | 
| 
525
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     @HTML::TreeBuilder::p_closure_barriers  | 
| 
526
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
527
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         # used to be just li!  | 
| 
528
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 );  | 
| 
529
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
530
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
531
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             elsif ( $tag eq 'ol' or $tag eq 'ul' or $tag eq 'dl' ) {  | 
| 
532
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
533
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # Can't have lists inside  -- in the unlikely  | 
| 
534
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #  event anyone tries to put them there!  | 
| 
535
 | 
9
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
79
 | 
                 if (   $ptag eq 'h1'  | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
536
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     or $ptag eq 'h2'  | 
| 
537
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     or $ptag eq 'h3'  | 
| 
538
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     or $ptag eq 'h4'  | 
| 
539
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     or $ptag eq 'h5'  | 
| 
540
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     or $ptag eq 'h6' )  | 
| 
541
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 {  | 
| 
542
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     $self->end( \$ptag );  | 
| 
543
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
544
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
545
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # TODO: Maybe keep closing up the tree until  | 
| 
546
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #  the ptag isn't any of the above?  | 
| 
547
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # But anyone that says 
 | 
| 
548
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #  deserves what they get anyway.  | 
| 
549
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
551
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             elsif ( $tag eq 'li' ) {    # list item  | 
| 
552
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # Get under a list tag, one way or another  | 
| 
553
 | 
12
 | 
  
 50
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
32
 | 
                 unless (  | 
| 
554
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     exists $HTML::TreeBuilder::isList{$ptag}  | 
| 
555
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     or $self->end( \q{*}, keys %HTML::TreeBuilder::isList ) #'  | 
| 
556
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     )  | 
| 
557
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 {  | 
| 
558
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     print $indent,  | 
| 
559
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         " * inserting implicit UL for lack of containing ",  | 
| 
560
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         join( '|', keys %HTML::TreeBuilder::isList ), ".\n"  | 
| 
561
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         if DEBUG > 1;  | 
| 
562
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     $self->insert_element( 'ul', 1 );  | 
| 
563
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
564
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
566
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             elsif ( $tag eq 'dt' or $tag eq 'dd' ) {  | 
| 
567
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
568
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # Get under a DL, one way or another  | 
| 
569
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
                 unless ( $ptag eq 'dl' or $self->end( \q{*}, 'dl' ) ) {    #'  | 
| 
570
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     print $indent,  | 
| 
571
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         " * inserting implicit DL for lack of containing DL.\n"  | 
| 
572
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         if DEBUG > 1;  | 
| 
573
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     $self->insert_element( 'dl', 1 );  | 
| 
574
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
575
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
576
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
577
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             elsif ( $HTML::TreeBuilder::isFormElement{$tag} ) {  | 
| 
578
 | 
1
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
6
 | 
                 if ($self->{  | 
| 
579
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         '_ignore_formies_outside_form'}  # TODO: document this  | 
| 
580
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     and not $pos->is_inside('form')  | 
| 
581
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     )  | 
| 
582
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 {  | 
| 
583
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     print $indent,  | 
| 
584
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         " * ignoring \U$tag\E because not in a FORM.\n"  | 
| 
585
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         if DEBUG > 1;  | 
| 
586
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     return;                              # bypass tweaking.  | 
| 
587
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
588
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
3
 | 
                 if ( $tag eq 'option' ) {  | 
| 
589
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
590
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # return unless $ptag eq 'select';  | 
| 
591
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     $self->end( \q{option} );  | 
| 
592
 | 
  
0
  
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
                     $ptag = ( $self->{'_pos'} || $self )->{'_tag'};  | 
| 
593
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
                     unless ( $ptag eq 'select' or $ptag eq 'optgroup' ) {  | 
| 
594
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         print $indent,  | 
| 
595
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             " * \U$tag\E makes an implicit SELECT.\n"  | 
| 
596
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             if DEBUG > 1;  | 
| 
597
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         $pos = $self->insert_element( 'select', 1 );  | 
| 
598
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
599
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # but not a very useful select -- has no 'name' attribute!  | 
| 
600
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # is $pos's value used after this?  | 
| 
601
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
602
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
603
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
604
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             elsif ( $HTML::TreeBuilder::isTableElement{$tag} ) {  | 
| 
605
 | 
154
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
340
 | 
                 if ( !$pos->is_inside('table') ) {  | 
| 
606
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
                     print $indent, " * \U$tag\E makes an implicit TABLE\n"  | 
| 
607
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         if DEBUG > 1;  | 
| 
608
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
                     $self->insert_element( 'table', 1 );  | 
| 
609
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
610
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
611
 | 
154
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
394
 | 
                 if ( $tag eq 'td' or $tag eq 'th' ) {  | 
| 
612
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
613
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # Get under a tr one way or another  | 
| 
614
 | 
96
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
238
 | 
                     unless (  | 
| 
615
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         $ptag eq 'tr'    # either under a tr  | 
| 
616
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         or $self->end( \q{*}, 'tr',  | 
| 
617
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             'table' )    #or we can get under one  | 
| 
618
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         )  | 
| 
619
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     {  | 
| 
620
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
                         print $indent,  | 
| 
621
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             " * \U$tag\E under \U$ptag\E makes an implicit TR\n"  | 
| 
622
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             if DEBUG > 1;  | 
| 
623
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
                         $self->insert_element( 'tr', 1 );  | 
| 
624
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
625
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         # presumably pos's value isn't used after this.  | 
| 
626
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
627
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
628
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 else {  | 
| 
629
 | 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
137
 | 
                     $self->end( \$tag, 'table' );    #'  | 
| 
630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
631
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
632
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # Hmm, I guess this is right.  To work it out:  | 
| 
633
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #   tr closes any open tr (limited at a table)  | 
| 
634
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #   thead closes any open thead (limited at a table)  | 
| 
635
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #   tbody closes any open tbody (limited at a table)  | 
| 
636
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #   tfoot closes any open tfoot (limited at a table)  | 
| 
637
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #   colgroup closes any open colgroup (limited at a table)  | 
| 
638
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #   col can try, but will always fail, at the enclosing table,  | 
| 
639
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #     as col is empty, and therefore never open!  | 
| 
640
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # But!  | 
| 
641
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #   td closes any open td OR th (limited at a table)  | 
| 
642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #   th closes any open th OR td (limited at a table)  | 
| 
643
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #   ...implementable as "close to a tr, or make a tr"  | 
| 
644
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
645
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
646
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             elsif ( $HTML::TreeBuilder::isPhraseMarkup{$tag} ) {  | 
| 
647
 | 
347
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
1012
 | 
                 if ( $ptag eq 'body' and $self->{'_implicit_body_p_tag'} ) {  | 
| 
648
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
                     print  | 
| 
649
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         " * Phrasal \U$tag\E right under BODY makes an implicit P\n"  | 
| 
650
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         if DEBUG > 1;  | 
| 
651
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
                     $pos = $self->insert_element( 'p', 1 );  | 
| 
652
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
653
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # is $pos's value used after this?  | 
| 
654
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
655
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
656
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
657
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # End of implicit endings logic  | 
| 
658
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
659
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        # End of "elsif ($HTML::TreeBuilder::isBodyElement{$tag}"  | 
| 
660
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        #----------------------------------------------------------------------  | 
| 
661
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
662
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
663
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ( $HTML::TreeBuilder::isHeadElement{$tag} ) {  | 
| 
664
 | 
167
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
328
 | 
             if ( $pos->is_inside('body') ) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
665
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
                 print $indent, " * head element \U$tag\E found inside BODY!\n"  | 
| 
666
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     if DEBUG;  | 
| 
667
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
                 $self->warning("Header element <$tag> in body");    # [sic]  | 
| 
668
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
669
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             elsif ( !$pos->is_inside('head') ) {  | 
| 
670
 | 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
176
 | 
                 print $indent,  | 
| 
671
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     " * head element \U$tag\E makes an implicit HEAD.\n"  | 
| 
672
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     if DEBUG > 1;  | 
| 
673
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
675
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
                 print $indent,  | 
| 
676
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     " * head element \U$tag\E goes inside existing HEAD.\n"  | 
| 
677
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     if DEBUG > 1;  | 
| 
678
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
679
 | 
167
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
381
 | 
             $self->{'_pos'} = $self->{'_head'} || die "Where'd my head go?";  | 
| 
680
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
681
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        #----------------------------------------------------------------------  | 
| 
682
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
683
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ( $tag eq 'html' ) {  | 
| 
684
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
12
 | 
             if ( delete $self->{'_implicit'} ) {    # first time here  | 
| 
685
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
                 print $indent, " * good! found the real HTML element!\n"  | 
| 
686
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     if DEBUG > 1;  | 
| 
687
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
688
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
689
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 print $indent, " * Found a second HTML element\n"  | 
| 
690
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     if DEBUG;  | 
| 
691
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 $self->warning("Found a nested  element");  | 
| 
692
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
693
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
694
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # in either case, migrate attributes to the real element  | 
| 
695
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
             for ( keys %$attr ) {  | 
| 
696
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 $self->attr( $_, $attr->{$_} );  | 
| 
697
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
698
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
             $self->{'_pos'} = undef;  | 
| 
699
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
             return $self;    # bypass tweaking.  | 
| 
700
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
701
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        #----------------------------------------------------------------------  | 
| 
702
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
703
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ( $tag eq 'head' ) {  | 
| 
704
 | 
6
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
60
 | 
             my $head = $self->{'_head'} || die "Where'd my head go?";  | 
| 
705
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
16
 | 
             if ( delete $head->{'_implicit'} ) {    # first time here  | 
| 
706
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
                 print $indent, " * good! found the real HEAD element!\n"  | 
| 
707
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     if DEBUG > 1;  | 
| 
708
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
709
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {                                  # been here before  | 
| 
710
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 print $indent, " * Found a second HEAD element\n"  | 
| 
711
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     if DEBUG;  | 
| 
712
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 $self->warning("Found a second  element");  | 
| 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
714
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
715
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # in either case, migrate attributes to the real element  | 
| 
716
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
             for ( keys %$attr ) {  | 
| 
717
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 $head->attr( $_, $attr->{$_} );  | 
| 
718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
719
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
37
 | 
             return $self->{'_pos'} = $head;         # bypass tweaking.  | 
| 
720
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
721
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        #----------------------------------------------------------------------  | 
| 
722
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
723
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ( $tag eq 'body' ) {  | 
| 
724
 | 
6
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
20
 | 
             my $body = $self->{'_body'} || die "Where'd my body go?";  | 
| 
725
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
15
 | 
             if ( delete $body->{'_implicit'} ) {    # first time here  | 
| 
726
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
                 print $indent, " * good! found the real BODY element!\n"  | 
| 
727
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     if DEBUG > 1;  | 
| 
728
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
729
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {                                  # been here before  | 
| 
730
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 print $indent, " * Found a second BODY element\n"  | 
| 
731
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     if DEBUG;  | 
| 
732
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 $self->warning("Found a second  element");  | 
| 
733
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
734
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
735
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # in either case, migrate attributes to the real element  | 
| 
736
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
             for ( keys %$attr ) {  | 
| 
737
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 $body->attr( $_, $attr->{$_} );  | 
| 
738
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
739
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
43
 | 
             return $self->{'_pos'} = $body;         # bypass tweaking.  | 
| 
740
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
741
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        #----------------------------------------------------------------------  | 
| 
742
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ( $tag eq 'frameset' ) {  | 
| 
744
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
             if (!( $self->{'_frameset_seen'}++ )    # first frameset seen  | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
745
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 and !$self->{'_noframes_seen'}  | 
| 
746
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
747
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # otherwise it'll be under the noframes already  | 
| 
748
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 and !$self->is_inside('body')  | 
| 
749
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 )  | 
| 
750
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             {  | 
| 
751
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
752
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            # The following is a bit of a hack.  We don't use the normal  | 
| 
753
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            #  insert_element because 1) we don't want it as _pos, but instead  | 
| 
754
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            #  right under $self, and 2), more importantly, that we don't want  | 
| 
755
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            #  this inserted at the /end/ of $self's content_list, but instead  | 
| 
756
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            #  in the middle of it, specifically right before the body element.  | 
| 
757
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            #  | 
| 
758
 | 
  
0
  
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
                 my $c    = $self->{'_content'} || die "Contentless root?";  | 
| 
759
 | 
  
0
  
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
                 my $body = $self->{'_body'}    || die "Where'd my BODY go?";  | 
| 
760
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 for ( my $i = 0; $i < @$c; ++$i ) {  | 
| 
761
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     if ( $c->[$i] eq $body ) {  | 
| 
762
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         splice( @$c, $i, 0, $self->{'_pos'} = $pos = $e );  | 
| 
763
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         HTML::Element::_weaken($e->{'_parent'} = $self);  | 
| 
764
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         $already_inserted = 1;  | 
| 
765
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         print $indent,  | 
| 
766
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             " * inserting 'frameset' right before BODY.\n"  | 
| 
767
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             if DEBUG > 1;  | 
| 
768
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         last;  | 
| 
769
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
770
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
771
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 die "BODY not found in children of root?"  | 
| 
772
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     unless $already_inserted;  | 
| 
773
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
774
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
775
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
776
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ( $tag eq 'frame' ) {  | 
| 
777
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
778
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # Okay, fine, pass thru.  | 
| 
779
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # Should probably enforce that these should be under a frameset.  | 
| 
780
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # But hey.  Ditto for enforcing that 'noframes' should be under  | 
| 
781
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # a 'frameset', as the DTDs say.  | 
| 
782
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
783
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
784
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ( $tag eq 'noframes' ) {  | 
| 
785
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
786
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            # This basically assumes there'll be exactly one 'noframes' element  | 
| 
787
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            #  per document.  At least, only the first one gets to have the  | 
| 
788
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            #  body under it.  And if there are no noframes elements, then  | 
| 
789
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            #  the body pretty much stays where it is.  Is that ever a problem?  | 
| 
790
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             if ( $self->{'_noframes_seen'}++ ) {  | 
| 
791
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 print $indent, " * ANOTHER noframes element?\n" if DEBUG;  | 
| 
792
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
793
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
794
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 if ( $pos->is_inside('body') ) {  | 
| 
795
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     print $indent, " * 'noframes' inside 'body'.  Odd!\n"  | 
| 
796
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         if DEBUG;  | 
| 
797
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
798
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                # In that odd case, we /can't/ make body a child of 'noframes',  | 
| 
799
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                # because it's an ancestor of the 'noframes'!  | 
| 
800
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
801
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 else {  | 
| 
802
 | 
  
0
  
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
                     $e->push_content( $self->{'_body'}  | 
| 
803
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             || die "Where'd my body go?" );  | 
| 
804
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     print $indent, " * Moving body to be under noframes.\n"  | 
| 
805
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         if DEBUG;  | 
| 
806
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
807
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
808
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
809
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        #----------------------------------------------------------------------  | 
| 
810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
811
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
812
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
813
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # unknown tag  | 
| 
814
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             if ( $self->{'_ignore_unknown'} ) {  | 
| 
815
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 print $indent, " * Ignoring unknown tag \U$tag\E\n" if DEBUG;  | 
| 
816
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 $self->warning("Skipping unknown tag $tag");  | 
| 
817
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 return;  | 
| 
818
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
819
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
820
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 print $indent, " * Accepting unknown tag \U$tag\E\n"  | 
| 
821
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     if DEBUG;  | 
| 
822
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
823
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
824
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
825
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        #----------------------------------------------------------------------  | 
| 
826
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        # End of mumbo-jumbo  | 
| 
827
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
828
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         print $indent, "(Attaching ", $e->{'_tag'}, " under ",  | 
| 
829
 | 
989
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1178
 | 
             ( $self->{'_pos'} || $self )->{'_tag'}, ")\n"  | 
| 
830
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
831
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # because if _pos isn't defined, it goes under self  | 
| 
832
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             if DEBUG;  | 
| 
833
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
834
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # The following if-clause is to delete /some/ ignorable whitespace  | 
| 
835
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         #  nodes, as we're making the tree.  | 
| 
836
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # This'd be a node we'd catch later anyway, but we might as well  | 
| 
837
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         #  nip it in the bud now.  | 
| 
838
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # This doesn't catch /all/ deletable WS-nodes, so we do have to call  | 
| 
839
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         #  the tightener later to catch the rest.  | 
| 
840
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
841
 | 
989
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
2862
 | 
         if ( $self->{'_tighten'} and !$self->{'_ignore_text'} )  | 
| 
842
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         {    # if tightenable  | 
| 
843
 | 
980
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1221
 | 
             my ( $sibs, $par );  | 
| 
844
 | 
980
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
5251
 | 
             if (( $sibs = ( $par = $self->{'_pos'} || $self )->{'_content'} )  | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
845
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 and @$sibs            # parent already has content  | 
| 
846
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 and !  | 
| 
847
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ref( $sibs->[-1] )    # and the last one there is a text node  | 
| 
848
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 and $sibs->[-1] !~ m<[^\n\r\f\t ]>s  # and it's all whitespace  | 
| 
849
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
850
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 and (    # one of these has to be eligible...  | 
| 
851
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $HTML::TreeBuilder::canTighten{$tag}  | 
| 
852
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     or (( @$sibs == 1 )  | 
| 
853
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         ?    # WS is leftmost -- so parent matters  | 
| 
854
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         $HTML::TreeBuilder::canTighten{ $par->{'_tag'} }  | 
| 
855
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         :    # WS is after another node -- it matters  | 
| 
856
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         (   ref $sibs->[-2]  | 
| 
857
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 and  | 
| 
858
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 $HTML::TreeBuilder::canTighten{ $sibs->[-2]  | 
| 
859
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                     {'_tag'} }  | 
| 
860
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         )  | 
| 
861
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     )  | 
| 
862
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 )  | 
| 
863
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
864
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 and !$par->is_inside( 'pre', 'xmp', 'textarea', 'plaintext' )  | 
| 
865
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
866
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # we're clear  | 
| 
867
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 )  | 
| 
868
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             {  | 
| 
869
 | 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
48
 | 
                 pop @$sibs;  | 
| 
870
 | 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
51
 | 
                 print $indent, "Popping a preceding all-WS node\n" if DEBUG;  | 
| 
871
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
872
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
873
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
874
 | 
989
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
2927
 | 
         $self->insert_element($e) unless $already_inserted;  | 
| 
875
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
876
 | 
989
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1048
 | 
         if (DEBUG) {  | 
| 
877
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             if ( $self->{'_pos'} ) {  | 
| 
878
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 print $indent, "(Current lineage of pos:  \U$tag\E under ",  | 
| 
879
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     join(  | 
| 
880
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     '/',  | 
| 
881
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     reverse(  | 
| 
882
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
883
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         # $self->{'_pos'}{'_tag'},  # don't list myself!  | 
| 
884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         $self->{'_pos'}->lineage_tag_names  | 
| 
885
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     )  | 
| 
886
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ),  | 
| 
887
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ".)\n";  | 
| 
888
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
889
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
890
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 print $indent, "(Pos points nowhere!?)\n";  | 
| 
891
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
892
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
893
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
894
 | 
989
 | 
  
100
  
 | 
  
 50
  
 | 
 
 | 
 
 | 
2694
 | 
         unless ( ( $self->{'_pos'} || '' ) eq $e ) {  | 
| 
895
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
896
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # if it's an empty element -- i.e., if it didn't change the _pos  | 
| 
897
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
             &{         $self->{"_tweak_$tag"}  | 
| 
898
 | 
15
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
143
 | 
                     || $self->{'_tweak_*'}  | 
| 
899
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     || return $e }( map $_, $e, $tag, $self )  | 
| 
900
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ;    # make a list so the user can't clobber  | 
| 
901
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
902
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
903
 | 
974
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5540
 | 
         return $e;  | 
| 
904
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
905
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
906
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
907
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #==========================================================================  | 
| 
908
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
909
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
910
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $indent;  | 
| 
911
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
912
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sub end {  | 
| 
913
 | 
1400
 | 
  
 50
  
 | 
 
 | 
  
1400
  
 | 
  
1
  
 | 
2838
 | 
         return if $_[0]{'_stunted'};  | 
| 
914
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
915
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        # Either: Acccept an end-tag signal from HTML::Parser  | 
| 
916
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        # Or: Method for closing currently open elements in some fairly complex  | 
| 
917
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        #  way, as used by other methods in this class.  | 
| 
918
 | 
1400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3004
 | 
         my ( $self, $tag, @stop ) = @_;  | 
| 
919
 | 
1400
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
2416
 | 
         if ( $tag eq 'x-html' ) {  | 
| 
920
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             print "Ignoring close-x-html tag.\n" if DEBUG;  | 
| 
921
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
922
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # inserted by some lame code-generators.  | 
| 
923
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             return;  | 
| 
924
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
925
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
926
 | 
1400
 | 
  
 50
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
4818
 | 
         unless ( ref($tag) or $tag =~ m/^[-_a-zA-Z0-9:%]+$/s ) {  | 
| 
927
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             DEBUG and print "End-tag name $tag is no good.  Skipping.\n";  | 
| 
928
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             return;  | 
| 
929
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
930
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # This avoids having Element's new() throw an exception.  | 
| 
931
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
932
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
933
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        # This method accepts two calling formats:  | 
| 
934
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        #  1) from Parser:  $self->end('tag_name', 'origtext')  | 
| 
935
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        #        in which case we shouldn't mistake origtext as a blocker tag  | 
| 
936
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        #  2) from myself:  $self->end(\q{tagname1}, 'blk1', ... )  | 
| 
937
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        #     from myself:  $self->end(['tagname1', 'tagname2'], 'blk1',  ... )  | 
| 
938
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
939
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # End the specified tag, but don't move above any of the blocker tags.  | 
| 
940
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # The tag can also be a reference to an array.  Terminate the first  | 
| 
941
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # tag found.  | 
| 
942
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
943
 | 
1400
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
2886
 | 
         my $ptag = ( my $p = $self->{'_pos'} || $self )->{'_tag'};  | 
| 
944
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
945
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # $p and $ptag are sort-of stratch  | 
| 
946
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
947
 | 
1400
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2055
 | 
         if ( ref($tag) ) {  | 
| 
948
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
949
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # First param is a ref of one sort or another --  | 
| 
950
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             #  THE CALL IS COMING FROM INSIDE THE HOUSE!  | 
| 
951
 | 
336
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
606
 | 
             $tag = $$tag if ref($tag) eq 'SCALAR';  | 
| 
952
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
953
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # otherwise it's an arrayref.  | 
| 
954
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
955
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
956
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
957
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # the call came from Parser -- just ignore origtext  | 
| 
958
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # except in a table ignore unmatched table tags RT #59980  | 
| 
959
 | 
1064
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2274
 | 
             @stop = $tag =~ /^t[hdr]\z/ ? 'table' : ();  | 
| 
960
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
961
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
962
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         #my($indent);  | 
| 
963
 | 
1400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1518
 | 
         if (DEBUG) {  | 
| 
964
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
965
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            # optimization -- don't figure out depth unless we're in debug mode  | 
| 
966
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             my @lineage_tags = $p->lineage_tag_names;  | 
| 
967
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $indent = '  ' x ( 1 + @lineage_tags );  | 
| 
968
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
969
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # now announce ourselves  | 
| 
970
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             print $indent, "Ending ",  | 
| 
971
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ref($tag) ? ( '[', join( ' ', @$tag ), ']' ) : "\U$tag\E",  | 
| 
972
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 scalar(@stop)  | 
| 
973
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ? ( " no higher than [", join( ' ', @stop ), "]" )  | 
| 
974
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 : (), ".\n";  | 
| 
975
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
976
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             print $indent, " (Current lineage: ", join( '/', @lineage_tags ),  | 
| 
977
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ".)\n"  | 
| 
978
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 if DEBUG > 1;  | 
| 
979
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
980
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             if ( DEBUG > 3 ) {  | 
| 
981
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
982
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #my(  | 
| 
983
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # $package, $filename, $line, $subroutine,  | 
| 
984
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # $hasargs, $wantarray, $evaltext, $is_require) = caller;  | 
| 
985
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 print $indent,  | 
| 
986
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     " (Called from ", ( caller(1) )[3], ' line ',  | 
| 
987
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ( caller(1) )[2],  | 
| 
988
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ")\n";  | 
| 
989
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
990
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
991
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             #} else {  | 
| 
992
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             #  $indent = ' ';  | 
| 
993
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
994
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
995
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # End of if DEBUG  | 
| 
996
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
997
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Now actually do it  | 
| 
998
 | 
1400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1533
 | 
         my @to_close;  | 
| 
999
 | 
1400
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2428
 | 
         if ( $tag eq '*' ) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1000
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1001
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Special -- close everything up to (but not including) the first  | 
| 
1002
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         #  limiting tag, or return if none found.  Somewhat of a special case.  | 
| 
1003
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         PARENT:  | 
| 
1004
 | 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
62
 | 
             while ( defined $p ) {  | 
| 
1005
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
102
 | 
                 $ptag = $p->{'_tag'};  | 
| 
1006
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
70
 | 
                 print $indent, " (Looking at $ptag.)\n" if DEBUG > 2;  | 
| 
1007
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
95
 | 
                 for (@stop) {  | 
| 
1008
 | 
113
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
223
 | 
                     if ( $ptag eq $_ ) {  | 
| 
1009
 | 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
42
 | 
                         print $indent,  | 
| 
1010
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             " (Hit a $_; closing everything up to here.)\n"  | 
| 
1011
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             if DEBUG > 2;  | 
| 
1012
 | 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
59
 | 
                         last PARENT;  | 
| 
1013
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
1014
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1015
 | 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
46
 | 
                 push @to_close, $p;  | 
| 
1016
 | 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
43
 | 
                 $p = $p->{'_parent'};    # no match so far? keep moving up  | 
| 
1017
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 print $indent,  | 
| 
1018
 | 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
53
 | 
                     " (Moving on up to ", $p ? $p->{'_tag'} : 'nil', ")\n"  | 
| 
1019
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     if DEBUG > 1;  | 
| 
1020
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1021
 | 
35
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
57
 | 
             unless ( defined $p ) { # We never found what we were looking for.  | 
| 
1022
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 print $indent, " (We never found a limit.)\n" if DEBUG > 1;  | 
| 
1023
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 return;  | 
| 
1024
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1025
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1026
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             #print  | 
| 
1027
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             #   $indent,  | 
| 
1028
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             #   " (To close: ", join('/', map $_->tag, @to_close), ".)\n"  | 
| 
1029
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             #  if DEBUG > 4;  | 
| 
1030
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1031
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # Otherwise update pos and fall thru.  | 
| 
1032
 | 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
48
 | 
             $self->{'_pos'} = $p;  | 
| 
1033
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1034
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ( ref $tag ) {  | 
| 
1035
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1036
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            # Close the first of any of the matching tags, giving up if you hit  | 
| 
1037
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            #  any of the stop-tags.  | 
| 
1038
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         PARENT:  | 
| 
1039
 | 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
405
 | 
             while ( defined $p ) {  | 
| 
1040
 | 
463
 | 
 
 | 
 
 | 
 
 | 
 
 | 
579
 | 
                 $ptag = $p->{'_tag'};  | 
| 
1041
 | 
463
 | 
 
 | 
 
 | 
 
 | 
 
 | 
765
 | 
                 print $indent, " (Looking at $ptag.)\n" if DEBUG > 2;  | 
| 
1042
 | 
463
 | 
 
 | 
 
 | 
 
 | 
 
 | 
678
 | 
                 for (@$tag) {  | 
| 
1043
 | 
3983
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
5464
 | 
                     if ( $ptag eq $_ ) {  | 
| 
1044
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
                         print $indent, " (Closing $_.)\n" if DEBUG > 2;  | 
| 
1045
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
37
 | 
                         last PARENT;  | 
| 
1046
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
1047
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1048
 | 
440
 | 
 
 | 
 
 | 
 
 | 
 
 | 
543
 | 
                 for (@stop) {  | 
| 
1049
 | 
6600
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8987
 | 
                     if ( $ptag eq $_ ) {  | 
| 
1050
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         print $indent,  | 
| 
1051
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             " (Hit a limiting $_ -- bailing out.)\n"  | 
| 
1052
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             if DEBUG > 1;  | 
| 
1053
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         return;    # so it was all for naught  | 
| 
1054
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
1055
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1056
 | 
440
 | 
 
 | 
 
 | 
 
 | 
 
 | 
561
 | 
                 push @to_close, $p;  | 
| 
1057
 | 
440
 | 
 
 | 
 
 | 
 
 | 
 
 | 
742
 | 
                 $p = $p->{'_parent'};  | 
| 
1058
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1059
 | 
243
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
698
 | 
             return unless defined $p;    # We went off the top of the tree.  | 
| 
1060
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                # Otherwise specified element was found; set pos to its parent.  | 
| 
1061
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30
 | 
             push @to_close, $p;  | 
| 
1062
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
             $self->{'_pos'} = $p->{'_parent'};  | 
| 
1063
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1064
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
1065
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1066
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # Close the first of the specified tag, giving up if you hit  | 
| 
1067
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             #  any of the stop-tags.  | 
| 
1068
 | 
1122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1883
 | 
             while ( defined $p ) {  | 
| 
1069
 | 
1511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1962
 | 
                 $ptag = $p->{'_tag'};  | 
| 
1070
 | 
1511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1561
 | 
                 print $indent, " (Looking at $ptag.)\n" if DEBUG > 2;  | 
| 
1071
 | 
1511
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2661
 | 
                 if ( $ptag eq $tag ) {  | 
| 
1072
 | 
1092
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1109
 | 
                     print $indent, " (Closing $tag.)\n" if DEBUG > 2;  | 
| 
1073
 | 
1092
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1340
 | 
                     last;  | 
| 
1074
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1075
 | 
419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
583
 | 
                 for (@stop) {  | 
| 
1076
 | 
58
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
121
 | 
                     if ( $ptag eq $_ ) {  | 
| 
1077
 | 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
                         print $indent,  | 
| 
1078
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             " (Hit a limiting $_ -- bailing out.)\n"  | 
| 
1079
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             if DEBUG > 1;  | 
| 
1080
 | 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
69
 | 
                         return;    # so it was all for naught  | 
| 
1081
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
1082
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1083
 | 
390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
511
 | 
                 push @to_close, $p;  | 
| 
1084
 | 
390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
640
 | 
                 $p = $p->{'_parent'};  | 
| 
1085
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1086
 | 
1093
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1610
 | 
             return unless defined $p;    # We went off the top of the tree.  | 
| 
1087
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                # Otherwise specified element was found; set pos to its parent.  | 
| 
1088
 | 
1092
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1384
 | 
             push @to_close, $p;  | 
| 
1089
 | 
1092
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1527
 | 
             $self->{'_pos'} = $p->{'_parent'};  | 
| 
1090
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1091
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1092
 | 
1150
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
3218
 | 
         $self->{'_pos'} = undef if $self eq ( $self->{'_pos'} || '' );  | 
| 
1093
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         print $indent, "(Pos now points to ",  | 
| 
1094
 | 
1150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1244
 | 
             $self->{'_pos'} ? $self->{'_pos'}{'_tag'} : '???', ".)\n"  | 
| 
1095
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             if DEBUG > 1;  | 
| 
1096
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1097
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ### EXPENSIVE, because has to check that it's not under a pre  | 
| 
1098
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ### or a CDATA-parent.  That's one more method call per end()!  | 
| 
1099
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ### Might as well just do this at the end of the tree-parse, I guess,  | 
| 
1100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ### at which point we'd be parsing top-down, and just not traversing  | 
| 
1101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ### under pre's or CDATA-parents.  | 
| 
1102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ##  | 
| 
1103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## Take this opportunity to nix any terminal whitespace nodes.  | 
| 
1104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## TODO: consider whether this (plus the logic in start(), above)  | 
| 
1105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## would ever leave any WS nodes in the tree.  | 
| 
1106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## If not, then there's no reason to have eof() call  | 
| 
1107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ## delete_ignorable_whitespace on the tree, is there?  | 
| 
1108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ##  | 
| 
1109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #if(@to_close and $self->{'_tighten'} and !$self->{'_ignore_text'} and  | 
| 
1110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #  ! $to_close[-1]->is_inside('pre', keys %HTML::Tagset::isCDATA_Parent)  | 
| 
1111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #) {  # if tightenable  | 
| 
1112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #  my($children, $e_tag);  | 
| 
1113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #  foreach my $e (reverse @to_close) { # going top-down  | 
| 
1114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #    last if 'pre' eq ($e_tag = $e->{'_tag'}) or  | 
| 
1115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #     $HTML::Tagset::isCDATA_Parent{$e_tag};  | 
| 
1116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #  | 
| 
1117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #    if(  | 
| 
1118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #      $children = $e->{'_content'}  | 
| 
1119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #      and @$children      # has children  | 
| 
1120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #      and !ref($children->[-1])  | 
| 
1121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #      and $children->[-1] =~ m<^\s+$>s # last node is all-WS  | 
| 
1122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #      and  | 
| 
1123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #        (  | 
| 
1124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #         # has a tightable parent:  | 
| 
1125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #         $HTML::TreeBuilder::canTighten{ $e_tag }  | 
| 
1126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #         or  | 
| 
1127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #          ( # has a tightenable left sibling:  | 
| 
1128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #            @$children > 1 and  | 
| 
1129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #            ref($children->[-2])  | 
| 
1130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #            and $HTML::TreeBuilder::canTighten{ $children->[-2]{'_tag'} }  | 
| 
1131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #          )  | 
| 
1132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #        )  | 
| 
1133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #    ) {  | 
| 
1134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #      pop @$children;  | 
| 
1135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #      #print $indent, "Popping a terminal WS node from ", $e->{'_tag'},  | 
| 
1136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #      #  " (", $e->address, ") while exiting.\n" if DEBUG;  | 
| 
1137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #    }  | 
| 
1138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #  }  | 
| 
1139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #}  | 
| 
1140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1141
 | 
1150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1469
 | 
         foreach my $e (@to_close) {  | 
| 
1142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # Call the applicable callback, if any  | 
| 
1144
 | 
1535
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1920
 | 
             $ptag = $e->{'_tag'};  | 
| 
1145
 | 
1535
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3507
 | 
             &{         $self->{"_tweak_$ptag"}  | 
| 
1146
 | 
1535
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
5731
 | 
                     || $self->{'_tweak_*'}  | 
| 
1147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     || next }( map $_, $e, $ptag, $self );  | 
| 
1148
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             print $indent, "Back from tweaking.\n" if DEBUG;  | 
| 
1149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             last  | 
| 
1150
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 if $self->{ '_stunted'  | 
| 
1151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     };    # in case one of the handlers called stunt  | 
| 
1152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1153
 | 
1150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3992
 | 
         return @to_close;  | 
| 
1154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #==========================================================================  | 
| 
1158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my ( $indent, $nugget );  | 
| 
1160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sub text {  | 
| 
1162
 | 
2152
 | 
  
 50
  
 | 
 
 | 
  
2152
  
 | 
  
1
  
 | 
61184
 | 
         return if $_[0]{'_stunted'};  | 
| 
1163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Accept a "here's a text token" signal from HTML::Parser.  | 
| 
1165
 | 
2152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3839
 | 
         my ( $self, $text, $is_cdata ) = @_;  | 
| 
1166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # the >3.0 versions of Parser may pass a cdata node.  | 
| 
1168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Thanks to Gisle Aas for pointing this out.  | 
| 
1169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1170
 | 
2152
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
3648
 | 
         return unless length $text;    # I guess that's always right  | 
| 
1171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1172
 | 
2152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2710
 | 
         my $ignore_text         = $self->{'_ignore_text'};  | 
| 
1173
 | 
2152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2568
 | 
         my $no_space_compacting = $self->{'_no_space_compacting'};  | 
| 
1174
 | 
2152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2507
 | 
         my $no_expand_entities  = $self->{'_no_expand_entities'};  | 
| 
1175
 | 
2152
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
3752
 | 
         my $pos                 = $self->{'_pos'} || $self;  | 
| 
1176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         HTML::Entities::decode($text)  | 
| 
1178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             unless $ignore_text  | 
| 
1179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 || $is_cdata  | 
| 
1180
 | 
2152
 | 
  
100
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
13648
 | 
                 || $HTML::Tagset::isCDATA_Parent{ $pos->{'_tag'} }  | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 || $no_expand_entities;  | 
| 
1182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         #my($indent, $nugget);  | 
| 
1184
 | 
2152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2538
 | 
         if (DEBUG) {  | 
| 
1185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            # optimization -- don't figure out depth unless we're in debug mode  | 
| 
1187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             my @lineage_tags = $pos->lineage_tag_names;  | 
| 
1188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $indent = '  ' x ( 1 + @lineage_tags );  | 
| 
1189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $nugget  | 
| 
1191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 = ( length($text) <= 25 )  | 
| 
1192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ? $text  | 
| 
1193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 : ( substr( $text, 0, 25 ) . '...' );  | 
| 
1194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $nugget =~ s<([\x00-\x1F])>  | 
| 
1195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  <'\\x'.(unpack("H2",$1))>eg;  | 
| 
1196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             print $indent, "Proposing a new text node ($nugget) under ",  | 
| 
1197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 join( '/', reverse( $pos->{'_tag'}, @lineage_tags ) )  | 
| 
1198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 || 'Root',  | 
| 
1199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ".\n";  | 
| 
1200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             #} else {  | 
| 
1202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             #  $indent = ' ';  | 
| 
1203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1204
 | 
2152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2294
 | 
    | 
| 
1205
 | 
2152
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
6325
 | 
         my $ptag;  | 
| 
1206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         if ($HTML::Tagset::isCDATA_Parent{ $ptag = $pos->{'_tag'} }  | 
| 
1207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             #or $pos->is_inside('pre')  | 
| 
1209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             or $pos->is_inside( 'pre', 'textarea' )  | 
| 
1210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             )  | 
| 
1211
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         {  | 
| 
1212
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             return if $ignore_text;  | 
| 
1213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $pos->push_content($text);  | 
| 
1214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
1216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # return unless $text =~ /\S/;  # This is sometimes wrong  | 
| 
1218
 | 
2152
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
11238
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             if ( !$self->{'_implicit_tags'} || $text !~ /[^\n\r\f\t ]/ ) {  | 
| 
1220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # don't change anything  | 
| 
1222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1223
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             elsif ( $ptag eq 'head' or $ptag eq 'noframes' ) {  | 
| 
1224
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 if ( $self->{'_implicit_body_p_tag'} ) {  | 
| 
1225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     print $indent,  | 
| 
1226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         " * Text node under \U$ptag\E closes \U$ptag\E, implicates BODY and P.\n"  | 
| 
1227
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         if DEBUG > 1;  | 
| 
1228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $self->end( \$ptag );  | 
| 
1229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $pos = $self->{'_body'}  | 
| 
1230
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         ? ( $self->{'_pos'}  | 
| 
1231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             = $self->{'_body'} )    # expected case  | 
| 
1232
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         : $self->insert_element( 'body', 1 );  | 
| 
1233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $pos = $self->insert_element( 'p', 1 );  | 
| 
1234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1235
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 else {  | 
| 
1236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     print $indent,  | 
| 
1237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         " * Text node under \U$ptag\E closes, implicates BODY.\n"  | 
| 
1238
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         if DEBUG > 1;  | 
| 
1239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $self->end( \$ptag );  | 
| 
1240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $pos = $self->{'_body'}  | 
| 
1241
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         ? ( $self->{'_pos'}  | 
| 
1242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             = $self->{'_body'} )    # expected case  | 
| 
1243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         : $self->insert_element( 'body', 1 );  | 
| 
1244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1246
 | 
22
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
48
 | 
             elsif ( $ptag eq 'html' ) {  | 
| 
1247
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
                 if ( $self->{'_implicit_body_p_tag'} ) {  | 
| 
1248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     print $indent,  | 
| 
1249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         " * Text node under HTML implicates BODY and P.\n"  | 
| 
1250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         if DEBUG > 1;  | 
| 
1251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $pos = $self->{'_body'}  | 
| 
1252
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
                         ? ( $self->{'_pos'}  | 
| 
1253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             = $self->{'_body'} )    # expected case  | 
| 
1254
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
                         : $self->insert_element( 'body', 1 );  | 
| 
1255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $pos = $self->insert_element( 'p', 1 );  | 
| 
1256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1257
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
                 else {  | 
| 
1258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     print $indent,  | 
| 
1259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         " * Text node under HTML implicates BODY.\n"  | 
| 
1260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         if DEBUG > 1;  | 
| 
1261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $pos = $self->{'_body'}  | 
| 
1262
 | 
21
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
47
 | 
                         ? ( $self->{'_pos'}  | 
| 
1263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             = $self->{'_body'} )    # expected case  | 
| 
1264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         : $self->insert_element( 'body', 1 );  | 
| 
1265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     #print "POS is $pos, ", $pos->{'_tag'}, "\n";  | 
| 
1267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1269
 | 
370
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
687
 | 
             elsif ( $ptag eq 'body' ) {  | 
| 
1270
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 if ( $self->{'_implicit_body_p_tag'} ) {  | 
| 
1271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     print $indent, " * Text node under BODY implicates P.\n"  | 
| 
1272
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         if DEBUG > 1;  | 
| 
1273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $pos = $self->insert_element( 'p', 1 );  | 
| 
1274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1276
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
             elsif ( $ptag eq 'table' ) {  | 
| 
1277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 print $indent,  | 
| 
1278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     " * Text node under TABLE implicates TR and TD.\n"  | 
| 
1279
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
                     if DEBUG > 1;  | 
| 
1280
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
                 $self->insert_element( 'tr', 1 );  | 
| 
1281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $pos = $self->insert_element( 'td', 1 );  | 
| 
1282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # double whammy!  | 
| 
1284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1285
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
             elsif ( $ptag eq 'tr' ) {  | 
| 
1286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 print $indent, " * Text node under TR implicates TD.\n"  | 
| 
1287
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
                     if DEBUG > 1;  | 
| 
1288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $pos = $self->insert_element( 'td', 1 );  | 
| 
1289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # elsif (  | 
| 
1292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             #       # $ptag eq 'li'   ||  | 
| 
1293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             #       # $ptag eq 'dd'   ||  | 
| 
1294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             #         $ptag eq 'form') {  | 
| 
1295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             #    $pos = $self->insert_element('p', 1);  | 
| 
1296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             #}  | 
| 
1297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # Whatever we've done above should have had the side  | 
| 
1299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # effect of updating $self->{'_pos'}  | 
| 
1300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             #print "POS is now $pos, ", $pos->{'_tag'}, "\n";  | 
| 
1302
 | 
2152
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
3344
 | 
    | 
| 
1303
 | 
2152
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
7350
 | 
             return if $ignore_text;  | 
| 
1304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $text =~ s/[\n\r\f\t ]+/ /g    # canonical space  | 
| 
1305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 unless $no_space_compacting;  | 
| 
1306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             print $indent, " (Attaching text node ($nugget) under ",  | 
| 
1308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1309
 | 
2152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2684
 | 
            # was: $self->{'_pos'} ? $self->{'_pos'}{'_tag'} : $self->{'_tag'},  | 
| 
1310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $pos->{'_tag'}, ").\n"  | 
| 
1311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 if DEBUG > 1;  | 
| 
1312
 | 
2152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4067
 | 
    | 
| 
1313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $pos->push_content($text);  | 
| 
1314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1315
 | 
2152
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
11454
 | 
    | 
| 
1316
 | 
2152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3781
 | 
         &{ $self->{'_tweak_~text'} || return }( $text, $pos,  | 
| 
1317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $pos->{'_tag'} . '' );  | 
| 
1318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Note that this is very exceptional -- it doesn't fall back to  | 
| 
1320
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         #  _tweak_*, and it gives its tweak different arguments.  | 
| 
1321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         return;  | 
| 
1322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #==========================================================================  | 
| 
1326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # TODO: test whether comment(), declaration(), and process(), do the right  | 
| 
1328
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  thing as far as tightening and whatnot.  | 
| 
1329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Also, currently, doctypes and comments that appear before head or body  | 
| 
1330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  show up in the tree in the wrong place.  Something should be done about  | 
| 
1331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  this.  Tricky.  Maybe this whole business of pre-making the body and  | 
| 
1332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  whatnot is wrong.  | 
| 
1333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1334
 | 
188
 | 
  
 50
  
 | 
 
 | 
  
188
  
 | 
  
1
  
 | 
1468
 | 
 sub comment {  | 
| 
1335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return if $_[0]{'_stunted'};  | 
| 
1336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Accept a "here's a comment" signal from HTML::Parser.  | 
| 
1338
 | 
188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
282
 | 
    | 
| 
1339
 | 
188
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
349
 | 
     my ( $self, $text ) = @_;  | 
| 
1340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $pos = $self->{'_pos'} || $self;  | 
| 
1341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return  | 
| 
1342
 | 
188
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
715
 | 
         unless $self->{'_store_comments'}  | 
| 
1343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             || $HTML::Tagset::isCDATA_Parent{ $pos->{'_tag'} };  | 
| 
1344
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
    | 
| 
1345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if (DEBUG) {  | 
| 
1346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my @lineage_tags = $pos->lineage_tag_names;  | 
| 
1347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $indent = '  ' x ( 1 + @lineage_tags );  | 
| 
1348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $nugget  | 
| 
1350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             = ( length($text) <= 25 )  | 
| 
1351
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ? $text  | 
| 
1352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             : ( substr( $text, 0, 25 ) . '...' );  | 
| 
1353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $nugget =~ s<([\x00-\x1F])>  | 
| 
1354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  <'\\x'.(unpack("H2",$1))>eg;  | 
| 
1355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         print $indent, "Proposing a Comment ($nugget) under ",  | 
| 
1356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             join( '/', reverse( $pos->{'_tag'}, @lineage_tags ) ) || 'Root',  | 
| 
1357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ".\n";  | 
| 
1358
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
     }  | 
| 
1359
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
1360
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
     ( my $e = $self->element_class->new('~comment') )->{'text'} = $text;  | 
| 
1361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $pos->push_content($e);  | 
| 
1362
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
     ++( $self->{'_element_count'} );  | 
| 
1363
 | 
1
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
8
 | 
    | 
| 
1364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     &{         $self->{'_tweak_~comment'}  | 
| 
1365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             || $self->{'_tweak_*'}  | 
| 
1366
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             || return $e }( map $_, $e, '~comment', $self );  | 
| 
1367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return $e;  | 
| 
1369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1370
 | 
2
 | 
  
 50
  
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
532
 | 
    | 
| 
1371
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub declaration {  | 
| 
1372
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return if $_[0]{'_stunted'};  | 
| 
1373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1374
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
     # Accept a "here's a markup declaration" signal from HTML::Parser.  | 
| 
1375
 | 
2
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
11
 | 
    | 
| 
1376
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my ( $self, $text ) = @_;  | 
| 
1377
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     my $pos = $self->{'_pos'} || $self;  | 
| 
1378
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1379
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if (DEBUG) {  | 
| 
1380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my @lineage_tags = $pos->lineage_tag_names;  | 
| 
1381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $indent = '  ' x ( 1 + @lineage_tags );  | 
| 
1382
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $nugget  | 
| 
1384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             = ( length($text) <= 25 )  | 
| 
1385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ? $text  | 
| 
1386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             : ( substr( $text, 0, 25 ) . '...' );  | 
| 
1387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $nugget =~ s<([\x00-\x1F])>  | 
| 
1388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  <'\\x'.(unpack("H2",$1))>eg;  | 
| 
1389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         print $indent, "Proposing a Declaration ($nugget) under ",  | 
| 
1390
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
             join( '/', reverse( $pos->{'_tag'}, @lineage_tags ) ) || 'Root',  | 
| 
1391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ".\n";  | 
| 
1392
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     }  | 
| 
1393
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
     ( my $e = $self->element_class->new('~declaration') )->{'text'} = $text;  | 
| 
1394
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $self->{_decl} = $e;  | 
| 
1396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return $e;  | 
| 
1397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1399
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
 #==========================================================================  | 
| 
1400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub process {  | 
| 
1402
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return if $_[0]{'_stunted'};  | 
| 
1403
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1404
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     # Accept a "here's a PI" signal from HTML::Parser.  | 
| 
1405
 | 
  
0
  
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return unless $_[0]->{'_store_pis'};  | 
| 
1407
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my ( $self, $text ) = @_;  | 
| 
1408
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $pos = $self->{'_pos'} || $self;  | 
| 
1409
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if (DEBUG) {  | 
| 
1411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my @lineage_tags = $pos->lineage_tag_names;  | 
| 
1412
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $indent = '  ' x ( 1 + @lineage_tags );  | 
| 
1413
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1414
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $nugget  | 
| 
1415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             = ( length($text) <= 25 )  | 
| 
1416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ? $text  | 
| 
1417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             : ( substr( $text, 0, 25 ) . '...' );  | 
| 
1418
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $nugget =~ s<([\x00-\x1F])>  | 
| 
1419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  <'\\x'.(unpack("H2",$1))>eg;  | 
| 
1420
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         print $indent, "Proposing a PI ($nugget) under ",  | 
| 
1421
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             join( '/', reverse( $pos->{'_tag'}, @lineage_tags ) ) || 'Root',  | 
| 
1422
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             ".\n";  | 
| 
1423
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1424
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
     ( my $e = $self->element_class->new('~pi') )->{'text'} = $text;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1425
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $pos->push_content($e);  | 
| 
1426
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ++( $self->{'_element_count'} );  | 
| 
1427
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     &{ $self->{'_tweak_~pi'} || $self->{'_tweak_*'} || return $e }( map $_,  | 
| 
1429
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $e, '~pi', $self );  | 
| 
1430
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1431
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return $e;  | 
| 
1432
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1433
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1434
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #==========================================================================  | 
| 
1435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #When you call $tree->parse_file($filename), and the  | 
| 
1437
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #tree's ignore_ignorable_whitespace attribute is on (as it is  | 
| 
1438
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #by default), HTML::TreeBuilder's logic will manage to avoid  | 
| 
1439
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #creating some, but not all, nodes that represent ignorable  | 
| 
1440
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #whitespace.  However, at the end of its parse, it traverses the  | 
| 
1441
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #tree and deletes any that it missed.  (It does this with an  | 
| 
1442
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #around-method around HTML::Parser's eof method.)  | 
| 
1443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
1444
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #However, with $tree->parse($content), the cleanup-traversal step  | 
| 
1445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #doesn't happen automatically -- so when you're done parsing all  | 
| 
1446
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #content for a document (regardless of whether $content is the only  | 
| 
1447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #bit, or whether it's just another chunk of content you're parsing into  | 
| 
1448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #the tree), call $tree->eof() to signal that you're at the end of the  | 
| 
1449
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #text you're inputting to the tree.  Besides properly cleaning any bits  | 
| 
1450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #of ignorable whitespace from the tree, this will also ensure that  | 
| 
1451
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #HTML::Parser's internal buffer is flushed.  | 
| 
1452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1453
 | 
283
 | 
  
 50
  
 | 
 
 | 
  
283
  
 | 
  
1
  
 | 
1166
 | 
 sub eof {  | 
| 
1454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1455
 | 
283
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
491
 | 
     # Accept an "end-of-file" signal from HTML::Parser, or thrown by the user.  | 
| 
1456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1457
 | 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
341
 | 
     return if $_[0]->{'_done'};    # we've already been here  | 
| 
1458
 | 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
296
 | 
    | 
| 
1459
 | 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
308
 | 
     return $_[0]->SUPER::eof() if $_[0]->{'_stunted'};  | 
| 
1460
 | 
283
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
468
 | 
    | 
| 
1461
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $x = $_[0];  | 
| 
1462
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print "EOF received.\n" if DEBUG;  | 
| 
1463
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my (@rv);  | 
| 
1464
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if (wantarray) {  | 
| 
1465
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1466
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # I don't think this makes any difference for this particular  | 
| 
1467
 | 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1079
 | 
         #  method, but let's be scrupulous, for once.  | 
| 
1468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         @rv = $x->SUPER::eof();  | 
| 
1469
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1470
 | 
283
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
1152
 | 
     else {  | 
| 
1471
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $rv[0] = $x->SUPER::eof();  | 
| 
1472
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1474
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $x->end('html') unless $x eq ( $x->{'_pos'} || $x );  | 
| 
1475
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1476
 | 
283
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
480
 | 
     # That SHOULD close everything, and will run the appropriate tweaks.  | 
| 
1477
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # We /could/ be running under some insane mode such that there's more  | 
| 
1478
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #  than one HTML element, but really, that's just insane to do anyhow.  | 
| 
1479
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1480
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     unless ( $x->{'_implicit_tags'} ) {  | 
| 
1481
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1482
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # delete those silly implicit head and body in case we put  | 
| 
1483
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # them there in implicit tags mode  | 
| 
1484
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         foreach my $node ( $x->{'_head'}, $x->{'_body'} ) {  | 
| 
1485
 | 
10
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
65
 | 
             $node->replace_with_content  | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1486
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 if defined $node  | 
| 
1487
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     and ref $node  | 
| 
1488
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     and $node->{'_implicit'}  | 
| 
1489
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     and $node->{'_parent'};  | 
| 
1490
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1491
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # I think they should be empty anyhow, since the only  | 
| 
1492
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # logic that'd insert under them can apply only, I think,  | 
| 
1493
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # in the case where _implicit_tags is on  | 
| 
1494
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1495
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # this may still leave an implicit 'html' at the top, but there's  | 
| 
1497
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # nothing we can do about that, is there?  | 
| 
1498
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1499
 | 
283
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
1327
 | 
    | 
| 
1500
 | 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
447
 | 
     $x->delete_ignorable_whitespace()  | 
| 
1501
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1502
 | 
283
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
459
 | 
         # this's why we trap this -- an after-method  | 
| 
1503
 | 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
620
 | 
         if $x->{'_tighten'} and !$x->{'_ignore_text'};  | 
| 
1504
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $x->{'_done'} = 1;  | 
| 
1505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1506
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return @rv if wantarray;  | 
| 
1507
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return $rv[0];  | 
| 
1508
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1509
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1510
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #==========================================================================  | 
| 
1511
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
    | 
| 
1512
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 # TODO: document  | 
| 
1513
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1514
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub stunt {  | 
| 
1515
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $self = $_[0];  | 
| 
1516
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print "Stunting the tree.\n" if DEBUG;  | 
| 
1517
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $self->{'_done'} = 1;  | 
| 
1518
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1519
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if ( $HTML::Parser::VERSION < 3 ) {  | 
| 
1520
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1521
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         #This is a MEAN MEAN HACK.  And it works most of the time!  | 
| 
1522
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $self->{'_buf'} = '';  | 
| 
1523
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         my $fh = *HTML::Parser::F{IO};  | 
| 
1524
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1525
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # the local'd FH used by parse_file loop  | 
| 
1526
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         if ( defined $fh ) {  | 
| 
1527
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             print "Closing Parser's filehandle $fh\n" if DEBUG;  | 
| 
1528
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             close($fh);  | 
| 
1529
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1530
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1531
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       # But if they called $tree->parse_file($filehandle)  | 
| 
1532
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       #  or $tree->parse_file(*IO), then there will be no *HTML::Parser::F{IO}  | 
| 
1533
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       #  to close.  Ahwell.  Not a problem for most users these days.  | 
| 
1534
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1535
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1536
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
1537
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $self->SUPER::eof();  | 
| 
1538
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1539
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Under 3+ versions, calling eof from inside a parse will abort the  | 
| 
1540
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         #  parse / parse_file  | 
| 
1541
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     }  | 
| 
1542
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1543
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # In the off chance that the above didn't work, we'll throw  | 
| 
1544
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #  this flag to make any future events be no-ops.  | 
| 
1545
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $self->stunted(1);  | 
| 
1546
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     return;  | 
| 
1547
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
 }  | 
| 
1548
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1549
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # TODO: document  | 
| 
1550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub stunted { shift->_elem( '_stunted', @_ ); }  | 
| 
1551
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub done    { shift->_elem( '_done',    @_ ); }  | 
| 
1552
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1553
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #==========================================================================  | 
| 
1554
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1555
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub delete {  | 
| 
1556
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1557
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Override Element's delete method.  | 
| 
1558
 | 
250
 | 
 
 | 
 
 | 
  
250
  
 | 
  
1
  
 | 
70008
 | 
     # This does most, if not all, of what Element's delete does anyway.  | 
| 
1559
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Deletes content, including content in some special attributes.  | 
| 
1560
 | 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
342
 | 
     # But doesn't empty out the hash.  | 
| 
 
 | 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
641
 | 
    | 
| 
1561
 | 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
333
 | 
    | 
| 
1562
 | 
250
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
668
 | 
     $_[0]->{'_element_count'} = 1;    # never hurts to be scrupulously correct  | 
| 
1563
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1564
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     delete @{ $_[0] }{ '_body', '_head', '_pos' };  | 
| 
1565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for (  | 
| 
1566
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         @{ delete( $_[0]->{'_content'} ) || [] },    # all/any content  | 
| 
1567
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1568
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      #       delete @{$_[0]}{'_body', '_head', '_pos'}  | 
| 
1569
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      # ...and these, in case these elements don't appear in the  | 
| 
1570
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      #   content, which is possible.  If they did appear (as they  | 
| 
1571
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      #   usually do), then calling $_->delete on them again is harmless.  | 
| 
1572
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      #  I don't think that's such a hot idea now.  Thru creative reattachment,  | 
| 
1573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      #  those could actually now point to elements in OTHER trees (which we do  | 
| 
1574
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      #  NOT want to delete!).  | 
| 
1575
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## Reasoned out:  | 
| 
1576
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   #  If these point to elements not in the content list of any element in this  | 
| 
1577
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   #   tree, but not in the content list of any element in any OTHER tree, then  | 
| 
1578
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   #   just deleting these will make their refcounts hit zero.  | 
| 
1579
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   #  If these point to elements in the content lists of elements in THIS tree,  | 
| 
1580
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   #   then we'll get to deleting them when we delete from the top.  | 
| 
1581
 | 
500
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
2506
 | 
   #  If these point to elements in the content lists of elements in SOME OTHER  | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1582
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   #   tree, then they're not to be deleted.  | 
| 
1583
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         )  | 
| 
1584
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1585
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $_->delete  | 
| 
1586
 | 
250
 | 
  
  0
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
700
 | 
             if defined $_ and ref $_    #  Make sure it's an object.  | 
| 
1587
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 and $_ ne $_[0];    #  And avoid hitting myself, just in case!  | 
| 
1588
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1589
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1590
 | 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1606
 | 
     $_[0]->detach if $_[0]->{'_parent'} and $_[0]->{'_parent'}{'_content'};  | 
| 
1591
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1592
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # An 'html' element having a parent is quite unlikely.  | 
| 
1593
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1594
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     return;  | 
| 
1595
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1596
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1597
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub tighten_up {                    # legacy  | 
| 
1598
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     shift->delete_ignorable_whitespace(@_);  | 
| 
1599
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1600
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
  
1
  
 | 
4
 | 
    | 
| 
1601
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 sub elementify {  | 
| 
1602
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
    | 
| 
1603
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Rebless this object down into the normal element class.  | 
| 
1604
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
     my $self     = $_[0];  | 
| 
1605
 | 
63
 | 
  
100
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
502
 | 
     my $to_class = $self->element_class;  | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1606
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     delete @{$self}{  | 
| 
1607
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         grep {  | 
| 
1608
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ;  | 
| 
1609
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             length $_ and substr( $_, 0, 1 ) eq '_'  | 
| 
1610
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1611
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # The private attributes that we'll retain:  | 
| 
1612
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 and $_ ne '_tag'  | 
| 
1613
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 and $_ ne '_parent'  | 
| 
1614
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 and $_ ne '_content'  | 
| 
1615
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 and $_ ne '_implicit'  | 
| 
1616
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
                 and $_ ne '_pos'  | 
| 
1617
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 and $_ ne '_element_class'  | 
| 
1618
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             } keys %$self  | 
| 
1619
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         };  | 
| 
1620
 | 
4071
 | 
  
100
  
 | 
 
 | 
  
4071
  
 | 
  
1
  
 | 
7617
 | 
     bless $self, $to_class;    # Returns the same object we were fed  | 
| 
1621
 | 
3780
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
14145
 | 
 }  | 
| 
1622
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1623
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub element_class {  | 
| 
1624
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return 'HTML::Element' if not ref $_[0];  | 
| 
1625
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return $_[0]->{_element_class} || 'HTML::Element';  | 
| 
1626
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1627
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
    | 
| 
1628
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #--------------------------------------------------------------------------  | 
| 
1629
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1630
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub guts {  | 
| 
1631
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my @out;  | 
| 
1632
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my @stack       = ( $_[0] );  | 
| 
1633
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $destructive = $_[1];  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1634
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $this;  | 
| 
1635
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     while (@stack) {  | 
| 
1636
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $this = shift @stack;  | 
| 
1637
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         if ( !ref $this ) {  | 
| 
1638
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             push @out, $this;    # yes, it can include text nodes  | 
| 
1639
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1640
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ( !$this->{'_implicit'} ) {  | 
| 
1641
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             push @out, $this;  | 
| 
1642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             delete $this->{'_parent'} if $destructive;  | 
| 
1643
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1644
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
1645
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1646
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # it's an implicit node.  Delete it and recurse  | 
| 
1647
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             delete $this->{'_parent'} if $destructive;  | 
| 
1648
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             unshift @stack,  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1649
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 @{  | 
| 
1650
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 (   $destructive  | 
| 
1651
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ? delete( $this->{'_content'} )  | 
| 
1652
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     : $this->{'_content'}  | 
| 
1653
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     )  | 
| 
1654
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     || []  | 
| 
1655
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 };  | 
| 
1656
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1657
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1658
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1659
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Doesn't call a real $root->delete on the (when implicit) root,  | 
| 
1660
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
     #  but I don't think it needs to.  | 
| 
1661
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1662
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return @out if wantarray;    # one simple normal case.  | 
| 
1663
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return unless @out;  | 
| 
1664
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return $out[0] if @out == 1 and ref( $out[0] );  | 
| 
1665
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $x = HTML::Element->new( 'div', '_implicit' => 1 );  | 
| 
1666
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
     $x->push_content(@out);  | 
| 
1667
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return $x;  | 
| 
1668
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1669
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1670
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub disembowel { $_[0]->guts(1) }  | 
| 
1671
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1672
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #--------------------------------------------------------------------------  | 
| 
1673
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
1674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1675
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __END__  |