line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HTML::TreeBuilder::XPath; |
2
|
|
|
|
|
|
|
|
3
|
3
|
|
|
3
|
|
25658
|
use List::Util qw( first); |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
394
|
|
4
|
3
|
|
|
3
|
|
18
|
use Scalar::Util (); |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
55
|
|
5
|
|
|
|
|
|
|
|
6
|
3
|
|
|
3
|
|
14
|
use strict; |
|
3
|
|
|
|
|
10
|
|
|
3
|
|
|
|
|
99
|
|
7
|
3
|
|
|
3
|
|
29
|
use warnings; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
101
|
|
8
|
|
|
|
|
|
|
|
9
|
3
|
|
|
3
|
|
14
|
use vars qw($VERSION); |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
386
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
$VERSION = '0.14'; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
my %CHAR2DEFAULT_ENT= ( '&' => '&', '<' => '<', '>' => '>', '"' => '"'); |
14
|
|
|
|
|
|
|
my %NUM2DEFAULT_ENT= ( '38' => 'amp', '60' => 'lt', '62' => 'gt', '"' => '"'); |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
package HTML::TreeBuilder::XPath; |
17
|
|
|
|
|
|
|
|
18
|
3
|
|
|
3
|
|
17
|
use base( 'HTML::TreeBuilder'); |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
4016
|
|
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
package HTML::TreeBuilder::XPath::Node; |
21
|
|
|
|
|
|
|
|
22
|
938
|
|
|
938
|
|
12696
|
sub isElementNode { 0 } |
23
|
2
|
|
|
2
|
|
1982
|
sub isAttributeNode { 0 } |
24
|
0
|
|
|
0
|
|
0
|
sub isNamespaceNode { 0 } |
25
|
0
|
|
|
0
|
|
0
|
sub isTextNode { 0 } |
26
|
0
|
|
|
0
|
|
0
|
sub isProcessingInstructionNode { 0 } |
27
|
0
|
|
|
0
|
|
0
|
sub isPINode { 0 } |
28
|
0
|
|
|
0
|
|
0
|
sub isCommentNode { 0 } |
29
|
|
|
|
|
|
|
|
30
|
1322
|
100
|
|
1322
|
|
41221
|
sub getChildNodes { return wantarray ? () : []; } |
31
|
80
|
|
|
80
|
|
458
|
sub getFirstChild { return undef; } |
32
|
9
|
|
|
9
|
|
48
|
sub getLastChild { return undef; } |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
# need to do a complete look_down each time, as the id could have been changed |
35
|
|
|
|
|
|
|
# without any object being involved, hence without a potential cache being |
36
|
|
|
|
|
|
|
# up to date |
37
|
|
|
|
|
|
|
sub getElementById |
38
|
2
|
|
|
2
|
|
18
|
{ my ($self, $id) = @_; |
39
|
2
|
|
|
|
|
12
|
return scalar $self->look_down( id => $id); |
40
|
|
|
|
|
|
|
} |
41
|
|
|
|
|
|
|
|
42
|
0
|
|
|
0
|
|
0
|
sub to_number { return XML::XPathEngine::Number->new( shift->getValue); } |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
sub cmp |
45
|
52
|
|
|
52
|
|
2495
|
{ my( $a, $b)=@_; |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
# comparison with the root (in $b, or processed in HTML::TreeBuilder::XPath::Root) |
48
|
52
|
50
|
|
|
|
438
|
if( $b->isa( 'HTML::TreeBuilder::XPath::Root') ) { return -1; } |
|
0
|
|
|
|
|
0
|
|
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
# easy cases |
51
|
52
|
50
|
|
|
|
219
|
return 0 if( $a == $b); |
52
|
52
|
50
|
|
|
|
167
|
return 1 if( $a->is_inside($b)); # a starts after b |
53
|
52
|
50
|
|
|
|
1548
|
return -1 if( $b->is_inside($a)); # a starts before b |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# lineage does not include the element itself |
56
|
52
|
|
|
|
|
1566
|
my @a_pile= ($a, $a->lineage); |
57
|
52
|
|
|
|
|
745
|
my @b_pile= ($b, $b->lineage); |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# the 2 elements are not in the same twig |
60
|
52
|
50
|
|
|
|
767
|
unless( $a_pile[-1] == $b_pile[-1]) |
61
|
0
|
|
|
|
|
0
|
{ warn "2 nodes not in the same pile: ", ref( $a), " - ", ref( $b), "\n"; |
62
|
0
|
|
|
|
|
0
|
print "a: ", $a->string_value, "\nb: ", $b->string_value, "\n"; |
63
|
0
|
|
|
|
|
0
|
return undef; |
64
|
|
|
|
|
|
|
} |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# find the first non common ancestors (they are siblings) |
67
|
52
|
|
|
|
|
85
|
my $a_anc= pop @a_pile; |
68
|
52
|
|
|
|
|
65
|
my $b_anc= pop @b_pile; |
69
|
|
|
|
|
|
|
|
70
|
52
|
|
|
|
|
128
|
while( $a_anc == $b_anc) |
71
|
138
|
|
|
|
|
144
|
{ $a_anc= pop @a_pile; |
72
|
138
|
|
|
|
|
291
|
$b_anc= pop @b_pile; |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
|
75
|
52
|
50
|
33
|
|
|
251
|
if( defined( $a_anc->{_rank}) && defined( $b_anc->{_rank})) |
76
|
52
|
|
|
|
|
242
|
{ return $a_anc->{_rank} <=> $b_anc->{_rank}; } |
77
|
|
|
|
|
|
|
else |
78
|
|
|
|
|
|
|
{ |
79
|
|
|
|
|
|
|
# from there move left and right and figure out the order |
80
|
0
|
|
|
|
|
0
|
my( $a_prev, $a_next, $b_prev, $b_next)= ($a_anc, $a_anc, $b_anc, $b_anc); |
81
|
0
|
|
|
|
|
0
|
while() |
82
|
0
|
|
0
|
|
|
0
|
{ $a_prev= $a_prev->getPreviousSibling || return -1; |
83
|
0
|
0
|
|
|
|
0
|
return 1 if( $a_prev == $b_anc); |
84
|
0
|
|
0
|
|
|
0
|
$a_next= $a_next->getNextSibling || return 1; |
85
|
0
|
0
|
|
|
|
0
|
return -1 if( $a_next == $b_anc); |
86
|
0
|
|
0
|
|
|
0
|
$b_prev= $b_prev->getPreviousSibling || return 1; |
87
|
0
|
0
|
|
|
|
0
|
return -1 if( $b_prev == $a_next); |
88
|
0
|
|
0
|
|
|
0
|
$b_next= $b_next->getNextSibling || return -1; |
89
|
0
|
0
|
|
|
|
0
|
return 1 if( $b_next == $a_prev); |
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
} |
92
|
|
|
|
|
|
|
} |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# need to modify directly the HTML::Element package, because HTML::TreeBuilder won't let me |
96
|
|
|
|
|
|
|
# change the class of the nodes it generates |
97
|
|
|
|
|
|
|
package HTML::Element; |
98
|
3
|
|
|
3
|
|
134358
|
use Scalar::Util qw(weaken); |
|
3
|
|
|
|
|
9
|
|
|
3
|
|
|
|
|
438
|
|
99
|
3
|
|
|
3
|
|
17
|
use vars qw(@ISA); |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
205
|
|
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
push @ISA, 'HTML::TreeBuilder::XPath::Node'; |
102
|
|
|
|
|
|
|
|
103
|
3
|
|
|
3
|
|
3408
|
use XML::XPathEngine; |
|
3
|
|
|
|
|
111172
|
|
|
3
|
|
|
|
|
4677
|
|
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
{ my $xp; |
106
|
|
|
|
|
|
|
sub xp |
107
|
74
|
|
66
|
74
|
0
|
265
|
{ $xp ||=XML::XPathEngine->new(); |
108
|
74
|
|
|
|
|
452
|
return $xp; |
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
|
113
|
2
|
|
|
2
|
0
|
1387
|
sub findnodes { my( $elt, $path)= @_; return xp->findnodes( $path, $elt); } |
|
2
|
|
|
|
|
7
|
|
114
|
0
|
|
|
0
|
0
|
0
|
sub findnodes_as_string { my( $elt, $path)= @_; return xp->findnodes_as_string( $path, $elt); } |
|
0
|
|
|
|
|
0
|
|
115
|
2
|
|
|
2
|
0
|
782
|
sub findnodes_as_strings { my( $elt, $path)= @_; return xp->findnodes_as_strings( $path, $elt); } |
|
2
|
|
|
|
|
11
|
|
116
|
68
|
|
|
68
|
0
|
52376
|
sub findvalue { my( $elt, $path)= @_; return xp->findvalue( $path, $elt); } |
|
68
|
|
|
|
|
199
|
|
117
|
2
|
|
|
2
|
0
|
1389
|
sub findvalues { my( $elt, $path)= @_; return xp->findvalues( $path, $elt); } |
|
2
|
|
|
|
|
8
|
|
118
|
0
|
|
|
0
|
0
|
0
|
sub exists { my( $elt, $path)= @_; return xp->exists( $path, $elt); } |
|
0
|
|
|
|
|
0
|
|
119
|
0
|
|
|
0
|
0
|
0
|
sub find_xpath { my( $elt, $path)= @_; return xp->find( $path, $elt); } |
|
0
|
|
|
|
|
0
|
|
120
|
0
|
|
|
0
|
0
|
0
|
sub matches { my( $elt, $path)= @_; return xp->matches( $elt, $path, $elt); } |
|
0
|
|
|
|
|
0
|
|
121
|
0
|
|
|
0
|
0
|
0
|
sub set_namespace { my $elt= shift; xp->new->set_namespace( @_); } |
|
0
|
|
|
|
|
0
|
|
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
sub getRootNode |
124
|
75
|
|
|
75
|
0
|
101979
|
{ my $elt= shift; |
125
|
|
|
|
|
|
|
# The parent of root is a HTML::TreeBuilder::XPath::Root |
126
|
|
|
|
|
|
|
# that helps getting the tree to mimic a DOM tree |
127
|
75
|
|
|
|
|
321
|
return $elt->root->getParentNode; # I like this one! |
128
|
|
|
|
|
|
|
} |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
sub getParentNode |
131
|
150
|
|
|
150
|
0
|
835
|
{ my $elt= shift; |
132
|
150
|
|
100
|
|
|
1216
|
return $elt->{_parent} || bless { _root => $elt }, 'HTML::TreeBuilder::XPath::Root'; |
133
|
|
|
|
|
|
|
} |
134
|
1102
|
|
|
1102
|
0
|
7866
|
sub getName { return shift->tag; } |
135
|
0
|
|
|
0
|
0
|
0
|
sub getLocalName { (my $name= $_[0]->tag) =~ s{^.*:}{}; $name; } |
|
0
|
|
|
|
|
0
|
|
136
|
143
|
|
|
143
|
0
|
3325
|
sub getNextSibling { my( $elt)= @_; |
137
|
143
|
|
100
|
|
|
368
|
my $parent= $elt->{_parent} || return undef; |
138
|
122
|
|
100
|
|
|
336
|
return $parent->_child_as_object( scalar $elt->right, ($elt->{_rank} || 0) + 1); |
139
|
|
|
|
|
|
|
} |
140
|
21
|
|
|
21
|
0
|
406
|
sub getPreviousSibling { my( $elt)= @_; |
141
|
21
|
|
100
|
|
|
50
|
my $parent= $elt->{_parent} || return undef; |
142
|
18
|
100
|
|
|
|
40
|
return undef unless $elt->{_rank}; |
143
|
12
|
|
|
|
|
31
|
return $parent->_child_as_object( scalar $elt->left, $elt->{_rank} - 1); |
144
|
|
|
|
|
|
|
} |
145
|
1102
|
50
|
33
|
1102
|
0
|
27186
|
sub isElementNode { return ref $_[0] && ($_[0]->{_tag}!~ m{^~}) ? 1 : 0; } |
146
|
77
|
50
|
33
|
77
|
0
|
555
|
sub isCommentNode { return ref $_[0] && ($_[0]->{_tag} eq '~comment') ? 1 : 0; } |
147
|
0
|
0
|
0
|
0
|
0
|
0
|
sub isProcessingInstructionNode { return ref $_[0] && ($_[0]->{_tag} eq '~pi') ? 1 : 0; } |
148
|
5
|
50
|
|
5
|
0
|
89
|
sub isTextNode { return ref $_[0] ? 0 : 1; } |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
sub getValue |
151
|
2
|
|
|
2
|
0
|
64
|
{ my $elt= shift; |
152
|
2
|
50
|
|
|
|
7
|
if( $elt->isCommentNode) { return $elt->{text}; } |
|
0
|
|
|
|
|
0
|
|
153
|
2
|
|
|
|
|
9
|
return $elt->as_text; |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
sub getChildNodes |
157
|
1544
|
|
|
1544
|
0
|
63789
|
{ my $parent= shift; |
158
|
1544
|
|
|
|
|
1676
|
my $rank=0; |
159
|
1544
|
|
|
|
|
3600
|
my @children= map { $parent->_child_as_object( $_, $rank++) } $parent->content_list; |
|
2878
|
|
|
|
|
11988
|
|
160
|
1544
|
100
|
|
|
|
6949
|
return wantarray ? @children : \@children; |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
sub getFirstChild |
164
|
80
|
|
|
80
|
0
|
1280
|
{ my $parent= shift; |
165
|
80
|
|
|
|
|
186
|
my @content= $parent->content_list; |
166
|
80
|
50
|
|
|
|
505
|
if( @content) |
167
|
80
|
|
|
|
|
188
|
{ return $parent->_child_as_object( $content[0], 0); } |
168
|
|
|
|
|
|
|
else |
169
|
0
|
|
|
|
|
0
|
{ return undef; } |
170
|
|
|
|
|
|
|
} |
171
|
|
|
|
|
|
|
sub getLastChild |
172
|
12
|
|
|
12
|
0
|
146
|
{ my $parent= shift; |
173
|
12
|
|
|
|
|
24
|
my @content= $parent->content_list; |
174
|
12
|
100
|
|
|
|
70
|
if( @content) |
175
|
9
|
|
|
|
|
27
|
{ return $parent->_child_as_object( $content[-1], $#content); } |
176
|
|
|
|
|
|
|
else |
177
|
3
|
|
|
|
|
6
|
{ return undef; } |
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
sub getAttributes |
181
|
324
|
|
|
324
|
0
|
46255
|
{ my $elt= shift; |
182
|
324
|
|
|
|
|
793
|
my %atts= $elt->all_external_attr; |
183
|
324
|
|
|
|
|
4560
|
my $rank=0; |
184
|
324
|
|
|
|
|
736
|
my @atts= map { bless( { _name => $_, _value => $atts{$_}, |
|
272
|
|
|
|
|
1767
|
|
185
|
|
|
|
|
|
|
_elt => $elt, _rank => $rank++, |
186
|
|
|
|
|
|
|
}, |
187
|
|
|
|
|
|
|
'HTML::TreeBuilder::XPath::Attribute' |
188
|
|
|
|
|
|
|
) |
189
|
|
|
|
|
|
|
} sort keys %atts; |
190
|
324
|
50
|
|
|
|
1479
|
return wantarray ? @atts : \@atts; |
191
|
|
|
|
|
|
|
} |
192
|
|
|
|
|
|
|
|
193
|
0
|
|
|
0
|
0
|
0
|
sub to_number { return XML::XPathEngine::Number->new( $_[0]->as_text); } |
194
|
|
|
|
|
|
|
sub string_value |
195
|
75
|
|
|
75
|
0
|
7977
|
{ my $elt= shift; |
196
|
75
|
50
|
|
|
|
193
|
if( $elt->isCommentNode) { return $elt->{text}; } |
|
0
|
|
|
|
|
0
|
|
197
|
75
|
|
|
|
|
264
|
return $elt->as_text; |
198
|
|
|
|
|
|
|
}; |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
# called on a parent, with a child as second argument and its rank as third |
201
|
|
|
|
|
|
|
# returns the child if it is already an element, or |
202
|
|
|
|
|
|
|
# a new HTML::TreeBuilder::XPath::Text element if it is a plain string |
203
|
|
|
|
|
|
|
sub _child_as_object |
204
|
3101
|
|
|
3101
|
|
7096
|
{ my( $elt, $elt_or_text, $rank)= @_; |
205
|
3101
|
100
|
|
|
|
5932
|
return undef unless( defined $elt_or_text); |
206
|
3059
|
100
|
|
|
|
5805
|
if( ! ref $elt_or_text) |
207
|
|
|
|
|
|
|
{ # $elt_or_text is a string, turn it into a TextNode object |
208
|
1477
|
|
|
|
|
5851
|
$elt_or_text= bless { _content => $elt_or_text, _parent => $elt, }, |
209
|
|
|
|
|
|
|
'HTML::TreeBuilder::XPath::TextNode' |
210
|
|
|
|
|
|
|
; |
211
|
1477
|
|
|
|
|
3869
|
Scalar::Util::weaken($elt_or_text->{_parent}); |
212
|
|
|
|
|
|
|
} |
213
|
3059
|
50
|
|
|
|
5486
|
if( ref $rank) { warn "rank is a ", ref( $rank), " elt_or_text is a ", ref( $elt_or_text); } |
|
0
|
|
|
|
|
0
|
|
214
|
3059
|
|
|
|
|
4607
|
$elt_or_text->{_rank}= $rank; # used for sorting; |
215
|
3059
|
|
|
|
|
7656
|
return $elt_or_text; |
216
|
|
|
|
|
|
|
} |
217
|
|
|
|
|
|
|
|
218
|
0
|
|
|
0
|
0
|
0
|
sub toString { return shift->as_XML( @_); } |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
# produces better looking XML |
221
|
|
|
|
|
|
|
{ |
222
|
3
|
|
|
3
|
|
68
|
no warnings 'redefine'; |
|
3
|
|
|
|
|
13
|
|
|
3
|
|
|
|
|
1652
|
|
223
|
|
|
|
|
|
|
sub as_XML_compact |
224
|
0
|
|
|
0
|
0
|
0
|
{ my( $node, $opt)= @_; |
225
|
0
|
|
|
|
|
0
|
my $name = $node->{'_tag'}; |
226
|
0
|
0
|
|
|
|
0
|
if( $name eq '~literal') { return _xml_escape_text( $node->{text}); } |
|
0
|
|
|
|
|
0
|
|
227
|
|
|
|
|
|
|
|
228
|
0
|
0
|
|
|
|
0
|
if( $name eq '~declaration') { return '{text}) . ">"; } |
|
0
|
|
|
|
|
0
|
|
229
|
0
|
0
|
|
|
|
0
|
if( $name eq '~pi') { return '' . _xml_escape_text( $node->{text}) . '?>'; } |
|
0
|
|
|
|
|
0
|
|
230
|
0
|
0
|
|
|
|
0
|
if( $name eq '~comment') { return ''; } |
|
0
|
|
|
|
|
0
|
|
231
|
|
|
|
|
|
|
|
232
|
0
|
|
|
|
|
0
|
my $lc_name= lc $name; |
233
|
|
|
|
|
|
|
|
234
|
0
|
|
|
|
|
0
|
my $xml= $node->_start_tag; |
235
|
|
|
|
|
|
|
|
236
|
0
|
0
|
|
|
|
0
|
if( $HTML::Tagset::isCDATA_Parent{$lc_name}) |
237
|
0
|
|
0
|
|
|
0
|
{ my $content= $node->{_content} || ''; |
238
|
0
|
0
|
0
|
|
|
0
|
if( ref $content eq 'ARRAY' || $content->isa( 'ARRAY')) |
239
|
0
|
|
|
|
|
0
|
{ $xml .= _xml_escape_cdata( join( '', @$content), $opt); } |
240
|
|
|
|
|
|
|
else |
241
|
0
|
|
|
|
|
0
|
{ $xml .= $content; } |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
else |
244
|
|
|
|
|
|
|
{ # start tag |
245
|
0
|
|
|
|
|
0
|
foreach my $child ($node->content_list) |
246
|
0
|
0
|
|
|
|
0
|
{ if( ref $child) { $xml .= $child->as_XML_compact(); } |
|
0
|
|
|
|
|
0
|
|
247
|
0
|
|
|
|
|
0
|
else { $xml .= _xml_escape_text( $child); } |
248
|
|
|
|
|
|
|
} |
249
|
|
|
|
|
|
|
} |
250
|
0
|
0
|
|
|
|
0
|
$xml.= "$name>" unless $HTML::Tagset::emptyElement{$lc_name}; |
251
|
0
|
|
|
|
|
0
|
return $xml; |
252
|
|
|
|
|
|
|
} |
253
|
|
|
|
|
|
|
} |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
{ my %phrase_name; # all phrase tags, + literals (those are not indented) |
258
|
|
|
|
|
|
|
my %extra_newline; # tags that get an extra newline before the end tag |
259
|
|
|
|
|
|
|
my $default_indent; # 2 spaces, change with the 'indent' option |
260
|
|
|
|
|
|
|
BEGIN |
261
|
3
|
|
|
3
|
|
91
|
{ %phrase_name= %HTML::Tagset::isPhraseMarkup; |
262
|
3
|
|
|
|
|
13
|
$phrase_name{'~literal'}= 1; |
263
|
3
|
|
|
|
|
7
|
$default_indent= ' '; |
264
|
3
|
|
|
|
|
8
|
%extra_newline= map { $_ => 1 } qw(html head body script div table tbody thead tfoot tr form dl ol ul); |
|
42
|
|
|
|
|
3004
|
|
265
|
|
|
|
|
|
|
} |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
sub as_XML_indented |
268
|
0
|
|
|
0
|
0
|
0
|
{ my( $node, $opt)= @_; |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
|
271
|
0
|
|
|
|
|
0
|
my $name = $node->{'_tag'}; |
272
|
0
|
|
|
|
|
0
|
my $lc_name= lc $name; |
273
|
|
|
|
|
|
|
|
274
|
0
|
0
|
|
|
|
0
|
if( $name eq '~literal') { return _xml_escape_text( $node->{text}); } |
|
0
|
|
|
|
|
0
|
|
275
|
0
|
0
|
|
|
|
0
|
if( $name eq '~declaration') { return '{text}) . ">\n"; } |
|
0
|
|
|
|
|
0
|
|
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
|
278
|
0
|
0
|
|
|
|
0
|
if( $name eq '~pi') { return '' . _xml_escape_text( $node->{text}) . '?>'; } |
|
0
|
|
|
|
|
0
|
|
279
|
0
|
0
|
|
|
|
0
|
if( $name eq '~comment') { return ''; } |
|
0
|
|
|
|
|
0
|
|
280
|
|
|
|
|
|
|
|
281
|
0
|
|
|
|
|
0
|
my $xml; |
282
|
0
|
|
|
|
|
0
|
my $pre_tag_indent=''; |
283
|
0
|
0
|
0
|
|
|
0
|
if(!$phrase_name{$lc_name}) { $pre_tag_indent= "\n" . ($opt->{indent} || $default_indent) x ($opt->{indent_level}||0); } |
|
0
|
|
0
|
|
|
0
|
|
284
|
0
|
0
|
|
|
|
0
|
if( $opt->{indent_level}) { $xml .= $pre_tag_indent; } |
|
0
|
|
|
|
|
0
|
|
285
|
|
|
|
|
|
|
|
286
|
0
|
|
|
|
|
0
|
$xml.= $node->_start_tag(); |
287
|
|
|
|
|
|
|
|
288
|
0
|
|
|
|
|
0
|
my $content=''; |
289
|
|
|
|
|
|
|
|
290
|
0
|
0
|
|
|
|
0
|
if( $HTML::Tagset::isCDATA_Parent{$lc_name}) |
291
|
0
|
|
0
|
|
|
0
|
{ my $content= $node->{_content} || ''; |
292
|
0
|
0
|
0
|
|
|
0
|
if( ref $content && (ref $content eq 'ARRAY' || $content->isa( 'ARRAY') )) |
|
|
|
0
|
|
|
|
|
293
|
0
|
|
|
|
|
0
|
{ $content= _xml_escape_cdata( join( '', @$content), $opt); } |
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
else |
296
|
|
|
|
|
|
|
{ |
297
|
0
|
|
|
|
|
0
|
my %child_opt= %$opt; |
298
|
0
|
|
|
|
|
0
|
$child_opt{indent_level}++; |
299
|
0
|
|
|
|
|
0
|
foreach my $child ($node->content_list) |
300
|
0
|
0
|
|
|
|
0
|
{ if( ref $child) { $content .= $child->as_XML_indented( \%child_opt ); } |
|
0
|
|
|
|
|
0
|
|
301
|
0
|
|
|
|
|
0
|
else { $content .= _xml_escape_text( $child); } |
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
} |
304
|
0
|
|
|
|
|
0
|
$xml .= $content; |
305
|
|
|
|
|
|
|
|
306
|
0
|
0
|
0
|
|
|
0
|
if( $extra_newline{$lc_name} && $content ne '' ) { $xml.= $pre_tag_indent; } |
|
0
|
|
|
|
|
0
|
|
307
|
0
|
0
|
|
|
|
0
|
$xml.= "$name>" unless $HTML::Tagset::emptyElement{$lc_name}; |
308
|
0
|
0
|
|
|
|
0
|
$xml .="\n" if( !$opt->{indent_level}); |
309
|
|
|
|
|
|
|
|
310
|
0
|
|
|
|
|
0
|
return $xml; |
311
|
|
|
|
|
|
|
} |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
sub _start_tag |
315
|
0
|
|
|
0
|
|
0
|
{ my( $node)= @_; |
316
|
0
|
|
|
|
|
0
|
my $name = $node->{'_tag'}; |
317
|
0
|
|
|
|
|
0
|
my $start_tag.= "<$name"; |
318
|
0
|
|
|
|
|
0
|
foreach my $att_name (sort keys %$node) |
319
|
0
|
0
|
0
|
|
|
0
|
{ next if( (!length $att_name) || ($att_name=~ m{^_}) || ($att_name eq '/') ); |
|
|
|
0
|
|
|
|
|
320
|
0
|
|
|
|
|
0
|
my $well_formed_att_name= well_formed_name( $att_name); |
321
|
0
|
|
|
|
|
0
|
$start_tag .= qq{ $well_formed_att_name="} . _xml_escape_attribute_value( $node->{$att_name}) . qq{"}; |
322
|
|
|
|
|
|
|
} |
323
|
0
|
0
|
|
|
|
0
|
$start_tag.= $HTML::Tagset::emptyElement{lc $name} ? " />" : ">"; |
324
|
0
|
|
|
|
|
0
|
return $start_tag; |
325
|
|
|
|
|
|
|
} |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
sub well_formed_name |
328
|
0
|
|
|
0
|
0
|
0
|
{ my( $name)= @_; |
329
|
0
|
|
|
|
|
0
|
$name=~ s{[^\w:_-]+}{_}g; |
330
|
0
|
0
|
|
|
|
0
|
if( $name=~ m{^\d}) { $name= "a$name"; } |
|
0
|
|
|
|
|
0
|
|
331
|
0
|
|
|
|
|
0
|
return $name; |
332
|
|
|
|
|
|
|
} |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
sub _indent_level |
335
|
0
|
|
|
0
|
|
0
|
{ my( $node)= @_; |
336
|
0
|
|
|
|
|
0
|
my $level= scalar grep { !$HTML::Tagset::isPhraseMarkup{lc $_->{_tag}} } $node->lineage; |
|
0
|
|
|
|
|
0
|
|
337
|
0
|
|
|
|
|
0
|
return $level; |
338
|
|
|
|
|
|
|
} |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
{ my( $indent, %extra_newline, $nl); |
341
|
|
|
|
|
|
|
BEGIN |
342
|
3
|
|
|
3
|
|
8
|
{ $indent= ' '; |
343
|
3
|
|
|
|
|
13
|
$nl= "\n"; |
344
|
3
|
|
|
|
|
8
|
%extra_newline= map { $_ => 1 } qw(html head body script div table tr form ol ul); |
|
30
|
|
|
|
|
1870
|
|
345
|
|
|
|
|
|
|
} |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
sub indents |
348
|
0
|
|
|
0
|
0
|
0
|
{ my( $opt, $name)= @_; |
349
|
0
|
|
|
|
|
0
|
my $indents= { pre_start_tag => '', post_start_tag => '', pre_end_tag => '', post_end_tag => ''}; |
350
|
0
|
0
|
|
|
|
0
|
if( $opt->{indented}) |
351
|
0
|
|
|
|
|
0
|
{ my $indent_level= $opt->{indent_level}; |
352
|
0
|
|
|
|
|
0
|
my $wrapping_nl= $nl; |
353
|
0
|
0
|
|
|
|
0
|
if( !defined( $indent_level)) { $indent_level = 0; $wrapping_nl= ''; } |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
354
|
0
|
0
|
0
|
|
|
0
|
if( $HTML::Tagset::isKnown{lc $name} && !$HTML::Tagset::isPhraseMarkup{lc $name} && $indent_level > 0) |
|
|
|
0
|
|
|
|
|
355
|
0
|
|
|
|
|
0
|
{ $indents->{pre_start_tag}= $wrapping_nl . ($indent x $indent_level); } |
356
|
0
|
0
|
|
|
|
0
|
if( $extra_newline{lc $name}) |
357
|
0
|
|
|
|
|
0
|
{ $indents->{post_start_tag}= $nl; |
358
|
0
|
|
|
|
|
0
|
$indents->{pre_end_tag}= $nl . ($indent x $indent_level); |
359
|
|
|
|
|
|
|
} |
360
|
0
|
0
|
|
|
|
0
|
if( $indent_level == 0) |
361
|
0
|
|
|
|
|
0
|
{ $indents->{post_end_tag} = $wrapping_nl; } |
362
|
|
|
|
|
|
|
} |
363
|
0
|
|
|
|
|
0
|
return $indents; |
364
|
|
|
|
|
|
|
} |
365
|
|
|
|
|
|
|
} |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
sub _xml_escape_attribute_value |
369
|
0
|
|
|
0
|
|
0
|
{ my( $text)= @_; |
370
|
0
|
|
|
|
|
0
|
$text=~ s{([&<>"])}{$CHAR2DEFAULT_ENT{$1}}g; # escape also quote, as it is the attribute separator |
371
|
0
|
|
|
|
|
0
|
return $text; |
372
|
|
|
|
|
|
|
} |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
sub _xml_escape_text |
375
|
0
|
|
|
0
|
|
0
|
{ my( $text)= @_; |
376
|
0
|
|
|
|
|
0
|
$text=~ s{([&<>])}{$CHAR2DEFAULT_ENT{$1}}g; |
377
|
0
|
|
|
|
|
0
|
return $text; |
378
|
|
|
|
|
|
|
} |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
sub _xml_escape_comment |
381
|
0
|
|
|
0
|
|
0
|
{ my( $text)= @_; |
382
|
0
|
|
|
|
|
0
|
$text=~ s{([&<>])}{$CHAR2DEFAULT_ENT{$1}}g; |
383
|
0
|
|
|
|
|
0
|
$text=~ s{--}{--}g; # can't have double --'s in XML comments |
384
|
0
|
|
|
|
|
0
|
return $text; |
385
|
|
|
|
|
|
|
} |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
sub _xml_escape_cdata |
388
|
0
|
|
|
0
|
|
0
|
{ my( $text, $opt)= @_; |
389
|
0
|
0
|
0
|
|
|
0
|
if( $opt->{force_escape_cdata} || $text=~ m{[<&]}) |
390
|
0
|
|
|
|
|
0
|
{ $text=~ s{^\s*\Q
|
391
|
0
|
|
|
|
|
0
|
$text=~ s{\Q]]>\E\s*$}{}s; |
392
|
0
|
|
|
|
|
0
|
$text=~ s{]]>}{]]>}g; # can't have]]> in CDATA |
393
|
0
|
|
|
|
|
0
|
$text= ""; |
394
|
|
|
|
|
|
|
} |
395
|
0
|
|
|
|
|
0
|
return $text; |
396
|
|
|
|
|
|
|
} |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
package HTML::TreeBuilder::XPath::TextNode; |
400
|
|
|
|
|
|
|
|
401
|
3
|
|
|
3
|
|
37
|
use base 'HTML::TreeBuilder::XPath::Node'; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
3339
|
|
402
|
|
|
|
|
|
|
|
403
|
89
|
|
|
89
|
|
462
|
sub getParentNode { return shift->{_parent}; } |
404
|
0
|
|
|
0
|
|
0
|
sub getValue { return shift->{_content}; } |
405
|
6
|
|
|
6
|
|
74
|
sub isTextNode { return 1; } |
406
|
38
|
50
|
|
38
|
|
2679
|
sub getAttributes { return wantarray ? () : []; } |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
# similar to HTML::Element as_XML |
409
|
|
|
|
|
|
|
sub as_XML |
410
|
0
|
|
|
0
|
|
0
|
{ my( $node, $entities)= @_; |
411
|
0
|
|
|
|
|
0
|
my $content= $node->{_content}; |
412
|
0
|
0
|
0
|
|
|
0
|
if( $node->{_parent} && $node->{_parent}->{_tag} eq 'script') |
413
|
0
|
|
|
|
|
0
|
{ $content=~ s{(&\w+;)}{HTML::Entities::decode($1)}eg; } |
|
0
|
|
|
|
|
0
|
|
414
|
|
|
|
|
|
|
else |
415
|
0
|
|
|
|
|
0
|
{ $content= HTML::Element::_xml_escape_text($content); } |
416
|
0
|
|
|
|
|
0
|
return $content; |
417
|
|
|
|
|
|
|
} |
418
|
|
|
|
|
|
|
*as_XML_compact = *as_XML; |
419
|
|
|
|
|
|
|
*as_XML_indented = *as_XML; |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
sub getPreviousSibling |
423
|
9
|
|
|
9
|
|
25
|
{ my $self= shift; |
424
|
9
|
|
|
|
|
12
|
my $rank= $self->{_rank}; |
425
|
|
|
|
|
|
|
#unless( defined $self->{_rank}) |
426
|
|
|
|
|
|
|
# { warn "no rank for text node $self->{_content}, parent is $self->{_parent}->{_tag}\n"; } |
427
|
9
|
|
|
|
|
12
|
my $parent= $self->{_parent}; |
428
|
9
|
50
|
|
|
|
25
|
return $rank ? $parent->_child_as_object( $parent->{_content}->[$rank-1], $rank-1) : undef; |
429
|
|
|
|
|
|
|
} |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
sub getNextSibling |
432
|
80
|
|
|
80
|
|
242
|
{ my $self= shift; |
433
|
80
|
|
|
|
|
124
|
my $rank= $self->{_rank}; |
434
|
|
|
|
|
|
|
#unless( defined $self->{_rank}) |
435
|
|
|
|
|
|
|
# { warn "no rank for text node $self->{_content}, parent is $self->{_parent}->{_tag}\n"; } |
436
|
80
|
|
|
|
|
117
|
my $parent= $self->{_parent}; |
437
|
80
|
|
|
|
|
140
|
my $next_sibling= $parent->{_content}->[$rank+1]; |
438
|
80
|
50
|
|
|
|
230
|
return defined( $next_sibling) ? $parent->_child_as_object( $next_sibling, $rank+1) : undef; |
439
|
|
|
|
|
|
|
} |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
sub getRootNode |
442
|
0
|
|
|
0
|
|
0
|
{ return shift->{_parent}->getRootNode; } |
443
|
|
|
|
|
|
|
|
444
|
1
|
|
|
1
|
|
182
|
sub string_value { return shift->{_content}; } |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
# added to provide element-like methods to text nodes, for use by cmp |
447
|
|
|
|
|
|
|
sub lineage |
448
|
0
|
|
|
0
|
|
0
|
{ my( $node)= @_; |
449
|
0
|
|
|
|
|
0
|
my $parent= $node->{_parent}; |
450
|
0
|
|
|
|
|
0
|
return( $parent, $parent->lineage); |
451
|
|
|
|
|
|
|
} |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
sub is_inside |
454
|
0
|
|
|
0
|
|
0
|
{ my( $text, $node)= @_; |
455
|
0
|
|
|
|
|
0
|
return $text->{_parent}->is_inside( $node); |
456
|
|
|
|
|
|
|
} |
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
1; |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
package HTML::TreeBuilder::XPath::Attribute; |
462
|
3
|
|
|
3
|
|
18
|
use base 'HTML::TreeBuilder::XPath::Node'; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
3266
|
|
463
|
|
|
|
|
|
|
|
464
|
1
|
|
|
1
|
|
200
|
sub getParentNode { return $_[0]->{_elt}; } |
465
|
2
|
|
|
2
|
|
48
|
sub getValue { return $_[0]->{_value}; } |
466
|
270
|
|
|
270
|
|
3850
|
sub getName { return $_[0]->{_name} ; } |
467
|
0
|
|
|
0
|
|
0
|
sub getLocalName { (my $name= $_[0]->{_name}) =~ s{^.*:}{}; $name; } |
|
0
|
|
|
|
|
0
|
|
468
|
118
|
|
|
118
|
|
9719
|
sub string_value { return $_[0]->{_value}; } |
469
|
0
|
|
|
0
|
|
0
|
sub to_number { return XML::XPathEngine::Number->new( $_[0]->{_value}); } |
470
|
0
|
|
|
0
|
|
0
|
sub isAttributeNode { 1 } |
471
|
0
|
|
|
0
|
|
0
|
sub toString { return qq{ $_[0]->{_name}="$_[0]->{_value}"}; } |
472
|
0
|
|
|
0
|
|
0
|
sub getAttributes { return $_[0]->{_elt}->getAttributes; } |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
# awfully inefficient, but hopefully this is called only for weird (read test-case) queries |
475
|
|
|
|
|
|
|
sub getPreviousSibling |
476
|
0
|
|
|
0
|
|
0
|
{ my $self= shift; |
477
|
0
|
|
|
|
|
0
|
my $rank= $self->{_rank}; |
478
|
0
|
0
|
|
|
|
0
|
return undef unless $rank; |
479
|
0
|
|
|
|
|
0
|
my %atts= $self->{_elt}->all_external_attr; |
480
|
0
|
|
|
|
|
0
|
my $previous_att_name= (sort keys %atts)[$rank-1]; |
481
|
0
|
|
|
|
|
0
|
return bless( { _name => $previous_att_name, |
482
|
|
|
|
|
|
|
_value => $atts{$previous_att_name}, |
483
|
|
|
|
|
|
|
_elt => $self->{_elt}, _rank => $rank-1, |
484
|
|
|
|
|
|
|
}, 'HTML::TreeBuilder::XPath::Attribute' |
485
|
|
|
|
|
|
|
); |
486
|
|
|
|
|
|
|
} |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
sub getNextSibling |
489
|
0
|
|
|
0
|
|
0
|
{ my $self= shift; |
490
|
0
|
|
|
|
|
0
|
my $rank= $self->{_rank}; |
491
|
0
|
|
|
|
|
0
|
my %atts= $self->{_elt}->all_external_attr; |
492
|
0
|
|
0
|
|
|
0
|
my $next_att_name= (sort keys %atts)[$rank+1] || return undef; |
493
|
0
|
|
|
|
|
0
|
return bless( { _name => $next_att_name, _value => $atts{$next_att_name}, |
494
|
|
|
|
|
|
|
_elt => $self->{_elt}, _rank => $rank+1, |
495
|
|
|
|
|
|
|
}, 'HTML::TreeBuilder::XPath::Attribute' |
496
|
|
|
|
|
|
|
); |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
} |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
# added to provide element-like methods to attributes, for use by cmp |
503
|
|
|
|
|
|
|
sub lineage |
504
|
25
|
|
|
25
|
|
40
|
{ my( $att)= @_; |
505
|
25
|
|
|
|
|
37
|
my $elt= $att->{_elt}; |
506
|
25
|
|
|
|
|
203
|
return( $elt, $elt->lineage); |
507
|
|
|
|
|
|
|
} |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
sub is_inside |
510
|
25
|
|
|
25
|
|
35
|
{ my( $att, $node)= @_; |
511
|
25
|
|
33
|
|
|
136
|
return ($att->{_elt} == $node) || $att->{_elt}->is_inside( $node); |
512
|
|
|
|
|
|
|
} |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
1; |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
package HTML::TreeBuilder::XPath::Root; |
518
|
|
|
|
|
|
|
|
519
|
3
|
|
|
3
|
|
19
|
use base 'HTML::TreeBuilder::XPath::Node'; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
2081
|
|
520
|
|
|
|
|
|
|
|
521
|
25
|
|
|
25
|
|
135
|
sub getParentNode { return (); } |
522
|
114
|
100
|
|
114
|
|
9664
|
sub getChildNodes { my @content= ( $_[0]->{_root}); return wantarray ? @content : \@content; } |
|
114
|
|
|
|
|
484
|
|
523
|
3
|
|
|
3
|
|
223
|
sub getAttributes { return [] } |
524
|
0
|
|
|
0
|
|
0
|
sub isDocumentNode { return 1 } |
525
|
1
|
|
|
1
|
|
6
|
sub getRootNode { return $_[0] } |
526
|
1
|
|
|
1
|
|
4
|
sub getName { return } |
527
|
22
|
|
|
22
|
|
139
|
sub getNextSibling { return } |
528
|
1
|
|
|
1
|
|
5
|
sub getPreviousSibling { return } |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
# added to provide element-like methods to root, for use by cmp |
531
|
0
|
|
|
0
|
|
|
sub lineage { return ($_[0]); } |
532
|
0
|
|
|
0
|
|
|
sub is_inside { return 0; } |
533
|
0
|
0
|
|
0
|
|
|
sub cmp { return $_[1]->isa( ' HTML::TreeBuilder::XPath::Root') ? 0 : 1; } |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
1; |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
__END__ |