line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
##---------------------------------------------------------------------------- |
2
|
|
|
|
|
|
|
## HTML Object - ~/lib/HTML/Object/DOM/TreeWalker.pm |
3
|
|
|
|
|
|
|
## Version v0.2.0 |
4
|
|
|
|
|
|
|
## Copyright(c) 2022 DEGUEST Pte. Ltd. |
5
|
|
|
|
|
|
|
## Author: Jacques Deguest <jack@deguest.jp> |
6
|
|
|
|
|
|
|
## Created 2022/01/02 |
7
|
|
|
|
|
|
|
## Modified 2022/09/18 |
8
|
|
|
|
|
|
|
## All rights reserved |
9
|
|
|
|
|
|
|
## |
10
|
|
|
|
|
|
|
## |
11
|
|
|
|
|
|
|
## This program is free software; you can redistribute it and/or modify it |
12
|
|
|
|
|
|
|
## under the same terms as Perl itself. |
13
|
|
|
|
|
|
|
##---------------------------------------------------------------------------- |
14
|
|
|
|
|
|
|
package HTML::Object::DOM::TreeWalker; |
15
|
|
|
|
|
|
|
BEGIN |
16
|
|
|
|
|
|
|
{ |
17
|
2
|
|
|
2
|
|
3457
|
use strict; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
59
|
|
18
|
2
|
|
|
2
|
|
10
|
use warnings; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
59
|
|
19
|
2
|
|
|
2
|
|
11
|
use parent qw( HTML::Object::DOM::NodeIteratorShared ); |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
11
|
|
20
|
2
|
|
|
2
|
|
165
|
use vars qw( $VERSION ); |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
94
|
|
21
|
|
|
|
|
|
|
# To import its constants |
22
|
2
|
|
|
2
|
|
14
|
use HTML::Object::DOM::Node; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
17
|
|
23
|
2
|
|
|
2
|
|
677
|
use HTML::Object::DOM::NodeFilter qw( :all ); |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
12
|
|
24
|
2
|
|
|
2
|
|
826
|
our $VERSION = 'v0.2.0'; |
25
|
|
|
|
|
|
|
}; |
26
|
|
|
|
|
|
|
|
27
|
2
|
|
|
2
|
|
13
|
use strict; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
39
|
|
28
|
2
|
|
|
2
|
|
9
|
use warnings; |
|
2
|
|
|
|
|
6
|
|
|
2
|
|
|
|
|
1786
|
|
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# Note: method init is inherited |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
# Note: property currentNode |
33
|
|
|
|
|
|
|
sub currentNode |
34
|
|
|
|
|
|
|
{ |
35
|
8
|
|
|
8
|
1
|
1125
|
my $self = shift( @_ ); |
36
|
8
|
|
|
|
|
25
|
my $elems = $self->_elements; |
37
|
8
|
|
|
|
|
7123
|
return( $elems->index( $self->{_pos} ) ); |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
# Note: property expandEntityReferences read-only is inherited |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# Note: property filter read-only is inherited |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
sub firstChild |
45
|
|
|
|
|
|
|
{ |
46
|
3
|
|
|
3
|
1
|
40983
|
my $self = shift( @_ ); |
47
|
3
|
|
|
|
|
10
|
my $elems = $self->_elements; |
48
|
3
|
|
|
|
|
2528
|
my $this = $elems->index( $self->{_pos} ); |
49
|
3
|
50
|
|
|
|
186
|
return( $self->error( "Unable to find the current node at position '$self->{_pos}' in our tree array!" ) ) if( !defined( $this ) ); |
50
|
|
|
|
|
|
|
# Only elements can have children |
51
|
3
|
50
|
|
|
|
15
|
return if( $this->nodeType != ELEMENT_NODE ); |
52
|
3
|
|
|
|
|
8
|
my $node; |
53
|
3
|
|
|
|
|
6
|
my $tmpPos = 0; |
54
|
3
|
|
|
|
|
17
|
my $children = $this->children; |
55
|
3
|
|
|
|
|
198
|
my $size = $children->size; |
56
|
|
|
|
|
|
|
# We seek the first appropriate first child depending on the value of $whatToShow |
57
|
3
|
|
|
|
|
119348
|
while(1) |
58
|
|
|
|
|
|
|
{ |
59
|
5
|
50
|
|
|
|
21
|
last if( $tmpPos > $size ); |
60
|
5
|
|
|
|
|
775
|
my $tmpNode = $children->index( $tmpPos ); |
61
|
5
|
|
|
|
|
332
|
my $rv = $self->_check_element( $tmpNode ); |
62
|
5
|
50
|
|
|
|
272
|
last if( !defined( $rv ) ); |
63
|
5
|
100
|
|
|
|
263
|
$tmpPos++, next if( !$rv ); |
64
|
3
|
|
|
|
|
5
|
$node = $tmpNode; |
65
|
3
|
|
|
|
|
7
|
last; |
66
|
|
|
|
|
|
|
} |
67
|
3
|
50
|
|
|
|
9
|
return if( !defined( $node ) ); |
68
|
3
|
|
|
|
|
16
|
my $pos = $elems->pos( $node ); |
69
|
|
|
|
|
|
|
# Amazingly enough, the first child of this node cannot be found among the list of all nodes in this tree! |
70
|
3
|
50
|
|
|
|
83
|
return if( !defined( $pos ) ); |
71
|
3
|
|
|
|
|
9
|
$self->{_pos} = $pos; |
72
|
3
|
|
|
|
|
16
|
return( $node ); |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
sub lastChild |
76
|
|
|
|
|
|
|
{ |
77
|
1
|
|
|
1
|
1
|
24
|
my $self = shift( @_ ); |
78
|
1
|
|
|
|
|
5
|
my $elems = $self->_elements; |
79
|
1
|
|
|
|
|
866
|
my $this = $elems->index( $self->{_pos} ); |
80
|
1
|
50
|
|
|
|
62
|
return( $self->error( "Unable to find the current node at position '$self->{_pos}' in our tree array!" ) ) if( !defined( $this ) ); |
81
|
|
|
|
|
|
|
# Only elements can have children |
82
|
1
|
50
|
|
|
|
6
|
return if( $this->nodeType != ELEMENT_NODE ); |
83
|
1
|
|
|
|
|
3
|
my $node; |
84
|
1
|
|
|
|
|
4
|
my $children = $this->children; |
85
|
1
|
|
|
|
|
68
|
my $tmpPos = $children->size; |
86
|
|
|
|
|
|
|
# We seek the first appropriate first child depending on the value of $whatToShow |
87
|
1
|
|
|
|
|
39612
|
while(1) |
88
|
|
|
|
|
|
|
{ |
89
|
2
|
50
|
|
|
|
15
|
last if( $tmpPos < 0 ); |
90
|
2
|
|
|
|
|
259
|
my $tmpNode = $children->index( $tmpPos ); |
91
|
2
|
|
|
|
|
137
|
my $rv = $self->_check_element( $tmpNode ); |
92
|
2
|
50
|
|
|
|
138
|
last if( !defined( $rv ) ); |
93
|
2
|
100
|
|
|
|
139
|
$tmpPos--, next if( !$rv ); |
94
|
1
|
|
|
|
|
2
|
$node = $tmpNode; |
95
|
1
|
|
|
|
|
3
|
last; |
96
|
|
|
|
|
|
|
} |
97
|
1
|
50
|
|
|
|
4
|
return if( !defined( $node ) ); |
98
|
1
|
|
|
|
|
7
|
my $pos = $elems->pos( $node ); |
99
|
|
|
|
|
|
|
# Amazingly enough, the last child of this node cannot be found among the list of all nodes in this tree! |
100
|
1
|
50
|
|
|
|
66
|
return if( !defined( $pos ) ); |
101
|
1
|
|
|
|
|
3
|
$self->{_pos} = $pos; |
102
|
1
|
|
|
|
|
5
|
return( $node ); |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
# Note: method nextNode is inherited |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
sub nextSibling |
108
|
|
|
|
|
|
|
{ |
109
|
1
|
|
|
1
|
1
|
1798
|
my $self = shift( @_ ); |
110
|
1
|
|
|
|
|
4
|
my $elems = $self->_elements; |
111
|
1
|
|
|
|
|
853
|
my $this = $elems->index( $self->{_pos} ); |
112
|
1
|
50
|
|
|
|
61
|
return( $self->error( "Unable to find the current node at position '$self->{_pos}' in our tree array!" ) ) if( !defined( $this ) ); |
113
|
|
|
|
|
|
|
# No need to bother if our current node is the root node. Its siblings are not part of the tree |
114
|
1
|
50
|
|
|
|
6
|
return if( $this eq $self->root ); |
115
|
|
|
|
|
|
|
# Get all next siblings |
116
|
0
|
|
|
|
|
0
|
my $node; |
117
|
0
|
|
|
|
|
0
|
my $siblings = $this->right; |
118
|
0
|
|
|
|
|
0
|
my $size = $siblings->size; |
119
|
0
|
|
|
|
|
0
|
my $tmpPos = 0; |
120
|
|
|
|
|
|
|
# We seek the first appropriate sibling depending on the value of $whatToShow |
121
|
0
|
|
|
|
|
0
|
while(1) |
122
|
|
|
|
|
|
|
{ |
123
|
0
|
0
|
|
|
|
0
|
last if( $tmpPos > $size ); |
124
|
0
|
|
|
|
|
0
|
my $tmpNode = $siblings->index( $tmpPos ); |
125
|
0
|
|
|
|
|
0
|
my $rv = $self->_check_element( $tmpNode ); |
126
|
0
|
0
|
|
|
|
0
|
last if( !defined( $rv ) ); |
127
|
0
|
0
|
|
|
|
0
|
$tmpPos++, next if( !$rv ); |
128
|
0
|
|
|
|
|
0
|
$node = $tmpNode; |
129
|
0
|
|
|
|
|
0
|
last; |
130
|
|
|
|
|
|
|
} |
131
|
0
|
0
|
|
|
|
0
|
return if( !defined( $node ) ); |
132
|
0
|
|
|
|
|
0
|
my $pos = $elems->pos( $node ); |
133
|
|
|
|
|
|
|
# Amazingly enough, the next sibling of this node cannot be found among the list of all nodes in this tree! |
134
|
0
|
0
|
|
|
|
0
|
return if( !defined( $pos ) ); |
135
|
0
|
|
|
|
|
0
|
$self->{_pos} = $pos; |
136
|
0
|
|
|
|
|
0
|
return( $node ); |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
sub parentNode |
140
|
|
|
|
|
|
|
{ |
141
|
1
|
|
|
1
|
1
|
1435
|
my $self = shift( @_ ); |
142
|
1
|
|
|
|
|
5
|
my $elems = $self->_elements; |
143
|
1
|
|
|
|
|
912
|
my $this = $elems->index( $self->{_pos} ); |
144
|
1
|
50
|
|
|
|
60
|
return( $self->error( "Unable to find the current node at position '$self->{_pos}' in our tree array!" ) ) if( !defined( $this ) ); |
145
|
1
|
|
|
|
|
5
|
my $root = $self->root; |
146
|
|
|
|
|
|
|
# We should not be here in the first place. |
147
|
1
|
50
|
|
|
|
838
|
return if( $this eq $root ); |
148
|
1
|
|
|
|
|
5
|
my $node = $this->parent; |
149
|
1
|
50
|
|
|
|
25
|
return if( !defined( $node ) ); |
150
|
|
|
|
|
|
|
# We cannot go up to the root, but only within the root itself. |
151
|
|
|
|
|
|
|
# return if( $node eq $root ); |
152
|
1
|
|
|
|
|
7
|
my $pos = $elems->pos( $node ); |
153
|
|
|
|
|
|
|
# Amazingly enough, the last child of this node cannot be found among the list of all nodes in this tree! |
154
|
1
|
50
|
|
|
|
20
|
return if( !defined( $pos ) ); |
155
|
1
|
|
|
|
|
3
|
$self->{_pos} = $pos; |
156
|
1
|
|
|
|
|
4
|
return( $node ); |
157
|
|
|
|
|
|
|
} |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
# Note: method previousNode is inherited |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
sub previousSibling |
162
|
|
|
|
|
|
|
{ |
163
|
1
|
|
|
1
|
1
|
544
|
my $self = shift( @_ ); |
164
|
1
|
|
|
|
|
5
|
my $elems = $self->_elements; |
165
|
1
|
|
|
|
|
854
|
my $this = $elems->index( $self->{_pos} ); |
166
|
1
|
50
|
|
|
|
61
|
return( $self->error( "Unable to find the current node at position '$self->{_pos}' in our tree array!" ) ) if( !defined( $this ) ); |
167
|
|
|
|
|
|
|
# No need to bother if our current node is the root node. Its siblings are not part of the tree |
168
|
1
|
50
|
|
|
|
4
|
return if( $this eq $self->root ); |
169
|
|
|
|
|
|
|
# Get all previous siblings |
170
|
0
|
|
|
|
|
|
my $node; |
171
|
0
|
|
|
|
|
|
my $siblings = $this->left; |
172
|
0
|
|
|
|
|
|
my $tmpPos = $siblings->size; |
173
|
|
|
|
|
|
|
# We seek the first appropriate sibling depending on the value of $whatToShow |
174
|
0
|
|
|
|
|
|
while(1) |
175
|
|
|
|
|
|
|
{ |
176
|
0
|
0
|
|
|
|
|
last if( $tmpPos < 0 ); |
177
|
0
|
|
|
|
|
|
my $tmpNode = $siblings->index( $tmpPos ); |
178
|
0
|
|
|
|
|
|
my $rv = $self->_check_element( $tmpNode ); |
179
|
0
|
0
|
|
|
|
|
last if( !defined( $rv ) ); |
180
|
0
|
0
|
|
|
|
|
$tmpPos--, next if( !$rv ); |
181
|
0
|
|
|
|
|
|
$node = $tmpNode; |
182
|
0
|
|
|
|
|
|
last; |
183
|
|
|
|
|
|
|
} |
184
|
0
|
0
|
|
|
|
|
return if( !defined( $node ) ); |
185
|
0
|
|
|
|
|
|
my $pos = $elems->pos( $node ); |
186
|
|
|
|
|
|
|
# Amazingly enough, the previous sibling of this node cannot be found among the list of all nodes in this tree! |
187
|
0
|
0
|
|
|
|
|
return if( !defined( $pos ) ); |
188
|
0
|
|
|
|
|
|
$self->{_pos} = $pos; |
189
|
0
|
|
|
|
|
|
return( $node ); |
190
|
|
|
|
|
|
|
} |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
# Note: property root read-only is inherited |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
# Note: property whatToShow read-only is inherited |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
1; |
197
|
|
|
|
|
|
|
# NOTE: POD |
198
|
|
|
|
|
|
|
__END__ |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=encoding utf-8 |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
=head1 NAME |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
HTML::Object::DOM::TreeWalker - HTML Object DOM Tree Walker Class |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
=head1 SYNOPSIS |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
With just one argument, this default to search for everything (C<SHOW_ALL>) and to use the default filter, which always returns C<FILTER_ACCEPT> |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
use HTML::Object::DOM::TreeWalker; |
211
|
|
|
|
|
|
|
my $walker = HTML::Object::DOM::TreeWalker->new( $doc->body ) || |
212
|
|
|
|
|
|
|
die( HTML::Object::DOM::TreeWalker->error, "\n" ); |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
Or, passing an anonymous subroutine as the filter |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
my $nodes = HTML::Object::DOM::TreeWalker->new( |
217
|
|
|
|
|
|
|
$root_node, |
218
|
|
|
|
|
|
|
$what_to_show_bit, |
219
|
|
|
|
|
|
|
sub{ return( FILTER_ACCEPT ); } |
220
|
|
|
|
|
|
|
) || die( HTML::Object::DOM::TreeWalker->error, "\n" ); |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
Or, passing an hash reference with a property 'acceptNode' whose value is an anonymous subroutine, as the filter |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
my $nodes = HTML::Object::DOM::TreeWalker->new( |
225
|
|
|
|
|
|
|
$root_node, |
226
|
|
|
|
|
|
|
$what_to_show_bit, |
227
|
|
|
|
|
|
|
{ |
228
|
|
|
|
|
|
|
acceptNode => sub{ return( FILTER_ACCEPT ); } |
229
|
|
|
|
|
|
|
} |
230
|
|
|
|
|
|
|
) || die( HTML::Object::DOM::TreeWalker->error, "\n" ); |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
Or, passing an object that implements the method "acceptNode" |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
my $nodes = HTML::Object::DOM::TreeWalker->new( |
235
|
|
|
|
|
|
|
$root_node, |
236
|
|
|
|
|
|
|
$what_to_show_bit, |
237
|
|
|
|
|
|
|
# This object must implement the acceptNode method |
238
|
|
|
|
|
|
|
My::Customer::NodeFilter->new |
239
|
|
|
|
|
|
|
) || die( HTML::Object::DOM::TreeWalker->error, "\n" ); |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
There is also L<HTML::Object::DOM::TreeWalker>, which performs a somewhat similar function. |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
Choose L<HTML::Object::DOM::NodeIterator> when you only need a simple iterator to filter and browse the selected nodes, and choose L<HTML::Object::DOM::TreeWalker> when you need to access to the node and its siblings. |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
=head1 VERSION |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
v0.2.0 |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
=head1 DESCRIPTION |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
The C<TreeWalker> object represents the nodes of a document subtree and a position within them. |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
=head1 PROPERTIES |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
=head2 currentNode |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
Is the L<Node|HTML::Object::DOM::Node> on which the C<TreeWalker> is currently pointing at. |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
Example: |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
use HTML::Object::DOM::NodeFilter qw( :all ); |
262
|
|
|
|
|
|
|
my $treeWalker = $doc->createTreeWalker( |
263
|
|
|
|
|
|
|
$doc->body, |
264
|
|
|
|
|
|
|
SHOW_ELEMENT, |
265
|
|
|
|
|
|
|
sub{ return( FILTER_ACCEPT ); } |
266
|
|
|
|
|
|
|
); |
267
|
|
|
|
|
|
|
my $root = $treeWalker->currentNode; # the root element as it is the first element! |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker/currentNode> |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
=head2 expandEntityReferences |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
Normally this is read-only, but under perl you can set whatever boolean value you want. |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
Under JavaScript, this is a boolean value indicating if, when discarding an C<EntityReference> its whole sub-tree must be discarded at the same time. |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
Example: |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
use HTML::Object::DOM::NodeFilter qw( :all ); |
280
|
|
|
|
|
|
|
my $treeWalker = $doc->createTreeWalker( |
281
|
|
|
|
|
|
|
$doc->body, |
282
|
|
|
|
|
|
|
SHOW_ELEMENT, |
283
|
|
|
|
|
|
|
sub{ return( FILTER_ACCEPT ); }, |
284
|
|
|
|
|
|
|
# or |
285
|
|
|
|
|
|
|
# { acceptNode => sub{ return( FILTER_ACCEPT ); } }, |
286
|
|
|
|
|
|
|
); |
287
|
|
|
|
|
|
|
my $expand = $treeWalker->expandEntityReferences; |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker/expandEntityReferences> |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
=head2 filter |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
Normally this is read-only, but under perl you can set it to a new L<HTML::Object::DOM::NodeFilter> object you want, even after object instantiation. |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
Returns a L<HTML::Object::DOM::NodeFilter> used to select the relevant nodes. |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
Example: |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
use HTML::Object::DOM::NodeFilter qw( :all ); |
300
|
|
|
|
|
|
|
my $treeWalker = $doc->createTreeWalker( |
301
|
|
|
|
|
|
|
$doc->body, |
302
|
|
|
|
|
|
|
SHOW_ELEMENT, |
303
|
|
|
|
|
|
|
sub{ return( FILTER_ACCEPT ); }, |
304
|
|
|
|
|
|
|
# or |
305
|
|
|
|
|
|
|
# { acceptNode => sub{ return( FILTER_ACCEPT ); } }, |
306
|
|
|
|
|
|
|
); |
307
|
|
|
|
|
|
|
my $nodeFilter = $treeWalker->filter; |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker/filter> |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
=head2 root |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
Normally this is read-only, but under perl you can set whatever L<node value|HTML::Object::DOM::Node> you want. |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
Returns a L<Node|HTML::Object::DOM::Node> representing the root node as specified when the C<TreeWalker> was created. |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
Example: |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
use HTML::Object::DOM::NodeFilter qw( :all ); |
320
|
|
|
|
|
|
|
my $treeWalker = $doc->createTreeWalker( |
321
|
|
|
|
|
|
|
$doc->body, |
322
|
|
|
|
|
|
|
SHOW_ELEMENT, |
323
|
|
|
|
|
|
|
sub{ return( FILTER_ACCEPT ); }, |
324
|
|
|
|
|
|
|
# or |
325
|
|
|
|
|
|
|
# { acceptNode => sub{ return( FILTER_ACCEPT ); } }, |
326
|
|
|
|
|
|
|
); |
327
|
|
|
|
|
|
|
my $root = $treeWalker->root; # $doc->body in this case |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker/root> |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=head2 whatToShow |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
Normally this is read-only, but under perl you can set whatever number value you want. |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
Returns an unsigned long being a bitmask made of L<constants|/CONSTANTS> describing the types of L<Node|HTML::Object::DOM::Node> that must to be presented. Non-matching nodes are skipped, but their children may be included, if relevant. |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
Possible constant values (exported by L<HTML::Object::DOM::NodeFilter>) are: |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
=over 4 |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
=item SHOW_ALL (4294967295) |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
Shows all nodes. |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
=item SHOW_ELEMENT (1) |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
Shows Element nodes. |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
=item SHOW_ATTRIBUTE (2) |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
Shows attribute L<Attribute nodes|HTML::Object::DOM::Attribute>. This is meaningful only when creating a C<TreeWalker> with an L<Attribute node|HTML::Object::DOM::Attribute> as its root; in this case, it means that the L<attribute node|HTML::Object::DOM::Attribute> will appear in the first position of the iteration or traversal. Since attributes are never children of other L<nodes|HTML::Object::DOM::Node>, they do not appear when traversing over the document tree. |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
=item SHOW_TEXT (4) |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
Shows Text nodes. |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
Example: |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
use HTML::Object::DOM::NodeFilter qw( :all ); |
360
|
|
|
|
|
|
|
my $treeWalker = $doc->createTreeWalker( |
361
|
|
|
|
|
|
|
$doc->body, |
362
|
|
|
|
|
|
|
( SHOW_ELEMENT | SHOW_COMMENT | SHOW_TEXT ), |
363
|
|
|
|
|
|
|
sub{ return( FILTER_ACCEPT ); }, |
364
|
|
|
|
|
|
|
# or |
365
|
|
|
|
|
|
|
# { acceptNode => sub{ return( FILTER_ACCEPT ); } }, |
366
|
|
|
|
|
|
|
); |
367
|
|
|
|
|
|
|
if( ( $treeWalker->whatToShow & SHOW_ALL ) || |
368
|
|
|
|
|
|
|
( $treeWalker->whatToShow & SHOW_COMMENT ) ) |
369
|
|
|
|
|
|
|
{ |
370
|
|
|
|
|
|
|
# $treeWalker will show comments |
371
|
|
|
|
|
|
|
} |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
=item SHOW_CDATA_SECTION (8) |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
Will always returns nothing, because there is no support for xml documents. |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
=item SHOW_ENTITY_REFERENCE (16) |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
Legacy, no more used. |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
=item SHOW_ENTITY (32) |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
Legacy, no more used. |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
=item SHOW_PROCESSING_INSTRUCTION (64) |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
Shows ProcessingInstruction nodes. |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
=item SHOW_COMMENT (128) |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
Shows Comment nodes. |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
=item SHOW_DOCUMENT (256) |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
Shows Document nodes |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
=item SHOW_DOCUMENT_TYPE (512) |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
Shows C<DocumentType> nodes |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
=item SHOW_DOCUMENT_FRAGMENT (1024) |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
Shows L<HTML::Object::DOM::DocumentFragment> nodes. |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
=item SHOW_NOTATION (2048) |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
Legacy, no more used. |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
=item SHOW_SPACE (4096) |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
Show Space nodes. This is a non-standard extension under this perl framework. |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
=back |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker/whatToShow> |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
=head1 METHODS |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
=head2 firstChild |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
Moves the current L<Node|HTML::Object::DOM::Node> to the first visible child of the current node, and returns the found child. It also moves the current node to this child. If no such child exists, returns C<undef> and the current node is not changed. |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
Example: |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
my $treeWalker = $doc->createTreeWalker( |
426
|
|
|
|
|
|
|
$doc->body, |
427
|
|
|
|
|
|
|
SHOW_ELEMENT, |
428
|
|
|
|
|
|
|
sub{ return( FILTER_ACCEPT ); }, |
429
|
|
|
|
|
|
|
); |
430
|
|
|
|
|
|
|
my $node = $treeWalker->firstChild(); # returns the first child of the root element, or null if none |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker/firstChild> |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
=head2 lastChild |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
Moves the current L<Node|HTML::Object::DOM::Node> to the last visible child of the current node, and returns the found child. It also moves the current node to this child. If no such child exists, C<undef> is returned and the current node is not changed. |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
Example: |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
my $treeWalker = $doc->createTreeWalker( |
441
|
|
|
|
|
|
|
$doc->body, |
442
|
|
|
|
|
|
|
SHOW_ELEMENT, |
443
|
|
|
|
|
|
|
sub{ return( FILTER_ACCEPT ); }, |
444
|
|
|
|
|
|
|
); |
445
|
|
|
|
|
|
|
my $node = $treeWalker->lastChild(); # returns the last visible child of the root element |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker/lastChild> |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
=head2 nextNode |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
Moves the current L<Node|HTML::Object::DOM::Node> to the next visible node in the document order, and returns the found node. It also moves the current node to this one. If no such node exists, returns C<undef> and the current node is not changed. |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
Example: |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
my $treeWalker = $doc->createTreeWalker( |
456
|
|
|
|
|
|
|
$doc->body, |
457
|
|
|
|
|
|
|
SHOW_ELEMENT, |
458
|
|
|
|
|
|
|
sub{ return( FILTER_ACCEPT ); }, |
459
|
|
|
|
|
|
|
); |
460
|
|
|
|
|
|
|
my $node = $treeWalker->nextNode(); # returns the first child of root, as it is the next $node in document order |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker/nextNode> |
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
=head2 nextSibling |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
Moves the current L<Node|HTML::Object::DOM::Node> to its next sibling, if any, and returns the found sibling. If there is no such node, C<undef> is returned and the current node is not changed. |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
Example: |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
my $treeWalker = $doc->createTreeWalker( |
471
|
|
|
|
|
|
|
$doc->body, |
472
|
|
|
|
|
|
|
SHOW_ELEMENT, |
473
|
|
|
|
|
|
|
sub{ return( FILTER_ACCEPT ); }, |
474
|
|
|
|
|
|
|
); |
475
|
|
|
|
|
|
|
$treeWalker->firstChild(); |
476
|
|
|
|
|
|
|
my $node = $treeWalker->nextSibling(); # returns null if the first child of the root element has no sibling |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker/nextSibling> |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
=head2 parentNode |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
Moves the current L<Node|HTML::Object::DOM::Node> to the first visible ancestor node in the document order, and returns the found node. It also moves the current node to this one. If no such node exists, or if it is before that the root node defined at the object construction, returns C<undef> and the current node is not changed. |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
Example: |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
my $treeWalker = $doc->createTreeWalker( |
487
|
|
|
|
|
|
|
$doc->body, |
488
|
|
|
|
|
|
|
SHOW_ELEMENT, |
489
|
|
|
|
|
|
|
sub{ return( FILTER_ACCEPT ); }, |
490
|
|
|
|
|
|
|
); |
491
|
|
|
|
|
|
|
my $node = $treeWalker->parentNode(); # returns null as there is no parent |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker/parentNode> |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
=head2 previousNode |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
Moves the current L<Node|HTML::Object::DOM::Node> to the previous visible node in the document order, and returns the found node. It also moves the current node to this one. If no such node exists, or if it is before that the root node defined at the object construction, returns C<undef> and the current node is not changed. |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
Example: |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
my $treeWalker = $doc->createTreeWalker( |
502
|
|
|
|
|
|
|
$doc->body, |
503
|
|
|
|
|
|
|
SHOW_ELEMENT, |
504
|
|
|
|
|
|
|
sub{ return( FILTER_ACCEPT ); }, |
505
|
|
|
|
|
|
|
); |
506
|
|
|
|
|
|
|
my $node = $treeWalker->previousNode(); # returns null as there is no parent |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker/previousNode> |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
=head2 previousSibling |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
Moves the current L<Node|HTML::Object::DOM::Node> to its previous sibling, if any, and returns the found sibling. If there is no such node, return C<undef> and the current node is not changed. |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
Example: |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
my $treeWalker = $doc->createTreeWalker( |
517
|
|
|
|
|
|
|
$doc->body, |
518
|
|
|
|
|
|
|
SHOW_ELEMENT, |
519
|
|
|
|
|
|
|
sub{ return( FILTER_ACCEPT ); }, |
520
|
|
|
|
|
|
|
); |
521
|
|
|
|
|
|
|
my $node = $treeWalker->previousSibling(); # returns null as there is no previous sibiling |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker/previousSibling> |
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
=head1 AUTHOR |
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
Jacques Deguest E<lt>F<jack@deguest.jp>E<gt> |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
=head1 SEE ALSO |
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker> |
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
=head1 COPYRIGHT & LICENSE |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
Copyright(c) 2022 DEGUEST Pte. Ltd. |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
All rights reserved |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
=cut |