line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
##---------------------------------------------------------------------------- |
2
|
|
|
|
|
|
|
## HTML Object - ~/lib/HTML/Object/DOM/NodeIteratorShared.pm |
3
|
|
|
|
|
|
|
## Version v0.2.0 |
4
|
|
|
|
|
|
|
## Copyright(c) 2021 DEGUEST Pte. Ltd. |
5
|
|
|
|
|
|
|
## Author: Jacques Deguest <jack@deguest.jp> |
6
|
|
|
|
|
|
|
## Created 2021/12/20 |
7
|
|
|
|
|
|
|
## Modified 2022/09/18 |
8
|
|
|
|
|
|
|
## All rights reserved |
9
|
|
|
|
|
|
|
## |
10
|
|
|
|
|
|
|
## |
11
|
|
|
|
|
|
|
## This program is free software; you can redistribute it and/or modify it |
12
|
|
|
|
|
|
|
## under the same terms as Perl itself. |
13
|
|
|
|
|
|
|
##---------------------------------------------------------------------------- |
14
|
|
|
|
|
|
|
package HTML::Object::DOM::NodeIteratorShared; |
15
|
|
|
|
|
|
|
BEGIN |
16
|
|
|
|
|
|
|
{ |
17
|
3
|
|
|
3
|
|
1312
|
use strict; |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
92
|
|
18
|
3
|
|
|
3
|
|
15
|
use warnings; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
94
|
|
19
|
3
|
|
|
3
|
|
15
|
use parent qw( Module::Generic ); |
|
3
|
|
|
|
|
8
|
|
|
3
|
|
|
|
|
18
|
|
20
|
3
|
|
|
3
|
|
185
|
use vars qw( $VERSION ); |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
123
|
|
21
|
|
|
|
|
|
|
# To import its constants |
22
|
3
|
|
|
3
|
|
21
|
use HTML::Object::DOM::Node; |
|
3
|
|
|
|
|
4
|
|
|
3
|
|
|
|
|
47
|
|
23
|
3
|
|
|
3
|
|
1848
|
use HTML::Object::DOM::NodeFilter qw( :all ); |
|
3
|
|
|
|
|
22
|
|
|
3
|
|
|
|
|
24
|
|
24
|
3
|
|
|
3
|
|
1278
|
our $VERSION = 'v0.2.0'; |
25
|
|
|
|
|
|
|
}; |
26
|
|
|
|
|
|
|
|
27
|
3
|
|
|
3
|
|
19
|
use strict; |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
84
|
|
28
|
3
|
|
|
3
|
|
30
|
use warnings; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
4176
|
|
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
sub init |
31
|
|
|
|
|
|
|
{ |
32
|
10
|
|
|
10
|
1
|
901
|
my $self = shift( @_ ); |
33
|
10
|
50
|
|
|
|
58
|
return( $self->error({ |
34
|
|
|
|
|
|
|
message => sprintf( "Expected at least 1 arguments, but only got %d.", scalar( @_ ) ), |
35
|
|
|
|
|
|
|
class => 'HTML::Object::SyntaxError', |
36
|
|
|
|
|
|
|
}) ) if( scalar( @_ ) < 1 ); |
37
|
10
|
|
|
|
|
28
|
my $root = shift( @_ ); |
38
|
10
|
|
|
|
|
27
|
my $what = shift( @_ ); |
39
|
10
|
|
|
|
|
27
|
my( $filterDef, $filter ); |
40
|
10
|
100
|
|
|
|
55
|
$filterDef = shift( @_ ) if( ref( $_[0] ) eq 'CODE' ); |
41
|
10
|
|
|
|
|
72
|
my $opts = $self->_get_args_as_hash( @_ ); |
42
|
10
|
50
|
|
|
|
802
|
return( $self->error({ |
43
|
|
|
|
|
|
|
message => "Root node provided is not a HTML::Object::DOM::Node object.", |
44
|
|
|
|
|
|
|
class => 'HTML::Object::TypeError', |
45
|
|
|
|
|
|
|
}) ) if( !$self->_is_a( $root => 'HTML::Object::DOM::Node' ) ); |
46
|
10
|
50
|
|
|
|
446
|
$what = HTML::Object::DOM::NodeFilter::SHOW_ALL if( !defined( $what ) ); |
47
|
|
|
|
|
|
|
# Default value |
48
|
10
|
100
|
|
|
|
47
|
if( !defined( $filterDef ) ) |
49
|
|
|
|
|
|
|
{ |
50
|
6
|
|
|
|
|
68
|
$filterDef = HTML::Object::DOM::NodeFilter->new; |
51
|
|
|
|
|
|
|
} |
52
|
|
|
|
|
|
|
|
53
|
10
|
50
|
|
|
|
99
|
return( $self->error({ |
54
|
|
|
|
|
|
|
message => "Value provided for what to show is not an integer.", |
55
|
|
|
|
|
|
|
class => 'HTML::Object::TypeError', |
56
|
|
|
|
|
|
|
}) ) if( !$self->_is_integer( $what ) ); |
57
|
10
|
100
|
|
|
|
240
|
if( scalar( keys( %$opts ) ) ) |
58
|
|
|
|
|
|
|
{ |
59
|
|
|
|
|
|
|
return( $self->error({ |
60
|
|
|
|
|
|
|
message => "Filter parameter provided is an hash reference, but it does not have a \"acceptNode\" property or that property is not a code reference.", |
61
|
|
|
|
|
|
|
class => 'HTML::Object::TypeError', |
62
|
4
|
0
|
0
|
|
|
20
|
}) ) if( !defined( $filterDef ) && ( !exists( $opts->{acceptNode} ) || ref( $opts->{acceptNode} ) ne 'CODE' ) ); |
|
|
|
33
|
|
|
|
|
63
|
4
|
50
|
33
|
|
|
27
|
$filterDef = CORE::delete( $opts->{acceptNode} ) if( CORE::exists( $opts->{acceptNode} ) && ref( $opts->{acceptNode} ) eq 'CODE' ); |
64
|
|
|
|
|
|
|
} |
65
|
|
|
|
|
|
|
|
66
|
10
|
100
|
|
|
|
42
|
if( $self->_is_object( $filterDef ) ) |
67
|
|
|
|
|
|
|
{ |
68
|
6
|
50
|
|
|
|
114
|
return( $self->error({ |
69
|
|
|
|
|
|
|
message => "Object provided does not implement the \"acceptNode\" method.", |
70
|
|
|
|
|
|
|
class => 'HTML::Object::TypeError', |
71
|
|
|
|
|
|
|
}) ) if( !$filterDef->can( 'acceptNode' ) ); |
72
|
6
|
|
|
7
|
|
48
|
$filter = sub{ return( $filterDef->acceptNode( @_ ) ); }; |
|
7
|
|
|
|
|
34
|
|
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
else |
75
|
|
|
|
|
|
|
{ |
76
|
4
|
50
|
|
|
|
74
|
return( $self->error({ |
77
|
|
|
|
|
|
|
message => "Filter parameter provided is not a code reference.", |
78
|
|
|
|
|
|
|
class => 'HTML::Object::TypeError', |
79
|
|
|
|
|
|
|
}) ) if( ref( $filterDef ) ne 'CODE' ); |
80
|
4
|
|
|
|
|
9
|
$filter = $filterDef; |
81
|
|
|
|
|
|
|
} |
82
|
10
|
|
|
|
|
228
|
$self->{_init_strict_use_sub} = 1; |
83
|
10
|
50
|
|
|
|
74
|
$self->SUPER::init( @_ ) || return( $self->pass_error ); |
84
|
10
|
|
|
|
|
977
|
$self->{children} = []; |
85
|
10
|
|
|
|
|
41
|
$self->{pointerbeforereferencenode} = 1; |
86
|
10
|
|
|
|
|
57
|
$self->root( $root ); |
87
|
10
|
|
|
|
|
8571
|
$self->whatToShow( $what ); |
88
|
10
|
|
|
|
|
371331
|
$self->filter( $filter ); |
89
|
10
|
|
|
|
|
8500
|
$self->{_parent} = $root; |
90
|
|
|
|
|
|
|
# This is the position of our cursor in the flatten tree represented by an array of all elements |
91
|
10
|
|
|
|
|
45
|
$self->{_pos} = 0; |
92
|
10
|
|
|
|
|
62
|
my $elems = $self->_flatten; |
93
|
10
|
|
|
|
|
118
|
$self->_elements( $elems ); |
94
|
10
|
|
|
|
|
8897
|
return( $self ); |
95
|
|
|
|
|
|
|
} |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# Note: property expandEntityReferences read-only |
98
|
0
|
|
|
0
|
1
|
0
|
sub expandEntityReferences : lvalue { return( shift->_set_get_boolean( 'expandentityreferences', @_ ) ); } |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
# Note: property filter read-only |
101
|
64
|
|
|
64
|
1
|
279
|
sub filter : lvalue { return( shift->_set_get_code( 'filter', @_ ) ); } |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
sub nextNode |
104
|
|
|
|
|
|
|
{ |
105
|
40
|
|
|
40
|
1
|
49737
|
my $self = shift( @_ ); |
106
|
40
|
|
|
|
|
122
|
my $elems = $self->_elements; |
107
|
|
|
|
|
|
|
# Would be -1 if empty |
108
|
40
|
|
|
|
|
31242
|
my $size = $elems->size; |
109
|
|
|
|
|
|
|
# We reached the end of this array |
110
|
40
|
50
|
|
|
|
1445241
|
return if( $self->{_pos} >= $size ); |
111
|
40
|
|
|
|
|
6271
|
my $whattoshow = $self->whatToShow; |
112
|
40
|
50
|
|
|
|
31340
|
return( $self->error( "Somehow the bitwise value of what to show is not an integer!" ) ) if( !$self->_is_integer( $whattoshow ) ); |
113
|
40
|
|
|
|
|
810
|
my $filter = $self->filter; |
114
|
40
|
|
|
|
|
29955
|
my $class = ref( $self ); |
115
|
|
|
|
|
|
|
# Somehow it has been changed maybe? End our iteration |
116
|
40
|
50
|
|
|
|
148
|
if( ref( $filter ) ne 'CODE' ) |
117
|
|
|
|
|
|
|
{ |
118
|
0
|
|
|
|
|
0
|
$self->{_pos} = $size; |
119
|
0
|
|
|
|
|
0
|
return( $self->error({ |
120
|
|
|
|
|
|
|
message => "Filter is not a code reference!", |
121
|
|
|
|
|
|
|
class => 'HTML::Object::TypeError', |
122
|
|
|
|
|
|
|
}) ); |
123
|
|
|
|
|
|
|
} |
124
|
40
|
|
|
|
|
79
|
my $node; |
125
|
40
|
|
|
|
|
102
|
my $tmpPos = $self->{_pos} + 1; |
126
|
40
|
|
|
|
|
63
|
while(1) |
127
|
|
|
|
|
|
|
{ |
128
|
|
|
|
|
|
|
# We reached the end of the array |
129
|
145
|
100
|
|
|
|
9523
|
last if( $tmpPos > $size ); |
130
|
140
|
|
|
|
|
16480
|
my $tmpNode = $elems->index( $tmpPos ); |
131
|
140
|
|
|
|
|
9374
|
my $type = $tmpNode->nodeType; |
132
|
140
|
100
|
|
|
|
445
|
$tmpPos++, next if( !$self->_check_element( $tmpNode ) ); |
133
|
|
|
|
|
|
|
# This is for the pos() method |
134
|
69
|
|
|
|
|
3279
|
$self->{_relative_pos} = $tmpNode->parent->children->pos( $tmpNode ); |
135
|
69
|
|
|
|
|
6938
|
local $_ = $tmpNode; |
136
|
69
|
|
|
|
|
200
|
my $rv = $filter->( $tmpNode ); |
137
|
|
|
|
|
|
|
# Filter should return FILTER_ACCEPT or FILTER_REJECT or FILTER_SKIP |
138
|
69
|
100
|
33
|
|
|
29220
|
$tmpPos++, next if( !defined( $rv ) || $rv == FILTER_REJECT || $rv == FILTER_SKIP ); |
|
|
|
66
|
|
|
|
|
139
|
35
|
|
|
|
|
84
|
$node = $tmpNode; |
140
|
35
|
|
|
|
|
70
|
$self->{_pos} = $tmpPos; |
141
|
35
|
|
|
|
|
71
|
last; |
142
|
|
|
|
|
|
|
} |
143
|
|
|
|
|
|
|
# Return the node to our caller |
144
|
40
|
|
|
|
|
843
|
return( $node ); |
145
|
|
|
|
|
|
|
} |
146
|
|
|
|
|
|
|
|
147
|
0
|
|
|
0
|
1
|
0
|
sub pos { return( shift->{_relative_pos} ); } |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
sub previousNode |
150
|
|
|
|
|
|
|
{ |
151
|
16
|
|
|
16
|
1
|
29231
|
my $self = shift( @_ ); |
152
|
16
|
|
|
|
|
59
|
my $elems = $self->_elements; |
153
|
|
|
|
|
|
|
# Would be -1 if empty |
154
|
16
|
|
|
|
|
12764
|
my $size = $elems->size; |
155
|
|
|
|
|
|
|
# Already at the beginning |
156
|
16
|
100
|
|
|
|
579696
|
return if( $self->{_pos} <= 0 ); |
157
|
14
|
|
|
|
|
55
|
my $whattoshow = $self->whatToShow; |
158
|
14
|
50
|
|
|
|
11021
|
return( $self->error( "Somehow the bitwise value of what to show is not an integer!" ) ) if( !$self->_is_integer( $whattoshow ) ); |
159
|
14
|
|
|
|
|
291
|
my $filter = $self->filter; |
160
|
14
|
|
|
|
|
10543
|
my $class = ref( $self ); |
161
|
|
|
|
|
|
|
# Somehow it has been changed maybe? End our iteration |
162
|
14
|
50
|
|
|
|
56
|
if( ref( $filter ) ne 'CODE' ) |
163
|
|
|
|
|
|
|
{ |
164
|
0
|
|
|
|
|
0
|
$self->{_pos} = $size; |
165
|
0
|
|
|
|
|
0
|
return( $self->error({ |
166
|
|
|
|
|
|
|
message => "Filter is not a code reference!", |
167
|
|
|
|
|
|
|
class => 'HTML::Object::TypeError', |
168
|
|
|
|
|
|
|
}) ); |
169
|
|
|
|
|
|
|
} |
170
|
14
|
|
|
|
|
24
|
my $node; |
171
|
14
|
|
|
|
|
39
|
my $tmpPos = $self->{_pos} - 1; |
172
|
14
|
|
|
|
|
27
|
while(1) |
173
|
|
|
|
|
|
|
{ |
174
|
|
|
|
|
|
|
# We reached the start of the array |
175
|
44
|
50
|
|
|
|
95
|
last if( $tmpPos < 0 ); |
176
|
44
|
|
|
|
|
152
|
my $tmpNode = $elems->index( $tmpPos ); |
177
|
44
|
|
|
|
|
2786
|
my $type = $tmpNode->nodeType; |
178
|
44
|
50
|
|
|
|
132
|
$tmpPos--, next if( !$self->_check_element( $tmpNode ) ); |
179
|
|
|
|
|
|
|
# This is for the pos() method |
180
|
44
|
|
|
|
|
279
|
$self->{_relative_pos} = $tmpNode->parent->children->pos( $tmpNode ); |
181
|
44
|
|
|
|
|
4369
|
local $_ = $tmpNode; |
182
|
44
|
|
|
|
|
134
|
my $rv = $filter->( $tmpNode ); |
183
|
|
|
|
|
|
|
# Filter should return FILTER_ACCEPT or FILTER_REJECT or FILTER_SKIP |
184
|
44
|
100
|
33
|
|
|
363
|
$tmpPos--, next if( !defined( $rv ) || $rv == FILTER_REJECT || $rv == FILTER_SKIP ); |
|
|
|
66
|
|
|
|
|
185
|
14
|
|
|
|
|
35
|
$node = $tmpNode; |
186
|
|
|
|
|
|
|
# Decrement the position for the next turn |
187
|
14
|
|
|
|
|
31
|
$self->{_pos} = $tmpPos; |
188
|
14
|
|
|
|
|
33
|
last; |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
# Return the node to our caller |
191
|
14
|
|
|
|
|
94
|
return( $node ); |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
# Note: property root read-only |
195
|
25
|
|
|
25
|
1
|
4285
|
sub root : lvalue { return( shift->_set_get_object_lvalue( 'root', 'HTML::Object::DOM::Node', @_ ) ); } |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
# Note: property whatToShow read-only |
198
|
258
|
|
|
258
|
1
|
1061
|
sub whatToShow : lvalue { return( shift->_set_get_number( 'whattoshow', @_ ) ); } |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
sub _check_element |
201
|
|
|
|
|
|
|
{ |
202
|
191
|
|
|
191
|
|
298
|
my $self = shift( @_ ); |
203
|
191
|
|
50
|
|
|
631
|
my $node = shift( @_ ) || return; |
204
|
191
|
|
|
|
|
399
|
my $type = $node->nodeType; |
205
|
191
|
|
|
|
|
453
|
my $whattoshow = $self->whatToShow; |
206
|
191
|
100
|
|
|
|
181607
|
unless( $whattoshow == SHOW_ALL ) |
207
|
|
|
|
|
|
|
{ |
208
|
98
|
100
|
100
|
|
|
11452
|
if( ( $type == ELEMENT_NODE && !( $whattoshow & SHOW_ELEMENT ) ) || |
|
|
|
33
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
209
|
|
|
|
|
|
|
( $type == ATTRIBUTE_NODE && !( $whattoshow & SHOW_ATTRIBUTE ) ) || |
210
|
|
|
|
|
|
|
( $type == TEXT_NODE && !( $whattoshow & SHOW_TEXT ) ) || |
211
|
|
|
|
|
|
|
( $type == CDATA_SECTION_NODE && !( $whattoshow & SHOW_CDATA_SECTION ) ) || |
212
|
|
|
|
|
|
|
( $type == PROCESSING_INSTRUCTION_NODE && !( $whattoshow & SHOW_PROCESSING_INSTRUCTION ) ) || |
213
|
|
|
|
|
|
|
( $type == COMMENT_NODE && !( $whattoshow & SHOW_COMMENT ) ) || |
214
|
|
|
|
|
|
|
( $type == DOCUMENT_NODE && !( $whattoshow & SHOW_DOCUMENT ) ) || |
215
|
|
|
|
|
|
|
( $type == DOCUMENT_TYPE_NODE && !( $whattoshow & SHOW_DOCUMENT_TYPE ) ) || |
216
|
|
|
|
|
|
|
( $type == DOCUMENT_FRAGMENT_NODE && !( $whattoshow & SHOW_DOCUMENT_FRAGMENT ) ) || |
217
|
|
|
|
|
|
|
# Notation nodes are deprecated, but we list them here anyway |
218
|
|
|
|
|
|
|
( $type == NOTATION_NODE && !( $whattoshow & SHOW_NOTATION ) ) || |
219
|
|
|
|
|
|
|
# This is a non-standard addition to provide more granularity |
220
|
|
|
|
|
|
|
( $type == SPACE_NODE && !( $whattoshow & SHOW_SPACE ) ) ) |
221
|
|
|
|
|
|
|
{ |
222
|
74
|
|
|
|
|
23626
|
return(0); |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
} |
225
|
117
|
|
|
|
|
17807
|
return(1); |
226
|
|
|
|
|
|
|
} |
227
|
|
|
|
|
|
|
|
228
|
88
|
|
|
88
|
|
381
|
sub _elements { return( shift->_set_get_array_as_object( '_elements', @_ ) ); } |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
sub _flatten |
231
|
|
|
|
|
|
|
{ |
232
|
10
|
|
|
10
|
|
32
|
my $self = shift( @_ ); |
233
|
10
|
|
|
|
|
34
|
my $root = $self->root; |
234
|
|
|
|
|
|
|
# Should not happen |
235
|
10
|
50
|
|
|
|
7433
|
return( $self->error( "root element is gone!" ) ) if( !defined( $root ) ); |
236
|
10
|
|
|
|
|
57
|
my $elems = $self->new_array; |
237
|
10
|
|
|
|
|
245
|
my $seen = {}; |
238
|
10
|
|
|
|
|
26
|
my $crawl; |
239
|
|
|
|
|
|
|
$crawl = sub |
240
|
|
|
|
|
|
|
{ |
241
|
250
|
|
|
250
|
|
324
|
my $e = shift( @_ ); |
242
|
250
|
|
|
|
|
591
|
my $addr = $self->_refaddr( $e ); |
243
|
250
|
50
|
|
|
|
1559
|
return if( ++$seen->{ $addr } > 1 ); |
244
|
250
|
|
|
|
|
653
|
my $kids = $e->children; |
245
|
250
|
|
|
|
|
18588
|
foreach my $kid ( @$kids ) |
246
|
|
|
|
|
|
|
{ |
247
|
|
|
|
|
|
|
# Junk somehow although it should not happen |
248
|
240
|
50
|
|
|
|
572
|
next if( !$self->_is_a( $kid => 'HTML::Object::DOM::Node' ) ); |
249
|
240
|
|
|
|
|
7081
|
$elems->push( $kid ); |
250
|
|
|
|
|
|
|
# Drill down... |
251
|
240
|
|
|
|
|
1506
|
$crawl->( $kid ); |
252
|
|
|
|
|
|
|
} |
253
|
10
|
|
|
|
|
87
|
}; |
254
|
10
|
|
|
|
|
64
|
$elems->push( $root ); |
255
|
10
|
|
|
|
|
98
|
$crawl->( $root ); |
256
|
10
|
|
|
|
|
59
|
return( $elems ); |
257
|
|
|
|
|
|
|
} |
258
|
|
|
|
|
|
|
|
259
|
0
|
|
|
0
|
|
|
sub _parent { return( shift->_set_get_object_without_init( '_parent', 'HTML::Object::DOM::Node', @_ ) ); } |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
1; |
262
|
|
|
|
|
|
|
# NOTE: POD |
263
|
|
|
|
|
|
|
__END__ |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
=encoding utf-8 |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
=head1 NAME |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
HTML::Object::DOM::NodeIterator - HTML Object DOM Node Iterator Shared Class |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
=head1 SYNOPSIS |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
With just one argument, this default to search for everything (C<SHOW_ALL>) and to use the default filter, which always returns C<FILTER_ACCEPT> |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
use HTML::Object::DOM::NodeIterator; |
276
|
|
|
|
|
|
|
my $nodes = HTML::Object::DOM::NodeIterator->new( $root_node ) || |
277
|
|
|
|
|
|
|
die( HTML::Object::DOM::NodeIterator->error, "\n" ); |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
Or, passing an anonymous subroutine as the filter |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
my $nodes = HTML::Object::DOM::NodeIterator->new( |
282
|
|
|
|
|
|
|
$root_node, |
283
|
|
|
|
|
|
|
$what_to_show_bit, |
284
|
|
|
|
|
|
|
sub{ return( FILTER_ACCEPT ); } |
285
|
|
|
|
|
|
|
) || die( HTML::Object::DOM::NodeIterator->error, "\n" ); |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
Or, passing an hash reference with a property 'acceptNode' whose value is an anonymous subroutine, as the filter |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
my $nodes = HTML::Object::DOM::NodeIterator->new( |
290
|
|
|
|
|
|
|
$root_node, |
291
|
|
|
|
|
|
|
$what_to_show_bit, |
292
|
|
|
|
|
|
|
{ |
293
|
|
|
|
|
|
|
acceptNode => sub{ return( FILTER_ACCEPT ); } |
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
) || die( HTML::Object::DOM::NodeIterator->error, "\n" ); |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
Or, passing an object that implements the method "acceptNode" |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
my $nodes = HTML::Object::DOM::NodeIterator->new( |
300
|
|
|
|
|
|
|
$root_node, |
301
|
|
|
|
|
|
|
$what_to_show_bit, |
302
|
|
|
|
|
|
|
# This object must implement the acceptNode method |
303
|
|
|
|
|
|
|
My::Customer::NodeFilter->new |
304
|
|
|
|
|
|
|
) || die( HTML::Object::DOM::NodeIterator->error, "\n" ); |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
There is also L<HTML::Object::DOM::TreeWalker>, which performs a somewhat similar function. |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
Choose C<NodeIterator> when you only need a simple iterator to filter and browse the selected nodes, and choose L<HTML::Object::DOM::TreeWalker> when you need to access to the node and its siblings. |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
=head1 VERSION |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
v0.2.0 |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
=head1 DESCRIPTION |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
This class is to be inherited by either L<HTML::Object::DOM::NodeIterator> or L<HTML::Object::DOM::TreeWalker> and implements basic tree crawling mechanism. |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
=head1 PROPERTIES |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
=head2 expandEntityReferences |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
Normally this is read-only, but under perl you can set whatever boolean value you want. |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
Under JavaScript, this is a boolean value indicating if, when discarding an C<EntityReference> its whole sub-tree must be discarded at the same time. |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
Example: |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
use HTML::Object::DOM::NodeFilter qw( :all ); |
329
|
|
|
|
|
|
|
my $nodeIterator = $doc->createNodeIterator( |
330
|
|
|
|
|
|
|
$doc->body, |
331
|
|
|
|
|
|
|
SHOW_ELEMENT, |
332
|
|
|
|
|
|
|
sub{ return( FILTER_ACCEPT ); }, |
333
|
|
|
|
|
|
|
# or |
334
|
|
|
|
|
|
|
# { acceptNode => sub{ return( FILTER_ACCEPT ); } }, |
335
|
|
|
|
|
|
|
); |
336
|
|
|
|
|
|
|
my $expand = $nodeIterator->expandEntityReferences; |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/NodeIterator/expandEntityReferences> |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
=head2 filter |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
Normally this is read-only, but under perl you can set it to a new L<HTML::Object::DOM::NodeFilter> object you want, even after object instantiation. |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
Returns a L<HTML::Object::DOM::NodeFilter> used to select the relevant nodes. |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
Example: |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
use HTML::Object::DOM::NodeFilter qw( :all ); |
349
|
|
|
|
|
|
|
my $nodeIterator = $doc->createNodeIterator( |
350
|
|
|
|
|
|
|
$doc->body, |
351
|
|
|
|
|
|
|
SHOW_ELEMENT, |
352
|
|
|
|
|
|
|
sub{ return( FILTER_ACCEPT ); }, |
353
|
|
|
|
|
|
|
# or |
354
|
|
|
|
|
|
|
# { acceptNode => sub{ return( FILTER_ACCEPT ); } }, |
355
|
|
|
|
|
|
|
); |
356
|
|
|
|
|
|
|
my $nodeFilter = $nodeIterator->filter; |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/NodeIterator/filter> |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
=head2 pos |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
Read-only. |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
This is a non-standard property, which returns the 0-based position in the array of the anchor element's children. |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
You can poll this to know where the iterator is at. |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
Example: |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
use HTML::Object::DOM::NodeFilter qw( :all ); |
371
|
|
|
|
|
|
|
# You need to first declare $nodeIterator to be able to use it in the callback |
372
|
|
|
|
|
|
|
my $nodeIterator; |
373
|
|
|
|
|
|
|
$nodeIterator = $doc->createNodeIterator( |
374
|
|
|
|
|
|
|
$doc->body, |
375
|
|
|
|
|
|
|
SHOW_ELEMENT, |
376
|
|
|
|
|
|
|
sub |
377
|
|
|
|
|
|
|
{ |
378
|
|
|
|
|
|
|
say "Current position is: ", $nodeIterator->pos ); |
379
|
|
|
|
|
|
|
return( $_->getName eq 'div' ? FILTER_ACCEPT : FILTER_SKIP ); |
380
|
|
|
|
|
|
|
}, |
381
|
|
|
|
|
|
|
); |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
=head2 root |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
Normally this is read-only, but under perl you can set whatever L<node value|HTML::Object::DOM::Node> you want. |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
Returns a L<Node|HTML::Object::DOM::Node> representing the root node as specified when the C<NodeIterator> was created. |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
Example: |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
use HTML::Object::DOM::NodeFilter qw( :all ); |
392
|
|
|
|
|
|
|
my $nodeIterator = $doc->createNodeIterator( |
393
|
|
|
|
|
|
|
$doc->body, |
394
|
|
|
|
|
|
|
SHOW_ELEMENT, |
395
|
|
|
|
|
|
|
sub{ return( FILTER_ACCEPT ); }, |
396
|
|
|
|
|
|
|
# or |
397
|
|
|
|
|
|
|
# { acceptNode => sub{ return( FILTER_ACCEPT ); } }, |
398
|
|
|
|
|
|
|
); |
399
|
|
|
|
|
|
|
my $root = $nodeIterator->root; # $doc->body in this case |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
See L<for more information|https://developer.mozilla.org/en-US/docs/Web/API/NodeIterator/root> |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=head2 whatToShow |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
Normally this is read-only, but under perl you can set whatever number value you want. |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
Returns an unsigned long being a bitmask made of L<constants|/CONSTANTS> describing the types of L<Node|HTML::Object::DOM::Node> that must to be presented. Non-matching nodes are skipped, but their children may be included, if relevant. |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
Possible constant values (exported by L<HTML::Object::DOM::NodeFilter>) are: |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
=over 4 |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
=item SHOW_ALL (4294967295) |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
Shows all nodes. |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
=item SHOW_ELEMENT (1) |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
Shows Element nodes. |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=item SHOW_ATTRIBUTE (2) |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
Shows attribute L<Attribute nodes|HTML::Object::DOM::Attribute>. This is meaningful only when creating a NodeIterator with an L<Attribute node|HTML::Object::DOM::Attribute> as its root; in this case, it means that the L<attribute node|HTML::Object::DOM::Attribute> will appear in the first position of the iteration or traversal. Since attributes are never children of other L<nodes|HTML::Object::DOM::Node>, they do not appear when traversing over the document tree. |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
=item SHOW_TEXT (4) |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
Shows Text nodes. |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
Example: |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
use HTML::Object::DOM::NodeFilter qw( :all ); |
432
|
|
|
|
|
|
|
my $nodeIterator = $doc->createNodeIterator( |
433
|
|
|
|
|
|
|
$doc->body, |
434
|
|
|
|
|
|
|
( SHOW_ELEMENT | SHOW_COMMENT | SHOW_TEXT ), |
435
|
|
|
|
|
|
|
sub{ return( FILTER_ACCEPT ); }, |
436
|
|
|
|
|
|
|
# or |
437
|
|
|
|
|
|
|
# { acceptNode => sub{ return( FILTER_ACCEPT ); } }, |
438
|
|
|
|
|
|
|
); |
439
|
|
|
|
|
|
|
if( ( $nodeIterator->whatToShow & SHOW_ALL ) || |
440
|
|
|
|
|
|
|
( $nodeIterator->whatToShow & SHOW_COMMENT ) ) |
441
|
|
|
|
|
|
|
{ |
442
|
|
|
|
|
|
|
# $nodeIterator will show comments |
443
|
|
|
|
|
|
|
} |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
=item SHOW_CDATA_SECTION (8) |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
Will always returns nothing, because there is no support for xml documents. |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
=item SHOW_ENTITY_REFERENCE (16) |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
Legacy, no more used. |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
=item SHOW_ENTITY (32) |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
Legacy, no more used. |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=item SHOW_PROCESSING_INSTRUCTION (64) |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
Shows ProcessingInstruction nodes. |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=item SHOW_COMMENT (128) |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
Shows Comment nodes. |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
=item SHOW_DOCUMENT (256) |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
Shows Document nodes |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
=item SHOW_DOCUMENT_TYPE (512) |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
Shows C<DocumentType> nodes |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
=item SHOW_DOCUMENT_FRAGMENT (1024) |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
Shows L<HTML::Object::DOM::DocumentFragment> nodes. |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
=item SHOW_NOTATION (2048) |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
Legacy, no more used. |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
=item SHOW_SPACE (4096) |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
Show Space nodes. This is a non-standard extension under this perl framework. |
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
=back |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
See L<for more information|https://developer.mozilla.org/en-US/docs/Web/API/NodeIterator/whatToShow> |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
=head1 CONSTRUCTOR |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
=head2 new |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
Provided with a L<root node|HTML::Object::DOM::Node>, an optional bitwise value representing what to show and an optional filter callback and this will return a new node iterator or tree walker depending on the class used. |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
=head1 METHODS |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
=head2 nextNode |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
Returns the next L<Node|HTML::Object::DOM::Node> in the document, or C<undef> if there are none. |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
Example: |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
use HTML::Object::DOM::NodeFilter qw( :all ); |
504
|
|
|
|
|
|
|
my $nodeIterator = $doc->createNodeIterator( |
505
|
|
|
|
|
|
|
$doc->body, |
506
|
|
|
|
|
|
|
SHOW_ELEMENT, |
507
|
|
|
|
|
|
|
sub{ return( FILTER_ACCEPT ); }, |
508
|
|
|
|
|
|
|
# or |
509
|
|
|
|
|
|
|
# { acceptNode => sub{ return( FILTER_ACCEPT ); } }, |
510
|
|
|
|
|
|
|
0 # false; this optional argument is not used any more |
511
|
|
|
|
|
|
|
); |
512
|
|
|
|
|
|
|
my $currentNode = $nodeIterator->nextNode(); # returns the next node |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/NodeIterator/nextNode> |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
=head2 previousNode |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
Returns the previous L<Node|HTML::Object::DOM::Node> in the document, or C<undef> if there are none. |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
Example: |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
use HTML::Object::DOM::NodeFilter qw( :all ); |
523
|
|
|
|
|
|
|
my $nodeIterator = $doc->createNodeIterator( |
524
|
|
|
|
|
|
|
$doc->body, |
525
|
|
|
|
|
|
|
SHOW_ELEMENT, |
526
|
|
|
|
|
|
|
sub{ return( FILTER_ACCEPT ); }, |
527
|
|
|
|
|
|
|
# or |
528
|
|
|
|
|
|
|
# { acceptNode => sub{ return( FILTER_ACCEPT ); } }, |
529
|
|
|
|
|
|
|
0 # false; this optional argument is not used any more |
530
|
|
|
|
|
|
|
); |
531
|
|
|
|
|
|
|
my $currentNode = $nodeIterator->nextNode(); # returns the next node |
532
|
|
|
|
|
|
|
my $previousNode = $nodeIterator->previousNode(); # same result, since we backtracked to the previous node |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/NodeIterator/previousNode> |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
=head1 AUTHOR |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
Jacques Deguest E<lt>F<jack@deguest.jp>E<gt> |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
=head1 SEE ALSO |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
L<HTML::Object::DOM::NodeIterator>, L<HTML::Object::DOM::TreeWalker> |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/NodeIterator>, L<StackOverflow topic on NodeIterator|https://stackoverflow.com/questions/7941288/when-to-use-nodeiterator>, L<W3C specifications|https://dom.spec.whatwg.org/#interface-nodeiterator> |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
=head1 COPYRIGHT & LICENSE |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
Copyright(c) 2021 DEGUEST Pte. Ltd. |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
All rights reserved |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. |
553
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
=cut |