File Coverage

lib/HTML/Object/DOM/TreeWalker.pm
Criterion Covered Total %
statement 93 126 73.8
branch 24 64 37.5
condition n/a
subroutine 15 15 100.0
pod 6 6 100.0
total 138 211 65.4


line stmt bran cond sub pod time code
1             ##----------------------------------------------------------------------------
2             ## HTML Object - ~/lib/HTML/Object/DOM/TreeWalker.pm
3             ## Version v0.2.0
4             ## Copyright(c) 2022 DEGUEST Pte. Ltd.
5             ## Author: Jacques Deguest <jack@deguest.jp>
6             ## Created 2022/01/02
7             ## Modified 2022/09/18
8             ## All rights reserved
9             ##
10             ##
11             ## This program is free software; you can redistribute it and/or modify it
12             ## under the same terms as Perl itself.
13             ##----------------------------------------------------------------------------
14             package HTML::Object::DOM::TreeWalker;
15             BEGIN
16             {
17 2     2   3354 use strict;
  2         9  
  2         73  
18 2     2   11 use warnings;
  2         4  
  2         59  
19 2     2   23 use parent qw( HTML::Object::DOM::NodeIteratorShared );
  2         4  
  2         11  
20 2     2   133 use vars qw( $VERSION );
  2         11  
  2         80  
21             # To import its constants
22 2     2   10 use HTML::Object::DOM::Node;
  2         8  
  2         17  
23 2     2   697 use HTML::Object::DOM::NodeFilter qw( :all );
  2         5  
  2         35  
24 2     2   782 our $VERSION = 'v0.2.0';
25             };
26              
27 2     2   26 use strict;
  2         7  
  2         39  
28 2     2   12 use warnings;
  2         6  
  2         1825  
29              
30             # Note: method init is inherited
31              
32             # Note: property currentNode
33             sub currentNode
34             {
35 8     8 1 1158 my $self = shift( @_ );
36 8         28 my $elems = $self->_elements;
37 8         7018 return( $elems->index( $self->{_pos} ) );
38             }
39              
40             # Note: property expandEntityReferences read-only is inherited
41              
42             # Note: property filter read-only is inherited
43              
44             sub firstChild
45             {
46 3     3 1 37867 my $self = shift( @_ );
47 3         15 my $elems = $self->_elements;
48 3         2334 my $this = $elems->index( $self->{_pos} );
49 3 50       195 return( $self->error( "Unable to find the current node at position '$self->{_pos}' in our tree array!" ) ) if( !defined( $this ) );
50             # Only elements can have children
51 3 50       26 return if( $this->nodeType != ELEMENT_NODE );
52 3         11 my $node;
53 3         7 my $tmpPos = 0;
54 3         15 my $children = $this->children;
55 3         274 my $size = $children->size;
56             # We seek the first appropriate first child depending on the value of $whatToShow
57 3         108661 while(1)
58             {
59 5 50       27 last if( $tmpPos > $size );
60 5         927 my $tmpNode = $children->index( $tmpPos );
61 5         375 my $rv = $self->_check_element( $tmpNode );
62 5 50       277 last if( !defined( $rv ) );
63 5 100       316 $tmpPos++, next if( !$rv );
64 3         11 $node = $tmpNode;
65 3         7 last;
66             }
67 3 50       12 return if( !defined( $node ) );
68 3         20 my $pos = $elems->pos( $node );
69             # Amazingly enough, the first child of this node cannot be found among the list of all nodes in this tree!
70 3 50       105 return if( !defined( $pos ) );
71 3         8 $self->{_pos} = $pos;
72 3         22 return( $node );
73             }
74              
75             sub lastChild
76             {
77 1     1 1 19 my $self = shift( @_ );
78 1         4 my $elems = $self->_elements;
79 1         787 my $this = $elems->index( $self->{_pos} );
80 1 50       57 return( $self->error( "Unable to find the current node at position '$self->{_pos}' in our tree array!" ) ) if( !defined( $this ) );
81             # Only elements can have children
82 1 50       4 return if( $this->nodeType != ELEMENT_NODE );
83 1         3 my $node;
84 1         4 my $children = $this->children;
85 1         72 my $tmpPos = $children->size;
86             # We seek the first appropriate first child depending on the value of $whatToShow
87 1         35775 while(1)
88             {
89 2 50       19 last if( $tmpPos < 0 );
90 2         290 my $tmpNode = $children->index( $tmpPos );
91 2         146 my $rv = $self->_check_element( $tmpNode );
92 2 50       132 last if( !defined( $rv ) );
93 2 100       138 $tmpPos--, next if( !$rv );
94 1         6 $node = $tmpNode;
95 1         2 last;
96             }
97 1 50       4 return if( !defined( $node ) );
98 1         4 my $pos = $elems->pos( $node );
99             # Amazingly enough, the last child of this node cannot be found among the list of all nodes in this tree!
100 1 50       69 return if( !defined( $pos ) );
101 1         4 $self->{_pos} = $pos;
102 1         5 return( $node );
103             }
104              
105             # Note: method nextNode is inherited
106              
107             sub nextSibling
108             {
109 1     1 1 1653 my $self = shift( @_ );
110 1         7 my $elems = $self->_elements;
111 1         816 my $this = $elems->index( $self->{_pos} );
112 1 50       75 return( $self->error( "Unable to find the current node at position '$self->{_pos}' in our tree array!" ) ) if( !defined( $this ) );
113             # No need to bother if our current node is the root node. Its siblings are not part of the tree
114 1 50       7 return if( $this eq $self->root );
115             # Get all next siblings
116 0         0 my $node;
117 0         0 my $siblings = $this->right;
118 0         0 my $size = $siblings->size;
119 0         0 my $tmpPos = 0;
120             # We seek the first appropriate sibling depending on the value of $whatToShow
121 0         0 while(1)
122             {
123 0 0       0 last if( $tmpPos > $size );
124 0         0 my $tmpNode = $siblings->index( $tmpPos );
125 0         0 my $rv = $self->_check_element( $tmpNode );
126 0 0       0 last if( !defined( $rv ) );
127 0 0       0 $tmpPos++, next if( !$rv );
128 0         0 $node = $tmpNode;
129 0         0 last;
130             }
131 0 0       0 return if( !defined( $node ) );
132 0         0 my $pos = $elems->pos( $node );
133             # Amazingly enough, the next sibling of this node cannot be found among the list of all nodes in this tree!
134 0 0       0 return if( !defined( $pos ) );
135 0         0 $self->{_pos} = $pos;
136 0         0 return( $node );
137             }
138              
139             sub parentNode
140             {
141 1     1 1 1218 my $self = shift( @_ );
142 1         6 my $elems = $self->_elements;
143 1         774 my $this = $elems->index( $self->{_pos} );
144 1 50       58 return( $self->error( "Unable to find the current node at position '$self->{_pos}' in our tree array!" ) ) if( !defined( $this ) );
145 1         5 my $root = $self->root;
146             # We should not be here in the first place.
147 1 50       728 return if( $this eq $root );
148 1         6 my $node = $this->parent;
149 1 50       31 return if( !defined( $node ) );
150             # We cannot go up to the root, but only within the root itself.
151             # return if( $node eq $root );
152 1         4 my $pos = $elems->pos( $node );
153             # Amazingly enough, the last child of this node cannot be found among the list of all nodes in this tree!
154 1 50       23 return if( !defined( $pos ) );
155 1         3 $self->{_pos} = $pos;
156 1         3 return( $node );
157             }
158              
159             # Note: method previousNode is inherited
160              
161             sub previousSibling
162             {
163 1     1 1 428 my $self = shift( @_ );
164 1         4 my $elems = $self->_elements;
165 1         757 my $this = $elems->index( $self->{_pos} );
166 1 50       57 return( $self->error( "Unable to find the current node at position '$self->{_pos}' in our tree array!" ) ) if( !defined( $this ) );
167             # No need to bother if our current node is the root node. Its siblings are not part of the tree
168 1 50       4 return if( $this eq $self->root );
169             # Get all previous siblings
170 0           my $node;
171 0           my $siblings = $this->left;
172 0           my $tmpPos = $siblings->size;
173             # We seek the first appropriate sibling depending on the value of $whatToShow
174 0           while(1)
175             {
176 0 0         last if( $tmpPos < 0 );
177 0           my $tmpNode = $siblings->index( $tmpPos );
178 0           my $rv = $self->_check_element( $tmpNode );
179 0 0         last if( !defined( $rv ) );
180 0 0         $tmpPos--, next if( !$rv );
181 0           $node = $tmpNode;
182 0           last;
183             }
184 0 0         return if( !defined( $node ) );
185 0           my $pos = $elems->pos( $node );
186             # Amazingly enough, the previous sibling of this node cannot be found among the list of all nodes in this tree!
187 0 0         return if( !defined( $pos ) );
188 0           $self->{_pos} = $pos;
189 0           return( $node );
190             }
191              
192             # Note: property root read-only is inherited
193              
194             # Note: property whatToShow read-only is inherited
195              
196             1;
197             # NOTE: POD
198             __END__
199              
200             =encoding utf-8
201              
202             =head1 NAME
203              
204             HTML::Object::DOM::TreeWalker - HTML Object DOM Tree Walker Class
205              
206             =head1 SYNOPSIS
207              
208             With just one argument, this default to search for everything (C<SHOW_ALL>) and to use the default filter, which always returns C<FILTER_ACCEPT>
209              
210             use HTML::Object::DOM::TreeWalker;
211             my $walker = HTML::Object::DOM::TreeWalker->new( $doc->body ) ||
212             die( HTML::Object::DOM::TreeWalker->error, "\n" );
213              
214             Or, passing an anonymous subroutine as the filter
215              
216             my $nodes = HTML::Object::DOM::TreeWalker->new(
217             $root_node,
218             $what_to_show_bit,
219             sub{ return( FILTER_ACCEPT ); }
220             ) || die( HTML::Object::DOM::TreeWalker->error, "\n" );
221              
222             Or, passing an hash reference with a property 'acceptNode' whose value is an anonymous subroutine, as the filter
223              
224             my $nodes = HTML::Object::DOM::TreeWalker->new(
225             $root_node,
226             $what_to_show_bit,
227             {
228             acceptNode => sub{ return( FILTER_ACCEPT ); }
229             }
230             ) || die( HTML::Object::DOM::TreeWalker->error, "\n" );
231              
232             Or, passing an object that implements the method "acceptNode"
233              
234             my $nodes = HTML::Object::DOM::TreeWalker->new(
235             $root_node,
236             $what_to_show_bit,
237             # This object must implement the acceptNode method
238             My::Customer::NodeFilter->new
239             ) || die( HTML::Object::DOM::TreeWalker->error, "\n" );
240              
241             There is also L<HTML::Object::DOM::TreeWalker>, which performs a somewhat similar function.
242              
243             Choose L<HTML::Object::DOM::NodeIterator> when you only need a simple iterator to filter and browse the selected nodes, and choose L<HTML::Object::DOM::TreeWalker> when you need to access to the node and its siblings.
244              
245             =head1 VERSION
246              
247             v0.2.0
248              
249             =head1 DESCRIPTION
250              
251             The C<TreeWalker> object represents the nodes of a document subtree and a position within them.
252              
253             =head1 PROPERTIES
254              
255             =head2 currentNode
256              
257             Is the L<Node|HTML::Object::DOM::Node> on which the C<TreeWalker> is currently pointing at.
258              
259             Example:
260              
261             use HTML::Object::DOM::NodeFilter qw( :all );
262             my $treeWalker = $doc->createTreeWalker(
263             $doc->body,
264             SHOW_ELEMENT,
265             sub{ return( FILTER_ACCEPT ); }
266             );
267             my $root = $treeWalker->currentNode; # the root element as it is the first element!
268              
269             See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker/currentNode>
270              
271             =head2 expandEntityReferences
272              
273             Normally this is read-only, but under perl you can set whatever boolean value you want.
274              
275             Under JavaScript, this is a boolean value indicating if, when discarding an C<EntityReference> its whole sub-tree must be discarded at the same time.
276              
277             Example:
278              
279             use HTML::Object::DOM::NodeFilter qw( :all );
280             my $treeWalker = $doc->createTreeWalker(
281             $doc->body,
282             SHOW_ELEMENT,
283             sub{ return( FILTER_ACCEPT ); },
284             # or
285             # { acceptNode => sub{ return( FILTER_ACCEPT ); } },
286             );
287             my $expand = $treeWalker->expandEntityReferences;
288              
289             See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker/expandEntityReferences>
290              
291             =head2 filter
292              
293             Normally this is read-only, but under perl you can set it to a new L<HTML::Object::DOM::NodeFilter> object you want, even after object instantiation.
294              
295             Returns a L<HTML::Object::DOM::NodeFilter> used to select the relevant nodes.
296              
297             Example:
298              
299             use HTML::Object::DOM::NodeFilter qw( :all );
300             my $treeWalker = $doc->createTreeWalker(
301             $doc->body,
302             SHOW_ELEMENT,
303             sub{ return( FILTER_ACCEPT ); },
304             # or
305             # { acceptNode => sub{ return( FILTER_ACCEPT ); } },
306             );
307             my $nodeFilter = $treeWalker->filter;
308              
309             See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker/filter>
310              
311             =head2 root
312              
313             Normally this is read-only, but under perl you can set whatever L<node value|HTML::Object::DOM::Node> you want.
314              
315             Returns a L<Node|HTML::Object::DOM::Node> representing the root node as specified when the C<TreeWalker> was created.
316              
317             Example:
318              
319             use HTML::Object::DOM::NodeFilter qw( :all );
320             my $treeWalker = $doc->createTreeWalker(
321             $doc->body,
322             SHOW_ELEMENT,
323             sub{ return( FILTER_ACCEPT ); },
324             # or
325             # { acceptNode => sub{ return( FILTER_ACCEPT ); } },
326             );
327             my $root = $treeWalker->root; # $doc->body in this case
328              
329             See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker/root>
330              
331             =head2 whatToShow
332              
333             Normally this is read-only, but under perl you can set whatever number value you want.
334              
335             Returns an unsigned long being a bitmask made of L<constants|/CONSTANTS> describing the types of L<Node|HTML::Object::DOM::Node> that must to be presented. Non-matching nodes are skipped, but their children may be included, if relevant.
336              
337             Possible constant values (exported by L<HTML::Object::DOM::NodeFilter>) are:
338              
339             =over 4
340              
341             =item SHOW_ALL (4294967295)
342              
343             Shows all nodes.
344              
345             =item SHOW_ELEMENT (1)
346              
347             Shows Element nodes.
348              
349             =item SHOW_ATTRIBUTE (2)
350              
351             Shows attribute L<Attribute nodes|HTML::Object::DOM::Attribute>. This is meaningful only when creating a C<TreeWalker> with an L<Attribute node|HTML::Object::DOM::Attribute> as its root; in this case, it means that the L<attribute node|HTML::Object::DOM::Attribute> will appear in the first position of the iteration or traversal. Since attributes are never children of other L<nodes|HTML::Object::DOM::Node>, they do not appear when traversing over the document tree.
352              
353             =item SHOW_TEXT (4)
354              
355             Shows Text nodes.
356              
357             Example:
358              
359             use HTML::Object::DOM::NodeFilter qw( :all );
360             my $treeWalker = $doc->createTreeWalker(
361             $doc->body,
362             ( SHOW_ELEMENT | SHOW_COMMENT | SHOW_TEXT ),
363             sub{ return( FILTER_ACCEPT ); },
364             # or
365             # { acceptNode => sub{ return( FILTER_ACCEPT ); } },
366             );
367             if( ( $treeWalker->whatToShow & SHOW_ALL ) ||
368             ( $treeWalker->whatToShow & SHOW_COMMENT ) )
369             {
370             # $treeWalker will show comments
371             }
372              
373             =item SHOW_CDATA_SECTION (8)
374              
375             Will always returns nothing, because there is no support for xml documents.
376              
377             =item SHOW_ENTITY_REFERENCE (16)
378              
379             Legacy, no more used.
380              
381             =item SHOW_ENTITY (32)
382              
383             Legacy, no more used.
384              
385             =item SHOW_PROCESSING_INSTRUCTION (64)
386              
387             Shows ProcessingInstruction nodes.
388              
389             =item SHOW_COMMENT (128)
390              
391             Shows Comment nodes.
392              
393             =item SHOW_DOCUMENT (256)
394              
395             Shows Document nodes
396              
397             =item SHOW_DOCUMENT_TYPE (512)
398              
399             Shows C<DocumentType> nodes
400              
401             =item SHOW_DOCUMENT_FRAGMENT (1024)
402              
403             Shows L<HTML::Object::DOM::DocumentFragment> nodes.
404              
405             =item SHOW_NOTATION (2048)
406              
407             Legacy, no more used.
408              
409             =item SHOW_SPACE (4096)
410              
411             Show Space nodes. This is a non-standard extension under this perl framework.
412              
413             =back
414              
415             See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker/whatToShow>
416              
417             =head1 METHODS
418              
419             =head2 firstChild
420              
421             Moves the current L<Node|HTML::Object::DOM::Node> to the first visible child of the current node, and returns the found child. It also moves the current node to this child. If no such child exists, returns C<undef> and the current node is not changed.
422              
423             Example:
424              
425             my $treeWalker = $doc->createTreeWalker(
426             $doc->body,
427             SHOW_ELEMENT,
428             sub{ return( FILTER_ACCEPT ); },
429             );
430             my $node = $treeWalker->firstChild(); # returns the first child of the root element, or null if none
431              
432             See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker/firstChild>
433              
434             =head2 lastChild
435              
436             Moves the current L<Node|HTML::Object::DOM::Node> to the last visible child of the current node, and returns the found child. It also moves the current node to this child. If no such child exists, C<undef> is returned and the current node is not changed.
437              
438             Example:
439              
440             my $treeWalker = $doc->createTreeWalker(
441             $doc->body,
442             SHOW_ELEMENT,
443             sub{ return( FILTER_ACCEPT ); },
444             );
445             my $node = $treeWalker->lastChild(); # returns the last visible child of the root element
446              
447             See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker/lastChild>
448              
449             =head2 nextNode
450              
451             Moves the current L<Node|HTML::Object::DOM::Node> to the next visible node in the document order, and returns the found node. It also moves the current node to this one. If no such node exists, returns C<undef> and the current node is not changed.
452              
453             Example:
454              
455             my $treeWalker = $doc->createTreeWalker(
456             $doc->body,
457             SHOW_ELEMENT,
458             sub{ return( FILTER_ACCEPT ); },
459             );
460             my $node = $treeWalker->nextNode(); # returns the first child of root, as it is the next $node in document order
461              
462             See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker/nextNode>
463              
464             =head2 nextSibling
465              
466             Moves the current L<Node|HTML::Object::DOM::Node> to its next sibling, if any, and returns the found sibling. If there is no such node, C<undef> is returned and the current node is not changed.
467              
468             Example:
469              
470             my $treeWalker = $doc->createTreeWalker(
471             $doc->body,
472             SHOW_ELEMENT,
473             sub{ return( FILTER_ACCEPT ); },
474             );
475             $treeWalker->firstChild();
476             my $node = $treeWalker->nextSibling(); # returns null if the first child of the root element has no sibling
477              
478             See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker/nextSibling>
479              
480             =head2 parentNode
481              
482             Moves the current L<Node|HTML::Object::DOM::Node> to the first visible ancestor node in the document order, and returns the found node. It also moves the current node to this one. If no such node exists, or if it is before that the root node defined at the object construction, returns C<undef> and the current node is not changed.
483              
484             Example:
485              
486             my $treeWalker = $doc->createTreeWalker(
487             $doc->body,
488             SHOW_ELEMENT,
489             sub{ return( FILTER_ACCEPT ); },
490             );
491             my $node = $treeWalker->parentNode(); # returns null as there is no parent
492              
493             See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker/parentNode>
494              
495             =head2 previousNode
496              
497             Moves the current L<Node|HTML::Object::DOM::Node> to the previous visible node in the document order, and returns the found node. It also moves the current node to this one. If no such node exists, or if it is before that the root node defined at the object construction, returns C<undef> and the current node is not changed.
498              
499             Example:
500              
501             my $treeWalker = $doc->createTreeWalker(
502             $doc->body,
503             SHOW_ELEMENT,
504             sub{ return( FILTER_ACCEPT ); },
505             );
506             my $node = $treeWalker->previousNode(); # returns null as there is no parent
507              
508             See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker/previousNode>
509              
510             =head2 previousSibling
511              
512             Moves the current L<Node|HTML::Object::DOM::Node> to its previous sibling, if any, and returns the found sibling. If there is no such node, return C<undef> and the current node is not changed.
513              
514             Example:
515              
516             my $treeWalker = $doc->createTreeWalker(
517             $doc->body,
518             SHOW_ELEMENT,
519             sub{ return( FILTER_ACCEPT ); },
520             );
521             my $node = $treeWalker->previousSibling(); # returns null as there is no previous sibiling
522              
523             See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker/previousSibling>
524              
525             =head1 AUTHOR
526              
527             Jacques Deguest E<lt>F<jack@deguest.jp>E<gt>
528              
529             =head1 SEE ALSO
530              
531             L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/TreeWalker>
532              
533             =head1 COPYRIGHT & LICENSE
534              
535             Copyright(c) 2022 DEGUEST Pte. Ltd.
536              
537             All rights reserved
538              
539             This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
540              
541             =cut