File Coverage

lib/HTML/Object/DOM/NodeFilter.pm
Criterion Covered Total %
statement 32 32 100.0
branch 1 2 50.0
condition n/a
subroutine 11 11 100.0
pod 2 2 100.0
total 46 47 97.8


line stmt bran cond sub pod time code
1             ##----------------------------------------------------------------------------
2             ## HTML Object - ~/lib/HTML/Object/DOM/NodeFilter.pm
3             ## Version v0.2.0
4             ## Copyright(c) 2022 DEGUEST Pte. Ltd.
5             ## Author: Jacques Deguest <jack@deguest.jp>
6             ## Created 2022/01/01
7             ## Modified 2022/09/18
8             ## All rights reserved
9             ##
10             ##
11             ## This program is free software; you can redistribute it and/or modify it
12             ## under the same terms as Perl itself.
13             ##----------------------------------------------------------------------------
14             package HTML::Object::DOM::NodeFilter;
15             BEGIN
16             {
17 3     3   2295 use strict;
  3         16  
  3         100  
18 3     3   28 use warnings;
  3         8  
  3         92  
19 3     3   28 use parent qw( Module::Generic );
  3         7  
  3         22  
20 3     3   188 use vars qw( @EXPORT_OK %EXPORT_TAGS $VERSION );
  3         7  
  3         164  
21 3     3   35 use HTML::Object::Exception;
  3         12  
  3         29  
22             use constant {
23             # Shows all nodes.
24 3         717 SHOW_ALL => 4294967295,
25             # Shows Element nodes.
26             SHOW_ELEMENT => 1,
27             # Shows attribute Attr nodes.
28             SHOW_ATTRIBUTE => 2,
29             # Shows Text nodes.
30             SHOW_TEXT => 4,
31             # Shows CDATASection nodes.
32             SHOW_CDATA_SECTION => 8,
33             # Legacy, no longer used.
34             SHOW_ENTITY_REFERENCE => 16,
35             # Legacy, no longer used.
36             SHOW_ENTITY => 32,
37             # Shows ProcessingInstruction nodes.
38             SHOW_PROCESSING_INSTRUCTION => 64,
39             # Shows Comment nodes.
40             SHOW_COMMENT => 128,
41             # Shows Document nodes.
42             SHOW_DOCUMENT => 256,
43             # Shows DocumentType nodes.
44             SHOW_DOCUMENT_TYPE => 512,
45             # Shows DocumentFragment nodes.
46             SHOW_DOCUMENT_FRAGMENT => 1024,
47             # Legacy, no longer used.
48             SHOW_NOTATION => 2048,
49             # Show spaces; non-standard addition
50             SHOW_SPACE => 4096,
51            
52             FILTER_ACCEPT => 1,
53             FILTER_REJECT => 2,
54             FILTER_SKIP => 3,
55 3     3   905 };
  3         7  
56 3     3   19 our @EXPORT_OK = qw(
57             SHOW_ALL SHOW_ELEMENT SHOW_ATTRIBUTE SHOW_TEXT SHOW_CDATA_SECTION
58             SHOW_ENTITY_REFERENCE SHOW_ENTITY SHOW_PROCESSING_INSTRUCTION SHOW_COMMENT
59             SHOW_DOCUMENT SHOW_DOCUMENT_TYPE SHOW_DOCUMENT_FRAGMENT SHOW_NOTATION SHOW_SPACE
60             FILTER_ACCEPT FILTER_REJECT FILTER_SKIP
61             );
62 3         34 our %EXPORT_TAGS = ( all => [@EXPORT_OK] );
63 3         69 our $VERSION = 'v0.2.0';
64             };
65              
66 3     3   16 use strict;
  3         10  
  3         61  
67 3     3   15 use warnings;
  3         14  
  3         338  
68              
69             sub init
70             {
71 6     6 1 467 my $self = shift( @_ );
72 6         100 $self->{_init_strict_use_sub} = 1;
73 6 50       35 $self->SUPER::init( @_ ) || return( $self->pass_error );
74 6         448 return( $self );
75             }
76              
77 7     7 1 18 sub acceptNode { return( FILTER_ACCEPT ); }
78              
79             1;
80             # NOTE: POD
81             __END__
82              
83             =encoding utf-8
84              
85             =head1 NAME
86              
87             HTML::Object::DOM::NodeFilter - HTML Object DOM Node Filter
88              
89             =head1 SYNOPSIS
90              
91             use HTML::Object::DOM::NodeFilter;
92             my $filter = HTML::Object::DOM::NodeFilter->new ||
93             die( HTML::Object::DOM::NodeFilter->error, "\n" );
94              
95             =head1 VERSION
96              
97             v0.2.0
98              
99             =head1 DESCRIPTION
100              
101             A C<NodeFilter> interface represents an object used to filter the nodes in a L<HTML::Object::DOM::NodeIterator> or L<HTML::Object::DOM::TreeWalker>. A C<NodeFilter> knows nothing about the document or traversing nodes; it only knows how to evaluate a single node against the provided filter.
102              
103             =head1 PROPERTIES
104              
105             There are no properties.
106              
107             =head1 METHODS
108              
109             =head2 acceptNode
110              
111             Returns an unsigned short that will be used to tell if a given L<Node|HTML::Object::DOM::Node> must be accepted or not by the L<HTML::Object::DOM::NodeIterator> or L<HTML::Object::DOM::TreeWalker> iteration algorithm.
112              
113             This method is expected to be written by the user of a C<NodeFilter>. Possible return values are:
114              
115             =over 4
116              
117             =item FILTER_ACCEPT
118              
119             Value returned by the L</acceptNode> method when a node should be accepted.
120              
121             =item FILTER_REJECT
122              
123             Value to be returned by the L</acceptNode> method when a node should be rejected. For L<HTML::Object::DOM::TreeWalker>, child nodes are also rejected.
124              
125             For C<NodeIterator>, this flag is synonymous with C<FILTER_SKIP>.
126              
127             =item FILTER_SKIP
128              
129             Value to be returned by L</acceptNode> for nodes to be skipped by the L<HTML::Object::DOM::NodeIterator> or L<HTML::Object::DOM::TreeWalker> object.
130              
131             The children of skipped nodes are still considered. This is treated as "skip this node but not its children".
132              
133             Example:
134              
135             use HTML::Object::DOM::NodeFilter qw( :all );
136             my $nodeIterator = $doc->createNodeIterator(
137             # Node to use as root
138             $doc->getElementById('someId'),
139              
140             # Only consider nodes that are text nodes (nodeType 3)
141             SHOW_TEXT,
142              
143             # Object containing the sub to use for the acceptNode method
144             # of the NodeFilter
145             { acceptNode => sub
146             {
147             my $node = shift( @_ ); # also available as $_
148             # Logic to determine whether to accept, reject or skip node
149             # In this case, only accept nodes that have content other than whitespace
150             if( $node->data !~ /^\s*$/ )
151             {
152             return( FILTER_ACCEPT );
153             }
154             }
155             },
156             0 # false
157             );
158              
159             # Show the content of every non-empty text node that is a child of root
160             my $node;
161             while( ( $node = $nodeIterator->nextNode() ) )
162             {
163             say( $node->data );
164             }
165              
166             See also L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/NodeFilter/acceptNode>
167              
168             =back
169              
170             =head1 CONSTANTS
171              
172             =over 4
173              
174             =item SHOW_ALL (4294967295)
175              
176             Shows all nodes.
177              
178             =item SHOW_ELEMENT (1)
179              
180             Shows Element nodes.
181              
182             =item SHOW_ATTRIBUTE (2)
183              
184             Shows attribute L<Attribute nodes|HTML::Object::DOM::Attribute>. This is meaningful only when creating a NodeIterator with an L<Attribute node|HTML::Object::DOM::Attribute> as its root; in this case, it means that the L<attribute node|HTML::Object::DOM::Attribute> will appear in the first position of the iteration or traversal. Since attributes are never children of other L<nodes|HTML::Object::DOM::Node>, they do not appear when traversing over the document tree.
185              
186             =item SHOW_TEXT (4)
187              
188             Shows Text nodes.
189              
190             Example:
191              
192             use HTML::Object::DOM::NodeFilter qw( :all );
193             my $nodeIterator = $doc->createNodeIterator(
194             $doc->body,
195             SHOW_ELEMENT | SHOW_COMMENT | SHOW_TEXT,
196             { acceptNode => sub{ return( FILTER_ACCEPT ); } },
197             0 # false
198             );
199             if( ( $nodeIterator->whatToShow & SHOW_ALL ) ||
200             ( $nodeIterator->whatToShow & SHOW_COMMENT ) )
201             {
202             # $nodeIterator will show comments
203             }
204              
205             =item SHOW_CDATA_SECTION (8)
206              
207             Will always return nothing, because there is no support for XML documents.
208              
209             =item SHOW_ENTITY_REFERENCE (16)
210              
211             Legacy, no longer used.
212              
213             =item SHOW_ENTITY (32)
214              
215             Legacy, no longer used.
216              
217             =item SHOW_PROCESSING_INSTRUCTION (64)
218              
219             Shows ProcessingInstruction nodes.
220              
221             =item SHOW_COMMENT (128)
222              
223             Shows Comment nodes.
224              
225             =item SHOW_DOCUMENT (256)
226              
227             Shows Document nodes.
228              
229             =item SHOW_DOCUMENT_TYPE (512)
230              
231             Shows C<DocumentType> nodes.
232              
233             =item SHOW_DOCUMENT_FRAGMENT (1024)
234              
235             Shows L<HTML::Object::DOM::DocumentFragment> nodes.
236              
237             =item SHOW_NOTATION (2048)
238              
239             Legacy, no longer used.
240              
241             =item SHOW_SPACE (4096)
242              
243             Shows Space nodes. This is a non-standard extension in this Perl framework.
244              
245             =back
246              
247             And for the callback control:
248              
249             =over 4
250              
251             =item FILTER_ACCEPT (1)
252              
253             =item FILTER_REJECT (2)
254              
255             =item FILTER_SKIP (3)
256              
257             =back
258              
259             =head1 AUTHOR
260              
261             Jacques Deguest E<lt>F<jack@deguest.jp>E<gt>
262              
263             =head1 SEE ALSO
264              
265             L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/API/NodeFilter>, L<W3C specifications|https://dom.spec.whatwg.org/#interface-nodefilter>
266              
267             =head1 COPYRIGHT & LICENSE
268              
269             Copyright(c) 2022 DEGUEST Pte. Ltd.
270              
271             All rights reserved
272              
273             This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
274              
275             =cut