line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
##---------------------------------------------------------------------------- |
2
|
|
|
|
|
|
|
## HTML Object - ~/lib/HTML/Object/XPath.pm |
3
|
|
|
|
|
|
|
## Version v0.2.0 |
4
|
|
|
|
|
|
|
## Copyright(c) 2021 DEGUEST Pte. Ltd. |
5
|
|
|
|
|
|
|
## Author: Jacques Deguest <jack@deguest.jp> |
6
|
|
|
|
|
|
|
## Created 2021/12/04 |
7
|
|
|
|
|
|
|
## Modified 2022/09/18 |
8
|
|
|
|
|
|
|
## All rights reserved |
9
|
|
|
|
|
|
|
## |
10
|
|
|
|
|
|
|
## |
11
|
|
|
|
|
|
|
## This program is free software; you can redistribute it and/or modify it |
12
|
|
|
|
|
|
|
## under the same terms as Perl itself. |
13
|
|
|
|
|
|
|
##---------------------------------------------------------------------------- |
14
|
|
|
|
|
|
|
package HTML::Object::XPath; |
15
|
|
|
|
|
|
|
# Compile-time setup: pragmas, parent class, the sibling classes this module
# instantiates, and the package-level regular expressions used by the parser.
BEGIN {
    use strict;
    use warnings;
    use parent qw( Module::Generic );
    use vars qw(
        $CACHE $AXES $AXES_KEYS $NC_NAME_RE $QNAME_RE $NC_WILD_RE $QN_WILD_RE
        $NODE_TYPE_RE $AXIS_NAME_RE $NUMBER_RE $LITERAL_RE $REGEXP_RE $REGEXP_MOD_RE
        $BASE_CLASS $VERSION
    );
    use HTML::Object::XPath::Step;
    use HTML::Object::XPath::Expr;
    use HTML::Object::XPath::Function;
    use HTML::Object::XPath::LocationPath;
    use HTML::Object::XPath::Variable;
    use HTML::Object::XPath::Literal;
    use HTML::Object::XPath::Number;
    use HTML::Object::XPath::NodeSet;
    use HTML::Object::XPath::Root;
    our $VERSION = 'v0.2.0';
    our $CACHE = {};
    # Axis name to principal node type mapping
    our $AXES =
    {
        'ancestor'           => 'element',
        'ancestor-or-self'   => 'element',
        'attribute'          => 'attribute',
        'namespace'          => 'namespace',
        'child'              => 'element',
        'descendant'         => 'element',
        'descendant-or-self' => 'element',
        'following'          => 'element',
        'following-sibling'  => 'element',
        'parent'             => 'element',
        'preceding'          => 'element',
        'preceding-sibling'  => 'element',
        'self'               => 'element',
    };
    # 'our', not 'my': $AXES_KEYS is declared as a package variable above, and a
    # lexical here would leave that package variable undefined once this block ends.
    # The keys are sorted (longest first) so that the generated alternation is the
    # same on every run instead of following the hash's random key order.
    our $AXES_KEYS = join( '|', sort{ length( $b ) <=> length( $a ) || $a cmp $b } keys( %$AXES ) );
    our $NC_NAME_RE = qr/([A-Za-z_][\w\.\-]*)/;
    our $QNAME_RE = qr/(${NC_NAME_RE}:)?${NC_NAME_RE}/;
    our $NC_WILD_RE = qr/${NC_NAME_RE}:\*/;
    our $QN_WILD_RE = qr/\*/;
    our $NODE_TYPE_RE = qr/((text|comment|processing-instruction|node)\(\))/;
    our $AXIS_NAME_RE = qr/(${AXES_KEYS})::/;
    our $NUMBER_RE = qr/\d+(\.\d*)?|\.\d+/;
    our $LITERAL_RE = qr/\"[^\"]*\"|\'[^\']*\'/;
    # A regular expression literal such as /foo/ or m/foo/, and its trailing modifiers
    our $REGEXP_RE = qr{(?:m?/(?:\\.|[^/])*/)};
    our $REGEXP_MOD_RE = qr{(?:[imsx]+)};
    # Prefix used by _class_for() to build the name of the sibling classes
    our $BASE_CLASS = __PACKAGE__;
};
66
|
|
|
|
|
|
|
|
67
|
8
|
|
|
8
|
|
73
|
use strict; |
|
8
|
|
|
|
|
18
|
|
|
8
|
|
|
|
|
184
|
|
68
|
8
|
|
|
8
|
|
57
|
use warnings; |
|
8
|
|
|
|
|
27
|
|
|
8
|
|
|
|
|
48542
|
|
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
# Initialises the object: hands the arguments to the parent class init(), then
# resets the evaluation context, the namespaces and the parser state.
# Returns the object itself, or propagates the parent's error with pass_error().
sub init {
    my $self = shift( @_ );
    $self->{_init_strict_use_sub} = 1;
    $self->SUPER::init( @_ ) || return( $self->pass_error );

    # 1 based position in array context
    $self->{context_pos}  = undef;
    $self->{context_set}  = $self->new_nodeset;
    # total size of context
    $self->{context_size} = 0;
    # Resets both 'uses_namespaces' and 'namespaces'
    $self->clear_namespaces();

    my %defaults =
    (
        cache      => {},
        direction  => 'forward',
        namespaces => {},
        vars       => {},
        # Index of the token currently examined by the parser
        _tokpos    => 0,
    );
    @$self{ keys( %defaults ) } = values( %defaults );
    return( $self );
}
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# Forgets every registered namespace prefix and turns off the 'uses_namespaces'
# flag. Returns the object so that calls can be chained.
sub clear_namespaces {
    my( $self ) = @_;
    @$self{qw( uses_namespaces namespaces )} = ( 0, {} );
    return( $self );
}
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# Tells whether an XPath expression selects at least one node.
#
# $path    - the XPath expression; defaults to '/' when undefined
# $context - the context node handed over to findnodes()
#
# Returns 1 when findnodes() yields one node or more, 0 otherwise.
sub exists {
    my $self = shift( @_ );
    my( $path, $context ) = @_;
    # The default applies to the path; the invocant is always defined in a method call.
    $path = '/' if( !defined( $path ) );
    my @nodeset = $self->findnodes( $path, $context );
    return( scalar( @nodeset ) ? 1 : 0 );
}
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# Parses and evaluates an XPath expression.
#
# $path    - the XPath expression
# $context - the context the expression is evaluated against
#            (an HTML::Object::Element according to the original comment)
#
# Returns the evaluation result. When it is an HTML::Object::XPath::NodeSet, it
# is sorted and stripped of its duplicates first; any other kind of result (such
# as an HTML::Object::XPath::Number) is returned untouched.
sub find {
    my( $self, $path, $context ) = @_;
    # _parse returns an HTML::Object::XPath::Expr object
    my $parsed_path = $self->_parse( $path );
    my $results = $parsed_path->evaluate( $context );
    return( $results ) unless( $self->_is_a( $results, 'HTML::Object::XPath::NodeSet' ) );
    return( $results->sort->remove_duplicates );
}
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
# Evaluates an XPath expression with find() and returns the matching nodes.
#
# When find() produces a node set: the list of its nodes in list context, the
# node set object in scalar context.
# When find() produces anything else: a new node set built from that value in
# list context, the value itself in scalar context.
sub findnodes {
    my( $self, $path, $context ) = @_;
    my $results = $self->find( $path, $context );
    my $want_list = wantarray();

    if( $self->_is_a( $results => 'HTML::Object::XPath::NodeSet' ) ) {
        return( $results ) unless( $want_list );
        return( $results->get_nodelist );
    }
    # result should be SCALAR
    return( $want_list ? $self->new_nodeset( $results ) : $results );
}
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
# Evaluates an XPath expression with find() and returns the result as one string:
# - node set: the concatenation of toString() called on each node
# - boolean:  an empty string, to behave like XML::LibXML
# - node:     its toString()
# - other:    its value() passed through _xml_escape_text()
sub findnodes_as_string {
    my( $self, $path, $context ) = @_;
    my $results = $self->find( $path, $context );

    if( $self->_is_a( $results => 'HTML::Object::XPath::NodeSet' ) ) {
        return( join( '', map{ $_->toString } $results->get_nodelist ) );
    }
    # to behave like XML::LibXML
    return( '' ) if( $self->_is_a( $results => 'HTML::Object::XPath::Boolean' ) );
    return( $results->toString ) if( $self->_is_a( $results => 'HTML::Object::XPath::Node' ) );
    return( $self->_xml_escape_text( $results->value ) );
}
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
# Evaluates an XPath expression with find() and returns the values found:
# - node set: the getValue() of each node, as a list
# - boolean:  an empty list, to behave like XML::LibXML
# - node:     its getValue()
# - other:    its value() passed through _xml_escape_text()
sub findnodes_as_strings {
    my( $self, $path, $context ) = @_;
    my $results = $self->find( $path, $context );

    if( $self->_is_a( $results => 'HTML::Object::XPath::NodeSet' ) ) {
        return( map{ $_->getValue } $results->get_nodelist );
    }
    # to behave like XML::LibXML
    return( () ) if( $self->_is_a( $results => 'HTML::Object::XPath::Boolean' ) );
    return( $results->getValue ) if( $self->_is_a( $results => 'HTML::Object::XPath::Node' ) );
    return( $self->_xml_escape_text( $results->value ) );
}
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
# Evaluates an XPath expression with find() and returns its value: the node set's
# to_final_value() when the result is a node set, the result's value() otherwise.
sub findvalue {
    my( $self, $path, $context ) = @_;
    my $results = $self->find( $path, $context );
    return( $results->value ) unless( $self->_is_a( $results => 'HTML::Object::XPath::NodeSet' ) );
    return( $results->to_final_value );
}
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
# Evaluates an XPath expression with find() and returns its string values: the
# node set's string_values() when the result is a node set, the result's
# string_value() otherwise.
sub findvalues {
    my( $self, $path, $context ) = @_;
    my $results = $self->find( $path, $context );
    return( $results->string_value ) unless( $self->_is_a( $results => 'HTML::Object::XPath::NodeSet' ) );
    return( $results->string_values );
}
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
# Resolves a namespace prefix.
#
# $prefix - the prefix to look up
# $node   - optional node; when true, the lookup is delegated to its getNamespace()
#
# Without a node, the prefix is looked up among the namespaces registered on this
# object if 'uses_namespaces' is on; otherwise the prefix itself is returned.
sub get_namespace {
    my( $self, $prefix, $node ) = @_;
    my $ns;
    if( $node ) {
        $ns = $node->getNamespace( $prefix );
    }
    elsif( $self->{uses_namespaces} ) {
        $ns = $self->{namespaces}->{ $prefix };
    }
    else {
        $ns = $prefix;
    }
    return( $ns );
}
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
# Returns the value stored under the given variable name, as set with set_var().
sub get_var {
    my( $self, $var ) = @_;
    return( $self->{vars}->{ $var } );
}
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
# Tells whether a node is among the nodes selected by an XPath expression.
#
# $node    - the node to look for
# $path    - the XPath expression
# $context - the context handed over to findnodes()
#
# Nodes are compared through their string representation.
# Returns 1 on a match, and an empty list (undef in scalar context) otherwise.
sub matches {
    my( $self, $node, $path, $context ) = @_;
    my @nodes = $self->findnodes( $path, $context );
    my $hits = grep{ "$node" eq "$_" } @nodes;
    return(1) if( $hits );
    return;
}
249
|
|
|
|
|
|
|
|
250
|
68
|
|
|
68
|
1
|
327
|
# Accessor for the 'namespaces' property; the work is done by
# _set_get_hash_as_mix_object(), which is not defined in this part of the file.
sub namespaces {
    my $self = shift( @_ );
    return( $self->_set_get_hash_as_mix_object( 'namespaces', @_ ) );
}
251
|
|
|
|
|
|
|
|
252
|
255
|
|
|
255
|
1
|
559
|
# Factory methods. Each one resolves (and loads) a sibling class through
# _class_for() and returns the result of calling new() on it with the arguments
# received.

# New HTML::Object::XPath::Expr object
sub new_expr {
    my $self = shift( @_ );
    return( $self->_class_for( 'Expr' )->new( @_ ) );
}

# New HTML::Object::XPath::Function object
sub new_function {
    my $self = shift( @_ );
    return( $self->_class_for( 'Function' )->new( @_ ) );
}

# New HTML::Object::XPath::Literal object
sub new_literal {
    my $self = shift( @_ );
    return( $self->_class_for( 'Literal' )->new( @_ ) );
}

# New HTML::Object::XPath::LocationPath object
sub new_location_path {
    my $self = shift( @_ );
    return( $self->_class_for( 'LocationPath' )->new( @_ ) );
}

# New HTML::Object::XPath::NodeSet object
sub new_nodeset {
    my $self = shift( @_ );
    return( $self->_class_for( 'NodeSet' )->new( @_ ) );
}

# New HTML::Object::XPath::Number object
sub new_number {
    my $self = shift( @_ );
    return( $self->_class_for( 'Number' )->new( @_ ) );
}

# New HTML::Object::XPath::Root object
sub new_root {
    my $self = shift( @_ );
    return( $self->_class_for( 'Root' )->new( @_ ) );
}

# New HTML::Object::XPath::Step object
sub new_step {
    my $self = shift( @_ );
    return( $self->_class_for( 'Step' )->new( @_ ) );
}

# New HTML::Object::XPath::Variable object
sub new_variable {
    my $self = shift( @_ );
    return( $self->_class_for( 'Variable' )->new( @_ ) );
}
269
|
|
|
|
|
|
|
|
270
|
0
|
|
|
0
|
1
|
0
|
# Public entry point to the parser: passes its arguments on to _parse() and
# returns whatever _parse() returns.
sub parse {
    my $self = shift( @_ );
    return( $self->_parse( @_ ) );
}
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
# Registers a namespace prefix and switches 'uses_namespaces' on.
#
# $prefix   - the namespace prefix
# $expanded - the value the prefix stands for
#
# Returns the value just stored.
sub set_namespace {
    my( $self, $prefix, $expanded ) = @_;
    $self->{uses_namespaces} = 1;
    return( $self->{namespaces}{ $prefix } = $expanded );
}
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
# Stores the given value in the 'strict_namespaces' property and returns it.
sub set_strict_namespaces {
    my $self   = shift( @_ );
    my $strict = shift( @_ );
    return( $self->{strict_namespaces} = $strict );
}
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
# Stores a value under a variable name, for later retrieval with get_var().
# Returns the value just stored.
sub set_var {
    my( $self, $var, $val ) = @_;
    return( $self->{vars}->{ $var } = $val );
}
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
# Turns the tokens of an XPath expression into an expression tree.
#
# $tokens - array object of tokens
#
# Returns whatever _expr() returns for those tokens.
#
# The former "if debug" block only filled three unused variables from caller()
# and has been removed.
sub _analyze {
    my( $self, $tokens ) = @_;
    return( $self->_expr( $tokens ) );
}
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
# Parses the arguments of a function call, the opening parenthesis having been
# consumed already by the caller.
#
# Returns an array object (from new_array) holding one parsed expression per
# argument; it is empty when the current token is the closing parenthesis.
sub _arguments {
    my $self   = shift( @_ );
    my $tokens = shift( @_ );
    my $args = $self->new_array;
    return( $args ) if( $tokens->[ $self->{_tokpos} ] eq ')' );

    $args->push( $self->_expr( $tokens ) );
    # Every comma announces one more argument
    $args->push( $self->_expr( $tokens ) ) while( $self->_match( $tokens, qr/\,/ ) );
    return( $args );
}
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
# Loads a sibling class and returns its full name.
#
# $mod - the last part of the class name, such as 'Expr' or 'NodeSet'
#
# The class is required under $BASE_CLASS, and its $DEBUG package variable is set
# to this object's debug value (0 when undefined). Dies if the class cannot be
# loaded.
sub _class_for {
    my $self = shift( @_ );
    my $mod  = shift( @_ );
    my $class = "${BASE_CLASS}::${mod}";
    eval( "require ${BASE_CLASS}\::${mod};" );
    die( $@ ) if( $@ );
    my $debug = $self->debug // 0;
    eval( "\$${BASE_CLASS}\::${mod}\::DEBUG = " . $debug );
    return( $class );
}
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
# Entry point of the expression grammar.
#
# $tokens - array object of XPath expression tokens
#
# Starts at the operator parsed by _expr_or() and returns its result, an
# HTML::Object::XPath::Expr object according to the original comment.
sub _expr {
    my $self   = shift( @_ );
    my $tokens = shift( @_ );
    return( $self->_expr_or( $tokens ) );
}
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
# Parses a chain of '+' and '-' operations between multiplicative expressions.
# Each operator found wraps what has been parsed so far as the left-hand side of
# a new expression object, so the chain groups from the left.
sub _expr_additive {
    my $self   = shift( @_ );
    my $tokens = shift( @_ );
    my $left = $self->_expr_multiplicative( $tokens );
    while( $self->_match( $tokens, qr/[\+\-]/ ) ) {
        my $node = $self->new_expr( $self );
        $node->set_lhs( $left );
        # Must be read before the right-hand side is parsed and matches other tokens
        $node->set_op( $self->{_curr_match} );
        my $rhs = $self->_expr_multiplicative( $tokens );
        $node->set_rhs( $rhs );
        $left = $node;
    }
    return( $left );
}
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
# Parses a chain of 'and' operations between the expressions parsed by
# _expr_match(). Each 'and' found wraps what has been parsed so far as the
# left-hand side of a new expression object.
sub _expr_and {
    my $self   = shift( @_ );
    my $tokens = shift( @_ );
    my $left = $self->_expr_match( $tokens );
    while( $self->_match( $tokens, 'and' ) ) {
        my $node = $self->new_expr( $self );
        $node->set_lhs( $left );
        $node->set_op( 'and' );
        my $rhs = $self->_expr_match( $tokens );
        $node->set_rhs( $rhs );
        $left = $node;
    }
    return( $left );
}
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
# Parses a chain of '=' and '!=' comparisons between relational expressions.
# Each operator found wraps what has been parsed so far as the left-hand side of
# a new expression object.
sub _expr_equality {
    my $self   = shift( @_ );
    my $tokens = shift( @_ );
    my $left = $self->_expr_relational( $tokens );
    while( $self->_match( $tokens, qr/!?=/ ) ) {
        my $node = $self->new_expr( $self );
        $node->set_lhs( $left );
        # Must be read before the right-hand side is parsed and matches other tokens
        $node->set_op( $self->{_curr_match} );
        my $rhs = $self->_expr_relational( $tokens );
        $node->set_rhs( $rhs );
        $left = $node;
    }
    return( $left );
}
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
# Parses a primary expression followed by any number of predicates in square
# brackets. Each predicate is parsed as a full expression and attached with
# push_predicate(); its closing bracket is matched with the fatal flag set.
sub _expr_filter {
    my $self   = shift( @_ );
    my $tokens = shift( @_ );
    my $primary = $self->_expr_primary( $tokens );
    while( $self->_match( $tokens, qr/\[/ ) ) {
        # really PredicateExpr...
        $primary->push_predicate( $self->_expr( $tokens ) );
        $self->_match( $tokens, qr/\]/, 1 );
    }
    return( $primary );
}
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
# Parses a chain of '=~' and '!~' regular expression operations between equality
# expressions. Each operator found wraps what has been parsed so far as the
# left-hand side of a new expression object.
sub _expr_match {
    my $self   = shift( @_ );
    my $tokens = shift( @_ );
    my $left = $self->_expr_equality( $tokens );
    while( $self->_match( $tokens, qr/[=!]~/ ) ) {
        my $node = $self->new_expr( $self );
        $node->set_lhs( $left );
        # Must be read before the right-hand side is parsed and matches other tokens
        $node->set_op( $self->{_curr_match} );
        my $rhs = $self->_expr_equality( $tokens );
        $node->set_rhs( $rhs );
        $left = $node;
    }
    return( $left );
}
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
# Parses a chain of '*', 'div' and 'mod' operations between unary expressions.
# Each operator found wraps what has been parsed so far as the left-hand side of
# a new expression object.
sub _expr_multiplicative {
    my $self   = shift( @_ );
    my $tokens = shift( @_ );
    my $left = $self->_expr_unary( $tokens );
    while( $self->_match( $tokens, qr/(\*|div|mod)/ ) ) {
        my $node = $self->new_expr( $self );
        $node->set_lhs( $left );
        # Must be read before the right-hand side is parsed and matches other tokens
        $node->set_op( $self->{_curr_match} );
        my $rhs = $self->_expr_unary( $tokens );
        $node->set_rhs( $rhs );
        $left = $node;
    }
    return( $left );
}
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
# Parses a chain of 'or' operations between the expressions parsed by
# _expr_and(). Each 'or' found wraps what has been parsed so far as the
# left-hand side of a new expression object.
#
# The former "if debug" block only filled three unused variables from caller()
# and has been removed.
sub _expr_or {
    my( $self, $tokens ) = @_;
    my $expr = $self->_expr_and( $tokens );
    while( $self->_match( $tokens, 'or' ) ) {
        my $or_expr = $self->new_expr( $self );
        $or_expr->set_lhs( $expr );
        $or_expr->set_op( 'or' );
        my $rhs = $self->_expr_and( $tokens );
        $or_expr->set_rhs( $rhs );
        $expr = $or_expr;
    }
    return( $expr );
}
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
# Parses a path expression, which is one of:
#   _location_path
#   _expr_filter
#   _expr_filter followed by '/' or '//' and a relative location path
# The current token decides which one applies: a location path starts with "/",
# "//", ".", ".." or a proper step.
sub _expr_path {
    my $self   = shift( @_ );
    my $tokens = shift( @_ );
    my $expr = $self->new_expr( $self );
    # Current fragment of the xpath expression, as produced by the tokeniser
    my $test = $tokens->[ $self->{_tokpos} ];

    # AbsoluteLocationPath, AbbreviatedRelativeLocationPath, or a step such as AxisName::...
    if( $test =~ /^(\/\/?|\.\.?)$/ || $self->_is_step( $tokens ) ) {
        $expr->set_lhs( $self->_location_path( $tokens ) );
        return( $expr );
    }

    # Not a LocationPath: use _expr_filter instead
    $expr = $self->_expr_filter( $tokens );
    return( $expr ) unless( $self->_match( $tokens, qr/\/\/?/ ) );

    # The filter expression is followed by a relative location path
    my $loc_path = $self->new_location_path();
    $loc_path->push( $expr );
    if( $self->{_curr_match} eq '//' ) {
        # '//' stands for a descendant-or-self::node() step
        $loc_path->push( $self->new_step( $self, 'descendant-or-self', $self->_class_for( 'Step' )->TEST_NT_NODE ) );
    }
    $loc_path->push( $self->_relative_location_path( $tokens ) );
    my $wrapper = $self->new_expr( $self );
    $wrapper->set_lhs( $loc_path );
    return( $wrapper );
}
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
# Parses a primary expression, which is one of: a quoted literal, a regular
# expression literal, a number, a parenthesised expression, a variable reference
# or a function call.
#
# $tokens - array object of expression tokens
#
# Returns a new expression object whose left-hand side holds what was parsed, or
# the return value of error() when the current token is none of the above.
sub _expr_primary {
    my( $self, $tokens ) = @_;
    my $expr = $self->new_expr( $self );

    if( $self->_match( $tokens, $LITERAL_RE ) ) {
        # Strip the quotes. /s is required because $LITERAL_RE accepts line breaks
        # inside the quotes; without it the match would fail on such a literal.
        my( $quote, $string ) = $self->{_curr_match} =~ m/^(["'])(.*)\1$/s;
        $expr->set_lhs( $self->new_literal( $string ) );
    }
    elsif( $self->_match( $tokens, qr/${REGEXP_RE}${REGEXP_MOD_RE}?/ ) ) {
        # new Literal with $self->{_curr_match} turned into a regexp...
        my( $regexp, $mod ) = $self->{_curr_match} =~ m{($REGEXP_RE)($REGEXP_MOD_RE?)};
        # Remove the delimiters
        $regexp =~ s{^m?\s*/}{};
        $regexp =~ s{/$}{};
        # Move the modifiers inside the pattern. This must be an assignment: binding
        # with =~ would merely run a match and throw the modifiers away.
        if( $mod ) {
            $regexp = "(?$mod:$regexp)";
        }
        $expr->set_lhs( $self->new_literal( $regexp ) );
    }
    elsif( $self->_match( $tokens, $NUMBER_RE ) ) {
        # new Number with $self->{_curr_match}...
        $expr->set_lhs( $self->new_number( $self->{_curr_match} ) );
    }
    elsif( $self->_match( $tokens, qr/\(/ ) ) {
        # Parenthesised expression; the closing parenthesis is mandatory
        $expr->set_lhs( $self->_expr( $tokens ) );
        $self->_match( $tokens, qr/\)/, 1 );
    }
    elsif( $self->_match( $tokens, qr/\$$QNAME_RE/ ) ) {
        # new Variable with $self->{_curr_match}, minus its leading dollar sign
        $self->{_curr_match} =~ /^\$(.*)$/;
        $expr->set_lhs( $self->new_variable( $self, $1 ) );
    }
    elsif( $self->_match( $tokens, $QNAME_RE ) ) {
        # check match not Node_Type - done in lexer...
        # new Function
        my $func_name = $self->{_curr_match};
        $self->_match( $tokens, qr/\(/, 1 );
        $expr->set_lhs(
            $self->new_function(
                $self,
                $func_name,
                $self->_arguments( $tokens )
            )
        );
        $self->_match( $tokens, qr/\)/, 1 );
    }
    else {
        return( $self->error( "Not a _expr_primary at ", $tokens->[ $self->{_tokpos} ] ) );
    }
    return( $expr );
}
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
# Parses a chain of '<', '>', '<=' and '>=' comparisons between additive
# expressions. Each operator found wraps what has been parsed so far as the
# left-hand side of a new expression object.
sub _expr_relational {
    my $self   = shift( @_ );
    my $tokens = shift( @_ );
    my $left = $self->_expr_additive( $tokens );
    while( $self->_match( $tokens, qr/(<|>|<=|>=)/ ) ) {
        my $node = $self->new_expr( $self );
        $node->set_lhs( $left );
        # Must be read before the right-hand side is parsed and matches other tokens
        $node->set_op( $self->{_curr_match} );
        my $rhs = $self->_expr_additive( $tokens );
        $node->set_rhs( $rhs );
        $left = $node;
    }
    return( $left );
}
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
# Parses an optional unary minus.
#
# $tokens - array object of expression tokens
#
# "-X" is built as the subtraction "0 - X", X being itself parsed as a unary
# expression. Without a leading minus, the work is handed to _expr_union().
sub _expr_unary {
    my $self   = shift( @_ );
    my $tokens = shift( @_ );
    return( $self->_expr_union( $tokens ) ) unless( $self->_match( $tokens, qr/-/ ) );

    my $negation = $self->new_expr( $self );
    $negation->set_lhs( $self->new_number(0) );
    $negation->set_op( '-' );
    $negation->set_rhs( $self->_expr_unary( $tokens ) );
    return( $negation );
}
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
# Parses a chain of '|' (union) operations between path expressions.
#
# $tokens - array object of expression tokens
#
# Each '|' found wraps what has been parsed so far as the left-hand side of a new
# expression object.
sub _expr_union {
    my $self   = shift( @_ );
    my $tokens = shift( @_ );
    my $left = $self->_expr_path( $tokens );
    while( $self->_match( $tokens, qr/\|/ ) ) {
        my $node = $self->new_expr( $self );
        $node->set_lhs( $left );
        $node->set_op( '|' );
        my $rhs = $self->_expr_path( $tokens );
        $node->set_rhs( $rhs );
        $left = $node;
    }
    return( $left );
}
642
|
|
|
|
|
|
|
|
643
|
0
|
|
|
0
|
|
0
|
# Readers for the evaluation context kept in the object.

# The node found at position 'context_pos' in the context node set
sub _get_context_node {
    my $self = shift( @_ );
    return( $self->{context_set}->get_node( $self->{context_pos} ) );
}

# The current position in the context node set
sub _get_context_pos {
    my $self = shift( @_ );
    return( $self->{context_pos} );
}

# The context node set
sub _get_context_set {
    my $self = shift( @_ );
    return( $self->{context_set} );
}

# The size of the context node set, as reported by its size() method
sub _get_context_size {
    my $self = shift( @_ );
    return( $self->{context_set}->size );
}
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
# Tells whether the current token starts a location step.
#
# $tokens - array object of expression tokens
#
# Returns 1 when the token at the current position is one of:
# - the word 'processing-instruction'
# - an attribute test: '@' followed by a name or a wildcard
# - a name or a wildcard that is not followed by '(' (that would be a function call)
# - a node type test such as text() or node()
# - an axis name followed by '::' and a name, a wildcard or a node type test
# Returns an empty list (undef in scalar context) otherwise, including when there
# is no token left.
sub _is_step {
    my $self   = shift( @_ );
    my $tokens = shift( @_ );
    my $token = $tokens->[ $self->{_tokpos} ];
    return unless( defined( $token ) );

    return(1) if( $token eq 'processing-instruction' );
    return(1) if( $token =~ /^\@($NC_WILD_RE|$QNAME_RE|$QN_WILD_RE)$/o );
    if( $token =~ /^($NC_WILD_RE|$QNAME_RE|$QN_WILD_RE)$/o ) {
        my $next = $tokens->[ $self->{_tokpos} + 1 ] || '';
        return(1) if( $next ne '(' );
    }
    return(1) if( $token =~ /^$NODE_TYPE_RE$/o );
    return(1) if( $token =~ /^$AXIS_NAME_RE($NC_WILD_RE|$QNAME_RE|$QN_WILD_RE|$NODE_TYPE_RE)$/o );
    return;
}
673
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
# Parses a location path and returns it as a location path object.
#
# $tokens - array object of expression tokens
#
# - '/'  : the path starts at the root, followed by a relative location path if
#          the next token is a step
# - '//' : the path starts at the root, followed either by the step returned by
#          _optimise_descendant_or_self() or, failing that, by a
#          descendant-or-self::node() step, then by a relative location path
# - else : the path is a relative location path
sub _location_path {
    my $self   = shift( @_ );
    my $tokens = shift( @_ );
    my $path = $self->new_location_path;

    if( $self->_match( $tokens, qr/\// ) ) {
        # root
        $path->push( $self->new_root );
        # Is it a valid token step?
        $path->push( $self->_relative_location_path( $tokens ) ) if( $self->_is_step( $tokens ) );
    }
    elsif( $self->_match( $tokens, qr/\/\// ) ) {
        # root
        $path->push( $self->new_root );
        my $optimised = $self->_optimise_descendant_or_self( $tokens );
        if( $optimised ) {
            $path->push( $optimised, $self->_relative_location_path( $tokens ) );
        }
        else {
            $path->push(
                $self->new_step( $self, 'descendant-or-self', $self->_class_for( 'Step' )->TEST_NT_NODE )
            );
            $path->push( $self->_relative_location_path( $tokens ) );
        }
    }
    else {
        $path->push( $self->_relative_location_path( $tokens ) );
    }
    return( $path );
}
714
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
# Tries to match the token at the current position against a pattern.
#
# Parameters:
#   $tokens  array (reference) of tokens
#   $match   pattern; it is interpolated between ^ and $, so it must match the
#            whole token
#   $fatal   when true, a failure to match raises an exception instead of
#            returning false
#
# On success, the matched token is stored in $self->{_curr_match}, the token
# position $self->{_tokpos} is advanced by one and 1 is returned.
# On failure, $self->{_curr_match} is left as an empty string, the position is
# unchanged and 0 is returned, unless $fatal is set, in which case this dies.
#
# When $fatal is set and there is no token left, this dies as well: a
# mandatory token that is missing at the end of the expression (such as an
# unclosed '[' or '(') is an error, not a silent non-match.
sub _match
{
    my( $self, $tokens, $match, $fatal ) = @_;
    $self->{_curr_match} = '';
    # scalar( @$tokens ) is used on purpose rather than $tokens->length, which
    # would create a new Module::Generic::Number on every call, and _match gets
    # called a lot.
    if( $self->{_tokpos} >= scalar( @$tokens ) )
    {
        die( "Unexpected end of expression: expected a token matching $match\n" ) if( $fatal );
        return(0);
    }

    my $token = $tokens->[ $self->{_tokpos} ];
    if( $token =~ /^$match$/ )
    {
        $self->{_curr_match} = $token;
        $self->{_tokpos}++;
        return(1);
    }

    die( "Invalid token: ", $token, "\n" ) if( $fatal );
    return(0);
}
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
# Called after a '//' has been consumed: tries to turn the step that follows
# into a single step on the 'descendant' axis.
#
# Returns the modified step when the optimisation applies, or an empty
# list / undef when it does not:
#   - the token after the next one is '[' (the step carries a predicate)
#   - the next token is '.' or '..' (abbreviated step)
#   - the parsed step uses an axis other than 'child'; the token position is
#     then restored to where it was on entry
#
# When the optimisation applies, the last consumed token is overwritten with
# '.' and the position is moved back onto it, so that the caller still finds
# a step to parse there.
sub _optimise_descendant_or_self
{
    my $self   = shift( @_ );
    my $tokens = shift( @_ );
    my $start  = $self->{_tokpos};

    # // must be followed by a Step.
    my $after = $tokens->[ $start + 1 ];
    # next token is a predicate
    return if( $after && $after eq '[' );
    # abbreviatedStep - can't optimise.
    return if( $tokens->[ $start ] =~ /^\.\.?$/ );

    my $step = $self->_step( $tokens );
    if( $step->axis ne 'child' )
    {
        # can't optimise axes other than child for now...
        $self->{_tokpos} = $start;
        return;
    }
    $step->axis( 'descendant' );
    $step->axis_method( 'axis_descendant' );
    $self->{_tokpos}--;
    $tokens->[ $self->{_tokpos} ] = '.';
    return( $step );
}
783
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
# Parses an XPath expression and returns the resulting tree.
#
# Parsed trees are kept in $CACHE under a key made of the expression followed
# by the sorted "prefix=>uri" namespace pairs, all joined with '&&'. If an
# entry exists for that key, it is returned without parsing again.
#
# Dies if the expression could not be consumed entirely.
sub _parse
{
    my( $self, $path ) = @_;

    # The key looks like the expression itself, e.g. //*[@att2="vv"], when no
    # namespace is set
    my $pairs = $self->namespaces->keys->sort->map(sub
    {
        sprintf( '%s=>%s', $_, $self->namespaces->get( $_ ) );
    });
    my $context = $pairs->prepend( $path )->join( '&&' );

    return( $CACHE->{ $context } ) if( $CACHE->{ $context } );

    my $tokens = $self->_tokenize( $path );

    $self->{_tokpos} = 0;
    my $tree = $self->_analyze( $tokens );

    # didn't manage to parse entire expression - throw an exception
    die "Parse of expression $path failed - junk after end of expression: $tokens->[$self->{_tokpos}]" if( $self->{_tokpos} < $tokens->length );

    $tree->{uses_namespaces}   = $self->{uses_namespaces};
    $tree->{strict_namespaces} = $self->{strict_namespaces};

    $CACHE->{ $context } = $tree;

    # Debugging hook; the caller information is collected, but not used
    if( $self->debug )
    {
        my( $p, $f, $l ) = caller;
    }
    return( $tree );
}
819
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
# Parses a relative location path, i.e. a step followed by any number of
# '/' step or '//' step, and returns the list of step objects.
#
# For '//', the optimised descendant step is used when available, otherwise
# an explicit descendant-or-self step is added before the next step.
# After each separator, if the step just parsed is a 'self' step with a node
# test of TEST_NT_NODE and it is not the only step, it is removed again.
sub _relative_location_path
{
    my $self   = shift( @_ );
    my $tokens = shift( @_ );
    my @steps  = ( $self->_step( $tokens ) );

    while( $self->_match( $tokens, qr/\/\/?/ ) )
    {
        if( $self->{_curr_match} eq '//' )
        {
            my $optimised = $self->_optimise_descendant_or_self( $tokens);
            push( @steps,
                $optimised
                    ? $optimised
                    : $self->new_step( $self, 'descendant-or-self', $self->_class_for( 'Step' )->TEST_NT_NODE )
            );
        }
        push( @steps, $self->_step( $tokens ) );

        # Drop a trailing self step testing for any node
        next unless( @steps > 1 );
        next unless( $steps[-1]->axis eq 'self' );
        next unless( $steps[-1]->test == $self->_class_for( 'Step' )->TEST_NT_NODE );
        pop( @steps );
    }
    return( @steps );
}
850
|
|
|
|
|
|
|
|
851
|
1443
|
|
|
1443
|
|
4243
|
# Sets or gets the 'context_pos' field by delegating to _set_get_scalar(),
# and returns whatever that method returns.
sub _set_context_pos
{
    my $self = shift( @_ );
    return( $self->_set_get_scalar( 'context_pos', @_ ) );
}
852
|
|
|
|
|
|
|
|
853
|
936
|
|
|
936
|
|
2715
|
# Sets or gets the 'context_set' field by delegating to _set_get_scalar(),
# and returns whatever that method returns.
sub _set_context_set
{
    my $self = shift( @_ );
    return( $self->_set_get_scalar( 'context_set', @_ ) );
}
854
|
|
|
|
|
|
|
|
855
|
|
|
|
|
|
|
# Parses one location step at the current token position and returns the step
# object created with new_step().
#
# The following forms are handled, in this order:
#   .                                 'self' axis with TEST_NT_NODE
#   ..                                'parent' axis with TEST_NT_NODE
#   processing-instruction( literal ) 'child' axis with TEST_NT_PI and a literal
#   @*  @prefix:*  @name              'attribute' axis
#   prefix:*  *  name                 'child' axis
#   comment() text() node() processing-instruction()
#                                     'child' axis
#   axis::<any of the tests above>    the given axis
#
# Any number of predicates, each enclosed in '[' and ']', are then parsed with
# _expr() and added to the step 'predicates' array.
#
# Dies if there is no token left, or if the token does not look like a step.
sub _step
{
    my( $self, $tokens ) = @_;
    if( $self->_match( $tokens, qr/\./ ) )
    {
        # self::node()
        return( $self->new_step( $self, 'self', $self->_class_for( 'Step' )->TEST_NT_NODE ) );
    }
    elsif( $self->_match( $tokens, qr/\.\./ ) )
    {
        # parent::node()
        return( $self->new_step( $self, 'parent', $self->_class_for( 'Step' )->TEST_NT_NODE ) );
    }
    else
    {
        # AxisSpecifier NodeTest Predicate(s?)
        my $token = $tokens->[ $self->{_tokpos} ];
        # Fail with a clear message rather than with 'uninitialized' warnings
        # and an empty token in the error below.
        die( "Unexpected end of expression while expecting a 'Step'\n" ) unless( defined( $token ) );

        my $step;
        if( $token eq 'processing-instruction' )
        {
            $self->{_tokpos}++;
            $self->_match( $tokens, qr/\(/, 1 );
            $self->_match( $tokens, $LITERAL_RE );
            # The literal is optional. Capture it in a lexical so that, when
            # there is none, undef is used and not a stale $1 left over from an
            # earlier successful match.
            my( $literal ) = ( $self->{_curr_match} =~ /^["'](.*)["']$/ );
            $step = $self->new_step(
                $self, 'child',
                $self->_class_for( 'Step' )->TEST_NT_PI,
                $self->new_literal( $literal )
            );
            $self->_match( $tokens, qr/\)/, 1 );
        }
        elsif( $token =~ /^\@($NC_WILD_RE|$QNAME_RE|$QN_WILD_RE)$/o )
        {
            $self->{_tokpos}++;
            if( $token eq '@*' )
            {
                $step = $self->new_step(
                    $self, 'attribute',
                    $self->_class_for( 'Step' )->TEST_ATTR_ANY,
                    '*'
                );
            }
            elsif( $token =~ /^\@($NC_NAME_RE):\*$/o )
            {
                $step = $self->new_step(
                    $self, 'attribute',
                    $self->_class_for( 'Step' )->TEST_ATTR_NCWILD,
                    $1
                );
            }
            elsif( $token =~ /^\@($QNAME_RE)$/o )
            {
                $step = $self->new_step(
                    $self, 'attribute',
                    $self->_class_for( 'Step' )->TEST_ATTR_QNAME,
                    $1
                );
            }
        }
        # ns:*
        elsif( $token =~ /^($NC_NAME_RE):\*$/o )
        {
            $self->{_tokpos}++;
            $step = $self->new_step(
                $self, 'child',
                $self->_class_for( 'Step' )->TEST_NCWILD,
                $1
            );
        }
        # *
        elsif( $token =~ /^$QN_WILD_RE$/o )
        {
            $self->{_tokpos}++;
            $step = $self->new_step(
                $self, 'child',
                $self->_class_for( 'Step' )->TEST_ANY,
                $token
            );
        }
        # name:name
        elsif( $token =~ /^$QNAME_RE$/o )
        {
            $self->{_tokpos}++;
            $step = $self->new_step(
                $self, 'child',
                $self->_class_for( 'Step' )->TEST_QNAME,
                $token
            );
        }
        elsif( $token eq 'comment()' )
        {
            $self->{_tokpos}++;
            $step = $self->new_step(
                $self, 'child',
                $self->_class_for( 'Step' )->TEST_NT_COMMENT
            );
        }
        elsif( $token eq 'text()' )
        {
            $self->{_tokpos}++;
            $step = $self->new_step(
                $self, 'child',
                $self->_class_for( 'Step' )->TEST_NT_TEXT
            );
        }
        elsif( $token eq 'node()' )
        {
            $self->{_tokpos}++;
            $step = $self->new_step(
                $self, 'child',
                $self->_class_for( 'Step' )->TEST_NT_NODE
            );
        }
        elsif( $token eq 'processing-instruction()' )
        {
            $self->{_tokpos}++;
            $step = $self->new_step(
                $self, 'child',
                $self->_class_for( 'Step' )->TEST_NT_PI
            );
        }
        elsif( $token =~ /^$AXIS_NAME_RE($NC_WILD_RE|$QNAME_RE|$QN_WILD_RE|$NODE_TYPE_RE)$/o )
        {
            # $1 is the axis and $2 the test that follows it.
            # NOTE(review): this relies on $AXIS_NAME_RE, defined elsewhere,
            # containing exactly one capturing group - confirm.
            my $axis = $1;
            $self->{_tokpos}++;
            $token = $2;
            if( $token eq 'processing-instruction' )
            {
                $self->_match( $tokens, qr/\(/, 1 );
                $self->_match( $tokens, $LITERAL_RE );
                # Same as above: without this, a missing literal would leave
                # $1 set to the axis name captured a few lines earlier.
                my( $literal ) = ( $self->{_curr_match} =~ /^["'](.*)["']$/ );
                $step = $self->new_step(
                    $self, $axis,
                    $self->_class_for( 'Step' )->TEST_NT_PI,
                    # Same constructor as in the branch without an axis
                    $self->new_literal( $literal )
                );
                $self->_match( $tokens, qr/\)/, 1 );
            }
            # ns:*
            elsif( $token =~ /^($NC_NAME_RE):\*$/o )
            {
                $step = $self->new_step(
                    $self, $axis,
                    ( ( $axis eq 'attribute' )
                        ? $self->_class_for( 'Step' )->TEST_ATTR_NCWILD
                        : $self->_class_for( 'Step' )->TEST_NCWILD
                    ),
                    $1
                );
            }
            # *
            elsif( $token =~ /^$QN_WILD_RE$/o )
            {
                $step = $self->new_step(
                    $self, $axis,
                    ( ( $axis eq 'attribute' )
                        ? $self->_class_for( 'Step' )->TEST_ATTR_ANY
                        : $self->_class_for( 'Step' )->TEST_ANY
                    ),
                    $token
                );
            }
            # name:name
            elsif( $token =~ /^$QNAME_RE$/o )
            {
                $step = $self->new_step(
                    $self, $axis,
                    ( ( $axis eq 'attribute' )
                        ? $self->_class_for( 'Step' )->TEST_ATTR_QNAME
                        : $self->_class_for( 'Step' )->TEST_QNAME
                    ),
                    $token
                );
            }
            elsif( $token eq 'comment()' )
            {
                $step = $self->new_step(
                    $self, $axis,
                    $self->_class_for( 'Step' )->TEST_NT_COMMENT
                );
            }
            elsif( $token eq 'text()' )
            {
                $step = $self->new_step(
                    $self, $axis,
                    $self->_class_for( 'Step' )->TEST_NT_TEXT
                );
            }
            elsif( $token eq 'node()' )
            {
                $step = $self->new_step(
                    $self, $axis,
                    $self->_class_for( 'Step' )->TEST_NT_NODE
                );
            }
            elsif( $token eq 'processing-instruction()' )
            {
                $step = $self->new_step(
                    $self, $axis,
                    $self->_class_for( 'Step' )->TEST_NT_PI
                );
            }
            else
            {
                die( "Shouldn't get here" );
            }
        }
        else
        {
            die( "token $token does not match format of a 'Step'\n" );
        }

        # Predicates: '[' expression ']', any number of times
        while( $self->_match( $tokens, qr/\[/ ) )
        {
            push( @{$step->{predicates}}, $self->_expr( $tokens ) );
            $self->_match( $tokens, qr/\]/, 1 );
        }
        return( $step );
    }
}
1076
|
|
|
|
|
|
|
|
1077
|
|
|
|
|
|
|
# Splits an XPath expression into tokens.
#
# Takes the expression as a string and returns the array object obtained from
# new_array(), filled with the non-empty tokens in the order they were found.
#
# The expression itself is not modified while scanning: each pattern is
# anchored with \G and uses the /gc modifiers, so the scan position is kept in
# pos( $path ), and the loop is left with 'last' as soon as no pattern matches
# at the current position.
#
# Dies, showing the expression and a marker near the offending position, if
# the scan stopped before the end of the expression.
sub _tokenize
{
    my $self = shift( @_ );
    my $path = shift( @_ );
    my $tokens = $self->new_array;

    # Known limitation: "'@' NodeType", although part of the grammar, is not
    # allowed here.

    # Used to disambiguate conflicts (for regular expressions): set to 'RE'
    # when the previous token was =~ or !~
    my $expected = '';

    while( length( $path ) )
    {
        my $token = '';
        if( $expected eq 'RE' && ( $path =~ m{\G\s*($REGEXP_RE $REGEXP_MOD_RE?)\s*}gcxso ) )
        {
            # special case: regexp expected after =~ or !~, regular parsing rules do not apply
            # ( the / is now the regexp delimiter)
            $token = $1;
            $expected = '';
        }
        elsif( $path =~ m/\G
            \s* # ignore all whitespace
            ( # tokens
                $LITERAL_RE|
                $NUMBER_RE| # digits
                \.\.| # parent
                \.| # current
                ($AXIS_NAME_RE)?$NODE_TYPE_RE| # tests
                processing-instruction|
                \@($NC_WILD_RE|$QNAME_RE|$QN_WILD_RE)| # attrib
                \$$QNAME_RE| # variable reference
                ($AXIS_NAME_RE)?($NC_WILD_RE|$QNAME_RE|$QN_WILD_RE)| # NCName,NodeType,Axis::Test
                \!=|<=|\-|>=|\/\/|and|or|mod|div| # multi-char seps
                =~|\!~| # regexp (not in the XPath spec)
                [,\+=\|<>\/\(\[\]\)]| # single char seps
                (?<!(\@|\(|\[))\*| # multiply operator rules (see xpath spec)
                (?<!::)\*|
                $ # end of query
            )
            \s* # ignore all whitespace
            /gcxso )
        {
            $token = $1;
            # A regular expression is expected right after =~ or !~
            $expected = ( $token =~ m{^[=!]~$} ) ? 'RE' : '';
        }
        else
        {
            # Nothing matches at the current position: stop scanning
            $token = '';
            last;
        }

        # The 'end of query' alternative yields an empty token, which is skipped
        if( length( $token ) )
        {
            # push( @tokens, $token );
            $tokens->push( $token );
        }
    }

    # The scan stopped before the end of the expression
    if( pos( $path ) < length( $path ) )
    {
        my $marker = ( '.' x ( pos( $path ) -1 ) );
        # Show the expression up to a few characters past the failure point
        $path = substr( $path, 0, pos( $path ) + 8 ) . '...';
        $path =~ s/\n/ /g;
        $path =~ s/\t/ /g;
        die "Query:\n", "$path\n", $marker, "^^^\n", "Invalid query somewhere around here (I think)\n";
        # return( $self->error( "Query:\n", "$path\n", $marker, "^^^\n", "Invalid query somewhere around here (I think)" ) );
    }
    # return( \@tokens );
    return( $tokens );
}
1149
|
|
|
|
|
|
|
|
1150
|
|
|
|
|
|
|
# Escapes the characters &, < and > in the given text with their predefined
# XML entities and returns the resulting string.
#
# Double quotes are left untouched, because the substitution pattern does not
# match them; the entry for '"' in the map is thus currently unused.
# An undefined value is returned as is.
sub _xml_escape_text
{
    my( $self, $text ) = @_;
    return( $text ) unless( defined( $text ) );
    my $entities = { '&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;' };
    $text =~ s{([&<>])}{$entities->{$1}}g;
    return( $text );
}
1157
|
|
|
|
|
|
|
|
1158
|
|
|
|
|
|
|
1; |
1159
|
|
|
|
|
|
|
# NOTE: POD |
1160
|
|
|
|
|
|
|
__END__ |
1161
|
|
|
|
|
|
|
|
1162
|
|
|
|
|
|
|
=encoding utf-8 |
1163
|
|
|
|
|
|
|
|
1164
|
|
|
|
|
|
|
=head1 NAME |
1165
|
|
|
|
|
|
|
|
1166
|
|
|
|
|
|
|
HTML::Object::XPath - HTML Object XPath Class |
1167
|
|
|
|
|
|
|
|
1168
|
|
|
|
|
|
|
=head1 SYNOPSIS |
1169
|
|
|
|
|
|
|
|
1170
|
|
|
|
|
|
|
use HTML::Object; |
1171
|
|
|
|
|
|
|
use HTML::Object::XQuery; |
1172
|
|
|
|
|
|
|
use HTML::Object::XPath; |
1173
|
|
|
|
|
|
|
my $this = HTML::Object::XPath->new || die( HTML::Object::XPath->error, "\n" ); |
1174
|
|
|
|
|
|
|
|
1175
|
|
|
|
|
|
|
my $p = HTML::Object->new; |
1176
|
|
|
|
|
|
|
my $doc = $p->parse_file( $path_to_html_file ) || die( $p->error ); |
1177
|
|
|
|
|
|
|
# Returns a list of HTML::Object::Element objects matching the selector, which is |
1178
|
|
|
|
|
|
|
# converted into an XPath expression |
1179
|
|
|
|
|
|
|
my @nodes = $doc->find( 'p' ); |
1180
|
|
|
|
|
|
|
|
1181
|
|
|
|
|
|
|
# or directly: |
1182
|
|
|
|
|
|
|
use HTML::Object::XPath; |
1183
|
|
|
|
|
|
|
my $xp = HTML::Object::XPath->new; |
1184
|
|
|
|
|
|
|
my @nodes = $xp->findnodes( $xpath, $element_object ); |
1185
|
|
|
|
|
|
|
|
1186
|
|
|
|
|
|
|
=head1 VERSION |
1187
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
v0.2.0 |
1189
|
|
|
|
|
|
|
|
1190
|
|
|
|
|
|
|
=head1 DESCRIPTION |
1191
|
|
|
|
|
|
|
|
1192
|
|
|
|
|
|
|
This module implements the XPath engine used by L<HTML::Object::XQuery> to provide a jQuery-like interface to query the parsed DOM object. |
1193
|
|
|
|
|
|
|
|
1194
|
|
|
|
|
|
|
=head1 METHODS |
1195
|
|
|
|
|
|
|
|
1196
|
|
|
|
|
|
|
=head2 clear_namespaces |
1197
|
|
|
|
|
|
|
|
1198
|
|
|
|
|
|
|
Clears all previously set namespace mappings. |
1199
|
|
|
|
|
|
|
|
1200
|
|
|
|
|
|
|
=head2 exists |
1201
|
|
|
|
|
|
|
|
1202
|
|
|
|
|
|
|
Provided with a C<path> and a C<context>, this returns true if the given path exists. |
1203
|
|
|
|
|
|
|
|
1204
|
|
|
|
|
|
|
=head2 findnodes |
1205
|
|
|
|
|
|
|
|
1206
|
|
|
|
|
|
|
Provided with a C<path> and a C<context>, this returns a list of nodes found by C<path>, optionally in context C<context>. |
1207
|
|
|
|
|
|
|
|
1208
|
|
|
|
|
|
|
In scalar context it returns an HTML::Object::XPath::NodeSet object. |
1209
|
|
|
|
|
|
|
|
1210
|
|
|
|
|
|
|
=head2 findnodes_as_string |
1211
|
|
|
|
|
|
|
|
1212
|
|
|
|
|
|
|
Provided with a C<path> and a C<context>, this returns the nodes found as a single string. The result is not guaranteed to be valid HTML though (it could for example be just text if the query returns attribute values). |
1213
|
|
|
|
|
|
|
|
1214
|
|
|
|
|
|
|
=head2 findnodes_as_strings |
1215
|
|
|
|
|
|
|
|
1216
|
|
|
|
|
|
|
Provided with a C<path> and a C<context>, this returns the nodes found as a list of strings, one per node found. |
1217
|
|
|
|
|
|
|
|
1218
|
|
|
|
|
|
|
=head2 findvalue |
1219
|
|
|
|
|
|
|
|
1220
|
|
|
|
|
|
|
Provided with a C<path> and a C<context>, this returns the result as a string (the concatenation of the values of the result nodes). |
1221
|
|
|
|
|
|
|
|
1222
|
|
|
|
|
|
|
=head2 findvalues |
1223
|
|
|
|
|
|
|
|
1224
|
|
|
|
|
|
|
Provided with a C<path> and a C<context>, this returns the values of the result nodes as a list of strings. |
1225
|
|
|
|
|
|
|
|
1226
|
|
|
|
|
|
|
=head2 matches($node, $path, $context) |
1227
|
|
|
|
|
|
|
|
1228
|
|
|
|
|
|
|
Provided with a C<node> L<object|HTML::Object::Element>, a C<path> and a C<context>, this returns true if the node matches the path. |
1229
|
|
|
|
|
|
|
|
1230
|
|
|
|
|
|
|
=head2 find |
1231
|
|
|
|
|
|
|
|
1232
|
|
|
|
|
|
|
Provided with a C<path> and a C<context>, this returns either a L<HTML::Object::XPath::NodeSet> object containing the nodes it found (or empty if no nodes matched the path), or one of L<HTML::Object::XPath::Literal> (a string), L<HTML::Object::XPath::Number>, or L<HTML::Object::XPath::Boolean>. It should always return something - and you can use ->isa() to find out what it returned. If you need to check how many nodes it found you should check $nodeset->size. |
1233
|
|
|
|
|
|
|
|
1234
|
|
|
|
|
|
|
See L<HTML::Object::XPath::NodeSet>. |
1235
|
|
|
|
|
|
|
|
1236
|
|
|
|
|
|
|
=head2 get_namespace ($prefix, $node) |
1237
|
|
|
|
|
|
|
|
1238
|
|
|
|
|
|
|
Provided with a C<prefix> and a C<node> L<object|HTML::Object::Element>, this returns the URI associated with the prefix for the node (mostly for internal usage). |
1239
|
|
|
|
|
|
|
|
1240
|
|
|
|
|
|
|
=head2 get_var |
1241
|
|
|
|
|
|
|
|
1242
|
|
|
|
|
|
|
Provided with a variable name, this returns the value of the XPath variable (mostly for internal usage). |
1243
|
|
|
|
|
|
|
|
1244
|
|
|
|
|
|
|
=head2 getNodeText |
1245
|
|
|
|
|
|
|
|
1246
|
|
|
|
|
|
|
Provided with a C<path>, this returns the text string for a particular node. It returns a string, or C<undef> if the node does not exist. |
1247
|
|
|
|
|
|
|
|
1248
|
|
|
|
|
|
|
=head2 namespaces |
1249
|
|
|
|
|
|
|
|
1250
|
|
|
|
|
|
|
Sets or gets a hash reference of namespace attributes. |
1251
|
|
|
|
|
|
|
|
1252
|
|
|
|
|
|
|
=head2 new_expr |
1253
|
|
|
|
|
|
|
|
1254
|
|
|
|
|
|
|
Create a new L<HTML::Object::XPath::Expr>, passing it whatever argument was provided, and returns the newly instantiated object, or C<undef> upon L<error|Module::Generic/error> |
1255
|
|
|
|
|
|
|
|
1256
|
|
|
|
|
|
|
=head2 new_function |
1257
|
|
|
|
|
|
|
|
1258
|
|
|
|
|
|
|
Create a new L<HTML::Object::XPath::Function> object, passing it whatever argument was provided, and returns the newly instantiated object, or C<undef> upon L<error|Module::Generic/error> |
1259
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
=head2 new_literal |
1261
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
Create a new L<HTML::Object::XPath::Literal> object, passing it whatever argument was provided, and returns the newly instantiated object, or C<undef> upon L<error|Module::Generic/error> |
1263
|
|
|
|
|
|
|
|
1264
|
|
|
|
|
|
|
=head2 new_location_path |
1265
|
|
|
|
|
|
|
|
1266
|
|
|
|
|
|
|
Create a new L<HTML::Object::XPath::LocationPath> object, passing it whatever argument was provided, and returns the newly instantiated object, or C<undef> upon L<error|Module::Generic/error> |
1267
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
=head2 new_nodeset |
1269
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
Create a new L<HTML::Object::XPath::NodeSet> object, passing it whatever argument was provided, and returns the newly instantiated object, or C<undef> upon L<error|Module::Generic/error> |
1271
|
|
|
|
|
|
|
|
1272
|
|
|
|
|
|
|
=head2 new_number |
1273
|
|
|
|
|
|
|
|
1274
|
|
|
|
|
|
|
Create a new L<HTML::Object::XPath::Number> object, passing it whatever argument was provided, and returns the newly instantiated object, or C<undef> upon L<error|Module::Generic/error> |
1275
|
|
|
|
|
|
|
|
1276
|
|
|
|
|
|
|
=head2 new_root |
1277
|
|
|
|
|
|
|
|
1278
|
|
|
|
|
|
|
Create a new L<HTML::Object::XPath::Root> object, passing it whatever argument was provided, and returns the newly instantiated object, or C<undef> upon L<error|Module::Generic/error> |
1279
|
|
|
|
|
|
|
|
1280
|
|
|
|
|
|
|
=head2 new_step |
1281
|
|
|
|
|
|
|
|
1282
|
|
|
|
|
|
|
Create a new L<HTML::Object::XPath::Step> object, passing it whatever argument was provided, and returns the newly instantiated object, or C<undef> upon L<error|Module::Generic/error> |
1283
|
|
|
|
|
|
|
|
1284
|
|
|
|
|
|
|
=head2 new_variable |
1285
|
|
|
|
|
|
|
|
1286
|
|
|
|
|
|
|
Create a new L<HTML::Object::XPath::Variable> object, passing it whatever argument was provided, and returns the newly instantiated object, or C<undef> upon L<error|Module::Generic/error> |
1287
|
|
|
|
|
|
|
|
1288
|
|
|
|
|
|
|
=head2 set_namespace |
1289
|
|
|
|
|
|
|
|
1290
|
|
|
|
|
|
|
Provided with a C<prefix> and a C<uri>, this sets the namespace prefix mapping to the uri. |
1291
|
|
|
|
|
|
|
|
1292
|
|
|
|
|
|
|
Normally in L<HTML::Object::XPath> the prefixes in XPath node tests take their context from the current node. This means that foo:bar will always match an element <foo:bar> regardless of the namespace that the prefix foo is mapped to (which might even change within the document, resulting in unexpected results). In order to make prefixes in XPath node tests actually map to a real URI, you need to enable that via a call to the set_namespace method of your HTML::Object::XPath object. |
1293
|
|
|
|
|
|
|
|
1294
|
|
|
|
|
|
|
=head2 parse |
1295
|
|
|
|
|
|
|
|
1296
|
|
|
|
|
|
|
Provided with an XPath expression, this returns a new L<HTML::Object::XPath::Expr> object that can then be used repeatedly. |
1297
|
|
|
|
|
|
|
|
1298
|
|
|
|
|
|
|
You can create an XPath expression from a CSS selector expression using L<HTML::Selector::XPath> |
1299
|
|
|
|
|
|
|
|
1300
|
|
|
|
|
|
|
=head2 set_strict_namespaces |
1301
|
|
|
|
|
|
|
|
1302
|
|
|
|
|
|
|
Takes a boolean value. |
1303
|
|
|
|
|
|
|
|
1304
|
|
|
|
|
|
|
By default, for historical as well as convenience reasons, L<HTML::Object::XPath> has a slightly non-standard way of dealing with the default namespace. |
1305
|
|
|
|
|
|
|
|
1306
|
|
|
|
|
|
|
If you search for C<//tag> it will return elements C<tag>. As far as I understand it, if the document has a default namespace, this should not return anything. You would have to first do a C<set_namespace>, and then search using the namespace. |
1307
|
|
|
|
|
|
|
|
1308
|
|
|
|
|
|
|
Passing a true value to C<set_strict_namespaces> will activate this behaviour, passing a false value will return it to its default behaviour. |
1309
|
|
|
|
|
|
|
|
1310
|
|
|
|
|
|
|
=head2 set_var |
1311
|
|
|
|
|
|
|
|
1312
|
|
|
|
|
|
|
Provided with a variable name and its value, this sets an XPath variable (that can be used in queries as C<$var>). |
1313
|
|
|
|
|
|
|
|
1314
|
|
|
|
|
|
|
=head1 NODE STRUCTURE |
1315
|
|
|
|
|
|
|
|
1316
|
|
|
|
|
|
|
All nodes have the same first 2 entries in the array: node_parent and node_pos. The type of the node is determined using the ref() function. |
1317
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
The node_parent always contains an entry for the parent of the current node - except for the root node which has undef in there. And node_pos is the position of this node in the array that it is in (think: $node == $node->[node_parent]->[node_children]->[$node->[node_pos]] ) |
1319
|
|
|
|
|
|
|
|
1320
|
|
|
|
|
|
|
Nodes are structured as follows: |
1321
|
|
|
|
|
|
|
|
1322
|
|
|
|
|
|
|
=head2 Root Node |
1323
|
|
|
|
|
|
|
|
1324
|
|
|
|
|
|
|
The L<root node|HTML::Object::Root> is just an element node with no parent. |
1325
|
|
|
|
|
|
|
|
1326
|
|
|
|
|
|
|
[ |
1327
|
|
|
|
|
|
|
undef, # node_parent - check for undef to identify root node |
1328
|
|
|
|
|
|
|
undef, # node_pos |
1329
|
|
|
|
|
|
|
undef, # node_prefix |
1330
|
|
|
|
|
|
|
[ ... ], # node_children (see below) |
1331
|
|
|
|
|
|
|
] |
1332
|
|
|
|
|
|
|
|
1333
|
|
|
|
|
|
|
=head2 L<Element|HTML::Object::Element> Node |
1334
|
|
|
|
|
|
|
|
1335
|
|
|
|
|
|
|
[ |
1336
|
|
|
|
|
|
|
$parent, # node_parent |
1337
|
|
|
|
|
|
|
<position in current array>, # node_pos |
1338
|
|
|
|
|
|
|
'xxx', # node_prefix - namespace prefix on this element |
1339
|
|
|
|
|
|
|
[ ... ], # node_children |
1340
|
|
|
|
|
|
|
'yyy', # node_name - element tag name |
1341
|
|
|
|
|
|
|
[ ... ], # node_attribs - attributes on this element |
1342
|
|
|
|
|
|
|
[ ... ], # node_namespaces - namespaces currently in scope |
1343
|
|
|
|
|
|
|
] |
1344
|
|
|
|
|
|
|
|
1345
|
|
|
|
|
|
|
=head2 L<Attribute|HTML::Object::Attribute> Node |
1346
|
|
|
|
|
|
|
|
1347
|
|
|
|
|
|
|
[ |
1348
|
|
|
|
|
|
|
$parent, # node_parent - the element node |
1349
|
|
|
|
|
|
|
<position in current array>, # node_pos |
1350
|
|
|
|
|
|
|
'xxx', # node_prefix - namespace prefix on this element |
1351
|
|
|
|
|
|
|
'href', # node_key - attribute name |
1352
|
|
|
|
|
|
|
'ftp://ftp.com/', # node_value - value in the node |
1353
|
|
|
|
|
|
|
] |
1354
|
|
|
|
|
|
|
|
1355
|
|
|
|
|
|
|
=head2 L<Text|HTML::Object::Text> Nodes |
1356
|
|
|
|
|
|
|
|
1357
|
|
|
|
|
|
|
[ |
1358
|
|
|
|
|
|
|
$parent, |
1359
|
|
|
|
|
|
|
<pos>, |
1360
|
|
|
|
|
|
|
'This is some text' # node_text - the text in the node |
1361
|
|
|
|
|
|
|
] |
1362
|
|
|
|
|
|
|
|
1363
|
|
|
|
|
|
|
=head2 L<Comment|HTML::Object::Comment> Nodes |
1364
|
|
|
|
|
|
|
|
1365
|
|
|
|
|
|
|
[ |
1366
|
|
|
|
|
|
|
$parent, |
1367
|
|
|
|
|
|
|
<pos>, |
1368
|
|
|
|
|
|
|
'This is a comment' # node_comment |
1369
|
|
|
|
|
|
|
] |
1370
|
|
|
|
|
|
|
|
1371
|
|
|
|
|
|
|
=head1 AUTHOR |
1372
|
|
|
|
|
|
|
|
1373
|
|
|
|
|
|
|
Jacques Deguest E<lt>F<jack@deguest.jp>E<gt> |
1374
|
|
|
|
|
|
|
|
1375
|
|
|
|
|
|
|
=head1 SEE ALSO |
1376
|
|
|
|
|
|
|
|
1377
|
|
|
|
|
|
|
L<HTML::Object::XPath::Boolean>, L<HTML::Object::XPath::Expr>, L<HTML::Object::XPath::Function>, L<HTML::Object::XPath::Literal>, L<HTML::Object::XPath::LocationPath>, L<HTML::Object::XPath::NodeSet>, L<HTML::Object::XPath::Number>, L<HTML::Object::XPath::Root>, L<HTML::Object::XPath::Step>, L<HTML::Object::XPath::Variable> |
1378
|
|
|
|
|
|
|
|
1379
|
|
|
|
|
|
|
L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/XPath/Introduction_to_using_XPath_in_JavaScript> |
1380
|
|
|
|
|
|
|
|
1381
|
|
|
|
|
|
|
=head1 COPYRIGHT & LICENSE |
1382
|
|
|
|
|
|
|
|
1383
|
|
|
|
|
|
|
Copyright(c) 2021 DEGUEST Pte. Ltd. |
1384
|
|
|
|
|
|
|
|
1385
|
|
|
|
|
|
|
You can use, copy, modify and redistribute this package and associated files under the same terms as Perl itself. |
1386
|
|
|
|
|
|
|
|
1387
|
|
|
|
|
|
|
=cut |