| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package HTML::Query; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '0.08'; |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
use Badger::Class |
|
6
|
11
|
|
|
|
|
266
|
version => $VERSION, |
|
7
|
|
|
|
|
|
|
debug => 0, |
|
8
|
|
|
|
|
|
|
base => 'Badger::Base', |
|
9
|
|
|
|
|
|
|
utils => 'blessed', |
|
10
|
|
|
|
|
|
|
import => 'class CLASS', |
|
11
|
|
|
|
|
|
|
vars => 'AUTOLOAD', |
|
12
|
|
|
|
|
|
|
constants => 'ARRAY', |
|
13
|
|
|
|
|
|
|
constant => { |
|
14
|
|
|
|
|
|
|
ELEMENT => 'HTML::Element', |
|
15
|
|
|
|
|
|
|
BUILDER => 'HTML::TreeBuilder', |
|
16
|
|
|
|
|
|
|
}, |
|
17
|
|
|
|
|
|
|
exports => { |
|
18
|
|
|
|
|
|
|
any => 'Query', |
|
19
|
|
|
|
|
|
|
hooks => { |
|
20
|
|
|
|
|
|
|
query => \&_export_query_to_element, |
|
21
|
|
|
|
|
|
|
}, |
|
22
|
|
|
|
|
|
|
}, |
|
23
|
|
|
|
|
|
|
messages => { |
|
24
|
|
|
|
|
|
|
no_elements => 'No elements specified to query', |
|
25
|
|
|
|
|
|
|
no_query => 'No query specified', |
|
26
|
|
|
|
|
|
|
no_source => 'No argument specified for source: %s', |
|
27
|
|
|
|
|
|
|
bad_element => 'Invalid element specified: %s', |
|
28
|
|
|
|
|
|
|
bad_source => 'Invalid source specified: %s', |
|
29
|
|
|
|
|
|
|
bad_query => 'Invalid query specified: %s', |
|
30
|
|
|
|
|
|
|
bad_spec => 'Invalid specification "%s" in query: %s', |
|
31
|
|
|
|
|
|
|
is_empty => 'The query does not contain any elements', |
|
32
|
11
|
|
|
11
|
|
1957925
|
}; |
|
|
11
|
|
|
|
|
20764
|
|
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
# Dispatch table used by new(): maps a named source type to a subroutine
# that turns the argument following that name into whatever should be
# pushed back onto the constructor's argument list.
our $SOURCES = {
    # HTML markup held in a string: parse it with the BUILDER class
    text => sub {
        my $markup = shift;
        class(BUILDER)->load;
        return BUILDER->new_from_content($markup);
    },

    # HTML markup held in a file: parse it with the BUILDER class
    file => sub {
        my $path = shift;
        class(BUILDER)->load;
        return BUILDER->new_from_file($path);
    },

    # an existing tree is passed through untouched
    tree => sub {
        return $_[0];
    },

    # an array reference is flattened into its items, anything else is
    # passed through untouched
    query => sub {
        my $item = $_[0];
        return @{ $item } if ref $item eq ARRAY;
        return $item;
    },
};
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
# Exportable constructor subroutine: Query(@args) is shorthand for
# CLASS->new(@args) and returns whatever new() returns.
sub Query (@) {
    my @arguments = @_;
    return CLASS->new(@arguments);
}
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
# Constructor.  Accepts any mixture of:
#
#   * element objects (instances of the class named by the ELEMENT constant)
#   * other query objects, whose elements are copied in
#   * "type => source" pairs, where type is a key of $SOURCES
#   * a single array reference holding any of the above
#   * a trailing selector string, which is run via query() against the
#     elements collected so far
#
# Returns the new query object, or the result of query() when a trailing
# selector was given.  Reports bad_source / bad_element via error_msg().
sub new {
    my $class = shift;

    # a lone array reference is shorthand for the list of items it holds
    @_ = @{ $_[0] }
        if @_ == 1 && ref $_[0] eq ARRAY;

    $class = ref $class || $class;

    # each element should be an HTML::Element object, although a subclass
    # may want to recognise a different kind of object, so the element
    # class is fetched from the ELEMENT constant method which a subclass
    # can re-define.
    my $element_class = $class->ELEMENT;

    my @elements;   # elements gathered from the arguments
    my $select;     # trailing selector string, if one was supplied

    ARGUMENT: while (@_) {
        my $element = shift;
        $class->debug("argument: ".$element) if DEBUG;

        if (blessed $element) {
            # an element object is stored as-is
            if ($element->isa($element_class)) {
                push @elements, $element;
                next ARGUMENT;
            }

            # another query object contributes all of its elements
            if ($element->isa($class)) {
                push @elements, @{ $element->get_elements };
                next ARGUMENT;
            }
        }
        elsif (! ref $element) {
            # a non-reference item is a source type (text, file, tree)
            # followed by the source, or if it's the last argument following
            # one or more element options or named argument pairs then it's
            # a selection query
            if (@_) {
                my $type = $element;
                my $code = $SOURCES->{ $type };
                return $class->error_msg( bad_source => $type )
                    unless $code;

                $element = shift;
                $class->debug("source $type: $element") if DEBUG;

                # whatever the source handler produces is re-examined on
                # the next pass through the loop
                unshift @_, $code->($element);
                next ARGUMENT;
            }

            if (@elements) {
                $select = $element;
                last ARGUMENT;
            }
        }

        # anything that reaches this point could not be interpreted
        return $class->error_msg( bad_element => $element );
    }

    my $self = bless {
        error           => undef,
        suppress_errors => undef,
        match_self      => undef,
        elements        => \@elements,
        specificity     => {},
    }, $class;

    return $self->query($select) if defined $select;
    return $self;
}
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# Run a CSS-style selector against the elements held by this query.
#
# Parameters:
#   $query - selector string; several selectors may be comma separated,
#            e.g. "table tr td, li a, div.foo"
#
# Returns, in list context, the matching elements (de-duplicated and sorted
# with _by_address); otherwise a new query object created through
# _new_match_self() that wraps those elements.
#
# Side effects: resets $self->{error}, and records the specificity computed
# for each comma-separated selector via _add_specificity().
#
# Errors: an undefined or empty selector is reported through error_msg();
# unparsable selectors are reported through _report_error().
sub query {
    my ($self, $query) = @_;
    my @result;
    my $ops = 0;
    my $pos = 0;

    $self->{error} = undef;

    return $self->error_msg('no_query')
        unless defined $query && length $query;

    # multiple specs can be comma separated, e.g. "table tr td, li a, div.foo"
    COMMA: while (1) {
        # each comma-separated traversal spec is applied downward from
        # the source elements in the $self->{elements} query
        my @elements = @{$self->get_elements};
        my $comops = 0;

        my $specificity = 0;
        my $startpos = pos($query) || 0;

        my $hack_sequence = 0; # look for '* html'

        warn "Starting new COMMA" if DEBUG;

        # for each whitespace delimited descendant spec we grok the correct
        # parameters for look_down() and apply them to each source element
        # e.g. "table tr td"
        SEQUENCE: while (1) {
            my @args;
            $pos = pos($query) || 0;
            my $relationship = '';
            my $leading_whitespace;

            warn "Starting new SEQUENCE" if DEBUG;

            # ignore any leading whitespace
            if ($query =~ / \G (\s+) /cgsx) {
                $leading_whitespace = defined($1) ? 1 : 0;
                warn "removing leading whitespace\n" if DEBUG;
            }

            # grandchild selector is whitespace sensitive, requires leading whitespace
            if ($leading_whitespace && $comops && ($query =~ / \G (\*) \s+ /cgx)) {
                # can't have a relationship modifier as the first part of the query
                $relationship = $1;
                warn "relationship = $relationship\n" if DEBUG;
            }

            # get other relationship modifiers
            if ($query =~ / \G (>|\+) \s* /cgx) {
                # can't have a relationship modifier as the first part of the query
                $relationship = $1;
                warn "relationship = $relationship\n" if DEBUG;
                if (!$comops) {
                    return $self->_report_error( $self->message( bad_spec => $relationship, $query ) );
                }
            }

            # optional leading word is a tag name
            if ($query =~ / \G ([\w\*]+) /cgx) {
                my $tag = $1;

                if ($tag =~ m/\*/) {
                    if (($leading_whitespace || $comops == 0) && ($tag eq '*')) {
                        warn "universal tag\n" if DEBUG;
                        push(@args, _tag => qr/\w+/);

                        if ($comops == 0) { #we need to catch the case where we see '* html'
                            $hack_sequence++;
                        }
                    }
                    else {
                        return $self->_report_error( $self->message( bad_spec => $tag, $query ) );
                    }
                }
                else {
                    warn "html tag\n" if DEBUG;
                    $specificity += 1; # standard tags are worth 1 point
                    push( @args, _tag => $tag );

                    if ($comops == 1 && $tag eq 'html') {
                        $hack_sequence++;
                    }
                }
            }

            # loop to collect a description about this specific part of the rule
            while (1) {
                my $work = scalar @args;

                # that can be followed by (or the query can start with) a #id
                if ($query =~ / \G \# ([\w\-]+) /cgx) {
                    $specificity += 100;
                    push( @args, id => $1 );
                }

                # and/or a .class
                if ($query =~ / \G \. ([\w\-]+) /cgx) {
                    $specificity += 10;
                    push( @args, class => qr/ (^|\s+) $1 ($|\s+) /x );
                }

                # and/or none or more [ ] attribute specs
                if ($query =~ / \G \[ (.*?) \] /cgx) {
                    my $attribute = $1;
                    $specificity += 10;

                    #if we have an operator
                    if ($attribute =~ m/(.*?)\s*([\|\~]?=)\s*(.*)/) {
                        my ($name,$attribute_op,$value) = ($1,$2,$3);

                        unless (defined($name) && length($name)) {
                            return $self->_report_error( $self->message( bad_spec => $name, $query ) );
                        }

                        warn "operator $attribute_op" if DEBUG;

                        if (defined $value) {
                            # strip one surrounding quote character from each end
                            for ($value) {
                                s/^['"]//;
                                s/['"]$//;
                            }
                            if ($attribute_op eq '=') {
                                push( @args, $name => $value);
                            }
                            elsif ($attribute_op eq '|=') {
                                # the value is literal text from the selector, so
                                # quote it rather than letting it act as a pattern
                                push(@args, $name => qr/\b\Q${value}\E-?/)
                            }
                            elsif ($attribute_op eq '~=') {
                                # quoted for the same reason as above
                                push(@args, $name => qr/\b\Q${value}\E\b/)
                            }
                            else {
                                return $self->_report_error( $self->message( bad_spec => $attribute_op, $query ) );
                            }
                        }
                        else {
                            return $self->_report_error( $self->message( bad_spec => $attribute_op, $query ) );
                        }
                    }
                    else {
                        unless (defined($attribute) && length($attribute)) {
                            return $self->_report_error( $self->message( bad_spec => $attribute, $query ) );
                        }

                        # add a regex to match anything (or nothing)
                        push( @args, $attribute => qr/.*/ );
                    }
                }
                # and/or one or more pseudo-classes
                if ($query =~ / \G : ([\w\-]+) /cgx) {
                    my $pseudoclass = $1;
                    $specificity += 10;

                    if ($pseudoclass eq 'first-child') {
                        # no element (reference) among the left-hand siblings
                        push( @args, sub { ! grep { ref $_ } $_[0]->left() } );
                    } elsif ($pseudoclass eq 'last-child') {
                        # no element (reference) among the right-hand siblings
                        push( @args, sub { ! grep { ref $_ } $_[0]->right() } );
                    } else {
                        warn "Pseudoclass :$pseudoclass not supported";
                        next;
                    }
                }

                # keep going until this particular expression is fully processed
                last unless scalar(@args) > $work;
            }

            # we must have something in @args by now or we didn't find any
            # valid query specification this time around
            last SEQUENCE unless @args;

            $self->debug(
                'Parsed ', substr($query, $pos, pos($query) - $pos),
                ' into args [', join(', ', @args), ']'
            ) if DEBUG;

            # we want to skip certain hack sequences like '* html'
            if ($hack_sequence == 2) {
                # clear out our stored elements to match behaviour of modern
                # browsers; this must be an empty list, not an array
                # reference, because later steps call methods on every item
                @elements = ();
            }
            # we're just looking for any descendent
            elsif( !$relationship ) {
                if ($self->{match_self}) {
                    # if we are re-querying, be sure to match ourselves not just descendents
                    @elements = map { $_->look_down(@args) } @elements;
                } else {
                    # look_down() will match self in addition to descendents,
                    # so we explicitly disallow matches on self as we iterate
                    # thru the list.  The other cases below already exclude self.
                    # https://rt.cpan.org/Public/Bug/Display.html?id=58918
                    my @accumulator;
                    foreach my $e (@elements) {
                        if ($e->root() == $e) {
                            push(@accumulator, $e->look_down(@args));
                        }
                        else {
                            push(@accumulator, grep { $_ != $e } $e->look_down(@args));
                        }
                    }
                    @elements = @accumulator;
                }
            }
            # immediate child selector
            elsif( $relationship eq '>' ) {
                @elements = map {
                    $_->look_down(
                        @args,
                        sub {
                            my $tag = shift;
                            my $root = $_;

                            return $tag->depth == $root->depth + 1;
                        }
                    )
                } @elements;
            }
            # immediate sibling selector
            elsif( $relationship eq '+' ) {
                @elements = map {
                    # an element without a parent has no siblings to offer
                    my $parent = $_->parent;
                    $parent
                        ? $parent->look_down(
                            @args,
                            sub {
                                my $tag = shift;
                                my $root = $_;
                                my @prev_sibling = $tag->left;
                                # get prev next non-text sibling
                                foreach my $sibling (reverse @prev_sibling) {
                                    next unless ref $sibling;
                                    return $sibling == $root;
                                }
                            }
                          )
                        : ();
                } @elements;
            }
            # grandchild selector
            elsif( $relationship eq '*' ) {
                @elements = map {
                    $_->look_down(
                        @args,
                        sub {
                            my $tag = shift;
                            my $root = $_;

                            return $tag->depth > $root->depth + 1;
                        }
                    )
                } @elements;
            }

            # so we can check we've done something
            $comops++;

            # dedup the results we've gotten
            @elements = $self->_dedup(\@elements);

            if (DEBUG) {
                warn $_->as_HTML for @elements;
            }
        }

        if ($comops) {
            $self->debug(
                'Added', scalar(@elements), ' elements to results'
            ) if DEBUG;

            my $selector = substr ($query,$startpos, $pos - $startpos);
            $self->_add_specificity($selector,$specificity);

            #add in the recent pass
            push(@result,@elements);

            # dedup the results across the result sets, necessary for comma based selectors
            @result = $self->_dedup(\@result);

            # sort the result set...
            @result = sort _by_address @result;

            # update op counter for complete query to include ops performed
            # in this fragment
            $ops += $comops;
        }
        else {
            # looks like we got an empty comma section, e.g. : ",x, ,y,"
            # so we'll ignore it
        }

        last COMMA unless $query =~ / \G \s*,\s* /cgsx;
    }

    # check for any trailing text in the query that we couldn't parse
    if ($query =~ / \G (.+?) \s* $ /cgsx) {
        return $self->_report_error( $self->message( bad_spec => $1, $query ) );
    }

    # check that we performed at least one query operation
    unless ($ops) {
        return $self->_report_error( $self->message( bad_query => $query ) );
    }

    return wantarray ? @result : $self->_new_match_self(@result);
}
|
425
|
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
# return elements stored from last query |
|
427
|
|
|
|
|
|
|
# Accessor for the elements stored in this query.  Returns the elements as
# a list in list context, otherwise the stored array reference itself (not
# a copy).
sub get_elements {
    my ($self) = @_;

    return $self->{elements} unless wantarray;
    return @{ $self->{elements} };
}
|
432
|
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
########################################################################################################### |
|
434
|
|
|
|
|
|
|
# from CSS spec at http://www.w3.org/TR/CSS21/cascade.html#specificity |
|
435
|
|
|
|
|
|
|
########################################################################################################### |
|
436
|
|
|
|
|
|
|
# A selector's specificity is calculated as follows: |
|
437
|
|
|
|
|
|
|
# |
|
438
|
|
|
|
|
|
|
# * count the number of ID attributes in the selector (= a) |
|
439
|
|
|
|
|
|
|
# * count the number of other attributes and pseudo-classes in the selector (= b) |
|
440
|
|
|
|
|
|
|
# * count the number of element names in the selector (= c) |
|
441
|
|
|
|
|
|
|
# * ignore pseudo-elements. |
|
442
|
|
|
|
|
|
|
# |
|
443
|
|
|
|
|
|
|
# Concatenating the three numbers a-b-c (in a number system with a large base) gives the specificity. |
|
444
|
|
|
|
|
|
|
# |
|
445
|
|
|
|
|
|
|
# Example(s): |
|
446
|
|
|
|
|
|
|
# |
|
447
|
|
|
|
|
|
|
# Some examples: |
|
448
|
|
|
|
|
|
|
# |
|
449
|
|
|
|
|
|
|
# * {} /* a=0 b=0 c=0 -> specificity = 0 */ |
|
450
|
|
|
|
|
|
|
# LI {} /* a=0 b=0 c=1 -> specificity = 1 */ |
|
451
|
|
|
|
|
|
|
# UL LI {} /* a=0 b=0 c=2 -> specificity = 2 */ |
|
452
|
|
|
|
|
|
|
# UL OL+LI {} /* a=0 b=0 c=3 -> specificity = 3 */ |
|
453
|
|
|
|
|
|
|
# H1 + *[REL=up]{} /* a=0 b=1 c=1 -> specificity = 11 */ |
|
454
|
|
|
|
|
|
|
# UL OL LI.red {} /* a=0 b=1 c=3 -> specificity = 13 */ |
|
455
|
|
|
|
|
|
|
# LI.red.level {} /* a=0 b=2 c=1 -> specificity = 21 */ |
|
456
|
|
|
|
|
|
|
# #x34y {} /* a=1 b=0 c=0 -> specificity = 100 */ |
|
457
|
|
|
|
|
|
|
########################################################################################################### |
|
458
|
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=pod |
|
460
|
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=item get_specificity($selector)
|
462
|
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
Calculate the specificity for any given passed selector, a critical factor in determining how best to apply the cascade |
|
464
|
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
A selector's specificity is calculated as follows: |
|
466
|
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
* count the number of ID attributes in the selector (= a) |
|
468
|
|
|
|
|
|
|
* count the number of other attributes and pseudo-classes in the selector (= b) |
|
469
|
|
|
|
|
|
|
* count the number of element names in the selector (= c) |
|
470
|
|
|
|
|
|
|
* ignore pseudo-elements. |
|
471
|
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
The specificity is based only on the form of the selector. In particular, a selector of the form "[id=p33]" is counted |
|
473
|
|
|
|
|
|
|
as an attribute selector (a=0, b=0, c=1, d=0), even if the id attribute is defined as an "ID" in the source document's DTD. |
|
474
|
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
See the following spec for additional details: |
|
476
|
|
|
|
|
|
|
L<http://www.w3.org/TR/CSS21/cascade.html#specificity>
|
477
|
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
=back |
|
479
|
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
=cut |
|
481
|
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
# Return the specificity recorded for $selector.  When nothing has been
# recorded yet the selector is run through query() so that query() records
# it; the value stored under that exact selector string (possibly undef)
# is then returned.
sub get_specificity {
    my ($self, $selector) = @_;

    if (! exists $self->{specificity}->{$selector}) {
        # if the invoking tree happened to be large this could get
        # expensive real fast, so the query is run against a temporarily
        # empty element list instead.
        local $self->{elements} = [];
        $self->query($selector);
    }

    return $self->{specificity}->{$selector};
}
|
495
|
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
# Combined getter/setter for the suppress_errors flag.  A defined argument
# replaces the stored setting; the (possibly updated) setting is returned.
sub suppress_errors {
    my $self = shift;
    my ($setting) = @_;

    $self->{suppress_errors} = $setting
        if defined $setting;

    return $self->{suppress_errors};
}
|
505
|
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
# Return the value currently stored in the object's error slot.
sub get_error {
    my $self = shift;
    return $self->{error};
}
|
511
|
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
# Return the elements held by this query: as a list in list context,
# otherwise as a new unblessed array reference holding a copy of the items.
#
# The object is a hash, so the items are fetched through get_elements()
# rather than by dereferencing the object itself as an array.
sub list {
    my $self = shift;
    my $elements = $self->get_elements;

    return wantarray ? @{ $elements } : [ @{ $elements } ];
}
|
516
|
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
# Return the number of elements held by this query.
sub size {
    my ($self) = @_;
    my $elements = $self->get_elements;
    return scalar @{ $elements };
}
|
521
|
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
# Return the first element of the query, or report the is_empty message
# through error_msg() when the query holds no elements.
sub first {
    my ($self) = @_;

    return $self->error_msg('is_empty')
        unless @{ $self->get_elements };

    return $self->get_elements->[0];
}
|
527
|
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
# Return the last element of the query, or report the is_empty message
# through error_msg() when the query holds no elements.
sub last {
    my ($self) = @_;

    return $self->error_msg('is_empty')
        unless @{ $self->get_elements };

    return $self->get_elements->[-1];
}
|
533
|
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
#################################################################### |
|
535
|
|
|
|
|
|
|
# |
|
536
|
|
|
|
|
|
|
# Everything below here is a private method subject to change |
|
537
|
|
|
|
|
|
|
# |
|
538
|
|
|
|
|
|
|
#################################################################### |
|
539
|
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
# Record $specificity under $selector in the object's specificity table.
# Returns nothing (an empty list).
sub _add_specificity {
    my $self = shift;
    my ($selector, $specificity) = @_;

    $self->{specificity}{$selector} = $specificity;

    return;
}
|
547
|
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
# Report a query error.  With error suppression switched on, a defined
# message is stored in $self->{error} and undef is returned; otherwise the
# message is handed to error().
sub _report_error {
    my ($self, $message) = @_;

    # not suppressing errors: error() is expected to die
    return $self->error($message)
        unless $self->suppress_errors();

    $self->{error} = $message
        if defined $message;

    return undef;
}
|
561
|
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
# this Just Works[tm] because first arg is HTML::Element object |
|
563
|
|
|
|
|
|
|
# Export hook: loads the ELEMENT class and installs Query() into it as a
# method named "query".
# this Just Works[tm] because first arg is HTML::Element object
sub _export_query_to_element {
    my $element_class = class(ELEMENT)->load;
    return $element_class->method( query => \&Query );
}
|
568
|
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
# remove duplicate elements in the case where elements are nested between multiple matching elements |
|
570
|
|
|
|
|
|
|
# Return the items of the array referenced by $elements with duplicates
# removed, keeping the first occurrence of each and preserving order.
# Items are compared by their string value.
sub _dedup {
    my ($self, $elements) = @_;

    my %seen;
    return grep { ! $seen{$_}++ } @{ $elements };
}
|
586
|
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
# utility method to assist in sorting of query return sets |
|
588
|
|
|
|
|
|
|
# Sort comparator (for use as "sort _by_address @list") that orders items
# by the dotted numeric string returned from their address() method.
# Components are compared numerically from left to right; when one address
# is a prefix of the other, the shorter one sorts first.  Always returns
# -1, 0 or 1.
#
# The items arrive in the package variables $a and $b, as for any sort
# subroutine; no arguments are read from @_.
sub _by_address
{
    my @a = split /\./, $a->address();
    my @b = split /\./, $b->address();

    # compare the components both addresses have in common
    my $shared = @a < @b ? scalar @a : scalar @b;

    for my $index (0 .. $shared - 1) {
        my $order = $a[$index] <=> $b[$index];
        return $order if $order;
    }

    # one address is a prefix of the other (or they are equal)
    return scalar(@a) <=> scalar(@b);
}
|
617
|
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
# instantiate an instance with match_self turned on, for use with |
|
619
|
|
|
|
|
|
|
# follow-up queries, so they match the top-most elements. |
|
620
|
|
|
|
|
|
|
# Build a new query from the given arguments via new() and switch its
# match_self flag on, so that follow-up queries made against it also match
# its top-most elements rather than only their descendants.
sub _new_match_self {
    my ($self, @arguments) = @_;

    my $query = $self->new(@arguments);
    $query->{match_self} = 1;

    return $query;
}
|
628
|
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
# Catch-all for unknown methods: the call is forwarded, with the same
# arguments, to every element in the query.  The collected return values
# come back as a list in list context, otherwise as an array reference.
# DESTROY is ignored.
sub AUTOLOAD {
    my $self = shift;

    # the method name is whatever follows the final "::" in $AUTOLOAD
    my ($method) = ($AUTOLOAD =~ /([^:]+)$/ );
    return if $method eq 'DESTROY';

    # we allow Perl to catch any unknown methods that the user might
    # try to call against the HTML::Element objects in the query
    my @results;
    for (@{ $self->get_elements }) {
        push @results, $_->$method(@_);
    }

    return \@results unless wantarray;
    return @results;
}
|
642
|
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
1; |
|
644
|
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
=head1 NAME |
|
646
|
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
HTML::Query - jQuery-like selection queries for HTML::Element |
|
648
|
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
650
|
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
Creating an C<HTML::Query> object using the L<Query()> constructor
|
652
|
|
|
|
|
|
|
subroutine: |
|
653
|
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
use HTML::Query 'Query'; |
|
655
|
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
# using named parameters |
|
657
|
|
|
|
|
|
|
$q = Query( text => $text ); # HTML text |
|
658
|
|
|
|
|
|
|
$q = Query( file => $file ); # HTML file |
|
659
|
|
|
|
|
|
|
$q = Query( tree => $tree ); # HTML::Element object |
|
660
|
|
|
|
|
|
|
$q = Query( query => $query ); # HTML::Query object |
|
661
|
|
|
|
|
|
|
$q = Query( |
|
662
|
|
|
|
|
|
|
text => $text1, # or any combination |
|
663
|
|
|
|
|
|
|
text => $text2, # of the above |
|
664
|
|
|
|
|
|
|
file => $file1, |
|
665
|
|
|
|
|
|
|
file => $file2, |
|
666
|
|
|
|
|
|
|
tree => $tree, |
|
667
|
|
|
|
|
|
|
query => $query, |
|
668
|
|
|
|
|
|
|
); |
|
669
|
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
# passing elements as positional arguments |
|
671
|
|
|
|
|
|
|
$q = Query( $tree ); # HTML::Element object(s) |
|
672
|
|
|
|
|
|
|
$q = Query( $tree1, $tree2, $tree3, ... ); |
|
673
|
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
# or from one or more existing queries |
|
675
|
|
|
|
|
|
|
$q = Query( $query1 ); # HTML::Query object(s) |
|
676
|
|
|
|
|
|
|
$q = Query( $query1, $query2, $query3, ... ); |
|
677
|
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
# or a mixture |
|
679
|
|
|
|
|
|
|
$q = Query( $tree1, $query1, $tree2, $query2 ); |
|
680
|
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
# the final argument (in all cases) can be a selector |
|
682
|
|
|
|
|
|
|
my $spec = 'ul.menu li a'; # <ul class="menu">..<li>..<a> |
|
683
|
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
$q = Query( $tree, $spec ); |
|
685
|
|
|
|
|
|
|
$q = Query( $query, $spec ); |
|
686
|
|
|
|
|
|
|
$q = Query( $tree1, $tree2, $query1, $query2, $spec ); |
|
687
|
|
|
|
|
|
|
$q = Query( text => $text, $spec ); |
|
688
|
|
|
|
|
|
|
$q = Query( file => $file, $spec ); |
|
689
|
|
|
|
|
|
|
$q = Query( tree => $tree, $spec ); |
|
690
|
|
|
|
|
|
|
$q = Query( query => $query, $spec ); |
|
691
|
|
|
|
|
|
|
$q = Query( |
|
692
|
|
|
|
|
|
|
text => $text, |
|
693
|
|
|
|
|
|
|
file => $file, |
|
694
|
|
|
|
|
|
|
# ...etc... |
|
695
|
|
|
|
|
|
|
$spec |
|
696
|
|
|
|
|
|
|
); |
|
697
|
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
Or using the OO L<new()> constructor method (which the L<Query()> |
|
699
|
|
|
|
|
|
|
subroutine maps onto): |
|
700
|
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
use HTML::Query; |
|
702
|
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
$q = HTML::Query->new( |
|
704
|
|
|
|
|
|
|
# accepts the same arguments as Query() |
|
705
|
|
|
|
|
|
|
) |
|
706
|
|
|
|
|
|
|
|
|
707
|
|
|
|
|
|
|
Or by monkey-patching a L<query()> method into L<HTML::Element>. |
|
708
|
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
use HTML::Query 'query'; # note lower case 'q' |
|
710
|
|
|
|
|
|
|
use HTML::TreeBuilder; |
|
711
|
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
# build a tree |
|
713
|
|
|
|
|
|
|
my $tree = HTML::TreeBuilder->new; |
|
714
|
|
|
|
|
|
|
$tree->parse_file($filename); |
|
715
|
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
# call the query() method on any element |
|
717
|
|
|
|
|
|
|
my $query = $tree->query($spec); |
|
718
|
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
Once you have a query, you can start selecting elements: |
|
720
|
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
@r = $q->query('a')->get_elements(); # all <a>...</a> elements |
|
722
|
|
|
|
|
|
|
@r = $q->query('a#menu')->get_elements(); # all <a> with "menu" id |
|
723
|
|
|
|
|
|
|
@r = $q->query('#menu')->get_elements(); # all elements with "menu" id |
|
724
|
|
|
|
|
|
|
@r = $q->query('a.menu')->get_elements(); # all <a> with "menu" class |
|
725
|
|
|
|
|
|
|
@r = $q->query('.menu')->get_elements(); # all elements with "menu" class |
|
726
|
|
|
|
|
|
|
@r = $q->query('a[href]')->get_elements(); # all <a> with 'href' attr |
|
727
|
|
|
|
|
|
|
@r = $q->query('a[href=foo]')->get_elements(); # all <a> with 'href="foo"' attr |
|
728
|
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
# you can specify elements within elements... |
|
730
|
|
|
|
|
|
|
@r = $q->query('ul.menu li a')->get_elements(); # <ul class="menu">...<li>...<a> |
|
731
|
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
# and use commas to delimit multiple path specs for different elements |
|
733
|
|
|
|
|
|
|
@r = $q->query('table tr td a, form input[type=submit]')->get_elements(); |
|
734
|
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
# query() in scalar context returns a new query |
|
736
|
|
|
|
|
|
|
$r = $q->query('table'); # find all tables |
|
737
|
|
|
|
|
|
|
$s = $r->query('tr'); # find all rows in all those tables |
|
738
|
|
|
|
|
|
|
$t = $s->query('td'); # and all cells in those rows... |
|
739
|
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
Inspecting query elements: |
|
741
|
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
# get number of elements in query |
|
743
|
|
|
|
|
|
|
my $size = $q->size; |
|
744
|
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
# get first/last element in query |
|
746
|
|
|
|
|
|
|
my $first = $q->first; |
|
747
|
|
|
|
|
|
|
my $last = $q->last; |
|
748
|
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
# convert query to list or list ref of HTML::Element objects |
|
750
|
|
|
|
|
|
|
my $list = $q->list; # list ref in scalar context |
|
751
|
|
|
|
|
|
|
my @list = $q->list; # list in list context |
|
752
|
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
All other methods are mapped onto the L<HTML::Element> objects |
|
754
|
|
|
|
|
|
|
in the query: |
|
755
|
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
print $query->as_trimmed_text; # print trimmed text for each element |
|
757
|
|
|
|
|
|
|
print $query->as_HTML; # print each element as HTML |
|
758
|
|
|
|
|
|
|
$query->delete; # call delete() on each element |
|
759
|
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
761
|
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
The C<HTML::Query> module is an add-on for the L<HTML::Tree> module |
|
763
|
|
|
|
|
|
|
set. It provides a simple way to select one or more elements from a tree using |
|
764
|
|
|
|
|
|
|
a query syntax inspired by jQuery. This selector syntax will be reassuringly |
|
765
|
|
|
|
|
|
|
familiar to anyone who has ever written a CSS selector. |
|
766
|
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
C<HTML::Query> is not an attempt to provide a complete (or even near-complete) |
|
768
|
|
|
|
|
|
|
implementation of jQuery in Perl (see Ingy's L<pQuery> module for a |
|
769
|
|
|
|
|
|
|
more ambitious attempt at that). Rather, it borrows some of the tried and |
|
770
|
|
|
|
|
|
|
tested selector syntax from jQuery (and CSS) that can easily be mapped onto |
|
771
|
|
|
|
|
|
|
the C<look_down()> method provided by the L<HTML::Element> |
|
772
|
|
|
|
|
|
|
module. |
|
773
|
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
=head2 Creating a Query |
|
775
|
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
The easiest way to create a query is using the exportable L<Query()> |
|
777
|
|
|
|
|
|
|
subroutine. |
|
778
|
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
use HTML::Query 'Query'; # note capital 'Q' |
|
780
|
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
It accepts a C<text> or C<file> named parameter and will create an |
|
782
|
|
|
|
|
|
|
C<HTML::Query> object from the HTML source text or file, respectively. |
|
783
|
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
my $query = Query( text => $text ); |
|
785
|
|
|
|
|
|
|
my $query = Query( file => $file ); |
|
786
|
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
This delegates to L<HTML::TreeBuilder> to parse the |
|
788
|
|
|
|
|
|
|
HTML into a tree of L<HTML::Element> objects. The root |
|
789
|
|
|
|
|
|
|
element returned is then wrapped in an C<HTML::Query> object. |
|
790
|
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
If you already have one or more L<HTML::Element> objects that |
|
792
|
|
|
|
|
|
|
you want to query then you can pass them to the L<Query()> subroutine as |
|
793
|
|
|
|
|
|
|
arguments. For example, you can explicitly use |
|
794
|
|
|
|
|
|
|
L<HTML::TreeBuilder> to parse an HTML document into a tree: |
|
795
|
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
use HTML::TreeBuilder; |
|
797
|
|
|
|
|
|
|
my $tree = HTML::TreeBuilder->new; |
|
798
|
|
|
|
|
|
|
$tree->parse_file($filename); |
|
799
|
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
And then create an C<HTML::Query> object for the tree either using an |
|
801
|
|
|
|
|
|
|
explicit C<tree> named parameter: |
|
802
|
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
my $query = Query( tree => $tree ); |
|
804
|
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
Or implicitly using positional arguments. |
|
806
|
|
|
|
|
|
|
|
|
807
|
|
|
|
|
|
|
my $query = Query( $tree ); |
|
808
|
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
If you want to query across multiple elements, then pass each one as a |
|
810
|
|
|
|
|
|
|
positional argument. |
|
811
|
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
my $query = Query( $tree1, $tree2, $tree3 ); |
|
813
|
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
You can also create a new query from one or more existing queries, |
|
815
|
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
my $query = Query( query => $query ); # named parameter |
|
817
|
|
|
|
|
|
|
my $query = Query( $query1, $query2 ); # positional arguments. |
|
818
|
|
|
|
|
|
|
|
|
819
|
|
|
|
|
|
|
You can mix and match these different parameters and positional arguments |
|
820
|
|
|
|
|
|
|
to create a query across several different sources. |
|
821
|
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
$q = Query( |
|
823
|
|
|
|
|
|
|
text => $text1, |
|
824
|
|
|
|
|
|
|
text => $text2, |
|
825
|
|
|
|
|
|
|
file => $file1, |
|
826
|
|
|
|
|
|
|
file => $file2, |
|
827
|
|
|
|
|
|
|
tree => $tree, |
|
828
|
|
|
|
|
|
|
query => $query, |
|
829
|
|
|
|
|
|
|
); |
|
830
|
|
|
|
|
|
|
|
|
831
|
|
|
|
|
|
|
The L<Query()> subroutine is a simple wrapper around the L<new()> |
|
832
|
|
|
|
|
|
|
constructor method. You can instantiate your objects manually if you prefer. |
|
833
|
|
|
|
|
|
|
The L<new()> method accepts the same arguments as for the L<Query()> |
|
834
|
|
|
|
|
|
|
subroutine (in fact, the L<Query()> subroutine simply forwards all |
|
835
|
|
|
|
|
|
|
arguments to the L<new()> method). |
|
836
|
|
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
use HTML::Query; |
|
838
|
|
|
|
|
|
|
|
|
839
|
|
|
|
|
|
|
my $query = HTML::Query->new( |
|
840
|
|
|
|
|
|
|
# same argument format as for Query() |
|
841
|
|
|
|
|
|
|
); |
|
842
|
|
|
|
|
|
|
|
|
843
|
|
|
|
|
|
|
A final way to use C<HTML::Query> is to have it add a L<query()> method |
|
844
|
|
|
|
|
|
|
to L<HTML::Element>. The C<query> import hook (all lower |
|
845
|
|
|
|
|
|
|
case) can be specified to make this so. |
|
846
|
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
use HTML::Query 'query'; # note lower case 'q' |
|
848
|
|
|
|
|
|
|
use HTML::TreeBuilder; |
|
849
|
|
|
|
|
|
|
|
|
850
|
|
|
|
|
|
|
my $tree = HTML::TreeBuilder->new; |
|
851
|
|
|
|
|
|
|
$tree->parse_file($filename); |
|
852
|
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
# now all HTML::Elements have a query() method |
|
854
|
|
|
|
|
|
|
my @items = $tree->query('ul li')->get_elements(); # find all list items |
|
855
|
|
|
|
|
|
|
|
|
856
|
|
|
|
|
|
|
This approach, often referred to as I<monkey-patching>, should be used |
|
857
|
|
|
|
|
|
|
carefully and sparingly. It involves a violation of |
|
858
|
|
|
|
|
|
|
L<HTML::Element>'s namespace that could have unpredictable |
|
859
|
|
|
|
|
|
|
results with a future version of the module (e.g. one which defines its own |
|
860
|
|
|
|
|
|
|
C<query()> method that does something different). Treat it as something that |
|
861
|
|
|
|
|
|
|
is great to get a quick job done right now, but probably not something to be |
|
862
|
|
|
|
|
|
|
used in production code without careful consideration of the implications. |
|
863
|
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
=head2 Selecting Elements |
|
865
|
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
Having created an C<HTML::Query> object by one of the methods outlined above, |
|
867
|
|
|
|
|
|
|
you can now fetch descendant elements in the tree using a simple query syntax. |
|
868
|
|
|
|
|
|
|
For example, to fetch all the C<< E<lt>aE<gt> >> elements in the tree, you can |
|
869
|
|
|
|
|
|
|
write: |
|
870
|
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
@links = $query->query('a')->get_elements(); |
|
872
|
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
Or, if you want the elements that have a specific C<class> attribute defined |
|
874
|
|
|
|
|
|
|
with a value of, say C<menu>: |
|
875
|
|
|
|
|
|
|
|
|
876
|
|
|
|
|
|
|
@links = $query->query('a.menu')->get_elements(); |
|
877
|
|
|
|
|
|
|
|
|
878
|
|
|
|
|
|
|
More generally, you can look for the existence of any attribute and optionally |
|
879
|
|
|
|
|
|
|
provide a specific value for it. |
|
880
|
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
@links = $query->query('a[href]')->get_elements(); # any href attribute |
|
882
|
|
|
|
|
|
|
@links = $query->query('a[href=index.html]')->get_elements(); # specific value |
|
883
|
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
You can also find an element (or elements) by specifying an id. |
|
885
|
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
@links = $query->query('#menu')->get_elements(); # any element with id="menu" |
|
887
|
|
|
|
|
|
|
@links = $query->query('ul#menu')->get_elements(); # ul element with id="menu" |
|
888
|
|
|
|
|
|
|
|
|
889
|
|
|
|
|
|
|
You can provide multiple selection criteria to find elements within elements |
|
890
|
|
|
|
|
|
|
within elements, and so on. For example, to find all links in a menu, |
|
891
|
|
|
|
|
|
|
you can write: |
|
892
|
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
# matches: <ul class="menu"> <li> <a> |
|
894
|
|
|
|
|
|
|
@links = $query->query('ul.menu li a')->get_elements(); |
|
895
|
|
|
|
|
|
|
|
|
896
|
|
|
|
|
|
|
You can separate different criteria using commas. For example, to fetch all |
|
897
|
|
|
|
|
|
|
table rows and C<span> elements with a C<foo> class: |
|
898
|
|
|
|
|
|
|
|
|
899
|
|
|
|
|
|
|
@elems = $query->query('table tr, span.foo')->get_elements(); |
|
900
|
|
|
|
|
|
|
|
|
901
|
|
|
|
|
|
|
=head2 Query Results |
|
902
|
|
|
|
|
|
|
|
|
903
|
|
|
|
|
|
|
When called in list context, as shown in the examples above, the L<query()> |
|
904
|
|
|
|
|
|
|
method returns a list of L<HTML::Element> objects matching the |
|
905
|
|
|
|
|
|
|
search criteria. In scalar context, the L<query()> method returns a new |
|
906
|
|
|
|
|
|
|
C<HTML::Query> object containing the L<HTML::Element> objects |
|
907
|
|
|
|
|
|
|
found. You can then call the L<query()> method against that object to further |
|
908
|
|
|
|
|
|
|
refine the query. The L<query()> method applies the selection to all elements |
|
909
|
|
|
|
|
|
|
stored in the query. |
|
910
|
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
my $tables = $query->query('table'); # query for tables |
|
912
|
|
|
|
|
|
|
my $rows = $tables->query('tr'); # requery for all rows in those tables |
|
913
|
|
|
|
|
|
|
my $cells = $rows->query('td')->get_elements(); # return back all the cells in those rows |
|
914
|
|
|
|
|
|
|
|
|
915
|
|
|
|
|
|
|
=head2 Inspection Methods |
|
916
|
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
The L<size()> method returns the number of elements in the query. The |
|
918
|
|
|
|
|
|
|
L<first()> and L<last()> methods return the first and last items in the |
|
919
|
|
|
|
|
|
|
query, respectively. |
|
920
|
|
|
|
|
|
|
|
|
921
|
|
|
|
|
|
|
if ($query->size) { |
|
922
|
|
|
|
|
|
|
print "from ", $query->first->as_trimmed_text, " to ", $query->last->as_trimmed_text; |
|
923
|
|
|
|
|
|
|
} |
|
924
|
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
If you want to extract the L<HTML::Element> objects from the |
|
926
|
|
|
|
|
|
|
query you can call the L<list()> method. This returns a list of |
|
927
|
|
|
|
|
|
|
L<HTML::Element> objects in list context, or a reference to a |
|
928
|
|
|
|
|
|
|
list in scalar context. |
|
929
|
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
@elems = $query->list; |
|
931
|
|
|
|
|
|
|
$elems = $query->list; |
|
932
|
|
|
|
|
|
|
|
|
933
|
|
|
|
|
|
|
=head2 Element Methods |
|
934
|
|
|
|
|
|
|
|
|
935
|
|
|
|
|
|
|
Any other methods are automatically applied to each element in the list. For |
|
936
|
|
|
|
|
|
|
example, to call the C<as_trimmed_text()> method on all the |
|
937
|
|
|
|
|
|
|
L<HTML::Element> objects in the query, you can write: |
|
938
|
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
print $query->as_trimmed_text; |
|
940
|
|
|
|
|
|
|
|
|
941
|
|
|
|
|
|
|
In list context, this method returns a list of the return values from |
|
942
|
|
|
|
|
|
|
calling the method on each element. In scalar context it returns a |
|
943
|
|
|
|
|
|
|
reference to a list of return values. |
|
944
|
|
|
|
|
|
|
|
|
945
|
|
|
|
|
|
|
@text_blocks = $query->as_trimmed_text; |
|
946
|
|
|
|
|
|
|
$text_blocks = $query->as_trimmed_text; |
|
947
|
|
|
|
|
|
|
|
|
948
|
|
|
|
|
|
|
See L<HTML::Element> for further information on the methods it |
|
949
|
|
|
|
|
|
|
provides. |
|
950
|
|
|
|
|
|
|
|
|
951
|
|
|
|
|
|
|
=head1 QUERY SYNTAX |
|
952
|
|
|
|
|
|
|
|
|
953
|
|
|
|
|
|
|
=head2 Basic Selectors |
|
954
|
|
|
|
|
|
|
|
|
955
|
|
|
|
|
|
|
=head3 element |
|
956
|
|
|
|
|
|
|
|
|
957
|
|
|
|
|
|
|
Matches all elements of a particular type. |
|
958
|
|
|
|
|
|
|
|
|
959
|
|
|
|
|
|
|
@elems = $query->query('table')->get_elements(); # <table> |
|
960
|
|
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
=head3 #id |
|
962
|
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
Matches all elements with a specific id attribute. |
|
964
|
|
|
|
|
|
|
|
|
965
|
|
|
|
|
|
|
@elems = $query->query('#menu')->get_elements(); # <ANY id="menu"> |
|
966
|
|
|
|
|
|
|
|
|
967
|
|
|
|
|
|
|
This can be combined with an element type: |
|
968
|
|
|
|
|
|
|
|
|
969
|
|
|
|
|
|
|
@elems = $query->query('ul#menu')->get_elements(); # <ul id="menu"> |
|
970
|
|
|
|
|
|
|
|
|
971
|
|
|
|
|
|
|
=head3 .class |
|
972
|
|
|
|
|
|
|
|
|
973
|
|
|
|
|
|
|
Matches all elements with a specific class attribute. |
|
974
|
|
|
|
|
|
|
|
|
975
|
|
|
|
|
|
|
@elems = $query->query('.info')->get_elements(); # <ANY class="info"> |
|
976
|
|
|
|
|
|
|
|
|
977
|
|
|
|
|
|
|
This can be combined with an element type and/or element id: |
|
978
|
|
|
|
|
|
|
|
|
979
|
|
|
|
|
|
|
@elems = $query->query('p.info')->get_elements(); # <p class="info"> |
|
980
|
|
|
|
|
|
|
@elems = $query->query('p#foo.info')->get_elements(); # <p id="foo" class="info"> |
|
981
|
|
|
|
|
|
|
@elems = $query->query('#foo.info')->get_elements(); # <ANY id="foo" class="info"> |
|
982
|
|
|
|
|
|
|
|
|
983
|
|
|
|
|
|
|
The selectors listed above can be combined in a whitespace delimited |
|
984
|
|
|
|
|
|
|
sequence to select down through a hierarchy of elements. Consider the |
|
985
|
|
|
|
|
|
|
following table: |
|
986
|
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
|
|
996
|
|
|
|
|
|
|
|
|
997
|
|
|
|
|
|
|
To locate the cells that we're interested in, we can write: |
|
998
|
|
|
|
|
|
|
|
|
999
|
|
|
|
|
|
|
@elems = $query->query('table.search tr.result td.value')->get_elements(); |
|
1000
|
|
|
|
|
|
|
|
|
1001
|
|
|
|
|
|
|
=head2 Attribute Selectors |
|
1002
|
|
|
|
|
|
|
|
|
1003
|
|
|
|
|
|
|
W3C CSS 2 specification defines new constructs through which to select |
|
1004
|
|
|
|
|
|
|
based on specific attributes within elements. See the following link for the spec: |
|
1005
|
|
|
|
|
|
|
L<http://www.w3.org/TR/css3-selectors/#attribute-selectors> |
|
1006
|
|
|
|
|
|
|
|
|
1007
|
|
|
|
|
|
|
=head3 [attr] |
|
1008
|
|
|
|
|
|
|
|
|
1009
|
|
|
|
|
|
|
Matches elements that have the specified attribute, including any where |
|
1010
|
|
|
|
|
|
|
the attribute has no value. |
|
1011
|
|
|
|
|
|
|
|
|
1012
|
|
|
|
|
|
|
@elems = $query->query('[href]')->get_elements(); # <ANY href="..."> |
|
1013
|
|
|
|
|
|
|
|
|
1014
|
|
|
|
|
|
|
This can be combined with any of the above selectors. For example: |
|
1015
|
|
|
|
|
|
|
|
|
1016
|
|
|
|
|
|
|
@elems = $query->query('a[href]')->get_elements(); # <a href="..."> |
|
1017
|
|
|
|
|
|
|
@elems = $query->query('a.menu[href]')->get_elements(); # <a class="menu" href="..."> |
|
1018
|
|
|
|
|
|
|
|
|
1019
|
|
|
|
|
|
|
You can specify multiple attribute selectors. Only those elements that |
|
1020
|
|
|
|
|
|
|
match I<all> of them will be selected. |
|
1021
|
|
|
|
|
|
|
|
|
1022
|
|
|
|
|
|
|
@elems = $query->query('a[href][rel]')->get_elements(); # <a href="..." rel="..."> |
|
1023
|
|
|
|
|
|
|
|
|
1024
|
|
|
|
|
|
|
=head3 [attr=value] |
|
1025
|
|
|
|
|
|
|
|
|
1026
|
|
|
|
|
|
|
Matches elements that have an attribute set to a specific value. The |
|
1027
|
|
|
|
|
|
|
value can be quoted in either single or double quotes, or left unquoted. |
|
1028
|
|
|
|
|
|
|
|
|
1029
|
|
|
|
|
|
|
@elems = $query->query('[href=index.html]')->get_elements(); |
|
1030
|
|
|
|
|
|
|
@elems = $query->query('[href="index.html"]')->get_elements(); |
|
1031
|
|
|
|
|
|
|
@elems = $query->query("[href='index.html']")->get_elements(); |
|
1032
|
|
|
|
|
|
|
|
|
1033
|
|
|
|
|
|
|
You can specify multiple attribute selectors. Only those elements that |
|
1034
|
|
|
|
|
|
|
match I<all> of them will be selected. |
|
1035
|
|
|
|
|
|
|
|
|
1036
|
|
|
|
|
|
|
@elems = $query->query('a[href=index.html][rel=home]')->get_elements(); |
|
1037
|
|
|
|
|
|
|
|
|
1038
|
|
|
|
|
|
|
=head3 [attr|=value] |
|
1039
|
|
|
|
|
|
|
|
|
1040
|
|
|
|
|
|
|
Matches any element X whose foo attribute has a hyphen-separated list of |
|
1041
|
|
|
|
|
|
|
values beginning (from the left) with bar. The value can be quoted in either |
|
1042
|
|
|
|
|
|
|
single or double quotes, or left unquoted. |
|
1043
|
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
@elems = $query->query('[lang|=en]')->get_elements(); |
|
1045
|
|
|
|
|
|
|
@elems = $query->query('p[class|="example"]')->get_elements(); |
|
1046
|
|
|
|
|
|
|
@elems = $query->query("img[alt|='fig']")->get_elements(); |
|
1047
|
|
|
|
|
|
|
|
|
1048
|
|
|
|
|
|
|
You can specify multiple attribute selectors. Only those elements that |
|
1049
|
|
|
|
|
|
|
match I<all> of them will be selected. |
|
1050
|
|
|
|
|
|
|
|
|
1051
|
|
|
|
|
|
|
@elems = $query->query('p[class|="external"][lang|="en"]')->get_elements(); |
|
1052
|
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
=head3 [attr~=value] |
|
1054
|
|
|
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
Matches any element X whose foo attribute value is a list of space-separated |
|
1056
|
|
|
|
|
|
|
values, one of which is exactly equal to bar. The value can be quoted in either |
|
1057
|
|
|
|
|
|
|
single or double quotes, or left unquoted. |
|
1058
|
|
|
|
|
|
|
|
|
1059
|
|
|
|
|
|
|
@elems = $query->query('[lang~=en]')->get_elements(); |
|
1060
|
|
|
|
|
|
|
@elems = $query->query('p[class~="example"]')->get_elements(); |
|
1061
|
|
|
|
|
|
|
@elems = $query->query("img[alt~='fig']")->get_elements(); |
|
1062
|
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
You can specify multiple attribute selectors. Only those elements that |
|
1064
|
|
|
|
|
|
|
match I<all> of them will be selected. |
|
1065
|
|
|
|
|
|
|
|
|
1066
|
|
|
|
|
|
|
@elems = $query->query('p[class~="external"][lang~="en"]')->get_elements(); |
|
1067
|
|
|
|
|
|
|
|
|
1068
|
|
|
|
|
|
|
KNOWN BUG: you can't have a C<]> character in the attribute value because |
|
1069
|
|
|
|
|
|
|
it confuses the query parser. Fixing this is TODO. |
|
1070
|
|
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
=head2 Universal Selector |
|
1072
|
|
|
|
|
|
|
|
|
1073
|
|
|
|
|
|
|
W3C CSS 2 specification defines a new construct through which to select |
|
1074
|
|
|
|
|
|
|
any element within the document below a given hierarchy. |
|
1075
|
|
|
|
|
|
|
|
|
1076
|
|
|
|
|
|
|
http://www.w3.org/TR/css3-selectors/#universal-selector |
|
1077
|
|
|
|
|
|
|
|
|
1078
|
|
|
|
|
|
|
@elems = $query->query('*')->get_elements(); |
|
1079
|
|
|
|
|
|
|
|
|
1080
|
|
|
|
|
|
|
=head2 Combinator Selectors |
|
1081
|
|
|
|
|
|
|
|
|
1082
|
|
|
|
|
|
|
W3C CSS 2 specification defines new constructs through which to select |
|
1083
|
|
|
|
|
|
|
based on hierarchy within the DOM. See the following link for the spec: |
|
1084
|
|
|
|
|
|
|
L<http://www.w3.org/TR/css3-selectors/#combinators> |
|
1085
|
|
|
|
|
|
|
|
|
1086
|
|
|
|
|
|
|
=head3 Immediate Descendents (children) |
|
1087
|
|
|
|
|
|
|
|
|
1088
|
|
|
|
|
|
|
When you combine selectors with whitespace elements are selected if |
|
1089
|
|
|
|
|
|
|
they are descended from the parent in some way. But if you just want |
|
1090
|
|
|
|
|
|
|
to select the children (and not the grandchildren, great-grandchildren, |
|
1091
|
|
|
|
|
|
|
etc) then you can combine the selectors with the C<< > >> character. |
|
1092
|
|
|
|
|
|
|
|
|
1093
|
|
|
|
|
|
|
@elems = $query->query('a > img')->get_elements(); |
|
1094
|
|
|
|
|
|
|
|
|
1095
|
|
|
|
|
|
|
=head3 Non-Immediate Descendents |
|
1096
|
|
|
|
|
|
|
|
|
1097
|
|
|
|
|
|
|
If you just want any descendents that aren't children then you can combine |
|
1098
|
|
|
|
|
|
|
selectors with the C<*> character. |
|
1099
|
|
|
|
|
|
|
|
|
1100
|
|
|
|
|
|
|
@elems = $query->query('div * a')->get_elements(); |
|
1101
|
|
|
|
|
|
|
|
|
1102
|
|
|
|
|
|
|
=head3 Immediate Siblings |
|
1103
|
|
|
|
|
|
|
|
|
1104
|
|
|
|
|
|
|
If you want to use a sibling relationship then you can join selectors |
|
1105
|
|
|
|
|
|
|
with the C<+> character. |
|
1106
|
|
|
|
|
|
|
|
|
1107
|
|
|
|
|
|
|
@elems = $query->query('img + span')->get_elements(); |
|
1108
|
|
|
|
|
|
|
|
|
1109
|
|
|
|
|
|
|
=head2 Pseudo-classes |
|
1110
|
|
|
|
|
|
|
|
|
1111
|
|
|
|
|
|
|
W3C CSS 2 and CSS 3 specifications define new concepts of pseudo-classes to |
|
1112
|
|
|
|
|
|
|
permit formatting based on information that lies outside the document tree. |
|
1113
|
|
|
|
|
|
|
See the following link for the most recent spec: |
|
1114
|
|
|
|
|
|
|
L<http://www.w3.org/TR/css3-selectors/#pseudo-classes> |
|
1115
|
|
|
|
|
|
|
|
|
1116
|
|
|
|
|
|
|
HTML::Query currently has limited support for CSS 2, and no support for CSS 3. |
|
1117
|
|
|
|
|
|
|
|
|
1118
|
|
|
|
|
|
|
Patches are *highly* encouraged to help add support here. |
|
1119
|
|
|
|
|
|
|
|
|
1120
|
|
|
|
|
|
|
=head3 -child pseudo-classes |
|
1121
|
|
|
|
|
|
|
|
|
1122
|
|
|
|
|
|
|
If you want to return child elements within a certain position then -child |
|
1123
|
|
|
|
|
|
|
pseudo-classes (:first-child, :last-child) are what you're looking for. |
|
1124
|
|
|
|
|
|
|
|
|
1125
|
|
|
|
|
|
|
@elems = $query->query('table td:first-child')->get_elements; |
|
1126
|
|
|
|
|
|
|
|
|
1127
|
|
|
|
|
|
|
=head3 Link pseudo-classes: :link and :visited |
|
1128
|
|
|
|
|
|
|
|
|
1129
|
|
|
|
|
|
|
Unsupported. |
|
1130
|
|
|
|
|
|
|
|
|
1131
|
|
|
|
|
|
|
The :link pseudo-class is to be implemented, currently unsupported. |
|
1132
|
|
|
|
|
|
|
|
|
1133
|
|
|
|
|
|
|
It is not possible to locate :visited outside of a browser context due to its |
|
1134
|
|
|
|
|
|
|
dynamic nature. |
|
1135
|
|
|
|
|
|
|
|
|
1136
|
|
|
|
|
|
|
=head3 Dynamic pseudo-classes |
|
1137
|
|
|
|
|
|
|
|
|
1138
|
|
|
|
|
|
|
Unsupported. |
|
1139
|
|
|
|
|
|
|
|
|
1140
|
|
|
|
|
|
|
It is not possible to locate these classes (:hover, :active, :focus) outside |
|
1141
|
|
|
|
|
|
|
of a browser context due to their dynamic nature. |
|
1142
|
|
|
|
|
|
|
|
|
1143
|
|
|
|
|
|
|
=head3 Language pseudo-class |
|
1144
|
|
|
|
|
|
|
|
|
1145
|
|
|
|
|
|
|
Unsupported. |
|
1146
|
|
|
|
|
|
|
|
|
1147
|
|
|
|
|
|
|
Functionality for the :lang pseudo-class is largely replicated by using an |
|
1148
|
|
|
|
|
|
|
attribute selector for lang combined with a universal selector query. |
|
1149
|
|
|
|
|
|
|
|
|
1150
|
|
|
|
|
|
|
If this is insufficient I'd love to see a patch adding support for it. |
|
1151
|
|
|
|
|
|
|
|
|
1152
|
|
|
|
|
|
|
=head3 Other pseudo-classes |
|
1153
|
|
|
|
|
|
|
|
|
1154
|
|
|
|
|
|
|
W3C CSS 3 added a number of new behaviors that need support. At |
|
1155
|
|
|
|
|
|
|
this time there is no support for them, but we should work on adding support. |
|
1156
|
|
|
|
|
|
|
|
|
1157
|
|
|
|
|
|
|
Patches are very welcome. |
|
1158
|
|
|
|
|
|
|
|
|
1159
|
|
|
|
|
|
|
=head2 Pseudo-elements |
|
1160
|
|
|
|
|
|
|
|
|
1161
|
|
|
|
|
|
|
W3C CSS 2 and CSS 3 specifications define new concepts of pseudo-elements to |
|
1162
|
|
|
|
|
|
|
permit formatting based on information that lies outside the document tree. |
|
1163
|
|
|
|
|
|
|
See the following link for the most recent spec: |
|
1164
|
|
|
|
|
|
|
L<http://www.w3.org/TR/css3-selectors/#pseudo-elements> |
|
1165
|
|
|
|
|
|
|
|
|
1166
|
|
|
|
|
|
|
At this time there is no support for pseudo-elements, but we are working |
|
1167
|
|
|
|
|
|
|
on adding support. |
|
1168
|
|
|
|
|
|
|
|
|
1169
|
|
|
|
|
|
|
Patches are very welcome. |
|
1170
|
|
|
|
|
|
|
|
|
1171
|
|
|
|
|
|
|
=head2 Combining Selectors |
|
1172
|
|
|
|
|
|
|
|
|
1173
|
|
|
|
|
|
|
You can combine basic and hierarchical selectors into a single query |
|
1174
|
|
|
|
|
|
|
by separating each part with a comma. The query will select all matching |
|
1175
|
|
|
|
|
|
|
elements for each of the comma-delimited selectors. For example, to |
|
1176
|
|
|
|
|
|
|
find all C<a>, C<b> and C<i> elements in a tree: |
|
1177
|
|
|
|
|
|
|
|
|
1178
|
|
|
|
|
|
|
@elems = $query->query('a, b, i')->get_elements(); |
|
1179
|
|
|
|
|
|
|
|
|
1180
|
|
|
|
|
|
|
Each of these selectors can be arbitrarily complex. |
|
1181
|
|
|
|
|
|
|
|
|
1182
|
|
|
|
|
|
|
@elems = $query->query( |
|
1183
|
|
|
|
|
|
|
'table.search[width=100%] tr.result[valign=top] td.value, |
|
1184
|
|
|
|
|
|
|
form.search input[type=submit], |
|
1185
|
|
|
|
|
|
|
a[href=index.html]' |
|
1186
|
|
|
|
|
|
|
)->get_elements(); |
|
1187
|
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
=head1 EXPORT HOOKS |
|
1189
|
|
|
|
|
|
|
|
|
1190
|
|
|
|
|
|
|
=head2 Query |
|
1191
|
|
|
|
|
|
|
|
|
1192
|
|
|
|
|
|
|
The C<Query()> constructor subroutine (note the capital letter) can be |
|
1193
|
|
|
|
|
|
|
exported as a convenient way to create C<HTML::Query> objects. It simply |
|
1194
|
|
|
|
|
|
|
forwards all arguments to the L<new()> constructor method. |
|
1195
|
|
|
|
|
|
|
|
|
1196
|
|
|
|
|
|
|
use HTML::Query 'Query'; |
|
1197
|
|
|
|
|
|
|
|
|
1198
|
|
|
|
|
|
|
my $query = Query( file => $file, 'ul.menu li a' ); |
|
1199
|
|
|
|
|
|
|
|
|
1200
|
|
|
|
|
|
|
=head2 query |
|
1201
|
|
|
|
|
|
|
|
|
1202
|
|
|
|
|
|
|
The C<query> export hook can be called to monkey-patch a L<query()> method |
|
1203
|
|
|
|
|
|
|
into the L<HTML::Element> module. |
|
1204
|
|
|
|
|
|
|
|
|
1205
|
|
|
|
|
|
|
This is considered questionable behaviour in polite society which regards it |
|
1206
|
|
|
|
|
|
|
as a violation of the inner sanctity of the L<HTML::Element>. |
|
1207
|
|
|
|
|
|
|
|
|
1208
|
|
|
|
|
|
|
But if you're the kind of person that doesn't mind a bit of occasional |
|
1209
|
|
|
|
|
|
|
namespace abuse for the sake of getting the job done, then go right ahead. |
|
1210
|
|
|
|
|
|
|
Just don't blame me if it all blows up later. |
|
1211
|
|
|
|
|
|
|
|
|
1212
|
|
|
|
|
|
|
use HTML::Query 'query'; # note lower case 'q' |
|
1213
|
|
|
|
|
|
|
use HTML::TreeBuilder; |
|
1214
|
|
|
|
|
|
|
|
|
1215
|
|
|
|
|
|
|
# build a tree |
|
1216
|
|
|
|
|
|
|
my $tree = HTML::TreeBuilder->new; |
|
1217
|
|
|
|
|
|
|
$tree->parse_file($filename); |
|
1218
|
|
|
|
|
|
|
|
|
1219
|
|
|
|
|
|
|
# call the query() method on any element |
|
1220
|
|
|
|
|
|
|
my $query = $tree->query('ul li a'); |
|
1221
|
|
|
|
|
|
|
|
|
1222
|
|
|
|
|
|
|
=head1 METHODS |
|
1223
|
|
|
|
|
|
|
|
|
1224
|
|
|
|
|
|
|
The C<HTML::Query> object is a subclass of L<Badger::Base> and |
|
1225
|
|
|
|
|
|
|
inherits all of its methods. |
|
1226
|
|
|
|
|
|
|
|
|
1227
|
|
|
|
|
|
|
=head2 new(@elements,$selector) |
|
1228
|
|
|
|
|
|
|
|
|
1229
|
|
|
|
|
|
|
This constructor method is used to create a new C<HTML::Query> object. It |
|
1230
|
|
|
|
|
|
|
expects a list of any number (including zero) of |
|
1231
|
|
|
|
|
|
|
L<HTML::Element> or C<HTML::Query> objects. |
|
1232
|
|
|
|
|
|
|
|
|
1233
|
|
|
|
|
|
|
# single HTML::Element object |
|
1234
|
|
|
|
|
|
|
my $query = HTML::Query->new($elem); |
|
1235
|
|
|
|
|
|
|
|
|
1236
|
|
|
|
|
|
|
# multiple element objects |
|
1237
|
|
|
|
|
|
|
my $query = HTML::Query->new($elem1, $elem2, $elem3, ...); |
|
1238
|
|
|
|
|
|
|
|
|
1239
|
|
|
|
|
|
|
# copy elements from an existing query |
|
1240
|
|
|
|
|
|
|
my $query = HTML::Query->new($another_query); |
|
1241
|
|
|
|
|
|
|
|
|
1242
|
|
|
|
|
|
|
# copy elements from several queries |
|
1243
|
|
|
|
|
|
|
my $query = HTML::Query->new($query1, $query2, $query3); |
|
1244
|
|
|
|
|
|
|
|
|
1245
|
|
|
|
|
|
|
# or a mixture |
|
1246
|
|
|
|
|
|
|
my $query = HTML::Query->new($elem1, $query1, $elem2, $query3); |
|
1247
|
|
|
|
|
|
|
|
|
1248
|
|
|
|
|
|
|
You can also use named parameters to specify an alternate source for an |
|
1249
|
|
|
|
|
|
|
element. |
|
1250
|
|
|
|
|
|
|
|
|
1251
|
|
|
|
|
|
|
$query = HTML::Query->new( file => $file ); |
|
1252
|
|
|
|
|
|
|
$query = HTML::Query->new( text => $text ); |
|
1253
|
|
|
|
|
|
|
|
|
1254
|
|
|
|
|
|
|
In this case, the L<HTML::TreeBuilder> module is used to |
|
1255
|
|
|
|
|
|
|
parse the source file or text into a tree of L<HTML::Element> |
|
1256
|
|
|
|
|
|
|
objects. |
|
1257
|
|
|
|
|
|
|
|
|
1258
|
|
|
|
|
|
|
For the sake of completeness, you can also specify element trees and queries |
|
1259
|
|
|
|
|
|
|
using named parameters: |
|
1260
|
|
|
|
|
|
|
|
|
1261
|
|
|
|
|
|
|
$query = HTML::Query->new( tree => $tree ); |
|
1262
|
|
|
|
|
|
|
$query = HTML::Query->new( query => $query ); |
|
1263
|
|
|
|
|
|
|
|
|
1264
|
|
|
|
|
|
|
You can freely mix and match elements, queries and named sources. The |
|
1265
|
|
|
|
|
|
|
query will be constructed as an aggregate across them all. |
|
1266
|
|
|
|
|
|
|
|
|
1267
|
|
|
|
|
|
|
$q = HTML::Query->new( |
|
1268
|
|
|
|
|
|
|
text => $text1, |
|
1269
|
|
|
|
|
|
|
text => $text2, |
|
1270
|
|
|
|
|
|
|
file => $file1, |
|
1271
|
|
|
|
|
|
|
file => $file2, |
|
1272
|
|
|
|
|
|
|
tree => $tree, |
|
1273
|
|
|
|
|
|
|
query => $query1, |
|
1274
|
|
|
|
|
|
|
); |
|
1275
|
|
|
|
|
|
|
|
|
1276
|
|
|
|
|
|
|
The final, optional argument can be a selector specification. This is |
|
1277
|
|
|
|
|
|
|
immediately passed to the L<query()> method which will return a new query |
|
1278
|
|
|
|
|
|
|
with only those elements selected. |
|
1279
|
|
|
|
|
|
|
|
|
1280
|
|
|
|
|
|
|
my $spec = 'ul.menu li a'; # |
|
1281
|
|
|
|
|
|
|
|
|
1282
|
|
|
|
|
|
|
my $query = HTML::Query->new( $tree, $spec ); |
|
1283
|
|
|
|
|
|
|
my $query = HTML::Query->new( text => $text, $spec ); |
|
1284
|
|
|
|
|
|
|
my $query = HTML::Query->new( |
|
1285
|
|
|
|
|
|
|
text => $text, |
|
1286
|
|
|
|
|
|
|
file => $file, |
|
1287
|
|
|
|
|
|
|
$spec |
|
1288
|
|
|
|
|
|
|
); |
|
1289
|
|
|
|
|
|
|
|
|
1290
|
|
|
|
|
|
|
The list of arguments can also be passed by reference to a list. |
|
1291
|
|
|
|
|
|
|
|
|
1292
|
|
|
|
|
|
|
my $query = HTML::Query->new(\@args); |
|
1293
|
|
|
|
|
|
|
|
|
1294
|
|
|
|
|
|
|
=head2 query($spec) |
|
1295
|
|
|
|
|
|
|
|
|
1296
|
|
|
|
|
|
|
This method locates the descendant elements identified by the C<$spec> |
|
1297
|
|
|
|
|
|
|
argument for each element in the query. It then internally stores the results |
|
1298
|
|
|
|
|
|
|
for requerying or return. See get_elements(). |
|
1299
|
|
|
|
|
|
|
|
|
1300
|
|
|
|
|
|
|
my $query = HTML::Query->new(\@args); |
|
1301
|
|
|
|
|
|
|
my $results = $query->query($spec); |
|
1302
|
|
|
|
|
|
|
|
|
1303
|
|
|
|
|
|
|
See L<"QUERY SYNTAX"> for the permitted syntax of the C<$spec> argument. |
|
1304
|
|
|
|
|
|
|
|
|
1305
|
|
|
|
|
|
|
=head2 get_elements() |
|
1306
|
|
|
|
|
|
|
|
|
1307
|
|
|
|
|
|
|
This method returns the stored results from a query. In list context it returns a list of |
|
1308
|
|
|
|
|
|
|
matching L<HTML::Element> objects. In scalar context it returns a reference to |
|
1309
|
|
|
|
|
|
|
the results array. |
|
1310
|
|
|
|
|
|
|
|
|
1311
|
|
|
|
|
|
|
my $query = HTML::Query->new(\@args); |
|
1312
|
|
|
|
|
|
|
my $results = $query->query($spec); |
|
1313
|
|
|
|
|
|
|
|
|
1314
|
|
|
|
|
|
|
my @elements = $results->query($spec)->get_elements(); |
|
1315
|
|
|
|
|
|
|
my $elements = $results->query($spec)->get_elements(); |
|
1316
|
|
|
|
|
|
|
|
|
1317
|
|
|
|
|
|
|
=head2 size() |
|
1318
|
|
|
|
|
|
|
|
|
1319
|
|
|
|
|
|
|
Returns the number of elements in the query. |
|
1320
|
|
|
|
|
|
|
|
|
1321
|
|
|
|
|
|
|
=head2 first() |
|
1322
|
|
|
|
|
|
|
|
|
1323
|
|
|
|
|
|
|
Returns the first element in the query. |
|
1324
|
|
|
|
|
|
|
|
|
1325
|
|
|
|
|
|
|
my $elem = $query->first; |
|
1326
|
|
|
|
|
|
|
|
|
1327
|
|
|
|
|
|
|
If the query is empty then an exception will be thrown. If you would rather |
|
1328
|
|
|
|
|
|
|
have an undefined value returned then you can use the C<try> method inherited |
|
1329
|
|
|
|
|
|
|
from L<Badger::Base>. This effectively wraps the call to |
|
1330
|
|
|
|
|
|
|
C<first()> in an C<eval> block to catch any exceptions thrown. |
|
1331
|
|
|
|
|
|
|
|
|
1332
|
|
|
|
|
|
|
my $elem = $query->try('first') || warn "no first element\n"; |
|
1333
|
|
|
|
|
|
|
|
|
1334
|
|
|
|
|
|
|
=head2 last() |
|
1335
|
|
|
|
|
|
|
|
|
1336
|
|
|
|
|
|
|
Similar to L<first()>, but returning the last element in the query. |
|
1337
|
|
|
|
|
|
|
|
|
1338
|
|
|
|
|
|
|
my $elem = $query->last; |
|
1339
|
|
|
|
|
|
|
|
|
1340
|
|
|
|
|
|
|
=head2 list() |
|
1341
|
|
|
|
|
|
|
|
|
1342
|
|
|
|
|
|
|
Returns a list of the L<HTML::Element> objects in the query in |
|
1343
|
|
|
|
|
|
|
list context, or a reference to a list in scalar context. |
|
1344
|
|
|
|
|
|
|
|
|
1345
|
|
|
|
|
|
|
my @elems = $query->list; |
|
1346
|
|
|
|
|
|
|
my $elems = $query->list; |
|
1347
|
|
|
|
|
|
|
|
|
1348
|
|
|
|
|
|
|
=head2 AUTOLOAD |
|
1349
|
|
|
|
|
|
|
|
|
1350
|
|
|
|
|
|
|
The C<AUTOLOAD> method maps any other method calls to the |
|
1351
|
|
|
|
|
|
|
L<HTML::Element> objects in the list. When called in list |
|
1352
|
|
|
|
|
|
|
context it returns a list of the values returned from calling the method on |
|
1353
|
|
|
|
|
|
|
each element. In scalar context it returns a reference to a list of return |
|
1354
|
|
|
|
|
|
|
values. |
|
1355
|
|
|
|
|
|
|
|
|
1356
|
|
|
|
|
|
|
my @text_blocks = $query->as_trimmed_text; |
|
1357
|
|
|
|
|
|
|
my $text_blocks = $query->as_trimmed_text; |
|
1358
|
|
|
|
|
|
|
|
|
1359
|
|
|
|
|
|
|
=head1 KNOWN BUGS |
|
1360
|
|
|
|
|
|
|
|
|
1361
|
|
|
|
|
|
|
=head2 Attribute Values |
|
1362
|
|
|
|
|
|
|
|
|
1363
|
|
|
|
|
|
|
It is not possible to use C<]> in an attribute value. This is due to a |
|
1364
|
|
|
|
|
|
|
limitation in the parser which will be fixed RSN. |
|
1365
|
|
|
|
|
|
|
|
|
1366
|
|
|
|
|
|
|
=head1 AUTHOR |
|
1367
|
|
|
|
|
|
|
|
|
1368
|
|
|
|
|
|
|
Andy Wardley L<http://wardley.org/> |
|
1369
|
|
|
|
|
|
|
|
|
1370
|
|
|
|
|
|
|
=head1 MAINTAINER |
|
1371
|
|
|
|
|
|
|
|
|
1372
|
|
|
|
|
|
|
Kevin Kamel |
|
1373
|
|
|
|
|
|
|
|
|
1374
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
|
1375
|
|
|
|
|
|
|
|
|
1376
|
|
|
|
|
|
|
Vivek Khera |
|
1377
|
|
|
|
|
|
|
Michael Peters |
|
1378
|
|
|
|
|
|
|
David Gray |
|
1379
|
|
|
|
|
|
|
|
|
1380
|
|
|
|
|
|
|
=head1 COPYRIGHT |
|
1381
|
|
|
|
|
|
|
|
|
1382
|
|
|
|
|
|
|
Copyright (C) 2010 Andy Wardley. All Rights Reserved. |
|
1383
|
|
|
|
|
|
|
|
|
1384
|
|
|
|
|
|
|
This module is free software; you can redistribute it and/or modify it |
|
1385
|
|
|
|
|
|
|
under the same terms as Perl itself. |
|
1386
|
|
|
|
|
|
|
|
|
1387
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
1388
|
|
|
|
|
|
|
|
|
1389
|
|
|
|
|
|
|
L, L, |
|
1390
|
|
|
|
|
|
|
L, L, L |
|
1391
|
|
|
|
|
|
|
|
|
1392
|
|
|
|
|
|
|
=cut |
|
1393
|
|
|
|
|
|
|
|
|
1394
|
|
|
|
|
|
|
# Local Variables: |
|
1395
|
|
|
|
|
|
|
# mode: Perl |
|
1396
|
|
|
|
|
|
|
# perl-indent-level: 4 |
|
1397
|
|
|
|
|
|
|
# indent-tabs-mode: nil |
|
1398
|
|
|
|
|
|
|
# End: |
|
1399
|
|
|
|
|
|
|
# |
|
1400
|
|
|
|
|
|
|
# vim: expandtab shiftwidth=4: |
|