line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HTML::Selector::Element; |
2
|
|
|
|
|
|
|
our $VERSION = '0.95_03'; |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
## Adapted from HTML::Selector::XPath |
5
|
|
|
|
|
|
|
## The parser is basically the same, the difference is in what it produces. |
6
|
|
|
|
|
|
|
|
7
|
10
|
|
|
10
|
|
81495
|
use Carp; |
|
10
|
|
|
|
|
28
|
|
|
10
|
|
|
|
|
680
|
|
8
|
10
|
|
|
10
|
|
60
|
use strict; |
|
10
|
|
|
|
|
24
|
|
|
10
|
|
|
|
|
40914
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
sub import { |
11
|
7
|
|
|
7
|
|
64
|
my($class) = shift; |
12
|
7
|
50
|
|
|
|
31
|
if(@_) { |
13
|
7
|
|
|
|
|
4895
|
require HTML::Element; |
14
|
|
|
|
|
|
|
package # hide from pause |
15
|
|
|
|
|
|
|
HTML::Element; |
16
|
7
|
|
|
|
|
119910
|
local $^W; # no warnings 'redefined' doesn't work over there in Exporter |
17
|
7
|
|
|
|
|
761
|
HTML::Selector::Element::Trait->import(@_); |
18
|
|
|
|
|
|
|
} |
19
|
|
|
|
|
|
|
} |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
my $ident = qr/(?![0-9]|-[-0-9])[-_a-zA-Z0-9]+/; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
my $reg = { |
24
|
|
|
|
|
|
|
# tag name/id/class. Caveat: no namespace |
25
|
|
|
|
|
|
|
element => qr/^([#.]?)([^\s'"#.\/:@,=~>()\[\]|+]+)/i, |
26
|
|
|
|
|
|
|
# attribute presence |
27
|
|
|
|
|
|
|
attr1 => qr/^\[ \s* ($ident) \s* \]/x, |
28
|
|
|
|
|
|
|
# attribute value match |
29
|
|
|
|
|
|
|
attr2 => qr/^\[ \s* ($ident) \s* |
30
|
|
|
|
|
|
|
( [~|*^\$!]? = ) \s* |
31
|
|
|
|
|
|
|
(?: ($ident) | "([^"]*)" | '([^']*)') \s* \] /x, |
32
|
|
|
|
|
|
|
badattr => qr/^\[/, |
33
|
|
|
|
|
|
|
pseudoN => qr/^:(not|has|is)\(/i, # we chop off the closing parenthesis below in the code |
34
|
|
|
|
|
|
|
pseudo => qr/^:([()a-z0-9_+-]+)/i, |
35
|
|
|
|
|
|
|
# adjacency/direct descendance (test for comma first) |
36
|
|
|
|
|
|
|
combinator => qr/^\s*([>+~])\s*|^\s+/i, # doesn't capture matched whitespace |
37
|
|
|
|
|
|
|
# rule separator |
38
|
|
|
|
|
|
|
comma => qr/^\s*,\s*/i, |
39
|
|
|
|
|
|
|
}; |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
sub new { |
42
|
35
|
|
|
35
|
1
|
20080
|
my($class, @expr) = @_; |
43
|
35
|
|
|
|
|
86
|
my $self = bless {}, $class; |
44
|
35
|
|
|
|
|
112
|
$self->selector(@expr); |
45
|
35
|
|
|
|
|
155
|
return $self; |
46
|
|
|
|
|
|
|
} |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
sub selector { |
49
|
35
|
|
|
35
|
0
|
59
|
my $self = shift; |
50
|
35
|
50
|
|
|
|
94
|
if (@_) { |
51
|
35
|
|
|
|
|
60
|
delete @{$self}{qw(find is)}; |
|
35
|
|
|
|
|
107
|
|
52
|
35
|
|
|
|
|
122
|
$self->{selector} = join ', ', @_; |
53
|
35
|
|
|
|
|
88
|
$self->{parsed} = \my @parsed; |
54
|
35
|
|
|
|
|
76
|
foreach (@_) { |
55
|
35
|
|
|
|
|
168
|
my($parsed, $leftover) = $self->consume($_); |
56
|
35
|
50
|
|
|
|
109
|
length $leftover |
57
|
|
|
|
|
|
|
and die "Invalid rule, couldn't parse '$leftover'"; |
58
|
35
|
|
|
|
|
109
|
push @parsed, @$parsed; |
59
|
|
|
|
|
|
|
} |
60
|
|
|
|
|
|
|
} |
61
|
35
|
|
|
|
|
88
|
return $self->{selector}; |
62
|
|
|
|
|
|
|
} |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
sub convert_attribute_match { |
65
|
20
|
|
|
20
|
0
|
63
|
my ($left,$op,$right) = @_; |
66
|
|
|
|
|
|
|
# negation (e.g. [input!="text"]) isn't implemented in CSS, but include it anyway: |
67
|
20
|
100
|
|
|
|
94
|
if ($op eq '!=') { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
68
|
1
|
|
|
|
|
12
|
$left, qr/^(?!\Q$right\E$)/ |
69
|
|
|
|
|
|
|
} elsif ($op eq '~=') { # substring attribute match |
70
|
13
|
|
|
|
|
241
|
$left, qr/(?
|
71
|
|
|
|
|
|
|
} elsif ($op eq '*=') { # real substring attribute match |
72
|
1
|
|
|
|
|
14
|
$left, qr/\Q$right\E/ |
73
|
|
|
|
|
|
|
} elsif ($op eq '|=') { |
74
|
1
|
|
|
|
|
24
|
$left, qr/^\Q$right\E(?![^-])/ |
75
|
|
|
|
|
|
|
} elsif ($op eq '^=') { |
76
|
1
|
|
|
|
|
51
|
$left, qr/^\Q$right\E/ |
77
|
|
|
|
|
|
|
} elsif ($op eq '$=') { |
78
|
1
|
|
|
|
|
15
|
$left, qr/\Q$right\E$/ |
79
|
|
|
|
|
|
|
} else { # exact match |
80
|
2
|
|
|
|
|
6
|
$left, $right |
81
|
|
|
|
|
|
|
} |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
# for our purpose, "siblings" includes the element itself |
85
|
|
|
|
|
|
|
sub siblings { |
86
|
66
|
|
|
66
|
0
|
99
|
my($this) = @_; |
87
|
66
|
|
50
|
|
|
160
|
return children($this->{_parent}||return $this); |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
sub nth_of_type { |
91
|
11
|
|
|
11
|
0
|
31
|
my($of_type, $backward, $n, $cycle) = @_; |
92
|
|
|
|
|
|
|
# nth_child = nth_of_type without type filter |
93
|
11
|
|
100
|
|
|
32
|
$cycle ||= 0; |
94
|
11
|
50
|
33
|
|
|
28
|
if($n <= 0 && $cycle > 0) { |
95
|
|
|
|
|
|
|
# permanent correction |
96
|
0
|
|
0
|
|
|
0
|
$n %= $cycle ||= $cycle; # first value above 0 |
97
|
|
|
|
|
|
|
} |
98
|
66
|
|
|
66
|
|
2173
|
return sub { my($this) = @_; |
99
|
66
|
|
|
|
|
123
|
my @sibling = siblings($this); |
100
|
66
|
100
|
|
|
|
273
|
@sibling = grep $_->{_tag} eq $this->{_tag}, @sibling if $of_type; |
101
|
66
|
50
|
33
|
|
|
300
|
for(my $n = # lexical scratch copy |
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
102
|
|
|
|
|
|
|
$n > @sibling && $cycle < 0 |
103
|
|
|
|
|
|
|
? ($n-@sibling) % $cycle + @sibling # first value below upper bound as modulo <= 0 |
104
|
|
|
|
|
|
|
: $n ; # no correction |
105
|
|
|
|
|
|
|
$n > 0 && $n <= @sibling; # give up as soon as we get out of range |
106
|
|
|
|
|
|
|
$n += $cycle || last) # loop only once if $cycle is zero |
107
|
|
|
|
|
|
|
{ |
108
|
142
|
100
|
|
|
|
581
|
return 1 if $this == $sibling[$backward ? -$n : $n - 1]; |
|
|
100
|
|
|
|
|
|
109
|
|
|
|
|
|
|
} |
110
|
41
|
|
|
|
|
95
|
return; |
111
|
|
|
|
|
|
|
} |
112
|
11
|
|
|
|
|
67
|
} |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
sub only_child { |
115
|
0
|
|
|
0
|
0
|
0
|
my($this) = @_; |
116
|
0
|
|
|
|
|
0
|
return 1 == siblings($this); |
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
# A hacky recursive descent |
120
|
|
|
|
|
|
|
# Only descends for :not(...) and :has(...) |
121
|
|
|
|
|
|
|
sub consume { |
122
|
35
|
|
|
35
|
0
|
102
|
my ($self, $rule) = @_; |
123
|
|
|
|
|
|
|
|
124
|
35
|
|
|
|
|
71
|
my @alt; |
125
|
35
|
|
|
|
|
62
|
my $last_rule = ''; |
126
|
35
|
|
|
|
|
94
|
my $set = { static => my $static = [] }; |
127
|
35
|
|
|
|
|
97
|
my $hold; # last valid set |
128
|
|
|
|
|
|
|
my $sibling_root; # root element of search space is sibling of start element |
129
|
35
|
|
|
|
|
0
|
my($any); # flags |
130
|
35
|
|
|
|
|
52
|
my $start_combinators = ''; |
131
|
|
|
|
|
|
|
|
132
|
35
|
|
|
|
|
150
|
$rule =~ s/^\s+//; |
133
|
|
|
|
|
|
|
# Loop through each "unit" of the rule |
134
|
35
|
|
|
|
|
59
|
while() { |
135
|
|
|
|
|
|
|
# Match elements |
136
|
67
|
|
|
|
|
117
|
for($any = 0;; $any++) { # endless loop |
137
|
156
|
100
|
|
|
|
1520
|
if ($rule =~ s/$reg->{element}//) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
138
|
70
|
|
|
|
|
253
|
my ($id_class,$name) = ($1,$2); |
139
|
70
|
100
|
33
|
|
|
408
|
if ($id_class eq '#') { # ID |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
140
|
1
|
|
|
|
|
3
|
unshift @$static, id => $name; |
141
|
|
|
|
|
|
|
# a condition very likely to fail, so try this first |
142
|
|
|
|
|
|
|
} elsif ($id_class eq '.') { # class |
143
|
12
|
|
|
|
|
45
|
push @$static, convert_attribute_match('class', '~=', $name); |
144
|
|
|
|
|
|
|
} elsif (!$set->{tag} && $name ne '*') { |
145
|
|
|
|
|
|
|
# we're not adding '*' yet as that's a very loose condition that seldom fails |
146
|
|
|
|
|
|
|
# It's often not even necessary to test when we have other, more stringent conditions. |
147
|
57
|
|
|
|
|
125
|
$set->{tag} = $name; |
148
|
57
|
|
|
|
|
141
|
push @$static, _tag => $name; |
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
# Match attribute selectors |
152
|
|
|
|
|
|
|
elsif ($rule =~ s/$reg->{attr2}//) { |
153
|
7
|
|
|
|
|
18
|
push @$static, convert_attribute_match( $1, $2, $^N ); |
154
|
|
|
|
|
|
|
} elsif ($rule =~ s/$reg->{attr1}//) { |
155
|
|
|
|
|
|
|
# any value, as long as it's defined |
156
|
1
|
|
|
|
|
5
|
push @$static, convert_attribute_match( $1, '', qr// ); |
157
|
|
|
|
|
|
|
} elsif ($rule =~ $reg->{badattr}) { |
158
|
0
|
|
|
|
|
0
|
Carp::croak "Invalid attribute-value selector '$rule'"; |
159
|
|
|
|
|
|
|
} |
160
|
|
|
|
|
|
|
# Match :not and :has |
161
|
|
|
|
|
|
|
elsif ($rule =~ s/$reg->{pseudoN}//) { |
162
|
0
|
|
|
|
|
0
|
my $which = lc $1; |
163
|
|
|
|
|
|
|
# Now we parse the rest, and after parsing the subexpression |
164
|
|
|
|
|
|
|
# has stopped, we must find a matching closing parenthesis: |
165
|
0
|
|
|
|
|
0
|
my( $subset, $leftover ) = $self->consume( $rule ); |
166
|
0
|
|
|
|
|
0
|
$rule = $leftover; |
167
|
0
|
0
|
|
|
|
0
|
$rule =~ s!^\s*\)!! |
168
|
|
|
|
|
|
|
or die "Unbalanced parentheses at '$rule'"; |
169
|
0
|
0
|
|
|
|
0
|
if($which eq 'not') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
170
|
0
|
|
|
|
|
0
|
my @params = criteria($subset, undef); |
171
|
0
|
|
|
0
|
|
0
|
push @$static, sub { not look_self($_[0], @params) }; |
|
0
|
|
|
|
|
0
|
|
172
|
|
|
|
|
|
|
} elsif($which eq 'is') { |
173
|
0
|
|
|
|
|
0
|
my @params = criteria($subset, undef); |
174
|
0
|
|
|
0
|
|
0
|
push @$static, sub { look_self($_[0], @params) }; |
|
0
|
|
|
|
|
0
|
|
175
|
|
|
|
|
|
|
} elsif($which eq 'has') { |
176
|
|
|
|
|
|
|
# This is possibly very slow, especially when executed very often, so we keep this criterium for last |
177
|
0
|
|
|
|
|
0
|
push @{$set->{has}}, find_closure($subset); |
|
0
|
|
|
|
|
0
|
|
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
# other pseudoclasses/pseudoelements |
181
|
|
|
|
|
|
|
# "else" because there could be more than one :not/:has |
182
|
|
|
|
|
|
|
elsif ($rule =~ s/$reg->{pseudo}//) { |
183
|
11
|
|
|
|
|
36
|
my $simple = ":$1"; |
184
|
11
|
50
|
|
|
|
32
|
if ( my @expr = $self->parse_pseudo($1, \$rule) ) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
185
|
0
|
|
|
|
|
0
|
push @$static, @expr; |
186
|
|
|
|
|
|
|
} elsif ( $1 eq 'only-child') { |
187
|
0
|
|
|
|
|
0
|
push @$static, only_child(); |
188
|
|
|
|
|
|
|
} elsif (my @m = $1 =~ /^((?:first|last)-(?:child|of-type)$) | ^(nth-(?:last-)?(?:child|of-type)) \((odd|even|(\d+)|(-?\d*)n([\+\-]\d+)?)\)$/x) { |
189
|
|
|
|
|
|
|
# Matches all pseudoelements of the following lists: |
190
|
|
|
|
|
|
|
# - first-child, last-child, first-of-type, last-of-type: without expression |
191
|
|
|
|
|
|
|
# - nth-child, nth-of-type, nth-last-child, nth-last-of-type: with an expression between parens |
192
|
|
|
|
|
|
|
# of one of these types: odd, even, and an+b |
193
|
|
|
|
|
|
|
# with a lot of freedom for that last one, for example: |
194
|
|
|
|
|
|
|
# 3, 3n, 3n+1, n+5, -n+5, -3n+5, 3n-1 |
195
|
11
|
|
|
|
|
34
|
my($pseudo, $nth, $expr, $n, $cycle, $offset) = @m; |
196
|
11
|
100
|
|
|
|
20
|
if($nth) { |
197
|
9
|
100
|
|
|
|
26
|
if(defined $cycle) { |
|
|
100
|
|
|
|
|
|
198
|
4
|
100
|
|
|
|
18
|
$cycle .= '1' if $cycle =~ /^(-?)$/; |
199
|
4
|
|
66
|
|
|
14
|
$n = $offset || $cycle; |
200
|
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
elsif(!defined $n) { |
202
|
|
|
|
|
|
|
# even / odd |
203
|
3
|
|
|
|
|
6
|
$cycle = 2; |
204
|
3
|
100
|
|
|
|
10
|
$n = $expr eq 'odd' ? 1 : 2; |
205
|
|
|
|
|
|
|
} |
206
|
9
|
|
|
|
|
12
|
$pseudo = $nth; |
207
|
|
|
|
|
|
|
} |
208
|
|
|
|
|
|
|
else { |
209
|
|
|
|
|
|
|
# first / last |
210
|
2
|
|
|
|
|
4
|
$n = 1; |
211
|
|
|
|
|
|
|
} |
212
|
11
|
|
|
|
|
29
|
my $of_type = $pseudo =~ /of-type/; |
213
|
11
|
|
|
|
|
21
|
my $backward = $pseudo =~ /last/; |
214
|
11
|
|
|
|
|
32
|
push @$static, nth_of_type($of_type, $backward, $n+0, $cycle); |
215
|
|
|
|
|
|
|
} elsif ($1 =~ /^contains\($/) { |
216
|
|
|
|
|
|
|
# not sure if this will work well in practise, in regards to whitespace |
217
|
0
|
0
|
|
|
|
0
|
if( $rule =~ s/^\s*"([^"]*)"\s*\)// ) { # "#stupid syntax highlighter |
|
|
0
|
|
|
|
|
|
218
|
0
|
|
|
|
|
0
|
my $fragment = $1; |
219
|
0
|
|
|
0
|
|
0
|
push @$static, sub { $_[0]->as_text() =~ /\Q$fragment/ }; |
|
0
|
|
|
|
|
0
|
|
220
|
|
|
|
|
|
|
} elsif( $rule =~ s/^\s*'([^']*)'\s*\)// ) { #'#stupid syntax highlighter |
221
|
0
|
|
|
|
|
0
|
my $fragment = $1; |
222
|
0
|
|
|
0
|
|
0
|
push @$static, sub { $_[0]->as_text() =~ /\Q$fragment/ }; |
|
0
|
|
|
|
|
0
|
|
223
|
|
|
|
|
|
|
} else { |
224
|
0
|
|
|
|
|
0
|
return( $set, $rule ); |
225
|
0
|
|
|
|
|
0
|
die "Malformed string in :contains(): '$rule'"; |
226
|
|
|
|
|
|
|
}; |
227
|
|
|
|
|
|
|
} elsif ( $1 eq 'root') { |
228
|
|
|
|
|
|
|
# matches document root, or starting element |
229
|
0
|
|
|
|
|
0
|
$set->{is_root} = 1; |
230
|
|
|
|
|
|
|
} elsif ( $1 eq 'empty') { |
231
|
0
|
|
|
0
|
|
0
|
push @$static, sub { (shift)->is_empty }; |
|
0
|
|
|
|
|
0
|
|
232
|
|
|
|
|
|
|
} else { |
233
|
0
|
|
|
|
|
0
|
Carp::croak "Can't translate '$1' pseudo-class"; |
234
|
|
|
|
|
|
|
} |
235
|
|
|
|
|
|
|
} |
236
|
|
|
|
|
|
|
else { |
237
|
|
|
|
|
|
|
# failed to match anything |
238
|
67
|
|
|
|
|
140
|
last; |
239
|
|
|
|
|
|
|
} |
240
|
89
|
|
|
|
|
180
|
$any++; |
241
|
89
|
50
|
|
|
|
199
|
die "Endless loop?"if $any > 20000; |
242
|
89
|
|
|
|
|
156
|
undef $hold; |
243
|
|
|
|
|
|
|
} |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
# Match commas |
246
|
67
|
50
|
|
|
|
331
|
if ($rule =~ s/$reg->{comma}//o) { |
|
|
100
|
|
|
|
|
|
247
|
|
|
|
|
|
|
# ending one rule and beginning another |
248
|
0
|
|
0
|
|
|
0
|
$set->{tag} ||= do { push @$static, _tag => qr/^(?!~)/; '*' }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
249
|
0
|
0
|
0
|
|
|
0
|
$set->{sibling_root} ||= $sibling_root if $sibling_root; |
250
|
0
|
|
|
|
|
0
|
push @alt, $set; |
251
|
0
|
|
|
|
|
0
|
$set = { static => $static = [] }; |
252
|
0
|
|
|
|
|
0
|
($any, $hold, $sibling_root) = (); |
253
|
|
|
|
|
|
|
} |
254
|
|
|
|
|
|
|
# Match combinators (whitespace, >, + and ~) |
255
|
|
|
|
|
|
|
elsif ($rule =~ s/$reg->{combinator}//) { |
256
|
32
|
|
100
|
|
|
162
|
my $combinator = $1 || ' '; |
257
|
32
|
100
|
|
|
|
81
|
unless($any) { |
258
|
4
|
50
|
|
|
|
14
|
unless($set->{chained}) { |
259
|
|
|
|
|
|
|
# rule starts with a combinator |
260
|
|
|
|
|
|
|
# add match for start element |
261
|
4
|
|
|
|
|
11
|
$set->{is_root} = 1; # root element / start element |
262
|
|
|
|
|
|
|
} else { |
263
|
|
|
|
|
|
|
# 2 subsequent combinators: interject a '*' |
264
|
0
|
|
|
|
|
0
|
push @$static, _tag => qr/^(?!~)/; |
265
|
0
|
|
|
|
|
0
|
$set->{tag} = '*'; |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
} |
268
|
|
|
|
|
|
|
# new context |
269
|
32
|
|
|
|
|
62
|
($any, $hold) = (); |
270
|
32
|
100
|
|
|
|
80
|
$hold = $set unless $1; |
271
|
32
|
|
|
|
|
105
|
$set = { static => $static = [], chained => my $chained = $set, combinator => $combinator }; |
272
|
32
|
100
|
66
|
|
|
207
|
if($chained->{is_root} || $chained->{sibling_root}) { |
273
|
4
|
100
|
|
|
|
19
|
if($combinator =~ /([+~])/) { |
274
|
3
|
|
50
|
|
|
19
|
$set->{sibling_root} = ($chained->{sibling_root} || '') . $1; |
275
|
3
|
|
|
|
|
9
|
$sibling_root = $set; |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
} |
278
|
|
|
|
|
|
|
} |
279
|
|
|
|
|
|
|
else { |
280
|
35
|
|
|
|
|
61
|
last; |
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
# wrap up |
284
|
|
|
|
|
|
|
# rule ended in whitespace - This can only happen in nested rules such as :not( ... ) |
285
|
35
|
50
|
|
|
|
105
|
$set = $hold if $hold; |
286
|
35
|
|
66
|
|
|
90
|
$set->{tag} ||= do { push @$static, _tag => qr/^(?!~)/; '*' }; |
|
3
|
|
|
|
|
10
|
|
|
3
|
|
|
|
|
11
|
|
287
|
35
|
100
|
33
|
|
|
95
|
$set->{sibling_root} ||= $sibling_root if $sibling_root; |
288
|
|
|
|
|
|
|
|
289
|
35
|
|
|
|
|
71
|
push @alt, $set; |
290
|
35
|
|
|
|
|
124
|
return \@alt, $rule; |
291
|
|
|
|
|
|
|
} |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
sub criteria { |
294
|
|
|
|
|
|
|
# returns criteria for look_down, with bound closures |
295
|
33
|
|
|
33
|
1
|
76
|
my($set, $refroot, $strategy) = @_; |
296
|
33
|
|
50
|
|
|
197
|
$strategy ||= { banroot => 1 }; |
297
|
33
|
100
|
|
|
|
85
|
$strategy->{banroot} = 0 if not $refroot; |
298
|
33
|
|
|
|
|
49
|
my $recurse; |
299
|
|
|
|
|
|
|
$recurse = sub { |
300
|
|
|
|
|
|
|
# embeds $refroot |
301
|
57
|
|
|
57
|
|
125
|
my($set, $banroot) = @_; |
302
|
57
|
50
|
|
|
|
86
|
my @params = @{$set->{static}||[]}; |
|
57
|
|
|
|
|
247
|
|
303
|
|
|
|
|
|
|
|
304
|
57
|
100
|
|
|
|
149
|
if($set->{is_root}) { |
305
|
3
|
|
|
|
|
5
|
$banroot = 0; |
306
|
3
|
50
|
|
|
|
7
|
if($refroot) { |
307
|
|
|
|
|
|
|
# relative, top of branch |
308
|
3
|
|
|
|
|
11
|
unshift @params, sub { $_[0] == $$refroot }; # unlikely to succeed, so fail fast |
|
3
|
|
|
|
|
11
|
|
309
|
|
|
|
|
|
|
} |
310
|
|
|
|
|
|
|
else { |
311
|
|
|
|
|
|
|
# absolute, root of DOM |
312
|
0
|
|
|
|
|
0
|
unshift @params, _parent => undef; # unlikely to succeed, so fail fast |
313
|
|
|
|
|
|
|
} |
314
|
|
|
|
|
|
|
} |
315
|
|
|
|
|
|
|
|
316
|
57
|
100
|
66
|
|
|
225
|
if($set->{chained}) { |
|
|
100
|
|
|
|
|
|
317
|
27
|
|
|
|
|
59
|
push @params, do { |
318
|
|
|
|
|
|
|
# Value is an anonymous sub |
319
|
|
|
|
|
|
|
# Recurse into linked list |
320
|
27
|
|
|
|
|
226
|
my @params = $recurse->($set->{chained}, $banroot); |
321
|
27
|
|
|
|
|
60
|
my $combinator = $set->{combinator}; |
322
|
|
|
|
|
|
|
# we're in a chained set, so we have to wrap the criteria into a test in a closure. |
323
|
27
|
100
|
|
|
|
146
|
if ($combinator =~ /^\s+$/) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
324
|
62
|
|
|
|
|
1933
|
$banroot ? sub { my($this) = @_; |
325
|
62
|
|
100
|
|
|
171
|
my $rootparent = $$refroot->{_parent} || 0; |
326
|
62
|
|
66
|
|
|
211
|
while($this = $this->{_parent} and $this != $rootparent) { |
327
|
107
|
100
|
|
|
|
218
|
look_self($this, @params) |
328
|
|
|
|
|
|
|
and return 1; |
329
|
|
|
|
|
|
|
} |
330
|
5
|
|
|
|
|
13
|
return 0; |
331
|
|
|
|
|
|
|
} |
332
|
0
|
|
|
|
|
0
|
: sub { my($this) = @_; |
333
|
0
|
|
|
|
|
0
|
while($this = $this->{_parent}) { |
334
|
0
|
0
|
|
|
|
0
|
look_self($this, @params) |
335
|
|
|
|
|
|
|
and return 1; |
336
|
|
|
|
|
|
|
} |
337
|
0
|
|
|
|
|
0
|
return 0; |
338
|
|
|
|
|
|
|
} |
339
|
14
|
50
|
|
|
|
89
|
} |
340
|
|
|
|
|
|
|
elsif ($combinator =~ />/) { |
341
|
3
|
|
|
|
|
57
|
$banroot ? sub { my($this) = @_; |
342
|
3
|
|
100
|
|
|
13
|
my $rootparent = $$refroot->{_parent} || 0; |
343
|
3
|
50
|
33
|
|
|
13
|
if($this = $this->{_parent} and $this != $rootparent) { |
344
|
3
|
50
|
|
|
|
18
|
look_self($this, @params) |
345
|
|
|
|
|
|
|
and return 1; |
346
|
|
|
|
|
|
|
} |
347
|
0
|
|
|
|
|
0
|
return 0; |
348
|
|
|
|
|
|
|
} |
349
|
3
|
|
|
|
|
8
|
: sub { my($this) = @_; |
350
|
3
|
50
|
|
|
|
13
|
if($this = $this->{_parent}) { |
351
|
3
|
50
|
|
|
|
12
|
look_self($this, @params) |
352
|
|
|
|
|
|
|
and return 1; |
353
|
|
|
|
|
|
|
} |
354
|
0
|
|
|
|
|
0
|
return 0; |
355
|
6
|
100
|
|
|
|
35
|
}; |
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
elsif ($combinator =~ /\+/) { |
358
|
11
|
|
|
|
|
529
|
sub { my($this) = @_; |
359
|
11
|
|
|
|
|
37
|
my @left = $this->left; |
360
|
11
|
|
|
|
|
202
|
while(@left) { |
361
|
7
|
|
|
|
|
20
|
$this = pop @left; |
362
|
7
|
50
|
33
|
|
|
59
|
ref $this && $this->{_tag} && $this->{_tag} !~ /^~/ or next; |
|
|
|
33
|
|
|
|
|
363
|
7
|
50
|
|
|
|
36
|
look_self($this, @params) |
364
|
|
|
|
|
|
|
and return 1; |
365
|
0
|
|
|
|
|
0
|
return 0; |
366
|
|
|
|
|
|
|
} |
367
|
4
|
|
|
|
|
13
|
return 0; |
368
|
7
|
|
|
|
|
29
|
}; |
369
|
|
|
|
|
|
|
} |
370
|
|
|
|
|
|
|
elsif ($combinator =~ /\~/) { |
371
|
0
|
|
|
|
|
0
|
sub { my($this) = @_; |
372
|
0
|
|
|
|
|
0
|
my @left = $this->left; |
373
|
0
|
|
|
|
|
0
|
while(@left) { |
374
|
0
|
|
|
|
|
0
|
$this = pop @left; |
375
|
0
|
0
|
0
|
|
|
0
|
ref $this && $this->{_tag} !~ /^~/ or next; |
376
|
0
|
0
|
|
|
|
0
|
look_self($this, @params) |
377
|
|
|
|
|
|
|
and return 1; |
378
|
|
|
|
|
|
|
} |
379
|
0
|
|
|
|
|
0
|
return 0; |
380
|
|
|
|
|
|
|
} |
381
|
0
|
|
|
|
|
0
|
} |
382
|
|
|
|
|
|
|
else { |
383
|
0
|
|
|
|
|
0
|
die "Weird combinator '$combinator'" |
384
|
|
|
|
|
|
|
} |
385
|
|
|
|
|
|
|
}; |
386
|
|
|
|
|
|
|
} |
387
|
|
|
|
|
|
|
elsif($banroot && !$set->{is_root}) { |
388
|
|
|
|
|
|
|
# if :root was not specified, $root should never match |
389
|
24
|
|
|
|
|
103
|
push @params, sub { $_[0] != $$refroot }; # likely to succeed, so fail late |
|
142
|
|
|
|
|
4233
|
|
390
|
|
|
|
|
|
|
} |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
# do the :has tests last, because it's a complete subtree scan and that may be very slow. |
393
|
57
|
50
|
|
|
|
132
|
push @params, @{$set->{has}} if $set->{has}; |
|
0
|
|
|
|
|
0
|
|
394
|
57
|
50
|
|
|
|
284
|
return wantarray ? @params : \@params; |
395
|
33
|
|
|
|
|
220
|
}; |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
|
398
|
33
|
100
|
|
|
|
105
|
if(ref $set eq 'ARRAY') { |
399
|
30
|
50
|
|
|
|
82
|
if(@$set > 1) { |
400
|
0
|
|
|
|
|
0
|
my %tags; |
401
|
0
|
|
0
|
|
|
0
|
my @alt = map { $tags{$_->{tag}||'*'} = 1; [ $recurse->($_, !$refroot) ] } @$set; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
402
|
0
|
|
0
|
0
|
|
0
|
my @params = sub { my($this) = @_; look_self($this, @$_) and return 1 foreach @alt; return 0 };; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
403
|
0
|
0
|
|
0
|
|
0
|
unshift @params, sub { $tags{(shift)->{_tag}} } unless $tags{'*'}; |
|
0
|
|
|
|
|
0
|
|
404
|
0
|
0
|
|
|
|
0
|
return wantarray ? @params : \@params; |
405
|
|
|
|
|
|
|
} |
406
|
30
|
|
|
|
|
59
|
($set) = @$set; # non-destructive |
407
|
|
|
|
|
|
|
} |
408
|
33
|
100
|
|
|
|
88
|
if(ref $set eq 'HASH') { |
|
|
50
|
|
|
|
|
|
409
|
30
|
|
|
|
|
73
|
return $recurse->($set, $refroot); |
410
|
|
|
|
|
|
|
} |
411
|
|
|
|
|
|
|
elsif(ref $set) { |
412
|
|
|
|
|
|
|
# assumed method call |
413
|
3
|
|
|
|
|
21
|
return criteria($set->{parsed}, $refroot); |
414
|
|
|
|
|
|
|
} |
415
|
|
|
|
|
|
|
} |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
11
|
0
|
|
sub parse_pseudo { |
418
|
|
|
|
|
|
|
# nop, for subclassing |
419
|
|
|
|
|
|
|
} |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
sub find_closure { |
422
|
26
|
|
|
26
|
0
|
42
|
my $sets = shift; |
423
|
26
|
50
|
|
|
|
72
|
$sets = $sets->{parsed} if ref $sets ne 'ARRAY'; |
424
|
26
|
|
|
|
|
34
|
my $root; # The embedded variable |
425
|
26
|
|
|
|
|
45
|
my(@down, @via_right, $right_down, @right_filter); |
426
|
26
|
|
|
|
|
50
|
foreach my $set (@$sets) { |
427
|
26
|
50
|
|
|
|
58
|
unless($set->{sibling_root}) { |
|
|
100
|
|
|
|
|
|
428
|
25
|
|
|
|
|
63
|
push @down, $set; |
429
|
|
|
|
|
|
|
} |
430
|
0
|
|
|
|
|
0
|
elsif(ref $set->{sibling_root}) { |
431
|
0
|
|
|
|
|
0
|
push @via_right, $set; |
432
|
0
|
|
|
|
|
0
|
push @right_filter, $set->{sibling_root}; |
433
|
0
|
|
|
|
|
0
|
$right_down = 1; |
434
|
|
|
|
|
|
|
} |
435
|
|
|
|
|
|
|
else { |
436
|
1
|
|
|
|
|
4
|
push @via_right, $set; |
437
|
1
|
|
|
|
|
3
|
push @right_filter, $set; |
438
|
|
|
|
|
|
|
} |
439
|
|
|
|
|
|
|
} |
440
|
26
|
|
|
|
|
51
|
foreach my $array(\@down, \@via_right, \@right_filter) { |
441
|
78
|
100
|
|
|
|
215
|
@$array = criteria($array, \$root) if @$array; |
442
|
|
|
|
|
|
|
} |
443
|
26
|
100
|
|
|
|
57
|
unless(@via_right) { |
444
|
|
|
|
|
|
|
# the most common case: down only |
445
|
|
|
|
|
|
|
return sub { |
446
|
25
|
|
|
25
|
|
51
|
$root = shift; return $root->look_down(@down) |
|
25
|
|
|
|
|
132
|
|
447
|
25
|
|
|
|
|
168
|
}; |
448
|
|
|
|
|
|
|
} |
449
|
|
|
|
|
|
|
else { |
450
|
|
|
|
|
|
|
return sub { |
451
|
2
|
|
|
2
|
|
5
|
$root = shift; |
452
|
2
|
|
|
|
|
3
|
my($result, @result); |
453
|
2
|
50
|
|
|
|
7
|
if(@down) { |
454
|
|
|
|
|
|
|
# unlikely, but possible |
455
|
0
|
0
|
|
|
|
0
|
unless(wantarray) { |
456
|
0
|
0
|
|
|
|
0
|
$result = $root->look_down(@down) and return $result; |
457
|
|
|
|
|
|
|
} |
458
|
|
|
|
|
|
|
else { |
459
|
0
|
|
|
|
|
0
|
@result = $root->look_down(@down); |
460
|
|
|
|
|
|
|
} |
461
|
|
|
|
|
|
|
} |
462
|
2
|
50
|
|
|
|
7
|
if(my @right = grep { ref and look_self($_, @right_filter) } $root->right) { |
|
2
|
50
|
|
|
|
55
|
|
463
|
2
|
50
|
|
|
|
17
|
unless($right_down) { |
464
|
2
|
50
|
|
|
|
15
|
return wantarray ? (@result, @right) : shift @right; |
465
|
|
|
|
|
|
|
} |
466
|
0
|
0
|
|
|
|
0
|
unless(wantarray) { |
467
|
0
|
|
0
|
|
|
0
|
$result = $_->look_down(@via_right) and return $result foreach @right; |
468
|
|
|
|
|
|
|
} |
469
|
|
|
|
|
|
|
else { |
470
|
0
|
|
|
|
|
0
|
push @result, $_->look_down(@via_right) foreach @right; |
471
|
|
|
|
|
|
|
} |
472
|
|
|
|
|
|
|
} |
473
|
0
|
|
|
|
|
0
|
return @result; |
474
|
1
|
|
|
|
|
8
|
}; |
475
|
|
|
|
|
|
|
} |
476
|
|
|
|
|
|
|
} |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
# flipped |
479
|
|
|
|
|
|
|
sub find { |
480
|
27
|
|
|
27
|
1
|
56
|
my($self, $element) = @_; |
481
|
27
|
|
66
|
|
|
99
|
return ($self->{find} ||= find_closure($self->{parsed}))->($element) |
482
|
|
|
|
|
|
|
} |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
package HTML::Selector::Element::Trait; |
485
|
|
|
|
|
|
|
# core methods for trait that adds or overrides Selector support in HTML::Element |
486
|
|
|
|
|
|
|
# use as a superclass in a subclass of HTML::Element, putting it before HTML::Element in @ISA |
487
|
|
|
|
|
|
|
# or monkeypatch HTML::Element: import it into the HTML::Element package |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
require Carp; |
490
|
|
|
|
|
|
|
|
491
|
10
|
|
|
10
|
|
104
|
use Exporter 'import'; |
|
10
|
|
|
|
|
22
|
|
|
10
|
|
|
|
|
7178
|
|
492
|
|
|
|
|
|
|
our @EXPORT = qw(&find is closest); |
493
|
|
|
|
|
|
|
our @EXPORT_OK = qw(look_self siblings children &select &query); |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
sub children { # child elements, no fake elements |
496
|
66
|
|
|
66
|
0
|
102
|
my($this) = @_; |
497
|
66
|
50
|
33
|
|
|
97
|
return grep { ref and $_->{_tag} and $_->{_tag} !~ /^~/ } @{$this->{_content}||return}; |
|
462
|
50
|
|
|
|
2037
|
|
|
66
|
|
|
|
|
152
|
|
498
|
|
|
|
|
|
|
} |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
sub look_self { |
501
|
123
|
|
|
123
|
0
|
181
|
my $this = shift; |
502
|
123
|
|
|
|
|
184
|
my($attr, $value, $matcher); |
503
|
123
|
|
|
|
|
238
|
while(@_) { |
504
|
|
|
|
|
|
|
# For speed reasons, no nested scopes and no block scope lexical variables |
505
|
|
|
|
|
|
|
ref ($attr = shift) or |
506
|
195
|
0
|
50
|
|
|
1176
|
2 != (defined($matcher = shift) + defined($value = $this->{$attr}) || next) ? return |
|
|
100
|
0
|
|
|
|
|
|
|
50
|
66
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
507
|
|
|
|
|
|
|
: ref $value ? # identical class and stringification or fail |
508
|
|
|
|
|
|
|
ref $matcher eq ref $value && $matcher eq $value ? next : return |
509
|
|
|
|
|
|
|
: ref $matcher |
510
|
|
|
|
|
|
|
? ref $matcher eq 'Regexp' && $value =~ $matcher ? next : return |
511
|
|
|
|
|
|
|
: $value eq $matcher ? next : return; |
512
|
70
|
50
|
|
|
|
189
|
ref $attr eq 'CODE' and $attr->($this) ? next : return; |
|
|
50
|
|
|
|
|
|
513
|
|
|
|
|
|
|
# standard processing ends here |
514
|
0
|
0
|
|
|
|
0
|
if(ref $attr eq 'ARRAY') { |
515
|
0
|
|
|
|
|
0
|
my $success; |
516
|
0
|
|
|
|
|
0
|
foreach my $rule (@$attr) { |
517
|
0
|
0
|
|
|
|
0
|
next if ref $rule ne 'ARRAY'; |
518
|
0
|
0
|
|
|
|
0
|
$success = look_self($this, @$rule) and last; |
519
|
|
|
|
|
|
|
} |
520
|
0
|
0
|
|
|
|
0
|
$success and next; |
521
|
|
|
|
|
|
|
} |
522
|
|
|
|
|
|
|
# unknown doesn't match |
523
|
0
|
|
|
|
|
0
|
return; |
524
|
|
|
|
|
|
|
} |
525
|
73
|
|
|
|
|
255
|
return $this; # matches |
526
|
|
|
|
|
|
|
} |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
my %store; |
529
|
|
|
|
|
|
|
sub find { |
530
|
|
|
|
|
|
|
# backward compatible with find_by_tag_name in HTMl::Element if you stick to normal tags |
531
|
|
|
|
|
|
|
# If you do need special tags (= starting with "~"), find_by_tag_name is still available, and faster than look_down anyway |
532
|
23
|
|
|
23
|
0
|
56091
|
my($element) = shift; |
533
|
23
|
|
33
|
|
|
164
|
my $selector = $store{join ', ', @_} ||= HTML::Selector::Element->new(@_); |
534
|
23
|
|
|
|
|
63
|
return $selector->find($element); |
535
|
|
|
|
|
|
|
} |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
sub is { |
538
|
1
|
|
|
1
|
0
|
14903
|
my($element) = shift; |
539
|
1
|
50
|
|
|
|
6
|
@_ or return; |
540
|
1
|
|
33
|
|
|
11
|
my $selector = $store{join ', ', @_} ||= HTML::Selector::Element->new(@_); |
541
|
1
|
|
50
|
|
|
7
|
$selector->{is} ||= [$selector->criteria]; |
542
|
1
|
|
|
|
|
2
|
return look_self($element, @{$selector->{is}}); |
|
1
|
|
|
|
|
11
|
|
543
|
|
|
|
|
|
|
} |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
sub closest { |
546
|
0
|
|
|
0
|
0
|
0
|
my($element) = shift; |
547
|
0
|
|
0
|
|
|
0
|
my $selector = $store{join ', ', @_} ||= HTML::Selector::Element->new(@_); |
548
|
0
|
|
0
|
|
|
0
|
$selector->{is} ||= [$selector->criteria]; |
549
|
0
|
|
|
|
|
0
|
return $element->look_up(@{$selector->{is}}); |
|
0
|
|
|
|
|
0
|
|
550
|
|
|
|
|
|
|
} |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
sub select { |
553
|
|
|
|
|
|
|
# same as find except the criteria are absolute in the DOM, instead of relative to the start element |
554
|
|
|
|
|
|
|
# only searches down, never below siblings |
555
|
2
|
|
|
2
|
0
|
27736
|
my($element) = shift; |
556
|
2
|
|
33
|
|
|
27
|
my $selector = $store{join ', ', @_} ||= HTML::Selector::Element->new(@_); |
557
|
2
|
|
50
|
|
|
35
|
$selector->{is} ||= [$selector->criteria]; |
558
|
2
|
|
|
|
|
5
|
return $element->look_down(@{$selector->{is}}); |
|
2
|
|
|
|
|
19
|
|
559
|
|
|
|
|
|
|
} |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
# alias |
562
|
|
|
|
|
|
|
*query = \&select; |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
package HTML::Selector::Element; |
565
|
|
|
|
|
|
|
# round up: import subs from Trait |
566
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
HTML::Selector::Element::Trait->import(qw(look_self children)); |
568
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
1; |