line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# This code can be redistributed and modified under the terms of the GNU Affero |
2
|
|
|
|
|
|
|
# General Public License as published by the Free Software Foundation, either |
3
|
|
|
|
|
|
|
# version 3 of the License, or (at your option) any later version. |
4
|
|
|
|
|
|
|
# See the "COPYING" file for details. |
5
|
|
|
|
|
|
|
package HTML::Blitz::Matcher; |
6
|
11
|
|
|
11
|
|
87
|
use HTML::Blitz::pragma; |
|
11
|
|
|
|
|
32
|
|
|
11
|
|
|
|
|
78
|
|
7
|
11
|
|
|
|
|
679
|
use HTML::Blitz::SelectorType qw( |
8
|
|
|
|
|
|
|
LT_DESCENDANT |
9
|
|
|
|
|
|
|
LT_CHILD |
10
|
|
|
|
|
|
|
LT_SIBLING |
11
|
|
|
|
|
|
|
LT_ADJACENT_SIBLING |
12
|
11
|
|
|
11
|
|
9251
|
); |
|
11
|
|
|
|
|
41
|
|
13
|
11
|
|
|
11
|
|
81
|
use Scalar::Util (); |
|
11
|
|
|
|
|
37
|
|
|
11
|
|
|
|
|
420
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
# INTBITS: how many bits a native Perl integer holds on this build. It is
# measured by formatting the all-ones value (~0) as a binary string and
# counting the digits. enter() uses it as the upper bound for shift amounts
# when building de-duplication bit masks.
use constant INTBITS => length(sprintf('%b', ~0));
|
11
|
|
|
|
|
31
|
|
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
our $VERSION = '0.07'; |
20
|
|
|
|
|
|
|
|
21
|
273
|
50
|
|
273
|
0
|
642
|
# Constructor.
#
# $rules is an array reference. Every element becomes the first half of a
# "slice" pair [ $rule, $state ], where $state is the mutable matching state
# kept for that rule:
#   cur   - index into the rule from which matching currently starts
#   stack - one frame per open element; every frame carries an 'extra_bits'
#           mask (other keys are added on demand by enter())
#
# doc_state is a parallel stack with one frame per open element, seeded here
# with a root frame. Each frame counts the children seen so far (overall and
# per tag name) and holds the callbacks registered through on_leave().
method new($class: $rules) {
    my @slices = map {
        [ $_, { cur => 0, stack => [ { extra_bits => 0 } ] } ]
    } @$rules;

    my %root_frame = (
        nth_child         => 0,
        nth_child_of_type => {},
        on_leave          => [],
    );

    return bless {
        slices    => \@slices,
        doc_state => [ \%root_frame ],
    }, $class;
}
35
|
|
|
|
|
|
|
|
36
|
1527
|
|
|
1527
|
|
2642
|
# Generalized "uniq": returns @values with duplicates dropped, keeping the
# first occurrence of each value and the original order.
#
# Identity is decided per kind of value:
#   - references are compared by address (Scalar::Util::refaddr)
#   - defined non-references are compared by their string value
#   - undef is treated as a single distinct value
#
# This is a plain grep, so scalar context yields the number of survivors.
fun _guniq(@values) {
    my (%ref_seen, %str_seen);
    my $undef_seen = 0;

    return grep {
        my $item = $_;

        # Pick the counter that tracks this item's identity.
        my $counter =
              ref $item     ? \$ref_seen{ Scalar::Util::refaddr($item) }
            : defined $item ? \$str_seen{$item}
            :                 \$undef_seen;

        # Post-increment: true only the first time this counter is touched.
        !$$counter++
    } @values;
}
46
|
|
|
|
|
|
|
|
47
|
1527
|
50
|
|
1527
|
0
|
3151
|
# Registers the opening of an element and reports which rules it completes.
#
# Parameters:
#   $tag        - the element's tag name (also the key for per-type counting)
#   $attributes - passed through unchanged to each selector step's matches()
#
# Effects on doc_state: the parent frame's child counters are incremented and
# a fresh frame is pushed for the new element.
#
# For every slice [ $chain, $state ] a new stack frame is pushed, then a set
# of candidate step indices is tried against the element:
#   - $state->{cur}
#   - the indices in the parent frame's 'extra' list
#   - the indices that were in the parent frame's 'extra_volatile' list,
#     which is emptied first, so those apply to this one enter() call only
#
# When the step at index $i matches, its link_type decides where index $i + 1
# goes next:
#   undef               - $chain->[$i + 1] is collected as a result
#   LT_DESCENDANT       - becomes the new $state->{cur}; the old value is
#                         saved in the new frame under 'cur'
#   LT_CHILD            - appended to the new frame's 'extra'
#   LT_SIBLING          - appended to the parent frame's 'extra'
#   LT_ADJACENT_SIBLING - appended to the parent frame's 'extra_volatile'
# The 'extra_bits' mask of the affected frame keeps an index from being added
# to an 'extra' list twice.
#
# Returns the collected results with duplicates removed (see _guniq).
# Dies with an "Internal error" message if a bit offset reaches INTBITS or a
# link type is not one of the values above.
method enter($tag, $attributes) {
    my $doc_state = $self->{doc_state};
    my $parent    = $doc_state->[-1];

    my $nth_child         = ++$parent->{nth_child};
    my $nth_child_of_type = ++$parent->{nth_child_of_type}{$tag};

    push @$doc_state, {
        nth_child         => 0,
        nth_child_of_type => {},
        on_leave          => [],
    };

    my @matched;

    for my $slice (@{ $self->{slices} }) {
        my ($chain, $state) = @$slice;
        my $base   = $state->{cur};
        my $frames = $state->{stack};
        my $frame  = $frames->[-1];

        # Take over the pending adjacent-sibling candidates and reset the
        # list; matches found below may refill it for the next sibling.
        my $volatile = $frame->{extra_volatile};
        $frame->{extra_volatile} = [];

        my $child_frame = { extra_bits => 0 };
        push @$frames, $child_frame;

        my $advance_to;

        # The candidate list is built before the loop body runs, so indices
        # appended to $frame->{extra} below are not visited in this pass.
        for my $i ($base, @{ $frame->{extra} }, @$volatile) {
            my $step = $chain->[$i];
            next unless $step->matches($tag, $attributes, $nth_child, $nth_child_of_type);

            my $link     = $step->link_type;
            my $next_idx = $i + 1;

            # Bit position of $next_idx relative to the current base index.
            my $offset = $next_idx - $base - 1;
            die "Internal error: Too many combinators in a single selector (" . ($offset + 1) . " exceeds limit of " . INTBITS . ")"
                unless $offset < INTBITS;
            my $mask = 1 << $offset;

            if (!defined $link) {
                push @matched, $chain->[$next_idx];
                next;
            }

            if ($link eq LT_DESCENDANT) {
                $advance_to = $next_idx;
                next;
            }

            if ($link eq LT_CHILD) {
                unless ($child_frame->{extra_bits} & $mask) {
                    $child_frame->{extra_bits} |= $mask;
                    push @{ $child_frame->{extra} }, $next_idx;
                }
                next;
            }

            if ($link eq LT_SIBLING) {
                unless ($frame->{extra_bits} & $mask) {
                    $frame->{extra_bits} |= $mask;
                    push @{ $frame->{extra} }, $next_idx;
                }
                next;
            }

            if ($link eq LT_ADJACENT_SIBLING) {
                push @{ $frame->{extra_volatile} }, $next_idx;
                next;
            }

            die "Internal error: unexpected selector combinator '$link'";
        }

        if (defined $advance_to) {
            # Replace the frame pushed above with one that remembers the old
            # base index, then move the base forward.
            $frames->[-1] = {
                cur        => $base,
                extra_bits => 0,
            };
            $state->{cur} = $advance_to;
        }
    }

    return _guniq(@matched);
}
117
|
|
|
|
|
|
|
|
118
|
1493
|
50
|
|
1493
|
0
|
2923
|
# Registers the closing of the most recently entered element.
#
# Steps, in order:
#   1. Pop that element's doc_state frame.
#   2. If the frame has a 'marker' (set by add_temp_rule), truncate the slice
#      list back to that length, dropping the temporary rules.
#   3. For every remaining slice, pop its top stack frame; if that frame
#      saved a 'cur' value, restore it as the slice's current index.
#   4. Invoke the frame's on_leave callbacks in reverse registration order,
#      passing @args to each.
method leave(@args) {
    my $closed = pop @{ $self->{doc_state} };

    my $marker = $closed->{marker};
    splice @{ $self->{slices} }, $marker if defined $marker;

    for my $slice (@{ $self->{slices} }) {
        my $state     = $slice->[1];
        my $frame     = pop @{ $state->{stack} };
        my $saved_cur = $frame->{cur};
        $state->{cur} = $saved_cur if defined $saved_cur;
    }

    for my $callback (reverse @{ $closed->{on_leave} }) {
        $callback->(@args);
    }
}
137
|
|
|
|
|
|
|
|
138
|
8
|
50
|
|
8
|
0
|
24
|
# Queues $callback on the innermost doc_state frame. leave() runs the queued
# callbacks for a frame when that frame is popped, last registered first.
# Returns what push returns: the new number of queued callbacks.
method on_leave($callback) {
    my $doc_state = $self->{doc_state};
    return push @{ $doc_state->[-1]{on_leave} }, $callback;
}
141
|
|
|
|
|
|
|
|
142
|
6
|
50
|
|
6
|
0
|
17
|
# Appends @temp_rules as additional slices, each starting with fresh matching
# state (cur 0 and a single stack frame).
#
# The first call made while a given doc_state frame is innermost stores the
# slice count from before the addition in that frame's 'marker'; later calls
# for the same frame leave it untouched. leave() uses the marker to cut the
# slice list back to that length.
#
# Returns what push returns: the new number of slices.
method add_temp_rule(@temp_rules) {
    my $slices    = $self->{slices};
    my $doc_state = $self->{doc_state};

    $doc_state->[-1]{marker} //= scalar @$slices;

    my @fresh = map {
        [ $_, { cur => 0, stack => [ { extra_bits => 0 } ] } ]
    } @temp_rules;

    return push @$slices, @fresh;
}
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
1 |