line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# This code can be redistributed and modified under the terms of the GNU |
2
|
|
|
|
|
|
|
# General Public License as published by the Free Software Foundation, either |
3
|
|
|
|
|
|
|
# version 3 of the License, or (at your option) any later version. |
4
|
|
|
|
|
|
|
# See the "COPYING" file for details. |
5
|
|
|
|
|
|
|
package HTML::Blitz::Matcher; |
6
|
11
|
|
|
11
|
|
95
|
use HTML::Blitz::pragma; |
|
11
|
|
|
|
|
25
|
|
|
11
|
|
|
|
|
68
|
|
7
|
11
|
|
|
|
|
724
|
use HTML::Blitz::SelectorType qw( |
8
|
|
|
|
|
|
|
LT_DESCENDANT |
9
|
|
|
|
|
|
|
LT_CHILD |
10
|
|
|
|
|
|
|
LT_SIBLING |
11
|
|
|
|
|
|
|
LT_ADJACENT_SIBLING |
12
|
11
|
|
|
11
|
|
9016
|
); |
|
11
|
|
|
|
|
29
|
|
13
|
11
|
|
|
11
|
|
84
|
use Scalar::Util (); |
|
11
|
|
|
|
|
24
|
|
|
11
|
|
|
|
|
424
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
use constant { |
16
|
11
|
|
|
|
|
1843
|
INTBITS => length(sprintf '%b', ~0), |
17
|
11
|
|
|
11
|
|
59
|
}; |
|
11
|
|
|
|
|
22
|
|
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
our $VERSION = '0.09'; |
20
|
|
|
|
|
|
|
|
21
|
276
|
50
|
|
276
|
0
|
644
|
method new($class: $rules) { |
|
276
|
50
|
|
|
|
574
|
|
|
276
|
|
|
|
|
431
|
|
|
276
|
|
|
|
|
466
|
|
|
276
|
|
|
|
|
350
|
|
22
|
276
|
|
|
|
|
2191
|
bless { |
23
|
|
|
|
|
|
|
slices => [ |
24
|
|
|
|
|
|
|
map [ $_, { cur => 0, stack => [{ extra_bits => 0 }] } ], @$rules |
25
|
|
|
|
|
|
|
], |
26
|
|
|
|
|
|
|
doc_state => [ |
27
|
|
|
|
|
|
|
{ |
28
|
|
|
|
|
|
|
nth_child => 0, |
29
|
|
|
|
|
|
|
nth_child_of_type => {}, |
30
|
|
|
|
|
|
|
on_leave => [], |
31
|
|
|
|
|
|
|
}, |
32
|
|
|
|
|
|
|
], |
33
|
|
|
|
|
|
|
}, $class |
34
|
|
|
|
|
|
|
} |
35
|
|
|
|
|
|
|
|
36
|
1546
|
|
|
1546
|
|
2613
|
fun _guniq(@values) { |
|
1546
|
|
|
|
|
1933
|
|
37
|
1546
|
|
|
|
|
2313
|
my ($seen_undef, %seen_ref, %seen_str); |
38
|
|
|
|
|
|
|
grep |
39
|
|
|
|
|
|
|
!( |
40
|
|
|
|
|
|
|
ref($_) ? $seen_ref{Scalar::Util::refaddr $_} : |
41
|
1546
|
0
|
|
|
|
6996
|
defined($_) ? $seen_str{$_} : |
|
|
50
|
|
|
|
|
|
42
|
|
|
|
|
|
|
$seen_undef |
43
|
|
|
|
|
|
|
)++, |
44
|
|
|
|
|
|
|
@values |
45
|
|
|
|
|
|
|
} |
46
|
|
|
|
|
|
|
|
47
|
1546
|
50
|
|
1546
|
0
|
3112
|
method enter($tag, $attributes) { |
|
1546
|
50
|
|
|
|
2791
|
|
|
1546
|
|
|
|
|
2210
|
|
|
1546
|
|
|
|
|
2865
|
|
|
1546
|
|
|
|
|
1918
|
|
48
|
1546
|
|
|
|
|
2423
|
my $doc_state = $self->{doc_state}; |
49
|
1546
|
|
|
|
|
2271
|
my $dsp = $doc_state->[-1]; |
50
|
1546
|
|
|
|
|
2364
|
my $nth_child = ++$dsp->{nth_child}; |
51
|
1546
|
|
|
|
|
3387
|
my $nth_child_of_type = ++$dsp->{nth_child_of_type}{$tag}; |
52
|
1546
|
|
|
|
|
4816
|
push @$doc_state, { |
53
|
|
|
|
|
|
|
nth_child => 0, |
54
|
|
|
|
|
|
|
nth_child_of_type => {}, |
55
|
|
|
|
|
|
|
on_leave => [], |
56
|
|
|
|
|
|
|
}; |
57
|
|
|
|
|
|
|
|
58
|
1546
|
|
|
|
|
2455
|
my @ret; |
59
|
|
|
|
|
|
|
|
60
|
1546
|
|
|
|
|
2021
|
for my $slice (@{$self->{slices}}) { |
|
1546
|
|
|
|
|
3077
|
|
61
|
1771
|
|
|
|
|
3243
|
my ($glass, $goop) = @$slice; |
62
|
1771
|
|
|
|
|
2540
|
my $cur = $goop->{cur}; |
63
|
1771
|
|
|
|
|
2490
|
my $stack = $goop->{stack}; |
64
|
1771
|
|
|
|
|
2370
|
my $sp = $stack->[-1]; |
65
|
1771
|
|
|
|
|
2570
|
my $extra_volatile = $sp->{extra_volatile}; |
66
|
1771
|
|
|
|
|
2772
|
$sp->{extra_volatile} = []; |
67
|
|
|
|
|
|
|
|
68
|
1771
|
|
|
|
|
3872
|
push @$stack, my $sp_next = { |
69
|
|
|
|
|
|
|
extra_bits => 0, |
70
|
|
|
|
|
|
|
}; |
71
|
1771
|
|
|
|
|
2468
|
my $cur_next; |
72
|
|
|
|
|
|
|
|
73
|
1771
|
|
|
|
|
2229
|
for my $i ($cur, @{$sp->{extra}}, @$extra_volatile) { |
|
1771
|
|
|
|
|
3548
|
|
74
|
1806
|
|
|
|
|
2640
|
my $sss = $glass->[$i]; |
75
|
1806
|
100
|
|
|
|
4642
|
$sss->matches($tag, $attributes, $nth_child, $nth_child_of_type) |
76
|
|
|
|
|
|
|
or next; |
77
|
|
|
|
|
|
|
|
78
|
521
|
|
|
|
|
1180
|
my $link = $sss->link_type; |
79
|
521
|
|
|
|
|
831
|
my $k = $i + 1; |
80
|
521
|
|
|
|
|
798
|
my $bit_shift = $k - $cur - 1; |
81
|
521
|
50
|
|
|
|
996
|
$bit_shift < INTBITS |
82
|
|
|
|
|
|
|
or die "Internal error: Too many combinators in a single selector (" . ($bit_shift + 1) . " exceeds limit of " . INTBITS . ")"; |
83
|
521
|
|
|
|
|
768
|
my $bit = 1 << $bit_shift; |
84
|
|
|
|
|
|
|
|
85
|
521
|
100
|
|
|
|
1001
|
if (!defined $link) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
86
|
439
|
|
|
|
|
929
|
push @ret, $glass->[$k]; |
87
|
|
|
|
|
|
|
} elsif ($link eq LT_DESCENDANT) { |
88
|
33
|
|
|
|
|
51
|
$cur_next = $k; |
89
|
|
|
|
|
|
|
} elsif ($link eq LT_CHILD) { |
90
|
44
|
50
|
|
|
|
89
|
if (!($sp_next->{extra_bits} & $bit)) { |
91
|
44
|
|
|
|
|
61
|
$sp_next->{extra_bits} |= $bit; |
92
|
44
|
|
|
|
|
53
|
push @{$sp_next->{extra}}, $k; |
|
44
|
|
|
|
|
101
|
|
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
} elsif ($link eq LT_SIBLING) { |
95
|
1
|
50
|
|
|
|
4
|
if (!($sp->{extra_bits} & $bit)) { |
96
|
1
|
|
|
|
|
3
|
$sp->{extra_bits} |= $bit; |
97
|
1
|
|
|
|
|
2
|
push @{$sp->{extra}}, $k; |
|
1
|
|
|
|
|
4
|
|
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
} elsif ($link eq LT_ADJACENT_SIBLING) { |
100
|
4
|
|
|
|
|
6
|
push @{$sp->{extra_volatile}}, $k; |
|
4
|
|
|
|
|
9
|
|
101
|
|
|
|
|
|
|
} else { |
102
|
0
|
|
|
|
|
0
|
die "Internal error: unexpected selector combinator '$link'"; |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
|
106
|
1771
|
100
|
|
|
|
4550
|
if (defined $cur_next) { |
107
|
33
|
|
|
|
|
77
|
$stack->[-1] = { |
108
|
|
|
|
|
|
|
cur => $cur, |
109
|
|
|
|
|
|
|
extra_bits => 0, |
110
|
|
|
|
|
|
|
}; |
111
|
33
|
|
|
|
|
90
|
$goop->{cur} = $cur_next; |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
_guniq @ret |
116
|
1546
|
|
|
|
|
3226
|
} |
117
|
|
|
|
|
|
|
|
118
|
1508
|
50
|
|
1508
|
0
|
2911
|
method leave(@args) { |
|
1508
|
|
|
|
|
2494
|
|
|
1508
|
|
|
|
|
2699
|
|
|
1508
|
|
|
|
|
1828
|
|
119
|
1508
|
|
|
|
|
1906
|
my $dsp = pop @{$self->{doc_state}}; |
|
1508
|
|
|
|
|
2723
|
|
120
|
1508
|
100
|
|
|
|
3399
|
if (defined(my $marker = $dsp->{marker})) { |
121
|
4
|
|
|
|
|
10
|
splice @{$self->{slices}}, $marker; |
|
4
|
|
|
|
|
15
|
|
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
|
124
|
1508
|
|
|
|
|
1915
|
for my $slice (@{$self->{slices}}) { |
|
1508
|
|
|
|
|
2976
|
|
125
|
1736
|
|
|
|
|
2587
|
my $goop = $slice->[1]; |
126
|
1736
|
|
|
|
|
2491
|
my $stack = $goop->{stack}; |
127
|
1736
|
|
|
|
|
2526
|
my $sp_prev = pop @$stack; |
128
|
1736
|
100
|
|
|
|
4716
|
if (defined(my $cur = $sp_prev->{cur})) { |
129
|
33
|
|
|
|
|
83
|
$goop->{cur} = $cur; |
130
|
|
|
|
|
|
|
} |
131
|
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
|
133
|
1508
|
|
|
|
|
2200
|
for my $cb (reverse @{$dsp->{on_leave}}) { |
|
1508
|
|
|
|
|
4528
|
|
134
|
9
|
|
|
|
|
25
|
$cb->(@args); |
135
|
|
|
|
|
|
|
} |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
|
138
|
9
|
50
|
|
9
|
0
|
23
|
method on_leave($callback) { |
|
9
|
50
|
|
|
|
33
|
|
|
9
|
|
|
|
|
16
|
|
|
9
|
|
|
|
|
19
|
|
|
9
|
|
|
|
|
11
|
|
139
|
9
|
|
|
|
|
14
|
push @{$self->{doc_state}[-1]{on_leave}}, $callback; |
|
9
|
|
|
|
|
36
|
|
140
|
|
|
|
|
|
|
} |
141
|
|
|
|
|
|
|
|
142
|
7
|
50
|
|
7
|
0
|
27
|
method add_temp_rule(@temp_rules) { |
|
7
|
|
|
|
|
15
|
|
|
7
|
|
|
|
|
13
|
|
|
7
|
|
|
|
|
9
|
|
143
|
7
|
|
|
|
|
12
|
my $slices = $self->{slices}; |
144
|
7
|
|
66
|
|
|
31
|
$self->{doc_state}[-1]{marker} //= @$slices; |
145
|
7
|
|
|
|
|
47
|
push @$slices, map [ $_, { cur => 0, stack => [{ extra_bits => 0 }] } ], @temp_rules; |
146
|
|
|
|
|
|
|
} |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
1 |