line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HTML::ListScraper::Sweep; |
2
|
|
|
|
|
|
|
|
3
|
4
|
|
|
4
|
|
20
|
use warnings; |
|
4
|
|
|
|
|
6
|
|
|
4
|
|
|
|
|
162
|
|
4
|
4
|
|
|
4
|
|
19
|
use strict; |
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
82
|
|
5
|
|
|
|
|
|
|
|
6
|
4
|
|
|
4
|
|
3514
|
use Algorithm::NeedlemanWunsch; |
|
4
|
|
|
|
|
14772
|
|
|
4
|
|
|
|
|
130
|
|
7
|
4
|
|
|
4
|
|
2201
|
use HTML::ListScraper::Dust; |
|
4
|
|
|
|
|
14
|
|
|
4
|
|
|
|
|
4899
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
my $match_score = 2; |
10
|
|
|
|
|
|
|
my $mismatch_score = -4; |
11
|
|
|
|
|
|
|
my $gap_open_penalty = -2; |
12
|
|
|
|
|
|
|
my $gap_extend_penalty = -1; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
sub _score_sub { |
15
|
125297
|
|
|
125297
|
|
5643270
|
my ($a, $b) = @_; |
16
|
|
|
|
|
|
|
|
17
|
125297
|
100
|
|
|
|
541213
|
return ($a eq $b) ? $match_score : $mismatch_score; |
18
|
|
|
|
|
|
|
} |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
sub new { |
21
|
57
|
|
|
57
|
0
|
107
|
my $class = shift; |
22
|
57
|
|
|
|
|
307
|
my $self = { @_ }; |
23
|
|
|
|
|
|
|
|
24
|
57
|
|
|
|
|
271
|
my $iseq = $self->{book}->get_internal_sequence; |
25
|
57
|
|
|
|
|
116
|
my $first = $self->{begin}; |
26
|
57
|
|
|
|
|
141
|
my $last = $self->{end} - 1; |
27
|
|
|
|
|
|
|
# warn "sweeping $first .. $last\n"; |
28
|
57
|
|
|
|
|
2422
|
my @seq = @$iseq[$first .. $last]; |
29
|
57
|
|
|
|
|
339
|
$self->{haystack} = \@seq; |
30
|
|
|
|
|
|
|
|
31
|
57
|
|
|
|
|
654
|
my @sign = split //, $self->{sign}; |
32
|
57
|
50
|
|
|
|
206
|
if (scalar(@sign) < 2) { |
33
|
0
|
|
|
|
|
0
|
die "sequence signature too short"; |
34
|
|
|
|
|
|
|
} |
35
|
|
|
|
|
|
|
|
36
|
57
|
|
|
|
|
142
|
$self->{needle} = \@sign; |
37
|
|
|
|
|
|
|
|
38
|
57
|
|
|
|
|
133
|
bless $self, $class; |
39
|
|
|
|
|
|
|
|
40
|
57
|
|
|
|
|
182
|
return $self; |
41
|
|
|
|
|
|
|
} |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
sub create_dust { |
44
|
57
|
|
|
57
|
0
|
99
|
my $self = shift; |
45
|
|
|
|
|
|
|
|
46
|
57
|
|
|
|
|
283
|
$self->{dust} = HTML::ListScraper::Dust->new(); |
47
|
|
|
|
|
|
|
|
48
|
57
|
100
|
|
|
|
91
|
if (scalar(@{$self->{haystack}}) > 2) { |
|
57
|
|
|
|
|
209
|
|
49
|
53
|
|
|
|
|
272
|
my $matcher = Algorithm::NeedlemanWunsch->new(\&_score_sub); |
50
|
53
|
|
|
|
|
652
|
$matcher->gap_open_penalty($gap_open_penalty); |
51
|
53
|
|
|
|
|
489
|
$matcher->gap_extend_penalty($gap_extend_penalty); |
52
|
53
|
|
|
|
|
484
|
$matcher->local(1); |
53
|
|
|
|
|
|
|
|
54
|
53
|
|
|
|
|
457
|
$self->{found} = [ ]; |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
my $score = $matcher->align($self->{haystack}, |
57
|
|
|
|
|
|
|
$self->{needle}, |
58
|
53
|
|
|
6363
|
|
482
|
{ select_align => sub { $self->_on_align($_[0]); } }); |
|
6363
|
|
|
|
|
340797
|
|
59
|
|
|
|
|
|
|
|
60
|
53
|
|
|
|
|
16001
|
my $found_count = scalar(@{$self->{found}}); |
|
53
|
|
|
|
|
192
|
|
61
|
53
|
50
|
|
|
|
219
|
if ($found_count >= 2) { |
62
|
53
|
|
|
|
|
191
|
my @round = $self->_make_presentable; |
63
|
53
|
100
|
100
|
|
|
300
|
if ((scalar(@round) >= 2) && |
64
|
51
|
|
|
|
|
622
|
(scalar(@round) > (scalar(@{$self->{needle}}) / 2))) { |
65
|
29
|
100
|
|
|
|
101
|
if (scalar(@round) < $found_count) { |
66
|
4
|
|
|
|
|
11
|
$score = undef; |
67
|
|
|
|
|
|
|
} |
68
|
|
|
|
|
|
|
|
69
|
29
|
|
|
|
|
68
|
my $begin = $self->{begin}; |
70
|
29
|
|
|
|
|
59
|
my $end = $round[0]; |
71
|
29
|
100
|
|
|
|
89
|
if ($begin < $end) { |
72
|
|
|
|
|
|
|
my $sweep = HTML::ListScraper::Sweep->new( |
73
|
|
|
|
|
|
|
book => $self->{book}, sign => $self->{sign}, |
74
|
25
|
|
|
|
|
169
|
begin => $begin, end => $end); |
75
|
25
|
|
|
|
|
122
|
my $dust = $sweep->create_dust; |
76
|
25
|
|
|
|
|
143
|
my $before = $dust->get_alignments; |
77
|
25
|
|
|
|
|
115
|
$self->{dust}->add_alignments_before($before); |
78
|
|
|
|
|
|
|
} |
79
|
|
|
|
|
|
|
|
80
|
29
|
|
|
|
|
125
|
$self->{dust}->add_alignment($score, \@round); |
81
|
|
|
|
|
|
|
|
82
|
29
|
|
|
|
|
911
|
$begin = $round[-1] + 1; |
83
|
29
|
|
|
|
|
69
|
$end = $self->{end}; |
84
|
29
|
100
|
|
|
|
241
|
if ($begin < $end) { |
85
|
|
|
|
|
|
|
my $sweep = HTML::ListScraper::Sweep->new( |
86
|
|
|
|
|
|
|
book => $self->{book}, sign => $self->{sign}, |
87
|
16
|
|
|
|
|
80
|
begin => $begin, end => $end); |
88
|
16
|
|
|
|
|
54
|
my $dust = $sweep->create_dust; |
89
|
16
|
|
|
|
|
66
|
my $after = $dust->get_alignments; |
90
|
16
|
|
|
|
|
74
|
$self->{dust}->add_alignments_after($after); |
91
|
|
|
|
|
|
|
} |
92
|
|
|
|
|
|
|
} |
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
|
96
|
57
|
|
|
|
|
234
|
return $self->{dust}; |
97
|
|
|
|
|
|
|
} |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
sub _on_align { |
100
|
6363
|
|
|
6363
|
|
9993
|
my ($self, $arg) = @_; |
101
|
|
|
|
|
|
|
|
102
|
6363
|
100
|
|
|
|
15483
|
if (exists($arg->{align})) { |
103
|
683
|
|
|
|
|
928
|
my ($i, $j) = @{$arg->{align}}; |
|
683
|
|
|
|
|
1343
|
|
104
|
|
|
|
|
|
|
|
105
|
683
|
100
|
|
|
|
2058
|
if ($self->{haystack}->[$i] eq $self->{needle}->[$j]) { |
106
|
677
|
|
|
|
|
977
|
unshift @{$self->{found}}, $self->{begin} + $i; |
|
677
|
|
|
|
|
1630
|
|
107
|
677
|
|
|
|
|
2254
|
return 'align'; |
108
|
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
|
111
|
5686
|
|
|
|
|
9963
|
foreach (qw(shift_a shift_b)) { |
112
|
6132
|
100
|
|
|
|
15080
|
if (exists($arg->{$_})) { |
113
|
5686
|
|
|
|
|
17142
|
return $_; |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
} |
116
|
|
|
|
|
|
|
|
117
|
0
|
|
|
|
|
0
|
return 'align'; |
118
|
|
|
|
|
|
|
} |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
sub _make_presentable { |
121
|
53
|
|
|
53
|
|
119
|
my $self = shift; |
122
|
|
|
|
|
|
|
|
123
|
53
|
50
|
|
|
|
318
|
if ($self->{book}->shapeless) { |
124
|
0
|
|
|
|
|
0
|
return @{$self->{found}}; |
|
0
|
|
|
|
|
0
|
|
125
|
|
|
|
|
|
|
} |
126
|
|
|
|
|
|
|
|
127
|
53
|
|
|
|
|
102
|
my %core; |
128
|
|
|
|
|
|
|
my @stack; |
129
|
53
|
|
|
|
|
98
|
foreach (@{$self->{found}}) { |
|
53
|
|
|
|
|
165
|
|
130
|
677
|
|
|
|
|
2283
|
my $cur_tag = $self->{book}->get_tag($_); |
131
|
677
|
|
|
|
|
16758
|
my $name = $cur_tag->name; |
132
|
677
|
|
|
|
|
5985
|
my $stem = $name; |
133
|
677
|
|
|
|
|
2139
|
$stem =~ s~^\/~~; |
134
|
|
|
|
|
|
|
|
135
|
677
|
100
|
|
|
|
2044
|
if ($name eq $stem) { |
136
|
338
|
|
|
|
|
1037
|
push @stack, $cur_tag; |
137
|
|
|
|
|
|
|
} else { |
138
|
339
|
|
|
|
|
574
|
my $skip = 0; |
139
|
339
|
|
100
|
|
|
8241
|
while (!$skip && scalar(@stack) && |
|
|
|
100
|
|
|
|
|
140
|
|
|
|
|
|
|
($stack[scalar(@stack) - 1]->name ne $stem)) { |
141
|
47
|
|
|
|
|
518
|
my $top_tag = $stack[scalar(@stack) - 1]; |
142
|
47
|
100
|
|
|
|
1052
|
if ($self->{book}->is_unclosed_tag($top_tag->name)) { |
143
|
27
|
|
|
|
|
630
|
$core{$top_tag->index} = 1; |
144
|
27
|
|
|
|
|
849
|
pop @stack; |
145
|
|
|
|
|
|
|
} else { |
146
|
20
|
|
|
|
|
80
|
$skip = 1; |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
|
150
|
339
|
100
|
100
|
|
|
4183
|
if (!$skip && scalar(@stack)) { |
151
|
285
|
|
|
|
|
512
|
my $top_tag = pop @stack; |
152
|
285
|
|
|
|
|
6640
|
$core{$top_tag->index} = 1; |
153
|
285
|
|
|
|
|
3212
|
$core{$_} = 1; |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
} |
157
|
|
|
|
|
|
|
|
158
|
53
|
|
|
|
|
190
|
while (scalar(@stack)) { |
159
|
26
|
|
|
|
|
112
|
my $top_tag = pop @stack; |
160
|
26
|
100
|
|
|
|
590
|
if ($self->{book}->is_unclosed_tag($top_tag->name)) { |
161
|
8
|
|
|
|
|
180
|
$core{$top_tag->index} = 1; |
162
|
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
|
165
|
53
|
|
|
|
|
401
|
return sort { $a <=> $b; } keys %core; |
|
1667
|
|
|
|
|
2761
|
|
166
|
|
|
|
|
|
|
} |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
1; |