line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::Search::Hit::BlastPullHit |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Sendu Bala |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright Sendu Bala |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::Search::Hit::BlastPullHit - A parser and hit object for BLASTN hits |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# generally we use Bio::SearchIO to build these objects |
21
|
|
|
|
|
|
|
use Bio::SearchIO; |
22
|
|
|
|
|
|
|
my $in = Bio::SearchIO->new(-format => 'blast_pull', |
23
|
|
|
|
|
|
|
-file => 'result.blast'); |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
while (my $result = $in->next_result) { |
26
|
|
|
|
|
|
|
while (my $hit = $result->next_hit) { |
27
|
|
|
|
|
|
|
print $hit->name, "\n"; |
28
|
|
|
|
|
|
|
print $hit->score, "\n"; |
29
|
|
|
|
|
|
|
print $hit->significance, "\n"; |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
while (my $hsp = $hit->next_hsp) { |
32
|
|
|
|
|
|
|
# process HSPI objects |
33
|
|
|
|
|
|
|
} |
34
|
|
|
|
|
|
|
} |
35
|
|
|
|
|
|
|
} |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=head1 DESCRIPTION |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
This object implements a parser for BLASTN hit output. |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=head1 FEEDBACK |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=head2 Mailing Lists |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
46
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to |
47
|
|
|
|
|
|
|
the Bioperl mailing list. Your participation is much appreciated. |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
50
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=head2 Support |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
I |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
59
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
60
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
61
|
|
|
|
|
|
|
with code and data examples if at all possible. |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=head2 Reporting Bugs |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
66
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via the |
67
|
|
|
|
|
|
|
web: |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head1 AUTHOR - Sendu Bala |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
Email bix@sendu.me.uk |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
Additional contributors names and emails here |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=head1 APPENDIX |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
The rest of the documentation details each of the object methods. |
82
|
|
|
|
|
|
|
Internal methods are usually preceded with a _ |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=cut |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
# Let the code begin... |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
package Bio::Search::Hit::BlastPullHit; |
89
|
|
|
|
|
|
|
|
90
|
1
|
|
|
1
|
|
3
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
21
|
|
91
|
|
|
|
|
|
|
|
92
|
1
|
|
|
1
|
|
450
|
use Bio::Search::HSP::BlastPullHSP; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
38
|
|
93
|
|
|
|
|
|
|
|
94
|
1
|
|
|
1
|
|
4
|
use base qw(Bio::Root::Root Bio::Search::Hit::PullHitI); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
598
|
|
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=head2 new |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
Title : new |
99
|
|
|
|
|
|
|
Usage : my $obj = Bio::Search::Hit::BlastNHit->new(); |
100
|
|
|
|
|
|
|
Function: Builds a new Bio::Search::Hit::BlastNHit object. |
101
|
|
|
|
|
|
|
Returns : Bio::Search::Hit::BlastNHit |
102
|
|
|
|
|
|
|
Args : -chunk => [Bio::Root::IO, $start, $end] (required if no -parent) |
103
|
|
|
|
|
|
|
-parent => Bio::PullParserI object (required if no -chunk) |
104
|
|
|
|
|
|
|
-hit_data => array ref with [name description score significance] |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
where the array ref provided to -chunk contains an IO object |
107
|
|
|
|
|
|
|
for a filehandle to something representing the raw data of the |
108
|
|
|
|
|
|
|
hit, and $start and $end define the tell() position within the |
109
|
|
|
|
|
|
|
filehandle that the hit data starts and ends (optional; defaults |
110
|
|
|
|
|
|
|
to start and end of the entire thing described by the filehandle) |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=cut |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
sub new { |
115
|
39
|
|
|
39
|
1
|
80
|
my ($class, @args) = @_; |
116
|
39
|
|
|
|
|
113
|
my $self = $class->SUPER::new(@args); |
117
|
|
|
|
|
|
|
|
118
|
39
|
|
|
|
|
107
|
$self->_setup(@args); |
119
|
|
|
|
|
|
|
|
120
|
39
|
|
|
|
|
65
|
my $fields = $self->_fields; |
121
|
39
|
|
|
|
|
73
|
foreach my $field (qw( header start_end )) { |
122
|
78
|
|
|
|
|
113
|
$fields->{$field} = undef; |
123
|
|
|
|
|
|
|
} |
124
|
|
|
|
|
|
|
|
125
|
39
|
|
|
|
|
59
|
my $hit_data = $self->_raw_hit_data; |
126
|
39
|
50
|
33
|
|
|
166
|
if ($hit_data && ref($hit_data) eq 'ARRAY') { |
127
|
39
|
|
|
|
|
47
|
foreach my $field (qw(name description score significance)) { |
128
|
156
|
|
|
|
|
100
|
$fields->{$field} = shift(@{$hit_data}); |
|
156
|
|
|
|
|
279
|
|
129
|
|
|
|
|
|
|
} |
130
|
|
|
|
|
|
|
} |
131
|
|
|
|
|
|
|
|
132
|
39
|
|
|
|
|
228
|
$self->_dependencies( { ( name => 'header', |
133
|
|
|
|
|
|
|
length => 'header', |
134
|
|
|
|
|
|
|
description => 'header', |
135
|
|
|
|
|
|
|
accession => 'header', |
136
|
|
|
|
|
|
|
next_hsp => 'header', |
137
|
|
|
|
|
|
|
query_start => 'start_end', |
138
|
|
|
|
|
|
|
query_end => 'start_end', |
139
|
|
|
|
|
|
|
hit_start => 'start_end', |
140
|
|
|
|
|
|
|
hit_end => 'start_end' ) } ); |
141
|
|
|
|
|
|
|
|
142
|
39
|
|
|
|
|
108
|
return $self; |
143
|
|
|
|
|
|
|
} |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
# |
146
|
|
|
|
|
|
|
# PullParserI discovery methods so we can answer all HitI questions |
147
|
|
|
|
|
|
|
# |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
sub _discover_header { |
150
|
20
|
|
|
20
|
|
25
|
my $self = shift; |
151
|
20
|
|
|
|
|
53
|
$self->_chunk_seek(0); |
152
|
20
|
|
|
|
|
55
|
my $header = $self->_get_chunk_by_end("\n Score = "); |
153
|
|
|
|
|
|
|
|
154
|
20
|
100
|
|
|
|
52
|
unless ($header) { |
155
|
|
|
|
|
|
|
# no alignment or other data; all information was in the hit table of |
156
|
|
|
|
|
|
|
# the result |
157
|
3
|
|
|
|
|
9
|
$self->_calculate_accession_from_name; |
158
|
|
|
|
|
|
|
|
159
|
3
|
|
|
|
|
4
|
$self->_fields->{header} = 1; |
160
|
3
|
|
|
|
|
4
|
return; |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
|
163
|
17
|
|
|
|
|
39
|
$self->{_after_header} = $self->_chunk_tell; |
164
|
|
|
|
|
|
|
|
165
|
17
|
|
|
|
|
275
|
($self->_fields->{name}, $self->_fields->{description}, $self->_fields->{length}) = $header =~ /^(\S+)\s+(\S.+?)?\s+Length\s*=\s*(\d+)/sm; |
166
|
17
|
100
|
|
|
|
42
|
if ($self->_fields->{description}) { |
167
|
15
|
|
|
|
|
28
|
$self->_fields->{description} =~ s/\n//g; |
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
else { |
170
|
2
|
|
|
|
|
5
|
$self->_fields->{description} = ''; |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
|
173
|
17
|
|
|
|
|
47
|
$self->_calculate_accession_from_name; |
174
|
|
|
|
|
|
|
|
175
|
17
|
|
|
|
|
34
|
$self->_fields->{header} = 1; |
176
|
|
|
|
|
|
|
} |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
sub _calculate_accession_from_name { |
179
|
20
|
|
|
20
|
|
22
|
my $self = shift; |
180
|
20
|
|
|
|
|
44
|
my $name = $self->get_field('name'); |
181
|
20
|
100
|
|
|
|
143
|
if ($name =~ /.+?\|.+?\|.+?\|(\w+)/) { |
|
|
100
|
|
|
|
|
|
182
|
9
|
|
|
|
|
16
|
$self->_fields->{accession} = $1; |
183
|
|
|
|
|
|
|
} |
184
|
|
|
|
|
|
|
elsif ($self->_fields->{name} =~ /.+?\|(\w+)?\./) { |
185
|
|
|
|
|
|
|
# old form? |
186
|
6
|
|
|
|
|
14
|
$self->_fields->{accession} = $1; |
187
|
|
|
|
|
|
|
} |
188
|
|
|
|
|
|
|
else { |
189
|
5
|
|
|
|
|
11
|
$self->_fields->{accession} = $name; |
190
|
|
|
|
|
|
|
} |
191
|
|
|
|
|
|
|
} |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
sub _discover_start_end { |
194
|
4
|
|
|
4
|
|
5
|
my $self = shift; |
195
|
|
|
|
|
|
|
|
196
|
4
|
|
|
|
|
4
|
my ($q_start, $q_end, $h_start, $h_end); |
197
|
4
|
|
|
|
|
8
|
foreach my $hsp ($self->hsps) { |
198
|
9
|
|
|
|
|
26
|
my ($this_q_start, $this_h_start) = $hsp->start; |
199
|
9
|
|
|
|
|
23
|
my ($this_q_end, $this_h_end) = $hsp->end; |
200
|
|
|
|
|
|
|
|
201
|
9
|
100
|
100
|
|
|
32
|
if (! defined $q_start || $this_q_start < $q_start) { |
202
|
6
|
|
|
|
|
8
|
$q_start = $this_q_start; |
203
|
|
|
|
|
|
|
} |
204
|
9
|
100
|
100
|
|
|
30
|
if (! defined $h_start || $this_h_start < $h_start) { |
205
|
6
|
|
|
|
|
9
|
$h_start = $this_h_start; |
206
|
|
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
|
208
|
9
|
100
|
100
|
|
|
30
|
if (! defined $q_end || $this_q_end > $q_end) { |
209
|
7
|
|
|
|
|
8
|
$q_end = $this_q_end; |
210
|
|
|
|
|
|
|
} |
211
|
9
|
100
|
100
|
|
|
29
|
if (! defined $h_end || $this_h_end > $h_end) { |
212
|
5
|
|
|
|
|
9
|
$h_end = $this_h_end; |
213
|
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
} |
215
|
|
|
|
|
|
|
|
216
|
4
|
|
|
|
|
11
|
$self->_fields->{query_start} = $q_start; |
217
|
4
|
|
|
|
|
12
|
$self->_fields->{query_end} = $q_end; |
218
|
4
|
|
|
|
|
9
|
$self->_fields->{hit_start} = $h_start; |
219
|
4
|
|
|
|
|
8
|
$self->_fields->{hit_end} = $h_end; |
220
|
|
|
|
|
|
|
} |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
sub _discover_next_hsp { |
223
|
177
|
|
|
177
|
|
157
|
my $self = shift; |
224
|
177
|
|
66
|
|
|
349
|
my $pos = $self->{_end_of_previous_hsp} || $self->{_after_header}; |
225
|
177
|
50
|
|
|
|
331
|
return unless $pos; |
226
|
177
|
|
|
|
|
285
|
$self->_chunk_seek($pos); |
227
|
|
|
|
|
|
|
|
228
|
177
|
|
|
|
|
391
|
my ($start, $end) = $self->_find_chunk_by_end("\n Score = "); |
229
|
177
|
100
|
66
|
|
|
571
|
if ((defined $end && ($end + $self->_chunk_true_start) > $self->_chunk_true_end) || ! $end) { |
|
|
|
66
|
|
|
|
|
230
|
45
|
|
66
|
|
|
102
|
$start = $self->{_end_of_previous_hsp} || $self->{_after_header}; |
231
|
45
|
|
|
|
|
89
|
$end = $self->_chunk_true_end; |
232
|
|
|
|
|
|
|
} |
233
|
|
|
|
|
|
|
else { |
234
|
132
|
|
|
|
|
190
|
$end += $self->_chunk_true_start; |
235
|
|
|
|
|
|
|
} |
236
|
177
|
|
|
|
|
228
|
$start += $self->_chunk_true_start; |
237
|
|
|
|
|
|
|
|
238
|
177
|
100
|
|
|
|
233
|
return if $start >= $self->_chunk_true_end; |
239
|
|
|
|
|
|
|
|
240
|
145
|
|
|
|
|
184
|
$self->{_end_of_previous_hsp} = $end - $self->_chunk_true_start; |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
#*** needs to inherit piped_behaviour, and we need to deal with _sequential |
243
|
|
|
|
|
|
|
# ourselves |
244
|
145
|
|
|
|
|
316
|
$self->_fields->{next_hsp} = Bio::Search::HSP::BlastPullHSP->new(-parent => $self, |
245
|
|
|
|
|
|
|
-chunk => [$self->chunk, $start, $end]); |
246
|
|
|
|
|
|
|
} |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
sub _discover_num_hsps { |
249
|
4
|
|
|
4
|
|
6
|
my $self = shift; |
250
|
4
|
|
|
|
|
12
|
$self->_fields->{num_hsps} = $self->hsps; |
251
|
|
|
|
|
|
|
} |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
=head2 next_hsp |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
Title : next_hsp |
256
|
|
|
|
|
|
|
Usage : while( $hsp = $obj->next_hsp()) { ... } |
257
|
|
|
|
|
|
|
Function : Returns the next available High Scoring Pair |
258
|
|
|
|
|
|
|
Example : |
259
|
|
|
|
|
|
|
Returns : L object or null if finished |
260
|
|
|
|
|
|
|
Args : none |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
=cut |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
sub next_hsp { |
265
|
177
|
|
|
177
|
1
|
158
|
my $self = shift; |
266
|
177
|
|
|
|
|
365
|
my $hsp = $self->get_field('next_hsp'); |
267
|
177
|
|
|
|
|
267
|
undef $self->_fields->{next_hsp}; |
268
|
177
|
|
|
|
|
423
|
return $hsp; |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
=head2 hsps |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
Usage : $hit_object->hsps(); |
274
|
|
|
|
|
|
|
Purpose : Get a list containing all HSP objects. |
275
|
|
|
|
|
|
|
Example : @hsps = $hit_object->hsps(); |
276
|
|
|
|
|
|
|
Returns : list of L objects. |
277
|
|
|
|
|
|
|
Argument : none |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
=cut |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
sub hsps { |
282
|
28
|
|
|
28
|
1
|
51
|
my $self = shift; |
283
|
28
|
|
|
|
|
41
|
my $old = $self->{_end_of_previous_hsp}; |
284
|
28
|
|
|
|
|
77
|
$self->rewind; |
285
|
28
|
|
|
|
|
29
|
my @hsps; |
286
|
28
|
|
|
|
|
74
|
while (defined(my $hsp = $self->next_hsp)) { |
287
|
140
|
|
|
|
|
296
|
push(@hsps, $hsp); |
288
|
|
|
|
|
|
|
} |
289
|
28
|
|
|
|
|
44
|
$self->{_end_of_previous_hsp} = $old; |
290
|
28
|
|
|
|
|
107
|
return @hsps; |
291
|
|
|
|
|
|
|
} |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
=head2 hsp |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
Usage : $hit_object->hsp( [string] ); |
296
|
|
|
|
|
|
|
Purpose : Get a single HSPI object for the present HitI object. |
297
|
|
|
|
|
|
|
Example : $hspObj = $hit_object->hsp; # same as 'best' |
298
|
|
|
|
|
|
|
: $hspObj = $hit_object->hsp('best'); |
299
|
|
|
|
|
|
|
: $hspObj = $hit_object->hsp('worst'); |
300
|
|
|
|
|
|
|
Returns : Object reference for a L object. |
301
|
|
|
|
|
|
|
Argument : String (or no argument). |
302
|
|
|
|
|
|
|
: No argument (default) = highest scoring HSP (same as 'best'). |
303
|
|
|
|
|
|
|
: 'best' = highest scoring HSP. |
304
|
|
|
|
|
|
|
: 'worst' = lowest scoring HSP. |
305
|
|
|
|
|
|
|
Throws : Exception if an unrecognized argument is used. |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
See Also : L, L() |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
=cut |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
sub hsp { |
312
|
0
|
|
|
0
|
1
|
0
|
my ($self, $type) = @_; |
313
|
0
|
|
0
|
|
|
0
|
$type ||= 'best'; |
314
|
0
|
|
|
|
|
0
|
$self->throw_not_implemented; |
315
|
|
|
|
|
|
|
} |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
=head2 rewind |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
Title : rewind |
320
|
|
|
|
|
|
|
Usage : $result->rewind; |
321
|
|
|
|
|
|
|
Function: Allow one to reset the HSP iterator to the beginning, so that |
322
|
|
|
|
|
|
|
next_hsp() will subsequently return the first hsp and so on. |
323
|
|
|
|
|
|
|
Returns : n/a |
324
|
|
|
|
|
|
|
Args : none |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
=cut |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
sub rewind { |
329
|
28
|
|
|
28
|
1
|
31
|
my $self = shift; |
330
|
28
|
|
|
|
|
59
|
delete $self->{_end_of_previous_hsp}; |
331
|
|
|
|
|
|
|
} |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
# have p() a synonym of significance() |
334
|
|
|
|
|
|
|
sub p { |
335
|
0
|
|
|
0
|
1
|
|
return shift->significance; |
336
|
|
|
|
|
|
|
} |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
1; |