line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::Search::HSP::GenericHSP |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Jason Stajich |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright Jason Stajich |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::Search::HSP::GenericHSP - A "Generic" implementation of a High Scoring Pair |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
my $hsp = Bio::Search::HSP::GenericHSP->new( -algorithm => 'blastp', |
21
|
|
|
|
|
|
|
-evalue => '1e-30', |
22
|
|
|
|
|
|
|
); |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
$r_type = $hsp->algorithm; |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
$pvalue = $hsp->p(); |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
$evalue = $hsp->evalue(); |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
$frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] ); |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
$frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] ); |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
$gaps = $hsp->gaps( ['query'|'hit'|'total'] ); |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
$qseq = $hsp->query_string; |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
$hseq = $hsp->hit_string; |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
$homo_string = $hsp->homology_string; |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
$len = $hsp->length( ['query'|'hit'|'total'] ); |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
$len = $hsp->length( ['query'|'hit'|'total'] ); |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
$rank = $hsp->rank; |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# TODO: Describe how to configure a SearchIO stream so that it generates |
49
|
|
|
|
|
|
|
# GenericHSP objects. |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=head1 DESCRIPTION |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
This implementation is "Generic", meaning it is is suitable for |
54
|
|
|
|
|
|
|
holding information about High Scoring pairs from most Search reports |
55
|
|
|
|
|
|
|
such as BLAST and FastA. Specialized objects can be derived from |
56
|
|
|
|
|
|
|
this. |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
Unless you're writing a parser, you won't ever need to create a |
59
|
|
|
|
|
|
|
GenericHSP or any other HSPI-implementing object. If you use |
60
|
|
|
|
|
|
|
the SearchIO system, HSPI objects are created automatically from |
61
|
|
|
|
|
|
|
a SearchIO stream which returns Bio::Search::Result::ResultI objects |
62
|
|
|
|
|
|
|
and you get the HSPI objects via the ResultI API. |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
For documentation on what you can do with GenericHSP (and other HSPI |
65
|
|
|
|
|
|
|
objects), please see the API documentation in |
66
|
|
|
|
|
|
|
L. |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=head1 FEEDBACK |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=head2 Mailing Lists |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
73
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to |
74
|
|
|
|
|
|
|
the Bioperl mailing list. Your participation is much appreciated. |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
77
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=head2 Support |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
I |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
86
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
87
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
88
|
|
|
|
|
|
|
with code and data examples if at all possible. |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=head2 Reporting Bugs |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
93
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via the |
94
|
|
|
|
|
|
|
web: |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
=head1 AUTHOR - Jason Stajich and Steve Chervitz |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
Email jason-at-bioperl.org |
101
|
|
|
|
|
|
|
Email sac-at-bioperl.org |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
Sendu Bala, bix@sendu.me.uk |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=head1 APPENDIX |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
The rest of the documentation details each of the object methods. |
110
|
|
|
|
|
|
|
Internal methods are usually preceded with a _ |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=cut |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
# Let the code begin... |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
package Bio::Search::HSP::GenericHSP; |
117
|
27
|
|
|
27
|
|
168
|
use strict; |
|
27
|
|
|
|
|
48
|
|
|
27
|
|
|
|
|
768
|
|
118
|
|
|
|
|
|
|
|
119
|
27
|
|
|
27
|
|
126
|
use Bio::Root::Root; |
|
27
|
|
|
|
|
48
|
|
|
27
|
|
|
|
|
632
|
|
120
|
27
|
|
|
27
|
|
9189
|
use Bio::SeqFeature::Similarity; |
|
27
|
|
|
|
|
70
|
|
|
27
|
|
|
|
|
937
|
|
121
|
|
|
|
|
|
|
|
122
|
27
|
|
|
27
|
|
196
|
use base qw(Bio::Search::HSP::HSPI); |
|
27
|
|
|
|
|
55
|
|
|
27
|
|
|
|
|
12341
|
|
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=head2 new |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
Title : new |
127
|
|
|
|
|
|
|
Usage : my $obj = Bio::Search::HSP::GenericHSP->new(); |
128
|
|
|
|
|
|
|
Function: Builds a new Bio::Search::HSP::GenericHSP object |
129
|
|
|
|
|
|
|
Returns : Bio::Search::HSP::GenericHSP |
130
|
|
|
|
|
|
|
Args : -algorithm => algorithm used (BLASTP, TBLASTX, FASTX, etc) |
131
|
|
|
|
|
|
|
-evalue => evalue |
132
|
|
|
|
|
|
|
-pvalue => pvalue |
133
|
|
|
|
|
|
|
-bits => bit value for HSP |
134
|
|
|
|
|
|
|
-score => score value for HSP (typically z-score but depends on |
135
|
|
|
|
|
|
|
analysis) |
136
|
|
|
|
|
|
|
-hsp_length=> Length of the HSP (including gaps) |
137
|
|
|
|
|
|
|
-identical => # of residues that that matched identically |
138
|
|
|
|
|
|
|
-percent_identity => (optional) percent identity |
139
|
|
|
|
|
|
|
-conserved => # of residues that matched conservatively |
140
|
|
|
|
|
|
|
(only protein comparisons; |
141
|
|
|
|
|
|
|
conserved == identical in nucleotide comparisons) |
142
|
|
|
|
|
|
|
-hsp_gaps => # of gaps in the HSP |
143
|
|
|
|
|
|
|
-query_gaps => # of gaps in the query in the alignment |
144
|
|
|
|
|
|
|
-hit_gaps => # of gaps in the subject in the alignment |
145
|
|
|
|
|
|
|
-query_name => HSP Query sequence name (if available) |
146
|
|
|
|
|
|
|
-query_start => HSP Query start (in original query sequence coords) |
147
|
|
|
|
|
|
|
-query_end => HSP Query end (in original query sequence coords) |
148
|
|
|
|
|
|
|
-query_length=> total length of the query sequence |
149
|
|
|
|
|
|
|
-query_seq => query sequence portion of the HSP |
150
|
|
|
|
|
|
|
-query_desc => textual description of the query |
151
|
|
|
|
|
|
|
-hit_name => HSP Hit sequence name (if available) |
152
|
|
|
|
|
|
|
-hit_start => HSP Hit start (in original hit sequence coords) |
153
|
|
|
|
|
|
|
-hit_end => HSP Hit end (in original hit sequence coords) |
154
|
|
|
|
|
|
|
-hit_length => total length of the hit sequence |
155
|
|
|
|
|
|
|
-hit_seq => hit sequence portion of the HSP |
156
|
|
|
|
|
|
|
-hit_desc => textual description of the hit |
157
|
|
|
|
|
|
|
-homology_seq=> homology sequence for the HSP |
158
|
|
|
|
|
|
|
-hit_frame => hit frame (only if hit is translated protein) |
159
|
|
|
|
|
|
|
-query_frame => query frame (only if query is translated protein) |
160
|
|
|
|
|
|
|
-rank => HSP rank |
161
|
|
|
|
|
|
|
-links => HSP links information (WU-BLAST only) |
162
|
|
|
|
|
|
|
-hsp_group => HSP Group informat (WU-BLAST only) |
163
|
|
|
|
|
|
|
-gap_symbol => symbol representing a gap (default = '-') |
164
|
|
|
|
|
|
|
-hit_features=> string of features found in or near HSP hit |
165
|
|
|
|
|
|
|
region (reported in some BLAST text output, |
166
|
|
|
|
|
|
|
v. 2.2.13 and up) |
167
|
|
|
|
|
|
|
-stranded => If the algorithm isn't known (i.e. defaults to |
168
|
|
|
|
|
|
|
'generic'), setting this will indicate start/end |
169
|
|
|
|
|
|
|
coordinates are to be used to determine the strand |
170
|
|
|
|
|
|
|
for 'query', 'subject', 'hit', 'both', or 'none' |
171
|
|
|
|
|
|
|
(default = 'none') |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=cut |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
sub new { |
176
|
1331
|
|
|
1331
|
1
|
10612
|
my($class,%args) = @_; |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
# don't pass anything to SUPER; complex hierarchy results in lots of work |
179
|
|
|
|
|
|
|
# for nothing |
180
|
|
|
|
|
|
|
|
181
|
1331
|
|
|
|
|
5254
|
my $self = $class->SUPER::new(); |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
# for speed, don't use _rearrange and just store all input data directly |
184
|
|
|
|
|
|
|
# with no method calls and no work done. work can be carried |
185
|
|
|
|
|
|
|
# out just-in-time later if desired |
186
|
1331
|
|
|
|
|
4797
|
while (my ($arg, $value) = each %args) { |
187
|
26240
|
|
|
|
|
30904
|
$arg =~ tr/a-z\055/A-Z/d; |
188
|
26240
|
|
|
|
|
65792
|
$self->{$arg} = $value; |
189
|
|
|
|
|
|
|
} |
190
|
1331
|
|
|
|
|
2642
|
my $bits = $self->{BITS}; |
191
|
|
|
|
|
|
|
|
192
|
1331
|
100
|
|
|
|
5183
|
defined $self->{VERBOSE} && $self->verbose($self->{VERBOSE}); |
193
|
1331
|
50
|
|
|
|
2649
|
if (exists $self->{GAP_SYMBOL}) { |
194
|
|
|
|
|
|
|
# not checking anything else but the length (must be 1 as only one gap |
195
|
|
|
|
|
|
|
# symbol allowed currently); can add in support for symbol checks or |
196
|
|
|
|
|
|
|
# multiple symbols later if needed |
197
|
|
|
|
|
|
|
$self->throw("Gap symbol must be of length 1") if |
198
|
0
|
0
|
|
|
|
0
|
CORE::length($self->{GAP_SYMBOL}) != 1; |
199
|
|
|
|
|
|
|
} else { |
200
|
|
|
|
|
|
|
# dafault |
201
|
1331
|
|
|
|
|
3228
|
$self->{GAP_SYMBOL} = '-'; |
202
|
|
|
|
|
|
|
} |
203
|
1331
|
|
100
|
|
|
3323
|
$self->{ALGORITHM} ||= 'GENERIC'; |
204
|
1331
|
|
100
|
|
|
4168
|
$self->{STRANDED} ||= 'NONE'; |
205
|
|
|
|
|
|
|
|
206
|
1331
|
50
|
33
|
|
|
5227
|
if (! defined $self->{QUERY_LENGTH} || ! defined $self->{HIT_LENGTH}) { |
207
|
0
|
|
|
|
|
0
|
$self->throw("Must define hit and query length"); |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
|
210
|
1331
|
|
|
|
|
2298
|
$self->{'_sequenceschanged'} = 1; |
211
|
|
|
|
|
|
|
|
212
|
1331
|
|
|
|
|
2031
|
$self->{_finished_new} = 1; |
213
|
1331
|
|
|
|
|
8140
|
return $self; |
214
|
|
|
|
|
|
|
} |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
sub _logical_length { |
217
|
313
|
|
|
313
|
|
459
|
my ($self, $type) = @_; |
218
|
313
|
100
|
66
|
|
|
852
|
if (!defined($self->{_sbjct_offset}) || !defined($self->{_query_offset})) { |
219
|
105
|
|
|
|
|
241
|
$self->_calculate_seq_offsets(); |
220
|
|
|
|
|
|
|
} |
221
|
313
|
100
|
|
|
|
1150
|
my $key = $type =~ /sbjct|hit|tot/i ? 'sbjct' : 'query'; |
222
|
|
|
|
|
|
|
|
223
|
313
|
|
|
|
|
673
|
my $offset = $self->{"_${key}_offset"}; |
224
|
313
|
|
|
|
|
522
|
return $self->length($type) / $offset ; |
225
|
|
|
|
|
|
|
} |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
=head2 L methods |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
Implementation of L methods follow |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
=head2 algorithm |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
Title : algorithm |
234
|
|
|
|
|
|
|
Usage : my $r_type = $hsp->algorithm |
235
|
|
|
|
|
|
|
Function: Obtain the name of the algorithm used to obtain the HSP |
236
|
|
|
|
|
|
|
Returns : string (e.g., BLASTP) |
237
|
|
|
|
|
|
|
Args : [optional] scalar string to set value |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
=cut |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
sub algorithm{ |
242
|
7397
|
|
|
7397
|
1
|
14811
|
my ($self,$value) = @_; |
243
|
7397
|
|
|
|
|
10130
|
my $previous = $self->{'ALGORITHM'}; |
244
|
7397
|
50
|
33
|
|
|
18515
|
if( defined $value || ! defined $previous ) { |
245
|
0
|
0
|
|
|
|
0
|
$value = $previous = '' unless defined $value; |
246
|
0
|
|
|
|
|
0
|
$self->{'ALGORITHM'} = $value; |
247
|
|
|
|
|
|
|
} |
248
|
|
|
|
|
|
|
|
249
|
7397
|
|
|
|
|
13394
|
return $previous; |
250
|
|
|
|
|
|
|
} |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
=head2 pvalue |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
Title : pvalue |
255
|
|
|
|
|
|
|
Usage : my $pvalue = $hsp->pvalue(); |
256
|
|
|
|
|
|
|
Function: Returns the P-value for this HSP or undef |
257
|
|
|
|
|
|
|
Returns : float or exponential (2e-10) |
258
|
|
|
|
|
|
|
P-value is not defined with NCBI Blast2 reports. |
259
|
|
|
|
|
|
|
Args : [optional] numeric to set value |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
=cut |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
sub pvalue { |
264
|
63
|
|
|
63
|
1
|
109
|
my ($self,$value) = @_; |
265
|
63
|
|
|
|
|
95
|
my $previous = $self->{'PVALUE'}; |
266
|
63
|
50
|
|
|
|
129
|
if( defined $value ) { |
267
|
0
|
|
|
|
|
0
|
$self->{'PVALUE'} = $value; |
268
|
|
|
|
|
|
|
} |
269
|
63
|
|
|
|
|
173
|
return $previous; |
270
|
|
|
|
|
|
|
} |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=head2 evalue |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
Title : evalue |
275
|
|
|
|
|
|
|
Usage : my $evalue = $hsp->evalue(); |
276
|
|
|
|
|
|
|
Function: Returns the e-value for this HSP |
277
|
|
|
|
|
|
|
Returns : float or exponential (2e-10) |
278
|
|
|
|
|
|
|
Args : [optional] numeric to set value |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
=cut |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
sub evalue { |
283
|
170
|
|
|
170
|
1
|
338
|
my ($self,$value) = @_; |
284
|
170
|
|
|
|
|
295
|
my $previous = $self->{'EVALUE'}; |
285
|
170
|
50
|
|
|
|
371
|
if( defined $value ) { |
286
|
0
|
|
|
|
|
0
|
$self->{'EVALUE'} = $value; |
287
|
|
|
|
|
|
|
} |
288
|
170
|
|
|
|
|
1101
|
return $previous; |
289
|
|
|
|
|
|
|
} |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
=head2 frac_identical |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
Title : frac_identical |
294
|
|
|
|
|
|
|
Usage : my $frac_id = $hsp->frac_identical( ['query'|'hit'|'total'] ); |
295
|
|
|
|
|
|
|
Function: Returns the fraction of identitical positions for this HSP |
296
|
|
|
|
|
|
|
Returns : Float in range 0.0 -> 1.0 |
297
|
|
|
|
|
|
|
Args : arg 1: 'query' = num identical / length of query seq (without gaps) |
298
|
|
|
|
|
|
|
'hit' = num identical / length of hit seq (without gaps) |
299
|
|
|
|
|
|
|
synonyms: 'sbjct', 'subject' |
300
|
|
|
|
|
|
|
'total' = num identical / length of alignment (with gaps) |
301
|
|
|
|
|
|
|
synonyms: 'hsp' |
302
|
|
|
|
|
|
|
default = 'total' |
303
|
|
|
|
|
|
|
arg 2: [optional] frac identical value to set for the type requested |
304
|
|
|
|
|
|
|
Note : for translated sequences, this adjusts the length accordingly |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
=cut |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
sub frac_identical { |
309
|
602
|
|
|
602
|
1
|
1010
|
my ($self, $type,$value) = @_; |
310
|
|
|
|
|
|
|
|
311
|
602
|
100
|
|
|
|
1007
|
unless ($self->{_did_prefrac}) { |
312
|
105
|
|
|
|
|
251
|
$self->_pre_frac; |
313
|
|
|
|
|
|
|
} |
314
|
|
|
|
|
|
|
|
315
|
602
|
50
|
|
|
|
1175
|
$type = lc $type if defined $type; |
316
|
602
|
50
|
33
|
|
|
1724
|
$type = 'hit' if( defined $type && |
317
|
|
|
|
|
|
|
$type =~ /subject|sbjct/); |
318
|
602
|
100
|
66
|
|
|
3328
|
$type = 'total' if( ! defined $type || $type eq 'hsp' || |
|
|
|
66
|
|
|
|
|
319
|
|
|
|
|
|
|
$type !~ /query|hit|subject|sbjct|total/); |
320
|
602
|
|
|
|
|
1009
|
my $previous = $self->{'_frac_identical'}->{$type}; |
321
|
602
|
100
|
66
|
|
|
1313
|
if( defined $value || ! defined $previous ) { |
322
|
313
|
50
|
|
|
|
457
|
$value = $previous = '' unless defined $value; |
323
|
313
|
100
|
100
|
|
|
726
|
if( $type eq 'hit' || $type eq 'query' ) { |
324
|
208
|
|
|
|
|
459
|
$self->$type()->frac_identical( $value); |
325
|
|
|
|
|
|
|
} |
326
|
313
|
|
|
|
|
553
|
$self->{'_frac_identical'}->{$type} = $value; |
327
|
|
|
|
|
|
|
} |
328
|
602
|
|
|
|
|
2069
|
return $previous; |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
} |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
=head2 frac_conserved |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
Title : frac_conserved |
335
|
|
|
|
|
|
|
Usage : my $frac_cons = $hsp->frac_conserved( ['query'|'hit'|'total'] ); |
336
|
|
|
|
|
|
|
Function : Returns the fraction of conserved positions for this HSP. |
337
|
|
|
|
|
|
|
This is the fraction of symbols in the alignment with a |
338
|
|
|
|
|
|
|
positive score. |
339
|
|
|
|
|
|
|
Returns : Float in range 0.0 -> 1.0 |
340
|
|
|
|
|
|
|
Args : arg 1: 'query' = num conserved / length of query seq (without gaps) |
341
|
|
|
|
|
|
|
'hit' = num conserved / length of hit seq (without gaps) |
342
|
|
|
|
|
|
|
synonyms: 'sbjct', 'subject' |
343
|
|
|
|
|
|
|
'total' = num conserved / length of alignment (with gaps) |
344
|
|
|
|
|
|
|
synonyms: 'hsp' |
345
|
|
|
|
|
|
|
default = 'total' |
346
|
|
|
|
|
|
|
arg 2: [optional] frac conserved value to set for the type requested |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
=cut |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
sub frac_conserved { |
351
|
440
|
|
|
440
|
1
|
683
|
my ($self, $type,$value) = @_; |
352
|
|
|
|
|
|
|
|
353
|
440
|
50
|
|
|
|
723
|
unless ($self->{_did_prefrac}) { |
354
|
0
|
|
|
|
|
0
|
$self->_pre_frac; |
355
|
|
|
|
|
|
|
} |
356
|
|
|
|
|
|
|
|
357
|
440
|
100
|
|
|
|
824
|
$type = lc $type if defined $type; |
358
|
440
|
50
|
66
|
|
|
1280
|
$type = 'hit' if( defined $type && $type =~ /subject|sbjct/); |
359
|
440
|
100
|
100
|
|
|
2435
|
$type = 'total' if( ! defined $type || $type eq 'hsp' || |
|
|
|
66
|
|
|
|
|
360
|
|
|
|
|
|
|
$type !~ /query|hit|subject|sbjct|total/); |
361
|
440
|
|
|
|
|
703
|
my $previous = $self->{'_frac_conserved'}->{$type}; |
362
|
440
|
100
|
66
|
|
|
865
|
if( defined $value || ! defined $previous ) { |
363
|
313
|
50
|
|
|
|
425
|
$value = $previous = '' unless defined $value; |
364
|
313
|
|
|
|
|
422
|
$self->{'_frac_conserved'}->{$type} = $value; |
365
|
|
|
|
|
|
|
} |
366
|
440
|
|
|
|
|
1069
|
return $previous; |
367
|
|
|
|
|
|
|
} |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
=head2 gaps |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
Title : gaps |
372
|
|
|
|
|
|
|
Usage : my $gaps = $hsp->gaps( ['query'|'hit'|'total'] ); |
373
|
|
|
|
|
|
|
Function : Get the number of gap characters in the query, hit, or total alignment. |
374
|
|
|
|
|
|
|
Returns : Integer, number of gaps or 0 if none |
375
|
|
|
|
|
|
|
Args : arg 1: 'query' = num gap characters in query seq |
376
|
|
|
|
|
|
|
'hit' = num gap characters in hit seq; synonyms: 'sbjct', 'subject' |
377
|
|
|
|
|
|
|
'total' = num gap characters in whole alignment; synonyms: 'hsp' |
378
|
|
|
|
|
|
|
default = 'total' |
379
|
|
|
|
|
|
|
arg 2: [optional] integer gap value to set for the type requested |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
=cut |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
sub gaps { |
384
|
3125
|
|
|
3125
|
1
|
4318
|
my ($self, $type, $value) = @_; |
385
|
|
|
|
|
|
|
|
386
|
3125
|
100
|
|
|
|
4870
|
unless ($self->{_did_pregaps}) { |
387
|
378
|
|
|
|
|
859
|
$self->_pre_gaps; |
388
|
|
|
|
|
|
|
} |
389
|
|
|
|
|
|
|
|
390
|
3125
|
100
|
|
|
|
5027
|
$type = lc $type if defined $type; |
391
|
3125
|
50
|
66
|
|
|
12559
|
$type = 'total' if( ! defined $type || $type eq 'hsp' || |
|
|
|
66
|
|
|
|
|
392
|
|
|
|
|
|
|
$type !~ /query|hit|subject|sbjct|total/); |
393
|
3125
|
100
|
|
|
|
5177
|
$type = 'hit' if $type =~ /sbjct|subject/; |
394
|
3125
|
|
|
|
|
4424
|
my $previous = $self->{'_gaps'}->{$type}; |
395
|
3125
|
100
|
100
|
|
|
6438
|
if( defined $value || ! defined $previous ) { |
396
|
1110
|
100
|
|
|
|
1554
|
$value = $previous = '' unless defined $value; |
397
|
1110
|
|
|
|
|
1830
|
$self->{'_gaps'}->{$type} = $value; |
398
|
|
|
|
|
|
|
} |
399
|
3125
|
|
100
|
|
|
8042
|
return $previous || 0; |
400
|
|
|
|
|
|
|
} |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
=head2 query_string |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
Title : query_string |
405
|
|
|
|
|
|
|
Usage : my $qseq = $hsp->query_string; |
406
|
|
|
|
|
|
|
Function: Retrieves the query sequence of this HSP as a string |
407
|
|
|
|
|
|
|
Returns : string |
408
|
|
|
|
|
|
|
Args : [optional] string to set for query sequence |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
=cut |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
sub query_string{ |
414
|
610
|
|
|
610
|
1
|
1460
|
my ($self,$value) = @_; |
415
|
610
|
|
|
|
|
931
|
my $previous = $self->{QUERY_SEQ}; |
416
|
610
|
100
|
66
|
|
|
1744
|
if( defined $value || ! defined $previous ) { |
417
|
3
|
50
|
|
|
|
11
|
$value = $previous = '' unless defined $value; |
418
|
3
|
|
|
|
|
7
|
$self->{QUERY_SEQ} = $value; |
419
|
|
|
|
|
|
|
# do some housekeeping so we know when to |
420
|
|
|
|
|
|
|
# re-run _calculate_seq_positions |
421
|
3
|
|
|
|
|
5
|
$self->{'_sequenceschanged'} = 1; |
422
|
|
|
|
|
|
|
} |
423
|
610
|
|
|
|
|
1457
|
return $previous; |
424
|
|
|
|
|
|
|
} |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
=head2 hit_string |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
Title : hit_string |
429
|
|
|
|
|
|
|
Usage : my $hseq = $hsp->hit_string; |
430
|
|
|
|
|
|
|
Function: Retrieves the hit sequence of this HSP as a string |
431
|
|
|
|
|
|
|
Returns : string |
432
|
|
|
|
|
|
|
Args : [optional] string to set for hit sequence |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
=cut |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
sub hit_string{ |
438
|
618
|
|
|
618
|
1
|
3760
|
my ($self,$value) = @_; |
439
|
618
|
|
|
|
|
925
|
my $previous = $self->{HIT_SEQ}; |
440
|
618
|
100
|
66
|
|
|
2230
|
if( defined $value || ! defined $previous ) { |
441
|
1
|
50
|
|
|
|
4
|
$value = $previous = '' unless defined $value; |
442
|
1
|
|
|
|
|
2
|
$self->{HIT_SEQ} = $value; |
443
|
|
|
|
|
|
|
# do some housekeeping so we know when to |
444
|
|
|
|
|
|
|
# re-run _calculate_seq_positions |
445
|
1
|
|
|
|
|
3
|
$self->{'_sequenceschanged'} = 1; |
446
|
|
|
|
|
|
|
} |
447
|
618
|
|
|
|
|
2058
|
return $previous; |
448
|
|
|
|
|
|
|
} |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
=head2 homology_string |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
Title : homology_string |
453
|
|
|
|
|
|
|
Usage : my $homo_string = $hsp->homology_string; |
454
|
|
|
|
|
|
|
Function: Retrieves the homology sequence for this HSP as a string. |
455
|
|
|
|
|
|
|
: The homology sequence is the string of symbols in between the |
456
|
|
|
|
|
|
|
: query and hit sequences in the alignment indicating the degree |
457
|
|
|
|
|
|
|
: of conservation (e.g., identical, similar, not similar). |
458
|
|
|
|
|
|
|
Returns : string |
459
|
|
|
|
|
|
|
Args : [optional] string to set for homology sequence |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=cut |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
sub homology_string{ |
464
|
6905
|
|
|
6905
|
1
|
9642
|
my ($self,$value) = @_; |
465
|
6905
|
|
|
|
|
8565
|
my $previous = $self->{HOMOLOGY_SEQ}; |
466
|
6905
|
100
|
66
|
|
|
16481
|
if( defined $value || ! defined $previous ) { |
467
|
3
|
50
|
|
|
|
15
|
$value = $previous = '' unless defined $value; |
468
|
3
|
|
|
|
|
9
|
$self->{HOMOLOGY_SEQ} = $value; |
469
|
|
|
|
|
|
|
# do some housekeeping so we know when to |
470
|
|
|
|
|
|
|
# re-run _calculate_seq_positions |
471
|
3
|
|
|
|
|
7
|
$self->{'_sequenceschanged'} = 1; |
472
|
|
|
|
|
|
|
} |
473
|
6905
|
|
|
|
|
19860
|
return $previous; |
474
|
|
|
|
|
|
|
} |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
=head2 consensus_string |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
Title : consensus_string |
479
|
|
|
|
|
|
|
Usage : my $cs_string = $hsp->consensus_string; |
480
|
|
|
|
|
|
|
Function: Retrieves the consensus structure line for this HSP as a string (HMMER). |
481
|
|
|
|
|
|
|
: If the model had any consensus structure or reference line annotation |
482
|
|
|
|
|
|
|
: that it inherited from a multiple alignment (#=GC SS cons, |
483
|
|
|
|
|
|
|
: #=GC RF annotation in Stockholm files), that information is shown |
484
|
|
|
|
|
|
|
: as CS or RF annotation line. |
485
|
|
|
|
|
|
|
Returns : string |
486
|
|
|
|
|
|
|
Args : [optional] string to set for consensus structure |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
=cut |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
sub consensus_string { |
491
|
8
|
|
|
8
|
1
|
26
|
my ($self,$value) = @_; |
492
|
8
|
|
|
|
|
22
|
my $previous = $self->{CS_SEQ}; |
493
|
8
|
50
|
33
|
|
|
47
|
if( defined $value || ! defined $previous ) { |
494
|
0
|
0
|
|
|
|
0
|
$value = $previous = '' unless defined $value; |
495
|
0
|
|
|
|
|
0
|
$self->{CS_SEQ} = $value; |
496
|
|
|
|
|
|
|
# do some housekeeping so we know when to |
497
|
|
|
|
|
|
|
# re-run _calculate_seq_positions |
498
|
0
|
|
|
|
|
0
|
$self->{'_sequenceschanged'} = 1; |
499
|
|
|
|
|
|
|
} |
500
|
8
|
|
|
|
|
38
|
return $previous; |
501
|
|
|
|
|
|
|
} |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
=head2 posterior_string |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
Title : posterior_string |
506
|
|
|
|
|
|
|
Usage : my $pp_string = $hsp->posterior_string; |
507
|
|
|
|
|
|
|
Function: Retrieves the posterior probability line for this HSP as a string (HMMer3). |
508
|
|
|
|
|
|
|
: The posterior probability is the string of symbols at the bottom |
509
|
|
|
|
|
|
|
: of the alignment indicating the expected accuracy of each aligned residue. |
510
|
|
|
|
|
|
|
: A 0 means 0-5%, 1 means 5-15%, and so on; 9 means 85-95%, |
511
|
|
|
|
|
|
|
: and a * means 95-100% posterior probability. |
512
|
|
|
|
|
|
|
Returns : string |
513
|
|
|
|
|
|
|
Args : [optional] string to set for posterior probability |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
=cut |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
sub posterior_string { |
518
|
9
|
|
|
9
|
1
|
29
|
my ($self,$value) = @_; |
519
|
9
|
|
|
|
|
22
|
my $previous = $self->{PP_SEQ}; |
520
|
9
|
100
|
66
|
|
|
63
|
if( defined $value || ! defined $previous ) { |
521
|
2
|
50
|
|
|
|
5
|
$value = $previous = '' unless defined $value; |
522
|
2
|
|
|
|
|
5
|
$self->{PP_SEQ} = $value; |
523
|
|
|
|
|
|
|
# do some housekeeping so we know when to |
524
|
|
|
|
|
|
|
# re-run _calculate_seq_positions |
525
|
2
|
|
|
|
|
2
|
$self->{'_sequenceschanged'} = 1; |
526
|
|
|
|
|
|
|
} |
527
|
9
|
|
|
|
|
46
|
return $previous; |
528
|
|
|
|
|
|
|
} |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
=head2 length |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
Title : length |
533
|
|
|
|
|
|
|
Usage : my $len = $hsp->length( ['query'|'hit'|'total'] ); |
534
|
|
|
|
|
|
|
Function : Returns the length of the query or hit in the alignment |
535
|
|
|
|
|
|
|
(without gaps) |
536
|
|
|
|
|
|
|
or the aggregate length of the HSP (including gaps; |
537
|
|
|
|
|
|
|
this may be greater than either hit or query ) |
538
|
|
|
|
|
|
|
Returns : integer |
539
|
|
|
|
|
|
|
Args : arg 1: 'query' = length of query seq (without gaps) |
540
|
|
|
|
|
|
|
'hit' = length of hit seq (without gaps) (synonyms: sbjct, subject) |
541
|
|
|
|
|
|
|
'total' = length of alignment (with gaps) |
542
|
|
|
|
|
|
|
default = 'total' |
543
|
|
|
|
|
|
|
arg 2: [optional] integer length value to set for specific type |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
=cut |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
sub length { |
548
|
|
|
|
|
|
|
|
549
|
3839
|
|
|
3839
|
1
|
4533
|
my $self = shift; |
550
|
3839
|
|
|
|
|
4116
|
my $type = shift; |
551
|
|
|
|
|
|
|
|
552
|
3839
|
100
|
|
|
|
5830
|
$type = 'total' unless defined $type; |
553
|
3839
|
|
|
|
|
5451
|
$type = lc $type; |
554
|
|
|
|
|
|
|
|
555
|
3839
|
100
|
|
|
|
11077
|
if( $type =~ /^q/i ) { |
|
|
100
|
|
|
|
|
|
556
|
1615
|
|
|
|
|
3033
|
return $self->query()->length(shift); |
557
|
|
|
|
|
|
|
} elsif( $type =~ /^(hit|subject|sbjct)/ ) { |
558
|
989
|
|
|
|
|
1901
|
return $self->hit()->length(shift); |
559
|
|
|
|
|
|
|
} else { |
560
|
1235
|
|
|
|
|
1452
|
my $v = shift; |
561
|
1235
|
100
|
|
|
|
1919
|
if( defined $v ) { |
562
|
105
|
|
|
|
|
153
|
$self->{HSP_LENGTH} = $v; |
563
|
|
|
|
|
|
|
} |
564
|
1235
|
|
|
|
|
3377
|
return $self->{HSP_LENGTH}; |
565
|
|
|
|
|
|
|
} |
566
|
0
|
|
|
|
|
0
|
return 0; # should never get here |
567
|
|
|
|
|
|
|
} |
568
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
=head2 hsp_length |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
Title : hsp_length |
572
|
|
|
|
|
|
|
Usage : my $len = $hsp->hsp_length() |
573
|
|
|
|
|
|
|
Function: shortcut length('hsp') |
574
|
|
|
|
|
|
|
Returns : floating point between 0 and 100 |
575
|
|
|
|
|
|
|
Args : none |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
=cut |
578
|
|
|
|
|
|
|
|
579
|
16
|
|
|
16
|
1
|
94
|
sub hsp_length { return shift->length('hsp', shift); } |
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
=head2 percent_identity |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
Title : percent_identity |
584
|
|
|
|
|
|
|
Usage : my $percentid = $hsp->percent_identity() |
585
|
|
|
|
|
|
|
Function: Returns the calculated percent identity for an HSP |
586
|
|
|
|
|
|
|
Returns : floating point between 0 and 100 |
587
|
|
|
|
|
|
|
Args : none |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
=cut |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
sub percent_identity { |
593
|
84
|
|
|
84
|
1
|
440
|
my $self = shift; |
594
|
|
|
|
|
|
|
|
595
|
84
|
100
|
|
|
|
235
|
unless ($self->{_did_prepi}) { |
596
|
42
|
|
|
|
|
144
|
$self->_pre_pi; |
597
|
|
|
|
|
|
|
} |
598
|
|
|
|
|
|
|
|
599
|
84
|
|
|
|
|
263
|
return $self->SUPER::percent_identity(@_); |
600
|
|
|
|
|
|
|
} |
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
=head2 frame |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
Title : frame |
605
|
|
|
|
|
|
|
Usage : my ($qframe, $hframe) = $hsp->frame('list',$queryframe,$subjectframe) |
606
|
|
|
|
|
|
|
Function: Set the Frame for both query and subject and insure that |
607
|
|
|
|
|
|
|
they agree. |
608
|
|
|
|
|
|
|
This overrides the frame() method implementation in |
609
|
|
|
|
|
|
|
FeaturePair. |
610
|
|
|
|
|
|
|
Returns : array of query and subject frame if return type wants an array |
611
|
|
|
|
|
|
|
or query frame if defined or subject frame if not defined |
612
|
|
|
|
|
|
|
Args : 'hit' or 'subject' or 'sbjct' to retrieve the frame of the subject (default) |
613
|
|
|
|
|
|
|
'query' to retrieve the query frame |
614
|
|
|
|
|
|
|
'list' or 'array' to retrieve both query and hit frames together |
615
|
|
|
|
|
|
|
Note : Frames are stored in the GFF way (0-2) not 1-3 |
616
|
|
|
|
|
|
|
as they are in BLAST (negative frames are deduced by checking |
617
|
|
|
|
|
|
|
the strand of the query or hit) |
618
|
|
|
|
|
|
|
|
619
|
|
|
|
|
|
|
=cut |
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
# Note: changed 4/19/08 - bug 2485 |
622
|
|
|
|
|
|
|
# |
623
|
|
|
|
|
|
|
# frame() is supposed to be a getter/setter (as is implied by the Function desc |
624
|
|
|
|
|
|
|
# above; this is also consistent with Bio::SeqFeature::SimilarityPair). Also, |
625
|
|
|
|
|
|
|
# the API is not consistent with the other HSP/SimilarityPair methods such as |
626
|
|
|
|
|
|
|
# strand(), start(), end(), etc. |
627
|
|
|
|
|
|
|
# |
628
|
|
|
|
|
|
|
# In order to make this consistent with other methods all work is now done |
629
|
|
|
|
|
|
|
# when the features are instantiated and not delayed. We compromise by |
630
|
|
|
|
|
|
|
# defaulting back 'to hit' w/o passed args. Setting is now allowed. |
631
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
sub frame { |
633
|
610
|
|
|
610
|
1
|
2327
|
my $self = shift; |
634
|
610
|
|
|
|
|
689
|
my $val = shift; |
635
|
610
|
50
|
|
|
|
1062
|
if (!defined $val) { |
636
|
|
|
|
|
|
|
# unfortunately, w/o args we need to warn about API changes |
637
|
0
|
|
|
|
|
0
|
$self->warn("API for frame() has changed.\n". |
638
|
|
|
|
|
|
|
"Please refer to documentation for Bio::Search::HSP::GenericHSP;\n". |
639
|
|
|
|
|
|
|
"returning query frame"); |
640
|
0
|
|
|
|
|
0
|
$val = 'query'; |
641
|
|
|
|
|
|
|
} |
642
|
610
|
|
|
|
|
1349
|
$val =~ s/^\s+//; |
643
|
|
|
|
|
|
|
|
644
|
610
|
100
|
|
|
|
1593
|
if( $val =~ /^q/i ) { |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
645
|
284
|
|
|
|
|
464
|
return $self->query->frame(@_); |
646
|
|
|
|
|
|
|
} elsif( $val =~ /^hi|^s/i ) { |
647
|
326
|
|
|
|
|
594
|
return $self->hit->frame(@_); |
648
|
|
|
|
|
|
|
} elsif ( $val =~ /^list|array/i ) { |
649
|
0
|
|
|
|
|
0
|
return ($self->query->frame($_[0]), |
650
|
|
|
|
|
|
|
$self->hit->frame($_[1]) ); |
651
|
|
|
|
|
|
|
} elsif ( $val =~ /^\d+$/) { |
652
|
|
|
|
|
|
|
# old API i.e. frame($query_frame, $hit_frame). This catches all but one |
653
|
|
|
|
|
|
|
# case, where no arg is passed (so the hit is wanted). |
654
|
0
|
|
|
|
|
0
|
$self->warn("API for frame() has changed.\n". |
655
|
|
|
|
|
|
|
"Please refer to documentation for Bio::Search::HSP::GenericHSP"); |
656
|
|
|
|
|
|
|
wantarray ? |
657
|
0
|
0
|
|
|
|
0
|
return ($self->query->frame($val), |
658
|
|
|
|
|
|
|
$self->hit->frame(@_) ) : |
659
|
|
|
|
|
|
|
return $self->hit->frame($val,@_); |
660
|
|
|
|
|
|
|
} else { |
661
|
0
|
|
|
|
|
0
|
$self->warn("unrecognized component '$val' requested\n"); |
662
|
|
|
|
|
|
|
} |
663
|
0
|
|
|
|
|
0
|
return 0; |
664
|
|
|
|
|
|
|
} |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
=head2 get_aln |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
Title : get_aln |
669
|
|
|
|
|
|
|
Usage : my $aln = $hsp->gel_aln |
670
|
|
|
|
|
|
|
Function: Returns a L object representing the HSP alignment |
671
|
|
|
|
|
|
|
Returns : L |
672
|
|
|
|
|
|
|
Args : none |
673
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
=cut |
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
sub get_aln { |
677
|
19
|
|
|
19
|
1
|
46
|
my ($self) = @_; |
678
|
19
|
|
|
|
|
165
|
require Bio::LocatableSeq; |
679
|
19
|
|
|
|
|
4073
|
require Bio::SimpleAlign; |
680
|
|
|
|
|
|
|
|
681
|
19
|
|
|
|
|
116
|
my $aln = Bio::SimpleAlign->new(); |
682
|
19
|
|
|
|
|
84
|
my $hs = $self->hit_string(); |
683
|
19
|
|
|
|
|
72
|
my $qs = $self->query_string(); |
684
|
|
|
|
|
|
|
# FASTA specific stuff moved to the FastaHSP object |
685
|
19
|
|
|
|
|
42
|
my $seqonly = $qs; |
686
|
19
|
|
|
|
|
152
|
$seqonly =~ s/[\-\s]//g; |
687
|
19
|
|
|
|
|
67
|
my ($q_nm,$s_nm) = ($self->query->seq_id(), |
688
|
|
|
|
|
|
|
$self->hit->seq_id()); |
689
|
|
|
|
|
|
|
# Should we silently change the name of the query or hit if it isn't |
690
|
|
|
|
|
|
|
# defined? May need revisiting... cjfields 2008-12-3 (commented out below) |
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
#unless( defined $q_nm && CORE::length ($q_nm) ) { |
693
|
|
|
|
|
|
|
# $q_nm = 'query'; |
694
|
|
|
|
|
|
|
#} |
695
|
|
|
|
|
|
|
#unless( defined $s_nm && CORE::length ($s_nm) ) { |
696
|
|
|
|
|
|
|
# $s_nm = 'hit'; |
697
|
|
|
|
|
|
|
#} |
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
# mapping: 1 residues for every x coordinate positions |
700
|
|
|
|
|
|
|
my $query = Bio::LocatableSeq->new( |
701
|
|
|
|
|
|
|
-seq => $qs, |
702
|
|
|
|
|
|
|
-id => $q_nm, |
703
|
|
|
|
|
|
|
-start => $self->query->start, |
704
|
|
|
|
|
|
|
-end => $self->query->end, |
705
|
|
|
|
|
|
|
-strand => $self->query->strand, |
706
|
|
|
|
|
|
|
-force_nse => $q_nm ? 0 : 1, |
707
|
19
|
100
|
|
|
|
73
|
-mapping => [ 1, $self->{_query_mapping} ] |
708
|
|
|
|
|
|
|
); |
709
|
19
|
|
|
|
|
67
|
$seqonly = $hs; |
710
|
19
|
|
|
|
|
195
|
$seqonly =~ s/[\-\s]//g; |
711
|
|
|
|
|
|
|
my $hit = Bio::LocatableSeq->new( |
712
|
|
|
|
|
|
|
-seq => $hs, |
713
|
|
|
|
|
|
|
-id => $s_nm, |
714
|
|
|
|
|
|
|
-start => $self->hit->start, |
715
|
|
|
|
|
|
|
-end => $self->hit->end, |
716
|
|
|
|
|
|
|
-strand => $self->hit->strand, |
717
|
|
|
|
|
|
|
-force_nse => $s_nm ? 0 : 1, |
718
|
19
|
50
|
|
|
|
79
|
-mapping => [ 1, $self->{_hit_mapping} ] |
719
|
|
|
|
|
|
|
); |
720
|
19
|
|
|
|
|
134
|
$aln->add_seq($query); |
721
|
19
|
|
|
|
|
65
|
$aln->add_seq($hit); |
722
|
19
|
|
|
|
|
110
|
return $aln; |
723
|
|
|
|
|
|
|
} |
724
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
=head2 num_conserved |
726
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
Title : num_conserved |
728
|
|
|
|
|
|
|
Usage : $obj->num_conserved($newval) |
729
|
|
|
|
|
|
|
Function: returns the number of conserved residues in the alignment |
730
|
|
|
|
|
|
|
Returns : integer |
731
|
|
|
|
|
|
|
Args : integer (optional) |
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
=cut |
735
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
sub num_conserved{ |
737
|
1914
|
|
|
1914
|
1
|
2496
|
my ($self,$value) = @_; |
738
|
|
|
|
|
|
|
|
739
|
1914
|
100
|
|
|
|
3164
|
unless ($self->{_did_presimilar}) { |
740
|
1
|
|
|
|
|
4
|
$self->_pre_similar_stats; |
741
|
|
|
|
|
|
|
} |
742
|
|
|
|
|
|
|
|
743
|
1914
|
50
|
|
|
|
2698
|
if (defined $value) { |
744
|
0
|
|
|
|
|
0
|
$self->{CONSERVED} = $value; |
745
|
|
|
|
|
|
|
} |
746
|
1914
|
|
|
|
|
3634
|
return $self->{CONSERVED}; |
747
|
|
|
|
|
|
|
} |
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
=head2 num_identical |
750
|
|
|
|
|
|
|
|
751
|
|
|
|
|
|
|
Title : num_identical |
752
|
|
|
|
|
|
|
Usage : $obj->num_identical($newval) |
753
|
|
|
|
|
|
|
Function: returns the number of identical residues in the alignment |
754
|
|
|
|
|
|
|
Returns : integer |
755
|
|
|
|
|
|
|
Args : integer (optional) |
756
|
|
|
|
|
|
|
|
757
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
=cut |
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
sub num_identical{ |
761
|
1923
|
|
|
1923
|
1
|
2800
|
my ($self,$value) = @_; |
762
|
|
|
|
|
|
|
|
763
|
1923
|
100
|
|
|
|
3565
|
unless ($self->{_did_presimilar}) { |
764
|
437
|
|
|
|
|
887
|
$self->_pre_similar_stats; |
765
|
|
|
|
|
|
|
} |
766
|
|
|
|
|
|
|
|
767
|
1923
|
50
|
|
|
|
2837
|
if( defined $value) { |
768
|
0
|
|
|
|
|
0
|
$self->{IDENTICAL} = $value; |
769
|
|
|
|
|
|
|
} |
770
|
1923
|
|
|
|
|
4024
|
return $self->{IDENTICAL}; |
771
|
|
|
|
|
|
|
} |
772
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
=head2 rank |
774
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
Usage : $hsp->rank( [string] ); |
776
|
|
|
|
|
|
|
Purpose : Get the rank of the HSP within a given Blast hit. |
777
|
|
|
|
|
|
|
Example : $rank = $hsp->rank; |
778
|
|
|
|
|
|
|
Returns : Integer (1..n) corresponding to the order in which the HSP |
779
|
|
|
|
|
|
|
appears in the BLAST report. |
780
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
=cut |
782
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
sub rank { |
784
|
4514
|
|
|
4514
|
1
|
5418
|
my ($self,$value) = @_; |
785
|
4514
|
50
|
|
|
|
5538
|
if( defined $value) { |
786
|
0
|
|
|
|
|
0
|
$self->{RANK} = $value; |
787
|
|
|
|
|
|
|
} |
788
|
4514
|
|
|
|
|
10365
|
return $self->{RANK}; |
789
|
|
|
|
|
|
|
} |
790
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
=head2 seq_inds |
792
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
Title : seq_inds |
794
|
|
|
|
|
|
|
Purpose : Get a list of residue positions (indices) for all identical |
795
|
|
|
|
|
|
|
: or conserved residues in the query or sbjct sequence. |
796
|
|
|
|
|
|
|
Example : @s_ind = $hsp->seq_inds('query', 'identical'); |
797
|
|
|
|
|
|
|
: @h_ind = $hsp->seq_inds('hit', 'conserved'); |
798
|
|
|
|
|
|
|
: @h_ind = $hsp->seq_inds('hit', 'conserved-not-identical'); |
799
|
|
|
|
|
|
|
: @h_ind = $hsp->seq_inds('hit', 'conserved', 1); |
800
|
|
|
|
|
|
|
Returns : List of integers |
801
|
|
|
|
|
|
|
: May include ranges if collapse is true. |
802
|
|
|
|
|
|
|
Argument : seq_type = 'query' or 'hit' or 'sbjct' (default = query) |
803
|
|
|
|
|
|
|
: ('sbjct' is synonymous with 'hit') |
804
|
|
|
|
|
|
|
: class = 'identical' - identical positions |
805
|
|
|
|
|
|
|
: 'conserved' - conserved positions |
806
|
|
|
|
|
|
|
: 'nomatch' - mismatched residue or gap positions |
807
|
|
|
|
|
|
|
: 'mismatch' - mismatched residue positions (no gaps) |
808
|
|
|
|
|
|
|
: 'gap' - gap positions only |
809
|
|
|
|
|
|
|
: 'frameshift'- frameshift positions only |
810
|
|
|
|
|
|
|
: 'conserved-not-identical' - conserved positions w/o |
811
|
|
|
|
|
|
|
: identical residues |
812
|
|
|
|
|
|
|
: The name can be shortened to 'id' or 'cons' unless |
813
|
|
|
|
|
|
|
: the name is . The default value is |
814
|
|
|
|
|
|
|
: 'identical' |
815
|
|
|
|
|
|
|
: |
816
|
|
|
|
|
|
|
: collapse = boolean, if true, consecutive positions are merged |
817
|
|
|
|
|
|
|
: using a range notation, e.g., "1 2 3 4 5 7 9 10 11" |
818
|
|
|
|
|
|
|
: collapses to "1-5 7 9-11". This is useful for |
819
|
|
|
|
|
|
|
: consolidating long lists. Default = no collapse. |
820
|
|
|
|
|
|
|
: |
821
|
|
|
|
|
|
|
Throws : n/a. |
822
|
|
|
|
|
|
|
Comments : For HSPs partially or completely derived from translated sequences |
823
|
|
|
|
|
|
|
: (TBLASTN, BLASTX, TBLASTX, or similar), some positional data |
824
|
|
|
|
|
|
|
: cannot easily be attributed to a single position (i.e. the |
825
|
|
|
|
|
|
|
: positional data is ambiguous). In these cases all three codon |
826
|
|
|
|
|
|
|
: positions are reported. Under these conditions you can check |
827
|
|
|
|
|
|
|
: ambiguous_seq_inds() to determine whether the query, subject, |
828
|
|
|
|
|
|
|
: or both are ambiguous. |
829
|
|
|
|
|
|
|
: |
830
|
|
|
|
|
|
|
See Also : L, |
831
|
|
|
|
|
|
|
L |
832
|
|
|
|
|
|
|
|
833
|
|
|
|
|
|
|
=cut |
834
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
sub seq_inds{ |
836
|
194
|
|
|
194
|
1
|
464
|
my ($self, $seqType, $class, $collapse) = @_; |
837
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
# prepare the internal structures - this is cached so |
839
|
|
|
|
|
|
|
# if the strings have not changed we're okay |
840
|
194
|
|
|
|
|
496
|
$self->_calculate_seq_positions(); |
841
|
|
|
|
|
|
|
|
842
|
194
|
|
50
|
|
|
375
|
$seqType ||= 'query'; |
843
|
194
|
|
50
|
|
|
310
|
$class ||= 'identical'; |
844
|
194
|
|
100
|
|
|
503
|
$collapse ||= 0; |
845
|
194
|
100
|
|
|
|
382
|
$seqType = 'sbjct' if $seqType eq 'hit'; |
846
|
194
|
|
|
|
|
436
|
my $t = lc(substr($seqType,0,1)); |
847
|
194
|
100
|
33
|
|
|
526
|
if( $t eq 'q' ) { |
|
|
50
|
|
|
|
|
|
848
|
100
|
|
|
|
|
150
|
$seqType = 'query'; |
849
|
|
|
|
|
|
|
} elsif ( $t eq 's' || $t eq 'h' ) { |
850
|
94
|
|
|
|
|
145
|
$seqType = 'sbjct'; |
851
|
|
|
|
|
|
|
} else { |
852
|
0
|
|
|
|
|
0
|
$self->warn("unknown seqtype $seqType using 'query'"); |
853
|
0
|
|
|
|
|
0
|
$seqType = 'query'; |
854
|
|
|
|
|
|
|
} |
855
|
|
|
|
|
|
|
|
856
|
194
|
|
|
|
|
266
|
$t = lc(substr($class,0,1)); |
857
|
|
|
|
|
|
|
|
858
|
194
|
100
|
|
|
|
705
|
if( $t eq 'c' ) { |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
859
|
8
|
50
|
|
|
|
24
|
if( $class =~ /conserved\-not\-identical/ ) { |
860
|
0
|
|
|
|
|
0
|
$class = 'conserved'; |
861
|
|
|
|
|
|
|
} else { |
862
|
8
|
|
|
|
|
17
|
$class = 'conservedall'; |
863
|
|
|
|
|
|
|
} |
864
|
|
|
|
|
|
|
} elsif( $t eq 'i' ) { |
865
|
0
|
|
|
|
|
0
|
$class = 'identical'; |
866
|
|
|
|
|
|
|
} elsif( $t eq 'n' ) { |
867
|
18
|
|
|
|
|
37
|
$class = 'nomatch'; |
868
|
|
|
|
|
|
|
} elsif( $t eq 'm' ) { |
869
|
10
|
|
|
|
|
40
|
$class = 'mismatch'; |
870
|
|
|
|
|
|
|
} elsif( $t eq 'g' ) { |
871
|
150
|
|
|
|
|
169
|
$class = 'gap'; |
872
|
|
|
|
|
|
|
} elsif( $t eq 'f' ) { |
873
|
8
|
|
|
|
|
11
|
$class = 'frameshift'; |
874
|
|
|
|
|
|
|
} else { |
875
|
0
|
|
|
|
|
0
|
$self->warn("unknown sequence class $class using 'identical'"); |
876
|
0
|
|
|
|
|
0
|
$class = 'identical'; |
877
|
|
|
|
|
|
|
} |
878
|
|
|
|
|
|
|
|
879
|
|
|
|
|
|
|
## Sensitive to member name changes. |
880
|
194
|
|
|
|
|
326
|
$seqType = "_\L$seqType\E"; |
881
|
194
|
|
|
|
|
248
|
$class = "_\L$class\E"; |
882
|
194
|
|
|
|
|
241
|
my @ary; |
883
|
|
|
|
|
|
|
|
884
|
194
|
100
|
|
|
|
341
|
if( $class eq '_gap' ) { |
|
|
100
|
|
|
|
|
|
885
|
|
|
|
|
|
|
# this means that we are remapping the gap length that is stored |
886
|
|
|
|
|
|
|
# in the hash (for example $self->{'_gapRes_query'} ) |
887
|
|
|
|
|
|
|
# so we'll return an array which has the values of the position of the |
888
|
|
|
|
|
|
|
# of the gap (the key in the hash) + the gap length (value in the |
889
|
|
|
|
|
|
|
# hash for this key - 1. |
890
|
|
|
|
|
|
|
|
891
|
|
|
|
|
|
|
# changing this; since the index is the position prior to the insertion, |
892
|
|
|
|
|
|
|
# repeat the position based on the number of gaps inserted |
893
|
521
|
|
|
|
|
440
|
@ary = map { my @tmp; |
894
|
|
|
|
|
|
|
# position holds number of gaps inserted |
895
|
521
|
|
|
|
|
885
|
for my $g (1..$self->{seqinds}{"${class}Res$seqType"}->{$_}) { |
896
|
1148
|
|
|
|
|
1283
|
push @tmp, $_ ; |
897
|
|
|
|
|
|
|
} |
898
|
521
|
|
|
|
|
772
|
@tmp} |
899
|
150
|
|
|
|
|
180
|
sort { $a <=> $b } keys %{ $self->{seqinds}{"${class}Res$seqType"}}; |
|
1882
|
|
|
|
|
1677
|
|
|
150
|
|
|
|
|
947
|
|
900
|
|
|
|
|
|
|
} elsif( $class eq '_conservedall' ) { |
901
|
13476
|
|
|
|
|
11173
|
@ary = sort { $a <=> $b } |
902
|
8
|
|
|
|
|
185
|
keys %{ $self->{seqinds}{"_conservedRes$seqType"}}, |
903
|
8
|
|
|
|
|
11
|
keys %{ $self->{seqinds}{"_identicalRes$seqType"}}, |
|
8
|
|
|
|
|
404
|
|
904
|
|
|
|
|
|
|
} else { |
905
|
36
|
|
|
|
|
63
|
@ary = sort { $a <=> $b } keys %{ $self->{seqinds}{"${class}Res$seqType"}}; |
|
28308
|
|
|
|
|
23914
|
|
|
36
|
|
|
|
|
1136
|
|
906
|
|
|
|
|
|
|
} |
907
|
194
|
100
|
|
|
|
4152
|
require Bio::Search::BlastUtils if $collapse; |
908
|
|
|
|
|
|
|
|
909
|
194
|
100
|
|
|
|
652
|
return $collapse ? &Bio::Search::SearchUtils::collapse_nums(@ary) : @ary; |
910
|
|
|
|
|
|
|
} |
911
|
|
|
|
|
|
|
|
912
|
|
|
|
|
|
|
=head2 ambiguous_seq_inds |
913
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
Title : ambiguous_seq_inds |
915
|
|
|
|
|
|
|
Purpose : returns a string denoting whether sequence indices for query, |
916
|
|
|
|
|
|
|
: subject, or both are ambiguous |
917
|
|
|
|
|
|
|
Returns : String; 'query', 'subject', 'query/subject', or empty string '' |
918
|
|
|
|
|
|
|
Argument : none |
919
|
|
|
|
|
|
|
Comments : For HSPs partially or completely derived from translated sequences |
920
|
|
|
|
|
|
|
: (TBLASTN, BLASTX, TBLASTX, or similar), some positional data |
921
|
|
|
|
|
|
|
: cannot easily be attributed to a single position (i.e. the |
922
|
|
|
|
|
|
|
: positional data is ambiguous). In these cases all three codon |
923
|
|
|
|
|
|
|
: positions are reported when using seq_inds(). Under these |
924
|
|
|
|
|
|
|
: conditions you can check ambiguous_seq_inds() to determine whether |
925
|
|
|
|
|
|
|
: the query, subject, or both are ambiguous. |
926
|
|
|
|
|
|
|
See Also : L |
927
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
=cut |
929
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
sub ambiguous_seq_inds { |
931
|
6
|
|
|
6
|
1
|
18
|
my $self = shift; |
932
|
6
|
|
|
|
|
18
|
$self->_calculate_seq_positions(); |
933
|
|
|
|
|
|
|
my $type = ($self->{_query_offset} == 3 && $self->{_sbjct_offset} == 3) ? |
934
|
|
|
|
|
|
|
'query/subject' : |
935
|
|
|
|
|
|
|
($self->{_query_offset} == 3) ? 'query' : |
936
|
6
|
100
|
100
|
|
|
50
|
($self->{_sbjct_offset} == 3) ? 'subject' : ''; |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
937
|
6
|
|
|
|
|
26
|
return $type; |
938
|
|
|
|
|
|
|
} |
939
|
|
|
|
|
|
|
|
940
|
|
|
|
|
|
|
=head2 Inherited from L |
941
|
|
|
|
|
|
|
|
942
|
|
|
|
|
|
|
These methods come from L |
943
|
|
|
|
|
|
|
|
944
|
|
|
|
|
|
|
=head2 query |
945
|
|
|
|
|
|
|
|
946
|
|
|
|
|
|
|
Title : query |
947
|
|
|
|
|
|
|
Usage : my $query = $hsp->query |
948
|
|
|
|
|
|
|
Function: Returns a SeqFeature representing the query in the HSP |
949
|
|
|
|
|
|
|
Returns : L |
950
|
|
|
|
|
|
|
Args : [optional] new value to set |
951
|
|
|
|
|
|
|
|
952
|
|
|
|
|
|
|
=cut |
953
|
|
|
|
|
|
|
|
954
|
|
|
|
|
|
|
sub query { |
955
|
5590
|
|
|
5590
|
1
|
9326
|
my $self = shift; |
956
|
5590
|
100
|
|
|
|
9786
|
unless ($self->{_created_qff}) { |
957
|
1167
|
|
|
|
|
2196
|
$self->_query_seq_feature; |
958
|
|
|
|
|
|
|
} |
959
|
5590
|
|
|
|
|
11300
|
return $self->SUPER::query(@_); |
960
|
|
|
|
|
|
|
} |
961
|
|
|
|
|
|
|
|
962
|
|
|
|
|
|
|
sub feature1 { |
963
|
6939
|
|
|
6939
|
1
|
7808
|
my $self = shift; |
964
|
6939
|
100
|
66
|
|
|
19500
|
if (! $self->{_finished_new} || $self->{_making_qff}) { |
965
|
1331
|
50
|
|
|
|
3159
|
return $self->{_sim1} if $self->{_sim1}; |
966
|
1331
|
|
|
|
|
4713
|
$self->{_sim1} = Bio::SeqFeature::Similarity->new(); |
967
|
1331
|
|
|
|
|
3662
|
return $self->{_sim1}; |
968
|
|
|
|
|
|
|
} |
969
|
5608
|
100
|
|
|
|
7940
|
unless ($self->{_created_qff}) { |
970
|
9
|
|
|
|
|
39
|
$self->_query_seq_feature; |
971
|
|
|
|
|
|
|
} |
972
|
5608
|
|
|
|
|
9846
|
return $self->SUPER::feature1(@_); |
973
|
|
|
|
|
|
|
} |
974
|
|
|
|
|
|
|
|
975
|
|
|
|
|
|
|
=head2 hit |
976
|
|
|
|
|
|
|
|
977
|
|
|
|
|
|
|
Title : hit |
978
|
|
|
|
|
|
|
Usage : my $hit = $hsp->hit |
979
|
|
|
|
|
|
|
Function: Returns a SeqFeature representing the hit in the HSP |
980
|
|
|
|
|
|
|
Returns : L |
981
|
|
|
|
|
|
|
Args : [optional] new value to set |
982
|
|
|
|
|
|
|
|
983
|
|
|
|
|
|
|
=cut |
984
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
sub hit { |
986
|
4390
|
|
|
4390
|
1
|
12786
|
my $self = shift; |
987
|
4390
|
100
|
|
|
|
7269
|
unless ($self->{_created_sff}) { |
988
|
550
|
|
|
|
|
1227
|
$self->_subject_seq_feature; |
989
|
|
|
|
|
|
|
} |
990
|
4390
|
|
|
|
|
8852
|
return $self->SUPER::hit(@_); |
991
|
|
|
|
|
|
|
} |
992
|
|
|
|
|
|
|
|
993
|
|
|
|
|
|
|
sub feature2 { |
994
|
4408
|
|
|
4408
|
1
|
8022
|
my $self = shift; |
995
|
4408
|
50
|
33
|
|
|
12313
|
if (! $self->{_finished_new} || $self->{_making_sff}) { |
996
|
0
|
0
|
|
|
|
0
|
return $self->{_sim2} if $self->{_sim2}; |
997
|
0
|
|
|
|
|
0
|
$self->{_sim2} = Bio::SeqFeature::Similarity->new(); |
998
|
0
|
|
|
|
|
0
|
return $self->{_sim2}; |
999
|
|
|
|
|
|
|
} |
1000
|
4408
|
100
|
|
|
|
6620
|
unless ($self->{_created_sff}) { |
1001
|
12
|
|
|
|
|
43
|
$self->_subject_seq_feature; |
1002
|
|
|
|
|
|
|
} |
1003
|
4408
|
|
|
|
|
8003
|
return $self->SUPER::feature2(@_); |
1004
|
|
|
|
|
|
|
} |
1005
|
|
|
|
|
|
|
|
1006
|
|
|
|
|
|
|
=head2 significance |
1007
|
|
|
|
|
|
|
|
1008
|
|
|
|
|
|
|
Title : significance |
1009
|
|
|
|
|
|
|
Usage : $evalue = $obj->significance(); |
1010
|
|
|
|
|
|
|
$obj->significance($evalue); |
1011
|
|
|
|
|
|
|
Function: Get/Set the significance value |
1012
|
|
|
|
|
|
|
Returns : numeric |
1013
|
|
|
|
|
|
|
Args : [optional] new value to set |
1014
|
|
|
|
|
|
|
|
1015
|
|
|
|
|
|
|
=cut |
1016
|
|
|
|
|
|
|
|
1017
|
|
|
|
|
|
|
# Override significance to return the e-value or, if this is |
1018
|
|
|
|
|
|
|
# not defined (WU-BLAST), return the p-value. |
1019
|
|
|
|
|
|
|
sub significance { |
1020
|
19
|
|
|
19
|
1
|
56
|
my ($self, $val) = @_; |
1021
|
19
|
50
|
33
|
|
|
79
|
if (!defined $self->{SIGNIFICANCE} || defined $val) { |
1022
|
19
|
50
|
|
|
|
93
|
$self->{SIGNIFICANCE} = defined $val ? $val : |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1023
|
|
|
|
|
|
|
defined $self->evalue ? $self->evalue : |
1024
|
|
|
|
|
|
|
defined $self->pvalue ? $$self->pvalue : |
1025
|
|
|
|
|
|
|
undef; |
1026
|
19
|
|
|
|
|
54
|
$self->query->significance($self->{SIGNIFICANCE}); |
1027
|
|
|
|
|
|
|
} |
1028
|
19
|
|
|
|
|
93
|
return $self->{SIGNIFICANCE}; |
1029
|
|
|
|
|
|
|
} |
1030
|
|
|
|
|
|
|
|
1031
|
|
|
|
|
|
|
=head2 strand |
1032
|
|
|
|
|
|
|
|
1033
|
|
|
|
|
|
|
Title : strand |
1034
|
|
|
|
|
|
|
Usage : $hsp->strand('query') |
1035
|
|
|
|
|
|
|
Function: Retrieves the strand for the HSP component requested |
1036
|
|
|
|
|
|
|
Returns : +1 or -1 |
1037
|
|
|
|
|
|
|
Args : 'hit' or 'subject' or 'sbjct' to retrieve the strand of the subject, |
1038
|
|
|
|
|
|
|
'query' to retrieve the query strand (default) |
1039
|
|
|
|
|
|
|
|
1040
|
|
|
|
|
|
|
=cut |
1041
|
|
|
|
|
|
|
|
1042
|
|
|
|
|
|
|
sub strand { |
1043
|
587
|
|
|
587
|
1
|
1044
|
my ($self,$type) = @_; |
1044
|
|
|
|
|
|
|
|
1045
|
587
|
100
|
100
|
|
|
4280
|
if( $type =~ /^q/i && defined $self->{'QUERY_STRAND'} ) { |
|
|
100
|
100
|
|
|
|
|
1046
|
2
|
|
|
|
|
16
|
return $self->{'QUERY_STRAND'}; |
1047
|
|
|
|
|
|
|
} elsif( $type =~ /^(hit|subject|sbjct)/i && defined $self->{'HIT_STRAND'} ) { |
1048
|
2
|
|
|
|
|
20
|
return $self->{'HIT_STRAND'}; |
1049
|
|
|
|
|
|
|
} |
1050
|
|
|
|
|
|
|
|
1051
|
583
|
|
|
|
|
1755
|
return $self->SUPER::strand($type) |
1052
|
|
|
|
|
|
|
} |
1053
|
|
|
|
|
|
|
|
1054
|
|
|
|
|
|
|
=head2 score |
1055
|
|
|
|
|
|
|
|
1056
|
|
|
|
|
|
|
Title : score |
1057
|
|
|
|
|
|
|
Usage : $score = $obj->score(); |
1058
|
|
|
|
|
|
|
$obj->score($value); |
1059
|
|
|
|
|
|
|
Function: Get/Set the score value |
1060
|
|
|
|
|
|
|
Returns : numeric |
1061
|
|
|
|
|
|
|
Args : [optional] new value to set |
1062
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
=head2 bits |
1064
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
Title : bits |
1066
|
|
|
|
|
|
|
Usage : $bits = $obj->bits(); |
1067
|
|
|
|
|
|
|
$obj->bits($value); |
1068
|
|
|
|
|
|
|
Function: Get/Set the bits value |
1069
|
|
|
|
|
|
|
Returns : numeric |
1070
|
|
|
|
|
|
|
Args : [optional] new value to set |
1071
|
|
|
|
|
|
|
|
1072
|
|
|
|
|
|
|
=head1 Private methods |
1073
|
|
|
|
|
|
|
|
1074
|
|
|
|
|
|
|
=cut |
1075
|
|
|
|
|
|
|
|
1076
|
|
|
|
|
|
|
=head2 _calculate_seq_positions |
1077
|
|
|
|
|
|
|
|
1078
|
|
|
|
|
|
|
Title : _calculate_seq_positions |
1079
|
|
|
|
|
|
|
Usage : $self->_calculate_seq_positions |
1080
|
|
|
|
|
|
|
Function: Internal function |
1081
|
|
|
|
|
|
|
Returns : |
1082
|
|
|
|
|
|
|
Args : |
1083
|
|
|
|
|
|
|
|
1084
|
|
|
|
|
|
|
|
1085
|
|
|
|
|
|
|
=cut |
1086
|
|
|
|
|
|
|
|
1087
|
|
|
|
|
|
|
sub _calculate_seq_positions { |
1088
|
205
|
|
|
205
|
|
349
|
my ($self,@args) = @_; |
1089
|
205
|
100
|
|
|
|
529
|
return unless ( $self->{'_sequenceschanged'} ); |
1090
|
73
|
|
|
|
|
104
|
$self->{'_sequenceschanged'} = 0; |
1091
|
73
|
|
|
|
|
170
|
my ($seqString, $qseq,$sseq) = ( $self->homology_string(), |
1092
|
|
|
|
|
|
|
$self->query_string(), |
1093
|
|
|
|
|
|
|
$self->hit_string() ); |
1094
|
73
|
|
|
|
|
195
|
my ($mlen, $qlen, $slen) = (CORE::length($seqString), CORE::length($qseq), CORE::length($sseq)); |
1095
|
73
|
|
100
|
|
|
148
|
my $qdir = $self->query->strand || 1; |
1096
|
73
|
|
100
|
|
|
176
|
my $sdir = $self->hit->strand || 1; |
1097
|
73
|
100
|
|
|
|
237
|
my ($resCount_query, $endpoint_query) = ($qdir <=0) ? ($self->query->end, $self->query->start) |
1098
|
|
|
|
|
|
|
: ($self->query->start, $self->query->end); |
1099
|
73
|
100
|
|
|
|
259
|
my ($resCount_sbjct, $endpoint_sbjct) = ($sdir <=0) ? ($self->hit->end, $self->hit->start) |
1100
|
|
|
|
|
|
|
: ($self->hit->start, $self->hit->end); |
1101
|
|
|
|
|
|
|
|
1102
|
73
|
|
|
|
|
193
|
my $prog = $self->algorithm; |
1103
|
|
|
|
|
|
|
|
1104
|
73
|
100
|
|
|
|
346
|
if( $prog =~ /FAST|SSEARCH|SMITH-WATERMAN/i ) { |
1105
|
|
|
|
|
|
|
|
1106
|
|
|
|
|
|
|
# we infer the end of the regional sequence where the first and last |
1107
|
|
|
|
|
|
|
# non spaces are in the homology string |
1108
|
|
|
|
|
|
|
# then we use the HSP->length to tell us how far to read |
1109
|
|
|
|
|
|
|
# to cut off the end of the sequence |
1110
|
|
|
|
|
|
|
|
1111
|
4
|
|
|
|
|
29
|
my ($start, $rest) = (0,0); |
1112
|
4
|
50
|
|
|
|
100
|
if( $seqString =~ /^(\s+)?(.*?)\s*$/ ) { |
1113
|
4
|
100
|
|
|
|
25
|
($start, $rest) = ($1 ? CORE::length($1) : 0, CORE::length($2)); |
1114
|
|
|
|
|
|
|
} |
1115
|
|
|
|
|
|
|
|
1116
|
4
|
|
|
|
|
15
|
$seqString = substr($seqString, $start, $rest); |
1117
|
4
|
|
|
|
|
10
|
$qseq = substr($qseq, $start, $rest); |
1118
|
4
|
|
|
|
|
14
|
$sseq = substr($sseq, $start, $rest); |
1119
|
|
|
|
|
|
|
|
1120
|
|
|
|
|
|
|
# commented out 10/29/08 |
1121
|
|
|
|
|
|
|
# removing frameshift symbols doesn't take into account the following: |
1122
|
|
|
|
|
|
|
# 1) does not remove the same point in the homology string (get |
1123
|
|
|
|
|
|
|
# positional errors) |
1124
|
|
|
|
|
|
|
# 2) adjustments to the overall position in the string due to the |
1125
|
|
|
|
|
|
|
# frameshift must be taken into consideration (get balancing errors) |
1126
|
|
|
|
|
|
|
# |
1127
|
|
|
|
|
|
|
# Frameshifts will be handled directly in the main loop. |
1128
|
|
|
|
|
|
|
# --chris |
1129
|
|
|
|
|
|
|
|
1130
|
|
|
|
|
|
|
# fasta reports some extra 'regional' sequence information |
1131
|
|
|
|
|
|
|
# we need to clear out first |
1132
|
|
|
|
|
|
|
# this seemed a bit insane to me at first, but it appears to |
1133
|
|
|
|
|
|
|
# work --jason |
1134
|
|
|
|
|
|
|
|
1135
|
|
|
|
|
|
|
#$qseq =~ s![\\\/]!!g; |
1136
|
|
|
|
|
|
|
#$sseq =~ s![\\\/]!!g; |
1137
|
|
|
|
|
|
|
} |
1138
|
|
|
|
|
|
|
|
1139
|
73
|
100
|
66
|
|
|
315
|
if (!defined($self->{_sbjct_offset}) || !defined($self->{_query_offset})) { |
1140
|
2
|
|
|
|
|
11
|
$self->_calculate_seq_offsets(); |
1141
|
|
|
|
|
|
|
} |
1142
|
|
|
|
|
|
|
|
1143
|
73
|
|
|
|
|
149
|
my ($qfs, $sfs) = (0,0); |
1144
|
|
|
|
|
|
|
CHAR_LOOP: |
1145
|
73
|
|
|
|
|
240
|
for my $pos (0..CORE::length($seqString)-1) { |
1146
|
30602
|
|
|
|
|
42446
|
my @qrange = (0 - $qfs)..($self->{_query_offset} - 1); |
1147
|
30602
|
|
|
|
|
33239
|
my @srange = (0 - $sfs)..($self->{_sbjct_offset} - 1); |
1148
|
|
|
|
|
|
|
# $self->debug("QRange:@qrange SRange:@srange\n") if ($qfs || $sfs); |
1149
|
30602
|
|
|
|
|
30831
|
($qfs, $sfs) = (0,0); |
1150
|
30602
|
50
|
100
|
|
|
115853
|
my ($mchar, $qchar, $schar) = ( |
|
|
50
|
|
|
|
|
|
1151
|
|
|
|
|
|
|
unpack("x$pos A1",$seqString) || ' ', |
1152
|
|
|
|
|
|
|
$pos < CORE::length($qseq) ? unpack("x$pos A1",$qseq) : '-', |
1153
|
|
|
|
|
|
|
$pos < CORE::length($sseq) ? unpack("x$pos A1",$sseq) : '-' |
1154
|
|
|
|
|
|
|
); |
1155
|
30602
|
|
|
|
|
34000
|
my $matchtype = ''; |
1156
|
30602
|
|
|
|
|
29410
|
my ($qgap, $sgap) = (0,0); |
1157
|
30602
|
100
|
100
|
|
|
91214
|
if( $mchar eq '+' || $mchar eq '.') { # conserved |
|
|
100
|
100
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1158
|
757
|
|
|
|
|
2521
|
$self->{seqinds}{_conservedRes_query}{ $resCount_query + ($_ * $qdir) } = 1 for @qrange; |
1159
|
757
|
|
|
|
|
3485
|
$self->{seqinds}{_conservedRes_sbjct}{ $resCount_sbjct + ($_ * $sdir) } = 1 for @srange; |
1160
|
757
|
|
|
|
|
841
|
$matchtype = 'conserved'; |
1161
|
|
|
|
|
|
|
} elsif( $mchar eq ':' || $mchar ne ' ' ) { # identical |
1162
|
25371
|
|
|
|
|
60121
|
$self->{seqinds}{_identicalRes_query}{ $resCount_query + ($_ * $qdir) } = 1 for @qrange; |
1163
|
25371
|
|
|
|
|
48150
|
$self->{seqinds}{_identicalRes_sbjct}{ $resCount_sbjct + ($_ * $sdir) } = 1 for @srange; |
1164
|
25371
|
|
|
|
|
25370
|
$matchtype = 'identical'; |
1165
|
|
|
|
|
|
|
} elsif( $mchar eq ' ' ) { # possible mismatch/nomatch/frameshift |
1166
|
4474
|
100
|
|
|
|
6045
|
$qfs = $qchar eq '/' ? -1 : # base inserted to match frame |
|
|
50
|
|
|
|
|
|
1167
|
|
|
|
|
|
|
$qchar eq '\\' ? 1 : # base deleted to match frame |
1168
|
|
|
|
|
|
|
0; |
1169
|
4474
|
50
|
|
|
|
5733
|
$sfs = $schar eq '/' ? -1 : |
|
|
50
|
|
|
|
|
|
1170
|
|
|
|
|
|
|
$schar eq '\\' ? 1 : |
1171
|
|
|
|
|
|
|
0; |
1172
|
4474
|
100
|
|
|
|
8572
|
if ($qfs) { |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
1173
|
|
|
|
|
|
|
# Frameshifts are tricky. |
1174
|
|
|
|
|
|
|
|
1175
|
|
|
|
|
|
|
# '/' indicates that the next residue is shifted back one |
1176
|
|
|
|
|
|
|
# (-1) frame position and is a deletion of one base (so to |
1177
|
|
|
|
|
|
|
# correctly align, a base is inserted). That residue should only |
1178
|
|
|
|
|
|
|
# occupy two sequence positions instead of three. |
1179
|
|
|
|
|
|
|
|
1180
|
|
|
|
|
|
|
# '\' indicates that the next residue is shifted forward |
1181
|
|
|
|
|
|
|
# one (+1) frame position and is an insertion of one base (so to |
1182
|
|
|
|
|
|
|
# correctly align, a base is removed). That residue should |
1183
|
|
|
|
|
|
|
# occupy four sequence positions instead of three. |
1184
|
|
|
|
|
|
|
|
1185
|
|
|
|
|
|
|
# Note that gaps are not counted across from frameshifts. |
1186
|
|
|
|
|
|
|
# Frameshift indices are a range of positions starting in the |
1187
|
|
|
|
|
|
|
# previous sequence position in which the frameshift occurs; |
1188
|
|
|
|
|
|
|
# they are ambiguous by nature. |
1189
|
2
|
|
|
|
|
14
|
$self->{seqinds}{_frameshiftRes_query}{ $resCount_query - ($_ * $qdir * $qfs) } = $qfs for @qrange; |
1190
|
2
|
|
|
|
|
5
|
$matchtype = "$qfs frameshift-query"; |
1191
|
2
|
|
|
|
|
3
|
$sgap = $qgap = 1; |
1192
|
|
|
|
|
|
|
} |
1193
|
|
|
|
|
|
|
elsif ($sfs) { |
1194
|
0
|
|
|
|
|
0
|
$self->{seqinds}{_frameshiftRes_sbjct}{ $resCount_sbjct - ($_ * $sdir * $sfs) } = $sfs for @srange; |
1195
|
0
|
|
|
|
|
0
|
$matchtype = "$sfs frameshift-sbcjt"; |
1196
|
0
|
|
|
|
|
0
|
$sgap = $qgap = 1; |
1197
|
|
|
|
|
|
|
} |
1198
|
|
|
|
|
|
|
elsif ($qchar eq $self->{GAP_SYMBOL}) { |
1199
|
|
|
|
|
|
|
# gap are counted as being in the immediately preceeding residue |
1200
|
|
|
|
|
|
|
# position; for translated sequences this is not the start of |
1201
|
|
|
|
|
|
|
# the previous codon, but the third codon position |
1202
|
512
|
|
|
|
|
1034
|
$self->{seqinds}{_gapRes_query}{ $resCount_query - $qdir }++ for @qrange; |
1203
|
512
|
|
|
|
|
635
|
$matchtype = 'gap-query'; |
1204
|
512
|
|
|
|
|
467
|
$qgap++; |
1205
|
|
|
|
|
|
|
} |
1206
|
|
|
|
|
|
|
elsif ($schar eq $self->{GAP_SYMBOL}) { |
1207
|
491
|
|
|
|
|
973
|
$self->{seqinds}{_gapRes_sbjct}{ $resCount_sbjct - $sdir }++ for @srange; |
1208
|
491
|
|
|
|
|
516
|
$matchtype = 'gap-sbjct'; |
1209
|
491
|
|
|
|
|
467
|
$sgap++; |
1210
|
|
|
|
|
|
|
} |
1211
|
|
|
|
|
|
|
else { |
1212
|
|
|
|
|
|
|
# if not a gap or frameshift in either seq, must be mismatch |
1213
|
3469
|
|
|
|
|
9023
|
$self->{seqinds}{_mismatchRes_query}{ $resCount_query + ($_ * $qdir) } = 1 for @qrange; |
1214
|
3469
|
|
|
|
|
9528
|
$self->{seqinds}{_mismatchRes_sbjct}{ $resCount_sbjct + ($_ * $sdir) } = 1 for @srange; |
1215
|
3469
|
|
|
|
|
3663
|
$matchtype = 'mismatch'; |
1216
|
|
|
|
|
|
|
} |
1217
|
|
|
|
|
|
|
# always add a nomatch unless the current position in the seq is a gap |
1218
|
4474
|
100
|
|
|
|
5654
|
if (!$sgap) { |
1219
|
3981
|
|
|
|
|
10297
|
$self->{seqinds}{_nomatchRes_sbjct}{ $resCount_sbjct + ($_ * $sdir) } = 1 for @srange; |
1220
|
|
|
|
|
|
|
} |
1221
|
4474
|
100
|
|
|
|
5580
|
if (!$qgap) { |
1222
|
3960
|
|
|
|
|
7749
|
$self->{seqinds}{_nomatchRes_query}{ $resCount_query + ($_ * $qdir) } = 1 for @qrange; |
1223
|
|
|
|
|
|
|
} |
1224
|
|
|
|
|
|
|
} else { |
1225
|
0
|
|
|
|
|
0
|
$self->warn("Unknown midline character: [$mchar]"); |
1226
|
|
|
|
|
|
|
} |
1227
|
|
|
|
|
|
|
# leave in and uncomment for future debugging |
1228
|
|
|
|
|
|
|
#$self->debug(sprintf("%7d %1s[%1s]%1s %-7d Type: %-20s QOff:%-2d SOff:%-2d\n", |
1229
|
|
|
|
|
|
|
# $resCount_query, |
1230
|
|
|
|
|
|
|
# $qchar, |
1231
|
|
|
|
|
|
|
# $mchar, |
1232
|
|
|
|
|
|
|
# $schar, |
1233
|
|
|
|
|
|
|
# $resCount_sbjct, |
1234
|
|
|
|
|
|
|
# $matchtype, |
1235
|
|
|
|
|
|
|
# ($self->{_query_offset} * $qdir), |
1236
|
|
|
|
|
|
|
# ($self->{_sbjct_offset} * $sdir))); |
1237
|
30602
|
100
|
|
|
|
41328
|
$resCount_query += ($qdir * (scalar(@qrange) + $qfs)) if !$qgap; |
1238
|
30602
|
100
|
|
|
|
50956
|
$resCount_sbjct += ($sdir * (scalar(@srange) + $sfs)) if !$sgap; |
1239
|
|
|
|
|
|
|
} |
1240
|
73
|
|
|
|
|
169
|
return 1; |
1241
|
|
|
|
|
|
|
} |
1242
|
|
|
|
|
|
|
|
1243
|
|
|
|
|
|
|
sub _calculate_seq_offsets { |
1244
|
107
|
|
|
107
|
|
136
|
my $self = shift; |
1245
|
107
|
|
|
|
|
230
|
my $prog = $self->algorithm; |
1246
|
107
|
|
|
|
|
269
|
($self->{_sbjct_offset}, $self->{_query_offset}) = (1,1); |
1247
|
107
|
100
|
|
|
|
560
|
if($prog =~ /^(?:PSI)?T(BLAST|FAST)(N|X|Y)/oi ) { |
|
|
100
|
|
|
|
|
|
1248
|
68
|
|
|
|
|
96
|
$self->{_sbjct_offset} = 3; |
1249
|
68
|
100
|
66
|
|
|
283
|
if ($1 eq 'BLAST' && $2 eq 'X') { #TBLASTX |
1250
|
67
|
|
|
|
|
98
|
$self->{_query_offset} = 3; |
1251
|
|
|
|
|
|
|
} |
1252
|
|
|
|
|
|
|
} elsif($prog =~ /^(BLAST|FAST)(X|Y|XY)/oi ) { |
1253
|
3
|
|
|
|
|
9
|
$self->{_query_offset} = 3; |
1254
|
|
|
|
|
|
|
} |
1255
|
107
|
|
|
|
|
151
|
1; |
1256
|
|
|
|
|
|
|
} |
1257
|
|
|
|
|
|
|
|
1258
|
|
|
|
|
|
|
=head2 n |
1259
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
See documentation in L |
1261
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
=cut |
1263
|
|
|
|
|
|
|
|
1264
|
|
|
|
|
|
|
sub n { |
1265
|
113
|
|
|
113
|
1
|
274
|
my $self = shift; |
1266
|
113
|
50
|
|
|
|
207
|
if(@_) { $self->{'N'} = shift; } |
|
0
|
|
|
|
|
0
|
|
1267
|
|
|
|
|
|
|
# note that returning 1 is completely an assumption |
1268
|
113
|
100
|
|
|
|
356
|
defined $self->{'N'} ? $self->{'N'} : 1; |
1269
|
|
|
|
|
|
|
} |
1270
|
|
|
|
|
|
|
|
1271
|
|
|
|
|
|
|
=head2 range |
1272
|
|
|
|
|
|
|
|
1273
|
|
|
|
|
|
|
See documentation in L |
1274
|
|
|
|
|
|
|
|
1275
|
|
|
|
|
|
|
=cut |
1276
|
|
|
|
|
|
|
|
1277
|
|
|
|
|
|
|
sub range { |
1278
|
1659
|
|
|
1659
|
1
|
2485
|
my ($self, $seqType) = @_; |
1279
|
|
|
|
|
|
|
|
1280
|
1659
|
|
100
|
|
|
2371
|
$seqType ||= 'query'; |
1281
|
1659
|
100
|
|
|
|
2479
|
$seqType = 'sbjct' if $seqType eq 'hit'; |
1282
|
|
|
|
|
|
|
|
1283
|
1659
|
|
|
|
|
1752
|
my ($start, $end); |
1284
|
1659
|
100
|
|
|
|
2541
|
if( $seqType eq 'query' ) { |
1285
|
840
|
|
|
|
|
1516
|
$start = $self->query->start; |
1286
|
840
|
|
|
|
|
1376
|
$end = $self->query->end; |
1287
|
|
|
|
|
|
|
} |
1288
|
|
|
|
|
|
|
else { |
1289
|
819
|
|
|
|
|
1479
|
$start = $self->hit->start; |
1290
|
819
|
|
|
|
|
1450
|
$end = $self->hit->end; |
1291
|
|
|
|
|
|
|
} |
1292
|
1659
|
|
|
|
|
4819
|
return ($start, $end); |
1293
|
|
|
|
|
|
|
} |
1294
|
|
|
|
|
|
|
|
1295
|
|
|
|
|
|
|
|
1296
|
|
|
|
|
|
|
=head2 links |
1297
|
|
|
|
|
|
|
|
1298
|
|
|
|
|
|
|
Title : links |
1299
|
|
|
|
|
|
|
Usage : $obj->links($newval) |
1300
|
|
|
|
|
|
|
Function: Get/Set the Links value (from WU-BLAST) |
1301
|
|
|
|
|
|
|
Indicates the placement of the alignment in the group of HSPs |
1302
|
|
|
|
|
|
|
Returns : Value of links |
1303
|
|
|
|
|
|
|
Args : On set, new value (a scalar or undef, optional) |
1304
|
|
|
|
|
|
|
|
1305
|
|
|
|
|
|
|
|
1306
|
|
|
|
|
|
|
=cut |
1307
|
|
|
|
|
|
|
|
1308
|
|
|
|
|
|
|
sub links{ |
1309
|
54
|
|
|
54
|
1
|
94
|
my $self = shift; |
1310
|
|
|
|
|
|
|
|
1311
|
54
|
50
|
|
|
|
119
|
return $self->{LINKS} = shift if @_; |
1312
|
54
|
|
|
|
|
176
|
return $self->{LINKS}; |
1313
|
|
|
|
|
|
|
} |
1314
|
|
|
|
|
|
|
|
1315
|
|
|
|
|
|
|
=head2 hsp_group |
1316
|
|
|
|
|
|
|
|
1317
|
|
|
|
|
|
|
Title : hsp_group |
1318
|
|
|
|
|
|
|
Usage : $obj->hsp_group($newval) |
1319
|
|
|
|
|
|
|
Function: Get/Set the Group value (from WU-BLAST) |
1320
|
|
|
|
|
|
|
Indicates a grouping of HSPs |
1321
|
|
|
|
|
|
|
Returns : Value of group |
1322
|
|
|
|
|
|
|
Args : On set, new value (a scalar or undef, optional) |
1323
|
|
|
|
|
|
|
|
1324
|
|
|
|
|
|
|
|
1325
|
|
|
|
|
|
|
=cut |
1326
|
|
|
|
|
|
|
|
1327
|
|
|
|
|
|
|
sub hsp_group { |
1328
|
12
|
|
|
12
|
1
|
29
|
my $self = shift; |
1329
|
|
|
|
|
|
|
|
1330
|
12
|
50
|
|
|
|
41
|
return $self->{HSP_GROUP} = shift if @_; |
1331
|
12
|
|
|
|
|
53
|
return $self->{HSP_GROUP}; |
1332
|
|
|
|
|
|
|
} |
1333
|
|
|
|
|
|
|
|
1334
|
|
|
|
|
|
|
=head2 hit_features |
1335
|
|
|
|
|
|
|
|
1336
|
|
|
|
|
|
|
Title : hit_features |
1337
|
|
|
|
|
|
|
Usage : $obj->hit_features($newval) |
1338
|
|
|
|
|
|
|
Function: Get/Set the HSP hit feature string (from some BLAST 2.2.13 text |
1339
|
|
|
|
|
|
|
output), which is a string of overlapping or nearby features in HSP |
1340
|
|
|
|
|
|
|
hit |
1341
|
|
|
|
|
|
|
Returns : Value of hit features, if present |
1342
|
|
|
|
|
|
|
Args : On set, new value (a scalar or undef, optional) |
1343
|
|
|
|
|
|
|
|
1344
|
|
|
|
|
|
|
|
1345
|
|
|
|
|
|
|
=cut |
1346
|
|
|
|
|
|
|
|
1347
|
|
|
|
|
|
|
sub hit_features { |
1348
|
6
|
|
|
6
|
1
|
35
|
my $self = shift; |
1349
|
|
|
|
|
|
|
|
1350
|
6
|
50
|
|
|
|
19
|
return $self->{HIT_FEATURES} = shift if @_; |
1351
|
6
|
|
|
|
|
27
|
return $self->{HIT_FEATURES}; |
1352
|
|
|
|
|
|
|
} |
1353
|
|
|
|
|
|
|
|
1354
|
|
|
|
|
|
|
# The cigar string code is written by Juguang Xiao |
1355
|
|
|
|
|
|
|
|
1356
|
|
|
|
|
|
|
=head1 Brief introduction on cigar string |
1357
|
|
|
|
|
|
|
|
1358
|
|
|
|
|
|
|
NOTE: the concept is originally from EnsEMBL docs at |
1359
|
|
|
|
|
|
|
http://may2005.archive.ensembl.org/Docs/wiki/html/EnsemblDocs/CigarFormat.html |
1360
|
|
|
|
|
|
|
Please append to these docs if you have a better definition. |
1361
|
|
|
|
|
|
|
|
1362
|
|
|
|
|
|
|
Sequence alignment hits can be stored in a database as ungapped alignments. |
1363
|
|
|
|
|
|
|
This imposes 2 major constraints on alignments: |
1364
|
|
|
|
|
|
|
|
1365
|
|
|
|
|
|
|
a) alignments for a single hit record require multiple rows in the database, |
1366
|
|
|
|
|
|
|
and |
1367
|
|
|
|
|
|
|
b) it is not possible to accurately retrieve the exact original alignment. |
1368
|
|
|
|
|
|
|
|
1369
|
|
|
|
|
|
|
Alternatively, sequence alignments can be stored as gapped alignments using |
1370
|
|
|
|
|
|
|
the CIGAR line format (where CIGAR stands for Concise Idiosyncratic Gapped |
1371
|
|
|
|
|
|
|
Alignment Report). |
1372
|
|
|
|
|
|
|
|
1373
|
|
|
|
|
|
|
In the cigar line format alignments are stored as follows: |
1374
|
|
|
|
|
|
|
|
1375
|
|
|
|
|
|
|
M: Match |
1376
|
|
|
|
|
|
|
D: Deletion |
1377
|
|
|
|
|
|
|
I: Insertion |
1378
|
|
|
|
|
|
|
|
1379
|
|
|
|
|
|
|
An example of an alignment for a hypthetical protein match is shown below: |
1380
|
|
|
|
|
|
|
|
1381
|
|
|
|
|
|
|
|
1382
|
|
|
|
|
|
|
Query: 42 PGPAGLP----GSVGLQGPRGLRGPLP-GPLGPPL... |
1383
|
|
|
|
|
|
|
|
1384
|
|
|
|
|
|
|
PG P G GP R PLGP |
1385
|
|
|
|
|
|
|
|
1386
|
|
|
|
|
|
|
Sbjct: 1672 PGTP*TPLVPLGPWVPLGPSSPR--LPSGPLGPTD... |
1387
|
|
|
|
|
|
|
|
1388
|
|
|
|
|
|
|
|
1389
|
|
|
|
|
|
|
protein_align_feature table as the following cigar line: |
1390
|
|
|
|
|
|
|
|
1391
|
|
|
|
|
|
|
7M4D12M2I2MD7M |
1392
|
|
|
|
|
|
|
|
1393
|
|
|
|
|
|
|
=head2 cigar_string |
1394
|
|
|
|
|
|
|
|
1395
|
|
|
|
|
|
|
Name: cigar_string |
1396
|
|
|
|
|
|
|
Usage: $cigar_string = $hsp->cigar_string |
1397
|
|
|
|
|
|
|
Function: Generate and return cigar string for this HSP alignment |
1398
|
|
|
|
|
|
|
Args: No input needed |
1399
|
|
|
|
|
|
|
Return: a cigar string |
1400
|
|
|
|
|
|
|
|
1401
|
|
|
|
|
|
|
=cut |
1402
|
|
|
|
|
|
|
|
1403
|
|
|
|
|
|
|
|
1404
|
|
|
|
|
|
|
sub cigar_string { |
1405
|
8
|
|
|
8
|
1
|
24
|
my ($self, $arg) = @_; |
1406
|
8
|
50
|
|
|
|
17
|
$self->warn("this is not a setter") if(defined $arg); |
1407
|
|
|
|
|
|
|
|
1408
|
8
|
100
|
|
|
|
16
|
unless(defined $self->{_cigar_string}){ # generate cigar string |
1409
|
7
|
|
|
|
|
22
|
my $cigar_string = $self->generate_cigar_string($self->query_string, $self->hit_string); |
1410
|
7
|
|
|
|
|
14
|
$self->{_cigar_string} = $cigar_string; |
1411
|
|
|
|
|
|
|
} # end of unless |
1412
|
|
|
|
|
|
|
|
1413
|
8
|
|
|
|
|
29
|
return $self->{_cigar_string}; |
1414
|
|
|
|
|
|
|
} |
1415
|
|
|
|
|
|
|
|
1416
|
|
|
|
|
|
|
=head2 generate_cigar_string |
1417
|
|
|
|
|
|
|
|
1418
|
|
|
|
|
|
|
Name: generate_cigar_string |
1419
|
|
|
|
|
|
|
Usage: my $cigar_string = Bio::Search::HSP::GenericHSP::generate_cigar_string ($qstr, $hstr); |
1420
|
|
|
|
|
|
|
Function: generate cigar string from a simple sequence of alignment. |
1421
|
|
|
|
|
|
|
Args: the string of query and subject |
1422
|
|
|
|
|
|
|
Return: cigar string |
1423
|
|
|
|
|
|
|
|
1424
|
|
|
|
|
|
|
=cut |
1425
|
|
|
|
|
|
|
|
1426
|
|
|
|
|
|
|
sub generate_cigar_string { |
1427
|
7
|
|
|
7
|
1
|
13
|
my ($self, $qstr, $hstr) = @_; |
1428
|
7
|
|
|
|
|
211
|
my @qchars = split //, $qstr; |
1429
|
7
|
|
|
|
|
193
|
my @hchars = split //, $hstr; |
1430
|
|
|
|
|
|
|
|
1431
|
7
|
50
|
|
|
|
16
|
unless(scalar(@qchars) == scalar(@hchars)){ |
1432
|
0
|
|
|
|
|
0
|
$self->throw("two sequences are not equal in lengths"); |
1433
|
|
|
|
|
|
|
} |
1434
|
|
|
|
|
|
|
|
1435
|
7
|
|
|
|
|
15
|
$self->{_count_for_cigar_string} = 0; |
1436
|
7
|
|
|
|
|
10
|
$self->{_state_for_cigar_string} = 'M'; |
1437
|
|
|
|
|
|
|
|
1438
|
7
|
|
|
|
|
9
|
my $cigar_string = ''; |
1439
|
7
|
|
|
|
|
17
|
for(my $i=0; $i <= $#qchars; $i++){ |
1440
|
1917
|
|
|
|
|
1984
|
my $qchar = $qchars[$i]; |
1441
|
1917
|
|
|
|
|
1658
|
my $hchar = $hchars[$i]; |
1442
|
1917
|
100
|
100
|
|
|
3961
|
if($qchar ne $self->{GAP_SYMBOL} && $hchar ne $self->{GAP_SYMBOL}){ # Match |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1443
|
1837
|
|
|
|
|
2096
|
$cigar_string .= $self->_sub_cigar_string('M'); |
1444
|
|
|
|
|
|
|
}elsif($qchar eq $self->{GAP_SYMBOL}){ # Deletion |
1445
|
13
|
|
|
|
|
22
|
$cigar_string .= $self->_sub_cigar_string('D'); |
1446
|
|
|
|
|
|
|
}elsif($hchar eq $self->{GAP_SYMBOL}){ # Insertion |
1447
|
67
|
|
|
|
|
85
|
$cigar_string .= $self->_sub_cigar_string('I'); |
1448
|
|
|
|
|
|
|
}else{ |
1449
|
0
|
|
|
|
|
0
|
$self->throw("Impossible state that 2 gaps on each seq aligned"); |
1450
|
|
|
|
|
|
|
} |
1451
|
|
|
|
|
|
|
} |
1452
|
7
|
|
|
|
|
12
|
$cigar_string .= $self->_sub_cigar_string('X'); # not forget the tail. |
1453
|
7
|
|
|
|
|
159
|
return $cigar_string; |
1454
|
|
|
|
|
|
|
} |
1455
|
|
|
|
|
|
|
|
1456
|
|
|
|
|
|
|
# an internal method to help generate cigar string |
1457
|
|
|
|
|
|
|
|
1458
|
|
|
|
|
|
|
sub _sub_cigar_string { |
1459
|
1924
|
|
|
1924
|
|
2043
|
my ($self, $new_state) = @_; |
1460
|
|
|
|
|
|
|
|
1461
|
1924
|
|
|
|
|
1681
|
my $sub_cigar_string = ''; |
1462
|
1924
|
100
|
|
|
|
2143
|
if($self->{_state_for_cigar_string} eq $new_state){ |
1463
|
1835
|
|
|
|
|
1648
|
$self->{_count_for_cigar_string} += 1; # Remain the state and increase the counter |
1464
|
|
|
|
|
|
|
}else{ |
1465
|
|
|
|
|
|
|
$sub_cigar_string .= $self->{_count_for_cigar_string} |
1466
|
89
|
100
|
|
|
|
131
|
unless $self->{_count_for_cigar_string} == 1; |
1467
|
89
|
|
|
|
|
87
|
$sub_cigar_string .= $self->{_state_for_cigar_string}; |
1468
|
89
|
|
|
|
|
82
|
$self->{_count_for_cigar_string} = 1; |
1469
|
89
|
|
|
|
|
88
|
$self->{_state_for_cigar_string} = $new_state; |
1470
|
|
|
|
|
|
|
} |
1471
|
1924
|
|
|
|
|
3644
|
return $sub_cigar_string; |
1472
|
|
|
|
|
|
|
} |
1473
|
|
|
|
|
|
|
|
1474
|
|
|
|
|
|
|
# needed before seqfeatures can be made |
1475
|
|
|
|
|
|
|
sub _pre_seq_feature { |
1476
|
1177
|
|
|
1177
|
|
1410
|
my $self = shift; |
1477
|
1177
|
|
|
|
|
1890
|
my $algo = $self->{ALGORITHM}; |
1478
|
|
|
|
|
|
|
|
1479
|
1177
|
|
|
|
|
1912
|
my ($queryfactor, $hitfactor) = (0,0); |
1480
|
1177
|
|
|
|
|
1790
|
my ($hitmap, $querymap) = (1,1); |
1481
|
1177
|
100
|
100
|
|
|
12547
|
if( $algo =~ /^(?:PSI)?T(?:BLAST|FAST|SW)[NY]/oi ) { |
|
|
100
|
100
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
|
|
|
|
|
1482
|
7
|
|
|
|
|
14
|
$hitfactor = 1; |
1483
|
7
|
|
|
|
|
11
|
$hitmap = 3; |
1484
|
|
|
|
|
|
|
} |
1485
|
|
|
|
|
|
|
elsif ($algo =~ /^(?:FAST|BLAST)(?:X|Y|XY)/oi || $algo =~ /^P?GENEWISE/oi ) { |
1486
|
25
|
|
|
|
|
40
|
$queryfactor = 1; |
1487
|
25
|
|
|
|
|
30
|
$querymap = 3; |
1488
|
|
|
|
|
|
|
} |
1489
|
|
|
|
|
|
|
elsif ($algo =~ /^T(BLAST|FAST|SW)(X|Y|XY)/oi || $algo =~ /^(BLAST|FAST|SW)N/oi || $algo =~ /^WABA|AXT|BLAT|BLASTZ|PSL|MEGABLAST|EXONERATE|SW|SSEARCH|SMITH\-WATERMAN|SIM4$/){ |
1490
|
286
|
100
|
|
|
|
893
|
if ($2) { |
1491
|
126
|
|
|
|
|
185
|
$hitmap = $querymap = 3; |
1492
|
|
|
|
|
|
|
} |
1493
|
286
|
|
|
|
|
366
|
$hitfactor = 1; |
1494
|
286
|
|
|
|
|
337
|
$queryfactor = 1; |
1495
|
|
|
|
|
|
|
} |
1496
|
|
|
|
|
|
|
elsif ($algo =~ /^RPS-BLAST/) { |
1497
|
1
|
50
|
|
|
|
4
|
if ($algo =~ /^RPS-BLAST\(BLASTX\)/) { |
1498
|
0
|
|
|
|
|
0
|
$queryfactor = 1; |
1499
|
0
|
|
|
|
|
0
|
$querymap = 3; |
1500
|
|
|
|
|
|
|
} |
1501
|
1
|
|
|
|
|
1
|
$hitfactor = 0; |
1502
|
|
|
|
|
|
|
} |
1503
|
|
|
|
|
|
|
else { |
1504
|
858
|
|
|
|
|
2404
|
my $stranded = lc substr($self->{STRANDED}, 0,1); |
1505
|
858
|
100
|
66
|
|
|
2383
|
$queryfactor = ($stranded eq 'q' || $stranded eq 'b') ? 1 : 0; |
1506
|
858
|
100
|
100
|
|
|
3263
|
$hitfactor = ($stranded eq 'h' || $stranded eq 's' || $stranded eq 'b') ? 1 : 0; |
1507
|
|
|
|
|
|
|
} |
1508
|
1177
|
|
|
|
|
1908
|
$self->{_query_factor} = $queryfactor; |
1509
|
1177
|
|
|
|
|
1698
|
$self->{_hit_factor} = $hitfactor; |
1510
|
1177
|
|
|
|
|
1667
|
$self->{_hit_mapping} = $hitmap; |
1511
|
1177
|
|
|
|
|
2082
|
$self->{_query_mapping} = $querymap; |
1512
|
|
|
|
|
|
|
} |
1513
|
|
|
|
|
|
|
|
1514
|
|
|
|
|
|
|
# make query seq feature |
1515
|
|
|
|
|
|
|
sub _query_seq_feature { |
1516
|
1176
|
|
|
1176
|
|
1472
|
my $self = shift; |
1517
|
1176
|
|
|
|
|
1959
|
$self->{_making_qff} = 1; |
1518
|
1176
|
|
|
|
|
1968
|
my $qs = $self->{QUERY_START}; |
1519
|
1176
|
|
|
|
|
1736
|
my $qe = $self->{QUERY_END}; |
1520
|
1176
|
100
|
|
|
|
2326
|
unless (defined $self->{_query_factor}) { |
1521
|
1133
|
|
|
|
|
2221
|
$self->_pre_seq_feature; |
1522
|
|
|
|
|
|
|
} |
1523
|
1176
|
|
|
|
|
1658
|
my $queryfactor = $self->{_query_factor}; |
1524
|
|
|
|
|
|
|
|
1525
|
1176
|
50
|
33
|
|
|
3483
|
unless( defined $qe && defined $qs ) { $self->throw("Did not specify a Query End or Query Begin"); } |
|
0
|
|
|
|
|
0
|
|
1526
|
|
|
|
|
|
|
|
1527
|
1176
|
|
|
|
|
1353
|
my $strand; |
1528
|
1176
|
100
|
|
|
|
2051
|
if ($qe > $qs) { # normal query: start < end |
1529
|
1106
|
100
|
|
|
|
1448
|
if ($queryfactor) { |
1530
|
843
|
|
|
|
|
1158
|
$strand = 1; |
1531
|
|
|
|
|
|
|
} |
1532
|
|
|
|
|
|
|
else { |
1533
|
263
|
|
|
|
|
353
|
$strand = undef; |
1534
|
|
|
|
|
|
|
} |
1535
|
|
|
|
|
|
|
} |
1536
|
|
|
|
|
|
|
else { |
1537
|
70
|
50
|
|
|
|
145
|
if ($queryfactor) { |
1538
|
70
|
|
|
|
|
114
|
$strand = -1; |
1539
|
|
|
|
|
|
|
} |
1540
|
|
|
|
|
|
|
else { |
1541
|
0
|
|
|
|
|
0
|
$strand = undef; |
1542
|
|
|
|
|
|
|
} |
1543
|
70
|
|
|
|
|
197
|
($qs,$qe) = ($qe,$qs); |
1544
|
|
|
|
|
|
|
} |
1545
|
|
|
|
|
|
|
|
1546
|
|
|
|
|
|
|
# Note: many of these data are not query- and hit-specific. |
1547
|
|
|
|
|
|
|
# Only start, end, name, length are. |
1548
|
|
|
|
|
|
|
# We could be more efficient by only storing this info once. |
1549
|
|
|
|
|
|
|
# steve chervitz --- Sat Apr 5 00:55:07 2003 |
1550
|
|
|
|
|
|
|
|
1551
|
1176
|
|
33
|
|
|
2586
|
my $sim1 = $self->{_sim1} || Bio::SeqFeature::Similarity->new(); |
1552
|
1176
|
|
|
|
|
3217
|
$sim1->start($qs); |
1553
|
1176
|
|
|
|
|
2758
|
$sim1->end($qe); |
1554
|
1176
|
|
|
|
|
4164
|
$sim1->significance($self->{EVALUE}); |
1555
|
1176
|
|
|
|
|
3807
|
$sim1->bits($self->{BITS}); |
1556
|
1176
|
|
|
|
|
3374
|
$sim1->score($self->{SCORE}); |
1557
|
1176
|
|
|
|
|
2666
|
$sim1->strand($strand); |
1558
|
1176
|
|
|
|
|
3475
|
$sim1->seq_id($self->{QUERY_NAME}); |
1559
|
1176
|
|
|
|
|
3231
|
$sim1->seqlength($self->{QUERY_LENGTH}); |
1560
|
1176
|
|
|
|
|
2965
|
$sim1->source_tag($self->{ALGORITHM}); |
1561
|
1176
|
|
|
|
|
3425
|
$sim1->seqdesc($self->{QUERY_DESC}); |
1562
|
1176
|
100
|
|
|
|
4698
|
$sim1->add_tag_value('meta', $self->{META}) if $self->can('meta'); |
1563
|
|
|
|
|
|
|
# to determine frame from something like FASTXY which doesn't |
1564
|
|
|
|
|
|
|
# report the frame |
1565
|
1176
|
|
|
|
|
1797
|
my $qframe = $self->{QUERY_FRAME}; |
1566
|
|
|
|
|
|
|
|
1567
|
1176
|
100
|
100
|
|
|
4828
|
if (defined $strand && !defined $qframe && $queryfactor) { |
|
|
100
|
66
|
|
|
|
|
1568
|
603
|
|
|
|
|
994
|
$qframe = ( $qs % 3 ) * $strand; |
1569
|
|
|
|
|
|
|
} |
1570
|
|
|
|
|
|
|
elsif (!defined $strand) { |
1571
|
263
|
|
|
|
|
354
|
$qframe = 0; |
1572
|
|
|
|
|
|
|
} |
1573
|
|
|
|
|
|
|
|
1574
|
1176
|
50
|
|
|
|
4908
|
if( $qframe =~ /^([+-])?([0-3])/ ) { |
1575
|
1176
|
|
100
|
|
|
4319
|
my $dir = $1 || '+'; |
1576
|
1176
|
50
|
33
|
|
|
4285
|
if($qframe && (($dir eq '-' && $strand >= 0) || ($dir eq '+' && $strand <= 0)) ) { |
|
|
|
66
|
|
|
|
|
1577
|
0
|
|
|
|
|
0
|
$self->warn("Query frame ($qframe) did not match strand of query ($strand)"); |
1578
|
|
|
|
|
|
|
} |
1579
|
1176
|
100
|
|
|
|
3583
|
$qframe = $2 != 0 ? $2 - 1 : $2; |
1580
|
|
|
|
|
|
|
} else { |
1581
|
0
|
|
|
|
|
0
|
$self->warn("Unknown query frame ($qframe)"); |
1582
|
0
|
|
|
|
|
0
|
$qframe = 0; |
1583
|
|
|
|
|
|
|
} |
1584
|
|
|
|
|
|
|
|
1585
|
1176
|
|
|
|
|
3270
|
$sim1->frame($qframe); |
1586
|
1176
|
|
|
|
|
3290
|
$self->SUPER::feature1($sim1); |
1587
|
|
|
|
|
|
|
|
1588
|
1176
|
|
|
|
|
1859
|
$self->{_created_qff} = 1; |
1589
|
1176
|
|
|
|
|
1972
|
$self->{_making_qff} = 0; |
1590
|
|
|
|
|
|
|
} |
1591
|
|
|
|
|
|
|
|
1592
|
|
|
|
|
|
|
# make subject seq feature |
1593
|
|
|
|
|
|
|
sub _subject_seq_feature { |
1594
|
562
|
|
|
562
|
|
737
|
my $self = shift; |
1595
|
562
|
|
|
|
|
1041
|
$self->{_making_sff} = 1; |
1596
|
562
|
|
|
|
|
890
|
my $hs = $self->{HIT_START}; |
1597
|
562
|
|
|
|
|
847
|
my $he = $self->{HIT_END}; |
1598
|
562
|
100
|
|
|
|
1210
|
unless (defined $self->{_hit_factor}) { |
1599
|
44
|
|
|
|
|
108
|
$self->_pre_seq_feature; |
1600
|
|
|
|
|
|
|
} |
1601
|
562
|
|
|
|
|
721
|
my $hitfactor = $self->{_hit_factor}; |
1602
|
|
|
|
|
|
|
|
1603
|
562
|
50
|
33
|
|
|
1684
|
unless( defined $he && defined $hs ) { $self->throw("Did not specify a Hit End or Hit Begin"); } |
|
0
|
|
|
|
|
0
|
|
1604
|
|
|
|
|
|
|
|
1605
|
562
|
|
|
|
|
701
|
my $strand; |
1606
|
562
|
100
|
|
|
|
1271
|
if ($he > $hs) { # normal subject |
1607
|
455
|
100
|
|
|
|
817
|
if ($hitfactor) { |
1608
|
199
|
|
|
|
|
305
|
$strand = 1; |
1609
|
|
|
|
|
|
|
} |
1610
|
|
|
|
|
|
|
else { |
1611
|
256
|
|
|
|
|
336
|
$strand = undef; |
1612
|
|
|
|
|
|
|
} |
1613
|
|
|
|
|
|
|
} |
1614
|
|
|
|
|
|
|
else { |
1615
|
107
|
100
|
|
|
|
242
|
if ($hitfactor) { |
1616
|
106
|
|
|
|
|
157
|
$strand = -1; |
1617
|
|
|
|
|
|
|
} |
1618
|
|
|
|
|
|
|
else { |
1619
|
1
|
|
|
|
|
2
|
$strand = undef; |
1620
|
|
|
|
|
|
|
} |
1621
|
107
|
|
|
|
|
262
|
($hs,$he) = ( $he,$hs); # reverse subject: start bigger than end |
1622
|
|
|
|
|
|
|
} |
1623
|
|
|
|
|
|
|
|
1624
|
562
|
|
33
|
|
|
2182
|
my $sim2 = $self->{_sim2} || Bio::SeqFeature::Similarity->new(); |
1625
|
562
|
|
|
|
|
1326
|
$sim2->start($hs); |
1626
|
562
|
|
|
|
|
1292
|
$sim2->end($he); |
1627
|
562
|
|
|
|
|
1585
|
$sim2->significance($self->{EVALUE}); |
1628
|
562
|
|
|
|
|
1584
|
$sim2->bits($self->{BITS}); |
1629
|
562
|
|
|
|
|
1527
|
$sim2->score($self->{SCORE}); |
1630
|
562
|
|
|
|
|
1276
|
$sim2->strand($strand); |
1631
|
562
|
|
|
|
|
1519
|
$sim2->seq_id($self->{HIT_NAME}); |
1632
|
562
|
|
|
|
|
1391
|
$sim2->seqlength($self->{HIT_LENGTH}); |
1633
|
562
|
|
|
|
|
1464
|
$sim2->source_tag($self->{ALGORITHM}); |
1634
|
562
|
|
|
|
|
1242
|
$sim2->seqdesc($self->{HIT_DESC}); |
1635
|
562
|
100
|
|
|
|
1917
|
$sim2->add_tag_value('meta', $self->{META}) if $self->can('meta'); |
1636
|
562
|
|
|
|
|
985
|
my $hframe = $self->{HIT_FRAME}; |
1637
|
|
|
|
|
|
|
|
1638
|
562
|
100
|
100
|
|
|
2216
|
if (defined $strand && !defined $hframe && $hitfactor) { |
|
|
100
|
66
|
|
|
|
|
1639
|
3
|
|
|
|
|
11
|
$hframe = ( $hs % 3 ) * $strand; |
1640
|
|
|
|
|
|
|
} |
1641
|
|
|
|
|
|
|
elsif (!defined $strand) { |
1642
|
257
|
|
|
|
|
350
|
$hframe = 0; |
1643
|
|
|
|
|
|
|
} |
1644
|
|
|
|
|
|
|
|
1645
|
562
|
50
|
|
|
|
2323
|
if( $hframe =~ /^([+-])?([0-3])/ ) { |
1646
|
562
|
|
100
|
|
|
2132
|
my $dir = $1 || '+'; |
1647
|
562
|
50
|
33
|
|
|
1672
|
if($hframe && (($dir eq '-' && $strand >= 0) || ($dir eq '+' && $strand <= 0)) ) { |
|
|
|
66
|
|
|
|
|
1648
|
0
|
|
|
|
|
0
|
$self->warn("Subject frame ($hframe) did not match strand of subject ($strand)"); |
1649
|
|
|
|
|
|
|
} |
1650
|
562
|
100
|
|
|
|
1873
|
$hframe = $2 != 0 ? $2 - 1 : $2; |
1651
|
|
|
|
|
|
|
} else { |
1652
|
0
|
|
|
|
|
0
|
$self->warn("Unknown subject frame ($hframe)"); |
1653
|
0
|
|
|
|
|
0
|
$hframe = 0; |
1654
|
|
|
|
|
|
|
} |
1655
|
|
|
|
|
|
|
|
1656
|
562
|
|
|
|
|
1635
|
$sim2->frame($hframe); |
1657
|
562
|
|
|
|
|
1805
|
$self->SUPER::feature2($sim2); |
1658
|
|
|
|
|
|
|
|
1659
|
562
|
|
|
|
|
866
|
$self->{_created_sff} = 1; |
1660
|
562
|
|
|
|
|
978
|
$self->{_making_sff} = 0; |
1661
|
|
|
|
|
|
|
} |
1662
|
|
|
|
|
|
|
|
1663
|
|
|
|
|
|
|
# before calling the num_* methods |
1664
|
|
|
|
|
|
|
sub _pre_similar_stats { |
1665
|
438
|
|
|
438
|
|
556
|
my $self = shift; |
1666
|
438
|
|
|
|
|
721
|
my $identical = $self->{IDENTICAL}; |
1667
|
438
|
|
|
|
|
596
|
my $conserved = $self->{CONSERVED}; |
1668
|
438
|
|
|
|
|
566
|
my $percent_id = $self->{PERCENT_IDENTITY}; |
1669
|
|
|
|
|
|
|
|
1670
|
438
|
50
|
|
|
|
709
|
if (! defined $identical) { |
1671
|
0
|
0
|
|
|
|
0
|
if (! defined $percent_id) { |
1672
|
0
|
|
|
|
|
0
|
$self->warn("Did not defined the number of identical matches or overall percent identity in the HSP; assuming 0"); |
1673
|
0
|
|
|
|
|
0
|
$identical = 0; |
1674
|
|
|
|
|
|
|
} |
1675
|
|
|
|
|
|
|
else { |
1676
|
0
|
|
|
|
|
0
|
$identical = sprintf("%.0f",$percent_id * $self->{HSP_LENGTH}); |
1677
|
|
|
|
|
|
|
} |
1678
|
|
|
|
|
|
|
} |
1679
|
|
|
|
|
|
|
|
1680
|
438
|
50
|
|
|
|
684
|
if (! defined $conserved) { |
1681
|
|
|
|
|
|
|
$self->warn("Did not define the number of conserved matches in the HSP; assuming conserved == identical ($identical)") |
1682
|
0
|
0
|
|
|
|
0
|
if( $self->{ALGORITHM} !~ /^((FAST|BLAST)N)|EXONERATE|SIM4|AXT|PSL|BLAT|BLASTZ|WABA/oi); |
1683
|
0
|
|
|
|
|
0
|
$conserved = $identical; |
1684
|
|
|
|
|
|
|
} |
1685
|
438
|
|
|
|
|
645
|
$self->{IDENTICAL} = $identical; |
1686
|
438
|
|
|
|
|
597
|
$self->{CONSERVED} = $conserved; |
1687
|
438
|
|
|
|
|
751
|
$self->{_did_presimilar} = 1; |
1688
|
|
|
|
|
|
|
} |
1689
|
|
|
|
|
|
|
|
1690
|
|
|
|
|
|
|
# before calling the frac_* methods |
1691
|
|
|
|
|
|
|
|
1692
|
|
|
|
|
|
|
sub _pre_frac { |
1693
|
105
|
|
|
105
|
|
135
|
my $self = shift; |
1694
|
|
|
|
|
|
|
|
1695
|
105
|
|
|
|
|
159
|
my $hsp_len = $self->{HSP_LENGTH}; |
1696
|
105
|
|
|
|
|
165
|
my $hit_len = $self->{HIT_LENGTH}; |
1697
|
105
|
|
|
|
|
148
|
my $query_len = $self->{QUERY_LENGTH}; |
1698
|
|
|
|
|
|
|
|
1699
|
105
|
|
|
|
|
241
|
my $identical = $self->num_identical; |
1700
|
105
|
|
|
|
|
236
|
my $conserved = $self->num_conserved; |
1701
|
|
|
|
|
|
|
|
1702
|
105
|
|
|
|
|
186
|
$self->{_did_prefrac} = 1; |
1703
|
105
|
|
|
|
|
122
|
my $logical; |
1704
|
105
|
50
|
|
|
|
209
|
if( $hsp_len ) { |
1705
|
105
|
|
|
|
|
247
|
$self->length('total', $hsp_len); |
1706
|
105
|
|
|
|
|
247
|
$logical = $self->_logical_length('total'); |
1707
|
105
|
|
|
|
|
342
|
$self->frac_identical( 'total', $identical / $hsp_len); |
1708
|
105
|
|
|
|
|
306
|
$self->frac_conserved( 'total', $conserved / $hsp_len); |
1709
|
|
|
|
|
|
|
} |
1710
|
105
|
100
|
|
|
|
194
|
if( $hit_len ) { |
1711
|
104
|
|
|
|
|
198
|
$logical = $self->_logical_length('hit'); |
1712
|
104
|
|
|
|
|
336
|
$self->frac_identical( 'hit', $identical / $logical); |
1713
|
104
|
|
|
|
|
276
|
$self->frac_conserved( 'hit', $conserved / $logical); |
1714
|
|
|
|
|
|
|
} |
1715
|
105
|
100
|
|
|
|
202
|
if( $query_len ) { |
1716
|
104
|
|
|
|
|
209
|
$logical = $self->_logical_length('query'); |
1717
|
104
|
|
|
|
|
322
|
$self->frac_identical( 'query', $identical / $logical) ; |
1718
|
104
|
|
|
|
|
228
|
$self->frac_conserved( 'query', $conserved / $logical); |
1719
|
|
|
|
|
|
|
} |
1720
|
|
|
|
|
|
|
} |
1721
|
|
|
|
|
|
|
|
1722
|
|
|
|
|
|
|
# before calling gaps() |
1723
|
|
|
|
|
|
|
# This relies first on passed parameters (parser-dependent), then on gaps |
1724
|
|
|
|
|
|
|
# calculated by seq_inds() (if set), then falls back to directly checking |
1725
|
|
|
|
|
|
|
# for '-' or '.' as a last resort |
1726
|
|
|
|
|
|
|
|
1727
|
|
|
|
|
|
|
sub _pre_gaps { |
1728
|
378
|
|
|
378
|
|
533
|
my $self = shift; |
1729
|
378
|
|
|
|
|
561
|
my $query_gaps = $self->{QUERY_GAPS}; |
1730
|
378
|
|
|
|
|
599
|
my $query_seq = $self->{QUERY_SEQ}; |
1731
|
378
|
|
|
|
|
460
|
my $hit_gaps = $self->{HIT_GAPS}; |
1732
|
378
|
|
|
|
|
560
|
my $hit_seq = $self->{HIT_SEQ}; |
1733
|
378
|
|
|
|
|
480
|
my $gaps = $self->{HSP_GAPS}; |
1734
|
|
|
|
|
|
|
|
1735
|
378
|
|
|
|
|
578
|
$self->{_did_pregaps} = 1; # well, we're in the process; avoid recursion |
1736
|
378
|
50
|
|
|
|
940
|
if( defined $query_gaps ) { |
|
|
100
|
|
|
|
|
|
1737
|
0
|
|
|
|
|
0
|
$self->gaps('query', $query_gaps); |
1738
|
|
|
|
|
|
|
} elsif( defined $query_seq ) { |
1739
|
363
|
50
|
|
|
|
982
|
my $qg = (defined $self->{'_query_offset'}) ? $self->seq_inds('query','gaps') |
|
|
100
|
|
|
|
|
|
1740
|
|
|
|
|
|
|
: ($self->algorithm eq 'ERPIN') ? scalar( $hit_seq =~ tr/\-//) |
1741
|
|
|
|
|
|
|
: scalar( $query_seq =~ tr/\-\.// ); # HMMER3 and Infernal uses '.' and '-' |
1742
|
363
|
|
100
|
|
|
955
|
my $offset = $self->{'_query_offset'} || 1; |
1743
|
363
|
|
|
|
|
1011
|
$self->gaps('query', $qg/$offset); |
1744
|
|
|
|
|
|
|
} |
1745
|
378
|
50
|
|
|
|
953
|
if( defined $hit_gaps ) { |
|
|
100
|
|
|
|
|
|
1746
|
0
|
|
|
|
|
0
|
$self->gaps('hit', $hit_gaps); |
1747
|
|
|
|
|
|
|
} elsif( defined $hit_seq ) { |
1748
|
358
|
100
|
|
|
|
952
|
my $hg = (defined $self->{'_sbjct_offset'}) ? $self->seq_inds('hit','gaps') |
|
|
100
|
|
|
|
|
|
1749
|
|
|
|
|
|
|
: ($self->algorithm eq 'ERPIN') ? scalar( $hit_seq =~ tr/\-//) |
1750
|
|
|
|
|
|
|
: scalar( $hit_seq =~ tr/\-\.// ); # HMMER3 and Infernal uses '.' and '-' |
1751
|
358
|
|
100
|
|
|
934
|
my $offset = $self->{'_sbjct_offset'} || 1; |
1752
|
358
|
|
|
|
|
766
|
$self->gaps('hit', $hg/$offset); |
1753
|
|
|
|
|
|
|
} |
1754
|
378
|
100
|
|
|
|
724
|
if( ! defined $gaps ) { |
1755
|
329
|
|
|
|
|
616
|
$gaps = $self->gaps("query") + $self->gaps("hit"); |
1756
|
|
|
|
|
|
|
} |
1757
|
378
|
|
|
|
|
775
|
$self->gaps('total', $gaps); |
1758
|
|
|
|
|
|
|
} |
1759
|
|
|
|
|
|
|
|
1760
|
|
|
|
|
|
|
# before percent_identity |
1761
|
|
|
|
|
|
|
sub _pre_pi { |
1762
|
42
|
|
|
42
|
|
73
|
my $self = shift; |
1763
|
42
|
|
|
|
|
92
|
$self->{_did_prepi} = 1; |
1764
|
42
|
50
|
33
|
|
|
324
|
$self->percent_identity($self->{PERCENT_IDENTITY} || $self->frac_identical('total')*100) if( $self->{HSP_LENGTH} > 0 ); |
1765
|
|
|
|
|
|
|
} |
1766
|
|
|
|
|
|
|
|
1767
|
|
|
|
|
|
|
1; |