line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# Perl Module for HMMResults |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Ewan Birney |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
#Copyright Genome Research Limited (1997). |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
=head1 NAME |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
Bio::Tools::HMMER::Results - Object representing HMMER output results |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 SYNOPSIS |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
# parse a hmmsearch file (can also parse a hmmpfam file) |
17
|
|
|
|
|
|
|
$res = Bio::Tools::HMMER::Results->new( -file => 'output.hmm' , |
18
|
|
|
|
|
|
|
-type => 'hmmsearch'); |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# print out the results for each sequence |
21
|
|
|
|
|
|
|
foreach $seq ( $res->each_Set ) { |
22
|
|
|
|
|
|
|
print "Sequence bit score is",$seq->bits,"\n"; |
23
|
|
|
|
|
|
|
foreach $domain ( $seq->each_Domain ) { |
24
|
|
|
|
|
|
|
print " Domain start ",$domain->start," end ",$domain->end, |
25
|
|
|
|
|
|
|
" score ",$domain->bits,"\n"; |
26
|
|
|
|
|
|
|
} |
27
|
|
|
|
|
|
|
} |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# new result object on a sequence/domain cutoff of |
30
|
|
|
|
|
|
|
# 25 bits sequence, 15 bits domain |
31
|
|
|
|
|
|
|
$newresult = $res->filter_on_cutoff(25,15); |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# alternative way of getting out all domains directly |
34
|
|
|
|
|
|
|
foreach $domain ( $res->each_Domain ) { |
35
|
|
|
|
|
|
|
print "Domain on ",$domain->seq_id," with score ", |
36
|
|
|
|
|
|
|
$domain->bits," evalue ",$domain->evalue,"\n"; |
37
|
|
|
|
|
|
|
} |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=head1 DESCRIPTION |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
This object represents HMMER output, either from hmmsearch or |
42
|
|
|
|
|
|
|
hmmpfam. For hmmsearch, a series of HMMER::Set objects are made, one |
43
|
|
|
|
|
|
|
for each sequence, which have the the bits score for the object. For |
44
|
|
|
|
|
|
|
hmmpfam searches, only one Set object is made. |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
These objects come from the original HMMResults modules used |
48
|
|
|
|
|
|
|
internally in Pfam, written by Ewan Birney. Ewan then converted them to |
49
|
|
|
|
|
|
|
BioPerl objects in 1999. That conversion is meant to be backwardly |
50
|
|
|
|
|
|
|
compatible, but may not be (caveat emptor). |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=head1 FEEDBACK |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
=head2 Mailing Lists |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
57
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
58
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
61
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=head2 Support |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
I |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
70
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
71
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
72
|
|
|
|
|
|
|
with code and data examples if at all possible. |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=head2 Reporting Bugs |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
77
|
|
|
|
|
|
|
the bugs and their resolution. Bug reports can be submitted via the |
78
|
|
|
|
|
|
|
web: |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head1 AUTHOR - Ewan Birney |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
Email birney@ebi.ac.uk |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
Jason Stajich, jason-at-bioperl.org |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=head1 APPENDIX |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
93
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a _ |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=cut |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
package Bio::Tools::HMMER::Results; |
98
|
|
|
|
|
|
|
|
99
|
1
|
|
|
1
|
|
677
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
22
|
|
100
|
|
|
|
|
|
|
|
101
|
1
|
|
|
1
|
|
3
|
use Bio::Tools::HMMER::Domain; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
13
|
|
102
|
1
|
|
|
1
|
|
3
|
use Bio::Tools::HMMER::Set; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
10
|
|
103
|
1
|
|
|
1
|
|
4
|
use Symbol; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
52
|
|
104
|
|
|
|
|
|
|
|
105
|
1
|
|
|
1
|
|
3
|
use base qw(Bio::Root::Root Bio::Root::IO Bio::SeqAnalysisParserI); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
2566
|
|
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
sub new { |
108
|
5
|
|
|
5
|
1
|
10
|
my($class,@args) = @_; |
109
|
|
|
|
|
|
|
|
110
|
5
|
|
|
|
|
18
|
my $self = $class->SUPER::new(@args); |
111
|
|
|
|
|
|
|
|
112
|
5
|
|
|
|
|
9
|
$self->{'domain'} = []; # array of HMMUnits |
113
|
5
|
|
|
|
|
9
|
$self->{'seq'} = {}; |
114
|
|
|
|
|
|
|
|
115
|
5
|
|
|
|
|
19
|
my ($parsetype) = $self->_rearrange([qw(TYPE)],@args); |
116
|
5
|
|
|
|
|
20
|
$self->_initialize_io(@args); |
117
|
5
|
50
|
|
|
|
8
|
if( !defined $parsetype ) { |
118
|
0
|
|
|
|
|
0
|
$self->throw("No parse type provided. should be hmmsearch or hmmpfam"); |
119
|
|
|
|
|
|
|
} |
120
|
5
|
|
|
|
|
12
|
$self->parsetype($parsetype); |
121
|
5
|
100
|
|
|
|
8
|
if( defined $self->_fh() ) { |
122
|
4
|
100
|
|
|
|
10
|
if( $parsetype eq 'hmmsearch' ) { |
|
|
50
|
|
|
|
|
|
123
|
2
|
|
|
|
|
4
|
$self->_parse_hmmsearch($self->_fh()); |
124
|
|
|
|
|
|
|
} elsif ( $parsetype eq 'hmmpfam' ) { |
125
|
2
|
|
|
|
|
5
|
$self->_parse_hmmpfam($self->_fh()); |
126
|
|
|
|
|
|
|
} else { |
127
|
0
|
|
|
|
|
0
|
$self->throw("Did not recoginise type $parsetype"); |
128
|
|
|
|
|
|
|
} |
129
|
|
|
|
|
|
|
} |
130
|
|
|
|
|
|
|
|
131
|
5
|
|
|
|
|
22
|
return $self; # success - we hope! |
132
|
|
|
|
|
|
|
} |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=head2 next_feature |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
Title : next_feature |
138
|
|
|
|
|
|
|
Usage : while( my $feat = $res->next_feature ) { # do something } |
139
|
|
|
|
|
|
|
Function: SeqAnalysisParserI implementing function |
140
|
|
|
|
|
|
|
Example : |
141
|
|
|
|
|
|
|
Returns : A Bio::SeqFeatureI compliant object, in this case, |
142
|
|
|
|
|
|
|
each DomainUnit object, ie, flattening the Sequence |
143
|
|
|
|
|
|
|
aspect of this. |
144
|
|
|
|
|
|
|
Args : None |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=cut |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
sub next_feature{ |
150
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
151
|
|
|
|
|
|
|
|
152
|
0
|
0
|
|
|
|
0
|
if( $self->{'_started_next_feature'} == 1 ) { |
153
|
0
|
|
|
|
|
0
|
return shift @{$self->{'_next_feature_array'}}; |
|
0
|
|
|
|
|
0
|
|
154
|
|
|
|
|
|
|
} else { |
155
|
0
|
|
|
|
|
0
|
$self->{'_started_next_feature'} = 1; |
156
|
0
|
|
|
|
|
0
|
my @array; |
157
|
0
|
|
|
|
|
0
|
foreach my $seq ( $self->each_Set() ) { |
158
|
0
|
|
|
|
|
0
|
foreach my $unit ( $seq->each_Domain() ) { |
159
|
0
|
|
|
|
|
0
|
push(@array,$unit); |
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
} |
162
|
0
|
|
|
|
|
0
|
my $res = shift @array; |
163
|
0
|
|
|
|
|
0
|
$self->{'_next_feature_array'} = \@array; |
164
|
0
|
|
|
|
|
0
|
return $res; |
165
|
|
|
|
|
|
|
} |
166
|
|
|
|
|
|
|
|
167
|
0
|
|
|
|
|
0
|
$self->throw("Should not reach here! Error!"); |
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
=head2 number |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
Title : number |
174
|
|
|
|
|
|
|
Usage : print "There are ",$res->number," domains hit\n"; |
175
|
|
|
|
|
|
|
Function: provides the number of domains in the HMMER report |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=cut |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
sub number { |
180
|
4
|
|
|
4
|
1
|
1569
|
my $self = shift; |
181
|
4
|
|
|
|
|
5
|
my @val; |
182
|
|
|
|
|
|
|
my $ref; |
183
|
4
|
|
|
|
|
9
|
$ref = $self->{'domain'}; |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
|
186
|
4
|
|
|
|
|
5
|
@val = @{$self->{'domain'}}; |
|
4
|
|
|
|
|
204
|
|
187
|
4
|
|
|
|
|
64
|
return scalar @val; |
188
|
|
|
|
|
|
|
} |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
=head2 seqfile |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
Title : seqfile |
193
|
|
|
|
|
|
|
Usage : $obj->seqfile($newval) |
194
|
|
|
|
|
|
|
Function: |
195
|
|
|
|
|
|
|
Example : |
196
|
|
|
|
|
|
|
Returns : value of seqfile |
197
|
|
|
|
|
|
|
Args : newvalue (optional) |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=cut |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
sub seqfile{ |
203
|
6
|
|
|
6
|
1
|
11
|
my ($self,$value) = @_; |
204
|
6
|
100
|
|
|
|
13
|
if( defined $value) { |
205
|
4
|
|
|
|
|
7
|
$self->{'seqfile'} = $value; |
206
|
|
|
|
|
|
|
} |
207
|
6
|
|
|
|
|
12
|
return $self->{'seqfile'}; |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
} |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=head2 hmmfile |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
Title : hmmfile |
214
|
|
|
|
|
|
|
Usage : $obj->hmmfile($newval) |
215
|
|
|
|
|
|
|
Function: |
216
|
|
|
|
|
|
|
Example : |
217
|
|
|
|
|
|
|
Returns : value of hmmfile |
218
|
|
|
|
|
|
|
Args : newvalue (optional) |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
=cut |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
sub hmmfile{ |
224
|
6
|
|
|
6
|
1
|
17
|
my ($self,$value) = @_; |
225
|
6
|
100
|
|
|
|
12
|
if( defined $value) { |
226
|
4
|
|
|
|
|
10
|
$self->{'hmmfile'} = $value; |
227
|
|
|
|
|
|
|
} |
228
|
6
|
|
|
|
|
15
|
return $self->{'hmmfile'}; |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
=head2 add_Domain |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
Title : add_Domain |
235
|
|
|
|
|
|
|
Usage : $res->add_Domain($unit) |
236
|
|
|
|
|
|
|
Function: adds a domain to the results array. Mainly used internally. |
237
|
|
|
|
|
|
|
Args : A Bio::Tools::HMMER::Domain |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=cut |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
sub add_Domain { |
243
|
3037
|
|
|
3037
|
1
|
2147
|
my $self = shift; |
244
|
3037
|
|
|
|
|
1870
|
my $unit = shift; |
245
|
3037
|
|
|
|
|
1796
|
my $name; |
246
|
|
|
|
|
|
|
|
247
|
3037
|
|
|
|
|
4355
|
$name = $unit->seq_id(); |
248
|
|
|
|
|
|
|
|
249
|
3037
|
50
|
|
|
|
5261
|
if( ! exists $self->{'seq'}->{$name} ) { |
250
|
0
|
|
|
|
|
0
|
$self->warn("Adding a domain of $name but with no HMMSequence. Will be kept in domain array but not added to a HMMSequence"); |
251
|
|
|
|
|
|
|
} else { |
252
|
3037
|
|
|
|
|
5714
|
$self->{'seq'}->{$name}->add_Domain($unit); |
253
|
|
|
|
|
|
|
} |
254
|
3037
|
|
|
|
|
2308
|
push(@{$self->{'domain'}},$unit); |
|
3037
|
|
|
|
|
4455
|
|
255
|
|
|
|
|
|
|
} |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
=head2 each_Domain |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
Title : each_Domain |
261
|
|
|
|
|
|
|
Usage : foreach $domain ( $res->each_Domain() ) |
262
|
|
|
|
|
|
|
Function: array of Domain units which are held in this report |
263
|
|
|
|
|
|
|
Returns : array |
264
|
|
|
|
|
|
|
Args : none |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
=cut |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
sub each_Domain { |
270
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
271
|
0
|
|
|
|
|
0
|
my (@arr,$u); |
272
|
|
|
|
|
|
|
|
273
|
0
|
|
|
|
|
0
|
foreach $u ( @{$self->{'domain'}} ) { |
|
0
|
|
|
|
|
0
|
|
274
|
0
|
|
|
|
|
0
|
push(@arr,$u); |
275
|
|
|
|
|
|
|
} |
276
|
|
|
|
|
|
|
|
277
|
0
|
|
|
|
|
0
|
return @arr; |
278
|
|
|
|
|
|
|
} |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=head2 domain_bits_cutoff_from_evalue |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
Title : domain_bits_cutoff_from_evalue |
284
|
|
|
|
|
|
|
Usage : $cutoff = domain_bits_cutoff_from_evalue(0.01); |
285
|
|
|
|
|
|
|
Function: return a bits cutoff from an evalue using the |
286
|
|
|
|
|
|
|
scores here. Somewhat interesting logic: |
287
|
|
|
|
|
|
|
Find the two bit score which straddle the evalue |
288
|
|
|
|
|
|
|
if( 25 is between these two points) return 25 |
289
|
|
|
|
|
|
|
else return the midpoint. |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
This logic tries to ensure that with large signal to |
292
|
|
|
|
|
|
|
noise separation one still has sensible 25 bit cutoff |
293
|
|
|
|
|
|
|
Returns : |
294
|
|
|
|
|
|
|
Args : |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
=cut |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
sub domain_bits_cutoff_from_evalue { |
299
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
300
|
0
|
|
|
|
|
0
|
my $eval = shift; |
301
|
0
|
|
|
|
|
0
|
my ($dom,$prev,@doms,$cutoff,$sep,$seen); |
302
|
|
|
|
|
|
|
|
303
|
0
|
|
|
|
|
0
|
@doms = $self->each_Domain; |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
|
306
|
0
|
|
|
|
|
0
|
@doms = map { $_->[0] } |
307
|
0
|
|
|
|
|
0
|
sort { $b->[1] <=> $a->[1] } |
308
|
0
|
|
|
|
|
0
|
map { [ $_, $_->bits] } @doms; |
|
0
|
|
|
|
|
0
|
|
309
|
0
|
|
|
|
|
0
|
$seen = 0; |
310
|
0
|
|
|
|
|
0
|
foreach $_ ( @doms ) { |
311
|
0
|
0
|
|
|
|
0
|
if( $_->evalue > $eval ) { |
312
|
0
|
|
|
|
|
0
|
$seen = 1; |
313
|
0
|
|
|
|
|
0
|
$dom = $_; |
314
|
0
|
|
|
|
|
0
|
last; |
315
|
|
|
|
|
|
|
} |
316
|
0
|
|
|
|
|
0
|
$prev = $_; |
317
|
|
|
|
|
|
|
} |
318
|
|
|
|
|
|
|
|
319
|
0
|
0
|
0
|
|
|
0
|
if( ! defined $prev || $seen == 0) { |
320
|
0
|
|
|
|
|
0
|
$self->throw("Evalue is either above or below the list..."); |
321
|
0
|
|
|
|
|
0
|
return; |
322
|
|
|
|
|
|
|
} |
323
|
|
|
|
|
|
|
|
324
|
0
|
|
|
|
|
0
|
$sep = $prev->bits - $dom->bits ; |
325
|
|
|
|
|
|
|
|
326
|
0
|
0
|
|
|
|
0
|
if( $sep < 1 ) { |
327
|
0
|
|
|
|
|
0
|
return $prev->bits(); |
328
|
|
|
|
|
|
|
} |
329
|
0
|
0
|
0
|
|
|
0
|
if( $dom->bits < 25 && $prev->bits > 25 ) { |
330
|
0
|
|
|
|
|
0
|
return 25; |
331
|
|
|
|
|
|
|
} |
332
|
|
|
|
|
|
|
|
333
|
0
|
|
|
|
|
0
|
return int( $dom->bits + $sep/2 ) ; |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
} |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
sub dictate_hmm_acc { |
339
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
340
|
0
|
|
|
|
|
0
|
my $acc = shift; |
341
|
0
|
|
|
|
|
0
|
my ($unit); |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
|
344
|
0
|
|
|
|
|
0
|
foreach $unit ( $self->eachHMMUnit() ) { |
345
|
0
|
|
|
|
|
0
|
$unit->hmmacc($acc); |
346
|
|
|
|
|
|
|
} |
347
|
|
|
|
|
|
|
} |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
=head2 write_FT_output |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
Title : write_FT_output |
352
|
|
|
|
|
|
|
Usage : $res->write_FT_output(\*STDOUT,'DOMAIN') |
353
|
|
|
|
|
|
|
Function: writes feature table output ala swissprot |
354
|
|
|
|
|
|
|
Returns : |
355
|
|
|
|
|
|
|
Args : |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
=cut |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
sub write_FT_output { |
361
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
362
|
0
|
|
|
|
|
0
|
my $file = shift; |
363
|
0
|
|
|
|
|
0
|
my $idt = shift; |
364
|
0
|
|
|
|
|
0
|
my ($seq,$unit); |
365
|
|
|
|
|
|
|
|
366
|
0
|
0
|
|
|
|
0
|
if( !defined $idt ) { |
367
|
0
|
|
|
|
|
0
|
$idt = "DOMAIN"; |
368
|
|
|
|
|
|
|
} |
369
|
|
|
|
|
|
|
|
370
|
0
|
|
|
|
|
0
|
foreach $seq ( $self->each_Set() ) { |
371
|
0
|
|
|
|
|
0
|
print $file sprintf("ID %s\n",$seq->name()); |
372
|
0
|
|
|
|
|
0
|
foreach $unit ( $seq->each_Domain() ) { |
373
|
0
|
|
|
|
|
0
|
print $file sprintf("FT %s %d %d %s\n",$idt, |
374
|
|
|
|
|
|
|
$unit->start,$unit->end,$unit->hmmname); |
375
|
|
|
|
|
|
|
} |
376
|
0
|
|
|
|
|
0
|
print $file "//\n"; |
377
|
|
|
|
|
|
|
} |
378
|
|
|
|
|
|
|
} |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
=head2 filter_on_cutoff |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
Title : filter_on_cutoff |
383
|
|
|
|
|
|
|
Usage : $newresults = $results->filter_on_cutoff(25,15); |
384
|
|
|
|
|
|
|
Function: Produces a new HMMER::Results module which has |
385
|
|
|
|
|
|
|
been trimmed at the cutoff. |
386
|
|
|
|
|
|
|
Returns : a Bio::Tools::HMMER::Results module |
387
|
|
|
|
|
|
|
Args : sequence cutoff and domain cutoff. in bits score |
388
|
|
|
|
|
|
|
if you want one cutoff, simply use same number both places |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
=cut |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
sub filter_on_cutoff { |
393
|
1
|
|
|
1
|
1
|
3
|
my $self = shift; |
394
|
1
|
|
|
|
|
2
|
my $seqthr = shift; |
395
|
1
|
|
|
|
|
1
|
my $domthr = shift; |
396
|
1
|
|
|
|
|
1
|
my ($new,$seq,$unit,@array,@narray); |
397
|
|
|
|
|
|
|
|
398
|
1
|
50
|
|
|
|
3
|
if( !defined $domthr ) { |
399
|
0
|
|
|
|
|
0
|
$self->throw("hmmresults filter on cutoff needs two arguments"); |
400
|
|
|
|
|
|
|
} |
401
|
|
|
|
|
|
|
|
402
|
1
|
|
|
|
|
4
|
$new = Bio::Tools::HMMER::Results->new(-type => $self->parsetype); |
403
|
|
|
|
|
|
|
|
404
|
1
|
|
|
|
|
4
|
foreach $seq ( $self->each_Set()) { |
405
|
751
|
100
|
|
|
|
1049
|
next if( $seq->bits() < $seqthr ); |
406
|
303
|
|
|
|
|
455
|
$new->add_Set($seq); |
407
|
303
|
|
|
|
|
414
|
foreach $unit ( $seq->each_Domain() ) { |
408
|
710
|
100
|
|
|
|
1213
|
next if( $unit->bits() < $domthr ); |
409
|
604
|
|
|
|
|
810
|
$new->add_Domain($unit); |
410
|
|
|
|
|
|
|
} |
411
|
|
|
|
|
|
|
} |
412
|
1
|
|
|
|
|
78
|
$new; |
413
|
|
|
|
|
|
|
} |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
=head2 write_ascii_out |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
Title : write_ascii_out |
418
|
|
|
|
|
|
|
Usage : $res->write_ascii_out(\*STDOUT) |
419
|
|
|
|
|
|
|
Function: writes as |
420
|
|
|
|
|
|
|
seq seq_start seq_end model-acc model_start model_end model_name |
421
|
|
|
|
|
|
|
Returns : |
422
|
|
|
|
|
|
|
Args : |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
FIXME: Now that we have no modelacc, this is probably a bad thing. |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
=cut |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
# writes as seq sstart send modelacc hstart hend modelname |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
sub write_ascii_out { |
431
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
432
|
0
|
|
|
|
|
0
|
my $fh = shift; |
433
|
0
|
|
|
|
|
0
|
my ($unit,$seq); |
434
|
|
|
|
|
|
|
|
435
|
0
|
0
|
|
|
|
0
|
if( !defined $fh) { |
436
|
0
|
|
|
|
|
0
|
$fh = \*STDOUT; |
437
|
|
|
|
|
|
|
} |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
|
440
|
0
|
|
|
|
|
0
|
foreach $seq ( $self->each_Set()) { |
441
|
0
|
|
|
|
|
0
|
foreach $unit ( $seq->each_Domain()) { |
442
|
0
|
|
|
|
|
0
|
print $fh sprintf("%s %4d %4d %s %4d %4d %4.2f %4.2g %s\n", |
443
|
|
|
|
|
|
|
$unit->seq_id(),$unit->start(),$unit->end(), |
444
|
|
|
|
|
|
|
$unit->hmmacc,$unit->hstart,$unit->hend, |
445
|
|
|
|
|
|
|
$unit->bits,$unit->evalue,$unit->hmmname); |
446
|
|
|
|
|
|
|
} |
447
|
|
|
|
|
|
|
} |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
} |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
=head2 write_GDF_bits |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
Title : write_GDF_bits |
454
|
|
|
|
|
|
|
Usage : $res->write_GDF_bits(25,15,\*STDOUT) |
455
|
|
|
|
|
|
|
Function: writes GDF format with a sequence,domain threshold |
456
|
|
|
|
|
|
|
Returns : |
457
|
|
|
|
|
|
|
Args : |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=cut |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
sub write_GDF_bits { |
462
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
463
|
0
|
|
|
|
|
0
|
my $seqt = shift; |
464
|
0
|
|
|
|
|
0
|
my $domt = shift; |
465
|
0
|
|
|
|
|
0
|
my $file = shift; |
466
|
0
|
|
|
|
|
0
|
my $seq; |
467
|
|
|
|
|
|
|
my $unit; |
468
|
0
|
|
|
|
|
0
|
my (@array,@narray); |
469
|
|
|
|
|
|
|
|
470
|
0
|
0
|
|
|
|
0
|
if( !defined $file ) { |
471
|
0
|
|
|
|
|
0
|
$self->throw("Attempting to use write_GDF_bits without passing in correct arguments!"); |
472
|
0
|
|
|
|
|
0
|
return; |
473
|
|
|
|
|
|
|
} |
474
|
|
|
|
|
|
|
|
475
|
0
|
|
|
|
|
0
|
foreach $seq ( $self->each_Set()) { |
476
|
|
|
|
|
|
|
|
477
|
0
|
0
|
|
|
|
0
|
if( $seq->bits() < $seqt ) { |
478
|
0
|
|
|
|
|
0
|
next; |
479
|
|
|
|
|
|
|
} |
480
|
|
|
|
|
|
|
|
481
|
0
|
|
|
|
|
0
|
foreach $unit ( $seq->each_Domain() ) { |
482
|
0
|
0
|
|
|
|
0
|
if( $unit->bits() < $domt ) { |
483
|
0
|
|
|
|
|
0
|
next; |
484
|
|
|
|
|
|
|
} |
485
|
0
|
|
|
|
|
0
|
push(@array,$unit); |
486
|
|
|
|
|
|
|
} |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
} |
489
|
|
|
|
|
|
|
|
490
|
0
|
|
|
|
|
0
|
@narray = sort { my ($aa,$bb,$st_a,$st_b); |
|
0
|
|
|
|
|
0
|
|
491
|
0
|
|
|
|
|
0
|
$aa = $a->seq_id(); |
492
|
0
|
|
|
|
|
0
|
$bb = $b->seq_id(); |
493
|
0
|
0
|
|
|
|
0
|
if ( $aa eq $bb) { |
494
|
0
|
|
|
|
|
0
|
$st_a = $a->start(); |
495
|
0
|
|
|
|
|
0
|
$st_b = $b->start(); |
496
|
0
|
|
|
|
|
0
|
return $st_a <=> $st_b; |
497
|
|
|
|
|
|
|
} |
498
|
|
|
|
|
|
|
else { |
499
|
0
|
|
|
|
|
0
|
return $aa cmp $bb; |
500
|
|
|
|
|
|
|
} } @array; |
501
|
|
|
|
|
|
|
|
502
|
0
|
|
|
|
|
0
|
foreach $unit ( @narray ) { |
503
|
0
|
|
|
|
|
0
|
print $file sprintf("%-24s\t%6d\t%6d\t%15s\t%.1f\t%g\n",$unit->get_nse(),$unit->start(),$unit->end(),$unit->seq_id(),$unit->bits(),$unit->evalue); |
504
|
|
|
|
|
|
|
} |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
} |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
sub write_scores_bits { |
509
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
510
|
0
|
|
|
|
|
0
|
my $seqt = shift; |
511
|
0
|
|
|
|
|
0
|
my $domt = shift; |
512
|
0
|
|
|
|
|
0
|
my $file = shift; |
513
|
0
|
|
|
|
|
0
|
my $seq; |
514
|
|
|
|
|
|
|
my $unit; |
515
|
0
|
|
|
|
|
0
|
my (@array,@narray); |
516
|
|
|
|
|
|
|
|
517
|
0
|
0
|
|
|
|
0
|
if( !defined $file ) { |
518
|
0
|
|
|
|
|
0
|
$self->warn("Attempting to use write_scores_bits without passing in correct arguments!"); |
519
|
0
|
|
|
|
|
0
|
return; |
520
|
|
|
|
|
|
|
} |
521
|
|
|
|
|
|
|
|
522
|
0
|
|
|
|
|
0
|
foreach $seq ( $self->eachHMMSequence()) { |
523
|
|
|
|
|
|
|
|
524
|
0
|
0
|
|
|
|
0
|
if( $seq->bits() < $seqt ) { |
525
|
0
|
|
|
|
|
0
|
next; |
526
|
|
|
|
|
|
|
} |
527
|
|
|
|
|
|
|
|
528
|
0
|
|
|
|
|
0
|
foreach $unit ( $seq->eachHMMUnit() ) { |
529
|
0
|
0
|
|
|
|
0
|
if( $unit->bits() < $domt ) { |
530
|
0
|
|
|
|
|
0
|
next; |
531
|
|
|
|
|
|
|
} |
532
|
0
|
|
|
|
|
0
|
push(@array,$unit); |
533
|
|
|
|
|
|
|
} |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
} |
536
|
|
|
|
|
|
|
|
537
|
0
|
|
|
|
|
0
|
@narray = sort { my ($aa,$bb,$st_a,$st_b); |
|
0
|
|
|
|
|
0
|
|
538
|
0
|
|
|
|
|
0
|
$aa = $a->bits(); |
539
|
0
|
|
|
|
|
0
|
$bb = $b->bits(); |
540
|
0
|
|
|
|
|
0
|
return $aa <=> $bb; |
541
|
|
|
|
|
|
|
} @array; |
542
|
|
|
|
|
|
|
|
543
|
0
|
|
|
|
|
0
|
foreach $unit ( @narray ) { |
544
|
0
|
|
|
|
|
0
|
print $file sprintf("%4.2f %s\n",$unit->bits(),$unit->get_nse()); |
545
|
|
|
|
|
|
|
} |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
} |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
sub write_GDF { |
550
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
551
|
0
|
|
|
|
|
0
|
my $file = shift; |
552
|
0
|
|
|
|
|
0
|
my $unit; |
553
|
|
|
|
|
|
|
|
554
|
0
|
0
|
|
|
|
0
|
if( !defined $file ) { |
555
|
0
|
|
|
|
|
0
|
$file = \*STDOUT; |
556
|
|
|
|
|
|
|
} |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
|
559
|
0
|
|
|
|
|
0
|
foreach $unit ( $self->eachHMMUnit() ) { |
560
|
0
|
|
|
|
|
0
|
print $file sprintf("%-24s\t%6d\t%6d\t%15s\t%.1f\t%g\n",$unit->get_nse(),$unit->start(),$unit->end(),$unit->seq_id(),$unit->bits(),$unit->evalue); |
561
|
|
|
|
|
|
|
} |
562
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
} |
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
sub highest_noise { |
566
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
567
|
0
|
|
|
|
|
0
|
my $seqt = shift; |
568
|
0
|
|
|
|
|
0
|
my $domt = shift; |
569
|
0
|
|
|
|
|
0
|
my ($seq,$unit,$hseq,$hdom,$noiseseq,$noisedom); |
570
|
|
|
|
|
|
|
|
571
|
0
|
|
|
|
|
0
|
$hseq = $hdom = -100000; |
572
|
|
|
|
|
|
|
|
573
|
0
|
|
|
|
|
0
|
foreach $seq ( $self->eachHMMSequence()) { |
574
|
0
|
0
|
0
|
|
|
0
|
if( $seq->bits() < $seqt && $seq->bits() > $hseq ) { |
575
|
0
|
|
|
|
|
0
|
$hseq = $seq->bits(); |
576
|
0
|
|
|
|
|
0
|
$noiseseq = $seq; |
577
|
|
|
|
|
|
|
} |
578
|
0
|
|
|
|
|
0
|
foreach $unit ( $seq->eachHMMUnit() ) { |
579
|
0
|
0
|
0
|
|
|
0
|
if( (($seq->bits() < $seqt) || ($seq->bits() > $seqt && $unit->bits < $domt)) && $unit->bits() > $hdom ) { |
|
|
|
0
|
|
|
|
|
580
|
0
|
|
|
|
|
0
|
$hdom = $unit->bits(); |
581
|
0
|
|
|
|
|
0
|
$noisedom = $unit; |
582
|
|
|
|
|
|
|
} |
583
|
|
|
|
|
|
|
} |
584
|
|
|
|
|
|
|
} |
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
|
587
|
0
|
|
|
|
|
0
|
return ($noiseseq,$noisedom); |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
} |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
sub lowest_true { |
593
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
594
|
0
|
|
|
|
|
0
|
my $seqt = shift; |
595
|
0
|
|
|
|
|
0
|
my $domt = shift; |
596
|
0
|
|
|
|
|
0
|
my ($seq,$unit,$lowseq,$lowdom,$trueseq,$truedom); |
597
|
|
|
|
|
|
|
|
598
|
0
|
0
|
|
|
|
0
|
if( ! defined $domt ) { |
599
|
0
|
|
|
|
|
0
|
$self->warn("lowest true needs at least a domain threshold cut-off"); |
600
|
0
|
|
|
|
|
0
|
return (0,0); |
601
|
|
|
|
|
|
|
} |
602
|
|
|
|
|
|
|
|
603
|
0
|
|
|
|
|
0
|
$lowseq = $lowdom = 100000; |
604
|
|
|
|
|
|
|
|
605
|
0
|
|
|
|
|
0
|
foreach $seq ( $self->eachHMMSequence()) { |
606
|
|
|
|
|
|
|
|
607
|
0
|
0
|
0
|
|
|
0
|
if( $seq->bits() >= $seqt && $seq->bits() < $lowseq ) { |
608
|
0
|
|
|
|
|
0
|
$lowseq = $seq->bits(); |
609
|
0
|
|
|
|
|
0
|
$trueseq = $seq; |
610
|
|
|
|
|
|
|
} |
611
|
0
|
0
|
|
|
|
0
|
if( $seq->bits() < $seqt ) { |
612
|
0
|
|
|
|
|
0
|
next; |
613
|
|
|
|
|
|
|
} |
614
|
|
|
|
|
|
|
|
615
|
0
|
|
|
|
|
0
|
foreach $unit ( $seq->eachHMMUnit() ) { |
616
|
0
|
0
|
0
|
|
|
0
|
if( $unit->bits() >= $domt && $unit->bits() < $lowdom ) { |
617
|
0
|
|
|
|
|
0
|
$lowdom = $unit->bits(); |
618
|
0
|
|
|
|
|
0
|
$truedom = $unit; |
619
|
|
|
|
|
|
|
} |
620
|
|
|
|
|
|
|
} |
621
|
|
|
|
|
|
|
} |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
|
624
|
0
|
|
|
|
|
0
|
return ($trueseq,$truedom); |
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
} |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
=head2 add_Set |
631
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
Title : add_Set |
633
|
|
|
|
|
|
|
Usage : Mainly internal function |
634
|
|
|
|
|
|
|
Function: |
635
|
|
|
|
|
|
|
Returns : |
636
|
|
|
|
|
|
|
Args : |
637
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
=cut |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
sub add_Set { |
642
|
1807
|
|
|
1807
|
1
|
1296
|
my $self = shift; |
643
|
1807
|
|
|
|
|
1021
|
my $seq = shift; |
644
|
1807
|
|
|
|
|
989
|
my $name; |
645
|
|
|
|
|
|
|
|
646
|
1807
|
|
|
|
|
1936
|
$name = $seq->name(); |
647
|
|
|
|
|
|
|
|
648
|
1807
|
50
|
|
|
|
2482
|
if( exists $self->{'seq'}->{$name} ) { |
649
|
0
|
|
|
|
|
0
|
$self->throw("You alredy have $name in HMMResults!"); |
650
|
|
|
|
|
|
|
} |
651
|
1807
|
|
|
|
|
2152
|
$self->{'seq'}->{$name} = $seq; |
652
|
|
|
|
|
|
|
} |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
=head2 each_Set |
656
|
|
|
|
|
|
|
|
657
|
|
|
|
|
|
|
Title : each_Set |
658
|
|
|
|
|
|
|
Usage : |
659
|
|
|
|
|
|
|
Function: |
660
|
|
|
|
|
|
|
Returns : |
661
|
|
|
|
|
|
|
Args : |
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
=cut |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
sub each_Set { |
667
|
3
|
|
|
3
|
1
|
6
|
my $self = shift; |
668
|
3
|
|
|
|
|
4
|
my (@array,$name); |
669
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
|
671
|
3
|
|
|
|
|
4
|
foreach $name ( keys %{$self->{'seq'}} ) { |
|
3
|
|
|
|
|
220
|
|
672
|
1503
|
|
|
|
|
1348
|
push(@array,$self->{'seq'}->{$name}); |
673
|
|
|
|
|
|
|
} |
674
|
3
|
|
|
|
|
126
|
return @array; |
675
|
|
|
|
|
|
|
} |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
=head2 get_Set |
679
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
Title : get_Set |
681
|
|
|
|
|
|
|
Usage : $set = $res->get_Set('sequence-name'); |
682
|
|
|
|
|
|
|
Function: returns the Set for a particular sequence |
683
|
|
|
|
|
|
|
Returns : a HMMER::Set object |
684
|
|
|
|
|
|
|
Args : name of the sequence |
685
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
=cut |
688
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
sub get_Set { |
690
|
3
|
|
|
3
|
1
|
2
|
my $self = shift; |
691
|
3
|
|
|
|
|
5
|
my $name = shift; |
692
|
|
|
|
|
|
|
|
693
|
3
|
|
|
|
|
6
|
return $self->{'seq'}->{$name}; |
694
|
|
|
|
|
|
|
} |
695
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
=head2 _parse_hmmpfam |
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
Title : _parse_hmmpfam |
700
|
|
|
|
|
|
|
Usage : $res->_parse_hmmpfam($filehandle) |
701
|
|
|
|
|
|
|
Function: |
702
|
|
|
|
|
|
|
Returns : |
703
|
|
|
|
|
|
|
Args : |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
=cut |
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
sub _parse_hmmpfam { |
709
|
2
|
|
|
2
|
|
4
|
my $self = shift; |
710
|
2
|
|
|
|
|
3
|
my $file = shift; |
711
|
|
|
|
|
|
|
|
712
|
2
|
|
|
|
|
3
|
my ($id,$sqfrom,$sqto,$hmmf,$hmmt,$sc,$ev, |
713
|
|
|
|
|
|
|
$unit,$nd,$seq,$name,$seqname,$from, |
714
|
|
|
|
|
|
|
$to,%hash,%acc,$acc); |
715
|
2
|
|
|
|
|
4
|
my $count = 0; |
716
|
|
|
|
|
|
|
|
717
|
2
|
|
|
|
|
32
|
while(<$file>) { |
718
|
19
|
100
|
|
|
|
70
|
if( /^HMM file:\s+(\S+)/ ) { $self->hmmfile($1); next; } |
|
2
|
100
|
|
|
|
5
|
|
|
2
|
100
|
|
|
|
7
|
|
719
|
2
|
|
|
|
|
7
|
elsif( /^Sequence file:\s+(\S+)/ ) { $self->seqfile($1); next } |
|
2
|
|
|
|
|
11
|
|
720
|
|
|
|
|
|
|
elsif( /^Query(\s+sequence)?:\s+(\S+)/ ) { |
721
|
|
|
|
|
|
|
|
722
|
2
|
|
|
|
|
7
|
$seqname = $2; |
723
|
|
|
|
|
|
|
|
724
|
2
|
|
|
|
|
18
|
$seq = Bio::Tools::HMMER::Set->new(); |
725
|
|
|
|
|
|
|
|
726
|
2
|
|
|
|
|
6
|
$seq ->name($seqname); |
727
|
2
|
|
|
|
|
6
|
$self->add_Set($seq); |
728
|
2
|
|
|
|
|
4
|
%hash = (); |
729
|
|
|
|
|
|
|
|
730
|
2
|
|
|
|
|
9
|
while(<$file>){ |
731
|
|
|
|
|
|
|
|
732
|
16
|
100
|
|
|
|
39
|
if( /Accession:\s+(\S+)/ ) { $seq->accession($1); next } |
|
1
|
100
|
|
|
|
4
|
|
|
1
|
|
|
|
|
2
|
|
733
|
1
|
|
|
|
|
2
|
elsif( s/^Description:\s+// ) { chomp; $seq->desc($_); next } |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
3
|
|
734
|
14
|
100
|
|
|
|
22
|
/^Parsed for domains/ && last; |
735
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
# This is to parse out the accession numbers in old Pfam format. |
737
|
|
|
|
|
|
|
# now not support due to changes in HMMER. |
738
|
|
|
|
|
|
|
|
739
|
12
|
100
|
|
|
|
632
|
if( (($id,$acc, $sc, $ev, $nd) = /^\s*(\S+)\s+(\S+).+?\s(\S+)\s+(\S+)\s+(\d+)\s*$/)) { |
|
|
100
|
|
|
|
|
|
740
|
1
|
|
|
|
|
2
|
$hash{$id} = $sc; # we need this for the sequence |
741
|
|
|
|
|
|
|
# core of the domains below! |
742
|
1
|
|
|
|
|
4
|
$acc {$id} = $acc; |
743
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
# this is the more common parsing routine |
745
|
|
|
|
|
|
|
} elsif ( (($id,$sc, $ev, $nd) = |
746
|
|
|
|
|
|
|
/^\s*(\S+).+?\s(\S+)\s+(\S+)\s+(\d+)\s*$/) ) { |
747
|
|
|
|
|
|
|
|
748
|
1
|
|
|
|
|
4
|
$hash{$id} = $sc; # we need this for the |
749
|
|
|
|
|
|
|
# sequence score of hte domains below! |
750
|
|
|
|
|
|
|
|
751
|
|
|
|
|
|
|
} |
752
|
|
|
|
|
|
|
} |
753
|
|
|
|
|
|
|
|
754
|
2
|
|
|
|
|
11
|
while(<$file>) { |
755
|
11
|
100
|
|
|
|
23
|
/^Align/ && last; |
756
|
9
|
50
|
|
|
|
14
|
m{^//} && last; |
757
|
|
|
|
|
|
|
# this is meant to match |
758
|
|
|
|
|
|
|
|
759
|
|
|
|
|
|
|
#Sequence Domain seq-f seq-t hmm-f hmm-t score E-value |
760
|
|
|
|
|
|
|
#-------- ------- ----- ----- ----- ----- ----- ------- |
761
|
|
|
|
|
|
|
#PF00621 1/1 198 372 .. 1 207 [] 281.6 1e-80 |
762
|
|
|
|
|
|
|
|
763
|
9
|
100
|
|
|
|
67
|
if( (($id, $sqfrom, $sqto, $hmmf,$hmmt,$sc, $ev) = |
764
|
|
|
|
|
|
|
/(\S+)\s+\S+\s+(\d+)\s+(\d+).+?(\d+)\s+(\d+)\s+\S+\s+(\S+)\s+(\S+)\s*$/)) { |
765
|
3
|
|
|
|
|
16
|
$unit = Bio::Tools::HMMER::Domain->new(); |
766
|
3
|
|
|
|
|
9
|
$unit->seq_id ($seqname); |
767
|
3
|
|
|
|
|
8
|
$unit->hmmname ($id); |
768
|
3
|
|
|
|
|
11
|
$unit->start ($sqfrom); |
769
|
3
|
|
|
|
|
7
|
$unit->end ($sqto); |
770
|
3
|
|
|
|
|
6
|
$unit->hstart($hmmf); |
771
|
3
|
|
|
|
|
7
|
$unit->hend ($hmmt); |
772
|
3
|
|
|
|
|
6
|
$unit->bits ($sc); |
773
|
3
|
|
|
|
|
7
|
$unit->evalue ($ev); |
774
|
|
|
|
|
|
|
|
775
|
3
|
50
|
|
|
|
7
|
if( !exists($hash{$id}) ) { |
776
|
0
|
|
|
|
|
0
|
$self->throw("HMMResults parsing error in hmmpfam for $id - can't find sequecne score"); |
777
|
|
|
|
|
|
|
} |
778
|
|
|
|
|
|
|
|
779
|
3
|
|
|
|
|
7
|
$unit->seqbits($hash{$id}); |
780
|
|
|
|
|
|
|
|
781
|
3
|
100
|
|
|
|
7
|
if( defined $acc{$id} ) { |
782
|
1
|
|
|
|
|
3
|
$unit->hmmacc($acc{$id}); |
783
|
|
|
|
|
|
|
} |
784
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
# this should find it's own sequence! |
786
|
3
|
|
|
|
|
8
|
$self->add_Domain($unit); |
787
|
|
|
|
|
|
|
} |
788
|
|
|
|
|
|
|
} |
789
|
2
|
50
|
|
|
|
6
|
if( m{^//} ) { next; } |
|
0
|
|
|
|
|
0
|
|
790
|
|
|
|
|
|
|
|
791
|
2
|
|
|
|
|
4
|
$_ = <$file>; |
792
|
|
|
|
|
|
|
# parses alignment lines. Icky as we have to break on the same line |
793
|
|
|
|
|
|
|
# that we need to read to place the alignment lines with the unit. |
794
|
|
|
|
|
|
|
|
795
|
2
|
|
|
|
|
2
|
while(1) { |
796
|
5
|
100
|
66
|
|
|
38
|
(!defined $_ || m{^//}) && last; |
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
# matches: |
799
|
|
|
|
|
|
|
# PF00621: domain 1 of 1, from 198 to 372 |
800
|
3
|
50
|
|
|
|
21
|
if( /^\s*(\S+):.*from\s+(\d+)\s+to\s+(\d+)/ ) { |
801
|
|
|
|
|
|
|
|
802
|
3
|
|
|
|
|
6
|
$name = $1; |
803
|
3
|
|
|
|
|
3
|
$from = $2; |
804
|
3
|
|
|
|
|
5
|
$to = $3; |
805
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
# find the HMMUnit which this alignment is from |
807
|
|
|
|
|
|
|
|
808
|
3
|
|
|
|
|
7
|
$unit = $self->get_unit_nse($seqname,$name,$from,$to); |
809
|
3
|
50
|
|
|
|
9
|
if( !defined $unit ) { |
810
|
0
|
|
|
|
|
0
|
$self->warn("Could not find $name $from $to unit even though I am reading it in. ugh!"); |
811
|
0
|
|
|
|
|
0
|
$_ = <$file>; |
812
|
0
|
|
|
|
|
0
|
next; |
813
|
|
|
|
|
|
|
} |
814
|
3
|
|
|
|
|
15
|
while(<$file>) { |
815
|
43
|
100
|
|
|
|
53
|
m{^//} && last; |
816
|
41
|
100
|
|
|
|
53
|
/^\s*\S+:.*from\s+\d+\s+to\s+\d+/ && last; |
817
|
40
|
|
|
|
|
49
|
$unit->add_alignment_line($_); |
818
|
|
|
|
|
|
|
} |
819
|
|
|
|
|
|
|
} else { |
820
|
0
|
|
|
|
|
0
|
$_ = <$file>; |
821
|
|
|
|
|
|
|
} |
822
|
|
|
|
|
|
|
} |
823
|
|
|
|
|
|
|
|
824
|
|
|
|
|
|
|
# back to main 'Query:' loop |
825
|
|
|
|
|
|
|
} |
826
|
|
|
|
|
|
|
} |
827
|
|
|
|
|
|
|
} |
828
|
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
# mainly internal function |
830
|
|
|
|
|
|
|
|
831
|
|
|
|
|
|
|
sub get_unit_nse { |
832
|
3
|
|
|
3
|
0
|
5
|
my $self = shift; |
833
|
3
|
|
|
|
|
3
|
my $seqname = shift; |
834
|
3
|
|
|
|
|
4
|
my $domname = shift; |
835
|
3
|
|
|
|
|
3
|
my $start = shift; |
836
|
3
|
|
|
|
|
2
|
my $end = shift; |
837
|
|
|
|
|
|
|
|
838
|
3
|
|
|
|
|
21
|
my($seq,$unit); |
839
|
|
|
|
|
|
|
|
840
|
3
|
|
|
|
|
7
|
$seq = $self->get_Set($seqname); |
841
|
|
|
|
|
|
|
|
842
|
3
|
50
|
|
|
|
8
|
if( !defined $seq ) { |
843
|
0
|
|
|
|
|
0
|
$self->throw("Could not get sequence name $seqname - so can't get its unit"); |
844
|
|
|
|
|
|
|
} |
845
|
|
|
|
|
|
|
|
846
|
3
|
|
|
|
|
8
|
foreach $unit ( $seq->each_Domain() ) { |
847
|
4
|
100
|
66
|
|
|
8
|
if( $unit->hmmname() eq $domname && $unit->start() == $start && $unit->end() == $end ) { |
|
|
|
66
|
|
|
|
|
848
|
3
|
|
|
|
|
5
|
return $unit; |
849
|
|
|
|
|
|
|
} |
850
|
|
|
|
|
|
|
} |
851
|
|
|
|
|
|
|
|
852
|
0
|
|
|
|
|
0
|
return; |
853
|
|
|
|
|
|
|
} |
854
|
|
|
|
|
|
|
|
855
|
|
|
|
|
|
|
|
856
|
|
|
|
|
|
|
=head2 _parse_hmmsearch |
857
|
|
|
|
|
|
|
|
858
|
|
|
|
|
|
|
Title : _parse_hmmsearch |
859
|
|
|
|
|
|
|
Usage : $res->_parse_hmmsearch($filehandle) |
860
|
|
|
|
|
|
|
Function: |
861
|
|
|
|
|
|
|
Returns : |
862
|
|
|
|
|
|
|
Args : |
863
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
=cut |
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
sub _parse_hmmsearch { |
868
|
2
|
|
|
2
|
|
2
|
my $self = shift; |
869
|
2
|
|
|
|
|
4
|
my $file = shift; |
870
|
2
|
|
|
|
|
3
|
my ($id,$sqfrom,$sqto,$sc,$ev,$unit,$nd,$seq,$hmmf,$hmmt, |
871
|
|
|
|
|
|
|
$hmmfname,$hmmacc, $hmmid, %seqh); |
872
|
2
|
|
|
|
|
2
|
my $count = 0; |
873
|
|
|
|
|
|
|
|
874
|
2
|
|
|
|
|
33
|
while(<$file>) { |
875
|
26
|
100
|
|
|
|
37
|
/^HMM file:\s+(\S+)/ and do { $self->hmmfile($1); $hmmfname = $1 }; |
|
2
|
|
|
|
|
6
|
|
|
2
|
|
|
|
|
2
|
|
876
|
26
|
50
|
|
|
|
33
|
/^Accession:\s+(\S+)/ and do { $hmmacc = $1 }; |
|
0
|
|
|
|
|
0
|
|
877
|
26
|
100
|
|
|
|
31
|
/^Query HMM:\s+(\S+)/ and do { $hmmid = $1 }; |
|
2
|
|
|
|
|
4
|
|
878
|
26
|
100
|
|
|
|
33
|
/^Sequence database:\s+(\S+)/ and do { $self->seqfile($1) }; |
|
2
|
|
|
|
|
5
|
|
879
|
26
|
100
|
|
|
|
53
|
/^Scores for complete sequences/ && last; |
880
|
|
|
|
|
|
|
} |
881
|
|
|
|
|
|
|
|
882
|
2
|
50
|
|
|
|
4
|
$hmmfname = "given" if not $hmmfname; |
883
|
|
|
|
|
|
|
|
884
|
2
|
|
|
|
|
6
|
while(<$file>) { |
885
|
1510
|
100
|
|
|
|
2037
|
/^Parsed for domains/ && last; |
886
|
1508
|
100
|
|
|
|
17223
|
if( (($id, $sc, $ev, $nd) = /(\S+).+?\s(\S+)\s+(\S+)\s+(\d+)\s*$/)) { |
887
|
1502
|
|
|
|
|
1912
|
$seq = Bio::Tools::HMMER::Set->new(); |
888
|
1502
|
|
|
|
|
1719
|
$seq->name($id); |
889
|
1502
|
|
|
|
|
1621
|
$seq->bits($sc); |
890
|
1502
|
|
|
|
|
1844
|
$seqh{$id} = $sc; |
891
|
1502
|
|
|
|
|
1600
|
$seq->evalue($ev); |
892
|
1502
|
|
|
|
|
1413
|
$self->add_Set($seq); |
893
|
1502
|
|
|
|
|
1589
|
$seq->accession($hmmacc); |
894
|
|
|
|
|
|
|
} |
895
|
|
|
|
|
|
|
} |
896
|
|
|
|
|
|
|
|
897
|
2
|
|
|
|
|
14
|
while(<$file>) { |
898
|
2804
|
50
|
|
|
|
4641
|
/^Alignments of top-scoring domains/ && last; |
899
|
2804
|
100
|
|
|
|
23555
|
if( (($id, $sqfrom, $sqto, $hmmf, $hmmt, $sc, $ev) = /(\S+)\s+\S+\s+(\d+)\s+(\d+).+?(\d+)\s+(\d+)\s+\S+\s+(\S+)\s+(\S+)\s*$/)) { |
900
|
2430
|
|
|
|
|
5091
|
$unit = Bio::Tools::HMMER::Domain->new(); |
901
|
|
|
|
|
|
|
|
902
|
2430
|
|
|
|
|
4072
|
$unit->seq_id($id); |
903
|
2430
|
|
|
|
|
3542
|
$unit->hmmname($hmmfname); |
904
|
2430
|
|
|
|
|
3514
|
$unit->start($sqfrom); |
905
|
2430
|
|
|
|
|
3423
|
$unit->end($sqto); |
906
|
2430
|
|
|
|
|
3758
|
$unit->bits($sc); |
907
|
2430
|
|
|
|
|
3872
|
$unit->hstart($hmmf); |
908
|
2430
|
|
|
|
|
3553
|
$unit->hend($hmmt); |
909
|
2430
|
|
|
|
|
3860
|
$unit->evalue($ev); |
910
|
2430
|
|
|
|
|
5028
|
$unit->seqbits($seqh{$id}); |
911
|
2430
|
|
|
|
|
4122
|
$self->add_Domain($unit); |
912
|
2430
|
|
|
|
|
12927
|
$count++; |
913
|
|
|
|
|
|
|
} |
914
|
|
|
|
|
|
|
} |
915
|
|
|
|
|
|
|
|
916
|
2
|
|
|
|
|
5
|
$_ = <$file>; |
917
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
## Recognize and store domain alignments |
919
|
|
|
|
|
|
|
|
920
|
2
|
|
|
|
|
3
|
while(1) { |
921
|
2
|
50
|
|
|
|
7
|
if( !defined $_ ) { |
922
|
2
|
|
|
|
|
5
|
last; |
923
|
|
|
|
|
|
|
} |
924
|
0
|
0
|
|
|
|
0
|
/^Histogram of all scores/ && last; |
925
|
|
|
|
|
|
|
|
926
|
|
|
|
|
|
|
# matches: |
927
|
|
|
|
|
|
|
# PF00621: domain 1 of 1, from 198 to 372 |
928
|
0
|
0
|
|
|
|
0
|
if( /^\s*(\S+):.*from\s+(\d+)\s+to\s+(\d+)/ ) { |
929
|
0
|
|
|
|
|
0
|
my $name = $1; |
930
|
0
|
|
|
|
|
0
|
my $from = $2; |
931
|
0
|
|
|
|
|
0
|
my $to = $3; |
932
|
|
|
|
|
|
|
|
933
|
|
|
|
|
|
|
# find the HMMUnit which this alignment is from |
934
|
0
|
|
|
|
|
0
|
$unit = $self->get_unit_nse($name,$hmmfname,$from,$to); |
935
|
|
|
|
|
|
|
|
936
|
0
|
0
|
|
|
|
0
|
if( !defined $unit ) { |
937
|
0
|
|
|
|
|
0
|
$self->warn("Could not find $name $from $to unit even though I am reading it in. ugh!"); |
938
|
0
|
|
|
|
|
0
|
next; |
939
|
|
|
|
|
|
|
} |
940
|
0
|
|
|
|
|
0
|
while(<$file>) { |
941
|
0
|
0
|
|
|
|
0
|
/^Histogram of all scores/ && last; |
942
|
0
|
0
|
|
|
|
0
|
/^\s*\S+:.*from\s+\d+\s+to\s+\d+/ && last; |
943
|
0
|
|
|
|
|
0
|
$unit->add_alignment_line($_); |
944
|
|
|
|
|
|
|
} |
945
|
|
|
|
|
|
|
} |
946
|
|
|
|
|
|
|
else { |
947
|
0
|
|
|
|
|
0
|
$_ = <$file>; |
948
|
|
|
|
|
|
|
} |
949
|
|
|
|
|
|
|
} |
950
|
|
|
|
|
|
|
|
951
|
2
|
|
|
|
|
560
|
return $count; |
952
|
|
|
|
|
|
|
} |
953
|
|
|
|
|
|
|
|
954
|
|
|
|
|
|
|
=head2 parsetype |
955
|
|
|
|
|
|
|
|
956
|
|
|
|
|
|
|
Title : parsetype |
957
|
|
|
|
|
|
|
Usage : $obj->parsetype($newval) |
958
|
|
|
|
|
|
|
Function: |
959
|
|
|
|
|
|
|
Returns : value of parsetype |
960
|
|
|
|
|
|
|
Args : newvalue (optional) |
961
|
|
|
|
|
|
|
|
962
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
=cut |
964
|
|
|
|
|
|
|
|
965
|
|
|
|
|
|
|
sub parsetype{ |
966
|
6
|
|
|
6
|
1
|
9
|
my ($self,$value) = @_; |
967
|
6
|
100
|
|
|
|
11
|
if( defined $value) { |
968
|
5
|
|
|
|
|
13
|
$self->{'_parsetype'} = $value; |
969
|
|
|
|
|
|
|
} |
970
|
6
|
|
|
|
|
13
|
return $self->{'_parsetype'}; |
971
|
|
|
|
|
|
|
} |
972
|
|
|
|
|
|
|
|
973
|
|
|
|
|
|
|
1; # says use was ok |
974
|
|
|
|
|
|
|
__END__ |