File Coverage

Bio/Search/HSP/HmmpfamHSP.pm
Criterion Covered Total %
statement 109 109 100.0
branch 22 26 84.6
condition 7 15 46.6
subroutine 9 9 100.0
pod 5 5 100.0
total 152 164 92.6


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Search::HSP::HmmpfamHSP
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Sendu Bala
7             #
8             # Copyright Sendu Bala
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Search::HSP::HmmpfamHSP - A parser and HSP object for hmmpfam hsps
17              
18             =head1 SYNOPSIS
19              
20             # generally we use Bio::SearchIO to build these objects
21             use Bio::SearchIO;
22             my $in = Bio::SearchIO->new(-format => 'hmmer_pull',
23             -file => 'result.hmmer');
24              
25             while (my $result = $in->next_result) {
26             while (my $hit = $result->next_hit) {
27             print $hit->name, "\n";
28             print $hit->score, "\n";
29             print $hit->significance, "\n";
30              
31             while (my $hsp = $hit->next_hsp) {
32             # process HSPI objects
33             }
34             }
35             }
36              
37             =head1 DESCRIPTION
38              
39             This object implements a parser for hmmpfam hsp output, a program in the HMMER
40             package.
41              
42             =head1 FEEDBACK
43              
44             =head2 Mailing Lists
45              
46             User feedback is an integral part of the evolution of this and other
47             Bioperl modules. Send your comments and suggestions preferably to
48             the Bioperl mailing list. Your participation is much appreciated.
49              
50             bioperl-l@bioperl.org - General discussion
51             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
52              
53             =head2 Support
54              
55             Please direct usage questions or support issues to the mailing list:
56              
57             I
58              
59             rather than to the module maintainer directly. Many experienced and
60             reponsive experts will be able look at the problem and quickly
61             address it. Please include a thorough description of the problem
62             with code and data examples if at all possible.
63              
64             =head2 Reporting Bugs
65              
66             Report bugs to the Bioperl bug tracking system to help us keep track
67             of the bugs and their resolution. Bug reports can be submitted via the
68             web:
69              
70             https://github.com/bioperl/bioperl-live/issues
71              
72             =head1 AUTHOR - Sendu Bala
73              
74             Email bix@sendu.me.uk
75              
76             =head1 APPENDIX
77              
78             The rest of the documentation details each of the object methods.
79             Internal methods are usually preceded with a _
80              
81             =cut
82              
83             # Let the code begin...
84              
85             package Bio::Search::HSP::HmmpfamHSP;
86              
87 1     1   5 use strict;
  1         3  
  1         29  
88 1     1   5 use base qw(Bio::Search::HSP::PullHSPI);
  1         2  
  1         481  
89              
90             =head2 new
91              
92             Title : new
93             Usage : my $obj = Bio::Search::HSP::HmmpfamHSP->new();
94             Function: Builds a new Bio::Search::HSP::HmmpfamHSP object.
95             Returns : Bio::Search::HSP::HmmpfamHSP
96             Args : -chunk => [Bio::Root::IO, $start, $end] (required if no -parent)
97             -parent => Bio::PullParserI object (required if no -chunk)
98             -hsp_data => array ref with [rank query_start query_end hit_start
99             hit_end score evalue]
100              
101             where the array ref provided to -chunk contains an IO object
102             for a filehandle to something representing the raw data of the
103             hsp, and $start and $end define the tell() position within the
104             filehandle that the hsp data starts and ends (optional; defaults
105             to start and end of the entire thing described by the filehandle)
106              
107             =cut
108              
109             sub new {
110 34     34 1 111 my ($class, @args) = @_;
111 34         183 my $self = $class->SUPER::new(@args);
112            
113 34         157 $self->_setup(@args);
114            
115 34         69 my $fields = $self->_fields;
116 34         99 foreach my $field (qw( alignment )) {
117 34         97 $fields->{$field} = undef;
118             }
119            
120 34         89 my $hsp_data = $self->_raw_hsp_data;
121 34 50 33     150 if ($hsp_data && ref($hsp_data) eq 'ARRAY') {
122 34         51 my @hsp_data = @{$hsp_data}; # don't alter the reference
  34         130  
123 34         71 foreach my $field (qw(rank query_start query_end hit_start hit_end score evalue)) {
124 238         395 $fields->{$field} = shift(@hsp_data);
125             }
126             }
127            
128 34         359 $self->_dependencies( { ( query_string => 'alignment',
129             hit_string => 'alignment',
130             homology_string => 'alignment',
131             hit_identical_inds => 'seq_inds',
132             hit_conserved_inds => 'seq_inds',
133             hit_nomatch_inds => 'seq_inds',
134             hit_gap_inds => 'seq_inds',
135             query_identical_inds => 'seq_inds',
136             query_conserved_inds => 'seq_inds',
137             query_nomatch_inds => 'seq_inds',
138             query_gap_inds => 'seq_inds' ) } );
139            
140 34         148 return $self;
141             }
142              
143             #
144             # PullParserI discovery methods so we can answer all HitI questions
145             #
146              
147             sub _discover_alignment {
148 25     25   50 my $self = shift;
149 25         59 my $alignments_hash = $self->get_field('alignments');
150            
151 25         62 my $identifier = $self->get_field('name').'~~~~'.$self->get_field('rank');
152            
153 25         99 while (! defined $alignments_hash->{$identifier}) {
154 24 100       92 last unless $self->parent->parent->_next_alignment;
155             }
156 25         60 my $alignment = $alignments_hash->{$identifier};
157            
158 25 100       51 if ($alignment) {
159             # work out query, hit and homology strings, and some stats
160             # (quicker to do this all at once instead of each method working on
161             # $alignment string itself)
162            
163 23         37 my ($query_string, $hit_string, $homology_string);
164 23         371 while ($alignment =~ /\s+(\S+)\n\s+(\S.+)\n\s+\S+\s+\d+\s+(\S+)\s+\d/gm) {
165 45         134 my $hi = $1;
166 45         74 my $ho = $2;
167 45         100 $query_string .= $3;
168            
169 45         141 $hi =~ s/\*\-\>//;
170 45         134 $ho = ' 'x(length($hi) - length($ho)).$ho;
171 45         112 $hi =~ s/\<\-\*//;
172            
173 45         86 $hit_string .= $hi;
174 45         176 $homology_string .= $ho;
175             }
176            
177 23         65 $self->_fields->{query_string} = $query_string;
178 23         57 $self->_fields->{hit_string} = $hit_string;
179 23         97 $homology_string =~ s/ $//;
180 23         51 $self->_fields->{homology_string} = $homology_string;
181            
182 23         99 ($self->{_query_gaps}) = $query_string =~ tr/-//;
183 23         62 ($self->{_hit_gaps}) = $hit_string =~ tr/.//;
184 23         106 ($self->{_total_gaps}) = $self->{_query_gaps} + $self->{_hit_gaps};
185             }
186            
187 25         55 $self->_fields->{alignment} = 1; # stop this method being called again
188             }
189              
190             # seq_inds related methods, all just need seq_inds field to have been gotten
191             sub _discover_seq_inds {
192 22     22   43 my $self = shift;
193 22         57 my ($seqString, $qseq, $sseq) = ( $self->get_field('homology_string'),
194             $self->get_field('query_string'),
195             $self->get_field('hit_string') );
196            
197             # (code largely lifted from GenericHSP)
198            
199             # Using hashes to avoid saving duplicate residue numbers.
200 22         61 my %identicalList_query = ();
201 22         36 my %identicalList_sbjct = ();
202 22         30 my %conservedList_query = ();
203 22         29 my %conservedList_sbjct = ();
204 22         35 my @gapList_query = ();
205 22         22 my @gapList_sbjct = ();
206 22         27 my %nomatchList_query = ();
207 22         18 my %nomatchList_sbjct = ();
208            
209 22         38 my $resCount_query = $self->get_field('query_end');
210 22         48 my $resCount_sbjct = $self->get_field('hit_end');
211            
212 22         43 my ($mchar, $schar, $qchar);
213 22         81 while ($mchar = chop($seqString) ) {
214 1705         1947 ($qchar, $schar) = (chop($qseq), chop($sseq));
215            
216 1705 100 66     4222 if ($mchar eq '+' || $mchar eq '.' || $mchar eq ':') {
    100 66        
217 616         828 $conservedList_query{ $resCount_query } = 1;
218 616         765 $conservedList_sbjct{ $resCount_sbjct } = 1;
219             }
220             elsif ($mchar eq ' ') {
221 484         644 $nomatchList_query{ $resCount_query } = 1;
222 484         605 $nomatchList_sbjct{ $resCount_sbjct } = 1;
223             }
224             else {
225 605         803 $identicalList_query{ $resCount_query } = 1;
226 605         730 $identicalList_sbjct{ $resCount_sbjct } = 1;
227             }
228            
229 1705 100       1895 if ($qchar eq '-') {
230 143         189 push(@gapList_query, $resCount_query);
231             }
232             else {
233 1562         1448 $resCount_query -= 1;
234             }
235 1705 100       1806 if ($schar eq '.') {
236 11         25 push(@gapList_sbjct, $resCount_sbjct);
237             }
238             else {
239 1694         2409 $resCount_sbjct -= 1;
240             }
241             }
242            
243 22         82 my $fields = $self->_fields;
244 22         239 $fields->{hit_identical_inds} = [ sort { $a <=> $b } keys %identicalList_sbjct ];
  2234         2066  
245 22         134 $fields->{hit_conserved_inds} = [ sort { $a <=> $b } keys %conservedList_sbjct ];
  2212         2099  
246 22         107 $fields->{hit_nomatch_inds} = [ sort { $a <=> $b } keys %nomatchList_sbjct ];
  1570         1442  
247 22         75 $fields->{hit_gap_inds} = [ reverse @gapList_sbjct ];
248 22         108 $fields->{query_identical_inds} = [ sort { $a <=> $b } keys %identicalList_query ];
  2204         2078  
249 22         132 $fields->{query_conserved_inds} = [ sort { $a <=> $b } keys %conservedList_query ];
  2259         2247  
250 22         124 $fields->{query_nomatch_inds} = [ sort { $a <=> $b } keys %nomatchList_query ];
  1121         1091  
251 22         104 $fields->{query_gap_inds} = [ reverse @gapList_query ];
252            
253 22         286 $fields->{seq_inds} = 1;
254             }
255              
256             =head2 query
257              
258             Title : query
259             Usage : my $query = $hsp->query
260             Function: Returns a SeqFeature representing the query in the HSP
261             Returns : L
262             Args : none
263              
264             =cut
265              
266             sub query {
267 50     50 1 6486 my $self = shift;
268 50 100       129 unless ($self->{_created_query}) {
269 4         19 $self->SUPER::query( new Bio::SeqFeature::Similarity
270             ('-primary' => $self->primary_tag,
271             '-start' => $self->get_field('query_start'),
272             '-end' => $self->get_field('query_end'),
273             '-expect' => $self->get_field('evalue'),
274             '-score' => $self->get_field('score'),
275             '-strand' => 1,
276             '-seq_id' => $self->get_field('query_name'),
277             #'-seqlength'=> $self->get_field('query_length'), (not known)
278             '-source' => $self->get_field('algorithm'),
279             '-seqdesc' => $self->get_field('query_description')
280             ) );
281 4         29 $self->{_created_query} = 1;
282             }
283 50         196 return $self->SUPER::query(@_);
284             }
285              
286             =head2 hit
287              
288             Title : hit
289             Usage : my $hit = $hsp->hit
290             Function: Returns a SeqFeature representing the hit in the HSP
291             Returns : L
292             Args : [optional] new value to set
293              
294             =cut
295              
296             sub hit {
297 60     60 1 6987 my $self = shift;
298 60 100       173 unless ($self->{_created_hit}) {
299             # the full length isn't always known (given in the report), but don't
300             # warn about the missing info all the time
301 6         29 my $verbose = $self->parent->parent->parent->verbose;
302 6         27 $self->parent->parent->parent->verbose(-1);
303 6         24 my $seq_length = $self->get_field('length');
304 6         25 $self->parent->parent->parent->verbose($verbose);
305            
306 6 100       31 $self->SUPER::hit( new Bio::SeqFeature::Similarity
307             ('-primary' => $self->primary_tag,
308             '-start' => $self->get_field('hit_start'),
309             '-end' => $self->get_field('hit_end'),
310             '-expect' => $self->get_field('evalue'),
311             '-score' => $self->get_field('score'),
312             '-strand' => 1,
313             '-seq_id' => $self->get_field('name'),
314             $seq_length ? ('-seqlength' => $seq_length) : (),
315             '-source' => $self->get_field('algorithm'),
316             '-seqdesc' => $self->get_field('description')
317             ) );
318 6         24 $self->{_created_hit} = 1;
319             }
320 60         182 return $self->SUPER::hit(@_);
321             }
322              
323             =head2 gaps
324              
325             Title : gaps
326             Usage : my $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
327             Function : Get the number of gaps in the query, hit, or total alignment.
328             Returns : Integer, number of gaps or 0 if none
329             Args : 'query' = num conserved / length of query seq (without gaps)
330             'hit' = num conserved / length of hit seq (without gaps)
331             'total' = num conserved / length of alignment (with gaps)
332             default = 'total'
333              
334             =cut
335              
336             sub gaps {
337 14     14 1 3605 my ($self, $type) = @_;
338            
339 14 50       50 $type = lc $type if defined $type;
340 14 50 33     153 $type = 'total' if (! defined $type || $type eq 'hsp' || $type !~ /query|hit|subject|sbjct|total/);
      33        
341 14 50       30 $type = 'hit' if $type =~ /sbjct|subject/;
342            
343 14         49 $self->get_field('alignment'); # make sure gaps have been calculated
344            
345 14         64 return $self->{'_'.$type.'_gaps'};
346             }
347              
348             =head2 pvalue
349              
350             Title : pvalue
351             Usage : my $pvalue = $hsp->pvalue();
352             Function: Returns the P-value for this HSP
353             Returns : undef (Hmmpfam reports do not have p-values)
354             Args : none
355              
356             =cut
357              
358             # noop
359       4 1   sub pvalue { }
360              
361             1;