File Coverage

Bio/Search/HSP/HmmpfamHSP.pm
Criterion Covered Total %
statement 109 109 100.0
branch 22 26 84.6
condition 7 15 46.6
subroutine 9 9 100.0
pod 5 5 100.0
total 152 164 92.6


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Search::HSP::HmmpfamHSP
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Sendu Bala
7             #
8             # Copyright Sendu Bala
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Search::HSP::HmmpfamHSP - A parser and HSP object for hmmpfam hsps
17              
18             =head1 SYNOPSIS
19              
20             # generally we use Bio::SearchIO to build these objects
21             use Bio::SearchIO;
22             my $in = Bio::SearchIO->new(-format => 'hmmer_pull',
23             -file => 'result.hmmer');
24              
25             while (my $result = $in->next_result) {
26             while (my $hit = $result->next_hit) {
27             print $hit->name, "\n";
28             print $hit->score, "\n";
29             print $hit->significance, "\n";
30              
31             while (my $hsp = $hit->next_hsp) {
32             # process HSPI objects
33             }
34             }
35             }
36              
37             =head1 DESCRIPTION
38              
39             This object implements a parser for hmmpfam hsp output, a program in the HMMER
40             package.
41              
42             =head1 FEEDBACK
43              
44             =head2 Mailing Lists
45              
46             User feedback is an integral part of the evolution of this and other
47             Bioperl modules. Send your comments and suggestions preferably to
48             the Bioperl mailing list. Your participation is much appreciated.
49              
50             bioperl-l@bioperl.org - General discussion
51             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
52              
53             =head2 Support
54              
55             Please direct usage questions or support issues to the mailing list:
56              
57             I
58              
59             rather than to the module maintainer directly. Many experienced and
60             reponsive experts will be able look at the problem and quickly
61             address it. Please include a thorough description of the problem
62             with code and data examples if at all possible.
63              
64             =head2 Reporting Bugs
65              
66             Report bugs to the Bioperl bug tracking system to help us keep track
67             of the bugs and their resolution. Bug reports can be submitted via the
68             web:
69              
70             https://github.com/bioperl/bioperl-live/issues
71              
72             =head1 AUTHOR - Sendu Bala
73              
74             Email bix@sendu.me.uk
75              
76             =head1 APPENDIX
77              
78             The rest of the documentation details each of the object methods.
79             Internal methods are usually preceded with a _
80              
81             =cut
82              
83             # Let the code begin...
84              
85             package Bio::Search::HSP::HmmpfamHSP;
86              
87 1     1   3 use strict;
  1         1  
  1         23  
88 1     1   2 use base qw(Bio::Search::HSP::PullHSPI);
  1         1  
  1         470  
89              
90             =head2 new
91              
92             Title : new
93             Usage : my $obj = Bio::Search::HSP::HmmpfamHSP->new();
94             Function: Builds a new Bio::Search::HSP::HmmpfamHSP object.
95             Returns : Bio::Search::HSP::HmmpfamHSP
96             Args : -chunk => [Bio::Root::IO, $start, $end] (required if no -parent)
97             -parent => Bio::PullParserI object (required if no -chunk)
98             -hsp_data => array ref with [rank query_start query_end hit_start
99             hit_end score evalue]
100              
101             where the array ref provided to -chunk contains an IO object
102             for a filehandle to something representing the raw data of the
103             hsp, and $start and $end define the tell() position within the
104             filehandle that the hsp data starts and ends (optional; defaults
105             to start and end of the entire thing described by the filehandle)
106              
107             =cut
108              
109             sub new {
110 34     34 1 51 my ($class, @args) = @_;
111 34         80 my $self = $class->SUPER::new(@args);
112            
113 34         71 $self->_setup(@args);
114            
115 34         48 my $fields = $self->_fields;
116 34         39 foreach my $field (qw( alignment )) {
117 34         50 $fields->{$field} = undef;
118             }
119            
120 34         59 my $hsp_data = $self->_raw_hsp_data;
121 34 50 33     125 if ($hsp_data && ref($hsp_data) eq 'ARRAY') {
122 34         22 my @hsp_data = @{$hsp_data}; # don't alter the reference
  34         83  
123 34         40 foreach my $field (qw(rank query_start query_end hit_start hit_end score evalue)) {
124 238         238 $fields->{$field} = shift(@hsp_data);
125             }
126             }
127            
128 34         201 $self->_dependencies( { ( query_string => 'alignment',
129             hit_string => 'alignment',
130             homology_string => 'alignment',
131             hit_identical_inds => 'seq_inds',
132             hit_conserved_inds => 'seq_inds',
133             hit_nomatch_inds => 'seq_inds',
134             hit_gap_inds => 'seq_inds',
135             query_identical_inds => 'seq_inds',
136             query_conserved_inds => 'seq_inds',
137             query_nomatch_inds => 'seq_inds',
138             query_gap_inds => 'seq_inds' ) } );
139            
140 34         87 return $self;
141             }
142              
143             #
144             # PullParserI discovery methods so we can answer all HitI questions
145             #
146              
147             sub _discover_alignment {
148 25     25   20 my $self = shift;
149 25         36 my $alignments_hash = $self->get_field('alignments');
150            
151 25         35 my $identifier = $self->get_field('name').'~~~~'.$self->get_field('rank');
152            
153 25         56 while (! defined $alignments_hash->{$identifier}) {
154 24 100       38 last unless $self->parent->parent->_next_alignment;
155             }
156 25         23 my $alignment = $alignments_hash->{$identifier};
157            
158 25 100       38 if ($alignment) {
159             # work out query, hit and homology strings, and some stats
160             # (quicker to do this all at once instead of each method working on
161             # $alignment string itself)
162            
163 23         14 my ($query_string, $hit_string, $homology_string);
164 23         265 while ($alignment =~ /\s+(\S+)\n\s+(\S.+)\n\s+\S+\s+\d+\s+(\S+)\s+\d/gm) {
165 45         67 my $hi = $1;
166 45         43 my $ho = $2;
167 45         59 $query_string .= $3;
168            
169 45         65 $hi =~ s/\*\-\>//;
170 45         69 $ho = ' 'x(length($hi) - length($ho)).$ho;
171 45         61 $hi =~ s/\<\-\*//;
172            
173 45         45 $hit_string .= $hi;
174 45         118 $homology_string .= $ho;
175             }
176            
177 23         39 $self->_fields->{query_string} = $query_string;
178 23         27 $self->_fields->{hit_string} = $hit_string;
179 23         73 $homology_string =~ s/ $//;
180 23         33 $self->_fields->{homology_string} = $homology_string;
181            
182 23         43 ($self->{_query_gaps}) = $query_string =~ tr/-//;
183 23         27 ($self->{_hit_gaps}) = $hit_string =~ tr/.//;
184 23         42 ($self->{_total_gaps}) = $self->{_query_gaps} + $self->{_hit_gaps};
185             }
186            
187 25         33 $self->_fields->{alignment} = 1; # stop this method being called again
188             }
189              
190             # seq_inds related methods, all just need seq_inds field to have been gotten
191             sub _discover_seq_inds {
192 22     22   16 my $self = shift;
193 22         33 my ($seqString, $qseq, $sseq) = ( $self->get_field('homology_string'),
194             $self->get_field('query_string'),
195             $self->get_field('hit_string') );
196            
197             # (code largely lifted from GenericHSP)
198            
199             # Using hashes to avoid saving duplicate residue numbers.
200 22         33 my %identicalList_query = ();
201 22         18 my %identicalList_sbjct = ();
202 22         17 my %conservedList_query = ();
203 22         17 my %conservedList_sbjct = ();
204 22         19 my @gapList_query = ();
205 22         17 my @gapList_sbjct = ();
206 22         16 my %nomatchList_query = ();
207 22         22 my %nomatchList_sbjct = ();
208            
209 22         25 my $resCount_query = $self->get_field('query_end');
210 22         31 my $resCount_sbjct = $self->get_field('hit_end');
211            
212 22         17 my ($mchar, $schar, $qchar);
213 22         40 while ($mchar = chop($seqString) ) {
214 1705         1272 ($qchar, $schar) = (chop($qseq), chop($sseq));
215            
216 1705 100 66     4757 if ($mchar eq '+' || $mchar eq '.' || $mchar eq ':') {
    100 66        
217 616         498 $conservedList_query{ $resCount_query } = 1;
218 616         465 $conservedList_sbjct{ $resCount_sbjct } = 1;
219             }
220             elsif ($mchar eq ' ') {
221 484         376 $nomatchList_query{ $resCount_query } = 1;
222 484         387 $nomatchList_sbjct{ $resCount_sbjct } = 1;
223             }
224             else {
225 605         472 $identicalList_query{ $resCount_query } = 1;
226 605         484 $identicalList_sbjct{ $resCount_sbjct } = 1;
227             }
228            
229 1705 100       1463 if ($qchar eq '-') {
230 143         120 push(@gapList_query, $resCount_query);
231             }
232             else {
233 1562         826 $resCount_query -= 1;
234             }
235 1705 100       1322 if ($schar eq '.') {
236 11         21 push(@gapList_sbjct, $resCount_sbjct);
237             }
238             else {
239 1694         1979 $resCount_sbjct -= 1;
240             }
241             }
242            
243 22         46 my $fields = $self->_fields;
244 22         145 $fields->{hit_identical_inds} = [ sort { $a <=> $b } keys %identicalList_sbjct ];
  2185         1363  
245 22         86 $fields->{hit_conserved_inds} = [ sort { $a <=> $b } keys %conservedList_sbjct ];
  2227         1321  
246 22         73 $fields->{hit_nomatch_inds} = [ sort { $a <=> $b } keys %nomatchList_sbjct ];
  1603         940  
247 22         39 $fields->{hit_gap_inds} = [ reverse @gapList_sbjct ];
248 22         65 $fields->{query_identical_inds} = [ sort { $a <=> $b } keys %identicalList_query ];
  2183         1275  
249 22         81 $fields->{query_conserved_inds} = [ sort { $a <=> $b } keys %conservedList_query ];
  2263         1329  
250 22         60 $fields->{query_nomatch_inds} = [ sort { $a <=> $b } keys %nomatchList_query ];
  1127         700  
251 22         57 $fields->{query_gap_inds} = [ reverse @gapList_query ];
252            
253 22         207 $fields->{seq_inds} = 1;
254             }
255              
256             =head2 query
257              
258             Title : query
259             Usage : my $query = $hsp->query
260             Function: Returns a SeqFeature representing the query in the HSP
261             Returns : L
262             Args : none
263              
264             =cut
265              
266             sub query {
267 50     50 1 2988 my $self = shift;
268 50 100       85 unless ($self->{_created_query}) {
269 4         12 $self->SUPER::query( new Bio::SeqFeature::Similarity
270             ('-primary' => $self->primary_tag,
271             '-start' => $self->get_field('query_start'),
272             '-end' => $self->get_field('query_end'),
273             '-expect' => $self->get_field('evalue'),
274             '-score' => $self->get_field('score'),
275             '-strand' => 1,
276             '-seq_id' => $self->get_field('query_name'),
277             #'-seqlength'=> $self->get_field('query_length'), (not known)
278             '-source' => $self->get_field('algorithm'),
279             '-seqdesc' => $self->get_field('query_description')
280             ) );
281 4         12 $self->{_created_query} = 1;
282             }
283 50         91 return $self->SUPER::query(@_);
284             }
285              
286             =head2 hit
287              
288             Title : hit
289             Usage : my $hit = $hsp->hit
290             Function: Returns a SeqFeature representing the hit in the HSP
291             Returns : L
292             Args : [optional] new value to set
293              
294             =cut
295              
296             sub hit {
297 60     60 1 3125 my $self = shift;
298 60 100       102 unless ($self->{_created_hit}) {
299             # the full length isn't always known (given in the report), but don't
300             # warn about the missing info all the time
301 6         14 my $verbose = $self->parent->parent->parent->verbose;
302 6         13 $self->parent->parent->parent->verbose(-1);
303 6         10 my $seq_length = $self->get_field('length');
304 6         11 $self->parent->parent->parent->verbose($verbose);
305            
306 6 100       18 $self->SUPER::hit( new Bio::SeqFeature::Similarity
307             ('-primary' => $self->primary_tag,
308             '-start' => $self->get_field('hit_start'),
309             '-end' => $self->get_field('hit_end'),
310             '-expect' => $self->get_field('evalue'),
311             '-score' => $self->get_field('score'),
312             '-strand' => 1,
313             '-seq_id' => $self->get_field('name'),
314             $seq_length ? ('-seqlength' => $seq_length) : (),
315             '-source' => $self->get_field('algorithm'),
316             '-seqdesc' => $self->get_field('description')
317             ) );
318 6         15 $self->{_created_hit} = 1;
319             }
320 60         117 return $self->SUPER::hit(@_);
321             }
322              
323             =head2 gaps
324              
325             Title : gaps
326             Usage : my $gaps = $hsp->gaps( ['query'|'hit'|'total'] );
327             Function : Get the number of gaps in the query, hit, or total alignment.
328             Returns : Integer, number of gaps or 0 if none
329             Args : 'query' = num conserved / length of query seq (without gaps)
330             'hit' = num conserved / length of hit seq (without gaps)
331             'total' = num conserved / length of alignment (with gaps)
332             default = 'total'
333              
334             =cut
335              
336             sub gaps {
337 14     14 1 2301 my ($self, $type) = @_;
338            
339 14 50       30 $type = lc $type if defined $type;
340 14 50 33     109 $type = 'total' if (! defined $type || $type eq 'hsp' || $type !~ /query|hit|subject|sbjct|total/);
      33        
341 14 50       29 $type = 'hit' if $type =~ /sbjct|subject/;
342            
343 14         28 $self->get_field('alignment'); # make sure gaps have been calculated
344            
345 14         42 return $self->{'_'.$type.'_gaps'};
346             }
347              
348             =head2 pvalue
349              
350             Title : pvalue
351             Usage : my $pvalue = $hsp->pvalue();
352             Function: Returns the P-value for this HSP
353             Returns : undef (Hmmpfam reports do not have p-values)
354             Args : none
355              
356             =cut
357              
358             # noop
359       4 1   sub pvalue { }
360              
361             1;