File Coverage

Bio/SearchIO/hmmer_pull.pm

Criterion	Covered	Total	%
statement	50	60	83.3
branch	9	24	37.5
condition	6	10	60.0
subroutine	9	10	90.0
pod	3	3	100.0
total	77	107	71.9

line	stmt	bran	cond	sub	pod	time	code
1							#
2							# BioPerl module for Bio::SearchIO::hmmer_pull
3							#
4							# Please direct questions and support issues to <bioperl-l@bioperl.org>
5							#
6							# Cared for by Sendu Bala
7							#
8							# Copyright Sendu Bala
9							#
10							# You may distribute this module under the same terms as perl itself
11
12							# POD documentation - main docs before the code
13
14							=head1 NAME
15
16							Bio::SearchIO::hmmer_pull - A parser for HMMER output
17
18							=head1 SYNOPSIS
19
20							# do not use this class directly it is available through Bio::SearchIO
21							use Bio::SearchIO;
22							my $in = Bio::SearchIO->new(-format => 'hmmer_pull',
23							-file => 't/data/hmmpfam.bigout');
24							while (my $result = $in->next_result) {
25							# this is a Bio::Search::Result::HmmpfamResult object
26							print $result->query_name(), " for HMM ", $result->hmm_name(), "\n";
27							while (my $hit = $result->next_hit) {
28							print $hit->name(), "\n";
29							while (my $hsp = $hit->next_hsp) {
30							print "length is ", $hsp->length(), "\n";
31							}
32							}
33							}
34
35							=head1 DESCRIPTION
36
37							This object implements a pull-parser for HMMER output. It is fast since it
38							only does work on request (hence 'pull').
39
40							=head1 FEEDBACK
41
42							=head2 Mailing Lists
43
44							User feedback is an integral part of the evolution of this and other
45							Bioperl modules. Send your comments and suggestions preferably to
46							the Bioperl mailing list. Your participation is much appreciated.
47
48							bioperl-l@bioperl.org - General discussion
49							http://bioperl.org/wiki/Mailing_lists - About the mailing lists
50
51							=head2 Support
52
53							Please direct usage questions or support issues to the mailing list:
54
55							I<bioperl-l@bioperl.org>
56
57							rather than to the module maintainer directly. Many experienced and
58							responsive experts will be able to look at the problem and quickly
59							address it. Please include a thorough description of the problem
60							with code and data examples if at all possible.
61
62							=head2 Reporting Bugs
63
64							Report bugs to the Bioperl bug tracking system to help us keep track
65							of the bugs and their resolution. Bug reports can be submitted via the
66							web:
67
68							https://github.com/bioperl/bioperl-live/issues
69
70							=head1 AUTHOR - Sendu Bala
71
72							Email bix@sendu.me.uk
73
74							=head1 APPENDIX
75
76							The rest of the documentation details each of the object methods.
77							Internal methods are usually preceded with a _
78
79							=cut
80
81							# Let the code begin...
82
83							package Bio::SearchIO::hmmer_pull;
84
85	1			1		3	use strict;
	1					1
	1					27
86
87
88	1			1		4	use base qw(Bio::SearchIO Bio::PullParserI);
	1					1
	1					430
89
90							=head2 new
91
92							Title : new
93							Usage : my $obj = Bio::SearchIO::hmmer_pull->new();
94							Function: Builds a new Bio::SearchIO::hmmer_pull object
95							Returns : Bio::SearchIO::hmmer_pull
96							Args : -fh/-file => HMMER output filename
97							-format => 'hmmer_pull'
98							-evalue => float or scientific notation number to be used
99							as an evalue cutoff for hits
100							-score => integer or scientific notation number to be used
101							as a score value cutoff for hits
102							-hsps => integer minimum number of hsps (domains) a hit must have
103							-piped_behaviour => 'temp_file'\|'memory'\|'sequential_read'
104
105							-piped_behaviour defines what the parser should do if the input is
106							an unseekable filehandle (eg. piped input), see
107							Bio::PullParserI::chunk for details. Default is 'sequential_read'.
108
109							=cut
110
111							sub _initialize {
112	2			2		4	my ($self, @args) = @_;
113
114							# don't do normal SearchIO initialization
115
116	2					13	my ($writer, $file, $fh, $piped_behaviour, $evalue, $score, $hsps) =
117							$self->_rearrange([qw(WRITER
118							FILE FH
119							PIPED_BEHAVIOUR
120							EVALUE
121							SCORE
122							HSPS)], @args);
123	2	50				10	$self->writer($writer) if $writer;
124
125	2					34	$self->_fields( { ( header => undef,
126							algorithm => undef,
127							algorithm_version => undef,
128							algorithm_reference => '',
129							hmm_file => undef,
130							hmm_name => undef,
131							sequence_file => undef,
132							sequence_database => undef,
133							database_name => undef,
134							database_letters => undef,
135							database_entries => undef,
136							next_result => undef,
137							evalue_cutoff => '[unset]',
138							score_cutoff => '[unset]',
139							hsps_cutoff => '[unset]' ) } );
140
141	2	50				4	$self->_fields->{evalue_cutoff} = $evalue if $evalue;
142	2	50				4	$self->_fields->{score_cutoff} = $score if $score;
143	2	50				5	$self->_fields->{hsps_cutoff} = $hsps if $hsps;
144
145	2					15	$self->_dependencies( { ( algorithm => 'header',
146							algorithm_version => 'header',
147							hmm_file => 'header',
148							hmm_name => 'header',
149							sequence_file => 'header',
150							sequence_database => 'header' ) } );
151
152	2		33			22	$self->chunk($file \|\| $fh \|\| $self->throw("-file or -fh must be supplied"),
			50
153							-piped_behaviour => $piped_behaviour \|\| 'sequential_read');
154							}
155
156							sub _discover_header {
157	2			2		3	my $self = shift;
158	2					7	$self->_chunk_seek(0);
159	2					8	my $header = $self->_get_chunk_by_nol(8);
160	2					5	$self->{_after_header} = $self->_chunk_tell;
161
162	2					13	my ($algo) = $header =~ /^(hmm\S+) - search/m;
163	2					6	$self->_fields->{algorithm} = uc $algo;
164
165	2					10	($self->_fields->{algorithm_version}) = $header =~ /^HMMER\s+?(\S+)/m;
166
167	2					12	($self->_fields->{hmm_file}) = $header =~ /^HMM file:\s.+?(\S+)$/m;
168	2					5	$self->_fields->{hmm_name} = $self->_fields->{hmm_file};
169
170	2					14	($self->_fields->{sequence_file}) = $header =~ /^Sequence (?:file\|database):\s.+?(\S+)$/m;
171	2					6	$self->_fields->{sequence_database} = $self->_fields->{sequence_file};
172
173	2					5	$self->_fields->{header} = 1;
174							}
175
176							sub _discover_database_name {
177	1			1		2	my $self = shift;
178	1					3	my $type = $self->get_field('algorithm');
179
180	1	50				4	if ($type eq 'HMMPFAM') {
		0
181	1					2	$self->_fields->{database_name} = $self->get_field('hmm_file');
182							}
183							elsif ($type eq 'HMMSEARCH') {
184	0					0	$self->_fields->{database_name} = $self->get_field('sequence_file');
185							}
186							}
187
188							sub _discover_next_result {
189	4			4		4	my $self = shift;
190	4					9	my $type = $self->get_field('algorithm'); # also sets _after_header if not set
191
192	4	50				12	if ($type eq 'HMMPFAM') {
		0
193	1			1		411	use Bio::Search::Result::HmmpfamResult;
	1					2
	1					231
194
195	4	50				8	unless ($self->_sequential) {
196	4		66			19	$self->_chunk_seek($self->{_end_of_previous_result} \|\| $self->{_after_header});
197
198	4					16	my ($start, $end) = $self->_find_chunk_by_end("//\n");
199	4	100				10	return if $start == $end;
200	3					10	$self->_fields->{next_result} = Bio::Search::Result::HmmpfamResult->new(-chunk => [($self->chunk, $start, $end)],
201							-parent => $self);
202
203	3					8	$self->{_end_of_previous_result} = $end;
204							}
205							else {
206							# deliberately don't cache these, which means rewind won't work;
207							# if we cached we may as well have used 'memory' option to
208							# -piped_behaviour
209	0					0	my $chunk = $self->_get_chunk_by_end("//\n");
210	0	0				0	$chunk \|\| return;
211	0					0	$self->_fields->{next_result} = Bio::Search::Result::HmmpfamResult->new(-chunk => [$chunk],
212							-parent => $self);
213							}
214							}
215							elsif ($type eq 'HMMSEARCH') {
216	0					0	$self->throw("Can't handle hmmsearch yet\n");
217							}
218							else {
219	0					0	$self->throw("Unknown report type");
220							}
221							}
222
223							=head2 next_result
224
225							Title : next_result
226							Usage : my $result = $searchio->next_result;
227							Function: Returns the next Result from a search
228							Returns : Bio::Search::Result::ResultI object
229							Args : none
230
231							=cut
232
233							sub next_result {
234	4			4	1	15	my $self = shift;
235	4		100			10	my $result = $self->get_field('next_result') \|\| return;
236
237	3					6	undef $self->_fields->{next_result};
238
239	3					4	$self->{'_result_count'}++;
240	3					5	return $result;
241							}
242
243							=head2 result_count
244
245							Title : result_count
246							Usage : my $count = $searchio->result_count
247							Function: Returns the number of results we have processed.
248							Returns : integer
249							Args : none
250
251							=cut
252
253							sub result_count {
254	1			1	1	5	my $self = shift;
255	1					4	return $self->{'_result_count'};
256							}
257
258							=head2 rewind
259
260							Title : rewind
261							Usage : $searchio->rewind;
262							Function: Allow one to reset the Result iterator to the beginning, so that
263							next_result() will subsequently return the first result and so on.
264
265							NB: result objects are not cached, so you will get new result objects
266							each time you rewind. Also, note that result_count() counts the
267							number of times you have called next_result(), so it will not be able
268							to tell you how many results there were in the file if you use rewind().
269
270							Returns : n/a
271							Args : none
272
273							=cut
274
275							sub rewind {
276	0			0	1		my $self = shift;
277	0	0					if ($self->_sequential) {
278	0						$self->warn("rewind has no effect on piped input when you have chosen 'sequential_read' mode");
279							}
280	0						delete $self->{_end_of_previous_result};
281							}
282
283							1;