File Coverage

Bio/Tools/QRNA.pm

Criterion	Covered	Total	%
statement	114	124	91.9
branch	65	74	87.8
condition	3	5	60.0
subroutine	14	14	100.0
pod	7	7	100.0
total	203	224	90.6

line	stmt	bran	cond	sub	pod	time	code
1							#
2							# BioPerl module for Bio::Tools::QRNA
3							#
4							# Please direct questions and support issues to
5							#
6							# Cared for by Jason Stajich
7							#
8							# Copyright Jason Stajich
9							#
10							# You may distribute this module under the same terms as perl itself
11
12							# POD documentation - main docs before the code
13
14							=head1 NAME
15
16							Bio::Tools::QRNA - A Parser for qrna output
17
18							=head1 SYNOPSIS
19
20							use Bio::Tools::QRNA;
21							my $parser = Bio::Tools::QRNA->new(-file => $qrnaoutput);
22							while( my $feature = $parser->next_feature ) {
23							# do something here
24							}
25
26							=head1 DESCRIPTION
27
28							Parses QRNA output (E.Rivas:
29							http://selab.janelia.org/software.html
30							ftp://selab.janelia.org/pub/software/qrna/).
31
32							This module is not complete, but currently it packs information from
33							each QRNA alignment into a single Bio::SeqFeature::Generic object.
34
35							Not all options for QRNA output have been tested or tried. It has
36							been tested on sliding window output (-w -x) and shuffled output (-b
37							or -B).
38
39							See t/QRNA.t for example usage.
40
41							At some point we may have more complicated feature object which will
42							support this data rather than forcing most of the information into
43							tag/value pairs in a SeqFeature::Generic.
44
45							Running with -verbose =E 1 will store extra data in the feature. The
46							entire unparsed entry for a particular feature will be stored as a
47							string in the tag 'entry' it is accessible via:
48
49							my ($entry) = $f->each_tag_value('entry');
50
51							The winning model for any given alignment test will be the name stored
52							in the primary_tag field of feature. The bit score will stored in the
53							score field. The logoddpost is available via the a tag/value pair.
54							This example code will show how to print out the score and log odds
55							post for each model.
56
57							# assuming you got a feature already
58							print "model score logoddspost\n";
59							foreach my $model ( qw(OTH COD RNA) ) {
60							my ($score) = $f->get_tag_values("$model\_score");
61							my ($logoddspost) = $f->get_tag_values("$model\_logoddspost");
62							print "$model $score $logoddspost\n";
63							}
64
65							The start and end of the alignment for both the query and hit sequence
66							are available through the L interface,
67							specifically L and
68							L. Additionally if you have
69							run QRNA with an input file which has the location of the alignment
70							stored in the FASTA filename as in (ID/START-END) which is the default
71							output format from L produced alignment output,
72							this module will re-number start/end for the two sequences so they are
73							in the actual coordinates of the sequence rather than the relative
74							coordinates of the alignment. You may find the bioperl utillity
75							script search2alnblocks useful in creating your input files for QRNA.
76
77							Some other words of warning, QRNA uses a 0 based numbering system for
78							sequence locations, Bioperl uses a 1 based system. You'll notice that
79							locations will be +1 they are reported in the raw QRNA output.
80
81							=head1 FEEDBACK
82
83							=head2 Mailing Lists
84
85							User feedback is an integral part of the evolution of this and other
86							Bioperl modules. Send your comments and suggestions preferably to
87							the Bioperl mailing list. Your participation is much appreciated.
88
89							bioperl-l@bioperl.org - General discussion
90							http://bioperl.org/wiki/Mailing_lists - About the mailing lists
91
92							=head2 Support
93
94							Please direct usage questions or support issues to the mailing list:
95
96							I
97
98							rather than to the module maintainer directly. Many experienced and
99							reponsive experts will be able look at the problem and quickly
100							address it. Please include a thorough description of the problem
101							with code and data examples if at all possible.
102
103							=head2 Reporting Bugs
104
105							Report bugs to the Bioperl bug tracking system to help us keep track
106							of the bugs and their resolution. Bug reports can be submitted via
107							the web:
108
109							https://github.com/bioperl/bioperl-live/issues
110
111							=head1 AUTHOR - Jason Stajich
112
113							Email jason-at-bioperl-dot-org
114
115							=head1 APPENDIX
116
117							The rest of the documentation details each of the object methods.
118							Internal methods are usually preceded with a _
119
120							=cut
121
122
123							# Let the code begin...
124
125
126							package Bio::Tools::QRNA;
127	1			1		495	use vars qw(@Models);
	1					1
	1					40
128	1			1		3	use strict;
	1					1
	1					16
129
130	1			1		378	use Bio::SeqFeature::Generic;
	1					2
	1					28
131	1			1		295	use Bio::SeqFeature::FeaturePair;
	1					2
	1					26
132
133	1			1		4	use base qw(Bio::Root::IO Bio::SeqAnalysisParserI);
	1					1
	1					1292
134							@Models = qw(OTH COD RNA);
135
136							=head2 new
137
138							Title : new
139							Usage : my $obj = Bio::Tools::QRNA->new();
140							Function: Builds a new Bio::Tools::QRNA object
141							Returns : an instance of Bio::Tools::QRNA
142							Args : -fh/-file filehandle/filename standard input for
143							Bio::Root:IO objects
144
145							=cut
146
147							=head2 next_feature
148
149							Title : next_feature
150							Usage : my $feature = $parser->next_feature
151							Function: Get the next QRNA feature
152							Returns :
153							Args :
154
155
156							=cut
157
158							sub next_feature {
159	48			48	1	46	my ($self) = @_;
160	48	100				37	my $f = shift @{$self->{'_parsed_features'} \|\| []};
	48					89
161	48	100	66			106	if( ! defined $f && $self->_parse_pair ) {
162	47	50				35	$f = shift @{$self->{'_parsed_features'} \|\| []};
	47					92
163							}
164	48					78	return $f;
165							}
166
167							sub _parse_pair {
168	48			48		41	my ($self,@args) = @_;
169	48					33	my (@features,%data);
170	48					31	my $seenstart = 0;
171	48					85	while( defined( $_ = $self->_readline) ) {
172	927	100				1298	next if( /^\#\-\-/o );
173	915	100				5575	if( /^\#\s+(qrna)\s+(\S+)\s+$([^$]+)\)/o ) {
		100
		100
		100
		100
		100
		100
		100
		100
		100
		100
		100
174	2					5	$self->program_name($1);
175	2					5	$self->program_version($2);
176	2					4	$self->program_date($3);
177							} elsif( /^\#\s+(PAM model)\s+\=\s+(.+)\s+$/o ) {
178	2					7	$self->PAM_model($2);
179							} elsif( /^\#\s+(RNA model)\s+\=\s+(\S+)/o ) {
180	2					6	$self->RNA_model($2);
181							} elsif( /^\#\s+(seq file)\s+\=\s+(.+)\s+$/o ) {
182	2					7	$self->seq_file($2);
183							} elsif( /^\#\s+(\d+)\s+\[([\-+])\s+strand\]/o ) {
184	94	100				121	if( $seenstart ) {
185	46	50				71	if( $data{'alignment_len'} ) {
186	46					82	push @features, $self->_make_feature(\%data);
187							}
188	46					91	$self->_pushback($_);
189	46					49	last;
190							}
191	48					47	$seenstart = 1;
192							} elsif( /^\#/ ) {
193	4					8	next;
194							} elsif( />(\S+)\s+$(\d+)$/ ) {
195	96	100				61	if( @{$data{'seqs'} \|\| []} == 2 ) {
	96	50				256
196	0					0	$self->warn( "already seen seqs ".join(' ', ,map { $_->[0] }
197	0					0	@{$data{'seqs'}}). "\n");
	0					0
198							} else {
199	96					66	push @{$data{'seqs'}}, [$1,$2];
	96					256
200							}
201							} elsif( /^length alignment:\s+(\d+)\s+$id\=(\d+(\.\d+)?)$/o ) {
202
203	51	100				78	if( $data{'alignment_len'} ) {
204	3					7	push @features, $self->_make_feature(\%data);
205							# reset all the data but the 'seqs' field
206	3					23	%data = ( 'seqs' => $data{'seqs'} );
207							}
208
209	51	50				88	if( /$((sre_)?shuffled)$/ ) {
210	0					0	$data{'shuffled'} = $1;
211							}
212	51					88	$data{'alignment_len'} = $1;
213	51					56	$data{'alignment_pid'} = $2;
214							} elsif ( /^pos([XY]):\s+(\d+)\-(\d+)\s+\[(\d+)\-(\d+)\]$(\d+)$\s+
215							\-\-\s+$(\S+\s+\S+\s+\S+\s+\S+)$/ox ) {
216	102					391	$data{"seq\_$1"}->{'aln'} = [ $2,$3, $4,$5, $6];
217	102					83	@{$data{"seq\_$1"}->{'base_comp'}} = split(/\s+/,$7);
	102					420
218							} elsif( /^winner\s+\=\s+(\S{3})/ ) {
219	51					87	$data{'winning_model'} = $1;
220							} elsif( /^(\S{3})\s+ends\s+\=\s+(\-?\d+)\s+(\-?\d+)/ ) {
221							# QRNA is 0-based
222							# Bioperl is 1 based
223	153					386	$data{'model_location'}->{$1} = [ $2,$3 ];
224							} elsif( /^\s+(logoddspost)?OTH\s+\=\s+/ox ) {
225	102					274	while( /(\S+)\s+\=\s+(\-?\d+(\.\d+))/g ) {
226	306					409	my ($model,$score)= ($1,$2);
227	306	100				446	if( $model =~ s/^logoddspost// ) {
228	153					435	$data{'model_scores'}->{'logoddspost'}->{$model} = $score;
229							} else {
230	153					498	$data{'model_scores'}->{'bits'}->{$model} = $score;
231							}
232							}
233							}
234	865					1723	$data{'entry'} .= $_;
235							}
236	48	100				75	if( @features ) {
237	47					36	push @{$self->{'_parsed_features'}}, @features;
	47					57
238	47					409	return scalar @features;
239							}
240	1					9	return 0;
241							}
242
243							=head2 PAM_model
244
245							Title : PAM_model
246							Usage : $obj->PAM_model($newval)
247							Function:
248							Example :
249							Returns : value of PAM_model (a scalar)
250							Args : on set, new value (a scalar or undef, optional)
251
252
253							=cut
254
255							sub PAM_model{
256	3			3	1	7	my $self = shift;
257	3	100				11	return $self->{'PAM_model'} = shift if @_;
258	1					4	return $self->{'PAM_model'};
259							}
260
261							=head2 RNA_model
262
263							Title : RNA_model
264							Usage : $obj->RNA_model($newval)
265							Function:
266							Example :
267							Returns : value of RNA_model (a scalar)
268							Args : on set, new value (a scalar or undef, optional)
269
270
271							=cut
272
273							sub RNA_model{
274	3			3	1	6	my $self = shift;
275
276	3	100				14	return $self->{'RNA_model'} = shift if @_;
277	1					4	return $self->{'RNA_model'};
278							}
279
280							=head2 seq_file
281
282							Title : seq_file
283							Usage : $obj->seq_file($newval)
284							Function:
285							Example :
286							Returns : value of seq_file (a scalar)
287							Args : on set, new value (a scalar or undef, optional)
288
289
290							=cut
291
292							sub seq_file{
293	3			3	1	4	my $self = shift;
294
295	3	100				9	return $self->{'seq_file'} = shift if @_;
296	1					4	return $self->{'seq_file'};
297							}
298
299
300							=head2 program_name
301
302							Title : program_name
303							Usage : $obj->program_name($newval)
304							Function:
305							Example :
306							Returns : value of program_name (a scalar)
307							Args : on set, new value (a scalar or undef, optional)
308
309
310							=cut
311
312							sub program_name{
313	52			52	1	37	my $self = shift;
314
315	52	100				74	return $self->{'program_name'} = shift if @_;
316	50		50			345	return $self->{'program_name'} \|\| 'qrna';
317							}
318
319							=head2 program_version
320
321							Title : program_version
322							Usage : $obj->program_version($newval)
323							Function:
324							Example :
325							Returns : value of program_version (a scalar)
326							Args : on set, new value (a scalar or undef, optional)
327
328
329							=cut
330
331							sub program_version{
332	3			3	1	5	my $self = shift;
333
334	3	100				11	return $self->{'program_version'} = shift if @_;
335	1					5	return $self->{'program_version'};
336							}
337
338							=head2 program_date
339
340							Title : program_date
341							Usage : $obj->program_date($newval)
342							Function:
343							Example :
344							Returns : value of program_date (a scalar)
345							Args : on set, new value (a scalar or undef, optional)
346
347
348							=cut
349
350							sub program_date{
351	3			3	1	3	my $self = shift;
352	3	100				12	return $self->{'program_date'} = shift if @_;
353	1					4	return $self->{'program_date'};
354							}
355
356							sub _make_feature {
357	49			49		44	my ($self,$data) = @_;
358	49					44	my ($qoffset,$hoffset) = (1,1);
359							# when you run qrna and have produced ID/START-END
360							# formatted input strings we can remap the location
361							# to the original
362
363							# name is stored as the first entry in the seq array ref
364							my ($qid,$hid) = ( $data->{'seqs'}->[0]->[0],
365	49					67	$data->{'seqs'}->[1]->[0]);
366	49	100				73	if( $qid =~ /(\S+)\/(\d+)\-(\d+)/ ) {
367	3					6	($qid,$qoffset) = ($1,$2);
368							}
369	49	100				78	if( $hid =~ /(\S+)\/(\d+)\-(\d+)/ ) {
370	3					4	($hid,$hoffset) = ($1,$2);
371							}
372
373	49					110	my $f = Bio::SeqFeature::FeaturePair->new();
374
375	49					47	my ($s,$e) = @{$data->{'model_location'}->{$data->{'winning_model'}}};
	49					105
376							my $qf = Bio::SeqFeature::Generic->new
377							( -primary_tag => $data->{'winning_model'},
378							-source_tag => $self->program_name,
379	49	100				114	-score => $data->{'model_scores'}->{'bits'}->{$data->{'winning_model'}},
380							-start => $s+$qoffset,
381							-end => $e+$qoffset,
382							-seq_id => $qid,
383							-strand => ($s < $e ) ? 1 : -1,
384							);
385
386	49					122	my $hf = Bio::SeqFeature::Generic->new
387							( -primary_tag => $qf->primary_tag,
388							-source_tag => $qf->source_tag,
389							-score => $qf->score,
390							-seq_id => $hid,
391							-start => $s + $hoffset,
392							-end => $e + $hoffset,
393							-strand => $qf->strand,
394							);
395	49					124	$f->feature1($qf);
396	49					77	$f->feature2($hf);
397	49					81	$f->add_tag_value('alignment_len', $data->{'alignment_len'});
398	49					72	$f->add_tag_value('alignment_pid', $data->{'alignment_pid'});
399							# store the other model scores and data
400	49					70	foreach my $model ( @Models ) {
401	147					296	$f->add_tag_value("$model\_score", $data->{'model_scores'}->{'bits'}->{$model});
402	147					261	$f->add_tag_value("$model\_logoddspost", $data->{'model_scores'}->{'logoddspost'}->{$model});
403	147	50				204	if( ! $data->{'model_location'}->{$model} ) {
404	0	0				0	if( $self->verbose > 0 ) {
405	0					0	$self->debug( $data->{'entry'} );
406							}
407							$self->throw("no location parsed for $model in ",
408	0					0	(map { @$_ } @{$data->{'seqs'}}), " ", $f->start, " ", $f->end);
	0					0
	0					0
409							} else {
410							$f->add_tag_value("$model\_positions",
411	147					136	join("..",@{$data->{'model_location'}->{$model} }));
	147					302
412							}
413							}
414							# probably a better way to store this - as
415							# a seq object perhaps
416	49					48	$f->add_tag_value('seq1', @{$data->{'seqs'}->[0]});
	49					92
417	49					38	$f->add_tag_value('seq2', @{$data->{'seqs'}->[1]});
	49					76
418	49	50				91	$f->add_tag_value('entry', $data->{'entry'}) if $self->verbose > 0;
419	49	50				74	if( $data->{'shuffled'} ) {
420	0					0	$f->add_tag_value('shuffled', $data->{'shuffled'});
421							}
422	49					79	return $f;
423							}
424							1;