File Coverage

Bio/Tools/Spidey/Results.pm

Criterion	Covered	Total	%
statement	127	139	91.3
branch	43	52	82.6
condition	2	3	66.6
subroutine	14	15	93.3
pod	9	9	100.0
total	195	218	89.4

line	stmt	bran	cond	sub	pod	time	code
1							#
2							# BioPerl module for Bio::Tools::Spidey::Results
3							#
4							# Please direct questions and support issues to
5							#
6							# Cared for by Ryan Golhar
7							#
8							# You may distribute this module under the same terms as perl itself
9
10							# POD documentation - main docs before the code
11
12							=head1 NAME
13
14							Bio::Tools::Spidey::Results - Results of a Spidey run
15
16							=head1 SYNOPSIS
17
18							use Bio::Tools::Spidey::Results;
19							my $spidey = Bio::Tools::Spidey::Results->new(-file => 'result.spidey' );
20
21							# or
22
23							my $spidey = Bio::Tools::Spidey::Results->new( -fh => \*INPUT );
24
25							# get the exons before doing anything else
26							my $exonset = $spidey->next_exonset();
27
28							# parse the results
29							my @exons = $exonset->sub_SeqFeature();
30							print "Total no of Exons: ", scalar(@exons), "\n";
31
32							print "Genomic sequence length: ", $spidey->genomic_dna_length(), "\n";
33
34							# $exonset is-a Bio::SeqFeature::Generic with Bio::Tools::Spidey::Exons
35							# as sub features
36							print "Delimited on sequence ", $exonset->seq_id(), " from ",
37							$exonset->start(), " to ", $exonset->end(), "\n";
38
39							foreach my $exon ( $exonset->sub_SeqFeature() ) {
40							# $exon is-a Bio::SeqFeature::FeaturePair
41							print "Exon from ", $exon->start, " to ", $exon->end,
42							" on strand ", $exon->strand(), "\n";
43							# you can get out what it matched using the est_hit attribute
44							my $homol = $exon->est_hit();
45							print "Matched to sequence ", $homol->seq_id,
46							" at ", $homol->start," to ", $homol->end, "\n";
47							}
48
49							# essential if you gave a filename at initialization (otherwise
50							# the file stays open)
51							$spidey->close();
52
53							=head1 DESCRIPTION
54
55							The spidey module provides a parser and results object for spidey
56							output. The spidey results are specialised types of SeqFeatures,
57							meaning you can add them to AnnSeq objects fine, and manipulate them
58							in the "normal" seqfeature manner.
59
60							The spidey Exon objects are Bio::SeqFeature::FeaturePair inherited
61							objects. The $esthit = $exon-Eest_hit() is the alignment as a
62							feature on the matching object (normally, a cDNA), in which the
63							start/end points are where the hit lies.
64
65							To make this module work sensibly you need to run
66
67							spidey -i genomic.fasta -m cDNA.fasta
68
69							=head1 FEEDBACK
70
71							=head2 Mailing Lists
72
73							User feedback is an integral part of the evolution of this and other
74							Bioperl modules. Send your comments and suggestions preferably to one
75							of the Bioperl mailing lists. Your participation is much appreciated.
76
77							bioperl-l@bioperl.org - General discussion
78							http://bioperl.org/wiki/Mailing_lists - About the mailing lists
79
80							=head2 Support
81
82							Please direct usage questions or support issues to the mailing list:
83
84							I
85
86							rather than to the module maintainer directly. Many experienced and
87							reponsive experts will be able look at the problem and quickly
88							address it. Please include a thorough description of the problem
89							with code and data examples if at all possible.
90
91							=head2 Reporting Bugs
92
93							Report bugs to the Bioperl bug tracking system to help us keep track
94							the bugs and their resolution. Bug reports can be submitted via the
95							web:
96
97							https://github.com/bioperl/bioperl-live/issues
98
99							=head1 AUTHOR - Ryan Golhar
100
101							Email golharam@umdnj.edu
102
103							=head1 APPENDIX
104
105							The rest of the documentation details each of the object methods.
106							Internal methods are usually preceded with a _
107
108							=cut
109
110
111							# Let the code begin...
112
113
114							package Bio::Tools::Spidey::Results;
115	1			1		477	use strict;
	1					1
	1					23
116
117	1			1		3	use File::Basename;
	1					1
	1					54
118	1			1		263	use Bio::Root::Root;
	1					1
	1					24
119	1			1		258	use Bio::Tools::Spidey::Exon;
	1					2
	1					31
120
121	1			1		4	use base qw(Bio::Tools::AnalysisResult);
	1					1
	1					278
122
123							sub _initialize_state {
124	2			2		3	my($self,@args) = @_;
125
126							# call the inherited method first
127	2					7	my $make = $self->SUPER::_initialize_state(@args);
128
129							# my ($est_is_first) = $self->_rearrange([qw(ESTFIRST)], @args);
130
131							# delete($self->{'_est_is_first'});
132							# $self->{'_est_is_first'} = $est_is_first if(defined($est_is_first));
133	2					3	$self->analysis_method("Spidey");
134							}
135
136							=head2 analysis_method
137
138							Usage : $spidey->analysis_method();
139							Purpose : Inherited method. Overridden to ensure that the name matches
140							/Spidey/i.
141							Returns : String
142							Argument : n/a
143
144							=cut
145
146							#-------------
147							sub analysis_method {
148							#-------------
149	8			8	1	7	my ($self, $method) = @_;
150	8	50	66			21	if($method && ($method !~ /Spidey/i)) {
151	0					0	$self->throw("method $method not supported in " . ref($self));
152							}
153	8					18	return $self->SUPER::analysis_method($method);
154							}
155
156							=head2 parse_next_alignment
157
158							Title : parse_next_alignment
159							Usage : @exons = $spidey_result->parse_next_alignment;
160							foreach $exon (@exons) {
161							# do something
162							}
163							Function: Parses the next alignment of the Spidey result file and returns the
164							found exons as an array of Bio::Tools::Spidey::Exon objects. Call
165							this method repeatedly until an empty array is returned to get the
166							results for all alignments.
167							Example :
168							Returns : An array of Bio::Tools::Spidey::Exon objects
169							Args :
170
171
172							=cut
173
174							sub parse_next_alignment {
175	2			2	1	3	my ($self) = @_;
176							# for strand 1 = plus, -1 = minus
177	2					2	my ($started,$version,$strand, $exoncount) = (0,0,0,-1);
178	2					1	my (%seq1props,%seq2props,@exons);
179
180							# we refer to the properties of each seq by reference
181
182	2					8	while(defined($_ = $self->_readline())) {
183	164					120	chomp;
184							#
185							# bascially, parse a Spidey result...
186							#
187							# matches: --SPIDEY version 1.40--
188	164	100				813	if( /^--SPIDEY\s+version\s+(\d+\.\d+)--/) {
		100
		100
		100
		100
		100
		100
		100
		100
		100
		50
189	1	50				2	if($started) {
190	0					0	$self->_pushback($_);
191	0					0	return \@exons;
192							}
193	1					2	$version = $1;
194	1	50				5	if ($version != 1.40) {
195	0					0	$self->throw("Spidey parser only designed to work with Spidey v1.40\n");
196							}
197	1					3	$started = 1;
198							} elsif (/^Genomic:\s+(\S+)\s.*,\s+(\d+)\sbp$/ ) {
199							# matches: Genomic: lcl\|some_name other information, 1234 bp
200							# $seq1props{'filename'} = $1;
201	1					3	$seq1props{'seqname'} = $1;
202	1					3	$seq1props{'length'} = $2;
203	1					2	$self->genomic_dna_length($seq1props{'length'});
204
205							} elsif( /^mRNA:\s+(\S+)\s.*,(?:\s+mRNA\s+sequence,)?\s(\d+)\sbp$/ ) {
206							# matches: mRNA:
207							# $seq2props{'filename'} = $1;
208	1					1	$seq2props{'seqname'} = $1;
209	1					4	$seq2props{'length'} = $2;
210
211							} elsif( /^Strand:/ ) {
212	1	50				3	if (/plus/) {
213	0					0	$strand = 1;
214							} else {
215	1					3	$strand = -1;
216							}
217							} elsif( /^Number of exons: (\d+)/ ) {
218	1					2	$exoncount = $1;
219
220	1					1	my ($genomic_start, $genomic_stop, $cdna_start, $cdna_stop,
221							$id, $mismatches, $gaps, $splice_donor,
222							$splice_acceptor, $uncertain);
223
224							# the next $exoncount lines contains information
225							# about the matches of each exon. we should parse
226							# this information here
227
228	1					100	for (my $ec = 1; $ec <= $exoncount; $ec++) {
229	6	50				13	if (defined($_ = $self->_readline())) {
230	6					7	chomp;
231
232	6	50				233	if (/^Exon\s$ec[-]:\s(\d+)-(\d+)\s$gen$\s+(\d+)-(\d+)\s$mRNA$\s+id\s([\d\.inf-]+)%\s+mismatches\s(\d+)\s+gaps\s(\d+)\s+splice\ssite\s$d\s+a$:\s(\d+)\s+(\d+)\s(\w*)/) {
233	6					10	$genomic_start = $1;
234	6					5	$genomic_stop = $2;
235	6					6	$cdna_start = $3;
236	6					6	$cdna_stop = $4;
237	6					7	$id = $5;
238	6					6	$mismatches = $6;
239	6					4	$gaps = $7;
240	6					4	$splice_donor = $8;
241	6					7	$splice_acceptor = $9;
242	6					5	$uncertain = $10;
243							} else {
244	0					0	$self->throw( "Failed to match anything:\n$_\n");
245							}
246
247	6					25	my $exon = Bio::Tools::Spidey::Exon->new
248							(-start => $genomic_start,
249							-end => $genomic_stop,
250							-strand => $strand);
251	6					15	$exon->seq_id($seq1props{'seqname'});
252
253							# feature1 is supposed to be initialized to a Similarity object, but we provide a safety net
254	6	50				8	if ($exon->feature1->can('seqlength')) {
255	6					8	$exon->feature1->seqlength($seq1props{'length'});
256							} else {
257	0					0	$exon->feature1->add_tag_value('seqlength', $seq1props{'length'});
258							}
259
260							# create and initialize the feature wrapping the 'hit' (the cDNA)
261							my $fea2 = Bio::SeqFeature::Similarity->new
262							(-start => $cdna_start,
263							-end => $cdna_stop,
264							-strand => $strand,
265	6					22	-seq_id => $seq2props{'seqname'},
266							-primary => "aligning_cDNA");
267	6					12	$fea2->seqlength($seq2props{'length'});
268							# store
269	6					13	$exon->est_hit($fea2);
270
271							# general properties
272	6					10	$exon->source_tag($self->analysis_method());
273	6					9	$exon->percentage_id($5);
274	6					8	$exon->mismatches($6);
275	6					10	$exon->gaps($7);
276	6					9	$exon->donor($8);
277	6					7	$exon->acceptor($9);
278
279							# push onto array
280	6					16	push(@exons, $exon);
281							} else {
282	0					0	$self->throw("Unexpected end of file reached\n");
283							}
284							}
285							} elsif( /^Number of splice sites:\s+(\d+)/ ) {
286	1					2	$self->splicesites($1);
287							} elsif( /^mRNA coverage:\s+(\d+)%/ ) {
288	1					3	$self->est_coverage($1);
289							} elsif(/^overall percent identity:\s+([\d\.]+)%/ ) {
290	1					3	$self->overall_percentage_id($1);
291							} elsif(/^Missing mRNA ends:\s+(\w+)/ ) {
292	1					2	$self->missing_mrna_ends($1);
293							} elsif( /^Exon (\d+): (\d+)-(\d+) $gen$\s+(\d+)-(\d+) $mRNA$/ ) {
294	6					5	my ($exon_num, $gen_start, $gen_stop, $cdna_start, $cdna_stop);
295	6					7	$exon_num = $1;
296	6					7	$gen_start = $2;
297	6					3	$gen_stop = $3;
298	6					4	$cdna_start = $4;
299	6					12	$cdna_stop = $5;
300							} elsif( /No alignment found/ ) {
301	0					0	return [];
302							} else {
303							#$self->debug("unmatched $_\n");
304							}
305							}
306							# Typical format:
307							# Exon 1: 36375798-36375691 (gen) 1-108 (mRNA)
308							#
309							#
310							# CCTCTTTTTCTTTGCAGGGTATATACCCAGTTACTTAGACAAGGATGAGCTATGTGTAGT
311							# \| \|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|
312							# ATGTCAGGGTATATACCCAGTTACTTAGACAAGGATGAGCTATGTGTAGT
313							# M S G Y I P S Y L D K D E L C V V
314							#
315							#
316							# ATGTGGGGACAAAGCCACCGGATATCATTATCGCTGCATCACTTGTGAAGGTTGCAAGGT
317							# \|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|
318							# ATGTGGGGACAAAGCCACCGGATATCATTATCGCTGCATCACTTGTGAAGGTTGCAAG
319							# C G D K A T G Y H Y R C I T C E G C K
320							#
321							#
322							# AAATGGCA
323							#
324	2	100				6	@exons ? return \@exons : return ;
325							}
326
327							=head2 next_exonset
328
329							Title : next_exonset
330							Usage : $exonset = $spidey_result->parse_next_exonset;
331							print "Exons start at ", $exonset->start(),
332							"and end at ", $exonset->end(), "\n";
333							for $exon ($exonset->sub_SeqFeature()) {
334							# do something
335							}
336							Function: Parses the next alignment of the Spidey result file and returns the
337							set of exons as a container of features. The container is itself
338							a Bio::SeqFeature::Generic object, with the Bio::Tools::Spidey::Exon
339							objects as sub features. Start, end, and strand of the container
340							will represent the total region covered by the exons of this set.
341
342							See the documentation of parse_next_alignment() for further
343							reference about parsing and how the information is stored.
344							Example :
345							Returns : An Bio::SeqFeature::Generic object holding Bio::Tools::Spidey::Exon
346							objects as sub features.
347							Args :
348
349							=cut
350
351							sub next_exonset {
352	2			2	1	3	my $self = shift;
353	2					2	my $exonset;
354
355							# get the next array of exons
356	2					3	my $exons = $self->parse_next_alignment();
357	2	100				5	if( ! defined $exons ) {
358	1					4	$self->warn("No exons returned");
359	1					2	return;
360							}
361	1	50				4	if( @$exons == 0 ) {
362	0					0	return Bio::SeqFeature::Generic->new();
363							}
364							# create the container of exons as a feature object itself, with the
365							# data of the first exon for initialization
366	1					3	$exonset = Bio::SeqFeature::Generic->new('-start' => $exons->[0]->start(),
367							'-end' => $exons->[-1]->end(),
368							'-strand' => $exons->[0]->strand(),
369							'-primary' => "ExonSet");
370	1					4	$exonset->source_tag($exons->[0]->source_tag());
371	1					3	$exonset->seq_id($exons->[0]->seq_id());
372							# now add all exons as sub features, with enabling EXPANsion of the region
373							# covered in total
374	1					3	foreach my $exon (@$exons) {
375	6					12	$exonset->add_sub_SeqFeature($exon, 'EXPAND');
376							}
377	1					3	return $exonset;
378							}
379
380							=head2 next_feature
381
382							Title : next_feature
383							Usage : while($exonset = $spidey->next_feature()) {
384							# do something
385							}
386							Function: Does the same as L. See there for documentation of
387							the functionality. Call this method repeatedly until FALSE is
388							returned.
389
390							The returned object is actually a SeqFeatureI implementing object.
391							This method is required for classes implementing the
392							SeqAnalysisParserI interface, and is merely an alias for
393							next_exonset() at present.
394
395							Example :
396							Returns : A Bio::SeqFeature::Generic object.
397							Args :
398
399							=cut
400
401							sub next_feature {
402	0			0	1	0	my ($self,@args) = @_;
403							# even though next_exonset doesn't expect any args (and this method
404							# does neither), we pass on args in order to be prepared if this changes
405							# ever
406	0					0	return $self->next_exonset(@args);
407							}
408
409							=head2 genomic_dna_length
410
411							Title : genomic_dna_length
412							Usage : $spidey->genomic_dna_length();
413							Function: Returns the length of the genomic DNA used in this Spidey result
414							Example :
415							Returns : An integer value.
416							Args :
417
418							=cut
419
420							sub genomic_dna_length {
421	2			2	1	378	my ($self, @args) = @_;
422	2					2	my $val;
423
424	2	100				4	if(@args) {
425	1					1	$val = shift(@args);
426	1					2	$self->{'genomic_dna_length'} = $val;
427							} else {
428	1					2	$val = $self->{'genomic_dna_length'};
429							}
430	2					5	return $val;
431							}
432
433							=head2 splicesites
434
435							Title : splicesites
436							Usage : $spidey->splicesites();
437							Function: Returns the number of splice sites found in this Spidey result
438							Example :
439							Returns : An integer value.
440							Args :
441
442							=cut
443
444							sub splicesites {
445	2			2	1	5	my ($self, @args) = @_;
446	2					1	my $val;
447
448	2	100				4	if(@args) {
449	1					2	$val = shift(@args);
450	1					2	$self->{'splicesites'} = $val;
451							} else {
452	1					2	$val = $self->{'splicesites'};
453							}
454	2					4	return $val;
455							}
456
457							=head2 est_coverage
458
459							Title : est_coverage
460							Usage : $spidey->est_coverage();
461							Function: Returns the percent of est coverage in this Spidey result
462							Example :
463							Returns : An integer value.
464							Args :
465
466							=cut
467
468							sub est_coverage {
469	2			2	1	3	my ($self, @args) = @_;
470	2					3	my $val;
471
472	2	100				4	if(@args) {
473	1					2	$val = shift(@args);
474	1					2	$self->{'est_coverage'} = $val;
475							} else {
476	1					1	$val = $self->{'est_coverage'};
477							}
478	2					5	return $val;
479							}
480
481							=head2 overall_percentage_id
482
483							Title : overall_percentage_id
484							Usage : $spidey->overall_percentage_id();
485							Function: Returns the overall percent id in this Spidey result
486							Example :
487							Returns : An float value.
488							Args :
489
490							=cut
491
492							sub overall_percentage_id {
493	2			2	1	4	my ($self, @args) = @_;
494	2					1	my $val;
495
496	2	100				4	if(@args) {
497	1					1	$val = shift(@args);
498	1					1	$self->{'overall_percentage_id'} = $val;
499							} else {
500	1					2	$val = $self->{'overall_percentage_id'};
501							}
502	2					5	return $val;
503							}
504
505							=head2 missing_mrna_ends
506
507							Title : missing_mrna_ends
508							Usage : $spidey->missing_mrna_ends();
509							Function: Returns left/right/neither from Spidey
510							Example :
511							Returns : A string value.
512							Args :
513
514							=cut
515
516							sub missing_mrna_ends {
517	2			2	1	4	my ($self, @args) = @_;
518	2					2	my $val;
519
520	2	100				3	if(@args) {
521	1					1	$val = shift(@args);
522	1					2	$self->{'missing_mrna_ends'} = $val;
523							} else {
524	1					2	$val = $self->{'missing_mrna_ends'};
525							}
526	2					5	return $val;
527							}
528
529							1;