File Coverage

Bio/SeqIO/strider.pm

Criterion	Covered	Total	%
statement	15	38	39.4
branch	0	12	0.0
condition	0	20	0.0
subroutine	5	8	62.5
pod	2	2	100.0
total	22	80	27.5

line	stmt	bran	cond	sub	pod	time	code
1							# BioPerl module for Bio::SeqIO::strider
2							#
3							# Please direct questions and support issues to <bioperl-l@bioperl.org>
4							#
5							# Cared for by Malcolm Cook
6							#
7							# You may distribute this module under the same terms as perl itself
8							#
9							# _history
10							# April 7th, 2005 Malcolm Cook authored
11
12							# POD documentation - main docs before the code
13
14							=head1 NAME
15
16							Bio::SeqIO::strider - DNA strider sequence input/output stream
17
18							=head1 SYNOPSIS
19
20							Do not use this module directly. Use it via the Bio::SeqIO class.
21
22							=head1 DESCRIPTION
23
24							This object can transform Bio::Seq objects to and from strider
25							'binary' format, as documented in the strider manual, in which the
26							first 112 bytes are a header, followed by the sequence, followed by a
27							sequence description.
28
29							Note: it does NOT assign any sequence identifier, since they are not
30							contained in the byte stream of the file; the Strider application
31							simply displays the name of the file on disk as the name of the
32							sequence. The caller should set the id, probably based on the name of
33							the file (after possibly cleaning up whitespace, which ought not to be
34							used as the id in most applications).
35
36							Note: the strider 'comment' is mapped to the BioPerl 'description'
37							(since there is no other text field, and description maps to defline
38							text).
39
40							=head1 FEEDBACK
41
42							=head2 Mailing Lists
43
44							User feedback is an integral part of the evolution of this and other
45							Bioperl modules. Send your comments and suggestions preferably to one
46							of the Bioperl mailing lists. Your participation is much appreciated.
47
48							bioperl-l@bioperl.org - General discussion
49							http://bioperl.org/wiki/Mailing_lists - About the mailing lists
50
51							=head2 Support
52
53							Please direct usage questions or support issues to the mailing list:
54
55							I<bioperl-l@bioperl.org>
56
57							rather than to the module maintainer directly. Many experienced and
58							responsive experts will be able to look at the problem and quickly
59							address it. Please include a thorough description of the problem
60							with code and data examples if at all possible.
61
62							=head2 Reporting Bugs
63
64							Report bugs to the Bioperl bug tracking system to help us keep track
65							of the bugs and their resolution. Bug reports can be submitted via the
66							web:
67
68							https://github.com/bioperl/bioperl-live/issues
69
70							=head1 AUTHORS - Malcolm Cook
71
72							Email: mec@stowers-institute.org
73
74							=head1 CONTRIBUTORS
75
76							Modelled after Bio::SeqIO::fasta by Ewan Birney E<lt>birney@ebi.ac.ukE<gt> and
77							Lincoln Stein E<lt>lstein@cshl.orgE<gt>
78
79							=head1 APPENDIX
80
81							The rest of the documentation details each of the object
82							methods. Internal methods are usually preceded with a _
83
84							=cut
85
86							# Let the code begin...
87
88							package Bio::SeqIO::strider;
89	1			1		391	use strict;
	1					1
	1					21
90	1			1		3	use warnings;
	1					1
	1					16
91
92
93	1			1		216	use Bio::Seq::SeqFactory;
	1					1
	1					19
94	1			1		4	use Convert::Binary::C;
	1					1
	1					15
95
96	1			1		2	use base qw(Bio::SeqIO);
	1					1
	1					312
97
98							my $c = Convert::Binary::C->new (
99							ByteOrder => 'BigEndian',
100							Alignment => 2
101							);
102
103							my $headerdef;
104							{local ($/);
105							# See this file's __DATA__ section for the c structure definitions
106							# for strider binary header data. Here we slurp it all into $headerdef.
107							$headerdef = <DATA>};
108
109							$c->parse($headerdef);
110
111							my $size_F_HEADER = 112;
112
113							die "expected strider header structure size of $size_F_HEADER" unless $size_F_HEADER eq $c->sizeof('F_HEADER');
114
115							my %alphabet2type = (
116							# map between BioPerl alphabet and strider
117							# sequence type code.
118
119							# From Strider Documentation: the sequence type:
120							# 1, 2, 3 and 4 for DNA, DNA Degenerate, RNA and
121							# Protein sequence files, respectively.
122
123							# TODO: determine 'DNA Degenerate' based on
124							# sequence alphabet?
125
126							dna => 1,
127							rna => 3,
128							protein => 4,
129							);
130
131							my %type2alphabet = reverse %alphabet2type;
132
133							sub _initialize {
134	0			0			my($self,@args) = @_;
135	0						$self->SUPER::_initialize(@args);
136	0	0					unless ( defined $self->sequence_factory ) {
137	0						$self->sequence_factory(Bio::Seq::SeqFactory->new(-verbose => $self->verbose(),
138							-type => 'Bio::Seq::RichSeq'));
139							}
140							}
141
142							=head2 next_seq
143
144							Title : next_seq
145							Usage : $seq = $stream->next_seq()
146							Function: returns the next sequence in the stream
147							Returns : Bio::Seq object
148							Args : NONE
149
150							=cut
151
152							sub next_seq {
153	0			0	1		my( $self ) = @_;
154	0						my $fh = $self->_fh;
155	0						my ($header,$sequence,$fulldesc);
156	0						eval {read $fh,$header,$size_F_HEADER};
	0
157	0	0					$self->throw ("$@ while attempting to reading strider header from " . $self->{'_file'}) if $@;
158	0	0					$self->throw("required $size_F_HEADER bytes while reading strider header in " . $self->{'_file'} . " but found: " . length($header))
159							unless $size_F_HEADER == length($header);
160	0	0					my $headerdata = $c->unpack('F_HEADER',$header) or return;
161	0						read $fh,$sequence,$headerdata->{nLength};
162	0						read $fh,$fulldesc,$headerdata->{com_length};
163	0						$fulldesc =~ s/\cM/ /g; # gratuitous replacement of mac
164							# linefeed with space.
165							my $seq = $self->sequence_factory->create(
166							# -id => $main::ARGV, #might want to set this in caller to $ARGV.
167							-seq => $sequence,
168							-desc => $fulldesc,
169	0		0				-alphabet => $type2alphabet{$headerdata->{type}} \|\| 'dna',
170							);
171
172	0						return $seq;
173							}
174
175							=head2 write_seq
176
177							Title : write_seq
178							Usage : $stream->write_seq(@seq)
179							Function: writes the $seq object into the stream
180							Returns : 1 for success and 0 for error
181							Args : array of 1 to n Bio::PrimarySeqI objects
182
183
184							=cut
185
186							sub write_seq {
187	0			0	1		my ($self,@seq) = @_;
188	0		0				my $fh = $self->_fh() \|\| *STDOUT; #die "could not determine filehandle in strider.pm";
189	0						foreach my $seq (@seq) {
190	0	0	0				$self->throw("Did not provide a valid Bio::PrimarySeqI object")
			0
191							unless defined $seq && ref($seq) && $seq->isa('Bio::PrimarySeqI');
192							my $headerdata = $c->pack('F_HEADER',{
193							versionNb => 0,
194							type => $alphabet2type{$seq->alphabet} \|\| $alphabet2type{dna},
195	0	0	0				topology => $seq->is_circular ? 1 : 0,
			0
196							nLength => $seq->length,
197							nMinus => 0,
198							com_length => length($seq->desc \|\| ""),
199							});
200	0		0				print $fh $headerdata, $seq->seq() \|\| "" , $seq->desc \|\| "";
			0
201							}
202							}
203
204							1;
205
206							__DATA__