File Coverage

Bio/SeqIO/strider.pm
Criterion Covered Total %
statement 15 38 39.4
branch 0 12 0.0
condition 0 20 0.0
subroutine 5 8 62.5
pod 2 2 100.0
total 22 80 27.5


line stmt bran cond sub pod time code
1             # BioPerl module for Bio::SeqIO::strider
2             #
3             # Please direct questions and support issues to <bioperl-l@bioperl.org>
4             #
5             # Cared for by Malcolm Cook
6             #
7             # You may distribute this module under the same terms as perl itself
8             #
9             # _history
10             # April 7th, 2005 Malcolm Cook authored
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::SeqIO::strider - DNA strider sequence input/output stream
17              
18             =head1 SYNOPSIS
19              
20             Do not use this module directly. Use it via the Bio::SeqIO class.
21              
22             =head1 DESCRIPTION
23              
24             This object can transform Bio::Seq objects to and from strider
25             'binary' format, as documented in the strider manual, in which the
26             first 112 bytes are a header, followed by the sequence, followed by a
27             sequence description.
28              
29             Note: it does NOT assign any sequence identifier, since they are not
30             contained in the byte stream of the file; the Strider application
31             simply displays the name of the file on disk as the name of the
32             sequence. The caller should set the id, probably based on the name of
33             the file (after possibly cleaning up whitespace, which ought not to be
34             used as the id in most applications).
35              
36             Note: the strider 'comment' is mapped to the BioPerl 'description'
37             (since there is no other text field, and description maps to defline
38             text).
39              
40             =head1 FEEDBACK
41              
42             =head2 Mailing Lists
43              
44             User feedback is an integral part of the evolution of this and other
45             Bioperl modules. Send your comments and suggestions preferably to one
46             of the Bioperl mailing lists. Your participation is much appreciated.
47              
48             bioperl-l@bioperl.org - General discussion
49             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
50              
51             =head2 Support
52              
53             Please direct usage questions or support issues to the mailing list:
54              
55             I<bioperl-l@bioperl.org>
56              
57             rather than to the module maintainer directly. Many experienced and
58             responsive experts will be able to look at the problem and quickly
59             address it. Please include a thorough description of the problem
60             with code and data examples if at all possible.
61              
62             =head2 Reporting Bugs
63              
64             Report bugs to the Bioperl bug tracking system to help us keep track
65             of the bugs and their resolution. Bug reports can be submitted via the
66             web:
67              
68             https://github.com/bioperl/bioperl-live/issues
69              
70             =head1 AUTHORS - Malcolm Cook
71              
72             Email: mec@stowers-institute.org
73              
74             =head1 CONTRIBUTORS
75              
76             Modelled after Bio::SeqIO::fasta by Ewan Birney E<lt>birney@ebi.ac.ukE<gt> and
77             Lincoln Stein E<lt>lstein@cshl.orgE<gt>
78              
79             =head1 APPENDIX
80              
81             The rest of the documentation details each of the object
82             methods. Internal methods are usually preceded with a _
83              
84             =cut
85              
86             # Let the code begin...
87              
88             package Bio::SeqIO::strider;
89 1     1   413 use strict;
  1         2  
  1         22  
90 1     1   4 use warnings;
  1         26  
  1         31  
91              
92              
93 1     1   202 use Bio::Seq::SeqFactory;
  1         2  
  1         27  
94 1     1   5 use Convert::Binary::C;
  1         1  
  1         19  
95              
96 1     1   4 use base qw(Bio::SeqIO);
  1         1  
  1         326  
97              
98             my $c = Convert::Binary::C->new (
99             ByteOrder => 'BigEndian',
100             Alignment => 2
101             );
102              
103             my $headerdef;
104             {local ($/);
105             # See this file's __DATA__ section for the c structure definitions
106             # for strider binary header data. Here we slurp it all into $headerdef.
107             $headerdef = <DATA>};
108              
109             $c->parse($headerdef);
110              
111             my $size_F_HEADER = 112;
112              
113             die "expected strider header structure size of $size_F_HEADER" unless $size_F_HEADER eq $c->sizeof('F_HEADER');
114              
115             my %alphabet2type = (
116             # map between BioPerl alphabet and strider
117             # sequence type code.
118              
119             # From Strider Documentation: the sequence type:
120             # 1, 2, 3 and 4 for DNA, DNA Degenerate, RNA and
121             # Protein sequence files, respectively.
122              
123             # TODO: determine 'DNA Degenerate' based on
124             # sequence alphabet?
125              
126             dna => 1,
127             rna => 3,
128             protein => 4,
129             );
130              
131             my %type2alphabet = reverse %alphabet2type;
132              
133             sub _initialize {
134 0     0     my($self,@args) = @_;
135 0           $self->SUPER::_initialize(@args);
136 0 0         unless ( defined $self->sequence_factory ) {
137 0           $self->sequence_factory(Bio::Seq::SeqFactory->new(-verbose => $self->verbose(),
138             -type => 'Bio::Seq::RichSeq'));
139             }
140             }
141              
142             =head2 next_seq
143              
144             Title : next_seq
145             Usage : $seq = $stream->next_seq()
146             Function: returns the next sequence in the stream
147             Returns : Bio::Seq object
148             Args : NONE
149              
150             =cut
151              
152             sub next_seq {
153 0     0 1   my( $self ) = @_;
154 0           my $fh = $self->_fh;
155 0           my ($header,$sequence,$fulldesc);
156 0           eval {read $fh,$header,$size_F_HEADER};
  0            
157 0 0         $self->throw ("$@ while attempting to reading strider header from " . $self->{'_file'}) if $@;
158 0 0         $self->throw("required $size_F_HEADER bytes while reading strider header in " . $self->{'_file'} . " but found: " . length($header))
159             unless $size_F_HEADER == length($header);
160 0 0         my $headerdata = $c->unpack('F_HEADER',$header) or return;
161 0           read $fh,$sequence,$headerdata->{nLength};
162 0           read $fh,$fulldesc,$headerdata->{com_length};
163 0           $fulldesc =~ s/\cM/ /g; # gratuitous replacement of mac
164             # linefeed with space.
165             my $seq = $self->sequence_factory->create(
166             # -id => $main::ARGV, #might want to set this in caller to $ARGV.
167             -seq => $sequence,
168             -desc => $fulldesc,
169 0   0       -alphabet => $type2alphabet{$headerdata->{type}} || 'dna',
170             );
171              
172 0           return $seq;
173             }
174              
175             =head2 write_seq
176              
177             Title : write_seq
178             Usage : $stream->write_seq(@seq)
179             Function: writes the $seq object into the stream
180             Returns : 1 for success and 0 for error
181             Args : array of 1 to n Bio::PrimarySeqI objects
182              
183              
184             =cut
185              
186             sub write_seq {
187 0     0 1   my ($self,@seq) = @_;
188 0   0       my $fh = $self->_fh() || *STDOUT; #die "could not determine filehandle in strider.pm";
189 0           foreach my $seq (@seq) {
190 0 0 0       $self->throw("Did not provide a valid Bio::PrimarySeqI object")
      0        
191             unless defined $seq && ref($seq) && $seq->isa('Bio::PrimarySeqI');
192             my $headerdata = $c->pack('F_HEADER',{
193             versionNb => 0,
194             type => $alphabet2type{$seq->alphabet} || $alphabet2type{dna},
195 0 0 0       topology => $seq->is_circular ? 1 : 0,
      0        
196             nLength => $seq->length,
197             nMinus => 0,
198             com_length => length($seq->desc || ""),
199             });
200 0   0       print $fh $headerdata, $seq->seq() || "" , $seq->desc || "";
      0        
201             }
202             }
203              
204             1;
205              
206             __DATA__