File Coverage

Bio/SeqIO/metafasta.pm
Criterion Covered Total %
statement 74 83 89.1
branch 17 38 44.7
condition 7 21 33.3
subroutine 11 11 100.0
pod 3 3 100.0
total 112 156 71.7


line stmt bran cond sub pod time code
1             # BioPerl module for Bio::SeqIO::metafasta
2             #
3             # Please direct questions and support issues to
4             #
5             # Cared for by Heikki Lehvaslaiho
6             #
7             # Copyright Heikki Lehvaslaiho
8             #
9             # You may distribute this module under the same terms as perl itself
10              
11             # POD documentation - main docs before the code
12              
13             =head1 NAME
14              
15             Bio::SeqIO::metafasta - metafasta sequence input/output stream
16              
17             =head1 SYNOPSIS
18              
19             Do not use this module directly. Use it via the Bio::SeqIO class.
20              
21             use Bio::SeqIO;
22              
23             # read the metafasta file
24             $io = Bio::SeqIO->new(-file => "test.metafasta",
25             -format => "metafasta" );
26              
27             $seq = $io->next_seq;
28              
29             =head1 DESCRIPTION
30              
31             This object can transform Bio::Seq::Meta objects to and from metafasta
32             flat file databases.
33              
34             For sequence part the code is an exact copy of Bio::SeqIO::fasta
35             module. The only added bits deal with meta data IO.
36              
37             The format of a metafasta file is
38              
39             >test
40             ABCDEFHIJKLMNOPQRSTUVWXYZ
41             &charge
42             NBNAANCNJCNNNONNCNNUNNXNZ
43             &chemical
44             LBSAARCLJCLSMOIMCHHULRXRZ
45              
46             where the sequence block is followed by one or several meta blocks.
47             Each meta block starts with the ampersand character '&' in the first
48             column and is immediately followed by the name of the meta data which
49             continues until the new line. The meta data follows it. All
50             characters, except new line, are important in meta data.
51              
52             =head1 FEEDBACK
53              
54             =head2 Mailing Lists
55              
56             User feedback is an integral part of the evolution of this and other
57             Bioperl modules. Send your comments and suggestions preferably to one
58             of the Bioperl mailing lists. Your participation is much appreciated.
59              
60             bioperl-l@bioperl.org - General discussion
61             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
62              
63             =head2 Support
64              
65             Please direct usage questions or support issues to the mailing list:
66              
67             I
68              
69             rather than to the module maintainer directly. Many experienced and
70             reponsive experts will be able look at the problem and quickly
71             address it. Please include a thorough description of the problem
72             with code and data examples if at all possible.
73              
74             =head2 Reporting Bugs
75              
76             Report bugs to the Bioperl bug tracking system to help us keep track
77             the bugs and their resolution. Bug reports can be submitted via the
78             web:
79              
80             https://github.com/bioperl/bioperl-live/issues
81              
82             =head1 AUTHOR - Heikki Lehvaslaiho
83              
84             Email heikki-at-bioperl-dot-org
85              
86             =head1 APPENDIX
87              
88             The rest of the documentation details each of the object
89             methods. Internal methods are usually preceded with a _
90              
91             =cut
92              
93             # Let the code begin...
94              
95             package Bio::SeqIO::metafasta;
96 2     2   421 use vars qw($WIDTH);
  2         2  
  2         78  
97 2     2   6 use strict;
  2         2  
  2         31  
98              
99 2     2   229 use Bio::Seq::SeqFactory;
  2         2  
  2         45  
100 2     2   543 use Bio::Seq::SeqFastaSpeedFactory;
  2         3  
  2         40  
101 2     2   310 use Bio::Seq::Meta;
  2         2  
  2         50  
102              
103 2     2   8 use base qw(Bio::SeqIO);
  2         2  
  2         410  
104              
105 2     2   1201 BEGIN { $WIDTH = 60}
106              
107             sub _initialize {
108 3     3   6 my($self,@args) = @_;
109 3         10 $self->SUPER::_initialize(@args);
110 3         9 my ($width) = $self->_rearrange([qw(WIDTH)], @args);
111 3 50       8 $width && $self->width($width);
112 3 50       16 unless ( defined $self->sequence_factory ) {
113 3         18 $self->sequence_factory(Bio::Seq::SeqFastaSpeedFactory->new());
114             }
115             }
116              
117             =head2 next_seq
118              
119             Title : next_seq
120             Usage : $seq = $stream->next_seq()
121             Function: returns the next sequence in the stream
122             Returns : Bio::Seq object
123             Args : NONE
124              
125             =cut
126              
127             sub next_seq {
128 2     2 1 419 my( $self ) = @_;
129 2         2 my $seq;
130             my $alphabet;
131 2         7 local $/ = "\n>";
132 2 50       10 return unless my $entry = $self->_readline;
133              
134 2         5 chomp($entry);
135 2 50       10 if ($entry =~ m/\A\s*\Z/s) { # very first one
136 0 0       0 return unless $entry = $self->_readline;
137 0         0 chomp($entry);
138             }
139 2         7 $entry =~ s/^>//;
140              
141 2         6 my ($top,$sequence) = split(/\n/,$entry,2);
142 2 50       7 defined $sequence && $sequence =~ s/>//g;
143              
144 2         2 my @metas;
145 2         9 ($sequence, @metas) = split /\n&/, $sequence;
146              
147 2         3 my ($id,$fulldesc);
148 2 50       7 if( $top =~ /^\s*(\S+)\s*(.*)/ ) {
149 2         7 ($id,$fulldesc) = ($1,$2);
150             }
151              
152 2 50 33     15 if (defined $id && $id eq '') {$id=$fulldesc;} # FIX incase no space
  0         0  
153             # between > and name \AE
154 2 50       7 defined $sequence && $sequence =~ s/\s//g; # Remove whitespace
155              
156             # for empty sequences we need to know the mol.type
157 2         14 $alphabet = $self->alphabet();
158 2 50 33     11 if(defined $sequence && length($sequence) == 0) {
159 0 0       0 if(! defined($alphabet)) {
160             # let's default to dna
161 0         0 $alphabet = "dna";
162             }
163             } else {
164             # we don't need it really, so disable
165 2         2 $alphabet = undef;
166             }
167              
168 2         5 $seq = $self->sequence_factory->create(
169             -seq => $sequence,
170             -id => $id,
171             # Ewan's note - I don't think this healthy
172             # but obviously to taste.
173             #-primary_id => $id,
174             -desc => $fulldesc,
175             -alphabet => $alphabet,
176             -direct => 1,
177             );
178              
179 2         8 $seq = $seq->primary_seq;
180 2         9 bless $seq, 'Bio::Seq::Meta';
181              
182 2         3 foreach my $meta (@metas) {
183 10         23 my ($name,$string) = split /\n/, $meta;
184             # $split ||= '';
185 10         12 $string =~ s/\n//g; # Remove newlines, spaces are important
186 10         16 $seq->named_meta($name, $string);
187             }
188              
189             # if there wasn't one before, set the guessed type
190 2 50       6 unless ( defined $alphabet ) {
191 2         7 $self->alphabet($seq->alphabet());
192             }
193 2         16 return $seq;
194             }
195              
196             =head2 write_seq
197              
198             Title : write_seq
199             Usage : $stream->write_seq(@seq)
200             Function: writes the $seq object into the stream
201             Returns : 1 for success and 0 for error
202             Args : array of 1 to n Bio::PrimarySeqI objects
203              
204             =cut
205              
206             sub write_seq {
207 1     1 1 4 my ($self,@seq) = @_;
208 1         2 my $width = $self->width;
209 1         2 foreach my $seq (@seq) {
210 1 50 33     15 $self->throw("Did not provide a valid Bio::PrimarySeqI object")
      33        
211             unless defined $seq && ref($seq) && $seq->isa('Bio::PrimarySeqI');
212              
213 1         3 my $str = $seq->seq;
214 1         2 my $top = $seq->display_id();
215 1 50 33     16 if ($seq->can('desc') and my $desc = $seq->desc()) {
216 0         0 $desc =~ s/\n//g;
217 0         0 $top .= " $desc";
218             }
219 1 50       2 if(length($str) > 0) {
220 1         15 $str =~ s/(.{1,$width})/$1\n/g;
221             } else {
222 0         0 $str = "\n";
223             }
224 1 50       8 $self->_print (">",$top,"\n",$str) or return;
225 1 50       4 if ($seq->isa('Bio::Seq::MetaI')) {
226 1         3 foreach my $meta ($seq->meta_names) {
227 5         8 my $str = $seq->named_meta($meta);
228 5         30 $str =~ s/(.{1,$width})/$1\n/g;
229 5         9 $self->_print ("&",$meta,"\n",$str);
230             }
231             }
232             }
233              
234 1 50 33     2 $self->flush if $self->_flush_on_write && defined $self->_fh;
235 1         4 return 1;
236             }
237              
238             =head2 width
239              
240             Title : width
241             Usage : $obj->width($newval)
242             Function: Get/Set the line width for METAFASTA output
243             Returns : value of width
244             Args : newvalue (optional)
245              
246              
247             =cut
248              
249             sub width{
250 1     1 1 2 my ($self,$value) = @_;
251 1 50       2 if( defined $value) {
252 0         0 $self->{'width'} = $value;
253             }
254 1   33     3 return $self->{'width'} || $WIDTH;
255             }
256              
257             1;