File Coverage

Bio/AlignIO/fasta.pm
Criterion Covered Total %
statement 80 85 94.1
branch 21 30 70.0
condition 8 20 40.0
subroutine 7 7 100.0
pod 3 3 100.0
total 119 145 82.0


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::AlignIO::fasta
3             #
4             # Copyright Peter Schattner
5             #
6             # You may distribute this module under the same terms as perl itself
7             # POD documentation - main docs before the code
8              
9             =head1 NAME
10              
11             Bio::AlignIO::fasta - fasta MSA Sequence input/output stream
12              
13             =head1 SYNOPSIS
14              
15             Do not use this module directly. Use it via the L
16             class.
17              
18             =head1 DESCRIPTION
19              
20             This object can transform L objects to and from
21             fasta flat files. This is for the fasta alignment format, not
22             for the FastA sequence analysis program. To process the alignments from
23             FastA (FastX, FastN, FastP, tFastA, etc) use the Bio::SearchIO module.
24              
25             =head1 FEEDBACK
26              
27             =head2 Support
28              
29             Please direct usage questions or support issues to the mailing list:
30              
31             I
32              
33             rather than to the module maintainer directly. Many experienced and
34             reponsive experts will be able look at the problem and quickly
35             address it. Please include a thorough description of the problem
36             with code and data examples if at all possible.
37              
38             =head2 Reporting Bugs
39              
40             Report bugs to the Bioperl bug tracking system to help us keep track
41             the bugs and their resolution. Bug reports can be submitted via the
42             web:
43              
44             https://github.com/bioperl/bioperl-live/issues
45              
46             =head1 AUTHORS
47              
48             Peter Schattner
49              
50             =head1 APPENDIX
51              
52             The rest of the documentation details each of the object
53             methods. Internal methods are usually preceded with a _
54              
55             =cut
56              
57             # Let the code begin...
58              
59             package Bio::AlignIO::fasta;
60 6     6   364 use strict;
  6         8  
  6         169  
61              
62 6     6   18 use base qw(Bio::AlignIO);
  6         7  
  6         684  
63             our $WIDTH = 60;
64 6     6   22 use Bio::LocatableSeq;
  6         7  
  6         3607  
65              
66             =head2 next_aln
67              
68             Title : next_aln
69             Usage : $aln = $stream->next_aln
70             Function: returns the next alignment in the stream.
71             Returns : Bio::Align::AlignI object - returns 0 on end of file
72             or on error
73             Args : -width => optional argument to specify the width sequence
74             will be written (60 chars by default)
75              
76             See L
77              
78             =cut
79              
80             sub next_aln {
81 10     10 1 26 my $self = shift;
82 10         32 my ($width) = $self->_rearrange( [qw(WIDTH)], @_ );
83 10   33     48 $self->width( $width || $WIDTH );
84              
85 10         12 my ($start, $end, $name, $seqname, $seq, $seqchar,
86             $entry, $tempname, $tempdesc, %align, $desc, $maxlen
87             );
88 10         57 my $aln = Bio::SimpleAlign->new();
89              
90 10         44 while ( defined( $entry = $self->_readline ) ) {
91 540         418 chomp $entry;
92 540 100       880 if ( $entry =~ s/^>\s*(\S+)\s*// ) {
93 86         140 $tempname = $1;
94 86         67 chomp($entry);
95 86         59 $tempdesc = $entry;
96 86 100       123 if ( defined $name ) {
97 76         309 $seqchar =~ s/\s//g;
98 76         51 $seqname = $name;
99 76         67 $start = 1;
100 76         118 $end = $self->_get_len($seqchar);
101 76         221 $seq = Bio::LocatableSeq->new(
102             -seq => $seqchar,
103             -display_id => $seqname,
104             -description => $desc,
105             -start => $start,
106             -end => $end,
107             -alphabet => $self->alphabet,
108             );
109 76         202 $aln->add_seq($seq);
110 76         171 $self->debug("Reading $seqname\n");
111             }
112 86         82 $desc = $tempdesc;
113 86         62 $name = $tempname;
114 86         67 $desc = $entry;
115 86         69 $seqchar = "";
116 86         208 next;
117             }
118              
119             # removed redundant symbol validation
120             # this is already done in Bio::PrimarySeq
121 454         800 $seqchar .= $entry;
122             }
123              
124             # Next two lines are to silence warnings that
125             # otherwise occur at EOF when using <$fh>
126 10 50       26 $name = "" if ( !defined $name );
127 10 50       24 $seqchar = "" if ( !defined $seqchar );
128 10         37 $seqchar =~ s/\s//g;
129              
130             # Put away last name and sequence
131 10 100       53 if ( $name =~ /(\S+\/(\d+)-(\d+))$/ ) {
132 5         16 $seqname = $1;
133 5         10 $start = $2;
134 5         10 $end = $3;
135             }
136             else {
137 5         6 $seqname = $name;
138 5         8 $start = 1;
139 5         9 $end = $self->_get_len($seqchar);
140             }
141              
142             # This logic now also reads empty lines at the
143             # end of the file. Skip this is seqchar and seqname is null
144 10 50 33     35 unless ( length($seqchar) == 0 && length($seqname) == 0 ) {
145 10         44 $seq = Bio::LocatableSeq->new(
146             -seq => $seqchar,
147             -display_id => $seqname,
148             -description => $desc,
149             -start => $start,
150             -end => $end,
151             -alphabet => $self->alphabet,
152             );
153 10         32 $aln->add_seq($seq);
154 10         34 $self->debug("Reading $seqname\n");
155             }
156 10         33 my $alnlen = $aln->length;
157 10         21 foreach my $seq ( $aln->each_seq ) {
158 86 100       98 if ( $seq->length < $alnlen ) {
159 39         47 my ($diff) = ( $alnlen - $seq->length );
160 39         54 $seq->seq( $seq->seq() . "-" x $diff );
161             }
162             }
163              
164             # no sequences means empty alignment (possible EOF)
165 10 50       29 return $aln if $aln->num_sequences;
166 0         0 return;
167             }
168              
169              
170             =head2 write_aln
171              
172             Title : write_aln
173             Usage : $stream->write_aln(@aln)
174             Function: writes the $aln object into the stream in fasta format
175             Returns : 1 for success and 0 for error
176             Args : L object
177              
178             See L
179              
180             =cut
181              
182             sub write_aln {
183 2     2 1 7 my ($self,@aln) = @_;
184 2         7 my $width = $self->width;
185 2         2 my ($seq,$desc,$rseq,$name,$count,$length,$seqsub);
186              
187 2         4 foreach my $aln (@aln) {
188 2 50 33     15 if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) {
189 0         0 $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
190 0         0 next;
191             }
192 2 50       13 if( $self->force_displayname_flat ) {
193 0         0 $aln->set_displayname_flat(1);
194             }
195 2         6 foreach $rseq ( $aln->each_seq() ) {
196 17         24 $name = $aln->displayname($rseq->get_nse());
197 17         28 $seq = $rseq->seq();
198 17   100     27 $desc = $rseq->description || '';
199 17 100       45 $desc = ' '.$desc if $desc;
200 17 50       83 $self->_print (">$name$desc\n") or return;
201 17         17 $count = 0;
202 17         12 $length = length($seq);
203 17 50 33     49 if(defined $seq && $length > 0) {
204 17         201 $seq =~ s/(.{1,$width})/$1\n/g;
205             } else {
206 0         0 $seq = "\n";
207             }
208 17         30 $self->_print($seq);
209             }
210             }
211 2 50 33     7 $self->flush if $self->_flush_on_write && defined $self->_fh;
212 2         7 return 1;
213             }
214              
215             =head2 _get_len
216              
217             Title : _get_len
218             Usage :
219             Function: determine number of alphabetic chars
220             Returns : integer
221             Args : sequence string
222              
223             =cut
224              
225             sub _get_len {
226 81     81   80 my ($self,$seq) = @_;
227 81         106 my $chars = $Bio::LocatableSeq::GAP_SYMBOLS.$Bio::LocatableSeq::FRAMESHIFT_SYMBOLS;
228 81         729 $seq =~ s{[$chars]+}{}gi;
229 81         122 return CORE::length($seq);
230             }
231              
232             =head2 width
233              
234             Title : width
235             Usage : $obj->width($newwidth)
236             $width = $obj->width;
237             Function: Get/set width of alignment
238             Returns : integer value of width
239             Args : on set, new value (a scalar or undef, optional)
240              
241              
242             =cut
243              
244             sub width{
245 12     12 1 15 my $self = shift;
246              
247 12 100       33 return $self->{'_width'} = shift if @_;
248 2   33     10 return $self->{'_width'} || $WIDTH;
249             }
250              
251             1;