File Coverage

blib/lib/BioX/Seq/Stream/FASTA.pm
Criterion Covered Total %
statement 54 56 96.4
branch 15 18 83.3
condition n/a
subroutine 6 6 100.0
pod 1 1 100.0
total 76 81 93.8


line stmt bran cond sub pod time code
1             package BioX::Seq::Stream::FASTA;
2              
3 1     1   7 use strict;
  1         1  
  1         55  
4 1     1   7 use warnings;
  1         2  
  1         44  
5              
6 1     1   501 use parent qw/BioX::Seq::Stream/;
  1         318  
  1         5  
7              
8             sub _check_type {
9              
10 15     15   50 my ($class,$self) = @_;
11 15         88 return substr($self->{buffer},0,1) eq '>';
12              
13             }
14              
15             sub _init {
16              
17 10     10   22 my ($self) = @_;
18              
19             # First two bytes should not contain line ending chars
20             die "Missing ID in initial header (check file format)\n"
21 10 50       43 if ($self->{buffer} =~ /[\r\n]/);
22 10         16 my $fh = $self->{fh};
23 10         53 $self->{buffer} .= <$fh>;
24              
25             # detect line endings for text files based on first line
26             # (other solutions, such as using the :crlf layer or s///
27             # instead of chomp may be marginally more robust but slow
28             # things down too much)
29 10 100       173 if ($self->{buffer} =~ /([\r\n]{1,2})$/) {
30 9         36 $self->{rec_sep} = $1;
31             }
32             else {
33 1         21 die "Failed to detect line endings\n";
34             }
35 9         67 local $/ = $self->{rec_sep};
36              
37             # Parse initial header line
38 9         27 chomp $self->{buffer};
39 9 50       53 if ($self->{buffer} =~ /^>(\S+)\s*(.+)?$/) {
40 9         27 $self->{next_id} = $1;
41 9         43 $self->{next_desc} = $2;
42 9         21 $self->{buffer} = undef;
43             }
44             else {
45 0         0 die "Failed to parse initial FASTA header (check file format)\n";
46             }
47              
48 9         35 return;
49              
50             }
51              
52             sub next_seq {
53            
54 114     114 1 2124 my ($self) = @_;
55              
56 114         204 my $fh = $self->{fh};
57 114         205 my $id = $self->{next_id};
58 114         172 my $desc = $self->{next_desc};
59 114         167 my $seq = '';
60              
61 114         332 local $/ = $self->{rec_sep};
62            
63 114         460 my $line = <$fh>;
64              
65 114         311 while ($line) {
66              
67 692         1130 chomp $line;
68              
69             # match next record header
70 692 100       1507 if ($line =~ /^>(\S+)\s*(.+)?$/) {
71              
72 108         287 $self->{next_id} = $1;
73 108         219 $self->{next_desc} = $2;
74              
75             # if not in fast mode, double-check sequence for correctness
76 108 100       223 if (! $self->{fast}) {
77             # allow any alpha character or characters possibly representing
78             # gaps or stop codons
79 59 100       445 if ($seq =~ /[^A-Za-z\-\.\*]/) {
80 1         8 die "Previous sequence record invalid\n";
81             }
82             }
83              
84 107         365 my $obj = BioX::Seq->new($seq, $id, $desc);
85 107         186 $obj->{_input_format} = 'fasta';
86 107         411 return $obj;
87              
88             }
89             else {
90 584         929 $seq .= $line;
91             }
92              
93 584         1410 $line = <$fh>;
94              
95             }
96              
97             # should only reach here on last read
98 6 100       23 if (defined $self->{next_id}) {
99 4         10 delete $self->{next_id};
100 4         7 delete $self->{next_desc};
101              
102             # if not in fast mode, double-check sequence for correctness
103 4 100       14 if (! $self->{fast}) {
104             # allow any alpha character or characters possibly representing
105             # gaps or stop codons
106 3 50       25 if ($seq =~ /[^A-Za-z\-\.\*]/) {
107 0         0 die "Previous sequence record invalid\n";
108             }
109             }
110              
111 4         19 my $obj = BioX::Seq->new($seq, $id, $desc);
112 4         7 $obj->{_input_format} = 'fasta';
113 4         20 return $obj;
114             }
115 2         12 return undef;
116              
117             }
118              
119             1;
120              
121             __END__