File Coverage

blib/lib/BioX/Seq/Stream/FASTQ.pm
Criterion Covered Total %
statement 44 44 100.0
branch 9 12 75.0
condition 6 10 60.0
subroutine 6 6 100.0
pod 1 1 100.0
total 66 73 90.4


line stmt bran cond sub pod time code
1             package BioX::Seq::Stream::FASTQ;
2              
3 1     1   6 use strict;
  1         1  
  1         24  
4 1     1   4 use warnings;
  1         2  
  1         22  
5              
6 1     1   4 use parent qw/BioX::Seq::Stream/;
  1         1  
  1         3  
7              
8             sub _check_type {
9              
10 15     15   34 my ($class,$self) = @_;
11 15         60 return substr($self->{buffer},0,1) eq '@';
12              
13             }
14              
15             sub _init {
16              
17 3     3   6 my ($self) = @_;
18              
19             # First two bytes should not contain line ending chars
20             die "Missing ID in initial header (check file format)\n"
21 3 50       24 if ($self->{buffer} =~ /[\r\n]/);
22 3         6 my $fh = $self->{fh};
23 3         31 $self->{buffer} .= <$fh>;
24              
25             # detect line endings for text files based on first line
26             # (other solutions, such as using the :crlf layer or s///
27             # instead of chomp may be marginally more robust but slow
28             # things down too much)
29 3 100       97 if ($self->{buffer} =~ /([\r\n]{1,2})$/) {
30 2         16 $self->{rec_sep} = $1;
31             }
32             else {
33 1         21 die "Failed to detect line endings\n";
34             }
35              
36 2         9 return;
37            
38             }
39              
40             sub next_seq {
41            
42 4     4 1 1089 my ($self) = @_;
43 4         11 my $fh = $self->{fh};
44              
45 4         17 local $/ = $self->{rec_sep};
46              
47 4   66     23 my $line = $self->{buffer} // <$fh>;
48 4 50       10 return undef if (! defined $line);
49 4         8 chomp $line;
50              
51 4         33 my ($id, $desc) = ($line =~ /^\@(\S+)\s*(.+)?$/);
52 4 50       12 die "Bad FASTQ ID line\n" if (! defined $id);
53              
54             # seq and qual can be multiline (although rare)
55             # qual is tricky since it can contain '@' but we compare to the
56             # sequence length to know when to stop parsing (must be equal lengths)
57 4   50     17 my $seq = <$fh> // die "Bad or missing FASTQ sequence";
58 4         64 chomp $seq;
59              
60             SEQ:
61 4         15 while (my $line = <$fh>) {
62 5         55 chomp $line;
63 5 100       26 last SEQ if ($line =~ /^\+/);
64 1         3 $seq .= $line;
65             }
66              
67 4         6 my $seq_len = length $seq;
68              
69 4   50     23 QUAL:
70             my $qual = <$fh> // die "Bad or missing FASTQ format";
71 4         55 chomp $qual;
72              
73 4   66     28 while ( (length($qual) < $seq_len) && defined (my $line = <$fh>) ) {
74 2         4 chomp $line;
75 2         5 $qual .= $line;
76             }
77 4 100       18 die "Bad FASTQ quality length" if ($seq_len != length($qual));
78              
79 3         5 $self->{buffer} = undef;
80              
81 3         28 my $obj = BioX::Seq->new($seq, $id, $desc, $qual);
82 3         6 $obj->{_input_format} = 'fastq';
83 3         18 return $obj;
84              
85             }
86              
87             1;
88              
89             __END__