line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package FASTX::ReaderPaired; |
2
|
|
|
|
|
|
|
#ABSTRACT: Warning, Experimental Paired-End FASTQ files reader, based on FASTX::Reader. |
3
|
|
|
|
|
|
|
|
4
|
15
|
|
|
15
|
|
8136
|
use 5.012; |
|
15
|
|
|
|
|
59
|
|
5
|
15
|
|
|
15
|
|
109
|
use warnings; |
|
15
|
|
|
|
|
26
|
|
|
15
|
|
|
|
|
482
|
|
6
|
15
|
|
|
15
|
|
93
|
use Carp qw(confess cluck); |
|
15
|
|
|
|
|
29
|
|
|
15
|
|
|
|
|
849
|
|
7
|
15
|
|
|
15
|
|
94
|
use Data::Dumper; |
|
15
|
|
|
|
|
36
|
|
|
15
|
|
|
|
|
608
|
|
8
|
15
|
|
|
15
|
|
102
|
use FASTX::Reader; |
|
15
|
|
|
|
|
379
|
|
|
15
|
|
|
|
|
619
|
|
9
|
15
|
|
|
15
|
|
87
|
use File::Basename; |
|
15
|
|
|
|
|
35
|
|
|
15
|
|
|
|
|
17438
|
|
10
|
|
|
|
|
|
|
$FASTX::ReaderPaired::VERSION = $FASTX::Reader::VERSION; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
my $for_suffix_re = '(/1|_R?1)'; |
13
|
|
|
|
|
|
|
my $rev_suffix_re = '(/2|_R?2)'; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
sub new { |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
# Instantiate object |
19
|
9
|
|
|
9
|
1
|
3889
|
my ($class, $args) = @_; |
20
|
|
|
|
|
|
|
|
21
|
9
|
|
|
|
|
90
|
my %accepted_parameters = ( |
22
|
|
|
|
|
|
|
'filename' => 1, |
23
|
|
|
|
|
|
|
'tag1' => 1, |
24
|
|
|
|
|
|
|
'tag2' => 1, |
25
|
|
|
|
|
|
|
'rev' => 1, |
26
|
|
|
|
|
|
|
'interleaved' => 1, |
27
|
|
|
|
|
|
|
'nocheck' => 1, |
28
|
|
|
|
|
|
|
'revcompl' => 1, |
29
|
|
|
|
|
|
|
'verbose' => 1, |
30
|
|
|
|
|
|
|
); |
31
|
|
|
|
|
|
|
|
32
|
9
|
|
|
|
|
58
|
my $valid_attributes = join(', ', keys %accepted_parameters); |
33
|
|
|
|
|
|
|
|
34
|
9
|
50
|
|
|
|
37
|
if ($args) { |
35
|
9
|
|
|
|
|
14
|
for my $parameter (keys %{ $args} ) { |
|
9
|
|
|
|
|
35
|
|
36
|
|
|
|
|
|
|
confess("Attribute <$parameter> is not expected. Valid attributes are: $valid_attributes\n") |
37
|
20
|
50
|
|
|
|
69
|
if (! $accepted_parameters{$parameter} ); |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
} else { |
40
|
0
|
|
|
|
|
0
|
$args->{filename} = '{{STDIN}}'; |
41
|
|
|
|
|
|
|
} |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
my $self = { |
44
|
|
|
|
|
|
|
filename => $args->{filename}, |
45
|
|
|
|
|
|
|
rev => $args->{rev}, |
46
|
|
|
|
|
|
|
interleaved => $args->{interleaved} // 0, |
47
|
|
|
|
|
|
|
tag1 => $args->{tag1}, |
48
|
|
|
|
|
|
|
tag2 => $args->{tag2}, |
49
|
|
|
|
|
|
|
nocheck => $args->{nocheck} // 0, |
50
|
|
|
|
|
|
|
revcompl => $args->{revcompl} // 0, |
51
|
9
|
|
100
|
|
|
179
|
verbose => $args->{verbose} // 0, |
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
52
|
|
|
|
|
|
|
}; |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
|
55
|
9
|
|
|
|
|
33
|
my $object = bless $self, $class; |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
# Required to read STDIN? |
58
|
9
|
50
|
33
|
|
|
92
|
if ($self->{filename} eq '{{STDIN}}' or not $self->{filename}) { |
59
|
0
|
|
|
|
|
0
|
$self->{interleaved} = 1; |
60
|
0
|
|
|
|
|
0
|
$self->{stdin} = 1; |
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
|
63
|
9
|
100
|
|
|
|
32
|
if ($self->{interleaved}) { |
64
|
|
|
|
|
|
|
# Decode interleaved |
65
|
2
|
50
|
|
|
|
6
|
if ($self->{stdin}) { |
66
|
0
|
|
|
|
|
0
|
$self->{R1} = FASTX::Reader->new({ filename => '{{STDIN}}' }); |
67
|
|
|
|
|
|
|
} else { |
68
|
2
|
|
|
|
|
14
|
$self->{R1} = FASTX::Reader->new({ filename => "$self->{filename}"}); |
69
|
|
|
|
|
|
|
} |
70
|
|
|
|
|
|
|
} else { |
71
|
|
|
|
|
|
|
# Decode PE |
72
|
7
|
100
|
|
|
|
26
|
if ( ! defined $self->{rev} ) { |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
# Auto calculate reverse (R2) filename |
75
|
1
|
|
|
|
|
112
|
my $rev = basename($self->{filename}); |
76
|
|
|
|
|
|
|
|
77
|
1
|
50
|
33
|
|
|
7
|
if (defined $self->{tag1} and defined $self->{tag2}) { |
78
|
0
|
|
|
|
|
0
|
$rev =~s/$self->{tag1}/$self->{tag2}/; |
79
|
0
|
|
|
|
|
0
|
$rev = dirname($self->{basename}) . $rev; |
80
|
|
|
|
|
|
|
} else { |
81
|
|
|
|
|
|
|
|
82
|
1
|
|
|
|
|
3
|
$rev =~s/_R1/_R2/; |
83
|
1
|
50
|
|
|
|
19
|
say STDERR "R2 not provided, autoguess (_R1/_R2): $rev" if ($self->{verbose}); |
84
|
1
|
50
|
|
|
|
36
|
if ($rev eq basename($self->{filename}) ) { |
85
|
1
|
|
|
|
|
5
|
$rev =~s/_1\./_2./; |
86
|
1
|
50
|
|
|
|
23
|
say STDERR "R2 not provided for $self->{filename}, now autoguess (_1/_2): $rev" if ($self->{verbose}); |
87
|
|
|
|
|
|
|
} |
88
|
|
|
|
|
|
|
|
89
|
1
|
|
|
|
|
43
|
$rev = dirname($self->{filename}) . '/' . $rev; |
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
|
92
|
1
|
50
|
|
|
|
41
|
if (not -e $rev) { |
|
|
50
|
|
|
|
|
|
93
|
|
|
|
|
|
|
# TO DEFINE: confess("ERROR: The rev file for '$self->{filename}' was not found in '$rev'\n"); |
94
|
0
|
|
|
|
|
0
|
say STDERR "WARNING: Pair not specified and R2 \"$rev\" not found for R1 \"$self->{filename}\":\n trying parsing as interleaved.\n"; |
95
|
0
|
|
|
|
|
0
|
$self->{interleaved} = 1; |
96
|
0
|
|
|
|
|
0
|
$self->{nocheck} = 0; |
97
|
|
|
|
|
|
|
} elsif ($self->{filename} eq $rev) { |
98
|
0
|
|
|
|
|
0
|
say STDERR "WARNING: Pair not specified for \"$self->{filename}\":\n trying parsing as interleaved.\n"; |
99
|
0
|
|
|
|
|
0
|
$self->{interleaved} = 1; |
100
|
0
|
|
|
|
|
0
|
$self->{nocheck} = 0; |
101
|
|
|
|
|
|
|
} else { |
102
|
1
|
|
|
|
|
4
|
$self->{rev} = $rev; |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
|
107
|
7
|
|
|
|
|
73
|
$self->{R1} = FASTX::Reader->new({ filename => "$self->{filename}"}); |
108
|
|
|
|
|
|
|
$self->{R2} = FASTX::Reader->new({ filename => "$self->{rev}"}) |
109
|
5
|
50
|
|
|
|
283
|
if (not $self->{interleaved}); |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
} |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
|
114
|
6
|
|
|
|
|
372
|
return $object; |
115
|
|
|
|
|
|
|
} |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
sub getReads { |
120
|
6
|
|
|
6
|
1
|
1367
|
my $self = shift; |
121
|
|
|
|
|
|
|
#my ($fh, $aux) = @_; |
122
|
|
|
|
|
|
|
#@::::::: ::: |
123
|
6
|
|
|
|
|
193
|
my $pe; |
124
|
|
|
|
|
|
|
my $r1; |
125
|
6
|
|
|
|
|
0
|
my $r2; |
126
|
|
|
|
|
|
|
|
127
|
6
|
100
|
|
|
|
111
|
if ($self->{interleaved}) { |
128
|
2
|
|
|
|
|
29
|
$r1 = $self->{R1}->getRead(); |
129
|
2
|
|
|
|
|
13
|
$r2 = $self->{R1}->getRead(); |
130
|
|
|
|
|
|
|
} else { |
131
|
4
|
|
|
|
|
63
|
$r1 = $self->{R1}->getRead(); |
132
|
4
|
|
|
|
|
32
|
$r2 = $self->{R2}->getRead(); |
133
|
|
|
|
|
|
|
} |
134
|
|
|
|
|
|
|
|
135
|
6
|
50
|
33
|
|
|
141
|
if (! defined $r1->{name} and !defined $r2->{name}) { |
|
|
50
|
33
|
|
|
|
|
136
|
0
|
|
|
|
|
0
|
return; |
137
|
|
|
|
|
|
|
} elsif (! defined $r1->{name} or !defined $r2->{name}) { |
138
|
0
|
|
0
|
|
|
0
|
my $r = $r1->{name} // $r2->{name}; |
139
|
0
|
|
|
|
|
0
|
$self->{error} = "Premature termination, missing read mate for \"$r\""; |
140
|
0
|
|
|
|
|
0
|
return; |
141
|
|
|
|
|
|
|
} |
142
|
|
|
|
|
|
|
|
143
|
6
|
|
|
|
|
24
|
my $name_1; |
144
|
|
|
|
|
|
|
my $name_2; |
145
|
|
|
|
|
|
|
|
146
|
6
|
50
|
|
|
|
39
|
if ($self->{nocheck} != 1) { |
147
|
6
|
|
|
|
|
19
|
$name_1 = $r1->{name}; |
148
|
6
|
|
|
|
|
19
|
$name_2 = $r2->{name}; |
149
|
6
|
|
|
|
|
382
|
$name_1 =~s/${for_suffix_re}$//; |
150
|
6
|
|
|
|
|
174
|
$name_2 =~s/${rev_suffix_re}$//; |
151
|
6
|
100
|
|
|
|
37
|
if ($name_1 ne $name_2) { |
152
|
1
|
|
|
|
|
523
|
confess("Read name different in PE:\n[$r1->{name}] !=\n[$r2->{name}]\n"); |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
|
155
|
5
|
50
|
33
|
|
|
76
|
if (not $r1->{qual} or not $r2->{qual}) { |
156
|
0
|
|
|
|
|
0
|
confess("Missing quality for one of the two reads ($name_1, $name_2)"); |
157
|
|
|
|
|
|
|
} |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
|
161
|
5
|
|
33
|
|
|
24
|
$pe->{name} = $name_1 // $r1->{name}; |
162
|
5
|
|
|
|
|
27
|
$pe->{seq1} = $r1->{seq}; |
163
|
5
|
|
|
|
|
26
|
$pe->{qual1} = $r1->{qual}; |
164
|
|
|
|
|
|
|
|
165
|
5
|
100
|
|
|
|
24
|
if ($self->{revcompl}) { |
166
|
1
|
|
|
|
|
9
|
$pe->{seq2} = _rc( $r2->{seq} ); |
167
|
1
|
|
|
|
|
5
|
$pe->{qual2} = reverse( $r2->{qual} ); |
168
|
|
|
|
|
|
|
} else { |
169
|
4
|
|
|
|
|
10
|
$pe->{seq2} = $r2->{seq}; |
170
|
4
|
|
|
|
|
13
|
$pe->{qual2} = $r2->{qual}; |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
|
173
|
5
|
|
|
|
|
13
|
$pe->{comment1} = $r1->{comment}; |
174
|
5
|
|
|
|
|
25
|
$pe->{comment2} = $r2->{comment}; |
175
|
|
|
|
|
|
|
|
176
|
5
|
|
|
|
|
34
|
return $pe; |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
sub _rc { |
185
|
1
|
|
|
1
|
|
3
|
my $sequence = shift @_; |
186
|
1
|
|
|
|
|
6
|
$sequence = reverse($sequence); |
187
|
1
|
|
|
|
|
4
|
$sequence =~tr/ACGTacgt/TGCAtgca/; |
188
|
1
|
|
|
|
|
7
|
return $sequence; |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
1; |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
__END__ |