| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package App::SimulateReads::Command::Digest; |
|
2
|
|
|
|
|
|
|
# ABSTRACT: digest command class. Simulate single-end and paired-end reads. |
|
3
|
|
|
|
|
|
|
|
|
4
|
1
|
|
|
1
|
|
2013
|
use App::SimulateReads::Base 'class'; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
6
|
|
|
5
|
1
|
|
|
1
|
|
8
|
use App::SimulateReads::Quality::Handle; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
21
|
|
|
6
|
1
|
|
|
1
|
|
304
|
use App::SimulateReads::Fastq::SingleEnd; |
|
|
1
|
|
|
|
|
358
|
|
|
|
1
|
|
|
|
|
38
|
|
|
7
|
1
|
|
|
1
|
|
303
|
use App::SimulateReads::Fastq::PairedEnd; |
|
|
1
|
|
|
|
|
348
|
|
|
|
1
|
|
|
|
|
40
|
|
|
8
|
1
|
|
|
1
|
|
369
|
use App::SimulateReads::Simulator; |
|
|
1
|
|
|
|
|
388
|
|
|
|
1
|
|
|
|
|
47
|
|
|
9
|
1
|
|
|
1
|
|
8
|
use Path::Class 'file'; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
61
|
|
|
10
|
1
|
|
|
1
|
|
6
|
use File::Path 'make_path'; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
38
|
|
|
11
|
1
|
|
|
1
|
|
5
|
use Pod::Usage; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
131
|
|
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
extends 'App::SimulateReads::CLI::Command'; |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
our $VERSION = '0.06'; # VERSION |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
use constant { |
|
18
|
1
|
|
|
|
|
1625
|
COUNT_LOOPS_BY_OPT => ['coverage', 'number-of-reads'], |
|
19
|
|
|
|
|
|
|
STRAND_BIAS_OPT => ['random', 'plus', 'minus'], |
|
20
|
|
|
|
|
|
|
SEQID_WEIGHT_OPT => ['length', 'same', 'file'], |
|
21
|
|
|
|
|
|
|
SEQUENCING_TYPE_OPT => ['single-end', 'paired-end'] |
|
22
|
1
|
|
|
1
|
|
6
|
}; |
|
|
1
|
|
|
|
|
2
|
|
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
override 'opt_spec' => sub { |
|
25
|
|
|
|
|
|
|
super, |
|
26
|
|
|
|
|
|
|
'prefix|p=s', |
|
27
|
|
|
|
|
|
|
'verbose|v', |
|
28
|
|
|
|
|
|
|
'output-dir|o=s', |
|
29
|
|
|
|
|
|
|
'jobs|j=i', |
|
30
|
|
|
|
|
|
|
'gzip|z!', |
|
31
|
|
|
|
|
|
|
'coverage|c=f', |
|
32
|
|
|
|
|
|
|
'read-size|r=i', |
|
33
|
|
|
|
|
|
|
'fragment-mean|m=i', |
|
34
|
|
|
|
|
|
|
'fragment-stdd|d=i', |
|
35
|
|
|
|
|
|
|
'sequencing-error|e=f', |
|
36
|
|
|
|
|
|
|
'sequencing-type|t=s', |
|
37
|
|
|
|
|
|
|
'quality-profile|q=s', |
|
38
|
|
|
|
|
|
|
'strand-bias|b=s', |
|
39
|
|
|
|
|
|
|
'seqid-weight|w=s', |
|
40
|
|
|
|
|
|
|
'number-of-reads|n=i', |
|
41
|
|
|
|
|
|
|
'weight-file|f=s' |
|
42
|
|
|
|
|
|
|
}; |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
sub _default_opt { |
|
45
|
0
|
|
|
0
|
|
|
'verbose' => 0, |
|
46
|
|
|
|
|
|
|
'prefix' => 'out', |
|
47
|
|
|
|
|
|
|
'output-dir' => '.', |
|
48
|
|
|
|
|
|
|
'jobs' => 1, |
|
49
|
|
|
|
|
|
|
'gzip' => 1, |
|
50
|
|
|
|
|
|
|
'count-loops-by' => 'coverage', |
|
51
|
|
|
|
|
|
|
'coverage' => 1, |
|
52
|
|
|
|
|
|
|
'strand-bias' => 'random', |
|
53
|
|
|
|
|
|
|
'seqid-weight' => 'length', |
|
54
|
|
|
|
|
|
|
'sequencing-type' => 'paired-end', |
|
55
|
|
|
|
|
|
|
'fragment-mean' => 300, |
|
56
|
|
|
|
|
|
|
'fragment-stdd' => 50, |
|
57
|
|
|
|
|
|
|
'sequencing-error' => 0.005, |
|
58
|
|
|
|
|
|
|
'read-size' => 101, |
|
59
|
|
|
|
|
|
|
'quality-profile' => 'hiseq' |
|
60
|
|
|
|
|
|
|
} |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
sub _log_msg_opt { |
|
63
|
0
|
|
|
0
|
|
|
my ($self, $opts) = @_; |
|
64
|
0
|
|
|
|
|
|
while (my ($key, $value) = each %$opts) { |
|
65
|
0
|
0
|
|
|
|
|
next if ref($value) =~ /Fastq/; |
|
66
|
0
|
0
|
|
|
|
|
$value = "not defined" if not defined $value; |
|
67
|
0
|
|
|
|
|
|
$key =~ s/_/ /g; |
|
68
|
0
|
|
|
|
|
|
log_msg " => $key $value"; |
|
69
|
|
|
|
|
|
|
} |
|
70
|
|
|
|
|
|
|
} |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
sub _quality_profile_report { |
|
73
|
0
|
|
|
0
|
|
|
my $quality = App::SimulateReads::Quality::Handle->new; |
|
74
|
0
|
|
|
|
|
|
return $quality->make_report; |
|
75
|
|
|
|
|
|
|
} |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
sub validate_args { |
|
78
|
0
|
|
|
0
|
1
|
|
my ($self, $args) = @_; |
|
79
|
0
|
|
|
|
|
|
my $fasta_file = shift @$args; |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
# Mandatory fasta file |
|
82
|
0
|
0
|
|
|
|
|
if (not defined $fasta_file) { |
|
83
|
0
|
|
|
|
|
|
die "Missing fasta file\n"; |
|
84
|
|
|
|
|
|
|
} |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
# Is it really a file? |
|
87
|
0
|
0
|
|
|
|
|
if (not -f $fasta_file) { |
|
88
|
0
|
|
|
|
|
|
die "<$fasta_file> is not a file. Please, give me a valid fasta file\n"; |
|
89
|
|
|
|
|
|
|
} |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
# Check the file extension: fasta, fa, fna, ffn followed, or not, by .gz |
|
92
|
0
|
0
|
|
|
|
|
if ($fasta_file !~ /.+\.(fasta|fa|fna|ffn)(\.gz)?$/) { |
|
93
|
0
|
|
|
|
|
|
die "<$fasta_file> does not seem to be a fasta file. Please check the file extension\n"; |
|
94
|
|
|
|
|
|
|
} |
|
95
|
|
|
|
|
|
|
|
|
96
|
0
|
0
|
|
|
|
|
die "Too many arguments: '@$args'\n" if @$args; |
|
97
|
|
|
|
|
|
|
} |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
sub validate_opts { |
|
100
|
0
|
|
|
0
|
1
|
|
my ($self, $opts) = @_; |
|
101
|
0
|
|
|
|
|
|
my %default_opt = $self->_default_opt; |
|
102
|
0
|
|
|
|
|
|
$self->fill_opts($opts, \%default_opt); |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# Possible alternatives |
|
105
|
0
|
|
|
|
|
|
my %STRAND_BIAS = map { $_ => 1 } @{ &STRAND_BIAS_OPT }; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
106
|
0
|
|
|
|
|
|
my %SEQID_WEIGHT = map { $_ => 1 } @{ &SEQID_WEIGHT_OPT }; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
107
|
0
|
|
|
|
|
|
my %SEQUENCING_TYPE = map { $_ => 1 } @{ &SEQUENCING_TYPE_OPT }; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
108
|
0
|
|
|
|
|
|
my %QUALITY_PROFILE = %{ $self->_quality_profile_report }; |
|
|
0
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
# prefix |
|
111
|
0
|
0
|
|
|
|
|
if ($opts->{prefix} =~ /([\/\\])/) { |
|
112
|
0
|
|
|
|
|
|
die "Invalid character in 'prefix' option: $opts->{prefix} => '$1'\n"; |
|
113
|
|
|
|
|
|
|
} |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
# jobs > 0 |
|
116
|
0
|
0
|
|
|
|
|
if ($opts->{jobs} <= 0) { |
|
117
|
0
|
|
|
|
|
|
die "Option 'jobs' requires an integer greater than zero, not $opts->{jobs}\n"; |
|
118
|
|
|
|
|
|
|
} |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
# 0 <= sequencing_error <= 1 |
|
121
|
0
|
0
|
0
|
|
|
|
if (0 > $opts->{'sequencing-error'} || $opts->{'sequencing-error'} > 1) { |
|
122
|
0
|
|
|
|
|
|
die "Option 'sequencing-error' requires a value between zero and one, not $opts->{'sequencing-error'}\n"; |
|
123
|
|
|
|
|
|
|
} |
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# quality_profile |
|
126
|
0
|
0
|
0
|
|
|
|
if ((not exists $QUALITY_PROFILE{$opts->{'quality-profile'}}) && ($opts->{'quality-profile'} ne 'poisson')) { |
|
127
|
0
|
|
|
|
|
|
my $opt = join ', ' => keys %QUALITY_PROFILE; |
|
128
|
0
|
|
|
|
|
|
die "Option 'quality-profile' requires one of these arguments: $opt and poisson. Not $opts->{'quality-profile'}\n"; |
|
129
|
|
|
|
|
|
|
} |
|
130
|
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
# 0 < read-size <= 101 |
|
132
|
0
|
0
|
|
|
|
|
if (0 > $opts->{'read-size'}) { |
|
133
|
0
|
|
|
|
|
|
die "Option 'read-size' requires an integer greater than zero, not $opts->{'read-size'}\n"; |
|
134
|
|
|
|
|
|
|
} |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
# read-size if quality-profile is not poisson, test for available sizes |
|
137
|
0
|
|
|
|
|
|
my $quality_entry = $QUALITY_PROFILE{$opts->{'quality-profile'}}; |
|
138
|
0
|
|
|
|
|
|
my %sizes = map { $_->{size} => 1 } @$quality_entry; |
|
|
0
|
|
|
|
|
|
|
|
139
|
0
|
0
|
0
|
|
|
|
if ((not $sizes{$opts->{'read-size'}}) && ($opts->{'quality-profile'} ne 'poisson')) { |
|
140
|
0
|
|
|
|
|
|
my $opt = join ', ' => keys %sizes; |
|
141
|
0
|
|
|
|
|
|
die "Option 'read-size' requires one of these arguments for $opts->{'quality-profile'}: $opt. Not $opts->{'read-size'}\n"; |
|
142
|
|
|
|
|
|
|
} |
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
# strand_bias (STRAND_BIAS_OPT) |
|
145
|
0
|
0
|
|
|
|
|
if (not exists $STRAND_BIAS{$opts->{'strand-bias'}}) { |
|
146
|
0
|
|
|
|
|
|
my $opt = join ', ' => keys %STRAND_BIAS; |
|
147
|
0
|
|
|
|
|
|
die "Option 'strand-bias' requires one of these arguments: $opt. Not $opts->{'strand-bias'}\n"; |
|
148
|
|
|
|
|
|
|
} |
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
# sequencing_type (SEQUENCING_TYPE_OPT) |
|
151
|
0
|
0
|
|
|
|
|
if (not exists $SEQUENCING_TYPE{$opts->{'sequencing-type'}}) { |
|
152
|
0
|
|
|
|
|
|
my $opt = join ', ' => keys %SEQUENCING_TYPE; |
|
153
|
0
|
|
|
|
|
|
die "Option 'sequencing-type' requires one of these arguments: $opt not $opts->{'sequencing-type'}\n"; |
|
154
|
|
|
|
|
|
|
} |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
## Dependently validated arguments |
|
157
|
|
|
|
|
|
|
# fragment_mean and fragment_stdd |
|
158
|
0
|
0
|
|
|
|
|
if ($opts->{'sequencing-type'} eq 'paired-end') { |
|
159
|
|
|
|
|
|
|
# fragment_mean > 0 |
|
160
|
0
|
0
|
|
|
|
|
if ($opts->{'fragment-mean'} <= 0) { |
|
161
|
0
|
|
|
|
|
|
die "Option 'fragment-mean' requires an integer greater than zero, not $opts->{'fragment-mean'}\n"; |
|
162
|
|
|
|
|
|
|
} |
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
# fragment_stdd > 0 |
|
165
|
0
|
0
|
|
|
|
|
if ($opts->{'fragment-stdd'} < 0) { |
|
166
|
0
|
|
|
|
|
|
die "Option 'fragment-stdd' requires an integer greater or equal to zero, not $opts->{'fragment-stdd'}\n"; |
|
167
|
|
|
|
|
|
|
} |
|
168
|
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
# (fragment_mean - fragment_stdd) >= read_size |
|
170
|
0
|
0
|
|
|
|
|
if (($opts->{'fragment-mean'} - $opts->{'fragment-stdd'}) < $opts->{'read-size'}) { |
|
171
|
|
|
|
|
|
|
die "Option 'fragment-mean' minus 'fragment-stdd' requires a value greater or equal read-size, not " . |
|
172
|
0
|
|
|
|
|
|
($opts->{'fragment-mean'} - $opts->{'fragment-stdd'}) . "\n"; |
|
173
|
|
|
|
|
|
|
} |
|
174
|
|
|
|
|
|
|
} |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
# count-loops-by (COUNT_LOOPS_BY_OPT). The default value is counting by coverage |
|
177
|
|
|
|
|
|
|
# Or calculate number of reads by coverage, or the user pass the number-of-reads. |
|
178
|
|
|
|
|
|
|
# number-of-reads option overrides coverage |
|
179
|
0
|
0
|
|
|
|
|
if (exists $opts->{'number-of-reads'}) { |
|
180
|
|
|
|
|
|
|
# number_of_reads > 0 |
|
181
|
0
|
0
|
|
|
|
|
if ($opts->{'number-of-reads'} <= 0) { |
|
182
|
0
|
|
|
|
|
|
die "Option 'number-of-reads' requires a value greater than zero, not $opts->{'number-of-reads'}\n"; |
|
183
|
|
|
|
|
|
|
} |
|
184
|
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
# sequencing_type eq paired-end requires at least 2 reads |
|
186
|
0
|
0
|
0
|
|
|
|
if ($opts->{'number-of-reads'} < 2 && $opts->{'sequencing-type'} eq 'paired-end') { |
|
187
|
0
|
|
|
|
|
|
die "Option 'number-of-reads' requires a value greater or equal to 2 for paired-end reads, not $opts->{'number-of-reads'}\n"; |
|
188
|
|
|
|
|
|
|
} |
|
189
|
|
|
|
|
|
|
} |
|
190
|
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
# coverage > 0 |
|
192
|
0
|
0
|
|
|
|
|
if ($opts->{coverage} <= 0) { |
|
193
|
0
|
|
|
|
|
|
die "Option 'coverage' requires a value greater than zero, not $opts->{coverage}\n"; |
|
194
|
|
|
|
|
|
|
} |
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
# seqid-weight (SEQID_WEIGHT_OPT) |
|
197
|
0
|
0
|
|
|
|
|
if (not exists $SEQID_WEIGHT{$opts->{'seqid-weight'}}) { |
|
198
|
0
|
|
|
|
|
|
my $opt = join ', ' => keys %SEQID_WEIGHT; |
|
199
|
0
|
|
|
|
|
|
die "Option 'seqid-weight' requires one of these arguments: $opt not $opts->{'seqid_weight'}\n"; |
|
200
|
|
|
|
|
|
|
} |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
# seqid-weight eq 'file' requires a weight-file |
|
203
|
0
|
0
|
|
|
|
|
if ($opts->{'seqid-weight'} eq 'file') { |
|
204
|
0
|
0
|
|
|
|
|
if (not defined $opts->{'weight-file'}) { |
|
205
|
0
|
|
|
|
|
|
die "Option 'seqid-weight=file' requires the argument 'weight-file' with a tab-separated values file\n"; |
|
206
|
|
|
|
|
|
|
} |
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
# It is defined, but the file exists? |
|
209
|
0
|
0
|
|
|
|
|
if (not -f $opts->{'weight-file'}) { |
|
210
|
0
|
|
|
|
|
|
die "Option 'weight-file' requires a valid file, not $opts->{'weight-file'}\n"; |
|
211
|
|
|
|
|
|
|
} |
|
212
|
|
|
|
|
|
|
} |
|
213
|
|
|
|
|
|
|
} |
|
214
|
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
sub execute { |
|
216
|
0
|
|
|
0
|
1
|
|
my ($self, $opts, $args) = @_; |
|
217
|
0
|
|
|
|
|
|
my $fasta_file = shift @$args; |
|
218
|
|
|
|
|
|
|
|
|
219
|
0
|
|
|
|
|
|
my %default_opt = $self->_default_opt; |
|
220
|
0
|
|
|
|
|
|
$self->fill_opts($opts, \%default_opt); |
|
221
|
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
# Set if user wants a verbose log |
|
223
|
0
|
|
|
|
|
|
$LOG_VERBOSE = $opts->{verbose}; |
|
224
|
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
# Set default count-loop-by behavior |
|
226
|
0
|
0
|
|
|
|
|
$opts->{'count-loops-by'} = 'number-of-reads' if exists $opts->{'number-of-reads'}; |
|
227
|
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
# Create output directory if it not exist |
|
229
|
0
|
|
|
|
|
|
make_path($opts->{'output-dir'}, {error => \my $err_list}); |
|
230
|
0
|
|
|
|
|
|
my $err_dir; |
|
231
|
0
|
0
|
|
|
|
|
if (@$err_list) { |
|
232
|
0
|
|
|
|
|
|
for (@$err_list) { |
|
233
|
0
|
|
|
|
|
|
my ($dir, $message) = %$_; |
|
234
|
0
|
|
|
|
|
|
$err_dir .= "Problem creating '$dir': $message\n"; |
|
235
|
|
|
|
|
|
|
} |
|
236
|
0
|
|
|
|
|
|
die "$err_dir"; |
|
237
|
|
|
|
|
|
|
} |
|
238
|
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
# Concatenate output-dir to prefix |
|
240
|
0
|
|
|
|
|
|
my $prefix = file($opts->{'output-dir'}, $opts->{prefix}); |
|
241
|
0
|
|
|
|
|
|
$opts->{prefix} = "$prefix"; |
|
242
|
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
244
|
|
|
|
|
|
|
# Log presentation header |
|
245
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
246
|
0
|
|
|
|
|
|
my $time_stamp = localtime; |
|
247
|
0
|
|
|
|
|
|
my $progname = $self->progname; |
|
248
|
0
|
|
|
|
|
|
my $argv = $self->argv; |
|
249
|
0
|
|
|
|
|
|
log_msg <<"HEADER"; |
|
250
|
|
|
|
|
|
|
-------------------------------------------------------- |
|
251
|
|
|
|
|
|
|
Date $time_stamp |
|
252
|
|
|
|
|
|
|
$progname Copyright (C) 2017 Thiago L. A. Miller |
|
253
|
|
|
|
|
|
|
-------------------------------------------------------- |
|
254
|
|
|
|
|
|
|
:: Arguments passed by the user: |
|
255
|
|
|
|
|
|
|
=> '@$argv' |
|
256
|
|
|
|
|
|
|
HEADER |
|
257
|
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
259
|
|
|
|
|
|
|
# Construct the Fastq and Simulator classes |
|
260
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
261
|
|
|
|
|
|
|
my %paired_end_param = ( |
|
262
|
|
|
|
|
|
|
quality_profile => $opts->{'quality-profile'}, |
|
263
|
|
|
|
|
|
|
sequencing_error => $opts->{'sequencing-error'}, |
|
264
|
|
|
|
|
|
|
read_size => $opts->{'read-size'}, |
|
265
|
|
|
|
|
|
|
fragment_mean => $opts->{'fragment-mean'}, |
|
266
|
0
|
|
|
|
|
|
fragment_stdd => $opts->{'fragment-stdd'} |
|
267
|
|
|
|
|
|
|
); |
|
268
|
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
my %single_end_param = ( |
|
270
|
|
|
|
|
|
|
quality_profile => $opts->{'quality-profile'}, |
|
271
|
|
|
|
|
|
|
sequencing_error => $opts->{'sequencing-error'}, |
|
272
|
0
|
|
|
|
|
|
read_size => $opts->{'read-size'} |
|
273
|
|
|
|
|
|
|
); |
|
274
|
|
|
|
|
|
|
|
|
275
|
0
|
|
|
|
|
|
my $fastq; |
|
276
|
0
|
0
|
|
|
|
|
if ($opts->{'sequencing-type'} eq 'paired-end') { |
|
277
|
0
|
|
|
|
|
|
log_msg ":: Creating paired-end fastq generator ..."; |
|
278
|
0
|
|
|
|
|
|
$self->_log_msg_opt(\%paired_end_param); |
|
279
|
0
|
|
|
|
|
|
$fastq = App::SimulateReads::Fastq::PairedEnd->new(%paired_end_param); |
|
280
|
|
|
|
|
|
|
} else { |
|
281
|
0
|
|
|
|
|
|
log_msg ":: Creating single-end fastq generator ..."; |
|
282
|
0
|
|
|
|
|
|
$self->_log_msg_opt(\%single_end_param); |
|
283
|
0
|
|
|
|
|
|
$fastq = App::SimulateReads::Fastq::SingleEnd->new(%single_end_param); |
|
284
|
|
|
|
|
|
|
} |
|
285
|
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
my %simulator_param = ( |
|
287
|
|
|
|
|
|
|
fastq => $fastq, |
|
288
|
|
|
|
|
|
|
fasta_file => $fasta_file, |
|
289
|
|
|
|
|
|
|
prefix => $opts->{'prefix'}, |
|
290
|
|
|
|
|
|
|
output_gzip => $opts->{'gzip'}, |
|
291
|
|
|
|
|
|
|
count_loops_by => $opts->{'count-loops-by'}, |
|
292
|
|
|
|
|
|
|
number_of_reads => $opts->{'number-of-reads'}, |
|
293
|
|
|
|
|
|
|
coverage => $opts->{'coverage'}, |
|
294
|
|
|
|
|
|
|
jobs => $opts->{'jobs'}, |
|
295
|
|
|
|
|
|
|
strand_bias => $opts->{'strand-bias'}, |
|
296
|
|
|
|
|
|
|
seqid_weight => $opts->{'seqid-weight'}, |
|
297
|
0
|
|
|
|
|
|
weight_file => $opts->{'weight-file'} |
|
298
|
|
|
|
|
|
|
); |
|
299
|
|
|
|
|
|
|
|
|
300
|
0
|
|
|
|
|
|
my $simulator; |
|
301
|
0
|
|
|
|
|
|
log_msg ":: Creating simulator ..."; |
|
302
|
0
|
|
|
|
|
|
$self->_log_msg_opt(\%simulator_param); |
|
303
|
0
|
|
|
|
|
|
$simulator = App::SimulateReads::Simulator->new(%simulator_param); |
|
304
|
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
306
|
|
|
|
|
|
|
# Let's simulate it! |
|
307
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
308
|
0
|
|
|
|
|
|
log_msg ":: Running simulation ..."; |
|
309
|
0
|
|
|
|
|
|
$simulator->run_simulation; |
|
310
|
|
|
|
|
|
|
|
|
311
|
0
|
|
|
|
|
|
log_msg ":: End simulation. So long, and thanks for all the fish!"; |
|
312
|
|
|
|
|
|
|
} |
|
313
|
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
__END__ |
|
315
|
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
=pod |
|
317
|
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
=encoding UTF-8 |
|
319
|
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
=head1 NAME |
|
321
|
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
App::SimulateReads::Command::Digest - digest command class. Simulate single-end and paired-end reads. |
|
323
|
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
=head1 VERSION |
|
325
|
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
version 0.06 |
|
327
|
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
329
|
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
simulate_reads digest [options] <fasta-file> |
|
331
|
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
Arguments: |
|
333
|
|
|
|
|
|
|
a fasta-file |
|
334
|
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
Options: |
|
336
|
|
|
|
|
|
|
-h, --help brief help message |
|
337
|
|
|
|
|
|
|
-M, --man full documentation |
|
338
|
|
|
|
|
|
|
-v, --verbose print log messages |
|
339
|
|
|
|
|
|
|
-p, --prefix prefix output [default:"out"] |
|
340
|
|
|
|
|
|
|
-o, --output-dir output directory [default:"."] |
|
341
|
|
|
|
|
|
|
-j, --jobs number of jobs [default:"1"; Integer] |
|
342
|
|
|
|
|
|
|
-z, --gzip compress output file |
|
343
|
|
|
|
|
|
|
-c, --coverage fastq-file coverage [default:"1", Number] |
|
344
|
|
|
|
|
|
|
-n, --number-of-reads directly set the number of reads |
|
345
|
|
|
|
|
|
|
[default:"1", Integer] |
|
346
|
|
|
|
|
|
|
-t, --sequencing-type single-end or paired-end reads |
|
347
|
|
|
|
|
|
|
[default:"paired-end"] |
|
348
|
|
|
|
|
|
|
-q, --quality-profile illumina sequencing system profiles |
|
349
|
|
|
|
|
|
|
[default:"hiseq"] |
|
350
|
|
|
|
|
|
|
-e, --sequencing-error sequencing error rate |
|
351
|
|
|
|
|
|
|
[default:"0.005"; Number] |
|
352
|
|
|
|
|
|
|
-r, --read-size the read size [default:"101"; Integer] |
|
353
|
|
|
|
|
|
|
-m, --fragment-mean the fragment mean size for paired-end reads |
|
354
|
|
|
|
|
|
|
[default:"300"; Integer] |
|
355
|
|
|
|
|
|
|
-d, --fragment-stdd the fragment standard deviation size for |
|
356
|
|
|
|
|
|
|
paired-end reads [default:"50"; Integer] |
|
357
|
|
|
|
|
|
|
-b, --strand-bias which strand to be used: plus, minus and random |
|
358
|
|
|
|
|
|
|
[default:"random"] |
|
359
|
|
|
|
|
|
|
-w, --seqid-weight seqid raffle type: length, same, file |
|
360
|
|
|
|
|
|
|
[default: "length"] |
|
361
|
|
|
|
|
|
|
-f, --weight-file weight file when seqid-weight=file |
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
364
|
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
B<simulate_reads> will read the given input file and do something |
|
366
|
|
|
|
|
|
|
useful with the contents thereof. |
|
367
|
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
=head1 OPTIONS |
|
369
|
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
=over 8 |
|
371
|
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
=item B<--help> |
|
373
|
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
Print a brief help message and exits. |
|
375
|
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
=item B<--man> |
|
377
|
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
Prints the manual page and exits. |
|
379
|
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
=item B<--verbose> |
|
381
|
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
Prints log information to standard error |
|
383
|
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
=item B<--prefix> |
|
385
|
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
Concatenates the prefix to the output-file name. |
|
387
|
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
=item B<--output-dir> |
|
389
|
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
Creates output-file inside output-dir. If output-dir |
|
391
|
|
|
|
|
|
|
does not exist, it is created recursively |
|
392
|
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
=item B<--jobs> |
|
394
|
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
Sets the number of child jobs to be created |
|
396
|
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
=item B<--gzip> |
|
398
|
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
Compress the output-file with gzip algorithm. It is |
|
400
|
|
|
|
|
|
|
possible to pass --no-gzip if one wants |
|
401
|
|
|
|
|
|
|
uncompressed output-file |
|
402
|
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=item B<--read-size> |
|
404
|
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
Sets the read size. For now the unique valid value is 101 |
|
406
|
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
=item B<--coverage> |
|
408
|
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
Calculates the number of reads based on the sequence |
|
410
|
|
|
|
|
|
|
coverage: number_of_reads = (sequence_size * coverage) / read_size |
|
411
|
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
=item B<--number-of-reads> |
|
413
|
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
Sets directly the number of reads desired. It overrides coverage, |
|
415
|
|
|
|
|
|
|
in case the two options are given |
|
416
|
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
=item B<--sequencing-type> |
|
418
|
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
Sets the sequencing type to single-end or paired-end |
|
420
|
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=item B<--fragment-mean> |
|
422
|
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
If the sequencing-type is set to paired-end, it sets the |
|
424
|
|
|
|
|
|
|
fragment mean |
|
425
|
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
=item B<--fragment-stdd> |
|
427
|
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
If the sequencing-type is set to paired-end, it sets the |
|
429
|
|
|
|
|
|
|
fragment standard deviation |
|
430
|
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
=item B<--sequencing-error> |
|
432
|
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
Sets the sequencing error rate. Valid values are between zero and one |
|
434
|
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
=item B<--quality-profile> |
|
436
|
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
Sets the illumina sequencing system profile for quality. For now, the unique |
|
438
|
|
|
|
|
|
|
valid values are hiseq and poisson |
|
439
|
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
=item B<--strand-bias> |
|
441
|
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
Sets which strand to use to make a read. Valid options are plus, minus and |
|
443
|
|
|
|
|
|
|
random - if you want to randomly calculte the strand for each read |
|
444
|
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
=item B<--seqid-weight> |
|
446
|
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
Sets the seqid (e.g. chromossome, ensembl id) raffle behavior. Valid options are |
|
448
|
|
|
|
|
|
|
length, same and file. If it is set to 'same', all seqid receives the same weight |
|
449
|
|
|
|
|
|
|
when raffling. If it is set to 'length', the seqid weight is calculated based on |
|
450
|
|
|
|
|
|
|
the seqid sequence length. And finally, if it is set to 'file', the user must set |
|
451
|
|
|
|
|
|
|
the option --weight-file. For details, see B<--weight-file> |
|
452
|
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
=item B<--weight-file> |
|
454
|
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
If --seqid-weight is set to file, then this option becomes mandatory. A valid |
|
456
|
|
|
|
|
|
|
weight file is a tab-separated values file with 2 columns. The first column is |
|
457
|
|
|
|
|
|
|
for the seqid and the second column for the desired weight. Valid weights are integers |
|
458
|
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=back |
|
460
|
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=head1 AUTHOR |
|
462
|
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
Thiago L. A. Miller <tmiller@mochsl.org.br> |
|
464
|
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
466
|
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
This software is Copyright (c) 2017 by Teaching and Research Institute from SÃrio-Libanês Hospital. |
|
468
|
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
This is free software, licensed under: |
|
470
|
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
The GNU General Public License, Version 3, June 2007 |
|
472
|
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
=cut |