line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package App::SimulateReads::Command::Digest; |
2
|
|
|
|
|
|
|
# ABSTRACT: digest command class. Simulate single-end and paired-end reads. |
3
|
|
|
|
|
|
|
|
4
|
1
|
|
|
1
|
|
1837
|
use App::SimulateReads::Base 'class'; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
8
|
|
5
|
1
|
|
|
1
|
|
8
|
use App::SimulateReads::Quality::Handle; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
41
|
|
6
|
1
|
|
|
1
|
|
382
|
use App::SimulateReads::Fastq::SingleEnd; |
|
1
|
|
|
|
|
410
|
|
|
1
|
|
|
|
|
52
|
|
7
|
1
|
|
|
1
|
|
376
|
use App::SimulateReads::Fastq::PairedEnd; |
|
1
|
|
|
|
|
401
|
|
|
1
|
|
|
|
|
41
|
|
8
|
1
|
|
|
1
|
|
323
|
use App::SimulateReads::Simulator; |
|
1
|
|
|
|
|
449
|
|
|
1
|
|
|
|
|
45
|
|
9
|
1
|
|
|
1
|
|
8
|
use Path::Class 'file'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
63
|
|
10
|
1
|
|
|
1
|
|
5
|
use File::Path 'make_path'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
43
|
|
11
|
1
|
|
|
1
|
|
5
|
use Pod::Usage; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
146
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
extends 'App::SimulateReads::CLI::Command'; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
our $VERSION = '0.05'; # VERSION |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
use constant { |
18
|
1
|
|
|
|
|
1602
|
COUNT_LOOPS_BY_OPT => ['coverage', 'number-of-reads'], |
19
|
|
|
|
|
|
|
STRAND_BIAS_OPT => ['random', 'plus', 'minus'], |
20
|
|
|
|
|
|
|
SEQID_WEIGHT_OPT => ['length', 'same', 'file'], |
21
|
|
|
|
|
|
|
SEQUENCING_TYPE_OPT => ['single-end', 'paired-end'] |
22
|
1
|
|
|
1
|
|
6
|
}; |
|
1
|
|
|
|
|
3
|
|
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
override 'opt_spec' => sub { |
25
|
|
|
|
|
|
|
super, |
26
|
|
|
|
|
|
|
'prefix|p=s', |
27
|
|
|
|
|
|
|
'verbose|v', |
28
|
|
|
|
|
|
|
'output-dir|o=s', |
29
|
|
|
|
|
|
|
'jobs|j=i', |
30
|
|
|
|
|
|
|
'gzip|z!', |
31
|
|
|
|
|
|
|
'coverage|c=f', |
32
|
|
|
|
|
|
|
'read-size|r=i', |
33
|
|
|
|
|
|
|
'fragment-mean|m=i', |
34
|
|
|
|
|
|
|
'fragment-stdd|d=i', |
35
|
|
|
|
|
|
|
'sequencing-error|e=f', |
36
|
|
|
|
|
|
|
'sequencing-type|t=s', |
37
|
|
|
|
|
|
|
'quality-profile|q=s', |
38
|
|
|
|
|
|
|
'strand-bias|b=s', |
39
|
|
|
|
|
|
|
'seqid-weight|w=s', |
40
|
|
|
|
|
|
|
'number-of-reads|n=i', |
41
|
|
|
|
|
|
|
'weight-file|f=s' |
42
|
|
|
|
|
|
|
}; |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
sub _default_opt { |
45
|
0
|
|
|
0
|
|
|
'verbose' => 0, |
46
|
|
|
|
|
|
|
'prefix' => 'out', |
47
|
|
|
|
|
|
|
'output-dir' => '.', |
48
|
|
|
|
|
|
|
'jobs' => 1, |
49
|
|
|
|
|
|
|
'gzip' => 1, |
50
|
|
|
|
|
|
|
'count-loops-by' => 'coverage', |
51
|
|
|
|
|
|
|
'coverage' => 1, |
52
|
|
|
|
|
|
|
'strand-bias' => 'random', |
53
|
|
|
|
|
|
|
'seqid-weight' => 'length', |
54
|
|
|
|
|
|
|
'sequencing-type' => 'paired-end', |
55
|
|
|
|
|
|
|
'fragment-mean' => 300, |
56
|
|
|
|
|
|
|
'fragment-stdd' => 50, |
57
|
|
|
|
|
|
|
'sequencing-error' => 0.005, |
58
|
|
|
|
|
|
|
'read-size' => 101, |
59
|
|
|
|
|
|
|
'quality-profile' => 'hiseq' |
60
|
|
|
|
|
|
|
} |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
sub _log_msg_opt { |
63
|
0
|
|
|
0
|
|
|
my ($self, $opts) = @_; |
64
|
0
|
|
|
|
|
|
while (my ($key, $value) = each %$opts) { |
65
|
0
|
0
|
|
|
|
|
next if ref($value) =~ /Fastq/; |
66
|
0
|
0
|
|
|
|
|
$value = "not defined" if not defined $value; |
67
|
0
|
|
|
|
|
|
$key =~ s/_/ /g; |
68
|
0
|
|
|
|
|
|
log_msg " => $key $value"; |
69
|
|
|
|
|
|
|
} |
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
sub _quality_profile_report { |
73
|
0
|
|
|
0
|
|
|
my $quality = App::SimulateReads::Quality::Handle->new; |
74
|
0
|
|
|
|
|
|
return $quality->make_report; |
75
|
|
|
|
|
|
|
} |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
sub validate_args { |
78
|
0
|
|
|
0
|
1
|
|
my ($self, $args) = @_; |
79
|
0
|
|
|
|
|
|
my $fasta_file = shift @$args; |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
# Mandatory fasta file |
82
|
0
|
0
|
|
|
|
|
if (not defined $fasta_file) { |
83
|
0
|
|
|
|
|
|
die "Missing fasta file\n"; |
84
|
|
|
|
|
|
|
} |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
# Is it really a file? |
87
|
0
|
0
|
|
|
|
|
if (not -f $fasta_file) { |
88
|
0
|
|
|
|
|
|
die "<$fasta_file> is not a file. Please, give me a valid fasta file\n"; |
89
|
|
|
|
|
|
|
} |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
# Check the file extension: fasta, fa, fna, ffn followed, or not, by .gz |
92
|
0
|
0
|
|
|
|
|
if ($fasta_file !~ /.+\.(fasta|fa|fna|ffn)(\.gz)?$/) { |
93
|
0
|
|
|
|
|
|
die "<$fasta_file> does not seem to be a fasta file. Please check the file extension\n"; |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
|
96
|
0
|
0
|
|
|
|
|
die "Too many arguments: '@$args'\n" if @$args; |
97
|
|
|
|
|
|
|
} |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
sub validate_opts { |
100
|
0
|
|
|
0
|
1
|
|
my ($self, $opts) = @_; |
101
|
0
|
|
|
|
|
|
my %default_opt = $self->_default_opt; |
102
|
0
|
|
|
|
|
|
$self->fill_opts($opts, \%default_opt); |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# Possible alternatives |
105
|
0
|
|
|
|
|
|
my %STRAND_BIAS = map { $_ => 1 } @{ &STRAND_BIAS_OPT }; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
106
|
0
|
|
|
|
|
|
my %SEQID_WEIGHT = map { $_ => 1 } @{ &SEQID_WEIGHT_OPT }; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
107
|
0
|
|
|
|
|
|
my %SEQUENCING_TYPE = map { $_ => 1 } @{ &SEQUENCING_TYPE_OPT }; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
108
|
0
|
|
|
|
|
|
my %QUALITY_PROFILE = %{ $self->_quality_profile_report }; |
|
0
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
# prefix |
111
|
0
|
0
|
|
|
|
|
if ($opts->{prefix} =~ /([\/\\])/) { |
112
|
0
|
|
|
|
|
|
die "Invalid character in 'prefix' option: $opts->{prefix} => '$1'\n"; |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
# jobs > 0 |
116
|
0
|
0
|
|
|
|
|
if ($opts->{jobs} <= 0) { |
117
|
0
|
|
|
|
|
|
die "Option 'jobs' requires an integer greater than zero, not $opts->{jobs}\n"; |
118
|
|
|
|
|
|
|
} |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
# 0 <= sequencing_error <= 1 |
121
|
0
|
0
|
0
|
|
|
|
if (0 > $opts->{'sequencing-error'} || $opts->{'sequencing-error'} > 1) { |
122
|
0
|
|
|
|
|
|
die "Option 'sequencing-error' requires a value between zero and one, not $opts->{'sequencing-error'}\n"; |
123
|
|
|
|
|
|
|
} |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# quality_profile |
126
|
0
|
0
|
0
|
|
|
|
if ((not exists $QUALITY_PROFILE{$opts->{'quality-profile'}}) && ($opts->{'quality-profile'} ne 'poisson')) { |
127
|
0
|
|
|
|
|
|
my $opt = join ', ' => keys %QUALITY_PROFILE; |
128
|
0
|
|
|
|
|
|
die "Option 'quality-profile' requires one of these arguments: $opt and poisson. Not $opts->{'quality-profile'}\n"; |
129
|
|
|
|
|
|
|
} |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
# 0 < read-size <= 101 |
132
|
0
|
0
|
|
|
|
|
if (0 > $opts->{'read-size'}) { |
133
|
0
|
|
|
|
|
|
die "Option 'read-size' requires an integer greater than zero, not $opts->{'read-size'}\n"; |
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
# read-size if quality-profile is not poisson, test for available sizes |
137
|
0
|
|
|
|
|
|
my $quality_entry = $QUALITY_PROFILE{$opts->{'quality-profile'}}; |
138
|
0
|
|
|
|
|
|
my %sizes = map { $_->{size} => 1 } @$quality_entry; |
|
0
|
|
|
|
|
|
|
139
|
0
|
0
|
0
|
|
|
|
if ((not $sizes{$opts->{'read-size'}}) && ($opts->{'quality-profile'} ne 'poisson')) { |
140
|
0
|
|
|
|
|
|
my $opt = join ', ' => keys %sizes; |
141
|
0
|
|
|
|
|
|
die "Option 'read-size' requires one of these arguments for $opts->{'quality-profile'}: $opt. Not $opts->{'read-size'}\n"; |
142
|
|
|
|
|
|
|
} |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
# strand_bias (STRAND_BIAS_OPT) |
145
|
0
|
0
|
|
|
|
|
if (not exists $STRAND_BIAS{$opts->{'strand-bias'}}) { |
146
|
0
|
|
|
|
|
|
my $opt = join ', ' => keys %STRAND_BIAS; |
147
|
0
|
|
|
|
|
|
die "Option 'strand-bias' requires one of these arguments: $opt. Not $opts->{'strand-bias'}\n"; |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
# sequencing_type (SEQUENCING_TYPE_OPT) |
151
|
0
|
0
|
|
|
|
|
if (not exists $SEQUENCING_TYPE{$opts->{'sequencing-type'}}) { |
152
|
0
|
|
|
|
|
|
my $opt = join ', ' => keys %SEQUENCING_TYPE; |
153
|
0
|
|
|
|
|
|
die "Option 'sequencing-type' requires one of these arguments: $opt not $opts->{'sequencing-type'}\n"; |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
## Dependently validated arguments |
157
|
|
|
|
|
|
|
# fragment_mean and fragment_stdd |
158
|
0
|
0
|
|
|
|
|
if ($opts->{'sequencing-type'} eq 'paired-end') { |
159
|
|
|
|
|
|
|
# fragment_mean > 0 |
160
|
0
|
0
|
|
|
|
|
if ($opts->{'fragment-mean'} <= 0) { |
161
|
0
|
|
|
|
|
|
die "Option 'fragment-mean' requires an integer greater than zero, not $opts->{'fragment-mean'}\n"; |
162
|
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
# fragment_stdd > 0 |
165
|
0
|
0
|
|
|
|
|
if ($opts->{'fragment-stdd'} < 0) { |
166
|
0
|
|
|
|
|
|
die "Option 'fragment-stdd' requires an integer greater or equal to zero, not $opts->{'fragment-stdd'}\n"; |
167
|
|
|
|
|
|
|
} |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
# (fragment_mean - fragment_stdd) >= read_size |
170
|
0
|
0
|
|
|
|
|
if (($opts->{'fragment-mean'} - $opts->{'fragment-stdd'}) < $opts->{'read-size'}) { |
171
|
|
|
|
|
|
|
die "Option 'fragment-mean' minus 'fragment-stdd' requires a value greater or equal read-size, not " . |
172
|
0
|
|
|
|
|
|
($opts->{'fragment-mean'} - $opts->{'fragment-stdd'}) . "\n"; |
173
|
|
|
|
|
|
|
} |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
# count-loops-by (COUNT_LOOPS_BY_OPT). The default value is counting by coverage |
177
|
|
|
|
|
|
|
# Or calculate number of reads by coverage, or the user pass the number-of-reads. |
178
|
|
|
|
|
|
|
# number-of-reads option overrides coverage |
179
|
0
|
0
|
|
|
|
|
if (exists $opts->{'number-of-reads'}) { |
180
|
|
|
|
|
|
|
# number_of_reads > 0 |
181
|
0
|
0
|
|
|
|
|
if ($opts->{'number-of-reads'} <= 0) { |
182
|
0
|
|
|
|
|
|
die "Option 'number-of-reads' requires a value greater than zero, not $opts->{'number-of-reads'}\n"; |
183
|
|
|
|
|
|
|
} |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
# sequencing_type eq paired-end requires at least 2 reads |
186
|
0
|
0
|
0
|
|
|
|
if ($opts->{'number-of-reads'} < 2 && $opts->{'sequencing-type'} eq 'paired-end') { |
187
|
0
|
|
|
|
|
|
die "Option 'number-of-reads' requires a value greater or equal to 2 for paired-end reads, not $opts->{'number-of-reads'}\n"; |
188
|
|
|
|
|
|
|
} |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
# coverage > 0 |
192
|
0
|
0
|
|
|
|
|
if ($opts->{coverage} <= 0) { |
193
|
0
|
|
|
|
|
|
die "Option 'coverage' requires a value greater than zero, not $opts->{coverage}\n"; |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
# seqid-weight (SEQID_WEIGHT_OPT) |
197
|
0
|
0
|
|
|
|
|
if (not exists $SEQID_WEIGHT{$opts->{'seqid-weight'}}) { |
198
|
0
|
|
|
|
|
|
my $opt = join ', ' => keys %SEQID_WEIGHT; |
199
|
0
|
|
|
|
|
|
die "Option 'seqid-weight' requires one of these arguments: $opt not $opts->{'seqid_weight'}\n"; |
200
|
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
# seqid-weight eq 'file' requires a weight-file |
203
|
0
|
0
|
|
|
|
|
if ($opts->{'seqid-weight'} eq 'file') { |
204
|
0
|
0
|
|
|
|
|
if (not defined $opts->{'weight-file'}) { |
205
|
0
|
|
|
|
|
|
die "Option 'seqid-weight=file' requires the argument 'weight-file' with a tab-separated values file\n"; |
206
|
|
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
# It is defined, but the file exists? |
209
|
0
|
0
|
|
|
|
|
if (not -f $opts->{'weight-file'}) { |
210
|
0
|
|
|
|
|
|
die "Option 'weight-file' requires a valid file, not $opts->{'weight-file'}\n"; |
211
|
|
|
|
|
|
|
} |
212
|
|
|
|
|
|
|
} |
213
|
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
sub execute { |
216
|
0
|
|
|
0
|
1
|
|
my ($self, $opts, $args) = @_; |
217
|
0
|
|
|
|
|
|
my $fasta_file = shift @$args; |
218
|
|
|
|
|
|
|
|
219
|
0
|
|
|
|
|
|
my %default_opt = $self->_default_opt; |
220
|
0
|
|
|
|
|
|
$self->fill_opts($opts, \%default_opt); |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
# Set if user wants a verbose log |
223
|
0
|
|
|
|
|
|
$LOG_VERBOSE = $opts->{verbose}; |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
# Set default count-loop-by behavior |
226
|
0
|
0
|
|
|
|
|
$opts->{'count-loops-by'} = 'number-of-reads' if exists $opts->{'number-of-reads'}; |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
# Create output directory if it not exist |
229
|
0
|
|
|
|
|
|
make_path($opts->{'output-dir'}, {error => \my $err_list}); |
230
|
0
|
|
|
|
|
|
my $err_dir; |
231
|
0
|
0
|
|
|
|
|
if (@$err_list) { |
232
|
0
|
|
|
|
|
|
for (@$err_list) { |
233
|
0
|
|
|
|
|
|
my ($dir, $message) = %$_; |
234
|
0
|
|
|
|
|
|
$err_dir .= "Problem creating '$dir': $message\n"; |
235
|
|
|
|
|
|
|
} |
236
|
0
|
|
|
|
|
|
die "$err_dir"; |
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
# Concatenate output-dir to prefix |
240
|
0
|
|
|
|
|
|
my $prefix = file($opts->{'output-dir'}, $opts->{prefix}); |
241
|
0
|
|
|
|
|
|
$opts->{prefix} = "$prefix"; |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
244
|
|
|
|
|
|
|
# Log presentation header |
245
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
246
|
0
|
|
|
|
|
|
my $time_stamp = localtime; |
247
|
0
|
|
|
|
|
|
my $progname = $self->progname; |
248
|
0
|
|
|
|
|
|
my $argv = $self->argv; |
249
|
0
|
|
|
|
|
|
log_msg <<"HEADER"; |
250
|
|
|
|
|
|
|
-------------------------------------------------------- |
251
|
|
|
|
|
|
|
Date $time_stamp |
252
|
|
|
|
|
|
|
$progname Copyright (C) 2017 Thiago L. A. Miller |
253
|
|
|
|
|
|
|
-------------------------------------------------------- |
254
|
|
|
|
|
|
|
:: Arguments passed by the user: |
255
|
|
|
|
|
|
|
=> '@$argv' |
256
|
|
|
|
|
|
|
HEADER |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
259
|
|
|
|
|
|
|
# Construct the Fastq and Simulator classes |
260
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
261
|
|
|
|
|
|
|
my %paired_end_param = ( |
262
|
|
|
|
|
|
|
quality_profile => $opts->{'quality-profile'}, |
263
|
|
|
|
|
|
|
sequencing_error => $opts->{'sequencing-error'}, |
264
|
|
|
|
|
|
|
read_size => $opts->{'read-size'}, |
265
|
|
|
|
|
|
|
fragment_mean => $opts->{'fragment-mean'}, |
266
|
0
|
|
|
|
|
|
fragment_stdd => $opts->{'fragment-stdd'} |
267
|
|
|
|
|
|
|
); |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
my %single_end_param = ( |
270
|
|
|
|
|
|
|
quality_profile => $opts->{'quality-profile'}, |
271
|
|
|
|
|
|
|
sequencing_error => $opts->{'sequencing-error'}, |
272
|
0
|
|
|
|
|
|
read_size => $opts->{'read-size'} |
273
|
|
|
|
|
|
|
); |
274
|
|
|
|
|
|
|
|
275
|
0
|
|
|
|
|
|
my $fastq; |
276
|
0
|
0
|
|
|
|
|
if ($opts->{'sequencing-type'} eq 'paired-end') { |
277
|
0
|
|
|
|
|
|
log_msg ":: Creating paired-end fastq generator ..."; |
278
|
0
|
|
|
|
|
|
$self->_log_msg_opt(\%paired_end_param); |
279
|
0
|
|
|
|
|
|
$fastq = App::SimulateReads::Fastq::PairedEnd->new(%paired_end_param); |
280
|
|
|
|
|
|
|
} else { |
281
|
0
|
|
|
|
|
|
log_msg ":: Creating single-end fastq generator ..."; |
282
|
0
|
|
|
|
|
|
$self->_log_msg_opt(\%single_end_param); |
283
|
0
|
|
|
|
|
|
$fastq = App::SimulateReads::Fastq::SingleEnd->new(%single_end_param); |
284
|
|
|
|
|
|
|
} |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
my %simulator_param = ( |
287
|
|
|
|
|
|
|
fastq => $fastq, |
288
|
|
|
|
|
|
|
fasta_file => $fasta_file, |
289
|
|
|
|
|
|
|
prefix => $opts->{'prefix'}, |
290
|
|
|
|
|
|
|
output_gzip => $opts->{'gzip'}, |
291
|
|
|
|
|
|
|
count_loops_by => $opts->{'count-loops-by'}, |
292
|
|
|
|
|
|
|
number_of_reads => $opts->{'number-of-reads'}, |
293
|
|
|
|
|
|
|
coverage => $opts->{'coverage'}, |
294
|
|
|
|
|
|
|
jobs => $opts->{'jobs'}, |
295
|
|
|
|
|
|
|
strand_bias => $opts->{'strand-bias'}, |
296
|
|
|
|
|
|
|
seqid_weight => $opts->{'seqid-weight'}, |
297
|
0
|
|
|
|
|
|
weight_file => $opts->{'weight-file'} |
298
|
|
|
|
|
|
|
); |
299
|
|
|
|
|
|
|
|
300
|
0
|
|
|
|
|
|
my $simulator; |
301
|
0
|
|
|
|
|
|
log_msg ":: Creating simulator ..."; |
302
|
0
|
|
|
|
|
|
$self->_log_msg_opt(\%simulator_param); |
303
|
0
|
|
|
|
|
|
$simulator = App::SimulateReads::Simulator->new(%simulator_param); |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
306
|
|
|
|
|
|
|
# Let's simulate it! |
307
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
308
|
0
|
|
|
|
|
|
log_msg ":: Running simulation ..."; |
309
|
0
|
|
|
|
|
|
$simulator->run_simulation; |
310
|
|
|
|
|
|
|
|
311
|
0
|
|
|
|
|
|
log_msg ":: End simulation. So long, and thanks for all the fish!"; |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
__END__ |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
=pod |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
=encoding UTF-8 |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
=head1 NAME |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
App::SimulateReads::Command::Digest - digest command class. Simulate single-end and paired-end reads. |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
=head1 VERSION |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
version 0.05 |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
=head1 SYNOPSIS |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
simulate_reads digest [options] <fasta-file> |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
Arguments: |
333
|
|
|
|
|
|
|
a fasta-file |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
Options: |
336
|
|
|
|
|
|
|
-h, --help brief help message |
337
|
|
|
|
|
|
|
-M, --man full documentation |
338
|
|
|
|
|
|
|
-v, --verbose print log messages |
339
|
|
|
|
|
|
|
-p, --prefix prefix output [default:"out"] |
340
|
|
|
|
|
|
|
-o, --output-dir output directory [default:"."] |
341
|
|
|
|
|
|
|
-j, --jobs number of jobs [default:"1"; Integer] |
342
|
|
|
|
|
|
|
-z, --gzip compress output file |
343
|
|
|
|
|
|
|
-c, --coverage fastq-file coverage [default:"1", Number] |
344
|
|
|
|
|
|
|
-n, --number-of-reads directly set the number of reads |
345
|
|
|
|
|
|
|
[default:"1", Integer] |
346
|
|
|
|
|
|
|
-t, --sequencing-type single-end or paired-end reads |
347
|
|
|
|
|
|
|
[default:"paired-end"] |
348
|
|
|
|
|
|
|
-q, --quality-profile illumina sequencing system profiles |
349
|
|
|
|
|
|
|
[default:"hiseq"] |
350
|
|
|
|
|
|
|
-e, --sequencing-error sequencing error rate |
351
|
|
|
|
|
|
|
[default:"0.005"; Number] |
352
|
|
|
|
|
|
|
-r, --read-size the read size [default:"101"; Integer] |
353
|
|
|
|
|
|
|
-m, --fragment-mean the fragment mean size for paired-end reads |
354
|
|
|
|
|
|
|
[default:"300"; Integer] |
355
|
|
|
|
|
|
|
-d, --fragment-stdd the fragment standard deviation size for |
356
|
|
|
|
|
|
|
paired-end reads [default:"50"; Integer] |
357
|
|
|
|
|
|
|
-b, --strand-bias which strand to be used: plus, minus and random |
358
|
|
|
|
|
|
|
[default:"random"] |
359
|
|
|
|
|
|
|
-w, --seqid-weight seqid raffle type: length, same, file |
360
|
|
|
|
|
|
|
[default: "length"] |
361
|
|
|
|
|
|
|
-f, --weight-file weight file when seqid-weight=file |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
=head1 DESCRIPTION |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
B<simulate_reads> will read the given input file and do something |
366
|
|
|
|
|
|
|
useful with the contents thereof. |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
=head1 OPTIONS |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
=over 8 |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
=item B<--help> |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
Print a brief help message and exits. |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
=item B<--man> |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
Prints the manual page and exits. |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
=item B<--verbose> |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
Prints log information to standard error |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
=item B<--prefix> |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
Concatenates the prefix to the output-file name. |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
=item B<--output-dir> |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
Creates output-file inside output-dir. If output-dir |
391
|
|
|
|
|
|
|
does not exist, it is created recursively |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
=item B<--jobs> |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
Sets the number of child jobs to be created |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
=item B<--gzip> |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
Compress the output-file with gzip algorithm. It is |
400
|
|
|
|
|
|
|
possible to pass --no-gzip if one wants |
401
|
|
|
|
|
|
|
uncompressed output-file |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=item B<--read-size> |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
Sets the read size. For now the unique valid value is 101 |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
=item B<--coverage> |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
Calculates the number of reads based on the sequence |
410
|
|
|
|
|
|
|
coverage: number_of_reads = (sequence_size * coverage) / read_size |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
=item B<--number-of-reads> |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
Sets directly the number of reads desired. It overrides coverage, |
415
|
|
|
|
|
|
|
in case the two options are given |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
=item B<--sequencing-type> |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
Sets the sequencing type to single-end or paired-end |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=item B<--fragment-mean> |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
If the sequencing-type is set to paired-end, it sets the |
424
|
|
|
|
|
|
|
fragment mean |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
=item B<--fragment-stdd> |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
If the sequencing-type is set to paired-end, it sets the |
429
|
|
|
|
|
|
|
fragment standard deviation |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
=item B<--sequencing-error> |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
Sets the sequencing error rate. Valid values are between zero and one |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
=item B<--quality-profile> |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
Sets the illumina sequencing system profile for quality. For now, the unique |
438
|
|
|
|
|
|
|
valid values are hiseq and poisson |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
=item B<--strand-bias> |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
Sets which strand to use to make a read. Valid options are plus, minus and |
443
|
|
|
|
|
|
|
random - if you want to randomly calculte the strand for each read |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
=item B<--seqid-weight> |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
Sets the seqid (e.g. chromossome, ensembl id) raffle behavior. Valid options are |
448
|
|
|
|
|
|
|
length, same and file. If it is set to 'same', all seqid receives the same weight |
449
|
|
|
|
|
|
|
when raffling. If it is set to 'length', the seqid weight is calculated based on |
450
|
|
|
|
|
|
|
the seqid sequence length. And finally, if it is set to 'file', the user must set |
451
|
|
|
|
|
|
|
the option --weight-file. For details, see B<--weight-file> |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
=item B<--weight-file> |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
If --seqid-weight is set to file, then this option becomes mandatory. A valid |
456
|
|
|
|
|
|
|
weight file is a tab-separated values file with 2 columns. The first column is |
457
|
|
|
|
|
|
|
for the seqid and the second column for the desired weight. Valid weights are integers |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=back |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=head1 AUTHOR |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
Thiago L. A. Miller <tmiller@mochsl.org.br> |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
This software is Copyright (c) 2017 by Teaching and Research Institute from SÃrio-Libanês Hospital. |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
This is free software, licensed under: |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
The GNU General Public License, Version 3, June 2007 |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
=cut |