line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package App::Sandy::Simulator; |
2
|
|
|
|
|
|
|
# ABSTRACT: Class responsible to make the simulation |
3
|
|
|
|
|
|
|
|
4
|
6
|
|
|
6
|
|
97186
|
use App::Sandy::Base 'class'; |
|
6
|
|
|
|
|
17
|
|
|
6
|
|
|
|
|
64
|
|
5
|
6
|
|
|
6
|
|
131
|
use App::Sandy::Seq::SingleEnd; |
|
6
|
|
|
|
|
12
|
|
|
6
|
|
|
|
|
270
|
|
6
|
6
|
|
|
6
|
|
40
|
use App::Sandy::Seq::PairedEnd; |
|
6
|
|
|
|
|
864
|
|
|
6
|
|
|
|
|
265
|
|
7
|
6
|
|
|
6
|
|
2885
|
use App::Sandy::InterlaceProcesses; |
|
6
|
|
|
|
|
2311
|
|
|
6
|
|
|
|
|
241
|
|
8
|
6
|
|
|
6
|
|
532
|
use App::Sandy::RNG; |
|
6
|
|
|
|
|
13
|
|
|
6
|
|
|
|
|
394
|
|
9
|
6
|
|
|
6
|
|
2965
|
use App::Sandy::WeightedRaffle; |
|
6
|
|
|
|
|
2117
|
|
|
6
|
|
|
|
|
261
|
|
10
|
6
|
|
|
6
|
|
496
|
use App::Sandy::PieceTable; |
|
6
|
|
|
|
|
541
|
|
|
6
|
|
|
|
|
175
|
|
11
|
6
|
|
|
6
|
|
3214
|
use App::Sandy::DB::Handle::Expression; |
|
6
|
|
|
|
|
2038
|
|
|
6
|
|
|
|
|
240
|
|
12
|
6
|
|
|
6
|
|
3051
|
use App::Sandy::DB::Handle::Variation; |
|
6
|
|
|
|
|
2317
|
|
|
6
|
|
|
|
|
243
|
|
13
|
6
|
|
|
6
|
|
51
|
use List::Util 'min'; |
|
6
|
|
|
|
|
12
|
|
|
6
|
|
|
|
|
532
|
|
14
|
6
|
|
|
6
|
|
3651
|
use File::Cat 'cat'; |
|
6
|
|
|
|
|
3364
|
|
|
6
|
|
|
|
|
460
|
|
15
|
6
|
|
|
6
|
|
4248
|
use Parallel::ForkManager; |
|
6
|
|
|
|
|
341540
|
|
|
6
|
|
|
|
|
56536
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
with qw/App::Sandy::Role::IO App::Sandy::Role::SeqID/; |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
our $VERSION = '0.25'; # VERSION |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
has 'argv' => ( |
22
|
|
|
|
|
|
|
is => 'ro', |
23
|
|
|
|
|
|
|
isa => 'ArrayRef[Str]', |
24
|
|
|
|
|
|
|
required => 1 |
25
|
|
|
|
|
|
|
); |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
has 'truncate' => ( |
28
|
|
|
|
|
|
|
is => 'ro', |
29
|
|
|
|
|
|
|
isa => 'Bool', |
30
|
|
|
|
|
|
|
required => 1 |
31
|
|
|
|
|
|
|
); |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
has 'seed' => ( |
34
|
|
|
|
|
|
|
is => 'ro', |
35
|
|
|
|
|
|
|
isa => 'Int', |
36
|
|
|
|
|
|
|
required => 1 |
37
|
|
|
|
|
|
|
); |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
has 'jobs' => ( |
40
|
|
|
|
|
|
|
is => 'ro', |
41
|
|
|
|
|
|
|
isa => 'My:IntGt0', |
42
|
|
|
|
|
|
|
required => 1 |
43
|
|
|
|
|
|
|
); |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
has 'prefix' => ( |
46
|
|
|
|
|
|
|
is => 'ro', |
47
|
|
|
|
|
|
|
isa => 'Str', |
48
|
|
|
|
|
|
|
required => 1 |
49
|
|
|
|
|
|
|
); |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
has 'join_paired_ends' => ( |
52
|
|
|
|
|
|
|
is => 'ro', |
53
|
|
|
|
|
|
|
isa => 'Bool', |
54
|
|
|
|
|
|
|
required => 1 |
55
|
|
|
|
|
|
|
); |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
has 'output_format' => ( |
58
|
|
|
|
|
|
|
is => 'ro', |
59
|
|
|
|
|
|
|
isa => 'My:Format', |
60
|
|
|
|
|
|
|
required => 1 |
61
|
|
|
|
|
|
|
); |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
has 'compression_level' => ( |
64
|
|
|
|
|
|
|
is => 'ro', |
65
|
|
|
|
|
|
|
isa => 'My:Level', |
66
|
|
|
|
|
|
|
required => 1 |
67
|
|
|
|
|
|
|
); |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
has 'fasta_file' => ( |
70
|
|
|
|
|
|
|
is => 'ro', |
71
|
|
|
|
|
|
|
isa => 'My:Fasta', |
72
|
|
|
|
|
|
|
required => 1 |
73
|
|
|
|
|
|
|
); |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
has 'coverage' => ( |
76
|
|
|
|
|
|
|
is => 'ro', |
77
|
|
|
|
|
|
|
isa => 'My:NumGt0', |
78
|
|
|
|
|
|
|
required => 0 |
79
|
|
|
|
|
|
|
); |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
has 'number_of_reads' => ( |
82
|
|
|
|
|
|
|
is => 'ro', |
83
|
|
|
|
|
|
|
isa => 'My:IntGt0', |
84
|
|
|
|
|
|
|
required => 0 |
85
|
|
|
|
|
|
|
); |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
has 'count_loops_by' => ( |
88
|
|
|
|
|
|
|
is => 'ro', |
89
|
|
|
|
|
|
|
isa => 'My:CountLoopBy', |
90
|
|
|
|
|
|
|
required => 1 |
91
|
|
|
|
|
|
|
); |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
has 'strand_bias' => ( |
94
|
|
|
|
|
|
|
is => 'ro', |
95
|
|
|
|
|
|
|
isa => 'My:StrandBias', |
96
|
|
|
|
|
|
|
required => 1 |
97
|
|
|
|
|
|
|
); |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
has 'seqid_weight' => ( |
100
|
|
|
|
|
|
|
is => 'ro', |
101
|
|
|
|
|
|
|
isa => 'My:SeqIdWeight', |
102
|
|
|
|
|
|
|
required => 1 |
103
|
|
|
|
|
|
|
); |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
has 'expression_matrix' => ( |
106
|
|
|
|
|
|
|
is => 'ro', |
107
|
|
|
|
|
|
|
isa => 'Str', |
108
|
|
|
|
|
|
|
required => 0 |
109
|
|
|
|
|
|
|
); |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
has 'genomic_variation' => ( |
112
|
|
|
|
|
|
|
is => 'ro', |
113
|
|
|
|
|
|
|
isa => 'ArrayRef[Str]', |
114
|
|
|
|
|
|
|
required => 0 |
115
|
|
|
|
|
|
|
); |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
has '_genomic_variation_names' => ( |
118
|
|
|
|
|
|
|
is => 'ro', |
119
|
|
|
|
|
|
|
isa => 'Maybe[Str]', |
120
|
|
|
|
|
|
|
builder => '_build_genomic_variation_names', |
121
|
|
|
|
|
|
|
lazy_build => 1 |
122
|
|
|
|
|
|
|
); |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
has 'seq' => ( |
125
|
|
|
|
|
|
|
is => 'ro', |
126
|
|
|
|
|
|
|
isa => 'App::Sandy::Seq::SingleEnd | App::Sandy::Seq::PairedEnd', |
127
|
|
|
|
|
|
|
required => 1, |
128
|
|
|
|
|
|
|
handles => [ qw{ sprint_seq gen_sam_header gen_eof_marker } ] |
129
|
|
|
|
|
|
|
); |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
has '_fasta' => ( |
132
|
|
|
|
|
|
|
is => 'ro', |
133
|
|
|
|
|
|
|
isa => 'My:IdxFasta', |
134
|
|
|
|
|
|
|
builder => '_build_fasta', |
135
|
|
|
|
|
|
|
lazy_build => 1 |
136
|
|
|
|
|
|
|
); |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
has '_fasta_tree' => ( |
139
|
|
|
|
|
|
|
traits => ['Hash'], |
140
|
|
|
|
|
|
|
is => 'ro', |
141
|
|
|
|
|
|
|
isa => 'HashRef[ArrayRef]', |
142
|
|
|
|
|
|
|
default => sub { {} }, |
143
|
|
|
|
|
|
|
handles => { |
144
|
|
|
|
|
|
|
_set_fasta_tree => 'set', |
145
|
|
|
|
|
|
|
_get_fasta_tree => 'get', |
146
|
|
|
|
|
|
|
_exists_fasta_tree => 'exists', |
147
|
|
|
|
|
|
|
_fasta_tree_pairs => 'kv', |
148
|
|
|
|
|
|
|
_has_no_fasta_tree => 'is_empty' |
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
); |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
has '_fasta_rtree' => ( |
153
|
|
|
|
|
|
|
traits => ['Hash'], |
154
|
|
|
|
|
|
|
is => 'ro', |
155
|
|
|
|
|
|
|
isa => 'HashRef[Str]', |
156
|
|
|
|
|
|
|
default => sub { {} }, |
157
|
|
|
|
|
|
|
handles => { |
158
|
|
|
|
|
|
|
_set_fasta_rtree => 'set', |
159
|
|
|
|
|
|
|
_get_fasta_rtree => 'get', |
160
|
|
|
|
|
|
|
_delete_fasta_rtree => 'delete', |
161
|
|
|
|
|
|
|
_exists_fasta_rtree => 'exists', |
162
|
|
|
|
|
|
|
_fasta_rtree_pairs => 'kv', |
163
|
|
|
|
|
|
|
_has_no_fasta_rtree => 'is_empty' |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
); |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
has '_fasta_blacklist' => ( |
168
|
|
|
|
|
|
|
is => 'ro', |
169
|
|
|
|
|
|
|
isa => 'My:FastaBlackList', |
170
|
|
|
|
|
|
|
builder => '_build_fasta_blacklist', |
171
|
|
|
|
|
|
|
lazy_build => 1 |
172
|
|
|
|
|
|
|
); |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
has '_seqname' => ( |
175
|
|
|
|
|
|
|
traits => ['Hash'], |
176
|
|
|
|
|
|
|
is => 'ro', |
177
|
|
|
|
|
|
|
isa => 'HashRef[Str]', |
178
|
|
|
|
|
|
|
default => sub { {} }, |
179
|
|
|
|
|
|
|
handles => { |
180
|
|
|
|
|
|
|
_set_seqname => 'set', |
181
|
|
|
|
|
|
|
_get_seqname => 'get' |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
); |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
has '_piece_table' => ( |
186
|
|
|
|
|
|
|
is => 'ro', |
187
|
|
|
|
|
|
|
isa => 'HashRef[HashRef[My:PieceTable]]', |
188
|
|
|
|
|
|
|
builder => '_build_piece_table', |
189
|
|
|
|
|
|
|
lazy_build => 1 |
190
|
|
|
|
|
|
|
); |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
has '_strand' => ( |
193
|
|
|
|
|
|
|
is => 'ro', |
194
|
|
|
|
|
|
|
isa => 'CodeRef', |
195
|
|
|
|
|
|
|
builder => '_build_strand', |
196
|
|
|
|
|
|
|
lazy_build => 1 |
197
|
|
|
|
|
|
|
); |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
has '_seqid_raffle' => ( |
200
|
|
|
|
|
|
|
is => 'ro', |
201
|
|
|
|
|
|
|
isa => 'CodeRef', |
202
|
|
|
|
|
|
|
builder => '_build_seqid_raffle', |
203
|
|
|
|
|
|
|
lazy_build => 1 |
204
|
|
|
|
|
|
|
); |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
sub BUILD { |
207
|
20
|
|
|
20
|
0
|
40
|
my $self = shift; |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
# If seqid_weight is 'count', then expression_matrix must be defined |
210
|
20
|
50
|
33
|
|
|
575
|
if ($self->seqid_weight eq 'count' and not defined $self->expression_matrix) { |
211
|
0
|
|
|
|
|
0
|
croak "seqid_weight=count requires a expression_matrix\n"; |
212
|
|
|
|
|
|
|
} |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
# If count_loops_by is 'coverage', then coverage must be defined. Else if |
215
|
|
|
|
|
|
|
# it is equal to 'number_of_reads', then number_of_reads must be defined |
216
|
20
|
50
|
33
|
|
|
525
|
if ($self->count_loops_by eq 'coverage' and not defined $self->coverage) { |
|
|
50
|
33
|
|
|
|
|
217
|
0
|
|
|
|
|
0
|
croak "count_loops_by=coverage requires a coverage number\n"; |
218
|
|
|
|
|
|
|
} elsif ($self->count_loops_by eq 'number_of_reads' and not defined $self->number_of_reads) { |
219
|
0
|
|
|
|
|
0
|
croak "count_loops_by=number_of_reads requires a number_of_reads number\n"; |
220
|
|
|
|
|
|
|
} |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
## Just to ensure that the lazy attributes are built before &new returns |
223
|
20
|
|
|
|
|
610
|
$self->_piece_table; |
224
|
20
|
|
|
|
|
525
|
$self->_seqid_raffle; |
225
|
20
|
|
|
|
|
510
|
$self->_fasta; |
226
|
20
|
|
|
|
|
530
|
$self->_fasta_blacklist; |
227
|
20
|
|
|
|
|
535
|
$self->_strand; |
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
sub _build_strand { |
231
|
20
|
|
|
20
|
|
25
|
my $self = shift; |
232
|
20
|
|
|
|
|
30
|
my $strand_sub; |
233
|
|
|
|
|
|
|
|
234
|
20
|
50
|
|
|
|
525
|
if ($self->strand_bias eq 'plus') { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
235
|
0
|
|
|
0
|
|
0
|
$strand_sub = sub {1}; |
|
0
|
|
|
|
|
0
|
|
236
|
|
|
|
|
|
|
} elsif ($self->strand_bias eq 'minus') { |
237
|
0
|
|
|
0
|
|
0
|
$strand_sub = sub {0}; |
|
0
|
|
|
|
|
0
|
|
238
|
|
|
|
|
|
|
} elsif ($self->strand_bias eq 'random') { |
239
|
|
|
|
|
|
|
# Use App::Sandy::RNG |
240
|
20
|
|
|
6840
|
|
140
|
$strand_sub = sub { $_[0]->get_n(2) }; |
|
6840
|
|
|
|
|
37489
|
|
241
|
|
|
|
|
|
|
} else { |
242
|
0
|
|
|
|
|
0
|
croak sprintf "Unknown option '%s' for strand bias\n", |
243
|
|
|
|
|
|
|
$self->strand_bias; |
244
|
|
|
|
|
|
|
} |
245
|
|
|
|
|
|
|
|
246
|
20
|
|
|
|
|
495
|
return $strand_sub; |
247
|
|
|
|
|
|
|
} |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
sub _clean_fasta_id { |
250
|
100
|
|
|
100
|
|
180
|
my ($self, $id_ref) = @_; |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
# Remove '>' |
253
|
100
|
|
|
|
|
310
|
$$id_ref =~ s/^>//; |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
# Chomp |
256
|
100
|
|
|
|
|
325
|
$$id_ref =~ s/^\s+|\s+$//g; |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
# Remove id version |
259
|
100
|
|
|
|
|
220
|
$$id_ref =~ s/\.\d+$//; |
260
|
|
|
|
|
|
|
} |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
sub _index_fasta { |
263
|
20
|
|
|
20
|
|
45
|
my $self = shift; |
264
|
20
|
|
|
|
|
550
|
my $fasta = $self->fasta_file; |
265
|
|
|
|
|
|
|
|
266
|
20
|
|
|
|
|
90
|
my $fh = $self->with_open_r($fasta); |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
# indexed_genome = ID => (seq, len) |
269
|
20
|
|
|
|
|
65
|
my %indexed_fasta; |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
# >ID|PID as in gencode transcripts |
272
|
|
|
|
|
|
|
my %fasta_rtree; |
273
|
20
|
|
|
|
|
0
|
my $id; |
274
|
|
|
|
|
|
|
|
275
|
20
|
|
|
|
|
780
|
while (<$fh>) { |
276
|
860
|
|
|
|
|
1355
|
chomp; |
277
|
860
|
50
|
|
|
|
1455
|
next if /^;/; |
278
|
860
|
100
|
|
|
|
1545
|
if (/^>/) { |
279
|
100
|
|
|
|
|
340
|
my @fields = split /\|/; |
280
|
100
|
|
|
|
|
165
|
$id = $fields[0]; |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
# Remove '>', chomp and remove id version |
283
|
100
|
|
|
|
|
295
|
$self->_clean_fasta_id(\$id); |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
# Seq ID standardization in order to manage comparations |
286
|
|
|
|
|
|
|
# between chr1, Chr1, CHR1, 1 etc; |
287
|
100
|
|
|
|
|
325
|
my $std_id = $self->with_std_seqid($id); |
288
|
100
|
|
|
|
|
3780
|
$self->_set_seqname( |
289
|
|
|
|
|
|
|
$id => $std_id, |
290
|
|
|
|
|
|
|
$std_id => $id |
291
|
|
|
|
|
|
|
); |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
# It is necessary to catch gene -> transcript relation |
294
|
|
|
|
|
|
|
# # TODO: Make a hash tarit for indexed fasta |
295
|
100
|
50
|
|
|
|
540
|
if (defined $fields[1]) { |
296
|
0
|
|
|
|
|
0
|
my $pid = $fields[1]; |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
# Chomp and remove id version |
299
|
0
|
|
|
|
|
0
|
$self->_clean_fasta_id(\$pid); |
300
|
|
|
|
|
|
|
|
301
|
0
|
|
|
|
|
0
|
$fasta_rtree{$id} = $pid; |
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
} else { |
304
|
760
|
50
|
|
|
|
1160
|
die "Error reading fasta file '$fasta': Not defined id" |
305
|
|
|
|
|
|
|
unless defined $id; |
306
|
760
|
|
|
|
|
2465
|
$indexed_fasta{$id}{seq} .= uc($_); |
307
|
|
|
|
|
|
|
} |
308
|
|
|
|
|
|
|
} |
309
|
|
|
|
|
|
|
|
310
|
20
|
|
|
|
|
125
|
for (keys %indexed_fasta) { |
311
|
100
|
|
|
|
|
200
|
$indexed_fasta{$_}{size} = length $indexed_fasta{$_}{seq}; |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
|
314
|
20
|
50
|
|
|
|
60
|
unless (%indexed_fasta) { |
315
|
0
|
|
|
|
|
0
|
die "Error parsing '$fasta'. Maybe the file is empty\n"; |
316
|
|
|
|
|
|
|
} |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
$fh->close |
319
|
20
|
50
|
|
|
|
160
|
or die "Cannot close file $fasta: $!\n"; |
320
|
|
|
|
|
|
|
|
321
|
20
|
50
|
|
|
|
545
|
$self->_set_fasta_rtree(%fasta_rtree) if %fasta_rtree; |
322
|
20
|
|
|
|
|
110
|
return \%indexed_fasta; |
323
|
|
|
|
|
|
|
} |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
sub _build_fasta { |
326
|
20
|
|
|
20
|
|
35
|
my $self = shift; |
327
|
20
|
|
|
|
|
530
|
my $fasta = $self->fasta_file; |
328
|
|
|
|
|
|
|
|
329
|
20
|
|
|
|
|
110
|
log_msg ":: Indexing fasta file '$fasta' ..."; |
330
|
20
|
|
|
|
|
85
|
my $indexed_fasta = $self->_index_fasta; |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
# Validate genome about the read size required |
333
|
20
|
|
|
|
|
105
|
log_msg ":: Validating fasta file '$fasta' ..."; |
334
|
|
|
|
|
|
|
# Entries to remove |
335
|
20
|
|
|
|
|
30
|
my @blacklist; |
336
|
|
|
|
|
|
|
|
337
|
20
|
50
|
|
|
|
695
|
unless ($self->truncate) { |
338
|
20
|
|
|
|
|
70
|
for my $id (keys %$indexed_fasta) { |
339
|
100
|
|
|
|
|
170
|
my $index_size = $indexed_fasta->{$id}{size}; |
340
|
100
|
|
|
|
|
2365
|
my $class = ref $self->seq; |
341
|
|
|
|
|
|
|
|
342
|
100
|
100
|
|
|
|
255
|
if ($class eq 'App::Sandy::Seq::SingleEnd') { |
|
|
50
|
|
|
|
|
|
343
|
50
|
|
|
|
|
1975
|
my $read_mean = $self->seq->read_mean; |
344
|
50
|
50
|
|
|
|
135
|
if ($index_size < $read_mean) { |
345
|
0
|
|
|
|
|
0
|
log_msg ":: Parsing fasta file '$fasta': Seqid sequence length (>$id => $index_size) lesser than required read mean ($read_mean)"; |
346
|
0
|
|
|
|
|
0
|
delete $indexed_fasta->{$id}; |
347
|
0
|
|
|
|
|
0
|
push @blacklist => $id; |
348
|
|
|
|
|
|
|
} |
349
|
|
|
|
|
|
|
} elsif ($class eq 'App::Sandy::Seq::PairedEnd') { |
350
|
50
|
|
|
|
|
1400
|
my $fragment_mean = $self->seq->fragment_mean; |
351
|
50
|
50
|
|
|
|
190
|
if ($index_size < $fragment_mean) { |
352
|
0
|
|
|
|
|
0
|
log_msg ":: Parsing fasta file '$fasta': Seqid sequence length (>$id => $index_size) lesser than required fragment mean ($fragment_mean)"; |
353
|
0
|
|
|
|
|
0
|
delete $indexed_fasta->{$id}; |
354
|
0
|
|
|
|
|
0
|
push @blacklist => $id; |
355
|
|
|
|
|
|
|
} |
356
|
|
|
|
|
|
|
} else { |
357
|
0
|
|
|
|
|
0
|
croak "Unknown option '$class' for sequencing type\n"; |
358
|
|
|
|
|
|
|
} |
359
|
|
|
|
|
|
|
} |
360
|
|
|
|
|
|
|
} |
361
|
|
|
|
|
|
|
|
362
|
20
|
50
|
|
|
|
75
|
unless (%$indexed_fasta) { |
363
|
0
|
|
|
|
|
0
|
die sprintf "Fasta file '%s' has no valid entry\n" => $self->fasta_file; |
364
|
|
|
|
|
|
|
} |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
# If fasta_rtree has entries |
367
|
20
|
50
|
|
|
|
755
|
unless ($self->_has_no_fasta_rtree) { |
368
|
|
|
|
|
|
|
# Remove no valid entries from id -> pid relation |
369
|
0
|
0
|
|
|
|
0
|
$self->_delete_fasta_rtree(@blacklist) if @blacklist; |
370
|
|
|
|
|
|
|
} |
371
|
|
|
|
|
|
|
|
372
|
20
|
|
|
|
|
520
|
return $indexed_fasta; |
373
|
|
|
|
|
|
|
} |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
sub _populate_fasta_tree { |
376
|
20
|
|
|
20
|
|
45
|
my $self = shift; |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
# If fasta_rtree has entries |
379
|
20
|
50
|
|
|
|
770
|
unless ($self->_has_no_fasta_rtree) { |
380
|
|
|
|
|
|
|
# Build parent -> child ids relation |
381
|
0
|
|
|
|
|
0
|
my %fasta_tree; |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
# Reverse fasta_rtree to pid -> \@ids |
384
|
0
|
|
|
|
|
0
|
for my $pair ($self->_fasta_rtree_pairs) { |
385
|
0
|
|
|
|
|
0
|
my ($id, $pid) = (@$pair); |
386
|
0
|
|
|
|
|
0
|
push @{ $fasta_tree{$pid} } => $id; |
|
0
|
|
|
|
|
0
|
|
387
|
|
|
|
|
|
|
} |
388
|
|
|
|
|
|
|
|
389
|
0
|
|
|
|
|
0
|
$self->_set_fasta_tree(%fasta_tree); |
390
|
|
|
|
|
|
|
} |
391
|
|
|
|
|
|
|
} |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
sub _build_fasta_blacklist { |
394
|
20
|
|
|
20
|
|
30
|
my $self = shift; |
395
|
|
|
|
|
|
|
|
396
|
20
|
|
|
|
|
85
|
log_msg ":: Index unidentified (NNN..) regions"; |
397
|
|
|
|
|
|
|
|
398
|
20
|
|
|
|
|
490
|
my $indexed_fasta = $self->_fasta; |
399
|
20
|
|
|
|
|
560
|
my $class = ref $self->seq; |
400
|
|
|
|
|
|
|
|
401
|
20
|
|
|
|
|
45
|
my %fasta_blacklist; |
402
|
|
|
|
|
|
|
my @blacklist_id; |
403
|
|
|
|
|
|
|
|
404
|
20
|
|
|
|
|
70
|
for my $id (keys %$indexed_fasta) { |
405
|
100
|
|
|
|
|
1485
|
my $seq = $indexed_fasta->{$id}{seq}; |
406
|
100
|
|
|
|
|
180
|
my $seq_len = $indexed_fasta->{$id}{size}; |
407
|
|
|
|
|
|
|
|
408
|
100
|
|
|
|
|
130
|
my $offset = 0; |
409
|
100
|
|
|
|
|
170
|
my ($st, $en) = (0, 0); |
410
|
100
|
|
|
|
|
145
|
my $pos = 0; |
411
|
100
|
|
|
|
|
115
|
my $init = 0; |
412
|
100
|
|
|
|
|
135
|
my $len = 0; |
413
|
100
|
|
|
|
|
110
|
my @stack; |
414
|
|
|
|
|
|
|
|
415
|
100
|
|
|
|
|
275
|
while (($pos = index $seq, "N", $offset) != -1) { |
416
|
0
|
0
|
|
|
|
0
|
unless ($init) { |
417
|
0
|
|
|
|
|
0
|
$st = $pos; |
418
|
0
|
|
|
|
|
0
|
$en = $pos; |
419
|
0
|
|
|
|
|
0
|
$init = 1; |
420
|
|
|
|
|
|
|
} |
421
|
|
|
|
|
|
|
|
422
|
0
|
0
|
|
|
|
0
|
if ($en < ($pos - 1)) { |
423
|
0
|
|
|
|
|
0
|
push @stack => [$st, $en]; |
424
|
0
|
|
|
|
|
0
|
$len += $en - $st + 1; |
425
|
0
|
|
|
|
|
0
|
$st = $pos; |
426
|
|
|
|
|
|
|
} |
427
|
|
|
|
|
|
|
|
428
|
0
|
|
|
|
|
0
|
$en = $pos; |
429
|
0
|
|
|
|
|
0
|
$offset = $pos + 1; |
430
|
|
|
|
|
|
|
} |
431
|
|
|
|
|
|
|
|
432
|
100
|
50
|
|
|
|
170
|
if ($init) { |
433
|
0
|
|
|
|
|
0
|
push @stack => [$st, $en]; |
434
|
0
|
|
|
|
|
0
|
$len += $en - $st + 1; |
435
|
|
|
|
|
|
|
} |
436
|
|
|
|
|
|
|
|
437
|
100
|
100
|
|
|
|
2625
|
my $read_len = $class eq 'App::Sandy::Seq::SingleEnd' |
438
|
|
|
|
|
|
|
? $self->seq->read_mean |
439
|
|
|
|
|
|
|
: $self->seq->fragment_mean; |
440
|
|
|
|
|
|
|
|
441
|
100
|
50
|
|
|
|
190
|
if ($len > ($seq_len - $read_len)) { |
442
|
0
|
|
|
|
|
0
|
log_msg ":: Seqid '$id' has too much NNN and will not be used"; |
443
|
0
|
|
|
|
|
0
|
delete $indexed_fasta->{$id}; |
444
|
0
|
|
|
|
|
0
|
push @blacklist_id => $id; |
445
|
|
|
|
|
|
|
} else { |
446
|
100
|
|
|
|
|
1165
|
$fasta_blacklist{$id} = \@stack; |
447
|
|
|
|
|
|
|
} |
448
|
|
|
|
|
|
|
} |
449
|
|
|
|
|
|
|
|
450
|
20
|
50
|
|
|
|
75
|
unless (%$indexed_fasta) { |
451
|
0
|
|
|
|
|
0
|
die sprintf "Fasta file '%s' has too much NNN regions\n" => $self->fasta_file; |
452
|
|
|
|
|
|
|
} |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
# If fasta_rtree has entries |
455
|
20
|
50
|
|
|
|
750
|
unless ($self->_has_no_fasta_rtree) { |
456
|
|
|
|
|
|
|
# Remove no valid entries from id -> pid relation |
457
|
0
|
0
|
|
|
|
0
|
$self->_delete_fasta_rtree(@blacklist_id) if @blacklist_id; |
458
|
|
|
|
|
|
|
} |
459
|
|
|
|
|
|
|
|
460
|
20
|
|
|
|
|
540
|
return \%fasta_blacklist; |
461
|
|
|
|
|
|
|
} |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
sub _retrieve_expression_matrix { |
464
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
465
|
|
|
|
|
|
|
|
466
|
0
|
|
|
|
|
0
|
my $expression = App::Sandy::DB::Handle::Expression->new; |
467
|
0
|
|
|
|
|
0
|
my $indexed_file = $expression->retrievedb($self->expression_matrix); |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
# Remove transcript id version |
470
|
0
|
|
|
|
|
0
|
for my $id (keys %$indexed_file) { |
471
|
0
|
|
|
|
|
0
|
my $old_id = $id; |
472
|
0
|
|
|
|
|
0
|
$self->_clean_fasta_id(\$id); |
473
|
0
|
0
|
|
|
|
0
|
$indexed_file->{$id} = delete $indexed_file->{$old_id} |
474
|
|
|
|
|
|
|
if $old_id ne $id; |
475
|
|
|
|
|
|
|
} |
476
|
|
|
|
|
|
|
|
477
|
0
|
|
|
|
|
0
|
return $indexed_file; |
478
|
|
|
|
|
|
|
} |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
sub _build_seqid_raffle { |
481
|
20
|
|
|
20
|
|
85
|
my $self = shift; |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
# Get the piece table |
484
|
20
|
|
|
|
|
545
|
my $piece_table = $self->_piece_table; |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
# The builded function |
487
|
20
|
|
|
|
|
45
|
my $seqid_sub; |
488
|
|
|
|
|
|
|
|
489
|
20
|
50
|
|
|
|
515
|
if ($self->seqid_weight eq 'same') { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
490
|
0
|
|
|
0
|
|
0
|
my ($keys, $weights) = $self->_populate_key_weight($piece_table, sub { 1 }); |
|
0
|
|
|
|
|
0
|
|
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
# If weight == 1 means that there are 2 keys for |
493
|
|
|
|
|
|
|
# the same seq_id. |
494
|
|
|
|
|
|
|
# If weight == 2 means that there is only one key |
495
|
|
|
|
|
|
|
# for the seq_id, so I double that key |
496
|
0
|
|
|
|
|
0
|
for (my $i = 0; $i < @$weights; $i++) { |
497
|
0
|
0
|
|
|
|
0
|
if ($weights->[$i] > 1) { |
498
|
0
|
|
|
|
|
0
|
push @$keys => $keys->[$i]; |
499
|
|
|
|
|
|
|
} |
500
|
|
|
|
|
|
|
} |
501
|
|
|
|
|
|
|
|
502
|
0
|
|
|
|
|
0
|
my $keys_size = scalar @$keys; |
503
|
|
|
|
|
|
|
# Use App::Sandy::RNG |
504
|
0
|
|
|
0
|
|
0
|
$seqid_sub = sub { $keys->[$_[0]->get_n($keys_size)] }; |
|
0
|
|
|
|
|
0
|
|
505
|
|
|
|
|
|
|
} elsif ($self->seqid_weight eq 'count') { |
506
|
|
|
|
|
|
|
# Catch expression-matrix entry from database |
507
|
0
|
|
|
|
|
0
|
my $indexed_file = $self->_retrieve_expression_matrix; |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
# Catch indexed fasta |
510
|
0
|
|
|
|
|
0
|
my $indexed_fasta = $self->_fasta; |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
# Validate expression_matrix |
513
|
0
|
|
|
|
|
0
|
for my $id (keys %$indexed_file) { |
514
|
|
|
|
|
|
|
# If not exists into indexed_fasta, it must then exist into fasta_tree |
515
|
0
|
0
|
0
|
|
|
0
|
unless (exists $piece_table->{$id} || $self->_exists_fasta_tree($id)) { |
516
|
0
|
|
|
|
|
0
|
log_msg sprintf ":: Ignoring seqid '%s' from expression-matrix '%s': It is not found into the indexed fasta" |
517
|
|
|
|
|
|
|
=> $id, $self->expression_matrix; |
518
|
0
|
|
|
|
|
0
|
delete $indexed_file->{$id}; |
519
|
|
|
|
|
|
|
} |
520
|
|
|
|
|
|
|
} |
521
|
|
|
|
|
|
|
|
522
|
0
|
0
|
|
|
|
0
|
unless (%$indexed_file) { |
523
|
0
|
|
|
|
|
0
|
die sprintf "No valid seqid entry of the expression-matrix '%s' is recorded into the indexed fasta\n" |
524
|
|
|
|
|
|
|
=> $self->expression_matrix; |
525
|
|
|
|
|
|
|
} |
526
|
|
|
|
|
|
|
|
527
|
0
|
|
|
|
|
0
|
my (%ptable_ind, %ptable_cluster); |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
# Split indexed_file seq_ids between those |
530
|
|
|
|
|
|
|
# into piece_table and those that represents a cluster |
531
|
|
|
|
|
|
|
# of seq_ids as in gene -> transcript relationship |
532
|
0
|
|
|
|
|
0
|
for my $seq_id (keys %$indexed_file) { |
533
|
0
|
0
|
|
|
|
0
|
if (exists $piece_table->{$seq_id}) { |
534
|
0
|
|
|
|
|
0
|
$ptable_ind{$seq_id} = $piece_table->{$seq_id}; |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
} else { |
537
|
0
|
|
|
|
|
0
|
my $ids = $self->_get_fasta_tree($seq_id); |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
# Bug catcher |
540
|
0
|
0
|
|
|
|
0
|
unless (@$ids) { |
541
|
0
|
|
|
|
|
0
|
croak "seq_id '$seq_id' not found into piece_table"; |
542
|
|
|
|
|
|
|
} |
543
|
|
|
|
|
|
|
|
544
|
0
|
|
|
|
|
0
|
$ptable_cluster{$seq_id} = $ids; |
545
|
|
|
|
|
|
|
} |
546
|
|
|
|
|
|
|
} |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
# Let's calculate the weight taking in acount |
549
|
|
|
|
|
|
|
# the size increase/decrease |
550
|
|
|
|
|
|
|
my $calc_ind_weight = sub { |
551
|
0
|
|
|
0
|
|
0
|
my ($seq_id, $type) = @_; |
552
|
|
|
|
|
|
|
|
553
|
0
|
|
|
|
|
0
|
my $counts = $indexed_file->{$seq_id}; |
554
|
0
|
|
|
|
|
0
|
my $size = $piece_table->{$seq_id}{$type}{size}; |
555
|
0
|
|
|
|
|
0
|
my $fasta_size = $indexed_fasta->{$seq_id}{size}; |
556
|
|
|
|
|
|
|
|
557
|
|
|
|
|
|
|
# Correct the weight according to the |
558
|
|
|
|
|
|
|
# genomic variation change by the ratio |
559
|
|
|
|
|
|
|
# between the table size and fasta size |
560
|
0
|
|
|
|
|
0
|
my $factor = $size / $fasta_size; |
561
|
|
|
|
|
|
|
|
562
|
0
|
|
|
|
|
0
|
return $counts * $factor; |
563
|
0
|
|
|
|
|
0
|
}; |
564
|
|
|
|
|
|
|
|
565
|
0
|
|
|
|
|
0
|
my ($keys, $weights); |
566
|
|
|
|
|
|
|
|
567
|
0
|
0
|
|
|
|
0
|
if (%ptable_ind) { |
568
|
0
|
|
|
|
|
0
|
($keys, $weights) = $self->_populate_key_weight(\%ptable_ind, |
569
|
|
|
|
|
|
|
$calc_ind_weight); |
570
|
|
|
|
|
|
|
} |
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
# If there are seq_id cluster like, then its is |
573
|
|
|
|
|
|
|
# time to calculate these weights |
574
|
0
|
|
|
|
|
0
|
for my $seq_id (sort keys %ptable_cluster) { |
575
|
0
|
|
|
|
|
0
|
my %ptable; |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
# Slice piece_table hash |
578
|
0
|
|
|
|
|
0
|
my $ids = $ptable_cluster{$seq_id}; |
579
|
0
|
|
|
|
|
0
|
@ptable{@$ids} = @$piece_table{@$ids}; |
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
# total size among all ids of cluster |
582
|
0
|
|
|
|
|
0
|
my %total; |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
# Calculate the total size by type |
585
|
0
|
|
|
|
|
0
|
for my $type_h (values %ptable) { |
586
|
0
|
|
|
|
|
0
|
for my $type (keys %$type_h) { |
587
|
0
|
|
|
|
|
0
|
$total{$type} += $type_h->{$type}{size}; |
588
|
|
|
|
|
|
|
} |
589
|
|
|
|
|
|
|
} |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
# Calculate the weight taking in acount the size increase/decrease |
592
|
|
|
|
|
|
|
# and the ratio between the total size by type and the table size. |
593
|
|
|
|
|
|
|
# The problem here is that I must divide the 'counts' for some 'seq_id' |
594
|
|
|
|
|
|
|
# among all ids that belong to it |
595
|
|
|
|
|
|
|
my $calc_cluster_weight = sub { |
596
|
0
|
|
|
0
|
|
0
|
my ($id, $type) = @_; |
597
|
|
|
|
|
|
|
|
598
|
0
|
|
|
|
|
0
|
my $counts = $indexed_file->{$seq_id}; |
599
|
0
|
|
|
|
|
0
|
my $size = $piece_table->{$id}{$type}{size}; |
600
|
0
|
|
|
|
|
0
|
my $fasta_size = $indexed_fasta->{$id}{size}; |
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
# Divide the counts among all ids |
603
|
0
|
|
|
|
|
0
|
my $ratio = $size / $total{$type}; |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
# Correct the weight according to the size |
606
|
0
|
|
|
|
|
0
|
my $factor = $size / $fasta_size; |
607
|
|
|
|
|
|
|
|
608
|
0
|
|
|
|
|
0
|
return $counts * $factor * $ratio; |
609
|
0
|
|
|
|
|
0
|
}; |
610
|
|
|
|
|
|
|
|
611
|
0
|
|
|
|
|
0
|
my ($k, $w) = $self->_populate_key_weight(\%ptable, |
612
|
|
|
|
|
|
|
$calc_cluster_weight); |
613
|
|
|
|
|
|
|
|
614
|
0
|
|
|
|
|
0
|
push @$keys => @$k; |
615
|
0
|
|
|
|
|
0
|
push @$weights => @$w; |
616
|
|
|
|
|
|
|
} |
617
|
|
|
|
|
|
|
|
618
|
0
|
0
|
0
|
|
|
0
|
unless (@$keys && @$weights) { |
619
|
0
|
|
|
|
|
0
|
croak "No keys weights have been set"; |
620
|
|
|
|
|
|
|
} |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
# It is very necessary in order |
623
|
|
|
|
|
|
|
# to avoid truncation of numbers |
624
|
|
|
|
|
|
|
# between zero and one |
625
|
0
|
|
|
|
|
0
|
$self->_round_weight($weights); |
626
|
|
|
|
|
|
|
|
627
|
0
|
|
|
|
|
0
|
my $raffler = App::Sandy::WeightedRaffle->new( |
628
|
|
|
|
|
|
|
'weights' => $weights, |
629
|
|
|
|
|
|
|
'keys' => $keys |
630
|
|
|
|
|
|
|
); |
631
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
# Use App::Sandy::Rand |
633
|
0
|
|
|
0
|
|
0
|
$seqid_sub = sub { $raffler->weighted_raffle($_[0]) }; |
|
0
|
|
|
|
|
0
|
|
634
|
|
|
|
|
|
|
} elsif ($self->seqid_weight eq 'length') { |
635
|
|
|
|
|
|
|
my $calc_weight = sub { |
636
|
100
|
|
|
100
|
|
165
|
my ($seq_id, $type) = @_; |
637
|
100
|
|
|
|
|
190
|
return $piece_table->{$seq_id}{$type}{size}; |
638
|
20
|
|
|
|
|
120
|
}; |
639
|
|
|
|
|
|
|
|
640
|
20
|
|
|
|
|
70
|
my ($keys, $weights) = $self->_populate_key_weight($piece_table, |
641
|
|
|
|
|
|
|
$calc_weight); |
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
# Just in case ... |
644
|
20
|
|
|
|
|
65
|
$self->_round_weight($weights); |
645
|
|
|
|
|
|
|
|
646
|
20
|
|
|
|
|
780
|
my $raffler = App::Sandy::WeightedRaffle->new( |
647
|
|
|
|
|
|
|
weights => $weights, |
648
|
|
|
|
|
|
|
keys => $keys |
649
|
|
|
|
|
|
|
); |
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
# Use App::Sandy::Rand |
652
|
20
|
|
|
6840
|
|
215
|
$seqid_sub = sub { $raffler->weighted_raffle($_[0]) }; |
|
6840
|
|
|
|
|
25781
|
|
653
|
|
|
|
|
|
|
} else { |
654
|
0
|
|
|
|
|
0
|
croak sprintf "Unknown option '%s' for seqid_weight\n", |
655
|
|
|
|
|
|
|
$self->seqid_weight; |
656
|
|
|
|
|
|
|
} |
657
|
|
|
|
|
|
|
|
658
|
20
|
|
|
|
|
590
|
return $seqid_sub; |
659
|
|
|
|
|
|
|
} |
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
sub _round_weight { |
662
|
20
|
|
|
20
|
|
50
|
my ($self, $weights) = @_; |
663
|
|
|
|
|
|
|
|
664
|
20
|
|
|
|
|
70
|
my $min = min @$weights; |
665
|
|
|
|
|
|
|
|
666
|
20
|
50
|
|
|
|
60
|
if ($min <= 0) { |
667
|
0
|
|
|
|
|
0
|
croak "min weight le to zero: $min"; |
668
|
|
|
|
|
|
|
} |
669
|
|
|
|
|
|
|
|
670
|
20
|
50
|
|
|
|
60
|
my $factor = $min < 1 |
671
|
|
|
|
|
|
|
? (1 / $min) |
672
|
|
|
|
|
|
|
: 1; |
673
|
|
|
|
|
|
|
|
674
|
20
|
|
|
|
|
40
|
for my $weight (@$weights) { |
675
|
100
|
|
|
|
|
190
|
$weight = int($weight * $factor + 0.5); |
676
|
|
|
|
|
|
|
} |
677
|
|
|
|
|
|
|
} |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
sub _populate_key_weight { |
680
|
20
|
|
|
20
|
|
50
|
my ($self, $piece_table, $calc_weight) = @_; |
681
|
|
|
|
|
|
|
|
682
|
20
|
|
|
|
|
35
|
my (@keys, @weights); |
683
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
# It needs to be sorted in order to the |
685
|
|
|
|
|
|
|
# seed works |
686
|
20
|
|
|
|
|
105
|
for my $seq_id (sort keys %$piece_table) { |
687
|
100
|
|
|
|
|
155
|
my $type_h = $piece_table->{$seq_id}; |
688
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
# If there is no alternative seq_id, then |
690
|
|
|
|
|
|
|
# set a factor to correct the size. |
691
|
|
|
|
|
|
|
# It is necessary because the seq_ids with |
692
|
|
|
|
|
|
|
# alternative and reference will double its |
693
|
|
|
|
|
|
|
# own coverage |
694
|
100
|
50
|
|
|
|
195
|
my $factor = scalar keys %$type_h == 1 |
695
|
|
|
|
|
|
|
? 2 |
696
|
|
|
|
|
|
|
: 1; |
697
|
|
|
|
|
|
|
|
698
|
100
|
|
|
|
|
215
|
for my $type (sort keys %$type_h) { |
699
|
|
|
|
|
|
|
|
700
|
100
|
|
|
|
|
290
|
my %key = ( |
701
|
|
|
|
|
|
|
'seq_id' => $seq_id, |
702
|
|
|
|
|
|
|
'type' => $type |
703
|
|
|
|
|
|
|
); |
704
|
|
|
|
|
|
|
|
705
|
100
|
|
|
|
|
150
|
my $weight = $calc_weight->($seq_id, $type); |
706
|
|
|
|
|
|
|
|
707
|
100
|
|
|
|
|
160
|
push @keys => \%key; |
708
|
100
|
|
|
|
|
230
|
push @weights => $weight * $factor; |
709
|
|
|
|
|
|
|
} |
710
|
|
|
|
|
|
|
} |
711
|
|
|
|
|
|
|
|
712
|
20
|
|
|
|
|
80
|
return (\@keys, \@weights); |
713
|
|
|
|
|
|
|
} |
714
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
sub _build_genomic_variation_names { |
716
|
20
|
|
|
20
|
|
40
|
my $self = shift; |
717
|
20
|
50
|
|
|
|
665
|
if ($self->genomic_variation) { |
718
|
0
|
|
|
|
|
0
|
return sprintf "[%s]", => join ", ", @{ $self->genomic_variation }; |
|
0
|
|
|
|
|
0
|
|
719
|
|
|
|
|
|
|
} |
720
|
|
|
|
|
|
|
} |
721
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
sub _retrieve_genomic_variation { |
723
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
724
|
0
|
|
|
|
|
0
|
my $variation = App::Sandy::DB::Handle::Variation->new; |
725
|
0
|
|
|
|
|
0
|
return $variation->retrievedb($self->genomic_variation); |
726
|
|
|
|
|
|
|
} |
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
sub _build_piece_table { |
729
|
20
|
|
|
20
|
|
45
|
my $self = shift; |
730
|
|
|
|
|
|
|
|
731
|
20
|
|
|
|
|
590
|
my $genomic_variation = $self->_genomic_variation_names; |
732
|
20
|
|
|
|
|
30
|
my $indexed_snv; |
733
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
# Retrieve genomic variation if the user provided it |
735
|
20
|
50
|
|
|
|
50
|
if (defined $genomic_variation) { |
736
|
0
|
|
|
|
|
0
|
$indexed_snv = $self->_retrieve_genomic_variation; |
737
|
0
|
|
|
|
|
0
|
log_msg ":: Validate genomic variation '$genomic_variation' against indexed fasta ..."; |
738
|
0
|
|
|
|
|
0
|
$self->_validate_indexed_snv_against_fasta($indexed_snv); |
739
|
|
|
|
|
|
|
} |
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
# Catch index fasta |
742
|
20
|
|
|
|
|
525
|
my $indexed_fasta = $self->_fasta; |
743
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
# Build piece table |
745
|
20
|
|
|
|
|
35
|
my %piece_table; |
746
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
# Let's construct the piece_table |
748
|
20
|
|
|
|
|
75
|
log_msg ":: Build piece table ..."; |
749
|
|
|
|
|
|
|
|
750
|
20
|
|
|
|
|
150
|
while (my ($seq_id, $fasta_h) = each %$indexed_fasta) { |
751
|
100
|
|
|
|
|
170
|
my $seq = \$fasta_h->{seq}; |
752
|
100
|
|
|
|
|
3415
|
my $std_seq_id = $self->_get_seqname($seq_id); |
753
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
# Initialize piece tables for $seq_id ref |
755
|
100
|
|
|
|
|
2710
|
$piece_table{$seq_id}{ref}{table} = App::Sandy::PieceTable->new(orig => $seq); |
756
|
|
|
|
|
|
|
|
757
|
|
|
|
|
|
|
# If there is indexed_snv for seq_id, then construct the piece table with it |
758
|
100
|
50
|
33
|
|
|
465
|
if (defined $indexed_snv && defined $indexed_snv->{$std_seq_id}) { |
759
|
0
|
|
|
|
|
0
|
my $snvs = $indexed_snv->{$std_seq_id}; |
760
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
# Filter only the homozygotic snvs to feed reference seq_id |
762
|
0
|
|
|
|
|
0
|
my @snvs_homo = grep { $_->{plo} eq 'HO' } @$snvs; |
|
0
|
|
|
|
|
0
|
|
763
|
|
|
|
|
|
|
|
764
|
0
|
0
|
|
|
|
0
|
if (@snvs_homo) { |
765
|
|
|
|
|
|
|
# Populate reference seq_id |
766
|
0
|
|
|
|
|
0
|
$self->_populate_piece_table($piece_table{$seq_id}{ref}{table}, \@snvs_homo); |
767
|
|
|
|
|
|
|
} |
768
|
|
|
|
|
|
|
|
769
|
|
|
|
|
|
|
# Initialize piece tables for $seq_id alt |
770
|
0
|
|
|
|
|
0
|
$piece_table{$seq_id}{alt}{table} = App::Sandy::PieceTable->new(orig => $seq); |
771
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
# Populate alternative seq_id |
773
|
0
|
|
|
|
|
0
|
$self->_populate_piece_table($piece_table{$seq_id}{alt}{table}, $snvs); |
774
|
|
|
|
|
|
|
} |
775
|
|
|
|
|
|
|
} |
776
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
# Initialize the logical offsets and valodate the |
778
|
|
|
|
|
|
|
# new size due to the genomic variation |
779
|
|
|
|
|
|
|
|
780
|
20
|
|
|
|
|
60
|
my @blacklist; |
781
|
|
|
|
|
|
|
|
782
|
20
|
|
|
|
|
155
|
for my $seq_id (keys %piece_table) { |
783
|
100
|
|
|
|
|
185
|
my $type_h = delete $piece_table{$seq_id}; |
784
|
|
|
|
|
|
|
|
785
|
100
|
|
|
|
|
280
|
for my $type (keys %$type_h) { |
786
|
100
|
|
|
|
|
190
|
my $table_h = delete $type_h->{$type}; |
787
|
100
|
|
|
|
|
175
|
my $table = $table_h->{table}; |
788
|
|
|
|
|
|
|
|
789
|
|
|
|
|
|
|
# Initialize the logical offset |
790
|
100
|
|
|
|
|
330
|
$table->calculate_logical_offset; |
791
|
|
|
|
|
|
|
|
792
|
|
|
|
|
|
|
# Get the new size |
793
|
100
|
|
|
|
|
2555
|
my $new_size = $table->logical_len; |
794
|
|
|
|
|
|
|
|
795
|
100
|
50
|
|
|
|
2540
|
unless ($self->truncate) { |
796
|
100
|
|
|
|
|
2365
|
my $class = ref $self->seq; |
797
|
|
|
|
|
|
|
|
798
|
100
|
100
|
|
|
|
360
|
if ($class eq 'App::Sandy::Seq::SingleEnd') { |
|
|
50
|
|
|
|
|
|
799
|
50
|
50
|
|
|
|
1185
|
if ($new_size < $self->seq->read_mean) { |
800
|
0
|
|
|
|
|
0
|
log_msg ":: Skip '$seq_id:$type': So many deletions resulted in a sequence lesser than the required read-mean"; |
801
|
0
|
|
|
|
|
0
|
next; |
802
|
|
|
|
|
|
|
} |
803
|
|
|
|
|
|
|
} elsif ($class eq 'App::Sandy::Seq::PairedEnd') { |
804
|
50
|
50
|
|
|
|
1175
|
if ($new_size < $self->seq->fragment_mean) { |
805
|
0
|
|
|
|
|
0
|
log_msg ":: Skip '$seq_id:$type': So many deletions resulted in a sequence lesser than the required fragment mean"; |
806
|
0
|
|
|
|
|
0
|
next; |
807
|
|
|
|
|
|
|
} |
808
|
|
|
|
|
|
|
} else { |
809
|
0
|
|
|
|
|
0
|
die "No valid options for 'seq'"; |
810
|
|
|
|
|
|
|
} |
811
|
|
|
|
|
|
|
} |
812
|
|
|
|
|
|
|
|
813
|
|
|
|
|
|
|
# If all's right |
814
|
100
|
|
|
|
|
300
|
$table_h->{size} = $new_size; |
815
|
100
|
|
|
|
|
330
|
$type_h->{$type} = $table_h; |
816
|
|
|
|
|
|
|
} |
817
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
# if there is at least one type, |
819
|
|
|
|
|
|
|
# then return it to the piece_table |
820
|
100
|
50
|
|
|
|
235
|
if (%$type_h) { |
821
|
100
|
|
|
|
|
230
|
$piece_table{$seq_id} = $type_h; |
822
|
|
|
|
|
|
|
|
823
|
|
|
|
|
|
|
# else, just remove it! |
824
|
|
|
|
|
|
|
} else { |
825
|
0
|
|
|
|
|
0
|
push @blacklist => $seq_id; |
826
|
|
|
|
|
|
|
} |
827
|
|
|
|
|
|
|
} |
828
|
|
|
|
|
|
|
|
829
|
20
|
50
|
|
|
|
110
|
unless (%piece_table) { |
830
|
0
|
|
|
|
|
0
|
die "All fasta entries were removed due to deletions. ", |
831
|
|
|
|
|
|
|
"Please, verify the genomic variation '$genomic_variation'\n"; |
832
|
|
|
|
|
|
|
} |
833
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
# If fasta_rtree has entries |
835
|
20
|
50
|
|
|
|
740
|
unless ($self->_has_no_fasta_rtree) { |
836
|
|
|
|
|
|
|
# Remove no valid entries from id -> pid relation |
837
|
0
|
0
|
|
|
|
0
|
$self->_delete_fasta_rtree(@blacklist) if @blacklist; |
838
|
|
|
|
|
|
|
} |
839
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
# Make the id -> pid relationship |
841
|
20
|
|
|
|
|
85
|
$self->_populate_fasta_tree; |
842
|
|
|
|
|
|
|
|
843
|
|
|
|
|
|
|
# HASH -> SEQ_ID -> @(REF @ALT) -> @(TABLE SIZE) |
844
|
20
|
|
|
|
|
530
|
return \%piece_table; |
845
|
|
|
|
|
|
|
} |
846
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
sub _populate_piece_table { |
848
|
0
|
|
|
0
|
|
0
|
my ($self, $table, $snvs) = @_; |
849
|
|
|
|
|
|
|
|
850
|
0
|
|
|
|
|
0
|
for my $snv (@$snvs) { |
851
|
|
|
|
|
|
|
# If there is an ID, make sure that it is not a comma, colon |
852
|
|
|
|
|
|
|
# separated list. Else, make sure to keep the ref/alt length |
853
|
|
|
|
|
|
|
# to max 25+25+1=51 |
854
|
|
|
|
|
|
|
my $annot = defined $snv->{id} && $snv->{id} ne '.' |
855
|
|
|
|
|
|
|
? sprintf "%d:%s" => $snv->{pos} + 1, (split(/[,;]/, $snv->{id}))[0] |
856
|
0
|
0
|
0
|
|
|
0
|
: sprintf "%d:%.25s/%.25s" => $snv->{pos} + 1, $snv->{ref}, $snv->{alt}; |
857
|
|
|
|
|
|
|
|
858
|
|
|
|
|
|
|
# Insertion |
859
|
0
|
0
|
|
|
|
0
|
if ($snv->{ref} eq '-') { |
|
|
0
|
|
|
|
|
|
860
|
0
|
|
|
|
|
0
|
$table->insert(\$snv->{alt}, $snv->{pos}, $annot); |
861
|
|
|
|
|
|
|
|
862
|
|
|
|
|
|
|
# Deletion |
863
|
|
|
|
|
|
|
} elsif ($snv->{alt} eq '-') { |
864
|
0
|
|
|
|
|
0
|
$table->delete($snv->{pos}, length $snv->{ref}, $annot); |
865
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
# Change |
867
|
|
|
|
|
|
|
} else { |
868
|
0
|
|
|
|
|
0
|
$table->change(\$snv->{alt}, $snv->{pos}, length $snv->{ref}, $annot); |
869
|
|
|
|
|
|
|
} |
870
|
|
|
|
|
|
|
} |
871
|
|
|
|
|
|
|
} |
872
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
sub _validate_indexed_snv_against_fasta { |
874
|
0
|
|
|
0
|
|
0
|
my ($self, $indexed_snv) = @_; |
875
|
|
|
|
|
|
|
|
876
|
0
|
|
|
|
|
0
|
my $indexed_fasta = $self->_fasta; |
877
|
0
|
|
|
|
|
0
|
my $genomic_variation = $self->_genomic_variation_names; |
878
|
|
|
|
|
|
|
|
879
|
0
|
|
|
|
|
0
|
for my $std_seq_id (keys %$indexed_snv) { |
880
|
0
|
|
|
|
|
0
|
my $snvs = delete $indexed_snv->{$std_seq_id}; |
881
|
0
|
|
|
|
|
0
|
my $seq_id = $self->_get_seqname($std_seq_id); |
882
|
|
|
|
|
|
|
|
883
|
0
|
0
|
0
|
|
|
0
|
unless (defined $seq_id && exists $indexed_fasta->{$seq_id}) { |
884
|
0
|
|
|
|
|
0
|
next; |
885
|
|
|
|
|
|
|
} |
886
|
|
|
|
|
|
|
|
887
|
0
|
|
|
|
|
0
|
my $seq = \$indexed_fasta->{$seq_id}{seq}; |
888
|
0
|
|
|
|
|
0
|
my $size = $indexed_fasta->{$seq_id}{size}; |
889
|
|
|
|
|
|
|
|
890
|
0
|
|
|
|
|
0
|
my @saved_snvs; |
891
|
|
|
|
|
|
|
|
892
|
0
|
|
|
|
|
0
|
for my $snv (@$snvs) { |
893
|
|
|
|
|
|
|
# Insertions may accur until one base after the |
894
|
|
|
|
|
|
|
# end of the sequence, not more |
895
|
0
|
0
|
0
|
|
|
0
|
if (($snv->{ref} eq '-' && $snv->{pos} > $size) || ($snv->{ref} ne '-' && $snv->{pos} >= $size)) { |
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
896
|
|
|
|
|
|
|
log_msg sprintf ":: In validating '%s': Position, %s/%s at %s:%d, outside fasta sequence", |
897
|
0
|
|
|
|
|
0
|
$genomic_variation, $snv->{ref}, $snv->{alt}, $seq_id, $snv->{pos} + 1; |
898
|
|
|
|
|
|
|
|
899
|
|
|
|
|
|
|
# Next snv |
900
|
0
|
|
|
|
|
0
|
next; |
901
|
|
|
|
|
|
|
# Deletions and changes. Just verify if the reference exists |
902
|
|
|
|
|
|
|
} elsif ($snv->{ref} ne '-') { |
903
|
0
|
|
|
|
|
0
|
my $ref = substr $$seq, $snv->{pos}, length($snv->{ref}); |
904
|
|
|
|
|
|
|
|
905
|
0
|
0
|
|
|
|
0
|
if (uc($ref) ne uc($snv->{ref})) { |
906
|
|
|
|
|
|
|
log_msg sprintf ":: In validating '%s': Not found reference '%s' at fasta position %s:%d", |
907
|
0
|
|
|
|
|
0
|
$genomic_variation, $snv->{ref}, $seq_id, $snv->{pos} + 1; |
908
|
|
|
|
|
|
|
|
909
|
|
|
|
|
|
|
# Next snv |
910
|
0
|
|
|
|
|
0
|
next; |
911
|
|
|
|
|
|
|
} |
912
|
|
|
|
|
|
|
} |
913
|
|
|
|
|
|
|
|
914
|
0
|
|
|
|
|
0
|
push @saved_snvs => $snv; |
915
|
|
|
|
|
|
|
} |
916
|
|
|
|
|
|
|
|
917
|
0
|
0
|
|
|
|
0
|
if (@saved_snvs) { |
918
|
0
|
|
|
|
|
0
|
$indexed_snv->{$std_seq_id} = [@saved_snvs]; |
919
|
|
|
|
|
|
|
} |
920
|
|
|
|
|
|
|
} |
921
|
|
|
|
|
|
|
} |
922
|
|
|
|
|
|
|
|
923
|
|
|
|
|
|
|
sub _calculate_number_of_reads { |
924
|
8
|
|
|
8
|
|
19
|
my $self = shift; |
925
|
8
|
|
|
|
|
16
|
my $number_of_reads; |
926
|
|
|
|
|
|
|
|
927
|
8
|
50
|
|
|
|
239
|
if ($self->count_loops_by eq 'coverage') { |
|
|
0
|
|
|
|
|
|
928
|
|
|
|
|
|
|
# It is needed to calculate the genome size |
929
|
8
|
|
|
|
|
259
|
my $fasta = $self->_fasta; |
930
|
8
|
|
|
|
|
27
|
my $fasta_size = 0; |
931
|
8
|
|
|
|
|
29
|
$fasta_size += $fasta->{$_}{size} for keys %{ $fasta }; |
|
8
|
|
|
|
|
101
|
|
932
|
8
|
|
|
|
|
239
|
$number_of_reads = int(($fasta_size * $self->coverage) / $self->seq->read_mean); |
933
|
|
|
|
|
|
|
# In case it is paired-end read, divide the number of reads by 2 because |
934
|
|
|
|
|
|
|
# App::Sandy::Seq::PairedEnd class returns 2 reads at time |
935
|
8
|
100
|
|
|
|
215
|
$number_of_reads = int($number_of_reads / 2) |
936
|
|
|
|
|
|
|
if ref($self->seq) eq 'App::Sandy::Seq::PairedEnd'; |
937
|
|
|
|
|
|
|
} elsif ($self->count_loops_by eq 'number-of-reads') { |
938
|
0
|
|
|
|
|
0
|
$number_of_reads = $self->number_of_reads; |
939
|
|
|
|
|
|
|
} else { |
940
|
0
|
|
|
|
|
0
|
croak sprintf "Unknown option '%s' for calculating the number of reads\n", |
941
|
|
|
|
|
|
|
$self->count_loops_by; |
942
|
|
|
|
|
|
|
} |
943
|
|
|
|
|
|
|
|
944
|
|
|
|
|
|
|
# Maybe the number_of_reads is zero. It may occur due to the low coverage and/or fasta_file size |
945
|
8
|
50
|
|
|
|
30
|
if ($number_of_reads <= 0) { |
946
|
0
|
|
|
|
|
0
|
die "The computed number of reads is equal to zero.\n" . |
947
|
|
|
|
|
|
|
"It may occur due to the low coverage, fasta-file sequence size or number of reads directly passed by the user\n"; |
948
|
|
|
|
|
|
|
} |
949
|
|
|
|
|
|
|
|
950
|
8
|
|
|
|
|
24
|
return $number_of_reads; |
951
|
|
|
|
|
|
|
} |
952
|
|
|
|
|
|
|
|
953
|
|
|
|
|
|
|
sub _calculate_parent_count { |
954
|
0
|
|
|
0
|
|
0
|
my ($self, $counter_ref) = @_; |
955
|
0
|
0
|
|
|
|
0
|
return if $self->_has_no_fasta_rtree; |
956
|
|
|
|
|
|
|
|
957
|
0
|
|
|
|
|
0
|
my %parent_count; |
958
|
|
|
|
|
|
|
|
959
|
0
|
|
|
|
|
0
|
while (my ($id, $count) = each %$counter_ref) { |
960
|
0
|
|
|
|
|
0
|
my $pid = $self->_get_fasta_rtree($id); |
961
|
0
|
0
|
|
|
|
0
|
$parent_count{$pid} += $count if defined $pid; |
962
|
|
|
|
|
|
|
} |
963
|
|
|
|
|
|
|
|
964
|
0
|
|
|
|
|
0
|
return \%parent_count; |
965
|
|
|
|
|
|
|
} |
966
|
|
|
|
|
|
|
|
967
|
|
|
|
|
|
|
sub run_simulation { |
968
|
8
|
|
|
8
|
0
|
3764
|
my $self = shift; |
969
|
8
|
|
|
|
|
334
|
my $piece_table = $self->_piece_table; |
970
|
|
|
|
|
|
|
|
971
|
|
|
|
|
|
|
# Calculate the number of reads to be generated |
972
|
8
|
|
|
|
|
63
|
my $number_of_reads = $self->_calculate_number_of_reads; |
973
|
|
|
|
|
|
|
|
974
|
|
|
|
|
|
|
# Function that returns strand by strand_bias |
975
|
8
|
|
|
|
|
253
|
my $strand = $self->_strand; |
976
|
|
|
|
|
|
|
|
977
|
|
|
|
|
|
|
# Function that returns seqid by seqid_weight |
978
|
8
|
|
|
|
|
265
|
my $seqid = $self->_seqid_raffle; |
979
|
|
|
|
|
|
|
|
980
|
|
|
|
|
|
|
# Count file to be generated |
981
|
8
|
|
|
|
|
249
|
my %count_file = ( |
982
|
|
|
|
|
|
|
transcripts => $self->prefix . '_abundance_transcripts.tsv', |
983
|
|
|
|
|
|
|
genes => $self->prefix . '_abundance_genes.tsv', |
984
|
|
|
|
|
|
|
coverage => $self->prefix . '_coverage.tsv' |
985
|
|
|
|
|
|
|
); |
986
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
# Main files |
988
|
8
|
|
|
|
|
218
|
my %files = ( |
989
|
|
|
|
|
|
|
bam => [ |
990
|
|
|
|
|
|
|
$self->prefix . '.bam' |
991
|
|
|
|
|
|
|
], |
992
|
|
|
|
|
|
|
sam => [ |
993
|
|
|
|
|
|
|
$self->prefix . '.sam' |
994
|
|
|
|
|
|
|
], |
995
|
|
|
|
|
|
|
single_fastq => [ |
996
|
|
|
|
|
|
|
$self->prefix . '_R1_001.fastq' |
997
|
|
|
|
|
|
|
], |
998
|
|
|
|
|
|
|
single_fastq_gz => [ |
999
|
|
|
|
|
|
|
$self->prefix . '_R1_001.fastq.gz' |
1000
|
|
|
|
|
|
|
], |
1001
|
|
|
|
|
|
|
join_paired_fastq => [ |
1002
|
|
|
|
|
|
|
$self->prefix . '.fastq' |
1003
|
|
|
|
|
|
|
], |
1004
|
|
|
|
|
|
|
join_paired_fastq_gz => [ |
1005
|
|
|
|
|
|
|
$self->prefix . '.fastq.gz' |
1006
|
|
|
|
|
|
|
], |
1007
|
|
|
|
|
|
|
paired_fastq => [ |
1008
|
|
|
|
|
|
|
$self->prefix . '_R1_001.fastq', |
1009
|
|
|
|
|
|
|
$self->prefix . '_R2_001.fastq' |
1010
|
|
|
|
|
|
|
], |
1011
|
|
|
|
|
|
|
paired_fastq_gz => [ |
1012
|
|
|
|
|
|
|
$self->prefix . '_R1_001.fastq.gz', |
1013
|
|
|
|
|
|
|
$self->prefix . '_R2_001.fastq.gz' |
1014
|
|
|
|
|
|
|
] |
1015
|
|
|
|
|
|
|
); |
1016
|
|
|
|
|
|
|
|
1017
|
|
|
|
|
|
|
# Set the file class in order to know |
1018
|
|
|
|
|
|
|
# how to deal with all files options |
1019
|
8
|
|
|
|
|
222
|
my $seq_class = ref $self->seq; |
1020
|
8
|
|
|
|
|
271
|
my $output_format = $self->output_format; |
1021
|
8
|
|
|
|
|
22
|
my $file_class; |
1022
|
|
|
|
|
|
|
|
1023
|
|
|
|
|
|
|
# This mess is necessary to catch the |
1024
|
|
|
|
|
|
|
# right value into the %files hash |
1025
|
8
|
50
|
|
|
|
113
|
if ($output_format =~ /(sam|bam)/) { |
|
|
50
|
|
|
|
|
|
1026
|
0
|
|
|
|
|
0
|
$file_class = $output_format; |
1027
|
|
|
|
|
|
|
} elsif ($output_format =~ /fastq/) { |
1028
|
8
|
100
|
|
|
|
39
|
if ($seq_class eq 'App::Sandy::Seq::SingleEnd') { |
|
|
50
|
|
|
|
|
|
1029
|
5
|
|
|
|
|
15
|
$file_class = 'single_fastq'; |
1030
|
|
|
|
|
|
|
} elsif ($seq_class eq 'App::Sandy::Seq::PairedEnd') { |
1031
|
3
|
|
|
|
|
15
|
$file_class = 'paired_fastq'; |
1032
|
3
|
50
|
|
|
|
150
|
$file_class = "join_$file_class" if $self->join_paired_ends; |
1033
|
|
|
|
|
|
|
} else { |
1034
|
0
|
|
|
|
|
0
|
croak "Something wrong with the seq class: $seq_class"; |
1035
|
|
|
|
|
|
|
} |
1036
|
8
|
50
|
|
|
|
30
|
if ($output_format eq 'fastq.gz') { |
1037
|
0
|
|
|
|
|
0
|
$file_class .= '_gz'; |
1038
|
|
|
|
|
|
|
} |
1039
|
|
|
|
|
|
|
} else { |
1040
|
0
|
|
|
|
|
0
|
croak "Something wrong with the output format: $output_format"; |
1041
|
|
|
|
|
|
|
} |
1042
|
|
|
|
|
|
|
|
1043
|
|
|
|
|
|
|
# Forks |
1044
|
8
|
|
|
|
|
253
|
my $number_of_jobs = $self->jobs; |
1045
|
8
|
|
|
|
|
159
|
my $pm = Parallel::ForkManager->new($number_of_jobs); |
1046
|
|
|
|
|
|
|
|
1047
|
|
|
|
|
|
|
# Parent child pids |
1048
|
8
|
|
|
|
|
27375
|
my $parent_pid = $$; |
1049
|
8
|
|
|
|
|
24
|
my @child_pid; |
1050
|
|
|
|
|
|
|
|
1051
|
|
|
|
|
|
|
# Temporary files tracker |
1052
|
|
|
|
|
|
|
my @tmp_files; |
1053
|
|
|
|
|
|
|
|
1054
|
|
|
|
|
|
|
# Run in parent right after creating child process |
1055
|
|
|
|
|
|
|
$pm->run_on_start( |
1056
|
|
|
|
|
|
|
sub { |
1057
|
10
|
|
|
10
|
|
31570
|
my $pid = shift; |
1058
|
10
|
|
|
|
|
303
|
push @child_pid => $pid; |
1059
|
|
|
|
|
|
|
} |
1060
|
8
|
|
|
|
|
112
|
); |
1061
|
|
|
|
|
|
|
|
1062
|
|
|
|
|
|
|
# Count the overall cumulative number of reads for each seqid |
1063
|
8
|
|
|
|
|
65
|
my %counters; |
1064
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
# Run in parent right after finishing child process |
1066
|
|
|
|
|
|
|
$pm->run_on_finish( |
1067
|
|
|
|
|
|
|
sub { |
1068
|
8
|
|
|
8
|
|
40076476
|
my ($pid, $exit_code, $ident, $exit_signal, $core_dump, $counter_ref) = @_; |
1069
|
8
|
|
|
|
|
121
|
while (my ($seqid, $count) = each %$counter_ref) { |
1070
|
40
|
|
|
|
|
267
|
$counters{$seqid} += $count; |
1071
|
|
|
|
|
|
|
} |
1072
|
|
|
|
|
|
|
} |
1073
|
8
|
|
|
|
|
69
|
); |
1074
|
|
|
|
|
|
|
|
1075
|
8
|
50
|
|
|
|
166
|
log_msg sprintf ":: Creating %d child %s ...", |
1076
|
|
|
|
|
|
|
$number_of_jobs, $number_of_jobs == 1 ? "job" : "jobs"; |
1077
|
|
|
|
|
|
|
|
1078
|
8
|
|
|
|
|
35
|
for my $tid (1..$number_of_jobs) { |
1079
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
1080
|
|
|
|
|
|
|
# Inside parent |
1081
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
1082
|
14
|
|
|
|
|
661
|
log_msg ":: Creating job $tid ..."; |
1083
|
14
|
|
|
|
|
28
|
my @files_t = map { "$_.${parent_pid}.part$tid" } @{ $files{$file_class} }; |
|
19
|
|
|
|
|
159
|
|
|
14
|
|
|
|
|
188
|
|
1084
|
14
|
|
|
|
|
78
|
push @tmp_files => @files_t; |
1085
|
14
|
100
|
|
|
|
113
|
my $pid = $pm->start and next; |
1086
|
|
|
|
|
|
|
|
1087
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
1088
|
|
|
|
|
|
|
# Inside child |
1089
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
1090
|
|
|
|
|
|
|
# Intelace child/parent processes |
1091
|
4
|
|
|
|
|
19924
|
my $sig = App::Sandy::InterlaceProcesses->new(foreign_pid => [$parent_pid]); |
1092
|
|
|
|
|
|
|
|
1093
|
|
|
|
|
|
|
# Get the FASTA blacklist regions ID => @{ TUPLE } |
1094
|
4
|
|
|
|
|
421
|
my $fasta_blacklist = $self->_fasta_blacklist; |
1095
|
|
|
|
|
|
|
|
1096
|
|
|
|
|
|
|
# Set child RNG |
1097
|
4
|
|
|
|
|
302
|
my $rng = App::Sandy::RNG->new($self->seed + $tid); |
1098
|
|
|
|
|
|
|
|
1099
|
|
|
|
|
|
|
# Calculate the number of reads to this job and correct this local index |
1100
|
|
|
|
|
|
|
# to the global index |
1101
|
4
|
|
|
|
|
129
|
my $number_of_reads_t = int($number_of_reads/$number_of_jobs); |
1102
|
4
|
|
|
|
|
57
|
my $last_read_idx = $number_of_reads_t * $tid; |
1103
|
4
|
|
|
|
|
64
|
my $idx = $last_read_idx - $number_of_reads_t + 1; |
1104
|
|
|
|
|
|
|
|
1105
|
|
|
|
|
|
|
# If it is the last job, make it work on the leftover reads of int() truncation |
1106
|
4
|
100
|
|
|
|
62
|
$last_read_idx += $number_of_reads % $number_of_jobs |
1107
|
|
|
|
|
|
|
if $tid == $number_of_jobs; |
1108
|
|
|
|
|
|
|
|
1109
|
4
|
|
|
|
|
247
|
log_msg " => Job $tid: Working on sequences from $idx to $last_read_idx"; |
1110
|
|
|
|
|
|
|
|
1111
|
|
|
|
|
|
|
# Create temporary files |
1112
|
4
|
|
|
|
|
40
|
log_msg " => Job $tid: Creating temporary file: @files_t"; |
1113
|
|
|
|
|
|
|
|
1114
|
|
|
|
|
|
|
# And here we go ... |
1115
|
4
|
|
|
|
|
26
|
my @fhs; |
1116
|
|
|
|
|
|
|
|
1117
|
|
|
|
|
|
|
# Set the right filehandle format |
1118
|
4
|
50
|
|
|
|
219
|
if ($output_format =~ /^(sam|fastq)$/) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1119
|
4
|
|
|
|
|
38
|
@fhs = map { $self->with_open_w($_, 0) } @files_t; |
|
6
|
|
|
|
|
268
|
|
1120
|
|
|
|
|
|
|
} elsif ($output_format eq 'fastq.gz') { |
1121
|
0
|
|
|
|
|
0
|
@fhs = map { $self->with_open_w($_, $self->compression_level) } @files_t; |
|
0
|
|
|
|
|
0
|
|
1122
|
|
|
|
|
|
|
} elsif ($output_format eq 'bam') { |
1123
|
0
|
|
|
|
|
0
|
@fhs = map { $self->with_open_bam_w($_, $self->compression_level) } @files_t; |
|
0
|
|
|
|
|
0
|
|
1124
|
|
|
|
|
|
|
} else { |
1125
|
0
|
|
|
|
|
0
|
croak "Something wrong with the output format: $file_class"; |
1126
|
|
|
|
|
|
|
} |
1127
|
|
|
|
|
|
|
|
1128
|
|
|
|
|
|
|
# sprint_seq gives two entries for paired-emd, so |
1129
|
|
|
|
|
|
|
# if it is a bam|sam|join-paired-ends, it is necessary |
1130
|
|
|
|
|
|
|
# to copy the filehandle in order to print both entries |
1131
|
|
|
|
|
|
|
# to the same file |
1132
|
4
|
50
|
66
|
|
|
120
|
if ($seq_class eq 'App::Sandy::Seq::PairedEnd' |
1133
|
|
|
|
|
|
|
&& $file_class =~ /(sam|bam|join)/) { |
1134
|
0
|
|
|
|
|
0
|
$fhs[1] = $fhs[0]; |
1135
|
|
|
|
|
|
|
} |
1136
|
|
|
|
|
|
|
|
1137
|
|
|
|
|
|
|
# Count the cumulative number of reads for each seqid |
1138
|
4
|
|
|
|
|
22
|
my %counter; |
1139
|
|
|
|
|
|
|
|
1140
|
|
|
|
|
|
|
# If the output format is 'bam|sam' and it is the first job, then |
1141
|
|
|
|
|
|
|
# write the header |
1142
|
4
|
50
|
33
|
|
|
81
|
if ($output_format =~ /^(sam|bam)$/ && $tid == 1) { |
1143
|
0
|
|
|
|
|
0
|
my $header_ref = $self->gen_sam_header($self->argv); |
1144
|
0
|
|
|
|
|
0
|
print {$fhs[0]} "$$header_ref"; |
|
0
|
|
|
|
|
0
|
|
1145
|
|
|
|
|
|
|
} |
1146
|
|
|
|
|
|
|
|
1147
|
|
|
|
|
|
|
# Run simulation in child |
1148
|
4
|
|
66
|
|
|
445
|
for (my $i = $idx; $i <= $last_read_idx and not $sig->signal_catched; $i++) { |
1149
|
6840
|
|
|
|
|
15660
|
my $id = $seqid->($rng); |
1150
|
6840
|
|
|
|
|
16968
|
my $blacklist = $fasta_blacklist->{$id->{seq_id}}; |
1151
|
6840
|
|
|
|
|
15738
|
my $ptable = $piece_table->{$id->{seq_id}}{$id->{type}}; |
1152
|
6840
|
|
|
|
|
9693
|
my @seq_entry; |
1153
|
|
|
|
|
|
|
try { |
1154
|
|
|
|
|
|
|
@seq_entry = $self->sprint_seq($tid, $i, $id->{seq_id}, $id->{type}, |
1155
|
6840
|
|
|
6840
|
|
434086
|
$ptable->{table}, $ptable->{size}, $strand->($rng), $rng, $blacklist); |
1156
|
|
|
|
|
|
|
} catch { |
1157
|
0
|
|
|
0
|
|
0
|
die "Not defined entry for seqid '>$id->{seq_id}' at job $tid: $_"; |
1158
|
|
|
|
|
|
|
} finally { |
1159
|
6840
|
50
|
|
6840
|
|
143378
|
unless (@_) { |
1160
|
6840
|
|
|
|
|
18645
|
for my $fh_idx (0..$#fhs) { |
1161
|
9120
|
|
|
|
|
19306
|
$counter{$id->{seq_id}}++; |
1162
|
9120
|
|
|
|
|
11998
|
print {$fhs[$fh_idx]} "${$seq_entry[$fh_idx]}"; |
|
9120
|
|
|
|
|
16510
|
|
|
9120
|
|
|
|
|
44405
|
|
1163
|
|
|
|
|
|
|
} |
1164
|
|
|
|
|
|
|
} |
1165
|
6840
|
|
|
|
|
53388
|
}; |
1166
|
|
|
|
|
|
|
} |
1167
|
|
|
|
|
|
|
|
1168
|
4
|
|
|
|
|
123
|
log_msg " => Job $tid: Writing and closing file: @files_t"; |
1169
|
|
|
|
|
|
|
|
1170
|
|
|
|
|
|
|
# Close temporary files |
1171
|
|
|
|
|
|
|
# Get index from @files_t in order to avoid |
1172
|
|
|
|
|
|
|
# close the same filehandle twice - When the |
1173
|
|
|
|
|
|
|
# position 1-N is a copy |
1174
|
4
|
|
|
|
|
14
|
for my $fh_idx (0..$#files_t) { |
1175
|
6
|
|
|
|
|
284
|
close $fhs[$fh_idx]; |
1176
|
|
|
|
|
|
|
} |
1177
|
|
|
|
|
|
|
|
1178
|
|
|
|
|
|
|
# If it is a bam and it is the last loop, then |
1179
|
|
|
|
|
|
|
# write a eof marker |
1180
|
4
|
50
|
33
|
|
|
49
|
if ($output_format eq 'bam' && $tid == $number_of_jobs) { |
1181
|
0
|
|
|
|
|
0
|
$self->gen_eof_marker($files_t[0]); |
1182
|
|
|
|
|
|
|
} |
1183
|
|
|
|
|
|
|
|
1184
|
|
|
|
|
|
|
# Child exit |
1185
|
4
|
|
|
|
|
82
|
log_msg " => Job $tid is finished"; |
1186
|
4
|
|
|
|
|
129
|
$pm->finish(0, \%counter); |
1187
|
|
|
|
|
|
|
} |
1188
|
|
|
|
|
|
|
|
1189
|
|
|
|
|
|
|
# Back to parent |
1190
|
|
|
|
|
|
|
# Interlace parent/child(s) processes |
1191
|
4
|
|
|
|
|
1254
|
my $sig = App::Sandy::InterlaceProcesses->new(foreign_pid => \@child_pid); |
1192
|
4
|
|
|
|
|
74
|
$pm->wait_all_children; |
1193
|
|
|
|
|
|
|
|
1194
|
4
|
50
|
|
|
|
598
|
if ($sig->signal_catched) { |
1195
|
0
|
|
|
|
|
0
|
log_msg ":: Termination signal received!"; |
1196
|
|
|
|
|
|
|
} |
1197
|
|
|
|
|
|
|
|
1198
|
4
|
|
|
|
|
118
|
log_msg ":: Saving the work ..."; |
1199
|
|
|
|
|
|
|
|
1200
|
|
|
|
|
|
|
# Concatenate all temporary files |
1201
|
4
|
|
|
|
|
36
|
log_msg ":: Concatenate all temporary files"; |
1202
|
|
|
|
|
|
|
|
1203
|
|
|
|
|
|
|
# Save time. Rename tmp_file (1,2) |
1204
|
4
|
|
|
|
|
13
|
for my $file (@{ $files{$file_class} }) { |
|
4
|
|
|
|
|
80
|
|
1205
|
5
|
|
|
|
|
76
|
my $tmp = shift @tmp_files; |
1206
|
5
|
|
|
|
|
63
|
log_msg " => Concatenating $tmp to $file ..."; |
1207
|
5
|
50
|
|
|
|
487
|
rename $tmp => $file |
1208
|
|
|
|
|
|
|
or die "Cannot create '$file': $!\n"; |
1209
|
|
|
|
|
|
|
} |
1210
|
|
|
|
|
|
|
|
1211
|
|
|
|
|
|
|
# Append to renamed tmp files |
1212
|
4
|
|
|
|
|
45
|
my @fh = map { $self->with_open_a($_) } @{ $files{$file_class} }; |
|
5
|
|
|
|
|
174
|
|
|
4
|
|
|
|
|
30
|
|
1213
|
|
|
|
|
|
|
|
1214
|
4
|
|
|
|
|
44
|
for my $i (0..$#tmp_files) { |
1215
|
5
|
|
|
|
|
27
|
my $fh_idx = $i % scalar @fh; |
1216
|
|
|
|
|
|
|
|
1217
|
5
|
|
|
|
|
91
|
log_msg " => Concatenating $tmp_files[$i] to $files{$file_class}[$fh_idx] ..."; |
1218
|
5
|
50
|
|
|
|
118
|
cat $tmp_files[$i] => $fh[$fh_idx] |
1219
|
|
|
|
|
|
|
or die "Cannot concatenate $tmp_files[$i] to $files{$file_class}[$fh_idx]: $!\n"; |
1220
|
|
|
|
|
|
|
|
1221
|
|
|
|
|
|
|
# Clean up the mess |
1222
|
5
|
50
|
|
|
|
54438
|
unlink $tmp_files[$i] |
1223
|
|
|
|
|
|
|
or die "Cannot remove temporary file '$tmp_files[$i]': $!\n"; |
1224
|
|
|
|
|
|
|
} |
1225
|
|
|
|
|
|
|
|
1226
|
|
|
|
|
|
|
# Close files |
1227
|
4
|
|
|
|
|
25
|
log_msg ":: Writing and closing output file: @{ $files{$file_class} }"; |
|
4
|
|
|
|
|
82
|
|
1228
|
4
|
|
|
|
|
23
|
for my $fh_idx (0..$#fh) { |
1229
|
5
|
50
|
|
|
|
133
|
close $fh[$fh_idx] |
1230
|
|
|
|
|
|
|
or die "Cannot write file $files{$file_class}[$fh_idx]: $!\n"; |
1231
|
|
|
|
|
|
|
} |
1232
|
|
|
|
|
|
|
|
1233
|
4
|
50
|
|
|
|
396
|
if ($self->count_loops_by eq 'number-of-reads') { |
1234
|
|
|
|
|
|
|
# It is necessary to correct the abundance according to |
1235
|
|
|
|
|
|
|
# fragment sequencing end |
1236
|
0
|
0
|
|
|
|
0
|
my $count_factor = ref($self->seq) eq 'App::Sandy::Seq::PairedEnd' |
1237
|
|
|
|
|
|
|
? 2 |
1238
|
|
|
|
|
|
|
: 1; |
1239
|
|
|
|
|
|
|
|
1240
|
|
|
|
|
|
|
# Save transcripts |
1241
|
0
|
|
|
|
|
0
|
log_msg ":: Saving transcripts count"; |
1242
|
0
|
|
|
|
|
0
|
my $fh = $self->with_open_w($count_file{transcripts}, 0); |
1243
|
|
|
|
|
|
|
|
1244
|
0
|
|
|
|
|
0
|
log_msg " => Writing counts to $count_file{transcripts} ..."; |
1245
|
0
|
|
|
|
|
0
|
for my $id (sort keys %counters) { |
1246
|
0
|
|
|
|
|
0
|
printf {$fh} "%s\t%d\n" => $id, |
1247
|
0
|
|
|
|
|
0
|
int($counters{$id} / $count_factor); |
1248
|
|
|
|
|
|
|
} |
1249
|
|
|
|
|
|
|
|
1250
|
|
|
|
|
|
|
# Close transcripts file |
1251
|
0
|
|
|
|
|
0
|
log_msg ":; Writing and closing $count_file{transcripts} ..."; |
1252
|
0
|
0
|
|
|
|
0
|
close $fh |
1253
|
|
|
|
|
|
|
or die "Cannot write file $count_file{transcripts}: $!\n"; |
1254
|
|
|
|
|
|
|
|
1255
|
|
|
|
|
|
|
# Calculate 'gene' like expression |
1256
|
0
|
|
|
|
|
0
|
my $parent_count = $self->_calculate_parent_count(\%counters); |
1257
|
|
|
|
|
|
|
|
1258
|
0
|
0
|
|
|
|
0
|
if (%$parent_count) { |
1259
|
|
|
|
|
|
|
# Save genes |
1260
|
0
|
|
|
|
|
0
|
log_msg ":: Saving genes count"; |
1261
|
0
|
|
|
|
|
0
|
my $fh = $self->with_open_w($count_file{genes}, 0); |
1262
|
|
|
|
|
|
|
|
1263
|
0
|
|
|
|
|
0
|
log_msg " => Writing counts to $count_file{genes} ..."; |
1264
|
0
|
|
|
|
|
0
|
for my $id (sort keys %$parent_count) { |
1265
|
0
|
|
|
|
|
0
|
printf {$fh} "%s\t%d\n" => $id, |
1266
|
0
|
|
|
|
|
0
|
int($parent_count->{$id} / $count_factor); |
1267
|
|
|
|
|
|
|
} |
1268
|
|
|
|
|
|
|
|
1269
|
|
|
|
|
|
|
# Close genes file |
1270
|
0
|
|
|
|
|
0
|
log_msg ":; Writing and closing $count_file{genes} ..."; |
1271
|
0
|
0
|
|
|
|
0
|
close $fh |
1272
|
|
|
|
|
|
|
or die "Cannot write file $count_file{genes}: $!\n"; |
1273
|
|
|
|
|
|
|
} |
1274
|
|
|
|
|
|
|
} else { |
1275
|
|
|
|
|
|
|
# Save coverage |
1276
|
4
|
|
|
|
|
51
|
log_msg ":: Saving coverage count"; |
1277
|
4
|
|
|
|
|
100
|
my $fh = $self->with_open_w($count_file{coverage}, 0); |
1278
|
|
|
|
|
|
|
|
1279
|
4
|
|
|
|
|
43
|
log_msg " => Writing counts to $count_file{coverage} ..."; |
1280
|
4
|
|
|
|
|
70
|
for my $id (sort keys %counters) { |
1281
|
20
|
|
|
|
|
50
|
printf {$fh} "%s\t%d\n" => $id, $counters{$id}; |
|
20
|
|
|
|
|
113
|
|
1282
|
|
|
|
|
|
|
} |
1283
|
|
|
|
|
|
|
|
1284
|
|
|
|
|
|
|
# Close coverage file |
1285
|
4
|
|
|
|
|
42
|
log_msg ":; Writing and closing $count_file{coverage} ..."; |
1286
|
4
|
50
|
|
|
|
400
|
close $fh |
1287
|
|
|
|
|
|
|
or die "Cannot write file $count_file{coverage}: $!\n"; |
1288
|
|
|
|
|
|
|
} |
1289
|
|
|
|
|
|
|
} |
1290
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
__END__ |
1292
|
|
|
|
|
|
|
|
1293
|
|
|
|
|
|
|
=pod |
1294
|
|
|
|
|
|
|
|
1295
|
|
|
|
|
|
|
=encoding UTF-8 |
1296
|
|
|
|
|
|
|
|
1297
|
|
|
|
|
|
|
=head1 NAME |
1298
|
|
|
|
|
|
|
|
1299
|
|
|
|
|
|
|
App::Sandy::Simulator - Class responsible to make the simulation |
1300
|
|
|
|
|
|
|
|
1301
|
|
|
|
|
|
|
=head1 VERSION |
1302
|
|
|
|
|
|
|
|
1303
|
|
|
|
|
|
|
version 0.25 |
1304
|
|
|
|
|
|
|
|
1305
|
|
|
|
|
|
|
=head1 AUTHORS |
1306
|
|
|
|
|
|
|
|
1307
|
|
|
|
|
|
|
=over 4 |
1308
|
|
|
|
|
|
|
|
1309
|
|
|
|
|
|
|
=item * |
1310
|
|
|
|
|
|
|
|
1311
|
|
|
|
|
|
|
Thiago L. A. Miller <tmiller@mochsl.org.br> |
1312
|
|
|
|
|
|
|
|
1313
|
|
|
|
|
|
|
=item * |
1314
|
|
|
|
|
|
|
|
1315
|
|
|
|
|
|
|
J. Leonel Buzzo <lbuzzo@mochsl.org.br> |
1316
|
|
|
|
|
|
|
|
1317
|
|
|
|
|
|
|
=item * |
1318
|
|
|
|
|
|
|
|
1319
|
|
|
|
|
|
|
Felipe R. C. dos Santos <fsantos@mochsl.org.br> |
1320
|
|
|
|
|
|
|
|
1321
|
|
|
|
|
|
|
=item * |
1322
|
|
|
|
|
|
|
|
1323
|
|
|
|
|
|
|
Helena B. Conceição <hconceicao@mochsl.org.br> |
1324
|
|
|
|
|
|
|
|
1325
|
|
|
|
|
|
|
=item * |
1326
|
|
|
|
|
|
|
|
1327
|
|
|
|
|
|
|
Rodrigo Barreiro <rbarreiro@mochsl.org.br> |
1328
|
|
|
|
|
|
|
|
1329
|
|
|
|
|
|
|
=item * |
1330
|
|
|
|
|
|
|
|
1331
|
|
|
|
|
|
|
Gabriela Guardia <gguardia@mochsl.org.br> |
1332
|
|
|
|
|
|
|
|
1333
|
|
|
|
|
|
|
=item * |
1334
|
|
|
|
|
|
|
|
1335
|
|
|
|
|
|
|
Fernanda Orpinelli <forpinelli@mochsl.org.br> |
1336
|
|
|
|
|
|
|
|
1337
|
|
|
|
|
|
|
=item * |
1338
|
|
|
|
|
|
|
|
1339
|
|
|
|
|
|
|
Rafael Mercuri <rmercuri@mochsl.org.br> |
1340
|
|
|
|
|
|
|
|
1341
|
|
|
|
|
|
|
=item * |
1342
|
|
|
|
|
|
|
|
1343
|
|
|
|
|
|
|
Rodrigo Barreiro <rbarreiro@mochsl.org.br> |
1344
|
|
|
|
|
|
|
|
1345
|
|
|
|
|
|
|
=item * |
1346
|
|
|
|
|
|
|
|
1347
|
|
|
|
|
|
|
Pedro A. F. Galante <pgalante@mochsl.org.br> |
1348
|
|
|
|
|
|
|
|
1349
|
|
|
|
|
|
|
=back |
1350
|
|
|
|
|
|
|
|
1351
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
1352
|
|
|
|
|
|
|
|
1353
|
|
|
|
|
|
|
This software is Copyright (c) 2023 by Teaching and Research Institute from Sírio-Libanês Hospital. |
1354
|
|
|
|
|
|
|
|
1355
|
|
|
|
|
|
|
This is free software, licensed under: |
1356
|
|
|
|
|
|
|
|
1357
|
|
|
|
|
|
|
The GNU General Public License, Version 3, June 2007 |
1358
|
|
|
|
|
|
|
|
1359
|
|
|
|
|
|
|
=cut |