line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package App::Sandy::Simulator; |
2
|
|
|
|
|
|
|
# ABSTRACT: Class responsible to make the simulation |
3
|
|
|
|
|
|
|
|
4
|
6
|
|
|
6
|
|
26835
|
use App::Sandy::Base 'class'; |
|
6
|
|
|
|
|
12
|
|
|
6
|
|
|
|
|
58
|
|
5
|
6
|
|
|
6
|
|
54
|
use App::Sandy::Seq::SingleEnd; |
|
6
|
|
|
|
|
17
|
|
|
6
|
|
|
|
|
233
|
|
6
|
6
|
|
|
6
|
|
35
|
use App::Sandy::Seq::PairedEnd; |
|
6
|
|
|
|
|
17
|
|
|
6
|
|
|
|
|
150
|
|
7
|
6
|
|
|
6
|
|
3792
|
use App::Sandy::InterlaceProcesses; |
|
6
|
|
|
|
|
3407
|
|
|
6
|
|
|
|
|
248
|
|
8
|
6
|
|
|
6
|
|
545
|
use App::Sandy::RNG; |
|
6
|
|
|
|
|
22
|
|
|
6
|
|
|
|
|
292
|
|
9
|
6
|
|
|
6
|
|
3882
|
use App::Sandy::WeightedRaffle; |
|
6
|
|
|
|
|
2491
|
|
|
6
|
|
|
|
|
237
|
|
10
|
6
|
|
|
6
|
|
1256
|
use App::Sandy::PieceTable; |
|
6
|
|
|
|
|
515
|
|
|
6
|
|
|
|
|
179
|
|
11
|
6
|
|
|
6
|
|
2791
|
use App::Sandy::DB::Handle::Expression; |
|
6
|
|
|
|
|
1918
|
|
|
6
|
|
|
|
|
251
|
|
12
|
6
|
|
|
6
|
|
3266
|
use App::Sandy::DB::Handle::Variation; |
|
6
|
|
|
|
|
2007
|
|
|
6
|
|
|
|
|
211
|
|
13
|
6
|
|
|
6
|
|
41
|
use List::Util 'min'; |
|
6
|
|
|
|
|
12
|
|
|
6
|
|
|
|
|
385
|
|
14
|
6
|
|
|
6
|
|
3021
|
use File::Cat 'cat'; |
|
6
|
|
|
|
|
3129
|
|
|
6
|
|
|
|
|
468
|
|
15
|
6
|
|
|
6
|
|
3932
|
use Parallel::ForkManager; |
|
6
|
|
|
|
|
343133
|
|
|
6
|
|
|
|
|
54740
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
with qw/App::Sandy::Role::IO App::Sandy::Role::SeqID/; |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
our $VERSION = '0.24'; # VERSION |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
has 'argv' => ( |
22
|
|
|
|
|
|
|
is => 'ro', |
23
|
|
|
|
|
|
|
isa => 'ArrayRef[Str]', |
24
|
|
|
|
|
|
|
required => 1 |
25
|
|
|
|
|
|
|
); |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
has 'truncate' => ( |
28
|
|
|
|
|
|
|
is => 'ro', |
29
|
|
|
|
|
|
|
isa => 'Bool', |
30
|
|
|
|
|
|
|
required => 1 |
31
|
|
|
|
|
|
|
); |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
has 'seed' => ( |
34
|
|
|
|
|
|
|
is => 'ro', |
35
|
|
|
|
|
|
|
isa => 'Int', |
36
|
|
|
|
|
|
|
required => 1 |
37
|
|
|
|
|
|
|
); |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
has 'jobs' => ( |
40
|
|
|
|
|
|
|
is => 'ro', |
41
|
|
|
|
|
|
|
isa => 'My:IntGt0', |
42
|
|
|
|
|
|
|
required => 1 |
43
|
|
|
|
|
|
|
); |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
has 'prefix' => ( |
46
|
|
|
|
|
|
|
is => 'ro', |
47
|
|
|
|
|
|
|
isa => 'Str', |
48
|
|
|
|
|
|
|
required => 1 |
49
|
|
|
|
|
|
|
); |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
has 'join_paired_ends' => ( |
52
|
|
|
|
|
|
|
is => 'ro', |
53
|
|
|
|
|
|
|
isa => 'Bool', |
54
|
|
|
|
|
|
|
required => 1 |
55
|
|
|
|
|
|
|
); |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
has 'output_format' => ( |
58
|
|
|
|
|
|
|
is => 'ro', |
59
|
|
|
|
|
|
|
isa => 'My:Format', |
60
|
|
|
|
|
|
|
required => 1 |
61
|
|
|
|
|
|
|
); |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
has 'compression_level' => ( |
64
|
|
|
|
|
|
|
is => 'ro', |
65
|
|
|
|
|
|
|
isa => 'My:Level', |
66
|
|
|
|
|
|
|
required => 1 |
67
|
|
|
|
|
|
|
); |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
has 'fasta_file' => ( |
70
|
|
|
|
|
|
|
is => 'ro', |
71
|
|
|
|
|
|
|
isa => 'My:Fasta', |
72
|
|
|
|
|
|
|
required => 1 |
73
|
|
|
|
|
|
|
); |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
has 'coverage' => ( |
76
|
|
|
|
|
|
|
is => 'ro', |
77
|
|
|
|
|
|
|
isa => 'My:NumGt0', |
78
|
|
|
|
|
|
|
required => 0 |
79
|
|
|
|
|
|
|
); |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
has 'number_of_reads' => ( |
82
|
|
|
|
|
|
|
is => 'ro', |
83
|
|
|
|
|
|
|
isa => 'My:IntGt0', |
84
|
|
|
|
|
|
|
required => 0 |
85
|
|
|
|
|
|
|
); |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
has 'count_loops_by' => ( |
88
|
|
|
|
|
|
|
is => 'ro', |
89
|
|
|
|
|
|
|
isa => 'My:CountLoopBy', |
90
|
|
|
|
|
|
|
required => 1 |
91
|
|
|
|
|
|
|
); |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
has 'strand_bias' => ( |
94
|
|
|
|
|
|
|
is => 'ro', |
95
|
|
|
|
|
|
|
isa => 'My:StrandBias', |
96
|
|
|
|
|
|
|
required => 1 |
97
|
|
|
|
|
|
|
); |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
has 'seqid_weight' => ( |
100
|
|
|
|
|
|
|
is => 'ro', |
101
|
|
|
|
|
|
|
isa => 'My:SeqIdWeight', |
102
|
|
|
|
|
|
|
required => 1 |
103
|
|
|
|
|
|
|
); |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
has 'expression_matrix' => ( |
106
|
|
|
|
|
|
|
is => 'ro', |
107
|
|
|
|
|
|
|
isa => 'Str', |
108
|
|
|
|
|
|
|
required => 0 |
109
|
|
|
|
|
|
|
); |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
has 'genomic_variation' => ( |
112
|
|
|
|
|
|
|
is => 'ro', |
113
|
|
|
|
|
|
|
isa => 'ArrayRef[Str]', |
114
|
|
|
|
|
|
|
required => 0 |
115
|
|
|
|
|
|
|
); |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
has '_genomic_variation_names' => ( |
118
|
|
|
|
|
|
|
is => 'ro', |
119
|
|
|
|
|
|
|
isa => 'Maybe[Str]', |
120
|
|
|
|
|
|
|
builder => '_build_genomic_variation_names', |
121
|
|
|
|
|
|
|
lazy_build => 1 |
122
|
|
|
|
|
|
|
); |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
has 'seq' => ( |
125
|
|
|
|
|
|
|
is => 'ro', |
126
|
|
|
|
|
|
|
isa => 'App::Sandy::Seq::SingleEnd | App::Sandy::Seq::PairedEnd', |
127
|
|
|
|
|
|
|
required => 1, |
128
|
|
|
|
|
|
|
handles => [ qw{ sprint_seq gen_sam_header gen_eof_marker } ] |
129
|
|
|
|
|
|
|
); |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
has '_fasta' => ( |
132
|
|
|
|
|
|
|
is => 'ro', |
133
|
|
|
|
|
|
|
isa => 'My:IdxFasta', |
134
|
|
|
|
|
|
|
builder => '_build_fasta', |
135
|
|
|
|
|
|
|
lazy_build => 1 |
136
|
|
|
|
|
|
|
); |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
has '_fasta_tree' => ( |
139
|
|
|
|
|
|
|
traits => ['Hash'], |
140
|
|
|
|
|
|
|
is => 'ro', |
141
|
|
|
|
|
|
|
isa => 'HashRef[ArrayRef]', |
142
|
|
|
|
|
|
|
default => sub { {} }, |
143
|
|
|
|
|
|
|
handles => { |
144
|
|
|
|
|
|
|
_set_fasta_tree => 'set', |
145
|
|
|
|
|
|
|
_get_fasta_tree => 'get', |
146
|
|
|
|
|
|
|
_exists_fasta_tree => 'exists', |
147
|
|
|
|
|
|
|
_fasta_tree_pairs => 'kv', |
148
|
|
|
|
|
|
|
_has_no_fasta_tree => 'is_empty' |
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
); |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
has '_fasta_rtree' => ( |
153
|
|
|
|
|
|
|
traits => ['Hash'], |
154
|
|
|
|
|
|
|
is => 'ro', |
155
|
|
|
|
|
|
|
isa => 'HashRef[Str]', |
156
|
|
|
|
|
|
|
default => sub { {} }, |
157
|
|
|
|
|
|
|
handles => { |
158
|
|
|
|
|
|
|
_set_fasta_rtree => 'set', |
159
|
|
|
|
|
|
|
_get_fasta_rtree => 'get', |
160
|
|
|
|
|
|
|
_delete_fasta_rtree => 'delete', |
161
|
|
|
|
|
|
|
_exists_fasta_rtree => 'exists', |
162
|
|
|
|
|
|
|
_fasta_rtree_pairs => 'kv', |
163
|
|
|
|
|
|
|
_has_no_fasta_rtree => 'is_empty' |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
); |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
has '_fasta_blacklist' => ( |
168
|
|
|
|
|
|
|
is => 'ro', |
169
|
|
|
|
|
|
|
isa => 'My:FastaBlackList', |
170
|
|
|
|
|
|
|
builder => '_build_fasta_blacklist', |
171
|
|
|
|
|
|
|
lazy_build => 1 |
172
|
|
|
|
|
|
|
); |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
has '_seqname' => ( |
175
|
|
|
|
|
|
|
traits => ['Hash'], |
176
|
|
|
|
|
|
|
is => 'ro', |
177
|
|
|
|
|
|
|
isa => 'HashRef[Str]', |
178
|
|
|
|
|
|
|
default => sub { {} }, |
179
|
|
|
|
|
|
|
handles => { |
180
|
|
|
|
|
|
|
_set_seqname => 'set', |
181
|
|
|
|
|
|
|
_get_seqname => 'get' |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
); |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
has '_piece_table' => ( |
186
|
|
|
|
|
|
|
is => 'ro', |
187
|
|
|
|
|
|
|
isa => 'HashRef[HashRef[My:PieceTable]]', |
188
|
|
|
|
|
|
|
builder => '_build_piece_table', |
189
|
|
|
|
|
|
|
lazy_build => 1 |
190
|
|
|
|
|
|
|
); |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
has '_strand' => ( |
193
|
|
|
|
|
|
|
is => 'ro', |
194
|
|
|
|
|
|
|
isa => 'CodeRef', |
195
|
|
|
|
|
|
|
builder => '_build_strand', |
196
|
|
|
|
|
|
|
lazy_build => 1 |
197
|
|
|
|
|
|
|
); |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
has '_seqid_raffle' => ( |
200
|
|
|
|
|
|
|
is => 'ro', |
201
|
|
|
|
|
|
|
isa => 'CodeRef', |
202
|
|
|
|
|
|
|
builder => '_build_seqid_raffle', |
203
|
|
|
|
|
|
|
lazy_build => 1 |
204
|
|
|
|
|
|
|
); |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
sub BUILD { |
207
|
20
|
|
|
20
|
0
|
45
|
my $self = shift; |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
# If seqid_weight is 'count', then expression_matrix must be defined |
210
|
20
|
50
|
33
|
|
|
575
|
if ($self->seqid_weight eq 'count' and not defined $self->expression_matrix) { |
211
|
0
|
|
|
|
|
0
|
croak "seqid_weight=count requires a expression_matrix\n"; |
212
|
|
|
|
|
|
|
} |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
# If count_loops_by is 'coverage', then coverage must be defined. Else if |
215
|
|
|
|
|
|
|
# it is equal to 'number_of_reads', then number_of_reads must be defined |
216
|
20
|
50
|
33
|
|
|
625
|
if ($self->count_loops_by eq 'coverage' and not defined $self->coverage) { |
|
|
50
|
33
|
|
|
|
|
217
|
0
|
|
|
|
|
0
|
croak "count_loops_by=coverage requires a coverage number\n"; |
218
|
|
|
|
|
|
|
} elsif ($self->count_loops_by eq 'number_of_reads' and not defined $self->number_of_reads) { |
219
|
0
|
|
|
|
|
0
|
croak "count_loops_by=number_of_reads requires a number_of_reads number\n"; |
220
|
|
|
|
|
|
|
} |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
## Just to ensure that the lazy attributes are built before &new returns |
223
|
20
|
|
|
|
|
570
|
$self->_piece_table; |
224
|
20
|
|
|
|
|
565
|
$self->_seqid_raffle; |
225
|
20
|
|
|
|
|
550
|
$self->_fasta; |
226
|
20
|
|
|
|
|
635
|
$self->_fasta_blacklist; |
227
|
20
|
|
|
|
|
530
|
$self->_strand; |
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
sub _build_strand { |
231
|
20
|
|
|
20
|
|
45
|
my $self = shift; |
232
|
20
|
|
|
|
|
25
|
my $strand_sub; |
233
|
|
|
|
|
|
|
|
234
|
20
|
50
|
|
|
|
620
|
if ($self->strand_bias eq 'plus') { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
235
|
0
|
|
|
0
|
|
0
|
$strand_sub = sub {1}; |
|
0
|
|
|
|
|
0
|
|
236
|
|
|
|
|
|
|
} elsif ($self->strand_bias eq 'minus') { |
237
|
0
|
|
|
0
|
|
0
|
$strand_sub = sub {0}; |
|
0
|
|
|
|
|
0
|
|
238
|
|
|
|
|
|
|
} elsif ($self->strand_bias eq 'random') { |
239
|
|
|
|
|
|
|
# Use App::Sandy::RNG |
240
|
20
|
|
|
6840
|
|
190
|
$strand_sub = sub { $_[0]->get_n(2) }; |
|
6840
|
|
|
|
|
39003
|
|
241
|
|
|
|
|
|
|
} else { |
242
|
0
|
|
|
|
|
0
|
croak sprintf "Unknown option '%s' for strand bias\n", |
243
|
|
|
|
|
|
|
$self->strand_bias; |
244
|
|
|
|
|
|
|
} |
245
|
|
|
|
|
|
|
|
246
|
20
|
|
|
|
|
540
|
return $strand_sub; |
247
|
|
|
|
|
|
|
} |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
sub _index_fasta { |
250
|
20
|
|
|
20
|
|
45
|
my $self = shift; |
251
|
20
|
|
|
|
|
510
|
my $fasta = $self->fasta_file; |
252
|
|
|
|
|
|
|
|
253
|
20
|
|
|
|
|
120
|
my $fh = $self->with_open_r($fasta); |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
# indexed_genome = ID => (seq, len) |
256
|
20
|
|
|
|
|
75
|
my %indexed_fasta; |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
# >ID|PID as in gencode transcripts |
259
|
|
|
|
|
|
|
my %fasta_rtree; |
260
|
20
|
|
|
|
|
0
|
my $id; |
261
|
|
|
|
|
|
|
|
262
|
20
|
|
|
|
|
750
|
while (<$fh>) { |
263
|
860
|
|
|
|
|
1245
|
chomp; |
264
|
860
|
50
|
|
|
|
1510
|
next if /^;/; |
265
|
860
|
100
|
|
|
|
1525
|
if (/^>/) { |
266
|
100
|
|
|
|
|
300
|
my @fields = split /\|/; |
267
|
100
|
|
|
|
|
195
|
$id = $fields[0]; |
268
|
100
|
|
|
|
|
300
|
$id =~ s/^>//; |
269
|
100
|
|
|
|
|
320
|
$id =~ s/^\s+|\s+$//g; |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
# Seq ID standardization in order to manage comparations |
272
|
|
|
|
|
|
|
# between chr1, Chr1, CHR1, 1 etc; |
273
|
100
|
|
|
|
|
340
|
my $std_id = $self->with_std_seqid($id); |
274
|
100
|
|
|
|
|
4885
|
$self->_set_seqname( |
275
|
|
|
|
|
|
|
$id => $std_id, |
276
|
|
|
|
|
|
|
$std_id => $id |
277
|
|
|
|
|
|
|
); |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
# It is necessary to catch gene -> transcript relation |
280
|
|
|
|
|
|
|
# # TODO: Make a hash tarit for indexed fasta |
281
|
100
|
50
|
|
|
|
565
|
if (defined $fields[1]) { |
282
|
0
|
|
|
|
|
0
|
my $pid = $fields[1]; |
283
|
0
|
|
|
|
|
0
|
$pid =~ s/^\s+|\s+$//g; |
284
|
0
|
|
|
|
|
0
|
$fasta_rtree{$id} = $pid; |
285
|
|
|
|
|
|
|
} |
286
|
|
|
|
|
|
|
} else { |
287
|
760
|
50
|
|
|
|
1280
|
die "Error reading fasta file '$fasta': Not defined id" |
288
|
|
|
|
|
|
|
unless defined $id; |
289
|
760
|
|
|
|
|
2520
|
$indexed_fasta{$id}{seq} .= uc($_); |
290
|
|
|
|
|
|
|
} |
291
|
|
|
|
|
|
|
} |
292
|
|
|
|
|
|
|
|
293
|
20
|
|
|
|
|
140
|
for (keys %indexed_fasta) { |
294
|
100
|
|
|
|
|
225
|
$indexed_fasta{$_}{size} = length $indexed_fasta{$_}{seq}; |
295
|
|
|
|
|
|
|
} |
296
|
|
|
|
|
|
|
|
297
|
20
|
50
|
|
|
|
80
|
unless (%indexed_fasta) { |
298
|
0
|
|
|
|
|
0
|
die "Error parsing '$fasta'. Maybe the file is empty\n"; |
299
|
|
|
|
|
|
|
} |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
$fh->close |
302
|
20
|
50
|
|
|
|
195
|
or die "Cannot close file $fasta: $!\n"; |
303
|
|
|
|
|
|
|
|
304
|
20
|
50
|
|
|
|
480
|
$self->_set_fasta_rtree(%fasta_rtree) if %fasta_rtree; |
305
|
20
|
|
|
|
|
115
|
return \%indexed_fasta; |
306
|
|
|
|
|
|
|
} |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
sub _build_fasta { |
309
|
20
|
|
|
20
|
|
35
|
my $self = shift; |
310
|
20
|
|
|
|
|
570
|
my $fasta = $self->fasta_file; |
311
|
|
|
|
|
|
|
|
312
|
20
|
|
|
|
|
105
|
log_msg ":: Indexing fasta file '$fasta' ..."; |
313
|
20
|
|
|
|
|
70
|
my $indexed_fasta = $self->_index_fasta; |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
# Validate genome about the read size required |
316
|
20
|
|
|
|
|
120
|
log_msg ":: Validating fasta file '$fasta' ..."; |
317
|
|
|
|
|
|
|
# Entries to remove |
318
|
20
|
|
|
|
|
35
|
my @blacklist; |
319
|
|
|
|
|
|
|
|
320
|
20
|
50
|
|
|
|
850
|
unless ($self->truncate) { |
321
|
20
|
|
|
|
|
80
|
for my $id (keys %$indexed_fasta) { |
322
|
100
|
|
|
|
|
170
|
my $index_size = $indexed_fasta->{$id}{size}; |
323
|
100
|
|
|
|
|
2505
|
my $class = ref $self->seq; |
324
|
|
|
|
|
|
|
|
325
|
100
|
100
|
|
|
|
275
|
if ($class eq 'App::Sandy::Seq::SingleEnd') { |
|
|
50
|
|
|
|
|
|
326
|
50
|
|
|
|
|
1220
|
my $read_mean = $self->seq->read_mean; |
327
|
50
|
50
|
|
|
|
155
|
if ($index_size < $read_mean) { |
328
|
0
|
|
|
|
|
0
|
log_msg ":: Parsing fasta file '$fasta': Seqid sequence length (>$id => $index_size) lesser than required read mean ($read_mean)"; |
329
|
0
|
|
|
|
|
0
|
delete $indexed_fasta->{$id}; |
330
|
0
|
|
|
|
|
0
|
push @blacklist => $id; |
331
|
|
|
|
|
|
|
} |
332
|
|
|
|
|
|
|
} elsif ($class eq 'App::Sandy::Seq::PairedEnd') { |
333
|
50
|
|
|
|
|
1415
|
my $fragment_mean = $self->seq->fragment_mean; |
334
|
50
|
50
|
|
|
|
185
|
if ($index_size < $fragment_mean) { |
335
|
0
|
|
|
|
|
0
|
log_msg ":: Parsing fasta file '$fasta': Seqid sequence length (>$id => $index_size) lesser than required fragment mean ($fragment_mean)"; |
336
|
0
|
|
|
|
|
0
|
delete $indexed_fasta->{$id}; |
337
|
0
|
|
|
|
|
0
|
push @blacklist => $id; |
338
|
|
|
|
|
|
|
} |
339
|
|
|
|
|
|
|
} else { |
340
|
0
|
|
|
|
|
0
|
croak "Unknown option '$class' for sequencing type\n"; |
341
|
|
|
|
|
|
|
} |
342
|
|
|
|
|
|
|
} |
343
|
|
|
|
|
|
|
} |
344
|
|
|
|
|
|
|
|
345
|
20
|
50
|
|
|
|
105
|
unless (%$indexed_fasta) { |
346
|
0
|
|
|
|
|
0
|
die sprintf "Fasta file '%s' has no valid entry\n" => $self->fasta_file; |
347
|
|
|
|
|
|
|
} |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
# If fasta_rtree has entries |
350
|
20
|
50
|
|
|
|
760
|
unless ($self->_has_no_fasta_rtree) { |
351
|
|
|
|
|
|
|
# Remove no valid entries from id -> pid relation |
352
|
0
|
0
|
|
|
|
0
|
$self->_delete_fasta_rtree(@blacklist) if @blacklist; |
353
|
|
|
|
|
|
|
} |
354
|
|
|
|
|
|
|
|
355
|
20
|
|
|
|
|
545
|
return $indexed_fasta; |
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
sub _populate_fasta_tree { |
359
|
20
|
|
|
20
|
|
35
|
my $self = shift; |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
# If fasta_rtree has entries |
362
|
20
|
50
|
|
|
|
735
|
unless ($self->_has_no_fasta_rtree) { |
363
|
|
|
|
|
|
|
# Build parent -> child ids relation |
364
|
0
|
|
|
|
|
0
|
my %fasta_tree; |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
# Reverse fasta_rtree to pid -> \@ids |
367
|
0
|
|
|
|
|
0
|
for my $pair ($self->_fasta_rtree_pairs) { |
368
|
0
|
|
|
|
|
0
|
my ($id, $pid) = (@$pair); |
369
|
0
|
|
|
|
|
0
|
push @{ $fasta_tree{$pid} } => $id; |
|
0
|
|
|
|
|
0
|
|
370
|
|
|
|
|
|
|
} |
371
|
|
|
|
|
|
|
|
372
|
0
|
|
|
|
|
0
|
$self->_set_fasta_tree(%fasta_tree); |
373
|
|
|
|
|
|
|
} |
374
|
|
|
|
|
|
|
} |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
sub _build_fasta_blacklist { |
377
|
20
|
|
|
20
|
|
85
|
my $self = shift; |
378
|
|
|
|
|
|
|
|
379
|
20
|
|
|
|
|
90
|
log_msg ":: Index unidentified (NNN..) regions"; |
380
|
|
|
|
|
|
|
|
381
|
20
|
|
|
|
|
560
|
my $indexed_fasta = $self->_fasta; |
382
|
20
|
|
|
|
|
540
|
my $class = ref $self->seq; |
383
|
|
|
|
|
|
|
|
384
|
20
|
|
|
|
|
70
|
my %fasta_blacklist; |
385
|
|
|
|
|
|
|
my @blacklist_id; |
386
|
|
|
|
|
|
|
|
387
|
20
|
|
|
|
|
75
|
for my $id (keys %$indexed_fasta) { |
388
|
100
|
|
|
|
|
170
|
my $seq = $indexed_fasta->{$id}{seq}; |
389
|
100
|
|
|
|
|
155
|
my $seq_len = $indexed_fasta->{$id}{size}; |
390
|
|
|
|
|
|
|
|
391
|
100
|
|
|
|
|
175
|
my $offset = 0; |
392
|
100
|
|
|
|
|
160
|
my ($st, $en) = (0, 0); |
393
|
100
|
|
|
|
|
135
|
my $pos = 0; |
394
|
100
|
|
|
|
|
140
|
my $init = 0; |
395
|
100
|
|
|
|
|
110
|
my $len = 0; |
396
|
100
|
|
|
|
|
125
|
my @stack; |
397
|
|
|
|
|
|
|
|
398
|
100
|
|
|
|
|
260
|
while (($pos = index $seq, "N", $offset) != -1) { |
399
|
0
|
0
|
|
|
|
0
|
unless ($init) { |
400
|
0
|
|
|
|
|
0
|
$st = $pos; |
401
|
0
|
|
|
|
|
0
|
$en = $pos; |
402
|
0
|
|
|
|
|
0
|
$init = 1; |
403
|
|
|
|
|
|
|
} |
404
|
|
|
|
|
|
|
|
405
|
0
|
0
|
|
|
|
0
|
if ($en < ($pos - 1)) { |
406
|
0
|
|
|
|
|
0
|
push @stack => [$st, $en]; |
407
|
0
|
|
|
|
|
0
|
$len += $en - $st + 1; |
408
|
0
|
|
|
|
|
0
|
$st = $pos; |
409
|
|
|
|
|
|
|
} |
410
|
|
|
|
|
|
|
|
411
|
0
|
|
|
|
|
0
|
$en = $pos; |
412
|
0
|
|
|
|
|
0
|
$offset = $pos + 1; |
413
|
|
|
|
|
|
|
} |
414
|
|
|
|
|
|
|
|
415
|
100
|
50
|
|
|
|
180
|
if ($init) { |
416
|
0
|
|
|
|
|
0
|
push @stack => [$st, $en]; |
417
|
0
|
|
|
|
|
0
|
$len += $en - $st + 1; |
418
|
|
|
|
|
|
|
} |
419
|
|
|
|
|
|
|
|
420
|
100
|
100
|
|
|
|
2605
|
my $read_len = $class eq 'App::Sandy::Seq::SingleEnd' |
421
|
|
|
|
|
|
|
? $self->seq->read_mean |
422
|
|
|
|
|
|
|
: $self->seq->fragment_mean; |
423
|
|
|
|
|
|
|
|
424
|
100
|
50
|
|
|
|
220
|
if ($len > ($seq_len - $read_len)) { |
425
|
0
|
|
|
|
|
0
|
log_msg ":: Seqid '$id' has too much NNN and will not be used"; |
426
|
0
|
|
|
|
|
0
|
delete $indexed_fasta->{$id}; |
427
|
0
|
|
|
|
|
0
|
push @blacklist_id => $id; |
428
|
|
|
|
|
|
|
} else { |
429
|
100
|
|
|
|
|
350
|
$fasta_blacklist{$id} = \@stack; |
430
|
|
|
|
|
|
|
} |
431
|
|
|
|
|
|
|
} |
432
|
|
|
|
|
|
|
|
433
|
20
|
50
|
|
|
|
120
|
unless (%$indexed_fasta) { |
434
|
0
|
|
|
|
|
0
|
die sprintf "Fasta file '%s' has too much NNN regions\n" => $self->fasta_file; |
435
|
|
|
|
|
|
|
} |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
# If fasta_rtree has entries |
438
|
20
|
50
|
|
|
|
750
|
unless ($self->_has_no_fasta_rtree) { |
439
|
|
|
|
|
|
|
# Remove no valid entries from id -> pid relation |
440
|
0
|
0
|
|
|
|
0
|
$self->_delete_fasta_rtree(@blacklist_id) if @blacklist_id; |
441
|
|
|
|
|
|
|
} |
442
|
|
|
|
|
|
|
|
443
|
20
|
|
|
|
|
770
|
return \%fasta_blacklist; |
444
|
|
|
|
|
|
|
} |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
sub _retrieve_expression_matrix { |
447
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
448
|
0
|
|
|
|
|
0
|
my $expression = App::Sandy::DB::Handle::Expression->new; |
449
|
0
|
|
|
|
|
0
|
return $expression->retrievedb($self->expression_matrix); |
450
|
|
|
|
|
|
|
} |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
sub _build_seqid_raffle { |
453
|
20
|
|
|
20
|
|
30
|
my $self = shift; |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
# Get the piece table |
456
|
20
|
|
|
|
|
665
|
my $piece_table = $self->_piece_table; |
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
# The builded function |
459
|
20
|
|
|
|
|
40
|
my $seqid_sub; |
460
|
|
|
|
|
|
|
|
461
|
20
|
50
|
|
|
|
545
|
if ($self->seqid_weight eq 'same') { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
462
|
0
|
|
|
0
|
|
0
|
my ($keys, $weights) = $self->_populate_key_weight($piece_table, sub { 1 }); |
|
0
|
|
|
|
|
0
|
|
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
# If weight == 1 means that there are 2 keys for |
465
|
|
|
|
|
|
|
# the same seq_id. |
466
|
|
|
|
|
|
|
# If weight == 2 means that there is only one key |
467
|
|
|
|
|
|
|
# for the seq_id, so I double that key |
468
|
0
|
|
|
|
|
0
|
for (my $i = 0; $i < @$weights; $i++) { |
469
|
0
|
0
|
|
|
|
0
|
if ($weights->[$i] > 1) { |
470
|
0
|
|
|
|
|
0
|
push @$keys => $keys->[$i]; |
471
|
|
|
|
|
|
|
} |
472
|
|
|
|
|
|
|
} |
473
|
|
|
|
|
|
|
|
474
|
0
|
|
|
|
|
0
|
my $keys_size = scalar @$keys; |
475
|
|
|
|
|
|
|
# Use App::Sandy::RNG |
476
|
0
|
|
|
0
|
|
0
|
$seqid_sub = sub { $keys->[$_[0]->get_n($keys_size)] }; |
|
0
|
|
|
|
|
0
|
|
477
|
|
|
|
|
|
|
} elsif ($self->seqid_weight eq 'count') { |
478
|
|
|
|
|
|
|
# Catch expression-matrix entry from database |
479
|
0
|
|
|
|
|
0
|
my $indexed_file = $self->_retrieve_expression_matrix; |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
# Catch indexed fasta |
482
|
0
|
|
|
|
|
0
|
my $indexed_fasta = $self->_fasta; |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
# Validate expression_matrix |
485
|
0
|
|
|
|
|
0
|
for my $id (keys %$indexed_file) { |
486
|
|
|
|
|
|
|
# If not exists into indexed_fasta, it must then exist into fasta_tree |
487
|
0
|
0
|
0
|
|
|
0
|
unless (exists $piece_table->{$id} || $self->_exists_fasta_tree($id)) { |
488
|
0
|
|
|
|
|
0
|
log_msg sprintf ":: Ignoring seqid '%s' from expression-matrix '%s': It is not found into the indexed fasta" |
489
|
|
|
|
|
|
|
=> $id, $self->expression_matrix; |
490
|
0
|
|
|
|
|
0
|
delete $indexed_file->{$id}; |
491
|
|
|
|
|
|
|
} |
492
|
|
|
|
|
|
|
} |
493
|
|
|
|
|
|
|
|
494
|
0
|
0
|
|
|
|
0
|
unless (%$indexed_file) { |
495
|
0
|
|
|
|
|
0
|
die sprintf "No valid seqid entry of the expression-matrix '%s' is recorded into the indexed fasta\n" |
496
|
|
|
|
|
|
|
=> $self->expression_matrix; |
497
|
|
|
|
|
|
|
} |
498
|
|
|
|
|
|
|
|
499
|
0
|
|
|
|
|
0
|
my (%ptable_ind, %ptable_cluster); |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
# Split indexed_file seq_ids between those |
502
|
|
|
|
|
|
|
# into piece_table and those that represents a cluster |
503
|
|
|
|
|
|
|
# of seq_ids as in gene -> transcript relationship |
504
|
0
|
|
|
|
|
0
|
for my $seq_id (keys %$indexed_file) { |
505
|
0
|
0
|
|
|
|
0
|
if (exists $piece_table->{$seq_id}) { |
506
|
0
|
|
|
|
|
0
|
$ptable_ind{$seq_id} = $piece_table->{$seq_id}; |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
} else { |
509
|
0
|
|
|
|
|
0
|
my $ids = $self->_get_fasta_tree($seq_id); |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
# Bug catcher |
512
|
0
|
0
|
|
|
|
0
|
unless (@$ids) { |
513
|
0
|
|
|
|
|
0
|
croak "seq_id '$seq_id' not found into piece_table"; |
514
|
|
|
|
|
|
|
} |
515
|
|
|
|
|
|
|
|
516
|
0
|
|
|
|
|
0
|
$ptable_cluster{$seq_id} = $ids; |
517
|
|
|
|
|
|
|
} |
518
|
|
|
|
|
|
|
} |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
# Let's calculate the weight taking in acount |
521
|
|
|
|
|
|
|
# the size increase/decrease |
522
|
|
|
|
|
|
|
my $calc_ind_weight = sub { |
523
|
0
|
|
|
0
|
|
0
|
my ($seq_id, $type) = @_; |
524
|
|
|
|
|
|
|
|
525
|
0
|
|
|
|
|
0
|
my $counts = $indexed_file->{$seq_id}; |
526
|
0
|
|
|
|
|
0
|
my $size = $piece_table->{$seq_id}{$type}{size}; |
527
|
0
|
|
|
|
|
0
|
my $fasta_size = $indexed_fasta->{$seq_id}{size}; |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
# Correct the weight according to the |
530
|
|
|
|
|
|
|
# genomic variation change by the ratio |
531
|
|
|
|
|
|
|
# between the table size and fasta size |
532
|
0
|
|
|
|
|
0
|
my $factor = $size / $fasta_size; |
533
|
|
|
|
|
|
|
|
534
|
0
|
|
|
|
|
0
|
return $counts * $factor; |
535
|
0
|
|
|
|
|
0
|
}; |
536
|
|
|
|
|
|
|
|
537
|
0
|
|
|
|
|
0
|
my ($keys, $weights); |
538
|
|
|
|
|
|
|
|
539
|
0
|
0
|
|
|
|
0
|
if (%ptable_ind) { |
540
|
0
|
|
|
|
|
0
|
($keys, $weights) = $self->_populate_key_weight(\%ptable_ind, |
541
|
|
|
|
|
|
|
$calc_ind_weight); |
542
|
|
|
|
|
|
|
} |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
# If there are seq_id cluster like, then its is |
545
|
|
|
|
|
|
|
# time to calculate these weights |
546
|
0
|
|
|
|
|
0
|
for my $seq_id (sort keys %ptable_cluster) { |
547
|
0
|
|
|
|
|
0
|
my %ptable; |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
# Slice piece_table hash |
550
|
0
|
|
|
|
|
0
|
my $ids = $ptable_cluster{$seq_id}; |
551
|
0
|
|
|
|
|
0
|
@ptable{@$ids} = @$piece_table{@$ids}; |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
# total size among all ids of cluster |
554
|
0
|
|
|
|
|
0
|
my %total; |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
# Calculate the total size by type |
557
|
0
|
|
|
|
|
0
|
for my $type_h (values %ptable) { |
558
|
0
|
|
|
|
|
0
|
for my $type (keys %$type_h) { |
559
|
0
|
|
|
|
|
0
|
$total{$type} += $type_h->{$type}{size}; |
560
|
|
|
|
|
|
|
} |
561
|
|
|
|
|
|
|
} |
562
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
# Calculate the weight taking in acount the size increase/decrease |
564
|
|
|
|
|
|
|
# and the ratio between the total size by type and the table size. |
565
|
|
|
|
|
|
|
# The problem here is that I must divide the 'counts' for some 'seq_id' |
566
|
|
|
|
|
|
|
# among all ids that belong to it |
567
|
|
|
|
|
|
|
my $calc_cluster_weight = sub { |
568
|
0
|
|
|
0
|
|
0
|
my ($id, $type) = @_; |
569
|
|
|
|
|
|
|
|
570
|
0
|
|
|
|
|
0
|
my $counts = $indexed_file->{$seq_id}; |
571
|
0
|
|
|
|
|
0
|
my $size = $piece_table->{$id}{$type}{size}; |
572
|
0
|
|
|
|
|
0
|
my $fasta_size = $indexed_fasta->{$id}{size}; |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
# Divide the counts among all ids |
575
|
0
|
|
|
|
|
0
|
my $ratio = $size / $total{$type}; |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
# Correct the weight according to the size |
578
|
0
|
|
|
|
|
0
|
my $factor = $size / $fasta_size; |
579
|
|
|
|
|
|
|
|
580
|
0
|
|
|
|
|
0
|
return $counts * $factor * $ratio; |
581
|
0
|
|
|
|
|
0
|
}; |
582
|
|
|
|
|
|
|
|
583
|
0
|
|
|
|
|
0
|
my ($k, $w) = $self->_populate_key_weight(\%ptable, |
584
|
|
|
|
|
|
|
$calc_cluster_weight); |
585
|
|
|
|
|
|
|
|
586
|
0
|
|
|
|
|
0
|
push @$keys => @$k; |
587
|
0
|
|
|
|
|
0
|
push @$weights => @$w; |
588
|
|
|
|
|
|
|
} |
589
|
|
|
|
|
|
|
|
590
|
0
|
0
|
0
|
|
|
0
|
unless (@$keys && @$weights) { |
591
|
0
|
|
|
|
|
0
|
croak "No keys weights have been set"; |
592
|
|
|
|
|
|
|
} |
593
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
# It is very necessary in order |
595
|
|
|
|
|
|
|
# to avoid truncation of numbers |
596
|
|
|
|
|
|
|
# between zero and one |
597
|
0
|
|
|
|
|
0
|
$self->_round_weight($weights); |
598
|
|
|
|
|
|
|
|
599
|
0
|
|
|
|
|
0
|
my $raffler = App::Sandy::WeightedRaffle->new( |
600
|
|
|
|
|
|
|
'weights' => $weights, |
601
|
|
|
|
|
|
|
'keys' => $keys |
602
|
|
|
|
|
|
|
); |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
# Use App::Sandy::Rand |
605
|
0
|
|
|
0
|
|
0
|
$seqid_sub = sub { $raffler->weighted_raffle($_[0]) }; |
|
0
|
|
|
|
|
0
|
|
606
|
|
|
|
|
|
|
} elsif ($self->seqid_weight eq 'length') { |
607
|
|
|
|
|
|
|
my $calc_weight = sub { |
608
|
100
|
|
|
100
|
|
170
|
my ($seq_id, $type) = @_; |
609
|
100
|
|
|
|
|
185
|
return $piece_table->{$seq_id}{$type}{size}; |
610
|
20
|
|
|
|
|
175
|
}; |
611
|
|
|
|
|
|
|
|
612
|
20
|
|
|
|
|
85
|
my ($keys, $weights) = $self->_populate_key_weight($piece_table, |
613
|
|
|
|
|
|
|
$calc_weight); |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
# Just in case ... |
616
|
20
|
|
|
|
|
140
|
$self->_round_weight($weights); |
617
|
|
|
|
|
|
|
|
618
|
20
|
|
|
|
|
730
|
my $raffler = App::Sandy::WeightedRaffle->new( |
619
|
|
|
|
|
|
|
weights => $weights, |
620
|
|
|
|
|
|
|
keys => $keys |
621
|
|
|
|
|
|
|
); |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
# Use App::Sandy::Rand |
624
|
20
|
|
|
6840
|
|
200
|
$seqid_sub = sub { $raffler->weighted_raffle($_[0]) }; |
|
6840
|
|
|
|
|
23727
|
|
625
|
|
|
|
|
|
|
} else { |
626
|
0
|
|
|
|
|
0
|
croak sprintf "Unknown option '%s' for seqid_weight\n", |
627
|
|
|
|
|
|
|
$self->seqid_weight; |
628
|
|
|
|
|
|
|
} |
629
|
|
|
|
|
|
|
|
630
|
20
|
|
|
|
|
585
|
return $seqid_sub; |
631
|
|
|
|
|
|
|
} |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
sub _round_weight { |
634
|
20
|
|
|
20
|
|
60
|
my ($self, $weights) = @_; |
635
|
|
|
|
|
|
|
|
636
|
20
|
|
|
|
|
75
|
my $min = min @$weights; |
637
|
|
|
|
|
|
|
|
638
|
20
|
50
|
|
|
|
55
|
if ($min <= 0) { |
639
|
0
|
|
|
|
|
0
|
croak "min weight le to zero: $min"; |
640
|
|
|
|
|
|
|
} |
641
|
|
|
|
|
|
|
|
642
|
20
|
50
|
|
|
|
95
|
my $factor = $min < 1 |
643
|
|
|
|
|
|
|
? (1 / $min) |
644
|
|
|
|
|
|
|
: 1; |
645
|
|
|
|
|
|
|
|
646
|
20
|
|
|
|
|
55
|
for my $weight (@$weights) { |
647
|
100
|
|
|
|
|
195
|
$weight = int($weight * $factor + 0.5); |
648
|
|
|
|
|
|
|
} |
649
|
|
|
|
|
|
|
} |
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
sub _populate_key_weight { |
652
|
20
|
|
|
20
|
|
65
|
my ($self, $piece_table, $calc_weight) = @_; |
653
|
|
|
|
|
|
|
|
654
|
20
|
|
|
|
|
25
|
my (@keys, @weights); |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
# It needs to be sorted in order to the |
657
|
|
|
|
|
|
|
# seed works |
658
|
20
|
|
|
|
|
170
|
for my $seq_id (sort keys %$piece_table) { |
659
|
100
|
|
|
|
|
165
|
my $type_h = $piece_table->{$seq_id}; |
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
# If there is no alternative seq_id, then |
662
|
|
|
|
|
|
|
# set a factor to correct the size. |
663
|
|
|
|
|
|
|
# It is necessary because the seq_ids with |
664
|
|
|
|
|
|
|
# alternative and reference will double its |
665
|
|
|
|
|
|
|
# own coverage |
666
|
100
|
50
|
|
|
|
190
|
my $factor = scalar keys %$type_h == 1 |
667
|
|
|
|
|
|
|
? 2 |
668
|
|
|
|
|
|
|
: 1; |
669
|
|
|
|
|
|
|
|
670
|
100
|
|
|
|
|
195
|
for my $type (sort keys %$type_h) { |
671
|
|
|
|
|
|
|
|
672
|
100
|
|
|
|
|
245
|
my %key = ( |
673
|
|
|
|
|
|
|
'seq_id' => $seq_id, |
674
|
|
|
|
|
|
|
'type' => $type |
675
|
|
|
|
|
|
|
); |
676
|
|
|
|
|
|
|
|
677
|
100
|
|
|
|
|
215
|
my $weight = $calc_weight->($seq_id, $type); |
678
|
|
|
|
|
|
|
|
679
|
100
|
|
|
|
|
185
|
push @keys => \%key; |
680
|
100
|
|
|
|
|
220
|
push @weights => $weight * $factor; |
681
|
|
|
|
|
|
|
} |
682
|
|
|
|
|
|
|
} |
683
|
|
|
|
|
|
|
|
684
|
20
|
|
|
|
|
75
|
return (\@keys, \@weights); |
685
|
|
|
|
|
|
|
} |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
sub _build_genomic_variation_names { |
688
|
20
|
|
|
20
|
|
40
|
my $self = shift; |
689
|
20
|
50
|
|
|
|
555
|
if ($self->genomic_variation) { |
690
|
0
|
|
|
|
|
0
|
return sprintf "[%s]", => join ", ", @{ $self->genomic_variation }; |
|
0
|
|
|
|
|
0
|
|
691
|
|
|
|
|
|
|
} |
692
|
|
|
|
|
|
|
} |
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
sub _retrieve_genomic_variation { |
695
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
696
|
0
|
|
|
|
|
0
|
my $variation = App::Sandy::DB::Handle::Variation->new; |
697
|
0
|
|
|
|
|
0
|
return $variation->retrievedb($self->genomic_variation); |
698
|
|
|
|
|
|
|
} |
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
sub _build_piece_table { |
701
|
20
|
|
|
20
|
|
40
|
my $self = shift; |
702
|
|
|
|
|
|
|
|
703
|
20
|
|
|
|
|
645
|
my $genomic_variation = $self->_genomic_variation_names; |
704
|
20
|
|
|
|
|
35
|
my $indexed_snv; |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
# Retrieve genomic variation if the user provided it |
707
|
20
|
50
|
|
|
|
55
|
if (defined $genomic_variation) { |
708
|
0
|
|
|
|
|
0
|
$indexed_snv = $self->_retrieve_genomic_variation; |
709
|
0
|
|
|
|
|
0
|
log_msg ":: Validate genomic variation '$genomic_variation' against indexed fasta ..."; |
710
|
0
|
|
|
|
|
0
|
$self->_validate_indexed_snv_against_fasta($indexed_snv); |
711
|
|
|
|
|
|
|
} |
712
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
# Catch index fasta |
714
|
20
|
|
|
|
|
625
|
my $indexed_fasta = $self->_fasta; |
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
# Build piece table |
717
|
20
|
|
|
|
|
35
|
my %piece_table; |
718
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
# Let's construct the piece_table |
720
|
20
|
|
|
|
|
60
|
log_msg ":: Build piece table ..."; |
721
|
|
|
|
|
|
|
|
722
|
20
|
|
|
|
|
120
|
while (my ($seq_id, $fasta_h) = each %$indexed_fasta) { |
723
|
100
|
|
|
|
|
225
|
my $seq = \$fasta_h->{seq}; |
724
|
100
|
|
|
|
|
3675
|
my $std_seq_id = $self->_get_seqname($seq_id); |
725
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
# Initialize piece tables for $seq_id ref |
727
|
100
|
|
|
|
|
2680
|
$piece_table{$seq_id}{ref}{table} = App::Sandy::PieceTable->new(orig => $seq); |
728
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
# If there is indexed_snv for seq_id, then construct the piece table with it |
730
|
100
|
50
|
33
|
|
|
445
|
if (defined $indexed_snv && defined $indexed_snv->{$std_seq_id}) { |
731
|
0
|
|
|
|
|
0
|
my $snvs = $indexed_snv->{$std_seq_id}; |
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
# Filter only the homozygotic snvs to feed reference seq_id |
734
|
0
|
|
|
|
|
0
|
my @snvs_homo = grep { $_->{plo} eq 'HO' } @$snvs; |
|
0
|
|
|
|
|
0
|
|
735
|
|
|
|
|
|
|
|
736
|
0
|
0
|
|
|
|
0
|
if (@snvs_homo) { |
737
|
|
|
|
|
|
|
# Populate reference seq_id |
738
|
0
|
|
|
|
|
0
|
$self->_populate_piece_table($piece_table{$seq_id}{ref}{table}, \@snvs_homo); |
739
|
|
|
|
|
|
|
} |
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
# Initialize piece tables for $seq_id alt |
742
|
0
|
|
|
|
|
0
|
$piece_table{$seq_id}{alt}{table} = App::Sandy::PieceTable->new(orig => $seq); |
743
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
# Populate alternative seq_id |
745
|
0
|
|
|
|
|
0
|
$self->_populate_piece_table($piece_table{$seq_id}{alt}{table}, $snvs); |
746
|
|
|
|
|
|
|
} |
747
|
|
|
|
|
|
|
} |
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
# Initialize the logical offsets and valodate the |
750
|
|
|
|
|
|
|
# new size due to the genomic variation |
751
|
|
|
|
|
|
|
|
752
|
20
|
|
|
|
|
55
|
my @blacklist; |
753
|
|
|
|
|
|
|
|
754
|
20
|
|
|
|
|
65
|
for my $seq_id (keys %piece_table) { |
755
|
100
|
|
|
|
|
185
|
my $type_h = delete $piece_table{$seq_id}; |
756
|
|
|
|
|
|
|
|
757
|
100
|
|
|
|
|
355
|
for my $type (keys %$type_h) { |
758
|
100
|
|
|
|
|
190
|
my $table_h = delete $type_h->{$type}; |
759
|
100
|
|
|
|
|
135
|
my $table = $table_h->{table}; |
760
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
# Initialize the logical offset |
762
|
100
|
|
|
|
|
345
|
$table->calculate_logical_offset; |
763
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
# Get the new size |
765
|
100
|
|
|
|
|
2800
|
my $new_size = $table->logical_len; |
766
|
|
|
|
|
|
|
|
767
|
100
|
50
|
|
|
|
2620
|
unless ($self->truncate) { |
768
|
100
|
|
|
|
|
2570
|
my $class = ref $self->seq; |
769
|
|
|
|
|
|
|
|
770
|
100
|
100
|
|
|
|
365
|
if ($class eq 'App::Sandy::Seq::SingleEnd') { |
|
|
50
|
|
|
|
|
|
771
|
50
|
50
|
|
|
|
1215
|
if ($new_size < $self->seq->read_mean) { |
772
|
0
|
|
|
|
|
0
|
log_msg ":: Skip '$seq_id:$type': So many deletions resulted in a sequence lesser than the required read-mean"; |
773
|
0
|
|
|
|
|
0
|
next; |
774
|
|
|
|
|
|
|
} |
775
|
|
|
|
|
|
|
} elsif ($class eq 'App::Sandy::Seq::PairedEnd') { |
776
|
50
|
50
|
|
|
|
1220
|
if ($new_size < $self->seq->fragment_mean) { |
777
|
0
|
|
|
|
|
0
|
log_msg ":: Skip '$seq_id:$type': So many deletions resulted in a sequence lesser than the required fragment mean"; |
778
|
0
|
|
|
|
|
0
|
next; |
779
|
|
|
|
|
|
|
} |
780
|
|
|
|
|
|
|
} else { |
781
|
0
|
|
|
|
|
0
|
die "No valid options for 'seq'"; |
782
|
|
|
|
|
|
|
} |
783
|
|
|
|
|
|
|
} |
784
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
# If all's right |
786
|
100
|
|
|
|
|
275
|
$table_h->{size} = $new_size; |
787
|
100
|
|
|
|
|
250
|
$type_h->{$type} = $table_h; |
788
|
|
|
|
|
|
|
} |
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
# if there is at least one type, |
791
|
|
|
|
|
|
|
# then return it to the piece_table |
792
|
100
|
50
|
|
|
|
200
|
if (%$type_h) { |
793
|
100
|
|
|
|
|
215
|
$piece_table{$seq_id} = $type_h; |
794
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
# else, just remove it! |
796
|
|
|
|
|
|
|
} else { |
797
|
0
|
|
|
|
|
0
|
push @blacklist => $seq_id; |
798
|
|
|
|
|
|
|
} |
799
|
|
|
|
|
|
|
} |
800
|
|
|
|
|
|
|
|
801
|
20
|
50
|
|
|
|
80
|
unless (%piece_table) { |
802
|
0
|
|
|
|
|
0
|
die "All fasta entries were removed due to deletions. ", |
803
|
|
|
|
|
|
|
"Please, verify the genomic variation '$genomic_variation'\n"; |
804
|
|
|
|
|
|
|
} |
805
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
# If fasta_rtree has entries |
807
|
20
|
50
|
|
|
|
740
|
unless ($self->_has_no_fasta_rtree) { |
808
|
|
|
|
|
|
|
# Remove no valid entries from id -> pid relation |
809
|
0
|
0
|
|
|
|
0
|
$self->_delete_fasta_rtree(@blacklist) if @blacklist; |
810
|
|
|
|
|
|
|
} |
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
# Make the id -> pid relationship |
813
|
20
|
|
|
|
|
75
|
$self->_populate_fasta_tree; |
814
|
|
|
|
|
|
|
|
815
|
|
|
|
|
|
|
# HASH -> SEQ_ID -> @(REF @ALT) -> @(TABLE SIZE) |
816
|
20
|
|
|
|
|
540
|
return \%piece_table; |
817
|
|
|
|
|
|
|
} |
818
|
|
|
|
|
|
|
|
819
|
|
|
|
|
|
|
sub _populate_piece_table { |
820
|
0
|
|
|
0
|
|
0
|
my ($self, $table, $snvs) = @_; |
821
|
|
|
|
|
|
|
|
822
|
0
|
|
|
|
|
0
|
for my $snv (@$snvs) { |
823
|
|
|
|
|
|
|
# If there is an ID, make sure that it is not a comma, colon |
824
|
|
|
|
|
|
|
# separated list. Else, make sure to keep the ref/alt length |
825
|
|
|
|
|
|
|
# to max 25+25+1=51 |
826
|
|
|
|
|
|
|
my $annot = defined $snv->{id} && $snv->{id} ne '.' |
827
|
|
|
|
|
|
|
? sprintf "%d:%s" => $snv->{pos} + 1, (split(/[,;]/, $snv->{id}))[0] |
828
|
0
|
0
|
0
|
|
|
0
|
: sprintf "%d:%.25s/%.25s" => $snv->{pos} + 1, $snv->{ref}, $snv->{alt}; |
829
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
# Insertion |
831
|
0
|
0
|
|
|
|
0
|
if ($snv->{ref} eq '-') { |
|
|
0
|
|
|
|
|
|
832
|
0
|
|
|
|
|
0
|
$table->insert(\$snv->{alt}, $snv->{pos}, $annot); |
833
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
# Deletion |
835
|
|
|
|
|
|
|
} elsif ($snv->{alt} eq '-') { |
836
|
0
|
|
|
|
|
0
|
$table->delete($snv->{pos}, length $snv->{ref}, $annot); |
837
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
# Change |
839
|
|
|
|
|
|
|
} else { |
840
|
0
|
|
|
|
|
0
|
$table->change(\$snv->{alt}, $snv->{pos}, length $snv->{ref}, $annot); |
841
|
|
|
|
|
|
|
} |
842
|
|
|
|
|
|
|
} |
843
|
|
|
|
|
|
|
} |
844
|
|
|
|
|
|
|
|
845
|
|
|
|
|
|
|
sub _validate_indexed_snv_against_fasta { |
846
|
0
|
|
|
0
|
|
0
|
my ($self, $indexed_snv) = @_; |
847
|
|
|
|
|
|
|
|
848
|
0
|
|
|
|
|
0
|
my $indexed_fasta = $self->_fasta; |
849
|
0
|
|
|
|
|
0
|
my $genomic_variation = $self->_genomic_variation_names; |
850
|
|
|
|
|
|
|
|
851
|
0
|
|
|
|
|
0
|
for my $std_seq_id (keys %$indexed_snv) { |
852
|
0
|
|
|
|
|
0
|
my $snvs = delete $indexed_snv->{$std_seq_id}; |
853
|
0
|
|
|
|
|
0
|
my $seq_id = $self->_get_seqname($std_seq_id); |
854
|
|
|
|
|
|
|
|
855
|
0
|
0
|
0
|
|
|
0
|
unless (defined $seq_id && exists $indexed_fasta->{$seq_id}) { |
856
|
0
|
|
|
|
|
0
|
next; |
857
|
|
|
|
|
|
|
} |
858
|
|
|
|
|
|
|
|
859
|
0
|
|
|
|
|
0
|
my $seq = \$indexed_fasta->{$seq_id}{seq}; |
860
|
0
|
|
|
|
|
0
|
my $size = $indexed_fasta->{$seq_id}{size}; |
861
|
|
|
|
|
|
|
|
862
|
0
|
|
|
|
|
0
|
my @saved_snvs; |
863
|
|
|
|
|
|
|
|
864
|
0
|
|
|
|
|
0
|
for my $snv (@$snvs) { |
865
|
|
|
|
|
|
|
# Insertions may accur until one base after the |
866
|
|
|
|
|
|
|
# end of the sequence, not more |
867
|
0
|
0
|
0
|
|
|
0
|
if (($snv->{ref} eq '-' && $snv->{pos} > $size) || ($snv->{ref} ne '-' && $snv->{pos} >= $size)) { |
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
868
|
|
|
|
|
|
|
log_msg sprintf ":: In validating '%s': Position, %s/%s at %s:%d, outside fasta sequence", |
869
|
0
|
|
|
|
|
0
|
$genomic_variation, $snv->{ref}, $snv->{alt}, $seq_id, $snv->{pos} + 1; |
870
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
# Next snv |
872
|
0
|
|
|
|
|
0
|
next; |
873
|
|
|
|
|
|
|
# Deletions and changes. Just verify if the reference exists |
874
|
|
|
|
|
|
|
} elsif ($snv->{ref} ne '-') { |
875
|
0
|
|
|
|
|
0
|
my $ref = substr $$seq, $snv->{pos}, length($snv->{ref}); |
876
|
|
|
|
|
|
|
|
877
|
0
|
0
|
|
|
|
0
|
if (uc($ref) ne uc($snv->{ref})) { |
878
|
|
|
|
|
|
|
log_msg sprintf ":: In validating '%s': Not found reference '%s' at fasta position %s:%d", |
879
|
0
|
|
|
|
|
0
|
$genomic_variation, $snv->{ref}, $seq_id, $snv->{pos} + 1; |
880
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
# Next snv |
882
|
0
|
|
|
|
|
0
|
next; |
883
|
|
|
|
|
|
|
} |
884
|
|
|
|
|
|
|
} |
885
|
|
|
|
|
|
|
|
886
|
0
|
|
|
|
|
0
|
push @saved_snvs => $snv; |
887
|
|
|
|
|
|
|
} |
888
|
|
|
|
|
|
|
|
889
|
0
|
0
|
|
|
|
0
|
if (@saved_snvs) { |
890
|
0
|
|
|
|
|
0
|
$indexed_snv->{$std_seq_id} = [@saved_snvs]; |
891
|
|
|
|
|
|
|
} |
892
|
|
|
|
|
|
|
} |
893
|
|
|
|
|
|
|
} |
894
|
|
|
|
|
|
|
|
895
|
|
|
|
|
|
|
sub _calculate_number_of_reads { |
896
|
8
|
|
|
8
|
|
33
|
my $self = shift; |
897
|
8
|
|
|
|
|
19
|
my $number_of_reads; |
898
|
|
|
|
|
|
|
|
899
|
8
|
50
|
|
|
|
254
|
if ($self->count_loops_by eq 'coverage') { |
|
|
0
|
|
|
|
|
|
900
|
|
|
|
|
|
|
# It is needed to calculate the genome size |
901
|
8
|
|
|
|
|
275
|
my $fasta = $self->_fasta; |
902
|
8
|
|
|
|
|
27
|
my $fasta_size = 0; |
903
|
8
|
|
|
|
|
16
|
$fasta_size += $fasta->{$_}{size} for keys %{ $fasta }; |
|
8
|
|
|
|
|
73
|
|
904
|
8
|
|
|
|
|
228
|
$number_of_reads = int(($fasta_size * $self->coverage) / $self->seq->read_mean); |
905
|
|
|
|
|
|
|
# In case it is paired-end read, divide the number of reads by 2 because |
906
|
|
|
|
|
|
|
# App::Sandy::Seq::PairedEnd class returns 2 reads at time |
907
|
8
|
100
|
|
|
|
208
|
$number_of_reads = int($number_of_reads / 2) |
908
|
|
|
|
|
|
|
if ref($self->seq) eq 'App::Sandy::Seq::PairedEnd'; |
909
|
|
|
|
|
|
|
} elsif ($self->count_loops_by eq 'number-of-reads') { |
910
|
0
|
|
|
|
|
0
|
$number_of_reads = $self->number_of_reads; |
911
|
|
|
|
|
|
|
} else { |
912
|
0
|
|
|
|
|
0
|
croak sprintf "Unknown option '%s' for calculating the number of reads\n", |
913
|
|
|
|
|
|
|
$self->count_loops_by; |
914
|
|
|
|
|
|
|
} |
915
|
|
|
|
|
|
|
|
916
|
|
|
|
|
|
|
# Maybe the number_of_reads is zero. It may occur due to the low coverage and/or fasta_file size |
917
|
8
|
50
|
|
|
|
40
|
if ($number_of_reads <= 0) { |
918
|
0
|
|
|
|
|
0
|
die "The computed number of reads is equal to zero.\n" . |
919
|
|
|
|
|
|
|
"It may occur due to the low coverage, fasta-file sequence size or number of reads directly passed by the user\n"; |
920
|
|
|
|
|
|
|
} |
921
|
|
|
|
|
|
|
|
922
|
8
|
|
|
|
|
24
|
return $number_of_reads; |
923
|
|
|
|
|
|
|
} |
924
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
sub _calculate_parent_count { |
926
|
0
|
|
|
0
|
|
0
|
my ($self, $counter_ref) = @_; |
927
|
0
|
0
|
|
|
|
0
|
return if $self->_has_no_fasta_rtree; |
928
|
|
|
|
|
|
|
|
929
|
0
|
|
|
|
|
0
|
my %parent_count; |
930
|
|
|
|
|
|
|
|
931
|
0
|
|
|
|
|
0
|
while (my ($id, $count) = each %$counter_ref) { |
932
|
0
|
|
|
|
|
0
|
my $pid = $self->_get_fasta_rtree($id); |
933
|
0
|
0
|
|
|
|
0
|
$parent_count{$pid} += $count if defined $pid; |
934
|
|
|
|
|
|
|
} |
935
|
|
|
|
|
|
|
|
936
|
0
|
|
|
|
|
0
|
return \%parent_count; |
937
|
|
|
|
|
|
|
} |
938
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
sub run_simulation { |
940
|
8
|
|
|
8
|
0
|
4084
|
my $self = shift; |
941
|
8
|
|
|
|
|
330
|
my $piece_table = $self->_piece_table; |
942
|
|
|
|
|
|
|
|
943
|
|
|
|
|
|
|
# Calculate the number of reads to be generated |
944
|
8
|
|
|
|
|
48
|
my $number_of_reads = $self->_calculate_number_of_reads; |
945
|
|
|
|
|
|
|
|
946
|
|
|
|
|
|
|
# Function that returns strand by strand_bias |
947
|
8
|
|
|
|
|
255
|
my $strand = $self->_strand; |
948
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
# Function that returns seqid by seqid_weight |
950
|
8
|
|
|
|
|
264
|
my $seqid = $self->_seqid_raffle; |
951
|
|
|
|
|
|
|
|
952
|
|
|
|
|
|
|
# genome or transcriptome? |
953
|
8
|
|
|
|
|
238
|
my $simulation = $self->argv->[0]; |
954
|
|
|
|
|
|
|
|
955
|
|
|
|
|
|
|
# Count file to be generated |
956
|
8
|
|
|
|
|
263
|
my %count_file = ( |
957
|
|
|
|
|
|
|
transcripts => $self->prefix . '_abundance_transcripts.tsv', |
958
|
|
|
|
|
|
|
genes => $self->prefix . '_abundance_genes.tsv', |
959
|
|
|
|
|
|
|
coverage => $self->prefix . '_coverage.tsv' |
960
|
|
|
|
|
|
|
); |
961
|
|
|
|
|
|
|
|
962
|
|
|
|
|
|
|
# Main files |
963
|
8
|
|
|
|
|
223
|
my %files = ( |
964
|
|
|
|
|
|
|
bam => [ |
965
|
|
|
|
|
|
|
$self->prefix . '.bam' |
966
|
|
|
|
|
|
|
], |
967
|
|
|
|
|
|
|
sam => [ |
968
|
|
|
|
|
|
|
$self->prefix . '.sam' |
969
|
|
|
|
|
|
|
], |
970
|
|
|
|
|
|
|
single_fastq => [ |
971
|
|
|
|
|
|
|
$self->prefix . '_R1_001.fastq' |
972
|
|
|
|
|
|
|
], |
973
|
|
|
|
|
|
|
single_fastq_gz => [ |
974
|
|
|
|
|
|
|
$self->prefix . '_R1_001.fastq.gz' |
975
|
|
|
|
|
|
|
], |
976
|
|
|
|
|
|
|
join_paired_fastq => [ |
977
|
|
|
|
|
|
|
$self->prefix . '.fastq' |
978
|
|
|
|
|
|
|
], |
979
|
|
|
|
|
|
|
join_paired_fastq_gz => [ |
980
|
|
|
|
|
|
|
$self->prefix . '.fastq.gz' |
981
|
|
|
|
|
|
|
], |
982
|
|
|
|
|
|
|
paired_fastq => [ |
983
|
|
|
|
|
|
|
$self->prefix . '_R1_001.fastq', |
984
|
|
|
|
|
|
|
$self->prefix . '_R2_001.fastq' |
985
|
|
|
|
|
|
|
], |
986
|
|
|
|
|
|
|
paired_fastq_gz => [ |
987
|
|
|
|
|
|
|
$self->prefix . '_R1_001.fastq.gz', |
988
|
|
|
|
|
|
|
$self->prefix . '_R2_001.fastq.gz' |
989
|
|
|
|
|
|
|
] |
990
|
|
|
|
|
|
|
); |
991
|
|
|
|
|
|
|
|
992
|
|
|
|
|
|
|
# Set the file class in order to know |
993
|
|
|
|
|
|
|
# how to deal with all files options |
994
|
8
|
|
|
|
|
248
|
my $seq_class = ref $self->seq; |
995
|
8
|
|
|
|
|
259
|
my $output_format = $self->output_format; |
996
|
8
|
|
|
|
|
19
|
my $file_class; |
997
|
|
|
|
|
|
|
|
998
|
|
|
|
|
|
|
# This mess is necessary to catch the |
999
|
|
|
|
|
|
|
# right value into the %files hash |
1000
|
8
|
50
|
|
|
|
107
|
if ($output_format =~ /(sam|bam)/) { |
|
|
50
|
|
|
|
|
|
1001
|
0
|
|
|
|
|
0
|
$file_class = $output_format; |
1002
|
|
|
|
|
|
|
} elsif ($output_format =~ /fastq/) { |
1003
|
8
|
100
|
|
|
|
47
|
if ($seq_class eq 'App::Sandy::Seq::SingleEnd') { |
|
|
50
|
|
|
|
|
|
1004
|
5
|
|
|
|
|
15
|
$file_class = 'single_fastq'; |
1005
|
|
|
|
|
|
|
} elsif ($seq_class eq 'App::Sandy::Seq::PairedEnd') { |
1006
|
3
|
|
|
|
|
9
|
$file_class = 'paired_fastq'; |
1007
|
3
|
50
|
|
|
|
126
|
$file_class = "join_$file_class" if $self->join_paired_ends; |
1008
|
|
|
|
|
|
|
} else { |
1009
|
0
|
|
|
|
|
0
|
croak "Something wrong with the seq class: $seq_class"; |
1010
|
|
|
|
|
|
|
} |
1011
|
8
|
50
|
|
|
|
75
|
if ($output_format eq 'fastq.gz') { |
1012
|
0
|
|
|
|
|
0
|
$file_class .= '_gz'; |
1013
|
|
|
|
|
|
|
} |
1014
|
|
|
|
|
|
|
} else { |
1015
|
0
|
|
|
|
|
0
|
croak "Something wrong with the output format: $output_format"; |
1016
|
|
|
|
|
|
|
} |
1017
|
|
|
|
|
|
|
|
1018
|
|
|
|
|
|
|
# Forks |
1019
|
8
|
|
|
|
|
227
|
my $number_of_jobs = $self->jobs; |
1020
|
8
|
|
|
|
|
160
|
my $pm = Parallel::ForkManager->new($number_of_jobs); |
1021
|
|
|
|
|
|
|
|
1022
|
|
|
|
|
|
|
# Parent child pids |
1023
|
8
|
|
|
|
|
27804
|
my $parent_pid = $$; |
1024
|
8
|
|
|
|
|
27
|
my @child_pid; |
1025
|
|
|
|
|
|
|
|
1026
|
|
|
|
|
|
|
# Temporary files tracker |
1027
|
|
|
|
|
|
|
my @tmp_files; |
1028
|
|
|
|
|
|
|
|
1029
|
|
|
|
|
|
|
# Run in parent right after creating child process |
1030
|
|
|
|
|
|
|
$pm->run_on_start( |
1031
|
|
|
|
|
|
|
sub { |
1032
|
10
|
|
|
10
|
|
24930
|
my $pid = shift; |
1033
|
10
|
|
|
|
|
357
|
push @child_pid => $pid; |
1034
|
|
|
|
|
|
|
} |
1035
|
8
|
|
|
|
|
118
|
); |
1036
|
|
|
|
|
|
|
|
1037
|
|
|
|
|
|
|
# Count the overall cumulative number of reads for each seqid |
1038
|
8
|
|
|
|
|
106
|
my %counters; |
1039
|
|
|
|
|
|
|
|
1040
|
|
|
|
|
|
|
# Run in parent right after finishing child process |
1041
|
|
|
|
|
|
|
$pm->run_on_finish( |
1042
|
|
|
|
|
|
|
sub { |
1043
|
8
|
|
|
8
|
|
43058695
|
my ($pid, $exit_code, $ident, $exit_signal, $core_dump, $counter_ref) = @_; |
1044
|
8
|
|
|
|
|
167
|
while (my ($seqid, $count) = each %$counter_ref) { |
1045
|
40
|
|
|
|
|
254
|
$counters{$seqid} += $count; |
1046
|
|
|
|
|
|
|
} |
1047
|
|
|
|
|
|
|
} |
1048
|
8
|
|
|
|
|
67
|
); |
1049
|
|
|
|
|
|
|
|
1050
|
8
|
50
|
|
|
|
154
|
log_msg sprintf ":: Creating %d child %s ...", |
1051
|
|
|
|
|
|
|
$number_of_jobs, $number_of_jobs == 1 ? "job" : "jobs"; |
1052
|
|
|
|
|
|
|
|
1053
|
8
|
|
|
|
|
47
|
for my $tid (1..$number_of_jobs) { |
1054
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
1055
|
|
|
|
|
|
|
# Inside parent |
1056
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
1057
|
14
|
|
|
|
|
694
|
log_msg ":: Creating job $tid ..."; |
1058
|
14
|
|
|
|
|
46
|
my @files_t = map { "$_.${parent_pid}.part$tid" } @{ $files{$file_class} }; |
|
19
|
|
|
|
|
175
|
|
|
14
|
|
|
|
|
227
|
|
1059
|
14
|
|
|
|
|
97
|
push @tmp_files => @files_t; |
1060
|
14
|
100
|
|
|
|
63
|
my $pid = $pm->start and next; |
1061
|
|
|
|
|
|
|
|
1062
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
1063
|
|
|
|
|
|
|
# Inside child |
1064
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
1065
|
|
|
|
|
|
|
# Intelace child/parent processes |
1066
|
4
|
|
|
|
|
17886
|
my $sig = App::Sandy::InterlaceProcesses->new(foreign_pid => [$parent_pid]); |
1067
|
|
|
|
|
|
|
|
1068
|
|
|
|
|
|
|
# Get the FASTA blacklist regions ID => @{ TUPLE } |
1069
|
4
|
|
|
|
|
415
|
my $fasta_blacklist = $self->_fasta_blacklist; |
1070
|
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
# Set child RNG |
1072
|
4
|
|
|
|
|
298
|
my $rng = App::Sandy::RNG->new($self->seed + $tid); |
1073
|
|
|
|
|
|
|
|
1074
|
|
|
|
|
|
|
# Calculate the number of reads to this job and correct this local index |
1075
|
|
|
|
|
|
|
# to the global index |
1076
|
4
|
|
|
|
|
96
|
my $number_of_reads_t = int($number_of_reads/$number_of_jobs); |
1077
|
4
|
|
|
|
|
73
|
my $last_read_idx = $number_of_reads_t * $tid; |
1078
|
4
|
|
|
|
|
80
|
my $idx = $last_read_idx - $number_of_reads_t + 1; |
1079
|
|
|
|
|
|
|
|
1080
|
|
|
|
|
|
|
# If it is the last job, make it work on the leftover reads of int() truncation |
1081
|
4
|
100
|
|
|
|
63
|
$last_read_idx += $number_of_reads % $number_of_jobs |
1082
|
|
|
|
|
|
|
if $tid == $number_of_jobs; |
1083
|
|
|
|
|
|
|
|
1084
|
4
|
|
|
|
|
157
|
log_msg " => Job $tid: Working on sequences from $idx to $last_read_idx"; |
1085
|
|
|
|
|
|
|
|
1086
|
|
|
|
|
|
|
# Create temporary files |
1087
|
4
|
|
|
|
|
91
|
log_msg " => Job $tid: Creating temporary file: @files_t"; |
1088
|
|
|
|
|
|
|
|
1089
|
|
|
|
|
|
|
# And here we go ... |
1090
|
4
|
|
|
|
|
13
|
my @fhs; |
1091
|
|
|
|
|
|
|
|
1092
|
|
|
|
|
|
|
# Set the right filehandle format |
1093
|
4
|
50
|
|
|
|
159
|
if ($output_format =~ /^(sam|fastq)$/) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1094
|
4
|
|
|
|
|
41
|
@fhs = map { $self->with_open_w($_, 0) } @files_t; |
|
6
|
|
|
|
|
179
|
|
1095
|
|
|
|
|
|
|
} elsif ($output_format eq 'fastq.gz') { |
1096
|
0
|
|
|
|
|
0
|
@fhs = map { $self->with_open_w($_, $self->compression_level) } @files_t; |
|
0
|
|
|
|
|
0
|
|
1097
|
|
|
|
|
|
|
} elsif ($output_format eq 'bam') { |
1098
|
0
|
|
|
|
|
0
|
@fhs = map { $self->with_open_bam_w($_, $self->compression_level) } @files_t; |
|
0
|
|
|
|
|
0
|
|
1099
|
|
|
|
|
|
|
} else { |
1100
|
0
|
|
|
|
|
0
|
croak "Something wrong with the output format: $file_class"; |
1101
|
|
|
|
|
|
|
} |
1102
|
|
|
|
|
|
|
|
1103
|
|
|
|
|
|
|
# sprint_seq gives two entries for paired-emd, so |
1104
|
|
|
|
|
|
|
# if it is a bam|sam|join-paired-ends, it is necessary |
1105
|
|
|
|
|
|
|
# to copy the filehandle in order to print both entries |
1106
|
|
|
|
|
|
|
# to the same file |
1107
|
4
|
50
|
66
|
|
|
93
|
if ($seq_class eq 'App::Sandy::Seq::PairedEnd' |
1108
|
|
|
|
|
|
|
&& $file_class =~ /(sam|bam|join)/) { |
1109
|
0
|
|
|
|
|
0
|
$fhs[1] = $fhs[0]; |
1110
|
|
|
|
|
|
|
} |
1111
|
|
|
|
|
|
|
|
1112
|
|
|
|
|
|
|
# Count the cumulative number of reads for each seqid |
1113
|
4
|
|
|
|
|
50
|
my %counter; |
1114
|
|
|
|
|
|
|
|
1115
|
|
|
|
|
|
|
# If the output format is 'bam|sam' and it is the first job, then |
1116
|
|
|
|
|
|
|
# write the header |
1117
|
4
|
50
|
33
|
|
|
102
|
if ($output_format =~ /^(sam|bam)$/ && $tid == 1) { |
1118
|
0
|
|
|
|
|
0
|
my $header_ref = $self->gen_sam_header($self->argv); |
1119
|
0
|
|
|
|
|
0
|
print {$fhs[0]} "$$header_ref"; |
|
0
|
|
|
|
|
0
|
|
1120
|
|
|
|
|
|
|
} |
1121
|
|
|
|
|
|
|
|
1122
|
|
|
|
|
|
|
# Run simulation in child |
1123
|
4
|
|
66
|
|
|
368
|
for (my $i = $idx; $i <= $last_read_idx and not $sig->signal_catched; $i++) { |
1124
|
6840
|
|
|
|
|
15930
|
my $id = $seqid->($rng); |
1125
|
6840
|
|
|
|
|
16943
|
my $blacklist = $fasta_blacklist->{$id->{seq_id}}; |
1126
|
6840
|
|
|
|
|
15101
|
my $ptable = $piece_table->{$id->{seq_id}}{$id->{type}}; |
1127
|
6840
|
|
|
|
|
9866
|
my @seq_entry; |
1128
|
|
|
|
|
|
|
try { |
1129
|
|
|
|
|
|
|
@seq_entry = $self->sprint_seq($tid, $i, $id->{seq_id}, $id->{type}, |
1130
|
6840
|
|
|
6840
|
|
446227
|
$ptable->{table}, $ptable->{size}, $strand->($rng), $rng, $blacklist); |
1131
|
|
|
|
|
|
|
} catch { |
1132
|
0
|
|
|
0
|
|
0
|
die "Not defined entry for seqid '>$id->{seq_id}' at job $tid: $_"; |
1133
|
|
|
|
|
|
|
} finally { |
1134
|
6840
|
50
|
|
6840
|
|
139840
|
unless (@_) { |
1135
|
6840
|
|
|
|
|
18847
|
for my $fh_idx (0..$#fhs) { |
1136
|
9120
|
|
|
|
|
19104
|
$counter{$id->{seq_id}}++; |
1137
|
9120
|
|
|
|
|
11812
|
print {$fhs[$fh_idx]} "${$seq_entry[$fh_idx]}"; |
|
9120
|
|
|
|
|
15924
|
|
|
9120
|
|
|
|
|
96381
|
|
1138
|
|
|
|
|
|
|
} |
1139
|
|
|
|
|
|
|
} |
1140
|
6840
|
|
|
|
|
54223
|
}; |
1141
|
|
|
|
|
|
|
} |
1142
|
|
|
|
|
|
|
|
1143
|
4
|
|
|
|
|
161
|
log_msg " => Job $tid: Writing and closing file: @files_t"; |
1144
|
|
|
|
|
|
|
|
1145
|
|
|
|
|
|
|
# Close temporary files |
1146
|
|
|
|
|
|
|
# Get index from @files_t in order to avoid |
1147
|
|
|
|
|
|
|
# close the same filehandle twice - When the |
1148
|
|
|
|
|
|
|
# position 1-N is a copy |
1149
|
4
|
|
|
|
|
36
|
for my $fh_idx (0..$#files_t) { |
1150
|
6
|
|
|
|
|
322
|
close $fhs[$fh_idx]; |
1151
|
|
|
|
|
|
|
} |
1152
|
|
|
|
|
|
|
|
1153
|
|
|
|
|
|
|
# If it is a bam and it is the last loop, then |
1154
|
|
|
|
|
|
|
# write a eof marker |
1155
|
4
|
50
|
33
|
|
|
53
|
if ($output_format eq 'bam' && $tid == $number_of_jobs) { |
1156
|
0
|
|
|
|
|
0
|
$self->gen_eof_marker($files_t[0]); |
1157
|
|
|
|
|
|
|
} |
1158
|
|
|
|
|
|
|
|
1159
|
|
|
|
|
|
|
# Child exit |
1160
|
4
|
|
|
|
|
51
|
log_msg " => Job $tid is finished"; |
1161
|
4
|
|
|
|
|
159
|
$pm->finish(0, \%counter); |
1162
|
|
|
|
|
|
|
} |
1163
|
|
|
|
|
|
|
|
1164
|
|
|
|
|
|
|
# Back to parent |
1165
|
|
|
|
|
|
|
# Interlace parent/child(s) processes |
1166
|
4
|
|
|
|
|
1267
|
my $sig = App::Sandy::InterlaceProcesses->new(foreign_pid => \@child_pid); |
1167
|
4
|
|
|
|
|
112
|
$pm->wait_all_children; |
1168
|
|
|
|
|
|
|
|
1169
|
4
|
50
|
|
|
|
518
|
if ($sig->signal_catched) { |
1170
|
0
|
|
|
|
|
0
|
log_msg ":: Termination signal received!"; |
1171
|
|
|
|
|
|
|
} |
1172
|
|
|
|
|
|
|
|
1173
|
4
|
|
|
|
|
102
|
log_msg ":: Saving the work ..."; |
1174
|
|
|
|
|
|
|
|
1175
|
|
|
|
|
|
|
# Concatenate all temporary files |
1176
|
4
|
|
|
|
|
57
|
log_msg ":: Concatenate all temporary files"; |
1177
|
|
|
|
|
|
|
|
1178
|
|
|
|
|
|
|
# Save time. Rename tmp_file (1,2) |
1179
|
4
|
|
|
|
|
8
|
for my $file (@{ $files{$file_class} }) { |
|
4
|
|
|
|
|
59
|
|
1180
|
5
|
|
|
|
|
47
|
my $tmp = shift @tmp_files; |
1181
|
5
|
|
|
|
|
47
|
log_msg " => Concatenating $tmp to $file ..."; |
1182
|
5
|
50
|
|
|
|
386
|
rename $tmp => $file |
1183
|
|
|
|
|
|
|
or die "Cannot create '$file': $!\n"; |
1184
|
|
|
|
|
|
|
} |
1185
|
|
|
|
|
|
|
|
1186
|
|
|
|
|
|
|
# Append to renamed tmp files |
1187
|
4
|
|
|
|
|
19
|
my @fh = map { $self->with_open_a($_) } @{ $files{$file_class} }; |
|
5
|
|
|
|
|
125
|
|
|
4
|
|
|
|
|
16
|
|
1188
|
|
|
|
|
|
|
|
1189
|
4
|
|
|
|
|
37
|
for my $i (0..$#tmp_files) { |
1190
|
5
|
|
|
|
|
25
|
my $fh_idx = $i % scalar @fh; |
1191
|
|
|
|
|
|
|
|
1192
|
5
|
|
|
|
|
51
|
log_msg " => Concatenating $tmp_files[$i] to $files{$file_class}[$fh_idx] ..."; |
1193
|
5
|
50
|
|
|
|
94
|
cat $tmp_files[$i] => $fh[$fh_idx] |
1194
|
|
|
|
|
|
|
or die "Cannot concatenate $tmp_files[$i] to $files{$file_class}[$fh_idx]: $!\n"; |
1195
|
|
|
|
|
|
|
|
1196
|
|
|
|
|
|
|
# Clean up the mess |
1197
|
5
|
50
|
|
|
|
49073
|
unlink $tmp_files[$i] |
1198
|
|
|
|
|
|
|
or die "Cannot remove temporary file '$tmp_files[$i]': $!\n"; |
1199
|
|
|
|
|
|
|
} |
1200
|
|
|
|
|
|
|
|
1201
|
|
|
|
|
|
|
# Close files |
1202
|
4
|
|
|
|
|
60
|
log_msg ":: Writing and closing output file: @{ $files{$file_class} }"; |
|
4
|
|
|
|
|
103
|
|
1203
|
4
|
|
|
|
|
44
|
for my $fh_idx (0..$#fh) { |
1204
|
5
|
50
|
|
|
|
137
|
close $fh[$fh_idx] |
1205
|
|
|
|
|
|
|
or die "Cannot write file $files{$file_class}[$fh_idx]: $!\n"; |
1206
|
|
|
|
|
|
|
} |
1207
|
|
|
|
|
|
|
|
1208
|
4
|
50
|
|
|
|
313
|
if ($self->count_loops_by eq 'number-of-reads') { |
1209
|
|
|
|
|
|
|
# It is necessary to correct the abundance according to |
1210
|
|
|
|
|
|
|
# fragment sequencing end |
1211
|
0
|
0
|
|
|
|
0
|
my $count_factor = ref($self->seq) eq 'App::Sandy::Seq::PairedEnd' |
1212
|
|
|
|
|
|
|
? 2 |
1213
|
|
|
|
|
|
|
: 1; |
1214
|
|
|
|
|
|
|
|
1215
|
|
|
|
|
|
|
# Save transcripts |
1216
|
0
|
|
|
|
|
0
|
log_msg ":: Saving transcripts count"; |
1217
|
0
|
|
|
|
|
0
|
my $fh = $self->with_open_w($count_file{transcripts}, 0); |
1218
|
|
|
|
|
|
|
|
1219
|
0
|
|
|
|
|
0
|
log_msg " => Writing counts to $count_file{transcripts} ..."; |
1220
|
0
|
|
|
|
|
0
|
for my $id (sort keys %counters) { |
1221
|
0
|
|
|
|
|
0
|
printf {$fh} "%s\t%d\n" => $id, |
1222
|
0
|
|
|
|
|
0
|
int($counters{$id} / $count_factor); |
1223
|
|
|
|
|
|
|
} |
1224
|
|
|
|
|
|
|
|
1225
|
|
|
|
|
|
|
# Close transcripts file |
1226
|
0
|
|
|
|
|
0
|
log_msg ":; Writing and closing $count_file{transcripts} ..."; |
1227
|
0
|
0
|
|
|
|
0
|
close $fh |
1228
|
|
|
|
|
|
|
or die "Cannot write file $count_file{transcripts}: $!\n"; |
1229
|
|
|
|
|
|
|
|
1230
|
|
|
|
|
|
|
# Calculate 'gene' like expression |
1231
|
0
|
|
|
|
|
0
|
my $parent_count = $self->_calculate_parent_count(\%counters); |
1232
|
|
|
|
|
|
|
|
1233
|
0
|
0
|
|
|
|
0
|
if (%$parent_count) { |
1234
|
|
|
|
|
|
|
# Save genes |
1235
|
0
|
|
|
|
|
0
|
log_msg ":: Saving genes count"; |
1236
|
0
|
|
|
|
|
0
|
my $fh = $self->with_open_w($count_file{genes}, 0); |
1237
|
|
|
|
|
|
|
|
1238
|
0
|
|
|
|
|
0
|
log_msg " => Writing counts to $count_file{genes} ..."; |
1239
|
0
|
|
|
|
|
0
|
for my $id (sort keys %$parent_count) { |
1240
|
0
|
|
|
|
|
0
|
printf {$fh} "%s\t%d\n" => $id, |
1241
|
0
|
|
|
|
|
0
|
int($parent_count->{$id} / $count_factor); |
1242
|
|
|
|
|
|
|
} |
1243
|
|
|
|
|
|
|
|
1244
|
|
|
|
|
|
|
# Close genes file |
1245
|
0
|
|
|
|
|
0
|
log_msg ":; Writing and closing $count_file{genes} ..."; |
1246
|
0
|
0
|
|
|
|
0
|
close $fh |
1247
|
|
|
|
|
|
|
or die "Cannot write file $count_file{genes}: $!\n"; |
1248
|
|
|
|
|
|
|
} |
1249
|
|
|
|
|
|
|
} else { |
1250
|
|
|
|
|
|
|
# Save coverage |
1251
|
4
|
|
|
|
|
70
|
log_msg ":: Saving coverage count"; |
1252
|
4
|
|
|
|
|
84
|
my $fh = $self->with_open_w($count_file{coverage}, 0); |
1253
|
|
|
|
|
|
|
|
1254
|
4
|
|
|
|
|
43
|
log_msg " => Writing counts to $count_file{coverage} ..."; |
1255
|
4
|
|
|
|
|
72
|
for my $id (sort keys %counters) { |
1256
|
20
|
|
|
|
|
40
|
printf {$fh} "%s\t%d\n" => $id, $counters{$id}; |
|
20
|
|
|
|
|
106
|
|
1257
|
|
|
|
|
|
|
} |
1258
|
|
|
|
|
|
|
|
1259
|
|
|
|
|
|
|
# Close coverage file |
1260
|
4
|
|
|
|
|
48
|
log_msg ":; Writing and closing $count_file{coverage} ..."; |
1261
|
4
|
50
|
|
|
|
399
|
close $fh |
1262
|
|
|
|
|
|
|
or die "Cannot write file $count_file{coverage}: $!\n"; |
1263
|
|
|
|
|
|
|
} |
1264
|
|
|
|
|
|
|
} |
1265
|
|
|
|
|
|
|
|
1266
|
|
|
|
|
|
|
__END__ |
1267
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
=pod |
1269
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
=encoding UTF-8 |
1271
|
|
|
|
|
|
|
|
1272
|
|
|
|
|
|
|
=head1 NAME |
1273
|
|
|
|
|
|
|
|
1274
|
|
|
|
|
|
|
App::Sandy::Simulator - Class responsible to make the simulation |
1275
|
|
|
|
|
|
|
|
1276
|
|
|
|
|
|
|
=head1 VERSION |
1277
|
|
|
|
|
|
|
|
1278
|
|
|
|
|
|
|
version 0.24 |
1279
|
|
|
|
|
|
|
|
1280
|
|
|
|
|
|
|
=head1 AUTHORS |
1281
|
|
|
|
|
|
|
|
1282
|
|
|
|
|
|
|
=over 4 |
1283
|
|
|
|
|
|
|
|
1284
|
|
|
|
|
|
|
=item * |
1285
|
|
|
|
|
|
|
|
1286
|
|
|
|
|
|
|
Thiago L. A. Miller <tmiller@mochsl.org.br> |
1287
|
|
|
|
|
|
|
|
1288
|
|
|
|
|
|
|
=item * |
1289
|
|
|
|
|
|
|
|
1290
|
|
|
|
|
|
|
J. Leonel Buzzo <lbuzzo@mochsl.org.br> |
1291
|
|
|
|
|
|
|
|
1292
|
|
|
|
|
|
|
=item * |
1293
|
|
|
|
|
|
|
|
1294
|
|
|
|
|
|
|
Felipe R. C. dos Santos <fsantos@mochsl.org.br> |
1295
|
|
|
|
|
|
|
|
1296
|
|
|
|
|
|
|
=item * |
1297
|
|
|
|
|
|
|
|
1298
|
|
|
|
|
|
|
Helena B. Conceição <hconceicao@mochsl.org.br> |
1299
|
|
|
|
|
|
|
|
1300
|
|
|
|
|
|
|
=item * |
1301
|
|
|
|
|
|
|
|
1302
|
|
|
|
|
|
|
Rodrigo Barreiro <rbarreiro@mochsl.org.br> |
1303
|
|
|
|
|
|
|
|
1304
|
|
|
|
|
|
|
=item * |
1305
|
|
|
|
|
|
|
|
1306
|
|
|
|
|
|
|
Gabriela Guardia <gguardia@mochsl.org.br> |
1307
|
|
|
|
|
|
|
|
1308
|
|
|
|
|
|
|
=item * |
1309
|
|
|
|
|
|
|
|
1310
|
|
|
|
|
|
|
Fernanda Orpinelli <forpinelli@mochsl.org.br> |
1311
|
|
|
|
|
|
|
|
1312
|
|
|
|
|
|
|
=item * |
1313
|
|
|
|
|
|
|
|
1314
|
|
|
|
|
|
|
Rafael Mercuri <rmercuri@mochsl.org.br> |
1315
|
|
|
|
|
|
|
|
1316
|
|
|
|
|
|
|
=item * |
1317
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
Rodrigo Barreiro <rbarreiro@mochsl.org.br> |
1319
|
|
|
|
|
|
|
|
1320
|
|
|
|
|
|
|
=item * |
1321
|
|
|
|
|
|
|
|
1322
|
|
|
|
|
|
|
Pedro A. F. Galante <pgalante@mochsl.org.br> |
1323
|
|
|
|
|
|
|
|
1324
|
|
|
|
|
|
|
=back |
1325
|
|
|
|
|
|
|
|
1326
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
1327
|
|
|
|
|
|
|
|
1328
|
|
|
|
|
|
|
This software is Copyright (c) 2023 by Teaching and Research Institute from Sírio-Libanês Hospital. |
1329
|
|
|
|
|
|
|
|
1330
|
|
|
|
|
|
|
This is free software, licensed under: |
1331
|
|
|
|
|
|
|
|
1332
|
|
|
|
|
|
|
The GNU General Public License, Version 3, June 2007 |
1333
|
|
|
|
|
|
|
|
1334
|
|
|
|
|
|
|
=cut |