line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::Tools::Run::AssemblerBase |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Florent Angly |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright Florent Angly |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::Tools::Run::AssemblerBase - base class for wrapping external assemblers |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
Give standard usage here |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 DESCRIPTION |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
Describe the object here |
25
|
|
|
|
|
|
|
# use of globals for configuration... |
26
|
|
|
|
|
|
|
# I've created the separate Config.pm module, and 'use'd it in the |
27
|
|
|
|
|
|
|
# main module, for instance... |
28
|
|
|
|
|
|
|
# other configuration globals: |
29
|
|
|
|
|
|
|
# $use_dash = [1|single|double|mixed] |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head1 FEEDBACK |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=head2 Mailing Lists |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
36
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to |
37
|
|
|
|
|
|
|
the Bioperl mailing list. Your participation is much appreciated. |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
40
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=head2 Support |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
L |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
49
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
50
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
51
|
|
|
|
|
|
|
with code and data examples if at all possible. |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=head2 Reporting Bugs |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
56
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via |
57
|
|
|
|
|
|
|
the web: |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
http://redmine.open-bio.org/projects/bioperl/ |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=head1 AUTHOR - Florent Angly |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
Email florent dot angly at gmail dot com |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
Mark A. Jensen - maj -at- fortinbras -dot- us |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=head1 APPENDIX |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
The rest of the documentation details each of the object methods. |
72
|
|
|
|
|
|
|
Internal methods are usually preceded with a _ |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=cut |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
package Bio::Tools::Run::AssemblerBase; |
77
|
|
|
|
|
|
|
|
78
|
8
|
|
|
8
|
|
77588
|
use strict; |
|
8
|
|
|
|
|
18
|
|
|
8
|
|
|
|
|
221
|
|
79
|
8
|
|
|
8
|
|
2567
|
use Bio::SeqIO; |
|
8
|
|
|
|
|
150940
|
|
|
8
|
|
|
|
|
247
|
|
80
|
8
|
|
|
8
|
|
2264
|
use Bio::Assembly::IO; |
|
8
|
|
|
|
|
7365
|
|
|
8
|
|
|
|
|
216
|
|
81
|
|
|
|
|
|
|
|
82
|
8
|
|
|
8
|
|
50
|
use base qw(Bio::Root::Root Bio::Tools::Run::WrapperBase Bio::ParameterBaseI); |
|
8
|
|
|
|
|
17
|
|
|
8
|
|
|
|
|
7733
|
|
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
our $default_out_type = 'Bio::Assembly::ScaffoldI'; |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head2 program_name |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
Title : program_name |
89
|
|
|
|
|
|
|
Usage : $assembler>program_name() |
90
|
|
|
|
|
|
|
Function: get/set the executable name |
91
|
|
|
|
|
|
|
Returns: string |
92
|
|
|
|
|
|
|
Args : string |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=cut |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
sub program_name { |
97
|
71
|
|
|
71
|
1
|
2143
|
my ($self, $val) = @_; |
98
|
71
|
100
|
|
|
|
158
|
$self->{'_program_name'} = $val if $val; |
99
|
71
|
|
|
|
|
321
|
return $self->{'_program_name'}; |
100
|
|
|
|
|
|
|
} |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=head2 program_dir |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
Title : program_dir |
106
|
|
|
|
|
|
|
Usage : $assembler->program_dir() |
107
|
|
|
|
|
|
|
Function: get/set the program dir |
108
|
|
|
|
|
|
|
Returns: string |
109
|
|
|
|
|
|
|
Args : string |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=cut |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
sub program_dir { |
114
|
41
|
|
|
41
|
1
|
85
|
my ($self, $val) = @_; |
115
|
41
|
100
|
|
|
|
93
|
$self->{'_program_dir'} = $val if $val; |
116
|
41
|
|
|
|
|
130
|
return $self->{'_program_dir'}; |
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=head2 out_type |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
Title : out_type |
123
|
|
|
|
|
|
|
Usage : $assembler->out_type('Bio::Assembly::ScaffoldI') |
124
|
|
|
|
|
|
|
Function: get/set the desired type of output |
125
|
|
|
|
|
|
|
Returns : The type of results to return |
126
|
|
|
|
|
|
|
Args : Type of results to return (optional): |
127
|
|
|
|
|
|
|
'Bio::Assembly::IO' object |
128
|
|
|
|
|
|
|
'Bio::Assembly::ScaffoldI' object (default) |
129
|
|
|
|
|
|
|
The name of a file to save the results in |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=cut |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
sub out_type { |
134
|
0
|
|
|
0
|
1
|
0
|
my ($self, $val) = @_; |
135
|
0
|
0
|
|
|
|
0
|
if (defined $val) { |
136
|
0
|
|
|
|
|
0
|
$self->{'_out_type'} = $val; |
137
|
|
|
|
|
|
|
} else { |
138
|
0
|
0
|
|
|
|
0
|
if (not defined $self->{'_out_type'}) { |
139
|
0
|
|
|
|
|
0
|
$self->{'_out_type'} = $default_out_type; |
140
|
|
|
|
|
|
|
} |
141
|
|
|
|
|
|
|
} |
142
|
0
|
|
|
|
|
0
|
return $self->{'_out_type'}; |
143
|
|
|
|
|
|
|
} |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=head2 _assembly_format |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
Title : _assembly_format |
149
|
|
|
|
|
|
|
Usage : $assembler->_assembly_format('ace') |
150
|
|
|
|
|
|
|
Function: get/set the driver to use to parse the assembly results |
151
|
|
|
|
|
|
|
Returns : the driver to use to parse the assembly results |
152
|
|
|
|
|
|
|
Args : the driver to use to parse the assembly results (optional) |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
=cut |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
sub _assembly_format { |
157
|
9
|
|
|
9
|
|
28
|
my ($self, $asm_format) = @_; |
158
|
9
|
50
|
|
|
|
44
|
if (defined $asm_format) { |
159
|
9
|
|
|
|
|
27
|
$self->{'_assembly_format'} = $asm_format; |
160
|
|
|
|
|
|
|
} |
161
|
9
|
|
|
|
|
25
|
return $self->{'_assembly_format'}; |
162
|
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
=head2 _assembly_variant |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
Title : _assembly_variant |
168
|
|
|
|
|
|
|
Usage : $assembler->_assembly_variant('454') |
169
|
|
|
|
|
|
|
Function: get/set the driver variant to use to parse the assembly results. For |
170
|
|
|
|
|
|
|
example, the ACE format has the ACE-454 and the ACE-consed variants |
171
|
|
|
|
|
|
|
Returns : the driver variant to use to parse the assembly results |
172
|
|
|
|
|
|
|
Args : the driver variant to use to parse the assembly results (optional) |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
=cut |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
sub _assembly_variant { |
177
|
1
|
|
|
1
|
|
2
|
my ($self, $asm_variant) = @_; |
178
|
1
|
50
|
|
|
|
3
|
if (defined $asm_variant) { |
179
|
1
|
|
|
|
|
2
|
$self->{'_assembly_variant'} = $asm_variant; |
180
|
|
|
|
|
|
|
} |
181
|
1
|
|
|
|
|
2
|
return $self->{'_assembly_variant'}; |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
=head2 _check_executable |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
Title : _check_executable |
188
|
|
|
|
|
|
|
Usage : $assembler->_check_executable() |
189
|
|
|
|
|
|
|
Function: Verifies that the program executable can be found, or throw an error. |
190
|
|
|
|
|
|
|
Returns: 1 for success |
191
|
|
|
|
|
|
|
Args : - |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
=cut |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
sub _check_executable { |
196
|
0
|
|
|
0
|
|
0
|
my ($self) = @_; |
197
|
0
|
0
|
|
|
|
0
|
if (not defined $self->executable()) { |
198
|
0
|
|
|
|
|
0
|
$self->throw("Could not find the executable '".$self->program_name()."'. ". |
199
|
|
|
|
|
|
|
'You can use $self->program_dir() and $self->program_name() to '. |
200
|
|
|
|
|
|
|
"specify the location of the program."); |
201
|
|
|
|
|
|
|
} |
202
|
0
|
|
|
|
|
0
|
return 1; |
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
=head2 _check_sequence_input |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
Title : _check_sequence_input |
208
|
|
|
|
|
|
|
Usage : $assembler->_check_sequence_input($seqs) |
209
|
|
|
|
|
|
|
Function: Check that the sequence input is a valid file, or an arrayref of |
210
|
|
|
|
|
|
|
sequence objects (Bio::PrimarySeqI or Bio::SeqI). If not, an |
211
|
|
|
|
|
|
|
exception is thrown. |
212
|
|
|
|
|
|
|
Returns : 1 if the check passed |
213
|
|
|
|
|
|
|
Args : sequence input |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
=cut |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
sub _check_sequence_input { |
218
|
0
|
|
|
0
|
|
0
|
my ($self, $seqs) = @_; |
219
|
0
|
0
|
|
|
|
0
|
if (not $seqs) { |
220
|
0
|
|
|
|
|
0
|
$self->throw("Must supply sequences as a FASTA filename or a sequence object". |
221
|
|
|
|
|
|
|
" (Bio::PrimarySeqI or Bio::SeqI) array reference"); |
222
|
|
|
|
|
|
|
} else { |
223
|
0
|
0
|
|
|
|
0
|
if (ref($seqs) =~ m/ARRAY/i ) { |
224
|
0
|
|
|
|
|
0
|
for my $seq (@$seqs) { |
225
|
0
|
0
|
0
|
|
|
0
|
unless ($seq->isa('Bio::PrimarySeqI') || $seq->isa('Bio::SeqI')) { |
226
|
0
|
|
|
|
|
0
|
$self->throw("Not a valid Bio::PrimarySeqI or Bio::SeqI object"); |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
} else { |
230
|
0
|
0
|
|
|
|
0
|
if (not -f $seqs) { |
231
|
0
|
|
|
|
|
0
|
$self->throw("Input file '$seqs' does not seem to exist."); |
232
|
|
|
|
|
|
|
} |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
} |
235
|
0
|
|
|
|
|
0
|
return 1; |
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
=head2 _check_optional_quality_input |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
Title : _check_optional_quality_input |
241
|
|
|
|
|
|
|
Usage : $assembler->_check_optional_quality_input($quals) |
242
|
|
|
|
|
|
|
Function: If a quality score input is provided, check that it is either a |
243
|
|
|
|
|
|
|
valid file or an arrayref of quality score objects (Bio::Seq:: |
244
|
|
|
|
|
|
|
QualI or Bio::Seq::Quality). If not, an exception is thrown. |
245
|
|
|
|
|
|
|
Returns : 1 if the check passed (or quality score input was provided) |
246
|
|
|
|
|
|
|
Args : quality score input |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
=cut |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
sub _check_optional_quality_input { |
251
|
0
|
|
|
0
|
|
0
|
my ($self, $quals) = @_; |
252
|
0
|
0
|
|
|
|
0
|
if (defined $quals) { |
253
|
0
|
0
|
|
|
|
0
|
if (ref($quals) =~ m/ARRAY/i) { |
254
|
0
|
|
|
|
|
0
|
for my $qual (@$quals) { |
255
|
0
|
0
|
0
|
|
|
0
|
unless ($qual->isa('Bio::Seq::QualI') || $qual->isa('Bio::Seq::Quality')) { |
256
|
0
|
|
|
|
|
0
|
$self->throw("Not a valid Bio::Seq::QualI or Bio::Seq::Quality object"); |
257
|
|
|
|
|
|
|
} |
258
|
|
|
|
|
|
|
} |
259
|
|
|
|
|
|
|
} else { |
260
|
0
|
0
|
|
|
|
0
|
if (not -f $quals) { |
261
|
0
|
|
|
|
|
0
|
$self->throw("Input file '$quals' does not seem to exist."); |
262
|
|
|
|
|
|
|
} |
263
|
|
|
|
|
|
|
} |
264
|
|
|
|
|
|
|
} |
265
|
0
|
|
|
|
|
0
|
return 1; |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
=head2 _prepare_input_file |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
Title : _prepare_input_file |
272
|
|
|
|
|
|
|
Usage : ($fasta_file, $qual_file) = $assembler->_prepare_input_file(\@seqs, \@quals); |
273
|
|
|
|
|
|
|
Function: Create the input FASTA and QUAL files as needed. If the input |
274
|
|
|
|
|
|
|
sequences are provided in a (FASTA) file, the optional input quality |
275
|
|
|
|
|
|
|
scores are also expected to be in a (QUAL) file. If the input |
276
|
|
|
|
|
|
|
sequences are an arrayref of bioperl sequence objects, the optional |
277
|
|
|
|
|
|
|
input quality scores are expected to be an arrayref of bioperl |
278
|
|
|
|
|
|
|
quality score objects, in the same order as the sequence objects. |
279
|
|
|
|
|
|
|
Returns : - input filehandle |
280
|
|
|
|
|
|
|
- input filename |
281
|
|
|
|
|
|
|
Args : - sequence input (FASTA file or sequence object arrayref) |
282
|
|
|
|
|
|
|
- optional quality score input (QUAL file or quality score object |
283
|
|
|
|
|
|
|
arrayref) |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
=cut |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
sub _prepare_input_files { |
288
|
0
|
|
|
0
|
|
0
|
my ($self, $seqs, $quals) = @_; |
289
|
|
|
|
|
|
|
# Set up input FASTA and QUAL files |
290
|
0
|
|
|
|
|
0
|
$self->io->_initialize_io(); |
291
|
|
|
|
|
|
|
#$self->tempdir(); |
292
|
0
|
|
|
|
|
0
|
my $fasta_file; |
293
|
|
|
|
|
|
|
my $qual_file; |
294
|
0
|
0
|
|
|
|
0
|
if ( ref($seqs) =~ m/ARRAY/i ) { |
295
|
|
|
|
|
|
|
# Input sequences are an arrayref of Bioperl sequence objects |
296
|
0
|
0
|
0
|
|
|
0
|
if (defined $quals && not ref($quals) =~ m/ARRAY/i) { |
297
|
0
|
|
|
|
|
0
|
$self->throw("The input sequences are an arrayref of sequence objects. ". |
298
|
|
|
|
|
|
|
"Expecting the quality scores as an arrayref of quality score objects"); |
299
|
|
|
|
|
|
|
} else { |
300
|
|
|
|
|
|
|
# The input qualities are not defined or are an arrayref of quality objects |
301
|
|
|
|
|
|
|
# Write temp FASTA and QUAL input files |
302
|
0
|
|
|
|
|
0
|
($fasta_file, $qual_file) = $self->_write_seq_file($seqs, $quals); |
303
|
|
|
|
|
|
|
} |
304
|
|
|
|
|
|
|
} else { |
305
|
|
|
|
|
|
|
# Sequence input is a FASTA file |
306
|
0
|
|
|
|
|
0
|
$fasta_file = $seqs; |
307
|
0
|
0
|
0
|
|
|
0
|
if (defined $quals && ref($quals) =~ m/ARRAY/i) { |
308
|
|
|
|
|
|
|
# Quality input is defined and is an arrayref of quality objects |
309
|
0
|
|
|
|
|
0
|
$self->throw("The input sequences are in a FASTA file. Expecting the ". |
310
|
|
|
|
|
|
|
"quality scores in a QUAL file."); |
311
|
|
|
|
|
|
|
} else { |
312
|
|
|
|
|
|
|
# Input quality scores is either not defined or is a QUAL file |
313
|
0
|
|
|
|
|
0
|
$qual_file = $quals; |
314
|
|
|
|
|
|
|
} |
315
|
|
|
|
|
|
|
} |
316
|
0
|
|
|
|
|
0
|
return $fasta_file, $qual_file; |
317
|
|
|
|
|
|
|
} |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
=head2 _write_seq_file |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
Title : _write_seq_file |
323
|
|
|
|
|
|
|
Usage : ($fasta_file, $qual_file) = $assembler->_write_seq_file(\@seqs, \@quals) |
324
|
|
|
|
|
|
|
Function: Write temporary FASTA and QUAL files on disk |
325
|
|
|
|
|
|
|
Returns : name of FASTA file |
326
|
|
|
|
|
|
|
name of QUAL file (undef if no quality scoress) |
327
|
|
|
|
|
|
|
Args : - arrayref of sequence objects |
328
|
|
|
|
|
|
|
- optional arrayref of quality score objects |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
=cut |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
sub _write_seq_file { |
333
|
0
|
|
|
0
|
|
0
|
my ($self, $seqs, $quals) = @_; |
334
|
|
|
|
|
|
|
# Store the sequences in temporary FASTA files |
335
|
0
|
|
|
|
|
0
|
my $tmpdir = $self->tempdir(); |
336
|
0
|
|
|
|
|
0
|
my ($fasta_h, $fasta_file) = $self->io->tempfile( -dir => $tmpdir ); |
337
|
0
|
|
|
|
|
0
|
my ($qual_h, $qual_file ) = $self->io->tempfile( -dir => $tmpdir ); |
338
|
0
|
|
|
|
|
0
|
my $fasta_out = Bio::SeqIO->new( -fh => $fasta_h , -format => 'fasta'); |
339
|
0
|
|
|
|
|
0
|
my $qual_out = Bio::SeqIO->new( -fh => $qual_h , -format => 'qual' ); |
340
|
0
|
|
|
|
|
0
|
my $use_qual_file = 0; |
341
|
0
|
|
|
|
|
0
|
my $size = scalar @$seqs; |
342
|
0
|
|
|
|
|
0
|
for ( my $i = 0 ; $i < $size ; $i++ ) { |
343
|
0
|
|
|
|
|
0
|
my $seq = $$seqs[$i]; |
344
|
|
|
|
|
|
|
# Make sure that all sequences have an ID (to prevent TIGR Assembler crash) |
345
|
0
|
0
|
|
|
|
0
|
if (not defined $seq->id) { |
346
|
0
|
|
|
|
|
0
|
my $newid = 'tmp'.$i; |
347
|
0
|
|
|
|
|
0
|
print $newid."\n"; |
348
|
0
|
|
|
|
|
0
|
$seq->id($newid); |
349
|
0
|
|
|
|
|
0
|
$self->warn("A sequence had no ID. Its ID is now $newid"); |
350
|
|
|
|
|
|
|
} |
351
|
0
|
|
|
|
|
0
|
my $seqid = $seq->id; |
352
|
|
|
|
|
|
|
# Write the FASTA entries in files (and QUAL if appropriate) |
353
|
0
|
|
|
|
|
0
|
$fasta_out->write_seq($seq); |
354
|
0
|
0
|
|
|
|
0
|
if ($seq->isa('Bio::Seq::Quality')) { |
355
|
|
|
|
|
|
|
# Quality scores embedded in seq object |
356
|
0
|
0
|
|
|
|
0
|
if (scalar @{$seq->qual} > 0) { |
|
0
|
|
|
|
|
0
|
|
357
|
0
|
|
|
|
|
0
|
$qual_out->write_seq($seq); |
358
|
0
|
|
|
|
|
0
|
$use_qual_file = 1; |
359
|
|
|
|
|
|
|
} |
360
|
|
|
|
|
|
|
} else { |
361
|
|
|
|
|
|
|
# Quality score in a different object from the sequence object |
362
|
0
|
|
|
|
|
0
|
my $qual = $$quals[$i]; |
363
|
0
|
0
|
|
|
|
0
|
if (defined $qual) { |
364
|
0
|
|
|
|
|
0
|
my $qualid = $qual->id; |
365
|
0
|
0
|
|
|
|
0
|
if ($qualid eq $seqid) { |
366
|
|
|
|
|
|
|
# valid quality score information |
367
|
0
|
|
|
|
|
0
|
$qual_out->write_seq($qual); |
368
|
0
|
|
|
|
|
0
|
$use_qual_file = 1; |
369
|
|
|
|
|
|
|
} else { |
370
|
|
|
|
|
|
|
# ID mismatch between sequence and quality score |
371
|
0
|
|
|
|
|
0
|
$self->warn("Sequence object with ID $seqid does not match quality ". |
372
|
|
|
|
|
|
|
"score object with ID $qualid"); |
373
|
|
|
|
|
|
|
} |
374
|
|
|
|
|
|
|
} |
375
|
|
|
|
|
|
|
} |
376
|
|
|
|
|
|
|
} |
377
|
0
|
|
|
|
|
0
|
close($fasta_h); |
378
|
0
|
|
|
|
|
0
|
close($qual_h); |
379
|
0
|
|
|
|
|
0
|
$fasta_out->close(); |
380
|
0
|
|
|
|
|
0
|
$qual_out->close(); |
381
|
0
|
0
|
|
|
|
0
|
return undef if scalar @$seqs <= 0; |
382
|
0
|
0
|
|
|
|
0
|
$qual_file = undef if $use_qual_file == 0; |
383
|
0
|
|
|
|
|
0
|
return $fasta_file, $qual_file; |
384
|
|
|
|
|
|
|
} |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
=head2 _prepare_output_file |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
Title : _prepare_output_file |
390
|
|
|
|
|
|
|
Usage : ($out_fh, $out_file) = $assembler->_prepare_output_file( ); |
391
|
|
|
|
|
|
|
Function: Prepare the output file |
392
|
|
|
|
|
|
|
Returns : - output filehandle |
393
|
|
|
|
|
|
|
- output filename |
394
|
|
|
|
|
|
|
Args : none |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
=cut |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
sub _prepare_output_file { |
399
|
0
|
|
|
0
|
|
0
|
my ($self) = @_; |
400
|
0
|
|
|
|
|
0
|
my ($output_fh, $output_file); |
401
|
0
|
|
|
|
|
0
|
my $out_type = $self->out_type(); |
402
|
0
|
0
|
0
|
|
|
0
|
if ( (not $out_type eq 'Bio::Assembly::ScaffoldI') && |
403
|
|
|
|
|
|
|
(not $out_type eq 'Bio::Assembly::IO' ) ) { |
404
|
|
|
|
|
|
|
# Output is a file with specified name |
405
|
0
|
|
|
|
|
0
|
$output_file = $out_type; |
406
|
0
|
0
|
|
|
|
0
|
open $output_fh, '>', $output_file or $self->throw("Could not write file ". |
407
|
|
|
|
|
|
|
"'$output_file': $!"); |
408
|
|
|
|
|
|
|
} else { |
409
|
0
|
|
|
|
|
0
|
( $output_fh, $output_file ) = $self->io->tempfile( -dir => $self->tempdir() ); |
410
|
|
|
|
|
|
|
} |
411
|
0
|
|
|
|
|
0
|
$self->outfile_name($output_file); |
412
|
0
|
|
|
|
|
0
|
return $output_fh, $output_file; |
413
|
|
|
|
|
|
|
} |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
=head2 _export_results |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
Title : _export_results |
418
|
|
|
|
|
|
|
Usage : $results = $assembler->_export_results($asm_file); |
419
|
|
|
|
|
|
|
Function: Export the assembly results |
420
|
|
|
|
|
|
|
Returns : Exported assembly (file or IO object or assembly object) |
421
|
|
|
|
|
|
|
Args : -Name of the file containing an assembly |
422
|
|
|
|
|
|
|
- -keep_asm => boolean (if true, do not unlink $asm_file) |
423
|
|
|
|
|
|
|
-[optional] additional named args required by the B:A:IO object |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
=cut |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
sub _export_results { |
428
|
0
|
|
|
0
|
|
0
|
my ($self, $asm_file, @named_args) = @_; |
429
|
0
|
|
|
|
|
0
|
my $results; |
430
|
|
|
|
|
|
|
my $asm_io; |
431
|
0
|
|
|
|
|
0
|
my $asm; |
432
|
0
|
|
|
|
|
0
|
my %args = @named_args; |
433
|
0
|
|
|
|
|
0
|
my $keep_asm = $args{'-keep_asm'}; |
434
|
0
|
|
|
|
|
0
|
delete $args{'-keep_asm'}; |
435
|
0
|
|
|
|
|
0
|
my $out_type = $self->out_type(); |
436
|
0
|
0
|
0
|
|
|
0
|
if ( (not $out_type eq 'Bio::Assembly::ScaffoldI') && |
437
|
|
|
|
|
|
|
(not $out_type eq 'Bio::Assembly::IO' ) ) { |
438
|
|
|
|
|
|
|
# Results are the assembler output file |
439
|
0
|
|
|
|
|
0
|
$results = $asm_file; |
440
|
|
|
|
|
|
|
} else { |
441
|
0
|
|
|
|
|
0
|
$asm_io = Bio::Assembly::IO->new( |
442
|
|
|
|
|
|
|
-file => "<$asm_file", |
443
|
|
|
|
|
|
|
-format => $self->_assembly_format(), |
444
|
|
|
|
|
|
|
-variant => $self->_assembly_variant(), |
445
|
|
|
|
|
|
|
@named_args ); |
446
|
|
|
|
|
|
|
# this unlink is a problem for Bio::DB::Sam (in B:A:I:sam), which needs |
447
|
|
|
|
|
|
|
# the original bam file around. |
448
|
0
|
0
|
|
|
|
0
|
unlink $asm_file unless $keep_asm; |
449
|
0
|
0
|
|
|
|
0
|
if ($out_type eq 'Bio::Assembly::IO') { |
450
|
|
|
|
|
|
|
# Results are a Bio::Assembly::IO object |
451
|
0
|
|
|
|
|
0
|
$results = $asm_io; |
452
|
|
|
|
|
|
|
} else { |
453
|
0
|
|
|
|
|
0
|
$asm = $asm_io->next_assembly(); |
454
|
0
|
|
|
|
|
0
|
$asm_io->close; |
455
|
0
|
0
|
|
|
|
0
|
if ($out_type eq 'Bio::Assembly::ScaffoldI') { |
456
|
|
|
|
|
|
|
# Results are a Bio::Assembly::Scaffold object |
457
|
0
|
|
|
|
|
0
|
$results = $asm; |
458
|
|
|
|
|
|
|
} else { |
459
|
0
|
|
|
|
|
0
|
$self->throw("The return type has to be 'Bio::Assembly::IO', 'Bio::". |
460
|
|
|
|
|
|
|
"Assembly::ScaffoldI' or a file name."); |
461
|
|
|
|
|
|
|
} |
462
|
|
|
|
|
|
|
} |
463
|
|
|
|
|
|
|
} |
464
|
0
|
|
|
|
|
0
|
$self->cleanup(); |
465
|
0
|
|
|
|
|
0
|
return $results; |
466
|
|
|
|
|
|
|
} |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
=head2 _register_program_commands() |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
Title : _register_program_commands |
472
|
|
|
|
|
|
|
Usage : $assembler->_register_program_commands( \@commands, \%prefixes ) |
473
|
|
|
|
|
|
|
Function: Register the commands a program accepts (for programs that act |
474
|
|
|
|
|
|
|
as frontends for a set of commands, each command having its own |
475
|
|
|
|
|
|
|
set of params/switches) |
476
|
|
|
|
|
|
|
Returns : true on success |
477
|
|
|
|
|
|
|
Args : arrayref to a list of commands (scalar strings), |
478
|
|
|
|
|
|
|
hashref to a translation table of the form |
479
|
|
|
|
|
|
|
{ $prefix1 => $command1, ... } [optional] |
480
|
|
|
|
|
|
|
Note : To implement a program with this kind of calling structure, |
481
|
|
|
|
|
|
|
include a parameter called 'command' in the |
482
|
|
|
|
|
|
|
@program_params global |
483
|
|
|
|
|
|
|
Note : The translation table is used to associate parameters and |
484
|
|
|
|
|
|
|
switches specified in _set_program_options with the correct |
485
|
|
|
|
|
|
|
program command. In the globals @program_params and |
486
|
|
|
|
|
|
|
@program_switches, specify elements as 'prefix1|param' and |
487
|
|
|
|
|
|
|
'prefix1|switch', etc. |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
=cut |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
sub _register_program_commands { |
492
|
1
|
|
|
1
|
|
2
|
my ($self, $commands, $prefixes) = @_; |
493
|
1
|
|
|
|
|
3
|
$self->{'_options'}->{'_commands'} = $commands; |
494
|
1
|
|
|
|
|
3
|
$self->{'_options'}->{'_prefixes'} = $prefixes; |
495
|
1
|
|
|
|
|
2
|
return 1; |
496
|
|
|
|
|
|
|
} |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
=head2 _set_program_options |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
Title : _set_program_options |
501
|
|
|
|
|
|
|
Usage : $assembler->_set_program_options( \@ args ); |
502
|
|
|
|
|
|
|
Function: Register the parameters and flags that an assembler takes. |
503
|
|
|
|
|
|
|
Returns : 1 for success |
504
|
|
|
|
|
|
|
Args : - arguments passed by the user |
505
|
|
|
|
|
|
|
- parameters that the program accepts, optional (default: none) |
506
|
|
|
|
|
|
|
- switches that the program accepts, optional (default: none) |
507
|
|
|
|
|
|
|
- parameter translation, optional (default: no translation occurs) |
508
|
|
|
|
|
|
|
- dash option for the program parameters, [1|single|double|mixed], |
509
|
|
|
|
|
|
|
optional (default: yes, use single dashes only) |
510
|
|
|
|
|
|
|
- join, optional (default: ' ') |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
=cut |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
sub _set_program_options { |
515
|
11
|
|
|
11
|
|
41
|
my ($self, $args, $params, $switches, $translation, $qual_param, $use_dash, $join) = @_; |
516
|
|
|
|
|
|
|
# I think we need to filter on the basis of -command here... |
517
|
11
|
|
|
|
|
41
|
my %args = @$args; |
518
|
11
|
|
66
|
|
|
67
|
my $cmd = $args{'-command'} || $args{'command'}; |
519
|
11
|
100
|
|
|
|
40
|
if ($cmd) { |
520
|
2
|
|
|
|
|
5
|
my (@p,@s, %x); |
521
|
2
|
50
|
|
|
|
9
|
$self->warn('Command found, but no commands registered; invoke _register_program_commands') unless $self->{'_options'}->{'_commands'}; |
522
|
2
|
50
|
|
|
|
3
|
$self->throw("Command '$cmd' not registered") unless grep /^$cmd$/, @{$self->{'_options'}->{'_commands'}}; |
|
2
|
|
|
|
|
53
|
|
523
|
2
|
50
|
|
|
|
9
|
if ($self->{'_options'}->{'_prefixes'}) { |
524
|
2
|
|
|
|
|
5
|
$cmd = $self->{'_options'}->{'_prefixes'}->{$cmd}; |
525
|
|
|
|
|
|
|
} # else, the command is its own prefix |
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
# problem here: if a param/switch does not have a prefix (pfx|), then |
528
|
|
|
|
|
|
|
# should probably allow it to pass thru... |
529
|
2
|
50
|
|
|
|
66
|
@p = (grep(!/^.*?\|/, @$params), $cmd ? grep(/^${cmd}\|/, @$params) : ()); |
530
|
2
|
50
|
|
|
|
26
|
@s = (grep(!/^.*?\|/, @$switches), $cmd ? grep(/^${cmd}\|/, @$switches) : ()); |
531
|
2
|
|
|
|
|
15
|
s/.*?\|// for @p; |
532
|
2
|
|
|
|
|
8
|
s/.*?\|// for @s; |
533
|
2
|
|
|
|
|
22
|
@x{@p, @s} = @{$translation}{ |
534
|
2
|
50
|
|
|
|
105
|
grep( !/^.*?\|/, @$params, @$switches), |
535
|
|
|
|
|
|
|
$cmd ? grep(/^${cmd}\|/, @$params, @$switches) : () }; |
536
|
2
|
|
|
|
|
4
|
$translation = \%x; |
537
|
2
|
|
|
|
|
4
|
$params = \@p; |
538
|
2
|
|
|
|
|
3
|
$switches = \@s; |
539
|
|
|
|
|
|
|
} |
540
|
11
|
|
|
|
|
71
|
$self->{'_options'}->{'_params'} = $params; |
541
|
11
|
|
|
|
|
26
|
$self->{'_options'}->{'_switches'} = $switches; |
542
|
11
|
|
|
|
|
24
|
$self->{'_options'}->{'_translation'} = $translation; |
543
|
11
|
|
|
|
|
25
|
$self->{'_options'}->{'_qual_param'} = $qual_param; |
544
|
11
|
50
|
|
|
|
33
|
if (not defined $use_dash) { |
545
|
0
|
|
|
|
|
0
|
$self->{'_options'}->{'_dash'} = 1; |
546
|
|
|
|
|
|
|
} else { |
547
|
11
|
|
|
|
|
24
|
$self->{'_options'}->{'_dash'} = $use_dash; |
548
|
|
|
|
|
|
|
} |
549
|
11
|
50
|
|
|
|
39
|
if (not defined $join) { |
550
|
0
|
|
|
|
|
0
|
$self->{'_options'}->{'_join'} = ' '; |
551
|
|
|
|
|
|
|
} else { |
552
|
11
|
|
|
|
|
23
|
$self->{'_options'}->{'_join'} = $join; |
553
|
|
|
|
|
|
|
} |
554
|
|
|
|
|
|
|
# if there is a parameter 'command' in @program_params, and |
555
|
|
|
|
|
|
|
# new is called with new( -command => $cmd, ... ), then |
556
|
|
|
|
|
|
|
# _set_from_args will create an accessor $self->command containing |
557
|
|
|
|
|
|
|
# the value $cmd... |
558
|
11
|
|
|
|
|
148
|
$self->_set_from_args( |
559
|
|
|
|
|
|
|
$args, |
560
|
|
|
|
|
|
|
-methods => [ @$params, @$switches, 'program_name', 'program_dir', 'out_type' ], |
561
|
|
|
|
|
|
|
-create => 1, |
562
|
|
|
|
|
|
|
# when our parms are accessed, signal parameters are unchanged for |
563
|
|
|
|
|
|
|
# future reads (until set_parameters is called) |
564
|
|
|
|
|
|
|
-code => |
565
|
|
|
|
|
|
|
'my $self = shift; |
566
|
|
|
|
|
|
|
$self->parameters_changed(0); |
567
|
|
|
|
|
|
|
return $self->{\'_\'.$method} = shift if @_; |
568
|
|
|
|
|
|
|
return $self->{\'_\'.$method};' |
569
|
|
|
|
|
|
|
); |
570
|
11
|
|
|
|
|
10073
|
return 1; |
571
|
|
|
|
|
|
|
} |
572
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
=head2 _translate_params |
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
Title : _translate_params |
577
|
|
|
|
|
|
|
Usage : @options = $assembler->_translate_params( ); |
578
|
|
|
|
|
|
|
Function: Translate the Bioperl arguments into the arguments to pass to the |
579
|
|
|
|
|
|
|
assembler on the command line |
580
|
|
|
|
|
|
|
Returns : Arrayref of arguments |
581
|
|
|
|
|
|
|
Args : none |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
=cut |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
sub _translate_params { |
586
|
1
|
|
|
1
|
|
2313
|
my ($self) = @_; |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
# Get option string |
589
|
1
|
|
|
|
|
5
|
my $params = $self->{'_options'}->{'_params'}; |
590
|
1
|
|
|
|
|
4
|
my $switches = $self->{'_options'}->{'_switches'}; |
591
|
1
|
|
|
|
|
4
|
my $join = $self->{'_options'}->{'_join'}; |
592
|
1
|
|
|
|
|
3
|
my $dash = $self->{'_options'}->{'_dash'}; |
593
|
1
|
|
|
|
|
4
|
my $translat = $self->{'_options'}->{'_translation'}; |
594
|
|
|
|
|
|
|
# patch to access the multiple dash choices of _setparams... |
595
|
1
|
|
|
|
|
3
|
my @dash_args; |
596
|
1
|
|
50
|
|
|
6
|
$dash ||= 1; # default as advertised |
597
|
1
|
|
|
|
|
4
|
for ($dash) { |
598
|
1
|
50
|
|
|
|
6
|
$_ == 1 && do { |
599
|
1
|
|
|
|
|
4
|
@dash_args = ( -dash => 1 ); |
600
|
1
|
|
|
|
|
4
|
last; |
601
|
|
|
|
|
|
|
}; |
602
|
0
|
0
|
|
|
|
0
|
/^s/ && do { #single dash only |
603
|
0
|
|
|
|
|
0
|
@dash_args = ( -dash => 1); |
604
|
0
|
|
|
|
|
0
|
last; |
605
|
|
|
|
|
|
|
}; |
606
|
0
|
0
|
|
|
|
0
|
/^d/ && do { # double dash only |
607
|
0
|
|
|
|
|
0
|
@dash_args = ( -double_dash => 1); |
608
|
0
|
|
|
|
|
0
|
last; |
609
|
|
|
|
|
|
|
}; |
610
|
0
|
0
|
|
|
|
0
|
/^m/ && do { # mixed dash: one-letter opts get -, |
611
|
|
|
|
|
|
|
# long opts get -- |
612
|
0
|
|
|
|
|
0
|
@dash_args = ( -mixed_dash => 1); |
613
|
0
|
|
|
|
|
0
|
last; |
614
|
|
|
|
|
|
|
}; |
615
|
0
|
|
|
|
|
0
|
do { |
616
|
0
|
|
|
|
|
0
|
$self->warn( "Dash spec '$dash' not recognized; using 'single'" ); |
617
|
0
|
|
|
|
|
0
|
@dash_args = ( -dash => 1 ); |
618
|
|
|
|
|
|
|
}; |
619
|
|
|
|
|
|
|
} |
620
|
1
|
|
|
|
|
16
|
my $options = $self->_setparams( |
621
|
|
|
|
|
|
|
-params => $params, |
622
|
|
|
|
|
|
|
-switches => $switches, |
623
|
|
|
|
|
|
|
-join => $join, |
624
|
|
|
|
|
|
|
@dash_args |
625
|
|
|
|
|
|
|
); |
626
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
# Translate options |
628
|
1
|
|
|
|
|
42
|
my @options = split(/(\s|$join)/, $options); |
629
|
1
|
|
|
|
|
6
|
for (my $i = 0; $i < scalar @options; $i++) { |
630
|
12
|
|
|
|
|
36
|
my ($prefix, $name) = ( $options[$i] =~ m/^(-{0,2})(.+)$/ ); |
631
|
12
|
100
|
|
|
|
20
|
if (defined $name) { |
632
|
11
|
100
|
|
|
|
29
|
if ($name =~ /command/i) { |
|
|
100
|
|
|
|
|
|
633
|
1
|
|
|
|
|
3
|
$name = $options[$i+2]; # get the command |
634
|
1
|
|
|
|
|
3
|
splice @options, $i, 4; |
635
|
1
|
|
|
|
|
3
|
unshift @options, $name; # put it first |
636
|
|
|
|
|
|
|
} |
637
|
|
|
|
|
|
|
elsif (defined $$translat{$name}) { |
638
|
3
|
|
|
|
|
9
|
$options[$i] = $prefix.$$translat{$name}; |
639
|
|
|
|
|
|
|
} |
640
|
|
|
|
|
|
|
} |
641
|
|
|
|
|
|
|
else { |
642
|
1
|
|
|
|
|
2
|
splice @options, $i, 1; |
643
|
1
|
|
|
|
|
4
|
$i--; |
644
|
|
|
|
|
|
|
} |
645
|
|
|
|
|
|
|
} |
646
|
1
|
|
|
|
|
5
|
$options = join('', @options); |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
# this is a kludge for mixed options: the reason mixed doesn't |
649
|
|
|
|
|
|
|
# work right on the pass through _setparams is that the |
650
|
|
|
|
|
|
|
# *aliases* and not the actual params are passed to it. |
651
|
|
|
|
|
|
|
# here we just rejigger the dashes |
652
|
1
|
50
|
|
|
|
3
|
if ($dash =~ /^m/) { |
653
|
0
|
|
|
|
|
0
|
$options =~ s/--([a-z0-9](?:\s|$))/-$1/gi; |
654
|
|
|
|
|
|
|
} |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
# Now arrayify the options |
657
|
1
|
|
|
|
|
4
|
@options = split(' ', $options); |
658
|
|
|
|
|
|
|
|
659
|
1
|
|
|
|
|
5
|
return \@options; |
660
|
|
|
|
|
|
|
} |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
=head2 _prepare_input_sequences |
664
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
Title : _prepare_input_sequences |
666
|
|
|
|
|
|
|
Usage : ($seqs, $quals) = $assembler->_prepare_input_sequences(\@seqs, \@quals); |
667
|
|
|
|
|
|
|
Function: Do something to the input sequence and qual objects. By default, |
668
|
|
|
|
|
|
|
nothing happens. Overload this method in the specific assembly module |
669
|
|
|
|
|
|
|
if processing of the sequences is needed (e.g. as in the |
670
|
|
|
|
|
|
|
TigrAssembler module). |
671
|
|
|
|
|
|
|
Returns : - sequence input |
672
|
|
|
|
|
|
|
- optional quality score input |
673
|
|
|
|
|
|
|
Args : - sequence input (FASTA file or sequence object arrayref) |
674
|
|
|
|
|
|
|
- optional quality score input (QUAL file or quality score object |
675
|
|
|
|
|
|
|
arrayref) |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
=cut |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
sub _prepare_input_sequences { |
680
|
0
|
|
|
0
|
|
0
|
my ($self, $seqs, $quals) = @_; |
681
|
0
|
|
|
|
|
0
|
return $seqs, $quals; |
682
|
|
|
|
|
|
|
} |
683
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
=head2 _collate_subcmd_args() |
685
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
Title : _collate_subcmd_args |
687
|
|
|
|
|
|
|
Usage : $args_hash = $self->_collate_subcmd_args |
688
|
|
|
|
|
|
|
Function: collate parameters and switches into command-specific |
689
|
|
|
|
|
|
|
arg lists for passing to new() |
690
|
|
|
|
|
|
|
Returns : hash of named argument lists |
691
|
|
|
|
|
|
|
Args : [optional] composite cmd prefix (scalar string) |
692
|
|
|
|
|
|
|
[default is 'run'] |
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
=cut |
695
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
sub _collate_subcmd_args { |
697
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
698
|
0
|
|
|
|
|
0
|
my $cmd = shift; |
699
|
0
|
|
|
|
|
0
|
my %ret; |
700
|
|
|
|
|
|
|
# default command is 'run' |
701
|
0
|
|
0
|
|
|
0
|
$cmd ||= 'run'; |
702
|
0
|
|
|
|
|
0
|
my @subcmds = @{$self->{'_options'}->{'_composite_commands'}->{$cmd}}; |
|
0
|
|
|
|
|
0
|
|
703
|
0
|
|
|
|
|
0
|
my %subcmds; |
704
|
0
|
|
|
|
|
0
|
my $cur_options = $self->{'_options'}; |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
# collate |
707
|
0
|
|
|
|
|
0
|
foreach my $subcmd (@subcmds) { |
708
|
|
|
|
|
|
|
# find the composite cmd form of the argument in |
709
|
|
|
|
|
|
|
# the current params and switches |
710
|
|
|
|
|
|
|
# e.g., map_max_mismatches |
711
|
0
|
|
|
|
|
0
|
my @params = grep /^${subcmd}_/, @{$$cur_options{'_params'}}; |
|
0
|
|
|
|
|
0
|
|
712
|
0
|
|
|
|
|
0
|
my @switches = grep /^${subcmd}_/, @{$$cur_options{'_switches'}}; |
|
0
|
|
|
|
|
0
|
|
713
|
0
|
|
|
|
|
0
|
$ret{$subcmd} = []; |
714
|
|
|
|
|
|
|
# create an argument list suitable for passing to new() of |
715
|
|
|
|
|
|
|
# the subcommand factory... |
716
|
0
|
|
|
|
|
0
|
foreach my $opt (@params, @switches) { |
717
|
0
|
|
|
|
|
0
|
my $subopt = $opt; |
718
|
0
|
|
|
|
|
0
|
$subopt =~ s/^${subcmd}_//; |
719
|
0
|
0
|
|
|
|
0
|
push(@{$ret{$subcmd}}, '-'.$subopt => $self->$opt) if defined $self->$opt; |
|
0
|
|
|
|
|
0
|
|
720
|
|
|
|
|
|
|
} |
721
|
|
|
|
|
|
|
} |
722
|
0
|
|
|
|
|
0
|
return \%ret; |
723
|
|
|
|
|
|
|
} |
724
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
=head2 run |
726
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
Title : run |
728
|
|
|
|
|
|
|
Usage : $assembly = $assembler->run(\@seqs, \@quals); |
729
|
|
|
|
|
|
|
or |
730
|
|
|
|
|
|
|
$assembly = $assembler->run($fasta_file, $qual_file); |
731
|
|
|
|
|
|
|
Function: Run the assembler. The specific assembler wrapper needs to provide |
732
|
|
|
|
|
|
|
the $assembler->_run() method. |
733
|
|
|
|
|
|
|
Returns : Assembly results (file, IO object or Assembly object) |
734
|
|
|
|
|
|
|
Args : - sequence input (FASTA file or sequence object arrayref) |
735
|
|
|
|
|
|
|
- optional quality score input (QUAL file or quality score object |
736
|
|
|
|
|
|
|
arrayref) |
737
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
=cut |
739
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
sub run { |
741
|
0
|
|
|
0
|
1
|
0
|
my ($self, $seqs, $quals) = @_; |
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
# Sanity checks |
744
|
0
|
|
|
|
|
0
|
$self->_check_executable(); |
745
|
0
|
|
|
|
|
0
|
$self->_check_sequence_input($seqs); |
746
|
0
|
|
|
|
|
0
|
$self->_check_optional_quality_input($quals); |
747
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
# Process objects if needed |
749
|
0
|
|
|
|
|
0
|
$self->_prepare_input_sequences($seqs, $quals); |
750
|
|
|
|
|
|
|
|
751
|
|
|
|
|
|
|
# Write input files |
752
|
0
|
|
|
|
|
0
|
my ($fasta_file, $qual_file) = $self->_prepare_input_files($seqs,$quals); |
753
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
# If needed, set the program argument for a QUAL file |
755
|
0
|
|
|
|
|
0
|
my $qual_param = $self->{'_options'}->{'_qual_param'}; |
756
|
0
|
0
|
|
|
|
0
|
if (defined $qual_param) { |
757
|
0
|
0
|
|
|
|
0
|
if ($qual_file) { |
758
|
|
|
|
|
|
|
# Set the quality input parameter |
759
|
0
|
|
|
|
|
0
|
$quals = $self->$qual_param($qual_file); |
760
|
|
|
|
|
|
|
} else { |
761
|
|
|
|
|
|
|
# Remove the quality input parameter |
762
|
0
|
|
|
|
|
0
|
$quals = $self->$qual_param(undef); |
763
|
|
|
|
|
|
|
} |
764
|
|
|
|
|
|
|
} |
765
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
# Assemble |
767
|
0
|
|
|
|
|
0
|
my $output_file = $self->_run($fasta_file, $qual_file); |
768
|
|
|
|
|
|
|
|
769
|
|
|
|
|
|
|
# Export results in desired object type |
770
|
0
|
|
|
|
|
0
|
my $asm = $self->_export_results($output_file); |
771
|
0
|
|
|
|
|
0
|
return $asm; |
772
|
|
|
|
|
|
|
} |
773
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
=head1 Bio:ParameterBaseI compliance |
775
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
=head2 set_parameters() |
777
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
Title : set_parameters |
779
|
|
|
|
|
|
|
Usage : $pobj->set_parameters(%params); |
780
|
|
|
|
|
|
|
Function: sets the parameters listed in the hash or array |
781
|
|
|
|
|
|
|
Returns : true on success |
782
|
|
|
|
|
|
|
Args : [optional] hash or array of parameter/values. |
783
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
=cut |
785
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
sub set_parameters { |
787
|
3
|
|
|
3
|
1
|
545
|
my ($self, @args) = @_; |
788
|
|
|
|
|
|
|
|
789
|
|
|
|
|
|
|
# currently stored stuff |
790
|
3
|
|
|
|
|
10
|
my $opts = $self->{'_options'}; |
791
|
3
|
|
|
|
|
9
|
my $params = $opts->{'_params'}; |
792
|
3
|
|
|
|
|
7
|
my $switches = $opts->{'_switches'}; |
793
|
3
|
|
|
|
|
8
|
my $translation = $opts->{'_translation'}; |
794
|
3
|
|
|
|
|
9
|
my $qual_param = $opts->{'_qual_param'}; |
795
|
3
|
|
|
|
|
8
|
my $use_dash = $opts->{'_dash'}; |
796
|
3
|
|
|
|
|
7
|
my $join = $opts->{'_join'}; |
797
|
|
|
|
|
|
|
|
798
|
3
|
|
|
|
|
16
|
$self->_set_program_options(\@args, $params, $switches, $translation, |
799
|
|
|
|
|
|
|
$qual_param, $use_dash, $join); |
800
|
|
|
|
|
|
|
# the question is, are previously-set parameters left alone when |
801
|
|
|
|
|
|
|
# not specified in @args? |
802
|
3
|
|
|
|
|
7
|
$self->parameters_changed(1); |
803
|
3
|
|
|
|
|
12
|
return 1; |
804
|
|
|
|
|
|
|
} |
805
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
=head2 reset_parameters() |
807
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
Title : reset_parameters |
809
|
|
|
|
|
|
|
Usage : resets values |
810
|
|
|
|
|
|
|
Function: resets parameters to either undef or value in passed hash |
811
|
|
|
|
|
|
|
Returns : none |
812
|
|
|
|
|
|
|
Args : [optional] hash of parameter-value pairs |
813
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
=cut |
815
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
sub reset_parameters { |
817
|
1
|
|
|
1
|
1
|
548
|
my ($self, @args) = @_; |
818
|
|
|
|
|
|
|
|
819
|
1
|
|
|
|
|
4
|
my @reset_args; |
820
|
|
|
|
|
|
|
# currently stored stuff |
821
|
1
|
|
|
|
|
3
|
my $opts = $self->{'_options'}; |
822
|
1
|
|
|
|
|
4
|
my $params = $opts->{'_params'}; |
823
|
1
|
|
|
|
|
3
|
my $switches = $opts->{'_switches'}; |
824
|
1
|
|
|
|
|
4
|
my $translation = $opts->{'_translation'}; |
825
|
1
|
|
|
|
|
3
|
my $qual_param = $opts->{'_qual_param'}; |
826
|
1
|
|
|
|
|
3
|
my $use_dash = $opts->{'_dash'}; |
827
|
1
|
|
|
|
|
3
|
my $join = $opts->{'_join'}; |
828
|
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
# don't like this, b/c _set_program_args will create a bunch of |
830
|
|
|
|
|
|
|
# accessors with undef values, but oh well for now /maj |
831
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
# Is better to use hashes than arrays, to use their unique keys |
833
|
1
|
|
|
|
|
5
|
my %reset_args = @args; |
834
|
1
|
|
|
|
|
26
|
foreach my $p (@$params) { |
835
|
7
|
100
|
|
|
|
15
|
if (not exists $reset_args{"-$p"}) { |
836
|
6
|
|
|
|
|
14
|
$reset_args{"-$p"} = undef; |
837
|
|
|
|
|
|
|
} |
838
|
|
|
|
|
|
|
} |
839
|
1
|
|
|
|
|
11
|
foreach my $s (@$switches) { |
840
|
2
|
50
|
|
|
|
6
|
if (not exists $reset_args{"-$s"}) { |
841
|
2
|
|
|
|
|
5
|
$reset_args{"-$s"} = undef; |
842
|
|
|
|
|
|
|
} |
843
|
|
|
|
|
|
|
} |
844
|
1
|
|
|
|
|
5
|
while (my ($method, $value) = each %reset_args) { |
845
|
9
|
|
|
|
|
22
|
push(@reset_args, $method => $value); |
846
|
|
|
|
|
|
|
} |
847
|
|
|
|
|
|
|
|
848
|
1
|
|
|
|
|
5
|
$self->_set_program_options(\@reset_args, $params, $switches, $translation, |
849
|
|
|
|
|
|
|
$qual_param, $use_dash, $join); |
850
|
1
|
|
|
|
|
2
|
$self->parameters_changed(1); |
851
|
|
|
|
|
|
|
} |
852
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
=head2 parameters_changed() |
854
|
|
|
|
|
|
|
|
855
|
|
|
|
|
|
|
Title : parameters_changed |
856
|
|
|
|
|
|
|
Usage : if ($pobj->parameters_changed) {...} |
857
|
|
|
|
|
|
|
Function: Returns boolean true (1) if parameters have changed |
858
|
|
|
|
|
|
|
Returns : Boolean (0 or 1) |
859
|
|
|
|
|
|
|
Args : [optional] Boolean |
860
|
|
|
|
|
|
|
|
861
|
|
|
|
|
|
|
=cut |
862
|
|
|
|
|
|
|
|
863
|
|
|
|
|
|
|
sub parameters_changed { |
864
|
265
|
|
|
265
|
1
|
72657
|
my $self = shift; |
865
|
265
|
100
|
|
|
|
3886
|
return $self->{'_parameters_changed'} = shift if @_; |
866
|
5
|
|
|
|
|
31
|
return $self->{'_parameters_changed'}; |
867
|
|
|
|
|
|
|
} |
868
|
|
|
|
|
|
|
|
869
|
|
|
|
|
|
|
=head2 available_parameters() |
870
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
Title : available_parameters |
872
|
|
|
|
|
|
|
Usage : @params = $pobj->available_parameters() |
873
|
|
|
|
|
|
|
Function: Returns a list of the available parameters |
874
|
|
|
|
|
|
|
Returns : Array of parameters |
875
|
|
|
|
|
|
|
Args : 'params' for settable program parameters |
876
|
|
|
|
|
|
|
'switches' for boolean program switches |
877
|
|
|
|
|
|
|
default: all |
878
|
|
|
|
|
|
|
|
879
|
|
|
|
|
|
|
=cut |
880
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
sub available_parameters { |
882
|
3
|
|
|
3
|
1
|
8
|
my $self = shift; |
883
|
3
|
|
|
|
|
7
|
my $subset = shift; |
884
|
3
|
|
|
|
|
10
|
my $opts = $self->{'_options'}; |
885
|
3
|
|
|
|
|
6
|
my @ret; |
886
|
3
|
|
|
|
|
9
|
for ($subset) { |
887
|
3
|
100
|
66
|
|
|
20
|
(!defined || /^a/) && do { |
888
|
1
|
|
|
|
|
3
|
@ret = (@{$opts->{'_params'}}, @{$opts->{'_switches'}}); |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
5
|
|
889
|
1
|
|
|
|
|
3
|
last; |
890
|
|
|
|
|
|
|
}; |
891
|
2
|
100
|
|
|
|
11
|
m/^p/i && do { |
892
|
1
|
|
|
|
|
2
|
@ret = @{$opts->{'_params'}}; |
|
1
|
|
|
|
|
4
|
|
893
|
1
|
|
|
|
|
2
|
last; |
894
|
|
|
|
|
|
|
}; |
895
|
1
|
50
|
|
|
|
10
|
m/^s/i && do { |
896
|
1
|
|
|
|
|
2
|
@ret = @{$opts->{'_switches'}}; |
|
1
|
|
|
|
|
4
|
|
897
|
1
|
|
|
|
|
2
|
last; |
898
|
|
|
|
|
|
|
}; |
899
|
0
|
|
|
|
|
0
|
do { #fail |
900
|
0
|
|
|
|
|
0
|
$self->throw("available_parameters: unrecognized subset"); |
901
|
|
|
|
|
|
|
}; |
902
|
|
|
|
|
|
|
} |
903
|
3
|
|
|
|
|
19
|
return @ret; |
904
|
|
|
|
|
|
|
} |
905
|
|
|
|
|
|
|
|
906
|
|
|
|
|
|
|
=head2 get_parameters() |
907
|
|
|
|
|
|
|
|
908
|
|
|
|
|
|
|
Title : get_parameters |
909
|
|
|
|
|
|
|
Usage : %params = $pobj->get_parameters; |
910
|
|
|
|
|
|
|
Function: Returns list of key-value pairs of parameter => value |
911
|
|
|
|
|
|
|
Returns : List of key-value pairs |
912
|
|
|
|
|
|
|
Args : [optional] A string is allowed if subsets are wanted or (if a |
913
|
|
|
|
|
|
|
parameter subset is default) 'all' to return all parameters |
914
|
|
|
|
|
|
|
|
915
|
|
|
|
|
|
|
=cut |
916
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
sub get_parameters { |
918
|
1
|
|
|
1
|
1
|
4
|
my $self = shift; |
919
|
1
|
|
|
|
|
3
|
my $subset = shift; |
920
|
1
|
|
50
|
|
|
10
|
$subset ||= 'all'; |
921
|
1
|
|
|
|
|
3
|
my @ret; |
922
|
1
|
|
|
|
|
4
|
my $opts = $self->{'_options'}; |
923
|
1
|
|
|
|
|
4
|
for ($subset) { |
924
|
1
|
50
|
|
|
|
6
|
m/^p/i && do { #params only |
925
|
0
|
|
|
|
|
0
|
for (@{$opts->{'_params'}}) { |
|
0
|
|
|
|
|
0
|
|
926
|
0
|
0
|
0
|
|
|
0
|
push(@ret, $_, $self->$_) if $self->can($_) && defined $self->$_; |
927
|
|
|
|
|
|
|
} |
928
|
0
|
|
|
|
|
0
|
last; |
929
|
|
|
|
|
|
|
}; |
930
|
1
|
50
|
|
|
|
5
|
m/^s/i && do { #switches only |
931
|
0
|
|
|
|
|
0
|
for (@{$opts->{'_switches'}}) { |
|
0
|
|
|
|
|
0
|
|
932
|
0
|
0
|
0
|
|
|
0
|
push(@ret, $_, $self->$_) if $self->can($_) && defined $self->$_; |
933
|
|
|
|
|
|
|
} |
934
|
0
|
|
|
|
|
0
|
last; |
935
|
|
|
|
|
|
|
}; |
936
|
1
|
50
|
|
|
|
6
|
m/^a/i && do { # all |
937
|
1
|
|
|
|
|
3
|
for (@{$opts->{'_params'}},@{$opts->{'_switches'}}) { |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
3
|
|
938
|
9
|
100
|
66
|
|
|
185
|
push(@ret, $_, $self->$_) if $self->can($_) && defined $self->$_; |
939
|
|
|
|
|
|
|
} |
940
|
1
|
|
|
|
|
7
|
last; |
941
|
|
|
|
|
|
|
}; |
942
|
0
|
|
|
|
|
0
|
do { |
943
|
0
|
|
|
|
|
0
|
$self->throw("get_parameters: unrecognized subset"); |
944
|
|
|
|
|
|
|
}; |
945
|
|
|
|
|
|
|
} |
946
|
1
|
|
|
|
|
6
|
return @ret; |
947
|
|
|
|
|
|
|
} |
948
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
1; |