File Coverage

blib/lib/Bio/Tools/Run/AssemblerBase.pm
Criterion Covered Total %
statement 155 324 47.8
branch 39 126 30.9
condition 8 39 20.5
subroutine 16 27 59.2
pod 9 9 100.0
total 227 525 43.2


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::Run::AssemblerBase
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Florent Angly
7             #
8             # Copyright Florent Angly
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tools::Run::AssemblerBase - base class for wrapping external assemblers
17              
18             =head1 SYNOPSIS
19              
20             Give standard usage here
21              
22             =head1 DESCRIPTION
23              
24             Describe the object here
25             # use of globals for configuration...
26             # I've created the separate Config.pm module, and 'use'd it in the
27             # main module, for instance...
28             # other configuration globals:
29             # $use_dash = [1|single|double|mixed]
30              
31             =head1 FEEDBACK
32              
33             =head2 Mailing Lists
34              
35             User feedback is an integral part of the evolution of this and other
36             Bioperl modules. Send your comments and suggestions preferably to
37             the Bioperl mailing list. Your participation is much appreciated.
38              
39             bioperl-l@bioperl.org - General discussion
40             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
41              
42             =head2 Support
43              
44             Please direct usage questions or support issues to the mailing list:
45              
46             L
47              
48             rather than to the module maintainer directly. Many experienced and
49             reponsive experts will be able look at the problem and quickly
50             address it. Please include a thorough description of the problem
51             with code and data examples if at all possible.
52              
53             =head2 Reporting Bugs
54              
55             Report bugs to the Bioperl bug tracking system to help us keep track
56             of the bugs and their resolution. Bug reports can be submitted via
57             the web:
58              
59             http://redmine.open-bio.org/projects/bioperl/
60              
61             =head1 AUTHOR - Florent Angly
62              
63             Email florent dot angly at gmail dot com
64              
65             =head1 CONTRIBUTORS
66              
67             Mark A. Jensen - maj -at- fortinbras -dot- us
68              
69             =head1 APPENDIX
70              
71             The rest of the documentation details each of the object methods.
72             Internal methods are usually preceded with a _
73              
74             =cut
75              
76             package Bio::Tools::Run::AssemblerBase;
77              
78 8     8   85050 use strict;
  8         12  
  8         191  
79 8     8   3574 use Bio::SeqIO;
  8         162349  
  8         135  
80 8     8   3412 use Bio::Assembly::IO;
  8         6876  
  8         99  
81              
82 8     8   161 use base qw(Bio::Root::Root Bio::Tools::Run::WrapperBase Bio::ParameterBaseI);
  8         9  
  8         7751  
83              
84             our $default_out_type = 'Bio::Assembly::ScaffoldI';
85              
86             =head2 program_name
87              
88             Title : program_name
89             Usage : $assembler>program_name()
90             Function: get/set the executable name
91             Returns: string
92             Args : string
93              
94             =cut
95              
96             sub program_name {
97 71     71 1 1736 my ($self, $val) = @_;
98 71 100       128 $self->{'_program_name'} = $val if $val;
99 71         293 return $self->{'_program_name'};
100             }
101              
102              
103             =head2 program_dir
104              
105             Title : program_dir
106             Usage : $assembler->program_dir()
107             Function: get/set the program dir
108             Returns: string
109             Args : string
110              
111             =cut
112              
113             sub program_dir {
114 41     41 1 45 my ($self, $val) = @_;
115 41 100       79 $self->{'_program_dir'} = $val if $val;
116 41         124 return $self->{'_program_dir'};
117             }
118              
119              
120             =head2 out_type
121              
122             Title : out_type
123             Usage : $assembler->out_type('Bio::Assembly::ScaffoldI')
124             Function: get/set the desired type of output
125             Returns : The type of results to return
126             Args : Type of results to return (optional):
127             'Bio::Assembly::IO' object
128             'Bio::Assembly::ScaffoldI' object (default)
129             The name of a file to save the results in
130              
131             =cut
132              
133             sub out_type {
134 0     0 1 0 my ($self, $val) = @_;
135 0 0       0 if (defined $val) {
136 0         0 $self->{'_out_type'} = $val;
137             } else {
138 0 0       0 if (not defined $self->{'_out_type'}) {
139 0         0 $self->{'_out_type'} = $default_out_type;
140             }
141             }
142 0         0 return $self->{'_out_type'};
143             }
144              
145              
146             =head2 _assembly_format
147              
148             Title : _assembly_format
149             Usage : $assembler->_assembly_format('ace')
150             Function: get/set the driver to use to parse the assembly results
151             Returns : the driver to use to parse the assembly results
152             Args : the driver to use to parse the assembly results (optional)
153              
154             =cut
155              
156             sub _assembly_format {
157 9     9   12 my ($self, $asm_format) = @_;
158 9 50       27 if (defined $asm_format) {
159 9         17 $self->{'_assembly_format'} = $asm_format;
160             }
161 9         17 return $self->{'_assembly_format'};
162             }
163              
164              
165             =head2 _assembly_variant
166              
167             Title : _assembly_variant
168             Usage : $assembler->_assembly_variant('454')
169             Function: get/set the driver variant to use to parse the assembly results. For
170             example, the ACE format has the ACE-454 and the ACE-consed variants
171             Returns : the driver variant to use to parse the assembly results
172             Args : the driver variant to use to parse the assembly results (optional)
173              
174             =cut
175              
176             sub _assembly_variant {
177 1     1   1 my ($self, $asm_variant) = @_;
178 1 50       4 if (defined $asm_variant) {
179 1         2 $self->{'_assembly_variant'} = $asm_variant;
180             }
181 1         2 return $self->{'_assembly_variant'};
182             }
183              
184              
185             =head2 _check_executable
186              
187             Title : _check_executable
188             Usage : $assembler->_check_executable()
189             Function: Verifies that the program executable can be found, or throw an error.
190             Returns: 1 for success
191             Args : -
192              
193             =cut
194              
195             sub _check_executable {
196 0     0   0 my ($self) = @_;
197 0 0       0 if (not defined $self->executable()) {
198 0         0 $self->throw("Could not find the executable '".$self->program_name()."'. ".
199             'You can use $self->program_dir() and $self->program_name() to '.
200             "specify the location of the program.");
201             }
202 0         0 return 1;
203             }
204              
205             =head2 _check_sequence_input
206              
207             Title : _check_sequence_input
208             Usage : $assembler->_check_sequence_input($seqs)
209             Function: Check that the sequence input is a valid file, or an arrayref of
210             sequence objects (Bio::PrimarySeqI or Bio::SeqI). If not, an
211             exception is thrown.
212             Returns : 1 if the check passed
213             Args : sequence input
214              
215             =cut
216              
217             sub _check_sequence_input {
218 0     0   0 my ($self, $seqs) = @_;
219 0 0       0 if (not $seqs) {
220 0         0 $self->throw("Must supply sequences as a FASTA filename or a sequence object".
221             " (Bio::PrimarySeqI or Bio::SeqI) array reference");
222             } else {
223 0 0       0 if (ref($seqs) =~ m/ARRAY/i ) {
224 0         0 for my $seq (@$seqs) {
225 0 0 0     0 unless ($seq->isa('Bio::PrimarySeqI') || $seq->isa('Bio::SeqI')) {
226 0         0 $self->throw("Not a valid Bio::PrimarySeqI or Bio::SeqI object");
227             }
228             }
229             } else {
230 0 0       0 if (not -f $seqs) {
231 0         0 $self->throw("Input file '$seqs' does not seem to exist.");
232             }
233             }
234             }
235 0         0 return 1;
236             }
237              
238             =head2 _check_optional_quality_input
239              
240             Title : _check_optional_quality_input
241             Usage : $assembler->_check_optional_quality_input($quals)
242             Function: If a quality score input is provided, check that it is either a
243             valid file or an arrayref of quality score objects (Bio::Seq::
244             QualI or Bio::Seq::Quality). If not, an exception is thrown.
245             Returns : 1 if the check passed (or quality score input was provided)
246             Args : quality score input
247              
248             =cut
249              
250             sub _check_optional_quality_input {
251 0     0   0 my ($self, $quals) = @_;
252 0 0       0 if (defined $quals) {
253 0 0       0 if (ref($quals) =~ m/ARRAY/i) {
254 0         0 for my $qual (@$quals) {
255 0 0 0     0 unless ($qual->isa('Bio::Seq::QualI') || $qual->isa('Bio::Seq::Quality')) {
256 0         0 $self->throw("Not a valid Bio::Seq::QualI or Bio::Seq::Quality object");
257             }
258             }
259             } else {
260 0 0       0 if (not -f $quals) {
261 0         0 $self->throw("Input file '$quals' does not seem to exist.");
262             }
263             }
264             }
265 0         0 return 1;
266             }
267              
268              
269             =head2 _prepare_input_file
270              
271             Title : _prepare_input_file
272             Usage : ($fasta_file, $qual_file) = $assembler->_prepare_input_file(\@seqs, \@quals);
273             Function: Create the input FASTA and QUAL files as needed. If the input
274             sequences are provided in a (FASTA) file, the optional input quality
275             scores are also expected to be in a (QUAL) file. If the input
276             sequences are an arrayref of bioperl sequence objects, the optional
277             input quality scores are expected to be an arrayref of bioperl
278             quality score objects, in the same order as the sequence objects.
279             Returns : - input filehandle
280             - input filename
281             Args : - sequence input (FASTA file or sequence object arrayref)
282             - optional quality score input (QUAL file or quality score object
283             arrayref)
284              
285             =cut
286              
287             sub _prepare_input_files {
288 0     0   0 my ($self, $seqs, $quals) = @_;
289             # Set up input FASTA and QUAL files
290 0         0 $self->io->_initialize_io();
291             #$self->tempdir();
292 0         0 my $fasta_file;
293             my $qual_file;
294 0 0       0 if ( ref($seqs) =~ m/ARRAY/i ) {
295             # Input sequences are an arrayref of Bioperl sequence objects
296 0 0 0     0 if (defined $quals && not ref($quals) =~ m/ARRAY/i) {
297 0         0 $self->throw("The input sequences are an arrayref of sequence objects. ".
298             "Expecting the quality scores as an arrayref of quality score objects");
299             } else {
300             # The input qualities are not defined or are an arrayref of quality objects
301             # Write temp FASTA and QUAL input files
302 0         0 ($fasta_file, $qual_file) = $self->_write_seq_file($seqs, $quals);
303             }
304             } else {
305             # Sequence input is a FASTA file
306 0         0 $fasta_file = $seqs;
307 0 0 0     0 if (defined $quals && ref($quals) =~ m/ARRAY/i) {
308             # Quality input is defined and is an arrayref of quality objects
309 0         0 $self->throw("The input sequences are in a FASTA file. Expecting the ".
310             "quality scores in a QUAL file.");
311             } else {
312             # Input quality scores is either not defined or is a QUAL file
313 0         0 $qual_file = $quals;
314             }
315             }
316 0         0 return $fasta_file, $qual_file;
317             }
318              
319              
320             =head2 _write_seq_file
321              
322             Title : _write_seq_file
323             Usage : ($fasta_file, $qual_file) = $assembler->_write_seq_file(\@seqs, \@quals)
324             Function: Write temporary FASTA and QUAL files on disk
325             Returns : name of FASTA file
326             name of QUAL file (undef if no quality scoress)
327             Args : - arrayref of sequence objects
328             - optional arrayref of quality score objects
329              
330             =cut
331              
332             sub _write_seq_file {
333 0     0   0 my ($self, $seqs, $quals) = @_;
334             # Store the sequences in temporary FASTA files
335 0         0 my $tmpdir = $self->tempdir();
336 0         0 my ($fasta_h, $fasta_file) = $self->io->tempfile( -dir => $tmpdir );
337 0         0 my ($qual_h, $qual_file ) = $self->io->tempfile( -dir => $tmpdir );
338 0         0 my $fasta_out = Bio::SeqIO->new( -fh => $fasta_h , -format => 'fasta');
339 0         0 my $qual_out = Bio::SeqIO->new( -fh => $qual_h , -format => 'qual' );
340 0         0 my $use_qual_file = 0;
341 0         0 my $size = scalar @$seqs;
342 0         0 for ( my $i = 0 ; $i < $size ; $i++ ) {
343 0         0 my $seq = $$seqs[$i];
344             # Make sure that all sequences have an ID (to prevent TIGR Assembler crash)
345 0 0       0 if (not defined $seq->id) {
346 0         0 my $newid = 'tmp'.$i;
347 0         0 print $newid."\n";
348 0         0 $seq->id($newid);
349 0         0 $self->warn("A sequence had no ID. Its ID is now $newid");
350             }
351 0         0 my $seqid = $seq->id;
352             # Write the FASTA entries in files (and QUAL if appropriate)
353 0         0 $fasta_out->write_seq($seq);
354 0 0       0 if ($seq->isa('Bio::Seq::Quality')) {
355             # Quality scores embedded in seq object
356 0 0       0 if (scalar @{$seq->qual} > 0) {
  0         0  
357 0         0 $qual_out->write_seq($seq);
358 0         0 $use_qual_file = 1;
359             }
360             } else {
361             # Quality score in a different object from the sequence object
362 0         0 my $qual = $$quals[$i];
363 0 0       0 if (defined $qual) {
364 0         0 my $qualid = $qual->id;
365 0 0       0 if ($qualid eq $seqid) {
366             # valid quality score information
367 0         0 $qual_out->write_seq($qual);
368 0         0 $use_qual_file = 1;
369             } else {
370             # ID mismatch between sequence and quality score
371 0         0 $self->warn("Sequence object with ID $seqid does not match quality ".
372             "score object with ID $qualid");
373             }
374             }
375             }
376             }
377 0         0 close($fasta_h);
378 0         0 close($qual_h);
379 0         0 $fasta_out->close();
380 0         0 $qual_out->close();
381 0 0       0 return undef if scalar @$seqs <= 0;
382 0 0       0 $qual_file = undef if $use_qual_file == 0;
383 0         0 return $fasta_file, $qual_file;
384             }
385              
386              
387             =head2 _prepare_output_file
388              
389             Title : _prepare_output_file
390             Usage : ($out_fh, $out_file) = $assembler->_prepare_output_file( );
391             Function: Prepare the output file
392             Returns : - output filehandle
393             - output filename
394             Args : none
395              
396             =cut
397              
398             sub _prepare_output_file {
399 0     0   0 my ($self) = @_;
400 0         0 my ($output_fh, $output_file);
401 0         0 my $out_type = $self->out_type();
402 0 0 0     0 if ( (not $out_type eq 'Bio::Assembly::ScaffoldI') &&
403             (not $out_type eq 'Bio::Assembly::IO' ) ) {
404             # Output is a file with specified name
405 0         0 $output_file = $out_type;
406 0 0       0 open $output_fh, '>', $output_file or $self->throw("Could not write file ".
407             "'$output_file': $!");
408             } else {
409 0         0 ( $output_fh, $output_file ) = $self->io->tempfile( -dir => $self->tempdir() );
410             }
411 0         0 $self->outfile_name($output_file);
412 0         0 return $output_fh, $output_file;
413             }
414              
415             =head2 _export_results
416              
417             Title : _export_results
418             Usage : $results = $assembler->_export_results($asm_file);
419             Function: Export the assembly results
420             Returns : Exported assembly (file or IO object or assembly object)
421             Args : -Name of the file containing an assembly
422             - -keep_asm => boolean (if true, do not unlink $asm_file)
423             -[optional] additional named args required by the B:A:IO object
424              
425             =cut
426              
427             sub _export_results {
428 0     0   0 my ($self, $asm_file, @named_args) = @_;
429 0         0 my $results;
430             my $asm_io;
431 0         0 my $asm;
432 0         0 my %args = @named_args;
433 0         0 my $keep_asm = $args{'-keep_asm'};
434 0         0 delete $args{'-keep_asm'};
435 0         0 my $out_type = $self->out_type();
436 0 0 0     0 if ( (not $out_type eq 'Bio::Assembly::ScaffoldI') &&
437             (not $out_type eq 'Bio::Assembly::IO' ) ) {
438             # Results are the assembler output file
439 0         0 $results = $asm_file;
440             } else {
441 0         0 $asm_io = Bio::Assembly::IO->new(
442             -file => "<$asm_file",
443             -format => $self->_assembly_format(),
444             -variant => $self->_assembly_variant(),
445             @named_args );
446             # this unlink is a problem for Bio::DB::Sam (in B:A:I:sam), which needs
447             # the original bam file around.
448 0 0       0 unlink $asm_file unless $keep_asm;
449 0 0       0 if ($out_type eq 'Bio::Assembly::IO') {
450             # Results are a Bio::Assembly::IO object
451 0         0 $results = $asm_io;
452             } else {
453 0         0 $asm = $asm_io->next_assembly();
454 0         0 $asm_io->close;
455 0 0       0 if ($out_type eq 'Bio::Assembly::ScaffoldI') {
456             # Results are a Bio::Assembly::Scaffold object
457 0         0 $results = $asm;
458             } else {
459 0         0 $self->throw("The return type has to be 'Bio::Assembly::IO', 'Bio::".
460             "Assembly::ScaffoldI' or a file name.");
461             }
462             }
463             }
464 0         0 $self->cleanup();
465 0         0 return $results;
466             }
467              
468              
469             =head2 _register_program_commands()
470              
471             Title : _register_program_commands
472             Usage : $assembler->_register_program_commands( \@commands, \%prefixes )
473             Function: Register the commands a program accepts (for programs that act
474             as frontends for a set of commands, each command having its own
475             set of params/switches)
476             Returns : true on success
477             Args : arrayref to a list of commands (scalar strings),
478             hashref to a translation table of the form
479             { $prefix1 => $command1, ... } [optional]
480             Note : To implement a program with this kind of calling structure,
481             include a parameter called 'command' in the
482             @program_params global
483             Note : The translation table is used to associate parameters and
484             switches specified in _set_program_options with the correct
485             program command. In the globals @program_params and
486             @program_switches, specify elements as 'prefix1|param' and
487             'prefix1|switch', etc.
488              
489             =cut
490              
491             sub _register_program_commands {
492 1     1   2 my ($self, $commands, $prefixes) = @_;
493 1         2 $self->{'_options'}->{'_commands'} = $commands;
494 1         1 $self->{'_options'}->{'_prefixes'} = $prefixes;
495 1         2 return 1;
496             }
497              
498             =head2 _set_program_options
499              
500             Title : _set_program_options
501             Usage : $assembler->_set_program_options( \@ args );
502             Function: Register the parameters and flags that an assembler takes.
503             Returns : 1 for success
504             Args : - arguments passed by the user
505             - parameters that the program accepts, optional (default: none)
506             - switches that the program accepts, optional (default: none)
507             - parameter translation, optional (default: no translation occurs)
508             - dash option for the program parameters, [1|single|double|mixed],
509             optional (default: yes, use single dashes only)
510             - join, optional (default: ' ')
511              
512             =cut
513              
514             sub _set_program_options {
515 11     11   22 my ($self, $args, $params, $switches, $translation, $qual_param, $use_dash, $join) = @_;
516             # I think we need to filter on the basis of -command here...
517 11         28 my %args = @$args;
518 11   66     54 my $cmd = $args{'-command'} || $args{'command'};
519 11 100       25 if ($cmd) {
520 2         2 my (@p,@s, %x);
521 2 50       4 $self->warn('Command found, but no commands registered; invoke _register_program_commands') unless $self->{'_options'}->{'_commands'};
522 2 50       2 $self->throw("Command '$cmd' not registered") unless grep /^$cmd$/, @{$self->{'_options'}->{'_commands'}};
  2         41  
523 2 50       5 if ($self->{'_options'}->{'_prefixes'}) {
524 2         3 $cmd = $self->{'_options'}->{'_prefixes'}->{$cmd};
525             } # else, the command is its own prefix
526              
527             # problem here: if a param/switch does not have a prefix (pfx|), then
528             # should probably allow it to pass thru...
529 2 50       60 @p = (grep(!/^.*?\|/, @$params), $cmd ? grep(/^${cmd}\|/, @$params) : ());
530 2 50       21 @s = (grep(!/^.*?\|/, @$switches), $cmd ? grep(/^${cmd}\|/, @$switches) : ());
531 2         10 s/.*?\|// for @p;
532 2         7 s/.*?\|// for @s;
533 2         15 @x{@p, @s} = @{$translation}{
534 2 50       64 grep( !/^.*?\|/, @$params, @$switches),
535             $cmd ? grep(/^${cmd}\|/, @$params, @$switches) : () };
536 2         3 $translation = \%x;
537 2         2 $params = \@p;
538 2         2 $switches = \@s;
539             }
540 11         62 $self->{'_options'}->{'_params'} = $params;
541 11         14 $self->{'_options'}->{'_switches'} = $switches;
542 11         16 $self->{'_options'}->{'_translation'} = $translation;
543 11         14 $self->{'_options'}->{'_qual_param'} = $qual_param;
544 11 50       20 if (not defined $use_dash) {
545 0         0 $self->{'_options'}->{'_dash'} = 1;
546             } else {
547 11         28 $self->{'_options'}->{'_dash'} = $use_dash;
548             }
549 11 50       21 if (not defined $join) {
550 0         0 $self->{'_options'}->{'_join'} = ' ';
551             } else {
552 11         37 $self->{'_options'}->{'_join'} = $join;
553             }
554             # if there is a parameter 'command' in @program_params, and
555             # new is called with new( -command => $cmd, ... ), then
556             # _set_from_args will create an accessor $self->command containing
557             # the value $cmd...
558 11         111 $self->_set_from_args(
559             $args,
560             -methods => [ @$params, @$switches, 'program_name', 'program_dir', 'out_type' ],
561             -create => 1,
562             # when our parms are accessed, signal parameters are unchanged for
563             # future reads (until set_parameters is called)
564             -code =>
565             'my $self = shift;
566             $self->parameters_changed(0);
567             return $self->{\'_\'.$method} = shift if @_;
568             return $self->{\'_\'.$method};'
569             );
570 11         8945 return 1;
571             }
572              
573              
574             =head2 _translate_params
575              
576             Title : _translate_params
577             Usage : @options = $assembler->_translate_params( );
578             Function: Translate the Bioperl arguments into the arguments to pass to the
579             assembler on the command line
580             Returns : Arrayref of arguments
581             Args : none
582              
583             =cut
584              
585             sub _translate_params {
586 1     1   1366 my ($self) = @_;
587              
588             # Get option string
589 1         2 my $params = $self->{'_options'}->{'_params'};
590 1         2 my $switches = $self->{'_options'}->{'_switches'};
591 1         1 my $join = $self->{'_options'}->{'_join'};
592 1         2 my $dash = $self->{'_options'}->{'_dash'};
593 1         1 my $translat = $self->{'_options'}->{'_translation'};
594             # patch to access the multiple dash choices of _setparams...
595 1         1 my @dash_args;
596 1   50     3 $dash ||= 1; # default as advertised
597 1         2 for ($dash) {
598 1 50       3 $_ == 1 && do {
599 1         3 @dash_args = ( -dash => 1 );
600 1         1 last;
601             };
602 0 0       0 /^s/ && do { #single dash only
603 0         0 @dash_args = ( -dash => 1);
604 0         0 last;
605             };
606 0 0       0 /^d/ && do { # double dash only
607 0         0 @dash_args = ( -double_dash => 1);
608 0         0 last;
609             };
610 0 0       0 /^m/ && do { # mixed dash: one-letter opts get -,
611             # long opts get --
612 0         0 @dash_args = ( -mixed_dash => 1);
613 0         0 last;
614             };
615 0         0 do {
616 0         0 $self->warn( "Dash spec '$dash' not recognized; using 'single'" );
617 0         0 @dash_args = ( -dash => 1 );
618             };
619             }
620 1         8 my $options = $self->_setparams(
621             -params => $params,
622             -switches => $switches,
623             -join => $join,
624             @dash_args
625             );
626              
627             # Translate options
628 1         25 my @options = split(/(\s|$join)/, $options);
629 1         4 for (my $i = 0; $i < scalar @options; $i++) {
630 12         26 my ($prefix, $name) = ( $options[$i] =~ m/^(-{0,2})(.+)$/ );
631 12 100       10 if (defined $name) {
632 11 100       28 if ($name =~ /command/i) {
    100          
633 1         2 $name = $options[$i+2]; # get the command
634 1         2 splice @options, $i, 4;
635 1         2 unshift @options, $name; # put it first
636             }
637             elsif (defined $$translat{$name}) {
638 3         8 $options[$i] = $prefix.$$translat{$name};
639             }
640             }
641             else {
642 1         2 splice @options, $i, 1;
643 1         3 $i--;
644             }
645             }
646 1         2 $options = join('', @options);
647              
648             # this is a kludge for mixed options: the reason mixed doesn't
649             # work right on the pass through _setparams is that the
650             # *aliases* and not the actual params are passed to it.
651             # here we just rejigger the dashes
652 1 50       4 if ($dash =~ /^m/) {
653 0         0 $options =~ s/--([a-z0-9](?:\s|$))/-$1/gi;
654             }
655              
656             # Now arrayify the options
657 1         3 @options = split(' ', $options);
658              
659 1         5 return \@options;
660             }
661              
662              
663             =head2 _prepare_input_sequences
664              
665             Title : _prepare_input_sequences
666             Usage : ($seqs, $quals) = $assembler->_prepare_input_sequences(\@seqs, \@quals);
667             Function: Do something to the input sequence and qual objects. By default,
668             nothing happens. Overload this method in the specific assembly module
669             if processing of the sequences is needed (e.g. as in the
670             TigrAssembler module).
671             Returns : - sequence input
672             - optional quality score input
673             Args : - sequence input (FASTA file or sequence object arrayref)
674             - optional quality score input (QUAL file or quality score object
675             arrayref)
676              
677             =cut
678              
679             sub _prepare_input_sequences {
680 0     0   0 my ($self, $seqs, $quals) = @_;
681 0         0 return $seqs, $quals;
682             }
683              
684             =head2 _collate_subcmd_args()
685              
686             Title : _collate_subcmd_args
687             Usage : $args_hash = $self->_collate_subcmd_args
688             Function: collate parameters and switches into command-specific
689             arg lists for passing to new()
690             Returns : hash of named argument lists
691             Args : [optional] composite cmd prefix (scalar string)
692             [default is 'run']
693              
694             =cut
695              
696             sub _collate_subcmd_args {
697 0     0   0 my $self = shift;
698 0         0 my $cmd = shift;
699 0         0 my %ret;
700             # default command is 'run'
701 0   0     0 $cmd ||= 'run';
702 0         0 my @subcmds = @{$self->{'_options'}->{'_composite_commands'}->{$cmd}};
  0         0  
703 0         0 my %subcmds;
704 0         0 my $cur_options = $self->{'_options'};
705              
706             # collate
707 0         0 foreach my $subcmd (@subcmds) {
708             # find the composite cmd form of the argument in
709             # the current params and switches
710             # e.g., map_max_mismatches
711 0         0 my @params = grep /^${subcmd}_/, @{$$cur_options{'_params'}};
  0         0  
712 0         0 my @switches = grep /^${subcmd}_/, @{$$cur_options{'_switches'}};
  0         0  
713 0         0 $ret{$subcmd} = [];
714             # create an argument list suitable for passing to new() of
715             # the subcommand factory...
716 0         0 foreach my $opt (@params, @switches) {
717 0         0 my $subopt = $opt;
718 0         0 $subopt =~ s/^${subcmd}_//;
719 0 0       0 push(@{$ret{$subcmd}}, '-'.$subopt => $self->$opt) if defined $self->$opt;
  0         0  
720             }
721             }
722 0         0 return \%ret;
723             }
724              
725             =head2 run
726              
727             Title : run
728             Usage : $assembly = $assembler->run(\@seqs, \@quals);
729             or
730             $assembly = $assembler->run($fasta_file, $qual_file);
731             Function: Run the assembler. The specific assembler wrapper needs to provide
732             the $assembler->_run() method.
733             Returns : Assembly results (file, IO object or Assembly object)
734             Args : - sequence input (FASTA file or sequence object arrayref)
735             - optional quality score input (QUAL file or quality score object
736             arrayref)
737              
738             =cut
739              
740             sub run {
741 0     0 1 0 my ($self, $seqs, $quals) = @_;
742              
743             # Sanity checks
744 0         0 $self->_check_executable();
745 0         0 $self->_check_sequence_input($seqs);
746 0         0 $self->_check_optional_quality_input($quals);
747              
748             # Process objects if needed
749 0         0 $self->_prepare_input_sequences($seqs, $quals);
750              
751             # Write input files
752 0         0 my ($fasta_file, $qual_file) = $self->_prepare_input_files($seqs,$quals);
753              
754             # If needed, set the program argument for a QUAL file
755 0         0 my $qual_param = $self->{'_options'}->{'_qual_param'};
756 0 0       0 if (defined $qual_param) {
757 0 0       0 if ($qual_file) {
758             # Set the quality input parameter
759 0         0 $quals = $self->$qual_param($qual_file);
760             } else {
761             # Remove the quality input parameter
762 0         0 $quals = $self->$qual_param(undef);
763             }
764             }
765              
766             # Assemble
767 0         0 my $output_file = $self->_run($fasta_file, $qual_file);
768              
769             # Export results in desired object type
770 0         0 my $asm = $self->_export_results($output_file);
771 0         0 return $asm;
772             }
773              
774             =head1 Bio:ParameterBaseI compliance
775              
776             =head2 set_parameters()
777              
778             Title : set_parameters
779             Usage : $pobj->set_parameters(%params);
780             Function: sets the parameters listed in the hash or array
781             Returns : true on success
782             Args : [optional] hash or array of parameter/values.
783              
784             =cut
785              
786             sub set_parameters {
787 3     3 1 320 my ($self, @args) = @_;
788              
789             # currently stored stuff
790 3         3 my $opts = $self->{'_options'};
791 3         4 my $params = $opts->{'_params'};
792 3         3 my $switches = $opts->{'_switches'};
793 3         1 my $translation = $opts->{'_translation'};
794 3         3 my $qual_param = $opts->{'_qual_param'};
795 3         3 my $use_dash = $opts->{'_dash'};
796 3         3 my $join = $opts->{'_join'};
797              
798 3         4 $self->_set_program_options(\@args, $params, $switches, $translation,
799             $qual_param, $use_dash, $join);
800             # the question is, are previously-set parameters left alone when
801             # not specified in @args?
802 3         4 $self->parameters_changed(1);
803 3         9 return 1;
804             }
805              
806             =head2 reset_parameters()
807              
808             Title : reset_parameters
809             Usage : resets values
810             Function: resets parameters to either undef or value in passed hash
811             Returns : none
812             Args : [optional] hash of parameter-value pairs
813              
814             =cut
815              
816             sub reset_parameters {
817 1     1 1 314 my ($self, @args) = @_;
818              
819 1         1 my @reset_args;
820             # currently stored stuff
821 1         2 my $opts = $self->{'_options'};
822 1         1 my $params = $opts->{'_params'};
823 1         2 my $switches = $opts->{'_switches'};
824 1         1 my $translation = $opts->{'_translation'};
825 1         2 my $qual_param = $opts->{'_qual_param'};
826 1         1 my $use_dash = $opts->{'_dash'};
827 1         1 my $join = $opts->{'_join'};
828              
829             # don't like this, b/c _set_program_args will create a bunch of
830             # accessors with undef values, but oh well for now /maj
831              
832             # Is better to use hashes than arrays, to use their unique keys
833 1         2 my %reset_args = @args;
834 1         18 foreach my $p (@$params) {
835 7 100       11 if (not exists $reset_args{"-$p"}) {
836 6         11 $reset_args{"-$p"} = undef;
837             }
838             }
839 1         2 foreach my $s (@$switches) {
840 2 50       5 if (not exists $reset_args{"-$s"}) {
841 2         4 $reset_args{"-$s"} = undef;
842             }
843             }
844 1         4 while (my ($method, $value) = each %reset_args) {
845 9         16 push(@reset_args, $method => $value);
846             }
847              
848 1         3 $self->_set_program_options(\@reset_args, $params, $switches, $translation,
849             $qual_param, $use_dash, $join);
850 1         2 $self->parameters_changed(1);
851             }
852              
853             =head2 parameters_changed()
854              
855             Title : parameters_changed
856             Usage : if ($pobj->parameters_changed) {...}
857             Function: Returns boolean true (1) if parameters have changed
858             Returns : Boolean (0 or 1)
859             Args : [optional] Boolean
860              
861             =cut
862              
863             sub parameters_changed {
864 265     265 1 64730 my $self = shift;
865 265 100       3720 return $self->{'_parameters_changed'} = shift if @_;
866 5         17 return $self->{'_parameters_changed'};
867             }
868              
869             =head2 available_parameters()
870              
871             Title : available_parameters
872             Usage : @params = $pobj->available_parameters()
873             Function: Returns a list of the available parameters
874             Returns : Array of parameters
875             Args : 'params' for settable program parameters
876             'switches' for boolean program switches
877             default: all
878              
879             =cut
880              
881             sub available_parameters {
882 3     3 1 2 my $self = shift;
883 3         3 my $subset = shift;
884 3         4 my $opts = $self->{'_options'};
885 3         2 my @ret;
886 3         4 for ($subset) {
887 3 100 66     13 (!defined || /^a/) && do {
888 1         1 @ret = (@{$opts->{'_params'}}, @{$opts->{'_switches'}});
  1         2  
  1         3  
889 1         2 last;
890             };
891 2 100       6 m/^p/i && do {
892 1         1 @ret = @{$opts->{'_params'}};
  1         2  
893 1         2 last;
894             };
895 1 50       5 m/^s/i && do {
896 1         1 @ret = @{$opts->{'_switches'}};
  1         2  
897 1         2 last;
898             };
899 0         0 do { #fail
900 0         0 $self->throw("available_parameters: unrecognized subset");
901             };
902             }
903 3         9 return @ret;
904             }
905              
906             =head2 get_parameters()
907              
908             Title : get_parameters
909             Usage : %params = $pobj->get_parameters;
910             Function: Returns list of key-value pairs of parameter => value
911             Returns : List of key-value pairs
912             Args : [optional] A string is allowed if subsets are wanted or (if a
913             parameter subset is default) 'all' to return all parameters
914              
915             =cut
916              
917             sub get_parameters {
918 1     1 1 2 my $self = shift;
919 1         1 my $subset = shift;
920 1   50     4 $subset ||= 'all';
921 1         2 my @ret;
922 1         1 my $opts = $self->{'_options'};
923 1         3 for ($subset) {
924 1 50       3 m/^p/i && do { #params only
925 0         0 for (@{$opts->{'_params'}}) {
  0         0  
926 0 0 0     0 push(@ret, $_, $self->$_) if $self->can($_) && defined $self->$_;
927             }
928 0         0 last;
929             };
930 1 50       3 m/^s/i && do { #switches only
931 0         0 for (@{$opts->{'_switches'}}) {
  0         0  
932 0 0 0     0 push(@ret, $_, $self->$_) if $self->can($_) && defined $self->$_;
933             }
934 0         0 last;
935             };
936 1 50       3 m/^a/i && do { # all
937 1         1 for (@{$opts->{'_params'}},@{$opts->{'_switches'}}) {
  1         2  
  1         3  
938 9 100 66     161 push(@ret, $_, $self->$_) if $self->can($_) && defined $self->$_;
939             }
940 1         7 last;
941             };
942 0         0 do {
943 0         0 $self->throw("get_parameters: unrecognized subset");
944             };
945             }
946 1         5 return @ret;
947             }
948              
949             1;