File Coverage

blib/lib/Bio/Tools/Run/Minimo.pm
Criterion Covered Total %
statement 26 73 35.6
branch 1 14 7.1
condition 0 3 0.0
subroutine 7 9 77.7
pod 1 1 100.0
total 35 100 35.0


line stmt bran cond sub pod time code
1             # BioPerl module for Bio::Tools::Run::Minimo
2             #
3             # Copyright Florent E Angly
4             #
5             # You may distribute this module under the same terms as perl itself
6             #
7             # POD documentation - main docs before the code
8              
9             =head1 NAME
10              
11             Bio::Tools::Run::Minimo - Wrapper for local execution of the Minimo assembler
12              
13             =head1 SYNOPSIS
14              
15             use Bio::Tools::Run::Minimo;
16             # Run Minimo using an input FASTA file
17             my $factory = Bio::Tools::Run::Minimo->new( -minimum_overlap_length => 35 );
18             my $asm_obj = $factory->run($fasta_file, $qual_file);
19             # An assembly object is returned by default
20             for my $contig ($asm_obj->all_contigs) {
21             ... do something ...
22             }
23              
24             # Read some sequences
25             use Bio::SeqIO;
26             my $sio = Bio::SeqIO->new(-file => $fasta_file, -format => 'fasta');
27             my @seqs;
28             while (my $seq = $sio->next_seq()) {
29             push @seqs,$seq;
30             }
31              
32             # Run Minimo using input sequence objects and returning an assembly file
33             my $asm_file = 'results.ace';
34             $factory->out_type($asm_file);
35             $factory->run(\@seqs);
36              
37             =head1 DESCRIPTION
38              
39             Wrapper module for the local execution of the DNA assembly program Minimo.
40             Minimo is based on AMOS (http://sourceforge.net/apps/mediawiki/amos/) and
41             implements the same conservative assembly algorithm as Minimus
42             (http://sourceforge.net/apps/mediawiki/amos/index.php?title=Minimus).
43              
44             =head1 FEEDBACK
45              
46             =head2 Mailing Lists
47              
48             User feedback is an integral part of the evolution of this and other Bioperl
49             modules. Send your comments and suggestions preferably to one of the Bioperl
50             mailing lists. Your participation is much appreciated.
51              
52             bioperl-l@bioperl.org - General discussion
53             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
54              
55             =head2 Support
56              
57             Please direct usage questions or support issues to the mailing list:
58              
59             I<bioperl-l@bioperl.org>
60              
61             rather than to the module maintainer directly. Many experienced and
62             responsive experts will be able to look at the problem and quickly
63             address it. Please include a thorough description of the problem
64             with code and data examples if at all possible.
65              
66             =head2 Reporting Bugs
67              
68             Report bugs to the Bioperl bug tracking system to help us keep track of the bugs
69             and their resolution. Bug reports can be submitted via the web:
70              
71             http://redmine.open-bio.org/projects/bioperl/
72              
73             =head1 AUTHOR - Florent E Angly
74              
75             Email: florent-dot-angly-at-gmail-dot-com
76              
77             =head1 APPENDIX
78              
79             The rest of the documentation details each of the object methods. Internal
80             methods are usually preceded with a _
81              
82             =cut
83              
84              
85             package Bio::Tools::Run::Minimo;
86              
87 1     1   159484 use strict;
  1         2  
  1         26  
88 1     1   3 use IPC::Run;
  1         2  
  1         30  
89 1     1   427 use File::Copy;
  1         1779  
  1         48  
90 1     1   5 use File::Spec;
  1         1  
  1         4  
91 1     1   15 use File::Basename;
  1         1  
  1         48  
92              
93 1     1   4 use base qw( Bio::Root::Root Bio::Tools::Run::AssemblerBase );
  1         0  
  1         549  
94              
95             our $program_name = 'Minimo'; # name of the executable
96             our @program_params = (qw( qual_in good_qual bad_qual min_len min_ident aln_wiggle out_prefix ace_exp ));
97             our @program_switches;
98             our %param_translation = (
99             'qual_in' => 'D QUAL_IN',
100             'good_qual' => 'D GOOD_QUAL',
101             'bad_qual' => 'D BAD_QUAL',
102             'min_len' => 'D MIN_LEN',
103             'min_ident' => 'D MIN_IDENT',
104             'aln_wiggle' => 'D ALN_WIGGLE',
105             'out_prefix' => 'D OUT_PREFIX',
106             'ace_exp' => 'D ACE_EXP'
107             );
108              
109             our $qual_param = 'qual_in';
110             our $use_dash = 1;
111             our $join = '=';
112             our $asm_format = 'ace';
113              
114              
115             =head2 new
116              
117             Title : new
118             Usage : $assembler->new( -min_len => 50,
119             -min_ident => 95 );
120             Function: Creates a Minimo factory
121             Returns : A Bio::Tools::Run::Minimo object
122             Args : Minimo options available in this module:
123             qual_in Input quality score file
124             good_qual Quality score to set for bases within the clear
125             range if no quality file was given (default: 30)
126             bad_qual Quality score to set for bases outside clear range
127             if no quality file was given (default: 10). If your
128             sequences are trimmed, try the same value as GOOD_QUAL.
129             min_len / minimum_overlap_length
130             Minimum contig overlap length (between 20 and 100 bp,
131             default: 35)
132             min_ident / minimum_overlap_similarity
133             Minimum contig overlap identity percentage (between 0
134             and 100 %, default: 98)
135             aln_wiggle Alignment wiggle value when determining the consensus
136             sequence (default: 2 bp)
137             out_prefix Prefix to use for the output file path and name
138              
139             =cut
140              
141             sub new {
142 1     1 1 119 my ($class,@args) = @_;
143 1         15 my $self = $class->SUPER::new(@args);
144 1         22 $self->_set_program_options(\@args, \@program_params, \@program_switches,
145             \%param_translation, $qual_param, $use_dash, $join);
146 1         3 *minimum_overlap_length = \&min_len;
147 1         1 *minimum_overlap_similarity = \&min_ident;
148 1 50       5 $self->program_name($program_name) if not defined $self->program_name();
149 1         8 $self->_assembly_format($asm_format);
150 1         6 return $self;
151             }
152              
153              
154             =head2 out_type
155              
156             Title : out_type
157             Usage : $factory->out_type('Bio::Assembly::ScaffoldI')
158             Function: Get/set the desired type of output
159             Returns : The type of results to return
160             Args : Desired type of results to return (optional):
161             'Bio::Assembly::IO' object
162             'Bio::Assembly::ScaffoldI' object (default)
163             The name of a file to save the results in
164              
165             =cut
166              
167              
168             =head2 run
169              
170             Title : run
171             Usage : $factory->run($fasta_file);
172             Function: Run the Minimo assembler
173             Returns : - a Bio::Assembly::ScaffoldI object, a Bio::Assembly::IO
174             object, a filename, or undef if all sequences were too small to
175             be usable
176             Returns : Assembly results (file, IO object or assembly object)
177             Args : - sequence input (FASTA file or sequence object arrayref)
178             - optional quality score input (QUAL file or quality score object
179             arrayref)
180             =cut
181              
182              
183             =head2 _run
184              
185             Title : _run
186             Usage : $factory->_run()
187             Function: Make a system call and run the Minimo assembler
188             Returns : An assembly file
189             Args : - FASTA file
190             - optional QUAL file
191              
192             =cut
193              
194              
195             sub _run {
196 0     0     my ($self, $fasta_file, $qual_file) = @_;
197              
198             # qual_in Input quality score file
199             # fasta_exp Export results in FASTA format (0:no 1:yes, default: 1)
200             # ace_exp Export results in ACE format (0:no 1:yes, default: 1)
201              
202             # Specify that we want an ACE output file
203 0           $self->ace_exp(1);
204              
205             # Setup needed files and filehandles first
206 0           my ($output_fh, $output_file) = $self->_prepare_output_file( );
207 0           my ($stdout_fh, $stdout_file) = $self->io->tempfile( -dir => $self->tempdir() );
208              
209             # Get program executable
210 0           my $exe = $self->executable;
211              
212             # Get command-line options
213 0           my $options = $self->_translate_params();
214              
215             # Usage: Minimo FASTA_IN [options]
216             # Options are of the style: -D PARAM=VAL
217 0           my @program_args = ( $exe, $fasta_file, @$options);
218 0           my @ipc_args = ( \@program_args, '>', $stdout_file);
219              
220             # Print command for debugging
221 0 0         if ($self->verbose() >= 0) {
222 0           my $cmd = '';
223 0           $cmd .= join ( ' ', @program_args );
224 0           for ( my $i = 1 ; $i < scalar @ipc_args ; $i++ ) {
225 0           my $element = $ipc_args[$i];
226 0           my $ref = ref($element);
227 0           my $value;
228 0 0 0       if ( $ref && $ref eq 'SCALAR') {
229 0           $value = $$element;
230             } else {
231 0           $value = $element;
232             }
233 0           $cmd .= " $value";
234             }
235 0           $self->debug( "$exe command = $cmd\n" );
236             }
237              
238             # Execute command
239 0           my $log_file = "$fasta_file.runAmos.log";
240 0           eval {
241 0 0         IPC::Run::run(@ipc_args) || die("There was a problem running $exe. The ".
242             "error message is: $!. Check the log file $log_file for possible causes.");
243             };
244 0 0         if ($@) {
245 0           $self->throw("$exe call crashed: $@");
246             }
247              
248             # Close filehandles
249 0           close($output_fh);
250 0           close($stdout_fh);
251              
252             # Result files
253 0           my $base = $self->out_prefix();
254 0 0         if (not defined $base) {
255 0           my $dirname = dirname($fasta_file);
256 0           my $basename = basename($fasta_file);
257 0           $basename =~ s/^(.+)\..+$/$1/;
258 0           $base = File::Spec->catfile($dirname, $basename);
259             }
260 0           my $ace_file = "$base-contigs.ace";
261 0           my $amos_file = "$base-contigs.afg";
262              
263             # Remove all files except for the ACE file
264 0           for my $file ($log_file, $stdout_file, $amos_file) {
265 0           unlink $file;
266             }
267              
268             # Clean the ACE file
269 0           $self->_clean_file($ace_file);
270              
271             # Move the ACE file to its final destination
272 0 0         move ($ace_file, $output_file) or $self->throw("Could not move file ".
273             "'$ace_file' to '$output_file': $!");
274              
275 0           return $output_file;
276             }
277              
278             =head2 _clean_file
279              
280             Title : _clean_file
281             Usage : $factory->_clean_file($file)
282             Function: Clean file in place by removing NULL characters. NULL characters
283             can be present in the output files of AMOS 2.0.8 but they do not
284             validate as proper sequence characters in Bioperl.
285             Returns : 1 for success
286             Args : Filename
287              
288             =cut
289              
290             sub _clean_file {
291 0     0     my ($self, $file) = @_;
292             # Set in-place file editing mode
293 0           local $^I = "~";
294 0           local @ARGV = ( $file );
295             # Replace lines in file
296 0           while (<>) {
297 0           s/\x0//g;
298 0           print;
299             }
300 0           return 1;
301             }
302              
303             1;