| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | # BioPerl module for Bio::Tools::Run::Newbler | 
| 2 |  |  |  |  |  |  | # | 
| 3 |  |  |  |  |  |  | # Copyright Florent E Angly | 
| 4 |  |  |  |  |  |  | # | 
| 5 |  |  |  |  |  |  | # You may distribute this module under the same terms as perl itself | 
| 6 |  |  |  |  |  |  | # | 
| 7 |  |  |  |  |  |  | # POD documentation - main docs before the code | 
| 8 |  |  |  |  |  |  |  | 
| 9 |  |  |  |  |  |  | =head1 NAME | 
| 10 |  |  |  |  |  |  |  | 
| 11 |  |  |  |  |  |  | Bio::Tools::Run::Newbler - Wrapper for local execution of Newbler | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 14 |  |  |  |  |  |  |  | 
| 15 |  |  |  |  |  |  | use Bio::Tools::Run::Newbler; | 
| 16 |  |  |  |  |  |  | # Run Minmo using an input FASTA file | 
| 17 |  |  |  |  |  |  | my $factory = Bio::Tools::Run::Newbler->new( -minimum_overlap_length => 35 ); | 
| 18 |  |  |  |  |  |  | my $asm_obj = $factory->run($fasta_file, $qual_file); | 
| 19 |  |  |  |  |  |  | # An assembly object is returned by default | 
| 20 |  |  |  |  |  |  | for my $contig ($assembly->all_contigs) { | 
| 21 |  |  |  |  |  |  | ... do something ... | 
| 22 |  |  |  |  |  |  | } | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | # Read some sequences | 
| 25 |  |  |  |  |  |  | use Bio::SeqIO; | 
| 26 |  |  |  |  |  |  | my $sio = Bio::SeqIO->new(-file => $fasta_file, -format => 'fasta'); | 
| 27 |  |  |  |  |  |  | my @seqs; | 
| 28 |  |  |  |  |  |  | while (my $seq = $sio->next_seq()) { | 
| 29 |  |  |  |  |  |  | push @seqs,$seq; | 
| 30 |  |  |  |  |  |  | } | 
| 31 |  |  |  |  |  |  |  | 
| 32 |  |  |  |  |  |  | # Run Newbler using input sequence objects and returning an assembly file | 
| 33 |  |  |  |  |  |  | my $asm_file = 'results.ace'; | 
| 34 |  |  |  |  |  |  | $factory->out_type($asm_file); | 
| 35 |  |  |  |  |  |  | $factory->run(\@seqs); | 
| 36 |  |  |  |  |  |  |  | 
| 37 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 38 |  |  |  |  |  |  |  | 
| 39 |  |  |  |  |  |  | Wrapper module for the local execution of the proprietary DNA assembly | 
| 40 |  |  |  |  |  |  | program GS De Novo Assembler (Newbler) from Roche/454 v2.0.00.20: | 
| 41 |  |  |  |  |  |  | http://www.454.com/products-solutions/analysis-tools/gs-de-novo-assembler.asp | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  | =head1 FEEDBACK | 
| 44 |  |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  | =head2 Mailing Lists | 
| 46 |  |  |  |  |  |  |  | 
| 47 |  |  |  |  |  |  | User feedback is an integral part of the evolution of this and other Bioperl | 
| 48 |  |  |  |  |  |  | modules. Send your comments and suggestions preferably to one of the Bioperl | 
| 49 |  |  |  |  |  |  | mailing lists.  Your participation is much appreciated. | 
| 50 |  |  |  |  |  |  |  | 
| 51 |  |  |  |  |  |  | bioperl-l@bioperl.org                  - General discussion | 
| 52 |  |  |  |  |  |  | http://bioperl.org/wiki/Mailing_lists  - About the mailing lists | 
| 53 |  |  |  |  |  |  |  | 
| 54 |  |  |  |  |  |  | =head2 Support | 
| 55 |  |  |  |  |  |  |  | 
| 56 |  |  |  |  |  |  | Please direct usage questions or support issues to the mailing list: | 
| 57 |  |  |  |  |  |  |  | 
| 58 |  |  |  |  |  |  | I | 
| 59 |  |  |  |  |  |  |  | 
| 60 |  |  |  |  |  |  | rather than to the module maintainer directly. Many experienced and | 
| 61 |  |  |  |  |  |  | reponsive experts will be able look at the problem and quickly | 
| 62 |  |  |  |  |  |  | address it. Please include a thorough description of the problem | 
| 63 |  |  |  |  |  |  | with code and data examples if at all possible. | 
| 64 |  |  |  |  |  |  |  | 
| 65 |  |  |  |  |  |  | =head2 Reporting Bugs | 
| 66 |  |  |  |  |  |  |  | 
| 67 |  |  |  |  |  |  | Report bugs to the Bioperl bug tracking system to help us keep track the bugs | 
| 68 |  |  |  |  |  |  | and their resolution.  Bug reports can be submitted via the web: | 
| 69 |  |  |  |  |  |  |  | 
| 70 |  |  |  |  |  |  | http://redmine.open-bio.org/projects/bioperl/ | 
| 71 |  |  |  |  |  |  |  | 
| 72 |  |  |  |  |  |  | =head1 AUTHOR - Florent E Angly | 
| 73 |  |  |  |  |  |  |  | 
| 74 |  |  |  |  |  |  | Email: florent-dot-angly-at-gmail-dot-com | 
| 75 |  |  |  |  |  |  |  | 
| 76 |  |  |  |  |  |  | =head1 APPENDIX | 
| 77 |  |  |  |  |  |  |  | 
| 78 |  |  |  |  |  |  | The rest of the documentation details each of the object methods. Internal | 
| 79 |  |  |  |  |  |  | methods are usually preceded with a _ | 
| 80 |  |  |  |  |  |  |  | 
| 81 |  |  |  |  |  |  | =cut | 
| 82 |  |  |  |  |  |  |  | 
| 83 |  |  |  |  |  |  |  | 
| 84 |  |  |  |  |  |  | package Bio::Tools::Run::Newbler; | 
| 85 |  |  |  |  |  |  |  | 
| 86 | 1 |  |  | 1 |  | 124496 | use strict; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 22 |  | 
| 87 | 1 |  |  | 1 |  | 3 | use IPC::Run; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 28 |  | 
| 88 | 1 |  |  | 1 |  | 424 | use File::Copy; | 
|  | 1 |  |  |  |  | 1603 |  | 
|  | 1 |  |  |  |  | 46 |  | 
| 89 | 1 |  |  | 1 |  | 4 | use File::Path; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 40 |  | 
| 90 | 1 |  |  | 1 |  | 3 | use File::Spec; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 3 |  | 
| 91 | 1 |  |  | 1 |  | 15 | use File::Basename; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 43 |  | 
| 92 |  |  |  |  |  |  |  | 
| 93 | 1 |  |  | 1 |  | 3 | use base qw( Bio::Root::Root Bio::Tools::Run::AssemblerBase ); | 
|  | 1 |  |  |  |  | 0 |  | 
|  | 1 |  |  |  |  | 506 |  | 
| 94 |  |  |  |  |  |  |  | 
| 95 |  |  |  |  |  |  | our $program_name = 'runAssembly'; # name of the executable | 
| 96 |  |  |  |  |  |  | our @program_params = (qw( expected_depth mid_conf_file vector_trim vector_screen | 
| 97 |  |  |  |  |  |  | aln_identity_score aln_difference_score min_ovl_identity min_ovl_length | 
| 98 |  |  |  |  |  |  | seed_count seed_length seed_step out_dir )); | 
| 99 |  |  |  |  |  |  | our @program_switches = (qw( large ace ace_raw ace_trimmed no_trim in_memory | 
| 100 |  |  |  |  |  |  | no_auto_rescore no_duplicates )); | 
| 101 |  |  |  |  |  |  | our %param_translation = ( | 
| 102 |  |  |  |  |  |  | 'large'               => 'large', | 
| 103 |  |  |  |  |  |  | 'ace'                 => 'ace', | 
| 104 |  |  |  |  |  |  | 'ace_raw'             => 'ar', | 
| 105 |  |  |  |  |  |  | 'ace_trimmed'         => 'at', | 
| 106 |  |  |  |  |  |  | 'expected_depth'      => 'e', | 
| 107 |  |  |  |  |  |  | 'mid_conf_file'       => 'mcf', | 
| 108 |  |  |  |  |  |  | 'no_trim'             => 'notrim', | 
| 109 |  |  |  |  |  |  | 'vector_trim'         => 'vt', | 
| 110 |  |  |  |  |  |  | 'vector_screen'       => 'vs', | 
| 111 |  |  |  |  |  |  | 'aln_identity_score'  => 'ais', | 
| 112 |  |  |  |  |  |  | 'aln_difference_score'=> 'ads', | 
| 113 |  |  |  |  |  |  | 'in_memory'           => 'm', | 
| 114 |  |  |  |  |  |  | 'min_ovl_identity'    => 'mi', | 
| 115 |  |  |  |  |  |  | 'min_ovl_length'      => 'ml', | 
| 116 |  |  |  |  |  |  | 'no_auto_rescore'     => 'nor', | 
| 117 |  |  |  |  |  |  | 'seed_count'          => 'sc', | 
| 118 |  |  |  |  |  |  | 'seed_length'         => 'sl', | 
| 119 |  |  |  |  |  |  | 'seed_step'           => 'ss', | 
| 120 |  |  |  |  |  |  | 'no_duplicates'       => 'ud', | 
| 121 |  |  |  |  |  |  | 'out_dir'             => 'o' | 
| 122 |  |  |  |  |  |  | ); | 
| 123 |  |  |  |  |  |  |  | 
| 124 |  |  |  |  |  |  | our $qual_param; | 
| 125 |  |  |  |  |  |  | our $use_dash = 1; | 
| 126 |  |  |  |  |  |  | our $join = ' '; | 
| 127 |  |  |  |  |  |  | our $asm_format = 'ace'; | 
| 128 |  |  |  |  |  |  | our $asm_variant = '454'; | 
| 129 |  |  |  |  |  |  |  | 
| 130 |  |  |  |  |  |  |  | 
| 131 |  |  |  |  |  |  | =head2 new | 
| 132 |  |  |  |  |  |  |  | 
| 133 |  |  |  |  |  |  | Title   : new | 
| 134 |  |  |  |  |  |  | Usage   : $assembler->new( -min_len   => 50, | 
| 135 |  |  |  |  |  |  | -min_ident => 95 ); | 
| 136 |  |  |  |  |  |  | Function: Creates a Newbler factory | 
| 137 |  |  |  |  |  |  | Returns : A Bio::Tools::Run::Newbler object | 
| 138 |  |  |  |  |  |  | Args    : Newbler options available in this module (from the Newbler manual): | 
| 139 |  |  |  |  |  |  |  | 
| 140 |  |  |  |  |  |  | large                Shortcut some of the computationally expensive algorithms | 
| 141 |  |  |  |  |  |  | to save some time. Useful for large or complex datasets | 
| 142 |  |  |  |  |  |  | (default: off). | 
| 143 |  |  |  |  |  |  | ace_raw              Output the full "raw" read sequence (default: off). | 
| 144 |  |  |  |  |  |  | ace_trimmed          Output only the "trimmed" sequences (after low quality, | 
| 145 |  |  |  |  |  |  | vector and key trimming) (default: on). | 
| 146 |  |  |  |  |  |  | expected_depth       Expected depth of the assembly. Filters out random-chance | 
| 147 |  |  |  |  |  |  | level events at bigger depths. 0 means to not use the | 
| 148 |  |  |  |  |  |  | expected depth information (default: 0). | 
| 149 |  |  |  |  |  |  | mid_conf_file        MID configuration file for decoding the multiplex data. | 
| 150 |  |  |  |  |  |  | no_trim              Disable the quality and primer trimming of the input | 
| 151 |  |  |  |  |  |  | sequences (default: off). | 
| 152 |  |  |  |  |  |  | vector_trim          Specify a vector trimming database (in FASTA format) to | 
| 153 |  |  |  |  |  |  | trim the ends of input sequences. | 
| 154 |  |  |  |  |  |  | vector_screen        Specify a vector screening database (in FASTA format) to | 
| 155 |  |  |  |  |  |  | remove contaminants, i.e. input reads that align | 
| 156 |  |  |  |  |  |  | against a sequence in the database. | 
| 157 |  |  |  |  |  |  | aln_identity_score   Set the alignment identity score. When multiple alignments | 
| 158 |  |  |  |  |  |  | are found, it is the per-overlap column identity score | 
| 159 |  |  |  |  |  |  | used to sort the overlaps for use in the progressive | 
| 160 |  |  |  |  |  |  | alignment (default: 2). | 
| 161 |  |  |  |  |  |  | aln_difference_score Set the alignment difference score. For multiple alignments | 
| 162 |  |  |  |  |  |  | this is the per-overlap difference score used to sort the | 
| 163 |  |  |  |  |  |  | overlaps for use in the progressive multi-alignment | 
| 164 |  |  |  |  |  |  | (default: -3). | 
| 165 |  |  |  |  |  |  | in_memory            Keep all sequence data in memory throughout the computation. | 
| 166 |  |  |  |  |  |  | Can speed up the computation but requires more computer | 
| 167 |  |  |  |  |  |  | memory (default: off). | 
| 168 |  |  |  |  |  |  | min_ovl_identity / minimum_overlap_similarity | 
| 169 |  |  |  |  |  |  | Minimum overlap identity, i.e. the minimum percent identity | 
| 170 |  |  |  |  |  |  | of overlaps used by the assembler (default: 40). | 
| 171 |  |  |  |  |  |  | min_ovl_length / minimum_overlap_length | 
| 172 |  |  |  |  |  |  | Minimum overlap length, i.e. the minimum length of overlaps | 
| 173 |  |  |  |  |  |  | considered by the assembler (default: 90). Warning: It | 
| 174 |  |  |  |  |  |  | seems like this parameter is not respected by the program | 
| 175 |  |  |  |  |  |  | in the current version | 
| 176 |  |  |  |  |  |  | no_auto_rescore      Do not use the quality score re-scoring algorithm (default: | 
| 177 |  |  |  |  |  |  | off). | 
| 178 |  |  |  |  |  |  | seed_count           Set the seed count parameter, the number of seeds required | 
| 179 |  |  |  |  |  |  | in a window before an extension is made (default: 1). | 
| 180 |  |  |  |  |  |  | seed_length          Set the seed length parameter, i.e. the number of bases | 
| 181 |  |  |  |  |  |  | between seed generation locations used in the exact | 
| 182 |  |  |  |  |  |  | k-mer matching part of the overlap detection (between 6 | 
| 183 |  |  |  |  |  |  | 16) (default: 16). | 
| 184 |  |  |  |  |  |  | seed_step            Set the seed step parameter, i.e. the number of bases used | 
| 185 |  |  |  |  |  |  | for each seed in the exact k-mer matching part of the | 
| 186 |  |  |  |  |  |  | overlap detection (i.e. the "k" value) (default: 12). | 
| 187 |  |  |  |  |  |  | no_duplicates        Treat each read as a separate read and do not group them | 
| 188 |  |  |  |  |  |  | into duplicates for assembly or consensus calling | 
| 189 |  |  |  |  |  |  | (default: off). | 
| 190 |  |  |  |  |  |  |  | 
| 191 |  |  |  |  |  |  | =cut | 
| 192 |  |  |  |  |  |  |  | 
| 193 |  |  |  |  |  |  | sub new { | 
| 194 | 1 |  |  | 1 | 1 | 75 | my ($class,@args) = @_; | 
| 195 | 1 |  |  |  |  | 23 | my $self = $class->SUPER::new(@args); | 
| 196 | 1 |  |  |  |  | 20 | $self->_set_program_options(\@args, \@program_params, \@program_switches, | 
| 197 |  |  |  |  |  |  | \%param_translation, $qual_param, $use_dash, $join); | 
| 198 | 1 |  |  |  |  | 3 | *minimum_overlap_length = \&min_ovl_length; | 
| 199 | 1 |  |  |  |  | 2 | *minimum_overlap_similarity = \&min_ovl_identity; | 
| 200 | 1 | 50 |  |  |  | 4 | $self->program_name($program_name) if not defined $self->program_name(); | 
| 201 | 1 |  |  |  |  | 6 | $self->_assembly_format($asm_format); | 
| 202 | 1 |  |  |  |  | 4 | $self->_assembly_variant($asm_variant); | 
| 203 | 1 |  |  |  |  | 3 | return $self; | 
| 204 |  |  |  |  |  |  | } | 
| 205 |  |  |  |  |  |  |  | 
| 206 |  |  |  |  |  |  | =head2 _check_sequence_input | 
| 207 |  |  |  |  |  |  |  | 
| 208 |  |  |  |  |  |  | Title   : _check_sequence_input | 
| 209 |  |  |  |  |  |  | Usage   : $assembler->_check_sequence_input($seqs) | 
| 210 |  |  |  |  |  |  | Function: Check that the sequence input is arrayref of sequence objects or | 
| 211 |  |  |  |  |  |  | a FASTA file, or a MIDinfo + dir, or a MIDinfo + file. If not, an | 
| 212 |  |  |  |  |  |  | error is thrown. | 
| 213 |  |  |  |  |  |  | Returns : 1 if the check passed | 
| 214 |  |  |  |  |  |  | Args    : sequence input | 
| 215 |  |  |  |  |  |  |  | 
| 216 |  |  |  |  |  |  | =cut | 
| 217 |  |  |  |  |  |  |  | 
| 218 |  |  |  |  |  |  | sub _check_sequence_input { | 
| 219 | 4 |  |  | 4 |  | 463 | my ($self, $seqs) = @_; | 
| 220 | 4 | 50 |  |  |  | 9 | if (not $seqs) { | 
| 221 | 0 |  |  |  |  | 0 | $self->throw("Must supply sequences as a FASTA filename or a sequence object". | 
| 222 |  |  |  |  |  |  | " (Bio::PrimarySeqI or Bio::SeqI) array reference"); | 
| 223 |  |  |  |  |  |  | } else { | 
| 224 | 4 | 50 |  |  |  | 6 | if (ref($seqs) =~ m/ARRAY/i ) { | 
| 225 | 0 |  |  |  |  | 0 | for my $seq (@$seqs) { | 
| 226 | 0 | 0 | 0 |  |  | 0 | unless ($seq->isa('Bio::PrimarySeqI') || $seq->isa('Bio::SeqI')) { | 
| 227 | 0 |  |  |  |  | 0 | $self->throw("Not a valid Bio::PrimarySeqI or Bio::SeqI object"); | 
| 228 |  |  |  |  |  |  | } | 
| 229 |  |  |  |  |  |  | } | 
| 230 |  |  |  |  |  |  | } else { | 
| 231 |  |  |  |  |  |  | # [midinfo@]sffile|[midinfo@]projectdir|fastafile | 
| 232 | 4 |  |  |  |  | 19 | my ($mid, $file_or_dir) = ($seqs =~ m/^(.+@)?(.+)$/); | 
| 233 | 4 | 50 |  |  |  | 5 | if (not defined $file_or_dir) { | 
| 234 | 0 |  |  |  |  | 0 | $self->throw("Input string $seqs does not seem valid."); | 
| 235 |  |  |  |  |  |  | } else { | 
| 236 | 4 | 50 |  |  |  | 57 | if (not -e $file_or_dir) { | 
| 237 | 0 |  |  |  |  | 0 | $self->throw("Input file or directory '$file_or_dir' does not seem to exist."); | 
| 238 |  |  |  |  |  |  | } | 
| 239 |  |  |  |  |  |  | } | 
| 240 |  |  |  |  |  |  | } | 
| 241 |  |  |  |  |  |  | } | 
| 242 | 4 |  |  |  |  | 13 | return 1; | 
| 243 |  |  |  |  |  |  | } | 
| 244 |  |  |  |  |  |  |  | 
| 245 |  |  |  |  |  |  | =head2 out_type | 
| 246 |  |  |  |  |  |  |  | 
| 247 |  |  |  |  |  |  | Title   : out_type | 
| 248 |  |  |  |  |  |  | Usage   : $factory->out_type('Bio::Assembly::ScaffoldI') | 
| 249 |  |  |  |  |  |  | Function: Get/set the desired type of output | 
| 250 |  |  |  |  |  |  | Returns : The type of results to return | 
| 251 |  |  |  |  |  |  | Args    : Desired type of results to return (optional): | 
| 252 |  |  |  |  |  |  | 'Bio::Assembly::IO' object | 
| 253 |  |  |  |  |  |  | 'Bio::Assembly::ScaffoldI' object (default) | 
| 254 |  |  |  |  |  |  | The name of a file to save the results in | 
| 255 |  |  |  |  |  |  |  | 
| 256 |  |  |  |  |  |  | =cut | 
| 257 |  |  |  |  |  |  |  | 
| 258 |  |  |  |  |  |  |  | 
| 259 |  |  |  |  |  |  | =head2 run | 
| 260 |  |  |  |  |  |  |  | 
| 261 |  |  |  |  |  |  | Title   :   run | 
| 262 |  |  |  |  |  |  | Usage   :   $factory->run($fasta_file); | 
| 263 |  |  |  |  |  |  | Function:   Run TIGR Assembler | 
| 264 |  |  |  |  |  |  | Returns :   - a Bio::Assembly::ScaffoldI object, a Bio::Assembly::IO | 
| 265 |  |  |  |  |  |  | object, a filename, or undef if all sequences were too small to | 
| 266 |  |  |  |  |  |  | be usable | 
| 267 |  |  |  |  |  |  | Returns :   Assembly results (file, IO object or assembly object) | 
| 268 |  |  |  |  |  |  | Args    :   Sequence input can be: | 
| 269 |  |  |  |  |  |  | * a sequence object arrayref | 
| 270 |  |  |  |  |  |  | * a FASTA file | 
| 271 |  |  |  |  |  |  | * a SFF file and optional MID information. Example: | 
| 272 |  |  |  |  |  |  | mid2@/home/xxx/myreads.sff | 
| 273 |  |  |  |  |  |  | * the path to an run analysis directory and MID information | 
| 274 |  |  |  |  |  |  | The reads must be between 50 and 2000 bp. Newbler does not support | 
| 275 |  |  |  |  |  |  | for input quality files. See the Newbler manual for details. | 
| 276 |  |  |  |  |  |  |  | 
| 277 |  |  |  |  |  |  | =cut | 
| 278 |  |  |  |  |  |  |  | 
| 279 |  |  |  |  |  |  |  | 
| 280 |  |  |  |  |  |  | =head2 _run | 
| 281 |  |  |  |  |  |  |  | 
| 282 |  |  |  |  |  |  | Title   :   _run | 
| 283 |  |  |  |  |  |  | Usage   :   $factory->_run() | 
| 284 |  |  |  |  |  |  | Function:   Make a system call and run TIGR Assembler | 
| 285 |  |  |  |  |  |  | Returns :   An assembly file | 
| 286 |  |  |  |  |  |  | Args    :   - FASTA file, SFF file and MID, or analysis dir and MID | 
| 287 |  |  |  |  |  |  | - optional QUAL file | 
| 288 |  |  |  |  |  |  |  | 
| 289 |  |  |  |  |  |  | =cut | 
| 290 |  |  |  |  |  |  |  | 
| 291 |  |  |  |  |  |  |  | 
| 292 |  |  |  |  |  |  | sub _run { | 
| 293 | 0 |  |  | 0 |  |  | my ($self, $fasta_file, $qual_file) = @_; | 
| 294 |  |  |  |  |  |  |  | 
| 295 |  |  |  |  |  |  | # fasta_file: [midinfo@]sffile|[midinfo@]projectdir|fastafile | 
| 296 |  |  |  |  |  |  | # qual_file: not supported by newbler | 
| 297 |  |  |  |  |  |  |  | 
| 298 |  |  |  |  |  |  | # Specify that we want a single ACE output file containing all contigs | 
| 299 | 0 |  |  |  |  |  | $self->ace(1); | 
| 300 |  |  |  |  |  |  |  | 
| 301 |  |  |  |  |  |  | # Setup needed files and filehandles first | 
| 302 | 0 |  |  |  |  |  | my ($output_fh, $output_file) = $self->_prepare_output_file( ); | 
| 303 |  |  |  |  |  |  |  | 
| 304 |  |  |  |  |  |  | # Set the output directory based on the the output file name | 
| 305 | 0 |  |  |  |  |  | my $output_dir = dirname($output_file); | 
| 306 | 0 |  |  |  |  |  | $self->out_dir($output_dir); | 
| 307 |  |  |  |  |  |  |  | 
| 308 |  |  |  |  |  |  | # Set a log file | 
| 309 | 0 |  |  |  |  |  | my $log_file = File::Spec->catfile($output_dir, '454Log.txt'); | 
| 310 |  |  |  |  |  |  |  | 
| 311 |  |  |  |  |  |  | # Get program executable | 
| 312 | 0 |  |  |  |  |  | my $exe = $self->executable; | 
| 313 |  |  |  |  |  |  |  | 
| 314 |  |  |  |  |  |  | # Get command-line options | 
| 315 | 0 |  |  |  |  |  | my $options = $self->_translate_params(); | 
| 316 |  |  |  |  |  |  |  | 
| 317 |  |  |  |  |  |  | # Usage: runAssembly [options] (sfffile | [regionlist:]analysisDir | readfastafile)... | 
| 318 |  |  |  |  |  |  | # where options is: [-o projdir] [-nrm] [-p (sfffile | [regionlist:]analysisDir)]... | 
| 319 | 0 |  |  |  |  |  | my @program_args = ( $exe, @$options, $fasta_file); | 
| 320 | 0 |  |  |  |  |  | my @ipc_args = ( \@program_args, '>', $log_file ); | 
| 321 |  |  |  |  |  |  |  | 
| 322 |  |  |  |  |  |  | # Print command for debugging | 
| 323 | 0 | 0 |  |  |  |  | if ($self->verbose() >= 0) { | 
| 324 | 0 |  |  |  |  |  | my $cmd = ''; | 
| 325 | 0 |  |  |  |  |  | $cmd .= join ( ' ', @program_args ); | 
| 326 | 0 |  |  |  |  |  | for ( my $i = 1 ; $i < scalar @ipc_args ; $i++ ) { | 
| 327 | 0 |  |  |  |  |  | my $element = $ipc_args[$i]; | 
| 328 | 0 |  |  |  |  |  | my $ref = ref($element); | 
| 329 | 0 |  |  |  |  |  | my $value; | 
| 330 | 0 | 0 | 0 |  |  |  | if ( $ref && $ref eq 'SCALAR') { | 
| 331 | 0 |  |  |  |  |  | $value = $$element; | 
| 332 |  |  |  |  |  |  | } else { | 
| 333 | 0 |  |  |  |  |  | $value = $element; | 
| 334 |  |  |  |  |  |  | } | 
| 335 | 0 |  |  |  |  |  | $cmd .= " $value"; | 
| 336 |  |  |  |  |  |  | } | 
| 337 | 0 |  |  |  |  |  | $self->debug( "$exe command = $cmd\n" ); | 
| 338 |  |  |  |  |  |  | } | 
| 339 |  |  |  |  |  |  |  | 
| 340 |  |  |  |  |  |  | # Execute command | 
| 341 | 0 |  |  |  |  |  | eval { | 
| 342 | 0 | 0 |  |  |  |  | IPC::Run::run(@ipc_args) || die("There was a problem running $exe. The ". | 
| 343 |  |  |  |  |  |  | "error message is: $!."); | 
| 344 |  |  |  |  |  |  | }; | 
| 345 | 0 | 0 |  |  |  |  | if ($@) { | 
| 346 | 0 |  |  |  |  |  | $self->throw("$exe call crashed: $@"); | 
| 347 |  |  |  |  |  |  | } | 
| 348 |  |  |  |  |  |  |  | 
| 349 |  |  |  |  |  |  | # Close filehandles | 
| 350 | 0 |  |  |  |  |  | close($output_fh); | 
| 351 |  |  |  |  |  |  |  | 
| 352 |  |  |  |  |  |  | # Result files | 
| 353 | 0 |  |  |  |  |  | my $ace_file              = File::Spec->catfile($output_dir, '454Contigs.ace'); | 
| 354 | 0 |  |  |  |  |  | my $aln_file              = File::Spec->catfile($output_dir, '454AlignmentInfo.tsv'); | 
| 355 | 0 |  |  |  |  |  | my $all_cont_fasta_file   = File::Spec->catfile($output_dir, '454AllContigs.fna'); | 
| 356 | 0 |  |  |  |  |  | my $all_cont_qual_file    = File::Spec->catfile($output_dir, '454AllContigs.qual'); | 
| 357 | 0 |  |  |  |  |  | my $large_cont_fasta_file = File::Spec->catfile($output_dir, '454LargeContigs.fna'); | 
| 358 | 0 |  |  |  |  |  | my $large_cont_qual_file  = File::Spec->catfile($output_dir, '454LargeContigs.qual'); | 
| 359 | 0 |  |  |  |  |  | my $metrics_file          = File::Spec->catfile($output_dir, '454NewblerMetrics.txt'); | 
| 360 | 0 |  |  |  |  |  | my $status_file           = File::Spec->catfile($output_dir, '454ReadStatus.txt'); | 
| 361 | 0 |  |  |  |  |  | my $progress_file         = File::Spec->catfile($output_dir, '454NewblerProgress.txt'); | 
| 362 | 0 |  |  |  |  |  | my $trim_file             = File::Spec->catfile($output_dir, '454TrimStatus.txt'); | 
| 363 | 0 |  |  |  |  |  | my $sff_dir               = File::Spec->catfile($output_dir, 'sff'); | 
| 364 |  |  |  |  |  |  |  | 
| 365 |  |  |  |  |  |  | # Remove all files except for the ACE file | 
| 366 | 0 |  |  |  |  |  | for my $file ($aln_file, $all_cont_fasta_file, $all_cont_qual_file, | 
| 367 |  |  |  |  |  |  | $large_cont_fasta_file, $large_cont_qual_file, $metrics_file, $status_file, | 
| 368 |  |  |  |  |  |  | $progress_file, $trim_file, $log_file ) { | 
| 369 | 0 |  |  |  |  |  | unlink $file; | 
| 370 |  |  |  |  |  |  | } | 
| 371 | 0 |  |  |  |  |  | rmtree( $sff_dir ); | 
| 372 |  |  |  |  |  |  |  | 
| 373 |  |  |  |  |  |  | # Move output file to proper location/name | 
| 374 | 0 | 0 |  |  |  |  | move ($ace_file, $output_file) or $self->throw("Could not move file ". | 
| 375 |  |  |  |  |  |  | "'$ace_file' to '$output_file': $!"); | 
| 376 |  |  |  |  |  |  |  | 
| 377 | 0 |  |  |  |  |  | return $output_file; | 
| 378 |  |  |  |  |  |  | } | 
| 379 |  |  |  |  |  |  |  | 
| 380 |  |  |  |  |  |  | 1; |