File Coverage

blib/lib/Bio/Tools/Run/Bowtie.pm
Criterion Covered Total %
statement 81 335 24.1
branch 16 192 8.3
condition 0 129 0.0
subroutine 17 27 62.9
pod 6 8 75.0
total 120 691 17.3


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::Run::Bowtie
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Dan Kortschak
7             #
8             # Copyright Dan Kortschak and Mark A. Jensen
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tools::Run::Bowtie - Run wrapper for the Bowtie short-read assembler *BETA*
17              
18             =head1 SYNOPSIS
19              
20             # create an index
21             $bowtie_build = Bio::Tools::Run::Bowtie->new();
22             $index = $bowtie_fac->run( 'reference.fasta', 'index_base' );
23              
24             # or with named args...
25              
26             $index = $bowtie_fac->run( -ref => 'reference.fasta', -ind => 'index_base' );
27              
28             # get the base name of the last index from an index builder
29             $index = $bowtie_fac->result;
30            
31             # create an assembly
32             $bowtie_fac = Bio::Tools::Run::Bowtie->new();
33             $bowtie_fac->want('Bio::Assembly::Scaffold');
34             $bowtie_assy = $bowtie_fac->run( 'reads.fastq', 'index_base' );
35            
36             # if IO::Uncompress::Gunzip is available and with named args...
37             $bowtie_assy = $bowtie_fac->run( -seq => 'reads.fastq.gz', -ind => 'index_base' );
38            
39             # paired-end
40             $bowtie_fac = Bio::Tools::Run::Bowtie->new(-command => 'paired',
41             -want => 'Bio::Assembly::Scaffold');
42             $bowtie_assy = $bowtie_fac->run( 'reads.fastq', 'index_base', 'paired-reads.fastq' );
43            
44             # be more strict
45             $bowtie_fac->set_parameters( -max_qual_mismatch => 50 );
46            
47             # create a Bio::Assembly::Scaffold object
48             $bowtie_assy = $bowtie_fac->run( 'reads.fastq', 'index_base', 'paired-reads.fastq' );
49            
50             # print consensus sequences from assembly object
51             for $contig ($bowtie_assy->all_contigs) {
52             print $contig->get_consensus_sequence->seq,"\n";
53             }
54            
55             # get the file object of the last assembly
56             $io = $bowtie_fac->result( -want => 'Bio::Root::IO' );
57            
58             # get a merged SeqFeature::Collection of all hits
59             # - currently only available with SAM format
60             $io = $bowtie_fac->result( -want => 'Bio::SeqFeature::Collection' );
61            
62             #... or the file name directly
63             $filename = $bowtie_fac->result( -want => 'raw' );
64            
65             =head1 DESCRIPTION
66              
67             This module provides a wrapper interface for Ben Langmead and Col
68             Trapnell's ultrafast memory-efficient short read aligner C
69             (see L for manuals and downloads).
70              
71              
72             =head1 OPTIONS
73              
74             C is complex, with many command-line options. This module attempts to
75             provide and options comprehensively. You can browse the choices like so:
76              
77             $bowtiefac = Bio::Tools::Run::Bowtie->new( -command => 'single' );
78             # all bowtie commands
79             @all_commands = $bowtiefac->available_parameters('commands');
80             @all_commands = $bowtiefac->available_commands; # alias
81             # just for single
82             @assemble_params = $bowtiefac->available_parameters('params');
83             @assemble_switches = $bowtiefac->available_parameters('switches');
84             @assemble_all_options = $bowtiefac->available_parameters();
85              
86             Reasonably mnemonic names have been assigned to the single-letter
87             command line options. These are the names returned by
88             C, and can be used in the factory constructor
89             like typical BioPerl named parameters.
90              
91             As a number of options are mutually exclusive, and the interpretation of
92             intent is based on last-pass option reaching bowtie with potentially unpredicted
93             results. This module will prevent inconsistent switches and parameters
94             from being passed.
95              
96             See L for details of bowtie
97             options.
98              
99             =head1 FILES
100              
101             When a command requires filenames, these are provided to the C method, not
102             the constructor (C). To see the set of files required by a command, use
103             C or the alias C:
104              
105             $bowtiefac = Bio::Tools::Run::Bowtie->new( -command => 'paired' );
106             @filespec = $bowtiefac->filespec;
107              
108             This example returns the following array:
109              
110             ind
111             seq
112             seq2
113             #out
114              
115             This indicates that ind (C index file base name), seq (fasta/fastq),and seq2
116             (fasta/fastq) files MUST be specified, and that the out file MAY be specified. Use
117             these in the C call like so:
118              
119             $bowtiefac->run( -ind => 'index_base', -seq => 'seq-a.fq',
120             -seq2 => 'seq-b.fq', -out => 'align.out' );
121              
122             Note that named parameters in this form allow you to specify the location of the outfile;
123             without named parameters, the outfile is located in a tempdir and does not persist beyond
124             the life of the object - with the exception of index creation.
125              
126             The object will store the programs STDOUT and STDERR output for you in the C
127             and C attributes:
128              
129             handle_map_warning($bowtiefac) if ($bowtiefac->stderr =~ /warning/);
130              
131             =head1 FEEDBACK
132              
133             =head2 Mailing Lists
134              
135             User feedback is an integral part of the evolution of this and other
136             Bioperl modules. Send your comments and suggestions preferably to
137             the Bioperl mailing list. Your participation is much appreciated.
138              
139             bioperl-l@bioperl.org - General discussion
140             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
141              
142             =head2 Support
143              
144             Please direct usage questions or support issues to the mailing list:
145              
146             L
147              
148             Rather than to the module maintainer directly. Many experienced and
149             reponsive experts will be able look at the problem and quickly
150             address it. Please include a thorough description of the problem
151             with code and data examples if at all possible.
152              
153             =head2 Reporting Bugs
154              
155             Report bugs to the Bioperl bug tracking system to help us keep track
156             of the bugs and their resolution. Bug reports can be submitted via
157             the web:
158              
159             http://redmine.open-bio.org/projects/bioperl/
160              
161             =head1 AUTHOR - Dan Kortschak
162              
163             Email dan.kortschak adelaide.edu.au
164              
165             =head1 CONTRIBUTORS
166              
167             Mark A. Jensen (maj -at- fortinbras -dot- us)
168              
169             =head1 APPENDIX
170              
171             The rest of the documentation details each of the object methods.
172             Internal methods are usually preceded with a _
173              
174             =cut
175              
176             # Let the code begin...
177              
178              
179             package Bio::Tools::Run::Bowtie;
180 1     1   139986 use strict;
  1         2  
  1         31  
181             our $HAVE_IO_UNCOMPRESS;
182              
183             BEGIN {
184 1     1   51 eval 'require IO::Uncompress::Gunzip; $HAVE_IO_UNCOMPRESS = 1';
185             }
186              
187 1     1   11 use IPC::Run;
  1         2  
  1         62  
188              
189             # Object preamble - inherits from Bio::Root::Root
190              
191 1     1   448 use lib '../../..';
  1         494  
  1         7  
192 1     1   555 use Bio::Tools::Run::Bowtie::Config;
  1         3  
  1         177  
193 1     1   510 use Bio::Tools::Run::WrapperBase;
  1         2  
  1         9  
194 1     1   525 use Bio::Tools::Run::WrapperBase::CommandExts;
  1         2  
  1         10  
195 1     1   569 use Bio::Tools::GuessSeqFormat;
  1         2729  
  1         11  
196 1     1   446 use Bio::Tools::Run::Samtools;
  1         2  
  1         6  
197 1     1   553 use Bio::Seq;
  1         28259  
  1         11  
198 1     1   33 use File::Basename;
  1         1  
  1         84  
199              
200 1     1   7 use base qw( Bio::Tools::Run::WrapperBase Bio::Tools::Run::AssemblerBase );
  1         2  
  1         548  
201              
202             ## bowtie
203             our $program_name = '*bowtie';
204             our $default_cmd = 'single';
205              
206             our $asm_format; # this is determined dynamically
207              
208             # Note:
209             # other globals required by Bio::Tools::Run::AssemblerBase are
210             # imported from Bio::Tools::Run::Bowtie::Config
211              
212             our $qual_param = undef;
213             our $use_dash = 'mixed';
214             our $join = ' ';
215              
216             =head2 new()
217              
218             Title : new
219             Usage : my $obj = new Bio::Tools::Run::Bowtie();
220             Function: Builds a new Bio::Tools::Run::Bowtie object
221             Returns : an instance of Bio::Tools::Run::Bowtie
222             Args :
223              
224             =cut
225              
226             sub new {
227 2     2 1 108 my ($class,@args) = @_;
228 2 50       18 unless (grep /command/, @args) {
229 0         0 push @args, '-command', $default_cmd;
230             }
231             #default to SAM output if no other format specified and we are running an alignment
232 2         10 my %args=@args;
233 2 50       20 if ($args{'-command'} =~ m/(?:single|paired|crossbow)/) {
234 2 50       16 unless (grep /(?:sam_format|concise|quiet|refout|refidx)/, @args) {
235 2         5 push @args, ('-sam_format', 1);
236             }
237             }
238 2         43 my $self = $class->SUPER::new(@args);
239 2         11 foreach (keys %command_executables) {
240 10         31 $self->executables($_, $self->_find_executable($command_executables{$_}));
241             }
242 2         13 my ($want) = $self->_rearrange([qw(WANT)],@args);
243 2         60 $self->want($want);
244 2         7 $asm_format = $self->_assembly_format;
245 2         6 $self->parameters_changed(1); # set on instantiation, per Bio::ParameterBaseI
246 2         15 return $self;
247             }
248              
249             =head2 run()
250              
251             Title : run
252             Usage : $assembly = $bowtie_assembler->run($read1_fastq_file,
253             $index_location,
254             $read2_fastq_file);
255             $assembly = $bowtie_assembler->run(%params);
256             Function: Run the bowtie assembly pipeline.
257             Returns : Assembly results (file, IO object or Assembly object)
258             Args : - fastq file containing single-end reads
259             - name of the base of the bowtie index
260             - [optional] fastq file containing paired-end reads
261             Named params are also available with args:
262             -seq, -seq2, -ind (bowtie index), -ref (fasta reference) and -out
263             Note : gzipped inputs are allowed if IO::Uncompress::Gunzip
264             is available
265             The behaviour for locating indexes follows the definition in
266             the bowtie manual - you may use the environment variable
267             BOWTIE_INDEXES to specify the index path or use an 'indexes'
268             directory under the directory where the bowtie executable
269             is located
270              
271             =cut
272              
273             sub run {
274 0     0 1 0 my $self = shift;
275              
276 0         0 my ($arg1, $arg2, $arg3); # these are useless names because the different
277             # programs take very different arguments
278 0         0 my ($index, $seq, $seq2, $ref, $out); # these are the meaningful names that are used
279             # with named args
280              
281 0 0       0 if (!(@_ % 2)) {
282 0         0 my %args = @_;
283 0 0       0 if ((grep /^-\w+/, keys %args) == keys %args) {
    0          
284 0         0 ($index, $seq, $seq2, $ref, $out) =
285             $self->_rearrange([qw( IND SEQ SEQ2 REF OUT )], @_);
286             } elsif (grep /^-\w+/, keys %args) {
287 0         0 $self->throw("Badly formed named args: ".join(' ',@_));
288             } else {
289 0         0 ($arg1, $arg2) = @_;
290             }
291             } else {
292 0 0       0 if (grep /^-\w+/, @_) {
293 0         0 $self->throw("Badly formed named args: ".join(' ',@_));
294             } else {
295 0         0 ($arg1, $arg2, $arg3) = @_;
296             }
297             }
298              
299             # Sanity checks
300 0         0 $self->_check_executable();
301 0 0       0 my $cmd = $self->command if $self->can('command');
302              
303 0         0 for ($cmd) {
304 0 0       0 m/(?:single|paired|crossbow)/ && do {
305 0   0     0 $seq ||= $arg1;
306 0   0     0 $index ||= $arg2;
307 0   0     0 $seq2 ||= $arg3;
308 0 0       0 $seq or $self->throw("Fasta/fastq/raw read(s) file/Bio::Seq required at arg 1/-seq");
309 0 0       0 $index or $self->throw("Bowtie index base required at arg 2/-index");
310              
311             # expand gzipped files as nec.
312 0         0 for ($seq, $seq2) {
313 0 0       0 next unless $_;
314 0 0       0 if (/\.gz[^.]*$/) {
315 0 0       0 unless ($HAVE_IO_UNCOMPRESS) {
316 0         0 croak( "IO::Uncompress::Gunzip not available, can't expand '$_'" );
317             }
318 0         0 my ($tfh, $tf) = $self->io->tempfile;
319 0         0 my $z = IO::Uncompress::Gunzip->new($_);
320 0         0 while (<$z>) { print $tfh $_ }
  0         0  
321 0         0 close $tfh;
322 0         0 $_ = $tf;
323             }
324             }
325              
326             # confirm index files exist
327             $self->_validate_file_input( -ind => $index ) or
328             ($self->_validate_file_input( -ind => $self->io->catfile(dirname($self->executable),'indexes',$index)) and
329             $index = $self->io->catfile(dirname($self->executable),'indexes',$index)) or
330             ($self->_validate_file_input( -ind => $self->io->catfile($ENV{BOWTIE_INDEXES},$index)) and
331 0 0 0     0 $index = $self->io->catfile($ENV{BOWTIE_INDEXES},$index)) or
      0        
      0        
      0        
332             $self->throw("Incorrect filetype (expecting bowtie index) or absent file arg 2/-index");
333            
334             # bowtie prepare the multiple input types
335 0         0 $seq = $self->_prepare_input_sequences($seq);
336 0 0       0 if ($cmd =~ m/^p/) {
337 0 0       0 $seq2 && ($seq2 = $self->_prepare_input_sequences($seq2));
338             } else {
339 0 0       0 $seq2 && $self->throw("Second sequence input not wanted for command: $cmd");
340             }
341            
342             # Assemble
343 0         0 my $format = $self->_assembly_format;
344 0         0 my $suffix = '.'.$format;
345            
346 0 0       0 if ($out) {
347 0         0 $out .= $suffix;
348             } else {
349 0         0 my ($bowtieh, $bowtief) = $self->io->tempfile( -dir => $self->tempdir(), -suffix => $suffix );
350 0         0 $bowtieh->close;
351 0         0 $out = $bowtief;
352             }
353              
354 0         0 my %params = ( -ind => $index, -seq => $seq, -seq2 => $seq2, -out => $out );
355             map {
356 0 0       0 delete $params{$_} unless defined $params{$_}
  0         0  
357             } keys %params;
358 0         0 $self->_run(%params);
359            
360 0         0 $self->{'_result'}->{'index'} = $index;
361 0         0 $self->{'_result'}->{'file_name'} = $out;
362 0         0 $self->{'_result'}->{'format'} = $format;
363 0         0 $self->{'_result'}->{'file'} = Bio::Root::IO->new( -file => $out );
364            
365 0         0 return $self->result;
366             };
367            
368 0 0       0 m/build/ && do {
369 0   0     0 $ref ||= $arg1;
370 0   0     0 $index ||= $arg2;
371 0 0       0 $ref or $self->throw("Fasta read(s) file/Bio::Seq required at arg 1/-ref");
372 0   0     0 $index ||= $self->io->tempdir(CLEANUP => 1).'/index'; # we want a new one each time
373 0 0       0 $arg3 && $self->throw("Second sequence input not wanted for command: $cmd");
374              
375 0         0 my $format = $self->_assembly_format;
376            
377             # expand gzipped file as nec.
378 0 0       0 if ($ref =~ (m/\.gz[^.]*$/)) {
379 0 0       0 unless ($HAVE_IO_UNCOMPRESS) {
380 0         0 croak( "IO::Uncompress::Gunzip not available, can't expand '$_'" );
381             }
382 0         0 my ($tfh, $tf) = $self->io->tempfile;
383 0         0 my $z = IO::Uncompress::Gunzip->new($_);
384 0         0 while (<$z>) { print $tfh $_ }
  0         0  
385 0         0 close $tfh;
386 0         0 $ref = $tf;
387             }
388              
389             # bowtie prepare the two input types for the first argument
390 0         0 $ref = $self->_prepare_input_sequences($ref);
391            
392             # Build index
393 0         0 $self->_run( -ref => $ref, -out => $index );
394 0         0 $self->{'_result'}->{'format'} = $format;
395 0         0 $self->{'_result'}->{'file_name'} = $index;
396            
397 0         0 return $index;
398             };
399            
400 0 0       0 m/inspect/ && do {
401 0   0     0 $index ||= $arg1;
402 0   0     0 $out ||= $arg2;
403 0 0       0 $index or $self->throw("Bowtie index required at arg 1");
404              
405             $self->_validate_file_input( -ind => $index ) or
406             ($self->_validate_file_input( -ind => $self->io->catfile(dirname($self->executable),'indexes',$index)) and
407             $index = $self->io->catfile(dirname($self->executable),'indexes',$index)) or
408             ($self->_validate_file_input( -ind => $self->io->catfile($ENV{BOWTIE_INDEXES},$index)) and
409 0 0 0     0 $index = $self->io->catfile($ENV{BOWTIE_INDEXES},$index)) or
      0        
      0        
      0        
410             $self->throw("'$index' doesn't look like a bowtie index or index component is missing at arg 1/-ind");
411 0 0       0 $arg3 && $self->throw("Second sequence input not wanted for command: $cmd");
412              
413             # Inspect index
414 0         0 my $format = $self->_assembly_format;
415 0         0 my $suffix = '.'.$format;
416              
417 0 0       0 if ($out) {
418 0         0 $out .= $suffix;
419             } else {
420 0         0 my ($desch, $descf) = $self->io->tempfile( -dir => $self->tempdir(), -suffix => $suffix );
421 0         0 $desch->close;
422 0         0 $out = $descf;
423             }
424              
425 0         0 $self->_run( -ind => $index, -out => $out );
426            
427 0         0 $self->{'_result'}->{'file_name'} = $out;
428 0         0 $self->{'_result'}->{'format'} = $format;
429 0         0 $self->{'_result'}->{'file'} = Bio::Root::IO->new( -file => $out );
430            
431 0         0 return $self->result;
432             }
433             }
434             }
435              
436             =head2 want()
437              
438             Title : want
439             Usage : $bowtiefac->want( $class )
440             Function: make factory return $class, or raw (scalar) results in file
441             Returns : return wanted type
442             Args : [optional] string indicating class or raw of wanted result
443              
444             =cut
445              
446             sub want {
447 2     2 1 2 my $self = shift;
448 2 50       8 return $self->{'_want'} = shift if @_;
449 0         0 return $self->{'_want'};
450             }
451              
452             =head2 result()
453              
454             Title : result
455             Usage : $bowtiefac->result( [-want => $type|$format] )
456             Function: return result in wanted format
457             Returns : results
458             Args : [optional] hashref of wanted type
459              
460             =cut
461              
462             sub result {
463 0     0 1 0 my ($self, @args) = @_;
464            
465 0 0       0 my $want = $self->want ? $self->want : $self->want($self->_rearrange([qw(WANT)],@args));
466 0 0       0 my $cmd = $self->command if $self->can('command');
467 0         0 my $format = $self->{'_result'}->{'format'};
468              
469 0 0 0     0 return $self->{'_result'}->{'format'} if (defined $want && $want eq 'format');
470 0 0 0     0 return $self->{'_result'}->{'file_name'} if (!$want || $want eq 'raw' || $cmd eq 'build');
      0        
471 0 0       0 return $self->{'_result'}->{'file'} if ($want =~ m/^Bio::Root::IO/);
472            
473 0         0 for ($cmd) {
474 0 0       0 m/(?:single|paired|crossbow)/ && do {
475 0         0 my $scaffold;
476 0         0 for ($format) {
477 0 0 0     0 m/^bowtie/i && $want =~ m/^Bio::Assembly::Scaffold/ && do {
478 0 0 0     0 unless (defined $self->{'_result'}->{'object'} &&
479             ref($self->{'_result'}->{'object'}) =~ m/^Bio::Assembly::Scaffold/) {
480             $self->{'_result'}->{'object'} =
481             $self->_export_results( $self->{'_result'}->{'file_name'},
482 0         0 -index => $self->{'_result'}->{'index'},
483             -keep_asm => 1 );
484             }
485 0         0 last;
486             };
487 0 0 0     0 m/^bowtie/i && $want =~ m/^Bio::SeqFeature::Collection/ && do {
488 0         0 $self->warn("Don't know how to create a $want object for $cmd with bowtie format - try SAM format.");
489 0         0 last;
490             };
491 0 0 0     0 m/^sam/i && $want =~ m/^Bio::Assembly::Scaffold/ && do {
492 0 0 0     0 unless (defined $self->{'_result'}->{'object'} &&
493             ref($self->{'_result'}->{'object'}) =~ m/^Bio::Assembly::Scaffold/) {
494 0         0 my $bamf = $self->_make_bam($self->{'_result'}->{'file_name'}, 1);
495 0         0 my $inspector = Bio::Tools::Run::Bowtie->new( -command => 'inspect' );
496 0         0 my $refdb = $inspector->run($self->{'_result'}->{'index'});
497 0         0 $self->{'_result'}->{'object'} =
498             $self->_export_results($bamf, -refdb => $refdb, -keep_asm => 1 );
499             }
500 0         0 last;
501             };
502 0 0 0     0 m/^sam/i && $want =~ m/^Bio::SeqFeature::Collection/ && do {
503 0 0 0     0 unless (defined $self->{'_result'}->{'object'} &&
504             ref($self->{'_result'}->{'object'}) =~ m/^Bio::Assembly::Scaffold/) {
505 0         0 my $bamf = $self->_make_bam($self->{'_result'}->{'file_name'}, 0);
506 0         0 my $convert = Bio::Tools::Run::BEDTools->new( -command => 'bam_to_bed' );
507 0         0 my $bedf = $convert->run( -bed => $bamf );
508 0         0 my $merge = Bio::Tools::Run::BEDTools->new( -command => 'merge' );
509 0         0 $merge->run($self->{'_result'}->{'index'});
510 0         0 $self->{'_result'}->{'object'} = $merge->result( -want => $want );
511             }
512 0         0 last;
513             };
514 0         0 do {
515 0         0 $self->warn("Don't know how to create a $want object for $cmd.");
516 0         0 return;
517             }
518             };
519 0         0 last;
520             };
521 0 0       0 m/inspect/ && do {
522 0         0 for ($want) {
523 0 0 0     0 m/^Bio::SeqIO/ && $format eq 'fasta' && do {
524 0 0 0     0 unless (defined $self->{'_result'}->{'object'} &&
525             ref($self->{'_result'}->{'object'}) =~ m/^Bio::SeqIO/) {
526             $self->{'_result'}->{'object'} =
527 0         0 Bio::SeqIO->new(-file => $self->{'_result'}->{'file'},
528             -format => 'fasta');
529             }
530 0         0 last;
531             };
532 0 0 0     0 m/^Bio::SeqIO/ && $format ne 'fasta' && do {
533 0         0 $self->warn("Don't know how to create a $want object for names only - try -want => 'Bio::Root::IO'.");
534 0         0 return;
535             };
536 0         0 do {
537 0         0 $self->warn("Don't know how to create a $want object for $cmd.");
538 0         0 return;
539             }
540             }
541             }
542             }
543            
544 0         0 return $self->{'_result'}->{'object'};
545             }
546              
547             =head2 _determine_format()
548              
549             Title : _determine_format
550             Usage : $bowtiefac->_determine_format
551             Function: determine the format of output for current options
552             Returns : format of bowtie output
553             Args :
554              
555             =cut
556              
557             sub _determine_format {
558 2     2   3 my ($self) = shift;
559              
560 2 50       52 my $cmd = $self->command if $self->can('command');
561 2         15 for ($cmd) {
562 2 50       9 m/build/ && do {
563 0         0 return 'ebwt';
564             };
565 2 50       6 m/inspect/ && do {
566 0 0       0 $self->{'_summary'} && return 'text';
567 0 0       0 return $self->{'_names_only'} ? 'text' : 'fasta';
568             };
569 2 50       32 m/(?:single|paired|crossbow)/ && do {
570 2         4 my $format = 'bowtie'; # this is our default position
571 2         9 for (keys %format_lookup) {
572 10 100       21 $format = $format_lookup{$_} if $self->{'_'.$_};
573             }
574 2         5 return $format;
575             }
576             }
577             }
578              
579             =head2 _make_bam()
580              
581             Title : _make_bam
582             Usage : $bowtiefac->_make_bam( $file, $sort )
583             Function: make a sorted BAM format file from SAM file
584             Returns : sorted BAM file name
585             Args : SAM file name and boolean flag to select sorted BAM format
586              
587             =cut
588              
589             sub _make_bam {
590 0     0   0 my ($self, $file, $sort) = @_;
591            
592 0 0 0     0 $self->throw("'$file' does not exist or is not readable")
593             unless ( -e $file && -r _ );
594              
595             # make a sorted bam file from a sam file input
596 0         0 my ($bamh, $bamf) = $self->io->tempfile( -dir => $self->tempdir(), -suffix => '.bam' );
597 0         0 $bamh->close;
598            
599 0         0 my $samt = Bio::Tools::Run::Samtools->new( -command => 'view',
600             -sam_input => 1,
601             -bam_output => 1 );
602              
603 0         0 $samt->run( -bam => $file, -out => $bamf );
604              
605 0 0       0 if ($sort) {
606 0         0 my ($srth, $srtf) = $self->io->tempfile( -dir => $self->io->tempdir(CLEANUP=>1), -suffix => '.srt' );
607             # shared tempdir, so make new - otherwise it is scrubbed during Bio::DB::Sam
608 0         0 $srth->close;
609            
610 0         0 $samt = Bio::Tools::Run::Samtools->new( -command => 'sort' );
611 0         0 $samt->run( -bam => $bamf, -pfx => $srtf);
612            
613 0         0 return $srtf.'.bam';
614             } else {
615 0         0 return $bamf;
616             }
617             }
618              
619             =head2 _validate_file_input()
620              
621             Title : _validate_file_input
622             Usage : $bowtiefac->_validate_file_input( -type => $file )
623             Function: validate file type for file spec
624             Returns : file type if valid type for file spec
625             Args : hash of filespec => file_name
626              
627             =cut
628              
629             sub _validate_file_input {
630 0     0   0 my ($self, @args) = @_;
631 0         0 my (%args);
632 0 0       0 if (grep (/^-/, @args)) { # named parms
633 0 0       0 $self->throw("Wrong number of args - requires one named arg") if (@args > 2);
634 0         0 s/^-// for @args;
635 0         0 %args = @args;
636             } else {
637 0         0 $self->throw("Must provide named filespec");
638             }
639            
640 0         0 for (keys %args) {
641 0 0       0 m/^seq|seq2|ref$/ && do {
642 0 0 0     0 return unless ( -e $args{$_} && -r _ );
643 0         0 my $guesser = Bio::Tools::GuessSeqFormat->new(-file=>$args{$_});
644 0 0       0 return $guesser->guess if grep {$guesser->guess =~ m/$_/} @{$accepted_types{$_}};
  0         0  
  0         0  
645             };
646 0 0       0 m/^ind$/ && do {
647             return 'ebwt' if
648             (-e $args{$_}.'.1.ebwt' && -e $args{$_}.'.2.ebwt' && -e $args{$_}.'.3.ebwt' &&
649 0 0 0     0 -e $args{$_}.'.4.ebwt' && -e $args{$_}.'.rev.1.ebwt' && -e $args{$_}.'.rev.2.ebwt');
      0        
      0        
      0        
      0        
650             }
651             }
652 0         0 return;
653             }
654              
655             =head1 Bio::Tools::Run::AssemblerBase overrides
656              
657             =head2 _assembly_format()
658              
659             Title : _assembly_format
660             Usage : $bowtiefac->_determine_format
661             Function: set the format of output for current options
662             Returns : format of bowtie output
663             Args :
664              
665             =cut
666              
667             sub _assembly_format {
668 2     2   2 my $self = shift;
669              
670 2         8 my $format = $self->_determine_format;
671 2         17 return $self->SUPER::_assembly_format($format);
672             }
673            
674              
675             =head2 _check_sequence_input()
676              
677             No-op.
678              
679             =cut
680              
681             sub _check_sequence_input {
682 0     0   0 return 1;
683             }
684              
685             =head2 _check_optional_quality_input()
686              
687             No-op.
688              
689             =cut
690              
691             sub _check_optional_quality_input {
692 0     0   0 return 1;
693             }
694              
695             =head2 _prepare_input_sequences()
696              
697             Prepare and check input sequences for bowtie.
698              
699             =cut
700              
701             sub _prepare_input_sequences {
702              
703 0     0   0 my ($self, @args) = @_;
704 0         0 my (%args, $read);
705 0 0       0 if (grep (/^-/, @args)) { # named parms
706 0 0       0 $self->throw("Input args not an even number") unless !(@args % 2);
707 0         0 %args = @args;
708 0         0 ($read) = @args{qw( -sequence )};
709             } else {
710 0         0 ($read) = @args;
711             }
712              
713             # Could use the AssemblerBase routine for this, except that would not permit
714             # an array of strings
715 0 0       0 if ($self->inline) { # expect inline data
716 0 0 0     0 if (UNIVERSAL::isa($read,'can') && $read->isa("Bio::PrimarySeqI")) { # we have a Bio::*Seq*
717 0         0 $read=$read->seq();
718             } else { # we have something else
719 0 0       0 if (ref($read) =~ /ARRAY/i) {
720 0         0 my @ts;
721 0         0 foreach my $seq (@$read) {
722 0 0       0 if ($seq->isa("Bio::PrimarySeqI")) {
723 0         0 $seq=$seq->seq();
724             } else {
725 0 0       0 next if $read=~m/[[^:alpha:]]/;
726             }
727 0         0 push @ts,$seq;
728             }
729 0 0       0 $self->throw("bowtie requires at least one sequence read") unless (@ts);
730 0 0       0 if (@ts>1) {
731 0         0 $read="'".join(',',@ts)."'";
732             } else {
733 0         0 ($read)=@ts;
734             }
735             } else { #must be a string... fail if non-alpha
736 0 0       0 $self->throw("bowtie requires at least one valid sequence read") if $read=~m/[[^:alpha:]]/;
737             }
738             }
739             } else { # expect file(s) - so test whether it's/they're appropriate
740             # and make a comma-separated list of filenames
741 0 0       0 my @ts = (ref($read) =~ /ARRAY/i) ? @$read : ($read);
742 0         0 for my $file (@ts) {
743 0 0       0 if ( -e $file ) {
744 0 0       0 my $cmd = $self->command if $self->can('command');
745 0         0 my $guesser = Bio::Tools::GuessSeqFormat->new(-file=>$file);
746 0         0 for ($guesser->guess) {
747 0 0       0 m/^fasta$/ && do {
748 0 0       0 $cmd =~ m/^b/ && last;
749 0 0 0     0 ($cmd =~ m/^c/ or $self->fastq or $self->raw) and $self->throw("Fasta reads file '$file' inappropriate");
      0        
750 0         0 $self->fasta(1);
751 0         0 last;
752             };
753 0 0       0 m/^fastq$/ && do {
754 0 0 0     0 ($cmd =~ m/^[cb]/ or $self->fasta or $self->raw) and $self->throw("Fastq reads file '$file' inappropriate");
      0        
755 0         0 $self->fastq(1);
756 0         0 last;
757             };
758 0 0       0 m/^tab$/ && do {
759 0 0       0 $cmd =~ m/^c/ or $self->throw("Crossbow reads file '$file' inappropriate"); # this is unrecoverable since the object has default program defined
760 0         0 last;
761             };
762 0 0       0 m/^raw$/ && do {
763 0 0 0     0 ($cmd =~ m/^[cb]/ or $self->fasta or $self->fastq) and $self->throw("Raw reads file '$file' inappropriate");
      0        
764 0         0 $self->raw(1);
765 0         0 last;
766             };
767 0         0 do {
768 0         0 $self->throw("File '$file' not a recognised bowtie input filetype");
769             }
770             }
771             } else {
772 0         0 $self->throw("Sequence read file '$file' does not exist");
773             }
774             }
775 0 0       0 if (@ts>1) {
776 0         0 $read="'".join(',',@ts)."'";
777             } else {
778 0         0 ($read)=@ts;
779             }
780             }
781              
782 0         0 return $read;
783             }
784              
785             =head2 set_parameters()
786              
787             Title : set_parameters
788             Usage : $bowtiefac->set_parameters(%params);
789             Function: sets the parameters listed in the hash or array,
790             maintaining sane options.
791             Returns : true on success
792             Args : [optional] hash or array of parameter/values.
793             Note : This will unset conflicts and set required options,
794             but will not prevent non-sane requests in the arguments
795              
796             =cut
797              
798             sub set_parameters {
799 8     8 1 1101 my ($self, @args) = @_;
800              
801             # Mutually exclusive switches/params prevented from being set to
802             # avoid confusion resulting from setting incompatible switches.
803              
804 8 50       23 $self->throw("Input args not an even number") if (@args % 2);
805 8         53 my %args = @args;
806              
807              
808 8         26 foreach (keys %args) {
809 88         46 my @added;
810             my @removed;
811 88         98 s/^-//;
812 88         53 foreach my $conflict (@{$incompat_params{$_}}) {
  88         118  
813 66 50       84 return if grep /$conflict/, @added;
814 66         64 delete $args{'-'.$conflict};
815 66 100       95 $args{'-'.$conflict} = undef if $self->{'_'.$conflict};
816 66         56 push @removed, $conflict;
817             }
818 88         54 foreach my $requirement (@{$corequisite_switches{$_}}) {
  88         124  
819 12 50       33 return if grep /$requirement/, @removed;
820 12 50       22 $args{'-'.$requirement}=1 if $args{$_};
821 12         15 push @added, $requirement;
822             }
823             }
824              
825 8         48 return $self->SUPER::set_parameters(%args);
826             }
827              
828             =head2 version()
829              
830             Title : version
831             Usage : $version = $bowtiefac->version()
832             Function: Returns the program version (if available)
833             Returns : string representing location and version of the program
834              
835             =cut
836              
837             sub version{
838 0     0 1   my ($self) = @_;
839              
840 0 0         my $cmd = $self->command if $self->can('command');
841              
842 0 0         defined $cmd or $self->throw("No command defined - cannot determine program executable");
843              
844 0           my ($in, $out, $err);
845 0           my $dum;
846 0           $in = \$dum;
847 0           $out = \$self->{'stdout'};
848 0           $err = \$self->{'stderr'};
849              
850             # Get program executable
851 0           my $exe = $self->executable;
852             # Get version switch from switches, translate and dash it
853 0           my $version_switch = $param_translation{"$command_prefixes{$cmd}|version"};
854 0           $version_switch = $self->_dash_switch( $version_switch );
855              
856 0           my @ipc_args = ( $exe, $version_switch );
857              
858 0           eval {
859 0 0         IPC::Run::run(\@ipc_args, $in, $out, $err) or
860             die ("There was a problem running $exe : $!");
861             };
862 0 0         if ($@) {
863 0           $self->throw("$exe call crashed: $@");
864             }
865              
866 0           my @details = split("\n",$self->stdout);
867 0           (my $version) = grep /$exe version [[:graph:]]*$/, @details;
868 0           $version =~ s/version //;
869 0           (my $addressing) = grep /-bit$/, @details;
870              
871 0           return $version.' '.$addressing;
872             }
873              
874 0     0 0   sub available_commands { shift->available_parameters('commands') };
875              
876 0     0 0   sub filespec { shift->available_parameters('filespec') };
877              
878             1;