File Coverage

lib/Bio/Roary/ParallelAllAgainstAllBlast.pm
Criterion Covered Total %
statement 77 78 98.7
branch 4 8 50.0
condition n/a
subroutine 19 19 100.0
pod 0 2 0.0
total 100 107 93.4


line stmt bran cond sub pod time code
1             package Bio::Roary::ParallelAllAgainstAllBlast;
2             $Bio::Roary::ParallelAllAgainstAllBlast::VERSION = '3.11.0';
3             # ABSTRACT: Run all against all blast in parallel
4              
5              
6 3     3   124739 use Moose;
  3         383263  
  3         23  
7 3     3   20287 use Bio::Roary::Exceptions;
  3         8  
  3         72  
8 3     3   762 use Bio::Roary::ChunkFastaFile;
  3         11  
  3         198  
9 3     3   1383 use Bio::Roary::External::Makeblastdb;
  3         30  
  3         117  
10 3     3   1202 use Bio::Roary::External::Blastp;
  3         11  
  3         121  
11 3     3   24 use Cwd;
  3         5  
  3         207  
12 3     3   19 use File::Temp;
  3         6  
  3         193  
13 3     3   18 use File::Basename;
  3         6  
  3         2078  
14             with 'Bio::Roary::JobRunner::Role';
15              
16             has 'fasta_file' => ( is => 'ro', isa => 'Str', required => 1 );
17             has 'blast_results_file_name' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build_blast_results_file_name' );
18             has 'makeblastdb_exec' => ( is => 'ro', isa => 'Str', default => 'makeblastdb' );
19             has 'blastp_exec' => ( is => 'ro', isa => 'Str', default => 'blastp' );
20             has 'segmasker_exec' => ( is => 'ro', isa => 'Str', default => 'segmasker' );
21             has 'perc_identity' => ( is => 'ro', isa => 'Num', default => 98 );
22             has '_chunk_fasta_file_obj' => ( is => 'ro', isa => 'Bio::Roary::ChunkFastaFile', lazy => 1, builder => '_build__chunk_fasta_file_obj' );
23             has '_sequence_file_names' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__sequence_file_names' );
24             has '_makeblastdb_obj' => ( is => 'ro', isa => 'Bio::Roary::External::Makeblastdb', lazy => 1, builder => '_build__makeblastdb_obj' );
25             has '_blast_database' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__blast_database' );
26             has 'cpus' => ( is => 'ro', isa => 'Int', default => 1 );
27              
28             has '_working_directory' =>
29             ( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } );
30             has '_working_directory_name' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__working_directory_name' );
31              
32             has 'memory_in_mb' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_build_memory_in_mb' );
33              
34              
35             sub BUILD {
36 3     3 0 8 my ($self) = @_;
37 3         94 $self->_makeblastdb_obj();
38             }
39              
40              
41             sub _build__blast_database {
42 3     3   8 my ($self) = @_;
43 3         115 return $self->_makeblastdb_obj->output_database;
44             }
45              
46             sub _build__makeblastdb_obj {
47 3     3   8 my ($self) = @_;
48 3         79 my $blast_database =
49             Bio::Roary::External::Makeblastdb->new( fasta_file => $self->fasta_file, exec => $self->makeblastdb_exec, job_runner => $self->job_runner, cpus => $self->cpus );
50 3         25 $blast_database->run();
51 3         159 return $blast_database;
52             }
53              
54             sub _build__chunk_fasta_file_obj {
55 3     3   9 my ($self) = @_;
56 3         119 return Bio::Roary::ChunkFastaFile->new( fasta_file => $self->fasta_file, );
57             }
58              
59             sub _build__sequence_file_names {
60 3     3   16 my ($self) = @_;
61 3         132 return $self->_chunk_fasta_file_obj->sequence_file_names;
62             }
63              
64             sub _build__working_directory_name {
65 3     3   11 my ($self) = @_;
66 3         102 return $self->_working_directory->dirname();
67             }
68              
69             sub _build_blast_results_file_name {
70 1     1   3 my ($self) = @_;
71 1         30 return join( '/', ( $self->_working_directory_name, 'blast_results' ) );
72             }
73              
74             sub _combine_blast_results {
75 3     3   19 my ( $self, $output_files ) = @_;
76 3         17 for my $output_file ( @{$output_files} ) {
  3         20  
77 3 50       142 Bio::Roary::Exceptions::FileNotFound->throw( error => "Cant find blast results: " . $output_file )
78             unless ( -e $output_file );
79             }
80 3 50       127 if ( -e $self->blast_results_file_name )
81             {
82 0         0 system( "rm " . $self->blast_results_file_name );
83             }
84 3         127 system( "touch " . $self->blast_results_file_name );
85 3         93 for my $output_file ( @{$output_files} ) {
  3         49  
86 3         412 system( "cat $output_file >> " . $self->blast_results_file_name );
87             }
88 3         85 return 1;
89             }
90              
91             sub _build_memory_in_mb
92             {
93 3     3   9 my ($self) = @_;
94 3         89 my $filename = $self->fasta_file;
95 3         7 my $file_size = 3000;
96 3 50       96 if(-e $filename)
97             {
98 3         26 $file_size = -s $filename;
99 3         9 $file_size *=12;
100 3         24 $file_size = int($file_size/1000000);
101 3 50       14 $file_size = 3000 if($file_size < 3000);
102             }
103              
104 3         114 return $file_size;
105             }
106              
107             sub run {
108 3     3 0 15 my ($self) = @_;
109 3         9 my @expected_output_files;
110             my @commands_to_run;
111            
112 3         10 for my $filename ( @{ $self->_sequence_file_names } ) {
  3         122  
113 3         151 my ( $filename_without_directory, $directories, $suffix ) = fileparse($filename);
114 3         136 my $output_seq_results_file =
115             join( '/', ( $self->_working_directory_name, $filename_without_directory . '.out' ) );
116              
117 3         94 my $blast_database = Bio::Roary::External::Blastp->new(
118             fasta_file => $filename,
119             blast_database => $self->_blast_database,
120             exec => $self->blastp_exec,
121             output_file => $output_seq_results_file,
122             perc_identity => $self->perc_identity
123             );
124 3         12 push( @expected_output_files, $output_seq_results_file );
125 3         24 push( @commands_to_run, $blast_database->_command_to_run() );
126 3         90 $self->logger->info( "Running command: " . $blast_database->_command_to_run() );
127             }
128 3         198 my $job_runner_obj = $self->_job_runner_class->new( commands_to_run => \@commands_to_run, memory_in_mb => $self->memory_in_mb, queue => $self->_queue, cpus => $self->cpus );
129 3         19 $job_runner_obj->run();
130 3         433 $self->logger->info( "Combining blast results" );
131 3         98 $self->_combine_blast_results(\@expected_output_files);
132 3         311 return 1;
133             }
134              
135 3     3   20 no Moose;
  3         6  
  3         19  
136             __PACKAGE__->meta->make_immutable;
137              
138             1;
139              
140             __END__
141              
142             =pod
143              
144             =encoding UTF-8
145              
146             =head1 NAME
147              
148             Bio::Roary::ParallelAllAgainstAllBlast - Run all against all blast in parallel
149              
150             =head1 VERSION
151              
152             version 3.11.0
153              
154             =head1 SYNOPSIS
155              
156             Run blastp in parallel over a FASTA file of proteins
157             use Bio::Roary::ParallelAllAgainstAllBlast;
158              
159             my $obj = Bio::Roary::ParallelAllAgainstAllBlast->new(
160             fasta_file => 'abc.fa',
161             );
162             $obj->run();
163              
164             =head1 AUTHOR
165              
166             Andrew J. Page <ap13@sanger.ac.uk>
167              
168             =head1 COPYRIGHT AND LICENSE
169              
170             This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
171              
172             This is free software, licensed under:
173              
174             The GNU General Public License, Version 3, June 2007
175              
176             =cut