line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
undef $VERSION; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package Bio::Roary::CommandLine::Roary; |
4
|
|
|
|
|
|
|
$Bio::Roary::CommandLine::Roary::VERSION = '3.10.2'; |
5
|
|
|
|
|
|
|
# ABSTRACT: Take in FASTA files of proteins and cluster them |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
702
|
use Moose; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
8
|
|
9
|
1
|
|
|
1
|
|
6756
|
use Getopt::Long qw(GetOptionsFromArray); |
|
1
|
|
|
|
|
8416
|
|
|
1
|
|
|
|
|
6
|
|
10
|
1
|
|
|
1
|
|
488
|
use Bio::Roary; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
40
|
|
11
|
1
|
|
|
1
|
|
405
|
use Bio::Roary::PrepareInputFiles; |
|
1
|
|
|
|
|
8
|
|
|
1
|
|
|
|
|
76
|
|
12
|
1
|
|
|
1
|
|
1102
|
use Bio::Roary::QC::Report; |
|
1
|
|
|
|
|
10
|
|
|
1
|
|
|
|
|
70
|
|
13
|
1
|
|
|
1
|
|
673
|
use Bio::Roary::ReformatInputGFFs; |
|
1
|
|
|
|
|
9
|
|
|
1
|
|
|
|
|
148
|
|
14
|
1
|
|
|
1
|
|
686
|
use Bio::Roary::External::CheckTools; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
42
|
|
15
|
1
|
|
|
1
|
|
13
|
use File::Which; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
94
|
|
16
|
1
|
|
|
1
|
|
7
|
use File::Path qw(make_path); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
45
|
|
17
|
1
|
|
|
1
|
|
7
|
use Cwd qw(abs_path getcwd); |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
41
|
|
18
|
1
|
|
|
1
|
|
5
|
use File::Temp; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
1555
|
|
19
|
|
|
|
|
|
|
extends 'Bio::Roary::CommandLine::Common'; |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
has 'args' => ( is => 'ro', isa => 'ArrayRef', required => 1 ); |
22
|
|
|
|
|
|
|
has 'script_name' => ( is => 'ro', isa => 'Str', required => 1 ); |
23
|
|
|
|
|
|
|
has 'help' => ( is => 'rw', isa => 'Bool', default => 0 ); |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
has 'fasta_files' => ( is => 'rw', isa => 'ArrayRef', default => sub { [] } ); |
26
|
|
|
|
|
|
|
has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'clustered_proteins' ); |
27
|
|
|
|
|
|
|
has 'output_directory' => ( is => 'rw', isa => 'Str', default => '.' ); |
28
|
|
|
|
|
|
|
has '_original_directory' => ( is => 'rw', isa => 'Str', default => '.' ); |
29
|
|
|
|
|
|
|
has 'job_runner' => ( is => 'rw', isa => 'Str', default => 'Local' ); |
30
|
|
|
|
|
|
|
has 'makeblastdb_exec' => ( is => 'rw', isa => 'Str', default => 'makeblastdb' ); |
31
|
|
|
|
|
|
|
has 'blastp_exec' => ( is => 'rw', isa => 'Str', default => 'blastp' ); |
32
|
|
|
|
|
|
|
has 'mcxdeblast_exec' => ( is => 'rw', isa => 'Str', default => 'mcxdeblast' ); |
33
|
|
|
|
|
|
|
has 'mcl_exec' => ( is => 'rw', isa => 'Str', default => 'mcl' ); |
34
|
|
|
|
|
|
|
has 'apply_unknowns_filter' => ( is => 'rw', isa => 'Bool', default => 1 ); |
35
|
|
|
|
|
|
|
has 'cpus' => ( is => 'rw', isa => 'Int', default => 1 ); |
36
|
|
|
|
|
|
|
has 'output_multifasta_files' => ( is => 'rw', isa => 'Bool', default => 0 ); |
37
|
|
|
|
|
|
|
has 'perc_identity' => ( is => 'rw', isa => 'Num', default => 95 ); |
38
|
|
|
|
|
|
|
has 'dont_delete_files' => ( is => 'rw', isa => 'Bool', default => 0 ); |
39
|
|
|
|
|
|
|
has 'dont_create_rplots' => ( is => 'rw', isa => 'Bool', default => 1 ); |
40
|
|
|
|
|
|
|
has 'dont_run_qc' => ( is => 'rw', isa => 'Bool', default => 0 ); |
41
|
|
|
|
|
|
|
has 'dont_split_groups' => ( is => 'rw', isa => 'Bool', default => 0 ); |
42
|
|
|
|
|
|
|
has 'verbose_stats' => ( is => 'rw', isa => 'Bool', default => 0 ); |
43
|
|
|
|
|
|
|
has 'translation_table' => ( is => 'rw', isa => 'Int', default => 11 ); |
44
|
|
|
|
|
|
|
has 'mafft' => ( is => 'rw', isa => 'Bool', default => 0 ); |
45
|
|
|
|
|
|
|
has 'allow_paralogs' => ( is => 'rw', isa => 'Bool', default => 0 ); |
46
|
|
|
|
|
|
|
has 'group_limit' => ( is => 'rw', isa => 'Num', default => 50000 ); |
47
|
|
|
|
|
|
|
has 'core_definition' => ( is => 'rw', isa => 'Num', default => 0.99 ); |
48
|
|
|
|
|
|
|
has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 ); |
49
|
|
|
|
|
|
|
has 'kraken_db' => ( is => 'rw', isa => 'Str', default => '/lustre/scratch118/infgen/pathogen/pathpipe/kraken/minikraken_20140330/' ); |
50
|
|
|
|
|
|
|
has 'run_qc' => ( is => 'rw', isa => 'Bool', default => 0 ); |
51
|
|
|
|
|
|
|
has '_working_directory' => ( is => 'rw', isa => 'File::Temp::Dir', lazy => 1, builder => '_build__working_directory' ); |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
has 'inflation_value' => ( is => 'rw', isa => 'Num', default => 1.5 ); |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
sub _build__working_directory |
56
|
|
|
|
|
|
|
{ |
57
|
1
|
|
|
1
|
|
3
|
my ($self) = @_; |
58
|
1
|
|
|
|
|
34
|
return File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); |
59
|
|
|
|
|
|
|
} |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
sub BUILD { |
62
|
1
|
|
|
1
|
0
|
3
|
my ($self) = @_; |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
my ( |
65
|
1
|
|
|
|
|
5
|
$fasta_files, $verbose, $create_rplots, $group_limit, $dont_run_qc, |
66
|
|
|
|
|
|
|
$max_threads, $dont_delete_files, $dont_split_groups, $perc_identity, $output_filename, |
67
|
|
|
|
|
|
|
$job_runner, $makeblastdb_exec, $mcxdeblast_exec, $mcl_exec, $blastp_exec, |
68
|
|
|
|
|
|
|
$apply_unknowns_filter, $cpus, $output_multifasta_files, $verbose_stats, $translation_table, |
69
|
|
|
|
|
|
|
$run_qc, $core_definition, $help, $kraken_db, $cmd_version, |
70
|
|
|
|
|
|
|
$mafft, $output_directory, $check_dependancies, $inflation_value, $allow_paralogs, |
71
|
|
|
|
|
|
|
); |
72
|
|
|
|
|
|
|
|
73
|
1
|
|
|
|
|
38
|
GetOptionsFromArray( |
74
|
|
|
|
|
|
|
$self->args, |
75
|
|
|
|
|
|
|
'o|output=s' => \$output_filename, |
76
|
|
|
|
|
|
|
'f|output_directory=s' => \$output_directory, |
77
|
|
|
|
|
|
|
'j|job_runner=s' => \$job_runner, |
78
|
|
|
|
|
|
|
'm|makeblastdb_exec=s' => \$makeblastdb_exec, |
79
|
|
|
|
|
|
|
'b|blastp_exec=s' => \$blastp_exec, |
80
|
|
|
|
|
|
|
'd|mcxdeblast_exec=s' => \$mcxdeblast_exec, |
81
|
|
|
|
|
|
|
'c|mcl_exec=s' => \$mcl_exec, |
82
|
|
|
|
|
|
|
'p|processors=i' => \$cpus, |
83
|
|
|
|
|
|
|
'u|apply_unknowns_filter=i' => \$apply_unknowns_filter, |
84
|
|
|
|
|
|
|
'e|output_multifasta_files' => \$output_multifasta_files, |
85
|
|
|
|
|
|
|
'i|perc_identity=i' => \$perc_identity, |
86
|
|
|
|
|
|
|
'z|dont_delete_files' => \$dont_delete_files, |
87
|
|
|
|
|
|
|
's|dont_split_groups' => \$dont_split_groups, |
88
|
|
|
|
|
|
|
'r|create_rplots' => \$create_rplots, |
89
|
|
|
|
|
|
|
'y|verbose_stats' => \$verbose_stats, |
90
|
|
|
|
|
|
|
't|translation_table=i' => \$translation_table, |
91
|
|
|
|
|
|
|
'g|group_limit=i' => \$group_limit, |
92
|
|
|
|
|
|
|
'qc|run_qc' => \$run_qc, |
93
|
|
|
|
|
|
|
'x|dont_run_qc' => \$dont_run_qc, |
94
|
|
|
|
|
|
|
'cd|core_definition=f' => \$core_definition, |
95
|
|
|
|
|
|
|
'v|verbose' => \$verbose, |
96
|
|
|
|
|
|
|
'n|mafft' => \$mafft, |
97
|
|
|
|
|
|
|
'ap|allow_paralogs' => \$allow_paralogs, |
98
|
|
|
|
|
|
|
'k|kraken_db=s' => \$kraken_db, |
99
|
|
|
|
|
|
|
'w|version' => \$cmd_version, |
100
|
|
|
|
|
|
|
'a|check_dependancies' => \$check_dependancies, |
101
|
|
|
|
|
|
|
'iv|inflation_value=f' => \$inflation_value, |
102
|
|
|
|
|
|
|
'h|help' => \$help, |
103
|
|
|
|
|
|
|
); |
104
|
|
|
|
|
|
|
|
105
|
1
|
50
|
|
|
|
2952
|
$self->version($cmd_version) if ( defined($cmd_version) ); |
106
|
1
|
50
|
|
|
|
75
|
if ( $self->version ) { |
107
|
0
|
|
|
|
|
0
|
print $self->_version() ; |
108
|
0
|
|
|
|
|
0
|
return; |
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
|
111
|
1
|
|
|
|
|
17
|
print "\nPlease cite Roary if you use any of the results it produces: |
112
|
|
|
|
|
|
|
Andrew J. Page, Carla A. Cummins, Martin Hunt, Vanessa K. Wong, Sandra Reuter, Matthew T. G. Holden, Maria Fookes, Daniel Falush, Jacqueline A. Keane, Julian Parkhill, |
113
|
|
|
|
|
|
|
\"Roary: Rapid large-scale prokaryote pan genome analysis\", Bioinformatics, 2015 Nov 15;31(22):3691-3693 |
114
|
|
|
|
|
|
|
doi: http://doi.org/10.1093/bioinformatics/btv421 |
115
|
|
|
|
|
|
|
Pubmed: 26198102\n\n"; |
116
|
|
|
|
|
|
|
|
117
|
1
|
50
|
|
|
|
6
|
$self->help($help) if ( defined($help) ); |
118
|
1
|
50
|
|
|
|
50
|
if( $self->help ) |
119
|
|
|
|
|
|
|
{ |
120
|
0
|
|
|
|
|
0
|
print $self->usage_text; |
121
|
0
|
|
|
|
|
0
|
return; |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
|
124
|
1
|
50
|
|
|
|
6
|
if ($check_dependancies) { |
125
|
0
|
|
|
|
|
0
|
my $check_tools = Bio::Roary::External::CheckTools->new(); |
126
|
0
|
|
|
|
|
0
|
$check_tools->check_all_tools; |
127
|
0
|
|
|
|
|
0
|
$self->logger->error( "Roary version " . $self->_version() ); |
128
|
|
|
|
|
|
|
} |
129
|
|
|
|
|
|
|
|
130
|
1
|
50
|
|
|
|
5
|
if ( defined($verbose) ) { |
131
|
0
|
|
|
|
|
0
|
$self->verbose($verbose); |
132
|
0
|
|
|
|
|
0
|
$self->logger->level(10000); |
133
|
|
|
|
|
|
|
} |
134
|
|
|
|
|
|
|
|
135
|
1
|
50
|
|
|
|
2
|
if ( @{ $self->args } < 2 ) { |
|
1
|
|
|
|
|
40
|
|
136
|
0
|
|
|
|
|
0
|
$self->logger->error("Error: You need to provide at least 2 files to build a pan genome"); |
137
|
0
|
|
|
|
|
0
|
die $self->usage_text; |
138
|
|
|
|
|
|
|
} |
139
|
1
|
50
|
|
|
|
5
|
$self->output_filename($output_filename) if ( defined($output_filename) ); |
140
|
1
|
50
|
|
|
|
51
|
$self->job_runner($job_runner) if ( defined($job_runner) ); |
141
|
1
|
50
|
|
|
|
6
|
$self->makeblastdb_exec($makeblastdb_exec) if ( defined($makeblastdb_exec) ); |
142
|
1
|
50
|
|
|
|
5
|
$self->blastp_exec($blastp_exec) if ( defined($blastp_exec) ); |
143
|
1
|
50
|
|
|
|
4
|
$self->mcxdeblast_exec($mcxdeblast_exec) if ( defined($mcxdeblast_exec) ); |
144
|
1
|
50
|
|
|
|
5
|
$self->mcl_exec($mcl_exec) if ( defined($mcl_exec) ); |
145
|
1
|
50
|
|
|
|
4
|
$self->cpus($cpus) if ( defined($cpus) ); |
146
|
1
|
50
|
|
|
|
7
|
$self->inflation_value($inflation_value) if ( defined($inflation_value)); |
147
|
|
|
|
|
|
|
|
148
|
1
|
50
|
|
|
|
9
|
if ( defined($perc_identity) ) { |
149
|
0
|
|
|
|
|
0
|
$self->perc_identity($perc_identity); |
150
|
0
|
0
|
|
|
|
0
|
if ( $perc_identity < 50 ) { |
151
|
0
|
|
|
|
|
0
|
$self->logger->error( |
152
|
|
|
|
|
|
|
"The percentage identity is too low. Either something is wrong with your data, like contamination, or your doing something that the software isnt designed to support." |
153
|
|
|
|
|
|
|
); |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
|
157
|
1
|
50
|
|
|
|
5
|
$self->mafft($mafft) if ( defined($mafft) ); |
158
|
1
|
50
|
|
|
|
6
|
$self->apply_unknowns_filter($apply_unknowns_filter) |
159
|
|
|
|
|
|
|
if ( defined($apply_unknowns_filter) ); |
160
|
|
|
|
|
|
|
|
161
|
1
|
50
|
|
|
|
9
|
if ( defined($output_multifasta_files) ) { |
162
|
0
|
0
|
|
|
|
0
|
if ( which('prank') ) { |
163
|
0
|
|
|
|
|
0
|
$self->output_multifasta_files($output_multifasta_files); |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
else { |
166
|
|
|
|
|
|
|
|
167
|
0
|
0
|
|
|
|
0
|
if ( which('mafft') ) { |
168
|
0
|
|
|
|
|
0
|
$self->output_multifasta_files($output_multifasta_files); |
169
|
0
|
|
|
|
|
0
|
$self->mafft(1); |
170
|
0
|
|
|
|
|
0
|
$self->logger->warn("PRANK not found in your PATH so using MAFFT instead to generate multiFASTA alignments."); |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
else { |
173
|
0
|
|
|
|
|
0
|
$self->logger->warn("PRANK (or MAFFT) not found in your PATH so cannot generate multiFASTA alignments, skipping for now."); |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
} |
177
|
1
|
50
|
|
|
|
5
|
$self->dont_delete_files($dont_delete_files) if ( defined($dont_delete_files) ); |
178
|
1
|
50
|
|
|
|
55
|
$self->dont_split_groups($dont_split_groups) if ( defined($dont_split_groups) ); |
179
|
1
|
50
|
|
|
|
10
|
$self->dont_create_rplots(0) if ( defined($create_rplots) ); |
180
|
1
|
50
|
|
|
|
5
|
$self->verbose_stats($verbose_stats) if ( defined $verbose_stats ); |
181
|
1
|
50
|
|
|
|
54
|
$self->translation_table($translation_table) if ( defined($translation_table) ); |
182
|
1
|
50
|
|
|
|
8
|
$self->group_limit($group_limit) if ( defined($group_limit) ); |
183
|
1
|
50
|
|
|
|
10
|
$self->kraken_db($kraken_db) if ( defined($kraken_db) ); |
184
|
1
|
50
|
|
|
|
7
|
$self->output_directory($output_directory) if ( defined($output_directory) ); |
185
|
|
|
|
|
|
|
|
186
|
1
|
50
|
33
|
|
|
8
|
if ( defined $verbose_stats && defined($output_multifasta_files) ) { |
187
|
0
|
|
|
|
|
0
|
$self->verbose_stats(0); |
188
|
0
|
|
|
|
|
0
|
$self->logger->warn("The verbose stats spreadsheet is not compatible with the core gene alignement so disabling verbose_stats"); |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
|
191
|
1
|
50
|
|
|
|
6
|
if ( defined($run_qc) ) { |
192
|
0
|
0
|
0
|
|
|
0
|
if ( which('kraken') && which('kraken-report') ) { |
193
|
0
|
|
|
|
|
0
|
$self->run_qc($run_qc); |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
else { |
196
|
0
|
|
|
|
|
0
|
$self->logger->warn("kraken or kraken-report not found in your PATH so cannot run QC, skipping for now."); |
197
|
|
|
|
|
|
|
} |
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
|
200
|
1
|
50
|
|
|
|
53
|
if ( $self->cpus > 1 ) { |
201
|
0
|
|
|
|
|
0
|
$self->job_runner('Parallel'); |
202
|
|
|
|
|
|
|
} |
203
|
|
|
|
|
|
|
|
204
|
1
|
50
|
|
|
|
9
|
$self->core_definition( $core_definition / 100 ) if ( defined($core_definition) ); |
205
|
|
|
|
|
|
|
|
206
|
1
|
|
|
|
|
5
|
for my $filename ( @{ $self->args } ) { |
|
1
|
|
|
|
|
50
|
|
207
|
3
|
50
|
|
|
|
65
|
if ( !-e $filename ) { |
208
|
0
|
|
|
|
|
0
|
$self->logger->error("Error: Cant access file $filename"); |
209
|
0
|
|
|
|
|
0
|
die $self->usage_text; |
210
|
|
|
|
|
|
|
} |
211
|
3
|
|
|
|
|
9
|
push( @{ $self->fasta_files }, abs_path($filename) ); |
|
3
|
|
|
|
|
135
|
|
212
|
|
|
|
|
|
|
} |
213
|
|
|
|
|
|
|
|
214
|
1
|
50
|
|
|
|
51
|
$self->_working_directory( File::Temp->newdir( DIR => getcwd, CLEANUP => 0 ) ) if ( $self->dont_delete_files ); |
215
|
|
|
|
|
|
|
} |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
sub _setup_output_directory { |
218
|
1
|
|
|
1
|
|
4
|
my ($self) = @_; |
219
|
1
|
50
|
33
|
|
|
52
|
return if ( $self->output_directory eq '.' || $self->output_directory eq '' ); |
220
|
|
|
|
|
|
|
|
221
|
0
|
0
|
0
|
|
|
0
|
if ( -e $self->output_directory || -d $self->output_directory ) { |
222
|
0
|
|
|
|
|
0
|
$self->logger->warn("Output directory name exists already so adding a timestamp to the end"); |
223
|
0
|
|
|
|
|
0
|
$self->output_directory( $self->output_directory() . '_' . time() ); |
224
|
0
|
0
|
0
|
|
|
0
|
if ( -e $self->output_directory || -d $self->output_directory ) { |
225
|
0
|
|
|
|
|
0
|
die("Output directory name with time stamp exist so giving up"); |
226
|
|
|
|
|
|
|
} |
227
|
|
|
|
|
|
|
} |
228
|
0
|
|
|
|
|
0
|
make_path( $self->output_directory, { error => \my $err } ); |
229
|
0
|
0
|
|
|
|
0
|
if (@$err) { |
230
|
0
|
|
|
|
|
0
|
for my $diag (@$err) { |
231
|
0
|
|
|
|
|
0
|
my ( $file, $message ) = %$diag; |
232
|
0
|
|
|
|
|
0
|
die("Error creating output directory $message"); |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
} |
235
|
0
|
|
|
|
|
0
|
$self->logger->info( "Output directory created: " . $self->output_directory ); |
236
|
|
|
|
|
|
|
|
237
|
0
|
|
|
|
|
0
|
$self->_original_directory( getcwd() ); |
238
|
0
|
|
|
|
|
0
|
chdir( $self->output_directory ); |
239
|
0
|
|
|
|
|
0
|
return $self; |
240
|
|
|
|
|
|
|
} |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
sub run { |
243
|
1
|
|
|
1
|
0
|
4
|
my ($self) = @_; |
244
|
|
|
|
|
|
|
|
245
|
1
|
50
|
33
|
|
|
42
|
return if($self->version || $self->help); |
246
|
|
|
|
|
|
|
|
247
|
1
|
|
|
|
|
7
|
$self->_setup_output_directory; |
248
|
|
|
|
|
|
|
|
249
|
1
|
|
|
|
|
88
|
$self->logger->info("Fixing input GFF files"); |
250
|
1
|
|
|
|
|
71
|
my $reformat_input_files = Bio::Roary::ReformatInputGFFs->new( gff_files => $self->fasta_files, logger => $self->logger ); |
251
|
1
|
|
|
|
|
15
|
$reformat_input_files->fix_duplicate_gene_ids(); |
252
|
1
|
50
|
|
|
|
4
|
if ( @{ $reformat_input_files->fixed_gff_files } == 0 ) { |
|
1
|
|
|
|
|
27
|
|
253
|
0
|
|
|
|
|
0
|
die( |
254
|
|
|
|
|
|
|
"All input files have been excluded from analysis. Please check you have valid GFF files, with annotation and a FASTA sequence at the end. Better still, reannotate your FASTA file with PROKKA." |
255
|
|
|
|
|
|
|
); |
256
|
|
|
|
|
|
|
} |
257
|
1
|
|
|
|
|
39
|
$self->fasta_files( $reformat_input_files->fixed_gff_files ); |
258
|
|
|
|
|
|
|
|
259
|
1
|
|
|
|
|
29
|
$self->logger->info("Extracting proteins from GFF files"); |
260
|
1
|
|
|
|
|
35
|
my $prepare_input_files = Bio::Roary::PrepareInputFiles->new( |
261
|
|
|
|
|
|
|
input_files => $self->fasta_files, |
262
|
|
|
|
|
|
|
job_runner => $self->job_runner, |
263
|
|
|
|
|
|
|
apply_unknowns_filter => $self->apply_unknowns_filter, |
264
|
|
|
|
|
|
|
cpus => $self->cpus, |
265
|
|
|
|
|
|
|
translation_table => $self->translation_table, |
266
|
|
|
|
|
|
|
verbose => $self->verbose, |
267
|
|
|
|
|
|
|
working_directory => $self->_working_directory, |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
); |
270
|
|
|
|
|
|
|
|
271
|
1
|
50
|
|
|
|
37
|
if ( $self->run_qc ) { |
272
|
0
|
|
|
|
|
0
|
$self->logger->info("Running Kraken on each input assembly"); |
273
|
0
|
|
|
|
|
0
|
my $qc_input_files = Bio::Roary::QC::Report->new( |
274
|
|
|
|
|
|
|
input_files => $self->fasta_files, |
275
|
|
|
|
|
|
|
job_runner => $self->job_runner, |
276
|
|
|
|
|
|
|
cpus => $self->cpus, |
277
|
|
|
|
|
|
|
verbose => $self->verbose, |
278
|
|
|
|
|
|
|
kraken_db => $self->kraken_db |
279
|
|
|
|
|
|
|
); |
280
|
0
|
|
|
|
|
0
|
$qc_input_files->report; |
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
|
283
|
1
|
|
|
|
|
24
|
my $pan_genome_obj = Bio::Roary->new( |
284
|
|
|
|
|
|
|
input_files => $self->fasta_files, |
285
|
|
|
|
|
|
|
fasta_files => $prepare_input_files->fasta_files, |
286
|
|
|
|
|
|
|
output_filename => $self->output_filename, |
287
|
|
|
|
|
|
|
job_runner => $self->job_runner, |
288
|
|
|
|
|
|
|
cpus => $self->cpus, |
289
|
|
|
|
|
|
|
makeblastdb_exec => $self->makeblastdb_exec, |
290
|
|
|
|
|
|
|
blastp_exec => $self->blastp_exec, |
291
|
|
|
|
|
|
|
output_multifasta_files => $self->output_multifasta_files, |
292
|
|
|
|
|
|
|
perc_identity => $self->perc_identity, |
293
|
|
|
|
|
|
|
dont_delete_files => $self->dont_delete_files, |
294
|
|
|
|
|
|
|
dont_create_rplots => $self->dont_create_rplots, |
295
|
|
|
|
|
|
|
dont_split_groups => $self->dont_split_groups, |
296
|
|
|
|
|
|
|
verbose_stats => $self->verbose_stats, |
297
|
|
|
|
|
|
|
translation_table => $self->translation_table, |
298
|
|
|
|
|
|
|
group_limit => $self->group_limit, |
299
|
|
|
|
|
|
|
core_definition => $self->core_definition, |
300
|
|
|
|
|
|
|
verbose => $self->verbose, |
301
|
|
|
|
|
|
|
mafft => $self->mafft, |
302
|
|
|
|
|
|
|
allow_paralogs => $self->allow_paralogs, |
303
|
|
|
|
|
|
|
inflation_value => $self->inflation_value, |
304
|
|
|
|
|
|
|
); |
305
|
1
|
|
|
|
|
17
|
$pan_genome_obj->run(); |
306
|
|
|
|
|
|
|
|
307
|
0
|
|
|
|
|
|
chdir( $self->_original_directory ); |
308
|
|
|
|
|
|
|
} |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
sub _version { |
311
|
0
|
|
|
0
|
|
|
my ($self) = @_; |
312
|
0
|
0
|
|
|
|
|
if ( defined($Bio::Roary::CommandLine::Roary::VERSION) ) { |
313
|
0
|
|
|
|
|
|
return $Bio::Roary::CommandLine::Roary::VERSION . "\n"; |
314
|
|
|
|
|
|
|
} |
315
|
|
|
|
|
|
|
else { |
316
|
0
|
|
|
|
|
|
return "x.y.z\n"; |
317
|
|
|
|
|
|
|
} |
318
|
|
|
|
|
|
|
} |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
sub usage_text { |
321
|
0
|
|
|
0
|
0
|
|
my ($self) = @_; |
322
|
|
|
|
|
|
|
|
323
|
0
|
|
|
|
|
|
return <<USAGE; |
324
|
|
|
|
|
|
|
Usage: roary [options] *.gff |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
Options: -p INT number of threads [1] |
327
|
|
|
|
|
|
|
-o STR clusters output filename [clustered_proteins] |
328
|
|
|
|
|
|
|
-f STR output directory [.] |
329
|
|
|
|
|
|
|
-e create a multiFASTA alignment of core genes using PRANK |
330
|
|
|
|
|
|
|
-n fast core gene alignment with MAFFT, use with -e |
331
|
|
|
|
|
|
|
-i minimum percentage identity for blastp [95] |
332
|
|
|
|
|
|
|
-cd FLOAT percentage of isolates a gene must be in to be core [99] |
333
|
|
|
|
|
|
|
-qc generate QC report with Kraken |
334
|
|
|
|
|
|
|
-k STR path to Kraken database for QC, use with -qc |
335
|
|
|
|
|
|
|
-a check dependancies and print versions |
336
|
|
|
|
|
|
|
-b STR blastp executable [blastp] |
337
|
|
|
|
|
|
|
-c STR mcl executable [mcl] |
338
|
|
|
|
|
|
|
-d STR mcxdeblast executable [mcxdeblast] |
339
|
|
|
|
|
|
|
-g INT maximum number of clusters [50000] |
340
|
|
|
|
|
|
|
-m STR makeblastdb executable [makeblastdb] |
341
|
|
|
|
|
|
|
-r create R plots, requires R and ggplot2 |
342
|
|
|
|
|
|
|
-s dont split paralogs |
343
|
|
|
|
|
|
|
-t INT translation table [11] |
344
|
|
|
|
|
|
|
-ap allow paralogs in core alignment |
345
|
|
|
|
|
|
|
-z dont delete intermediate files |
346
|
|
|
|
|
|
|
-v verbose output to STDOUT |
347
|
|
|
|
|
|
|
-w print version and exit |
348
|
|
|
|
|
|
|
-y add gene inference information to spreadsheet, doesnt work with -e |
349
|
|
|
|
|
|
|
-iv STR Change the MCL inflation value [1.5] |
350
|
|
|
|
|
|
|
-h this help message |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
Example: Quickly generate a core gene alignment using 8 threads |
353
|
|
|
|
|
|
|
roary -e --mafft -p 8 *.gff |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
For further info see: http://sanger-pathogens.github.io/Roary/ |
356
|
|
|
|
|
|
|
USAGE |
357
|
|
|
|
|
|
|
} |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable; |
360
|
1
|
|
|
1
|
|
8
|
no Moose; |
|
1
|
|
|
|
|
6
|
|
|
1
|
|
|
|
|
8
|
|
361
|
|
|
|
|
|
|
1; |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
__END__ |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
=pod |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
=encoding UTF-8 |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
=head1 NAME |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
Bio::Roary::CommandLine::Roary - Take in FASTA files of proteins and cluster them |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
=head1 VERSION |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
version 3.10.2 |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
=head1 SYNOPSIS |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
Take in FASTA files of proteins and cluster them |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
=head1 AUTHOR |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
Andrew J. Page <ap13@sanger.ac.uk> |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute. |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
This is free software, licensed under: |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
The GNU General Public License, Version 3, June 2007 |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
=cut |