| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
undef $VERSION; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package Bio::Roary::CommandLine::Roary; |
|
4
|
|
|
|
|
|
|
$Bio::Roary::CommandLine::Roary::VERSION = '3.10.1'; |
|
5
|
|
|
|
|
|
|
# ABSTRACT: Take in FASTA files of proteins and cluster them |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
783
|
use Moose; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
7
|
|
|
9
|
1
|
|
|
1
|
|
6602
|
use Getopt::Long qw(GetOptionsFromArray); |
|
|
1
|
|
|
|
|
8248
|
|
|
|
1
|
|
|
|
|
4
|
|
|
10
|
1
|
|
|
1
|
|
500
|
use Bio::Roary; |
|
|
1
|
|
|
|
|
4
|
|
|
|
1
|
|
|
|
|
43
|
|
|
11
|
1
|
|
|
1
|
|
414
|
use Bio::Roary::PrepareInputFiles; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
42
|
|
|
12
|
1
|
|
|
1
|
|
452
|
use Bio::Roary::QC::Report; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
39
|
|
|
13
|
1
|
|
|
1
|
|
447
|
use Bio::Roary::ReformatInputGFFs; |
|
|
1
|
|
|
|
|
12
|
|
|
|
1
|
|
|
|
|
86
|
|
|
14
|
1
|
|
|
1
|
|
439
|
use Bio::Roary::External::CheckTools; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
34
|
|
|
15
|
1
|
|
|
1
|
|
7
|
use File::Which; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
62
|
|
|
16
|
1
|
|
|
1
|
|
5
|
use File::Path qw(make_path); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
40
|
|
|
17
|
1
|
|
|
1
|
|
6
|
use Cwd qw(abs_path getcwd); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
37
|
|
|
18
|
1
|
|
|
1
|
|
5
|
use File::Temp; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
1446
|
|
|
19
|
|
|
|
|
|
|
# Command-line front end; shared behaviour comes from the Common base class.
extends 'Bio::Roary::CommandLine::Common';

# --- Invocation ------------------------------------------------------------
# Raw argument list (options followed by input files) and the calling script.
has 'args'        => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'script_name' => ( is => 'ro', isa => 'Str',      required => 1 );
has 'help'        => ( is => 'rw', isa => 'Bool',     default  => 0 );

# --- Inputs and outputs ----------------------------------------------------
# fasta_files is filled in BUILD with the absolute path of every input file.
has 'fasta_files'         => ( is => 'rw', isa => 'ArrayRef', default => sub { [] } );
has 'output_filename'     => ( is => 'rw', isa => 'Str',      default => 'clustered_proteins' );
has 'output_directory'    => ( is => 'rw', isa => 'Str',      default => '.' );
has '_original_directory' => ( is => 'rw', isa => 'Str',      default => '.' );

# --- External tools and job execution --------------------------------------
has 'job_runner'       => ( is => 'rw', isa => 'Str', default => 'Local' );
has 'makeblastdb_exec' => ( is => 'rw', isa => 'Str', default => 'makeblastdb' );
has 'blastp_exec'      => ( is => 'rw', isa => 'Str', default => 'blastp' );
has 'mcxdeblast_exec'  => ( is => 'rw', isa => 'Str', default => 'mcxdeblast' );
has 'mcl_exec'         => ( is => 'rw', isa => 'Str', default => 'mcl' );

# --- Analysis settings -----------------------------------------------------
has 'apply_unknowns_filter'   => ( is => 'rw', isa => 'Bool', default => 1 );
has 'cpus'                    => ( is => 'rw', isa => 'Int',  default => 1 );
has 'output_multifasta_files' => ( is => 'rw', isa => 'Bool', default => 0 );
has 'perc_identity'           => ( is => 'rw', isa => 'Num',  default => 95 );
has 'dont_delete_files'       => ( is => 'rw', isa => 'Bool', default => 0 );

# R plots are off by default; BUILD clears this flag when -r is given.
has 'dont_create_rplots' => ( is => 'rw', isa => 'Bool', default => 1 );
has 'dont_run_qc'        => ( is => 'rw', isa => 'Bool', default => 0 );
has 'dont_split_groups'  => ( is => 'rw', isa => 'Bool', default => 0 );
has 'verbose_stats'      => ( is => 'rw', isa => 'Bool', default => 0 );
has 'translation_table'  => ( is => 'rw', isa => 'Int',  default => 11 );
has 'mafft'              => ( is => 'rw', isa => 'Bool', default => 0 );
has 'allow_paralogs'     => ( is => 'rw', isa => 'Bool', default => 0 );
has 'group_limit'        => ( is => 'rw', isa => 'Num',  default => 50000 );

# Stored as a fraction; BUILD divides the command-line percentage by 100.
has 'core_definition' => ( is => 'rw', isa => 'Num',  default => 0.99 );
has 'verbose'         => ( is => 'rw', isa => 'Bool', default => 0 );

# --- Quality control -------------------------------------------------------
# NOTE(review): the default is a hard-coded absolute path that looks
# site-specific -- confirm; it can be overridden with -k.
has 'kraken_db' => ( is => 'rw', isa => 'Str', default => '/lustre/scratch118/infgen/pathogen/pathpipe/kraken/minikraken_20140330/' );
has 'run_qc'    => ( is => 'rw', isa => 'Bool', default => 0 );

# --- Scratch space ---------------------------------------------------------
# Built lazily on first access by _build__working_directory.
has '_working_directory' => ( is => 'rw', isa => 'File::Temp::Dir', lazy => 1, builder => '_build__working_directory' );

has 'inflation_value' => ( is => 'rw', isa => 'Num', default => 1.5 );
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# Lazy builder for the _working_directory attribute.
#
# Creates a fresh temporary directory inside the current working directory
# and returns the File::Temp::Dir object; CLEANUP => 1 asks File::Temp to
# remove the directory automatically.
sub _build__working_directory {
    my ($self) = @_;

    my %tempdir_options = (
        DIR     => getcwd,
        CLEANUP => 1,
    );
    return File::Temp->newdir(%tempdir_options);
}
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# Moose BUILD hook: parses the command-line options held in $self->args and
# copies every option that was supplied onto the matching attribute.
#
# Flow:
#   * -w prints the version and returns; -h prints the usage text and returns.
#   * Otherwise the citation notice is printed, dependencies are optionally
#     checked (-a), and at least two input files are required (dies with the
#     usage text if fewer remain after option parsing).
#   * Each remaining argument must exist on disk (dies otherwise) and its
#     absolute path is appended to fasta_files.
#
# GetOptionsFromArray removes the recognised options from $self->args, so
# after parsing only the input file names are left in that array.
sub BUILD {
    my ($self) = @_;

    my (
        $verbose,               $create_rplots,     $group_limit,             $dont_run_qc,
        $dont_delete_files,     $dont_split_groups, $perc_identity,           $output_filename,
        $job_runner,            $makeblastdb_exec,  $mcxdeblast_exec,         $mcl_exec,
        $blastp_exec,           $apply_unknowns_filter, $cpus,                $output_multifasta_files,
        $verbose_stats,         $translation_table, $run_qc,                  $core_definition,
        $help,                  $kraken_db,         $cmd_version,             $mafft,
        $output_directory,      $check_dependancies, $inflation_value,        $allow_paralogs,
    );

    GetOptionsFromArray(
        $self->args,
        'o|output=s'                => \$output_filename,
        'f|output_directory=s'      => \$output_directory,
        'j|job_runner=s'            => \$job_runner,
        'm|makeblastdb_exec=s'      => \$makeblastdb_exec,
        'b|blastp_exec=s'           => \$blastp_exec,
        'd|mcxdeblast_exec=s'       => \$mcxdeblast_exec,
        'c|mcl_exec=s'              => \$mcl_exec,
        'p|processors=i'            => \$cpus,
        'u|apply_unknowns_filter=i' => \$apply_unknowns_filter,
        'e|output_multifasta_files' => \$output_multifasta_files,
        'i|perc_identity=i'         => \$perc_identity,
        'z|dont_delete_files'       => \$dont_delete_files,
        's|dont_split_groups'       => \$dont_split_groups,
        'r|create_rplots'           => \$create_rplots,
        'y|verbose_stats'           => \$verbose_stats,
        't|translation_table=i'     => \$translation_table,
        'g|group_limit=i'           => \$group_limit,
        'qc|run_qc'                 => \$run_qc,
        # NOTE(review): -x is accepted but its value is not applied anywhere
        # in this constructor -- confirm whether that is intentional.
        'x|dont_run_qc'             => \$dont_run_qc,
        'cd|core_definition=f'      => \$core_definition,
        'v|verbose'                 => \$verbose,
        'n|mafft'                   => \$mafft,
        'ap|allow_paralogs'         => \$allow_paralogs,
        'k|kraken_db=s'             => \$kraken_db,
        'w|version'                 => \$cmd_version,
        'a|check_dependancies'      => \$check_dependancies,
        'iv|inflation_value=f'      => \$inflation_value,
        'h|help'                    => \$help,
    );

    # Version request: print it and stop configuring.
    $self->version($cmd_version) if ( defined($cmd_version) );
    if ( $self->version ) {
        print $self->_version();
        return;
    }

    print "\nPlease cite Roary if you use any of the results it produces:
Andrew J. Page, Carla A. Cummins, Martin Hunt, Vanessa K. Wong, Sandra Reuter, Matthew T. G. Holden, Maria Fookes, Daniel Falush, Jacqueline A. Keane, Julian Parkhill,
\"Roary: Rapid large-scale prokaryote pan genome analysis\", Bioinformatics, 2015 Nov 15;31(22):3691-3693
doi: http://doi.org/10.1093/bioinformatics/btv421
Pubmed: 26198102\n\n";

    # Help request: print the usage text and stop configuring.
    $self->help($help) if ( defined($help) );
    if ( $self->help ) {
        print $self->usage_text;
        return;
    }

    if ($check_dependancies) {
        my $check_tools = Bio::Roary::External::CheckTools->new();
        $check_tools->check_all_tools;
        $self->logger->error( "Roary version " . $self->_version() );
    }

    if ( defined($verbose) ) {
        $self->verbose($verbose);
        $self->logger->level(10000);
    }

    # Only the input files remain in args once the options have been consumed.
    if ( @{ $self->args } < 2 ) {
        $self->logger->error("Error: You need to provide at least 2 files to build a pan genome");
        die $self->usage_text;
    }

    $self->output_filename($output_filename)   if ( defined($output_filename) );
    $self->job_runner($job_runner)             if ( defined($job_runner) );
    $self->makeblastdb_exec($makeblastdb_exec) if ( defined($makeblastdb_exec) );
    $self->blastp_exec($blastp_exec)           if ( defined($blastp_exec) );
    $self->mcxdeblast_exec($mcxdeblast_exec)   if ( defined($mcxdeblast_exec) );
    $self->mcl_exec($mcl_exec)                 if ( defined($mcl_exec) );
    $self->cpus($cpus)                         if ( defined($cpus) );
    $self->inflation_value($inflation_value)   if ( defined($inflation_value) );

    if ( defined($perc_identity) ) {
        $self->perc_identity($perc_identity);
        if ( $perc_identity < 50 ) {
            # Logged only; the run still goes ahead with the requested value.
            $self->logger->error(
"The percentage identity is too low. Either something is wrong with your data, like contamination, or your doing something that the software isnt designed to support."
            );
        }
    }

    $self->mafft($mafft) if ( defined($mafft) );

    # Fix: -ap was parsed but never stored, so the flag had no effect.
    $self->allow_paralogs($allow_paralogs) if ( defined($allow_paralogs) );

    $self->apply_unknowns_filter($apply_unknowns_filter)
      if ( defined($apply_unknowns_filter) );

    # Core alignments need PRANK; fall back to MAFFT, or skip them entirely
    # when neither aligner is on the PATH.
    if ( defined($output_multifasta_files) ) {
        if ( which('prank') ) {
            $self->output_multifasta_files($output_multifasta_files);
        }
        else {
            if ( which('mafft') ) {
                $self->output_multifasta_files($output_multifasta_files);
                $self->mafft(1);
                $self->logger->warn("PRANK not found in your PATH so using MAFFT instead to generate multiFASTA alignments.");
            }
            else {
                $self->logger->warn("PRANK (or MAFFT) not found in your PATH so cannot generate multiFASTA alignments, skipping for now.");
            }
        }
    }

    $self->dont_delete_files($dont_delete_files) if ( defined($dont_delete_files) );
    $self->dont_split_groups($dont_split_groups) if ( defined($dont_split_groups) );

    # The attribute is negated: passing -r switches plot creation on.
    $self->dont_create_rplots(0)                 if ( defined($create_rplots) );
    $self->verbose_stats($verbose_stats)         if ( defined $verbose_stats );
    $self->translation_table($translation_table) if ( defined($translation_table) );
    $self->group_limit($group_limit)             if ( defined($group_limit) );
    $self->kraken_db($kraken_db)                 if ( defined($kraken_db) );
    $self->output_directory($output_directory)   if ( defined($output_directory) );

    # -y and -e cannot be combined; the alignment wins.
    if ( defined $verbose_stats && defined($output_multifasta_files) ) {
        $self->verbose_stats(0);
        $self->logger->warn("The verbose stats spreadsheet is not compatible with the core gene alignement so disabling verbose_stats");
    }

    if ( defined($run_qc) ) {
        if ( which('kraken') && which('kraken-report') ) {
            $self->run_qc($run_qc);
        }
        else {
            $self->logger->warn("kraken or kraken-report not found in your PATH so cannot run QC, skipping for now.");
        }
    }

    # More than one CPU overrides whatever job runner was requested.
    if ( $self->cpus > 1 ) {
        $self->job_runner('Parallel');
    }

    # Given as a percentage on the command line, stored as a fraction.
    $self->core_definition( $core_definition / 100 ) if ( defined($core_definition) );

    for my $filename ( @{ $self->args } ) {
        if ( !-e $filename ) {
            $self->logger->error("Error: Cant access file $filename");
            die $self->usage_text;
        }
        push( @{ $self->fasta_files }, abs_path($filename) );
    }

    # With -z, replace the lazily built scratch directory by one that is
    # created with CLEANUP => 0.
    $self->_working_directory( File::Temp->newdir( DIR => getcwd, CLEANUP => 0 ) ) if ( $self->dont_delete_files );
}
|
216
|
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
# Creates the requested output directory and makes it the current directory.
#
# Does nothing (bare return) when output_directory is '.' or the empty
# string. If the requested name already exists, a '_<epoch seconds>' suffix
# is appended; if that name exists as well the method dies. The directory
# that was current before the chdir is remembered in _original_directory.
#
# Returns $self after a successful chdir.
# Dies if the directory cannot be created or entered.
sub _setup_output_directory {
    my ($self) = @_;
    return if ( $self->output_directory eq '.' || $self->output_directory eq '' );

    # -e is already true for an existing directory, so no separate -d test
    # is needed.
    if ( -e $self->output_directory ) {
        $self->logger->warn("Output directory name exists already so adding a timestamp to the end");
        $self->output_directory( $self->output_directory() . '_' . time() );
        if ( -e $self->output_directory ) {
            die("Output directory name with time stamp exist so giving up");
        }
    }

    make_path( $self->output_directory, { error => \my $err } );
    if (@$err) {
        # Each diagnostic is a one-pair hash (file => message); the first
        # one is fatal.
        for my $diag (@$err) {
            my ( $file, $message ) = %$diag;
            die("Error creating output directory $message");
        }
    }
    $self->logger->info( "Output directory created: " . $self->output_directory );

    $self->_original_directory( getcwd() );

    # Fail loudly: continuing after a failed chdir would write every result
    # into the caller's directory instead of the requested one.
    chdir( $self->output_directory )
      or die( "Error changing into output directory " . $self->output_directory . ": $!" );
    return $self;
}
|
241
|
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
# Runs the whole pipeline for an already-configured object.
#
# Returns immediately when the version or help flag is set. Otherwise:
#   1. prepares the output directory,
#   2. repairs duplicate gene IDs in the input GFF files and keeps only the
#      files that survive (dies if none do),
#   3. sets up protein extraction from the GFF files,
#   4. optionally produces the Kraken QC report,
#   5. builds and runs the Bio::Roary pan genome object,
#   6. changes back to the directory recorded in _original_directory.
sub run {
    my ($self) = @_;

    return if ( $self->version || $self->help );

    $self->_setup_output_directory;

    $self->logger->info("Fixing input GFF files");
    my $gff_fixer = Bio::Roary::ReformatInputGFFs->new(
        gff_files => $self->fasta_files,
        logger    => $self->logger,
    );
    $gff_fixer->fix_duplicate_gene_ids();
    unless ( @{ $gff_fixer->fixed_gff_files } ) {
        die(
"All input files have been excluded from analysis. Please check you have valid GFF files, with annotation and a FASTA sequence at the end. Better still, reannotate your FASTA file with PROKKA."
        );
    }
    $self->fasta_files( $gff_fixer->fixed_gff_files );

    $self->logger->info("Extracting proteins from GFF files");
    my %extraction_args = (
        input_files           => $self->fasta_files,
        job_runner            => $self->job_runner,
        apply_unknowns_filter => $self->apply_unknowns_filter,
        cpus                  => $self->cpus,
        translation_table     => $self->translation_table,
        verbose               => $self->verbose,
        working_directory     => $self->_working_directory,
    );
    my $protein_extractor = Bio::Roary::PrepareInputFiles->new(%extraction_args);

    if ( $self->run_qc ) {
        $self->logger->info("Running Kraken on each input assembly");
        my %qc_args = (
            input_files => $self->fasta_files,
            job_runner  => $self->job_runner,
            cpus        => $self->cpus,
            verbose     => $self->verbose,
            kraken_db   => $self->kraken_db,
        );
        Bio::Roary::QC::Report->new(%qc_args)->report;
    }

    my %pan_genome_args = (
        input_files             => $self->fasta_files,
        fasta_files             => $protein_extractor->fasta_files,
        output_filename         => $self->output_filename,
        job_runner              => $self->job_runner,
        cpus                    => $self->cpus,
        makeblastdb_exec        => $self->makeblastdb_exec,
        blastp_exec             => $self->blastp_exec,
        output_multifasta_files => $self->output_multifasta_files,
        perc_identity           => $self->perc_identity,
        dont_delete_files       => $self->dont_delete_files,
        dont_create_rplots      => $self->dont_create_rplots,
        dont_split_groups       => $self->dont_split_groups,
        verbose_stats           => $self->verbose_stats,
        translation_table       => $self->translation_table,
        group_limit             => $self->group_limit,
        core_definition         => $self->core_definition,
        verbose                 => $self->verbose,
        mafft                   => $self->mafft,
        allow_paralogs          => $self->allow_paralogs,
        inflation_value         => $self->inflation_value,
    );
    my $pan_genome = Bio::Roary->new(%pan_genome_args);
    $pan_genome->run();

    # Go back to where we started; the chdir result is the return value.
    return chdir( $self->_original_directory );
}
|
309
|
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
# Returns the module version followed by a newline, or the placeholder
# "x.y.z\n" when the package $VERSION variable is not defined.
sub _version {
    my ($self) = @_;

    return "x.y.z\n" unless defined($Bio::Roary::CommandLine::Roary::VERSION);
    return $Bio::Roary::CommandLine::Roary::VERSION . "\n";
}
|
319
|
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
# Returns the command-line help message as a single string.
#
# The argument types shown here follow the option specifications parsed in
# BUILD: -i takes an integer (perc_identity=i) and -iv takes a float
# (inflation_value=f).
sub usage_text {
    my ($self) = @_;

    return <<USAGE;
Usage: roary [options] *.gff

Options: -p INT number of threads [1]
-o STR clusters output filename [clustered_proteins]
-f STR output directory [.]
-e create a multiFASTA alignment of core genes using PRANK
-n fast core gene alignment with MAFFT, use with -e
-i INT minimum percentage identity for blastp [95]
-cd FLOAT percentage of isolates a gene must be in to be core [99]
-qc generate QC report with Kraken
-k STR path to Kraken database for QC, use with -qc
-a check dependancies and print versions
-b STR blastp executable [blastp]
-c STR mcl executable [mcl]
-d STR mcxdeblast executable [mcxdeblast]
-g INT maximum number of clusters [50000]
-m STR makeblastdb executable [makeblastdb]
-r create R plots, requires R and ggplot2
-s dont split paralogs
-t INT translation table [11]
-ap allow paralogs in core alignment
-z dont delete intermediate files
-v verbose output to STDOUT
-w print version and exit
-y add gene inference information to spreadsheet, doesnt work with -e
-iv FLOAT Change the MCL inflation value [1.5]
-h this help message

Example: Quickly generate a core gene alignment using 8 threads
roary -e --mafft -p 8 *.gff

For further info see: http://sanger-pathogens.github.io/Roary/
USAGE
}
|
358
|
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable; |
|
360
|
1
|
|
|
1
|
|
7
|
no Moose; |
|
|
1
|
|
|
|
|
6
|
|
|
|
1
|
|
|
|
|
6
|
|
|
361
|
|
|
|
|
|
|
1; |
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
__END__ |
|
364
|
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
=pod |
|
366
|
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
=encoding UTF-8 |
|
368
|
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
=head1 NAME |
|
370
|
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
Bio::Roary::CommandLine::Roary - Take in FASTA files of proteins and cluster them |
|
372
|
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
=head1 VERSION |
|
374
|
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
version 3.10.1 |
|
376
|
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
378
|
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
Take in FASTA files of proteins and cluster them |
|
380
|
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
=head1 AUTHOR |
|
382
|
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
Andrew J. Page <ap13@sanger.ac.uk> |
|
384
|
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
386
|
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute. |
|
388
|
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
This is free software, licensed under: |
|
390
|
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
The GNU General Public License, Version 3, June 2007 |
|
392
|
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
=cut |