File Coverage

lib/Bio/Roary/PostAnalysis.pm
Criterion Covered Total %
statement 54 152 35.5
branch 0 18 0.0
condition 0 6 0.0
subroutine 18 35 51.4
pod 0 1 0.0
total 72 212 33.9


line stmt bran cond sub pod time code
1             package Bio::Roary::PostAnalysis;
2             $Bio::Roary::PostAnalysis::VERSION = '3.10.1';
3             # ABSTRACT: Post analysis of pan genomes
4              
5              
6 1     1   6 use Moose;
  1         2  
  1         9  
7 1     1   7251 use File::Copy;
  1         1950  
  1         80  
8 1     1   256 use Bio::Roary::InflateClusters;
  1         7  
  1         109  
9 1     1   557 use Bio::Roary::AnalyseGroups;
  1         4  
  1         46  
10 1     1   422 use Bio::Roary::GroupLabels;
  1         3  
  1         50  
11 1     1   569 use Bio::Roary::AnnotateGroups;
  1         5  
  1         71  
12 1     1   587 use Bio::Roary::GroupStatistics;
  1         6  
  1         70  
13 1     1   458 use Bio::Roary::Output::GroupsMultifastasNucleotide;
  1         5  
  1         50  
14 1     1   493 use Bio::Roary::Output::NumberOfGroups;
  1         3  
  1         51  
15 1     1   431 use Bio::Roary::OrderGenes;
  1         4  
  1         50  
16 1     1   564 use Bio::Roary::Output::EmblGroups;
  1         4  
  1         52  
17 1     1   434 use Bio::Roary::SplitGroups;
  1         5  
  1         50  
18 1     1   411 use Bio::Roary::AccessoryBinaryFasta;
  1         8  
  1         55  
19 1     1   514 use Bio::Roary::External::Fasttree;
  1         7  
  1         68  
20 1     1   579 use Bio::Roary::AccessoryClustering;
  1         6  
  1         79  
21 1     1   673 use Bio::Roary::AssemblyStatistics;
  1         7  
  1         93  
22 1     1   16 use Log::Log4perl qw(:easy);
  1         4  
  1         16  
23              
24             has 'fasta_files' => ( is => 'rw', isa => 'ArrayRef', required => 1 );
25             has 'input_files' => ( is => 'rw', isa => 'ArrayRef', required => 1 );
26             has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'clustered_proteins' );
27             has 'output_pan_geneome_filename' => ( is => 'rw', isa => 'Str', default => 'pan_genome.fa' );
28             has 'output_statistics_filename' => ( is => 'rw', isa => 'Str', default => 'gene_presence_absence.csv' );
29             has 'output_multifasta_files' => ( is => 'ro', isa => 'Bool', default => 0 );
30             has 'verbose_stats' => ( is => 'rw', isa => 'Bool', default => 0 );
31             has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
32             has 'cpus' => ( is => 'ro', isa => 'Int', default => 1 );
33              
34             has 'clusters_filename' => ( is => 'rw', isa => 'Str', required => 1 );
35             has 'dont_delete_files' => ( is => 'ro', isa => 'Bool', default => 0 );
36             has 'dont_split_groups' => ( is => 'ro', isa => 'Bool', default => 0 );
37             has 'dont_create_rplots' => ( is => 'rw', isa => 'Bool', default => 1 );
38             has 'group_limit' => ( is => 'rw', isa => 'Num', default => 50000 );
39              
40             has '_output_mcl_filename' => ( is => 'ro', isa => 'Str', default => '_uninflated_mcl_groups' );
41             has '_output_inflate_unsplit_clusters_filename' => ( is => 'ro', isa => 'Str', default => '_inflated_unsplit_mcl_groups' );
42             has '_output_inflate_clusters_filename' => ( is => 'ro', isa => 'Str', default => '_inflated_mcl_groups' );
43             has '_output_group_labels_filename' => ( is => 'ro', isa => 'Str', default => '_labeled_mcl_groups' );
44             has '_output_combined_filename' => ( is => 'ro', isa => 'Str', default => '_combined_files' );
45             has '_input_cd_hit_groups_file' => ( is => 'ro', isa => 'Str', default => '_combined_files.groups' );
46             has 'core_accessory_tab_output_filename' => ( is => 'ro', isa => 'Str', default => 'core_accessory.tab' );
47             has 'accessory_tab_output_filename' => ( is => 'ro', isa => 'Str', default => 'accessory.tab' );
48             has 'core_accessory_ordering_key' => ( is => 'ro', isa => 'Str', default => 'core_accessory_overall_order_filtered' );
49             has 'accessory_ordering_key' => ( is => 'ro', isa => 'Str', default => 'accessory_overall_order_filtered' );
50             has 'core_definition' => ( is => 'ro', isa => 'Num', default => 1.0 );
51             has 'pan_genome_reference_filename' => ( is => 'ro', isa => 'Str', default => 'pan_genome_reference.fa' );
52              
53             has '_inflate_clusters_obj' => ( is => 'ro', isa => 'Bio::Roary::InflateClusters', lazy => 1, builder => '_build__inflate_clusters_obj' );
54             has '_group_labels_obj' => ( is => 'ro', isa => 'Bio::Roary::GroupLabels', lazy => 1, builder => '_build__group_labels_obj' );
55             has '_annotate_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnnotateGroups', lazy => 1, builder => '_build__annotate_groups_obj' );
56             has '_analyse_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups', lazy => 1, builder => '_build__analyse_groups_obj' );
57             has '_order_genes_obj' => ( is => 'ro', isa => 'Bio::Roary::OrderGenes', lazy => 1, builder => '_build__order_genes_obj' );
58             has '_group_statistics_obj' => ( is => 'ro', isa => 'Bio::Roary::GroupStatistics', lazy => 1, builder => '_build__group_statistics_obj' );
59             has '_number_of_groups_obj' =>
60             ( is => 'ro', isa => 'Bio::Roary::Output::NumberOfGroups', lazy => 1, builder => '_build__number_of_groups_obj' );
61             has '_accessory_binary_fasta' =>
62             ( is => 'ro', isa => 'Bio::Roary::AccessoryBinaryFasta', lazy => 1, builder => '_build__accessory_binary_fasta' );
63             has '_groups_multifastas_nuc_obj' =>
64             ( is => 'ro', isa => 'Bio::Roary::Output::GroupsMultifastasNucleotide', lazy => 1, builder => '_build__groups_multifastas_nuc_obj' );
65             has '_split_groups_obj' => ( is => 'ro', isa => 'Bio::Roary::SplitGroups', lazy => 1, builder => '_build__split_groups_obj' );
66             has '_accessory_binary_tree' =>
67             ( is => 'ro', isa => 'Bio::Roary::External::Fasttree', lazy => 1, builder => '_build__accessory_binary_tree' );
68             has '_accessory_clustering' =>
69             ( is => 'ro', isa => 'Maybe[Bio::Roary::AccessoryClustering]', lazy => 1, builder => '_build__accessory_clustering' );
70             has '_assembly_statistics' => ( is => 'ro', isa => 'Bio::Roary::AssemblyStatistics', lazy => 1, builder => '_build__assembly_statistics' );
71              
72             has 'logger' => ( is => 'ro', lazy => 1, builder => '_build_logger' );
73              
74             sub _build_logger {
75 0     0     my ($self) = @_;
76 0           Log::Log4perl->easy_init( level => $ERROR );
77 0           my $logger = get_logger();
78 0           return $logger;
79             }
80              
81             sub run {
82 0     0 0   my ($self) = @_;
83              
84 0           $self->logger->info("Reinflate clusters");
85 0           $self->_inflate_clusters_obj->inflate();
86              
87 0           $self->logger->info("Split groups with paralogs");
88             ## SPLIT GROUPS WITH PARALOGS ##
89 0 0         if ( $self->dont_split_groups ) {
90 0           move( $self->_output_inflate_unsplit_clusters_filename, $self->_output_inflate_clusters_filename );
91             }
92             else {
93 0           $self->_split_groups_obj->split_groups;
94             }
95              
96 0           $self->logger->info("Labelling the groups");
97 0           $self->_group_labels_obj->add_labels();
98              
99 0           $self->logger->info("Transfering the annotation to the groups");
100 0           $self->_annotate_groups_obj->reannotate;
101              
102 0           $self->logger->info("Creating accessory binary gene presence and absence fasta");
103 0           $self->_accessory_binary_fasta->create_accessory_binary_fasta;
104              
105 0           $self->logger->info("Creating accessory binary gene presence and absence tree");
106 0           $self->_accessory_binary_tree->run;
107              
108 0           $self->logger->info("Creating accessory gene presence and absence clusters");
109 0 0         if ( $self->_accessory_clustering ) {
110 0           $self->_accessory_clustering->sample_weights;
111             }
112              
113 0           $self->logger->info("Creating the spreadsheet with gene presence and absence");
114 0           $self->_group_statistics_obj->create_spreadsheet;
115 0           $self->_group_statistics_obj->create_rtab;
116              
117 0           $self->logger->info("Creating summary statistics of the spreadsheet");
118 0           $self->_assembly_statistics->create_summary_output;
119              
120 0           $self->logger->info("Creating tab files for R");
121 0           $self->_number_of_groups_obj->create_output_files;
122              
123 0 0         system("create_pan_genome_plots.R") unless ( $self->dont_create_rplots == 1 );
124              
125 0           $self->logger->info("Create EMBL files");
126 0           $self->_create_embl_files;
127              
128 0           my $clusters_not_exceeded = 1;
129 0 0         if ( $self->output_multifasta_files ) {
130 0           $self->logger->info("Creating files with the nucleotide sequences for every cluster");
131 0           $clusters_not_exceeded = $self->_groups_multifastas_nuc_obj->create_files();
132             }
133              
134 0           $self->_delete_intermediate_files;
135 0 0 0       if ( $clusters_not_exceeded == 0 && $self->output_multifasta_files ) {
136 0           $self->logger->error("Exiting early because number of clusters is too high");
137 0           exit();
138             }
139             }
140              
141             sub _build__assembly_statistics {
142 0     0     my ($self) = @_;
143 0           return Bio::Roary::AssemblyStatistics->new(
144             spreadsheet => $self->_group_statistics_obj->output_filename,
145             core_definition => $self->core_definition,
146             logger => $self->logger
147             );
148             }
149              
150             sub _build__accessory_clustering {
151 0     0     my ($self) = @_;
152 0 0 0       if ( ( -e $self->_accessory_binary_fasta->output_filename ) && ( -s $self->_accessory_binary_fasta->output_filename > 5 ) ) {
153 0           $self->logger->info( $self->_accessory_binary_fasta->output_filename );
154 0           return Bio::Roary::AccessoryClustering->new(
155             input_file => $self->_accessory_binary_fasta->output_filename,
156             cpus => $self->cpus,
157             logger => $self->logger
158             );
159             }
160             else {
161 0           $self->logger->info("Theres no accessory binary file so skipping accessory binary clustering");
162 0           return undef;
163             }
164              
165             }
166              
167             sub _build__accessory_binary_tree {
168 0     0     my ($self) = @_;
169 0           return Bio::Roary::External::Fasttree->new(
170             input_file => $self->_accessory_binary_fasta->output_filename,
171             verbose => $self->verbose,
172             logger => $self->logger
173             );
174             }
175              
176             sub _build__accessory_binary_fasta {
177 0     0     my ($self) = @_;
178 0           return Bio::Roary::AccessoryBinaryFasta->new(
179             input_files => $self->fasta_files,
180             annotate_groups_obj => $self->_annotate_groups_obj,
181             analyse_groups_obj => $self->_analyse_groups_obj,
182             logger => $self->logger
183             );
184             }
185              
186             sub _build__split_groups_obj {
187 0     0     my ($self) = @_;
188 0           return Bio::Roary::SplitGroups->new(
189             groupfile => $self->_output_inflate_unsplit_clusters_filename,
190             gff_files => $self->input_files,
191             fasta_files => $self->fasta_files,
192             outfile => $self->_output_inflate_clusters_filename,
193             dont_delete => $self->dont_delete_files,
194             logger => $self->logger
195             );
196             }
197              
198             sub _build__number_of_groups_obj {
199 0     0     my ($self) = @_;
200 0           return Bio::Roary::Output::NumberOfGroups->new(
201             group_statistics_obj => $self->_group_statistics_obj,
202             groups_to_contigs => $self->_order_genes_obj->groups_to_contigs,
203             annotate_groups_obj => $self->_annotate_groups_obj,
204             core_definition => $self->core_definition,
205             logger => $self->logger
206             );
207             }
208              
209             sub _build__group_statistics_obj {
210 0     0     my ($self) = @_;
211 0           return Bio::Roary::GroupStatistics->new(
212             output_filename => $self->output_statistics_filename,
213             annotate_groups_obj => $self->_annotate_groups_obj,
214             analyse_groups_obj => $self->_analyse_groups_obj,
215             groups_to_contigs => $self->_order_genes_obj->groups_to_contigs,
216             _verbose => $self->verbose_stats,
217             logger => $self->logger
218             );
219             }
220              
221             sub _build__order_genes_obj {
222 0     0     my ($self) = @_;
223 0 0         if ( defined( $self->_accessory_clustering ) ) {
224 0           return Bio::Roary::OrderGenes->new(
225             analyse_groups_obj => $self->_analyse_groups_obj,
226             gff_files => $self->input_files,
227             core_definition => $self->core_definition,
228             sample_weights => $self->_accessory_clustering->sample_weights,
229             samples_to_clusters => $self->_accessory_clustering->samples_to_clusters,
230             logger => $self->logger
231             );
232             }
233             else {
234 0           return Bio::Roary::OrderGenes->new(
235             analyse_groups_obj => $self->_analyse_groups_obj,
236             gff_files => $self->input_files,
237             core_definition => $self->core_definition,
238             logger => $self->logger
239             );
240             }
241             }
242              
243             sub _build__group_labels_obj {
244 0     0     my ($self) = @_;
245 0           return Bio::Roary::GroupLabels->new(
246             groups_filename => $self->_output_inflate_clusters_filename,
247             output_filename => $self->_output_group_labels_filename,
248             logger => $self->logger
249             );
250             }
251              
252             sub _build__annotate_groups_obj {
253 0     0     my ($self) = @_;
254 0           return Bio::Roary::AnnotateGroups->new(
255             gff_files => $self->input_files,
256             output_filename => $self->output_filename,
257             groups_filename => $self->_output_group_labels_filename,
258             logger => $self->logger
259             );
260             }
261              
262             sub _build__analyse_groups_obj {
263 0     0     my ($self) = @_;
264 0           return Bio::Roary::AnalyseGroups->new(
265             fasta_files => $self->fasta_files,
266             groups_filename => $self->output_filename,
267             logger => $self->logger
268             );
269             }
270              
271             sub _build__inflate_clusters_obj {
272 0     0     my ($self) = @_;
273 0           return Bio::Roary::InflateClusters->new(
274             clusters_filename => $self->clusters_filename,
275             cdhit_groups_filename => $self->_input_cd_hit_groups_file,
276             mcl_filename => $self->_output_mcl_filename,
277             output_file => $self->_output_inflate_unsplit_clusters_filename,
278             logger => $self->logger
279             );
280             }
281              
282             sub _build__groups_multifastas_nuc_obj {
283 0     0     my ($self) = @_;
284 0           return Bio::Roary::Output::GroupsMultifastasNucleotide->new(
285             output_multifasta_files => $self->output_multifasta_files,
286             gff_files => $self->input_files,
287             annotate_groups => $self->_annotate_groups_obj,
288             group_names => $self->_analyse_groups_obj->_groups,
289             group_limit => $self->group_limit,
290             core_definition => $self->core_definition,
291             dont_delete_files => $self->dont_delete_files,
292             logger => $self->logger
293             );
294             }
295              
296             sub _create_embl_files {
297 0     0     my ($self) = @_;
298 0           my $core_accessory_tab_obj = Bio::Roary::Output::EmblGroups->new(
299             output_filename => $self->core_accessory_tab_output_filename,
300             annotate_groups_obj => $self->_annotate_groups_obj,
301             analyse_groups_obj => $self->_analyse_groups_obj,
302             ordering_key => $self->core_accessory_ordering_key,
303             groups_to_contigs => $self->_order_genes_obj->groups_to_contigs,
304             logger => $self->logger
305             );
306 0           $core_accessory_tab_obj->create_files;
307              
308 0           my $accessory_tab_obj = Bio::Roary::Output::EmblGroups->new(
309             output_filename => $self->accessory_tab_output_filename,
310             annotate_groups_obj => $self->_annotate_groups_obj,
311             analyse_groups_obj => $self->_analyse_groups_obj,
312             ordering_key => $self->accessory_ordering_key,
313             groups_to_contigs => $self->_order_genes_obj->groups_to_contigs,
314             logger => $self->logger
315             );
316 0           $accessory_tab_obj->create_files;
317             }
318              
319             sub _delete_intermediate_files {
320 0     0     my ($self) = @_;
321 0 0         return if ( $self->dont_delete_files == 1 );
322 0           $self->logger->info("Cleaning up files");
323              
324 0           for my $fasta_file ( @{ $self->fasta_files } ) {
  0            
325 0 0         unlink($fasta_file) if ( -e $fasta_file );
326             }
327              
328 0           unlink( $self->_output_mcl_filename );
329 0           unlink( $self->_output_inflate_clusters_filename );
330 0           unlink( $self->_output_group_labels_filename );
331 0           unlink( $self->_output_combined_filename );
332 0           unlink( $self->clusters_filename );
333 0           unlink( $self->clusters_filename . '.clstr' );
334 0           unlink( $self->clusters_filename . '.bak.clstr' );
335 0           unlink('_gff_files');
336 0           unlink('_fasta_files');
337 0           unlink('_clustered_filtered.fa');
338 0           unlink( $self->_input_cd_hit_groups_file );
339 0           unlink('database_masking.asnb');
340 0           unlink('_clustered');
341 0           unlink('_accessory_clusters');
342 0           unlink('_accessory_clusters.clstr');
343             }
344              
345 1     1   2770 no Moose;
  1         6  
  1         13  
346             __PACKAGE__->meta->make_immutable;
347              
348             1;
349              
350             __END__
351              
352             =pod
353              
354             =encoding UTF-8
355              
356             =head1 NAME
357              
358             Bio::Roary::PostAnalysis - Post analysis of pan genomes
359              
360             =head1 VERSION
361              
362             version 3.10.1
363              
364             =head1 SYNOPSIS
365              
366             Create a pan genome
367              
368             =head1 AUTHOR
369              
370             Andrew J. Page <ap13@sanger.ac.uk>
371              
372             =head1 COPYRIGHT AND LICENSE
373              
374             This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
375              
376             This is free software, licensed under:
377              
378             The GNU General Public License, Version 3, June 2007
379              
380             =cut