File Coverage

lib/Bio/Roary/Output/NumberOfGroups.pm
Criterion Covered Total %
statement 76 76 100.0
branch 6 6 100.0
condition n/a
subroutine 9 9 100.0
pod 0 1 0.0
total 91 92 98.9


line stmt bran cond sub pod time code
1             package Bio::Roary::Output::NumberOfGroups;
2             $Bio::Roary::Output::NumberOfGroups::VERSION = '3.10.1';
3             # ABSTRACT: Create raw output files of group counts for turning into plots
4              
5              
6 2     2   620 use Moose;
  2         5  
  2         14  
7 2     2   12775 use List::Util qw(shuffle);
  2         4  
  2         172  
8 2     2   14 use Bio::Roary::AnnotateGroups;
  2         3  
  2         42  
9 2     2   19 use Bio::Roary::GroupStatistics;
  2         5  
  2         1262  
10              
# --- Inputs ---------------------------------------------------------------

# Source of the file names and the file-to-groups mapping used when
# simulating the gene expansion. Must be supplied by the caller.
has 'group_statistics_obj' => (
    is       => 'ro',
    isa      => 'Bio::Roary::GroupStatistics',
    required => 1,
);

# How many times the expansion is repeated; each repetition contributes one
# row to every output file.
has 'number_of_iterations' => (
    is      => 'ro',
    isa     => 'Int',
    default => 10,
);

# Optional hash ref (may be undef). Not referenced by the methods of this
# package.
has 'groups_to_contigs' => (
    is  => 'ro',
    isa => 'Maybe[HashRef]',
);

# Must be supplied by the caller. Not referenced by the methods of this
# package.
has 'annotate_groups_obj' => (
    is       => 'ro',
    isa      => 'Bio::Roary::AnnotateGroups',
    required => 1,
);

# Multiplied by the number of files processed so far to obtain the minimum
# count a group needs in order to be tallied as conserved.
has 'core_definition' => (
    is      => 'ro',
    isa     => 'Num',
    default => 1.0,
);

# --- Output file names ----------------------------------------------------

has 'output_raw_filename_conserved_genes' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'number_of_conserved_genes.Rtab',
);
has 'output_raw_filename_unique_genes' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'number_of_unique_genes.Rtab',
);
has 'output_raw_filename_total_genes' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'number_of_genes_in_pan_genome.Rtab',
);
has 'output_raw_filename_new_genes' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'number_of_new_genes.Rtab',
);

# --- Accumulated results --------------------------------------------------
# Each attribute is an array ref that receives one array ref of per-file
# counts for every iteration that is run.

has '_conserved_genes' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    default => sub { [] },
);
has '_unique_genes' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    default => sub { [] },
);
has '_total_genes' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    default => sub { [] },
);
has '_new_genes' => (
    is      => 'ro',
    isa     => 'ArrayRef',
    default => sub { [] },
);
25              
# Run the configured number of expansion iterations, then write the four
# accumulated result sets (conserved, unique, total, new) to their files.
#
# Returns 1.
sub create_output_files {
    my ($self) = @_;

    for my $iteration ( 1 .. $self->number_of_iterations ) {
        $self->_single_iteration_gene_expansion;
    }

    # Pairs of (filename accessor, data accessor), written in this order.
    my @outputs = (
        [ 'output_raw_filename_conserved_genes', '_conserved_genes' ],
        [ 'output_raw_filename_unique_genes',    '_unique_genes' ],
        [ 'output_raw_filename_total_genes',     '_total_genes' ],
        [ 'output_raw_filename_new_genes',       '_new_genes' ],
    );

    for my $output (@outputs) {
        my ( $filename_accessor, $data_accessor ) = @{$output};
        $self->_create_raw_output_file( $self->$filename_accessor, $self->$data_accessor );
    }

    return 1;
}
39              
# Write one tab-separated line per iteration to $filename.
#
# Parameters:
#   $self        - invocant (not otherwise used by this method)
#   $filename    - path of the file to create; opened in '>' mode, so an
#                  existing file is overwritten
#   $output_data - array ref of array refs; the values of each inner array
#                  are joined with tabs and written as one line
#
# Returns 1 on success.
# Dies with the filename and $! if the file cannot be opened or closed.
sub _create_raw_output_file {
    my ( $self, $filename, $output_data ) = @_;

    # Without this check a bad path would silently produce no output file.
    open( my $fh, '>', $filename )
      or die "Could not open '$filename' for writing: $!";

    for my $iteration ( @{$output_data} ) {
        print {$fh} join( "\t", @{$iteration} ) . "\n";
    }

    # Buffered write errors (e.g. a full disk) only surface at close.
    close($fh)
      or die "Could not close '$filename' after writing: $!";

    return 1;
}
49              
# Return a new array ref holding the entries of
# group_statistics_obj->_sorted_file_names in the order produced by
# List::Util::shuffle.
sub _shuffle_input_files {
    my ($self) = @_;
    my $statistics = $self->group_statistics_obj;
    return [ shuffle( @{ $statistics->_sorted_file_names } ) ];
}
55              
# Run one expansion pass: take the input files in the order given by
# _shuffle_input_files and, after adding each file's groups to a running
# tally, record four numbers for that step:
#
#   conserved - groups whose tally is >= (files processed * core_definition)
#   unique    - groups whose tally is exactly 1
#   total     - number of distinct groups tallied so far
#   new       - groups in this file that had not been tallied before
#
# The four per-file series are appended, as array refs, to _conserved_genes,
# _unique_genes, _total_genes and _new_genes respectively.
#
# Returns nothing.
sub _single_iteration_gene_expansion {
    my ($self) = @_;

    my %times_seen;    # group => occurrences across the files processed so far
    my ( @conserved_series, @unique_series, @total_series, @new_series );

    my $file_order      = $self->_shuffle_input_files();
    my $files_processed = 0;

    for my $input_file ( @{$file_order} ) {
        $files_processed++;
        my $groups_in_file = $self->group_statistics_obj->_files_to_groups->{$input_file};

        # Count first-time sightings before bumping the tally.
        my $newly_seen = 0;
        for my $group ( @{$groups_in_file} ) {
            $newly_seen++ unless defined $times_seen{$group};
            $times_seen{$group}++;
        }

        # grep in scalar context yields the number of matching tallies.
        my @tallies   = values %times_seen;
        my $conserved = grep { $_ >= ( $files_processed * $self->core_definition ) } @tallies;
        my $unique    = grep { $_ == 1 } @tallies;
        my $total     = scalar @tallies;

        push @conserved_series, $conserved;
        push @unique_series,    $unique;
        push @total_series,     $total;
        push @new_series,       $newly_seen;
    }

    push @{ $self->_conserved_genes }, \@conserved_series;
    push @{ $self->_unique_genes },    \@unique_series;
    push @{ $self->_total_genes },     \@total_series;
    push @{ $self->_new_genes },       \@new_series;

    return;
}
105              
106 2     2   15 no Moose;
  2         8  
  2         11  
107             __PACKAGE__->meta->make_immutable;
108              
109             1;
110              
111             __END__
112              
113             =pod
114              
115             =encoding UTF-8
116              
117             =head1 NAME
118              
119             Bio::Roary::Output::NumberOfGroups - Create raw output files of group counts for turning into plots
120              
121             =head1 VERSION
122              
123             version 3.10.1
124              
125             =head1 SYNOPSIS
126              
127             # ABSTRACT: Create raw output files of group counts for turning into plots
128             use Bio::Roary::Output::NumberOfGroups;
129              
130             my $obj = Bio::Roary::Output::NumberOfGroups->new(
131             group_statistics_obj => $group_stats, annotate_groups_obj => $annotate_groups
132             );
133             $obj->create_output_files();
134              
135             =head1 AUTHOR
136              
137             Andrew J. Page <ap13@sanger.ac.uk>
138              
139             =head1 COPYRIGHT AND LICENSE
140              
141             This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
142              
143             This is free software, licensed under:
144              
145             The GNU General Public License, Version 3, June 2007
146              
147             =cut