line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::Roary::Output::NumberOfGroups; |
2
|
|
|
|
|
|
|
$Bio::Roary::Output::NumberOfGroups::VERSION = '3.11.0'; |
3
|
|
|
|
|
|
|
# ABSTRACT: Create raw output files of group counts for turning into plots |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
|
6
|
2
|
|
|
2
|
|
652
|
use Moose; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
13
|
|
7
|
2
|
|
|
2
|
|
12097
|
use List::Util qw(shuffle); |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
141
|
|
8
|
2
|
|
|
2
|
|
12
|
use Bio::Roary::AnnotateGroups; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
38
|
|
9
|
2
|
|
|
2
|
|
8
|
use Bio::Roary::GroupStatistics; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
1211
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Constructor arguments
# ---------------------------------------------------------------------------

# Source of the input file names and of the file => groups lookup that the
# expansion methods below read.
has 'group_statistics_obj' => (
    is       => 'ro',
    isa      => 'Bio::Roary::GroupStatistics',
    required => 1,
);

# How many randomised expansion passes create_output_files performs.
has 'number_of_iterations' => (
    is      => 'ro',
    isa     => 'Int',
    default => 10,
);

# Optional; not referenced by the methods visible in this file.
has 'groups_to_contigs' => (
    is  => 'ro',
    isa => 'Maybe[HashRef]',
);

# Required at construction; not referenced by the methods visible in this file.
has 'annotate_groups_obj' => (
    is       => 'ro',
    isa      => 'Bio::Roary::AnnotateGroups',
    required => 1,
);

# Multiplier applied to the number of files processed so far to obtain the
# occurrence count a group needs in order to be tallied as conserved.
has 'core_definition' => (
    is      => 'ro',
    isa     => 'Num',
    default => 1.0,
);

# ---------------------------------------------------------------------------
# Output file names, one per table written by create_output_files
# ---------------------------------------------------------------------------

has 'output_raw_filename_conserved_genes' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'number_of_conserved_genes.Rtab',
);

has 'output_raw_filename_unique_genes' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'number_of_unique_genes.Rtab',
);

has 'output_raw_filename_total_genes' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'number_of_genes_in_pan_genome.Rtab',
);

has 'output_raw_filename_new_genes' => (
    is      => 'ro',
    isa     => 'Str',
    default => 'number_of_new_genes.Rtab',
);

# ---------------------------------------------------------------------------
# Private accumulators
# ---------------------------------------------------------------------------

# Each holds one array reference per expansion pass (a row of per-file
# counts). All four share the same definition, so they are declared together;
# the default is a code reference so every object gets its own fresh array.
has [ qw(_conserved_genes _unique_genes _total_genes _new_genes) ] => (
    is      => 'ro',
    isa     => 'ArrayRef',
    default => sub { [] },
);
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# Public entry point: run number_of_iterations expansion passes, then write
# the conserved / unique / total / new tables to their configured file names,
# in that order. Always returns 1.
sub create_output_files {
    my ($self) = @_;

    # Every pass appends one row to each of the four accumulators.
    $self->_single_iteration_gene_expansion for 1 .. $self->number_of_iterations;

    # Pair each destination file name with the rows that belong in it.
    my @outputs = (
        [ $self->output_raw_filename_conserved_genes, $self->_conserved_genes ],
        [ $self->output_raw_filename_unique_genes,    $self->_unique_genes ],
        [ $self->output_raw_filename_total_genes,     $self->_total_genes ],
        [ $self->output_raw_filename_new_genes,       $self->_new_genes ],
    );

    for my $output (@outputs) {
        my ( $filename, $rows ) = @{$output};
        $self->_create_raw_output_file( $filename, $rows );
    }

    return 1;
}
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
# Write a table of counts to a file as tab-separated text.
#
#   $filename    - path of the file to create; an existing file is truncated
#   $output_data - array reference of array references; each inner array is
#                  written as one line, its elements joined by tabs
#
# Dies with the file name and the OS error if the file cannot be opened,
# written to, or closed. Returns 1 on success.
sub _create_raw_output_file {
    my ( $self, $filename, $output_data ) = @_;

    # Fail loudly: an unchecked open would leave every later print writing to
    # an unopened handle and the caller with no output file and no error.
    open( my $fh, '>', $filename )
      or die "Cannot open '$filename' for writing: $!";

    for my $row ( @{$output_data} ) {
        print {$fh} join( "\t", @{$row} ) . "\n"
          or die "Cannot write to '$filename': $!";
    }

    # Buffered write errors (e.g. a full disk) only surface at close time.
    close($fh)
      or die "Cannot close '$filename': $!";

    return 1;
}
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
# Return a new array reference containing the file names held by
# group_statistics_obj (_sorted_file_names) in random order. The list is
# copied by shuffle, so the array held by the statistics object is untouched.
sub _shuffle_input_files {
    my ($self) = @_;

    my $file_names = $self->group_statistics_obj->_sorted_file_names;

    return [ shuffle( @{$file_names} ) ];
}
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
# Perform one expansion pass: visit the input files in a freshly shuffled
# order and, after each file, record four running counts over every group
# seen so far in this pass:
#
#   conserved - groups whose occurrence count is at least
#               (files processed so far) * core_definition
#   unique    - groups whose occurrence count is exactly 1
#   total     - number of distinct groups seen
#   new       - groups first encountered in the file just processed
#
# One row per count is appended to _conserved_genes, _unique_genes,
# _total_genes and _new_genes respectively. Returns nothing.
sub _single_iteration_gene_expansion {
    my ($self) = @_;

    my %occurrences_of;    # group => times encountered so far in this pass
    my ( @conserved_row, @unique_row, @total_row, @new_row );

    my $file_order      = $self->_shuffle_input_files();
    my $core_fraction   = $self->core_definition;
    my $files_processed = 0;

    for my $file ( @{$file_order} ) {
        $files_processed++;

        my $groups_in_file = $self->group_statistics_obj->_files_to_groups->{$file};

        # Tally this file's groups, noting which ones were not seen before.
        # NOTE(review): a group listed more than once for the same file is
        # counted once per occurrence - confirm the lists are de-duplicated
        # upstream.
        my $first_sightings = 0;
        for my $group ( @{$groups_in_file} ) {
            $first_sightings++ unless defined $occurrences_of{$group};
            $occurrences_of{$group}++;
        }

        # The threshold is the same for every group within this file.
        my $core_threshold = $files_processed * $core_fraction;
        my @counts         = values %occurrences_of;

        # grep in scalar context yields the number of matching elements.
        my $conserved = grep { $_ >= $core_threshold } @counts;
        my $unique    = grep { $_ == 1 } @counts;
        my $total     = scalar @counts;

        push @conserved_row, $conserved;
        push @unique_row,    $unique;
        push @total_row,     $total;
        push @new_row,       $first_sightings;
    }

    push @{ $self->_conserved_genes }, \@conserved_row;
    push @{ $self->_unique_genes },    \@unique_row;
    push @{ $self->_total_genes },     \@total_row;
    push @{ $self->_new_genes },       \@new_row;

    return;
}
105
|
|
|
|
|
|
|
|
106
|
2
|
|
|
2
|
|
14
|
no Moose; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
10
|
|
107
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable; |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
1; |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
__END__ |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=pod |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=encoding UTF-8 |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=head1 NAME |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
Bio::Roary::Output::NumberOfGroups - Create raw output files of group counts for turning into plots |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=head1 VERSION |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
version 3.11.0 |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=head1 SYNOPSIS |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
# ABSTRACT: Create raw output files of group counts for turning into plots |
128
|
|
|
|
|
|
|
use Bio::Roary::Output::NumberOfGroups; |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
my $obj = Bio::Roary::Output::NumberOfGroups->new( |
131
|
|
|
|
|
|
|
group_statistics_obj => $group_stats, annotate_groups_obj => $annotate_groups |
132
|
|
|
|
|
|
|
); |
133
|
|
|
|
|
|
|
$obj->create_output_files(); |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=head1 AUTHOR |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
Andrew J. Page <ap13@sanger.ac.uk> |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute. |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
This is free software, licensed under: |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
The GNU General Public License, Version 3, June 2007 |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=cut |