line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::Roary::AnalyseGroups;
$Bio::Roary::AnalyseGroups::VERSION = '3.10.2';
# ABSTRACT: Take in a groups file and the original FASTA files and create plots and stats


use Moose;
use Bio::Roary::Exceptions;

# Inputs supplied by the caller.
has 'fasta_files'     => ( is => 'ro', isa => 'ArrayRef', required => 1 );
has 'groups_filename' => ( is => 'ro', isa => 'Str',      required => 1 );
has 'output_filename' => ( is => 'ro', isa => 'Str',      default  => 'summary_of_groups' );

# Lookups derived from the inputs.  _genes_to_file and _genes_to_groups have
# no builder of their own: they are filled in as a side effect of
# _builder__files_to_genes and _builder__groups_to_genes respectively.
has '_number_of_isolates' => ( is => 'ro', isa => 'Int',     lazy => 1, builder => '_builder__number_of_isolates' );
has '_genes_to_file'      => ( is => 'rw', isa => 'HashRef' );
has '_files_to_genes'     => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_builder__files_to_genes' );
has '_groups_to_genes'    => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_builder__groups_to_genes' );
has '_genes_to_groups'    => ( is => 'rw', isa => 'HashRef' );

has '_groups' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_builder__groups' );

# Moose construction hook.  Forces the two lazy builders to run immediately
# so that the reverse lookups they populate are available straight after
# new() returns.
sub BUILD {
    my ($self) = @_;

    # This triggers _genes_to_groups to be built
    $self->_groups_to_genes;

    # This triggers _genes_to_file to be built
    $self->_files_to_genes;
}

# Builder for _groups.
# Returns an array ref of all group names, sorted as strings.
sub _builder__groups {
    my ($self) = @_;
    return [ sort keys %{ $self->_groups_to_genes } ];
}

# Builder for _number_of_isolates.
# Returns the number of entries in fasta_files.  The explicit scalar() makes
# the result a count regardless of the context the builder is called in;
# a bare array would return the filenames themselves in list context.
sub _builder__number_of_isolates {
    my ($self) = @_;
    return scalar @{ $self->fasta_files };
}

# Builder for _files_to_genes.
# Scans every file in fasta_files for lines containing '>' and takes the
# first whitespace-delimited field of each such line, minus its first '>',
# as a gene name.
#
# Returns a hash ref mapping filename => array ref of gene names (in file
# order).  As a side effect stores the reverse mapping gene name => filename
# in _genes_to_file; if a gene name occurs in several files the last file
# processed wins.
#
# The parsing is done in Perl rather than by piping through grep/awk/sed so
# that filenames never pass through a shell.
sub _builder__files_to_genes {
    my ($self) = @_;
    my %files_to_genes;
    my %genes_to_file;

  FASTA_FILE:
    for my $filename ( @{ $self->fasta_files } ) {
        my $fh;
        if ( !open( $fh, '<', $filename ) ) {

            # An unreadable file contributes no genes; report it on STDERR
            # and carry on with the remaining files.
            warn "Could not read FASTA file '$filename': $!\n";
            next FASTA_FILE;
        }

        while ( my $line = <$fh> ) {
            next if index( $line, '>' ) < 0;

            # split ' ' skips leading whitespace and splits on runs of
            # whitespace, which also removes the trailing newline.
            my ($gene_name) = split( ' ', $line );
            next if !defined $gene_name;

            $gene_name =~ s/>//;
            next if $gene_name eq "";

            push( @{ $files_to_genes{$filename} }, $gene_name );
            $genes_to_file{$gene_name} = $filename;
        }
        close($fh);
    }

    $self->_genes_to_file( \%genes_to_file );
    return \%files_to_genes;
}

# Given an array ref of gene names, returns the distinct filenames (looked up
# through _genes_to_file) that those genes belong to.  Empty gene names and
# genes with no known file are ignored.  Because an array is returned, a
# caller in scalar context receives the number of distinct files.
sub _count_num_files_in_group {
    my ( $self, $genes ) = @_;
    my $genes_to_file = $self->_genes_to_file;
    my %filename_freq;

    for my $gene ( @{$genes} ) {
        next if $gene eq "";

        my $filename = $genes_to_file->{$gene};
        next if !defined $filename;

        $filename_freq{$filename}++;
    }

    my @uniq_filenames = keys %filename_freq;
    return @uniq_filenames;
}

# Builder for _groups_to_genes.
# Reads groups_filename, where each useful line has the form
# "<group name>: <gene> <gene> ...".  Lines that do not match are skipped.
#
# Returns a hash ref mapping group name => array ref of gene names.  As a
# side effect stores the reverse mapping gene name => group name in
# _genes_to_groups.
#
# Throws Bio::Roary::Exceptions::FileNotFound if the file cannot be opened.
sub _builder__groups_to_genes {
    my ($self) = @_;
    my %groups_to_genes;
    my %genes_to_groups;

    # Three-argument open so the filename is never interpreted as a mode.
    open( my $fh, '<', $self->groups_filename )
      or Bio::Roary::Exceptions::FileNotFound->throw( error => "Group file not found:" . $self->groups_filename );

    while ( my $line = <$fh> ) {
        chomp $line;
        next unless $line =~ /^(.+): (.+)$/;

        my ( $group_name, $genes ) = ( $1, $2 );
        my @elements = split( /[\s\t]+/, $genes );
        $groups_to_genes{$group_name} = \@elements;

        for my $gene (@elements) {
            $genes_to_groups{$gene} = $group_name;
        }
    }
    close($fh);

    $self->_genes_to_groups( \%genes_to_groups );
    return \%groups_to_genes;
}

no Moose;
__PACKAGE__->meta->make_immutable;

1;

__END__

=pod

=encoding UTF-8

=head1 NAME

Bio::Roary::AnalyseGroups - Take in a groups file and the original FASTA files and create plots and stats

=head1 VERSION

version 3.10.2

=head1 SYNOPSIS

Take in a groups file and the original FASTA files and create plots and stats

   use Bio::Roary::AnalyseGroups;

   my $plot_groups_obj = Bio::Roary::AnalyseGroups->new(
       fasta_files      => $fasta_files,
       groups_filename  => $groups_filename,
       output_filename  => $output_filename
     );

Constructing the object reads the groups file and every FASTA file and builds
the lookups between groups, genes and files.

=head1 AUTHOR

Andrew J. Page <ap13@sanger.ac.uk>

=head1 COPYRIGHT AND LICENSE

This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.

This is free software, licensed under:

  The GNU General Public License, Version 3, June 2007

=cut