line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::Roary::Output::QueryGroups; |
2
|
|
|
|
|
|
|
$Bio::Roary::Output::QueryGroups::VERSION = '3.11.0'; |
3
|
|
|
|
|
|
|
# ABSTRACT: Output the groups of the union of a set of input isolates |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
|
6
|
3
|
|
|
3
|
|
451049
|
use Moose; |
|
3
|
|
|
|
|
12
|
|
|
3
|
|
|
|
|
20
|
|
7
|
3
|
|
|
3
|
|
17201
|
use Bio::SeqIO; |
|
3
|
|
|
|
|
51531
|
|
|
3
|
|
|
|
|
75
|
|
8
|
3
|
|
|
3
|
|
248
|
use Bio::Roary::Exceptions; |
|
3
|
|
|
|
|
3
|
|
|
3
|
|
|
|
|
94
|
|
9
|
3
|
|
|
3
|
|
216
|
use Bio::Roary::AnalyseGroups; |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
83
|
|
10
|
3
|
|
|
3
|
|
1114
|
use POSIX; |
|
3
|
|
|
|
|
13711
|
|
|
3
|
|
|
|
|
14
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
has 'analyse_groups' => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups', required => 1 ); |
13
|
|
|
|
|
|
|
has 'input_filenames' => ( is => 'ro', isa => 'ArrayRef', required => 1 ); |
14
|
|
|
|
|
|
|
has 'output_union_filename' => ( is => 'ro', isa => 'Str', default => 'union_of_groups.gg' ); |
15
|
|
|
|
|
|
|
has 'output_intersection_filename' => ( is => 'ro', isa => 'Str', default => 'intersection_of_groups.gg' ); |
16
|
|
|
|
|
|
|
has 'output_complement_filename' => ( is => 'ro', isa => 'Str', default => 'complement_of_groups.gg' ); |
17
|
|
|
|
|
|
|
has 'core_definition' => ( is => 'ro', isa => 'Num', default => 1.0 ); |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
has '_groups_freq' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__groups_freq' ); |
20
|
|
|
|
|
|
|
has '_groups_intersection' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__groups_intersection' ); |
21
|
|
|
|
|
|
|
has '_groups_complement' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__groups_complement' ); |
22
|
|
|
|
|
|
|
has '_groups' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__groups' ); |
23
|
|
|
|
|
|
|
has '_number_of_isolates' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_builder__number_of_isolates' ); |
24
|
|
|
|
|
|
|
has '_min_no_isolates_for_core' => ( is => 'rw', isa => 'Int', lazy_build => 1 ); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
sub _build__min_no_isolates_for_core { |
27
|
5
|
|
|
5
|
|
13
|
my ( $self ) = @_; |
28
|
5
|
|
|
|
|
144
|
my $threshold = ceil( $self->_number_of_isolates * $self->core_definition ); |
29
|
|
|
|
|
|
|
|
30
|
5
|
|
|
|
|
112
|
return $threshold; |
31
|
|
|
|
|
|
|
} |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
sub _builder__number_of_isolates { |
34
|
5
|
|
|
5
|
|
16
|
my ($self) = @_; |
35
|
5
|
|
|
|
|
11
|
return @{ $self->input_filenames }; |
|
5
|
|
|
|
|
108
|
|
36
|
|
|
|
|
|
|
} |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
sub _build__groups_freq { |
39
|
31
|
|
|
31
|
|
79
|
my ($self) = @_; |
40
|
31
|
|
|
|
|
50
|
my %groups_freq; |
41
|
|
|
|
|
|
|
|
42
|
31
|
|
|
|
|
43
|
for my $filename ( @{ $self->input_filenames } ) { |
|
31
|
|
|
|
|
973
|
|
43
|
68
|
|
|
|
|
1631
|
my $genes = $self->analyse_groups->_files_to_genes->{$filename}; |
44
|
|
|
|
|
|
|
|
45
|
68
|
|
|
|
|
133
|
my %file_groups_seen; |
46
|
68
|
|
|
|
|
89
|
for my $gene ( @{$genes} ) { |
|
68
|
|
|
|
|
145
|
|
47
|
249
|
50
|
|
|
|
475
|
next if(!defined($gene)); |
48
|
249
|
50
|
|
|
|
5522
|
next if(!defined($self->analyse_groups->_genes_to_groups->{$gene})); |
49
|
249
|
100
|
|
|
|
5416
|
next if(defined($file_groups_seen{$self->analyse_groups->_genes_to_groups->{$gene}})); |
50
|
|
|
|
|
|
|
|
51
|
246
|
|
|
|
|
339
|
push(@{$groups_freq{ $self->analyse_groups->_genes_to_groups->{$gene} }}, $gene); |
|
246
|
|
|
|
|
5392
|
|
52
|
246
|
|
|
|
|
5709
|
$file_groups_seen{$self->analyse_groups->_genes_to_groups->{$gene}} = 1; |
53
|
|
|
|
|
|
|
} |
54
|
|
|
|
|
|
|
} |
55
|
|
|
|
|
|
|
|
56
|
31
|
|
|
|
|
819
|
return \%groups_freq; |
57
|
|
|
|
|
|
|
} |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
sub _build__groups { |
60
|
31
|
|
|
31
|
|
84
|
my ($self) = @_; |
61
|
31
|
|
|
|
|
54
|
my %groups_freq = %{ $self->_groups_freq }; |
|
31
|
|
|
|
|
888
|
|
62
|
31
|
|
|
|
|
319
|
my @groups = sort { @{$groups_freq{$b}} <=> @{$groups_freq{$a}} } keys %groups_freq; |
|
270
|
|
|
|
|
330
|
|
|
270
|
|
|
|
|
395
|
|
|
270
|
|
|
|
|
535
|
|
63
|
31
|
|
|
|
|
913
|
return \@groups; |
64
|
|
|
|
|
|
|
} |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
sub _build__groups_intersection { |
67
|
5
|
|
|
5
|
|
17
|
my ($self) = @_; |
68
|
5
|
|
|
|
|
10
|
my @groups_intersection; |
69
|
|
|
|
|
|
|
|
70
|
5
|
|
|
|
|
9
|
for my $group ( @{$self->_groups} ) { |
|
5
|
|
|
|
|
144
|
|
71
|
34
|
100
|
|
|
|
174
|
if ( scalar @{$self->_groups_freq->{$group}} >= $self->_min_no_isolates_for_core ) { |
|
34
|
|
|
|
|
685
|
|
72
|
9
|
|
|
|
|
25
|
push( @groups_intersection, $group ); |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
} |
75
|
5
|
|
|
|
|
139
|
return \@groups_intersection; |
76
|
|
|
|
|
|
|
} |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
sub _build__groups_complement { |
79
|
3
|
|
|
3
|
|
8
|
my ($self) = @_; |
80
|
3
|
|
|
|
|
353
|
my %groups_intersection = map { $_ => 1 } @{ $self->_groups_intersection }; |
|
6
|
|
|
|
|
17
|
|
|
3
|
|
|
|
|
72
|
|
81
|
3
|
|
|
|
|
10
|
my @complement = grep { not $groups_intersection{$_} } @{ $self->_groups }; |
|
21
|
|
|
|
|
34
|
|
|
3
|
|
|
|
|
66
|
|
82
|
3
|
|
|
|
|
65
|
return \@complement; |
83
|
|
|
|
|
|
|
} |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
sub _print_out_groups { |
86
|
34
|
|
|
34
|
|
85
|
my ( $self, $filename, $groups ) = @_; |
87
|
34
|
50
|
|
|
|
2855
|
open( my $fh, '>', $filename ) |
88
|
|
|
|
|
|
|
or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => 'Couldnt write to file: ' . $filename ); |
89
|
|
|
|
|
|
|
|
90
|
34
|
|
|
|
|
92
|
my %groups_freq = %{ $self->_groups_freq }; |
|
34
|
|
|
|
|
1066
|
|
91
|
34
|
|
|
|
|
100
|
my @sorted_groups = sort { @{$groups_freq{$b}} <=> @{$groups_freq{$a}} } @{$groups}; |
|
99
|
|
|
|
|
123
|
|
|
99
|
|
|
|
|
132
|
|
|
99
|
|
|
|
|
187
|
|
|
34
|
|
|
|
|
111
|
|
92
|
|
|
|
|
|
|
|
93
|
34
|
|
|
|
|
125
|
for my $group ( @sorted_groups ) { |
94
|
92
|
|
|
|
|
133
|
print {$fh} $group.': '.join("\t",@{$self->_groups_freq->{$group}}) . "\n"; |
|
92
|
|
|
|
|
165
|
|
|
92
|
|
|
|
|
2134
|
|
95
|
|
|
|
|
|
|
} |
96
|
34
|
|
|
|
|
1695
|
close($fh); |
97
|
34
|
|
|
|
|
469
|
return $self; |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
sub groups_complement { |
101
|
3
|
|
|
3
|
0
|
8
|
my ($self) = @_; |
102
|
3
|
|
|
|
|
92
|
$self->_print_out_groups( $self->output_complement_filename, $self->_groups_complement ); |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
sub groups_intersection { |
106
|
4
|
|
|
4
|
0
|
14
|
my ($self) = @_; |
107
|
4
|
|
|
|
|
155
|
$self->_print_out_groups( $self->output_intersection_filename, $self->_groups_intersection ); |
108
|
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
sub groups_union { |
111
|
3
|
|
|
3
|
0
|
15
|
my ($self) = @_; |
112
|
3
|
|
|
|
|
116
|
$self->_print_out_groups( $self->output_union_filename, $self->_groups ); |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
sub groups_with_external_inputs |
116
|
|
|
|
|
|
|
{ |
117
|
24
|
|
|
24
|
0
|
53
|
my ($self, $output_filename,$groups) = @_; |
118
|
24
|
|
|
|
|
91
|
$self->_print_out_groups( $output_filename, $groups ); |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
} |
121
|
|
|
|
|
|
|
|
122
|
3
|
|
|
3
|
|
8402
|
no Moose; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
20
|
|
123
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable; |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
1; |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
__END__ |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
=pod |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=encoding UTF-8 |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
=head1 NAME |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
Bio::Roary::Output::QueryGroups - Output the groups of the union of a set of input isolates |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
=head1 VERSION |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
version 3.11.0 |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=head1 SYNOPSIS |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
Output the groups of the union of a set of input isolates |
144
|
|
|
|
|
|
|
use Bio::Roary::Output::QueryGroups; |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
my $obj = Bio::Roary::Output::QueryGroups->new( |
147
|
|
|
|
|
|
|
analyse_groups => $analyse_groups |
148
|
|
|
|
|
|
|
); |
149
|
|
|
|
|
|
|
$obj->groups_union(); |
150
|
|
|
|
|
|
|
$obj->groups_intersection(); |
151
|
|
|
|
|
|
|
$obj->groups_complement(); |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=head1 AUTHOR |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
Andrew J. Page <ap13@sanger.ac.uk> |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute. |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
This is free software, licensed under: |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
The GNU General Public License, Version 3, June 2007 |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
=cut |