File Coverage

lib/Bio/Roary/Output/QueryGroups.pm
Criterion Covered Total %
statement 86 86 100.0
branch 7 10 70.0
condition n/a
subroutine 17 17 100.0
pod 0 4 0.0
total 110 117 94.0


line stmt bran cond sub pod time code
1             package Bio::Roary::Output::QueryGroups;
2             $Bio::Roary::Output::QueryGroups::VERSION = '3.10.1';
3             # ABSTRACT: Output the groups of the union of a set of input isolates
4              
5              
6 3     3   441767 use Moose;
  3         5  
  3         22  
7 3     3   18789 use Bio::SeqIO;
  3         50830  
  3         82  
8 3     3   246 use Bio::Roary::Exceptions;
  3         6  
  3         68  
9 3     3   276 use Bio::Roary::AnalyseGroups;
  3         9  
  3         88  
10 3     3   1259 use POSIX;
  3         16879  
  3         17  
11              
12             has 'analyse_groups' => ( is => 'ro', isa => 'Bio::Roary::AnalyseGroups', required => 1 );
13             has 'input_filenames' => ( is => 'ro', isa => 'ArrayRef', required => 1 );
14             has 'output_union_filename' => ( is => 'ro', isa => 'Str', default => 'union_of_groups.gg' );
15             has 'output_intersection_filename' => ( is => 'ro', isa => 'Str', default => 'intersection_of_groups.gg' );
16             has 'output_complement_filename' => ( is => 'ro', isa => 'Str', default => 'complement_of_groups.gg' );
17             has 'core_definition' => ( is => 'ro', isa => 'Num', default => 1.0 );
18              
19             has '_groups_freq' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__groups_freq' );
20             has '_groups_intersection' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__groups_intersection' );
21             has '_groups_complement' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__groups_complement' );
22             has '_groups' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build__groups' );
23             has '_number_of_isolates' => ( is => 'ro', isa => 'Int', lazy => 1, builder => '_builder__number_of_isolates' );
24             has '_min_no_isolates_for_core' => ( is => 'rw', isa => 'Int', lazy_build => 1 );
25              
26             sub _build__min_no_isolates_for_core {
27 5     5   16 my ( $self ) = @_;
28 5         147 my $threshold = ceil( $self->_number_of_isolates * $self->core_definition );
29              
30 5         354 return $threshold;
31             }
32              
33             sub _builder__number_of_isolates {
34 5     5   628 my ($self) = @_;
35 5         15 return @{ $self->input_filenames };
  5         145  
36             }
37              
38             sub _build__groups_freq {
39 31     31   101 my ($self) = @_;
40 31         62 my %groups_freq;
41              
42 31         58 for my $filename ( @{ $self->input_filenames } ) {
  31         990  
43 68         2017 my $genes = $self->analyse_groups->_files_to_genes->{$filename};
44            
45 68         132 my %file_groups_seen;
46 68         103 for my $gene ( @{$genes} ) {
  68         172  
47 249 50       542 next if(!defined($gene));
48 249 50       7273 next if(!defined($self->analyse_groups->_genes_to_groups->{$gene}));
49 249 100       7143 next if(defined($file_groups_seen{$self->analyse_groups->_genes_to_groups->{$gene}}));
50            
51 246         492 push(@{$groups_freq{ $self->analyse_groups->_genes_to_groups->{$gene} }}, $gene);
  246         7333  
52 246         7184 $file_groups_seen{$self->analyse_groups->_genes_to_groups->{$gene}} = 1;
53             }
54             }
55              
56 31         920 return \%groups_freq;
57             }
58              
59             sub _build__groups {
60 31     31   95 my ($self) = @_;
61 31         61 my %groups_freq = %{ $self->_groups_freq };
  31         1035  
62 31         328 my @groups = sort { @{$groups_freq{$b}} <=> @{$groups_freq{$a}} } keys %groups_freq;
  280         445  
  280         440  
  280         603  
63 31         1038 return \@groups;
64             }
65              
66             sub _build__groups_intersection {
67 5     5   15 my ($self) = @_;
68 5         11 my @groups_intersection;
69              
70 5         11 for my $group ( @{$self->_groups} ) {
  5         135  
71 34 100       64 if ( scalar @{$self->_groups_freq->{$group}} >= $self->_min_no_isolates_for_core ) {
  34         821  
72 9         26 push( @groups_intersection, $group );
73             }
74             }
75 5         141 return \@groups_intersection;
76             }
77              
78             sub _build__groups_complement {
79 3     3   11 my ($self) = @_;
80 3         7 my %groups_intersection = map { $_ => 1 } @{ $self->_groups_intersection };
  6         21  
  3         111  
81 3         7 my @complement = grep { not $groups_intersection{$_} } @{ $self->_groups };
  21         45  
  3         77  
82 3         89 return \@complement;
83             }
84              
85             sub _print_out_groups {
86 34     34   85 my ( $self, $filename, $groups ) = @_;
87 34 50       3266 open( my $fh, '>', $filename )
88             or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => 'Couldnt write to file: ' . $filename );
89              
90 34         122 my %groups_freq = %{ $self->_groups_freq };
  34         1333  
91 34         126 my @sorted_groups = sort { @{$groups_freq{$b}} <=> @{$groups_freq{$a}} } @{$groups};
  99         173  
  99         143  
  99         212  
  34         240  
92              
93 34         113 for my $group ( @sorted_groups ) {
94 92         158 print {$fh} $group.': '.join("\t",@{$self->_groups_freq->{$group}}) . "\n";
  92         326  
  92         2708  
95             }
96 34         1930 close($fh);
97 34         615 return $self;
98             }
99              
100             sub groups_complement {
101 3     3 0 15 my ($self) = @_;
102 3         129 $self->_print_out_groups( $self->output_complement_filename, $self->_groups_complement );
103             }
104              
105             sub groups_intersection {
106 4     4 0 15 my ($self) = @_;
107 4         143 $self->_print_out_groups( $self->output_intersection_filename, $self->_groups_intersection );
108             }
109              
110             sub groups_union {
111 3     3 0 10 my ($self) = @_;
112 3         163 $self->_print_out_groups( $self->output_union_filename, $self->_groups );
113             }
114              
115             sub groups_with_external_inputs
116             {
117 24     24 0 86 my ($self, $output_filename,$groups) = @_;
118 24         105 $self->_print_out_groups( $output_filename, $groups );
119            
120             }
121              
122 3     3   10756 no Moose;
  3         7  
  3         38  
123             __PACKAGE__->meta->make_immutable;
124              
125             1;
126              
127             __END__
128              
129             =pod
130              
131             =encoding UTF-8
132              
133             =head1 NAME
134              
135             Bio::Roary::Output::QueryGroups - Output the groups of the union of a set of input isolates
136              
137             =head1 VERSION
138              
139             version 3.10.1
140              
141             =head1 SYNOPSIS
142              
143             Output the groups of the union of a set of input isolates
144             use Bio::Roary::Output::QueryGroups;
145              
146             my $obj = Bio::Roary::Output::QueryGroups->new(
147             analyse_groups => $analyse_groups
148             );
149             $obj->groups_union();
150             $obj->groups_intersection();
151             $obj->groups_complement();
152              
153             =head1 AUTHOR
154              
155             Andrew J. Page <ap13@sanger.ac.uk>
156              
157             =head1 COPYRIGHT AND LICENSE
158              
159             This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
160              
161             This is free software, licensed under:
162              
163             The GNU General Public License, Version 3, June 2007
164              
165             =cut