File Coverage

lib/Bio/Roary/InflateClusters.pm
Criterion Covered Total %
statement 41 50 82.0
branch 6 14 42.8
condition n/a
subroutine 8 8 100.0
pod 0 1 0.0
total 55 73 75.3


line stmt bran cond sub pod time code
1             package Bio::Roary::InflateClusters;
2             $Bio::Roary::InflateClusters::VERSION = '3.10.1';
3             # ABSTRACT: Take the clusters file from cd-hit and use it to inflate the output of MCL
4              
5              
6 3     3   84543 use Moose;
  3         398735  
  3         24  
7 3     3   23912 use Bio::Roary::Exceptions;
  3         9  
  3         1917  
8             with 'Bio::Roary::ClustersRole';
9              
10             has 'mcl_filename' => ( is => 'ro', isa => 'Str', required => 1 );
11             has 'output_file' => ( is => 'ro', isa => 'Str', default => 'inflated_results' );
12             has '_mcl_fh' => ( is => 'ro',lazy => 1, builder => '_build__mcl_fh' );
13             has '_output_fh' => ( is => 'ro',lazy => 1, builder => '_build__output_fh' );
14             has 'cdhit_groups_filename' => ( is => 'ro', isa => 'Maybe[Str]' );
15              
16             sub _build__output_fh
17             {
18 2     2   6 my($self) = @_;
19 2 50       76 open(my $fh, '>', $self->output_file) or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => 'Cant write to file: ' . $self->output_file );
20 2         67 return $fh;
21             }
22              
23             sub _build__mcl_fh
24             {
25 2     2   5 my($self) = @_;
26 2 50       65 open(my $fh, $self->mcl_filename) or Bio::Roary::Exceptions::FileNotFound->throw( error => 'Cant open file: ' . $self->mcl_filename );
27 2         69 return $fh;
28             }
29              
30             sub _inflate_line
31             {
32 9     9   23 my($self, $line) = @_;
33 9         13 my @inflated_genes;
34 9         15 chomp($line);
35 9         46 my @gene_names = split(/[\t\s]+/, $line);
36 9         20 for my $gene_name (@gene_names)
37             {
38 21         47 push(@inflated_genes, $self->_inflate_gene($gene_name));
39             }
40 9         77 return join(' ',@inflated_genes);
41             }
42              
43             sub _inflate_gene
44             {
45 21     21   37 my($self, $gene_name) = @_;
46 21         29 my $inflated_gene = $gene_name;
47 21 100       506 if(defined($self->_clustered_genes->{$gene_name}))
48             {
49 5         12 $inflated_gene = $inflated_gene."\t". join("\t",@{$self->_clustered_genes->{$gene_name}});
  5         134  
50 5         117 delete($self->_clustered_genes->{$gene_name});
51             }
52 21         56 return $inflated_gene;
53             }
54              
55             sub inflate
56             {
57 2     2 0 7 my($self) = @_;
58 2         67 my $mcl_fh = $self->_mcl_fh;
59            
60             # Inflate genes from cdhit which were sent to mcl
61 2         30 while(<$mcl_fh>)
62             {
63 9         17 my $line = $_;
64 9         16 print { $self->_output_fh } $self->_inflate_line($line) . "\n";
  9         238  
65             }
66            
67             # Inflate any clusters that were in the clusters file but not sent to mcl
68 2         3 for my $gene_name(keys %{$self->_clustered_genes})
  2         54  
69             {
70 16 50       401 next unless(defined($self->_clustered_genes->{$gene_name}));
71 0         0 print { $self->_output_fh } $gene_name."\t". join("\t",@{$self->_clustered_genes->{$gene_name}})."\n";
  0         0  
  0         0  
72             }
73            
74 2 50       60 if(defined($self->cdhit_groups_filename))
75             {
76             # Add clusters which were excluded because the groups were full at the cdhit stage
77 0 0       0 open(my $cdhit_fh, $self->cdhit_groups_filename) or Bio::Roary::Exceptions::FileNotFound->throw( error => "CD hit group file not found: " . $self->cdhit_groups_filename);
78 0         0 while(<$cdhit_fh>)
79             {
80 0         0 my $line = $_;
81            
82 0 0       0 if(defined($line))
83             {
84 0         0 print { $self->_output_fh } $line ;
  0         0  
85             }
86             }
87             }
88            
89 2         51 close($self->_output_fh);
90 2         30 1;
91             }
92              
93 3     3   26 no Moose;
  3         5  
  3         22  
94             __PACKAGE__->meta->make_immutable;
95              
96             1;
97              
98             __END__
99              
100             =pod
101              
102             =encoding UTF-8
103              
104             =head1 NAME
105              
106             Bio::Roary::InflateClusters - Take the clusters file from cd-hit and use it to inflate the output of MCL
107              
108             =head1 VERSION
109              
110             version 3.10.1
111              
112             =head1 SYNOPSIS
113              
114             Take the clusters file from cd-hit and use it to inflate the output of MCL
115             use Bio::Roary::InflateClusters;
116              
117             my $obj = Bio::Roary::InflateClusters->new(
118             clusters_filename => 'example.clstr',
119             mcl_filename => 'example.mcl',
120             output_file => 'example.output'
121             );
122             $obj->inflate;
123              
124             =head1 AUTHOR
125              
126             Andrew J. Page <ap13@sanger.ac.uk>
127              
128             =head1 COPYRIGHT AND LICENSE
129              
130             This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
131              
132             This is free software, licensed under:
133              
134             The GNU General Public License, Version 3, June 2007
135              
136             =cut