File Coverage

lib/Bio/Roary/ClustersRole.pm
Criterion Covered Total %
statement 27 27 100.0
branch 8 8 100.0
condition n/a
subroutine 4 4 100.0
pod n/a
total 39 39 100.0


line stmt bran cond sub pod time code
1             package Bio::Roary::ClustersRole;
2             # ABSTRACT: A role to read a clusters file from CD hit
3             $Bio::Roary::ClustersRole::VERSION = '3.11.0';
4              
5 5     5   2805 use Moose::Role;
  5         12853  
  5         23  
6 5     5   24067 use Bio::Roary::Exceptions;
  5         10  
  5         1413  
7              
# Path of the clusters file to read; the consumer must supply it.
has 'clusters_filename' => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# Parsed clusters, produced on first access by _build__clustered_genes.
has '_clustered_genes' => (
    is      => 'ro',
    lazy    => 1,
    builder => '_build__clustered_genes',
);

# Read handle on the clusters file, produced on first access by
# _build__clusters_fh.
has '_clusters_fh' => (
    is      => 'ro',
    lazy    => 1,
    builder => '_build__clusters_fh',
);
11              
# Builder for the lazy '_clusters_fh' attribute.
#
# Opens the file named by the 'clusters_filename' attribute for reading and
# returns the lexical filehandle.
#
# Throws Bio::Roary::Exceptions::FileNotFound if the file cannot be opened.
sub _build__clusters_fh
{
    my ($self) = @_;

    my $filename = $self->clusters_filename;

    # Three-argument open with an explicit read mode. With the two-argument
    # form the mode is parsed out of the filename itself, so a leading '>'
    # or a trailing '|' would write to a file or run a command, and
    # leading/trailing whitespace in the name would be stripped.
    open( my $fh, '<', $filename )
      or Bio::Roary::Exceptions::FileNotFound->throw( error => 'Cant open file: ' . $filename );

    return $fh;
}
18              
# Builder for the lazy '_clustered_genes' attribute.
#
# Reads the clusters file through the '_clusters_fh' attribute and returns a
# hash reference keyed by each cluster's representative gene name. The value
# is an array reference of the other gene names in that cluster, or undef
# when the cluster contains only its representative.
#
# Expected input looks like:
#
#   >Cluster 5
#   0 4201aa, >6630_4#9_00008... *
#   1 4201aa, >6631_1#23_00379... at 100.00%
#
# A member whose trailing field is '*' is the cluster representative.
# Member lines that appear before any '>' header, and clusters for which no
# representative was seen, are skipped rather than filed under an undefined
# hash key.
sub _build__clustered_genes
{
    my ($self) = @_;

    my $fh = $self->_clusters_fh;

    my %raw_clusters;
    my $current_cluster_name;

    while ( my $line = <$fh> )
    {
        # Drop the line ending up front. '.' matches "\r", so on CRLF input
        # the identity field would otherwise be captured as "*\r" and the
        # representative would never be recognised.
        $line =~ s/\r?\n\z//;

        if ( $line =~ /^>(.+)$/ )
        {
            $current_cluster_name = $1;
        }

        if ( $line =~ /[\d]+\t[\w]+, >(.+)\.\.\. (.+)$/ )
        {
            my ( $gene_name, $identity ) = ( $1, $2 );

            # A member with no preceding header belongs to no cluster.
            next unless defined $current_cluster_name;

            if ( $identity eq '*' )
            {
                $raw_clusters{$current_cluster_name}{representative_gene_name} = $gene_name;
            }
            else
            {
                push( @{ $raw_clusters{$current_cluster_name}{gene_names} }, $gene_name );
            }
        }
    }

    # Flatten to representative => [other members]; cluster names are only
    # needed while grouping.
    my %clustered_genes;
    for my $cluster ( values %raw_clusters )
    {
        my $representative = $cluster->{representative_gene_name};
        next unless defined $representative;

        # Left as undef (not []) for single-member clusters, matching the
        # previous behaviour that callers may test with defined().
        $clustered_genes{$representative} = $cluster->{gene_names};
    }

    return \%clustered_genes;
}
63              
64             1;
65              
66             __END__
67              
68             =pod
69              
70             =encoding UTF-8
71              
72             =head1 NAME
73              
74             Bio::Roary::ClustersRole - A role to read a clusters file from CD hit
75              
76             =head1 VERSION
77              
78             version 3.11.0
79              
80             =head1 SYNOPSIS
81              
82             A role to read a clusters file from CD hit
83             with 'Bio::Roary::ClustersRole';
84              
85             =head1 AUTHOR
86              
87             Andrew J. Page <ap13@sanger.ac.uk>
88              
89             =head1 COPYRIGHT AND LICENSE
90              
91             This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
92              
93             This is free software, licensed under:
94              
95             The GNU General Public License, Version 3, June 2007
96              
97             =cut