File Coverage

lib/Bio/Roary/AccessoryClustering.pm
Criterion Covered Total %
statement 15 45 33.3
branch 0 4 0.0
condition n/a
subroutine 5 7 71.4
pod n/a
total 20 56 35.7


line stmt bran cond sub pod time code
1             package Bio::Roary::AccessoryClustering;
2             $Bio::Roary::AccessoryClustering::VERSION = '3.11.0';
3             # ABSTRACT: Take a clusters file from CD-hit and the fasta file and output a fasta file without full clusters
4              
5              
6 2     2   79036 use Moose;
  2         373752  
  2         12  
7 2     2   12547 use Bio::Roary::External::Cdhit;
  2         5  
  2         767  
8             with 'Bio::Roary::ClustersRole';
9              
10             has 'input_file' => ( is => 'ro', isa => 'Str', required => 1 );
11             has 'identity' => ( is => 'ro', isa => 'Num', default => 0.9 );
12             has 'cpus' => ( is => 'ro', isa => 'Int', default => 1 );
13             has '_output_cd_hit_filename' => ( is => 'ro', isa => 'Str', default => '_accessory_clusters' );
14             has 'clusters_to_samples' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_clusters_to_samples' );
15             has 'samples_to_clusters' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_samples_to_clusters' );
16             has 'sample_weights' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build_sample_weights' );
17             has 'clusters_filename' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build_clusters_filename' );
18             has 'clusters' => ( is => 'ro', isa => 'HashRef', lazy => 1, builder => '_build__clusters' );
19              
20             sub _build_sample_weights {
21 0     0   0 my ($self) = @_;
22 0         0 my %sample_weights;
23 0         0 for my $cluster_name ( keys %{ $self->clusters_to_samples } ) {
  0         0  
24 0         0 my $cluster_size = @{ $self->clusters_to_samples->{$cluster_name} };
  0         0  
25 0         0 for my $sample_name ( @{ $self->clusters_to_samples->{$cluster_name} } ) {
  0         0  
26 0         0 $sample_weights{$sample_name} = 1 / $cluster_size;
27             }
28             }
29 0         0 return \%sample_weights;
30             }
31              
32             sub _build_samples_to_clusters {
33 0     0   0 my ($self) = @_;
34 0         0 my %samples_to_clusters;
35 0         0 for my $cluster_name ( keys %{ $self->clusters_to_samples } ) {
  0         0  
36 0         0 for my $sample_name ( @{ $self->clusters_to_samples->{$cluster_name} } ) {
  0         0  
37 0         0 $samples_to_clusters{$sample_name} = $cluster_name;
38             }
39             }
40 0         0 return \%samples_to_clusters;
41             }
42              
43             sub _build_clusters_filename {
44 1     1   20 my ($self) = @_;
45 1         27 return $self->_output_cd_hit_filename . '.clstr';
46             }
47              
48             sub _build_clusters_to_samples {
49 1     1   3 my ($self) = @_;
50              
51 1         22 my $cdhit_obj = Bio::Roary::External::Cdhit->new(
52             input_file => $self->input_file,
53             output_base => $self->_output_cd_hit_filename,
54             _length_difference_cutoff => 1,
55             _sequence_identity_threshold => $self->identity,
56             cpus => $self->cpus
57             );
58 1         5 $cdhit_obj->run();
59 1         38 my $clusterd_genes = $self->_clustered_genes;
60              
61 0           for my $cluster_name ( keys %{$clusterd_genes} ) {
  0            
62 0           my $found = 0;
63 0           for my $gene_name ( @{ $clusterd_genes->{$cluster_name} } ) {
  0            
64 0 0         if ( $gene_name eq $cluster_name ) {
65 0           $found = 1;
66 0           last;
67             }
68             }
69              
70 0 0         if ( $found == 0 ) {
71 0           push( @{ $clusterd_genes->{$cluster_name} }, $cluster_name );
  0            
72             }
73             }
74              
75 0           return $clusterd_genes;
76             }
77              
78 2     2   18 no Moose;
  2         3  
  2         13  
79             __PACKAGE__->meta->make_immutable;
80              
81             1;
82              
83             __END__
84              
85             =pod
86              
87             =encoding UTF-8
88              
89             =head1 NAME
90              
91             Bio::Roary::AccessoryClustering - Take a clusters file from CD-hit and the fasta file and output a fasta file without full clusters
92              
93             =head1 VERSION
94              
95             version 3.11.0
96              
97             =head1 SYNOPSIS
98              
99             Take a clusters file from CD-hit and the fasta file and output a fasta file without full clusters
100             use Bio::Roary::AccessoryClustering;
101              
102             my $obj = Bio::Roary::AccessoryClustering->new(
103             input_file => 'accessory_binary_genes.fa',
104             identity => 0.96,
105             cpus => 10,
106             );
107             $obj->sample_weights();
108              
109             =head1 AUTHOR
110              
111             Andrew J. Page <ap13@sanger.ac.uk>
112              
113             =head1 COPYRIGHT AND LICENSE
114              
115             This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
116              
117             This is free software, licensed under:
118              
119             The GNU General Public License, Version 3, June 2007
120              
121             =cut