|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Bio::Roary::InflateClusters;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Bio::Roary::InflateClusters::VERSION = '3.10.1';  | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # ABSTRACT: Take the clusters file from cd-hit and use it to inflate the output of MCL  | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
6
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
84543
 | 
 use Moose;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
398735
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
    | 
| 
7
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
23912
 | 
 use Bio::Roary::Exceptions;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1917
 | 
    | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 with 'Bio::Roary::ClustersRole';  | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 has 'mcl_filename'      => ( is => 'ro', isa => 'Str', required => 1 );  | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 has 'output_file'       => ( is => 'ro', isa => 'Str', default  => 'inflated_results' );  | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 has '_mcl_fh'           => ( is => 'ro',lazy => 1, builder => '_build__mcl_fh' );  | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 has '_output_fh'        => ( is => 'ro',lazy => 1, builder => '_build__output_fh' );  | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 has 'cdhit_groups_filename'  => ( is => 'ro', isa => 'Maybe[Str]' );  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _build__output_fh  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
18
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
6
 | 
   my($self) = @_;  | 
| 
19
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
76
 | 
   open(my $fh, '>', $self->output_file) or Bio::Roary::Exceptions::CouldntWriteToFile->throw( error => 'Cant write to file: ' . $self->output_file );  | 
| 
20
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
67
 | 
   return $fh;  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _build__mcl_fh  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
25
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
5
 | 
    my($self) = @_;  | 
| 
26
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
65
 | 
    open(my $fh, $self->mcl_filename) or Bio::Roary::Exceptions::FileNotFound->throw( error => 'Cant open file: ' . $self->mcl_filename );  | 
| 
27
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
69
 | 
    return $fh;  | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _inflate_line  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
32
 | 
9
 | 
 
 | 
 
 | 
  
9
  
 | 
 
 | 
23
 | 
    my($self, $line) = @_;  | 
| 
33
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
    my @inflated_genes;  | 
| 
34
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
    chomp($line);  | 
| 
35
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
46
 | 
    my @gene_names = split(/[\t\s]+/, $line);  | 
| 
36
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
    for my $gene_name (@gene_names)  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    {  | 
| 
38
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
47
 | 
      push(@inflated_genes, $self->_inflate_gene($gene_name));  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
40
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
77
 | 
    return join(' ',@inflated_genes);  | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _inflate_gene  | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
45
 | 
21
 | 
 
 | 
 
 | 
  
21
  
 | 
 
 | 
37
 | 
    my($self, $gene_name) = @_;  | 
| 
46
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
    my $inflated_gene = $gene_name;  | 
| 
47
 | 
21
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
506
 | 
    if(defined($self->_clustered_genes->{$gene_name}))  | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    {  | 
| 
49
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
      $inflated_gene = $inflated_gene."\t". join("\t",@{$self->_clustered_genes->{$gene_name}});       | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
134
 | 
    | 
| 
50
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
117
 | 
      delete($self->_clustered_genes->{$gene_name});  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
52
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
56
 | 
    return $inflated_gene;  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub inflate  | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
57
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
0
  
 | 
7
 | 
   my($self) = @_;  | 
| 
58
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
67
 | 
   my $mcl_fh = $self->_mcl_fh;  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # Inflate genes from cdhit which were sent to mcl  | 
| 
61
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30
 | 
   while(<$mcl_fh>)  | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   {  | 
| 
63
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
     my $line = $_;  | 
| 
64
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
     print { $self->_output_fh } $self->_inflate_line($line) . "\n";  | 
| 
 
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
238
 | 
    | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # Inflate any clusters that were in the clusters file but not sent to mcl  | 
| 
68
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
   for my $gene_name(keys %{$self->_clustered_genes})  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
54
 | 
    | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   {  | 
| 
70
 | 
16
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
401
 | 
     next unless(defined($self->_clustered_genes->{$gene_name}));  | 
| 
71
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     print { $self->_output_fh } $gene_name."\t". join("\t",@{$self->_clustered_genes->{$gene_name}})."\n";  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
74
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
60
 | 
   if(defined($self->cdhit_groups_filename))  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   {  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Add clusters which were excluded because the groups were full at the cdhit stage  | 
| 
77
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     open(my $cdhit_fh, $self->cdhit_groups_filename) or Bio::Roary::Exceptions::FileNotFound->throw( error => "CD hit group file not found: " . $self->cdhit_groups_filename);  | 
| 
78
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     while(<$cdhit_fh>)  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
80
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       my $line = $_;  | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         | 
| 
82
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       if(defined($line))  | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       {  | 
| 
84
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         print { $self->_output_fh } $line ;  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
89
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
51
 | 
   close($self->_output_fh);  | 
| 
90
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30
 | 
   1;  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
93
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
26
 | 
 no Moose;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
    | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __PACKAGE__->meta->make_immutable;  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __END__  | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =pod  | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =encoding UTF-8  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Bio::Roary::InflateClusters - Take the clusters file from cd-hit and use it to inflate the output of MCL  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 VERSION  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 version 3.10.1  | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS  | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Take the clusters file from cd-hit and use it to inflate the output of MCL  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    use Bio::Roary::InflateClusters;  | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    my $obj = Bio::Roary::InflateClusters->new(  | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      clusters_filename  => 'example.clstr',  | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      mcl_filename       => 'example.mcl',  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      output_file        => 'example.output'  | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    );  | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    $obj->inflate;  | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 AUTHOR  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Andrew J. Page <ap13@sanger.ac.uk>  | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 COPYRIGHT AND LICENSE  | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.  | 
| 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is free software, licensed under:  | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   The GNU General Public License, Version 3, June 2007  | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  |