File Coverage

blib/lib/Bio/Gonzales/Domain/Group.pm
Criterion Covered Total %
statement 102 116 87.9
branch 14 20 70.0
condition 2 8 25.0
subroutine 16 19 84.2
pod 1 7 14.2
total 135 170 79.4


line stmt bran cond sub pod time code
1             package Bio::Gonzales::Domain::Group;
2              
3 1     1   13215 use Mouse;
  1         3  
  1         9  
4              
5 1     1   554 use warnings;
  1         2  
  1         50  
6 1     1   7 use strict;
  1         2  
  1         20  
7 1     1   4 use Carp;
  1         2  
  1         121  
8 1     1   8 use Data::Dumper;
  1         2  
  1         61  
9 1     1   520 use Bio::Gonzales::Feat::IO::GFF3;
  1         4  
  1         51  
10              
11 1     1   20 use 5.010;
  1         5  
12 1     1   7 use List::MoreUtils qw/uniq first_value any/;
  1         2  
  1         9  
13              
14             our $VERSION = '0.083'; # VERSION
15              
16             #my $q = Bio::Gonzales::Search::IO::HMMER3->new( file => $out )->parse;
17             has search_result => ( is => 'rw', required => 1 );
18             has required_domains => ( is => 'rw', required => 1 );
19             has forbidden_domains => ( is => 'rw', default => sub { [] } );
20             has can_have_additional_domains => ( is => 'rw', default => 1 );
21             has group_idcs => ( is => 'rw', lazy_build => 1 );
22             has only_best_hit => ( is => 'rw' );
23              
24       0 0   sub add_required {
25              
26             }
27              
28       0 0   sub add_forbidden {
29              
30             }
31              
32             sub BUILD {
33 4     4 1 77 my ($self) = @_;
34 4         23 my $r = $self->domain_list( $self->search_result );
35              
36             $self->required_domains( _match_domain_names( $r, $self->required_domains ) )
37 4 50       11 if ( @{ $self->required_domains } > 0 );
  4         41  
38             $self->forbidden_domains( _match_domain_names( $r, $self->forbidden_domains ) )
39 4 100       8 if ( @{ $self->forbidden_domains } > 0 );
  4         24  
40              
41 4         19 return $self;
42             }
43              
44             sub domain_list {
45 6     6 0 56 my $r = pop;
46              
47 6         47 return [ keys %$r ];
48             }
49              
50             sub _match_domain_names {
51 5     5   14 my ( $reference, $query ) = @_;
52              
53 5         8 my @gresult;
54 5         14 for my $q (@$query) {
55 6         12 my @result;
56 6         14 for my $e (@$q) {
57 8         20 push @result, grep {/$e/} @$reference;
  28         172  
58             }
59 6 50       25 push @gresult, \@result if (@result);
60             }
61 5         23 return \@gresult;
62             }
63              
64             sub _build_group_idcs {
65 1     1   5 my ($self) = @_;
66              
67 1         4 my $domain_groups = $self->required_domains;
68 1         3 my %group;
69 1         2 my $i = 0;
70 1         3 for my $g (@$domain_groups) {
71 2         6 map { $group{$_} = $i } @$g;
  3         7  
72 2         3 $i++;
73             }
74 1         4 return \%group;
75             }
76              
77             sub filter_hits {
78 1     1 0 3 my ($self) = @_;
79              
80 1         5 my $q = $self->search_result;
81 1         3 my $id_lookup = { map { $_ => 1 } @{ $self->filter_ids } };
  37         67  
  1         2  
82 1         14 my $group_idcs = $self->group_idcs;
83              
84 1         2 my @hit_col;
85 1         6 while ( my ( $domain_id, $result ) = each %$q ) {
86 4 100       13 next unless ( exists( $group_idcs->{$domain_id} ) );
87 3         9 while ( my ( $seq_id, $hits ) = each %$result ) {
88 300 100 66     1058 next if ( $id_lookup && !exists( $id_lookup->{$seq_id} ) );
89 110         193 for my $hit (@$hits) {
90 141         275 $hit->{domain_id} = $domain_id;
91 141         232 $hit->{seq_id} = $seq_id;
92 141         196 $hit->{group_id} = $group_idcs->{$domain_id};
93 141         361 push @hit_col, $hit;
94             }
95             }
96             }
97 1 50       8 return $self->_pick_best_domain_hits( \@hit_col )
98             if ( $self->only_best_hit );
99              
100 1         7 return \@hit_col;
101             }
102              
103             sub _pick_best_domain_hits {
104 0     0   0 my ( $self, $hit_list ) = @_;
105              
106 0         0 my %group_collection;
107 0         0 for my $h (@$hit_list) {
108 0         0 my $id = $h->{seq_id} . "__//__" . $h->{group_id};
109 0   0     0 $group_collection{$id} //= [];
110              
111 0         0 push @{ $group_collection{$id} }, $h;
  0         0  
112             }
113              
114 0         0 my @best_ones;
115 0         0 for my $h ( values %group_collection ) {
116 0         0 my $max;
117              
118             #find the domain with the maximal score
119 0 0 0     0 map { $max = $_ if ( !$max || $_->{score} > $max->{score} ) } @$h;
  0         0  
120 0         0 push @best_ones, $max;
121             }
122              
123 0         0 return \@best_ones;
124             }
125              
126             sub to_gff {
127 1     1 0 732 my ( $self, $dest ) = @_;
128 1         5 my $hits = $self->filter_hits;
129              
130 1         34 my $gffout = Bio::Gonzales::Feat::IO::GFF3->new( file_or_fh => $dest, mode => '>' );
131              
132 1         6 my $gidcs = $self->group_idcs;
133 1         10 while ( my ( $d, $idx ) = each %$gidcs ) {
134 3         16 $gffout->write_comment(" Group $idx contains domain: $d");
135             }
136              
137 1         4 my $i = 0;
138 1         3 for my $max (@$hits) {
139              
140             $gffout->write_feat(
141             Bio::Gonzales::Feat->new(
142             seq_id => $max->{seq_id},
143             source => 'hmmer',
144             type => 'region',
145             start => $max->{env_from},
146             end => $max->{env_to},
147             strand => 0,
148             score => $max->{score},
149             attributes => {
150             ID => [ "hit_" . $i++ ],
151             domainID => [ $max->{domain_id} ],
152 141         1990 groupID => [ $max->{group_id} ]
153             }
154             )
155             );
156             }
157              
158 1         15 $gffout->close;
159             }
160              
161             sub filter_ids {
162 4     4 0 27 my ($self) = @_;
163              
164 4         13 my $q = $self->search_result;
165 4         10 my $rgroups = $self->required_domains;
166 4 50       15 return unless ($rgroups);
167              
168 4         11 my %id_occurrence;
169              
170             # select the ids for every group by...
171 4         10 for my $domain_synonyms (@$rgroups) {
172 6         12 my @ids;
173             # iterating through the domains ...
174 6         14 for my $domain_synonym (@$domain_synonyms) {
175             # and storing all ids in an group-wide id list
176 8         14 push @ids, grep { @{ $q->{$domain_synonym}{$_} } > 0 } keys %{ $q->{$domain_synonym} };
  682         812  
  682         1290  
  8         149  
177             }
178              
179             # we need to eliminate duplicate ids (multiple synonyms per group)
180             # and increment the occurrence of the found ids
181 6         206 map { $id_occurrence{$_}++ } uniq @ids;
  426         704  
182              
183             }
184              
185             # if an id is in every group it occurred #num of rgroups times and is not forbidden, return it
186 4 100       42 return [ grep { $id_occurrence{$_} == @$rgroups && !$self->_is_forbidden($_) } keys %id_occurrence ];
  352         758  
187             }
188              
189             sub _is_forbidden {
190 156     156   244 my ( $self, $id ) = @_;
191              
192 156         294 my $q = $self->search_result;
193 156         239 my $fgroups = $self->forbidden_domains;
194 156         232 for my $fg (@$fgroups) {
195 82         113 for my $fd (@$fg) {
196 92 100       170 if ( exists( $q->{$fd}{$id} ) ) {
197 74         237 return 1;
198             }
199             }
200             }
201 82         259 return;
202              
203             }
204             __PACKAGE__->meta->make_immutable();