File Coverage

blib/lib/Bio/MUST/Core/PostPred/Composition.pm
Criterion Covered Total %
statement 46 46 100.0
branch 2 2 100.0
condition 2 2 100.0
subroutine 8 8 100.0
pod n/a
total 58 58 100.0


line stmt bran cond sub pod time code
1             package Bio::MUST::Core::PostPred::Composition;
2             # ABSTRACT: Posterior predictive test for compositional bias
3             $Bio::MUST::Core::PostPred::Composition::VERSION = '0.212530';
4 17     17   12988 use Moose;
  17         67  
  17         139  
5 17     17   125584 use namespace::autoclean;
  17         49  
  17         159  
6              
7 17     17   1497 use autodie;
  17         45  
  17         147  
8 17     17   94679 use feature qw(say);
  17         52  
  17         1669  
9              
10             # use Smart::Comments;
11              
12 17     17   146 use List::AllUtils qw(sum);
  17         55  
  17         1372  
13 17     17   141 use Tie::IxHash;
  17         48  
  17         592  
14              
15 17     17   120 use Bio::MUST::Core::Types;
  17         41  
  17         9167  
16              
17              
18             has 'seqs' => (
19             is => 'ro',
20             isa => 'Bio::MUST::Core::Ali',
21             required => 1,
22             coerce => 1,
23             handles => [
24             qw(gapmiss_regex all_seqs)
25             ],
26             );
27              
28             # TODO: consider a role if more tests are implemented
29              
30             # private hash containing compositional biases
31             # Note: this hash is actually a Tie::IxHash (see builder)
32             has '_stat_for' => (
33             traits => ['Hash'],
34             is => 'ro',
35             isa => 'HashRef[Num]',
36             init_arg => undef,
37             lazy => 1,
38             builder => '_build_stat_for',
39             handles => {
40             all_ids => 'keys',
41             stat_for => 'get',
42             },
43             );
44              
45             ## no critic (ProhibitUnusedPrivateSubroutines)
46              
47             sub _build_stat_for {
48 51     51   122 my $self = shift;
49              
50 51         124 my %glb_freq_for;
51 51         325 tie my %seq_freq_for, 'Tie::IxHash';
52              
53 51         1291 my $regex = $self->gapmiss_regex;
54 51         129 my $glb_tot = 0;
55              
56             # loop through seqs to store state freqs
57 51         305 for my $seq ($self->all_seqs) {
58 4029         65378 my %freq_for;
59 4029         6594 my $seq_tot = 0;
60              
61             STATE:
62 4029         12591 for my $state ($seq->all_states) {
63 745365         1082539 $state = uc $state;
64              
65             # skip missing/gap states
66             # Note: This is different from what we do in SeqMask::Profiles
67             # so as to avoid these states to decrease regular state freqs
68 745365 100       1783657 next STATE if $state =~ m/$regex/xms;
69              
70             # store state occurrences both for current seq and globally
71 744035         1098484 $glb_freq_for{$state}++;
72 744035         1026574 $freq_for{$state}++;
73 744035         990791 $glb_tot++;
74 744035         1046484 $seq_tot++;
75             }
76              
77             # convert occurrences to freqs for current seq
78 4029         82504 $freq_for{$_} /= $seq_tot for keys %freq_for;
79              
80             # store freqs for current seq
81 4029         22511 $seq_freq_for{ $seq->full_id } = \%freq_for;
82             }
83              
84             # convert global occurrences to freqs
85 51         1759 $glb_freq_for{$_} /= $glb_tot for keys %glb_freq_for;
86              
87 51         600 tie my %bias_for, 'Tie::IxHash';
88              
89             # compute bias for each seq
90             # according to Blanquart and Lartillot 2008
91 51         1328 for my $id (keys %seq_freq_for) {
92 4029         96480 while (my ($aa, $freq) = each %glb_freq_for) {
93 79237   100     1629512 $bias_for{$id} += ( ($seq_freq_for{$id}{$aa} // 0) - $freq ) ** 2;
94             }
95             }
96              
97             # compute global biases
98 51         2291 $bias_for{'GLOBALMAX'} = List::AllUtils::max( values %bias_for );
99 51         30772 $bias_for{'GLOBALMEAN'} = sum( values %bias_for ) / keys %bias_for;
100              
101 51         51136 return \%bias_for;
102             }
103              
104             ## use critic
105              
106             __PACKAGE__->meta->make_immutable;
107             1;
108              
109             __END__
110              
111             =pod
112              
113             =head1 NAME
114              
115             Bio::MUST::Core::PostPred::Composition - Posterior predictive test for compositional bias
116              
117             =head1 VERSION
118              
119             version 0.212530
120              
121             =head1 SYNOPSIS
122              
123             # TODO
124              
125             =head1 DESCRIPTION
126              
127             # TODO
128              
129             =head1 AUTHOR
130              
131             Denis BAURAIN <denis.baurain@uliege.be>
132              
133             =head1 COPYRIGHT AND LICENSE
134              
135             This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN.
136              
137             This is free software; you can redistribute it and/or modify it under
138             the same terms as the Perl 5 programming language system itself.
139              
140             =cut