|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Bio::CUA::CUB::Calculator;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =pod  | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Bio::CUA::CUB::Calculator -- A module to calculate codon usage bias  | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (CUB) indice for protein-coding sequences  | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS  | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	use Bio::CUA::CUB::Calculator;  | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $calc = Bio::CUA::CUB::Calculator->new(  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	           -codon_table => 1,  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			   -tAI_values  => 'tai.out' # from Bio::CUA::CUB::Builder  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			   );  | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# calculate tAI for each sequence  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $io = Bio::CUA::SeqIO->new(-file => "seqs.fa");  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	or  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $io = Bio::CUA::SeqIO->new(-file => "seqs.fa", -format => 'fasta');  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	while(my $seq = $io->next_seq)  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		my $tai = $calc->tai($seq);  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		printf("%10s: %.7f\n", $seq->id, $tai);  | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DESCRIPTION  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Codon usage bias (CUB) can be represented at two levels, codon and  | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sequence. The latter is often computed as the geometric means of the  | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sequence's codons. This module caculates CUB metrics at sequence  | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 level.  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Supported CUB metrics include CAI (codon adaptation index), tAI (tRNA  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 adaptation index), Fop (Frequency of optimal codons), ENC (Effective  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Number of Codons) and their variants. See the methods below for  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 details.  | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
44
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
5124
 | 
 use 5.006;  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
135
 | 
    | 
| 
45
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
16
 | 
 use strict;  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
88
 | 
    | 
| 
46
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
12
 | 
 use warnings;  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
88
 | 
    | 
| 
47
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
13
 | 
 use parent qw/Bio::CUA::CUB/;  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
    | 
| 
48
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
129
 | 
 use Bio::CUA::CodonTable;  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6091
 | 
    | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 METHODS  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 new  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : new  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : my $calc=Bio::CUA::CUB::Calculator->new(@args);  | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: initialize the calculator  | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : an object of this class  | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : a hash with following acceptable keys:  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  B:  | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over  | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item C<-codon_table>  | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  the genetic code table applied for following sequence analyses. It  | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  can be specified by an integer (genetic code table id), an object of  | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  L, or a map-file. See the method  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  L for details.  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  B  | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item C<-optimal_codons>  | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  a file contains all the optimal codons, one codon per line. Or a  | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  hashref with keys being the optimal codons  | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  B  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item C<-CAI_values>  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  a file containing CAI values for each codon, excluding 3  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  stop codons, so 61 lines with each line containing a codon and its  | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  value separated by space or tab.  | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  or  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  a hashref with each key being a codon and each value being CAI index  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  for the codon.  | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  B  | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over  | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item C<-tAI_values>  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  similar to C<-CAI_values>, a file or a hash containing tAI value   | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  for each codon.  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  B  | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over  | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item C<-base_background>  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  optional.   | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  an arrayref containing base frequency of 4 bases (in the order   | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  A,T,C, and G) derived from background data such as introns.   | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Or one of the following values: 'seq', 'seq3', which will lead to  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  estimating base frequencies from each analyzed sequence in whol or  | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  its 3rd codon position, respectively.  | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  It can also be specified for each analyzed sequence with the methods  | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  L and L  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new  | 
| 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
132
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
5312
 | 
 	my ($caller, @args) = @_;  | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# option -codon_table is processed in this parent class  | 
| 
135
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
 	my $self = $caller->SUPER::new(@args);  | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# process all the parameters  | 
| 
138
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
 	my $hashRef = $self->_array_to_hash(\@args);  | 
| 
139
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	while(my ($tag, $val) = each %$hashRef)  | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# tag 'codon_table' is now processed by parent package  | 
| 
142
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
45
 | 
 		if($tag =~ /^optimal/o) # optimal codons  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# a hash using codons as keys  | 
| 
145
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			my $optimalCodons = ref($val) eq 'HASH'?   | 
| 
146
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			{map { $_ => 1 } keys(%$val)} : $self->_parse_file($val,1);  | 
| 
147
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			$self->{'_optimal_codons'} = $optimalCodons;  | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}elsif($tag =~ /^cai/o) # CAI values  | 
| 
149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# a hash like codon => CAI_value  | 
| 
151
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
30
 | 
 			my $caiValues = ref($val) eq 'HASH'?   | 
| 
152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$val : $self->_parse_file($val,2);  | 
| 
153
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
 			$self->{'_cai_values'} = $caiValues;  | 
| 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}elsif($tag =~ /^tai/o) # tAI values  | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# a hash like codon => tAI_value  | 
| 
157
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
11
 | 
 			my $taiValues = ref($val) eq 'HASH'?  | 
| 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$val : $self->_parse_file($val,2);  | 
| 
159
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
 			$self->{'_tai_values'} = $taiValues;  | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}elsif($tag =~ /^base/o) # background base composition  | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
162
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 			if(ref($val) eq 'ARRAY' or $val =~ /^seq/)  | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			{  | 
| 
164
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$self->{'_base_comp'} = $val;  | 
| 
165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}else  | 
| 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			{  | 
| 
167
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$self->throw("Unknown value '$val' for parameter",  | 
| 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				"-base_background");  | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}else  | 
| 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# Unknown parameter '$tag', ignored  | 
| 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
176
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
 	$self->no_atg(1); # exclude ATG in tAI calculation  | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# check the input values  | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# 1. make sure all the sense codons have CAI or tAI values if  | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# provided  | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
181
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
 	return $self;  | 
| 
182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 sequence input  | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 all the following methods accept one of the following formats as  | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sequence input  | 
| 
188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over  | 
| 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item 1  | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  string of nucleotide sequence with length of 3N,   | 
| 
194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item 2  | 
| 
196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  sequence object which has a method I to get the sequence string,  | 
| 
198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item 3  | 
| 
200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    a sequence file in fasta format  | 
| 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item 4  | 
| 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    reference to a codon count hash, like  | 
| 
206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    $codons = {   | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   AGC => 50,   | 
| 
208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        GTC => 124,  | 
| 
209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   ...    ...  | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   }.  | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 cai  | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : cai  | 
| 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : $caiValue = $self->cai($seq);  | 
| 
218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: calculate the CAI value for the sequence  | 
| 
219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : a number, or undef if failed  | 
| 
220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : see L"sequence input">  | 
| 
221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Note: codons without synonymous competitors are excluded in  | 
| 
222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  calculation.  | 
| 
223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub cai  | 
| 
227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
228
 | 
13
 | 
 
 | 
 
 | 
  
13
  
 | 
  
1
  
 | 
104
 | 
 	my ($self, $seq) = @_;  | 
| 
229
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
75
 | 
 	$self->_xai($seq, 'CAI');  | 
| 
230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # the real calculator of tAI or CAI as both have the same formula  | 
| 
233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _xai  | 
| 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
235
 | 
26
 | 
 
 | 
 
 | 
  
26
  
 | 
 
 | 
63
 | 
 	my ($self, $seq, $type) = @_;  | 
| 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
237
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
108
 | 
 	my $name;  | 
| 
238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $xaiHash;  | 
| 
239
 | 
26
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
251
 | 
 	if($type =~ /cai/i)  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
241
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
 		$name = 'CAI';  | 
| 
242
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
42
 | 
 		$xaiHash = $self->{"_cai_values"};  | 
| 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}elsif($type =~ /tai/i)  | 
| 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
245
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
 		$name = 'tAI';  | 
| 
246
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
44
 | 
 		$xaiHash = $self->{"_tai_values"};  | 
| 
247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}else  | 
| 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
249
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$self->throw("Unknown adaptation index type '$type'");  | 
| 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
251
 | 
26
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
95
 | 
 	unless($xaiHash)  | 
| 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
253
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$self->warn("$name values for codons were not provided for",  | 
| 
254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		"this analyzer, so can not calculate $name for sequences");  | 
| 
255
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		return undef;  | 
| 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
258
 | 
26
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
163
 | 
 	my $codonList = $self->get_codon_list($seq) or return;  | 
| 
259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
260
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
84
 | 
 	my $xai = 0;  | 
| 
261
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
49
 | 
 	my $seqLen = 0; # this excludes some unsuitable codons  | 
| 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# get non-degenerative codons which are excluded in CAI  | 
| 
263
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
315
 | 
 	my %nonDegCodons = map { $_ => 1 } $self->codons_by_degeneracy(1);  | 
| 
 
 | 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
317
 | 
    | 
| 
264
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
140
 | 
 	my @senseCodons = $self->codon_table->all_sense_codons;  | 
| 
265
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
179
 | 
 	foreach my $codon (@senseCodons)  | 
| 
266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
267
 | 
1586
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
4002
 | 
 		next unless($codonList->{$codon}); # no observation of this codon  | 
| 
268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# excluding non-degenerate codons for CAI calculation  | 
| 
269
 | 
1560
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
4702
 | 
 		next if($nonDegCodons{$codon} and $type =~ /cai/i);  | 
| 
270
 | 
1534
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
3308
 | 
 		unless(exists $xaiHash->{$codon})  | 
| 
271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
272
 | 
13
 | 
  
  0
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
258
 | 
 			$self->warn("Codon '$codon' is ignored")  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			if($self->debug and ($self->no_atg? ($codon ne 'ATG') : 1));  | 
| 
274
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
58
 | 
 			next;  | 
| 
275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
276
 | 
1521
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1840
 | 
 		my $cnt = $codonList->{$codon};  | 
| 
277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# to overcome real number overflow, use log  | 
| 
278
 | 
1521
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5596
 | 
 		$xai += $cnt*log($xaiHash->{$codon});  | 
| 
279
 | 
1521
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3017
 | 
 		$seqLen += $cnt;  | 
| 
280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
282
 | 
26
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
153
 | 
 	return undef unless($xai); # no codons with CAI/tAI  | 
| 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
284
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
163
 | 
 	$xai = exp($xai/$seqLen);  | 
| 
285
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
770
 | 
 	return $xai;  | 
| 
286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 fop  | 
| 
289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : fop  | 
| 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : $fopValue = $self->fop($seq[,$withNonDegenerate]);  | 
| 
292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: calculate the fraction of optimal codons in the sequence  | 
| 
293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : a number, or undef if failed  | 
| 
294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : for sequence see L"sequence input">.  | 
| 
295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  if optional argument '$withNonDegenerate' is true, then  | 
| 
296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  non-degenerate codons (those do not have synonymous partners) are  | 
| 
297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  included in calculation. Default is excluding these codons.  | 
| 
298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub fop  | 
| 
302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
303
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
 	my ($self, $seq, $withNonDeg) = @_;  | 
| 
304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
305
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	my $optimalCodons = $self->{'_optimal_codons'} or   | 
| 
306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	$self->throw("No optimal codons associated with $self");  | 
| 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
308
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	my $codonList = $self->get_codon_list($seq) or return;  | 
| 
309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# get non-degenerate codons  | 
| 
310
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	my %nonDegCodons = map { $_ => 1 } $self->codons_by_degeneracy(1);  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
313
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	my $optCnt = 0; # optimal codons  | 
| 
314
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	my $total  = 0;  | 
| 
315
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	while(my ($codon, $cnt) = each %$codonList)  | 
| 
316
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# excluding non-degenerate codons if necessary  | 
| 
318
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 		next if(!$withNonDeg and $nonDegCodons{$codon});  | 
| 
319
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$optCnt += $cnt if($optimalCodons->{$codon});  | 
| 
320
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$total += $cnt;  | 
| 
321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
323
 | 
0
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 	return $optCnt/($total || 1);  | 
| 
324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 tai  | 
| 
327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
328
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : tai  | 
| 
329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : $taiValue = $self->tai($seq);  | 
| 
330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: calculate the tAI value for the sequence  | 
| 
331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : a number, or undef if failed  | 
| 
332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : for sequence see L"sequence input">.  | 
| 
333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Note: codons which do not have tAI values are ignored from input  | 
| 
335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  sequence  | 
| 
336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
338
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub tai  | 
| 
340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
341
 | 
13
 | 
 
 | 
 
 | 
  
13
  
 | 
  
1
  
 | 
118
 | 
 	my ($self, $seq) = @_;  | 
| 
342
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
71
 | 
 	$self->_xai($seq, 'tAI');  | 
| 
343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
344
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # an alias  | 
| 
346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub tAI  | 
| 
347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
348
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
0
  
 | 
0
 | 
 	my ($self, $seq) = @_;  | 
| 
349
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$self->_xai($seq, 'tAI');  | 
| 
350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
351
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 enc  | 
| 
353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : enc  | 
| 
355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : $encValue = $self->enc($seq,[$minTotal]);  | 
| 
356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: calculate ENC for the sequence using the original method   | 
| 
357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  I  | 
| 
358
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : a number, or undef if failed  | 
| 
359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : for sequence see L"sequence input">.  | 
| 
360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Optional argument I specifies minimal count   | 
| 
361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  for an amino acid; if observed count is smaller than this count, this  | 
| 
362
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  amino acid's F will not be calculated but inferred. Deafult is 5.  | 
| 
363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Note: when the F of a redundancy group is unavailable due to lack of  | 
| 
365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  sufficient data, it will be estimated from other groups following Wright's  | 
| 
366
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  method, that is, F3=(F2+F4)/2, and for others, F=1/r where r is the  | 
| 
367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  degeneracy degree of that group.  | 
| 
368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
370
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub enc  | 
| 
371
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
372
 | 
13
 | 
 
 | 
 
 | 
  
13
  
 | 
  
1
  
 | 
107
 | 
 	my ($self, $seq, $minTotal) = @_;  | 
| 
373
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
67
 | 
 	$self->_enc_factory($seq, $minTotal, 'mean');  | 
| 
374
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
376
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 enc_r  | 
| 
377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
378
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : enc_r  | 
| 
379
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : $encValue = $self->enc_r($seq,[$minTotal]);  | 
| 
380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: similar to the method L, except that missing F values  | 
| 
381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  are estimated in a different way.  | 
| 
382
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : a number, or undef if failed  | 
| 
383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : for sequence see L"sequence input">.  | 
| 
384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Optional argument I specifies minimal count   | 
| 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  for an amino acid; if observed count is smaller than this count, this  | 
| 
386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  amino acid's F will not be calculated but inferred. Deafult is 5.  | 
| 
387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Note: for missing Fx of degeneracy class 'x', we first estimated the  | 
| 
389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  ratio (1/Fx-1)/(x-1) by averaging the ratios of other degeneracy  | 
| 
390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  classes with known F values. Then Fx is obtained by solving the simple  | 
| 
391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  equation.  | 
| 
392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
393
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
394
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub enc_r  | 
| 
396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
397
 | 
13
 | 
 
 | 
 
 | 
  
13
  
 | 
  
1
  
 | 
169
 | 
 	my ($self, $seq, $minTotal) = @_;  | 
| 
398
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
52
 | 
 	$self->_enc_factory($seq, $minTotal, 'equal_ratio');  | 
| 
399
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 encp  | 
| 
402
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
403
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : encp  | 
| 
404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : $encpValue = $self->encp($seq,[$minTotal,[$A,$T,$C,$G]]);  | 
| 
405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: calculate ENC for the sequence using the updated method   | 
| 
406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  by Novembre I<2002, MBE>, which corrects the  background nucleotide   | 
| 
407
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  composition.  | 
| 
408
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : a number, or undef if failed  | 
| 
409
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : for sequence see L"sequence input">.  | 
| 
410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Optional argument I specifies minimal count   | 
| 
412
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  for an amino acid; if observed count is smaller than this count, this  | 
| 
413
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  amino acid's F will not be calculated but inferred. Deafult is 5.  | 
| 
414
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  another optional argument gives the background nucleotide composition  | 
| 
416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  in the order of A,T,C,G in an array, if not provided, it will use the  | 
| 
417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  default one provided when calling the method L. If stil  | 
| 
418
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  unavailable, error occurs.  | 
| 
419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
421
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub encp  | 
| 
423
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
424
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
 	my ($self, $seq, $minTotal, $baseComp) = @_;  | 
| 
425
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$self->_enc_factory($seq, $minTotal, 'mean', 1, $baseComp);  | 
| 
426
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
427
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 encp_r  | 
| 
429
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
430
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : encp_r  | 
| 
431
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : $encpValue =  | 
| 
432
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  $self->encp_r($seq,[$minTotal,[$A,$T,$C,$G]]);  | 
| 
433
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: similar to the method L, except that missing F values  | 
| 
434
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  are estimated using a different way.  | 
| 
435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : a number, or undef if failed  | 
| 
436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : for sequence see L"sequence input">.  | 
| 
437
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
438
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Optional argument I specifies minimal count   | 
| 
439
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  for an amino acid; if observed count is smaller than this count, this  | 
| 
440
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  amino acid's F will not be calculated but inferred. Deafult is 5.  | 
| 
441
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
442
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  another optional argument gives the background nucleotide composition  | 
| 
443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  in the order of A,T,C,G in an array, if not provided, it will use the  | 
| 
444
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  default one provided when calling the method L. If stil  | 
| 
445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  unavailable, error occurs.  | 
| 
446
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Note: for missing Fx of degeneracy class 'x', we first estimated the  | 
| 
448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  ratio (1/Fx-1)/(x-1) by averaging the ratios of other degeneracy  | 
| 
449
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  classes with known F values. Then Fx is obtained by solving the simple  | 
| 
450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  equation.  | 
| 
451
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
453
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub encp_r  | 
| 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
456
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
 	my ($self, $seq, $minTotal, $baseComp) = @_;  | 
| 
457
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$self->_enc_factory($seq, $minTotal, 'equal_ratio', 1, $baseComp);  | 
| 
458
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
459
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
460
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # real function calculate different versions of ENC  | 
| 
461
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # parameters explanation  | 
| 
462
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # seq: sequence string, sequence object, sequence file, or hash  | 
| 
463
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # reference to codon list  | 
| 
464
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # correctBaseComp: if true, correct background base composition using  | 
| 
465
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Novembre's method  | 
| 
466
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # F_EstimateMethod: how to estimate average F for a certain redundancy  | 
| 
467
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # class if that class does not have observed data so can't be  | 
| 
468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # calculated; 'mean' is for Wright's method, and 'equal_ratio' for  | 
| 
469
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Zhenguo's method. The latter assumes a similar (1/F[r])/r for each  | 
| 
470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # redundancy class with redundancy degree 'r'  | 
| 
471
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # baseComposition: optional, a reference to an array containing  | 
| 
472
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # background nucleotide composition. If provided, it overides the  | 
| 
473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # values set when method L was called.  | 
| 
474
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _enc_factory  | 
| 
475
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
476
 | 
26
 | 
 
 | 
 
 | 
  
26
  
 | 
 
 | 
62
 | 
 	my ($self, $seq, $minTotal, $F_EstimateMethod, $correctBaseComp, $baseComposition) = @_;  | 
| 
477
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
478
 | 
26
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
112
 | 
 	$minTotal = 5 unless(defined $minTotal); # the minumum count of residule for a given amino  | 
| 
479
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# acid for it to be included in F calculation  | 
| 
480
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
481
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# a hash ref, codon => counts  | 
| 
482
 | 
26
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
142
 | 
 	my $codonList = $self->get_codon_list($seq) or return;  | 
| 
483
 | 
26
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
647
 | 
 	my $seqId = (ref($seq) and $seq->can('id'))? $seq->id : '';  | 
| 
484
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
485
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# determine expected codon frequency if necessary  | 
| 
486
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
77
 | 
 	my $expectedCodonFreq;  | 
| 
487
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# determine base compositions now  | 
| 
488
 | 
26
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
94
 | 
 	if($correctBaseComp)  | 
| 
489
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
490
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		if(!defined($baseComposition)) # not provided for this sequence  | 
| 
491
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
492
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			my $defaultBaseComp = $self->base_composition;  | 
| 
493
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			unless($defaultBaseComp)  | 
| 
494
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			{  | 
| 
495
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$self->warn("No default base composition for seq",  | 
| 
496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				" '$seqId', so no GC-corrected ENC");  | 
| 
497
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				return undef;  | 
| 
498
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
499
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			if($defaultBaseComp eq 'seq')  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
500
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			{  | 
| 
501
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$baseComposition =  | 
| 
502
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				$self->estimate_base_composition($codonList);  | 
| 
503
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}elsif($defaultBaseComp eq 'seq3')  | 
| 
504
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			{  | 
| 
505
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$baseComposition =  | 
| 
506
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				$self->estimate_base_composition($codonList,3);  | 
| 
507
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}else  | 
| 
508
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			{  | 
| 
509
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$baseComposition = $defaultBaseComp;  | 
| 
510
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		} # otherwise sequence-specific base-composition is provided  | 
| 
512
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# here  | 
| 
513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
514
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# codon frequency may not be estimated due to invalid  | 
| 
515
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# compositions  | 
| 
516
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$expectedCodonFreq =  | 
| 
517
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$self->expect_codon_freq($baseComposition)   | 
| 
518
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		if($baseComposition);  | 
| 
519
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		return undef unless($expectedCodonFreq);  | 
| 
520
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
521
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
522
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
523
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# now let us calculate F for each redundancy class  | 
| 
524
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# determined by codon table, containing all amino acid classes  | 
| 
525
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
200
 | 
 	my $AARedundancyClasses = $self->aa_degeneracy_classes; #  | 
| 
526
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
52
 | 
 	my %FavgByClass; # record the average F from each class  | 
| 
527
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
247
 | 
 	while(my ($redundancy, $AAHash) = each %$AARedundancyClasses)  | 
| 
528
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
529
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# number of observed AA types in this class  | 
| 
530
 | 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
168
 | 
 		my $numAAInClass = 0; # number of amino acid species in this class  | 
| 
531
 | 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
157
 | 
 		my $Fsum = 0;  | 
| 
532
 | 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
421
 | 
 		while(my ($AA, $codonArray) = each %$AAHash)  | 
| 
533
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
534
 | 
494
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1095
 | 
 			if($redundancy == 1) # this class has only one codon  | 
| 
535
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			{  | 
| 
536
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3570
 | 
 				$numAAInClass = scalar(keys %$AAHash);  | 
| 
537
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
51
 | 
 				$Fsum = $numAAInClass; # each AA contribute 1  | 
| 
538
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
78
 | 
 				last;  | 
| 
539
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
540
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# total count of observed residules for this AA  | 
| 
541
 | 
468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
541
 | 
 			my $AAcnt = 0;   | 
| 
542
 | 
468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
847
 | 
 			foreach (@$codonArray)  | 
| 
543
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			{  | 
| 
544
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# check the codon exists in this seq  | 
| 
545
 | 
1534
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
3166
 | 
 				next unless(exists $codonList->{$_});  | 
| 
546
 | 
1508
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2844
 | 
 				$AAcnt += $codonList->{$_};  | 
| 
547
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
548
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			  | 
| 
549
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# skip if occurence of this amino acid is less than the  | 
| 
550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# minimal threshold  | 
| 
551
 | 
468
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
3486
 | 
 			next if($AAcnt < $minTotal or $AAcnt < 2);  | 
| 
552
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
553
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# now calculate F for this AA species  | 
| 
554
 | 
464
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
741
 | 
 			if($correctBaseComp) # correct base composition  | 
| 
555
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			{  | 
| 
556
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				my $chisq = 0;  | 
| 
557
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# get the freq of codons of this amino acids  | 
| 
558
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				my $totalFreq = 0;  | 
| 
559
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				foreach (@$codonArray)  | 
| 
560
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				{  | 
| 
561
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 					$totalFreq += $expectedCodonFreq->{$_};  | 
| 
562
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				}  | 
| 
563
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				foreach (@$codonArray)  | 
| 
564
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				{  | 
| 
565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					# set unobserved codons to 0  | 
| 
566
 | 
0
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 					my $codonCnt = $codonList->{$_} || 0;  | 
| 
567
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 					my $expectedFreq =  | 
| 
568
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					$expectedCodonFreq->{$_}/$totalFreq;  | 
| 
569
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 					$chisq += ($codonCnt/$AAcnt -  | 
| 
570
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 						$expectedFreq)**2/$expectedFreq;  | 
| 
571
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				}  | 
| 
572
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$chisq *= $AAcnt; # don't forget multiply this  | 
| 
573
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$Fsum += ($chisq + $AAcnt -  | 
| 
574
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					$redundancy)/($redundancy*($AAcnt-1));  | 
| 
575
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}else # no correction, use old Wright method  | 
| 
576
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			{  | 
| 
577
 | 
464
 | 
 
 | 
 
 | 
 
 | 
 
 | 
721
 | 
 				my $pSquareSum = 0;  | 
| 
578
 | 
464
 | 
 
 | 
 
 | 
 
 | 
 
 | 
951
 | 
 				foreach (@$codonArray)  | 
| 
579
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				{  | 
| 
580
 | 
1526
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2238
 | 
 					my $codonCnt = $codonList->{$_};  | 
| 
581
 | 
1526
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
3230
 | 
 					next unless($codonCnt);  | 
| 
582
 | 
1504
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3302
 | 
 					$pSquareSum += ($codonCnt/$AAcnt)**2;  | 
| 
583
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				}  | 
| 
584
 | 
464
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1234
 | 
 				$Fsum += ($AAcnt*$pSquareSum -1)/($AAcnt-1);  | 
| 
585
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
586
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# increase the number of AA species in this class  | 
| 
587
 | 
464
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1958
 | 
 			$numAAInClass++;  | 
| 
588
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
589
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# check whether all AA species are ignored or not observed  | 
| 
590
 | 
130
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
387
 | 
 		if($numAAInClass > 0)  | 
| 
591
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
592
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# note, in some special cases, Fsum == 0 even though  | 
| 
593
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# $numAAInClass >0, for example for a 6-fold amino acid,  | 
| 
594
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# if each of its codon is observed only once, it would  | 
| 
595
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# result in Faa = 0. so we need add restriction on this  | 
| 
596
 | 
130
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
877
 | 
 			$FavgByClass{$redundancy} = $Fsum/$numAAInClass if($Fsum >  | 
| 
597
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				0);  | 
| 
598
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		} # otherwise no data  | 
| 
599
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
600
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
601
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# estimate missing redundancy classes due to no observation of  | 
| 
602
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# that class's AAs, and get the final Nc values  | 
| 
603
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
72
 | 
 	my $enc = 0;  | 
| 
604
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
141
 | 
 	while(my ($redundancy, $AAHash) = each %$AARedundancyClasses)  | 
| 
605
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
606
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# the number of AA species in this class, determined by the  | 
| 
607
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# codon table, not the input seq  | 
| 
608
 | 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
169
 | 
 		my $AAcntInClass = scalar(keys %$AAHash);  | 
| 
609
 | 
130
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
278
 | 
 		if(exists $FavgByClass{$redundancy})  | 
| 
610
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
611
 | 
130
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
279
 | 
 			die "$redundancy, $AAcntInClass in seq '$seqId':$!"  | 
| 
612
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			unless($FavgByClass{$redundancy});  | 
| 
613
 | 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
300
 | 
 			$enc += $AAcntInClass/$FavgByClass{$redundancy};  | 
| 
614
 | 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
420
 | 
 			next;  | 
| 
615
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
616
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
617
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# otherwise this class was not observed  | 
| 
618
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my $equalRatio = $F_EstimateMethod eq 'mean'? 0 : 1;  | 
| 
619
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my $estimatedFavg = _estimate_F(\%FavgByClass, $redundancy,  | 
| 
620
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$equalRatio);  | 
| 
621
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		unless($estimatedFavg)  | 
| 
622
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
623
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			$self->warn("Cannot estimate average F for class with",  | 
| 
624
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				"redundancy=$redundancy in sequence $seqId, ",   | 
| 
625
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				"probably no known F values for any class");  | 
| 
626
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			return undef;  | 
| 
627
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
628
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$enc +=  $AAcntInClass/$estimatedFavg;  | 
| 
629
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
631
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
680
 | 
 	return $enc;  | 
| 
632
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
633
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
634
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # estimate F average  | 
| 
635
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _estimate_F  | 
| 
636
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
637
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 	my ($knownF,$redundancy,$equalRatio) = @_;  | 
| 
638
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
639
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return 1 if($redundancy == 1);  | 
| 
640
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
641
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if($equalRatio) # get the mean (1/Fr-1)/(r-1)  | 
| 
642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
643
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		my $ratioSum;  | 
| 
644
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		my $cnt = 0; # number of known Fs  | 
| 
645
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		while(my ($r, $F) = each %$knownF)  | 
| 
646
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
647
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			next if $r < 2; # excluding class of redundancy==1  | 
| 
648
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$ratioSum += (1/$F-1)/($r-1);  | 
| 
649
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$cnt++;  | 
| 
650
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
651
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
652
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		if( $cnt > 0)  | 
| 
653
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
654
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			my $Fx = 1/($ratioSum/$cnt*($redundancy-1)+1);  | 
| 
655
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			return $Fx;  | 
| 
656
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}else # no known F for any class with redundancy > 1  | 
| 
657
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
658
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			return undef;  | 
| 
659
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
660
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
661
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}else # otherwise use Wright's method  | 
| 
662
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
663
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		if($redundancy == 3)  | 
| 
664
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
665
 | 
0
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 			my $F2 = $knownF->{2} || 1/2; # class 2  | 
| 
666
 | 
0
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 			my $F4 = $knownF->{4} || 1/4; # class 4  | 
| 
667
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			return ($F2 + $F4)/2;  | 
| 
668
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}else  | 
| 
669
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
670
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			return 1/$redundancy; # assuming no bias  | 
| 
671
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
672
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
673
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
675
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
676
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # get the default base compostion of this object  | 
| 
677
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub base_composition  | 
| 
678
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
679
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
0
  
 | 
 
 | 
 	my $self = shift;  | 
| 
680
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
681
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return $self->{'_base_comp'};  | 
| 
682
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
683
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
684
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 estimate_base_composition  | 
| 
685
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
686
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : estimate_base_composition  | 
| 
687
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : @baseComp = $self->estimate_base_composition($seq,[$pos])  | 
| 
688
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: estimate base compositions in the sequence  | 
| 
689
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : an array of numbers in the order of A,T,C,G, or its  | 
| 
690
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  reference if in the scalar context  | 
| 
691
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : a sequence string or a reference of hash containing codons  | 
| 
692
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  and their counts (eg., AGG => 30), and optionally an integer; the integer  | 
| 
693
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  specifies which codon position's nucleotide will be used instead of  | 
| 
694
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  all three codon positions.  | 
| 
695
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
696
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
697
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
698
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub estimate_base_composition  | 
| 
699
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
700
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
 	my ($self, $seq, $pos) = @_;  | 
| 
701
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
702
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my %bases;  | 
| 
703
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# check if input is a codon list  | 
| 
704
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $codonList;  | 
| 
705
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if(ref($seq) eq 'HASH') # a codon list  | 
| 
706
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
707
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$codonList = $seq;  | 
| 
708
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}else # a sequence string or object  | 
| 
709
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
710
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$seq = $self->_get_seq_str($seq);  | 
| 
711
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
712
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
713
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if($pos)  | 
| 
714
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
715
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 		$self->throw("Only 1, 2, or 3 are acceptable for pos,",  | 
| 
716
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			"'$pos' is not valid here") unless($pos > 0 and $pos < 4);  | 
| 
717
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		if($codonList) # input is a codon list  | 
| 
718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
719
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			my $base;  | 
| 
720
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			while(my ($codon, $cnt) = each %$codonList)  | 
| 
721
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			{  | 
| 
722
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				$base = substr($codon, $pos-1,1);  | 
| 
723
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				$bases{$base} += $cnt;  | 
| 
724
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
725
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}else # a sequence  | 
| 
726
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
727
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			my $seqLen = length($seq);  | 
| 
728
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			my $accuLen = $pos - 1;  | 
| 
729
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			my $period = 3; # a codon length  | 
| 
730
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			my $base;  | 
| 
731
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			while($accuLen < $seqLen)  | 
| 
732
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			{  | 
| 
733
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				$base = substr($seq,$accuLen,1);  | 
| 
734
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				$bases{$base}++;  | 
| 
735
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				$accuLen += $period;  | 
| 
736
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
737
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
738
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}else # all nucleotides  | 
| 
739
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
740
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		if($codonList) # input is a codon list  | 
| 
741
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
742
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			while(my ($codon, $cnt) = each %$codonList)  | 
| 
743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			{  | 
| 
744
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				map { $bases{$_} += $cnt } split('', $codon);  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
745
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
746
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}else  | 
| 
747
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
748
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			map { $bases{$_}++ } split('',$seq);  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
749
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
750
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
751
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
752
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $total = 0;  | 
| 
753
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my @comp;  | 
| 
754
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	foreach ($self->bases) # only consider A,T,C,G  | 
| 
755
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
756
 | 
0
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 		$total += $bases{$_} || 0;  | 
| 
757
 | 
0
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 		push @comp, $bases{$_} || 0;  | 
| 
758
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
759
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	@comp = map { $_/$total } @comp;  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
760
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
761
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return wantarray? @comp : \@comp;  | 
| 
762
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
763
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
764
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 gc_fraction  | 
| 
765
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
766
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : gc_fraction  | 
| 
767
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : $frac = $self->gc_fraction($seq,[$pos])  | 
| 
768
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: get fraction of GC content in the sequence  | 
| 
769
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : a floating number between 0 and 1.  | 
| 
770
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : a sequence string or a reference of hash containing codons  | 
| 
771
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  and their counts (eg., AGG => 30), and optionally an integer; the integer  | 
| 
772
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  specifies which codon position's nucleotide will be used for  | 
| 
773
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  calculation (i.e., 1, 2, or 3), instead of all three positions.  | 
| 
774
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
775
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
776
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
777
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub gc_frac  | 
| 
778
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
779
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
0
  
 | 
 
 | 
 	my ($self, @args) = @_;  | 
| 
780
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	$self->gc_fraction(@args);  | 
| 
781
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
782
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
783
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub gc_fraction  | 
| 
784
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
785
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
 	my ($self, @args) = @_;  | 
| 
786
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
787
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my @composition = $self->estimate_base_composition(@args);  | 
| 
788
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my @bases = $self->bases;  | 
| 
789
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my @indice = grep { $bases[$_] =~ /[GC]/ } 0..$#bases;  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
790
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
791
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $frac = 0;  | 
| 
792
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	foreach (@indice)  | 
| 
793
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
794
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$frac += $composition[$_];  | 
| 
795
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
796
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
797
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return $frac;  | 
| 
798
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
799
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
800
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 expect_codon_freq  | 
| 
801
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
802
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Title   : expect_codon_freq  | 
| 
803
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Usage   : $codonFreq = $self->expect_codon_freq($base_composition)  | 
| 
804
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Function: return the expected frequency of codons  | 
| 
805
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Returns : reference to a hash in which codon is hash key, and  | 
| 
806
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  fraction is hash value  | 
| 
807
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  Args    : reference to an array of base compositions in the order of  | 
| 
808
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  [A, T, C, G], represented as either counts or fractions  | 
| 
809
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
811
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
812
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub expect_codon_freq  | 
| 
813
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
814
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
 
 | 
 	my ($self, $baseComp) = @_;  | 
| 
815
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
816
 | 
0
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 	unless($baseComp and ref($baseComp) eq 'ARRAY')  | 
| 
817
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
818
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$self->warn("Invalid base composition '$baseComp'",  | 
| 
819
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		" for expect_codon_freq, which should be an array reference");  | 
| 
820
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return undef;  | 
| 
821
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
822
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
823
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my @bases = $self->bases;  | 
| 
824
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $compSum = 0; # used to normalize in case they are not summed to 1  | 
| 
825
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $zeroCnt = 0; # count of zero values  | 
| 
826
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	foreach (0..3)  | 
| 
827
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
828
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$zeroCnt++ if($baseComp->[$_] == 0);  | 
| 
829
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$compSum += $baseComp->[$_];  | 
| 
830
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
831
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
832
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# set zero value a pseudo count, depending on the provided values  | 
| 
833
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# are fractions or counts  | 
| 
834
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $pseudoCnt = $compSum > 2? 1 : 1/100;  | 
| 
835
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	$compSum += $pseudoCnt * $zeroCnt;  | 
| 
836
 | 
0
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 	my %freq = map { $bases[$_] => ($baseComp->[$_] || $pseudoCnt)/$compSum } 0..3;  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
837
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my %result;  | 
| 
838
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	foreach my $b1 (@bases)  | 
| 
839
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
840
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		foreach my $b2 (@bases)  | 
| 
841
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
842
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			foreach my $b3 (@bases)  | 
| 
843
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			{  | 
| 
844
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				my $codon = $b1.$b2.$b3;  | 
| 
845
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				$result{$codon} = $freq{$b1}*$freq{$b2}*$freq{$b3};  | 
| 
846
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
847
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
848
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
849
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
850
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return \%result;  | 
| 
851
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
852
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
853
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 AUTHOR  | 
| 
854
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
855
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Zhenguo Zhang, C<<  >>  | 
| 
856
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
857
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 BUGS  | 
| 
858
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
859
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Please report any bugs or feature requests to C, or through  | 
| 
860
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the web interface at L.  I will be notified, and then you'll  | 
| 
861
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 automatically be notified of progress on your bug as I make changes.  | 
| 
862
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
863
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
864
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SUPPORT  | 
| 
865
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
866
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 You can find documentation for this module with the perldoc command.  | 
| 
867
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
868
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     perldoc Bio::CUA::CUB::Calculator  | 
| 
869
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
870
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
871
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 You can also look for information at:  | 
| 
872
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
873
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
874
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
875
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * RT: CPAN's request tracker (report bugs here)  | 
| 
876
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
877
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L  | 
| 
878
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
879
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * AnnoCPAN: Annotated CPAN documentation  | 
| 
880
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
881
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L  | 
| 
882
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
883
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * CPAN Ratings  | 
| 
884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
885
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L  | 
| 
886
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
887
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * Search CPAN  | 
| 
888
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
889
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L  | 
| 
890
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
891
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
892
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
893
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
894
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 ACKNOWLEDGEMENTS  | 
| 
895
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
896
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
897
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 LICENSE AND COPYRIGHT  | 
| 
898
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
899
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Copyright 2015 Zhenguo Zhang.  | 
| 
900
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
901
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This program is free software: you can redistribute it and/or modify  | 
| 
902
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 it under the terms of the GNU General Public License as published by  | 
| 
903
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the Free Software Foundation, either version 3 of the License, or  | 
| 
904
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (at your option) any later version.  | 
| 
905
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
906
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This program is distributed in the hope that it will be useful,  | 
| 
907
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 but WITHOUT ANY WARRANTY; without even the implied warranty of  | 
| 
908
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the  | 
| 
909
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 GNU General Public License for more details.  | 
| 
910
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
911
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 You should have received a copy of the GNU General Public License  | 
| 
912
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 along with this program.  If not, see L.  | 
| 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
914
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
915
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
916
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
917
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1; # End of Bio::CUA::CUB::Calculator  |