| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Bio::CUA::CUB::Builder; | 
| 2 |  |  |  |  |  |  |  | 
| 3 |  |  |  |  |  |  | =pod | 
| 4 |  |  |  |  |  |  |  | 
| 5 |  |  |  |  |  |  | =head1 NAME | 
| 6 |  |  |  |  |  |  |  | 
| 7 |  |  |  |  |  |  | Bio::CUA::CUB::Builder -- A module to calculate codon usage bias (CUB) | 
| 8 |  |  |  |  |  |  | metrics at codon level and other parameters | 
| 9 |  |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | use Bio::CUA::CUB::Builder; | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | # initialize the builder | 
| 15 |  |  |  |  |  |  | my $builder = Bio::CUA::CUB::Builder->new( | 
| 16 |  |  |  |  |  |  | codon_table => 1 ); # using stardard genetic code | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  | # calculate RSCU for each codon, and result is stored in "rscu.out" as | 
| 19 |  |  |  |  |  |  | # well as returned as a hash reference | 
| 20 |  |  |  |  |  |  | my $rscuHash = $builder->build_rscu("seqs.fa",undef, 0.5,"rscu.out"); | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  | # calculate CAI for each codon, normalizing RSCU values of codons | 
| 23 |  |  |  |  |  |  | # for each amino acid by the expected RSCUs under even usage, | 
| 24 |  |  |  |  |  |  | # rather than the maximal RSCU used by the traditional CAI method. | 
| 25 |  |  |  |  |  |  | my $caiHash = $builder->build_cai($codonList,2,'mean',"cai.out"); | 
| 26 |  |  |  |  |  |  |  | 
| 27 |  |  |  |  |  |  | # calculate tAI for each codon | 
| 28 |  |  |  |  |  |  | my $taiHash = $builder->build_tai("tRNA_copy_number.txt","tai.out", undef, 1); | 
| 29 |  |  |  |  |  |  |  | 
| 30 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 31 |  |  |  |  |  |  |  | 
| 32 |  |  |  |  |  |  | Codon usage bias (CUB) can be represented at two levels, codon and | 
| 33 |  |  |  |  |  |  | sequence. The latter is often computed as the geometric means of the | 
| 34 |  |  |  |  |  |  | sequence's codons. This module caculates CUB metrics at codon level. | 
| 35 |  |  |  |  |  |  |  | 
| 36 |  |  |  |  |  |  | Supported CUB metrics include CAI (codon adaptation index), tAI (tRNA | 
| 37 |  |  |  |  |  |  | adaptation index), RSCU (relative synonymous codon usage), and their | 
| 38 |  |  |  |  |  |  | variants. See the methods below for details. | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  | The output can be stored in a file which is then used by methods in | 
| 41 |  |  |  |  |  |  | L to calculate CUB indice for each | 
| 42 |  |  |  |  |  |  | protein-coding sequence. | 
| 43 |  |  |  |  |  |  |  | 
| 44 |  |  |  |  |  |  | =cut | 
| 45 |  |  |  |  |  |  |  | 
| 46 | 2 |  |  | 2 |  | 18419 | use 5.006; | 
|  | 2 |  |  |  |  | 7 |  | 
|  | 2 |  |  |  |  | 76 |  | 
| 47 | 2 |  |  | 2 |  | 10 | use strict; | 
|  | 2 |  |  |  |  | 3 |  | 
|  | 2 |  |  |  |  | 60 |  | 
| 48 | 2 |  |  | 2 |  | 8 | use warnings; | 
|  | 2 |  |  |  |  | 3 |  | 
|  | 2 |  |  |  |  | 61 |  | 
| 49 | 2 |  |  | 2 |  | 490 | use parent qw/Bio::CUA::CUB/; | 
|  | 2 |  |  |  |  | 297 |  | 
|  | 2 |  |  |  |  | 12 |  | 
| 50 |  |  |  |  |  |  |  | 
| 51 |  |  |  |  |  |  | # paired codon bases for each anticodon base at wobble position. | 
| 52 |  |  |  |  |  |  | # According to Crick, FH, 1966, JMB | 
| 53 |  |  |  |  |  |  | my %wobbleBasePairsAnti = ( | 
| 54 |  |  |  |  |  |  | #Anti => Codon at 3rd position | 
| 55 |  |  |  |  |  |  | A => [qw/T/], | 
| 56 |  |  |  |  |  |  | C => [qw/G/], | 
| 57 |  |  |  |  |  |  | T => [qw/A G/], | 
| 58 |  |  |  |  |  |  | G => [qw/C T/], | 
| 59 |  |  |  |  |  |  | I => [qw/A C T/] | 
| 60 |  |  |  |  |  |  | ); | 
| 61 |  |  |  |  |  |  |  | 
| 62 |  |  |  |  |  |  | # get the version using codon's position as key | 
| 63 |  |  |  |  |  |  | sub _build_pairing_anti_base | 
| 64 |  |  |  |  |  |  | { | 
| 65 | 1 |  |  | 1 |  | 2 | my $self = shift; | 
| 66 | 0 |  |  |  |  | 0 | my %wobbleAntiKey = $self->get_tag('anti_wobble_pair')? | 
| 67 | 1 | 50 |  |  |  | 7 | %{$self->get_tag('anti_wobble_pair')} : %wobbleBasePairsAnti; | 
| 68 |  |  |  |  |  |  |  | 
| 69 | 1 | 50 |  |  |  | 5 | if($self->_a_to_i) | 
| 70 |  |  |  |  |  |  | { | 
| 71 | 1 |  |  |  |  | 3 | $wobbleAntiKey{'A'} = $wobbleAntiKey{'I'}; | 
| 72 |  |  |  |  |  |  | } | 
| 73 | 1 |  |  |  |  | 2 | my %wobbleBasePairs; | 
| 74 | 1 |  |  |  |  | 6 | while(my ($antiPos, $codonMatches) = each %wobbleAntiKey) | 
| 75 |  |  |  |  |  |  | { | 
| 76 | 5 |  |  |  |  | 9 | foreach (@$codonMatches) | 
| 77 |  |  |  |  |  |  | { | 
| 78 | 11 |  |  |  |  | 10 | push @{$wobbleBasePairs{$_}}, $antiPos; | 
|  | 11 |  |  |  |  | 46 |  | 
| 79 |  |  |  |  |  |  | } | 
| 80 |  |  |  |  |  |  | } | 
| 81 | 1 |  |  |  |  | 5 | $self->{'_wobble_pair'} = \%wobbleBasePairs; | 
| 82 |  |  |  |  |  |  | } | 
| 83 |  |  |  |  |  |  |  | 
| 84 |  |  |  |  |  |  | #default codon selective constraints from dos Reis, et al., 2004 NAR. | 
| 85 |  |  |  |  |  |  | # key is anticodon-codon at wobbling site | 
| 86 |  |  |  |  |  |  | my %defaultSC = ( | 
| 87 |  |  |  |  |  |  | 'I-T'	=> 0, # 1 | 
| 88 |  |  |  |  |  |  | 'A-T'	=> 0, | 
| 89 |  |  |  |  |  |  | 'G-C'	=> 0, | 
| 90 |  |  |  |  |  |  | 'T-A'	=> 0, | 
| 91 |  |  |  |  |  |  | 'C-G'	=> 0, # 5 | 
| 92 |  |  |  |  |  |  | 'G-T'	=> 0.41, | 
| 93 |  |  |  |  |  |  | 'I-C'	=> 0.28, | 
| 94 |  |  |  |  |  |  | 'I-A'	=> 0.9999, | 
| 95 |  |  |  |  |  |  | 'A-C'	=> 0.28,   # for cases when A is | 
| 96 |  |  |  |  |  |  | 'A-A'	=> 0.9999, # regarded as I | 
| 97 |  |  |  |  |  |  | 'T-G'	=> 0.68, | 
| 98 |  |  |  |  |  |  | 'L-A'	=> 0.89  # 10, L=lysidine present in prokaryotes | 
| 99 |  |  |  |  |  |  | ); | 
| 100 |  |  |  |  |  |  |  | 
| 101 |  |  |  |  |  |  | =head1 METHODS | 
| 102 |  |  |  |  |  |  |  | 
| 103 |  |  |  |  |  |  | =head2 new | 
| 104 |  |  |  |  |  |  |  | 
| 105 |  |  |  |  |  |  | Title   : new | 
| 106 |  |  |  |  |  |  | Usage   : $analyzer = Bio::CUA::CUB::Builder->new(-codon_table => 1) | 
| 107 |  |  |  |  |  |  | Function: initiate the analyzer | 
| 108 |  |  |  |  |  |  | Returns : an object | 
| 109 |  |  |  |  |  |  | Args    : accepted options are as follows | 
| 110 |  |  |  |  |  |  |  | 
| 111 |  |  |  |  |  |  | B | 
| 112 |  |  |  |  |  |  |  | 
| 113 |  |  |  |  |  |  | =over | 
| 114 |  |  |  |  |  |  |  | 
| 115 |  |  |  |  |  |  | =item C<-codon_table> | 
| 116 |  |  |  |  |  |  |  | 
| 117 |  |  |  |  |  |  | the genetic code table applied for following sequence analyses. It can | 
| 118 |  |  |  |  |  |  | be specified by an integer (genetic code table id), an object of | 
| 119 |  |  |  |  |  |  | L, or a map-file. See the method | 
| 120 |  |  |  |  |  |  | L for details. | 
| 121 |  |  |  |  |  |  |  | 
| 122 |  |  |  |  |  |  | =back | 
| 123 |  |  |  |  |  |  |  | 
| 124 |  |  |  |  |  |  | B | 
| 125 |  |  |  |  |  |  |  | 
| 126 |  |  |  |  |  |  | =over | 
| 127 |  |  |  |  |  |  |  | 
| 128 |  |  |  |  |  |  | =item C<-a_to_i> | 
| 129 |  |  |  |  |  |  |  | 
| 130 |  |  |  |  |  |  | a switch option. If true (any nonzero values), all | 
| 131 |  |  |  |  |  |  | 'A' nucleotides at the 1st position of anticodon will be regarded as I | 
| 132 |  |  |  |  |  |  | (inosine) which can pair with more nucleotides at codons's wobbling | 
| 133 |  |  |  |  |  |  | position (A,T,C at the 3rd position). The default is true. | 
| 134 |  |  |  |  |  |  |  | 
| 135 |  |  |  |  |  |  | =item C<-no_atg> | 
| 136 |  |  |  |  |  |  |  | 
| 137 |  |  |  |  |  |  | a switch option to indicate whether ATG codons should be | 
| 138 |  |  |  |  |  |  | excluded in tAI calculation. Default is true, following I | 
| 139 |  |  |  |  |  |  | et al., 2004, NAR>. To include ATG in tAI calculation, provide '0' here. | 
| 140 |  |  |  |  |  |  |  | 
| 141 |  |  |  |  |  |  | =item C<-wobble> | 
| 142 |  |  |  |  |  |  |  | 
| 143 |  |  |  |  |  |  | reference to a hash containing anticodon-codon basepairs at | 
| 144 |  |  |  |  |  |  | wobbling position, such as ('U' is equivalent to 'T') | 
| 145 |  |  |  |  |  |  | %wobblePairs = ( | 
| 146 |  |  |  |  |  |  | A => [qw/T/], | 
| 147 |  |  |  |  |  |  | C => [qw/G/], | 
| 148 |  |  |  |  |  |  | T => [qw/A G/], | 
| 149 |  |  |  |  |  |  | G => [qw/C T/], | 
| 150 |  |  |  |  |  |  | I => [qw/A C T/] | 
| 151 |  |  |  |  |  |  | ); # this is the default setting | 
| 152 |  |  |  |  |  |  | Hash keys are the bases in anticodons and hash values are paired | 
| 153 |  |  |  |  |  |  | bases in codons's 3rd positions. This option is optional and default | 
| 154 |  |  |  |  |  |  | value is shown above by the example. | 
| 155 |  |  |  |  |  |  |  | 
| 156 |  |  |  |  |  |  | =back | 
| 157 |  |  |  |  |  |  |  | 
| 158 |  |  |  |  |  |  | =cut | 
| 159 |  |  |  |  |  |  |  | 
| 160 |  |  |  |  |  |  | sub new | 
| 161 |  |  |  |  |  |  | { | 
| 162 | 1 |  |  | 1 | 1 | 4640 | my ($caller, @args) = @_; | 
| 163 | 1 | 50 |  |  |  | 5 | my $class = ref($caller)? ref($caller) : $caller; | 
| 164 | 1 |  |  |  |  | 13 | my $self = $class->SUPER::new(@args); | 
| 165 |  |  |  |  |  |  |  | 
| 166 | 1 |  |  |  |  | 5 | my $hashRef = $self->_array_to_hash(\@args); | 
| 167 | 1 | 50 |  |  |  | 6 | if(exists $hashRef->{'a_to_i'}) | 
| 168 |  |  |  |  |  |  | { | 
| 169 | 0 |  |  |  |  | 0 | $self->set_tag('a2i',$hashRef->{'a_to_i'}); | 
| 170 |  |  |  |  |  |  | }else | 
| 171 |  |  |  |  |  |  | { | 
| 172 | 1 |  |  |  |  | 27 | $self->set_tag('a2i', 1); # true, default | 
| 173 |  |  |  |  |  |  | } | 
| 174 | 1 | 50 |  |  |  | 3 | if(exists $hashRef->{'no_atg'}) | 
| 175 |  |  |  |  |  |  | { | 
| 176 | 0 |  |  |  |  | 0 | $self->set_tag('no_atg', $hashRef->{'no_atg'}); | 
| 177 |  |  |  |  |  |  | }else | 
| 178 |  |  |  |  |  |  | { | 
| 179 | 1 |  |  |  |  | 3 | $self->set_tag('no_atg',1); # default is true | 
| 180 |  |  |  |  |  |  | } | 
| 181 | 1 | 50 |  |  |  | 3 | if(exists $hashRef->{'wobble'}) | 
| 182 |  |  |  |  |  |  | { | 
| 183 | 0 |  |  |  |  | 0 | $self->set_tag('anti_wobble_pair', $hashRef->{'wobble'}); | 
| 184 |  |  |  |  |  |  | } | 
| 185 |  |  |  |  |  |  |  | 
| 186 | 1 |  |  |  |  | 3 | $self->_build_pairing_anti_base; # make wobble pairing hash | 
| 187 | 1 |  |  |  |  | 6 | return $self; | 
| 188 |  |  |  |  |  |  | } | 
| 189 |  |  |  |  |  |  |  | 
| 190 |  |  |  |  |  |  | # indicator whether A should be regarded as I | 
| 191 |  |  |  |  |  |  | sub _a_to_i | 
| 192 |  |  |  |  |  |  | { | 
| 193 | 1 |  |  | 1 |  | 2 | my $self = shift; | 
| 194 | 1 |  |  |  |  | 3 | $self->get_tag('a2i'); | 
| 195 |  |  |  |  |  |  | } | 
| 196 |  |  |  |  |  |  |  | 
| 197 |  |  |  |  |  |  | =head2 no_atg | 
| 198 |  |  |  |  |  |  |  | 
| 199 |  |  |  |  |  |  | Title   : no_atg | 
| 200 |  |  |  |  |  |  | Usage   : $status = $self->no_atg([$newVal]) | 
| 201 |  |  |  |  |  |  | Function: get/set the status whether ATG should be excluded in tAI | 
| 202 |  |  |  |  |  |  | calculation. | 
| 203 |  |  |  |  |  |  | Returns : current status after updating | 
| 204 |  |  |  |  |  |  | Args    : optional. 1 for true, 0 for false | 
| 205 |  |  |  |  |  |  |  | 
| 206 |  |  |  |  |  |  | =cut | 
| 207 |  |  |  |  |  |  | # implement in parent class Bio::CUA | 
| 208 |  |  |  |  |  |  |  | 
| 209 |  |  |  |  |  |  | #=head2 detect_optimal_codons | 
| 210 |  |  |  |  |  |  | # | 
| 211 |  |  |  |  |  |  | # Title   : detect_optimal_codons | 
| 212 |  |  |  |  |  |  | # Usage   : $ok = $self->detect_optimal_codons(); | 
| 213 |  |  |  |  |  |  | # Function: | 
| 214 |  |  |  |  |  |  | # Returns : | 
| 215 |  |  |  |  |  |  | # Args    : | 
| 216 |  |  |  |  |  |  | # | 
| 217 |  |  |  |  |  |  | #=cut | 
| 218 |  |  |  |  |  |  |  | 
| 219 |  |  |  |  |  |  | sub detect_optimal_codons | 
| 220 |  |  |  |  |  |  | { | 
| 221 | 0 |  |  | 0 | 0 | 0 | die "detect_optimal_codons has not implemented, yet.$!"; | 
| 222 |  |  |  |  |  |  | } | 
| 223 |  |  |  |  |  |  |  | 
| 224 |  |  |  |  |  |  | =head2 build_rscu | 
| 225 |  |  |  |  |  |  |  | 
| 226 |  |  |  |  |  |  | Title   : build_rscu | 
| 227 |  |  |  |  |  |  | Usage   : $ok = $self->build_rscu($input,[$minTotal,$pseudoCnt,$output]); | 
| 228 |  |  |  |  |  |  | Function: calculate RSCU values for all sense codons | 
| 229 |  |  |  |  |  |  | Returns : reference of a hash using the format 'codon => RSCU value'. | 
| 230 |  |  |  |  |  |  | return undef if failed. | 
| 231 |  |  |  |  |  |  | Args    : accepted arguments are as follows (note: not as hash): | 
| 232 |  |  |  |  |  |  |  | 
| 233 |  |  |  |  |  |  | =over | 
| 234 |  |  |  |  |  |  |  | 
| 235 |  |  |  |  |  |  | =item C | 
| 236 |  |  |  |  |  |  |  | 
| 237 |  |  |  |  |  |  | name of a file containing fasta CDS sequences of interested | 
| 238 |  |  |  |  |  |  | genes, or a sequence object with method I to extract sequence | 
| 239 |  |  |  |  |  |  | string, or a plain sequence string, or reference to a hash containing | 
| 240 |  |  |  |  |  |  | codon counts with structure like I<{ AGC => 50, GTC => 124}>. | 
| 241 |  |  |  |  |  |  |  | 
| 242 |  |  |  |  |  |  | =item C | 
| 243 |  |  |  |  |  |  |  | 
| 244 |  |  |  |  |  |  | optional, name of the file to store the result. If omitted, | 
| 245 |  |  |  |  |  |  | no result will be written. | 
| 246 |  |  |  |  |  |  |  | 
| 247 |  |  |  |  |  |  | =item C | 
| 248 |  |  |  |  |  |  |  | 
| 249 |  |  |  |  |  |  | optional, minimal count of an amino acid in sequences; if observed | 
| 250 |  |  |  |  |  |  | count is smaller than this minimum, all codons of this amino acid would | 
| 251 |  |  |  |  |  |  | be assigned equal RSCU values. This is to reduce sampling errors in | 
| 252 |  |  |  |  |  |  | rarely observed amino acids. Default value is 5. | 
| 253 |  |  |  |  |  |  |  | 
| 254 |  |  |  |  |  |  | =item C | 
| 255 |  |  |  |  |  |  |  | 
| 256 |  |  |  |  |  |  | optional. Pseudo-counts for unobserved codons. Default is 0.5. | 
| 257 |  |  |  |  |  |  |  | 
| 258 |  |  |  |  |  |  | =back | 
| 259 |  |  |  |  |  |  |  | 
| 260 |  |  |  |  |  |  | =cut | 
| 261 |  |  |  |  |  |  |  | 
| 262 |  |  |  |  |  |  | sub build_rscu | 
| 263 |  |  |  |  |  |  | { | 
| 264 | 5 |  |  | 5 | 1 | 19 | my ($self, $input, $minTotal, $pseudoCnt, $output) = @_; | 
| 265 |  |  |  |  |  |  |  | 
| 266 | 5 | 50 | 33 |  |  | 49 | $pseudoCnt = 0.5 unless($pseudoCnt and $pseudoCnt > 0); | 
| 267 | 5 | 50 | 33 |  |  | 27 | $minTotal = 5 unless($minTotal and $minTotal > 0); | 
| 268 | 5 | 50 |  |  |  | 30 | my $codonList = $self->get_codon_list($input) or return; | 
| 269 |  |  |  |  |  |  |  | 
| 270 | 5 |  |  |  |  | 39 | my @allAAs = $self->all_AAs_in_table; # get all the amino acids in the codon table | 
| 271 | 5 |  |  |  |  | 12 | my %rscu; | 
| 272 | 5 |  |  |  |  | 14 | foreach my $AA (@allAAs) | 
| 273 |  |  |  |  |  |  | { | 
| 274 |  |  |  |  |  |  | # get the codons encoding this AA | 
| 275 | 100 |  |  |  |  | 341 | my @codons = $self->codons_of_AA($AA); | 
| 276 | 100 |  |  |  |  | 145 | my $cntSum = 0; # total observations of this AA's codons | 
| 277 | 100 |  |  |  |  | 99 | my $zeroCnt = 0; # number of codons with zero values | 
| 278 | 100 |  |  |  |  | 186 | foreach (@codons) | 
| 279 |  |  |  |  |  |  | { | 
| 280 | 305 | 50 | 0 |  |  | 594 | ++$zeroCnt and next unless($codonList->{$_}); | 
| 281 | 305 |  |  |  |  | 498 | $cntSum += $codonList->{$_}; | 
| 282 |  |  |  |  |  |  | } | 
| 283 |  |  |  |  |  |  | # get the rscu values | 
| 284 | 100 | 50 |  |  |  | 209 | if($cntSum < $minTotal) # too small sample | 
| 285 |  |  |  |  |  |  | { | 
| 286 |  |  |  |  |  |  | # assign equal usage to all codons of this amino acid | 
| 287 | 0 |  |  |  |  | 0 | foreach (@codons) | 
| 288 |  |  |  |  |  |  | { | 
| 289 | 0 |  |  |  |  | 0 | $rscu{$_} = 1/($#codons+1); | 
| 290 |  |  |  |  |  |  | } | 
| 291 |  |  |  |  |  |  | }else | 
| 292 |  |  |  |  |  |  | { | 
| 293 |  |  |  |  |  |  | # add the pseudoCnt correction | 
| 294 | 100 |  |  |  |  | 169 | $cntSum += $zeroCnt*$pseudoCnt; | 
| 295 | 100 |  |  |  |  | 136 | foreach (@codons) | 
| 296 |  |  |  |  |  |  | { | 
| 297 | 305 |  | 33 |  |  | 1419 | $rscu{$_} = ($codonList->{$_} || $pseudoCnt)/($cntSum || 1); | 
|  |  |  | 50 |  |  |  |  | 
| 298 |  |  |  |  |  |  | } | 
| 299 |  |  |  |  |  |  | } | 
| 300 |  |  |  |  |  |  | } | 
| 301 |  |  |  |  |  |  |  | 
| 302 | 5 | 50 |  |  |  | 19 | $self->_write_out_hash($output, \%rscu) if($output); | 
| 303 | 5 |  |  |  |  | 72 | return \%rscu; | 
| 304 |  |  |  |  |  |  | } | 
| 305 |  |  |  |  |  |  |  | 
| 306 |  |  |  |  |  |  | =head2 build_cai | 
| 307 |  |  |  |  |  |  |  | 
| 308 |  |  |  |  |  |  | Title   : build_cai | 
| 309 |  |  |  |  |  |  | Usage   : $ok = $self->build_cai($input,[$minTotal,$norm_method,$output]); | 
| 310 |  |  |  |  |  |  | Function: calculate CAI values for all sense codons | 
| 311 |  |  |  |  |  |  | Returns : reference of a hash in which codons are keys and CAI values | 
| 312 |  |  |  |  |  |  | are values. return undef if failed. | 
| 313 |  |  |  |  |  |  | Args    : accepted arguments are as follows: | 
| 314 |  |  |  |  |  |  |  | 
| 315 |  |  |  |  |  |  | =over | 
| 316 |  |  |  |  |  |  |  | 
| 317 |  |  |  |  |  |  | =item C | 
| 318 |  |  |  |  |  |  |  | 
| 319 |  |  |  |  |  |  | name of a file containing fasta CDS sequences of interested | 
| 320 |  |  |  |  |  |  | genes, or a sequence object with metho I to derive sequence | 
| 321 |  |  |  |  |  |  | string, or a plain sequence string, or reference to a hash containing | 
| 322 |  |  |  |  |  |  | codon list with structure like I<{ AGC => 50, GTC => 124}>. | 
| 323 |  |  |  |  |  |  |  | 
| 324 |  |  |  |  |  |  | =item C | 
| 325 |  |  |  |  |  |  |  | 
| 326 |  |  |  |  |  |  | optional, minimal codon count for an amino acid; if observed | 
| 327 |  |  |  |  |  |  | count is smaller than this count, all codons of this amino acid would | 
| 328 |  |  |  |  |  |  | be assigned equal CAI values. This is to reduce sampling errors in | 
| 329 |  |  |  |  |  |  | rarely observed amino acids. Default value is 5. | 
| 330 |  |  |  |  |  |  |  | 
| 331 |  |  |  |  |  |  | =item C | 
| 332 |  |  |  |  |  |  |  | 
| 333 |  |  |  |  |  |  | optional, indicating how to normalize RSCU to get CAI | 
| 334 |  |  |  |  |  |  | values. Valid values are 'max' and 'mean'; the former represents the | 
| 335 |  |  |  |  |  |  | original method used by I, i.e., dividing | 
| 336 |  |  |  |  |  |  | all RSCUs by the maximum of an amino acid, while 'mean' indicates | 
| 337 |  |  |  |  |  |  | dividing RSCU by expected average fraction assuming even usage of | 
| 338 |  |  |  |  |  |  | all codons, i.e., 0.5 for amino acids encoded by 2 codons, 0.25 for | 
| 339 |  |  |  |  |  |  | amino acids encoded by 4 codons, etc. The latter method is able to | 
| 340 |  |  |  |  |  |  | give different CAI values for the most preferred codons of different | 
| 341 |  |  |  |  |  |  | amino acids, which otherwise would be the same (i.e., 1). | 
| 342 |  |  |  |  |  |  |  | 
| 343 |  |  |  |  |  |  | =item C | 
| 344 |  |  |  |  |  |  |  | 
| 345 |  |  |  |  |  |  | optional. If provided, result will be stored in the file | 
| 346 |  |  |  |  |  |  | specified by this argument. | 
| 347 |  |  |  |  |  |  |  | 
| 348 |  |  |  |  |  |  | =back | 
| 349 |  |  |  |  |  |  |  | 
| 350 |  |  |  |  |  |  | Note: for codons which are not observed will be assigned a count of | 
| 351 |  |  |  |  |  |  | 0.5, and codons which are not degenerate (such as AUG and UGG in | 
| 352 |  |  |  |  |  |  | standard genetic code table) are excluded. These are the default of | 
| 353 |  |  |  |  |  |  | the paper I. Here you can also reduce | 
| 354 |  |  |  |  |  |  | sampling error by setting parameter $minTotal. | 
| 355 |  |  |  |  |  |  |  | 
| 356 |  |  |  |  |  |  | =cut | 
| 357 |  |  |  |  |  |  |  | 
| 358 |  |  |  |  |  |  | sub build_cai | 
| 359 |  |  |  |  |  |  | { | 
| 360 | 2 |  |  | 2 | 1 | 16 | my ($self, $input, $minTotal, $norm, $output) = @_; | 
| 361 |  |  |  |  |  |  |  | 
| 362 | 2 | 50 |  |  |  | 8 | $minTotal = 5 unless(defined $minTotal); | 
| 363 |  |  |  |  |  |  | # get RSCU values first | 
| 364 | 2 |  |  |  |  | 8 | my $rscuHash = $self->build_rscu($input,$minTotal,0.5); | 
| 365 |  |  |  |  |  |  |  | 
| 366 | 2 |  |  |  |  | 10 | my @AAs = $self->all_AAs_in_table; | 
| 367 |  |  |  |  |  |  |  | 
| 368 | 2 |  |  |  |  | 6 | my %cai; | 
| 369 | 2 |  |  |  |  | 5 | my $maxCAI = 0; # the maximum value of CAI in this dataset | 
| 370 | 2 |  |  |  |  | 6 | foreach my $AA (@AAs) | 
| 371 |  |  |  |  |  |  | { | 
| 372 | 40 |  |  |  |  | 131 | my @codons = $self->codons_of_AA($AA); | 
| 373 |  |  |  |  |  |  | # skip non-degenerate codons | 
| 374 | 40 | 100 |  |  |  | 112 | next unless($#codons > 0); | 
| 375 |  |  |  |  |  |  |  | 
| 376 |  |  |  |  |  |  | # determine the factor to normalize the values | 
| 377 | 36 |  |  |  |  | 44 | my $scaleFactor; | 
| 378 | 36 | 100 | 66 |  |  | 212 | if($norm and $norm =~ /mean/i) # normalized by average | 
| 379 |  |  |  |  |  |  | { | 
| 380 | 18 |  |  |  |  | 27 | $scaleFactor = $#codons + 1; | 
| 381 |  |  |  |  |  |  | }else # old method, by maximum | 
| 382 |  |  |  |  |  |  | { | 
| 383 |  |  |  |  |  |  | # get the maximum RSCU value for this codon | 
| 384 | 18 |  |  |  |  | 24 | my $maxRSCU = 0; | 
| 385 | 18 |  |  |  |  | 36 | foreach (@codons) | 
| 386 |  |  |  |  |  |  | { | 
| 387 | 59 |  |  |  |  | 80 | my $rscu = $rscuHash->{$_}; | 
| 388 | 59 | 100 |  |  |  | 184 | $maxRSCU = $rscu if($maxRSCU < $rscu); | 
| 389 |  |  |  |  |  |  | } | 
| 390 | 18 | 50 |  |  |  | 58 | $scaleFactor = $maxRSCU > 0? 1/$maxRSCU : 0; | 
| 391 |  |  |  |  |  |  | } | 
| 392 |  |  |  |  |  |  | # get CAI values now | 
| 393 | 36 |  |  |  |  | 64 | foreach (@codons) | 
| 394 |  |  |  |  |  |  | { | 
| 395 | 118 |  |  |  |  | 144 | my $rscu = $rscuHash->{$_}; | 
| 396 | 118 |  |  |  |  | 195 | $cai{$_} = $rscu*$scaleFactor; | 
| 397 |  |  |  |  |  |  | # global maximum of CAI | 
| 398 | 118 | 100 |  |  |  | 360 | $maxCAI = $cai{$_} if($cai{$_} > $maxCAI); | 
| 399 |  |  |  |  |  |  | } | 
| 400 |  |  |  |  |  |  | } | 
| 401 |  |  |  |  |  |  |  | 
| 402 |  |  |  |  |  |  | #*********************** | 
| 403 |  |  |  |  |  |  | # further normalize all CAI by the global maximal CAI, like tAI, | 
| 404 |  |  |  |  |  |  | # but one can opt out this, because without normalize by maxCAI | 
| 405 |  |  |  |  |  |  | # one can distinguish genes more often use less-preferred codons. | 
| 406 |  |  |  |  |  |  | # In this way, value 1 means no bias, > 1 means towards preferred | 
| 407 |  |  |  |  |  |  | # codons while < 1 means towards non-preferred codons | 
| 408 | 2 | 100 | 66 |  |  | 52 | map { $cai{$_} /= $maxCAI } keys(%cai) | 
|  | 59 |  | 66 |  |  | 74 |  | 
| 409 |  |  |  |  |  |  | if($norm and $norm =~ /mean/i and $maxCAI); | 
| 410 |  |  |  |  |  |  |  | 
| 411 | 2 | 100 |  |  |  | 28 | $self->_write_out_hash($output, \%cai) if($output); | 
| 412 | 2 |  |  |  |  | 31 | return \%cai; | 
| 413 |  |  |  |  |  |  | } | 
| 414 |  |  |  |  |  |  |  | 
| 415 |  |  |  |  |  |  | =head2 build_b_cai | 
| 416 |  |  |  |  |  |  |  | 
| 417 |  |  |  |  |  |  | Title   : build_b_cai | 
| 418 |  |  |  |  |  |  | Usage   : $caiHash = | 
| 419 |  |  |  |  |  |  | $self->build_b_cai($input,$background,[$minTotal,$output]); | 
| 420 |  |  |  |  |  |  | Function: calculate CAI values for all sense codons. Instead of | 
| 421 |  |  |  |  |  |  | normalizing RSCUs by maximal RSCU or expected fractions, each RSCU value is | 
| 422 |  |  |  |  |  |  | normalized by the corresponding background RSCU, then these | 
| 423 |  |  |  |  |  |  | normalized RSCUs are used to calculate CAI values. | 
| 424 |  |  |  |  |  |  | Returns : reference of a hash in which codons are keys and CAI values | 
| 425 |  |  |  |  |  |  | are values. return undef if failed. | 
| 426 |  |  |  |  |  |  | Args    : accepted arguments are as follows: | 
| 427 |  |  |  |  |  |  |  | 
| 428 |  |  |  |  |  |  | =over | 
| 429 |  |  |  |  |  |  |  | 
| 430 |  |  |  |  |  |  | =item C | 
| 431 |  |  |  |  |  |  |  | 
| 432 |  |  |  |  |  |  | name of a file containing fasta CDS sequences of interested | 
| 433 |  |  |  |  |  |  | genes, or a sequence object with metho I to derive sequence | 
| 434 |  |  |  |  |  |  | string, or a plain sequence string, or reference to a hash containing | 
| 435 |  |  |  |  |  |  | codon list with structure like I<{ AGC => 50, GTC => 124}>. | 
| 436 |  |  |  |  |  |  |  | 
| 437 |  |  |  |  |  |  | =item C | 
| 438 |  |  |  |  |  |  |  | 
| 439 |  |  |  |  |  |  | background data from which background codon usage (RSCUs) | 
| 440 |  |  |  |  |  |  | is computed. Acceptable formats are the same as the above argument | 
| 441 |  |  |  |  |  |  | 'input'. | 
| 442 |  |  |  |  |  |  |  | 
| 443 |  |  |  |  |  |  | =item C | 
| 444 |  |  |  |  |  |  |  | 
| 445 |  |  |  |  |  |  | optional, minimal codon count for an amino acid; if observed | 
| 446 |  |  |  |  |  |  | count is smaller than this count, all codons of this amino acid would | 
| 447 |  |  |  |  |  |  | be assigned equal RSCU values. This is to reduce sampling errors in | 
| 448 |  |  |  |  |  |  | rarely observed amino acids. Default value is 5. | 
| 449 |  |  |  |  |  |  |  | 
| 450 |  |  |  |  |  |  | =item C | 
| 451 |  |  |  |  |  |  |  | 
| 452 |  |  |  |  |  |  | optional. If provided, result will be stored in the file | 
| 453 |  |  |  |  |  |  | specified by this argument. | 
| 454 |  |  |  |  |  |  |  | 
| 455 |  |  |  |  |  |  | =back | 
| 456 |  |  |  |  |  |  |  | 
| 457 |  |  |  |  |  |  | Note: for codons which are not observed will be assigned a count of | 
| 458 |  |  |  |  |  |  | 0.5, and codons which are not degenerate (such as AUG and UGG in | 
| 459 |  |  |  |  |  |  | standard genetic code table) are excluded. | 
| 460 |  |  |  |  |  |  |  | 
| 461 |  |  |  |  |  |  | =cut | 
| 462 |  |  |  |  |  |  |  | 
| 463 |  |  |  |  |  |  | sub build_b_cai | 
| 464 |  |  |  |  |  |  | { | 
| 465 | 1 |  |  | 1 | 1 | 8 | my ($self, $input, $background, $minTotal, $output) = @_; | 
| 466 |  |  |  |  |  |  |  | 
| 467 | 1 | 50 |  |  |  | 4 | $minTotal = 5 unless(defined $minTotal); | 
| 468 |  |  |  |  |  |  | # get RSCU values first for input as well as background | 
| 469 | 1 |  |  |  |  | 4 | my $rscuHash = $self->build_rscu($input,$minTotal,0.5); | 
| 470 | 1 |  |  |  |  | 6 | my $backRscuHash = $self->build_rscu($background,$minTotal,0.5); | 
| 471 |  |  |  |  |  |  |  | 
| 472 |  |  |  |  |  |  | # normalize all RSCU values by background RSCUs | 
| 473 | 1 |  |  |  |  | 17 | my @senseCodons = $self->all_sense_codons(); | 
| 474 | 1 |  |  |  |  | 6 | foreach (@senseCodons) | 
| 475 |  |  |  |  |  |  | { | 
| 476 | 61 |  |  |  |  | 161 | $rscuHash->{$_} /= $backRscuHash->{$_}; | 
| 477 |  |  |  |  |  |  | } | 
| 478 |  |  |  |  |  |  |  | 
| 479 |  |  |  |  |  |  | # now calculate CAIs for each amino acid | 
| 480 | 1 |  |  |  |  | 7 | my @AAs = $self->all_AAs_in_table; | 
| 481 |  |  |  |  |  |  |  | 
| 482 | 1 |  |  |  |  | 3 | my %cai; | 
| 483 | 1 |  |  |  |  | 3 | my $maxCAI = 0; # the maximum value of CAI in this dataset | 
| 484 | 1 |  |  |  |  | 3 | foreach my $AA (@AAs) | 
| 485 |  |  |  |  |  |  | { | 
| 486 | 20 |  |  |  |  | 66 | my @codons = $self->codons_of_AA($AA); | 
| 487 |  |  |  |  |  |  | # skip non-degenerate codons | 
| 488 | 20 | 100 |  |  |  | 55 | next unless($#codons > 0); | 
| 489 |  |  |  |  |  |  |  | 
| 490 |  |  |  |  |  |  | # get the maximum RSCU value for this amino acid | 
| 491 | 18 |  |  |  |  | 22 | my $maxRSCU = 0; | 
| 492 | 18 |  |  |  |  | 31 | foreach (@codons) | 
| 493 |  |  |  |  |  |  | { | 
| 494 | 59 |  |  |  |  | 156 | my $rscu = $rscuHash->{$_}; | 
| 495 | 59 | 100 |  |  |  | 149 | $maxRSCU = $rscu if($maxRSCU < $rscu); | 
| 496 |  |  |  |  |  |  | } | 
| 497 |  |  |  |  |  |  |  | 
| 498 |  |  |  |  |  |  | # get CAI values now | 
| 499 | 18 |  |  |  |  | 28 | foreach (@codons) | 
| 500 |  |  |  |  |  |  | { | 
| 501 | 59 |  |  |  |  | 66 | my $rscu = $rscuHash->{$_}; # normalized one | 
| 502 | 59 |  |  |  |  | 151 | $cai{$_} = $rscu/$maxRSCU; | 
| 503 |  |  |  |  |  |  | } | 
| 504 |  |  |  |  |  |  | } | 
| 505 |  |  |  |  |  |  |  | 
| 506 | 1 | 50 |  |  |  | 5 | $self->_write_out_hash($output, \%cai) if($output); | 
| 507 | 1 |  |  |  |  | 26 | return \%cai; | 
| 508 |  |  |  |  |  |  | } | 
| 509 |  |  |  |  |  |  |  | 
| 510 |  |  |  |  |  |  | =head2 build_tai | 
| 511 |  |  |  |  |  |  |  | 
| 512 |  |  |  |  |  |  | Title   : build_tai | 
| 513 |  |  |  |  |  |  | Usage   : $taiHash = | 
| 514 |  |  |  |  |  |  | $self->build_tai($input,[$output,$selective_constraints, $kingdom]); | 
| 515 |  |  |  |  |  |  | Function: build tAI values for all sense codons | 
| 516 |  |  |  |  |  |  | Returns : reference of a hash in which codons are keys and tAI indice | 
| 517 |  |  |  |  |  |  | are values. return undef if failed. See Formula 1 and 2 in I | 
| 518 |  |  |  |  |  |  | Reis, 2004, NAR> to see how they are computed. | 
| 519 |  |  |  |  |  |  | Args    : accepted arguments are as follows: | 
| 520 |  |  |  |  |  |  |  | 
| 521 |  |  |  |  |  |  | =over | 
| 522 |  |  |  |  |  |  |  | 
| 523 |  |  |  |  |  |  | =item C | 
| 524 |  |  |  |  |  |  |  | 
| 525 |  |  |  |  |  |  | name of a file containing tRNA copies/abundance in the format | 
| 526 |  |  |  |  |  |  | 'anticodoncount' per line, where 'anticodon' is anticodon in | 
| 527 |  |  |  |  |  |  | the tRNA and count can be the tRNA gene copy number or abundance. | 
| 528 |  |  |  |  |  |  |  | 
| 529 |  |  |  |  |  |  | =item C | 
| 530 |  |  |  |  |  |  |  | 
| 531 |  |  |  |  |  |  | optional. If provided, result will be stored in the file | 
| 532 |  |  |  |  |  |  | specified by this argument. | 
| 533 |  |  |  |  |  |  |  | 
| 534 |  |  |  |  |  |  | =item C | 
| 535 |  |  |  |  |  |  |  | 
| 536 |  |  |  |  |  |  | optional, reference to hash containing wobble base-pairing and its | 
| 537 |  |  |  |  |  |  | selective constraint compared to Watson-Crick base-pair, the format | 
| 538 |  |  |  |  |  |  | is like this: | 
| 539 |  |  |  |  |  |  | $selective_constraints = { | 
| 540 |  |  |  |  |  |  | ...   ...   ... | 
| 541 |  |  |  |  |  |  | 'C-G'   => 0, | 
| 542 |  |  |  |  |  |  | 'G-T'   => 0.41, | 
| 543 |  |  |  |  |  |  | 'I-C'   => 0.28, | 
| 544 |  |  |  |  |  |  | ...   ...   ... | 
| 545 |  |  |  |  |  |  | }; | 
| 546 |  |  |  |  |  |  | The key follows the 'anticodon-codon' order, and the values are codon | 
| 547 |  |  |  |  |  |  | selective constraints. The smaller the constraint, the stronger the | 
| 548 |  |  |  |  |  |  | pairing, so all Watson-Crick pairings have value 0. | 
| 549 |  |  |  |  |  |  | If this option is omitted, values will be searched for in the 'input' file, | 
| 550 |  |  |  |  |  |  | following the section of anticodons and started with a line '>SC'. If it is | 
| 551 |  |  |  |  |  |  | not in the input file, then the values in the Table 2 of | 
| 552 |  |  |  |  |  |  | I are used. | 
| 553 |  |  |  |  |  |  |  | 
| 554 |  |  |  |  |  |  | =item C | 
| 555 |  |  |  |  |  |  |  | 
| 556 |  |  |  |  |  |  | kingdom = 1 for prokaryota and 0 or undef for eukaryota, which | 
| 557 |  |  |  |  |  |  | affects the cacluation for bacteria isoleucine ATA codon. Default is | 
| 558 |  |  |  |  |  |  | undef for eukaryota | 
| 559 |  |  |  |  |  |  |  | 
| 560 |  |  |  |  |  |  | =back | 
| 561 |  |  |  |  |  |  |  | 
| 562 |  |  |  |  |  |  | =cut | 
| 563 |  |  |  |  |  |  |  | 
| 564 |  |  |  |  |  |  | sub build_tai | 
| 565 |  |  |  |  |  |  | { | 
| 566 | 1 |  |  | 1 | 1 | 9 | my ($self, $input, $output, $SC, $kingdom) = @_; | 
| 567 |  |  |  |  |  |  |  | 
| 568 |  |  |  |  |  |  | # the input copy number of each tRNA | 
| 569 | 1 |  |  |  |  | 14 | my $fh = $self->_open_file($input); | 
| 570 |  |  |  |  |  |  |  | 
| 571 |  |  |  |  |  |  | # read into tRNA abundance and if provided, selective constraints | 
| 572 | 1 |  |  |  |  | 3 | my %antiCodonCopyNum; | 
| 573 |  |  |  |  |  |  | my %scHash; | 
| 574 | 1 |  |  |  |  | 2 | my $metSC = 0; | 
| 575 | 1 |  |  |  |  | 19 | while(<$fh>) | 
| 576 |  |  |  |  |  |  | { | 
| 577 | 45 | 50 |  |  |  | 95 | if(/^>SC/) # constraint section | 
| 578 |  |  |  |  |  |  | { | 
| 579 | 0 | 0 |  |  |  | 0 | last if $SC; # provided in this call | 
| 580 |  |  |  |  |  |  | # otherwise record it | 
| 581 | 0 |  |  |  |  | 0 | $metSC = 1; | 
| 582 | 0 |  |  |  |  | 0 | next; | 
| 583 |  |  |  |  |  |  | } | 
| 584 | 45 |  |  |  |  | 61 | chomp; | 
| 585 | 45 |  |  |  |  | 149 | my ($k, $v) = split /\s+/; | 
| 586 | 45 | 50 |  |  |  | 263 | $metSC? $scHash{$k} = $v : $antiCodonCopyNum{$k} = $v; | 
| 587 |  |  |  |  |  |  | } | 
| 588 | 1 |  |  |  |  | 107 | close $fh; | 
| 589 | 1 | 50 | 0 |  |  | 7 | $SC ||= \%scHash if(%scHash); | 
| 590 | 1 | 50 |  |  |  | 5 | unless($SC) | 
| 591 |  |  |  |  |  |  | { | 
| 592 | 1 | 50 |  |  |  | 12 | $self->warn("default codon selective constraints (dos Reis) are used") | 
| 593 |  |  |  |  |  |  | if($self->debug); | 
| 594 | 1 |  |  |  |  | 2 | $SC = \%defaultSC; | 
| 595 |  |  |  |  |  |  | } | 
| 596 |  |  |  |  |  |  |  | 
| 597 |  |  |  |  |  |  | # now calculate tAI for each codon | 
| 598 | 1 |  |  |  |  | 6 | my $allSenseCodons = $self->all_sense_codons; | 
| 599 | 1 |  |  |  |  | 2 | my $maxW = 0; # maximum W of all codons | 
| 600 | 1 |  |  |  |  | 2 | my %codonWs; | 
| 601 | 1 |  |  |  |  | 1 | my $nonzeroWcnt = 0; | 
| 602 | 1 |  |  |  |  | 2 | my $nonzeroWsumLog = 0; | 
| 603 | 1 |  |  |  |  | 8 | my $excludeATG = $self->no_atg; | 
| 604 | 1 |  |  |  |  | 2 | foreach my $codon (@$allSenseCodons) | 
| 605 |  |  |  |  |  |  | { | 
| 606 |  |  |  |  |  |  | # exclude ATG | 
| 607 | 61 | 100 | 66 |  |  | 253 | next if($excludeATG and $codon eq 'ATG'); | 
| 608 |  |  |  |  |  |  | # find its recognizable anticodons | 
| 609 | 60 |  |  |  |  | 89 | my $antiCodons = $self->_find_anticodons($codon); | 
| 610 | 60 | 50 |  |  |  | 125 | $self->throw("No anticodons found for codon '$codon'") | 
| 611 |  |  |  |  |  |  | unless($#$antiCodons > 0); | 
| 612 |  |  |  |  |  |  | #print "$codon: ", join(',', @$antiCodons), "\n"; | 
| 613 | 60 |  |  |  |  | 55 | my $W = 0; | 
| 614 |  |  |  |  |  |  | # this codon may have no anticodons at all, so $W will be 0 | 
| 615 | 60 |  |  |  |  | 79 | foreach my $anti (@$antiCodons) | 
| 616 |  |  |  |  |  |  | { | 
| 617 |  |  |  |  |  |  | # check whether this tRNA exists here | 
| 618 | 166 | 100 |  |  |  | 358 | next unless(exists $antiCodonCopyNum{$anti}); | 
| 619 |  |  |  |  |  |  | # now determine the wobble pair | 
| 620 | 80 |  |  |  |  | 134 | my $wobble = substr($anti,0,1).'-'.substr($codon,2,1); | 
| 621 |  |  |  |  |  |  | #my $s = $SC->{$wobble} || 0; | 
| 622 | 80 | 50 |  |  |  | 147 | $self->throw("Unknow wobble '$wobble' pair found") | 
| 623 |  |  |  |  |  |  | unless(exists $SC->{$wobble}); | 
| 624 | 80 |  |  |  |  | 93 | my $s = $SC->{$wobble}; | 
| 625 | 80 |  |  |  |  | 244 | $W += (1-$s)*$antiCodonCopyNum{$anti}; | 
| 626 |  |  |  |  |  |  | } | 
| 627 | 60 | 100 |  |  |  | 155 | $maxW = $W if($W > $maxW); | 
| 628 | 60 |  |  |  |  | 140 | $codonWs{$codon} = $W; | 
| 629 | 60 | 50 |  |  |  | 136 | if($W > 0) | 
| 630 |  |  |  |  |  |  | { | 
| 631 | 60 |  |  |  |  | 56 | $nonzeroWcnt++; | 
| 632 | 60 |  |  |  |  | 132 | $nonzeroWsumLog += log($W); | 
| 633 |  |  |  |  |  |  | } | 
| 634 | 60 | 50 |  |  |  | 153 | $self->warn("The raw W for codon $codon is 0") | 
| 635 |  |  |  |  |  |  | if($self->debug); | 
| 636 |  |  |  |  |  |  | } | 
| 637 |  |  |  |  |  |  |  | 
| 638 |  |  |  |  |  |  | # geometric mean of non-zero ws | 
| 639 | 1 |  |  |  |  | 14 | my $geoMean = exp($nonzeroWsumLog/$nonzeroWcnt)/$maxW; | 
| 640 |  |  |  |  |  |  | # normalize all W values by the max | 
| 641 | 1 |  |  |  |  | 7 | while(my ($c, $w) = each %codonWs) | 
| 642 |  |  |  |  |  |  | { | 
| 643 |  |  |  |  |  |  | # assign zero w an geometric mean of other ws | 
| 644 | 60 | 50 |  |  |  | 279 | $codonWs{$c} = $w > 0? $w/$maxW : $geoMean; | 
| 645 |  |  |  |  |  |  | } | 
| 646 |  |  |  |  |  |  |  | 
| 647 |  |  |  |  |  |  | # modify prokaryotic ATA if present | 
| 648 | 1 | 50 |  |  |  | 3 | if($kingdom) | 
| 649 |  |  |  |  |  |  | { | 
| 650 | 0 |  |  |  |  | 0 | $codonWs{'ATA'} = (1-$SC->{'L-A'})/$maxW; | 
| 651 |  |  |  |  |  |  | } | 
| 652 |  |  |  |  |  |  |  | 
| 653 | 1 | 50 |  |  |  | 7 | $self->_write_out_hash($output, \%codonWs) if($output); | 
| 654 |  |  |  |  |  |  |  | 
| 655 | 1 |  |  |  |  | 17 | return \%codonWs; | 
| 656 |  |  |  |  |  |  | } | 
| 657 |  |  |  |  |  |  |  | 
| 658 |  |  |  |  |  |  | sub _find_anticodons | 
| 659 |  |  |  |  |  |  | { | 
| 660 | 60 |  |  | 60 |  | 71 | my ($self, $codon) = @_; | 
| 661 |  |  |  |  |  |  |  | 
| 662 | 60 |  |  |  |  | 159 | my @bases = split //, $codon; | 
| 663 | 60 |  |  |  |  | 96 | my $fixedBases = _complement($bases[0])._complement($bases[1]); | 
| 664 | 60 |  |  |  |  | 80 | my $wobbleBasePairs = $self->{'_wobble_pair'}; | 
| 665 | 60 |  |  |  |  | 69 | my $matchAntiBases = $wobbleBasePairs->{$bases[2]}; | 
| 666 | 60 |  |  |  |  | 55 | my @antiCodons; | 
| 667 | 60 |  |  |  |  | 89 | foreach (@$matchAntiBases) | 
| 668 |  |  |  |  |  |  | { | 
| 669 | 166 |  |  |  |  | 161 | my $anti = $fixedBases.$_; | 
| 670 | 166 |  |  |  |  | 296 | push @antiCodons, scalar(reverse($anti)); # convert to 5'->3' | 
| 671 |  |  |  |  |  |  | } | 
| 672 |  |  |  |  |  |  |  | 
| 673 | 60 |  |  |  |  | 155 | return \@antiCodons; | 
| 674 |  |  |  |  |  |  | } | 
| 675 |  |  |  |  |  |  |  | 
| 676 |  |  |  |  |  |  | sub _complement | 
| 677 |  |  |  |  |  |  | { | 
| 678 | 120 |  |  | 120 |  | 120 | my ($base) = @_; | 
| 679 |  |  |  |  |  |  |  | 
| 680 | 120 |  |  |  |  | 136 | $base =~ tr/ATCG/TAGC/; | 
| 681 |  |  |  |  |  |  |  | 
| 682 | 120 |  |  |  |  | 216 | return $base; | 
| 683 |  |  |  |  |  |  | } | 
| 684 |  |  |  |  |  |  |  | 
| 685 |  |  |  |  |  |  | =head1 AUTHOR | 
| 686 |  |  |  |  |  |  |  | 
| 687 |  |  |  |  |  |  | Zhenguo Zhang, C<<  >> | 
| 688 |  |  |  |  |  |  |  | 
| 689 |  |  |  |  |  |  | =head1 BUGS | 
| 690 |  |  |  |  |  |  |  | 
| 691 |  |  |  |  |  |  | Please report any bugs or feature requests to C, or through | 
| 692 |  |  |  |  |  |  | the web interface at L.  I will be notified, and then you'll | 
| 693 |  |  |  |  |  |  | automatically be notified of progress on your bug as I make changes. | 
| 694 |  |  |  |  |  |  |  | 
| 695 |  |  |  |  |  |  |  | 
| 696 |  |  |  |  |  |  | =head1 SUPPORT | 
| 697 |  |  |  |  |  |  |  | 
| 698 |  |  |  |  |  |  | You can find documentation for this module with the perldoc command. | 
| 699 |  |  |  |  |  |  |  | 
| 700 |  |  |  |  |  |  | perldoc Bio::CUA::CUB::Builder | 
| 701 |  |  |  |  |  |  |  | 
| 702 |  |  |  |  |  |  |  | 
| 703 |  |  |  |  |  |  | You can also look for information at: | 
| 704 |  |  |  |  |  |  |  | 
| 705 |  |  |  |  |  |  | =over 4 | 
| 706 |  |  |  |  |  |  |  | 
| 707 |  |  |  |  |  |  | =item * RT: CPAN's request tracker (report bugs here) | 
| 708 |  |  |  |  |  |  |  | 
| 709 |  |  |  |  |  |  | L | 
| 710 |  |  |  |  |  |  |  | 
| 711 |  |  |  |  |  |  | =item * AnnoCPAN: Annotated CPAN documentation | 
| 712 |  |  |  |  |  |  |  | 
| 713 |  |  |  |  |  |  | L | 
| 714 |  |  |  |  |  |  |  | 
| 715 |  |  |  |  |  |  | =item * CPAN Ratings | 
| 716 |  |  |  |  |  |  |  | 
| 717 |  |  |  |  |  |  | L | 
| 718 |  |  |  |  |  |  |  | 
| 719 |  |  |  |  |  |  | =item * Search CPAN | 
| 720 |  |  |  |  |  |  |  | 
| 721 |  |  |  |  |  |  | L | 
| 722 |  |  |  |  |  |  |  | 
| 723 |  |  |  |  |  |  | =back | 
| 724 |  |  |  |  |  |  |  | 
| 725 |  |  |  |  |  |  |  | 
| 726 |  |  |  |  |  |  | =head1 ACKNOWLEDGEMENTS | 
| 727 |  |  |  |  |  |  |  | 
| 728 |  |  |  |  |  |  |  | 
| 729 |  |  |  |  |  |  | =head1 LICENSE AND COPYRIGHT | 
| 730 |  |  |  |  |  |  |  | 
| 731 |  |  |  |  |  |  | Copyright 2015 Zhenguo Zhang. | 
| 732 |  |  |  |  |  |  |  | 
| 733 |  |  |  |  |  |  | This program is free software: you can redistribute it and/or modify | 
| 734 |  |  |  |  |  |  | it under the terms of the GNU General Public License as published by | 
| 735 |  |  |  |  |  |  | the Free Software Foundation, either version 3 of the License, or | 
| 736 |  |  |  |  |  |  | (at your option) any later version. | 
| 737 |  |  |  |  |  |  |  | 
| 738 |  |  |  |  |  |  | This program is distributed in the hope that it will be useful, | 
| 739 |  |  |  |  |  |  | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 740 |  |  |  |  |  |  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
| 741 |  |  |  |  |  |  | GNU General Public License for more details. | 
| 742 |  |  |  |  |  |  |  | 
| 743 |  |  |  |  |  |  | You should have received a copy of the GNU General Public License | 
| 744 |  |  |  |  |  |  | along with this program.  If not, see L. | 
| 745 |  |  |  |  |  |  |  | 
| 746 |  |  |  |  |  |  |  | 
| 747 |  |  |  |  |  |  | =cut | 
| 748 |  |  |  |  |  |  |  | 
| 749 |  |  |  |  |  |  | 1; # End of Bio::CUA::CUB::Builder | 
| 750 |  |  |  |  |  |  |  |