File Coverage

blib/lib/AlignDB/Codon.pm
Criterion Covered Total %
statement 260 272 95.5
branch 66 76 86.8
condition 18 18 100.0
subroutine 21 21 100.0
pod 6 9 66.6
total 371 396 93.6


line stmt bran cond sub pod time code
1             package AlignDB::Codon;
2 8     8   476157 use Moose;
  8         3034858  
  8         58  
3 8     8   48431 use Carp;
  8         16  
  8         744  
4              
5 8     8   6962 use AlignDB::IntSpan;
  8         52575  
  8         258  
6 8     8   4652 use List::MoreUtils::PP;
  8         21859  
  8         261  
7 8     8   3934 use YAML::Syck;
  8         13386  
  8         19280  
8              
9             our $VERSION = '1.1.1';
10              
11             # codon tables
12             has 'table_id' => ( is => 'ro', isa => 'Int', default => sub {1}, );
13             has 'table_name' => ( is => 'ro', isa => 'Str', );
14             has 'table_content' => ( is => 'ro', isa => 'Str', );
15             has 'table_starts' => ( is => 'ro', isa => 'Str', );
16              
17             # codons
18             has 'codons' => ( is => 'ro', isa => 'ArrayRef', );
19             has 'codon_idx' => ( is => 'ro', isa => 'HashRef', );
20             has 'codon2aa' => ( is => 'ro', isa => 'HashRef', );
21              
22             # lookup hash for the number of synonymous changes per codon
23             has 'syn_sites' => ( is => 'ro', isa => 'HashRef', );
24              
25             # lookup hash of all pairwise combinations of codons differing by 1
26             # 1 = synonymous, 0 = non-synonymous, -1 = stop
27             has 'syn_changes' => ( is => 'ro', isa => 'HashRef', );
28              
29             # One <=> Three
30             has 'one2three' => ( is => 'ro', isa => 'HashRef', );
31             has 'three2one' => ( is => 'ro', isa => 'HashRef', );
32              
33             sub BUILD {
34 22     22 0 180658 my $self = shift;
35              
36 22         76 $self->_make_codons;
37 22         78 $self->change_codon_table( $self->{table_id} );
38 20         157 $self->_load_aa_code;
39              
40 20         155 return;
41             }
42              
43             sub _make_codons {
44 22     22   37 my $self = shift;
45              
46             # makes all codon combinations
47 22         76 my @nucs = qw(T C A G);
48 22         37 my @codons;
49 22         58 for my $i (@nucs) {
50 88         92 for my $j (@nucs) {
51 352         322 for my $k (@nucs) {
52 1408         1822 push @codons, "$i$j$k";
53             }
54             }
55             }
56 22         168 $self->{codons} = \@codons;
57              
58 22         32 my %codon_idx;
59 22         84 for my $i ( 0 .. $#codons ) {
60 1408         1864 $codon_idx{ $codons[$i] } = $i;
61             }
62 22         51 $self->{codon_idx} = \%codon_idx;
63              
64 22         52 return;
65             }
66              
67             sub change_codon_table {
68 23     23 1 53 my $self = shift;
69 23         40 my $id = shift;
70              
71 23         178 my @NAMES = ( #id
72             'Strict', # 0, special option for ATG-only start
73             'Standard', # 1
74             'Vertebrate Mitochondrial', # 2
75             'Yeast Mitochondrial', # 3
76             'Mold, Protozoan, and Coelenterate Mitochondrial and Mycoplasma/Spiroplasma', # 4
77             'Invertebrate Mitochondrial', # 5
78             'Ciliate, Dasycladacean and Hexamita Nuclear', # 6
79             '', '',
80             'Echinoderm and Flatworm Mitochondrial', # 9
81             'Euplotid Nuclear', # 10
82             'Bacterial, Archaeal and Plant Plastid', # 11
83             'Alternative Yeast Nuclear', # 12
84             'Ascidian Mitochondrial', # 13
85             'Alternative Flatworm Mitochondrial', # 14
86             'Blepharisma Nuclear', # 15
87             'Chlorophycean Mitochondrial', # 16
88             '', '', '', '',
89             'Trematode Mitochondrial', # 21
90             'Scenedesmus obliquus Mitochondrial', # 22
91             'Thraustochytrium Mitochondrial', # 23
92             'Pterobranchia Mitochondrial', # 24
93             'Candidate Division SR1 and Gracilibacteria', # 25
94             );
95              
96 23         130 my @TABLES = qw(
97             FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
98             FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
99             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG
100             FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG
101             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
102             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG
103             FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
104             '' ''
105             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
106             FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
107             FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
108             FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
109             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG
110             FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
111             FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
112             FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
113             '' '' '' ''
114             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG
115             FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
116             FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
117             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG
118             FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
119             );
120              
121 23         155 my @STARTS = qw(
122             -----------------------------------M----------------------------
123             ---M---------------M---------------M----------------------------
124             --------------------------------MMMM---------------M------------
125             ----------------------------------MM----------------------------
126             --MM---------------M------------MMMM---------------M------------
127             ---M----------------------------MMMM---------------M------------
128             -----------------------------------M----------------------------
129             '' ''
130             -----------------------------------M---------------M------------
131             -----------------------------------M----------------------------
132             ---M---------------M------------MMMM---------------M------------
133             -------------------M---------------M----------------------------
134             ---M------------------------------MM---------------M------------
135             -----------------------------------M----------------------------
136             -----------------------------------M----------------------------
137             -----------------------------------M----------------------------
138             '' '' '' ''
139             -----------------------------------M---------------M------------
140             -----------------------------------M----------------------------
141             --------------------------------M--M---------------M------------
142             ---M---------------M---------------M---------------M------------
143             ---M-------------------------------M---------------M------------
144             );
145              
146 23         180 my $id_set = AlignDB::IntSpan->new("1-6,9-16,21");
147              
148 23 100       4899 if ( not defined $id ) {
    100          
149 1         252 Carp::confess "codon table id is not defined\n";
150             }
151             elsif ( $id_set->contains($id) ) {
152 20         578 $self->{table_id} = $id;
153              
154 20         37 $self->{table_name} = $NAMES[$id];
155 20         34 $self->{table_content} = $TABLES[$id];
156 20         37 $self->{table_starts} = $STARTS[$id];
157              
158 20         57 $self->_make_codon2aa;
159 20         53 $self->_make_syn_sites;
160 20         91 $self->_make_syn_changes;
161             }
162             else {
163 2         53 Carp::confess "codon table id should be in range of $id_set\n";
164             }
165              
166 20         467 return;
167             }
168              
169             sub _make_codon2aa {
170 20     20   37 my $self = shift;
171              
172 20         705 my $table_content = $self->table_content;
173 20         524 my $codons = $self->codons;
174 20         535 my $codon_idx = $self->codon_idx;
175              
176 20         27 my %codon2aa;
177 20         31 for my $codon ( @{$codons} ) {
  20         44  
178 1280         1243 my $aa = substr( $table_content, $codon_idx->{$codon}, 1 );
179 1280         1563 $codon2aa{$codon} = $aa;
180             }
181              
182             # gaps in cDNA
183 20         42 $codon2aa{"---"} = "-";
184              
185 20         40 $self->{codon2aa} = \%codon2aa;
186              
187 20         37 return;
188             }
189              
190             sub _make_syn_sites {
191 20     20   32 my $self = shift;
192              
193 20         586 my $codons = $self->codons;
194 20         508 my $codon2aa = $self->codon2aa;
195              
196 20         31 my %raw_results;
197 20         48 for my $cod (@$codons) {
198 1280         1149 my $aa = $codon2aa->{$cod};
199              
200             # calculate number of synonymous mutations vs non-syn mutations
201 1280         1345 for my $i ( 0 .. 2 ) {
202 3840         2898 my $s = 0;
203 3840         2588 my $n = 3;
204 3840         3450 for my $nuc (qw(A T C G)) {
205 15360 100       22435 next if substr( $cod, $i, 1 ) eq $nuc;
206 11520         8363 my $test = $cod;
207 11520         8811 substr( $test, $i, 1, $nuc );
208 11520 100       16444 if ( $codon2aa->{$test} eq $aa ) {
209 2750         2064 $s++;
210             }
211 11520 100       17689 if ( $codon2aa->{$test} eq '*' ) {
212 531         516 $n--;
213             }
214             }
215 3840         8020 $raw_results{$cod}[$i] = {
216             's' => $s,
217             'n' => $n
218             };
219             }
220             }
221              
222 20         46 my %final_results;
223 20         587 for my $cod ( sort keys %raw_results ) {
224 1280         940 my $t = 0;
225 1280         942 map { $t += ( $_->{'s'} / $_->{'n'} ) } @{ $raw_results{$cod} };
  3840         5174  
  1280         1421  
226 1280         2574 $final_results{$cod} = { 's' => $t, 'n' => 3 - $t };
227             }
228              
229 20         118 $self->{syn_sites} = \%final_results;
230 20         885 return;
231             }
232              
233             sub _make_syn_changes {
234 20     20   36 my $self = shift;
235              
236 20         991 my $codons = $self->codons;
237 20         551 my $codon2aa = $self->codon2aa;
238              
239 20         45 my $arr_len = scalar @$codons;
240              
241 20         23 my %results;
242 20         95 for ( my $i = 0; $i < $arr_len - 1; $i++ ) {
243 1260         1200 my $cod1 = $codons->[$i];
244 1260         2082 for ( my $j = $i + 1; $j < $arr_len; $j++ ) {
245 40320         33104 my $cod2 = $codons->[$j];
246 40320         28529 my $diff_cnt = 0;
247 40320         38449 for my $pos ( 0 .. 2 ) {
248 120960 100       180587 if ( substr( $cod1, $pos, 1 ) ne substr( $cod2, $pos, 1 ) ) {
249 92160         77710 $diff_cnt++;
250             }
251             }
252 40320 100       85280 next if $diff_cnt != 1;
253              
254             # synonymous change
255 5760 100 100     18402 if ( $codon2aa->{$cod1} eq $codon2aa->{$cod2} ) {
    100          
256 1375         1659 $results{$cod1}{$cod2} = 1;
257 1375         2766 $results{$cod2}{$cod1} = 1;
258             }
259              
260             # stop codon
261             elsif ( $codon2aa->{$cod1} eq '*' or $codon2aa->{$cod2} eq '*' ) {
262 455         587 $results{$cod1}{$cod2} = -1;
263 455         981 $results{$cod2}{$cod1} = -1;
264             }
265              
266             # non-synonymous change
267             else {
268 3930         5458 $results{$cod1}{$cod2} = 0;
269 3930         8771 $results{$cod2}{$cod1} = 0;
270             }
271             }
272             }
273              
274 20         89 $self->{syn_changes} = \%results;
275 20         102 return;
276             }
277              
278             sub comp_codons {
279 21     21 1 17727 my $self = shift;
280 21         33 my $cod1 = shift;
281 21         23 my $cod2 = shift;
282 21         25 my $pos = shift;
283              
284 21         975 my $syn_changes = $self->syn_changes;
285 21         574 my $codon2aa = $self->codon2aa;
286              
287 21         25 my $syn_cnt = 0; # total synonymous changes
288 21         24 my $nsy_cnt = 0; # total non-synonymous changes
289              
290             # ignore codon if beeing compared with gaps!
291 21 100 100     113 if ( $cod1 =~ /\-/ or $cod2 =~ /\-/ ) {
292 2         8 return ( $syn_cnt, $nsy_cnt );
293             }
294              
295             # check codons
296 19         32 for ( $cod1, $cod2 ) {
297 37 100       100 if ( !exists $codon2aa->{$_} ) {
298 2         19 Carp::confess YAML::Syck::Dump( { cod1 => $cod1, cod2 => $cod2 } ), "Wrong codon\n";
299 0         0 return ( 0, 0 );
300             }
301             }
302              
303             # check codon position
304 17 100       36 if ( defined $pos ) {
305 12 100   27   107 if ( List::MoreUtils::PP::none { $_ == $pos } ( 0 .. 2 ) ) {
  27         129  
306 2         25 Carp::confess YAML::Syck::Dump( { pos => $pos } ), "Wrong codon position\n";
307 0         0 return ( 0, 0 );
308             }
309             }
310              
311 15         228 my %mutator = (
312             2 => # codon positions to be altered
313             # depend on which is the same
314             {
315             0 => [ [ 1, 2 ], [ 2, 1 ] ],
316             1 => [ [ 0, 2 ], [ 2, 0 ] ],
317             2 => [ [ 0, 1 ], [ 1, 0 ] ],
318             },
319             3 => # all need to be altered
320             [ [ 0, 1, 2 ], [ 1, 0, 2 ], [ 0, 2, 1 ], [ 1, 2, 0 ], [ 2, 0, 1 ], [ 2, 1, 0 ], ],
321             );
322              
323 15         58 my ( $diff_cnt, $codon_pos ) = $self->count_diffs( $cod1, $cod2 );
324              
325 15 50       48 if ( $diff_cnt == 0 ) { # ignore if codons are identical
    100          
    100          
    50          
326             }
327             elsif ( $diff_cnt == 1 ) { # In $codon_pos where bases are different
328 6 100 100     35 if ( !defined $pos or $codon_pos == $pos ) {
329 4         11 $syn_cnt = $syn_changes->{$cod1}{$cod2};
330 4         8 $nsy_cnt = 1 - $syn_changes->{$cod1}{$cod2};
331             }
332             }
333             elsif ( $diff_cnt == 2 ) { # In $codon_pos where bases are the same
334 5         17 my ( $s_cnt, $n_cnt ) = ( 0, 0 );
335 5         5 my $pathway = 2; # will stay 2 unless there are stop codons
336             # at intervening point
337 5         5 PATH: for my $perm ( @{ $mutator{2}{$codon_pos} } ) {
  5         15  
338 10         12 my $altered = $cod1;
339 10         9 my $prev = $cod1;
340 10         10 my ( $sub_s_cnt, $sub_n_cnt ) = ( 0, 0 );
341              
342 10         11 for my $mut_i (@$perm) { # index of codon mutated
343 20         24 my $mut_base = substr( $cod2, $mut_i, 1 );
344 20         24 substr( $altered, $mut_i, 1, $mut_base );
345 20 50       38 if ( $codon2aa->{$altered} eq '*' ) {
346 0         0 $pathway--;
347 0         0 next PATH; # abadon this pathway
348             }
349             else {
350 20 100 100     68 if ( !defined $pos or $mut_i == $pos ) {
351 12         18 $sub_s_cnt += $syn_changes->{$prev}{$altered};
352 12         15 $sub_n_cnt += 1 - $syn_changes->{$prev}{$altered};
353             }
354             }
355 20         24 $prev = $altered;
356             }
357              
358 10         9 $s_cnt += $sub_s_cnt;
359 10         11 $n_cnt += $sub_n_cnt;
360             }
361 5 50       11 if ( $pathway != 0 ) {
362 5         8 $syn_cnt = ( $s_cnt / $pathway );
363 5         5 $nsy_cnt = ( $n_cnt / $pathway );
364             }
365             }
366             elsif ( $diff_cnt == 3 ) {
367 4         6 my ( $s_cnt, $n_cnt ) = ( 0, 0 );
368 4         7 my $pathway = 6; # will stay 6 unless there are stop codons
369             # at intervening point
370 4         7 PATH: for my $perm ( @{ $mutator{'3'} } ) {
  4         10  
371 24         32 my $altered = $cod1;
372 24         18 my $prev = $cod1;
373 24         25 my ( $sub_s_cnt, $sub_n_cnt ) = ( 0, 0 );
374              
375 24         24 for my $mut_i (@$perm) { #index of codon mutated
376 64         57 my $mut_base = substr( $cod2, $mut_i, 1 );
377 64         56 substr( $altered, $mut_i, 1, $mut_base );
378 64 100       92 if ( $codon2aa->{$altered} eq '*' ) {
379 8         7 $pathway--;
380 8         23 next PATH; # abadon this pathway
381             }
382             else {
383 56 100 100     168 if ( !defined $pos or $mut_i == $pos ) {
384 28         40 $sub_s_cnt += $syn_changes->{$prev}{$altered};
385 28         28 $sub_n_cnt += 1 - $syn_changes->{$prev}{$altered};
386             }
387             }
388 56         58 $prev = $altered;
389             }
390              
391 16         16 $s_cnt += $sub_s_cnt;
392 16         17 $n_cnt += $sub_n_cnt;
393             }
394              
395             # calculate number of synonymous/non synonymous mutations for that
396             # codon and add to total
397 4 50       8 if ( $pathway != 0 ) {
398 4         5 $syn_cnt = ( $s_cnt / $pathway );
399 4         6 $nsy_cnt = ( $n_cnt / $pathway );
400             }
401             } # endif $diffcnt = 3
402              
403 15         90 return ( $syn_cnt, $nsy_cnt );
404             }
405              
406             # counts the number of nucleotide differences between 2 codons
407             # when 1 nucleotide is different, returns this value plus the codon index
408             # of which nucleotide is different
409             # when 2 nucleotides are different, returns this value plus the codon index
410             # of which nucleotide is the same
411             # So comp_codons() knows which nucleotides to change or not to change
412             sub count_diffs {
413 17     17 0 67 my $self = shift;
414 17         20 my $cod1 = shift;
415 17         33 my $cod2 = shift;
416              
417 17         18 my $cnt = 0;
418 17         15 my $return_pos;
419             my @sames; # store same base position
420 0         0 my @diffs; # store diff base position
421              
422 17 100 100     82 if ( length $cod1 != 3 or length $cod2 != 3 ) {
423 2         30 Carp::confess YAML::Syck::Dump( { cod1 => $cod1, cod2 => $cod2 } ), "Codon length error\n";
424 0         0 return ( $cnt, $return_pos );
425             }
426              
427             # just for 2 differences
428 15         34 for ( 0 .. 2 ) {
429 45 100       81 if ( substr( $cod1, $_, 1 ) ne substr( $cod2, $_, 1 ) ) {
430 28         28 $cnt++;
431 28         38 push @diffs, $_;
432             }
433             else {
434 17         22 push @sames, $_;
435             }
436             }
437              
438 15 100       34 if ( $cnt == 1 ) {
    100          
439 6         63 $return_pos = $diffs[0];
440             }
441             elsif ( $cnt == 2 ) {
442 5         6 $return_pos = $sames[0];
443             }
444              
445 15         33 return ( $cnt, $return_pos );
446             }
447              
448             sub translate {
449 3     3 0 24 my $self = shift;
450 3         5 my $seq = shift;
451 3         3 my $frame = shift;
452              
453             # check $frame
454 3 100       7 if ( defined $frame ) {
455 1 50   3   10 if ( List::MoreUtils::PP::none { $_ == $frame } ( 0 .. 2 ) ) {
  3         37  
456 1         11 confess Dump( { frame => $frame } ), "Wrong frame\n";
457             }
458             }
459             else {
460 2         3 $frame = 0;
461             }
462              
463 2 50       5 if ( $frame != 0 ) {
464 0         0 $seq = substr( $seq, $frame ); # delete first $frame bases from $seq
465             }
466 2         4 my $offset = length($seq) - ( length($seq) % 3 );
467 2         6 substr( $seq, $offset, length($seq), '' ); # now $seq is 3n bp
468              
469 2         3 my $peptide = "";
470 2         81 my $codon2aa = $self->codon2aa;
471 2         3 my $codon_size = 3;
472 2         8 for ( my $i = 0; $i < ( length($seq) - ( $codon_size - 1 ) ); $i += $codon_size ) {
473 23         23 my $triplet = substr( $seq, $i, $codon_size );
474 23 50       26 if ( exists $codon2aa->{$triplet} ) {
475 23         43 $peptide .= $codon2aa->{$triplet};
476             }
477             else {
478 0         0 $peptide .= 'X';
479             }
480             }
481 2         14 return $peptide;
482             }
483              
484             sub is_start_codon {
485 3     3 1 20 my $self = shift;
486 3         5 my $cod = shift;
487              
488 3         5 $cod = uc $cod;
489 3         7 $cod =~ tr/U/T/;
490              
491 3         114 my $table_starts = $self->table_starts;
492 3         76 my $codon_idx = $self->codon_idx;
493              
494 3 100       8 if ( exists $codon_idx->{$cod} ) {
495 2         4 my $aa = substr( $table_starts, $codon_idx->{$cod}, 1 );
496 2 100       13 return $aa eq "M" ? 1 : 0;
497             }
498             else {
499 1         5 return 0;
500             }
501             }
502              
503             sub is_ter_codon {
504 6     6 1 9 my $self = shift;
505 6         7 my $cod = shift;
506              
507 6         9 $cod = uc $cod;
508 6         9 $cod =~ tr/U/T/;
509              
510 6         200 my $table_content = $self->table_content;
511 6         146 my $codon_idx = $self->codon_idx;
512              
513 6 100       15 if ( exists $codon_idx->{$cod} ) {
514 3         6 my $aa = substr( $table_content, $codon_idx->{$cod}, 1 );
515 3 100       14 return $aa eq "*" ? 1 : 0;
516             }
517             else {
518 3         12 return 0;
519             }
520             }
521              
522             sub _load_aa_code {
523 20     20   46 my $self = shift;
524              
525 20         646 my %one2three = (
526             A => 'Ala', # Alanine
527             R => 'Arg', # Arginine
528             N => 'Asn', # Asparagine
529             D => 'Asp', # Aspartic acid
530             C => 'Cys', # Cysteine
531             Q => 'Gln', # Glutamine
532             E => 'Glu', # Glutamic acid
533             G => 'Gly', # Glycine
534             H => 'His', # Histidine
535             I => 'Ile', # Isoleucine
536             L => 'Leu', # Leucine
537             K => 'Lys', # Lysine
538             M => 'Met', # Methionine
539             F => 'Phe', # Phenylalanine
540             P => 'Pro', # Proline
541             S => 'Ser', # Serine
542             T => 'Thr', # Threonine
543             W => 'Trp', # Tryptophan
544             Y => 'Tyr', # Tyrosine
545             V => 'Val', # Valine
546             B => 'Asx', # Aspartic acid or Asparagine
547             Z => 'Glx', # Glutamine or Glutamic acid
548             X => 'Xaa', # Any or unknown amino acid
549             '*' => '***', # Stop codon
550             );
551 20         507 my %three2one = reverse(%one2three);
552              
553 20         63 $self->{one2three} = \%one2three;
554 20         46 $self->{three2one} = \%three2one;
555              
556 20         40 return;
557             }
558              
559             sub convert_123 {
560 1     1 1 12 my $self = shift;
561 1         1 my $peptide = shift;
562              
563 1         2 $peptide = uc $peptide;
564 1         37 my $three_of = $self->one2three;
565              
566 1         1 my $converted;
567 1         3 for my $pos ( 0 .. length($peptide) - 1 ) {
568 1         3 my $aa_code = substr( $peptide, $pos, 1 );
569 1 50       3 if ( $three_of->{$aa_code} ) {
570 1         3 $converted .= $three_of->{$aa_code};
571             }
572             else {
573 0         0 Carp::confess "Wrong single-letter amino acid code [$aa_code]!\n";
574 0         0 $converted .= ' ' x 3;
575             }
576             }
577 1         4 return $converted;
578             }
579              
580             sub convert_321 {
581 1     1 1 2 my $self = shift;
582 1         2 my $peptide = shift;
583              
584 1         2 $peptide = lc $peptide;
585 1         35 my $one_of = $self->three2one;
586              
587 1         1 my $converted;
588 1         4 for ( my $pos = 0; $pos < length($peptide); $pos += 3 ) {
589 1         3 my $aa_code = substr( $peptide, $pos, 3 );
590 1         2 $aa_code = ucfirst $aa_code;
591 1 50       3 if ( $one_of->{$aa_code} ) {
592 1         5 $converted .= $one_of->{$aa_code};
593             }
594             else {
595 0         0 warn "Wrong three-letter amino acid code [$aa_code]!\n";
596 0         0 $converted .= ' ' x 3;
597             }
598              
599             }
600              
601 1         3 return $converted;
602             }
603              
604             1; # Magic true value required at end of module
605              
606             __END__
607              
608             =pod
609              
610             =encoding UTF-8
611              
612             =head1 NAME
613              
614             AlignDB::Codon - translate sequences and calculate Dn/Ds
615              
616             =head1 DESCRIPTION
617              
618             AlignDB::Codon provides methods to translate sequences and calculate Dn/Ds with different codon
619             tables.
620              
621             Some parts of this module are extracted from BioPerl to avoid the huge number of its dependencies.
622              
623             =head1 METHODS
624              
625             =head2 change_codon_table
626              
627             $obj->change_codon_table(2);
628              
629             Change used codon table and recalc all attributes.
630              
631             Codon table id should be in range of 1-6,9-16,21.
632              
633             =head2 comp_codons
634              
635             my ($syn, $nsy) = $obj->comp_codons('TTT', 'GTA');
636              
637             my ($syn, $nsy) = $obj->comp_codons('TTT', 'GTA', 1);
638              
639             Compares 2 codons to find the number of synonymous and non-synonymous mutations between them.
640              
641             If the third parameter (in 0 .. 2) is given, this method will return syn&nsy at this position.
642              
643             =head2 is_start_codon
644              
645             my $bool = $obj->is_start_codon('ATG')
646              
647             Returns true for codons that can be used as a translation start, false for others.
648              
649             =head2 is_ter_codon
650              
651             my $bool = $obj->is_ter_codon('GAA')
652              
653             Returns true for codons that can be used as a translation terminator, false for others.
654              
655             =head2 convert_123
656              
657             my $three_format = $obj->convert_123('ARN');
658              
659             Convert aa code from one-letter to three-letter
660              
661             =head2 convert_321
662              
663             my $one_format = $obj->convert_321('AlaArgAsn');
664              
665             Convert aa code from three-letter to one-letter
666              
667             =head1 AUTHOR
668              
669             Qiang Wang <wang-q@outlook.com>
670              
671             =head1 COPYRIGHT AND LICENSE
672              
673             This software is copyright (c) 2008 by Qiang Wang.
674              
675             This is free software; you can redistribute it and/or modify it under
676             the same terms as the Perl 5 programming language system itself.
677              
678             =cut