File Coverage

Bio/Tools/CodonTable.pm
Criterion Covered Total %
statement 225 239 94.1
branch 75 92 81.5
condition 25 41 60.9
subroutine 24 25 96.0
pod 14 14 100.0
total 363 411 88.3


line stmt bran cond sub pod time code
1             #
2             # bioperl module for Bio::Tools::CodonTable
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Heikki Lehvaslaiho
7             #
8             # Copyright Heikki Lehvaslaiho
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tools::CodonTable - Codon table object
17              
18             =head1 SYNOPSIS
19              
20             # This is a read-only class for all known codon tables. The IDs are
21             # the ones used by nucleotide sequence databases. All common IUPAC
22             # ambiguity codes for DNA, RNA and amino acids are recognized.
23              
24             use Bio::Tools::CodonTable;
25              
26             # defaults to ID 1 "Standard"
27             $myCodonTable = Bio::Tools::CodonTable->new();
28             $myCodonTable2 = Bio::Tools::CodonTable->new( -id => 3 );
29              
30             # change codon table
31             $myCodonTable->id(5);
32              
33             # examine codon table
34             print join (' ', "The name of the codon table no.", $myCodonTable->id(4),
35             "is:", $myCodonTable->name(), "\n");
36              
37             # print possible codon tables
38             $tables = Bio::Tools::CodonTable->tables;
39             while ( ($id,$name) = each %{$tables} ) {
40             print "$id = $name\n";
41             }
42              
43             # translate a codon
44             $aa = $myCodonTable->translate('ACU');
45             $aa = $myCodonTable->translate('act');
46             $aa = $myCodonTable->translate('ytr');
47              
48             # reverse translate an amino acid
49             @codons = $myCodonTable->revtranslate('A');
50             @codons = $myCodonTable->revtranslate('Ser');
51             @codons = $myCodonTable->revtranslate('Glx');
52             @codons = $myCodonTable->revtranslate('cYS', 'rna');
53              
54             # reverse translate an entire amino acid sequence into a IUPAC
55             # nucleotide string
56              
57             my $seqobj = Bio::PrimarySeq->new(-seq => 'FHGERHEL');
58             my $iupac_str = $myCodonTable->reverse_translate_all($seqobj);
59              
60             # boolean tests
61             print "Is a start\n" if $myCodonTable->is_start_codon('ATG');
62             print "Is a terminator\n" if $myCodonTable->is_ter_codon('tar');
63             print "Is a unknown\n" if $myCodonTable->is_unknown_codon('JTG');
64              
65             =head1 DESCRIPTION
66              
67             Codon tables are also called translation tables or genetic codes
68             since that is what they represent. A bit more complete picture
69             of the full complexity of codon usage in various taxonomic groups
70             is presented at the NCBI Genetic Codes Home page.
71              
72             CodonTable is a BioPerl class that knows all current translation
73             tables that are used by primary nucleotide sequence databases
74             (GenBank, EMBL and DDBJ). It provides methods to output information
75             about tables and relationships between codons and amino acids.
76              
77             This class and its methods recognized all common IUPAC ambiguity codes
78             for DNA, RNA and animo acids. The translation method follows the
79             conventions in EMBL and TREMBL databases.
80              
81             It is a nuisance to separate RNA and cDNA representations of nucleic
82             acid transcripts. The CodonTable object accepts codons of both type as
83             input and allows the user to set the mode for output when reverse
84             translating. Its default for output is DNA.
85              
86             Note:
87              
88             This class deals primarily with individual codons and amino
89             acids. However in the interest of speed you can L
90             longer sequence, too. The full complexity of protein translation
91             is tackled by L.
92              
93              
94             The amino acid codes are IUPAC recommendations for common amino acids:
95              
96             A Ala Alanine
97             R Arg Arginine
98             N Asn Asparagine
99             D Asp Aspartic acid
100             C Cys Cysteine
101             Q Gln Glutamine
102             E Glu Glutamic acid
103             G Gly Glycine
104             H His Histidine
105             I Ile Isoleucine
106             L Leu Leucine
107             K Lys Lysine
108             M Met Methionine
109             F Phe Phenylalanine
110             P Pro Proline
111             O Pyl Pyrrolysine (22nd amino acid)
112             U Sec Selenocysteine (21st amino acid)
113             S Ser Serine
114             T Thr Threonine
115             W Trp Tryptophan
116             Y Tyr Tyrosine
117             V Val Valine
118             B Asx Aspartic acid or Asparagine
119             Z Glx Glutamine or Glutamic acid
120             J Xle Isoleucine or Valine (mass spec ambiguity)
121             X Xaa Any or unknown amino acid
122              
123              
124             It is worth noting that, "Bacterial" codon table no. 11 produces an
125             polypeptide that is, confusingly, identical to the standard one. The
126             only differences are in available initiator codons.
127              
128              
129             NCBI Genetic Codes home page:
130             (Last update of the Genetic Codes: April 30, 2013)
131             https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c
132              
133             ASN.1 version with ids 1 to 25 is at:
134             ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt
135              
136             Thanks to Matteo diTomasso for the original Perl implementation
137             of these tables.
138              
139             =head1 FEEDBACK
140              
141             =head2 Mailing Lists
142              
143             User feedback is an integral part of the evolution of this and other
144             Bioperl modules. Send your comments and suggestions preferably to the
145             Bioperl mailing lists Your participation is much appreciated.
146              
147             bioperl-l@bioperl.org - General discussion
148             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
149              
150             =head2 Support
151              
152             Please direct usage questions or support issues to the mailing list:
153              
154             I
155              
156             rather than to the module maintainer directly. Many experienced and
157             reponsive experts will be able look at the problem and quickly
158             address it. Please include a thorough description of the problem
159             with code and data examples if at all possible.
160              
161             =head2 Reporting Bugs
162              
163             Report bugs to the Bioperl bug tracking system to help us keep track
164             the bugs and their resolution. Bug reports can be submitted via the
165             web:
166              
167             https://github.com/bioperl/bioperl-live/issues
168              
169             =head1 AUTHOR - Heikki Lehvaslaiho
170              
171             Email: heikki-at-bioperl-dot-org
172              
173             =head1 APPENDIX
174              
175             The rest of the documentation details each of the object
176             methods. Internal methods are usually preceded with a _
177              
178             =cut
179              
180             # Let the code begin...
181              
182             package Bio::Tools::CodonTable;
183 203         17599 use vars qw(@NAMES @TABLES @STARTS $TRCOL $CODONS %IUPAC_DNA $CODONGAP $GAP
184 203     203   1782 %IUPAC_AA %THREELETTERSYMBOLS $VALID_PROTEIN $TERMINATOR);
  203         221  
185 203     203   705 use strict;
  203         229  
  203         3773  
186              
187             # Object preamble - inherits from Bio::Root::Root
188 203     203   60776 use Bio::Tools::IUPAC;
  203         968  
  203         4469  
189 203     203   89730 use Bio::SeqUtils;
  203         371  
  203         5790  
190              
191 203     203   1041 use base qw(Bio::Root::Root);
  203         234  
  203         13984  
192              
193              
194             # first set internal values for all translation tables
195              
196             BEGIN {
197 203     203   843 use constant CODONSIZE => 3;
  203         1045  
  203         54565  
198 203     203   414 $GAP = '-';
199 203         551 $CODONGAP = $GAP x CODONSIZE;
200              
201 203         885 @NAMES = #id
202             (
203             'Strict', #0, special option for ATG-only start
204             'Standard', #1
205             'Vertebrate Mitochondrial',#2
206             'Yeast Mitochondrial',# 3
207             'Mold, Protozoan, and Coelenterate Mitochondrial and Mycoplasma/Spiroplasma',#4
208             'Invertebrate Mitochondrial',#5
209             'Ciliate, Dasycladacean and Hexamita Nuclear',# 6
210             '', '',
211             'Echinoderm and Flatworm Mitochondrial',#9
212             'Euplotid Nuclear',#10
213             'Bacterial, Archaeal and Plant Plastid',# 11
214             'Alternative Yeast Nuclear',# 12
215             'Ascidian Mitochondrial',# 13
216             'Alternative Flatworm Mitochondrial',# 14
217             'Blepharisma Nuclear',# 15
218             'Chlorophycean Mitochondrial',# 16
219             '', '', '', '',
220             'Trematode Mitochondrial',# 21
221             'Scenedesmus obliquus Mitochondrial', #22
222             'Thraustochytrium Mitochondrial', #23
223             'Pterobranchia Mitochondrial', #24
224             'Candidate Division SR1 and Gracilibacteria', #25
225             );
226              
227 203         780 @TABLES =
228             qw(
229             FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
230             FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
231             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG
232             FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG
233             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
234             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG
235             FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
236             '' ''
237             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
238             FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
239             FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
240             FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
241             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG
242             FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
243             FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
244             FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
245             '' '' '' ''
246             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG
247             FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
248             FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
249             FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG
250             FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
251             );
252              
253             # (bases used for these tables, for reference)
254             # 1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
255             # 2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
256             # 3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
257              
258 203         783 @STARTS =
259             qw(
260             -----------------------------------M----------------------------
261             ---M---------------M---------------M----------------------------
262             --------------------------------MMMM---------------M------------
263             ----------------------------------MM----------------------------
264             --MM---------------M------------MMMM---------------M------------
265             ---M----------------------------MMMM---------------M------------
266             -----------------------------------M----------------------------
267             '' ''
268             -----------------------------------M---------------M------------
269             -----------------------------------M----------------------------
270             ---M---------------M------------MMMM---------------M------------
271             -------------------M---------------M----------------------------
272             ---M------------------------------MM---------------M------------
273             -----------------------------------M----------------------------
274             -----------------------------------M----------------------------
275             -----------------------------------M----------------------------
276             '' '' '' ''
277             -----------------------------------M---------------M------------
278             -----------------------------------M----------------------------
279             --------------------------------M--M---------------M------------
280             ---M---------------M---------------M---------------M------------
281             ---M-------------------------------M---------------M------------
282             );
283              
284 203         384 my @nucs = qw(t c a g);
285 203         295 my $x = 0;
286 203         454 ($CODONS, $TRCOL) = ({}, {});
287 203         478 for my $i (@nucs) {
288 812         753 for my $j (@nucs) {
289 3248         2338 for my $k (@nucs) {
290 12992         9223 my $codon = "$i$j$k";
291 12992         15555 $CODONS->{$codon} = $x;
292 12992         14982 $TRCOL->{$x} = $codon;
293 12992         9549 $x++;
294             }
295             }
296             }
297 203         1214 %IUPAC_DNA = Bio::Tools::IUPAC->iupac_iub();
298 203         825 %IUPAC_AA = Bio::Tools::IUPAC->iupac_iup();
299 203         875 %THREELETTERSYMBOLS = Bio::SeqUtils->valid_aa(2);
300 203         1115 $VALID_PROTEIN = '['.join('',Bio::SeqUtils->valid_aa(0)).']';
301 203         349114 $TERMINATOR = '*';
302             }
303              
304             sub new {
305 505     505 1 1741 my($class,@args) = @_;
306 505         1116 my $self = $class->SUPER::new(@args);
307              
308 505         1383 my($id) =
309             $self->_rearrange([qw(ID
310             )],
311             @args);
312              
313 505 100       995 $id = 1 if ( ! $id );
314 505 50       1189 $id && $self->id($id);
315 505         830 return $self; # success - we hope!
316             }
317              
318             =head2 id
319              
320             Title : id
321             Usage : $obj->id(3); $id_integer = $obj->id();
322             Function: Sets or returns the id of the translation table. IDs are
323             integers from 0 (special ATG-only start) to 25, excluding
324             7-8 and 17-20 which have been removed. If an invalid ID is
325             given the method returns 1, the standard table.
326             Example :
327             Returns : value of id, a scalar, warn and fall back to 1 (standard table)
328             if specified id is not valid
329             Args : newvalue (optional)
330              
331             =cut
332              
333             sub id{
334 25860     25860 1 20518 my ($self,$value) = @_;
335 25860 100       28612 if( defined $value) {
336 644 100 66     2176 if ( not defined $TABLES[$value] or $TABLES[$value] eq '') {
337 1         11 $self->warn("Not a valid codon table ID [$value], using [1] instead ");
338 1         1 $value = 1;
339             }
340 644         775 $self->{'id'} = $value;
341             }
342 25860         23564 return $self->{'id'};
343             }
344              
345             =head2 name
346              
347             Title : name
348             Usage : $obj->name()
349             Function: returns the descriptive name of the translation table
350             Example :
351             Returns : A string
352             Args : None
353              
354              
355             =cut
356              
357             sub name{
358 1     1 1 2 my ($self) = @_;
359              
360 1         3 my ($id) = $self->{'id'};
361 1         5 return $NAMES[$id];
362             }
363              
364             =head2 tables
365              
366             Title : tables
367             Usage : $obj->tables() or Bio::Tools::CodonTable->tables()
368             Function: returns a hash reference where each key is a valid codon
369             table id() number, and each value is the corresponding
370             codon table name() string
371             Example :
372             Returns : A hashref
373             Args : None
374              
375              
376             =cut
377              
378             sub tables{
379 2     2 1 901 my %tables;
380 2         6 for my $id (0 .. $#NAMES) {
381 52         33 my $name = $NAMES[$id];
382 52 100       80 $tables{$id} = $name if $name;
383             }
384 2         5 return \%tables;
385             }
386              
387             =head2 translate
388              
389             Title : translate
390             Usage : $obj->translate('YTR')
391             Function: Returns a string of one letter amino acid codes from
392             nucleotide sequence input. The imput can be of any length.
393              
394             Returns 'X' for unknown codons and codons that code for
395             more than one amino acid. Returns an empty string if input
396             is not three characters long. Exceptions for these are:
397              
398             - IUPAC amino acid code B for Aspartic Acid and
399             Asparagine, is used.
400             - IUPAC amino acid code Z for Glutamic Acid, Glutamine is
401             used.
402             - if the codon is two nucleotides long and if by adding
403             an a third character 'N', it codes for a single amino
404             acid (with exceptions above), return that, otherwise
405             return empty string.
406              
407             Returns empty string for other input strings that are not
408             three characters long.
409              
410             Example :
411             Returns : a string of one letter ambiguous IUPAC amino acid codes
412             Args : ambiguous IUPAC nucleotide string
413              
414              
415             =cut
416              
417             sub translate {
418 19902     19902 1 19093 my ($self, $seq, $complete_codon) = @_;
419 19902 100       24493 $self->throw("Calling translate without a seq argument!") unless defined $seq;
420 19901 100       21940 return '' unless $seq;
421              
422 19900         20329 my $id = $self->id;
423 19900         14725 my ($partial) = 0;
424 19900 100       24904 $partial = 2 if length($seq) % CODONSIZE == 2;
425            
426 19900         15812 $seq = lc $seq;
427 19900         14578 $seq =~ tr/u/t/;
428 19900         12907 my $protein = "";
429 19900 100       33788 if ($seq =~ /[^actg]/ ) { #ambiguous chars
430 5315         8112 for (my $i = 0; $i < (length($seq) - (CODONSIZE-1)); $i+= CODONSIZE) {
431 17779         13692 my $triplet = substr($seq, $i, CODONSIZE);
432 17779 100       22938 if( $triplet eq $CODONGAP ) {
    100          
433 1527         2032 $protein .= $GAP;
434             } elsif (exists $CODONS->{$triplet}) {
435             $protein .= substr($TABLES[$id],
436 10946         15787 $CODONS->{$triplet},1);
437             } else {
438 5306         6004 $protein .= $self->_translate_ambiguous_codon($triplet);
439             }
440             }
441             } else { # simple, strict translation
442 14585         22212 for (my $i = 0; $i < (length($seq) - (CODONSIZE -1)); $i+=CODONSIZE) {
443 124418         85478 my $triplet = substr($seq, $i, CODONSIZE);
444 124418 50       128886 if( $triplet eq $CODONGAP ) {
445 0         0 $protein .= $GAP;
446             }
447 124418 50       112170 if (exists $CODONS->{$triplet}) {
448 124418         184904 $protein .= substr($TABLES[$id], $CODONS->{$triplet}, 1);
449             } else {
450 0         0 $protein .= 'X';
451             }
452             }
453             }
454 19900 100 100     27817 if ($partial == 2 && $complete_codon) { # 2 overhanging nucleotides
455 7         14 my $triplet = substr($seq, ($partial -4)). "n";
456 7 50       22 if( $triplet eq $CODONGAP ) {
    50          
457 0         0 $protein .= $GAP;
458             } elsif (exists $CODONS->{$triplet}) {
459 0         0 my $aa = substr($TABLES[$id], $CODONS->{$triplet},1);
460 0         0 $protein .= $aa;
461             } else {
462 7         13 $protein .= $self->_translate_ambiguous_codon($triplet, $partial);
463             }
464             }
465 19900         25904 return $protein;
466             }
467              
468             sub _translate_ambiguous_codon {
469 5313     5313   4089 my ($self, $triplet, $partial) = @_;
470 5313   100     11966 $partial ||= 0;
471 5313         5102 my $id = $self->id;
472 5313         3777 my $aa;
473 5313         5899 my @codons = $self->unambiguous_codons($triplet);
474 5313         4442 my %aas =();
475 5313         4337 foreach my $codon (@codons) {
476 401         514 $aas{substr($TABLES[$id],$CODONS->{$codon},1)} = 1;
477             }
478 5313         4894 my $count = scalar keys %aas;
479 5313 100       7334 if ( $count == 1 ) {
    100          
480 141         170 $aa = (keys %aas)[0];
481             }
482             elsif ( $count == 2 ) {
483 13 100 66     53 if ($aas{'D'} and $aas{'N'}) {
    100 66        
484 3         3 $aa = 'B';
485             }
486             elsif ($aas{'E'} and $aas{'Q'}) {
487 4         5 $aa = 'Z';
488             } else {
489 6 100       11 $partial ? ($aa = '') : ($aa = 'X');
490             }
491             } else {
492 5159 100       6182 $partial ? ($aa = '') : ($aa = 'X');
493             }
494 5313         12328 return $aa;
495             }
496              
497             =head2 translate_strict
498              
499             Title : translate_strict
500             Usage : $obj->translate_strict('ACT')
501             Function: returns one letter amino acid code for a codon input
502              
503             Fast and simple translation. User is responsible to resolve
504             ambiguous nucleotide codes before calling this
505             method. Returns 'X' for unknown codons and an empty string
506             for input strings that are not three characters long.
507              
508             It is not recommended to use this method in a production
509             environment. Use method translate, instead.
510              
511             Example :
512             Returns : A string
513             Args : a codon = a three nucleotide character string
514              
515              
516             =cut
517              
518             sub translate_strict{
519 3     3 1 7 my ($self, $value) = @_;
520 3         5 my $id = $self->{'id'};
521              
522 3         6 $value = lc $value;
523 3         3 $value =~ tr/u/t/;
524              
525 3 50       10 return '' unless length $value == 3;
526              
527 3 50       10 return 'X' unless defined $CODONS->{$value};
528              
529 3         13 return substr( $TABLES[$id], $CODONS->{$value}, 1 );
530             }
531              
532             =head2 revtranslate
533              
534             Title : revtranslate
535             Usage : $obj->revtranslate('G')
536             Function: returns codons for an amino acid
537              
538             Returns an empty string for unknown amino acid
539             codes. Ambiguous IUPAC codes Asx,B, (Asp,D; Asn,N) and
540             Glx,Z (Glu,E; Gln,Q) are resolved. Both single and three
541             letter amino acid codes are accepted. '*' and 'Ter' are
542             used for terminator.
543              
544             By default, the output codons are shown in DNA. If the
545             output is needed in RNA (tr/t/u/), add a second argument
546             'RNA'.
547              
548             Example : $obj->revtranslate('Gly', 'RNA')
549             Returns : An array of three lower case letter strings i.e. codons
550             Args : amino acid, 'RNA'
551              
552             =cut
553              
554             sub revtranslate {
555 135     135 1 1481 my ($self, $value, $coding) = @_;
556 135         88 my @codons;
557              
558 135 100       200 if (length($value) == 3 ) {
559 5         6 $value = lc $value;
560 5         24 $value = ucfirst $value;
561 5         14 $value = $THREELETTERSYMBOLS{$value};
562             }
563 135 100 100     811 if ( defined $value and $value =~ /$VALID_PROTEIN/
      66        
564             and length($value) == 1
565             ) {
566 133         117 my $id = $self->{'id'};
567              
568 133         131 $value = uc $value;
569 133         87 my @aas = @{$IUPAC_AA{$value}};
  133         245  
570 133         186 foreach my $aa (@aas) {
571             #print $aa, " -2\n";
572 145 100       190 $aa = '\*' if $aa eq '*';
573 145         1074 while ($TABLES[$id] =~ m/$aa/g) {
574 399         315 my $p = pos $TABLES[$id];
575 399         1090 push (@codons, $TRCOL->{--$p});
576             }
577             }
578             }
579              
580 135 100 66     246 if ($coding and uc ($coding) eq 'RNA') {
581 1         5 for my $i (0..$#codons) {
582 1         5 $codons[$i] =~ tr/t/u/;
583             }
584             }
585              
586 135         357 return @codons;
587             }
588              
589             =head2 reverse_translate_all
590              
591             Title : reverse_translate_all
592             Usage : my $iup_str = $cttable->reverse_translate_all($seq_object)
593             my $iup_str = $cttable->reverse_translate_all($seq_object,
594             $cutable,
595             15);
596             Function: reverse translates a protein sequence into IUPAC nucleotide
597             sequence. An 'X' in the protein sequence is converted to 'NNN'
598             in the nucleotide sequence.
599             Returns : a string
600             Args : a Bio::PrimarySeqI compatible object (mandatory)
601             a Bio::CodonUsage::Table object and a threshold if only
602             codons with a relative frequency above the threshold are
603             to be considered.
604             =cut
605              
606             sub reverse_translate_all {
607 22     22 1 29 my ($self, $obj, $cut, $threshold) = @_;
608              
609             ## check args are OK
610              
611 22 50 33     114 if (!$obj || !$obj->isa('Bio::PrimarySeqI')){
612 0         0 $self->throw(" I need a Bio::PrimarySeqI object, not a [".
613             ref($obj) . "]");
614             }
615 22 50       49 if($obj->alphabet ne 'protein') {
616 0         0 $self->throw("Cannot reverse translate, need an amino acid sequence .".
617             "This sequence is of type [" . $obj->alphabet ."]");
618             }
619 22         23 my @data;
620 22         46 my @seq = split '', $obj->seq;
621              
622             ## if we're not supplying a codon usage table...
623 22 100 66     85 if( !$cut && !$threshold) {
624             ## get lists of possible codons for each aa.
625 21         28 for my $aa (@seq) {
626 75 100       138 if ($aa =~ /x/i) {
627 7         13 push @data, (['NNN']);
628             }else {
629 68         105 my @cods = $self->revtranslate($aa);
630 68         112 push @data, \@cods;
631             }
632             }
633             }else{
634             #else we are supplying a codon usage table, we just want common codons
635             #check args first.
636 1 50       5 if(!$cut->isa('Bio::CodonUsage::Table')) {
637 0         0 $self->throw("I need a Bio::CodonUsage::Table object, not a [".
638             ref($cut). "].");
639             }
640 1         4 my $cod_ref = $cut->probable_codons($threshold);
641 1         2 for my $aa (@seq) {
642 21 100       29 if ($aa =~ /x/i) {
643 1         2 push @data, (['NNN']);
644 1         4 next;
645             }
646 20         15 push @data, $cod_ref->{$aa};
647             }
648             }
649              
650 22         58 return $self->_make_iupac_string(\@data);
651             }
652              
653             =head2 reverse_translate_best
654              
655             Title : reverse_translate_best
656             Usage : my $str = $cttable->reverse_translate_best($seq_object,$cutable);
657             Function: Reverse translates a protein sequence into plain nucleotide
658             sequence (GATC), uses the most common codon for each amino acid
659             Returns : A string
660             Args : A Bio::PrimarySeqI compatible object and a Bio::CodonUsage::Table object
661              
662             =cut
663              
664             sub reverse_translate_best {
665              
666 1     1 1 3 my ($self, $obj, $cut) = @_;
667              
668 1 50 33     9 if (!$obj || !$obj->isa('Bio::PrimarySeqI')){
669 0         0 $self->throw(" I need a Bio::PrimarySeqI object, not a [".
670             ref($obj) . "]");
671             }
672 1 50       5 if ($obj->alphabet ne 'protein') {
673 0         0 $self->throw("Cannot reverse translate, need an amino acid sequence .".
674             "This sequence is of type [" . $obj->alphabet ."]");
675             }
676 1 50       13 if ( !$cut | !$cut->isa('Bio::CodonUsage::Table')) {
677 0         0 $self->throw("I need a Bio::CodonUsage::Table object, not a [".
678             ref($cut). "].");
679             }
680              
681 1         2 my $str = '';
682 1         4 my @seq = split '', $obj->seq;
683              
684 1         4 my $cod_ref = $cut->most_common_codons();
685              
686 1         2 for my $aa ( @seq ) {
687 21 100       35 if ($aa =~ /x/i) {
688 1         2 $str .= 'NNN';
689 1         2 next;
690             }
691 20 50       22 if ( defined $cod_ref->{$aa} ) {
692 20         19 $str .= $cod_ref->{$aa};
693             } else {
694 0         0 $self->throw("Input sequence contains invalid character: $aa");
695             }
696             }
697 1         8 return $str;
698             }
699              
700             =head2 is_start_codon
701              
702             Title : is_start_codon
703             Usage : $obj->is_start_codon('ATG')
704             Function: returns true (1) for all codons that can be used as a
705             translation start, false (0) for others.
706             Example : $myCodonTable->is_start_codon('ATG')
707             Returns : boolean
708             Args : codon
709              
710             =cut
711              
712             sub is_start_codon{
713 718     718 1 763 shift->_codon_is( shift, \@STARTS, 'M' );
714             }
715              
716             =head2 is_ter_codon
717              
718             Title : is_ter_codon
719             Usage : $obj->is_ter_codon('GAA')
720             Function: returns true (1) for all codons that can be used as a
721             translation tarminator, false (0) for others.
722             Example : $myCodonTable->is_ter_codon('ATG')
723             Returns : boolean
724             Args : codon
725              
726             =cut
727              
728             sub is_ter_codon{
729 613     613 1 460 my ($self, $value) = @_;
730 613         455 my $id = $self->{'id'};
731              
732             # We need to ensure U is mapped to T (ie. UAG)
733 613         437 $value = uc $value;
734 613         435 $value =~ tr/U/T/;
735              
736 613 100       625 if (length $value != 3 ) {
737             # Incomplete codons are not stop codons
738 1         4 return 0;
739             } else {
740 612         397 my $result = 0;
741              
742             # For all the possible codons, if any are not a stop
743             # codon, fail immediately
744 612         571 for my $c ( $self->unambiguous_codons($value) ) {
745 615         666 my $m = substr( $TABLES[$id], $CODONS->{$c}, 1 );
746 615 100       602 if($m eq $TERMINATOR) {
747 46         53 $result = 1;
748             } else {
749 569         2461 return 0;
750             }
751             }
752 43         97 return $result;
753             }
754             }
755              
756             # desc: compares the passed value with a single entry in the given
757             # codon table
758             # args: a value (typically a three-char string like 'atg'),
759             # a reference to the appropriate set of codon tables,
760             # a single-character value to check for at the position in the
761             # given codon table
762             # ret: boolean, true if the given codon table contains the $key at the
763             # position corresponding to $value
764             sub _codon_is {
765 718     718   604 my ($self, $value, $table, $key ) = @_;
766              
767 718 50       840 return 0 unless length $value == 3;
768              
769 718         508 $value = lc $value;
770 718         444 $value =~ tr/u/t/;
771              
772 718         530 my $id = $self->{'id'};
773 718         666 for my $c ( $self->unambiguous_codons($value) ) {
774 720         737 my $m = substr( $table->[$id], $CODONS->{$c}, 1 );
775 720 100       1014 if ($m eq $key) { return 1; }
  55         129  
776             }
777 663         1723 return 0;
778             }
779              
780             =head2 is_unknown_codon
781              
782             Title : is_unknown_codon
783             Usage : $obj->is_unknown_codon('GAJ')
784             Function: returns false (0) for all codons that are valid,
785             true (1) for others.
786             Example : $myCodonTable->is_unknown_codon('NTG')
787             Returns : boolean
788             Args : codon
789              
790              
791             =cut
792              
793             sub is_unknown_codon{
794 3     3 1 4 my ($self, $value) = @_;
795 3         5 $value = lc $value;
796 3         4 $value =~ tr/u/t/;
797 3 100       5 return 1 unless $self->unambiguous_codons($value);
798 1         3 return 0;
799             }
800              
801             =head2 unambiguous_codons
802              
803             Title : unambiguous_codons
804             Usage : @codons = $self->unambiguous_codons('ACN')
805             Returns : array of strings (one-letter unambiguous amino acid codes)
806             Args : a codon = a three IUPAC nucleotide character string
807              
808             =cut
809              
810             sub unambiguous_codons{
811 6646     6646 1 4954 my ($self,$value) = @_;
812 6646         10752 my @nts = map { $IUPAC_DNA{uc $_} } split(//, $value);
  19937         20971  
813              
814 6646         5563 my @codons;
815 6646         4651 for my $i ( @{$nts[0]} ) {
  6646         9231  
816 1658         978 for my $j ( @{$nts[1]} ) {
  1658         1337  
817 1633         985 for my $k ( @{$nts[2]} ) {
  1633         1378  
818 1804         2763 push @codons, lc "$i$j$k";
819             }}}
820 6646         7669 return @codons;
821             }
822              
823             =head2 _unambiquous_codons
824              
825             deprecated, now an alias for unambiguous_codons
826              
827             =cut
828              
829             sub _unambiquous_codons {
830 0     0   0 unambiguous_codons( undef, @_ );
831             }
832              
833             =head2 add_table
834              
835             Title : add_table
836             Usage : $newid = $ct->add_table($name, $table, $starts)
837             Function: Add a custom Codon Table into the object.
838             Know what you are doing, only the length of
839             the argument strings is checked!
840             Returns : the id of the new codon table
841             Args : name, a string, optional (can be empty)
842             table, a string of 64 characters
843             startcodons, a string of 64 characters, defaults to standard
844              
845             =cut
846              
847             sub add_table {
848 1     1 1 2 my ($self, $name, $table, $starts) = @_;
849              
850 1   33     4 $name ||= 'Custom' . $#NAMES + 1;
851 1   33     7 $starts ||= $STARTS[1];
852 1 50 33     6 $self->throw('Suspect input!')
853             unless length($table) == 64 and length($starts) == 64;
854              
855 1         3 push @NAMES, $name;
856 1         2 push @TABLES, $table;
857 1         2 push @STARTS, $starts;
858              
859 1         5 return $#NAMES;
860             }
861              
862             sub _make_iupac_string {
863 22     22   26 my ($self, $cod_ref) = @_;
864 22 50       50 if(ref($cod_ref) ne 'ARRAY') {
865 0         0 $self->throw(" I need a reference to a list of references to codons, ".
866             " not a [". ref($cod_ref) . "].");
867             }
868 22         82 my %iupac_hash = Bio::Tools::IUPAC->iupac_rev_iub();
869 22         46 my $iupac_string = ''; ## the string to be returned
870 22         43 for my $aa (@$cod_ref) {
871              
872             ## scan through codon positions, record the differing values,
873             # then look up in the iub hash
874 96         104 for my $index(0..2) {
875 288         176 my %h;
876 288         211 map { my $k = substr($_,$index,1);
  846         591  
877 846         745 $h{$k} = undef;} @$aa;
878 288         665 my $lookup_key = join '', sort{$a cmp $b}keys %h;
  213         222  
879              
880             ## extend string
881 288         460 $iupac_string .= $iupac_hash{uc$lookup_key};
882             }
883             }
884 22         177 return $iupac_string;
885             }
886              
887              
888             1;