File Coverage

Bio/Tools/CodonTable.pm

Criterion	Covered	Total	%
statement	225	239	94.1
branch	75	92	81.5
condition	25	41	60.9
subroutine	24	25	96.0
pod	14	14	100.0
total	363	411	88.3

line	stmt	bran	cond	sub	pod	time	code
1							#
2							# bioperl module for Bio::Tools::CodonTable
3							#
4							# Please direct questions and support issues to
5							#
6							# Cared for by Heikki Lehvaslaiho
7							#
8							# Copyright Heikki Lehvaslaiho
9							#
10							# You may distribute this module under the same terms as perl itself
11
12							# POD documentation - main docs before the code
13
14							=head1 NAME
15
16							Bio::Tools::CodonTable - Codon table object
17
18							=head1 SYNOPSIS
19
20							# This is a read-only class for all known codon tables. The IDs are
21							# the ones used by nucleotide sequence databases. All common IUPAC
22							# ambiguity codes for DNA, RNA and amino acids are recognized.
23
24							use Bio::Tools::CodonTable;
25
26							# defaults to ID 1 "Standard"
27							$myCodonTable = Bio::Tools::CodonTable->new();
28							$myCodonTable2 = Bio::Tools::CodonTable->new( -id => 3 );
29
30							# change codon table
31							$myCodonTable->id(5);
32
33							# examine codon table
34							print join (' ', "The name of the codon table no.", $myCodonTable->id(4),
35							"is:", $myCodonTable->name(), "\n");
36
37							# print possible codon tables
38							$tables = Bio::Tools::CodonTable->tables;
39							while ( ($id,$name) = each %{$tables} ) {
40							print "$id = $name\n";
41							}
42
43							# translate a codon
44							$aa = $myCodonTable->translate('ACU');
45							$aa = $myCodonTable->translate('act');
46							$aa = $myCodonTable->translate('ytr');
47
48							# reverse translate an amino acid
49							@codons = $myCodonTable->revtranslate('A');
50							@codons = $myCodonTable->revtranslate('Ser');
51							@codons = $myCodonTable->revtranslate('Glx');
52							@codons = $myCodonTable->revtranslate('cYS', 'rna');
53
54							# reverse translate an entire amino acid sequence into a IUPAC
55							# nucleotide string
56
57							my $seqobj = Bio::PrimarySeq->new(-seq => 'FHGERHEL');
58							my $iupac_str = $myCodonTable->reverse_translate_all($seqobj);
59
60							# boolean tests
61							print "Is a start\n" if $myCodonTable->is_start_codon('ATG');
62							print "Is a terminator\n" if $myCodonTable->is_ter_codon('tar');
63							print "Is a unknown\n" if $myCodonTable->is_unknown_codon('JTG');
64
65							=head1 DESCRIPTION
66
67							Codon tables are also called translation tables or genetic codes
68							since that is what they represent. A bit more complete picture
69							of the full complexity of codon usage in various taxonomic groups
70							is presented at the NCBI Genetic Codes Home page.
71
72							CodonTable is a BioPerl class that knows all current translation
73							tables that are used by primary nucleotide sequence databases
74							(GenBank, EMBL and DDBJ). It provides methods to output information
75							about tables and relationships between codons and amino acids.
76
77							This class and its methods recognized all common IUPAC ambiguity codes
78							for DNA, RNA and animo acids. The translation method follows the
79							conventions in EMBL and TREMBL databases.
80
81							It is a nuisance to separate RNA and cDNA representations of nucleic
82							acid transcripts. The CodonTable object accepts codons of both type as
83							input and allows the user to set the mode for output when reverse
84							translating. Its default for output is DNA.
85
86							Note:
87
88							This class deals primarily with individual codons and amino
89							acids. However in the interest of speed you can L
90							longer sequence, too. The full complexity of protein translation
91							is tackled by L.
92
93
94							The amino acid codes are IUPAC recommendations for common amino acids:
95
96							A Ala Alanine
97							R Arg Arginine
98							N Asn Asparagine
99							D Asp Aspartic acid
100							C Cys Cysteine
101							Q Gln Glutamine
102							E Glu Glutamic acid
103							G Gly Glycine
104							H His Histidine
105							I Ile Isoleucine
106							L Leu Leucine
107							K Lys Lysine
108							M Met Methionine
109							F Phe Phenylalanine
110							P Pro Proline
111							O Pyl Pyrrolysine (22nd amino acid)
112							U Sec Selenocysteine (21st amino acid)
113							S Ser Serine
114							T Thr Threonine
115							W Trp Tryptophan
116							Y Tyr Tyrosine
117							V Val Valine
118							B Asx Aspartic acid or Asparagine
119							Z Glx Glutamine or Glutamic acid
120							J Xle Isoleucine or Valine (mass spec ambiguity)
121							X Xaa Any or unknown amino acid
122
123
124							It is worth noting that, "Bacterial" codon table no. 11 produces an
125							polypeptide that is, confusingly, identical to the standard one. The
126							only differences are in available initiator codons.
127
128
129							NCBI Genetic Codes home page:
130							(Last update of the Genetic Codes: April 30, 2013)
131							https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c
132
133							ASN.1 version with ids 1 to 25 is at:
134							ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt
135
136							Thanks to Matteo diTomasso for the original Perl implementation
137							of these tables.
138
139							=head1 FEEDBACK
140
141							=head2 Mailing Lists
142
143							User feedback is an integral part of the evolution of this and other
144							Bioperl modules. Send your comments and suggestions preferably to the
145							Bioperl mailing lists Your participation is much appreciated.
146
147							bioperl-l@bioperl.org - General discussion
148							http://bioperl.org/wiki/Mailing_lists - About the mailing lists
149
150							=head2 Support
151
152							Please direct usage questions or support issues to the mailing list:
153
154							I
155
156							rather than to the module maintainer directly. Many experienced and
157							reponsive experts will be able look at the problem and quickly
158							address it. Please include a thorough description of the problem
159							with code and data examples if at all possible.
160
161							=head2 Reporting Bugs
162
163							Report bugs to the Bioperl bug tracking system to help us keep track
164							the bugs and their resolution. Bug reports can be submitted via the
165							web:
166
167							https://github.com/bioperl/bioperl-live/issues
168
169							=head1 AUTHOR - Heikki Lehvaslaiho
170
171							Email: heikki-at-bioperl-dot-org
172
173							=head1 APPENDIX
174
175							The rest of the documentation details each of the object
176							methods. Internal methods are usually preceded with a _
177
178							=cut
179
180							# Let the code begin...
181
182							package Bio::Tools::CodonTable;
183	203					17496	use vars qw(@NAMES @TABLES @STARTS $TRCOL $CODONS %IUPAC_DNA $CODONGAP $GAP
184	203			203		1786	%IUPAC_AA %THREELETTERSYMBOLS $VALID_PROTEIN $TERMINATOR);
	203					246
185	203			203		693	use strict;
	203					238
	203					3582
186
187							# Object preamble - inherits from Bio::Root::Root
188	203			203		60256	use Bio::Tools::IUPAC;
	203					320
	203					4615
189	203			203		89367	use Bio::SeqUtils;
	203					357
	203					5769
190
191	203			203		1097	use base qw(Bio::Root::Root);
	203					213
	203					14206
192
193
194							# first set internal values for all translation tables
195
196							BEGIN {
197	203			203		858	use constant CODONSIZE => 3;
	203					244
	203					54678
198	203			203		415	$GAP = '-';
199	203					525	$CODONGAP = $GAP x CODONSIZE;
200
201	203					887	@NAMES = #id
202							(
203							'Strict', #0, special option for ATG-only start
204							'Standard', #1
205							'Vertebrate Mitochondrial',#2
206							'Yeast Mitochondrial',# 3
207							'Mold, Protozoan, and Coelenterate Mitochondrial and Mycoplasma/Spiroplasma',#4
208							'Invertebrate Mitochondrial',#5
209							'Ciliate, Dasycladacean and Hexamita Nuclear',# 6
210							'', '',
211							'Echinoderm and Flatworm Mitochondrial',#9
212							'Euplotid Nuclear',#10
213							'Bacterial, Archaeal and Plant Plastid',# 11
214							'Alternative Yeast Nuclear',# 12
215							'Ascidian Mitochondrial',# 13
216							'Alternative Flatworm Mitochondrial',# 14
217							'Blepharisma Nuclear',# 15
218							'Chlorophycean Mitochondrial',# 16
219							'', '', '', '',
220							'Trematode Mitochondrial',# 21
221							'Scenedesmus obliquus Mitochondrial', #22
222							'Thraustochytrium Mitochondrial', #23
223							'Pterobranchia Mitochondrial', #24
224							'Candidate Division SR1 and Gracilibacteria', #25
225							);
226
227	203					764	@TABLES =
228							qw(
229							FFLLSSSSYY*CCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
230							FFLLSSSSYY*CCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
231							FFLLSSSSYYCCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSVVVVAAAADDEEGGGG
232							FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG
233							FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
234							FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG
235							FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
236							'' ''
237							FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
238							FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
239							FFLLSSSSYY*CCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
240							FFLLSSSSYY*CCWLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
241							FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG
242							FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
243							FFLLSSSSYYQCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
244							FFLLSSSSYYLCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
245							'' '' '' ''
246							FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG
247							FFLLSSSYYLCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
248							FFLSSSSYYCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
249							FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG
250							FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
251							);
252
253							# (bases used for these tables, for reference)
254							# 1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
255							# 2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
256							# 3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG
257
258	203					785	@STARTS =
259							qw(
260							-----------------------------------M----------------------------
261							---M---------------M---------------M----------------------------
262							--------------------------------MMMM---------------M------------
263							----------------------------------MM----------------------------
264							--MM---------------M------------MMMM---------------M------------
265							---M----------------------------MMMM---------------M------------
266							-----------------------------------M----------------------------
267							'' ''
268							-----------------------------------M---------------M------------
269							-----------------------------------M----------------------------
270							---M---------------M------------MMMM---------------M------------
271							-------------------M---------------M----------------------------
272							---M------------------------------MM---------------M------------
273							-----------------------------------M----------------------------
274							-----------------------------------M----------------------------
275							-----------------------------------M----------------------------
276							'' '' '' ''
277							-----------------------------------M---------------M------------
278							-----------------------------------M----------------------------
279							--------------------------------M--M---------------M------------
280							---M---------------M---------------M---------------M------------
281							---M-------------------------------M---------------M------------
282							);
283
284	203					382	my @nucs = qw(t c a g);
285	203					274	my $x = 0;
286	203					441	($CODONS, $TRCOL) = ({}, {});
287	203					436	for my $i (@nucs) {
288	812					697	for my $j (@nucs) {
289	3248					2354	for my $k (@nucs) {
290	12992					9340	my $codon = "$i$j$k";
291	12992					15171	$CODONS->{$codon} = $x;
292	12992					15295	$TRCOL->{$x} = $codon;
293	12992					9666	$x++;
294							}
295							}
296							}
297	203					1213	%IUPAC_DNA = Bio::Tools::IUPAC->iupac_iub();
298	203					793	%IUPAC_AA = Bio::Tools::IUPAC->iupac_iup();
299	203					870	%THREELETTERSYMBOLS = Bio::SeqUtils->valid_aa(2);
300	203					1100	$VALID_PROTEIN = '['.join('',Bio::SeqUtils->valid_aa(0)).']';
301	203					349993	$TERMINATOR = '*';
302							}
303
304							sub new {
305	505			505	1	1569	my($class,@args) = @_;
306	505					1147	my $self = $class->SUPER::new(@args);
307
308	505					1309	my($id) =
309							$self->_rearrange([qw(ID
310							)],
311							@args);
312
313	505	100				1057	$id = 1 if ( ! $id );
314	505	50				1161	$id && $self->id($id);
315	505					814	return $self; # success - we hope!
316							}
317
318							=head2 id
319
320							Title : id
321							Usage : $obj->id(3); $id_integer = $obj->id();
322							Function: Sets or returns the id of the translation table. IDs are
323							integers from 0 (special ATG-only start) to 25, excluding
324							7-8 and 17-20 which have been removed. If an invalid ID is
325							given the method returns 1, the standard table.
326							Example :
327							Returns : value of id, a scalar, warn and fall back to 1 (standard table)
328							if specified id is not valid
329							Args : newvalue (optional)
330
331							=cut
332
333							sub id{
334	25860			25860	1	19358	my ($self,$value) = @_;
335	25860	100				29756	if( defined $value) {
336	644	100	66			2230	if ( not defined $TABLES[$value] or $TABLES[$value] eq '') {
337	1					9	$self->warn("Not a valid codon table ID [$value], using [1] instead ");
338	1					2	$value = 1;
339							}
340	644					764	$self->{'id'} = $value;
341							}
342	25860					22613	return $self->{'id'};
343							}
344
345							=head2 name
346
347							Title : name
348							Usage : $obj->name()
349							Function: returns the descriptive name of the translation table
350							Example :
351							Returns : A string
352							Args : None
353
354
355							=cut
356
357							sub name{
358	1			1	1	2	my ($self) = @_;
359
360	1					1	my ($id) = $self->{'id'};
361	1					4	return $NAMES[$id];
362							}
363
364							=head2 tables
365
366							Title : tables
367							Usage : $obj->tables() or Bio::Tools::CodonTable->tables()
368							Function: returns a hash reference where each key is a valid codon
369							table id() number, and each value is the corresponding
370							codon table name() string
371							Example :
372							Returns : A hashref
373							Args : None
374
375
376							=cut
377
378							sub tables{
379	2			2	1	791	my %tables;
380	2					5	for my $id (0 .. $#NAMES) {
381	52					28	my $name = $NAMES[$id];
382	52	100				79	$tables{$id} = $name if $name;
383							}
384	2					4	return \%tables;
385							}
386
387							=head2 translate
388
389							Title : translate
390							Usage : $obj->translate('YTR')
391							Function: Returns a string of one letter amino acid codes from
392							nucleotide sequence input. The imput can be of any length.
393
394							Returns 'X' for unknown codons and codons that code for
395							more than one amino acid. Returns an empty string if input
396							is not three characters long. Exceptions for these are:
397
398							- IUPAC amino acid code B for Aspartic Acid and
399							Asparagine, is used.
400							- IUPAC amino acid code Z for Glutamic Acid, Glutamine is
401							used.
402							- if the codon is two nucleotides long and if by adding
403							an a third character 'N', it codes for a single amino
404							acid (with exceptions above), return that, otherwise
405							return empty string.
406
407							Returns empty string for other input strings that are not
408							three characters long.
409
410							Example :
411							Returns : a string of one letter ambiguous IUPAC amino acid codes
412							Args : ambiguous IUPAC nucleotide string
413
414
415							=cut
416
417							sub translate {
418	19902			19902	1	19535	my ($self, $seq, $complete_codon) = @_;
419	19902	100				22907	$self->throw("Calling translate without a seq argument!") unless defined $seq;
420	19901	100				20940	return '' unless $seq;
421
422	19900					21213	my $id = $self->id;
423	19900					13972	my ($partial) = 0;
424	19900	100				24766	$partial = 2 if length($seq) % CODONSIZE == 2;
425
426	19900					15896	$seq = lc $seq;
427	19900					14088	$seq =~ tr/u/t/;
428	19900					12778	my $protein = "";
429	19900	100				34675	if ($seq =~ /[^actg]/ ) { #ambiguous chars
430	5315					8431	for (my $i = 0; $i < (length($seq) - (CODONSIZE-1)); $i+= CODONSIZE) {
431	17779					13041	my $triplet = substr($seq, $i, CODONSIZE);
432	17779	100				22028	if( $triplet eq $CODONGAP ) {
		100
433	1527					1896	$protein .= $GAP;
434							} elsif (exists $CODONS->{$triplet}) {
435							$protein .= substr($TABLES[$id],
436	10946					15656	$CODONS->{$triplet},1);
437							} else {
438	5306					6328	$protein .= $self->_translate_ambiguous_codon($triplet);
439							}
440							}
441							} else { # simple, strict translation
442	14585					22708	for (my $i = 0; $i < (length($seq) - (CODONSIZE -1)); $i+=CODONSIZE) {
443	124418					83691	my $triplet = substr($seq, $i, CODONSIZE);
444	124418	50				128067	if( $triplet eq $CODONGAP ) {
445	0					0	$protein .= $GAP;
446							}
447	124418	50				111405	if (exists $CODONS->{$triplet}) {
448	124418					185054	$protein .= substr($TABLES[$id], $CODONS->{$triplet}, 1);
449							} else {
450	0					0	$protein .= 'X';
451							}
452							}
453							}
454	19900	100	100			28032	if ($partial == 2 && $complete_codon) { # 2 overhanging nucleotides
455	7					13	my $triplet = substr($seq, ($partial -4)). "n";
456	7	50				20	if( $triplet eq $CODONGAP ) {
		50
457	0					0	$protein .= $GAP;
458							} elsif (exists $CODONS->{$triplet}) {
459	0					0	my $aa = substr($TABLES[$id], $CODONS->{$triplet},1);
460	0					0	$protein .= $aa;
461							} else {
462	7					10	$protein .= $self->_translate_ambiguous_codon($triplet, $partial);
463							}
464							}
465	19900					26973	return $protein;
466							}
467
468							sub _translate_ambiguous_codon {
469	5313			5313		4487	my ($self, $triplet, $partial) = @_;
470	5313		100			11948	$partial \|\|= 0;
471	5313					5067	my $id = $self->id;
472	5313					3826	my $aa;
473	5313					5713	my @codons = $self->unambiguous_codons($triplet);
474	5313					4721	my %aas =();
475	5313					4446	foreach my $codon (@codons) {
476	401					478	$aas{substr($TABLES[$id],$CODONS->{$codon},1)} = 1;
477							}
478	5313					4378	my $count = scalar keys %aas;
479	5313	100				8187	if ( $count == 1 ) {
		100
480	141					148	$aa = (keys %aas)[0];
481							}
482							elsif ( $count == 2 ) {
483	13	100	66			36	if ($aas{'D'} and $aas{'N'}) {
		100	66
484	3					4	$aa = 'B';
485							}
486							elsif ($aas{'E'} and $aas{'Q'}) {
487	4					3	$aa = 'Z';
488							} else {
489	6	100				10	$partial ? ($aa = '') : ($aa = 'X');
490							}
491							} else {
492	5159	100				5576	$partial ? ($aa = '') : ($aa = 'X');
493							}
494	5313					12982	return $aa;
495							}
496
497							=head2 translate_strict
498
499							Title : translate_strict
500							Usage : $obj->translate_strict('ACT')
501							Function: returns one letter amino acid code for a codon input
502
503							Fast and simple translation. User is responsible to resolve
504							ambiguous nucleotide codes before calling this
505							method. Returns 'X' for unknown codons and an empty string
506							for input strings that are not three characters long.
507
508							It is not recommended to use this method in a production
509							environment. Use method translate, instead.
510
511							Example :
512							Returns : A string
513							Args : a codon = a three nucleotide character string
514
515
516							=cut
517
518							sub translate_strict{
519	3			3	1	5	my ($self, $value) = @_;
520	3					4	my $id = $self->{'id'};
521
522	3					4	$value = lc $value;
523	3					3	$value =~ tr/u/t/;
524
525	3	50				7	return '' unless length $value == 3;
526
527	3	50				7	return 'X' unless defined $CODONS->{$value};
528
529	3					10	return substr( $TABLES[$id], $CODONS->{$value}, 1 );
530							}
531
532							=head2 revtranslate
533
534							Title : revtranslate
535							Usage : $obj->revtranslate('G')
536							Function: returns codons for an amino acid
537
538							Returns an empty string for unknown amino acid
539							codes. Ambiguous IUPAC codes Asx,B, (Asp,D; Asn,N) and
540							Glx,Z (Glu,E; Gln,Q) are resolved. Both single and three
541							letter amino acid codes are accepted. '*' and 'Ter' are
542							used for terminator.
543
544							By default, the output codons are shown in DNA. If the
545							output is needed in RNA (tr/t/u/), add a second argument
546							'RNA'.
547
548							Example : $obj->revtranslate('Gly', 'RNA')
549							Returns : An array of three lower case letter strings i.e. codons
550							Args : amino acid, 'RNA'
551
552							=cut
553
554							sub revtranslate {
555	135			135	1	842	my ($self, $value, $coding) = @_;
556	135					81	my @codons;
557
558	135	100				186	if (length($value) == 3 ) {
559	5					4	$value = lc $value;
560	5					7	$value = ucfirst $value;
561	5					8	$value = $THREELETTERSYMBOLS{$value};
562							}
563	135	100	100			766	if ( defined $value and $value =~ /$VALID_PROTEIN/
			66
564							and length($value) == 1
565							) {
566	133					131	my $id = $self->{'id'};
567
568	133					116	$value = uc $value;
569	133					78	my @aas = @{$IUPAC_AA{$value}};
	133					198
570	133					130	foreach my $aa (@aas) {
571							#print $aa, " -2\n";
572	145	100				177	$aa = '\' if $aa eq '';
573	145					912	while ($TABLES[$id] =~ m/$aa/g) {
574	399					280	my $p = pos $TABLES[$id];
575	399					935	push (@codons, $TRCOL->{--$p});
576							}
577							}
578							}
579
580	135	100	66			204	if ($coding and uc ($coding) eq 'RNA') {
581	1					3	for my $i (0..$#codons) {
582	1					2	$codons[$i] =~ tr/t/u/;
583							}
584							}
585
586	135					298	return @codons;
587							}
588
589							=head2 reverse_translate_all
590
591							Title : reverse_translate_all
592							Usage : my $iup_str = $cttable->reverse_translate_all($seq_object)
593							my $iup_str = $cttable->reverse_translate_all($seq_object,
594							$cutable,
595							15);
596							Function: reverse translates a protein sequence into IUPAC nucleotide
597							sequence. An 'X' in the protein sequence is converted to 'NNN'
598							in the nucleotide sequence.
599							Returns : a string
600							Args : a Bio::PrimarySeqI compatible object (mandatory)
601							a Bio::CodonUsage::Table object and a threshold if only
602							codons with a relative frequency above the threshold are
603							to be considered.
604							=cut
605
606							sub reverse_translate_all {
607	22			22	1	26	my ($self, $obj, $cut, $threshold) = @_;
608
609							## check args are OK
610
611	22	50	33			89	if (!$obj \|\| !$obj->isa('Bio::PrimarySeqI')){
612	0					0	$self->throw(" I need a Bio::PrimarySeqI object, not a [".
613							ref($obj) . "]");
614							}
615	22	50				36	if($obj->alphabet ne 'protein') {
616	0					0	$self->throw("Cannot reverse translate, need an amino acid sequence .".
617							"This sequence is of type [" . $obj->alphabet ."]");
618							}
619	22					19	my @data;
620	22					35	my @seq = split '', $obj->seq;
621
622							## if we're not supplying a codon usage table...
623	22	100	66			77	if( !$cut && !$threshold) {
624							## get lists of possible codons for each aa.
625	21					26	for my $aa (@seq) {
626	75	100				108	if ($aa =~ /x/i) {
627	7					11	push @data, (['NNN']);
628							}else {
629	68					91	my @cods = $self->revtranslate($aa);
630	68					106	push @data, \@cods;
631							}
632							}
633							}else{
634							#else we are supplying a codon usage table, we just want common codons
635							#check args first.
636	1	50				4	if(!$cut->isa('Bio::CodonUsage::Table')) {
637	0					0	$self->throw("I need a Bio::CodonUsage::Table object, not a [".
638							ref($cut). "].");
639							}
640	1					3	my $cod_ref = $cut->probable_codons($threshold);
641	1					2	for my $aa (@seq) {
642	21	100				30	if ($aa =~ /x/i) {
643	1					2	push @data, (['NNN']);
644	1					3	next;
645							}
646	20					16	push @data, $cod_ref->{$aa};
647							}
648							}
649
650	22					38	return $self->_make_iupac_string(\@data);
651							}
652
653							=head2 reverse_translate_best
654
655							Title : reverse_translate_best
656							Usage : my $str = $cttable->reverse_translate_best($seq_object,$cutable);
657							Function: Reverse translates a protein sequence into plain nucleotide
658							sequence (GATC), uses the most common codon for each amino acid
659							Returns : A string
660							Args : A Bio::PrimarySeqI compatible object and a Bio::CodonUsage::Table object
661
662							=cut
663
664							sub reverse_translate_best {
665
666	1			1	1	2	my ($self, $obj, $cut) = @_;
667
668	1	50	33			6	if (!$obj \|\| !$obj->isa('Bio::PrimarySeqI')){
669	0					0	$self->throw(" I need a Bio::PrimarySeqI object, not a [".
670							ref($obj) . "]");
671							}
672	1	50				3	if ($obj->alphabet ne 'protein') {
673	0					0	$self->throw("Cannot reverse translate, need an amino acid sequence .".
674							"This sequence is of type [" . $obj->alphabet ."]");
675							}
676	1	50				7	if ( !$cut \| !$cut->isa('Bio::CodonUsage::Table')) {
677	0					0	$self->throw("I need a Bio::CodonUsage::Table object, not a [".
678							ref($cut). "].");
679							}
680
681	1					1	my $str = '';
682	1					2	my @seq = split '', $obj->seq;
683
684	1					3	my $cod_ref = $cut->most_common_codons();
685
686	1					2	for my $aa ( @seq ) {
687	21	100				27	if ($aa =~ /x/i) {
688	1					2	$str .= 'NNN';
689	1					1	next;
690							}
691	20	50				19	if ( defined $cod_ref->{$aa} ) {
692	20					17	$str .= $cod_ref->{$aa};
693							} else {
694	0					0	$self->throw("Input sequence contains invalid character: $aa");
695							}
696							}
697	1					6	return $str;
698							}
699
700							=head2 is_start_codon
701
702							Title : is_start_codon
703							Usage : $obj->is_start_codon('ATG')
704							Function: returns true (1) for all codons that can be used as a
705							translation start, false (0) for others.
706							Example : $myCodonTable->is_start_codon('ATG')
707							Returns : boolean
708							Args : codon
709
710							=cut
711
712							sub is_start_codon{
713	718			718	1	762	shift->_codon_is( shift, \@STARTS, 'M' );
714							}
715
716							=head2 is_ter_codon
717
718							Title : is_ter_codon
719							Usage : $obj->is_ter_codon('GAA')
720							Function: returns true (1) for all codons that can be used as a
721							translation tarminator, false (0) for others.
722							Example : $myCodonTable->is_ter_codon('ATG')
723							Returns : boolean
724							Args : codon
725
726							=cut
727
728							sub is_ter_codon{
729	613			613	1	471	my ($self, $value) = @_;
730	613					433	my $id = $self->{'id'};
731
732							# We need to ensure U is mapped to T (ie. UAG)
733	613					437	$value = uc $value;
734	613					410	$value =~ tr/U/T/;
735
736	613	100				619	if (length $value != 3 ) {
737							# Incomplete codons are not stop codons
738	1					3	return 0;
739							} else {
740	612					386	my $result = 0;
741
742							# For all the possible codons, if any are not a stop
743							# codon, fail immediately
744	612					592	for my $c ( $self->unambiguous_codons($value) ) {
745	615					660	my $m = substr( $TABLES[$id], $CODONS->{$c}, 1 );
746	615	100				613	if($m eq $TERMINATOR) {
747	46					50	$result = 1;
748							} else {
749	569					2514	return 0;
750							}
751							}
752	43					97	return $result;
753							}
754							}
755
756							# desc: compares the passed value with a single entry in the given
757							# codon table
758							# args: a value (typically a three-char string like 'atg'),
759							# a reference to the appropriate set of codon tables,
760							# a single-character value to check for at the position in the
761							# given codon table
762							# ret: boolean, true if the given codon table contains the $key at the
763							# position corresponding to $value
764							sub _codon_is {
765	718			718		562	my ($self, $value, $table, $key ) = @_;
766
767	718	50				866	return 0 unless length $value == 3;
768
769	718					489	$value = lc $value;
770	718					471	$value =~ tr/u/t/;
771
772	718					522	my $id = $self->{'id'};
773	718					651	for my $c ( $self->unambiguous_codons($value) ) {
774	720					727	my $m = substr( $table->[$id], $CODONS->{$c}, 1 );
775	720	100				1070	if ($m eq $key) { return 1; }
	55					126
776							}
777	663					1640	return 0;
778							}
779
780							=head2 is_unknown_codon
781
782							Title : is_unknown_codon
783							Usage : $obj->is_unknown_codon('GAJ')
784							Function: returns false (0) for all codons that are valid,
785							true (1) for others.
786							Example : $myCodonTable->is_unknown_codon('NTG')
787							Returns : boolean
788							Args : codon
789
790
791							=cut
792
793							sub is_unknown_codon{
794	3			3	1	3	my ($self, $value) = @_;
795	3					5	$value = lc $value;
796	3					4	$value =~ tr/u/t/;
797	3	100				4	return 1 unless $self->unambiguous_codons($value);
798	1					3	return 0;
799							}
800
801							=head2 unambiguous_codons
802
803							Title : unambiguous_codons
804							Usage : @codons = $self->unambiguous_codons('ACN')
805							Returns : array of strings (one-letter unambiguous amino acid codes)
806							Args : a codon = a three IUPAC nucleotide character string
807
808							=cut
809
810							sub unambiguous_codons{
811	6646			6646	1	4675	my ($self,$value) = @_;
812	6646					10107	my @nts = map { $IUPAC_DNA{uc $_} } split(//, $value);
	19937					22101
813
814	6646					5462	my @codons;
815	6646					4095	for my $i ( @{$nts[0]} ) {
	6646					9111
816	1658					969	for my $j ( @{$nts[1]} ) {
	1658					1407
817	1633					907	for my $k ( @{$nts[2]} ) {
	1633					1413
818	1804					2680	push @codons, lc "$i$j$k";
819							}}}
820	6646					8162	return @codons;
821							}
822
823							=head2 _unambiquous_codons
824
825							deprecated, now an alias for unambiguous_codons
826
827							=cut
828
829							sub _unambiquous_codons {
830	0			0		0	unambiguous_codons( undef, @_ );
831							}
832
833							=head2 add_table
834
835							Title : add_table
836							Usage : $newid = $ct->add_table($name, $table, $starts)
837							Function: Add a custom Codon Table into the object.
838							Know what you are doing, only the length of
839							the argument strings is checked!
840							Returns : the id of the new codon table
841							Args : name, a string, optional (can be empty)
842							table, a string of 64 characters
843							startcodons, a string of 64 characters, defaults to standard
844
845							=cut
846
847							sub add_table {
848	1			1	1	2	my ($self, $name, $table, $starts) = @_;
849
850	1		33			2	$name \|\|= 'Custom' . $#NAMES + 1;
851	1		33			4	$starts \|\|= $STARTS[1];
852	1	50	33			5	$self->throw('Suspect input!')
853							unless length($table) == 64 and length($starts) == 64;
854
855	1					2	push @NAMES, $name;
856	1					2	push @TABLES, $table;
857	1					1	push @STARTS, $starts;
858
859	1					3	return $#NAMES;
860							}
861
862							sub _make_iupac_string {
863	22			22		18	my ($self, $cod_ref) = @_;
864	22	50				47	if(ref($cod_ref) ne 'ARRAY') {
865	0					0	$self->throw(" I need a reference to a list of references to codons, ".
866							" not a [". ref($cod_ref) . "].");
867							}
868	22					52	my %iupac_hash = Bio::Tools::IUPAC->iupac_rev_iub();
869	22					38	my $iupac_string = ''; ## the string to be returned
870	22					23	for my $aa (@$cod_ref) {
871
872							## scan through codon positions, record the differing values,
873							# then look up in the iub hash
874	96					88	for my $index(0..2) {
875	288					150	my %h;
876	288					206	map { my $k = substr($_,$index,1);
	846					544
877	846					682	$h{$k} = undef;} @$aa;
878	288					337	my $lookup_key = join '', sort{$a cmp $b}keys %h;
	225					224
879
880							## extend string
881	288					378	$iupac_string .= $iupac_hash{uc$lookup_key};
882							}
883							}
884	22					149	return $iupac_string;
885							}
886
887
888							1;