line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# bioperl module for Bio::Tools::CodonTable |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Heikki Lehvaslaiho |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright Heikki Lehvaslaiho |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::Tools::CodonTable - Codon table object |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# This is a read-only class for all known codon tables. The IDs are |
21
|
|
|
|
|
|
|
# the ones used by nucleotide sequence databases. All common IUPAC |
22
|
|
|
|
|
|
|
# ambiguity codes for DNA, RNA and amino acids are recognized. |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
use Bio::Tools::CodonTable; |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# defaults to ID 1 "Standard" |
27
|
|
|
|
|
|
|
$myCodonTable = Bio::Tools::CodonTable->new(); |
28
|
|
|
|
|
|
|
$myCodonTable2 = Bio::Tools::CodonTable->new( -id => 3 ); |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# change codon table |
31
|
|
|
|
|
|
|
$myCodonTable->id(5); |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# examine codon table |
34
|
|
|
|
|
|
|
print join (' ', "The name of the codon table no.", $myCodonTable->id(4), |
35
|
|
|
|
|
|
|
"is:", $myCodonTable->name(), "\n"); |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
# print possible codon tables |
38
|
|
|
|
|
|
|
$tables = Bio::Tools::CodonTable->tables; |
39
|
|
|
|
|
|
|
while ( ($id,$name) = each %{$tables} ) { |
40
|
|
|
|
|
|
|
print "$id = $name\n"; |
41
|
|
|
|
|
|
|
} |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
# translate a codon |
44
|
|
|
|
|
|
|
$aa = $myCodonTable->translate('ACU'); |
45
|
|
|
|
|
|
|
$aa = $myCodonTable->translate('act'); |
46
|
|
|
|
|
|
|
$aa = $myCodonTable->translate('ytr'); |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# reverse translate an amino acid |
49
|
|
|
|
|
|
|
@codons = $myCodonTable->revtranslate('A'); |
50
|
|
|
|
|
|
|
@codons = $myCodonTable->revtranslate('Ser'); |
51
|
|
|
|
|
|
|
@codons = $myCodonTable->revtranslate('Glx'); |
52
|
|
|
|
|
|
|
@codons = $myCodonTable->revtranslate('cYS', 'rna'); |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# reverse translate an entire amino acid sequence into a IUPAC |
55
|
|
|
|
|
|
|
# nucleotide string |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
my $seqobj = Bio::PrimarySeq->new(-seq => 'FHGERHEL'); |
58
|
|
|
|
|
|
|
my $iupac_str = $myCodonTable->reverse_translate_all($seqobj); |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
# boolean tests |
61
|
|
|
|
|
|
|
print "Is a start\n" if $myCodonTable->is_start_codon('ATG'); |
62
|
|
|
|
|
|
|
print "Is a terminator\n" if $myCodonTable->is_ter_codon('tar'); |
63
|
|
|
|
|
|
|
print "Is a unknown\n" if $myCodonTable->is_unknown_codon('JTG'); |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=head1 DESCRIPTION |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
Codon tables are also called translation tables or genetic codes |
68
|
|
|
|
|
|
|
since that is what they represent. A bit more complete picture |
69
|
|
|
|
|
|
|
of the full complexity of codon usage in various taxonomic groups |
70
|
|
|
|
|
|
|
is presented at the NCBI Genetic Codes Home page. |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
CodonTable is a BioPerl class that knows all current translation |
73
|
|
|
|
|
|
|
tables that are used by primary nucleotide sequence databases |
74
|
|
|
|
|
|
|
(GenBank, EMBL and DDBJ). It provides methods to output information |
75
|
|
|
|
|
|
|
about tables and relationships between codons and amino acids. |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
This class and its methods recognized all common IUPAC ambiguity codes |
78
|
|
|
|
|
|
|
for DNA, RNA and animo acids. The translation method follows the |
79
|
|
|
|
|
|
|
conventions in EMBL and TREMBL databases. |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
It is a nuisance to separate RNA and cDNA representations of nucleic |
82
|
|
|
|
|
|
|
acid transcripts. The CodonTable object accepts codons of both type as |
83
|
|
|
|
|
|
|
input and allows the user to set the mode for output when reverse |
84
|
|
|
|
|
|
|
translating. Its default for output is DNA. |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
Note: |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
This class deals primarily with individual codons and amino |
89
|
|
|
|
|
|
|
acids. However in the interest of speed you can L |
90
|
|
|
|
|
|
|
longer sequence, too. The full complexity of protein translation |
91
|
|
|
|
|
|
|
is tackled by L. |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
The amino acid codes are IUPAC recommendations for common amino acids: |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
A Ala Alanine |
97
|
|
|
|
|
|
|
R Arg Arginine |
98
|
|
|
|
|
|
|
N Asn Asparagine |
99
|
|
|
|
|
|
|
D Asp Aspartic acid |
100
|
|
|
|
|
|
|
C Cys Cysteine |
101
|
|
|
|
|
|
|
Q Gln Glutamine |
102
|
|
|
|
|
|
|
E Glu Glutamic acid |
103
|
|
|
|
|
|
|
G Gly Glycine |
104
|
|
|
|
|
|
|
H His Histidine |
105
|
|
|
|
|
|
|
I Ile Isoleucine |
106
|
|
|
|
|
|
|
L Leu Leucine |
107
|
|
|
|
|
|
|
K Lys Lysine |
108
|
|
|
|
|
|
|
M Met Methionine |
109
|
|
|
|
|
|
|
F Phe Phenylalanine |
110
|
|
|
|
|
|
|
P Pro Proline |
111
|
|
|
|
|
|
|
O Pyl Pyrrolysine (22nd amino acid) |
112
|
|
|
|
|
|
|
U Sec Selenocysteine (21st amino acid) |
113
|
|
|
|
|
|
|
S Ser Serine |
114
|
|
|
|
|
|
|
T Thr Threonine |
115
|
|
|
|
|
|
|
W Trp Tryptophan |
116
|
|
|
|
|
|
|
Y Tyr Tyrosine |
117
|
|
|
|
|
|
|
V Val Valine |
118
|
|
|
|
|
|
|
B Asx Aspartic acid or Asparagine |
119
|
|
|
|
|
|
|
Z Glx Glutamine or Glutamic acid |
120
|
|
|
|
|
|
|
J Xle Isoleucine or Valine (mass spec ambiguity) |
121
|
|
|
|
|
|
|
X Xaa Any or unknown amino acid |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
It is worth noting that, "Bacterial" codon table no. 11 produces an |
125
|
|
|
|
|
|
|
polypeptide that is, confusingly, identical to the standard one. The |
126
|
|
|
|
|
|
|
only differences are in available initiator codons. |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
NCBI Genetic Codes home page: |
130
|
|
|
|
|
|
|
(Last update of the Genetic Codes: April 30, 2013) |
131
|
|
|
|
|
|
|
https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
ASN.1 version with ids 1 to 25 is at: |
134
|
|
|
|
|
|
|
ftp://ftp.ncbi.nih.gov/entrez/misc/data/gc.prt |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
Thanks to Matteo diTomasso for the original Perl implementation |
137
|
|
|
|
|
|
|
of these tables. |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=head1 FEEDBACK |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=head2 Mailing Lists |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
144
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to the |
145
|
|
|
|
|
|
|
Bioperl mailing lists Your participation is much appreciated. |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
148
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
=head2 Support |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
I |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
157
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
158
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
159
|
|
|
|
|
|
|
with code and data examples if at all possible. |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=head2 Reporting Bugs |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
164
|
|
|
|
|
|
|
the bugs and their resolution. Bug reports can be submitted via the |
165
|
|
|
|
|
|
|
web: |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
=head1 AUTHOR - Heikki Lehvaslaiho |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
Email: heikki-at-bioperl-dot-org |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=head1 APPENDIX |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
176
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a _ |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=cut |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
# Let the code begin... |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
package Bio::Tools::CodonTable; |
183
|
203
|
|
|
|
|
17496
|
use vars qw(@NAMES @TABLES @STARTS $TRCOL $CODONS %IUPAC_DNA $CODONGAP $GAP |
184
|
203
|
|
|
203
|
|
1786
|
%IUPAC_AA %THREELETTERSYMBOLS $VALID_PROTEIN $TERMINATOR); |
|
203
|
|
|
|
|
246
|
|
185
|
203
|
|
|
203
|
|
693
|
use strict; |
|
203
|
|
|
|
|
238
|
|
|
203
|
|
|
|
|
3582
|
|
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
# Object preamble - inherits from Bio::Root::Root |
188
|
203
|
|
|
203
|
|
60256
|
use Bio::Tools::IUPAC; |
|
203
|
|
|
|
|
320
|
|
|
203
|
|
|
|
|
4615
|
|
189
|
203
|
|
|
203
|
|
89367
|
use Bio::SeqUtils; |
|
203
|
|
|
|
|
357
|
|
|
203
|
|
|
|
|
5769
|
|
190
|
|
|
|
|
|
|
|
191
|
203
|
|
|
203
|
|
1097
|
use base qw(Bio::Root::Root); |
|
203
|
|
|
|
|
213
|
|
|
203
|
|
|
|
|
14206
|
|
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
# first set internal values for all translation tables |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
BEGIN { |
197
|
203
|
|
|
203
|
|
858
|
use constant CODONSIZE => 3; |
|
203
|
|
|
|
|
244
|
|
|
203
|
|
|
|
|
54678
|
|
198
|
203
|
|
|
203
|
|
415
|
$GAP = '-'; |
199
|
203
|
|
|
|
|
525
|
$CODONGAP = $GAP x CODONSIZE; |
200
|
|
|
|
|
|
|
|
201
|
203
|
|
|
|
|
887
|
@NAMES = #id |
202
|
|
|
|
|
|
|
( |
203
|
|
|
|
|
|
|
'Strict', #0, special option for ATG-only start |
204
|
|
|
|
|
|
|
'Standard', #1 |
205
|
|
|
|
|
|
|
'Vertebrate Mitochondrial',#2 |
206
|
|
|
|
|
|
|
'Yeast Mitochondrial',# 3 |
207
|
|
|
|
|
|
|
'Mold, Protozoan, and Coelenterate Mitochondrial and Mycoplasma/Spiroplasma',#4 |
208
|
|
|
|
|
|
|
'Invertebrate Mitochondrial',#5 |
209
|
|
|
|
|
|
|
'Ciliate, Dasycladacean and Hexamita Nuclear',# 6 |
210
|
|
|
|
|
|
|
'', '', |
211
|
|
|
|
|
|
|
'Echinoderm and Flatworm Mitochondrial',#9 |
212
|
|
|
|
|
|
|
'Euplotid Nuclear',#10 |
213
|
|
|
|
|
|
|
'Bacterial, Archaeal and Plant Plastid',# 11 |
214
|
|
|
|
|
|
|
'Alternative Yeast Nuclear',# 12 |
215
|
|
|
|
|
|
|
'Ascidian Mitochondrial',# 13 |
216
|
|
|
|
|
|
|
'Alternative Flatworm Mitochondrial',# 14 |
217
|
|
|
|
|
|
|
'Blepharisma Nuclear',# 15 |
218
|
|
|
|
|
|
|
'Chlorophycean Mitochondrial',# 16 |
219
|
|
|
|
|
|
|
'', '', '', '', |
220
|
|
|
|
|
|
|
'Trematode Mitochondrial',# 21 |
221
|
|
|
|
|
|
|
'Scenedesmus obliquus Mitochondrial', #22 |
222
|
|
|
|
|
|
|
'Thraustochytrium Mitochondrial', #23 |
223
|
|
|
|
|
|
|
'Pterobranchia Mitochondrial', #24 |
224
|
|
|
|
|
|
|
'Candidate Division SR1 and Gracilibacteria', #25 |
225
|
|
|
|
|
|
|
); |
226
|
|
|
|
|
|
|
|
227
|
203
|
|
|
|
|
764
|
@TABLES = |
228
|
|
|
|
|
|
|
qw( |
229
|
|
|
|
|
|
|
FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG |
230
|
|
|
|
|
|
|
FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG |
231
|
|
|
|
|
|
|
FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG |
232
|
|
|
|
|
|
|
FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG |
233
|
|
|
|
|
|
|
FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG |
234
|
|
|
|
|
|
|
FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG |
235
|
|
|
|
|
|
|
FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG |
236
|
|
|
|
|
|
|
'' '' |
237
|
|
|
|
|
|
|
FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG |
238
|
|
|
|
|
|
|
FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG |
239
|
|
|
|
|
|
|
FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG |
240
|
|
|
|
|
|
|
FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG |
241
|
|
|
|
|
|
|
FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG |
242
|
|
|
|
|
|
|
FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG |
243
|
|
|
|
|
|
|
FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG |
244
|
|
|
|
|
|
|
FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG |
245
|
|
|
|
|
|
|
'' '' '' '' |
246
|
|
|
|
|
|
|
FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG |
247
|
|
|
|
|
|
|
FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG |
248
|
|
|
|
|
|
|
FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG |
249
|
|
|
|
|
|
|
FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG |
250
|
|
|
|
|
|
|
FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG |
251
|
|
|
|
|
|
|
); |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
# (bases used for these tables, for reference) |
254
|
|
|
|
|
|
|
# 1 TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG |
255
|
|
|
|
|
|
|
# 2 TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG |
256
|
|
|
|
|
|
|
# 3 TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG |
257
|
|
|
|
|
|
|
|
258
|
203
|
|
|
|
|
785
|
@STARTS = |
259
|
|
|
|
|
|
|
qw( |
260
|
|
|
|
|
|
|
-----------------------------------M---------------------------- |
261
|
|
|
|
|
|
|
---M---------------M---------------M---------------------------- |
262
|
|
|
|
|
|
|
--------------------------------MMMM---------------M------------ |
263
|
|
|
|
|
|
|
----------------------------------MM---------------------------- |
264
|
|
|
|
|
|
|
--MM---------------M------------MMMM---------------M------------ |
265
|
|
|
|
|
|
|
---M----------------------------MMMM---------------M------------ |
266
|
|
|
|
|
|
|
-----------------------------------M---------------------------- |
267
|
|
|
|
|
|
|
'' '' |
268
|
|
|
|
|
|
|
-----------------------------------M---------------M------------ |
269
|
|
|
|
|
|
|
-----------------------------------M---------------------------- |
270
|
|
|
|
|
|
|
---M---------------M------------MMMM---------------M------------ |
271
|
|
|
|
|
|
|
-------------------M---------------M---------------------------- |
272
|
|
|
|
|
|
|
---M------------------------------MM---------------M------------ |
273
|
|
|
|
|
|
|
-----------------------------------M---------------------------- |
274
|
|
|
|
|
|
|
-----------------------------------M---------------------------- |
275
|
|
|
|
|
|
|
-----------------------------------M---------------------------- |
276
|
|
|
|
|
|
|
'' '' '' '' |
277
|
|
|
|
|
|
|
-----------------------------------M---------------M------------ |
278
|
|
|
|
|
|
|
-----------------------------------M---------------------------- |
279
|
|
|
|
|
|
|
--------------------------------M--M---------------M------------ |
280
|
|
|
|
|
|
|
---M---------------M---------------M---------------M------------ |
281
|
|
|
|
|
|
|
---M-------------------------------M---------------M------------ |
282
|
|
|
|
|
|
|
); |
283
|
|
|
|
|
|
|
|
284
|
203
|
|
|
|
|
382
|
my @nucs = qw(t c a g); |
285
|
203
|
|
|
|
|
274
|
my $x = 0; |
286
|
203
|
|
|
|
|
441
|
($CODONS, $TRCOL) = ({}, {}); |
287
|
203
|
|
|
|
|
436
|
for my $i (@nucs) { |
288
|
812
|
|
|
|
|
697
|
for my $j (@nucs) { |
289
|
3248
|
|
|
|
|
2354
|
for my $k (@nucs) { |
290
|
12992
|
|
|
|
|
9340
|
my $codon = "$i$j$k"; |
291
|
12992
|
|
|
|
|
15171
|
$CODONS->{$codon} = $x; |
292
|
12992
|
|
|
|
|
15295
|
$TRCOL->{$x} = $codon; |
293
|
12992
|
|
|
|
|
9666
|
$x++; |
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
} |
296
|
|
|
|
|
|
|
} |
297
|
203
|
|
|
|
|
1213
|
%IUPAC_DNA = Bio::Tools::IUPAC->iupac_iub(); |
298
|
203
|
|
|
|
|
793
|
%IUPAC_AA = Bio::Tools::IUPAC->iupac_iup(); |
299
|
203
|
|
|
|
|
870
|
%THREELETTERSYMBOLS = Bio::SeqUtils->valid_aa(2); |
300
|
203
|
|
|
|
|
1100
|
$VALID_PROTEIN = '['.join('',Bio::SeqUtils->valid_aa(0)).']'; |
301
|
203
|
|
|
|
|
349993
|
$TERMINATOR = '*'; |
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
sub new { |
305
|
505
|
|
|
505
|
1
|
1569
|
my($class,@args) = @_; |
306
|
505
|
|
|
|
|
1147
|
my $self = $class->SUPER::new(@args); |
307
|
|
|
|
|
|
|
|
308
|
505
|
|
|
|
|
1309
|
my($id) = |
309
|
|
|
|
|
|
|
$self->_rearrange([qw(ID |
310
|
|
|
|
|
|
|
)], |
311
|
|
|
|
|
|
|
@args); |
312
|
|
|
|
|
|
|
|
313
|
505
|
100
|
|
|
|
1057
|
$id = 1 if ( ! $id ); |
314
|
505
|
50
|
|
|
|
1161
|
$id && $self->id($id); |
315
|
505
|
|
|
|
|
814
|
return $self; # success - we hope! |
316
|
|
|
|
|
|
|
} |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
=head2 id |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
Title : id |
321
|
|
|
|
|
|
|
Usage : $obj->id(3); $id_integer = $obj->id(); |
322
|
|
|
|
|
|
|
Function: Sets or returns the id of the translation table. IDs are |
323
|
|
|
|
|
|
|
integers from 0 (special ATG-only start) to 25, excluding |
324
|
|
|
|
|
|
|
7-8 and 17-20 which have been removed. If an invalid ID is |
325
|
|
|
|
|
|
|
given the method returns 1, the standard table. |
326
|
|
|
|
|
|
|
Example : |
327
|
|
|
|
|
|
|
Returns : value of id, a scalar, warn and fall back to 1 (standard table) |
328
|
|
|
|
|
|
|
if specified id is not valid |
329
|
|
|
|
|
|
|
Args : newvalue (optional) |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=cut |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
sub id{ |
334
|
25860
|
|
|
25860
|
1
|
19358
|
my ($self,$value) = @_; |
335
|
25860
|
100
|
|
|
|
29756
|
if( defined $value) { |
336
|
644
|
100
|
66
|
|
|
2230
|
if ( not defined $TABLES[$value] or $TABLES[$value] eq '') { |
337
|
1
|
|
|
|
|
9
|
$self->warn("Not a valid codon table ID [$value], using [1] instead "); |
338
|
1
|
|
|
|
|
2
|
$value = 1; |
339
|
|
|
|
|
|
|
} |
340
|
644
|
|
|
|
|
764
|
$self->{'id'} = $value; |
341
|
|
|
|
|
|
|
} |
342
|
25860
|
|
|
|
|
22613
|
return $self->{'id'}; |
343
|
|
|
|
|
|
|
} |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
=head2 name |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
Title : name |
348
|
|
|
|
|
|
|
Usage : $obj->name() |
349
|
|
|
|
|
|
|
Function: returns the descriptive name of the translation table |
350
|
|
|
|
|
|
|
Example : |
351
|
|
|
|
|
|
|
Returns : A string |
352
|
|
|
|
|
|
|
Args : None |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
=cut |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
sub name{ |
358
|
1
|
|
|
1
|
1
|
2
|
my ($self) = @_; |
359
|
|
|
|
|
|
|
|
360
|
1
|
|
|
|
|
1
|
my ($id) = $self->{'id'}; |
361
|
1
|
|
|
|
|
4
|
return $NAMES[$id]; |
362
|
|
|
|
|
|
|
} |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
=head2 tables |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
Title : tables |
367
|
|
|
|
|
|
|
Usage : $obj->tables() or Bio::Tools::CodonTable->tables() |
368
|
|
|
|
|
|
|
Function: returns a hash reference where each key is a valid codon |
369
|
|
|
|
|
|
|
table id() number, and each value is the corresponding |
370
|
|
|
|
|
|
|
codon table name() string |
371
|
|
|
|
|
|
|
Example : |
372
|
|
|
|
|
|
|
Returns : A hashref |
373
|
|
|
|
|
|
|
Args : None |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
=cut |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
sub tables{ |
379
|
2
|
|
|
2
|
1
|
791
|
my %tables; |
380
|
2
|
|
|
|
|
5
|
for my $id (0 .. $#NAMES) { |
381
|
52
|
|
|
|
|
28
|
my $name = $NAMES[$id]; |
382
|
52
|
100
|
|
|
|
79
|
$tables{$id} = $name if $name; |
383
|
|
|
|
|
|
|
} |
384
|
2
|
|
|
|
|
4
|
return \%tables; |
385
|
|
|
|
|
|
|
} |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
=head2 translate |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
Title : translate |
390
|
|
|
|
|
|
|
Usage : $obj->translate('YTR') |
391
|
|
|
|
|
|
|
Function: Returns a string of one letter amino acid codes from |
392
|
|
|
|
|
|
|
nucleotide sequence input. The imput can be of any length. |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
Returns 'X' for unknown codons and codons that code for |
395
|
|
|
|
|
|
|
more than one amino acid. Returns an empty string if input |
396
|
|
|
|
|
|
|
is not three characters long. Exceptions for these are: |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
- IUPAC amino acid code B for Aspartic Acid and |
399
|
|
|
|
|
|
|
Asparagine, is used. |
400
|
|
|
|
|
|
|
- IUPAC amino acid code Z for Glutamic Acid, Glutamine is |
401
|
|
|
|
|
|
|
used. |
402
|
|
|
|
|
|
|
- if the codon is two nucleotides long and if by adding |
403
|
|
|
|
|
|
|
an a third character 'N', it codes for a single amino |
404
|
|
|
|
|
|
|
acid (with exceptions above), return that, otherwise |
405
|
|
|
|
|
|
|
return empty string. |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
Returns empty string for other input strings that are not |
408
|
|
|
|
|
|
|
three characters long. |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
Example : |
411
|
|
|
|
|
|
|
Returns : a string of one letter ambiguous IUPAC amino acid codes |
412
|
|
|
|
|
|
|
Args : ambiguous IUPAC nucleotide string |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
=cut |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
sub translate { |
418
|
19902
|
|
|
19902
|
1
|
19535
|
my ($self, $seq, $complete_codon) = @_; |
419
|
19902
|
100
|
|
|
|
22907
|
$self->throw("Calling translate without a seq argument!") unless defined $seq; |
420
|
19901
|
100
|
|
|
|
20940
|
return '' unless $seq; |
421
|
|
|
|
|
|
|
|
422
|
19900
|
|
|
|
|
21213
|
my $id = $self->id; |
423
|
19900
|
|
|
|
|
13972
|
my ($partial) = 0; |
424
|
19900
|
100
|
|
|
|
24766
|
$partial = 2 if length($seq) % CODONSIZE == 2; |
425
|
|
|
|
|
|
|
|
426
|
19900
|
|
|
|
|
15896
|
$seq = lc $seq; |
427
|
19900
|
|
|
|
|
14088
|
$seq =~ tr/u/t/; |
428
|
19900
|
|
|
|
|
12778
|
my $protein = ""; |
429
|
19900
|
100
|
|
|
|
34675
|
if ($seq =~ /[^actg]/ ) { #ambiguous chars |
430
|
5315
|
|
|
|
|
8431
|
for (my $i = 0; $i < (length($seq) - (CODONSIZE-1)); $i+= CODONSIZE) { |
431
|
17779
|
|
|
|
|
13041
|
my $triplet = substr($seq, $i, CODONSIZE); |
432
|
17779
|
100
|
|
|
|
22028
|
if( $triplet eq $CODONGAP ) { |
|
|
100
|
|
|
|
|
|
433
|
1527
|
|
|
|
|
1896
|
$protein .= $GAP; |
434
|
|
|
|
|
|
|
} elsif (exists $CODONS->{$triplet}) { |
435
|
|
|
|
|
|
|
$protein .= substr($TABLES[$id], |
436
|
10946
|
|
|
|
|
15656
|
$CODONS->{$triplet},1); |
437
|
|
|
|
|
|
|
} else { |
438
|
5306
|
|
|
|
|
6328
|
$protein .= $self->_translate_ambiguous_codon($triplet); |
439
|
|
|
|
|
|
|
} |
440
|
|
|
|
|
|
|
} |
441
|
|
|
|
|
|
|
} else { # simple, strict translation |
442
|
14585
|
|
|
|
|
22708
|
for (my $i = 0; $i < (length($seq) - (CODONSIZE -1)); $i+=CODONSIZE) { |
443
|
124418
|
|
|
|
|
83691
|
my $triplet = substr($seq, $i, CODONSIZE); |
444
|
124418
|
50
|
|
|
|
128067
|
if( $triplet eq $CODONGAP ) { |
445
|
0
|
|
|
|
|
0
|
$protein .= $GAP; |
446
|
|
|
|
|
|
|
} |
447
|
124418
|
50
|
|
|
|
111405
|
if (exists $CODONS->{$triplet}) { |
448
|
124418
|
|
|
|
|
185054
|
$protein .= substr($TABLES[$id], $CODONS->{$triplet}, 1); |
449
|
|
|
|
|
|
|
} else { |
450
|
0
|
|
|
|
|
0
|
$protein .= 'X'; |
451
|
|
|
|
|
|
|
} |
452
|
|
|
|
|
|
|
} |
453
|
|
|
|
|
|
|
} |
454
|
19900
|
100
|
100
|
|
|
28032
|
if ($partial == 2 && $complete_codon) { # 2 overhanging nucleotides |
455
|
7
|
|
|
|
|
13
|
my $triplet = substr($seq, ($partial -4)). "n"; |
456
|
7
|
50
|
|
|
|
20
|
if( $triplet eq $CODONGAP ) { |
|
|
50
|
|
|
|
|
|
457
|
0
|
|
|
|
|
0
|
$protein .= $GAP; |
458
|
|
|
|
|
|
|
} elsif (exists $CODONS->{$triplet}) { |
459
|
0
|
|
|
|
|
0
|
my $aa = substr($TABLES[$id], $CODONS->{$triplet},1); |
460
|
0
|
|
|
|
|
0
|
$protein .= $aa; |
461
|
|
|
|
|
|
|
} else { |
462
|
7
|
|
|
|
|
10
|
$protein .= $self->_translate_ambiguous_codon($triplet, $partial); |
463
|
|
|
|
|
|
|
} |
464
|
|
|
|
|
|
|
} |
465
|
19900
|
|
|
|
|
26973
|
return $protein; |
466
|
|
|
|
|
|
|
} |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
sub _translate_ambiguous_codon { |
469
|
5313
|
|
|
5313
|
|
4487
|
my ($self, $triplet, $partial) = @_; |
470
|
5313
|
|
100
|
|
|
11948
|
$partial ||= 0; |
471
|
5313
|
|
|
|
|
5067
|
my $id = $self->id; |
472
|
5313
|
|
|
|
|
3826
|
my $aa; |
473
|
5313
|
|
|
|
|
5713
|
my @codons = $self->unambiguous_codons($triplet); |
474
|
5313
|
|
|
|
|
4721
|
my %aas =(); |
475
|
5313
|
|
|
|
|
4446
|
foreach my $codon (@codons) { |
476
|
401
|
|
|
|
|
478
|
$aas{substr($TABLES[$id],$CODONS->{$codon},1)} = 1; |
477
|
|
|
|
|
|
|
} |
478
|
5313
|
|
|
|
|
4378
|
my $count = scalar keys %aas; |
479
|
5313
|
100
|
|
|
|
8187
|
if ( $count == 1 ) { |
|
|
100
|
|
|
|
|
|
480
|
141
|
|
|
|
|
148
|
$aa = (keys %aas)[0]; |
481
|
|
|
|
|
|
|
} |
482
|
|
|
|
|
|
|
elsif ( $count == 2 ) { |
483
|
13
|
100
|
66
|
|
|
36
|
if ($aas{'D'} and $aas{'N'}) { |
|
|
100
|
66
|
|
|
|
|
484
|
3
|
|
|
|
|
4
|
$aa = 'B'; |
485
|
|
|
|
|
|
|
} |
486
|
|
|
|
|
|
|
elsif ($aas{'E'} and $aas{'Q'}) { |
487
|
4
|
|
|
|
|
3
|
$aa = 'Z'; |
488
|
|
|
|
|
|
|
} else { |
489
|
6
|
100
|
|
|
|
10
|
$partial ? ($aa = '') : ($aa = 'X'); |
490
|
|
|
|
|
|
|
} |
491
|
|
|
|
|
|
|
} else { |
492
|
5159
|
100
|
|
|
|
5576
|
$partial ? ($aa = '') : ($aa = 'X'); |
493
|
|
|
|
|
|
|
} |
494
|
5313
|
|
|
|
|
12982
|
return $aa; |
495
|
|
|
|
|
|
|
} |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
=head2 translate_strict |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
Title : translate_strict |
500
|
|
|
|
|
|
|
Usage : $obj->translate_strict('ACT') |
501
|
|
|
|
|
|
|
Function: returns one letter amino acid code for a codon input |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
Fast and simple translation. User is responsible to resolve |
504
|
|
|
|
|
|
|
ambiguous nucleotide codes before calling this |
505
|
|
|
|
|
|
|
method. Returns 'X' for unknown codons and an empty string |
506
|
|
|
|
|
|
|
for input strings that are not three characters long. |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
It is not recommended to use this method in a production |
509
|
|
|
|
|
|
|
environment. Use method translate, instead. |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
Example : |
512
|
|
|
|
|
|
|
Returns : A string |
513
|
|
|
|
|
|
|
Args : a codon = a three nucleotide character string |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
=cut |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
sub translate_strict{ |
519
|
3
|
|
|
3
|
1
|
5
|
my ($self, $value) = @_; |
520
|
3
|
|
|
|
|
4
|
my $id = $self->{'id'}; |
521
|
|
|
|
|
|
|
|
522
|
3
|
|
|
|
|
4
|
$value = lc $value; |
523
|
3
|
|
|
|
|
3
|
$value =~ tr/u/t/; |
524
|
|
|
|
|
|
|
|
525
|
3
|
50
|
|
|
|
7
|
return '' unless length $value == 3; |
526
|
|
|
|
|
|
|
|
527
|
3
|
50
|
|
|
|
7
|
return 'X' unless defined $CODONS->{$value}; |
528
|
|
|
|
|
|
|
|
529
|
3
|
|
|
|
|
10
|
return substr( $TABLES[$id], $CODONS->{$value}, 1 ); |
530
|
|
|
|
|
|
|
} |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
=head2 revtranslate |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
Title : revtranslate |
535
|
|
|
|
|
|
|
Usage : $obj->revtranslate('G') |
536
|
|
|
|
|
|
|
Function: returns codons for an amino acid |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
Returns an empty string for unknown amino acid |
539
|
|
|
|
|
|
|
codes. Ambiguous IUPAC codes Asx,B, (Asp,D; Asn,N) and |
540
|
|
|
|
|
|
|
Glx,Z (Glu,E; Gln,Q) are resolved. Both single and three |
541
|
|
|
|
|
|
|
letter amino acid codes are accepted. '*' and 'Ter' are |
542
|
|
|
|
|
|
|
used for terminator. |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
By default, the output codons are shown in DNA. If the |
545
|
|
|
|
|
|
|
output is needed in RNA (tr/t/u/), add a second argument |
546
|
|
|
|
|
|
|
'RNA'. |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
Example : $obj->revtranslate('Gly', 'RNA') |
549
|
|
|
|
|
|
|
Returns : An array of three lower case letter strings i.e. codons |
550
|
|
|
|
|
|
|
Args : amino acid, 'RNA' |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
=cut |
553
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
sub revtranslate { |
555
|
135
|
|
|
135
|
1
|
842
|
my ($self, $value, $coding) = @_; |
556
|
135
|
|
|
|
|
81
|
my @codons; |
557
|
|
|
|
|
|
|
|
558
|
135
|
100
|
|
|
|
186
|
if (length($value) == 3 ) { |
559
|
5
|
|
|
|
|
4
|
$value = lc $value; |
560
|
5
|
|
|
|
|
7
|
$value = ucfirst $value; |
561
|
5
|
|
|
|
|
8
|
$value = $THREELETTERSYMBOLS{$value}; |
562
|
|
|
|
|
|
|
} |
563
|
135
|
100
|
100
|
|
|
766
|
if ( defined $value and $value =~ /$VALID_PROTEIN/ |
|
|
|
66
|
|
|
|
|
564
|
|
|
|
|
|
|
and length($value) == 1 |
565
|
|
|
|
|
|
|
) { |
566
|
133
|
|
|
|
|
131
|
my $id = $self->{'id'}; |
567
|
|
|
|
|
|
|
|
568
|
133
|
|
|
|
|
116
|
$value = uc $value; |
569
|
133
|
|
|
|
|
78
|
my @aas = @{$IUPAC_AA{$value}}; |
|
133
|
|
|
|
|
198
|
|
570
|
133
|
|
|
|
|
130
|
foreach my $aa (@aas) { |
571
|
|
|
|
|
|
|
#print $aa, " -2\n"; |
572
|
145
|
100
|
|
|
|
177
|
$aa = '\*' if $aa eq '*'; |
573
|
145
|
|
|
|
|
912
|
while ($TABLES[$id] =~ m/$aa/g) { |
574
|
399
|
|
|
|
|
280
|
my $p = pos $TABLES[$id]; |
575
|
399
|
|
|
|
|
935
|
push (@codons, $TRCOL->{--$p}); |
576
|
|
|
|
|
|
|
} |
577
|
|
|
|
|
|
|
} |
578
|
|
|
|
|
|
|
} |
579
|
|
|
|
|
|
|
|
580
|
135
|
100
|
66
|
|
|
204
|
if ($coding and uc ($coding) eq 'RNA') { |
581
|
1
|
|
|
|
|
3
|
for my $i (0..$#codons) { |
582
|
1
|
|
|
|
|
2
|
$codons[$i] =~ tr/t/u/; |
583
|
|
|
|
|
|
|
} |
584
|
|
|
|
|
|
|
} |
585
|
|
|
|
|
|
|
|
586
|
135
|
|
|
|
|
298
|
return @codons; |
587
|
|
|
|
|
|
|
} |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
=head2 reverse_translate_all |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
Title : reverse_translate_all |
592
|
|
|
|
|
|
|
Usage : my $iup_str = $cttable->reverse_translate_all($seq_object) |
593
|
|
|
|
|
|
|
my $iup_str = $cttable->reverse_translate_all($seq_object, |
594
|
|
|
|
|
|
|
$cutable, |
595
|
|
|
|
|
|
|
15); |
596
|
|
|
|
|
|
|
Function: reverse translates a protein sequence into IUPAC nucleotide |
597
|
|
|
|
|
|
|
sequence. An 'X' in the protein sequence is converted to 'NNN' |
598
|
|
|
|
|
|
|
in the nucleotide sequence. |
599
|
|
|
|
|
|
|
Returns : a string |
600
|
|
|
|
|
|
|
Args : a Bio::PrimarySeqI compatible object (mandatory) |
601
|
|
|
|
|
|
|
a Bio::CodonUsage::Table object and a threshold if only |
602
|
|
|
|
|
|
|
codons with a relative frequency above the threshold are |
603
|
|
|
|
|
|
|
to be considered. |
604
|
|
|
|
|
|
|
=cut |
605
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
sub reverse_translate_all { |
607
|
22
|
|
|
22
|
1
|
26
|
my ($self, $obj, $cut, $threshold) = @_; |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
## check args are OK |
610
|
|
|
|
|
|
|
|
611
|
22
|
50
|
33
|
|
|
89
|
if (!$obj || !$obj->isa('Bio::PrimarySeqI')){ |
612
|
0
|
|
|
|
|
0
|
$self->throw(" I need a Bio::PrimarySeqI object, not a [". |
613
|
|
|
|
|
|
|
ref($obj) . "]"); |
614
|
|
|
|
|
|
|
} |
615
|
22
|
50
|
|
|
|
36
|
if($obj->alphabet ne 'protein') { |
616
|
0
|
|
|
|
|
0
|
$self->throw("Cannot reverse translate, need an amino acid sequence .". |
617
|
|
|
|
|
|
|
"This sequence is of type [" . $obj->alphabet ."]"); |
618
|
|
|
|
|
|
|
} |
619
|
22
|
|
|
|
|
19
|
my @data; |
620
|
22
|
|
|
|
|
35
|
my @seq = split '', $obj->seq; |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
## if we're not supplying a codon usage table... |
623
|
22
|
100
|
66
|
|
|
77
|
if( !$cut && !$threshold) { |
624
|
|
|
|
|
|
|
## get lists of possible codons for each aa. |
625
|
21
|
|
|
|
|
26
|
for my $aa (@seq) { |
626
|
75
|
100
|
|
|
|
108
|
if ($aa =~ /x/i) { |
627
|
7
|
|
|
|
|
11
|
push @data, (['NNN']); |
628
|
|
|
|
|
|
|
}else { |
629
|
68
|
|
|
|
|
91
|
my @cods = $self->revtranslate($aa); |
630
|
68
|
|
|
|
|
106
|
push @data, \@cods; |
631
|
|
|
|
|
|
|
} |
632
|
|
|
|
|
|
|
} |
633
|
|
|
|
|
|
|
}else{ |
634
|
|
|
|
|
|
|
#else we are supplying a codon usage table, we just want common codons |
635
|
|
|
|
|
|
|
#check args first. |
636
|
1
|
50
|
|
|
|
4
|
if(!$cut->isa('Bio::CodonUsage::Table')) { |
637
|
0
|
|
|
|
|
0
|
$self->throw("I need a Bio::CodonUsage::Table object, not a [". |
638
|
|
|
|
|
|
|
ref($cut). "]."); |
639
|
|
|
|
|
|
|
} |
640
|
1
|
|
|
|
|
3
|
my $cod_ref = $cut->probable_codons($threshold); |
641
|
1
|
|
|
|
|
2
|
for my $aa (@seq) { |
642
|
21
|
100
|
|
|
|
30
|
if ($aa =~ /x/i) { |
643
|
1
|
|
|
|
|
2
|
push @data, (['NNN']); |
644
|
1
|
|
|
|
|
3
|
next; |
645
|
|
|
|
|
|
|
} |
646
|
20
|
|
|
|
|
16
|
push @data, $cod_ref->{$aa}; |
647
|
|
|
|
|
|
|
} |
648
|
|
|
|
|
|
|
} |
649
|
|
|
|
|
|
|
|
650
|
22
|
|
|
|
|
38
|
return $self->_make_iupac_string(\@data); |
651
|
|
|
|
|
|
|
} |
652
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
=head2 reverse_translate_best |
654
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
Title : reverse_translate_best |
656
|
|
|
|
|
|
|
Usage : my $str = $cttable->reverse_translate_best($seq_object,$cutable); |
657
|
|
|
|
|
|
|
Function: Reverse translates a protein sequence into plain nucleotide |
658
|
|
|
|
|
|
|
sequence (GATC), uses the most common codon for each amino acid |
659
|
|
|
|
|
|
|
Returns : A string |
660
|
|
|
|
|
|
|
Args : A Bio::PrimarySeqI compatible object and a Bio::CodonUsage::Table object |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
=cut |
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
sub reverse_translate_best { |
665
|
|
|
|
|
|
|
|
666
|
1
|
|
|
1
|
1
|
2
|
my ($self, $obj, $cut) = @_; |
667
|
|
|
|
|
|
|
|
668
|
1
|
50
|
33
|
|
|
6
|
if (!$obj || !$obj->isa('Bio::PrimarySeqI')){ |
669
|
0
|
|
|
|
|
0
|
$self->throw(" I need a Bio::PrimarySeqI object, not a [". |
670
|
|
|
|
|
|
|
ref($obj) . "]"); |
671
|
|
|
|
|
|
|
} |
672
|
1
|
50
|
|
|
|
3
|
if ($obj->alphabet ne 'protein') { |
673
|
0
|
|
|
|
|
0
|
$self->throw("Cannot reverse translate, need an amino acid sequence .". |
674
|
|
|
|
|
|
|
"This sequence is of type [" . $obj->alphabet ."]"); |
675
|
|
|
|
|
|
|
} |
676
|
1
|
50
|
|
|
|
7
|
if ( !$cut | !$cut->isa('Bio::CodonUsage::Table')) { |
677
|
0
|
|
|
|
|
0
|
$self->throw("I need a Bio::CodonUsage::Table object, not a [". |
678
|
|
|
|
|
|
|
ref($cut). "]."); |
679
|
|
|
|
|
|
|
} |
680
|
|
|
|
|
|
|
|
681
|
1
|
|
|
|
|
1
|
my $str = ''; |
682
|
1
|
|
|
|
|
2
|
my @seq = split '', $obj->seq; |
683
|
|
|
|
|
|
|
|
684
|
1
|
|
|
|
|
3
|
my $cod_ref = $cut->most_common_codons(); |
685
|
|
|
|
|
|
|
|
686
|
1
|
|
|
|
|
2
|
for my $aa ( @seq ) { |
687
|
21
|
100
|
|
|
|
27
|
if ($aa =~ /x/i) { |
688
|
1
|
|
|
|
|
2
|
$str .= 'NNN'; |
689
|
1
|
|
|
|
|
1
|
next; |
690
|
|
|
|
|
|
|
} |
691
|
20
|
50
|
|
|
|
19
|
if ( defined $cod_ref->{$aa} ) { |
692
|
20
|
|
|
|
|
17
|
$str .= $cod_ref->{$aa}; |
693
|
|
|
|
|
|
|
} else { |
694
|
0
|
|
|
|
|
0
|
$self->throw("Input sequence contains invalid character: $aa"); |
695
|
|
|
|
|
|
|
} |
696
|
|
|
|
|
|
|
} |
697
|
1
|
|
|
|
|
6
|
return $str; |
698
|
|
|
|
|
|
|
} |
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
=head2 is_start_codon |
701
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
Title : is_start_codon |
703
|
|
|
|
|
|
|
Usage : $obj->is_start_codon('ATG') |
704
|
|
|
|
|
|
|
Function: returns true (1) for all codons that can be used as a |
705
|
|
|
|
|
|
|
translation start, false (0) for others. |
706
|
|
|
|
|
|
|
Example : $myCodonTable->is_start_codon('ATG') |
707
|
|
|
|
|
|
|
Returns : boolean |
708
|
|
|
|
|
|
|
Args : codon |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
=cut |
711
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
sub is_start_codon{ |
713
|
718
|
|
|
718
|
1
|
762
|
shift->_codon_is( shift, \@STARTS, 'M' ); |
714
|
|
|
|
|
|
|
} |
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
=head2 is_ter_codon |
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
Title : is_ter_codon |
719
|
|
|
|
|
|
|
Usage : $obj->is_ter_codon('GAA') |
720
|
|
|
|
|
|
|
Function: returns true (1) for all codons that can be used as a |
721
|
|
|
|
|
|
|
translation tarminator, false (0) for others. |
722
|
|
|
|
|
|
|
Example : $myCodonTable->is_ter_codon('ATG') |
723
|
|
|
|
|
|
|
Returns : boolean |
724
|
|
|
|
|
|
|
Args : codon |
725
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
=cut |
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
sub is_ter_codon{ |
729
|
613
|
|
|
613
|
1
|
471
|
my ($self, $value) = @_; |
730
|
613
|
|
|
|
|
433
|
my $id = $self->{'id'}; |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
# We need to ensure U is mapped to T (ie. UAG) |
733
|
613
|
|
|
|
|
437
|
$value = uc $value; |
734
|
613
|
|
|
|
|
410
|
$value =~ tr/U/T/; |
735
|
|
|
|
|
|
|
|
736
|
613
|
100
|
|
|
|
619
|
if (length $value != 3 ) { |
737
|
|
|
|
|
|
|
# Incomplete codons are not stop codons |
738
|
1
|
|
|
|
|
3
|
return 0; |
739
|
|
|
|
|
|
|
} else { |
740
|
612
|
|
|
|
|
386
|
my $result = 0; |
741
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
# For all the possible codons, if any are not a stop |
743
|
|
|
|
|
|
|
# codon, fail immediately |
744
|
612
|
|
|
|
|
592
|
for my $c ( $self->unambiguous_codons($value) ) { |
745
|
615
|
|
|
|
|
660
|
my $m = substr( $TABLES[$id], $CODONS->{$c}, 1 ); |
746
|
615
|
100
|
|
|
|
613
|
if($m eq $TERMINATOR) { |
747
|
46
|
|
|
|
|
50
|
$result = 1; |
748
|
|
|
|
|
|
|
} else { |
749
|
569
|
|
|
|
|
2514
|
return 0; |
750
|
|
|
|
|
|
|
} |
751
|
|
|
|
|
|
|
} |
752
|
43
|
|
|
|
|
97
|
return $result; |
753
|
|
|
|
|
|
|
} |
754
|
|
|
|
|
|
|
} |
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
# desc: compares the passed value with a single entry in the given |
757
|
|
|
|
|
|
|
# codon table |
758
|
|
|
|
|
|
|
# args: a value (typically a three-char string like 'atg'), |
759
|
|
|
|
|
|
|
# a reference to the appropriate set of codon tables, |
760
|
|
|
|
|
|
|
# a single-character value to check for at the position in the |
761
|
|
|
|
|
|
|
# given codon table |
762
|
|
|
|
|
|
|
# ret: boolean, true if the given codon table contains the $key at the |
763
|
|
|
|
|
|
|
# position corresponding to $value |
764
|
|
|
|
|
|
|
sub _codon_is { |
765
|
718
|
|
|
718
|
|
562
|
my ($self, $value, $table, $key ) = @_; |
766
|
|
|
|
|
|
|
|
767
|
718
|
50
|
|
|
|
866
|
return 0 unless length $value == 3; |
768
|
|
|
|
|
|
|
|
769
|
718
|
|
|
|
|
489
|
$value = lc $value; |
770
|
718
|
|
|
|
|
471
|
$value =~ tr/u/t/; |
771
|
|
|
|
|
|
|
|
772
|
718
|
|
|
|
|
522
|
my $id = $self->{'id'}; |
773
|
718
|
|
|
|
|
651
|
for my $c ( $self->unambiguous_codons($value) ) { |
774
|
720
|
|
|
|
|
727
|
my $m = substr( $table->[$id], $CODONS->{$c}, 1 ); |
775
|
720
|
100
|
|
|
|
1070
|
if ($m eq $key) { return 1; } |
|
55
|
|
|
|
|
126
|
|
776
|
|
|
|
|
|
|
} |
777
|
663
|
|
|
|
|
1640
|
return 0; |
778
|
|
|
|
|
|
|
} |
779
|
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
=head2 is_unknown_codon |
781
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
Title : is_unknown_codon |
783
|
|
|
|
|
|
|
Usage : $obj->is_unknown_codon('GAJ') |
784
|
|
|
|
|
|
|
Function: returns false (0) for all codons that are valid, |
785
|
|
|
|
|
|
|
true (1) for others. |
786
|
|
|
|
|
|
|
Example : $myCodonTable->is_unknown_codon('NTG') |
787
|
|
|
|
|
|
|
Returns : boolean |
788
|
|
|
|
|
|
|
Args : codon |
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
=cut |
792
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
sub is_unknown_codon{ |
794
|
3
|
|
|
3
|
1
|
3
|
my ($self, $value) = @_; |
795
|
3
|
|
|
|
|
5
|
$value = lc $value; |
796
|
3
|
|
|
|
|
4
|
$value =~ tr/u/t/; |
797
|
3
|
100
|
|
|
|
4
|
return 1 unless $self->unambiguous_codons($value); |
798
|
1
|
|
|
|
|
3
|
return 0; |
799
|
|
|
|
|
|
|
} |
800
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
=head2 unambiguous_codons |
802
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
Title : unambiguous_codons |
804
|
|
|
|
|
|
|
Usage : @codons = $self->unambiguous_codons('ACN') |
805
|
|
|
|
|
|
|
Returns : array of strings (one-letter unambiguous amino acid codes) |
806
|
|
|
|
|
|
|
Args : a codon = a three IUPAC nucleotide character string |
807
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
=cut |
809
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
sub unambiguous_codons{ |
811
|
6646
|
|
|
6646
|
1
|
4675
|
my ($self,$value) = @_; |
812
|
6646
|
|
|
|
|
10107
|
my @nts = map { $IUPAC_DNA{uc $_} } split(//, $value); |
|
19937
|
|
|
|
|
22101
|
|
813
|
|
|
|
|
|
|
|
814
|
6646
|
|
|
|
|
5462
|
my @codons; |
815
|
6646
|
|
|
|
|
4095
|
for my $i ( @{$nts[0]} ) { |
|
6646
|
|
|
|
|
9111
|
|
816
|
1658
|
|
|
|
|
969
|
for my $j ( @{$nts[1]} ) { |
|
1658
|
|
|
|
|
1407
|
|
817
|
1633
|
|
|
|
|
907
|
for my $k ( @{$nts[2]} ) { |
|
1633
|
|
|
|
|
1413
|
|
818
|
1804
|
|
|
|
|
2680
|
push @codons, lc "$i$j$k"; |
819
|
|
|
|
|
|
|
}}} |
820
|
6646
|
|
|
|
|
8162
|
return @codons; |
821
|
|
|
|
|
|
|
} |
822
|
|
|
|
|
|
|
|
823
|
|
|
|
|
|
|
=head2 _unambiquous_codons |
824
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
deprecated, now an alias for unambiguous_codons |
826
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
=cut |
828
|
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
sub _unambiquous_codons { |
830
|
0
|
|
|
0
|
|
0
|
unambiguous_codons( undef, @_ ); |
831
|
|
|
|
|
|
|
} |
832
|
|
|
|
|
|
|
|
833
|
|
|
|
|
|
|
=head2 add_table |
834
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
Title : add_table |
836
|
|
|
|
|
|
|
Usage : $newid = $ct->add_table($name, $table, $starts) |
837
|
|
|
|
|
|
|
Function: Add a custom Codon Table into the object. |
838
|
|
|
|
|
|
|
Know what you are doing, only the length of |
839
|
|
|
|
|
|
|
the argument strings is checked! |
840
|
|
|
|
|
|
|
Returns : the id of the new codon table |
841
|
|
|
|
|
|
|
Args : name, a string, optional (can be empty) |
842
|
|
|
|
|
|
|
table, a string of 64 characters |
843
|
|
|
|
|
|
|
startcodons, a string of 64 characters, defaults to standard |
844
|
|
|
|
|
|
|
|
845
|
|
|
|
|
|
|
=cut |
846
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
sub add_table { |
848
|
1
|
|
|
1
|
1
|
2
|
my ($self, $name, $table, $starts) = @_; |
849
|
|
|
|
|
|
|
|
850
|
1
|
|
33
|
|
|
2
|
$name ||= 'Custom' . $#NAMES + 1; |
851
|
1
|
|
33
|
|
|
4
|
$starts ||= $STARTS[1]; |
852
|
1
|
50
|
33
|
|
|
5
|
$self->throw('Suspect input!') |
853
|
|
|
|
|
|
|
unless length($table) == 64 and length($starts) == 64; |
854
|
|
|
|
|
|
|
|
855
|
1
|
|
|
|
|
2
|
push @NAMES, $name; |
856
|
1
|
|
|
|
|
2
|
push @TABLES, $table; |
857
|
1
|
|
|
|
|
1
|
push @STARTS, $starts; |
858
|
|
|
|
|
|
|
|
859
|
1
|
|
|
|
|
3
|
return $#NAMES; |
860
|
|
|
|
|
|
|
} |
861
|
|
|
|
|
|
|
|
862
|
|
|
|
|
|
|
sub _make_iupac_string { |
863
|
22
|
|
|
22
|
|
18
|
my ($self, $cod_ref) = @_; |
864
|
22
|
50
|
|
|
|
47
|
if(ref($cod_ref) ne 'ARRAY') { |
865
|
0
|
|
|
|
|
0
|
$self->throw(" I need a reference to a list of references to codons, ". |
866
|
|
|
|
|
|
|
" not a [". ref($cod_ref) . "]."); |
867
|
|
|
|
|
|
|
} |
868
|
22
|
|
|
|
|
52
|
my %iupac_hash = Bio::Tools::IUPAC->iupac_rev_iub(); |
869
|
22
|
|
|
|
|
38
|
my $iupac_string = ''; ## the string to be returned |
870
|
22
|
|
|
|
|
23
|
for my $aa (@$cod_ref) { |
871
|
|
|
|
|
|
|
|
872
|
|
|
|
|
|
|
## scan through codon positions, record the differing values, |
873
|
|
|
|
|
|
|
# then look up in the iub hash |
874
|
96
|
|
|
|
|
88
|
for my $index(0..2) { |
875
|
288
|
|
|
|
|
150
|
my %h; |
876
|
288
|
|
|
|
|
206
|
map { my $k = substr($_,$index,1); |
|
846
|
|
|
|
|
544
|
|
877
|
846
|
|
|
|
|
682
|
$h{$k} = undef;} @$aa; |
878
|
288
|
|
|
|
|
337
|
my $lookup_key = join '', sort{$a cmp $b}keys %h; |
|
225
|
|
|
|
|
224
|
|
879
|
|
|
|
|
|
|
|
880
|
|
|
|
|
|
|
## extend string |
881
|
288
|
|
|
|
|
378
|
$iupac_string .= $iupac_hash{uc$lookup_key}; |
882
|
|
|
|
|
|
|
} |
883
|
|
|
|
|
|
|
} |
884
|
22
|
|
|
|
|
149
|
return $iupac_string; |
885
|
|
|
|
|
|
|
} |
886
|
|
|
|
|
|
|
|
887
|
|
|
|
|
|
|
|
888
|
|
|
|
|
|
|
1; |