line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::CUA::CodonTable; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=pod |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 NAME |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
Bio::CUA::CodonTable -- A package processing genetic codon table |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 SYNOPSIS |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
This package is provided to improve portability of |
12
|
|
|
|
|
|
|
L, in case that one may not |
13
|
|
|
|
|
|
|
install L which includes huge number |
14
|
|
|
|
|
|
|
of modules. |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
The package obtains genetic code tables from NCBI at |
17
|
|
|
|
|
|
|
L |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
examples: |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# get the standard genetic code |
22
|
|
|
|
|
|
|
my $table = Bio::CUA::CodonTable->new(-id => 1) |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# get table from an input file if know genetic codes can not |
25
|
|
|
|
|
|
|
# satisfy the need. |
26
|
|
|
|
|
|
|
my $table = Bio::CUA::CodonTable->new(-map_file => |
27
|
|
|
|
|
|
|
'codon_to_aa.tsv') |
28
|
|
|
|
|
|
|
# in 'codon_to_aa.tsv', it looks like this |
29
|
|
|
|
|
|
|
# GCU A |
30
|
|
|
|
|
|
|
# AAU N |
31
|
|
|
|
|
|
|
# CAU H |
32
|
|
|
|
|
|
|
# ... ... |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=cut |
35
|
|
|
|
|
|
|
|
36
|
5
|
|
|
5
|
|
17776
|
use 5.006; |
|
5
|
|
|
|
|
12
|
|
37
|
5
|
|
|
5
|
|
20
|
use strict; |
|
5
|
|
|
|
|
7
|
|
|
5
|
|
|
|
|
98
|
|
38
|
5
|
|
|
5
|
|
18
|
use warnings; |
|
5
|
|
|
|
|
5
|
|
|
5
|
|
|
|
|
142
|
|
39
|
5
|
|
|
5
|
|
414
|
use parent qw/Bio::CUA/; |
|
5
|
|
|
|
|
299
|
|
|
5
|
|
|
|
|
32
|
|
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# global variables |
42
|
|
|
|
|
|
|
my $pkg = __PACKAGE__; |
43
|
|
|
|
|
|
|
my $STOPAA = '*'; |
44
|
|
|
|
|
|
|
my %validGCIds = map { $_ => 1 } (1..6,9..14,16,21..25); # in future this can be derived |
45
|
|
|
|
|
|
|
# from data section at the end |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=head2 new |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
Title : new |
50
|
|
|
|
|
|
|
Usage : $obj = Bio::CUA::CodonTable->new(-map_file => 'file'); |
51
|
|
|
|
|
|
|
Function: creat an object for processing genetic codon tables |
52
|
|
|
|
|
|
|
Returns : an object of L |
53
|
|
|
|
|
|
|
Args : a hash with following keys: |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=over 4 |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
=item -id |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
genetic code id. The id follows NCBI's standard, here are |
60
|
|
|
|
|
|
|
the list: |
61
|
|
|
|
|
|
|
1. The Standard Code |
62
|
|
|
|
|
|
|
2. The Vertebrate Mitochondrial Code |
63
|
|
|
|
|
|
|
3. The Yeast Mitochondrial Code |
64
|
|
|
|
|
|
|
4. The Mold, Protozoan, and Coelenterate Mitochondrial Code and |
65
|
|
|
|
|
|
|
the Mycoplasma/Spiroplasma Code |
66
|
|
|
|
|
|
|
5. The Invertebrate Mitochondrial Code |
67
|
|
|
|
|
|
|
6. The Ciliate, Dasycladacean and Hexamita Nuclear Code |
68
|
|
|
|
|
|
|
9. The Echinoderm and Flatworm Mitochondrial Code |
69
|
|
|
|
|
|
|
10. The Euplotid Nuclear Code |
70
|
|
|
|
|
|
|
11. The Bacterial, Archaeal and Plant Plastid Code |
71
|
|
|
|
|
|
|
12. The Alternative Yeast Nuclear Code |
72
|
|
|
|
|
|
|
13. The Ascidian Mitochondrial Code |
73
|
|
|
|
|
|
|
14. The Alternative Flatworm Mitochondrial Code |
74
|
|
|
|
|
|
|
16. Chlorophycean Mitochondrial Code |
75
|
|
|
|
|
|
|
21. Trematode Mitochondrial Code |
76
|
|
|
|
|
|
|
22. Scenedesmus obliquus Mitochondrial Code |
77
|
|
|
|
|
|
|
23. Thraustochytrium Mitochondrial Code |
78
|
|
|
|
|
|
|
24. Pterobranchia Mitochondrial Code |
79
|
|
|
|
|
|
|
25. Candidate Division SR1 and Gracilibacteria Code |
80
|
|
|
|
|
|
|
see |
81
|
|
|
|
|
|
|
L |
82
|
|
|
|
|
|
|
for more details. |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=item -map_file |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
-map_file = a file containing a mapping between codons to amino |
87
|
|
|
|
|
|
|
acids, one codon per line followed by its amino acid, separated by |
88
|
|
|
|
|
|
|
tab or space. |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=item -debug |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
a switch to indicate whether to show more warnings which may |
93
|
|
|
|
|
|
|
help to identify sources of errors if any. put 1 to switch |
94
|
|
|
|
|
|
|
it on. The default is off. |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=back |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
Note: argument -map_file has higher priority than -id, and the |
99
|
|
|
|
|
|
|
default is -id => 1, i.e., the standard genetic code |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=cut |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
sub new |
104
|
|
|
|
|
|
|
{ |
105
|
4
|
|
|
4
|
1
|
1457
|
my ($caller, @args) = @_; |
106
|
|
|
|
|
|
|
|
107
|
4
|
|
|
|
|
32
|
my $self = $caller->SUPER::new(@args); |
108
|
|
|
|
|
|
|
|
109
|
4
|
|
|
|
|
13
|
my $hash = $self->_array_to_hash(\@args); |
110
|
|
|
|
|
|
|
|
111
|
4
|
50
|
|
|
|
19
|
if($hash->{'map_file'}) |
|
|
50
|
|
|
|
|
|
112
|
|
|
|
|
|
|
{ |
113
|
0
|
|
|
|
|
0
|
$self->_build_table_by_file($hash->{'map_file'}); |
114
|
|
|
|
|
|
|
}elsif($hash->{'id'}) |
115
|
|
|
|
|
|
|
{ |
116
|
4
|
|
|
|
|
17
|
$self->_build_table_by_id($hash->{'id'}); |
117
|
|
|
|
|
|
|
}else |
118
|
|
|
|
|
|
|
{ |
119
|
0
|
0
|
|
|
|
0
|
$self->warn("No arguments -map_file or -id is provided in", |
120
|
|
|
|
|
|
|
"$pkg, -id => 1 will be used") if($self->debug); |
121
|
0
|
|
|
|
|
0
|
$self->_build_table_by_id(1); |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
|
124
|
4
|
|
|
|
|
30
|
return $self; |
125
|
|
|
|
|
|
|
} |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
# get genetic code table by parsing a file |
128
|
|
|
|
|
|
|
sub _build_table_by_file |
129
|
|
|
|
|
|
|
{ |
130
|
0
|
|
|
0
|
|
0
|
my ($self, $file) = @_; |
131
|
|
|
|
|
|
|
|
132
|
0
|
|
|
|
|
0
|
my $codonToAA = $self->_parse_file($file,2); |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
# check all the codons and amino acids |
135
|
0
|
|
|
|
|
0
|
my %validCodons; |
136
|
|
|
|
|
|
|
my %stopCodons; |
137
|
0
|
|
|
|
|
0
|
while(my ($codon, $AA) = each %$codonToAA) |
138
|
|
|
|
|
|
|
{ |
139
|
0
|
|
|
|
|
0
|
$codon = _process_codon($codon); |
140
|
0
|
0
|
0
|
|
|
0
|
($self->warn("$codon is Not a valid codon") and next) |
141
|
|
|
|
|
|
|
unless($codon =~ /^[ATCG]{3}$/); |
142
|
0
|
|
|
|
|
0
|
$validCodons{$codon} = $AA; |
143
|
0
|
0
|
|
|
|
0
|
$stopCodons{$codon}++ if($self->_is_stop_aa($AA)); |
144
|
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
|
146
|
0
|
|
|
|
|
0
|
$self->{'_codon_to_aa'} = \%validCodons; |
147
|
0
|
|
|
|
|
0
|
$self->{'_stop_codons'} = \%stopCodons; |
148
|
0
|
|
|
|
|
0
|
my $totalCodonNum = scalar(keys %validCodons); |
149
|
0
|
|
|
|
|
0
|
$self->{'_num_codons'} = $totalCodonNum; |
150
|
0
|
0
|
|
|
|
0
|
if($totalCodonNum < 64) |
151
|
|
|
|
|
|
|
{ |
152
|
0
|
|
|
|
|
0
|
$self->warn("Only $totalCodonNum valid codons found in '$file'"); |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
|
155
|
0
|
|
|
|
|
0
|
return 1; |
156
|
|
|
|
|
|
|
} |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
# make codon table with given table ID |
159
|
|
|
|
|
|
|
sub _build_table_by_id |
160
|
|
|
|
|
|
|
{ |
161
|
4
|
|
|
4
|
|
9
|
my ($self, $id) = @_; |
162
|
|
|
|
|
|
|
|
163
|
4
|
50
|
|
|
|
17
|
$self->throw("Id '$id' is not a valid genetic code table Id") |
164
|
|
|
|
|
|
|
unless($self->_is_valid_gc_id($id)); |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
#my $curFile = __FILE__; |
167
|
|
|
|
|
|
|
#warn "I am in $curFile\n"; |
168
|
|
|
|
|
|
|
|
169
|
4
|
|
|
|
|
44
|
my $fh = $self->_open_file(__FILE__); |
170
|
|
|
|
|
|
|
|
171
|
4
|
|
|
|
|
5
|
my $inDataSection = 0; |
172
|
4
|
|
|
|
|
8
|
my $inGCSection = 0; # genetic codon section |
173
|
4
|
|
|
|
|
5
|
my $data = ''; |
174
|
|
|
|
|
|
|
# cut the genetic codon section first |
175
|
4
|
|
|
|
|
99
|
while(<$fh>) |
176
|
|
|
|
|
|
|
{ |
177
|
3756
|
100
|
|
|
|
6296
|
$inDataSection = 1 if(/^__END__/); |
178
|
3756
|
100
|
|
|
|
9182
|
next unless($inDataSection); |
179
|
1068
|
100
|
|
|
|
1776
|
last if(/^<
|
180
|
1064
|
100
|
|
|
|
1559
|
$inGCSection = 1 if(/^>>GC/); |
181
|
1064
|
100
|
100
|
|
|
4504
|
next if(/^>/ or /^--/); # comment lines |
182
|
736
|
|
|
|
|
2200
|
$data .= $_; |
183
|
|
|
|
|
|
|
} |
184
|
4
|
|
|
|
|
49
|
close $fh; |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
# match each table and find that with the id = $id |
187
|
4
|
|
|
|
|
8
|
my $table; |
188
|
4
|
|
|
|
|
66
|
while($data =~ /\n *{ *\n *(name[^}]+)}/gcm) |
189
|
|
|
|
|
|
|
{ |
190
|
4
|
|
|
|
|
28
|
$table = $1; |
191
|
4
|
50
|
|
|
|
133
|
next unless($table =~ /^ *id\s+$id\s*,/m); |
192
|
4
|
|
|
|
|
10
|
last; # found |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
# now parse this table |
196
|
4
|
|
|
|
|
7
|
my %codonToAA; |
197
|
|
|
|
|
|
|
my %stopCodons; |
198
|
0
|
|
|
|
|
0
|
my %startCodons; |
199
|
4
|
|
|
|
|
29
|
my ($b1) = $table =~ /^ *-- +Base1 +(\w+)/mo; |
200
|
4
|
|
|
|
|
29
|
my ($b2) = $table =~ /^ *-- +Base2 +(\w+)/mo; |
201
|
4
|
|
|
|
|
28
|
my ($b3) = $table =~ /^ *-- +Base3 +(\w+)/mo; |
202
|
4
|
|
|
|
|
25
|
my ($AAs) = $table =~ /^ *ncbieaa +"([^"]+)"/mo; |
203
|
4
|
|
|
|
|
11
|
$AAs =~ s/\s+//g; |
204
|
4
|
|
|
|
|
24
|
my ($starts) = $table =~ /^ *sncbieaa +"([^"]+)"/mo; |
205
|
4
|
|
|
|
|
11
|
$starts =~ s/\s+//g; |
206
|
4
|
|
|
|
|
6
|
my @names; |
207
|
4
|
|
|
|
|
28
|
while($table =~ /^ *name +("[^"]+")/mgco) |
208
|
|
|
|
|
|
|
{ |
209
|
8
|
|
|
|
|
16
|
my $name = $1; |
210
|
8
|
|
|
|
|
13
|
$name =~ s/\n/ /g; |
211
|
8
|
|
|
|
|
57
|
push @names, $name; |
212
|
|
|
|
|
|
|
} |
213
|
|
|
|
|
|
|
|
214
|
4
|
50
|
|
|
|
12
|
$self->warn("The length of lines in genetic table $id is not 64") |
215
|
|
|
|
|
|
|
unless(length($b1) == 64); |
216
|
4
|
50
|
33
|
|
|
58
|
$self->throw("lines of bases and amino acids are not the same long", |
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
217
|
|
|
|
|
|
|
"in genetic table $id") |
218
|
|
|
|
|
|
|
unless( length($b1) == length($b2) and |
219
|
|
|
|
|
|
|
length($b1) == length($b3) and |
220
|
|
|
|
|
|
|
length($b1) == length($AAs) and |
221
|
|
|
|
|
|
|
length($b1) == length($starts)); |
222
|
|
|
|
|
|
|
|
223
|
4
|
|
|
|
|
101
|
$self->set_tag('name', join(' or ', @names)); |
224
|
4
|
|
|
|
|
12
|
$self->set_tag('id', $id); |
225
|
4
|
|
|
|
|
16
|
for(my $i = 0; $i < length($b1); $i++) |
226
|
|
|
|
|
|
|
{ |
227
|
256
|
|
|
|
|
280
|
my $nt1 = substr($b1, $i, 1); |
228
|
256
|
|
|
|
|
214
|
my $nt2 = substr($b2, $i, 1); |
229
|
256
|
|
|
|
|
224
|
my $nt3 = substr($b3, $i, 1); |
230
|
256
|
|
|
|
|
217
|
my $AA = substr($AAs, $i, 1); |
231
|
256
|
|
|
|
|
225
|
my $start = substr($starts, $i, 1); |
232
|
256
|
|
|
|
|
255
|
my $codon = uc($nt1.$nt2.$nt3); |
233
|
256
|
|
|
|
|
440
|
$codonToAA{$codon} = $AA; |
234
|
256
|
100
|
|
|
|
329
|
$stopCodons{$codon}++ if($self->_is_stop_aa($AA)); |
235
|
256
|
100
|
|
|
|
721
|
$startCodons{$codon}++ unless($start eq '-'); |
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
|
238
|
4
|
|
|
|
|
12
|
$self->{'_codon_to_aa'} = \%codonToAA; |
239
|
4
|
|
|
|
|
8
|
$self->{'_stop_codons'} = \%stopCodons; |
240
|
4
|
|
|
|
|
13
|
$self->{'_start_codons'} = \%startCodons; |
241
|
4
|
|
|
|
|
12
|
$self->{'_num_codons'} = scalar(keys %codonToAA); |
242
|
|
|
|
|
|
|
|
243
|
4
|
|
|
|
|
26
|
return 1; |
244
|
|
|
|
|
|
|
} |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
=head2 name |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
Title : name |
249
|
|
|
|
|
|
|
Usage : $name = $self->name(); |
250
|
|
|
|
|
|
|
Function: the name of genetic code table in use |
251
|
|
|
|
|
|
|
Returns : a string for the name |
252
|
|
|
|
|
|
|
Args : None |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
=cut |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
sub name |
257
|
|
|
|
|
|
|
{ |
258
|
0
|
|
|
0
|
1
|
0
|
$_[0]->get_tag('name'); |
259
|
|
|
|
|
|
|
} |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
=head2 id |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
Title : id |
264
|
|
|
|
|
|
|
Usage : $id = $self->id(); |
265
|
|
|
|
|
|
|
Function: the id of genetic code table in use |
266
|
|
|
|
|
|
|
Returns : a integer for the id |
267
|
|
|
|
|
|
|
Args : None |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
=cut |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
sub id |
272
|
|
|
|
|
|
|
{ |
273
|
0
|
|
|
0
|
1
|
0
|
$_[0]->get_tag('id'); |
274
|
|
|
|
|
|
|
} |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
=head2 total_num_of_codons |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
Title : total_num_of_codons |
279
|
|
|
|
|
|
|
Usage : $num = $self->total_num_of_codons; |
280
|
|
|
|
|
|
|
Function: get total number of codons of the genetic code table in use |
281
|
|
|
|
|
|
|
Returns : an integer |
282
|
|
|
|
|
|
|
Args : None |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
=cut |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
sub total_num_of_codons |
287
|
|
|
|
|
|
|
{ |
288
|
0
|
|
|
0
|
1
|
0
|
$_[0]->{'_num_codons'}; |
289
|
|
|
|
|
|
|
} |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
sub _is_valid_gc_id |
292
|
|
|
|
|
|
|
{ |
293
|
4
|
|
|
4
|
|
7
|
my ($self, $id) = @_; |
294
|
|
|
|
|
|
|
|
295
|
4
|
50
|
|
|
|
28
|
return 1 if($validGCIds{$id}); |
296
|
0
|
|
|
|
|
0
|
return 0; |
297
|
|
|
|
|
|
|
} |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
# check whether this AA is a stop symbol |
300
|
|
|
|
|
|
|
sub _is_stop_aa |
301
|
|
|
|
|
|
|
{ |
302
|
768
|
|
|
768
|
|
665
|
my ($self, $AA) = @_; |
303
|
|
|
|
|
|
|
|
304
|
768
|
100
|
|
|
|
1243
|
return 1 if($AA eq $STOPAA); |
305
|
732
|
|
|
|
|
1202
|
return 0; |
306
|
|
|
|
|
|
|
} |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
=head2 is_valid_codon |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
Title : is_valid_codon |
311
|
|
|
|
|
|
|
Usage : $test = $self->is_valid_codon('ACG'); |
312
|
|
|
|
|
|
|
Function: test whether a given character string is a valid codon in |
313
|
|
|
|
|
|
|
current codon table |
314
|
|
|
|
|
|
|
Returns : 1 if true, otherwise 0 |
315
|
|
|
|
|
|
|
Args : a codon sequence |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
=cut |
318
|
|
|
|
|
|
|
# check whether this is a valid codon |
319
|
|
|
|
|
|
|
sub is_valid_codon |
320
|
|
|
|
|
|
|
{ |
321
|
0
|
|
|
0
|
1
|
0
|
my ($self,$codon,$allowAmb) = @_; |
322
|
|
|
|
|
|
|
|
323
|
0
|
|
|
|
|
0
|
$codon = _process_codon($codon); |
324
|
0
|
0
|
|
|
|
0
|
return 0 unless($codon =~ /^[ATCGU]{3}$/); # no ambiguous at present |
325
|
|
|
|
|
|
|
# also check whether it is in codon table |
326
|
0
|
|
|
|
|
0
|
my $codons = $self->{'_codon_to_aa'}; |
327
|
0
|
0
|
|
|
|
0
|
return 0 unless(exists $codons->{$codon}); |
328
|
0
|
|
|
|
|
0
|
return 1; |
329
|
|
|
|
|
|
|
} |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=head2 all_codons |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
Title : all_codons |
334
|
|
|
|
|
|
|
Usage : @codons = $self->all_codons; |
335
|
|
|
|
|
|
|
Function: get all the codons in this genetic code table. Codons are |
336
|
|
|
|
|
|
|
ordered by the coded amino acids. Stop codons are also included. |
337
|
|
|
|
|
|
|
Returns : an array of codons, or its reference in scalar context |
338
|
|
|
|
|
|
|
Args : None |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
=cut |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
sub all_codons |
343
|
|
|
|
|
|
|
{ |
344
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
345
|
|
|
|
|
|
|
|
346
|
0
|
0
|
|
|
|
0
|
my $codonToAA = $self->{'_codon_to_aa'} or return; |
347
|
|
|
|
|
|
|
|
348
|
0
|
|
|
|
|
0
|
my %AAs; |
349
|
0
|
|
|
|
|
0
|
while(my ($k,$v) = each %$codonToAA) |
350
|
|
|
|
|
|
|
{ |
351
|
0
|
|
|
|
|
0
|
push @{$AAs{$v}}, $k; |
|
0
|
|
|
|
|
0
|
|
352
|
|
|
|
|
|
|
} |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
# now order the codons |
355
|
0
|
|
|
|
|
0
|
my @sortedAAs = sort keys(%AAs); |
356
|
0
|
|
|
|
|
0
|
my @codons; |
357
|
0
|
|
|
|
|
0
|
foreach my $AA (@sortedAAs) |
358
|
|
|
|
|
|
|
{ |
359
|
0
|
|
|
|
|
0
|
push @codons, @{$AAs{$AA}}; |
|
0
|
|
|
|
|
0
|
|
360
|
|
|
|
|
|
|
} |
361
|
|
|
|
|
|
|
|
362
|
0
|
0
|
|
|
|
0
|
return wantarray? @codons : \@codons; |
363
|
|
|
|
|
|
|
} |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
=head2 all_sense_codons |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
Title : all_sense_codons |
368
|
|
|
|
|
|
|
Usage : @codons = $self->all_sense_codons; |
369
|
|
|
|
|
|
|
Function: get all the sense codons in this genetic code table |
370
|
|
|
|
|
|
|
Returns : an array of codons, or its reference in scalar context |
371
|
|
|
|
|
|
|
Args : None |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
=cut |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
sub all_sense_codons |
376
|
|
|
|
|
|
|
{ |
377
|
28
|
|
|
28
|
1
|
31
|
my ($self) = @_; |
378
|
|
|
|
|
|
|
|
379
|
28
|
|
|
|
|
40
|
my $codonToAA = $self->{'_codon_to_aa'}; |
380
|
28
|
|
|
|
|
40
|
my $stopCodons = $self->{'_stop_codons'}; |
381
|
28
|
|
|
|
|
217
|
my @senseCodons = grep {!exists($stopCodons->{$_})} keys %$codonToAA; |
|
1792
|
|
|
|
|
1964
|
|
382
|
|
|
|
|
|
|
|
383
|
28
|
100
|
|
|
|
337
|
return wantarray? @senseCodons : \@senseCodons; |
384
|
|
|
|
|
|
|
} |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
=head2 all_amino_acids |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
Title : all_amino_acids |
389
|
|
|
|
|
|
|
Usage : @AAs = $self->all_amino_acids |
390
|
|
|
|
|
|
|
Function: get all the amino acids in this genetic code table. Stop |
391
|
|
|
|
|
|
|
codons are excluded. |
392
|
|
|
|
|
|
|
Returns : an array of amino acids, or its reference if in scalar |
393
|
|
|
|
|
|
|
context |
394
|
|
|
|
|
|
|
Args : None |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
=cut |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
sub all_amino_acids |
399
|
|
|
|
|
|
|
{ |
400
|
8
|
|
|
8
|
1
|
11
|
my $self = shift; |
401
|
8
|
50
|
|
|
|
24
|
my $codonToAA = $self->{'_codon_to_aa'} or return; |
402
|
|
|
|
|
|
|
|
403
|
8
|
|
|
|
|
9
|
my %AAs; |
404
|
8
|
|
|
|
|
36
|
while(my ($k,$v) = each %$codonToAA) |
405
|
|
|
|
|
|
|
{ |
406
|
512
|
100
|
|
|
|
556
|
next if $self->_is_stop_aa($v); |
407
|
488
|
|
|
|
|
1120
|
$AAs{$v}++; |
408
|
|
|
|
|
|
|
} |
409
|
|
|
|
|
|
|
|
410
|
8
|
|
|
|
|
46
|
my @tmp = keys %AAs; |
411
|
8
|
50
|
|
|
|
80
|
return wantarray? @tmp : \@tmp; |
412
|
|
|
|
|
|
|
} |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
=head2 all_start_codons |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
Title : all_start_codons |
417
|
|
|
|
|
|
|
Usage : @startCodons = $self->all_start_codons; |
418
|
|
|
|
|
|
|
Function: get all the start codons in the genetic code table in use |
419
|
|
|
|
|
|
|
Returns : an array of codons, or its reference if in scalar context |
420
|
|
|
|
|
|
|
Args : None |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
=cut |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
sub all_start_codons |
425
|
|
|
|
|
|
|
{ |
426
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
427
|
|
|
|
|
|
|
$self->warn("No marked start codons in this GC table") and return |
428
|
0
|
0
|
0
|
|
|
0
|
unless(exists $self->{'_start_codons'}); |
429
|
0
|
|
|
|
|
0
|
my @codons = keys %{$self->{'_start_codons'}}; |
|
0
|
|
|
|
|
0
|
|
430
|
0
|
0
|
|
|
|
0
|
wantarray? @codons : \@codons; |
431
|
|
|
|
|
|
|
} |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
=head2 all_stop_codons |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
Title : all_stop_codons |
436
|
|
|
|
|
|
|
Usage : @stopCodons = $self->all_stop_codons; |
437
|
|
|
|
|
|
|
Function: get all the stop codons in the genetic code table in use |
438
|
|
|
|
|
|
|
Returns : an array of codons, or its reference if in scalar context |
439
|
|
|
|
|
|
|
Args : None |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
=cut |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
sub all_stop_codons |
444
|
|
|
|
|
|
|
{ |
445
|
0
|
|
|
0
|
1
|
0
|
my @codons = keys %{$_[0]->{'_stop_codons'}}; |
|
0
|
|
|
|
|
0
|
|
446
|
0
|
0
|
|
|
|
0
|
wantarray? @codons : \@codons; |
447
|
|
|
|
|
|
|
} |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
=head2 codons_of_AA |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
Title : codons_of_AA |
452
|
|
|
|
|
|
|
Usage : @codons = $self->codons_of_AA('S'); |
453
|
|
|
|
|
|
|
Function: get codons encoding the given amino acid |
454
|
|
|
|
|
|
|
Returns : an array of codons, or its reference if in scalar context |
455
|
|
|
|
|
|
|
Args : a single amino acid; for stop codons, one can give '*' here |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=cut |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
sub codons_of_AA |
460
|
|
|
|
|
|
|
{ |
461
|
160
|
|
|
160
|
1
|
149
|
my ($self, $AA) = @_; |
462
|
|
|
|
|
|
|
|
463
|
160
|
|
|
|
|
193
|
$AA =~ s/\s+//g; $AA = uc($AA); |
|
160
|
|
|
|
|
159
|
|
464
|
160
|
50
|
|
|
|
240
|
$self->throw("Can only process one amino acid each time") |
465
|
|
|
|
|
|
|
if(length($AA) > 1); |
466
|
|
|
|
|
|
|
|
467
|
160
|
|
|
|
|
160
|
my $codonToAA = $self->{'_codon_to_aa'}; |
468
|
160
|
|
|
|
|
835
|
my @codons = grep { $codonToAA->{$_} eq $AA } keys %$codonToAA; |
|
10240
|
|
|
|
|
10512
|
|
469
|
|
|
|
|
|
|
|
470
|
160
|
50
|
|
|
|
772
|
return wantarray? @codons : \@codons; |
471
|
|
|
|
|
|
|
} |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
=head2 codon_to_AA_map |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
Title : codon_to_AA_map |
476
|
|
|
|
|
|
|
Usage : $hash = $self->codon_to_AA_map |
477
|
|
|
|
|
|
|
Function: get the mapping from codon to amino acid in a hash |
478
|
|
|
|
|
|
|
Returns : a hash reference in which codons are keys and AAs are |
479
|
|
|
|
|
|
|
values |
480
|
|
|
|
|
|
|
Args : None |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
=cut |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
sub codon_to_AA_map |
485
|
|
|
|
|
|
|
{ |
486
|
1
|
|
|
1
|
1
|
6
|
$_[0]->{'_codon_to_aa'}; |
487
|
|
|
|
|
|
|
} |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
=head2 translate |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
Title : translate |
492
|
|
|
|
|
|
|
Usage : $AA_string = $self->translate('ATGGCA'); |
493
|
|
|
|
|
|
|
Function: get the translation of input nucleotides |
494
|
|
|
|
|
|
|
Returns : a string of amino acids, unknown amino acids are |
495
|
|
|
|
|
|
|
represented as 'X'. |
496
|
|
|
|
|
|
|
Args : nucleotide sequence. |
497
|
|
|
|
|
|
|
Note : if the input sequence is not multiple of 3 long, the last |
498
|
|
|
|
|
|
|
remained 1 or 2 nucleotides would be simply ignored. |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
=cut |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
sub translate |
503
|
|
|
|
|
|
|
{ |
504
|
0
|
|
|
0
|
1
|
0
|
my ($self, $seq) = @_; |
505
|
|
|
|
|
|
|
|
506
|
0
|
|
|
|
|
0
|
$seq =~ s/\s+//g; |
507
|
0
|
|
|
|
|
0
|
$seq = uc($seq); |
508
|
|
|
|
|
|
|
|
509
|
0
|
|
|
|
|
0
|
my $seqLen = length($seq); |
510
|
0
|
|
|
|
|
0
|
my $accuLen = 0; |
511
|
0
|
|
|
|
|
0
|
my $AAs = ''; |
512
|
0
|
|
|
|
|
0
|
my $codonToAA = $self->{'_codon_to_aa'}; |
513
|
0
|
|
|
|
|
0
|
while($accuLen + 3 <= $seqLen) |
514
|
|
|
|
|
|
|
{ |
515
|
0
|
|
|
|
|
0
|
my $codon = substr($seq, $accuLen, 3); |
516
|
0
|
0
|
|
|
|
0
|
$self->warn("'$codon' is not a valid codon") |
517
|
|
|
|
|
|
|
unless($self->is_valid_codon($codon)); |
518
|
0
|
0
|
|
|
|
0
|
$AAs .= exists $codonToAA->{$codon}? $codonToAA->{$codon} : |
519
|
|
|
|
|
|
|
'X'; # X for unknown codons |
520
|
0
|
|
|
|
|
0
|
$accuLen += 3; |
521
|
|
|
|
|
|
|
} |
522
|
|
|
|
|
|
|
|
523
|
0
|
|
|
|
|
0
|
return $AAs; |
524
|
|
|
|
|
|
|
} |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
=head2 is_stop_codon |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
Title : is_stop_codon |
529
|
|
|
|
|
|
|
Usage : $test = $self->is_stop_codon('UAG'); |
530
|
|
|
|
|
|
|
Function: check whether this is a stop codon |
531
|
|
|
|
|
|
|
Returns : 1 if true, otherwise 0 |
532
|
|
|
|
|
|
|
Args : a codon sequence |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
=cut |
535
|
|
|
|
|
|
|
# check whether |
536
|
|
|
|
|
|
|
sub is_stop_codon |
537
|
|
|
|
|
|
|
{ |
538
|
128
|
|
|
128
|
1
|
111
|
my ($self, $codon) = @_; |
539
|
128
|
|
|
|
|
110
|
my $stopCodons = $self->{'_stop_codons'}; |
540
|
128
|
|
|
|
|
132
|
$codon = _process_codon($codon); |
541
|
128
|
100
|
|
|
|
221
|
return 1 if($stopCodons->{$codon}); |
542
|
122
|
|
|
|
|
204
|
return 0; |
543
|
|
|
|
|
|
|
} |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
# process codons before other actions |
546
|
|
|
|
|
|
|
sub _process_codon |
547
|
|
|
|
|
|
|
{ |
548
|
128
|
|
|
128
|
|
105
|
my $codon = shift; |
549
|
128
|
|
|
|
|
132
|
$codon =~ s/\s+//g; |
550
|
128
|
|
|
|
|
130
|
$codon =~ tr/uU/TT/; # U to T |
551
|
128
|
|
|
|
|
159
|
return uc($codon); |
552
|
|
|
|
|
|
|
} |
553
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
=head2 codon_degeneracy |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
Title : codon_degeneracy |
557
|
|
|
|
|
|
|
Usage : $hash = $self->codon_degeneracy; |
558
|
|
|
|
|
|
|
Function: group AAs and codons into codon degeneracy groups |
559
|
|
|
|
|
|
|
Returns : reference to a hash in which 1st level key is degeneracy |
560
|
|
|
|
|
|
|
(i.e., 1,2,6,etc), 2nd level key is amino acids for that degeneracy |
561
|
|
|
|
|
|
|
group, and 3rd level is reference of arrays containing coding codons |
562
|
|
|
|
|
|
|
for each amino acid. For example: |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
{ 2 => { D => [GAU, GAC], |
565
|
|
|
|
|
|
|
C => [UGU, UGC], |
566
|
|
|
|
|
|
|
... ... |
567
|
|
|
|
|
|
|
}, |
568
|
|
|
|
|
|
|
4 => { A => [GCU, GCC, GCA, GCG], |
569
|
|
|
|
|
|
|
... ... |
570
|
|
|
|
|
|
|
}, |
571
|
|
|
|
|
|
|
... ... ... |
572
|
|
|
|
|
|
|
} |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
Args : None |
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
=cut |
577
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
# group AAs and codons into redundancy groups |
579
|
|
|
|
|
|
|
sub codon_degeneracy |
580
|
|
|
|
|
|
|
{ |
581
|
53
|
|
|
53
|
1
|
1165
|
my $self = shift; |
582
|
|
|
|
|
|
|
|
583
|
53
|
100
|
|
|
|
194
|
return $self->{'_codon_deg'} if(exists $self->{'_codon_deg'}); |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
# otherwise construct it if its the first time |
586
|
2
|
|
|
|
|
3
|
my $codonToAA = $self->{'_codon_to_aa'}; |
587
|
2
|
|
|
|
|
3
|
my %aaToCodon; |
588
|
2
|
|
|
|
|
9
|
while(my ($codon, $AA) = each %$codonToAA) |
589
|
|
|
|
|
|
|
{ |
590
|
|
|
|
|
|
|
# ignore stop codons |
591
|
128
|
100
|
|
|
|
157
|
next if($self->is_stop_codon($codon)); |
592
|
122
|
|
|
|
|
97
|
push @{$aaToCodon{$AA}}, $codon; |
|
122
|
|
|
|
|
370
|
|
593
|
|
|
|
|
|
|
} |
594
|
|
|
|
|
|
|
|
595
|
2
|
|
|
|
|
3
|
my %redundancy; |
596
|
2
|
|
|
|
|
14
|
while(my ($AA, $codons) = each %aaToCodon) |
597
|
|
|
|
|
|
|
{ |
598
|
40
|
|
|
|
|
35
|
my $red = $#$codons + 1; |
599
|
40
|
|
|
|
|
142
|
$redundancy{$red}->{$AA} = [sort @$codons]; |
600
|
|
|
|
|
|
|
} |
601
|
|
|
|
|
|
|
|
602
|
2
|
|
|
|
|
5
|
$self->{'_codon_deg'} = \%redundancy; # store it first |
603
|
2
|
|
|
|
|
17
|
return \%redundancy; |
604
|
|
|
|
|
|
|
} |
605
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
=head1 AUTHOR |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
Zhenguo Zhang, C<< >> |
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
=head1 BUGS |
612
|
|
|
|
|
|
|
|
613
|
|
|
|
|
|
|
Please report any bugs or feature requests to C, or through |
614
|
|
|
|
|
|
|
the web interface at L. I will be notified, and then you'll |
615
|
|
|
|
|
|
|
automatically be notified of progress on your bug as I make changes. |
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
=head1 SUPPORT |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
perldoc Bio::CUA::CodonTable |
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
You can also look for information at: |
626
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
=over 4 |
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker (report bugs here) |
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
L |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
L |
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
=item * CPAN Ratings |
638
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
L |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
=item * Search CPAN |
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
L |
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
=back |
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
652
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
Copyright 2015 Zhenguo Zhang. |
654
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
This program is free software: you can redistribute it and/or modify |
656
|
|
|
|
|
|
|
it under the terms of the GNU General Public License as published by |
657
|
|
|
|
|
|
|
the Free Software Foundation, either version 3 of the License, or |
658
|
|
|
|
|
|
|
(at your option) any later version. |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, |
661
|
|
|
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
662
|
|
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
663
|
|
|
|
|
|
|
GNU General Public License for more details. |
664
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License |
666
|
|
|
|
|
|
|
along with this program. If not, see L. |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
=cut |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
1; # End of Bio::CUA::CodonTable |
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
__END__ |