line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::CUA::CodonTable; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=pod |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 NAME |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
Bio::CUA::CodonTable -- A package processing genetic codon table |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 SYNOPSIS |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
This package is provided to improve portability of |
12
|
|
|
|
|
|
|
L, in case that one may not |
13
|
|
|
|
|
|
|
install L which includes huge number |
14
|
|
|
|
|
|
|
of modules. |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
The package obtains genetic code tables from NCBI at |
17
|
|
|
|
|
|
|
L |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
examples: |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# get the standard genetic code |
22
|
|
|
|
|
|
|
my $table = Bio::CUA::CodonTable->new(-id => 1) |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# get table from an input file if know genetic codes can not |
25
|
|
|
|
|
|
|
# satisfy the need. |
26
|
|
|
|
|
|
|
my $table = Bio::CUA::CodonTable->new(-map_file => |
27
|
|
|
|
|
|
|
'codon_to_aa.tsv') |
28
|
|
|
|
|
|
|
# in 'codon_to_aa.tsv', it looks like this |
29
|
|
|
|
|
|
|
# GCU A |
30
|
|
|
|
|
|
|
# AAU N |
31
|
|
|
|
|
|
|
# CAU H |
32
|
|
|
|
|
|
|
# ... ... |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=cut |
35
|
|
|
|
|
|
|
|
36
|
5
|
|
|
5
|
|
17633
|
use 5.006; |
|
5
|
|
|
|
|
14
|
|
|
5
|
|
|
|
|
172
|
|
37
|
5
|
|
|
5
|
|
29
|
use strict; |
|
5
|
|
|
|
|
7
|
|
|
5
|
|
|
|
|
123
|
|
38
|
5
|
|
|
5
|
|
22
|
use warnings; |
|
5
|
|
|
|
|
7
|
|
|
5
|
|
|
|
|
135
|
|
39
|
5
|
|
|
5
|
|
461
|
use parent qw/Bio::CUA/; |
|
5
|
|
|
|
|
298
|
|
|
5
|
|
|
|
|
27
|
|
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# global variables |
42
|
|
|
|
|
|
|
my $pkg = __PACKAGE__; |
43
|
|
|
|
|
|
|
my $STOPAA = '*'; |
44
|
|
|
|
|
|
|
my %validGCIds = map { $_ => 1 } (1..6,9..14,16,21..25); # in future this can be derived |
45
|
|
|
|
|
|
|
# from data section at the end |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=head2 new |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
Title : new |
50
|
|
|
|
|
|
|
Usage : $obj = Bio::CUA::CodonTable->new(-map_file => 'file'); |
51
|
|
|
|
|
|
|
Function: creat an object for processing genetic codon tables |
52
|
|
|
|
|
|
|
Returns : an object of L |
53
|
|
|
|
|
|
|
Args : a hash with following keys: |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=over 4 |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
=item -id |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
genetic code id. The id follows NCBI's standard, here are |
60
|
|
|
|
|
|
|
the list: |
61
|
|
|
|
|
|
|
1. The Standard Code |
62
|
|
|
|
|
|
|
2. The Vertebrate Mitochondrial Code |
63
|
|
|
|
|
|
|
3. The Yeast Mitochondrial Code |
64
|
|
|
|
|
|
|
4. The Mold, Protozoan, and Coelenterate Mitochondrial Code and |
65
|
|
|
|
|
|
|
the Mycoplasma/Spiroplasma Code |
66
|
|
|
|
|
|
|
5. The Invertebrate Mitochondrial Code |
67
|
|
|
|
|
|
|
6. The Ciliate, Dasycladacean and Hexamita Nuclear Code |
68
|
|
|
|
|
|
|
9. The Echinoderm and Flatworm Mitochondrial Code |
69
|
|
|
|
|
|
|
10. The Euplotid Nuclear Code |
70
|
|
|
|
|
|
|
11. The Bacterial, Archaeal and Plant Plastid Code |
71
|
|
|
|
|
|
|
12. The Alternative Yeast Nuclear Code |
72
|
|
|
|
|
|
|
13. The Ascidian Mitochondrial Code |
73
|
|
|
|
|
|
|
14. The Alternative Flatworm Mitochondrial Code |
74
|
|
|
|
|
|
|
16. Chlorophycean Mitochondrial Code |
75
|
|
|
|
|
|
|
21. Trematode Mitochondrial Code |
76
|
|
|
|
|
|
|
22. Scenedesmus obliquus Mitochondrial Code |
77
|
|
|
|
|
|
|
23. Thraustochytrium Mitochondrial Code |
78
|
|
|
|
|
|
|
24. Pterobranchia Mitochondrial Code |
79
|
|
|
|
|
|
|
25. Candidate Division SR1 and Gracilibacteria Code |
80
|
|
|
|
|
|
|
see |
81
|
|
|
|
|
|
|
L |
82
|
|
|
|
|
|
|
for more details. |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=item -map_file |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
-map_file = a file containing a mapping between codons to amino |
87
|
|
|
|
|
|
|
acids, one codon per line followed by its amino acid, separated by |
88
|
|
|
|
|
|
|
tab or space. |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=item -debug |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
a switch to indicate whether to show more warnings which may |
93
|
|
|
|
|
|
|
help to identify sources of errors if any. put 1 to switch |
94
|
|
|
|
|
|
|
it on. The default is off. |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=back |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
Note: argument -map_file has higher priority than -id, and the |
99
|
|
|
|
|
|
|
default is -id => 1, i.e., the standard genetic code |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=cut |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
sub new |
104
|
|
|
|
|
|
|
{ |
105
|
4
|
|
|
4
|
1
|
1108
|
my ($caller, @args) = @_; |
106
|
|
|
|
|
|
|
|
107
|
4
|
|
|
|
|
35
|
my $self = $caller->SUPER::new(@args); |
108
|
|
|
|
|
|
|
|
109
|
4
|
|
|
|
|
19
|
my $hash = $self->_array_to_hash(\@args); |
110
|
|
|
|
|
|
|
|
111
|
4
|
50
|
|
|
|
32
|
if($hash->{'map_file'}) |
|
|
50
|
|
|
|
|
|
112
|
|
|
|
|
|
|
{ |
113
|
0
|
|
|
|
|
0
|
$self->_build_table_by_file($hash->{'map_file'}); |
114
|
|
|
|
|
|
|
}elsif($hash->{'id'}) |
115
|
|
|
|
|
|
|
{ |
116
|
4
|
|
|
|
|
26
|
$self->_build_table_by_id($hash->{'id'}); |
117
|
|
|
|
|
|
|
}else |
118
|
|
|
|
|
|
|
{ |
119
|
0
|
0
|
|
|
|
0
|
$self->warn("No arguments -map_file or -id is provided in", |
120
|
|
|
|
|
|
|
"$pkg, -id => 1 will be used") if($self->debug); |
121
|
0
|
|
|
|
|
0
|
$self->_build_table_by_id(1); |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
|
124
|
4
|
|
|
|
|
44
|
return $self; |
125
|
|
|
|
|
|
|
} |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
# get genetic code table by parsing a file |
128
|
|
|
|
|
|
|
sub _build_table_by_file |
129
|
|
|
|
|
|
|
{ |
130
|
0
|
|
|
0
|
|
0
|
my ($self, $file) = @_; |
131
|
|
|
|
|
|
|
|
132
|
0
|
|
|
|
|
0
|
my $codonToAA = $self->_parse_file($file,2); |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
# check all the codons and amino acids |
135
|
0
|
|
|
|
|
0
|
my %validCodons; |
136
|
|
|
|
|
|
|
my %stopCodons; |
137
|
0
|
|
|
|
|
0
|
while(my ($codon, $AA) = each %$codonToAA) |
138
|
|
|
|
|
|
|
{ |
139
|
0
|
|
|
|
|
0
|
$codon = _process_codon($codon); |
140
|
0
|
0
|
0
|
|
|
0
|
($self->warn("$codon is Not a valid codon") and next) |
141
|
|
|
|
|
|
|
unless($codon =~ /^[ATCG]{3}$/); |
142
|
0
|
|
|
|
|
0
|
$validCodons{$codon} = $AA; |
143
|
0
|
0
|
|
|
|
0
|
$stopCodons{$codon}++ if($self->_is_stop_aa($AA)); |
144
|
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
|
146
|
0
|
|
|
|
|
0
|
$self->{'_codon_to_aa'} = \%validCodons; |
147
|
0
|
|
|
|
|
0
|
$self->{'_stop_codons'} = \%stopCodons; |
148
|
0
|
|
|
|
|
0
|
my $totalCodonNum = scalar(keys %validCodons); |
149
|
0
|
|
|
|
|
0
|
$self->{'_num_codons'} = $totalCodonNum; |
150
|
0
|
0
|
|
|
|
0
|
if($totalCodonNum < 64) |
151
|
|
|
|
|
|
|
{ |
152
|
0
|
|
|
|
|
0
|
$self->warn("Only $totalCodonNum valid codons found in '$file'"); |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
|
155
|
0
|
|
|
|
|
0
|
return 1; |
156
|
|
|
|
|
|
|
} |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
# make codon table with given table ID |
159
|
|
|
|
|
|
|
sub _build_table_by_id |
160
|
|
|
|
|
|
|
{ |
161
|
4
|
|
|
4
|
|
11
|
my ($self, $id) = @_; |
162
|
|
|
|
|
|
|
|
163
|
4
|
50
|
|
|
|
17
|
$self->throw("Id '$id' is not a valid genetic code table Id") |
164
|
|
|
|
|
|
|
unless($self->_is_valid_gc_id($id)); |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
#my $curFile = __FILE__; |
167
|
|
|
|
|
|
|
#warn "I am in $curFile\n"; |
168
|
|
|
|
|
|
|
|
169
|
4
|
|
|
|
|
63
|
my $fh = $self->_open_file(__FILE__); |
170
|
|
|
|
|
|
|
|
171
|
4
|
|
|
|
|
9
|
my $inDataSection = 0; |
172
|
4
|
|
|
|
|
8
|
my $inGCSection = 0; # genetic codon section |
173
|
4
|
|
|
|
|
10
|
my $data = ''; |
174
|
|
|
|
|
|
|
# cut the genetic codon section first |
175
|
4
|
|
|
|
|
130
|
while(<$fh>) |
176
|
|
|
|
|
|
|
{ |
177
|
3616
|
100
|
|
|
|
6626
|
$inDataSection = 1 if(/^__END__/); |
178
|
3616
|
100
|
|
|
|
9831
|
next unless($inDataSection); |
179
|
1068
|
100
|
|
|
|
1913
|
last if(/^<
|
180
|
1064
|
100
|
|
|
|
1879
|
$inGCSection = 1 if(/^>>GC/); |
181
|
1064
|
100
|
100
|
|
|
5274
|
next if(/^>/ or /^--/); # comment lines |
182
|
736
|
|
|
|
|
2540
|
$data .= $_; |
183
|
|
|
|
|
|
|
} |
184
|
4
|
|
|
|
|
91
|
close $fh; |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
# match each table and find that with the id = $id |
187
|
4
|
|
|
|
|
9
|
my $table; |
188
|
4
|
|
|
|
|
112
|
while($data =~ /\n *{ *\n *(name[^}]+)}/gcm) |
189
|
|
|
|
|
|
|
{ |
190
|
4
|
|
|
|
|
28
|
$table = $1; |
191
|
4
|
50
|
|
|
|
188
|
next unless($table =~ /^ *id\s+$id\s*,/m); |
192
|
4
|
|
|
|
|
11
|
last; # found |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
# now parse this table |
196
|
4
|
|
|
|
|
9
|
my %codonToAA; |
197
|
|
|
|
|
|
|
my %stopCodons; |
198
|
0
|
|
|
|
|
0
|
my %startCodons; |
199
|
4
|
|
|
|
|
37
|
my ($b1) = $table =~ /^ *-- +Base1 +(\w+)/mo; |
200
|
4
|
|
|
|
|
38
|
my ($b2) = $table =~ /^ *-- +Base2 +(\w+)/mo; |
201
|
4
|
|
|
|
|
38
|
my ($b3) = $table =~ /^ *-- +Base3 +(\w+)/mo; |
202
|
4
|
|
|
|
|
34
|
my ($AAs) = $table =~ /^ *ncbieaa +"([^"]+)"/mo; |
203
|
4
|
|
|
|
|
17
|
$AAs =~ s/\s+//g; |
204
|
4
|
|
|
|
|
34
|
my ($starts) = $table =~ /^ *sncbieaa +"([^"]+)"/mo; |
205
|
4
|
|
|
|
|
12
|
$starts =~ s/\s+//g; |
206
|
4
|
|
|
|
|
9
|
my @names; |
207
|
4
|
|
|
|
|
34
|
while($table =~ /^ *name +("[^"]+")/mgco) |
208
|
|
|
|
|
|
|
{ |
209
|
8
|
|
|
|
|
20
|
my $name = $1; |
210
|
8
|
|
|
|
|
18
|
$name =~ s/\n/ /g; |
211
|
8
|
|
|
|
|
42
|
push @names, $name; |
212
|
|
|
|
|
|
|
} |
213
|
|
|
|
|
|
|
|
214
|
4
|
50
|
|
|
|
19
|
$self->warn("The length of lines in genetic table $id is not 64") |
215
|
|
|
|
|
|
|
unless(length($b1) == 64); |
216
|
4
|
50
|
33
|
|
|
70
|
$self->throw("lines of bases and amino acids are not the same long", |
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
217
|
|
|
|
|
|
|
"in genetic table $id") |
218
|
|
|
|
|
|
|
unless( length($b1) == length($b2) and |
219
|
|
|
|
|
|
|
length($b1) == length($b3) and |
220
|
|
|
|
|
|
|
length($b1) == length($AAs) and |
221
|
|
|
|
|
|
|
length($b1) == length($starts)); |
222
|
|
|
|
|
|
|
|
223
|
4
|
|
|
|
|
190
|
$self->set_tag('name', join(' or ', @names)); |
224
|
4
|
|
|
|
|
15
|
$self->set_tag('id', $id); |
225
|
4
|
|
|
|
|
27
|
for(my $i = 0; $i < length($b1); $i++) |
226
|
|
|
|
|
|
|
{ |
227
|
256
|
|
|
|
|
308
|
my $nt1 = substr($b1, $i, 1); |
228
|
256
|
|
|
|
|
236
|
my $nt2 = substr($b2, $i, 1); |
229
|
256
|
|
|
|
|
231
|
my $nt3 = substr($b3, $i, 1); |
230
|
256
|
|
|
|
|
255
|
my $AA = substr($AAs, $i, 1); |
231
|
256
|
|
|
|
|
349
|
my $start = substr($starts, $i, 1); |
232
|
256
|
|
|
|
|
300
|
my $codon = uc($nt1.$nt2.$nt3); |
233
|
256
|
|
|
|
|
452
|
$codonToAA{$codon} = $AA; |
234
|
256
|
100
|
|
|
|
362
|
$stopCodons{$codon}++ if($self->_is_stop_aa($AA)); |
235
|
256
|
100
|
|
|
|
752
|
$startCodons{$codon}++ unless($start eq '-'); |
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
|
238
|
4
|
|
|
|
|
15
|
$self->{'_codon_to_aa'} = \%codonToAA; |
239
|
4
|
|
|
|
|
10
|
$self->{'_stop_codons'} = \%stopCodons; |
240
|
4
|
|
|
|
|
12
|
$self->{'_start_codons'} = \%startCodons; |
241
|
4
|
|
|
|
|
15
|
$self->{'_num_codons'} = scalar(keys %codonToAA); |
242
|
|
|
|
|
|
|
|
243
|
4
|
|
|
|
|
52
|
return 1; |
244
|
|
|
|
|
|
|
} |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
=head2 name |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
Title : name |
249
|
|
|
|
|
|
|
Usage : $name = $self->name(); |
250
|
|
|
|
|
|
|
Function: the name of genetic code table in use |
251
|
|
|
|
|
|
|
Returns : a string for the name |
252
|
|
|
|
|
|
|
Args : None |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
=cut |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
sub name |
257
|
|
|
|
|
|
|
{ |
258
|
0
|
|
|
0
|
1
|
0
|
$_[0]->get_tag('name'); |
259
|
|
|
|
|
|
|
} |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
=head2 id |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
Title : id |
264
|
|
|
|
|
|
|
Usage : $id = $self->id(); |
265
|
|
|
|
|
|
|
Function: the id of genetic code table in use |
266
|
|
|
|
|
|
|
Returns : a integer for the id |
267
|
|
|
|
|
|
|
Args : None |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
=cut |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
sub id |
272
|
|
|
|
|
|
|
{ |
273
|
0
|
|
|
0
|
1
|
0
|
$_[0]->get_tag('id'); |
274
|
|
|
|
|
|
|
} |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
=head2 total_num_of_codons |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
Title : total_num_of_codons |
279
|
|
|
|
|
|
|
Usage : $num = $self->total_num_of_codons; |
280
|
|
|
|
|
|
|
Function: get total codons of the genetic code table in use |
281
|
|
|
|
|
|
|
Returns : an integer |
282
|
|
|
|
|
|
|
Args : None |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
=cut |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
sub total_num_of_codons |
287
|
|
|
|
|
|
|
{ |
288
|
0
|
|
|
0
|
1
|
0
|
$_[0]->{'_num_codons'}; |
289
|
|
|
|
|
|
|
} |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
sub _is_valid_gc_id |
292
|
|
|
|
|
|
|
{ |
293
|
4
|
|
|
4
|
|
12
|
my ($self, $id) = @_; |
294
|
|
|
|
|
|
|
|
295
|
4
|
50
|
|
|
|
92
|
return 1 if($validGCIds{$id}); |
296
|
0
|
|
|
|
|
0
|
return 0; |
297
|
|
|
|
|
|
|
} |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
# check whether this AA is a stop symbol |
300
|
|
|
|
|
|
|
sub _is_stop_aa |
301
|
|
|
|
|
|
|
{ |
302
|
768
|
|
|
768
|
|
793
|
my ($self, $AA) = @_; |
303
|
|
|
|
|
|
|
|
304
|
768
|
100
|
|
|
|
1412
|
return 1 if($AA eq $STOPAA); |
305
|
732
|
|
|
|
|
1454
|
return 0; |
306
|
|
|
|
|
|
|
} |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
=head2 is_valid_codon |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
Title : is_valid_codon |
311
|
|
|
|
|
|
|
Usage : $test = $self->is_valid_codon('ACG'); |
312
|
|
|
|
|
|
|
Function: test whether a given character string is a valid codon in |
313
|
|
|
|
|
|
|
current codon table |
314
|
|
|
|
|
|
|
Returns : 1 if true, otherwise 0 |
315
|
|
|
|
|
|
|
Args : a codon sequence |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
=cut |
318
|
|
|
|
|
|
|
# check whether this is a valid codon |
319
|
|
|
|
|
|
|
sub is_valid_codon |
320
|
|
|
|
|
|
|
{ |
321
|
0
|
|
|
0
|
1
|
0
|
my ($self,$codon,$allowAmb) = @_; |
322
|
|
|
|
|
|
|
|
323
|
0
|
|
|
|
|
0
|
$codon = _process_codon($codon); |
324
|
0
|
0
|
|
|
|
0
|
return 0 unless($codon =~ /^[ATCGU]{3}$/); # no ambiguous at present |
325
|
|
|
|
|
|
|
# also check whether it is in codon table |
326
|
0
|
|
|
|
|
0
|
my $codons = $self->{'_codon_to_aa'}; |
327
|
0
|
0
|
|
|
|
0
|
return 0 unless(exists $codons->{$codon}); |
328
|
0
|
|
|
|
|
0
|
return 1; |
329
|
|
|
|
|
|
|
} |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=head2 all_sense_codons |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
Title : all_sense_codons |
334
|
|
|
|
|
|
|
Usage : @codons = $self->all_sense_codons; |
335
|
|
|
|
|
|
|
Function: get all the sense codons in this genetic code table |
336
|
|
|
|
|
|
|
Returns : an array of codons, or its reference in scalar context |
337
|
|
|
|
|
|
|
Args : None |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
=cut |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
sub all_sense_codons |
342
|
|
|
|
|
|
|
{ |
343
|
28
|
|
|
28
|
1
|
56
|
my ($self) = @_; |
344
|
|
|
|
|
|
|
|
345
|
28
|
|
|
|
|
101
|
my $codonToAA = $self->{'_codon_to_aa'}; |
346
|
28
|
|
|
|
|
90
|
my $stopCodons = $self->{'_stop_codons'}; |
347
|
28
|
|
|
|
|
575
|
my @senseCodons = grep {!exists($stopCodons->{$_})} keys %$codonToAA; |
|
1792
|
|
|
|
|
3450
|
|
348
|
|
|
|
|
|
|
|
349
|
28
|
100
|
|
|
|
664
|
return wantarray? @senseCodons : \@senseCodons; |
350
|
|
|
|
|
|
|
} |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
=head2 all_amino_acids |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
Title : all_amino_acids |
355
|
|
|
|
|
|
|
Usage : @AAs = $self->all_amino_acids |
356
|
|
|
|
|
|
|
Function: get all the amino acids in this genetic code table |
357
|
|
|
|
|
|
|
Returns : an array of amino acids, or its reference if in scalar |
358
|
|
|
|
|
|
|
context |
359
|
|
|
|
|
|
|
Args : None |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
=cut |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
sub all_amino_acids |
364
|
|
|
|
|
|
|
{ |
365
|
8
|
|
|
8
|
1
|
13
|
my $self = shift; |
366
|
8
|
50
|
|
|
|
29
|
my $codonToAA = $self->{'_codon_to_aa'} or return; |
367
|
|
|
|
|
|
|
|
368
|
8
|
|
|
|
|
14
|
my %AAs; |
369
|
8
|
|
|
|
|
39
|
while(my ($k,$v) = each %$codonToAA) |
370
|
|
|
|
|
|
|
{ |
371
|
512
|
100
|
|
|
|
780
|
next if $self->_is_stop_aa($v); |
372
|
488
|
|
|
|
|
1680
|
$AAs{$v}++; |
373
|
|
|
|
|
|
|
} |
374
|
|
|
|
|
|
|
|
375
|
8
|
|
|
|
|
74
|
my @tmp = keys %AAs; |
376
|
8
|
50
|
|
|
|
125
|
return wantarray? @tmp : \@tmp; |
377
|
|
|
|
|
|
|
} |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
=head2 all_start_codons |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
Title : all_start_codons |
382
|
|
|
|
|
|
|
Usage : @startCodons = $self->all_start_codons; |
383
|
|
|
|
|
|
|
Function: get all the start codons in the genetic code table in use |
384
|
|
|
|
|
|
|
Returns : an array of codons, or its reference if in scalar context |
385
|
|
|
|
|
|
|
Args : None |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
=cut |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
sub all_start_codons |
390
|
|
|
|
|
|
|
{ |
391
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
392
|
0
|
0
|
0
|
|
|
0
|
$self->warn("No marked start codons in this GC table") and return |
393
|
|
|
|
|
|
|
unless(exists $self->{'_start_codons'}); |
394
|
0
|
|
|
|
|
0
|
my @codons = keys %{$self->{'_start_codons'}}; |
|
0
|
|
|
|
|
0
|
|
395
|
0
|
0
|
|
|
|
0
|
wantarray? @codons : \@codons; |
396
|
|
|
|
|
|
|
} |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
=head2 all_stop_codons |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
Title : all_stop_codons |
401
|
|
|
|
|
|
|
Usage : @stopCodons = $self->all_stop_codons; |
402
|
|
|
|
|
|
|
Function: get all the stop codons in the genetic code table in use |
403
|
|
|
|
|
|
|
Returns : an array of codons, or its reference if in scalar context |
404
|
|
|
|
|
|
|
Args : None |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
=cut |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
sub all_stop_codons |
409
|
|
|
|
|
|
|
{ |
410
|
0
|
|
|
0
|
1
|
0
|
my @codons = keys %{$_[0]->{'_stop_codons'}}; |
|
0
|
|
|
|
|
0
|
|
411
|
0
|
0
|
|
|
|
0
|
wantarray? @codons : \@codons; |
412
|
|
|
|
|
|
|
} |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
=head2 codons_of_AA |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
Title : codons_of_AA |
417
|
|
|
|
|
|
|
Usage : @codons = $self->codons_of_AA('S'); |
418
|
|
|
|
|
|
|
Function: get codons encoding the given amino acid |
419
|
|
|
|
|
|
|
Returns : an array of codons, or its reference if in scalar context |
420
|
|
|
|
|
|
|
Args : a single amino acid; for stop codons, one can give '*' here |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
=cut |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
sub codons_of_AA |
425
|
|
|
|
|
|
|
{ |
426
|
160
|
|
|
160
|
1
|
210
|
my ($self, $AA) = @_; |
427
|
|
|
|
|
|
|
|
428
|
160
|
|
|
|
|
346
|
$AA =~ s/\s+//g; $AA = uc($AA); |
|
160
|
|
|
|
|
251
|
|
429
|
160
|
50
|
|
|
|
345
|
$self->throw("Can only process one amino acid each time") |
430
|
|
|
|
|
|
|
if(length($AA) > 1); |
431
|
|
|
|
|
|
|
|
432
|
160
|
|
|
|
|
228
|
my $codonToAA = $self->{'_codon_to_aa'}; |
433
|
160
|
|
|
|
|
1475
|
my @codons = grep { $codonToAA->{$_} eq $AA } keys %$codonToAA; |
|
10240
|
|
|
|
|
15249
|
|
434
|
|
|
|
|
|
|
|
435
|
160
|
50
|
|
|
|
1338
|
return wantarray? @codons : \@codons; |
436
|
|
|
|
|
|
|
} |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
=head2 codon_to_AA_map |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
Title : codon_to_AA_map |
441
|
|
|
|
|
|
|
Usage : $hash = $self->codon_to_AA_map |
442
|
|
|
|
|
|
|
Function: get the mapping from codon to amino acid in a hash |
443
|
|
|
|
|
|
|
Returns : a hash reference in which codons are keys and AAs are |
444
|
|
|
|
|
|
|
values |
445
|
|
|
|
|
|
|
Args : None |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
=cut |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
sub codon_to_AA_map |
450
|
|
|
|
|
|
|
{ |
451
|
1
|
|
|
1
|
1
|
8
|
$_[0]->{'_codon_to_aa'}; |
452
|
|
|
|
|
|
|
} |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
=head2 translate |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
Title : translate |
457
|
|
|
|
|
|
|
Usage : $AA_string = $self->translate('ATGGCA'); |
458
|
|
|
|
|
|
|
Function: get the translation of input nucleotides |
459
|
|
|
|
|
|
|
Returns : a string of amino acids, unknown amino acids are |
460
|
|
|
|
|
|
|
represented as 'X'. |
461
|
|
|
|
|
|
|
Args : nucleotide sequence. |
462
|
|
|
|
|
|
|
Note : if the input sequence is not multiple of 3 long, the last |
463
|
|
|
|
|
|
|
remained 1 or 2 nucleotides would be simply ignored. |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
=cut |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
sub translate |
468
|
|
|
|
|
|
|
{ |
469
|
0
|
|
|
0
|
1
|
0
|
my ($self, $seq) = @_; |
470
|
|
|
|
|
|
|
|
471
|
0
|
|
|
|
|
0
|
$seq =~ s/\s+//g; |
472
|
0
|
|
|
|
|
0
|
$seq = uc($seq); |
473
|
|
|
|
|
|
|
|
474
|
0
|
|
|
|
|
0
|
my $seqLen = length($seq); |
475
|
0
|
|
|
|
|
0
|
my $accuLen = 0; |
476
|
0
|
|
|
|
|
0
|
my $AAs = ''; |
477
|
0
|
|
|
|
|
0
|
my $codonToAA = $self->{'_codon_to_aa'}; |
478
|
0
|
|
|
|
|
0
|
while($accuLen + 3 <= $seqLen) |
479
|
|
|
|
|
|
|
{ |
480
|
0
|
|
|
|
|
0
|
my $codon = substr($seq, $accuLen, 3); |
481
|
0
|
0
|
|
|
|
0
|
$self->warn("'$codon' is not a valid codon") |
482
|
|
|
|
|
|
|
unless($self->is_valid_codon($codon)); |
483
|
0
|
0
|
|
|
|
0
|
$AAs .= exists $codonToAA->{$codon}? $codonToAA->{$codon} : |
484
|
|
|
|
|
|
|
'X'; # X for unknown codons |
485
|
0
|
|
|
|
|
0
|
$accuLen += 3; |
486
|
|
|
|
|
|
|
} |
487
|
|
|
|
|
|
|
|
488
|
0
|
|
|
|
|
0
|
return $AAs; |
489
|
|
|
|
|
|
|
} |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
=head2 is_stop_codon |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
Title : is_stop_codon |
494
|
|
|
|
|
|
|
Usage : $test = $self->is_stop_codon('UAG'); |
495
|
|
|
|
|
|
|
Function: check whether this is a stop codon |
496
|
|
|
|
|
|
|
Returns : 1 if true, otherwise 0 |
497
|
|
|
|
|
|
|
Args : a codon sequence |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
=cut |
500
|
|
|
|
|
|
|
# check whether |
501
|
|
|
|
|
|
|
sub is_stop_codon |
502
|
|
|
|
|
|
|
{ |
503
|
128
|
|
|
128
|
1
|
163
|
my ($self, $codon) = @_; |
504
|
128
|
|
|
|
|
148
|
my $stopCodons = $self->{'_stop_codons'}; |
505
|
128
|
|
|
|
|
167
|
$codon = _process_codon($codon); |
506
|
128
|
100
|
|
|
|
294
|
return 1 if($stopCodons->{$codon}); |
507
|
122
|
|
|
|
|
234
|
return 0; |
508
|
|
|
|
|
|
|
} |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
# process codons before other actions |
511
|
|
|
|
|
|
|
sub _process_codon |
512
|
|
|
|
|
|
|
{ |
513
|
128
|
|
|
128
|
|
132
|
my $codon = shift; |
514
|
128
|
|
|
|
|
179
|
$codon =~ s/\s+//g; |
515
|
128
|
|
|
|
|
152
|
$codon =~ tr/uU/TT/; # U to T |
516
|
128
|
|
|
|
|
206
|
return uc($codon); |
517
|
|
|
|
|
|
|
} |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
=head2 codon_degeneracy |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
Title : codon_degeneracy |
522
|
|
|
|
|
|
|
Usage : $hash = $self->codon_degeneracy; |
523
|
|
|
|
|
|
|
Function: group AAs and codons into codon degeneracy groups |
524
|
|
|
|
|
|
|
Returns : reference to a hash in which 1st level key is degeneracy |
525
|
|
|
|
|
|
|
(i.e., 1,2,6,etc), 2nd level key is amino acids for that degeneracy |
526
|
|
|
|
|
|
|
group, and 3rd level is reference of arrays containing coding codons |
527
|
|
|
|
|
|
|
for each amino acid. For example: |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
{ 2 => { D => [GAU, GAC], |
530
|
|
|
|
|
|
|
C => [UGU, UGC], |
531
|
|
|
|
|
|
|
... ... |
532
|
|
|
|
|
|
|
}, |
533
|
|
|
|
|
|
|
4 => { A => [GCU, GCC, GCA, GCG], |
534
|
|
|
|
|
|
|
... ... |
535
|
|
|
|
|
|
|
}, |
536
|
|
|
|
|
|
|
... ... ... |
537
|
|
|
|
|
|
|
} |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
Args : None |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
=cut |
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
# group AAs and codons into redundancy groups |
544
|
|
|
|
|
|
|
sub codon_degeneracy |
545
|
|
|
|
|
|
|
{ |
546
|
53
|
|
|
53
|
1
|
1310
|
my $self = shift; |
547
|
|
|
|
|
|
|
|
548
|
53
|
100
|
|
|
|
455
|
return $self->{'_codon_deg'} if(exists $self->{'_codon_deg'}); |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
# otherwise construct it if its the first time |
551
|
2
|
|
|
|
|
5
|
my $codonToAA = $self->{'_codon_to_aa'}; |
552
|
2
|
|
|
|
|
4
|
my %aaToCodon; |
553
|
2
|
|
|
|
|
16
|
while(my ($codon, $AA) = each %$codonToAA) |
554
|
|
|
|
|
|
|
{ |
555
|
|
|
|
|
|
|
# ignore stop codons |
556
|
128
|
100
|
|
|
|
214
|
next if($self->is_stop_codon($codon)); |
557
|
122
|
|
|
|
|
113
|
push @{$aaToCodon{$AA}}, $codon; |
|
122
|
|
|
|
|
461
|
|
558
|
|
|
|
|
|
|
} |
559
|
|
|
|
|
|
|
|
560
|
2
|
|
|
|
|
4
|
my %redundancy; |
561
|
2
|
|
|
|
|
15
|
while(my ($AA, $codons) = each %aaToCodon) |
562
|
|
|
|
|
|
|
{ |
563
|
40
|
|
|
|
|
47
|
my $red = $#$codons + 1; |
564
|
40
|
|
|
|
|
289
|
$redundancy{$red}->{$AA} = [sort @$codons]; |
565
|
|
|
|
|
|
|
} |
566
|
|
|
|
|
|
|
|
567
|
2
|
|
|
|
|
9
|
$self->{'_codon_deg'} = \%redundancy; # store it first |
568
|
2
|
|
|
|
|
31
|
return \%redundancy; |
569
|
|
|
|
|
|
|
} |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
=head1 AUTHOR |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
Zhenguo Zhang, C<< >> |
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
=head1 BUGS |
577
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
Please report any bugs or feature requests to C, or through |
579
|
|
|
|
|
|
|
the web interface at L. I will be notified, and then you'll |
580
|
|
|
|
|
|
|
automatically be notified of progress on your bug as I make changes. |
581
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
=head1 SUPPORT |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
perldoc Bio::CUA::CodonTable |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
You can also look for information at: |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
=over 4 |
593
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker (report bugs here) |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
L |
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
599
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
L |
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
=item * CPAN Ratings |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
L |
605
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
=item * Search CPAN |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
L |
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
=back |
611
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
|
613
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
Copyright 2015 Zhenguo Zhang. |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
This program is free software: you can redistribute it and/or modify |
621
|
|
|
|
|
|
|
it under the terms of the GNU General Public License as published by |
622
|
|
|
|
|
|
|
the Free Software Foundation, either version 3 of the License, or |
623
|
|
|
|
|
|
|
(at your option) any later version. |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, |
626
|
|
|
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
627
|
|
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
628
|
|
|
|
|
|
|
GNU General Public License for more details. |
629
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License |
631
|
|
|
|
|
|
|
along with this program. If not, see L. |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
=cut |
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
1; # End of Bio::CUA::CodonTable |
637
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
__END__ |