line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::CodonUsage::Table |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Richard Adams (richard.adams@ed.ac.uk) |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright Richard Adams |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::CodonUsage::Table - for access to the Codon usage Database |
17
|
|
|
|
|
|
|
at http://www.kazusa.or.jp/codon. |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 SYNOPSIS |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
use Bio::CodonUsage::Table; |
22
|
|
|
|
|
|
|
use Bio::DB::CUTG; |
23
|
|
|
|
|
|
|
use Bio::CodonUsage::IO; |
24
|
|
|
|
|
|
|
use Bio::Tools::SeqStats; |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# Get a codon usage table from web database |
27
|
|
|
|
|
|
|
my $cdtable = Bio::DB::CUTG->new(-sp => 'Mus musculus', |
28
|
|
|
|
|
|
|
-gc => 1); |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# Or from local file |
31
|
|
|
|
|
|
|
my $io = Bio::CodonUsage::IO->new(-file => "file"); |
32
|
|
|
|
|
|
|
my $cdtable = $io->next_data(); |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
# Or create your own from a Bio::PrimarySeq compliant object, |
35
|
|
|
|
|
|
|
# $codonstats is a ref to a hash of codon name /count key-value pairs |
36
|
|
|
|
|
|
|
my $codonstats = Bio::Tools::SeqStats->count_codons($Seq_objct); |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# '-data' must be specified, '-species' and 'genetic_code' are optional |
39
|
|
|
|
|
|
|
my $CUT = Bio::CodonUsage::Table->new(-data => $codonstats, |
40
|
|
|
|
|
|
|
-species => 'Hsapiens_kinase'); |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
print "leu frequency is ", $cdtable->aa_frequency('LEU'), "\n"; |
43
|
|
|
|
|
|
|
print "freq of ATG is ", $cdtable->codon_rel_frequency('ttc'), "\n"; |
44
|
|
|
|
|
|
|
print "abs freq of ATG is ", $cdtable->codon_abs_frequency('ATG'), "\n"; |
45
|
|
|
|
|
|
|
print "number of ATG codons is ", $cdtable->codon_count('ATG'), "\n"; |
46
|
|
|
|
|
|
|
print "GC content at position 1 is ", $cdtable->get_coding_gc('1'), "\n"; |
47
|
|
|
|
|
|
|
print "total CDSs for Mus musculus is ", $cdtable->cds_count(), "\n"; |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=head1 DESCRIPTION |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
This class provides methods for accessing codon usage table data. |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
All of the methods at present are simple look-ups of the table or are |
55
|
|
|
|
|
|
|
derived from simple calculations from the table. Future methods could |
56
|
|
|
|
|
|
|
include measuring the codon usage of a sequence , for example, or |
57
|
|
|
|
|
|
|
provide methods for examining codon usage in alignments. |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=head1 SEE ALSO |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
L, |
62
|
|
|
|
|
|
|
L, |
63
|
|
|
|
|
|
|
L, |
64
|
|
|
|
|
|
|
L |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=head1 FEEDBACK |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=head2 Mailing Lists |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
72
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
73
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
76
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=head2 Support |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
I |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
85
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
86
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
87
|
|
|
|
|
|
|
with code and data examples if at all possible. |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
=head2 Reporting Bugs |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
92
|
|
|
|
|
|
|
the bugs and their resolution. Bug reports can be submitted via the |
93
|
|
|
|
|
|
|
web: |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
=head1 AUTHORS |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
Richard Adams, Richard.Adams@ed.ac.uk |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=head1 APPENDIX |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
104
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a _ |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=cut |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# Let the code begin... |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
package Bio::CodonUsage::Table; |
112
|
2
|
|
|
2
|
|
8
|
use strict; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
54
|
|
113
|
2
|
|
|
2
|
|
13
|
use vars qw(%STRICTAA @AA); |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
75
|
|
114
|
2
|
|
|
2
|
|
394
|
use Bio::SeqUtils; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
44
|
|
115
|
2
|
|
|
2
|
|
338
|
use Bio::Tools::CodonTable; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
37
|
|
116
|
|
|
|
|
|
|
|
117
|
2
|
|
|
2
|
|
8
|
use base qw(Bio::Root::Root); |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
163
|
|
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
BEGIN{ |
120
|
2
|
|
|
2
|
|
8
|
@AA = qw(A C D E F G H I K L M N P Q R S T V W Y *); |
121
|
2
|
|
|
|
|
6
|
map {$STRICTAA{$_} = undef} @AA; |
|
42
|
|
|
|
|
3780
|
|
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=head2 new |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
Title : new |
127
|
|
|
|
|
|
|
Usage : my $cut = Bio::CodonUsage::Table->new(-data => $cut_hash_ref, |
128
|
|
|
|
|
|
|
-species => 'H.sapiens_kinase' |
129
|
|
|
|
|
|
|
-genetic_code =>1); |
130
|
|
|
|
|
|
|
Returns : a reference to a new Bio::CodonUsage::Table object |
131
|
|
|
|
|
|
|
Args : none or a reference to a hash of codon counts. This constructor is |
132
|
|
|
|
|
|
|
designed to be compatible with the output of |
133
|
|
|
|
|
|
|
Bio::Tools::SeqUtils::count_codons() |
134
|
|
|
|
|
|
|
Species and genetic code parameters can be entered here or via the |
135
|
|
|
|
|
|
|
species() and genetic_code() methods separately. |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
=cut |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
sub new { |
140
|
4
|
|
|
4
|
1
|
7
|
my ($class, @args) = @_; |
141
|
4
|
|
|
|
|
17
|
my $self= $class->SUPER::new(@args); |
142
|
4
|
100
|
|
|
|
9
|
if (@args) { |
143
|
1
|
|
|
|
|
5
|
$self->_rearrange([qw(DATA)], @args); |
144
|
1
|
|
|
|
|
1
|
shift @args; # get rid of key |
145
|
1
|
|
|
|
|
2
|
my $arg = shift @args; |
146
|
1
|
50
|
|
|
|
3
|
$self->throw("need a hash reference, not a [" . ref($arg). "] reference") if ref($arg) ne 'HASH'; |
147
|
|
|
|
|
|
|
### flags to detect argument type, can be either to start with ## |
148
|
1
|
|
|
|
|
2
|
my $is_codon_hash = 1; |
149
|
1
|
|
|
|
|
1
|
my $is_Aa_hash = 1; |
150
|
1
|
|
|
|
|
7
|
for my $k (keys %$arg) { |
151
|
|
|
|
|
|
|
## convert to UC |
152
|
63
|
|
|
|
|
149
|
$k =~ s/(\w+)/\U$1/; |
153
|
63
|
50
|
|
|
|
58
|
if (!exists($STRICTAA{$k}) ){ |
|
|
0
|
|
|
|
|
|
154
|
63
|
|
|
|
|
46
|
$is_Aa_hash = 0; |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
elsif ($k =~ /[^ATCGatcg]/) { |
157
|
0
|
|
|
|
|
0
|
$is_codon_hash = 0; |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
} |
160
|
1
|
50
|
33
|
|
|
11
|
if (!$is_codon_hash && !$is_Aa_hash) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
161
|
0
|
|
|
|
|
0
|
$self->throw(" invalid key values in CUT hash - must be unique aa or nucleotide identifiers"); |
162
|
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
elsif ($is_Aa_hash) { |
164
|
0
|
|
|
|
|
0
|
$self->_init_from_aa($arg); |
165
|
|
|
|
|
|
|
} |
166
|
|
|
|
|
|
|
elsif($is_codon_hash) { |
167
|
1
|
|
|
|
|
3
|
$self->_init_from_cod($arg); |
168
|
|
|
|
|
|
|
} |
169
|
1
|
|
|
|
|
9
|
while (@args) { |
170
|
0
|
|
|
|
|
0
|
my $key = shift @args; |
171
|
0
|
|
|
|
|
0
|
$key =~ s/\-(\w+)/\L$1/; |
172
|
|
|
|
|
|
|
|
173
|
0
|
|
|
|
|
0
|
$self->$key(shift @args); |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
|
177
|
4
|
|
|
|
|
9
|
return $self; |
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=head2 all_aa_frequencies |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
Title : all_aa_frequencies |
183
|
|
|
|
|
|
|
Usage : my $freq = $cdtable->all_aa_frequencies(); |
184
|
|
|
|
|
|
|
Returns : a reference to a hash where each key is an amino acid |
185
|
|
|
|
|
|
|
and each value is its frequency in all proteins in that |
186
|
|
|
|
|
|
|
species. |
187
|
|
|
|
|
|
|
Args : none |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=cut |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
sub all_aa_frequencies { |
192
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
193
|
0
|
|
|
|
|
0
|
my %aa_freqs =(); |
194
|
0
|
|
|
|
|
0
|
for my $aa (keys %STRICTAA) { |
195
|
0
|
|
|
|
|
0
|
my $freq = $self->aa_frequency($aa); |
196
|
0
|
|
|
|
|
0
|
$aa_freqs{$aa} = $freq; |
197
|
|
|
|
|
|
|
} |
198
|
0
|
|
|
|
|
0
|
return \%aa_freqs; |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
=head2 codon_abs_frequency |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
Title : codon_abs_frequency |
204
|
|
|
|
|
|
|
Usage : my $freq = $cdtable->codon_abs_frequency('CTG'); |
205
|
|
|
|
|
|
|
Purpose : To return the frequency of that codon as a percentage |
206
|
|
|
|
|
|
|
of all codons in the organism. |
207
|
|
|
|
|
|
|
Returns : a percentage frequency |
208
|
|
|
|
|
|
|
Args : a non-ambiguous codon string |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
=cut |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
sub codon_abs_frequency { |
213
|
1
|
|
|
1
|
1
|
2
|
my ($self, $a) = @_; |
214
|
1
|
|
|
|
|
2
|
my $cod = uc $a; |
215
|
1
|
50
|
|
|
|
2
|
if ($self->_check_codon($cod)) { |
216
|
1
|
|
|
|
|
3
|
my $ctable = Bio::Tools::CodonTable->new; |
217
|
1
|
|
|
|
|
3
|
$ctable->id($self->genetic_code() ); |
218
|
1
|
|
|
|
|
3
|
my $aa =$Bio::SeqUtils::THREECODE {$ctable->translate($cod)}; |
219
|
|
|
|
|
|
|
|
220
|
1
|
|
|
|
|
5
|
return $self->{'_table'}{$aa}{$cod}{'per1000'}/10 ; |
221
|
|
|
|
|
|
|
} |
222
|
0
|
|
|
|
|
0
|
else {return 0;} |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
=head2 codon_rel_frequency |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
Title : codon_rel_frequency |
228
|
|
|
|
|
|
|
Usage : my $freq = $cdtable->codon_rel_frequency('CTG'); |
229
|
|
|
|
|
|
|
Purpose : To return the frequency of that codon as a percentage |
230
|
|
|
|
|
|
|
of codons coding for the same amino acid. E.g., ATG and TGG |
231
|
|
|
|
|
|
|
would return 100 as those codons are unique. |
232
|
|
|
|
|
|
|
Returns : a percentage frequency |
233
|
|
|
|
|
|
|
Args : a non-ambiguous codon string |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
=cut |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
sub codon_rel_frequency { |
239
|
65
|
|
|
65
|
1
|
64
|
my ($self, $a) = @_; |
240
|
65
|
|
|
|
|
57
|
my $cod = uc $a; |
241
|
65
|
50
|
|
|
|
66
|
if ($self->_check_codon($cod)) { |
242
|
65
|
|
|
|
|
91
|
my $ctable = Bio::Tools::CodonTable->new; |
243
|
65
|
|
|
|
|
73
|
$ctable->id($self->genetic_code () ); |
244
|
65
|
|
|
|
|
97
|
my $aa =$Bio::SeqUtils::THREECODE {$ctable->translate($cod)}; |
245
|
65
|
|
|
|
|
146
|
return $self->{'_table'}{$aa}{$cod}{'rel_freq'}; |
246
|
|
|
|
|
|
|
} |
247
|
|
|
|
|
|
|
else { |
248
|
0
|
|
|
|
|
0
|
return 0; |
249
|
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
|
} |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
=head2 probable_codons |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
Title : probable_codons |
255
|
|
|
|
|
|
|
Usage : my $prob_codons = $cd_table->probable_codons(10); |
256
|
|
|
|
|
|
|
Purpose : to obtain a list of codons for the amino acid above a given |
257
|
|
|
|
|
|
|
threshold % relative frequency |
258
|
|
|
|
|
|
|
Returns : A reference to a hash where keys are 1 letter amino acid codes |
259
|
|
|
|
|
|
|
and values are references to arrays of codons whose frequency |
260
|
|
|
|
|
|
|
is above the threshold. |
261
|
|
|
|
|
|
|
Arguments: a minimum threshold frequency |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
=cut |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
sub probable_codons { |
266
|
2
|
|
|
2
|
1
|
4
|
my ($self, $threshold) = @_; |
267
|
2
|
50
|
33
|
|
|
19
|
if (!$threshold || $threshold < 0 || $threshold > 100) { |
|
|
|
33
|
|
|
|
|
268
|
0
|
|
|
|
|
0
|
$self->throw(" I need a threshold percentage "); |
269
|
|
|
|
|
|
|
} |
270
|
2
|
|
|
|
|
3
|
my %return_hash; |
271
|
2
|
|
|
|
|
11
|
for my $a(keys %STRICTAA) { |
272
|
42
|
|
|
|
|
25
|
my @common_codons; |
273
|
42
|
|
|
|
|
29
|
my $aa =$Bio::SeqUtils::THREECODE{$a}; |
274
|
42
|
|
|
|
|
28
|
for my $codon (keys %{ $self->{'_table'}{$aa}}) { |
|
42
|
|
|
|
|
66
|
|
275
|
127
|
100
|
|
|
|
197
|
if ($self->{'_table'}{$aa}{$codon}{'rel_freq'} > $threshold/100){ |
276
|
95
|
|
|
|
|
89
|
push @common_codons, $codon; |
277
|
|
|
|
|
|
|
} |
278
|
|
|
|
|
|
|
} |
279
|
42
|
|
|
|
|
62
|
$return_hash{$a} = \@common_codons; |
280
|
|
|
|
|
|
|
} |
281
|
|
|
|
|
|
|
## check to make sure that all codons are populated ## |
282
|
2
|
50
|
|
|
|
8
|
if (grep{scalar @{$return_hash{$_}} == 0} keys %return_hash) { |
|
42
|
|
|
|
|
21
|
|
|
42
|
|
|
|
|
52
|
|
283
|
0
|
|
|
|
|
0
|
$self->warn("Threshold is too high, some amino acids do not" . |
284
|
|
|
|
|
|
|
" have any codon above the threshold!"); |
285
|
|
|
|
|
|
|
} |
286
|
2
|
|
|
|
|
8
|
return \%return_hash; |
287
|
|
|
|
|
|
|
} |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=head2 most_common_codons |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
Title : most_common_codons |
293
|
|
|
|
|
|
|
Usage : my $common_codons = $cd_table->most_common_codons(); |
294
|
|
|
|
|
|
|
Purpose : To obtain the most common codon for a given amino acid |
295
|
|
|
|
|
|
|
Returns : A reference to a hash where keys are 1 letter amino acid codes |
296
|
|
|
|
|
|
|
and the values are the single most common codons for those amino acids |
297
|
|
|
|
|
|
|
Arguments: None |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
=cut |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
sub most_common_codons { |
302
|
1
|
|
|
1
|
1
|
2
|
my $self = shift; |
303
|
|
|
|
|
|
|
|
304
|
1
|
|
|
|
|
1
|
my %return_hash; |
305
|
|
|
|
|
|
|
|
306
|
1
|
|
|
|
|
14
|
for my $a ( keys %STRICTAA ) { |
307
|
|
|
|
|
|
|
|
308
|
21
|
|
|
|
|
17
|
my $common_codon = ''; |
309
|
21
|
|
|
|
|
13
|
my $rel_freq = 0; |
310
|
21
|
|
|
|
|
20
|
my $aa = $Bio::SeqUtils::THREECODE{$a}; |
311
|
|
|
|
|
|
|
|
312
|
21
|
50
|
|
|
|
27
|
if ( ! defined $self->{'_table'}{$aa} ) { |
313
|
0
|
|
|
|
|
0
|
$self->warn("Amino acid $aa ($a) does not have any codons!"); |
314
|
0
|
|
|
|
|
0
|
next; |
315
|
|
|
|
|
|
|
} |
316
|
|
|
|
|
|
|
|
317
|
21
|
|
|
|
|
16
|
for my $codon ( keys %{ $self->{'_table'}{$aa}} ) { |
|
21
|
|
|
|
|
169
|
|
318
|
64
|
100
|
|
|
|
113
|
if ($self->{'_table'}{$aa}{$codon}{'rel_freq'} > $rel_freq ){ |
319
|
37
|
|
|
|
|
22
|
$common_codon = $codon; |
320
|
37
|
|
|
|
|
35
|
$rel_freq = $self->{'_table'}{$aa}{$codon}{'rel_freq'}; |
321
|
|
|
|
|
|
|
} |
322
|
|
|
|
|
|
|
} |
323
|
21
|
|
|
|
|
28
|
$return_hash{$a} = $common_codon; |
324
|
|
|
|
|
|
|
} |
325
|
|
|
|
|
|
|
|
326
|
1
|
|
|
|
|
5
|
return \%return_hash; |
327
|
|
|
|
|
|
|
} |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
=head2 codon_count |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
Title : codon_count |
332
|
|
|
|
|
|
|
Usage : my $count = $cdtable->codon_count('CTG'); |
333
|
|
|
|
|
|
|
Purpose : To obtain the absolute number of the codons in the |
334
|
|
|
|
|
|
|
organism. |
335
|
|
|
|
|
|
|
Returns : an integer |
336
|
|
|
|
|
|
|
Args : a non-ambiguous codon string |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
=cut |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
sub codon_count { |
341
|
65
|
|
|
65
|
1
|
49
|
my $self = shift; |
342
|
65
|
50
|
|
|
|
67
|
if (@_) { |
343
|
65
|
|
|
|
|
47
|
my $a = shift; |
344
|
65
|
|
|
|
|
55
|
my $cod = uc $a; |
345
|
65
|
50
|
|
|
|
71
|
if ($self->_check_codon($cod)) { |
346
|
65
|
|
|
|
|
86
|
my $ctable = Bio::Tools::CodonTable->new; |
347
|
65
|
|
|
|
|
73
|
$ctable->id($self->genetic_code()); |
348
|
65
|
|
|
|
|
95
|
my $aa =$Bio::SeqUtils::THREECODE {$ctable->translate($cod)}; |
349
|
65
|
|
|
|
|
155
|
return $self->{'_table'}{$aa}{$cod}{'abs_count'}; |
350
|
|
|
|
|
|
|
} |
351
|
0
|
|
|
|
|
0
|
else {return 0;} |
352
|
|
|
|
|
|
|
} |
353
|
|
|
|
|
|
|
else { |
354
|
0
|
|
|
|
|
0
|
$self->warn(" need to give a codon sequence as a parameter "); |
355
|
0
|
|
|
|
|
0
|
return 0; |
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
} |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
=head2 get_coding_gc |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
Title : get_coding_gc |
363
|
|
|
|
|
|
|
Usage : my $count = $cdtable->get_coding_gc(1); |
364
|
|
|
|
|
|
|
Purpose : To return the percentage GC composition for the organism at |
365
|
|
|
|
|
|
|
codon positions 1,2 or 3, or an average for all coding sequence |
366
|
|
|
|
|
|
|
('all'). |
367
|
|
|
|
|
|
|
Returns : a number (%-age GC content) or 0 if these fields are undefined |
368
|
|
|
|
|
|
|
Args : 1,2,3 or 'all'. |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
=cut |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
sub get_coding_gc { |
373
|
5
|
|
|
5
|
1
|
5
|
my $self = shift; |
374
|
5
|
50
|
|
|
|
8
|
if (! @_) { |
375
|
0
|
|
|
|
|
0
|
$self->warn(" no parameters supplied must be a codon position (1,2,3) or 'all'"); |
376
|
0
|
|
|
|
|
0
|
return 0; |
377
|
|
|
|
|
|
|
} |
378
|
|
|
|
|
|
|
else{ |
379
|
5
|
|
|
|
|
4
|
my $n = shift; |
380
|
|
|
|
|
|
|
##return request if valid ## |
381
|
5
|
50
|
|
|
|
8
|
if ( exists($self->{'_coding_gc'}{$n} ) ) { |
|
|
0
|
|
|
|
|
|
382
|
5
|
|
|
|
|
28
|
return sprintf("%.2f", $self->{'_coding_gc'}{$n}); |
383
|
|
|
|
|
|
|
} |
384
|
|
|
|
|
|
|
##else return 'all' value if exists |
385
|
|
|
|
|
|
|
elsif (exists($self->{'_coding_gc'}{'all'} )) { |
386
|
0
|
|
|
|
|
0
|
$self->warn("coding gc doesn't have value for [$n], returning gc content for all CDSs"); |
387
|
0
|
|
|
|
|
0
|
return sprintf("%.2f", $self->{'_coding_gc'}{'all'}); |
388
|
|
|
|
|
|
|
} |
389
|
|
|
|
|
|
|
### else return 0, |
390
|
|
|
|
|
|
|
else { |
391
|
0
|
|
|
|
|
0
|
$self->warn("coding gc values aren't defined, returning 0"); |
392
|
0
|
|
|
|
|
0
|
return 0; |
393
|
|
|
|
|
|
|
} |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
}#end of outer else |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
} |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
=head2 set_coding_gc |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
Title : set_coding_gc |
402
|
|
|
|
|
|
|
Usage : my $count = $cdtable->set_coding_gc(-1=>55.78); |
403
|
|
|
|
|
|
|
Purpose : To set the percentage GC composition for the organism at |
404
|
|
|
|
|
|
|
codon positions 1,2 or 3, or an average for all coding sequence |
405
|
|
|
|
|
|
|
('all'). |
406
|
|
|
|
|
|
|
Returns : void |
407
|
|
|
|
|
|
|
Args : a hash where the key must be 1,2,3 or 'all' and the value the %age GC |
408
|
|
|
|
|
|
|
at that codon position.. |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
=cut |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
sub set_coding_gc { |
413
|
4
|
|
|
4
|
1
|
4
|
my ($self, $key, $value) = @_; |
414
|
4
|
|
|
|
|
5
|
my @allowed = qw(1 2 3 all); |
415
|
4
|
|
|
|
|
5
|
$key =~ s/\-//; |
416
|
4
|
50
|
|
|
|
3
|
if (!grep {$key eq $_} @allowed ) { |
|
16
|
|
|
|
|
20
|
|
417
|
0
|
|
|
|
|
0
|
$self->warn ("invalid key! - must be one of [ ". (join " ", @allowed) . "]"); |
418
|
0
|
|
|
|
|
0
|
return; |
419
|
|
|
|
|
|
|
} |
420
|
4
|
|
|
|
|
8
|
$self->{'_coding_gc'}{$key} = $value; |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
} |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
=head2 species |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
Title : species |
428
|
|
|
|
|
|
|
Usage : my $sp = $cut->species(); |
429
|
|
|
|
|
|
|
Purpose : Get/setter for species name of codon table |
430
|
|
|
|
|
|
|
Returns : Void or species name string |
431
|
|
|
|
|
|
|
Args : None or species name string |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
=cut |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
sub species { |
436
|
4
|
|
|
4
|
1
|
6
|
my $self = shift; |
437
|
4
|
100
|
|
|
|
7
|
if (@_ ){ |
438
|
3
|
|
|
|
|
19
|
$self->{'_species'} = shift; |
439
|
|
|
|
|
|
|
} |
440
|
4
|
|
50
|
|
|
15
|
return $self->{'_species'} || "unknown"; |
441
|
|
|
|
|
|
|
} |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
=head2 genetic_code |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
Title : genetic_code |
446
|
|
|
|
|
|
|
Usage : my $sp = $cut->genetic_code(); |
447
|
|
|
|
|
|
|
Purpose : Get/setter for genetic_code name of codon table |
448
|
|
|
|
|
|
|
Returns : Void or genetic_code id, 1 by default |
449
|
|
|
|
|
|
|
Args : None or genetic_code id, 1 by default if invalid argument. |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
=cut |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
sub genetic_code { |
454
|
132
|
|
|
132
|
1
|
100
|
my $self = shift; |
455
|
132
|
50
|
|
|
|
164
|
if (@_ ){ |
456
|
0
|
|
|
|
|
0
|
my $val = shift; |
457
|
0
|
0
|
0
|
|
|
0
|
if ($val < 0 || $val >16 || $val =~ /[^\d]/ |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
458
|
|
|
|
|
|
|
|| $val ==7 || $val ==8) { |
459
|
0
|
|
|
|
|
0
|
$self->warn ("invalid genetic code - must be 1-16 but not 7 or 8,setting to default [1]"); |
460
|
0
|
|
|
|
|
0
|
$self->{'_genetic_code'} = 1; |
461
|
|
|
|
|
|
|
} |
462
|
|
|
|
|
|
|
else { |
463
|
0
|
|
|
|
|
0
|
$self->{'_genetic_code'} = shift; |
464
|
|
|
|
|
|
|
} |
465
|
|
|
|
|
|
|
} |
466
|
132
|
|
100
|
|
|
320
|
return $self->{'_genetic_code'} || 1; |
467
|
|
|
|
|
|
|
} |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
=head2 cds_count |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
Title : cds_count |
472
|
|
|
|
|
|
|
Usage : my $count = $cdtable->cds_count(); |
473
|
|
|
|
|
|
|
Purpose : To retrieve the total number of CDSs used to generate the Codon Table |
474
|
|
|
|
|
|
|
for that organism. |
475
|
|
|
|
|
|
|
Returns : an integer |
476
|
|
|
|
|
|
|
Args : none (if retrieving the value) or an integer( if setting ). |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
=cut |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
sub cds_count { |
481
|
4
|
|
|
4
|
1
|
5
|
my $self= shift; |
482
|
4
|
100
|
|
|
|
8
|
if (@_) { |
483
|
3
|
|
|
|
|
4
|
my $val = shift; |
484
|
3
|
50
|
|
|
|
9
|
if ($val < 0) { |
485
|
0
|
|
|
|
|
0
|
$self->warn("can't have negative count initializing to 1"); |
486
|
0
|
|
|
|
|
0
|
$self->{'_cds_count'} = 0.00; |
487
|
|
|
|
|
|
|
} |
488
|
|
|
|
|
|
|
else{ |
489
|
3
|
|
|
|
|
4
|
$self->{'_cds_count'} = $val; |
490
|
|
|
|
|
|
|
} |
491
|
|
|
|
|
|
|
} |
492
|
|
|
|
|
|
|
$self->warn("cds_count value is undefined, returning 0") |
493
|
4
|
50
|
|
|
|
8
|
if !exists($self->{'_cds_count'}); |
494
|
|
|
|
|
|
|
|
495
|
4
|
|
50
|
|
|
11
|
return $self->{'_cds_count'} || 0.00; |
496
|
|
|
|
|
|
|
} |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
=head2 aa_frequency |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
Title : aa_frequency |
501
|
|
|
|
|
|
|
Usage : my $freq = $cdtable->aa_frequency('Leu'); |
502
|
|
|
|
|
|
|
Purpose : To retrieve the frequency of an amino acid in the organism |
503
|
|
|
|
|
|
|
Returns : a percentage |
504
|
|
|
|
|
|
|
Args : a 1 letter or 3 letter string representing the amino acid |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
=cut |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
sub aa_frequency { |
511
|
2
|
|
|
2
|
1
|
3
|
my ($self, $a) = @_; |
512
|
|
|
|
|
|
|
## process args ## |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
## deal with cases ## |
515
|
2
|
|
|
|
|
3
|
my $aa = lc $a; |
516
|
2
|
|
|
|
|
13
|
$aa =~ s/^(\w)/\U$1/; |
517
|
2
|
50
|
33
|
|
|
8
|
if (!exists($STRICTAA{$aa}) && !exists($Bio::SeqUtils::ONECODE{$aa}) ) { |
518
|
0
|
|
|
|
|
0
|
$self->warn("Invalid amino acid! must be a unique 1 letter or 3 letter identifier"); |
519
|
0
|
|
|
|
|
0
|
return; |
520
|
|
|
|
|
|
|
} |
521
|
|
|
|
|
|
|
#translate to 3 letter code for Ctable # |
522
|
2
|
|
33
|
|
|
7
|
my $aa3 = $Bio::SeqUtils::THREECODE{$aa} || $aa; |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
## return % of all amino acids in organism ## |
525
|
2
|
|
|
|
|
2
|
my $freq = 0; |
526
|
2
|
|
|
|
|
2
|
map {$freq += $self->{'_table'}{$aa3}{$_}{'per1000'} } keys %{$self->{'_table'}{$aa3}}; |
|
12
|
|
|
|
|
20
|
|
|
2
|
|
|
|
|
8
|
|
527
|
2
|
|
|
|
|
24
|
return sprintf("%.2f", $freq/10); |
528
|
|
|
|
|
|
|
} |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
=head2 common_codon |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
Title : common_codon |
533
|
|
|
|
|
|
|
Usage : my $freq = $cdtable->common_codon('Leu'); |
534
|
|
|
|
|
|
|
Purpose : To retrieve the frequency of the most common codon of that aa |
535
|
|
|
|
|
|
|
Returns : a percentage |
536
|
|
|
|
|
|
|
Args : a 1 letter or 3 letter string representing the amino acid |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
=cut |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
sub common_codon{ |
541
|
|
|
|
|
|
|
|
542
|
0
|
|
|
0
|
1
|
0
|
my ($self, $a) = @_; |
543
|
0
|
|
|
|
|
0
|
my $aa = lc $a; |
544
|
0
|
|
|
|
|
0
|
$aa =~ s/^(\w)/\U$1/; |
545
|
|
|
|
|
|
|
|
546
|
0
|
0
|
|
|
|
0
|
if ($self->_check_aa($aa)) { |
547
|
0
|
|
|
|
|
0
|
my $aa3 = $Bio::SeqUtils::THREECODE{$aa} ; |
548
|
0
|
|
0
|
|
|
0
|
$aa3 ||= $aa; |
549
|
0
|
|
|
|
|
0
|
my $max = 0; |
550
|
0
|
|
|
|
|
0
|
for my $cod (keys %{$self->{'_table'}{$aa3}}) { |
|
0
|
|
|
|
|
0
|
|
551
|
|
|
|
|
|
|
$max = ($self->{'_table'}{$aa3}{$cod}{'rel_freq'} > $max) ? |
552
|
0
|
0
|
|
|
|
0
|
$self->{'_table'}{$aa3}{$cod}{'rel_freq'}:$max; |
553
|
|
|
|
|
|
|
} |
554
|
0
|
|
|
|
|
0
|
return $max; |
555
|
0
|
|
|
|
|
0
|
}else {return 0;} |
556
|
|
|
|
|
|
|
} |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
=head2 rare_codon |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
Title : rare_codon |
561
|
|
|
|
|
|
|
Usage : my $freq = $cdtable->rare_codon('Leu'); |
562
|
|
|
|
|
|
|
Purpose : To retrieve the frequency of the least common codon of that aa |
563
|
|
|
|
|
|
|
Returns : a percentage |
564
|
|
|
|
|
|
|
Args : a 1 letter or 3 letter string representing the amino acid |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
=cut |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
sub rare_codon { |
569
|
0
|
|
|
0
|
1
|
0
|
my ($self, $a) = @_; |
570
|
0
|
|
|
|
|
0
|
my $aa = lc $a; |
571
|
0
|
|
|
|
|
0
|
$aa =~ s/^(\w)/\U$1/; |
572
|
0
|
0
|
|
|
|
0
|
if ($self->_check_aa($aa)) { |
573
|
0
|
|
|
|
|
0
|
my $aa3 = $Bio::SeqUtils::THREECODE{$aa}; |
574
|
0
|
|
0
|
|
|
0
|
$aa3 ||= $aa; |
575
|
0
|
|
|
|
|
0
|
my $min = 1; |
576
|
0
|
|
|
|
|
0
|
for my $cod (keys %{$self->{'_table'}{$aa3}}) { |
|
0
|
|
|
|
|
0
|
|
577
|
|
|
|
|
|
|
$min = ($self->{'_table'}{$aa3}{$cod}{'rel_freq'} < $min) ? |
578
|
0
|
0
|
|
|
|
0
|
$self->{'_table'}{$aa3}{$cod}{'rel_freq'}:$min; |
579
|
|
|
|
|
|
|
} |
580
|
0
|
|
|
|
|
0
|
return $min; |
581
|
0
|
|
|
|
|
0
|
}else {return 0;} |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
} |
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
## internal sub that checks a codon is correct format |
588
|
|
|
|
|
|
|
sub _check_aa { |
589
|
0
|
|
|
0
|
|
0
|
my ($self, $aa ) = @_; |
590
|
0
|
0
|
0
|
|
|
0
|
if (!exists($STRICTAA{$aa}) && !exists($Bio::SeqUtils::ONECODE{$aa}) ) { |
591
|
0
|
|
|
|
|
0
|
$self->warn("Invalid amino acid! must be a unique 1 letter or 3 letter identifier"); |
592
|
0
|
|
|
|
|
0
|
return 0; |
593
|
0
|
|
|
|
|
0
|
}else {return 1;} |
594
|
|
|
|
|
|
|
} |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
sub _check_codon { |
600
|
131
|
|
|
131
|
|
109
|
my ($self, $cod) = @_; |
601
|
131
|
50
|
33
|
|
|
488
|
if ($cod =~ /[^ATCG]/ || $cod !~ /\w\w\w/) { |
602
|
0
|
|
|
|
|
0
|
$self->warn(" impossible codon - must be 3 letters and just containing ATCG"); |
603
|
0
|
|
|
|
|
0
|
return 0; |
604
|
|
|
|
|
|
|
} |
605
|
131
|
|
|
|
|
186
|
else {return 1;} |
606
|
|
|
|
|
|
|
} |
607
|
|
|
|
|
|
|
sub _init_from_cod { |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
## make hash based on aa and then send to _init_from_aa |
610
|
1
|
|
|
1
|
|
2
|
my ($self, $ref) = @_; |
611
|
1
|
|
|
|
|
5
|
my $ct = Bio::Tools::CodonTable->new(); |
612
|
1
|
|
|
|
|
1
|
my %aa_hash; |
613
|
1
|
|
|
|
|
6
|
for my $codon(keys %$ref ) { |
614
|
63
|
|
|
|
|
72
|
my $aa = $ct->translate($codon); |
615
|
63
|
|
|
|
|
87
|
$aa_hash{$aa}{$codon} = $ref->{$codon}; |
616
|
|
|
|
|
|
|
} |
617
|
1
|
|
|
|
|
5
|
$self->_init_from_aa(\%aa_hash); |
618
|
|
|
|
|
|
|
} |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
sub _init_from_aa { |
622
|
1
|
|
|
1
|
|
1
|
my ($self, $ref) = @_; |
623
|
|
|
|
|
|
|
## abs counts and count codons |
624
|
1
|
|
|
|
|
2
|
my $total_codons = 0; |
625
|
1
|
|
|
|
|
1
|
my %threeletter; |
626
|
1
|
|
|
|
|
4
|
map{$threeletter{$Bio::SeqUtils::THREECODE{$_}} = $ref->{$_} } keys %$ref; |
|
21
|
|
|
|
|
22
|
|
627
|
1
|
|
|
|
|
2
|
$ref = \%threeletter; |
628
|
1
|
|
|
|
|
4
|
for my $aa (keys %$ref) { |
629
|
21
|
|
|
|
|
12
|
for my $cod(keys %{$ref->{$aa}} ) { |
|
21
|
|
|
|
|
30
|
|
630
|
63
|
|
|
|
|
78
|
$self->{'_table'}{$aa}{$cod}{'abs_count'} = $ref->{$aa}{$cod}; |
631
|
63
|
|
|
|
|
53
|
$total_codons += $ref->{$aa}{$cod}; |
632
|
|
|
|
|
|
|
} |
633
|
|
|
|
|
|
|
} |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
## now calculate abs codon frequencies |
636
|
1
|
|
|
|
|
4
|
for my $aa (keys %$ref) { |
637
|
21
|
|
|
|
|
16
|
for my $cod(keys %{$ref->{$aa}} ) { |
|
21
|
|
|
|
|
27
|
|
638
|
|
|
|
|
|
|
$self->{'_table'}{$aa}{$cod}{'per1000'} = |
639
|
63
|
|
|
|
|
165
|
sprintf("%.2f",$ref->{$aa}{$cod} /$total_codons * 1000) ; |
640
|
|
|
|
|
|
|
} |
641
|
|
|
|
|
|
|
} |
642
|
|
|
|
|
|
|
## now calculate rel codon_frequencies |
643
|
1
|
|
|
|
|
3
|
for my $aa (keys %$ref) { |
644
|
21
|
|
|
|
|
14
|
my $aa_freq = 0; |
645
|
63
|
|
|
|
|
48
|
map{$aa_freq += $ref->{$aa}{$_} } |
646
|
21
|
|
|
|
|
14
|
keys %{$ref->{$aa}}; |
|
21
|
|
|
|
|
25
|
|
647
|
21
|
|
|
|
|
16
|
for my $cod(keys %{$ref->{$aa}} ) { |
|
21
|
|
|
|
|
26
|
|
648
|
|
|
|
|
|
|
$self->{'_table'}{$aa}{$cod}{'rel_freq'}= |
649
|
63
|
|
|
|
|
153
|
sprintf("%.2f",$ref->{$aa}{$cod}/ $aa_freq ); |
650
|
|
|
|
|
|
|
} |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
} |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
## now calculate gc fields |
655
|
1
|
|
|
|
|
2
|
my %GC; |
656
|
1
|
|
|
|
|
3
|
for my $aa (keys %$ref) { |
657
|
21
|
|
|
|
|
16
|
for my $cod(keys %{$ref->{$aa}} ) { |
|
21
|
|
|
|
|
24
|
|
658
|
63
|
|
|
|
|
49
|
for my $index (qw(1 2 3) ) { |
659
|
189
|
100
|
|
|
|
340
|
if (substr ($cod, $index -1, 1) =~ /g|c/oi) { |
660
|
93
|
|
|
|
|
92
|
$GC{$index} += $ref->{$aa}{$cod}; |
661
|
|
|
|
|
|
|
} |
662
|
|
|
|
|
|
|
} |
663
|
|
|
|
|
|
|
} |
664
|
|
|
|
|
|
|
} |
665
|
1
|
|
|
|
|
2
|
my $tot = 0; |
666
|
1
|
|
|
|
|
2
|
map{$tot += $GC{$_}} qw(1 2 3); |
|
3
|
|
|
|
|
3
|
|
667
|
1
|
|
|
|
|
4
|
$self->set_coding_gc('all', $tot/(3 *$total_codons) * 100); |
668
|
1
|
|
|
|
|
1
|
map{$self->set_coding_gc($_,$GC{$_}/$total_codons * 100)} qw(1 2 3); |
|
3
|
|
|
|
|
6
|
|
669
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
## |
671
|
1
|
|
|
|
|
5
|
return $self; |
672
|
|
|
|
|
|
|
} |
673
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
sub _gb_db { |
675
|
1
|
|
|
1
|
|
1
|
my $self = shift; |
676
|
1
|
|
50
|
|
|
6
|
return $self->{'_gd_db'} || "unknown"; |
677
|
|
|
|
|
|
|
} |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
1; |