line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::CUA::Summarizer; |
2
|
|
|
|
|
|
|
|
3
|
4
|
|
|
4
|
|
29792
|
use 5.006; |
|
4
|
|
|
|
|
10
|
|
4
|
4
|
|
|
4
|
|
22
|
use strict; |
|
4
|
|
|
|
|
4
|
|
|
4
|
|
|
|
|
93
|
|
5
|
4
|
|
|
4
|
|
18
|
use warnings; |
|
4
|
|
|
|
|
5
|
|
|
4
|
|
|
|
|
142
|
|
6
|
4
|
|
|
4
|
|
1044
|
use parent qw/Bio::CUA/; |
|
4
|
|
|
|
|
608
|
|
|
4
|
|
|
|
|
31
|
|
7
|
4
|
|
|
4
|
|
2138
|
use Bio::CUA::CodonTable; |
|
4
|
|
|
|
|
10
|
|
|
4
|
|
|
|
|
448
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
# Name of the codon table class and of this package; both are
# interpolated into messages and used for construction further down.
my $codonPkg = 'Bio::CUA::CodonTable';
my $pkg      = __PACKAGE__;

# The four nucleotides, kept in one fixed order for every caller.
my @bases = qw/A T C G/;

# Name of the class used for sequence processing; assigned once in the
# BEGIN block below, i.e. at compile time.
my $seq_io_pkg;

BEGIN
{
    # set version which might be checked during compilation
    #our $VERSION = 1.01;

    # Probe for Bio::SeqIO; $@ stays empty when the module loads.
    eval { require Bio::SeqIO; };
    if(!$@)
    {
        # Bio::SeqIO is available, so use it
        $seq_io_pkg = 'Bio::SeqIO';
    }
    else
    {
        # Bio::SeqIO is not available: fall back to the bundled reader
        $seq_io_pkg = 'Bio::CUA::SeqIO';
        require Bio::CUA::SeqIO;
    }
}
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=pod |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=head1 NAME |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
Bio::CUA::Summarizer - a class to summarize features of sequences. |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=head1 SYNOPSIS |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
This class provides convenience for its child classes with methods |
40
|
|
|
|
|
|
|
summarizing sequence features, such |
41
|
|
|
|
|
|
|
as counting and listing amino acids and codons, retrieving amino acids |
42
|
|
|
|
|
|
|
with certain degree degeneracy in a genetic table. Refer to the |
43
|
|
|
|
|
|
|
L section for more details. |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
use Bio::CUA::Summarizer; |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
my $summarizer = Bio::CUA::Summarizer->new( |
48
|
|
|
|
|
|
|
codon_table => 1 ); # using standard genetic code |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
# get codons in a sequence file |
51
|
|
|
|
|
|
|
my $codonList = $summarizer->tabulate_codons('seqs.fa'); |
52
|
|
|
|
|
|
|
# get the codon table object of this summarizer |
53
|
|
|
|
|
|
|
my $table = $summarizer->codon_table; |
54
|
|
|
|
|
|
|
# get all sense codons in the genetic codon table |
55
|
|
|
|
|
|
|
my @senseCodons = $summarizer->all_sense_codons; |
56
|
|
|
|
|
|
|
# get codons encoding an amino acid |
57
|
|
|
|
|
|
|
my @codons = $summarizer->codons_of_AA('Ser'); |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=cut |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
=head2 new |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
Title : new |
65
|
|
|
|
|
|
|
Usage : $obj=Bio::CUA::Summarizer->new(%args); |
66
|
|
|
|
|
|
|
Function: create an object which can be used to summarize sequence |
67
|
|
|
|
|
|
|
features. |
68
|
|
|
|
|
|
|
Returns : an object of this or child class |
69
|
|
|
|
|
|
|
Args : a hash with a key 'codon_table', acceptable values are |
70
|
|
|
|
|
|
|
codon_table => id of genetic codon table # 1 |
71
|
|
|
|
|
|
|
codon_table => Bio::CUA::CodonTable object # 2 |
72
|
|
|
|
|
|
|
codon_table => 'map-file' # 3 |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=over 3 |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=item 1 |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
id of genetic codon table can be found from L
|
79
|
|
|
|
|
|
|
codes|http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t>. |
80
|
|
|
|
|
|
|
A valid id is an integer. |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=item 2 |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
an object of L<Bio::CUA::CodonTable>. One can directly provide an |
85
|
|
|
|
|
|
|
object to the method. |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
=item 3 |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
If genetic code in analyzed sequences is not included in NCBI, one can |
90
|
|
|
|
|
|
|
also provide its own genetic code in a map-file, in the format of |
91
|
|
|
|
|
|
|
codon1AA1 |
92
|
|
|
|
|
|
|
codon2AA2, |
93
|
|
|
|
|
|
|
... ... .... |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=back |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
Note all the analyzed sequences will use this provided genetic codon |
98
|
|
|
|
|
|
|
table to map between amino acids and codons. |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=cut |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
# Constructor. Builds the object through the parent class and attaches a
# codon table taken from the 'codon_table' argument, which may be:
#   - an object of $codonPkg,
#   - an integer genetic code id,
#   - anything else, which is passed on as the name of a map file.
# Returns the new object, or undef (after a warning) when no codon table
# could be determined.
sub new
{
    my ($caller, @args) = @_;
    my $self = $caller->SUPER::new(@args);

    my $hashRef = $self->_array_to_hash(\@args);

    # only process its own argument; every other key is left untouched
    my $codonTable;
    if(exists $hashRef->{'codon_table'})
    {
        my $val = $hashRef->{'codon_table'};
        if(ref($val))
        {
            # The eval guards the isa() call: invoking a method on a
            # plain (unblessed) reference dies inside perl, which would
            # bypass the intended error message below.
            $self->throw("$val is not an object of $codonPkg")
                unless(eval { $val->isa($codonPkg) });
            $codonTable = $val;
        }elsif(defined($val) and $val =~ /^\d+$/) # genetic code id
        {
            $codonTable = $codonPkg->new(-id => $val) or
                $self->throw("Invalid genetic code ID '$val'");
        }else # a map file
        {
            $codonTable = $codonPkg->new(-map_file => $val) or
                $self->throw("Can not construct codon table with file '$val'");
        }
    }

    unless($codonTable)
    {
        # NOTE(review): the early return only happens when warn() returns
        # a true value, exactly as in the original statement; confirm
        # that against the parent class.
        # 'return undef' is kept on purpose so that a failed constructor
        # still yields one value in list context.
        $self->warn("option 'codon_table' is missing in the method",
            "'new' of $pkg") and return undef;
    }

    # store the result
    $self->{'_codon_table'} = $codonTable;

    return $self;
}
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=head2 codon_table |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
Title : codon_table |
141
|
|
|
|
|
|
|
Usage : $table = $self->codon_table; |
142
|
|
|
|
|
|
|
Function: get associated codon table of this object |
143
|
|
|
|
|
|
|
Returns : an object of L<Bio::CUA::CodonTable> |
144
|
|
|
|
|
|
|
Args : None |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=cut |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
# Accessor for the codon table stored under the '_codon_table' key.
# Emits a warning through the object's warn() method when the stored
# value is false, and returns the stored value either way.
sub codon_table
{
    my $table = $_[0]->{'_codon_table'};
    $_[0]->warn("No codon table associated with this object $_[0]")
        unless($table);
    return $table;
}
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=head2 bases |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
get the 4 nucleotides A,T,C,G always in this order, to keep |
158
|
|
|
|
|
|
|
consistency among different classes |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=cut |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
# get all the nucleotide bases in a certain order |
163
|
|
|
|
|
|
|
# Return the file-level @bases list: the list itself in list context,
# a reference to it in scalar context.
sub bases
{
    return @bases if(wantarray);
    return \@bases;
}
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
# sequence-level functions |
169
|
|
|
|
|
|
|
=head2 get_codon_list |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
Title : get_codon_list |
172
|
|
|
|
|
|
|
Usage : $codonList = $self->get_codon_list($input) |
173
|
|
|
|
|
|
|
Function: get codons and their counts in input |
174
|
|
|
|
|
|
|
Returns : reference to a hash containing codons as keys and counts |
175
|
|
|
|
|
|
|
as values. |
176
|
|
|
|
|
|
|
Args : seq string, seq object, seq file, or another codon list |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=cut |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
# the main interface to preprocess input to most methods |
181
|
|
|
|
|
|
|
# return a codon list with its count |
182
|
|
|
|
|
|
|
# acceptable parameters: seq string, seq object, seq file, codon list |
183
|
|
|
|
|
|
|
# Main entry point for turning any accepted input into a codon list.
# Args    : $input - one of
#             * a sequence string (letters A,T,G,C,U,N in either case)
#               that does not name an existing file,
#             * the name of a sequence file,
#             * a hash reference, taken as an existing codon list,
#             * any other reference, taken as a sequence object.
# Returns : what _catalog_codons() or tabulate_codons() returns for the
#           input, or the hash reference itself when one was given.
sub get_codon_list
{
    my ($self, $input) = @_;

    my $ref = ref($input);
    unless($ref) # a scalar variable
    {
        # Match case-insensitively: _get_seq_str() upper-cases sequences,
        # so a lowercase string is a sequence too and must not be
        # mistaken for a file name. The defined() test avoids matching
        # against an undefined input.
        if(defined($input) and $input =~ /^[ATGCUN]+$/i and (! -f $input))
        {
            return $self->_catalog_codons($input);
        }

        # everything else is handled as a sequence file
        return $self->tabulate_codons($input);
    }

    # an existing codon list is handed back unchanged
    return $input if($ref eq 'HASH');

    # any other reference is taken as a sequence object
    return $self->_catalog_codons($input);
}
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=head2 tabulate_codons |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
Title : tabulate_codons |
212
|
|
|
|
|
|
|
Usage : $codonList = $self->tabulate_codons($input,[$each]); |
213
|
|
|
|
|
|
|
Function: count codons in the input sequences |
214
|
|
|
|
|
|
|
Returns : reference to a hash in which codon is the key and counts as |
215
|
|
|
|
|
|
|
values. If $each is true, then each sequence is separately processed |
216
|
|
|
|
|
|
|
and stored in a larger hash. The count of a codon in a sequence can |
217
|
|
|
|
|
|
|
be retrieved like this: $codonList->{'seqId'}->{'codon'}. |
218
|
|
|
|
|
|
|
Args : accepted arguments are as follows: |
219
|
|
|
|
|
|
|
I<input> = name of a file containing fasta sequences |
220
|
|
|
|
|
|
|
I<each> = optional, if TRUE (i.e., non-zero values), each sequence is |
221
|
|
|
|
|
|
|
separately processed. |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
This is a companion method of L</get_codon_list> for situations |
224
|
|
|
|
|
|
|
when one wants to get codon counts for each sequence separately. |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=cut |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
# Count codons in the sequences read from $input.
# Args    : $input - passed to _get_seq_io() to obtain a sequence reader
#           $each  - optional; when true, counts are kept per sequence
# Returns : reference to a hash of codon => count, or, when $each is
#           true, of sequence id => codon-count hash reference.
#           undef when nothing was collected; nothing when no sequence
#           reader could be obtained.
sub tabulate_codons
{
    my ($self, $input, $each) = @_;

    my $seqIO = $self->_get_seq_io($input) or return;

    my %list;
    while(my $seq = $seqIO->next_seq())
    {
        my $codons = $self->_catalog_codons($seq->seq);

        if($each)
        {
            # keep every sequence's counts under its own id
            $list{$seq->id} = $codons;
            next;
        }

        # otherwise merge all codons together
        while(my ($c, $v) = each %$codons)
        {
            $list{$c} += $v;
        }
    }

    return keys(%list) ? \%list : undef;
}
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=head2 tabulate_AAs |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
Title : tabulate_AAs |
263
|
|
|
|
|
|
|
Usage : $AAList = $self->tabulate_AAs($input,[$each]); |
264
|
|
|
|
|
|
|
Function: similar to L</tabulate_codons>, but for counting amino acids |
265
|
|
|
|
|
|
|
Returns : the same as L</tabulate_codons>, but for amino acids |
266
|
|
|
|
|
|
|
Args : refer to L</tabulate_codons>. |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=cut |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
# Count amino acids in the sequences read from $input.
# Args    : $input - passed on to tabulate_codons()
#           $each  - optional; when true, counts are kept per sequence
# Returns : reference to a hash of amino acid => count, or, when $each
#           is true, of sequence id => { amino acid => count }.
#           Codons for which _codon_to_aa() returns a false value are
#           skipped. Returns nothing when tabulate_codons() yields no
#           codon list.
sub tabulate_AAs
{
    my ($self, $input, $each) = @_;

    # $each must be forwarded, otherwise the codon list is never split
    # by sequence and the per-sequence branch below has nothing to use
    my $codonList = $self->tabulate_codons($input, $each) or return;

    my %AAs;
    if($each)
    {
        # here $codonList maps sequence id => { codon => count }
        while(my ($id, $hashRef) = each %$codonList)
        {
            # a sequence without any codon has no hash to walk through
            next unless($hashRef);

            # iterate over this sequence's own codons; iterating over
            # %$codonList here would disturb the outer each() iterator
            while(my ($codon, $count) = each %$hashRef)
            {
                my $AA = $self->_codon_to_aa($codon) or next;
                $AAs{$id}->{$AA} += $count;
            }
        }
    }else
    {
        # here $codonList maps codon => count over all sequences
        while(my ($codon, $count) = each %$codonList)
        {
            my $AA = $self->_codon_to_aa($codon) or next;
            $AAs{$AA} += $count;
        }
    }

    return \%AAs;
}
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
# get the sequence IO and return it |
300
|
|
|
|
|
|
|
# Build and return a fasta reader for $input, using the class whose
# name is stored in $seq_io_pkg. When $input is false a warning is
# issued through the object's warn() method and, provided that call
# returns true, nothing is returned.
sub _get_seq_io
{
    my ($self, $input) = @_;

    unless($input)
    {
        $self->warn("input fasta file is needed to obtain seq IO") and return;
    }

    # the reader class was chosen when this file was compiled
    return scalar $seq_io_pkg->new(-file => $input, -format => 'fasta');
}
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
# codon table functions |
313
|
|
|
|
|
|
|
=head2 all_sense_codons |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
get all sense codons in the genetic codon table of this object |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
=cut |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
# Delegate to the codon table's all_sense_codons() method; returns
# nothing when this object has no codon table.
sub all_sense_codons
{
    my $self  = $_[0];
    my $table = $self->codon_table();
    return unless($table);

    return $table->all_sense_codons;
}
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
=head2 all_AAs_in_table |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
get all the amino acids coded in the genetic code table of this object |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
=cut |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
# Delegate to the codon table's all_amino_acids() method; returns
# nothing when this object has no codon table.
sub all_AAs_in_table
{
    my $self  = $_[0];
    my $table = $self->codon_table();
    return unless($table);

    return $table->all_amino_acids();
}
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
=head2 codons_of_AA |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
get codons coding the given amino acid, I, |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
my @codons = $self->codons_of_AA('Ser'); |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
=cut |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
# Delegate to the codon table's codons_of_AA() method for the amino
# acid $AA; returns nothing when this object has no codon table.
sub codons_of_AA
{
    my ($self, $AA) = @_;

    my $table = $self->codon_table();
    return unless($table);

    return $table->codons_of_AA($AA);
}
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
=head2 aa_degeneracy_classes |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
Title : aa_degeneracy_classes |
363
|
|
|
|
|
|
|
Usage : $hashRef = $self->aa_degeneracy_classes; |
364
|
|
|
|
|
|
|
Function: get amino acid degeneracy classes according to the |
365
|
|
|
|
|
|
|
associated genetic code table |
366
|
|
|
|
|
|
|
Returns : a hash reference in which first level key is degeneracy |
367
|
|
|
|
|
|
|
degrees such as 1,2,3,4,6, second level is amino acid, the value is |
368
|
|
|
|
|
|
|
reference to the corresponding codon array. For example: |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
{ 2 => { D => [GAU, GAC], |
371
|
|
|
|
|
|
|
C => [UGU, UGC], |
372
|
|
|
|
|
|
|
... ... |
373
|
|
|
|
|
|
|
}, |
374
|
|
|
|
|
|
|
4 => { A => [GCU, GCC, GCA, GCG], |
375
|
|
|
|
|
|
|
... ... |
376
|
|
|
|
|
|
|
}, |
377
|
|
|
|
|
|
|
... ... ... |
378
|
|
|
|
|
|
|
} |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
Args : None |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
=cut |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
# Delegate to the codon table's codon_degeneracy() method; returns
# nothing when this object has no codon table.
sub aa_degeneracy_classes
{
    my $self  = $_[0];
    my $table = $self->codon_table;
    return unless($table);

    return $table->codon_degeneracy;
}
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
=head2 codons_by_degeneracy |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
Title : codons_by_degeneracy |
393
|
|
|
|
|
|
|
Usage : @codons = $self->codons_by_degeneracy(2); |
394
|
|
|
|
|
|
|
Function: get all the codons of AAs which have the specified degree |
395
|
|
|
|
|
|
|
of degeneracy, for example, codons of amino acids with degeneracy |
396
|
|
|
|
|
|
|
degree 2. |
397
|
|
|
|
|
|
|
Returns : an array of codons, or its reference in scalar context |
398
|
|
|
|
|
|
|
Args : an integer for degeneracy degree |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
=cut |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
# Collect the codons of all amino acids listed under degeneracy degree
# $deg in the structure returned by aa_degeneracy_classes().
# Returns the codons as a list, or as an array reference in scalar
# context; returns nothing when the classes or the requested degree are
# not available.
sub codons_by_degeneracy
{
    my ($self, $deg) = @_;

    my $degHash = $self->aa_degeneracy_classes;
    return unless($degHash);

    my $aaClass = $degHash->{$deg};
    return unless($aaClass);

    # each value is a reference to one amino acid's codon array
    my @codons = map { @$_ } values %$aaClass;

    return @codons if(wantarray);
    return \@codons;
}
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
# other misc functions |
416
|
|
|
|
|
|
|
############################################# |
417
|
|
|
|
|
|
|
# Other methods used internally |
418
|
|
|
|
|
|
|
############################################# |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
# check whether a codon is valid and also not stop codon |
421
|
|
|
|
|
|
|
# Return 1 when the codon table reports $codon as valid and not as a
# stop codon, 0 otherwise; returns nothing when this object has no
# codon table.
sub _is_sense_codon
{
    my ($self, $codon) = @_;

    my $table = $self->codon_table();
    return unless($table);

    # validity is checked first; the stop test only runs for valid codons
    return 0 if(!$table->is_valid_codon($codon)
        or $table->is_stop_codon($codon));

    return 1;
}
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
# check whether a codon is a stop codon |
433
|
|
|
|
|
|
|
# Delegate to the codon table's is_stop_codon() method for $codon;
# returns nothing when this object has no codon table.
sub _is_stop_codon
{
    my ($self, $codon) = @_;

    my $table = $self->codon_table();
    return unless($table);

    return $table->is_stop_codon($codon);
}
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
# get the corresponding AA of a codon |
441
|
|
|
|
|
|
|
# Delegate to the codon table's translate() method for $codon; returns
# nothing when this object has no codon table.
sub _codon_to_aa
{
    my ($self, $codon) = @_;

    my $table = $self->codon_table();
    return unless($table);

    return $table->translate($codon);
}
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
# get all the codons in the sequence, that is, split into nucleotide |
450
|
|
|
|
|
|
|
# triplet |
451
|
|
|
|
|
|
|
# Split a sequence into consecutive nucleotide triplets and count them.
# Args    : $seq - a sequence string or sequence object, normalised
#           through _get_seq_str()
# Returns : reference to a hash of codon => count, or undef when the
#           sequence holds no complete triplet. A warning is issued
#           through the object's warn() method when the length is not a
#           multiple of 3; trailing bases that do not fill a triplet
#           are ignored.
sub _catalog_codons
{
    my ($self, $seq) = @_;

    $seq = $self->_get_seq_str($seq);

    # No usable sequence: stop here instead of carrying an undefined
    # value into length() and the warning text below. undef (not an
    # empty list) is returned because that is what this method has
    # always returned when no codon was found.
    return undef unless(defined $seq);

    my $seqLen = length($seq);
    $self->warn("sequence [$seq] is not multiple of 3 long")
        unless($seqLen % 3 == 0);

    my %codons;
    my $accuLen = 0;
    while($accuLen + 3 <= $seqLen)
    {
        $codons{ substr($seq, $accuLen, 3) }++;
        $accuLen += 3;
    }

    # $accuLen is still 0 when the sequence is shorter than one codon
    return undef unless($accuLen);
    return \%codons;
}
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
# get and preprocess sequence |
474
|
|
|
|
|
|
|
# get a seq string or seq object and return sequence string |
475
|
|
|
|
|
|
|
# Turn a sequence string or sequence object into a normalised string.
# A reference is asked for its sequence through its seq() method. The
# text is then upper-cased, every U becomes T, and all characters
# outside A-Z are dropped.
# Returns the resulting string, or undef when nothing is left.
sub _get_seq_str
{
    my ($self, $seq) = @_;

    my $str = uc(ref($seq) ? $seq->seq : $seq);
    $str =~ tr/U/T/;
    $str =~ s/[^A-Z]+//g; # remove all non-letter characters

    return length($str) > 0 ? $str : undef;
}
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
=head1 AUTHOR |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
Zhenguo Zhang, C<< >> |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
=head1 BUGS |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
Please report any bugs or feature requests to C, or through |
500
|
|
|
|
|
|
|
the web interface at L. I will be notified, and then you'll |
501
|
|
|
|
|
|
|
automatically be notified of progress on your bug as I make changes. |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
=head1 SUPPORT |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
perldoc Bio::CUA::Summarizer |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
You can also look for information at: |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
=over 4 |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker (report bugs here) |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
L |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
L |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
=item * CPAN Ratings |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
L |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
=item * Search CPAN |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
L |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
=back |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
Copyright 2015 Zhenguo Zhang. |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
This program is free software: you can redistribute it and/or modify |
541
|
|
|
|
|
|
|
it under the terms of the GNU General Public License as published by |
542
|
|
|
|
|
|
|
the Free Software Foundation, either version 3 of the License, or |
543
|
|
|
|
|
|
|
(at your option) any later version. |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, |
546
|
|
|
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
547
|
|
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
548
|
|
|
|
|
|
|
GNU General Public License for more details. |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License |
551
|
|
|
|
|
|
|
along with this program. If not, see L. |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
=cut |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
1; # End of Bio::CUA::Summarizer |
557
|
|
|
|
|
|
|
|