line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::CUA::Summarizer; |
2
|
|
|
|
|
|
|
|
3
|
4
|
|
|
4
|
|
20553
|
use 5.006; |
|
4
|
|
|
|
|
10
|
|
|
4
|
|
|
|
|
123
|
|
4
|
4
|
|
|
4
|
|
23
|
use strict; |
|
4
|
|
|
|
|
5
|
|
|
4
|
|
|
|
|
89
|
|
5
|
4
|
|
|
4
|
|
15
|
use warnings; |
|
4
|
|
|
|
|
3
|
|
|
4
|
|
|
|
|
89
|
|
6
|
4
|
|
|
4
|
|
819
|
use parent qw/Bio::CUA/; |
|
4
|
|
|
|
|
504
|
|
|
4
|
|
|
|
|
26
|
|
7
|
4
|
|
|
4
|
|
2440
|
use Bio::CUA::CodonTable; |
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
303
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
my $codonPkg = 'Bio::CUA::CodonTable'; |
10
|
|
|
|
|
|
|
my $pkg = __PACKAGE__; |
11
|
|
|
|
|
|
|
my @bases = qw/A T C G/; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# determine which class is used for sequence processing |
14
|
|
|
|
|
|
|
my $seq_io_pkg; |
15
|
|
|
|
|
|
|
BEGIN{ |
16
|
|
|
|
|
|
|
# set version which might be checked during compilation |
17
|
|
|
|
|
|
|
#our $VERSION = 1.01; |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# determine sequence processing module |
20
|
4
|
|
|
4
|
|
6
|
eval { require Bio::SeqIO; }; |
|
4
|
|
|
|
|
817
|
|
21
|
4
|
50
|
|
|
|
21
|
if($@) # Bio::SeqIO is not available |
22
|
|
|
|
|
|
|
{ |
23
|
4
|
|
|
|
|
6
|
$seq_io_pkg = 'Bio::CUA::SeqIO'; |
24
|
4
|
|
|
|
|
2578
|
require Bio::CUA::SeqIO; |
25
|
|
|
|
|
|
|
}else # otherwise use Bio::SeqIO |
26
|
|
|
|
|
|
|
{ |
27
|
0
|
|
|
|
|
0
|
$seq_io_pkg = 'Bio::SeqIO'; |
28
|
|
|
|
|
|
|
} |
29
|
|
|
|
|
|
|
} |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=pod |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=head1 NAME |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
Bio::CUA::Summarizer - a class to summarize features of sequences. |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=head1 SYNOPSIS |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
This class provides convenience for its child classes with methods |
40
|
|
|
|
|
|
|
summarizing sequence features, such |
41
|
|
|
|
|
|
|
as counting and listing amino acids and codons, retrieving amino acids |
42
|
|
|
|
|
|
|
with certain degree degeneracy in a genetic table. Refer to the |
43
|
|
|
|
|
|
|
L section for more details. |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
use Bio::CUA::Summarizer; |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
my $summarizer = Bio::CUA::Summarizer->new( |
48
|
|
|
|
|
|
|
codon_table => 1 ); # using stardard genetic code |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
# get codons in a sequence file |
51
|
|
|
|
|
|
|
my $codonList = $summarizer->tabulate_codons('seqs.fa'); |
52
|
|
|
|
|
|
|
# get the codon table object of this summarizer |
53
|
|
|
|
|
|
|
my $table = $summarizer->codon_table; |
54
|
|
|
|
|
|
|
# get all sense codons in the genetic codon table |
55
|
|
|
|
|
|
|
my @senseCodons = $summarizer->all_sense_codons; |
56
|
|
|
|
|
|
|
# get codons encoding an amino acid |
57
|
|
|
|
|
|
|
my @codons = $summarizer->codons_of_AA('Ser'); |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=cut |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
=head2 new |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
Title : new |
65
|
|
|
|
|
|
|
Usage : $obj=Bio::CUA::Summarizer->new(%args); |
66
|
|
|
|
|
|
|
Function: create an object which can be used to summarizing sequence |
67
|
|
|
|
|
|
|
features. |
68
|
|
|
|
|
|
|
Returns : an object of this or child class |
69
|
|
|
|
|
|
|
Args : a hash with a key 'codon_table', acceptable values are |
70
|
|
|
|
|
|
|
codon_table => id of genetic codon table # 1 |
71
|
|
|
|
|
|
|
codon_table => Bio::CUA::CodonTable object # 2 |
72
|
|
|
|
|
|
|
codon_table => 'map-file' # 3 |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=over 3 |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=item 1 |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
id of genetic codon table can be found from L
|
79
|
|
|
|
|
|
|
codes|http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=t>. |
80
|
|
|
|
|
|
|
A valid id is an integer. |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=item 2 |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
an object of L. One can directly provide an |
85
|
|
|
|
|
|
|
object to the method. |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
=item 3 |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
If genetic code in analyzed sequences is not included in NCBI, one can |
90
|
|
|
|
|
|
|
also provide its own genetic code in a map-file, in the format of |
91
|
|
|
|
|
|
|
codon1AA1 |
92
|
|
|
|
|
|
|
codon2AA2, |
93
|
|
|
|
|
|
|
... ... .... |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=back |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
Note all the analyzed sequences will use this provided genetic codon |
98
|
|
|
|
|
|
|
table to map between amino acids and codons. |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=cut |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
sub new |
103
|
|
|
|
|
|
|
{ |
104
|
3
|
|
|
3
|
1
|
636
|
my ($caller, @args) = @_; |
105
|
3
|
|
|
|
|
31
|
my $self = $caller->SUPER::new(@args); |
106
|
|
|
|
|
|
|
|
107
|
3
|
|
|
|
|
17
|
my $hashRef = $self->_array_to_hash(\@args); |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# only process its own argument |
110
|
3
|
|
|
|
|
6
|
my $codonTable; |
111
|
3
|
|
|
|
|
26
|
while(my ($tag, $val) = each %$hashRef) |
112
|
|
|
|
|
|
|
{ |
113
|
4
|
100
|
|
|
|
20
|
next unless($tag eq 'codon_table'); |
114
|
3
|
50
|
|
|
|
29
|
if(ref($val)) |
|
|
50
|
|
|
|
|
|
115
|
|
|
|
|
|
|
{ |
116
|
0
|
0
|
|
|
|
0
|
$self->throw("$val is not an object of $codonPkg") |
117
|
|
|
|
|
|
|
unless($val->isa($codonPkg)); |
118
|
0
|
|
|
|
|
0
|
$codonTable = $val; |
119
|
|
|
|
|
|
|
}elsif($val =~ /^\d+$/) # genetic code id |
120
|
|
|
|
|
|
|
{ |
121
|
3
|
50
|
|
|
|
32
|
$codonTable = $codonPkg->new(-id => $val) or |
122
|
|
|
|
|
|
|
$self->throw("Invalid genetic code ID '$val'"); |
123
|
|
|
|
|
|
|
}else # a map file |
124
|
|
|
|
|
|
|
{ |
125
|
0
|
0
|
|
|
|
0
|
$codonTable = $codonPkg->new(-map_file => $val) or |
126
|
|
|
|
|
|
|
$self->throw("Can not construct codon table with file '$val'"); |
127
|
|
|
|
|
|
|
} |
128
|
3
|
|
|
|
|
14
|
last; |
129
|
|
|
|
|
|
|
} |
130
|
3
|
50
|
0
|
|
|
10
|
$self->warn("option 'codon_table' is missing in the method", |
131
|
|
|
|
|
|
|
"'new' of $pkg") and return undef unless($codonTable); |
132
|
|
|
|
|
|
|
# store the result |
133
|
3
|
|
|
|
|
39
|
$self->{'_codon_table'} = $codonTable; |
134
|
|
|
|
|
|
|
|
135
|
3
|
|
|
|
|
31
|
return $self; |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=head2 codon_table |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
Title : codon_table |
141
|
|
|
|
|
|
|
Usage : $table = $self->codon_table; |
142
|
|
|
|
|
|
|
Function: get associated codon table of this object |
143
|
|
|
|
|
|
|
Returns : an object of L |
144
|
|
|
|
|
|
|
Args : None |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=cut |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
sub codon_table |
149
|
|
|
|
|
|
|
{ |
150
|
249
|
50
|
|
249
|
1
|
1355
|
my $table = $_[0]->{'_codon_table'} or |
151
|
|
|
|
|
|
|
$_[0]->warn("No codon table associated with this object $_[0]"); |
152
|
249
|
|
|
|
|
935
|
return $table; |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=head2 bases |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
get the 4 nucleotides A,T,C,G always in this order, to keep |
158
|
|
|
|
|
|
|
consistency among different classes |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=cut |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
# get all the nucleotide bases in a certain order |
163
|
|
|
|
|
|
|
sub bases |
164
|
|
|
|
|
|
|
{ |
165
|
1
|
50
|
|
1
|
1
|
559
|
return wantarray? @bases : \@bases; |
166
|
|
|
|
|
|
|
} |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
# sequence-level functions |
169
|
|
|
|
|
|
|
=head2 get_codon_list |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
Title : get_codon_list |
172
|
|
|
|
|
|
|
Usage : $codonList = $self->get_codon_list($input) |
173
|
|
|
|
|
|
|
Function: get codons and their counts in input |
174
|
|
|
|
|
|
|
Returns : reference to a hash containing codons as keys and counts |
175
|
|
|
|
|
|
|
as values. |
176
|
|
|
|
|
|
|
Args : seq string, seq object, seq file, or another codon list |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=cut |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
# the main interface to preprocess input to most methods |
181
|
|
|
|
|
|
|
# return a codon list with its count |
182
|
|
|
|
|
|
|
# acceptable parameters: seq string, seq object, seq file, codon list |
183
|
|
|
|
|
|
|
sub get_codon_list |
184
|
|
|
|
|
|
|
{ |
185
|
57
|
|
|
57
|
0
|
145
|
my ($self, $input) = @_; |
186
|
|
|
|
|
|
|
|
187
|
57
|
|
|
|
|
159
|
my $ref = ref($input); |
188
|
57
|
100
|
|
|
|
165
|
unless($ref) # a scalar variable |
189
|
|
|
|
|
|
|
{ |
190
|
|
|
|
|
|
|
# a sequence string |
191
|
5
|
50
|
33
|
|
|
38
|
if($input =~ /^[ATGCUN]+$/ and (! -f $input)) |
192
|
|
|
|
|
|
|
{ |
193
|
0
|
|
|
|
|
0
|
return $self->_catalog_codons($input); |
194
|
|
|
|
|
|
|
}else # a sequence file |
195
|
|
|
|
|
|
|
{ |
196
|
5
|
|
|
|
|
26
|
return $self->tabulate_codons($input); |
197
|
|
|
|
|
|
|
} |
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
|
200
|
52
|
50
|
|
|
|
907
|
if($ref eq 'HASH') # codon list |
201
|
|
|
|
|
|
|
{ |
202
|
0
|
|
|
|
|
0
|
return $input; |
203
|
|
|
|
|
|
|
}else # an seq object |
204
|
|
|
|
|
|
|
{ |
205
|
52
|
|
|
|
|
348
|
return $self->_catalog_codons($input); |
206
|
|
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
} |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=head2 tabulate_codons |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
Title : tabulate_codons |
212
|
|
|
|
|
|
|
Usage : $codonList = $self->tabulate_codons($input,[$each]); |
213
|
|
|
|
|
|
|
Function: count codons in the input sequences |
214
|
|
|
|
|
|
|
Returns : reference to a hash in which codon is the key and counts as |
215
|
|
|
|
|
|
|
values. If $each is true, then each sequence is separately processed |
216
|
|
|
|
|
|
|
and stored in a larger hash with the sequence names as hash keys |
217
|
|
|
|
|
|
|
Args : accepted arguments are as follows: |
218
|
|
|
|
|
|
|
I = name of a file containing fasta sequences |
219
|
|
|
|
|
|
|
I = optional, if TRUE (i.e., non-zero values), each sequence is |
220
|
|
|
|
|
|
|
separately processed. |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
This is a companionate method of L for situations |
223
|
|
|
|
|
|
|
when one want to get codon counts for each sequence separately. |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
=cut |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
sub tabulate_codons |
228
|
|
|
|
|
|
|
{ |
229
|
6
|
|
|
6
|
1
|
10
|
my ($self, $input, $each) = @_; |
230
|
|
|
|
|
|
|
|
231
|
6
|
50
|
|
|
|
35
|
my $seqIO = $self->_get_seq_io($input) or return; |
232
|
6
|
|
|
|
|
11
|
my %list; |
233
|
|
|
|
|
|
|
|
234
|
6
|
50
|
|
|
|
21
|
if($each) |
235
|
|
|
|
|
|
|
{ |
236
|
0
|
|
|
|
|
0
|
while(my $seq = $seqIO->next_seq()) |
237
|
|
|
|
|
|
|
{ |
238
|
0
|
|
|
|
|
0
|
my $codons = $self->_catalog_codons($seq->seq); |
239
|
0
|
|
|
|
|
0
|
$list{$seq->id} = $codons; |
240
|
|
|
|
|
|
|
} |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
}else # otherwise process together |
243
|
|
|
|
|
|
|
{ |
244
|
6
|
|
|
|
|
44
|
while(my $seq = $seqIO->next_seq()) |
245
|
|
|
|
|
|
|
{ |
246
|
78
|
|
|
|
|
272
|
my $codons = $self->_catalog_codons($seq->seq); |
247
|
|
|
|
|
|
|
# merge all codons together |
248
|
78
|
|
|
|
|
581
|
while(my ($c, $v) = each %$codons) |
249
|
|
|
|
|
|
|
{ |
250
|
4758
|
|
|
|
|
12722
|
$list{$c} += $v; |
251
|
|
|
|
|
|
|
} |
252
|
|
|
|
|
|
|
} |
253
|
|
|
|
|
|
|
} |
254
|
|
|
|
|
|
|
|
255
|
6
|
50
|
|
|
|
45
|
return undef unless(keys %list); |
256
|
6
|
|
|
|
|
52
|
return \%list; |
257
|
|
|
|
|
|
|
} |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
=head2 tabulate_AAs |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
Title : tabulate_AAs |
262
|
|
|
|
|
|
|
Usage : $AAList = $self->tabulate_AAs($input,[$each]); |
263
|
|
|
|
|
|
|
Function: similar to L, but for counting amino acids |
264
|
|
|
|
|
|
|
Returns : the same as L, but for amino acids |
265
|
|
|
|
|
|
|
Args : refer to L. |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
=cut |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
sub tabulate_AAs |
270
|
|
|
|
|
|
|
{ |
271
|
0
|
|
|
0
|
1
|
0
|
my ($self, $input, $each) = @_; |
272
|
|
|
|
|
|
|
|
273
|
0
|
0
|
|
|
|
0
|
my $codonList = $self->tabulate_codons($input) or return; |
274
|
|
|
|
|
|
|
|
275
|
0
|
|
|
|
|
0
|
my %AAs; |
276
|
0
|
0
|
|
|
|
0
|
if($each) |
277
|
|
|
|
|
|
|
{ |
278
|
0
|
|
|
|
|
0
|
while(my ($id, $hashRef) = each %$codonList) |
279
|
|
|
|
|
|
|
{ |
280
|
0
|
|
|
|
|
0
|
while(my ($codon, $count) = each %$codonList) |
281
|
|
|
|
|
|
|
{ |
282
|
0
|
0
|
|
|
|
0
|
my $AA = $self->_codon_to_aa($codon) or next; |
283
|
0
|
|
|
|
|
0
|
$AAs{$id}->{$AA} += $count; |
284
|
|
|
|
|
|
|
} |
285
|
|
|
|
|
|
|
} |
286
|
|
|
|
|
|
|
}else |
287
|
|
|
|
|
|
|
{ |
288
|
0
|
|
|
|
|
0
|
while(my ($codon, $count) = each %$codonList) |
289
|
|
|
|
|
|
|
{ |
290
|
0
|
0
|
|
|
|
0
|
my $AA = $self->_codon_to_aa($codon) or next; |
291
|
0
|
|
|
|
|
0
|
$AAs{$AA} += $count; |
292
|
|
|
|
|
|
|
} |
293
|
|
|
|
|
|
|
} |
294
|
|
|
|
|
|
|
|
295
|
0
|
|
|
|
|
0
|
return \%AAs; |
296
|
|
|
|
|
|
|
} |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
# get the sequence IO and return it |
299
|
|
|
|
|
|
|
sub _get_seq_io |
300
|
|
|
|
|
|
|
{ |
301
|
6
|
|
|
6
|
|
11
|
my ($self, $input) = @_; |
302
|
|
|
|
|
|
|
|
303
|
6
|
50
|
0
|
|
|
19
|
$self->warn("input fasta file is needed to obtain seq IO") and return |
304
|
|
|
|
|
|
|
unless($input); |
305
|
|
|
|
|
|
|
# at present, use Bio::SeqIO |
306
|
6
|
|
|
|
|
71
|
my $io = $seq_io_pkg->new(-file => $input, -format => 'fasta'); |
307
|
6
|
|
|
|
|
28
|
return $io; |
308
|
|
|
|
|
|
|
} |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
# codon table functions |
312
|
|
|
|
|
|
|
=head2 all_sense_codons |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
get all sense codons in the genetic codon table this object |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
=cut |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
sub all_sense_codons |
319
|
|
|
|
|
|
|
{ |
320
|
2
|
|
|
2
|
0
|
5
|
my ($self) = @_; |
321
|
|
|
|
|
|
|
|
322
|
2
|
50
|
|
|
|
5
|
my $codonTable = $self->codon_table() or return; |
323
|
|
|
|
|
|
|
|
324
|
2
|
|
|
|
|
9
|
return $codonTable->all_sense_codons; |
325
|
|
|
|
|
|
|
} |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
=head2 all_AAs_in_table |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
get all the amino acids coded in the genetic code table of this object |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=cut |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
sub all_AAs_in_table |
334
|
|
|
|
|
|
|
{ |
335
|
8
|
|
|
8
|
1
|
18
|
my ($self) = @_; |
336
|
|
|
|
|
|
|
|
337
|
8
|
50
|
|
|
|
27
|
my $codonTable = $self->codon_table() or return; |
338
|
|
|
|
|
|
|
|
339
|
8
|
|
|
|
|
41
|
$codonTable->all_amino_acids(); |
340
|
|
|
|
|
|
|
} |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
=head2 codons_of_AA |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
get codons coding the given amino acid, e.g., |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
my @codons = $self->codons_of_AA('Ser'); |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
=cut |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
sub codons_of_AA |
351
|
|
|
|
|
|
|
{ |
352
|
160
|
|
|
160
|
1
|
238
|
my ($self, $AA) = @_; |
353
|
|
|
|
|
|
|
|
354
|
160
|
50
|
|
|
|
278
|
my $codonTable = $self->codon_table() or return; |
355
|
|
|
|
|
|
|
|
356
|
160
|
|
|
|
|
425
|
return $codonTable->codons_of_AA($AA); |
357
|
|
|
|
|
|
|
} |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
=head2 aa_degeneracy_classes |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
Title : aa_degeneracy_classes |
362
|
|
|
|
|
|
|
Usage : $hashRef = $self->aa_degeneracy_classes; |
363
|
|
|
|
|
|
|
Function: get amino acid degeneracy classes according to the |
364
|
|
|
|
|
|
|
associated genetic code table |
365
|
|
|
|
|
|
|
Returns : a hash reference in which first level key is degeneracy |
366
|
|
|
|
|
|
|
degrees such as 1,2,3,4,6, second level is amino acid, the value is |
367
|
|
|
|
|
|
|
reference to the corresponding codon array. For example: |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
{ 2 => { D => [GAU, GAC], |
370
|
|
|
|
|
|
|
C => [UGU, UGC], |
371
|
|
|
|
|
|
|
... ... |
372
|
|
|
|
|
|
|
}, |
373
|
|
|
|
|
|
|
4 => { A => [GCU, GCC, GCA, GCG], |
374
|
|
|
|
|
|
|
... ... |
375
|
|
|
|
|
|
|
}, |
376
|
|
|
|
|
|
|
... ... ... |
377
|
|
|
|
|
|
|
} |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
Args : None |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
=cut |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
sub aa_degeneracy_classes |
384
|
|
|
|
|
|
|
{ |
385
|
52
|
50
|
|
52
|
1
|
448
|
my $codonTable = $_[0]->codon_table or return; |
386
|
52
|
|
|
|
|
393
|
return $codonTable->codon_degeneracy; |
387
|
|
|
|
|
|
|
} |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
=head2 codons_by_degeneracy |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
Title : codons_by_degeneracy |
392
|
|
|
|
|
|
|
Usage : @codons = $self->codons_by_degeneracy(2); |
393
|
|
|
|
|
|
|
Function: get all the codons of AAs which have the specified degree |
394
|
|
|
|
|
|
|
of degeneracy, for example, codons of amino acids with degenracy |
395
|
|
|
|
|
|
|
degree 2. |
396
|
|
|
|
|
|
|
Returns : an array of codons, or its reference in scalar context |
397
|
|
|
|
|
|
|
Args : an integer for degeneracy degree |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
=cut |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
sub codons_by_degeneracy |
402
|
|
|
|
|
|
|
{ |
403
|
26
|
|
|
26
|
1
|
177
|
my ($self, $deg) = @_; |
404
|
26
|
50
|
|
|
|
186
|
my $degHash = $self->aa_degeneracy_classes or return; |
405
|
26
|
50
|
|
|
|
142
|
my $aaClass = $degHash->{$deg} or return; |
406
|
26
|
|
|
|
|
52
|
my @codons; |
407
|
26
|
|
|
|
|
203
|
while(my ($aa, $codonRef) = each %$aaClass) |
408
|
|
|
|
|
|
|
{ |
409
|
52
|
|
|
|
|
301
|
push @codons, @$codonRef; |
410
|
|
|
|
|
|
|
} |
411
|
26
|
50
|
|
|
|
224
|
return wantarray? @codons : \@codons; |
412
|
|
|
|
|
|
|
} |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
# other misc functions |
415
|
|
|
|
|
|
|
############################################# |
416
|
|
|
|
|
|
|
# Other methods used internally |
417
|
|
|
|
|
|
|
############################################# |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
# check whether a codon is valid and also not stop codon |
420
|
|
|
|
|
|
|
sub _is_sense_codon |
421
|
|
|
|
|
|
|
{ |
422
|
0
|
|
|
0
|
|
0
|
my ($self, $codon) = @_; |
423
|
|
|
|
|
|
|
|
424
|
0
|
0
|
|
|
|
0
|
my $codonTable = $self->codon_table() or return; |
425
|
0
|
0
|
|
|
|
0
|
return 0 unless($codonTable->is_valid_codon($codon)); |
426
|
0
|
0
|
|
|
|
0
|
return 0 if($codonTable->is_stop_codon($codon)); |
427
|
|
|
|
|
|
|
|
428
|
0
|
|
|
|
|
0
|
return 1; |
429
|
|
|
|
|
|
|
} |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
# check whether a codon is a stop codon |
432
|
|
|
|
|
|
|
sub _is_stop_codon |
433
|
|
|
|
|
|
|
{ |
434
|
0
|
|
|
0
|
|
0
|
my ($self, $codon) = @_; |
435
|
0
|
0
|
|
|
|
0
|
my $codonTable = $self->codon_table() or return; |
436
|
0
|
|
|
|
|
0
|
$codonTable->is_stop_codon($codon); |
437
|
|
|
|
|
|
|
} |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
# get the corresponding AA of a codon |
440
|
|
|
|
|
|
|
sub _codon_to_aa |
441
|
|
|
|
|
|
|
{ |
442
|
0
|
|
|
0
|
|
0
|
my ($self, $codon) = @_; |
443
|
|
|
|
|
|
|
|
444
|
0
|
0
|
|
|
|
0
|
my $codonTable = $self->codon_table() or return; |
445
|
0
|
|
|
|
|
0
|
$codonTable->translate($codon); |
446
|
|
|
|
|
|
|
} |
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
# get all the codons in the sequence, that is, split into nucleotide |
449
|
|
|
|
|
|
|
# triplet |
450
|
|
|
|
|
|
|
sub _catalog_codons |
451
|
|
|
|
|
|
|
{ |
452
|
130
|
|
|
130
|
|
366
|
my ($self, $seq) = @_; |
453
|
|
|
|
|
|
|
|
454
|
130
|
|
|
|
|
496
|
$seq = $self->_get_seq_str($seq); |
455
|
130
|
|
|
|
|
281
|
my %codons; |
456
|
130
|
|
|
|
|
189
|
my $accuLen = 0; |
457
|
130
|
|
|
|
|
326
|
my $seqLen = length($seq); |
458
|
130
|
50
|
|
|
|
667
|
$self->warn("sequence [$seq] is not multiple of 3 long") unless($seqLen % |
459
|
|
|
|
|
|
|
3 == 0); |
460
|
130
|
|
|
|
|
158
|
my $codon; |
461
|
130
|
|
|
|
|
494
|
while($accuLen + 3 <= $seqLen) |
462
|
|
|
|
|
|
|
{ |
463
|
88570
|
|
|
|
|
161547
|
$codon = substr($seq,$accuLen,3); |
464
|
88570
|
|
|
|
|
129855
|
$codons{$codon}++; |
465
|
88570
|
|
|
|
|
223645
|
$accuLen += 3; |
466
|
|
|
|
|
|
|
} |
467
|
|
|
|
|
|
|
|
468
|
130
|
50
|
|
|
|
506
|
return undef unless($accuLen); |
469
|
130
|
|
|
|
|
2225
|
return \%codons; |
470
|
|
|
|
|
|
|
} |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
# get and preprocess sequence |
473
|
|
|
|
|
|
|
# get a seq string or seq object and return sequence string |
474
|
|
|
|
|
|
|
sub _get_seq_str |
475
|
|
|
|
|
|
|
{ |
476
|
130
|
|
|
130
|
|
296
|
my ($self, $seq) = @_; |
477
|
130
|
100
|
|
|
|
609
|
$seq = ref($seq)? $seq->seq : $seq; |
478
|
130
|
|
|
|
|
1627
|
$seq = uc($seq); |
479
|
130
|
|
|
|
|
960
|
$seq =~ tr/U/T/; |
480
|
130
|
|
|
|
|
5687
|
$seq =~ s/[^A-Z]+//g; # remove all non-nucleotide characters |
481
|
|
|
|
|
|
|
|
482
|
130
|
|
|
|
|
490
|
my $len = length($seq); |
483
|
130
|
50
|
|
|
|
684
|
if($len > 0) |
484
|
|
|
|
|
|
|
{ |
485
|
130
|
|
|
|
|
858
|
return $seq; |
486
|
|
|
|
|
|
|
}else |
487
|
|
|
|
|
|
|
{ |
488
|
0
|
|
|
|
|
|
return undef; |
489
|
|
|
|
|
|
|
} |
490
|
|
|
|
|
|
|
} |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
=head1 AUTHOR |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
Zhenguo Zhang, C<< >> |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
=head1 BUGS |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
Please report any bugs or feature requests to C, or through |
499
|
|
|
|
|
|
|
the web interface at L. I will be notified, and then you'll |
500
|
|
|
|
|
|
|
automatically be notified of progress on your bug as I make changes. |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
=head1 SUPPORT |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
perldoc Bio::CUA::Summarizer |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
You can also look for information at: |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
=over 4 |
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker (report bugs here) |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
L |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
L |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
=item * CPAN Ratings |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
L |
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
=item * Search CPAN |
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
L |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
=back |
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
Copyright 2015 Zhenguo Zhang. |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
This program is free software: you can redistribute it and/or modify |
540
|
|
|
|
|
|
|
it under the terms of the GNU General Public License as published by |
541
|
|
|
|
|
|
|
the Free Software Foundation, either version 3 of the License, or |
542
|
|
|
|
|
|
|
(at your option) any later version. |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, |
545
|
|
|
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
546
|
|
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
547
|
|
|
|
|
|
|
GNU General Public License for more details. |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License |
550
|
|
|
|
|
|
|
along with this program. If not, see L. |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
=cut |
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
1; # End of Bio::CUA::Summarizer |
556
|
|
|
|
|
|
|
|