line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::CUA; |
2
|
|
|
|
|
|
|
|
3
|
6
|
|
|
6
|
|
26784
|
use 5.006; |
|
6
|
|
|
|
|
20
|
|
4
|
6
|
|
|
6
|
|
28
|
use strict; |
|
6
|
|
|
|
|
7
|
|
|
6
|
|
|
|
|
166
|
|
5
|
6
|
|
|
6
|
|
23
|
use warnings; |
|
6
|
|
|
|
|
11
|
|
|
6
|
|
|
|
|
210
|
|
6
|
6
|
|
|
6
|
|
29
|
use Carp; |
|
6
|
|
|
|
|
6
|
|
|
6
|
|
|
|
|
7800
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
# some global variables |
9
|
|
|
|
|
|
|
our $VERSION = '1.04'; |
10
|
|
|
|
|
|
|
my $sep = "\t"; |
11
|
|
|
|
|
|
|
#my @openFHs; # all file handles opened by this class |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=pod |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 NAME |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
Bio::CUA - Codon Usage Analyzer. |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 VERSION |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
Version 1.04 |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=head1 SYNOPSIS |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
This is the root class for the whole distribution of |
26
|
|
|
|
|
|
|
L, |
27
|
|
|
|
|
|
|
providing some routine methods used by all classs in the |
28
|
|
|
|
|
|
|
distribution L. Users should not use this class |
29
|
|
|
|
|
|
|
directly. Please start with its child classes such as |
30
|
|
|
|
|
|
|
L, L. |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=head1 DESCRIPTION |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
The aim of this distribution is to provide comprehensive and flexible |
35
|
|
|
|
|
|
|
tools to analyze codon usage bias (CUB) and relevant problems, so that |
36
|
|
|
|
|
|
|
users can speed up the genetic research by taking advantage of this |
37
|
|
|
|
|
|
|
convenience. |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
One amino acid can be encoded by more than one synonymous codon, and |
40
|
|
|
|
|
|
|
synonymous codons are unevenly used. For example, some codons are used |
41
|
|
|
|
|
|
|
more often than other synonymous ones in highly expressed genes (I
|
42
|
|
|
|
|
|
|
and Li 1987>). To measure the unevenness of codon usage, multiple |
43
|
|
|
|
|
|
|
metrics of codon usage bias have been developed, such as Fop |
44
|
|
|
|
|
|
|
(Frequency of optimal codons), CAI (Codon Adaptation Index), tAI (tRNA |
45
|
|
|
|
|
|
|
Adaptation Index), and ENC (Effective Number of Codons). The causes of |
46
|
|
|
|
|
|
|
CUB phenomena are complicated, including, mutational bias, selection on |
47
|
|
|
|
|
|
|
translational efficiency or accurancy. CUB is one fundamental concept |
48
|
|
|
|
|
|
|
in genetics. |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
So far, no software exists to compute all the above CUB metrics, and |
51
|
|
|
|
|
|
|
more importantly parameters of CUB calculations are often fixed in |
52
|
|
|
|
|
|
|
software, so one can only analyze genes in a limited list of species |
53
|
|
|
|
|
|
|
and one can not incorporate its own parameters such as sequences of |
54
|
|
|
|
|
|
|
highly expressed genes in a tissue. |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
This package mainly solves these two problems. We also extend some |
57
|
|
|
|
|
|
|
methods, such as GC-content corrected ENC, background-data normalized |
58
|
|
|
|
|
|
|
CAI, etc. See the relevant methods in CUB classes for more details. |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=head1 METHODS |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
=cut |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
sub new |
65
|
|
|
|
|
|
|
{ |
66
|
119
|
|
|
119
|
0
|
326
|
my ($caller, @args) = @_; |
67
|
119
|
|
|
|
|
178
|
my $self = {}; |
68
|
119
|
50
|
|
|
|
259
|
my $class = ref($caller)? ref($caller) : $caller; |
69
|
|
|
|
|
|
|
|
70
|
119
|
|
|
|
|
153
|
bless $self, $class; |
71
|
119
|
|
|
|
|
311
|
my $hashRef = $self->_array_to_hash(\@args); |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# only process its own argument |
74
|
119
|
50
|
|
|
|
277
|
$self->debug(1) if($hashRef->{'debug'}); |
75
|
|
|
|
|
|
|
|
76
|
119
|
|
|
|
|
460
|
return $self; |
77
|
|
|
|
|
|
|
} |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
# store and retrieve tag values |
80
|
|
|
|
|
|
|
sub get_tag |
81
|
|
|
|
|
|
|
{ |
82
|
312
|
|
|
312
|
0
|
378
|
my ($self, $tag) = @_; |
83
|
312
|
|
|
|
|
1186
|
return $self->{'_tags'}->{$tag}; |
84
|
|
|
|
|
|
|
} |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
sub set_tag |
87
|
|
|
|
|
|
|
{ |
88
|
323
|
|
|
323
|
0
|
498
|
my ($self, $tag, $val) = @_; |
89
|
323
|
|
|
|
|
898
|
$self->{'_tags'}->{$tag} = $val; |
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=head2 debug |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
Title : debug |
95
|
|
|
|
|
|
|
Usage : $true_of_false=$self->debug([$bool]); |
96
|
|
|
|
|
|
|
Function: get/set the boolean value. |
97
|
|
|
|
|
|
|
Returns : 0 as false, 1 as true |
98
|
|
|
|
|
|
|
Args : optional. 0 or 1 for false and true, respectively. |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=cut |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
sub debug |
103
|
|
|
|
|
|
|
{ |
104
|
74
|
|
|
74
|
1
|
78
|
my ($self, $val) = @_; |
105
|
74
|
50
|
|
|
|
122
|
$self->set_tag('debug', $val) if($val); |
106
|
74
|
|
|
|
|
107
|
return $self->get_tag('debug'); |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=head2 throw |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
Title : throw |
113
|
|
|
|
|
|
|
Usage : $self->throw("Some fatal errors"); |
114
|
|
|
|
|
|
|
Function: stop and report when fatal errors in formatted message |
115
|
|
|
|
|
|
|
Returns : None |
116
|
|
|
|
|
|
|
Args : error message |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=cut |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
# simplified version |
121
|
|
|
|
|
|
|
sub throw |
122
|
|
|
|
|
|
|
{ |
123
|
0
|
|
|
0
|
1
|
0
|
my ($self, @args) = @_; |
124
|
0
|
|
0
|
|
|
0
|
my $class = ref($self) || $self; |
125
|
0
|
0
|
|
|
|
0
|
$class = ' '.$class if $class; |
126
|
0
|
|
|
|
|
0
|
my $title = "------------- EXCEPTION$class -------------"; |
127
|
0
|
|
|
|
|
0
|
my $footer = ('-' x length($title))."\n"; |
128
|
|
|
|
|
|
|
#my $text = join("\n", @args); |
129
|
0
|
|
|
|
|
0
|
my $text = _format_text(join(' ',@args)); |
130
|
0
|
|
|
|
|
0
|
croak "\n$title\n", "MSG: $text\n", $footer, "\n"; |
131
|
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
=head2 warn |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
Title : warn |
136
|
|
|
|
|
|
|
Usage : $self->warn("Please pay attention here") |
137
|
|
|
|
|
|
|
Function: report warning message when something looks not good |
138
|
|
|
|
|
|
|
Returns : None |
139
|
|
|
|
|
|
|
Args : warning messages. |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=cut |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
sub warn |
144
|
|
|
|
|
|
|
{ |
145
|
0
|
|
|
0
|
1
|
0
|
my ($self, @args) = @_; |
146
|
|
|
|
|
|
|
|
147
|
0
|
|
0
|
|
|
0
|
my $class = ref($self) || $self; |
148
|
0
|
0
|
|
|
|
0
|
$class = ' '.$class if $class; |
149
|
0
|
|
|
|
|
0
|
my $title = "------------- WARNING$class -------------"; |
150
|
0
|
|
|
|
|
0
|
my $footer = ('-' x length($title))."\n"; |
151
|
0
|
|
|
|
|
0
|
my $text = _format_text(join(' ',@args)); |
152
|
|
|
|
|
|
|
#my $text = join("\n", @args); |
153
|
0
|
|
|
|
|
0
|
carp "\n$title\n", "MSG: $text\n", $footer, "\n"; |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
# format the text into blocks with same line length |
157
|
|
|
|
|
|
|
sub _format_text |
158
|
|
|
|
|
|
|
{ |
159
|
0
|
|
|
0
|
|
0
|
my ($text, $lineLen) = @_; |
160
|
|
|
|
|
|
|
|
161
|
0
|
|
0
|
|
|
0
|
$lineLen ||= 60; |
162
|
0
|
|
|
|
|
0
|
chomp($text); |
163
|
0
|
|
|
|
|
0
|
my $result = ''; |
164
|
|
|
|
|
|
|
|
165
|
0
|
|
|
|
|
0
|
my @blocks = split /\n/, $text; |
166
|
|
|
|
|
|
|
|
167
|
0
|
|
|
|
|
0
|
foreach my $b (@blocks) |
168
|
|
|
|
|
|
|
{ |
169
|
0
|
|
|
|
|
0
|
my $newB = _break_into_lines($b, $lineLen); |
170
|
0
|
|
|
|
|
0
|
$result .= $newB; |
171
|
|
|
|
|
|
|
} |
172
|
0
|
|
|
|
|
0
|
return $result; |
173
|
|
|
|
|
|
|
} |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
sub _break_into_lines |
176
|
|
|
|
|
|
|
{ |
177
|
0
|
|
|
0
|
|
0
|
my ($text, $size) = @_; |
178
|
|
|
|
|
|
|
|
179
|
0
|
|
|
|
|
0
|
my $lines = ''; |
180
|
0
|
|
|
|
|
0
|
my $textLen = length($text); |
181
|
|
|
|
|
|
|
|
182
|
0
|
|
|
|
|
0
|
my $accuLen = 0; |
183
|
0
|
|
|
|
|
0
|
while($accuLen < $textLen) |
184
|
|
|
|
|
|
|
{ |
185
|
0
|
0
|
|
|
|
0
|
my $lineLen = $accuLen + $size > $textLen? $textLen - $accuLen |
186
|
|
|
|
|
|
|
: $size; |
187
|
0
|
|
|
|
|
0
|
my $l = substr($text,$accuLen,$lineLen); |
188
|
0
|
|
|
|
|
0
|
$accuLen += $lineLen; |
189
|
0
|
|
|
|
|
0
|
$lines .= $l."\n"; |
190
|
|
|
|
|
|
|
} |
191
|
|
|
|
|
|
|
|
192
|
0
|
|
|
|
|
0
|
return $lines; |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
# return hash ref by reading into an array ref |
196
|
|
|
|
|
|
|
sub _array_to_hash |
197
|
|
|
|
|
|
|
{ |
198
|
240
|
|
|
240
|
|
306
|
my ($self,$arrayRef,$nc) = @_; |
199
|
|
|
|
|
|
|
|
200
|
240
|
50
|
|
|
|
545
|
$self->throw("parameter '$arrayRef' to _array_to_hash is not an array reference") |
201
|
|
|
|
|
|
|
unless(ref($arrayRef) eq 'ARRAY'); |
202
|
|
|
|
|
|
|
|
203
|
240
|
|
|
|
|
210
|
my %hash; |
204
|
|
|
|
|
|
|
|
205
|
240
|
50
|
|
|
|
493
|
$self->throw("Odd number of elements are in the array fed to", |
206
|
|
|
|
|
|
|
"_array_to_hash, check the array $arrayRef") |
207
|
|
|
|
|
|
|
unless($#$arrayRef % 2); |
208
|
|
|
|
|
|
|
|
209
|
240
|
|
|
|
|
572
|
for(my $i = 0; $i < $#$arrayRef; $i += 2) |
210
|
|
|
|
|
|
|
{ |
211
|
674
|
|
|
|
|
710
|
my $k = $arrayRef->[$i]; |
212
|
674
|
|
|
|
|
1449
|
$k =~ s/^\-*//; # removing leading '-' |
213
|
674
|
50
|
|
|
|
1327
|
$k = lc($k) unless($nc); |
214
|
674
|
|
|
|
|
2126
|
$hash{$k} = $arrayRef->[$i+1]; |
215
|
|
|
|
|
|
|
} |
216
|
|
|
|
|
|
|
|
217
|
240
|
|
|
|
|
553
|
return \%hash; |
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
# write out hash to an outfile |
222
|
|
|
|
|
|
|
sub _write_out_hash |
223
|
|
|
|
|
|
|
{ |
224
|
2
|
|
|
2
|
|
6
|
my ($self, $outFile, $hashRef) = @_; |
225
|
|
|
|
|
|
|
|
226
|
2
|
|
|
|
|
2
|
my $fh; |
227
|
2
|
50
|
|
|
|
36676
|
open($fh, "> $outFile") or die "Can not open $outFile:$!"; |
228
|
|
|
|
|
|
|
# let's sort the hash so that every time the same order is |
229
|
|
|
|
|
|
|
# produced |
230
|
2
|
|
|
|
|
76
|
my @sortedKeys = sort keys(%$hashRef); |
231
|
2
|
|
|
|
|
14
|
foreach my $k (@sortedKeys) |
232
|
|
|
|
|
|
|
{ |
233
|
119
|
|
|
|
|
389
|
print $fh join($sep, $k, $hashRef->{$k}),"\n"; |
234
|
|
|
|
|
|
|
} |
235
|
2
|
|
|
|
|
82
|
close $fh; |
236
|
|
|
|
|
|
|
|
237
|
2
|
|
|
|
|
27
|
return 1; |
238
|
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
# open a file and return its file handle |
241
|
|
|
|
|
|
|
sub _open_file |
242
|
|
|
|
|
|
|
{ |
243
|
15
|
|
|
15
|
|
26
|
my ($self, $file, $mode) = @_; |
244
|
|
|
|
|
|
|
|
245
|
15
|
|
50
|
|
|
76
|
$mode ||= ' '; |
246
|
|
|
|
|
|
|
|
247
|
15
|
|
|
|
|
15
|
my $fh; |
248
|
15
|
50
|
|
|
|
673
|
open($fh, "$mode $file") or $self->throw("can not open $file:$!"); |
249
|
|
|
|
|
|
|
#push @openFHs, $fh; |
250
|
15
|
|
|
|
|
80
|
return $fh; |
251
|
|
|
|
|
|
|
} |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
# parse the first $num fields of input file, and use the vaule at the |
254
|
|
|
|
|
|
|
# first column as key |
255
|
|
|
|
|
|
|
sub _parse_file |
256
|
|
|
|
|
|
|
{ |
257
|
2
|
|
|
2
|
|
4
|
my ($self, $file, $num) = @_; |
258
|
|
|
|
|
|
|
|
259
|
2
|
|
|
|
|
1
|
my %hash; |
260
|
2
|
|
|
|
|
7
|
my $fh = $self->_open_file($file); |
261
|
2
|
|
|
|
|
31
|
while(<$fh>) |
262
|
|
|
|
|
|
|
{ |
263
|
119
|
50
|
33
|
|
|
438
|
next if /^#/ or /^\s*$/; |
264
|
119
|
|
|
|
|
124
|
chomp; |
265
|
119
|
|
|
|
|
128
|
s/^\s+//; # remove leading blanks |
266
|
119
|
|
|
|
|
281
|
my @fields = split /\s+/; |
267
|
119
|
50
|
|
|
|
147
|
if($num > 1) |
268
|
|
|
|
|
|
|
{ |
269
|
119
|
50
|
|
|
|
509
|
$hash{uc($fields[0])} = $num > 2? |
270
|
|
|
|
|
|
|
[@fields[1..($num-1)]] : $fields[1]; |
271
|
|
|
|
|
|
|
}else |
272
|
|
|
|
|
|
|
{ |
273
|
0
|
|
|
|
|
0
|
$hash{uc($fields[0])}++; |
274
|
|
|
|
|
|
|
} |
275
|
|
|
|
|
|
|
} |
276
|
2
|
|
|
|
|
13
|
close $fh; |
277
|
2
|
|
|
|
|
8
|
return \%hash; |
278
|
|
|
|
|
|
|
} |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
# this method is called when object of this or child classes is being |
281
|
|
|
|
|
|
|
# destroyed |
282
|
|
|
|
|
|
|
# close the file handle if the object has one |
283
|
|
|
|
|
|
|
sub DESTROY |
284
|
|
|
|
|
|
|
{ |
285
|
119
|
|
|
119
|
|
13850
|
my $self = shift; |
286
|
|
|
|
|
|
|
#print $self; |
287
|
119
|
100
|
|
|
|
1351
|
close $self->{'_fh'} if(exists $self->{'_fh'}); |
288
|
|
|
|
|
|
|
# $self->SUPER::DESTROY(@_); |
289
|
|
|
|
|
|
|
} |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
=head1 AUTHOR |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
Zhenguo Zhang, C<< >> |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
=head1 BUGS |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
Please report any bugs or feature requests to C
|
298
|
|
|
|
|
|
|
rt.cpan.org> or through the web interface at |
299
|
|
|
|
|
|
|
L. I will be |
300
|
|
|
|
|
|
|
notified, and then you'll automatically be notified of progress on |
301
|
|
|
|
|
|
|
your bug as I make changes. |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
=cut |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
=head1 SUPPORT |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
You can find documentation for this class with the perldoc command. |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
perldoc Bio::CUA |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
You can also look for information at: |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
=over 4 |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker (report bugs here) |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
L |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
L |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
=item * CPAN Ratings |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
L |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
=item * Search CPAN |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
L |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=back |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
Copyright 2015 Zhenguo Zhang. |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
This program is free software: you can redistribute it and/or modify |
342
|
|
|
|
|
|
|
it under the terms of the GNU General Public License as published by |
343
|
|
|
|
|
|
|
the Free Software Foundation, either version 3 of the License, or |
344
|
|
|
|
|
|
|
(at your option) any later version. |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, |
347
|
|
|
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
348
|
|
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
349
|
|
|
|
|
|
|
GNU General Public License for more details. |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License |
352
|
|
|
|
|
|
|
along with this program. If not, see L. |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
=cut |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
1; # End of Bio::CUA |
358
|
|
|
|
|
|
|
|