| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#--------------------------------------------------------- |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
Bio::Matrix::PSM::IO::masta - motif fasta format parser |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
MASTA is a position frequency matrix format similar to |
|
10
|
|
|
|
|
|
|
fasta. It contains one ID row just like fasta and then the actual |
|
11
|
|
|
|
|
|
|
data, which is tab delimited: |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
0.1 0.62 .017 0.11 |
|
14
|
|
|
|
|
|
|
0.22 0.13 0.54 0.11 |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Or A,C,G and T could be horizontally positioned (positioning is |
|
17
|
|
|
|
|
|
|
automatically detected). Please note masta will parse only DNA at the |
|
18
|
|
|
|
|
|
|
moment. |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
It will also convert a set of aligned sequences: |
|
21
|
|
|
|
|
|
|
ACATGCAT |
|
22
|
|
|
|
|
|
|
ACAGGGAT |
|
23
|
|
|
|
|
|
|
ACAGGCAT |
|
24
|
|
|
|
|
|
|
ACCGGCAT |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
to a PFM (SiteMatrix object). When writing if you supply SEQ it will |
|
27
|
|
|
|
|
|
|
write 10 random instances, which represent correctly the frequency and |
|
28
|
|
|
|
|
|
|
can be used as an input for weblogo creation purposes. |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
See Bio::Matrix::PSM::IO for detailed documentation on how to use masta parser |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
Parser for meme. |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
=head1 FEEDBACK |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=head2 Mailing Lists |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this |
|
41
|
|
|
|
|
|
|
and other Bioperl modules. Send your comments and suggestions preferably |
|
42
|
|
|
|
|
|
|
to one of the Bioperl mailing lists. |
|
43
|
|
|
|
|
|
|
Your participation is much appreciated. |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
|
46
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=head2 Support |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
I |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
|
55
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
|
56
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
|
57
|
|
|
|
|
|
|
with code and data examples if at all possible. |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=head2 Reporting Bugs |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
|
62
|
|
|
|
|
|
|
the bugs and their resolution. Bug reports can be submitted via the |
|
63
|
|
|
|
|
|
|
web: |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head1 AUTHOR - Stefan Kirov |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
Email skirov@utk.edu |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head1 APPENDIX |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=cut |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# Let the code begin... |
|
77
|
|
|
|
|
|
|
package Bio::Matrix::PSM::IO::masta; |
|
78
|
1
|
|
|
1
|
|
343
|
use Bio::Matrix::PSM::SiteMatrix; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
25
|
|
|
79
|
1
|
|
|
1
|
|
5
|
use vars qw(@HEADER); |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
40
|
|
|
80
|
1
|
|
|
1
|
|
4
|
use strict; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
18
|
|
|
81
|
|
|
|
|
|
|
|
|
82
|
1
|
|
|
1
|
|
3
|
use base qw(Bio::Matrix::PSM::IO Bio::Root::Root); |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
1281
|
|
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head2 new |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
Title : new |
|
89
|
|
|
|
|
|
|
Usage : my $psmIO = Bio::Matrix::PSM::IO->new(-format=> 'masta', |
|
90
|
|
|
|
|
|
|
-file => $file, |
|
91
|
|
|
|
|
|
|
-mtype => 'PWM'); |
|
92
|
|
|
|
|
|
|
Function: Associates a file with the appropriate parser |
|
93
|
|
|
|
|
|
|
Throws : |
|
94
|
|
|
|
|
|
|
Example : |
|
95
|
|
|
|
|
|
|
Args : hash |
|
96
|
|
|
|
|
|
|
Returns : "Bio::Matrix::PSM::$format"->new(@args); |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
=cut |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
sub new { |
|
101
|
3
|
|
|
3
|
1
|
5
|
my($class, @args)=@_; |
|
102
|
3
|
|
|
|
|
12
|
my $self = $class->SUPER::new(@args); |
|
103
|
3
|
|
|
|
|
8
|
my ($file)=$self->_rearrange(['FILE'], @args); |
|
104
|
3
|
|
|
|
|
8
|
my ($query,$tr1)=split(/\./,$file,2); |
|
105
|
3
|
|
|
|
|
5
|
$self->{file} = $file; |
|
106
|
3
|
|
|
|
|
3
|
$self->{_end} = 0; |
|
107
|
3
|
|
50
|
|
|
7
|
$self->{mtype} = uc($self->_rearrange(['MTYPE'], @args) || "PFM"); |
|
108
|
3
|
50
|
|
|
|
8
|
$self->_initialize_io(@args) || $self->warn("Did you intend to use STDIN?"); #Read only for now |
|
109
|
3
|
|
|
|
|
15
|
return $self; |
|
110
|
|
|
|
|
|
|
} |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=head2 write_psm |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
Title : write_psm |
|
115
|
|
|
|
|
|
|
Usage : |
|
116
|
|
|
|
|
|
|
Function: writes a pfm/pwm/raw sequence in a simple masta format |
|
117
|
|
|
|
|
|
|
Throws : |
|
118
|
|
|
|
|
|
|
Example : |
|
119
|
|
|
|
|
|
|
Args : SiteMatrix object, type (optional string: PWM, SEQ or PFM) |
|
120
|
|
|
|
|
|
|
Returns : |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=cut |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
sub write_psm { |
|
125
|
3
|
|
|
3
|
1
|
1308
|
my ($self,$matrix,$type)=@_; |
|
126
|
3
|
50
|
|
|
|
9
|
$self->{mtype} = uc($type) if ($type); |
|
127
|
3
|
|
|
|
|
5
|
my $idline=">". $matrix->id . "\n"; |
|
128
|
3
|
|
|
|
|
10
|
$self->_print($idline); |
|
129
|
3
|
100
|
|
|
|
8
|
unless ($self->{mtype} eq 'SEQ') { |
|
130
|
2
|
|
|
|
|
4
|
while (my %h=$matrix->next_pos) { |
|
131
|
50
|
100
|
|
|
|
148
|
my $row=$self->{mtype} eq 'PWM' ? join("\t",$h{lA},$h{lC},$h{lG},$h{lT},"\n"):join("\t",$h{pA},$h{pC},$h{pG},$h{pT},"\n"); |
|
132
|
50
|
|
|
|
|
59
|
$self->_print ($row); |
|
133
|
|
|
|
|
|
|
} |
|
134
|
|
|
|
|
|
|
} else { |
|
135
|
1
|
|
|
|
|
1
|
my @seq; |
|
136
|
1
|
|
|
|
|
3
|
while (my %h=$matrix->next_pos) { |
|
137
|
25
|
|
|
|
|
27
|
my ($a,$c,$g,$t)=_freq_to_count(\%h); |
|
138
|
25
|
50
|
|
|
|
41
|
$self->throw("Could not convert from frequency to count\n") if (($a+$c+$g+$t) !=10); |
|
139
|
25
|
|
|
|
|
27
|
for my $i (0..$a-1) {$seq[$i].='A';} |
|
|
97
|
|
|
|
|
61
|
|
|
140
|
25
|
|
|
|
|
19
|
my $m=$a+$c; |
|
141
|
25
|
|
|
|
|
23
|
for my $i ($a..$m-1) {$seq[$i].='C';} |
|
|
110
|
|
|
|
|
74
|
|
|
142
|
25
|
|
|
|
|
16
|
my $n=$a+$c+$g; |
|
143
|
25
|
|
|
|
|
23
|
for my $i ($m..$n-1) {$seq[$i].='G';} |
|
|
20
|
|
|
|
|
15
|
|
|
144
|
25
|
|
|
|
|
63
|
for my $i ($n..9) {$seq[$i].='T';} |
|
|
23
|
|
|
|
|
22
|
|
|
145
|
|
|
|
|
|
|
} |
|
146
|
1
|
|
|
|
|
2
|
foreach my $s (@seq) { |
|
147
|
10
|
|
|
|
|
4
|
$s.="\n"; |
|
148
|
10
|
|
|
|
|
12
|
$self->_print ($s); |
|
149
|
|
|
|
|
|
|
} |
|
150
|
|
|
|
|
|
|
} |
|
151
|
|
|
|
|
|
|
} |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=head2 next_matrix |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
Title : next_matrix |
|
156
|
|
|
|
|
|
|
Usage : my $matrix = $psmio->next_matrix; |
|
157
|
|
|
|
|
|
|
Function: Alias of next_psm function |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=cut |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
sub next_matrix { |
|
162
|
7
|
|
|
7
|
1
|
1320
|
shift->next_psm(@_); |
|
163
|
|
|
|
|
|
|
} |
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
=head2 next_psm |
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
Title : next_psm |
|
168
|
|
|
|
|
|
|
Usage : my $matrix=$psmio->next_psm; |
|
169
|
|
|
|
|
|
|
Function: returns the next matrix in the stream |
|
170
|
|
|
|
|
|
|
Throws : If there is you mix different types, for example weights and |
|
171
|
|
|
|
|
|
|
frequencies occur in the same entry You can mix weights, but these |
|
172
|
|
|
|
|
|
|
should be designated by different ID lines |
|
173
|
|
|
|
|
|
|
Example : |
|
174
|
|
|
|
|
|
|
Args : |
|
175
|
|
|
|
|
|
|
Returns : Bio::Matrix::PSM::SiteMatrix |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=cut |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
sub next_psm { |
|
180
|
7
|
|
|
7
|
1
|
6
|
my $self=shift; |
|
181
|
7
|
100
|
|
|
|
13
|
return if ($self->{_end}); |
|
182
|
6
|
|
|
|
|
18
|
my $line=$self->_readline; |
|
183
|
6
|
50
|
|
|
|
21
|
$self->throw("No ID line- wrong format\n") unless ($line=~/^>/); |
|
184
|
6
|
|
|
|
|
18
|
my ($id,$desc)=split(/[\t\s]+/,$line,2); |
|
185
|
6
|
|
|
|
|
11
|
$id=~s/>//; |
|
186
|
6
|
|
|
|
|
5
|
my ($mtype,$format,@mdata,$len); |
|
187
|
6
|
|
|
|
|
7
|
$self->{_mtype} = 0; |
|
188
|
6
|
|
|
|
|
11
|
while ($line=$self->_readline) { |
|
189
|
123
|
100
|
|
|
|
259
|
next if $line =~ /^\s+$/;# There should not be empty lines, but just in case... |
|
190
|
122
|
|
|
|
|
87
|
chomp $line; |
|
191
|
122
|
100
|
|
|
|
140
|
if ($line =~ /^>/) { |
|
192
|
4
|
|
|
|
|
9
|
$self->_pushback($line); |
|
193
|
4
|
|
|
|
|
4
|
last; |
|
194
|
|
|
|
|
|
|
} |
|
195
|
|
|
|
|
|
|
|
|
196
|
118
|
100
|
|
|
|
198
|
if ($line !~ /[^ACGTacgt]/g) { |
|
197
|
|
|
|
|
|
|
# This is a set of aligned sequences |
|
198
|
|
|
|
|
|
|
$self->throw("Mixing between types is not allowed or a parsing error occured\n") |
|
199
|
18
|
50
|
66
|
|
|
31
|
if (($self->{_mtype} != 3) && ($mtype)) ; |
|
200
|
18
|
50
|
66
|
|
|
48
|
$self->throw("Bad sequence- different length: $line\n") |
|
201
|
|
|
|
|
|
|
if (($len) && ($len!=length($line))); |
|
202
|
18
|
100
|
|
|
|
20
|
$len=length($line) unless ($len); |
|
203
|
18
|
|
|
|
|
17
|
push @mdata,$line; |
|
204
|
18
|
|
|
|
|
26
|
$self->{_mtype}=3; |
|
205
|
|
|
|
|
|
|
} else { |
|
206
|
|
|
|
|
|
|
# do not strip 'e's since they are part of number notation for small/big numbers |
|
207
|
100
|
|
|
|
|
93
|
$line=~s/[a-df-zA-DF-Z]//g; #Well we may wanna do a hash and auto check for letter order if there is a really boring talk... |
|
208
|
100
|
|
|
|
|
129
|
$line=~s/^[\s\t]+//; |
|
209
|
100
|
|
|
|
|
278
|
$line=~s/[\s\t]+/\t/g; |
|
210
|
100
|
|
|
|
|
286
|
my @data=split(/[\s\t]+/,$line); |
|
211
|
100
|
50
|
|
|
|
106
|
if ($#data==3) { |
|
212
|
100
|
50
|
66
|
|
|
273
|
$self->throw("Mixing between types is not allowed or a parsing error occured\n") if (($mtype)&&($self->{_mtype} !=1)) ; |
|
213
|
100
|
|
|
|
|
66
|
$self->{_mtype}=1; |
|
214
|
100
|
|
|
|
|
76
|
$mtype=1; |
|
215
|
|
|
|
|
|
|
} |
|
216
|
|
|
|
|
|
|
else { |
|
217
|
0
|
0
|
0
|
|
|
0
|
$self->throw("Mixing between types is not allowedor a parsing error occured\n") if (($mtype)&&($self->{_mtype} !=2)) ; |
|
218
|
0
|
|
|
|
|
0
|
$self->{_mtype}=2; |
|
219
|
0
|
|
|
|
|
0
|
$mtype=1; |
|
220
|
|
|
|
|
|
|
} |
|
221
|
100
|
|
|
|
|
189
|
push @mdata,\@data; |
|
222
|
|
|
|
|
|
|
} |
|
223
|
|
|
|
|
|
|
} |
|
224
|
6
|
100
|
66
|
|
|
20
|
$self->{_end} = 1 if (!defined $line || $line !~ /^>/); |
|
225
|
6
|
|
|
|
|
11
|
return _make_matrix(\@mdata,$self->{_mtype},$id,$desc); |
|
226
|
|
|
|
|
|
|
} |
|
227
|
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
sub _make_matrix { |
|
229
|
6
|
|
|
6
|
|
7
|
my ($mdata,$type,$id,$desc)=@_; |
|
230
|
6
|
100
|
|
|
|
11
|
if ($type==1) { |
|
231
|
4
|
|
|
|
|
8
|
my @rearr=_rearrange_matrix($mdata); |
|
232
|
4
|
|
|
|
|
5
|
$mdata=\@rearr; |
|
233
|
|
|
|
|
|
|
} |
|
234
|
|
|
|
|
|
|
#Auto recognition for what type is this entry (PFM, PWM or simple count) |
|
235
|
|
|
|
|
|
|
#A bit dangerous, I hate too much auto stuff, but I want to be able to mix different |
|
236
|
|
|
|
|
|
|
#types in a single file |
|
237
|
6
|
|
|
|
|
4
|
my $mformat='count'; |
|
238
|
6
|
|
|
|
|
51
|
my ($a,$c,$g,$t); |
|
239
|
6
|
100
|
|
|
|
8
|
if ($type == 3 ) { |
|
240
|
2
|
|
|
|
|
3
|
($a,$c,$g,$t)= &_count_positions($mdata); |
|
241
|
|
|
|
|
|
|
} else { |
|
242
|
4
|
|
|
|
|
4
|
($a,$c,$g,$t)=@{$mdata}; |
|
|
4
|
|
|
|
|
5
|
|
|
243
|
4
|
|
|
|
|
13
|
my $k=$a->[0]+$c->[0]+$g->[0]+$t->[0]; |
|
244
|
4
|
|
|
|
|
10
|
my $l= ($a->[0]+$c->[0]+$g->[0]+$t->[0]) - |
|
245
|
|
|
|
|
|
|
(abs($a->[0])+abs($c->[0])+abs($g->[0])+abs($t->[0])); |
|
246
|
4
|
100
|
66
|
|
|
13
|
$mformat='freq' if (($k==1) && ($l==0)); |
|
247
|
4
|
100
|
|
|
|
8
|
$mformat='pwm' if ($l!=0); |
|
248
|
|
|
|
|
|
|
} |
|
249
|
6
|
|
|
|
|
4
|
my (@fa,@fc,@fg,@ft,%mparam); |
|
250
|
|
|
|
|
|
|
|
|
251
|
6
|
100
|
|
|
|
12
|
if ($mformat eq 'pwm') { |
|
252
|
2
|
|
|
|
|
1
|
foreach my $i (0..$#{$a}) { |
|
|
2
|
|
|
|
|
4
|
|
|
253
|
50
|
|
|
|
|
47
|
my $ca=exp $a->[$i]; |
|
254
|
50
|
|
|
|
|
45
|
my $cc=exp $c->[$i]; |
|
255
|
50
|
|
|
|
|
41
|
my $cg=exp $g->[$i]; |
|
256
|
50
|
|
|
|
|
39
|
my $ct=exp $t->[$i]; |
|
257
|
50
|
|
|
|
|
35
|
my $all=$ca+$cc+$cg+$ct; |
|
258
|
50
|
|
|
|
|
39
|
push @fa,($ca/$all)*100; |
|
259
|
50
|
|
|
|
|
41
|
push @fc,($cc/$all)*100; |
|
260
|
50
|
|
|
|
|
47
|
push @fg,($cg/$all)*100; |
|
261
|
50
|
|
|
|
|
42
|
push @ft,($ct/$all)*100; |
|
262
|
|
|
|
|
|
|
} |
|
263
|
|
|
|
|
|
|
} |
|
264
|
6
|
|
|
|
|
9
|
$desc.=", source is $mformat"; |
|
265
|
6
|
100
|
|
|
|
7
|
if ($mformat eq 'pwm') { |
|
266
|
2
|
|
|
|
|
3
|
$desc=~s/^pwm//; |
|
267
|
2
|
|
|
|
|
15
|
%mparam=(-pA=>\@fa,-pC=>\@fc,-pG=>\@fg,-pT=>\@ft,-id=>$id,-desc=>$desc, |
|
268
|
|
|
|
|
|
|
-lA=>$a,-lC=>$c,-lG=>$g,-lT=>$t); |
|
269
|
|
|
|
|
|
|
} |
|
270
|
|
|
|
|
|
|
else { |
|
271
|
4
|
|
|
|
|
18
|
%mparam=(-pA=>$a,-pC=>$c,-pG=>$g,-pT=>$t,-id=>$id,-desc=>$desc); |
|
272
|
|
|
|
|
|
|
} |
|
273
|
6
|
|
|
|
|
27
|
return new Bio::Matrix::PSM::SiteMatrix(%mparam); |
|
274
|
|
|
|
|
|
|
} |
|
275
|
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
sub _rearrange_matrix { |
|
277
|
4
|
|
|
4
|
|
5
|
my $mdata=shift; |
|
278
|
4
|
|
|
|
|
2
|
my (@a,@c,@g,@t); |
|
279
|
4
|
|
|
|
|
4
|
foreach my $entry (@{$mdata}) { |
|
|
4
|
|
|
|
|
6
|
|
|
280
|
100
|
|
|
|
|
136
|
my ($a,$c,$g,$t)=@$entry; |
|
281
|
100
|
|
|
|
|
85
|
push @a,$a; |
|
282
|
100
|
|
|
|
|
115
|
push @c,$c; |
|
283
|
100
|
|
|
|
|
70
|
push @g,$g; |
|
284
|
100
|
|
|
|
|
87
|
push @t,$t; |
|
285
|
|
|
|
|
|
|
} |
|
286
|
4
|
|
|
|
|
10
|
return \@a,\@c,\@g,\@t; |
|
287
|
|
|
|
|
|
|
} |
|
288
|
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
sub _count_positions { |
|
291
|
2
|
|
|
2
|
|
3
|
my $seq=shift; |
|
292
|
2
|
|
|
|
|
2
|
my %pos; |
|
293
|
2
|
|
|
|
|
3
|
my $l=length($seq->[0])-1; |
|
294
|
2
|
|
|
|
|
6
|
for( my $i = 0; $i <= $l; $i++ ) { |
|
295
|
50
|
|
|
|
|
41
|
for ( qw(A C G T) ) { |
|
296
|
200
|
|
|
|
|
181
|
$pos{$_}->[$i] = 0; |
|
297
|
|
|
|
|
|
|
} |
|
298
|
|
|
|
|
|
|
} |
|
299
|
2
|
|
|
|
|
2
|
foreach my $sequence (@{$seq}) { |
|
|
2
|
|
|
|
|
3
|
|
|
300
|
18
|
|
|
|
|
40
|
my @let= split(//,$sequence); |
|
301
|
18
|
|
|
|
|
20
|
for my $i (0..$#let) { |
|
302
|
450
|
|
|
|
|
331
|
$pos{uc($let[$i])}->[$i]++; |
|
303
|
|
|
|
|
|
|
} |
|
304
|
|
|
|
|
|
|
} |
|
305
|
2
|
|
|
|
|
6
|
return $pos{A},$pos{C},$pos{G},$pos{T}; |
|
306
|
|
|
|
|
|
|
} |
|
307
|
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
sub _freq_to_count { |
|
310
|
25
|
|
|
25
|
|
15
|
my $h=shift; |
|
311
|
25
|
|
|
|
|
28
|
my $a=int(10*$h->{pA}+0.5); |
|
312
|
25
|
|
|
|
|
25
|
my $c=int(10*$h->{pC}+0.5); |
|
313
|
25
|
|
|
|
|
18
|
my $g=int(10*$h->{pG}+0.5); |
|
314
|
25
|
|
|
|
|
17
|
my $t=int(10*$h->{pT}+0.5); |
|
315
|
25
|
|
|
|
|
25
|
return ($a,$c,$g,$t); |
|
316
|
|
|
|
|
|
|
} |
|
317
|
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
1; |