| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# |
|
2
|
|
|
|
|
|
|
# ChromBed - A class for extracting short read information for one chromosome. |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
|
|
5
|
1
|
|
|
1
|
|
5
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
34
|
|
|
6
|
1
|
|
|
1
|
|
26
|
use 5.006; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
49
|
|
|
7
|
|
|
|
|
|
|
package MyShortRead::ChromBed; |
|
8
|
1
|
|
|
1
|
|
5
|
use POSIX qw(ceil); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
8
|
|
|
9
|
1
|
|
|
1
|
|
65
|
use MyShortRead::MyShortRead qw(bin_genome_count bin_genome_count_direction); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
48
|
|
|
10
|
1
|
|
|
1
|
|
6
|
use MyBioinfo::Common qw(mean median); |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
1590
|
|
|
11
|
|
|
|
|
|
|
our $VERSION = '1.00'; |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# Class constructor. |
|
14
|
|
|
|
|
|
|
sub new{ |
|
15
|
0
|
|
|
0
|
0
|
|
my $class = shift; |
|
16
|
0
|
|
|
|
|
|
my $self = {}; |
|
17
|
0
|
|
|
|
|
|
$self->{chrom_name} = undef; |
|
18
|
0
|
|
|
|
|
|
$self->{chrom_len} = undef; |
|
19
|
0
|
|
|
|
|
|
$self->{file_name} = undef; # chromosome file name. |
|
20
|
0
|
|
|
|
|
|
$self->{bin_num} = undef; # number of bins for the chromosome. |
|
21
|
0
|
|
|
|
|
|
$self->{bin_count} = []; # vector for bin read count. |
|
22
|
0
|
|
|
|
|
|
$self->{bin_count_F} = []; # bin Forward strand read count. |
|
23
|
0
|
|
|
|
|
|
$self->{bin_count_R} = []; # bin Reverse strand read count. |
|
24
|
0
|
|
|
|
|
|
$self->{window_num} = undef; # number of sliding windows. |
|
25
|
0
|
|
|
|
|
|
$self->{window_count} = []; # vector for window read count. |
|
26
|
0
|
|
|
|
|
|
$self->{window_count_F} = []; # window read count for Forward strand. |
|
27
|
0
|
|
|
|
|
|
$self->{window_count_R} = []; # window read count for Reverse strand. |
|
28
|
0
|
|
|
|
|
|
bless($self, $class); |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# User supplied chromosome name, length and file name. |
|
31
|
0
|
0
|
|
|
|
|
if(@_ >= 3){ |
|
32
|
0
|
|
|
|
|
|
$self->{chrom_name} = $_[0]; |
|
33
|
0
|
|
|
|
|
|
$self->{chrom_len} = $_[1]; |
|
34
|
0
|
|
|
|
|
|
$self->{file_name} = $_[2]; |
|
35
|
|
|
|
|
|
|
} |
|
36
|
|
|
|
|
|
|
|
|
37
|
0
|
|
|
|
|
|
return $self; |
|
38
|
|
|
|
|
|
|
} |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
# Set chromosome information: name, length and file name. |
|
41
|
|
|
|
|
|
|
# Clean memory data. |
|
42
|
|
|
|
|
|
|
sub set_chr_info{ |
|
43
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
|
44
|
0
|
0
|
|
|
|
|
if(@_ >= 3){ |
|
45
|
0
|
|
|
|
|
|
$self->{chrom_name} = $_[0]; |
|
46
|
0
|
|
|
|
|
|
$self->{chrom_len} = $_[1]; |
|
47
|
0
|
|
|
|
|
|
$self->{file_name} = $_[2]; |
|
48
|
0
|
|
|
|
|
|
$self->{bin_num} = undef; |
|
49
|
0
|
|
|
|
|
|
$self->{bin_count} = []; |
|
50
|
|
|
|
|
|
|
} |
|
51
|
|
|
|
|
|
|
} |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
# Create bin count vector for the specified chromosome file. |
|
54
|
|
|
|
|
|
|
sub do_bin_count{ |
|
55
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
|
56
|
0
|
0
|
|
|
|
|
if(@_ < 2) {warn "Bin or fragment size not specified. Bin count not performed. Exit.\n"; return;} |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
57
|
0
|
0
|
0
|
|
|
|
if(!(defined $self->{chrom_name} and defined $self->{chrom_len} and defined $self->{file_name})){ |
|
|
|
|
0
|
|
|
|
|
|
58
|
0
|
|
|
|
|
|
warn "Chromosome information missing! Cannot continue. Exit.\n"; |
|
59
|
0
|
|
|
|
|
|
return; |
|
60
|
|
|
|
|
|
|
} |
|
61
|
0
|
|
|
|
|
|
my($bin_size,$frag_size) = @_; |
|
62
|
|
|
|
|
|
|
# Read bin size and calculate bin number. |
|
63
|
0
|
|
|
|
|
|
$self->{bin_num} = ceil($self->{chrom_len} / $bin_size); |
|
64
|
|
|
|
|
|
|
# Create bin count vector. |
|
65
|
0
|
|
|
|
|
|
bin_genome_count($self->{file_name},$bin_size,$self->{chrom_len},$frag_size,$self->{bin_count}); |
|
66
|
|
|
|
|
|
|
} |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# Create bin count with direction for the specified chromosome file. |
|
69
|
|
|
|
|
|
|
sub do_bin_count_direction{ |
|
70
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
|
71
|
0
|
0
|
|
|
|
|
if(@_ < 2) {warn "Bin or fragment size not specified. Bin count not performed. Exit.\n"; return;} |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
72
|
0
|
0
|
0
|
|
|
|
if(!(defined $self->{chrom_name} and defined $self->{chrom_len} and defined $self->{file_name})){ |
|
|
|
|
0
|
|
|
|
|
|
73
|
0
|
|
|
|
|
|
warn "Chromosome information missing! Cannot continue. Exit.\n"; |
|
74
|
0
|
|
|
|
|
|
return; |
|
75
|
|
|
|
|
|
|
} |
|
76
|
0
|
|
|
|
|
|
my($bin_size,$frag_size) = @_; |
|
77
|
|
|
|
|
|
|
# Read bin size and calculate bin number. |
|
78
|
0
|
|
|
|
|
|
$self->{bin_num} = ceil($self->{chrom_len} / $bin_size); |
|
79
|
|
|
|
|
|
|
# Create bin count vector. |
|
80
|
0
|
|
|
|
|
|
bin_genome_count_direction($self->{file_name},$bin_size,$self->{chrom_len},$frag_size,$self->{bin_count_F},$self->{bin_count_R}); |
|
81
|
|
|
|
|
|
|
} |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
# Smooth bin count signals by applying window functions. |
|
84
|
|
|
|
|
|
|
sub smooth_bin_count{ |
|
85
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
|
86
|
0
|
0
|
|
|
|
|
if(@_ < 2) {warn "Smoothing width or function not specified. Exit.\n"; return;} |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
87
|
0
|
0
|
|
|
|
|
if(!defined $self->{bin_count}){ |
|
88
|
0
|
|
|
|
|
|
warn "Bin count vector not exists. Do bin count first! Exit.\n"; |
|
89
|
0
|
|
|
|
|
|
return; |
|
90
|
|
|
|
|
|
|
} |
|
91
|
0
|
|
|
|
|
|
my($smooth_width,$fun_str) = @_; |
|
92
|
0
|
0
|
|
|
|
|
if($smooth_width <= 1){ |
|
93
|
0
|
|
|
|
|
|
warn "Smooth width must be larger than one. Exit.\n"; |
|
94
|
0
|
|
|
|
|
|
return; |
|
95
|
|
|
|
|
|
|
} |
|
96
|
0
|
0
|
|
|
|
|
if($smooth_width % 2 != 1){ |
|
97
|
0
|
|
|
|
|
|
warn "Smooth width must be odd number. Convert it to nearest odd number.\n"; |
|
98
|
0
|
|
|
|
|
|
$smooth_width++; |
|
99
|
|
|
|
|
|
|
} |
|
100
|
0
|
|
|
|
|
|
my $half = ($smooth_width-1)/2; |
|
101
|
0
|
0
|
|
|
|
|
if($self->{bin_num} <= $half){ |
|
102
|
0
|
|
|
|
|
|
warn "Bin vector too small to calculate smoothed signals.\n"; |
|
103
|
0
|
|
|
|
|
|
return; |
|
104
|
|
|
|
|
|
|
} |
|
105
|
0
|
|
|
|
|
|
my @mov_arr; # Array containing bin counts of moving window. |
|
106
|
|
|
|
|
|
|
# Build the initial moving array. |
|
107
|
0
|
|
|
|
|
|
for my $i(0..$half) {push @mov_arr, $self->{bin_count}[$i];} |
|
|
0
|
|
|
|
|
|
|
|
108
|
0
|
0
|
|
|
|
|
if($fun_str eq 'mean') {$self->{bin_count}[0] = mean(@mov_arr);} |
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
elsif($fun_str eq 'median') {$self->{bin_count}[0] = median(@mov_arr);} |
|
110
|
|
|
|
|
|
|
# Iteratively updating moving array and calculate smoothed signals. |
|
111
|
0
|
|
|
|
|
|
for(my $i = 1; $i < $self->{bin_num}; $i++){ |
|
112
|
0
|
0
|
|
|
|
|
if($i - $half > 0) {shift @mov_arr;} # Remove the left element. |
|
|
0
|
|
|
|
|
|
|
|
113
|
0
|
0
|
|
|
|
|
if($i + $half < $self->{bin_num}) {push @mov_arr, $self->{bin_count}[$i+$half];} # Add new element to the right. |
|
|
0
|
|
|
|
|
|
|
|
114
|
0
|
0
|
|
|
|
|
if($fun_str eq 'mean') {$self->{bin_count}[$i] = mean(@mov_arr);} |
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
elsif($fun_str eq 'median') {$self->{bin_count}[$i] = median(@mov_arr);} |
|
116
|
|
|
|
|
|
|
} |
|
117
|
|
|
|
|
|
|
} |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
# Create sliding window count vector. |
|
120
|
|
|
|
|
|
|
sub create_window_count{ |
|
121
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
|
122
|
0
|
0
|
|
|
|
|
if(@_ < 1) {warn "Number of bins per window not specified. Exit.\n"; return;} |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
123
|
0
|
|
|
|
|
|
my $bins_per_window = shift; |
|
124
|
0
|
0
|
|
|
|
|
if(@{$self->{bin_count}} < $bins_per_window){ |
|
|
0
|
|
|
|
|
|
|
|
125
|
0
|
|
|
|
|
|
warn "Bin count vector not long enough to calculate window count.\n"; |
|
126
|
0
|
|
|
|
|
|
return; |
|
127
|
|
|
|
|
|
|
} |
|
128
|
|
|
|
|
|
|
# Initialize window count vector. |
|
129
|
0
|
|
|
|
|
|
$self->{window_num} = $self->{bin_num} - $bins_per_window + 1; |
|
130
|
0
|
|
|
|
|
|
$#{$self->{window_count}} = $self->{window_num} - 1; |
|
|
0
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
# $mov_window_count is the moving window count. |
|
132
|
0
|
|
|
|
|
|
my $mov_window_count = 0; |
|
133
|
0
|
|
|
|
|
|
for my $i(0..$bins_per_window-1) {$mov_window_count += $self->{bin_count}[$i];} |
|
|
0
|
|
|
|
|
|
|
|
134
|
0
|
|
|
|
|
|
$self->{window_count}[0] = $mov_window_count; |
|
135
|
|
|
|
|
|
|
# $i is the index to current window. |
|
136
|
0
|
|
|
|
|
|
for(my $i=1; $i < $self->{window_num}; $i++){ |
|
137
|
0
|
|
|
|
|
|
$mov_window_count = $mov_window_count - $self->{bin_count}[$i-1] + $self->{bin_count}[$i-1+$bins_per_window]; |
|
138
|
0
|
|
|
|
|
|
$self->{window_count}[$i] = $mov_window_count; |
|
139
|
|
|
|
|
|
|
} |
|
140
|
|
|
|
|
|
|
} |
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# Create sliding window count vector with direction. |
|
143
|
|
|
|
|
|
|
sub create_window_count_direction{ |
|
144
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
|
145
|
0
|
0
|
|
|
|
|
if(@_ < 1) {warn "Number of bins per window not specified. Exit.\n"; return;} |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
146
|
0
|
|
|
|
|
|
my $bins_per_window = shift; |
|
147
|
0
|
0
|
0
|
|
|
|
if(@{$self->{bin_count_F}} < $bins_per_window or @{$self->{bin_count_R}} < $bins_per_window){ |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
148
|
0
|
|
|
|
|
|
warn "Bin count vector not long enough to calculate window count.\n"; |
|
149
|
0
|
|
|
|
|
|
return; |
|
150
|
|
|
|
|
|
|
} |
|
151
|
|
|
|
|
|
|
# Initialize window count vector. |
|
152
|
0
|
|
|
|
|
|
$self->{window_num} = $self->{bin_num} - $bins_per_window + 1; |
|
153
|
0
|
|
|
|
|
|
$#{$self->{window_count_F}} = $self->{window_num} - 1; |
|
|
0
|
|
|
|
|
|
|
|
154
|
0
|
|
|
|
|
|
$#{$self->{window_count_R}} = $self->{window_num} - 1; |
|
|
0
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
# $mov_window_count is the moving window count. |
|
156
|
0
|
|
|
|
|
|
my $mov_window_count_F = 0; |
|
157
|
0
|
|
|
|
|
|
my $mov_window_count_R = 0; |
|
158
|
0
|
|
|
|
|
|
for my $i(0..$bins_per_window-1) {$mov_window_count_F += $self->{bin_count_F}[$i];} |
|
|
0
|
|
|
|
|
|
|
|
159
|
0
|
|
|
|
|
|
for my $i(0..$bins_per_window-1) {$mov_window_count_R += $self->{bin_count_R}[$i];} |
|
|
0
|
|
|
|
|
|
|
|
160
|
0
|
|
|
|
|
|
$self->{window_count_F}[0] = $mov_window_count_F; |
|
161
|
0
|
|
|
|
|
|
$self->{window_count_R}[0] = $mov_window_count_R; |
|
162
|
|
|
|
|
|
|
# $i is the index to current window. |
|
163
|
0
|
|
|
|
|
|
for(my $i=1; $i < $self->{window_num}; $i++){ |
|
164
|
0
|
|
|
|
|
|
$mov_window_count_F = $mov_window_count_F - $self->{bin_count_F}[$i-1] + $self->{bin_count_F}[$i-1+$bins_per_window]; |
|
165
|
0
|
|
|
|
|
|
$mov_window_count_R = $mov_window_count_R - $self->{bin_count_R}[$i-1] + $self->{bin_count_R}[$i-1+$bins_per_window]; |
|
166
|
0
|
|
|
|
|
|
$self->{window_count_F}[$i] = $mov_window_count_F; |
|
167
|
0
|
|
|
|
|
|
$self->{window_count_R}[$i] = $mov_window_count_R; |
|
168
|
|
|
|
|
|
|
} |
|
169
|
|
|
|
|
|
|
} |
|
170
|
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
# Delete chromosome file but keep memory data. |
|
172
|
|
|
|
|
|
|
sub del_file{ |
|
173
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
|
174
|
0
|
0
|
|
|
|
|
if(defined $self->{file_name}){ |
|
175
|
0
|
|
|
|
|
|
my $fn = $self->{file_name}; |
|
176
|
0
|
0
|
|
|
|
|
unlink $fn or warn "Cannot delete file $fn: $!\n"; |
|
177
|
|
|
|
|
|
|
} |
|
178
|
|
|
|
|
|
|
} |
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
1; |
|
181
|
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
__END__ |