line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# ChromBed - A class for extracting short read information for one chromosome. |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
|
5
|
1
|
|
|
1
|
|
5
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
34
|
|
6
|
1
|
|
|
1
|
|
26
|
use 5.006; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
49
|
|
7
|
|
|
|
|
|
|
package MyShortRead::ChromBed; |
8
|
1
|
|
|
1
|
|
5
|
use POSIX qw(ceil); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
8
|
|
9
|
1
|
|
|
1
|
|
65
|
use MyShortRead::MyShortRead qw(bin_genome_count bin_genome_count_direction); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
48
|
|
10
|
1
|
|
|
1
|
|
6
|
use MyBioinfo::Common qw(mean median); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
1590
|
|
11
|
|
|
|
|
|
|
our $VERSION = '1.00'; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# Class constructor. |
14
|
|
|
|
|
|
|
sub new{ |
15
|
0
|
|
|
0
|
0
|
|
my $class = shift; |
16
|
0
|
|
|
|
|
|
my $self = {}; |
17
|
0
|
|
|
|
|
|
$self->{chrom_name} = undef; |
18
|
0
|
|
|
|
|
|
$self->{chrom_len} = undef; |
19
|
0
|
|
|
|
|
|
$self->{file_name} = undef; # chromosome file name. |
20
|
0
|
|
|
|
|
|
$self->{bin_num} = undef; # number of bins for the chromosome. |
21
|
0
|
|
|
|
|
|
$self->{bin_count} = []; # vector for bin read count. |
22
|
0
|
|
|
|
|
|
$self->{bin_count_F} = []; # bin Forward strand read count. |
23
|
0
|
|
|
|
|
|
$self->{bin_count_R} = []; # bin Reverse strand read count. |
24
|
0
|
|
|
|
|
|
$self->{window_num} = undef; # number of sliding windows. |
25
|
0
|
|
|
|
|
|
$self->{window_count} = []; # vector for window read count. |
26
|
0
|
|
|
|
|
|
$self->{window_count_F} = []; # window read count for Forward strand. |
27
|
0
|
|
|
|
|
|
$self->{window_count_R} = []; # window read count for Reverse strand. |
28
|
0
|
|
|
|
|
|
bless($self, $class); |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# User supplied chromosome name, length and file name. |
31
|
0
|
0
|
|
|
|
|
if(@_ >= 3){ |
32
|
0
|
|
|
|
|
|
$self->{chrom_name} = $_[0]; |
33
|
0
|
|
|
|
|
|
$self->{chrom_len} = $_[1]; |
34
|
0
|
|
|
|
|
|
$self->{file_name} = $_[2]; |
35
|
|
|
|
|
|
|
} |
36
|
|
|
|
|
|
|
|
37
|
0
|
|
|
|
|
|
return $self; |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
# Set chromosome information: name, length and file name. |
41
|
|
|
|
|
|
|
# Clean memory data. |
42
|
|
|
|
|
|
|
sub set_chr_info{ |
43
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
44
|
0
|
0
|
|
|
|
|
if(@_ >= 3){ |
45
|
0
|
|
|
|
|
|
$self->{chrom_name} = $_[0]; |
46
|
0
|
|
|
|
|
|
$self->{chrom_len} = $_[1]; |
47
|
0
|
|
|
|
|
|
$self->{file_name} = $_[2]; |
48
|
0
|
|
|
|
|
|
$self->{bin_num} = undef; |
49
|
0
|
|
|
|
|
|
$self->{bin_count} = []; |
50
|
|
|
|
|
|
|
} |
51
|
|
|
|
|
|
|
} |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
# Create bin count vector for the specified chromosome file. |
54
|
|
|
|
|
|
|
sub do_bin_count{ |
55
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
56
|
0
|
0
|
|
|
|
|
if(@_ < 2) {warn "Bin or fragment size not specified. Bin count not performed. Exit.\n"; return;} |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
57
|
0
|
0
|
0
|
|
|
|
if(!(defined $self->{chrom_name} and defined $self->{chrom_len} and defined $self->{file_name})){ |
|
|
|
0
|
|
|
|
|
58
|
0
|
|
|
|
|
|
warn "Chromosome information missing! Cannot continue. Exit.\n"; |
59
|
0
|
|
|
|
|
|
return; |
60
|
|
|
|
|
|
|
} |
61
|
0
|
|
|
|
|
|
my($bin_size,$frag_size) = @_; |
62
|
|
|
|
|
|
|
# Read bin size and calculate bin number. |
63
|
0
|
|
|
|
|
|
$self->{bin_num} = ceil($self->{chrom_len} / $bin_size); |
64
|
|
|
|
|
|
|
# Create bin count vector. |
65
|
0
|
|
|
|
|
|
bin_genome_count($self->{file_name},$bin_size,$self->{chrom_len},$frag_size,$self->{bin_count}); |
66
|
|
|
|
|
|
|
} |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# Create bin count with direction for the specified chromosome file. |
69
|
|
|
|
|
|
|
sub do_bin_count_direction{ |
70
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
71
|
0
|
0
|
|
|
|
|
if(@_ < 2) {warn "Bin or fragment size not specified. Bin count not performed. Exit.\n"; return;} |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
72
|
0
|
0
|
0
|
|
|
|
if(!(defined $self->{chrom_name} and defined $self->{chrom_len} and defined $self->{file_name})){ |
|
|
|
0
|
|
|
|
|
73
|
0
|
|
|
|
|
|
warn "Chromosome information missing! Cannot continue. Exit.\n"; |
74
|
0
|
|
|
|
|
|
return; |
75
|
|
|
|
|
|
|
} |
76
|
0
|
|
|
|
|
|
my($bin_size,$frag_size) = @_; |
77
|
|
|
|
|
|
|
# Read bin size and calculate bin number. |
78
|
0
|
|
|
|
|
|
$self->{bin_num} = ceil($self->{chrom_len} / $bin_size); |
79
|
|
|
|
|
|
|
# Create bin count vector. |
80
|
0
|
|
|
|
|
|
bin_genome_count_direction($self->{file_name},$bin_size,$self->{chrom_len},$frag_size,$self->{bin_count_F},$self->{bin_count_R}); |
81
|
|
|
|
|
|
|
} |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
# Smooth bin count signals by applying window functions. |
84
|
|
|
|
|
|
|
sub smooth_bin_count{ |
85
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
86
|
0
|
0
|
|
|
|
|
if(@_ < 2) {warn "Smoothing width or function not specified. Exit.\n"; return;} |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
87
|
0
|
0
|
|
|
|
|
if(!defined $self->{bin_count}){ |
88
|
0
|
|
|
|
|
|
warn "Bin count vector not exists. Do bin count first! Exit.\n"; |
89
|
0
|
|
|
|
|
|
return; |
90
|
|
|
|
|
|
|
} |
91
|
0
|
|
|
|
|
|
my($smooth_width,$fun_str) = @_; |
92
|
0
|
0
|
|
|
|
|
if($smooth_width <= 1){ |
93
|
0
|
|
|
|
|
|
warn "Smooth width must be larger than one. Exit.\n"; |
94
|
0
|
|
|
|
|
|
return; |
95
|
|
|
|
|
|
|
} |
96
|
0
|
0
|
|
|
|
|
if($smooth_width % 2 != 1){ |
97
|
0
|
|
|
|
|
|
warn "Smooth width must be odd number. Convert it to nearest odd number.\n"; |
98
|
0
|
|
|
|
|
|
$smooth_width++; |
99
|
|
|
|
|
|
|
} |
100
|
0
|
|
|
|
|
|
my $half = ($smooth_width-1)/2; |
101
|
0
|
0
|
|
|
|
|
if($self->{bin_num} <= $half){ |
102
|
0
|
|
|
|
|
|
warn "Bin vector too small to calculate smoothed signals.\n"; |
103
|
0
|
|
|
|
|
|
return; |
104
|
|
|
|
|
|
|
} |
105
|
0
|
|
|
|
|
|
my @mov_arr; # Array containing bin counts of moving window. |
106
|
|
|
|
|
|
|
# Build the initial moving array. |
107
|
0
|
|
|
|
|
|
for my $i(0..$half) {push @mov_arr, $self->{bin_count}[$i];} |
|
0
|
|
|
|
|
|
|
108
|
0
|
0
|
|
|
|
|
if($fun_str eq 'mean') {$self->{bin_count}[0] = mean(@mov_arr);} |
|
0
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
elsif($fun_str eq 'median') {$self->{bin_count}[0] = median(@mov_arr);} |
110
|
|
|
|
|
|
|
# Iteratively updating moving array and calculate smoothed signals. |
111
|
0
|
|
|
|
|
|
for(my $i = 1; $i < $self->{bin_num}; $i++){ |
112
|
0
|
0
|
|
|
|
|
if($i - $half > 0) {shift @mov_arr;} # Remove the left element. |
|
0
|
|
|
|
|
|
|
113
|
0
|
0
|
|
|
|
|
if($i + $half < $self->{bin_num}) {push @mov_arr, $self->{bin_count}[$i+$half];} # Add new element to the right. |
|
0
|
|
|
|
|
|
|
114
|
0
|
0
|
|
|
|
|
if($fun_str eq 'mean') {$self->{bin_count}[$i] = mean(@mov_arr);} |
|
0
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
elsif($fun_str eq 'median') {$self->{bin_count}[$i] = median(@mov_arr);} |
116
|
|
|
|
|
|
|
} |
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
# Create sliding window count vector. |
120
|
|
|
|
|
|
|
sub create_window_count{ |
121
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
122
|
0
|
0
|
|
|
|
|
if(@_ < 1) {warn "Number of bins per window not specified. Exit.\n"; return;} |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
123
|
0
|
|
|
|
|
|
my $bins_per_window = shift; |
124
|
0
|
0
|
|
|
|
|
if(@{$self->{bin_count}} < $bins_per_window){ |
|
0
|
|
|
|
|
|
|
125
|
0
|
|
|
|
|
|
warn "Bin count vector not long enough to calculate window count.\n"; |
126
|
0
|
|
|
|
|
|
return; |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
# Initialize window count vector. |
129
|
0
|
|
|
|
|
|
$self->{window_num} = $self->{bin_num} - $bins_per_window + 1; |
130
|
0
|
|
|
|
|
|
$#{$self->{window_count}} = $self->{window_num} - 1; |
|
0
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
# $mov_window_count is the moving window count. |
132
|
0
|
|
|
|
|
|
my $mov_window_count = 0; |
133
|
0
|
|
|
|
|
|
for my $i(0..$bins_per_window-1) {$mov_window_count += $self->{bin_count}[$i];} |
|
0
|
|
|
|
|
|
|
134
|
0
|
|
|
|
|
|
$self->{window_count}[0] = $mov_window_count; |
135
|
|
|
|
|
|
|
# $i is the index to current window. |
136
|
0
|
|
|
|
|
|
for(my $i=1; $i < $self->{window_num}; $i++){ |
137
|
0
|
|
|
|
|
|
$mov_window_count = $mov_window_count - $self->{bin_count}[$i-1] + $self->{bin_count}[$i-1+$bins_per_window]; |
138
|
0
|
|
|
|
|
|
$self->{window_count}[$i] = $mov_window_count; |
139
|
|
|
|
|
|
|
} |
140
|
|
|
|
|
|
|
} |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# Create sliding window count vector with direction. |
143
|
|
|
|
|
|
|
sub create_window_count_direction{ |
144
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
145
|
0
|
0
|
|
|
|
|
if(@_ < 1) {warn "Number of bins per window not specified. Exit.\n"; return;} |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
146
|
0
|
|
|
|
|
|
my $bins_per_window = shift; |
147
|
0
|
0
|
0
|
|
|
|
if(@{$self->{bin_count_F}} < $bins_per_window or @{$self->{bin_count_R}} < $bins_per_window){ |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
148
|
0
|
|
|
|
|
|
warn "Bin count vector not long enough to calculate window count.\n"; |
149
|
0
|
|
|
|
|
|
return; |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
# Initialize window count vector. |
152
|
0
|
|
|
|
|
|
$self->{window_num} = $self->{bin_num} - $bins_per_window + 1; |
153
|
0
|
|
|
|
|
|
$#{$self->{window_count_F}} = $self->{window_num} - 1; |
|
0
|
|
|
|
|
|
|
154
|
0
|
|
|
|
|
|
$#{$self->{window_count_R}} = $self->{window_num} - 1; |
|
0
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
# $mov_window_count is the moving window count. |
156
|
0
|
|
|
|
|
|
my $mov_window_count_F = 0; |
157
|
0
|
|
|
|
|
|
my $mov_window_count_R = 0; |
158
|
0
|
|
|
|
|
|
for my $i(0..$bins_per_window-1) {$mov_window_count_F += $self->{bin_count_F}[$i];} |
|
0
|
|
|
|
|
|
|
159
|
0
|
|
|
|
|
|
for my $i(0..$bins_per_window-1) {$mov_window_count_R += $self->{bin_count_R}[$i];} |
|
0
|
|
|
|
|
|
|
160
|
0
|
|
|
|
|
|
$self->{window_count_F}[0] = $mov_window_count_F; |
161
|
0
|
|
|
|
|
|
$self->{window_count_R}[0] = $mov_window_count_R; |
162
|
|
|
|
|
|
|
# $i is the index to current window. |
163
|
0
|
|
|
|
|
|
for(my $i=1; $i < $self->{window_num}; $i++){ |
164
|
0
|
|
|
|
|
|
$mov_window_count_F = $mov_window_count_F - $self->{bin_count_F}[$i-1] + $self->{bin_count_F}[$i-1+$bins_per_window]; |
165
|
0
|
|
|
|
|
|
$mov_window_count_R = $mov_window_count_R - $self->{bin_count_R}[$i-1] + $self->{bin_count_R}[$i-1+$bins_per_window]; |
166
|
0
|
|
|
|
|
|
$self->{window_count_F}[$i] = $mov_window_count_F; |
167
|
0
|
|
|
|
|
|
$self->{window_count_R}[$i] = $mov_window_count_R; |
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
} |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
# Delete chromosome file but keep memory data. |
172
|
|
|
|
|
|
|
sub del_file{ |
173
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
174
|
0
|
0
|
|
|
|
|
if(defined $self->{file_name}){ |
175
|
0
|
|
|
|
|
|
my $fn = $self->{file_name}; |
176
|
0
|
0
|
|
|
|
|
unlink $fn or warn "Cannot delete file $fn: $!\n"; |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
1; |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
__END__ |