| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
1
|
|
|
1
|
|
1008
|
use 5.006; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
126
|
|
|
2
|
|
|
|
|
|
|
our $VERSION = '1.122'; |
|
3
|
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
# Class for a chromatin modification site consisting consecutive significant windows. |
|
5
|
|
|
|
|
|
|
package diffReps::ChromaModSite; |
|
6
|
1
|
|
|
1
|
|
6
|
use strict; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
285
|
|
|
7
|
1
|
|
|
1
|
|
6
|
use MyBioinfo::Common; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
126
|
|
|
8
|
1
|
|
|
1
|
|
6
|
use MyShortRead::SRBed; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
2252
|
|
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
our @ISA = qw(diffReps::SlideWindow); # Inherited from 'SlideWindow'. |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# Constructor. |
|
13
|
|
|
|
|
|
|
sub new{ |
|
14
|
0
|
|
|
0
|
0
|
|
my $class = shift; |
|
15
|
0
|
|
|
|
|
|
my $self = { |
|
16
|
|
|
|
|
|
|
chrom => undef, |
|
17
|
|
|
|
|
|
|
start => undef, |
|
18
|
|
|
|
|
|
|
end => undef, |
|
19
|
|
|
|
|
|
|
siteCenter => undef, |
|
20
|
|
|
|
|
|
|
retrieved => 0, # boolean tag: whether count data have been retrieved? |
|
21
|
|
|
|
|
|
|
tr_cnt => [], # region normalized treatment counts. |
|
22
|
|
|
|
|
|
|
co_cnt => [], # region normalized control counts. |
|
23
|
|
|
|
|
|
|
tr_cnt_ => [], # normalized treatment counts using #reads. |
|
24
|
|
|
|
|
|
|
co_cnt_ => [], # normalized control counts using #reads. |
|
25
|
|
|
|
|
|
|
btr_cnt => [], # normalized background treatment counts. |
|
26
|
|
|
|
|
|
|
bco_cnt => [], # normalized background control counts. |
|
27
|
|
|
|
|
|
|
#trEnr => undef, # treatment enrichment vs. background. |
|
28
|
|
|
|
|
|
|
#coEnr => undef, # control enrichment vs. background. |
|
29
|
|
|
|
|
|
|
rsumTr => undef, # sum of raw treatment counts. |
|
30
|
|
|
|
|
|
|
rsumCo => undef, # sum of raw control counts. |
|
31
|
|
|
|
|
|
|
dirn => undef, # change direction: 'Up' or 'Down'. |
|
32
|
|
|
|
|
|
|
logFC => undef, # log2 fold change. |
|
33
|
|
|
|
|
|
|
pval => undef, # P-value. |
|
34
|
|
|
|
|
|
|
padj => undef, # adjusted P-value. |
|
35
|
|
|
|
|
|
|
winSta => undef, # best window start. |
|
36
|
|
|
|
|
|
|
#winTr => [], # window normalized treatment counts. |
|
37
|
|
|
|
|
|
|
#winCo => [], # window normalized control counts. |
|
38
|
|
|
|
|
|
|
winFC => undef, # window log2 fold change. |
|
39
|
|
|
|
|
|
|
winP => undef, # window P-value. |
|
40
|
|
|
|
|
|
|
winQ => undef # window adjusted P-value. |
|
41
|
|
|
|
|
|
|
}; |
|
42
|
0
|
|
|
|
|
|
bless $self, $class; |
|
43
|
0
|
|
|
|
|
|
return $self; |
|
44
|
|
|
|
|
|
|
} |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# Copy member data into an anonymous hash table. |
|
47
|
|
|
|
|
|
|
sub dcopy{ |
|
48
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
|
49
|
0
|
|
|
|
|
|
my $new = {}; |
|
50
|
0
|
|
|
|
|
|
%{$new} = %{$self}; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# Deep copy array members that are array variables. |
|
52
|
0
|
|
|
|
|
|
$new->{tr_cnt} = []; @{$new->{tr_cnt}} = @{$self->{tr_cnt}}; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
53
|
0
|
|
|
|
|
|
$new->{co_cnt} = []; @{$new->{co_cnt}} = @{$self->{co_cnt}}; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
54
|
0
|
|
|
|
|
|
$new->{tr_cnt_} = []; @{$new->{tr_cnt_}} = @{$self->{tr_cnt_}}; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
55
|
0
|
|
|
|
|
|
$new->{co_cnt_} = []; @{$new->{co_cnt_}} = @{$self->{co_cnt_}}; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
56
|
0
|
|
|
|
|
|
$new->{btr_cnt} = []; @{$new->{btr_cnt}} = @{$self->{btr_cnt}}; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
57
|
0
|
|
|
|
|
|
$new->{bco_cnt} = []; @{$new->{bco_cnt}} = @{$self->{bco_cnt}}; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
#$new->{winTr} = []; @{$new->{winTr}} = @{$self->{winTr}}; |
|
59
|
|
|
|
|
|
|
#$new->{winCo} = []; @{$new->{winCo}} = @{$self->{winCo}}; |
|
60
|
0
|
|
|
|
|
|
bless $new, 'diffReps::ChromaModSite'; |
|
61
|
0
|
|
|
|
|
|
return $new; |
|
62
|
|
|
|
|
|
|
} |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
# Initialize a region by a significant window. |
|
65
|
|
|
|
|
|
|
sub init{ |
|
66
|
0
|
|
|
0
|
0
|
|
my($self,$gdt,$rwin) = @_; |
|
67
|
0
|
|
|
|
|
|
$self->{chrom} = $rwin->{chrom}; |
|
68
|
0
|
|
|
|
|
|
$self->{start} = $rwin->{winN}*$gdt->{step} + 1; |
|
69
|
0
|
|
|
|
|
|
$self->{end} = $rwin->{winN}*$gdt->{step} + $gdt->{winSize}; |
|
70
|
0
|
|
|
|
|
|
$self->{siteCenter} = ($self->{start} + $self->{end}) /2; |
|
71
|
0
|
|
|
|
|
|
$self->{retrieved} = 1; |
|
72
|
0
|
|
|
|
|
|
@{$self->{tr_cnt}} = @{$rwin->{tr_cnt}}; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
73
|
0
|
|
|
|
|
|
@{$self->{co_cnt}} = @{$rwin->{co_cnt}}; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
74
|
0
|
|
|
|
|
|
@{$self->{tr_cnt_}} = @{$rwin->{tr_cnt_}}; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
75
|
0
|
|
|
|
|
|
@{$self->{co_cnt_}} = @{$rwin->{co_cnt_}}; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
76
|
0
|
|
|
|
|
|
@{$self->{btr_cnt}} = @{$rwin->{btr_cnt}}; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
77
|
0
|
|
|
|
|
|
@{$self->{bco_cnt}} = @{$rwin->{bco_cnt}}; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
78
|
0
|
|
|
|
|
|
$self->{rsumTr} = $rwin->{rsumTr}; |
|
79
|
0
|
|
|
|
|
|
$self->{rsumCo} = $rwin->{rsumCo}; |
|
80
|
0
|
|
|
|
|
|
$self->{dirn} = $rwin->{dirn}; |
|
81
|
0
|
|
|
|
|
|
$self->{logFC} = $rwin->{logFC}; |
|
82
|
0
|
|
|
|
|
|
$self->{pval} = $rwin->{pval}; |
|
83
|
0
|
|
|
|
|
|
$self->{winSta} = $self->{start}; |
|
84
|
|
|
|
|
|
|
#@{$self->{winTr}} = @{$rwin->{tr_cnt}}; |
|
85
|
|
|
|
|
|
|
#@{$self->{winCo}} = @{$rwin->{co_cnt}}; |
|
86
|
0
|
|
|
|
|
|
$self->{winFC} = $rwin->{logFC}; |
|
87
|
0
|
|
|
|
|
|
$self->{winP} = $rwin->{pval}; |
|
88
|
|
|
|
|
|
|
} |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# Expand the genomic coordinates. Replace best window if necessary. |
|
91
|
|
|
|
|
|
|
# Do not retrieve counts and sums to save some computation. |
|
92
|
|
|
|
|
|
|
sub expand{ |
|
93
|
0
|
|
|
0
|
0
|
|
my($self,$gdt,$rwin) = @_; |
|
94
|
0
|
|
|
|
|
|
$self->{end} = $rwin->{winN}*$gdt->{step} + $gdt->{winSize}; # new region end. |
|
95
|
0
|
|
|
|
|
|
$self->{siteCenter} = ($self->{start} + $self->{end}) /2; |
|
96
|
|
|
|
|
|
|
# Invalidate previous diff results. |
|
97
|
0
|
|
|
|
|
|
$self->reset_content(1); # reset everything but keep direction. |
|
98
|
|
|
|
|
|
|
# $self->{tr_cnt} = []; |
|
99
|
|
|
|
|
|
|
# $self->{co_cnt} = []; |
|
100
|
|
|
|
|
|
|
# $self->{rsumTr} = undef; |
|
101
|
|
|
|
|
|
|
# $self->{rsumCo} = undef; |
|
102
|
|
|
|
|
|
|
# $self->{logFC} = undef; |
|
103
|
|
|
|
|
|
|
# Replace best window if necessary. |
|
104
|
0
|
0
|
|
|
|
|
if($rwin->{pval} < $self->{winP}){ |
|
105
|
0
|
|
|
|
|
|
$self->{winSta} = $rwin->{winN}*$gdt->{step} + 1; |
|
106
|
|
|
|
|
|
|
#@{$self->{winTr}} = @{$rwin->{tr_cnt}}; |
|
107
|
|
|
|
|
|
|
#@{$self->{winCo}} = @{$rwin->{co_cnt}}; |
|
108
|
0
|
|
|
|
|
|
$self->{winFC} = $rwin->{logFC}; |
|
109
|
0
|
|
|
|
|
|
$self->{winP} = $rwin->{pval}; |
|
110
|
|
|
|
|
|
|
} |
|
111
|
|
|
|
|
|
|
} |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
# Print a formatted record of the current region. Do nothing if the P-val is not defined. |
|
114
|
|
|
|
|
|
|
sub print_reg{ |
|
115
|
0
|
|
|
0
|
0
|
|
my($self,$gdt,$h) = @_; |
|
116
|
0
|
0
|
|
|
|
|
if(defined $self->{pval}){ |
|
117
|
|
|
|
|
|
|
# Concatenate treatment and control counts for output. |
|
118
|
0
|
|
|
|
|
|
my @tr_cnt_fmt = fprecision(2, @{$self->{tr_cnt}}); |
|
|
0
|
|
|
|
|
|
|
|
119
|
0
|
|
|
|
|
|
my @co_cnt_fmt = fprecision(2, @{$self->{co_cnt}}); |
|
|
0
|
|
|
|
|
|
|
|
120
|
0
|
|
|
|
|
|
my $tr_cnt_str = join(';', @tr_cnt_fmt); |
|
121
|
0
|
|
|
|
|
|
my $co_cnt_str = join(';', @co_cnt_fmt); |
|
122
|
0
|
|
|
|
|
|
my($tr_bkg_enr, $co_bkg_enr); |
|
123
|
0
|
0
|
|
|
|
|
if($gdt->{bkgEnr}){ |
|
124
|
0
|
|
|
|
|
|
my $btr_m = mean(@{$self->{btr_cnt}}); |
|
|
0
|
|
|
|
|
|
|
|
125
|
0
|
|
|
|
|
|
my $bco_m = mean(@{$self->{bco_cnt}}); |
|
|
0
|
|
|
|
|
|
|
|
126
|
0
|
0
|
|
|
|
|
$tr_bkg_enr = $btr_m > 0? fprecision(2, mean(@{$self->{tr_cnt_}}) / $btr_m) : INFINITE; |
|
|
0
|
|
|
|
|
|
|
|
127
|
0
|
0
|
|
|
|
|
$co_bkg_enr = $bco_m > 0? fprecision(2, mean(@{$self->{co_cnt_}}) / $bco_m) : INFINITE; |
|
|
0
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
}else{ |
|
129
|
0
|
|
|
|
|
|
$tr_bkg_enr = 'NA'; |
|
130
|
0
|
|
|
|
|
|
$co_bkg_enr = 'NA'; |
|
131
|
|
|
|
|
|
|
} |
|
132
|
|
|
|
|
|
|
# Output: chrom,start,end,length,(treatment read count),(control read count),direction,logFC,pval,padj, |
|
133
|
|
|
|
|
|
|
# (window start),(window end),(window logFC),(window pval),(window padj). |
|
134
|
0
|
|
|
|
|
|
print $h join("\t", ($self->{chrom}, |
|
135
|
|
|
|
|
|
|
$self->{start}, $self->{end}, $self->{end}-$self->{start}+1, # region coordinates. |
|
136
|
|
|
|
|
|
|
$tr_cnt_str, $co_cnt_str, # formatted read counts. |
|
137
|
0
|
|
|
|
|
|
fprecision(2, &mean(@{$self->{tr_cnt}})), fprecision(2, &mean(@{$self->{co_cnt}})), # avg read count. |
|
|
0
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
$tr_bkg_enr, $co_bkg_enr, # enrichment vs. background. |
|
139
|
|
|
|
|
|
|
$self->{dirn}, fprecision(2,$self->{logFC}), # change direction and logFC. |
|
140
|
|
|
|
|
|
|
$self->{pval}, $self->{padj}, # diff analysis info. |
|
141
|
|
|
|
|
|
|
$self->{winSta}, $self->{winSta}+$gdt->{winSize}-1, # best window coordinates. |
|
142
|
|
|
|
|
|
|
fprecision(2,$self->{winFC}), |
|
143
|
|
|
|
|
|
|
$self->{winP}, $self->{winQ})), "\n"; # best window diff info. |
|
144
|
|
|
|
|
|
|
} |
|
145
|
|
|
|
|
|
|
} |
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# Do we need to perform stat test for the region? |
|
148
|
|
|
|
|
|
|
# If the region contains only one window, this is already done. |
|
149
|
|
|
|
|
|
|
sub needStat{ |
|
150
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
|
151
|
0
|
|
0
|
|
|
|
return (defined($self->{winP}) && !defined($self->{pval})); |
|
152
|
|
|
|
|
|
|
} |
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# Retrieve normalized read counts for the region. Override parent function. |
|
155
|
|
|
|
|
|
|
sub retrieve_norm_cnt{ |
|
156
|
0
|
|
|
0
|
0
|
|
my($self,$gdt) = @_; |
|
157
|
0
|
|
|
|
|
|
my @atr_cnt; # array treatment counts. |
|
158
|
0
|
|
|
|
|
|
my $i = 0; # iterator for normalization constants. |
|
159
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedTr}}) { |
|
|
0
|
|
|
|
|
|
|
|
160
|
0
|
|
|
|
|
|
push @atr_cnt, ($b->get_region_count($self->{chrom}, $self->{start}, $self->{end}) |
|
161
|
|
|
|
|
|
|
/ $gdt->{normTr}[$i++]); |
|
162
|
|
|
|
|
|
|
} |
|
163
|
0
|
|
|
|
|
|
my @aco_cnt; # array control counts. |
|
164
|
0
|
|
|
|
|
|
$i = 0; |
|
165
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedCo}}) { |
|
|
0
|
|
|
|
|
|
|
|
166
|
0
|
|
|
|
|
|
push @aco_cnt, ($b->get_region_count($self->{chrom}, $self->{start}, $self->{end}) |
|
167
|
|
|
|
|
|
|
/ $gdt->{normCo}[$i++]); |
|
168
|
|
|
|
|
|
|
} |
|
169
|
0
|
|
|
|
|
|
@{$self->{tr_cnt}} = @atr_cnt; |
|
|
0
|
|
|
|
|
|
|
|
170
|
0
|
|
|
|
|
|
@{$self->{co_cnt}} = @aco_cnt; |
|
|
0
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
# Retrieve norm count for background if needed. |
|
172
|
0
|
0
|
|
|
|
|
if($gdt->{bkgEnr}){ |
|
173
|
0
|
|
|
|
|
|
my @atr_cnt_; # array treatment counts. |
|
174
|
0
|
|
|
|
|
|
$i = 0; # iterator for normalization constants. |
|
175
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedTr}}) { |
|
|
0
|
|
|
|
|
|
|
|
176
|
0
|
|
|
|
|
|
push @atr_cnt_, ($b->get_region_count($self->{chrom}, $self->{start}, $self->{end}) / $gdt->{normTr_}->[$i++]); |
|
177
|
|
|
|
|
|
|
} |
|
178
|
0
|
|
|
|
|
|
@{$self->{tr_cnt_}} = @atr_cnt_; |
|
|
0
|
|
|
|
|
|
|
|
179
|
0
|
|
|
|
|
|
my @aco_cnt_; # array treatment counts. |
|
180
|
0
|
|
|
|
|
|
$i = 0; # iterator for normalization constants. |
|
181
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedCo}}) { |
|
|
0
|
|
|
|
|
|
|
|
182
|
0
|
|
|
|
|
|
push @aco_cnt_, ($b->get_region_count($self->{chrom}, $self->{start}, $self->{end}) / $gdt->{normCo_}->[$i++]); |
|
183
|
|
|
|
|
|
|
} |
|
184
|
0
|
|
|
|
|
|
@{$self->{co_cnt_}} = @aco_cnt_; |
|
|
0
|
|
|
|
|
|
|
|
185
|
0
|
0
|
|
|
|
|
if(@{$gdt->{bedBtr}}){ |
|
|
0
|
|
|
|
|
|
|
|
186
|
0
|
|
|
|
|
|
my @abtr_cnt; # array background treatment counts. |
|
187
|
0
|
|
|
|
|
|
$i = 0; # iterator for normalization constants. |
|
188
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedBtr}}) { |
|
|
0
|
|
|
|
|
|
|
|
189
|
0
|
|
|
|
|
|
push @abtr_cnt, ($b->get_region_count($self->{chrom}, $self->{start}, $self->{end}) / $gdt->{normBtr}->[$i++]); |
|
190
|
|
|
|
|
|
|
} |
|
191
|
0
|
|
|
|
|
|
@{$self->{btr_cnt}} = @abtr_cnt; |
|
|
0
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
} |
|
193
|
0
|
0
|
|
|
|
|
if(@{$gdt->{bedBco}}){ |
|
|
0
|
|
|
|
|
|
|
|
194
|
0
|
|
|
|
|
|
my @abco_cnt; # array background treatment counts. |
|
195
|
0
|
|
|
|
|
|
$i = 0; # iterator for normalization constants. |
|
196
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedBco}}) { |
|
|
0
|
|
|
|
|
|
|
|
197
|
0
|
|
|
|
|
|
push @abco_cnt, ($b->get_region_count($self->{chrom}, $self->{start}, $self->{end}) / $gdt->{normBco}->[$i++]); |
|
198
|
|
|
|
|
|
|
} |
|
199
|
0
|
|
|
|
|
|
@{$self->{bco_cnt}} = @abco_cnt; |
|
|
0
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
} |
|
201
|
0
|
0
|
|
|
|
|
if(@{$self->{btr_cnt}} == 0){ |
|
|
0
|
|
|
|
|
|
|
|
202
|
0
|
|
|
|
|
|
@{$self->{btr_cnt}} = @{$self->{bco_cnt}}; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
} |
|
204
|
0
|
0
|
|
|
|
|
if(@{$self->{bco_cnt}} == 0){ |
|
|
0
|
|
|
|
|
|
|
|
205
|
0
|
|
|
|
|
|
@{$self->{bco_cnt}} = @{$self->{btr_cnt}}; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
} |
|
207
|
|
|
|
|
|
|
} |
|
208
|
|
|
|
|
|
|
} |
|
209
|
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
# Retrieve sum of raw read counts for the region. Override parent function. |
|
211
|
|
|
|
|
|
|
sub retrieve_raw_sum{ |
|
212
|
0
|
|
|
0
|
0
|
|
my($self,$gdt) = @_; |
|
213
|
0
|
|
|
|
|
|
$self->{rsumTr} = 0; |
|
214
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedTr}}) { |
|
|
0
|
|
|
|
|
|
|
|
215
|
0
|
|
|
|
|
|
$self->{rsumTr} += $b->get_region_count($self->{chrom}, $self->{start}, $self->{end}); |
|
216
|
|
|
|
|
|
|
} |
|
217
|
0
|
|
|
|
|
|
$self->{rsumCo} = 0; |
|
218
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedCo}}) { |
|
|
0
|
|
|
|
|
|
|
|
219
|
0
|
|
|
|
|
|
$self->{rsumCo} += $b->get_region_count($self->{chrom}, $self->{start}, $self->{end}); |
|
220
|
|
|
|
|
|
|
} |
|
221
|
|
|
|
|
|
|
} |
|
222
|
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
# Determine whether a window is within gap. |
|
224
|
|
|
|
|
|
|
sub isWithinGap{ |
|
225
|
0
|
|
|
0
|
0
|
|
my($self,$gdt,$rwin) = @_; |
|
226
|
0
|
0
|
|
|
|
|
return 0 if !defined $self->{chrom}; |
|
227
|
0
|
|
|
|
|
|
my $winSta = $rwin->{winN}*$gdt->{step}+1; |
|
228
|
0
|
|
0
|
|
|
|
return ($winSta - $self->{end}-1 <= $gdt->{gap} and $rwin->{dirn} eq $self->{dirn}); |
|
229
|
|
|
|
|
|
|
} |
|
230
|
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
# Class for a sliding window of fixed size. |
|
233
|
|
|
|
|
|
|
package diffReps::SlideWindow; |
|
234
|
|
|
|
|
|
|
|
|
235
|
1
|
|
|
1
|
|
7
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
37
|
|
|
236
|
1
|
|
|
1
|
|
1021
|
use Statistics::TTest; |
|
|
1
|
|
|
|
|
41125
|
|
|
|
1
|
|
|
|
|
33
|
|
|
237
|
1
|
|
|
1
|
|
741
|
use MyBioinfo::NBTest; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
60
|
|
|
238
|
1
|
|
|
1
|
|
7
|
use MyBioinfo::Common; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
104
|
|
|
239
|
1
|
|
|
1
|
|
1058
|
use MyBioinfo::Math qw( gtest_gof chisqtest_gof ); |
|
|
1
|
|
|
|
|
4
|
|
|
|
1
|
|
|
|
|
82
|
|
|
240
|
1
|
|
|
1
|
|
8
|
use MyShortRead::SRBed; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
2758
|
|
|
241
|
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
# Constructor. |
|
243
|
|
|
|
|
|
|
sub new{ |
|
244
|
0
|
|
|
0
|
|
|
my $class = shift; |
|
245
|
0
|
|
|
|
|
|
my $self = { |
|
246
|
|
|
|
|
|
|
chrom => undef, |
|
247
|
|
|
|
|
|
|
winN => undef, # window number to retrieve count from SRBed class. |
|
248
|
|
|
|
|
|
|
maxN => undef, # maximum window number. |
|
249
|
|
|
|
|
|
|
retrieved => 0, # boolean tag: whether count data have been retrieved? |
|
250
|
|
|
|
|
|
|
tr_cnt => [], # normalized treatment counts. |
|
251
|
|
|
|
|
|
|
co_cnt => [], # normalized control counts. |
|
252
|
|
|
|
|
|
|
tr_cnt_ => [], # normalized treatment counts using #reads. |
|
253
|
|
|
|
|
|
|
co_cnt_ => [], # normalized control counts using #reads. |
|
254
|
|
|
|
|
|
|
btr_cnt => [], # normalized background treatment counts. |
|
255
|
|
|
|
|
|
|
bco_cnt => [], # normalized background control counts. |
|
256
|
|
|
|
|
|
|
rsumTr => undef, # sum of raw treatment counts. |
|
257
|
|
|
|
|
|
|
rsumCo => undef, # sum of raw control counts. |
|
258
|
|
|
|
|
|
|
dirn => undef, # change direction: 'Up' or 'Down'. |
|
259
|
|
|
|
|
|
|
logFC => undef, # log2 fold change. |
|
260
|
|
|
|
|
|
|
pval => undef # P-value. |
|
261
|
|
|
|
|
|
|
}; |
|
262
|
0
|
|
|
|
|
|
bless $self, $class; |
|
263
|
0
|
|
|
|
|
|
return $self; |
|
264
|
|
|
|
|
|
|
} |
|
265
|
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
# Set the current chromosome name, max window number and reset everything. |
|
267
|
|
|
|
|
|
|
# Obtain counts and sums for the first window. |
|
268
|
|
|
|
|
|
|
sub set_chrom{ |
|
269
|
0
|
|
|
0
|
|
|
my($self,$gdt,$chrom,$maxN) = @_; |
|
270
|
0
|
|
|
|
|
|
$self->{chrom} = $chrom; |
|
271
|
0
|
|
|
|
|
|
$self->{maxN} = $maxN; |
|
272
|
0
|
|
|
|
|
|
$self->{winN} = 0; |
|
273
|
0
|
|
|
|
|
|
$self->reset_content; |
|
274
|
|
|
|
|
|
|
#$self->{retrieved} = 0; |
|
275
|
|
|
|
|
|
|
# $self->retrieve_norm_cnt($gdt); |
|
276
|
|
|
|
|
|
|
# $self->retrieve_raw_sum($gdt); |
|
277
|
|
|
|
|
|
|
#$self->{trEnr} = undef; |
|
278
|
|
|
|
|
|
|
#$self->{coEnr} = undef; |
|
279
|
|
|
|
|
|
|
# $self->{dirn} = undef; # change direction: 'Up' or 'Down'. |
|
280
|
|
|
|
|
|
|
# $self->{logFC} = undef; # log2 fold change. |
|
281
|
|
|
|
|
|
|
# $self->{pval} = undef; # P-value. |
|
282
|
|
|
|
|
|
|
} |
|
283
|
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
# Reset window number. |
|
285
|
|
|
|
|
|
|
sub reset_winN{ |
|
286
|
0
|
|
|
0
|
|
|
my $self = shift; |
|
287
|
0
|
|
|
|
|
|
$self->{winN} = 0; |
|
288
|
|
|
|
|
|
|
} |
|
289
|
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
# Reset window/region content. |
|
291
|
|
|
|
|
|
|
sub reset_content{ |
|
292
|
0
|
|
|
0
|
|
|
my($self,$keepdi) = @_; |
|
293
|
0
|
|
|
|
|
|
$self->{retrieved} = 0; # boolean tag: whether count data have been retrieved? |
|
294
|
0
|
|
|
|
|
|
$self->{tr_cnt} = []; # normalized treatment counts. |
|
295
|
0
|
|
|
|
|
|
$self->{co_cnt} = []; # normalized control counts. |
|
296
|
0
|
|
|
|
|
|
$self->{tr_cnt_} = []; # normalized treatment counts using #reads. |
|
297
|
0
|
|
|
|
|
|
$self->{co_cnt_} = []; # normalized control counts using #reads. |
|
298
|
0
|
|
|
|
|
|
$self->{btr_cnt} = []; # normalized background treatment counts. |
|
299
|
0
|
|
|
|
|
|
$self->{bco_cnt} = []; # normalized background control counts. |
|
300
|
0
|
|
|
|
|
|
$self->{rsumTr} = undef; # sum of raw treatment counts. |
|
301
|
0
|
|
|
|
|
|
$self->{rsumCo} = undef; # sum of raw control counts. |
|
302
|
0
|
0
|
|
|
|
|
$self->{dirn} = undef unless $keepdi; # change direction: 'Up' or 'Down'. |
|
303
|
0
|
|
|
|
|
|
$self->{logFC} = undef; # log2 fold change. |
|
304
|
0
|
|
|
|
|
|
$self->{pval} = undef; # P-value. |
|
305
|
|
|
|
|
|
|
} |
|
306
|
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
# Retrieve norm/raw count given current window/region location. |
|
308
|
|
|
|
|
|
|
sub retrieve_new_cnt{ |
|
309
|
0
|
|
|
0
|
|
|
my($self,$gdt) = @_; |
|
310
|
0
|
|
|
|
|
|
$self->{retrieved} = 1; |
|
311
|
0
|
|
|
|
|
|
$self->retrieve_norm_cnt($gdt); |
|
312
|
0
|
|
|
|
|
|
$self->retrieve_raw_sum($gdt); |
|
313
|
|
|
|
|
|
|
} |
|
314
|
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
# Shift the window one position to the right and obtain new counts and sums. |
|
316
|
|
|
|
|
|
|
sub move_forward{ |
|
317
|
0
|
|
|
0
|
|
|
my($self,$gdt) = @_; |
|
318
|
0
|
0
|
|
|
|
|
if($self->{winN} <= $self->{maxN}-1) { |
|
|
0
|
|
|
|
|
|
|
|
319
|
0
|
|
|
|
|
|
$self->{winN}++; |
|
320
|
0
|
|
|
|
|
|
$self->reset_content; |
|
321
|
|
|
|
|
|
|
# $self->retrieve_norm_cnt($gdt); |
|
322
|
|
|
|
|
|
|
# $self->retrieve_raw_sum($gdt); |
|
323
|
|
|
|
|
|
|
#$self->{trEnr} = undef; |
|
324
|
|
|
|
|
|
|
#$self->{coEnr} = undef; |
|
325
|
|
|
|
|
|
|
#$self->{dirn} = undef; # change direction: 'Up' or 'Down'. |
|
326
|
|
|
|
|
|
|
#$self->{logFC} = undef; # log2 fold change. |
|
327
|
|
|
|
|
|
|
#$self->{pval} = undef; # P-value. |
|
328
|
0
|
|
|
|
|
|
return $self->{winN}; |
|
329
|
|
|
|
|
|
|
} |
|
330
|
|
|
|
|
|
|
# elsif($self->{winN} == $self->{maxN}-1){ |
|
331
|
|
|
|
|
|
|
# $self->{winN} = $self->{maxN}; |
|
332
|
|
|
|
|
|
|
# $self->{tr_cnt} = []; |
|
333
|
|
|
|
|
|
|
# $self->{co_cnt} = []; |
|
334
|
|
|
|
|
|
|
# $self->{tr_cnt_} = []; |
|
335
|
|
|
|
|
|
|
# $self->{co_cnt_} = []; |
|
336
|
|
|
|
|
|
|
# $self->{btr_cnt} = []; |
|
337
|
|
|
|
|
|
|
# $self->{bco_cnt} = []; |
|
338
|
|
|
|
|
|
|
# #$self->{trEnr} = undef; |
|
339
|
|
|
|
|
|
|
# #$self->{coEnr} = undef; |
|
340
|
|
|
|
|
|
|
# $self->{dirn} = undef; # change direction: 'Up' or 'Down'. |
|
341
|
|
|
|
|
|
|
# $self->{logFC} = undef; # log2 fold change. |
|
342
|
|
|
|
|
|
|
# $self->{pval} = undef; # P-value. |
|
343
|
|
|
|
|
|
|
# return $self->{winN}; |
|
344
|
|
|
|
|
|
|
# } |
|
345
|
|
|
|
|
|
|
else {return $self->{maxN};} |
|
346
|
|
|
|
|
|
|
# Max valid window index should be maxN-1. |
|
347
|
|
|
|
|
|
|
} |
|
348
|
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
# Return current window number. |
|
350
|
|
|
|
|
|
|
sub get_winN{ |
|
351
|
0
|
|
|
0
|
|
|
my $self = shift; |
|
352
|
0
|
0
|
|
|
|
|
if($self->{winN} < $self->{maxN}) {return $self->{winN};} |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
else {return -1;} |
|
354
|
|
|
|
|
|
|
} |
|
355
|
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
# Return chromosome name. |
|
357
|
|
|
|
|
|
|
sub get_chrom{ |
|
358
|
0
|
|
|
0
|
|
|
my $self = shift; |
|
359
|
0
|
|
|
|
|
|
return $self->{chrom}; |
|
360
|
|
|
|
|
|
|
} |
|
361
|
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
# Get normalized read counts for external procedures. |
|
363
|
|
|
|
|
|
|
sub get_cnt_tr{ |
|
364
|
0
|
|
|
0
|
|
|
my $self = shift; |
|
365
|
0
|
|
|
|
|
|
return @{$self->{tr_cnt}}; |
|
|
0
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
} |
|
367
|
|
|
|
|
|
|
sub get_cnt_co{ |
|
368
|
0
|
|
|
0
|
|
|
my $self = shift; |
|
369
|
0
|
|
|
|
|
|
return @{$self->{co_cnt}}; |
|
|
0
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
} |
|
371
|
|
|
|
|
|
|
sub get_cnt_tr_{ |
|
372
|
0
|
|
|
0
|
|
|
my $self = shift; |
|
373
|
0
|
|
|
|
|
|
return @{$self->{tr_cnt_}}; |
|
|
0
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
} |
|
375
|
|
|
|
|
|
|
sub get_cnt_co_{ |
|
376
|
0
|
|
|
0
|
|
|
my $self = shift; |
|
377
|
0
|
|
|
|
|
|
return @{$self->{co_cnt_}}; |
|
|
0
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
} |
|
379
|
|
|
|
|
|
|
sub get_cnt_btr{ |
|
380
|
0
|
|
|
0
|
|
|
my $self = shift; |
|
381
|
0
|
|
|
|
|
|
return @{$self->{btr_cnt}}; |
|
|
0
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
} |
|
383
|
|
|
|
|
|
|
sub get_cnt_bco{ |
|
384
|
0
|
|
|
0
|
|
|
my $self = shift; |
|
385
|
0
|
|
|
|
|
|
return @{$self->{bco_cnt}}; |
|
|
0
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
} |
|
387
|
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
# Get differential analysis info. |
|
389
|
|
|
|
|
|
|
sub get_diff_info{ |
|
390
|
0
|
|
|
0
|
|
|
my $self = shift; |
|
391
|
|
|
|
|
|
|
return { |
|
392
|
0
|
|
|
|
|
|
dirn => $self->{dirn}, |
|
393
|
|
|
|
|
|
|
logFC => $self->{logFC}, |
|
394
|
|
|
|
|
|
|
pval => $self->{pval} |
|
395
|
|
|
|
|
|
|
}; # anonymous hash. |
|
396
|
|
|
|
|
|
|
} |
|
397
|
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
# Retrieve normalized read counts by current window number from SRBed objects. |
|
399
|
|
|
|
|
|
|
sub retrieve_norm_cnt{ |
|
400
|
0
|
|
|
0
|
|
|
my($self,$gdt) = @_; |
|
401
|
0
|
|
|
|
|
|
my @atr_cnt; # array treatment counts. |
|
402
|
0
|
|
|
|
|
|
my $i = 0; # iterator for normalization constants. |
|
403
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedTr}}) { |
|
|
0
|
|
|
|
|
|
|
|
404
|
0
|
|
|
|
|
|
push @atr_cnt, ($b->get_win_count($self->{chrom}, $self->{winN}) / $gdt->{normTr}->[$i++]); |
|
405
|
|
|
|
|
|
|
} |
|
406
|
0
|
|
|
|
|
|
@{$self->{tr_cnt}} = @atr_cnt; |
|
|
0
|
|
|
|
|
|
|
|
407
|
0
|
|
|
|
|
|
my @aco_cnt; # array control counts. |
|
408
|
0
|
|
|
|
|
|
$i = 0; |
|
409
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedCo}}) { |
|
|
0
|
|
|
|
|
|
|
|
410
|
0
|
|
|
|
|
|
push @aco_cnt, ($b->get_win_count($self->{chrom}, $self->{winN}) / $gdt->{normCo}->[$i++]); |
|
411
|
|
|
|
|
|
|
} |
|
412
|
0
|
|
|
|
|
|
@{$self->{co_cnt}} = @aco_cnt; |
|
|
0
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
# Retrieve norm count for background if needed. |
|
414
|
0
|
0
|
|
|
|
|
if($gdt->{bkgEnr}){ |
|
415
|
0
|
|
|
|
|
|
my @atr_cnt_; # array treatment counts. |
|
416
|
0
|
|
|
|
|
|
$i = 0; # iterator for normalization constants. |
|
417
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedTr}}) { |
|
|
0
|
|
|
|
|
|
|
|
418
|
0
|
|
|
|
|
|
push @atr_cnt_, ($b->get_win_count($self->{chrom}, $self->{winN}) / $gdt->{normTr_}->[$i++]); |
|
419
|
|
|
|
|
|
|
} |
|
420
|
0
|
|
|
|
|
|
@{$self->{tr_cnt_}} = @atr_cnt_; |
|
|
0
|
|
|
|
|
|
|
|
421
|
0
|
|
|
|
|
|
my @aco_cnt_; # array treatment counts. |
|
422
|
0
|
|
|
|
|
|
$i = 0; # iterator for normalization constants. |
|
423
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedCo}}) { |
|
|
0
|
|
|
|
|
|
|
|
424
|
0
|
|
|
|
|
|
push @aco_cnt_, ($b->get_win_count($self->{chrom}, $self->{winN}) / $gdt->{normCo_}->[$i++]); |
|
425
|
|
|
|
|
|
|
} |
|
426
|
0
|
|
|
|
|
|
@{$self->{co_cnt_}} = @aco_cnt_; |
|
|
0
|
|
|
|
|
|
|
|
427
|
0
|
0
|
|
|
|
|
if(@{$gdt->{bedBtr}}){ |
|
|
0
|
|
|
|
|
|
|
|
428
|
0
|
|
|
|
|
|
my @abtr_cnt; # array background treatment counts. |
|
429
|
0
|
|
|
|
|
|
$i = 0; # iterator for normalization constants. |
|
430
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedBtr}}) { |
|
|
0
|
|
|
|
|
|
|
|
431
|
0
|
|
|
|
|
|
push @abtr_cnt, ($b->get_win_count($self->{chrom}, $self->{winN}) / $gdt->{normBtr}->[$i++]); |
|
432
|
|
|
|
|
|
|
} |
|
433
|
0
|
|
|
|
|
|
@{$self->{btr_cnt}} = @abtr_cnt; |
|
|
0
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
} |
|
435
|
0
|
0
|
|
|
|
|
if(@{$gdt->{bedBco}}){ |
|
|
0
|
|
|
|
|
|
|
|
436
|
0
|
|
|
|
|
|
my @abco_cnt; # array background treatment counts. |
|
437
|
0
|
|
|
|
|
|
$i = 0; # iterator for normalization constants. |
|
438
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedBco}}) { |
|
|
0
|
|
|
|
|
|
|
|
439
|
0
|
|
|
|
|
|
push @abco_cnt, ($b->get_win_count($self->{chrom}, $self->{winN}) / $gdt->{normBco}->[$i++]); |
|
440
|
|
|
|
|
|
|
} |
|
441
|
0
|
|
|
|
|
|
@{$self->{bco_cnt}} = @abco_cnt; |
|
|
0
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
} |
|
443
|
0
|
0
|
|
|
|
|
if(@{$self->{btr_cnt}} == 0){ |
|
|
0
|
|
|
|
|
|
|
|
444
|
0
|
|
|
|
|
|
@{$self->{btr_cnt}} = @{$self->{bco_cnt}}; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
} |
|
446
|
0
|
0
|
|
|
|
|
if(@{$self->{bco_cnt}} == 0){ |
|
|
0
|
|
|
|
|
|
|
|
447
|
0
|
|
|
|
|
|
@{$self->{bco_cnt}} = @{$self->{btr_cnt}}; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
} |
|
449
|
|
|
|
|
|
|
} |
|
450
|
|
|
|
|
|
|
} |
|
451
|
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
# Retrieve sum of raw read counts by current window number from SRBed objects. |
|
453
|
|
|
|
|
|
|
sub retrieve_raw_sum{ |
|
454
|
0
|
|
|
0
|
|
|
my($self,$gdt) = @_; |
|
455
|
0
|
|
|
|
|
|
$self->{rsumTr} = 0; |
|
456
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedTr}}) { |
|
|
0
|
|
|
|
|
|
|
|
457
|
0
|
|
|
|
|
|
$self->{rsumTr} += $b->get_win_count($self->{chrom}, $self->{winN}); |
|
458
|
|
|
|
|
|
|
} |
|
459
|
0
|
|
|
|
|
|
$self->{rsumCo} = 0; |
|
460
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedCo}}) { |
|
|
0
|
|
|
|
|
|
|
|
461
|
0
|
|
|
|
|
|
$self->{rsumCo} += $b->get_win_count($self->{chrom}, $self->{winN}); |
|
462
|
|
|
|
|
|
|
} |
|
463
|
|
|
|
|
|
|
} |
|
464
|
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
# Determine whether current bin counts are above cutoff or not. |
|
466
|
|
|
|
|
|
|
sub isAboveCutoff{ |
|
467
|
0
|
|
|
0
|
|
|
my($self,$gdt) = @_; |
|
468
|
0
|
|
|
|
|
|
die "Empty SRBed vectors encountered in function: isAboveCutoff.\n" |
|
469
|
0
|
0
|
0
|
|
|
|
if @{$gdt->{bedTr}} == 0 or @{$gdt->{bedCo}} == 0; |
|
|
0
|
|
|
|
|
|
|
|
470
|
0
|
|
|
|
|
|
my $tagTr = 1; |
|
471
|
0
|
|
|
|
|
|
my $tagCo = 1; |
|
472
|
0
|
0
|
0
|
|
|
|
if($gdt->{bkgEnr} and $gdt->{useBkg} > 0){ # use background as filter. |
|
473
|
0
|
0
|
|
|
|
|
if(!$self->{retrieved}){ # retrieve new count if not done so. |
|
474
|
0
|
|
|
|
|
|
$self->retrieve_new_cnt($gdt); |
|
475
|
|
|
|
|
|
|
} |
|
476
|
0
|
|
|
|
|
|
my $btr_m = mean(@{$self->{btr_cnt}}); |
|
|
0
|
|
|
|
|
|
|
|
477
|
0
|
|
|
|
|
|
my $bco_m = mean(@{$self->{bco_cnt}}); |
|
|
0
|
|
|
|
|
|
|
|
478
|
0
|
|
|
|
|
|
foreach my $c(@{$self->{tr_cnt_}}){ |
|
|
0
|
|
|
|
|
|
|
|
479
|
0
|
0
|
0
|
|
|
|
if(!$c or ($c and $btr_m and $c / $btr_m <= $gdt->{useBkg})){ |
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
480
|
0
|
|
|
|
|
|
$tagTr = 0; |
|
481
|
0
|
|
|
|
|
|
last; |
|
482
|
|
|
|
|
|
|
} |
|
483
|
|
|
|
|
|
|
} |
|
484
|
0
|
0
|
|
|
|
|
if(!$tagTr){ |
|
485
|
0
|
|
|
|
|
|
foreach my $c(@{$self->{co_cnt_}}){ |
|
|
0
|
|
|
|
|
|
|
|
486
|
0
|
0
|
0
|
|
|
|
if(!$c or ($c and $bco_m and $c / $bco_m <= $gdt->{useBkg})){ |
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
487
|
0
|
|
|
|
|
|
$tagCo = 0; |
|
488
|
0
|
|
|
|
|
|
last; |
|
489
|
|
|
|
|
|
|
} |
|
490
|
|
|
|
|
|
|
} |
|
491
|
|
|
|
|
|
|
} |
|
492
|
|
|
|
|
|
|
}else{ # use raw count and equation as filter. |
|
493
|
0
|
|
|
|
|
|
my $i = 0; |
|
494
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedTr}}){ |
|
|
0
|
|
|
|
|
|
|
|
495
|
0
|
|
|
|
|
|
my $rawcnt = $b->get_win_count($self->{chrom}, $self->{winN}); |
|
496
|
0
|
0
|
0
|
|
|
|
if(!$rawcnt or $rawcnt <= $gdt->{meanTr}[$i] + $gdt->{nsd}*$gdt->{stdTr}[$i]){ |
|
497
|
0
|
|
|
|
|
|
$tagTr = 0; |
|
498
|
0
|
|
|
|
|
|
last; |
|
499
|
|
|
|
|
|
|
} |
|
500
|
0
|
|
|
|
|
|
$i++; |
|
501
|
|
|
|
|
|
|
} |
|
502
|
0
|
0
|
|
|
|
|
if(!$tagTr){ |
|
503
|
0
|
|
|
|
|
|
$i = 0; |
|
504
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedCo}}){ |
|
|
0
|
|
|
|
|
|
|
|
505
|
0
|
|
|
|
|
|
my $rawcnt = $b->get_win_count($self->{chrom}, $self->{winN}); |
|
506
|
0
|
0
|
0
|
|
|
|
if(!$rawcnt or $rawcnt <= $gdt->{meanCo}[$i] + $gdt->{nsd}*$gdt->{stdCo}[$i]){ |
|
507
|
0
|
|
|
|
|
|
$tagCo = 0; |
|
508
|
0
|
|
|
|
|
|
last; |
|
509
|
|
|
|
|
|
|
} |
|
510
|
0
|
|
|
|
|
|
$i++; |
|
511
|
|
|
|
|
|
|
} |
|
512
|
|
|
|
|
|
|
} |
|
513
|
|
|
|
|
|
|
} |
|
514
|
0
|
|
0
|
|
|
|
return ($tagTr || $tagCo); |
|
515
|
|
|
|
|
|
|
} |
|
516
|
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
# Calculate statistical scores based on current window counts. |
|
518
|
|
|
|
|
|
|
sub calc_stat{ |
|
519
|
0
|
|
|
0
|
|
|
my($self,$gdt,$meth,$eps) = @_; |
|
520
|
|
|
|
|
|
|
#return $self->{pval} if defined $self->{pval}; # no duplicated calculation please. |
|
521
|
0
|
0
|
|
|
|
|
return 1 if !defined($self->{chrom}); # do nothing if the region is not defined. |
|
522
|
0
|
0
|
|
|
|
|
if(!$self->{retrieved}){ # retrieve new count if not done so. |
|
523
|
0
|
|
|
|
|
|
$self->retrieve_new_cnt($gdt); |
|
524
|
|
|
|
|
|
|
} |
|
525
|
|
|
|
|
|
|
# Check whether counts and sums are ready for stat test. |
|
526
|
0
|
|
|
|
|
|
die "Empty count vectors encountered when doing stat test.\n" |
|
527
|
0
|
0
|
0
|
|
|
|
if @{$self->{tr_cnt}} == 0 or @{$self->{co_cnt}} == 0; |
|
|
0
|
|
|
|
|
|
|
|
528
|
0
|
0
|
0
|
|
|
|
die "Undefined raw sums for treatment or control when doing stat test.\n" |
|
529
|
|
|
|
|
|
|
if !defined($self->{rsumTr}) or !defined($self->{rsumCo}); |
|
530
|
|
|
|
|
|
|
# Noramlized mean count for each group. |
|
531
|
0
|
|
|
|
|
|
my $tr_m = mean(@{$self->{tr_cnt}}); |
|
|
0
|
|
|
|
|
|
|
|
532
|
0
|
|
|
|
|
|
my $co_m = mean(@{$self->{co_cnt}}); |
|
|
0
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
# Special case: zero count for both groups. |
|
534
|
0
|
0
|
0
|
|
|
|
if($tr_m == 0 and $co_m == 0) { |
|
535
|
0
|
|
|
|
|
|
$self->{logFC} = 0; |
|
536
|
0
|
|
|
|
|
|
$self->{dirn} = 'Nons'; |
|
537
|
0
|
|
|
|
|
|
$self->{pval} = 1; |
|
538
|
|
|
|
|
|
|
} |
|
539
|
|
|
|
|
|
|
else { |
|
540
|
0
|
0
|
|
|
|
|
if($co_m == 0) {$self->{logFC} = INFINITE;} |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
else {$self->{logFC} = log2($tr_m/$co_m);} |
|
542
|
0
|
0
|
|
|
|
|
$self->{dirn} = $self->{logFC} > 0? 'Up' : 'Down'; |
|
543
|
|
|
|
|
|
|
# Perform statistical tests to find P-value. |
|
544
|
0
|
0
|
0
|
|
|
|
if($meth eq 'tt'){ # T-test. |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
545
|
0
|
|
|
|
|
|
my $ttest = new Statistics::TTest; |
|
546
|
0
|
|
|
|
|
|
$ttest->load_data(\@{$self->{tr_cnt}}, \@{$self->{co_cnt}}); |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
547
|
0
|
|
|
|
|
|
$self->{pval} = $ttest->t_prob; |
|
548
|
|
|
|
|
|
|
} |
|
549
|
|
|
|
|
|
|
elsif($meth eq 'nb'){ # Negative Binomial test. |
|
550
|
0
|
|
|
|
|
|
my $tr_var = var(@{$self->{tr_cnt}}); |
|
|
0
|
|
|
|
|
|
|
|
551
|
0
|
|
|
|
|
|
my $co_var = var(@{$self->{co_cnt}}); |
|
|
0
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
|
|
553
|
0
|
|
|
|
|
|
my $tr_raw_m = raw_sum_mean($tr_m,$co_m,$gdt->{normTr}); |
|
554
|
0
|
|
|
|
|
|
my $co_raw_m = raw_sum_mean($tr_m,$co_m,$gdt->{normCo}); |
|
555
|
0
|
|
|
|
|
|
my $tr_raw_v = raw_sum_var($tr_var,$tr_m,$co_m,$gdt->{normTr},$tr_raw_m,$eps); |
|
556
|
0
|
|
|
|
|
|
my $co_raw_v = raw_sum_var($co_var,$tr_m,$co_m,$gdt->{normCo},$co_raw_m,$eps); |
|
557
|
|
|
|
|
|
|
|
|
558
|
0
|
|
|
|
|
|
$self->{pval} = nb_pval($self->{rsumTr},$self->{rsumCo},$tr_raw_m,$tr_raw_v,$co_raw_m,$co_raw_v,$eps); |
|
559
|
|
|
|
|
|
|
} |
|
560
|
|
|
|
|
|
|
elsif($meth eq 'gt' or $meth eq 'cs'){ |
|
561
|
0
|
|
|
|
|
|
my $tr_sum = sum(@{$self->{tr_cnt}}); |
|
|
0
|
|
|
|
|
|
|
|
562
|
0
|
|
|
|
|
|
my $co_sum = sum(@{$self->{co_cnt}}); |
|
|
0
|
|
|
|
|
|
|
|
563
|
0
|
|
|
|
|
|
my $a_cnt = [$tr_sum, $co_sum]; # ref to array of normalized counts. |
|
564
|
0
|
|
|
|
|
|
my $tr_rep = scalar @{$self->{tr_cnt}}; |
|
|
0
|
|
|
|
|
|
|
|
565
|
0
|
|
|
|
|
|
my $co_rep = scalar @{$self->{co_cnt}}; |
|
|
0
|
|
|
|
|
|
|
|
566
|
0
|
|
|
|
|
|
my $tot_rep = $tr_rep + $co_rep; |
|
567
|
0
|
|
|
|
|
|
my $a_p = [$tr_rep/$tot_rep, $co_rep/$tot_rep]; # ref to array of probabilities. |
|
568
|
0
|
|
|
|
|
|
my @test_res; # result vector: statistic, DOF, p-value. |
|
569
|
0
|
0
|
|
|
|
|
if($meth eq 'gt'){ |
|
570
|
0
|
|
|
|
|
|
@test_res = gtest_gof($a_cnt, $a_p); |
|
571
|
|
|
|
|
|
|
}else{ |
|
572
|
0
|
|
|
|
|
|
@test_res = chisqtest_gof($a_cnt, $a_p); |
|
573
|
|
|
|
|
|
|
} |
|
574
|
0
|
|
|
|
|
|
$self->{pval} = $test_res[2]; |
|
575
|
|
|
|
|
|
|
} |
|
576
|
|
|
|
|
|
|
else{ |
|
577
|
0
|
|
|
|
|
|
die "Undefined statistical test! Halt execution.\n"; |
|
578
|
|
|
|
|
|
|
} |
|
579
|
|
|
|
|
|
|
} |
|
580
|
0
|
|
|
|
|
|
return $self->{pval}; |
|
581
|
|
|
|
|
|
|
} |
|
582
|
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
# Subroutines for NB test. Copied from Common.pm. Written by Ying Jin. |
|
585
|
|
|
|
|
|
|
# NOTES: In the original diffPermute program, norm constants were multiplied to |
|
586
|
|
|
|
|
|
|
# raw counts to get normalized values. This only saves ignorable execution time |
|
587
|
|
|
|
|
|
|
# but causes confusion. They are now reversed, so do the following subroutines. |
|
588
|
|
|
|
|
|
|
sub raw_sum_mean{ |
|
589
|
0
|
|
|
0
|
|
|
my ($m1,$m2,$norm_ref)=@_; |
|
590
|
0
|
|
|
|
|
|
my $v = 0; |
|
591
|
0
|
|
|
|
|
|
for my $i(@{$norm_ref}){ |
|
|
0
|
|
|
|
|
|
|
|
592
|
0
|
|
|
|
|
|
$v += $i; |
|
593
|
|
|
|
|
|
|
} |
|
594
|
|
|
|
|
|
|
|
|
595
|
0
|
|
|
|
|
|
return $v*($m1+$m2)/2; |
|
596
|
|
|
|
|
|
|
} |
|
597
|
|
|
|
|
|
|
sub raw_sum_var{ |
|
598
|
0
|
|
|
0
|
|
|
my ($base_var,$m1,$m2,$norm,$raw_mean,$eps)=@_; |
|
599
|
0
|
|
|
|
|
|
my $base_mean = ($m1+$m2)/2; |
|
600
|
0
|
|
|
|
|
|
my $z =0; |
|
601
|
0
|
|
|
|
|
|
my $s = 0; |
|
602
|
0
|
|
|
|
|
|
for my $i(@{$norm}){ |
|
|
0
|
|
|
|
|
|
|
|
603
|
0
|
|
|
|
|
|
$z += 1/$i; |
|
604
|
0
|
|
|
|
|
|
$s += $i**2; |
|
605
|
|
|
|
|
|
|
} |
|
606
|
0
|
|
|
|
|
|
$z = $z/@{$norm}; |
|
|
0
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
|
|
608
|
0
|
|
|
|
|
|
my $var = $base_var - $z * $base_mean; |
|
609
|
0
|
0
|
|
|
|
|
if($var < $eps*$base_mean){ |
|
610
|
0
|
|
|
|
|
|
$var = $eps*$base_mean; |
|
611
|
|
|
|
|
|
|
} |
|
612
|
|
|
|
|
|
|
|
|
613
|
0
|
|
|
|
|
|
return $var*$s + $raw_mean; |
|
614
|
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
} |
|
616
|
|
|
|
|
|
|
################################### |
|
617
|
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
|
|
619
|
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
# Class for managing a list of significant regions: add, adjust P-values, output. |
|
621
|
|
|
|
|
|
|
package diffReps::RegionList; |
|
622
|
1
|
|
|
1
|
|
10
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
41
|
|
|
623
|
1
|
|
|
1
|
|
6
|
use MyBioinfo::Common; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
157
|
|
|
624
|
1
|
|
|
1
|
|
7
|
use diffReps::DiffRes qw( cmpsite ); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
979
|
|
|
625
|
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
# Constructor. |
|
627
|
|
|
|
|
|
|
sub new{ |
|
628
|
0
|
|
|
0
|
|
|
my $class = shift; |
|
629
|
0
|
|
|
|
|
|
my $self = { |
|
630
|
|
|
|
|
|
|
regList => [], # region list. |
|
631
|
|
|
|
|
|
|
adjusted => 0 # bool tag for P-value adjustment. |
|
632
|
|
|
|
|
|
|
}; |
|
633
|
0
|
|
|
|
|
|
bless $self, $class; |
|
634
|
0
|
|
|
|
|
|
return $self; |
|
635
|
|
|
|
|
|
|
} |
|
636
|
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
# Add a significant region into list if the P-value is defined. |
|
638
|
|
|
|
|
|
|
sub add{ |
|
639
|
0
|
|
|
0
|
|
|
my($self,$rreg) = @_; |
|
640
|
0
|
0
|
|
|
|
|
if(defined $rreg->{pval}){ |
|
641
|
0
|
|
|
|
|
|
push @{$self->{regList}}, $rreg->dcopy(); |
|
|
0
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
} |
|
643
|
|
|
|
|
|
|
} |
|
644
|
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
# Add a significant region into list if the P-value is defined. |
|
646
|
|
|
|
|
|
|
# Delete the original region. |
|
647
|
|
|
|
|
|
|
sub add_del{ |
|
648
|
0
|
|
|
0
|
|
|
my($self,$rreg) = @_; |
|
649
|
0
|
0
|
|
|
|
|
if(defined $rreg->{pval}){ |
|
650
|
0
|
|
|
|
|
|
push @{$self->{regList}}, $rreg->dcopy(); |
|
|
0
|
|
|
|
|
|
|
|
651
|
0
|
|
|
|
|
|
%{$rreg} = %{new diffReps::ChromaModSite}; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
} |
|
653
|
|
|
|
|
|
|
} |
|
654
|
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
# Append another region list to the end. |
|
656
|
|
|
|
|
|
|
sub append_list{ |
|
657
|
0
|
|
|
0
|
|
|
my($self,$rl) = @_; |
|
658
|
0
|
0
|
|
|
|
|
if(@{$rl->{'regList'}}){ |
|
|
0
|
|
|
|
|
|
|
|
659
|
0
|
|
|
|
|
|
push @{$self->{'regList'}}, @{$rl->{'regList'}}; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
660
|
0
|
|
|
|
|
|
$self->{'adjusted'} = 0; |
|
661
|
|
|
|
|
|
|
} |
|
662
|
|
|
|
|
|
|
} |
|
663
|
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
# Sort all differential sites according to genomic coordinates. |
|
665
|
|
|
|
|
|
|
sub sort{ |
|
666
|
0
|
|
|
0
|
|
|
my $self = shift; |
|
667
|
0
|
|
|
|
|
|
@{$self->{regList}} = sort cmpsite @{$self->{regList}}; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
} |
|
669
|
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
# Adjust P-values for all regions in the list. |
|
671
|
|
|
|
|
|
|
sub adjPval{ |
|
672
|
0
|
|
|
0
|
|
|
my $self = shift; |
|
673
|
0
|
0
|
|
|
|
|
return if $self->{adjusted}; |
|
674
|
0
|
|
|
|
|
|
my $N; # the 'N' in BH formula. |
|
675
|
0
|
0
|
|
|
|
|
if(@_ > 0) {$N = shift;} |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
676
|
0
|
|
|
|
|
|
else {$N = @{$self->{regList}};} |
|
677
|
|
|
|
|
|
|
# Extract all P-values from the list and feed them to an adjustment procedure. |
|
678
|
0
|
|
|
|
|
|
my(@p1, @p2); # pair of region and window P-values. |
|
679
|
0
|
|
|
|
|
|
foreach my $r(@{$self->{regList}}) { |
|
|
0
|
|
|
|
|
|
|
|
680
|
0
|
|
|
|
|
|
push @p1, $r->{pval}; |
|
681
|
0
|
|
|
|
|
|
push @p2, $r->{winP}; |
|
682
|
|
|
|
|
|
|
} |
|
683
|
0
|
|
|
|
|
|
my @q1 = padjBH(\@p1, $N); |
|
684
|
0
|
|
|
|
|
|
my @q2 = padjBH(\@p2, $N); |
|
685
|
0
|
|
|
|
|
|
my $i = 0; # iterator for adjusted P-value vector. |
|
686
|
0
|
|
|
|
|
|
foreach my $r(@{$self->{regList}}) { |
|
|
0
|
|
|
|
|
|
|
|
687
|
0
|
|
|
|
|
|
$r->{padj} = $q1[$i]; |
|
688
|
0
|
|
|
|
|
|
$r->{winQ} = $q2[$i++]; |
|
689
|
|
|
|
|
|
|
} |
|
690
|
|
|
|
|
|
|
# Set bool tag. |
|
691
|
0
|
|
|
|
|
|
$self->{adjusted} = 1; |
|
692
|
|
|
|
|
|
|
} |
|
693
|
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
# Print header line to file handle. |
|
695
|
|
|
|
|
|
|
sub gen_header{ |
|
696
|
0
|
|
|
0
|
|
|
my($self,$hrep) = @_; |
|
697
|
|
|
|
|
|
|
# Print header line. |
|
698
|
0
|
|
|
|
|
|
print $hrep "Chrom\tStart\tEnd\tLength\tTreatment.cnt\tControl.cnt\tTreatment.avg\tControl.avg\tTreatment.enr\tControl.enr\tEvent\tlog2FC\tpval\tpadj\twinSta\twinEnd\twinFC\twinP\twinQ\n"; |
|
699
|
|
|
|
|
|
|
} |
|
700
|
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
# Output all formatted regions. |
|
702
|
|
|
|
|
|
|
sub output{ |
|
703
|
0
|
|
|
0
|
|
|
my($self,$gdt,$h) = @_; |
|
704
|
0
|
0
|
|
|
|
|
if(!$self->{adjusted}) {$self->adjPval();} |
|
|
0
|
|
|
|
|
|
|
|
705
|
0
|
|
|
|
|
|
foreach my $r(@{$self->{regList}}) {$r->print_reg($gdt,$h);} |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
} |
|
707
|
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
# Preloaded methods go here. |
|
712
|
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
# Autoload methods go after =cut, and are processed by the autosplit program. |
|
714
|
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
1; |
|
716
|
|
|
|
|
|
|
__END__ |