line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
1
|
|
|
1
|
|
1008
|
use 5.006; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
126
|
|
2
|
|
|
|
|
|
|
our $VERSION = '1.122'; |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
# Class for a chromatin modification site consisting consecutive significant windows. |
5
|
|
|
|
|
|
|
package diffReps::ChromaModSite; |
6
|
1
|
|
|
1
|
|
6
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
285
|
|
7
|
1
|
|
|
1
|
|
6
|
use MyBioinfo::Common; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
126
|
|
8
|
1
|
|
|
1
|
|
6
|
use MyShortRead::SRBed; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
2252
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
our @ISA = qw(diffReps::SlideWindow); # Inherited from 'SlideWindow'. |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# Constructor. |
13
|
|
|
|
|
|
|
sub new{ |
14
|
0
|
|
|
0
|
0
|
|
my $class = shift; |
15
|
0
|
|
|
|
|
|
my $self = { |
16
|
|
|
|
|
|
|
chrom => undef, |
17
|
|
|
|
|
|
|
start => undef, |
18
|
|
|
|
|
|
|
end => undef, |
19
|
|
|
|
|
|
|
siteCenter => undef, |
20
|
|
|
|
|
|
|
retrieved => 0, # boolean tag: whether count data have been retrieved? |
21
|
|
|
|
|
|
|
tr_cnt => [], # region normalized treatment counts. |
22
|
|
|
|
|
|
|
co_cnt => [], # region normalized control counts. |
23
|
|
|
|
|
|
|
tr_cnt_ => [], # normalized treatment counts using #reads. |
24
|
|
|
|
|
|
|
co_cnt_ => [], # normalized control counts using #reads. |
25
|
|
|
|
|
|
|
btr_cnt => [], # normalized background treatment counts. |
26
|
|
|
|
|
|
|
bco_cnt => [], # normalized background control counts. |
27
|
|
|
|
|
|
|
#trEnr => undef, # treatment enrichment vs. background. |
28
|
|
|
|
|
|
|
#coEnr => undef, # control enrichment vs. background. |
29
|
|
|
|
|
|
|
rsumTr => undef, # sum of raw treatment counts. |
30
|
|
|
|
|
|
|
rsumCo => undef, # sum of raw control counts. |
31
|
|
|
|
|
|
|
dirn => undef, # change direction: 'Up' or 'Down'. |
32
|
|
|
|
|
|
|
logFC => undef, # log2 fold change. |
33
|
|
|
|
|
|
|
pval => undef, # P-value. |
34
|
|
|
|
|
|
|
padj => undef, # adjusted P-value. |
35
|
|
|
|
|
|
|
winSta => undef, # best window start. |
36
|
|
|
|
|
|
|
#winTr => [], # window normalized treatment counts. |
37
|
|
|
|
|
|
|
#winCo => [], # window normalized control counts. |
38
|
|
|
|
|
|
|
winFC => undef, # window log2 fold change. |
39
|
|
|
|
|
|
|
winP => undef, # window P-value. |
40
|
|
|
|
|
|
|
winQ => undef # window adjusted P-value. |
41
|
|
|
|
|
|
|
}; |
42
|
0
|
|
|
|
|
|
bless $self, $class; |
43
|
0
|
|
|
|
|
|
return $self; |
44
|
|
|
|
|
|
|
} |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# Copy member data into an anonymous hash table. |
47
|
|
|
|
|
|
|
sub dcopy{ |
48
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
49
|
0
|
|
|
|
|
|
my $new = {}; |
50
|
0
|
|
|
|
|
|
%{$new} = %{$self}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# Deep copy array members that are array variables. |
52
|
0
|
|
|
|
|
|
$new->{tr_cnt} = []; @{$new->{tr_cnt}} = @{$self->{tr_cnt}}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
53
|
0
|
|
|
|
|
|
$new->{co_cnt} = []; @{$new->{co_cnt}} = @{$self->{co_cnt}}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
54
|
0
|
|
|
|
|
|
$new->{tr_cnt_} = []; @{$new->{tr_cnt_}} = @{$self->{tr_cnt_}}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
55
|
0
|
|
|
|
|
|
$new->{co_cnt_} = []; @{$new->{co_cnt_}} = @{$self->{co_cnt_}}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
56
|
0
|
|
|
|
|
|
$new->{btr_cnt} = []; @{$new->{btr_cnt}} = @{$self->{btr_cnt}}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
57
|
0
|
|
|
|
|
|
$new->{bco_cnt} = []; @{$new->{bco_cnt}} = @{$self->{bco_cnt}}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
#$new->{winTr} = []; @{$new->{winTr}} = @{$self->{winTr}}; |
59
|
|
|
|
|
|
|
#$new->{winCo} = []; @{$new->{winCo}} = @{$self->{winCo}}; |
60
|
0
|
|
|
|
|
|
bless $new, 'diffReps::ChromaModSite'; |
61
|
0
|
|
|
|
|
|
return $new; |
62
|
|
|
|
|
|
|
} |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
# Initialize a region by a significant window. |
65
|
|
|
|
|
|
|
sub init{ |
66
|
0
|
|
|
0
|
0
|
|
my($self,$gdt,$rwin) = @_; |
67
|
0
|
|
|
|
|
|
$self->{chrom} = $rwin->{chrom}; |
68
|
0
|
|
|
|
|
|
$self->{start} = $rwin->{winN}*$gdt->{step} + 1; |
69
|
0
|
|
|
|
|
|
$self->{end} = $rwin->{winN}*$gdt->{step} + $gdt->{winSize}; |
70
|
0
|
|
|
|
|
|
$self->{siteCenter} = ($self->{start} + $self->{end}) /2; |
71
|
0
|
|
|
|
|
|
$self->{retrieved} = 1; |
72
|
0
|
|
|
|
|
|
@{$self->{tr_cnt}} = @{$rwin->{tr_cnt}}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
73
|
0
|
|
|
|
|
|
@{$self->{co_cnt}} = @{$rwin->{co_cnt}}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
74
|
0
|
|
|
|
|
|
@{$self->{tr_cnt_}} = @{$rwin->{tr_cnt_}}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
75
|
0
|
|
|
|
|
|
@{$self->{co_cnt_}} = @{$rwin->{co_cnt_}}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
76
|
0
|
|
|
|
|
|
@{$self->{btr_cnt}} = @{$rwin->{btr_cnt}}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
77
|
0
|
|
|
|
|
|
@{$self->{bco_cnt}} = @{$rwin->{bco_cnt}}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
78
|
0
|
|
|
|
|
|
$self->{rsumTr} = $rwin->{rsumTr}; |
79
|
0
|
|
|
|
|
|
$self->{rsumCo} = $rwin->{rsumCo}; |
80
|
0
|
|
|
|
|
|
$self->{dirn} = $rwin->{dirn}; |
81
|
0
|
|
|
|
|
|
$self->{logFC} = $rwin->{logFC}; |
82
|
0
|
|
|
|
|
|
$self->{pval} = $rwin->{pval}; |
83
|
0
|
|
|
|
|
|
$self->{winSta} = $self->{start}; |
84
|
|
|
|
|
|
|
#@{$self->{winTr}} = @{$rwin->{tr_cnt}}; |
85
|
|
|
|
|
|
|
#@{$self->{winCo}} = @{$rwin->{co_cnt}}; |
86
|
0
|
|
|
|
|
|
$self->{winFC} = $rwin->{logFC}; |
87
|
0
|
|
|
|
|
|
$self->{winP} = $rwin->{pval}; |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# Expand the genomic coordinates. Replace best window if necessary. |
91
|
|
|
|
|
|
|
# Do not retrieve counts and sums to save some computation. |
92
|
|
|
|
|
|
|
sub expand{ |
93
|
0
|
|
|
0
|
0
|
|
my($self,$gdt,$rwin) = @_; |
94
|
0
|
|
|
|
|
|
$self->{end} = $rwin->{winN}*$gdt->{step} + $gdt->{winSize}; # new region end. |
95
|
0
|
|
|
|
|
|
$self->{siteCenter} = ($self->{start} + $self->{end}) /2; |
96
|
|
|
|
|
|
|
# Invalidate previous diff results. |
97
|
0
|
|
|
|
|
|
$self->reset_content(1); # reset everything but keep direction. |
98
|
|
|
|
|
|
|
# $self->{tr_cnt} = []; |
99
|
|
|
|
|
|
|
# $self->{co_cnt} = []; |
100
|
|
|
|
|
|
|
# $self->{rsumTr} = undef; |
101
|
|
|
|
|
|
|
# $self->{rsumCo} = undef; |
102
|
|
|
|
|
|
|
# $self->{logFC} = undef; |
103
|
|
|
|
|
|
|
# Replace best window if necessary. |
104
|
0
|
0
|
|
|
|
|
if($rwin->{pval} < $self->{winP}){ |
105
|
0
|
|
|
|
|
|
$self->{winSta} = $rwin->{winN}*$gdt->{step} + 1; |
106
|
|
|
|
|
|
|
#@{$self->{winTr}} = @{$rwin->{tr_cnt}}; |
107
|
|
|
|
|
|
|
#@{$self->{winCo}} = @{$rwin->{co_cnt}}; |
108
|
0
|
|
|
|
|
|
$self->{winFC} = $rwin->{logFC}; |
109
|
0
|
|
|
|
|
|
$self->{winP} = $rwin->{pval}; |
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
} |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
# Print a formatted record of the current region. Do nothing if the P-val is not defined. |
114
|
|
|
|
|
|
|
sub print_reg{ |
115
|
0
|
|
|
0
|
0
|
|
my($self,$gdt,$h) = @_; |
116
|
0
|
0
|
|
|
|
|
if(defined $self->{pval}){ |
117
|
|
|
|
|
|
|
# Concatenate treatment and control counts for output. |
118
|
0
|
|
|
|
|
|
my @tr_cnt_fmt = fprecision(2, @{$self->{tr_cnt}}); |
|
0
|
|
|
|
|
|
|
119
|
0
|
|
|
|
|
|
my @co_cnt_fmt = fprecision(2, @{$self->{co_cnt}}); |
|
0
|
|
|
|
|
|
|
120
|
0
|
|
|
|
|
|
my $tr_cnt_str = join(';', @tr_cnt_fmt); |
121
|
0
|
|
|
|
|
|
my $co_cnt_str = join(';', @co_cnt_fmt); |
122
|
0
|
|
|
|
|
|
my($tr_bkg_enr, $co_bkg_enr); |
123
|
0
|
0
|
|
|
|
|
if($gdt->{bkgEnr}){ |
124
|
0
|
|
|
|
|
|
my $btr_m = mean(@{$self->{btr_cnt}}); |
|
0
|
|
|
|
|
|
|
125
|
0
|
|
|
|
|
|
my $bco_m = mean(@{$self->{bco_cnt}}); |
|
0
|
|
|
|
|
|
|
126
|
0
|
0
|
|
|
|
|
$tr_bkg_enr = $btr_m > 0? fprecision(2, mean(@{$self->{tr_cnt_}}) / $btr_m) : INFINITE; |
|
0
|
|
|
|
|
|
|
127
|
0
|
0
|
|
|
|
|
$co_bkg_enr = $bco_m > 0? fprecision(2, mean(@{$self->{co_cnt_}}) / $bco_m) : INFINITE; |
|
0
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
}else{ |
129
|
0
|
|
|
|
|
|
$tr_bkg_enr = 'NA'; |
130
|
0
|
|
|
|
|
|
$co_bkg_enr = 'NA'; |
131
|
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
# Output: chrom,start,end,length,(treatment read count),(control read count),direction,logFC,pval,padj, |
133
|
|
|
|
|
|
|
# (window start),(window end),(window logFC),(window pval),(window padj). |
134
|
0
|
|
|
|
|
|
print $h join("\t", ($self->{chrom}, |
135
|
|
|
|
|
|
|
$self->{start}, $self->{end}, $self->{end}-$self->{start}+1, # region coordinates. |
136
|
|
|
|
|
|
|
$tr_cnt_str, $co_cnt_str, # formatted read counts. |
137
|
0
|
|
|
|
|
|
fprecision(2, &mean(@{$self->{tr_cnt}})), fprecision(2, &mean(@{$self->{co_cnt}})), # avg read count. |
|
0
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
$tr_bkg_enr, $co_bkg_enr, # enrichment vs. background. |
139
|
|
|
|
|
|
|
$self->{dirn}, fprecision(2,$self->{logFC}), # change direction and logFC. |
140
|
|
|
|
|
|
|
$self->{pval}, $self->{padj}, # diff analysis info. |
141
|
|
|
|
|
|
|
$self->{winSta}, $self->{winSta}+$gdt->{winSize}-1, # best window coordinates. |
142
|
|
|
|
|
|
|
fprecision(2,$self->{winFC}), |
143
|
|
|
|
|
|
|
$self->{winP}, $self->{winQ})), "\n"; # best window diff info. |
144
|
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
} |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# Do we need to perform stat test for the region? |
148
|
|
|
|
|
|
|
# If the region contains only one window, this is already done. |
149
|
|
|
|
|
|
|
sub needStat{ |
150
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
151
|
0
|
|
0
|
|
|
|
return (defined($self->{winP}) && !defined($self->{pval})); |
152
|
|
|
|
|
|
|
} |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# Retrieve normalized read counts for the region. Override parent function. |
155
|
|
|
|
|
|
|
sub retrieve_norm_cnt{ |
156
|
0
|
|
|
0
|
0
|
|
my($self,$gdt) = @_; |
157
|
0
|
|
|
|
|
|
my @atr_cnt; # array treatment counts. |
158
|
0
|
|
|
|
|
|
my $i = 0; # iterator for normalization constants. |
159
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedTr}}) { |
|
0
|
|
|
|
|
|
|
160
|
0
|
|
|
|
|
|
push @atr_cnt, ($b->get_region_count($self->{chrom}, $self->{start}, $self->{end}) |
161
|
|
|
|
|
|
|
/ $gdt->{normTr}[$i++]); |
162
|
|
|
|
|
|
|
} |
163
|
0
|
|
|
|
|
|
my @aco_cnt; # array control counts. |
164
|
0
|
|
|
|
|
|
$i = 0; |
165
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedCo}}) { |
|
0
|
|
|
|
|
|
|
166
|
0
|
|
|
|
|
|
push @aco_cnt, ($b->get_region_count($self->{chrom}, $self->{start}, $self->{end}) |
167
|
|
|
|
|
|
|
/ $gdt->{normCo}[$i++]); |
168
|
|
|
|
|
|
|
} |
169
|
0
|
|
|
|
|
|
@{$self->{tr_cnt}} = @atr_cnt; |
|
0
|
|
|
|
|
|
|
170
|
0
|
|
|
|
|
|
@{$self->{co_cnt}} = @aco_cnt; |
|
0
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
# Retrieve norm count for background if needed. |
172
|
0
|
0
|
|
|
|
|
if($gdt->{bkgEnr}){ |
173
|
0
|
|
|
|
|
|
my @atr_cnt_; # array treatment counts. |
174
|
0
|
|
|
|
|
|
$i = 0; # iterator for normalization constants. |
175
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedTr}}) { |
|
0
|
|
|
|
|
|
|
176
|
0
|
|
|
|
|
|
push @atr_cnt_, ($b->get_region_count($self->{chrom}, $self->{start}, $self->{end}) / $gdt->{normTr_}->[$i++]); |
177
|
|
|
|
|
|
|
} |
178
|
0
|
|
|
|
|
|
@{$self->{tr_cnt_}} = @atr_cnt_; |
|
0
|
|
|
|
|
|
|
179
|
0
|
|
|
|
|
|
my @aco_cnt_; # array treatment counts. |
180
|
0
|
|
|
|
|
|
$i = 0; # iterator for normalization constants. |
181
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedCo}}) { |
|
0
|
|
|
|
|
|
|
182
|
0
|
|
|
|
|
|
push @aco_cnt_, ($b->get_region_count($self->{chrom}, $self->{start}, $self->{end}) / $gdt->{normCo_}->[$i++]); |
183
|
|
|
|
|
|
|
} |
184
|
0
|
|
|
|
|
|
@{$self->{co_cnt_}} = @aco_cnt_; |
|
0
|
|
|
|
|
|
|
185
|
0
|
0
|
|
|
|
|
if(@{$gdt->{bedBtr}}){ |
|
0
|
|
|
|
|
|
|
186
|
0
|
|
|
|
|
|
my @abtr_cnt; # array background treatment counts. |
187
|
0
|
|
|
|
|
|
$i = 0; # iterator for normalization constants. |
188
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedBtr}}) { |
|
0
|
|
|
|
|
|
|
189
|
0
|
|
|
|
|
|
push @abtr_cnt, ($b->get_region_count($self->{chrom}, $self->{start}, $self->{end}) / $gdt->{normBtr}->[$i++]); |
190
|
|
|
|
|
|
|
} |
191
|
0
|
|
|
|
|
|
@{$self->{btr_cnt}} = @abtr_cnt; |
|
0
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
} |
193
|
0
|
0
|
|
|
|
|
if(@{$gdt->{bedBco}}){ |
|
0
|
|
|
|
|
|
|
194
|
0
|
|
|
|
|
|
my @abco_cnt; # array background treatment counts. |
195
|
0
|
|
|
|
|
|
$i = 0; # iterator for normalization constants. |
196
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedBco}}) { |
|
0
|
|
|
|
|
|
|
197
|
0
|
|
|
|
|
|
push @abco_cnt, ($b->get_region_count($self->{chrom}, $self->{start}, $self->{end}) / $gdt->{normBco}->[$i++]); |
198
|
|
|
|
|
|
|
} |
199
|
0
|
|
|
|
|
|
@{$self->{bco_cnt}} = @abco_cnt; |
|
0
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
} |
201
|
0
|
0
|
|
|
|
|
if(@{$self->{btr_cnt}} == 0){ |
|
0
|
|
|
|
|
|
|
202
|
0
|
|
|
|
|
|
@{$self->{btr_cnt}} = @{$self->{bco_cnt}}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
} |
204
|
0
|
0
|
|
|
|
|
if(@{$self->{bco_cnt}} == 0){ |
|
0
|
|
|
|
|
|
|
205
|
0
|
|
|
|
|
|
@{$self->{bco_cnt}} = @{$self->{btr_cnt}}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
} |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
# Retrieve sum of raw read counts for the region. Override parent function. |
211
|
|
|
|
|
|
|
sub retrieve_raw_sum{ |
212
|
0
|
|
|
0
|
0
|
|
my($self,$gdt) = @_; |
213
|
0
|
|
|
|
|
|
$self->{rsumTr} = 0; |
214
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedTr}}) { |
|
0
|
|
|
|
|
|
|
215
|
0
|
|
|
|
|
|
$self->{rsumTr} += $b->get_region_count($self->{chrom}, $self->{start}, $self->{end}); |
216
|
|
|
|
|
|
|
} |
217
|
0
|
|
|
|
|
|
$self->{rsumCo} = 0; |
218
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedCo}}) { |
|
0
|
|
|
|
|
|
|
219
|
0
|
|
|
|
|
|
$self->{rsumCo} += $b->get_region_count($self->{chrom}, $self->{start}, $self->{end}); |
220
|
|
|
|
|
|
|
} |
221
|
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
# Determine whether a window is within gap. |
224
|
|
|
|
|
|
|
sub isWithinGap{ |
225
|
0
|
|
|
0
|
0
|
|
my($self,$gdt,$rwin) = @_; |
226
|
0
|
0
|
|
|
|
|
return 0 if !defined $self->{chrom}; |
227
|
0
|
|
|
|
|
|
my $winSta = $rwin->{winN}*$gdt->{step}+1; |
228
|
0
|
|
0
|
|
|
|
return ($winSta - $self->{end}-1 <= $gdt->{gap} and $rwin->{dirn} eq $self->{dirn}); |
229
|
|
|
|
|
|
|
} |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
# Class for a sliding window of fixed size. |
233
|
|
|
|
|
|
|
package diffReps::SlideWindow; |
234
|
|
|
|
|
|
|
|
235
|
1
|
|
|
1
|
|
7
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
37
|
|
236
|
1
|
|
|
1
|
|
1021
|
use Statistics::TTest; |
|
1
|
|
|
|
|
41125
|
|
|
1
|
|
|
|
|
33
|
|
237
|
1
|
|
|
1
|
|
741
|
use MyBioinfo::NBTest; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
60
|
|
238
|
1
|
|
|
1
|
|
7
|
use MyBioinfo::Common; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
104
|
|
239
|
1
|
|
|
1
|
|
1058
|
use MyBioinfo::Math qw( gtest_gof chisqtest_gof ); |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
82
|
|
240
|
1
|
|
|
1
|
|
8
|
use MyShortRead::SRBed; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
2758
|
|
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
# Constructor. |
243
|
|
|
|
|
|
|
sub new{ |
244
|
0
|
|
|
0
|
|
|
my $class = shift; |
245
|
0
|
|
|
|
|
|
my $self = { |
246
|
|
|
|
|
|
|
chrom => undef, |
247
|
|
|
|
|
|
|
winN => undef, # window number to retrieve count from SRBed class. |
248
|
|
|
|
|
|
|
maxN => undef, # maximum window number. |
249
|
|
|
|
|
|
|
retrieved => 0, # boolean tag: whether count data have been retrieved? |
250
|
|
|
|
|
|
|
tr_cnt => [], # normalized treatment counts. |
251
|
|
|
|
|
|
|
co_cnt => [], # normalized control counts. |
252
|
|
|
|
|
|
|
tr_cnt_ => [], # normalized treatment counts using #reads. |
253
|
|
|
|
|
|
|
co_cnt_ => [], # normalized control counts using #reads. |
254
|
|
|
|
|
|
|
btr_cnt => [], # normalized background treatment counts. |
255
|
|
|
|
|
|
|
bco_cnt => [], # normalized background control counts. |
256
|
|
|
|
|
|
|
rsumTr => undef, # sum of raw treatment counts. |
257
|
|
|
|
|
|
|
rsumCo => undef, # sum of raw control counts. |
258
|
|
|
|
|
|
|
dirn => undef, # change direction: 'Up' or 'Down'. |
259
|
|
|
|
|
|
|
logFC => undef, # log2 fold change. |
260
|
|
|
|
|
|
|
pval => undef # P-value. |
261
|
|
|
|
|
|
|
}; |
262
|
0
|
|
|
|
|
|
bless $self, $class; |
263
|
0
|
|
|
|
|
|
return $self; |
264
|
|
|
|
|
|
|
} |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
# Set the current chromosome name, max window number and reset everything. |
267
|
|
|
|
|
|
|
# Obtain counts and sums for the first window. |
268
|
|
|
|
|
|
|
sub set_chrom{ |
269
|
0
|
|
|
0
|
|
|
my($self,$gdt,$chrom,$maxN) = @_; |
270
|
0
|
|
|
|
|
|
$self->{chrom} = $chrom; |
271
|
0
|
|
|
|
|
|
$self->{maxN} = $maxN; |
272
|
0
|
|
|
|
|
|
$self->{winN} = 0; |
273
|
0
|
|
|
|
|
|
$self->reset_content; |
274
|
|
|
|
|
|
|
#$self->{retrieved} = 0; |
275
|
|
|
|
|
|
|
# $self->retrieve_norm_cnt($gdt); |
276
|
|
|
|
|
|
|
# $self->retrieve_raw_sum($gdt); |
277
|
|
|
|
|
|
|
#$self->{trEnr} = undef; |
278
|
|
|
|
|
|
|
#$self->{coEnr} = undef; |
279
|
|
|
|
|
|
|
# $self->{dirn} = undef; # change direction: 'Up' or 'Down'. |
280
|
|
|
|
|
|
|
# $self->{logFC} = undef; # log2 fold change. |
281
|
|
|
|
|
|
|
# $self->{pval} = undef; # P-value. |
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
# Reset window number. |
285
|
|
|
|
|
|
|
sub reset_winN{ |
286
|
0
|
|
|
0
|
|
|
my $self = shift; |
287
|
0
|
|
|
|
|
|
$self->{winN} = 0; |
288
|
|
|
|
|
|
|
} |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
# Reset window/region content. |
291
|
|
|
|
|
|
|
sub reset_content{ |
292
|
0
|
|
|
0
|
|
|
my($self,$keepdi) = @_; |
293
|
0
|
|
|
|
|
|
$self->{retrieved} = 0; # boolean tag: whether count data have been retrieved? |
294
|
0
|
|
|
|
|
|
$self->{tr_cnt} = []; # normalized treatment counts. |
295
|
0
|
|
|
|
|
|
$self->{co_cnt} = []; # normalized control counts. |
296
|
0
|
|
|
|
|
|
$self->{tr_cnt_} = []; # normalized treatment counts using #reads. |
297
|
0
|
|
|
|
|
|
$self->{co_cnt_} = []; # normalized control counts using #reads. |
298
|
0
|
|
|
|
|
|
$self->{btr_cnt} = []; # normalized background treatment counts. |
299
|
0
|
|
|
|
|
|
$self->{bco_cnt} = []; # normalized background control counts. |
300
|
0
|
|
|
|
|
|
$self->{rsumTr} = undef; # sum of raw treatment counts. |
301
|
0
|
|
|
|
|
|
$self->{rsumCo} = undef; # sum of raw control counts. |
302
|
0
|
0
|
|
|
|
|
$self->{dirn} = undef unless $keepdi; # change direction: 'Up' or 'Down'. |
303
|
0
|
|
|
|
|
|
$self->{logFC} = undef; # log2 fold change. |
304
|
0
|
|
|
|
|
|
$self->{pval} = undef; # P-value. |
305
|
|
|
|
|
|
|
} |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
# Retrieve norm/raw count given current window/region location. |
308
|
|
|
|
|
|
|
sub retrieve_new_cnt{ |
309
|
0
|
|
|
0
|
|
|
my($self,$gdt) = @_; |
310
|
0
|
|
|
|
|
|
$self->{retrieved} = 1; |
311
|
0
|
|
|
|
|
|
$self->retrieve_norm_cnt($gdt); |
312
|
0
|
|
|
|
|
|
$self->retrieve_raw_sum($gdt); |
313
|
|
|
|
|
|
|
} |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
# Shift the window one position to the right and obtain new counts and sums. |
316
|
|
|
|
|
|
|
sub move_forward{ |
317
|
0
|
|
|
0
|
|
|
my($self,$gdt) = @_; |
318
|
0
|
0
|
|
|
|
|
if($self->{winN} <= $self->{maxN}-1) { |
|
0
|
|
|
|
|
|
|
319
|
0
|
|
|
|
|
|
$self->{winN}++; |
320
|
0
|
|
|
|
|
|
$self->reset_content; |
321
|
|
|
|
|
|
|
# $self->retrieve_norm_cnt($gdt); |
322
|
|
|
|
|
|
|
# $self->retrieve_raw_sum($gdt); |
323
|
|
|
|
|
|
|
#$self->{trEnr} = undef; |
324
|
|
|
|
|
|
|
#$self->{coEnr} = undef; |
325
|
|
|
|
|
|
|
#$self->{dirn} = undef; # change direction: 'Up' or 'Down'. |
326
|
|
|
|
|
|
|
#$self->{logFC} = undef; # log2 fold change. |
327
|
|
|
|
|
|
|
#$self->{pval} = undef; # P-value. |
328
|
0
|
|
|
|
|
|
return $self->{winN}; |
329
|
|
|
|
|
|
|
} |
330
|
|
|
|
|
|
|
# elsif($self->{winN} == $self->{maxN}-1){ |
331
|
|
|
|
|
|
|
# $self->{winN} = $self->{maxN}; |
332
|
|
|
|
|
|
|
# $self->{tr_cnt} = []; |
333
|
|
|
|
|
|
|
# $self->{co_cnt} = []; |
334
|
|
|
|
|
|
|
# $self->{tr_cnt_} = []; |
335
|
|
|
|
|
|
|
# $self->{co_cnt_} = []; |
336
|
|
|
|
|
|
|
# $self->{btr_cnt} = []; |
337
|
|
|
|
|
|
|
# $self->{bco_cnt} = []; |
338
|
|
|
|
|
|
|
# #$self->{trEnr} = undef; |
339
|
|
|
|
|
|
|
# #$self->{coEnr} = undef; |
340
|
|
|
|
|
|
|
# $self->{dirn} = undef; # change direction: 'Up' or 'Down'. |
341
|
|
|
|
|
|
|
# $self->{logFC} = undef; # log2 fold change. |
342
|
|
|
|
|
|
|
# $self->{pval} = undef; # P-value. |
343
|
|
|
|
|
|
|
# return $self->{winN}; |
344
|
|
|
|
|
|
|
# } |
345
|
|
|
|
|
|
|
else {return $self->{maxN};} |
346
|
|
|
|
|
|
|
# Max valid window index should be maxN-1. |
347
|
|
|
|
|
|
|
} |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
# Return current window number. |
350
|
|
|
|
|
|
|
sub get_winN{ |
351
|
0
|
|
|
0
|
|
|
my $self = shift; |
352
|
0
|
0
|
|
|
|
|
if($self->{winN} < $self->{maxN}) {return $self->{winN};} |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
else {return -1;} |
354
|
|
|
|
|
|
|
} |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
# Return chromosome name. |
357
|
|
|
|
|
|
|
sub get_chrom{ |
358
|
0
|
|
|
0
|
|
|
my $self = shift; |
359
|
0
|
|
|
|
|
|
return $self->{chrom}; |
360
|
|
|
|
|
|
|
} |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
# Get normalized read counts for external procedures. |
363
|
|
|
|
|
|
|
sub get_cnt_tr{ |
364
|
0
|
|
|
0
|
|
|
my $self = shift; |
365
|
0
|
|
|
|
|
|
return @{$self->{tr_cnt}}; |
|
0
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
} |
367
|
|
|
|
|
|
|
sub get_cnt_co{ |
368
|
0
|
|
|
0
|
|
|
my $self = shift; |
369
|
0
|
|
|
|
|
|
return @{$self->{co_cnt}}; |
|
0
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
} |
371
|
|
|
|
|
|
|
sub get_cnt_tr_{ |
372
|
0
|
|
|
0
|
|
|
my $self = shift; |
373
|
0
|
|
|
|
|
|
return @{$self->{tr_cnt_}}; |
|
0
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
} |
375
|
|
|
|
|
|
|
sub get_cnt_co_{ |
376
|
0
|
|
|
0
|
|
|
my $self = shift; |
377
|
0
|
|
|
|
|
|
return @{$self->{co_cnt_}}; |
|
0
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
} |
379
|
|
|
|
|
|
|
sub get_cnt_btr{ |
380
|
0
|
|
|
0
|
|
|
my $self = shift; |
381
|
0
|
|
|
|
|
|
return @{$self->{btr_cnt}}; |
|
0
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
} |
383
|
|
|
|
|
|
|
sub get_cnt_bco{ |
384
|
0
|
|
|
0
|
|
|
my $self = shift; |
385
|
0
|
|
|
|
|
|
return @{$self->{bco_cnt}}; |
|
0
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
} |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
# Get differential analysis info. |
389
|
|
|
|
|
|
|
sub get_diff_info{ |
390
|
0
|
|
|
0
|
|
|
my $self = shift; |
391
|
|
|
|
|
|
|
return { |
392
|
0
|
|
|
|
|
|
dirn => $self->{dirn}, |
393
|
|
|
|
|
|
|
logFC => $self->{logFC}, |
394
|
|
|
|
|
|
|
pval => $self->{pval} |
395
|
|
|
|
|
|
|
}; # anonymous hash. |
396
|
|
|
|
|
|
|
} |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
# Retrieve normalized read counts by current window number from SRBed objects. |
399
|
|
|
|
|
|
|
sub retrieve_norm_cnt{ |
400
|
0
|
|
|
0
|
|
|
my($self,$gdt) = @_; |
401
|
0
|
|
|
|
|
|
my @atr_cnt; # array treatment counts. |
402
|
0
|
|
|
|
|
|
my $i = 0; # iterator for normalization constants. |
403
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedTr}}) { |
|
0
|
|
|
|
|
|
|
404
|
0
|
|
|
|
|
|
push @atr_cnt, ($b->get_win_count($self->{chrom}, $self->{winN}) / $gdt->{normTr}->[$i++]); |
405
|
|
|
|
|
|
|
} |
406
|
0
|
|
|
|
|
|
@{$self->{tr_cnt}} = @atr_cnt; |
|
0
|
|
|
|
|
|
|
407
|
0
|
|
|
|
|
|
my @aco_cnt; # array control counts. |
408
|
0
|
|
|
|
|
|
$i = 0; |
409
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedCo}}) { |
|
0
|
|
|
|
|
|
|
410
|
0
|
|
|
|
|
|
push @aco_cnt, ($b->get_win_count($self->{chrom}, $self->{winN}) / $gdt->{normCo}->[$i++]); |
411
|
|
|
|
|
|
|
} |
412
|
0
|
|
|
|
|
|
@{$self->{co_cnt}} = @aco_cnt; |
|
0
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
# Retrieve norm count for background if needed. |
414
|
0
|
0
|
|
|
|
|
if($gdt->{bkgEnr}){ |
415
|
0
|
|
|
|
|
|
my @atr_cnt_; # array treatment counts. |
416
|
0
|
|
|
|
|
|
$i = 0; # iterator for normalization constants. |
417
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedTr}}) { |
|
0
|
|
|
|
|
|
|
418
|
0
|
|
|
|
|
|
push @atr_cnt_, ($b->get_win_count($self->{chrom}, $self->{winN}) / $gdt->{normTr_}->[$i++]); |
419
|
|
|
|
|
|
|
} |
420
|
0
|
|
|
|
|
|
@{$self->{tr_cnt_}} = @atr_cnt_; |
|
0
|
|
|
|
|
|
|
421
|
0
|
|
|
|
|
|
my @aco_cnt_; # array treatment counts. |
422
|
0
|
|
|
|
|
|
$i = 0; # iterator for normalization constants. |
423
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedCo}}) { |
|
0
|
|
|
|
|
|
|
424
|
0
|
|
|
|
|
|
push @aco_cnt_, ($b->get_win_count($self->{chrom}, $self->{winN}) / $gdt->{normCo_}->[$i++]); |
425
|
|
|
|
|
|
|
} |
426
|
0
|
|
|
|
|
|
@{$self->{co_cnt_}} = @aco_cnt_; |
|
0
|
|
|
|
|
|
|
427
|
0
|
0
|
|
|
|
|
if(@{$gdt->{bedBtr}}){ |
|
0
|
|
|
|
|
|
|
428
|
0
|
|
|
|
|
|
my @abtr_cnt; # array background treatment counts. |
429
|
0
|
|
|
|
|
|
$i = 0; # iterator for normalization constants. |
430
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedBtr}}) { |
|
0
|
|
|
|
|
|
|
431
|
0
|
|
|
|
|
|
push @abtr_cnt, ($b->get_win_count($self->{chrom}, $self->{winN}) / $gdt->{normBtr}->[$i++]); |
432
|
|
|
|
|
|
|
} |
433
|
0
|
|
|
|
|
|
@{$self->{btr_cnt}} = @abtr_cnt; |
|
0
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
} |
435
|
0
|
0
|
|
|
|
|
if(@{$gdt->{bedBco}}){ |
|
0
|
|
|
|
|
|
|
436
|
0
|
|
|
|
|
|
my @abco_cnt; # array background treatment counts. |
437
|
0
|
|
|
|
|
|
$i = 0; # iterator for normalization constants. |
438
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedBco}}) { |
|
0
|
|
|
|
|
|
|
439
|
0
|
|
|
|
|
|
push @abco_cnt, ($b->get_win_count($self->{chrom}, $self->{winN}) / $gdt->{normBco}->[$i++]); |
440
|
|
|
|
|
|
|
} |
441
|
0
|
|
|
|
|
|
@{$self->{bco_cnt}} = @abco_cnt; |
|
0
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
} |
443
|
0
|
0
|
|
|
|
|
if(@{$self->{btr_cnt}} == 0){ |
|
0
|
|
|
|
|
|
|
444
|
0
|
|
|
|
|
|
@{$self->{btr_cnt}} = @{$self->{bco_cnt}}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
} |
446
|
0
|
0
|
|
|
|
|
if(@{$self->{bco_cnt}} == 0){ |
|
0
|
|
|
|
|
|
|
447
|
0
|
|
|
|
|
|
@{$self->{bco_cnt}} = @{$self->{btr_cnt}}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
} |
449
|
|
|
|
|
|
|
} |
450
|
|
|
|
|
|
|
} |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
# Retrieve sum of raw read counts by current window number from SRBed objects. |
453
|
|
|
|
|
|
|
sub retrieve_raw_sum{ |
454
|
0
|
|
|
0
|
|
|
my($self,$gdt) = @_; |
455
|
0
|
|
|
|
|
|
$self->{rsumTr} = 0; |
456
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedTr}}) { |
|
0
|
|
|
|
|
|
|
457
|
0
|
|
|
|
|
|
$self->{rsumTr} += $b->get_win_count($self->{chrom}, $self->{winN}); |
458
|
|
|
|
|
|
|
} |
459
|
0
|
|
|
|
|
|
$self->{rsumCo} = 0; |
460
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedCo}}) { |
|
0
|
|
|
|
|
|
|
461
|
0
|
|
|
|
|
|
$self->{rsumCo} += $b->get_win_count($self->{chrom}, $self->{winN}); |
462
|
|
|
|
|
|
|
} |
463
|
|
|
|
|
|
|
} |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
# Determine whether current bin counts are above cutoff or not. |
466
|
|
|
|
|
|
|
sub isAboveCutoff{ |
467
|
0
|
|
|
0
|
|
|
my($self,$gdt) = @_; |
468
|
0
|
|
|
|
|
|
die "Empty SRBed vectors encountered in function: isAboveCutoff.\n" |
469
|
0
|
0
|
0
|
|
|
|
if @{$gdt->{bedTr}} == 0 or @{$gdt->{bedCo}} == 0; |
|
0
|
|
|
|
|
|
|
470
|
0
|
|
|
|
|
|
my $tagTr = 1; |
471
|
0
|
|
|
|
|
|
my $tagCo = 1; |
472
|
0
|
0
|
0
|
|
|
|
if($gdt->{bkgEnr} and $gdt->{useBkg} > 0){ # use background as filter. |
473
|
0
|
0
|
|
|
|
|
if(!$self->{retrieved}){ # retrieve new count if not done so. |
474
|
0
|
|
|
|
|
|
$self->retrieve_new_cnt($gdt); |
475
|
|
|
|
|
|
|
} |
476
|
0
|
|
|
|
|
|
my $btr_m = mean(@{$self->{btr_cnt}}); |
|
0
|
|
|
|
|
|
|
477
|
0
|
|
|
|
|
|
my $bco_m = mean(@{$self->{bco_cnt}}); |
|
0
|
|
|
|
|
|
|
478
|
0
|
|
|
|
|
|
foreach my $c(@{$self->{tr_cnt_}}){ |
|
0
|
|
|
|
|
|
|
479
|
0
|
0
|
0
|
|
|
|
if(!$c or ($c and $btr_m and $c / $btr_m <= $gdt->{useBkg})){ |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
480
|
0
|
|
|
|
|
|
$tagTr = 0; |
481
|
0
|
|
|
|
|
|
last; |
482
|
|
|
|
|
|
|
} |
483
|
|
|
|
|
|
|
} |
484
|
0
|
0
|
|
|
|
|
if(!$tagTr){ |
485
|
0
|
|
|
|
|
|
foreach my $c(@{$self->{co_cnt_}}){ |
|
0
|
|
|
|
|
|
|
486
|
0
|
0
|
0
|
|
|
|
if(!$c or ($c and $bco_m and $c / $bco_m <= $gdt->{useBkg})){ |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
487
|
0
|
|
|
|
|
|
$tagCo = 0; |
488
|
0
|
|
|
|
|
|
last; |
489
|
|
|
|
|
|
|
} |
490
|
|
|
|
|
|
|
} |
491
|
|
|
|
|
|
|
} |
492
|
|
|
|
|
|
|
}else{ # use raw count and equation as filter. |
493
|
0
|
|
|
|
|
|
my $i = 0; |
494
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedTr}}){ |
|
0
|
|
|
|
|
|
|
495
|
0
|
|
|
|
|
|
my $rawcnt = $b->get_win_count($self->{chrom}, $self->{winN}); |
496
|
0
|
0
|
0
|
|
|
|
if(!$rawcnt or $rawcnt <= $gdt->{meanTr}[$i] + $gdt->{nsd}*$gdt->{stdTr}[$i]){ |
497
|
0
|
|
|
|
|
|
$tagTr = 0; |
498
|
0
|
|
|
|
|
|
last; |
499
|
|
|
|
|
|
|
} |
500
|
0
|
|
|
|
|
|
$i++; |
501
|
|
|
|
|
|
|
} |
502
|
0
|
0
|
|
|
|
|
if(!$tagTr){ |
503
|
0
|
|
|
|
|
|
$i = 0; |
504
|
0
|
|
|
|
|
|
foreach my $b(@{$gdt->{bedCo}}){ |
|
0
|
|
|
|
|
|
|
505
|
0
|
|
|
|
|
|
my $rawcnt = $b->get_win_count($self->{chrom}, $self->{winN}); |
506
|
0
|
0
|
0
|
|
|
|
if(!$rawcnt or $rawcnt <= $gdt->{meanCo}[$i] + $gdt->{nsd}*$gdt->{stdCo}[$i]){ |
507
|
0
|
|
|
|
|
|
$tagCo = 0; |
508
|
0
|
|
|
|
|
|
last; |
509
|
|
|
|
|
|
|
} |
510
|
0
|
|
|
|
|
|
$i++; |
511
|
|
|
|
|
|
|
} |
512
|
|
|
|
|
|
|
} |
513
|
|
|
|
|
|
|
} |
514
|
0
|
|
0
|
|
|
|
return ($tagTr || $tagCo); |
515
|
|
|
|
|
|
|
} |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
# Calculate statistical scores based on current window counts. |
518
|
|
|
|
|
|
|
sub calc_stat{ |
519
|
0
|
|
|
0
|
|
|
my($self,$gdt,$meth,$eps) = @_; |
520
|
|
|
|
|
|
|
#return $self->{pval} if defined $self->{pval}; # no duplicated calculation please. |
521
|
0
|
0
|
|
|
|
|
return 1 if !defined($self->{chrom}); # do nothing if the region is not defined. |
522
|
0
|
0
|
|
|
|
|
if(!$self->{retrieved}){ # retrieve new count if not done so. |
523
|
0
|
|
|
|
|
|
$self->retrieve_new_cnt($gdt); |
524
|
|
|
|
|
|
|
} |
525
|
|
|
|
|
|
|
# Check whether counts and sums are ready for stat test. |
526
|
0
|
|
|
|
|
|
die "Empty count vectors encountered when doing stat test.\n" |
527
|
0
|
0
|
0
|
|
|
|
if @{$self->{tr_cnt}} == 0 or @{$self->{co_cnt}} == 0; |
|
0
|
|
|
|
|
|
|
528
|
0
|
0
|
0
|
|
|
|
die "Undefined raw sums for treatment or control when doing stat test.\n" |
529
|
|
|
|
|
|
|
if !defined($self->{rsumTr}) or !defined($self->{rsumCo}); |
530
|
|
|
|
|
|
|
# Noramlized mean count for each group. |
531
|
0
|
|
|
|
|
|
my $tr_m = mean(@{$self->{tr_cnt}}); |
|
0
|
|
|
|
|
|
|
532
|
0
|
|
|
|
|
|
my $co_m = mean(@{$self->{co_cnt}}); |
|
0
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
# Special case: zero count for both groups. |
534
|
0
|
0
|
0
|
|
|
|
if($tr_m == 0 and $co_m == 0) { |
535
|
0
|
|
|
|
|
|
$self->{logFC} = 0; |
536
|
0
|
|
|
|
|
|
$self->{dirn} = 'Nons'; |
537
|
0
|
|
|
|
|
|
$self->{pval} = 1; |
538
|
|
|
|
|
|
|
} |
539
|
|
|
|
|
|
|
else { |
540
|
0
|
0
|
|
|
|
|
if($co_m == 0) {$self->{logFC} = INFINITE;} |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
else {$self->{logFC} = log2($tr_m/$co_m);} |
542
|
0
|
0
|
|
|
|
|
$self->{dirn} = $self->{logFC} > 0? 'Up' : 'Down'; |
543
|
|
|
|
|
|
|
# Perform statistical tests to find P-value. |
544
|
0
|
0
|
0
|
|
|
|
if($meth eq 'tt'){ # T-test. |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
545
|
0
|
|
|
|
|
|
my $ttest = new Statistics::TTest; |
546
|
0
|
|
|
|
|
|
$ttest->load_data(\@{$self->{tr_cnt}}, \@{$self->{co_cnt}}); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
547
|
0
|
|
|
|
|
|
$self->{pval} = $ttest->t_prob; |
548
|
|
|
|
|
|
|
} |
549
|
|
|
|
|
|
|
elsif($meth eq 'nb'){ # Negative Binomial test. |
550
|
0
|
|
|
|
|
|
my $tr_var = var(@{$self->{tr_cnt}}); |
|
0
|
|
|
|
|
|
|
551
|
0
|
|
|
|
|
|
my $co_var = var(@{$self->{co_cnt}}); |
|
0
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
|
553
|
0
|
|
|
|
|
|
my $tr_raw_m = raw_sum_mean($tr_m,$co_m,$gdt->{normTr}); |
554
|
0
|
|
|
|
|
|
my $co_raw_m = raw_sum_mean($tr_m,$co_m,$gdt->{normCo}); |
555
|
0
|
|
|
|
|
|
my $tr_raw_v = raw_sum_var($tr_var,$tr_m,$co_m,$gdt->{normTr},$tr_raw_m,$eps); |
556
|
0
|
|
|
|
|
|
my $co_raw_v = raw_sum_var($co_var,$tr_m,$co_m,$gdt->{normCo},$co_raw_m,$eps); |
557
|
|
|
|
|
|
|
|
558
|
0
|
|
|
|
|
|
$self->{pval} = nb_pval($self->{rsumTr},$self->{rsumCo},$tr_raw_m,$tr_raw_v,$co_raw_m,$co_raw_v,$eps); |
559
|
|
|
|
|
|
|
} |
560
|
|
|
|
|
|
|
elsif($meth eq 'gt' or $meth eq 'cs'){ |
561
|
0
|
|
|
|
|
|
my $tr_sum = sum(@{$self->{tr_cnt}}); |
|
0
|
|
|
|
|
|
|
562
|
0
|
|
|
|
|
|
my $co_sum = sum(@{$self->{co_cnt}}); |
|
0
|
|
|
|
|
|
|
563
|
0
|
|
|
|
|
|
my $a_cnt = [$tr_sum, $co_sum]; # ref to array of normalized counts. |
564
|
0
|
|
|
|
|
|
my $tr_rep = scalar @{$self->{tr_cnt}}; |
|
0
|
|
|
|
|
|
|
565
|
0
|
|
|
|
|
|
my $co_rep = scalar @{$self->{co_cnt}}; |
|
0
|
|
|
|
|
|
|
566
|
0
|
|
|
|
|
|
my $tot_rep = $tr_rep + $co_rep; |
567
|
0
|
|
|
|
|
|
my $a_p = [$tr_rep/$tot_rep, $co_rep/$tot_rep]; # ref to array of probabilities. |
568
|
0
|
|
|
|
|
|
my @test_res; # result vector: statistic, DOF, p-value. |
569
|
0
|
0
|
|
|
|
|
if($meth eq 'gt'){ |
570
|
0
|
|
|
|
|
|
@test_res = gtest_gof($a_cnt, $a_p); |
571
|
|
|
|
|
|
|
}else{ |
572
|
0
|
|
|
|
|
|
@test_res = chisqtest_gof($a_cnt, $a_p); |
573
|
|
|
|
|
|
|
} |
574
|
0
|
|
|
|
|
|
$self->{pval} = $test_res[2]; |
575
|
|
|
|
|
|
|
} |
576
|
|
|
|
|
|
|
else{ |
577
|
0
|
|
|
|
|
|
die "Undefined statistical test! Halt execution.\n"; |
578
|
|
|
|
|
|
|
} |
579
|
|
|
|
|
|
|
} |
580
|
0
|
|
|
|
|
|
return $self->{pval}; |
581
|
|
|
|
|
|
|
} |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
# Subroutines for NB test. Copied from Common.pm. Written by Ying Jin. |
585
|
|
|
|
|
|
|
# NOTES: In the original diffPermute program, norm constants were multiplied to |
586
|
|
|
|
|
|
|
# raw counts to get normalized values. This only saves ignorable execution time |
587
|
|
|
|
|
|
|
# but causes confusion. They are now reversed, so do the following subroutines. |
588
|
|
|
|
|
|
|
sub raw_sum_mean{ |
589
|
0
|
|
|
0
|
|
|
my ($m1,$m2,$norm_ref)=@_; |
590
|
0
|
|
|
|
|
|
my $v = 0; |
591
|
0
|
|
|
|
|
|
for my $i(@{$norm_ref}){ |
|
0
|
|
|
|
|
|
|
592
|
0
|
|
|
|
|
|
$v += $i; |
593
|
|
|
|
|
|
|
} |
594
|
|
|
|
|
|
|
|
595
|
0
|
|
|
|
|
|
return $v*($m1+$m2)/2; |
596
|
|
|
|
|
|
|
} |
597
|
|
|
|
|
|
|
sub raw_sum_var{ |
598
|
0
|
|
|
0
|
|
|
my ($base_var,$m1,$m2,$norm,$raw_mean,$eps)=@_; |
599
|
0
|
|
|
|
|
|
my $base_mean = ($m1+$m2)/2; |
600
|
0
|
|
|
|
|
|
my $z =0; |
601
|
0
|
|
|
|
|
|
my $s = 0; |
602
|
0
|
|
|
|
|
|
for my $i(@{$norm}){ |
|
0
|
|
|
|
|
|
|
603
|
0
|
|
|
|
|
|
$z += 1/$i; |
604
|
0
|
|
|
|
|
|
$s += $i**2; |
605
|
|
|
|
|
|
|
} |
606
|
0
|
|
|
|
|
|
$z = $z/@{$norm}; |
|
0
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
|
608
|
0
|
|
|
|
|
|
my $var = $base_var - $z * $base_mean; |
609
|
0
|
0
|
|
|
|
|
if($var < $eps*$base_mean){ |
610
|
0
|
|
|
|
|
|
$var = $eps*$base_mean; |
611
|
|
|
|
|
|
|
} |
612
|
|
|
|
|
|
|
|
613
|
0
|
|
|
|
|
|
return $var*$s + $raw_mean; |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
} |
616
|
|
|
|
|
|
|
################################### |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
|
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
# Class for managing a list of significant regions: add, adjust P-values, output. |
621
|
|
|
|
|
|
|
package diffReps::RegionList; |
622
|
1
|
|
|
1
|
|
10
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
41
|
|
623
|
1
|
|
|
1
|
|
6
|
use MyBioinfo::Common; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
157
|
|
624
|
1
|
|
|
1
|
|
7
|
use diffReps::DiffRes qw( cmpsite ); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
979
|
|
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
# Constructor. |
627
|
|
|
|
|
|
|
sub new{ |
628
|
0
|
|
|
0
|
|
|
my $class = shift; |
629
|
0
|
|
|
|
|
|
my $self = { |
630
|
|
|
|
|
|
|
regList => [], # region list. |
631
|
|
|
|
|
|
|
adjusted => 0 # bool tag for P-value adjustment. |
632
|
|
|
|
|
|
|
}; |
633
|
0
|
|
|
|
|
|
bless $self, $class; |
634
|
0
|
|
|
|
|
|
return $self; |
635
|
|
|
|
|
|
|
} |
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
# Add a significant region into list if the P-value is defined. |
638
|
|
|
|
|
|
|
sub add{ |
639
|
0
|
|
|
0
|
|
|
my($self,$rreg) = @_; |
640
|
0
|
0
|
|
|
|
|
if(defined $rreg->{pval}){ |
641
|
0
|
|
|
|
|
|
push @{$self->{regList}}, $rreg->dcopy(); |
|
0
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
} |
643
|
|
|
|
|
|
|
} |
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
# Add a significant region into list if the P-value is defined. |
646
|
|
|
|
|
|
|
# Delete the original region. |
647
|
|
|
|
|
|
|
sub add_del{ |
648
|
0
|
|
|
0
|
|
|
my($self,$rreg) = @_; |
649
|
0
|
0
|
|
|
|
|
if(defined $rreg->{pval}){ |
650
|
0
|
|
|
|
|
|
push @{$self->{regList}}, $rreg->dcopy(); |
|
0
|
|
|
|
|
|
|
651
|
0
|
|
|
|
|
|
%{$rreg} = %{new diffReps::ChromaModSite}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
} |
653
|
|
|
|
|
|
|
} |
654
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
# Append another region list to the end. |
656
|
|
|
|
|
|
|
sub append_list{ |
657
|
0
|
|
|
0
|
|
|
my($self,$rl) = @_; |
658
|
0
|
0
|
|
|
|
|
if(@{$rl->{'regList'}}){ |
|
0
|
|
|
|
|
|
|
659
|
0
|
|
|
|
|
|
push @{$self->{'regList'}}, @{$rl->{'regList'}}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
660
|
0
|
|
|
|
|
|
$self->{'adjusted'} = 0; |
661
|
|
|
|
|
|
|
} |
662
|
|
|
|
|
|
|
} |
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
# Sort all differential sites according to genomic coordinates. |
665
|
|
|
|
|
|
|
sub sort{ |
666
|
0
|
|
|
0
|
|
|
my $self = shift; |
667
|
0
|
|
|
|
|
|
@{$self->{regList}} = sort cmpsite @{$self->{regList}}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
} |
669
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
# Adjust P-values for all regions in the list. |
671
|
|
|
|
|
|
|
sub adjPval{ |
672
|
0
|
|
|
0
|
|
|
my $self = shift; |
673
|
0
|
0
|
|
|
|
|
return if $self->{adjusted}; |
674
|
0
|
|
|
|
|
|
my $N; # the 'N' in BH formula. |
675
|
0
|
0
|
|
|
|
|
if(@_ > 0) {$N = shift;} |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
676
|
0
|
|
|
|
|
|
else {$N = @{$self->{regList}};} |
677
|
|
|
|
|
|
|
# Extract all P-values from the list and feed them to an adjustment procedure. |
678
|
0
|
|
|
|
|
|
my(@p1, @p2); # pair of region and window P-values. |
679
|
0
|
|
|
|
|
|
foreach my $r(@{$self->{regList}}) { |
|
0
|
|
|
|
|
|
|
680
|
0
|
|
|
|
|
|
push @p1, $r->{pval}; |
681
|
0
|
|
|
|
|
|
push @p2, $r->{winP}; |
682
|
|
|
|
|
|
|
} |
683
|
0
|
|
|
|
|
|
my @q1 = padjBH(\@p1, $N); |
684
|
0
|
|
|
|
|
|
my @q2 = padjBH(\@p2, $N); |
685
|
0
|
|
|
|
|
|
my $i = 0; # iterator for adjusted P-value vector. |
686
|
0
|
|
|
|
|
|
foreach my $r(@{$self->{regList}}) { |
|
0
|
|
|
|
|
|
|
687
|
0
|
|
|
|
|
|
$r->{padj} = $q1[$i]; |
688
|
0
|
|
|
|
|
|
$r->{winQ} = $q2[$i++]; |
689
|
|
|
|
|
|
|
} |
690
|
|
|
|
|
|
|
# Set bool tag. |
691
|
0
|
|
|
|
|
|
$self->{adjusted} = 1; |
692
|
|
|
|
|
|
|
} |
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
# Print header line to file handle. |
695
|
|
|
|
|
|
|
sub gen_header{ |
696
|
0
|
|
|
0
|
|
|
my($self,$hrep) = @_; |
697
|
|
|
|
|
|
|
# Print header line. |
698
|
0
|
|
|
|
|
|
print $hrep "Chrom\tStart\tEnd\tLength\tTreatment.cnt\tControl.cnt\tTreatment.avg\tControl.avg\tTreatment.enr\tControl.enr\tEvent\tlog2FC\tpval\tpadj\twinSta\twinEnd\twinFC\twinP\twinQ\n"; |
699
|
|
|
|
|
|
|
} |
700
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
# Output all formatted regions. |
702
|
|
|
|
|
|
|
sub output{ |
703
|
0
|
|
|
0
|
|
|
my($self,$gdt,$h) = @_; |
704
|
0
|
0
|
|
|
|
|
if(!$self->{adjusted}) {$self->adjPval();} |
|
0
|
|
|
|
|
|
|
705
|
0
|
|
|
|
|
|
foreach my $r(@{$self->{regList}}) {$r->print_reg($gdt,$h);} |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
} |
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
# Preloaded methods go here. |
712
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
# Autoload methods go after =cut, and are processed by the autosplit program. |
714
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
1; |
716
|
|
|
|
|
|
|
__END__ |