line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# My Perl library to deal with short read sequencing data. |
3
|
|
|
|
|
|
|
# created by Li Shen, Nov 2009. |
4
|
|
|
|
|
|
|
# |
5
|
|
|
|
|
|
|
# strGCPercent added March 2010 - Li Shen |
6
|
|
|
|
|
|
|
# bin_genome_count,read_chrom_len,sep_chrom_bed,del_chrfile added April 2010 - Li Shen |
7
|
|
|
|
|
|
|
# bin_genome_count_direction added June 2010 - Li Shen |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
|
11
|
1
|
|
|
1
|
|
15
|
use 5.006; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
41
|
|
12
|
1
|
|
|
1
|
|
5
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
62
|
|
13
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
55
|
|
14
|
|
|
|
|
|
|
package MyShortRead::MyShortRead; |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
|
17
|
1
|
|
|
1
|
|
5
|
use POSIX; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
9
|
|
18
|
1
|
|
|
1
|
|
4522
|
use Time::HiRes qw(time); |
|
1
|
|
|
|
|
3622
|
|
|
1
|
|
|
|
|
5
|
|
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
require Exporter; |
21
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# Items to export into callers namespace by default. Note: do not export |
24
|
|
|
|
|
|
|
# names by default without a very good reason. Use EXPORT_OK instead. |
25
|
|
|
|
|
|
|
# Do not simply export all your public functions/methods/constants. |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# This allows declaration use MyShortRead::MyShortRead ':all'; |
28
|
|
|
|
|
|
|
# If you do not need this, moving things directly into @EXPORT or @EXPORT_OK |
29
|
|
|
|
|
|
|
# will save memory. |
30
|
|
|
|
|
|
|
our %EXPORT_TAGS = ( 'all' => [ qw( |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
) ] ); |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
our @EXPORT = qw(compare2 overlap2 strGCPercent bin_genome_count bin_genome_count_direction read_chrlen_tbl read_chrlen_ordered sep_chrom_bed del_chrfile order_chr); |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
our $VERSION = '1.00'; |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# Preloaded methods go here. |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
# Comparing two genomic intervals according to start or end position. |
44
|
|
|
|
|
|
|
sub compare2{ |
45
|
0
|
|
|
0
|
0
|
|
my($a,$b) = @_; |
46
|
0
|
|
|
|
|
|
my $cmpby = 'start'; |
47
|
0
|
0
|
|
|
|
|
if(@_ > 2) {$cmpby = $_[2];} |
|
0
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
|
49
|
0
|
|
|
|
|
|
$a->{chrom} =~ /^(chr)?(\w+)$/; |
50
|
0
|
|
|
|
|
|
my $chr1 = $2; |
51
|
0
|
|
|
|
|
|
$b->{chrom} =~ /^(chr)?(\w+)$/; |
52
|
0
|
|
|
|
|
|
my $chr2 = $2; |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# $chr1 = 100 if uc $chr1 eq 'X'; |
55
|
|
|
|
|
|
|
# $chr1 = 101 if uc $chr1 eq 'Y'; |
56
|
|
|
|
|
|
|
# $chr1 = 102 if uc $chr1 eq 'M'; |
57
|
|
|
|
|
|
|
# $chr1 = 199 if uc $chr1 eq 'Z'; |
58
|
|
|
|
|
|
|
# $chr2 = 100 if uc $chr2 eq 'X'; |
59
|
|
|
|
|
|
|
# $chr2 = 101 if uc $chr2 eq 'Y'; |
60
|
|
|
|
|
|
|
# $chr2 = 102 if uc $chr2 eq 'M'; |
61
|
|
|
|
|
|
|
# $chr2 = 199 if uc $chr2 eq 'Z'; |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# return $chr1 <=> $chr2 unless $chr1 == $chr2; |
64
|
|
|
|
|
|
|
|
65
|
0
|
0
|
|
|
|
|
return $chr1 cmp $chr2 unless $chr1 eq $chr2; |
66
|
|
|
|
|
|
|
|
67
|
0
|
0
|
|
|
|
|
if($cmpby eq 'end') {return $a->{'end'} <=> $b->{'end'};} |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
else {return $a->{'start'} <=> $b->{'start'};} |
69
|
|
|
|
|
|
|
} |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
# Order chromosome names. |
72
|
|
|
|
|
|
|
sub order_chr{ |
73
|
0
|
|
|
0
|
0
|
|
my($a,$b) = @_; |
74
|
0
|
|
|
|
|
|
my($chr1, $chr2); |
75
|
|
|
|
|
|
|
# Find chromosome names. |
76
|
0
|
0
|
|
|
|
|
if(exists $a->{'chr'}) {$chr1 = $a->{'chr'};} |
|
0
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
77
|
0
|
|
|
|
|
|
elsif(exists $a->{chrom}) {$chr1 = $a->{chrom};} |
78
|
0
|
|
|
|
|
|
else {warn "Cannot find chromosome name! Order is not determined.\n"; return 0;} |
79
|
0
|
0
|
|
|
|
|
if(exists $b->{'chr'}) {$chr2 = $b->{'chr'};} |
|
0
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
80
|
0
|
|
|
|
|
|
elsif(exists $b->{chrom}) {$chr2 = $b->{chrom};} |
81
|
0
|
|
|
|
|
|
else {warn "Cannot find chromosome name! Order is not determined.\n"; return 0;} |
82
|
|
|
|
|
|
|
|
83
|
0
|
|
|
|
|
|
return $chr1 cmp $chr2; |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# # Get rid of 'chr' if exists. |
86
|
|
|
|
|
|
|
# $chr1 =~ /^(chr)?(\w+)$/; $chr1 = $2; |
87
|
|
|
|
|
|
|
# $chr2 =~ /^(chr)?(\w+)$/; $chr2 = $2; |
88
|
|
|
|
|
|
|
# # Convert letters to large numbers. |
89
|
|
|
|
|
|
|
# $chr1 = 100 if uc $chr1 eq 'X'; |
90
|
|
|
|
|
|
|
# $chr1 = 101 if uc $chr1 eq 'Y'; |
91
|
|
|
|
|
|
|
# $chr1 = 102 if uc $chr1 eq 'M'; |
92
|
|
|
|
|
|
|
# $chr1 = 199 if uc $chr1 eq 'Z'; |
93
|
|
|
|
|
|
|
# $chr2 = 100 if uc $chr2 eq 'X'; |
94
|
|
|
|
|
|
|
# $chr2 = 101 if uc $chr2 eq 'Y'; |
95
|
|
|
|
|
|
|
# $chr2 = 102 if uc $chr2 eq 'M'; |
96
|
|
|
|
|
|
|
# $chr2 = 199 if uc $chr2 eq 'Z'; |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
# return $chr1 <=> $chr2; |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
# Find the interval list with the lowest start or end position. |
102
|
|
|
|
|
|
|
# return the index of the lowest list and set the position through reference. |
103
|
|
|
|
|
|
|
sub find_lowest_list{ |
104
|
0
|
|
|
0
|
0
|
|
my($rlists, $rpos) = @_; |
105
|
0
|
|
|
|
|
|
my $cmpby = 'start'; |
106
|
0
|
0
|
|
|
|
|
if(@_ > 2) {$cmpby = $_[2];} |
|
0
|
|
|
|
|
|
|
107
|
0
|
|
|
|
|
|
my $ll = 0; # initialize lowest list. |
108
|
0
|
|
|
|
|
|
my $lowest_intrv = $rlists->[$ll]{cur_intrv}; # intrv with the lowest end position. |
109
|
0
|
|
|
|
|
|
for my $i(1..$#{$rlists}){ |
|
0
|
|
|
|
|
|
|
110
|
0
|
|
|
|
|
|
my $cur_intrv = $rlists->[$i]{cur_intrv}; |
111
|
0
|
0
|
|
|
|
|
if(compare2($cur_intrv, $lowest_intrv, $cmpby) < 0){ |
112
|
0
|
|
|
|
|
|
$lowest_intrv = $cur_intrv; |
113
|
0
|
|
|
|
|
|
$ll = $i; |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
} |
116
|
|
|
|
|
|
|
# return the lowest position through reference. |
117
|
0
|
0
|
|
|
|
|
${$rpos} = $cmpby eq 'end'? $lowest_intrv->{'end'} : $lowest_intrv->{'start'}; |
|
0
|
|
|
|
|
|
|
118
|
0
|
|
|
|
|
|
return $ll; |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
# Judge whether two intervals overlap. |
122
|
|
|
|
|
|
|
sub overlap2{ |
123
|
0
|
|
|
0
|
0
|
|
my($refintrv1, $refintrv2) = @_; |
124
|
0
|
0
|
|
|
|
|
if($refintrv1->{chrom} eq $refintrv2->{chrom}){ |
125
|
0
|
0
|
0
|
|
|
|
if($refintrv1->{'start'} <= $refintrv2->{'end'} and |
126
|
|
|
|
|
|
|
$refintrv1->{'end'} >= $refintrv2->{'start'}){ |
127
|
0
|
|
|
|
|
|
return 1; |
128
|
|
|
|
|
|
|
} |
129
|
|
|
|
|
|
|
} |
130
|
0
|
|
|
|
|
|
return 0; |
131
|
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# Calculate the GC percentage given a DNA sequence in string representation. |
134
|
|
|
|
|
|
|
sub strGCPercent{ |
135
|
0
|
|
|
0
|
0
|
|
my $dnaSeq = shift; |
136
|
0
|
|
|
|
|
|
my $c = ($dnaSeq =~ s/[gc]/X/gi); # Count the # of G or C without regarding to case. |
137
|
0
|
|
|
|
|
|
my $n = ($dnaSeq =~ s/n/Y/gi); # count the # of 'N's. |
138
|
0
|
|
|
|
|
|
return ($c+$n/2) / length($dnaSeq); |
139
|
|
|
|
|
|
|
} |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
# Given a BED file, bin the genome and count the reads in bins. |
142
|
|
|
|
|
|
|
sub bin_genome_count{ |
143
|
|
|
|
|
|
|
# BED file,bin size,chromosome length,fragment size,reference to bin vector. |
144
|
0
|
|
|
0
|
0
|
|
my($bedfile,$binsize,$chrlen,$fragsize,$r_binVec) = @_; |
145
|
0
|
0
|
|
|
|
|
open HBED, "<", $bedfile or die "Open bed file error: $!\n"; |
146
|
0
|
|
|
|
|
|
$#{$r_binVec} = ceil($chrlen/$binsize)-1; # Calculate bin vector size.\ |
|
0
|
|
|
|
|
|
|
147
|
0
|
|
|
|
|
|
foreach my $i(0..$#{$r_binVec}) {$r_binVec->[$i] = 0;} # Initialize bin vector to zeros. |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
148
|
0
|
|
|
|
|
|
my $line = 0; |
149
|
0
|
|
|
|
|
|
while(){ |
150
|
0
|
|
|
|
|
|
$line++; |
151
|
0
|
|
|
|
|
|
chomp; |
152
|
0
|
|
|
|
|
|
my @cells = split /\t/; |
153
|
0
|
0
|
|
|
|
|
if(@cells < 6){ |
154
|
0
|
|
|
|
|
|
warn "Format error at $bedfile, line:$line. Skip!\n"; |
155
|
0
|
|
|
|
|
|
next; |
156
|
|
|
|
|
|
|
} |
157
|
0
|
|
|
|
|
|
my($chrom,$start,$end,$name,$score,$strand) = @cells; |
158
|
|
|
|
|
|
|
# Shift read location by fragment size. |
159
|
0
|
|
|
|
|
|
my $loc; |
160
|
0
|
0
|
|
|
|
|
if($strand eq '+') {$loc = $start + $fragsize/2;} |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
else {$loc = $end - $fragsize/2;} |
162
|
|
|
|
|
|
|
# Increment read count in bin. |
163
|
|
|
|
|
|
|
# $loc--; # Decrement location to correctly assign the read to bin vector.(NO! BED is 0-based.) |
164
|
0
|
|
|
|
|
|
$r_binVec->[floor($loc/$binsize)]++; |
165
|
|
|
|
|
|
|
} |
166
|
0
|
|
|
|
|
|
close HBED; |
167
|
|
|
|
|
|
|
} |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
# Bin the genome and count the reads with direction. |
170
|
|
|
|
|
|
|
sub bin_genome_count_direction{ |
171
|
|
|
|
|
|
|
# BED file,bin size,chromosome length,fragment size,reference to bin vector. |
172
|
0
|
|
|
0
|
0
|
|
my($bedfile,$binsize,$chrlen,$fragsize,$r_binVec_F,$r_binVec_R) = @_; |
173
|
0
|
0
|
|
|
|
|
open HBED, "<", $bedfile or die "Open bed file error: $!\n"; |
174
|
0
|
|
|
|
|
|
$#{$r_binVec_F} = ceil($chrlen/$binsize)-1; # Calculate bin vector size.\ |
|
0
|
|
|
|
|
|
|
175
|
0
|
|
|
|
|
|
$#{$r_binVec_R} = ceil($chrlen/$binsize)-1; # Calculate bin vector size.\ |
|
0
|
|
|
|
|
|
|
176
|
0
|
|
|
|
|
|
foreach my $i(0..$#{$r_binVec_F}) {$r_binVec_F->[$i] = 0;} # Initialize bin vector to zeros. |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
177
|
0
|
|
|
|
|
|
foreach my $i(0..$#{$r_binVec_R}) {$r_binVec_R->[$i] = 0;} # Initialize bin vector to zeros. |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
178
|
0
|
|
|
|
|
|
my $line = 0; |
179
|
0
|
|
|
|
|
|
while(){ |
180
|
0
|
|
|
|
|
|
$line++; |
181
|
0
|
|
|
|
|
|
chomp; |
182
|
0
|
|
|
|
|
|
my @cells = split; |
183
|
0
|
0
|
|
|
|
|
if(@cells < 6){ |
184
|
0
|
|
|
|
|
|
warn "Format error at $bedfile, line:$line. Skip!\n"; |
185
|
0
|
|
|
|
|
|
next; |
186
|
|
|
|
|
|
|
} |
187
|
0
|
|
|
|
|
|
my($chrom,$start,$end,$name,$score,$strand) = @cells; |
188
|
|
|
|
|
|
|
# Shift read location by fragment size and then increment the read count at the correct location. |
189
|
0
|
|
|
|
|
|
my $loc; |
190
|
0
|
0
|
|
|
|
|
if($strand eq '+') { |
191
|
0
|
|
|
|
|
|
$loc = $start + $fragsize/2; |
192
|
0
|
|
|
|
|
|
$r_binVec_F->[floor($loc/$binsize)]++; |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
else { |
195
|
0
|
|
|
|
|
|
$loc = $end - $fragsize/2; |
196
|
0
|
|
|
|
|
|
$r_binVec_R->[floor($loc/$binsize)]++; |
197
|
|
|
|
|
|
|
} |
198
|
|
|
|
|
|
|
} |
199
|
0
|
|
|
|
|
|
close HBED; |
200
|
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
# Read chromosome name and length into a hash table. |
203
|
|
|
|
|
|
|
sub read_chrlen_tbl{ |
204
|
0
|
|
|
0
|
0
|
|
my $chrfile = shift; |
205
|
0
|
0
|
|
|
|
|
open HCHR, "<", $chrfile or die "Cannot open chromosome length file:$!\n"; |
206
|
0
|
|
|
|
|
|
my $line = 0; |
207
|
0
|
|
|
|
|
|
my %chrlen; # hash table for chromosome length. |
208
|
0
|
|
|
|
|
|
while(){ |
209
|
0
|
|
|
|
|
|
$line++; |
210
|
0
|
|
|
|
|
|
chomp; |
211
|
0
|
0
|
|
|
|
|
next if $_ eq ''; # skip empty line. |
212
|
0
|
|
|
|
|
|
my @cells = split; |
213
|
0
|
0
|
|
|
|
|
warn "Insufficient chromosome information at $chrfile, line:$line. Skip!\n" if @cells < 2; |
214
|
0
|
|
|
|
|
|
my($chrom,$len) = @cells; |
215
|
0
|
0
|
0
|
|
|
|
if($chrom =~ /^\S+$/ and $len =~ /^[0-9]+$/){ |
|
0
|
|
|
|
|
|
|
216
|
0
|
|
|
|
|
|
$chrlen{$chrom} = $len; |
217
|
|
|
|
|
|
|
} |
218
|
|
|
|
|
|
|
else {warn "Format error at $chrfile, line:$line. Skip!\n"} |
219
|
|
|
|
|
|
|
} |
220
|
0
|
|
|
|
|
|
return %chrlen; |
221
|
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
# Read chromosome name and length into an array and order them by name. |
224
|
|
|
|
|
|
|
sub read_chrlen_ordered{ |
225
|
0
|
|
|
0
|
0
|
|
my $chrfile = shift; |
226
|
0
|
0
|
|
|
|
|
open HCHR, "<", $chrfile or die "Cannot open chromosome length file:$!\n"; |
227
|
0
|
|
|
|
|
|
my $line = 0; |
228
|
0
|
|
|
|
|
|
my @chrlen; # array for chromosome length. |
229
|
0
|
|
|
|
|
|
while(){ |
230
|
0
|
|
|
|
|
|
$line++; |
231
|
0
|
|
|
|
|
|
chomp; |
232
|
0
|
0
|
|
|
|
|
next if $_ eq ''; # skip empty line. |
233
|
0
|
|
|
|
|
|
my @cells = split; |
234
|
0
|
0
|
|
|
|
|
warn "Insufficient chromosome information at $chrfile, line:$line. Skip!\n" if @cells < 2; |
235
|
0
|
|
|
|
|
|
my($chrom,$len) = @cells; |
236
|
0
|
0
|
0
|
|
|
|
if($chrom =~ /^\S+$/ and $len =~ /^[0-9]+$/){ |
|
0
|
|
|
|
|
|
|
237
|
0
|
|
|
|
|
|
my %rec = ( |
238
|
|
|
|
|
|
|
chrom => $chrom, |
239
|
|
|
|
|
|
|
len => $len); |
240
|
0
|
|
|
|
|
|
push @chrlen, \%rec; |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
else {warn "Format error at $chrfile, line:$line. Skip!\n"} |
243
|
|
|
|
|
|
|
} |
244
|
0
|
|
|
|
|
|
return sort {order_chr($a,$b)} @chrlen; # return ordered list. |
|
0
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
} |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
# Separate a BED file into small ones by chromosome names. |
249
|
|
|
|
|
|
|
sub sep_chrom_bed{ |
250
|
|
|
|
|
|
|
# BED file, reference to chromosome length hash table. |
251
|
0
|
|
|
0
|
0
|
|
my($bedfile,@chrname) = @_; |
252
|
0
|
|
|
|
|
|
my $timestamp = time; |
253
|
0
|
|
|
|
|
|
my $timehead = ".$timestamp."; # timestamp file name head. |
254
|
0
|
|
|
|
|
|
my %hsh_chrfile; # hash of chromosome file handles. |
255
|
|
|
|
|
|
|
my %hsh_chrname; # hash of chromosome file names. |
256
|
|
|
|
|
|
|
# Create chromosome files for writing. |
257
|
0
|
|
|
|
|
|
foreach my $chrom(@chrname){ |
258
|
0
|
|
|
|
|
|
my $chrfile = $timehead . $chrom . ".bed"; |
259
|
0
|
|
|
|
|
|
while(-e $chrfile){ # prevent existing files from being overridden. |
260
|
0
|
|
|
|
|
|
$chrfile = ($timehead + rand) . $chrom . ".bed"; |
261
|
|
|
|
|
|
|
} |
262
|
0
|
|
|
|
|
|
my $fh; |
263
|
0
|
0
|
|
|
|
|
open $fh, ">", $chrfile or die "Cannot create bed file:$!\n"; |
264
|
0
|
|
|
|
|
|
$hsh_chrfile{$chrom} = $fh; |
265
|
0
|
|
|
|
|
|
$hsh_chrname{$chrom} = $chrfile; |
266
|
|
|
|
|
|
|
} |
267
|
0
|
0
|
|
|
|
|
open HBED, "<", $bedfile or die "Cannot open $bedfile:$!\n"; |
268
|
0
|
|
|
|
|
|
my $line = 0; # line number in BED file. |
269
|
0
|
|
|
|
|
|
my $nread = 0; # actual number of reads splitted. |
270
|
|
|
|
|
|
|
# Go through bed file and assign each line to corresponding file. |
271
|
0
|
|
|
|
|
|
while(){ |
272
|
0
|
|
|
|
|
|
$line++; |
273
|
0
|
|
|
|
|
|
chomp; |
274
|
0
|
0
|
|
|
|
|
next if $_ eq ''; |
275
|
0
|
|
|
|
|
|
my @cells = split; |
276
|
0
|
0
|
|
|
|
|
if(@cells < 6){ |
277
|
0
|
|
|
|
|
|
warn "Insufficient tag information at $bedfile, line:$line. Skip!\n"; |
278
|
0
|
|
|
|
|
|
next; |
279
|
|
|
|
|
|
|
} |
280
|
0
|
|
|
|
|
|
my($chrom,$start,$end,$name,$score,$strand) = @cells; |
281
|
0
|
0
|
|
|
|
|
if(exists $hsh_chrfile{$chrom}){ |
282
|
0
|
|
|
|
|
|
$nread++; |
283
|
0
|
|
|
|
|
|
print {$hsh_chrfile{$chrom}} $_ . "\n"; |
|
0
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
} |
285
|
|
|
|
|
|
|
else{ |
286
|
0
|
|
|
|
|
|
warn "Unspecified chromosome name at $bedfile, line:$line.\nCheck your chromosome length file. Skip!\n"; |
287
|
|
|
|
|
|
|
} |
288
|
|
|
|
|
|
|
} |
289
|
|
|
|
|
|
|
# Close all files. |
290
|
0
|
|
|
|
|
|
close HBED; |
291
|
0
|
|
|
|
|
|
foreach my $hdl(values %hsh_chrfile) {close $hdl;} |
|
0
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
# Return chromosome file name vector. |
294
|
0
|
|
|
|
|
|
return ($nread, \%hsh_chrname); |
295
|
|
|
|
|
|
|
} |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
# Delete all separated chromosome files. |
298
|
|
|
|
|
|
|
sub del_chrfile{ |
299
|
0
|
|
|
0
|
0
|
|
foreach my $file(@_) { |
300
|
0
|
0
|
|
|
|
|
unlink $file or warn "Cannot delete $file: $!\n"; |
301
|
|
|
|
|
|
|
} |
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
1; |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
__END__ |