line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# -*-CPerl-*- |
2
|
|
|
|
|
|
|
# Last changed Time-stamp: <2014-12-20 00:34:19 mtw> |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
package Bio::ViennaNGS::Util; |
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
1402
|
use 5.12.0; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
38
|
|
7
|
1
|
|
|
1
|
|
5
|
use Exporter; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
29
|
|
8
|
1
|
|
|
1
|
|
4
|
use version; our $VERSION = qv('0.12_07'); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
5
|
|
9
|
1
|
|
|
1
|
|
73
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
32
|
|
10
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
26
|
|
11
|
1
|
|
|
1
|
|
207
|
use Bio::Perl 1.00690001; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
use Bio::DB::Sam 1.39; |
13
|
|
|
|
|
|
|
use Data::Dumper; |
14
|
|
|
|
|
|
|
use File::Basename qw(basename fileparse); |
15
|
|
|
|
|
|
|
use File::Temp qw(tempfile); |
16
|
|
|
|
|
|
|
use IPC::Cmd qw(can_run run); |
17
|
|
|
|
|
|
|
use Path::Class; |
18
|
|
|
|
|
|
|
use Math::Round; |
19
|
|
|
|
|
|
|
use Carp; |
20
|
|
|
|
|
|
|
use Bio::ViennaNGS::FeatureChain; |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
23
|
|
|
|
|
|
|
our @EXPORT = (); |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
our @EXPORT_OK = qw ( bed_or_bam2bw sortbed bed2bigBed computeTPM |
26
|
|
|
|
|
|
|
featCount_data parse_multicov write_multicov |
27
|
|
|
|
|
|
|
unique_array kmer_enrichment extend_chain |
28
|
|
|
|
|
|
|
parse_bed6 fetch_chrom_sizes); |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^# |
32
|
|
|
|
|
|
|
#^^^^^^^^^^ Variables ^^^^^^^^^^^# |
33
|
|
|
|
|
|
|
#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^# |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
our @featCount = (); |
36
|
|
|
|
|
|
|
my %unique = (); |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^# |
39
|
|
|
|
|
|
|
#^^^^^^^^^^^ Subroutines ^^^^^^^^^^# |
40
|
|
|
|
|
|
|
#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^# |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
sub featCount_data { |
43
|
|
|
|
|
|
|
return \@featCount; |
44
|
|
|
|
|
|
|
} |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
sub bed_or_bam2bw { |
47
|
|
|
|
|
|
|
my ($type,$infile,$chromsizes,$strand,$dest,$want_norm,$size,$scale,$log) = @_; |
48
|
|
|
|
|
|
|
my ($fn_bg_tmp,$fn_bg,$fn_bw); |
49
|
|
|
|
|
|
|
my ($bn,$path,$ext,$cmd); |
50
|
|
|
|
|
|
|
my @processed_files = (); |
51
|
|
|
|
|
|
|
my $factor = 1.; |
52
|
|
|
|
|
|
|
my $this_function = (caller(0))[3]; |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
croak "ERROR [$this_function] \$type is '$type', however it is expected to be either 'bam' or 'bed'\n" |
55
|
|
|
|
|
|
|
unless ($type eq "bam") || ($type eq "bed"); |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
my $genomeCoverageBed = can_run('genomeCoverageBed') or |
58
|
|
|
|
|
|
|
croak "ERROR [$this_function] genomeCoverageBed utility not found"; |
59
|
|
|
|
|
|
|
my $bedGraphToBigWig = can_run('bedGraphToBigWig') or |
60
|
|
|
|
|
|
|
croak "ERROR [$this_function] bedGraphToBigWig utility not found"; |
61
|
|
|
|
|
|
|
my $awk = can_run('awk') or |
62
|
|
|
|
|
|
|
croak "ERROR [$this_function] awk utility not found"; |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
if(defined $log){ |
65
|
|
|
|
|
|
|
open(LOG, ">>", $log) or croak $!; |
66
|
|
|
|
|
|
|
print LOG "LOG [$this_function] \$infile: $infile\n"; |
67
|
|
|
|
|
|
|
print LOG "LOG [$this_function] \$dest: $dest\n"; |
68
|
|
|
|
|
|
|
print LOG "LOG [$this_function] \$chromsizes: $chromsizes\n"; |
69
|
|
|
|
|
|
|
} |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
croak "ERROR [$this_function] Cannot find $infile\n" |
72
|
|
|
|
|
|
|
unless (-e $infile); |
73
|
|
|
|
|
|
|
croak "ERROR [$this_function] $dest does not exist\n" |
74
|
|
|
|
|
|
|
unless (-d $dest); |
75
|
|
|
|
|
|
|
croak "ERROR [$this_function] Cannot find $chromsizes\n" |
76
|
|
|
|
|
|
|
unless (-e $chromsizes); |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
if ($want_norm == 1){ |
79
|
|
|
|
|
|
|
$factor = $scale/$size; |
80
|
|
|
|
|
|
|
print LOG "LOG [$this_function] normalization: $factor = ($scale/$size)\n" |
81
|
|
|
|
|
|
|
if(defined $log); |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
($bn,$path,$ext) = fileparse($infile, qr /\..*/); |
85
|
|
|
|
|
|
|
$fn_bg_tmp = file($dest,$bn.".tmp.bg"); |
86
|
|
|
|
|
|
|
$fn_bg = file($dest,$bn.".bg"); |
87
|
|
|
|
|
|
|
if($strand eq "+"){ |
88
|
|
|
|
|
|
|
$fn_bw = file($dest,$bn.".pos.bw"); |
89
|
|
|
|
|
|
|
} |
90
|
|
|
|
|
|
|
else { |
91
|
|
|
|
|
|
|
$fn_bw = file($dest,$bn.".neg.bw"); |
92
|
|
|
|
|
|
|
} |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
$cmd = "$genomeCoverageBed -bg -scale $factor -split "; |
95
|
|
|
|
|
|
|
if ($type eq "bed"){ $cmd .= "-i $infile -g $chromsizes"; } # chrom.sizes only required for processing BED |
96
|
|
|
|
|
|
|
else { $cmd .= "-ibam $infile "; } |
97
|
|
|
|
|
|
|
$cmd .= " > $fn_bg_tmp"; |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
if($strand eq "-"){ |
100
|
|
|
|
|
|
|
$cmd .= " && cat $fn_bg_tmp | $awk \'{ \$4 = - \$4 ; print \$0 }\' > $fn_bg"; |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
else{ |
103
|
|
|
|
|
|
|
$fn_bg = $fn_bg_tmp; |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
$cmd .= " && $bedGraphToBigWig $fn_bg $chromsizes $fn_bw"; |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
if (defined $log){ print LOG "LOG [$this_function] $cmd\n";} |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
my( $success, $error_message, $full_buf, $stdout_buf, $stderr_buf ) = |
110
|
|
|
|
|
|
|
run( command => $cmd, verbose => 0 ); |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
if( !$success ) { |
113
|
|
|
|
|
|
|
print STDERR "ERROR [$this_function] External command call unsuccessful\n"; |
114
|
|
|
|
|
|
|
print STDERR "ERROR: this is what the command printed:\n"; |
115
|
|
|
|
|
|
|
print join "", @$full_buf; |
116
|
|
|
|
|
|
|
croak $!; |
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
if (defined $log){ close(LOG); } |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
unlink ($fn_bg_tmp); |
121
|
|
|
|
|
|
|
unlink ($fn_bg); |
122
|
|
|
|
|
|
|
return $fn_bw; |
123
|
|
|
|
|
|
|
} |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
sub bed2bigBed { |
126
|
|
|
|
|
|
|
my ($infile,$chromsizes,$dest,$log) = @_; |
127
|
|
|
|
|
|
|
my ($bn,$path,$ext,$cmd,$outfile); |
128
|
|
|
|
|
|
|
my $this_function = (caller(0))[3]; |
129
|
|
|
|
|
|
|
my $bed2bigBed = can_run('bedToBigBed') or |
130
|
|
|
|
|
|
|
croak "ERROR [$this_function] bedToBigBed utility not found"; |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
if (defined $log){ |
133
|
|
|
|
|
|
|
open(LOG, ">>", $log) or croak $!; |
134
|
|
|
|
|
|
|
print LOG "LOG [$this_function] \$infile: $infile -- \$chromsizes: $chromsizes --\$dest: $dest\n"; |
135
|
|
|
|
|
|
|
} |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
croak "ERROR [$this_function] Cannot find $infile" |
138
|
|
|
|
|
|
|
unless (-e $infile); |
139
|
|
|
|
|
|
|
croak "ERROR [$this_function] Cannot find $chromsizes" |
140
|
|
|
|
|
|
|
unless (-e $chromsizes); |
141
|
|
|
|
|
|
|
croak "ERROR [$this_function] $dest does not exist" |
142
|
|
|
|
|
|
|
unless (-d $dest); |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
# .bed6 .bed12 extensions are replaced by .bb |
145
|
|
|
|
|
|
|
($bn,$path,$ext) = fileparse($infile, qr /\.bed[126]?/); |
146
|
|
|
|
|
|
|
$outfile = file($dest, "$bn.bb"); |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
$cmd = "$bed2bigBed $infile -extraIndex=name -tab $chromsizes $outfile"; |
149
|
|
|
|
|
|
|
if (defined $log){ print LOG "LOG [$this_function] $cmd\n"; } |
150
|
|
|
|
|
|
|
my( $success, $error_message, $full_buf, $stdout_buf, $stderr_buf ) = |
151
|
|
|
|
|
|
|
run( command => $cmd, verbose => 0 ); |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
if( !$success ) { |
154
|
|
|
|
|
|
|
print STDERR "ERROR [$this_function] Call to $bed2bigBed unsuccessful\n"; |
155
|
|
|
|
|
|
|
print STDERR "ERROR: this is what the command printed:\n"; |
156
|
|
|
|
|
|
|
print join "", @$full_buf; |
157
|
|
|
|
|
|
|
croak $!; |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
if (defined $log){ close(LOG); } |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
return $outfile; |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
sub sortbed { |
166
|
|
|
|
|
|
|
my ($infile,$dest,$outfile,$rm_orig,$log) = @_; |
167
|
|
|
|
|
|
|
my ($cmd,$out); |
168
|
|
|
|
|
|
|
my $this_function = (caller(0))[3]; |
169
|
|
|
|
|
|
|
my $bedtools = can_run('bedtools') or |
170
|
|
|
|
|
|
|
croak "ERROR [$this_function bedtools utility not found"; |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
croak "ERROR [$this_function] Cannot find $infile" |
173
|
|
|
|
|
|
|
unless (-e $infile); |
174
|
|
|
|
|
|
|
croak "ERROR [$this_function] $dest does not exist" |
175
|
|
|
|
|
|
|
unless (-d $dest); |
176
|
|
|
|
|
|
|
if (defined $log){open(LOG, ">>", $log) or croak $!;} |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
$out = file($dest,$outfile); |
179
|
|
|
|
|
|
|
$cmd = "$bedtools sort -i $infile > $out"; |
180
|
|
|
|
|
|
|
if (defined $log){ print LOG "LOG [$this_function] $cmd\n"; } |
181
|
|
|
|
|
|
|
my ( $success, $error_message, $full_buf, $stdout_buf, $stderr_buf ) = |
182
|
|
|
|
|
|
|
run( command => $cmd, verbose => 0 ); |
183
|
|
|
|
|
|
|
if( !$success ) { |
184
|
|
|
|
|
|
|
print STDERR "ERROR [$this_function] Call to $bedtools unsuccessful\n"; |
185
|
|
|
|
|
|
|
print STDERR "ERROR: this is what the command printed:\n"; |
186
|
|
|
|
|
|
|
print join "", @$full_buf; |
187
|
|
|
|
|
|
|
croak $!; |
188
|
|
|
|
|
|
|
} |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
if($rm_orig){ |
191
|
|
|
|
|
|
|
unlink($infile) or |
192
|
|
|
|
|
|
|
carp "WARN [$this_function] Could not unlink $infile"; |
193
|
|
|
|
|
|
|
if (defined $log){ |
194
|
|
|
|
|
|
|
print LOG "[$this_function] removed $infile $!\n"; |
195
|
|
|
|
|
|
|
} |
196
|
|
|
|
|
|
|
} |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
if (defined $log){ close(LOG); } |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
sub computeTPM { |
202
|
|
|
|
|
|
|
my ($featCount_sample,$rl) = @_; |
203
|
|
|
|
|
|
|
my ($TPM,$T,$totalTPM) = (0)x3; |
204
|
|
|
|
|
|
|
my ($i,$features,$meanTPM); |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
$features = keys %$featCount_sample; # of of features in hash |
207
|
|
|
|
|
|
|
print Dumper(\%$featCount_sample);print ">>$rl<<\n"; |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
# iterate through $featCount_sample twice: |
210
|
|
|
|
|
|
|
# 1. for computing T (denominator in TPM formula) |
211
|
|
|
|
|
|
|
foreach $i (keys %$featCount_sample){ |
212
|
|
|
|
|
|
|
$T += ($$featCount_sample{$i}{count} * $rl)/($$featCount_sample{$i}{len}); |
213
|
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
# 2. for computng actual TPM values |
215
|
|
|
|
|
|
|
foreach $i (keys %$featCount_sample){ |
216
|
|
|
|
|
|
|
$TPM = 1000000 * $$featCount_sample{$i}{count} * $rl/($$featCount_sample{$i}{len} * $T); |
217
|
|
|
|
|
|
|
$$featCount_sample{$i}{TPM} = $TPM; |
218
|
|
|
|
|
|
|
$totalTPM += $TPM; |
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
$meanTPM = $totalTPM/$features; |
221
|
|
|
|
|
|
|
# print "totalTPM=$totalTPM | meanTPM=$meanTPM\n"; |
222
|
|
|
|
|
|
|
return $meanTPM; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
sub parse_multicov { |
226
|
|
|
|
|
|
|
my ($file) = @_; |
227
|
|
|
|
|
|
|
my @mcData = (); |
228
|
|
|
|
|
|
|
my ($mcSamples,$i); |
229
|
|
|
|
|
|
|
my $this_function = (caller(0))[3]; |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
croak "ERROR [$this_function] multicov file $file not available\n" |
232
|
|
|
|
|
|
|
unless (-e $file); |
233
|
|
|
|
|
|
|
open (MULTICOV_IN, "< $file") or croak $!; |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
while (<MULTICOV_IN>){ |
236
|
|
|
|
|
|
|
chomp; |
237
|
|
|
|
|
|
|
@mcData = split(/\t/); # 0:chr|1:start|2:end|3:name|4:score|5:strand |
238
|
|
|
|
|
|
|
$mcSamples = (scalar @mcData)-6; # multicov extends BED6 |
239
|
|
|
|
|
|
|
#print "$_\n"; |
240
|
|
|
|
|
|
|
for ($i=0;$i<$mcSamples;$i++){ |
241
|
|
|
|
|
|
|
$featCount[$i]{$mcData[3]} = { |
242
|
|
|
|
|
|
|
chr => $mcData[0], |
243
|
|
|
|
|
|
|
start => $mcData[1], |
244
|
|
|
|
|
|
|
end => $mcData[2], |
245
|
|
|
|
|
|
|
name => $mcData[3], |
246
|
|
|
|
|
|
|
score => $mcData[4], |
247
|
|
|
|
|
|
|
strand => $mcData[5], |
248
|
|
|
|
|
|
|
len => eval($mcData[2]-$mcData[1]), |
249
|
|
|
|
|
|
|
count => $mcData[eval(6+$i)], |
250
|
|
|
|
|
|
|
} |
251
|
|
|
|
|
|
|
} |
252
|
|
|
|
|
|
|
#print Dumper(@mcData); |
253
|
|
|
|
|
|
|
} |
254
|
|
|
|
|
|
|
close(MULTICOV_IN); |
255
|
|
|
|
|
|
|
return $mcSamples; |
256
|
|
|
|
|
|
|
} |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
sub write_multicov { |
259
|
|
|
|
|
|
|
my ($item,$dest,$base_name) = @_; |
260
|
|
|
|
|
|
|
my ($outfile,$mcSamples,$nrFeatures,$feat,$i); |
261
|
|
|
|
|
|
|
my $this_function = (caller(0))[3]; |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
croak "ERROR [$this_function]: $dest does not exist\n" |
264
|
|
|
|
|
|
|
unless (-d $dest); |
265
|
|
|
|
|
|
|
$outfile = file($dest,$base_name.".".$item.".multicov.csv"); |
266
|
|
|
|
|
|
|
open (MULTICOV_OUT, "> $outfile") or croak $!; |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
$mcSamples = scalar @featCount; # of samples in %{$featCount} |
269
|
|
|
|
|
|
|
$nrFeatures = scalar keys %{$featCount[1]}; # of keys in %{$featCount}[1] |
270
|
|
|
|
|
|
|
#print "=====> write_multicov: writing multicov file $outfile with $nrFeatures lines and $mcSamples conditions\n"; |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
# check whether each column in %$featCount has the same number of entries |
273
|
|
|
|
|
|
|
for($i=0;$i<$mcSamples;$i++){ |
274
|
|
|
|
|
|
|
my $fc = scalar keys %{$featCount[$i]}; # of keys in %{$featCount} |
275
|
|
|
|
|
|
|
#print "condition $i => $fc keys\n"; |
276
|
|
|
|
|
|
|
unless($nrFeatures == $fc){ |
277
|
|
|
|
|
|
|
croak "ERROR [$this_function]: unequal element count in \%\$featCount\nExpected $nrFeatures have $fc in condition $i\n"; |
278
|
|
|
|
|
|
|
} |
279
|
|
|
|
|
|
|
} |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
foreach $feat (keys %{$featCount[1]}){ |
282
|
|
|
|
|
|
|
my @mcLine = (); |
283
|
|
|
|
|
|
|
# process standard BED6 fields first |
284
|
|
|
|
|
|
|
push @mcLine, (${$featCount[1]}{$feat}->{chr}, |
285
|
|
|
|
|
|
|
${$featCount[1]}{$feat}->{start}, |
286
|
|
|
|
|
|
|
${$featCount[1]}{$feat}->{end}, |
287
|
|
|
|
|
|
|
${$featCount[1]}{$feat}->{name}, |
288
|
|
|
|
|
|
|
${$featCount[1]}{$feat}->{score}, |
289
|
|
|
|
|
|
|
${$featCount[1]}{$feat}->{strand}); |
290
|
|
|
|
|
|
|
# process multicov values for all samples |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
for($i=0;$i<$mcSamples;$i++){ |
293
|
|
|
|
|
|
|
# print "------------> "; print "processing $i th condition "; print "<-----------\n"; |
294
|
|
|
|
|
|
|
unless (defined ${$featCount[$i]}{$feat}){ |
295
|
|
|
|
|
|
|
croak "Could not find item $feat in mcSample $i\n"; |
296
|
|
|
|
|
|
|
} |
297
|
|
|
|
|
|
|
push @mcLine, ${$featCount[$i]}{$feat}->{$item}; |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
} |
300
|
|
|
|
|
|
|
#print Dumper(\@mcLine); |
301
|
|
|
|
|
|
|
print MULTICOV_OUT join("\t",@mcLine)."\n"; |
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
close(MULTICOV_OUT); |
304
|
|
|
|
|
|
|
} |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
sub unique_array{ |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
my $arrayref = shift; |
309
|
|
|
|
|
|
|
my @array = @{$arrayref}; |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
foreach my $item (@array) |
312
|
|
|
|
|
|
|
{ |
313
|
|
|
|
|
|
|
$unique{$item} ++; |
314
|
|
|
|
|
|
|
} |
315
|
|
|
|
|
|
|
my @arrayuid = sort {$a cmp $b} keys %unique; |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
return(\@arrayuid); |
318
|
|
|
|
|
|
|
} |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
sub kmer_enrichment{ |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
my @seqs = @{$_[0]}; |
323
|
|
|
|
|
|
|
my $klen = $_[1]; |
324
|
|
|
|
|
|
|
# my @seq = split( //, $read_tmp ); |
325
|
|
|
|
|
|
|
my $kstring =''; |
326
|
|
|
|
|
|
|
#return variables |
327
|
|
|
|
|
|
|
my %km; |
328
|
|
|
|
|
|
|
foreach my $sequences (@seqs){ |
329
|
|
|
|
|
|
|
# print STDERR $sequences,"\n"; |
330
|
|
|
|
|
|
|
my @seq = split( //, $sequences ); |
331
|
|
|
|
|
|
|
for ( my $seq_pos = 0; $seq_pos <= $#seq-$klen ; $seq_pos++ ) { |
332
|
|
|
|
|
|
|
for (my $i=$seq_pos;$i<=$seq_pos+($klen-1);$i++){ |
333
|
|
|
|
|
|
|
$kstring .= $seq[$i]; |
334
|
|
|
|
|
|
|
} |
335
|
|
|
|
|
|
|
$km{$kstring}++; |
336
|
|
|
|
|
|
|
$kstring = ""; |
337
|
|
|
|
|
|
|
} |
338
|
|
|
|
|
|
|
} |
339
|
|
|
|
|
|
|
return( \%km ); |
340
|
|
|
|
|
|
|
} |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
sub extend_chain{ |
343
|
|
|
|
|
|
|
my %sizes = %{$_[0]}; |
344
|
|
|
|
|
|
|
my $chain = $_[1]; |
345
|
|
|
|
|
|
|
my $l = $_[2]; |
346
|
|
|
|
|
|
|
my $r = $_[3]; |
347
|
|
|
|
|
|
|
my $u = $_[4]; |
348
|
|
|
|
|
|
|
my $d = $_[5]; |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
##return a new chain with extended coordinates |
351
|
|
|
|
|
|
|
my $extendchain = $chain -> clone(); |
352
|
|
|
|
|
|
|
## got through all features in original chain, calculate new start and end and safe in extendchain |
353
|
|
|
|
|
|
|
my @featarray = @{$extendchain->chain}; |
354
|
|
|
|
|
|
|
foreach my $feature (@featarray){ |
355
|
|
|
|
|
|
|
my $chrom = $feature->chromosome; |
356
|
|
|
|
|
|
|
my $start = $feature->start; |
357
|
|
|
|
|
|
|
my $end = $feature->end; |
358
|
|
|
|
|
|
|
my $strand = $feature->strand; |
359
|
|
|
|
|
|
|
my $right = 0; |
360
|
|
|
|
|
|
|
my $left = 0; |
361
|
|
|
|
|
|
|
my $width = nearest(1,($end-$start)/2); |
362
|
|
|
|
|
|
|
$width = 0 if ($d > 0 || $u > 0); |
363
|
|
|
|
|
|
|
if ($strand eq "+"){ |
364
|
|
|
|
|
|
|
if ($d > 0){ |
365
|
|
|
|
|
|
|
$start = $end; |
366
|
|
|
|
|
|
|
$r = $d; |
367
|
|
|
|
|
|
|
} |
368
|
|
|
|
|
|
|
if ($u > 0){ |
369
|
|
|
|
|
|
|
$end = $start; |
370
|
|
|
|
|
|
|
$l = $u; |
371
|
|
|
|
|
|
|
} |
372
|
|
|
|
|
|
|
$right=$r; |
373
|
|
|
|
|
|
|
$left=$l; |
374
|
|
|
|
|
|
|
} |
375
|
|
|
|
|
|
|
elsif ($strand eq "-"){ |
376
|
|
|
|
|
|
|
if ($u > 0){ |
377
|
|
|
|
|
|
|
$start = $end; |
378
|
|
|
|
|
|
|
$l = $u; |
379
|
|
|
|
|
|
|
} |
380
|
|
|
|
|
|
|
if ($d > 0){ |
381
|
|
|
|
|
|
|
$end = $start; |
382
|
|
|
|
|
|
|
$r = $d; |
383
|
|
|
|
|
|
|
} |
384
|
|
|
|
|
|
|
$right=$l; |
385
|
|
|
|
|
|
|
$left=$r; |
386
|
|
|
|
|
|
|
} |
387
|
|
|
|
|
|
|
if (($right-$width) <= 0){ |
388
|
|
|
|
|
|
|
$right = 0; |
389
|
|
|
|
|
|
|
} |
390
|
|
|
|
|
|
|
else{ |
391
|
|
|
|
|
|
|
$right-=$width; |
392
|
|
|
|
|
|
|
} |
393
|
|
|
|
|
|
|
if (($left-$width) <= 0 ){ |
394
|
|
|
|
|
|
|
$left = 0; |
395
|
|
|
|
|
|
|
} |
396
|
|
|
|
|
|
|
else{ |
397
|
|
|
|
|
|
|
$left-=$width; |
398
|
|
|
|
|
|
|
} |
399
|
|
|
|
|
|
|
if ( $start-$left >= 1 ){ |
400
|
|
|
|
|
|
|
if ($end+$right >= $sizes{"chr".$chrom}){ |
401
|
|
|
|
|
|
|
$end = $sizes{"chr".$chrom}; |
402
|
|
|
|
|
|
|
} |
403
|
|
|
|
|
|
|
else{ |
404
|
|
|
|
|
|
|
$end += $right; |
405
|
|
|
|
|
|
|
} |
406
|
|
|
|
|
|
|
$start -= $left; |
407
|
|
|
|
|
|
|
} |
408
|
|
|
|
|
|
|
elsif ( $start-$left <= 0 ){ |
409
|
|
|
|
|
|
|
$start = 0; |
410
|
|
|
|
|
|
|
if ($end+$right >= $sizes{"chr".$chrom}){ |
411
|
|
|
|
|
|
|
$end = $sizes{"chr".$chrom}; |
412
|
|
|
|
|
|
|
} |
413
|
|
|
|
|
|
|
else{ |
414
|
|
|
|
|
|
|
$end = $end+$right; |
415
|
|
|
|
|
|
|
} |
416
|
|
|
|
|
|
|
} |
417
|
|
|
|
|
|
|
else{ |
418
|
|
|
|
|
|
|
die "Something wrong here!\n"; |
419
|
|
|
|
|
|
|
} |
420
|
|
|
|
|
|
|
$feature->start($start); |
421
|
|
|
|
|
|
|
$feature->end($end); |
422
|
|
|
|
|
|
|
} |
423
|
|
|
|
|
|
|
$extendchain->type('extended'); |
424
|
|
|
|
|
|
|
return($extendchain); |
425
|
|
|
|
|
|
|
} |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
sub parse_bed6{ |
428
|
|
|
|
|
|
|
my $bedfile = shift; |
429
|
|
|
|
|
|
|
open (my $Bed, "<:gzip(autopop)",$bedfile) or die "$!"; |
430
|
|
|
|
|
|
|
my @featurelist; ## This will become a FeatureChain |
431
|
|
|
|
|
|
|
while(<$Bed>){ |
432
|
|
|
|
|
|
|
### This should be done by FeatureIO |
433
|
|
|
|
|
|
|
chomp (my $raw = $_); |
434
|
|
|
|
|
|
|
push my @line , split (/\t/,$raw); |
435
|
|
|
|
|
|
|
push @line, "\." if ( !$line[5] ); |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
(my $chromosome = $line[0])=~ s/chr//g; |
438
|
|
|
|
|
|
|
my $start = $line[1]; |
439
|
|
|
|
|
|
|
my $end = $line[2]; |
440
|
|
|
|
|
|
|
my $name = $line[3]; |
441
|
|
|
|
|
|
|
my $score = $line[4]; |
442
|
|
|
|
|
|
|
my $strand = $line[5]; |
443
|
|
|
|
|
|
|
my $extension = ''; |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
if ($line[6]){ |
446
|
|
|
|
|
|
|
for (6..$#line){ |
447
|
|
|
|
|
|
|
$extension .= $line[$_]."\t"; |
448
|
|
|
|
|
|
|
} |
449
|
|
|
|
|
|
|
$extension = substr($extension,0,-1); |
450
|
|
|
|
|
|
|
} |
451
|
|
|
|
|
|
|
my $feat = Bio::ViennaNGS::Feature->new(chromosome=>$chromosome, |
452
|
|
|
|
|
|
|
start=>$start, |
453
|
|
|
|
|
|
|
end=>$end, |
454
|
|
|
|
|
|
|
name=>$name, |
455
|
|
|
|
|
|
|
score=>$score, |
456
|
|
|
|
|
|
|
strand=>$strand, |
457
|
|
|
|
|
|
|
extension=>$extension); |
458
|
|
|
|
|
|
|
push @featurelist, $feat; |
459
|
|
|
|
|
|
|
} |
460
|
|
|
|
|
|
|
return (\@featurelist); |
461
|
|
|
|
|
|
|
} |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
sub fetch_chrom_sizes{ |
464
|
|
|
|
|
|
|
my $species = shift; |
465
|
|
|
|
|
|
|
my %sizes; |
466
|
|
|
|
|
|
|
my @chromsize; |
467
|
|
|
|
|
|
|
my $this_function = (caller(0))[3]; |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
my $test_fetchChromSizes = can_run('fetchChromSizes') or |
470
|
|
|
|
|
|
|
say "ERROR [$this_function] fetchChromSizes utility not found"; |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
my $cmd = "fetchChromSizes $species"; |
473
|
|
|
|
|
|
|
my( $success, $error_message, $full_buf, $stdout_buf, $stderr_buf ) = run(command => $cmd, verbose => 0); |
474
|
|
|
|
|
|
|
if ($success){ |
475
|
|
|
|
|
|
|
@chromsize = @{$stdout_buf}; |
476
|
|
|
|
|
|
|
} |
477
|
|
|
|
|
|
|
else{ |
478
|
|
|
|
|
|
|
print STDERR "Using UCSCs fetchChromSizes failed, trying alternative mysql fetch!\n"; |
479
|
|
|
|
|
|
|
my $test_fetchChromSizes = can_run('mysql') or |
480
|
|
|
|
|
|
|
die "ERROR [$this_function] mysql utility not found"; |
481
|
|
|
|
|
|
|
$cmd = "mysql --user=genome --host=genome-mysql.cse.ucsc.edu -A -e \"select chrom, size from $species.chromInfo\""; ### Alternative to UCSC fetchChromSizes, has mysql dependency |
482
|
|
|
|
|
|
|
my( $success, $error_message, $full_buf, $stdout_buf, $stderr_buf ) = run(command => $cmd, verbose => 0); |
483
|
|
|
|
|
|
|
if ($success){ |
484
|
|
|
|
|
|
|
@chromsize = @{$stdout_buf}; |
485
|
|
|
|
|
|
|
} |
486
|
|
|
|
|
|
|
else{ |
487
|
|
|
|
|
|
|
print STDERR "ERROR [$this_function] External command call unsuccessful\n"; |
488
|
|
|
|
|
|
|
print STDERR "ERROR: this is what the command printed:\n"; |
489
|
|
|
|
|
|
|
print join "", @$full_buf; |
490
|
|
|
|
|
|
|
croak $!; |
491
|
|
|
|
|
|
|
die "Fetching of chromosome sizes failed, please either download fetchChromSizes from the UCSC script collection, or install mysql!\n"; |
492
|
|
|
|
|
|
|
} |
493
|
|
|
|
|
|
|
} |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
foreach (@chromsize){ |
496
|
|
|
|
|
|
|
chomp($_); |
497
|
|
|
|
|
|
|
foreach (split(/\n/,$_)){ |
498
|
|
|
|
|
|
|
my ($chr,$size)=split (/\t/,$_); |
499
|
|
|
|
|
|
|
$sizes{$chr}=$size; |
500
|
|
|
|
|
|
|
} |
501
|
|
|
|
|
|
|
} |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
return(\%sizes); |
504
|
|
|
|
|
|
|
} |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
1; |
507
|
|
|
|
|
|
|
__END__ |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
=head1 NAME |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
Bio::ViennaNGS::Util - Utility routines for Next-Generation Sequencing data |
513
|
|
|
|
|
|
|
analysis |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
=head1 SYNOPSIS |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
use Bio::ViennaNGS::Util; |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
# make bigWig from BED or BAM |
520
|
|
|
|
|
|
|
$type = "bam"; |
521
|
|
|
|
|
|
|
$strand = "+"; |
522
|
|
|
|
|
|
|
$bwfile = bed_or_bam2bw($type,$infile,$cs_in,$strand,$destdir,$wantnorm,$size_p,$scale,$logfile); |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
# make bigBed from BED |
525
|
|
|
|
|
|
|
my $bb = bed2bigBed($bed_in,$cs_in,$destdir,$logfile); |
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
# sort a BED file |
528
|
|
|
|
|
|
|
sortbed($bed_in,$destdir,$bed_out,$rm_orig,$logfile) |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
# compute transcript abundance in TPM |
531
|
|
|
|
|
|
|
$meanTPM = computeTPM($sample,$readlength); |
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
# parse a bedtools multicov compatible file |
534
|
|
|
|
|
|
|
$conds = parse_multicov($infile); |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
# write bedtools multicov compatible file |
537
|
|
|
|
|
|
|
write_multicov("TPM",$destdir,$basename); |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
=head1 DESCRIPTION |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
Bio::ViennaNGS::Util is a collection of utility subroutines for |
542
|
|
|
|
|
|
|
building efficient Next-Generation Sequencing (NGS) data analysis |
543
|
|
|
|
|
|
|
pipelines. |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
=head2 ROUTINES |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
=over |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
=item bed_or_bam2bw($type,$infile,$chromsizes,$strand,$dest,$want_norm,$size,$scale,$log) |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
Creates stranded, normalized BigWig coverage profiles from |
552
|
|
|
|
|
|
|
strand-specific BAM or BED files (provided via C<$infile>). The |
553
|
|
|
|
|
|
|
routine expects a file type 'bam' or 'bed' via the C<$type> |
554
|
|
|
|
|
|
|
argument. C<$chromsizes> is the chromosome.sizes files, C<$strand> is |
555
|
|
|
|
|
|
|
either "+" or "-" and C<$dest> contains the output path for |
556
|
|
|
|
|
|
|
results. For normlization of bigWig profiles, additional attributes |
557
|
|
|
|
|
|
|
are required: C<$want_norm> triggers normalization with values 0 or |
558
|
|
|
|
|
|
|
1. C<$size> is the number of fragments/elements in the BAM or BED file |
559
|
|
|
|
|
|
|
and C<$scale> gives the number to which data is normalized (ie. every |
560
|
|
|
|
|
|
|
bedGraph entry is multiplied by a factor (C<$scale>/C<$size>). C<$log> |
561
|
|
|
|
|
|
|
is expected to contain either the full path and file name of log file |
562
|
|
|
|
|
|
|
or 'undef'. The routine returns the full file name of the newly |
563
|
|
|
|
|
|
|
generated bigWig file. |
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
While this routine can handle non-straned BAM/BED files (in which case |
566
|
|
|
|
|
|
|
C<$strand> should be set to "+" and hence all coverage profiles will |
567
|
|
|
|
|
|
|
be created with a positive sign, even if they map to the negative |
568
|
|
|
|
|
|
|
strand), usage of strand-specific data is highly recommended. For BAM |
569
|
|
|
|
|
|
|
file, this is easily achieved by calling the L<bam_split> routine (see |
570
|
|
|
|
|
|
|
above) prior to this one, thus creating dedicated BAM files containing |
571
|
|
|
|
|
|
|
exclusively reads mapped to the positive or negative strand, |
572
|
|
|
|
|
|
|
respectively. |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
It is important to know that this routine B<does not extract> reads |
575
|
|
|
|
|
|
|
mapped to either strand from a non-stranded BAM/BED file if the |
576
|
|
|
|
|
|
|
C<$strand> argument is given. It rather adjusts the sign of B<all> |
577
|
|
|
|
|
|
|
mapped reads/features in a BAM/BED file and then creates bigWig |
578
|
|
|
|
|
|
|
files. See the L<split_bam> routine for extracting reads mapped to |
579
|
|
|
|
|
|
|
either strand. |
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
Stranded bigWigs can easily be visualized via |
582
|
|
|
|
|
|
|
L<TrackHubs|http://genome.ucsc.edu/goldenPath/help/hgTrackHubHelp.html> |
583
|
|
|
|
|
|
|
in the UCSC Genome Browser. Internally, the conversion from BAM/BED to |
584
|
|
|
|
|
|
|
bigWig is accomplished via two third-party applications: |
585
|
|
|
|
|
|
|
L<genomeCoverageBed|http://bedtools.readthedocs.org/en/latest/content/tools/genomecov.html> |
586
|
|
|
|
|
|
|
and |
587
|
|
|
|
|
|
|
L<bedGraphToBigWig|http://hgdownload.cse.ucsc.edu/admin/exe/>. Intermediate |
588
|
|
|
|
|
|
|
bedGraph files are removed automatically once the bigWig files are |
589
|
|
|
|
|
|
|
ready. |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
=item sortbed($infile,$dest,$outfile,$rm_orig,$log) |
592
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
Sorts BED file C<$infile> with F<bedtools sortt>. C<$dest> and |
594
|
|
|
|
|
|
|
C<outfile> name path and filename of the resulting sorted BED |
595
|
|
|
|
|
|
|
file. C<$rm_infile> is either 1 or 0 and indicated whether the |
596
|
|
|
|
|
|
|
original C<$infile> should be deleted. C<$log> holds path and name of |
597
|
|
|
|
|
|
|
log file. |
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
=item bed2bigBed($infile,$chromsizes,$dest,$log) |
600
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
Creates an indexed bigBed file from a BED file. C<$infile> is the BED |
602
|
|
|
|
|
|
|
file to be transformed, C<$chromsizes> is the chromosome.sizes file |
603
|
|
|
|
|
|
|
and C<$dest> contains the output path for results. C<$log> is the name |
604
|
|
|
|
|
|
|
of a log file, or undef if no logging is reuqired. A '.bed', '.bed6' |
605
|
|
|
|
|
|
|
or '.bed12' suffix in C<$infile> will be replace by '.bb' in the |
606
|
|
|
|
|
|
|
output. Else, the name of the output bigBed file will be the value of |
607
|
|
|
|
|
|
|
C<$infile> plus '.bb' appended. |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
The conversion from BED to bigBed is done by a third-party utility |
610
|
|
|
|
|
|
|
(bedToBigBed), which is executed by IPC::Cmd. |
611
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
=item computeTPM($featCount_sample,$rl) |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
Computes expression in Transcript per Million (TPM) [Wagner |
615
|
|
|
|
|
|
|
et.al. Theory Biosci. (2012)]. C<$featCount_sample> is a reference to |
616
|
|
|
|
|
|
|
a Hash of Hashes data straucture where keys are feature names and |
617
|
|
|
|
|
|
|
values hold a hash that must at least contain length and raw read |
618
|
|
|
|
|
|
|
counts. Practically, C<$featCount_sample> is represented by _one_ |
619
|
|
|
|
|
|
|
element of C<@featCount>, which is populated from a multicov file by |
620
|
|
|
|
|
|
|
C<parse_multicov()>. C<$rl> is the read length of the sequencing run. |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
Returns the mean TPM of the processed sample, which is invariant among |
623
|
|
|
|
|
|
|
samples. (TPM models relative molar concentration and thus fulfills |
624
|
|
|
|
|
|
|
the invariant average criterion.) |
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
=item parse_multicov($file) |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
Parse a bedtools multicov (multiBamCov) file, i.e. an extended BED6 |
629
|
|
|
|
|
|
|
file, into an Array of Hash of Hashes data structure |
630
|
|
|
|
|
|
|
(C<@featCount>). C<$file> is the input file. Returns the number of |
631
|
|
|
|
|
|
|
samples present in the multicov file, ie. the numner of columns |
632
|
|
|
|
|
|
|
extending the canonical BED6 columns in the input multicov file. |
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
=item write_multicov($item,$dest,$base_name) |
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
Write C<@featCount> data to a bedtools multicov (multiBamCov)-type |
637
|
|
|
|
|
|
|
file. C<$item> specifies the type of information from C<@featCount> |
638
|
|
|
|
|
|
|
HoH entries, e.g. TPM or RPKM. These values must have been computed |
639
|
|
|
|
|
|
|
and inserted into C<@featCount> beforehand by |
640
|
|
|
|
|
|
|
e.g. C<computeTPM()>. C<$dest> gives the absolute path and |
641
|
|
|
|
|
|
|
C<$base_name> the basename (will be extended by C<$item>.csv) of the |
642
|
|
|
|
|
|
|
output file. |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
=back |
645
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
=head1 DEPENDENCIES |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
=over 7 |
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
=item L<Bio::Perl> >= 1.00690001 |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
=item L<BIO::DB::Sam> >= 1.39 |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
=item L<File::Basename> |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
=item L<File::Temp> |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
=item L<Path::Class> |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
=item L<IPC::Cmd> |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
=item L<Carp> |
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
=back |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
L<Bio::ViennaNGS> uses third-party tools for computing intersections |
667
|
|
|
|
|
|
|
of BED files: F<bedtools intersect> from the |
668
|
|
|
|
|
|
|
L<BEDtools|http://bedtools.readthedocs.org/en/latest/content/tools/intersect.html> |
669
|
|
|
|
|
|
|
suite is used to compute overlaps and F<bedtools sort> is used to sort |
670
|
|
|
|
|
|
|
BED output files. Make sure that those third-party utilities are |
671
|
|
|
|
|
|
|
available on your system, and that hey can be found and executed by |
672
|
|
|
|
|
|
|
the Perl interpreter. We recommend installing the latest version of |
673
|
|
|
|
|
|
|
L<BEDtools|https://github.com/arq5x/bedtools2> on your system. |
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
=head1 AUTHORS |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
=over |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
=item Michael T. Wolfinger E<lt>michael@wolfinger.euE<gt> |
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
=item Jörg Fallmann E<lt>fall@tbi.univie.ac.atE<gt> |
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
=back |
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
Copyright (C) 2014 Michael T. Wolfinger E<lt>michael@wolfinger.euE<gt> |
688
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
690
|
|
|
|
|
|
|
it under the same terms as Perl itself, either Perl version 5.12.4 or, |
691
|
|
|
|
|
|
|
at your option, any later version of Perl 5 you may have available. |
692
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
This software is distributed in the hope that it will be useful, but |
694
|
|
|
|
|
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of |
695
|
|
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
=cut |