line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::Tools::Run::BEDTools |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Dan Kortschak |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright Dan Kortschak |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::Tools::Run::BEDTools - Run wrapper for the BEDTools suite of programs *BETA* |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# use a BEDTools program |
21
|
|
|
|
|
|
|
$bedtools_fac = Bio::Tools::Run::BEDTools->new( -command => 'subtract' ); |
22
|
|
|
|
|
|
|
$result_file = $bedtools_fac->run( -bed1 => 'genes.bed', -bed2 => 'mask.bed' ); |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# if IO::Uncompress::Gunzip is available... |
25
|
|
|
|
|
|
|
$result_file = $bedtools_fac->run( -bed1 => 'genes.bed.gz', -bed2 => 'mask.bed.gz' ); |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# be more strict |
28
|
|
|
|
|
|
|
$bedtools_fac->set_parameters( -strandedness => 1 ); |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# and even more... |
31
|
|
|
|
|
|
|
$bedtools_fac->set_parameters( -minimum_overlap => 1e-6 ); |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# create a Bio::SeqFeature::Collection object |
34
|
|
|
|
|
|
|
$features = $bedtools_fac->result( -want => 'Bio::SeqFeature::Collection' ); |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
=head1 DEPRECATION WARNING |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
Most executables from BEDTools v>=2.10.1 can read GFF and VCF formats |
39
|
|
|
|
|
|
|
in addition to BED format. This requires the use of a new input file param, |
40
|
|
|
|
|
|
|
shown in the following documentation, '-bgv', in place of '-bed' for the |
41
|
|
|
|
|
|
|
executables that can do this. |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
This behaviour breaks existing scripts. |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=head1 DESCRIPTION |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
This module provides a wrapper interface for Aaron R. Quinlan and Ira M. Hall's |
48
|
|
|
|
|
|
|
utilities C that allow for (among other things): |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=over |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=item * Intersecting two BED files in search of overlapping features. |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
=item * Merging overlapping features. |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=item * Screening for paired-end (PE) overlaps between PE sequences and existing genomic features. |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=item * Calculating the depth and breadth of sequence coverage across defined "windows" in a genome. |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=back |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
(see L for manuals and downloads). |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=head1 OPTIONS |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
C is a suite of 17 commandline executable. This module attempts to |
68
|
|
|
|
|
|
|
provide and options comprehensively. You can browse the choices like so: |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
$bedtools_fac = Bio::Tools::Run::BEDTools->new; |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# all bowtie commands |
73
|
|
|
|
|
|
|
@all_commands = $bedtools_fac->available_parameters('commands'); |
74
|
|
|
|
|
|
|
@all_commands = $bedtools_fac->available_commands; # alias |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# just for default command ('bam_to_bed') |
77
|
|
|
|
|
|
|
@btb_params = $bedtools_fac->available_parameters('params'); |
78
|
|
|
|
|
|
|
@btb_switches = $bedtools_fac->available_parameters('switches'); |
79
|
|
|
|
|
|
|
@btb_all_options = $bedtools_fac->available_parameters(); |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
Reasonably mnemonic names have been assigned to the single-letter |
82
|
|
|
|
|
|
|
command line options. These are the names returned by |
83
|
|
|
|
|
|
|
C, and can be used in the factory constructor |
84
|
|
|
|
|
|
|
like typical BioPerl named parameters. |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
As a number of options are mutually exclusive, and the interpretation of |
87
|
|
|
|
|
|
|
intent is based on last-pass option reaching bowtie with potentially unpredicted |
88
|
|
|
|
|
|
|
results. This module will prevent inconsistent switches and parameters |
89
|
|
|
|
|
|
|
from being passed. |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
See L for details of BEDTools options. |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=head1 FILES |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
When a command requires filenames, these are provided to the C method, not |
96
|
|
|
|
|
|
|
the constructor (C). To see the set of files required by a command, use |
97
|
|
|
|
|
|
|
C or the alias C: |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
$bedtools_fac = Bio::Tools::Run::BEDTools->new( -command => 'pair_to_bed' ); |
100
|
|
|
|
|
|
|
@filespec = $bedtools_fac->filespec; |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
This example returns the following array: |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
#bedpe |
105
|
|
|
|
|
|
|
#bam |
106
|
|
|
|
|
|
|
bed |
107
|
|
|
|
|
|
|
#out |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
This indicates that the bed (C BED format) file MUST be |
110
|
|
|
|
|
|
|
specified, and that the out, bedpe (C BEDPE format) and bam |
111
|
|
|
|
|
|
|
(C binary format) file MAY be specified (Note that in this case you |
112
|
|
|
|
|
|
|
MUST provide ONE of bedpe OR bam, the module at this stage does not allow |
113
|
|
|
|
|
|
|
this information to be queried). Use these in the C call like so: |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
$bedtools_fac->run( -bedpe => 'paired.bedpe', |
116
|
|
|
|
|
|
|
-bgv => 'genes.bed', |
117
|
|
|
|
|
|
|
-out => 'overlap' ); |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
The object will store the programs STDERR output for you in the C |
120
|
|
|
|
|
|
|
attribute: |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
handle_bed_warning($bedtools_fac) if ($bedtools_fac->stderr =~ /Usage:/); |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
For the commands 'fasta_from_bed' and 'mask_fasta_from_bed' STDOUT will also |
125
|
|
|
|
|
|
|
be captured in the C attribute by default and all other commands |
126
|
|
|
|
|
|
|
can be forced to capture program output in STDOUT by setting the -out |
127
|
|
|
|
|
|
|
filespec parameter to '-'. |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
=head1 FEEDBACK |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=head2 Mailing Lists |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
134
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to |
135
|
|
|
|
|
|
|
the Bioperl mailing list. Your participation is much appreciated. |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
138
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=head2 Support |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
L |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
Rather than to the module maintainer directly. Many experienced and |
147
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
148
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
149
|
|
|
|
|
|
|
with code and data examples if at all possible. |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=head2 Reporting Bugs |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
154
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via |
155
|
|
|
|
|
|
|
the web: |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
http://redmine.open-bio.org/projects/bioperl/ |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=head1 AUTHOR - Dan Kortschak |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
Email dan.kortschak adelaide.edu.au |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
Additional contributors names and emails here |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
=head1 APPENDIX |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
The rest of the documentation details each of the object methods. |
170
|
|
|
|
|
|
|
Internal methods are usually preceded with a _ |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
=cut |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
# Let the code begin... |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
package Bio::Tools::Run::BEDTools; |
178
|
1
|
|
|
1
|
|
134630
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
34
|
|
179
|
|
|
|
|
|
|
our $HAVE_IO_UNCOMPRESS; |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
BEGIN { |
182
|
1
|
|
|
1
|
|
50
|
eval 'require IO::Uncompress::Gunzip; $HAVE_IO_UNCOMPRESS = 1'; |
183
|
|
|
|
|
|
|
} |
184
|
|
|
|
|
|
|
|
185
|
1
|
|
|
1
|
|
7
|
use IPC::Run; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
36
|
|
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
# Object preamble - inherits from Bio::Root::Root |
188
|
|
|
|
|
|
|
|
189
|
1
|
|
|
1
|
|
487
|
use lib '../../..'; |
|
1
|
|
|
|
|
485
|
|
|
1
|
|
|
|
|
4
|
|
190
|
1
|
|
|
1
|
|
560
|
use Bio::Tools::Run::BEDTools::Config; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
141
|
|
191
|
1
|
|
|
1
|
|
457
|
use Bio::Tools::Run::WrapperBase; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
7
|
|
192
|
1
|
|
|
1
|
|
521
|
use Bio::Tools::Run::WrapperBase::CommandExts; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
8
|
|
193
|
1
|
|
|
1
|
|
573
|
use Bio::Tools::GuessSeqFormat; |
|
1
|
|
|
|
|
2801
|
|
|
1
|
|
|
|
|
10
|
|
194
|
1
|
|
|
1
|
|
510
|
use Bio::SeqFeature::Generic; |
|
1
|
|
|
|
|
53680
|
|
|
1
|
|
|
|
|
13
|
|
195
|
1
|
|
|
1
|
|
548
|
use Bio::SeqFeature::Collection; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
use Bio::SeqIO; |
197
|
|
|
|
|
|
|
use File::Sort qw( sort_file ); |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
use base qw( Bio::Tools::Run::WrapperBase ); |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
## BEDTools |
202
|
|
|
|
|
|
|
our $program_name = '*bedtools'; |
203
|
|
|
|
|
|
|
our $default_cmd = 'bam_to_bed'; |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
# Note: Other globals imported from Bio::Tools::Run::BEDTools::Config |
206
|
|
|
|
|
|
|
our $qual_param = undef; |
207
|
|
|
|
|
|
|
our $use_dash = 'single'; |
208
|
|
|
|
|
|
|
our $join = ' '; |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
our %strand_translate = ( |
211
|
|
|
|
|
|
|
'+' => 1, |
212
|
|
|
|
|
|
|
'-' => -1, |
213
|
|
|
|
|
|
|
'.' => 0 |
214
|
|
|
|
|
|
|
); |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
=head2 new() |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
Title : new |
219
|
|
|
|
|
|
|
Usage : my $obj = new Bio::Tools::Run::BEDTools(); |
220
|
|
|
|
|
|
|
Function: Builds a new Bio::Tools::Run::BEDTools object |
221
|
|
|
|
|
|
|
Returns : an instance of Bio::Tools::Run::BEDTools |
222
|
|
|
|
|
|
|
Args : |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=cut |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
sub new { |
227
|
|
|
|
|
|
|
my ($class,@args) = @_; |
228
|
|
|
|
|
|
|
unless (grep /command/, @args) { |
229
|
|
|
|
|
|
|
push @args, '-command', $default_cmd; |
230
|
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
|
my $self = $class->SUPER::new(@args); |
232
|
|
|
|
|
|
|
foreach (keys %command_executables) { |
233
|
|
|
|
|
|
|
$self->executables($_, $self->_find_executable($command_executables{$_})); |
234
|
|
|
|
|
|
|
} |
235
|
|
|
|
|
|
|
$self->want($self->_rearrange([qw(WANT)],@args)); |
236
|
|
|
|
|
|
|
$self->parameters_changed(1); # set on instantiation, per Bio::ParameterBaseI |
237
|
|
|
|
|
|
|
return $self; |
238
|
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=head2 run() |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
Title : run |
243
|
|
|
|
|
|
|
Usage : $result = $bedtools_fac->run(%params); |
244
|
|
|
|
|
|
|
Function: Run a BEDTools command. |
245
|
|
|
|
|
|
|
Returns : Command results (file, IO object or Bio object) |
246
|
|
|
|
|
|
|
Args : Dependent on filespec for command. |
247
|
|
|
|
|
|
|
See $bedtools_fac->filespec and BEDTools Manual. |
248
|
|
|
|
|
|
|
Also accepts -want => '(raw|format|)' - see want(). |
249
|
|
|
|
|
|
|
Note : gzipped inputs are allowed if IO::Uncompress::Gunzip |
250
|
|
|
|
|
|
|
is available |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
=cut |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
sub run { |
255
|
|
|
|
|
|
|
my $self = shift; |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
my ($ann, $bed, $bg, $bgv, $bgv1, $bgv2, $bam, $bedpe, $bedpe1, $bedpe2, $seq, $genome, $out); |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
if (!(@_ % 2)) { |
260
|
|
|
|
|
|
|
my %args = @_; |
261
|
|
|
|
|
|
|
if ((grep /^-\w+/, keys %args) == keys %args) { |
262
|
|
|
|
|
|
|
($ann, $bed, $bg, $bgv, $bgv1, $bgv2, $bam, $bedpe, $bedpe1, $bedpe2, $seq, $genome, $out) = |
263
|
|
|
|
|
|
|
$self->_rearrange([qw( ANN BED BG BGV BGV1 BGV2 BAM |
264
|
|
|
|
|
|
|
BEDPE BEDPE1 BEDPE2 |
265
|
|
|
|
|
|
|
SEQ GENOME OUT )], @_); |
266
|
|
|
|
|
|
|
} else { |
267
|
|
|
|
|
|
|
$self->throw("Badly formed named args: ".join(' ',@_)); |
268
|
|
|
|
|
|
|
} |
269
|
|
|
|
|
|
|
} else { |
270
|
|
|
|
|
|
|
if (grep /^-\w+/, @_) { |
271
|
|
|
|
|
|
|
$self->throw("Badly formed named args: ".join(' ',@_)); |
272
|
|
|
|
|
|
|
} else { |
273
|
|
|
|
|
|
|
$self->throw("Require named args."); |
274
|
|
|
|
|
|
|
} |
275
|
|
|
|
|
|
|
} |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
# Sanity checks |
278
|
|
|
|
|
|
|
$self->executable || $self->throw("No executable!"); |
279
|
|
|
|
|
|
|
my $cmd = $self->command if $self->can('command'); |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
for ($cmd) { |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
=pod |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
Command |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
annotate bgv ann(s) #out |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
=cut |
290
|
|
|
|
|
|
|
m/^annotate$/ && do { |
291
|
|
|
|
|
|
|
$bgv = $self->_uncompress($bgv); |
292
|
|
|
|
|
|
|
$self->_validate_file_input(-bgv => $bgv) || $self->throw("File '$bgv' not BED/GFF/VCF format."); |
293
|
|
|
|
|
|
|
@$ann = map { |
294
|
|
|
|
|
|
|
my $a = $_; |
295
|
|
|
|
|
|
|
$a = $self->_uncompress($a); |
296
|
|
|
|
|
|
|
$self->_validate_file_input(-ann => $a) || $self->throw("File '$a' not BED/GFF/VCF format."); |
297
|
|
|
|
|
|
|
$a; |
298
|
|
|
|
|
|
|
} @$ann; |
299
|
|
|
|
|
|
|
last; |
300
|
|
|
|
|
|
|
}; |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
=pod |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
graph_union bg_files #out |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
=cut |
307
|
|
|
|
|
|
|
m/^graph_union$/ && do { |
308
|
|
|
|
|
|
|
@$bg = map { |
309
|
|
|
|
|
|
|
my $g = $_; |
310
|
|
|
|
|
|
|
$g = $self->_uncompress($g); |
311
|
|
|
|
|
|
|
$self->_validate_file_input(-bg => $g) || $self->throw("File '$a' not BedGraph format."); |
312
|
|
|
|
|
|
|
$g; |
313
|
|
|
|
|
|
|
} @$bg; |
314
|
|
|
|
|
|
|
last; |
315
|
|
|
|
|
|
|
}; |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
=pod |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
fasta_from_bed seq bgv #out |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
mask_fasta_from_bed seq bgv #out |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
=cut |
324
|
|
|
|
|
|
|
m/fasta_from_bed$/ && do { |
325
|
|
|
|
|
|
|
($out // 0) eq '-' && |
326
|
|
|
|
|
|
|
$self->throw("Cannot capture results in STDOUT with sequence commands."); |
327
|
|
|
|
|
|
|
$seq = $self->_uncompress($seq); |
328
|
|
|
|
|
|
|
$self->_validate_file_input(-seq => $seq) || $self->throw("File '$seq' not fasta format."); |
329
|
|
|
|
|
|
|
$bgv = $self->_uncompress($bgv); |
330
|
|
|
|
|
|
|
$self->_validate_file_input(-bgv => $bgv) || $self->throw("File '$bgv' not BED/GFF/VCF format."); |
331
|
|
|
|
|
|
|
last; |
332
|
|
|
|
|
|
|
}; |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
=pod |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
bam_to_bed bam #out |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
=cut |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
m/^bam_to_bed$/ && do { |
341
|
|
|
|
|
|
|
$bam = $self->_uncompress($bam); |
342
|
|
|
|
|
|
|
$self->_validate_file_input(-bam => $bam) || $self->throw("File '$bam' not BAM format."); |
343
|
|
|
|
|
|
|
last; |
344
|
|
|
|
|
|
|
}; |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
=pod |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
bed_to_IGV bgv #out |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
merge bgv #out |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
sort bgv #out |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
links bgv #out |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
=cut |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
m/^(?:bed_to_IGV|merge|sort|links)$/ && do { |
360
|
|
|
|
|
|
|
$bgv = $self->_uncompress($bgv); |
361
|
|
|
|
|
|
|
$self->_validate_file_input(-bgv => $bgv) || $self->throw("File '$bgv' not BED/GFF/VCF format."); |
362
|
|
|
|
|
|
|
}; |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
=pod |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
b12_to_b6 bed #out |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
overlap bed #out |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
group_by bed #out |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
=cut |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
m/^(?:b12_to_b6|overlap|group_by)$/ && do { |
375
|
|
|
|
|
|
|
$bed = $self->_uncompress($bed); |
376
|
|
|
|
|
|
|
$self->_validate_file_input(-bed => $bed) || $self->throw("File '$bgv' not BED format."); |
377
|
|
|
|
|
|
|
if ($cmd eq 'group_by') { |
378
|
|
|
|
|
|
|
my $c =(my @c)= split(",",$self->columns); |
379
|
|
|
|
|
|
|
my $o =(my @o)= split(",",$self->operations); |
380
|
|
|
|
|
|
|
unless ($c > 0 && $o == $c) { |
381
|
|
|
|
|
|
|
$self->throw("The command 'group_by' requires "."paired "x($o == $c)."'-columns' and '-operations' parameters"); |
382
|
|
|
|
|
|
|
} |
383
|
|
|
|
|
|
|
} |
384
|
|
|
|
|
|
|
last; |
385
|
|
|
|
|
|
|
}; |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
=pod |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
bed_to_bam bgv #out |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
shuffle bgv genome #out |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
slop bgv genome #out |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
complement bgv genome #out |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
=cut |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
m/^(?:bed_to_bam|shuffle|slop|complement)$/ && do { |
400
|
|
|
|
|
|
|
$bgv = $self->_uncompress($bgv); |
401
|
|
|
|
|
|
|
$self->_validate_file_input(-bgv => $bgv) || $self->throw("File '$bgv' not BED/GFF/VCF format."); |
402
|
|
|
|
|
|
|
$genome = $self->_uncompress($genome); |
403
|
|
|
|
|
|
|
$self->_validate_file_input(-genome => $genome) || $self->throw("File '$genome' not genome format."); |
404
|
|
|
|
|
|
|
if ($cmd eq 'slop') { |
405
|
|
|
|
|
|
|
my $l = defined $self->add_to_left; |
406
|
|
|
|
|
|
|
my $r = defined $self->add_to_right; |
407
|
|
|
|
|
|
|
my $b = defined $self->add_bidirectional; |
408
|
|
|
|
|
|
|
# I think I have a lisp |
409
|
|
|
|
|
|
|
unless (($l && $r) || ($b xor ($l || $r))) { |
410
|
|
|
|
|
|
|
$self->throw("The command 'slop' requires an unambiguous description of the slop you want"); |
411
|
|
|
|
|
|
|
} |
412
|
|
|
|
|
|
|
} |
413
|
|
|
|
|
|
|
last; |
414
|
|
|
|
|
|
|
}; |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
=pod |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
genome_coverage bed genome #out |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
=cut |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
m/^genome_coverage$/ && do { |
423
|
|
|
|
|
|
|
$bed = $self->_uncompress($bed); |
424
|
|
|
|
|
|
|
$self->_validate_file_input(-bed => $bed) || $self->throw("File '$bed' not BED format."); |
425
|
|
|
|
|
|
|
$genome = $self->_uncompress($genome); |
426
|
|
|
|
|
|
|
$self->_validate_file_input(-genome => $genome) || $self->throw("File '$genome' not genome format."); |
427
|
|
|
|
|
|
|
my ($th, $tf) = $self->io->tempfile( -dir => $self->tempdir(), -suffix => '.bed' ); |
428
|
|
|
|
|
|
|
$th->close; |
429
|
|
|
|
|
|
|
sort_file({k => 1, I => $bed, o => $tf}); |
430
|
|
|
|
|
|
|
$bed = $tf; |
431
|
|
|
|
|
|
|
last; |
432
|
|
|
|
|
|
|
}; |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
=pod |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
window bgv1 bgv2 #out |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
closest bgv1 bgv2 #out |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
coverage bgv1 bgv2 #out |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
subtract bgv1 bgv2 #out |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
=cut |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
m/^(?:window|closest|coverage|subtract)$/ && do { |
447
|
|
|
|
|
|
|
$bgv1 = $self->_uncompress($bgv1); |
448
|
|
|
|
|
|
|
$self->_validate_file_input(-bgv1 => $bgv1) || $self->throw("File '$bgv1' not BED/GFF/VCF format."); |
449
|
|
|
|
|
|
|
$bgv2 = $self->_uncompress($bgv2); |
450
|
|
|
|
|
|
|
$self->_validate_file_input(-bgv2 => $bgv2) || $self->throw("File '$bgv2' not BED/GFF/VCF format."); |
451
|
|
|
|
|
|
|
}; |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
=pod |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
pair_to_pair bedpe1 bedpe2 #out |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=cut |
458
|
|
|
|
|
|
|
m/^pair_to_pair$/ && do { |
459
|
|
|
|
|
|
|
$bedpe1 = $self->_uncompress($bedpe1); |
460
|
|
|
|
|
|
|
$self->_validate_file_input(-bedpe1 => $bedpe1) || $self->throw("File '$bedpe1' not BEDPE format."); |
461
|
|
|
|
|
|
|
$bedpe2 = $self->_uncompress($bedpe2); |
462
|
|
|
|
|
|
|
$self->_validate_file_input(-bedpe2 => $bedpe2) || $self->throw("File '$bedpe2' not BEDPE format."); |
463
|
|
|
|
|
|
|
last; |
464
|
|
|
|
|
|
|
}; |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
=pod |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
intersect bgv1|bam bgv2 #out |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
=cut |
471
|
|
|
|
|
|
|
m/^intersect$/ && do { |
472
|
|
|
|
|
|
|
$bgv1 = $self->_uncompress($bgv1); |
473
|
|
|
|
|
|
|
$bam = $self->_uncompress($bam); |
474
|
|
|
|
|
|
|
($bam && $self->_validate_file_input(-bam => $bam)) || ($bgv1 && $self->_validate_file_input(-bgv1 => $bgv1)) || |
475
|
|
|
|
|
|
|
$self->throw("File in position 1. not correct format."); |
476
|
|
|
|
|
|
|
$bgv2 = $self->_uncompress($bgv2); |
477
|
|
|
|
|
|
|
$self->_validate_file_input(-bgv2 => $bgv2) || $self->throw("File '$bgv2' not BED/GFF/VCF format."); |
478
|
|
|
|
|
|
|
last; |
479
|
|
|
|
|
|
|
}; |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
=pod |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
pair_to_bed bedpe|bam bgv #out |
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
bgv* signifies any of BED, GFF or VCF. ann is a bgv. |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
NOTE: Replace 'bgv' with 'bed' unless $use_bgv is set. |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
=cut |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
m/^pair_to_bed$/ && do { |
493
|
|
|
|
|
|
|
$bedpe = $self->_uncompress($bedpe); |
494
|
|
|
|
|
|
|
$bam = $self->_uncompress($bam); |
495
|
|
|
|
|
|
|
($bam && $self->_validate_file_input(-bam => $bam)) || ($bedpe && $self->_validate_file_input(-bedpe => $bedpe)) || |
496
|
|
|
|
|
|
|
$self->throw("File in position 1. not correct format."); |
497
|
|
|
|
|
|
|
$bgv = $self->_uncompress($bgv); |
498
|
|
|
|
|
|
|
$self->_validate_file_input(-bgv => $bgv) || $self->throw("File '$bed' not BED/GFF/VCF format."); |
499
|
|
|
|
|
|
|
last; |
500
|
|
|
|
|
|
|
} |
501
|
|
|
|
|
|
|
} |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
my %params = ( |
504
|
|
|
|
|
|
|
'-ann' => $ann, |
505
|
|
|
|
|
|
|
'-bam' => $bam, |
506
|
|
|
|
|
|
|
'-bed' => $bed, |
507
|
|
|
|
|
|
|
'-bgv' => $bgv, |
508
|
|
|
|
|
|
|
'-bg' => $bg, |
509
|
|
|
|
|
|
|
'-bgv1' => $bgv1, |
510
|
|
|
|
|
|
|
'-bgv2' => $bgv2, |
511
|
|
|
|
|
|
|
'-bedpe' => $bedpe, |
512
|
|
|
|
|
|
|
'-bedpe1' => $bedpe1, |
513
|
|
|
|
|
|
|
'-bedpe2' => $bedpe2, |
514
|
|
|
|
|
|
|
'-seq' => $seq, |
515
|
|
|
|
|
|
|
'-genome' => $genome |
516
|
|
|
|
|
|
|
); |
517
|
|
|
|
|
|
|
map { |
518
|
|
|
|
|
|
|
delete $params{$_} unless defined $params{$_} |
519
|
|
|
|
|
|
|
} keys %params; |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
my $format = $self->_determine_format(\%params); |
522
|
|
|
|
|
|
|
my $suffix = '.'.$format; |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
if (!defined $out) { |
525
|
|
|
|
|
|
|
my ($outh, $outf) = $self->io->tempfile( -dir => $self->tempdir(), -suffix => $suffix ); |
526
|
|
|
|
|
|
|
$outh->close; |
527
|
|
|
|
|
|
|
$out = $outf; |
528
|
|
|
|
|
|
|
} elsif ($out ne '-') { |
529
|
|
|
|
|
|
|
$out .= $suffix; |
530
|
|
|
|
|
|
|
} else { |
531
|
|
|
|
|
|
|
undef $out; |
532
|
|
|
|
|
|
|
} |
533
|
|
|
|
|
|
|
$params{'-out'} = $out if defined $out; |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
$self->_run(%params); |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
$self->{'_result'}->{'file_name'} = $out // '-'; |
538
|
|
|
|
|
|
|
$self->{'_result'}->{'format'} = $format; |
539
|
|
|
|
|
|
|
$self->{'_result'}->{'file'} = defined $out ? Bio::Root::IO->new( -file => $out ) : undef; |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
return $self->result; |
542
|
|
|
|
|
|
|
} |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
sub _uncompress { |
545
|
|
|
|
|
|
|
my ($self, $file) = @_; |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
return if !defined $file; |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
return $file unless ($file =~ m/\.gz[^.]*$/); |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
return $file unless (-e $file && -r _); # other people will deal with this |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
unless ($HAVE_IO_UNCOMPRESS) { |
554
|
|
|
|
|
|
|
croak( "IO::Uncompress::Gunzip not available, can't expand '$file'" ); |
555
|
|
|
|
|
|
|
} |
556
|
|
|
|
|
|
|
my ($tfh, $tf) = $self->io->tempfile( -dir => $self->tempdir() ); |
557
|
|
|
|
|
|
|
my $z = IO::Uncompress::Gunzip->new($file); |
558
|
|
|
|
|
|
|
while (my $block = $z->getline) { print $tfh $block } |
559
|
|
|
|
|
|
|
close $tfh; |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
return $tf |
562
|
|
|
|
|
|
|
} |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
=head2 want() |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
Title : want |
567
|
|
|
|
|
|
|
Usage : $bowtiefac->want( $class ) |
568
|
|
|
|
|
|
|
Function: make factory return $class, or 'raw' results in file |
569
|
|
|
|
|
|
|
or 'format' for result format |
570
|
|
|
|
|
|
|
All commands can return Bio::Root::IO |
571
|
|
|
|
|
|
|
commands returning: can return object: |
572
|
|
|
|
|
|
|
- BED or BEDPE - Bio::SeqFeature::Collection |
573
|
|
|
|
|
|
|
- sequence - Bio::SeqIO |
574
|
|
|
|
|
|
|
Returns : return wanted type |
575
|
|
|
|
|
|
|
Args : [optional] string indicating class or raw of wanted result |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
=cut |
578
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
sub want { |
580
|
|
|
|
|
|
|
my $self = shift; |
581
|
|
|
|
|
|
|
return $self->{'_want'} = shift if @_; |
582
|
|
|
|
|
|
|
return $self->{'_want'}; |
583
|
|
|
|
|
|
|
} |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
=head2 result() |
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
Title : result |
588
|
|
|
|
|
|
|
Usage : $bedtoolsfac->result( [-want => $type|$format] ) |
589
|
|
|
|
|
|
|
Function: return result in wanted format |
590
|
|
|
|
|
|
|
Returns : results |
591
|
|
|
|
|
|
|
Args : [optional] hashref of wanted type |
592
|
|
|
|
|
|
|
Note : -want arg does not persist between result() call when |
593
|
|
|
|
|
|
|
specified in result(), for persistence, use want() |
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
=cut |
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
sub result { |
598
|
|
|
|
|
|
|
my ($self, @args) = @_; |
599
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
my $want = $self->_rearrange([qw(WANT)],@args); |
601
|
|
|
|
|
|
|
$want ||= $self->want; |
602
|
|
|
|
|
|
|
my $cmd = $self->command if $self->can('command'); |
603
|
|
|
|
|
|
|
my $format = $self->{'_result'}->{'format'}; |
604
|
|
|
|
|
|
|
my $file_name = $self->{'_result'}->{'file_name'}; |
605
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
return $self->{'_result'}->{'format'} if (defined $want && $want eq 'format'); |
607
|
|
|
|
|
|
|
return $self->{'_result'}->{'file_name'} if (!$want || $want eq 'raw'); |
608
|
|
|
|
|
|
|
return $self->{'_result'}->{'file'} if ($want =~ m/^Bio::Root::IO/); # this will be undef if -out eq '-' |
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
for ($format) { # these are dissected more finely than seems resonable to allow easy extension |
611
|
|
|
|
|
|
|
m/bed/ && do { |
612
|
|
|
|
|
|
|
for ($want) { |
613
|
|
|
|
|
|
|
m/Bio::SeqFeature::Collection/ && do { |
614
|
|
|
|
|
|
|
unless (defined $self->{'_result'}->{'object'} && |
615
|
|
|
|
|
|
|
ref($self->{'_result'}->{'object'}) =~ m/^Bio::SeqFeature::Collection/) { |
616
|
|
|
|
|
|
|
$self->{'_result'}->{'object'} = $self->_read_bed; |
617
|
|
|
|
|
|
|
} |
618
|
|
|
|
|
|
|
return $self->{'_result'}->{'object'}; |
619
|
|
|
|
|
|
|
}; |
620
|
|
|
|
|
|
|
$self->warn("Cannot make '$_' for $format."); |
621
|
|
|
|
|
|
|
return; |
622
|
|
|
|
|
|
|
} |
623
|
|
|
|
|
|
|
last; |
624
|
|
|
|
|
|
|
}; |
625
|
|
|
|
|
|
|
m/bedpe/ && do { |
626
|
|
|
|
|
|
|
for ($want) { |
627
|
|
|
|
|
|
|
m/Bio::SeqFeature::Collection/ && do { |
628
|
|
|
|
|
|
|
unless (defined $self->{'_result'}->{'object'} && |
629
|
|
|
|
|
|
|
ref($self->{'_result'}->{'object'}) =~ m/^Bio::SeqFeature::Collection/) { |
630
|
|
|
|
|
|
|
$self->{'_result'}->{'object'} = $self->_read_bedpe; |
631
|
|
|
|
|
|
|
} |
632
|
|
|
|
|
|
|
return $self->{'_result'}->{'object'}; |
633
|
|
|
|
|
|
|
}; |
634
|
|
|
|
|
|
|
$self->warn("Cannot make '$_' for $format."); |
635
|
|
|
|
|
|
|
return; |
636
|
|
|
|
|
|
|
} |
637
|
|
|
|
|
|
|
last; |
638
|
|
|
|
|
|
|
}; |
639
|
|
|
|
|
|
|
m/bam/ && do { |
640
|
|
|
|
|
|
|
$self->warn("Cannot make '$_' for $format."); |
641
|
|
|
|
|
|
|
return; |
642
|
|
|
|
|
|
|
}; |
643
|
|
|
|
|
|
|
m/^(?:fasta|raw)$/ && do { |
644
|
|
|
|
|
|
|
for ($want) { |
645
|
|
|
|
|
|
|
m/Bio::SeqIO/ && do { |
646
|
|
|
|
|
|
|
$file_name eq '-' && $self->throw("Cannot make a SeqIO object from STDOUT."); |
647
|
|
|
|
|
|
|
unless (defined $self->{'_result'}->{'object'} && |
648
|
|
|
|
|
|
|
ref($self->{'_result'}->{'object'}) =~ m/^Bio::SeqIO/) { |
649
|
|
|
|
|
|
|
$self->{'_result'}->{'object'} = |
650
|
|
|
|
|
|
|
Bio::SeqIO->new(-file => $file_name, |
651
|
|
|
|
|
|
|
-format => $format); |
652
|
|
|
|
|
|
|
} |
653
|
|
|
|
|
|
|
return $self->{'_result'}->{'object'}; |
654
|
|
|
|
|
|
|
}; |
655
|
|
|
|
|
|
|
$self->warn("Cannot make '$_' for $format."); |
656
|
|
|
|
|
|
|
return; |
657
|
|
|
|
|
|
|
} |
658
|
|
|
|
|
|
|
last; |
659
|
|
|
|
|
|
|
}; |
660
|
|
|
|
|
|
|
m/tab/ && do { |
661
|
|
|
|
|
|
|
$self->warn("Cannot make '$_' for $format."); |
662
|
|
|
|
|
|
|
return; |
663
|
|
|
|
|
|
|
}; |
664
|
|
|
|
|
|
|
m/igv/ && do { |
665
|
|
|
|
|
|
|
$self->warn("Cannot make '$_' for $format."); |
666
|
|
|
|
|
|
|
return; |
667
|
|
|
|
|
|
|
}; |
668
|
|
|
|
|
|
|
m/html/ && do { |
669
|
|
|
|
|
|
|
$self->warn("Cannot make '$_' for $format."); |
670
|
|
|
|
|
|
|
return; |
671
|
|
|
|
|
|
|
}; |
672
|
|
|
|
|
|
|
do { |
673
|
|
|
|
|
|
|
$self->warn("Result format '$_' not recognised - have you called run() yet?"); |
674
|
|
|
|
|
|
|
} |
675
|
|
|
|
|
|
|
} |
676
|
|
|
|
|
|
|
} |
677
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
=head2 _determine_format() |
679
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
Title : _determine_format( $has_run ) |
681
|
|
|
|
|
|
|
Usage : $bedtools-fac->_determine_format |
682
|
|
|
|
|
|
|
Function: determine the format of output for current options |
683
|
|
|
|
|
|
|
Returns : format of bowtie output |
684
|
|
|
|
|
|
|
Args : [optional] boolean to indicate result exists |
685
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
=cut |
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
sub _determine_format { |
689
|
|
|
|
|
|
|
my ($self, $params) = @_; |
690
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
my $cmd = $self->command if $self->can('command'); |
692
|
|
|
|
|
|
|
my $format = $format_lookup{$cmd}; |
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
#special cases - dependent on switches and files |
695
|
|
|
|
|
|
|
for ($cmd) { |
696
|
|
|
|
|
|
|
m/^intersect$/ && do { |
697
|
|
|
|
|
|
|
return 'bed' if !defined $$params{'-bam'} || $self->write_bed; |
698
|
|
|
|
|
|
|
return 'bam'; |
699
|
|
|
|
|
|
|
}; |
700
|
|
|
|
|
|
|
m/^pair_to_bed$/ && do { |
701
|
|
|
|
|
|
|
return 'bedpe' if !defined $$params{'-bam'} || $self->write_bedpe; |
702
|
|
|
|
|
|
|
return 'bam'; |
703
|
|
|
|
|
|
|
}; |
704
|
|
|
|
|
|
|
m/^fasta_from_bed$/ && do { |
705
|
|
|
|
|
|
|
return $self->output_tab_format ? 'tab' : 'fasta'; |
706
|
|
|
|
|
|
|
} |
707
|
|
|
|
|
|
|
} |
708
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
return $format; |
710
|
|
|
|
|
|
|
} |
711
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
=head2 _read_bed() |
713
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
Title : _read_bed() |
715
|
|
|
|
|
|
|
Usage : $bedtools_fac->_read_bed |
716
|
|
|
|
|
|
|
Function: return a Bio::SeqFeature::Collection object from a BED file |
717
|
|
|
|
|
|
|
Returns : Bio::SeqFeature::Collection |
718
|
|
|
|
|
|
|
Args : |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
=cut |
721
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
sub _read_bed { |
723
|
|
|
|
|
|
|
my ($self) = shift; |
724
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
my @features; |
726
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
if ($self->{'_result'}->{'file_name'} ne '-') { |
728
|
|
|
|
|
|
|
my $in = $self->{'_result'}->{'file'}; |
729
|
|
|
|
|
|
|
while (my $feature = $in->_readline) { |
730
|
|
|
|
|
|
|
chomp $feature; |
731
|
|
|
|
|
|
|
push @features, _read_bed_line($feature); |
732
|
|
|
|
|
|
|
} |
733
|
|
|
|
|
|
|
} else { |
734
|
|
|
|
|
|
|
for my $feature (split("\cJ", $self->stdout)) { |
735
|
|
|
|
|
|
|
push @features, _read_bed_line($feature); |
736
|
|
|
|
|
|
|
} |
737
|
|
|
|
|
|
|
} |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
my $collection = Bio::SeqFeature::Collection->new; |
740
|
|
|
|
|
|
|
$collection->add_features(\@features); |
741
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
return $collection; |
743
|
|
|
|
|
|
|
} |
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
sub _read_bed_line { |
746
|
|
|
|
|
|
|
my $feature = shift; |
747
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
my ($chr, $start, $end, $name, $score, $strand, |
749
|
|
|
|
|
|
|
$thick_start, $thick_end, $item_RGB, $block_count, $block_size, $block_start) = |
750
|
|
|
|
|
|
|
split("\cI",$feature); |
751
|
|
|
|
|
|
|
$strand ||= '.'; # BED3 doesn't have strand data - for 'merge' and 'complement' |
752
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
return Bio::SeqFeature::Generic->new( -seq_id => $chr, |
754
|
|
|
|
|
|
|
-primary => $name, |
755
|
|
|
|
|
|
|
-start => $start, |
756
|
|
|
|
|
|
|
-end => $end, |
757
|
|
|
|
|
|
|
-strand => $strand_translate{$strand}, |
758
|
|
|
|
|
|
|
-score => $score, |
759
|
|
|
|
|
|
|
-tag => { thick_start => $thick_start, |
760
|
|
|
|
|
|
|
thick_end => $thick_end, |
761
|
|
|
|
|
|
|
item_RGB => $item_RGB, |
762
|
|
|
|
|
|
|
block_count => $block_count, |
763
|
|
|
|
|
|
|
block_size => $block_size, |
764
|
|
|
|
|
|
|
block_start => $block_size } |
765
|
|
|
|
|
|
|
); |
766
|
|
|
|
|
|
|
} |
767
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
=head2 _read_bedpe() |
769
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
Title : _read_bedpe() |
771
|
|
|
|
|
|
|
Usage : $bedtools_fac->_read_bedpe |
772
|
|
|
|
|
|
|
Function: return a Bio::SeqFeature::Collection object from a BEDPE file |
773
|
|
|
|
|
|
|
Returns : Bio::SeqFeature::Collection |
774
|
|
|
|
|
|
|
Args : |
775
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
=cut |
777
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
sub _read_bedpe { |
779
|
|
|
|
|
|
|
my ($self) = shift; |
780
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
my @features; |
782
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
if ($self->{'_result'}->{'file_name'} ne '-') { |
784
|
|
|
|
|
|
|
my $in = $self->{'_result'}->{'file'}; |
785
|
|
|
|
|
|
|
while (my $feature = $in->_readline) { |
786
|
|
|
|
|
|
|
chomp $feature; |
787
|
|
|
|
|
|
|
push @features, _read_bedpe_line($feature); |
788
|
|
|
|
|
|
|
} |
789
|
|
|
|
|
|
|
} else { |
790
|
|
|
|
|
|
|
for my $feature (split("\cJ", $self->stdout)) { |
791
|
|
|
|
|
|
|
push @features, _read_bedpe_line($feature); |
792
|
|
|
|
|
|
|
} |
793
|
|
|
|
|
|
|
} |
794
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
my $collection = Bio::SeqFeature::Collection->new; |
796
|
|
|
|
|
|
|
$collection->add_features(\@features); |
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
return $collection; |
799
|
|
|
|
|
|
|
} |
800
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
sub _read_bedpe_line { |
802
|
|
|
|
|
|
|
my $feature = shift; |
803
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
my ($chr1, $start1, $end1, $chr2, $start2, $end2, $name, $score, $strand1, $strand2, @add) = |
805
|
|
|
|
|
|
|
split("\cI",$feature); |
806
|
|
|
|
|
|
|
$strand1 ||= '.'; |
807
|
|
|
|
|
|
|
$strand2 ||= '.'; |
808
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
return Bio::SeqFeature::FeaturePair->new( -primary => $name, |
810
|
|
|
|
|
|
|
-seq_id => $chr1, |
811
|
|
|
|
|
|
|
-start => $start1, |
812
|
|
|
|
|
|
|
-end => $end1, |
813
|
|
|
|
|
|
|
-strand => $strand_translate{$strand1}, |
814
|
|
|
|
|
|
|
|
815
|
|
|
|
|
|
|
-hprimary_tag => $name, |
816
|
|
|
|
|
|
|
-hseqname => $chr2, |
817
|
|
|
|
|
|
|
-hstart => $start2, |
818
|
|
|
|
|
|
|
-hend => $end2, |
819
|
|
|
|
|
|
|
-hstrand => $strand_translate{$strand2}, |
820
|
|
|
|
|
|
|
|
821
|
|
|
|
|
|
|
-score => $score |
822
|
|
|
|
|
|
|
); |
823
|
|
|
|
|
|
|
} |
824
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
=head2 _validate_file_input() |
826
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
Title : _validate_file_input |
828
|
|
|
|
|
|
|
Usage : $bedtools_fac->_validate_file_input( -type => $file ) |
829
|
|
|
|
|
|
|
Function: validate file type for file spec |
830
|
|
|
|
|
|
|
Returns : file type if valid type for file spec |
831
|
|
|
|
|
|
|
Args : hash of filespec => file_name |
832
|
|
|
|
|
|
|
|
833
|
|
|
|
|
|
|
=cut |
834
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
sub _validate_file_input { |
836
|
|
|
|
|
|
|
my ($self, @args) = @_; |
837
|
|
|
|
|
|
|
my (%args); |
838
|
|
|
|
|
|
|
if (grep (/^-/, @args) && (@args > 1)) { # named parms |
839
|
|
|
|
|
|
|
$self->throw("Wrong number of args - requires one named arg") if (@args > 2); |
840
|
|
|
|
|
|
|
s/^-// for @args; |
841
|
|
|
|
|
|
|
%args = @args; |
842
|
|
|
|
|
|
|
} else { |
843
|
|
|
|
|
|
|
$self->throw("Must provide named filespec"); |
844
|
|
|
|
|
|
|
} |
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
for (keys %args) { |
847
|
|
|
|
|
|
|
m/bam/ && do { |
848
|
|
|
|
|
|
|
return 'bam'; |
849
|
|
|
|
|
|
|
}; |
850
|
|
|
|
|
|
|
do { |
851
|
|
|
|
|
|
|
return unless ( -e $args{$_} && -r _ ); |
852
|
|
|
|
|
|
|
my $guesser = Bio::Tools::GuessSeqFormat->new(-file=>$args{$_}); |
853
|
|
|
|
|
|
|
return $guesser->guess if grep {$guesser->guess =~ m/$_/} @{$accepted_types{$_}}; |
854
|
|
|
|
|
|
|
} |
855
|
|
|
|
|
|
|
} |
856
|
|
|
|
|
|
|
return; |
857
|
|
|
|
|
|
|
} |
858
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
=head2 version() |
860
|
|
|
|
|
|
|
|
861
|
|
|
|
|
|
|
Title : version |
862
|
|
|
|
|
|
|
Usage : $version = $bedtools_fac->version() |
863
|
|
|
|
|
|
|
Function: Returns the program version (if available) |
864
|
|
|
|
|
|
|
Returns : string representing location and version of the program |
865
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
=cut |
867
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
sub version{ |
869
|
|
|
|
|
|
|
my ($self) = @_; |
870
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
my $cmd = $self->command if $self->can('command'); |
872
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
defined $cmd or $self->throw("No command defined - cannot determine program executable"); |
874
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
# new bahaviour for some BEDTools executables - breaks previous approach to getting version |
876
|
|
|
|
|
|
|
# $dummy can be any non-recognised parameter - '-version' works for most |
877
|
|
|
|
|
|
|
my $dummy = '-version'; |
878
|
|
|
|
|
|
|
$dummy = '-examples' if $cmd =~ /graph_union/; |
879
|
|
|
|
|
|
|
|
880
|
|
|
|
|
|
|
my ($in, $out, $err); |
881
|
|
|
|
|
|
|
my $dum; |
882
|
|
|
|
|
|
|
$in = \$dum; |
883
|
|
|
|
|
|
|
$out = \$self->{'stdout'}; |
884
|
|
|
|
|
|
|
$err = \$self->{'stderr'}; |
885
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
# Get program executable |
887
|
|
|
|
|
|
|
my $exe = $self->executable; |
888
|
|
|
|
|
|
|
|
889
|
|
|
|
|
|
|
my @ipc_args = ( $exe, $dummy ); |
890
|
|
|
|
|
|
|
|
891
|
|
|
|
|
|
|
eval { |
892
|
|
|
|
|
|
|
IPC::Run::run(\@ipc_args, $in, $out, $err) or |
893
|
|
|
|
|
|
|
die ("There was a problem running $exe : $!"); |
894
|
|
|
|
|
|
|
}; |
895
|
|
|
|
|
|
|
# We don't bother trying to catch this: version is returned as an illegal file seek |
896
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
my @details = split("\n",$self->stderr); |
898
|
|
|
|
|
|
|
(my $version) = grep /^Program: .*$/, @details; |
899
|
|
|
|
|
|
|
$version =~ s/^Program: //; |
900
|
|
|
|
|
|
|
|
901
|
|
|
|
|
|
|
return $version; |
902
|
|
|
|
|
|
|
} |
903
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
sub available_commands { shift->available_parameters('commands') }; |
905
|
|
|
|
|
|
|
|
906
|
|
|
|
|
|
|
sub filespec { shift->available_parameters('filespec') }; |
907
|
|
|
|
|
|
|
|
908
|
|
|
|
|
|
|
1; |