line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::Tools::Run::Alignment::Gmap |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Cared for by George Hartzell |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Copyright George Hartzell |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 NAME |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
Bio::Tools::Run::Alignment::Gmap - Wrapper for running gmap. |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 SYNOPSIS |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
use Bio::Tools::Run::Alignment::Gmap; |
19
|
|
|
|
|
|
|
use Bio::SeqIO; |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
my $sio = Bio::SeqIO->new(-file=>$filename ,-format=>'fasta'); |
22
|
|
|
|
|
|
|
my @seq; |
23
|
|
|
|
|
|
|
while(my $seq = $sio->next_seq()){ |
24
|
|
|
|
|
|
|
push @seq,$seq; |
25
|
|
|
|
|
|
|
} |
26
|
|
|
|
|
|
|
my $mapper =Bio::Tools::Run::Gmap->new(); |
27
|
|
|
|
|
|
|
my $result = $mapper->run(\@seq); |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head1 DESCRIPTION |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
Bioperl-run wrapper around gmap. See |
33
|
|
|
|
|
|
|
L for information about gmap. |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
It requires a reference to an array of bioperl SeqI objects and |
36
|
|
|
|
|
|
|
returns a reference to a filehandle from which the gmap output can be |
37
|
|
|
|
|
|
|
read. |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
One can explicitly set the name of the genome database (defaults to |
40
|
|
|
|
|
|
|
NHGD_R36) using the 'genome_db()' method. One can also explicitly set |
41
|
|
|
|
|
|
|
the flags that are passed to gmap (defaults to '-f 9 -5 -e') using the |
42
|
|
|
|
|
|
|
'flags()' method. |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
The name of the gmap executable can be overridden using the |
45
|
|
|
|
|
|
|
program_name() method and the directory in which to find that |
46
|
|
|
|
|
|
|
executable can be overridden using the program_dir() method. |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=head1 FEEDBACK |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=head2 Mailing Lists |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
53
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to |
54
|
|
|
|
|
|
|
the Bioperl mailing list. Your participation is much appreciated. |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
57
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=head2 Reporting Bugs |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
62
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via |
63
|
|
|
|
|
|
|
the web: |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
http://redmine.open-bio.org/projects/bioperl/ |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head1 AUTHOR - George Hartzell |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
Email hartzell@alerce.com |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
Describe contact details here |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
Additional contributors names and emails here |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=head1 APPENDIX |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
The rest of the documentation details each of the object methods. |
80
|
|
|
|
|
|
|
Internal methods are usually preceded with a _ |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=cut |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# Let the code begin... |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# TODO handle stderr output from gmap. |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
package Bio::Tools::Run::Alignment::Gmap; |
90
|
1
|
|
|
1
|
|
106147
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
22
|
|
91
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
1
|
|
|
|
|
0
|
|
|
1
|
|
|
|
|
22
|
|
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
# Object preamble - inherits from Bio::Root::Root |
94
|
|
|
|
|
|
|
|
95
|
1
|
|
|
1
|
|
431
|
use Bio::Root::Root; |
|
1
|
|
|
|
|
17846
|
|
|
1
|
|
|
|
|
29
|
|
96
|
1
|
|
|
1
|
|
509
|
use Bio::SeqIO; |
|
1
|
|
|
|
|
21512
|
|
|
1
|
|
|
|
|
30
|
|
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
|
99
|
1
|
|
|
|
|
436
|
use base qw(Bio::Root::Root Bio::Tools::Run::WrapperBase |
100
|
1
|
|
|
1
|
|
7
|
Bio::Factory::ApplicationFactoryI); |
|
1
|
|
|
|
|
1
|
|
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
=head2 new |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
Title : new |
105
|
|
|
|
|
|
|
Usage : my $obj = new Bio::Tools::Run::Alignment::Gmap(); |
106
|
|
|
|
|
|
|
Function: Builds a new Bio::Tools::Run::Alignment::Gmap object |
107
|
|
|
|
|
|
|
Returns : an instance of Bio::Tools::Run::Alignment::Gmap |
108
|
|
|
|
|
|
|
Args : |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=cut |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
sub new { |
114
|
1
|
|
|
1
|
1
|
30354
|
my($class,@args) = @_; |
115
|
|
|
|
|
|
|
|
116
|
1
|
|
|
|
|
6
|
my $self = $class->SUPER::new(@args); |
117
|
1
|
|
|
|
|
13
|
$self->{_program_name} = 'gmap'; |
118
|
1
|
|
|
|
|
3
|
return $self; |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=head2 version |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
Title : version |
124
|
|
|
|
|
|
|
Usage : print "gmap version: " . $mapper->version() . "\n"; |
125
|
|
|
|
|
|
|
Function: retrieves and returns the version of the gmap package. |
126
|
|
|
|
|
|
|
Example : |
127
|
|
|
|
|
|
|
Returns : scalar string containing the version number. Probably looks |
128
|
|
|
|
|
|
|
like YYYY-MM-DD. |
129
|
|
|
|
|
|
|
Args : none. |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=cut |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
sub version { |
135
|
0
|
|
|
0
|
1
|
0
|
my ($self,@args) = @_; |
136
|
0
|
|
|
|
|
0
|
my $version; |
137
|
|
|
|
|
|
|
|
138
|
0
|
|
|
|
|
0
|
my $str = $self->executable; |
139
|
0
|
|
|
|
|
0
|
$str .= ' --version'; |
140
|
0
|
|
|
|
|
0
|
$self->debug("gmap version command = $str\n"); |
141
|
|
|
|
|
|
|
|
142
|
0
|
0
|
|
|
|
0
|
open(GMAPRUN, "$str |") || $self->throw($@); |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
{ |
145
|
0
|
|
|
|
|
0
|
local $/ = undef; |
|
0
|
|
|
|
|
0
|
|
146
|
0
|
|
|
|
|
0
|
my $result = ; |
147
|
0
|
|
|
|
|
0
|
($version) = ($result =~ m|.*Part of GMAP package, version (.*).*|); |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
|
150
|
0
|
|
|
|
|
0
|
return($version); |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
} |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
=head2 program_name |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
Title : program_name |
157
|
|
|
|
|
|
|
Usage : $mapper->program_name('gmap-dev'); |
158
|
|
|
|
|
|
|
my $pname = $mapper->program_name(); |
159
|
|
|
|
|
|
|
Function: sets/gets the name of the program to run. |
160
|
|
|
|
|
|
|
Returns : string representing the name of the executable. |
161
|
|
|
|
|
|
|
Args : [optional] string representing the name of the executable |
162
|
|
|
|
|
|
|
to set. |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=cut |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
sub program_name { |
167
|
6
|
|
|
6
|
1
|
6
|
my $self = shift; |
168
|
|
|
|
|
|
|
|
169
|
6
|
50
|
|
|
|
13
|
$self->{_program_name} = shift if @_; |
170
|
6
|
|
|
|
|
28
|
return $self->{_program_name}; |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=head2 program_dir |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
Title : program_dir |
176
|
|
|
|
|
|
|
Usage : $mapper->program_dir('/usr/local/sandbox/gmap/bin'); |
177
|
|
|
|
|
|
|
my $pdir = $mapper->program_dir(); |
178
|
|
|
|
|
|
|
Function: sets/gets the directory path in which |
179
|
|
|
|
|
|
|
to find the gmap executable. |
180
|
|
|
|
|
|
|
Returns : string representing the path to the directory. |
181
|
|
|
|
|
|
|
Args : [optional] string representing the directory path to set. |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
=cut |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
sub program_dir { |
186
|
3
|
|
|
3
|
1
|
4
|
my $self = shift; |
187
|
|
|
|
|
|
|
|
188
|
3
|
50
|
|
|
|
8
|
$self->{_program_dir} = shift if @_; |
189
|
3
|
|
|
|
|
7
|
return $self->{_program_dir}; |
190
|
|
|
|
|
|
|
} |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
=head2 input_file |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
Title : input_file |
195
|
|
|
|
|
|
|
Usage : $mapper->input_file('/tmp/moose.fasta'); |
196
|
|
|
|
|
|
|
my $filename = $mapper->input_file(); |
197
|
|
|
|
|
|
|
Function: sets/gets the name of a file containing sequences |
198
|
|
|
|
|
|
|
to be mapped. |
199
|
|
|
|
|
|
|
Returns : string containing the name of the query sequence. |
200
|
|
|
|
|
|
|
Args : [optional] string representing the directory path to set. |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
=cut |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
sub input_file { |
205
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
206
|
|
|
|
|
|
|
|
207
|
0
|
0
|
|
|
|
|
$self->{_input_file} = shift if @_; |
208
|
0
|
|
|
|
|
|
return $self->{_input_file}; |
209
|
|
|
|
|
|
|
} |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=head2 genome_db |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
Title : genome_db |
214
|
|
|
|
|
|
|
Usage : $mapper->genome_db('NHGD_R36'); |
215
|
|
|
|
|
|
|
my $genome_db = $mapper->genome_db(); |
216
|
|
|
|
|
|
|
Function: sets/gets the name of the genome database, this will be |
217
|
|
|
|
|
|
|
passed to gmap using its '-d' flag. |
218
|
|
|
|
|
|
|
Returns : name of the genome database. |
219
|
|
|
|
|
|
|
Args : [optional] string representing the genome db to set. |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
=cut |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
sub genome_db { |
224
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
225
|
|
|
|
|
|
|
|
226
|
0
|
0
|
|
|
|
|
$self->{_genome_db} = shift if @_; |
227
|
0
|
|
|
|
|
|
return $self->{_genome_db}; |
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
=head2 flags |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
Title : flags |
233
|
|
|
|
|
|
|
Usage : $mapper->flags('-A -e -5'); |
234
|
|
|
|
|
|
|
my $flags = $mapper->flags(); |
235
|
|
|
|
|
|
|
Function: sets/gets the flags that will be passed to gmap. |
236
|
|
|
|
|
|
|
Returns : the current value of the flags that will be passed to gmap. |
237
|
|
|
|
|
|
|
Args : [optional] the flags to set. |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
=cut |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
sub flags { |
242
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
243
|
|
|
|
|
|
|
|
244
|
0
|
0
|
|
|
|
|
$self->{_flags} = shift if @_; |
245
|
0
|
|
|
|
|
|
return $self->{_flags}; |
246
|
|
|
|
|
|
|
} |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
=head2 run |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
Title : run |
251
|
|
|
|
|
|
|
Usage : $mapper->run() |
252
|
|
|
|
|
|
|
Function: runs gmap |
253
|
|
|
|
|
|
|
Example : |
254
|
|
|
|
|
|
|
Returns : a file handle, opened for reading, for gmap's output. |
255
|
|
|
|
|
|
|
Args : An array of references query sequences (as Bio::Seq objects) |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
=cut |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
sub run { |
261
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
262
|
|
|
|
|
|
|
|
263
|
0
|
0
|
|
|
|
|
$self->input_file( $self->_build_fasta_input_file(@_) ) if(@_); |
264
|
|
|
|
|
|
|
|
265
|
0
|
|
|
|
|
|
my $result = $self->_run(); |
266
|
|
|
|
|
|
|
|
267
|
0
|
|
|
|
|
|
return $result; |
268
|
|
|
|
|
|
|
} |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
=head2 _build_fasta_input_file |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
Title : _build_fasta_input_file |
273
|
|
|
|
|
|
|
Usage : my $seq_file = $self->_build_fasta_input_file(@_); |
274
|
|
|
|
|
|
|
Function: |
275
|
|
|
|
|
|
|
Example : |
276
|
|
|
|
|
|
|
Returns : The name of the temporary file that contains the sequence. |
277
|
|
|
|
|
|
|
Args : A reference to an array of Bio::Seq objects. |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
=cut |
280
|
|
|
|
|
|
|
|
281
|
1
|
|
|
1
|
|
432
|
use File::Temp; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
307
|
|
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
sub _build_fasta_input_file { |
284
|
0
|
|
|
0
|
|
|
my $self = shift; |
285
|
0
|
|
|
|
|
|
my $seqs = shift; |
286
|
0
|
|
|
|
|
|
my $seq_count = 0; |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
# the object returned by File::Temp->new() is magic. Used normally |
289
|
|
|
|
|
|
|
# it behaves as a filehandle opened onto the temporary file. Used |
290
|
|
|
|
|
|
|
# as a string it behaves as a string that is the name of the |
291
|
|
|
|
|
|
|
# temporary file. |
292
|
|
|
|
|
|
|
# It is up to the user to remove the when finished with it. |
293
|
0
|
|
|
|
|
|
my $file_tmp = File::Temp->new( TEMPLATE => 'mvp-gmap-tempfile-XXXXXX', |
294
|
|
|
|
|
|
|
TMPDIR => 1, |
295
|
|
|
|
|
|
|
UNLINK => 0, |
296
|
|
|
|
|
|
|
); |
297
|
0
|
|
|
|
|
|
my $seqio = Bio::SeqIO->new( -fh => $file_tmp, -format => 'Fasta' ); |
298
|
|
|
|
|
|
|
|
299
|
0
|
0
|
|
|
|
|
if (ref($seqs) =~ /ARRAY/i) { |
300
|
0
|
|
|
|
|
|
foreach my $seq (@$seqs) { |
301
|
0
|
0
|
|
|
|
|
throw |
302
|
|
|
|
|
|
|
Bio::Root::BadParameter(-text => |
303
|
|
|
|
|
|
|
"sequence args must be a Bio::SeqI subclass.", |
304
|
|
|
|
|
|
|
) |
305
|
|
|
|
|
|
|
unless ($seq->isa("Bio::PrimarySeqI")); |
306
|
|
|
|
|
|
|
|
307
|
0
|
|
|
|
|
|
$seqio->write_seq($seq); |
308
|
0
|
|
|
|
|
|
$seq_count++; |
309
|
|
|
|
|
|
|
} |
310
|
|
|
|
|
|
|
} |
311
|
0
|
0
|
|
|
|
|
if ($seq_count == 0) { |
312
|
0
|
|
|
|
|
|
throw |
313
|
|
|
|
|
|
|
Bio::Root::BadParameter(-text => <
|
314
|
|
|
|
|
|
|
You must supply a reference to an array of Bio::SeqI objects. |
315
|
|
|
|
|
|
|
EOM |
316
|
|
|
|
|
|
|
); |
317
|
|
|
|
|
|
|
} |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
# pass back the filename of the temporary file (see above) |
320
|
0
|
|
|
|
|
|
return ("$file_tmp"); |
321
|
|
|
|
|
|
|
} |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
sub _run { |
325
|
0
|
|
|
0
|
|
|
my $self = shift; |
326
|
|
|
|
|
|
|
|
327
|
0
|
0
|
|
|
|
|
my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null'; |
328
|
0
|
|
|
|
|
|
my $str = $self->executable; |
329
|
0
|
|
0
|
|
|
|
$str .= ' -d' . ($self->genome_db() || 'NHGD_R36'); |
330
|
0
|
|
0
|
|
|
|
$str .= ' ' . ($self->flags() || '-f 9 -5 -e'); |
331
|
0
|
|
|
|
|
|
$str .= ' ' . $self->input_file(); |
332
|
0
|
|
|
|
|
|
$str .= " 2> $null"; |
333
|
0
|
|
|
|
|
|
$str .= '; rm -f ' . $self->input_file(); |
334
|
0
|
|
|
|
|
|
$self->debug("gmap command = $str\n"); |
335
|
|
|
|
|
|
|
|
336
|
0
|
0
|
|
|
|
|
open(GMAPRUN, "$str |") || $self->throw("Can't open gmap (command = \"$str\"): $!"); |
337
|
|
|
|
|
|
|
|
338
|
0
|
|
|
|
|
|
return (\*GMAPRUN); |
339
|
|
|
|
|
|
|
} |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
1; |