File Coverage

blib/lib/Bio/Tools/Run/Alignment/Gmap.pm
Criterion Covered Total %
statement 28 76 36.8
branch 2 24 8.3
condition 0 4 0.0
subroutine 9 16 56.2
pod 8 8 100.0
total 47 128 36.7


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tools::Run::Alignment::Gmap
3             #
4             # Cared for by George Hartzell
5             #
6             # Copyright George Hartzell
7             #
8             # You may distribute this module under the same terms as perl itself
9              
10             # POD documentation - main docs before the code
11              
12             =head1 NAME
13              
14             Bio::Tools::Run::Alignment::Gmap - Wrapper for running gmap.
15              
16             =head1 SYNOPSIS
17              
18             use Bio::Tools::Run::Alignment::Gmap;
19             use Bio::SeqIO;
20              
21             my $sio = Bio::SeqIO->new(-file=>$filename ,-format=>'fasta');
22             my @seq;
23             while(my $seq = $sio->next_seq()){
24             push @seq,$seq;
25             }
26             my $mapper =Bio::Tools::Run::Gmap->new();
27             my $result = $mapper->run(\@seq);
28              
29              
30             =head1 DESCRIPTION
31              
32             Bioperl-run wrapper around gmap. See
33             L for information about gmap.
34              
35             It requires a reference to an array of bioperl SeqI objects and
36             returns a reference to a filehandle from which the gmap output can be
37             read.
38              
39             One can explicitly set the name of the genome database (defaults to
40             NHGD_R36) using the 'genome_db()' method. One can also explicitly set
41             the flags that are passed to gmap (defaults to '-f 9 -5 -e') using the
42             'flags()' method.
43              
44             The name of the gmap executable can be overridden using the
45             program_name() method and the directory in which to find that
46             executable can be overridden using the program_dir() method.
47              
48             =head1 FEEDBACK
49              
50             =head2 Mailing Lists
51              
52             User feedback is an integral part of the evolution of this and other
53             Bioperl modules. Send your comments and suggestions preferably to
54             the Bioperl mailing list. Your participation is much appreciated.
55              
56             bioperl-l@bioperl.org - General discussion
57             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
58              
59             =head2 Reporting Bugs
60              
61             Report bugs to the Bioperl bug tracking system to help us keep track
62             of the bugs and their resolution. Bug reports can be submitted via
63             the web:
64              
65             http://redmine.open-bio.org/projects/bioperl/
66              
67             =head1 AUTHOR - George Hartzell
68              
69             Email hartzell@alerce.com
70              
71             Describe contact details here
72              
73             =head1 CONTRIBUTORS
74              
75             Additional contributors names and emails here
76              
77             =head1 APPENDIX
78              
79             The rest of the documentation details each of the object methods.
80             Internal methods are usually preceded with a _
81              
82             =cut
83              
84              
85             # Let the code begin...
86              
87             # TODO handle stderr output from gmap.
88              
89             package Bio::Tools::Run::Alignment::Gmap;
90 1     1   102495 use strict;
  1         3  
  1         23  
91 1     1   4 use warnings;
  1         2  
  1         23  
92              
93             # Object preamble - inherits from Bio::Root::Root
94              
95 1     1   255 use Bio::Root::Root;
  1         16752  
  1         28  
96 1     1   313 use Bio::SeqIO;
  1         20515  
  1         31  
97              
98              
99 1         327 use base qw(Bio::Root::Root Bio::Tools::Run::WrapperBase
100 1     1   6 Bio::Factory::ApplicationFactoryI);
  1         2  
101              
102             =head2 new
103              
104             Title : new
105             Usage : my $obj = new Bio::Tools::Run::Alignment::Gmap();
106             Function: Builds a new Bio::Tools::Run::Alignment::Gmap object
107             Returns : an instance of Bio::Tools::Run::Alignment::Gmap
108             Args :
109              
110              
111             =cut
112              
113             sub new {
114 1     1 1 28844 my($class,@args) = @_;
115              
116 1         6 my $self = $class->SUPER::new(@args);
117 1         14 $self->{_program_name} = 'gmap';
118 1         3 return $self;
119             }
120              
121             =head2 version
122              
123             Title : version
124             Usage : print "gmap version: " . $mapper->version() . "\n";
125             Function: retrieves and returns the version of the gmap package.
126             Example :
127             Returns : scalar string containing the version number. Probably looks
128             like YYYY-MM-DD.
129             Args : none.
130              
131              
132             =cut
133              
134             sub version {
135 0     0 1 0 my ($self,@args) = @_;
136 0         0 my $version;
137              
138 0         0 my $str = $self->executable;
139 0         0 $str .= ' --version';
140 0         0 $self->debug("gmap version command = $str\n");
141              
142 0 0       0 open(GMAPRUN, "$str |") || $self->throw($@);
143              
144             {
145 0         0 local $/ = undef;
  0         0  
146 0         0 my $result = ;
147 0         0 ($version) = ($result =~ m|.*Part of GMAP package, version (.*).*|);
148             }
149              
150 0         0 return($version);
151              
152             }
153              
154             =head2 program_name
155              
156             Title : program_name
157             Usage : $mapper->program_name('gmap-dev');
158             my $pname = $mapper->program_name();
159             Function: sets/gets the name of the program to run.
160             Returns : string representing the name of the executable.
161             Args : [optional] string representing the name of the executable
162             to set.
163              
164             =cut
165              
166             sub program_name {
167 6     6 1 9 my $self = shift;
168              
169 6 50       15 $self->{_program_name} = shift if @_;
170 6         34 return $self->{_program_name};
171             }
172              
173             =head2 program_dir
174              
175             Title : program_dir
176             Usage : $mapper->program_dir('/usr/local/sandbox/gmap/bin');
177             my $pdir = $mapper->program_dir();
178             Function: sets/gets the directory path in which
179             to find the gmap executable.
180             Returns : string representing the path to the directory.
181             Args : [optional] string representing the directory path to set.
182              
183             =cut
184              
185             sub program_dir {
186 3     3 1 5 my $self = shift;
187              
188 3 50       6 $self->{_program_dir} = shift if @_;
189 3         10 return $self->{_program_dir};
190             }
191              
192             =head2 input_file
193              
194             Title : input_file
195             Usage : $mapper->input_file('/tmp/moose.fasta');
196             my $filename = $mapper->input_file();
197             Function: sets/gets the name of a file containing sequences
198             to be mapped.
199             Returns : string containing the name of the query sequence.
200             Args : [optional] string representing the directory path to set.
201              
202             =cut
203              
204             sub input_file {
205 0     0 1   my $self = shift;
206              
207 0 0         $self->{_input_file} = shift if @_;
208 0           return $self->{_input_file};
209             }
210              
211             =head2 genome_db
212              
213             Title : genome_db
214             Usage : $mapper->genome_db('NHGD_R36');
215             my $genome_db = $mapper->genome_db();
216             Function: sets/gets the name of the genome database, this will be
217             passed to gmap using its '-d' flag.
218             Returns : name of the genome database.
219             Args : [optional] string representing the genome db to set.
220              
221             =cut
222              
223             sub genome_db {
224 0     0 1   my $self = shift;
225              
226 0 0         $self->{_genome_db} = shift if @_;
227 0           return $self->{_genome_db};
228             }
229              
230             =head2 flags
231              
232             Title : flags
233             Usage : $mapper->flags('-A -e -5');
234             my $flags = $mapper->flags();
235             Function: sets/gets the flags that will be passed to gmap.
236             Returns : the current value of the flags that will be passed to gmap.
237             Args : [optional] the flags to set.
238              
239             =cut
240              
241             sub flags {
242 0     0 1   my $self = shift;
243              
244 0 0         $self->{_flags} = shift if @_;
245 0           return $self->{_flags};
246             }
247              
248             =head2 run
249              
250             Title : run
251             Usage : $mapper->run()
252             Function: runs gmap
253             Example :
254             Returns : a file handle, opened for reading, for gmap's output.
255             Args : An array of references query sequences (as Bio::Seq objects)
256              
257              
258             =cut
259              
260             sub run {
261 0     0 1   my $self = shift;
262              
263 0 0         $self->input_file( $self->_build_fasta_input_file(@_) ) if(@_);
264              
265 0           my $result = $self->_run();
266              
267 0           return $result;
268             }
269              
270             =head2 _build_fasta_input_file
271              
272             Title : _build_fasta_input_file
273             Usage : my $seq_file = $self->_build_fasta_input_file(@_);
274             Function:
275             Example :
276             Returns : The name of the temporary file that contains the sequence.
277             Args : A reference to an array of Bio::Seq objects.
278              
279             =cut
280              
281 1     1   464 use File::Temp;
  1         2  
  1         341  
282              
283             sub _build_fasta_input_file {
284 0     0     my $self = shift;
285 0           my $seqs = shift;
286 0           my $seq_count = 0;
287              
288             # the object returned by File::Temp->new() is magic. Used normally
289             # it behaves as a filehandle opened onto the temporary file. Used
290             # as a string it behaves as a string that is the name of the
291             # temporary file.
292             # It is up to the user to remove the when finished with it.
293 0           my $file_tmp = File::Temp->new( TEMPLATE => 'mvp-gmap-tempfile-XXXXXX',
294             TMPDIR => 1,
295             UNLINK => 0,
296             );
297 0           my $seqio = Bio::SeqIO->new( -fh => $file_tmp, -format => 'Fasta' );
298              
299 0 0         if (ref($seqs) =~ /ARRAY/i) {
300 0           foreach my $seq (@$seqs) {
301 0 0         throw
302             Bio::Root::BadParameter(-text =>
303             "sequence args must be a Bio::SeqI subclass.",
304             )
305             unless ($seq->isa("Bio::PrimarySeqI"));
306              
307 0           $seqio->write_seq($seq);
308 0           $seq_count++;
309             }
310             }
311 0 0         if ($seq_count == 0) {
312 0           throw
313             Bio::Root::BadParameter(-text => <
314             You must supply a reference to an array of Bio::SeqI objects.
315             EOM
316             );
317             }
318              
319             # pass back the filename of the temporary file (see above)
320 0           return ("$file_tmp");
321             }
322              
323              
324             sub _run {
325 0     0     my $self = shift;
326              
327 0 0         my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
328 0           my $str = $self->executable;
329 0   0       $str .= ' -d' . ($self->genome_db() || 'NHGD_R36');
330 0   0       $str .= ' ' . ($self->flags() || '-f 9 -5 -e');
331 0           $str .= ' ' . $self->input_file();
332 0           $str .= " 2> $null";
333 0           $str .= '; rm -f ' . $self->input_file();
334 0           $self->debug("gmap command = $str\n");
335              
336 0 0         open(GMAPRUN, "$str |") || $self->throw("Can't open gmap (command = \"$str\"): $!");
337              
338 0           return (\*GMAPRUN);
339             }
340              
341             1;