File Coverage

blib/lib/Bio/Tools/Run/Alignment/Sim4.pm
Criterion Covered Total %
statement 54 134 40.3
branch 6 46 13.0
condition 0 9 0.0
subroutine 14 19 73.6
pod 5 5 100.0
total 79 213 37.0


line stmt bran cond sub pod time code
1             # BioPerl module for Bio::Tools::Run::Alignment::Sim4
2             #
3             # Please direct questions and support issues to
4             #
5             # Cared for by
6             #
7             # Copyright Shawn Hoon
8             #
9             # You may distribute this module under the same terms as perl itself
10             # POD documentation - main docs before the code
11              
12             =head1 NAME
13              
14             Bio::Tools::Run::Alignment::Sim4 - Wrapper for Sim4 program that allows
15             for alignment of cdna to genomic sequences
16              
17             =head1 SYNOPSIS
18              
19             use Bio::Tools::Run::Alignment::Sim4;
20              
21             my @params = (W=>15,K=>17,D=>10,N=>10,cdna_seq=>"mouse_cdna.fa",genomic_seq=>"mouse_genomic.fa");
22             my $sim4 = Bio::Tools::Run::Alignment::Sim4->new(@params);
23              
24             my @exon_sets = $sim4->align;
25              
26             foreach my $set(@exon_sets){
27             foreach my $exon($set->sub_SeqFeature){
28             print $exon->start."\t".$exon->end."\t".$exon->strand."\n";
29             print "\tMatched ".$exon->est_hit->seq_id."\t".$exon->est_hit->start."\t".$exon->est_hit->end."\n";
30             }
31             }
32              
33             One can also provide a est database
34              
35             $sio = Bio::SeqIO->new(-file=>"est.fa",-format=>"fasta");
36             @est_seq=();
37             while(my $seq = $sio->next_seq){
38             push @est_seq,$seq;
39             }
40              
41             my @exon_sets = $factory->align(\@est_seq,$genomic);
42              
43             =head1 DESCRIPTION
44              
45             Sim4 program is developed by Florea et al. for aligning cdna/est
46             sequence to genomic sequences
47              
48             Florea L, Hartzell G, Zhang Z, Rubin GM, Miller W.
49             A computer program for aligning a cDNA sequence with a genomic DNA sequence.
50             Genome Res 1998 Sep;8(9):967-74
51              
52             The program is available for download here:
53             http://globin.cse.psu.edu/
54              
55             =head1 FEEDBACK
56              
57             =head2 Mailing Lists
58              
59             User feedback is an integral part of the evolution of this and other
60             Bioperl modules. Send your comments and suggestions preferably to one
61             of the Bioperl mailing lists. Your participation is much appreciated.
62              
63             bioperl-l@bioperl.org - General discussion
64             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
65              
66             =head2 Support
67              
68             Please direct usage questions or support issues to the mailing list:
69              
70             I
71              
72             rather than to the module maintainer directly. Many experienced and
73             reponsive experts will be able look at the problem and quickly
74             address it. Please include a thorough description of the problem
75             with code and data examples if at all possible.
76              
77             =head2 Reporting Bugs
78              
79             Report bugs to the Bioperl bug tracking system to help us keep track
80             the bugs and their resolution. Bug reports can be submitted via the
81             web:
82              
83             http://redmine.open-bio.org/projects/bioperl/
84              
85             =head1 AUTHOR - Shawn Hoon
86              
87             Email shawnh@fugu-sg.org
88              
89             =head1 APPENDIX
90              
91             The rest of the documentation details each of the object
92             methods. Internal methods are usually preceded with a _
93              
94             =cut
95              
96              
97             package Bio::Tools::Run::Alignment::Sim4;
98 1         143 use vars qw($AUTOLOAD @ISA $PROGRAM $PROGRAMDIR $PROGRAMNAME
99 1     1   103407 @SIM4_PARAMS @OTHER_PARAMS @OTHER_SWITCHES %OK_FIELD);
  1         2  
100 1     1   4 use strict;
  1         1  
  1         16  
101 1     1   468 use Bio::SeqIO;
  1         39313  
  1         24  
102 1     1   680 use Bio::SimpleAlign;
  1         50701  
  1         36  
103 1     1   445 use Bio::AlignIO;
  1         1034  
  1         24  
104 1     1   4 use Bio::Root::Root;
  1         1  
  1         12  
105 1     1   3 use Bio::Root::IO;
  1         0  
  1         14  
106 1     1   385 use Bio::Tools::Run::WrapperBase;
  1         2  
  1         21  
107 1     1   479 use Bio::Tools::Sim4::Results;
  1         6728  
  1         66  
108              
109             @ISA = qw(Bio::Root::Root Bio::Tools::Run::WrapperBase);
110              
111             # You will need to enable Sim4 to find the Sim4 program. This
112             # can be done in (at least) two ways:
113             #
114             # 1. define an environmental variable SIM4DIR
115             # export SIM4DIR =/usr/local/share/sim4
116             # where the sim4 package is installed
117             #
118             # 2. include a definition of an environmental variable SIM4 in
119             # every script that will use Sim4.pm
120             # $ENV{SIMR4DIR} = '/usr/local/share/sim4';
121              
122             BEGIN {
123              
124 1     1   3 @SIM4_PARAMS= qw(A W X K C R D H P N B);
125 1         1 @OTHER_PARAMS= qw(CDNA_SEQ GENOMIC_SEQ OUTFILE);
126 1         2 @OTHER_SWITCHES = qw(SILENT QUIET VERBOSE);
127              
128             # Authorize attribute fields
129 1         2 foreach my $attr ( @SIM4_PARAMS, @OTHER_PARAMS,
130 17         792 @OTHER_SWITCHES) { $OK_FIELD{$attr}++; }
131             }
132              
133             =head2 program_name
134              
135             Title : program_name
136             Usage : $factory->program_name()
137             Function: holds the program name
138             Returns: string
139             Args : None
140              
141             =cut
142              
143             sub program_name {
144 6     6 1 21 return 'sim4';
145             }
146              
147             =head2 program_dir
148              
149             Title : program_dir
150             Usage : $factory->program_dir(@params)
151             Function: returns the program directory, obtained from ENV variable.
152             Returns: string
153             Args :
154              
155             =cut
156              
157             sub program_dir {
158 3 50   3 1 10 return Bio::Root::IO->catfile($ENV{SIM4DIR}) if $ENV{SIM4DIR};
159             }
160              
161             sub new {
162 1     1 1 91 my ($class, @args) = @_;
163 1         8 my $self = $class->SUPER::new(@args);
164             # to facilitiate tempfile cleanup
165 1         38 $self->io->_initialize_io();
166 1         24 $self->A(0); # default
167 1         1 my ($attr, $value);
168            
169 1         3 while (@args) {
170 6         4 $attr = shift @args;
171 6         6 $value = shift @args;
172 6 50       11 if ($attr =~/est_first/i ) { #NEW
173 0         0 $self->{est_first} = $value; #NEW
174 0         0 next; #NEW
175             } #NEW
176 6 50       8 next if( $attr =~ /^-/ ); # don't want named parameters
177 6 50       8 if ($attr =~/'PROGRAM'/i ) {
178 0         0 $self->executable($value);
179 0         0 next;
180             }
181 6         21 $self->$attr($value);
182             }
183 1         2 return $self;
184             }
185              
186             sub AUTOLOAD {
187 7     7   7 my $self = shift;
188 7         5 my $attr = $AUTOLOAD;
189 7         15 $attr =~ s/.*:://;
190 7         7 $attr = uc $attr;
191 7 50       23 $self->throw("Unallowed parameter: $attr !") unless $OK_FIELD{$attr};
192 7 50       16 $self->{$attr} = shift if @_;
193 7         10 return $self->{$attr};
194             }
195              
196             =head2 version
197              
198             Title : version
199             Usage : not supported
200             Function: Cannot determine from program
201             Example :
202             Returns : float or undef
203             Args : none
204              
205             =cut
206              
207             sub version {
208 0     0 1   my ($self) = @_;
209 0           return undef;
210             }
211              
212             =head2 align
213              
214             Title : align
215             Usage :
216             $cdna = 't/data/cdna.fa';
217             $genomic = 't/data/cdna.fa';
218             @exon_set = $factory->align($cdna,$genomic);
219             or
220             #@seq_array is array of Seq objs
221             $cdna = \@seq_array;
222             @exon_set = $factory->align($cdna,$genomic);
223             of
224             @exon_set = $factory->align($cdna->[0],$genomic)
225              
226             Function: Perform a Sim4 alignment
227             Returns : An array of Bio::SeqFeature::Generic objects which has
228             exons as sub seqfeatures.
229             Args : Name of two files containing fasta sequences,
230             or 2 Bio::SeqI objects
231             or a combination of both
232             first is assumed to be cdna
233             second is assumed to be genomic
234             More than one cdna may be provided. If an object,
235             assume that its an array ref.
236              
237             =cut
238              
239             sub align {
240              
241 0     0 1   my ($self,$cdna,$genomic) = @_;
242              
243 0 0         $self->cdna_seq($cdna) if $cdna;
244 0 0         $self->throw("Need to provide a cdna sequence") unless $self->cdna_seq;
245              
246 0 0         $self->genomic_seq($genomic) if $genomic;
247 0 0         $self->throw("Need to provide a genomic sequence") unless $self->genomic_seq;
248            
249 0           my ($temp,$infile1, $infile2, $est_first,$seq);
250 0           my ($attr, $value, $switch);
251              
252             # Create input file pointer
253 0           ($est_first,$infile1,$infile2)= $self->_setinput($self->cdna_seq,$self->genomic_seq);
254 0 0 0       if (!($infile1 && $infile2)) {$self->throw("Bad input data (sequences need an id ) or less than 2 sequences in align!");}
  0            
255              
256             # Create parameter string to pass to Sim4 program
257 0           my $param_string = $self->_setparams();
258              
259             # run Sim4
260 0           my @exon_sets = $self->_run($est_first,$infile1,$infile2,$param_string);
261 0           return @exon_sets;
262             }
263              
264             #################################################
265             #internal methods
266              
267             =head2 _run
268              
269             Title : _run
270             Usage : Internal function, not to be called directly
271             Function: makes actual system call to Sim4 program
272             Example :
273             Returns : nothing; Sim4 output is written to a temp file
274             Args : Name of a file containing a set of unaligned fasta sequences
275             and hash of parameters to be passed to Sim4
276              
277             =cut
278              
279             sub _run {
280 0     0     my ($self,$estfirst,$infile1,$infile2,$param_string) = @_;
281 0           my $instring;
282 0           $self->debug( "Program ".$self->executable."\n");
283 0 0         if(! $self->outfile){
284 0           my ($tfh, $outfile) = $self->io->tempfile(-dir=>$self->tempdir);
285 0           close($tfh);
286 0           undef $tfh;
287 0           $self->outfile($outfile);
288             }
289 0           my $outfile = $self->outfile();
290 0           my $commandstring = $self->executable." $infile1 $infile2 $param_string > $outfile";
291 0 0 0       if($self->quiet || $self->silent || ($self->verbose < 0)){
      0        
292 0 0         my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
293 0           $commandstring .= " 2>$null";
294             }
295 0           $self->debug( "Sim4 command = $commandstring");
296 0           my $status = system($commandstring);
297 0 0         $self->throw( "Sim4 call ($commandstring) crashed: $? \n") unless $status==0;
298              
299             #use Sim4 parser
300 0           my $sim4_parser = Bio::Tools::Sim4::Results->new(-file=>$outfile,-estfirst=>$estfirst);
301 0           my @out;
302 0           while(my $exonset = $sim4_parser->next_exonset){
303 0           push @out, $exonset;
304             }
305 0           return @out;
306             }
307              
308             =head2 _setinput()
309              
310             Title : _setinput
311             Usage : Internal function, not to be called directly
312             Function: Create input file for Sim4 program
313             Example :
314             Returns : name of file containing Sim4 data input
315             Args : Seq or Align object reference or input file name
316              
317             =cut
318              
319             sub _setinput {
320 0     0     my ($self, $cdna,$genomic) = @_;
321 0           my ($infilename, $seq, $temp, $tfh1,$tfh2,$outfile1,$outfile2);
322             #my $estfirst=1;
323 0 0         my $estfirst= defined($self->{est_first}) ? $self->{_est_first} : 1;
324 0           my ($cdna_file,$genomic_file);
325             #a sequence obj
326 0 0         if(ref($cdna)) {
327 0 0         my @cdna = ref $cdna eq "ARRAY" ? @{$cdna} : ($cdna);
  0            
328 0           ($tfh1,$cdna_file) = $self->io->tempfile(-dir=>$self->tempdir);
329 0           my $seqio = Bio::SeqIO->new(-fh=>$tfh1,-format=>'fasta');
330 0           foreach my $c (@cdna){
331 0           $seqio->write_seq($c);
332             }
333 0           close $tfh1;
334 0           undef $tfh1;
335              
336             #if we have a est database, then input will go second
337 0 0         if($#cdna > 0){
338 0           $estfirst=0;
339             }
340             }
341             else {
342 0           my $sio = Bio::SeqIO->new(-file=>$cdna,-format=>"fasta");
343 0           my $count = 0;
344 0           while(my $seq = $sio->next_seq){
345 0           $count++;
346             }
347 0 0         $estfirst = $count > 1 ? 0:1;
348 0           $cdna_file = $cdna;
349             }
350 0 0         if( ref($genomic) ) {
351 0           ($tfh1,$genomic_file) = $self->io->tempfile(-dir=>$self->tempdir);
352 0           my $seqio = Bio::SeqIO->new(-fh=>$tfh1,-format=>'fasta');
353 0           $seqio->write_seq($genomic);
354 0           close $tfh1;
355 0           undef $tfh1;
356             }
357             else {
358 0           $genomic_file = $genomic;
359             }
360 0 0         return ($estfirst,$cdna_file,$genomic_file) if $estfirst;
361 0           return ($estfirst,$genomic_file,$cdna_file);
362             }
363              
364              
365             =head2 _setparams()
366              
367             Title : _setparams
368             Usage : Internal function, not to be called directly
369             Function: Create parameter inputs for Sim4 program
370             Example :
371             Returns : parameter string to be passed to Sim4
372             during align or profile_align
373             Args : name of calling object
374              
375             =cut
376              
377             sub _setparams {
378 0     0     my ($attr, $value, $self);
379              
380 0           $self = shift;
381              
382 0           my $param_string = "";
383 0           for $attr ( @SIM4_PARAMS ) {
384 0           $value = $self->$attr();
385 0 0         next unless (defined $value);
386 0           my $attr_key = uc $attr; #put params in format expected by Sim4
387 0           $attr_key = ' '.$attr_key;
388 0           $param_string .= $attr_key.'='.$value;
389             }
390              
391 0           return $param_string;
392             }
393              
394             1; # Needed to keep compiler happy