line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# BioPerl module for Bio::Tools::Run::Alignment::Lagan |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# Please direct questions and support issues to |
4
|
|
|
|
|
|
|
# |
5
|
|
|
|
|
|
|
# Cared for by Bioperl |
6
|
|
|
|
|
|
|
# |
7
|
|
|
|
|
|
|
# Copyright Bioperl, Stephen Montgomery |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
# Special thanks to Jason Stajich. |
10
|
|
|
|
|
|
|
# |
11
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 NAME |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
Bio::Tools::Run::Alignment::Lagan - Object for the local execution of the LAGAN suite of tools (including MLAGAN for multiple sequence alignments) |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 SYNOPSIS |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
use Bio::Tools::Run::Alignment::Lagan; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
@params = |
24
|
|
|
|
|
|
|
('chaos' => "The contents of this string will be passed as args to chaos", |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
#Read your chaos README file for more info. This functionality |
27
|
|
|
|
|
|
|
#has not been tested and will be integrated in future versions. |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
'order' => "\"-gs -7 -gc -2 -mt 2 -ms -1\"", |
30
|
|
|
|
|
|
|
#Where gap start penalty of -7, gap continue of -2, match of 2, |
31
|
|
|
|
|
|
|
#and mismatch of -1. |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
'recurse' => "\"(12,25),(7,25),(4,30)\"", |
34
|
|
|
|
|
|
|
#A list of (wordlength,score cutoff) pairs to be used in the |
35
|
|
|
|
|
|
|
#recursive anchoring |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
'tree' => "\"(sample1 (sample2 sample3))\"", |
38
|
|
|
|
|
|
|
#Used by mlagan / tree can also be passed when calling mlagan directly |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
#SCORING PARAMETERS FOR MLAGAN: |
41
|
|
|
|
|
|
|
'match' => 12, |
42
|
|
|
|
|
|
|
'mismatch' => -8, |
43
|
|
|
|
|
|
|
'gapstart' => -50, |
44
|
|
|
|
|
|
|
'gapend' => -50, |
45
|
|
|
|
|
|
|
'gapcont' => -2, |
46
|
|
|
|
|
|
|
); |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=head1 DESCRIPTION |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
To run mlagan/lagan, you must have an environment variable that points to |
52
|
|
|
|
|
|
|
the executable directory with files lagan.pl etc. |
53
|
|
|
|
|
|
|
"LAGAN_DIR=/opt/lagan_executables/" |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
Simply having the executables in your path is not supported because the |
56
|
|
|
|
|
|
|
executables themselves only work with the environment variable set. |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
All lagan and mlagan parameters listed in their Readmes can be set |
59
|
|
|
|
|
|
|
except for the mfa flag which has been turned on by default to prevent |
60
|
|
|
|
|
|
|
parsing of the alignment format. |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
TO USE LAGAN: |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
my $lagan = Bio::Tools::Run::Alignment::Lagan->new(@params); |
65
|
|
|
|
|
|
|
my $report_out = $lagan->lagan($seq1, $seq2); |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
A SimpleAlign object is returned. |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
TO USE MLAGAN: |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
my $lagan = Bio::Tools::Run::Alignment::Lagan->new(); |
72
|
|
|
|
|
|
|
my $tree = "(($seqname1 $seqname2) $seqname3)"; |
73
|
|
|
|
|
|
|
my @sequence_objs; #an array of bioperl Seq objects |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
##If you use an unblessed seq array |
76
|
|
|
|
|
|
|
my $seq_ref = \@sequence_objs; |
77
|
|
|
|
|
|
|
bless $seq_ref, "ARRAY"; |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
my $report_out = $lagan->mlagan($seq_ref, $tree); |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
A SimpleAlign object is returned |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
Only basic mlagan/lagan functionality has been implemented due to the |
84
|
|
|
|
|
|
|
iterative development of their project. Future maintenance upgrades |
85
|
|
|
|
|
|
|
will include enhanced features and scoring. |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
=head1 FEEDBACK |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
=head2 Mailing Lists |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
92
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to |
93
|
|
|
|
|
|
|
the Bioperl mailing list. Your participation is much appreciated. |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
97
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
=head2 Support |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
I<bioperl-l@bioperl.org> |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
106
|
|
|
|
|
|
|
responsive experts will be able to look at the problem and quickly |
107
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
108
|
|
|
|
|
|
|
with code and data examples if at all possible. |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=head2 Reporting Bugs |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
113
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via the |
114
|
|
|
|
|
|
|
web: |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
http://redmine.open-bio.org/projects/bioperl/ |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=head1 AUTHOR - Stephen Montgomery |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
Email smontgom@bcgsc.bc.ca |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
Genome Sciences Centre in beautiful Vancouver, British Columbia CANADA |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
MLagan/Lagan is the hard work of Michael Brudno et al. |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
Sendu Bala bix@sendu.me.uk |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=head1 APPENDIX |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
The rest of the documentation details each of the object methods. |
133
|
|
|
|
|
|
|
Internal methods are usually preceded with a _ |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=cut |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
package Bio::Tools::Run::Alignment::Lagan; |
138
|
|
|
|
|
|
|
|
139
|
1
|
|
|
1
|
|
186258
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
23
|
|
140
|
1
|
|
|
1
|
|
3
|
use Bio::Root::IO; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
18
|
|
141
|
1
|
|
|
1
|
|
3
|
use Bio::Seq; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
13
|
|
142
|
1
|
|
|
1
|
|
464
|
use Bio::SeqIO; |
|
1
|
|
|
|
|
6194
|
|
|
1
|
|
|
|
|
23
|
|
143
|
1
|
|
|
1
|
|
4
|
use Bio::AlignIO; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
14
|
|
144
|
1
|
|
|
1
|
|
3
|
use Bio::SimpleAlign; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
14
|
|
145
|
1
|
|
|
1
|
|
2
|
use File::Spec; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
16
|
|
146
|
1
|
|
|
1
|
|
409
|
use Bio::Matrix::IO; |
|
1
|
|
|
|
|
567
|
|
|
1
|
|
|
|
|
22
|
|
147
|
1
|
|
|
1
|
|
5
|
use Cwd; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
48
|
|
148
|
|
|
|
|
|
|
|
149
|
1
|
|
|
1
|
|
10
|
use base qw(Bio::Tools::Run::WrapperBase); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
377
|
|
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
# Options that are forwarded to lagan.pl.
our @LAGAN_PARAMS = qw(
    chaos order recurse mfa out lazy maskedonly
    usebounds rc translate draft info fastreject
);

# Options handled by this wrapper itself.
our @OTHER_PARAMS = qw(outfile);

# Switches that control how the external program's output is treated.
our @LAGAN_SWITCHES = qw(silent quiet);

# Options that are forwarded to mlagan.
our @MLAGAN_PARAMS = qw(
    nested postir translate lazy verbose tree match mismatch
    gapstart gapend gapcont out version
);

#Not all of these parameters are useful in this context, care
#should be used in setting only standard ones

#The LAGAN_DIR environment variable must be set
our $PROGRAM_DIR = $ENV{'LAGAN_DIR'} || '';
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
sub new {
    # Constructor: builds the wrapper object, creates an accessor for every
    # known lagan/mlagan option, and makes sure an output file name is set.
    my $class = shift;
    my @args  = @_;

    my $self = $class->SUPER::new(@args);

    # One get/set method is created per option name and initialised from
    # the constructor arguments.
    my @option_names = (@LAGAN_PARAMS, @OTHER_PARAMS,
                        @LAGAN_SWITCHES, @MLAGAN_PARAMS);
    $self->_set_from_args(\@args, -methods => [@option_names],
                                  -create  => 1);

    # Output goes to 'out', then 'outfile', and only as a last resort to a
    # freshly created temp file.  Only the temp file's name is needed, so
    # its handle is closed straight away.
    my ($temp_fh, $temp_name) = $self->io->tempfile();
    $self->out($self->out || $self->outfile || $temp_name);
    close($temp_fh);

    return $self;
}
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=head2 lagan |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
Runs the Lagan pairwise alignment algorithm |
183
|
|
|
|
|
|
|
Inputs should be two PrimarySeq objects. |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
Returns a SimpleAlign object / preloaded with the tmp file of the |
186
|
|
|
|
|
|
|
Lagan multifasta output. |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=cut |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
sub lagan {
    # Pairwise alignment entry point: writes both sequences to temp files
    # and hands them to lagan.pl.  Returns whatever _generic_lagan returns
    # (a single scalar).
    my ($self, $input1, $input2) = @_;
    my $executable = 'lagan.pl';

    $self->io->_io_cleanup();

    #my (undef, $tempfile) = $self->io->tempfile();
    #$self->out($tempfile);
    my ($infile1, $infile2) = $self->_setinput($executable, $input1, $input2);

    my $lagan_report = _generic_lagan($self, $executable, $infile1, $infile2);
    return $lagan_report;
}
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
=head2 mlagan |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
Runs the Mlagan multiple sequence alignment algorithm. |
207
|
|
|
|
|
|
|
Inputs should be an Array of Primary Seq objects and a Phylogenetic Tree in |
208
|
|
|
|
|
|
|
String format or as a Bio::Tree::TreeI compliant object. |
209
|
|
|
|
|
|
|
Returns a SimpleAlign object / preloaded with the tmp file of the Mlagan |
210
|
|
|
|
|
|
|
multifasta output. |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
=cut |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
sub mlagan {
    # Multiple alignment entry point.  $input1 is an array ref of sequence
    # objects; $tree is either a tree string or a Bio::Tree::TreeI object
    # that is converted to the string form mlagan expects.
    my ($self, $input1, $tree) = @_;
    my $executable = 'mlagan';

    $self->io->_io_cleanup();

    my $tree_is_object = $tree && ref($tree) && $tree->isa('Bio::Tree::TreeI');
    if ($tree_is_object) {
        my $tree_obj = $tree;

        # fiddle tree so mlagan will like it: temporarily relabel every
        # node, remembering the id it had so it can be put back afterwards
        my %orig_ids;
        for my $node ($tree_obj->get_nodes) {
            my $supplied = $node->name('supplied');
            my $label;
            if ($supplied) {
                $label = shift @{$supplied};
            }
            elsif ($node->node_name) {
                $label = $node->node_name;
            }
            else {
                $label = $node->id;
            }
            $orig_ids{$label} = $node->id;
            $node->id($label);
        }

        # convert to string
        $tree = $tree_obj->simplify_to_leaves_string;

        # more fiddling: spaces become underscores, double quotes are
        # dropped, then commas become the spaces mlagan uses as separators
        $tree =~ tr/ /_/;
        $tree =~ tr/"//d;
        $tree =~ tr/,/ /;

        # unfiddle the tree object
        for my $node ($tree_obj->get_nodes) {
            $node->id($orig_ids{$node->id});
        }
    }

    my $infiles;
    ($infiles, $tree) = $self->_setinput($executable, $input1, $tree);

    my $lagan_report = _generic_lagan($self, $executable, $infiles, $tree);
    return $lagan_report;
}
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
=head2 nuc_matrix |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
Title : nuc_matrix |
255
|
|
|
|
|
|
|
Usage : my $matrix_obj = $obj->nuc_matrix(); |
256
|
|
|
|
|
|
|
-or- |
257
|
|
|
|
|
|
|
$obj->nuc_matrix($matrix_obj); |
258
|
|
|
|
|
|
|
-or- |
259
|
|
|
|
|
|
|
$obj->nuc_matrix($matrix_file); |
260
|
|
|
|
|
|
|
Function: Get/set the substitution matrix for use by mlagan. By default the |
261
|
|
|
|
|
|
|
file $LAGAN_DIR/nucmatrix.txt is used by mlagan. By default this |
262
|
|
|
|
|
|
|
method returns a corresponding Matrix. |
263
|
|
|
|
|
|
|
Returns : Bio::Matrix::Mlagan object |
264
|
|
|
|
|
|
|
Args : none to get, OR to set: |
265
|
|
|
|
|
|
|
Bio::Matrix::Mlagan object |
266
|
|
|
|
|
|
|
OR |
267
|
|
|
|
|
|
|
filename of an mlagan substitution matrix file |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
NB: due to a bug in mlagan 2.0, the -nucmatrixfile option does not |
270
|
|
|
|
|
|
|
work, so this Bioperl wrapper is unable to simply point mlagan to |
271
|
|
|
|
|
|
|
your desired matrix file (or to a temp file generated from your |
272
|
|
|
|
|
|
|
matrix object). Instead the $LAGAN_DIR/nucmatrix.txt file must |
273
|
|
|
|
|
|
|
actually be replaced. This wrapper will make a back-up copy of that |
274
|
|
|
|
|
|
|
file, write the new file in its place, then revert things back to the |
275
|
|
|
|
|
|
|
way they were after the alignment has been produced. For this reason, |
276
|
|
|
|
|
|
|
$LAGAN_DIR must be writable, as must $LAGAN_DIR/nucmatrix.txt. |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=cut |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
sub nuc_matrix {
    # Get/set the substitution matrix.  Any further positional arguments
    # are accepted but ignored.
    my ($self, $thing) = @_;

    # Reads the first matrix out of an mlagan-format matrix file.
    my $load_matrix = sub {
        my ($file) = @_;
        my $reader = Bio::Matrix::IO->new(-format => 'mlagan',
                                          -file   => $file);
        return $reader->next_matrix;
    };

    if ($thing) {
        # An existing file name is tried first, then a matrix object.
        if (-e $thing) {
            $self->{_nuc_matrix} = $load_matrix->($thing);
        }
        elsif (ref($thing) && $thing->isa('Bio::Matrix::Mlagan')) {
            $self->{_nuc_matrix} = $thing;
        }
        else {
            $self->throw("Unknown kind of thing supplied, '$thing'");
        }

        $self->{_nuc_matrix_set} = 1;
    }

    if (!defined $self->{_nuc_matrix}) {
        # read the program default file
        my $default_file = File::Spec->catfile($PROGRAM_DIR, 'nucmatrix.txt');
        $self->{_nuc_matrix} = $load_matrix->($default_file);
    }

    # When the caller actually receives the matrix (non-void context) the
    # matrix is flagged as set, so _setparams writes it out for mlagan.
    $self->{_nuc_matrix_set} = 1 if defined wantarray;
    return $self->{_nuc_matrix};
}
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
=head2 _setinput |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
Title : _setinput |
313
|
|
|
|
|
|
|
Usage : Internal function, not to be called directly |
314
|
|
|
|
|
|
|
Function: Create input file(s) for Lagan executables |
315
|
|
|
|
|
|
|
Returns : name of files containing Lagan data input / |
316
|
|
|
|
|
|
|
or array of files and phylo tree for Mlagan data input |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
=cut |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
sub _setinput {
    # Writes the inputs for lagan.pl / mlagan to temporary FASTA files.
    #
    # Args    : $executable - program name (unused here, kept for interface)
    #           $input1     - array ref of Bio::PrimarySeqI objects (mlagan)
    #                         or a single Bio::PrimarySeqI object (lagan)
    #           $input2     - a Bio::PrimarySeqI object (lagan) or a plain
    #                         scalar tree string (mlagan)
    # Returns : ($infile1, $infile2) where $infile1 is a file name, or an
    #           array ref of file names for array input, and $infile2 is a
    #           file name or the tree string passed straight through.
    #           Returns 0 if any element of an input array is not a
    #           Bio::PrimarySeqI.
    # Note    : single sequence inputs are modified in place: every run of
    #           non-word characters is removed from their sequence string.
    my ($self, $executable, $input1, $input2) = @_;
    my ($infile1, $infile2);

    $self->io->_io_cleanup();

    # Write one sequence to a fresh temp file as FASTA; returns the file name.
    my $write_fasta = sub {
        my ($seq) = @_;
        my ($fh, $file) = $self->io->tempfile();
        my $out = Bio::SeqIO->new( -fh     => $fh,
                                   -format => 'Fasta' );
        $out->write_seq($seq);
        close $fh;
        return $file;
    };

    # Want to make sure there are no white spaces in the sequence.
    # Happens if the input is taken from an alignment.
    my $strip_non_word = sub {
        my ($seq) = @_;
        my $sequence = $seq->seq();
        $sequence =~ s/\W+//g;
        $seq->seq($sequence);
        return;
    };

    if (ref($input1) =~ /ARRAY/i) {
        ##INPUTS TO MLAGAN / WILL HAVE TO BE CHANGED IF LAGAN EVER
        ##SUPPORTS MULTI-INPUT

        # Validate everything before creating any temp file, so a bad
        # element cannot leave an open handle behind.
        foreach my $seq (@$input1) {
            return 0 unless $seq->isa("Bio::PrimarySeqI");
        }

        my @infilearr = map { $write_fasta->($_) } @$input1;
        $infile1 = \@infilearr;
    }
    elsif ($input1->isa("Bio::PrimarySeqI")) {
        ##INPUTS TO LAGAN
        $strip_non_word->($input1);
        $infile1 = $write_fasta->($input1);
    }

    if (ref($input2)) {
        if ($input2->isa("Bio::PrimarySeqI")) {
            $strip_non_word->($input2);
            $infile2 = $write_fasta->($input2);
        }
    }
    else {
        ##A tree as a scalar has been passed, pass it through
        $infile2 = $input2;
    }

    return ($infile1, $infile2);
}
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
=head2 _generic_lagan |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
Title : _generic_lagan |
396
|
|
|
|
|
|
|
Usage : internal function not called directly |
397
|
|
|
|
|
|
|
Returns : SimpleAlign object |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
=cut |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
sub _generic_lagan {
    # Shared driver for lagan() and mlagan(): builds the option string for
    # the chosen executable, runs it, and returns the result of _runlagan
    # as a single scalar.
    my ($self, $executable, $input1, $input2) = @_;

    my $options = $self->_setparams($executable);
    my $report  = _runlagan($self, $executable, $options, $input1, $input2);

    return $report;
}
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
=head2 _setparams |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
Title : _setparams |
412
|
|
|
|
|
|
|
Usage : Internal function, not to be called directly |
413
|
|
|
|
|
|
|
Function: Create parameter inputs for (m)Lagan program |
414
|
|
|
|
|
|
|
Returns : parameter string to be passed to Lagan |
415
|
|
|
|
|
|
|
Args : Reference to calling object and name of (m)Lagan executable |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
=cut |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
sub _setparams {
    # Builds the command-line option string for the given executable.
    # The string always ends with " -mfa ".
    my ($self, $executable) = @_;

    # Which option names apply to which program; unknown programs get none.
    my %params_for = (
        'lagan.pl' => \@LAGAN_PARAMS,
        'mlagan'   => \@MLAGAN_PARAMS,
    );
    my @execparams = @{ $params_for{$executable} || [] };

    my $nucmatrixfile;
    if ($executable eq 'mlagan' && $self->{_nuc_matrix_set}) {
        # we create this file on every call because we have no way of
        # knowing if user altered the matrix object
        (my $handle, $nucmatrixfile) = $self->io->tempfile();
        my $writer = Bio::Matrix::IO->new(-format => 'mlagan',
                                          -fh     => $handle);
        $writer->write_matrix($self->nuc_matrix);
        $self->{_nucmatrixfile} = $nucmatrixfile;
    }
    ##EXPAND OTHER LAGAN SUITE PROGRAMS HERE

    my $param_string = $self->SUPER::_setparams(-params => [@execparams],
                                                -dash   => 1);
    if ($nucmatrixfile) {
        $param_string .= " -nucmatrixfile $nucmatrixfile";
    }

    return $param_string . " -mfa ";
}
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
=head2 _runlagan |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
Title : _runlagan |
452
|
|
|
|
|
|
|
Usage : Internal function, not to be called directly |
453
|
|
|
|
|
|
|
Function: makes actual system call to (m)Lagan program |
454
|
|
|
|
|
|
|
Example : |
455
|
|
|
|
|
|
|
Returns : Report object in the SimpleAlign object |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=cut |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
sub _runlagan {
    # Runs lagan.pl or mlagan and parses the multi-FASTA output.
    #
    # Args    : $executable   - 'lagan.pl' or 'mlagan'
    #           $param_string - option string from _setparams
    #           $input1       - input file name (lagan.pl) or array ref of
    #                           input file names (mlagan)
    #           $input2       - second input file name (lagan.pl) or tree
    #                           string (mlagan, optional)
    # Returns : the first alignment read from the output file, or nothing
    #           if the executable cannot be found.
    # Throws  : if the stock nucmatrix.txt is missing or cannot be backed
    #           up/replaced, or if the current directory cannot be read.
    my ($self, $executable, $param_string, $input1, $input2) = @_;

    my $exe = $self->executable($executable);
    return unless $exe;

    my $version = $self->version;
    # Versions up to 3 need both the nucmatrix workaround and the sort hack.
    my $is_legacy = ($version <= 3);

    my $command_string;
    if ($executable eq 'lagan.pl') {
        $command_string = $exe . " " . $input1 . " " . $input2 . $param_string;
    }

    # Path of the stock matrix file, set only once it has actually been
    # backed up, so the restore below runs exactly when a swap was made.
    my $swapped_matrix;
    if ($executable eq 'mlagan') {
        $command_string = join(" ", $exe, @$input1);
        if (defined $input2) {
            $command_string .= " -tree " . "\"" . $input2 . "\"";
        }
        $command_string .= " " . $param_string;

        my $matrix_file = $self->{_nucmatrixfile};
        if ($is_legacy && $matrix_file) {
            # mlagan 2.0 bug-workaround: -nucmatrixfile is ignored, so the
            # stock file itself is temporarily replaced.  List-form system
            # avoids the shell, so paths with spaces are safe.
            my $orig = File::Spec->catfile($PROGRAM_DIR, 'nucmatrix.txt');
            -e $orig || $self->throw("Strange, $orig doesn't seem to exist");
            system('cp', $orig, "$orig.bk") && $self->throw("Backup of $orig failed: $!");
            $swapped_matrix = $orig;
            system('cp', $matrix_file, $orig) && $self->throw("Copy of $matrix_file -> $orig failed: $!");
        }
    }

    if (($self->silent || $self->quiet) &&
        ($^O !~ /os2|dos|MSWin32|amigaos/)) {
        my $null = ($^O =~ m/mswin/i) ? 'NUL' : '/dev/null';
        $command_string .= " > $null 2> $null";
    }

    # will do brute-force clean up of junk files generated by lagan:
    # remember which '*anch*' files already exist so only new ones go
    my $cwd = cwd();
    my $list_anchor_files = sub {
        opendir(my $dir, $cwd) || $self->throw("Could not open the current directory '$cwd'!");
        my @found = grep { /anch/ } readdir($dir);
        closedir($dir);
        return @found;
    };
    my %ok_files = map { $_ => 1 } $list_anchor_files->();

    $self->debug("$command_string\n");
    # temporary hack whilst lagan script 'rechaos.pl' uses obsolete sort syntax
    my $status = system(($is_legacy ? '_POSIX2_VERSION=1 ' : '') . $command_string);
    $self->debug("'$executable' returned non-zero status $status\n") if $status;

    # Always put the stock matrix back if it was replaced above.
    if ($swapped_matrix) {
        system('mv', "$swapped_matrix.bk", $swapped_matrix)
            && $self->warn("Restore of $swapped_matrix from $swapped_matrix.bk failed: $!");
    }

    foreach my $junk ($list_anchor_files->()) {
        unlink($junk) unless $ok_files{$junk};
    }

    my $outfile = $self->out();
    my $align = Bio::AlignIO->new( '-file'   => $outfile,
                                   '-format' => 'fasta' );
    my $aln = $align->next_aln();

    return $aln;
}
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
=head2 executable |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
Title : executable |
537
|
|
|
|
|
|
|
Usage : my $exe = $lagan->executable('mlagan'); |
538
|
|
|
|
|
|
|
Function: Finds the full path to the 'lagan' executable |
539
|
|
|
|
|
|
|
Returns : string representing the full path to the exe |
540
|
|
|
|
|
|
|
Args : [optional] name of executable to set path to |
541
|
|
|
|
|
|
|
[optional] boolean flag whether or not warn when exe is not found |
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
Thanks to Jason Stajich for providing the framework for this subroutine |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
=cut |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
sub executable {
    # Returns the cached path for $exename (default 'lagan.pl'), locating
    # it first if necessary.  Returns undef when nothing usable is found or
    # when LAGAN_DIR is not set.
    my ($self, $exename, $exe, $warn) = @_;
    $exename = 'lagan.pl' if !defined $exename;

    my $path_for = ($self->{'_pathtoexe'} ||= {});

    # An explicitly supplied, executable path always wins.
    $path_for->{$exename} = $exe if defined $exe && -x $exe;

    if (!defined $path_for->{$exename}) {
        # First choice: the program inside the configured directory.
        my $candidate = $self->program_path($exename);
        if (-e $candidate && -x $candidate) {
            $exe = $path_for->{$exename} = $candidate;
        }

        # Otherwise fall back to the io object's own executable lookup.
        if (!$exe) {
            $exe = $self->io->exists_exe($exename);
            if ($exe && -x $exe) {
                $path_for->{$exename} = $exe;
            }
            else {
                $self->warn("Cannot find executable for $exename") if $warn;
                $path_for->{$exename} = undef;
            }
        }
    }

    # even if its executable, we still need the environment variable to have
    # been set
    unless ($PROGRAM_DIR) {
        $self->warn("Environment variable LAGAN_DIR must be set, even if the lagan executables are in your path");
        $path_for->{$exename} = undef;
    }

    return $path_for->{$exename};
}
577
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
=head2 program_path |
579
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
Title : program_path |
581
|
|
|
|
|
|
|
Usage : my $path = $lagan->program_path(); |
582
|
|
|
|
|
|
|
Function: Builds path for executable |
583
|
|
|
|
|
|
|
Returns : string representing the full path to the exe |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
Thanks to Jason Stajich for providing the framework for this subroutine |
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
=cut |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
sub program_path {
    # Joins the program directory (when one is set) and the program name
    # into a single path.
    my ($self, $program_name) = @_;

    my @parts = $self->program_dir ? ($self->program_dir) : ();

    # Option for Windows variants / None so far: the name is used as given,
    # with no platform-specific suffix.
    my $file_name = "$program_name";
    push @parts, $file_name;

    return Bio::Root::IO->catfile(@parts);
}
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
=head2 program_dir |
600
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
Title : program_dir |
602
|
|
|
|
|
|
|
Usage : my $dir = $lagan->program_dir(); |
603
|
|
|
|
|
|
|
Function: Abstract get method for dir of program. To be implemented |
604
|
|
|
|
|
|
|
by wrapper. |
605
|
|
|
|
|
|
|
Returns : string representing program directory |
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
Thanks to Jason Stajich for providing the framework for this subroutine |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
=cut |
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
sub program_dir {
    # Directory holding the lagan programs, taken from the package-level
    # $PROGRAM_DIR setting.
    return $PROGRAM_DIR;
}
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
=head2 version |
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
Title : version |
618
|
|
|
|
|
|
|
Usage : my $version = $lagan->version; |
619
|
|
|
|
|
|
|
Function: returns the program version |
620
|
|
|
|
|
|
|
Returns : number |
621
|
|
|
|
|
|
|
Args : none |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
=cut |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
sub version {
    # Asks mlagan for its version.
    #
    # Returns : the version token (digits followed by non-space characters,
    #           e.g. "2.0") from the last line of "mlagan -version" output
    #           that contains one; undef if no line does; nothing if the
    #           mlagan executable cannot be located.
    # Dies    : if the command cannot be started or does not complete.
    my $self = shift;
    my $exe = $self->executable('mlagan') || return;

    # stderr is merged into stdout, so the command still goes through the
    # shell; the 3-arg form just keeps the mode out of the command string.
    open(my $ver_fh, '-|', "$exe -version 2>&1")
        || die "Could not open command '$exe -version'\n";

    my $version;
    while (my $line = <$ver_fh>) {
        # Only update on a match, so a later line without a version token
        # (e.g. a trailing blank line) cannot wipe out a value already found.
        $version = $1 if $line =~ /(\d+\S+)/;
    }
    close($ver_fh) || die "Could not complete command '$exe -version'\n";

    return $version;
}
638
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
1; |