line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::Tools::Run::Genemark |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to <bioperl-l@bioperl.org> |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Bioperl |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright Bioperl, Mark Johnson |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# Special thanks to Chris Fields, Sendu Bala |
11
|
|
|
|
|
|
|
# |
12
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
13
|
|
|
|
|
|
|
# |
14
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 NAME |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
Bio::Tools::Run::Genemark - Wrapper for local execution of the GeneMark |
19
|
|
|
|
|
|
|
family of programs. |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=head1 SYNOPSIS |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# GeneMark.hmm (prokaryotic) |
24
|
|
|
|
|
|
|
my $factory = |
25
|
|
|
|
|
|
|
Bio::Tools::Run::Genemark->new('-program' => 'gmhmmp', |
26
|
|
|
|
|
|
|
'-m' => 'model.icm'); |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# Pass the factory Bio::Seq objects |
29
|
|
|
|
|
|
|
# returns a Bio::Tools::Genemark object |
30
|
|
|
|
|
|
|
my $genemark = $factory->run($seq); |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=head1 DESCRIPTION |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
Wrapper module for the GeneMark family of programs. Should work with |
35
|
|
|
|
|
|
|
all flavors of GeneMark.hmm at least, although only the prokaryotic |
36
|
|
|
|
|
|
|
version has been tested. |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
General information about GeneMark is available at |
39
|
|
|
|
|
|
|
L. |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
Contact information for licensing inquiries is available at: |
42
|
|
|
|
|
|
|
L |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
Note that GeneMark.hmm (prokaryotic at least) will only process the |
45
|
|
|
|
|
|
|
first sequence in a fasta file (if you run() more than one sequence |
46
|
|
|
|
|
|
|
at a time, only the first will be processed). |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=head1 FEEDBACK |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=head2 Mailing Lists |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
53
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
54
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
57
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=head2 Support |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
I<bioperl-l@bioperl.org> |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
66
|
|
|
|
|
|
|
responsive experts will be able to look at the problem and quickly |
67
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
68
|
|
|
|
|
|
|
with code and data examples if at all possible. |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=head2 Reporting Bugs |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
73
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via the |
74
|
|
|
|
|
|
|
web: |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
http://redmine.open-bio.org/projects/bioperl/ |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=head1 AUTHOR - Mark Johnson |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
Email: johnsonm-at-gmail-dot-com |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head1 APPENDIX |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
85
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a _ |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
=cut |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
package Bio::Tools::Run::Genemark; |
90
|
|
|
|
|
|
|
|
91
|
1
|
|
|
1
|
|
127348
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
23
|
|
92
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
1
|
|
|
|
|
0
|
|
|
1
|
|
|
|
|
24
|
|
93
|
|
|
|
|
|
|
|
94
|
1
|
|
|
1
|
|
504
|
use Bio::SeqIO; |
|
1
|
|
|
|
|
39055
|
|
|
1
|
|
|
|
|
10
|
|
95
|
1
|
|
|
1
|
|
28
|
use Bio::Root::Root; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
3
|
|
96
|
1
|
|
|
1
|
|
403
|
use Bio::Tools::Run::WrapperBase; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
18
|
|
97
|
1
|
|
|
1
|
|
465
|
use Bio::Tools::Genemark; |
|
1
|
|
|
|
|
49723
|
|
|
1
|
|
|
|
|
11
|
|
98
|
1
|
|
|
1
|
|
450
|
use English; |
|
1
|
|
|
|
|
1498
|
|
|
1
|
|
|
|
|
4
|
|
99
|
1
|
|
|
1
|
|
364
|
use IPC::Run; # Should be okay on WIN32 (See IPC::Run Docs) |
|
1
|
|
|
|
|
5
|
|
|
1
|
|
|
|
|
37
|
|
100
|
|
|
|
|
|
|
|
101
|
1
|
|
|
1
|
|
3
|
use base qw(Bio::Root::Root Bio::Tools::Run::WrapperBase); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
744
|
|
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# Constructor arguments handled by the wrapper itself (see new()).
our @params = ('program');

# GeneMark options handed to the parent class's _setparams() as -params.
our @genemark_params = ('i', 'm', 'p');

# GeneMark options handed to the parent class's _setparams() as -switches.
our @genemark_switches = ('a', 'n', 'r');
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=head2 program_name

 Title   : program_name
 Usage   : $factory->program_name()
 Function: gets/sets the program name
 Returns : string
 Args    : string (optional; a false value leaves the current name untouched)

=cut

sub program_name {

    my $self     = shift;
    my $new_name = shift;

    # Only a true value is treated as a request to change the name
    if ($new_name) {
        $self->program($new_name);
    }

    return $self->program();

}
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=head2 program_dir

 Title   : program_dir
 Usage   : $factory->program_dir()
 Function: gets/sets the program dir
 Returns : string
 Args    : string (optional; a false value leaves the current dir untouched)

=cut

sub program_dir {

    my $self    = shift;
    my $new_dir = shift;

    # Only a true value is treated as a request to change the directory
    if ($new_dir) {
        $self->{'_program_dir'} = $new_dir;
    }

    return $self->{'_program_dir'};

}
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=head2 new

 Title   : new
 Usage   : Bio::Tools::Run::Genemark->new(@params)
 Function: creates a new Genemark factory
 Returns : Bio::Tools::Run::Genemark
 Args    : Named arguments; -program and -m (model) are required.
           Arguments named after the entries of @genemark_params and
           @genemark_switches are accepted as well.

=cut

sub new {

    my $class = shift;
    my @args  = @_;

    my $self = $class->SUPER::new(@args);

    $self->io->_initialize_io();

    # Every name listed here becomes an accessor on the object and is
    # populated from the matching constructor argument.
    my @accessor_names = (@params, @genemark_params, @genemark_switches);
    $self->_set_from_args(
                          \@args,
                          -methods => \@accessor_names,
                          -create  => 1,
                         );

    # A program name and a model are mandatory
    $self->throw('Must specify program') unless defined($self->program());
    $self->throw('Must specify model')   unless defined($self->m());

    return $self;

}
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
=head2 run

 Title   : run
 Usage   : $obj->run($seq) or $obj->run(@seq)
 Function: Runs Genemark on the supplied sequences
 Returns : A Bio::Tools::Genemark object
 Args    : An array of Bio::PrimarySeqI objects
 Throws  : If no argument is given, or if any argument is not an object
           implementing Bio::PrimarySeqI

=cut

sub run {

    my ($self, @seq) = @_;

    unless (@seq) {
        $self->throw("Must supply at least one Bio::PrimarySeqI");
    }

    # Core module; loaded here so the argument check below does not rely on
    # another module having pulled it in.
    require Scalar::Util;

    foreach my $seq (@seq) {

        # Test blessed() first: calling ->isa on undef or on an unblessed
        # reference would die with a raw Perl error rather than our own.
        unless (Scalar::Util::blessed($seq) && $seq->isa('Bio::PrimarySeqI')) {
            $self->throw("Object does not implement Bio::PrimarySeqI");
        }

    }

    my $program_name = $self->program_name();
    my $file_name    = $self->_write_seq_file(@seq);

    # GeneMark.hmm (prokaryotic version, anyway) ignores sequences after the
    # first in a fasta file
    if ($program_name eq 'gmhmmp') {
        if (@seq > 1) {
            $self->warn("Program $program_name processes one sequence at a time");
        }
    }

    # The result is labelled with the id of the first sequence
    return $self->_run($file_name, $seq[0]->display_id());

}
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
=head2 _run

 Title   : _run
 Usage   : $obj->_run($seq_file_name, $seq_id)
 Function: Internal(not to be used directly)
 Returns : An instance of Bio::Tools::Genemark
 Args    : file name, sequence identifier (optional)

=cut

sub _run {

    my ($self, $seq_file_name, $seq_id) = @_;

    # Only the name of the output file is needed (it is handed to the
    # program via -o), so our own handle on it is closed straight away.
    my ($out_fh, $out_file_name) =
        $self->io->tempfile(-dir => $self->tempdir());
    close($out_fh);

    # IPC::Run wants an array where the first element is the executable
    my @cmd = ($self->executable());
    push @cmd, split(/\s+/, $self->_setparams());
    push @cmd, '-o', $out_file_name, $seq_file_name;

    $self->debug('GeneMark Command = ' . join(' ', @cmd));

    # Going through IPC::Run keeps the program's STDOUT/STDERR off the
    # console, avoids embedding redirections in the command, and leaves
    # signal handling to IPC::Run.
    my ($captured_out, $captured_err);

    eval {
        IPC::Run::run(\@cmd, \undef, \$captured_out, \$captured_err)
            or die $CHILD_ERROR;
    };

    $self->throw("GeneMark call crashed: $EVAL_ERROR") if $EVAL_ERROR;

    ## The prokaryotic version of GeneMark.HMM, at least, returns
    ## 0 (success) even when the license has expired, so an empty output
    ## file plus the license message on STDERR is treated as a failure.
    my $license_expired = (-z $out_file_name)
        && ($captured_err =~ /license period has ended/i);

    if ($license_expired) {
        $self->throw($captured_err);
    }
    elsif ($captured_err =~ /\d+ days remaining/i) {
        # License still valid but about to run out: pass the notice on
        $self->warn($captured_err);
    }

    # Forward whatever the program printed to the debug channel
    foreach my $stream (['GeneMark STDOUT:', $captured_out],
                        ['GeneMark STDERR:', $captured_err]) {
        my ($label, $text) = @{$stream};
        next unless $text;
        $self->debug(join("\n", $label, $text));
    }

    return Bio::Tools::Genemark->new(-file    => $out_file_name,
                                     -seqname => $seq_id);

}
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
=head2 _setparams

 Title   : _setparams
 Usage   : $obj->_setparams()
 Function: Internal(not to be used directly)
 Returns : The string produced by the parent class's _setparams() for
           @genemark_params and @genemark_switches, with leading and
           trailing whitespace removed
 Args    : none

=cut

sub _setparams {

    my $self = shift;

    # Copies of the option lists are passed, as before, so the parent
    # class never sees the package-level arrays themselves.
    my @spec = (
                -params   => [@genemark_params],
                -switches => [@genemark_switches],
                -dash     => 1,
               );

    my $param_string = $self->SUPER::_setparams(@spec);

    # Kill leading and trailing whitespace
    for ($param_string) {
        s/^\s+//g;
        s/\s+$//g;
    }

    return $param_string;

}
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
=head2 _write_seq_file

 Title   : _write_seq_file
 Usage   : obj->_write_seq_file($seq) or obj->_write_seq_file(@seq)
 Function: Internal(not to be used directly)
 Returns : Name of a temp file holding the given sequences in fasta format
 Args    : One or more Bio::PrimarySeqI objects

=cut

sub _write_seq_file {

    my ($self, @seq) = @_;

    my ($fh, $file_name) = $self->io->tempfile(-dir => $self->tempdir());
    my $out = Bio::SeqIO->new(-fh => $fh, '-format' => 'Fasta');

    foreach my $seq (@seq) {
        $out->write_seq($seq);
    }

    # Close the writer first so that anything it still holds is written
    # while the handle is open. The raw handle is then closed only if the
    # writer left it open.
    $out->close();
    close($fh) if defined fileno($fh);

    return $file_name;

}
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
1; |