line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
=head1 NAME |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
Bio::Prospect::LocalClient -- execute Prospect locally |
4
|
|
|
|
|
|
|
$Id: LocalClient.pm,v 1.32 2003/11/18 19:45:45 rkh Exp $ |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
=head1 SYNOPSIS |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
my $in = new Bio::SeqIO( -format=> 'Fasta', '-file' => $ARGV[0] ); |
9
|
|
|
|
|
|
|
my $po = new Bio::Prospect::Options( seq=>1, svm=>1, global_local=>1, |
10
|
|
|
|
|
|
|
templates=>['1alu', '1bgc','1eera']); |
11
|
|
|
|
|
|
|
my $pf = new Bio::Prospect::LocalClient( {options=>$po} ); |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
while ( my $s = $in->next_seq() ) { |
14
|
|
|
|
|
|
|
my @threads = $pf->thread( $s ); |
15
|
|
|
|
|
|
|
} |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 DESCRIPTION |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
B<Bio::Prospect::LocalClient> runs Prospect locally. It is intended to be |
20
|
|
|
|
|
|
|
used to facilitate high-throughput protein sequence threading and as the |
21
|
|
|
|
|
|
|
server-side component of B<Bio::Prospect::SoapClient>, with which it is API |
22
|
|
|
|
|
|
|
compatible. |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 ROUTINES & METHODS |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=cut |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
package Bio::Prospect::LocalClient; |
30
|
|
|
|
|
|
|
|
31
|
1
|
|
|
1
|
|
835
|
use base Bio::Prospect::Client; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
753
|
|
32
|
|
|
|
|
|
|
|
33
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
30
|
|
34
|
1
|
|
|
1
|
|
5
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
35
|
|
35
|
1
|
|
|
1
|
|
5
|
use File::Temp qw( tempfile tempdir ); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
66
|
|
36
|
1
|
|
|
1
|
|
5
|
use Carp qw(cluck); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
51
|
|
37
|
1
|
|
|
1
|
|
952
|
use IO::File; |
|
1
|
|
|
|
|
993
|
|
|
1
|
|
|
|
|
149
|
|
38
|
1
|
|
|
1
|
|
848
|
use Bio::Prospect::Exceptions; |
|
1
|
|
|
|
|
5
|
|
|
1
|
|
|
|
|
33
|
|
39
|
1
|
|
|
1
|
|
986
|
use Bio::Prospect::utilities; |
|
1
|
|
|
|
|
7
|
|
|
1
|
|
|
|
|
41
|
|
40
|
1
|
|
|
1
|
|
930
|
use Bio::Prospect::ThreadSummary; |
|
1
|
|
|
|
|
6
|
|
|
1
|
|
|
|
|
34
|
|
41
|
1
|
|
|
1
|
|
802
|
use Bio::Prospect::Init; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
25
|
|
42
|
1
|
|
|
1
|
|
6
|
use Digest::MD5; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
38
|
|
43
|
1
|
|
|
1
|
|
5
|
use vars qw( $VERSION ); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
2932
|
|
44
|
|
|
|
|
|
|
$VERSION = sprintf( "%d.%02d", q$Revision: 1.32 $ =~ /(\d+)\.(\d+)/ ); |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
49
|
|
|
|
|
|
|
# new() |
50
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=head2 new() |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
Name: new() |
55
|
|
|
|
|
|
|
Purpose: constructor |
56
|
|
|
|
|
|
|
Arguments: hash reference with following key/value pairs |
57
|
|
|
|
|
|
|
options => Bio::Prospect::Options object (required) |
58
|
|
|
|
|
|
|
Returns: Bio::Prospect::LocalClient |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=cut |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
sub new(;%) {
  # Constructor: the parent class builds the object from the caller's
  # arguments, then this class checks the Prospect environment, prepares
  # the command-line template and records the names of its file caches.
  my $class = shift;
  my $self  = $class->SUPER::new(@_);

  $self->_setenv();             # checks PROSPECT_PATH / PDB_PATH, sets %ENV
  $self->_prepare_options();    # fills $self->{commandline}

  # names of the xml file cache and of the sort file cache
  @{$self}{ 'xmlCacheName', 'sortCacheName' } = ( 'xmlCache', 'sortCache' );

  return $self;
}
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
74
|
|
|
|
|
|
|
# thread() |
75
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=head2 thread() |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
Name: thread() |
80
|
|
|
|
|
|
|
Purpose: return a list of Thread objects |
81
|
|
|
|
|
|
|
Arguments: scalar sequence or Bio::PrimarySeqI-derived object |
82
|
|
|
|
|
|
|
Returns: list of Bio::Prospect::Thread objects |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=cut |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
sub thread($$) {
  # Thread one sequence and return the resulting thread objects.
  #   $s - a plain sequence string, or an object derived from
  #        Bio::PrimarySeqI (its seq() method supplies the sequence)
  # Returns the list of objects handed out by next_thread() on the
  # Bio::Prospect::File opened on the threading output; the same list is
  # kept in $self->{'threads'}.
  # Raises Bio::Prospect::BadUsage (via throw) for an undefined argument or
  # a reference that is not a Bio::PrimarySeqI, and
  # Bio::Prospect::RuntimeError when the output file cannot be opened.
  my ($self,$s) = @_;

  # blessed() guards the isa() call: calling a method on an unblessed
  # reference (e.g. an array ref) dies with an unrelated message instead of
  # reporting the usage error below.
  require Scalar::Util;
  my $is_seq_obj = Scalar::Util::blessed($s) && $s->isa('Bio::PrimarySeqI');

  if ( not defined $s or ( ref $s and not $is_seq_obj ) ) {
    Bio::Prospect::BadUsage->throw(
      "Bio::Prospect::LocalClient::thread() requires one Bio::PrimarySeqI subclass or " .
      "scalar sequence argument" );
  }

  my $seq = ref $s ? $s->seq() : $s;
  my $xfn = $self->_thread_to_file( $seq );
  my $pf  = Bio::Prospect::File->new;
  $pf->open( "<$xfn" )
    || Bio::Prospect::RuntimeError->throw("$xfn: $!\n");

  # collect every thread found in the output file
  $self->{'threads'} = [];
  while( my $t = $pf->next_thread() ) {
    push @{$self->{'threads'}}, $t;
  }
  return( @{$self->{'threads'}} );
}
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
109
|
|
|
|
|
|
|
# thread_summary() |
110
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=head2 thread_summary() |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
Name: thread_summary() |
115
|
|
|
|
|
|
|
Purpose: return a list of ThreadSummary objects |
116
|
|
|
|
|
|
|
Arguments: Bio::Seq object |
117
|
|
|
|
|
|
|
Returns: list of Bio::Prospect::ThreadSummary objects |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=cut |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
sub thread_summary($$) {
  # Thread $s through thread() and wrap each resulting thread in a
  # Bio::Prospect::ThreadSummary; returns the list of summaries.
  my $self = shift;
  my $s    = shift;

  my @summary =
    map { Bio::Prospect::ThreadSummary->new( $_ ) } $self->thread($s);

  return @summary;
}
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
133
|
|
|
|
|
|
|
# xml() |
134
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=head2 xml() |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
Name: xml() |
139
|
|
|
|
|
|
|
Purpose: return xml string |
140
|
|
|
|
|
|
|
Arguments: Bio::Seq object |
141
|
|
|
|
|
|
|
Returns: string |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=cut |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
sub xml($$) {
  # Thread $s (a sequence string or sequence object, handed unchanged to
  # _thread_to_file()) and return the whole xml output file as one string.
  # Raises Bio::Prospect::RuntimeError (via throw) if the file cannot be
  # opened.
  my ($self,$s) = @_;
  my $xfn = $self->_thread_to_file( $s );
  my $in = IO::File->new( "<$xfn" )
    or Bio::Prospect::RuntimeError->throw( "can't open $xfn for reading");

  # Read every line in one go; unlike a bare while(<$in>) loop this does
  # not overwrite the caller's $_.  An empty file yields ''.
  my $xml = join( '', <$in> );
  $in->close();
  return( $xml );
}
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
157
|
|
|
|
|
|
|
# DEPRECATED METHODS - will be removed in subsequent releases. |
158
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
sub score_summary($$) {
  # DEPRECATED.  Warns with a stack trace, threads $s to a file through the
  # (also deprecated) thread_to_file() method and passes that file name to
  # Bio::Prospect::utilities::score_summary().
  cluck("This function is deprecated on Oct-23-2003:\n");

  # @_ is unpacked only after cluck() so the stack trace is unaffected
  my $self = shift;
  my $s    = shift;

  my $xml_path = $self->thread_to_file( $s );
  return Bio::Prospect::utilities::score_summary( $xml_path );
}
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
sub thread_to_file($$) {
  # DEPRECATED alias for _thread_to_file(): warns with a stack trace and
  # forwards the object and the sequence argument.
  cluck("This function is deprecated on Oct-23-2003:\n");
  my ($self, $s) = @_[ 0, 1 ];
  return _thread_to_file( $self, $s );
}
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
174
|
|
|
|
|
|
|
# INTERNAL METHODS: not intended for use outside this module |
175
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=pod |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=head1 INTERNAL METHODS & ROUTINES |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
The following functions are documented for developers' benefit. THESE |
182
|
|
|
|
|
|
|
SHOULD NOT BE CALLED OUTSIDE OF THIS MODULE. YOU'VE BEEN WARNED. |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=cut |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
188
|
|
|
|
|
|
|
# _get_svm_scores() |
189
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
=head2 _get_svm_scores() |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
Name: _get_svm_scores() |
194
|
|
|
|
|
|
|
Purpose: return a hash of svm scores from a prospect sort file |
195
|
|
|
|
|
|
|
Arguments: sort filename |
196
|
|
|
|
|
|
|
Returns: hash |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=cut |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
sub _get_svm_scores($$) {
  # Parse a sortProspect output file.
  #   $fn - name of the sort file
  # Returns a hash that maps the first whitespace-separated field of each
  # line to the fourth field of that line; lines starting with ":Protein"
  # are skipped.
  # Raises Bio::Prospect::RuntimeError (via throw) when the file cannot be
  # opened or when it yields no entries at all.
  my ($self,$fn) = @_;
  my %retval;

  # The open must be checked with low-precedence "or": in the former
  # "new IO::File $fn || throw ..." the || applied to $fn, so a failed open
  # went unnoticed and surfaced as a misleading "Sort file is empty".
  my $in = IO::File->new( $fn )
    or Bio::Prospect::RuntimeError->throw( "can't open $fn for reading" );

  # a named lexical keeps the caller's $_ intact
  while ( my $line = <$in> ) {
    next if $line =~ m/^:Protein/;
    my @fld = split /\s+/, $line;
    $retval{$fld[0]} = $fld[3];
  }
  close($in);        # closed before the check below so no handle is left open

  if ( scalar (keys %retval) == 0 ) {
    Bio::Prospect::RuntimeError->throw
      ( 'Sort file is empty',
        "The sort file for this sequence is empty.  sortProspect likely failed!",
        "Execute sortProspect on the command-line and check output messages.  sortProspect " .
        "can fail because of erroneous characters in the output xml file (e.g. null character)."
      );
  }
  return %retval;
}
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
sub _thread_to_file($$)
{
  # Thread one sequence ($s: sequence object or plain string) and return
  # the name of an xml output file that carries svm scores.  Results are
  # looked up in, and stored to, the xml file cache under the MD5 digest of
  # the sequence.
  my ($self,$s) = @_;
  my $seq = ref $s ? $s->seq() : $s;

  my $digest     = Digest::MD5::md5_hex( $seq );
  my $cache_name = $self->{'xmlCacheName'};

  # check the cache for a cached file corresponding to this sequence.
  # if available then return it rather than running prospect
  my $cached = $self->_get_cache_file( $digest, $cache_name );
  if ( defined $cached and -e $cached ) {
    warn("retrieved cache threading info $cached\n") if $ENV{DEBUG};
    return $cached;
  }

  # write the sequence to a temporary file, thread it, drop the input file
  my $ifn = $self->_write_seqfile( $seq );
  my $xfn = $self->_thread1( $ifn );
  unlink( $ifn );

  # new version of prospect outputs svm score when threading.  no
  # longer need to run sortProspect in this case.  for backwards
  # compatibility, check the xml file for svmScore tag.  If
  # not present, then run sortProspect.
  if ( $self->_hasSvmScore( $xfn ) ) {
    print(STDERR "xml file ($xfn) already contains svm scores - skip sortProspect step\n") if $ENV{DEBUG};
    $self->_put_cache_file( $digest, $cache_name, $xfn );
    return $xfn;
  }

  print(STDERR "xml file ($xfn) doesn't contain svm scores - run sortProspect\n") if $ENV{DEBUG};

  # run sortProspect so as to get svm score, then insert the svm score
  # into the prospect output; the two intermediate files are removed
  my $sfn = $self->_sort1( $xfn );
  my $ffn = $self->_output_svm_score( $xfn, $sfn );
  unlink( $xfn );
  unlink( $sfn );

  # cache the prospect output filename
  $self->_put_cache_file( $digest, $cache_name, $ffn );
  return $ffn;
}

=pod

=over

=item B<::_thread_to_file( Bio::Seq | scalar )>

Thread one sequence in the Bio::Seq object or the scalar string.  The xml
output filename is returned.  Threading results are cached by sequence for
the lifetime of the LocalClient object.  See also B<::thread>.

=back

=cut
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
sub _thread1($$)
{
  # Run the prepared prospect command line on the sequence file $ifn and
  # return the name of the xml output file ("$ifn.xml").
  # Raises Bio::Prospect::RuntimeError (via throw) when the command cannot
  # be started, is killed by a signal or exits non-zero, and
  # Bio::Prospect::Exception when no output file exists afterwards or the
  # output contains no <scoreInfo> element.
  my ($self,$ifn) = @_;
  my $xfn = "$ifn.xml";

  # elements 1 and 2 of the stored command line are sprintf templates for
  # the input and the output file name
  my @cl = @{$self->{commandline}};
  $cl[1] = sprintf($cl[1],$ifn);
  $cl[2] = sprintf($cl[2],$xfn);
  print(STDERR "about to @cl\n") if $ENV{DEBUG};

  # $? and $! are captured right after system() so nothing can reset them
  my ($status, $oserr);
  my $failed = eval {
    my $rc = system("@cl");
    ($status, $oserr) = ($?, "$!");
    $rc;
  };

  if ( $failed ) {
    # -1 means the command could not be started at all; without this check
    # it was reported as "received signal 127" because -1 & 127 == 127
    if ( $status == -1 ) {
      Bio::Prospect::RuntimeError->throw
        ( 'failed to execute Prospect',
          "system(@cl) could not be started: $oserr",
          'check your prospect installation manually' );
    }
    if ( $status & 127 ) {
      my $s  = $status & 127;
      my $sn = Bio::Prospect::utilities::signame($s);
      Bio::Prospect::RuntimeError->throw
        ( 'failed to execute Prospect',
          "received signal $s ($sn)" );
    }
    my $s = $status >> 8;
    Bio::Prospect::RuntimeError->throw
      ( 'failed to execute Prospect',
        "system(@cl) exited with status $s",
        'check your prospect installation manually' );
  }

  my $fh = IO::File->new;
  $fh->open("<$xfn")
    || Bio::Prospect::Exception->throw("Prospect failed",
                      "prospect completed but didn't create an output file");

  # ugh-prospect sometimes barfs and completes with status 0 (e.g., large
  # sequences), so the output is only accepted if it has a <scoreInfo> tag.
  # A named lexical is used so the caller's $_ is not overwritten.
  while ( my $line = <$fh> ) {
    if ( $line =~ m/<scoreInfo>/ ) {
      $fh->close();
      return $xfn;
    }
  }
  $fh->close();

  Bio::Prospect::Exception->throw("Prospect failed",
                      "prospect completed but the output wasn't valid",
                      "prospect may fail if the sequence is "
                      ."too large or there's not enough memory.  Try "
                      ."running the sequence manually.");
  return undef;
}

=pod

=over

=item B<::_thread1( filename )>

Threads the fasta-formatted sequence in C<filename> which is passed
directly to prospect.  The name of a temporary file which contains the raw
xml output is returned.  This method will work with multiple sequences in
C<filename>, but other routines in this module will not understand
multi-query xml output reliably.  Most callers should use thread()
instead.

=back

=cut
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
340
|
|
|
|
|
|
|
# _hasSvmScore() |
341
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
=head2 _hasSvmScore() |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
Name: _hasSvmScore() |
346
|
|
|
|
|
|
|
Purpose: check whether the prospect xml file already contains a svmScore tag |
347
|
|
|
|
|
|
|
Arguments: prospect xml file |
348
|
|
|
|
|
|
|
Returns: 1 (has svm score) or 0 (no svm score) |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
=cut |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
sub _hasSvmScore {
  # Report whether the file $xmlFile contains the text "svmScore".
  # Returns 1 if some line matches, 0 otherwise.
  # Raises Bio::Prospect::RuntimeError (via throw) if the file cannot be
  # opened.
  my ($self,$xmlFile) = @_;
  my $in = IO::File->new( "$xmlFile" )
    or Bio::Prospect::RuntimeError->throw("can't open $xmlFile for reading");

  # a named lexical is read into, so the caller's $_ is left alone
  my $retval = 0;
  while ( my $line = <$in> ) {
    next unless $line =~ m/svmScore/;
    $retval = 1;
    last;                       # the first hit settles the answer
  }
  $in->close();
  return $retval;
}
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
369
|
|
|
|
|
|
|
# _output_svm_score() |
370
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
=head2 _output_svm_score() |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
Name: _output_svm_score() |
375
|
|
|
|
|
|
|
Purpose: output the svm score in the prospect output file |
376
|
|
|
|
|
|
|
Arguments: prospect xml file, prospect sort file |
377
|
|
|
|
|
|
|
Returns: prospect xml file with svm score |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
=cut |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
sub _output_svm_score {
  # Copy the threading records of $xmlFile to "$xmlFile.svm", adding a
  # <svmScore> element after every <rawScore> element of a record.
  #   $xmlFile  - prospect xml output file
  #   $sortFile - sortProspect output, read through _get_svm_scores()
  # Returns the name of the file written.
  # Raises Bio::Prospect::RuntimeError (via throw) when a file cannot be
  # opened or written, when a record carries no template attribute, or
  # when no svm score is known for a record's template.
  my ($self,$xmlFile,$sortFile) = @_;

  my %svm = $self->_get_svm_scores( $sortFile );

  my $outFile = "$xmlFile.svm";
  my $in = IO::File->new( "$xmlFile" )
    or Bio::Prospect::RuntimeError->throw("can't open $xmlFile for reading");
  my $out = IO::File->new( ">$outFile" )
    or Bio::Prospect::RuntimeError->throw("can't open $outFile for writing");

  # Read one "</threading>"-terminated record at a time.  The separator is
  # changed only here, after the sort file has been read line by line.
  local $/ = '</threading>';
  while ( my $rec = <$in> ) {
    next if $rec !~ m/threading/;   # make sure that we have valid prospect thread

    # The capture is taken from the match result itself: after a failed
    # match $1 would still hold the template of the previous record.
    my ($t) = $rec =~ m#template="(\w+)"#;
    if ( ! defined $t ) {
      Bio::Prospect::RuntimeError->throw
        ( 'Unable to retrieve svm sort',
          'threading record has no template attribute' );
    }
    if ( ! defined $svm{$t} or $svm{$t} eq '' ) {
      Bio::Prospect::RuntimeError->throw
        ( 'Unable to retrieve svm sort',
          "no svm score for template=$t" );
    }
    $rec =~ s#(<rawScore>.*?</rawScore>)#$1\n<svmScore>$svm{$t}</svmScore>#g;
    print $out $rec;
  }
  close($in);

  # buffered write errors only show up when the handle is closed
  $out->close()
    or Bio::Prospect::RuntimeError->throw("can't write $outFile: $!");
  return( $outFile );
}
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
409
|
|
|
|
|
|
|
# _sort1() |
410
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
=head2 _sort1() |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
Name: _sort1() |
415
|
|
|
|
|
|
|
Purpose: run sortProspect on threading file |
416
|
|
|
|
|
|
|
Arguments: prospect xml file |
417
|
|
|
|
|
|
|
Returns: filename of sortProspect results |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
=cut |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
sub _sort1($$) {
  # Run sortProspect on the xml file $xfn through the shell, with its
  # standard output redirected to "$xfn.sort" and its standard error
  # discarded; returns the name of that sort file.
  # Raises Bio::Prospect::RuntimeError (via throw) when the command cannot
  # be started, is killed by a signal or exits with a non-zero status.
  my ($self,$xfn) = @_;
  my $sfn = "$xfn.sort";
  my $cmd = "sortProspect $xfn 2>/dev/null 1>$sfn";
  print(STDERR "about to $cmd\n") if $ENV{DEBUG};

  # $? and $! are captured right after system() so nothing can reset them
  my ($status, $oserr);
  my $failed = eval {
    my $rc = system("$cmd");
    ($status, $oserr) = ($?, "$!");
    $rc;
  };

  if ( $failed ) {
    # -1 means the command could not be started at all; without this check
    # it was reported as "received signal 127" because -1 & 127 == 127
    if ( $status == -1 ) {
      Bio::Prospect::RuntimeError->throw
        ( 'failed to execute Prospect',
          "system($cmd) could not be started: $oserr",
          'check your prospect installation manually' );
    }
    if ( $status & 127 ) {
      my $s  = $status & 127;
      my $sn = Bio::Prospect::utilities::signame($s);
      Bio::Prospect::RuntimeError->throw
        ( 'failed to execute Prospect',
          "received signal $s ($sn)" );
    }
    my $s = $status >> 8;
    Bio::Prospect::RuntimeError->throw
      ( 'failed to execute Prospect',
        "system($cmd) exited with status $s",
        'check your prospect installation manually' );
  }

  # sanity checks on the sort output??
  return $sfn;
}
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
sub _setenv {
  # Check that $Bio::Prospect::Init::PROSPECT_PATH and
  # $Bio::Prospect::Init::PDB_PATH name existing directories and copy each
  # of them into the environment variable of the same name.
  # Raises Bio::Prospect::Exception (via throw) for a value that is not a
  # directory.
  my $prospect_path = $Bio::Prospect::Init::PROSPECT_PATH;
  if (not -d $prospect_path ) {
    # (the message used to carry a stray "}" after the path)
    Bio::Prospect::Exception->throw
      ( "PROSPECT_PATH is not set correctly",
        "PROSPECT_PATH ($prospect_path) is not a valid directory",
        "Check your prospect installation and set PROSPECT_PATH in Bio::Prospect::Init or as an environment variable" );
  } else {
    $ENV{'PROSPECT_PATH'} = $prospect_path;
  }

  my $pdb_path = $Bio::Prospect::Init::PDB_PATH;
  if (not -d $pdb_path) {
    Bio::Prospect::Exception->throw
      ( "PDB_PATH is not set correctly",
        "PDB_PATH ($pdb_path) is not a valid directory",
        "Check your prospect installation and set PDB_PATH in Bio::Prospect::Init or as an environment variable" );
  } else {
    $ENV{'PDB_PATH'} = $pdb_path;
  }
}
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
sub _prepare_options($$) {
  # Translate the Bio::Prospect::Options object stored in $self->{options}
  # into a prospect command line.  The list is stored in
  # @{$self->{commandline}} and also returned.  Elements 1 and 2 are
  # sprintf templates for the input and output file names.
  my ($self) = @_;
  my $opts = $self->{options};

  unless ( ref($opts) eq 'Bio::Prospect::Options' ) {
    Bio::Prospect::BadUsage->throw('Bio::Prospect::Options argument is missing');
  }

  my @cl = ( "$Bio::Prospect::Init::PROSPECT_PATH/bin/prospect" );

  # input type: only sequence input is implemented
  # NOTE(review): Exception::NotYetSupported is not in the Bio::Prospect::
  # namespace like the other exception classes used here - confirm that
  # this class exists.
  if (exists $opts->{phd}) {
    Exception::NotYetSupported->throw
      ( "phd threading isn't implemented" );
  } elsif (exists $opts->{ssp}) {
    Exception::NotYetSupported->throw
      ( "ssp threading isn't implemented" );
  } elsif (exists $opts->{seq}) {
    push( @cl, '-seqfile %s' );
  } else {
    Bio::Prospect::BadUsage->throw("Bio::Prospect::Options doesn't specify input type");
  }

  # output template and general switches
  push @cl, '-o %s', '-ncpus ' . ( $opts->{ncpus} || 2 );
  if ( exists $opts->{freqfile} ) { push @cl, '-freqfile', $opts->{freqfile}; }
  if ( $opts->{zscore} )          { push @cl, '-reliab'; }
  if ( $opts->{'3d'} )            { push @cl, '-3d'; }
  if ( $opts->{global_local} )    { push @cl, '-global_local'; }
  else                            { push @cl, '-global'; }

  # template set selection
  # ONE of -scop, -tfile, -templates (array), or -fssp (default)
  my @template_args;
  if ($opts->{scop}) {
    @template_args = ( '-scop' );
  } elsif (exists $opts->{tfile}) {
    @template_args = ( '-tfile', $opts->{tfile} );
  } elsif (exists $opts->{templates}) {
    # an explicit template list is written to a temporary file, one id per
    # line, and passed the same way as a user-supplied tfile
    my ($fh,$fn) = $self->_tempfile('lst');
    $fh->print(join("\n",@{$opts->{templates}}),"\n");
    $fh->close();
    @template_args = ( '-tfile', $fn );
  } else {
    @template_args = ( '-fssp' );
  }
  push @cl, @template_args;

  # shell redirections; stderr is kept only when DEBUG is greater than 5
  my $keep_stderr = ( defined $ENV{DEBUG} and $ENV{DEBUG}>5 );
  push @cl, '2> /dev/null' if not $keep_stderr;
  push @cl, '1>&2';

  @{$self->{commandline}} = @cl;
  return @cl;
}

=pod

=over

=item B<::_prepare_options()>

Prepares temporary files based on options (e.g., writes a temporary
`tfile') and generates an array of command line options in
@{$self->{commandline}}.  Args 1 and 2 are input and output respectively
and MUST be sprintf'd before use.  See _thread1().

=back

=cut
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
sub _write_seqfile($$)
{
  # Write $seq to a temporary file obtained from $self->_tempfile('fa') as
  # one fasta record and return the file name.  All whitespace is removed
  # from the sequence, the header line reports the resulting length, and a
  # newline is inserted after every 60 residues.
  # Raises Bio::Prospect::Exception (via throw) if $seq is undefined.
  my ($self,$seq) = @_;

  # This used to name a bare "Exception" class, which nothing in this file
  # loads or uses elsewhere; Bio::Prospect::Exception is the class the rest
  # of the module throws.
  Bio::Prospect::Exception->throw('seq undefined') unless defined $seq;

  my ($fh,$fn) = $self->_tempfile('fa');
  $seq =~ s/\s//g;
  my $len = length($seq);
  $seq =~ s/(.{60})/$1\n/g;            # wrap at 60 cols
  $fh->print( ">LocalClient /len=$len\n$seq\n");
  $fh->close();
  return $fn;
}
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
=pod |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
=head1 SEE ALSO |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
B<Bio::Prospect::Options>, B<Bio::Prospect::File>, |
553
|
|
|
|
|
|
|
B<Bio::Prospect::Client>, B<Bio::Prospect::SoapClient>, |
554
|
|
|
|
|
|
|
B<Bio::Prospect::Thread>, B<Bio::Prospect::ThreadSummary> |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
http://www.bioinformaticssolutions.com/ |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
=cut |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
1; |