| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
=head1 NAME |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
Bio::Prospect::LocalClient -- execute Prospect locally |
|
4
|
|
|
|
|
|
|
$Id: LocalClient.pm,v 1.32 2003/11/18 19:45:45 rkh Exp $ |
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
my $in = new Bio::SeqIO( -format=> 'Fasta', '-file' => $ARGV[0] ); |
|
9
|
|
|
|
|
|
|
my $po = new Bio::Prospect::Options( seq=>1, svm=>1, global_local=>1, |
|
10
|
|
|
|
|
|
|
templates=>['1alu', '1bgc','1eera']); |
|
11
|
|
|
|
|
|
|
my $pf = new Bio::Prospect::LocalClient( {options=>$po} ); |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
while ( my $s = $in->next_seq() ) { |
|
14
|
|
|
|
|
|
|
my @threads = $pf->thread( $s ); |
|
15
|
|
|
|
|
|
|
} |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
B<Bio::Prospect::LocalClient> runs Prospect locally. It is intended to be |
|
20
|
|
|
|
|
|
|
used to facilitate high-throughput protein sequence threading and as the |
|
21
|
|
|
|
|
|
|
server-side component of B<Bio::Prospect::SoapClient>, with which it is API |
|
22
|
|
|
|
|
|
|
compatible. |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 ROUTINES & METHODS |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=cut |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
package Bio::Prospect::LocalClient; |
|
30
|
|
|
|
|
|
|
|
|
31
|
1
|
|
|
1
|
|
835
|
use base Bio::Prospect::Client; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
753
|
|
|
32
|
|
|
|
|
|
|
|
|
33
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
30
|
|
|
34
|
1
|
|
|
1
|
|
5
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
35
|
|
|
35
|
1
|
|
|
1
|
|
5
|
use File::Temp qw( tempfile tempdir ); |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
66
|
|
|
36
|
1
|
|
|
1
|
|
5
|
use Carp qw(cluck); |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
51
|
|
|
37
|
1
|
|
|
1
|
|
952
|
use IO::File; |
|
|
1
|
|
|
|
|
993
|
|
|
|
1
|
|
|
|
|
149
|
|
|
38
|
1
|
|
|
1
|
|
848
|
use Bio::Prospect::Exceptions; |
|
|
1
|
|
|
|
|
5
|
|
|
|
1
|
|
|
|
|
33
|
|
|
39
|
1
|
|
|
1
|
|
986
|
use Bio::Prospect::utilities; |
|
|
1
|
|
|
|
|
7
|
|
|
|
1
|
|
|
|
|
41
|
|
|
40
|
1
|
|
|
1
|
|
930
|
use Bio::Prospect::ThreadSummary; |
|
|
1
|
|
|
|
|
6
|
|
|
|
1
|
|
|
|
|
34
|
|
|
41
|
1
|
|
|
1
|
|
802
|
use Bio::Prospect::Init; |
|
|
1
|
|
|
|
|
4
|
|
|
|
1
|
|
|
|
|
25
|
|
|
42
|
1
|
|
|
1
|
|
6
|
use Digest::MD5; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
38
|
|
|
43
|
1
|
|
|
1
|
|
5
|
use vars qw( $VERSION ); |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
2932
|
|
|
44
|
|
|
|
|
|
|
$VERSION = sprintf( "%d.%02d", q$Revision: 1.32 $ =~ /(\d+)\.(\d+)/ ); |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
49
|
|
|
|
|
|
|
# new() |
|
50
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=head2 new() |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
Name: new() |
|
55
|
|
|
|
|
|
|
Purpose: constructor |
|
56
|
|
|
|
|
|
|
Arguments: hash reference with following key/value pairs |
|
57
|
|
|
|
|
|
|
options => Bio::Prospect::Options object (required) |
|
58
|
|
|
|
|
|
|
Returns: Bio::Prospect::LocalClient |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=cut |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
sub new(;%) {
  # Constructor.  The parent class builds the object from the caller's
  # arguments; this class then checks the Prospect environment, builds the
  # prospect command line, and records the names of its two file caches.
  my $class = shift;
  my $self  = $class->SUPER::new(@_);

  $self->_setenv();
  $self->_prepare_options();

  # cache names: xml file cache and sort file cache, respectively
  @{$self}{ 'xmlCacheName', 'sortCacheName' } = ( 'xmlCache', 'sortCache' );

  return $self;
}
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
74
|
|
|
|
|
|
|
# thread() |
|
75
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=head2 thread() |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
Name: thread() |
|
80
|
|
|
|
|
|
|
Purpose: return a list of Thread objects |
|
81
|
|
|
|
|
|
|
Arguments: scalar sequence or Bio::PrimarySeqI-derived object |
|
82
|
|
|
|
|
|
|
Returns: list of Bio::Prospect::Thread objects |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=cut |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
sub thread($$) {
  # Thread one sequence and return the resulting list of thread objects.
  #
  # Arguments: $s - a scalar sequence string, or an object derived from
  #                 Bio::PrimarySeqI (its ->seq() is used)
  # Returns:   list of the objects produced by Bio::Prospect::File's
  #            next_thread(); the same list is kept in $self->{'threads'}
  # Throws:    Bio::Prospect::BadUsage for an undefined argument or any
  #            reference that is not a Bio::PrimarySeqI object;
  #            Bio::Prospect::RuntimeError if the xml file can't be opened
  my ($self,$s) = @_;

  # Core module, loaded locally so this sub does not rely on the file's
  # import list.
  require Scalar::Util;

  # ->isa may only be called on blessed references.  Testing blessed() first
  # makes an unblessed reference (array ref, hash ref, ...) raise BadUsage
  # rather than an unrelated "Can't call method" error.
  my $is_seq_obj = Scalar::Util::blessed($s) && $s->isa('Bio::PrimarySeqI');
  if ( not defined $s or ( ref $s and not $is_seq_obj ) ) {
    Bio::Prospect::BadUsage->throw(
      "Bio::Prospect::LocalClient::thread() requires one Bio::PrimarySeqI subclass or " .
      "scalar sequence argument" );
  }

  my $seq = ref $s ? $s->seq() : $s;
  my $xfn = $self->_thread_to_file( $seq );

  # NOTE(review): this file has no "use Bio::Prospect::File"; it is presumably
  # loaded by another Bio::Prospect module.  require is a no-op if so.
  require Bio::Prospect::File;

  # Arrow syntax on purpose: this package defines its own new(), which makes
  # the indirect-object form "new Bio::Prospect::File" ambiguous to the parser.
  my $pf = Bio::Prospect::File->new;
  $pf->open( "<$xfn" )
    or Bio::Prospect::RuntimeError->throw("$xfn: $!\n");

  $self->{'threads'} = [];
  while ( my $t = $pf->next_thread() ) {
    push @{ $self->{'threads'} }, $t;
  }
  return( @{ $self->{'threads'} } );
}
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
109
|
|
|
|
|
|
|
# thread_summary() |
|
110
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=head2 thread_summary() |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
Name: thread_summary() |
|
115
|
|
|
|
|
|
|
Purpose: return a list of ThreadSummary objects |
|
116
|
|
|
|
|
|
|
Arguments: Bio::Seq object |
|
117
|
|
|
|
|
|
|
Returns: list of Bio::Prospect::ThreadSummary objects |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=cut |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
sub thread_summary($$) {
  # Thread the sequence via thread() and wrap every resulting thread in a
  # Bio::Prospect::ThreadSummary.  Returns the list of summaries.
  my ($self,$s) = @_;

  my @summary =
    map { Bio::Prospect::ThreadSummary->new( $_ ) } $self->thread($s);

  return( @summary );
}
|
130
|
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
133
|
|
|
|
|
|
|
# xml() |
|
134
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=head2 xml() |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
Name: xml() |
|
139
|
|
|
|
|
|
|
Purpose: return xml string |
|
140
|
|
|
|
|
|
|
Arguments: Bio::Seq object |
|
141
|
|
|
|
|
|
|
Returns: string |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=cut |
|
144
|
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
sub xml($$) {
  # Thread the sequence and return the whole xml output file as one string.
  #
  # Arguments: $s - passed unchanged to _thread_to_file()
  # Returns:   string with the complete contents of the xml file
  # Throws:    Bio::Prospect::RuntimeError if the file can't be opened
  my ($self,$s) = @_;

  my $xfn = $self->_thread_to_file( $s );
  my $in  = IO::File->new( "<$xfn" )
    or Bio::Prospect::RuntimeError->throw( "can't open $xfn for reading" );

  # Read in list context and join: unlike "while(<$in>)" this leaves the
  # caller's $_ untouched.
  my $xml = join( '', $in->getlines() );
  $in->close();

  return( $xml );
}
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
157
|
|
|
|
|
|
|
# DEPRECATED METHODS - will be removed in subsequent releases. |
|
158
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
sub score_summary($$) {
  # DEPRECATED.  Emits a deprecation warning with a stack trace, threads the
  # sequence through thread_to_file(), and passes the resulting filename to
  # Bio::Prospect::utilities::score_summary().
  cluck("This function is deprecated on Oct-23-2003:\n");

  my $self     = shift;
  my $sequence = shift;

  return Bio::Prospect::utilities::score_summary(
    scalar $self->thread_to_file( $sequence ) );
}
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
sub thread_to_file($$) {
  # DEPRECATED public alias for _thread_to_file().  Emits a deprecation
  # warning with a stack trace, then delegates.
  #
  # Arguments: sequence (scalar or sequence object)
  # Returns:   whatever _thread_to_file() returns (an xml filename)
  cluck("This function is deprecated on Oct-23-2003:\n");

  my ($self,$s) = @_;

  # Method dispatch, like the other callers of _thread_to_file() in this
  # module.  A plain function call here would come before the prototyped
  # definition is seen and would bypass any subclass override.
  return $self->_thread_to_file( $s );
}
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
174
|
|
|
|
|
|
|
# INTERNAL METHODS: not intended for use outside this module |
|
175
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=pod |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=head1 INTERNAL METHODS & ROUTINES |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
The following functions are documented for developers' benefit. THESE |
|
182
|
|
|
|
|
|
|
SHOULD NOT BE CALLED OUTSIDE OF THIS MODULE. YOU'VE BEEN WARNED. |
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=cut |
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
188
|
|
|
|
|
|
|
# _get_svm_scores() |
|
189
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
190
|
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
=head2 _get_svm_scores() |
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
Name: _get_svm_scores() |
|
194
|
|
|
|
|
|
|
Purpose: return a hash of svm scores from a prospect sort file |
|
195
|
|
|
|
|
|
|
Arguments: sort filename |
|
196
|
|
|
|
|
|
|
Returns: hash |
|
197
|
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=cut |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
sub _get_svm_scores($$) {
  # Parse a sortProspect output file into a hash.
  #
  # Arguments: $fn - sort filename
  # Returns:   hash mapping the first whitespace-separated field of each line
  #            to the fourth; lines starting with ":Protein" are skipped
  # Throws:    Bio::Prospect::RuntimeError if the file can't be opened or
  #            yields no entries
  my ($self,$fn) = @_;
  my %retval;

  # Low-precedence "or" is required: with "||" the alternative binds to $fn,
  # so a failed open would go undetected.
  my $in = IO::File->new( $fn )
    or Bio::Prospect::RuntimeError->throw( "can't open $fn for reading" );

  # A named line variable keeps the caller's $_ intact.
  while ( my $line = <$in> ) {
    next if $line =~ m/^:Protein/;
    my @fld = split /\s+/, $line;
    $retval{$fld[0]} = $fld[3];
  }

  # Close before the emptiness check so the handle is released even when we
  # throw.
  close($in);

  if ( scalar (keys %retval) == 0 ) {
    Bio::Prospect::RuntimeError->throw
      ( 'Sort file is empty',
        "The sort file for this sequence is empty.  sortProspect likely failed!",
        "Execute sortProspect on the command-line and check output messages.  sortProspect " .
        "can fail because of erroneous characters in the output xml file (e.g. null character)."
      );
  }

  return %retval;
}
|
222
|
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
sub _thread_to_file($$)
{
  my ($self,$s) = @_;
  my $seq = ref $s ? $s->seq() : $s;

  # cache key: md5 of the sequence, used for both lookup and store
  my $digest = Digest::MD5::md5_hex( $seq );

  # check the cache for a cached file corresponding to this sequence.
  # if available then return it rather than running prospect
  my $cached = $self->_get_cache_file( $digest, $self->{'xmlCacheName'} );
  if ( defined $cached and -e $cached ) {
    warn("retrieved cache threading info $cached\n") if $ENV{DEBUG};
    return $cached;
  }

  # cache miss: write the sequence to a file, thread it, drop the input file
  my $ifn = $self->_write_seqfile( $seq );
  my $xfn = $self->_thread1( $ifn );
  unlink( $ifn );

  # new version of prospect outputs svm score when threading.  no
  # longer need to run sortProspect in this case.  for backwards
  # compatibility, check the xml file for svmScore tag.  If
  # not present, then run sortProspect.
  my $result;
  if ( $self->_hasSvmScore( $xfn ) ) {
    print(STDERR "xml file ($xfn) already contains svm scores - skip sortProspect step\n") if $ENV{DEBUG};
    $result = $xfn;
  } else {
    print(STDERR "xml file ($xfn) doesn't contain svm scores - run sortProspect\n") if $ENV{DEBUG};

    # run sortProspect so as to get svm score, then insert the svm score
    # into the prospect output; the intermediate files are removed
    my $sfn = $self->_sort1( $xfn );
    $result = $self->_output_svm_score( $xfn, $sfn );
    unlink( $xfn );
    unlink( $sfn );
  }

  # cache the prospect output filename
  $self->_put_cache_file( $digest, $self->{'xmlCacheName'}, $result );
  return $result;

=pod

=over

=item B<::_thread_to_file( Bio::Seq | scalar )>

Thread one sequence in the Bio::Seq object or the scalar string.  The xml
output filename is returned.  Threading results are cached by sequence for
the lifetime of the LocalClient object.  See also B<::thread>.

=back

=cut

}
|
280
|
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
sub _thread1($$)
{
  # Run prospect on one sequence file and sanity-check its output.
  #
  # Arguments: $ifn - input sequence filename
  # Returns:   name of the xml output file ("$ifn.xml")
  # Throws:    Bio::Prospect::RuntimeError if the command can't be started,
  #            is killed by a signal, or exits non-zero;
  #            Bio::Prospect::Exception if the output file is missing or has
  #            no <scoreInfo> element
  my ($self,$ifn) = @_;
  my $xfn = "$ifn.xml";

  # Elements 1 and 2 of the stored command line are sprintf templates for
  # the input and output filenames.
  my @cl = @{$self->{commandline}};
  $cl[1] = sprintf($cl[1],$ifn);
  $cl[2] = sprintf($cl[2],$xfn);
  print(STDERR "about to @cl\n") if $ENV{DEBUG};

  my $rc    = system("@cl");
  my $oserr = "$!";                # capture before anything can reset $!
  if ( $rc != 0 ) {
    my $s = $?;

    # -1 means the program could not be started at all; without this check
    # the bit test below would misreport it as "signal 127".
    if ( $s == -1 ) {
      Bio::Prospect::RuntimeError->throw
        ( 'failed to execute Prospect',
          "system(@cl) could not be started: $oserr",
          'check your prospect installation manually' );
    }

    # low 7 bits of the wait status: number of the terminating signal
    if ($s & 127) {
      $s &= 127;
      my $sn = Bio::Prospect::utilities::signame($s);
      Bio::Prospect::RuntimeError->throw
        ( 'failed to execute Prospect',
          "received signal $s ($sn)" );
    }

    # otherwise the high byte holds the exit status
    $s >>= 8;
    Bio::Prospect::RuntimeError->throw
      ( 'failed to execute Prospect',
        "system(@cl) exited with status $s",
        'check your prospect installation manually' );
  }

  my $fh = IO::File->new;
  $fh->open("<$xfn")
    or Bio::Prospect::Exception->throw("Prospect failed",
         "prospect completed but didn't create an output file");

  # ugh-prospect sometimes barfs and completes with status 0 (e.g., large
  # sequences), so require a <scoreInfo> element in the output.
  my $valid = 0;
  while ( my $line = <$fh> ) {
    if ( $line =~ m/<scoreInfo>/ ) {
      $valid = 1;
      last;
    }
  }
  $fh->close();                    # closed on both the valid and invalid path
  return $xfn if $valid;

  Bio::Prospect::Exception->throw("Prospect failed",
      "prospect completed but the output wasn't valid",
      "prospect may fail if the sequence is "
      ."too large or there's not enough memory.  Try "
      ."running the sequence manually.");

=pod

=over

=item B<::_thread1( filename )>

Threads the fasta-formatted sequence in C<filename> which is passed
directly to prospect.  The name of a temporary file which contains the raw
xml output is returned.  This method will work with multiple sequences in
C<filename>, but other routines in this module will not understand
multi-query xml output reliably.  Most callers should use thread()
instead.

=back

=cut

}
|
337
|
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
340
|
|
|
|
|
|
|
# _hasSvmScore() |
|
341
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
342
|
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
=head2 _hasSvmScore() |
|
344
|
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
Name: _hasSvmScore() |
|
346
|
|
|
|
|
|
|
Purpose: check whether the prospect xml file already contains a svmScore tag |
|
347
|
|
|
|
|
|
|
Arguments: prospect xml file |
|
348
|
|
|
|
|
|
|
Returns: 1 (has svm score) or 0 (no svm score) |
|
349
|
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
=cut |
|
351
|
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
sub _hasSvmScore {
  # Scan a prospect xml file line by line for the text "svmScore".
  # Returns 1 as soon as a matching line is found, 0 if none is.
  # Throws Bio::Prospect::RuntimeError if the file can't be opened.
  my $self    = shift;
  my $xmlFile = shift;

  my $in = IO::File->new( "$xmlFile" )
    or Bio::Prospect::RuntimeError->throw("can't open $xmlFile for reading");

  my $found = 0;
  while (<$in>) {
    next unless m/svmScore/;
    $found = 1;
    last;                          # first hit is enough
  }
  $in->close();

  return $found;
}
|
366
|
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
369
|
|
|
|
|
|
|
# _output_svm_score() |
|
370
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
371
|
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
=head2 _output_svm_score() |
|
373
|
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
Name: _output_svm_score() |
|
375
|
|
|
|
|
|
|
Purpose: output the svm score in the prospect output file |
|
376
|
|
|
|
|
|
|
Arguments: prospect xml file, prospect sort file |
|
377
|
|
|
|
|
|
|
Returns: prospect xml file with svm score |
|
378
|
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
=cut |
|
380
|
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
sub _output_svm_score {
  # Copy a prospect xml file to "$xmlFile.svm", adding an <svmScore> element
  # after every <rawScore> element.
  #
  # Arguments: $xmlFile  - prospect xml file
  #            $sortFile - sortProspect output, read via _get_svm_scores()
  # Returns:   name of the new file ("$xmlFile.svm")
  # Throws:    Bio::Prospect::RuntimeError if a file can't be opened, a
  #            record has no usable svm score, or the output can't be written
  my ($self,$xmlFile,$sortFile) = @_;

  # Read the scores before $/ is changed below.
  my %svm = $self->_get_svm_scores( $sortFile );

  my $outFile = "$xmlFile.svm";
  my $in = IO::File->new( "$xmlFile" )
    or Bio::Prospect::RuntimeError->throw("can't open $xmlFile for reading");
  my $out = IO::File->new( ">$outFile" )
    or Bio::Prospect::RuntimeError->throw("can't open $outFile for writing");

  # Read one record at a time: each read ends at a closing </threading> tag.
  local $/ = '</threading>';
  while ( my $chunk = <$in> ) {
    next if $chunk !~ m/threading/;  # make sure that we have valid prospect thread

    # Capture in list context so a failed match gives undef, not a leftover $1.
    my ($t) = $chunk =~ m#template="(\w+)"#;
    if ( ! defined $t or ! defined $svm{$t} or $svm{$t} eq '' ) {
      my $shown = defined $t ? $t : '';
      close($in);
      close($out);
      Bio::Prospect::RuntimeError->throw
        ( 'Unable to retrieve svm sort',
          "no svm score for template=$shown" );
    }

    $chunk =~ s#(<rawScore>.*?</rawScore>)#$1\n<svmScore>$svm{$t}</svmScore>#g;
    print $out $chunk;
  }
  close($in);

  # Buffered write errors only surface at close, so check it.
  $out->close()
    or Bio::Prospect::RuntimeError->throw
         ( 'Unable to write svm output',
           "can't write $outFile: $!" );

  return( $outFile );
}
|
406
|
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
409
|
|
|
|
|
|
|
# _sort1() |
|
410
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
411
|
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
=head2 _sort1() |
|
413
|
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
Name: _sort1() |
|
415
|
|
|
|
|
|
|
Purpose: run sortProspect on threading file |
|
416
|
|
|
|
|
|
|
Arguments: prospect xml file |
|
417
|
|
|
|
|
|
|
Returns: filename of sortProspect results |
|
418
|
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
=cut |
|
420
|
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
sub _sort1($$) {
  # Run sortProspect on a prospect xml file.
  #
  # Arguments: $xfn - prospect xml file
  # Returns:   name of the file holding sortProspect's standard output
  #            ("$xfn.sort"); its standard error is discarded
  # Throws:    Bio::Prospect::RuntimeError if the command can't be started,
  #            is killed by a signal, or exits non-zero
  my ($self,$xfn) = @_;
  my $sfn = "$xfn.sort";
  my $cmd = "sortProspect $xfn 2>/dev/null 1>$sfn";
  print(STDERR "about to $cmd\n") if $ENV{DEBUG};

  my $rc    = system($cmd);
  my $oserr = "$!";                # capture before anything can reset $!
  if ( $rc != 0 ) {
    my $s = $?;

    # -1 means the command could not be started at all; without this check
    # the bit test below would misreport it as "signal 127".
    if ( $s == -1 ) {
      Bio::Prospect::RuntimeError->throw
        ( 'failed to execute Prospect',
          "system($cmd) could not be started: $oserr",
          'check your prospect installation manually' );
    }

    # low 7 bits of the wait status: number of the terminating signal
    if ($s & 127) {
      $s &= 127;
      my $sn = Bio::Prospect::utilities::signame($s);
      Bio::Prospect::RuntimeError->throw
        ( 'failed to execute Prospect',
          "received signal $s ($sn)" );
    }

    # otherwise the high byte holds the exit status
    $s >>= 8;
    Bio::Prospect::RuntimeError->throw
      ( 'failed to execute Prospect',
        "system($cmd) exited with status $s",
        'check your prospect installation manually' );
  }

  # sanity checks on the sort output??
  return $sfn;
}
|
446
|
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
sub _setenv {
  # Check that $Bio::Prospect::Init::PROSPECT_PATH and
  # $Bio::Prospect::Init::PDB_PATH name existing directories, and copy them
  # into the PROSPECT_PATH and PDB_PATH environment variables.
  #
  # Throws: Bio::Prospect::Exception if either path is undefined or is not
  #         a directory
  my $prospect_path = $Bio::Prospect::Init::PROSPECT_PATH;
  my $pdb_path      = $Bio::Prospect::Init::PDB_PATH;

  # The defined() checks avoid uninitialized-value warnings from -d and from
  # building the message when a path was never configured.
  if ( not defined $prospect_path or not -d $prospect_path ) {
    my $shown = defined $prospect_path ? $prospect_path : '';
    Bio::Prospect::Exception->throw
      ( "PROSPECT_PATH is not set correctly",
        "PROSPECT_PATH ($shown) is not a valid directory",
        "Check your prospect installation and set PROSPECT_PATH in Bio::Prospect::Init or as an environment variable" );
  }
  $ENV{'PROSPECT_PATH'} = $prospect_path;

  if ( not defined $pdb_path or not -d $pdb_path ) {
    my $shown = defined $pdb_path ? $pdb_path : '';
    Bio::Prospect::Exception->throw
      ( "PDB_PATH is not set correctly",
        "PDB_PATH ($shown) is not a valid directory",
        "Check your prospect installation and set PDB_PATH in Bio::Prospect::Init or as an environment variable" );
  }
  $ENV{'PDB_PATH'} = $pdb_path;
}
|
466
|
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
sub _prepare_options($$) {
  my ($self) = @_;
  my $opts = $self->{options};

  unless ( ref $opts eq 'Bio::Prospect::Options' ) {
    Bio::Prospect::BadUsage->throw('Bio::Prospect::Options argument is missing');
  }

  my $prospect = $Bio::Prospect::Init::PROSPECT_PATH . '/bin/prospect';

  # input type: only sequence input is implemented
  if ( exists $opts->{phd} ) {
    Exception::NotYetSupported->throw( "phd threading isn't implemented" );
  }
  elsif ( exists $opts->{ssp} ) {
    Exception::NotYetSupported->throw( "ssp threading isn't implemented" );
  }
  elsif ( not exists $opts->{seq} ) {
    Bio::Prospect::BadUsage->throw("Bio::Prospect::Options doesn't specify input type");
  }

  # elements 1 and 2 are sprintf templates for the input and output files
  my @cl = (
    $prospect,
    '-seqfile %s',
    '-o %s',
    '-ncpus ' . ( $opts->{ncpus} || 2 ),
    ( exists $opts->{freqfile} ? ( '-freqfile', $opts->{freqfile} ) : () ),
    ( $opts->{zscore}          ? '-reliab'                          : () ),
    ( $opts->{'3d'}            ? '-3d'                              : () ),
    ( $opts->{global_local}    ? '-global_local'                    : '-global' ),
  );

  # template set selection
  # ONE of -scop, -tfile, -templates (array), or -fssp (default)
  if ( $opts->{scop} ) {
    push @cl, '-scop';
  }
  elsif ( exists $opts->{tfile} ) {
    push @cl, '-tfile', $opts->{tfile};
  }
  elsif ( exists $opts->{templates} ) {
    # write the template names, one per line, to a temporary list file
    my ($fh,$fn) = $self->_tempfile('lst');
    $fh->print( join( "\n", @{ $opts->{templates} } ), "\n" );
    $fh->close();
    push @cl, '-tfile', $fn;
  }
  else {
    push @cl, '-fssp';
  }

  # stderr is discarded unless DEBUG is above 5; stdout follows stderr
  my $verbose = defined $ENV{DEBUG} && $ENV{DEBUG} > 5;
  push @cl, '2> /dev/null' if not $verbose;
  push @cl, '1>&2';

  @{ $self->{commandline} } = @cl;
  return @cl;

=pod

=over

=item B<::_prepare_options()>

Prepares temporary files based on options (e.g., writes a temporary
`tfile') and generates an array of command line options in
@{$self->{commandline}}.  Args 1 and 2 are input and output respectively
and MUST be sprintf'd before use.  See _thread1().

=back

=cut

}
|
531
|
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
sub _write_seqfile($$)
{
  # Write a sequence to a temporary fasta file obtained from
  # $self->_tempfile('fa') and return the filename.  Whitespace is removed
  # from the sequence, the header line records its length, and the residues
  # are wrapped at 60 columns.
  my ($self,$seq) = @_;
  Exception->throw('seq undefined') unless defined $seq;

  my ($fh,$fn) = $self->_tempfile('fa');

  ( my $residues = $seq ) =~ s/\s//g;
  my $len = length($residues);
  $residues =~ s/(.{60})/$1\n/g;     # wrap at 60 cols

  $fh->print( ">LocalClient /len=$len\n$residues\n" );
  $fh->close();
  return $fn;
}
|
544
|
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
=pod |
|
549
|
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
551
|
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
B<Bio::Prospect::Options>, B<Bio::Prospect::File>, |
|
553
|
|
|
|
|
|
|
B<Bio::Prospect::Client>, B<Bio::Prospect::SoapClient>, |
|
554
|
|
|
|
|
|
|
B<Bio::Prospect::Thread>, B<Bio::Prospect::ThreadSummary> |
|
555
|
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
http://www.bioinformaticssolutions.com/ |
|
557
|
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
=cut |
|
559
|
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
1; |