line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
2
|
|
|
|
|
|
|
# MRS::Client |
3
|
|
|
|
|
|
|
# Authors: Martin Senger |
4
|
|
|
|
|
|
|
# For copyright and disclaimer see MRS::Client pod. |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# ABSTRACT: A SOAP-based client of the MRS Retrieval server |
7
|
|
|
|
|
|
|
# PODNAME: MRS::Client |
8
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
9
|
4
|
|
|
4
|
|
99561
|
use strict; |
|
4
|
|
|
|
|
12
|
|
|
4
|
|
|
|
|
170
|
|
10
|
4
|
|
|
4
|
|
22
|
use warnings; |
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
239
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
package MRS::Client; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
our $VERSION = '1.0.1'; # VERSION |
15
|
|
|
|
|
|
|
|
16
|
4
|
|
|
4
|
|
22
|
use vars qw( $AUTOLOAD ); |
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
217
|
|
17
|
4
|
|
|
4
|
|
30
|
use Carp; |
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
319
|
|
18
|
4
|
|
|
4
|
|
3252
|
use XML::Compile::SOAP11 2.26; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
use XML::Compile::WSDL11; |
20
|
|
|
|
|
|
|
use XML::Compile::Transport::SOAPHTTP; |
21
|
|
|
|
|
|
|
use File::Basename; |
22
|
|
|
|
|
|
|
use Data::Dumper; |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
use MRS::Constants; |
25
|
|
|
|
|
|
|
use MRS::Client::Databank; |
26
|
|
|
|
|
|
|
use MRS::Client::Find; |
27
|
|
|
|
|
|
|
use MRS::Client::Blast; |
28
|
|
|
|
|
|
|
use MRS::Client::Clustal; |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
31
|
|
|
|
|
|
|
# |
32
|
|
|
|
|
|
|
# Exported constants |
33
|
|
|
|
|
|
|
# |
34
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
35
|
|
|
|
|
|
|
use constant DEFAULT_SEARCH_ENDPOINT => 'http://mrs.cmbi.ru.nl/m6/mrsws/search'; |
36
|
|
|
|
|
|
|
use constant DEFAULT_BLAST_ENDPOINT => 'http://mrs.cmbi.ru.nl/m6/mrsws/blast'; |
37
|
|
|
|
|
|
|
use constant DEFAULT_CLUSTAL_ENDPOINT => 'http://mrs.cmbi.ru.nl/m6/mrsws/clustal'; |
38
|
|
|
|
|
|
|
use constant DEFAULT_ADMIN_ENDPOINT => 'http://mrs.cmbi.ru.nl/m6/mrsws/admin'; |
39
|
|
|
|
|
|
|
use constant DEFAULT_SEARCH_WSDL => 'search.wsdl.template'; |
40
|
|
|
|
|
|
|
use constant DEFAULT_BLAST_WSDL => 'blast.wsdl.template'; |
41
|
|
|
|
|
|
|
use constant DEFAULT_CLUSTAL_WSDL => 'clustal.wsdl.template'; |
42
|
|
|
|
|
|
|
use constant DEFAULT_ADMIN_WSDL => 'admin.wsdl.template'; |
43
|
|
|
|
|
|
|
use constant DEFAULT_SEARCH_WSDL_6 => 'search.wsdl.template.v6'; |
44
|
|
|
|
|
|
|
use constant DEFAULT_BLAST_WSDL_6 => 'blast.wsdl.template.v6'; |
45
|
|
|
|
|
|
|
use constant DEFAULT_CLUSTAL_WSDL_6 => 'clustal.wsdl.template'; # no ClustalW in MRS 6 |
46
|
|
|
|
|
|
|
use constant DEFAULT_ADMIN_WSDL_6 => 'admin.wsdl.template'; # no Admin in MRS 6 |
47
|
|
|
|
|
|
|
use constant DEFAULT_SEARCH_SERVICE => 'mrsws_search'; |
48
|
|
|
|
|
|
|
use constant DEFAULT_BLAST_SERVICE => 'mrsws_blast'; |
49
|
|
|
|
|
|
|
use constant DEFAULT_CLUSTAL_SERVICE => 'mrsws_clustal'; |
50
|
|
|
|
|
|
|
use constant DEFAULT_ADMIN_SERVICE => 'mrsws_admin'; |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
54
|
|
|
|
|
|
|
# A list of allowed options/arguments (used in the new() method) |
55
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
56
|
|
|
|
|
|
|
{
    # Names of the attributes that may be passed to new() and that get
    # a combined get/set method generated on demand by AUTOLOAD.
    my %_allowed = map { $_ => 1 } qw(
        search_url     blast_url     clustal_url     admin_url
        search_service blast_service clustal_service admin_service
        search_wsdl    blast_wsdl    clustal_wsdl    admin_wsdl
        host
        mrs_version
        debug
    );

    # Return true if $name is one of the allowed attribute names.
    # The invocant (first argument) is not used.
    sub _accessible {
        my (undef, $name) = @_;
        return exists $_allowed{$name};
    }
}
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
86
|
|
|
|
|
|
|
# Deal with 'set' and 'get' methods. |
87
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
88
|
|
|
|
|
|
|
# Generate, install and call a combined get/set method for any attribute
# listed as accessible (see _accessible). A call without a defined value
# is a 'get', a call with a defined value is a 'set'; both return the
# current value of the attribute. Any other method name is reported by
# croak() as "No such method: ...".
sub AUTOLOAD {
    my ($self, $value) = @_;

    # $AUTOLOAD holds the fully qualified name of the missing method;
    # keep only the part after the last '::'
    my ($attr_name) = $AUTOLOAD =~ /.*::(\w+)/;
    croak ("No such method: $AUTOLOAD")
        unless defined $attr_name and $self->_accessible ($attr_name);

    # Remember only the class name: the generated closure is installed in
    # the symbol table, so it must not capture the object itself (that
    # would keep the first object alive for ever).
    my $class = ref ($self);

    my $ref_sub = sub {
        # give the anonymous sub a recognizable name
        local *__ANON__ = "__ANON__$attr_name" . "_" . $class;
        my ($this, $new_value) = @_;

        # get method
        return $this->{$attr_name} unless defined $new_value;

        # set method
        $this->{$attr_name} = $new_value;
        return $this->{$attr_name};
    };

    {
        ## no critic
        # install the method, so AUTOLOAD is not invoked for it again
        no strict 'refs';
        *{$AUTOLOAD} = $ref_sub;
    }
    return $ref_sub->($self, $value);
}
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
119
|
|
|
|
|
|
|
# Keep it here! The reason is the existence of AUTOLOAD... |
120
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
121
|
|
|
|
|
|
|
# Intentionally does nothing. It is defined explicitly so that object
# destruction is not dispatched to AUTOLOAD.
sub DESTROY {
    return;
}
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
125
|
|
|
|
|
|
|
# new |
126
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
127
|
|
|
|
|
|
|
# Constructor. @args are name-value pairs of allowed attributes; a single
# argument is treated as the value of an attribute named 'value'.
# Environment variables MRS_SEARCH_URL, MRS_BLAST_URL, MRS_CLUSTAL_URL and
# MRS_ADMIN_URL replace the default endpoints; MRS_HOST and MRS_VERSION
# are applied after the arguments.
sub new {
    my ($class, @args) = @_;

    my $self = bless {}, ref ($class) || $class;

    # default endpoints (possibly taken from the environment)...
    my %endpoint_for = (
        search  => DEFAULT_SEARCH_ENDPOINT,
        blast   => DEFAULT_BLAST_ENDPOINT,
        clustal => DEFAULT_CLUSTAL_ENDPOINT,
        admin   => DEFAULT_ADMIN_ENDPOINT,
    );
    for my $kind (qw( search blast clustal admin )) {
        my $setter = $kind . '_url';
        $self->$setter ($ENV{ 'MRS_' . uc ($kind) . '_URL' } || $endpoint_for{$kind});
    }

    # ...and default service names
    my %service_for = (
        search  => DEFAULT_SEARCH_SERVICE,
        blast   => DEFAULT_BLAST_SERVICE,
        clustal => DEFAULT_CLUSTAL_SERVICE,
        admin   => DEFAULT_ADMIN_SERVICE,
    );
    for my $kind (qw( search blast clustal admin )) {
        my $setter = $kind . '_service';
        $self->$setter ($service_for{$kind});
    }

    # cache of the already compiled SOAP operations (see _call)
    $self->{compiled_operations} = {};

    # hand every given argument over to the 'set' method of the same name
    my %given = (@args == 1 ? (value => $args[0]) : @args);
    for my $name (keys %given) {
        $self->$name ($given{$name});
    }
    $self->host ($ENV{'MRS_HOST'}) if $ENV{'MRS_HOST'};

    # MRS version: environment first, then the given one, then 6
    $self->{mrs_version} = $ENV{'MRS_VERSION'} || $self->{mrs_version} || 6;

    return $self;
}
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
163
|
|
|
|
|
|
|
# |
164
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
165
|
|
|
|
|
|
|
# Return true only if the 'mrs_version' of this client is defined and
# equal (as a string) to '6'.
sub is_v6 {
    my ($self) = @_;
    my $version = $self->{mrs_version};
    return (defined $version and $version eq '6');
}
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
171
|
|
|
|
|
|
|
# |
172
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
173
|
|
|
|
|
|
|
# Get or set the host name. Without a (true) $host it returns the current
# host. Otherwise each service URL that still has its default value, or
# the value derived from the previously set host, is replaced by
# "http://$host:<port>/", and the new host is remembered and returned.
# URLs that were given specifically are left untouched.
sub host {
    my ($self, $host) = @_;
    return $self->{host} unless $host;

    my $previous = $self->{host};

    # accessor name, default port and default endpoint of each service
    my @services = (
        [ 'search_url',  18081, DEFAULT_SEARCH_ENDPOINT  ],
        [ 'blast_url',   18082, DEFAULT_BLAST_ENDPOINT   ],
        [ 'clustal_url', 18083, DEFAULT_CLUSTAL_ENDPOINT ],
        [ 'admin_url',   18084, DEFAULT_ADMIN_ENDPOINT   ],
    );

    foreach my $service (@services) {
        my ($accessor, $port, $default_url) = @$service;
        my $replaceable =
            $self->$accessor eq $default_url
            || ($previous && $self->$accessor eq "http://$previous:$port/");
        $self->$accessor ("http://$host:$port/") if $replaceable;
    }

    return $self->{host} = $host;
}
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
202
|
|
|
|
|
|
|
# Read the WSDL file, create from it a proxy and store it in |
203
|
|
|
|
|
|
|
# itself. Do it only once unless $force_creation is defined. |
204
|
|
|
|
|
|
|
# |
205
|
|
|
|
|
|
|
# $ptype tells what kind of proxy to create: search, blast, clustal or |
206
|
|
|
|
|
|
|
# admin. |
207
|
|
|
|
|
|
|
# |
208
|
|
|
|
|
|
|
# What WSDL file is read: It reads file previously set by one of the |
209
|
|
|
|
|
|
|
# methods (depending which proxy should be read): search_wsdl(), |
210
|
|
|
|
|
|
|
# blast_wsdl(), clustal_wsdl or admin_wsdl(). If such method was not |
211
|
|
|
|
|
|
|
# called, the default WSDL template (selected by _default_wsdl() for the |
# given $ptype) is read from the same directory as this module. |
213
|
|
|
|
|
|
|
# ----------------------------------------------------------------- |
214
|
|
|
|
|
|
|
# Create a WSDL proxy for the service $ptype (the visible callers pass
# 'search' or 'admin') and store it in $self->{$ptype . '_proxy'}. An
# already stored proxy is reused unless $force_creation is true.
#
# If $self->{$ptype . '_wsdl'} is defined, that file is parsed as the
# WSDL. Otherwise the default template (see _default_wsdl) is read from
# the directory of this module and its ${LOCATION} and ${SERVICE}
# placeholders are replaced by the '_url' and '_service' attributes.
#
# $default_wsdl is accepted but not used.
sub _create_proxy {
    my ($self, $ptype, $default_wsdl, $force_creation) = @_;

    my $proxy_key = $ptype . '_proxy';
    $self->{$proxy_key} = undef if $force_creation;

    if (not defined $self->{$proxy_key}) {
        my $wsdl;
        my $wsdl_file = $self->{$ptype . '_wsdl'};
        if (not defined $wsdl_file) {
            # fileparse() returns (name, directory, suffix)
            my $directory = (fileparse (__FILE__))[-2];
            $wsdl = _readfile ( $directory . $self->_default_wsdl ($ptype) );

            # The braces must be escaped: an unescaped literal '{' in a
            # pattern is deprecated or illegal in newer perls.
            my $location = $self->{$ptype . '_url'};
            my $service  = $self->{$ptype . '_service'};
            $wsdl =~ s/\$\{LOCATION\}/$location/g;
            $wsdl =~ s/\$\{SERVICE\}/$service/g;
        } else {
            # do not rely on another module having loaded the parser
            require XML::LibXML;
            $wsdl = XML::LibXML->new->parse_file ($wsdl_file);
        }
        $self->{$proxy_key} = XML::Compile::WSDL11->new ($wsdl);
    }
}
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
# Return the file name of the default WSDL template for the service
# $ptype ('search', 'blast', 'clustal' or 'admin'), depending on whether
# this client talks to an MRS 6 server (see is_v6). Dies for any other
# (or undefined) $ptype.
sub _default_wsdl {
    my ($self, $ptype) = @_;

    my %template_for = $self->is_v6
        ? ( search  => DEFAULT_SEARCH_WSDL_6,
            blast   => DEFAULT_BLAST_WSDL_6,
            clustal => DEFAULT_CLUSTAL_WSDL_6,
            admin   => DEFAULT_ADMIN_WSDL_6,
          )
        : ( search  => DEFAULT_SEARCH_WSDL,
            blast   => DEFAULT_BLAST_WSDL,
            clustal => DEFAULT_CLUSTAL_WSDL,
            admin   => DEFAULT_ADMIN_WSDL,
          );

    return $template_for{$ptype}
        if defined $ptype and exists $template_for{$ptype};

    # an undefined type is reported as an empty name, without warnings
    die "Unknown proxy type '" . (defined $ptype ? $ptype : '') . "'\n";
}
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
# Read the whole file $path and return its contents as one string.
# Croaks if the file cannot be opened.
sub _readfile {
    my ($path) = @_;

    open my $in, '<', $path or croak "Couldn't open file $path: $!\n";

    # an undefined input record separator means: read everything at once
    my $content = do { local $/ = undef; <$in> };
    close $in;

    return $content;
}
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
260
|
|
|
|
|
|
|
# Make a SOAP call to a MRS server, using $proxy (created usually by |
261
|
|
|
|
|
|
|
# _create_proxy), invoking $operation with $parameters (a hash |
262
|
|
|
|
|
|
|
# reference). |
263
|
|
|
|
|
|
|
# ----------------------------------------------------------------- |
264
|
|
|
|
|
|
|
# Invoke $operation with %$parameters, using a client compiled from
# $proxy. Compiled clients are cached in $self->{compiled_operations}
# under the operation name. With $self->{debug} set, the request
# parameters and the response are dumped to the standard output.
# Croaks if the answer contains a 'Fault'; otherwise returns the answer
# (which may be undefined).
sub _call {
    my ($self, $proxy, $operation, $parameters) = @_;

    # compile the client only once per operation
    my $cache = ($self->{compiled_operations} ||= {});
    $cache->{$operation} = $proxy->compileClient ($operation)
        unless defined $cache->{$operation};

    # make a SOAP call
    my ($answer, $trace) = $cache->{$operation}->( %$parameters );

    if ($self->{debug}) {
        print "OPERATION: $operation, PARAMS:\n".Dumper ($parameters);
        print "RESPONSE:\n".Dumper ($answer);
        print $trace->printResponse unless defined $answer;
    }

    if (defined $answer and defined $answer->{Fault}) {
        croak 'ERROR: ' . $answer->{Fault}->{'faultstring'} . "\n";
    }
    return $answer;
}
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
295
|
|
|
|
|
|
|
# Factory method for creating one or more databanks: |
296
|
|
|
|
|
|
|
# it returns an array of MRS::Client::Databank if $db is undef or empty or 'all' |
297
|
|
|
|
|
|
|
# else it returns a databank indicated by $db (which is an Id) |
298
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
299
|
|
|
|
|
|
|
# Factory method for databanks. With a true $db other than 'all' it
# returns a single MRS::Client::Databank with that id (no server call is
# made here). Otherwise it asks the search service for 'GetDatabankInfo'
# on all databanks and returns an array of databanks built from the
# returned infos; the array is empty if the call gave no answer.
sub db {
    my ($self, $db) = @_;

    if ($db and $db ne 'all') {
        return MRS::Client::Databank->new (id => $db, client => $self);
    }

    $self->_create_proxy ('search');
    my $answer = $self->_call (
        $self->{search_proxy}, 'GetDatabankInfo', { db => 'all' });

    my @databanks = ();
    if (defined $answer) {
        foreach my $info (@{ $answer->{parameters}->{info} }) {
            my $databank = MRS::Client::Databank->new (
                %$info, client => $self, info_retrieved => 1);
            push (@databanks, $databank);
        }
    }
    return @databanks;
}
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
317
|
|
|
|
|
|
|
# The same as db->find but acting on all available databanks |
318
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
319
|
|
|
|
|
|
|
# The same as db->find but acting on all available databanks. The
# remaining arguments are passed to MRS::Client::MultiFind->new. Returns
# the MultiFind object, with 'eod' set if no databank produced any hits.
sub find {
    my $self = shift;

    my $multi = MRS::Client::MultiFind->new ($self, @_);

    # Keep a copy of the arguments (they will be needed for cloning).
    # A reference to @_ itself would keep aliases to the caller's
    # variables, so later changes made by the caller would leak in.
    $multi->{args} = [ @_ ];

    # create individual finds for each available databank
    $multi->{children} = $multi->_read_first_hits;
    $multi->{current} = 0;

    # do we have any hits, at all?
    $multi->{eod} = 1 if @{ $multi->{children} } == 0;

    return $multi;
}
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
337
|
|
|
|
|
|
|
# Create a blast object - it can be used for running more jobs, with |
338
|
|
|
|
|
|
|
# different parameters [TBD: , giving a statistics about all jobs?] |
339
|
|
|
|
|
|
|
# |
340
|
|
|
|
|
|
|
# Create maximum one blast object; we do not need more. |
341
|
|
|
|
|
|
|
# ----------------------------------------------------------------- |
342
|
|
|
|
|
|
|
# Return the blast object of this client. It is created on the first
# call and the same object is returned by all later calls.
sub blast {
    my ($self) = @_;
    $self->{blastobj} ||= MRS::Client::Blast->_new (client => $self);
    return $self->{blastobj};
}
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
350
|
|
|
|
|
|
|
# Create a clustal object; a simple factory method. |
351
|
|
|
|
|
|
|
# ----------------------------------------------------------------- |
352
|
|
|
|
|
|
|
# Create and return a new clustal object. Croaks if this client talks to
# an MRS 6 server (see is_v6).
sub clustal {
    my ($self) = @_;
    if ($self->is_v6) {
        croak "ClustalW service is not available in MRS server version 6 and above.\n";
    }
    return MRS::Client::Clustal->_new (client => $self);
}
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
360
|
|
|
|
|
|
|
# |
361
|
|
|
|
|
|
|
# Admin calls ... work in progress, and not really supported, AND it |
362
|
|
|
|
|
|
|
# disappeared completely in MRS 6 |
363
|
|
|
|
|
|
|
# |
364
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
367
|
|
|
|
|
|
|
# Return a script that parses a databank. $script is its name. |
368
|
|
|
|
|
|
|
#----------------------------------------------------------------- |
369
|
|
|
|
|
|
|
# Return the script (named $script) that parses a databank, as given in
# the response of the 'GetParserScript' operation of the admin service,
# requested in the 'plain' format. Croaks if $script is empty.
sub parser {
    my ($self, $script) = @_;

    if (not $script) {
        croak "Empty parser name. Cannot retrieve it, I am afraid.\n";
    }

    $self->_create_proxy ('admin');
    my %request = (script => $script, format => 'plain');
    my $answer = $self->_call (
        $self->{admin_proxy}, 'GetParserScript', \%request);
    return $answer->{parameters}->{response};
}
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
1; |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
=pod |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
=head1 NAME |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
MRS::Client - A SOAP-based client of the MRS Retrieval server |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
=head1 VERSION |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
version 1.0.1 |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
=head1 SYNOPSIS |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
# 1. create a client that does all the work: |
399
|
|
|
|
|
|
|
use MRS::Client; |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
# ...by default it connects to the MRS service at http://mrs.cmbi.ru.nl/m6 |
402
|
|
|
|
|
|
|
my $client = MRS::Client->new(); |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
# ...or let the client talk to your own MRS servers |
405
|
|
|
|
|
|
|
my $client = MRS::Client->new ( search_url => 'http://localhost:18081/', |
406
|
|
|
|
|
|
|
blast_url => 'http://localhost:18082/', |
407
|
|
|
|
|
|
|
clustal_url => 'http://localhost:18083/'); # this only for MRS 5 |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
# ...or specify only a host, assuming the default ports are used |
410
|
|
|
|
|
|
|
my $client = MRS::Client->new ( host => 'localhost'); |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
# 2a. make various queries to a selected database: |
413
|
|
|
|
|
|
|
print $client->db ('uniprot')->find ('sapiens')->count; |
414
|
|
|
|
|
|
|
175642 |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
print $client->db ('uniprot')->find ('sapiens')->next; |
417
|
|
|
|
|
|
|
ID Q14547_HUMAN Unreviewed; 60 AA. |
418
|
|
|
|
|
|
|
AC Q14547; |
419
|
|
|
|
|
|
|
DT 01-NOV-1996, integrated into UniProtKB/TrEMBL. |
420
|
|
|
|
|
|
|
DT 01-NOV-1996, sequence version 1. |
421
|
|
|
|
|
|
|
DT 19-JAN-2010, entry version 51. |
422
|
|
|
|
|
|
|
DE SubName: Full=Homeobox-like; |
423
|
|
|
|
|
|
|
DE Flags: Fragment; |
424
|
|
|
|
|
|
|
OS Homo sapiens (Human). |
425
|
|
|
|
|
|
|
... |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
# show id, relevance score and title of two terms connected by AND |
428
|
|
|
|
|
|
|
my $query = $client->db ('enzyme')->find ('and' => ['snake', 'human'], |
429
|
|
|
|
|
|
|
'format' => MRS::EntryFormat->HEADER); |
430
|
|
|
|
|
|
|
while (my $record = $query->next) { |
431
|
|
|
|
|
|
|
print $record . "\n"; |
432
|
|
|
|
|
|
|
} |
433
|
|
|
|
|
|
|
enzyme 3.4.21.95 17.6527424 Snake venom factor V activator. |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
# ...show only title, but now the same two terms are connected by OR |
436
|
|
|
|
|
|
|
my $query = $client->db ('enzyme')->find ('or' => ['snake', 'human'], |
437
|
|
|
|
|
|
|
'format' => MRS::EntryFormat->TITLE); |
438
|
|
|
|
|
|
|
while (my $record = $query->next) { |
439
|
|
|
|
|
|
|
print $record . "\n"; |
440
|
|
|
|
|
|
|
} |
441
|
|
|
|
|
|
|
Snake venom factor V activator. |
442
|
|
|
|
|
|
|
Jararhagin. |
443
|
|
|
|
|
|
|
Bothropasin. |
444
|
|
|
|
|
|
|
Trimerelysin I. |
445
|
|
|
|
|
|
|
... |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
# combine term-based (ranked) query with additional boolean expression |
448
|
|
|
|
|
|
|
my $query = $client->db ('uniprot')->find ('and' => ['snake', 'human'], |
449
|
|
|
|
|
|
|
query => 'NOT (kinase OR reductase)', |
450
|
|
|
|
|
|
|
'format' => MRS::EntryFormat->HEADER); |
451
|
|
|
|
|
|
|
print "Count: " . $query->count . "\n"; |
452
|
|
|
|
|
|
|
while (my $record = $query->next) { |
453
|
|
|
|
|
|
|
print $record . "\n"; |
454
|
|
|
|
|
|
|
} |
455
|
|
|
|
|
|
|
Count: 75 |
456
|
|
|
|
|
|
|
nxs11_micsu 23.3861961 Short neurotoxin MS11; |
457
|
|
|
|
|
|
|
nxl2_micsu 22.7922745 Long neurotoxin MS2; |
458
|
|
|
|
|
|
|
nxl5_micsu 22.2648716 Long neurotoxin MS5; |
459
|
|
|
|
|
|
|
... |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
# 2b. explore full information about a database |
462
|
|
|
|
|
|
|
print $client->db ('enzyme'); |
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
# ...or extract only information parts you want |
465
|
|
|
|
|
|
|
print $client->db ('enzyme')->version; |
466
|
|
|
|
|
|
|
print $client->db ('enzyme')->count; |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
# 3. Or, almost all functionality is also available in a provided |
469
|
|
|
|
|
|
|
# script I<mrsclient>: |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
mrsclient -h |
472
|
|
|
|
|
|
|
mrsclient -C |
473
|
|
|
|
|
|
|
mrsclient -c -n insulin |
474
|
|
|
|
|
|
|
mrsclient -c -p -d enzyme -a 'endothelin tyrosine' |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
# 4. Run blastp on protein sequences: |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
my @run_args = (fasta_file => 'protein.fasta', db => 'uniprot'); |
479
|
|
|
|
|
|
|
my $job = $client->blast->run (@run_args); |
480
|
|
|
|
|
|
|
print STDERR 'JOB ID: ' . $job->id . ' [' . $job->status . "]\n"; |
481
|
|
|
|
|
|
|
print $job; |
482
|
|
|
|
|
|
|
while (not $job->completed) { |
483
|
|
|
|
|
|
|
print STDERR 'Waiting for 10 seconds... [status: ' . $job->status . "]\n"; |
484
|
|
|
|
|
|
|
sleep 10; |
485
|
|
|
|
|
|
|
} |
486
|
|
|
|
|
|
|
print $job->error if $job->failed; |
487
|
|
|
|
|
|
|
print $job->results; |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
# Or, use for it the provided script I<mrsblast>: |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
mrsblast -h |
492
|
|
|
|
|
|
|
mrsblast -i /tmp/snake.protein.fasta -d uniprot -x result.xml |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
# 5. Run clustalw multiple alignment: |
495
|
|
|
|
|
|
|
# (available only for MRS version 5 and lower) |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
my $result = $client->clustal->run (fasta_file => 'multiple.fasta' ); |
498
|
|
|
|
|
|
|
print "ERROR: " . $result->failed if $result->failed; |
499
|
|
|
|
|
|
|
print $result->diagnostics; |
500
|
|
|
|
|
|
|
print $result; |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
# Or, use for it the provided script I<mrsclustal>: |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
mrsclustal -h |
505
|
|
|
|
|
|
|
mrsclustal -i multiple.fasta |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
=head1 DESCRIPTION |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
This module is a SOAP-based (Web Services) client that can talk, and |
510
|
|
|
|
|
|
|
get data from an B<MRS server>, a search engine for biological and |
511
|
|
|
|
|
|
|
medical databanks that searches well over a terabyte of indexed |
512
|
|
|
|
|
|
|
text. See details about MRS and its author Maarten Hekkelman in |
513
|
|
|
|
|
|
|
L</"ACKNOWLEDGMENTS">. |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
Because this module is only a client, you need an MRS server |
516
|
|
|
|
|
|
|
running. You can install your own (see details in the MRS |
517
|
|
|
|
|
|
|
distribution), or you need to know a site that runs it. By default, |
518
|
|
|
|
|
|
|
this module contacts the MRS server at CMBI |
519
|
|
|
|
|
|
|
(F<http://mrs.cmbi.ru.nl/m6/>). |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
The usual scenario is the following: |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
=over |
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
=item * |
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
Create a new instance of a client by calling: |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
my $client = MRS::Client->new (%args); |
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
=item * |
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
Optionally, find out what databanks are available by calling: |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
my @ids = map { $_->id } $client->db; |
536
|
|
|
|
|
|
|
print "Names:\n" . join ("\n", @ids) . "\n"; |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
=item * |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
Make one or more queries on a selected databank and iterate over the |
541
|
|
|
|
|
|
|
result: |
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
my $query = $client->db ('enzyme')->find (['cone', 'snail']); |
544
|
|
|
|
|
|
|
while (my $record = $query->next) { |
545
|
|
|
|
|
|
|
print $record . "\n"; |
546
|
|
|
|
|
|
|
} |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
Or, make the same query on all available databanks: |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
my $query = $client->find (['cone', 'snail']); |
551
|
|
|
|
|
|
|
while (my $record = $query->next) { |
552
|
|
|
|
|
|
|
print $record . "\n"; |
553
|
|
|
|
|
|
|
} |
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
The format of returned records is specified by a parameter of the |
556
|
|
|
|
|
|
|
I<find> method (see more in L<"METHODS">). |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
=item * |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
Additionally, this module provides access to the I<blast> program, using |
561
|
|
|
|
|
|
|
MRS indexed databases. And it can invoke multiple alignment program |
562
|
|
|
|
|
|
|
I<clustalw>. |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
=back |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
=head1 ATTENTION |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
I C: Because |
569
|
|
|
|
|
|
|
the latest version of MRS server (version 6) is not backward |
570
|
|
|
|
|
|
|
compatible with the previous version of the MRS server (version 5), |
571
|
|
|
|
|
|
|
there are some significant (but fortunately not huge) changes needed |
572
|
|
|
|
|
|
|
in your programs. Read details in L</"MRS VERSIONS">. |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
=head1 METHODS |
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
=head2 MRS::Client |
577
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
The main module is C<MRS::Client>. It lets the user specify which MRS |
579
|
|
|
|
|
|
|
server to use, and few other global options. It also has a factory |
580
|
|
|
|
|
|
|
method for creating individual databanks objects. Additionally, it |
581
|
|
|
|
|
|
|
allows making query over all databanks. Finally, it covers all the |
582
|
|
|
|
|
|
|
SOAP communication with the server. |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
=head3 new |
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
use MRS::Client; |
587
|
|
|
|
|
|
|
my $client = MRS::Client->new (@parameters); |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
The parameters are name-value pairs. The following names are recognized: |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
=over |
592
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
=item search_url, blast_url, clustal_url |
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
The URLs of the individual MRS servers, one providing searches (the |
596
|
|
|
|
|
|
|
main one), one running blast and one running clustal. Default values |
597
|
|
|
|
|
|
|
lead your searches to CMBI. If you have installed MRS servers on your |
598
|
|
|
|
|
|
|
own site, and you are using the default values coming with the MRS |
599
|
|
|
|
|
|
|
distribution, you create a client by (but see below parameter I<host> |
600
|
|
|
|
|
|
|
for a shortcut): |
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
my $client = MRS::Client->new ( search_url => 'http://localhost:18081/', |
603
|
|
|
|
|
|
|
blast_url => 'http://localhost:18082/', |
604
|
|
|
|
|
|
|
clustal_url => 'http://localhost:18083/', # this only for MRS 5 |
605
|
|
|
|
|
|
|
); |
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
Technical detail: These URLs will be used in the location field of the |
608
|
|
|
|
|
|
|
WSDL description. |
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
Alternatively, you can specify these parameters by environment |
611
|
|
|
|
|
|
|
variables (because they will be probably same for most users from the |
612
|
|
|
|
|
|
|
same site). The parameters, however, still have precedence over the |
613
|
|
|
|
|
|
|
values of environment variables (even if they exist). The variables |
614
|
|
|
|
|
|
|
are: I<MRS_SEARCH_URL>, I<MRS_BLAST_URL> and I<MRS_CLUSTAL_URL>. |
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
B<Note:> Some sites may not have all MRS servers running. |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
=item host |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
A shortcut for specifying a host name in all URLs. The same as in the |
621
|
|
|
|
|
|
|
above example can be accomplished by: |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
my $client = MRS::Client->new (host => 'localhost'); |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
Again, you can specify this parameter by an environment variable |
626
|
|
|
|
|
|
|
MRS_HOST. |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
=item search_service, blast_service, clustal_service |
629
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
The MRS servers are SOAP-based Web Services. Every Web Service has its |
631
|
|
|
|
|
|
|
own I<service name> (the name used in the WSDL). You can change this |
632
|
|
|
|
|
|
|
service name if you are accessing site where they use non-default |
633
|
|
|
|
|
|
|
names. The default names - I guess almost always used - are: |
634
|
|
|
|
|
|
|
mrsws_search, mrsws_blast, mrsws_clustal. |
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
=item search_wsdl, blast_wsdl, clustal_wsdl |
637
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
You can also specify your own WSDL file, each one for each set of |
639
|
|
|
|
|
|
|
operations. It is meant more for debugging purposes because this |
640
|
|
|
|
|
|
|
C<MRS::Client> module understands only current operations and adding |
641
|
|
|
|
|
|
|
new ones to a new WSDL does not magically start using them. These |
642
|
|
|
|
|
|
|
parameters may be useful when extending the C<MRS::Client>. |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
=back |
645
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
=head3 setters/getters |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
The same names as the argument names described above can be used as |
649
|
|
|
|
|
|
|
method names to get or set the parameter value. A method without an |
650
|
|
|
|
|
|
|
argument gets the current value, a method with an argument sets the |
651
|
|
|
|
|
|
|
new value. For example: |
652
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
print $client->search_url; |
654
|
|
|
|
|
|
|
$client->search_url ('http://my.own.server/mrs/search'); |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
=head3 db |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
This is a factory method creating one or more databanks instances. It |
659
|
|
|
|
|
|
|
accepts a single argument, a databank ID: |
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
print $client->db ('enzyme'); |
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
Id: enzyme |
664
|
|
|
|
|
|
|
Name: Enzyme |
665
|
|
|
|
|
|
|
Version: 2013-05-27 |
666
|
|
|
|
|
|
|
Count: 6115 |
667
|
|
|
|
|
|
|
URL: http://ca.expasy.org/enzyme/ |
668
|
|
|
|
|
|
|
Parser: enzyme |
669
|
|
|
|
|
|
|
Files: |
670
|
|
|
|
|
|
|
Version: 2013-05-27 |
671
|
|
|
|
|
|
|
Modified: 2013-05-27 11:46 GMT |
672
|
|
|
|
|
|
|
Entries count: 6115 |
673
|
|
|
|
|
|
|
Raw data size: 7436504 |
674
|
|
|
|
|
|
|
File size: 45563041 |
675
|
|
|
|
|
|
|
Unique Id: fc0540bd-58a2-4de7-b3ff-6daff64ca13c |
676
|
|
|
|
|
|
|
Indices: |
677
|
|
|
|
|
|
|
enzyme text 14881 Unique |
678
|
|
|
|
|
|
|
enzyme de 3650 Unique Description |
679
|
|
|
|
|
|
|
enzyme dr 420832 Unique Database Reference |
680
|
|
|
|
|
|
|
enzyme id 6114 Unique Identification |
681
|
|
|
|
|
|
|
enzyme pr 398 Unique Prosite Reference |
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
You can find out what databanks IDs are available by: |
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
print join ("\n", map { $_->id } $client->db); |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
Which brings us to the usage of the I<db> method without any |
688
|
|
|
|
|
|
|
parameter, or with an empty parameter. In such cases, it creates an |
689
|
|
|
|
|
|
|
array of C<MRS::Client::Databank> instances. |
690
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
=head3 find |
692
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
Make the same query to all databanks. The parameters are the same as |
694
|
|
|
|
|
|
|
for the I<find> method called for an individual databank (see below). |
695
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
print "Databank\tID\tScore\tTitle\n"; |
697
|
|
|
|
|
|
|
my $query = $client->find ('and' => ['cone', 'snail'], |
698
|
|
|
|
|
|
|
'format' => MRS::EntryFormat->HEADER); |
699
|
|
|
|
|
|
|
while (my |
700
|
|
|
|
|
|
|
$record = $query->next) { |
701
|
|
|
|
|
|
|
print $record . "\n"; |
702
|
|
|
|
|
|
|
} |
703
|
|
|
|
|
|
|
print $query->count . "\n"; |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
Databank ID Score Title |
706
|
|
|
|
|
|
|
interpro ipr020242 29.7122746 Conotoxin I2-superfamily |
707
|
|
|
|
|
|
|
interpro ipr012322 27.8191032 Conotoxin, delta-type, conserved site |
708
|
|
|
|
|
|
|
... |
709
|
|
|
|
|
|
|
omim 114020 3.40963793 cadherin 2 |
710
|
|
|
|
|
|
|
omim 192090 3.40769672 cadherin 1 |
711
|
|
|
|
|
|
|
sprot cxd6d_concn 19.4017849 Delta-conotoxin CnVID; |
712
|
|
|
|
|
|
|
sprot cxd6c_concn 19.3984871 Delta-conotoxin CnVIC; |
713
|
|
|
|
|
|
|
... |
714
|
|
|
|
|
|
|
taxonomy 6495 53.980381 Conus tulipa fish-hunting cone snail |
715
|
|
|
|
|
|
|
trembl q71ks8_contu 22.1446457 Four-loop conotoxin preproprotein; |
716
|
|
|
|
|
|
|
trembl q9u7q6_contu 20.6787205 Calmodulin; |
717
|
|
|
|
|
|
|
... |
718
|
|
|
|
|
|
|
149 |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
The query (method I<next>) returns entries sequentially, one databank |
721
|
|
|
|
|
|
|
after another. As with individual databanks, even here you can select |
722
|
|
|
|
|
|
|
maximum number of entries to deliver - the number is applied for each |
723
|
|
|
|
|
|
|
databank separately: |
724
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
my $query = $client->find ('and' => ['cone', 'snail'], |
726
|
|
|
|
|
|
|
max_entries => 2, |
727
|
|
|
|
|
|
|
'format' => MRS::EntryFormat->HEADER); |
728
|
|
|
|
|
|
|
while (my |
729
|
|
|
|
|
|
|
$record = $query->next) { |
730
|
|
|
|
|
|
|
print $record . "\n"; |
731
|
|
|
|
|
|
|
} |
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
interpro ipr020242 29.7122746 Conotoxin I2-superfamily |
734
|
|
|
|
|
|
|
interpro ipr012322 27.8191032 Conotoxin, delta-type, conserved site |
735
|
|
|
|
|
|
|
omim 114020 3.40963793 cadherin 2 |
736
|
|
|
|
|
|
|
omim 192090 3.40769672 cadherin 1 |
737
|
|
|
|
|
|
|
sprot cxd6d_concn 19.4017849 Delta-conotoxin CnVID; |
738
|
|
|
|
|
|
|
sprot cxd6c_concn 19.3984871 Delta-conotoxin CnVIC; |
739
|
|
|
|
|
|
|
taxonomy 6495 53.980381 Conus tulipa fish-hunting cone snail |
740
|
|
|
|
|
|
|
trembl q71ks8_contu 22.1446457 Four-loop conotoxin preproprotein; |
741
|
|
|
|
|
|
|
trembl q9u7q6_contu 20.6787205 Calmodulin; |
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
=head3 blast |
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
$client->blast |
746
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
A factory method for creating a singleton instance of |
748
|
|
|
|
|
|
|
F<MRS::Client::Blast>. |
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
=head3 clustal |
751
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
$client->clustal |
753
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
A factory method for creating instances of F<MRS::Client::Clustal>. |
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
=head2 MRS::Client::Databank |
757
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
This package represents an MRS databank and allows to query it. Each |
759
|
|
|
|
|
|
|
databank consists of one or more files (represented by |
760
|
|
|
|
|
|
|
C<MRS::Client::Databank::File>) and of indices |
761
|
|
|
|
|
|
|
(C<MRS::Client::Databank::Index>). |
762
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
A databank instance can be created by a I<new> method but usually it |
764
|
|
|
|
|
|
|
is created by a factory method available in the C<MRS::Client>: |
765
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
my $db = $client->db ('enzyme'); |
767
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
The factory method, as well as the I<new> method, creates only a |
769
|
|
|
|
|
|
|
"shell" databank instance - that is good enough for making queries but |
770
|
|
|
|
|
|
|
which does not contain any databank properties (name, indices, |
771
|
|
|
|
|
|
|
etc.) yet. The properties will be fetched from the MRS server only when |
772
|
|
|
|
|
|
|
you ask for them (using the "getter" methods described below). |
773
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
=head3 new |
775
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
The only, and mandatory, parameter is I<id>: |
777
|
|
|
|
|
|
|
|
778
|
|
|
|
|
|
|
$db = MRS::Client::Databank->new (id => 'interpro'); |
779
|
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
The arguments syntax (the hash) is prepared for more arguments later |
781
|
|
|
|
|
|
|
(perhaps). But it should not bother you because you would rarely use |
782
|
|
|
|
|
|
|
this method - having the factory method I<db> in the client. |
783
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
I<Warning:> Do not use this method directly, or check first how |
785
|
|
|
|
|
|
|
it is used in the module C<MRS::Client>. |
786
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
=head3 find |
788
|
|
|
|
|
|
|
|
789
|
|
|
|
|
|
|
This is the crucial method of the whole C<MRS::Client> module. It |
790
|
|
|
|
|
|
|
queries a databank and returns an C<MRS::Client::Find> instance that |
791
|
|
|
|
|
|
|
can be used to iterate over found entries. |
792
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
It takes many arguments. At least one of the "query" arguments (which |
794
|
|
|
|
|
|
|
are I<and>, I<or> and I<query>) must be supplied; other arguments are |
795
|
|
|
|
|
|
|
optional. |
796
|
|
|
|
|
|
|
|
797
|
|
|
|
|
|
|
The arguments can always be specified as a hash, but for usual cases |
798
|
|
|
|
|
|
|
there are few shortcuts. Let's look at the arguments as used in the |
799
|
|
|
|
|
|
|
hash: |
800
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
=over |
802
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
=item C<and> |
804
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
The value is an array reference where elements are terms that will be |
806
|
|
|
|
|
|
|
combined by the AND boolean operator in a ranked query. For example: |
807
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
$find = $db->find ('and' => ['human', 'snake']); |
809
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
This argument can also be used directly, not as a hash, assuming that |
811
|
|
|
|
|
|
|
you do not need to use any other arguments: |
812
|
|
|
|
|
|
|
|
813
|
|
|
|
|
|
|
$find = $db->find (['human', 'snake']); |
814
|
|
|
|
|
|
|
|
815
|
|
|
|
|
|
|
=item C<or> |
816
|
|
|
|
|
|
|
|
817
|
|
|
|
|
|
|
The value is an array reference where elements are terms that will be |
818
|
|
|
|
|
|
|
combined by the OR boolean operator in a ranked query. For example: |
819
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
$find = $db->find ('or' => ['human', 'snake']); |
821
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
There can be either an I<and> or an I<or> argument, but not both. If |
823
|
|
|
|
|
|
|
both are used, a warning is issued and the I<and> one will take |
824
|
|
|
|
|
|
|
precedence. |
825
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
=item C<query> |
827
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
The value is an expression, usually using some boolean operators (in |
829
|
|
|
|
|
|
|
upper cases!): |
830
|
|
|
|
|
|
|
|
831
|
|
|
|
|
|
|
$find = $db->find (query => 'hemoglobinase AND NOT human'); |
832
|
|
|
|
|
|
|
|
833
|
|
|
|
|
|
|
If there are no boolean operators, it is used as a single term. For |
834
|
|
|
|
|
|
|
example, these are equivalent: |
835
|
|
|
|
|
|
|
|
836
|
|
|
|
|
|
|
$find = $db->find (query => 'hemoglobinase activity'); |
837
|
|
|
|
|
|
|
$find = $db->find ('and' => ['hemoglobinase activity']); |
838
|
|
|
|
|
|
|
|
839
|
|
|
|
|
|
|
You can also use both, I<and> or I<or>, and I<query>. The query then |
840
|
|
|
|
|
|
|
is an additional filter applied to the results found by the I<and> or |
841
|
|
|
|
|
|
|
I<or> terms. For example: |
842
|
|
|
|
|
|
|
|
843
|
|
|
|
|
|
|
$find = $db->find ('and' => ['human', 'snake'], |
844
|
|
|
|
|
|
|
query => 'NOT neurotoxin'); |
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
As a shortcut, the query parameter can also be used without a hash, |
847
|
|
|
|
|
|
|
assuming again that you do not need to use any other arguments: |
848
|
|
|
|
|
|
|
|
849
|
|
|
|
|
|
|
$find = $db->find ('hemoglobinase AND NOT human'); |
850
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
=item C<algorithm> |
852
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
B<Note:> This argument is used only by MRS version 5. |
854
|
|
|
|
|
|
|
See L</"MRS VERSIONS"> for details. |
855
|
|
|
|
|
|
|
|
856
|
|
|
|
|
|
|
The ranked queries (the ones achieved by I<and> or I<or> arguments) |
857
|
|
|
|
|
|
|
have assigned relevance score to their hits. The relevance score |
858
|
|
|
|
|
|
|
depends on the used algorithm. The available values for this argument |
859
|
|
|
|
|
|
|
are defined in C<MRS::Algorithm>: |
860
|
|
|
|
|
|
|
|
861
|
|
|
|
|
|
|
package MRS::Algorithm; |
862
|
|
|
|
|
|
|
use constant { |
863
|
|
|
|
|
|
|
VECTOR => 'Vector', |
864
|
|
|
|
|
|
|
DICE => 'Dice', |
865
|
|
|
|
|
|
|
JACCARD => 'Jaccard', |
866
|
|
|
|
|
|
|
}; |
867
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
The default algorithm is "Vector". For example (using the format |
869
|
|
|
|
|
|
|
"header" - which is the only one that shows relevance scores): |
870
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
$client->db('enzyme')->find ('and' => 'venom', |
872
|
|
|
|
|
|
|
algorithm => MRS::Algorithm->DICE, |
873
|
|
|
|
|
|
|
max_entries => 3, |
874
|
|
|
|
|
|
|
'format' => MRS::EntryFormat->HEADER); |
875
|
|
|
|
|
|
|
enzyme 3.4.24.43 14.9607477 Atroxase. |
876
|
|
|
|
|
|
|
enzyme 3.4.24.49 13.6817474 Bothropasin. |
877
|
|
|
|
|
|
|
enzyme 3.4.24.73 13.2007284 Jararhagin. |
878
|
|
|
|
|
|
|
|
879
|
|
|
|
|
|
|
$client->db('enzyme')->find ('and' => 'venom', |
880
|
|
|
|
|
|
|
algorithm => MRS::Algorithm->VECTOR, |
881
|
|
|
|
|
|
|
max_entries => 3, |
882
|
|
|
|
|
|
|
'format' => MRS::EntryFormat->HEADER); |
883
|
|
|
|
|
|
|
enzyme 3.1.15.1 21.6520195 Venom exonuclease. |
884
|
|
|
|
|
|
|
enzyme 3.4.21.60 19.3931656 Scutelarin. |
885
|
|
|
|
|
|
|
enzyme 5.1.1.16 16.7410889 Protein-serine epimerase. |
886
|
|
|
|
|
|
|
|
887
|
|
|
|
|
|
|
=item C<start>, C<offset>, C<max_entries> |
888
|
|
|
|
|
|
|
|
889
|
|
|
|
|
|
|
These arguments do not affect the query itself but they tell which |
890
|
|
|
|
|
|
|
entries from the found ones to retrieve (by the I<next> method - see |
891
|
|
|
|
|
|
|
below). |
892
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
All these three arguments have an integer value. |
894
|
|
|
|
|
|
|
|
895
|
|
|
|
|
|
|
C<start> tells to skip entries at the beginning of the whole result |
896
|
|
|
|
|
|
|
and start returning only with the entry with this order number. The |
897
|
|
|
|
|
|
|
counting starts from 1. |
898
|
|
|
|
|
|
|
|
899
|
|
|
|
|
|
|
C<offset> is the same as the C<start>, except the counting starts from |
900
|
|
|
|
|
|
|
zero. |
901
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
C<max_entries> is the maximum number of entries to retrieve. |
903
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
=item C<format> |
905
|
|
|
|
|
|
|
|
906
|
|
|
|
|
|
|
This argument also does not affect the query itself but it defines the |
907
|
|
|
|
|
|
|
format of the returned entries. The available values for this argument |
908
|
|
|
|
|
|
|
are defined in C<MRS::EntryFormat>: |
909
|
|
|
|
|
|
|
|
910
|
|
|
|
|
|
|
package MRS::EntryFormat; |
911
|
|
|
|
|
|
|
use constant { |
912
|
|
|
|
|
|
|
PLAIN => 'plain', |
913
|
|
|
|
|
|
|
TITLE => 'title', |
914
|
|
|
|
|
|
|
HTML => 'html', |
915
|
|
|
|
|
|
|
FASTA => 'fasta', |
916
|
|
|
|
|
|
|
SEQUENCE => 'sequence', |
917
|
|
|
|
|
|
|
HEADER => 'header', |
918
|
|
|
|
|
|
|
}; |
919
|
|
|
|
|
|
|
|
920
|
|
|
|
|
|
|
The default format is 'plain'. The 'fasta' and 'sequence' formats are |
921
|
|
|
|
|
|
|
available only for databanks that have sequence data. For all formats, |
922
|
|
|
|
|
|
|
except for the 'header', the entries are returned as strings. For |
923
|
|
|
|
|
|
|
'header', the entries are instances of C<MRS::Client::Hit>. |
924
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
Be aware that C<format> is also a built-in Perl function, so better |
926
|
|
|
|
|
|
|
quote it when used as a hash key (it seems to work also without quotes |
927
|
|
|
|
|
|
|
except the emacs TAB key is confused if there are no surrounding |
928
|
|
|
|
|
|
|
quotes; just a minor annoyance). |
929
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
=item C<xformat> |
931
|
|
|
|
|
|
|
|
932
|
|
|
|
|
|
|
This argument (C<xformat>) enhances the C<format> argument. It |
933
|
|
|
|
|
|
|
is used (at least at the moment) only for HTML format; for other |
934
|
|
|
|
|
|
|
formats, it is ignored. See, however, the L</"MRS VERSIONS"> about the |
935
|
|
|
|
|
|
|
abandoned HTML format. |
936
|
|
|
|
|
|
|
|
937
|
|
|
|
|
|
|
Be aware, however, that the C<xformat> depends on the structure of the |
938
|
|
|
|
|
|
|
HTML provided by the MRS. This structure is not defined in the MRS |
939
|
|
|
|
|
|
|
server API, so it can change easily. It even depends on the way how |
940
|
|
|
|
|
|
|
the authors write their parsing scripts. When the HTML |
941
|
|
|
|
|
|
|
output changes this module must be changed, as well. Caveat emptor. |
942
|
|
|
|
|
|
|
|
943
|
|
|
|
|
|
|
The C<xformat> is a hashref with keys that change (slightly or |
944
|
|
|
|
|
|
|
significantly) the returned HTML. Here are all possible keys |
945
|
|
|
|
|
|
|
(with a randomly picked up values): |
946
|
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
xformat => { MRS::XFormat::CSS_CLASS() => 'mrslink', |
948
|
|
|
|
|
|
|
MRS::XFormat::URL_PREFIX() => 'http://cbrcgit:8080/mrs-web/', |
949
|
|
|
|
|
|
|
MRS::XFormat::REMOVE_DEAD() => 1, # 'or' => ['...'] |
950
|
|
|
|
|
|
|
MRS::XFormat::ONLY_LINKS() => 1 } |
951
|
|
|
|
|
|
|
|
952
|
|
|
|
|
|
|
C<MRS::XFormat::CSS_CLASS> specifies a CSS-class name that will be |
953
|
|
|
|
|
|
|
added to all C<a> tags in the returned HTML. It allows, for example, |
954
|
|
|
|
|
|
|
an easy post-processing by various JavaScript libraries. For example, |
955
|
|
|
|
|
|
|
if the original HTML contains: |
956
|
|
|
|
|
|
|
|
957
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
|
959
|
|
|
|
|
|
|
it will become (using the value shown above): |
960
|
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
|
962
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
C<MRS::XFormat::URL_PREFIX> helps to keep the returned HTML |
964
|
|
|
|
|
|
|
independent on the machine where it was created. This option pre-pends |
965
|
|
|
|
|
|
|
the given prefix to the relative URLs in the hyperlinks that point to |
966
|
|
|
|
|
|
|
the data in an MRS web application. For example, if the original HTML |
967
|
|
|
|
|
|
|
contains: |
968
|
|
|
|
|
|
|
|
969
|
|
|
|
|
|
|
|
970
|
|
|
|
|
|
|
|
971
|
|
|
|
|
|
|
it will become: |
972
|
|
|
|
|
|
|
|
973
|
|
|
|
|
|
|
|
974
|
|
|
|
|
|
|
|
975
|
|
|
|
|
|
|
Other hyperlinks - those not starting with C<query.do> or C<entry.do> - are |
976
|
|
|
|
|
|
|
not affected. |
977
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
C<MRS::XFormat::REMOVE_DEAD> deals with the fact that the MRS server |
979
|
|
|
|
|
|
|
creates hyperlinks pointing to other MRS databanks without checking |
980
|
|
|
|
|
|
|
that they actually exist in the local MRS installation. This may be |
981
|
|
|
|
|
|
|
fixed later (quoting Maarten) but before it happens this option (if with a true |
982
|
|
|
|
|
|
|
value) removes (from the returned HTML) all hyperlinks that point to |
983
|
|
|
|
|
|
|
the not-installed MRS databanks. For example, if the original HTML has |
984
|
|
|
|
|
|
|
these hyperlinks: |
985
|
|
|
|
|
|
|
|
986
|
|
|
|
|
|
|
AF536179 |
987
|
|
|
|
|
|
|
D00735 |
988
|
|
|
|
|
|
|
1VZN |
989
|
|
|
|
|
|
|
2FK4 |
990
|
|
|
|
|
|
|
|
991
|
|
|
|
|
|
|
and the C database is not locally installed, the returned HTML |
992
|
|
|
|
|
|
|
will change to: |
993
|
|
|
|
|
|
|
|
994
|
|
|
|
|
|
|
AF536179 |
995
|
|
|
|
|
|
|
D00735 |
996
|
|
|
|
|
|
|
1VZN |
997
|
|
|
|
|
|
|
2FK4 |
998
|
|
|
|
|
|
|
|
999
|
|
|
|
|
|
|
There is a small caveat, however. The MRS::Client needs to know what |
1000
|
|
|
|
|
|
|
databanks are installed. It finds out by asking the MRS server by |
1001
|
|
|
|
|
|
|
using the method C<db> (explained elsewhere in this document). This |
1002
|
|
|
|
|
|
|
method returns much more than is needed, so it can be slightly |
1003
|
|
|
|
|
|
|
expensive. Therefore, if your concern is the highest speed, you can |
1004
|
|
|
|
|
|
|
help the MRS::Client by providing a list of databanks that you know |
1005
|
|
|
|
|
|
|
you have installed. Actually, in most cases, you can create such list |
1006
|
|
|
|
|
|
|
also by calling the C<db> method but depending on your code you can |
1007
|
|
|
|
|
|
|
call it just once and reuse it. For example, if you wish to keep |
1008
|
|
|
|
|
|
|
hyperlinks only for 'uniprot' and 'embl', you specify: |
1009
|
|
|
|
|
|
|
|
1010
|
|
|
|
|
|
|
xformat => { MRS::XFormat::REMOVE_DEAD() => ['uniprot', 'embl'] } |
1011
|
|
|
|
|
|
|
|
1012
|
|
|
|
|
|
|
Finally, there is an option C<MRS::XFormat::ONLY_LINKS>. It has a very |
1013
|
|
|
|
|
|
|
specific function: to extract and return C<only> the hyperlinks, not |
1014
|
|
|
|
|
|
|
the whole HTML. It is, therefore, predestined for further |
1015
|
|
|
|
|
|
|
post-processing. Note that all changes in the hyperlinks described |
1016
|
|
|
|
|
|
|
earlier are also applied here (e.g. adding an absolute URL or a CSS |
1017
|
|
|
|
|
|
|
class). |
1018
|
|
|
|
|
|
|
|
1019
|
|
|
|
|
|
|
When this option is used, the whole method "$find->next" (or |
1020
|
|
|
|
|
|
|
"db->entry") returns a reference to an array of extracted |
1021
|
|
|
|
|
|
|
hyperlinks: |
1022
|
|
|
|
|
|
|
|
1023
|
|
|
|
|
|
|
my $find = $client->db('sprot')->find |
1024
|
|
|
|
|
|
|
(and => ['DNP_DENAN'], |
1025
|
|
|
|
|
|
|
'format' => MRS::EntryFormat->HTML, |
1026
|
|
|
|
|
|
|
xformat => { |
1027
|
|
|
|
|
|
|
MRS::XFormat::ONLY_LINKS() => 1, |
1028
|
|
|
|
|
|
|
MRS::XFormat::CSS_CLASS() => 'mrslink', |
1029
|
|
|
|
|
|
|
}, |
1030
|
|
|
|
|
|
|
); |
1031
|
|
|
|
|
|
|
while (my $record = $find->next) { |
1032
|
|
|
|
|
|
|
print join ("\n", @$record) . "\n"; } |
1033
|
|
|
|
|
|
|
|
1034
|
|
|
|
|
|
|
Which prints something like: |
1035
|
|
|
|
|
|
|
|
1036
|
|
|
|
|
|
|
8618 |
1037
|
|
|
|
|
|
|
Eukaryota |
1038
|
|
|
|
|
|
|
... |
1039
|
|
|
|
|
|
|
Disulfide bond |
1040
|
|
|
|
|
|
|
... |
1041
|
|
|
|
|
|
|
92332489 |
1042
|
|
|
|
|
|
|
... |
1043
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
|
1045
|
|
|
|
|
|
|
=back |
1046
|
|
|
|
|
|
|
|
1047
|
|
|
|
|
|
|
=head3 count |
1048
|
|
|
|
|
|
|
|
1049
|
|
|
|
|
|
|
It returns a number of entries in the whole databank. |
1050
|
|
|
|
|
|
|
|
1051
|
|
|
|
|
|
|
print $client->db ('enzyme')->count; |
1052
|
|
|
|
|
|
|
4645 |
1053
|
|
|
|
|
|
|
|
1054
|
|
|
|
|
|
|
Do not confuse it with the method of the same name but called on the |
1055
|
|
|
|
|
|
|
object returned by the I<find> method - that one returns a number of |
1056
|
|
|
|
|
|
|
hits of that particular query. |
1057
|
|
|
|
|
|
|
|
1058
|
|
|
|
|
|
|
=head3 entry |
1059
|
|
|
|
|
|
|
|
1060
|
|
|
|
|
|
|
It takes an entry ID (mandatory), and optionally its format and |
1061
|
|
|
|
|
|
|
extended format, and it returns the given entry: |
1062
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
print $client->db ('enzyme')->entry ('3.4.21.60'); |
1064
|
|
|
|
|
|
|
ID 3.4.21.60 |
1065
|
|
|
|
|
|
|
DE Scutelarin. |
1066
|
|
|
|
|
|
|
AN Taipan activator. |
1067
|
|
|
|
|
|
|
CA Selective cleavage of Arg-|-Thr and Arg-|-Ile in prothrombin to form |
1068
|
|
|
|
|
|
|
CA thrombin and two inactive fragments. |
1069
|
|
|
|
|
|
|
CC -!- From the venom of Taipan snake (Oxyuranus scutellatus). |
1070
|
|
|
|
|
|
|
CC -!- Converts prothrombin to thrombin in the absence of coagulation factor |
1071
|
|
|
|
|
|
|
CC Va, and is potentiated by phospholipid and calcium. |
1072
|
|
|
|
|
|
|
CC -!- Specificity is similar to that of factor Xa. |
1073
|
|
|
|
|
|
|
CC -!- Binds calcium via gamma-carboxyglutamic acid residues. |
1074
|
|
|
|
|
|
|
CC -!- Similar enzymes are known from the venom of other Australian elapid |
1075
|
|
|
|
|
|
|
CC snakes Pseudonaja textilis, Oxyuranus microlepidotus and Demansia |
1076
|
|
|
|
|
|
|
CC nuchalis affinis. |
1077
|
|
|
|
|
|
|
CC -!- Formerly EC 3.4.99.28. |
1078
|
|
|
|
|
|
|
// |
1079
|
|
|
|
|
|
|
|
1080
|
|
|
|
|
|
|
print $client->db ('enzyme')->entry ('3.4.21.60', |
1081
|
|
|
|
|
|
|
MRS::EntryFormat->TITLE); |
1082
|
|
|
|
|
|
|
Scutelarin. |
1083
|
|
|
|
|
|
|
|
1084
|
|
|
|
|
|
|
The optional C<xformat> is a hashref and it was explained |
1085
|
|
|
|
|
|
|
earlier in the section about the C<find> method. |
1086
|
|
|
|
|
|
|
|
1087
|
|
|
|
|
|
|
=head3 id, name, version, blastable, url, script, files, indices, aliases |
1088
|
|
|
|
|
|
|
|
1089
|
|
|
|
|
|
|
There are several methods delivering databank properties. They have no |
1090
|
|
|
|
|
|
|
arguments: |
1091
|
|
|
|
|
|
|
|
1092
|
|
|
|
|
|
|
my $db = $client->db('omim'); |
1093
|
|
|
|
|
|
|
print $db->id . "\n"; |
1094
|
|
|
|
|
|
|
print $db->name . "\n"; |
1095
|
|
|
|
|
|
|
print $db->version . "\n"; |
1096
|
|
|
|
|
|
|
print $db->blastable . "\n"; |
1097
|
|
|
|
|
|
|
print $db->url . "\n"; |
1098
|
|
|
|
|
|
|
print $db->script . "\n"; |
1099
|
|
|
|
|
|
|
print $db->aliases . "\n"; |
1100
|
|
|
|
|
|
|
|
1101
|
|
|
|
|
|
|
=head3 files |
1102
|
|
|
|
|
|
|
|
1103
|
|
|
|
|
|
|
Each databank consists of one or more files. This method returns a |
1104
|
|
|
|
|
|
|
reference to an array of C<MRS::Client::Databank::File> |
1105
|
|
|
|
|
|
|
instances. Each such instance has properties reachable by the |
1106
|
|
|
|
|
|
|
following "getters" methods: |
1107
|
|
|
|
|
|
|
|
1108
|
|
|
|
|
|
|
sub say { print @_, "\n"; } |
1109
|
|
|
|
|
|
|
|
1110
|
|
|
|
|
|
|
my $db_files = $client->db('uniprot')->files; |
1111
|
|
|
|
|
|
|
foreach my $file (@{ $db_files }) { |
1112
|
|
|
|
|
|
|
say $file->id; |
1113
|
|
|
|
|
|
|
say $file->version; |
1114
|
|
|
|
|
|
|
say $file->last_modified; |
1115
|
|
|
|
|
|
|
say $file->entries_count; |
1116
|
|
|
|
|
|
|
say $file->raw_data_size; |
1117
|
|
|
|
|
|
|
say $file->file_size; |
1118
|
|
|
|
|
|
|
say ''; |
1119
|
|
|
|
|
|
|
} |
1120
|
|
|
|
|
|
|
|
1121
|
|
|
|
|
|
|
=head3 indices |
1122
|
|
|
|
|
|
|
|
1123
|
|
|
|
|
|
|
Each databank is indexed by (usually several) indices. This method |
1124
|
|
|
|
|
|
|
returns a reference to an array of C<MRS::Client::Databank::Index> |
1125
|
|
|
|
|
|
|
instances. Each such instance has properties reachable by the |
1126
|
|
|
|
|
|
|
"getters" method: |
1127
|
|
|
|
|
|
|
|
1128
|
|
|
|
|
|
|
my $db_indices = $client->db('uniprot')->indices; |
1129
|
|
|
|
|
|
|
foreach my $idx (@{ $db_indices }) { |
1130
|
|
|
|
|
|
|
printf ("%-15s%-15s%9d %-9s %s\n", |
1131
|
|
|
|
|
|
|
$idx->db, |
1132
|
|
|
|
|
|
|
$idx->id, |
1133
|
|
|
|
|
|
|
$idx->count, |
1134
|
|
|
|
|
|
|
$idx->type, |
1135
|
|
|
|
|
|
|
$idx->description); |
1136
|
|
|
|
|
|
|
} |
1137
|
|
|
|
|
|
|
|
1138
|
|
|
|
|
|
|
The index I<id> is important because it can be used in the |
1139
|
|
|
|
|
|
|
queries. For example, assuming that the database has an index I<os> |
1140
|
|
|
|
|
|
|
(organism species): |
1141
|
|
|
|
|
|
|
|
1142
|
|
|
|
|
|
|
$db->find (query => 'rds AND os:human'); |
1143
|
|
|
|
|
|
|
|
1144
|
|
|
|
|
|
|
=head2 MRS::Client::Find |
1145
|
|
|
|
|
|
|
|
1146
|
|
|
|
|
|
|
This object carries results of a query; it is returned by the I<find> |
1147
|
|
|
|
|
|
|
method, called either on a databank instance or on the whole |
1148
|
|
|
|
|
|
|
client. Actually, in case of the whole client, the returned type is of |
1149
|
|
|
|
|
|
|
type C<MRS::Client::MultiFind> which is a subclass of |
1150
|
|
|
|
|
|
|
C<MRS::Client::Find>. |
1151
|
|
|
|
|
|
|
|
1152
|
|
|
|
|
|
|
=head3 db, terms, query, all_terms_required, max_entries |
1153
|
|
|
|
|
|
|
|
1154
|
|
|
|
|
|
|
The getter methods just reflect query arguments (the ones given to the |
1155
|
|
|
|
|
|
|
C<find> method): |
1156
|
|
|
|
|
|
|
|
1157
|
|
|
|
|
|
|
sub say { print @_, "\n"; } |
1158
|
|
|
|
|
|
|
|
1159
|
|
|
|
|
|
|
my $find = $client->db('uniprot')->find('sapiens'); |
1160
|
|
|
|
|
|
|
say $find->db; |
1161
|
|
|
|
|
|
|
say join (", ", @ {$find->terms }); |
1162
|
|
|
|
|
|
|
say $find->query; |
1163
|
|
|
|
|
|
|
say $find->max_entries; |
1164
|
|
|
|
|
|
|
say $find->all_terms_required; |
1165
|
|
|
|
|
|
|
|
1166
|
|
|
|
|
|
|
The I<terms> (a ref array) are either from the I<and> or I<or> |
1167
|
|
|
|
|
|
|
argument, and the I<all_terms_required> is 1 (when terms are coming |
1168
|
|
|
|
|
|
|
from the I<and>) or zero. |
1169
|
|
|
|
|
|
|
|
1170
|
|
|
|
|
|
|
=head3 count |
1171
|
|
|
|
|
|
|
|
1172
|
|
|
|
|
|
|
Finally, you can get the number of hits of this query. Be aware (as |
1173
|
|
|
|
|
|
|
mentioned elsewhere in this document) that boolean queries return only |
1174
|
|
|
|
|
|
|
an estimate, usually much higher than is the reality. |
1175
|
|
|
|
|
|
|
|
1176
|
|
|
|
|
|
|
=head2 MRS::Client::MultiFind |
1177
|
|
|
|
|
|
|
|
1178
|
|
|
|
|
|
|
This object is returned from the C<find> method made to all |
1179
|
|
|
|
|
|
|
databanks. It is a subclass of the C<MRS::Client::Find> with one |
1180
|
|
|
|
|
|
|
additional method: |
1181
|
|
|
|
|
|
|
|
1182
|
|
|
|
|
|
|
=head3 db_counts |
1183
|
|
|
|
|
|
|
|
1184
|
|
|
|
|
|
|
It returns databank names and their total counts in a hash (not a |
1185
|
|
|
|
|
|
|
reference) where keys are the databank names and values the entry |
1186
|
|
|
|
|
|
|
counts: |
1187
|
|
|
|
|
|
|
|
1188
|
|
|
|
|
|
|
my %counts = $find->db_counts; |
1189
|
|
|
|
|
|
|
foreach my $db (sort keys %counts) { |
1190
|
|
|
|
|
|
|
printf ("%-15s %9d\n", $db, $counts{$db}); |
1191
|
|
|
|
|
|
|
} |
1192
|
|
|
|
|
|
|
|
1193
|
|
|
|
|
|
|
=head2 MRS::Client::Hit |
1194
|
|
|
|
|
|
|
|
1195
|
|
|
|
|
|
|
Finally, a tiny object representing a hit, a result of a query before |
1196
|
|
|
|
|
|
|
going to a databank for the full contents of a found entry. It |
1197
|
|
|
|
|
|
|
contains the databank's ID (where the hit was found), the score that |
1198
|
|
|
|
|
|
|
this hit achieved (for boolean queries, the score is always 1) and the |
1199
|
|
|
|
|
|
|
ID and title of the entry represented by this hit. |
1200
|
|
|
|
|
|
|
|
1201
|
|
|
|
|
|
|
The corresponding getter methods are I<db>, I<score>, I<id> and |
1202
|
|
|
|
|
|
|
I<title>. |
1203
|
|
|
|
|
|
|
|
1204
|
|
|
|
|
|
|
The I method (as shown above) returns just hits (instead of the |
1205
|
|
|
|
|
|
|
full entries) when the format I<MRS::EntryFormat-E<gt>HEADER> is |
1206
|
|
|
|
|
|
|
specified. |
1207
|
|
|
|
|
|
|
|
1208
|
|
|
|
|
|
|
=head2 MRS::Client::Blast |
1209
|
|
|
|
|
|
|
|
1210
|
|
|
|
|
|
|
The MRS servers provide sequence homology searches, the famous Blast |
1211
|
|
|
|
|
|
|
program (namely the I<blastp> program for protein sequences). An input |
1212
|
|
|
|
|
|
|
sequence (in FASTA format) is searched against one of the MRS |
1213
|
|
|
|
|
|
|
databanks. It can be any MRS databank whose method C |
1214
|
|
|
|
|
|
|
returns true (e.g. uniprot). An input sequence and a databank are the |
1215
|
|
|
|
|
|
|
only mandatory input parameters. Other common Blast parameters are |
1216
|
|
|
|
|
|
|
also supported. |
1217
|
|
|
|
|
|
|
|
1218
|
|
|
|
|
|
|
The invocation is asynchronous. It means that the I<run> method |
1219
|
|
|
|
|
|
|
returns immediately, without waiting for the Blast program to finish, |
1220
|
|
|
|
|
|
|
giving back a I, a handler that can be used later for polling |
1221
|
|
|
|
|
|
|
for status, and, once status indicates the Blast finishes, for getting |
1222
|
|
|
|
|
|
|
results (or an error message). This is the typical usage: |
1223
|
|
|
|
|
|
|
|
1224
|
|
|
|
|
|
|
my @run_args = (fasta_file => '...', db => '...', ...); |
1225
|
|
|
|
|
|
|
my $job = $client->blast->run (@run_args); |
1226
|
|
|
|
|
|
|
sleep 10 while (not $job->completed); |
1227
|
|
|
|
|
|
|
print $job->error if $job->failed; |
1228
|
|
|
|
|
|
|
print $job->results; |
1229
|
|
|
|
|
|
|
|
1230
|
|
|
|
|
|
|
529.0 1.346582e-149 [vsph_trije ] 1 Snake venom serine protease homolog; |
1231
|
|
|
|
|
|
|
509.0 1.411994e-143 [vspa_triga ] 1 Venom serine proteinase 2A; |
1232
|
|
|
|
|
|
|
508.0 2.823987e-143 [vsp1m_trist ] 1 Venom serine protease 1 homolog; |
1233
|
|
|
|
|
|
|
506.0 1.129595e-142 [vsp07_trist ] 1 Venom serine protease KN7 homolog; |
1234
|
|
|
|
|
|
|
488.0 2.961165e-137 [vsp2_trifl ] 1 Venom serine proteinase 2; |
1235
|
|
|
|
|
|
|
487.0 5.922331e-137 [vsp1_trije ] 1 Venom serine proteinase-like protein; |
1236
|
|
|
|
|
|
|
456.0 1.271811e-127 [vsp04_trist ] 1 Venom serine protease KN4 homolog; |
1237
|
|
|
|
|
|
|
... |
1238
|
|
|
|
|
|
|
|
1239
|
|
|
|
|
|
|
You can also use the provided script C<mrsblast> that polls for you (if |
1240
|
|
|
|
|
|
|
you wish so). |
1241
|
|
|
|
|
|
|
|
1242
|
|
|
|
|
|
|
In order to create an C<MRS::Client::Blast> instance, use the factory method: |
1243
|
|
|
|
|
|
|
|
1244
|
|
|
|
|
|
|
my $blast = $client->blast; |
1245
|
|
|
|
|
|
|
|
1246
|
|
|
|
|
|
|
=head3 run |
1247
|
|
|
|
|
|
|
|
1248
|
|
|
|
|
|
|
The main method that starts Blast with the given parameters and |
1249
|
|
|
|
|
|
|
immediately returns an object C<MRS::Client::Blast::Job> that can be |
1250
|
|
|
|
|
|
|
used for all other important methods. If you plan to stop your Perl |
1251
|
|
|
|
|
|
|
program and start it again later, you need to remember the job ID: |
1252
|
|
|
|
|
|
|
|
1253
|
|
|
|
|
|
|
my $job = $blast->run (...); |
1254
|
|
|
|
|
|
|
print $job->id; |
1255
|
|
|
|
|
|
|
|
1256
|
|
|
|
|
|
|
The job ID can be later used to re-create the same (well, similar) Job |
1257
|
|
|
|
|
|
|
object (see method I<job> below) that again provides all important |
1258
|
|
|
|
|
|
|
methods (such as getting results). |
1259
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
The method I<run> has the following arguments (the Job object has the same |
1261
|
|
|
|
|
|
|
"getter" methods), all given as a hash: |
1262
|
|
|
|
|
|
|
|
1263
|
|
|
|
|
|
|
=over |
1264
|
|
|
|
|
|
|
|
1265
|
|
|
|
|
|
|
=item db |
1266
|
|
|
|
|
|
|
|
1267
|
|
|
|
|
|
|
An MRS databank to search against. Mandatory parameter. |
1268
|
|
|
|
|
|
|
|
1269
|
|
|
|
|
|
|
=item fasta |
1270
|
|
|
|
|
|
|
|
1271
|
|
|
|
|
|
|
A protein sequence in a FASTA format. Mandatory parameter unless |
1272
|
|
|
|
|
|
|
C<fasta_file> is given. |
1273
|
|
|
|
|
|
|
|
1274
|
|
|
|
|
|
|
=item fasta_file |
1275
|
|
|
|
|
|
|
|
1276
|
|
|
|
|
|
|
A name of a file containing a protein sequence in a FASTA |
1277
|
|
|
|
|
|
|
format. Mandatory parameter unless C<fasta> is given. |
1278
|
|
|
|
|
|
|
|
1279
|
|
|
|
|
|
|
=item filter |
1280
|
|
|
|
|
|
|
|
1281
|
|
|
|
|
|
|
Low complexity filter. Boolean parameter. Default is 1. |
1282
|
|
|
|
|
|
|
|
1283
|
|
|
|
|
|
|
=item expect |
1284
|
|
|
|
|
|
|
|
1285
|
|
|
|
|
|
|
E-value cutoff. A float value. Default is 10.0. |
1286
|
|
|
|
|
|
|
|
1287
|
|
|
|
|
|
|
=item word_size |
1288
|
|
|
|
|
|
|
|
1289
|
|
|
|
|
|
|
An integer. Default is 3. |
1290
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
=item matrix |
1292
|
|
|
|
|
|
|
|
1293
|
|
|
|
|
|
|
Scoring matrix. Default BLOSUM62. |
1294
|
|
|
|
|
|
|
|
1295
|
|
|
|
|
|
|
=item open_cost |
1296
|
|
|
|
|
|
|
|
1297
|
|
|
|
|
|
|
Gap opening penalty. An integer. Default is 11. |
1298
|
|
|
|
|
|
|
|
1299
|
|
|
|
|
|
|
=item extend_cost |
1300
|
|
|
|
|
|
|
|
1301
|
|
|
|
|
|
|
Gap extension penalty. Default is 1. |
1302
|
|
|
|
|
|
|
|
1303
|
|
|
|
|
|
|
=item query |
1304
|
|
|
|
|
|
|
|
1305
|
|
|
|
|
|
|
An MRS boolean query to limit the search space. |
1306
|
|
|
|
|
|
|
|
1307
|
|
|
|
|
|
|
=item gapped |
1308
|
|
|
|
|
|
|
|
1309
|
|
|
|
|
|
|
A boolean parameter. Its true value performs gapped alignment. Default |
1310
|
|
|
|
|
|
|
is true. |
1311
|
|
|
|
|
|
|
|
1312
|
|
|
|
|
|
|
=item max_hits |
1313
|
|
|
|
|
|
|
|
1314
|
|
|
|
|
|
|
Limit reported hits. An integer. Default is 250. |
1315
|
|
|
|
|
|
|
|
1316
|
|
|
|
|
|
|
=back |
1317
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
=head3 job |
1319
|
|
|
|
|
|
|
|
1320
|
|
|
|
|
|
|
The method finds or re-creates a Job object of the given ID: |
1321
|
|
|
|
|
|
|
|
1322
|
|
|
|
|
|
|
my $job = $client->blast->job ('0f37a544-a7a2-4239-b950-65a6aa07d1ef'); |
1323
|
|
|
|
|
|
|
print $job->id; |
1324
|
|
|
|
|
|
|
print $job->status; |
1325
|
|
|
|
|
|
|
|
1326
|
|
|
|
|
|
|
It dies with an error if such Job is not known to the MRS server. |
1327
|
|
|
|
|
|
|
|
1328
|
|
|
|
|
|
|
The returned Job object can be used to ask for the Job status, or for |
1329
|
|
|
|
|
|
|
getting the Job results. There is one caveat, however. The re-created |
1330
|
|
|
|
|
|
|
Job object is not that "rich" as was its original version: it does not |
1331
|
|
|
|
|
|
|
know, for example, what parameters were used to start this blast |
1332
|
|
|
|
|
|
|
job. Unfortunately, the MRS server keeps only the Job ID and nothing |
1333
|
|
|
|
|
|
|
else. Fortunately, the parameters are needed only for the results in |
1334
|
|
|
|
|
|
|
the XML format (see more about available formats below, in the method |
1335
|
|
|
|
|
|
|
I<$job-E<gt>results>) - and you can add them (if you still have them), as a |
1336
|
|
|
|
|
|
|
hash, to the C<job> method when re-creating a new Job instance: |
1337
|
|
|
|
|
|
|
|
1338
|
|
|
|
|
|
|
my $job = $client->blast->job ('0f37a544-a7a2-4239-b950-65a6aa07d1ef', |
1339
|
|
|
|
|
|
|
fasta => '...', |
1340
|
|
|
|
|
|
|
db => 'uniprot', ...); |
1341
|
|
|
|
|
|
|
|
1342
|
|
|
|
|
|
|
=head2 MRS::Client::Blast::Job |
1343
|
|
|
|
|
|
|
|
1344
|
|
|
|
|
|
|
The Job object represents a single Blast invocation with a set of |
1345
|
|
|
|
|
|
|
input parameters and, later, with results. It is also used to poll for |
1346
|
|
|
|
|
|
|
the status of the running job. Instances of this object are created |
1347
|
|
|
|
|
|
|
by the I<run> or I<job> methods of the C<MRS::Client::Blast> object. The Job's |
1348
|
|
|
|
|
|
|
methods are: |
1349
|
|
|
|
|
|
|
|
1350
|
|
|
|
|
|
|
=over |
1351
|
|
|
|
|
|
|
|
1352
|
|
|
|
|
|
|
=item id |
1353
|
|
|
|
|
|
|
|
1354
|
|
|
|
|
|
|
Job ID, an important handler if you have to re-create an |
1355
|
|
|
|
|
|
|
C<MRS::Client::Blast::Job> object. |
1356
|
|
|
|
|
|
|
|
1357
|
|
|
|
|
|
|
=item "getter" methods |
1358
|
|
|
|
|
|
|
|
1359
|
|
|
|
|
|
|
All these methods are equivalent to (and named the same as) the |
1360
|
|
|
|
|
|
|
parameters given to the C<run> method (described above): |
1361
|
|
|
|
|
|
|
|
1362
|
|
|
|
|
|
|
=over |
1363
|
|
|
|
|
|
|
|
1364
|
|
|
|
|
|
|
=item db |
1365
|
|
|
|
|
|
|
|
1366
|
|
|
|
|
|
|
=item fasta |
1367
|
|
|
|
|
|
|
|
1368
|
|
|
|
|
|
|
=item fasta_file |
1369
|
|
|
|
|
|
|
|
1370
|
|
|
|
|
|
|
=item filter |
1371
|
|
|
|
|
|
|
|
1372
|
|
|
|
|
|
|
=item expect |
1373
|
|
|
|
|
|
|
|
1374
|
|
|
|
|
|
|
=item word_size |
1375
|
|
|
|
|
|
|
|
1376
|
|
|
|
|
|
|
=item matrix |
1377
|
|
|
|
|
|
|
|
1378
|
|
|
|
|
|
|
=item open_cost |
1379
|
|
|
|
|
|
|
|
1380
|
|
|
|
|
|
|
=item extend_cost |
1381
|
|
|
|
|
|
|
|
1382
|
|
|
|
|
|
|
=item query |
1383
|
|
|
|
|
|
|
|
1384
|
|
|
|
|
|
|
=item max_hits |
1385
|
|
|
|
|
|
|
|
1386
|
|
|
|
|
|
|
=item gapped |
1387
|
|
|
|
|
|
|
|
1388
|
|
|
|
|
|
|
=item |
1389
|
|
|
|
|
|
|
|
1390
|
|
|
|
|
|
|
=back |
1391
|
|
|
|
|
|
|
|
1392
|
|
|
|
|
|
|
=item status, completed, failed |
1393
|
|
|
|
|
|
|
|
1394
|
|
|
|
|
|
|
The I<status> returns one of the C<MRS::JobStatus>: |
1395
|
|
|
|
|
|
|
|
1396
|
|
|
|
|
|
|
use constant { |
1397
|
|
|
|
|
|
|
UNKNOWN => 'unknown', |
1398
|
|
|
|
|
|
|
QUEUED => 'queued', |
1399
|
|
|
|
|
|
|
RUNNING => 'running', |
1400
|
|
|
|
|
|
|
ERROR => 'error', |
1401
|
|
|
|
|
|
|
FINISHED => 'finished', |
1402
|
|
|
|
|
|
|
}; |
1403
|
|
|
|
|
|
|
|
1404
|
|
|
|
|
|
|
The I<completed> returns true if the status is either C<FINISHED> or |
1405
|
|
|
|
|
|
|
C<ERROR>. The I<failed> returns true if the status is |
1406
|
|
|
|
|
|
|
C<ERROR>. Typical usage for polling a running job is: |
1407
|
|
|
|
|
|
|
|
1408
|
|
|
|
|
|
|
sleep 10 while (not $job->completed); |
1409
|
|
|
|
|
|
|
|
1410
|
|
|
|
|
|
|
=item error |
1411
|
|
|
|
|
|
|
|
1412
|
|
|
|
|
|
|
It returns an error message, or undef if the status is not |
1413
|
|
|
|
|
|
|
C<ERROR>. Typical usage is: |
1414
|
|
|
|
|
|
|
|
1415
|
|
|
|
|
|
|
print $job->error if $job->failed; |
1416
|
|
|
|
|
|
|
|
1417
|
|
|
|
|
|
|
=item results |
1418
|
|
|
|
|
|
|
|
1419
|
|
|
|
|
|
|
Finally, the more interesting method. It returns an object of type |
1420
|
|
|
|
|
|
|
C<MRS::Client::Blast::Result> that can be either used on its own (see |
1421
|
|
|
|
|
|
|
its "getter" method below), or converted to strings of one of the |
1422
|
|
|
|
|
|
|
formats predefined in C<MRS::BlastOutputFormat>: |
1423
|
|
|
|
|
|
|
|
1424
|
|
|
|
|
|
|
use constant { |
1425
|
|
|
|
|
|
|
XML => 'xml', |
1426
|
|
|
|
|
|
|
HITS => 'hits', |
1427
|
|
|
|
|
|
|
FULL => 'full', |
1428
|
|
|
|
|
|
|
STATS => 'stats', |
1429
|
|
|
|
|
|
|
}; |
1430
|
|
|
|
|
|
|
|
1431
|
|
|
|
|
|
|
The format is the only parameter of this method. Default format is |
1432
|
|
|
|
|
|
|
C<HITS>. The conversion to the given format is done by overloading the |
1433
|
|
|
|
|
|
|
double quotes operator, calling internally the method "as_string". You |
1434
|
|
|
|
|
|
|
just print the object: |
1435
|
|
|
|
|
|
|
|
1436
|
|
|
|
|
|
|
print $job->results; |
1437
|
|
|
|
|
|
|
|
1438
|
|
|
|
|
|
|
447.0 6.511672e-125 [vspgl_glosh ] 1 Thrombin-like enzyme gloshedobin; |
1439
|
|
|
|
|
|
|
429.0 1.706996e-119 [vsp2_viple ] 1 Venom serine proteinase-like protein 2; |
1440
|
|
|
|
|
|
|
421.0 4.369909e-117 [vsp12_trist ] 1 Venom serine protease KN12; |
1441
|
|
|
|
|
|
|
419.0 1.747964e-116 [vsps1_trist ] 1 Thrombin-like enzyme stejnefibrase-1; |
1442
|
|
|
|
|
|
|
... |
1443
|
|
|
|
|
|
|
|
1444
|
|
|
|
|
|
|
Where lines are individual hits and columns are: I, |
1445
|
|
|
|
|
|
|
I, sequence ID, number of HSPs for this hit, sequence |
1446
|
|
|
|
|
|
|
description. |
1447
|
|
|
|
|
|
|
|
1448
|
|
|
|
|
|
|
Or, giving just the Blast run statistics: |
1449
|
|
|
|
|
|
|
|
1450
|
|
|
|
|
|
|
print $job->results (MRS::BlastOutputFormat->STATS); |
1451
|
|
|
|
|
|
|
|
1452
|
|
|
|
|
|
|
DB count: 514212 |
1453
|
|
|
|
|
|
|
DB length: 180900945 |
1454
|
|
|
|
|
|
|
Search space: 23664675636 |
1455
|
|
|
|
|
|
|
Kappa: 0.041 |
1456
|
|
|
|
|
|
|
Lambda: 0.267 |
1457
|
|
|
|
|
|
|
Entropy: 0.140 |
1458
|
|
|
|
|
|
|
|
1459
|
|
|
|
|
|
|
Or, showing everything (in a rather un-parsable form, useful more for |
1460
|
|
|
|
|
|
|
testing than anything else): |
1461
|
|
|
|
|
|
|
|
1462
|
|
|
|
|
|
|
print $job->results (MRS::BlastOutputFormat->FULL); |
1463
|
|
|
|
|
|
|
|
1464
|
|
|
|
|
|
|
Or, in an XML format: |
1465
|
|
|
|
|
|
|
|
1466
|
|
|
|
|
|
|
print $job->results (MRS::BlastOutputFormat->XML); |
1467
|
|
|
|
|
|
|
|
1468
|
|
|
|
|
|
|
=back |
1469
|
|
|
|
|
|
|
|
1470
|
|
|
|
|
|
|
=head2 MRS::Client::Blast::Result |
1471
|
|
|
|
|
|
|
|
1472
|
|
|
|
|
|
|
You can explore the returned Blast results by the following "getter" |
1473
|
|
|
|
|
|
|
methods - going from the whole result to the individual hits and |
1474
|
|
|
|
|
|
|
inside hits to the individual HSPs (High-scoring pairs): |
1475
|
|
|
|
|
|
|
|
1476
|
|
|
|
|
|
|
=over |
1477
|
|
|
|
|
|
|
|
1478
|
|
|
|
|
|
|
=item db_count |
1479
|
|
|
|
|
|
|
|
1480
|
|
|
|
|
|
|
=item db_length |
1481
|
|
|
|
|
|
|
|
1482
|
|
|
|
|
|
|
=item db_space |
1483
|
|
|
|
|
|
|
|
1484
|
|
|
|
|
|
|
Effective search space. |
1485
|
|
|
|
|
|
|
|
1486
|
|
|
|
|
|
|
=item kappa |
1487
|
|
|
|
|
|
|
|
1488
|
|
|
|
|
|
|
=item lambda |
1489
|
|
|
|
|
|
|
|
1490
|
|
|
|
|
|
|
=item entropy |
1491
|
|
|
|
|
|
|
|
1492
|
|
|
|
|
|
|
=item hits |
1493
|
|
|
|
|
|
|
|
1494
|
|
|
|
|
|
|
It returns a reference to an array of C<MRS::Client::Blast::Hit>s |
1495
|
|
|
|
|
|
|
where each hit has methods: |
1496
|
|
|
|
|
|
|
|
1497
|
|
|
|
|
|
|
=over |
1498
|
|
|
|
|
|
|
|
1499
|
|
|
|
|
|
|
=item id |
1500
|
|
|
|
|
|
|
|
1501
|
|
|
|
|
|
|
=item title |
1502
|
|
|
|
|
|
|
|
1503
|
|
|
|
|
|
|
=item sequences |
1504
|
|
|
|
|
|
|
|
1505
|
|
|
|
|
|
|
It is a reference to an array of sequence IDs. |
1506
|
|
|
|
|
|
|
|
1507
|
|
|
|
|
|
|
=item hsps |
1508
|
|
|
|
|
|
|
|
1509
|
|
|
|
|
|
|
It is a reference to an array of C<MRS::Client::Blast::HSP>s |
1510
|
|
|
|
|
|
|
where each HSP has methods: |
1511
|
|
|
|
|
|
|
|
1512
|
|
|
|
|
|
|
=over |
1513
|
|
|
|
|
|
|
|
1514
|
|
|
|
|
|
|
=item score |
1515
|
|
|
|
|
|
|
|
1516
|
|
|
|
|
|
|
=item bit_score |
1517
|
|
|
|
|
|
|
|
1518
|
|
|
|
|
|
|
=item expect |
1519
|
|
|
|
|
|
|
|
1520
|
|
|
|
|
|
|
=item query_start |
1521
|
|
|
|
|
|
|
|
1522
|
|
|
|
|
|
|
=item subject_start |
1523
|
|
|
|
|
|
|
|
1524
|
|
|
|
|
|
|
=item identity |
1525
|
|
|
|
|
|
|
|
1526
|
|
|
|
|
|
|
=item positive |
1527
|
|
|
|
|
|
|
|
1528
|
|
|
|
|
|
|
=item gaps |
1529
|
|
|
|
|
|
|
|
1530
|
|
|
|
|
|
|
=item subject_length |
1531
|
|
|
|
|
|
|
|
1532
|
|
|
|
|
|
|
=item query_align |
1533
|
|
|
|
|
|
|
|
1534
|
|
|
|
|
|
|
=item subject_align |
1535
|
|
|
|
|
|
|
|
1536
|
|
|
|
|
|
|
=item midline |
1537
|
|
|
|
|
|
|
|
1538
|
|
|
|
|
|
|
=back |
1539
|
|
|
|
|
|
|
|
1540
|
|
|
|
|
|
|
=back |
1541
|
|
|
|
|
|
|
|
1542
|
|
|
|
|
|
|
=back |
1543
|
|
|
|
|
|
|
|
1544
|
|
|
|
|
|
|
Try to explore various result formats by using the provided script |
1545
|
|
|
|
|
|
|
C<mrsblast>. This waits for a job to be completed and then prints its |
1546
|
|
|
|
|
|
|
hits: |
1547
|
|
|
|
|
|
|
|
1548
|
|
|
|
|
|
|
mrsblast -d sprot -i 'your.fasta' |
1549
|
|
|
|
|
|
|
|
1550
|
|
|
|
|
|
|
This shows Blast statistics: |
1551
|
|
|
|
|
|
|
|
1552
|
|
|
|
|
|
|
mrsblast -d sprot -i 'your.fasta' -N |
1553
|
|
|
|
|
|
|
|
1554
|
|
|
|
|
|
|
This produces an XML output to a given file: |
1555
|
|
|
|
|
|
|
|
1556
|
|
|
|
|
|
|
mrsblast -d sprot -i 'your.fasta' -x results.xml |
1557
|
|
|
|
|
|
|
|
1558
|
|
|
|
|
|
|
Finally, this gives a long listing with all details: |
1559
|
|
|
|
|
|
|
|
1560
|
|
|
|
|
|
|
mrsblast -d sprot -i 'your.fasta' -f |
1561
|
|
|
|
|
|
|
|
1562
|
|
|
|
|
|
|
=head2 MRS::Client::Clustal |
1563
|
|
|
|
|
|
|
|
1564
|
|
|
|
|
|
|
B<Note:> This module is used only by MRS version 5. |
1565
|
|
|
|
|
|
|
See L</MRS VERSIONS> for details. |
1566
|
|
|
|
|
|
|
|
1567
|
|
|
|
|
|
|
The module wrapping the multiple alignment program I<clustalw>. The |
1568
|
|
|
|
|
|
|
program is optional and, therefore, not all MRS servers may have |
1569
|
|
|
|
|
|
|
it. Use the factory method for creating instances of |
1570
|
|
|
|
|
|
|
F<MRS::Client::Clustal>: |
1571
|
|
|
|
|
|
|
|
1572
|
|
|
|
|
|
|
$client->clustal |
1573
|
|
|
|
|
|
|
|
1574
|
|
|
|
|
|
|
=head3 run |
1575
|
|
|
|
|
|
|
|
1576
|
|
|
|
|
|
|
The main method, invoking I<clustalw> with mandatory input sequences |
1577
|
|
|
|
|
|
|
and optionally a couple of other parameters: |
1578
|
|
|
|
|
|
|
|
1579
|
|
|
|
|
|
|
my $result = $client->clustal->run (fasta_file => 'my.proteins.fasta'); |
1580
|
|
|
|
|
|
|
|
1581
|
|
|
|
|
|
|
=over |
1582
|
|
|
|
|
|
|
|
1583
|
|
|
|
|
|
|
=item fasta_file |
1584
|
|
|
|
|
|
|
|
1585
|
|
|
|
|
|
|
A file with multiple sequences in FASTA format. |
1586
|
|
|
|
|
|
|
|
1587
|
|
|
|
|
|
|
=item open_cost |
1588
|
|
|
|
|
|
|
|
1589
|
|
|
|
|
|
|
A gap opening penalty (an integer). |
1590
|
|
|
|
|
|
|
|
1591
|
|
|
|
|
|
|
=item extend_cost |
1592
|
|
|
|
|
|
|
|
1593
|
|
|
|
|
|
|
A gap extension penalty (a float). |
1594
|
|
|
|
|
|
|
|
1595
|
|
|
|
|
|
|
=back |
1596
|
|
|
|
|
|
|
|
1597
|
|
|
|
|
|
|
It returns the result in an instance of F<MRS::Client::Clustal::Result>. |
1598
|
|
|
|
|
|
|
|
1599
|
|
|
|
|
|
|
=head3 open_cost |
1600
|
|
|
|
|
|
|
|
1601
|
|
|
|
|
|
|
It returns what gap opening penalty has been set in the I<run> method. |
1602
|
|
|
|
|
|
|
|
1603
|
|
|
|
|
|
|
=head3 extend_cost |
1604
|
|
|
|
|
|
|
|
1605
|
|
|
|
|
|
|
It returns what gap extension penalty has been set in the I<run> method. |
1606
|
|
|
|
|
|
|
|
1607
|
|
|
|
|
|
|
=head2 MRS::Client::Clustal::Result |
1608
|
|
|
|
|
|
|
|
1609
|
|
|
|
|
|
|
It is created by running: |
1610
|
|
|
|
|
|
|
|
1611
|
|
|
|
|
|
|
$client->clustal->run (...); |
1612
|
|
|
|
|
|
|
|
1613
|
|
|
|
|
|
|
=head3 alignment |
1614
|
|
|
|
|
|
|
|
1615
|
|
|
|
|
|
|
It returns a reference to an array of |
1616
|
|
|
|
|
|
|
F instances. Each of them has methods |
1617
|
|
|
|
|
|
|
I and I. You can also just print the formatted alignment |
1618
|
|
|
|
|
|
|
(it uses its own I<as_string> method that overloads double quotes |
1619
|
|
|
|
|
|
|
operator): |
1620
|
|
|
|
|
|
|
|
1621
|
|
|
|
|
|
|
print $client->clustal->run (fasta_file => 'several.proteins.fasta'); |
1622
|
|
|
|
|
|
|
|
1623
|
|
|
|
|
|
|
vsph_trije : -VMGWGTISATKETHPDVPYCANINILDYSVCRAAYARLPATSRTLCAGILE-----GGKDSCLTD----SGGPLICNGQFQGIVSWGGHPCGQP-RKPGLYTKVFDHLDWIKSIIAGNKDATCPP |
1624
|
|
|
|
|
|
|
nxsa_latse : ----MKTLLLTLVVVTIV--CLDLGYTR--ICFNHQSSQPQTTKT-CS---------PGESSCYNK----QWS------DFRGTIIERG--CGCPTVKPGI------KLSCCESEVCNN------- |
1625
|
|
|
|
|
|
|
pa21b_pseau: NLIQFGNMIQCANKGSRP--SLDYADYG-CYCGWGGSGTPVDELDRCCQVHDNCYEQAGKKGCFPKLTLYSWKCTGNVPTCNSKPGCKSFVCACDAAAAKC----FAKAPYKKENYNIDTKKRCK- |
1626
|
|
|
|
|
|
|
|
1627
|
|
|
|
|
|
|
=head3 diagnostics |
1628
|
|
|
|
|
|
|
|
1629
|
|
|
|
|
|
|
It shows the standard output of the underlying F<clustalw> program: |
1630
|
|
|
|
|
|
|
|
1631
|
|
|
|
|
|
|
my $result = $client->clustal->run (fasta_file => 'several.proteins.fasta'); |
1632
|
|
|
|
|
|
|
print $result->diagnostics; |
1633
|
|
|
|
|
|
|
|
1634
|
|
|
|
|
|
|
CLUSTAL 2.0.10 Multiple Sequence Alignments |
1635
|
|
|
|
|
|
|
|
1636
|
|
|
|
|
|
|
Sequence type explicitly set to Protein |
1637
|
|
|
|
|
|
|
Sequence format is Pearson |
1638
|
|
|
|
|
|
|
Sequence 1: vsph_trije 115 aa |
1639
|
|
|
|
|
|
|
Sequence 2: nxsa_latse 83 aa |
1640
|
|
|
|
|
|
|
Sequence 3: pa21b_pseau 118 aa |
1641
|
|
|
|
|
|
|
Start of Pairwise alignments |
1642
|
|
|
|
|
|
|
Aligning... |
1643
|
|
|
|
|
|
|
|
1644
|
|
|
|
|
|
|
Sequences (1:2) Aligned. Score: 13 |
1645
|
|
|
|
|
|
|
Sequences (1:3) Aligned. Score: 5 |
1646
|
|
|
|
|
|
|
Sequences (2:3) Aligned. Score: 8 |
1647
|
|
|
|
|
|
|
Guide tree file created: ... |
1648
|
|
|
|
|
|
|
|
1649
|
|
|
|
|
|
|
There are 2 groups |
1650
|
|
|
|
|
|
|
Start of Multiple Alignment |
1651
|
|
|
|
|
|
|
|
1652
|
|
|
|
|
|
|
Aligning... |
1653
|
|
|
|
|
|
|
Group 1: Delayed |
1654
|
|
|
|
|
|
|
Group 2: Delayed |
1655
|
|
|
|
|
|
|
Alignment Score -93 |
1656
|
|
|
|
|
|
|
|
1657
|
|
|
|
|
|
|
GDE-Alignment file created ... |
1658
|
|
|
|
|
|
|
|
1659
|
|
|
|
|
|
|
=head3 failed |
1660
|
|
|
|
|
|
|
|
1661
|
|
|
|
|
|
|
It returns standard error output of the underlying F<clustalw> |
1662
|
|
|
|
|
|
|
program. If the program finished without problems, it returns undef. |
1663
|
|
|
|
|
|
|
|
1664
|
|
|
|
|
|
|
=head1 MRS VERSIONS |
1665
|
|
|
|
|
|
|
|
1666
|
|
|
|
|
|
|
The SOAP API of the MRS server slightly (or significantly, depending |
1667
|
|
|
|
|
|
|
on what you were using) changed between version 5 and 6 (the version |
1668
|
|
|
|
|
|
|
numbers indicate the MRS server version, not the version of the |
1669
|
|
|
|
|
|
|
C<MRS::Client> module). The C<MRS::Client> module can work with both |
1670
|
|
|
|
|
|
|
MRS server versions, but sometimes you have to tell what version you |
1671
|
|
|
|
|
|
|
are planning to connect to. |
1672
|
|
|
|
|
|
|
|
1673
|
|
|
|
|
|
|
=head3 new parameter C<mrs_version> |
1674
|
|
|
|
|
|
|
|
1675
|
|
|
|
|
|
|
By default, the C<MRS::Client> assumes that it connects to an MRS |
1676
|
|
|
|
|
|
|
server version 6 (or higher). But for MRS servers version 5 you need |
1677
|
|
|
|
|
|
|
to add a new argument B<mrs_version> to the client instance |
1678
|
|
|
|
|
|
|
constructor with a value that differs from 6 (and is not zero or |
1679
|
|
|
|
|
|
|
undef): |
1680
|
|
|
|
|
|
|
|
1681
|
|
|
|
|
|
|
my $client = MRS::Client->new (mrs_version => 5, host => '...'); |
1682
|
|
|
|
|
|
|
|
1683
|
|
|
|
|
|
|
You can also set the expected version by an environment variable |
1684
|
|
|
|
|
|
|
C<MRS_VERSION>: |
1685
|
|
|
|
|
|
|
|
1686
|
|
|
|
|
|
|
$ENV{MRS_VERSION} = 5; |
1687
|
|
|
|
|
|
|
my $client = MRS::Client->new (host => '...'); |
1688
|
|
|
|
|
|
|
|
1689
|
|
|
|
|
|
|
You can also check what version your client is talking to, by a new |
1690
|
|
|
|
|
|
|
method B<is_v6> (mostly used rather internally): |
1691
|
|
|
|
|
|
|
|
1692
|
|
|
|
|
|
|
$client->is_v6() # returns 1 or 0 |
1693
|
|
|
|
|
|
|
|
1694
|
|
|
|
|
|
|
The command-line tool C<mrsclient> got an additional parameter B<-V>: |
1695
|
|
|
|
|
|
|
|
1696
|
|
|
|
|
|
|
mrsclient -V5 -H... -l |
1697
|
|
|
|
|
|
|
|
1698
|
|
|
|
|
|
|
=head3 missing some result formats |
1699
|
|
|
|
|
|
|
|
1700
|
|
|
|
|
|
|
The MRS 6 server does not support anymore B and B |
1701
|
|
|
|
|
|
|
result formats. The C format does not matter much because |
1702
|
|
|
|
|
|
|
the C format continues to be provided and it is easy to get the |
1703
|
|
|
|
|
|
|
pure sequence from it. But the lack of the C format is probably |
1704
|
|
|
|
|
|
|
the most significant (downgrade) change. |
1705
|
|
|
|
|
|
|
|
1706
|
|
|
|
|
|
|
=head3 search algorithm not supported |
1707
|
|
|
|
|
|
|
|
1708
|
|
|
|
|
|
|
The MRS 6 server does not accept anymore requests for different search |
1709
|
|
|
|
|
|
|
algorithms; it uses always the B algorithm. |
1710
|
|
|
|
|
|
|
|
1711
|
|
|
|
|
|
|
=head3 no ClustalW service |
1712
|
|
|
|
|
|
|
|
1713
|
|
|
|
|
|
|
The MRS 6 server does not provide multiple sequence alignment |
1714
|
|
|
|
|
|
|
service. All remarks about ClustalW in this document are, therefore, |
1715
|
|
|
|
|
|
|
valid only for the MRS 5. |
1716
|
|
|
|
|
|
|
|
1717
|
|
|
|
|
|
|
=head3 aliases |
1718
|
|
|
|
|
|
|
|
1719
|
|
|
|
|
|
|
The MRS 6 brings a new concept: I<aliases>. An alias is a set of |
1720
|
|
|
|
|
|
|
databases, usually closely related. A typical example is an alias |
1721
|
|
|
|
|
|
|
C<uniprot> that combines together two databases, the C<sprot> |
1722
|
|
|
|
|
|
|
(SwissProt) and C<trembl> (TrEMBL). You can use an alias in all places |
1723
|
|
|
|
|
|
|
where so far only database IDs were possible. |
1724
|
|
|
|
|
|
|
|
1725
|
|
|
|
|
|
|
However, the list of databases returned by the "db()" method does not |
1726
|
|
|
|
|
|
|
include the aliases. You need to ask individual databases for their |
1727
|
|
|
|
|
|
|
aliases: |
1728
|
|
|
|
|
|
|
|
1729
|
|
|
|
|
|
|
$client->db('sprot')->aliases(); |
1730
|
|
|
|
|
|
|
|
1731
|
|
|
|
|
|
|
=head1 MISSING FEATURES, CAVEATS, BUGS |
1732
|
|
|
|
|
|
|
|
1733
|
|
|
|
|
|
|
=over |
1734
|
|
|
|
|
|
|
|
1735
|
|
|
|
|
|
|
=item * |
1736
|
|
|
|
|
|
|
|
1737
|
|
|
|
|
|
|
The MRS distinguishes between so-called I and |
1738
|
|
|
|
|
|
|
I, and it recognizes also I. I |
1739
|
|
|
|
|
|
|
probably need to learn more about their differences. That's why you |
1740
|
|
|
|
|
|
|
may see some differences in query results shown by this module and the |
1741
|
|
|
|
|
|
|
B web application (an application distributed together with |
1742
|
|
|
|
|
|
|
the implementation of the MRS servers). |
1743
|
|
|
|
|
|
|
|
1744
|
|
|
|
|
|
|
The contents of the search field in the I is first parsed in |
1745
|
|
|
|
|
|
|
order to find out if it is a boolean expression, or not. Depending on |
1746
|
|
|
|
|
|
|
the result it uses either a ranked or boolean query. It also splits |
1747
|
|
|
|
|
|
|
the terms and combines them (by default) with the logical AND. For |
1748
|
|
|
|
|
|
|
example, in I if you type (using the F): |
1749
|
|
|
|
|
|
|
|
1750
|
|
|
|
|
|
|
cone snail |
1751
|
|
|
|
|
|
|
|
1752
|
|
|
|
|
|
|
you get 134 entries. You get the same number of hits by the |
1753
|
|
|
|
|
|
|
C<MRS::Client> module when using an I<and> argument: |
1754
|
|
|
|
|
|
|
|
1755
|
|
|
|
|
|
|
print $client->db('uniprot')->find ('and' => ['cone','snail'])->count; |
1756
|
|
|
|
|
|
|
134 |
1757
|
|
|
|
|
|
|
|
1758
|
|
|
|
|
|
|
But you cannot just pass the whole expression as a query string (as |
1759
|
|
|
|
|
|
|
you do in I): |
1760
|
|
|
|
|
|
|
|
1761
|
|
|
|
|
|
|
print $client->db('uniprot')->find ('cone snail')->count; |
1762
|
|
|
|
|
|
|
0 |
1763
|
|
|
|
|
|
|
|
1764
|
|
|
|
|
|
|
You get zero entries because the C<MRS::Client> considers the above as |
1765
|
|
|
|
|
|
|
one term. And if you add a boolean operator: |
1766
|
|
|
|
|
|
|
|
1767
|
|
|
|
|
|
|
print $client->db('uniprot')->find ('cone AND snail')->count; |
1768
|
|
|
|
|
|
|
4609 |
1769
|
|
|
|
|
|
|
|
1770
|
|
|
|
|
|
|
then the boolean query was used and, as explained by the MRS, the |
1771
|
|
|
|
|
|
|
"query did not return an exact result, displaying the closest |
1772
|
|
|
|
|
|
|
matches". But, fortunately, when you iterate over this result, you |
1773
|
|
|
|
|
|
|
will get, correctly, just the 134 entries. |
1774
|
|
|
|
|
|
|
|
1775
|
|
|
|
|
|
|
=item * |
1776
|
|
|
|
|
|
|
|
1777
|
|
|
|
|
|
|
The MRS servers provide a few more operations that are not yet covered |
1778
|
|
|
|
|
|
|
by this module. It would be useful to discuss which of those are worth |
1779
|
|
|
|
|
|
|
implementing. They are: |
1780
|
|
|
|
|
|
|
|
1781
|
|
|
|
|
|
|
GetMetaData |
1782
|
|
|
|
|
|
|
FindSimilar |
1783
|
|
|
|
|
|
|
GetLinked |
1784
|
|
|
|
|
|
|
Cooccurrence |
1785
|
|
|
|
|
|
|
SpellCheck |
1786
|
|
|
|
|
|
|
SuggestSearchTerms |
1787
|
|
|
|
|
|
|
CompareDocuments |
1788
|
|
|
|
|
|
|
ClusterDocuments |
1789
|
|
|
|
|
|
|
|
1790
|
|
|
|
|
|
|
There is also a potentially useful attribute I in the |
1791
|
|
|
|
|
|
|
databank's info which has not been yet explored by this module. |
1792
|
|
|
|
|
|
|
|
1793
|
|
|
|
|
|
|
=back |
1794
|
|
|
|
|
|
|
|
1795
|
|
|
|
|
|
|
=head1 ADDITIONAL FILES |
1796
|
|
|
|
|
|
|
|
1797
|
|
|
|
|
|
|
Almost all functionality of the C<MRS::Client> module is also |
1798
|
|
|
|
|
|
|
available from the command-line controlled scripts F<mrsclient>, |
1799
|
|
|
|
|
|
|
F<mrsblast> and F<mrsclustal>. Try, for example: |
1800
|
|
|
|
|
|
|
|
1801
|
|
|
|
|
|
|
mrsclient -h |
1802
|
|
|
|
|
|
|
mrsclient -C |
1803
|
|
|
|
|
|
|
mrsclient -c -n insulin |
1804
|
|
|
|
|
|
|
mrsclient -c -p -d enzyme -a 'endothelin tyrosine' |
1805
|
|
|
|
|
|
|
mrsblast -h |
1806
|
|
|
|
|
|
|
mrsclustal -h |
1807
|
|
|
|
|
|
|
|
1808
|
|
|
|
|
|
|
=head1 DEPENDENCIES |
1809
|
|
|
|
|
|
|
|
1810
|
|
|
|
|
|
|
The C<MRS::Client> module uses the following modules: |
1811
|
|
|
|
|
|
|
|
1812
|
|
|
|
|
|
|
XML::Compile::SOAP11 |
1813
|
|
|
|
|
|
|
XML::Compile::WSDL11 |
1814
|
|
|
|
|
|
|
XML::Compile::Transport::SOAPHTTP |
1815
|
|
|
|
|
|
|
File::Basename |
1816
|
|
|
|
|
|
|
File::Path |
1817
|
|
|
|
|
|
|
Math::BigInt |
1818
|
|
|
|
|
|
|
FindBin |
1819
|
|
|
|
|
|
|
Getopt::Std |
1820
|
|
|
|
|
|
|
|
1821
|
|
|
|
|
|
|
=head1 BUGS |
1822
|
|
|
|
|
|
|
|
1823
|
|
|
|
|
|
|
Please report any bugs or feature requests to |
1824
|
|
|
|
|
|
|
L. |
1825
|
|
|
|
|
|
|
|
1826
|
|
|
|
|
|
|
=head1 ACKNOWLEDGMENTS |
1827
|
|
|
|
|
|
|
|
1828
|
|
|
|
|
|
|
This client module would be useless without having an MRS server |
1829
|
|
|
|
|
|
|
(e.g. at F). The MRS stands for |
1830
|
|
|
|
|
|
|
B and was developed (and is maintained) by |
1831
|
|
|
|
|
|
|
I at the CMBI (F), with the |
1832
|
|
|
|
|
|
|
help and contributions from many others. |
1833
|
|
|
|
|
|
|
|
1834
|
|
|
|
|
|
|
The MRS itself has also its own Perl module F, called plugin |
1835
|
|
|
|
|
|
|
and distributed together with the MRS, that accesses MRS server(s) |
1836
|
|
|
|
|
|
|
directly, without using the SOAP Web Services protocol. The plugin |
1837
|
|
|
|
|
|
|
was helpful to find out what the server might expect. |
1838
|
|
|
|
|
|
|
|
1839
|
|
|
|
|
|
|
Additionally, the MRS distribution has a few testing scripts that use |
1840
|
|
|
|
|
|
|
SOAP protocol to access data in the same way as this C<MRS::Client> |
1841
|
|
|
|
|
|
|
module does. Therefore, this module can be seen as an extension of |
1842
|
|
|
|
|
|
|
these testing scripts into a slightly more comprehensive and perhaps |
1843
|
|
|
|
|
|
|
more documented package. |
1844
|
|
|
|
|
|
|
|
1845
|
|
|
|
|
|
|
The MRS server provides Blast results that are not in XML. In order to |
1846
|
|
|
|
|
|
|
make an XML output, this module uses, hopefully, the same format and |
1847
|
|
|
|
|
|
|
conversion as found in the MRS web application. |
1848
|
|
|
|
|
|
|
|
1849
|
|
|
|
|
|
|
=head1 AUTHOR |
1850
|
|
|
|
|
|
|
|
1851
|
|
|
|
|
|
|
Martin Senger |
1852
|
|
|
|
|
|
|
|
1853
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
1854
|
|
|
|
|
|
|
|
1855
|
|
|
|
|
|
|
This software is copyright (c) 2013 by Martin Senger, CBRC - KAUST (Computational Biology Research Center - King Abdullah University of Science and Technology) All Rights Reserved. |
1856
|
|
|
|
|
|
|
|
1857
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
1858
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
1859
|
|
|
|
|
|
|
|
1860
|
|
|
|
|
|
|
=cut |
1861
|
|
|
|
|
|
|
|
1862
|
|
|
|
|
|
|
|
1863
|
|
|
|
|
|
|
__END__ |