line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::DB::WebDBSeqI |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Jason Stajich |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright Jason Stajich |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
11
|
|
|
|
|
|
|
# |
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
13
|
|
|
|
|
|
|
# |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 NAME |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
Bio::DB::WebDBSeqI - Object Interface to generalize Web Databases |
18
|
|
|
|
|
|
|
for retrieving sequences |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 SYNOPSIS |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# get a WebDBSeqI object somehow |
23
|
|
|
|
|
|
|
# assuming it is a nucleotide db |
24
|
|
|
|
|
|
|
my $seq = $db->get_Seq_by_id('ROA1_HUMAN') |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=head1 DESCRIPTION |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
Provides core set of functionality for connecting to a web based |
29
|
|
|
|
|
|
|
database for retrieving sequences. |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
Users wishing to add another Web Based Sequence Database will need to |
32
|
|
|
|
|
|
|
extend this class (see L<Bio::DB::SwissProt> or L<Bio::DB::GenBank> for |
33
|
|
|
|
|
|
|
examples) and implement the get_request method which returns a |
34
|
|
|
|
|
|
|
HTTP::Request for the specified uids (accessions, ids, etc depending |
35
|
|
|
|
|
|
|
on what query types the database accepts). |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=head1 FEEDBACK |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=head2 Mailing Lists |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
User feedback is an integral part of the |
42
|
|
|
|
|
|
|
evolution of this and other Bioperl modules. Send |
43
|
|
|
|
|
|
|
your comments and suggestions preferably to one |
44
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation |
45
|
|
|
|
|
|
|
is much appreciated. |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
48
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=head2 Support |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
I<bioperl-l@bioperl.org> |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
57
|
|
|
|
|
|
|
responsive experts will be able to look at the problem and quickly |
58
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
59
|
|
|
|
|
|
|
with code and data examples if at all possible. |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=head2 Reporting Bugs |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to |
64
|
|
|
|
|
|
|
help us keep track of the bugs and their resolution. |
65
|
|
|
|
|
|
|
Bug reports can be submitted via the web. |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=head1 AUTHOR - Jason Stajich |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
Email E<lt> jason@bioperl.org E<gt> |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=head1 APPENDIX |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
The rest of the documentation details each of the |
76
|
|
|
|
|
|
|
object methods. Internal methods are usually |
77
|
|
|
|
|
|
|
preceded with a _ |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=cut |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
# Let the code begin... |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
package Bio::DB::WebDBSeqI; |
84
|
7
|
|
|
7
|
|
76
|
use strict; |
|
7
|
|
|
|
|
11
|
|
|
7
|
|
|
|
|
187
|
|
85
|
7
|
|
|
|
|
375
|
use vars qw($MODVERSION %RETRIEVAL_TYPES $DEFAULT_RETRIEVAL_TYPE |
86
|
7
|
|
|
7
|
|
21
|
$DEFAULTFORMAT $LAST_INVOCATION_TIME @ATTRIBUTES); |
|
7
|
|
|
|
|
8
|
|
87
|
|
|
|
|
|
|
|
88
|
7
|
|
|
7
|
|
1201
|
use Bio::SeqIO; |
|
7
|
|
|
|
|
9
|
|
|
7
|
|
|
|
|
167
|
|
89
|
7
|
|
|
7
|
|
26
|
use Bio::Root::IO; |
|
7
|
|
|
|
|
8
|
|
|
7
|
|
|
|
|
102
|
|
90
|
7
|
|
|
7
|
|
1570
|
use LWP::UserAgent; |
|
7
|
|
|
|
|
42423
|
|
|
7
|
|
|
|
|
151
|
|
91
|
7
|
|
|
7
|
|
2945
|
use POSIX 'setsid'; |
|
7
|
|
|
|
|
31783
|
|
|
7
|
|
|
|
|
32
|
|
92
|
7
|
|
|
7
|
|
5777
|
use HTTP::Request::Common; |
|
7
|
|
|
|
|
8
|
|
|
7
|
|
|
|
|
315
|
|
93
|
7
|
|
|
7
|
|
26
|
use HTTP::Response; |
|
7
|
|
|
|
|
7
|
|
|
7
|
|
|
|
|
163
|
|
94
|
7
|
|
|
7
|
|
25
|
use File::Spec; |
|
7
|
|
|
|
|
10
|
|
|
7
|
|
|
|
|
173
|
|
95
|
7
|
|
|
7
|
|
2838
|
use IO::Pipe; |
|
7
|
|
|
|
|
5787
|
|
|
7
|
|
|
|
|
162
|
|
96
|
7
|
|
|
7
|
|
1227
|
use IO::String; |
|
7
|
|
|
|
|
5095
|
|
|
7
|
|
|
|
|
137
|
|
97
|
7
|
|
|
7
|
|
30
|
use Bio::Root::Root; |
|
7
|
|
|
|
|
10
|
|
|
7
|
|
|
|
|
133
|
|
98
|
|
|
|
|
|
|
|
99
|
7
|
|
|
7
|
|
21
|
use base qw(Bio::DB::RandomAccessI); |
|
7
|
|
|
|
|
8
|
|
|
7
|
|
|
|
|
1707
|
|
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
# Package-wide defaults, assigned at compile time.
BEGIN {
    # version string appended to the class name to form the user-agent name
    $MODVERSION = '0.8';

    # retrieval strategies that retrieval_type() accepts
    %RETRIEVAL_TYPES = map { $_ => 1 } qw(io_string tempfile pipeline);

    $DEFAULT_RETRIEVAL_TYPE = 'pipeline';
    $DEFAULTFORMAT          = 'fasta';
    $LAST_INVOCATION_TIME   = 0;
}
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
sub new {
    # Constructor. Recognised named arguments: -baseaddress, -params,
    # -retrievaltype, -format, -delay and -db; all are optional.
    my ($class, @args) = @_;
    my $self = $class->SUPER::new(@args);

    my ($baseaddress, $params, $ret_type, $format, $delay, $db) =
        $self->_rearrange(
            [qw(BASEADDRESS PARAMS RETRIEVALTYPE FORMAT DELAY DB)],
            @args);

    # fall back to the package default when no retrieval type was supplied
    $ret_type ||= $DEFAULT_RETRIEVAL_TYPE;

    # only true values are pushed into the corresponding accessors
    $self->url_base_address($baseaddress) if $baseaddress;
    $self->url_params($params)            if $params;
    $self->db($db)                        if $db;
    $self->retrieval_type($ret_type)      if $ret_type;

    # an explicit -delay wins; otherwise ask the object for its delay policy
    if ( !defined $delay ) {
        $delay = $self->delay_policy;
    }
    $self->delay($delay);

    # ensure we always have a default format set for retrieval,
    # even though this will be immediately overwritten by most subclasses
    if ( !defined $format || $format eq '' ) {
        $format = $self->default_format;
    }
    $self->request_format($format);

    # the user agent picks up proxy settings from the environment and
    # identifies itself as "<class of the object>/<module version>"
    my $ua = LWP::UserAgent->new(env_proxy => 1);
    $ua->agent(ref($self) . "/$MODVERSION");
    $self->ua($ua);

    # start with no credentials; authentication() fills this in later
    $self->{'_authentication'} = [];
    return $self;
}
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
# from Bio::DB::RandomAccessI |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=head2 get_Seq_by_id |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
Title : get_Seq_by_id |
146
|
|
|
|
|
|
|
Usage : $seq = $db->get_Seq_by_id('ROA1_HUMAN') |
147
|
|
|
|
|
|
|
Function: Gets a Bio::Seq object by its name |
148
|
|
|
|
|
|
|
Returns : a Bio::Seq object |
149
|
|
|
|
|
|
|
Args : the id (as a string) of a sequence |
150
|
|
|
|
|
|
|
Throws : "id does not exist" exception |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=cut |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
sub get_Seq_by_id {
    # Fetch the sequence(s) for one id.  Returns every sequence read from
    # the stream in list context, only the first one otherwise.
    my ($self, $seqid) = @_;
    $self->_sleep;

    # the stream method takes an array reference of ids
    my $seqio = $self->get_Stream_by_id([$seqid]);
    unless ( defined $seqio ) {
        $self->throw("id does not exist");
    }

    # When the object has a complexity() method that returns 0, the stream
    # itself is handed back (with a warning) instead of being drained.
    my $stream_only = $self->can('complexity')
        && defined $self->complexity
        && $self->complexity == 0;
    if ($stream_only) {
        $self->warn("When complexity is set to 0, use get_Stream_by_id\n".
                    "Returning Bio::SeqIO object");
        return $seqio;
    }

    # drain the stream
    my @seqs;
    SEQ:
    while (1) {
        my $seq = $seqio->next_seq() or last SEQ;
        push @seqs, $seq;
    }

    # Since $seqio will not be used anymore, explicitly close its filehandle
    # or it will cause trouble later on cleanup
    $seqio->close;

    @seqs or $self->throw("id '$seqid' does not exist");
    return wantarray ? @seqs : shift @seqs;
}
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=head2 get_Seq_by_acc |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
Title : get_Seq_by_acc |
179
|
|
|
|
|
|
|
Usage : $seq = $db->get_Seq_by_acc('X77802'); |
180
|
|
|
|
|
|
|
Function: Gets a Bio::Seq object by accession number |
181
|
|
|
|
|
|
|
Returns : A Bio::Seq object |
182
|
|
|
|
|
|
|
Args : accession number (as a string) |
183
|
|
|
|
|
|
|
Throws : "acc does not exist" exception |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
=cut |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
sub get_Seq_by_acc {
    # Fetch the sequence(s) for one accession.  Returns every sequence read
    # from the stream in list context, only the first one otherwise.
    my ($self, $seqid) = @_;
    $self->_sleep;

    my $seqio = $self->get_Stream_by_acc($seqid);
    unless ( defined $seqio ) {
        $self->throw("acc '$seqid' does not exist");
    }

    # When the object has a complexity() method that returns 0, the stream
    # itself is handed back (with a warning) instead of being drained.
    my $stream_only = $self->can('complexity')
        && defined $self->complexity
        && $self->complexity == 0;
    if ($stream_only) {
        $self->warn("When complexity is set to 0, use get_Stream_by_acc\n".
                    "Returning Bio::SeqIO object");
        return $seqio;
    }

    # drain the stream
    my @seqs;
    SEQ:
    while (1) {
        my $seq = $seqio->next_seq() or last SEQ;
        push @seqs, $seq;
    }

    @seqs or $self->throw("acc $seqid does not exist");
    return wantarray ? @seqs : shift @seqs;
}
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
=head2 get_Seq_by_gi |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
Title : get_Seq_by_gi |
207
|
|
|
|
|
|
|
Usage : $seq = $db->get_Seq_by_gi('405830'); |
208
|
|
|
|
|
|
|
Function: Gets a Bio::Seq object by gi number |
209
|
|
|
|
|
|
|
Returns : A Bio::Seq object |
210
|
|
|
|
|
|
|
Args : gi number (as a string) |
211
|
|
|
|
|
|
|
Throws : "gi does not exist" exception |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
=cut |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
sub get_Seq_by_gi {
    # Fetch the sequence(s) for one gi number.  Returns every sequence read
    # from the stream in list context, only the first one otherwise.
    my ($self, $seqid) = @_;
    $self->_sleep;

    my $seqio = $self->get_Stream_by_gi($seqid);
    unless ( defined $seqio ) {
        $self->throw("gi does not exist");
    }

    # When the object has a complexity() method that returns 0, the stream
    # itself is handed back (with a warning) instead of being drained.
    my $stream_only = $self->can('complexity')
        && defined $self->complexity
        && $self->complexity == 0;
    if ($stream_only) {
        $self->warn("When complexity is set to 0, use get_Stream_by_gi\n".
                    "Returning Bio::SeqIO object");
        return $seqio;
    }

    # drain the stream
    my @seqs;
    SEQ:
    while (1) {
        my $seq = $seqio->next_seq() or last SEQ;
        push @seqs, $seq;
    }

    @seqs or $self->throw("gi does not exist");
    return wantarray ? @seqs : shift @seqs;
}
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
=head2 get_Seq_by_version |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
Title : get_Seq_by_version |
234
|
|
|
|
|
|
|
Usage : $seq = $db->get_Seq_by_version('X77802.1'); |
235
|
|
|
|
|
|
|
Function: Gets a Bio::Seq object by sequence version |
236
|
|
|
|
|
|
|
Returns : A Bio::Seq object |
237
|
|
|
|
|
|
|
Args : accession.version (as a string) |
238
|
|
|
|
|
|
|
Throws : "acc.version does not exist" exception |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=cut |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
sub get_Seq_by_version {
    # Fetch the sequence(s) for one accession.version string.  Returns every
    # sequence read from the stream in list context, only the first otherwise.
    my ($self, $seqid) = @_;
    $self->_sleep;

    my $seqio = $self->get_Stream_by_version($seqid);
    unless ( defined $seqio ) {
        $self->throw("accession.version does not exist");
    }

    # When the object has a complexity() method that returns 0, the stream
    # itself is handed back (with a warning) instead of being drained.
    my $stream_only = $self->can('complexity')
        && defined $self->complexity
        && $self->complexity == 0;
    if ($stream_only) {
        $self->warn("When complexity is set to 0, use get_Stream_by_version\n".
                    "Returning Bio::SeqIO object");
        return $seqio;
    }

    # drain the stream
    my @seqs;
    SEQ:
    while (1) {
        my $seq = $seqio->next_seq() or last SEQ;
        push @seqs, $seq;
    }

    @seqs or $self->throw("accession.version does not exist");
    return wantarray ? @seqs : shift @seqs;
}
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
# implementing class must define these |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=head2 get_request |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
Title : get_request |
263
|
|
|
|
|
|
|
Usage : my $url = $self->get_request |
264
|
|
|
|
|
|
|
Function: returns a HTTP::Request object |
265
|
|
|
|
|
|
|
Returns : |
266
|
|
|
|
|
|
|
Args : %qualifiers = a hash of qualifiers (ids, format, etc) |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=cut |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
sub get_request {
    # Placeholder: this base implementation only throws, so an implementing
    # class has to provide its own get_request.
    my ($self) = @_;
    $self->throw(
        "Implementing class must define method get_request in class WebDBSeqI"
    );
}
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
# class methods |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=head2 get_Stream_by_id |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
Title : get_Stream_by_id |
281
|
|
|
|
|
|
|
Usage : $stream = $db->get_Stream_by_id( [$uid1, $uid2] ); |
282
|
|
|
|
|
|
|
Function: Gets a series of Seq objects by unique identifiers |
283
|
|
|
|
|
|
|
Returns : a Bio::SeqIO stream object |
284
|
|
|
|
|
|
|
Args : $ref : a reference to an array of unique identifiers for |
285
|
|
|
|
|
|
|
the desired sequence entries |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
=cut |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
sub get_Stream_by_id {
    # Delegates to get_seq_stream in 'single' mode, passing the first
    # element returned by request_format as the -format qualifier.
    my ($self, $ids) = @_;
    my ($webfmt) = $self->request_format;
    my @qualifiers = (
        '-uids'   => $ids,
        '-mode'   => 'single',
        '-format' => $webfmt,
    );
    return $self->get_seq_stream(@qualifiers);
}
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
# Deprecated alias: reports the deprecation, then forwards all arguments to
# get_Stream_by_id().
*get_Stream_by_batch = sub {
    my ($self, @args) = @_;
    $self->deprecated('get_Stream_by_batch() is deprecated; use get_Stream_by_id() instead');
    return $self->get_Stream_by_id(@args);
};
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
=head2 get_Stream_by_acc |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
Title : get_Stream_by_acc |
307
|
|
|
|
|
|
|
Usage : $seq = $db->get_Stream_by_acc([$acc1, $acc2]); |
308
|
|
|
|
|
|
|
Function: Gets a series of Seq objects by accession numbers |
309
|
|
|
|
|
|
|
Returns : a Bio::SeqIO stream object |
310
|
|
|
|
|
|
|
Args : $ref : a reference to an array of accession numbers for |
311
|
|
|
|
|
|
|
the desired sequence entries |
312
|
|
|
|
|
|
|
Note : For GenBank, this just calls the same code for get_Stream_by_id() |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
=cut |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
sub get_Stream_by_acc {
    # Delegates to get_seq_stream in 'single' mode; no -format is passed.
    my ($self, $ids) = @_;
    my @qualifiers = ('-uids' => $ids, '-mode' => 'single');
    return $self->get_seq_stream(@qualifiers);
}
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
=head2 get_Stream_by_gi |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
Title : get_Stream_by_gi |
325
|
|
|
|
|
|
|
Usage : $seq = $db->get_Stream_by_gi([$gi1, $gi2]); |
326
|
|
|
|
|
|
|
Function: Gets a series of Seq objects by gi numbers |
327
|
|
|
|
|
|
|
Returns : a Bio::SeqIO stream object |
328
|
|
|
|
|
|
|
Args : $ref : a reference to an array of gi numbers for |
329
|
|
|
|
|
|
|
the desired sequence entries |
330
|
|
|
|
|
|
|
Note : For GenBank, this just calls the same code for get_Stream_by_id() |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
=cut |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
sub get_Stream_by_gi {
    # Delegates to get_seq_stream in 'gi' mode.
    my ($self, $ids) = @_;
    my @qualifiers = ('-uids' => $ids, '-mode' => 'gi');
    return $self->get_seq_stream(@qualifiers);
}
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
=head2 get_Stream_by_version |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
Title : get_Stream_by_version |
342
|
|
|
|
|
|
|
Usage : $seq = $db->get_Stream_by_version([$version1, $version2]); |
343
|
|
|
|
|
|
|
Function: Gets a series of Seq objects by accession.versions |
344
|
|
|
|
|
|
|
Returns : a Bio::SeqIO stream object |
345
|
|
|
|
|
|
|
Args : $ref : a reference to an array of accession.version strings for |
346
|
|
|
|
|
|
|
the desired sequence entries |
347
|
|
|
|
|
|
|
Note : For GenBank, this is implemented in NCBIHelper |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
=cut |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
sub get_Stream_by_version {
    # Delegates to get_seq_stream in 'version' mode.
    my ($self, $ids) = @_;
    # $self->throw("Implementing class should define this method!");
    my @qualifiers = ('-uids' => $ids, '-mode' => 'version');
    return $self->get_seq_stream(@qualifiers);    # how it should work
}
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
=head2 get_Stream_by_query |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
Title : get_Stream_by_query |
360
|
|
|
|
|
|
|
Usage : $stream = $db->get_Stream_by_query($query); |
361
|
|
|
|
|
|
|
Function: Gets a series of Seq objects by way of a query string or object |
362
|
|
|
|
|
|
|
Returns : a Bio::SeqIO stream object |
363
|
|
|
|
|
|
|
Args : $query : A string that uses the appropriate query language |
364
|
|
|
|
|
|
|
for the database or a Bio::DB::QueryI object. It is suggested |
365
|
|
|
|
|
|
|
that you create the Bio::DB::Query object first and interrogate |
366
|
|
|
|
|
|
|
it for the entry count before you fetch a potentially large stream. |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
=cut |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
sub get_Stream_by_query {
    # Delegates to get_seq_stream in 'query' mode, passing the query through
    # untouched as the -query qualifier.
    my ($self, $query) = @_;
    my @qualifiers = ('-query' => $query, '-mode' => 'query');
    return $self->get_seq_stream(@qualifiers);
}
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
=head2 default_format |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
Title : default_format |
378
|
|
|
|
|
|
|
Usage : my $format = $self->default_format |
379
|
|
|
|
|
|
|
Function: Returns default sequence format for this module |
380
|
|
|
|
|
|
|
Returns : string |
381
|
|
|
|
|
|
|
Args : none |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
=cut |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
sub default_format {
    # Returns the package-level default sequence format.
    my $format = $DEFAULTFORMAT;
    return $format;
}
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
# sorry, but this is hacked in because of BioFetch problems... |
390
|
|
|
|
|
|
|
sub db {
    # Get/set the database name.  Note that the value returned is the one
    # held BEFORE any new value is stored.
    my ($self, @new) = @_;
    my $previous = $self->{_db};
    if (@new) {
        $self->{_db} = $new[0];
    }
    return $previous;
}
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
=head2 request_format |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
Title : request_format |
400
|
|
|
|
|
|
|
Usage : my ($req_format, $ioformat) = $self->request_format; |
401
|
|
|
|
|
|
|
$self->request_format("genbank"); |
402
|
|
|
|
|
|
|
$self->request_format("fasta"); |
403
|
|
|
|
|
|
|
Function: Get/Set sequence format retrieval. The get-form will normally not |
404
|
|
|
|
|
|
|
be used outside of this and derived modules. |
405
|
|
|
|
|
|
|
Returns : Array of two strings, the first representing the format for |
406
|
|
|
|
|
|
|
retrieval, and the second specifying the corresponding SeqIO format. |
407
|
|
|
|
|
|
|
Args : $format = sequence format |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
=cut |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
sub request_format {
    # Get/set the format pair.  Setting stores the same value in both
    # slots (retrieval format, SeqIO format); the stored pair is returned.
    my ($self, $value) = @_;

    $self->{'_format'} = [ ($value) x 2 ] if defined $value;

    return @{ $self->{'_format'} };
}
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
=head2 get_seq_stream |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
Title : get_seq_stream |
423
|
|
|
|
|
|
|
Usage : my $seqio = $self->get_seq_stream(%qualifiers) |
424
|
|
|
|
|
|
|
Function: builds a url and queries a web db |
425
|
|
|
|
|
|
|
Returns : a Bio::SeqIO stream capable of producing sequence |
426
|
|
|
|
|
|
|
Args : %qualifiers = a hash qualifiers that the implementing class |
427
|
|
|
|
|
|
|
will process to make a url suitable for web querying |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
=cut |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
sub get_seq_stream {
    # Builds the request from %qualifiers via get_request(), fetches it with
    # the configured retrieval type (pipeline, tempfile or io_string) and
    # returns a Bio::SeqIO object reading the result.
    my ($self, %qualifiers) = @_;
    my ($rformat, $ioformat) = $self->request_format();

    # Any qualifier whose name contains "format" (case-insensitive)
    # overrides the request format; otherwise the current one is sent along
    # as -format.
    my @format_keys = grep { /format/i } keys %qualifiers;
    for my $key (@format_keys) {
        $rformat = $qualifiers{$key};
    }
    $qualifiers{'-format'} = $rformat unless @format_keys;
    ($rformat, $ioformat) = $self->request_format($rformat);

    # These parameters are implemented for Bio::DB::GenBank objects only
    if ( $self->isa('Bio::DB::GenBank') ) {
        $qualifiers{'-seq_start'}  = $self->seq_start()  if $self->seq_start();
        $qualifiers{'-seq_stop'}   = $self->seq_stop()   if $self->seq_stop();
        $qualifiers{'-strand'}     = $self->strand()     if $self->strand();
        $qualifiers{'-complexity'} = $self->complexity() if defined $self->complexity();
    }

    my $request = $self->get_request(%qualifiers);
    if ( $self->authentication ) {
        $request->proxy_authorization_basic( $self->authentication );
    }
    $self->debug("request is ". $request->as_string(). "\n");

    # workaround for MSWin systems
    if ( $self->retrieval_type =~ /pipeline/ && $^O =~ /^MSWin/ ) {
        $self->retrieval_type('io_string');
    }

    if ( $self->retrieval_type =~ /pipeline/ ) {
        # Try to create a stream using POSIX fork-and-pipe facility.
        # this is a *big* win when fetching thousands of sequences from
        # a web database because we can return the first entry while
        # transmission is still in progress.
        # Also, no need to keep sequence in memory or in a temporary file.
        # If this fails (Windows, MacOS 9), we fall back to non-pipelined access.

        # fork and pipe: _stream_request()=>
        my ($result, $stream) = $self->_open_pipe();

        if ( !defined $result ) {
            # no pipe available: fall through to the io_string branch below
            $self->retrieval_type('io_string');
        }
        else {
            $DB::fork_TTY = File::Spec->devnull; # prevents complaints from debugger
            if ($result) {
                # parent process: read sequences from the pipe
                return Bio::SeqIO->new('-verbose' => $self->verbose,
                                       '-format'  => $ioformat,
                                       '-fh'      => $stream);
            }
            # in child process
            $self->_stream_request($request, $stream);
            POSIX::_exit(0); #prevent END blocks from executing in this forked child
        }
    }

    if ( $self->retrieval_type =~ /temp/i ) {
        my $dir = $self->io->tempdir( CLEANUP => 1);
        my ( $fh, $tmpfile) = $self->io()->tempfile( DIR => $dir );
        close $fh;
        my $resp = $self->_request($request, $tmpfile);
        if ( !( -e $tmpfile ) || ( -z $tmpfile ) || !$resp->is_success() ) {
            $self->throw("WebDBSeqI Error - check query sequences!\n");
        }
        $self->postprocess_data('type'     => 'file',
                                'location' => $tmpfile);
        # this may get reset when requesting batch mode
        ($rformat, $ioformat) = $self->request_format();
        if ( $self->verbose > 0 ) {
            # echo the downloaded file to the debug channel
            open my $ERR, '<', $tmpfile or $self->throw("Could not read file '$tmpfile': $!");
            while (<$ERR>) {
                $self->debug($_);
            }
            close $ERR;
        }

        return Bio::SeqIO->new('-verbose' => $self->verbose,
                               '-format'  => $ioformat,
                               '-file'    => $tmpfile);
    }

    if ( $self->retrieval_type =~ /io_string/i ) {
        my $resp    = $self->_request($request);
        my $content = $resp->content_ref;
        $self->debug( "content is $$content\n");
        if ( !$resp->is_success() || length($$content) == 0 ) {
            $self->throw("WebDBSeqI Error - check query sequences!\n");
        }
        ($rformat, $ioformat) = $self->request_format();
        $self->postprocess_data('type'     => 'string',
                                'location' => $content);
        $self->debug( "str is $$content\n");
        return Bio::SeqIO->new('-verbose' => $self->verbose,
                               '-format'  => $ioformat,
                               '-fh'      => IO::String->new($$content));
    }

    # if we got here, we don't know how to handle the retrieval type
    $self->throw("retrieval type " . $self->retrieval_type .
                 " unsupported\n");
}
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
=head2 url_base_address |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
Title : url_base_address |
533
|
|
|
|
|
|
|
Usage : my $address = $self->url_base_address or |
534
|
|
|
|
|
|
|
$self->url_base_address($address) |
535
|
|
|
|
|
|
|
Function: Get/Set the base URL for the Web Database |
536
|
|
|
|
|
|
|
Returns : Base URL for the Web Database |
537
|
|
|
|
|
|
|
Args : $address - URL for the WebDatabase |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
=cut |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
sub url_base_address {
    # Get/set the base URL.  Note that the value returned is the one held
    # BEFORE any new value is stored.
    my ($self, @new) = @_;
    my $previous = $self->{'_baseaddress'};
    if (@new) {
        $self->{'_baseaddress'} = $new[0];
    }
    return $previous;
}
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
=head2 proxy |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
Title : proxy |
552
|
|
|
|
|
|
|
Usage : $httpproxy = $db->proxy('http') or |
553
|
|
|
|
|
|
|
$db->proxy(['http','ftp'], 'http://myproxy' ) |
554
|
|
|
|
|
|
|
Function: Get/Set a proxy for use of proxy |
555
|
|
|
|
|
|
|
Returns : a string indicating the proxy |
556
|
|
|
|
|
|
|
Args : $protocol : an array ref of the protocol(s) to set/get |
557
|
|
|
|
|
|
|
$proxyurl : url of the proxy to use for the specified protocol |
558
|
|
|
|
|
|
|
$username : username (if proxy requires authentication) |
559
|
|
|
|
|
|
|
$password : password (if proxy requires authentication) |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
=cut |
562
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
sub proxy {
    # Registers a proxy on the user agent; returns nothing when the agent,
    # protocol or proxy URL is missing.
    my ($self, $protocol, $proxy, $username, $password) = @_;

    return
        unless defined( $self->ua )
        && defined($protocol)
        && defined($proxy);

    # credentials are stored only when both parts are true values
    if ( $username && $password ) {
        $self->authentication($username, $password);
    }

    return $self->ua->proxy($protocol, $proxy);
}
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
=head2 authentication |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
Title : authentication |
575
|
|
|
|
|
|
|
Usage : $db->authentication($user,$pass) |
576
|
|
|
|
|
|
|
Function: Get/Set authentication credentials |
577
|
|
|
|
|
|
|
Returns : Array of user/pass |
578
|
|
|
|
|
|
|
Args : Array of user/pass |
579
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
=cut |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
sub authentication{
    # Get/set the credentials pair.  A new pair is stored only when both the
    # user and the password are defined; the stored pair is returned.
    my ($self, $u, $p) = @_;

    my $have_both = defined $u && defined $p;
    $self->{'_authentication'} = [ $u, $p ] if $have_both;

    return @{ $self->{'_authentication'} };
}
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
=head2 retrieval_type |
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
Title : retrieval_type |
596
|
|
|
|
|
|
|
Usage : $self->retrieval_type($type); |
597
|
|
|
|
|
|
|
my $type = $self->retrieval_type |
598
|
|
|
|
|
|
|
Function: Get/Set the retrieval type (pipeline, io_string or tempfile) |
599
|
|
|
|
|
|
|
Returns : string representing retrieval type |
600
|
|
|
|
|
|
|
Args : $value - the value to store |
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
This setting affects how the data stream from the remote web server is |
603
|
|
|
|
|
|
|
processed and passed to the Bio::SeqIO layer. Three types of retrieval |
604
|
|
|
|
|
|
|
types are currently allowed: |
605
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
pipeline Perform a fork in an attempt to begin streaming |
607
|
|
|
|
|
|
|
while the data is still downloading from the remote |
608
|
|
|
|
|
|
|
server. Disk, memory and speed efficient, but will |
609
|
|
|
|
|
|
|
not work on Windows or MacOS 9 platforms. |
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
io_string Store downloaded database entry(s) in memory. Can be |
612
|
|
|
|
|
|
|
problematic for batch downloads because entire set |
613
|
|
|
|
|
|
|
of entries must fit in memory. All entries must be |
614
|
|
|
|
|
|
|
downloaded before processing can begin. |
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
tempfile Store downloaded database entry(s) in a temporary file. |
617
|
|
|
|
|
|
|
All entries must be downloaded before processing can |
618
|
|
|
|
|
|
|
begin. |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
The default is pipeline, with automatic fallback to io_string if |
621
|
|
|
|
|
|
|
pipelining is not available. |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
=cut |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
sub retrieval_type {
    # Get/set the retrieval type.  A supplied value is lower-cased and
    # checked against %RETRIEVAL_TYPES; an unknown value triggers a warning
    # and is replaced by $DEFAULT_RETRIEVAL_TYPE.  Returns the stored type.
    my ($self, $value) = @_;
    if ( defined $value ) {
        $value = lc $value;
        if ( !$RETRIEVAL_TYPES{$value} ) {
            # Build the whole message as ONE string: the closing ")" used to
            # be passed as a separate argument and was therefore dropped.
            # Keys are sorted so the message is stable between runs.
            $self->warn( "invalid retrieval type $value must be one of ("
                    . join( ",", sort keys %RETRIEVAL_TYPES )
                    . ")" );
            $value = $DEFAULT_RETRIEVAL_TYPE;
        }
        $self->{'_retrieval_type'} = $value;
    }
    return $self->{'_retrieval_type'};
}
638
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
=head2 url_params |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
Title : url_params |
642
|
|
|
|
|
|
|
Usage : my $params = $self->url_params or |
643
|
|
|
|
|
|
|
$self->url_params($params) |
644
|
|
|
|
|
|
|
Function: Get/Set the URL parameters for the Web Database |
645
|
|
|
|
|
|
|
Returns : url parameters for Web Database |
646
|
|
|
|
|
|
|
Args : $params - parameters to be appended to the URL for the WebDatabase |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
=cut |
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
sub url_params {
    # Get/set the URL parameters.  A defined argument is stored; the
    # currently stored value is always returned (previously the getter form
    # fell off the end of the sub and returned a false value instead).
    my ($self, $value) = @_;
    if ( defined $value ) {
        $self->{'_urlparams'} = $value;
    }
    return $self->{'_urlparams'};
}
656
|
|
|
|
|
|
|
|
657
|
|
|
|
|
|
|
=head2 ua |
658
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
Title : ua |
660
|
|
|
|
|
|
|
Usage : my $ua = $self->ua or |
661
|
|
|
|
|
|
|
$self->ua($ua) |
662
|
|
|
|
|
|
|
Function: Get/Set a LWP::UserAgent for use |
663
|
|
|
|
|
|
|
Returns : reference to LWP::UserAgent Object |
664
|
|
|
|
|
|
|
Args : $ua - must be a LWP::UserAgent |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
=cut |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
sub ua {
    # Get/set the user agent.  A new agent is stored only when it is defined
    # and isa LWP::UserAgent; the stored agent is returned either way.
    my ($self, $ua) = @_;

    $self->{'_ua'} = $ua
        if defined $ua && $ua->isa("LWP::UserAgent");

    return $self->{'_ua'};
}
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
=head2 postprocess_data |
677
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
Title : postprocess_data |
679
|
|
|
|
|
|
|
Usage : $self->postprocess_data ( 'type' => 'string', |
680
|
|
|
|
|
|
|
'location' => \$datastr); |
681
|
|
|
|
|
|
|
Function: process downloaded data before loading into a Bio::SeqIO |
682
|
|
|
|
|
|
|
Returns : void |
683
|
|
|
|
|
|
|
Args : hash with two keys - 'type' can be 'string' or 'file' |
684
|
|
|
|
|
|
|
- 'location' either file location or string |
685
|
|
|
|
|
|
|
reference containing data |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
=cut |
688
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
sub postprocess_data {
    # Hook called with 'type' and 'location' arguments after a download;
    # this base implementation does nothing and returns nothing.
    my $self = shift;
    my %args = @_;
    return;
}
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
# private methods |
695
|
|
|
|
|
|
|
sub _request {
    # Runs the request through the user agent and returns the response.
    # A non-empty $tmpfile is handed to the agent as second argument.
    # Throws when the response reports an error.
    my ($self, $url, $tmpfile) = @_;

    my @request_args = ($url);
    if ( defined $tmpfile && $tmpfile ne '' ) {
        push @request_args, $tmpfile;
    }
    my $resp = $self->ua->request(@request_args);

    $self->throw("WebDBSeqI Request Error:\n".$resp->as_string)
        if $resp->is_error;
    return $resp;
}
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
#mod_perl-safe replacement for the open(BLEH,'-|') call. if running |
711
|
|
|
|
|
|
|
#under mod_perl, detects it and closes the child's STDIN and STDOUT |
712
|
|
|
|
|
|
|
#handles |
713
|
|
|
|
|
|
|
# Fork with an IO::Pipe connecting child (writer) to parent (reader).
# When mod_perl_api reports mod_perl, the matching SubProcess module is
# loaded once per process. In the child, if $ENV{MOD_PERL} is set and
# Apache->request yields a request object, the child runs
# cleanup_for_exec, closes STDIN/STDOUT and calls setsid.
#
# Returns : ( $pid, $pipe ) - $pid is 0 in the child and the child's pid
#           in the parent; $pipe is the matching end of the pipe.
# Throws  : via $self->throw if the SubProcess module cannot be loaded,
#           if fork fails, or if setsid fails.
sub _open_pipe {
    my ($self) = @_;

    # is mod_perl running?  Which API?
    my $api = $self->mod_perl_api;
    our $loaded_apache_sp;
    if ( $api && !$loaded_apache_sp ) {
        my $load_api = $api == 1
            ? 'use Apache::SubProcess'
            : 'use Apache2::SubProcess';
        eval $load_api;
        $self->throw("$@\n$load_api module required for running under mod_perl")
            if $@;
        $loaded_apache_sp = 1;
    }

    my $pipe = IO::Pipe->new();

    local $SIG{CHLD} = 'IGNORE';
    my $pid = fork;
    $self->throw("Couldn't fork: $!") unless defined $pid;

    if ($pid) {
        # PARENT
        $pipe->reader();
    }
    else {
        # CHILD
        $pipe->writer();

        # if we're running under mod_perl, clean up some things after
        # this fork
        my $r = $ENV{MOD_PERL} ? eval { Apache->request } : undef;
        if ($r) {
            $r->cleanup_for_exec;
            # don't read or write the mod_perl parent's tied filehandles
            close STDIN;
            close STDOUT;
            setsid() or $self->throw('Could not detach from parent');
        }
    }

    return ( $pid, $pipe );
}
747
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
# send web request to specified filehandle, or stdout, for streaming purposes |
749
|
|
|
|
|
|
|
# Send a web request and stream the result to a filehandle.
#
# Forks via _open_pipe. The child performs the request and writes each
# chunk it receives into the pipe; the parent reads the pipe one record
# at a time (records end with "//\n"), hands each record to
# postprocess_data, and prints it to the destination handle.
#
# Args    : $request - passed to ua->request in the child
#           $dest_fh - optional output filehandle; defaults to STDOUT
# Returns : in the parent, the result of closing $dest_fh.
#           The child never returns; it always ends in POSIX::_exit.
sub _stream_request {
    my ($self, $request, $dest_fh) = @_;
    $dest_fh ||= \*STDOUT;

    # fork so as to pipe output of fetch process through to
    # postprocess_data method call.
    my ($child, $fetch) = $self->_open_pipe();

    if ($child) {
        # PARENT
        local $/ = "//\n";    # assume genbank/swiss format
        $| = 1;
        while ( my $record = <$fetch> ) {
            $self->postprocess_data('type'     => 'string',
                                    'location' => \$record);
            print {$dest_fh} $record;
        }
        # must explicitly close here, because the hard exits don't close
        # them for us
        close $dest_fh;
    }
    else {
        # CHILD
        $| = 1;

        # Trap every failure: an exception escaping from here would
        # unwind into the caller's code inside the child process, which
        # would then keep running as a duplicate of the parent instead of
        # reaching the hard exit below.
        my $ok = eval {
            my $resp = $self->ua->request($request,
                                          sub { print {$fetch} $_[0] });
            if ( $resp->is_error ) {
                $self->throw("WebDBSeqI Request Error:\n".$resp->as_string);
            }
            1;
        };
        my $error = $@;

        # must explicitly close here, because the hard exit doesn't close
        # it for us
        close $fetch;

        warn "WebDBSeqI fetch child failed: $error\n" unless $ok;
        POSIX::_exit( $ok ? 0 : 1 );
    }
}
787
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
# Get/set the IO helper object stored under '_io'.
#
# Args    : $io - optional object to store. A defined but false value is
#           replaced by a fresh Bio::Root::IO.
# Returns : the stored object; a Bio::Root::IO is created and stored on
#           first use when none exists yet.
sub io {
    my ($self, $io) = @_;

    # Plain getter: nothing new supplied and a value is already in place.
    return $self->{'_io'} if !defined($io) && exists $self->{'_io'};

    $self->{'_io'} = $io || Bio::Root::IO->new();
    return $self->{'_io'};
}
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
=head2 delay |
800
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
Title : delay |
802
|
|
|
|
|
|
|
Usage : $secs = $self->delay([$secs]) |
803
|
|
|
|
|
|
|
Function: get/set number of seconds to delay between fetches |
804
|
|
|
|
|
|
|
Returns : number of seconds to delay |
805
|
|
|
|
|
|
|
Args : new value |
806
|
|
|
|
|
|
|
|
807
|
|
|
|
|
|
|
NOTE: the default is to use the value specified by delay_policy(). |
808
|
|
|
|
|
|
|
This can be overridden by calling this method, or by passing the |
809
|
|
|
|
|
|
|
-delay argument to new(). |
810
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
=cut |
812
|
|
|
|
|
|
|
|
813
|
|
|
|
|
|
|
# Get/set the value stored under '_delay'.
#
# Args    : optional new value (any value, including undef, is stored
#           when an argument is passed)
# Returns : the value held BEFORE this call; when setting, that is the
#           previous value, not the one just stored.
sub delay {
    my ($self, @new_value) = @_;

    my $previous = $self->{'_delay'};
    $self->{'_delay'} = $new_value[0] if @new_value;

    return $previous;
}
819
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
=head2 delay_policy |
821
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
Title : delay_policy |
823
|
|
|
|
|
|
|
Usage : $secs = $self->delay_policy |
824
|
|
|
|
|
|
|
Function: return number of seconds to delay between calls to remote db |
825
|
|
|
|
|
|
|
Returns : number of seconds to delay |
826
|
|
|
|
|
|
|
Args : none |
827
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
NOTE: The default delay policy is 0s. Override in subclasses to |
829
|
|
|
|
|
|
|
implement delays. The timer has only second resolution, so the delay |
830
|
|
|
|
|
|
|
will actually be +/- 1s. |
831
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
=cut |
833
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
# Default delay policy: always 0.
sub delay_policy {
    my ($self) = @_;
    return 0;
}
838
|
|
|
|
|
|
|
|
839
|
|
|
|
|
|
|
=head2 _sleep |
840
|
|
|
|
|
|
|
|
841
|
|
|
|
|
|
|
Title : _sleep |
842
|
|
|
|
|
|
|
Usage : $self->_sleep |
843
|
|
|
|
|
|
|
Function: sleep for a number of seconds indicated by the delay policy |
844
|
|
|
|
|
|
|
Returns : none |
845
|
|
|
|
|
|
|
Args : none |
846
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
NOTE: This method keeps track of the last time it was called and only |
848
|
|
|
|
|
|
|
imposes a sleep if it was called more recently than the delay_policy() |
849
|
|
|
|
|
|
|
allows. |
850
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
=cut |
852
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
# Sleep if less than $self->delay seconds have passed since the time
# recorded in $LAST_INVOCATION_TIME, then record the current time there.
#
# The elapsed time and the delay are each sampled once, so the sleep
# duration always matches the comparison that triggered it; reading the
# clock a second time could yield a zero or negative duration if the
# process stalled between the two reads.
sub _sleep {
    my $self = shift;

    my $elapsed = time - $LAST_INVOCATION_TIME;
    my $wanted  = $self->delay;

    if ( $elapsed < $wanted ) {
        my $delay = $wanted - $elapsed;
        warn "sleeping for $delay seconds\n" if $self->verbose > 0;
        sleep $delay;
    }

    $LAST_INVOCATION_TIME = time;
}
863
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
=head2 mod_perl_api |
865
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
Title : mod_perl_api |
867
|
|
|
|
|
|
|
Usage : $version = $self->mod_perl_api |
868
|
|
|
|
|
|
|
Function: Returns API version of mod_perl being used based on set env. variables |
869
|
|
|
|
|
|
|
Returns : mod_perl API version; if mod_perl isn't loaded, returns 0 |
870
|
|
|
|
|
|
|
Args : none |
871
|
|
|
|
|
|
|
|
872
|
|
|
|
|
|
|
=cut |
873
|
|
|
|
|
|
|
|
874
|
|
|
|
|
|
|
# Report which mod_perl API is in use, judged from environment variables.
#
# Returns : 0 when $ENV{MOD_PERL} is false;
#           2 when $ENV{MOD_PERL_API_VERSION} exists and is >= 2;
#           1 otherwise.
sub mod_perl_api {
    my ($self) = @_;

    return 0 unless $ENV{MOD_PERL};

    my $is_api2 = exists $ENV{MOD_PERL_API_VERSION}
        && $ENV{MOD_PERL_API_VERSION} >= 2;

    return $is_api2 ? 2 : 1;
}
883
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
1; |