| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# |
|
2
|
|
|
|
|
|
|
# BioPerl module for Bio::DB::WebDBSeqI |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
|
5
|
|
|
|
|
|
|
# |
|
6
|
|
|
|
|
|
|
# Cared for by Jason Stajich |
|
7
|
|
|
|
|
|
|
# |
|
8
|
|
|
|
|
|
|
# Copyright Jason Stajich |
|
9
|
|
|
|
|
|
|
# |
|
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
|
11
|
|
|
|
|
|
|
# |
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
|
13
|
|
|
|
|
|
|
# |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 NAME |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
Bio::DB::WebDBSeqI - Object Interface to generalize Web Databases |
|
18
|
|
|
|
|
|
|
for retrieving sequences |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# get a WebDBSeqI object somehow |
|
23
|
|
|
|
|
|
|
# assuming it is a nucleotide db |
|
24
|
|
|
|
|
|
|
my $seq = $db->get_Seq_by_id('ROA1_HUMAN') |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
Provides core set of functionality for connecting to a web based |
|
29
|
|
|
|
|
|
|
database for retrieving sequences. |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
Users wishing to add another Web Based Sequence Dabatase will need to |
|
32
|
|
|
|
|
|
|
extend this class (see L or L for |
|
33
|
|
|
|
|
|
|
examples) and implement the get_request method which returns a |
|
34
|
|
|
|
|
|
|
HTTP::Request for the specified uids (accessions, ids, etc depending |
|
35
|
|
|
|
|
|
|
on what query types the database accepts). |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=head1 FEEDBACK |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=head2 Mailing Lists |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
User feedback is an integral part of the |
|
42
|
|
|
|
|
|
|
evolution of this and other Bioperl modules. Send |
|
43
|
|
|
|
|
|
|
your comments and suggestions preferably to one |
|
44
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation |
|
45
|
|
|
|
|
|
|
is much appreciated. |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
|
48
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=head2 Support |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
I |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
|
57
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
|
58
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
|
59
|
|
|
|
|
|
|
with code and data examples if at all possible. |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=head2 Reporting Bugs |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to |
|
64
|
|
|
|
|
|
|
help us keep track the bugs and their resolution. |
|
65
|
|
|
|
|
|
|
Bug reports can be submitted via the web. |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=head1 AUTHOR - Jason Stajich |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
Email E jason@bioperl.org E |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=head1 APPENDIX |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
The rest of the documentation details each of the |
|
76
|
|
|
|
|
|
|
object methods. Internal methods are usually |
|
77
|
|
|
|
|
|
|
preceded with a _ |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=cut |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
# Let the code begin... |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
package Bio::DB::WebDBSeqI; |
|
84
|
7
|
|
|
7
|
|
49
|
use strict; |
|
|
7
|
|
|
|
|
13
|
|
|
|
7
|
|
|
|
|
242
|
|
|
85
|
7
|
|
|
|
|
414
|
use vars qw($MODVERSION %RETRIEVAL_TYPES $DEFAULT_RETRIEVAL_TYPE |
|
86
|
7
|
|
|
7
|
|
31
|
$DEFAULTFORMAT $LAST_INVOCATION_TIME @ATTRIBUTES); |
|
|
7
|
|
|
|
|
13
|
|
|
87
|
|
|
|
|
|
|
|
|
88
|
7
|
|
|
7
|
|
950
|
use Bio::SeqIO; |
|
|
7
|
|
|
|
|
16
|
|
|
|
7
|
|
|
|
|
193
|
|
|
89
|
7
|
|
|
7
|
|
60
|
use Bio::Root::IO; |
|
|
7
|
|
|
|
|
11
|
|
|
|
7
|
|
|
|
|
129
|
|
|
90
|
7
|
|
|
7
|
|
1287
|
use LWP::UserAgent; |
|
|
7
|
|
|
|
|
51606
|
|
|
|
7
|
|
|
|
|
222
|
|
|
91
|
7
|
|
|
7
|
|
2172
|
use POSIX 'setsid'; |
|
|
7
|
|
|
|
|
34875
|
|
|
|
7
|
|
|
|
|
32
|
|
|
92
|
7
|
|
|
7
|
|
7685
|
use HTTP::Request::Common; |
|
|
7
|
|
|
|
|
42
|
|
|
|
7
|
|
|
|
|
375
|
|
|
93
|
7
|
|
|
7
|
|
42
|
use HTTP::Response; |
|
|
7
|
|
|
|
|
49
|
|
|
|
7
|
|
|
|
|
187
|
|
|
94
|
7
|
|
|
7
|
|
36
|
use File::Spec; |
|
|
7
|
|
|
|
|
11
|
|
|
|
7
|
|
|
|
|
145
|
|
|
95
|
7
|
|
|
7
|
|
1728
|
use IO::Pipe; |
|
|
7
|
|
|
|
|
6255
|
|
|
|
7
|
|
|
|
|
175
|
|
|
96
|
7
|
|
|
7
|
|
816
|
use IO::String; |
|
|
7
|
|
|
|
|
5385
|
|
|
|
7
|
|
|
|
|
154
|
|
|
97
|
7
|
|
|
7
|
|
38
|
use Bio::Root::Root; |
|
|
7
|
|
|
|
|
13
|
|
|
|
7
|
|
|
|
|
162
|
|
|
98
|
|
|
|
|
|
|
|
|
99
|
7
|
|
|
7
|
|
30
|
use base qw(Bio::DB::RandomAccessI); |
|
|
7
|
|
|
|
|
12
|
|
|
|
7
|
|
|
|
|
1564
|
|
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
BEGIN { |
|
102
|
7
|
|
|
7
|
|
21
|
$MODVERSION = '0.8'; |
|
103
|
7
|
|
|
|
|
24
|
%RETRIEVAL_TYPES = ('io_string' => 1, |
|
104
|
|
|
|
|
|
|
'tempfile' => 1, |
|
105
|
|
|
|
|
|
|
'pipeline' => 1, |
|
106
|
|
|
|
|
|
|
); |
|
107
|
7
|
|
|
|
|
13
|
$DEFAULT_RETRIEVAL_TYPE = 'pipeline'; |
|
108
|
7
|
|
|
|
|
10
|
$DEFAULTFORMAT = 'fasta'; |
|
109
|
7
|
|
|
|
|
16406
|
$LAST_INVOCATION_TIME = 0; |
|
110
|
|
|
|
|
|
|
} |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
sub new { |
|
113
|
3
|
|
|
3
|
1
|
9
|
my ($class, @args) = @_; |
|
114
|
3
|
|
|
|
|
22
|
my $self = $class->SUPER::new(@args); |
|
115
|
3
|
|
|
|
|
26
|
my ($baseaddress, $params, $ret_type, $format,$delay,$db) = |
|
116
|
|
|
|
|
|
|
$self->_rearrange([qw(BASEADDRESS PARAMS RETRIEVALTYPE FORMAT DELAY DB)], |
|
117
|
|
|
|
|
|
|
@args); |
|
118
|
|
|
|
|
|
|
|
|
119
|
3
|
50
|
|
|
|
11
|
$ret_type = $DEFAULT_RETRIEVAL_TYPE unless ( $ret_type); |
|
120
|
3
|
50
|
|
|
|
7
|
$baseaddress && $self->url_base_address($baseaddress); |
|
121
|
3
|
50
|
|
|
|
7
|
$params && $self->url_params($params); |
|
122
|
3
|
50
|
|
|
|
7
|
$db && $self->db($db); |
|
123
|
3
|
50
|
|
|
|
19
|
$ret_type && $self->retrieval_type($ret_type); |
|
124
|
3
|
50
|
|
|
|
15
|
$delay = $self->delay_policy unless defined $delay; |
|
125
|
3
|
|
|
|
|
16
|
$self->delay($delay); |
|
126
|
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
# insure we always have a default format set for retrieval |
|
129
|
|
|
|
|
|
|
# even though this will be immedietly overwritten by most sub classes |
|
130
|
3
|
50
|
33
|
|
|
21
|
$format = $self->default_format unless ( defined $format && |
|
131
|
|
|
|
|
|
|
$format ne '' ); |
|
132
|
|
|
|
|
|
|
|
|
133
|
3
|
|
|
|
|
20
|
$self->request_format($format); |
|
134
|
3
|
|
|
|
|
20
|
my $ua = LWP::UserAgent->new(env_proxy => 1); |
|
135
|
3
|
|
|
|
|
23728
|
$ua->agent(ref($self) ."/$MODVERSION"); |
|
136
|
3
|
|
|
|
|
219
|
$self->ua($ua); |
|
137
|
3
|
|
|
|
|
7
|
$self->{'_authentication'} = []; |
|
138
|
3
|
|
|
|
|
9
|
return $self; |
|
139
|
|
|
|
|
|
|
} |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
# from Bio::DB::RandomAccessI |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=head2 get_Seq_by_id |
|
144
|
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
Title : get_Seq_by_id |
|
146
|
|
|
|
|
|
|
Usage : $seq = $db->get_Seq_by_id('ROA1_HUMAN') |
|
147
|
|
|
|
|
|
|
Function: Gets a Bio::Seq object by its name |
|
148
|
|
|
|
|
|
|
Returns : a Bio::Seq object |
|
149
|
|
|
|
|
|
|
Args : the id (as a string) of a sequence |
|
150
|
|
|
|
|
|
|
Throws : "id does not exist" exception |
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=cut |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
sub get_Seq_by_id { |
|
156
|
0
|
|
|
0
|
1
|
0
|
my ($self,$seqid) = @_; |
|
157
|
0
|
|
|
|
|
0
|
$self->_sleep; |
|
158
|
0
|
|
|
|
|
0
|
my $seqio = $self->get_Stream_by_id([$seqid]); |
|
159
|
0
|
0
|
|
|
|
0
|
$self->throw("id does not exist") if( !defined $seqio ) ; |
|
160
|
0
|
0
|
0
|
|
|
0
|
if ($self->can('complexity') && defined $self->complexity && $self->complexity==0) { |
|
|
|
|
0
|
|
|
|
|
|
161
|
0
|
|
|
|
|
0
|
$self->warn("When complexity is set to 0, use get_Stream_by_id\n". |
|
162
|
|
|
|
|
|
|
"Returning Bio::SeqIO object"); |
|
163
|
0
|
|
|
|
|
0
|
return $seqio; |
|
164
|
|
|
|
|
|
|
} |
|
165
|
0
|
|
|
|
|
0
|
my @seqs; |
|
166
|
0
|
|
|
|
|
0
|
while( my $seq = $seqio->next_seq() ) { push @seqs, $seq; } |
|
|
0
|
|
|
|
|
0
|
|
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
# Since $seqio will not be used anymore, explicitly close its filehandle |
|
169
|
|
|
|
|
|
|
# or it will cause trouble later on cleanup |
|
170
|
0
|
|
|
|
|
0
|
$seqio->close; |
|
171
|
|
|
|
|
|
|
|
|
172
|
0
|
0
|
|
|
|
0
|
$self->throw("id '$seqid' does not exist") unless @seqs; |
|
173
|
0
|
0
|
|
|
|
0
|
if( wantarray ) { return @seqs } else { return shift @seqs } |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
174
|
|
|
|
|
|
|
} |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=head2 get_Seq_by_acc |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
Title : get_Seq_by_acc |
|
179
|
|
|
|
|
|
|
Usage : $seq = $db->get_Seq_by_acc('X77802'); |
|
180
|
|
|
|
|
|
|
Function: Gets a Bio::Seq object by accession number |
|
181
|
|
|
|
|
|
|
Returns : A Bio::Seq object |
|
182
|
|
|
|
|
|
|
Args : accession number (as a string) |
|
183
|
|
|
|
|
|
|
Throws : "acc does not exist" exception |
|
184
|
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
=cut |
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
sub get_Seq_by_acc { |
|
188
|
1
|
|
|
1
|
1
|
3
|
my ($self,$seqid) = @_; |
|
189
|
1
|
|
|
|
|
5
|
$self->_sleep; |
|
190
|
1
|
|
|
|
|
6
|
my $seqio = $self->get_Stream_by_acc($seqid); |
|
191
|
0
|
0
|
|
|
|
0
|
$self->throw("acc '$seqid' does not exist") if( ! defined $seqio ); |
|
192
|
0
|
0
|
0
|
|
|
0
|
if ($self->can('complexity') && defined $self->complexity && $self->complexity==0) { |
|
|
|
|
0
|
|
|
|
|
|
193
|
0
|
|
|
|
|
0
|
$self->warn("When complexity is set to 0, use get_Stream_by_acc\n". |
|
194
|
|
|
|
|
|
|
"Returning Bio::SeqIO object"); |
|
195
|
0
|
|
|
|
|
0
|
return $seqio; |
|
196
|
|
|
|
|
|
|
} |
|
197
|
0
|
|
|
|
|
0
|
my @seqs; |
|
198
|
0
|
|
|
|
|
0
|
while( my $seq = $seqio->next_seq() ) { push @seqs, $seq; } |
|
|
0
|
|
|
|
|
0
|
|
|
199
|
0
|
0
|
|
|
|
0
|
$self->throw("acc $seqid does not exist") unless @seqs; |
|
200
|
0
|
0
|
|
|
|
0
|
if( wantarray ) { return @seqs } else { return shift @seqs } |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
201
|
|
|
|
|
|
|
} |
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
=head2 get_Seq_by_gi |
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
Title : get_Seq_by_gi |
|
207
|
|
|
|
|
|
|
Usage : $seq = $db->get_Seq_by_gi('405830'); |
|
208
|
|
|
|
|
|
|
Function: Gets a Bio::Seq object by gi number |
|
209
|
|
|
|
|
|
|
Returns : A Bio::Seq object |
|
210
|
|
|
|
|
|
|
Args : gi number (as a string) |
|
211
|
|
|
|
|
|
|
Throws : "gi does not exist" exception |
|
212
|
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
=cut |
|
214
|
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
sub get_Seq_by_gi { |
|
216
|
0
|
|
|
0
|
1
|
0
|
my ($self,$seqid) = @_; |
|
217
|
0
|
|
|
|
|
0
|
$self->_sleep; |
|
218
|
0
|
|
|
|
|
0
|
my $seqio = $self->get_Stream_by_gi($seqid); |
|
219
|
0
|
0
|
|
|
|
0
|
$self->throw("gi does not exist") if( !defined $seqio ); |
|
220
|
0
|
0
|
0
|
|
|
0
|
if ($self->can('complexity') && defined $self->complexity && $self->complexity==0) { |
|
|
|
|
0
|
|
|
|
|
|
221
|
0
|
|
|
|
|
0
|
$self->warn("When complexity is set to 0, use get_Stream_by_gi\n". |
|
222
|
|
|
|
|
|
|
"Returning Bio::SeqIO object"); |
|
223
|
0
|
|
|
|
|
0
|
return $seqio; |
|
224
|
|
|
|
|
|
|
} |
|
225
|
0
|
|
|
|
|
0
|
my @seqs; |
|
226
|
0
|
|
|
|
|
0
|
while( my $seq = $seqio->next_seq() ) { push @seqs, $seq; } |
|
|
0
|
|
|
|
|
0
|
|
|
227
|
0
|
0
|
|
|
|
0
|
$self->throw("gi does not exist") unless @seqs; |
|
228
|
0
|
0
|
|
|
|
0
|
if( wantarray ) { return @seqs } else { return shift @seqs } |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
229
|
|
|
|
|
|
|
} |
|
230
|
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
=head2 get_Seq_by_version |
|
232
|
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
Title : get_Seq_by_version |
|
234
|
|
|
|
|
|
|
Usage : $seq = $db->get_Seq_by_version('X77802.1'); |
|
235
|
|
|
|
|
|
|
Function: Gets a Bio::Seq object by sequence version |
|
236
|
|
|
|
|
|
|
Returns : A Bio::Seq object |
|
237
|
|
|
|
|
|
|
Args : accession.version (as a string) |
|
238
|
|
|
|
|
|
|
Throws : "acc.version does not exist" exception |
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=cut |
|
241
|
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
sub get_Seq_by_version { |
|
243
|
0
|
|
|
0
|
1
|
0
|
my ($self,$seqid) = @_; |
|
244
|
0
|
|
|
|
|
0
|
$self->_sleep; |
|
245
|
0
|
|
|
|
|
0
|
my $seqio = $self->get_Stream_by_version($seqid); |
|
246
|
0
|
0
|
|
|
|
0
|
$self->throw("accession.version does not exist") if( !defined $seqio ); |
|
247
|
0
|
0
|
0
|
|
|
0
|
if ($self->can('complexity') && defined $self->complexity && $self->complexity==0) { |
|
|
|
|
0
|
|
|
|
|
|
248
|
0
|
|
|
|
|
0
|
$self->warn("When complexity is set to 0, use get_Stream_by_version\n". |
|
249
|
|
|
|
|
|
|
"Returning Bio::SeqIO object"); |
|
250
|
0
|
|
|
|
|
0
|
return $seqio; |
|
251
|
|
|
|
|
|
|
} |
|
252
|
0
|
|
|
|
|
0
|
my @seqs; |
|
253
|
0
|
|
|
|
|
0
|
while( my $seq = $seqio->next_seq() ) { push @seqs, $seq; } |
|
|
0
|
|
|
|
|
0
|
|
|
254
|
0
|
0
|
|
|
|
0
|
$self->throw("accession.version does not exist") unless @seqs; |
|
255
|
0
|
0
|
|
|
|
0
|
if( wantarray ) { return @seqs } else { return shift @seqs } |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
256
|
|
|
|
|
|
|
} |
|
257
|
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
# implementing class must define these |
|
259
|
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=head2 get_request |
|
261
|
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
Title : get_request |
|
263
|
|
|
|
|
|
|
Usage : my $url = $self->get_request |
|
264
|
|
|
|
|
|
|
Function: returns a HTTP::Request object |
|
265
|
|
|
|
|
|
|
Returns : |
|
266
|
|
|
|
|
|
|
Args : %qualifiers = a hash of qualifiers (ids, format, etc) |
|
267
|
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=cut |
|
269
|
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
sub get_request { |
|
271
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
|
272
|
0
|
|
|
|
|
0
|
my $msg = "Implementing class must define method get_request in class WebDBSeqI"; |
|
273
|
0
|
|
|
|
|
0
|
$self->throw($msg); |
|
274
|
|
|
|
|
|
|
} |
|
275
|
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
# class methods |
|
277
|
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=head2 get_Stream_by_id |
|
279
|
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
Title : get_Stream_by_id |
|
281
|
|
|
|
|
|
|
Usage : $stream = $db->get_Stream_by_id( [$uid1, $uid2] ); |
|
282
|
|
|
|
|
|
|
Function: Gets a series of Seq objects by unique identifiers |
|
283
|
|
|
|
|
|
|
Returns : a Bio::SeqIO stream object |
|
284
|
|
|
|
|
|
|
Args : $ref : a reference to an array of unique identifiers for |
|
285
|
|
|
|
|
|
|
the desired sequence entries |
|
286
|
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
=cut |
|
289
|
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
sub get_Stream_by_id { |
|
291
|
0
|
|
|
0
|
1
|
0
|
my ($self, $ids) = @_; |
|
292
|
0
|
|
|
|
|
0
|
my ($webfmt,$localfmt) = $self->request_format; |
|
293
|
0
|
|
|
|
|
0
|
return $self->get_seq_stream('-uids' => $ids, '-mode' => 'single', |
|
294
|
|
|
|
|
|
|
'-format' => $webfmt); |
|
295
|
|
|
|
|
|
|
} |
|
296
|
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
*get_Stream_by_batch = sub { |
|
298
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
|
299
|
0
|
|
|
|
|
0
|
$self->deprecated('get_Stream_by_batch() is deprecated; use get_Stream_by_id() instead'); |
|
300
|
0
|
|
|
|
|
0
|
$self->get_Stream_by_id(@_) |
|
301
|
|
|
|
|
|
|
}; |
|
302
|
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
=head2 get_Stream_by_acc |
|
305
|
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
Title : get_Stream_by_acc |
|
307
|
|
|
|
|
|
|
Usage : $seq = $db->get_Stream_by_acc([$acc1, $acc2]); |
|
308
|
|
|
|
|
|
|
Function: Gets a series of Seq objects by accession numbers |
|
309
|
|
|
|
|
|
|
Returns : a Bio::SeqIO stream object |
|
310
|
|
|
|
|
|
|
Args : $ref : a reference to an array of accession numbers for |
|
311
|
|
|
|
|
|
|
the desired sequence entries |
|
312
|
|
|
|
|
|
|
Note : For GenBank, this just calls the same code for get_Stream_by_id() |
|
313
|
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
=cut |
|
315
|
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
sub get_Stream_by_acc { |
|
317
|
0
|
|
|
0
|
1
|
0
|
my ($self, $ids ) = @_; |
|
318
|
0
|
|
|
|
|
0
|
return $self->get_seq_stream('-uids' => $ids, '-mode' => 'single'); |
|
319
|
|
|
|
|
|
|
} |
|
320
|
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
=head2 get_Stream_by_gi |
|
323
|
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
Title : get_Stream_by_gi |
|
325
|
|
|
|
|
|
|
Usage : $seq = $db->get_Stream_by_gi([$gi1, $gi2]); |
|
326
|
|
|
|
|
|
|
Function: Gets a series of Seq objects by gi numbers |
|
327
|
|
|
|
|
|
|
Returns : a Bio::SeqIO stream object |
|
328
|
|
|
|
|
|
|
Args : $ref : a reference to an array of gi numbers for |
|
329
|
|
|
|
|
|
|
the desired sequence entries |
|
330
|
|
|
|
|
|
|
Note : For GenBank, this just calls the same code for get_Stream_by_id() |
|
331
|
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
=cut |
|
333
|
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
sub get_Stream_by_gi { |
|
335
|
0
|
|
|
0
|
1
|
0
|
my ($self, $ids ) = @_; |
|
336
|
0
|
|
|
|
|
0
|
return $self->get_seq_stream('-uids' => $ids, '-mode' => 'gi'); |
|
337
|
|
|
|
|
|
|
} |
|
338
|
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
=head2 get_Stream_by_version |
|
340
|
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
Title : get_Stream_by_version |
|
342
|
|
|
|
|
|
|
Usage : $seq = $db->get_Stream_by_version([$version1, $version2]); |
|
343
|
|
|
|
|
|
|
Function: Gets a series of Seq objects by accession.versions |
|
344
|
|
|
|
|
|
|
Returns : a Bio::SeqIO stream object |
|
345
|
|
|
|
|
|
|
Args : $ref : a reference to an array of accession.version strings for |
|
346
|
|
|
|
|
|
|
the desired sequence entries |
|
347
|
|
|
|
|
|
|
Note : For GenBank, this is implemented in NCBIHelper |
|
348
|
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
=cut |
|
350
|
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
sub get_Stream_by_version { |
|
352
|
0
|
|
|
0
|
1
|
0
|
my ($self, $ids ) = @_; |
|
353
|
|
|
|
|
|
|
# $self->throw("Implementing class should define this method!"); |
|
354
|
0
|
|
|
|
|
0
|
return $self->get_seq_stream('-uids' => $ids, '-mode' => 'version'); # how it should work |
|
355
|
|
|
|
|
|
|
} |
|
356
|
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
=head2 get_Stream_by_query |
|
358
|
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
Title : get_Stream_by_query |
|
360
|
|
|
|
|
|
|
Usage : $stream = $db->get_Stream_by_query($query); |
|
361
|
|
|
|
|
|
|
Function: Gets a series of Seq objects by way of a query string or oject |
|
362
|
|
|
|
|
|
|
Returns : a Bio::SeqIO stream object |
|
363
|
|
|
|
|
|
|
Args : $query : A string that uses the appropriate query language |
|
364
|
|
|
|
|
|
|
for the database or a Bio::DB::QueryI object. It is suggested |
|
365
|
|
|
|
|
|
|
that you create the Bio::DB::Query object first and interrogate |
|
366
|
|
|
|
|
|
|
it for the entry count before you fetch a potentially large stream. |
|
367
|
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
=cut |
|
369
|
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
sub get_Stream_by_query { |
|
371
|
0
|
|
|
0
|
1
|
0
|
my ($self, $query ) = @_; |
|
372
|
0
|
|
|
|
|
0
|
return $self->get_seq_stream('-query' => $query, '-mode'=>'query'); |
|
373
|
|
|
|
|
|
|
} |
|
374
|
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
=head2 default_format |
|
376
|
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
Title : default_format |
|
378
|
|
|
|
|
|
|
Usage : my $format = $self->default_format |
|
379
|
|
|
|
|
|
|
Function: Returns default sequence format for this module |
|
380
|
|
|
|
|
|
|
Returns : string |
|
381
|
|
|
|
|
|
|
Args : none |
|
382
|
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
=cut |
|
384
|
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
sub default_format { |
|
386
|
1
|
|
|
1
|
1
|
2
|
return $DEFAULTFORMAT; |
|
387
|
|
|
|
|
|
|
} |
|
388
|
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
# sorry, but this is hacked in because of BioFetch problems... |
|
390
|
|
|
|
|
|
|
sub db { |
|
391
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
|
392
|
0
|
|
|
|
|
0
|
my $d = $self->{_db}; |
|
393
|
0
|
0
|
|
|
|
0
|
$self->{_db} = shift if @_; |
|
394
|
0
|
|
|
|
|
0
|
$d; |
|
395
|
|
|
|
|
|
|
} |
|
396
|
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
=head2 request_format |
|
398
|
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
Title : request_format |
|
400
|
|
|
|
|
|
|
Usage : my ($req_format, $ioformat) = $self->request_format; |
|
401
|
|
|
|
|
|
|
$self->request_format("genbank"); |
|
402
|
|
|
|
|
|
|
$self->request_format("fasta"); |
|
403
|
|
|
|
|
|
|
Function: Get/Set sequence format retrieval. The get-form will normally not |
|
404
|
|
|
|
|
|
|
be used outside of this and derived modules. |
|
405
|
|
|
|
|
|
|
Returns : Array of two strings, the first representing the format for |
|
406
|
|
|
|
|
|
|
retrieval, and the second specifying the corresponding SeqIO format. |
|
407
|
|
|
|
|
|
|
Args : $format = sequence format |
|
408
|
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
=cut |
|
410
|
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
sub request_format { |
|
412
|
5
|
|
|
5
|
1
|
12
|
my ($self, $value) = @_; |
|
413
|
|
|
|
|
|
|
|
|
414
|
5
|
100
|
|
|
|
10
|
if( defined $value ) { |
|
415
|
2
|
|
|
|
|
4
|
$self->{'_format'} = [ $value, $value]; |
|
416
|
|
|
|
|
|
|
} |
|
417
|
5
|
|
|
|
|
6
|
return @{$self->{'_format'}}; |
|
|
5
|
|
|
|
|
16
|
|
|
418
|
|
|
|
|
|
|
} |
|
419
|
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
=head2 get_seq_stream |
|
421
|
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
Title : get_seq_stream |
|
423
|
|
|
|
|
|
|
Usage : my $seqio = $self->get_seq_stream(%qualifiers) |
|
424
|
|
|
|
|
|
|
Function: builds a url and queries a web db |
|
425
|
|
|
|
|
|
|
Returns : a Bio::SeqIO stream capable of producing sequence |
|
426
|
|
|
|
|
|
|
Args : %qualifiers = a hash qualifiers that the implementing class |
|
427
|
|
|
|
|
|
|
will process to make a url suitable for web querying |
|
428
|
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
=cut |
|
430
|
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
sub get_seq_stream { |
|
432
|
0
|
|
|
0
|
1
|
0
|
my ($self, %qualifiers) = @_; |
|
433
|
0
|
|
|
|
|
0
|
my ($rformat, $ioformat) = $self->request_format(); |
|
434
|
0
|
|
|
|
|
0
|
my $seen = 0; |
|
435
|
0
|
|
|
|
|
0
|
foreach my $key ( keys %qualifiers ) { |
|
436
|
0
|
0
|
|
|
|
0
|
if( $key =~ /format/i ) { |
|
437
|
0
|
|
|
|
|
0
|
$rformat = $qualifiers{$key}; |
|
438
|
0
|
|
|
|
|
0
|
$seen = 1; |
|
439
|
|
|
|
|
|
|
} |
|
440
|
|
|
|
|
|
|
} |
|
441
|
0
|
0
|
|
|
|
0
|
$qualifiers{'-format'} = $rformat if( !$seen); |
|
442
|
0
|
|
|
|
|
0
|
($rformat, $ioformat) = $self->request_format($rformat); |
|
443
|
|
|
|
|
|
|
# These parameters are implemented for Bio::DB::GenBank objects only |
|
444
|
0
|
0
|
|
|
|
0
|
if($self->isa('Bio::DB::GenBank')) { |
|
445
|
0
|
0
|
|
|
|
0
|
$self->seq_start() && ($qualifiers{'-seq_start'} = $self->seq_start()); |
|
446
|
0
|
0
|
|
|
|
0
|
$self->seq_stop() && ($qualifiers{'-seq_stop'} = $self->seq_stop()); |
|
447
|
0
|
0
|
|
|
|
0
|
$self->strand() && ($qualifiers{'-strand'} = $self->strand()); |
|
448
|
0
|
0
|
|
|
|
0
|
defined $self->complexity() && ($qualifiers{'-complexity'} = $self->complexity()); |
|
449
|
|
|
|
|
|
|
} |
|
450
|
0
|
|
|
|
|
0
|
my $request = $self->get_request(%qualifiers); |
|
451
|
0
|
0
|
|
|
|
0
|
$request->proxy_authorization_basic($self->authentication) |
|
452
|
|
|
|
|
|
|
if ( $self->authentication); |
|
453
|
0
|
|
|
|
|
0
|
$self->debug("request is ". $request->as_string(). "\n"); |
|
454
|
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
# workaround for MSWin systems |
|
456
|
0
|
0
|
0
|
|
|
0
|
$self->retrieval_type('io_string') if $self->retrieval_type =~ /pipeline/ && $^O =~ /^MSWin/; |
|
457
|
|
|
|
|
|
|
|
|
458
|
0
|
0
|
|
|
|
0
|
if ($self->retrieval_type =~ /pipeline/) { |
|
459
|
|
|
|
|
|
|
# Try to create a stream using POSIX fork-and-pipe facility. |
|
460
|
|
|
|
|
|
|
# this is a *big* win when fetching thousands of sequences from |
|
461
|
|
|
|
|
|
|
# a web database because we can return the first entry while |
|
462
|
|
|
|
|
|
|
# transmission is still in progress. |
|
463
|
|
|
|
|
|
|
# Also, no need to keep sequence in memory or in a temporary file. |
|
464
|
|
|
|
|
|
|
# If this fails (Windows, MacOS 9), we fall back to non-pipelined access. |
|
465
|
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
# fork and pipe: _stream_request()=> |
|
467
|
0
|
|
|
|
|
0
|
my ($result,$stream) = $self->_open_pipe(); |
|
468
|
|
|
|
|
|
|
|
|
469
|
0
|
0
|
|
|
|
0
|
if (defined $result) { |
|
470
|
0
|
|
|
|
|
0
|
$DB::fork_TTY = File::Spec->devnull; # prevents complaints from debugger |
|
471
|
0
|
0
|
|
|
|
0
|
if (!$result) { # in child process |
|
472
|
0
|
|
|
|
|
0
|
$self->_stream_request($request,$stream); |
|
473
|
0
|
|
|
|
|
0
|
POSIX::_exit(0); #prevent END blocks from executing in this forked child |
|
474
|
|
|
|
|
|
|
} |
|
475
|
|
|
|
|
|
|
else { |
|
476
|
0
|
|
|
|
|
0
|
return Bio::SeqIO->new('-verbose' => $self->verbose, |
|
477
|
|
|
|
|
|
|
'-format' => $ioformat, |
|
478
|
|
|
|
|
|
|
'-fh' => $stream); |
|
479
|
|
|
|
|
|
|
} |
|
480
|
|
|
|
|
|
|
} |
|
481
|
|
|
|
|
|
|
else { |
|
482
|
0
|
|
|
|
|
0
|
$self->retrieval_type('io_string'); |
|
483
|
|
|
|
|
|
|
} |
|
484
|
|
|
|
|
|
|
} |
|
485
|
|
|
|
|
|
|
|
|
486
|
0
|
0
|
|
|
|
0
|
if ($self->retrieval_type =~ /temp/i) { |
|
487
|
0
|
|
|
|
|
0
|
my $dir = $self->io->tempdir( CLEANUP => 1); |
|
488
|
0
|
|
|
|
|
0
|
my ( $fh, $tmpfile) = $self->io()->tempfile( DIR => $dir ); |
|
489
|
0
|
|
|
|
|
0
|
close $fh; |
|
490
|
0
|
|
|
|
|
0
|
my $resp = $self->_request($request, $tmpfile); |
|
491
|
0
|
0
|
0
|
|
|
0
|
if( ! -e $tmpfile || -z $tmpfile || ! $resp->is_success() ) { |
|
|
|
|
0
|
|
|
|
|
|
492
|
0
|
|
|
|
|
0
|
$self->throw("WebDBSeqI Error - check query sequences!\n"); |
|
493
|
|
|
|
|
|
|
} |
|
494
|
0
|
|
|
|
|
0
|
$self->postprocess_data('type' => 'file', |
|
495
|
|
|
|
|
|
|
'location' => $tmpfile); |
|
496
|
|
|
|
|
|
|
# this may get reset when requesting batch mode |
|
497
|
0
|
|
|
|
|
0
|
($rformat,$ioformat) = $self->request_format(); |
|
498
|
0
|
0
|
|
|
|
0
|
if( $self->verbose > 0 ) { |
|
499
|
0
|
0
|
|
|
|
0
|
open my $ERR, '<', $tmpfile or $self->throw("Could not read file '$tmpfile': $!"); |
|
500
|
0
|
|
|
|
|
0
|
while(<$ERR>) { $self->debug($_);} |
|
|
0
|
|
|
|
|
0
|
|
|
501
|
0
|
|
|
|
|
0
|
close $ERR; |
|
502
|
|
|
|
|
|
|
} |
|
503
|
|
|
|
|
|
|
|
|
504
|
0
|
|
|
|
|
0
|
return Bio::SeqIO->new('-verbose' => $self->verbose, |
|
505
|
|
|
|
|
|
|
'-format' => $ioformat, |
|
506
|
|
|
|
|
|
|
'-file' => $tmpfile); |
|
507
|
|
|
|
|
|
|
} |
|
508
|
|
|
|
|
|
|
|
|
509
|
0
|
0
|
|
|
|
0
|
if ($self->retrieval_type =~ /io_string/i ) { |
|
510
|
0
|
|
|
|
|
0
|
my $resp = $self->_request($request); |
|
511
|
0
|
|
|
|
|
0
|
my $content = $resp->content_ref; |
|
512
|
0
|
|
|
|
|
0
|
$self->debug( "content is $$content\n"); |
|
513
|
0
|
0
|
0
|
|
|
0
|
if (!$resp->is_success() || length($$content) == 0) { |
|
514
|
0
|
|
|
|
|
0
|
$self->throw("WebDBSeqI Error - check query sequences!\n"); |
|
515
|
|
|
|
|
|
|
} |
|
516
|
0
|
|
|
|
|
0
|
($rformat,$ioformat) = $self->request_format(); |
|
517
|
0
|
|
|
|
|
0
|
$self->postprocess_data('type'=> 'string', |
|
518
|
|
|
|
|
|
|
'location' => $content); |
|
519
|
0
|
|
|
|
|
0
|
$self->debug( "str is $$content\n"); |
|
520
|
0
|
|
|
|
|
0
|
return Bio::SeqIO->new('-verbose' => $self->verbose, |
|
521
|
|
|
|
|
|
|
'-format' => $ioformat, |
|
522
|
|
|
|
|
|
|
'-fh' => new IO::String($$content)); |
|
523
|
|
|
|
|
|
|
} |
|
524
|
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
# if we got here, we don't know how to handle the retrieval type |
|
526
|
0
|
|
|
|
|
0
|
$self->throw("retrieval type " . $self->retrieval_type . |
|
527
|
|
|
|
|
|
|
" unsupported\n"); |
|
528
|
|
|
|
|
|
|
} |
|
529
|
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
=head2 url_base_address |
|
531
|
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
Title : url_base_address |
|
533
|
|
|
|
|
|
|
Usage : my $address = $self->url_base_address or |
|
534
|
|
|
|
|
|
|
$self->url_base_address($address) |
|
535
|
|
|
|
|
|
|
Function: Get/Set the base URL for the Web Database |
|
536
|
|
|
|
|
|
|
Returns : Base URL for the Web Database |
|
537
|
|
|
|
|
|
|
Args : $address - URL for the WebDatabase |
|
538
|
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
=cut |
|
540
|
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
sub url_base_address { |
|
542
|
3
|
|
|
3
|
1
|
4
|
my $self = shift; |
|
543
|
3
|
|
|
|
|
5
|
my $d = $self->{'_baseaddress'}; |
|
544
|
3
|
100
|
|
|
|
6
|
$self->{'_baseaddress'} = shift if @_; |
|
545
|
3
|
|
|
|
|
10
|
$d; |
|
546
|
|
|
|
|
|
|
} |
|
547
|
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
=head2 proxy |
|
550
|
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
Title : proxy |
|
552
|
|
|
|
|
|
|
Usage : $httpproxy = $db->proxy('http') or |
|
553
|
|
|
|
|
|
|
$db->proxy(['http','ftp'], 'http://myproxy' ) |
|
554
|
|
|
|
|
|
|
Function: Get/Set a proxy for use of proxy |
|
555
|
|
|
|
|
|
|
Returns : a string indicating the proxy |
|
556
|
|
|
|
|
|
|
Args : $protocol : an array ref of the protocol(s) to set/get |
|
557
|
|
|
|
|
|
|
$proxyurl : url of the proxy to use for the specified protocol |
|
558
|
|
|
|
|
|
|
$username : username (if proxy requires authentication) |
|
559
|
|
|
|
|
|
|
$password : password (if proxy requires authentication) |
|
560
|
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
=cut |
|
562
|
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
sub proxy { |
|
564
|
0
|
|
|
0
|
1
|
0
|
my ($self,$protocol,$proxy,$username,$password) = @_; |
|
565
|
0
|
0
|
0
|
|
|
0
|
return if ( !defined $self->ua || !defined $protocol |
|
|
|
|
0
|
|
|
|
|
|
566
|
|
|
|
|
|
|
|| !defined $proxy ); |
|
567
|
0
|
0
|
0
|
|
|
0
|
$self->authentication($username, $password) |
|
568
|
|
|
|
|
|
|
if ($username && $password); |
|
569
|
0
|
|
|
|
|
0
|
return $self->ua->proxy($protocol,$proxy); |
|
570
|
|
|
|
|
|
|
} |
|
571
|
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
=head2 authentication |
|
573
|
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
Title : authentication |
|
575
|
|
|
|
|
|
|
Usage : $db->authentication($user,$pass) |
|
576
|
|
|
|
|
|
|
Function: Get/Set authentication credentials |
|
577
|
|
|
|
|
|
|
Returns : Array of user/pass |
|
578
|
|
|
|
|
|
|
Args : Array or user/pass |
|
579
|
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
=cut |
|
582
|
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
sub authentication{ |
|
584
|
0
|
|
|
0
|
1
|
0
|
my ($self,$u,$p) = @_; |
|
585
|
|
|
|
|
|
|
|
|
586
|
0
|
0
|
0
|
|
|
0
|
if( defined $u && defined $p ) { |
|
587
|
0
|
|
|
|
|
0
|
$self->{'_authentication'} = [ $u,$p]; |
|
588
|
|
|
|
|
|
|
} |
|
589
|
0
|
|
|
|
|
0
|
return @{$self->{'_authentication'}}; |
|
|
0
|
|
|
|
|
0
|
|
|
590
|
|
|
|
|
|
|
} |
|
591
|
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
=head2 retrieval_type |
|
594
|
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
Title : retrieval_type |
|
596
|
|
|
|
|
|
|
Usage : $self->retrieval_type($type); |
|
597
|
|
|
|
|
|
|
my $type = $self->retrieval_type |
|
598
|
|
|
|
|
|
|
Function: Get/Set a proxy for retrieval_type (pipeline, io_string or tempfile) |
|
599
|
|
|
|
|
|
|
Returns : string representing retrieval type |
|
600
|
|
|
|
|
|
|
Args : $value - the value to store |
|
601
|
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
This setting affects how the data stream from the remote web server is |
|
603
|
|
|
|
|
|
|
processed and passed to the Bio::SeqIO layer. Three types of retrieval |
|
604
|
|
|
|
|
|
|
types are currently allowed: |
|
605
|
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
pipeline Perform a fork in an attempt to begin streaming |
|
607
|
|
|
|
|
|
|
while the data is still downloading from the remote |
|
608
|
|
|
|
|
|
|
server. Disk, memory and speed efficient, but will |
|
609
|
|
|
|
|
|
|
not work on Windows or MacOS 9 platforms. |
|
610
|
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
io_string Store downloaded database entry(s) in memory. Can be |
|
612
|
|
|
|
|
|
|
problematic for batch downloads because entire set |
|
613
|
|
|
|
|
|
|
of entries must fit in memory. Alll entries must be |
|
614
|
|
|
|
|
|
|
downloaded before processing can begin. |
|
615
|
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
tempfile Store downloaded database entry(s) in a temporary file. |
|
617
|
|
|
|
|
|
|
All entries must be downloaded before processing can |
|
618
|
|
|
|
|
|
|
begin. |
|
619
|
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
The default is pipeline, with automatic fallback to io_string if |
|
621
|
|
|
|
|
|
|
pipelining is not available. |
|
622
|
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
=cut |
|
624
|
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
sub retrieval_type { |
|
626
|
3
|
|
|
3
|
1
|
7
|
my ($self, $value) = @_; |
|
627
|
3
|
50
|
|
|
|
8
|
if( defined $value ) { |
|
628
|
3
|
|
|
|
|
7
|
$value = lc $value; |
|
629
|
3
|
50
|
|
|
|
16
|
if( ! $RETRIEVAL_TYPES{$value} ) { |
|
630
|
0
|
|
|
|
|
0
|
$self->warn("invalid retrieval type $value must be one of (" . |
|
631
|
|
|
|
|
|
|
join(",", keys %RETRIEVAL_TYPES), ")"); |
|
632
|
0
|
|
|
|
|
0
|
$value = $DEFAULT_RETRIEVAL_TYPE; |
|
633
|
|
|
|
|
|
|
} |
|
634
|
3
|
|
|
|
|
9
|
$self->{'_retrieval_type'} = $value; |
|
635
|
|
|
|
|
|
|
} |
|
636
|
3
|
|
|
|
|
6
|
return $self->{'_retrieval_type'}; |
|
637
|
|
|
|
|
|
|
} |
|
638
|
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
=head2 url_params |
|
640
|
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
Title : url_params |
|
642
|
|
|
|
|
|
|
Usage : my $params = $self->url_params or |
|
643
|
|
|
|
|
|
|
$self->url_params($params) |
|
644
|
|
|
|
|
|
|
Function: Get/Set the URL parameters for the Web Database |
|
645
|
|
|
|
|
|
|
Returns : url parameters for Web Database |
|
646
|
|
|
|
|
|
|
Args : $params - parameters to be appended to the URL for the WebDatabase |
|
647
|
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
=cut |
|
649
|
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
sub url_params { |
|
651
|
0
|
|
|
0
|
1
|
0
|
my ($self, $value) = @_; |
|
652
|
0
|
0
|
|
|
|
0
|
if( defined $value ) { |
|
653
|
0
|
|
|
|
|
0
|
$self->{'_urlparams'} = $value; |
|
654
|
|
|
|
|
|
|
} |
|
655
|
|
|
|
|
|
|
} |
|
656
|
|
|
|
|
|
|
|
|
657
|
|
|
|
|
|
|
=head2 ua |
|
658
|
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
Title : ua |
|
660
|
|
|
|
|
|
|
Usage : my $ua = $self->ua or |
|
661
|
|
|
|
|
|
|
$self->ua($ua) |
|
662
|
|
|
|
|
|
|
Function: Get/Set a LWP::UserAgent for use |
|
663
|
|
|
|
|
|
|
Returns : reference to LWP::UserAgent Object |
|
664
|
|
|
|
|
|
|
Args : $ua - must be a LWP::UserAgent |
|
665
|
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
=cut |
|
667
|
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
sub ua { |
|
669
|
4
|
|
|
4
|
1
|
12
|
my ($self, $ua) = @_; |
|
670
|
4
|
100
|
66
|
|
|
34
|
if( defined $ua && $ua->isa("LWP::UserAgent") ) { |
|
671
|
3
|
|
|
|
|
8
|
$self->{'_ua'} = $ua; |
|
672
|
|
|
|
|
|
|
} |
|
673
|
4
|
|
|
|
|
10
|
return $self->{'_ua'}; |
|
674
|
|
|
|
|
|
|
} |
|
675
|
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
=head2 postprocess_data |
|
677
|
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
Title : postprocess_data |
|
679
|
|
|
|
|
|
|
Usage : $self->postprocess_data ( 'type' => 'string', |
|
680
|
|
|
|
|
|
|
'location' => \$datastr); |
|
681
|
|
|
|
|
|
|
Function: process downloaded data before loading into a Bio::SeqIO |
|
682
|
|
|
|
|
|
|
Returns : void |
|
683
|
|
|
|
|
|
|
Args : hash with two keys - 'type' can be 'string' or 'file' |
|
684
|
|
|
|
|
|
|
- 'location' either file location or string |
|
685
|
|
|
|
|
|
|
reference containing data |
|
686
|
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
=cut |
|
688
|
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
sub postprocess_data { |
|
690
|
0
|
|
|
0
|
1
|
0
|
my ( $self, %args) = @_; |
|
691
|
0
|
|
|
|
|
0
|
return; |
|
692
|
|
|
|
|
|
|
} |
|
693
|
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
# private methods |
|
695
|
|
|
|
|
|
|
sub _request { |
|
696
|
0
|
|
|
0
|
|
0
|
my ($self, $url,$tmpfile) = @_; |
|
697
|
0
|
|
|
|
|
0
|
my ($resp); |
|
698
|
0
|
0
|
0
|
|
|
0
|
if( defined $tmpfile && $tmpfile ne '' ) { |
|
699
|
0
|
|
|
|
|
0
|
$resp = $self->ua->request($url, $tmpfile); |
|
700
|
|
|
|
|
|
|
} else { |
|
701
|
0
|
|
|
|
|
0
|
$resp = $self->ua->request($url); |
|
702
|
|
|
|
|
|
|
} |
|
703
|
|
|
|
|
|
|
|
|
704
|
0
|
0
|
|
|
|
0
|
if( $resp->is_error ) { |
|
705
|
0
|
|
|
|
|
0
|
$self->throw("WebDBSeqI Request Error:\n".$resp->as_string); |
|
706
|
|
|
|
|
|
|
} |
|
707
|
0
|
|
|
|
|
0
|
return $resp; |
|
708
|
|
|
|
|
|
|
} |
|
709
|
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
#mod_perl-safe replacement for the open(BLEH,'-|') call. if running |
|
711
|
|
|
|
|
|
|
#under mod_perl, detects it and closes the child's STDIN and STDOUT |
|
712
|
|
|
|
|
|
|
#handles |
|
713
|
|
|
|
|
|
|
sub _open_pipe { |
|
714
|
0
|
|
|
0
|
|
0
|
my ($self) = @_; |
|
715
|
|
|
|
|
|
|
# is mod_perl running? Which API? |
|
716
|
0
|
|
|
|
|
0
|
my $mp = $self->mod_perl_api; |
|
717
|
0
|
0
|
0
|
|
|
0
|
if($mp and ! our $loaded_apache_sp) { |
|
718
|
0
|
0
|
|
|
|
0
|
my $load_api = ($mp == 1) ? 'use Apache::SubProcess': 'use Apache2::SubProcess'; |
|
719
|
0
|
|
|
|
|
0
|
eval $load_api; |
|
720
|
0
|
0
|
|
|
|
0
|
$@ and $self->throw("$@\n$load_api module required for running under mod_perl"); |
|
721
|
0
|
|
|
|
|
0
|
$loaded_apache_sp = 1; |
|
722
|
|
|
|
|
|
|
} |
|
723
|
|
|
|
|
|
|
|
|
724
|
0
|
|
|
|
|
0
|
my $pipe = IO::Pipe->new(); |
|
725
|
|
|
|
|
|
|
|
|
726
|
0
|
|
|
|
|
0
|
local $SIG{CHLD} = 'IGNORE'; |
|
727
|
0
|
0
|
|
|
|
0
|
defined(my $pid = fork) |
|
728
|
|
|
|
|
|
|
or $self->throw("Couldn't fork: $!"); |
|
729
|
|
|
|
|
|
|
|
|
730
|
0
|
0
|
|
|
|
0
|
unless($pid) { |
|
731
|
|
|
|
|
|
|
#CHILD |
|
732
|
0
|
|
|
|
|
0
|
$pipe->writer(); |
|
733
|
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
#if we're running under mod_perl, clean up some things after this fork |
|
735
|
0
|
0
|
0
|
|
|
0
|
if ($ENV{MOD_PERL} and my $r = eval{Apache->request} ) { |
|
|
0
|
|
|
|
|
0
|
|
|
736
|
0
|
|
|
|
|
0
|
$r->cleanup_for_exec; |
|
737
|
|
|
|
|
|
|
#don't read or write the mod_perl parent's tied filehandles |
|
738
|
0
|
|
|
|
|
0
|
close STDIN; close STDOUT; |
|
|
0
|
|
|
|
|
0
|
|
|
739
|
0
|
0
|
|
|
|
0
|
setsid() or $self->throw('Could not detach from parent'); |
|
740
|
|
|
|
|
|
|
} |
|
741
|
|
|
|
|
|
|
} else { |
|
742
|
|
|
|
|
|
|
#PARENT |
|
743
|
0
|
|
|
|
|
0
|
$pipe->reader(); |
|
744
|
|
|
|
|
|
|
} |
|
745
|
0
|
|
|
|
|
0
|
return ( $pid, $pipe ); |
|
746
|
|
|
|
|
|
|
} |
|
747
|
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
# send web request to specified filehandle, or stdout, for streaming purposes |
|
749
|
|
|
|
|
|
|
sub _stream_request { |
|
750
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
|
751
|
0
|
|
|
|
|
0
|
my $request = shift; |
|
752
|
0
|
|
0
|
|
|
0
|
my $dest_fh = shift || \*STDOUT; |
|
753
|
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
# fork so as to pipe output of fetch process through to |
|
755
|
|
|
|
|
|
|
# postprocess_data method call. |
|
756
|
0
|
|
|
|
|
0
|
my ($child,$fetch) = $self->_open_pipe(); |
|
757
|
|
|
|
|
|
|
|
|
758
|
0
|
0
|
|
|
|
0
|
if ($child) { |
|
759
|
|
|
|
|
|
|
#PARENT |
|
760
|
0
|
|
|
|
|
0
|
local ($/) = "//\n"; # assume genbank/swiss format |
|
761
|
0
|
|
|
|
|
0
|
$| = 1; |
|
762
|
0
|
|
|
|
|
0
|
my $records = 0; |
|
763
|
0
|
|
|
|
|
0
|
while (my $record = <$fetch>) { |
|
764
|
0
|
|
|
|
|
0
|
$records++; |
|
765
|
0
|
|
|
|
|
0
|
$self->postprocess_data('type' => 'string', |
|
766
|
|
|
|
|
|
|
'location' => \$record); |
|
767
|
0
|
|
|
|
|
0
|
print $dest_fh $record; |
|
768
|
|
|
|
|
|
|
} |
|
769
|
0
|
|
|
|
|
0
|
$/ = "\n"; # reset to be safe; |
|
770
|
0
|
|
|
|
|
0
|
close $dest_fh; #must explicitly close here, because the hard |
|
771
|
|
|
|
|
|
|
#exits don't cloes them for us |
|
772
|
|
|
|
|
|
|
} |
|
773
|
|
|
|
|
|
|
else { |
|
774
|
|
|
|
|
|
|
#CHILD |
|
775
|
0
|
|
|
|
|
0
|
$| = 1; |
|
776
|
|
|
|
|
|
|
my $resp = $self->ua->request($request, |
|
777
|
0
|
|
|
0
|
|
0
|
sub { print $fetch $_[0] } |
|
778
|
0
|
|
|
|
|
0
|
); |
|
779
|
0
|
0
|
|
|
|
0
|
if( $resp->is_error ) { |
|
780
|
0
|
|
|
|
|
0
|
$self->throw("WebDBSeqI Request Error:\n".$resp->as_string); |
|
781
|
|
|
|
|
|
|
} |
|
782
|
0
|
|
|
|
|
0
|
close $fetch; #must explicitly close here, because the hard exists |
|
783
|
|
|
|
|
|
|
#don't close them for us |
|
784
|
0
|
|
|
|
|
0
|
POSIX::_exit(0); |
|
785
|
|
|
|
|
|
|
} |
|
786
|
|
|
|
|
|
|
} |
|
787
|
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
sub io { |
|
789
|
0
|
|
|
0
|
0
|
0
|
my ($self,$io) = @_; |
|
790
|
|
|
|
|
|
|
|
|
791
|
0
|
0
|
0
|
|
|
0
|
if(defined($io) || (! exists($self->{'_io'}))) { |
|
792
|
0
|
0
|
|
|
|
0
|
$io = Bio::Root::IO->new() unless $io; |
|
793
|
0
|
|
|
|
|
0
|
$self->{'_io'} = $io; |
|
794
|
|
|
|
|
|
|
} |
|
795
|
0
|
|
|
|
|
0
|
return $self->{'_io'}; |
|
796
|
|
|
|
|
|
|
} |
|
797
|
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
=head2 delay |
|
800
|
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
Title : delay |
|
802
|
|
|
|
|
|
|
Usage : $secs = $self->delay([$secs]) |
|
803
|
|
|
|
|
|
|
Function: get/set number of seconds to delay between fetches |
|
804
|
|
|
|
|
|
|
Returns : number of seconds to delay |
|
805
|
|
|
|
|
|
|
Args : new value |
|
806
|
|
|
|
|
|
|
|
|
807
|
|
|
|
|
|
|
NOTE: the default is to use the value specified by delay_policy(). |
|
808
|
|
|
|
|
|
|
This can be overridden by calling this method, or by passing the |
|
809
|
|
|
|
|
|
|
-delay argument to new(). |
|
810
|
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
=cut |
|
812
|
|
|
|
|
|
|
|
|
813
|
|
|
|
|
|
|
sub delay { |
|
814
|
4
|
|
|
4
|
1
|
7
|
my $self = shift; |
|
815
|
4
|
|
|
|
|
9
|
my $d = $self->{'_delay'}; |
|
816
|
4
|
100
|
|
|
|
11
|
$self->{'_delay'} = shift if @_; |
|
817
|
4
|
|
|
|
|
8
|
$d; |
|
818
|
|
|
|
|
|
|
} |
|
819
|
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
=head2 delay_policy |
|
821
|
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
Title : delay_policy |
|
823
|
|
|
|
|
|
|
Usage : $secs = $self->delay_policy |
|
824
|
|
|
|
|
|
|
Function: return number of seconds to delay between calls to remote db |
|
825
|
|
|
|
|
|
|
Returns : number of seconds to delay |
|
826
|
|
|
|
|
|
|
Args : none |
|
827
|
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
NOTE: The default delay policy is 0s. Override in subclasses to |
|
829
|
|
|
|
|
|
|
implement delays. The timer has only second resolution, so the delay |
|
830
|
|
|
|
|
|
|
will actually be +/- 1s. |
|
831
|
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
=cut |
|
833
|
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
sub delay_policy { |
|
835
|
2
|
|
|
2
|
1
|
5
|
my $self = shift; |
|
836
|
2
|
|
|
|
|
5
|
return 0; |
|
837
|
|
|
|
|
|
|
} |
|
838
|
|
|
|
|
|
|
|
|
839
|
|
|
|
|
|
|
=head2 _sleep |
|
840
|
|
|
|
|
|
|
|
|
841
|
|
|
|
|
|
|
Title : _sleep |
|
842
|
|
|
|
|
|
|
Usage : $self->_sleep |
|
843
|
|
|
|
|
|
|
Function: sleep for a number of seconds indicated by the delay policy |
|
844
|
|
|
|
|
|
|
Returns : none |
|
845
|
|
|
|
|
|
|
Args : none |
|
846
|
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
NOTE: This method keeps track of the last time it was called and only |
|
848
|
|
|
|
|
|
|
imposes a sleep if it was called more recently than the delay_policy() |
|
849
|
|
|
|
|
|
|
allows. |
|
850
|
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
=cut |
|
852
|
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
sub _sleep { |
|
854
|
1
|
|
|
1
|
|
3
|
my $self = shift; |
|
855
|
1
|
|
|
|
|
1
|
my $last_invocation = $LAST_INVOCATION_TIME; |
|
856
|
1
|
50
|
|
|
|
4
|
if (time - $LAST_INVOCATION_TIME < $self->delay) { |
|
857
|
0
|
|
|
|
|
0
|
my $delay = $self->delay - (time - $LAST_INVOCATION_TIME); |
|
858
|
0
|
0
|
|
|
|
0
|
warn "sleeping for $delay seconds\n" if $self->verbose > 0; |
|
859
|
0
|
|
|
|
|
0
|
sleep $delay; |
|
860
|
|
|
|
|
|
|
} |
|
861
|
1
|
|
|
|
|
2
|
$LAST_INVOCATION_TIME = time; |
|
862
|
|
|
|
|
|
|
} |
|
863
|
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
=head2 mod_perl_api |
|
865
|
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
Title : mod_perl_api |
|
867
|
|
|
|
|
|
|
Usage : $version = self->mod_perl_api |
|
868
|
|
|
|
|
|
|
Function: Returns API version of mod_perl being used based on set env. variables |
|
869
|
|
|
|
|
|
|
Returns : mod_perl API version; if mod_perl isn't loaded, returns 0 |
|
870
|
|
|
|
|
|
|
Args : none |
|
871
|
|
|
|
|
|
|
|
|
872
|
|
|
|
|
|
|
=cut |
|
873
|
|
|
|
|
|
|
|
|
874
|
|
|
|
|
|
|
sub mod_perl_api { |
|
875
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
|
876
|
|
|
|
|
|
|
my $v = $ENV{MOD_PERL} ? |
|
877
|
0
|
0
|
0
|
|
|
|
( exists $ENV{MOD_PERL_API_VERSION} && $ENV{MOD_PERL_API_VERSION} >= 2 ) ? |
|
|
|
0
|
|
|
|
|
|
|
878
|
|
|
|
|
|
|
2 : |
|
879
|
|
|
|
|
|
|
1 |
|
880
|
|
|
|
|
|
|
: 0; |
|
881
|
0
|
|
|
|
|
|
return $v; |
|
882
|
|
|
|
|
|
|
} |
|
883
|
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
1; |