File Coverage

Bio/DB/WebDBSeqI.pm
Criterion Covered Total %
statement 94 298 31.5
branch 18 138 13.0
condition 3 71 4.2
subroutine 24 46 52.1
pod 24 26 92.3
total 163 579 28.1


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::DB::WebDBSeqI
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Jason Stajich
7             #
8             # Copyright Jason Stajich
9             #
10             # You may distribute this module under the same terms as perl itself
11             #
12             # POD documentation - main docs before the code
13             #
14              
15             =head1 NAME
16              
17             Bio::DB::WebDBSeqI - Object Interface to generalize Web Databases
18             for retrieving sequences
19              
20             =head1 SYNOPSIS
21              
22             # get a WebDBSeqI object somehow
23             # assuming it is a nucleotide db
24             my $seq = $db->get_Seq_by_id('ROA1_HUMAN')
25              
26             =head1 DESCRIPTION
27              
28             Provides core set of functionality for connecting to a web based
29             database for retriving sequences.
30              
31             Users wishing to add another Web Based Sequence Dabatase will need to
32             extend this class (see L or L for
33             examples) and implement the get_request method which returns a
34             HTTP::Request for the specified uids (accessions, ids, etc depending
35             on what query types the database accepts).
36              
37             =head1 FEEDBACK
38              
39             =head2 Mailing Lists
40              
41             User feedback is an integral part of the
42             evolution of this and other Bioperl modules. Send
43             your comments and suggestions preferably to one
44             of the Bioperl mailing lists. Your participation
45             is much appreciated.
46              
47             bioperl-l@bioperl.org - General discussion
48             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
49              
50             =head2 Support
51              
52             Please direct usage questions or support issues to the mailing list:
53              
54             I
55              
56             rather than to the module maintainer directly. Many experienced and
57             reponsive experts will be able look at the problem and quickly
58             address it. Please include a thorough description of the problem
59             with code and data examples if at all possible.
60              
61             =head2 Reporting Bugs
62              
63             Report bugs to the Bioperl bug tracking system to
64             help us keep track the bugs and their resolution.
65             Bug reports can be submitted via the web.
66              
67             https://github.com/bioperl/bioperl-live/issues
68              
69             =head1 AUTHOR - Jason Stajich
70              
71             Email E jason@bioperl.org E
72              
73             =head1 APPENDIX
74              
75             The rest of the documentation details each of the
76             object methods. Internal methods are usually
77             preceded with a _
78              
79             =cut
80              
81             # Let the code begin...
82              
83             package Bio::DB::WebDBSeqI;
84 7     7   76 use strict;
  7         11  
  7         187  
85 7         375 use vars qw($MODVERSION %RETRIEVAL_TYPES $DEFAULT_RETRIEVAL_TYPE
86 7     7   21 $DEFAULTFORMAT $LAST_INVOCATION_TIME @ATTRIBUTES);
  7         8  
87              
88 7     7   1201 use Bio::SeqIO;
  7         9  
  7         167  
89 7     7   26 use Bio::Root::IO;
  7         8  
  7         102  
90 7     7   1570 use LWP::UserAgent;
  7         42423  
  7         151  
91 7     7   2945 use POSIX 'setsid';
  7         31783  
  7         32  
92 7     7   5777 use HTTP::Request::Common;
  7         8  
  7         315  
93 7     7   26 use HTTP::Response;
  7         7  
  7         163  
94 7     7   25 use File::Spec;
  7         10  
  7         173  
95 7     7   2838 use IO::Pipe;
  7         5787  
  7         162  
96 7     7   1227 use IO::String;
  7         5095  
  7         137  
97 7     7   30 use Bio::Root::Root;
  7         10  
  7         133  
98              
99 7     7   21 use base qw(Bio::DB::RandomAccessI);
  7         8  
  7         1707  
100              
101             BEGIN {
102 7     7   14 $MODVERSION = '0.8';
103 7         23 %RETRIEVAL_TYPES = ('io_string' => 1,
104             'tempfile' => 1,
105             'pipeline' => 1,
106             );
107 7         8 $DEFAULT_RETRIEVAL_TYPE = 'pipeline';
108 7         7 $DEFAULTFORMAT = 'fasta';
109 7         15128 $LAST_INVOCATION_TIME = 0;
110             }
111              
112             sub new {
113 3     3 1 6 my ($class, @args) = @_;
114 3         22 my $self = $class->SUPER::new(@args);
115 3         28 my ($baseaddress, $params, $ret_type, $format,$delay,$db) =
116             $self->_rearrange([qw(BASEADDRESS PARAMS RETRIEVALTYPE FORMAT DELAY DB)],
117             @args);
118              
119 3 50       12 $ret_type = $DEFAULT_RETRIEVAL_TYPE unless ( $ret_type);
120 3 50       8 $baseaddress && $self->url_base_address($baseaddress);
121 3 50       6 $params && $self->url_params($params);
122 3 50       7 $db && $self->db($db);
123 3 50       22 $ret_type && $self->retrieval_type($ret_type);
124 3 50       14 $delay = $self->delay_policy unless defined $delay;
125 3         12 $self->delay($delay);
126              
127              
128             # insure we always have a default format set for retrieval
129             # even though this will be immedietly overwritten by most sub classes
130 3 50 33     17 $format = $self->default_format unless ( defined $format &&
131             $format ne '' );
132              
133 3         13 $self->request_format($format);
134 3         20 my $ua = LWP::UserAgent->new(env_proxy => 1);
135 3         22638 $ua->agent(ref($self) ."/$MODVERSION");
136 3         125 $self->ua($ua);
137 3         6 $self->{'_authentication'} = [];
138 3         8 return $self;
139             }
140              
141             # from Bio::DB::RandomAccessI
142              
143             =head2 get_Seq_by_id
144              
145             Title : get_Seq_by_id
146             Usage : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
147             Function: Gets a Bio::Seq object by its name
148             Returns : a Bio::Seq object
149             Args : the id (as a string) of a sequence
150             Throws : "id does not exist" exception
151              
152              
153             =cut
154              
155             sub get_Seq_by_id {
156 0     0 1 0 my ($self,$seqid) = @_;
157 0         0 $self->_sleep;
158 0         0 my $seqio = $self->get_Stream_by_id([$seqid]);
159 0 0       0 $self->throw("id does not exist") if( !defined $seqio ) ;
160 0 0 0     0 if ($self->can('complexity') && defined $self->complexity && $self->complexity==0) {
      0        
161 0         0 $self->warn("When complexity is set to 0, use get_Stream_by_id\n".
162             "Returning Bio::SeqIO object");
163 0         0 return $seqio;
164             }
165 0         0 my @seqs;
166 0         0 while( my $seq = $seqio->next_seq() ) { push @seqs, $seq; }
  0         0  
167              
168             # Since $seqio will not be used anymore, explicitly close its filehandle
169             # or it will cause trouble later on cleanup
170 0         0 $seqio->close;
171              
172 0 0       0 $self->throw("id '$seqid' does not exist") unless @seqs;
173 0 0       0 if( wantarray ) { return @seqs } else { return shift @seqs }
  0         0  
  0         0  
174             }
175              
176             =head2 get_Seq_by_acc
177              
178             Title : get_Seq_by_acc
179             Usage : $seq = $db->get_Seq_by_acc('X77802');
180             Function: Gets a Bio::Seq object by accession number
181             Returns : A Bio::Seq object
182             Args : accession number (as a string)
183             Throws : "acc does not exist" exception
184              
185             =cut
186              
187             sub get_Seq_by_acc {
188 1     1 1 5 my ($self,$seqid) = @_;
189 1         5 $self->_sleep;
190 1         8 my $seqio = $self->get_Stream_by_acc($seqid);
191 0 0       0 $self->throw("acc '$seqid' does not exist") if( ! defined $seqio );
192 0 0 0     0 if ($self->can('complexity') && defined $self->complexity && $self->complexity==0) {
      0        
193 0         0 $self->warn("When complexity is set to 0, use get_Stream_by_acc\n".
194             "Returning Bio::SeqIO object");
195 0         0 return $seqio;
196             }
197 0         0 my @seqs;
198 0         0 while( my $seq = $seqio->next_seq() ) { push @seqs, $seq; }
  0         0  
199 0 0       0 $self->throw("acc $seqid does not exist") unless @seqs;
200 0 0       0 if( wantarray ) { return @seqs } else { return shift @seqs }
  0         0  
  0         0  
201             }
202              
203              
204             =head2 get_Seq_by_gi
205              
206             Title : get_Seq_by_gi
207             Usage : $seq = $db->get_Seq_by_gi('405830');
208             Function: Gets a Bio::Seq object by gi number
209             Returns : A Bio::Seq object
210             Args : gi number (as a string)
211             Throws : "gi does not exist" exception
212              
213             =cut
214              
215             sub get_Seq_by_gi {
216 0     0 1 0 my ($self,$seqid) = @_;
217 0         0 $self->_sleep;
218 0         0 my $seqio = $self->get_Stream_by_gi($seqid);
219 0 0       0 $self->throw("gi does not exist") if( !defined $seqio );
220 0 0 0     0 if ($self->can('complexity') && defined $self->complexity && $self->complexity==0) {
      0        
221 0         0 $self->warn("When complexity is set to 0, use get_Stream_by_gi\n".
222             "Returning Bio::SeqIO object");
223 0         0 return $seqio;
224             }
225 0         0 my @seqs;
226 0         0 while( my $seq = $seqio->next_seq() ) { push @seqs, $seq; }
  0         0  
227 0 0       0 $self->throw("gi does not exist") unless @seqs;
228 0 0       0 if( wantarray ) { return @seqs } else { return shift @seqs }
  0         0  
  0         0  
229             }
230              
231             =head2 get_Seq_by_version
232              
233             Title : get_Seq_by_version
234             Usage : $seq = $db->get_Seq_by_version('X77802.1');
235             Function: Gets a Bio::Seq object by sequence version
236             Returns : A Bio::Seq object
237             Args : accession.version (as a string)
238             Throws : "acc.version does not exist" exception
239              
240             =cut
241              
242             sub get_Seq_by_version {
243 0     0 1 0 my ($self,$seqid) = @_;
244 0         0 $self->_sleep;
245 0         0 my $seqio = $self->get_Stream_by_version($seqid);
246 0 0       0 $self->throw("accession.version does not exist") if( !defined $seqio );
247 0 0 0     0 if ($self->can('complexity') && defined $self->complexity && $self->complexity==0) {
      0        
248 0         0 $self->warn("When complexity is set to 0, use get_Stream_by_version\n".
249             "Returning Bio::SeqIO object");
250 0         0 return $seqio;
251             }
252 0         0 my @seqs;
253 0         0 while( my $seq = $seqio->next_seq() ) { push @seqs, $seq; }
  0         0  
254 0 0       0 $self->throw("accession.version does not exist") unless @seqs;
255 0 0       0 if( wantarray ) { return @seqs } else { return shift @seqs }
  0         0  
  0         0  
256             }
257              
258             # implementing class must define these
259              
260             =head2 get_request
261              
262             Title : get_request
263             Usage : my $url = $self->get_request
264             Function: returns a HTTP::Request object
265             Returns :
266             Args : %qualifiers = a hash of qualifiers (ids, format, etc)
267              
268             =cut
269              
270             sub get_request {
271 0     0 1 0 my ($self) = @_;
272 0         0 my $msg = "Implementing class must define method get_request in class WebDBSeqI";
273 0         0 $self->throw($msg);
274             }
275              
276             # class methods
277              
278             =head2 get_Stream_by_id
279              
280             Title : get_Stream_by_id
281             Usage : $stream = $db->get_Stream_by_id( [$uid1, $uid2] );
282             Function: Gets a series of Seq objects by unique identifiers
283             Returns : a Bio::SeqIO stream object
284             Args : $ref : a reference to an array of unique identifiers for
285             the desired sequence entries
286              
287              
288             =cut
289              
290             sub get_Stream_by_id {
291 0     0 1 0 my ($self, $ids) = @_;
292 0         0 my ($webfmt,$localfmt) = $self->request_format;
293 0         0 return $self->get_seq_stream('-uids' => $ids, '-mode' => 'single',
294             '-format' => $webfmt);
295             }
296              
297             *get_Stream_by_batch = sub {
298 0     0   0 my $self = shift;
299 0         0 $self->deprecated('get_Stream_by_batch() is deprecated; use get_Stream_by_id() instead');
300 0         0 $self->get_Stream_by_id(@_)
301             };
302              
303              
304             =head2 get_Stream_by_acc
305              
306             Title : get_Stream_by_acc
307             Usage : $seq = $db->get_Stream_by_acc([$acc1, $acc2]);
308             Function: Gets a series of Seq objects by accession numbers
309             Returns : a Bio::SeqIO stream object
310             Args : $ref : a reference to an array of accession numbers for
311             the desired sequence entries
312             Note : For GenBank, this just calls the same code for get_Stream_by_id()
313              
314             =cut
315              
316             sub get_Stream_by_acc {
317 0     0 1 0 my ($self, $ids ) = @_;
318 0         0 return $self->get_seq_stream('-uids' => $ids, '-mode' => 'single');
319             }
320              
321              
322             =head2 get_Stream_by_gi
323              
324             Title : get_Stream_by_gi
325             Usage : $seq = $db->get_Stream_by_gi([$gi1, $gi2]);
326             Function: Gets a series of Seq objects by gi numbers
327             Returns : a Bio::SeqIO stream object
328             Args : $ref : a reference to an array of gi numbers for
329             the desired sequence entries
330             Note : For GenBank, this just calls the same code for get_Stream_by_id()
331              
332             =cut
333              
334             sub get_Stream_by_gi {
335 0     0 1 0 my ($self, $ids ) = @_;
336 0         0 return $self->get_seq_stream('-uids' => $ids, '-mode' => 'gi');
337             }
338              
339             =head2 get_Stream_by_version
340              
341             Title : get_Stream_by_version
342             Usage : $seq = $db->get_Stream_by_version([$version1, $version2]);
343             Function: Gets a series of Seq objects by accession.versions
344             Returns : a Bio::SeqIO stream object
345             Args : $ref : a reference to an array of accession.version strings for
346             the desired sequence entries
347             Note : For GenBank, this is implemeted in NCBIHelper
348              
349             =cut
350              
351             sub get_Stream_by_version {
352 0     0 1 0 my ($self, $ids ) = @_;
353             # $self->throw("Implementing class should define this method!");
354 0         0 return $self->get_seq_stream('-uids' => $ids, '-mode' => 'version'); # how it should work
355             }
356              
357             =head2 get_Stream_by_query
358              
359             Title : get_Stream_by_query
360             Usage : $stream = $db->get_Stream_by_query($query);
361             Function: Gets a series of Seq objects by way of a query string or oject
362             Returns : a Bio::SeqIO stream object
363             Args : $query : A string that uses the appropriate query language
364             for the database or a Bio::DB::QueryI object. It is suggested
365             that you create the Bio::DB::Query object first and interrogate
366             it for the entry count before you fetch a potentially large stream.
367              
368             =cut
369              
370             sub get_Stream_by_query {
371 0     0 1 0 my ($self, $query ) = @_;
372 0         0 return $self->get_seq_stream('-query' => $query, '-mode'=>'query');
373             }
374              
375             =head2 default_format
376              
377             Title : default_format
378             Usage : my $format = $self->default_format
379             Function: Returns default sequence format for this module
380             Returns : string
381             Args : none
382              
383             =cut
384              
385             sub default_format {
386 1     1 1 3 return $DEFAULTFORMAT;
387             }
388              
389             # sorry, but this is hacked in because of BioFetch problems...
390             sub db {
391 0     0 0 0 my $self = shift;
392 0         0 my $d = $self->{_db};
393 0 0       0 $self->{_db} = shift if @_;
394 0         0 $d;
395             }
396              
397             =head2 request_format
398              
399             Title : request_format
400             Usage : my ($req_format, $ioformat) = $self->request_format;
401             $self->request_format("genbank");
402             $self->request_format("fasta");
403             Function: Get/Set sequence format retrieval. The get-form will normally not
404             be used outside of this and derived modules.
405             Returns : Array of two strings, the first representing the format for
406             retrieval, and the second specifying the corresponding SeqIO format.
407             Args : $format = sequence format
408              
409             =cut
410              
411             sub request_format {
412 5     5 1 7 my ($self, $value) = @_;
413              
414 5 100       9 if( defined $value ) {
415 2         5 $self->{'_format'} = [ $value, $value];
416             }
417 5         4 return @{$self->{'_format'}};
  5         13  
418             }
419              
420             =head2 get_seq_stream
421              
422             Title : get_seq_stream
423             Usage : my $seqio = $self->get_seq_stream(%qualifiers)
424             Function: builds a url and queries a web db
425             Returns : a Bio::SeqIO stream capable of producing sequence
426             Args : %qualifiers = a hash qualifiers that the implementing class
427             will process to make a url suitable for web querying
428              
429             =cut
430              
431             sub get_seq_stream {
432 0     0 1 0 my ($self, %qualifiers) = @_;
433 0         0 my ($rformat, $ioformat) = $self->request_format();
434 0         0 my $seen = 0;
435 0         0 foreach my $key ( keys %qualifiers ) {
436 0 0       0 if( $key =~ /format/i ) {
437 0         0 $rformat = $qualifiers{$key};
438 0         0 $seen = 1;
439             }
440             }
441 0 0       0 $qualifiers{'-format'} = $rformat if( !$seen);
442 0         0 ($rformat, $ioformat) = $self->request_format($rformat);
443             # These parameters are implemented for Bio::DB::GenBank objects only
444 0 0       0 if($self->isa('Bio::DB::GenBank')) {
445 0 0       0 $self->seq_start() && ($qualifiers{'-seq_start'} = $self->seq_start());
446 0 0       0 $self->seq_stop() && ($qualifiers{'-seq_stop'} = $self->seq_stop());
447 0 0       0 $self->strand() && ($qualifiers{'-strand'} = $self->strand());
448 0 0       0 defined $self->complexity() && ($qualifiers{'-complexity'} = $self->complexity());
449             }
450 0         0 my $request = $self->get_request(%qualifiers);
451 0 0       0 $request->proxy_authorization_basic($self->authentication)
452             if ( $self->authentication);
453 0         0 $self->debug("request is ". $request->as_string(). "\n");
454              
455             # workaround for MSWin systems
456 0 0 0     0 $self->retrieval_type('io_string') if $self->retrieval_type =~ /pipeline/ && $^O =~ /^MSWin/;
457              
458 0 0       0 if ($self->retrieval_type =~ /pipeline/) {
459             # Try to create a stream using POSIX fork-and-pipe facility.
460             # this is a *big* win when fetching thousands of sequences from
461             # a web database because we can return the first entry while
462             # transmission is still in progress.
463             # Also, no need to keep sequence in memory or in a temporary file.
464             # If this fails (Windows, MacOS 9), we fall back to non-pipelined access.
465              
466             # fork and pipe: _stream_request()=>
467 0         0 my ($result,$stream) = $self->_open_pipe();
468              
469 0 0       0 if (defined $result) {
470 0         0 $DB::fork_TTY = File::Spec->devnull; # prevents complaints from debugger
471 0 0       0 if (!$result) { # in child process
472 0         0 $self->_stream_request($request,$stream);
473 0         0 POSIX::_exit(0); #prevent END blocks from executing in this forked child
474             }
475             else {
476 0         0 return Bio::SeqIO->new('-verbose' => $self->verbose,
477             '-format' => $ioformat,
478             '-fh' => $stream);
479             }
480             }
481             else {
482 0         0 $self->retrieval_type('io_string');
483             }
484             }
485              
486 0 0       0 if ($self->retrieval_type =~ /temp/i) {
487 0         0 my $dir = $self->io->tempdir( CLEANUP => 1);
488 0         0 my ( $fh, $tmpfile) = $self->io()->tempfile( DIR => $dir );
489 0         0 close $fh;
490 0         0 my $resp = $self->_request($request, $tmpfile);
491 0 0 0     0 if( ! -e $tmpfile || -z $tmpfile || ! $resp->is_success() ) {
      0        
492 0         0 $self->throw("WebDBSeqI Error - check query sequences!\n");
493             }
494 0         0 $self->postprocess_data('type' => 'file',
495             'location' => $tmpfile);
496             # this may get reset when requesting batch mode
497 0         0 ($rformat,$ioformat) = $self->request_format();
498 0 0       0 if( $self->verbose > 0 ) {
499 0 0       0 open my $ERR, '<', $tmpfile or $self->throw("Could not read file '$tmpfile': $!");
500 0         0 while(<$ERR>) { $self->debug($_);}
  0         0  
501 0         0 close $ERR;
502             }
503              
504 0         0 return Bio::SeqIO->new('-verbose' => $self->verbose,
505             '-format' => $ioformat,
506             '-file' => $tmpfile);
507             }
508              
509 0 0       0 if ($self->retrieval_type =~ /io_string/i ) {
510 0         0 my $resp = $self->_request($request);
511 0         0 my $content = $resp->content_ref;
512 0         0 $self->debug( "content is $$content\n");
513 0 0 0     0 if (!$resp->is_success() || length($$content) == 0) {
514 0         0 $self->throw("WebDBSeqI Error - check query sequences!\n");
515             }
516 0         0 ($rformat,$ioformat) = $self->request_format();
517 0         0 $self->postprocess_data('type'=> 'string',
518             'location' => $content);
519 0         0 $self->debug( "str is $$content\n");
520 0         0 return Bio::SeqIO->new('-verbose' => $self->verbose,
521             '-format' => $ioformat,
522             '-fh' => new IO::String($$content));
523             }
524              
525             # if we got here, we don't know how to handle the retrieval type
526 0         0 $self->throw("retrieval type " . $self->retrieval_type .
527             " unsupported\n");
528             }
529              
530             =head2 url_base_address
531              
532             Title : url_base_address
533             Usage : my $address = $self->url_base_address or
534             $self->url_base_address($address)
535             Function: Get/Set the base URL for the Web Database
536             Returns : Base URL for the Web Database
537             Args : $address - URL for the WebDatabase
538              
539             =cut
540              
541             sub url_base_address {
542 3     3 1 4 my $self = shift;
543 3         2 my $d = $self->{'_baseaddress'};
544 3 100       6 $self->{'_baseaddress'} = shift if @_;
545 3         8 $d;
546             }
547              
548              
549             =head2 proxy
550              
551             Title : proxy
552             Usage : $httpproxy = $db->proxy('http') or
553             $db->proxy(['http','ftp'], 'http://myproxy' )
554             Function: Get/Set a proxy for use of proxy
555             Returns : a string indicating the proxy
556             Args : $protocol : an array ref of the protocol(s) to set/get
557             $proxyurl : url of the proxy to use for the specified protocol
558             $username : username (if proxy requires authentication)
559             $password : password (if proxy requires authentication)
560              
561             =cut
562              
563             sub proxy {
564 0     0 1 0 my ($self,$protocol,$proxy,$username,$password) = @_;
565 0 0 0     0 return if ( !defined $self->ua || !defined $protocol
      0        
566             || !defined $proxy );
567 0 0 0     0 $self->authentication($username, $password)
568             if ($username && $password);
569 0         0 return $self->ua->proxy($protocol,$proxy);
570             }
571              
572             =head2 authentication
573              
574             Title : authentication
575             Usage : $db->authentication($user,$pass)
576             Function: Get/Set authentication credentials
577             Returns : Array of user/pass
578             Args : Array or user/pass
579              
580              
581             =cut
582              
583             sub authentication{
584 0     0 1 0 my ($self,$u,$p) = @_;
585              
586 0 0 0     0 if( defined $u && defined $p ) {
587 0         0 $self->{'_authentication'} = [ $u,$p];
588             }
589 0         0 return @{$self->{'_authentication'}};
  0         0  
590             }
591              
592              
593             =head2 retrieval_type
594              
595             Title : retrieval_type
596             Usage : $self->retrieval_type($type);
597             my $type = $self->retrieval_type
598             Function: Get/Set a proxy for retrieval_type (pipeline, io_string or tempfile)
599             Returns : string representing retrieval type
600             Args : $value - the value to store
601              
602             This setting affects how the data stream from the remote web server is
603             processed and passed to the Bio::SeqIO layer. Three types of retrieval
604             types are currently allowed:
605              
606             pipeline Perform a fork in an attempt to begin streaming
607             while the data is still downloading from the remote
608             server. Disk, memory and speed efficient, but will
609             not work on Windows or MacOS 9 platforms.
610              
611             io_string Store downloaded database entry(s) in memory. Can be
612             problematic for batch downloads because entire set
613             of entries must fit in memory. Alll entries must be
614             downloaded before processing can begin.
615              
616             tempfile Store downloaded database entry(s) in a temporary file.
617             All entries must be downloaded before processing can
618             begin.
619              
620             The default is pipeline, with automatic fallback to io_string if
621             pipelining is not available.
622              
623             =cut
624              
625             sub retrieval_type {
626 3     3 1 4 my ($self, $value) = @_;
627 3 50       8 if( defined $value ) {
628 3         5 $value = lc $value;
629 3 50       9 if( ! $RETRIEVAL_TYPES{$value} ) {
630 0         0 $self->warn("invalid retrieval type $value must be one of (" .
631             join(",", keys %RETRIEVAL_TYPES), ")");
632 0         0 $value = $DEFAULT_RETRIEVAL_TYPE;
633             }
634 3         9 $self->{'_retrieval_type'} = $value;
635             }
636 3         5 return $self->{'_retrieval_type'};
637             }
638              
639             =head2 url_params
640              
641             Title : url_params
642             Usage : my $params = $self->url_params or
643             $self->url_params($params)
644             Function: Get/Set the URL parameters for the Web Database
645             Returns : url parameters for Web Database
646             Args : $params - parameters to be appended to the URL for the WebDatabase
647              
648             =cut
649              
650             sub url_params {
651 0     0 1 0 my ($self, $value) = @_;
652 0 0       0 if( defined $value ) {
653 0         0 $self->{'_urlparams'} = $value;
654             }
655             }
656              
657             =head2 ua
658              
659             Title : ua
660             Usage : my $ua = $self->ua or
661             $self->ua($ua)
662             Function: Get/Set a LWP::UserAgent for use
663             Returns : reference to LWP::UserAgent Object
664             Args : $ua - must be a LWP::UserAgent
665              
666             =cut
667              
668             sub ua {
669 4     4 1 7 my ($self, $ua) = @_;
670 4 100 66     29 if( defined $ua && $ua->isa("LWP::UserAgent") ) {
671 3         6 $self->{'_ua'} = $ua;
672             }
673 4         7 return $self->{'_ua'};
674             }
675              
676             =head2 postprocess_data
677              
678             Title : postprocess_data
679             Usage : $self->postprocess_data ( 'type' => 'string',
680             'location' => \$datastr);
681             Function: process downloaded data before loading into a Bio::SeqIO
682             Returns : void
683             Args : hash with two keys - 'type' can be 'string' or 'file'
684             - 'location' either file location or string
685             reference containing data
686              
687             =cut
688              
689             sub postprocess_data {
690 0     0 1 0 my ( $self, %args) = @_;
691 0         0 return;
692             }
693              
694             # private methods
695             sub _request {
696 0     0   0 my ($self, $url,$tmpfile) = @_;
697 0         0 my ($resp);
698 0 0 0     0 if( defined $tmpfile && $tmpfile ne '' ) {
699 0         0 $resp = $self->ua->request($url, $tmpfile);
700             } else {
701 0         0 $resp = $self->ua->request($url);
702             }
703              
704 0 0       0 if( $resp->is_error ) {
705 0         0 $self->throw("WebDBSeqI Request Error:\n".$resp->as_string);
706             }
707 0         0 return $resp;
708             }
709              
710             #mod_perl-safe replacement for the open(BLEH,'-|') call. if running
711             #under mod_perl, detects it and closes the child's STDIN and STDOUT
712             #handles
713             sub _open_pipe {
714 0     0   0 my ($self) = @_;
715             # is mod_perl running? Which API?
716 0         0 my $mp = $self->mod_perl_api;
717 0 0 0     0 if($mp and ! our $loaded_apache_sp) {
718 0 0       0 my $load_api = ($mp == 1) ? 'use Apache::SubProcess': 'use Apache2::SubProcess';
719 0         0 eval $load_api;
720 0 0       0 $@ and $self->throw("$@\n$load_api module required for running under mod_perl");
721 0         0 $loaded_apache_sp = 1;
722             }
723              
724 0         0 my $pipe = IO::Pipe->new();
725              
726 0         0 local $SIG{CHLD} = 'IGNORE';
727 0 0       0 defined(my $pid = fork)
728             or $self->throw("Couldn't fork: $!");
729              
730 0 0       0 unless($pid) {
731             #CHILD
732 0         0 $pipe->writer();
733              
734             #if we're running under mod_perl, clean up some things after this fork
735 0 0 0     0 if ($ENV{MOD_PERL} and my $r = eval{Apache->request} ) {
  0         0  
736 0         0 $r->cleanup_for_exec;
737             #don't read or write the mod_perl parent's tied filehandles
738 0         0 close STDIN; close STDOUT;
  0         0  
739 0 0       0 setsid() or $self->throw('Could not detach from parent');
740             }
741             } else {
742             #PARENT
743 0         0 $pipe->reader();
744             }
745 0         0 return ( $pid, $pipe );
746             }
747              
748             # send web request to specified filehandle, or stdout, for streaming purposes
749             sub _stream_request {
750 0     0   0 my $self = shift;
751 0         0 my $request = shift;
752 0   0     0 my $dest_fh = shift || \*STDOUT;
753              
754             # fork so as to pipe output of fetch process through to
755             # postprocess_data method call.
756 0         0 my ($child,$fetch) = $self->_open_pipe();
757              
758 0 0       0 if ($child) {
759             #PARENT
760 0         0 local ($/) = "//\n"; # assume genbank/swiss format
761 0         0 $| = 1;
762 0         0 my $records = 0;
763 0         0 while (my $record = <$fetch>) {
764 0         0 $records++;
765 0         0 $self->postprocess_data('type' => 'string',
766             'location' => \$record);
767 0         0 print $dest_fh $record;
768             }
769 0         0 $/ = "\n"; # reset to be safe;
770 0         0 close $dest_fh; #must explicitly close here, because the hard
771             #exits don't cloes them for us
772             }
773             else {
774             #CHILD
775 0         0 $| = 1;
776             my $resp = $self->ua->request($request,
777 0     0   0 sub { print $fetch $_[0] }
778 0         0 );
779 0 0       0 if( $resp->is_error ) {
780 0         0 $self->throw("WebDBSeqI Request Error:\n".$resp->as_string);
781             }
782 0         0 close $fetch; #must explicitly close here, because the hard exists
783             #don't close them for us
784 0         0 POSIX::_exit(0);
785             }
786             }
787              
788             sub io {
789 0     0 0 0 my ($self,$io) = @_;
790              
791 0 0 0     0 if(defined($io) || (! exists($self->{'_io'}))) {
792 0 0       0 $io = Bio::Root::IO->new() unless $io;
793 0         0 $self->{'_io'} = $io;
794             }
795 0         0 return $self->{'_io'};
796             }
797              
798              
799             =head2 delay
800              
801             Title : delay
802             Usage : $secs = $self->delay([$secs])
803             Function: get/set number of seconds to delay between fetches
804             Returns : number of seconds to delay
805             Args : new value
806              
807             NOTE: the default is to use the value specified by delay_policy().
808             This can be overridden by calling this method, or by passing the
809             -delay argument to new().
810              
811             =cut
812              
813             sub delay {
814 4     4 1 3 my $self = shift;
815 4         6 my $d = $self->{'_delay'};
816 4 100       10 $self->{'_delay'} = shift if @_;
817 4         6 $d;
818             }
819              
820             =head2 delay_policy
821              
822             Title : delay_policy
823             Usage : $secs = $self->delay_policy
824             Function: return number of seconds to delay between calls to remote db
825             Returns : number of seconds to delay
826             Args : none
827              
828             NOTE: The default delay policy is 0s. Override in subclasses to
829             implement delays. The timer has only second resolution, so the delay
830             will actually be +/- 1s.
831              
832             =cut
833              
834             sub delay_policy {
835 2     2 1 3 my $self = shift;
836 2         3 return 0;
837             }
838              
839             =head2 _sleep
840              
841             Title : _sleep
842             Usage : $self->_sleep
843             Function: sleep for a number of seconds indicated by the delay policy
844             Returns : none
845             Args : none
846              
847             NOTE: This method keeps track of the last time it was called and only
848             imposes a sleep if it was called more recently than the delay_policy()
849             allows.
850              
851             =cut
852              
853             sub _sleep {
854 1     1   2 my $self = shift;
855 1         1 my $last_invocation = $LAST_INVOCATION_TIME;
856 1 50       4 if (time - $LAST_INVOCATION_TIME < $self->delay) {
857 0         0 my $delay = $self->delay - (time - $LAST_INVOCATION_TIME);
858 0 0       0 warn "sleeping for $delay seconds\n" if $self->verbose > 0;
859 0         0 sleep $delay;
860             }
861 1         2 $LAST_INVOCATION_TIME = time;
862             }
863              
864             =head2 mod_perl_api
865              
866             Title : mod_perl_api
867             Usage : $version = self->mod_perl_api
868             Function: Returns API version of mod_perl being used based on set env. variables
869             Returns : mod_perl API version; if mod_perl isn't loaded, returns 0
870             Args : none
871              
872             =cut
873              
874             sub mod_perl_api {
875 0     0 1   my $self = shift;
876             my $v = $ENV{MOD_PERL} ?
877 0 0 0       ( exists $ENV{MOD_PERL_API_VERSION} && $ENV{MOD_PERL_API_VERSION} >= 2 ) ?
    0          
878             2 :
879             1
880             : 0;
881 0           return $v;
882             }
883              
884             1;