File Coverage

Bio/DB/DBFetch.pm
Criterion Covered Total %
statement 31 96 32.2
branch 6 46 13.0
condition 1 15 6.6
subroutine 9 16 56.2
pod 11 11 100.0
total 58 184 31.5


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::DB::DBFetch
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Heikki Lehvaslaiho
7             #
8             # Copyright Heikki Lehvaslaiho
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::DB::DBFetch - Database object for retrieving using the dbfetch script
17              
18             =head1 SYNOPSIS
19              
20             #do not use this module directly
21              
22             =head1 DESCRIPTION
23              
24             Allows the dynamic retrieval of entries from databases using the
25             dbfetch script at EBI:
26             LEwww.ebi.ac.ukEcgi-binEdbfetch>.
27              
28             In order to make changes transparent we have host type (currently only
29             ebi) and location (defaults to ebi) separated out. This allows later
30             additions of more servers in different geographical locations.
31              
32             This is a superclass which is called by instantiable subclasses with
33             correct parameters.
34              
35             =head1 FEEDBACK
36              
37             =head2 Mailing Lists
38              
39             User feedback is an integral part of the evolution of this and other
40             Bioperl modules. Send your comments and suggestions preferably to one
41             of the Bioperl mailing lists. Your participation is much appreciated.
42              
43             bioperl-l@bioperl.org - General discussion
44             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
45              
46             =head2 Support
47              
48             Please direct usage questions or support issues to the mailing list:
49              
50             I
51              
52             rather than to the module maintainer directly. Many experienced and
53             reponsive experts will be able look at the problem and quickly
54             address it. Please include a thorough description of the problem
55             with code and data examples if at all possible.
56              
57             =head2 Reporting Bugs
58              
59             Report bugs to the Bioperl bug tracking system to help us keep track
60             the bugs and their resolution. Bug reports can be submitted via the
61             web:
62              
63             https://github.com/bioperl/bioperl-live/issues
64              
65             =head1 AUTHOR - Heikki Lehvaslaiho
66              
67             Email Heikki Lehvaslaiho Eheikki-at-bioperl-dot-orgE
68              
69             =head1 APPENDIX
70              
71             The rest of the documentation details each of the object
72             methods. Internal methods are usually preceded with a _
73              
74             =cut
75              
76             # Let the code begin...
77              
78             package Bio::DB::DBFetch;
79 5     5   22 use strict;
  5         6  
  5         198  
80 5         241 use vars qw($MODVERSION $DEFAULTFORMAT $DEFAULTLOCATION
81 5     5   14 $DEFAULTSERVERTYPE);
  5         6  
82              
83             $MODVERSION = '0.1';
84 5     5   1363 use HTTP::Request::Common;
  5         51428  
  5         286  
85              
86 5     5   23 use base qw(Bio::DB::WebDBSeqI);
  5         7  
  5         1911  
87              
88             # the new way to make modules a little more lightweight
89              
90             BEGIN {
91             # global vars
92 5     5   8 $DEFAULTSERVERTYPE = 'dbfetch';
93 5         3781 $DEFAULTLOCATION = 'ebi';
94             }
95              
96              
97             =head1 Routines from Bio::DB::WebDBSeqI
98              
99             =head2 get_request
100              
101             Title : get_request
102             Usage : my $url = $self->get_request
103             Function: returns a HTTP::Request object
104             Returns :
105             Args : %qualifiers = a hash of qualifiers (ids, format, etc)
106              
107             =cut
108              
109             sub get_request {
110 0     0 1 0 my ($self, @qualifiers) = @_;
111 0         0 my ($uids, $format) = $self->_rearrange([qw(UIDS FORMAT)],
112             @qualifiers);
113              
114 0 0       0 $self->throw("Must specify a value for UIDs to fetch")
115             unless defined $uids;
116 0         0 my $tmp;
117 0         0 my $format_string = '';
118 0   0     0 $format ||= $self->default_format;
119 0         0 ($format, $tmp) = $self->request_format($format);
120 0         0 $format_string = "&format=$format";
121 0         0 my $url = $self->location_url();
122 0         0 my $uid;
123 0 0       0 if( ref($uids) =~ /ARRAY/i ) {
124 0         0 $uid = join (',', @$uids);
125 0 0       0 $self->warn ('The server will accept maximum of 50 entries in a request. The rest are ignored.')
126             if scalar @$uids >50;
127             } else {
128 0         0 $uid = $uids;
129             }
130              
131 0         0 return GET $url. $format_string. '&id='. $uid;
132             }
133              
134              
135             =head2 postprocess_data
136              
137             Title : postprocess_data
138             Usage : $self->postprocess_data ( 'type' => 'string',
139             'location' => \$datastr);
140             Function: process downloaded data before loading into a Bio::SeqIO
141             Returns : void
142             Args : hash with two keys - 'type' can be 'string' or 'file'
143             - 'location' either file location or string
144             reference containing data
145              
146             =cut
147              
148             # remove occasional blank lines at top of web output
149             sub postprocess_data {
150 0     0 1 0 my ($self, %args) = @_;
151 0 0       0 if ($args{type} eq 'string') {
    0          
152 0         0 ${$args{location}} =~ s/^\s+//; # get rid of leading whitespace
  0         0  
153             }
154             elsif ($args{type} eq 'file') {
155 0         0 my $F;
156 0 0       0 open $F,"<", $args{location} or $self->throw("Cannot open $args{location}: $!");
157 0         0 my @data = <$F>;
158 0         0 for (@data) {
159 0 0       0 last unless /^\s+$/;
160 0         0 shift @data;
161             }
162 0 0       0 open $F,">", $args{location} or $self->throw("Cannot write to $args{location}: $!");
163 0         0 print $F @data;
164 0         0 close $F;
165             }
166             }
167              
168             =head2 default_format
169              
170             Title : default_format
171             Usage : my $format = $self->default_format
172             Function: Returns default sequence format for this module
173             Returns : string
174             Args : none
175              
176             =cut
177              
178             sub default_format {
179 3     3 1 4 my ($self) = @_;
180 3         5 return $self->{'_default_format'};
181             }
182              
183             =head1 Bio::DB::DBFetch specific routines
184              
185             =head2 get_Stream_by_id
186              
187             Title : get_Stream_by_id
188             Usage : $seq = $db->get_Stream_by_id($ref);
189             Function: Retrieves Seq objects from the server 'en masse', rather than one
190             at a time. For large numbers of sequences, this is far superior
191             than get_Stream_by_[id/acc]().
192             Example :
193             Returns : a Bio::SeqIO stream object
194             Args : $ref : either an array reference, a filename, or a filehandle
195             from which to get the list of unique ids/accession numbers.
196              
197             NOTE: for backward compatibility, this method is also called
198             get_Stream_by_batch.
199              
200             =cut
201              
202             sub get_Stream_by_id {
203 0     0 1 0 my ($self, $ids) = @_;
204 0         0 return $self->get_seq_stream('-uids' => $ids, '-mode' => 'batch');
205             }
206              
207             =head2 get_Seq_by_version
208              
209             Title : get_Seq_by_version
210             Usage : $seq = $db->get_Seq_by_version('X77802.1');
211             Function: Gets a Bio::Seq object by accession number
212             Returns : A Bio::Seq object
213             Args : version number (as a string)
214             Throws : "version does not exist" exception
215              
216             =cut
217              
218             sub get_Seq_by_version {
219 0     0 1 0 my ($self,$seqid) = @_;
220 0         0 my $seqio = $self->get_Stream_by_acc([$seqid]);
221 0 0       0 $self->throw("version does not exist") if( !defined $seqio );
222 0         0 return $seqio->next_seq();
223             }
224              
225             =head2 request_format
226              
227             Title : request_format
228             Usage : my ($req_format, $ioformat) = $self->request_format;
229             $self->request_format("genbank");
230             $self->request_format("fasta");
231             Function: Get/Set sequence format retrieval. The get-form will normally not
232             be used outside of this and derived modules.
233             Returns : Array of two strings, the first representing the format for
234             retrieval, and the second specifying the corresponding SeqIO format.
235             Args : $format = sequence format
236              
237             =cut
238              
239             sub request_format {
240 1     1 1 2 my ($self, $value) = @_;
241 1 50       3 if( defined $value ) {
242 0         0 $value = lc $value;
243 0         0 $self->{'_format'} = $value;
244 0         0 return ($value, $value);
245             }
246 1         1 $value = $self->{'_format'};
247 1 50 33     4 if( $value and defined $self->formatmap->{$value} ) {
248 0         0 return ($value, $self->formatmap->{$value});
249             } else {
250             # Try to fall back to a default.
251 1         3 return ($self->default_format, $self->default_format );
252             }
253             }
254              
255              
256             =head2 servertype
257              
258             Title : servertype
259             Usage : my $servertype = $self->servertype
260             $self->servertype($servertype);
261             Function: Get/Set server type
262             Returns : string
263             Args : server type string [optional]
264              
265             =cut
266              
267             sub servertype {
268 0     0 1 0 my ($self, $servertype) = @_;
269 0 0 0     0 if( defined $servertype && $servertype ne '') {
270             $self->throw("You gave an invalid server type ($servertype)".
271             " - available types are ".
272 0 0       0 keys %{$self->hosts}) unless( $self->hosts->{$servertype} );
  0         0  
273 0         0 $self->{'_servertype'} = $servertype;
274             }
275 0 0       0 $self->{'_servertype'} = $DEFAULTSERVERTYPE unless $self->{'_servertype'};
276 0         0 return $self->{'_servertype'};
277             }
278              
279             =head2 hostlocation
280              
281             Title : hostlocation
282             Usage : my $location = $self->hostlocation()
283             $self->hostlocation($location)
284             Function: Set/Get Hostlocation
285             Returns : string representing hostlocation
286             Args : string specifying hostlocation [optional]
287              
288             =cut
289              
290             sub hostlocation {
291 0     0 1 0 my ($self, $location ) = @_;
292 0         0 my $servertype = $self->servertype;
293 0 0       0 $self->throw("Must have a valid servertype defined not $servertype")
294             unless defined $servertype;
295 0         0 my %hosts = %{$self->hosts->{$servertype}->{'hosts'}};
  0         0  
296 0 0 0     0 if( defined $location && $location ne '' ) {
297 0         0 $location = lc $location;
298 0 0       0 if( ! $hosts{$location} ) {
299 0         0 $self->throw("Must specify a known host, not $location,".
300             " possible values (".
301             join(",", sort keys %hosts ). ")");
302             }
303 0         0 $self->{'_hostlocation'} = $location;
304             }
305 0 0       0 $self->{'_hostlocation'} = $DEFAULTLOCATION unless $self->{'_hostlocation'};
306 0         0 return $self->{'_hostlocation'};
307             }
308              
309             =head2 location_url
310              
311             Title : location
312             Usage : my $url = $self->location_url()
313             Function: Get host url
314             Returns : string representing url
315             Args : none
316              
317             =cut
318              
319             sub location_url {
320 0     0 1 0 my ($self) = @_;
321 0         0 my $servertype = $self->servertype();
322 0         0 my $location = $self->hostlocation();
323 0 0 0     0 if( ! defined $location || !defined $servertype ) {
324 0         0 $self->throw("must have a valid hostlocation and servertype set before calling location_url");
325             }
326             return sprintf($self->hosts->{$servertype}->{'baseurl'},
327 0         0 $self->hosts->{$servertype}->{'hosts'}->{$location});
328             }
329              
330             =head1 Bio::DB::DBFetch routines
331              
332             These methods allow subclasses to pass parameters.
333              
334             =head2 hosts
335              
336             Title : hosts
337             Usage :
338             Function: get/set for host hash
339             Returns :
340             Args : optional hash
341              
342             =cut
343              
344             sub hosts {
345 1     1 1 2 my ($self, $value) = @_;
346 1 50       3 if (defined $value) {
347 1         3 $self->{'_hosts'} = $value;
348             }
349 1 50       8 unless (exists $self->{'_hosts'}) {
350 0         0 return ('');
351             } else {
352 1         2 return $self->{'_hosts'};
353             }
354             }
355              
356             =head2 formatmap
357              
358             Title : formatmap
359             Usage :
360             Function: get/set for format hash
361             Returns :
362             Args : optional hash
363              
364             =cut
365              
366             sub formatmap {
367 1     1 1 1 my ($self, $value) = @_;
368 1 50       3 if (defined $value) {
369 1         2 $self->{'_formatmap'} = $value;
370             }
371 1 50       2 unless (exists $self->{'_formatmap'}) {
372 0         0 return ('');
373             } else {
374 1         1 return $self->{'_formatmap'};
375             }
376             }
377              
378              
379             1;
380             __END__