blib/lib/Bio/DB/Query/HIVQuery.pm | |||
---|---|---|---|
Criterion | Covered | Total | % |
statement | 267 | 487 | 54.8 |
branch | 82 | 196 | 41.8 |
condition | 17 | 54 | 31.4 |
subroutine | 35 | 49 | 71.4 |
pod | 15 | 19 | 78.9 |
total | 416 | 805 | 51.6 |
line | stmt | bran | cond | sub | pod | time | code | |
---|---|---|---|---|---|---|---|---|
1 | # to do: support for comment, reference annotations | |||||||
2 | ||||||||
3 | # $Id: HIVQuery.pm 232 2008-12-11 14:51:51Z maj $ | |||||||
4 | # | |||||||
5 | # BioPerl module for Bio::DB::Query::LANLQuery | |||||||
6 | # | |||||||
7 | # Please direct questions and support issues to |
|||||||
8 | # | |||||||
9 | # Cared for by Mark A. Jensen |
|||||||
10 | # | |||||||
11 | # Copyright Mark A. Jensen | |||||||
12 | # | |||||||
13 | # You may distribute this module under the same terms as perl itself | |||||||
14 | ||||||||
15 | # POD documentation - main docs before the code | |||||||
16 | ||||||||
17 | =head1 NAME | |||||||
18 | ||||||||
19 | Bio::DB::Query::HIVQuery - Query interface to the Los Alamos HIV Sequence Database | |||||||
20 | ||||||||
21 | =head1 SYNOPSIS | |||||||
22 | ||||||||
23 | $q = new Bio::DB::Query::HIVQuery(" C[subtype] ZA[country] CXCR4[coreceptor] "); | |||||||
24 | $q = new Bio::DB::Query::HIVQuery( | |||||||
25 | -query=>{'subtype'=>'C', | |||||||
26 | 'country'=>'ZA', | |||||||
27 | 'coreceptor'=>'CXCR4'}); | |||||||
28 | ||||||||
29 | $ac = $q->get_annotations_by_id(($q->ids)[0]); | |||||||
30 | $ac->get_value('Geo', 'country') # returns 'SOUTH AFRICA' | |||||||
31 | ||||||||
32 | $db = new Bio::DB::HIV(); | |||||||
33 | $seqio = $db->get_Stream_by_query($q); # returns annotated Bio::Seqs | |||||||
34 | ||||||||
35 | # get subtype C sequences from South Africa and Brazil, | |||||||
36 | # with associated info on patient health, coreceptor use, and | |||||||
37 | # infection period: | |||||||
38 | ||||||||
39 | $q = new Bio::DB::Query::HIVQuery( | |||||||
40 | -query => { | |||||||
41 | 'query' => {'subtype'=>'C', | |||||||
42 | 'country'=>['ZA', 'BR']}, | |||||||
43 | 'annot' => ['patient_health', | |||||||
44 | 'coreceptor', | |||||||
45 | 'days_post_infection'] | |||||||
46 | }); | |||||||
47 | ||||||||
48 | ||||||||
49 | =head1 DESCRIPTION | |||||||
50 | ||||||||
51 | Bio::DB::Query::HIVQuery provides a query-like interface to the | |||||||
52 | cgi-based Los Alamos National Laboratory (LANL) HIV Sequence | |||||||
53 | Database. It uses Bioperl facilities to capture both sequences and | |||||||
54 | annotations in batch in an automated and computable way. Use with | |||||||
55 | L |
|||||||
56 | streams. | |||||||
57 | ||||||||
58 | =head2 Query format | |||||||
59 | ||||||||
60 | The interface implements a simple query language emulation that understands AND, | |||||||
61 | OR, and parenthetical nesting. The basic query unit is | |||||||
62 | ||||||||
63 | (match1 match2 ...)[fieldname] | |||||||
64 | ||||||||
65 | Sequences are returned for which C |
|||||||
66 | These units can be combined with AND, OR and parentheses. For example: | |||||||
67 | ||||||||
68 | (B, C)[subtype] AND (2000, 2001, 2002, 2003)[year] AND ((CN)[country] OR (ZA)[country]) | |||||||
69 | ||||||||
70 | which can be shortened to | |||||||
71 | ||||||||
72 | (B C)[subtype] (2000 2001 2002 2003)[year] (CN ZA)[country] | |||||||
73 | ||||||||
74 | The user can specify annotation fields, that do not restrict the query, but | |||||||
75 | arrange for the return of the associated field data for each sequence returned. | |||||||
76 | Specify annotation fields between curly braces, as in: | |||||||
77 | ||||||||
78 | (B C)[subtype] 2000[year] {country cd4_count cd8_count} | |||||||
79 | ||||||||
80 | Annotations can be accessed off the query using methods described in APPENDIX. | |||||||
81 | ||||||||
82 | =head2 Hash specifications for query construction | |||||||
83 | ||||||||
84 | Single query specifications can be made as hash references provided to the | |||||||
85 | C<-query> argument of the constructor. There are two forms: | |||||||
86 | ||||||||
87 | -query => { 'country'=>'BR', 'phenotype'=>'NSI', 'cd4_count'=>'Any' } | |||||||
88 | ||||||||
89 | equivalent to | |||||||
90 | ||||||||
91 | -query => [ 'country'=>'BR', 'phenotype'=>'NSI', 'cd4_count'=>'Any' ] | |||||||
92 | ||||||||
93 | or | |||||||
94 | ||||||||
95 | -query => { 'query' => {'country'=>'BR', 'phenotype'=>'NSI'}, | |||||||
96 | 'annot' => ['cd4_count'] } | |||||||
97 | ||||||||
98 | In both cases, the CD4 count is included in the annotations returned, but does | |||||||
99 | not restrict the rest of the query. | |||||||
100 | ||||||||
101 | To 'OR' multiple values of a field, use an anonymous array ref: | |||||||
102 | ||||||||
103 | -query => { 'country'=>['ZA','BR','NL'], 'subtype'=>['A', 'C', 'D'] } | |||||||
104 | ||||||||
105 | =head2 Valid query field names | |||||||
106 | ||||||||
107 | An attempt was made to make the query field names natural and easy to | |||||||
108 | remember. Aliases are specified in an XML file (C |
|||||||
109 | of the distribution. Custom field aliases can be set up by modifying this file. | |||||||
110 | ||||||||
111 | An HTML cheatsheet with valid field names, aliases, and match data can be | |||||||
112 | generated from the XML by using C |
|||||||
113 | can also be validated locally before it is unleashed on the server; see below. | |||||||
114 | ||||||||
115 | =head2 Annotations | |||||||
116 | ||||||||
117 | LANL DB annotations have been organized into a number of natural | |||||||
118 | groupings, tagged C |
|||||||
119 | successful query, each id is associated with a tree of | |||||||
120 | L |
|||||||
121 | methods C |
|||||||
122 | ||||||||
123 | =head2 Delayed/partial query runs | |||||||
124 | ||||||||
125 | Accessing the LANL DB involves multiple HTTP requests. The query can | |||||||
126 | be instructed to proceed through all (the default) or only some of | |||||||
127 | them, using the named parameter C |
|||||||
128 | ||||||||
129 | To validate a query locally, use | |||||||
130 | ||||||||
131 | $q = new Bio::DB::Query::HIVQuery( -query => {...}, -RUN_OPTION=>0 ) | |||||||
132 | ||||||||
133 | which will throw an exception if a field name or option is invalid. | |||||||
134 | ||||||||
135 | To get a query count only, you can save a server hit by using | |||||||
136 | ||||||||
137 | $q = new Bio::DB::Query::HIVQuery( -query => {...}, -RUN_OPTION=>1 ) | |||||||
138 | ||||||||
139 | and asking for C<$q-E |
|||||||
140 | ||||||||
141 | $q->_do_query(2) | |||||||
142 | ||||||||
143 | which picks up where you left off. | |||||||
144 | ||||||||
145 | C<-RUN_OPTION=E |
|||||||
146 | annotations. | |||||||
147 | ||||||||
148 | =head2 Query re-use | |||||||
149 | ||||||||
150 | You can clear the query results, retaining the same LANL session and query spec, | |||||||
151 | by doing C<$q-E |
|||||||
152 | C<$q-E |
|||||||
153 | ||||||||
154 | =head1 FEEDBACK | |||||||
155 | ||||||||
156 | =head2 Mailing Lists | |||||||
157 | ||||||||
158 | User feedback is an integral part of the evolution of this and other | |||||||
159 | Bioperl modules. Send your comments and suggestions preferably to | |||||||
160 | the Bioperl mailing list. Your participation is much appreciated. | |||||||
161 | ||||||||
162 | bioperl-l@bioperl.org - General discussion | |||||||
163 | http://bioperl.org/wiki/Mailing_lists - About the mailing lists | |||||||
164 | ||||||||
165 | =head2 Support | |||||||
166 | ||||||||
167 | Please direct usage questions or support issues to the mailing list: | |||||||
168 | ||||||||
169 | I |
|||||||
170 | ||||||||
171 | rather than to the module maintainer directly. Many experienced and | |||||||
172 | reponsive experts will be able look at the problem and quickly | |||||||
173 | address it. Please include a thorough description of the problem | |||||||
174 | with code and data examples if at all possible. | |||||||
175 | ||||||||
176 | =head2 Reporting Bugs | |||||||
177 | ||||||||
178 | Report bugs to the Bioperl bug tracking system to help us keep track | |||||||
179 | of the bugs and their resolution. Bug reports can be submitted via | |||||||
180 | the web: | |||||||
181 | ||||||||
182 | https://github.com/bioperl/bioperl-live/issues | |||||||
183 | ||||||||
184 | =head1 AUTHOR - Mark A. Jensen | |||||||
185 | ||||||||
186 | Email maj@fortinbras.us | |||||||
187 | ||||||||
188 | =head1 CONTRIBUTORS | |||||||
189 | ||||||||
190 | Mark A. Jensen | |||||||
191 | ||||||||
192 | =head1 APPENDIX | |||||||
193 | ||||||||
194 | The rest of the documentation details each of the object methods. | |||||||
195 | Internal methods are usually preceded with a _ | |||||||
196 | ||||||||
197 | =cut | |||||||
198 | ||||||||
199 | # Let the code begin... | |||||||
200 | ||||||||
201 | package Bio::DB::Query::HIVQuery; | |||||||
202 | 1 | 1 | 756 | use strict; | ||||
1 | 1 | |||||||
1 | 27 | |||||||
203 | 1 | 1 | 4 | use vars qw( $LANL_BASE $LANL_MAP_DB $LANL_MAKE_SEARCH_IF $LANL_SEARCH $SCHEMA_FILE $RUN_OPTION ); | ||||
1 | 1 | |||||||
1 | 67 | |||||||
204 | ||||||||
205 | # Object preamble - inherits from Bio::DB::QueryI | |||||||
206 | 1 | 1 | 345 | use Bio::Root::Root; | ||||
1 | 1 | |||||||
1 | 25 | |||||||
207 | 1 | 1 | 383 | use Bio::Annotation::Collection; | ||||
1 | 2 | |||||||
1 | 20 | |||||||
208 | 1 | 1 | 339 | use Bio::Annotation::Comment; | ||||
1 | 1 | |||||||
1 | 20 | |||||||
209 | 1 | 1 | 365 | use Bio::Annotation::Reference; | ||||
1 | 1 | |||||||
1 | 21 | |||||||
210 | 1 | 1 | 319 | use Bio::WebAgent; | ||||
1 | 1 | |||||||
1 | 20 | |||||||
211 | 1 | 1 | 4 | use XML::Simple; | ||||
1 | 1 | |||||||
1 | 6 | |||||||
212 | 1 | 1 | 62 | use CGI; | ||||
1 | 1 | |||||||
1 | 5 | |||||||
213 | ||||||||
214 | 1 | 1 | 597 | use Bio::DB::HIV::HIVQueryHelper; | ||||
1 | 1 | |||||||
1 | 28 | |||||||
215 | ||||||||
216 | 1 | 1 | 4 | use base qw(Bio::Root::Root Bio::DB::QueryI); | ||||
1 | 1 | |||||||
1 | 368 | |||||||
217 | ||||||||
218 | # globals | |||||||
219 | BEGIN { | |||||||
220 | # change base to new search page 01/14/09 /maj | |||||||
221 | 1 | 1 | 1 | $LANL_BASE = "http://www.hiv.lanl.gov/components/sequence/HIV/asearch"; | ||||
222 | 1 | 1 | $LANL_MAP_DB = "map_db.comp"; | |||||
223 | 1 | 1 | $LANL_MAKE_SEARCH_IF = "make_search_if.comp"; | |||||
224 | 1 | 1 | $LANL_SEARCH = "search.comp"; | |||||
225 | 1 | 5 | $SCHEMA_FILE = Bio::Root::IO->catfile(qw(Bio DB HIV lanl-schema.xml)); | |||||
226 | 1 | 3 | $RUN_OPTION = 2; # execute query | |||||
227 | # exceptions | |||||||
228 | 1 | 10 | @Bio::SchemaNotInit::Exception::ISA = qw( Bio::Root::Exception ); | |||||
229 | 1 | 6 | @Bio::WebError::Exception::ISA = qw( Bio::Root::Exception ); | |||||
230 | 1 | 6 | @Bio::QueryNotMade::Exception::ISA = qw( Bio::Root::Exception ); | |||||
231 | 1 | 12 | @Bio::QueryStringException::Exception::ISA = qw( Bio::Root::Exception ); | |||||
232 | 1 | 3682 | @Bio::HIVSorry::Exception::ISA = qw ( Bio::Root::Exception ); | |||||
233 | ||||||||
234 | } | |||||||
235 | ||||||||
236 | =head1 Constructor | |||||||
237 | ||||||||
238 | =head2 new | |||||||
239 | ||||||||
240 | Title : new | |||||||
241 | Usage : my $hiv_query = new Bio::DB::Query::HIVQuery(); | |||||||
242 | Function: Builds a new Bio::DB::Query::HIVQuery object, | |||||||
243 | running a sequence query against the Los Alamos | |||||||
244 | HIV sequence database | |||||||
245 | Returns : an instance of Bio::DB::Query::HIVQuery | |||||||
246 | Args : | |||||||
247 | ||||||||
248 | =cut | |||||||
249 | ||||||||
250 | sub new { | |||||||
251 | 1 | 1 | 1 | 132 | my($class,@args) = @_; | |||
252 | 1 | 11 | my $self = $class->SUPER::new(@args); | |||||
253 | # constructor option for web agent parameter spec: added 01/14/09 /maj | |||||||
254 | 1 | 10 | my ($query, $ids, $lanl_base, $lanl_map_db, $lanl_make_search_if, $lanl_search, $schema_file,$run_option, $uahash) = | |||||
255 | $self->_rearrange([ qw(QUERY | |||||||
256 | IDS | |||||||
257 | LANL_BASE | |||||||
258 | LANL_MAP_DB | |||||||
259 | LANL_MAKE_SEARCH_IF | |||||||
260 | LANL_SEARCH | |||||||
261 | SCHEMA_FILE | |||||||
262 | RUN_OPTION | |||||||
263 | USER_AGENT_HASH | |||||||
264 | )], @args); | |||||||
265 | ||||||||
266 | # default globals | |||||||
267 | 1 | 33 | 6 | $lanl_base||= $LANL_BASE; | ||||
268 | 1 | 33 | 4 | $lanl_map_db||=$LANL_MAP_DB; | ||||
269 | 1 | 33 | 10 | $lanl_make_search_if||=$LANL_MAKE_SEARCH_IF; | ||||
270 | 1 | 33 | 4 | $lanl_search||=$LANL_SEARCH; | ||||
271 | 1 | 33 | 3 | $schema_file||=$SCHEMA_FILE; | ||||
272 | 1 | 50 | 5 | $uahash ||= {timeout => 90}; | ||||
273 | 1 | 50 | 2 | defined $run_option || ($run_option = $RUN_OPTION); | ||||
274 | ||||||||
275 | 1 | 4 | $self->lanl_base($lanl_base); | |||||
276 | 1 | 3 | $self->map_db($lanl_map_db); | |||||
277 | 1 | 4 | $self->make_search_if($lanl_make_search_if); | |||||
278 | 1 | 3 | $self->search_($lanl_search); | |||||
279 | 1 | 4 | $self->_run_option($run_option); | |||||
280 | 1 | 3 | $self->_ua_hash($uahash); | |||||
281 | ||||||||
282 | # catch this at the top | |||||||
283 | 1 | 50 | 21 | if (-e $schema_file) { | ||||
284 | 1 | 3 | $self->_schema_file($schema_file); | |||||
285 | } | |||||||
286 | else { # look around | |||||||
287 | 0 | 0 | my ($p) = $self->_schema_file( [grep {$_} map { | |||||
288 | 0 | 0 | my $p = Bio::Root::IO->catfile($_, $schema_file); | |||||
0 | 0 | |||||||
289 | 0 | 0 | 0 | $p if -e $p | ||||
290 | } (@INC,"")]->[0]); | |||||||
291 | 0 | 0 | 0 | $self->throw(-class=>"Bio::Root::NoSuchThing", | ||||
292 | -text=>"Schema file \"".$self->_schema_file."\" cannot be found", | |||||||
293 | -value=>$self->_schema_file) unless -e $self->_schema_file; | |||||||
294 | } | |||||||
295 | ||||||||
296 | 1 | 4 | $self->count(0); | |||||
297 | 1 | 2 | $self->{_schema} = HIVSchema->new($self->_schema_file); | |||||
298 | ||||||||
299 | # internal storage and flags | |||||||
300 | 1 | 5 | $self->{'_lanl_query'} = []; | |||||
301 | 1 | 2 | $self->{'_lanl_response'} = []; | |||||
302 | 1 | 2 | $self->{'_annotations'} = {}; # container for annotation collections assoc. with ids | |||||
303 | 1 | 2 | $self->{'_RUN_LEVEL'} = undef; # set in _do_query() | |||||
304 | ||||||||
305 | # work | |||||||
306 | 1 | 50 | 4 | defined $query && $self->query($query); | ||||
307 | 1 | 50 | 3 | defined $ids && $self->ids($ids); | ||||
308 | ||||||||
309 | # exec query | |||||||
310 | ||||||||
311 | 1 | 50 | 5 | $self->_do_query($self->_run_option) if $self->query; | ||||
312 | ||||||||
313 | 1 | 6 | return $self; | |||||
314 | } | |||||||
315 | ||||||||
316 | =head1 QueryI compliance | |||||||
317 | ||||||||
318 | =head2 count | |||||||
319 | ||||||||
320 | Title : count | |||||||
321 | Usage : $hiv_query->count($newval) | |||||||
322 | Function: return number of sequences found | |||||||
323 | Example : | |||||||
324 | Returns : value of count (a scalar) | |||||||
325 | Args : on set, new value (a scalar or undef, optional) | |||||||
326 | Note : count warns if it is accessed for reading before query | |||||||
327 | has been executed to at least level 1 | |||||||
328 | ||||||||
329 | =cut | |||||||
330 | ||||||||
331 | sub count{ | |||||||
332 | 11 | 11 | 1 | 27 | my $self = shift; | |||
333 | 11 | 100 | 27 | return $self->{'count'} = shift if @_; | ||||
334 | 1 | 50 | 33 | 9 | if (!$self->{'_RUN_LEVEL'} || ($self->{'_RUN_LEVEL'} < 1)) { | |||
335 | 1 | 8 | $self->warn('Query not yet run at > level 1'); | |||||
336 | } | |||||||
337 | 0 | 0 | return $self->{'count'}; | |||||
338 | } | |||||||
339 | ||||||||
340 | =head2 ids | |||||||
341 | ||||||||
342 | Title : ids | |||||||
343 | Usage : $hiv_query->ids($newval) | |||||||
344 | Function: LANL ids of returned sequences | |||||||
345 | Example : | |||||||
346 | Returns : value of ids (an arrayref of sequence accessions/ids) | |||||||
347 | Args : on set, new value (an arrayref or undef, optional) | |||||||
348 | ||||||||
349 | =cut | |||||||
350 | ||||||||
351 | sub ids{ | |||||||
352 | 10 | 10 | 1 | 27 | my $self = shift; | |||
353 | 10 | 100 | 25 | if (@_) { | ||||
354 | 9 | 9 | my $a = shift; | |||||
355 | 9 | 50 | 26 | $self->throw(-class=>'Bio::Root::BadParameter', | ||||
356 | -text=>'Arrayref required', | |||||||
357 | -value=> ref $a) unless ref($a) eq 'ARRAY'; | |||||||
358 | 9 | 50 | 16 | if (@$a) { | ||||
359 | 0 | 0 | @{$self->{'ids'}}{@$a} = (1) x @$a; | |||||
0 | 0 | |||||||
360 | 0 | 0 | return $a; | |||||
361 | } | |||||||
362 | else { #with empty arrayref, clear the hash | |||||||
363 | 9 | 19 | $self->{'ids'} = {}; | |||||
364 | } | |||||||
365 | } | |||||||
366 | 10 | 50 | 27 | return keys %{$self->{'ids'}} if $self->{'ids'}; | ||||
10 | 21 | |||||||
367 | } | |||||||
368 | ||||||||
369 | =head2 query | |||||||
370 | ||||||||
371 | Title : query | |||||||
372 | Usage : $hiv_query->query | |||||||
373 | Function: Get/set the submitted query hash or string | |||||||
374 | Example : | |||||||
375 | Returns : hashref or string | |||||||
376 | Args : query in hash or string form (see DESCRIPTION) | |||||||
377 | ||||||||
378 | =cut | |||||||
379 | ||||||||
380 | sub query { | |||||||
381 | 18 | 18 | 1 | 20 | my $self = shift; | |||
382 | 18 | 100 | 40 | return $self->{'query'} = shift if @_; | ||||
383 | 10 | 25 | return $self->{'query'}; | |||||
384 | } | |||||||
385 | ||||||||
386 | =head1 Bio::DB::Query::HIVQuery specific methods | |||||||
387 | ||||||||
388 | =head2 help | |||||||
389 | ||||||||
390 | Title : help | |||||||
391 | Usage : $hiv_query->help("help.html") | |||||||
392 | Function: get html-formatted listing of valid fields/aliases/options | |||||||
393 | based on current schema xml | |||||||
394 | Example : perl -MBio::DB::Query::HIVQuery -e "new Bio::DB::Query::HIVQuery()->help" | lynx -stdin | |||||||
395 | Returns : HTML | |||||||
396 | Args : optional filename; otherwise prints to stdout | |||||||
397 | ||||||||
398 | =cut | |||||||
399 | ||||||||
400 | sub help{ | |||||||
401 | 1 | 1 | 1 | 64 | my ($self, $fname) = @_; | |||
402 | 1 | 2 | my (@ret, @tok); | |||||
403 | 1 | 4 | my $schema = $self->_schema; | |||||
404 | 1 | 9 | my $h = CGI->new(); | |||||
405 | ||||||||
406 | 1 | 233 | my (@tbls, @flds, @als, @opts, $fh); | |||||
407 | 1 | 50 | 4 | if ($fname) { | ||||
408 | 1 | 50 | 46 | open $fh, '>', $fname or $self->throw(-class => 'Bio::Root::IOException', | ||||
409 | -text => "Error opening help html file $fname for writing", | |||||||
410 | -value => $!); | |||||||
411 | } | |||||||
412 | else { | |||||||
413 | 0 | 0 | open $fh, ">&1"; | |||||
414 | } | |||||||
415 | 1 | 4 | @tbls = $schema->tables; | |||||
416 | 1 | 10 | @tbls = ('COMMAND', grep !/COMMAND/,@tbls); | |||||
417 | 1 | 7 | print $fh ( | |||||
418 | $h->start_html(-title=>"HIVQuery Help") | |||||||
419 | ); | |||||||
420 | 1 | 345 | print $fh $h->a({-id=>'TOP'}, $h->h2("Valid HIVQuery query fields and match data")); | |||||
421 | 1 | 85 | print $fh "Fields are organized below according to their Los Alamos HIV database tables. Use aliases in place of full field names in queries; for example: "; |
|||||
422 | 1 | 1 | print $fh " "; |
|||||
423 | 1 | 2 | print $fh "rather than"; | |||||
424 | 1 | 2 | print $fh " "; |
|||||
425 | 1 | 2 | print $fh "(which does work, however). Click hyperlinks to see valid search options within the field. The token Any is the wildcard for all fields."; |
|||||
426 | 1 | 6 | print $fh $h->start_table({-style=>"font-family:sans-serif;"}) ; | |||||
427 | 1 | 38 | foreach my $tbl (@tbls) { | |||||
428 | 14 | 3486 | @flds = grep /^$tbl/, $schema->fields; | |||||
429 | 14 | 104 | @flds = grep !/_id/, @flds; | |||||
430 | 14 | 49 | print $fh ( | |||||
431 | $h->start_Tr({-style=>"background-color: lightblue;"}), | |||||||
432 | $h->td([$h->a({-id=>$tbl},$tbl), $h->span({-style=>"font-style:italic"},"fields"), $h->span({-style=>"font-style:italic"}, "aliases")]), | |||||||
433 | $h->end_Tr | |||||||
434 | ); | |||||||
435 | 14 | 2154 | foreach my $fld (@flds) { | |||||
436 | 74 | 16267 | @als = reverse $schema->aliases($fld); | |||||
437 | 74 | 100 | 333 | print $fh ( | ||||
438 | # note that aliases can sometimes be empty | |||||||
439 | $h->Tr( $h->td( ["", $h->a({-href=>"#opt$fld"}, shift @als || '???'), $h->code(join(',',@als))] )) | |||||||
440 | ); | |||||||
441 | 74 | 5785 | my @tmp = grep {$_} $schema->options($fld); | |||||
1158 | 949 | |||||||
442 | 74 | 100 | 100 | 158 | @tmp = sort {(($a =~ /^[0-9]+$/) && $b =~ /^[0-9]+$/) ? $a<=>$b : $a cmp $b} @tmp; | |||
5860 | 9513 | |||||||
443 | 74 | 100 | 263 | if (grep /Any/,@tmp) { | ||||
444 | 31 | 324 | @tmp = grep !/Any/, @tmp; | |||||
445 | 31 | 93 | unshift @tmp, 'Any'; | |||||
446 | } | |||||||
447 | #print STDERR join(', ',@tmp)."\n"; | |||||||
448 | 74 | 100 | 200 | push @opts, $h->div( | ||||
100 | ||||||||
449 | {-style=>"font-family:sans-serif;font-size:small"}, | |||||||
450 | $h->hr, | |||||||
451 | $h->a( | |||||||
452 | {-id=>"opt$fld"}, | |||||||
453 | "Valid options for $fld: " | |||||||
454 | ), | |||||||
455 | $h->blockquote( | |||||||
456 | @tmp ? $h->code(join(", ", @tmp)) : $h->i("free text") | |||||||
457 | ), | |||||||
458 | $h->span( | |||||||
459 | "Other aliases: " | |||||||
460 | ), | |||||||
461 | $h->blockquote( | |||||||
462 | @als ? $h->code(join(",",@als)) : "none" | |||||||
463 | ), | |||||||
464 | " ", | |||||||
465 | $h->table( | |||||||
466 | $h->Tr( | |||||||
467 | $h->td([ | |||||||
468 | $h->a({-href=>"#$tbl"}, $h->small('BACK')), | |||||||
469 | $h->a({-href=>"#TOP"}, $h->small('TOP')) | |||||||
470 | ]) | |||||||
471 | ) | |||||||
472 | ) | |||||||
473 | ); | |||||||
474 | ||||||||
475 | } | |||||||
476 | } | |||||||
477 | 1 | 266 | print $fh $h->end_table; | |||||
478 | 1 | 283 | print $fh @opts; | |||||
479 | 1 | 6 | print $fh $h->end_html; | |||||
480 | 1 | 37 | close($fh); | |||||
481 | 1 | 34 | return 1; | |||||
482 | } | |||||||
483 | ||||||||
484 | =head1 Annotation manipulation methods | |||||||
485 | ||||||||
486 | =head2 get_annotations_by_ids | |||||||
487 | ||||||||
488 | Title : get_annotations_by_ids (or ..._by_id) | |||||||
489 | Usage : $ac = $hiv_query->get_annotations_by_ids(@ids) | |||||||
490 | Function: Get the Bio::Annotation::Collection for these sequence ids | |||||||
491 | Example : | |||||||
492 | Returns : A Bio::Annotation::Collection object | |||||||
493 | Args : an array of sequence ids | |||||||
494 | ||||||||
495 | =cut | |||||||
496 | ||||||||
497 | sub get_annotations_by_ids{ | |||||||
498 | 1 | 1 | 1 | 2 | my $self = shift; | |||
499 | 1 | 2 | my @ids = @_; | |||||
500 | 1 | 1 | my @ret; | |||||
501 | 1 | 50 | 33 | 5 | if (!$self->{'_RUN_LEVEL'} || ($self->{'_RUN_LEVEL'} < 2)) { | |||
502 | 1 | 3 | $self->warn('Requires query run at level 2'); | |||||
503 | 0 | 0 | return (); | |||||
504 | } | |||||||
505 | 0 | 0 | 0 | @ret = map {$self->{'_annotations'}->{$_}} @ids if exists($self->{'_annotations'}); | ||||
0 | 0 | |||||||
506 | ||||||||
507 | 0 | 0 | 0 | return (wantarray ? @ret : $ret[0]) if @ret; | ||||
0 | ||||||||
508 | 0 | 0 | return {}; | |||||
509 | } | |||||||
510 | ||||||||
511 | # singular alias | |||||||
512 | sub get_annotations_by_id { | |||||||
513 | 1 | 1 | 0 | 4 | shift->get_annotations_by_ids(@_); | |||
514 | } | |||||||
515 | ||||||||
516 | =head2 add_annotations_for_id | |||||||
517 | ||||||||
518 | Title : add_annotations_for_id | |||||||
519 | Usage : $hiv_query->add_annotations_for_id( $id ) to create a new | |||||||
520 | empty collection for $id | |||||||
521 | $hiv_query->add_annotations_for_id( $id, $ac ) to associate | |||||||
522 | $ac with $id | |||||||
523 | Function: Associate a Bio::Annotation::Collection with this sequence id | |||||||
524 | Example : | |||||||
525 | Returns : a Bio::Annotation::Collection object | |||||||
526 | Args : sequence id [, Bio::Annotation::Collection object] | |||||||
527 | ||||||||
528 | =cut | |||||||
529 | ||||||||
530 | sub add_annotations_for_id{ | |||||||
531 | 0 | 0 | 1 | 0 | my $self = shift; | |||
532 | 0 | 0 | my ($id, $ac) = @_; | |||||
533 | 0 | 0 | 0 | $id = "" unless defined $id; # avoid warnings | ||||
534 | 0 | 0 | 0 | $ac = Bio::Annotation::Collection->new() unless defined $ac; | ||||
535 | 0 | 0 | 0 | $self->throw(-class=>'Bio::Root::BadParameter' | ||||
536 | -text=>'Bio::Annotation::Collection required at arg 2', | |||||||
537 | -value=>"") unless ref($ac) eq 'Bio::Annotation::Collection'; | |||||||
538 | ||||||||
539 | 0 | 0 | 0 | $self->{'_annotations'}->{$id} = $ac unless exists($self->{'_annotations'}->{$id}); | ||||
540 | 0 | 0 | return $ac; | |||||
541 | } | |||||||
542 | ||||||||
543 | =head2 remove_annotations_for_ids | |||||||
544 | ||||||||
545 | Title : remove_annotations_for_ids (or ..._for_id) | |||||||
546 | Usage : $hiv_query->remove_annotations_for_ids( @ids) | |||||||
547 | Function: Remove annotation collection for this sequence id | |||||||
548 | Example : | |||||||
549 | Returns : An array of the previous annotation collections for these ids | |||||||
550 | Args : an array of sequence ids | |||||||
551 | ||||||||
552 | =cut | |||||||
553 | ||||||||
554 | sub remove_annotations_for_ids { | |||||||
555 | 0 | 0 | 1 | 0 | my $self = shift; | |||
556 | 0 | 0 | my @ids = @_; | |||||
557 | 0 | 0 | my @ac; | |||||
558 | 0 | 0 | foreach (@ids) { | |||||
559 | 0 | 0 | push @ac, delete $self->{'_annotations'}->{$_}; | |||||
560 | } | |||||||
561 | 0 | 0 | return @ac; | |||||
562 | } | |||||||
563 | ||||||||
564 | # singular alias | |||||||
565 | sub remove_annotations_for_id { | |||||||
566 | 0 | 0 | 0 | 0 | shift->remove_annotations_for_ids(@_); | |||
567 | } | |||||||
568 | ||||||||
569 | =head2 remove_annotations | |||||||
570 | ||||||||
571 | Title : remove_annotations | |||||||
572 | Usage : $hiv_query->remove_annotations() | |||||||
573 | Function: Remove all annotation collections for this object | |||||||
574 | Example : | |||||||
575 | Returns : The previous annotation collection hash for this object | |||||||
576 | Args : none | |||||||
577 | ||||||||
578 | =cut | |||||||
579 | ||||||||
580 | sub remove_annotations { | |||||||
581 | 0 | 0 | 1 | 0 | my $self = shift; | |||
582 | ||||||||
583 | 0 | 0 | my $ach = $self->{'_annotations'}; | |||||
584 | 0 | 0 | $self->{'_annotations'} = {}; | |||||
585 | 0 | 0 | return $ach; | |||||
586 | } | |||||||
587 | ||||||||
588 | =head2 get_value | |||||||
589 | ||||||||
590 | Title : get_value | |||||||
591 | Usage : $ac->get_value($tagname) -or- | |||||||
592 | $ac->get_value( $tag_level1, $tag_level2,... ) | |||||||
593 | Function: access the annotation value assocated with the given tags | |||||||
594 | Example : | |||||||
595 | Returns : a scalar | |||||||
596 | Args : an array of tagnames that descend into the annotation tree | |||||||
597 | Note : this is a L |
|||||||
598 | L |
|||||||
599 | ||||||||
600 | =cut | |||||||
601 | ||||||||
602 | =head2 put_value | |||||||
603 | ||||||||
604 | Title : put_value | |||||||
605 | Usage : $ac->put_value($tagname, $value) -or- | |||||||
606 | $ac->put_value([$tag_level1, $tag_level2, ...], $value) -or- | |||||||
607 | $ac->put_value( [$tag_level1, $tag_level2, ...] ) | |||||||
608 | Function: create a node in an annotation tree, and assign a scalar value to it | |||||||
609 | if a value is specified | |||||||
610 | Example : | |||||||
611 | Returns : scalar or a Bio::AnnotationCollection object | |||||||
612 | Args : $tagname, $value scalars (can be specified as -KEYS=>$tagname, | |||||||
613 | -VALUE=>$value) -or- | |||||||
614 | \@tagnames, $value (or as -KEYS=>\@tagnames, -VALUE=>$value ) | |||||||
615 | Notes : This is a L |
|||||||
616 | L |
|||||||
617 | If intervening nodes do not exist, put_value creates them, replacing | |||||||
618 | existing nodes. So if $ac->put_value('x', 10) was done, then later, | |||||||
619 | $ac->put_value(['x', 'y'], 20), the original value of 'x' is trashed, | |||||||
620 | and $ac->get_value('x') will now return the annotation collection | |||||||
621 | with tagname 'y'. | |||||||
622 | ||||||||
623 | =cut | |||||||
624 | ||||||||
625 | =head2 get_keys | |||||||
626 | ||||||||
627 | Title : get_keys | |||||||
628 | Usage : $ac->get_keys($tagname_level_1, $tagname_level_2,...) | |||||||
629 | Function: Get an array of tagnames underneath the named tag nodes | |||||||
630 | Example : # prints the values of the members of Category 1... | |||||||
631 | print map { $ac->get_value($_) } $ac->get_keys('Category 1') ; | |||||||
632 | Returns : array of tagnames or empty list if the arguments represent a leaf | |||||||
633 | Args : [array of] tagname[s] | |||||||
634 | ||||||||
635 | =cut | |||||||
636 | ||||||||
637 | =head1 GenBank accession manipulation methods | |||||||
638 | ||||||||
639 | =head2 get_accessions | |||||||
640 | ||||||||
641 | Title : get_accessions | |||||||
642 | Usage : $hiv_query->get_accessions() | |||||||
643 | Function: Return an array of GenBank accessions associated with these | |||||||
644 | sequences (available only after a query is subjected to a | |||||||
645 | full run (i.e., when $RUN_OPTION == 2) | |||||||
646 | Example : | |||||||
647 | Returns : array of gb accession numbers, or () if none found for this query | |||||||
648 | Args : none | |||||||
649 | ||||||||
650 | =cut | |||||||
651 | ||||||||
652 | sub get_accessions{ | |||||||
653 | 0 | 0 | 1 | 0 | my $self = shift; | |||
654 | 0 | 0 | my @ret; | |||||
655 | 0 | 0 | 0 | 0 | if (!$self->{'_RUN_LEVEL'} || ($self->{'_RUN_LEVEL'} < 2)) { | |||
656 | 0 | 0 | $self->warn('Requires query run at level 2'); | |||||
657 | 0 | 0 | return (); | |||||
658 | } | |||||||
659 | 0 | 0 | my @ac = $self->get_annotations_by_ids($self->ids); | |||||
660 | 0 | 0 | foreach (@ac) { | |||||
661 | 0 | 0 | push @ret, $_->get_value('Special','accession'); | |||||
662 | }; | |||||||
663 | 0 | 0 | return @ret; | |||||
664 | } | |||||||
665 | ||||||||
666 | =head2 get_accessions_by_ids | |||||||
667 | ||||||||
668 | Title : get_accessions_by_ids (or ..._by_id) | |||||||
669 | Usage : $hiv_query->get_accessions_by_ids(@ids) | |||||||
670 | Function: Return an array of GenBank accessions associated with these | |||||||
671 | LANL ids (available only after a query is subjected to a | |||||||
672 | full run (i.e., when $RUN_OPTION == 2) | |||||||
673 | Example : | |||||||
674 | Returns : array of gb accession numbers, or () if none found for this query | |||||||
675 | Args : none | |||||||
676 | ||||||||
677 | =cut | |||||||
678 | ||||||||
679 | sub get_accessions_by_ids { | |||||||
680 | 0 | 0 | 1 | 0 | my $self = shift; | |||
681 | 0 | 0 | my @ids = @_; | |||||
682 | 0 | 0 | my @ret; | |||||
683 | 0 | 0 | 0 | 0 | if (!$self->{'_RUN_LEVEL'} || ($self->{'_RUN_LEVEL'} < 2)) { | |||
684 | 0 | 0 | $self->warn('Requires query run at level 2'); | |||||
685 | 0 | 0 | return (); | |||||
686 | } | |||||||
687 | 0 | 0 | my @ac = $self->get_annotations_by_ids(@ids); | |||||
688 | 0 | 0 | foreach (@ac) { | |||||
689 | 0 | 0 | push @ret, $_->get_value('Special', 'accession'); | |||||
690 | }; | |||||||
691 | 0 | 0 | 0 | return wantarray ? @ret : $ret[0]; | ||||
692 | } | |||||||
693 | ||||||||
694 | # singular alias | |||||||
695 | sub get_accessions_by_id { | |||||||
696 | 0 | 0 | 0 | 0 | shift->get_accessions_by_ids(@_); | |||
697 | } | |||||||
698 | ||||||||
699 | ########## | |||||||
700 | ||||||||
701 | =head1 Query control methods | |||||||
702 | ||||||||
703 | =head2 _do_query | |||||||
704 | ||||||||
705 | Title : _do_query | |||||||
706 | Usage : $hiv_query->_do_query or $hiv_query->_do_query($run_level) | |||||||
707 | Function: Execute the query according to argument or $RUN_OPTION | |||||||
708 | and set _RUN_LEVEL | |||||||
709 | extent of query reflects the value of argument | |||||||
710 | 0 : validate only (no HTTP action) | |||||||
711 | 1 : return sequence count only | |||||||
712 | 2 : return sequence ids (full query, returns with annotations) | |||||||
713 | noop if current _RUN_LEVEL of query is >= argument or $RUN_OPTION, | |||||||
714 | Example : | |||||||
715 | Returns : actual _RUN_LEVEL (0, 1, or 2) achieved | |||||||
716 | Args : desired run level (optional, global $RUN_OPTION is default) | |||||||
717 | ||||||||
718 | =cut | |||||||
719 | ||||||||
720 | sub _do_query{ | |||||||
721 | 9 | 9 | 105 | my ($self,$rl) = @_; | ||||
722 | 9 | 100 | 22 | $rl = $RUN_OPTION unless defined $rl; | ||||
723 | 9 | 50 | 86 | $self->throw(-class=>"Bio::Root::BadParameter", | ||||
724 | -text=>"Invalid run option \"$RUN_OPTION\"", | |||||||
725 | -value=>$RUN_OPTION) unless grep /^$RUN_OPTION$/, (0, 1, 2); | |||||||
726 | 9 | 50 | 20 | (!defined($self->{'_RUN_LEVEL'})) && do { | ||||
727 | 9 | 18 | $self->_create_lanl_query(); | |||||
728 | 5 | 13 | $self->{'_RUN_LEVEL'} = 0; | |||||
729 | }; | |||||||
730 | 5 | 0 | 0 | 11 | ($rl > 0) && (!defined($self->{'_RUN_LEVEL'}) || ($self->{'_RUN_LEVEL'} <= 0)) && do { | |||
33 | ||||||||
731 | 0 | 0 | $self->_do_lanl_request(); | |||||
732 | 0 | 0 | $self->{'_RUN_LEVEL'} = 1; | |||||
733 | }; | |||||||
734 | 5 | 0 | 0 | 12 | ($rl > 1) && (!defined($self->{'_RUN_LEVEL'}) || ($self->{'_RUN_LEVEL'} <= 1)) && do { | |||
33 | ||||||||
735 | 0 | 0 | $self->_parse_lanl_response(); | |||||
736 | 0 | 0 | $self->{'_RUN_LEVEL'} = 2; | |||||
737 | }; | |||||||
738 | 5 | 68 | return $self->{'_RUN_LEVEL'}; | |||||
739 | } | |||||||
740 | ||||||||
741 | =head2 _reset | |||||||
742 | ||||||||
743 | Title : _reset | |||||||
744 | Usage : $hiv_query->_reset | |||||||
745 | Function: Resets query storage, count, and ids, while retaining session id, | |||||||
746 | original query string, and db schema | |||||||
747 | Example : | |||||||
748 | Returns : void | |||||||
749 | Args : none | |||||||
750 | ||||||||
751 | =cut | |||||||
752 | ||||||||
753 | sub _reset{ | |||||||
754 | 9 | 9 | 13 | my $self = shift; | ||||
755 | 9 | 26 | $self->ids([]); | |||||
756 | 9 | 22 | $self->count(0); | |||||
757 | 9 | 13 | $self->{'_annotations'} = {}; | |||||
758 | 9 | 19 | $self->{'_lanl_response'} = []; | |||||
759 | 9 | 11 | $self->{'_lanl_query'} = []; | |||||
760 | 9 | 21 | $self->{'_RUN_LEVEL'} = undef; | |||||
761 | 9 | 11 | return; | |||||
762 | } | |||||||
763 | ||||||||
764 | =head2 _session_id | |||||||
765 | ||||||||
766 | Title : _session_id | |||||||
767 | Usage : $hiv_query->_session_id($newval) | |||||||
768 | Function: Get/set HIV db session id (initialized in _do_lanl_request) | |||||||
769 | Example : | |||||||
770 | Returns : value of _session_id (a scalar) | |||||||
771 | Args : on set, new value (a scalar or undef, optional) | |||||||
772 | ||||||||
773 | =cut | |||||||
774 | ||||||||
775 | sub _session_id{ | |||||||
776 | 0 | 0 | 0 | my $self = shift; | ||||
777 | ||||||||
778 | 0 | 0 | 0 | return $self->{'_session_id'} = shift if @_; | ||||
779 | 0 | 0 | return $self->{'_session_id'}; | |||||
780 | } | |||||||
781 | =head2 _run_level | |||||||
782 | ||||||||
783 | Title : _run_level | |||||||
784 | Usage : $obj->_run_level($newval) | |||||||
785 | Function: returns the level at which the query has so far been run | |||||||
786 | Example : | |||||||
787 | Returns : value of _run_level (a scalar) | |||||||
788 | Args : on set, new value (a scalar or undef, optional) | |||||||
789 | ||||||||
790 | =cut | |||||||
791 | ||||||||
792 | sub _run_level{ | |||||||
793 | 0 | 0 | 0 | my $self = shift; | ||||
794 | ||||||||
795 | 0 | 0 | 0 | return $self->{'_RUN_LEVEL'} = shift if @_; | ||||
796 | 0 | 0 | return $self->{'_RUN_LEVEL'}; | |||||
797 | } | |||||||
798 | ||||||||
799 | =head2 _run_option | |||||||
800 | ||||||||
801 | Title : _run_option | |||||||
802 | Usage : $hiv_query->_run_option($newval) | |||||||
803 | Function: Get/set HIV db query run option (see _do_query for values) | |||||||
804 | Example : | |||||||
805 | Returns : value of _run_option (a scalar) | |||||||
806 | Args : on set, new value (a scalar or undef, optional) | |||||||
807 | ||||||||
808 | =cut | |||||||
809 | ||||||||
810 | sub _run_option{ | |||||||
811 | 2 | 2 | 4 | my $self = shift; | ||||
812 | ||||||||
813 | 2 | 100 | 6 | return $self->{'_run_option'} = shift if @_; | ||||
814 | 1 | 3 | return $self->{'_run_option'}; | |||||
815 | } | |||||||
816 | ||||||||
817 | =head2 _ua_hash | |||||||
818 | ||||||||
819 | Title : _ua_hash | |||||||
820 | Usage : $obj->_ua_hash($newval) | |||||||
821 | Function: | |||||||
822 | Example : | |||||||
823 | Returns : value of _ua_hash (a scalar) | |||||||
824 | Args : on set, new value (a scalar or undef, optional) | |||||||
825 | ||||||||
826 | =cut | |||||||
827 | ||||||||
828 | sub _ua_hash{ | |||||||
829 | 1 | 1 | 1 | my $self = shift; | ||||
830 | 1 | 50 | 3 | if (@_) { | ||||
831 | 1 | 4 | for (ref $_[0]) { | |||||
832 | 1 | 50 | 2 | $_ eq 'HASH' && do { | ||||
833 | 1 | 2 | $self->{'_ua_hash'} = $_[0]; | |||||
834 | 1 | 3 | last; | |||||
835 | }; | |||||||
836 | 0 | 0 | 0 | !$_ && do { | ||||
837 | 0 | 0 | $self->{'_ua_hash'} = {@_}; | |||||
838 | 0 | 0 | last; | |||||
839 | }; | |||||||
840 | 0 | 0 | do { | |||||
841 | 0 | 0 | $self->throw("Type ".ref($_)." unsupported as arg in _ua_hash"); | |||||
842 | }; | |||||||
843 | ||||||||
844 | } | |||||||
845 | } | |||||||
846 | 1 | 1 | return %{$self->{'_ua_hash'}}; | |||||
1 | 1 | |||||||
847 | } | |||||||
848 | ||||||||
849 | ||||||||
850 | ####### | |||||||
851 | ||||||||
852 | =head1 Internals | |||||||
853 | ||||||||
854 | =head2 add_id | |||||||
855 | ||||||||
856 | Title : add_id | |||||||
857 | Usage : $hiv_query->add_id($id) | |||||||
858 | Function: Add new id to ids | |||||||
859 | Example : | |||||||
860 | Returns : the new id | |||||||
861 | Args : a sequence id | |||||||
862 | ||||||||
863 | =cut | |||||||
864 | ||||||||
865 | sub add_id { | |||||||
866 | 0 | 0 | 1 | 0 | my $self = shift; | |||
867 | 0 | 0 | my $id = shift; | |||||
868 | 0 | 0 | 0 | $id = "" unless defined $id; # avoid warnings | ||||
869 | 0 | 0 | ${$self->{'ids'}}{$id}++; | |||||
0 | 0 | |||||||
870 | 0 | 0 | return $id; | |||||
871 | } | |||||||
872 | ||||||||
873 | ||||||||
874 | sub lanl_base{ | |||||||
875 | 4 | 4 | 0 | 6 | my $self = shift; | |||
876 | 4 | 100 | 11 | return $self->{'lanl_base'} = shift if @_; | ||||
877 | 3 | 13 | return $self->{'lanl_base'}; | |||||
878 | } | |||||||
879 | ||||||||
880 | =head2 map_db | |||||||
881 | ||||||||
882 | Title : map_db | |||||||
883 | Usage : $obj->map_db($newval) | |||||||
884 | Function: | |||||||
885 | Example : | |||||||
886 | Returns : value of map_db (a scalar) | |||||||
887 | Args : on set, new value (a scalar or undef, optional) | |||||||
888 | ||||||||
889 | =cut | |||||||
890 | ||||||||
891 | sub map_db{ | |||||||
892 | 2 | 2 | 1 | 3 | my $self = shift; | |||
893 | 2 | 100 | 5 | return $self->{'map_db'} = shift if @_; | ||||
894 | 1 | 6 | return $self->{'map_db'}; | |||||
895 | } | |||||||
896 | ||||||||
897 | =head2 make_search_if | |||||||
898 | ||||||||
899 | Title : make_search_if | |||||||
900 | Usage : $obj->make_search_if($newval) | |||||||
901 | Function: | |||||||
902 | Example : | |||||||
903 | Returns : value of make_search_if (a scalar) | |||||||
904 | Args : on set, new value (a scalar or undef, optional) | |||||||
905 | ||||||||
906 | =cut | |||||||
907 | ||||||||
908 | sub make_search_if{ | |||||||
909 | 2 | 2 | 1 | 4 | my $self = shift; | |||
910 | 2 | 100 | 5 | return $self->{'make_search_if'} = shift if @_; | ||||
911 | 1 | 3 | return $self->{'make_search_if'}; | |||||
912 | } | |||||||
913 | ||||||||
914 | =head2 search_ | |||||||
915 | ||||||||
916 | Title : search_ | |||||||
917 | Usage : $obj->search_($newval) | |||||||
918 | Function: | |||||||
919 | Example : | |||||||
920 | Returns : value of search_ (a scalar) | |||||||
921 | Args : on set, new value (a scalar or undef, optional) | |||||||
922 | ||||||||
923 | =cut | |||||||
924 | ||||||||
925 | sub search_{ | |||||||
926 | 2 | 2 | 1 | 4 | my $self = shift; | |||
927 | 2 | 100 | 6 | return $self->{'search_'} = shift if @_; | ||||
928 | 1 | 3 | return $self->{'search_'}; | |||||
929 | } | |||||||
930 | ||||||||
931 | =head2 _map_db_uri | |||||||
932 | ||||||||
933 | Title : _map_db_uri | |||||||
934 | Usage : | |||||||
935 | Function: return the full map_db uri ("Database Map") | |||||||
936 | Example : | |||||||
937 | Returns : scalar string | |||||||
938 | Args : none | |||||||
939 | ||||||||
940 | =cut | |||||||
941 | ||||||||
942 | sub _map_db_uri{ | |||||||
943 | 1 | 1 | 1554 | my $self = shift; | ||||
944 | 1 | 4 | return $self->lanl_base."/".$self->map_db; | |||||
945 | } | |||||||
946 | ||||||||
947 | ||||||||
948 | =head2 _make_search_if_uri | |||||||
949 | ||||||||
950 | Title : _make_search_if_uri | |||||||
951 | Usage : | |||||||
952 | Function: return the full make_search_if uri ("Make Search Interface") | |||||||
953 | Example : | |||||||
954 | Returns : scalar string | |||||||
955 | Args : none | |||||||
956 | ||||||||
957 | =cut | |||||||
958 | ||||||||
959 | sub _make_search_if_uri{ | |||||||
960 | 1 | 1 | 2 | my $self = shift; | ||||
961 | 1 | 2 | return $self->lanl_base."/".$self->make_search_if; | |||||
962 | } | |||||||
963 | ||||||||
964 | =head2 _search_uri | |||||||
965 | ||||||||
966 | Title : _search_uri | |||||||
967 | Usage : | |||||||
968 | Function: return the full search cgi uri ("Search Database") | |||||||
969 | Example : | |||||||
970 | Returns : scalar string | |||||||
971 | Args : none | |||||||
972 | ||||||||
973 | =cut | |||||||
974 | ||||||||
975 | sub _search_uri{ | |||||||
976 | 1 | 1 | 2 | my $self = shift; | ||||
977 | 1 | 2 | return $self->lanl_base."/".$self->search_; | |||||
978 | } | |||||||
979 | ||||||||
980 | =head2 _schema_file | |||||||
981 | ||||||||
982 | Title : _schema_file | |||||||
983 | Usage : $hiv_query->_schema_file($newval) | |||||||
984 | Function: | |||||||
985 | Example : | |||||||
986 | Returns : value of _schema_file (an XML string or filename) | |||||||
987 | Args : on set, new value (an XML string or filename, or undef, optional) | |||||||
988 | ||||||||
989 | =cut | |||||||
990 | ||||||||
991 | sub _schema_file { | |||||||
992 | 3 | 3 | 4 | my $self = shift; | ||||
993 | ||||||||
994 | 3 | 100 | 10 | return $self->{'_schema_file'} = shift if @_; | ||||
995 | 2 | 12 | return $self->{'_schema_file'}; | |||||
996 | } | |||||||
997 | ||||||||
998 | =head2 _schema | |||||||
999 | ||||||||
1000 | Title : _schema | |||||||
1001 | Usage : $hiv_query->_schema($newVal) | |||||||
1002 | Function: | |||||||
1003 | Example : | |||||||
1004 | Returns : value of _schema (an HIVSchema object in package | |||||||
1005 | L |
|||||||
1006 | Args : none (field set directly in new()) | |||||||
1007 | ||||||||
1008 | =cut | |||||||
1009 | ||||||||
1010 | sub _schema{ | |||||||
1011 | 10 | 10 | 11 | my $self = shift; | ||||
1012 | ||||||||
1013 | $self->{'_schema'} ? | |||||||
1014 | 10 | 100 | 32 | return $self->{'_schema'} : | ||||
1015 | $self->throw(-class=>'Bio::SchemaNotInit::Exception', | |||||||
1016 | -text=>"DB schema not initialized", | |||||||
1017 | -value=>""); | |||||||
1018 | ||||||||
1019 | } | |||||||
1020 | ||||||||
1021 | =head2 _lanl_query | |||||||
1022 | ||||||||
1023 | Title : _lanl_query | |||||||
1024 | Usage : $hiv_query->_lanl_query(\@query_parms) | |||||||
1025 | Function: pushes \@query_parms onto @{$self->{'_lanl_query'} | |||||||
1026 | Example : | |||||||
1027 | Returns : value of _lanl_query (an arrayref) | |||||||
1028 | Args : on set, new value (an arrayref or undef, optional) | |||||||
1029 | ||||||||
1030 | =cut | |||||||
1031 | ||||||||
1032 | sub _lanl_query{ | |||||||
1033 | 5 | 5 | 6 | my $self = shift; | ||||
1034 | 5 | 7 | my $a = shift; | |||||
1035 | 5 | 50 | 8 | return $self->{'_lanl_query'} unless $a; | ||||
1036 | 5 | 50 | 13 | if (ref $a eq 'ARRAY') { | ||||
1037 | 5 | 7 | push @{$self->{'_lanl_query'}}, $a; | |||||
5 | 8 | |||||||
1038 | 5 | 21 | return $a; | |||||
1039 | } | |||||||
1040 | else { | |||||||
1041 | 0 | 0 | $self->throw(-class=>'Bio::Root::BadParameter', | |||||
1042 | -text=>'Array ref required for argument.', | |||||||
1043 | -value=>$a); | |||||||
1044 | } | |||||||
1045 | ||||||||
1046 | } | |||||||
1047 | ||||||||
1048 | =head2 _lanl_response | |||||||
1049 | ||||||||
1050 | Title : _lanl_response | |||||||
1051 | Usage : $hiv_query->_lanl_response($response) | |||||||
1052 | Function: pushes $response onto @{$hiv_query->{'_lanl_response'}} | |||||||
1053 | Example : | |||||||
1054 | Returns : value of _lanl_response (an arrayref of HTTP::Response objects) | |||||||
1055 | Args : on set, new value (an HTTP::Response object or undef, optional) | |||||||
1056 | ||||||||
1057 | =cut | |||||||
1058 | ||||||||
1059 | sub _lanl_response{ | |||||||
1060 | 0 | 0 | 0 | my $self = shift; | ||||
1061 | 0 | 0 | 0 | if (@_) { | ||||
1062 | 0 | 0 | my $r = shift; | |||||
1063 | 0 | 0 | 0 | $self->throw(-class=>'Bio::Root::BadParameter', | ||||
1064 | -text=>'Requires an HTTP::Response object', | |||||||
1065 | -value=> ref $r) unless ref($r) eq 'HTTP::Response'; | |||||||
1066 | 0 | 0 | push @{$self->{'_lanl_response'}}, $r; | |||||
0 | 0 | |||||||
1067 | 0 | 0 | return $r; | |||||
1068 | } | |||||||
1069 | 0 | 0 | return $self->{'_lanl_response'}; | |||||
1070 | } | |||||||
1071 | ||||||||
1072 | =head2 _create_lanl_query | |||||||
1073 | ||||||||
1074 | Title : _create_lanl_query | |||||||
1075 | Usage : $hiv_query->_create_lanl_query() | |||||||
1076 | Function: validate query hash or string, prepare for _do_lanl_request | |||||||
1077 | Example : | |||||||
1078 | Returns : 1 if successful; throws exception on invalid query | |||||||
1079 | Args : | |||||||
1080 | ||||||||
1081 | =cut | |||||||
1082 | ||||||||
1083 | sub _create_lanl_query { | |||||||
1084 | 9 | 9 | 14 | my $self = shift; | ||||
1085 | 9 | 12 | my (%inhash, @query, @qhashes); | |||||
1086 | 0 | 0 | my ($schema, @validFields, @validAliases); | |||||
1087 | ||||||||
1088 | 9 | 17 | for ($self->query) { | |||||
1089 | 9 | 50 | 19 | !defined && do { | ||||
1090 | 0 | 0 | $self->throw(-class=>'Bio::Root::NoSuchThing', | |||||
1091 | -text=>'Query not specified', | |||||||
1092 | -value=>''); | |||||||
1093 | 0 | 0 | last; | |||||
1094 | }; | |||||||
1095 | 9 | 100 | 20 | ref eq 'HASH' && do { | ||||
1096 | 2 | 6 | %inhash = %$_; | |||||
1097 | 2 | 100 | 4 | if ( grep /HASH/, map {ref} values %inhash ) { | ||||
5 | 14 | |||||||
1098 | # check for {query=>{},annot=>[]} style | |||||||
1099 | $self->throw(-class=>'Bio::Root::BadParameter', | |||||||
1100 | -text=>'Query style unrecognized', | |||||||
1101 | 1 | 50 | 4 | -value=>"") unless defined $inhash{query}; | ||||
1102 | 1 | 3 | push @qhashes, $_; | |||||
1103 | } | |||||||
1104 | 2 | 3 | last; | |||||
1105 | }; | |||||||
1106 | 7 | 100 | 15 | ref eq 'ARRAY' && do { | ||||
1107 | 3 | 10 | $inhash{'query'} = {@$_}; | |||||
1108 | 3 | 5 | push @qhashes, \%inhash; | |||||
1109 | 3 | 5 | last; | |||||
1110 | }; | |||||||
1111 | #else | |||||||
1112 | 4 | 4 | do { | |||||
1113 | 4 | 11 | @qhashes = $self->_parse_query_string($_); | |||||
1114 | }; | |||||||
1115 | } | |||||||
1116 | 9 | 22 | $schema = $self->_schema; | |||||
1117 | 8 | 23 | @validFields = $schema->fields; | |||||
1118 | 8 | 35 | @validAliases = $schema->aliases; | |||||
1119 | ||||||||
1120 | # validate args based on the xml specification file | |||||||
1121 | # only checks blanks and fields with explicitly specified options | |||||||
1122 | # text fields can put anything, and the query will be run before | |||||||
1123 | # an error is caught in these | |||||||
1124 | 8 | 46 | foreach my $qh (@qhashes) { | |||||
1125 | 8 | 12 | @query=(); | |||||
1126 | 8 | 6 | foreach my $k (keys %{$$qh{'query'}}) { | |||||
8 | 29 | |||||||
1127 | 17 | 8 | my $fld; | |||||
1128 | # validate field | |||||||
1129 | 17 | 50 | 1930 | if (grep /^$k$/, @validFields) { | ||||
100 | ||||||||
1130 | 0 | 0 | $fld = $k; | |||||
1131 | } | |||||||
1132 | elsif (grep /^$k$/, @validAliases) { | |||||||
1133 | 15 | 32 | foreach (@validFields) { | |||||
1134 | 1018 | 100 | 1302 | if (grep (/^$k$/, $schema->aliases($_))) { | ||||
1135 | 15 | 15 | $fld = $_; | |||||
1136 | 15 | 26 | last; | |||||
1137 | } | |||||||
1138 | # $fld contains the field corresp. to the alias | |||||||
1139 | } | |||||||
1140 | } | |||||||
1141 | else { | |||||||
1142 | 2 | 21 | $self->throw(-class=>'Bio::Root::BadParameter', | |||||
1143 | -text=>"Invalid field or alias \"$k\"", | |||||||
1144 | -value=>$qh); | |||||||
1145 | } | |||||||
1146 | # validate matchdata | |||||||
1147 | 15 | 34 | my $vf = $schema->_sfieldh($fld); | |||||
1148 | 15 | 100 | 46 | my @md = (ref($qh->{'query'}{$k}) eq 'ARRAY') ? @{$qh->{'query'}{$k}} : $qh->{'query'}{$k}; | ||||
6 | 12 | |||||||
1149 | 15 | 50 | 46 | if ($$vf{type} eq 'text') { | ||||
50 | ||||||||
1150 | 0 | 0 | foreach (@md) { | |||||
1151 | $self->throw(-class=>'Bio::Root::BadParameter', | |||||||
1152 | -text=>'Value for field \"$k\" cannot be empty', | |||||||
1153 | -value=>$qh) | |||||||
1154 | 0 | 0 | 0 | 0 | if ($_ eq "") && ($$vf{blank_ok} eq 'false'); | |||
1155 | } | |||||||
1156 | } | |||||||
1157 | elsif ($$vf{type} eq 'option') { | |||||||
1158 | 15 | 20 | foreach my $md (@md) { | |||||
1159 | $self->throw(-class=>'Bio::Root::BadParameter', | |||||||
1160 | -text=>"Invalid value \"".$md."\" for field \"$fld\"", | |||||||
1161 | -value=>$md) | |||||||
1162 | 21 | 50 | 66 | 45 | unless $$vf{option} && grep {defined $_ && /^$md$/} @{$$vf{option}}; | |||
3086 | 100 | 7864 | ||||||
21 | 33 | |||||||
1163 | } | |||||||
1164 | } | |||||||
1165 | # validated; add to query | |||||||
1166 | 14 | 21 | foreach (@md) { | |||||
1167 | 20 | 36 | push @query, ($fld => $_); | |||||
1168 | } | |||||||
1169 | } | |||||||
1170 | 5 | 100 | 16 | if ($qh->{'annot'}) { | ||||
1171 | # validate the column names to be included in the query | |||||||
1172 | # to obtain annotations | |||||||
1173 | 2 | 3 | my @annot_cols = @{$qh->{'annot'}}; | |||||
2 | 6 | |||||||
1174 | 2 | 3 | foreach my $k (@annot_cols) { | |||||
1175 | 2 | 3 | my $fld; | |||||
1176 | # validate field | |||||||
1177 | 2 | 50 | 206 | if (grep /^$k$/, @validFields) { | ||||
50 | ||||||||
1178 | 0 | 0 | $fld = $k; | |||||
1179 | } | |||||||
1180 | elsif (grep /^$k$/, @validAliases) { | |||||||
1181 | 2 | 3 | foreach (@validFields) { | |||||
1182 | 130 | 100 | 173 | if (grep (/^$k$/, $schema->aliases($_))) { | ||||
1183 | 2 | 2 | $fld = $_; | |||||
1184 | 2 | 3 | last; | |||||
1185 | } | |||||||
1186 | # $fld should contain the field corresp. to the alias | |||||||
1187 | } | |||||||
1188 | } | |||||||
1189 | else { | |||||||
1190 | 0 | 0 | $self->throw(-class=>'Bio::Root::NoSuchThing', | |||||
1191 | -text=>"Invalid field or alias \"$k\"", | |||||||
1192 | -value=>$k); | |||||||
1193 | } | |||||||
1194 | # lazy: 'Any' may not be the right default (but appears to | |||||||
1195 | # be, based on the lanl html) | |||||||
1196 | 2 | 4 | push @query, ($fld => 'Any'); | |||||
1197 | } | |||||||
1198 | } | |||||||
1199 | ||||||||
1200 | # insure that LANL and GenBank ids are retrieved | |||||||
1201 | 5 | 50 | 30 | push @query, ('sequenceentry.se_id' => 'Any') unless grep /SequenceEntry\.SE_id/, @query; | ||||
1202 | 5 | 50 | 17 | push @query, ('sequenceaccessions.sa_genbankaccession' => 'Any') | ||||
1203 | unless grep /SequenceAccessions\.SA_GenBankAccession/, @query; | |||||||
1204 | ||||||||
1205 | # an "order" field is required by the LANL CGI | |||||||
1206 | # if not specified, default to SE_id | |||||||
1207 | ||||||||
1208 | 5 | 50 | 23 | push @query, ('order'=>'sequenceentry.se_id') unless grep /order/, @query; | ||||
1209 | ||||||||
1210 | # @query now contains sfield=>matchdata pairs, as specified by user | |||||||
1211 | # include appropriate indexes to create correct automatic joins | |||||||
1212 | # established by the LANL CGI | |||||||
1213 | 5 | 6 | my (@qtbl, @qpk, @qfk); | |||||
1214 | ||||||||
1215 | # the tables represented in query: | |||||||
1216 | 5 | 28 | my %q = @query; # squish the tables in the current query into hash keys | |||||
1217 | 5 | 25 | @qtbl = $schema->tbl('-s', keys %q); | |||||
1218 | ||||||||
1219 | 5 | 50 | 13 | if (@qtbl > 1) { | ||||
1220 | # more than one table, see if they can be connected | |||||||
1221 | # get primary keys of query tables | |||||||
1222 | 5 | 12 | @qpk = $schema->pk(@qtbl); | |||||
1223 | ||||||||
1224 | # we need to get each query table to join to | |||||||
1225 | # SequenceEntry. | |||||||
1226 | # | |||||||
1227 | # The schema is a graph with tables as nodes and | |||||||
1228 | # foreign keys<->primary keys as branches. To get a | |||||||
1229 | # join that works, need to include in the query | |||||||
1230 | # all branches along a path from SequenceEntry | |||||||
1231 | # to each query table. | |||||||
1232 | # | |||||||
1233 | # find_join does it... | |||||||
1234 | my @joink = map { | |||||||
1235 | 5 | 6 | my @k = $schema->find_join($_,'sequenceentry'); | |||||
15 | 34 | |||||||
1236 | 15 | 100 | 18 | map {$_ || ()} @k | ||||
15 | 47 | |||||||
1237 | } @qtbl; | |||||||
1238 | # squish the keys in @joink | |||||||
1239 | 5 | 8 | my %j; | |||||
1240 | 5 | 17 | @j{@joink} = (1) x @joink; | |||||
1241 | 5 | 11 | @joink = keys %j; | |||||
1242 | # add the fields not currently in the query | |||||||
1243 | 5 | 11 | foreach (@qpk, @joink) { | |||||
1244 | 15 | 12 | my $fld = $_; | |||||
1245 | 15 | 100 | 243 | if (!grep(/^$fld$/,keys %q)) { | ||||
1246 | # lazy: 'Any' may not be the right default (but appears to | |||||||
1247 | # be, based on the lanl html) | |||||||
1248 | 10 | 27 | push @query, ($_ => 'Any'); | |||||
1249 | } | |||||||
1250 | } | |||||||
1251 | ||||||||
1252 | } | |||||||
1253 | ||||||||
1254 | # set object property | |||||||
1255 | 5 | 30 | $self->_lanl_query([@query]); | |||||
1256 | } | |||||||
1257 | 5 | 129 | return 1; | |||||
1258 | } | |||||||
1259 | ||||||||
1260 | # _do_lanl_request : post the queries created by _create_lanl_query | |||||||
1261 | # | |||||||
1262 | # @args (or {@args}) should be unaliased Table.Column=>Matchdata | |||||||
1263 | # pairs (these will be used directly in the POSTs) | |||||||
1264 | ||||||||
1265 | =head2 _do_lanl_request | |||||||
1266 | ||||||||
1267 | Title : _do_lanl_request | |||||||
1268 | Usage : $hiv_query->_do_lanl_request() | |||||||
1269 | Function: Perform search request on _create_lanl_query-validated query | |||||||
1270 | Example : | |||||||
1271 | Returns : 1 if successful | |||||||
1272 | Args : | |||||||
1273 | ||||||||
1274 | =cut | |||||||
1275 | ||||||||
1276 | sub _do_lanl_request { | |||||||
1277 | 0 | 0 | 0 | my $self = shift; | ||||
1278 | 0 | 0 | my (@queries, @query, @interface,$interfGet,$searchGet,$response); | |||||
1279 | 0 | 0 | my ($numseqs, $count); | |||||
1280 | ||||||||
1281 | # handle args | |||||||
1282 | 0 | 0 | 0 | if (!$self->_lanl_query) { | ||||
1283 | 0 | 0 | $self->throw(-class=>"Bio::Root::BadParameter", | |||||
1284 | -text=>"_lanl_query empty, run _create_lanl_request first", | |||||||
1285 | -value=>""); | |||||||
1286 | } | |||||||
1287 | else { | |||||||
1288 | 0 | 0 | @queries = @{$self->_lanl_query}; | |||||
0 | 0 | |||||||
1289 | } | |||||||
1290 | ||||||||
1291 | ## utility vars | |||||||
1292 | ## search site specific CGI parms | |||||||
1293 | 0 | 0 | my @search_pms = ('action'=>'Search'); | |||||
1294 | 0 | 0 | my @searchif_pms = ('action'=>'Search Interface'); | |||||
1295 | # don't get the actual sequence data here (i.e., the cgi parm | |||||||
1296 | # 'incl_seq' remains undefined... | |||||||
1297 | 0 | 0 | my @download_pms = ('action Download.x'=>1, 'action Download.y'=>1); | |||||
1298 | ||||||||
1299 | ## HTML-testing regexps | |||||||
1300 | 0 | 0 | my $tags_re = qr{(?:\s*<[^>]+>\s*)}; | |||||
1301 | 0 | 0 | my $session_id_re = qr{ | |||||
1302 | 0 | 0 | my $search_form_re = qr{ | |||||
1303 | 0 | 0 | my $seqs_found_re = qr{Displaying$tags_re*(?:\s*[0-9-]*\s*)*$tags_re*of$tags_re*\s*([0-9]+)$tags_re*sequences found}; | |||||
1304 | 0 | 0 | my $no_seqs_found_re = qr{Sorry.*no sequences found}; | |||||
1305 | 0 | 0 | my $too_many_re = qr{too many records: $tags_re*([0-9]+)}; | |||||
1306 | 0 | 0 | my $sys_error_re = qr{[Ss]ystem error}; | |||||
1307 | 0 | 0 | my $sys_error_extract_re = qr{${tags_re}error:.*? | ]+>${tags_re}(.*?) }; |
||||
1308 | # find something like: | |||||||
1309 | # tables without join: SequenceAccessions |
|||||||
1310 | 0 | 0 | my $tbl_no_join_re = qr{tables without join}i; | |||||
1311 | # my $sorry_bud_re = qr{}; | |||||||
1312 | ||||||||
1313 | ||||||||
1314 | 0 | 0 | foreach my $q (@queries) { | |||||
1315 | 0 | 0 | @query = @$q; | |||||
1316 | # default query control parameters | |||||||
1317 | 0 | 0 | my %qctrl = ( | |||||
1318 | max_rec=>100, | |||||||
1319 | sort_dir=>'ASC', | |||||||
1320 | translate=>'FALSE' # nucleotides | |||||||
1321 | ); | |||||||
1322 | ||||||||
1323 | # do work... | |||||||
1324 | ||||||||
1325 | # pull out commands, designated by the COMMAND pseudo-table... | |||||||
1326 | 0 | 0 | 0 | my @commands = map { $query[$_] =~ s/^COMMAND\.// ? @query[$_..$_+1] : () } (0..$#query-1); | ||||
0 | 0 | |||||||
1327 | 0 | 0 | 0 | @query = map { $query[$_] =~ /^COMMAND/ ? () : @query[2*$_..2*$_+1] } (0..($#query-1)/2); | ||||
0 | 0 | |||||||
1328 | ||||||||
1329 | ||||||||
1330 | # set control parameters explicitly made in query | |||||||
1331 | 0 | 0 | foreach my $cp (keys %qctrl) { | |||||
1332 | 0 | 0 | 0 | if (!grep( /^$cp$/, @query)) { | ||||
1333 | 0 | 0 | push @query, ($cp, $qctrl{$cp}); | |||||
1334 | } | |||||||
1335 | } | |||||||
1336 | ||||||||
1337 | # note that @interface must be an array, since a single 'key' (the table) | |||||||
1338 | # can be associated with multiple 'values' (the columns) in the POST | |||||||
1339 | ||||||||
1340 | # squish fieldnames into hash keys | |||||||
1341 | 0 | 0 | my %q = @query; | |||||
1342 | 0 | 0 | @interface = grep {defined} map {my ($tbl,$col) = /^(.*)\.(.*)$/} keys %q; | |||||
0 | 0 | |||||||
0 | 0 | |||||||
1343 | 0 | 0 | my $err_val = ""; # to contain informative (ha!) value if error is parsed | |||||
1344 | ||||||||
1345 | 0 | 0 | eval { # encapsulate communication errors here, defer biothrows... | |||||
1346 | ||||||||
1347 | #mark the useragent should be setable from outside (so we can modify timeouts, etc) | |||||||
1348 | 0 | 0 | my $ua = Bio::WebAgent->new($self->_ua_hash); | |||||
1349 | 0 | 0 | my $idPing = $ua->get($self->_map_db_uri); | |||||
1350 | 0 | 0 | 0 | $idPing->is_success || do { | ||||
1351 | 0 | 0 | $response=$idPing; | |||||
1352 | 0 | 0 | die "Connect failed"; | |||||
1353 | }; | |||||||
1354 | # get the session id | |||||||
1355 | 0 | 0 | 0 | if (!$self->_session_id) { | ||||
1356 | 0 | 0 | ($self->{'_session_id'}) = ($idPing->content =~ /$session_id_re/); | |||||
1357 | 0 | 0 | 0 | $self->_session_id || do { | ||||
1358 | 0 | 0 | $response=$idPing; | |||||
1359 | 0 | 0 | die "Session not established"; | |||||
1360 | }; | |||||||
1361 | } | |||||||
1362 | # 10/07/08: | |||||||
1363 | # strange bug: if action=>'Search+Interface' below (note "+"), | |||||||
1364 | # the response to the search (in $searchGet) shows the correct | |||||||
1365 | # >number< of sequences found, but also an error "No sequences | |||||||
1366 | # match" and an SQL barf. Changing the "+" to a " " sets up the | |||||||
1367 | # interface to lead to the actual sequences being delivered as | |||||||
1368 | # expected. maj | |||||||
1369 | 0 | 0 | $interfGet = $ua->post($self->_make_search_if_uri, [@interface, @searchif_pms, id=>$self->_session_id]); | |||||
1370 | 0 | 0 | 0 | $interfGet->is_success || do { | ||||
1371 | 0 | 0 | $response=$interfGet; | |||||
1372 | 0 | 0 | die "Interface request failed"; | |||||
1373 | }; | |||||||
1374 | # see if a search form was returned... | |||||||
1375 | ||||||||
1376 | 0 | 0 | 0 | $interfGet->content =~ /$search_form_re/ || do { | ||||
1377 | 0 | 0 | $response=$interfGet; | |||||
1378 | 0 | 0 | die "Interface request failed"; | |||||
1379 | }; | |||||||
1380 | ||||||||
1381 | 0 | 0 | $searchGet = $ua->post($self->_search_uri, [@query, @commands, @search_pms, id=>$self->_session_id]); | |||||
1382 | 0 | 0 | 0 | $searchGet->is_success || do { | ||||
1383 | 0 | 0 | $response = $searchGet; | |||||
1384 | 0 | 0 | die "Search failed"; | |||||
1385 | }; | |||||||
1386 | 0 | 0 | $response = $searchGet; | |||||
1387 | 0 | 0 | for ($searchGet->content) { | |||||
1388 | 0 | 0 | 0 | /$no_seqs_found_re/ && do { | ||||
1389 | 0 | 0 | $err_val = 0; | |||||
1390 | 0 | 0 | die "No sequences found"; | |||||
1391 | 0 | 0 | last; | |||||
1392 | }; | |||||||
1393 | 0 | 0 | 0 | /$too_many_re/ && do { | ||||
1394 | 0 | 0 | $err_val = $1; | |||||
1395 | 0 | 0 | die "Too many records ($1): must be <10000"; | |||||
1396 | 0 | 0 | last; | |||||
1397 | }; | |||||||
1398 | 0 | 0 | 0 | /$tbl_no_join_re/ && do { | ||||
1399 | 0 | 0 | die "Some required tables went unjoined to query"; | |||||
1400 | 0 | 0 | last; | |||||
1401 | }; | |||||||
1402 | 0 | 0 | 0 | /$sys_error_re/ && do { | ||||
1403 | 0 | 0 | /$sys_error_extract_re/; | |||||
1404 | 0 | 0 | $err_val = $1; | |||||
1405 | 0 | 0 | die "LANL system error"; | |||||
1406 | }; | |||||||
1407 | 0 | 0 | 0 | /$seqs_found_re/ && do { | ||||
1408 | 0 | 0 | $numseqs = $1; | |||||
1409 | 0 | 0 | $count += $numseqs; | |||||
1410 | 0 | 0 | last; | |||||
1411 | }; | |||||||
1412 | # else... | |||||||
1413 | 0 | 0 | do { | |||||
1414 | 0 | 0 | die "Search failed (response not parsed)"; | |||||
1415 | }; | |||||||
1416 | } | |||||||
1417 | 0 | 0 | $response = $ua->post($self->_search_uri, [@download_pms, id=>$self->_session_id]); | |||||
1418 | 0 | 0 | 0 | $response->is_success || die "Query failed"; | ||||
1419 | # $response->content is a tab-separated value table of sequences | |||||||
1420 | # and metadata, first line starts with \# and contains fieldnames | |||||||
1421 | }; | |||||||
1422 | 0 | 0 | $self->_lanl_response($response); | |||||
1423 | # throw, if necessary | |||||||
1424 | 0 | 0 | 0 | if ($@) { | ||||
1425 | 0 | 0 | 0 | ($@ !~ "No sequences found") && do { | ||||
1426 | 0 | 0 | $self->throw(-class=>'Bio::WebError::Exception', | |||||
1427 | -text=>$@, | |||||||
1428 | -value=>$err_val); | |||||||
1429 | }; | |||||||
1430 | } | |||||||
1431 | } | |||||||
1432 | ||||||||
1433 | 0 | 0 | 0 | $self->warn("No sequences found for this query") unless $count; | ||||
1434 | 0 | 0 | $self->count($count); | |||||
1435 | 0 | 0 | return 1; # made it. | |||||
1436 | ||||||||
1437 | } | |||||||
1438 | ||||||||
1439 | =head2 _parse_lanl_response | |||||||
1440 | ||||||||
1441 | Title : _parse_lanl_response | |||||||
1442 | Usage : $hiv_query->_parse_lanl_response() | |||||||
1443 | Function: Parse the tab-separated-value response obtained by _do_lanl_request | |||||||
1444 | for sequence ids, accessions, and annotations | |||||||
1445 | Example : | |||||||
1446 | Returns : 1 if successful | |||||||
1447 | Args : | |||||||
1448 | ||||||||
1449 | =cut | |||||||
1450 | ||||||||
1451 | sub _parse_lanl_response { | |||||||
1452 | ||||||||
1453 | ### handle parsing and merging multiple responses into the query object | |||||||
1454 | ### (ids and annotations) | |||||||
1455 | 0 | 0 | 0 | my $self = shift; | ||||
1456 | ||||||||
1457 | 0 | 0 | my ($seqGet) = (@_); | |||||
1458 | 0 | 0 | my (@data, @cols, %antbl, %antype); | |||||
1459 | 0 | 0 | my $numseq = 0; | |||||
1460 | 0 | 0 | my ($schema, @retseqs, %rec, $ac); | |||||
1461 | 0 | 0 | $schema = $self->_schema; | |||||
1462 | ||||||||
1463 | 0 | 0 | 0 | $self->_lanl_response || | ||||
1464 | $self->throw(-class=>"Bio::QueryNotMade::Exception", | |||||||
1465 | -text=>"Query not yet performed; call _do_lanl_request()", | |||||||
1466 | -value=>""); | |||||||
1467 | 0 | 0 | foreach my $rsp (@{$self->_lanl_response}) { | |||||
0 | 0 | |||||||
1468 | 0 | 0 | @data = split(/\r|\n/, $rsp->content); | |||||
1469 | 0 | 0 | my $l; | |||||
1470 | 0 | 0 | do { | |||||
1471 | 0 | 0 | $l = shift @data; | |||||
1472 | } while ($l !~ /Number/); | |||||||
1473 | 0 | 0 | $numseq += ( $l =~ /Number.*:\s([0-9]+)/ )[0]; | |||||
1474 | 0 | 0 | @cols = split(/\t/, shift(@data)); | |||||
1475 | # mappings from column headings to annotation keys | |||||||
1476 | # squish into hash keys | |||||||
1477 | 0 | 0 | my %q = @{ shift @{$self->_lanl_query} }; | |||||
0 | 0 | |||||||
0 | 0 | |||||||
1478 | 0 | 0 | %antbl = $schema->ankh(keys %q); | |||||
1479 | # get the category for each annotation | |||||||
1480 | 0 | 0 | map { $antype{ $_->{ankey} } = $_->{antype} } values %antbl; | |||||
0 | 0 | |||||||
1481 | # normalize column headers | |||||||
1482 | 0 | 0 | map { tr/ /_/; $_ = lc; } @cols; | |||||
0 | 0 | |||||||
0 | 0 | |||||||
1483 | 0 | 0 | foreach (@data) { | |||||
1484 | 0 | 0 | @rec{@cols} = split /\t/; | |||||
1485 | 0 | 0 | my $id = $rec{'se_id'}; | |||||
1486 | 0 | 0 | $self->add_id($id); | |||||
1487 | 0 | 0 | $ac = Bio::Annotation::Collection->new(); | |||||
1488 | #create annotations | |||||||
1489 | 0 | 0 | foreach (@cols) { | |||||
1490 | 0 | 0 | 0 | next if $_ eq '#'; | ||||
1491 | 0 | 0 | 0 | my $t = $antype{$_} || "Unclassified"; | ||||
1492 | 0 | 0 | my $d = $rec{$_}; # the data | |||||
1493 | 0 | 0 | $ac->put_value(-KEYS=>[$t, $_], -VALUE=>$d); | |||||
1494 | } | |||||||
1495 | 0 | 0 | $self->add_annotations_for_id($id, $ac); | |||||
1496 | } | |||||||
1497 | 0 | 0 | 1; | |||||
1498 | } | |||||||
1499 | 0 | 0 | return 1; # made it. | |||||
1500 | } | |||||||
1501 | ||||||||
1502 | =head2 _parse_query_string | |||||||
1503 | ||||||||
1504 | Title : _parse_query_string | |||||||
1505 | Usage : $hiv_query->_parse_query_string($str) | |||||||
1506 | Function: Parses a query string using query language emulator QRY | |||||||
1507 | : in L |
|||||||
1508 | Example : | |||||||
1509 | Returns : arrayref of hash structures suitable for passing to _create_lanl_query | |||||||
1510 | Args : a string scalar | |||||||
1511 | ||||||||
1512 | =cut | |||||||
1513 | ||||||||
1514 | sub _parse_query_string { | |||||||
1515 | 4 | 4 | 5 | my $self = shift; | ||||
1516 | 4 | 5 | my $qstring = shift; | |||||
1517 | 4 | 4 | my ($ptree, @ret); | |||||
1518 | #syntax errors thrown in QRY (in HIVQueryHelper module) | |||||||
1519 | 4 | 13 | $ptree = QRY::_parse_q( $qstring ); | |||||
1520 | 4 | 12 | @ret = QRY::_make_q($ptree); | |||||
1521 | 4 | 62 | return @ret; | |||||
1522 | } | |||||||
1523 | ||||||||
1524 | =head1 Dude, sorry- | |||||||
1525 | ||||||||
1526 | =head2 _sorry | |||||||
1527 | ||||||||
1528 | Title : _sorry | |||||||
1529 | Usage : $hiv_query->_sorry("-president=>Powell") | |||||||
1530 | Function: Throws an exception for unsupported option or parameter | |||||||
1531 | Example : | |||||||
1532 | Returns : | |||||||
1533 | Args : scalar string | |||||||
1534 | ||||||||
1535 | =cut | |||||||
1536 | ||||||||
1537 | sub _sorry{ | |||||||
1538 | 0 | 0 | my $self = shift; | |||||
1539 | 0 | my $parm = shift; | ||||||
1540 | 0 | $self->throw(-class=>"Bio::HIVSorry::Exception", | ||||||
1541 | -text=>"Sorry, option/parameter \"$parm\" not (yet) supported. See manpage to complain.", | |||||||
1542 | -value=>$parm); | |||||||
1543 | 0 | return; | ||||||
1544 | } | |||||||
1545 | ||||||||
1546 | 1; |